OpenCloudOS-Kernel/include/linux/kernfs.h

496 lines
15 KiB
C
Raw Normal View History

/*
* kernfs.h - pseudo filesystem decoupled from vfs locking
*
* This file is released under the GPLv2.
*/
#ifndef __LINUX_KERNFS_H
#define __LINUX_KERNFS_H
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/lockdep.h>
#include <linux/rbtree.h>
#include <linux/atomic.h>
#include <linux/wait.h>
struct file;
struct dentry;
struct iattr;
struct seq_file;
struct vm_area_struct;
struct super_block;
struct file_system_type;
struct kernfs_open_node;
struct kernfs_iattrs;
enum kernfs_node_type {
KERNFS_DIR = 0x0001,
KERNFS_FILE = 0x0002,
KERNFS_LINK = 0x0004,
};
#define KERNFS_TYPE_MASK 0x000f
#define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK
enum kernfs_node_flag {
KERNFS_ACTIVATED = 0x0010,
KERNFS_NS = 0x0020,
KERNFS_HAS_SEQ_SHOW = 0x0040,
KERNFS_HAS_MMAP = 0x0080,
KERNFS_LOCKDEP = 0x0100,
kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers Sometimes it's necessary to implement a node which wants to delete nodes including itself. This isn't straightforward because of kernfs active reference. While a file operation is in progress, an active reference is held and kernfs_remove() waits for all such references to drain before completing. For a self-deleting node, this is a deadlock as kernfs_remove() ends up waiting for an active reference that itself is sitting on top of. This currently is worked around in the sysfs layer using sysfs_schedule_callback() which makes such removals asynchronous. While it works, it's rather cumbersome and inherently breaks synchronicity of the operation - the file operation which triggered the operation may complete before the removal is finished (or even started) and the removal may fail asynchronously. If a removal operation is immmediately followed by another operation which expects the specific name to be available (e.g. removal followed by rename onto the same name), there's no way to make the latter operation reliable. The thing is there's no inherent reason for this to be asynchrnous. All that's necessary to do this synchronous is a dedicated operation which drops its own active ref and deactivates self. This patch implements kernfs_remove_self() and its wrappers in sysfs and driver core. kernfs_remove_self() is to be called from one of the file operations, drops the active ref the task is holding, removes the self node, and restores active ref to the dead node so that the ref is balanced afterwards. __kernfs_remove() is updated so that it takes an early exit if the target node is already fully removed so that the active ref restored by kernfs_remove_self() after removal doesn't confuse the deactivation path. This makes implementing self-deleting nodes very easy. The normal removal path doesn't even need to be changed to use kernfs_remove_self() for the self-deleting node. The method can invoke kernfs_remove_self() on itself before proceeding the normal removal path. kernfs_remove() invoked on the node by the normal deletion path will simply be ignored. This will replace sysfs_schedule_callback(). A subtle feature of sysfs_schedule_callback() is that it collapses multiple invocations - even if multiple removals are triggered, the removal callback is run only once. An equivalent effect can be achieved by testing the return value of kernfs_remove_self() - only the one which gets %true return value should proceed with actual deletion. All other instances of kernfs_remove_self() will wait till the enclosing kernfs operation which invoked the winning instance of kernfs_remove_self() finishes and then return %false. This trivially makes all users of kernfs_remove_self() automatically show correct synchronous behavior even when there are multiple concurrent operations - all "echo 1 > delete" instances will finish only after the whole operation is completed by one of the instances. Note that manipulation of active ref is implemented in separate public functions - kernfs_[un]break_active_protection(). kernfs_remove_self() is the only user at the moment but this will be used to cater to more complex cases. v2: For !CONFIG_SYSFS, dummy version kernfs_remove_self() was missing and sysfs_remove_file_self() had incorrect return type. Fix it. Reported by kbuild test bot. v3: kernfs_[un]break_active_protection() separated out from kernfs_remove_self() and exposed as public API. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: kbuild test robot <fengguang.wu@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-02-04 03:03:01 +08:00
KERNFS_SUICIDAL = 0x0400,
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
};
/* @flags for kernfs_create_root() */
enum kernfs_root_flag {
2014-05-13 01:56:27 +08:00
/*
* kernfs_nodes are created in the deactivated state and invisible.
* They require explicit kernfs_activate() to become visible. This
* can be used to make related nodes become visible atomically
* after all nodes are created successfully.
*/
KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001,
/*
* For regular flies, if the opener has CAP_DAC_OVERRIDE, open(2)
* succeeds regardless of the RW permissions. sysfs had an extra
* layer of enforcement where open(2) fails with -EACCES regardless
* of CAP_DAC_OVERRIDE if the permission doesn't have the
* respective read or write access at all (none of S_IRUGO or
* S_IWUGO) or the respective operation isn't implemented. The
* following flag enables that behavior.
*/
KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002,
};
/* type-specific structures for kernfs_node union members */
struct kernfs_elem_dir {
unsigned long subdirs;
/* children rbtree starts here and goes through kn->rb */
struct rb_root children;
/*
* The kernfs hierarchy this directory belongs to. This fits
* better directly in kernfs_node but is here to save space.
*/
struct kernfs_root *root;
};
struct kernfs_elem_symlink {
struct kernfs_node *target_kn;
};
struct kernfs_elem_attr {
const struct kernfs_ops *ops;
struct kernfs_open_node *open;
loff_t size;
kernfs: kernfs_notify() must be useable from non-sleepable contexts d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events too") added fsnotify triggering to kernfs_notify() which requires a sleepable context. There are already existing users of kernfs_notify() which invoke it from an atomic context and in general it's silly to require a sleepable context for triggering a notification. The following is an invalid context bug triggerd by md invoking sysfs_notify() from IO completion path. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 2 locks held by swapper/1/0: #0: (&(&vblk->vq_lock)->rlock){-.-...}, at: [<ffffffffa0039042>] virtblk_done+0x42/0xe0 [virtio_blk] #1: (&(&bitmap->counts.lock)->rlock){-.....}, at: [<ffffffff81633718>] bitmap_endwrite+0x68/0x240 irq event stamp: 33518 hardirqs last enabled at (33515): [<ffffffff8102544f>] default_idle+0x1f/0x230 hardirqs last disabled at (33516): [<ffffffff818122ed>] common_interrupt+0x6d/0x72 softirqs last enabled at (33518): [<ffffffff810a1272>] _local_bh_enable+0x22/0x50 softirqs last disabled at (33517): [<ffffffff810a29e0>] irq_enter+0x60/0x80 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-0.rc2.git2.1.fc21.x86_64 #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000000 f90db13964f4ee05 ffff88007d403b80 ffffffff81807b4c 0000000000000000 ffff88007d403ba8 ffffffff810d4f14 0000000000000000 0000000000441800 ffff880078fa1780 ffff88007d403c38 ffffffff8180caf2 Call Trace: <IRQ> [<ffffffff81807b4c>] dump_stack+0x4d/0x66 [<ffffffff810d4f14>] __might_sleep+0x184/0x240 [<ffffffff8180caf2>] mutex_lock_nested+0x42/0x440 [<ffffffff812d76a0>] kernfs_notify+0x90/0x150 [<ffffffff8163377c>] bitmap_endwrite+0xcc/0x240 [<ffffffffa00de863>] close_write+0x93/0xb0 [raid1] [<ffffffffa00df029>] r1_bio_write_done+0x29/0x50 [raid1] [<ffffffffa00e0474>] raid1_end_write_request+0xe4/0x260 [raid1] [<ffffffff813acb8b>] bio_endio+0x6b/0xa0 [<ffffffff813b46c4>] blk_update_request+0x94/0x420 [<ffffffff813bf0ea>] blk_mq_end_io+0x1a/0x70 [<ffffffffa00392c2>] virtblk_request_done+0x32/0x80 [virtio_blk] [<ffffffff813c0648>] __blk_mq_complete_request+0x88/0x120 [<ffffffff813c070a>] blk_mq_complete_request+0x2a/0x30 [<ffffffffa0039066>] virtblk_done+0x66/0xe0 [virtio_blk] [<ffffffffa002535a>] vring_interrupt+0x3a/0xa0 [virtio_ring] [<ffffffff81116177>] handle_irq_event_percpu+0x77/0x340 [<ffffffff8111647d>] handle_irq_event+0x3d/0x60 [<ffffffff81119436>] handle_edge_irq+0x66/0x130 [<ffffffff8101c3e4>] handle_irq+0x84/0x150 [<ffffffff818146ad>] do_IRQ+0x4d/0xe0 [<ffffffff818122f2>] common_interrupt+0x72/0x72 <EOI> [<ffffffff8105f706>] ? native_safe_halt+0x6/0x10 [<ffffffff81025454>] default_idle+0x24/0x230 [<ffffffff81025f9f>] arch_cpu_idle+0xf/0x20 [<ffffffff810f5adc>] cpu_startup_entry+0x37c/0x7b0 [<ffffffff8104df1b>] start_secondary+0x25b/0x300 This patch fixes it by punting the notification delivery through a work item. This ends up adding an extra pointer to kernfs_elem_attr enlarging kernfs_node by a pointer, which is not ideal but not a very big deal either. If this turns out to be an actual issue, we can move kernfs_elem_attr->size to kernfs_node->iattr later. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Josh Boyer <jwboyer@fedoraproject.org> Cc: Jens Axboe <axboe@kernel.dk> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-07-02 04:41:03 +08:00
struct kernfs_node *notify_next; /* for kernfs_notify() */
};
/*
* kernfs_node - the building block of kernfs hierarchy. Each and every
* kernfs node is represented by single kernfs_node. Most fields are
* private to kernfs and shouldn't be accessed directly by kernfs users.
*
* As long as s_count reference is held, the kernfs_node itself is
* accessible. Dereferencing elem or any other outer entity requires
* active reference.
*/
struct kernfs_node {
atomic_t count;
atomic_t active;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
/*
* Use kernfs_get_parent() and kernfs_name/path() instead of
* accessing the following two fields directly. If the node is
* never moved to a different parent, it is safe to access the
* parent directly.
*/
struct kernfs_node *parent;
const char *name;
struct rb_node rb;
const void *ns; /* namespace tag */
unsigned int hash; /* ns + name hash */
union {
struct kernfs_elem_dir dir;
struct kernfs_elem_symlink symlink;
struct kernfs_elem_attr attr;
};
void *priv;
unsigned short flags;
umode_t mode;
unsigned int ino;
struct kernfs_iattrs *iattr;
};
/*
* kernfs_syscall_ops may be specified on kernfs_create_root() to support
* syscalls. These optional callbacks are invoked on the matching syscalls
* and can perform any kernfs operations which don't necessarily have to be
* the exact operation requested. An active reference is held for each
* kernfs_node parameter.
*/
struct kernfs_syscall_ops {
int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
int (*mkdir)(struct kernfs_node *parent, const char *name,
umode_t mode);
int (*rmdir)(struct kernfs_node *kn);
int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name);
};
sysfs, kernfs: implement kernfs_create/destroy_root() There currently is single kernfs hierarchy in the whole system which is used for sysfs. kernfs needs to support multiple hierarchies to allow other users. This patch introduces struct kernfs_root which serves as the root of each kernfs hierarchy and implements kernfs_create/destroy_root(). * Each kernfs_root is associated with a root sd (sysfs_dentry). The root is freed when the root sd is released and kernfs_destory_root() simply invokes kernfs_remove() on the root sd. sysfs_remove_one() is updated to handle release of the root sd. Note that ps_iattr update in sysfs_remove_one() is trivially updated for readability. * Root sd's are now dynamically allocated using sysfs_new_dirent(). Update sysfs_alloc_ino() so that it gives out ino from 1 so that the root sd still gets ino 1. * While kernfs currently only points to the root sd, it'll soon grow fields which are specific to each hierarchy. As determining a given sd's root will be necessary, sd->s_dir.root is added. This backlink fits better as a separate field in sd; however, sd->s_dir is inside union with space to spare, so use it to save space and provide kernfs_root() accessor to determine the root sd. * As hierarchies may be destroyed now, each mount needs to hold onto the hierarchy it's attached to. Update sysfs_fill_super() and sysfs_kill_sb() so that they get and put the kernfs_root respectively. * sysfs_root is replaced with kernfs_root which is dynamically created by invoking kernfs_create_root() from sysfs_init(). This patch doesn't introduce any visible behavior changes. v2: kernfs_create_root() forgot to set @sd->priv. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:40 +08:00
struct kernfs_root {
/* published fields */
struct kernfs_node *kn;
unsigned int flags; /* KERNFS_ROOT_* flags */
/* private fields, do not use outside kernfs proper */
struct ida ino_ida;
struct kernfs_syscall_ops *syscall_ops;
/* list of kernfs_super_info of this root, protected by kernfs_mutex */
struct list_head supers;
wait_queue_head_t deactivate_waitq;
sysfs, kernfs: implement kernfs_create/destroy_root() There currently is single kernfs hierarchy in the whole system which is used for sysfs. kernfs needs to support multiple hierarchies to allow other users. This patch introduces struct kernfs_root which serves as the root of each kernfs hierarchy and implements kernfs_create/destroy_root(). * Each kernfs_root is associated with a root sd (sysfs_dentry). The root is freed when the root sd is released and kernfs_destory_root() simply invokes kernfs_remove() on the root sd. sysfs_remove_one() is updated to handle release of the root sd. Note that ps_iattr update in sysfs_remove_one() is trivially updated for readability. * Root sd's are now dynamically allocated using sysfs_new_dirent(). Update sysfs_alloc_ino() so that it gives out ino from 1 so that the root sd still gets ino 1. * While kernfs currently only points to the root sd, it'll soon grow fields which are specific to each hierarchy. As determining a given sd's root will be necessary, sd->s_dir.root is added. This backlink fits better as a separate field in sd; however, sd->s_dir is inside union with space to spare, so use it to save space and provide kernfs_root() accessor to determine the root sd. * As hierarchies may be destroyed now, each mount needs to hold onto the hierarchy it's attached to. Update sysfs_fill_super() and sysfs_kill_sb() so that they get and put the kernfs_root respectively. * sysfs_root is replaced with kernfs_root which is dynamically created by invoking kernfs_create_root() from sysfs_init(). This patch doesn't introduce any visible behavior changes. v2: kernfs_create_root() forgot to set @sd->priv. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:40 +08:00
};
struct kernfs_open_file {
/* published fields */
struct kernfs_node *kn;
struct file *file;
void *priv;
/* private fields, do not use outside kernfs proper */
struct mutex mutex;
int event;
struct list_head list;
sysfs/kernfs: allow attributes to request write buffer be pre-allocated. md/raid allows metadata management to be performed in user-space. A various times, particularly on device failure, the metadata needs to be updated before further writes can be permitted. This means that the user-space program which updates metadata much not block on writeout, and so must not allocate memory. mlockall(MCL_CURRENT|MCL_FUTURE) and pre-allocation can avoid all memory allocation issues for user-memory, but that does not help kernel memory. Several kernel objects can be pre-allocated. e.g. files opened before any writes to the array are permitted. However some kernel allocation happens in places that cannot be pre-allocated. In particular, writes to sysfs files (to tell md that it can now allow writes to the array) allocate a buffer using GFP_KERNEL. This patch allows attributes to be marked as "PREALLOC". In that case the maximal buffer is allocated when the file is opened, and then used on each write instead of allocating a new buffer. As the same buffer is now shared for all writes on the same file description, the mutex is extended to cover full use of the buffer including the copy_from_user(). The new __ATTR_PREALLOC() 'or's a new flag in to the 'mode', which is inspected by sysfs_add_file_mode_ns() to determine if the file should be marked as requiring prealloc. Despite the comment, we *do* use ->seq_show together with ->prealloc in this patch. The next patch fixes that. Signed-off-by: NeilBrown <neilb@suse.de> Reviewed-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-10-13 13:41:28 +08:00
char *prealloc_buf;
kernfs: cache atomic_write_len in kernfs_open_file While implementing atomic_write_len, 4d3773c4bb41 ("kernfs: implement kernfs_ops->atomic_write_len") moved data copy from userland inside kernfs_get_active() and kernfs_open_file->mutex so that kernfs_ops->atomic_write_len can be accessed before copying buffer from userland; unfortunately, this could lead to locking order inversion involving mmap_sem if copy_from_user() takes a page fault. ====================================================== [ INFO: possible circular locking dependency detected ] 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 Tainted: G W ------------------------------------------------------- trinity-c236/10658 is trying to acquire lock: (&of->mutex#2){+.+.+.}, at: [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120 but task is already holding lock: (&mm->mmap_sem){++++++}, at: [<mm/util.c:397>] vm_mmap_pgoff+0x6e/0xe0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0 [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0 [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0 [<mm/memory.c:4188>] might_fault+0x7e/0xb0 [<arch/x86/include/asm/uaccess.h:713 fs/kernfs/file.c:291>] kernfs_fop_write+0xd8/0x190 [<fs/read_write.c:473>] vfs_write+0xe3/0x1d0 [<fs/read_write.c:523 fs/read_write.c:515>] SyS_write+0x5d/0xa0 [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2 -> #0 (&of->mutex#2){+.+.+.}: [<kernel/locking/lockdep.c:1840>] check_prev_add+0x13f/0x560 [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0 [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0 [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0 [<kernel/locking/mutex.c:470 kernel/locking/mutex.c:571>] mutex_lock_nested+0x6a/0x510 [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120 [<mm/mmap.c:1573>] mmap_region+0x310/0x5c0 [<mm/mmap.c:1365>] do_mmap_pgoff+0x385/0x430 [<mm/util.c:399>] vm_mmap_pgoff+0x8f/0xe0 [<mm/mmap.c:1416 mm/mmap.c:1374>] SyS_mmap_pgoff+0x1b0/0x210 [<arch/x86/kernel/sys_x86_64.c:72>] SyS_mmap+0x1d/0x20 [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&of->mutex#2); lock(&mm->mmap_sem); lock(&of->mutex#2); *** DEADLOCK *** 1 lock held by trinity-c236/10658: #0: (&mm->mmap_sem){++++++}, at: [<mm/util.c:397>] vm_mmap_pgoff+0x6e/0xe0 stack backtrace: CPU: 2 PID: 10658 Comm: trinity-c236 Tainted: G W 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 0000000000000000 ffff88011911fa48 ffffffff8438e945 0000000000000000 0000000000000000 ffff88011911fa98 ffffffff811a0109 ffff88011911fab8 ffff88011911fab8 ffff88011911fa98 ffff880119128cc0 ffff880119128cf8 Call Trace: [<lib/dump_stack.c:52>] dump_stack+0x52/0x7f [<kernel/locking/lockdep.c:1213>] print_circular_bug+0x129/0x160 [<kernel/locking/lockdep.c:1840>] check_prev_add+0x13f/0x560 [<include/linux/spinlock.h:343 mm/slub.c:1933>] ? deactivate_slab+0x511/0x550 [<kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131>] validate_chain+0x6c5/0x7b0 [<kernel/locking/lockdep.c:3182>] __lock_acquire+0x4cd/0x5a0 [<mm/mmap.c:1552>] ? mmap_region+0x24a/0x5c0 [<arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602>] lock_acquire+0x182/0x1d0 [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120 [<kernel/locking/mutex.c:470 kernel/locking/mutex.c:571>] mutex_lock_nested+0x6a/0x510 [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120 [<kernel/sched/core.c:2477>] ? get_parent_ip+0x11/0x50 [<fs/kernfs/file.c:487>] ? kernfs_fop_mmap+0x54/0x120 [<fs/kernfs/file.c:487>] kernfs_fop_mmap+0x54/0x120 [<mm/mmap.c:1573>] mmap_region+0x310/0x5c0 [<mm/mmap.c:1365>] do_mmap_pgoff+0x385/0x430 [<mm/util.c:397>] ? vm_mmap_pgoff+0x6e/0xe0 [<mm/util.c:399>] vm_mmap_pgoff+0x8f/0xe0 [<kernel/rcu/update.c:97>] ? __rcu_read_unlock+0x44/0xb0 [<fs/file.c:641>] ? dup_fd+0x3c0/0x3c0 [<mm/mmap.c:1416 mm/mmap.c:1374>] SyS_mmap_pgoff+0x1b0/0x210 [<arch/x86/kernel/sys_x86_64.c:72>] SyS_mmap+0x1d/0x20 [<arch/x86/kernel/entry_64.S:749>] tracesys+0xdd/0xe2 Fix it by caching atomic_write_len in kernfs_open_file during open so that it can be determined without accessing kernfs_ops in kernfs_fop_write(). This restores the structure of kernfs_fop_write() before 4d3773c4bb41 with updated @len determination logic. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Sasha Levin <sasha.levin@oracle.com> References: http://lkml.kernel.org/g/53113485.2090407@oracle.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-03-05 04:38:46 +08:00
size_t atomic_write_len;
bool mmapped;
const struct vm_operations_struct *vm_ops;
};
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
struct kernfs_ops {
/*
* Read is handled by either seq_file or raw_read().
*
* If seq_show() is present, seq_file path is active. Other seq
* operations are optional and if not implemented, the behavior is
* equivalent to single_open(). @sf->private points to the
* associated kernfs_open_file.
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
*
* read() is bounced through kernel buffer and a read larger than
* PAGE_SIZE results in partial operation of PAGE_SIZE.
*/
int (*seq_show)(struct seq_file *sf, void *v);
void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
void (*seq_stop)(struct seq_file *sf, void *v);
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
ssize_t (*read)(struct kernfs_open_file *of, char *buf, size_t bytes,
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
loff_t off);
/*
* write() is bounced through kernel buffer. If atomic_write_len
* is not set, a write larger than PAGE_SIZE results in partial
* operations of PAGE_SIZE chunks. If atomic_write_len is set,
* writes upto the specified size are executed atomically but
* larger ones are rejected with -E2BIG.
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
*/
size_t atomic_write_len;
sysfs/kernfs: allow attributes to request write buffer be pre-allocated. md/raid allows metadata management to be performed in user-space. A various times, particularly on device failure, the metadata needs to be updated before further writes can be permitted. This means that the user-space program which updates metadata much not block on writeout, and so must not allocate memory. mlockall(MCL_CURRENT|MCL_FUTURE) and pre-allocation can avoid all memory allocation issues for user-memory, but that does not help kernel memory. Several kernel objects can be pre-allocated. e.g. files opened before any writes to the array are permitted. However some kernel allocation happens in places that cannot be pre-allocated. In particular, writes to sysfs files (to tell md that it can now allow writes to the array) allocate a buffer using GFP_KERNEL. This patch allows attributes to be marked as "PREALLOC". In that case the maximal buffer is allocated when the file is opened, and then used on each write instead of allocating a new buffer. As the same buffer is now shared for all writes on the same file description, the mutex is extended to cover full use of the buffer including the copy_from_user(). The new __ATTR_PREALLOC() 'or's a new flag in to the 'mode', which is inspected by sysfs_add_file_mode_ns() to determine if the file should be marked as requiring prealloc. Despite the comment, we *do* use ->seq_show together with ->prealloc in this patch. The next patch fixes that. Signed-off-by: NeilBrown <neilb@suse.de> Reviewed-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-10-13 13:41:28 +08:00
/*
* "prealloc" causes a buffer to be allocated at open for
* all read/write requests. As ->seq_show uses seq_read()
* which does its own allocation, it is incompatible with
* ->prealloc. Provide ->read and ->write with ->prealloc.
*/
bool prealloc;
ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
loff_t off);
int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lock_class_key lockdep_key;
#endif
sysfs, kernfs: introduce kernfs_ops We're in the process of separating out core sysfs functionality into kernfs which will deal with sysfs_dirents directly. This patch introduces kernfs_ops which hosts methods kernfs users implement and updates fs/sysfs/file.c such that sysfs_kf_*() functions populate kernfs_ops and kernfs_file_*() functions call the matching entries from kernfs_ops. kernfs_ops contains the following groups of methods. * seq_show() - for kernfs files which use seq_file for reads. * read() - for direct read implementations. Used iff seq_show() is not implemented. * write() - for writes. * mmap() - for mmaps. Notes: * sysfs_elem_attr->ops is added so that kernfs_ops can be accessed from sysfs_dirent. kernfs_ops() helper is added to verify locking and access the field. * SYSFS_FLAG_HAS_(SEQ_SHOW|MMAP) added. sd->s_attr->ops is accessible only while holding active_ref and there are cases where we want to take different actions depending on which ops are implemented. These two flags cache whether the two ops are implemented for those. * kernfs_file_*() no longer test sysfs type but chooses different behaviors depending on which methods in kernfs_ops are implemented. The conversions are trivial except for the open path. As kernfs_file_open() now decides whether to allow read/write accesses depending on the kernfs_ops implemented, the presence of methods in kobjs and attribute_bin should be propagated to kernfs_ops. sysfs_add_file_mode_ns() is updated so that it propagates presence / absence of the callbacks through _empty, _ro, _wo, _rw kernfs_ops. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:21 +08:00
};
#ifdef CONFIG_KERNFS
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
{
return kn->flags & KERNFS_TYPE_MASK;
}
/**
* kernfs_enable_ns - enable namespace under a directory
* @kn: directory of interest, should be empty
*
* This is to be called right after @kn is created to enable namespace
* under it. All children of @kn must have non-NULL namespace tags and
* only the ones which match the super_block's tag will be visible.
*/
static inline void kernfs_enable_ns(struct kernfs_node *kn)
{
WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children));
kn->flags |= KERNFS_NS;
}
/**
* kernfs_ns_enabled - test whether namespace is enabled
* @kn: the node to test
*
* Test whether namespace filtering is enabled for the children of @ns.
*/
static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
{
return kn->flags & KERNFS_NS;
}
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
size_t kernfs_path_len(struct kernfs_node *kn);
char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
size_t buflen);
void pr_cont_kernfs_name(struct kernfs_node *kn);
void pr_cont_kernfs_path(struct kernfs_node *kn);
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
const char *name, const void *ns);
struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
const char *path, const void *ns);
void kernfs_get(struct kernfs_node *kn);
void kernfs_put(struct kernfs_node *kn);
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry);
struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
unsigned int flags, void *priv);
sysfs, kernfs: implement kernfs_create/destroy_root() There currently is single kernfs hierarchy in the whole system which is used for sysfs. kernfs needs to support multiple hierarchies to allow other users. This patch introduces struct kernfs_root which serves as the root of each kernfs hierarchy and implements kernfs_create/destroy_root(). * Each kernfs_root is associated with a root sd (sysfs_dentry). The root is freed when the root sd is released and kernfs_destory_root() simply invokes kernfs_remove() on the root sd. sysfs_remove_one() is updated to handle release of the root sd. Note that ps_iattr update in sysfs_remove_one() is trivially updated for readability. * Root sd's are now dynamically allocated using sysfs_new_dirent(). Update sysfs_alloc_ino() so that it gives out ino from 1 so that the root sd still gets ino 1. * While kernfs currently only points to the root sd, it'll soon grow fields which are specific to each hierarchy. As determining a given sd's root will be necessary, sd->s_dir.root is added. This backlink fits better as a separate field in sd; however, sd->s_dir is inside union with space to spare, so use it to save space and provide kernfs_root() accessor to determine the root sd. * As hierarchies may be destroyed now, each mount needs to hold onto the hierarchy it's attached to. Update sysfs_fill_super() and sysfs_kill_sb() so that they get and put the kernfs_root respectively. * sysfs_root is replaced with kernfs_root which is dynamically created by invoking kernfs_create_root() from sysfs_init(). This patch doesn't introduce any visible behavior changes. v2: kernfs_create_root() forgot to set @sd->priv. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:40 +08:00
void kernfs_destroy_root(struct kernfs_root *root);
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
const char *name, umode_t mode,
void *priv, const void *ns);
struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
const char *name);
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
const char *name,
umode_t mode, loff_t size,
const struct kernfs_ops *ops,
void *priv, const void *ns,
struct lock_class_key *key);
struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
const char *name,
struct kernfs_node *target);
void kernfs_activate(struct kernfs_node *kn);
void kernfs_remove(struct kernfs_node *kn);
kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers Sometimes it's necessary to implement a node which wants to delete nodes including itself. This isn't straightforward because of kernfs active reference. While a file operation is in progress, an active reference is held and kernfs_remove() waits for all such references to drain before completing. For a self-deleting node, this is a deadlock as kernfs_remove() ends up waiting for an active reference that itself is sitting on top of. This currently is worked around in the sysfs layer using sysfs_schedule_callback() which makes such removals asynchronous. While it works, it's rather cumbersome and inherently breaks synchronicity of the operation - the file operation which triggered the operation may complete before the removal is finished (or even started) and the removal may fail asynchronously. If a removal operation is immmediately followed by another operation which expects the specific name to be available (e.g. removal followed by rename onto the same name), there's no way to make the latter operation reliable. The thing is there's no inherent reason for this to be asynchrnous. All that's necessary to do this synchronous is a dedicated operation which drops its own active ref and deactivates self. This patch implements kernfs_remove_self() and its wrappers in sysfs and driver core. kernfs_remove_self() is to be called from one of the file operations, drops the active ref the task is holding, removes the self node, and restores active ref to the dead node so that the ref is balanced afterwards. __kernfs_remove() is updated so that it takes an early exit if the target node is already fully removed so that the active ref restored by kernfs_remove_self() after removal doesn't confuse the deactivation path. This makes implementing self-deleting nodes very easy. The normal removal path doesn't even need to be changed to use kernfs_remove_self() for the self-deleting node. The method can invoke kernfs_remove_self() on itself before proceeding the normal removal path. kernfs_remove() invoked on the node by the normal deletion path will simply be ignored. This will replace sysfs_schedule_callback(). A subtle feature of sysfs_schedule_callback() is that it collapses multiple invocations - even if multiple removals are triggered, the removal callback is run only once. An equivalent effect can be achieved by testing the return value of kernfs_remove_self() - only the one which gets %true return value should proceed with actual deletion. All other instances of kernfs_remove_self() will wait till the enclosing kernfs operation which invoked the winning instance of kernfs_remove_self() finishes and then return %false. This trivially makes all users of kernfs_remove_self() automatically show correct synchronous behavior even when there are multiple concurrent operations - all "echo 1 > delete" instances will finish only after the whole operation is completed by one of the instances. Note that manipulation of active ref is implemented in separate public functions - kernfs_[un]break_active_protection(). kernfs_remove_self() is the only user at the moment but this will be used to cater to more complex cases. v2: For !CONFIG_SYSFS, dummy version kernfs_remove_self() was missing and sysfs_remove_file_self() had incorrect return type. Fix it. Reported by kbuild test bot. v3: kernfs_[un]break_active_protection() separated out from kernfs_remove_self() and exposed as public API. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: kbuild test robot <fengguang.wu@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-02-04 03:03:01 +08:00
void kernfs_break_active_protection(struct kernfs_node *kn);
void kernfs_unbreak_active_protection(struct kernfs_node *kn);
bool kernfs_remove_self(struct kernfs_node *kn);
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
const void *ns);
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name, const void *new_ns);
int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
void kernfs_notify(struct kernfs_node *kn);
const void *kernfs_super_ns(struct super_block *sb);
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created, const void *ns);
void kernfs_kill_sb(struct super_block *sb);
struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
void kernfs_init(void);
#else /* CONFIG_KERNFS */
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
{ return 0; } /* whatever */
static inline void kernfs_enable_ns(struct kernfs_node *kn) { }
static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
{ return false; }
static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{ return -ENOSYS; }
static inline size_t kernfs_path_len(struct kernfs_node *kn)
{ return 0; }
static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
size_t buflen)
{ return NULL; }
static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { }
static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { }
static inline struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
{ return NULL; }
static inline struct kernfs_node *
kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
const void *ns)
{ return NULL; }
static inline struct kernfs_node *
kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path,
const void *ns)
{ return NULL; }
static inline void kernfs_get(struct kernfs_node *kn) { }
static inline void kernfs_put(struct kernfs_node *kn) { }
static inline struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{ return NULL; }
static inline struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
{ return NULL; }
static inline struct inode *
kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
{ return NULL; }
static inline struct kernfs_root *
kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags,
void *priv)
sysfs, kernfs: implement kernfs_create/destroy_root() There currently is single kernfs hierarchy in the whole system which is used for sysfs. kernfs needs to support multiple hierarchies to allow other users. This patch introduces struct kernfs_root which serves as the root of each kernfs hierarchy and implements kernfs_create/destroy_root(). * Each kernfs_root is associated with a root sd (sysfs_dentry). The root is freed when the root sd is released and kernfs_destory_root() simply invokes kernfs_remove() on the root sd. sysfs_remove_one() is updated to handle release of the root sd. Note that ps_iattr update in sysfs_remove_one() is trivially updated for readability. * Root sd's are now dynamically allocated using sysfs_new_dirent(). Update sysfs_alloc_ino() so that it gives out ino from 1 so that the root sd still gets ino 1. * While kernfs currently only points to the root sd, it'll soon grow fields which are specific to each hierarchy. As determining a given sd's root will be necessary, sd->s_dir.root is added. This backlink fits better as a separate field in sd; however, sd->s_dir is inside union with space to spare, so use it to save space and provide kernfs_root() accessor to determine the root sd. * As hierarchies may be destroyed now, each mount needs to hold onto the hierarchy it's attached to. Update sysfs_fill_super() and sysfs_kill_sb() so that they get and put the kernfs_root respectively. * sysfs_root is replaced with kernfs_root which is dynamically created by invoking kernfs_create_root() from sysfs_init(). This patch doesn't introduce any visible behavior changes. v2: kernfs_create_root() forgot to set @sd->priv. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2013-11-29 03:54:40 +08:00
{ return ERR_PTR(-ENOSYS); }
static inline void kernfs_destroy_root(struct kernfs_root *root) { }
static inline struct kernfs_node *
kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
umode_t mode, void *priv, const void *ns)
{ return ERR_PTR(-ENOSYS); }
static inline struct kernfs_node *
__kernfs_create_file(struct kernfs_node *parent, const char *name,
umode_t mode, loff_t size, const struct kernfs_ops *ops,
void *priv, const void *ns, struct lock_class_key *key)
{ return ERR_PTR(-ENOSYS); }
static inline struct kernfs_node *
kernfs_create_link(struct kernfs_node *parent, const char *name,
struct kernfs_node *target)
{ return ERR_PTR(-ENOSYS); }
static inline void kernfs_activate(struct kernfs_node *kn) { }
static inline void kernfs_remove(struct kernfs_node *kn) { }
kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers Sometimes it's necessary to implement a node which wants to delete nodes including itself. This isn't straightforward because of kernfs active reference. While a file operation is in progress, an active reference is held and kernfs_remove() waits for all such references to drain before completing. For a self-deleting node, this is a deadlock as kernfs_remove() ends up waiting for an active reference that itself is sitting on top of. This currently is worked around in the sysfs layer using sysfs_schedule_callback() which makes such removals asynchronous. While it works, it's rather cumbersome and inherently breaks synchronicity of the operation - the file operation which triggered the operation may complete before the removal is finished (or even started) and the removal may fail asynchronously. If a removal operation is immmediately followed by another operation which expects the specific name to be available (e.g. removal followed by rename onto the same name), there's no way to make the latter operation reliable. The thing is there's no inherent reason for this to be asynchrnous. All that's necessary to do this synchronous is a dedicated operation which drops its own active ref and deactivates self. This patch implements kernfs_remove_self() and its wrappers in sysfs and driver core. kernfs_remove_self() is to be called from one of the file operations, drops the active ref the task is holding, removes the self node, and restores active ref to the dead node so that the ref is balanced afterwards. __kernfs_remove() is updated so that it takes an early exit if the target node is already fully removed so that the active ref restored by kernfs_remove_self() after removal doesn't confuse the deactivation path. This makes implementing self-deleting nodes very easy. The normal removal path doesn't even need to be changed to use kernfs_remove_self() for the self-deleting node. The method can invoke kernfs_remove_self() on itself before proceeding the normal removal path. kernfs_remove() invoked on the node by the normal deletion path will simply be ignored. This will replace sysfs_schedule_callback(). A subtle feature of sysfs_schedule_callback() is that it collapses multiple invocations - even if multiple removals are triggered, the removal callback is run only once. An equivalent effect can be achieved by testing the return value of kernfs_remove_self() - only the one which gets %true return value should proceed with actual deletion. All other instances of kernfs_remove_self() will wait till the enclosing kernfs operation which invoked the winning instance of kernfs_remove_self() finishes and then return %false. This trivially makes all users of kernfs_remove_self() automatically show correct synchronous behavior even when there are multiple concurrent operations - all "echo 1 > delete" instances will finish only after the whole operation is completed by one of the instances. Note that manipulation of active ref is implemented in separate public functions - kernfs_[un]break_active_protection(). kernfs_remove_self() is the only user at the moment but this will be used to cater to more complex cases. v2: For !CONFIG_SYSFS, dummy version kernfs_remove_self() was missing and sysfs_remove_file_self() had incorrect return type. Fix it. Reported by kbuild test bot. v3: kernfs_[un]break_active_protection() separated out from kernfs_remove_self() and exposed as public API. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: kbuild test robot <fengguang.wu@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-02-04 03:03:01 +08:00
static inline bool kernfs_remove_self(struct kernfs_node *kn)
{ return false; }
static inline int kernfs_remove_by_name_ns(struct kernfs_node *kn,
const char *name, const void *ns)
{ return -ENOSYS; }
static inline int kernfs_rename_ns(struct kernfs_node *kn,
struct kernfs_node *new_parent,
const char *new_name, const void *new_ns)
{ return -ENOSYS; }
static inline int kernfs_setattr(struct kernfs_node *kn,
const struct iattr *iattr)
{ return -ENOSYS; }
static inline void kernfs_notify(struct kernfs_node *kn) { }
static inline const void *kernfs_super_ns(struct super_block *sb)
{ return NULL; }
static inline struct dentry *
kernfs_mount_ns(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created, const void *ns)
{ return ERR_PTR(-ENOSYS); }
static inline void kernfs_kill_sb(struct super_block *sb) { }
static inline void kernfs_init(void) { }
#endif /* CONFIG_KERNFS */
static inline struct kernfs_node *
kernfs_find_and_get(struct kernfs_node *kn, const char *name)
{
return kernfs_find_and_get_ns(kn, name, NULL);
}
static inline struct kernfs_node *
kernfs_walk_and_get(struct kernfs_node *kn, const char *path)
{
return kernfs_walk_and_get_ns(kn, path, NULL);
}
static inline struct kernfs_node *
kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
void *priv)
{
return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
}
static inline struct kernfs_node *
kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
umode_t mode, loff_t size, const struct kernfs_ops *ops,
void *priv, const void *ns)
{
struct lock_class_key *key = NULL;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
key = (struct lock_class_key *)&ops->lockdep_key;
#endif
return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
key);
}
static inline struct kernfs_node *
kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
loff_t size, const struct kernfs_ops *ops, void *priv)
{
return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
}
static inline int kernfs_remove_by_name(struct kernfs_node *parent,
const char *name)
{
return kernfs_remove_by_name_ns(parent, name, NULL);
}
static inline int kernfs_rename(struct kernfs_node *kn,
struct kernfs_node *new_parent,
const char *new_name)
{
return kernfs_rename_ns(kn, new_parent, new_name, NULL);
}
static inline struct dentry *
kernfs_mount(struct file_system_type *fs_type, int flags,
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created)
{
return kernfs_mount_ns(fs_type, flags, root,
magic, new_sb_created, NULL);
}
#endif /* __LINUX_KERNFS_H */