Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits) [PATCH] ocfs2: zero_user_page conversion ocfs2: Support xfs style space reservation ioctls ocfs2: support for removing file regions ocfs2: update truncate handling of partial clusters ocfs2: btree support for removal of arbirtrary extents ocfs2: Support creation of unwritten extents ocfs2: support writing of unwritten extents ocfs2: small cleanup of ocfs2_write_begin_nolock() ocfs2: btree changes for unwritten extents ocfs2: abstract btree growing calls ocfs2: use all extent block suballocators ocfs2: plug truncate into cached dealloc routines ocfs2: simplify deallocation locking ocfs2: harden buffer check during mapping of page blocks ocfs2: shared writeable mmap ocfs2: factor out write aops into nolock variants ocfs2: rework ocfs2_buffered_write_cluster() ocfs2: take ip_alloc_sem during entire truncate ocfs2: Add "preferred slot" mount option [KJ PATCH] Replacing memset(<addr>,0,PAGE_SIZE) with clear_page() in fs/ocfs2/dlm/dlmrecovery.c ...
This commit is contained in:
commit
add096909d
|
@ -238,6 +238,8 @@ config_item_type.
|
|||
struct config_group *(*make_group)(struct config_group *group,
|
||||
const char *name);
|
||||
int (*commit_item)(struct config_item *item);
|
||||
void (*disconnect_notify)(struct config_group *group,
|
||||
struct config_item *item);
|
||||
void (*drop_item)(struct config_group *group,
|
||||
struct config_item *item);
|
||||
};
|
||||
|
@ -268,6 +270,16 @@ the item in other threads, the memory is safe. It may take some time
|
|||
for the item to actually disappear from the subsystem's usage. But it
|
||||
is gone from configfs.
|
||||
|
||||
When drop_item() is called, the item's linkage has already been torn
|
||||
down. It no longer has a reference on its parent and has no place in
|
||||
the item hierarchy. If a client needs to do some cleanup before this
|
||||
teardown happens, the subsystem can implement the
|
||||
ct_group_ops->disconnect_notify() method. The method is called after
|
||||
configfs has removed the item from the filesystem view but before the
|
||||
item is removed from its parent group. Like drop_item(),
|
||||
disconnect_notify() is void and cannot fail. Client subsystems should
|
||||
not drop any references here, as they still must do it in drop_item().
|
||||
|
||||
A config_group cannot be removed while it still has child items. This
|
||||
is implemented in the configfs rmdir(2) code. ->drop_item() will not be
|
||||
called, as the item has not been dropped. rmdir(2) will fail, as the
|
||||
|
@ -280,18 +292,18 @@ tells configfs to make the subsystem appear in the file tree.
|
|||
|
||||
struct configfs_subsystem {
|
||||
struct config_group su_group;
|
||||
struct semaphore su_sem;
|
||||
struct mutex su_mutex;
|
||||
};
|
||||
|
||||
int configfs_register_subsystem(struct configfs_subsystem *subsys);
|
||||
void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
|
||||
|
||||
A subsystem consists of a toplevel config_group and a semaphore.
|
||||
A subsystem consists of a toplevel config_group and a mutex.
|
||||
The group is where child config_items are created. For a subsystem,
|
||||
this group is usually defined statically. Before calling
|
||||
configfs_register_subsystem(), the subsystem must have initialized the
|
||||
group via the usual group _init() functions, and it must also have
|
||||
initialized the semaphore.
|
||||
initialized the mutex.
|
||||
When the register call returns, the subsystem is live, and it
|
||||
will be visible via configfs. At that point, mkdir(2) can be called and
|
||||
the subsystem must be ready for it.
|
||||
|
@ -303,7 +315,7 @@ subsystem/group and the simple_child item in configfs_example.c It
|
|||
shows a trivial object displaying and storing an attribute, and a simple
|
||||
group creating and destroying these children.
|
||||
|
||||
[Hierarchy Navigation and the Subsystem Semaphore]
|
||||
[Hierarchy Navigation and the Subsystem Mutex]
|
||||
|
||||
There is an extra bonus that configfs provides. The config_groups and
|
||||
config_items are arranged in a hierarchy due to the fact that they
|
||||
|
@ -314,19 +326,19 @@ and config_item->ci_parent structure members.
|
|||
|
||||
A subsystem can navigate the cg_children list and the ci_parent pointer
|
||||
to see the tree created by the subsystem. This can race with configfs'
|
||||
management of the hierarchy, so configfs uses the subsystem semaphore to
|
||||
management of the hierarchy, so configfs uses the subsystem mutex to
|
||||
protect modifications. Whenever a subsystem wants to navigate the
|
||||
hierarchy, it must do so under the protection of the subsystem
|
||||
semaphore.
|
||||
mutex.
|
||||
|
||||
A subsystem will be prevented from acquiring the semaphore while a newly
|
||||
A subsystem will be prevented from acquiring the mutex while a newly
|
||||
allocated item has not been linked into this hierarchy. Similarly, it
|
||||
will not be able to acquire the semaphore while a dropping item has not
|
||||
will not be able to acquire the mutex while a dropping item has not
|
||||
yet been unlinked. This means that an item's ci_parent pointer will
|
||||
never be NULL while the item is in configfs, and that an item will only
|
||||
be in its parent's cg_children list for the same duration. This allows
|
||||
a subsystem to trust ci_parent and cg_children while they hold the
|
||||
semaphore.
|
||||
mutex.
|
||||
|
||||
[Item Aggregation Via symlink(2)]
|
||||
|
||||
|
@ -386,6 +398,33 @@ As a consequence of this, default_groups cannot be removed directly via
|
|||
rmdir(2). They also are not considered when rmdir(2) on the parent
|
||||
group is checking for children.
|
||||
|
||||
[Dependent Subsystems]
|
||||
|
||||
Sometimes other drivers depend on particular configfs items. For
|
||||
example, ocfs2 mounts depend on a heartbeat region item. If that
|
||||
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
|
||||
readonly. Not happy.
|
||||
|
||||
configfs provides two additional API calls: configfs_depend_item() and
|
||||
configfs_undepend_item(). A client driver can call
|
||||
configfs_depend_item() on an existing item to tell configfs that it is
|
||||
depended on. configfs will then return -EBUSY from rmdir(2) for that
|
||||
item. When the item is no longer depended on, the client driver calls
|
||||
configfs_undepend_item() on it.
|
||||
|
||||
These APIs cannot be called underneath any configfs callbacks, as
|
||||
they will conflict. They can block and allocate. A client driver
|
||||
probably shouldn't be calling them of its own gumption. Rather it should
|
||||
be providing an API that external subsystems call.
|
||||
|
||||
How does this work? Imagine the ocfs2 mount process. When it mounts,
|
||||
it asks for a heartbeat region item. This is done via a call into the
|
||||
heartbeat code. Inside the heartbeat code, the region item is looked
|
||||
up. Here, the heartbeat code calls configfs_depend_item(). If it
|
||||
succeeds, then heartbeat knows the region is safe to give to ocfs2.
|
||||
If it fails, it was being torn down anyway, and heartbeat can gracefully
|
||||
pass up an error.
|
||||
|
||||
[Committable Items]
|
||||
|
||||
NOTE: Committable items are currently unimplemented.
|
||||
|
|
|
@ -453,7 +453,7 @@ static int __init configfs_example_init(void)
|
|||
subsys = example_subsys[i];
|
||||
|
||||
config_group_init(&subsys->su_group);
|
||||
init_MUTEX(&subsys->su_sem);
|
||||
mutex_init(&subsys->su_mutex);
|
||||
ret = configfs_register_subsystem(subsys);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "Error %d while registering subsystem %s\n",
|
||||
|
|
|
@ -29,10 +29,11 @@
|
|||
|
||||
struct configfs_dirent {
|
||||
atomic_t s_count;
|
||||
int s_dependent_count;
|
||||
struct list_head s_sibling;
|
||||
struct list_head s_children;
|
||||
struct list_head s_links;
|
||||
void * s_element;
|
||||
void * s_element;
|
||||
int s_type;
|
||||
umode_t s_mode;
|
||||
struct dentry * s_dentry;
|
||||
|
@ -41,8 +42,8 @@ struct configfs_dirent {
|
|||
|
||||
#define CONFIGFS_ROOT 0x0001
|
||||
#define CONFIGFS_DIR 0x0002
|
||||
#define CONFIGFS_ITEM_ATTR 0x0004
|
||||
#define CONFIGFS_ITEM_LINK 0x0020
|
||||
#define CONFIGFS_ITEM_ATTR 0x0004
|
||||
#define CONFIGFS_ITEM_LINK 0x0020
|
||||
#define CONFIGFS_USET_DIR 0x0040
|
||||
#define CONFIGFS_USET_DEFAULT 0x0080
|
||||
#define CONFIGFS_USET_DROPPING 0x0100
|
||||
|
|
|
@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry)
|
|||
/* Mark that we've taken i_mutex */
|
||||
sd->s_type |= CONFIGFS_USET_DROPPING;
|
||||
|
||||
/*
|
||||
* Yup, recursive. If there's a problem, blame
|
||||
* deep nesting of default_groups
|
||||
*/
|
||||
ret = configfs_detach_prep(sd->s_dentry);
|
||||
if (!ret)
|
||||
continue;
|
||||
|
@ -562,7 +566,7 @@ static int populate_groups(struct config_group *group)
|
|||
|
||||
/*
|
||||
* All of link_obj/unlink_obj/link_group/unlink_group require that
|
||||
* subsys->su_sem is held.
|
||||
* subsys->su_mutex is held.
|
||||
*/
|
||||
|
||||
static void unlink_obj(struct config_item *item)
|
||||
|
@ -713,6 +717,28 @@ static void configfs_detach_group(struct config_item *item)
|
|||
configfs_detach_item(item);
|
||||
}
|
||||
|
||||
/*
|
||||
* After the item has been detached from the filesystem view, we are
|
||||
* ready to tear it out of the hierarchy. Notify the client before
|
||||
* we do that so they can perform any cleanup that requires
|
||||
* navigating the hierarchy. A client does not need to provide this
|
||||
* callback. The subsystem mutex MUST be held by the caller, and
|
||||
* references must be valid for both items. It also assumes the
|
||||
* caller has validated ci_type.
|
||||
*/
|
||||
static void client_disconnect_notify(struct config_item *parent_item,
|
||||
struct config_item *item)
|
||||
{
|
||||
struct config_item_type *type;
|
||||
|
||||
type = parent_item->ci_type;
|
||||
BUG_ON(!type);
|
||||
|
||||
if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
|
||||
type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
|
||||
item);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the initial reference from make_item()/make_group()
|
||||
* This function assumes that reference is held on item
|
||||
|
@ -733,11 +759,244 @@ static void client_drop_item(struct config_item *parent_item,
|
|||
*/
|
||||
if (type->ct_group_ops && type->ct_group_ops->drop_item)
|
||||
type->ct_group_ops->drop_item(to_config_group(parent_item),
|
||||
item);
|
||||
item);
|
||||
else
|
||||
config_item_put(item);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static void configfs_dump_one(struct configfs_dirent *sd, int level)
|
||||
{
|
||||
printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
|
||||
|
||||
#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
|
||||
type_print(CONFIGFS_ROOT);
|
||||
type_print(CONFIGFS_DIR);
|
||||
type_print(CONFIGFS_ITEM_ATTR);
|
||||
type_print(CONFIGFS_ITEM_LINK);
|
||||
type_print(CONFIGFS_USET_DIR);
|
||||
type_print(CONFIGFS_USET_DEFAULT);
|
||||
type_print(CONFIGFS_USET_DROPPING);
|
||||
#undef type_print
|
||||
}
|
||||
|
||||
static int configfs_dump(struct configfs_dirent *sd, int level)
|
||||
{
|
||||
struct configfs_dirent *child_sd;
|
||||
int ret = 0;
|
||||
|
||||
configfs_dump_one(sd, level);
|
||||
|
||||
if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
|
||||
ret = configfs_dump(child_sd, level + 2);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* configfs_depend_item() and configfs_undepend_item()
|
||||
*
|
||||
* WARNING: Do not call these from a configfs callback!
|
||||
*
|
||||
* This describes these functions and their helpers.
|
||||
*
|
||||
* Allow another kernel system to depend on a config_item. If this
|
||||
* happens, the item cannot go away until the dependant can live without
|
||||
* it. The idea is to give client modules as simple an interface as
|
||||
* possible. When a system asks them to depend on an item, they just
|
||||
* call configfs_depend_item(). If the item is live and the client
|
||||
* driver is in good shape, we'll happily do the work for them.
|
||||
*
|
||||
* Why is the locking complex? Because configfs uses the VFS to handle
|
||||
* all locking, but this function is called outside the normal
|
||||
* VFS->configfs path. So it must take VFS locks to prevent the
|
||||
* VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is
|
||||
* why you can't call these functions underneath configfs callbacks.
|
||||
*
|
||||
* Note, btw, that this can be called at *any* time, even when a configfs
|
||||
* subsystem isn't registered, or when configfs is loading or unloading.
|
||||
* Just like configfs_register_subsystem(). So we take the same
|
||||
* precautions. We pin the filesystem. We lock each i_mutex _in_order_
|
||||
* on our way down the tree. If we can find the target item in the
|
||||
* configfs tree, it must be part of the subsystem tree as well, so we
|
||||
* do not need the subsystem mutex. Holding the i_mutex chain locks
|
||||
* out mkdir() and rmdir(), who might be racing us.
|
||||
*/
|
||||
|
||||
/*
|
||||
* configfs_depend_prep()
|
||||
*
|
||||
* Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
|
||||
* attributes. This is similar to, but not the same as, configfs_detach_prep().
|
||||
* Note that configfs_detach_prep() expects the parent to be locked when it
|
||||
* is called, but we lock the parent *inside* configfs_depend_prep(). We
|
||||
* do that so we can unlock it if we find nothing.
|
||||
*
|
||||
* Here we do a depth-first search of the dentry hierarchy looking for
|
||||
* our object. We take i_mutex on each step of the way down. IT IS
|
||||
* ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch,
|
||||
* we'll drop the i_mutex.
|
||||
*
|
||||
* If the target is not found, -ENOENT is bubbled up and we have released
|
||||
* all locks. If the target was found, the locks will be cleared by
|
||||
* configfs_depend_rollback().
|
||||
*
|
||||
* This adds a requirement that all config_items be unique!
|
||||
*
|
||||
* This is recursive because the locking traversal is tricky. There isn't
|
||||
* much on the stack, though, so folks that need this function - be careful
|
||||
* about your stack! Patches will be accepted to make it iterative.
|
||||
*/
|
||||
static int configfs_depend_prep(struct dentry *origin,
|
||||
struct config_item *target)
|
||||
{
|
||||
struct configfs_dirent *child_sd, *sd = origin->d_fsdata;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!origin || !sd);
|
||||
|
||||
/* Lock this guy on the way down */
|
||||
mutex_lock(&sd->s_dentry->d_inode->i_mutex);
|
||||
if (sd->s_element == target) /* Boo-yah */
|
||||
goto out;
|
||||
|
||||
list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
|
||||
if (child_sd->s_type & CONFIGFS_DIR) {
|
||||
ret = configfs_depend_prep(child_sd->s_dentry,
|
||||
target);
|
||||
if (!ret)
|
||||
goto out; /* Child path boo-yah */
|
||||
}
|
||||
}
|
||||
|
||||
/* We looped all our children and didn't find target */
|
||||
mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
|
||||
ret = -ENOENT;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is ONLY called if configfs_depend_prep() did its job. So we can
|
||||
* trust the entire path from item back up to origin.
|
||||
*
|
||||
* We walk backwards from item, unlocking each i_mutex. We finish by
|
||||
* unlocking origin.
|
||||
*/
|
||||
static void configfs_depend_rollback(struct dentry *origin,
|
||||
struct config_item *item)
|
||||
{
|
||||
struct dentry *dentry = item->ci_dentry;
|
||||
|
||||
while (dentry != origin) {
|
||||
mutex_unlock(&dentry->d_inode->i_mutex);
|
||||
dentry = dentry->d_parent;
|
||||
}
|
||||
|
||||
mutex_unlock(&origin->d_inode->i_mutex);
|
||||
}
|
||||
|
||||
int configfs_depend_item(struct configfs_subsystem *subsys,
|
||||
struct config_item *target)
|
||||
{
|
||||
int ret;
|
||||
struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
|
||||
struct config_item *s_item = &subsys->su_group.cg_item;
|
||||
|
||||
/*
|
||||
* Pin the configfs filesystem. This means we can safely access
|
||||
* the root of the configfs filesystem.
|
||||
*/
|
||||
ret = configfs_pin_fs();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Next, lock the root directory. We're going to check that the
|
||||
* subsystem is really registered, and so we need to lock out
|
||||
* configfs_[un]register_subsystem().
|
||||
*/
|
||||
mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
|
||||
|
||||
root_sd = configfs_sb->s_root->d_fsdata;
|
||||
|
||||
list_for_each_entry(p, &root_sd->s_children, s_sibling) {
|
||||
if (p->s_type & CONFIGFS_DIR) {
|
||||
if (p->s_element == s_item) {
|
||||
subsys_sd = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!subsys_sd) {
|
||||
ret = -ENOENT;
|
||||
goto out_unlock_fs;
|
||||
}
|
||||
|
||||
/* Ok, now we can trust subsys/s_item */
|
||||
|
||||
/* Scan the tree, locking i_mutex recursively, return 0 if found */
|
||||
ret = configfs_depend_prep(subsys_sd->s_dentry, target);
|
||||
if (ret)
|
||||
goto out_unlock_fs;
|
||||
|
||||
/* We hold all i_mutexes from the subsystem down to the target */
|
||||
p = target->ci_dentry->d_fsdata;
|
||||
p->s_dependent_count += 1;
|
||||
|
||||
configfs_depend_rollback(subsys_sd->s_dentry, target);
|
||||
|
||||
out_unlock_fs:
|
||||
mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
|
||||
|
||||
/*
|
||||
* If we succeeded, the fs is pinned via other methods. If not,
|
||||
* we're done with it anyway. So release_fs() is always right.
|
||||
*/
|
||||
configfs_release_fs();
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(configfs_depend_item);
|
||||
|
||||
/*
|
||||
* Release the dependent linkage. This is much simpler than
|
||||
* configfs_depend_item() because we know that the client driver is
|
||||
* pinned, thus the subsystem is pinned, and therefore configfs is pinned.
|
||||
*/
|
||||
void configfs_undepend_item(struct configfs_subsystem *subsys,
|
||||
struct config_item *target)
|
||||
{
|
||||
struct configfs_dirent *sd;
|
||||
|
||||
/*
|
||||
* Since we can trust everything is pinned, we just need i_mutex
|
||||
* on the item.
|
||||
*/
|
||||
mutex_lock(&target->ci_dentry->d_inode->i_mutex);
|
||||
|
||||
sd = target->ci_dentry->d_fsdata;
|
||||
BUG_ON(sd->s_dependent_count < 1);
|
||||
|
||||
sd->s_dependent_count -= 1;
|
||||
|
||||
/*
|
||||
* After this unlock, we cannot trust the item to stay alive!
|
||||
* DO NOT REFERENCE item after this unlock.
|
||||
*/
|
||||
mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(configfs_undepend_item);
|
||||
|
||||
static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
||||
{
|
||||
|
@ -783,7 +1042,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|||
|
||||
snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
|
||||
|
||||
down(&subsys->su_sem);
|
||||
mutex_lock(&subsys->su_mutex);
|
||||
group = NULL;
|
||||
item = NULL;
|
||||
if (type->ct_group_ops->make_group) {
|
||||
|
@ -797,7 +1056,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|||
if (item)
|
||||
link_obj(parent_item, item);
|
||||
}
|
||||
up(&subsys->su_sem);
|
||||
mutex_unlock(&subsys->su_mutex);
|
||||
|
||||
kfree(name);
|
||||
if (!item) {
|
||||
|
@ -841,13 +1100,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|||
out_unlink:
|
||||
if (ret) {
|
||||
/* Tear down everything we built up */
|
||||
down(&subsys->su_sem);
|
||||
mutex_lock(&subsys->su_mutex);
|
||||
|
||||
client_disconnect_notify(parent_item, item);
|
||||
if (group)
|
||||
unlink_group(group);
|
||||
else
|
||||
unlink_obj(item);
|
||||
client_drop_item(parent_item, item);
|
||||
up(&subsys->su_sem);
|
||||
|
||||
mutex_unlock(&subsys->su_mutex);
|
||||
|
||||
if (module_got)
|
||||
module_put(owner);
|
||||
|
@ -881,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
if (sd->s_type & CONFIGFS_USET_DEFAULT)
|
||||
return -EPERM;
|
||||
|
||||
/*
|
||||
* Here's where we check for dependents. We're protected by
|
||||
* i_mutex.
|
||||
*/
|
||||
if (sd->s_dependent_count)
|
||||
return -EBUSY;
|
||||
|
||||
/* Get a working ref until we have the child */
|
||||
parent_item = configfs_get_config_item(dentry->d_parent);
|
||||
subsys = to_config_group(parent_item)->cg_subsys;
|
||||
|
@ -910,17 +1179,19 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|||
if (sd->s_type & CONFIGFS_USET_DIR) {
|
||||
configfs_detach_group(item);
|
||||
|
||||
down(&subsys->su_sem);
|
||||
mutex_lock(&subsys->su_mutex);
|
||||
client_disconnect_notify(parent_item, item);
|
||||
unlink_group(to_config_group(item));
|
||||
} else {
|
||||
configfs_detach_item(item);
|
||||
|
||||
down(&subsys->su_sem);
|
||||
mutex_lock(&subsys->su_mutex);
|
||||
client_disconnect_notify(parent_item, item);
|
||||
unlink_obj(item);
|
||||
}
|
||||
|
||||
client_drop_item(parent_item, item);
|
||||
up(&subsys->su_sem);
|
||||
mutex_unlock(&subsys->su_mutex);
|
||||
|
||||
/* Drop our reference from above */
|
||||
config_item_put(item);
|
||||
|
|
|
@ -27,19 +27,26 @@
|
|||
#include <linux/fs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
#include <linux/configfs.h>
|
||||
#include "configfs_internal.h"
|
||||
|
||||
/*
|
||||
* A simple attribute can only be 4096 characters. Why 4k? Because the
|
||||
* original code limited it to PAGE_SIZE. That's a bad idea, though,
|
||||
* because an attribute of 16k on ia64 won't work on x86. So we limit to
|
||||
* 4k, our minimum common page size.
|
||||
*/
|
||||
#define SIMPLE_ATTR_SIZE 4096
|
||||
|
||||
struct configfs_buffer {
|
||||
size_t count;
|
||||
loff_t pos;
|
||||
char * page;
|
||||
struct configfs_item_operations * ops;
|
||||
struct semaphore sem;
|
||||
struct mutex mutex;
|
||||
int needs_read_fill;
|
||||
};
|
||||
|
||||
|
@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
|
|||
|
||||
count = ops->show_attribute(item,attr,buffer->page);
|
||||
buffer->needs_read_fill = 0;
|
||||
BUG_ON(count > (ssize_t)PAGE_SIZE);
|
||||
BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
|
||||
if (count >= 0)
|
||||
buffer->count = count;
|
||||
else
|
||||
|
@ -102,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
|
|||
struct configfs_buffer * buffer = file->private_data;
|
||||
ssize_t retval = 0;
|
||||
|
||||
down(&buffer->sem);
|
||||
mutex_lock(&buffer->mutex);
|
||||
if (buffer->needs_read_fill) {
|
||||
if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
|
||||
goto out;
|
||||
|
@ -112,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
|
|||
retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
|
||||
buffer->count);
|
||||
out:
|
||||
up(&buffer->sem);
|
||||
mutex_unlock(&buffer->mutex);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
|
|||
if (!buffer->page)
|
||||
return -ENOMEM;
|
||||
|
||||
if (count >= PAGE_SIZE)
|
||||
count = PAGE_SIZE - 1;
|
||||
if (count >= SIMPLE_ATTR_SIZE)
|
||||
count = SIMPLE_ATTR_SIZE - 1;
|
||||
error = copy_from_user(buffer->page,buf,count);
|
||||
buffer->needs_read_fill = 1;
|
||||
/* if buf is assumed to contain a string, terminate it by \0,
|
||||
|
@ -193,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
|
|||
struct configfs_buffer * buffer = file->private_data;
|
||||
ssize_t len;
|
||||
|
||||
down(&buffer->sem);
|
||||
mutex_lock(&buffer->mutex);
|
||||
len = fill_write_buffer(buffer, buf, count);
|
||||
if (len > 0)
|
||||
len = flush_write_buffer(file->f_path.dentry, buffer, count);
|
||||
if (len > 0)
|
||||
*ppos += len;
|
||||
up(&buffer->sem);
|
||||
mutex_unlock(&buffer->mutex);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
@ -253,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file)
|
|||
error = -ENOMEM;
|
||||
goto Enomem;
|
||||
}
|
||||
init_MUTEX(&buffer->sem);
|
||||
mutex_init(&buffer->mutex);
|
||||
buffer->needs_read_fill = 1;
|
||||
buffer->ops = ops;
|
||||
file->private_data = buffer;
|
||||
|
@ -292,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp)
|
|||
if (buffer) {
|
||||
if (buffer->page)
|
||||
free_page((unsigned long)buffer->page);
|
||||
mutex_destroy(&buffer->mutex);
|
||||
kfree(buffer);
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -62,7 +62,6 @@ void config_item_init(struct config_item * item)
|
|||
* dynamically allocated string that @item->ci_name points to.
|
||||
* Otherwise, use the static @item->ci_namebuf array.
|
||||
*/
|
||||
|
||||
int config_item_set_name(struct config_item * item, const char * fmt, ...)
|
||||
{
|
||||
int error = 0;
|
||||
|
@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item)
|
|||
return item;
|
||||
}
|
||||
|
||||
/**
|
||||
* config_item_cleanup - free config_item resources.
|
||||
* @item: item.
|
||||
*/
|
||||
|
||||
void config_item_cleanup(struct config_item * item)
|
||||
static void config_item_cleanup(struct config_item * item)
|
||||
{
|
||||
struct config_item_type * t = item->ci_type;
|
||||
struct config_group * s = item->ci_group;
|
||||
|
@ -179,39 +173,35 @@ void config_item_put(struct config_item * item)
|
|||
kref_put(&item->ci_kref, config_item_release);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* config_group_init - initialize a group for use
|
||||
* @group: group
|
||||
*/
|
||||
|
||||
void config_group_init(struct config_group *group)
|
||||
{
|
||||
config_item_init(&group->cg_item);
|
||||
INIT_LIST_HEAD(&group->cg_children);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* config_group_find_obj - search for item in group.
|
||||
* config_group_find_item - search for item in group.
|
||||
* @group: group we're looking in.
|
||||
* @name: item's name.
|
||||
*
|
||||
* Lock group via @group->cg_subsys, and iterate over @group->cg_list,
|
||||
* looking for a matching config_item. If matching item is found
|
||||
* take a reference and return the item.
|
||||
* Iterate over @group->cg_children, looking for a matching config_item.
|
||||
* If matching item is found take a reference and return the item.
|
||||
* Caller must have locked group via @group->cg_subsys->su_mutex.
|
||||
*/
|
||||
|
||||
struct config_item * config_group_find_obj(struct config_group * group, const char * name)
|
||||
struct config_item *config_group_find_item(struct config_group *group,
|
||||
const char *name)
|
||||
{
|
||||
struct list_head * entry;
|
||||
struct config_item * ret = NULL;
|
||||
|
||||
/* XXX LOCKING! */
|
||||
list_for_each(entry,&group->cg_children) {
|
||||
struct config_item * item = to_item(entry);
|
||||
if (config_item_name(item) &&
|
||||
!strcmp(config_item_name(item), name)) {
|
||||
!strcmp(config_item_name(item), name)) {
|
||||
ret = config_item_get(item);
|
||||
break;
|
||||
}
|
||||
|
@ -219,9 +209,8 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch
|
|||
return ret;
|
||||
}
|
||||
|
||||
|
||||
EXPORT_SYMBOL(config_item_init);
|
||||
EXPORT_SYMBOL(config_group_init);
|
||||
EXPORT_SYMBOL(config_item_get);
|
||||
EXPORT_SYMBOL(config_item_put);
|
||||
EXPORT_SYMBOL(config_group_find_obj);
|
||||
EXPORT_SYMBOL(config_group_find_item);
|
||||
|
|
|
@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
|
|||
return len;
|
||||
}
|
||||
|
||||
#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
|
||||
.attr = { .ca_name = __stringify(_name), \
|
||||
.ca_mode = _mode, \
|
||||
.ca_owner = THIS_MODULE }, \
|
||||
.show = _read, \
|
||||
.store = _write, \
|
||||
}
|
||||
|
||||
#define CLUSTER_ATTR(name, check_zero) \
|
||||
static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
|
||||
{ \
|
||||
|
@ -615,7 +607,7 @@ static struct clusters clusters_root = {
|
|||
int dlm_config_init(void)
|
||||
{
|
||||
config_group_init(&clusters_root.subsys.su_group);
|
||||
init_MUTEX(&clusters_root.subsys.su_sem);
|
||||
mutex_init(&clusters_root.subsys.su_mutex);
|
||||
return configfs_register_subsystem(&clusters_root.subsys);
|
||||
}
|
||||
|
||||
|
@ -759,9 +751,9 @@ static struct space *get_space(char *name)
|
|||
if (!space_list)
|
||||
return NULL;
|
||||
|
||||
down(&space_list->cg_subsys->su_sem);
|
||||
i = config_group_find_obj(space_list, name);
|
||||
up(&space_list->cg_subsys->su_sem);
|
||||
mutex_lock(&space_list->cg_subsys->su_mutex);
|
||||
i = config_group_find_item(space_list, name);
|
||||
mutex_unlock(&space_list->cg_subsys->su_mutex);
|
||||
|
||||
return to_space(i);
|
||||
}
|
||||
|
@ -780,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
|
|||
if (!comm_list)
|
||||
return NULL;
|
||||
|
||||
down(&clusters_root.subsys.su_sem);
|
||||
mutex_lock(&clusters_root.subsys.su_mutex);
|
||||
|
||||
list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
|
||||
cm = to_comm(i);
|
||||
|
@ -800,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
|
|||
break;
|
||||
}
|
||||
}
|
||||
up(&clusters_root.subsys.su_sem);
|
||||
mutex_unlock(&clusters_root.subsys.su_mutex);
|
||||
|
||||
if (!found)
|
||||
cm = NULL;
|
||||
|
|
2676
fs/ocfs2/alloc.c
2676
fs/ocfs2/alloc.c
File diff suppressed because it is too large
Load Diff
|
@ -34,7 +34,17 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
|
|||
u32 cpos,
|
||||
u64 start_blk,
|
||||
u32 new_clusters,
|
||||
u8 flags,
|
||||
struct ocfs2_alloc_context *meta_ac);
|
||||
struct ocfs2_cached_dealloc_ctxt;
|
||||
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
|
||||
handle_t *handle, u32 cpos, u32 len, u32 phys,
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc);
|
||||
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
|
||||
u32 cpos, u32 len, handle_t *handle,
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc);
|
||||
int ocfs2_num_free_extents(struct ocfs2_super *osb,
|
||||
struct inode *inode,
|
||||
struct ocfs2_dinode *fe);
|
||||
|
@ -62,17 +72,41 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
|
|||
struct ocfs2_dinode **tl_copy);
|
||||
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
|
||||
struct ocfs2_dinode *tl_copy);
|
||||
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb);
|
||||
int ocfs2_truncate_log_append(struct ocfs2_super *osb,
|
||||
handle_t *handle,
|
||||
u64 start_blk,
|
||||
unsigned int num_clusters);
|
||||
int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
|
||||
|
||||
/*
|
||||
* Process local structure which describes the block unlinks done
|
||||
* during an operation. This is populated via
|
||||
* ocfs2_cache_block_dealloc().
|
||||
*
|
||||
* ocfs2_run_deallocs() should be called after the potentially
|
||||
* de-allocating routines. No journal handles should be open, and most
|
||||
* locks should have been dropped.
|
||||
*/
|
||||
struct ocfs2_cached_dealloc_ctxt {
	/* Set to NULL by ocfs2_init_dealloc_ctxt(). */
	struct ocfs2_per_slot_free_list	*c_first_suballocator;
};
|
||||
static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
|
||||
{
|
||||
c->c_first_suballocator = NULL;
|
||||
}
|
||||
int ocfs2_run_deallocs(struct ocfs2_super *osb,
|
||||
struct ocfs2_cached_dealloc_ctxt *ctxt);
|
||||
|
||||
struct ocfs2_truncate_context {
|
||||
struct inode *tc_ext_alloc_inode;
|
||||
struct buffer_head *tc_ext_alloc_bh;
|
||||
struct ocfs2_cached_dealloc_ctxt tc_dealloc;
|
||||
int tc_ext_alloc_locked; /* is it cluster locked? */
|
||||
/* these get destroyed once it's passed to ocfs2_commit_truncate. */
|
||||
struct buffer_head *tc_last_eb_bh;
|
||||
};
|
||||
|
||||
int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
|
||||
u64 new_i_size);
|
||||
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
|
||||
u64 range_start, u64 range_end);
|
||||
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
|
||||
struct inode *inode,
|
||||
struct buffer_head *fe_bh,
|
||||
|
@ -84,6 +118,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
|
|||
|
||||
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
|
||||
u32 cpos, struct buffer_head **leaf_bh);
|
||||
int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
|
||||
|
||||
/*
|
||||
* Helper function to look at the # of clusters in an extent record.
|
||||
|
|
1069
fs/ocfs2/aops.c
1069
fs/ocfs2/aops.c
File diff suppressed because it is too large
Load Diff
|
@ -42,57 +42,22 @@ int walk_page_buffers( handle_t *handle,
|
|||
int (*fn)( handle_t *handle,
|
||||
struct buffer_head *bh));
|
||||
|
||||
struct ocfs2_write_ctxt;
|
||||
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
|
||||
u64 *, unsigned int *, unsigned int *);
|
||||
int ocfs2_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata);
|
||||
|
||||
ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
|
||||
size_t count, ocfs2_page_writer *actor,
|
||||
void *priv);
|
||||
int ocfs2_write_end(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata);
|
||||
|
||||
struct ocfs2_write_ctxt {
|
||||
size_t w_count;
|
||||
loff_t w_pos;
|
||||
u32 w_cpos;
|
||||
unsigned int w_finished_copy;
|
||||
int ocfs2_write_end_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata);
|
||||
|
||||
/* This is true if page_size > cluster_size */
|
||||
unsigned int w_large_pages;
|
||||
|
||||
/* Filler callback and private data */
|
||||
ocfs2_page_writer *w_write_data_page;
|
||||
void *w_private;
|
||||
|
||||
/* Only valid for the filler callback */
|
||||
struct page *w_this_page;
|
||||
unsigned int w_this_page_new;
|
||||
};
|
||||
|
||||
struct ocfs2_buffered_write_priv {
|
||||
char *b_src_buf;
|
||||
const struct iovec *b_cur_iov; /* Current iovec */
|
||||
size_t b_cur_off; /* Offset in the
|
||||
* current iovec */
|
||||
};
|
||||
int ocfs2_map_and_write_user_data(struct inode *inode,
|
||||
struct ocfs2_write_ctxt *wc,
|
||||
u64 *p_blkno,
|
||||
unsigned int *ret_from,
|
||||
unsigned int *ret_to);
|
||||
|
||||
struct ocfs2_splice_write_priv {
|
||||
struct splice_desc *s_sd;
|
||||
struct pipe_buffer *s_buf;
|
||||
struct pipe_inode_info *s_pipe;
|
||||
/* Neither offset value is ever larger than one page */
|
||||
unsigned int s_offset;
|
||||
unsigned int s_buf_offset;
|
||||
};
|
||||
int ocfs2_map_and_write_splice_data(struct inode *inode,
|
||||
struct ocfs2_write_ctxt *wc,
|
||||
u64 *p_blkno,
|
||||
unsigned int *ret_from,
|
||||
unsigned int *ret_to);
|
||||
int ocfs2_write_begin_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
struct buffer_head *di_bh, struct page *mmap_page);
|
||||
|
||||
/* all ocfs2_dio_end_io()'s fault */
|
||||
#define ocfs2_iocb_is_rw_locked(iocb) \
|
||||
|
|
|
@ -1335,6 +1335,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
|
|||
ret = wait_event_interruptible(o2hb_steady_queue,
|
||||
atomic_read(®->hr_steady_iterations) == 0);
|
||||
if (ret) {
|
||||
/* We got interrupted (hello ptrace!). Clean up */
|
||||
spin_lock(&o2hb_live_lock);
|
||||
hb_task = reg->hr_task;
|
||||
reg->hr_task = NULL;
|
||||
|
@ -1345,7 +1346,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
|
|||
goto out;
|
||||
}
|
||||
|
||||
ret = count;
|
||||
/* Ok, we were woken. Make sure it wasn't by drop_item() */
|
||||
spin_lock(&o2hb_live_lock);
|
||||
hb_task = reg->hr_task;
|
||||
spin_unlock(&o2hb_live_lock);
|
||||
|
||||
if (hb_task)
|
||||
ret = count;
|
||||
else
|
||||
ret = -EIO;
|
||||
|
||||
out:
|
||||
if (filp)
|
||||
fput(filp);
|
||||
|
@ -1523,6 +1533,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
|
|||
if (hb_task)
|
||||
kthread_stop(hb_task);
|
||||
|
||||
/*
|
||||
* If we're racing a dev_write(), we need to wake them. They will
|
||||
* check reg->hr_task
|
||||
*/
|
||||
if (atomic_read(®->hr_steady_iterations) != 0) {
|
||||
atomic_set(®->hr_steady_iterations, 0);
|
||||
wake_up(&o2hb_steady_queue);
|
||||
}
|
||||
|
||||
config_item_put(item);
|
||||
}
|
||||
|
||||
|
@ -1665,7 +1684,67 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(o2hb_setup_callback);
|
||||
|
||||
int o2hb_register_callback(struct o2hb_callback_func *hc)
|
||||
static struct o2hb_region *o2hb_find_region(const char *region_uuid)
|
||||
{
|
||||
struct o2hb_region *p, *reg = NULL;
|
||||
|
||||
assert_spin_locked(&o2hb_live_lock);
|
||||
|
||||
list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
|
||||
if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
|
||||
reg = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
static int o2hb_region_get(const char *region_uuid)
|
||||
{
|
||||
int ret = 0;
|
||||
struct o2hb_region *reg;
|
||||
|
||||
spin_lock(&o2hb_live_lock);
|
||||
|
||||
reg = o2hb_find_region(region_uuid);
|
||||
if (!reg)
|
||||
ret = -ENOENT;
|
||||
spin_unlock(&o2hb_live_lock);
|
||||
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = o2nm_depend_this_node();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = o2nm_depend_item(®->hr_item);
|
||||
if (ret)
|
||||
o2nm_undepend_this_node();
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void o2hb_region_put(const char *region_uuid)
|
||||
{
|
||||
struct o2hb_region *reg;
|
||||
|
||||
spin_lock(&o2hb_live_lock);
|
||||
|
||||
reg = o2hb_find_region(region_uuid);
|
||||
|
||||
spin_unlock(&o2hb_live_lock);
|
||||
|
||||
if (reg) {
|
||||
o2nm_undepend_item(®->hr_item);
|
||||
o2nm_undepend_this_node();
|
||||
}
|
||||
}
|
||||
|
||||
int o2hb_register_callback(const char *region_uuid,
|
||||
struct o2hb_callback_func *hc)
|
||||
{
|
||||
struct o2hb_callback_func *tmp;
|
||||
struct list_head *iter;
|
||||
|
@ -1681,6 +1760,12 @@ int o2hb_register_callback(struct o2hb_callback_func *hc)
|
|||
goto out;
|
||||
}
|
||||
|
||||
if (region_uuid) {
|
||||
ret = o2hb_region_get(region_uuid);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
down_write(&o2hb_callback_sem);
|
||||
|
||||
list_for_each(iter, &hbcall->list) {
|
||||
|
@ -1702,16 +1787,21 @@ out:
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(o2hb_register_callback);
|
||||
|
||||
void o2hb_unregister_callback(struct o2hb_callback_func *hc)
|
||||
void o2hb_unregister_callback(const char *region_uuid,
|
||||
struct o2hb_callback_func *hc)
|
||||
{
|
||||
BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
|
||||
|
||||
mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
|
||||
__builtin_return_address(0), hc);
|
||||
|
||||
/* XXX Can this happen _with_ a region reference? */
|
||||
if (list_empty(&hc->hc_item))
|
||||
return;
|
||||
|
||||
if (region_uuid)
|
||||
o2hb_region_put(region_uuid);
|
||||
|
||||
down_write(&o2hb_callback_sem);
|
||||
|
||||
list_del_init(&hc->hc_item);
|
||||
|
|
|
@ -69,8 +69,10 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
|
|||
o2hb_cb_func *func,
|
||||
void *data,
|
||||
int priority);
|
||||
int o2hb_register_callback(struct o2hb_callback_func *hc);
|
||||
void o2hb_unregister_callback(struct o2hb_callback_func *hc);
|
||||
int o2hb_register_callback(const char *region_uuid,
|
||||
struct o2hb_callback_func *hc);
|
||||
void o2hb_unregister_callback(const char *region_uuid,
|
||||
struct o2hb_callback_func *hc);
|
||||
void o2hb_fill_node_map(unsigned long *map,
|
||||
unsigned bytes);
|
||||
void o2hb_init(void);
|
||||
|
|
|
@ -900,6 +900,46 @@ static struct o2nm_cluster_group o2nm_cluster_group = {
|
|||
},
|
||||
};
|
||||
|
||||
int o2nm_depend_item(struct config_item *item)
|
||||
{
|
||||
return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);
|
||||
}
|
||||
|
||||
void o2nm_undepend_item(struct config_item *item)
|
||||
{
|
||||
configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
|
||||
}
|
||||
|
||||
int o2nm_depend_this_node(void)
|
||||
{
|
||||
int ret = 0;
|
||||
struct o2nm_node *local_node;
|
||||
|
||||
local_node = o2nm_get_node_by_num(o2nm_this_node());
|
||||
if (!local_node) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = o2nm_depend_item(&local_node->nd_item);
|
||||
o2nm_node_put(local_node);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void o2nm_undepend_this_node(void)
|
||||
{
|
||||
struct o2nm_node *local_node;
|
||||
|
||||
local_node = o2nm_get_node_by_num(o2nm_this_node());
|
||||
BUG_ON(!local_node);
|
||||
|
||||
o2nm_undepend_item(&local_node->nd_item);
|
||||
o2nm_node_put(local_node);
|
||||
}
|
||||
|
||||
|
||||
static void __exit exit_o2nm(void)
|
||||
{
|
||||
if (ocfs2_table_header)
|
||||
|
@ -934,7 +974,7 @@ static int __init init_o2nm(void)
|
|||
goto out_sysctl;
|
||||
|
||||
config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
|
||||
init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem);
|
||||
mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex);
|
||||
ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys);
|
||||
if (ret) {
|
||||
printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);
|
||||
|
|
|
@ -77,4 +77,9 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr);
|
|||
void o2nm_node_get(struct o2nm_node *node);
|
||||
void o2nm_node_put(struct o2nm_node *node);
|
||||
|
||||
int o2nm_depend_item(struct config_item *item);
|
||||
void o2nm_undepend_item(struct config_item *item);
|
||||
int o2nm_depend_this_node(void);
|
||||
void o2nm_undepend_this_node(void);
|
||||
|
||||
#endif /* O2CLUSTER_NODEMANAGER_H */
|
||||
|
|
|
@ -261,14 +261,12 @@ out:
|
|||
|
||||
static void o2net_complete_nodes_nsw(struct o2net_node *nn)
|
||||
{
|
||||
struct list_head *iter, *tmp;
|
||||
struct o2net_status_wait *nsw, *tmp;
|
||||
unsigned int num_kills = 0;
|
||||
struct o2net_status_wait *nsw;
|
||||
|
||||
assert_spin_locked(&nn->nn_lock);
|
||||
|
||||
list_for_each_safe(iter, tmp, &nn->nn_status_list) {
|
||||
nsw = list_entry(iter, struct o2net_status_wait, ns_node_item);
|
||||
list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) {
|
||||
o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0);
|
||||
num_kills++;
|
||||
}
|
||||
|
@ -764,13 +762,10 @@ EXPORT_SYMBOL_GPL(o2net_register_handler);
|
|||
|
||||
void o2net_unregister_handler_list(struct list_head *list)
|
||||
{
|
||||
struct list_head *pos, *n;
|
||||
struct o2net_msg_handler *nmh;
|
||||
struct o2net_msg_handler *nmh, *n;
|
||||
|
||||
write_lock(&o2net_handler_lock);
|
||||
list_for_each_safe(pos, n, list) {
|
||||
nmh = list_entry(pos, struct o2net_msg_handler,
|
||||
nh_unregister_item);
|
||||
list_for_each_entry_safe(nmh, n, list, nh_unregister_item) {
|
||||
mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n",
|
||||
nmh->nh_func, nmh->nh_msg_type, nmh->nh_key);
|
||||
rb_erase(&nmh->nh_node, &o2net_handler_tree);
|
||||
|
@ -1638,8 +1633,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
|
|||
|
||||
void o2net_unregister_hb_callbacks(void)
|
||||
{
|
||||
o2hb_unregister_callback(&o2net_hb_up);
|
||||
o2hb_unregister_callback(&o2net_hb_down);
|
||||
o2hb_unregister_callback(NULL, &o2net_hb_up);
|
||||
o2hb_unregister_callback(NULL, &o2net_hb_down);
|
||||
}
|
||||
|
||||
int o2net_register_hb_callbacks(void)
|
||||
|
@ -1651,9 +1646,9 @@ int o2net_register_hb_callbacks(void)
|
|||
o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB,
|
||||
o2net_hb_node_up_cb, NULL, O2NET_HB_PRI);
|
||||
|
||||
ret = o2hb_register_callback(&o2net_hb_up);
|
||||
ret = o2hb_register_callback(NULL, &o2net_hb_up);
|
||||
if (ret == 0)
|
||||
ret = o2hb_register_callback(&o2net_hb_down);
|
||||
ret = o2hb_register_callback(NULL, &o2net_hb_down);
|
||||
|
||||
if (ret)
|
||||
o2net_unregister_hb_callbacks();
|
||||
|
|
|
@ -368,7 +368,7 @@ int ocfs2_do_extend_dir(struct super_block *sb,
|
|||
u32 offset = OCFS2_I(dir)->ip_clusters;
|
||||
|
||||
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
|
||||
1, parent_fe_bh, handle,
|
||||
1, 0, parent_fe_bh, handle,
|
||||
data_ac, meta_ac, NULL);
|
||||
BUG_ON(status == -EAGAIN);
|
||||
if (status < 0) {
|
||||
|
|
|
@ -1128,8 +1128,8 @@ bail:
|
|||
|
||||
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
|
||||
{
|
||||
o2hb_unregister_callback(&dlm->dlm_hb_up);
|
||||
o2hb_unregister_callback(&dlm->dlm_hb_down);
|
||||
o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
|
||||
o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
|
||||
o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
|
||||
}
|
||||
|
||||
|
@ -1141,13 +1141,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
|
|||
|
||||
o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
|
||||
dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
|
||||
status = o2hb_register_callback(&dlm->dlm_hb_down);
|
||||
status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
|
||||
if (status)
|
||||
goto bail;
|
||||
|
||||
o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
|
||||
dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
|
||||
status = o2hb_register_callback(&dlm->dlm_hb_up);
|
||||
status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
|
||||
if (status)
|
||||
goto bail;
|
||||
|
||||
|
|
|
@ -192,25 +192,20 @@ static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
|
|||
static void dlm_dump_mles(struct dlm_ctxt *dlm)
|
||||
{
|
||||
struct dlm_master_list_entry *mle;
|
||||
struct list_head *iter;
|
||||
|
||||
mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
|
||||
spin_lock(&dlm->master_lock);
|
||||
list_for_each(iter, &dlm->master_list) {
|
||||
mle = list_entry(iter, struct dlm_master_list_entry, list);
|
||||
list_for_each_entry(mle, &dlm->master_list, list)
|
||||
dlm_print_one_mle(mle);
|
||||
}
|
||||
spin_unlock(&dlm->master_lock);
|
||||
}
|
||||
|
||||
int dlm_dump_all_mles(const char __user *data, unsigned int len)
|
||||
{
|
||||
struct list_head *iter;
|
||||
struct dlm_ctxt *dlm;
|
||||
|
||||
spin_lock(&dlm_domain_lock);
|
||||
list_for_each(iter, &dlm_domains) {
|
||||
dlm = list_entry (iter, struct dlm_ctxt, list);
|
||||
list_for_each_entry(dlm, &dlm_domains, list) {
|
||||
mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
|
||||
dlm_dump_mles(dlm);
|
||||
}
|
||||
|
@ -454,12 +449,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
|
|||
char *name, unsigned int namelen)
|
||||
{
|
||||
struct dlm_master_list_entry *tmpmle;
|
||||
struct list_head *iter;
|
||||
|
||||
assert_spin_locked(&dlm->master_lock);
|
||||
|
||||
list_for_each(iter, &dlm->master_list) {
|
||||
tmpmle = list_entry(iter, struct dlm_master_list_entry, list);
|
||||
list_for_each_entry(tmpmle, &dlm->master_list, list) {
|
||||
if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
|
||||
continue;
|
||||
dlm_get_mle(tmpmle);
|
||||
|
@ -472,13 +465,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
|
|||
void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
|
||||
{
|
||||
struct dlm_master_list_entry *mle;
|
||||
struct list_head *iter;
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
|
||||
list_for_each(iter, &dlm->mle_hb_events) {
|
||||
mle = list_entry(iter, struct dlm_master_list_entry,
|
||||
hb_events);
|
||||
list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
|
||||
if (node_up)
|
||||
dlm_mle_node_up(dlm, mle, NULL, idx);
|
||||
else
|
||||
|
@ -2434,7 +2424,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
|
|||
int ret;
|
||||
int i;
|
||||
int count = 0;
|
||||
struct list_head *queue, *iter;
|
||||
struct list_head *queue;
|
||||
struct dlm_lock *lock;
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
@ -2453,8 +2443,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
|
|||
ret = 0;
|
||||
queue = &res->granted;
|
||||
for (i = 0; i < 3; i++) {
|
||||
list_for_each(iter, queue) {
|
||||
lock = list_entry(iter, struct dlm_lock, list);
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
++count;
|
||||
if (lock->ml.node == dlm->node_num) {
|
||||
mlog(0, "found a lock owned by this node still "
|
||||
|
@ -2923,18 +2912,16 @@ again:
|
|||
static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res)
|
||||
{
|
||||
struct list_head *iter, *iter2;
|
||||
struct list_head *queue = &res->granted;
|
||||
int i, bit;
|
||||
struct dlm_lock *lock;
|
||||
struct dlm_lock *lock, *next;
|
||||
|
||||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
BUG_ON(res->owner == dlm->node_num);
|
||||
|
||||
for (i=0; i<3; i++) {
|
||||
list_for_each_safe(iter, iter2, queue) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry_safe(lock, next, queue, list) {
|
||||
if (lock->ml.node != dlm->node_num) {
|
||||
mlog(0, "putting lock for node %u\n",
|
||||
lock->ml.node);
|
||||
|
@ -2976,7 +2963,6 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
|
|||
{
|
||||
int i;
|
||||
struct list_head *queue = &res->granted;
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *lock;
|
||||
int nodenum;
|
||||
|
||||
|
@ -2984,10 +2970,9 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
|
|||
|
||||
spin_lock(&res->spinlock);
|
||||
for (i=0; i<3; i++) {
|
||||
list_for_each(iter, queue) {
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
/* up to the caller to make sure this node
|
||||
* is alive */
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
if (lock->ml.node != dlm->node_num) {
|
||||
spin_unlock(&res->spinlock);
|
||||
return lock->ml.node;
|
||||
|
@ -3234,8 +3219,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
|
|||
|
||||
void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
|
||||
{
|
||||
struct list_head *iter, *iter2;
|
||||
struct dlm_master_list_entry *mle;
|
||||
struct dlm_master_list_entry *mle, *next;
|
||||
struct dlm_lock_resource *res;
|
||||
unsigned int hash;
|
||||
|
||||
|
@ -3245,9 +3229,7 @@ top:
|
|||
|
||||
/* clean the master list */
|
||||
spin_lock(&dlm->master_lock);
|
||||
list_for_each_safe(iter, iter2, &dlm->master_list) {
|
||||
mle = list_entry(iter, struct dlm_master_list_entry, list);
|
||||
|
||||
list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
|
||||
BUG_ON(mle->type != DLM_MLE_BLOCK &&
|
||||
mle->type != DLM_MLE_MASTER &&
|
||||
mle->type != DLM_MLE_MIGRATION);
|
||||
|
|
|
@ -158,8 +158,7 @@ void dlm_dispatch_work(struct work_struct *work)
|
|||
struct dlm_ctxt *dlm =
|
||||
container_of(work, struct dlm_ctxt, dispatched_work);
|
||||
LIST_HEAD(tmp_list);
|
||||
struct list_head *iter, *iter2;
|
||||
struct dlm_work_item *item;
|
||||
struct dlm_work_item *item, *next;
|
||||
dlm_workfunc_t *workfunc;
|
||||
int tot=0;
|
||||
|
||||
|
@ -167,13 +166,12 @@ void dlm_dispatch_work(struct work_struct *work)
|
|||
list_splice_init(&dlm->work_list, &tmp_list);
|
||||
spin_unlock(&dlm->work_lock);
|
||||
|
||||
list_for_each_safe(iter, iter2, &tmp_list) {
|
||||
list_for_each_entry(item, &tmp_list, list) {
|
||||
tot++;
|
||||
}
|
||||
mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
|
||||
|
||||
list_for_each_safe(iter, iter2, &tmp_list) {
|
||||
item = list_entry(iter, struct dlm_work_item, list);
|
||||
list_for_each_entry_safe(item, next, &tmp_list, list) {
|
||||
workfunc = item->func;
|
||||
list_del_init(&item->list);
|
||||
|
||||
|
@ -549,7 +547,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
|
|||
{
|
||||
int status = 0;
|
||||
struct dlm_reco_node_data *ndata;
|
||||
struct list_head *iter;
|
||||
int all_nodes_done;
|
||||
int destroy = 0;
|
||||
int pass = 0;
|
||||
|
@ -567,8 +564,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
|
|||
|
||||
/* safe to access the node data list without a lock, since this
|
||||
* process is the only one to change the list */
|
||||
list_for_each(iter, &dlm->reco.node_data) {
|
||||
ndata = list_entry (iter, struct dlm_reco_node_data, list);
|
||||
list_for_each_entry(ndata, &dlm->reco.node_data, list) {
|
||||
BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
|
||||
ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
|
||||
|
||||
|
@ -655,9 +651,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
|
|||
* done, or if anyone died */
|
||||
all_nodes_done = 1;
|
||||
spin_lock(&dlm_reco_state_lock);
|
||||
list_for_each(iter, &dlm->reco.node_data) {
|
||||
ndata = list_entry (iter, struct dlm_reco_node_data, list);
|
||||
|
||||
list_for_each_entry(ndata, &dlm->reco.node_data, list) {
|
||||
mlog(0, "checking recovery state of node %u\n",
|
||||
ndata->node_num);
|
||||
switch (ndata->state) {
|
||||
|
@ -774,16 +768,14 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
|
|||
|
||||
static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
|
||||
{
|
||||
struct list_head *iter, *iter2;
|
||||
struct dlm_reco_node_data *ndata;
|
||||
struct dlm_reco_node_data *ndata, *next;
|
||||
LIST_HEAD(tmplist);
|
||||
|
||||
spin_lock(&dlm_reco_state_lock);
|
||||
list_splice_init(&dlm->reco.node_data, &tmplist);
|
||||
spin_unlock(&dlm_reco_state_lock);
|
||||
|
||||
list_for_each_safe(iter, iter2, &tmplist) {
|
||||
ndata = list_entry (iter, struct dlm_reco_node_data, list);
|
||||
list_for_each_entry_safe(ndata, next, &tmplist, list) {
|
||||
list_del_init(&ndata->list);
|
||||
kfree(ndata);
|
||||
}
|
||||
|
@ -876,7 +868,6 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
|
|||
struct dlm_lock_resource *res;
|
||||
struct dlm_ctxt *dlm;
|
||||
LIST_HEAD(resources);
|
||||
struct list_head *iter;
|
||||
int ret;
|
||||
u8 dead_node, reco_master;
|
||||
int skip_all_done = 0;
|
||||
|
@ -920,8 +911,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
|
|||
|
||||
/* any errors returned will be due to the new_master dying,
|
||||
* the dlm_reco_thread should detect this */
|
||||
list_for_each(iter, &resources) {
|
||||
res = list_entry (iter, struct dlm_lock_resource, recovering);
|
||||
list_for_each_entry(res, &resources, recovering) {
|
||||
ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
|
||||
DLM_MRES_RECOVERY);
|
||||
if (ret < 0) {
|
||||
|
@ -983,7 +973,6 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
{
|
||||
struct dlm_ctxt *dlm = data;
|
||||
struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
|
||||
struct list_head *iter;
|
||||
struct dlm_reco_node_data *ndata = NULL;
|
||||
int ret = -EINVAL;
|
||||
|
||||
|
@ -1000,8 +989,7 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
dlm->reco.dead_node, done->node_idx, dlm->node_num);
|
||||
|
||||
spin_lock(&dlm_reco_state_lock);
|
||||
list_for_each(iter, &dlm->reco.node_data) {
|
||||
ndata = list_entry (iter, struct dlm_reco_node_data, list);
|
||||
list_for_each_entry(ndata, &dlm->reco.node_data, list) {
|
||||
if (ndata->node_num != done->node_idx)
|
||||
continue;
|
||||
|
||||
|
@ -1049,13 +1037,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
|
|||
struct list_head *list,
|
||||
u8 dead_node)
|
||||
{
|
||||
struct dlm_lock_resource *res;
|
||||
struct list_head *iter, *iter2;
|
||||
struct dlm_lock_resource *res, *next;
|
||||
struct dlm_lock *lock;
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
list_for_each_safe(iter, iter2, &dlm->reco.resources) {
|
||||
res = list_entry (iter, struct dlm_lock_resource, recovering);
|
||||
list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
|
||||
/* always prune any $RECOVERY entries for dead nodes,
|
||||
* otherwise hangs can occur during later recovery */
|
||||
if (dlm_is_recovery_lock(res->lockname.name,
|
||||
|
@ -1169,7 +1155,7 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
|
|||
u8 flags, u8 master)
|
||||
{
|
||||
/* mres here is one full page */
|
||||
memset(mres, 0, PAGE_SIZE);
|
||||
clear_page(mres);
|
||||
mres->lockname_len = namelen;
|
||||
memcpy(mres->lockname, lockname, namelen);
|
||||
mres->num_locks = 0;
|
||||
|
@ -1252,7 +1238,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
|||
struct dlm_migratable_lockres *mres,
|
||||
u8 send_to, u8 flags)
|
||||
{
|
||||
struct list_head *queue, *iter;
|
||||
struct list_head *queue;
|
||||
int total_locks, i;
|
||||
u64 mig_cookie = 0;
|
||||
struct dlm_lock *lock;
|
||||
|
@ -1278,9 +1264,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
|||
total_locks = 0;
|
||||
for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
|
||||
queue = dlm_list_idx_to_ptr(res, i);
|
||||
list_for_each(iter, queue) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
/* add another lock. */
|
||||
total_locks++;
|
||||
if (!dlm_add_lock_to_array(lock, mres, i))
|
||||
|
@ -1717,7 +1701,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
|
|||
struct dlm_lockstatus *lksb = NULL;
|
||||
int ret = 0;
|
||||
int i, j, bad;
|
||||
struct list_head *iter;
|
||||
struct dlm_lock *lock = NULL;
|
||||
u8 from = O2NM_MAX_NODES;
|
||||
unsigned int added = 0;
|
||||
|
@ -1755,8 +1738,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
|
|||
spin_lock(&res->spinlock);
|
||||
for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
|
||||
tmpq = dlm_list_idx_to_ptr(res, j);
|
||||
list_for_each(iter, tmpq) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry(lock, tmpq, list) {
|
||||
if (lock->ml.cookie != ml->cookie)
|
||||
lock = NULL;
|
||||
else
|
||||
|
@ -1930,8 +1912,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
|
|||
struct dlm_lock_resource *res)
|
||||
{
|
||||
int i;
|
||||
struct list_head *queue, *iter, *iter2;
|
||||
struct dlm_lock *lock;
|
||||
struct list_head *queue;
|
||||
struct dlm_lock *lock, *next;
|
||||
|
||||
res->state |= DLM_LOCK_RES_RECOVERING;
|
||||
if (!list_empty(&res->recovering)) {
|
||||
|
@ -1947,8 +1929,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
|
|||
/* find any pending locks and put them back on proper list */
|
||||
for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
|
||||
queue = dlm_list_idx_to_ptr(res, i);
|
||||
list_for_each_safe(iter, iter2, queue) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry_safe(lock, next, queue, list) {
|
||||
dlm_lock_get(lock);
|
||||
if (lock->convert_pending) {
|
||||
/* move converting lock back to granted */
|
||||
|
@ -2013,18 +1994,15 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
|
|||
u8 dead_node, u8 new_master)
|
||||
{
|
||||
int i;
|
||||
struct list_head *iter, *iter2;
|
||||
struct hlist_node *hash_iter;
|
||||
struct hlist_head *bucket;
|
||||
|
||||
struct dlm_lock_resource *res;
|
||||
struct dlm_lock_resource *res, *next;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
assert_spin_locked(&dlm->spinlock);
|
||||
|
||||
list_for_each_safe(iter, iter2, &dlm->reco.resources) {
|
||||
res = list_entry (iter, struct dlm_lock_resource, recovering);
|
||||
list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
|
||||
if (res->owner == dead_node) {
|
||||
list_del_init(&res->recovering);
|
||||
spin_lock(&res->spinlock);
|
||||
|
@ -2099,7 +2077,7 @@ static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local)
|
|||
static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res, u8 dead_node)
|
||||
{
|
||||
struct list_head *iter, *queue;
|
||||
struct list_head *queue;
|
||||
struct dlm_lock *lock;
|
||||
int blank_lvb = 0, local = 0;
|
||||
int i;
|
||||
|
@ -2121,8 +2099,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
|
|||
|
||||
for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
|
||||
queue = dlm_list_idx_to_ptr(res, i);
|
||||
list_for_each(iter, queue) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry(lock, queue, list) {
|
||||
if (lock->ml.node == search_node) {
|
||||
if (dlm_lvb_needs_invalidation(lock, local)) {
|
||||
/* zero the lksb lvb and lockres lvb */
|
||||
|
@ -2143,8 +2120,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
|
|||
static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res, u8 dead_node)
|
||||
{
|
||||
struct list_head *iter, *tmpiter;
|
||||
struct dlm_lock *lock;
|
||||
struct dlm_lock *lock, *next;
|
||||
unsigned int freed = 0;
|
||||
|
||||
/* this node is the lockres master:
|
||||
|
@ -2155,24 +2131,21 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
|
|||
assert_spin_locked(&res->spinlock);
|
||||
|
||||
/* TODO: check pending_asts, pending_basts here */
|
||||
list_for_each_safe(iter, tmpiter, &res->granted) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry_safe(lock, next, &res->granted, list) {
|
||||
if (lock->ml.node == dead_node) {
|
||||
list_del_init(&lock->list);
|
||||
dlm_lock_put(lock);
|
||||
freed++;
|
||||
}
|
||||
}
|
||||
list_for_each_safe(iter, tmpiter, &res->converting) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry_safe(lock, next, &res->converting, list) {
|
||||
if (lock->ml.node == dead_node) {
|
||||
list_del_init(&lock->list);
|
||||
dlm_lock_put(lock);
|
||||
freed++;
|
||||
}
|
||||
}
|
||||
list_for_each_safe(iter, tmpiter, &res->blocked) {
|
||||
lock = list_entry (iter, struct dlm_lock, list);
|
||||
list_for_each_entry_safe(lock, next, &res->blocked, list) {
|
||||
if (lock->ml.node == dead_node) {
|
||||
list_del_init(&lock->list);
|
||||
dlm_lock_put(lock);
|
||||
|
|
|
@ -600,15 +600,13 @@ static inline int ocfs2_highest_compat_lock_level(int level)
|
|||
static void lockres_set_flags(struct ocfs2_lock_res *lockres,
|
||||
unsigned long newflags)
|
||||
{
|
||||
struct list_head *pos, *tmp;
|
||||
struct ocfs2_mask_waiter *mw;
|
||||
struct ocfs2_mask_waiter *mw, *tmp;
|
||||
|
||||
assert_spin_locked(&lockres->l_lock);
|
||||
|
||||
lockres->l_flags = newflags;
|
||||
|
||||
list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) {
|
||||
mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
|
||||
list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
|
||||
if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -32,6 +32,11 @@ static inline void le32_add_cpu(__le32 *var, u32 val)
|
|||
*var = cpu_to_le32(le32_to_cpu(*var) + val);
|
||||
}
|
||||
|
||||
/*
 * Add the CPU-order value @val to the little-endian 64-bit value stored
 * at @var, writing the sum back in little-endian order.
 */
static inline void le64_add_cpu(__le64 *var, u64 val)
{
	u64 sum = le64_to_cpu(*var) + val;

	*var = cpu_to_le64(sum);
}
|
||||
|
||||
static inline void le32_and_cpu(__le32 *var, u32 val)
|
||||
{
|
||||
*var = cpu_to_le32(le32_to_cpu(*var) & val);
|
||||
|
|
|
@ -109,17 +109,14 @@ static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
|
|||
*/
|
||||
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
|
||||
{
|
||||
struct list_head *p, *n;
|
||||
struct ocfs2_extent_map_item *emi;
|
||||
struct ocfs2_extent_map_item *emi, *n;
|
||||
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||
struct ocfs2_extent_map *em = &oi->ip_extent_map;
|
||||
LIST_HEAD(tmp_list);
|
||||
unsigned int range;
|
||||
|
||||
spin_lock(&oi->ip_lock);
|
||||
list_for_each_safe(p, n, &em->em_list) {
|
||||
emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
|
||||
|
||||
list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
|
||||
if (emi->ei_cpos >= cpos) {
|
||||
/* Full truncate of this record. */
|
||||
list_move(&emi->ei_list, &tmp_list);
|
||||
|
@ -136,8 +133,7 @@ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
|
|||
}
|
||||
spin_unlock(&oi->ip_lock);
|
||||
|
||||
list_for_each_safe(p, n, &tmp_list) {
|
||||
emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
|
||||
list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
|
||||
list_del(&emi->ei_list);
|
||||
kfree(emi);
|
||||
}
|
||||
|
@ -377,37 +373,6 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the index of the extent record which contains cluster #v_cluster.
|
||||
* -1 is returned if it was not found.
|
||||
*
|
||||
* Should work fine on interior and exterior nodes.
|
||||
*/
|
||||
static int ocfs2_search_extent_list(struct ocfs2_extent_list *el,
|
||||
u32 v_cluster)
|
||||
{
|
||||
int ret = -1;
|
||||
int i;
|
||||
struct ocfs2_extent_rec *rec;
|
||||
u32 rec_end, rec_start, clusters;
|
||||
|
||||
for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
|
||||
rec = &el->l_recs[i];
|
||||
|
||||
rec_start = le32_to_cpu(rec->e_cpos);
|
||||
clusters = ocfs2_rec_clusters(el, rec);
|
||||
|
||||
rec_end = rec_start + clusters;
|
||||
|
||||
if (v_cluster >= rec_start && v_cluster < rec_end) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
|
||||
u32 *p_cluster, u32 *num_clusters,
|
||||
unsigned int *extent_flags)
|
||||
|
|
702
fs/ocfs2/file.c
702
fs/ocfs2/file.c
|
@ -263,6 +263,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
|
|||
int status;
|
||||
handle_t *handle;
|
||||
struct ocfs2_dinode *di;
|
||||
u64 cluster_bytes;
|
||||
|
||||
mlog_entry_void();
|
||||
|
||||
|
@ -286,7 +287,9 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
|
|||
/*
|
||||
* Do this before setting i_size.
|
||||
*/
|
||||
status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size);
|
||||
cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
|
||||
status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
|
||||
cluster_bytes);
|
||||
if (status) {
|
||||
mlog_errno(status);
|
||||
goto out_commit;
|
||||
|
@ -326,9 +329,6 @@ static int ocfs2_truncate_file(struct inode *inode,
|
|||
(unsigned long long)OCFS2_I(inode)->ip_blkno,
|
||||
(unsigned long long)new_i_size);
|
||||
|
||||
unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
|
||||
truncate_inode_pages(inode->i_mapping, new_i_size);
|
||||
|
||||
fe = (struct ocfs2_dinode *) di_bh->b_data;
|
||||
if (!OCFS2_IS_VALID_DINODE(fe)) {
|
||||
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
|
||||
|
@ -363,16 +363,23 @@ static int ocfs2_truncate_file(struct inode *inode,
|
|||
if (new_i_size == le64_to_cpu(fe->i_size))
|
||||
goto bail;
|
||||
|
||||
down_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
|
||||
/* This forces other nodes to sync and drop their pages. Do
|
||||
* this even if we have a truncate without allocation change -
|
||||
* ocfs2 cluster sizes can be much greater than page size, so
|
||||
* we have to truncate them anyway. */
|
||||
status = ocfs2_data_lock(inode, 1);
|
||||
if (status < 0) {
|
||||
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
|
||||
truncate_inode_pages(inode->i_mapping, new_i_size);
|
||||
|
||||
/* alright, we're going to need to do a full blown alloc size
|
||||
* change. Orphan the inode so that recovery can complete the
|
||||
* truncate if necessary. This does the task of marking
|
||||
|
@ -399,6 +406,8 @@ static int ocfs2_truncate_file(struct inode *inode,
|
|||
bail_unlock_data:
|
||||
ocfs2_data_unlock(inode, 1);
|
||||
|
||||
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
|
||||
bail:
|
||||
|
||||
mlog_exit(status);
|
||||
|
@ -419,6 +428,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
struct inode *inode,
|
||||
u32 *logical_offset,
|
||||
u32 clusters_to_add,
|
||||
int mark_unwritten,
|
||||
struct buffer_head *fe_bh,
|
||||
handle_t *handle,
|
||||
struct ocfs2_alloc_context *data_ac,
|
||||
|
@ -431,9 +441,13 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
enum ocfs2_alloc_restarted reason = RESTART_NONE;
|
||||
u32 bit_off, num_bits;
|
||||
u64 block;
|
||||
u8 flags = 0;
|
||||
|
||||
BUG_ON(!clusters_to_add);
|
||||
|
||||
if (mark_unwritten)
|
||||
flags = OCFS2_EXT_UNWRITTEN;
|
||||
|
||||
free_extents = ocfs2_num_free_extents(osb, inode, fe);
|
||||
if (free_extents < 0) {
|
||||
status = free_extents;
|
||||
|
@ -483,7 +497,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
|
||||
status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
|
||||
*logical_offset, block, num_bits,
|
||||
meta_ac);
|
||||
flags, meta_ac);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
|
@ -516,25 +530,31 @@ leave:
|
|||
* For a given allocation, determine which allocators will need to be
|
||||
* accessed, and lock them, reserving the appropriate number of bits.
|
||||
*
|
||||
* Called from ocfs2_extend_allocation() for file systems which don't
|
||||
* support holes, and from ocfs2_write() for file systems which
|
||||
* understand sparse inodes.
|
||||
* Sparse file systems call this from ocfs2_write_begin_nolock()
|
||||
* and ocfs2_allocate_unwritten_extents().
|
||||
*
|
||||
* File systems which don't support holes call this from
|
||||
* ocfs2_extend_allocation().
|
||||
*/
|
||||
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
|
||||
u32 clusters_to_add,
|
||||
u32 clusters_to_add, u32 extents_to_split,
|
||||
struct ocfs2_alloc_context **data_ac,
|
||||
struct ocfs2_alloc_context **meta_ac)
|
||||
{
|
||||
int ret, num_free_extents;
|
||||
int ret = 0, num_free_extents;
|
||||
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
|
||||
*meta_ac = NULL;
|
||||
*data_ac = NULL;
|
||||
if (data_ac)
|
||||
*data_ac = NULL;
|
||||
|
||||
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
|
||||
|
||||
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
|
||||
"clusters_to_add = %u\n",
|
||||
"clusters_to_add = %u, extents_to_split = %u\n",
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
|
||||
le32_to_cpu(di->i_clusters), clusters_to_add);
|
||||
le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
|
||||
|
||||
num_free_extents = ocfs2_num_free_extents(osb, inode, di);
|
||||
if (num_free_extents < 0) {
|
||||
|
@ -552,9 +572,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
|
|||
*
|
||||
* Most of the time we'll only be seeing this 1 cluster at a time
|
||||
* anyway.
|
||||
*
|
||||
* Always lock for any unwritten extents - we might want to
|
||||
* add blocks during a split.
|
||||
*/
|
||||
if (!num_free_extents ||
|
||||
(ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) {
|
||||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
|
||||
ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
|
@ -563,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
|
|||
}
|
||||
}
|
||||
|
||||
if (clusters_to_add == 0)
|
||||
goto out;
|
||||
|
||||
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
|
@ -585,14 +611,13 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_extend_allocation(struct inode *inode,
|
||||
u32 clusters_to_add)
|
||||
static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
|
||||
u32 clusters_to_add, int mark_unwritten)
|
||||
{
|
||||
int status = 0;
|
||||
int restart_func = 0;
|
||||
int drop_alloc_sem = 0;
|
||||
int credits;
|
||||
u32 prev_clusters, logical_start;
|
||||
u32 prev_clusters;
|
||||
struct buffer_head *bh = NULL;
|
||||
struct ocfs2_dinode *fe = NULL;
|
||||
handle_t *handle = NULL;
|
||||
|
@ -607,7 +632,7 @@ static int ocfs2_extend_allocation(struct inode *inode,
|
|||
* This function only exists for file systems which don't
|
||||
* support holes.
|
||||
*/
|
||||
BUG_ON(ocfs2_sparse_alloc(osb));
|
||||
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
|
||||
|
||||
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
|
||||
OCFS2_BH_CACHED, inode);
|
||||
|
@ -623,19 +648,10 @@ static int ocfs2_extend_allocation(struct inode *inode,
|
|||
goto leave;
|
||||
}
|
||||
|
||||
logical_start = OCFS2_I(inode)->ip_clusters;
|
||||
|
||||
restart_all:
|
||||
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
|
||||
|
||||
/* blocks people in read/write from reading our allocation
|
||||
* until we're done changing it. We depend on i_mutex to block
|
||||
* other extend/truncate calls while we're here. Ordering wrt
|
||||
* start_trans is important here -- always do it before! */
|
||||
down_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
drop_alloc_sem = 1;
|
||||
|
||||
status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
|
||||
status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
|
||||
&meta_ac);
|
||||
if (status) {
|
||||
mlog_errno(status);
|
||||
|
@ -668,6 +684,7 @@ restarted_transaction:
|
|||
inode,
|
||||
&logical_start,
|
||||
clusters_to_add,
|
||||
mark_unwritten,
|
||||
bh,
|
||||
handle,
|
||||
data_ac,
|
||||
|
@ -720,10 +737,6 @@ restarted_transaction:
|
|||
OCFS2_I(inode)->ip_clusters, i_size_read(inode));
|
||||
|
||||
leave:
|
||||
if (drop_alloc_sem) {
|
||||
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
drop_alloc_sem = 0;
|
||||
}
|
||||
if (handle) {
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
handle = NULL;
|
||||
|
@ -749,6 +762,25 @@ leave:
|
|||
return status;
|
||||
}
|
||||
|
||||
/*
 * Locked front end for __ocfs2_extend_allocation().
 *
 * ip_alloc_sem is held for writing across the whole call so that
 * readers and writers cannot look at the inode's allocation while it
 * is being changed.  Serialization against other extend/truncate
 * callers relies on i_mutex, which this function does not take itself.
 *
 * Returns whatever __ocfs2_extend_allocation() returns.
 */
static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				   u32 clusters_to_add, int mark_unwritten)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	int status;

	down_write(&oi->ip_alloc_sem);
	status = __ocfs2_extend_allocation(inode, logical_start,
					   clusters_to_add, mark_unwritten);
	up_write(&oi->ip_alloc_sem);

	return status;
}
|
||||
|
||||
/* Some parts of this taken from generic_cont_expand, which turned out
|
||||
* to be too fragile to do exactly what we need without us having to
|
||||
* worry about recursive locking in ->prepare_write() and
|
||||
|
@ -890,7 +922,9 @@ static int ocfs2_extend_file(struct inode *inode,
|
|||
}
|
||||
|
||||
if (clusters_to_add) {
|
||||
ret = ocfs2_extend_allocation(inode, clusters_to_add);
|
||||
ret = ocfs2_extend_allocation(inode,
|
||||
OCFS2_I(inode)->ip_clusters,
|
||||
clusters_to_add, 0);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out_unlock;
|
||||
|
@ -995,6 +1029,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
|
|||
goto bail_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* This will intentionally not wind up calling vmtruncate(),
|
||||
* since all the work for a size change has been done above.
|
||||
* Otherwise, we could get into problems with truncate as
|
||||
* ip_alloc_sem is used there to protect against i_size
|
||||
* changes.
|
||||
*/
|
||||
status = inode_setattr(inode, attr);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
|
@ -1070,17 +1111,16 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_write_remove_suid(struct inode *inode)
|
||||
static int __ocfs2_write_remove_suid(struct inode *inode,
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
int ret;
|
||||
struct buffer_head *bh = NULL;
|
||||
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||
handle_t *handle;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
struct ocfs2_dinode *di;
|
||||
|
||||
mlog_entry("(Inode %llu, mode 0%o)\n",
|
||||
(unsigned long long)oi->ip_blkno, inode->i_mode);
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
|
||||
|
||||
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
|
||||
if (handle == NULL) {
|
||||
|
@ -1089,17 +1129,11 @@ static int ocfs2_write_remove_suid(struct inode *inode)
|
|||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out_trans;
|
||||
}
|
||||
|
||||
ret = ocfs2_journal_access(handle, inode, bh,
|
||||
OCFS2_JOURNAL_ACCESS_WRITE);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out_bh;
|
||||
goto out_trans;
|
||||
}
|
||||
|
||||
inode->i_mode &= ~S_ISUID;
|
||||
|
@ -1112,8 +1146,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
|
|||
ret = ocfs2_journal_dirty(handle, bh);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
out_bh:
|
||||
brelse(bh);
|
||||
|
||||
out_trans:
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
out:
|
||||
|
@ -1159,6 +1192,460 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_write_remove_suid(struct inode *inode)
|
||||
{
|
||||
int ret;
|
||||
struct buffer_head *bh = NULL;
|
||||
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||
|
||||
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
|
||||
oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = __ocfs2_write_remove_suid(inode, bh);
|
||||
out:
|
||||
brelse(bh);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate enough extents to cover the region starting at byte offset
|
||||
* start for len bytes. Existing extents are skipped, any extents
|
||||
* added are marked as "unwritten".
|
||||
*/
|
||||
static int ocfs2_allocate_unwritten_extents(struct inode *inode,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
int ret;
|
||||
u32 cpos, phys_cpos, clusters, alloc_size;
|
||||
|
||||
/*
|
||||
* We consider both start and len to be inclusive.
|
||||
*/
|
||||
cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
|
||||
clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
|
||||
clusters -= cpos;
|
||||
|
||||
while (clusters) {
|
||||
ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
|
||||
&alloc_size, NULL);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hole or existing extent len can be arbitrary, so
|
||||
* cap it to our own allocation request.
|
||||
*/
|
||||
if (alloc_size > clusters)
|
||||
alloc_size = clusters;
|
||||
|
||||
if (phys_cpos) {
|
||||
/*
|
||||
* We already have an allocation at this
|
||||
* region so we can safely skip it.
|
||||
*/
|
||||
goto next;
|
||||
}
|
||||
|
||||
ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
|
||||
if (ret) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
next:
|
||||
cpos += alloc_size;
|
||||
clusters -= alloc_size;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Remove one physically-backed run of clusters from an inode.
 *
 * @inode:     inode whose allocation is being reduced
 * @di_bh:     buffer holding the inode's on-disk dinode
 * @cpos:      first logical cluster of the run
 * @phys_cpos: physical cluster that backs @cpos
 * @len:       number of clusters in the run
 * @dealloc:   cached dealloc context passed on to ocfs2_remove_extent()
 *
 * Metadata is reserved for a single extent split (no data clusters),
 * the truncate log is flushed first if it reports needing it, and then
 * the extent removal, the i_clusters update and the truncate log append
 * all happen inside one transaction while the truncate log inode's
 * i_mutex is held.
 *
 * Returns 0 on success or a negative errno.
 */
static int __ocfs2_remove_inode_range(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 cpos, u32 phys_cpos, u32 len,
				      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	/* No new data clusters, one extent that may need splitting. */
	ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * Failure is tested with IS_ERR()/PTR_ERR(), matching the check
	 * ocfs2_change_file_space() makes on the same call; a NULL test
	 * would let an error pointer through as a valid handle.
	 */
	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		/* The transaction is already open, so it must be closed. */
		goto out_commit;
	}

	ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
				  dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	OCFS2_I(inode)->ip_clusters -= len;
	di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	mutex_unlock(&tl_inode->i_mutex);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}
|
||||
|
||||
/*
|
||||
* Truncate a byte range, avoiding pages within partial clusters. This
|
||||
* preserves those pages for the zeroing code to write to.
|
||||
*/
|
||||
static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
|
||||
u64 byte_len)
|
||||
{
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
loff_t start, end;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
|
||||
start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
|
||||
end = byte_start + byte_len;
|
||||
end = end & ~(osb->s_clustersize - 1);
|
||||
|
||||
if (start < end) {
|
||||
unmap_mapping_range(mapping, start, end - start, 0);
|
||||
truncate_inode_pages_range(mapping, start, end - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Zero the partial clusters at the edges of a user-supplied byte range.
 *
 * @inode: inode being modified
 * @start: first byte of the range as passed in by the user
 * @len:   length of the range in bytes
 *
 * When both edges sit on a cluster boundary nothing is done.  Otherwise
 * up to two sub-ranges are zeroed inside one transaction: from @start
 * to the end of its cluster (capped at the range end), and from the
 * start of the last cluster to the range end.
 *
 * Returns 0 on success or a negative errno.  Zeroing stops at the first
 * failure so that its error code reaches the caller.
 */
static int ocfs2_zero_partial_clusters(struct inode *inode,
				       u64 start, u64 len)
{
	int ret = 0;
	u64 tmpend, end = start + len;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int csize = osb->s_clustersize;
	handle_t *handle;

	/*
	 * The "start" and "end" values are NOT necessarily part of
	 * the range whose allocation is being deleted. Rather, this
	 * is what the user passed in with the request. We must zero
	 * partial clusters here. There's no need to worry about
	 * physical allocation - the zeroing code knows to skip holes.
	 */
	mlog(0, "byte start: %llu, end: %llu\n",
	     (unsigned long long)start, (unsigned long long)end);

	/*
	 * If both edges are on a cluster boundary then there's no
	 * zeroing required as the region is part of the allocation to
	 * be truncated.
	 */
	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
		goto out;

	/*
	 * Failure is tested with IS_ERR()/PTR_ERR(), matching the check
	 * ocfs2_change_file_space() makes on the same call.
	 */
	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We want to get the byte offset of the end of the 1st cluster.
	 */
	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
	if (tmpend > end)
		tmpend = end;

	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
	     (unsigned long long)start, (unsigned long long)tmpend);

	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
	if (ret) {
		mlog_errno(ret);
		/*
		 * Stop here: carrying on would let the second call's
		 * return value overwrite this error.
		 */
		goto out_commit;
	}

	if (tmpend < end) {
		/*
		 * This may make start and end equal, but the zeroing
		 * code will skip any work in that case so there's no
		 * need to catch it up here.
		 */
		start = end & ~(osb->s_clustersize - 1);

		mlog(0, "2nd range: start: %llu, end: %llu\n",
		     (unsigned long long)start, (unsigned long long)end);

		ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}
|
||||
|
||||
/*
 * Remove the allocation behind a byte range of an inode.
 *
 * Partial clusters at the edges are zeroed through
 * ocfs2_zero_partial_clusters().  The whole clusters in between are
 * then walked one mapping at a time: holes are skipped and allocated
 * runs go to __ocfs2_remove_inode_range().  Once every run is gone the
 * page cache for the whole clusters is dropped.
 *
 * A zero-length request returns 0 straight away.  On every other path,
 * success or failure, a truncate log flush is scheduled and the cached
 * deallocs are run before returning.
 *
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_remove_inode_range(struct inode *inode,
				    struct buffer_head *di_bh, u64 byte_start,
				    u64 byte_len)
{
	int status = 0;
	u32 first_cluster, remaining, cpos, phys_cpos, chunk;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (!byte_len)
		return 0;

	first_cluster = ocfs2_clusters_for_bytes(osb->sb, byte_start);
	remaining = (byte_start + byte_len) >> osb->s_clustersize_bits;
	/* A range that lies inside one cluster has no whole clusters. */
	remaining = (remaining >= first_cluster) ?
			remaining - first_cluster : 0;

	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     (unsigned long long)byte_start,
	     (unsigned long long)byte_len, first_cluster, remaining);

	status = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	for (cpos = first_cluster; remaining;
	     cpos += chunk, remaining -= chunk) {
		status = ocfs2_get_clusters(inode, cpos, &phys_cpos,
					    &chunk, NULL);
		if (status) {
			mlog_errno(status);
			goto out;
		}

		if (chunk > remaining)
			chunk = remaining;

		/* Holes have nothing to free. */
		if (phys_cpos == 0)
			continue;

		status = __ocfs2_remove_inode_range(inode, di_bh, cpos,
						    phys_cpos, chunk,
						    &dealloc);
		if (status) {
			mlog_errno(status);
			goto out;
		}
	}

	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	return status;
}
|
||||
|
||||
/*
|
||||
* Parts of this function taken from xfs_change_file_space()
|
||||
*/
|
||||
int ocfs2_change_file_space(struct file *file, unsigned int cmd,
|
||||
struct ocfs2_space_resv *sr)
|
||||
{
|
||||
int ret;
|
||||
s64 llen;
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
struct buffer_head *di_bh = NULL;
|
||||
handle_t *handle;
|
||||
unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
|
||||
|
||||
if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
|
||||
!ocfs2_writes_unwritten_extents(osb))
|
||||
return -ENOTTY;
|
||||
else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
|
||||
!ocfs2_sparse_alloc(osb))
|
||||
return -ENOTTY;
|
||||
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
return -EINVAL;
|
||||
|
||||
if (!(file->f_mode & FMODE_WRITE))
|
||||
return -EBADF;
|
||||
|
||||
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
|
||||
return -EROFS;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
||||
/*
|
||||
* This prevents concurrent writes on other nodes
|
||||
*/
|
||||
ret = ocfs2_rw_lock(inode, 1);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_meta_lock(inode, &di_bh, 1);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out_rw_unlock;
|
||||
}
|
||||
|
||||
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
|
||||
ret = -EPERM;
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
|
||||
switch (sr->l_whence) {
|
||||
case 0: /*SEEK_SET*/
|
||||
break;
|
||||
case 1: /*SEEK_CUR*/
|
||||
sr->l_start += file->f_pos;
|
||||
break;
|
||||
case 2: /*SEEK_END*/
|
||||
sr->l_start += i_size_read(inode);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
sr->l_whence = 0;
|
||||
|
||||
llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
|
||||
|
||||
if (sr->l_start < 0
|
||||
|| sr->l_start > max_off
|
||||
|| (sr->l_start + llen) < 0
|
||||
|| (sr->l_start + llen) > max_off) {
|
||||
ret = -EINVAL;
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
|
||||
if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
|
||||
if (sr->l_len <= 0) {
|
||||
ret = -EINVAL;
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
if (should_remove_suid(file->f_path.dentry)) {
|
||||
ret = __ocfs2_write_remove_suid(inode, di_bh);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
down_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
switch (cmd) {
|
||||
case OCFS2_IOC_RESVSP:
|
||||
case OCFS2_IOC_RESVSP64:
|
||||
/*
|
||||
* This takes unsigned offsets, but the signed ones we
|
||||
* pass have been checked against overflow above.
|
||||
*/
|
||||
ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
|
||||
sr->l_len);
|
||||
break;
|
||||
case OCFS2_IOC_UNRESVSP:
|
||||
case OCFS2_IOC_UNRESVSP64:
|
||||
ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
|
||||
sr->l_len);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* We update c/mtime for these changes
|
||||
*/
|
||||
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
|
||||
if (IS_ERR(handle)) {
|
||||
ret = PTR_ERR(handle);
|
||||
mlog_errno(ret);
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
|
||||
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
|
||||
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
|
||||
ocfs2_commit_trans(osb, handle);
|
||||
|
||||
out_meta_unlock:
|
||||
brelse(di_bh);
|
||||
ocfs2_meta_unlock(inode, 1);
|
||||
out_rw_unlock:
|
||||
ocfs2_rw_unlock(inode, 1);
|
||||
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
|
||||
loff_t *ppos,
|
||||
size_t count,
|
||||
|
@ -1329,15 +1816,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
|
|||
*basep = base;
|
||||
}
|
||||
|
||||
static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp,
|
||||
static struct page * ocfs2_get_write_source(char **ret_src_buf,
|
||||
const struct iovec *cur_iov,
|
||||
size_t iov_offset)
|
||||
{
|
||||
int ret;
|
||||
char *buf;
|
||||
char *buf = cur_iov->iov_base + iov_offset;
|
||||
struct page *src_page = NULL;
|
||||
unsigned long off;
|
||||
|
||||
buf = cur_iov->iov_base + iov_offset;
|
||||
off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
|
||||
|
||||
if (!segment_eq(get_fs(), KERNEL_DS)) {
|
||||
/*
|
||||
|
@ -1349,18 +1837,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp
|
|||
(unsigned long)buf & PAGE_CACHE_MASK, 1,
|
||||
0, 0, &src_page, NULL);
|
||||
if (ret == 1)
|
||||
bp->b_src_buf = kmap(src_page);
|
||||
*ret_src_buf = kmap(src_page) + off;
|
||||
else
|
||||
src_page = ERR_PTR(-EFAULT);
|
||||
} else {
|
||||
bp->b_src_buf = buf;
|
||||
*ret_src_buf = buf;
|
||||
}
|
||||
|
||||
return src_page;
|
||||
}
|
||||
|
||||
static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp,
|
||||
struct page *page)
|
||||
static void ocfs2_put_write_source(struct page *page)
|
||||
{
|
||||
if (page) {
|
||||
kunmap(page);
|
||||
|
@ -1376,10 +1863,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
|
|||
{
|
||||
int ret = 0;
|
||||
ssize_t copied, total = 0;
|
||||
size_t iov_offset = 0;
|
||||
size_t iov_offset = 0, bytes;
|
||||
loff_t pos;
|
||||
const struct iovec *cur_iov = iov;
|
||||
struct ocfs2_buffered_write_priv bp;
|
||||
struct page *page;
|
||||
struct page *user_page, *page;
|
||||
char *buf, *dst;
|
||||
void *fsdata;
|
||||
|
||||
/*
|
||||
* handle partial DIO write. Adjust cur_iov if needed.
|
||||
|
@ -1387,21 +1876,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
|
|||
ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
|
||||
|
||||
do {
|
||||
bp.b_cur_off = iov_offset;
|
||||
bp.b_cur_iov = cur_iov;
|
||||
pos = *ppos;
|
||||
|
||||
page = ocfs2_get_write_source(&bp, cur_iov, iov_offset);
|
||||
if (IS_ERR(page)) {
|
||||
ret = PTR_ERR(page);
|
||||
user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
|
||||
if (IS_ERR(user_page)) {
|
||||
ret = PTR_ERR(user_page);
|
||||
goto out;
|
||||
}
|
||||
|
||||
copied = ocfs2_buffered_write_cluster(file, *ppos, count,
|
||||
ocfs2_map_and_write_user_data,
|
||||
&bp);
|
||||
/* Stay within our page boundaries */
|
||||
bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
|
||||
(PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
|
||||
/* Stay within the vector boundary */
|
||||
bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
|
||||
/* Stay within count */
|
||||
bytes = min(bytes, count);
|
||||
|
||||
ocfs2_put_write_source(&bp, page);
|
||||
page = NULL;
|
||||
ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
|
||||
&page, &fsdata);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dst = kmap_atomic(page, KM_USER0);
|
||||
memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
|
||||
kunmap_atomic(dst, KM_USER0);
|
||||
flush_dcache_page(page);
|
||||
ocfs2_put_write_source(user_page);
|
||||
|
||||
copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
|
||||
bytes, page, fsdata);
|
||||
if (copied < 0) {
|
||||
mlog_errno(copied);
|
||||
ret = copied;
|
||||
|
@ -1409,7 +1915,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
|
|||
}
|
||||
|
||||
total += copied;
|
||||
*ppos = *ppos + copied;
|
||||
*ppos = pos + copied;
|
||||
count -= copied;
|
||||
|
||||
ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
|
||||
|
@ -1579,52 +2085,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
|
|||
struct pipe_buffer *buf,
|
||||
struct splice_desc *sd)
|
||||
{
|
||||
int ret, count, total = 0;
|
||||
int ret, count;
|
||||
ssize_t copied = 0;
|
||||
struct ocfs2_splice_write_priv sp;
|
||||
struct file *file = sd->u.file;
|
||||
unsigned int offset;
|
||||
struct page *page = NULL;
|
||||
void *fsdata;
|
||||
char *src, *dst;
|
||||
|
||||
ret = buf->ops->confirm(pipe, buf);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
sp.s_sd = sd;
|
||||
sp.s_buf = buf;
|
||||
sp.s_pipe = pipe;
|
||||
sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
|
||||
sp.s_buf_offset = buf->offset;
|
||||
|
||||
offset = sd->pos & ~PAGE_CACHE_MASK;
|
||||
count = sd->len;
|
||||
if (count + sp.s_offset > PAGE_CACHE_SIZE)
|
||||
count = PAGE_CACHE_SIZE - sp.s_offset;
|
||||
if (count + offset > PAGE_CACHE_SIZE)
|
||||
count = PAGE_CACHE_SIZE - offset;
|
||||
|
||||
do {
|
||||
/*
|
||||
* splice wants us to copy up to one page at a
|
||||
* time. For pagesize > cluster size, this means we
|
||||
* might enter ocfs2_buffered_write_cluster() more
|
||||
* than once, so keep track of our progress here.
|
||||
*/
|
||||
copied = ocfs2_buffered_write_cluster(sd->u.file,
|
||||
(loff_t)sd->pos + total,
|
||||
count,
|
||||
ocfs2_map_and_write_splice_data,
|
||||
&sp);
|
||||
if (copied < 0) {
|
||||
mlog_errno(copied);
|
||||
ret = copied;
|
||||
goto out;
|
||||
}
|
||||
ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
|
||||
&page, &fsdata);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
count -= copied;
|
||||
sp.s_offset += copied;
|
||||
sp.s_buf_offset += copied;
|
||||
total += copied;
|
||||
} while (count);
|
||||
src = buf->ops->map(pipe, buf, 1);
|
||||
dst = kmap_atomic(page, KM_USER1);
|
||||
memcpy(dst + offset, src + buf->offset, count);
|
||||
kunmap_atomic(page, KM_USER1);
|
||||
buf->ops->unmap(pipe, buf, src);
|
||||
|
||||
ret = 0;
|
||||
copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
|
||||
page, fsdata);
|
||||
if (copied < 0) {
|
||||
mlog_errno(copied);
|
||||
ret = copied;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
|
||||
return total ? total : ret;
|
||||
return copied ? copied : ret;
|
||||
}
|
||||
|
||||
static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
|
||||
|
|
|
@ -39,15 +39,16 @@ enum ocfs2_alloc_restarted {
|
|||
};
|
||||
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
||||
struct inode *inode,
|
||||
u32 *cluster_start,
|
||||
u32 *logical_offset,
|
||||
u32 clusters_to_add,
|
||||
int mark_unwritten,
|
||||
struct buffer_head *fe_bh,
|
||||
handle_t *handle,
|
||||
struct ocfs2_alloc_context *data_ac,
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
enum ocfs2_alloc_restarted *reason);
|
||||
enum ocfs2_alloc_restarted *reason_ret);
|
||||
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
|
||||
u32 clusters_to_add,
|
||||
u32 clusters_to_add, u32 extents_to_split,
|
||||
struct ocfs2_alloc_context **data_ac,
|
||||
struct ocfs2_alloc_context **meta_ac);
|
||||
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
|
@ -61,4 +62,7 @@ int ocfs2_should_update_atime(struct inode *inode,
|
|||
int ocfs2_update_inode_atime(struct inode *inode,
|
||||
struct buffer_head *bh);
|
||||
|
||||
int ocfs2_change_file_space(struct file *file, unsigned int cmd,
|
||||
struct ocfs2_space_resv *sr);
|
||||
|
||||
#endif /* OCFS2_FILE_H */
|
||||
|
|
|
@ -157,16 +157,16 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
|
|||
if (ocfs2_mount_local(osb))
|
||||
return 0;
|
||||
|
||||
status = o2hb_register_callback(&osb->osb_hb_down);
|
||||
status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
status = o2hb_register_callback(&osb->osb_hb_up);
|
||||
status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
o2hb_unregister_callback(&osb->osb_hb_down);
|
||||
o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
|
||||
}
|
||||
|
||||
bail:
|
||||
|
@ -178,8 +178,8 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
|
|||
if (ocfs2_mount_local(osb))
|
||||
return;
|
||||
|
||||
o2hb_unregister_callback(&osb->osb_hb_down);
|
||||
o2hb_unregister_callback(&osb->osb_hb_up);
|
||||
o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
|
||||
o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
|
||||
}
|
||||
|
||||
void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "ocfs2.h"
|
||||
#include "alloc.h"
|
||||
#include "dlmglue.h"
|
||||
#include "file.h"
|
||||
#include "inode.h"
|
||||
#include "journal.h"
|
||||
|
||||
|
@ -115,6 +116,7 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
|
|||
{
|
||||
unsigned int flags;
|
||||
int status;
|
||||
struct ocfs2_space_resv sr;
|
||||
|
||||
switch (cmd) {
|
||||
case OCFS2_IOC_GETFLAGS:
|
||||
|
@ -130,6 +132,14 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
|
|||
|
||||
return ocfs2_set_inode_attr(inode, flags,
|
||||
OCFS2_FL_MODIFIABLE);
|
||||
case OCFS2_IOC_RESVSP:
|
||||
case OCFS2_IOC_RESVSP64:
|
||||
case OCFS2_IOC_UNRESVSP:
|
||||
case OCFS2_IOC_UNRESVSP64:
|
||||
if (copy_from_user(&sr, (int __user *) arg, sizeof(sr)))
|
||||
return -EFAULT;
|
||||
|
||||
return ocfs2_change_file_space(filp, cmd, &sr);
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
@ -148,6 +158,11 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
|||
case OCFS2_IOC32_SETFLAGS:
|
||||
cmd = OCFS2_IOC_SETFLAGS;
|
||||
break;
|
||||
case OCFS2_IOC_RESVSP:
|
||||
case OCFS2_IOC_RESVSP64:
|
||||
case OCFS2_IOC_UNRESVSP:
|
||||
case OCFS2_IOC_UNRESVSP64:
|
||||
break;
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
|
|
@ -722,8 +722,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
|
|||
container_of(work, struct ocfs2_journal, j_recovery_work);
|
||||
struct ocfs2_super *osb = journal->j_osb;
|
||||
struct ocfs2_dinode *la_dinode, *tl_dinode;
|
||||
struct ocfs2_la_recovery_item *item;
|
||||
struct list_head *p, *n;
|
||||
struct ocfs2_la_recovery_item *item, *n;
|
||||
LIST_HEAD(tmp_la_list);
|
||||
|
||||
mlog_entry_void();
|
||||
|
@ -734,8 +733,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
|
|||
list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
|
||||
spin_unlock(&journal->j_lock);
|
||||
|
||||
list_for_each_safe(p, n, &tmp_la_list) {
|
||||
item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);
|
||||
list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
|
||||
list_del_init(&item->lri_list);
|
||||
|
||||
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
|
||||
|
|
|
@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
|
|||
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
|
||||
+ OCFS2_TRUNCATE_LOG_UPDATE)
|
||||
|
||||
#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
|
||||
|
||||
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
|
||||
* bitmap block for the new bit) */
|
||||
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
|
||||
|
|
167
fs/ocfs2/mmap.c
167
fs/ocfs2/mmap.c
|
@ -37,11 +37,29 @@
|
|||
|
||||
#include "ocfs2.h"
|
||||
|
||||
#include "aops.h"
|
||||
#include "dlmglue.h"
|
||||
#include "file.h"
|
||||
#include "inode.h"
|
||||
#include "mmap.h"
|
||||
|
||||
static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
|
||||
{
|
||||
/* The best way to deal with signals in the vm path is
|
||||
* to block them upfront, rather than allowing the
|
||||
* locking paths to return -ERESTARTSYS. */
|
||||
sigfillset(blocked);
|
||||
|
||||
/* We should technically never get a bad return value
|
||||
* from sigprocmask */
|
||||
return sigprocmask(SIG_BLOCK, blocked, oldset);
|
||||
}
|
||||
|
||||
static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
|
||||
{
|
||||
return sigprocmask(SIG_SETMASK, oldset, NULL);
|
||||
}
|
||||
|
||||
static struct page *ocfs2_nopage(struct vm_area_struct * area,
|
||||
unsigned long address,
|
||||
int *type)
|
||||
|
@ -53,14 +71,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
|
|||
mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
|
||||
type);
|
||||
|
||||
/* The best way to deal with signals in this path is
|
||||
* to block them upfront, rather than allowing the
|
||||
* locking paths to return -ERESTARTSYS. */
|
||||
sigfillset(&blocked);
|
||||
|
||||
/* We should technically never get a bad ret return
|
||||
* from sigprocmask */
|
||||
ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
|
||||
ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
|
@ -68,7 +79,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
|
|||
|
||||
page = filemap_nopage(area, address, type);
|
||||
|
||||
ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
|
||||
ret = ocfs2_vm_op_unblock_sigs(&oldset);
|
||||
if (ret < 0)
|
||||
mlog_errno(ret);
|
||||
out:
|
||||
|
@ -76,27 +87,135 @@ out:
|
|||
return page;
|
||||
}
|
||||
|
||||
static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
|
||||
struct page *page)
|
||||
{
|
||||
int ret;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
loff_t pos = page->index << PAGE_CACHE_SHIFT;
|
||||
unsigned int len = PAGE_CACHE_SIZE;
|
||||
pgoff_t last_index;
|
||||
struct page *locked_page = NULL;
|
||||
void *fsdata;
|
||||
loff_t size = i_size_read(inode);
|
||||
|
||||
/*
|
||||
* Another node might have truncated while we were waiting on
|
||||
* cluster locks.
|
||||
*/
|
||||
last_index = size >> PAGE_CACHE_SHIFT;
|
||||
if (page->index > last_index) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* The i_size check above doesn't catch the case where nodes
|
||||
* truncated and then re-extended the file. We'll re-check the
|
||||
* page mapping after taking the page lock inside of
|
||||
* ocfs2_write_begin_nolock().
|
||||
*/
|
||||
if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Call ocfs2_write_begin() and ocfs2_write_end() to take
|
||||
* advantage of the allocation code there. We pass a write
|
||||
* length of the whole page (chopped to i_size) to make sure
|
||||
* the whole thing is allocated.
|
||||
*
|
||||
* Since we know the page is up to date, we don't have to
|
||||
* worry about ocfs2_write_begin() skipping some buffer reads
|
||||
* because the "write" would invalidate their data.
|
||||
*/
|
||||
if (page->index == last_index)
|
||||
len = size & ~PAGE_CACHE_MASK;
|
||||
|
||||
ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
|
||||
&fsdata, di_bh, page);
|
||||
if (ret) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
|
||||
fsdata);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
BUG_ON(ret != len);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
|
||||
{
|
||||
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
|
||||
struct buffer_head *di_bh = NULL;
|
||||
sigset_t blocked, oldset;
|
||||
int ret, ret2;
|
||||
|
||||
ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The cluster locks taken will block a truncate from another
|
||||
* node. Taking the data lock will also ensure that we don't
|
||||
* attempt page truncation as part of a downconvert.
|
||||
*/
|
||||
ret = ocfs2_meta_lock(inode, &di_bh, 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* The alloc sem should be enough to serialize with
|
||||
* ocfs2_truncate_file() changing i_size as well as any thread
|
||||
* modifying the inode btree.
|
||||
*/
|
||||
down_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
|
||||
ret = ocfs2_data_lock(inode, 1);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out_meta_unlock;
|
||||
}
|
||||
|
||||
ret = __ocfs2_page_mkwrite(inode, di_bh, page);
|
||||
|
||||
ocfs2_data_unlock(inode, 1);
|
||||
|
||||
out_meta_unlock:
|
||||
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
|
||||
brelse(di_bh);
|
||||
ocfs2_meta_unlock(inode, 1);
|
||||
|
||||
out:
|
||||
ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
|
||||
if (ret2 < 0)
|
||||
mlog_errno(ret2);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct vm_operations_struct ocfs2_file_vm_ops = {
|
||||
.nopage = ocfs2_nopage,
|
||||
.nopage = ocfs2_nopage,
|
||||
.page_mkwrite = ocfs2_page_mkwrite,
|
||||
};
|
||||
|
||||
int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
int ret = 0, lock_level = 0;
|
||||
struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);
|
||||
|
||||
/*
|
||||
* Only support shared writeable mmap for local mounts which
|
||||
* don't know about holes.
|
||||
*/
|
||||
if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) &&
|
||||
((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
|
||||
((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
|
||||
mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
|
||||
/* This is -EINVAL because generic_file_readonly_mmap
|
||||
* returns it in a similar situation. */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
|
||||
file->f_vfsmnt, &lock_level);
|
||||
|
|
|
@ -1674,7 +1674,7 @@ static int ocfs2_symlink(struct inode *dir,
|
|||
u32 offset = 0;
|
||||
|
||||
inode->i_op = &ocfs2_symlink_inode_operations;
|
||||
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
|
||||
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
|
||||
new_fe_bh,
|
||||
handle, data_ac, NULL,
|
||||
NULL);
|
||||
|
|
|
@ -219,6 +219,7 @@ struct ocfs2_super
|
|||
u16 max_slots;
|
||||
s16 node_num;
|
||||
s16 slot_num;
|
||||
s16 preferred_slot;
|
||||
int s_sectsize_bits;
|
||||
int s_clustersize;
|
||||
int s_clustersize_bits;
|
||||
|
@ -305,6 +306,19 @@ static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
|
||||
{
|
||||
/*
|
||||
* Support for sparse files is a pre-requisite
|
||||
*/
|
||||
if (!ocfs2_sparse_alloc(osb))
|
||||
return 0;
|
||||
|
||||
if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* set / clear functions because cluster events can make these happen
|
||||
* in parallel so we want the transitions to be atomic. this also
|
||||
* means that any future flags osb_flags must be protected by spinlock
|
||||
|
|
|
@ -88,7 +88,7 @@
|
|||
#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
|
||||
#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
|
||||
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
|
||||
#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
|
||||
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
|
||||
|
||||
/*
|
||||
* Heartbeat-only devices are missing journals and other files. The
|
||||
|
@ -116,6 +116,11 @@
|
|||
*/
|
||||
#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
|
||||
|
||||
/*
|
||||
* Unwritten extents support.
|
||||
*/
|
||||
#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
|
||||
|
||||
/* The byte offset of the first backup block will be 1G.
|
||||
* The following will be 4G, 16G, 64G, 256G and 1T.
|
||||
*/
|
||||
|
@ -169,6 +174,32 @@
|
|||
#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int)
|
||||
#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int)
|
||||
|
||||
/*
|
||||
* Space reservation / allocation / free ioctls and argument structure
|
||||
* are designed to be compatible with XFS.
|
||||
*
|
||||
* ALLOCSP* and FREESP* are not and will never be supported, but are
|
||||
* included here for completeness.
|
||||
*/
|
||||
struct ocfs2_space_resv {
|
||||
__s16 l_type;
|
||||
__s16 l_whence;
|
||||
__s64 l_start;
|
||||
__s64 l_len; /* len == 0 means until end of file */
|
||||
__s32 l_sysid;
|
||||
__u32 l_pid;
|
||||
__s32 l_pad[4]; /* reserve area */
|
||||
};
|
||||
|
||||
#define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv)
|
||||
#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv)
|
||||
|
||||
/*
|
||||
* Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
|
||||
*/
|
||||
|
|
|
@ -121,17 +121,25 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si)
|
||||
static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
|
||||
{
|
||||
int i;
|
||||
s16 ret = OCFS2_INVALID_SLOT;
|
||||
|
||||
if (preferred >= 0 && preferred < si->si_num_slots) {
|
||||
if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) {
|
||||
ret = preferred;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < si->si_num_slots; i++) {
|
||||
if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
|
||||
ret = (s16) i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -248,7 +256,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
|
|||
if (slot == OCFS2_INVALID_SLOT) {
|
||||
/* if no slot yet, then just take 1st available
|
||||
* one. */
|
||||
slot = __ocfs2_find_empty_slot(si);
|
||||
slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
|
||||
if (slot == OCFS2_INVALID_SLOT) {
|
||||
spin_unlock(&si->si_lock);
|
||||
mlog(ML_ERROR, "no free slots available!\n");
|
||||
|
|
|
@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle,
|
|||
u16 chain);
|
||||
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
|
||||
u32 wanted);
|
||||
static int ocfs2_free_suballoc_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct buffer_head *alloc_bh,
|
||||
unsigned int start_bit,
|
||||
u64 bg_blkno,
|
||||
unsigned int count);
|
||||
static inline u64 ocfs2_which_suballoc_group(u64 block,
|
||||
unsigned int bit);
|
||||
static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
|
||||
u64 bg_blkno,
|
||||
u16 bg_bit_off);
|
||||
|
@ -496,13 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
|
|||
|
||||
(*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
|
||||
(*ac)->ac_which = OCFS2_AC_USE_META;
|
||||
|
||||
#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
|
||||
slot = 0;
|
||||
#else
|
||||
slot = osb->slot_num;
|
||||
#endif
|
||||
|
||||
(*ac)->ac_group_search = ocfs2_block_group_search;
|
||||
|
||||
status = ocfs2_reserve_suballoc_bits(osb, (*ac),
|
||||
|
@ -1626,12 +1612,12 @@ bail:
|
|||
/*
|
||||
* expects the suballoc inode to already be locked.
|
||||
*/
|
||||
static int ocfs2_free_suballoc_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct buffer_head *alloc_bh,
|
||||
unsigned int start_bit,
|
||||
u64 bg_blkno,
|
||||
unsigned int count)
|
||||
int ocfs2_free_suballoc_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct buffer_head *alloc_bh,
|
||||
unsigned int start_bit,
|
||||
u64 bg_blkno,
|
||||
unsigned int count)
|
||||
{
|
||||
int status = 0;
|
||||
u32 tmp_used;
|
||||
|
@ -1703,13 +1689,6 @@ bail:
|
|||
return status;
|
||||
}
|
||||
|
||||
static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
|
||||
{
|
||||
u64 group = block - (u64) bit;
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
int ocfs2_free_dinode(handle_t *handle,
|
||||
struct inode *inode_alloc_inode,
|
||||
struct buffer_head *inode_alloc_bh,
|
||||
|
@ -1723,19 +1702,6 @@ int ocfs2_free_dinode(handle_t *handle,
|
|||
inode_alloc_bh, bit, bg_blkno, 1);
|
||||
}
|
||||
|
||||
int ocfs2_free_extent_block(handle_t *handle,
|
||||
struct inode *eb_alloc_inode,
|
||||
struct buffer_head *eb_alloc_bh,
|
||||
struct ocfs2_extent_block *eb)
|
||||
{
|
||||
u64 blk = le64_to_cpu(eb->h_blkno);
|
||||
u16 bit = le16_to_cpu(eb->h_suballoc_bit);
|
||||
u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
|
||||
|
||||
return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
|
||||
bit, bg_blkno, 1);
|
||||
}
|
||||
|
||||
int ocfs2_free_clusters(handle_t *handle,
|
||||
struct inode *bitmap_inode,
|
||||
struct buffer_head *bitmap_bh,
|
||||
|
|
|
@ -86,20 +86,29 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
|
|||
u32 *cluster_start,
|
||||
u32 *num_clusters);
|
||||
|
||||
int ocfs2_free_suballoc_bits(handle_t *handle,
|
||||
struct inode *alloc_inode,
|
||||
struct buffer_head *alloc_bh,
|
||||
unsigned int start_bit,
|
||||
u64 bg_blkno,
|
||||
unsigned int count);
|
||||
int ocfs2_free_dinode(handle_t *handle,
|
||||
struct inode *inode_alloc_inode,
|
||||
struct buffer_head *inode_alloc_bh,
|
||||
struct ocfs2_dinode *di);
|
||||
int ocfs2_free_extent_block(handle_t *handle,
|
||||
struct inode *eb_alloc_inode,
|
||||
struct buffer_head *eb_alloc_bh,
|
||||
struct ocfs2_extent_block *eb);
|
||||
int ocfs2_free_clusters(handle_t *handle,
|
||||
struct inode *bitmap_inode,
|
||||
struct buffer_head *bitmap_bh,
|
||||
u64 start_blk,
|
||||
unsigned int num_clusters);
|
||||
|
||||
static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
|
||||
{
|
||||
u64 group = block - (u64) bit;
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
|
||||
u64 bg_blkno)
|
||||
{
|
||||
|
|
|
@ -82,7 +82,8 @@ MODULE_AUTHOR("Oracle");
|
|||
MODULE_LICENSE("GPL");
|
||||
|
||||
static int ocfs2_parse_options(struct super_block *sb, char *options,
|
||||
unsigned long *mount_opt, int is_remount);
|
||||
unsigned long *mount_opt, s16 *slot,
|
||||
int is_remount);
|
||||
static void ocfs2_put_super(struct super_block *sb);
|
||||
static int ocfs2_mount_volume(struct super_block *sb);
|
||||
static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
|
||||
|
@ -114,8 +115,6 @@ static void ocfs2_write_super(struct super_block *sb);
|
|||
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
|
||||
static void ocfs2_destroy_inode(struct inode *inode);
|
||||
|
||||
static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
|
||||
|
||||
static const struct super_operations ocfs2_sops = {
|
||||
.statfs = ocfs2_statfs,
|
||||
.alloc_inode = ocfs2_alloc_inode,
|
||||
|
@ -140,6 +139,7 @@ enum {
|
|||
Opt_data_ordered,
|
||||
Opt_data_writeback,
|
||||
Opt_atime_quantum,
|
||||
Opt_slot,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
|
@ -154,6 +154,7 @@ static match_table_t tokens = {
|
|||
{Opt_data_ordered, "data=ordered"},
|
||||
{Opt_data_writeback, "data=writeback"},
|
||||
{Opt_atime_quantum, "atime_quantum=%u"},
|
||||
{Opt_slot, "preferred_slot=%u"},
|
||||
{Opt_err, NULL}
|
||||
};
|
||||
|
||||
|
@ -318,7 +319,7 @@ static void ocfs2_destroy_inode(struct inode *inode)
|
|||
/* From xfs_super.c:xfs_max_file_offset
|
||||
* Copyright (c) 2000-2004 Silicon Graphics, Inc.
|
||||
*/
|
||||
static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
|
||||
unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
|
||||
{
|
||||
unsigned int pagefactor = 1;
|
||||
unsigned int bitshift = BITS_PER_LONG - 1;
|
||||
|
@ -355,9 +356,10 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
|
|||
int incompat_features;
|
||||
int ret = 0;
|
||||
unsigned long parsed_options;
|
||||
s16 slot;
|
||||
struct ocfs2_super *osb = OCFS2_SB(sb);
|
||||
|
||||
if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
|
||||
if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -534,6 +536,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
|||
struct dentry *root;
|
||||
int status, sector_size;
|
||||
unsigned long parsed_opt;
|
||||
s16 slot;
|
||||
struct inode *inode = NULL;
|
||||
struct ocfs2_super *osb = NULL;
|
||||
struct buffer_head *bh = NULL;
|
||||
|
@ -541,7 +544,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
|||
|
||||
mlog_entry("%p, %p, %i", sb, data, silent);
|
||||
|
||||
if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
|
||||
if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
|
||||
status = -EINVAL;
|
||||
goto read_super_error;
|
||||
}
|
||||
|
@ -571,6 +574,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
|
|||
brelse(bh);
|
||||
bh = NULL;
|
||||
osb->s_mount_opt = parsed_opt;
|
||||
osb->preferred_slot = slot;
|
||||
|
||||
sb->s_magic = OCFS2_SUPER_MAGIC;
|
||||
|
||||
|
@ -713,6 +717,7 @@ static struct file_system_type ocfs2_fs_type = {
|
|||
static int ocfs2_parse_options(struct super_block *sb,
|
||||
char *options,
|
||||
unsigned long *mount_opt,
|
||||
s16 *slot,
|
||||
int is_remount)
|
||||
{
|
||||
int status;
|
||||
|
@ -722,6 +727,7 @@ static int ocfs2_parse_options(struct super_block *sb,
|
|||
options ? options : "(none)");
|
||||
|
||||
*mount_opt = 0;
|
||||
*slot = OCFS2_INVALID_SLOT;
|
||||
|
||||
if (!options) {
|
||||
status = 1;
|
||||
|
@ -782,6 +788,15 @@ static int ocfs2_parse_options(struct super_block *sb,
|
|||
else
|
||||
osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
|
||||
break;
|
||||
case Opt_slot:
|
||||
option = 0;
|
||||
if (match_int(&args[0], &option)) {
|
||||
status = 0;
|
||||
goto bail;
|
||||
}
|
||||
if (option)
|
||||
*slot = (s16)option;
|
||||
break;
|
||||
default:
|
||||
mlog(ML_ERROR,
|
||||
"Unrecognized mount option \"%s\" "
|
||||
|
|
|
@ -45,4 +45,6 @@ void __ocfs2_abort(struct super_block *sb,
|
|||
|
||||
#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
|
||||
|
||||
unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
|
||||
|
||||
#endif /* OCFS2_SUPER_H */
|
||||
|
|
|
@ -40,9 +40,9 @@
|
|||
#include <linux/types.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/semaphore.h>
|
||||
|
||||
#define CONFIGFS_ITEM_NAME_LEN 20
|
||||
|
||||
|
@ -75,7 +75,6 @@ extern void config_item_init(struct config_item *);
|
|||
extern void config_item_init_type_name(struct config_item *item,
|
||||
const char *name,
|
||||
struct config_item_type *type);
|
||||
extern void config_item_cleanup(struct config_item *);
|
||||
|
||||
extern struct config_item * config_item_get(struct config_item *);
|
||||
extern void config_item_put(struct config_item *);
|
||||
|
@ -87,12 +86,10 @@ struct config_item_type {
|
|||
struct configfs_attribute **ct_attrs;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* group - a group of config_items of a specific type, belonging
|
||||
* to a specific subsystem.
|
||||
*/
|
||||
|
||||
struct config_group {
|
||||
struct config_item cg_item;
|
||||
struct list_head cg_children;
|
||||
|
@ -100,13 +97,11 @@ struct config_group {
|
|||
struct config_group **default_groups;
|
||||
};
|
||||
|
||||
|
||||
extern void config_group_init(struct config_group *group);
|
||||
extern void config_group_init_type_name(struct config_group *group,
|
||||
const char *name,
|
||||
struct config_item_type *type);
|
||||
|
||||
|
||||
static inline struct config_group *to_config_group(struct config_item *item)
|
||||
{
|
||||
return item ? container_of(item,struct config_group,cg_item) : NULL;
|
||||
|
@ -122,7 +117,8 @@ static inline void config_group_put(struct config_group *group)
|
|||
config_item_put(&group->cg_item);
|
||||
}
|
||||
|
||||
extern struct config_item *config_group_find_obj(struct config_group *, const char *);
|
||||
extern struct config_item *config_group_find_item(struct config_group *,
|
||||
const char *);
|
||||
|
||||
|
||||
struct configfs_attribute {
|
||||
|
@ -131,6 +127,22 @@ struct configfs_attribute {
|
|||
mode_t ca_mode;
|
||||
};
|
||||
|
||||
/*
|
||||
* Users often need to create attribute structures for their configurable
|
||||
* attributes, containing a configfs_attribute member and function pointers
|
||||
* for the show() and store() operations on that attribute. They can use
|
||||
* this macro (similar to sysfs' __ATTR) to make defining attributes easier.
|
||||
*/
|
||||
#define __CONFIGFS_ATTR(_name, _mode, _show, _store) \
|
||||
{ \
|
||||
.attr = { \
|
||||
.ca_name = __stringify(_name), \
|
||||
.ca_mode = _mode, \
|
||||
.ca_owner = THIS_MODULE, \
|
||||
}, \
|
||||
.show = _show, \
|
||||
.store = _store, \
|
||||
}
|
||||
|
||||
/*
|
||||
* If allow_link() exists, the item can symlink(2) out to other
|
||||
|
@ -157,12 +169,13 @@ struct configfs_group_operations {
|
|||
struct config_item *(*make_item)(struct config_group *group, const char *name);
|
||||
struct config_group *(*make_group)(struct config_group *group, const char *name);
|
||||
int (*commit_item)(struct config_item *item);
|
||||
void (*disconnect_notify)(struct config_group *group, struct config_item *item);
|
||||
void (*drop_item)(struct config_group *group, struct config_item *item);
|
||||
};
|
||||
|
||||
struct configfs_subsystem {
|
||||
struct config_group su_group;
|
||||
struct semaphore su_sem;
|
||||
struct mutex su_mutex;
|
||||
};
|
||||
|
||||
static inline struct configfs_subsystem *to_configfs_subsystem(struct config_group *group)
|
||||
|
@ -175,6 +188,11 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro
|
|||
int configfs_register_subsystem(struct configfs_subsystem *subsys);
|
||||
void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
|
||||
|
||||
/* These functions can sleep and can alloc with GFP_KERNEL */
|
||||
/* WARNING: These cannot be called underneath configfs callbacks!! */
|
||||
int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
|
||||
void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _CONFIGFS_H_ */
|
||||
|
|
Loading…
Reference in New Issue