OpenCloudOS-Kernel/fs/ocfs2/stackglue.c

754 lines
18 KiB
C
Raw Normal View History

/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* stackglue.c
*
* Code which implements an OCFS2 specific interface to underlying
* cluster stacks.
*
* Copyright (C) 2007, 2009 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/fs.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/sysctl.h>
#include "ocfs2_fs.h"
#include "stackglue.h"
/* Names of the two stack plugins this file chooses between. */
#define OCFS2_STACK_PLUGIN_O2CB "o2cb"
#define OCFS2_STACK_PLUGIN_USER "user"
/* Size of the buffer that holds the heartbeat control helper path. */
#define OCFS2_MAX_HB_CTL_PATH 256
/* Copied into sp_max_proto of every registered plugin. */
static struct ocfs2_protocol_version locking_max_version;
/* Taken by the registration, stack selection and sysfs code in this file. */
static DEFINE_SPINLOCK(ocfs2_stack_lock);
/* Registered plugins, linked through their sp_list member. */
static LIST_HEAD(ocfs2_stack_list);
/*
 * Currently selected stack label; set to "o2cb" at module init and
 * rewritten through the cluster_stack sysfs attribute.
 */
static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];
/*
 * Path of the helper run by ocfs2_leave_group(); also the backing store
 * of the hb_ctl_path sysctl.
 */
static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
/*
 * The stack currently in use. If not null, active_stack->sp_count > 0,
 * the module is pinned, and the locking protocol cannot be changed.
 */
static struct ocfs2_stack_plugin *active_stack;
ocfs2: fix crash caused by stale lvb with fsdlm plugin The crash happens rather often when we reset some cluster nodes while nodes contend fiercely to do truncate and append. The crash backtrace is below: dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover_grant 1 locks on 971 resources dlm: C21CBDA5E0774F4BA5A9D4F317717495: dlm_recover 9 generation 5 done: 4 ms ocfs2: Begin replay journal (node 318952601, slot 2) on device (253,18) ocfs2: End replay journal (node 318952601, slot 2) on device (253,18) ocfs2: Beginning quota recovery on device (253,18) for slot 2 ocfs2: Finishing quota recovery on device (253,18) for slot 2 (truncate,30154,1):ocfs2_truncate_file:470 ERROR: bug expression: le64_to_cpu(fe->i_size) != i_size_read(inode) (truncate,30154,1):ocfs2_truncate_file:470 ERROR: Inode 290321, inode i_size = 732 != di i_size = 937, i_flags = 0x1 ------------[ cut here ]------------ kernel BUG at /usr/src/linux/fs/ocfs2/file.c:470! invalid opcode: 0000 [#1] SMP Modules linked in: ocfs2_stack_user(OEN) ocfs2(OEN) ocfs2_nodemanager ocfs2_stackglue(OEN) quota_tree dlm(OEN) configfs fuse sd_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi af_packet iscsi_ibft iscsi_boot_sysfs softdog xfs libcrc32c ppdev parport_pc pcspkr parport joydev virtio_balloon virtio_net i2c_piix4 acpi_cpufreq button processor ext4 crc16 jbd2 mbcache ata_generic cirrus virtio_blk ata_piix drm_kms_helper ahci syscopyarea libahci sysfillrect sysimgblt fb_sys_fops ttm floppy libata drm virtio_pci virtio_ring uhci_hcd virtio ehci_hcd usbcore serio_raw usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 Supported: No, Unsupported modules are loaded CPU: 1 PID: 30154 Comm: truncate Tainted: G OE N 4.4.21-69-default #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20151112_172657-sheep25 04/01/2014 task: ffff88004ff6d240 ti: ffff880074e68000 task.ti: ffff880074e68000 RIP: 0010:[<ffffffffa05c8c30>] [<ffffffffa05c8c30>] 
ocfs2_truncate_file+0x640/0x6c0 [ocfs2] RSP: 0018:ffff880074e6bd50 EFLAGS: 00010282 RAX: 0000000000000074 RBX: 000000000000029e RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000246 RDI: 0000000000000246 RBP: ffff880074e6bda8 R08: 000000003675dc7a R09: ffffffff82013414 R10: 0000000000034c50 R11: 0000000000000000 R12: ffff88003aab3448 R13: 00000000000002dc R14: 0000000000046e11 R15: 0000000000000020 FS: 00007f839f965700(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f839f97e000 CR3: 0000000036723000 CR4: 00000000000006e0 Call Trace: ocfs2_setattr+0x698/0xa90 [ocfs2] notify_change+0x1ae/0x380 do_truncate+0x5e/0x90 do_sys_ftruncate.constprop.11+0x108/0x160 entry_SYSCALL_64_fastpath+0x12/0x6d Code: 24 28 ba d6 01 00 00 48 c7 c6 30 43 62 a0 8b 41 2c 89 44 24 08 48 8b 41 20 48 c7 c1 78 a3 62 a0 48 89 04 24 31 c0 e8 a0 97 f9 ff <0f> 0b 3d 00 fe ff ff 0f 84 ab fd ff ff 83 f8 fc 0f 84 a2 fd ff RIP [<ffffffffa05c8c30>] ocfs2_truncate_file+0x640/0x6c0 [ocfs2] It's because ocfs2_inode_lock() get us stale LVB in which the i_size is not equal to the disk i_size. We mistakenly trust the LVB because the underlaying fsdlm dlm_lock() doesn't set lkb_sbflags with DLM_SBF_VALNOTVALID properly for us. But, why? The current code tries to downconvert lock without DLM_LKF_VALBLK flag to tell o2cb don't update RSB's LVB if it's a PR->NULL conversion, even if the lock resource type needs LVB. This is not the right way for fsdlm. The fsdlm plugin behaves different on DLM_LKF_VALBLK, it depends on DLM_LKF_VALBLK to decide if we care about the LVB in the LKB. If DLM_LKF_VALBLK is not set, fsdlm will skip recovering RSB's LVB from this lkb and set the right DLM_SBF_VALNOTVALID appropriately when node failure happens. 
The following diagram briefly illustrates how this crash happens: RSB1 is inode metadata lock resource with LOCK_TYPE_USES_LVB; The 1st round: Node1 Node2 RSB1: PR RSB1(master): NULL->EX ocfs2_downconvert_lock(PR->NULL, set_lvb==0) ocfs2_dlm_lock(no DLM_LKF_VALBLK) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dlm_lock(no DLM_LKF_VALBLK) convert_lock(overwrite lkb->lkb_exflags with no DLM_LKF_VALBLK) RSB1: NULL RSB1: EX reset Node2 dlm_recover_rsbs() recover_lvb() /* The LVB is not trustable if the node with EX fails and * no lock >= PR is left. We should set RSB_VALNOTVALID for RSB1. */ if(!(kb_exflags & DLM_LKF_VALBLK)) /* This means we miss the chance to return; * to invalid the LVB here. */ The 2nd round: Node 1 Node2 RSB1(become master from recovery) ocfs2_setattr() ocfs2_inode_lock(NULL->EX) /* dlm_lock() return the stale lvb without setting DLM_SBF_VALNOTVALID */ ocfs2_meta_lvb_is_trustable() return 1 /* so we don't refresh inode from disk */ ocfs2_truncate_file() mlog_bug_on_msg(disk isize != i_size_read(inode)) /* crash! */ The fix is quite straightforward. We keep to set DLM_LKF_VALBLK flag for dlm_lock() if the lock resource type needs LVB and the fsdlm plugin is uesed. Link: http://lkml.kernel.org/r/1481275846-6604-1-git-send-email-zren@suse.com Signed-off-by: Eric Ren <zren@suse.com> Reviewed-by: Joseph Qi <jiangqi903@gmail.com> Cc: Mark Fasheh <mfasheh@versity.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-01-11 08:57:33 +08:00
/*
 * Report whether the active cluster stack plugin is the classic "o2cb" one.
 *
 * Returns non-zero when the active plugin's name equals "o2cb" and 0
 * otherwise.  When no stack is active there is no plugin name to compare
 * against, so 0 is returned instead of dereferencing a NULL pointer.
 *
 * active_stack is read without ocfs2_stack_lock here, so the NULL check is
 * a best-effort guard rather than a synchronized read.
 */
int ocfs2_is_o2cb_active(void)
{
	if (!active_stack)
		return 0;

	return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
}
EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
{
struct ocfs2_stack_plugin *p;
assert_spin_locked(&ocfs2_stack_lock);
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
if (!strcmp(p->sp_name, name))
return p;
}
return NULL;
}
static int ocfs2_stack_driver_request(const char *stack_name,
const char *plugin_name)
{
int rc;
struct ocfs2_stack_plugin *p;
spin_lock(&ocfs2_stack_lock);
/*
* If the stack passed by the filesystem isn't the selected one,
* we can't continue.
*/
if (strcmp(stack_name, cluster_stack_name)) {
rc = -EBUSY;
goto out;
}
if (active_stack) {
/*
* If the active stack isn't the one we want, it cannot
* be selected right now.
*/
if (!strcmp(active_stack->sp_name, plugin_name))
rc = 0;
else
rc = -EBUSY;
goto out;
}
p = ocfs2_stack_lookup(plugin_name);
if (!p || !try_module_get(p->sp_owner)) {
rc = -ENOENT;
goto out;
}
active_stack = p;
rc = 0;
out:
/* If we found it, pin it */
if (!rc)
active_stack->sp_count++;
spin_unlock(&ocfs2_stack_lock);
return rc;
}
/*
 * Look up the plugin for @stack_name and make it the active stack.  When
 * the plugin is not registered yet, one attempt is made to load its module
 * before giving up.
 *
 * Returns 0 on success, -EINVAL for a label of the wrong length, -ENOENT
 * when the plugin still cannot be found, and -EBUSY when a different stack
 * is in use.
 */
static int ocfs2_stack_driver_get(const char *stack_name)
{
	const char *plugin_name;
	int rc;

	/*
	 * Classic stack does not pass in a stack name.  This is
	 * compatible with older tools as well.
	 */
	if (!stack_name || stack_name[0] == '\0')
		stack_name = OCFS2_STACK_PLUGIN_O2CB;

	if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) {
		printk(KERN_ERR
		       "ocfs2 passed an invalid cluster stack label: \"%s\"\n",
		       stack_name);
		return -EINVAL;
	}

	/* Anything that isn't the classic stack is a user stack */
	plugin_name = strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB) == 0 ?
		      OCFS2_STACK_PLUGIN_O2CB : OCFS2_STACK_PLUGIN_USER;

	rc = ocfs2_stack_driver_request(stack_name, plugin_name);
	if (rc == -ENOENT) {
		/* Not registered: try loading the module, then ask again. */
		request_module("ocfs2_stack_%s", plugin_name);
		rc = ocfs2_stack_driver_request(stack_name, plugin_name);
	}

	switch (rc) {
	case -ENOENT:
		printk(KERN_ERR
		       "ocfs2: Cluster stack driver \"%s\" cannot be found\n",
		       plugin_name);
		break;
	case -EBUSY:
		printk(KERN_ERR
		       "ocfs2: A different cluster stack is in use\n");
		break;
	default:
		break;
	}

	return rc;
}
/*
 * Drop one reference on the active stack.  When the last reference goes
 * away the plugin module is released and active_stack is cleared.
 * Calling this with no active stack, or with a zero count, is a BUG.
 */
static void ocfs2_stack_driver_put(void)
{
	spin_lock(&ocfs2_stack_lock);

	BUG_ON(!active_stack);
	BUG_ON(!active_stack->sp_count);

	if (--active_stack->sp_count == 0) {
		module_put(active_stack->sp_owner);
		active_stack = NULL;
	}

	spin_unlock(&ocfs2_stack_lock);
}
/*
 * Add @plugin to the list of known cluster stack plugins.
 *
 * The plugin's reference count is reset and it inherits the current
 * maximum locking protocol version.  Returns 0 on success or -EEXIST when
 * a plugin with the same name is already registered.
 */
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin)
{
	int status = -EEXIST;

	spin_lock(&ocfs2_stack_lock);

	if (ocfs2_stack_lookup(plugin->sp_name)) {
		printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n",
		       plugin->sp_name);
		goto unlock;
	}

	plugin->sp_count = 0;
	plugin->sp_max_proto = locking_max_version;
	list_add(&plugin->sp_list, &ocfs2_stack_list);
	printk(KERN_INFO "ocfs2: Registered cluster interface %s\n",
	       plugin->sp_name);
	status = 0;

unlock:
	spin_unlock(&ocfs2_stack_lock);
	return status;
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register);
/*
 * Remove @plugin from the list of known cluster stack plugins.
 *
 * An unknown name is only logged.  It is a BUG if a different plugin is
 * registered under the same name, or if @plugin is the active stack or
 * still has references.
 */
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin)
{
	struct ocfs2_stack_plugin *registered;

	spin_lock(&ocfs2_stack_lock);

	registered = ocfs2_stack_lookup(plugin->sp_name);
	if (!registered) {
		printk(KERN_ERR "Stack \"%s\" is not registered\n",
		       plugin->sp_name);
		goto unlock;
	}

	BUG_ON(registered != plugin);
	BUG_ON(plugin == active_stack);
	BUG_ON(plugin->sp_count != 0);

	list_del_init(&plugin->sp_list);
	printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n",
	       plugin->sp_name);

unlock:
	spin_unlock(&ocfs2_stack_lock);
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister);
void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_proto)
{
struct ocfs2_stack_plugin *p;
spin_lock(&ocfs2_stack_lock);
if (memcmp(max_proto, &locking_max_version,
sizeof(struct ocfs2_protocol_version))) {
BUG_ON(locking_max_version.pv_major != 0);
locking_max_version = *max_proto;
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
p->sp_max_proto = locking_max_version;
}
}
spin_unlock(&ocfs2_stack_lock);
}
EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_max_proto_version);
/*
 * ocfs2_dlm_lock() and ocfs2_dlm_unlock() do not take ast/bast function
 * arguments.  The ast and bast are handed the lksb instead, so a caller
 * that needs more context can embed the lksb in its own structure.
 *
 * The first lock request binds @lksb to @conn; using the same lksb with a
 * different connection afterwards is a BUG.  The request itself is
 * forwarded to the active stack's dlm_lock operation and its result is
 * returned.
 */
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
		   int mode,
		   struct ocfs2_dlm_lksb *lksb,
		   u32 flags,
		   void *name,
		   unsigned int namelen)
{
	if (lksb->lksb_conn) {
		BUG_ON(lksb->lksb_conn != conn);
	} else {
		lksb->lksb_conn = conn;
	}

	return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags,
					      name, namelen);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock);
/*
 * Forward an unlock request to the active stack's dlm_unlock operation.
 * The lksb must already be bound to a connection; otherwise this is a BUG.
 */
int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
		     struct ocfs2_dlm_lksb *lksb,
		     u32 flags)
{
	int status;

	BUG_ON(!lksb->lksb_conn);

	status = active_stack->sp_ops->dlm_unlock(conn, lksb, flags);
	return status;
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock);
/* Return the active stack's lock_status result for @lksb. */
int ocfs2_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
{
	int status;

	status = active_stack->sp_ops->lock_status(lksb);
	return status;
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
/* Return the active stack's lvb_valid result for @lksb. */
int ocfs2_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb)
{
	int valid;

	valid = active_stack->sp_ops->lvb_valid(lksb);
	return valid;
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
/* Return the pointer produced by the active stack's lock_lvb for @lksb. */
void *ocfs2_dlm_lvb(struct ocfs2_dlm_lksb *lksb)
{
	void *lvb;

	lvb = active_stack->sp_ops->lock_lvb(lksb);
	return lvb;
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb);
/* Hand @lksb to the active stack's dump_lksb operation. */
void ocfs2_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb)
{
/* Requires an active stack: active_stack is dereferenced unchecked. */
active_stack->sp_ops->dump_lksb(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
int ocfs2_stack_supports_plocks(void)
{
return active_stack && active_stack->sp_ops->plock;
}
EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks);
/*
 * Forward a plock request to the active stack.
 *
 * Only safe to call after ocfs2_stack_supports_plocks() returned true.
 * If the stack has no plock operation anyway, a one-time warning is
 * emitted and -EOPNOTSUPP is returned.
 */
int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
		struct file *file, int cmd, struct file_lock *fl)
{
	if (WARN_ON_ONCE(active_stack->sp_ops->plock == NULL))
		return -EOPNOTSUPP;

	return active_stack->sp_ops->plock(conn, ino, file, cmd, fl);
}
EXPORT_SYMBOL_GPL(ocfs2_plock);
int ocfs2_cluster_connect(const char *stack_name,
ocfs2: add clustername to cluster connection This is an effort of removing ocfs2_controld.pcmk and getting ocfs2 DLM handling up to the times with respect to DLM (>=4.0.1) and corosync (2.3.x). AFAIK, cman also is being phased out for a unified corosync cluster stack. fs/dlm performs all the functions with respect to fencing and node management and provides the API's to do so for ocfs2. For all future references, DLM stands for fs/dlm code. The advantages are: + No need to run an additional userspace daemon (ocfs2_controld) + No controld device handling and controld protocol + Shifting responsibilities of node management to DLM layer For backward compatibility, we are keeping the controld handling code. Once enough time has passed we can remove a significant portion of the code. This was tested by using the kernel with changes on older unmodified tools. The kernel used ocfs2_controld as expected, and displayed the appropriate warning message. This feature requires modification in the userspace ocfs2-tools. The changes can be found at: https://github.com/goldwynr/ocfs2-tools branch: nocontrold Currently, not many checks are present in the userspace code, but that would change soon. This patch (of 6): Add clustername to cluster connection. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-01-22 07:48:21 +08:00
const char *cluster_name,
int cluster_name_len,
const char *group,
int grouplen,
struct ocfs2_locking_protocol *lproto,
void (*recovery_handler)(int node_num,
void *recovery_data),
void *recovery_data,
struct ocfs2_cluster_connection **conn)
{
int rc = 0;
struct ocfs2_cluster_connection *new_conn;
BUG_ON(group == NULL);
BUG_ON(conn == NULL);
BUG_ON(recovery_handler == NULL);
if (grouplen > GROUP_NAME_MAX) {
rc = -EINVAL;
goto out;
}
if (memcmp(&lproto->lp_max_version, &locking_max_version,
sizeof(struct ocfs2_protocol_version))) {
rc = -EINVAL;
goto out;
}
new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection),
GFP_KERNEL);
if (!new_conn) {
rc = -ENOMEM;
goto out;
}
ocfs2: add clustername to cluster connection This is an effort of removing ocfs2_controld.pcmk and getting ocfs2 DLM handling up to the times with respect to DLM (>=4.0.1) and corosync (2.3.x). AFAIK, cman also is being phased out for a unified corosync cluster stack. fs/dlm performs all the functions with respect to fencing and node management and provides the API's to do so for ocfs2. For all future references, DLM stands for fs/dlm code. The advantages are: + No need to run an additional userspace daemon (ocfs2_controld) + No controld device handling and controld protocol + Shifting responsibilities of node management to DLM layer For backward compatibility, we are keeping the controld handling code. Once enough time has passed we can remove a significant portion of the code. This was tested by using the kernel with changes on older unmodified tools. The kernel used ocfs2_controld as expected, and displayed the appropriate warning message. This feature requires modification in the userspace ocfs2-tools. The changes can be found at: https://github.com/goldwynr/ocfs2-tools branch: nocontrold Currently, not many checks are present in the userspace code, but that would change soon. This patch (of 6): Add clustername to cluster connection. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-01-22 07:48:21 +08:00
strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
new_conn->cc_namelen = grouplen;
if (cluster_name_len)
strlcpy(new_conn->cc_cluster_name, cluster_name,
CLUSTER_NAME_MAX + 1);
ocfs2: add clustername to cluster connection This is an effort of removing ocfs2_controld.pcmk and getting ocfs2 DLM handling up to the times with respect to DLM (>=4.0.1) and corosync (2.3.x). AFAIK, cman also is being phased out for a unified corosync cluster stack. fs/dlm performs all the functions with respect to fencing and node management and provides the API's to do so for ocfs2. For all future references, DLM stands for fs/dlm code. The advantages are: + No need to run an additional userspace daemon (ocfs2_controld) + No controld device handling and controld protocol + Shifting responsibilities of node management to DLM layer For backward compatibility, we are keeping the controld handling code. Once enough time has passed we can remove a significant portion of the code. This was tested by using the kernel with changes on older unmodified tools. The kernel used ocfs2_controld as expected, and displayed the appropriate warning message. This feature requires modification in the userspace ocfs2-tools. The changes can be found at: https://github.com/goldwynr/ocfs2-tools branch: nocontrold Currently, not many checks are present in the userspace code, but that would change soon. This patch (of 6): Add clustername to cluster connection. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Reviewed-by: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-01-22 07:48:21 +08:00
new_conn->cc_cluster_name_len = cluster_name_len;
new_conn->cc_recovery_handler = recovery_handler;
new_conn->cc_recovery_data = recovery_data;
new_conn->cc_proto = lproto;
/* Start the new connection at our maximum compatibility level */
new_conn->cc_version = lproto->lp_max_version;
/* This will pin the stack driver if successful */
rc = ocfs2_stack_driver_get(stack_name);
if (rc)
goto out_free;
rc = active_stack->sp_ops->connect(new_conn);
if (rc) {
ocfs2_stack_driver_put();
goto out_free;
}
*conn = new_conn;
out_free:
if (rc)
kfree(new_conn);
out:
return rc;
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect);
/*
 * Connect to whichever cluster stack is currently selected, without a
 * cluster name.  The caller will ensure all nodes have the same cluster
 * stack.
 *
 * The selected label in cluster_stack_name is passed on as the stack name
 * (or NULL when it is empty); the remaining arguments and the return value
 * are those of ocfs2_cluster_connect().
 */
int ocfs2_cluster_connect_agnostic(const char *group,
				   int grouplen,
				   struct ocfs2_locking_protocol *lproto,
				   void (*recovery_handler)(int node_num,
							    void *recovery_data),
				   void *recovery_data,
				   struct ocfs2_cluster_connection **conn)
{
	char *stack_name = NULL;

	if (cluster_stack_name[0])
		stack_name = cluster_stack_name;

	return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen,
				     lproto, recovery_handler, recovery_data,
				     conn);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic);
/*
 * Tear down @conn through the active stack's disconnect operation.
 *
 * On success the connection is freed and, if hangup_pending is 0, the
 * stack driver reference is dropped as well.  On failure the connection
 * is left allocated and the stack's error is returned.
 */
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
			     int hangup_pending)
{
	int ret;

	BUG_ON(!conn);

	ret = active_stack->sp_ops->disconnect(conn);
	if (ret) {
		/* XXX Should we free it anyway? */
		return ret;
	}

	kfree(conn);
	if (!hangup_pending)
		ocfs2_stack_driver_put();

	return 0;
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect);
/*
 * Leave the group for this filesystem by running the userspace helper
 * stored in ocfs2_hb_ctl_path as "<helper> -K -u <group>" and waiting for
 * it to finish.  A negative result is logged; nothing is returned.
 */
static void ocfs2_leave_group(const char *group)
{
	char *helper_argv[5] = {
		ocfs2_hb_ctl_path,
		"-K",
		"-u",
		(char *)group,
		NULL,
	};
	/* minimal command environment taken from cpu_run_sbin_hotplug */
	char *helper_envp[3] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL,
	};
	int rc;

	rc = call_usermodehelper(helper_argv[0], helper_argv, helper_envp,
				 UMH_WAIT_PROC);
	if (rc >= 0)
		return;

	printk(KERN_ERR
	       "ocfs2: Error %d running user helper "
	       "\"%s %s %s %s\"\n",
	       rc, helper_argv[0], helper_argv[1], helper_argv[2],
	       helper_argv[3]);
}
/*
 * Hangup is a required post-umount step.  ocfs2-tools software expects the
 * filesystem to call "ocfs2_hb_ctl" during unmount, whether or not the DLM
 * got started, which is why this cannot live in
 * ocfs2_cluster_disconnect().  ocfs2_leave_group() does the actual work.
 *
 * @group must be non-NULL and NUL-terminated at index @grouplen.
 */
void ocfs2_cluster_hangup(const char *group, int grouplen)
{
	BUG_ON(!group);
	BUG_ON(group[grouplen]);

	ocfs2_leave_group(group);

	/* cluster_disconnect() was called with hangup_pending==1 */
	ocfs2_stack_driver_put();
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
/*
 * Ask the active stack's this_node operation to fill in @node for @conn
 * and return its result.
 */
int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
			    unsigned int *node)
{
	int rc;

	rc = active_stack->sp_ops->this_node(conn, node);
	return rc;
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
/*
 * Sysfs bits
 */

/*
 * Show handler for max_locking_protocol: prints "<major>.<minor>\n", or
 * nothing at all while the stored major version is still zero.
 */
static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
					       struct kobj_attribute *attr,
					       char *buf)
{
	ssize_t len = 0;

	spin_lock(&ocfs2_stack_lock);
	if (locking_max_version.pv_major != 0) {
		len = snprintf(buf, PAGE_SIZE, "%u.%u\n",
			       locking_max_version.pv_major,
			       locking_max_version.pv_minor);
	}
	spin_unlock(&ocfs2_stack_lock);

	return len;
}

static struct kobj_attribute ocfs2_attr_max_locking_protocol =
	__ATTR(max_locking_protocol, S_IRUGO,
	       ocfs2_max_locking_protocol_show, NULL);
/*
 * Show handler for loaded_cluster_plugins: prints the name of every
 * registered plugin, one per line.
 *
 * Returns the number of bytes written, or -E2BIG when the names do not
 * fit into the PAGE_SIZE sized buffer.
 */
static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
						 struct kobj_attribute *attr,
						 char *buf)
{
	ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
	struct ocfs2_stack_plugin *p;

	spin_lock(&ocfs2_stack_lock);
	list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
		/* Append after what was already written, not over it. */
		ret = snprintf(buf + total, remain, "%s\n",
			       p->sp_name);
		if (ret < 0) {
			total = ret;
			break;
		}
		/*
		 * snprintf() returns the length the output would have had,
		 * so anything >= remain means it was truncated.
		 */
		if (ret >= remain) {
			/* snprintf() didn't fit */
			total = -E2BIG;
			break;
		}
		total += ret;
		remain -= ret;
	}
	spin_unlock(&ocfs2_stack_lock);

	return total;
}

static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
	__ATTR(loaded_cluster_plugins, S_IRUGO,
	       ocfs2_loaded_cluster_plugins_show, NULL);
/*
 * Show handler for active_cluster_plugin: prints the active plugin's name
 * followed by a newline, or nothing when no stack is active.
 *
 * Returns the number of bytes written, or -E2BIG when the name does not
 * fit into the PAGE_SIZE sized buffer.
 */
static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
						struct kobj_attribute *attr,
						char *buf)
{
	ssize_t ret = 0;

	spin_lock(&ocfs2_stack_lock);
	if (active_stack) {
		ret = snprintf(buf, PAGE_SIZE, "%s\n",
			       active_stack->sp_name);
		/*
		 * snprintf() returns the untruncated length, so truncation
		 * shows up as >= PAGE_SIZE rather than exactly PAGE_SIZE.
		 */
		if (ret >= PAGE_SIZE)
			ret = -E2BIG;
	}
	spin_unlock(&ocfs2_stack_lock);

	return ret;
}

static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
	__ATTR(active_cluster_plugin, S_IRUGO,
	       ocfs2_active_cluster_plugin_show, NULL);
/* Show handler for cluster_stack: prints the selected stack label. */
static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *buf)
{
	ssize_t len;

	spin_lock(&ocfs2_stack_lock);
	len = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name);
	spin_unlock(&ocfs2_stack_lock);

	return len;
}
/*
 * Store handler for cluster_stack: selects the cluster stack label.
 *
 * One trailing newline is ignored.  The label must be exactly
 * OCFS2_STACK_LABEL_LEN bytes with no embedded NUL, else -EINVAL.  While a
 * stack is active only the currently selected label is accepted; any
 * other value yields -EBUSY.  Returns @count on success.
 */
static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
					 struct kobj_attribute *attr,
					 const char *buf, size_t count)
{
	size_t label_len = count;
	ssize_t status = count;

	if (!count)
		return 0;

	if (buf[count - 1] == '\n')
		label_len--;

	if (label_len != OCFS2_STACK_LABEL_LEN)
		return -EINVAL;
	if (strnlen(buf, label_len) != label_len)
		return -EINVAL;

	spin_lock(&ocfs2_stack_lock);
	if (!active_stack)
		memcpy(cluster_stack_name, buf, label_len);
	else if (strncmp(buf, cluster_stack_name, label_len) != 0)
		status = -EBUSY;
	spin_unlock(&ocfs2_stack_lock);

	return status;
}

static struct kobj_attribute ocfs2_attr_cluster_stack =
	__ATTR(cluster_stack, S_IRUGO | S_IWUSR,
	       ocfs2_cluster_stack_show,
	       ocfs2_cluster_stack_store);
/* Show handler for dlm_recover_callback_support: always prints "1". */
static ssize_t ocfs2_dlm_recover_show(struct kobject *kobj,
				      struct kobj_attribute *attr,
				      char *buf)
{
	ssize_t len;

	len = snprintf(buf, PAGE_SIZE, "1\n");
	return len;
}

static struct kobj_attribute ocfs2_attr_dlm_recover_support =
	__ATTR(dlm_recover_callback_support, S_IRUGO,
	       ocfs2_dlm_recover_show, NULL);
/* NULL-terminated list of the sysfs attributes defined in this file. */
static struct attribute *ocfs2_attrs[] = {
&ocfs2_attr_max_locking_protocol.attr,
&ocfs2_attr_loaded_cluster_plugins.attr,
&ocfs2_attr_active_cluster_plugin.attr,
&ocfs2_attr_cluster_stack.attr,
&ocfs2_attr_dlm_recover_support.attr,
NULL,
};
/* Attribute group created on the ocfs2 kset by ocfs2_sysfs_init(). */
static struct attribute_group ocfs2_attr_group = {
.attrs = ocfs2_attrs,
};
ocfs2: export ocfs2_kset for online file check When there are errors in the ocfs2 filesystem, they are usually accompanied by the inode number which caused the error. This inode number would be the input to fixing the file. One of these options could be considered: A file in the sys filesytem which would accept inode numbers. This could be used to communication back what has to be fixed or is fixed. You could write: $# echo "<inode>" > /sys/fs/ocfs2/devname/filecheck/check or $# echo "<inode>" > /sys/fs/ocfs2/devname/filecheck/fix Compare with second version, I re-design filecheck sysfs interfaces, there are three sysfs files (check, fix and set) under filecheck directory (see above), sysfs will accept only one argument <inode>. Second, I adjust some code in ocfs2_filecheck_repair_inode_block() function according to upstream feedback, we cannot just add VALID_FL flag back as a inode block fix, then we will not fix this field corruption currently until having a complete solution. Compare with first version, I use strncasecmp instead of double strncmp functions. Second, update the source file contribution vendor. This patch (of 4): Export ocfs2_kset object from ocfs2_stackglue kernel module, then online file check code will create the related sysfiles under ocfs2_kset object. We're exporting this because it's built in ocfs2_stackglue.ko. Signed-off-by: Gang He <ghe@suse.com> Reviewed-by: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <joseph.qi@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-23 05:24:20 +08:00
/*
 * The "ocfs2" kset created by ocfs2_sysfs_init() under fs_kobj.  It is
 * exported so that code outside this file can reference it.
 */
struct kset *ocfs2_kset;
EXPORT_SYMBOL_GPL(ocfs2_kset);
/* Unregister the ocfs2 kset that ocfs2_sysfs_init() created. */
static void ocfs2_sysfs_exit(void)
{
kset_unregister(ocfs2_kset);
}
/*
 * Create the "ocfs2" kset under fs_kobj and populate it with the
 * attribute group defined in this file.
 *
 * Returns 0 on success, -ENOMEM when the kset cannot be created, or the
 * error from sysfs_create_group() after unregistering the kset again.
 */
static int ocfs2_sysfs_init(void)
{
	int err;

	ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj);
	if (ocfs2_kset == NULL)
		return -ENOMEM;

	err = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group);
	if (err)
		kset_unregister(ocfs2_kset);

	return err;
}
/*
 * Sysctl bits
 *
 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't
 * make as much sense in a multiple cluster stack world, but it's safer
 * and easier to preserve the name.
 */
/* NOTE(review): not referenced by any code visible in this file. */
#define FS_OCFS2_NM 1
/* Leaf entry: a string sysctl backed by the ocfs2_hb_ctl_path buffer. */
static struct ctl_table ocfs2_nm_table[] = {
{
.procname = "hb_ctl_path",
.data = ocfs2_hb_ctl_path,
.maxlen = OCFS2_MAX_HB_CTL_PATH,
.mode = 0644,
.proc_handler = proc_dostring,
},
{ }
};
/* "nm" directory; its child is ocfs2_nm_table. */
static struct ctl_table ocfs2_mod_table[] = {
{
.procname = "nm",
.data = NULL,
.maxlen = 0,
.mode = 0555,
.child = ocfs2_nm_table
},
{ }
};
/* "ocfs2" directory; its child is ocfs2_mod_table. */
static struct ctl_table ocfs2_kern_table[] = {
{
.procname = "ocfs2",
.data = NULL,
.maxlen = 0,
.mode = 0555,
.child = ocfs2_mod_table
},
{ }
};
/* "fs" directory; this is the table handed to register_sysctl_table(). */
static struct ctl_table ocfs2_root_table[] = {
{
.procname = "fs",
.data = NULL,
.maxlen = 0,
.mode = 0555,
.child = ocfs2_kern_table
},
{ }
};
/* Handle returned by register_sysctl_table(); released at module exit. */
static struct ctl_table_header *ocfs2_table_header;
/*
 * Initialization
 *
 * Select "o2cb" as the default cluster stack, register the sysctl table
 * and create the sysfs entries.
 *
 * Returns 0 on success, -ENOMEM when the sysctl table cannot be
 * registered, or the error from ocfs2_sysfs_init().  In the latter case
 * the sysctl table is unregistered again so that nothing registered by
 * this function is left behind after a failed init.
 */
static int __init ocfs2_stack_glue_init(void)
{
	int ret;

	strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);

	ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
	if (!ocfs2_table_header) {
		printk(KERN_ERR
		       "ocfs2 stack glue: unable to register sysctl\n");
		return -ENOMEM; /* or something. */
	}

	ret = ocfs2_sysfs_init();
	if (ret) {
		/* Undo the sysctl registration done above. */
		unregister_sysctl_table(ocfs2_table_header);
		ocfs2_table_header = NULL;
	}

	return ret;
}
/*
 * Module teardown: clear the stored maximum locking protocol version,
 * remove the sysfs entries, and unregister the sysctl table if one was
 * registered.
 */
static void __exit ocfs2_stack_glue_exit(void)
{
	memset(&locking_max_version, 0,
	       sizeof(struct ocfs2_protocol_version));

	ocfs2_sysfs_exit();

	if (ocfs2_table_header != NULL)
		unregister_sysctl_table(ocfs2_table_header);
}
/* Module metadata and entry points. */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 cluster stack glue layer");
MODULE_LICENSE("GPL");
module_init(ocfs2_stack_glue_init);
module_exit(ocfs2_stack_glue_exit);