OpenCloudOS-Kernel/drivers/mtd/mtdchar.c

1218 lines
26 KiB
C
Raw Normal View History

/*
* Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/blkpg.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/mtd/map.h>
#include <asm/uaccess.h>
#define MTD_INODE_FS_MAGIC 0x11307854
static DEFINE_MUTEX(mtd_mutex);
static struct vfsmount *mtd_inode_mnt __read_mostly;
/*
* Data structure to hold the pointer to the mtd device as well
* as mode information ofr various use cases.
*/
struct mtd_file_info {
struct mtd_info *mtd;
struct inode *ino;
enum mtd_file_modes mode;
};
static loff_t mtd_lseek (struct file *file, loff_t offset, int orig)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
switch (orig) {
case SEEK_SET:
break;
case SEEK_CUR:
offset += file->f_pos;
break;
case SEEK_END:
offset += mtd->size;
break;
default:
return -EINVAL;
}
if (offset >= 0 && offset <= mtd->size)
return file->f_pos = offset;
return -EINVAL;
}
static int mtd_open(struct inode *inode, struct file *file)
{
int minor = iminor(inode);
int devnum = minor >> 1;
int ret = 0;
struct mtd_info *mtd;
struct mtd_file_info *mfi;
struct inode *mtd_ino;
DEBUG(MTD_DEBUG_LEVEL0, "MTD_open\n");
/* You can't open the RO devices RW */
if ((file->f_mode & FMODE_WRITE) && (minor & 1))
return -EACCES;
mutex_lock(&mtd_mutex);
mtd = get_mtd_device(NULL, devnum);
if (IS_ERR(mtd)) {
ret = PTR_ERR(mtd);
goto out;
}
if (mtd->type == MTD_ABSENT) {
put_mtd_device(mtd);
ret = -ENODEV;
goto out;
}
mtd_ino = iget_locked(mtd_inode_mnt->mnt_sb, devnum);
if (!mtd_ino) {
put_mtd_device(mtd);
ret = -ENOMEM;
goto out;
}
if (mtd_ino->i_state & I_NEW) {
mtd_ino->i_private = mtd;
mtd_ino->i_mode = S_IFCHR;
mtd_ino->i_data.backing_dev_info = mtd->backing_dev_info;
unlock_new_inode(mtd_ino);
}
file->f_mapping = mtd_ino->i_mapping;
/* You can't open it RW if it's not a writeable device */
if ((file->f_mode & FMODE_WRITE) && !(mtd->flags & MTD_WRITEABLE)) {
iput(mtd_ino);
put_mtd_device(mtd);
ret = -EACCES;
goto out;
}
mfi = kzalloc(sizeof(*mfi), GFP_KERNEL);
if (!mfi) {
iput(mtd_ino);
put_mtd_device(mtd);
ret = -ENOMEM;
goto out;
}
mfi->ino = mtd_ino;
mfi->mtd = mtd;
file->private_data = mfi;
out:
mutex_unlock(&mtd_mutex);
return ret;
} /* mtd_open */
/*====================================================================*/
static int mtd_close(struct inode *inode, struct file *file)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
DEBUG(MTD_DEBUG_LEVEL0, "MTD_close\n");
/* Only sync if opened RW */
if ((file->f_mode & FMODE_WRITE) && mtd->sync)
mtd->sync(mtd);
iput(mfi->ino);
put_mtd_device(mtd);
file->private_data = NULL;
kfree(mfi);
return 0;
} /* mtd_close */
/* FIXME: This _really_ needs to die. In 2.5, we should lock the
userspace buffer down and use it directly with readv/writev.
*/
#define MAX_KMALLOC_SIZE 0x20000
static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t *ppos)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
size_t retlen=0;
size_t total_retlen=0;
int ret=0;
int len;
char *kbuf;
DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n");
if (*ppos + count > mtd->size)
count = mtd->size - *ppos;
if (!count)
return 0;
/* FIXME: Use kiovec in 2.5 to lock down the user's buffers
and pass them directly to the MTD functions */
if (count > MAX_KMALLOC_SIZE)
kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
else
kbuf=kmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
while (count) {
if (count > MAX_KMALLOC_SIZE)
len = MAX_KMALLOC_SIZE;
else
len = count;
switch (mfi->mode) {
case MTD_MODE_OTP_FACTORY:
ret = mtd->read_fact_prot_reg(mtd, *ppos, len, &retlen, kbuf);
break;
case MTD_MODE_OTP_USER:
ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
break;
case MTD_MODE_RAW:
{
struct mtd_oob_ops ops;
ops.mode = MTD_OOB_RAW;
ops.datbuf = kbuf;
ops.oobbuf = NULL;
ops.len = len;
ret = mtd->read_oob(mtd, *ppos, &ops);
retlen = ops.retlen;
break;
}
default:
ret = mtd->read(mtd, *ppos, len, &retlen, kbuf);
}
/* Nand returns -EBADMSG on ecc errors, but it returns
* the data. For our userspace tools it is important
* to dump areas with ecc errors !
* For kernel internal usage it also might return -EUCLEAN
* to signal the caller that a bitflip has occured and has
* been corrected by the ECC algorithm.
* Userspace software which accesses NAND this way
* must be aware of the fact that it deals with NAND
*/
if (!ret || (ret == -EUCLEAN) || (ret == -EBADMSG)) {
*ppos += retlen;
if (copy_to_user(buf, kbuf, retlen)) {
kfree(kbuf);
return -EFAULT;
}
else
total_retlen += retlen;
count -= retlen;
buf += retlen;
if (retlen == 0)
count = 0;
}
else {
kfree(kbuf);
return ret;
}
}
kfree(kbuf);
return total_retlen;
} /* mtd_read */
static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count,loff_t *ppos)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
char *kbuf;
size_t retlen;
size_t total_retlen=0;
int ret=0;
int len;
DEBUG(MTD_DEBUG_LEVEL0,"MTD_write\n");
if (*ppos == mtd->size)
return -ENOSPC;
if (*ppos + count > mtd->size)
count = mtd->size - *ppos;
if (!count)
return 0;
if (count > MAX_KMALLOC_SIZE)
kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
else
kbuf=kmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
while (count) {
if (count > MAX_KMALLOC_SIZE)
len = MAX_KMALLOC_SIZE;
else
len = count;
if (copy_from_user(kbuf, buf, len)) {
kfree(kbuf);
return -EFAULT;
}
switch (mfi->mode) {
case MTD_MODE_OTP_FACTORY:
ret = -EROFS;
break;
case MTD_MODE_OTP_USER:
if (!mtd->write_user_prot_reg) {
ret = -EOPNOTSUPP;
break;
}
ret = mtd->write_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
break;
case MTD_MODE_RAW:
{
struct mtd_oob_ops ops;
ops.mode = MTD_OOB_RAW;
ops.datbuf = kbuf;
ops.oobbuf = NULL;
ops.len = len;
ret = mtd->write_oob(mtd, *ppos, &ops);
retlen = ops.retlen;
break;
}
default:
ret = (*(mtd->write))(mtd, *ppos, len, &retlen, kbuf);
}
if (!ret) {
*ppos += retlen;
total_retlen += retlen;
count -= retlen;
buf += retlen;
}
else {
kfree(kbuf);
return ret;
}
}
kfree(kbuf);
return total_retlen;
} /* mtd_write */
/*======================================================================
IOCTL calls for getting device parameters.
======================================================================*/
static void mtdchar_erase_callback (struct erase_info *instr)
{
wake_up((wait_queue_head_t *)instr->priv);
}
#ifdef CONFIG_HAVE_MTD_OTP
static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
{
struct mtd_info *mtd = mfi->mtd;
int ret = 0;
switch (mode) {
case MTD_OTP_FACTORY:
if (!mtd->read_fact_prot_reg)
ret = -EOPNOTSUPP;
else
mfi->mode = MTD_MODE_OTP_FACTORY;
break;
case MTD_OTP_USER:
if (!mtd->read_fact_prot_reg)
ret = -EOPNOTSUPP;
else
mfi->mode = MTD_MODE_OTP_USER;
break;
default:
ret = -EINVAL;
case MTD_OTP_OFF:
break;
}
return ret;
}
#else
# define otp_select_filemode(f,m) -EOPNOTSUPP
#endif
static int mtd_do_writeoob(struct file *file, struct mtd_info *mtd,
uint64_t start, uint32_t length, void __user *ptr,
uint32_t __user *retp)
{
struct mtd_oob_ops ops;
uint32_t retlen;
int ret = 0;
if (!(file->f_mode & FMODE_WRITE))
return -EPERM;
if (length > 4096)
return -EINVAL;
if (!mtd->write_oob)
ret = -EOPNOTSUPP;
else
ret = access_ok(VERIFY_READ, ptr, length) ? 0 : -EFAULT;
if (ret)
return ret;
ops.ooblen = length;
ops.ooboffs = start & (mtd->oobsize - 1);
ops.datbuf = NULL;
ops.mode = MTD_OOB_PLACE;
if (ops.ooboffs && ops.ooblen > (mtd->oobsize - ops.ooboffs))
return -EINVAL;
ops.oobbuf = memdup_user(ptr, length);
if (IS_ERR(ops.oobbuf))
return PTR_ERR(ops.oobbuf);
start &= ~((uint64_t)mtd->oobsize - 1);
ret = mtd->write_oob(mtd, start, &ops);
if (ops.oobretlen > 0xFFFFFFFFU)
ret = -EOVERFLOW;
retlen = ops.oobretlen;
if (copy_to_user(retp, &retlen, sizeof(length)))
ret = -EFAULT;
kfree(ops.oobbuf);
return ret;
}
static int mtd_do_readoob(struct mtd_info *mtd, uint64_t start,
uint32_t length, void __user *ptr, uint32_t __user *retp)
{
struct mtd_oob_ops ops;
int ret = 0;
if (length > 4096)
return -EINVAL;
if (!mtd->read_oob)
ret = -EOPNOTSUPP;
else
ret = access_ok(VERIFY_WRITE, ptr,
length) ? 0 : -EFAULT;
if (ret)
return ret;
ops.ooblen = length;
ops.ooboffs = start & (mtd->oobsize - 1);
ops.datbuf = NULL;
ops.mode = MTD_OOB_PLACE;
if (ops.ooboffs && ops.ooblen > (mtd->oobsize - ops.ooboffs))
return -EINVAL;
ops.oobbuf = kmalloc(length, GFP_KERNEL);
if (!ops.oobbuf)
return -ENOMEM;
start &= ~((uint64_t)mtd->oobsize - 1);
ret = mtd->read_oob(mtd, start, &ops);
if (put_user(ops.oobretlen, retp))
ret = -EFAULT;
else if (ops.oobretlen && copy_to_user(ptr, ops.oobbuf,
ops.oobretlen))
ret = -EFAULT;
kfree(ops.oobbuf);
return ret;
}
/*
* Copies (and truncates, if necessary) data from the larger struct,
* nand_ecclayout, to the smaller, deprecated layout struct,
* nand_ecclayout_user. This is necessary only to suppport the deprecated
* API ioctl ECCGETLAYOUT while allowing all new functionality to use
* nand_ecclayout flexibly (i.e. the struct may change size in new
* releases without requiring major rewrites).
*/
static int shrink_ecclayout(const struct nand_ecclayout *from,
struct nand_ecclayout_user *to)
{
int i;
if (!from || !to)
return -EINVAL;
memset(to, 0, sizeof(*to));
to->eccbytes = min((int)from->eccbytes, MTD_MAX_ECCPOS_ENTRIES);
for (i = 0; i < to->eccbytes; i++)
to->eccpos[i] = from->eccpos[i];
for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES; i++) {
if (from->oobfree[i].length == 0 &&
from->oobfree[i].offset == 0)
break;
to->oobavail += from->oobfree[i].length;
to->oobfree[i] = from->oobfree[i];
}
return 0;
}
#ifdef CONFIG_MTD_PARTITIONS
static int mtd_blkpg_ioctl(struct mtd_info *mtd,
struct blkpg_ioctl_arg __user *arg)
{
struct blkpg_ioctl_arg a;
struct blkpg_partition p;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
/* Only master mtd device must be used to control partitions */
if (!mtd_is_master(mtd))
return -EINVAL;
if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
return -EFAULT;
if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
return -EFAULT;
switch (a.op) {
case BLKPG_ADD_PARTITION:
return mtd_add_partition(mtd, p.devname, p.start, p.length);
case BLKPG_DEL_PARTITION:
if (p.pno < 0)
return -EINVAL;
return mtd_del_partition(mtd, p.pno);
default:
return -EINVAL;
}
}
#endif
static int mtd_ioctl(struct file *file, u_int cmd, u_long arg)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
void __user *argp = (void __user *)arg;
int ret = 0;
u_long size;
struct mtd_info_user info;
DEBUG(MTD_DEBUG_LEVEL0, "MTD_ioctl\n");
size = (cmd & IOCSIZE_MASK) >> IOCSIZE_SHIFT;
if (cmd & IOC_IN) {
if (!access_ok(VERIFY_READ, argp, size))
return -EFAULT;
}
if (cmd & IOC_OUT) {
if (!access_ok(VERIFY_WRITE, argp, size))
return -EFAULT;
}
switch (cmd) {
case MEMGETREGIONCOUNT:
if (copy_to_user(argp, &(mtd->numeraseregions), sizeof(int)))
return -EFAULT;
break;
case MEMGETREGIONINFO:
{
uint32_t ur_idx;
struct mtd_erase_region_info *kr;
struct region_info_user __user *ur = argp;
if (get_user(ur_idx, &(ur->regionindex)))
return -EFAULT;
if (ur_idx >= mtd->numeraseregions)
return -EINVAL;
kr = &(mtd->eraseregions[ur_idx]);
if (put_user(kr->offset, &(ur->offset))
|| put_user(kr->erasesize, &(ur->erasesize))
|| put_user(kr->numblocks, &(ur->numblocks)))
return -EFAULT;
break;
}
case MEMGETINFO:
info.type = mtd->type;
info.flags = mtd->flags;
info.size = mtd->size;
info.erasesize = mtd->erasesize;
info.writesize = mtd->writesize;
info.oobsize = mtd->oobsize;
/* The below fields are obsolete */
info.ecctype = -1;
info.eccsize = 0;
if (copy_to_user(argp, &info, sizeof(struct mtd_info_user)))
return -EFAULT;
break;
case MEMERASE:
case MEMERASE64:
{
struct erase_info *erase;
if(!(file->f_mode & FMODE_WRITE))
return -EPERM;
erase=kzalloc(sizeof(struct erase_info),GFP_KERNEL);
if (!erase)
ret = -ENOMEM;
else {
wait_queue_head_t waitq;
DECLARE_WAITQUEUE(wait, current);
init_waitqueue_head(&waitq);
if (cmd == MEMERASE64) {
struct erase_info_user64 einfo64;
if (copy_from_user(&einfo64, argp,
sizeof(struct erase_info_user64))) {
kfree(erase);
return -EFAULT;
}
erase->addr = einfo64.start;
erase->len = einfo64.length;
} else {
struct erase_info_user einfo32;
if (copy_from_user(&einfo32, argp,
sizeof(struct erase_info_user))) {
kfree(erase);
return -EFAULT;
}
erase->addr = einfo32.start;
erase->len = einfo32.length;
}
erase->mtd = mtd;
erase->callback = mtdchar_erase_callback;
erase->priv = (unsigned long)&waitq;
/*
FIXME: Allow INTERRUPTIBLE. Which means
not having the wait_queue head on the stack.
If the wq_head is on the stack, and we
leave because we got interrupted, then the
wq_head is no longer there when the
callback routine tries to wake us up.
*/
ret = mtd->erase(mtd, erase);
if (!ret) {
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&waitq, &wait);
if (erase->state != MTD_ERASE_DONE &&
erase->state != MTD_ERASE_FAILED)
schedule();
remove_wait_queue(&waitq, &wait);
set_current_state(TASK_RUNNING);
ret = (erase->state == MTD_ERASE_FAILED)?-EIO:0;
}
kfree(erase);
}
break;
}
case MEMWRITEOOB:
{
struct mtd_oob_buf buf;
struct mtd_oob_buf __user *buf_user = argp;
/* NOTE: writes return length to buf_user->length */
if (copy_from_user(&buf, argp, sizeof(buf)))
ret = -EFAULT;
else
ret = mtd_do_writeoob(file, mtd, buf.start, buf.length,
buf.ptr, &buf_user->length);
break;
}
case MEMREADOOB:
{
struct mtd_oob_buf buf;
struct mtd_oob_buf __user *buf_user = argp;
[MTD] Rework the out of band handling completely Hopefully the last iteration on this! The handling of out of band data on NAND was accompanied by tons of fruitless discussions and halfarsed patches to make it work for a particular problem. Sufficiently annoyed by I all those "I know it better" mails and the resonable amount of discarded "it solves my problem" patches, I finally decided to go for the big rework. After removing the _ecc variants of mtd read/write functions the solution to satisfy the various requirements was to refactor the read/write _oob functions in mtd. The major change is that read/write_oob now takes a pointer to an operation descriptor structure "struct mtd_oob_ops".instead of having a function with at least seven arguments. read/write_oob which should probably renamed to a more descriptive name, can do the following tasks: - read/write out of band data - read/write data content and out of band data - read/write raw data content and out of band data (ecc disabled) struct mtd_oob_ops has a mode field, which determines the oob handling mode. Aside of the MTD_OOB_RAW mode, which is intended to be especially for diagnostic purposes and some internal functions e.g. bad block table creation, the other two modes are for mtd clients: MTD_OOB_PLACE puts/gets the given oob data exactly to/from the place which is described by the ooboffs and ooblen fields of the mtd_oob_ops strcuture. It's up to the caller to make sure that the byte positions are not used by the ECC placement algorithms. MTD_OOB_AUTO puts/gets the given oob data automaticaly to/from the places in the out of band area which are described by the oobfree tuples in the ecclayout data structre which is associated to the devicee. The decision whether data plus oob or oob only handling is done depends on the setting of the datbuf member of the data structure. When datbuf == NULL then the internal read/write_oob functions are selected, otherwise the read/write data routines are invoked. Tested on a few platforms with all variants. Please be aware of possible regressions for your particular device / application scenario Disclaimer: Any whining will be ignored from those who just contributed "hot air blurb" and never sat down to tackle the underlying problem of the mess in the NAND driver grown over time and the big chunk of work to fix up the existing users. The problem was not the holiness of the existing MTD interfaces. The problems was the lack of time to go for the big overhaul. It's easy to add more mess to the existing one, but it takes alot of effort to go for a real solution. Improvements and bugfixes are welcome! Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2006-05-29 09:26:58 +08:00
/* NOTE: writes return length to buf_user->start */
if (copy_from_user(&buf, argp, sizeof(buf)))
ret = -EFAULT;
else
ret = mtd_do_readoob(mtd, buf.start, buf.length,
buf.ptr, &buf_user->start);
break;
}
case MEMWRITEOOB64:
{
struct mtd_oob_buf64 buf;
struct mtd_oob_buf64 __user *buf_user = argp;
if (copy_from_user(&buf, argp, sizeof(buf)))
ret = -EFAULT;
else
ret = mtd_do_writeoob(file, mtd, buf.start, buf.length,
(void __user *)(uintptr_t)buf.usr_ptr,
&buf_user->length);
break;
}
case MEMREADOOB64:
{
struct mtd_oob_buf64 buf;
struct mtd_oob_buf64 __user *buf_user = argp;
if (copy_from_user(&buf, argp, sizeof(buf)))
ret = -EFAULT;
else
ret = mtd_do_readoob(mtd, buf.start, buf.length,
(void __user *)(uintptr_t)buf.usr_ptr,
&buf_user->length);
break;
}
case MEMLOCK:
{
struct erase_info_user einfo;
if (copy_from_user(&einfo, argp, sizeof(einfo)))
return -EFAULT;
if (!mtd->lock)
ret = -EOPNOTSUPP;
else
ret = mtd->lock(mtd, einfo.start, einfo.length);
break;
}
case MEMUNLOCK:
{
struct erase_info_user einfo;
if (copy_from_user(&einfo, argp, sizeof(einfo)))
return -EFAULT;
if (!mtd->unlock)
ret = -EOPNOTSUPP;
else
ret = mtd->unlock(mtd, einfo.start, einfo.length);
break;
}
case MEMISLOCKED:
{
struct erase_info_user einfo;
if (copy_from_user(&einfo, argp, sizeof(einfo)))
return -EFAULT;
if (!mtd->is_locked)
ret = -EOPNOTSUPP;
else
ret = mtd->is_locked(mtd, einfo.start, einfo.length);
break;
}
/* Legacy interface */
case MEMGETOOBSEL:
{
struct nand_oobinfo oi;
if (!mtd->ecclayout)
return -EOPNOTSUPP;
if (mtd->ecclayout->eccbytes > ARRAY_SIZE(oi.eccpos))
return -EINVAL;
oi.useecc = MTD_NANDECC_AUTOPLACE;
memcpy(&oi.eccpos, mtd->ecclayout->eccpos, sizeof(oi.eccpos));
memcpy(&oi.oobfree, mtd->ecclayout->oobfree,
sizeof(oi.oobfree));
oi.eccbytes = mtd->ecclayout->eccbytes;
if (copy_to_user(argp, &oi, sizeof(struct nand_oobinfo)))
return -EFAULT;
break;
}
case MEMGETBADBLOCK:
{
loff_t offs;
if (copy_from_user(&offs, argp, sizeof(loff_t)))
return -EFAULT;
if (!mtd->block_isbad)
ret = -EOPNOTSUPP;
else
return mtd->block_isbad(mtd, offs);
break;
}
case MEMSETBADBLOCK:
{
loff_t offs;
if (copy_from_user(&offs, argp, sizeof(loff_t)))
return -EFAULT;
if (!mtd->block_markbad)
ret = -EOPNOTSUPP;
else
return mtd->block_markbad(mtd, offs);
break;
}
#ifdef CONFIG_HAVE_MTD_OTP
case OTPSELECT:
{
int mode;
if (copy_from_user(&mode, argp, sizeof(int)))
return -EFAULT;
mfi->mode = MTD_MODE_NORMAL;
ret = otp_select_filemode(mfi, mode);
file->f_pos = 0;
break;
}
case OTPGETREGIONCOUNT:
case OTPGETREGIONINFO:
{
struct otp_info *buf = kmalloc(4096, GFP_KERNEL);
if (!buf)
return -ENOMEM;
ret = -EOPNOTSUPP;
switch (mfi->mode) {
case MTD_MODE_OTP_FACTORY:
if (mtd->get_fact_prot_info)
ret = mtd->get_fact_prot_info(mtd, buf, 4096);
break;
case MTD_MODE_OTP_USER:
if (mtd->get_user_prot_info)
ret = mtd->get_user_prot_info(mtd, buf, 4096);
break;
default:
break;
}
if (ret >= 0) {
if (cmd == OTPGETREGIONCOUNT) {
int nbr = ret / sizeof(struct otp_info);
ret = copy_to_user(argp, &nbr, sizeof(int));
} else
ret = copy_to_user(argp, buf, ret);
if (ret)
ret = -EFAULT;
}
kfree(buf);
break;
}
case OTPLOCK:
{
struct otp_info oinfo;
if (mfi->mode != MTD_MODE_OTP_USER)
return -EINVAL;
if (copy_from_user(&oinfo, argp, sizeof(oinfo)))
return -EFAULT;
if (!mtd->lock_user_prot_reg)
return -EOPNOTSUPP;
ret = mtd->lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
break;
}
#endif
/* This ioctl is being deprecated - it truncates the ecc layout */
case ECCGETLAYOUT:
{
struct nand_ecclayout_user *usrlay;
if (!mtd->ecclayout)
return -EOPNOTSUPP;
usrlay = kmalloc(sizeof(*usrlay), GFP_KERNEL);
if (!usrlay)
return -ENOMEM;
shrink_ecclayout(mtd->ecclayout, usrlay);
if (copy_to_user(argp, usrlay, sizeof(*usrlay)))
ret = -EFAULT;
kfree(usrlay);
break;
}
case ECCGETSTATS:
{
if (copy_to_user(argp, &mtd->ecc_stats,
sizeof(struct mtd_ecc_stats)))
return -EFAULT;
break;
}
case MTDFILEMODE:
{
mfi->mode = 0;
switch(arg) {
case MTD_MODE_OTP_FACTORY:
case MTD_MODE_OTP_USER:
ret = otp_select_filemode(mfi, arg);
break;
case MTD_MODE_RAW:
if (!mtd->read_oob || !mtd->write_oob)
return -EOPNOTSUPP;
mfi->mode = arg;
case MTD_MODE_NORMAL:
break;
default:
ret = -EINVAL;
}
file->f_pos = 0;
break;
}
#ifdef CONFIG_MTD_PARTITIONS
case BLKPG:
{
ret = mtd_blkpg_ioctl(mtd,
(struct blkpg_ioctl_arg __user *)arg);
break;
}
case BLKRRPART:
{
/* No reread partition feature. Just return ok */
ret = 0;
break;
}
#endif
default:
ret = -ENOTTY;
}
return ret;
} /* memory_ioctl */
static long mtd_unlocked_ioctl(struct file *file, u_int cmd, u_long arg)
{
int ret;
mutex_lock(&mtd_mutex);
ret = mtd_ioctl(file, cmd, arg);
mutex_unlock(&mtd_mutex);
return ret;
}
#ifdef CONFIG_COMPAT
struct mtd_oob_buf32 {
u_int32_t start;
u_int32_t length;
compat_caddr_t ptr; /* unsigned char* */
};
#define MEMWRITEOOB32 _IOWR('M', 3, struct mtd_oob_buf32)
#define MEMREADOOB32 _IOWR('M', 4, struct mtd_oob_buf32)
static long mtd_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
void __user *argp = compat_ptr(arg);
int ret = 0;
mutex_lock(&mtd_mutex);
switch (cmd) {
case MEMWRITEOOB32:
{
struct mtd_oob_buf32 buf;
struct mtd_oob_buf32 __user *buf_user = argp;
if (copy_from_user(&buf, argp, sizeof(buf)))
ret = -EFAULT;
else
ret = mtd_do_writeoob(file, mtd, buf.start,
buf.length, compat_ptr(buf.ptr),
&buf_user->length);
break;
}
case MEMREADOOB32:
{
struct mtd_oob_buf32 buf;
struct mtd_oob_buf32 __user *buf_user = argp;
/* NOTE: writes return length to buf->start */
if (copy_from_user(&buf, argp, sizeof(buf)))
ret = -EFAULT;
else
ret = mtd_do_readoob(mtd, buf.start,
buf.length, compat_ptr(buf.ptr),
&buf_user->start);
break;
}
default:
ret = mtd_ioctl(file, cmd, (unsigned long)argp);
}
mutex_unlock(&mtd_mutex);
return ret;
}
#endif /* CONFIG_COMPAT */
/*
* try to determine where a shared mapping can be made
* - only supported for NOMMU at the moment (MMU can't doesn't copy private
* mappings)
*/
#ifndef CONFIG_MMU
static unsigned long mtd_get_unmapped_area(struct file *file,
unsigned long addr,
unsigned long len,
unsigned long pgoff,
unsigned long flags)
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
if (mtd->get_unmapped_area) {
unsigned long offset;
if (addr != 0)
return (unsigned long) -EINVAL;
if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
return (unsigned long) -EINVAL;
offset = pgoff << PAGE_SHIFT;
if (offset > mtd->size - len)
return (unsigned long) -EINVAL;
return mtd->get_unmapped_area(mtd, len, offset, flags);
}
/* can't map directly */
return (unsigned long) -ENOSYS;
}
#endif
/*
* set up a mapping for shared memory segments
*/
static int mtd_mmap(struct file *file, struct vm_area_struct *vma)
{
#ifdef CONFIG_MMU
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
struct map_info *map = mtd->priv;
unsigned long start;
unsigned long off;
u32 len;
if (mtd->type == MTD_RAM || mtd->type == MTD_ROM) {
off = vma->vm_pgoff << PAGE_SHIFT;
start = map->phys;
len = PAGE_ALIGN((start & ~PAGE_MASK) + map->size);
start &= PAGE_MASK;
if ((vma->vm_end - vma->vm_start + off) > len)
return -EINVAL;
off += start;
vma->vm_pgoff = off >> PAGE_SHIFT;
vma->vm_flags |= VM_IO | VM_RESERVED;
#ifdef pgprot_noncached
if (file->f_flags & O_DSYNC || off >= __pa(high_memory))
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
#endif
if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
vma->vm_end - vma->vm_start,
vma->vm_page_prot))
return -EAGAIN;
return 0;
}
return -ENOSYS;
#else
return vma->vm_flags & VM_SHARED ? 0 : -ENOSYS;
#endif
}
static const struct file_operations mtd_fops = {
.owner = THIS_MODULE,
.llseek = mtd_lseek,
.read = mtd_read,
.write = mtd_write,
.unlocked_ioctl = mtd_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = mtd_compat_ioctl,
#endif
.open = mtd_open,
.release = mtd_close,
.mmap = mtd_mmap,
#ifndef CONFIG_MMU
.get_unmapped_area = mtd_get_unmapped_area,
#endif
};
static struct dentry *mtd_inodefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_pseudo(fs_type, "mtd_inode:", NULL, MTD_INODE_FS_MAGIC);
}
static struct file_system_type mtd_inodefs_type = {
.name = "mtd_inodefs",
.mount = mtd_inodefs_mount,
.kill_sb = kill_anon_super,
};
static void mtdchar_notify_add(struct mtd_info *mtd)
{
}
static void mtdchar_notify_remove(struct mtd_info *mtd)
{
struct inode *mtd_ino = ilookup(mtd_inode_mnt->mnt_sb, mtd->index);
if (mtd_ino) {
/* Destroy the inode if it exists */
mtd_ino->i_nlink = 0;
iput(mtd_ino);
}
}
static struct mtd_notifier mtdchar_notifier = {
.add = mtdchar_notify_add,
.remove = mtdchar_notify_remove,
};
static int __init init_mtdchar(void)
{
int ret;
ret = __register_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS,
"mtd", &mtd_fops);
if (ret < 0) {
pr_notice("Can't allocate major number %d for "
"Memory Technology Devices.\n", MTD_CHAR_MAJOR);
return ret;
}
ret = register_filesystem(&mtd_inodefs_type);
if (ret) {
pr_notice("Can't register mtd_inodefs filesystem: %d\n", ret);
goto err_unregister_chdev;
}
mtd_inode_mnt = kern_mount(&mtd_inodefs_type);
if (IS_ERR(mtd_inode_mnt)) {
ret = PTR_ERR(mtd_inode_mnt);
pr_notice("Error mounting mtd_inodefs filesystem: %d\n", ret);
goto err_unregister_filesystem;
}
register_mtd_user(&mtdchar_notifier);
return ret;
err_unregister_filesystem:
unregister_filesystem(&mtd_inodefs_type);
err_unregister_chdev:
__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
return ret;
}
static void __exit cleanup_mtdchar(void)
{
unregister_mtd_user(&mtdchar_notifier);
fs: scale mntget/mntput The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
2011-01-07 14:50:11 +08:00
mntput_long(mtd_inode_mnt);
unregister_filesystem(&mtd_inodefs_type);
__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
}
module_init(init_mtdchar);
module_exit(cleanup_mtdchar);
MODULE_ALIAS_CHARDEV_MAJOR(MTD_CHAR_MAJOR);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
MODULE_DESCRIPTION("Direct character-device access to MTD devices");
MODULE_ALIAS_CHARDEV_MAJOR(MTD_CHAR_MAJOR);