OpenCloudOS-Kernel/drivers/mmc/core/block.c

2341 lines
58 KiB
C
Raw Normal View History

/*
* Block driver for media (i.e., flash cards)
*
* Copyright 2002 Hewlett-Packard Company
* Copyright 2005-2008 Pierre Ossman
*
* Use consistent with the GNU GPL is permitted,
* provided that this copyright notice is
* preserved in its entirety in all copies and derived works.
*
* HEWLETT-PACKARD COMPANY MAKES NO WARRANTIES, EXPRESSED OR IMPLIED,
* AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS
* FITNESS FOR ANY PARTICULAR PURPOSE.
*
* Many thanks to Alessandro Rubini and Jonathan Corbet!
*
* Author: Andrew Christian
* 28 May 2002
*/
#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/fs.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
#include <linux/kdev_t.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/delay.h>
#include <linux/capability.h>
#include <linux/compat.h>
#include <linux/pm_runtime.h>
#include <linux/idr.h>
#include <linux/mmc/ioctl.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
#include <linux/mmc/sd.h>
#include <linux/uaccess.h>
#include "queue.h"
#include "block.h"
#include "core.h"
#include "card.h"
#include "host.h"
#include "bus.h"
#include "mmc_ops.h"
#include "quirks.h"
#include "sd_ops.h"
MODULE_ALIAS("mmc:block");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "mmcblk."
#define MMC_BLK_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */
#define MMC_SANITIZE_REQ_TIMEOUT 240000
#define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16)
#define mmc_req_rel_wr(req) ((req->cmd_flags & REQ_FUA) && \
(rq_data_dir(req) == WRITE))
static DEFINE_MUTEX(block_mutex);
/*
* The defaults come from config options but can be overriden by module
* or bootarg options.
*/
static int perdev_minors = CONFIG_MMC_BLOCK_MINORS;
/*
* We've only got one major, so number of mmcblk devices is
* limited to (1 << 20) / number of minors per device. It is also
* limited by the MAX_DEVICES below.
*/
static int max_devices;
#define MAX_DEVICES 256
static DEFINE_IDA(mmc_blk_ida);
/*
* There is one mmc_blk_data per slot.
*/
struct mmc_blk_data {
spinlock_t lock;
struct device *parent;
struct gendisk *disk;
struct mmc_queue queue;
struct list_head part;
unsigned int flags;
#define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */
#define MMC_BLK_REL_WR (1 << 1) /* MMC Reliable write support */
unsigned int usage;
unsigned int read_only;
unsigned int part_type;
unsigned int reset_done;
#define MMC_BLK_READ BIT(0)
#define MMC_BLK_WRITE BIT(1)
#define MMC_BLK_DISCARD BIT(2)
#define MMC_BLK_SECDISCARD BIT(3)
/*
* Only set in main mmc_blk_data associated
* with mmc_card with dev_set_drvdata, and keeps
* track of the current selected device partition.
*/
unsigned int part_curr;
struct device_attribute force_ro;
struct device_attribute power_ro_lock;
int area_type;
};
static DEFINE_MUTEX(open_lock);
module_param(perdev_minors, int, 0444);
MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
static inline int mmc_blk_part_switch(struct mmc_card *card,
struct mmc_blk_data *md);
static int get_card_status(struct mmc_card *card, u32 *status, int retries);
static void mmc_blk_requeue(struct request_queue *q, struct request *req)
{
spin_lock_irq(q->queue_lock);
blk_requeue_request(q, req);
spin_unlock_irq(q->queue_lock);
}
static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
{
struct mmc_blk_data *md;
mutex_lock(&open_lock);
md = disk->private_data;
if (md && md->usage == 0)
md = NULL;
if (md)
md->usage++;
mutex_unlock(&open_lock);
return md;
}
static inline int mmc_get_devidx(struct gendisk *disk)
{
int devidx = disk->first_minor / perdev_minors;
return devidx;
}
static void mmc_blk_put(struct mmc_blk_data *md)
{
mutex_lock(&open_lock);
md->usage--;
if (md->usage == 0) {
int devidx = mmc_get_devidx(md->disk);
blk_cleanup_queue(md->queue.queue);
ida_simple_remove(&mmc_blk_ida, devidx);
put_disk(md->disk);
kfree(md);
}
mutex_unlock(&open_lock);
}
static ssize_t power_ro_lock_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int ret;
struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
struct mmc_card *card = md->queue.card;
int locked = 0;
if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PERM_WP_EN)
locked = 2;
else if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_EN)
locked = 1;
ret = snprintf(buf, PAGE_SIZE, "%d\n", locked);
mmc_blk_put(md);
return ret;
}
static ssize_t power_ro_lock_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
int ret;
struct mmc_blk_data *md, *part_md;
struct mmc_card *card;
unsigned long set;
if (kstrtoul(buf, 0, &set))
return -EINVAL;
if (set != 1)
return count;
md = mmc_blk_get(dev_to_disk(dev));
card = md->queue.card;
mmc_get_card(card);
ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP,
card->ext_csd.boot_ro_lock |
EXT_CSD_BOOT_WP_B_PWR_WP_EN,
card->ext_csd.part_time);
if (ret)
pr_err("%s: Locking boot partition ro until next power on failed: %d\n", md->disk->disk_name, ret);
else
card->ext_csd.boot_ro_lock |= EXT_CSD_BOOT_WP_B_PWR_WP_EN;
mmc_put_card(card);
if (!ret) {
pr_info("%s: Locking boot partition ro until next power on\n",
md->disk->disk_name);
set_disk_ro(md->disk, 1);
list_for_each_entry(part_md, &md->part, part)
if (part_md->area_type == MMC_BLK_DATA_AREA_BOOT) {
pr_info("%s: Locking boot partition ro until next power on\n", part_md->disk->disk_name);
set_disk_ro(part_md->disk, 1);
}
}
mmc_blk_put(md);
return count;
}
static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
int ret;
struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
ret = snprintf(buf, PAGE_SIZE, "%d\n",
get_disk_ro(dev_to_disk(dev)) ^
md->read_only);
mmc_blk_put(md);
return ret;
}
static ssize_t force_ro_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
int ret;
char *end;
struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
unsigned long set = simple_strtoul(buf, &end, 0);
if (end == buf) {
ret = -EINVAL;
goto out;
}
set_disk_ro(dev_to_disk(dev), set || md->read_only);
ret = count;
out:
mmc_blk_put(md);
return ret;
}
static int mmc_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
int ret = -ENXIO;
mutex_lock(&block_mutex);
if (md) {
if (md->usage == 2)
check_disk_change(bdev);
ret = 0;
if ((mode & FMODE_WRITE) && md->read_only) {
mmc_blk_put(md);
ret = -EROFS;
}
}
mutex_unlock(&block_mutex);
return ret;
}
static void mmc_blk_release(struct gendisk *disk, fmode_t mode)
{
struct mmc_blk_data *md = disk->private_data;
mutex_lock(&block_mutex);
mmc_blk_put(md);
mutex_unlock(&block_mutex);
}
static int
mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
geo->heads = 4;
geo->sectors = 16;
return 0;
}
struct mmc_blk_ioc_data {
struct mmc_ioc_cmd ic;
unsigned char *buf;
u64 buf_bytes;
};
static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
struct mmc_ioc_cmd __user *user)
{
struct mmc_blk_ioc_data *idata;
int err;
idata = kmalloc(sizeof(*idata), GFP_KERNEL);
if (!idata) {
err = -ENOMEM;
goto out;
}
if (copy_from_user(&idata->ic, user, sizeof(idata->ic))) {
err = -EFAULT;
goto idata_err;
}
idata->buf_bytes = (u64) idata->ic.blksz * idata->ic.blocks;
if (idata->buf_bytes > MMC_IOC_MAX_BYTES) {
err = -EOVERFLOW;
goto idata_err;
}
if (!idata->buf_bytes) {
idata->buf = NULL;
return idata;
}
idata->buf = kmalloc(idata->buf_bytes, GFP_KERNEL);
if (!idata->buf) {
err = -ENOMEM;
goto idata_err;
}
if (copy_from_user(idata->buf, (void __user *)(unsigned long)
idata->ic.data_ptr, idata->buf_bytes)) {
err = -EFAULT;
goto copy_err;
}
return idata;
copy_err:
kfree(idata->buf);
idata_err:
kfree(idata);
out:
return ERR_PTR(err);
}
static int mmc_blk_ioctl_copy_to_user(struct mmc_ioc_cmd __user *ic_ptr,
struct mmc_blk_ioc_data *idata)
{
struct mmc_ioc_cmd *ic = &idata->ic;
if (copy_to_user(&(ic_ptr->response), ic->response,
sizeof(ic->response)))
return -EFAULT;
if (!idata->ic.write_flag) {
if (copy_to_user((void __user *)(unsigned long)ic->data_ptr,
idata->buf, idata->buf_bytes))
return -EFAULT;
}
return 0;
}
static int ioctl_rpmb_card_status_poll(struct mmc_card *card, u32 *status,
u32 retries_max)
{
int err;
u32 retry_count = 0;
if (!status || !retries_max)
return -EINVAL;
do {
err = get_card_status(card, status, 5);
if (err)
break;
if (!R1_STATUS(*status) &&
(R1_CURRENT_STATE(*status) != R1_STATE_PRG))
break; /* RPMB programming operation complete */
/*
* Rechedule to give the MMC device a chance to continue
* processing the previous command without being polled too
* frequently.
*/
usleep_range(1000, 5000);
} while (++retry_count < retries_max);
if (retry_count == retries_max)
err = -EPERM;
return err;
}
static int ioctl_do_sanitize(struct mmc_card *card)
{
int err;
if (!mmc_can_sanitize(card)) {
pr_warn("%s: %s - SANITIZE is not supported\n",
mmc_hostname(card->host), __func__);
err = -EOPNOTSUPP;
goto out;
}
pr_debug("%s: %s - SANITIZE IN PROGRESS...\n",
mmc_hostname(card->host), __func__);
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_SANITIZE_START, 1,
MMC_SANITIZE_REQ_TIMEOUT);
if (err)
pr_err("%s: %s - EXT_CSD_SANITIZE_START failed. err=%d\n",
mmc_hostname(card->host), __func__, err);
pr_debug("%s: %s - SANITIZE COMPLETED\n", mmc_hostname(card->host),
__func__);
out:
return err;
}
static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
struct mmc_blk_ioc_data *idata)
{
struct mmc_command cmd = {};
struct mmc_data data = {};
struct mmc_request mrq = {};
struct scatterlist sg;
int err;
int is_rpmb = false;
u32 status = 0;
if (!card || !md || !idata)
return -EINVAL;
if (md->area_type & MMC_BLK_DATA_AREA_RPMB)
is_rpmb = true;
cmd.opcode = idata->ic.opcode;
cmd.arg = idata->ic.arg;
cmd.flags = idata->ic.flags;
if (idata->buf_bytes) {
data.sg = &sg;
data.sg_len = 1;
data.blksz = idata->ic.blksz;
data.blocks = idata->ic.blocks;
sg_init_one(data.sg, idata->buf, idata->buf_bytes);
if (idata->ic.write_flag)
data.flags = MMC_DATA_WRITE;
else
data.flags = MMC_DATA_READ;
/* data.flags must already be set before doing this. */
mmc_set_data_timeout(&data, card);
/* Allow overriding the timeout_ns for empirical tuning. */
if (idata->ic.data_timeout_ns)
data.timeout_ns = idata->ic.data_timeout_ns;
if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
/*
* Pretend this is a data transfer and rely on the
* host driver to compute timeout. When all host
* drivers support cmd.cmd_timeout for R1B, this
* can be changed to:
*
* mrq.data = NULL;
* cmd.cmd_timeout = idata->ic.cmd_timeout_ms;
*/
data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000;
}
mrq.data = &data;
}
mrq.cmd = &cmd;
err = mmc_blk_part_switch(card, md);
if (err)
return err;
if (idata->ic.is_acmd) {
err = mmc_app_cmd(card->host, card);
if (err)
return err;
}
if (is_rpmb) {
err = mmc_set_blockcount(card, data.blocks,
idata->ic.write_flag & (1 << 31));
if (err)
return err;
}
if ((MMC_EXTRACT_INDEX_FROM_ARG(cmd.arg) == EXT_CSD_SANITIZE_START) &&
(cmd.opcode == MMC_SWITCH)) {
err = ioctl_do_sanitize(card);
if (err)
pr_err("%s: ioctl_do_sanitize() failed. err = %d",
__func__, err);
return err;
}
mmc_wait_for_req(card->host, &mrq);
if (cmd.error) {
dev_err(mmc_dev(card->host), "%s: cmd error %d\n",
__func__, cmd.error);
return cmd.error;
}
if (data.error) {
dev_err(mmc_dev(card->host), "%s: data error %d\n",
__func__, data.error);
return data.error;
}
/*
* According to the SD specs, some commands require a delay after
* issuing the command.
*/
if (idata->ic.postsleep_min_us)
usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us);
memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp));
if (is_rpmb) {
/*
* Ensure RPMB command has completed by polling CMD13
* "Send Status".
*/
err = ioctl_rpmb_card_status_poll(card, &status, 5);
if (err)
dev_err(mmc_dev(card->host),
"%s: Card Status=0x%08X, error %d\n",
__func__, status, err);
}
return err;
}
static int mmc_blk_ioctl_cmd(struct block_device *bdev,
struct mmc_ioc_cmd __user *ic_ptr)
{
struct mmc_blk_ioc_data *idata;
struct mmc_blk_data *md;
struct mmc_card *card;
int err = 0, ioc_err = 0;
/*
* The caller must have CAP_SYS_RAWIO, and must be calling this on the
* whole block device, not on a partition. This prevents overspray
* between sibling partitions.
*/
if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
return -EPERM;
idata = mmc_blk_ioctl_copy_from_user(ic_ptr);
if (IS_ERR(idata))
return PTR_ERR(idata);
md = mmc_blk_get(bdev->bd_disk);
if (!md) {
err = -EINVAL;
goto cmd_err;
}
card = md->queue.card;
if (IS_ERR(card)) {
err = PTR_ERR(card);
goto cmd_done;
}
mmc_get_card(card);
ioc_err = __mmc_blk_ioctl_cmd(card, md, idata);
/* Always switch back to main area after RPMB access */
if (md->area_type & MMC_BLK_DATA_AREA_RPMB)
mmc_blk_part_switch(card, dev_get_drvdata(&card->dev));
mmc_put_card(card);
err = mmc_blk_ioctl_copy_to_user(ic_ptr, idata);
cmd_done:
mmc_blk_put(md);
cmd_err:
kfree(idata->buf);
kfree(idata);
return ioc_err ? ioc_err : err;
}
static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
struct mmc_ioc_multi_cmd __user *user)
{
struct mmc_blk_ioc_data **idata = NULL;
struct mmc_ioc_cmd __user *cmds = user->cmds;
struct mmc_card *card;
struct mmc_blk_data *md;
int i, err = 0, ioc_err = 0;
__u64 num_of_cmds;
/*
* The caller must have CAP_SYS_RAWIO, and must be calling this on the
* whole block device, not on a partition. This prevents overspray
* between sibling partitions.
*/
if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
return -EPERM;
if (copy_from_user(&num_of_cmds, &user->num_of_cmds,
sizeof(num_of_cmds)))
return -EFAULT;
if (num_of_cmds > MMC_IOC_MAX_CMDS)
return -EINVAL;
idata = kcalloc(num_of_cmds, sizeof(*idata), GFP_KERNEL);
if (!idata)
return -ENOMEM;
for (i = 0; i < num_of_cmds; i++) {
idata[i] = mmc_blk_ioctl_copy_from_user(&cmds[i]);
if (IS_ERR(idata[i])) {
err = PTR_ERR(idata[i]);
num_of_cmds = i;
goto cmd_err;
}
}
md = mmc_blk_get(bdev->bd_disk);
if (!md) {
err = -EINVAL;
goto cmd_err;
}
card = md->queue.card;
if (IS_ERR(card)) {
err = PTR_ERR(card);
goto cmd_done;
}
mmc_get_card(card);
for (i = 0; i < num_of_cmds && !ioc_err; i++)
ioc_err = __mmc_blk_ioctl_cmd(card, md, idata[i]);
/* Always switch back to main area after RPMB access */
if (md->area_type & MMC_BLK_DATA_AREA_RPMB)
mmc_blk_part_switch(card, dev_get_drvdata(&card->dev));
mmc_put_card(card);
/* copy to user if data and response */
for (i = 0; i < num_of_cmds && !err; i++)
err = mmc_blk_ioctl_copy_to_user(&cmds[i], idata[i]);
cmd_done:
mmc_blk_put(md);
cmd_err:
for (i = 0; i < num_of_cmds; i++) {
kfree(idata[i]->buf);
kfree(idata[i]);
}
kfree(idata);
return ioc_err ? ioc_err : err;
}
static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case MMC_IOC_CMD:
return mmc_blk_ioctl_cmd(bdev,
(struct mmc_ioc_cmd __user *)arg);
case MMC_IOC_MULTI_CMD:
return mmc_blk_ioctl_multi_cmd(bdev,
(struct mmc_ioc_multi_cmd __user *)arg);
default:
return -EINVAL;
}
}
#ifdef CONFIG_COMPAT
static int mmc_blk_compat_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
return mmc_blk_ioctl(bdev, mode, cmd, (unsigned long) compat_ptr(arg));
}
#endif
static const struct block_device_operations mmc_bdops = {
.open = mmc_blk_open,
.release = mmc_blk_release,
.getgeo = mmc_blk_getgeo,
.owner = THIS_MODULE,
.ioctl = mmc_blk_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = mmc_blk_compat_ioctl,
#endif
};
static int mmc_blk_part_switch_pre(struct mmc_card *card,
unsigned int part_type)
{
int ret = 0;
if (part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) {
if (card->ext_csd.cmdq_en) {
ret = mmc_cmdq_disable(card);
if (ret)
return ret;
}
mmc_retune_pause(card->host);
}
return ret;
}
static int mmc_blk_part_switch_post(struct mmc_card *card,
unsigned int part_type)
{
int ret = 0;
if (part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) {
mmc_retune_unpause(card->host);
if (card->reenable_cmdq && !card->ext_csd.cmdq_en)
ret = mmc_cmdq_enable(card);
}
return ret;
}
static inline int mmc_blk_part_switch(struct mmc_card *card,
struct mmc_blk_data *md)
{
int ret = 0;
struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev);
if (main_md->part_curr == md->part_type)
return 0;
if (mmc_card_mmc(card)) {
u8 part_config = card->ext_csd.part_config;
ret = mmc_blk_part_switch_pre(card, md->part_type);
if (ret)
return ret;
part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK;
part_config |= md->part_type;
ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_PART_CONFIG, part_config,
card->ext_csd.part_time);
if (ret) {
mmc_blk_part_switch_post(card, md->part_type);
return ret;
}
card->ext_csd.part_config = part_config;
ret = mmc_blk_part_switch_post(card, main_md->part_curr);
}
main_md->part_curr = md->part_type;
return ret;
}
static int mmc_sd_num_wr_blocks(struct mmc_card *card, u32 *written_blocks)
{
int err;
u32 result;
__be32 *blocks;
struct mmc_request mrq = {};
struct mmc_command cmd = {};
struct mmc_data data = {};
struct scatterlist sg;
cmd.opcode = MMC_APP_CMD;
cmd.arg = card->rca << 16;
cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
err = mmc_wait_for_cmd(card->host, &cmd, 0);
if (err)
return err;
if (!mmc_host_is_spi(card->host) && !(cmd.resp[0] & R1_APP_CMD))
return -EIO;
memset(&cmd, 0, sizeof(struct mmc_command));
cmd.opcode = SD_APP_SEND_NUM_WR_BLKS;
cmd.arg = 0;
cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
data.blksz = 4;
data.blocks = 1;
data.flags = MMC_DATA_READ;
data.sg = &sg;
data.sg_len = 1;
mmc_set_data_timeout(&data, card);
mrq.cmd = &cmd;
mrq.data = &data;
blocks = kmalloc(4, GFP_KERNEL);
if (!blocks)
return -ENOMEM;
sg_init_one(&sg, blocks, 4);
mmc_wait_for_req(card->host, &mrq);
result = ntohl(*blocks);
kfree(blocks);
if (cmd.error || data.error)
return -EIO;
*written_blocks = result;
return 0;
}
static int get_card_status(struct mmc_card *card, u32 *status, int retries)
{
struct mmc_command cmd = {};
int err;
cmd.opcode = MMC_SEND_STATUS;
if (!mmc_host_is_spi(card->host))
cmd.arg = card->rca << 16;
cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC;
err = mmc_wait_for_cmd(card->host, &cmd, retries);
if (err == 0)
*status = cmd.resp[0];
return err;
}
static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms,
bool hw_busy_detect, struct request *req, bool *gen_err)
{
unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
int err = 0;
u32 status;
do {
err = get_card_status(card, &status, 5);
if (err) {
pr_err("%s: error %d requesting status\n",
req->rq_disk->disk_name, err);
return err;
}
if (status & R1_ERROR) {
pr_err("%s: %s: error sending status cmd, status %#x\n",
req->rq_disk->disk_name, __func__, status);
*gen_err = true;
}
/* We may rely on the host hw to handle busy detection.*/
if ((card->host->caps & MMC_CAP_WAIT_WHILE_BUSY) &&
hw_busy_detect)
break;
/*
* Timeout if the device never becomes ready for data and never
* leaves the program state.
*/
if (time_after(jiffies, timeout)) {
pr_err("%s: Card stuck in programming state! %s %s\n",
mmc_hostname(card->host),
req->rq_disk->disk_name, __func__);
return -ETIMEDOUT;
}
/*
* Some cards mishandle the status bits,
* so make sure to check both the busy
* indication and the card state.
*/
} while (!(status & R1_READY_FOR_DATA) ||
(R1_CURRENT_STATE(status) == R1_STATE_PRG));
return err;
}
static int send_stop(struct mmc_card *card, unsigned int timeout_ms,
struct request *req, bool *gen_err, u32 *stop_status)
{
struct mmc_host *host = card->host;
struct mmc_command cmd = {};
int err;
bool use_r1b_resp = rq_data_dir(req) == WRITE;
/*
* Normally we use R1B responses for WRITE, but in cases where the host
* has specified a max_busy_timeout we need to validate it. A failure
* means we need to prevent the host from doing hw busy detection, which
* is done by converting to a R1 response instead.
*/
if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout))
use_r1b_resp = false;
cmd.opcode = MMC_STOP_TRANSMISSION;
if (use_r1b_resp) {
cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
cmd.busy_timeout = timeout_ms;
} else {
cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
}
err = mmc_wait_for_cmd(host, &cmd, 5);
if (err)
return err;
*stop_status = cmd.resp[0];
/* No need to check card status in case of READ. */
if (rq_data_dir(req) == READ)
return 0;
if (!mmc_host_is_spi(host) &&
(*stop_status & R1_ERROR)) {
pr_err("%s: %s: general error sending stop command, resp %#x\n",
req->rq_disk->disk_name, __func__, *stop_status);
*gen_err = true;
}
return card_busy_detect(card, timeout_ms, use_r1b_resp, req, gen_err);
}
#define ERR_NOMEDIUM 3
#define ERR_RETRY 2
#define ERR_ABORT 1
#define ERR_CONTINUE 0
static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
bool status_valid, u32 status)
{
switch (error) {
case -EILSEQ:
/* response crc error, retry the r/w cmd */
pr_err("%s: %s sending %s command, card status %#x\n",
req->rq_disk->disk_name, "response CRC error",
name, status);
return ERR_RETRY;
case -ETIMEDOUT:
pr_err("%s: %s sending %s command, card status %#x\n",
req->rq_disk->disk_name, "timed out", name, status);
/* If the status cmd initially failed, retry the r/w cmd */
if (!status_valid) {
pr_err("%s: status not valid, retrying timeout\n",
req->rq_disk->disk_name);
return ERR_RETRY;
}
/*
* If it was a r/w cmd crc error, or illegal command
* (eg, issued in wrong state) then retry - we should
* have corrected the state problem above.
*/
if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) {
pr_err("%s: command error, retrying timeout\n",
req->rq_disk->disk_name);
return ERR_RETRY;
}
/* Otherwise abort the command */
return ERR_ABORT;
default:
/* We don't understand the error code the driver gave us */
pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
req->rq_disk->disk_name, error, status);
return ERR_ABORT;
}
}
/*
* Initial r/w and stop cmd error recovery.
* We don't know whether the card received the r/w cmd or not, so try to
* restore things back to a sane state. Essentially, we do this as follows:
* - Obtain card status. If the first attempt to obtain card status fails,
* the status word will reflect the failed status cmd, not the failed
* r/w cmd. If we fail to obtain card status, it suggests we can no
* longer communicate with the card.
* - Check the card state. If the card received the cmd but there was a
* transient problem with the response, it might still be in a data transfer
* mode. Try to send it a stop command. If this fails, we can't recover.
* - If the r/w cmd failed due to a response CRC error, it was probably
* transient, so retry the cmd.
* - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry.
* - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or
* illegal cmd, retry.
* Otherwise we don't understand what happened, so abort.
*/
static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
struct mmc_blk_request *brq, bool *ecc_err, bool *gen_err)
{
bool prev_cmd_status_valid = true;
u32 status, stop_status = 0;
int err, retry;
if (mmc_card_removed(card))
return ERR_NOMEDIUM;
/*
* Try to get card status which indicates both the card state
* and why there was no response. If the first attempt fails,
* we can't be sure the returned status is for the r/w command.
*/
for (retry = 2; retry >= 0; retry--) {
err = get_card_status(card, &status, 0);
if (!err)
break;
/* Re-tune if needed */
mmc_retune_recheck(card->host);
prev_cmd_status_valid = false;
pr_err("%s: error %d sending status command, %sing\n",
req->rq_disk->disk_name, err, retry ? "retry" : "abort");
}
/* We couldn't get a response from the card. Give up. */
if (err) {
/* Check if the card is removed */
if (mmc_detect_card_removed(card->host))
return ERR_NOMEDIUM;
return ERR_ABORT;
}
/* Flag ECC errors */
if ((status & R1_CARD_ECC_FAILED) ||
(brq->stop.resp[0] & R1_CARD_ECC_FAILED) ||
(brq->cmd.resp[0] & R1_CARD_ECC_FAILED))
*ecc_err = true;
/* Flag General errors */
if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
if ((status & R1_ERROR) ||
(brq->stop.resp[0] & R1_ERROR)) {
pr_err("%s: %s: general error sending stop or status command, stop cmd response %#x, card status %#x\n",
req->rq_disk->disk_name, __func__,
brq->stop.resp[0], status);
*gen_err = true;
}
/*
* Check the current card state. If it is in some data transfer
* mode, tell it to stop (and hopefully transition back to TRAN.)
*/
if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
R1_CURRENT_STATE(status) == R1_STATE_RCV) {
err = send_stop(card,
DIV_ROUND_UP(brq->data.timeout_ns, 1000000),
req, gen_err, &stop_status);
if (err) {
pr_err("%s: error %d sending stop command\n",
req->rq_disk->disk_name, err);
/*
* If the stop cmd also timed out, the card is probably
* not present, so abort. Other errors are bad news too.
*/
return ERR_ABORT;
}
if (stop_status & R1_CARD_ECC_FAILED)
*ecc_err = true;
}
/* Check for set block count errors */
if (brq->sbc.error)
return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error,
prev_cmd_status_valid, status);
/* Check for r/w command errors */
if (brq->cmd.error)
return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error,
prev_cmd_status_valid, status);
/* Data errors */
if (!brq->stop.error)
return ERR_CONTINUE;
/* Now for stop errors. These aren't fatal to the transfer. */
pr_info("%s: error %d sending stop command, original cmd response %#x, card status %#x\n",
req->rq_disk->disk_name, brq->stop.error,
brq->cmd.resp[0], status);
/*
* Subsitute in our own stop status as this will give the error
* state which happened during the execution of the r/w command.
*/
if (stop_status) {
brq->stop.resp[0] = stop_status;
brq->stop.error = 0;
}
return ERR_CONTINUE;
}
static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host,
int type)
{
int err;
if (md->reset_done & type)
return -EEXIST;
md->reset_done |= type;
err = mmc_hw_reset(host);
/* Ensure we switch back to the correct partition */
if (err != -EOPNOTSUPP) {
struct mmc_blk_data *main_md =
dev_get_drvdata(&host->card->dev);
int part_err;
main_md->part_curr = main_md->part_type;
part_err = mmc_blk_part_switch(host->card, md);
if (part_err) {
/*
* We have failed to get back into the correct
* partition, so we need to abort the whole request.
*/
return -ENODEV;
}
}
return err;
}
static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type)
{
md->reset_done &= ~type;
}
mmc: card: Don't access RPMB partitions for normal read/write During kernel boot, it will try to read some logical sectors of each block device node for the possible partition table. But since RPMB partition is special and can not be accessed by normal eMMC read / write CMDs, it will cause below error messages during kernel boot: ... mmc0: Got data interrupt 0x00000002 even though no data operation was in progress. mmcblk0rpmb: error -110 transferring data, sector 0, nr 32, cmd response 0x900, card status 0xb00 mmcblk0rpmb: retrying using single block read mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 end_request: I/O error, dev mmcblk0rpmb, sector 0 Buffer I/O error on device mmcblk0rpmb, logical block 0 end_request: I/O error, dev mmcblk0rpmb, sector 8 Buffer I/O error on device mmcblk0rpmb, logical block 1 end_request: I/O error, dev mmcblk0rpmb, sector 16 Buffer I/O error on device mmcblk0rpmb, logical block 2 end_request: I/O error, dev mmcblk0rpmb, sector 24 Buffer I/O error on device mmcblk0rpmb, logical block 3 ... This patch will discard the access request in eMMC queue if it is RPMB partition access request. By this way, it avoids trigger above error messages. Fixes: 090d25fe224c ("mmc: core: Expose access to RPMB partition") Signed-off-by: Yunpeng Gao <yunpeng.gao@intel.com> Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com> Tested-by: Michael Shigorin <mike@altlinux.org> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2014-08-12 12:01:30 +08:00
int mmc_access_rpmb(struct mmc_queue *mq)
{
struct mmc_blk_data *md = mq->blkdata;
mmc: card: Don't access RPMB partitions for normal read/write During kernel boot, it will try to read some logical sectors of each block device node for the possible partition table. But since RPMB partition is special and can not be accessed by normal eMMC read / write CMDs, it will cause below error messages during kernel boot: ... mmc0: Got data interrupt 0x00000002 even though no data operation was in progress. mmcblk0rpmb: error -110 transferring data, sector 0, nr 32, cmd response 0x900, card status 0xb00 mmcblk0rpmb: retrying using single block read mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 mmcblk0rpmb: timed out sending r/w cmd command, card status 0x400900 end_request: I/O error, dev mmcblk0rpmb, sector 0 Buffer I/O error on device mmcblk0rpmb, logical block 0 end_request: I/O error, dev mmcblk0rpmb, sector 8 Buffer I/O error on device mmcblk0rpmb, logical block 1 end_request: I/O error, dev mmcblk0rpmb, sector 16 Buffer I/O error on device mmcblk0rpmb, logical block 2 end_request: I/O error, dev mmcblk0rpmb, sector 24 Buffer I/O error on device mmcblk0rpmb, logical block 3 ... This patch will discard the access request in eMMC queue if it is RPMB partition access request. By this way, it avoids trigger above error messages. Fixes: 090d25fe224c ("mmc: core: Expose access to RPMB partition") Signed-off-by: Yunpeng Gao <yunpeng.gao@intel.com> Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com> Tested-by: Michael Shigorin <mike@altlinux.org> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2014-08-12 12:01:30 +08:00
/*
* If this is a RPMB partition access, return ture
*/
if (md && md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB)
return true;
return false;
}
static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
{
struct mmc_blk_data *md = mq->blkdata;
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_DISCARD;
if (!mmc_can_erase(card)) {
err = -EOPNOTSUPP;
goto fail;
}
from = blk_rq_pos(req);
nr = blk_rq_sectors(req);
if (mmc_can_discard(card))
arg = MMC_DISCARD_ARG;
else if (mmc_can_trim(card))
arg = MMC_TRIM_ARG;
else
arg = MMC_ERASE_ARG;
do {
err = 0;
if (card->quirks & MMC_QUIRK_INAND_CMD38) {
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
INAND_CMD38_ARG_EXT_CSD,
arg == MMC_TRIM_ARG ?
INAND_CMD38_ARG_TRIM :
INAND_CMD38_ARG_ERASE,
0);
}
if (!err)
err = mmc_erase(card, from, nr, arg);
} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
if (!err)
mmc_blk_reset_success(md, type);
fail:
mmc: block: replace __blk_end_request() with blk_end_request() For completing any block request, MMC block driver is calling: spin_lock_irq(queue) __blk_end_request() spin_unlock_irq(queue) But if we analyze the sources of latency in kernel using ftrace, __blk_end_request() function at times may take up to 6.5ms with spinlock held and irq disabled. __blk_end_request() calls couple of functions and ftrace output shows that blk_update_bidi_request() function is almost taking 6ms. There are 2 function to end the current request: ___blk_end_request() and blk_end_request(). Both these functions do same thing except that blk_end_request() function doesn't take up the spinlock while calling the blk_update_bidi_request(). This patch replaces all __blk_end_request() calls with blk_end_request() and __blk_end_request_all() calls with blk_end_request_all(). Testing done: 20 process concurrent read/write on sd card and eMMC. Ran this test for almost a day on multicore system and no errors observed. This change is not meant for improving MMC throughput; it's basically about becoming fair to other threads/interrupts in the system. By holding spin lock and interrupts disabled for longer duration, we won't allow other threads/interrupts to run at all. Actually slight performance degradation at file system level can be expected as we are not holding the spin lock during blk_update_bidi_request() which means our mmcqd thread may get preempted for other high priority thread or any interrupt in the system. These are performance numbers (100MB file write) with eMMC running in DDR mode: Without this patch: Name of the Test Value Unit LMDD Read Test 53.79 MBPS LMDD Write Test 18.86 MBPS IOZONE Read Test 51.65 MBPS IOZONE Write Test 24.36 MBPS With this patch: Name of the Test Value Unit LMDD Read Test 52.94 MBPS LMDD Write Test 16.70 MBPS IOZONE Read Test 52.08 MBPS IOZONE Write Test 23.29 MBPS Read numbers are fine. Write numbers are bit down (especially LMDD write), may be because write requests normally have large transfer size and which means there are chances that while mmcq is executing blk_update_bidi_request(), it may get interrupted by interrupts or other high priority thread. Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org> Reviewed-by: Namjae Jeon <linkinjeon@gmail.com> Signed-off-by: Chris Ball <cjb@laptop.org>
2012-06-07 18:16:58 +08:00
blk_end_request(req, err, blk_rq_bytes(req));
}
static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
struct request *req)
{
struct mmc_blk_data *md = mq->blkdata;
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_SECDISCARD;
if (!(mmc_can_secure_erase_trim(card))) {
err = -EOPNOTSUPP;
goto out;
}
from = blk_rq_pos(req);
nr = blk_rq_sectors(req);
if (mmc_can_trim(card) && !mmc_erase_group_aligned(card, from, nr))
arg = MMC_SECURE_TRIM1_ARG;
else
arg = MMC_SECURE_ERASE_ARG;
retry:
if (card->quirks & MMC_QUIRK_INAND_CMD38) {
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
INAND_CMD38_ARG_EXT_CSD,
arg == MMC_SECURE_TRIM1_ARG ?
INAND_CMD38_ARG_SECTRIM1 :
INAND_CMD38_ARG_SECERASE,
0);
if (err)
goto out_retry;
}
err = mmc_erase(card, from, nr, arg);
if (err == -EIO)
goto out_retry;
if (err)
goto out;
if (arg == MMC_SECURE_TRIM1_ARG) {
if (card->quirks & MMC_QUIRK_INAND_CMD38) {
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
INAND_CMD38_ARG_EXT_CSD,
INAND_CMD38_ARG_SECTRIM2,
0);
if (err)
goto out_retry;
}
err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
if (err == -EIO)
goto out_retry;
if (err)
goto out;
}
out_retry:
if (err && !mmc_blk_reset(md, card->host, type))
goto retry;
if (!err)
mmc_blk_reset_success(md, type);
out:
mmc: block: replace __blk_end_request() with blk_end_request() For completing any block request, MMC block driver is calling: spin_lock_irq(queue) __blk_end_request() spin_unlock_irq(queue) But if we analyze the sources of latency in kernel using ftrace, __blk_end_request() function at times may take up to 6.5ms with spinlock held and irq disabled. __blk_end_request() calls couple of functions and ftrace output shows that blk_update_bidi_request() function is almost taking 6ms. There are 2 function to end the current request: ___blk_end_request() and blk_end_request(). Both these functions do same thing except that blk_end_request() function doesn't take up the spinlock while calling the blk_update_bidi_request(). This patch replaces all __blk_end_request() calls with blk_end_request() and __blk_end_request_all() calls with blk_end_request_all(). Testing done: 20 process concurrent read/write on sd card and eMMC. Ran this test for almost a day on multicore system and no errors observed. This change is not meant for improving MMC throughput; it's basically about becoming fair to other threads/interrupts in the system. By holding spin lock and interrupts disabled for longer duration, we won't allow other threads/interrupts to run at all. Actually slight performance degradation at file system level can be expected as we are not holding the spin lock during blk_update_bidi_request() which means our mmcqd thread may get preempted for other high priority thread or any interrupt in the system. These are performance numbers (100MB file write) with eMMC running in DDR mode: Without this patch: Name of the Test Value Unit LMDD Read Test 53.79 MBPS LMDD Write Test 18.86 MBPS IOZONE Read Test 51.65 MBPS IOZONE Write Test 24.36 MBPS With this patch: Name of the Test Value Unit LMDD Read Test 52.94 MBPS LMDD Write Test 16.70 MBPS IOZONE Read Test 52.08 MBPS IOZONE Write Test 23.29 MBPS Read numbers are fine. Write numbers are bit down (especially LMDD write), may be because write requests normally have large transfer size and which means there are chances that while mmcq is executing blk_update_bidi_request(), it may get interrupted by interrupts or other high priority thread. Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org> Reviewed-by: Namjae Jeon <linkinjeon@gmail.com> Signed-off-by: Chris Ball <cjb@laptop.org>
2012-06-07 18:16:58 +08:00
blk_end_request(req, err, blk_rq_bytes(req));
}
static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
{
struct mmc_blk_data *md = mq->blkdata;
struct mmc_card *card = md->queue.card;
int ret = 0;
ret = mmc_flush_cache(card);
if (ret)
ret = -EIO;
mmc: block: replace __blk_end_request() with blk_end_request() For completing any block request, MMC block driver is calling: spin_lock_irq(queue) __blk_end_request() spin_unlock_irq(queue) But if we analyze the sources of latency in kernel using ftrace, __blk_end_request() function at times may take up to 6.5ms with spinlock held and irq disabled. __blk_end_request() calls couple of functions and ftrace output shows that blk_update_bidi_request() function is almost taking 6ms. There are 2 function to end the current request: ___blk_end_request() and blk_end_request(). Both these functions do same thing except that blk_end_request() function doesn't take up the spinlock while calling the blk_update_bidi_request(). This patch replaces all __blk_end_request() calls with blk_end_request() and __blk_end_request_all() calls with blk_end_request_all(). Testing done: 20 process concurrent read/write on sd card and eMMC. Ran this test for almost a day on multicore system and no errors observed. This change is not meant for improving MMC throughput; it's basically about becoming fair to other threads/interrupts in the system. By holding spin lock and interrupts disabled for longer duration, we won't allow other threads/interrupts to run at all. Actually slight performance degradation at file system level can be expected as we are not holding the spin lock during blk_update_bidi_request() which means our mmcqd thread may get preempted for other high priority thread or any interrupt in the system. These are performance numbers (100MB file write) with eMMC running in DDR mode: Without this patch: Name of the Test Value Unit LMDD Read Test 53.79 MBPS LMDD Write Test 18.86 MBPS IOZONE Read Test 51.65 MBPS IOZONE Write Test 24.36 MBPS With this patch: Name of the Test Value Unit LMDD Read Test 52.94 MBPS LMDD Write Test 16.70 MBPS IOZONE Read Test 52.08 MBPS IOZONE Write Test 23.29 MBPS Read numbers are fine. Write numbers are bit down (especially LMDD write), may be because write requests normally have large transfer size and which means there are chances that while mmcq is executing blk_update_bidi_request(), it may get interrupted by interrupts or other high priority thread. Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org> Reviewed-by: Namjae Jeon <linkinjeon@gmail.com> Signed-off-by: Chris Ball <cjb@laptop.org>
2012-06-07 18:16:58 +08:00
blk_end_request_all(req, ret);
}
/*
* Reformat current write as a reliable write, supporting
* both legacy and the enhanced reliable write MMC cards.
* In each transfer we'll handle only as much as a single
* reliable write can handle, thus finish the request in
* partial completions.
*/
static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
struct mmc_card *card,
struct request *req)
{
if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) {
/* Legacy mode imposes restrictions on transfers. */
if (!IS_ALIGNED(blk_rq_pos(req), card->ext_csd.rel_sectors))
brq->data.blocks = 1;
if (brq->data.blocks > card->ext_csd.rel_sectors)
brq->data.blocks = card->ext_csd.rel_sectors;
else if (brq->data.blocks < card->ext_csd.rel_sectors)
brq->data.blocks = 1;
}
}
#define CMD_ERRORS \
(R1_OUT_OF_RANGE | /* Command argument out of range */ \
R1_ADDRESS_ERROR | /* Misaligned address */ \
R1_BLOCK_LEN_ERROR | /* Transferred block length incorrect */\
R1_WP_VIOLATION | /* Tried to write to protected block */ \
R1_CC_ERROR | /* Card controller error */ \
R1_ERROR) /* General/unknown error */
mmc: core: use enum mmc_blk_status properly There were several instances of code using the enum mmc_blk_status by arbitrarily converting it to an int and throwing it around to different functions. This makes the code hard to understand to may give rise to strange errors. Especially the function prototype mmc_start_req() had to be modified to take a pointer to an enum mmc_blk_status and the function pointer .err_check() inside struct mmc_async_req needed to return an enum mmc_blk_status. In every case: instead of assigning the block layer error code to an int, use the enum, also change the signature of all functions actually passing this enum to use the enum. To make it possible to use the enum everywhere applicable, move it to <linux/mmc/core.h> so that all code actually using it can also see it. An interesting case was encountered in the MMC test code which did not return a enum mmc_blk_status at all in the .err_check function supposed to check whether asynchronous requests worked or not: instead it returned a normal -ERROR or even the test frameworks internal error codes. The test code would also pass on enum mmc_blk_status codes as error codes inside the test code instead of converting them to the local RESULT_* codes. I have tried to fix all instances properly and run some tests on the result. Cc: Chunyan Zhang <zhang.chunyan@linaro.org> Cc: Baolin Wang <baolin.wang@linaro.org> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-11-04 18:05:19 +08:00
static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card,
struct mmc_async_req *areq)
{
struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req,
areq);
struct mmc_blk_request *brq = &mq_mrq->brq;
struct request *req = mq_mrq->req;
int need_retune = card->host->need_retune;
bool ecc_err = false;
bool gen_err = false;
/*
* sbc.error indicates a problem with the set block count
* command. No data will have been transferred.
*
* cmd.error indicates a problem with the r/w command. No
* data will have been transferred.
*
* stop.error indicates a problem with the stop command. Data
* may have been transferred, or may still be transferring.
*/
if (brq->sbc.error || brq->cmd.error || brq->stop.error ||
brq->data.error) {
switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) {
case ERR_RETRY:
return MMC_BLK_RETRY;
case ERR_ABORT:
return MMC_BLK_ABORT;
case ERR_NOMEDIUM:
return MMC_BLK_NOMEDIUM;
case ERR_CONTINUE:
break;
}
}
/*
* Check for errors relating to the execution of the
* initial command - such as address errors. No data
* has been transferred.
*/
if (brq->cmd.resp[0] & CMD_ERRORS) {
pr_err("%s: r/w command failed, status = %#x\n",
req->rq_disk->disk_name, brq->cmd.resp[0]);
return MMC_BLK_ABORT;
}
/*
* Everything else is either success, or a data error of some
* kind. If it was a write, we may have transitioned to
* program mode, which we have to wait for it to complete.
*/
if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
int err;
/* Check stop command response */
if (brq->stop.resp[0] & R1_ERROR) {
pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
req->rq_disk->disk_name, __func__,
brq->stop.resp[0]);
gen_err = true;
}
err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, false, req,
&gen_err);
if (err)
return MMC_BLK_CMD_ERR;
}
/* if general error occurs, retry the write operation. */
if (gen_err) {
pr_warn("%s: retrying write for general error\n",
req->rq_disk->disk_name);
return MMC_BLK_RETRY;
}
if (brq->data.error) {
if (need_retune && !brq->retune_retry_done) {
pr_debug("%s: retrying because a re-tune was needed\n",
req->rq_disk->disk_name);
brq->retune_retry_done = 1;
return MMC_BLK_RETRY;
}
pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
req->rq_disk->disk_name, brq->data.error,
(unsigned)blk_rq_pos(req),
(unsigned)blk_rq_sectors(req),
brq->cmd.resp[0], brq->stop.resp[0]);
if (rq_data_dir(req) == READ) {
if (ecc_err)
return MMC_BLK_ECC_ERR;
return MMC_BLK_DATA_ERR;
} else {
return MMC_BLK_CMD_ERR;
}
}
if (!brq->data.bytes_xfered)
return MMC_BLK_RETRY;
if (blk_rq_bytes(req) != brq->data.bytes_xfered)
return MMC_BLK_PARTIAL;
return MMC_BLK_SUCCESS;
}
static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq,
int disable_multi, bool *do_rel_wr,
bool *do_data_tag)
{
struct mmc_blk_data *md = mq->blkdata;
struct mmc_card *card = md->queue.card;
struct mmc_blk_request *brq = &mqrq->brq;
struct request *req = mqrq->req;
/*
* Reliable writes are used to implement Forced Unit Access and
* are supported only on MMCs.
*/
*do_rel_wr = (req->cmd_flags & REQ_FUA) &&
rq_data_dir(req) == WRITE &&
(md->flags & MMC_BLK_REL_WR);
memset(brq, 0, sizeof(struct mmc_blk_request));
brq->mrq.data = &brq->data;
brq->stop.opcode = MMC_STOP_TRANSMISSION;
brq->stop.arg = 0;
if (rq_data_dir(req) == READ) {
brq->data.flags = MMC_DATA_READ;
brq->stop.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
} else {
brq->data.flags = MMC_DATA_WRITE;
brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
}
brq->data.blksz = 512;
brq->data.blocks = blk_rq_sectors(req);
/*
* The block layer doesn't support all sector count
* restrictions, so we need to be prepared for too big
* requests.
*/
if (brq->data.blocks > card->host->max_blk_count)
brq->data.blocks = card->host->max_blk_count;
if (brq->data.blocks > 1) {
/*
* After a read error, we redo the request one sector
* at a time in order to accurately determine which
* sectors can be read successfully.
*/
if (disable_multi)
brq->data.blocks = 1;
/*
* Some controllers have HW issues while operating
* in multiple I/O mode
*/
if (card->host->ops->multi_io_quirk)
brq->data.blocks = card->host->ops->multi_io_quirk(card,
(rq_data_dir(req) == READ) ?
MMC_DATA_READ : MMC_DATA_WRITE,
brq->data.blocks);
}
if (*do_rel_wr)
mmc_apply_rel_rw(brq, card, req);
/*
* Data tag is used only during writing meta data to speed
* up write and any subsequent read of this meta data
*/
*do_data_tag = card->ext_csd.data_tag_unit_size &&
(req->cmd_flags & REQ_META) &&
(rq_data_dir(req) == WRITE) &&
((brq->data.blocks * brq->data.blksz) >=
card->ext_csd.data_tag_unit_size);
mmc_set_data_timeout(&brq->data, card);
brq->data.sg = mqrq->sg;
brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
/*
* Adjust the sg list so it is the same size as the
* request.
*/
if (brq->data.blocks != blk_rq_sectors(req)) {
int i, data_size = brq->data.blocks << 9;
struct scatterlist *sg;
for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
data_size -= sg->length;
if (data_size <= 0) {
sg->length += data_size;
i++;
break;
}
}
brq->data.sg_len = i;
}
mqrq->areq.mrq = &brq->mrq;
mmc_queue_bounce_pre(mqrq);
}
static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
struct mmc_card *card,
int disable_multi,
struct mmc_queue *mq)
{
u32 readcmd, writecmd;
struct mmc_blk_request *brq = &mqrq->brq;
struct request *req = mqrq->req;
struct mmc_blk_data *md = mq->blkdata;
bool do_rel_wr, do_data_tag;
mmc_blk_data_prep(mq, mqrq, disable_multi, &do_rel_wr, &do_data_tag);
brq->mrq.cmd = &brq->cmd;
brq->cmd.arg = blk_rq_pos(req);
if (!mmc_card_blockaddr(card))
brq->cmd.arg <<= 9;
brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
if (brq->data.blocks > 1 || do_rel_wr) {
/* SPI multiblock writes terminate using a special
* token, not a STOP_TRANSMISSION request.
*/
if (!mmc_host_is_spi(card->host) ||
rq_data_dir(req) == READ)
brq->mrq.stop = &brq->stop;
readcmd = MMC_READ_MULTIPLE_BLOCK;
writecmd = MMC_WRITE_MULTIPLE_BLOCK;
} else {
brq->mrq.stop = NULL;
readcmd = MMC_READ_SINGLE_BLOCK;
writecmd = MMC_WRITE_BLOCK;
}
brq->cmd.opcode = rq_data_dir(req) == READ ? readcmd : writecmd;
/*
* Pre-defined multi-block transfers are preferable to
* open ended-ones (and necessary for reliable writes).
* However, it is not sufficient to just send CMD23,
* and avoid the final CMD12, as on an error condition
* CMD12 (stop) needs to be sent anyway. This, coupled
* with Auto-CMD23 enhancements provided by some
* hosts, means that the complexity of dealing
* with this is best left to the host. If CMD23 is
* supported by card and host, we'll fill sbc in and let
* the host deal with handling it correctly. This means
* that for hosts that don't expose MMC_CAP_CMD23, no
* change of behavior will be observed.
*
* N.B: Some MMC cards experience perf degradation.
* We'll avoid using CMD23-bounded multiblock writes for
* these, while retaining features like reliable writes.
*/
if ((md->flags & MMC_BLK_CMD23) && mmc_op_multi(brq->cmd.opcode) &&
(do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23) ||
do_data_tag)) {
brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
brq->sbc.arg = brq->data.blocks |
(do_rel_wr ? (1 << 31) : 0) |
(do_data_tag ? (1 << 29) : 0);
brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
brq->mrq.sbc = &brq->sbc;
}
mqrq->areq.err_check = mmc_blk_err_check;
}
static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
struct mmc_blk_request *brq, struct request *req,
bool old_req_pending)
{
bool req_pending;
/*
* If this is an SD card and we're writing, we can first
* mark the known good sectors as ok.
*
* If the card is not SD, we can still ok written sectors
* as reported by the controller (which might be less than
* the real number of written sectors, but never more).
*/
if (mmc_card_sd(card)) {
u32 blocks;
int err;
err = mmc_sd_num_wr_blocks(card, &blocks);
if (err)
req_pending = old_req_pending;
else
req_pending = blk_end_request(req, 0, blocks << 9);
} else {
req_pending = blk_end_request(req, 0, brq->data.bytes_xfered);
}
return req_pending;
}
static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
struct request *req,
struct mmc_queue_req *mqrq)
{
if (mmc_card_removed(card))
req->rq_flags |= RQF_QUIET;
while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
mmc_queue_req_free(mq, mqrq);
}
/**
* mmc_blk_rw_try_restart() - tries to restart the current async request
* @mq: the queue with the card and host to restart
* @req: a new request that want to be started after the current one
*/
static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
struct mmc_queue_req *mqrq)
{
if (!req)
return;
/*
* If the card was removed, just cancel everything and return.
*/
if (mmc_card_removed(mq->card)) {
req->rq_flags |= RQF_QUIET;
blk_end_request_all(req, -EIO);
mmc_queue_req_free(mq, mqrq);
return;
}
/* Else proceed and try to restart the current async request */
mmc_blk_rw_rq_prep(mqrq, mq->card, 0, mq);
mmc_start_areq(mq->card->host, &mqrq->areq, NULL);
}
static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
{
struct mmc_blk_data *md = mq->blkdata;
struct mmc_card *card = md->queue.card;
struct mmc_blk_request *brq;
int disable_multi = 0, retry = 0, type, retune_retry_done = 0;
enum mmc_blk_status status;
struct mmc_queue_req *mqrq_cur = NULL;
struct mmc_queue_req *mq_rq;
struct request *old_req;
struct mmc_async_req *new_areq;
struct mmc_async_req *old_areq;
bool req_pending = true;
if (new_req) {
mqrq_cur = mmc_queue_req_find(mq, new_req);
if (!mqrq_cur) {
WARN_ON(1);
mmc_blk_requeue(mq->queue, new_req);
new_req = NULL;
}
}
if (!mq->qcnt)
return;
do {
if (new_req) {
/*
* When 4KB native sector is enabled, only 8 blocks
* multiple read or write is allowed
*/
if (mmc_large_sector(card) &&
!IS_ALIGNED(blk_rq_sectors(new_req), 8)) {
pr_err("%s: Transfer size is not 4KB sector size aligned\n",
new_req->rq_disk->disk_name);
mmc_blk_rw_cmd_abort(mq, card, new_req, mqrq_cur);
return;
}
mmc_blk_rw_rq_prep(mqrq_cur, card, 0, mq);
new_areq = &mqrq_cur->areq;
} else
new_areq = NULL;
old_areq = mmc_start_areq(card->host, new_areq, &status);
if (!old_areq) {
/*
* We have just put the first request into the pipeline
* and there is nothing more to do until it is
* complete.
*/
return;
}
/*
* An asynchronous request has been completed and we proceed
* to handle the result of it.
*/
mq_rq = container_of(old_areq, struct mmc_queue_req, areq);
brq = &mq_rq->brq;
old_req = mq_rq->req;
type = rq_data_dir(old_req) == READ ? MMC_BLK_READ : MMC_BLK_WRITE;
mmc_queue_bounce_post(mq_rq);
switch (status) {
case MMC_BLK_SUCCESS:
case MMC_BLK_PARTIAL:
/*
* A block was successfully transferred.
*/
mmc_blk_reset_success(md, type);
req_pending = blk_end_request(old_req, 0,
brq->data.bytes_xfered);
/*
* If the blk_end_request function returns non-zero even
* though all data has been transferred and no errors
* were returned by the host controller, it's a bug.
*/
if (status == MMC_BLK_SUCCESS && req_pending) {
pr_err("%s BUG rq_tot %d d_xfer %d\n",
__func__, blk_rq_bytes(old_req),
brq->data.bytes_xfered);
mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
return;
}
break;
case MMC_BLK_CMD_ERR:
req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending);
if (mmc_blk_reset(md, card->host, type)) {
if (req_pending)
mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
else
mmc_queue_req_free(mq, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
}
if (!req_pending) {
mmc_queue_req_free(mq, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
}
break;
case MMC_BLK_RETRY:
retune_retry_done = brq->retune_retry_done;
if (retry++ < 5)
break;
/* Fall through */
case MMC_BLK_ABORT:
if (!mmc_blk_reset(md, card->host, type))
break;
mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
case MMC_BLK_DATA_ERR: {
int err;
err = mmc_blk_reset(md, card->host, type);
if (!err)
break;
if (err == -ENODEV) {
mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
}
/* Fall through */
}
case MMC_BLK_ECC_ERR:
if (brq->data.blocks > 1) {
/* Redo read one sector at a time */
pr_warn("%s: retrying using single block read\n",
old_req->rq_disk->disk_name);
disable_multi = 1;
break;
}
/*
* After an error, we redo I/O one sector at a
* time, so we only reach here after trying to
* read a single sector.
*/
req_pending = blk_end_request(old_req, -EIO,
brq->data.blksz);
if (!req_pending) {
mmc_queue_req_free(mq, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
}
break;
case MMC_BLK_NOMEDIUM:
mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
default:
pr_err("%s: Unhandled return value (%d)",
old_req->rq_disk->disk_name, status);
mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
return;
}
if (req_pending) {
mmc: block: delete packed command support I've had it with this code now. The packed command support is a complex hurdle in the MMC/SD block layer, around 500+ lines of code which was introduced in 2013 in commit ce39f9d17c14 ("mmc: support packed write command for eMMC4.5 devices") commit abd9ac144947 ("mmc: add packed command feature of eMMC4.5") ...and since then it has been rotting. The original author of the code has disappeared from the community and the mail address is bouncing. For the code to be exercised the host must flag that it supports packed commands, so in mmc_blk_prep_packed_list() which is called for every single request, the following construction appears: u8 max_packed_rw = 0; if ((rq_data_dir(cur) == WRITE) && mmc_host_packed_wr(card->host)) max_packed_rw = card->ext_csd.max_packed_writes; if (max_packed_rw == 0) goto no_packed; This has the following logical deductions: - Only WRITE commands can really be packed, so the solution is only half-done: we support packed WRITE but not packed READ. The packed command support has not been finalized by supporting reads in three years! - mmc_host_packed_wr() is just a static inline that checks host->caps2 & MMC_CAP2_PACKED_WR. The problem with this is that NO upstream host sets this capability flag! No driver in the kernel is using it, and we can't test it. Packed command may be supported in out-of-tree code, but I doubt it. I doubt that the code is even working anymore due to other refactorings in the MMC block layer, who would notice if patches affecting it broke packed commands? No one. - There is no Device Tree binding or code to mark a host as supporting packed read or write commands, just this flag in caps2, so for sure there are not any DT systems using it either. It has other problems as well: mmc_blk_prep_packed_list() is speculatively picking requests out of the request queue with blk_fetch_request() making the MMC/SD stack harder to convert to the multiqueue block layer. By this we get rid of an obstacle. The way I see it this is just cruft littering the MMC/SD stack. Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Maya Erez <qca_merez@qca.qualcomm.com> Acked-by: Jaehoon Chung <jh80.chung@samsung.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-11-25 17:35:00 +08:00
/*
* In case of a incomplete request
* prepare it again and resend.
*/
mmc_blk_rw_rq_prep(mq_rq, card,
disable_multi, mq);
mmc_start_areq(card->host,
&mq_rq->areq, NULL);
mq_rq->brq.retune_retry_done = retune_retry_done;
}
} while (req_pending);
mmc_queue_req_free(mq, mq_rq);
}
void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
{
int ret;
struct mmc_blk_data *md = mq->blkdata;
struct mmc_card *card = md->queue.card;
if (req && !mq->qcnt)
/* claim host only for the first request */
mmc_get_card(card);
ret = mmc_blk_part_switch(card, md);
if (ret) {
if (req) {
mmc: block: replace __blk_end_request() with blk_end_request() For completing any block request, MMC block driver is calling: spin_lock_irq(queue) __blk_end_request() spin_unlock_irq(queue) But if we analyze the sources of latency in kernel using ftrace, __blk_end_request() function at times may take up to 6.5ms with spinlock held and irq disabled. __blk_end_request() calls couple of functions and ftrace output shows that blk_update_bidi_request() function is almost taking 6ms. There are 2 function to end the current request: ___blk_end_request() and blk_end_request(). Both these functions do same thing except that blk_end_request() function doesn't take up the spinlock while calling the blk_update_bidi_request(). This patch replaces all __blk_end_request() calls with blk_end_request() and __blk_end_request_all() calls with blk_end_request_all(). Testing done: 20 process concurrent read/write on sd card and eMMC. Ran this test for almost a day on multicore system and no errors observed. This change is not meant for improving MMC throughput; it's basically about becoming fair to other threads/interrupts in the system. By holding spin lock and interrupts disabled for longer duration, we won't allow other threads/interrupts to run at all. Actually slight performance degradation at file system level can be expected as we are not holding the spin lock during blk_update_bidi_request() which means our mmcqd thread may get preempted for other high priority thread or any interrupt in the system. These are performance numbers (100MB file write) with eMMC running in DDR mode: Without this patch: Name of the Test Value Unit LMDD Read Test 53.79 MBPS LMDD Write Test 18.86 MBPS IOZONE Read Test 51.65 MBPS IOZONE Write Test 24.36 MBPS With this patch: Name of the Test Value Unit LMDD Read Test 52.94 MBPS LMDD Write Test 16.70 MBPS IOZONE Read Test 52.08 MBPS IOZONE Write Test 23.29 MBPS Read numbers are fine. Write numbers are bit down (especially LMDD write), may be because write requests normally have large transfer size and which means there are chances that while mmcq is executing blk_update_bidi_request(), it may get interrupted by interrupts or other high priority thread. Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org> Reviewed-by: Namjae Jeon <linkinjeon@gmail.com> Signed-off-by: Chris Ball <cjb@laptop.org>
2012-06-07 18:16:58 +08:00
blk_end_request_all(req, -EIO);
}
goto out;
}
if (req && req_op(req) == REQ_OP_DISCARD) {
/* complete ongoing async transfer before issuing discard */
if (mq->qcnt)
mmc_blk_issue_rw_rq(mq, NULL);
mmc_blk_issue_discard_rq(mq, req);
} else if (req && req_op(req) == REQ_OP_SECURE_ERASE) {
/* complete ongoing async transfer before issuing secure erase*/
if (mq->qcnt)
mmc_blk_issue_rw_rq(mq, NULL);
mmc_blk_issue_secdiscard_rq(mq, req);
} else if (req && req_op(req) == REQ_OP_FLUSH) {
/* complete ongoing async transfer before issuing flush */
if (mq->qcnt)
mmc_blk_issue_rw_rq(mq, NULL);
mmc_blk_issue_flush(mq, req);
} else {
mmc_blk_issue_rw_rq(mq, req);
card->host->context_info.is_waiting_last_req = false;
}
out:
if (!mq->qcnt)
mmc_put_card(card);
}
static inline int mmc_blk_readonly(struct mmc_card *card)
{
return mmc_card_readonly(card) ||
!(card->csd.cmdclass & CCC_BLOCK_WRITE);
}
static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
struct device *parent,
sector_t size,
bool default_ro,
const char *subname,
int area_type)
{
struct mmc_blk_data *md;
int devidx, ret;
devidx = ida_simple_get(&mmc_blk_ida, 0, max_devices, GFP_KERNEL);
if (devidx < 0)
return ERR_PTR(devidx);
2007-07-19 16:49:03 +08:00
md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
if (!md) {
ret = -ENOMEM;
goto out;
}
md->area_type = area_type;
/*
* Set the read-only status based on the supported commands
* and the write protect switch.
*/
md->read_only = mmc_blk_readonly(card);
md->disk = alloc_disk(perdev_minors);
if (md->disk == NULL) {
ret = -ENOMEM;
goto err_kfree;
}
spin_lock_init(&md->lock);
INIT_LIST_HEAD(&md->part);
md->usage = 1;
ret = mmc_init_queue(&md->queue, card, &md->lock, subname);
if (ret)
goto err_putdisk;
md->queue.blkdata = md;
md->disk->major = MMC_BLOCK_MAJOR;
md->disk->first_minor = devidx * perdev_minors;
md->disk->fops = &mmc_bdops;
md->disk->private_data = md;
md->disk->queue = md->queue.queue;
md->parent = parent;
set_disk_ro(md->disk, md->read_only || default_ro);
md->disk->flags = GENHD_FL_EXT_DEVT;
if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
md->disk->flags |= GENHD_FL_NO_PART_SCAN;
/*
* As discussed on lkml, GENHD_FL_REMOVABLE should:
*
* - be set for removable media with permanent block devices
* - be unset for removable block devices with permanent media
*
* Since MMC block devices clearly fall under the second
* case, we do not set GENHD_FL_REMOVABLE. Userspace
* should use the block device creation/destruction hotplug
* messages to tell when the card is present.
*/
snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
mmc: block: Use the mmc host device index as the mmcblk device index Commit 520bd7a8b415 ("mmc: core: Optimize boot time by detecting cards simultaneously") causes regressions for some platforms. These platforms relies on fixed mmcblk device indexes, instead of deploying the defacto standard with UUID/PARTUUID. In other words their rootfs needs to be available at hardcoded paths, like /dev/mmcblk0p2. Such guarantees have never been made by the kernel, but clearly the above commit changes the behaviour. More precisely, because of that the order changes of how cards becomes detected, so do their corresponding mmcblk device indexes. As the above commit significantly improves boot time for some platforms (magnitude of seconds), let's avoid reverting this change but instead restore the behaviour of how mmcblk device indexes becomes picked. By using the same index for the mmcblk device as for the corresponding mmc host device, the probe order of mmc host devices decides the index we get for the mmcblk device. For those platforms that suffers from a regression, one could expect that this updated behaviour should be sufficient to meet their expectations of "fixed" mmcblk device indexes. Another side effect from this change, is that the same index is used for the mmc host device, the mmcblk device and the mmc block queue. That should clarify their relationship. Reported-by: Peter Hurley <peter@hurleysoftware.com> Reported-by: Laszlo Fiat <laszlo.fiat@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Fixes: 520bd7a8b415 ("mmc: core: Optimize boot time by detecting cards simultaneously") Cc: <stable@vger.kernel.org> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-04-06 22:12:08 +08:00
"mmcblk%u%s", card->host->index, subname ? subname : "");
if (mmc_card_mmc(card))
blk_queue_logical_block_size(md->queue.queue,
card->ext_csd.data_sector_size);
else
blk_queue_logical_block_size(md->queue.queue, 512);
set_capacity(md->disk, size);
if (mmc_host_cmd23(card->host)) {
if ((mmc_card_mmc(card) &&
card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
(mmc_card_sd(card) &&
card->scr.cmds & SD_SCR_CMD23_SUPPORT))
md->flags |= MMC_BLK_CMD23;
}
if (mmc_card_mmc(card) &&
md->flags & MMC_BLK_CMD23 &&
((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
card->ext_csd.rel_sectors)) {
md->flags |= MMC_BLK_REL_WR;
blk_queue_write_cache(md->queue.queue, true, true);
}
return md;
err_putdisk:
put_disk(md->disk);
err_kfree:
kfree(md);
out:
ida_simple_remove(&mmc_blk_ida, devidx);
return ERR_PTR(ret);
}
static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
{
sector_t size;
if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
/*
* The EXT_CSD sector count is in number or 512 byte
* sectors.
*/
size = card->ext_csd.sectors;
} else {
/*
* The CSD capacity field is in units of read_blkbits.
* set_capacity takes units of 512 bytes.
*/
size = (typeof(sector_t))card->csd.capacity
<< (card->csd.read_blkbits - 9);
}
return mmc_blk_alloc_req(card, &card->dev, size, false, NULL,
MMC_BLK_DATA_AREA_MAIN);
}
static int mmc_blk_alloc_part(struct mmc_card *card,
struct mmc_blk_data *md,
unsigned int part_type,
sector_t size,
bool default_ro,
const char *subname,
int area_type)
{
char cap_str[10];
struct mmc_blk_data *part_md;
part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
subname, area_type);
if (IS_ERR(part_md))
return PTR_ERR(part_md);
part_md->part_type = part_type;
list_add(&part_md->part, &md->part);
string_get_size((u64)get_capacity(part_md->disk), 512, STRING_UNITS_2,
cap_str, sizeof(cap_str));
pr_info("%s: %s %s partition %u %s\n",
part_md->disk->disk_name, mmc_card_id(card),
mmc_card_name(card), part_md->part_type, cap_str);
return 0;
}
/* MMC Physical partitions consist of two boot partitions and
* up to four general purpose partitions.
* For each partition enabled in EXT_CSD a block device will be allocatedi
* to provide access to the partition.
*/
static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md)
{
int idx, ret = 0;
if (!mmc_card_mmc(card))
return 0;
for (idx = 0; idx < card->nr_parts; idx++) {
if (card->part[idx].size) {
ret = mmc_blk_alloc_part(card, md,
card->part[idx].part_cfg,
card->part[idx].size >> 9,
card->part[idx].force_ro,
card->part[idx].name,
card->part[idx].area_type);
if (ret)
return ret;
}
}
return ret;
}
static void mmc_blk_remove_req(struct mmc_blk_data *md)
{
struct mmc_card *card;
if (md) {
mmc: reordered shutdown sequence in mmc_bld_remove_req We had a multi-partition SD-Card with two ext2 file systems. The partition table was getting overwritten by a race between the card removal and the unmount of the 2nd ext2 partition. What was observed: 1. Suspend/resume would call to remove the device. The clearing of the device information is done asynchronously. 2. A request is made to unmount the file system (this is called after the removal has started). 3. The remapping table was cleared by the asynchronous part of the device removal. 4. A write request to the super block (block 0 of the partition) was sent down and instead of being remapped to the partition offset, it was remapped to block 0 of the device which is where the partition table is located. 5. Write was queued and written resulting in the overwriting of the partition table with the ext2 super block. 6. The mmc_queue is cleaned up. The mmc card device driver used to access SD cards, was calling del_gendisk before calling mmc_cleanup-queue. The comment in the mmc_blk_remove_req code indicated that it expected del_gendisk to block all further requests from being queued but it doesn't. The mmc driver uses the presences of the mmc_queue to determine if the request should be queued. The fix was to clean up the mmc_queue before the rest of the the delete partition code is called. This prevents the overwriting of the partition table. However, the umount gets an error trying to write the super block. The umount should be issued before the device is removed but that is not always possible. The umount is still needed to cleanup other data structures. Addresses the problem described in http://crbug.com/240815 Signed-off-by: Paul Taysom <taysom@chromium.org> Signed-off-by: Chris Ball <cjb@laptop.org>
2013-06-05 05:42:40 +08:00
/*
* Flush remaining requests and free queues. It
* is freeing the queue that stops new requests
* from being accepted.
*/
card = md->queue.card;
mmc: reordered shutdown sequence in mmc_bld_remove_req We had a multi-partition SD-Card with two ext2 file systems. The partition table was getting overwritten by a race between the card removal and the unmount of the 2nd ext2 partition. What was observed: 1. Suspend/resume would call to remove the device. The clearing of the device information is done asynchronously. 2. A request is made to unmount the file system (this is called after the removal has started). 3. The remapping table was cleared by the asynchronous part of the device removal. 4. A write request to the super block (block 0 of the partition) was sent down and instead of being remapped to the partition offset, it was remapped to block 0 of the device which is where the partition table is located. 5. Write was queued and written resulting in the overwriting of the partition table with the ext2 super block. 6. The mmc_queue is cleaned up. The mmc card device driver used to access SD cards, was calling del_gendisk before calling mmc_cleanup-queue. The comment in the mmc_blk_remove_req code indicated that it expected del_gendisk to block all further requests from being queued but it doesn't. The mmc driver uses the presences of the mmc_queue to determine if the request should be queued. The fix was to clean up the mmc_queue before the rest of the the delete partition code is called. This prevents the overwriting of the partition table. However, the umount gets an error trying to write the super block. The umount should be issued before the device is removed but that is not always possible. The umount is still needed to cleanup other data structures. Addresses the problem described in http://crbug.com/240815 Signed-off-by: Paul Taysom <taysom@chromium.org> Signed-off-by: Chris Ball <cjb@laptop.org>
2013-06-05 05:42:40 +08:00
mmc_cleanup_queue(&md->queue);
if (md->disk->flags & GENHD_FL_UP) {
device_remove_file(disk_to_dev(md->disk), &md->force_ro);
if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
card->ext_csd.boot_ro_lockable)
device_remove_file(disk_to_dev(md->disk),
&md->power_ro_lock);
del_gendisk(md->disk);
}
mmc_blk_put(md);
}
}
static void mmc_blk_remove_parts(struct mmc_card *card,
struct mmc_blk_data *md)
{
struct list_head *pos, *q;
struct mmc_blk_data *part_md;
list_for_each_safe(pos, q, &md->part) {
part_md = list_entry(pos, struct mmc_blk_data, part);
list_del(pos);
mmc_blk_remove_req(part_md);
}
}
static int mmc_add_disk(struct mmc_blk_data *md)
{
int ret;
struct mmc_card *card = md->queue.card;
device_add_disk(md->parent, md->disk);
md->force_ro.show = force_ro_show;
md->force_ro.store = force_ro_store;
sysfs_attr_init(&md->force_ro.attr);
md->force_ro.attr.name = "force_ro";
md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
if (ret)
goto force_ro_fail;
if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
card->ext_csd.boot_ro_lockable) {
umode_t mode;
if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
mode = S_IRUGO;
else
mode = S_IRUGO | S_IWUSR;
md->power_ro_lock.show = power_ro_lock_show;
md->power_ro_lock.store = power_ro_lock_store;
sysfs_attr_init(&md->power_ro_lock.attr);
md->power_ro_lock.attr.mode = mode;
md->power_ro_lock.attr.name =
"ro_lock_until_next_power_on";
ret = device_create_file(disk_to_dev(md->disk),
&md->power_ro_lock);
if (ret)
goto power_ro_lock_fail;
}
return ret;
power_ro_lock_fail:
device_remove_file(disk_to_dev(md->disk), &md->force_ro);
force_ro_fail:
del_gendisk(md->disk);
return ret;
}
static int mmc_blk_probe(struct mmc_card *card)
{
struct mmc_blk_data *md, *part_md;
char cap_str[10];
int ret;
/*
* Check that the card supports the command class(es) we need.
*/
if (!(card->csd.cmdclass & CCC_BLOCK_READ))
return -ENODEV;
mmc_fixup_device(card, mmc_blk_fixups);
ret = mmc_queue_alloc_shared_queue(card);
if (ret)
return ret;
md = mmc_blk_alloc(card);
if (IS_ERR(md)) {
mmc_queue_free_shared_queue(card);
return PTR_ERR(md);
}
string_get_size((u64)get_capacity(md->disk), 512, STRING_UNITS_2,
cap_str, sizeof(cap_str));
pr_info("%s: %s %s %s %s\n",
md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
cap_str, md->read_only ? "(ro)" : "");
if (mmc_blk_alloc_parts(card, md))
goto out;
dev_set_drvdata(&card->dev, md);
if (mmc_add_disk(md))
goto out;
list_for_each_entry(part_md, &md->part, part) {
if (mmc_add_disk(part_md))
goto out;
}
pm_runtime_set_autosuspend_delay(&card->dev, 3000);
pm_runtime_use_autosuspend(&card->dev);
/*
* Don't enable runtime PM for SD-combo cards here. Leave that
* decision to be taken during the SDIO init sequence instead.
*/
if (card->type != MMC_TYPE_SD_COMBO) {
pm_runtime_set_active(&card->dev);
pm_runtime_enable(&card->dev);
}
return 0;
out:
mmc_blk_remove_parts(card, md);
mmc_blk_remove_req(md);
mmc_queue_free_shared_queue(card);
return 0;
}
static void mmc_blk_remove(struct mmc_card *card)
{
struct mmc_blk_data *md = dev_get_drvdata(&card->dev);
mmc_blk_remove_parts(card, md);
pm_runtime_get_sync(&card->dev);
mmc_claim_host(card->host);
mmc_blk_part_switch(card, md);
mmc_release_host(card->host);
if (card->type != MMC_TYPE_SD_COMBO)
pm_runtime_disable(&card->dev);
pm_runtime_put_noidle(&card->dev);
mmc_blk_remove_req(md);
dev_set_drvdata(&card->dev, NULL);
mmc_queue_free_shared_queue(card);
}
static int _mmc_blk_suspend(struct mmc_card *card)
{
struct mmc_blk_data *part_md;
struct mmc_blk_data *md = dev_get_drvdata(&card->dev);
if (md) {
mmc_queue_suspend(&md->queue);
list_for_each_entry(part_md, &md->part, part) {
mmc_queue_suspend(&part_md->queue);
}
}
return 0;
}
static void mmc_blk_shutdown(struct mmc_card *card)
{
_mmc_blk_suspend(card);
}
#ifdef CONFIG_PM_SLEEP
static int mmc_blk_suspend(struct device *dev)
{
struct mmc_card *card = mmc_dev_to_card(dev);
return _mmc_blk_suspend(card);
}
static int mmc_blk_resume(struct device *dev)
{
struct mmc_blk_data *part_md;
struct mmc_blk_data *md = dev_get_drvdata(dev);
if (md) {
/*
* Resume involves the card going into idle state,
* so current partition is always the main one.
*/
md->part_curr = md->part_type;
mmc_queue_resume(&md->queue);
list_for_each_entry(part_md, &md->part, part) {
mmc_queue_resume(&part_md->queue);
}
}
return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(mmc_blk_pm_ops, mmc_blk_suspend, mmc_blk_resume);
static struct mmc_driver mmc_driver = {
.drv = {
.name = "mmcblk",
.pm = &mmc_blk_pm_ops,
},
.probe = mmc_blk_probe,
.remove = mmc_blk_remove,
.shutdown = mmc_blk_shutdown,
};
static int __init mmc_blk_init(void)
{
int res;
if (perdev_minors != CONFIG_MMC_BLOCK_MINORS)
pr_info("mmcblk: using %d minors per device\n", perdev_minors);
max_devices = min(MAX_DEVICES, (1 << MINORBITS) / perdev_minors);
res = register_blkdev(MMC_BLOCK_MAJOR, "mmc");
if (res)
goto out;
res = mmc_register_driver(&mmc_driver);
if (res)
goto out2;
return 0;
out2:
unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
out:
return res;
}
static void __exit mmc_blk_exit(void)
{
mmc_unregister_driver(&mmc_driver);
unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
}
module_init(mmc_blk_init);
module_exit(mmc_blk_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Multimedia Card (MMC) block device driver");