OpenCloudOS-Kernel/drivers/ata/libata-scsi.c

4657 lines
115 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* libata-scsi.c - helper library for ATA
*
* Copyright 2003-2004 Red Hat, Inc. All rights reserved.
* Copyright 2003-2004 Jeff Garzik
*
* libata documentation is available via 'make {ps|pdf}docs',
* as Documentation/driver-api/libata.rst
*
* Hardware documentation available from
* - http://www.t10.org/
* - http://www.t13.org/
*/
#include <linux/compat.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi_transport.h>
#include <linux/libata.h>
#include <linux/hdreg.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include <asm/unaligned.h>
#include <linux/ioprio.h>
#include <linux/of.h>
#include "libata.h"
#include "libata-transport.h"
#define ATA_SCSI_RBUF_SIZE 576
static DEFINE_SPINLOCK(ata_scsi_rbuf_lock);
static u8 ata_scsi_rbuf[ATA_SCSI_RBUF_SIZE];
typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc);
static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
const struct scsi_device *scsidev);
#define RW_RECOVERY_MPAGE 0x1
#define RW_RECOVERY_MPAGE_LEN 12
#define CACHE_MPAGE 0x8
#define CACHE_MPAGE_LEN 20
#define CONTROL_MPAGE 0xa
#define CONTROL_MPAGE_LEN 12
#define ALL_MPAGES 0x3f
#define ALL_SUB_MPAGES 0xff
static const u8 def_rw_recovery_mpage[RW_RECOVERY_MPAGE_LEN] = {
RW_RECOVERY_MPAGE,
RW_RECOVERY_MPAGE_LEN - 2,
(1 << 7), /* AWRE */
0, /* read retry count */
0, 0, 0, 0,
0, /* write retry count */
0, 0, 0
};
static const u8 def_cache_mpage[CACHE_MPAGE_LEN] = {
CACHE_MPAGE,
CACHE_MPAGE_LEN - 2,
0, /* contains WCE, needs to be 0 for logic */
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, /* contains DRA, needs to be 0 for logic */
0, 0, 0, 0, 0, 0, 0
};
static const u8 def_control_mpage[CONTROL_MPAGE_LEN] = {
CONTROL_MPAGE,
CONTROL_MPAGE_LEN - 2,
2, /* DSENSE=0, GLTSD=1 */
0, /* [QAM+QERR may be 1, see 05-359r1] */
0, 0, 0, 0, 0xff, 0xff,
0, 30 /* extended self test time, see 05-359r1 */
};
static ssize_t ata_scsi_park_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct scsi_device *sdev = to_scsi_device(device);
struct ata_port *ap;
struct ata_link *link;
struct ata_device *dev;
unsigned long now;
treewide: Remove uninitialized_var() usage Using uninitialized_var() is dangerous as it papers over real bugs[1] (or can in the future), and suppresses unrelated compiler warnings (e.g. "unused variable"). If the compiler thinks it is uninitialized, either simply initialize the variable or make compiler changes. In preparation for removing[2] the[3] macro[4], remove all remaining needless uses with the following script: git grep '\buninitialized_var\b' | cut -d: -f1 | sort -u | \ xargs perl -pi -e \ 's/\buninitialized_var\(([^\)]+)\)/\1/g; s:\s*/\* (GCC be quiet|to make compiler happy) \*/$::g;' drivers/video/fbdev/riva/riva_hw.c was manually tweaked to avoid pathological white-space. No outstanding warnings were found building allmodconfig with GCC 9.3.0 for x86_64, i386, arm64, arm, powerpc, powerpc64le, s390x, mips, sparc64, alpha, and m68k. [1] https://lore.kernel.org/lkml/20200603174714.192027-1-glider@google.com/ [2] https://lore.kernel.org/lkml/CA+55aFw+Vbj0i=1TGqCR5vQkCzWJ0QxK6CernOU6eedsudAixw@mail.gmail.com/ [3] https://lore.kernel.org/lkml/CA+55aFwgbgqhbp1fkxvRKEpzyR5J8n1vKT1VZdz9knmPuXhOeg@mail.gmail.com/ [4] https://lore.kernel.org/lkml/CA+55aFz2500WfbKXAx8s67wrm9=yVJu65TpLgN_ybYNv0VEOKA@mail.gmail.com/ Reviewed-by: Leon Romanovsky <leonro@mellanox.com> # drivers/infiniband and mlx4/mlx5 Acked-by: Jason Gunthorpe <jgg@mellanox.com> # IB Acked-by: Kalle Valo <kvalo@codeaurora.org> # wireless drivers Reviewed-by: Chao Yu <yuchao0@huawei.com> # erofs Signed-off-by: Kees Cook <keescook@chromium.org>
2020-06-04 04:09:38 +08:00
unsigned int msecs;
int rc = 0;
ap = ata_shost_to_port(sdev->host);
spin_lock_irq(ap->lock);
dev = ata_scsi_find_dev(ap, sdev);
if (!dev) {
rc = -ENODEV;
goto unlock;
}
if (dev->flags & ATA_DFLAG_NO_UNLOAD) {
rc = -EOPNOTSUPP;
goto unlock;
}
link = dev->link;
now = jiffies;
if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS &&
link->eh_context.unloaded_mask & (1 << dev->devno) &&
time_after(dev->unpark_deadline, now))
msecs = jiffies_to_msecs(dev->unpark_deadline - now);
else
msecs = 0;
unlock:
spin_unlock_irq(ap->lock);
return rc ? rc : snprintf(buf, 20, "%u\n", msecs);
}
static ssize_t ata_scsi_park_store(struct device *device,
struct device_attribute *attr,
const char *buf, size_t len)
{
struct scsi_device *sdev = to_scsi_device(device);
struct ata_port *ap;
struct ata_device *dev;
long int input;
unsigned long flags;
int rc;
rc = kstrtol(buf, 10, &input);
if (rc)
return rc;
if (input < -2)
return -EINVAL;
if (input > ATA_TMOUT_MAX_PARK) {
rc = -EOVERFLOW;
input = ATA_TMOUT_MAX_PARK;
}
ap = ata_shost_to_port(sdev->host);
spin_lock_irqsave(ap->lock, flags);
dev = ata_scsi_find_dev(ap, sdev);
if (unlikely(!dev)) {
rc = -ENODEV;
goto unlock;
}
if (dev->class != ATA_DEV_ATA &&
dev->class != ATA_DEV_ZAC) {
rc = -EOPNOTSUPP;
goto unlock;
}
if (input >= 0) {
if (dev->flags & ATA_DFLAG_NO_UNLOAD) {
rc = -EOPNOTSUPP;
goto unlock;
}
dev->unpark_deadline = ata_deadline(jiffies, input);
dev->link->eh_info.dev_action[dev->devno] |= ATA_EH_PARK;
ata_port_schedule_eh(ap);
complete(&ap->park_req_pending);
} else {
switch (input) {
case -1:
dev->flags &= ~ATA_DFLAG_NO_UNLOAD;
break;
case -2:
dev->flags |= ATA_DFLAG_NO_UNLOAD;
break;
}
}
unlock:
spin_unlock_irqrestore(ap->lock, flags);
return rc ? rc : len;
}
DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR,
ata_scsi_park_show, ata_scsi_park_store);
EXPORT_SYMBOL_GPL(dev_attr_unload_heads);
void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd,
u8 sk, u8 asc, u8 ascq)
{
bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE);
if (!cmd)
return;
cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
scsi_build_sense_buffer(d_sense, cmd->sense_buffer, sk, asc, ascq);
}
void ata_scsi_set_sense_information(struct ata_device *dev,
struct scsi_cmnd *cmd,
const struct ata_taskfile *tf)
{
u64 information;
if (!cmd)
return;
information = ata_tf_read_block(tf, dev);
if (information == U64_MAX)
return;
scsi_set_sense_information(cmd->sense_buffer,
SCSI_SENSE_BUFFERSIZE, information);
}
static void ata_scsi_set_invalid_field(struct ata_device *dev,
struct scsi_cmnd *cmd, u16 field, u8 bit)
{
ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x24, 0x0);
/* "Invalid field in CDB" */
scsi_set_sense_field_pointer(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
field, bit, 1);
}
static void ata_scsi_set_invalid_parameter(struct ata_device *dev,
struct scsi_cmnd *cmd, u16 field)
{
/* "Invalid field in parameter list" */
ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x26, 0x0);
scsi_set_sense_field_pointer(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
field, 0xff, 0);
}
struct device_attribute *ata_common_sdev_attrs[] = {
&dev_attr_unload_heads,
NULL
};
EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
/**
* ata_std_bios_param - generic bios head/sector/cylinder calculator used by sd.
* @sdev: SCSI device for which BIOS geometry is to be determined
* @bdev: block device associated with @sdev
* @capacity: capacity of SCSI device
* @geom: location to which geometry will be output
*
* Generic bios head/sector/cylinder calculator
* used by sd. Most BIOSes nowadays expect a XXX/255/16 (CHS)
* mapping. Some situations may arise where the disk is not
* bootable if this is not used.
*
* LOCKING:
* Defined by the SCSI layer. We don't really care.
*
* RETURNS:
* Zero.
*/
int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int geom[])
{
geom[0] = 255;
geom[1] = 63;
sector_div(capacity, 255*63);
geom[2] = capacity;
return 0;
}
EXPORT_SYMBOL_GPL(ata_std_bios_param);
/**
* ata_scsi_unlock_native_capacity - unlock native capacity
* @sdev: SCSI device to adjust device capacity for
*
* This function is called if a partition on @sdev extends beyond
* the end of the device. It requests EH to unlock HPA.
*
* LOCKING:
* Defined by the SCSI layer. Might sleep.
*/
void ata_scsi_unlock_native_capacity(struct scsi_device *sdev)
{
struct ata_port *ap = ata_shost_to_port(sdev->host);
struct ata_device *dev;
unsigned long flags;
spin_lock_irqsave(ap->lock, flags);
dev = ata_scsi_find_dev(ap, sdev);
if (dev && dev->n_sectors < dev->n_native_sectors) {
dev->flags |= ATA_DFLAG_UNLOCK_HPA;
dev->link->eh_info.action |= ATA_EH_RESET;
ata_port_schedule_eh(ap);
}
spin_unlock_irqrestore(ap->lock, flags);
ata_port_wait_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_scsi_unlock_native_capacity);
/**
* ata_get_identity - Handler for HDIO_GET_IDENTITY ioctl
* @ap: target port
* @sdev: SCSI device to get identify data for
* @arg: User buffer area for identify data
*
* LOCKING:
* Defined by the SCSI layer. We don't really care.
*
* RETURNS:
* Zero on success, negative errno on error.
*/
static int ata_get_identity(struct ata_port *ap, struct scsi_device *sdev,
void __user *arg)
{
struct ata_device *dev = ata_scsi_find_dev(ap, sdev);
u16 __user *dst = arg;
char buf[40];
if (!dev)
return -ENOMSG;
if (copy_to_user(dst, dev->id, ATA_ID_WORDS * sizeof(u16)))
return -EFAULT;
ata_id_string(dev->id, buf, ATA_ID_PROD, ATA_ID_PROD_LEN);
if (copy_to_user(dst + ATA_ID_PROD, buf, ATA_ID_PROD_LEN))
return -EFAULT;
ata_id_string(dev->id, buf, ATA_ID_FW_REV, ATA_ID_FW_REV_LEN);
if (copy_to_user(dst + ATA_ID_FW_REV, buf, ATA_ID_FW_REV_LEN))
return -EFAULT;
ata_id_string(dev->id, buf, ATA_ID_SERNO, ATA_ID_SERNO_LEN);
if (copy_to_user(dst + ATA_ID_SERNO, buf, ATA_ID_SERNO_LEN))
return -EFAULT;
return 0;
}
/**
* ata_cmd_ioctl - Handler for HDIO_DRIVE_CMD ioctl
* @scsidev: Device to which we are issuing command
* @arg: User provided data for issuing command
*
* LOCKING:
* Defined by the SCSI layer. We don't really care.
*
* RETURNS:
* Zero on success, negative errno on error.
*/
int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg)
{
int rc = 0;
u8 sensebuf[SCSI_SENSE_BUFFERSIZE];
u8 scsi_cmd[MAX_COMMAND_SIZE];
u8 args[4], *argbuf = NULL;
int argsize = 0;
enum dma_data_direction data_dir;
struct scsi_sense_hdr sshdr;
int cmd_result;
if (arg == NULL)
return -EINVAL;
if (copy_from_user(args, arg, sizeof(args)))
return -EFAULT;
memset(sensebuf, 0, sizeof(sensebuf));
memset(scsi_cmd, 0, sizeof(scsi_cmd));
if (args[3]) {
argsize = ATA_SECT_SIZE * args[3];
argbuf = kmalloc(argsize, GFP_KERNEL);
2005-10-04 22:21:19 +08:00
if (argbuf == NULL) {
rc = -ENOMEM;
goto error;
}
scsi_cmd[1] = (4 << 1); /* PIO Data-in */
scsi_cmd[2] = 0x0e; /* no off.line or cc, read from dev,
block count in sector count field */
data_dir = DMA_FROM_DEVICE;
} else {
scsi_cmd[1] = (3 << 1); /* Non-data */
scsi_cmd[2] = 0x20; /* cc but no off.line or data xfer */
data_dir = DMA_NONE;
}
scsi_cmd[0] = ATA_16;
scsi_cmd[4] = args[2];
if (args[0] == ATA_CMD_SMART) { /* hack -- ide driver does this too */
scsi_cmd[6] = args[3];
scsi_cmd[8] = args[1];
scsi_cmd[10] = ATA_SMART_LBAM_PASS;
scsi_cmd[12] = ATA_SMART_LBAH_PASS;
} else {
scsi_cmd[6] = args[1];
}
scsi_cmd[14] = args[0];
/* Good values for timeout and retries? Values below
from scsi_ioctl_send_command() for default case... */
cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize,
sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL);
if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */
u8 *desc = sensebuf + 8;
cmd_result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */
/* If we set cc then ATA pass-through will cause a
* check condition even if no error. Filter that. */
if (cmd_result & SAM_STAT_CHECK_CONDITION) {
if (sshdr.sense_key == RECOVERED_ERROR &&
sshdr.asc == 0 && sshdr.ascq == 0x1d)
cmd_result &= ~SAM_STAT_CHECK_CONDITION;
}
/* Send userspace a few ATA registers (same as drivers/ide) */
if (sensebuf[0] == 0x72 && /* format is "descriptor" */
desc[0] == 0x09) { /* code is "ATA Descriptor" */
args[0] = desc[13]; /* status */
args[1] = desc[3]; /* error */
args[2] = desc[5]; /* sector count (0:7) */
if (copy_to_user(arg, args, sizeof(args)))
rc = -EFAULT;
}
}
if (cmd_result) {
rc = -EIO;
goto error;
}
if ((argbuf)
&& copy_to_user(arg + sizeof(args), argbuf, argsize))
rc = -EFAULT;
error:
kfree(argbuf);
return rc;
}
/**
* ata_task_ioctl - Handler for HDIO_DRIVE_TASK ioctl
* @scsidev: Device to which we are issuing command
* @arg: User provided data for issuing command
*
* LOCKING:
* Defined by the SCSI layer. We don't really care.
*
* RETURNS:
* Zero on success, negative errno on error.
*/
int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg)
{
int rc = 0;
u8 sensebuf[SCSI_SENSE_BUFFERSIZE];
u8 scsi_cmd[MAX_COMMAND_SIZE];
u8 args[7];
struct scsi_sense_hdr sshdr;
int cmd_result;
if (arg == NULL)
return -EINVAL;
if (copy_from_user(args, arg, sizeof(args)))
return -EFAULT;
memset(sensebuf, 0, sizeof(sensebuf));
memset(scsi_cmd, 0, sizeof(scsi_cmd));
scsi_cmd[0] = ATA_16;
scsi_cmd[1] = (3 << 1); /* Non-data */
scsi_cmd[2] = 0x20; /* cc but no off.line or data xfer */
scsi_cmd[4] = args[1];
scsi_cmd[6] = args[2];
scsi_cmd[8] = args[3];
scsi_cmd[10] = args[4];
scsi_cmd[12] = args[5];
scsi_cmd[13] = args[6] & 0x4f;
scsi_cmd[14] = args[0];
/* Good values for timeout and retries? Values below
from scsi_ioctl_send_command() for default case... */
cmd_result = scsi_execute(scsidev, scsi_cmd, DMA_NONE, NULL, 0,
sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL);
if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */
u8 *desc = sensebuf + 8;
cmd_result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */
/* If we set cc then ATA pass-through will cause a
* check condition even if no error. Filter that. */
if (cmd_result & SAM_STAT_CHECK_CONDITION) {
if (sshdr.sense_key == RECOVERED_ERROR &&
sshdr.asc == 0 && sshdr.ascq == 0x1d)
cmd_result &= ~SAM_STAT_CHECK_CONDITION;
}
/* Send userspace ATA registers */
if (sensebuf[0] == 0x72 && /* format is "descriptor" */
desc[0] == 0x09) {/* code is "ATA Descriptor" */
args[0] = desc[13]; /* status */
args[1] = desc[3]; /* error */
args[2] = desc[5]; /* sector count (0:7) */
args[3] = desc[7]; /* lbal */
args[4] = desc[9]; /* lbam */
args[5] = desc[11]; /* lbah */
args[6] = desc[12]; /* select */
if (copy_to_user(arg, args, sizeof(args)))
rc = -EFAULT;
}
}
if (cmd_result) {
rc = -EIO;
goto error;
}
error:
return rc;
}
static int ata_ioc32(struct ata_port *ap)
{
if (ap->flags & ATA_FLAG_PIO_DMA)
return 1;
if (ap->pflags & ATA_PFLAG_PIO32)
return 1;
return 0;
}
/*
* This handles both native and compat commands, so anything added
* here must have a compatible argument, or check in_compat_syscall()
*/
int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
scsi: ata: Use unsigned int for cmd's type in ioctls in scsi_host_template Clang warns several times in the scsi subsystem (trimmed for brevity): drivers/scsi/hpsa.c:6209:7: warning: overflow converting case value to switch condition type (2147762695 to 18446744071562347015) [-Wswitch] case CCISS_GETBUSTYPES: ^ drivers/scsi/hpsa.c:6208:7: warning: overflow converting case value to switch condition type (2147762694 to 18446744071562347014) [-Wswitch] case CCISS_GETHEARTBEAT: ^ The root cause is that the _IOC macro can generate really large numbers, which don't fit into type 'int', which is used for the cmd parameter in the ioctls in scsi_host_template. My research into how GCC and Clang are handling this at a low level didn't prove fruitful. However, looking at the rest of the kernel tree, all ioctls use an 'unsigned int' for the cmd parameter, which will fit all of the _IOC values in the scsi/ata subsystems. Make that change because none of the ioctls expect a negative value for any command, it brings the ioctls inline with the reset of the kernel, and it removes ambiguity, which is never good when dealing with compilers. Link: https://github.com/ClangBuiltLinux/linux/issues/85 Link: https://github.com/ClangBuiltLinux/linux/issues/154 Link: https://github.com/ClangBuiltLinux/linux/issues/157 Signed-off-by: Nathan Chancellor <natechancellor@gmail.com> Acked-by: Bradley Grove <bgrove@attotech.com> Acked-by: Don Brace <don.brace@microsemi.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Tested-by: Nick Desaulniers <ndesaulniers@google.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-02-08 00:07:20 +08:00
unsigned int cmd, void __user *arg)
{
libata: fix HDIO_GET_32BIT ioctl As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not work correctly in compat mode with libata. I have investigated the issue further and found multiple problems that all appeared with the same commit that originally introduced HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy a 'long' variable containing either 0 or 1 to user space. The problems with this are: * On big-endian machines, this will always write a zero because it stores the wrong byte into user space. * In compat mode, the upper three bytes of the variable are updated by the compat_hdio_ioctl() function, but they now contain uninitialized stack data. * The hdparm tool calling this ioctl uses a 'static long' variable to store the result. This means at least the upper bytes are initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT would fill them with data that remains stale when the low byte is overwritten. Fortunately libata doesn't implement any of the affected ioctl commands, so this would only happen when we query both an IDE and an ATA device in the same command such as "hdparm -N -c /dev/hda /dev/sda" * The libata code for unknown reasons started using ATA_IOC_GET_IO32 and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, while the ioctl commands that were added later use the normal HDIO_* names. This is harmless but rather confusing. This addresses all four issues by changing the code to use put_user() on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem does, and by clarifying the names of the ioctl commands. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reported-by: Soohoon Lee <Soohoon.Lee@f5.com> Tested-by: Soohoon Lee <Soohoon.Lee@f5.com> Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo <tj@kernel.org>
2016-02-11 21:16:27 +08:00
unsigned long val;
int rc = -EINVAL;
unsigned long flags;
switch (cmd) {
libata: fix HDIO_GET_32BIT ioctl As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not work correctly in compat mode with libata. I have investigated the issue further and found multiple problems that all appeared with the same commit that originally introduced HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy a 'long' variable containing either 0 or 1 to user space. The problems with this are: * On big-endian machines, this will always write a zero because it stores the wrong byte into user space. * In compat mode, the upper three bytes of the variable are updated by the compat_hdio_ioctl() function, but they now contain uninitialized stack data. * The hdparm tool calling this ioctl uses a 'static long' variable to store the result. This means at least the upper bytes are initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT would fill them with data that remains stale when the low byte is overwritten. Fortunately libata doesn't implement any of the affected ioctl commands, so this would only happen when we query both an IDE and an ATA device in the same command such as "hdparm -N -c /dev/hda /dev/sda" * The libata code for unknown reasons started using ATA_IOC_GET_IO32 and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, while the ioctl commands that were added later use the normal HDIO_* names. This is harmless but rather confusing. This addresses all four issues by changing the code to use put_user() on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem does, and by clarifying the names of the ioctl commands. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reported-by: Soohoon Lee <Soohoon.Lee@f5.com> Tested-by: Soohoon Lee <Soohoon.Lee@f5.com> Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo <tj@kernel.org>
2016-02-11 21:16:27 +08:00
case HDIO_GET_32BIT:
spin_lock_irqsave(ap->lock, flags);
val = ata_ioc32(ap);
spin_unlock_irqrestore(ap->lock, flags);
#ifdef CONFIG_COMPAT
if (in_compat_syscall())
return put_user(val, (compat_ulong_t __user *)arg);
#endif
libata: fix HDIO_GET_32BIT ioctl As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not work correctly in compat mode with libata. I have investigated the issue further and found multiple problems that all appeared with the same commit that originally introduced HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy a 'long' variable containing either 0 or 1 to user space. The problems with this are: * On big-endian machines, this will always write a zero because it stores the wrong byte into user space. * In compat mode, the upper three bytes of the variable are updated by the compat_hdio_ioctl() function, but they now contain uninitialized stack data. * The hdparm tool calling this ioctl uses a 'static long' variable to store the result. This means at least the upper bytes are initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT would fill them with data that remains stale when the low byte is overwritten. Fortunately libata doesn't implement any of the affected ioctl commands, so this would only happen when we query both an IDE and an ATA device in the same command such as "hdparm -N -c /dev/hda /dev/sda" * The libata code for unknown reasons started using ATA_IOC_GET_IO32 and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, while the ioctl commands that were added later use the normal HDIO_* names. This is harmless but rather confusing. This addresses all four issues by changing the code to use put_user() on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem does, and by clarifying the names of the ioctl commands. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reported-by: Soohoon Lee <Soohoon.Lee@f5.com> Tested-by: Soohoon Lee <Soohoon.Lee@f5.com> Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo <tj@kernel.org>
2016-02-11 21:16:27 +08:00
return put_user(val, (unsigned long __user *)arg);
libata: fix HDIO_GET_32BIT ioctl As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not work correctly in compat mode with libata. I have investigated the issue further and found multiple problems that all appeared with the same commit that originally introduced HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy a 'long' variable containing either 0 or 1 to user space. The problems with this are: * On big-endian machines, this will always write a zero because it stores the wrong byte into user space. * In compat mode, the upper three bytes of the variable are updated by the compat_hdio_ioctl() function, but they now contain uninitialized stack data. * The hdparm tool calling this ioctl uses a 'static long' variable to store the result. This means at least the upper bytes are initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT would fill them with data that remains stale when the low byte is overwritten. Fortunately libata doesn't implement any of the affected ioctl commands, so this would only happen when we query both an IDE and an ATA device in the same command such as "hdparm -N -c /dev/hda /dev/sda" * The libata code for unknown reasons started using ATA_IOC_GET_IO32 and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, while the ioctl commands that were added later use the normal HDIO_* names. This is harmless but rather confusing. This addresses all four issues by changing the code to use put_user() on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem does, and by clarifying the names of the ioctl commands. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reported-by: Soohoon Lee <Soohoon.Lee@f5.com> Tested-by: Soohoon Lee <Soohoon.Lee@f5.com> Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo <tj@kernel.org>
2016-02-11 21:16:27 +08:00
case HDIO_SET_32BIT:
val = (unsigned long) arg;
rc = 0;
spin_lock_irqsave(ap->lock, flags);
if (ap->pflags & ATA_PFLAG_PIO32CHANGE) {
if (val)
ap->pflags |= ATA_PFLAG_PIO32;
else
ap->pflags &= ~ATA_PFLAG_PIO32;
} else {
if (val != ata_ioc32(ap))
rc = -EINVAL;
}
spin_unlock_irqrestore(ap->lock, flags);
return rc;
case HDIO_GET_IDENTITY:
return ata_get_identity(ap, scsidev, arg);
case HDIO_DRIVE_CMD:
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
return -EACCES;
return ata_cmd_ioctl(scsidev, arg);
case HDIO_DRIVE_TASK:
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
return -EACCES;
return ata_task_ioctl(scsidev, arg);
default:
rc = -ENOTTY;
break;
}
return rc;
}
EXPORT_SYMBOL_GPL(ata_sas_scsi_ioctl);
scsi: ata: Use unsigned int for cmd's type in ioctls in scsi_host_template Clang warns several times in the scsi subsystem (trimmed for brevity): drivers/scsi/hpsa.c:6209:7: warning: overflow converting case value to switch condition type (2147762695 to 18446744071562347015) [-Wswitch] case CCISS_GETBUSTYPES: ^ drivers/scsi/hpsa.c:6208:7: warning: overflow converting case value to switch condition type (2147762694 to 18446744071562347014) [-Wswitch] case CCISS_GETHEARTBEAT: ^ The root cause is that the _IOC macro can generate really large numbers, which don't fit into type 'int', which is used for the cmd parameter in the ioctls in scsi_host_template. My research into how GCC and Clang are handling this at a low level didn't prove fruitful. However, looking at the rest of the kernel tree, all ioctls use an 'unsigned int' for the cmd parameter, which will fit all of the _IOC values in the scsi/ata subsystems. Make that change because none of the ioctls expect a negative value for any command, it brings the ioctls inline with the reset of the kernel, and it removes ambiguity, which is never good when dealing with compilers. Link: https://github.com/ClangBuiltLinux/linux/issues/85 Link: https://github.com/ClangBuiltLinux/linux/issues/154 Link: https://github.com/ClangBuiltLinux/linux/issues/157 Signed-off-by: Nathan Chancellor <natechancellor@gmail.com> Acked-by: Bradley Grove <bgrove@attotech.com> Acked-by: Don Brace <don.brace@microsemi.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Tested-by: Nick Desaulniers <ndesaulniers@google.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-02-08 00:07:20 +08:00
int ata_scsi_ioctl(struct scsi_device *scsidev, unsigned int cmd,
void __user *arg)
{
return ata_sas_scsi_ioctl(ata_shost_to_port(scsidev->host),
scsidev, cmd, arg);
}
EXPORT_SYMBOL_GPL(ata_scsi_ioctl);
/**
* ata_scsi_qc_new - acquire new ata_queued_cmd reference
* @dev: ATA device to which the new command is attached
* @cmd: SCSI command that originated this ATA command
*
* Obtain a reference to an unused ata_queued_cmd structure,
* which is the basic libata structure representing a single
* ATA command sent to the hardware.
*
* If a command was available, fill in the SCSI-specific
* portions of the structure with information on the
* current command.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Command allocated, or %NULL if none available.
*/
static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev,
struct scsi_cmnd *cmd)
{
struct ata_queued_cmd *qc;
qc = ata_qc_new_init(dev, cmd->request->tag);
if (qc) {
qc->scsicmd = cmd;
qc->scsidone = cmd->scsi_done;
qc->sg = scsi_sglist(cmd);
qc->n_elem = scsi_sg_count(cmd);
if (cmd->request->rq_flags & RQF_QUIET)
qc->flags |= ATA_QCFLAG_QUIET;
} else {
cmd->result = (DID_OK << 16) | (QUEUE_FULL << 1);
cmd->scsi_done(cmd);
}
return qc;
}
static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
qc->extrabytes = scmd->extra_len;
qc->nbytes = scsi_bufflen(scmd) + qc->extrabytes;
}
/**
* ata_dump_status - user friendly display of error info
* @id: id of the port in question
* @tf: ptr to filled out taskfile
*
* Decode and dump the ATA error/status registers for the user so
* that they have some idea what really happened at the non
* make-believe layer.
*
* LOCKING:
* inherited from caller
*/
static void ata_dump_status(unsigned id, struct ata_taskfile *tf)
{
u8 stat = tf->command, err = tf->feature;
pr_warn("ata%u: status=0x%02x { ", id, stat);
if (stat & ATA_BUSY) {
pr_cont("Busy }\n"); /* Data is not valid in this case */
} else {
if (stat & ATA_DRDY) pr_cont("DriveReady ");
if (stat & ATA_DF) pr_cont("DeviceFault ");
if (stat & ATA_DSC) pr_cont("SeekComplete ");
if (stat & ATA_DRQ) pr_cont("DataRequest ");
if (stat & ATA_CORR) pr_cont("CorrectedError ");
if (stat & ATA_SENSE) pr_cont("Sense ");
if (stat & ATA_ERR) pr_cont("Error ");
pr_cont("}\n");
if (err) {
pr_warn("ata%u: error=0x%02x { ", id, err);
if (err & ATA_ABORTED) pr_cont("DriveStatusError ");
if (err & ATA_ICRC) {
if (err & ATA_ABORTED)
pr_cont("BadCRC ");
else pr_cont("Sector ");
}
if (err & ATA_UNC) pr_cont("UncorrectableError ");
if (err & ATA_IDNF) pr_cont("SectorIdNotFound ");
if (err & ATA_TRK0NF) pr_cont("TrackZeroNotFound ");
if (err & ATA_AMNF) pr_cont("AddrMarkNotFound ");
pr_cont("}\n");
}
}
}
/**
* ata_to_sense_error - convert ATA error to SCSI error
* @id: ATA device number
* @drv_stat: value contained in ATA status register
* @drv_err: value contained in ATA error register
* @sk: the sense key we'll fill out
* @asc: the additional sense code we'll fill out
* @ascq: the additional sense code qualifier we'll fill out
* @verbose: be verbose
*
* Converts an ATA error into a SCSI error. Fill out pointers to
* SK, ASC, and ASCQ bytes for later use in fixed or descriptor
* format sense blocks.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
u8 *asc, u8 *ascq, int verbose)
{
int i;
2005-10-09 22:40:44 +08:00
/* Based on the 3ware driver translation table */
static const unsigned char sense_table[][4] = {
/* BBD|ECC|ID|MAR */
{0xd1, ABORTED_COMMAND, 0x00, 0x00},
// Device busy Aborted command
/* BBD|ECC|ID */
{0xd0, ABORTED_COMMAND, 0x00, 0x00},
// Device busy Aborted command
/* ECC|MC|MARK */
{0x61, HARDWARE_ERROR, 0x00, 0x00},
// Device fault Hardware error
/* ICRC|ABRT */ /* NB: ICRC & !ABRT is BBD */
{0x84, ABORTED_COMMAND, 0x47, 0x00},
// Data CRC error SCSI parity error
/* MC|ID|ABRT|TRK0|MARK */
{0x37, NOT_READY, 0x04, 0x00},
// Unit offline Not ready
/* MCR|MARK */
{0x09, NOT_READY, 0x04, 0x00},
// Unrecovered disk error Not ready
/* Bad address mark */
{0x01, MEDIUM_ERROR, 0x13, 0x00},
// Address mark not found for data field
/* TRK0 - Track 0 not found */
{0x02, HARDWARE_ERROR, 0x00, 0x00},
// Hardware error
/* Abort: 0x04 is not translated here, see below */
/* Media change request */
{0x08, NOT_READY, 0x04, 0x00},
// FIXME: faking offline
/* SRV/IDNF - ID not found */
{0x10, ILLEGAL_REQUEST, 0x21, 0x00},
// Logical address out of range
/* MC - Media Changed */
{0x20, UNIT_ATTENTION, 0x28, 0x00},
// Not ready to ready change, medium may have changed
/* ECC - Uncorrectable ECC error */
{0x40, MEDIUM_ERROR, 0x11, 0x04},
// Unrecovered read error
/* BBD - block marked bad */
{0x80, MEDIUM_ERROR, 0x11, 0x04},
// Block marked bad Medium error, unrecovered read error
{0xFF, 0xFF, 0xFF, 0xFF}, // END mark
};
static const unsigned char stat_table[][4] = {
/* Must be first because BUSY means no other bits valid */
{0x80, ABORTED_COMMAND, 0x47, 0x00},
// Busy, fake parity for now
{0x40, ILLEGAL_REQUEST, 0x21, 0x04},
// Device ready, unaligned write command
{0x20, HARDWARE_ERROR, 0x44, 0x00},
// Device fault, internal target failure
{0x08, ABORTED_COMMAND, 0x47, 0x00},
// Timed out in xfer, fake parity for now
{0x04, RECOVERED_ERROR, 0x11, 0x00},
// Recovered ECC error Medium error, recovered
{0xFF, 0xFF, 0xFF, 0xFF}, // END mark
};
/*
* Is this an error we can process/parse
*/
if (drv_stat & ATA_BUSY) {
drv_err = 0; /* Ignore the err bits, they're invalid */
}
if (drv_err) {
/* Look for drv_err */
for (i = 0; sense_table[i][0] != 0xFF; i++) {
/* Look for best matches first */
if ((sense_table[i][0] & drv_err) ==
sense_table[i][0]) {
*sk = sense_table[i][1];
*asc = sense_table[i][2];
*ascq = sense_table[i][3];
goto translate_done;
}
}
}
/*
* Fall back to interpreting status bits. Note that if the drv_err
* has only the ABRT bit set, we decode drv_stat. ABRT by itself
* is not descriptive enough.
*/
for (i = 0; stat_table[i][0] != 0xFF; i++) {
if (stat_table[i][0] & drv_stat) {
*sk = stat_table[i][1];
*asc = stat_table[i][2];
*ascq = stat_table[i][3];
goto translate_done;
}
}
/*
* We need a sensible error return here, which is tricky, and one
* that won't cause people to do things like return a disk wrongly.
*/
*sk = ABORTED_COMMAND;
*asc = 0x00;
*ascq = 0x00;
translate_done:
if (verbose)
pr_err("ata%u: translated ATA stat/err 0x%02x/%02x to SCSI SK/ASC/ASCQ 0x%x/%02x/%02x\n",
id, drv_stat, drv_err, *sk, *asc, *ascq);
return;
}
/*
* ata_gen_passthru_sense - Generate check condition sense block.
* @qc: Command that completed.
*
* This function is specific to the ATA descriptor format sense
* block specified for the ATA pass through commands. Regardless
* of whether the command errored or not, return a sense
* block. Copy all controller registers into the sense
* block. If there was no error, we get the request from an ATA
* passthrough command, so we use the following sense data:
* sk = RECOVERED ERROR
* asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE
*
*
* LOCKING:
* None.
*/
static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *cmd = qc->scsicmd;
struct ata_taskfile *tf = &qc->result_tf;
unsigned char *sb = cmd->sense_buffer;
unsigned char *desc = sb + 8;
int verbose = qc->ap->ops->error_handler == NULL;
u8 sense_key, asc, ascq;
memset(sb, 0, SCSI_SENSE_BUFFERSIZE);
cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
/*
* Use ata_to_sense_error() to map status register bits
* onto sense key, asc & ascq.
*/
if (qc->err_mask ||
tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature,
&sense_key, &asc, &ascq, verbose);
ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
} else {
/*
* ATA PASS-THROUGH INFORMATION AVAILABLE
* Always in descriptor format sense.
*/
scsi_build_sense_buffer(1, cmd->sense_buffer,
RECOVERED_ERROR, 0, 0x1D);
}
if ((cmd->sense_buffer[0] & 0x7f) >= 0x72) {
u8 len;
/* descriptor format */
len = sb[7];
desc = (char *)scsi_sense_desc_find(sb, len + 8, 9);
if (!desc) {
if (SCSI_SENSE_BUFFERSIZE < len + 14)
return;
sb[7] = len + 14;
desc = sb + 8 + len;
}
desc[0] = 9;
desc[1] = 12;
/*
* Copy registers into sense buffer.
*/
desc[2] = 0x00;
desc[3] = tf->feature; /* == error reg */
desc[5] = tf->nsect;
desc[7] = tf->lbal;
desc[9] = tf->lbam;
desc[11] = tf->lbah;
desc[12] = tf->device;
desc[13] = tf->command; /* == status reg */
/*
* Fill in Extend bit, and the high order bytes
* if applicable.
*/
if (tf->flags & ATA_TFLAG_LBA48) {
desc[2] |= 0x01;
desc[4] = tf->hob_nsect;
desc[6] = tf->hob_lbal;
desc[8] = tf->hob_lbam;
desc[10] = tf->hob_lbah;
}
} else {
/* Fixed sense format */
desc[0] = tf->feature;
desc[1] = tf->command; /* status */
desc[2] = tf->device;
desc[3] = tf->nsect;
desc[7] = 0;
if (tf->flags & ATA_TFLAG_LBA48) {
desc[8] |= 0x80;
if (tf->hob_nsect)
desc[8] |= 0x40;
if (tf->hob_lbal || tf->hob_lbam || tf->hob_lbah)
desc[8] |= 0x20;
}
desc[9] = tf->lbal;
desc[10] = tf->lbam;
desc[11] = tf->lbah;
}
}
/**
* ata_gen_ata_sense - generate a SCSI fixed sense block
* @qc: Command that we are erroring out
*
* Generate sense block for a failed ATA command @qc. Descriptor
* format is used to accommodate LBA48 block address.
*
* LOCKING:
* None.
*/
static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
{
struct ata_device *dev = qc->dev;
struct scsi_cmnd *cmd = qc->scsicmd;
struct ata_taskfile *tf = &qc->result_tf;
unsigned char *sb = cmd->sense_buffer;
int verbose = qc->ap->ops->error_handler == NULL;
u64 block;
u8 sense_key, asc, ascq;
memset(sb, 0, SCSI_SENSE_BUFFERSIZE);
cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
if (ata_dev_disabled(dev)) {
/* Device disabled after error recovery */
/* LOGICAL UNIT NOT READY, HARD RESET REQUIRED */
ata_scsi_set_sense(dev, cmd, NOT_READY, 0x04, 0x21);
return;
}
/* Use ata_to_sense_error() to map status register bits
* onto sense key, asc & ascq.
*/
if (qc->err_mask ||
tf->command & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
ata_to_sense_error(qc->ap->print_id, tf->command, tf->feature,
&sense_key, &asc, &ascq, verbose);
ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq);
} else {
/* Could not decode error */
ata_dev_warn(dev, "could not decode error status 0x%x err_mask 0x%x\n",
tf->command, qc->err_mask);
ata_scsi_set_sense(dev, cmd, ABORTED_COMMAND, 0, 0);
return;
}
block = ata_tf_read_block(&qc->result_tf, dev);
if (block == U64_MAX)
return;
scsi_set_sense_information(sb, SCSI_SENSE_BUFFERSIZE, block);
}
void ata_scsi_sdev_config(struct scsi_device *sdev)
{
sdev->use_10_for_rw = 1;
sdev->use_10_for_ms = 1;
libata-scsi: disable SCT Write Same for the moment SCT Write Same support had been introduced with commit 7b2030942859 ("libata: Add support for SCT Write Same") Some problems, namely excessive userspace segfaults, had been reported at http://lkml.kernel.org/r/20160908192736.GA4356@gmail.com This lead to commit 0ce1b18c42a5 ("libata: Some drives failing on SCT Write Same") which strived to disable SCT Write Same on !ZAC devices. Due to the way this was done and to the logic in sd_config_write_same(), this didn't work for those devices that have ->max_ws_blocks > SD_MAX_WS10_BLOCKS: for these, ->no_write_same and ->max_write_same_sectors would still be non-zero, but ->ws10 == ->ws16 == 0. This would cause sd_setup_write_same_cmnd() to demultiplex REQ_OP_WRITE_SAME requests to WRITE_SAME, and these in turn aren't supported by libata-scsi: EXT4-fs (dm-1): Delayed block allocation failed for inode 2625094 at logical offset 2032 with max blocks 2 with error 121 EXT4-fs (dm-1): This should not happen!! Data will be lost 121 == EREMOTEIO is what scsi_io_completion() asserts in case of invalid opcodes. Back to the original problem of userspace segfaults: this can be tracked down to ata_format_sct_write_same() overwriting the input page. Sometimes, this page is ZERO_PAGE(0) which ceases to be filled with zeros from that point on. Since ZERO_PAGE(0) is used for userspace .bss mappings, code of the following is doomed: static char *a = NULL; /* .bss */ ... if (a) *a = 'a'; This problem is not solved by disabling SCT Write Same for !ZAC devices only. It can certainly be fixed, but the final release is quite close -- so disable SCT Write Same for all ATA devices rather than introducing some SCT key buffer allocation schemes at this point. Fixes: 7b2030942859 ("libata: Add support for SCT Write Same") Signed-off-by: Nicolai Stange <nicstange@gmail.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2016-12-08 05:21:33 +08:00
sdev->no_write_same = 1;
/* Schedule policy is determined by ->qc_defer() callback and
* it needs to see every deferred qc. Set dev_blocked to 1 to
* prevent SCSI midlayer from automatically deferring
* requests.
*/
sdev->max_device_blocked = 1;
}
/**
* atapi_drain_needed - Check whether data transfer may overflow
* @rq: request to be checked
*
* ATAPI commands which transfer variable length data to host
* might overflow due to application error or hardware bug. This
* function checks whether overflow should be drained and ignored
* for @request.
*
* LOCKING:
* None.
*
* RETURNS:
* 1 if ; otherwise, 0.
*/
bool ata_scsi_dma_need_drain(struct request *rq)
{
return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
}
EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
{
struct request_queue *q = sdev->request_queue;
if (!ata_id_has_unload(dev->id))
dev->flags |= ATA_DFLAG_NO_UNLOAD;
/* configure max sectors */
blk_queue_max_hw_sectors(q, dev->max_sectors);
if (dev->class == ATA_DEV_ATAPI) {
sdev->sector_size = ATA_SECT_SIZE;
/* set DMA padding */
blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
/* make room for appending the drain */
blk_queue_max_segments(q, queue_max_segments(q) - 1);
sdev->dma_drain_len = ATAPI_MAX_DRAIN;
sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len,
q->bounce_gfp | GFP_KERNEL);
if (!sdev->dma_drain_buf) {
ata_dev_err(dev, "drain buffer allocation failed\n");
return -ENOMEM;
}
} else {
sdev->sector_size = ata_id_logical_sector_size(dev->id);
sdev->manage_start_stop = 1;
libata: eliminate the home grown dma padding in favour of that provided by the block layer ATA requires that all DMA transfers begin and end on word boundaries. Because of this, a large amount of machinery grew up in ide to adjust scatterlists on this basis. However, as of 2.5, the block layer has a dma_alignment variable which ensures both the beginning and length of a DMA transfer are aligned on the dma_alignment boundary. Although the block layer does adjust the beginning of the transfer to ensure this happens, it doesn't actually adjust the length, it merely makes sure that space is allocated for transfers beyond the declared length. The upshot of this is that scatterlists may be padded to any size between the actual length and the length adjusted to the dma_alignment safely knowing that memory is allocated in this region. Right at the moment, SCSI takes the default dma_aligment which is on a 512 byte boundary. Note that this aligment only applies to transfers coming in from user space. However, since all kernel allocations are automatically aligned on a minimum of 32 byte boundaries, it is safe to adjust them in this manner as well. tj: * Adjusting sg after padding is done in block layer. Make libata set queue alignment correctly for ATAPI devices and drop broken sg mangling from ata_sg_setup(). * Use request->raw_data_len for ATAPI transfer chunk size. * Killed qc->raw_nbytes. * Separated out killing qc->n_iter. Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com> Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
2008-02-19 18:36:56 +08:00
}
/*
* ata_pio_sectors() expects buffer for each sector to not cross
* page boundary. Enforce it by requiring buffers to be sector
* aligned, which works iff sector_size is not larger than
* PAGE_SIZE. ATAPI devices also need the alignment as
* IDENTIFY_PACKET is executed as ATA_PROT_PIO.
*/
if (sdev->sector_size > PAGE_SIZE)
ata_dev_warn(dev,
"sector_size=%u > PAGE_SIZE, PIO may malfunction\n",
sdev->sector_size);
blk_queue_update_dma_alignment(q, sdev->sector_size - 1);
if (dev->flags & ATA_DFLAG_AN)
set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
if (dev->flags & ATA_DFLAG_NCQ) {
int depth;
depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
depth = min(ATA_MAX_QUEUE, depth);
scsi_change_queue_depth(sdev, depth);
}
if (dev->flags & ATA_DFLAG_TRUSTED)
sdev->security_supported = 1;
dev->sdev = sdev;
return 0;
}
/**
* ata_scsi_slave_config - Set SCSI device attributes
* @sdev: SCSI device to examine
*
* This is called before we actually start reading
* and writing to the device, to configure certain
* SCSI mid-layer behaviors.
*
* LOCKING:
* Defined by SCSI layer. We don't really care.
*/
int ata_scsi_slave_config(struct scsi_device *sdev)
{
struct ata_port *ap = ata_shost_to_port(sdev->host);
struct ata_device *dev = __ata_scsi_find_dev(ap, sdev);
int rc = 0;
ata_scsi_sdev_config(sdev);
if (dev)
rc = ata_scsi_dev_config(sdev, dev);
return rc;
}
EXPORT_SYMBOL_GPL(ata_scsi_slave_config);
/**
* ata_scsi_slave_destroy - SCSI device is about to be destroyed
* @sdev: SCSI device to be destroyed
*
* @sdev is about to be destroyed for hot/warm unplugging. If
* this unplugging was initiated by libata as indicated by NULL
* dev->sdev, this function doesn't have to do anything.
* Otherwise, SCSI layer initiated warm-unplug is in progress.
* Clear dev->sdev, schedule the device for ATA detach and invoke
* EH.
*
* LOCKING:
* Defined by SCSI layer. We don't really care.
*/
void ata_scsi_slave_destroy(struct scsi_device *sdev)
{
struct ata_port *ap = ata_shost_to_port(sdev->host);
unsigned long flags;
struct ata_device *dev;
if (!ap->ops->error_handler)
return;
spin_lock_irqsave(ap->lock, flags);
dev = __ata_scsi_find_dev(ap, sdev);
if (dev && dev->sdev) {
/* SCSI device already in CANCEL state, no need to offline it */
dev->sdev = NULL;
dev->flags |= ATA_DFLAG_DETACH;
ata_port_schedule_eh(ap);
}
spin_unlock_irqrestore(ap->lock, flags);
kfree(sdev->dma_drain_buf);
}
EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy);
/**
* ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command
* @qc: Storage for translated ATA taskfile
*
* Sets up an ATA taskfile to issue STANDBY (to stop) or READ VERIFY
* (to start). Perhaps these commands should be preceded by
* CHECK POWER MODE to see what power mode the device is already in.
* [See SAT revision 5 at www.t10.org]
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Zero on success, non-zero on error.
*/
static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
struct ata_taskfile *tf = &qc->tf;
const u8 *cdb = scmd->cmnd;
u16 fp;
u8 bp = 0xff;
if (scmd->cmd_len < 5) {
fp = 4;
goto invalid_fld;
}
tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
tf->protocol = ATA_PROT_NODATA;
if (cdb[1] & 0x1) {
; /* ignore IMMED bit, violates sat-r05 */
}
if (cdb[4] & 0x2) {
fp = 4;
bp = 1;
goto invalid_fld; /* LOEJ bit set not supported */
}
if (((cdb[4] >> 4) & 0xf) != 0) {
fp = 4;
bp = 3;
goto invalid_fld; /* power conditions not supported */
}
if (cdb[4] & 0x1) {
tf->nsect = 1; /* 1 sector, lba=0 */
if (qc->dev->flags & ATA_DFLAG_LBA) {
tf->flags |= ATA_TFLAG_LBA;
tf->lbah = 0x0;
tf->lbam = 0x0;
tf->lbal = 0x0;
tf->device |= ATA_LBA;
} else {
/* CHS */
tf->lbal = 0x1; /* sect */
tf->lbam = 0x0; /* cyl low */
tf->lbah = 0x0; /* cyl high */
}
tf->command = ATA_CMD_VERIFY; /* READ VERIFY */
libata: implement libata.spindown_compat Now that libata uses sd->manage_start_stop, libata spins down disk on shutdown. In an attempt to compensate libata's previous shortcoming, some distros sync and spin down disks attached via libata in their shutdown(8). Some disks spin back up just to spin down again on STANDBYNOW1 if the command is issued when the disk is spun down, so this double spinning down causes problem. This patch implements module parameter libata.spindown_compat which, when set to one (default value), prevents libata from spinning down disks on shutdown thus avoiding double spinning down. Note that libata spins down disks for suspend to mem and disk, so with libata.spindown_compat set to one, disks should be properly spun down in all cases without modifying shutdown(8). shutdown(8) should be fixed eventually. Some drive do spin up on SYNCHRONZE_CACHE even when their cache is clean. Those disks currently spin up briefly when sd tries to shutdown the device and then the machine powers off immediately, which can't be good for the head. We can't skip SYNCHRONIZE_CACHE during shudown as it can be dangerous data integrity-wise. So, this spindown_compat parameter is already scheduled for removal by the end of the next year and here's what shutdown(8) should do. * Check whether /sys/modules/libata/parameters/spindown_compat exists. If it does, write 0 to it. * For each libata harddisk { * Check whether /sys/class/scsi_disk/h:c:i:l/manage_start_stop exists. Iff it doesn't, synchronize cache and spin the disk down as before. } The above procedure will make shutdown(8) work properly with kernels before this change, ones with this workaround and later ones without it. To accelerate shutdown(8) updates, if the compat mode is in use, this patch prints BIG FAT warning for five seconds during shutdown (the optimal interval to annoy the user just the right amount discovered by hours of tireless usability testing). Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-05-05 03:28:48 +08:00
} else {
/* Some odd clown BIOSen issue spindown on power off (ACPI S4
* or S5) causing some drives to spin up and down again.
*/
if ((qc->ap->flags & ATA_FLAG_NO_POWEROFF_SPINDOWN) &&
system_state == SYSTEM_POWER_OFF)
goto skip;
if ((qc->ap->flags & ATA_FLAG_NO_HIBERNATE_SPINDOWN) &&
system_entering_hibernation())
goto skip;
/* Issue ATA STANDBY IMMEDIATE command */
tf->command = ATA_CMD_STANDBYNOW1;
libata: implement libata.spindown_compat Now that libata uses sd->manage_start_stop, libata spins down disk on shutdown. In an attempt to compensate libata's previous shortcoming, some distros sync and spin down disks attached via libata in their shutdown(8). Some disks spin back up just to spin down again on STANDBYNOW1 if the command is issued when the disk is spun down, so this double spinning down causes problem. This patch implements module parameter libata.spindown_compat which, when set to one (default value), prevents libata from spinning down disks on shutdown thus avoiding double spinning down. Note that libata spins down disks for suspend to mem and disk, so with libata.spindown_compat set to one, disks should be properly spun down in all cases without modifying shutdown(8). shutdown(8) should be fixed eventually. Some drive do spin up on SYNCHRONZE_CACHE even when their cache is clean. Those disks currently spin up briefly when sd tries to shutdown the device and then the machine powers off immediately, which can't be good for the head. We can't skip SYNCHRONIZE_CACHE during shudown as it can be dangerous data integrity-wise. So, this spindown_compat parameter is already scheduled for removal by the end of the next year and here's what shutdown(8) should do. * Check whether /sys/modules/libata/parameters/spindown_compat exists. If it does, write 0 to it. * For each libata harddisk { * Check whether /sys/class/scsi_disk/h:c:i:l/manage_start_stop exists. Iff it doesn't, synchronize cache and spin the disk down as before. } The above procedure will make shutdown(8) work properly with kernels before this change, ones with this workaround and later ones without it. To accelerate shutdown(8) updates, if the compat mode is in use, this patch prints BIG FAT warning for five seconds during shutdown (the optimal interval to annoy the user just the right amount discovered by hours of tireless usability testing). Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-05-05 03:28:48 +08:00
}
/*
* Standby and Idle condition timers could be implemented but that
* would require libata to implement the Power condition mode page
* and allow the user to change it. Changing mode pages requires
* MODE SELECT to be implemented.
*/
return 0;
invalid_fld:
ata_scsi_set_invalid_field(qc->dev, scmd, fp, bp);
return 1;
skip:
scmd->result = SAM_STAT_GOOD;
return 1;
}
/**
* ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command
* @qc: Storage for translated ATA taskfile
*
* Sets up an ATA taskfile to issue FLUSH CACHE or
* FLUSH CACHE EXT.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Zero on success, non-zero on error.
*/
static unsigned int ata_scsi_flush_xlat(struct ata_queued_cmd *qc)
{
struct ata_taskfile *tf = &qc->tf;
tf->flags |= ATA_TFLAG_DEVICE;
tf->protocol = ATA_PROT_NODATA;
if (qc->dev->flags & ATA_DFLAG_FLUSH_EXT)
tf->command = ATA_CMD_FLUSH_EXT;
else
tf->command = ATA_CMD_FLUSH;
/* flush is critical for IO integrity, consider it an IO command */
qc->flags |= ATA_QCFLAG_IO;
return 0;
}
/**
* scsi_6_lba_len - Get LBA and transfer length
* @cdb: SCSI command to translate
*
* Calculate LBA and transfer length for 6-byte commands.
*
* RETURNS:
* @plba: the LBA
* @plen: the transfer length
*/
static void scsi_6_lba_len(const u8 *cdb, u64 *plba, u32 *plen)
{
u64 lba = 0;
u32 len;
VPRINTK("six-byte command\n");
lba |= ((u64)(cdb[1] & 0x1f)) << 16;
lba |= ((u64)cdb[2]) << 8;
lba |= ((u64)cdb[3]);
len = cdb[4];
*plba = lba;
*plen = len;
}
/**
* scsi_10_lba_len - Get LBA and transfer length
* @cdb: SCSI command to translate
*
* Calculate LBA and transfer length for 10-byte commands.
*
* RETURNS:
* @plba: the LBA
* @plen: the transfer length
*/
static void scsi_10_lba_len(const u8 *cdb, u64 *plba, u32 *plen)
{
u64 lba = 0;
u32 len = 0;
VPRINTK("ten-byte command\n");
lba |= ((u64)cdb[2]) << 24;
lba |= ((u64)cdb[3]) << 16;
lba |= ((u64)cdb[4]) << 8;
lba |= ((u64)cdb[5]);
len |= ((u32)cdb[7]) << 8;
len |= ((u32)cdb[8]);
*plba = lba;
*plen = len;
}
/**
* scsi_16_lba_len - Get LBA and transfer length
* @cdb: SCSI command to translate
*
* Calculate LBA and transfer length for 16-byte commands.
*
* RETURNS:
* @plba: the LBA
* @plen: the transfer length
*/
static void scsi_16_lba_len(const u8 *cdb, u64 *plba, u32 *plen)
{
u64 lba = 0;
u32 len = 0;
VPRINTK("sixteen-byte command\n");
lba |= ((u64)cdb[2]) << 56;
lba |= ((u64)cdb[3]) << 48;
lba |= ((u64)cdb[4]) << 40;
lba |= ((u64)cdb[5]) << 32;
lba |= ((u64)cdb[6]) << 24;
lba |= ((u64)cdb[7]) << 16;
lba |= ((u64)cdb[8]) << 8;
lba |= ((u64)cdb[9]);
len |= ((u32)cdb[10]) << 24;
len |= ((u32)cdb[11]) << 16;
len |= ((u32)cdb[12]) << 8;
len |= ((u32)cdb[13]);
*plba = lba;
*plen = len;
}
/**
* ata_scsi_verify_xlat - Translate SCSI VERIFY command into an ATA one
* @qc: Storage for translated ATA taskfile
*
* Converts SCSI VERIFY command to an ATA READ VERIFY command.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Zero on success, non-zero on error.
*/
static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
struct ata_taskfile *tf = &qc->tf;
struct ata_device *dev = qc->dev;
u64 dev_sectors = qc->dev->n_sectors;
const u8 *cdb = scmd->cmnd;
u64 block;
u32 n_block;
u16 fp;
tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
tf->protocol = ATA_PROT_NODATA;
if (cdb[0] == VERIFY) {
if (scmd->cmd_len < 10) {
fp = 9;
goto invalid_fld;
}
scsi_10_lba_len(cdb, &block, &n_block);
} else if (cdb[0] == VERIFY_16) {
if (scmd->cmd_len < 16) {
fp = 15;
goto invalid_fld;
}
scsi_16_lba_len(cdb, &block, &n_block);
} else {
fp = 0;
goto invalid_fld;
}
if (!n_block)
goto nothing_to_do;
if (block >= dev_sectors)
goto out_of_range;
if ((block + n_block) > dev_sectors)
goto out_of_range;
if (dev->flags & ATA_DFLAG_LBA) {
tf->flags |= ATA_TFLAG_LBA;
if (lba_28_ok(block, n_block)) {
/* use LBA28 */
tf->command = ATA_CMD_VERIFY;
tf->device |= (block >> 24) & 0xf;
} else if (lba_48_ok(block, n_block)) {
if (!(dev->flags & ATA_DFLAG_LBA48))
goto out_of_range;
/* use LBA48 */
tf->flags |= ATA_TFLAG_LBA48;
tf->command = ATA_CMD_VERIFY_EXT;
tf->hob_nsect = (n_block >> 8) & 0xff;
tf->hob_lbah = (block >> 40) & 0xff;
tf->hob_lbam = (block >> 32) & 0xff;
tf->hob_lbal = (block >> 24) & 0xff;
} else
/* request too large even for LBA48 */
goto out_of_range;
tf->nsect = n_block & 0xff;
tf->lbah = (block >> 16) & 0xff;
tf->lbam = (block >> 8) & 0xff;
tf->lbal = block & 0xff;
tf->device |= ATA_LBA;
} else {
/* CHS */
u32 sect, head, cyl, track;
if (!lba_28_ok(block, n_block))
goto out_of_range;
/* Convert LBA to CHS */
track = (u32)block / dev->sectors;
cyl = track / dev->heads;
head = track % dev->heads;
sect = (u32)block % dev->sectors + 1;
DPRINTK("block %u track %u cyl %u head %u sect %u\n",
(u32)block, track, cyl, head, sect);
/* Check whether the converted CHS can fit.
Cylinder: 0-65535
Head: 0-15
Sector: 1-255*/
if ((cyl >> 16) || (head >> 4) || (sect >> 8) || (!sect))
goto out_of_range;
tf->command = ATA_CMD_VERIFY;
tf->nsect = n_block & 0xff; /* Sector count 0 means 256 sectors */
tf->lbal = sect;
tf->lbam = cyl;
tf->lbah = cyl >> 8;
tf->device |= head;
}
return 0;
invalid_fld:
ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff);
return 1;
out_of_range:
ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x0);
/* "Logical Block Address out of range" */
return 1;
nothing_to_do:
scmd->result = SAM_STAT_GOOD;
return 1;
}
static bool ata_check_nblocks(struct scsi_cmnd *scmd, u32 n_blocks)
{
struct request *rq = scmd->request;
u32 req_blocks;
if (!blk_rq_is_passthrough(rq))
return true;
req_blocks = blk_rq_bytes(rq) / scmd->device->sector_size;
if (n_blocks > req_blocks)
return false;
return true;
}
/**
* ata_scsi_rw_xlat - Translate SCSI r/w command into an ATA one
* @qc: Storage for translated ATA taskfile
*
* Converts any of six SCSI read/write commands into the
* ATA counterpart, including starting sector (LBA),
* sector count, and taking into account the device's LBA48
* support.
*
* Commands %READ_6, %READ_10, %READ_16, %WRITE_6, %WRITE_10, and
* %WRITE_16 are currently supported.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Zero on success, non-zero on error.
*/
static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
const u8 *cdb = scmd->cmnd;
struct request *rq = scmd->request;
int class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
unsigned int tf_flags = 0;
u64 block;
u32 n_block;
int rc;
u16 fp = 0;
if (cdb[0] == WRITE_10 || cdb[0] == WRITE_6 || cdb[0] == WRITE_16)
tf_flags |= ATA_TFLAG_WRITE;
/* Calculate the SCSI LBA, transfer length and FUA. */
switch (cdb[0]) {
case READ_10:
case WRITE_10:
if (unlikely(scmd->cmd_len < 10)) {
fp = 9;
goto invalid_fld;
}
scsi_10_lba_len(cdb, &block, &n_block);
if (cdb[1] & (1 << 3))
tf_flags |= ATA_TFLAG_FUA;
if (!ata_check_nblocks(scmd, n_block))
goto invalid_fld;
break;
case READ_6:
case WRITE_6:
if (unlikely(scmd->cmd_len < 6)) {
fp = 5;
goto invalid_fld;
}
scsi_6_lba_len(cdb, &block, &n_block);
/* for 6-byte r/w commands, transfer length 0
* means 256 blocks of data, not 0 block.
*/
if (!n_block)
n_block = 256;
if (!ata_check_nblocks(scmd, n_block))
goto invalid_fld;
break;
case READ_16:
case WRITE_16:
if (unlikely(scmd->cmd_len < 16)) {
fp = 15;
goto invalid_fld;
}
scsi_16_lba_len(cdb, &block, &n_block);
if (cdb[1] & (1 << 3))
tf_flags |= ATA_TFLAG_FUA;
if (!ata_check_nblocks(scmd, n_block))
goto invalid_fld;
break;
default:
DPRINTK("no-byte command\n");
fp = 0;
goto invalid_fld;
}
/* Check and compose ATA command */
if (!n_block)
/* For 10-byte and 16-byte SCSI R/W commands, transfer
* length 0 means transfer 0 block of data.
* However, for ATA R/W commands, sector count 0 means
* 256 or 65536 sectors, not 0 sectors as in SCSI.
*
* WARNING: one or two older ATA drives treat 0 as 0...
*/
goto nothing_to_do;
qc->flags |= ATA_QCFLAG_IO;
qc->nbytes = n_block * scmd->device->sector_size;
rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags,
qc->hw_tag, class);
if (likely(rc == 0))
return 0;
if (rc == -ERANGE)
goto out_of_range;
/* treat all other errors as -EINVAL, fall through */
invalid_fld:
ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff);
return 1;
out_of_range:
ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x0);
/* "Logical Block Address out of range" */
return 1;
nothing_to_do:
scmd->result = SAM_STAT_GOOD;
return 1;
}
static void ata_qc_done(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *cmd = qc->scsicmd;
void (*done)(struct scsi_cmnd *) = qc->scsidone;
ata_qc_free(qc);
done(cmd);
}
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
int need_sense = (qc->err_mask != 0);
/* For ATA pass thru (SAT) commands, generate a sense block if
* user mandated it or if there's an error. Note that if we
* generate because the user forced us to [CK_COND =1], a check
* condition is generated and the ATA register values are returned
* whether the command completed successfully or not. If there
* was no error, we use the following sense data:
* sk = RECOVERED ERROR
* asc,ascq = ATA PASS-THROUGH INFORMATION AVAILABLE
*/
if (((cdb[0] == ATA_16) || (cdb[0] == ATA_12)) &&
((cdb[2] & 0x20) || need_sense))
ata_gen_passthru_sense(qc);
else if (qc->flags & ATA_QCFLAG_SENSE_VALID)
cmd->result = SAM_STAT_CHECK_CONDITION;
else if (need_sense)
ata_gen_ata_sense(qc);
else
cmd->result = SAM_STAT_GOOD;
if (need_sense && !ap->ops->error_handler)
ata_dump_status(ap->print_id, &qc->result_tf);
ata_qc_done(qc);
}
/**
* ata_scsi_translate - Translate then issue SCSI command to ATA device
* @dev: ATA device to which the command is addressed
* @cmd: SCSI command to execute
* @xlat_func: Actor which translates @cmd to an ATA taskfile
*
* Our ->queuecommand() function has decided that the SCSI
* command issued can be directly translated into an ATA
* command, rather than handled internally.
*
* This function sets up an ata_queued_cmd structure for the
* SCSI command, and sends that ata_queued_cmd to the hardware.
*
* The xlat_func argument (actor) returns 0 if ready to execute
* ATA command, else 1 to finish translation. If 1 is returned
* then cmd->result (and possibly cmd->sense_buffer) are assumed
* to be set reflecting an error condition or clean (early)
* termination.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* 0 on success, SCSI_ML_QUEUE_DEVICE_BUSY if the command
* needs to be deferred.
*/
static int ata_scsi_translate(struct ata_device *dev, struct scsi_cmnd *cmd,
ata_xlat_func_t xlat_func)
{
struct ata_port *ap = dev->link->ap;
struct ata_queued_cmd *qc;
int rc;
VPRINTK("ENTER\n");
qc = ata_scsi_qc_new(dev, cmd);
if (!qc)
goto err_mem;
/* data is present; dma-map it */
if (cmd->sc_data_direction == DMA_FROM_DEVICE ||
cmd->sc_data_direction == DMA_TO_DEVICE) {
if (unlikely(scsi_bufflen(cmd) < 1)) {
ata_dev_warn(dev, "WARNING: zero len r/w req\n");
goto err_did;
}
ata_sg_init(qc, scsi_sglist(cmd), scsi_sg_count(cmd));
qc->dma_dir = cmd->sc_data_direction;
}
qc->complete_fn = ata_scsi_qc_complete;
if (xlat_func(qc))
goto early_finish;
if (ap->ops->qc_defer) {
if ((rc = ap->ops->qc_defer(qc)))
goto defer;
}
/* select device, send command to hardware */
ata_qc_issue(qc);
VPRINTK("EXIT\n");
return 0;
early_finish:
ata_qc_free(qc);
cmd->scsi_done(cmd);
DPRINTK("EXIT - early finish (good or error)\n");
return 0;
err_did:
ata_qc_free(qc);
cmd->result = (DID_ERROR << 16);
cmd->scsi_done(cmd);
err_mem:
DPRINTK("EXIT - internal\n");
return 0;
defer:
ata_qc_free(qc);
DPRINTK("EXIT - defer\n");
if (rc == ATA_DEFER_LINK)
return SCSI_MLQUEUE_DEVICE_BUSY;
else
return SCSI_MLQUEUE_HOST_BUSY;
}
struct ata_scsi_args {
struct ata_device *dev;
u16 *id;
struct scsi_cmnd *cmd;
};
/**
* ata_scsi_rbuf_get - Map response buffer.
* @cmd: SCSI command containing buffer to be mapped.
* @flags: unsigned long variable to store irq enable status
* @copy_in: copy in from user buffer
*
* Prepare buffer for simulated SCSI commands.
*
* LOCKING:
* spin_lock_irqsave(ata_scsi_rbuf_lock) on success
*
* RETURNS:
* Pointer to response buffer.
*/
static void *ata_scsi_rbuf_get(struct scsi_cmnd *cmd, bool copy_in,
unsigned long *flags)
{
spin_lock_irqsave(&ata_scsi_rbuf_lock, *flags);
memset(ata_scsi_rbuf, 0, ATA_SCSI_RBUF_SIZE);
if (copy_in)
sg_copy_to_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE);
return ata_scsi_rbuf;
}
/**
* ata_scsi_rbuf_put - Unmap response buffer.
* @cmd: SCSI command containing buffer to be unmapped.
* @copy_out: copy out result
* @flags: @flags passed to ata_scsi_rbuf_get()
*
* Returns rbuf buffer. The result is copied to @cmd's buffer if
* @copy_back is true.
*
* LOCKING:
* Unlocks ata_scsi_rbuf_lock.
*/
static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, bool copy_out,
unsigned long *flags)
{
if (copy_out)
sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd),
ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE);
spin_unlock_irqrestore(&ata_scsi_rbuf_lock, *flags);
}
/**
* ata_scsi_rbuf_fill - wrapper for SCSI command simulators
* @args: device IDENTIFY data / SCSI command of interest.
* @actor: Callback hook for desired SCSI command simulator
*
* Takes care of the hard work of simulating a SCSI command...
* Mapping the response buffer, calling the command's handler,
* and handling the handler's return value. This return value
* indicates whether the handler wishes the SCSI command to be
* completed successfully (0), or not (in which case cmd->result
* and sense buffer are assumed to be set).
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static void ata_scsi_rbuf_fill(struct ata_scsi_args *args,
unsigned int (*actor)(struct ata_scsi_args *args, u8 *rbuf))
{
u8 *rbuf;
unsigned int rc;
struct scsi_cmnd *cmd = args->cmd;
unsigned long flags;
rbuf = ata_scsi_rbuf_get(cmd, false, &flags);
rc = actor(args, rbuf);
ata_scsi_rbuf_put(cmd, rc == 0, &flags);
if (rc == 0)
cmd->result = SAM_STAT_GOOD;
}
/**
* ata_scsiop_inq_std - Simulate INQUIRY command
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Returns standard device identification data associated
* with non-VPD INQUIRY command output.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
{
static const u8 versions[] = {
0x00,
0x60, /* SAM-3 (no version claimed) */
0x03,
0x20, /* SBC-2 (no version claimed) */
0x03,
0x00 /* SPC-3 (no version claimed) */
};
static const u8 versions_zbc[] = {
0x00,
0xA0, /* SAM-5 (no version claimed) */
0x06,
0x00, /* SBC-4 (no version claimed) */
0x05,
0xC0, /* SPC-5 (no version claimed) */
0x60,
0x24, /* ZBC r05 */
};
u8 hdr[] = {
TYPE_DISK,
0,
0x5, /* claim SPC-3 version compatibility */
2,
95 - 4,
0,
0,
2
};
VPRINTK("ENTER\n");
/* set scsi removable (RMB) bit per ata bit, or if the
* AHCI port says it's external (Hotplug-capable, eSATA).
*/
if (ata_id_removable(args->id) ||
(args->dev->link->ap->pflags & ATA_PFLAG_EXTERNAL))
hdr[1] |= (1 << 7);
if (args->dev->class == ATA_DEV_ZAC) {
hdr[0] = TYPE_ZBC;
hdr[2] = 0x7; /* claim SPC-5 version compatibility */
}
memcpy(rbuf, hdr, sizeof(hdr));
memcpy(&rbuf[8], "ATA ", 8);
ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16);
/* From SAT, use last 2 words from fw rev unless they are spaces */
ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV + 2, 4);
if (strncmp(&rbuf[32], " ", 4) == 0)
ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4);
if (rbuf[32] == 0 || rbuf[32] == ' ')
memcpy(&rbuf[32], "n/a ", 4);
if (ata_id_zoned_cap(args->id) || args->dev->class == ATA_DEV_ZAC)
memcpy(rbuf + 58, versions_zbc, sizeof(versions_zbc));
else
memcpy(rbuf + 58, versions, sizeof(versions));
return 0;
}
/**
* ata_scsiop_inq_00 - Simulate INQUIRY VPD page 0, list of pages
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Returns list of inquiry VPD pages available.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
{
int num_pages;
static const u8 pages[] = {
0x00, /* page 0x00, this page */
0x80, /* page 0x80, unit serial no page */
0x83, /* page 0x83, device ident page */
0x89, /* page 0x89, ata info page */
0xb0, /* page 0xb0, block limits page */
0xb1, /* page 0xb1, block device characteristics page */
0xb2, /* page 0xb2, thin provisioning page */
0xb6, /* page 0xb6, zoned block device characteristics */
};
num_pages = sizeof(pages);
if (!(args->dev->flags & ATA_DFLAG_ZAC))
num_pages--;
rbuf[3] = num_pages; /* number of supported VPD pages */
memcpy(rbuf + 4, pages, num_pages);
return 0;
}
/**
* ata_scsiop_inq_80 - Simulate INQUIRY VPD page 80, device serial number
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Returns ATA device serial number.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf)
{
static const u8 hdr[] = {
0,
0x80, /* this page code */
0,
ATA_ID_SERNO_LEN, /* page len */
};
memcpy(rbuf, hdr, sizeof(hdr));
ata_id_string(args->id, (unsigned char *) &rbuf[4],
ATA_ID_SERNO, ATA_ID_SERNO_LEN);
return 0;
}
/**
* ata_scsiop_inq_83 - Simulate INQUIRY VPD page 83, device identity
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Yields two logical unit device identification designators:
* - vendor specific ASCII containing the ATA serial number
* - SAT defined "t10 vendor id based" containing ASCII vendor
* name ("ATA "), model and serial numbers.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
{
const int sat_model_serial_desc_len = 68;
int num;
rbuf[1] = 0x83; /* this page code */
num = 4;
/* piv=0, assoc=lu, code_set=ACSII, designator=vendor */
rbuf[num + 0] = 2;
rbuf[num + 3] = ATA_ID_SERNO_LEN;
num += 4;
ata_id_string(args->id, (unsigned char *) rbuf + num,
ATA_ID_SERNO, ATA_ID_SERNO_LEN);
num += ATA_ID_SERNO_LEN;
/* SAT defined lu model and serial numbers descriptor */
/* piv=0, assoc=lu, code_set=ACSII, designator=t10 vendor id */
rbuf[num + 0] = 2;
rbuf[num + 1] = 1;
rbuf[num + 3] = sat_model_serial_desc_len;
num += 4;
memcpy(rbuf + num, "ATA ", 8);
num += 8;
ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_PROD,
ATA_ID_PROD_LEN);
num += ATA_ID_PROD_LEN;
ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_SERNO,
ATA_ID_SERNO_LEN);
num += ATA_ID_SERNO_LEN;
if (ata_id_has_wwn(args->id)) {
/* SAT defined lu world wide name */
/* piv=0, assoc=lu, code_set=binary, designator=NAA */
rbuf[num + 0] = 1;
rbuf[num + 1] = 3;
rbuf[num + 3] = ATA_ID_WWN_LEN;
num += 4;
ata_id_string(args->id, (unsigned char *) rbuf + num,
ATA_ID_WWN, ATA_ID_WWN_LEN);
num += ATA_ID_WWN_LEN;
}
rbuf[3] = num - 4; /* page len (assume less than 256 bytes) */
return 0;
}
/**
* ata_scsiop_inq_89 - Simulate INQUIRY VPD page 89, ATA info
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Yields SAT-specified ATA VPD page.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf)
{
rbuf[1] = 0x89; /* our page code */
rbuf[2] = (0x238 >> 8); /* page size fixed at 238h */
rbuf[3] = (0x238 & 0xff);
memcpy(&rbuf[8], "linux ", 8);
memcpy(&rbuf[16], "libata ", 16);
memcpy(&rbuf[32], DRV_VERSION, 4);
rbuf[36] = 0x34; /* force D2H Reg FIS (34h) */
rbuf[37] = (1 << 7); /* bit 7 indicates Command FIS */
/* TODO: PMP? */
/* we don't store the ATA device signature, so we fake it */
rbuf[38] = ATA_DRDY; /* really, this is Status reg */
rbuf[40] = 0x1;
rbuf[48] = 0x1;
rbuf[56] = ATA_CMD_ID_ATA;
memcpy(&rbuf[60], &args->id[0], 512);
return 0;
}
static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf)
{
u16 min_io_sectors;
rbuf[1] = 0xb0;
rbuf[3] = 0x3c; /* required VPD size with unmap support */
/*
* Optimal transfer length granularity.
*
* This is always one physical block, but for disks with a smaller
* logical than physical sector size we need to figure out what the
* latter is.
*/
min_io_sectors = 1 << ata_id_log2_per_physical_sector(args->id);
put_unaligned_be16(min_io_sectors, &rbuf[6]);
/*
* Optimal unmap granularity.
*
* The ATA spec doesn't even know about a granularity or alignment
* for the TRIM command. We can leave away most of the unmap related
* VPD page entries, but we have specifify a granularity to signal
* that we support some form of unmap - in thise case via WRITE SAME
* with the unmap bit set.
*/
if (ata_id_has_trim(args->id)) {
put_unaligned_be64(65535 * ATA_MAX_TRIM_RNUM, &rbuf[36]);
put_unaligned_be32(1, &rbuf[28]);
}
return 0;
}
static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf)
{
int form_factor = ata_id_form_factor(args->id);
int media_rotation_rate = ata_id_rotation_rate(args->id);
u8 zoned = ata_id_zoned_cap(args->id);
rbuf[1] = 0xb1;
rbuf[3] = 0x3c;
rbuf[4] = media_rotation_rate >> 8;
rbuf[5] = media_rotation_rate;
rbuf[7] = form_factor;
if (zoned)
rbuf[8] = (zoned << 4);
return 0;
}
static unsigned int ata_scsiop_inq_b2(struct ata_scsi_args *args, u8 *rbuf)
{
/* SCSI Thin Provisioning VPD page: SBC-3 rev 22 or later */
rbuf[1] = 0xb2;
rbuf[3] = 0x4;
rbuf[5] = 1 << 6; /* TPWS */
return 0;
}
static unsigned int ata_scsiop_inq_b6(struct ata_scsi_args *args, u8 *rbuf)
{
/*
* zbc-r05 SCSI Zoned Block device characteristics VPD page
*/
rbuf[1] = 0xb6;
rbuf[3] = 0x3C;
/*
* URSWRZ bit is only meaningful for host-managed ZAC drives
*/
if (args->dev->zac_zoned_cap & 1)
rbuf[4] |= 1;
put_unaligned_be32(args->dev->zac_zones_optimal_open, &rbuf[8]);
put_unaligned_be32(args->dev->zac_zones_optimal_nonseq, &rbuf[12]);
put_unaligned_be32(args->dev->zac_zones_max_open, &rbuf[16]);
return 0;
}
/**
* modecpy - Prepare response for MODE SENSE
* @dest: output buffer
* @src: data being copied
* @n: length of mode page
* @changeable: whether changeable parameters are requested
*
* Generate a generic MODE SENSE page for either current or changeable
* parameters.
*
* LOCKING:
* None.
*/
static void modecpy(u8 *dest, const u8 *src, int n, bool changeable)
{
if (changeable) {
memcpy(dest, src, 2);
memset(dest + 2, 0, n - 2);
} else {
memcpy(dest, src, n);
}
}
/**
* ata_msense_caching - Simulate MODE SENSE caching info page
* @id: device IDENTIFY data
* @buf: output buffer
* @changeable: whether changeable parameters are requested
*
* Generate a caching info page, which conditionally indicates
* write caching to the SCSI layer, depending on device
* capabilities.
*
* LOCKING:
* None.
*/
static unsigned int ata_msense_caching(u16 *id, u8 *buf, bool changeable)
{
modecpy(buf, def_cache_mpage, sizeof(def_cache_mpage), changeable);
if (changeable) {
buf[2] |= (1 << 2); /* ata_mselect_caching() */
} else {
buf[2] |= (ata_id_wcache_enabled(id) << 2); /* write cache enable */
buf[12] |= (!ata_id_rahead_enabled(id) << 5); /* disable read ahead */
}
return sizeof(def_cache_mpage);
}
/**
* ata_msense_control - Simulate MODE SENSE control mode page
* @dev: ATA device of interest
* @buf: output buffer
* @changeable: whether changeable parameters are requested
*
* Generate a generic MODE SENSE control mode page.
*
* LOCKING:
* None.
*/
static unsigned int ata_msense_control(struct ata_device *dev, u8 *buf,
bool changeable)
{
modecpy(buf, def_control_mpage, sizeof(def_control_mpage), changeable);
if (changeable) {
buf[2] |= (1 << 2); /* ata_mselect_control() */
} else {
bool d_sense = (dev->flags & ATA_DFLAG_D_SENSE);
buf[2] |= (d_sense << 2); /* descriptor format sense data */
}
return sizeof(def_control_mpage);
}
/**
* ata_msense_rw_recovery - Simulate MODE SENSE r/w error recovery page
* @buf: output buffer
* @changeable: whether changeable parameters are requested
*
* Generate a generic MODE SENSE r/w error recovery page.
*
* LOCKING:
* None.
*/
static unsigned int ata_msense_rw_recovery(u8 *buf, bool changeable)
{
modecpy(buf, def_rw_recovery_mpage, sizeof(def_rw_recovery_mpage),
changeable);
return sizeof(def_rw_recovery_mpage);
}
/*
* We can turn this into a real blacklist if it's needed, for now just
* blacklist any Maxtor BANC1G10 revision firmware
*/
static int ata_dev_supports_fua(u16 *id)
{
unsigned char model[ATA_ID_PROD_LEN + 1], fw[ATA_ID_FW_REV_LEN + 1];
if (!libata_fua)
return 0;
if (!ata_id_has_fua(id))
return 0;
ata_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
ata_id_c_string(id, fw, ATA_ID_FW_REV, sizeof(fw));
if (strcmp(model, "Maxtor"))
return 1;
if (strcmp(fw, "BANC1G10"))
return 1;
return 0; /* blacklisted */
}
/**
* ata_scsiop_mode_sense - Simulate MODE SENSE 6, 10 commands
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Simulate MODE SENSE commands. Assume this is invoked for direct
* access devices (e.g. disks) only. There should be no block
* descriptor for other device types.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf)
{
struct ata_device *dev = args->dev;
u8 *scsicmd = args->cmd->cmnd, *p = rbuf;
static const u8 sat_blk_desc[] = {
0, 0, 0, 0, /* number of blocks: sat unspecified */
0,
0, 0x2, 0x0 /* block length: 512 bytes */
};
u8 pg, spg;
unsigned int ebd, page_control, six_byte;
u8 dpofua, bp = 0xff;
u16 fp;
VPRINTK("ENTER\n");
six_byte = (scsicmd[0] == MODE_SENSE);
ebd = !(scsicmd[1] & 0x8); /* dbd bit inverted == edb */
/*
* LLBA bit in msense(10) ignored (compliant)
*/
page_control = scsicmd[2] >> 6;
switch (page_control) {
case 0: /* current */
case 1: /* changeable */
case 2: /* defaults */
break; /* supported */
case 3: /* saved */
goto saving_not_supp;
default:
fp = 2;
bp = 6;
goto invalid_fld;
}
if (six_byte)
p += 4 + (ebd ? 8 : 0);
else
p += 8 + (ebd ? 8 : 0);
pg = scsicmd[2] & 0x3f;
spg = scsicmd[3];
/*
* No mode subpages supported (yet) but asking for _all_
* subpages may be valid
*/
if (spg && (spg != ALL_SUB_MPAGES)) {
fp = 3;
goto invalid_fld;
}
switch(pg) {
case RW_RECOVERY_MPAGE:
p += ata_msense_rw_recovery(p, page_control == 1);
break;
case CACHE_MPAGE:
p += ata_msense_caching(args->id, p, page_control == 1);
break;
case CONTROL_MPAGE:
p += ata_msense_control(args->dev, p, page_control == 1);
break;
case ALL_MPAGES:
p += ata_msense_rw_recovery(p, page_control == 1);
p += ata_msense_caching(args->id, p, page_control == 1);
p += ata_msense_control(args->dev, p, page_control == 1);
break;
default: /* invalid page code */
fp = 2;
goto invalid_fld;
}
dpofua = 0;
if (ata_dev_supports_fua(args->id) && (dev->flags & ATA_DFLAG_LBA48) &&
(!(dev->flags & ATA_DFLAG_PIO) || dev->multi_count))
dpofua = 1 << 4;
if (six_byte) {
rbuf[0] = p - rbuf - 1;
rbuf[2] |= dpofua;
if (ebd) {
rbuf[3] = sizeof(sat_blk_desc);
memcpy(rbuf + 4, sat_blk_desc, sizeof(sat_blk_desc));
}
} else {
unsigned int output_len = p - rbuf - 2;
rbuf[0] = output_len >> 8;
rbuf[1] = output_len;
rbuf[3] |= dpofua;
if (ebd) {
rbuf[7] = sizeof(sat_blk_desc);
memcpy(rbuf + 8, sat_blk_desc, sizeof(sat_blk_desc));
}
}
return 0;
invalid_fld:
ata_scsi_set_invalid_field(dev, args->cmd, fp, bp);
return 1;
saving_not_supp:
ata_scsi_set_sense(dev, args->cmd, ILLEGAL_REQUEST, 0x39, 0x0);
/* "Saving parameters not supported" */
return 1;
}
/**
* ata_scsiop_read_cap - Simulate READ CAPACITY[ 16] commands
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Simulate READ CAPACITY commands.
*
* LOCKING:
* None.
*/
static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf)
{
struct ata_device *dev = args->dev;
u64 last_lba = dev->n_sectors - 1; /* LBA of the last block */
u32 sector_size; /* physical sector size in bytes */
u8 log2_per_phys;
u16 lowest_aligned;
sector_size = ata_id_logical_sector_size(dev->id);
log2_per_phys = ata_id_log2_per_physical_sector(dev->id);
lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys);
VPRINTK("ENTER\n");
if (args->cmd->cmnd[0] == READ_CAPACITY) {
if (last_lba >= 0xffffffffULL)
last_lba = 0xffffffff;
/* sector count, 32-bit */
rbuf[0] = last_lba >> (8 * 3);
rbuf[1] = last_lba >> (8 * 2);
rbuf[2] = last_lba >> (8 * 1);
rbuf[3] = last_lba;
/* sector size */
rbuf[4] = sector_size >> (8 * 3);
rbuf[5] = sector_size >> (8 * 2);
rbuf[6] = sector_size >> (8 * 1);
rbuf[7] = sector_size;
} else {
/* sector count, 64-bit */
rbuf[0] = last_lba >> (8 * 7);
rbuf[1] = last_lba >> (8 * 6);
rbuf[2] = last_lba >> (8 * 5);
rbuf[3] = last_lba >> (8 * 4);
rbuf[4] = last_lba >> (8 * 3);
rbuf[5] = last_lba >> (8 * 2);
rbuf[6] = last_lba >> (8 * 1);
rbuf[7] = last_lba;
/* sector size */
rbuf[ 8] = sector_size >> (8 * 3);
rbuf[ 9] = sector_size >> (8 * 2);
rbuf[10] = sector_size >> (8 * 1);
rbuf[11] = sector_size;
rbuf[12] = 0;
rbuf[13] = log2_per_phys;
rbuf[14] = (lowest_aligned >> 8) & 0x3f;
rbuf[15] = lowest_aligned;
if (ata_id_has_trim(args->id) &&
!(dev->horkage & ATA_HORKAGE_NOTRIM)) {
libata: Whitelist SSDs that are known to properly return zeroes after TRIM As defined, the DRAT (Deterministic Read After Trim) and RZAT (Return Zero After Trim) flags in the ATA Command Set are unreliable in the sense that they only define what happens if the device successfully executed the DSM TRIM command. TRIM is only advisory, however, and the device is free to silently ignore all or parts of the request. In practice this renders the DRAT and RZAT flags completely useless and because the results are unpredictable we decided to disable discard in MD for 3.18 to avoid the risk of data corruption. Hardware vendors in the real world obviously need better guarantees than what the standards bodies provide. Unfortuntely those guarantees are encoded in product requirements documents rather than somewhere we can key off of them programatically. So we are compelled to disabling discard_zeroes_data for all devices unless we explicitly have data to support whitelisting them. This patch whitelists SSDs from a few of the main vendors. None of the whitelists are based on written guarantees. They are purely based on empirical evidence collected from internal and external users that have tested or qualified these drives in RAID deployments. The whitelist is only meant as a starting point and is by no means comprehensive: - All intel SSD models except for 510 - Micron M5?0/M600 - Samsung SSDs - Seagate SSDs Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Tejun Heo <tj@kernel.org>
2015-01-08 23:34:27 +08:00
rbuf[14] |= 0x80; /* LBPME */
libata: Whitelist SSDs that are known to properly return zeroes after TRIM As defined, the DRAT (Deterministic Read After Trim) and RZAT (Return Zero After Trim) flags in the ATA Command Set are unreliable in the sense that they only define what happens if the device successfully executed the DSM TRIM command. TRIM is only advisory, however, and the device is free to silently ignore all or parts of the request. In practice this renders the DRAT and RZAT flags completely useless and because the results are unpredictable we decided to disable discard in MD for 3.18 to avoid the risk of data corruption. Hardware vendors in the real world obviously need better guarantees than what the standards bodies provide. Unfortuntely those guarantees are encoded in product requirements documents rather than somewhere we can key off of them programatically. So we are compelled to disabling discard_zeroes_data for all devices unless we explicitly have data to support whitelisting them. This patch whitelists SSDs from a few of the main vendors. None of the whitelists are based on written guarantees. They are purely based on empirical evidence collected from internal and external users that have tested or qualified these drives in RAID deployments. The whitelist is only meant as a starting point and is by no means comprehensive: - All intel SSD models except for 510 - Micron M5?0/M600 - Samsung SSDs - Seagate SSDs Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Tejun Heo <tj@kernel.org>
2015-01-08 23:34:27 +08:00
if (ata_id_has_zero_after_trim(args->id) &&
dev->horkage & ATA_HORKAGE_ZERO_AFTER_TRIM) {
ata_dev_info(dev, "Enabling discard_zeroes_data\n");
rbuf[14] |= 0x40; /* LBPRZ */
}
}
if (ata_id_zoned_cap(args->id) ||
args->dev->class == ATA_DEV_ZAC)
rbuf[12] = (1 << 4); /* RC_BASIS */
}
return 0;
}
/**
* ata_scsiop_report_luns - Simulate REPORT LUNS command
* @args: device IDENTIFY data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Simulate REPORT LUNS command.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf)
{
VPRINTK("ENTER\n");
rbuf[3] = 8; /* just one lun, LUN 0, size 8 bytes */
return 0;
}
static void atapi_sense_complete(struct ata_queued_cmd *qc)
{
if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) {
/* FIXME: not quite right; we don't want the
* translation of taskfile registers into
* a sense descriptors, since that's only
* correct for ATA, not ATAPI
*/
ata_gen_passthru_sense(qc);
}
ata_qc_done(qc);
}
/* is it pointless to prefer PIO for "safety reasons"? */
static inline int ata_pio_use_silly(struct ata_port *ap)
{
return (ap->flags & ATA_FLAG_PIO_DMA);
}
static void atapi_request_sense(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
DPRINTK("ATAPI request sense\n");
memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
#ifdef CONFIG_ATA_SFF
if (ap->ops->sff_tf_read)
ap->ops->sff_tf_read(ap, &qc->tf);
#endif
/* fill these in, for the case where they are -not- overwritten */
cmd->sense_buffer[0] = 0x70;
cmd->sense_buffer[2] = qc->tf.feature >> 4;
ata_qc_reinit(qc);
/* setup sg table and init transfer direction */
sg_init_one(&qc->sgent, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
ata_sg_init(qc, &qc->sgent, 1);
qc->dma_dir = DMA_FROM_DEVICE;
memset(&qc->cdb, 0, qc->dev->cdb_len);
qc->cdb[0] = REQUEST_SENSE;
qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
qc->tf.command = ATA_CMD_PACKET;
if (ata_pio_use_silly(ap)) {
qc->tf.protocol = ATAPI_PROT_DMA;
qc->tf.feature |= ATAPI_PKT_DMA;
} else {
qc->tf.protocol = ATAPI_PROT_PIO;
qc->tf.lbam = SCSI_SENSE_BUFFERSIZE;
qc->tf.lbah = 0;
}
qc->nbytes = SCSI_SENSE_BUFFERSIZE;
qc->complete_fn = atapi_sense_complete;
ata_qc_issue(qc);
DPRINTK("EXIT\n");
}
/*
* ATAPI devices typically report zero for their SCSI version, and sometimes
* deviate from the spec WRT response data format. If SCSI version is
* reported as zero like normal, then we make the following fixups:
* 1) Fake MMC-5 version, to indicate to the Linux scsi midlayer this is a
* modern device.
* 2) Ensure response data format / ATAPI information are always correct.
*/
static void atapi_fixup_inquiry(struct scsi_cmnd *cmd)
{
u8 buf[4];
sg_copy_to_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, 4);
if (buf[2] == 0) {
buf[2] = 0x5;
buf[3] = 0x32;
}
sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, 4);
}
static void atapi_qc_complete(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *cmd = qc->scsicmd;
unsigned int err_mask = qc->err_mask;
VPRINTK("ENTER, err_mask 0x%X\n", err_mask);
/* handle completion from new EH */
if (unlikely(qc->ap->ops->error_handler &&
(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) {
if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
/* FIXME: not quite right; we don't want the
* translation of taskfile registers into a
* sense descriptors, since that's only
* correct for ATA, not ATAPI
*/
ata_gen_passthru_sense(qc);
}
/* SCSI EH automatically locks door if sdev->locked is
* set. Sometimes door lock request continues to
* fail, for example, when no media is present. This
* creates a loop - SCSI EH issues door lock which
* fails and gets invoked again to acquire sense data
* for the failed command.
*
* If door lock fails, always clear sdev->locked to
* avoid this infinite loop.
*
* This may happen before SCSI scan is complete. Make
* sure qc->dev->sdev isn't NULL before dereferencing.
*/
if (qc->cdb[0] == ALLOW_MEDIUM_REMOVAL && qc->dev->sdev)
qc->dev->sdev->locked = 0;
qc->scsicmd->result = SAM_STAT_CHECK_CONDITION;
ata_qc_done(qc);
return;
}
/* successful completion or old EH failure path */
if (unlikely(err_mask & AC_ERR_DEV)) {
cmd->result = SAM_STAT_CHECK_CONDITION;
atapi_request_sense(qc);
return;
} else if (unlikely(err_mask)) {
/* FIXME: not quite right; we don't want the
* translation of taskfile registers into
* a sense descriptors, since that's only
* correct for ATA, not ATAPI
*/
ata_gen_passthru_sense(qc);
} else {
if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
atapi_fixup_inquiry(cmd);
cmd->result = SAM_STAT_GOOD;
}
ata_qc_done(qc);
}
/**
* atapi_xlat - Initialize PACKET taskfile
* @qc: command structure to be initialized
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Zero on success, non-zero on failure.
*/
static unsigned int atapi_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
struct ata_device *dev = qc->dev;
int nodata = (scmd->sc_data_direction == DMA_NONE);
int using_pio = !nodata && (dev->flags & ATA_DFLAG_PIO);
unsigned int nbytes;
memset(qc->cdb, 0, dev->cdb_len);
memcpy(qc->cdb, scmd->cmnd, scmd->cmd_len);
qc->complete_fn = atapi_qc_complete;
qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
if (scmd->sc_data_direction == DMA_TO_DEVICE) {
qc->tf.flags |= ATA_TFLAG_WRITE;
DPRINTK("direction: write\n");
}
qc->tf.command = ATA_CMD_PACKET;
ata_qc_set_pc_nbytes(qc);
/* check whether ATAPI DMA is safe */
if (!nodata && !using_pio && atapi_check_dma(qc))
using_pio = 1;
libata: bump transfer chunk size if it's odd None of the drives I have follows what the standard says about transfer chunk size. Of the four SATA and six PATA ATAPI devices tested, four ignore transfer chunk size completely and the ones which honor it don't behave according to the spec when it's odd. According to the spec, transfer chunk size can be odd if the amount of data to transfer equals or is smaller than the chunk size and the device can indicate the same odd number and transfer the whole thing at one go with a pad byte appended. However, in reality, none of the drives I have does that. They all indicate and transfer even number of bytes one byte shorter than the chunk size first; then indicate and transfer two bytes, which is clearly out of spec. In addition to unnecessary second PIO data phase, this also creates a weird problem when combined with SATA controllers which perform PIO via DMA. Some of these controllers use actualy number of bytes received to update DMA pointer so chunks which are sized 4n + 2 makes DMA pointer off by two bytes. This causes data corruption and buffer overruns. This patch rounds nbytes up to the nearest even number such that ATAPI devices don't split data transfer for the last odd byte. This shouldn't confuse controllers which depend on transfer chunk size as devices will report the rounded-up number, actually transfer that much and padding buffer is there to receive them. Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-11-26 19:58:02 +08:00
/* Some controller variants snoop this value for Packet
* transfers to do state machine and FIFO management. Thus we
* want to set it properly, and for DMA where it is
* effectively meaningless.
*/
nbytes = min(ata_qc_raw_nbytes(qc), (unsigned int)63 * 1024);
libata: bump transfer chunk size if it's odd None of the drives I have follows what the standard says about transfer chunk size. Of the four SATA and six PATA ATAPI devices tested, four ignore transfer chunk size completely and the ones which honor it don't behave according to the spec when it's odd. According to the spec, transfer chunk size can be odd if the amount of data to transfer equals or is smaller than the chunk size and the device can indicate the same odd number and transfer the whole thing at one go with a pad byte appended. However, in reality, none of the drives I have does that. They all indicate and transfer even number of bytes one byte shorter than the chunk size first; then indicate and transfer two bytes, which is clearly out of spec. In addition to unnecessary second PIO data phase, this also creates a weird problem when combined with SATA controllers which perform PIO via DMA. Some of these controllers use actualy number of bytes received to update DMA pointer so chunks which are sized 4n + 2 makes DMA pointer off by two bytes. This causes data corruption and buffer overruns. This patch rounds nbytes up to the nearest even number such that ATAPI devices don't split data transfer for the last odd byte. This shouldn't confuse controllers which depend on transfer chunk size as devices will report the rounded-up number, actually transfer that much and padding buffer is there to receive them. Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-11-26 19:58:02 +08:00
/* Most ATAPI devices which honor transfer chunk size don't
* behave according to the spec when odd chunk size which
* matches the transfer length is specified. If the number of
* bytes to transfer is 2n+1. According to the spec, what
* should happen is to indicate that 2n+1 is going to be
* transferred and transfer 2n+2 bytes where the last byte is
* padding.
*
* In practice, this doesn't happen. ATAPI devices first
* indicate and transfer 2n bytes and then indicate and
* transfer 2 bytes where the last byte is padding.
*
* This inconsistency confuses several controllers which
* perform PIO using DMA such as Intel AHCIs and sil3124/32.
* These controllers use actual number of transferred bytes to
* update DMA pointer and transfer of 4n+2 bytes make those
libata: bump transfer chunk size if it's odd None of the drives I have follows what the standard says about transfer chunk size. Of the four SATA and six PATA ATAPI devices tested, four ignore transfer chunk size completely and the ones which honor it don't behave according to the spec when it's odd. According to the spec, transfer chunk size can be odd if the amount of data to transfer equals or is smaller than the chunk size and the device can indicate the same odd number and transfer the whole thing at one go with a pad byte appended. However, in reality, none of the drives I have does that. They all indicate and transfer even number of bytes one byte shorter than the chunk size first; then indicate and transfer two bytes, which is clearly out of spec. In addition to unnecessary second PIO data phase, this also creates a weird problem when combined with SATA controllers which perform PIO via DMA. Some of these controllers use actualy number of bytes received to update DMA pointer so chunks which are sized 4n + 2 makes DMA pointer off by two bytes. This causes data corruption and buffer overruns. This patch rounds nbytes up to the nearest even number such that ATAPI devices don't split data transfer for the last odd byte. This shouldn't confuse controllers which depend on transfer chunk size as devices will report the rounded-up number, actually transfer that much and padding buffer is there to receive them. Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-11-26 19:58:02 +08:00
* controller push DMA pointer by 4n+4 bytes because SATA data
* FISes are aligned to 4 bytes. This causes data corruption
* and buffer overrun.
*
* Always setting nbytes to even number solves this problem
* because then ATAPI devices don't have to split data at 2n
* boundaries.
*/
if (nbytes & 0x1)
nbytes++;
qc->tf.lbam = (nbytes & 0xFF);
qc->tf.lbah = (nbytes >> 8);
if (nodata)
qc->tf.protocol = ATAPI_PROT_NODATA;
else if (using_pio)
qc->tf.protocol = ATAPI_PROT_PIO;
else {
/* DMA data xfer */
qc->tf.protocol = ATAPI_PROT_DMA;
qc->tf.feature |= ATAPI_PKT_DMA;
if ((dev->flags & ATA_DFLAG_DMADIR) &&
(scmd->sc_data_direction != DMA_TO_DEVICE))
/* some SATA bridges need us to indicate data xfer direction */
qc->tf.feature |= ATAPI_DMADIR;
}
/* FIXME: We need to translate 0x05 READ_BLOCK_LIMITS to a MODE_SENSE
as ATAPI tape drives don't get this right otherwise */
return 0;
}
static struct ata_device *ata_find_dev(struct ata_port *ap, int devno)
{
if (!sata_pmp_attached(ap)) {
if (likely(devno >= 0 &&
devno < ata_link_max_devices(&ap->link)))
return &ap->link.device[devno];
} else {
if (likely(devno >= 0 &&
devno < ap->nr_pmp_links))
return &ap->pmp_link[devno].device[0];
}
return NULL;
}
static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
const struct scsi_device *scsidev)
{
int devno;
/* skip commands not addressed to targets we simulate */
if (!sata_pmp_attached(ap)) {
if (unlikely(scsidev->channel || scsidev->lun))
return NULL;
devno = scsidev->id;
} else {
if (unlikely(scsidev->id || scsidev->lun))
return NULL;
devno = scsidev->channel;
}
return ata_find_dev(ap, devno);
}
/**
* ata_scsi_find_dev - lookup ata_device from scsi_cmnd
* @ap: ATA port to which the device is attached
* @scsidev: SCSI device from which we derive the ATA device
*
* Given various information provided in struct scsi_cmnd,
* map that onto an ATA bus, and using that mapping
* determine which ata_device is associated with the
* SCSI command to be sent.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* Associated ATA device, or %NULL if not found.
*/
struct ata_device *
ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev)
{
struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
if (unlikely(!dev || !ata_dev_enabled(dev)))
return NULL;
return dev;
}
/*
* ata_scsi_map_proto - Map pass-thru protocol value to taskfile value.
* @byte1: Byte 1 from pass-thru CDB.
*
* RETURNS:
* ATA_PROT_UNKNOWN if mapping failed/unimplemented, protocol otherwise.
*/
static u8
ata_scsi_map_proto(u8 byte1)
{
switch((byte1 & 0x1e) >> 1) {
case 3: /* Non-data */
return ATA_PROT_NODATA;
case 6: /* DMA */
case 10: /* UDMA Data-in */
case 11: /* UDMA Data-Out */
return ATA_PROT_DMA;
case 4: /* PIO Data-in */
case 5: /* PIO Data-out */
return ATA_PROT_PIO;
case 12: /* FPDMA */
return ATA_PROT_NCQ;
case 0: /* Hard Reset */
case 1: /* SRST */
case 8: /* Device Diagnostic */
case 9: /* Device Reset */
case 7: /* DMA Queued */
case 15: /* Return Response Info */
default: /* Reserved */
break;
}
return ATA_PROT_UNKNOWN;
}
/**
* ata_scsi_pass_thru - convert ATA pass-thru CDB to taskfile
* @qc: command structure to be initialized
*
* Handles either 12, 16, or 32-byte versions of the CDB.
*
* RETURNS:
* Zero on success, non-zero on failure.
*/
static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
{
struct ata_taskfile *tf = &(qc->tf);
struct scsi_cmnd *scmd = qc->scsicmd;
struct ata_device *dev = qc->dev;
const u8 *cdb = scmd->cmnd;
u16 fp;
u16 cdb_offset = 0;
/* 7Fh variable length cmd means a ata pass-thru(32) */
if (cdb[0] == VARIABLE_LENGTH_CMD)
cdb_offset = 9;
tf->protocol = ata_scsi_map_proto(cdb[1 + cdb_offset]);
if (tf->protocol == ATA_PROT_UNKNOWN) {
fp = 1;
goto invalid_fld;
}
if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0)
tf->protocol = ATA_PROT_NCQ_NODATA;
/* enable LBA */
tf->flags |= ATA_TFLAG_LBA;
/*
* 12 and 16 byte CDBs use different offsets to
* provide the various register values.
*/
if (cdb[0] == ATA_16) {
/*
* 16-byte CDB - may contain extended commands.
*
* If that is the case, copy the upper byte register values.
*/
if (cdb[1] & 0x01) {
tf->hob_feature = cdb[3];
tf->hob_nsect = cdb[5];
tf->hob_lbal = cdb[7];
tf->hob_lbam = cdb[9];
tf->hob_lbah = cdb[11];
tf->flags |= ATA_TFLAG_LBA48;
} else
tf->flags &= ~ATA_TFLAG_LBA48;
/*
* Always copy low byte, device and command registers.
*/
tf->feature = cdb[4];
tf->nsect = cdb[6];
tf->lbal = cdb[8];
tf->lbam = cdb[10];
tf->lbah = cdb[12];
tf->device = cdb[13];
tf->command = cdb[14];
} else if (cdb[0] == ATA_12) {
/*
* 12-byte CDB - incapable of extended commands.
*/
tf->flags &= ~ATA_TFLAG_LBA48;
tf->feature = cdb[3];
tf->nsect = cdb[4];
tf->lbal = cdb[5];
tf->lbam = cdb[6];
tf->lbah = cdb[7];
tf->device = cdb[8];
tf->command = cdb[9];
} else {
/*
* 32-byte CDB - may contain extended command fields.
*
* If that is the case, copy the upper byte register values.
*/
if (cdb[10] & 0x01) {
tf->hob_feature = cdb[20];
tf->hob_nsect = cdb[22];
tf->hob_lbal = cdb[16];
tf->hob_lbam = cdb[15];
tf->hob_lbah = cdb[14];
tf->flags |= ATA_TFLAG_LBA48;
} else
tf->flags &= ~ATA_TFLAG_LBA48;
tf->feature = cdb[21];
tf->nsect = cdb[23];
tf->lbal = cdb[19];
tf->lbam = cdb[18];
tf->lbah = cdb[17];
tf->device = cdb[24];
tf->command = cdb[25];
tf->auxiliary = get_unaligned_be32(&cdb[28]);
}
/* For NCQ commands copy the tag value */
if (ata_is_ncq(tf->protocol))
tf->nsect = qc->hw_tag << 3;
/* enforce correct master/slave bit */
tf->device = dev->devno ?
tf->device | ATA_DEV1 : tf->device & ~ATA_DEV1;
switch (tf->command) {
/* READ/WRITE LONG use a non-standard sect_size */
case ATA_CMD_READ_LONG:
case ATA_CMD_READ_LONG_ONCE:
case ATA_CMD_WRITE_LONG:
case ATA_CMD_WRITE_LONG_ONCE:
if (tf->protocol != ATA_PROT_PIO || tf->nsect != 1) {
fp = 1;
goto invalid_fld;
}
qc->sect_size = scsi_bufflen(scmd);
break;
/* commands using reported Logical Block size (e.g. 512 or 4K) */
case ATA_CMD_CFA_WRITE_NE:
case ATA_CMD_CFA_TRANS_SECT:
case ATA_CMD_CFA_WRITE_MULT_NE:
/* XXX: case ATA_CMD_CFA_WRITE_SECTORS_WITHOUT_ERASE: */
case ATA_CMD_READ:
case ATA_CMD_READ_EXT:
case ATA_CMD_READ_QUEUED:
/* XXX: case ATA_CMD_READ_QUEUED_EXT: */
case ATA_CMD_FPDMA_READ:
case ATA_CMD_READ_MULTI:
case ATA_CMD_READ_MULTI_EXT:
case ATA_CMD_PIO_READ:
case ATA_CMD_PIO_READ_EXT:
case ATA_CMD_READ_STREAM_DMA_EXT:
case ATA_CMD_READ_STREAM_EXT:
case ATA_CMD_VERIFY:
case ATA_CMD_VERIFY_EXT:
case ATA_CMD_WRITE:
case ATA_CMD_WRITE_EXT:
case ATA_CMD_WRITE_FUA_EXT:
case ATA_CMD_WRITE_QUEUED:
case ATA_CMD_WRITE_QUEUED_FUA_EXT:
case ATA_CMD_FPDMA_WRITE:
case ATA_CMD_WRITE_MULTI:
case ATA_CMD_WRITE_MULTI_EXT:
case ATA_CMD_WRITE_MULTI_FUA_EXT:
case ATA_CMD_PIO_WRITE:
case ATA_CMD_PIO_WRITE_EXT:
case ATA_CMD_WRITE_STREAM_DMA_EXT:
case ATA_CMD_WRITE_STREAM_EXT:
qc->sect_size = scmd->device->sector_size;
break;
/* Everything else uses 512 byte "sectors" */
default:
qc->sect_size = ATA_SECT_SIZE;
}
/*
* Set flags so that all registers will be written, pass on
* write indication (used for PIO/DMA setup), result TF is
* copied back and we don't whine too much about its failure.
*/
tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
if (scmd->sc_data_direction == DMA_TO_DEVICE)
tf->flags |= ATA_TFLAG_WRITE;
qc->flags |= ATA_QCFLAG_RESULT_TF | ATA_QCFLAG_QUIET;
/*
* Set transfer length.
*
* TODO: find out if we need to do more here to
* cover scatter/gather case.
*/
ata_qc_set_pc_nbytes(qc);
/* We may not issue DMA commands if no DMA mode is set */
if (tf->protocol == ATA_PROT_DMA && dev->dma_mode == 0) {
fp = 1;
goto invalid_fld;
}
/* We may not issue NCQ commands to devices not supporting NCQ */
if (ata_is_ncq(tf->protocol) && !ata_ncq_enabled(dev)) {
fp = 1;
goto invalid_fld;
}
/* sanity check for pio multi commands */
if ((cdb[1] & 0xe0) && !is_multi_taskfile(tf)) {
fp = 1;
goto invalid_fld;
}
if (is_multi_taskfile(tf)) {
unsigned int multi_count = 1 << (cdb[1] >> 5);
/* compare the passed through multi_count
* with the cached multi_count of libata
*/
if (multi_count != dev->multi_count)
ata_dev_warn(dev, "invalid multi_count %u ignored\n",
multi_count);
}
/*
* Filter SET_FEATURES - XFER MODE command -- otherwise,
* SET_FEATURES - XFER MODE must be preceded/succeeded
* by an update to hardware-specific registers for each
* controller (i.e. the reason for ->set_piomode(),
* ->set_dmamode(), and ->post_set_mode() hooks).
*/
if (tf->command == ATA_CMD_SET_FEATURES &&
tf->feature == SETFEATURES_XFER) {
fp = (cdb[0] == ATA_16) ? 4 : 3;
goto invalid_fld;
}
/*
* Filter TPM commands by default. These provide an
* essentially uncontrolled encrypted "back door" between
* applications and the disk. Set libata.allow_tpm=1 if you
* have a real reason for wanting to use them. This ensures
* that installed software cannot easily mess stuff up without
* user intent. DVR type users will probably ship with this enabled
* for movie content management.
*
* Note that for ATA8 we can issue a DCS change and DCS freeze lock
* for this and should do in future but that it is not sufficient as
* DCS is an optional feature set. Thus we also do the software filter
* so that we comply with the TC consortium stated goal that the user
* can turn off TC features of their system.
*/
if (tf->command >= 0x5C && tf->command <= 0x5F && !libata_allow_tpm) {
fp = (cdb[0] == ATA_16) ? 14 : 9;
goto invalid_fld;
}
return 0;
invalid_fld:
ata_scsi_set_invalid_field(dev, scmd, fp, 0xff);
return 1;
}
/**
* ata_format_dsm_trim_descr() - SATL Write Same to DSM Trim
* @cmd: SCSI command being translated
* @trmax: Maximum number of entries that will fit in sector_size bytes.
* @sector: Starting sector
* @count: Total Range of request in logical sectors
*
* Rewrite the WRITE SAME descriptor to be a DSM TRIM little-endian formatted
* descriptor.
*
* Upto 64 entries of the format:
* 63:48 Range Length
* 47:0 LBA
*
* Range Length of 0 is ignored.
* LBA's should be sorted order and not overlap.
*
* NOTE: this is the same format as ADD LBA(S) TO NV CACHE PINNED SET
*
* Return: Number of bytes copied into sglist.
*/
static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax,
u64 sector, u32 count)
{
struct scsi_device *sdp = cmd->device;
size_t len = sdp->sector_size;
size_t r;
__le64 *buf;
u32 i = 0;
unsigned long flags;
WARN_ON(len > ATA_SCSI_RBUF_SIZE);
if (len > ATA_SCSI_RBUF_SIZE)
len = ATA_SCSI_RBUF_SIZE;
spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
buf = ((void *)ata_scsi_rbuf);
memset(buf, 0, len);
while (i < trmax) {
u64 entry = sector |
((u64)(count > 0xffff ? 0xffff : count) << 48);
buf[i++] = __cpu_to_le64(entry);
if (count <= 0xffff)
break;
count -= 0xffff;
sector += 0xffff;
}
r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len);
spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
return r;
}
/**
* ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same
* @qc: Command to be translated
*
* Translate a SCSI WRITE SAME command to be either a DSM TRIM command or
* an SCT Write Same command.
* Based on WRITE SAME has the UNMAP flag:
*
* - When set translate to DSM TRIM
* - When clear translate to SCT Write Same
*/
static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
{
struct ata_taskfile *tf = &qc->tf;
struct scsi_cmnd *scmd = qc->scsicmd;
struct scsi_device *sdp = scmd->device;
size_t len = sdp->sector_size;
struct ata_device *dev = qc->dev;
const u8 *cdb = scmd->cmnd;
u64 block;
u32 n_block;
const u32 trmax = len >> 3;
u32 size;
u16 fp;
u8 bp = 0xff;
u8 unmap = cdb[1] & 0x8;
/* we may not issue DMA commands if no DMA mode is set */
if (unlikely(!dev->dma_mode))
goto invalid_opcode;
/*
* We only allow sending this command through the block layer,
* as it modifies the DATA OUT buffer, which would corrupt user
* memory for SG_IO commands.
*/
if (unlikely(blk_rq_is_passthrough(scmd->request)))
goto invalid_opcode;
if (unlikely(scmd->cmd_len < 16)) {
fp = 15;
goto invalid_fld;
}
scsi_16_lba_len(cdb, &block, &n_block);
if (!unmap ||
(dev->horkage & ATA_HORKAGE_NOTRIM) ||
!ata_id_has_trim(dev->id)) {
fp = 1;
bp = 3;
goto invalid_fld;
}
/* If the request is too large the cmd is invalid */
if (n_block > 0xffff * trmax) {
fp = 2;
goto invalid_fld;
}
/*
* WRITE SAME always has a sector sized buffer as payload, this
* should never be a multiple entry S/G list.
*/
if (!scsi_sg_count(scmd))
goto invalid_param_len;
/*
* size must match sector size in bytes
* For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count)
* is defined as number of 512 byte blocks to be transferred.
*/
size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
if (size != len)
goto invalid_param_len;
if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
/* Newer devices support queued TRIM commands */
tf->protocol = ATA_PROT_NCQ;
tf->command = ATA_CMD_FPDMA_SEND;
tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
tf->nsect = qc->hw_tag << 3;
tf->hob_feature = (size / 512) >> 8;
tf->feature = size / 512;
tf->auxiliary = 1;
} else {
tf->protocol = ATA_PROT_DMA;
tf->hob_feature = 0;
tf->feature = ATA_DSM_TRIM;
tf->hob_nsect = (size / 512) >> 8;
tf->nsect = size / 512;
tf->command = ATA_CMD_DSM;
}
tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
ATA_TFLAG_WRITE;
ata_qc_set_pc_nbytes(qc);
return 0;
invalid_fld:
ata_scsi_set_invalid_field(dev, scmd, fp, bp);
return 1;
invalid_param_len:
/* "Parameter list length error" */
ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);
return 1;
invalid_opcode:
/* "Invalid command operation code" */
ata_scsi_set_sense(dev, scmd, ILLEGAL_REQUEST, 0x20, 0x0);
return 1;
}
/**
* ata_scsiop_maint_in - Simulate a subset of MAINTENANCE_IN
* @args: device MAINTENANCE_IN data / SCSI command of interest.
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
*
* Yields a subset to satisfy scsi_report_opcode()
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
{
struct ata_device *dev = args->dev;
u8 *cdb = args->cmd->cmnd;
u8 supported = 0;
unsigned int err = 0;
if (cdb[2] != 1) {
ata_dev_warn(dev, "invalid command format %d\n", cdb[2]);
err = 2;
goto out;
}
switch (cdb[3]) {
case INQUIRY:
case MODE_SENSE:
case MODE_SENSE_10:
case READ_CAPACITY:
case SERVICE_ACTION_IN_16:
case REPORT_LUNS:
case REQUEST_SENSE:
case SYNCHRONIZE_CACHE:
case REZERO_UNIT:
case SEEK_6:
case SEEK_10:
case TEST_UNIT_READY:
case SEND_DIAGNOSTIC:
case MAINTENANCE_IN:
case READ_6:
case READ_10:
case READ_16:
case WRITE_6:
case WRITE_10:
case WRITE_16:
case ATA_12:
case ATA_16:
case VERIFY:
case VERIFY_16:
case MODE_SELECT:
case MODE_SELECT_10:
case START_STOP:
supported = 3;
break;
case ZBC_IN:
case ZBC_OUT:
if (ata_id_zoned_cap(dev->id) ||
dev->class == ATA_DEV_ZAC)
supported = 3;
break;
case SECURITY_PROTOCOL_IN:
case SECURITY_PROTOCOL_OUT:
if (dev->flags & ATA_DFLAG_TRUSTED)
supported = 3;
break;
default:
break;
}
out:
rbuf[1] = supported; /* supported */
return err;
}
/**
* ata_scsi_report_zones_complete - convert ATA output
* @qc: command structure returning the data
*
* Convert T-13 little-endian field representation into
* T-10 big-endian field representation.
* What a mess.
*/
static void ata_scsi_report_zones_complete(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
struct sg_mapping_iter miter;
unsigned long flags;
unsigned int bytes = 0;
sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd),
SG_MITER_TO_SG | SG_MITER_ATOMIC);
local_irq_save(flags);
while (sg_miter_next(&miter)) {
unsigned int offset = 0;
if (bytes == 0) {
char *hdr;
u32 list_length;
u64 max_lba, opt_lba;
u16 same;
/* Swizzle header */
hdr = miter.addr;
list_length = get_unaligned_le32(&hdr[0]);
same = get_unaligned_le16(&hdr[4]);
max_lba = get_unaligned_le64(&hdr[8]);
opt_lba = get_unaligned_le64(&hdr[16]);
put_unaligned_be32(list_length, &hdr[0]);
hdr[4] = same & 0xf;
put_unaligned_be64(max_lba, &hdr[8]);
put_unaligned_be64(opt_lba, &hdr[16]);
offset += 64;
bytes += 64;
}
while (offset < miter.length) {
char *rec;
u8 cond, type, non_seq, reset;
u64 size, start, wp;
/* Swizzle zone descriptor */
rec = miter.addr + offset;
type = rec[0] & 0xf;
cond = (rec[1] >> 4) & 0xf;
non_seq = (rec[1] & 2);
reset = (rec[1] & 1);
size = get_unaligned_le64(&rec[8]);
start = get_unaligned_le64(&rec[16]);
wp = get_unaligned_le64(&rec[24]);
rec[0] = type;
rec[1] = (cond << 4) | non_seq | reset;
put_unaligned_be64(size, &rec[8]);
put_unaligned_be64(start, &rec[16]);
put_unaligned_be64(wp, &rec[24]);
WARN_ON(offset + 64 > miter.length);
offset += 64;
bytes += 64;
}
}
sg_miter_stop(&miter);
local_irq_restore(flags);
ata_scsi_qc_complete(qc);
}
static unsigned int ata_scsi_zbc_in_xlat(struct ata_queued_cmd *qc)
{
struct ata_taskfile *tf = &qc->tf;
struct scsi_cmnd *scmd = qc->scsicmd;
const u8 *cdb = scmd->cmnd;
u16 sect, fp = (u16)-1;
u8 sa, options, bp = 0xff;
u64 block;
u32 n_block;
if (unlikely(scmd->cmd_len < 16)) {
ata_dev_warn(qc->dev, "invalid cdb length %d\n",
scmd->cmd_len);
fp = 15;
goto invalid_fld;
}
scsi_16_lba_len(cdb, &block, &n_block);
if (n_block != scsi_bufflen(scmd)) {
ata_dev_warn(qc->dev, "non-matching transfer count (%d/%d)\n",
n_block, scsi_bufflen(scmd));
goto invalid_param_len;
}
sa = cdb[1] & 0x1f;
if (sa != ZI_REPORT_ZONES) {
ata_dev_warn(qc->dev, "invalid service action %d\n", sa);
fp = 1;
goto invalid_fld;
}
/*
* ZAC allows only for transfers in 512 byte blocks,
* and uses a 16 bit value for the transfer count.
*/
if ((n_block / 512) > 0xffff || n_block < 512 || (n_block % 512)) {
ata_dev_warn(qc->dev, "invalid transfer count %d\n", n_block);
goto invalid_param_len;
}
sect = n_block / 512;
options = cdb[14] & 0xbf;
if (ata_ncq_enabled(qc->dev) &&
ata_fpdma_zac_mgmt_in_supported(qc->dev)) {
tf->protocol = ATA_PROT_NCQ;
tf->command = ATA_CMD_FPDMA_RECV;
tf->hob_nsect = ATA_SUBCMD_FPDMA_RECV_ZAC_MGMT_IN & 0x1f;
tf->nsect = qc->hw_tag << 3;
tf->feature = sect & 0xff;
tf->hob_feature = (sect >> 8) & 0xff;
tf->auxiliary = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES | (options << 8);
} else {
tf->command = ATA_CMD_ZAC_MGMT_IN;
tf->feature = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES;
tf->protocol = ATA_PROT_DMA;
tf->hob_feature = options;
tf->hob_nsect = (sect >> 8) & 0xff;
tf->nsect = sect & 0xff;
}
tf->device = ATA_LBA;
tf->lbah = (block >> 16) & 0xff;
tf->lbam = (block >> 8) & 0xff;
tf->lbal = block & 0xff;
tf->hob_lbah = (block >> 40) & 0xff;
tf->hob_lbam = (block >> 32) & 0xff;
tf->hob_lbal = (block >> 24) & 0xff;
tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48;
qc->flags |= ATA_QCFLAG_RESULT_TF;
ata_qc_set_pc_nbytes(qc);
qc->complete_fn = ata_scsi_report_zones_complete;
return 0;
invalid_fld:
ata_scsi_set_invalid_field(qc->dev, scmd, fp, bp);
return 1;
invalid_param_len:
/* "Parameter list length error" */
ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);
return 1;
}
static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
{
struct ata_taskfile *tf = &qc->tf;
struct scsi_cmnd *scmd = qc->scsicmd;
struct ata_device *dev = qc->dev;
const u8 *cdb = scmd->cmnd;
u8 all, sa;
u64 block;
u32 n_block;
u16 fp = (u16)-1;
if (unlikely(scmd->cmd_len < 16)) {
fp = 15;
goto invalid_fld;
}
sa = cdb[1] & 0x1f;
if ((sa != ZO_CLOSE_ZONE) && (sa != ZO_FINISH_ZONE) &&
(sa != ZO_OPEN_ZONE) && (sa != ZO_RESET_WRITE_POINTER)) {
fp = 1;
goto invalid_fld;
}
scsi_16_lba_len(cdb, &block, &n_block);
if (n_block) {
/*
* ZAC MANAGEMENT OUT doesn't define any length
*/
goto invalid_param_len;
}
all = cdb[14] & 0x1;
if (all) {
/*
* Ignore the block address (zone ID) as defined by ZBC.
*/
block = 0;
} else if (block >= dev->n_sectors) {
/*
* Block must be a valid zone ID (a zone start LBA).
*/
fp = 2;
goto invalid_fld;
}
if (ata_ncq_enabled(qc->dev) &&
ata_fpdma_zac_mgmt_out_supported(qc->dev)) {
tf->protocol = ATA_PROT_NCQ_NODATA;
tf->command = ATA_CMD_NCQ_NON_DATA;
tf->feature = ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT;
tf->nsect = qc->hw_tag << 3;
tf->auxiliary = sa | ((u16)all << 8);
} else {
tf->protocol = ATA_PROT_NODATA;
tf->command = ATA_CMD_ZAC_MGMT_OUT;
tf->feature = sa;
tf->hob_feature = all;
}
tf->lbah = (block >> 16) & 0xff;
tf->lbam = (block >> 8) & 0xff;
tf->lbal = block & 0xff;
tf->hob_lbah = (block >> 40) & 0xff;
tf->hob_lbam = (block >> 32) & 0xff;
tf->hob_lbal = (block >> 24) & 0xff;
tf->device = ATA_LBA;
tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48;
return 0;
invalid_fld:
ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff);
return 1;
invalid_param_len:
/* "Parameter list length error" */
ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);
return 1;
}
/**
* ata_mselect_caching - Simulate MODE SELECT for caching info page
* @qc: Storage for translated ATA taskfile
* @buf: input buffer
* @len: number of valid bytes in the input buffer
* @fp: out parameter for the failed field on error
*
* Prepare a taskfile to modify caching information for the device.
*
* LOCKING:
* None.
*/
static int ata_mselect_caching(struct ata_queued_cmd *qc,
const u8 *buf, int len, u16 *fp)
{
struct ata_taskfile *tf = &qc->tf;
struct ata_device *dev = qc->dev;
u8 mpage[CACHE_MPAGE_LEN];
u8 wce;
int i;
/*
* The first two bytes of def_cache_mpage are a header, so offsets
* in mpage are off by 2 compared to buf. Same for len.
*/
if (len != CACHE_MPAGE_LEN - 2) {
if (len < CACHE_MPAGE_LEN - 2)
*fp = len;
else
*fp = CACHE_MPAGE_LEN - 2;
return -EINVAL;
}
wce = buf[0] & (1 << 2);
/*
* Check that read-only bits are not modified.
*/
ata_msense_caching(dev->id, mpage, false);
for (i = 0; i < CACHE_MPAGE_LEN - 2; i++) {
if (i == 0)
continue;
if (mpage[i + 2] != buf[i]) {
*fp = i;
return -EINVAL;
}
}
tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
tf->protocol = ATA_PROT_NODATA;
tf->nsect = 0;
tf->command = ATA_CMD_SET_FEATURES;
tf->feature = wce ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF;
return 0;
}
/**
* ata_mselect_control - Simulate MODE SELECT for control page
* @qc: Storage for translated ATA taskfile
* @buf: input buffer
* @len: number of valid bytes in the input buffer
* @fp: out parameter for the failed field on error
*
* Prepare a taskfile to modify caching information for the device.
*
* LOCKING:
* None.
*/
static int ata_mselect_control(struct ata_queued_cmd *qc,
const u8 *buf, int len, u16 *fp)
{
struct ata_device *dev = qc->dev;
u8 mpage[CONTROL_MPAGE_LEN];
u8 d_sense;
int i;
/*
* The first two bytes of def_control_mpage are a header, so offsets
* in mpage are off by 2 compared to buf. Same for len.
*/
if (len != CONTROL_MPAGE_LEN - 2) {
if (len < CONTROL_MPAGE_LEN - 2)
*fp = len;
else
*fp = CONTROL_MPAGE_LEN - 2;
return -EINVAL;
}
d_sense = buf[0] & (1 << 2);
/*
* Check that read-only bits are not modified.
*/
ata_msense_control(dev, mpage, false);
for (i = 0; i < CONTROL_MPAGE_LEN - 2; i++) {
if (i == 0)
continue;
if (mpage[2 + i] != buf[i]) {
*fp = i;
return -EINVAL;
}
}
if (d_sense & (1 << 2))
dev->flags |= ATA_DFLAG_D_SENSE;
else
dev->flags &= ~ATA_DFLAG_D_SENSE;
return 0;
}
/**
* ata_scsi_mode_select_xlat - Simulate MODE SELECT 6, 10 commands
* @qc: Storage for translated ATA taskfile
*
* Converts a MODE SELECT command to an ATA SET FEATURES taskfile.
* Assume this is invoked for direct access devices (e.g. disks) only.
* There should be no block descriptor for other device types.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
const u8 *cdb = scmd->cmnd;
u8 pg, spg;
unsigned six_byte, pg_len, hdr_len, bd_len;
int len;
u16 fp = (u16)-1;
u8 bp = 0xff;
ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function BUG: KASAN: use-after-free in ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 Read of size 1 at addr ffff88803b8cd003 by task syz-executor.6/12621 CPU: 1 PID: 12621 Comm: syz-executor.6 Not tainted 4.19.95 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xac/0xee lib/dump_stack.c:118 print_address_description+0x60/0x223 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report mm/kasan/report.c:409 [inline] kasan_report.cold+0xae/0x2d8 mm/kasan/report.c:393 ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 ata_scsi_translate+0x2da/0x680 drivers/ata/libata-scsi.c:2035 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4360 [inline] ata_scsi_queuecmd+0x2e4/0x790 drivers/ata/libata-scsi.c:4409 scsi_dispatch_cmd+0x2ee/0x6c0 drivers/scsi/scsi_lib.c:1867 scsi_queue_rq+0xfd7/0x1990 drivers/scsi/scsi_lib.c:2170 blk_mq_dispatch_rq_list+0x1e1/0x19a0 block/blk-mq.c:1186 blk_mq_do_dispatch_sched+0x147/0x3d0 block/blk-mq-sched.c:108 blk_mq_sched_dispatch_requests+0x427/0x680 block/blk-mq-sched.c:204 __blk_mq_run_hw_queue+0xbc/0x200 block/blk-mq.c:1308 __blk_mq_delay_run_hw_queue+0x3c0/0x460 block/blk-mq.c:1376 blk_mq_run_hw_queue+0x152/0x310 block/blk-mq.c:1413 blk_mq_sched_insert_request+0x337/0x6c0 block/blk-mq-sched.c:397 blk_execute_rq_nowait+0x124/0x320 block/blk-exec.c:64 blk_execute_rq+0xc5/0x112 block/blk-exec.c:101 sg_scsi_ioctl+0x3b0/0x6a0 block/scsi_ioctl.c:507 sg_ioctl+0xd37/0x23f0 drivers/scsi/sg.c:1106 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:501 [inline] do_vfs_ioctl+0xae6/0x1030 fs/ioctl.c:688 ksys_ioctl+0x76/0xa0 fs/ioctl.c:705 __do_sys_ioctl fs/ioctl.c:712 [inline] __se_sys_ioctl fs/ioctl.c:710 [inline] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:710 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c479 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb0e9602c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fb0e96036d4 RCX: 000000000045c479 RDX: 0000000020000040 RSI: 0000000000000001 RDI: 0000000000000003 RBP: 000000000076bfc0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000046d R14: 00000000004c6e1a R15: 000000000076bfcc Allocated by task 12577: set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc mm/kasan/kasan.c:553 [inline] kasan_kmalloc+0xbf/0xe0 mm/kasan/kasan.c:531 __kmalloc+0xf3/0x1e0 mm/slub.c:3749 kmalloc include/linux/slab.h:520 [inline] load_elf_phdrs+0x118/0x1b0 fs/binfmt_elf.c:441 load_elf_binary+0x2de/0x4610 fs/binfmt_elf.c:737 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 12577: set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x129/0x170 mm/kasan/kasan.c:521 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kfree+0x8b/0x1a0 mm/slub.c:3904 load_elf_binary+0x1be7/0x4610 fs/binfmt_elf.c:1118 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88803b8ccf00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 259 bytes inside of 512-byte region [ffff88803b8ccf00, ffff88803b8cd100) The buggy address belongs to the page: page:ffffea0000ee3300 count:1 mapcount:0 mapping:ffff88806cc03080 index:0xffff88803b8cc780 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 ffffea0001104080 0000000200000002 ffff88806cc03080 raw: ffff88803b8cc780 00000000800c000b 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88803b8ccf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8ccf80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88803b8cd000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88803b8cd080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8cd100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc You can refer to "https://www.lkml.org/lkml/2019/1/17/474" reproduce this error. The exception code is "bd_len = p[3];", "p" value is ffff88803b8cd000 which belongs to the cache kmalloc-512 of size 512. The "page_address(sg_page(scsi_sglist(scmd)))" maybe from sg_scsi_ioctl function "buffer" which allocated by kzalloc, so "buffer" may not page aligned. This also looks completely buggy on highmem systems and really needs to use a kmap_atomic. --Christoph Hellwig To address above bugs, Paolo Bonzini advise to simpler to just make a char array of size CACHE_MPAGE_LEN+8+8+4-2(or just 64 to make it easy), use sg_copy_to_buffer to copy from the sglist into the buffer, and workthere. Signed-off-by: Ye Bin <yebin10@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-06-05 09:41:49 +08:00
u8 buffer[64];
const u8 *p = buffer;
VPRINTK("ENTER\n");
six_byte = (cdb[0] == MODE_SELECT);
if (six_byte) {
if (scmd->cmd_len < 5) {
fp = 4;
goto invalid_fld;
}
len = cdb[4];
hdr_len = 4;
} else {
if (scmd->cmd_len < 9) {
fp = 8;
goto invalid_fld;
}
len = (cdb[7] << 8) + cdb[8];
hdr_len = 8;
}
/* We only support PF=1, SP=0. */
if ((cdb[1] & 0x11) != 0x10) {
fp = 1;
bp = (cdb[1] & 0x01) ? 1 : 5;
goto invalid_fld;
}
/* Test early for possible overrun. */
if (!scsi_sg_count(scmd) || scsi_sglist(scmd)->length < len)
goto invalid_param_len;
/* Move past header and block descriptors. */
if (len < hdr_len)
goto invalid_param_len;
ata/libata: Fix usage of page address by page_address in ata_scsi_mode_select_xlat function BUG: KASAN: use-after-free in ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 Read of size 1 at addr ffff88803b8cd003 by task syz-executor.6/12621 CPU: 1 PID: 12621 Comm: syz-executor.6 Not tainted 4.19.95 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xac/0xee lib/dump_stack.c:118 print_address_description+0x60/0x223 mm/kasan/report.c:253 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report mm/kasan/report.c:409 [inline] kasan_report.cold+0xae/0x2d8 mm/kasan/report.c:393 ata_scsi_mode_select_xlat+0x10bd/0x10f0 drivers/ata/libata-scsi.c:4045 ata_scsi_translate+0x2da/0x680 drivers/ata/libata-scsi.c:2035 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4360 [inline] ata_scsi_queuecmd+0x2e4/0x790 drivers/ata/libata-scsi.c:4409 scsi_dispatch_cmd+0x2ee/0x6c0 drivers/scsi/scsi_lib.c:1867 scsi_queue_rq+0xfd7/0x1990 drivers/scsi/scsi_lib.c:2170 blk_mq_dispatch_rq_list+0x1e1/0x19a0 block/blk-mq.c:1186 blk_mq_do_dispatch_sched+0x147/0x3d0 block/blk-mq-sched.c:108 blk_mq_sched_dispatch_requests+0x427/0x680 block/blk-mq-sched.c:204 __blk_mq_run_hw_queue+0xbc/0x200 block/blk-mq.c:1308 __blk_mq_delay_run_hw_queue+0x3c0/0x460 block/blk-mq.c:1376 blk_mq_run_hw_queue+0x152/0x310 block/blk-mq.c:1413 blk_mq_sched_insert_request+0x337/0x6c0 block/blk-mq-sched.c:397 blk_execute_rq_nowait+0x124/0x320 block/blk-exec.c:64 blk_execute_rq+0xc5/0x112 block/blk-exec.c:101 sg_scsi_ioctl+0x3b0/0x6a0 block/scsi_ioctl.c:507 sg_ioctl+0xd37/0x23f0 drivers/scsi/sg.c:1106 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:501 [inline] do_vfs_ioctl+0xae6/0x1030 fs/ioctl.c:688 ksys_ioctl+0x76/0xa0 fs/ioctl.c:705 __do_sys_ioctl fs/ioctl.c:712 [inline] __se_sys_ioctl fs/ioctl.c:710 [inline] __x64_sys_ioctl+0x6f/0xb0 fs/ioctl.c:710 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c479 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb0e9602c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fb0e96036d4 RCX: 000000000045c479 RDX: 0000000020000040 RSI: 0000000000000001 RDI: 0000000000000003 RBP: 000000000076bfc0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 000000000000046d R14: 00000000004c6e1a R15: 000000000076bfcc Allocated by task 12577: set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc mm/kasan/kasan.c:553 [inline] kasan_kmalloc+0xbf/0xe0 mm/kasan/kasan.c:531 __kmalloc+0xf3/0x1e0 mm/slub.c:3749 kmalloc include/linux/slab.h:520 [inline] load_elf_phdrs+0x118/0x1b0 fs/binfmt_elf.c:441 load_elf_binary+0x2de/0x4610 fs/binfmt_elf.c:737 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 12577: set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x129/0x170 mm/kasan/kasan.c:521 slab_free_hook mm/slub.c:1370 [inline] slab_free_freelist_hook mm/slub.c:1397 [inline] slab_free mm/slub.c:2952 [inline] kfree+0x8b/0x1a0 mm/slub.c:3904 load_elf_binary+0x1be7/0x4610 fs/binfmt_elf.c:1118 search_binary_handler fs/exec.c:1654 [inline] search_binary_handler+0x15c/0x4e0 fs/exec.c:1632 exec_binprm fs/exec.c:1696 [inline] __do_execve_file.isra.0+0xf52/0x1a90 fs/exec.c:1820 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8a/0xb0 fs/exec.c:1959 do_syscall_64+0xa0/0x2e0 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88803b8ccf00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 259 bytes inside of 512-byte region [ffff88803b8ccf00, ffff88803b8cd100) The buggy address belongs to the page: page:ffffea0000ee3300 count:1 mapcount:0 mapping:ffff88806cc03080 index:0xffff88803b8cc780 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 ffffea0001104080 0000000200000002 ffff88806cc03080 raw: ffff88803b8cc780 00000000800c000b 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88803b8ccf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8ccf80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88803b8cd000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88803b8cd080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88803b8cd100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc You can refer to "https://www.lkml.org/lkml/2019/1/17/474" reproduce this error. The exception code is "bd_len = p[3];", "p" value is ffff88803b8cd000 which belongs to the cache kmalloc-512 of size 512. The "page_address(sg_page(scsi_sglist(scmd)))" maybe from sg_scsi_ioctl function "buffer" which allocated by kzalloc, so "buffer" may not page aligned. This also looks completely buggy on highmem systems and really needs to use a kmap_atomic. --Christoph Hellwig To address above bugs, Paolo Bonzini advise to simpler to just make a char array of size CACHE_MPAGE_LEN+8+8+4-2(or just 64 to make it easy), use sg_copy_to_buffer to copy from the sglist into the buffer, and workthere. Signed-off-by: Ye Bin <yebin10@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-06-05 09:41:49 +08:00
if (!sg_copy_to_buffer(scsi_sglist(scmd), scsi_sg_count(scmd),
buffer, sizeof(buffer)))
goto invalid_param_len;
if (six_byte)
bd_len = p[3];
else
bd_len = (p[6] << 8) + p[7];
len -= hdr_len;
p += hdr_len;
if (len < bd_len)
goto invalid_param_len;
if (bd_len != 0 && bd_len != 8) {
fp = (six_byte) ? 3 : 6;
fp += bd_len + hdr_len;
goto invalid_param;
}
len -= bd_len;
p += bd_len;
if (len == 0)
goto skip;
/* Parse both possible formats for the mode page headers. */
pg = p[0] & 0x3f;
if (p[0] & 0x40) {
if (len < 4)
goto invalid_param_len;
spg = p[1];
pg_len = (p[2] << 8) | p[3];
p += 4;
len -= 4;
} else {
if (len < 2)
goto invalid_param_len;
spg = 0;
pg_len = p[1];
p += 2;
len -= 2;
}
/*
* No mode subpages supported (yet) but asking for _all_
* subpages may be valid
*/
if (spg && (spg != ALL_SUB_MPAGES)) {
fp = (p[0] & 0x40) ? 1 : 0;
fp += hdr_len + bd_len;
goto invalid_param;
}
if (pg_len > len)
goto invalid_param_len;
switch (pg) {
case CACHE_MPAGE:
if (ata_mselect_caching(qc, p, pg_len, &fp) < 0) {
fp += hdr_len + bd_len;
goto invalid_param;
}
break;
case CONTROL_MPAGE:
if (ata_mselect_control(qc, p, pg_len, &fp) < 0) {
fp += hdr_len + bd_len;
goto invalid_param;
} else {
goto skip; /* No ATA command to send */
}
break;
default: /* invalid page code */
fp = bd_len + hdr_len;
goto invalid_param;
}
/*
* Only one page has changeable data, so we only support setting one
* page at a time.
*/
if (len > pg_len)
goto invalid_param;
return 0;
invalid_fld:
ata_scsi_set_invalid_field(qc->dev, scmd, fp, bp);
return 1;
invalid_param:
ata_scsi_set_invalid_parameter(qc->dev, scmd, fp);
return 1;
invalid_param_len:
/* "Parameter list length error" */
ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);
return 1;
skip:
scmd->result = SAM_STAT_GOOD;
return 1;
}
static u8 ata_scsi_trusted_op(u32 len, bool send, bool dma)
{
if (len == 0)
return ATA_CMD_TRUSTED_NONDATA;
else if (send)
return dma ? ATA_CMD_TRUSTED_SND_DMA : ATA_CMD_TRUSTED_SND;
else
return dma ? ATA_CMD_TRUSTED_RCV_DMA : ATA_CMD_TRUSTED_RCV;
}
static unsigned int ata_scsi_security_inout_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
const u8 *cdb = scmd->cmnd;
struct ata_taskfile *tf = &qc->tf;
u8 secp = cdb[1];
bool send = (cdb[0] == SECURITY_PROTOCOL_OUT);
u16 spsp = get_unaligned_be16(&cdb[2]);
u32 len = get_unaligned_be32(&cdb[6]);
bool dma = !(qc->dev->flags & ATA_DFLAG_PIO);
/*
* We don't support the ATA "security" protocol.
*/
if (secp == 0xef) {
ata_scsi_set_invalid_field(qc->dev, scmd, 1, 0);
return 1;
}
if (cdb[4] & 7) { /* INC_512 */
if (len > 0xffff) {
ata_scsi_set_invalid_field(qc->dev, scmd, 6, 0);
return 1;
}
} else {
if (len > 0x01fffe00) {
ata_scsi_set_invalid_field(qc->dev, scmd, 6, 0);
return 1;
}
/* convert to the sector-based ATA addressing */
len = (len + 511) / 512;
}
tf->protocol = dma ? ATA_PROT_DMA : ATA_PROT_PIO;
tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR | ATA_TFLAG_LBA;
if (send)
tf->flags |= ATA_TFLAG_WRITE;
tf->command = ata_scsi_trusted_op(len, send, dma);
tf->feature = secp;
tf->lbam = spsp & 0xff;
tf->lbah = spsp >> 8;
if (len) {
tf->nsect = len & 0xff;
tf->lbal = len >> 8;
} else {
if (!send)
tf->lbah = (1 << 7);
}
ata_qc_set_pc_nbytes(qc);
return 0;
}
/**
* ata_scsi_var_len_cdb_xlat - SATL variable length CDB to Handler
* @qc: Command to be translated
*
* Translate a SCSI variable length CDB to specified commands.
* It checks a service action value in CDB to call corresponding handler.
*
* RETURNS:
* Zero on success, non-zero on failure
*
*/
static unsigned int ata_scsi_var_len_cdb_xlat(struct ata_queued_cmd *qc)
{
struct scsi_cmnd *scmd = qc->scsicmd;
const u8 *cdb = scmd->cmnd;
const u16 sa = get_unaligned_be16(&cdb[8]);
/*
* if service action represents a ata pass-thru(32) command,
* then pass it to ata_scsi_pass_thru handler.
*/
if (sa == ATA_32)
return ata_scsi_pass_thru(qc);
/* unsupported service action */
return 1;
}
/**
* ata_get_xlat_func - check if SCSI to ATA translation is possible
* @dev: ATA device
* @cmd: SCSI command opcode to consider
*
* Look up the SCSI command given, and determine whether the
* SCSI command is to be translated or simulated.
*
* RETURNS:
* Pointer to translation function if possible, %NULL if not.
*/
static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd)
{
switch (cmd) {
case READ_6:
case READ_10:
case READ_16:
case WRITE_6:
case WRITE_10:
case WRITE_16:
return ata_scsi_rw_xlat;
case WRITE_SAME_16:
return ata_scsi_write_same_xlat;
case SYNCHRONIZE_CACHE:
if (ata_try_flush_cache(dev))
return ata_scsi_flush_xlat;
break;
case VERIFY:
case VERIFY_16:
return ata_scsi_verify_xlat;
case ATA_12:
case ATA_16:
return ata_scsi_pass_thru;
case VARIABLE_LENGTH_CMD:
return ata_scsi_var_len_cdb_xlat;
case MODE_SELECT:
case MODE_SELECT_10:
return ata_scsi_mode_select_xlat;
break;
case ZBC_IN:
return ata_scsi_zbc_in_xlat;
case ZBC_OUT:
return ata_scsi_zbc_out_xlat;
case SECURITY_PROTOCOL_IN:
case SECURITY_PROTOCOL_OUT:
if (!(dev->flags & ATA_DFLAG_TRUSTED))
break;
return ata_scsi_security_inout_xlat;
case START_STOP:
return ata_scsi_start_stop_xlat;
}
return NULL;
}
/**
* ata_scsi_dump_cdb - dump SCSI command contents to dmesg
* @ap: ATA port to which the command was being sent
* @cmd: SCSI command to dump
*
* Prints the contents of a SCSI command via printk().
*/
void ata_scsi_dump_cdb(struct ata_port *ap, struct scsi_cmnd *cmd)
{
#ifdef ATA_VERBOSE_DEBUG
struct scsi_device *scsidev = cmd->device;
VPRINTK("CDB (%u:%d,%d,%lld) %9ph\n",
ap->print_id,
scsidev->channel, scsidev->id, scsidev->lun,
cmd->cmnd);
#endif
}
int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
{
u8 scsi_op = scmd->cmnd[0];
ata_xlat_func_t xlat_func;
int rc = 0;
if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) {
if (unlikely(!scmd->cmd_len || scmd->cmd_len > dev->cdb_len))
goto bad_cdb_len;
xlat_func = ata_get_xlat_func(dev, scsi_op);
} else {
if (unlikely(!scmd->cmd_len))
goto bad_cdb_len;
xlat_func = NULL;
if (likely((scsi_op != ATA_16) || !atapi_passthru16)) {
/* relay SCSI command to ATAPI device */
int len = COMMAND_SIZE(scsi_op);
libata: fix length validation of ATAPI-relayed SCSI commands syzkaller reported a crash in ata_bmdma_fill_sg() when writing to /dev/sg1. The immediate cause was that the ATA command's scatterlist was not DMA-mapped, which causes 'pi - 1' to underflow, resulting in a write to 'qc->ap->bmdma_prd[0xffffffff]'. Strangely though, the flag ATA_QCFLAG_DMAMAP was set in qc->flags. The root cause is that when __ata_scsi_queuecmd() is preparing to relay a SCSI command to an ATAPI device, it doesn't correctly validate the CDB length before copying it into the 16-byte buffer 'cdb' in 'struct ata_queued_cmd'. Namely, it validates the fixed CDB length expected based on the SCSI opcode but not the actual CDB length, which can be larger due to the use of the SG_NEXT_CMD_LEN ioctl. Since 'flags' is the next member in ata_queued_cmd, a buffer overflow corrupts it. Fix it by requiring that the actual CDB length be <= 16 (ATAPI_CDB_LEN). [Really it seems the length should be required to be <= dev->cdb_len, but the current behavior seems to have been intentionally introduced by commit 607126c2a21c ("libata-scsi: be tolerant of 12-byte ATAPI commands in 16-byte CDBs") to work around a userspace bug in mplayer. Probably the workaround is no longer needed (mplayer was fixed in 2007), but continuing to allow lengths to up 16 appears harmless for now.] Here's a reproducer that works in QEMU when /dev/sg1 refers to the CD-ROM drive that qemu-system-x86_64 creates by default: #include <fcntl.h> #include <sys/ioctl.h> #include <unistd.h> #define SG_NEXT_CMD_LEN 0x2283 int main() { char buf[53] = { [36] = 0x7e, [52] = 0x02 }; int fd = open("/dev/sg1", O_RDWR); ioctl(fd, SG_NEXT_CMD_LEN, &(int){ 17 }); write(fd, buf, sizeof(buf)); } The crash was: BUG: unable to handle kernel paging request at ffff8cb97db37ffc IP: ata_bmdma_fill_sg drivers/ata/libata-sff.c:2623 [inline] IP: ata_bmdma_qc_prep+0xa4/0xc0 drivers/ata/libata-sff.c:2727 PGD fb6c067 P4D fb6c067 PUD 0 Oops: 0002 [#1] SMP CPU: 1 PID: 150 Comm: syz_ata_bmdma_q Not tainted 4.15.0-next-20180202 #99 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 [...] Call Trace: ata_qc_issue+0x100/0x1d0 drivers/ata/libata-core.c:5421 ata_scsi_translate+0xc9/0x1a0 drivers/ata/libata-scsi.c:2024 __ata_scsi_queuecmd drivers/ata/libata-scsi.c:4326 [inline] ata_scsi_queuecmd+0x8c/0x210 drivers/ata/libata-scsi.c:4375 scsi_dispatch_cmd+0xa2/0xe0 drivers/scsi/scsi_lib.c:1727 scsi_request_fn+0x24c/0x530 drivers/scsi/scsi_lib.c:1865 __blk_run_queue_uncond block/blk-core.c:412 [inline] __blk_run_queue+0x3a/0x60 block/blk-core.c:432 blk_execute_rq_nowait+0x93/0xc0 block/blk-exec.c:78 sg_common_write.isra.7+0x272/0x5a0 drivers/scsi/sg.c:806 sg_write+0x1ef/0x340 drivers/scsi/sg.c:677 __vfs_write+0x31/0x160 fs/read_write.c:480 vfs_write+0xa7/0x160 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0x4d/0xc0 fs/read_write.c:581 do_syscall_64+0x5e/0x110 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x21/0x86 Fixes: 607126c2a21c ("libata-scsi: be tolerant of 12-byte ATAPI commands in 16-byte CDBs") Reported-by: syzbot+1ff6f9fcc3c35f1c72a95e26528c8e7e3276e4da@syzkaller.appspotmail.com Cc: <stable@vger.kernel.org> # v2.6.24+ Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Tejun Heo <tj@kernel.org>
2018-02-04 12:30:56 +08:00
if (unlikely(len > scmd->cmd_len ||
len > dev->cdb_len ||
scmd->cmd_len > ATAPI_CDB_LEN))
goto bad_cdb_len;
xlat_func = atapi_xlat;
} else {
/* ATA_16 passthru, treat as an ATA command */
if (unlikely(scmd->cmd_len > 16))
goto bad_cdb_len;
xlat_func = ata_get_xlat_func(dev, scsi_op);
}
}
if (xlat_func)
rc = ata_scsi_translate(dev, scmd, xlat_func);
else
ata_scsi_simulate(dev, scmd);
return rc;
bad_cdb_len:
DPRINTK("bad CDB len=%u, scsi_op=0x%02x, max=%u\n",
scmd->cmd_len, scsi_op, dev->cdb_len);
scmd->result = DID_ERROR << 16;
scmd->scsi_done(scmd);
return 0;
}
/**
* ata_scsi_queuecmd - Issue SCSI cdb to libata-managed device
* @shost: SCSI host of command to be sent
* @cmd: SCSI command to be sent
*
* In some cases, this function translates SCSI commands into
* ATA taskfiles, and queues the taskfiles to be sent to
* hardware. In other cases, this function simulates a
* SCSI device by evaluating and responding to certain
* SCSI commands. This creates the overall effect of
* ATA and ATAPI devices appearing as SCSI devices.
*
* LOCKING:
* ATA host lock
*
* RETURNS:
* Return value from __ata_scsi_queuecmd() if @cmd can be queued,
* 0 otherwise.
*/
int ata_scsi_queuecmd(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
{
struct ata_port *ap;
struct ata_device *dev;
struct scsi_device *scsidev = cmd->device;
int rc = 0;
unsigned long irq_flags;
2006-04-12 01:12:34 +08:00
ap = ata_shost_to_port(shost);
spin_lock_irqsave(ap->lock, irq_flags);
ata_scsi_dump_cdb(ap, cmd);
dev = ata_scsi_find_dev(ap, scsidev);
if (likely(dev))
rc = __ata_scsi_queuecmd(cmd, dev);
else {
cmd->result = (DID_BAD_TARGET << 16);
cmd->scsi_done(cmd);
}
spin_unlock_irqrestore(ap->lock, irq_flags);
return rc;
}
EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
/**
* ata_scsi_simulate - simulate SCSI command on ATA device
* @dev: the target device
* @cmd: SCSI command being sent to device.
*
* Interprets and directly executes a select list of SCSI commands
* that can be handled internally.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
{
struct ata_scsi_args args;
const u8 *scsicmd = cmd->cmnd;
u8 tmp8;
args.dev = dev;
args.id = dev->id;
args.cmd = cmd;
switch(scsicmd[0]) {
case INQUIRY:
if (scsicmd[1] & 2) /* is CmdDt set? */
ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
else if ((scsicmd[1] & 1) == 0) /* is EVPD clear? */
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_std);
else switch (scsicmd[2]) {
case 0x00:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_00);
break;
case 0x80:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_80);
break;
case 0x83:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_83);
break;
case 0x89:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_89);
break;
case 0xb0:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b0);
break;
case 0xb1:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1);
break;
case 0xb2:
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2);
break;
case 0xb6:
if (dev->flags & ATA_DFLAG_ZAC) {
ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b6);
break;
}
/* Fallthrough */
default:
ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
break;
}
break;
case MODE_SENSE:
case MODE_SENSE_10:
ata_scsi_rbuf_fill(&args, ata_scsiop_mode_sense);
break;
case READ_CAPACITY:
ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap);
break;
case SERVICE_ACTION_IN_16:
if ((scsicmd[1] & 0x1f) == SAI_READ_CAPACITY_16)
ata_scsi_rbuf_fill(&args, ata_scsiop_read_cap);
else
ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
break;
case REPORT_LUNS:
ata_scsi_rbuf_fill(&args, ata_scsiop_report_luns);
break;
case REQUEST_SENSE:
ata_scsi_set_sense(dev, cmd, 0, 0, 0);
cmd->result = (DRIVER_SENSE << 24);
break;
/* if we reach this, then writeback caching is disabled,
* turning this into a no-op.
*/
case SYNCHRONIZE_CACHE:
/* fall through */
/* no-op's, complete with success */
case REZERO_UNIT:
case SEEK_6:
case SEEK_10:
case TEST_UNIT_READY:
break;
case SEND_DIAGNOSTIC:
tmp8 = scsicmd[1] & ~(1 << 3);
if (tmp8 != 0x4 || scsicmd[3] || scsicmd[4])
ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
break;
case MAINTENANCE_IN:
if (scsicmd[1] == MI_REPORT_SUPPORTED_OPERATION_CODES)
ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in);
else
ata_scsi_set_invalid_field(dev, cmd, 1, 0xff);
break;
/* all other commands */
default:
ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x20, 0x0);
/* "Invalid command operation code" */
break;
}
cmd->scsi_done(cmd);
}
int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
{
int i, rc;
for (i = 0; i < host->n_ports; i++) {
struct ata_port *ap = host->ports[i];
struct Scsi_Host *shost;
rc = -ENOMEM;
shost = scsi_host_alloc(sht, sizeof(struct ata_port *));
if (!shost)
goto err_alloc;
shost->eh_noresume = 1;
*(struct ata_port **)&shost->hostdata[0] = ap;
ap->scsi_host = shost;
shost->transportt = ata_scsi_transport_template;
shost->unique_id = ap->print_id;
shost->max_id = 16;
shost->max_lun = 1;
shost->max_channel = 1;
shost->max_cmd_len = 32;
/* Schedule policy is determined by ->qc_defer()
* callback and it needs to see every deferred qc.
* Set host_blocked to 1 to prevent SCSI midlayer from
* automatically deferring requests.
*/
shost->max_host_blocked = 1;
libata: Remove extra scsi_host_put() in ata_scsi_add_hosts() If the call to scsi_add_host_with_dma() in ata_scsi_add_hosts() fails, then we may get use-after-free KASAN warns: ================================================================== BUG: KASAN: use-after-free in kobject_put+0x24/0x180 Read of size 1 at addr ffff0026b8c80364 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 5.6.0-rc3-00004-g5a71b206ea82-dirty #1765 Hardware name: Huawei TaiShan 200 (Model 2280)/BC82AMDD, BIOS 2280-V2 CS V3.B160.01 02/24/2020 Call trace: dump_backtrace+0x0/0x298 show_stack+0x14/0x20 dump_stack+0x118/0x190 print_address_description.isra.9+0x6c/0x3b8 __kasan_report+0x134/0x23c kasan_report+0xc/0x18 __asan_load1+0x5c/0x68 kobject_put+0x24/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_devres_release+0x74/0xb0 release_nodes+0x2d0/0x470 devres_release_all+0x50/0x78 really_probe+0x2d4/0x560 driver_probe_device+0x7c/0x148 device_driver_attach+0x94/0xa0 __driver_attach+0xa8/0x110 bus_for_each_dev+0xe8/0x158 driver_attach+0x30/0x40 bus_add_driver+0x220/0x2e0 driver_register+0xbc/0x1d0 __pci_register_driver+0xbc/0xd0 ahci_pci_driver_init+0x20/0x28 do_one_initcall+0xf0/0x608 kernel_init_freeable+0x31c/0x384 kernel_init+0x10/0x118 ret_from_fork+0x10/0x18 Allocated by task 5: save_stack+0x28/0xc8 __kasan_kmalloc.isra.8+0xbc/0xd8 kasan_kmalloc+0xc/0x18 __kmalloc+0x1a8/0x280 scsi_host_alloc+0x44/0x678 ata_scsi_add_hosts+0x74/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 Freed by task 5: save_stack+0x28/0xc8 __kasan_slab_free+0x118/0x180 kasan_slab_free+0x10/0x18 slab_free_freelist_hook+0xa4/0x1a0 kfree+0xd4/0x3a0 scsi_host_dev_release+0x100/0x148 device_release+0x7c/0xe0 kobject_put+0xb0/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_scsi_add_hosts+0x210/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 There is also refcount issue, as well: WARNING: CPU: 1 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0xf8/0x170 The issue is that we make an erroneous extra call to scsi_host_put() for that host: So in ahci_init_one()->ata_host_alloc_pinfo()->ata_host_alloc(), we setup a device release method - ata_devres_release() - which intends to release the SCSI hosts: static void ata_devres_release(struct device *gendev, void *res) { ... for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; if (!ap) continue; if (ap->scsi_host) scsi_host_put(ap->scsi_host); } ... } However in the ata_scsi_add_hosts() error path, we also call scsi_host_put() for the SCSI hosts. Fix by removing the the scsi_host_put() calls in ata_scsi_add_hosts() and leave this to ata_devres_release(). Fixes: f31871951b38 ("libata: separate out ata_host_alloc() and ata_host_register()") Signed-off-by: John Garry <john.garry@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-02-28 19:33:35 +08:00
rc = scsi_add_host_with_dma(shost, &ap->tdev, ap->host->dev);
if (rc)
libata: Remove extra scsi_host_put() in ata_scsi_add_hosts() If the call to scsi_add_host_with_dma() in ata_scsi_add_hosts() fails, then we may get use-after-free KASAN warns: ================================================================== BUG: KASAN: use-after-free in kobject_put+0x24/0x180 Read of size 1 at addr ffff0026b8c80364 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 5.6.0-rc3-00004-g5a71b206ea82-dirty #1765 Hardware name: Huawei TaiShan 200 (Model 2280)/BC82AMDD, BIOS 2280-V2 CS V3.B160.01 02/24/2020 Call trace: dump_backtrace+0x0/0x298 show_stack+0x14/0x20 dump_stack+0x118/0x190 print_address_description.isra.9+0x6c/0x3b8 __kasan_report+0x134/0x23c kasan_report+0xc/0x18 __asan_load1+0x5c/0x68 kobject_put+0x24/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_devres_release+0x74/0xb0 release_nodes+0x2d0/0x470 devres_release_all+0x50/0x78 really_probe+0x2d4/0x560 driver_probe_device+0x7c/0x148 device_driver_attach+0x94/0xa0 __driver_attach+0xa8/0x110 bus_for_each_dev+0xe8/0x158 driver_attach+0x30/0x40 bus_add_driver+0x220/0x2e0 driver_register+0xbc/0x1d0 __pci_register_driver+0xbc/0xd0 ahci_pci_driver_init+0x20/0x28 do_one_initcall+0xf0/0x608 kernel_init_freeable+0x31c/0x384 kernel_init+0x10/0x118 ret_from_fork+0x10/0x18 Allocated by task 5: save_stack+0x28/0xc8 __kasan_kmalloc.isra.8+0xbc/0xd8 kasan_kmalloc+0xc/0x18 __kmalloc+0x1a8/0x280 scsi_host_alloc+0x44/0x678 ata_scsi_add_hosts+0x74/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 Freed by task 5: save_stack+0x28/0xc8 __kasan_slab_free+0x118/0x180 kasan_slab_free+0x10/0x18 slab_free_freelist_hook+0xa4/0x1a0 kfree+0xd4/0x3a0 scsi_host_dev_release+0x100/0x148 device_release+0x7c/0xe0 kobject_put+0xb0/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_scsi_add_hosts+0x210/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 There is also refcount issue, as well: WARNING: CPU: 1 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0xf8/0x170 The issue is that we make an erroneous extra call to scsi_host_put() for that host: So in ahci_init_one()->ata_host_alloc_pinfo()->ata_host_alloc(), we setup a device release method - ata_devres_release() - which intends to release the SCSI hosts: static void ata_devres_release(struct device *gendev, void *res) { ... for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; if (!ap) continue; if (ap->scsi_host) scsi_host_put(ap->scsi_host); } ... } However in the ata_scsi_add_hosts() error path, we also call scsi_host_put() for the SCSI hosts. Fix by removing the the scsi_host_put() calls in ata_scsi_add_hosts() and leave this to ata_devres_release(). Fixes: f31871951b38 ("libata: separate out ata_host_alloc() and ata_host_register()") Signed-off-by: John Garry <john.garry@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-02-28 19:33:35 +08:00
goto err_alloc;
}
return 0;
err_alloc:
while (--i >= 0) {
struct Scsi_Host *shost = host->ports[i]->scsi_host;
libata: Remove extra scsi_host_put() in ata_scsi_add_hosts() If the call to scsi_add_host_with_dma() in ata_scsi_add_hosts() fails, then we may get use-after-free KASAN warns: ================================================================== BUG: KASAN: use-after-free in kobject_put+0x24/0x180 Read of size 1 at addr ffff0026b8c80364 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 5.6.0-rc3-00004-g5a71b206ea82-dirty #1765 Hardware name: Huawei TaiShan 200 (Model 2280)/BC82AMDD, BIOS 2280-V2 CS V3.B160.01 02/24/2020 Call trace: dump_backtrace+0x0/0x298 show_stack+0x14/0x20 dump_stack+0x118/0x190 print_address_description.isra.9+0x6c/0x3b8 __kasan_report+0x134/0x23c kasan_report+0xc/0x18 __asan_load1+0x5c/0x68 kobject_put+0x24/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_devres_release+0x74/0xb0 release_nodes+0x2d0/0x470 devres_release_all+0x50/0x78 really_probe+0x2d4/0x560 driver_probe_device+0x7c/0x148 device_driver_attach+0x94/0xa0 __driver_attach+0xa8/0x110 bus_for_each_dev+0xe8/0x158 driver_attach+0x30/0x40 bus_add_driver+0x220/0x2e0 driver_register+0xbc/0x1d0 __pci_register_driver+0xbc/0xd0 ahci_pci_driver_init+0x20/0x28 do_one_initcall+0xf0/0x608 kernel_init_freeable+0x31c/0x384 kernel_init+0x10/0x118 ret_from_fork+0x10/0x18 Allocated by task 5: save_stack+0x28/0xc8 __kasan_kmalloc.isra.8+0xbc/0xd8 kasan_kmalloc+0xc/0x18 __kmalloc+0x1a8/0x280 scsi_host_alloc+0x44/0x678 ata_scsi_add_hosts+0x74/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 Freed by task 5: save_stack+0x28/0xc8 __kasan_slab_free+0x118/0x180 kasan_slab_free+0x10/0x18 slab_free_freelist_hook+0xa4/0x1a0 kfree+0xd4/0x3a0 scsi_host_dev_release+0x100/0x148 device_release+0x7c/0xe0 kobject_put+0xb0/0x180 put_device+0x10/0x20 scsi_host_put+0x10/0x18 ata_scsi_add_hosts+0x210/0x268 ata_host_register+0x228/0x488 ahci_host_activate+0x1c4/0x2a8 ahci_init_one+0xd18/0x1298 local_pci_probe+0x74/0xf0 work_for_cpu_fn+0x2c/0x48 process_one_work+0x488/0xc08 worker_thread+0x330/0x5d0 kthread+0x1c8/0x1d0 ret_from_fork+0x10/0x18 There is also refcount issue, as well: WARNING: CPU: 1 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0xf8/0x170 The issue is that we make an erroneous extra call to scsi_host_put() for that host: So in ahci_init_one()->ata_host_alloc_pinfo()->ata_host_alloc(), we setup a device release method - ata_devres_release() - which intends to release the SCSI hosts: static void ata_devres_release(struct device *gendev, void *res) { ... for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; if (!ap) continue; if (ap->scsi_host) scsi_host_put(ap->scsi_host); } ... } However in the ata_scsi_add_hosts() error path, we also call scsi_host_put() for the SCSI hosts. Fix by removing the the scsi_host_put() calls in ata_scsi_add_hosts() and leave this to ata_devres_release(). Fixes: f31871951b38 ("libata: separate out ata_host_alloc() and ata_host_register()") Signed-off-by: John Garry <john.garry@huawei.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-02-28 19:33:35 +08:00
/* scsi_host_put() is in ata_devres_release() */
scsi_remove_host(shost);
}
return rc;
}
#ifdef CONFIG_OF
static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap)
{
struct scsi_device *sdev = dev->sdev;
struct device *d = ap->host->dev;
struct device_node *np = d->of_node;
struct device_node *child;
for_each_available_child_of_node(np, child) {
int ret;
u32 val;
ret = of_property_read_u32(child, "reg", &val);
if (ret)
continue;
if (val == dev->devno) {
dev_dbg(d, "found matching device node\n");
sdev->sdev_gendev.of_node = child;
return;
}
}
}
#else
static void ata_scsi_assign_ofnode(struct ata_device *dev, struct ata_port *ap)
{
}
#endif
void ata_scsi_scan_host(struct ata_port *ap, int sync)
{
int tries = 5;
struct ata_device *last_failed_dev = NULL;
struct ata_link *link;
struct ata_device *dev;
repeat:
ata_for_each_link(link, ap, EDGE) {
ata_for_each_dev(dev, link, ENABLED) {
struct scsi_device *sdev;
int channel = 0, id = 0;
if (dev->sdev)
continue;
if (ata_is_host_link(link))
id = dev->devno;
else
channel = link->pmp;
sdev = __scsi_add_device(ap->scsi_host, channel, id, 0,
NULL);
if (!IS_ERR(sdev)) {
dev->sdev = sdev;
ata_scsi_assign_ofnode(dev, ap);
scsi_device_put(sdev);
} else {
dev->sdev = NULL;
}
}
}
/* If we scanned while EH was in progress or allocation
* failure occurred, scan would have failed silently. Check
* whether all devices are attached.
*/
ata_for_each_link(link, ap, EDGE) {
ata_for_each_dev(dev, link, ENABLED) {
if (!dev->sdev)
goto exit_loop;
}
}
exit_loop:
if (!link)
return;
/* we're missing some SCSI devices */
if (sync) {
/* If caller requested synchrnous scan && we've made
* any progress, sleep briefly and repeat.
*/
if (dev != last_failed_dev) {
msleep(100);
last_failed_dev = dev;
goto repeat;
}
/* We might be failing to detect boot device, give it
* a few more chances.
*/
if (--tries) {
msleep(100);
goto repeat;
}
ata_port_err(ap,
"WARNING: synchronous SCSI scan failed without making any progress, switching to async\n");
}
queue_delayed_work(system_long_wq, &ap->hotplug_task,
round_jiffies_relative(HZ));
}
/**
* ata_scsi_offline_dev - offline attached SCSI device
* @dev: ATA device to offline attached SCSI device for
*
* This function is called from ata_eh_hotplug() and responsible
* for taking the SCSI device attached to @dev offline. This
* function is called with host lock which protects dev->sdev
* against clearing.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*
* RETURNS:
* 1 if attached SCSI device exists, 0 otherwise.
*/
int ata_scsi_offline_dev(struct ata_device *dev)
{
if (dev->sdev) {
scsi_device_set_state(dev->sdev, SDEV_OFFLINE);
return 1;
}
return 0;
}
/**
* ata_scsi_remove_dev - remove attached SCSI device
* @dev: ATA device to remove attached SCSI device for
*
* This function is called from ata_eh_scsi_hotplug() and
* responsible for removing the SCSI device attached to @dev.
*
* LOCKING:
* Kernel thread context (may sleep).
*/
static void ata_scsi_remove_dev(struct ata_device *dev)
{
struct ata_port *ap = dev->link->ap;
struct scsi_device *sdev;
unsigned long flags;
/* Alas, we need to grab scan_mutex to ensure SCSI device
* state doesn't change underneath us and thus
* scsi_device_get() always succeeds. The mutex locking can
* be removed if there is __scsi_device_get() interface which
* increments reference counts regardless of device state.
*/
mutex_lock(&ap->scsi_host->scan_mutex);
spin_lock_irqsave(ap->lock, flags);
/* clearing dev->sdev is protected by host lock */
sdev = dev->sdev;
dev->sdev = NULL;
if (sdev) {
/* If user initiated unplug races with us, sdev can go
* away underneath us after the host lock and
* scan_mutex are released. Hold onto it.
*/
if (scsi_device_get(sdev) == 0) {
/* The following ensures the attached sdev is
* offline on return from ata_scsi_offline_dev()
* regardless it wins or loses the race
* against this function.
*/
scsi_device_set_state(sdev, SDEV_OFFLINE);
} else {
WARN_ON(1);
sdev = NULL;
}
}
spin_unlock_irqrestore(ap->lock, flags);
mutex_unlock(&ap->scsi_host->scan_mutex);
if (sdev) {
ata_dev_info(dev, "detaching (SCSI %s)\n",
dev_name(&sdev->sdev_gendev));
scsi_remove_device(sdev);
scsi_device_put(sdev);
}
}
static void ata_scsi_handle_link_detach(struct ata_link *link)
{
struct ata_port *ap = link->ap;
struct ata_device *dev;
ata_for_each_dev(dev, link, ALL) {
unsigned long flags;
if (!(dev->flags & ATA_DFLAG_DETACHED))
continue;
spin_lock_irqsave(ap->lock, flags);
dev->flags &= ~ATA_DFLAG_DETACHED;
spin_unlock_irqrestore(ap->lock, flags);
if (zpodd_dev_enabled(dev))
zpodd_exit(dev);
ata_scsi_remove_dev(dev);
}
}
/**
* ata_scsi_media_change_notify - send media change event
* @dev: Pointer to the disk device with media change event
*
* Tell the block layer to send a media change notification
* event.
*
* LOCKING:
* spin_lock_irqsave(host lock)
*/
void ata_scsi_media_change_notify(struct ata_device *dev)
{
if (dev->sdev)
sdev_evt_send_simple(dev->sdev, SDEV_EVT_MEDIA_CHANGE,
GFP_ATOMIC);
}
/**
* ata_scsi_hotplug - SCSI part of hotplug
2006-11-22 22:55:48 +08:00
* @work: Pointer to ATA port to perform SCSI hotplug on
*
* Perform SCSI part of hotplug. It's executed from a separate
* workqueue after EH completes. This is necessary because SCSI
* hot plugging requires working EH and hot unplugging is
* synchronized with hot plugging with a mutex.
*
* LOCKING:
* Kernel thread context (may sleep).
*/
2006-11-22 22:55:48 +08:00
void ata_scsi_hotplug(struct work_struct *work)
{
2006-11-22 22:55:48 +08:00
struct ata_port *ap =
container_of(work, struct ata_port, hotplug_task.work);
int i;
if (ap->pflags & ATA_PFLAG_UNLOADING) {
DPRINTK("ENTER/EXIT - unloading\n");
return;
}
DPRINTK("ENTER\n");
mutex_lock(&ap->scsi_scan_mutex);
/* Unplug detached devices. We cannot use link iterator here
* because PMP links have to be scanned even if PMP is
* currently not attached. Iterate manually.
*/
ata_scsi_handle_link_detach(&ap->link);
if (ap->pmp_link)
for (i = 0; i < SATA_PMP_MAX_PORTS; i++)
ata_scsi_handle_link_detach(&ap->pmp_link[i]);
/* scan for new ones */
ata_scsi_scan_host(ap, 0);
mutex_unlock(&ap->scsi_scan_mutex);
DPRINTK("EXIT\n");
}
/**
* ata_scsi_user_scan - indication for user-initiated bus scan
* @shost: SCSI host to scan
* @channel: Channel to scan
* @id: ID to scan
* @lun: LUN to scan
*
* This function is called when user explicitly requests bus
* scan. Set probe pending flag and invoke EH.
*
* LOCKING:
* SCSI layer (we don't care)
*
* RETURNS:
* Zero.
*/
int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
unsigned int id, u64 lun)
{
struct ata_port *ap = ata_shost_to_port(shost);
unsigned long flags;
int devno, rc = 0;
if (!ap->ops->error_handler)
return -EOPNOTSUPP;
if (lun != SCAN_WILD_CARD && lun)
return -EINVAL;
if (!sata_pmp_attached(ap)) {
if (channel != SCAN_WILD_CARD && channel)
return -EINVAL;
devno = id;
} else {
if (id != SCAN_WILD_CARD && id)
return -EINVAL;
devno = channel;
}
spin_lock_irqsave(ap->lock, flags);
if (devno == SCAN_WILD_CARD) {
struct ata_link *link;
ata_for_each_link(link, ap, EDGE) {
struct ata_eh_info *ehi = &link->eh_info;
ehi->probe_mask |= ATA_ALL_DEVICES;
libata: prefer hardreset When both soft and hard resets are available, libata preferred softreset till now. The logic behind it was to be softer to devices; however, this doesn't really help much. Rationales for the change: * BIOS may freeze lock certain things during boot and softreset can't unlock those. This by itself is okay but during operation PHY event or other error conditions can trigger hardreset and the device may end up with different configuration. For example, after a hardreset, previously unlockable HPA can be unlocked resulting in different device size and thus revalidation failure. Similar condition can occur during or after resume. * Certain ATAPI devices require hardreset to recover after certain error conditions. On PATA, this is done by issuing the DEVICE RESET command. On SATA, COMRESET has equivalent effect. The problem is that DEVICE RESET needs its own execution protocol. For SFF controllers with bare TF access, it can be easily implemented but more advanced controllers (e.g. ahci and sata_sil24) require specialized implementations. Simply using hardreset solves the problem nicely. * COMRESET initialization sequence is the norm in SATA land and many SATA devices don't work properly if only SRST is used. For example, some PMPs behave this way and libata works around by always issuing hardreset if the host supports PMP. Like the above example, libata has developed a number of mechanisms aiming to promote softreset to hardreset if softreset is not going to work. This approach is time consuming and error prone. Also, note that, dependingon how you read the specs, it could be argued that PMP fan-out ports require COMRESET to start operation. In fact, all the PMPs on the market except one don't work properly if COMRESET is not issued to fan-out ports after PMP reset. * COMRESET is an integral part of SATA connection and any working device should be able to handle COMRESET properly. After all, it's the way to signal hardreset during reboot. This is the most used and recommended (at least by the ahci spec) method of resetting devices. So, this patch makes libata prefer hardreset over softreset by making the following changes. * Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is now only used to tell prereset whether soft or hard reset will be issued. * Strip out now unneeded promote-to-hardreset logics from ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and other places. Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
ehi->action |= ATA_EH_RESET;
}
} else {
struct ata_device *dev = ata_find_dev(ap, devno);
if (dev) {
struct ata_eh_info *ehi = &dev->link->eh_info;
ehi->probe_mask |= 1 << dev->devno;
libata: prefer hardreset When both soft and hard resets are available, libata preferred softreset till now. The logic behind it was to be softer to devices; however, this doesn't really help much. Rationales for the change: * BIOS may freeze lock certain things during boot and softreset can't unlock those. This by itself is okay but during operation PHY event or other error conditions can trigger hardreset and the device may end up with different configuration. For example, after a hardreset, previously unlockable HPA can be unlocked resulting in different device size and thus revalidation failure. Similar condition can occur during or after resume. * Certain ATAPI devices require hardreset to recover after certain error conditions. On PATA, this is done by issuing the DEVICE RESET command. On SATA, COMRESET has equivalent effect. The problem is that DEVICE RESET needs its own execution protocol. For SFF controllers with bare TF access, it can be easily implemented but more advanced controllers (e.g. ahci and sata_sil24) require specialized implementations. Simply using hardreset solves the problem nicely. * COMRESET initialization sequence is the norm in SATA land and many SATA devices don't work properly if only SRST is used. For example, some PMPs behave this way and libata works around by always issuing hardreset if the host supports PMP. Like the above example, libata has developed a number of mechanisms aiming to promote softreset to hardreset if softreset is not going to work. This approach is time consuming and error prone. Also, note that, dependingon how you read the specs, it could be argued that PMP fan-out ports require COMRESET to start operation. In fact, all the PMPs on the market except one don't work properly if COMRESET is not issued to fan-out ports after PMP reset. * COMRESET is an integral part of SATA connection and any working device should be able to handle COMRESET properly. After all, it's the way to signal hardreset during reboot. This is the most used and recommended (at least by the ahci spec) method of resetting devices. So, this patch makes libata prefer hardreset over softreset by making the following changes. * Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is now only used to tell prereset whether soft or hard reset will be issued. * Strip out now unneeded promote-to-hardreset logics from ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and other places. Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
ehi->action |= ATA_EH_RESET;
} else
rc = -EINVAL;
}
if (rc == 0) {
ata_port_schedule_eh(ap);
spin_unlock_irqrestore(ap->lock, flags);
ata_port_wait_eh(ap);
} else
spin_unlock_irqrestore(ap->lock, flags);
return rc;
}
/**
* ata_scsi_dev_rescan - initiate scsi_rescan_device()
2006-11-22 22:55:48 +08:00
* @work: Pointer to ATA port to perform scsi_rescan_device()
*
* After ATA pass thru (SAT) commands are executed successfully,
* libata need to propagate the changes to SCSI layer.
*
* LOCKING:
* Kernel thread context (may sleep).
*/
2006-11-22 22:55:48 +08:00
void ata_scsi_dev_rescan(struct work_struct *work)
{
2006-11-22 22:55:48 +08:00
struct ata_port *ap =
container_of(work, struct ata_port, scsi_rescan_task);
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
mutex_lock(&ap->scsi_scan_mutex);
spin_lock_irqsave(ap->lock, flags);
ata_for_each_link(link, ap, EDGE) {
ata_for_each_dev(dev, link, ENABLED) {
struct scsi_device *sdev = dev->sdev;
if (!sdev)
continue;
if (scsi_device_get(sdev))
continue;
spin_unlock_irqrestore(ap->lock, flags);
scsi_rescan_device(&(sdev->sdev_gendev));
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
}
}
spin_unlock_irqrestore(ap->lock, flags);
mutex_unlock(&ap->scsi_scan_mutex);
}