for-4.17/block-20180402

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQIcBAABCAAGBQJawr05AAoJEPfTWPspceCmT2UP/1uuaqwzyl4VjFNb/k7KS7UM
+Cs/1HBlGomgMA8orDTGqtWqLRdR3z4RSh0+MvXTzQ78HpFVYz7CbDc9itHm+G9M
X0ypD4kF/JGCFb5cxk+x6qv28uO2nv4DP3+0hHqJWLH4UVJBWDY6bs4BPShsf9QB
I6XjioNMhoqylXgdOITLODJZz+TcChlJMDAqwhpJwh9TH1wjobleAZ6AdmCPfgi5
h0UCKMUKzcVJlNZwQUrzrs2cxcx9Uhunnbz7HK0ZV4n/FKFtDpGynFpQQ71pZxKe
Be0ZOBPCQvC3ykOM/egCIvC/e5y7FgrjORD6jxyu1PTwAugI5E1VYSMxHkXvgPAx
zOo9A7RT4GPO2tDQv+DbzNFpqeSAclTgSmr+/y1wmheBs8DiSt7MPVBiNM4zdCNv
NLk9z7IEjFhdmluSB/LbTb1aokypMb/q7QTLouPHdwGn80k7yrhFyLHgdjpNTQ2K
UHfHZvGxkOX6SmFhBNOtIFUkuSceenh64a0RkRle7filx+ImpbCVm2/GYi9zZNCu
EtctgzLbLmz40zMiyDaZS2bxBgGzfn6yf4xd9LsaAJPMhvZnmXogT0D9ctWXB0WU
mMaS7sOkLnNjnGkzF1fHkeiZ/oigrstJbe+CA7BtOdwxpWn6MZBgKEoFQ6iA2b3X
5J1axMgVH5LAsIEcEQVq
=RVhK
-----END PGP SIGNATURE-----

Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "It's a pretty quiet round this time, which is nice. This contains:

   - series from Bart, cleaning up the way we set/test/clear atomic
     queue flags.

   - series from Bart, fixing races between gendisk and queue
     registration and removal.

   - set of bcache fixes and improvements from various folks, by way of
     Michael Lyle.

   - set of lightnvm updates from Matias, most of it being the 1.2 to
     2.0 transition.

   - removal of unused DIO flags from Nikolay.

   - blk-mq/sbitmap memory ordering fixes from Omar.

   - divide-by-zero fix for BFQ from Paolo.

   - minor documentation patches from Randy.

   - timeout fix from Tejun.

   - Alpha "can't write a char atomically" fix from Mikulas.

   - set of NVMe fixes by way of Keith.

   - bsg and bsg-lib improvements from Christoph.

   - a few sed-opal fixes from Jonas.

   - cdrom check-disk-change deadlock fix from Maurizio.

   - various little fixes, comment fixes, etc from various folks"

* tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits)
  blk-mq: Directly schedule q->timeout_work when aborting a request
  blktrace: fix comment in blktrace_api.h
  lightnvm: remove function name in strings
  lightnvm: pblk: remove some unnecessary NULL checks
  lightnvm: pblk: don't recover unwritten lines
  lightnvm: pblk: implement 2.0 support
  lightnvm: pblk: implement get log report chunk
  lightnvm: pblk: rename ppaf* to addrf*
  lightnvm: pblk: check for supported version
  lightnvm: implement get log report chunk helpers
  lightnvm: make address conversions depend on generic device
  lightnvm: add support for 2.0 address format
  lightnvm: normalize geometry nomenclature
  lightnvm: complete geo structure with maxoc*
  lightnvm: add shorten OCSSD version in geo
  lightnvm: add minor version to generic geometry
  lightnvm: simplify geometry structure
  lightnvm: pblk: refactor init/exit sequences
  lightnvm: Avoid validation of default op value
  lightnvm: centralize permission check for lightnvm ioctl
  ...
commit 3526dd0c78
@ -234,6 +234,7 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
|
|||
&int& (* open)(struct\ cdrom_device_info *, int)\cr
|
||||
&void& (* release)(struct\ cdrom_device_info *);\cr
|
||||
&int& (* drive_status)(struct\ cdrom_device_info *, int);\cr
|
||||
&unsigned\ int& (* check_events)(struct\ cdrom_device_info *, unsigned\ int, int);\cr
|
||||
&int& (* media_changed)(struct\ cdrom_device_info *, int);\cr
|
||||
&int& (* tray_move)(struct\ cdrom_device_info *, int);\cr
|
||||
&int& (* lock_door)(struct\ cdrom_device_info *, int);\cr
|
||||
|
@ -245,10 +246,9 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
|
|||
&int& (* reset)(struct\ cdrom_device_info *);\cr
|
||||
&int& (* audio_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
|
||||
void *{});\cr
|
||||
&int& (* dev_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
|
||||
unsigned\ long);\cr
|
||||
\noalign{\medskip}
|
||||
&const\ int& capability;& capability flags \cr
|
||||
&int& (* generic_packet)(struct\ cdrom_device_info *, struct\ packet_command *{});\cr
|
||||
\};\cr
|
||||
}
|
||||
$$
|
||||
|
@ -274,19 +274,32 @@ $$
|
|||
\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
|
||||
$/*$ \rm# $*/$\hfil\cr
|
||||
struct& cdrom_device_info\ \{ \hidewidth\cr
|
||||
& struct\ cdrom_device_ops *& ops;& device operations for this major\cr
|
||||
& struct\ cdrom_device_info *& next;& next device_info for this major\cr
|
||||
& const\ struct\ cdrom_device_ops *& ops;& device operations for this major\cr
|
||||
& struct\ list_head& list;& linked list of all device_info\cr
|
||||
& struct\ gendisk *& disk;& matching block layer disk\cr
|
||||
& void *& handle;& driver-dependent data\cr
|
||||
\noalign{\medskip}
|
||||
& kdev_t& dev;& device number (incorporates minor)\cr
|
||||
& int& mask;& mask of capability: disables them \cr
|
||||
& int& speed;& maximum speed for reading data \cr
|
||||
& int& capacity;& number of discs in a jukebox \cr
|
||||
\noalign{\medskip}
|
||||
&int& options : 30;& options flags \cr
|
||||
&unsigned\ int& options : 30;& options flags \cr
|
||||
&unsigned& mc_flags : 2;& media-change buffer flags \cr
|
||||
&unsigned\ int& vfs_events;& cached events for vfs path\cr
|
||||
&unsigned\ int& ioctl_events;& cached events for ioctl path\cr
|
||||
& int& use_count;& number of times device is opened\cr
|
||||
& char& name[20];& name of the device type\cr
|
||||
\noalign{\medskip}
|
||||
&__u8& sanyo_slot : 2;& Sanyo 3-CD changer support\cr
|
||||
&__u8& keeplocked : 1;& CDROM_LOCKDOOR status\cr
|
||||
&__u8& reserved : 5;& not used yet\cr
|
||||
& int& cdda_method;& see CDDA_* flags\cr
|
||||
&__u8& last_sense;& saves last sense key\cr
|
||||
&__u8& media_written;& dirty flag, DVD+RW bookkeeping\cr
|
||||
&unsigned\ short& mmc3_profile;& current MMC3 profile\cr
|
||||
& int& for_data;& unknown:TBD\cr
|
||||
& int\ (* exit)\ (struct\ cdrom_device_info *);&& unknown:TBD\cr
|
||||
& int& mrw_mode_page;& which MRW mode page is in use\cr
|
||||
\}\cr
|
||||
}$$
|
||||
Using this $struct$, a linked list of the registered minor devices is
|
||||
|
@ -298,9 +311,7 @@ The $mask$ flags can be used to mask out some of the capabilities listed
|
|||
in $ops\to capability$, if a specific drive doesn't support a feature
|
||||
of the driver. The value $speed$ specifies the maximum head-rate of the
|
||||
drive, measured in units of normal audio speed (176\,kB/sec raw data or
|
||||
150\,kB/sec file system data). The value $n_discs$ should reflect the
|
||||
number of discs the drive can hold simultaneously, if it is designed
|
||||
as a juke-box, or otherwise~1. The parameters are declared $const$
|
||||
150\,kB/sec file system data). The parameters are declared $const$
|
||||
because they describe properties of the drive, which don't change after
|
||||
registration.
|
||||
|
||||
|
@ -1002,7 +1013,7 @@ taken over the torch in maintaining \cdromc\ and integrating much
|
|||
\cdrom-related code in the 2.1-kernel. Thanks to Scott Snyder and
|
||||
Gerd Knorr, who were the first to implement this interface for SCSI
|
||||
and IDE-CD drivers and added many ideas for extension of the data
|
||||
structures relative to kernel~2.0. Further thanks to Heiko Ei{\sz}feldt,
|
||||
structures relative to kernel~2.0. Further thanks to Heiko Ei{\ss}feldt,
|
||||
Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard M\"onkeberg and Andrew
|
||||
Kroll, the \linux\ \cdrom\ device driver developers who were kind
|
||||
enough to give suggestions and criticisms during the writing. Finally
|
||||
|
|
|
@ -36,6 +36,14 @@ o fail_function

  ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
  under /sys/kernel/debug/fail_function. No boot option supported.

o NVMe fault injection

  inject NVMe status code and retry flag on devices permitted by setting
  debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default
  status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
  retry flag can be set via the debugfs.

Configure fault-injection capabilities behavior
-----------------------------------------------

@ -0,0 +1,116 @@
NVMe Fault Injection
====================
Linux's fault injection framework provides a systematic way to support
error injection via debugfs in the /sys/kernel/debug directory. When
enabled, the default NVME_SC_INVALID_OPCODE with no retry will be
injected into the nvme_end_request. Users can change the default status
code and no retry flag via the debugfs. The list of Generic Command
Status can be found in include/linux/nvme.h

Following examples show how to inject an error into the nvme.

First, enable CONFIG_FAULT_INJECTION_DEBUG_FS kernel config,
recompile the kernel. After booting up the kernel, do the
following.

Example 1: Inject default status code with no retry
---------------------------------------------------

mount /dev/nvme0n1 /mnt
echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
cp a.file /mnt

Expected Result:

cp: cannot stat ‘/mnt/a.file’: Input/output error

Message from dmesg:

FAULT_INJECTION: forcing a failure.
name fault_inject, interval 1, probability 100, space 0, times 1
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc8+ #2
Hardware name: innotek GmbH VirtualBox/VirtualBox,
BIOS VirtualBox 12/01/2006
Call Trace:
<IRQ>
dump_stack+0x5c/0x7d
should_fail+0x148/0x170
nvme_should_fail+0x2f/0x50 [nvme_core]
nvme_process_cq+0xe7/0x1d0 [nvme]
nvme_irq+0x1e/0x40 [nvme]
__handle_irq_event_percpu+0x3a/0x190
handle_irq_event_percpu+0x30/0x70
handle_irq_event+0x36/0x60
handle_fasteoi_irq+0x78/0x120
handle_irq+0xa7/0x130
? tick_irq_enter+0xa8/0xc0
do_IRQ+0x43/0xc0
common_interrupt+0xa2/0xa2
</IRQ>
RIP: 0010:native_safe_halt+0x2/0x10
RSP: 0018:ffffffff82003e90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffdd
RAX: ffffffff817a10c0 RBX: ffffffff82012480 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000000 R08: 000000008e38ce64 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff82012480
R13: ffffffff82012480 R14: 0000000000000000 R15: 0000000000000000
? __sched_text_end+0x4/0x4
default_idle+0x18/0xf0
do_idle+0x150/0x1d0
cpu_startup_entry+0x6f/0x80
start_kernel+0x4c4/0x4e4
? set_init_arg+0x55/0x55
secondary_startup_64+0xa5/0xb0
print_req_error: I/O error, dev nvme0n1, sector 9240
EXT4-fs error (device nvme0n1): ext4_find_entry:1436:
inode #2: comm cp: reading directory lblock 0

Example 2: Inject default status code with retry
------------------------------------------------

mount /dev/nvme0n1 /mnt
echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/times
echo 100 > /sys/kernel/debug/nvme0n1/fault_inject/probability
echo 1 > /sys/kernel/debug/nvme0n1/fault_inject/status
echo 0 > /sys/kernel/debug/nvme0n1/fault_inject/dont_retry

cp a.file /mnt

Expected Result:

command success without error

Message from dmesg:

FAULT_INJECTION: forcing a failure.
name fault_inject, interval 1, probability 100, space 0, times 1
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc8+ #4
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
Call Trace:
<IRQ>
dump_stack+0x5c/0x7d
should_fail+0x148/0x170
nvme_should_fail+0x30/0x60 [nvme_core]
nvme_loop_queue_response+0x84/0x110 [nvme_loop]
nvmet_req_complete+0x11/0x40 [nvmet]
nvmet_bio_done+0x28/0x40 [nvmet]
blk_update_request+0xb0/0x310
blk_mq_end_request+0x18/0x60
flush_smp_call_function_queue+0x3d/0xf0
smp_call_function_single_interrupt+0x2c/0xc0
call_function_single_interrupt+0xa2/0xb0
</IRQ>
RIP: 0010:native_safe_halt+0x2/0x10
RSP: 0018:ffffc9000068bec0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff04
RAX: ffffffff817a10c0 RBX: ffff88011a3c9680 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000001 R08: 000000008e38c131 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011a3c9680
R13: ffff88011a3c9680 R14: 0000000000000000 R15: 0000000000000000
? __sched_text_end+0x4/0x4
default_idle+0x18/0xf0
do_idle+0x150/0x1d0
cpu_startup_entry+0x6f/0x80
start_secondary+0x187/0x1e0
secondary_startup_64+0xa5/0xb0
|
|
@ -2646,6 +2646,7 @@ L: linux-block@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
S: Maintained
F: block/
F: drivers/block/
F: kernel/trace/blktrace.c
F: lib/sbitmap.c

@ -21,7 +21,6 @@
|
|||
#include <platform/simcall.h>
|
||||
|
||||
#define SIMDISK_MAJOR 240
|
||||
#define SECTOR_SHIFT 9
|
||||
#define SIMDISK_MINORS 1
|
||||
#define MAX_SIMDISK_COUNT 10
|
||||
|
||||
|
|
|
@ -201,7 +201,20 @@ static struct kmem_cache *bfq_pool;
|
|||
/* Target observation time interval for a peak-rate update (ns) */
|
||||
#define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC
|
||||
|
||||
/* Shift used for peak rate fixed precision calculations. */
|
||||
/*
|
||||
* Shift used for peak-rate fixed precision calculations.
|
||||
* With
|
||||
* - the current shift: 16 positions
|
||||
* - the current type used to store rate: u32
|
||||
* - the current unit of measure for rate: [sectors/usec], or, more precisely,
|
||||
* [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift,
|
||||
* the range of rates that can be stored is
|
||||
* [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec =
|
||||
* [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec =
|
||||
* [15, 65G] sectors/sec
|
||||
* Which, assuming a sector size of 512B, corresponds to a range of
|
||||
* [7.5K, 33T] B/sec
|
||||
*/
|
||||
#define BFQ_RATE_SHIFT 16
|
||||
|
||||
/*
|
||||
|
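As a quick check of the range quoted in the new comment above (a worked example of the arithmetic only, not part of the patch; it assumes 512 B sectors as the comment does):

    r_{\min} = 2^{-16}\ \text{sectors}/\mu s = \frac{10^{6}}{2^{16}}\ \text{sectors}/s \approx 15\ \text{sectors}/s \approx 7.5\ \text{KB}/s
    r_{\max} = 2^{32-16}\ \text{sectors}/\mu s = 2^{16}\cdot 10^{6}\ \text{sectors}/s \approx 6.6\cdot 10^{10}\ \text{sectors}/s \approx 33\ \text{TB}/s
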
@ -2637,6 +2650,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
|
|||
rate /= divisor; /* smoothing constant alpha = 1/divisor */
|
||||
|
||||
bfqd->peak_rate += rate;
|
||||
|
||||
/*
|
||||
* For a very slow device, bfqd->peak_rate can reach 0 (see
|
||||
* the minimum representable values reported in the comments
|
||||
* on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid
|
||||
* divisions by zero where bfqd->peak_rate is used as a
|
||||
* divisor.
|
||||
*/
|
||||
bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate);
|
||||
|
||||
update_thr_responsiveness_params(bfqd);
|
||||
|
||||
reset_computation:
|
||||
|
|
|
@ -499,7 +499,7 @@ struct bfq_data {
|
|||
u64 delta_from_first;
|
||||
/*
|
||||
* Current estimate of the device peak rate, measured in
|
||||
* [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by
|
||||
* [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
|
||||
* BFQ_RATE_SHIFT is performed to increase precision in
|
||||
* fixed-point calculations.
|
||||
*/
|
||||
|
|
|
@ -43,9 +43,9 @@
|
|||
* break badly! cannot be bigger than what you can fit into an
|
||||
* unsigned short
|
||||
*/
|
||||
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
|
||||
#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
|
||||
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
|
||||
BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
|
||||
BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
|
||||
};
|
||||
#undef BV
|
||||
|
||||
|
|
|
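The effect of the two-argument macro above is easiest to see from an expansion (an illustrative sketch with its own minimal struct, not part of the patch; it assumes BIO_MAX_PAGES == 256 as in this kernel):

    /* Sketch: the new BV(x, n) names the slab after the literal second
     * argument instead of stringifying the first one. */
    struct biovec_slab_example { int nr_vecs; const char *name; };
    #define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }

    static struct biovec_slab_example example[] = {
        BV(1, 1),       /* { .nr_vecs = 1,   .name = "biovec-1"   } */
        BV(128, 128),   /* { .nr_vecs = 128, .name = "biovec-128" } */
        BV(256, max),   /* { .nr_vecs = 256, .name = "biovec-max" } */
    };
    #undef BV
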
@ -307,11 +307,28 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
|||
}
|
||||
}
|
||||
|
||||
static void blkg_pd_offline(struct blkcg_gq *blkg)
|
||||
{
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(blkg->q->queue_lock);
|
||||
lockdep_assert_held(&blkg->blkcg->lock);
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
|
||||
if (blkg->pd[i] && !blkg->pd[i]->offline &&
|
||||
pol->pd_offline_fn) {
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
blkg->pd[i]->offline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct blkcg *blkcg = blkg->blkcg;
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(blkg->q->queue_lock);
|
||||
lockdep_assert_held(&blkcg->lock);
|
||||
|
@ -320,13 +337,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
|
|||
WARN_ON_ONCE(list_empty(&blkg->q_node));
|
||||
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
|
||||
if (blkg->pd[i] && pol->pd_offline_fn)
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
}
|
||||
|
||||
if (parent) {
|
||||
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
|
||||
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
|
||||
|
@ -369,6 +379,7 @@ static void blkg_destroy_all(struct request_queue *q)
|
|||
struct blkcg *blkcg = blkg->blkcg;
|
||||
|
||||
spin_lock(&blkcg->lock);
|
||||
blkg_pd_offline(blkg);
|
||||
blkg_destroy(blkg);
|
||||
spin_unlock(&blkcg->lock);
|
||||
}
|
||||
|
@ -995,25 +1006,25 @@ static struct cftype blkcg_legacy_files[] = {
|
|||
* @css: css of interest
|
||||
*
|
||||
* This function is called when @css is about to go away and responsible
|
||||
* for shooting down all blkgs associated with @css. blkgs should be
|
||||
* removed while holding both q and blkcg locks. As blkcg lock is nested
|
||||
* inside q lock, this function performs reverse double lock dancing.
|
||||
* for offlining all blkgs pd and killing all wbs associated with @css.
|
||||
* blkgs pd offline should be done while holding both q and blkcg locks.
|
||||
* As blkcg lock is nested inside q lock, this function performs reverse
|
||||
* double lock dancing.
|
||||
*
|
||||
* This is the blkcg counterpart of ioc_release_fn().
|
||||
*/
|
||||
static void blkcg_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
|
||||
while (!hlist_empty(&blkcg->blkg_list)) {
|
||||
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
|
||||
struct blkcg_gq, blkcg_node);
|
||||
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct request_queue *q = blkg->q;
|
||||
|
||||
if (spin_trylock(q->queue_lock)) {
|
||||
blkg_destroy(blkg);
|
||||
blkg_pd_offline(blkg);
|
||||
spin_unlock(q->queue_lock);
|
||||
} else {
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
|
@ -1027,11 +1038,43 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
|
|||
wb_blkcg_offline(blkcg);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
|
||||
* @blkcg: blkcg of interest
|
||||
*
|
||||
* This function is called when blkcg css is about to free and responsible for
|
||||
* destroying all blkgs associated with @blkcg.
|
||||
* blkgs should be removed while holding both q and blkcg locks. As blkcg lock
|
||||
* is nested inside q lock, this function performs reverse double lock dancing.
|
||||
*/
|
||||
static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
|
||||
{
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
while (!hlist_empty(&blkcg->blkg_list)) {
|
||||
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
|
||||
struct blkcg_gq,
|
||||
blkcg_node);
|
||||
struct request_queue *q = blkg->q;
|
||||
|
||||
if (spin_trylock(q->queue_lock)) {
|
||||
blkg_destroy(blkg);
|
||||
spin_unlock(q->queue_lock);
|
||||
} else {
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
cpu_relax();
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
}
|
||||
|
||||
static void blkcg_css_free(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
int i;
|
||||
|
||||
blkcg_destroy_all_blkgs(blkcg);
|
||||
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
|
||||
list_del(&blkcg->all_blkcgs_node);
|
||||
|
@ -1371,8 +1414,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
|||
spin_lock(&blkg->blkcg->lock);
|
||||
|
||||
if (blkg->pd[pol->plid]) {
|
||||
if (pol->pd_offline_fn)
|
||||
if (!blkg->pd[pol->plid]->offline &&
|
||||
pol->pd_offline_fn) {
|
||||
pol->pd_offline_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid]->offline = true;
|
||||
}
|
||||
pol->pd_free_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid] = NULL;
|
||||
}
|
||||
|
|
block/blk-core.c | 250
|
@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep;
|
|||
*/
|
||||
static struct workqueue_struct *kblockd_workqueue;
|
||||
|
||||
/**
|
||||
* blk_queue_flag_set - atomically set a queue flag
|
||||
* @flag: flag to be set
|
||||
* @q: request queue
|
||||
*/
|
||||
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
queue_flag_set(flag, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_flag_set);
|
||||
|
||||
/**
|
||||
* blk_queue_flag_clear - atomically clear a queue flag
|
||||
* @flag: flag to be cleared
|
||||
* @q: request queue
|
||||
*/
|
||||
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
queue_flag_clear(flag, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_flag_clear);
|
||||
|
||||
/**
|
||||
* blk_queue_flag_test_and_set - atomically test and set a queue flag
|
||||
* @flag: flag to be set
|
||||
* @q: request queue
|
||||
*
|
||||
* Returns the previous value of @flag - 0 if the flag was not set and 1 if
|
||||
* the flag was already set.
|
||||
*/
|
||||
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool res;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
res = queue_flag_test_and_set(flag, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
|
||||
|
||||
/**
|
||||
* blk_queue_flag_test_and_clear - atomically test and clear a queue flag
|
||||
* @flag: flag to be cleared
|
||||
* @q: request queue
|
||||
*
|
||||
* Returns the previous value of @flag - 0 if the flag was not set and 1 if
|
||||
* the flag was set.
|
||||
*/
|
||||
bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool res;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
res = queue_flag_test_and_clear(flag, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
|
||||
|
||||
static void blk_clear_congested(struct request_list *rl, int sync)
|
||||
{
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
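How callers are expected to use the new helpers is sketched below (a minimal example under assumptions, not taken from this patch; the mydrv_* names are hypothetical). Code that previously wrapped queue_flag_set()/queue_flag_clear() in q->queue_lock can call the exported wrappers directly, and blk_queue_flag_test_and_set() reports the flag's previous value.

    #include <linux/blkdev.h>

    /* Sketch: driver-side use of the atomic queue-flag helpers. */
    static void mydrv_enable_stats(struct request_queue *q)
    {
        /* Replaces spin_lock_irq(q->queue_lock); queue_flag_set(...); unlock. */
        blk_queue_flag_set(QUEUE_FLAG_STATS, q);
    }

    static bool mydrv_already_registered(struct request_queue *q)
    {
        /* Returns false for the first caller, true if the flag was already set. */
        return blk_queue_flag_test_and_set(QUEUE_FLAG_REGISTERED, q);
    }
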
@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue);
|
|||
*/
|
||||
int blk_set_preempt_only(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
int res;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
|
||||
return res;
|
||||
return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_set_preempt_only);
|
||||
|
||||
void blk_clear_preempt_only(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
|
||||
wake_up_all(&q->mq_freeze_wq);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
|
||||
|
||||
|
@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
|
|||
|
||||
void blk_set_queue_dying(struct request_queue *q)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
queue_flag_set(QUEUE_FLAG_DYING, q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
|
||||
|
||||
/*
|
||||
* When queue DYING flag is set, we need to block new req
|
||||
|
@ -719,6 +778,37 @@ void blk_cleanup_queue(struct request_queue *q)
|
|||
del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
|
||||
blk_sync_queue(q);
|
||||
|
||||
/*
|
||||
* I/O scheduler exit is only safe after the sysfs scheduler attribute
|
||||
* has been removed.
|
||||
*/
|
||||
WARN_ON_ONCE(q->kobj.state_in_sysfs);
|
||||
|
||||
/*
|
||||
* Since the I/O scheduler exit code may access cgroup information,
|
||||
* perform I/O scheduler exit before disassociating from the block
|
||||
* cgroup controller.
|
||||
*/
|
||||
if (q->elevator) {
|
||||
ioc_clear_queue(q);
|
||||
elevator_exit(q, q->elevator);
|
||||
q->elevator = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove all references to @q from the block cgroup controller before
|
||||
* restoring @q->queue_lock to avoid that restoring this pointer causes
|
||||
* e.g. blkcg_print_blkgs() to crash.
|
||||
*/
|
||||
blkcg_exit_queue(q);
|
||||
|
||||
/*
|
||||
* Since the cgroup code may dereference the @q->backing_dev_info
|
||||
* pointer, only decrease its reference count after having removed the
|
||||
* association with the block cgroup controller.
|
||||
*/
|
||||
bdi_put(q->backing_dev_info);
|
||||
|
||||
if (q->mq_ops)
|
||||
blk_mq_free_queue(q);
|
||||
percpu_ref_exit(&q->q_usage_counter);
|
||||
|
@ -810,7 +900,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl)
|
|||
|
||||
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
|
||||
{
|
||||
return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
|
||||
return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_alloc_queue);
|
||||
|
||||
|
@ -827,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
|
|||
bool success = false;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock_sched();
|
||||
rcu_read_lock();
|
||||
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
|
||||
/*
|
||||
* The code that sets the PREEMPT_ONLY flag is
|
||||
|
@ -840,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
|
|||
percpu_ref_put(&q->q_usage_counter);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock_sched();
|
||||
rcu_read_unlock();
|
||||
|
||||
if (success)
|
||||
return 0;
|
||||
|
@ -888,7 +978,21 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
|
|||
kblockd_schedule_work(&q->timeout_work);
|
||||
}
|
||||
|
||||
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
||||
/**
|
||||
* blk_alloc_queue_node - allocate a request queue
|
||||
* @gfp_mask: memory allocation flags
|
||||
* @node_id: NUMA node to allocate memory from
|
||||
* @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
|
||||
* serialize calls to the legacy .request_fn() callback. Ignored for
|
||||
* blk-mq request queues.
|
||||
*
|
||||
* Note: pass the queue lock as the third argument to this function instead of
|
||||
* setting the queue lock pointer explicitly to avoid triggering a sporadic
|
||||
* crash in the blkcg code. This function namely calls blkcg_init_queue() and
|
||||
* the queue lock pointer must be set before blkcg_init_queue() is called.
|
||||
*/
|
||||
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
|
||||
spinlock_t *lock)
|
||||
{
|
||||
struct request_queue *q;
|
||||
|
||||
|
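The new calling convention for legacy (non-mq) queues is sketched below (assumed driver-side usage, not from this patch; the mydrv names are hypothetical): the driver's lock is handed over at allocation time so that q->queue_lock is already valid when blkcg_init_queue() runs.

    #include <linux/blkdev.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(mydrv_queue_lock);  /* hypothetical driver-private lock */

    static struct request_queue *mydrv_alloc_queue(int node)
    {
        /* The third argument is new; previously drivers assigned
         * q->queue_lock themselves after allocation. */
        return blk_alloc_queue_node(GFP_KERNEL, node, &mydrv_queue_lock);
    }
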
@ -939,11 +1043,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
|||
mutex_init(&q->sysfs_lock);
|
||||
spin_lock_init(&q->__queue_lock);
|
||||
|
||||
/*
|
||||
* By default initialize queue_lock to internal lock and driver can
|
||||
* override it later if need be.
|
||||
*/
|
||||
q->queue_lock = &q->__queue_lock;
|
||||
if (!q->mq_ops)
|
||||
q->queue_lock = lock ? : &q->__queue_lock;
|
||||
|
||||
/*
|
||||
* A queue starts its life with bypass turned on to avoid
|
||||
|
@ -952,7 +1053,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
|||
* registered by blk_register_queue().
|
||||
*/
|
||||
q->bypass_depth = 1;
|
||||
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
|
||||
|
||||
init_waitqueue_head(&q->mq_freeze_wq);
|
||||
|
||||
|
@ -1030,13 +1131,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
|
|||
{
|
||||
struct request_queue *q;
|
||||
|
||||
q = blk_alloc_queue_node(GFP_KERNEL, node_id);
|
||||
q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
q->request_fn = rfn;
|
||||
if (lock)
|
||||
q->queue_lock = lock;
|
||||
if (blk_init_allocated_queue(q) < 0) {
|
||||
blk_cleanup_queue(q);
|
||||
return NULL;
|
||||
|
@ -2023,7 +2122,7 @@ out_unlock:
|
|||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static void handle_bad_sector(struct bio *bio)
|
||||
static void handle_bad_sector(struct bio *bio, sector_t maxsector)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
|
@ -2031,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio)
|
|||
printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
|
||||
bio_devname(bio, b), bio->bi_opf,
|
||||
(unsigned long long)bio_end_sector(bio),
|
||||
(long long)get_capacity(bio->bi_disk));
|
||||
(long long)maxsector);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FAIL_MAKE_REQUEST
|
||||
|
@ -2092,68 +2191,59 @@ static noinline int should_fail_bio(struct bio *bio)
|
|||
}
|
||||
ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
|
||||
|
||||
/*
|
||||
* Check whether this bio extends beyond the end of the device or partition.
|
||||
* This may well happen - the kernel calls bread() without checking the size of
|
||||
* the device, e.g., when mounting a file system.
|
||||
*/
|
||||
static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
|
||||
{
|
||||
unsigned int nr_sectors = bio_sectors(bio);
|
||||
|
||||
if (nr_sectors && maxsector &&
|
||||
(nr_sectors > maxsector ||
|
||||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
|
||||
handle_bad_sector(bio, maxsector);
|
||||
return -EIO;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remap block n of partition p to block n+start(p) of the disk.
|
||||
*/
|
||||
static inline int blk_partition_remap(struct bio *bio)
|
||||
{
|
||||
struct hd_struct *p;
|
||||
int ret = 0;
|
||||
int ret = -EIO;
|
||||
|
||||
rcu_read_lock();
|
||||
p = __disk_get_part(bio->bi_disk, bio->bi_partno);
|
||||
if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) ||
|
||||
bio_check_ro(bio, p))) {
|
||||
ret = -EIO;
|
||||
if (unlikely(!p))
|
||||
goto out;
|
||||
if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
|
||||
goto out;
|
||||
if (unlikely(bio_check_ro(bio, p)))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Zone reset does not include bi_size so bio_sectors() is always 0.
|
||||
* Include a test for the reset op code and perform the remap if needed.
|
||||
*/
|
||||
if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)
|
||||
goto out;
|
||||
|
||||
bio->bi_iter.bi_sector += p->start_sect;
|
||||
bio->bi_partno = 0;
|
||||
trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
|
||||
bio->bi_iter.bi_sector - p->start_sect);
|
||||
|
||||
if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
|
||||
if (bio_check_eod(bio, part_nr_sects_read(p)))
|
||||
goto out;
|
||||
bio->bi_iter.bi_sector += p->start_sect;
|
||||
bio->bi_partno = 0;
|
||||
trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
|
||||
bio->bi_iter.bi_sector - p->start_sect);
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether this bio extends beyond the end of the device.
|
||||
*/
|
||||
static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
|
||||
{
|
||||
sector_t maxsector;
|
||||
|
||||
if (!nr_sectors)
|
||||
return 0;
|
||||
|
||||
/* Test device or partition size, when known. */
|
||||
maxsector = get_capacity(bio->bi_disk);
|
||||
if (maxsector) {
|
||||
sector_t sector = bio->bi_iter.bi_sector;
|
||||
|
||||
if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
|
||||
/*
|
||||
* This may well happen - the kernel calls bread()
|
||||
* without checking the size of the device, e.g., when
|
||||
* mounting a device.
|
||||
*/
|
||||
handle_bad_sector(bio);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline_for_stack bool
|
||||
generic_make_request_checks(struct bio *bio)
|
||||
{
|
||||
|
@ -2164,9 +2254,6 @@ generic_make_request_checks(struct bio *bio)
|
|||
|
||||
might_sleep();
|
||||
|
||||
if (bio_check_eod(bio, nr_sectors))
|
||||
goto end_io;
|
||||
|
||||
q = bio->bi_disk->queue;
|
||||
if (unlikely(!q)) {
|
||||
printk(KERN_ERR
|
||||
|
@ -2186,17 +2273,16 @@ generic_make_request_checks(struct bio *bio)
|
|||
if (should_fail_bio(bio))
|
||||
goto end_io;
|
||||
|
||||
if (!bio->bi_partno) {
|
||||
if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
|
||||
if (bio->bi_partno) {
|
||||
if (unlikely(blk_partition_remap(bio)))
|
||||
goto end_io;
|
||||
} else {
|
||||
if (blk_partition_remap(bio))
|
||||
if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
|
||||
goto end_io;
|
||||
if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
if (bio_check_eod(bio, nr_sectors))
|
||||
goto end_io;
|
||||
|
||||
/*
|
||||
* Filter flush bio's early so that make_request based
|
||||
* drivers without flush support don't have to worry
|
||||
|
|
|
@ -24,6 +24,64 @@
|
|||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
|
||||
{
|
||||
if (stat->nr_samples) {
|
||||
seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
|
||||
stat->nr_samples, stat->mean, stat->min, stat->max);
|
||||
} else {
|
||||
seq_puts(m, "samples=0");
|
||||
}
|
||||
}
|
||||
|
||||
static int queue_poll_stat_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
int bucket;
|
||||
|
||||
for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
|
||||
seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
|
||||
print_stat(m, &q->poll_stat[2*bucket]);
|
||||
seq_puts(m, "\n");
|
||||
|
||||
seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
|
||||
print_stat(m, &q->poll_stat[2*bucket+1]);
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(&q->requeue_lock)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
|
||||
spin_lock_irq(&q->requeue_lock);
|
||||
return seq_list_start(&q->requeue_list, *pos);
|
||||
}
|
||||
|
||||
static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
|
||||
return seq_list_next(v, &q->requeue_list, pos);
|
||||
}
|
||||
|
||||
static void queue_requeue_list_stop(struct seq_file *m, void *v)
|
||||
__releases(&q->requeue_lock)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
|
||||
spin_unlock_irq(&q->requeue_lock);
|
||||
}
|
||||
|
||||
static const struct seq_operations queue_requeue_list_seq_ops = {
|
||||
.start = queue_requeue_list_start,
|
||||
.next = queue_requeue_list_next,
|
||||
.stop = queue_requeue_list_stop,
|
||||
.show = blk_mq_debugfs_rq_show,
|
||||
};
|
||||
|
||||
static int blk_flags_show(struct seq_file *m, const unsigned long flags,
|
||||
const char *const *flag_name, int flag_name_count)
|
||||
{
|
||||
|
@ -125,16 +183,6 @@ inval:
|
|||
return count;
|
||||
}
|
||||
|
||||
static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
|
||||
{
|
||||
if (stat->nr_samples) {
|
||||
seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
|
||||
stat->nr_samples, stat->mean, stat->min, stat->max);
|
||||
} else {
|
||||
seq_puts(m, "samples=0");
|
||||
}
|
||||
}
|
||||
|
||||
static int queue_write_hint_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
|
@ -158,23 +206,30 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
|
|||
return count;
|
||||
}
|
||||
|
||||
static int queue_poll_stat_show(void *data, struct seq_file *m)
|
||||
static int queue_zone_wlock_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
int bucket;
|
||||
unsigned int i;
|
||||
|
||||
for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
|
||||
seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
|
||||
print_stat(m, &q->poll_stat[2*bucket]);
|
||||
seq_puts(m, "\n");
|
||||
if (!q->seq_zones_wlock)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < blk_queue_nr_zones(q); i++)
|
||||
if (test_bit(i, q->seq_zones_wlock))
|
||||
seq_printf(m, "%u\n", i);
|
||||
|
||||
seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
|
||||
print_stat(m, &q->poll_stat[2*bucket+1]);
|
||||
seq_puts(m, "\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
|
||||
{ "poll_stat", 0400, queue_poll_stat_show },
|
||||
{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
|
||||
{ "state", 0600, queue_state_show, queue_state_write },
|
||||
{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
|
||||
{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
|
||||
{ },
|
||||
};
|
||||
|
||||
#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name
|
||||
static const char *const hctx_state_name[] = {
|
||||
HCTX_STATE_NAME(STOPPED),
|
||||
|
@ -295,6 +350,20 @@ static const char *const rqf_name[] = {
|
|||
};
|
||||
#undef RQF_NAME
|
||||
|
||||
static const char *const blk_mq_rq_state_name_array[] = {
|
||||
[MQ_RQ_IDLE] = "idle",
|
||||
[MQ_RQ_IN_FLIGHT] = "in_flight",
|
||||
[MQ_RQ_COMPLETE] = "complete",
|
||||
};
|
||||
|
||||
static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
|
||||
{
|
||||
if (WARN_ON_ONCE((unsigned int)rq_state >
|
||||
ARRAY_SIZE(blk_mq_rq_state_name_array)))
|
||||
return "(?)";
|
||||
return blk_mq_rq_state_name_array[rq_state];
|
||||
}
|
||||
|
||||
int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
|
||||
{
|
||||
const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
|
||||
|
@ -311,7 +380,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
|
|||
seq_puts(m, ", .rq_flags=");
|
||||
blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
|
||||
ARRAY_SIZE(rqf_name));
|
||||
seq_printf(m, ", complete=%d", blk_rq_is_complete(rq));
|
||||
seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq)));
|
||||
seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
|
||||
rq->internal_tag);
|
||||
if (mq_ops->show_rq)
|
||||
|
@ -327,37 +396,6 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
|
||||
|
||||
static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(&q->requeue_lock)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
|
||||
spin_lock_irq(&q->requeue_lock);
|
||||
return seq_list_start(&q->requeue_list, *pos);
|
||||
}
|
||||
|
||||
static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
|
||||
return seq_list_next(v, &q->requeue_list, pos);
|
||||
}
|
||||
|
||||
static void queue_requeue_list_stop(struct seq_file *m, void *v)
|
||||
__releases(&q->requeue_lock)
|
||||
{
|
||||
struct request_queue *q = m->private;
|
||||
|
||||
spin_unlock_irq(&q->requeue_lock);
|
||||
}
|
||||
|
||||
static const struct seq_operations queue_requeue_list_seq_ops = {
|
||||
.start = queue_requeue_list_start,
|
||||
.next = queue_requeue_list_next,
|
||||
.stop = queue_requeue_list_stop,
|
||||
.show = blk_mq_debugfs_rq_show,
|
||||
};
|
||||
|
||||
static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(&hctx->lock)
|
||||
{
|
||||
|
@ -747,14 +785,6 @@ static const struct file_operations blk_mq_debugfs_fops = {
|
|||
.release = blk_mq_debugfs_release,
|
||||
};
|
||||
|
||||
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
|
||||
{"poll_stat", 0400, queue_poll_stat_show},
|
||||
{"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
|
||||
{"state", 0600, queue_state_show, queue_state_write},
|
||||
{"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
|
||||
{"state", 0400, hctx_state_show},
|
||||
{"flags", 0400, hctx_flags_show},
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
* blk_mq_pci_map_queues - provide a default queue mapping for PCI device
|
||||
* @set: tagset to provide the mapping for
|
||||
* @pdev: PCI device associated with @set.
|
||||
* @offset: Offset to use for the pci irq vector
|
||||
*
|
||||
* This function assumes the PCI device @pdev has at least as many available
|
||||
* interrupt vectors as @set has queues. It will then query the vector
|
||||
|
@ -28,13 +29,14 @@
|
|||
* that maps a queue to the CPUs that have irq affinity for the corresponding
|
||||
* vector.
|
||||
*/
|
||||
int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
|
||||
int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
|
||||
int offset)
|
||||
{
|
||||
const struct cpumask *mask;
|
||||
unsigned int queue, cpu;
|
||||
|
||||
for (queue = 0; queue < set->nr_hw_queues; queue++) {
|
||||
mask = pci_irq_get_affinity(pdev, queue);
|
||||
mask = pci_irq_get_affinity(pdev, queue + offset);
|
||||
if (!mask)
|
||||
goto fallback;
|
||||
|
||||
|
|
|
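The new offset argument is easiest to see from a caller's perspective (a hedged sketch, not from this patch; the mydrv names are hypothetical): a driver that reserves vector 0 for a non-queue interrupt can start the hardware-queue mapping at vector 1.

    #include <linux/blk-mq.h>
    #include <linux/blk-mq-pci.h>

    /* Sketch: .map_queues callback that skips one pre-reserved IRQ vector. */
    static int mydrv_map_queues(struct blk_mq_tag_set *set)
    {
        struct pci_dev *pdev = set->driver_data;  /* assumption: stashed at init time */

        return blk_mq_pci_map_queues(set, pdev, 1 /* skip vector 0 */);
    }
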
@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
|
|||
*/
|
||||
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
queue_flag_set(QUEUE_FLAG_QUIESCED, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
|
||||
|
||||
|
@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
|
|||
*/
|
||||
void blk_mq_unquiesce_queue(struct request_queue *q)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
|
||||
|
||||
/* dispatch requests which are inserted during quiescing */
|
||||
blk_mq_run_hw_queues(q, true);
|
||||
|
@ -986,9 +978,9 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
|
|||
struct blk_mq_hw_ctx *hctx = flush_data->hctx;
|
||||
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
|
||||
|
||||
sbitmap_clear_bit(sb, bitnr);
|
||||
spin_lock(&ctx->lock);
|
||||
list_splice_tail_init(&ctx->rq_list, flush_data->list);
|
||||
sbitmap_clear_bit(sb, bitnr);
|
||||
spin_unlock(&ctx->lock);
|
||||
return true;
|
||||
}
|
||||
|
@ -2556,7 +2548,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
|||
{
|
||||
struct request_queue *uninit_q, *q;
|
||||
|
||||
uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
|
||||
uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
|
||||
if (!uninit_q)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
|
@ -2678,7 +2670,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
|||
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
|
||||
|
||||
if (!(set->flags & BLK_MQ_F_SG_MERGE))
|
||||
q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
|
||||
|
||||
q->sg_reserved_size = INT_MAX;
|
||||
|
||||
|
@ -3005,7 +2997,7 @@ EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
|
|||
static bool blk_poll_stats_enable(struct request_queue *q)
|
||||
{
|
||||
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
|
||||
test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
|
||||
blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
|
||||
return true;
|
||||
blk_stat_add_callback(q, q->poll_cb);
|
||||
return false;
|
||||
|
|
|
@ -859,12 +859,10 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment);
|
|||
|
||||
void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (queueable)
|
||||
clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q);
|
||||
else
|
||||
set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
|
||||
|
||||
|
|
|
@ -152,7 +152,7 @@ void blk_stat_add_callback(struct request_queue *q,
|
|||
|
||||
spin_lock(&q->stats->lock);
|
||||
list_add_tail_rcu(&cb->list, &q->stats->callbacks);
|
||||
set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
|
||||
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock(&q->stats->lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_stat_add_callback);
|
||||
|
@ -163,7 +163,7 @@ void blk_stat_remove_callback(struct request_queue *q,
|
|||
spin_lock(&q->stats->lock);
|
||||
list_del_rcu(&cb->list);
|
||||
if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
|
||||
clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock(&q->stats->lock);
|
||||
|
||||
del_timer_sync(&cb->timer);
|
||||
|
@ -191,7 +191,7 @@ void blk_stat_enable_accounting(struct request_queue *q)
|
|||
{
|
||||
spin_lock(&q->stats->lock);
|
||||
q->stats->enable_accounting = true;
|
||||
set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
|
||||
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock(&q->stats->lock);
|
||||
}
|
||||
|
||||
|
|
|
@ -276,12 +276,10 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \
|
|||
if (neg) \
|
||||
val = !val; \
|
||||
\
|
||||
spin_lock_irq(q->queue_lock); \
|
||||
if (val) \
|
||||
queue_flag_set(QUEUE_FLAG_##flag, q); \
|
||||
blk_queue_flag_set(QUEUE_FLAG_##flag, q); \
|
||||
else \
|
||||
queue_flag_clear(QUEUE_FLAG_##flag, q); \
|
||||
spin_unlock_irq(q->queue_lock); \
|
||||
blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
|
@ -414,12 +412,10 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
|
|||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (poll_on)
|
||||
queue_flag_set(QUEUE_FLAG_POLL, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_POLL, q);
|
||||
else
|
||||
queue_flag_clear(QUEUE_FLAG_POLL, q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -487,12 +483,10 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
|
|||
if (set == -1)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (set)
|
||||
queue_flag_set(QUEUE_FLAG_WC, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_WC, q);
|
||||
else
|
||||
queue_flag_clear(QUEUE_FLAG_WC, q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -798,13 +792,6 @@ static void __blk_release_queue(struct work_struct *work)
|
|||
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
|
||||
blk_stat_remove_callback(q, q->poll_cb);
|
||||
blk_stat_free_callback(q->poll_cb);
|
||||
bdi_put(q->backing_dev_info);
|
||||
blkcg_exit_queue(q);
|
||||
|
||||
if (q->elevator) {
|
||||
ioc_clear_queue(q);
|
||||
elevator_exit(q, q->elevator);
|
||||
}
|
||||
|
||||
blk_free_queue_stats(q->stats);
|
||||
|
||||
|
@ -953,9 +940,7 @@ void blk_unregister_queue(struct gendisk *disk)
|
|||
*/
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
|
||||
|
||||
/*
|
||||
* Remove the sysfs attributes before unregistering the queue data
|
||||
|
|
|
@ -57,12 +57,10 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
|
|||
char *p = (char *) buf;
|
||||
|
||||
val = simple_strtoul(p, &p, 10);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (val)
|
||||
queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
|
||||
else
|
||||
queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
|
||||
}
|
||||
|
||||
return count;
|
||||
|
@ -165,7 +163,7 @@ void blk_abort_request(struct request *req)
|
|||
* No need for fancy synchronizations.
|
||||
*/
|
||||
blk_rq_set_deadline(req, jiffies);
|
||||
mod_timer(&req->q->timeout, 0);
|
||||
kblockd_schedule_work(&req->q->timeout_work);
|
||||
} else {
|
||||
if (blk_mark_rq_complete(req))
|
||||
return;
|
||||
|
|
|
@ -296,7 +296,7 @@ int blkdev_reset_zones(struct block_device *bdev,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
|
||||
|
||||
/**
|
||||
/*
|
||||
* BLKREPORTZONE ioctl processing.
|
||||
* Called from blkdev_ioctl.
|
||||
*/
|
||||
|
@ -355,7 +355,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
/*
|
||||
* BLKRESETZONE ioctl processing.
|
||||
* Called from blkdev_ioctl.
|
||||
*/
|
||||
|
|
block/blk.h | 69
|
@ -41,6 +41,75 @@ extern struct kmem_cache *request_cachep;
|
|||
extern struct kobj_type blk_queue_ktype;
|
||||
extern struct ida blk_queue_ida;
|
||||
|
||||
/*
|
||||
* @q->queue_lock is set while a queue is being initialized. Since we know
|
||||
* that no other threads access the queue object before @q->queue_lock has
|
||||
* been set, it is safe to manipulate queue flags without holding the
|
||||
* queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
|
||||
* blk_init_allocated_queue().
|
||||
*/
|
||||
static inline void queue_lockdep_assert_held(struct request_queue *q)
|
||||
{
|
||||
if (q->queue_lock)
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
}
|
||||
|
||||
static inline void queue_flag_set_unlocked(unsigned int flag,
|
||||
struct request_queue *q)
|
||||
{
|
||||
if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
|
||||
kref_read(&q->kobj.kref))
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
__set_bit(flag, &q->queue_flags);
|
||||
}
|
||||
|
||||
static inline void queue_flag_clear_unlocked(unsigned int flag,
|
||||
struct request_queue *q)
|
||||
{
|
||||
if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) &&
|
||||
kref_read(&q->kobj.kref))
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
__clear_bit(flag, &q->queue_flags);
|
||||
}
|
||||
|
||||
static inline int queue_flag_test_and_clear(unsigned int flag,
|
||||
struct request_queue *q)
|
||||
{
|
||||
queue_lockdep_assert_held(q);
|
||||
|
||||
if (test_bit(flag, &q->queue_flags)) {
|
||||
__clear_bit(flag, &q->queue_flags);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int queue_flag_test_and_set(unsigned int flag,
|
||||
struct request_queue *q)
|
||||
{
|
||||
queue_lockdep_assert_held(q);
|
||||
|
||||
if (!test_bit(flag, &q->queue_flags)) {
|
||||
__set_bit(flag, &q->queue_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void queue_flag_set(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
queue_lockdep_assert_held(q);
|
||||
__set_bit(flag, &q->queue_flags);
|
||||
}
|
||||
|
||||
static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
queue_lockdep_assert_held(q);
|
||||
__clear_bit(flag, &q->queue_flags);
|
||||
}
|
||||
|
||||
static inline struct blk_flush_queue *blk_get_flush_queue(
|
||||
struct request_queue *q, struct blk_mq_ctx *ctx)
|
||||
{
|
||||
|
|
block/bsg-lib.c | 165
|
@ -27,6 +27,94 @@
|
|||
#include <linux/bsg-lib.h>
|
||||
#include <linux/export.h>
|
||||
#include <scsi/scsi_cmnd.h>
|
||||
#include <scsi/sg.h>
|
||||
|
||||
#define uptr64(val) ((void __user *)(uintptr_t)(val))

static int bsg_transport_check_proto(struct sg_io_v4 *hdr)
{
if (hdr->protocol != BSG_PROTOCOL_SCSI ||
hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_TRANSPORT)
return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
return 0;
}

static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
fmode_t mode)
{
struct bsg_job *job = blk_mq_rq_to_pdu(rq);

job->request_len = hdr->request_len;
job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
if (IS_ERR(job->request))
return PTR_ERR(job->request);
return 0;
}

static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
{
struct bsg_job *job = blk_mq_rq_to_pdu(rq);
int ret = 0;

/*
* The assignments below don't make much sense, but are kept for
* bug by bug backwards compatibility:
*/
hdr->device_status = job->result & 0xff;
hdr->transport_status = host_byte(job->result);
hdr->driver_status = driver_byte(job->result);
hdr->info = 0;
if (hdr->device_status || hdr->transport_status || hdr->driver_status)
hdr->info |= SG_INFO_CHECK;
hdr->response_len = 0;

if (job->result < 0) {
/* we're only returning the result field in the reply */
job->reply_len = sizeof(u32);
ret = job->result;
}

if (job->reply_len && hdr->response) {
int len = min(hdr->max_response_len, job->reply_len);

if (copy_to_user(uptr64(hdr->response), job->reply, len))
ret = -EFAULT;
else
hdr->response_len = len;
}

/* we assume all request payload was transferred, residual == 0 */
hdr->dout_resid = 0;

if (rq->next_rq) {
unsigned int rsp_len = job->reply_payload.payload_len;

if (WARN_ON(job->reply_payload_rcv_len > rsp_len))
hdr->din_resid = 0;
else
hdr->din_resid = rsp_len - job->reply_payload_rcv_len;
} else {
hdr->din_resid = 0;
}

return ret;
}

static void bsg_transport_free_rq(struct request *rq)
{
struct bsg_job *job = blk_mq_rq_to_pdu(rq);

kfree(job->request);
}

static const struct bsg_ops bsg_transport_ops = {
.check_proto = bsg_transport_check_proto,
.fill_hdr = bsg_transport_fill_hdr,
.complete_rq = bsg_transport_complete_rq,
.free_rq = bsg_transport_free_rq,
};

/**
* bsg_teardown_job - routine to teardown a bsg job
@@ -35,7 +123,7 @@
static void bsg_teardown_job(struct kref *kref)
{
struct bsg_job *job = container_of(kref, struct bsg_job, kref);
struct request *rq = job->req;
struct request *rq = blk_mq_rq_from_pdu(job);

put_device(job->dev); /* release reference for the request */

@@ -68,28 +156,9 @@ EXPORT_SYMBOL_GPL(bsg_job_get);
void bsg_job_done(struct bsg_job *job, int result,
unsigned int reply_payload_rcv_len)
{
struct request *req = job->req;
struct request *rsp = req->next_rq;
struct scsi_request *rq = scsi_req(req);
int err;

err = scsi_req(job->req)->result = result;
if (err < 0)
/* we're only returning the result field in the reply */
rq->sense_len = sizeof(u32);
else
rq->sense_len = job->reply_len;
/* we assume all request payload was transferred, residual == 0 */
rq->resid_len = 0;

if (rsp) {
WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len);

/* set reply (bidi) residual */
scsi_req(rsp)->resid_len -=
min(reply_payload_rcv_len, scsi_req(rsp)->resid_len);
}
blk_complete_request(req);
job->result = result;
job->reply_payload_rcv_len = reply_payload_rcv_len;
blk_complete_request(blk_mq_rq_from_pdu(job));
}
EXPORT_SYMBOL_GPL(bsg_job_done);

@@ -114,7 +183,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
if (!buf->sg_list)
return -ENOMEM;
sg_init_table(buf->sg_list, req->nr_phys_segments);
scsi_req(req)->resid_len = blk_rq_bytes(req);
buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
buf->payload_len = blk_rq_bytes(req);
return 0;
@@ -125,15 +193,13 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
* @dev: device that is being sent the bsg request
* @req: BSG request that needs a job structure
*/
static int bsg_prepare_job(struct device *dev, struct request *req)
static bool bsg_prepare_job(struct device *dev, struct request *req)
{
struct request *rsp = req->next_rq;
struct scsi_request *rq = scsi_req(req);
struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;

job->request = rq->cmd;
job->request_len = rq->cmd_len;
job->timeout = req->timeout;

if (req->bio) {
ret = bsg_map_buffer(&job->request_payload, req);
@@ -149,12 +215,13 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
/* take a reference for the request */
get_device(job->dev);
kref_init(&job->kref);
return 0;
return true;

failjob_rls_rqst_payload:
kfree(job->request_payload.sg_list);
failjob_rls_job:
return -ENOMEM;
job->result = -ENOMEM;
return false;
}

/**
@@ -183,9 +250,7 @@ static void bsg_request_fn(struct request_queue *q)
break;
spin_unlock_irq(q->queue_lock);

ret = bsg_prepare_job(dev, req);
if (ret) {
scsi_req(req)->result = ret;
if (!bsg_prepare_job(dev, req)) {
blk_end_request_all(req, BLK_STS_OK);
spin_lock_irq(q->queue_lock);
continue;
@@ -202,47 +267,34 @@ static void bsg_request_fn(struct request_queue *q)
spin_lock_irq(q->queue_lock);
}

/* called right after the request is allocated for the request_queue */
static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
struct scsi_request *sreq = &job->sreq;

/* called right after the request is allocated for the request_queue */

sreq->sense = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp);
if (!sreq->sense)
job->reply = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp);
if (!job->reply)
return -ENOMEM;

return 0;
}

/* called right before the request is given to the request_queue user */
static void bsg_initialize_rq(struct request *req)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
struct scsi_request *sreq = &job->sreq;
void *sense = sreq->sense;

/* called right before the request is given to the request_queue user */
void *reply = job->reply;

memset(job, 0, sizeof(*job));

scsi_req_init(sreq);

sreq->sense = sense;
sreq->sense_len = SCSI_SENSE_BUFFERSIZE;

job->req = req;
job->reply = sense;
job->reply_len = sreq->sense_len;
job->reply = reply;
job->reply_len = SCSI_SENSE_BUFFERSIZE;
job->dd_data = job + 1;
}

static void bsg_exit_rq(struct request_queue *q, struct request *req)
{
struct bsg_job *job = blk_mq_rq_to_pdu(req);
struct scsi_request *sreq = &job->sreq;

kfree(sreq->sense);
kfree(job->reply);
}

/**
@@ -275,12 +327,11 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,

q->queuedata = dev;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);

ret = bsg_register_queue(q, dev, name, release);
ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
"initialize - register queue\n", dev->kobj.name);
block/bsg.c
@@ -130,32 +130,110 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)];
}

static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_v4 *hdr, struct bsg_device *bd,
fmode_t mode)
{
struct scsi_request *req = scsi_req(rq);
#define uptr64(val) ((void __user *)(uintptr_t)(val))

if (hdr->request_len > BLK_MAX_CDB) {
req->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
if (!req->cmd)
static int bsg_scsi_check_proto(struct sg_io_v4 *hdr)
{
if (hdr->protocol != BSG_PROTOCOL_SCSI ||
hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD)
return -EINVAL;
return 0;
}

static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
fmode_t mode)
{
struct scsi_request *sreq = scsi_req(rq);

sreq->cmd_len = hdr->request_len;
if (sreq->cmd_len > BLK_MAX_CDB) {
sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
if (!sreq->cmd)
return -ENOMEM;
}

if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request,
hdr->request_len))
if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len))
return -EFAULT;

if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
if (blk_verify_command(req->cmd, mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
if (blk_verify_command(sreq->cmd, mode))
return -EPERM;
return 0;
}

static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
{
struct scsi_request *sreq = scsi_req(rq);
int ret = 0;

/*
* fill in request structure
* fill in all the output members
*/
req->cmd_len = hdr->request_len;
hdr->device_status = sreq->result & 0xff;
hdr->transport_status = host_byte(sreq->result);
hdr->driver_status = driver_byte(sreq->result);
hdr->info = 0;
if (hdr->device_status || hdr->transport_status || hdr->driver_status)
hdr->info |= SG_INFO_CHECK;
hdr->response_len = 0;

if (sreq->sense_len && hdr->response) {
int len = min_t(unsigned int, hdr->max_response_len,
sreq->sense_len);

if (copy_to_user(uptr64(hdr->response), sreq->sense, len))
ret = -EFAULT;
else
hdr->response_len = len;
}

if (rq->next_rq) {
hdr->dout_resid = sreq->resid_len;
hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
} else if (rq_data_dir(rq) == READ) {
hdr->din_resid = sreq->resid_len;
} else {
hdr->dout_resid = sreq->resid_len;
}

return ret;
}

static void bsg_scsi_free_rq(struct request *rq)
{
scsi_req_free_cmd(scsi_req(rq));
}

static const struct bsg_ops bsg_scsi_ops = {
.check_proto = bsg_scsi_check_proto,
.fill_hdr = bsg_scsi_fill_hdr,
.complete_rq = bsg_scsi_complete_rq,
.free_rq = bsg_scsi_free_rq,
};

static struct request *
bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
{
struct request *rq, *next_rq = NULL;
int ret;

if (!q->bsg_dev.class_dev)
return ERR_PTR(-ENXIO);

if (hdr->guard != 'Q')
return ERR_PTR(-EINVAL);

ret = q->bsg_dev.ops->check_proto(hdr);
if (ret)
return ERR_PTR(ret);

rq = blk_get_request(q, hdr->dout_xfer_len ?
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
GFP_KERNEL);
if (IS_ERR(rq))
return rq;

ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode);
if (ret)
goto out;

rq->timeout = msecs_to_jiffies(hdr->timeout);
if (!rq->timeout)
@@ -165,79 +243,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
if (rq->timeout < BLK_MIN_SG_TIMEOUT)
rq->timeout = BLK_MIN_SG_TIMEOUT;

return 0;
}

/*
* Check if sg_io_v4 from user is allowed and valid
*/
static int
bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op)
{
int ret = 0;

if (hdr->guard != 'Q')
return -EINVAL;

switch (hdr->protocol) {
case BSG_PROTOCOL_SCSI:
switch (hdr->subprotocol) {
case BSG_SUB_PROTOCOL_SCSI_CMD:
case BSG_SUB_PROTOCOL_SCSI_TRANSPORT:
break;
default:
ret = -EINVAL;
}
break;
default:
ret = -EINVAL;
}

*op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN;
return ret;
}

/*
* map sg_io_v4 to a request.
*/
static struct request *
bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode)
{
struct request_queue *q = bd->queue;
struct request *rq, *next_rq = NULL;
int ret;
unsigned int op, dxfer_len;
void __user *dxferp = NULL;
struct bsg_class_device *bcd = &q->bsg_dev;

/* if the LLD has been removed then the bsg_unregister_queue will
* eventually be called and the class_dev was freed, so we can no
* longer use this request_queue. Return no such address.
*/
if (!bcd->class_dev)
return ERR_PTR(-ENXIO);

bsg_dbg(bd, "map hdr %llx/%u %llx/%u\n",
(unsigned long long) hdr->dout_xferp,
hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
hdr->din_xfer_len);

ret = bsg_validate_sgv4_hdr(hdr, &op);
if (ret)
return ERR_PTR(ret);

/*
* map scatter-gather elements separately and string them to request
*/
rq = blk_get_request(q, op, GFP_KERNEL);
if (IS_ERR(rq))
return rq;

ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, mode);
if (ret)
goto out;

if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) {
if (hdr->dout_xfer_len && hdr->din_xfer_len) {
if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
ret = -EOPNOTSUPP;
goto out;
@@ -246,42 +252,39 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode)
next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
if (IS_ERR(next_rq)) {
ret = PTR_ERR(next_rq);
next_rq = NULL;
goto out;
}
rq->next_rq = next_rq;

dxferp = (void __user *)(unsigned long)hdr->din_xferp;
ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
rq->next_rq = next_rq;
ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp),
hdr->din_xfer_len, GFP_KERNEL);
if (ret)
goto out;
goto out_free_nextrq;
}

if (hdr->dout_xfer_len) {
dxfer_len = hdr->dout_xfer_len;
dxferp = (void __user *)(unsigned long)hdr->dout_xferp;
ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp),
hdr->dout_xfer_len, GFP_KERNEL);
} else if (hdr->din_xfer_len) {
dxfer_len = hdr->din_xfer_len;
dxferp = (void __user *)(unsigned long)hdr->din_xferp;
} else
dxfer_len = 0;

if (dxfer_len) {
ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
GFP_KERNEL);
if (ret)
goto out;
ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
hdr->din_xfer_len, GFP_KERNEL);
} else {
ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL);
}

if (ret)
goto out_unmap_nextrq;
return rq;

out_unmap_nextrq:
if (rq->next_rq)
blk_rq_unmap_user(rq->next_rq->bio);
out_free_nextrq:
if (rq->next_rq)
blk_put_request(rq->next_rq);
out:
scsi_req_free_cmd(scsi_req(rq));
q->bsg_dev.ops->free_rq(rq);
blk_put_request(rq);
if (next_rq) {
blk_rq_unmap_user(next_rq->bio);
blk_put_request(next_rq);
}
return ERR_PTR(ret);
}

@@ -383,56 +386,18 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
struct bio *bio, struct bio *bidi_bio)
{
struct scsi_request *req = scsi_req(rq);
int ret = 0;
int ret;

pr_debug("rq %p bio %p 0x%x\n", rq, bio, req->result);
/*
* fill in all the output members
*/
hdr->device_status = req->result & 0xff;
hdr->transport_status = host_byte(req->result);
hdr->driver_status = driver_byte(req->result);
hdr->info = 0;
if (hdr->device_status || hdr->transport_status || hdr->driver_status)
hdr->info |= SG_INFO_CHECK;
hdr->response_len = 0;

if (req->sense_len && hdr->response) {
int len = min_t(unsigned int, hdr->max_response_len,
req->sense_len);

ret = copy_to_user((void __user *)(unsigned long)hdr->response,
req->sense, len);
if (!ret)
hdr->response_len = len;
else
ret = -EFAULT;
}
ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr);

if (rq->next_rq) {
hdr->dout_resid = req->resid_len;
hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
blk_rq_unmap_user(bidi_bio);
blk_put_request(rq->next_rq);
} else if (rq_data_dir(rq) == READ)
hdr->din_resid = req->resid_len;
else
hdr->dout_resid = req->resid_len;

/*
* If the request generated a negative error number, return it
* (providing we aren't already returning an error); if it's
* just a protocol response (i.e. non negative), that gets
* processed above.
*/
if (!ret && req->result < 0)
ret = req->result;
}

blk_rq_unmap_user(bio);
scsi_req_free_cmd(req);
rq->q->bsg_dev.ops->free_rq(rq);
blk_put_request(rq);

return ret;
}

@@ -614,7 +579,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
/*
* get a request, fill in the blanks, and add to request queue
*/
rq = bsg_map_hdr(bd, &bc->hdr, mode);
rq = bsg_map_hdr(bd->queue, &bc->hdr, mode);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
@@ -742,11 +707,6 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
struct bsg_device *bd;
unsigned char buf[32];

if (!blk_queue_scsi_passthrough(rq)) {
WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
return ERR_PTR(-EINVAL);
}

if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);

@@ -907,7 +867,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (copy_from_user(&hdr, uarg, sizeof(hdr)))
return -EFAULT;

rq = bsg_map_hdr(bd, &hdr, file->f_mode);
rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode);
if (IS_ERR(rq))
return PTR_ERR(rq);

@@ -959,7 +919,8 @@ void bsg_unregister_queue(struct request_queue *q)
EXPORT_SYMBOL_GPL(bsg_unregister_queue);

int bsg_register_queue(struct request_queue *q, struct device *parent,
const char *name, void (*release)(struct device *))
const char *name, const struct bsg_ops *ops,
void (*release)(struct device *))
{
struct bsg_class_device *bcd;
dev_t dev;
@@ -996,6 +957,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
bcd->queue = q;
bcd->parent = get_device(parent);
bcd->release = release;
bcd->ops = ops;
kref_init(&bcd->ref);
dev = MKDEV(bsg_major, bcd->minor);
class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
@@ -1023,7 +985,17 @@ unlock:
mutex_unlock(&bsg_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(bsg_register_queue);

int bsg_scsi_register_queue(struct request_queue *q, struct device *parent)
{
if (!blk_queue_scsi_passthrough(q)) {
WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
return -EINVAL;
}

return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
}
EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);

static struct cdev bsg_cdev;

@@ -554,15 +554,14 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)

size_t len;
int msb;
u8 n;

if (!(number & ~TINY_ATOM_DATA_MASK)) {
add_token_u8(err, cmd, number);
return;
}

msb = fls(number);
len = DIV_ROUND_UP(msb, 4);
msb = fls64(number);
len = DIV_ROUND_UP(msb, 8);

if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
pr_debug("Error adding u64: end of buffer.\n");
@@ -570,10 +569,8 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
return;
}
add_short_atom_header(cmd, false, false, len);
while (len--) {
n = number >> (len * 8);
add_token_u8(err, cmd, n);
}
while (len--)
add_token_u8(err, cmd, number >> (len * 8));
}

static void add_token_bytestring(int *err, struct opal_dev *cmd,
@@ -871,6 +868,9 @@ static int response_parse(const u8 *buf, size_t length,
static size_t response_get_string(const struct parsed_resp *resp, int n,
const char **store)
{
u8 skip;
const struct opal_resp_tok *token;

*store = NULL;
if (!resp) {
pr_debug("Response is NULL\n");
@@ -883,13 +883,30 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
return 0;
}

if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) {
token = &resp->toks[n];
if (token->type != OPAL_DTA_TOKENID_BYTESTRING) {
pr_debug("Token is not a byte string!\n");
return 0;
}

*store = resp->toks[n].pos + 1;
return resp->toks[n].len - 1;
switch (token->width) {
case OPAL_WIDTH_TINY:
case OPAL_WIDTH_SHORT:
skip = 1;
break;
case OPAL_WIDTH_MEDIUM:
skip = 2;
break;
case OPAL_WIDTH_LONG:
skip = 4;
break;
default:
pr_debug("Token has invalid width!\n");
return 0;
}

*store = token->pos + skip;
return token->len - skip;
}

static u64 response_get_u64(const struct parsed_resp *resp, int n)

@@ -24,7 +24,6 @@

#include <linux/uaccess.h>

#define SECTOR_SHIFT 9
#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)

@@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig

drbd_init_set_defaults(device);

q = blk_alloc_queue(GFP_KERNEL);
q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
if (!q)
goto out_no_q;
device->rq_queue = q;
@@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
q->queue_lock = &resource->req_lock;

device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)

@@ -1212,10 +1212,10 @@ static void decide_on_discard_support(struct drbd_device *device,
* topology on all peers. */
blk_queue_discard_granularity(q, 512);
q->limits.max_discard_sectors = drbd_max_discard_sectors(connection);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection);
} else {
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
blk_queue_discard_granularity(q, 0);
q->limits.max_discard_sectors = 0;
q->limits.max_write_zeroes_sectors = 0;

@@ -214,10 +214,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
if (use_dio) {
queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
} else {
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
}
blk_mq_unfreeze_queue(lo->lo_queue);
@@ -817,7 +817,7 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
return;
}

@@ -826,7 +826,7 @@ static void loop_config_discard(struct loop_device *lo)

blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}

static void loop_unprepare_queue(struct loop_device *lo)
@@ -1167,21 +1167,17 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
static int
loop_get_status(struct loop_device *lo, struct loop_info64 *info)
{
struct file *file = lo->lo_backing_file;
struct file *file;
struct kstat stat;
int error;
int ret;

if (lo->lo_state != Lo_bound)
if (lo->lo_state != Lo_bound) {
mutex_unlock(&lo->lo_ctl_mutex);
return -ENXIO;
error = vfs_getattr(&file->f_path, &stat,
STATX_INO, AT_STATX_SYNC_AS_STAT);
if (error)
return error;
}

memset(info, 0, sizeof(*info));
info->lo_number = lo->lo_number;
info->lo_device = huge_encode_dev(stat.dev);
info->lo_inode = stat.ino;
info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
info->lo_offset = lo->lo_offset;
info->lo_sizelimit = lo->lo_sizelimit;
info->lo_flags = lo->lo_flags;
@@ -1194,7 +1190,19 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
lo->lo_encrypt_key_size);
}
return 0;

/* Drop lo_ctl_mutex while we call into the filesystem. */
file = get_file(lo->lo_backing_file);
mutex_unlock(&lo->lo_ctl_mutex);
ret = vfs_getattr(&file->f_path, &stat, STATX_INO,
AT_STATX_SYNC_AS_STAT);
if (!ret) {
info->lo_device = huge_encode_dev(stat.dev);
info->lo_inode = stat.ino;
info->lo_rdevice = huge_encode_dev(stat.rdev);
}
fput(file);
return ret;
}

static void
@@ -1352,7 +1360,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
struct loop_device *lo = bdev->bd_disk->private_data;
int err;

mutex_lock_nested(&lo->lo_ctl_mutex, 1);
err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1);
if (err)
goto out_unlocked;

switch (cmd) {
case LOOP_SET_FD:
err = loop_set_fd(lo, mode, bdev, arg);
@@ -1374,7 +1385,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
break;
case LOOP_GET_STATUS:
err = loop_get_status_old(lo, (struct loop_info __user *) arg);
break;
/* loop_get_status() unlocks lo_ctl_mutex */
goto out_unlocked;
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1383,7 +1395,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
break;
case LOOP_GET_STATUS64:
err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
break;
/* loop_get_status() unlocks lo_ctl_mutex */
goto out_unlocked;
case LOOP_SET_CAPACITY:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
@@ -1535,16 +1548,20 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,

switch(cmd) {
case LOOP_SET_STATUS:
mutex_lock(&lo->lo_ctl_mutex);
err = loop_set_status_compat(
lo, (const struct compat_loop_info __user *) arg);
mutex_unlock(&lo->lo_ctl_mutex);
err = mutex_lock_killable(&lo->lo_ctl_mutex);
if (!err) {
err = loop_set_status_compat(lo,
(const struct compat_loop_info __user *)arg);
mutex_unlock(&lo->lo_ctl_mutex);
}
break;
case LOOP_GET_STATUS:
mutex_lock(&lo->lo_ctl_mutex);
err = loop_get_status_compat(
lo, (struct compat_loop_info __user *) arg);
mutex_unlock(&lo->lo_ctl_mutex);
err = mutex_lock_killable(&lo->lo_ctl_mutex);
if (!err) {
err = loop_get_status_compat(lo,
(struct compat_loop_info __user *)arg);
/* loop_get_status() unlocks lo_ctl_mutex */
}
break;
case LOOP_SET_CAPACITY:
case LOOP_CLR_FD:
@@ -1808,7 +1825,7 @@ static int loop_add(struct loop_device **l, int i)
* page. For directio mode, merge does help to dispatch bigger request
* to underlayer disk. We will enable merge once directio is enabled.
*/
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);

err = -ENOMEM;
disk = lo->lo_disk = alloc_disk(1 << part_shift);
@@ -1864,8 +1881,8 @@ out:

static void loop_remove(struct loop_device *lo)
{
blk_cleanup_queue(lo->lo_queue);
del_gendisk(lo->lo_disk);
blk_cleanup_queue(lo->lo_queue);
blk_mq_free_tag_set(&lo->tag_set);
put_disk(lo->lo_disk);
kfree(lo);
@@ -1949,7 +1966,9 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm);
if (ret < 0)
break;
mutex_lock(&lo->lo_ctl_mutex);
ret = mutex_lock_killable(&lo->lo_ctl_mutex);
if (ret)
break;
if (lo->lo_state != Lo_unbound) {
ret = -EBUSY;
mutex_unlock(&lo->lo_ctl_mutex);

@@ -159,7 +159,7 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
if (vendor_id == 0xFFFF) {
dd->sr = true;
if (dd->queue)
set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags);
blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue);
else
dev_warn(&dd->pdev->dev,
"%s: dd->queue is NULL\n", __func__);
@@ -3855,8 +3855,8 @@ skip_create_disk:
goto start_service_thread;

/* Set device limits. */
set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags);
blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
blk_queue_physical_block_size(dd->queue, 4096);
blk_queue_max_hw_sectors(dd->queue, 0xffff);
@@ -3866,7 +3866,7 @@ skip_create_disk:

/* Signal trim support */
if (dd->trim_supp == true) {
set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, dd->queue);
dd->queue->limits.discard_granularity = 4096;
blk_queue_max_discard_sectors(dd->queue,
MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);

@@ -964,7 +964,7 @@ static void nbd_parse_flags(struct nbd_device *nbd)
else
set_disk_ro(nbd->disk, false);
if (config->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue);
if (config->flags & NBD_FLAG_SEND_FLUSH) {
if (config->flags & NBD_FLAG_SEND_FUA)
blk_queue_write_cache(nbd->disk->queue, true, true);
@@ -1040,7 +1040,7 @@ static void nbd_config_put(struct nbd_device *nbd)
nbd->config = NULL;

nbd->tag_set.timeout = 0;
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);

mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
@@ -1488,8 +1488,8 @@ static int nbd_dev_add(int index)
/*
* Tell the block layer that we are not a rotational device
*/
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
disk->queue->limits.discard_granularity = 512;
blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
blk_queue_max_segment_size(disk->queue, UINT_MAX);

@ -16,10 +16,8 @@
|
|||
#include <linux/badblocks.h>
|
||||
#include <linux/fault-inject.h>
|
||||
|
||||
#define SECTOR_SHIFT 9
|
||||
#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
|
||||
#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
|
||||
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
|
||||
#define SECTOR_MASK (PAGE_SECTORS - 1)
|
||||
|
||||
#define FREE_BATCH 16
|
||||
|
@ -29,6 +27,7 @@
|
|||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
static DECLARE_FAULT_ATTR(null_timeout_attr);
|
||||
static DECLARE_FAULT_ATTR(null_requeue_attr);
|
||||
#endif
|
||||
|
||||
static inline u64 mb_per_tick(int mbps)
|
||||
|
@ -53,6 +52,7 @@ struct nullb_queue {
|
|||
wait_queue_head_t wait;
|
||||
unsigned int queue_depth;
|
||||
struct nullb_device *dev;
|
||||
unsigned int requeue_selection;
|
||||
|
||||
struct nullb_cmd *cmds;
|
||||
};
|
||||
|
@ -72,6 +72,7 @@ enum nullb_device_flags {
|
|||
NULLB_DEV_FL_CACHE = 3,
|
||||
};
|
||||
|
||||
#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
|
||||
/*
|
||||
* nullb_page is a page in memory for nullb devices.
|
||||
*
|
||||
|
@ -86,10 +87,10 @@ enum nullb_device_flags {
|
|||
*/
|
||||
struct nullb_page {
|
||||
struct page *page;
|
||||
unsigned long bitmap;
|
||||
DECLARE_BITMAP(bitmap, MAP_SZ);
|
||||
};
|
||||
#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1)
|
||||
#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2)
|
||||
#define NULLB_PAGE_LOCK (MAP_SZ - 1)
|
||||
#define NULLB_PAGE_FREE (MAP_SZ - 2)
|
||||
|
||||
struct nullb_device {
|
||||
struct nullb *nullb;
|
||||
|
@ -170,6 +171,9 @@ MODULE_PARM_DESC(home_node, "Home node for the device");
|
|||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
static char g_timeout_str[80];
|
||||
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
|
||||
|
||||
static char g_requeue_str[80];
|
||||
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
|
||||
#endif
|
||||
|
||||
static int g_queue_mode = NULL_Q_MQ;
|
||||
|
@ -728,7 +732,7 @@ static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
|
|||
if (!t_page->page)
|
||||
goto out_freepage;
|
||||
|
||||
t_page->bitmap = 0;
|
||||
memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
|
||||
return t_page;
|
||||
out_freepage:
|
||||
kfree(t_page);
|
||||
|
@ -738,13 +742,20 @@ out:
|
|||
|
||||
static void null_free_page(struct nullb_page *t_page)
|
||||
{
|
||||
__set_bit(NULLB_PAGE_FREE, &t_page->bitmap);
|
||||
if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap))
|
||||
__set_bit(NULLB_PAGE_FREE, t_page->bitmap);
|
||||
if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
|
||||
return;
|
||||
__free_page(t_page->page);
|
||||
kfree(t_page);
|
||||
}
|
||||
|
||||
static bool null_page_empty(struct nullb_page *page)
|
||||
{
|
||||
int size = MAP_SZ - 2;
|
||||
|
||||
return find_first_bit(page->bitmap, size) == size;
|
||||
}
|
||||
|
||||
static void null_free_sector(struct nullb *nullb, sector_t sector,
|
||||
bool is_cache)
|
||||
{
|
||||
|
@ -759,9 +770,9 @@ static void null_free_sector(struct nullb *nullb, sector_t sector,
|
|||
|
||||
t_page = radix_tree_lookup(root, idx);
|
||||
if (t_page) {
|
||||
__clear_bit(sector_bit, &t_page->bitmap);
|
||||
__clear_bit(sector_bit, t_page->bitmap);
|
||||
|
||||
if (!t_page->bitmap) {
|
||||
if (null_page_empty(t_page)) {
|
||||
ret = radix_tree_delete_item(root, idx, t_page);
|
||||
WARN_ON(ret != t_page);
|
||||
null_free_page(ret);
|
||||
|
@ -832,7 +843,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb,
|
|||
t_page = radix_tree_lookup(root, idx);
|
||||
WARN_ON(t_page && t_page->page->index != idx);
|
||||
|
||||
if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
|
||||
if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
|
||||
return t_page;
|
||||
|
||||
return NULL;
|
||||
|
@ -895,10 +906,10 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
|
|||
|
||||
t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
|
||||
|
||||
__clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap);
|
||||
if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) {
|
||||
__clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
|
||||
if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
|
||||
null_free_page(c_page);
|
||||
if (t_page && t_page->bitmap == 0) {
|
||||
if (t_page && null_page_empty(t_page)) {
|
||||
ret = radix_tree_delete_item(&nullb->dev->data,
|
||||
idx, t_page);
|
||||
null_free_page(t_page);
|
||||
|
@ -914,11 +925,11 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
|
|||
|
||||
for (i = 0; i < PAGE_SECTORS;
|
||||
i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
|
||||
if (test_bit(i, &c_page->bitmap)) {
|
||||
if (test_bit(i, c_page->bitmap)) {
|
||||
offset = (i << SECTOR_SHIFT);
|
||||
memcpy(dst + offset, src + offset,
|
||||
nullb->dev->blocksize);
|
||||
__set_bit(i, &t_page->bitmap);
|
||||
__set_bit(i, t_page->bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -955,10 +966,10 @@ again:
|
|||
* We found the page which is being flushed to disk by other
|
||||
* threads
|
||||
*/
|
||||
if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap))
|
||||
if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
|
||||
c_pages[i] = NULL;
|
||||
else
|
||||
__set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap);
|
||||
__set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
|
||||
}
|
||||
|
||||
one_round = 0;
|
||||
|
@ -1011,7 +1022,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
|
|||
kunmap_atomic(dst);
|
||||
kunmap_atomic(src);
|
||||
|
||||
__set_bit(sector & SECTOR_MASK, &t_page->bitmap);
|
||||
__set_bit(sector & SECTOR_MASK, t_page->bitmap);
|
||||
|
||||
if (is_fua)
|
||||
null_free_sector(nullb, sector, true);
|
||||
|
@ -1380,7 +1391,15 @@ static bool should_timeout_request(struct request *rq)
|
|||
if (g_timeout_str[0])
|
||||
return should_fail(&null_timeout_attr, 1);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool should_requeue_request(struct request *rq)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
if (g_requeue_str[0])
|
||||
return should_fail(&null_requeue_attr, 1);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1391,11 +1410,17 @@ static void null_request_fn(struct request_queue *q)
|
|||
while ((rq = blk_fetch_request(q)) != NULL) {
|
||||
struct nullb_cmd *cmd = rq->special;
|
||||
|
||||
if (!should_timeout_request(rq)) {
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
null_handle_cmd(cmd);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
/* just ignore the request */
|
||||
if (should_timeout_request(rq))
|
||||
continue;
|
||||
if (should_requeue_request(rq)) {
|
||||
blk_requeue_request(q, rq);
|
||||
continue;
|
||||
}
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
null_handle_cmd(cmd);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1422,10 +1447,23 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|||
|
||||
blk_mq_start_request(bd->rq);
|
||||
|
||||
if (!should_timeout_request(bd->rq))
|
||||
return null_handle_cmd(cmd);
|
||||
if (should_requeue_request(bd->rq)) {
|
||||
/*
|
||||
* Alternate between hitting the core BUSY path, and the
|
||||
* driver driven requeue path
|
||||
*/
|
||||
nq->requeue_selection++;
|
||||
if (nq->requeue_selection & 1)
|
||||
return BLK_STS_RESOURCE;
|
||||
else {
|
||||
blk_mq_requeue_request(bd->rq, true);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
}
|
||||
if (should_timeout_request(bd->rq))
|
||||
return BLK_STS_OK;
|
||||
|
||||
return BLK_STS_OK;
|
||||
return null_handle_cmd(cmd);
|
||||
}
|
||||
|
||||
static const struct blk_mq_ops null_mq_ops = {
|
||||
|
@ -1485,7 +1523,7 @@ static void null_config_discard(struct nullb *nullb)
|
|||
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
|
||||
nullb->q->limits.discard_alignment = nullb->dev->blocksize;
|
||||
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
|
||||
}
|
||||
|
||||
static int null_open(struct block_device *bdev, fmode_t mode)
|
||||
|
@ -1659,16 +1697,27 @@ static void null_validate_conf(struct nullb_device *dev)
|
|||
dev->mbps = 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
static bool __null_setup_fault(struct fault_attr *attr, char *str)
|
||||
{
|
||||
if (!str[0])
|
||||
return true;
|
||||
|
||||
if (!setup_fault_attr(attr, str))
|
||||
return false;
|
||||
|
||||
attr->verbose = 0;
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool null_setup_fault(void)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
if (!g_timeout_str[0])
|
||||
return true;
|
||||
|
||||
if (!setup_fault_attr(&null_timeout_attr, g_timeout_str))
|
||||
if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
|
||||
return false;
|
||||
if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
|
||||
return false;
|
||||
|
||||
null_timeout_attr.verbose = 0;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
@ -1717,7 +1766,8 @@ static int null_add_dev(struct nullb_device *dev)
|
|||
}
|
||||
null_init_queues(nullb);
|
||||
} else if (dev->queue_mode == NULL_Q_BIO) {
|
||||
nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
|
||||
nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node,
|
||||
NULL);
|
||||
if (!nullb->q) {
|
||||
rv = -ENOMEM;
|
||||
goto out_cleanup_queues;
|
||||
|
@ -1758,8 +1808,8 @@ static int null_add_dev(struct nullb_device *dev)
|
|||
}
|
||||
|
||||
nullb->q->queuedata = nullb;
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
|
||||
|
||||
mutex_lock(&lock);
|
||||
nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
|
||||
|
@ -1802,10 +1852,6 @@ static int __init null_init(void)
|
|||
struct nullb *nullb;
|
||||
struct nullb_device *dev;
|
||||
|
||||
/* check for nullb_page.bitmap */
|
||||
if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
|
||||
return -EINVAL;
|
||||
|
||||
if (g_bs > PAGE_SIZE) {
|
||||
pr_warn("null_blk: invalid block size\n");
|
||||
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
|
||||
|
|
|
@ -230,6 +230,8 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode)
|
|||
struct pcd_unit *cd = bdev->bd_disk->private_data;
|
||||
int ret;
|
||||
|
||||
check_disk_change(bdev);
|
||||
|
||||
mutex_lock(&pcd_mutex);
|
||||
ret = cdrom_open(&cd->info, bdev, mode);
|
||||
mutex_unlock(&pcd_mutex);
|
||||
|
|
|
@ -50,15 +50,6 @@
|
|||
|
||||
#define RBD_DEBUG /* Activate rbd_assert() calls */
|
||||
|
||||
/*
|
||||
* The basic unit of block I/O is a sector. It is interpreted in a
|
||||
* number of contexts in Linux (blk, bio, genhd), but the default is
|
||||
* universally 512 bytes. These symbols are just slightly more
|
||||
* meaningful than the bare numbers they represent.
|
||||
*/
|
||||
#define SECTOR_SHIFT 9
|
||||
#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
|
||||
|
||||
/*
|
||||
* Increment the given counter and return its updated value.
|
||||
* If the counter is already 0 it will not be incremented.
|
||||
|
@ -4370,7 +4361,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
|||
goto out_tag_set;
|
||||
}
|
||||
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
|
||||
|
||||
/* set io sizes to object size */
|
||||
|
@ -4383,7 +4374,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
|||
blk_queue_io_opt(q, segment_size);
|
||||
|
||||
/* enable the discard support */
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
q->limits.discard_granularity = segment_size;
|
||||
blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
|
||||
blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE);
|
||||
|
|
|
@ -287,10 +287,10 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
|
|||
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
|
||||
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
|
||||
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue);
|
||||
if (rsxx_discard_supported(card)) {
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue);
|
||||
blk_queue_max_discard_sectors(card->queue,
|
||||
RSXX_HW_BLK_SIZE >> 9);
|
||||
card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
|
||||
|
|
|
@ -2858,8 +2858,8 @@ static int skd_cons_disk(struct skd_device *skdev)
|
|||
/* set optimal I/O size to 8KB */
|
||||
blk_queue_io_opt(q, 8192);
|
||||
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
|
||||
blk_queue_rq_timeout(q, 8 * HZ);
|
||||
|
||||
|
|
|
@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
|
|||
card->Active = -1; /* no page is active */
|
||||
card->bio = NULL;
|
||||
card->biotail = &card->bio;
|
||||
spin_lock_init(&card->lock);
|
||||
|
||||
card->queue = blk_alloc_queue(GFP_KERNEL);
|
||||
card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
|
||||
&card->lock);
|
||||
if (!card->queue)
|
||||
goto failed_alloc;
|
||||
|
||||
blk_queue_make_request(card->queue, mm_make_request);
|
||||
card->queue->queue_lock = &card->lock;
|
||||
card->queue->queuedata = card;
|
||||
|
||||
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
|
||||
|
@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
|
|||
dev_printk(KERN_INFO, &card->dev->dev,
|
||||
"Window size %d bytes, IRQ %d\n", data, dev->irq);
|
||||
|
||||
spin_lock_init(&card->lock);
|
||||
|
||||
pci_set_drvdata(dev, card);
|
||||
|
||||
if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
|
||||
|
|
|
@ -932,15 +932,15 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
|
|||
unsigned int segments = info->max_indirect_segments ? :
|
||||
BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
||||
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
|
||||
blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
|
||||
|
||||
if (info->feature_discard) {
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
|
||||
blk_queue_max_discard_sectors(rq, get_capacity(gd));
|
||||
rq->limits.discard_granularity = info->discard_granularity;
|
||||
rq->limits.discard_alignment = info->discard_alignment;
|
||||
if (info->feature_secdiscard)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
|
||||
}
|
||||
|
||||
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
||||
|
@ -1611,8 +1611,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
|||
blkif_req(req)->error = BLK_STS_NOTSUPP;
|
||||
info->feature_discard = 0;
|
||||
info->feature_secdiscard = 0;
|
||||
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
|
||||
queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
|
||||
}
|
||||
break;
|
||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||
|
|
|
@ -1530,8 +1530,8 @@ static int zram_add(void)
|
|||
/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
|
||||
set_capacity(zram->disk, 0);
|
||||
/* zram devices sort of resembles non-rotational disks */
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
|
||||
|
||||
/*
|
||||
* To ensure that we always get PAGE_SIZE aligned
|
||||
|
@ -1544,7 +1544,7 @@ static int zram_add(void)
|
|||
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
|
||||
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
|
||||
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
|
||||
|
||||
/*
|
||||
* zram_bio_discard() will clear all logical blocks if logical block
|
||||
|
@ -1620,8 +1620,8 @@ static int zram_remove(struct zram *zram)
|
|||
|
||||
pr_info("Removed device: %s\n", zram->disk->disk_name);
|
||||
|
||||
blk_cleanup_queue(zram->disk->queue);
|
||||
del_gendisk(zram->disk);
|
||||
blk_cleanup_queue(zram->disk->queue);
|
||||
put_disk(zram->disk);
|
||||
kfree(zram);
|
||||
return 0;
|
||||
|
|
|
@ -37,7 +37,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
|
|||
|
||||
/*-- End of configurable params */
|
||||
|
||||
#define SECTOR_SHIFT 9
|
||||
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
|
||||
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
|
||||
#define ZRAM_LOGICAL_BLOCK_SHIFT 12
|
||||
|
|
|
@ -1152,9 +1152,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
|
|||
|
||||
cd_dbg(CD_OPEN, "entering cdrom_open\n");
|
||||
|
||||
/* open is event synchronization point, check events first */
|
||||
check_disk_change(bdev);
|
||||
|
||||
/* if this was a O_NONBLOCK open and we should honor the flags,
|
||||
* do a quick open without drive/disc integrity checks. */
|
||||
cdi->use_count++;
|
||||
|
|
|
@ -497,6 +497,9 @@ static const struct cdrom_device_ops gdrom_ops = {
|
|||
static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
check_disk_change(bdev);
|
||||
|
||||
mutex_lock(&gdrom_mutex);
|
||||
ret = cdrom_open(gd.cd_info, bdev, mode);
|
||||
mutex_unlock(&gdrom_mutex);
|
||||
|
|
|
@ -712,7 +712,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
|
|||
struct request_queue *q = drive->queue;
|
||||
int write = rq_data_dir(rq) == WRITE;
|
||||
unsigned short sectors_per_frame =
|
||||
queue_logical_block_size(q) >> SECTOR_BITS;
|
||||
queue_logical_block_size(q) >> SECTOR_SHIFT;
|
||||
|
||||
ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
|
||||
"secs_per_frame: %u",
|
||||
|
@ -919,7 +919,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
|
|||
* end up being bogus.
|
||||
*/
|
||||
blocklen = be32_to_cpu(capbuf.blocklen);
|
||||
blocklen = (blocklen >> SECTOR_BITS) << SECTOR_BITS;
|
||||
blocklen = (blocklen >> SECTOR_SHIFT) << SECTOR_SHIFT;
|
||||
switch (blocklen) {
|
||||
case 512:
|
||||
case 1024:
|
||||
|
@ -935,7 +935,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
|
|||
}
|
||||
|
||||
*capacity = 1 + be32_to_cpu(capbuf.lba);
|
||||
*sectors_per_frame = blocklen >> SECTOR_BITS;
|
||||
*sectors_per_frame = blocklen >> SECTOR_SHIFT;
|
||||
|
||||
ide_debug_log(IDE_DBG_PROBE, "cap: %lu, sectors_per_frame: %lu",
|
||||
*capacity, *sectors_per_frame);
|
||||
|
@ -1012,7 +1012,7 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense)
|
|||
drive->probed_capacity = toc->capacity * sectors_per_frame;
|
||||
|
||||
blk_queue_logical_block_size(drive->queue,
|
||||
sectors_per_frame << SECTOR_BITS);
|
||||
sectors_per_frame << SECTOR_SHIFT);
|
||||
|
||||
/* first read just the header, so we know how long the TOC is */
|
||||
stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
|
||||
|
@ -1613,6 +1613,8 @@ static int idecd_open(struct block_device *bdev, fmode_t mode)
|
|||
struct cdrom_info *info;
|
||||
int rc = -ENXIO;
|
||||
|
||||
check_disk_change(bdev);
|
||||
|
||||
mutex_lock(&ide_cd_mutex);
|
||||
info = ide_cd_get(bdev->bd_disk);
|
||||
if (!info)
|
||||
|
|
|
@ -21,11 +21,7 @@
|
|||
|
||||
/************************************************************************/
|
||||
|
||||
#define SECTOR_BITS 9
|
||||
#ifndef SECTOR_SIZE
|
||||
#define SECTOR_SIZE (1 << SECTOR_BITS)
|
||||
#endif
|
||||
#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS)
|
||||
#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_SHIFT)
|
||||
#define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32)
|
||||
|
||||
/* Capabilities Page size including 8 bytes of Mode Page Header */
|
||||
|
|
|
@ -687,8 +687,8 @@ static void ide_disk_setup(ide_drive_t *drive)
|
|||
queue_max_sectors(q) / 2);
|
||||
|
||||
if (ata_id_is_ssd(id)) {
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
}
|
||||
|
||||
/* calculate drive capacity, and select LBA if possible */
|
||||
|
|
|
@ -766,14 +766,14 @@ static int ide_init_queue(ide_drive_t *drive)
|
|||
* limits and LBA48 we could raise it but as yet
|
||||
* do not.
|
||||
*/
|
||||
q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif));
|
||||
q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL);
|
||||
if (!q)
|
||||
return 1;
|
||||
|
||||
q->request_fn = do_ide_request;
|
||||
q->initialize_rq_fn = ide_initialize_rq;
|
||||
q->cmd_size = sizeof(struct ide_request);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
|
||||
if (blk_init_allocated_queue(q) < 0) {
|
||||
blk_cleanup_queue(q);
|
||||
return 1;
|
||||
|
|
|
@ -36,13 +36,13 @@ static DECLARE_RWSEM(nvm_lock);
|
|||
/* Map between virtual and physical channel and lun */
|
||||
struct nvm_ch_map {
|
||||
int ch_off;
|
||||
int nr_luns;
|
||||
int num_lun;
|
||||
int *lun_offs;
|
||||
};
|
||||
|
||||
struct nvm_dev_map {
|
||||
struct nvm_ch_map *chnls;
|
||||
int nr_chnls;
|
||||
int num_ch;
|
||||
};
|
||||
|
||||
static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
|
||||
|
@ -114,15 +114,15 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear)
|
|||
struct nvm_dev_map *dev_map = tgt_dev->map;
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < dev_map->nr_chnls; i++) {
|
||||
for (i = 0; i < dev_map->num_ch; i++) {
|
||||
struct nvm_ch_map *ch_map = &dev_map->chnls[i];
|
||||
int *lun_offs = ch_map->lun_offs;
|
||||
int ch = i + ch_map->ch_off;
|
||||
|
||||
if (clear) {
|
||||
for (j = 0; j < ch_map->nr_luns; j++) {
|
||||
for (j = 0; j < ch_map->num_lun; j++) {
|
||||
int lun = j + lun_offs[j];
|
||||
int lunid = (ch * dev->geo.nr_luns) + lun;
|
||||
int lunid = (ch * dev->geo.num_lun) + lun;
|
||||
|
||||
WARN_ON(!test_and_clear_bit(lunid,
|
||||
dev->lun_map));
|
||||
|
@ -147,47 +147,46 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
|
|||
struct nvm_dev_map *dev_rmap = dev->rmap;
|
||||
struct nvm_dev_map *dev_map;
|
||||
struct ppa_addr *luns;
|
||||
int nr_luns = lun_end - lun_begin + 1;
|
||||
int luns_left = nr_luns;
|
||||
int nr_chnls = nr_luns / dev->geo.nr_luns;
|
||||
int nr_chnls_mod = nr_luns % dev->geo.nr_luns;
|
||||
int bch = lun_begin / dev->geo.nr_luns;
|
||||
int blun = lun_begin % dev->geo.nr_luns;
|
||||
int num_lun = lun_end - lun_begin + 1;
|
||||
int luns_left = num_lun;
|
||||
int num_ch = num_lun / dev->geo.num_lun;
|
||||
int num_ch_mod = num_lun % dev->geo.num_lun;
|
||||
int bch = lun_begin / dev->geo.num_lun;
|
||||
int blun = lun_begin % dev->geo.num_lun;
|
||||
int lunid = 0;
|
||||
int lun_balanced = 1;
|
||||
int prev_nr_luns;
|
||||
int sec_per_lun, prev_num_lun;
|
||||
int i, j;
|
||||
|
||||
nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
|
||||
num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1;
|
||||
|
||||
dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
|
||||
if (!dev_map)
|
||||
goto err_dev;
|
||||
|
||||
dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map),
|
||||
GFP_KERNEL);
|
||||
dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL);
|
||||
if (!dev_map->chnls)
|
||||
goto err_chnls;
|
||||
|
||||
luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
|
||||
luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL);
|
||||
if (!luns)
|
||||
goto err_luns;
|
||||
|
||||
prev_nr_luns = (luns_left > dev->geo.nr_luns) ?
|
||||
dev->geo.nr_luns : luns_left;
|
||||
for (i = 0; i < nr_chnls; i++) {
|
||||
prev_num_lun = (luns_left > dev->geo.num_lun) ?
|
||||
dev->geo.num_lun : luns_left;
|
||||
for (i = 0; i < num_ch; i++) {
|
||||
struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
|
||||
int *lun_roffs = ch_rmap->lun_offs;
|
||||
struct nvm_ch_map *ch_map = &dev_map->chnls[i];
|
||||
int *lun_offs;
|
||||
int luns_in_chnl = (luns_left > dev->geo.nr_luns) ?
|
||||
dev->geo.nr_luns : luns_left;
|
||||
int luns_in_chnl = (luns_left > dev->geo.num_lun) ?
|
||||
dev->geo.num_lun : luns_left;
|
||||
|
||||
if (lun_balanced && prev_nr_luns != luns_in_chnl)
|
||||
if (lun_balanced && prev_num_lun != luns_in_chnl)
|
||||
lun_balanced = 0;
|
||||
|
||||
ch_map->ch_off = ch_rmap->ch_off = bch;
|
||||
ch_map->nr_luns = luns_in_chnl;
|
||||
ch_map->num_lun = luns_in_chnl;
|
||||
|
||||
lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
|
||||
if (!lun_offs)
|
||||
|
@ -195,8 +194,8 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
|
|||
|
||||
for (j = 0; j < luns_in_chnl; j++) {
|
||||
luns[lunid].ppa = 0;
|
||||
luns[lunid].g.ch = i;
|
||||
luns[lunid++].g.lun = j;
|
||||
luns[lunid].a.ch = i;
|
||||
luns[lunid++].a.lun = j;
|
||||
|
||||
lun_offs[j] = blun;
|
||||
lun_roffs[j + blun] = blun;
|
||||
|
@ -209,24 +208,29 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
|
|||
luns_left -= luns_in_chnl;
|
||||
}
|
||||
|
||||
dev_map->nr_chnls = nr_chnls;
|
||||
dev_map->num_ch = num_ch;
|
||||
|
||||
tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
|
||||
if (!tgt_dev)
|
||||
goto err_ch;
|
||||
|
||||
/* Inherit device geometry from parent */
|
||||
memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
|
||||
|
||||
/* Target device only owns a portion of the physical device */
|
||||
tgt_dev->geo.nr_chnls = nr_chnls;
|
||||
tgt_dev->geo.all_luns = nr_luns;
|
||||
tgt_dev->geo.nr_luns = (lun_balanced) ? prev_nr_luns : -1;
|
||||
tgt_dev->geo.num_ch = num_ch;
|
||||
tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1;
|
||||
tgt_dev->geo.all_luns = num_lun;
|
||||
tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk;
|
||||
|
||||
tgt_dev->geo.op = op;
|
||||
tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
|
||||
|
||||
sec_per_lun = dev->geo.clba * dev->geo.num_chk;
|
||||
tgt_dev->geo.total_secs = num_lun * sec_per_lun;
|
||||
|
||||
tgt_dev->q = dev->q;
|
||||
tgt_dev->map = dev_map;
|
||||
tgt_dev->luns = luns;
|
||||
memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
|
||||
|
||||
tgt_dev->parent = dev;
|
||||
|
||||
return tgt_dev;
|
||||
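For orientation, the target geometry set up in the hunk above follows directly from the parent device geometry: each LUN holds num_chk chunks of clba sectors, and the target only overrides the channel/LUN counts it actually owns. A minimal sketch of that arithmetic (illustrative helper, not a function in this patch; field names follow struct nvm_geo):

/* Illustrative only: mirrors sec_per_lun / total_secs as computed in
 * nvm_create_tgt_dev() above.
 */
static unsigned long long nvm_tgt_total_secs(int num_lun, int num_chk, int clba)
{
	unsigned long long sec_per_lun = (unsigned long long)clba * num_chk;

	return (unsigned long long)num_lun * sec_per_lun;
}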
|
@@ -296,24 +300,20 @@ static int __nvm_config_simple(struct nvm_dev *dev,
static int __nvm_config_extended(struct nvm_dev *dev,
				 struct nvm_ioctl_create_extended *e)
{
	struct nvm_geo *geo = &dev->geo;

	if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) {
		e->lun_begin = 0;
		e->lun_end = dev->geo.all_luns - 1;
	}

	/* op not set falls into target's default */
	if (e->op == 0xFFFF)
	if (e->op == 0xFFFF) {
		e->op = NVM_TARGET_DEFAULT_OP;

	if (e->op < NVM_TARGET_MIN_OP ||
	    e->op > NVM_TARGET_MAX_OP) {
	} else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) {
		pr_err("nvm: invalid over provisioning value\n");
		return -EINVAL;
	}

	return nvm_config_check_luns(geo, e->lun_begin, e->lun_end);
	return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end);
}
|
||||
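As the hunk above shows, the extended create ioctl treats 0xFFFF as "not set": an unset LUN range expands to the whole device, and an unset over-provisioning (op) value falls back to the target default, while any explicit value must lie within the supported range. A condensed sketch of the op handling (illustrative helper; NVM_TARGET_*_OP are the constants referenced above):

/* Illustrative only: condenses the op validation done in
 * __nvm_config_extended(). Returns the op to use, or -EINVAL.
 */
static int nvm_resolve_op(unsigned int op)
{
	if (op == 0xFFFF)		/* not set: use target default */
		return NVM_TARGET_DEFAULT_OP;
	if (op < NVM_TARGET_MIN_OP || op > NVM_TARGET_MAX_OP)
		return -EINVAL;		/* invalid over-provisioning value */
	return op;
}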
|
||||
static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
||||
|
@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
|||
goto err_dev;
|
||||
}
|
||||
|
||||
tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
|
||||
tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL);
|
||||
if (!tqueue) {
|
||||
ret = -ENOMEM;
|
||||
goto err_disk;
|
||||
|
@ -407,7 +407,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
|
|||
tdisk->private_data = targetdata;
|
||||
tqueue->queuedata = targetdata;
|
||||
|
||||
blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
|
||||
blk_queue_max_hw_sectors(tqueue,
|
||||
(dev->geo.csecs >> 9) * NVM_MAX_VLBA);
|
||||
|
||||
set_capacity(tdisk, tt->capacity(targetdata));
|
||||
add_disk(tdisk);
|
||||
|
@ -503,20 +504,20 @@ static int nvm_register_map(struct nvm_dev *dev)
|
|||
if (!rmap)
|
||||
goto err_rmap;
|
||||
|
||||
rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map),
|
||||
rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map),
|
||||
GFP_KERNEL);
|
||||
if (!rmap->chnls)
|
||||
goto err_chnls;
|
||||
|
||||
for (i = 0; i < dev->geo.nr_chnls; i++) {
|
||||
for (i = 0; i < dev->geo.num_ch; i++) {
|
||||
struct nvm_ch_map *ch_rmap;
|
||||
int *lun_roffs;
|
||||
int luns_in_chnl = dev->geo.nr_luns;
|
||||
int luns_in_chnl = dev->geo.num_lun;
|
||||
|
||||
ch_rmap = &rmap->chnls[i];
|
||||
|
||||
ch_rmap->ch_off = -1;
|
||||
ch_rmap->nr_luns = luns_in_chnl;
|
||||
ch_rmap->num_lun = luns_in_chnl;
|
||||
|
||||
lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
|
||||
if (!lun_roffs)
|
||||
|
@ -545,7 +546,7 @@ static void nvm_unregister_map(struct nvm_dev *dev)
|
|||
struct nvm_dev_map *rmap = dev->rmap;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dev->geo.nr_chnls; i++)
|
||||
for (i = 0; i < dev->geo.num_ch; i++)
|
||||
kfree(rmap->chnls[i].lun_offs);
|
||||
|
||||
kfree(rmap->chnls);
|
||||
|
@ -555,22 +556,22 @@ static void nvm_unregister_map(struct nvm_dev *dev)
|
|||
static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
|
||||
{
|
||||
struct nvm_dev_map *dev_map = tgt_dev->map;
|
||||
struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch];
|
||||
int lun_off = ch_map->lun_offs[p->g.lun];
|
||||
struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch];
|
||||
int lun_off = ch_map->lun_offs[p->a.lun];
|
||||
|
||||
p->g.ch += ch_map->ch_off;
|
||||
p->g.lun += lun_off;
|
||||
p->a.ch += ch_map->ch_off;
|
||||
p->a.lun += lun_off;
|
||||
}
|
||||
|
||||
static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
|
||||
{
|
||||
struct nvm_dev *dev = tgt_dev->parent;
|
||||
struct nvm_dev_map *dev_rmap = dev->rmap;
|
||||
struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
|
||||
int lun_roff = ch_rmap->lun_offs[p->g.lun];
|
||||
struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch];
|
||||
int lun_roff = ch_rmap->lun_offs[p->a.lun];
|
||||
|
||||
p->g.ch -= ch_rmap->ch_off;
|
||||
p->g.lun -= lun_roff;
|
||||
p->a.ch -= ch_rmap->ch_off;
|
||||
p->a.lun -= lun_roff;
|
||||
}
|
||||
|
||||
static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
|
||||
|
@ -580,7 +581,7 @@ static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
|
|||
|
||||
for (i = 0; i < nr_ppas; i++) {
|
||||
nvm_map_to_dev(tgt_dev, &ppa_list[i]);
|
||||
ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]);
|
||||
ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -590,7 +591,7 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
|
|||
int i;
|
||||
|
||||
for (i = 0; i < nr_ppas; i++) {
|
||||
ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]);
|
||||
ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]);
|
||||
nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
|
||||
}
|
||||
}
|
||||
|
@ -674,7 +675,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
|
|||
int i, plane_cnt, pl_idx;
|
||||
struct ppa_addr ppa;
|
||||
|
||||
if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
|
||||
if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) {
|
||||
rqd->nr_ppas = nr_ppas;
|
||||
rqd->ppa_addr = ppas[0];
|
||||
|
||||
|
@ -688,7 +689,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
plane_cnt = geo->plane_mode;
|
||||
plane_cnt = geo->pln_mode;
|
||||
rqd->nr_ppas *= plane_cnt;
|
||||
|
||||
for (i = 0; i < nr_ppas; i++) {
|
||||
|
@ -711,6 +712,17 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
|
|||
nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
|
||||
}
|
||||
|
||||
int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
|
||||
struct ppa_addr ppa, int nchks)
|
||||
{
|
||||
struct nvm_dev *dev = tgt_dev->parent;
|
||||
|
||||
nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
|
||||
|
||||
return dev->ops->get_chk_meta(tgt_dev->parent, meta,
|
||||
(sector_t)ppa.ppa, nchks);
|
||||
}
|
||||
EXPORT_SYMBOL(nvm_get_chunk_meta);
|
||||
|
||||
int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
|
||||
int nr_ppas, int type)
|
||||
|
@ -719,7 +731,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
|
|||
struct nvm_rq rqd;
|
||||
int ret;
|
||||
|
||||
if (nr_ppas > dev->ops->max_phys_sect) {
|
||||
if (nr_ppas > NVM_MAX_VLBA) {
|
||||
pr_err("nvm: unable to update all blocks atomically\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -740,14 +752,6 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
|
|||
}
|
||||
EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
|
||||
|
||||
int nvm_max_phys_sects(struct nvm_tgt_dev *tgt_dev)
|
||||
{
|
||||
struct nvm_dev *dev = tgt_dev->parent;
|
||||
|
||||
return dev->ops->max_phys_sect;
|
||||
}
|
||||
EXPORT_SYMBOL(nvm_max_phys_sects);
|
||||
|
||||
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
|
||||
{
|
||||
struct nvm_dev *dev = tgt_dev->parent;
|
||||
|
@ -814,15 +818,15 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
int blk, offset, pl, blktype;
|
||||
|
||||
if (nr_blks != geo->nr_chks * geo->plane_mode)
|
||||
if (nr_blks != geo->num_chk * geo->pln_mode)
|
||||
return -EINVAL;
|
||||
|
||||
for (blk = 0; blk < geo->nr_chks; blk++) {
|
||||
offset = blk * geo->plane_mode;
|
||||
for (blk = 0; blk < geo->num_chk; blk++) {
|
||||
offset = blk * geo->pln_mode;
|
||||
blktype = blks[offset];
|
||||
|
||||
/* Bad blocks on any planes take precedence over other types */
|
||||
for (pl = 0; pl < geo->plane_mode; pl++) {
|
||||
for (pl = 0; pl < geo->pln_mode; pl++) {
|
||||
if (blks[offset + pl] &
|
||||
(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
|
||||
blktype = blks[offset + pl];
|
||||
|
@ -833,7 +837,7 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
|
|||
blks[blk] = blktype;
|
||||
}
|
||||
|
||||
return geo->nr_chks;
|
||||
return geo->num_chk;
|
||||
}
|
||||
EXPORT_SYMBOL(nvm_bb_tbl_fold);
|
||||
|
||||
|
@ -850,44 +854,9 @@ EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
|
|||
|
||||
static int nvm_core_init(struct nvm_dev *dev)
|
||||
{
|
||||
struct nvm_id *id = &dev->identity;
|
||||
struct nvm_id_group *grp = &id->grp;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int ret;
|
||||
|
||||
memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
|
||||
|
||||
if (grp->mtype != 0) {
|
||||
pr_err("nvm: memory type not supported\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Whole device values */
|
||||
geo->nr_chnls = grp->num_ch;
|
||||
geo->nr_luns = grp->num_lun;
|
||||
|
||||
/* Generic device geometry values */
|
||||
geo->ws_min = grp->ws_min;
|
||||
geo->ws_opt = grp->ws_opt;
|
||||
geo->ws_seq = grp->ws_seq;
|
||||
geo->ws_per_chk = grp->ws_per_chk;
|
||||
geo->nr_chks = grp->num_chk;
|
||||
geo->sec_size = grp->csecs;
|
||||
geo->oob_size = grp->sos;
|
||||
geo->mccap = grp->mccap;
|
||||
geo->max_rq_size = dev->ops->max_phys_sect * geo->sec_size;
|
||||
|
||||
geo->sec_per_chk = grp->clba;
|
||||
geo->sec_per_lun = geo->sec_per_chk * geo->nr_chks;
|
||||
geo->all_luns = geo->nr_luns * geo->nr_chnls;
|
||||
|
||||
/* 1.2 spec device geometry values */
|
||||
geo->plane_mode = 1 << geo->ws_seq;
|
||||
geo->nr_planes = geo->ws_opt / geo->ws_min;
|
||||
geo->sec_per_pg = geo->ws_min;
|
||||
geo->sec_per_pl = geo->sec_per_pg * geo->nr_planes;
|
||||
|
||||
dev->total_secs = geo->all_luns * geo->sec_per_lun;
|
||||
dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns),
|
||||
sizeof(unsigned long), GFP_KERNEL);
|
||||
if (!dev->lun_map)
|
||||
|
@ -902,7 +871,6 @@ static int nvm_core_init(struct nvm_dev *dev)
|
|||
if (ret)
|
||||
goto err_fmtype;
|
||||
|
||||
blk_queue_logical_block_size(dev->q, geo->sec_size);
|
||||
return 0;
|
||||
err_fmtype:
|
||||
kfree(dev->lun_map);
|
||||
|
@ -927,18 +895,14 @@ static int nvm_init(struct nvm_dev *dev)
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (dev->ops->identity(dev, &dev->identity)) {
|
||||
if (dev->ops->identity(dev)) {
|
||||
pr_err("nvm: device could not be identified\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
pr_debug("nvm: ver:%x nvm_vendor:%x\n",
|
||||
dev->identity.ver_id, dev->identity.vmnt);
|
||||
|
||||
if (dev->identity.ver_id != 1) {
|
||||
pr_err("nvm: device not supported by kernel.");
|
||||
goto err;
|
||||
}
|
||||
pr_debug("nvm: ver:%u.%u nvm_vendor:%x\n",
|
||||
geo->major_ver_id, geo->minor_ver_id,
|
||||
geo->vmnt);
|
||||
|
||||
ret = nvm_core_init(dev);
|
||||
if (ret) {
|
||||
|
@ -946,10 +910,10 @@ static int nvm_init(struct nvm_dev *dev)
|
|||
goto err;
|
||||
}
|
||||
|
||||
pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
|
||||
dev->name, geo->sec_per_pg, geo->nr_planes,
|
||||
geo->ws_per_chk, geo->nr_chks,
|
||||
geo->all_luns, geo->nr_chnls);
|
||||
pr_info("nvm: registered %s [%u/%u/%u/%u/%u]\n",
|
||||
dev->name, dev->geo.ws_min, dev->geo.ws_opt,
|
||||
dev->geo.num_chk, dev->geo.all_luns,
|
||||
dev->geo.num_ch);
|
||||
return 0;
|
||||
err:
|
||||
pr_err("nvm: failed to initialize nvm\n");
|
||||
|
@ -969,17 +933,10 @@ int nvm_register(struct nvm_dev *dev)
|
|||
if (!dev->q || !dev->ops)
|
||||
return -EINVAL;
|
||||
|
||||
if (dev->ops->max_phys_sect > 256) {
|
||||
pr_info("nvm: max sectors supported is 256.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (dev->ops->max_phys_sect > 1) {
|
||||
dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
|
||||
if (!dev->dma_pool) {
|
||||
pr_err("nvm: could not create dma pool\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
|
||||
if (!dev->dma_pool) {
|
||||
pr_err("nvm: could not create dma pool\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = nvm_init(dev);
|
||||
|
@ -1040,9 +997,6 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
|
|||
struct nvm_tgt_type *tt;
|
||||
int tgt_iter = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
info = memdup_user(arg, sizeof(struct nvm_ioctl_info));
|
||||
if (IS_ERR(info))
|
||||
return -EFAULT;
|
||||
|
@ -1081,9 +1035,6 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
|
|||
struct nvm_dev *dev;
|
||||
int i = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL);
|
||||
if (!devices)
|
||||
return -ENOMEM;
|
||||
|
@ -1124,9 +1075,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
|
|||
{
|
||||
struct nvm_ioctl_create create;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -1162,9 +1110,6 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
|
|||
struct nvm_dev *dev;
|
||||
int ret = 0;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -1189,9 +1134,6 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
|
|||
{
|
||||
struct nvm_ioctl_dev_init init;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -1208,9 +1150,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
|
|||
{
|
||||
struct nvm_ioctl_dev_factory fact;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory)))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -1226,6 +1165,9 @@ static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
|
|||
{
|
||||
void __user *argp = (void __user *)arg;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
switch (cmd) {
|
||||
case NVM_INFO:
|
||||
return nvm_ioctl_info(file, argp);
|
||||
|
|
|
@ -63,6 +63,8 @@ retry:
|
|||
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
|
||||
}
|
||||
|
||||
atomic64_add(nr_entries, &pblk->user_wa);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_add(nr_entries, &pblk->inflight_writes);
|
||||
atomic_long_add(nr_entries, &pblk->req_writes);
|
||||
|
@ -117,6 +119,8 @@ retry:
|
|||
WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
|
||||
"pblk: inconsistent GC write\n");
|
||||
|
||||
atomic64_add(valid_entries, &pblk->gc_wa);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_add(valid_entries, &pblk->inflight_writes);
|
||||
atomic_long_add(valid_entries, &pblk->recov_gc_writes);
|
||||
|
|
|
@ -44,11 +44,12 @@ static void pblk_line_mark_bb(struct work_struct *work)
|
|||
}
|
||||
|
||||
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
|
||||
struct ppa_addr *ppa)
|
||||
struct ppa_addr ppa_addr)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int pos = pblk_ppa_to_pos(geo, *ppa);
|
||||
struct ppa_addr *ppa;
|
||||
int pos = pblk_ppa_to_pos(geo, ppa_addr);
|
||||
|
||||
pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
|
||||
atomic_long_inc(&pblk->erase_failed);
|
||||
|
@ -58,26 +59,38 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
|
||||
line->id, pos);
|
||||
|
||||
/* Not necessary to mark bad blocks on 2.0 spec. */
|
||||
if (geo->version == NVM_OCSSD_SPEC_20)
|
||||
return;
|
||||
|
||||
ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
|
||||
if (!ppa)
|
||||
return;
|
||||
|
||||
*ppa = ppa_addr;
|
||||
pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb,
|
||||
GFP_ATOMIC, pblk->bb_wq);
|
||||
}
|
||||
|
||||
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct nvm_chk_meta *chunk;
|
||||
struct pblk_line *line;
|
||||
int pos;
|
||||
|
||||
line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
|
||||
pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
|
||||
chunk = &line->chks[pos];
|
||||
|
||||
atomic_dec(&line->left_seblks);
|
||||
|
||||
if (rqd->error) {
|
||||
struct ppa_addr *ppa;
|
||||
|
||||
ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
|
||||
if (!ppa)
|
||||
return;
|
||||
|
||||
*ppa = rqd->ppa_addr;
|
||||
pblk_mark_bb(pblk, line, ppa);
|
||||
chunk->state = NVM_CHK_ST_OFFLINE;
|
||||
pblk_mark_bb(pblk, line, rqd->ppa_addr);
|
||||
} else {
|
||||
chunk->state = NVM_CHK_ST_FREE;
|
||||
}
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
@ -92,6 +105,49 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
|
|||
mempool_free(rqd, pblk->e_rq_pool);
|
||||
}
|
||||
|
||||
/*
 * Get information for all chunks from the device.
 *
 * The caller is responsible for freeing the returned structure
 */
struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_chk_meta *meta;
	struct ppa_addr ppa;
	unsigned long len;
	int ret;

	ppa.ppa = 0;

	len = geo->all_chunks * sizeof(*meta);
	meta = kzalloc(len, GFP_KERNEL);
	if (!meta)
		return ERR_PTR(-ENOMEM);

	ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks);
	if (ret) {
		kfree(meta);
		return ERR_PTR(-EIO);
	}

	return meta;
}

struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
					struct nvm_chk_meta *meta,
					struct ppa_addr ppa)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun;
	int lun_off = ppa.m.pu * geo->num_chk;
	int chk_off = ppa.m.chk;

	return meta + ch_off + lun_off + chk_off;
}
|
||||
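pblk_chunk_get_off() above assumes the chunk metadata returned by pblk_chunk_get_info() is a flat array ordered channel-major, then LUN, then chunk, which is where the grp * num_chk * num_lun, pu * num_chk and chk offsets come from. A small sketch of the same lookup (illustrative helper with equivalent arithmetic, not part of the patch):

/* Illustrative only: index into chunk metadata laid out as
 * [channel][lun][chunk], matching pblk_chunk_get_off() above.
 */
static inline int pblk_chk_meta_index(int grp, int pu, int chk,
				      int num_lun, int num_chk)
{
	return (grp * num_lun + pu) * num_chk + chk;
}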
|
||||
void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
|
||||
u64 paddr)
|
||||
{
|
||||
|
@ -613,7 +669,7 @@ next_rq:
|
|||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
|
||||
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
|
||||
rq_len = rq_ppas * geo->sec_size;
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
|
||||
bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
|
||||
l_mg->emeta_alloc_type, GFP_KERNEL);
|
||||
|
@ -722,7 +778,7 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
|
|||
if (bit >= lm->blk_per_line)
|
||||
return -1;
|
||||
|
||||
return bit * geo->sec_per_pl;
|
||||
return bit * geo->ws_opt;
|
||||
}
|
||||
|
||||
static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
|
||||
|
@ -885,7 +941,7 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
|
|||
}
|
||||
|
||||
ppa = pblk->luns[bit].bppa; /* set ch and lun */
|
||||
ppa.g.blk = line->id;
|
||||
ppa.a.blk = line->id;
|
||||
|
||||
atomic_dec(&line->left_eblks);
|
||||
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
|
||||
|
@ -975,7 +1031,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
|
|||
memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
|
||||
smeta_buf->header.id = cpu_to_le32(line->id);
|
||||
smeta_buf->header.type = cpu_to_le16(line->type);
|
||||
smeta_buf->header.version = SMETA_VERSION;
|
||||
smeta_buf->header.version_major = SMETA_VERSION_MAJOR;
|
||||
smeta_buf->header.version_minor = SMETA_VERSION_MINOR;
|
||||
|
||||
/* Start metadata */
|
||||
smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
|
||||
|
@ -998,6 +1055,12 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
|
|||
/* End metadata */
|
||||
memcpy(&emeta_buf->header, &smeta_buf->header,
|
||||
sizeof(struct line_header));
|
||||
|
||||
emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
|
||||
emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
|
||||
emeta_buf->header.crc = cpu_to_le32(
|
||||
pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
|
||||
|
||||
emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
|
||||
emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
|
||||
emeta_buf->nr_valid_lbas = cpu_to_le64(0);
|
||||
|
@ -1018,28 +1081,26 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
int nr_bb = 0;
|
||||
u64 off;
|
||||
int bit = -1;
|
||||
int emeta_secs;
|
||||
|
||||
line->sec_in_line = lm->sec_per_line;
|
||||
|
||||
/* Capture bad block information on line mapping bitmaps */
|
||||
while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
|
||||
bit + 1)) < lm->blk_per_line) {
|
||||
off = bit * geo->sec_per_pl;
|
||||
off = bit * geo->ws_opt;
|
||||
bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
|
||||
lm->sec_per_line);
|
||||
bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
|
||||
lm->sec_per_line);
|
||||
line->sec_in_line -= geo->sec_per_chk;
|
||||
if (bit >= lm->emeta_bb)
|
||||
nr_bb++;
|
||||
line->sec_in_line -= geo->clba;
|
||||
}
|
||||
|
||||
/* Mark smeta metadata sectors as bad sectors */
|
||||
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
|
||||
off = bit * geo->sec_per_pl;
|
||||
off = bit * geo->ws_opt;
|
||||
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
|
||||
line->sec_in_line -= lm->smeta_sec;
|
||||
line->smeta_ssec = off;
|
||||
|
@ -1055,18 +1116,18 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
/* Mark emeta metadata sectors as bad sectors. We need to consider bad
|
||||
* blocks to make sure that there are enough sectors to store emeta
|
||||
*/
|
||||
off = lm->sec_per_line - lm->emeta_sec[0];
|
||||
bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
|
||||
while (nr_bb) {
|
||||
off -= geo->sec_per_pl;
|
||||
emeta_secs = lm->emeta_sec[0];
|
||||
off = lm->sec_per_line;
|
||||
while (emeta_secs) {
|
||||
off -= geo->ws_opt;
|
||||
if (!test_bit(off, line->invalid_bitmap)) {
|
||||
bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
|
||||
nr_bb--;
|
||||
bitmap_set(line->invalid_bitmap, off, geo->ws_opt);
|
||||
emeta_secs -= geo->ws_opt;
|
||||
}
|
||||
}
|
||||
|
||||
line->sec_in_line -= lm->emeta_sec[0];
|
||||
line->emeta_ssec = off;
|
||||
line->sec_in_line -= lm->emeta_sec[0];
|
||||
line->nr_valid_lbas = 0;
|
||||
line->left_msecs = line->sec_in_line;
|
||||
*line->vsc = cpu_to_le32(line->sec_in_line);
|
||||
|
@ -1086,10 +1147,34 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int blk_to_erase = atomic_read(&line->blk_in_line);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < lm->blk_per_line; i++) {
|
||||
struct pblk_lun *rlun = &pblk->luns[i];
|
||||
int pos = pblk_ppa_to_pos(geo, rlun->bppa);
|
||||
int state = line->chks[pos].state;
|
||||
|
||||
/* Free chunks should not be erased */
|
||||
if (state & NVM_CHK_ST_FREE) {
|
||||
set_bit(pblk_ppa_to_pos(geo, rlun->bppa),
|
||||
line->erase_bitmap);
|
||||
blk_to_erase--;
|
||||
}
|
||||
}
|
||||
|
||||
return blk_to_erase;
|
||||
}
|
||||
|
||||
static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int blk_in_line = atomic_read(&line->blk_in_line);
|
||||
int blk_to_erase;
|
||||
|
||||
line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
|
||||
if (!line->map_bitmap)
|
||||
|
@ -1102,7 +1187,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Bad blocks do not need to be erased */
|
||||
bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
|
||||
|
||||
spin_lock(&line->lock);
|
||||
|
||||
/* If we have not written to this line, we need to mark up free chunks
|
||||
* as already erased
|
||||
*/
|
||||
if (line->state == PBLK_LINESTATE_NEW) {
|
||||
blk_to_erase = pblk_prepare_new_line(pblk, line);
|
||||
line->state = PBLK_LINESTATE_FREE;
|
||||
} else {
|
||||
blk_to_erase = atomic_read(&line->blk_in_line);
|
||||
}
|
||||
|
||||
if (line->state != PBLK_LINESTATE_FREE) {
|
||||
kfree(line->map_bitmap);
|
||||
kfree(line->invalid_bitmap);
|
||||
|
@ -1114,15 +1213,12 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
|||
|
||||
line->state = PBLK_LINESTATE_OPEN;
|
||||
|
||||
atomic_set(&line->left_eblks, blk_in_line);
|
||||
atomic_set(&line->left_seblks, blk_in_line);
|
||||
atomic_set(&line->left_eblks, blk_to_erase);
|
||||
atomic_set(&line->left_seblks, blk_to_erase);
|
||||
|
||||
line->meta_distance = lm->meta_distance;
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
/* Bad blocks do not need to be erased */
|
||||
bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
|
||||
|
||||
kref_init(&line->ref);
|
||||
|
||||
return 0;
|
||||
|
@ -1399,13 +1495,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
|
|||
l_mg->data_line = new;
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
if (pblk->state != PBLK_STATE_RUNNING) {
|
||||
l_mg->data_line = NULL;
|
||||
l_mg->data_next = NULL;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
pblk_line_setup_metadata(new, l_mg, &pblk->lm);
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
|
@ -1585,12 +1674,14 @@ static void pblk_line_should_sync_meta(struct pblk *pblk)
|
|||
|
||||
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
struct list_head *move_list;
|
||||
int i;
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
|
||||
WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
|
||||
"pblk: corrupt closed line %d\n", line->id);
|
||||
#endif
|
||||
|
@ -1612,6 +1703,15 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
|
|||
line->smeta = NULL;
|
||||
line->emeta = NULL;
|
||||
|
||||
for (i = 0; i < lm->blk_per_line; i++) {
|
||||
struct pblk_lun *rlun = &pblk->luns[i];
|
||||
int pos = pblk_ppa_to_pos(geo, rlun->bppa);
|
||||
int state = line->chks[pos].state;
|
||||
|
||||
if (!(state & NVM_CHK_ST_OFFLINE))
|
||||
state = NVM_CHK_ST_CLOSED;
|
||||
}
|
||||
|
||||
spin_unlock(&line->lock);
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
}
|
||||
|
@ -1622,11 +1722,16 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
|
|||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_emeta *emeta = line->emeta;
|
||||
struct line_emeta *emeta_buf = emeta->buf;
|
||||
struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);
|
||||
|
||||
/* No need for exact vsc value; avoid a big line lock and take aprox. */
|
||||
memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
|
||||
memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
|
||||
|
||||
wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
|
||||
wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
|
||||
wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
|
||||
|
||||
emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
|
||||
emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
|
||||
|
||||
|
@ -1680,8 +1785,8 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
|
|||
int i;
|
||||
|
||||
for (i = 1; i < nr_ppas; i++)
|
||||
WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
|
||||
ppa_list[0].g.ch != ppa_list[i].g.ch);
|
||||
WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
|
||||
ppa_list[0].a.ch != ppa_list[i].a.ch);
|
||||
#endif
|
||||
|
||||
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
|
||||
|
@ -1725,8 +1830,8 @@ void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
|
|||
int i;
|
||||
|
||||
for (i = 1; i < nr_ppas; i++)
|
||||
WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
|
||||
ppa_list[0].g.ch != ppa_list[i].g.ch);
|
||||
WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
|
||||
ppa_list[0].a.ch != ppa_list[i].a.ch);
|
||||
#endif
|
||||
|
||||
rlun = &pblk->luns[pos];
|
||||
|
@ -1739,10 +1844,10 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
|
|||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_lun *rlun;
|
||||
int nr_luns = geo->all_luns;
|
||||
int num_lun = geo->all_luns;
|
||||
int bit = -1;
|
||||
|
||||
while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
|
||||
while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) {
|
||||
rlun = &pblk->luns[bit];
|
||||
up(&rlun->wr_sem);
|
||||
}
|
||||
|
@ -1829,6 +1934,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
|
|||
#endif
|
||||
/* Invalidate and discard padded entries */
|
||||
if (lba == ADDR_EMPTY) {
|
||||
atomic64_inc(&pblk->pad_wa);
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_inc(&pblk->padded_wb);
|
||||
#endif
|
||||
|
|
|
@ -88,7 +88,7 @@ static void pblk_gc_line_ws(struct work_struct *work)
|
|||
|
||||
up(&gc->gc_sem);
|
||||
|
||||
gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size);
|
||||
gc_rq->data = vmalloc(gc_rq->nr_secs * geo->csecs);
|
||||
if (!gc_rq->data) {
|
||||
pr_err("pblk: could not GC line:%d (%d/%d)\n",
|
||||
line->id, *line->vsc, gc_rq->nr_secs);
|
||||
|
@ -147,10 +147,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
|
|||
int ret;
|
||||
|
||||
invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
if (!invalid_bitmap) {
|
||||
pr_err("pblk: could not allocate GC invalid bitmap\n");
|
||||
if (!invalid_bitmap)
|
||||
goto fail_free_ws;
|
||||
}
|
||||
|
||||
emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
|
||||
GFP_KERNEL);
|
||||
|
@ -666,12 +664,10 @@ void pblk_gc_exit(struct pblk *pblk)
|
|||
kthread_stop(gc->gc_reader_ts);
|
||||
|
||||
flush_workqueue(gc->gc_reader_wq);
|
||||
if (gc->gc_reader_wq)
|
||||
destroy_workqueue(gc->gc_reader_wq);
|
||||
destroy_workqueue(gc->gc_reader_wq);
|
||||
|
||||
flush_workqueue(gc->gc_line_reader_wq);
|
||||
if (gc->gc_line_reader_wq)
|
||||
destroy_workqueue(gc->gc_line_reader_wq);
|
||||
destroy_workqueue(gc->gc_line_reader_wq);
|
||||
|
||||
if (gc->gc_writer_ts)
|
||||
kthread_stop(gc->gc_writer_ts);
|
||||
|
|
File diff suppressed because it is too large
|
@ -65,6 +65,8 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
|
|||
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
|
||||
if (lba_list[paddr] != addr_empty)
|
||||
line->nr_valid_lbas++;
|
||||
else
|
||||
atomic64_inc(&pblk->pad_wa);
|
||||
} else {
|
||||
lba_list[paddr] = meta_list[i].lba = addr_empty;
|
||||
__pblk_map_invalidate(pblk, line, paddr);
|
||||
|
@ -125,7 +127,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
atomic_dec(&e_line->left_eblks);
|
||||
|
||||
*erase_ppa = rqd->ppa_list[i];
|
||||
erase_ppa->g.blk = e_line->id;
|
||||
erase_ppa->a.blk = e_line->id;
|
||||
|
||||
spin_unlock(&e_line->lock);
|
||||
|
||||
|
@ -166,6 +168,6 @@ retry:
|
|||
set_bit(bit, e_line->erase_bitmap);
|
||||
atomic_dec(&e_line->left_eblks);
|
||||
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
|
||||
erase_ppa->g.blk = e_line->id;
|
||||
erase_ppa->a.blk = e_line->id;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -355,10 +355,13 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
|
|||
struct pblk_rb_entry *entry;
|
||||
unsigned int sync, flush_point;
|
||||
|
||||
pblk_rb_sync_init(rb, NULL);
|
||||
sync = READ_ONCE(rb->sync);
|
||||
|
||||
if (pos == sync)
|
||||
if (pos == sync) {
|
||||
pblk_rb_sync_end(rb, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_inc(&rb->inflight_flush_point);
|
||||
|
@ -367,8 +370,6 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
|
|||
flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
|
||||
entry = &rb->entries[flush_point];
|
||||
|
||||
pblk_rb_sync_init(rb, NULL);
|
||||
|
||||
/* Protect flush points */
|
||||
smp_store_release(&rb->flush_point, flush_point);
|
||||
|
||||
|
@ -437,9 +438,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
|
|||
if (bio->bi_opf & REQ_PREFLUSH) {
|
||||
struct pblk *pblk = container_of(rb, struct pblk, rwb);
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_inc(&pblk->nr_flush);
|
||||
#endif
|
||||
atomic64_inc(&pblk->nr_flush);
|
||||
if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
|
||||
*io_ret = NVM_IO_OK;
|
||||
}
|
||||
|
@ -620,11 +619,17 @@ try:
|
|||
pr_err("pblk: could not pad page in write bio\n");
|
||||
return NVM_IO_ERR;
|
||||
}
|
||||
|
||||
if (pad < pblk->min_write_pgs)
|
||||
atomic64_inc(&pblk->pad_dist[pad - 1]);
|
||||
else
|
||||
pr_warn("pblk: padding more than min. sectors\n");
|
||||
|
||||
atomic64_add(pad, &pblk->pad_wa);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
atomic_long_add(pad, &((struct pblk *)
|
||||
(container_of(rb, struct pblk, rwb)))->padded_writes);
|
||||
atomic_long_add(pad, &pblk->padded_writes);
|
||||
#endif
|
||||
|
||||
return NVM_IO_OK;
|
||||
|
|
|
@ -563,7 +563,7 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
|
|||
if (!(gc_rq->secs_to_gc))
|
||||
goto out;
|
||||
|
||||
data_len = (gc_rq->secs_to_gc) * geo->sec_size;
|
||||
data_len = (gc_rq->secs_to_gc) * geo->csecs;
|
||||
bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len,
|
||||
PBLK_VMALLOC_META, GFP_KERNEL);
|
||||
if (IS_ERR(bio)) {
|
||||
|
|
|
@ -21,17 +21,15 @@ void pblk_submit_rec(struct work_struct *work)
|
|||
struct pblk_rec_ctx *recovery =
|
||||
container_of(work, struct pblk_rec_ctx, ws_rec);
|
||||
struct pblk *pblk = recovery->pblk;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_rq *rqd = recovery->rqd;
|
||||
struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
|
||||
int max_secs = nvm_max_phys_sects(dev);
|
||||
struct bio *bio;
|
||||
unsigned int nr_rec_secs;
|
||||
unsigned int pgs_read;
|
||||
int ret;
|
||||
|
||||
nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
|
||||
max_secs);
|
||||
NVM_MAX_VLBA);
|
||||
|
||||
bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
|
||||
|
||||
|
@ -74,8 +72,6 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
|
|||
struct pblk_rec_ctx *recovery, u64 *comp_bits,
|
||||
unsigned int comp)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
int max_secs = nvm_max_phys_sects(dev);
|
||||
struct nvm_rq *rec_rqd;
|
||||
struct pblk_c_ctx *rec_ctx;
|
||||
int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
|
||||
|
@ -86,7 +82,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
|
|||
/* Copy completion bitmap, but exclude the first X completed entries */
|
||||
bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
|
||||
(unsigned long int *)comp_bits,
|
||||
comp, max_secs);
|
||||
comp, NVM_MAX_VLBA);
|
||||
|
||||
/* Save the context for the entries that need to be re-written and
|
||||
* update current context with the completed entries.
|
||||
|
@ -188,7 +184,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
|
|||
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
|
||||
|
||||
return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
|
||||
nr_bb * geo->sec_per_chk;
|
||||
nr_bb * geo->clba;
|
||||
}
|
||||
|
||||
struct pblk_recov_alloc {
|
||||
|
@ -236,7 +232,7 @@ next_read_rq:
|
|||
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
|
||||
if (!rq_ppas)
|
||||
rq_ppas = pblk->min_write_pgs;
|
||||
rq_len = rq_ppas * geo->sec_size;
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
|
||||
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
|
||||
if (IS_ERR(bio))
|
||||
|
@ -355,7 +351,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
|
|||
if (!pad_rq)
|
||||
return -ENOMEM;
|
||||
|
||||
data = vzalloc(pblk->max_write_pgs * geo->sec_size);
|
||||
data = vzalloc(pblk->max_write_pgs * geo->csecs);
|
||||
if (!data) {
|
||||
ret = -ENOMEM;
|
||||
goto free_rq;
|
||||
|
@ -372,7 +368,7 @@ next_pad_rq:
|
|||
goto fail_free_pad;
|
||||
}
|
||||
|
||||
rq_len = rq_ppas * geo->sec_size;
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
|
||||
meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
|
||||
if (!meta_list) {
|
||||
|
@ -513,7 +509,7 @@ next_rq:
|
|||
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
|
||||
if (!rq_ppas)
|
||||
rq_ppas = pblk->min_write_pgs;
|
||||
rq_len = rq_ppas * geo->sec_size;
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
|
||||
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
|
||||
if (IS_ERR(bio))
|
||||
|
@ -644,7 +640,7 @@ next_rq:
|
|||
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
|
||||
if (!rq_ppas)
|
||||
rq_ppas = pblk->min_write_pgs;
|
||||
rq_len = rq_ppas * geo->sec_size;
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
|
||||
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
|
||||
if (IS_ERR(bio))
|
||||
|
@ -749,7 +745,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
|
|||
ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
|
||||
dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
|
||||
|
||||
data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
|
||||
data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
|
||||
if (!data) {
|
||||
ret = -ENOMEM;
|
||||
goto free_meta_list;
|
||||
|
@ -826,6 +822,63 @@ static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line)
|
|||
return emeta_start;
|
||||
}
|
||||
|
||||
static int pblk_recov_check_line_version(struct pblk *pblk,
|
||||
struct line_emeta *emeta)
|
||||
{
|
||||
struct line_header *header = &emeta->header;
|
||||
|
||||
if (header->version_major != EMETA_VERSION_MAJOR) {
|
||||
pr_err("pblk: line major version mismatch: %d, expected: %d\n",
|
||||
header->version_major, EMETA_VERSION_MAJOR);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
	if (header->version_minor > EMETA_VERSION_MINOR)
		pr_info("pblk: newer line minor version found: %d\n",
			header->version_minor);
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pblk_recov_wa_counters(struct pblk *pblk,
|
||||
struct line_emeta *emeta)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct line_header *header = &emeta->header;
|
||||
struct wa_counters *wa = emeta_to_wa(lm, emeta);
|
||||
|
||||
/* WA counters were introduced in emeta version 0.2 */
|
||||
if (header->version_major > 0 || header->version_minor >= 2) {
|
||||
u64 user = le64_to_cpu(wa->user);
|
||||
u64 pad = le64_to_cpu(wa->pad);
|
||||
u64 gc = le64_to_cpu(wa->gc);
|
||||
|
||||
atomic64_set(&pblk->user_wa, user);
|
||||
atomic64_set(&pblk->pad_wa, pad);
|
||||
atomic64_set(&pblk->gc_wa, gc);
|
||||
|
||||
pblk->user_rst_wa = user;
|
||||
pblk->pad_rst_wa = pad;
|
||||
pblk->gc_rst_wa = gc;
|
||||
}
|
||||
}
|
||||
|
||||
static int pblk_line_was_written(struct pblk_line *line,
|
||||
struct pblk_line_meta *lm)
|
||||
{
|
||||
|
||||
int i;
|
||||
int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
|
||||
|
||||
for (i = 0; i < lm->blk_per_line; i++) {
|
||||
if (!(line->chks[i].state & state_mask))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
|
@ -862,6 +915,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
|||
line->lun_bitmap = ((void *)(smeta_buf)) +
|
||||
sizeof(struct line_smeta);
|
||||
|
||||
if (!pblk_line_was_written(line, lm))
|
||||
continue;
|
||||
|
||||
/* Lines that cannot be read are assumed as not written here */
|
||||
if (pblk_line_read_smeta(pblk, line))
|
||||
continue;
|
||||
|
@ -873,9 +929,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
|||
if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
|
||||
continue;
|
||||
|
||||
if (smeta_buf->header.version != SMETA_VERSION) {
|
||||
if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) {
|
||||
pr_err("pblk: found incompatible line version %u\n",
|
||||
le16_to_cpu(smeta_buf->header.version));
|
||||
smeta_buf->header.version_major);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
|
@ -943,6 +999,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
|
|||
goto next;
|
||||
}
|
||||
|
||||
if (pblk_recov_check_line_version(pblk, line->emeta->buf))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
pblk_recov_wa_counters(pblk, line->emeta->buf);
|
||||
|
||||
if (pblk_recov_l2p_from_emeta(pblk, line))
|
||||
pblk_recov_l2p_from_oob(pblk, line);
|
||||
|
||||
|
|
|
@ -200,7 +200,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
|
|||
|
||||
/* Consider sectors used for metadata */
|
||||
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
|
||||
blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
|
||||
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
|
||||
|
||||
rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
|
||||
rl->high_pw = get_count_order(rl->high);
|
||||
|
|
|
@ -39,8 +39,8 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
|
|||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
"pblk: pos:%d, ch:%d, lun:%d - %d\n",
|
||||
i,
|
||||
rlun->bppa.g.ch,
|
||||
rlun->bppa.g.lun,
|
||||
rlun->bppa.a.ch,
|
||||
rlun->bppa.a.lun,
|
||||
active);
|
||||
}
|
||||
|
||||
|
@ -115,24 +115,47 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
ssize_t sz = 0;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE - sz,
|
||||
"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
pblk->ppaf_bitsize,
|
||||
pblk->ppaf.blk_offset, geo->ppaf.blk_len,
|
||||
pblk->ppaf.pg_offset, geo->ppaf.pg_len,
|
||||
pblk->ppaf.lun_offset, geo->ppaf.lun_len,
|
||||
pblk->ppaf.ch_offset, geo->ppaf.ch_len,
|
||||
pblk->ppaf.pln_offset, geo->ppaf.pln_len,
|
||||
pblk->ppaf.sec_offset, geo->ppaf.sect_len);
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
|
||||
struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
geo->ppaf.blk_offset, geo->ppaf.blk_len,
|
||||
geo->ppaf.pg_offset, geo->ppaf.pg_len,
|
||||
geo->ppaf.lun_offset, geo->ppaf.lun_len,
|
||||
geo->ppaf.ch_offset, geo->ppaf.ch_len,
|
||||
geo->ppaf.pln_offset, geo->ppaf.pln_len,
|
||||
geo->ppaf.sect_offset, geo->ppaf.sect_len);
|
||||
sz = snprintf(page, PAGE_SIZE,
|
||||
"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
pblk->addrf_len,
|
||||
ppaf->blk_offset, ppaf->blk_len,
|
||||
ppaf->pg_offset, ppaf->pg_len,
|
||||
ppaf->lun_offset, ppaf->lun_len,
|
||||
ppaf->ch_offset, ppaf->ch_len,
|
||||
ppaf->pln_offset, ppaf->pln_len,
|
||||
ppaf->sec_offset, ppaf->sec_len);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
gppaf->blk_offset, gppaf->blk_len,
|
||||
gppaf->pg_offset, gppaf->pg_len,
|
||||
gppaf->lun_offset, gppaf->lun_len,
|
||||
gppaf->ch_offset, gppaf->ch_len,
|
||||
gppaf->pln_offset, gppaf->pln_len,
|
||||
gppaf->sec_offset, gppaf->sec_len);
|
||||
} else {
|
||||
struct nvm_addrf *ppaf = &pblk->addrf;
|
||||
struct nvm_addrf *gppaf = &geo->addrf;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE,
|
||||
"pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
|
||||
pblk->addrf_len,
|
||||
ppaf->ch_offset, ppaf->ch_len,
|
||||
ppaf->lun_offset, ppaf->lun_len,
|
||||
ppaf->chk_offset, ppaf->chk_len,
|
||||
ppaf->sec_offset, ppaf->sec_len);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
|
||||
gppaf->ch_offset, gppaf->ch_len,
|
||||
gppaf->lun_offset, gppaf->lun_len,
|
||||
gppaf->chk_offset, gppaf->chk_len,
|
||||
gppaf->sec_offset, gppaf->sec_len);
|
||||
}
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
@ -288,7 +311,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
|
|||
"blk_line:%d, sec_line:%d, sec_blk:%d\n",
|
||||
lm->blk_per_line,
|
||||
lm->sec_per_line,
|
||||
geo->sec_per_chk);
|
||||
geo->clba);
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
@ -298,15 +321,104 @@ static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
|
|||
return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
|
||||
}
|
||||
|
||||
static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
				  char *page)
{
	int sz;

	sz = snprintf(page, PAGE_SIZE,
			"user:%lld gc:%lld pad:%lld WA:",
			user, gc, pad);

	if (!user) {
		sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
	} else {
		u64 wa_int;
		u32 wa_frac;

		wa_int = (user + gc + pad) * 100000;
		wa_int = div_u64(wa_int, user);
		wa_int = div_u64_rem(wa_int, 100000, &wa_frac);

		sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
				wa_int, wa_frac);
	}

	return sz;
}
|
||||
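The helper above reports write amplification as (user + gc + pad) / user with five decimal places, using fixed-point arithmetic since the kernel avoids floating point. As an illustrative example (numbers assumed, not from the patch): user = 1000, gc = 50 and pad = 25 give wa_int = 1075 * 100000 / 1000 = 107500, which div_u64_rem() splits into an integer part of 1 and a remainder of 7500, printed as "1.07500".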
|
||||
static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
|
||||
{
|
||||
return pblk_get_write_amp(atomic64_read(&pblk->user_wa),
|
||||
atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa),
|
||||
page);
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
|
||||
{
|
||||
return pblk_get_write_amp(
|
||||
atomic64_read(&pblk->user_wa) - pblk->user_rst_wa,
|
||||
atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa,
|
||||
atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
|
||||
}
|
||||
|
||||
static long long bucket_percentage(unsigned long long bucket,
|
||||
unsigned long long total)
|
||||
{
|
||||
int p = bucket * 100;
|
||||
|
||||
p = div_u64(p, total);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
|
||||
{
|
||||
int sz = 0;
|
||||
unsigned long long total;
|
||||
unsigned long long total_buckets = 0;
|
||||
int buckets = pblk->min_write_pgs - 1;
|
||||
int i;
|
||||
|
||||
total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
|
||||
if (!total) {
|
||||
for (i = 0; i < (buckets + 1); i++)
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
"%d:0 ", i);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
for (i = 0; i < buckets; i++)
|
||||
total_buckets += atomic64_read(&pblk->pad_dist[i]);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
|
||||
bucket_percentage(total - total_buckets, total));
|
||||
|
||||
for (i = 0; i < buckets; i++) {
|
||||
unsigned long long p;
|
||||
|
||||
p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
|
||||
total);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
|
||||
i + 1, p);
|
||||
}
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
|
||||
return sz;
|
||||
}
|
||||
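The padding distribution above is reported relative to the number of flush/FUA requests: bucket i counts writes that were padded by i + 1 sectors, and the leading "0:" entry is whatever remains of the total once all padded buckets are subtracted. For example (assumed numbers): with min_write_pgs = 8 (seven buckets), 100 flushes and pad_dist = {10, 5, 0, 0, 0, 0, 0}, the attribute would read "0:85% 1:10% 2:5% 3:0% 4:0% 5:0% 6:0% 7:0%".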
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
"%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
|
||||
"%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
|
||||
atomic_long_read(&pblk->inflight_writes),
|
||||
atomic_long_read(&pblk->inflight_reads),
|
||||
atomic_long_read(&pblk->req_writes),
|
||||
atomic_long_read(&pblk->nr_flush),
|
||||
(u64)atomic64_read(&pblk->nr_flush),
|
||||
atomic_long_read(&pblk->padded_writes),
|
||||
atomic_long_read(&pblk->padded_wb),
|
||||
atomic_long_read(&pblk->sub_writes),
|
||||
|
@ -360,6 +472,56 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
|
|||
return len;
|
||||
}
|
||||
|
||||
static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
|
||||
const char *page, size_t len)
|
||||
{
|
||||
size_t c_len;
|
||||
int reset_value;
|
||||
|
||||
c_len = strcspn(page, "\n");
|
||||
if (c_len >= len)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtouint(page, 0, &reset_value))
|
||||
return -EINVAL;
|
||||
|
||||
if (reset_value != 0)
|
||||
return -EINVAL;
|
||||
|
||||
pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
|
||||
pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
|
||||
pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
|
||||
const char *page, size_t len)
|
||||
{
|
||||
size_t c_len;
|
||||
int reset_value;
|
||||
int buckets = pblk->min_write_pgs - 1;
|
||||
int i;
|
||||
|
||||
c_len = strcspn(page, "\n");
|
||||
if (c_len >= len)
|
||||
return -EINVAL;
|
||||
|
||||
if (kstrtouint(page, 0, &reset_value))
|
||||
return -EINVAL;
|
||||
|
||||
if (reset_value != 0)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < buckets; i++)
|
||||
atomic64_set(&pblk->pad_dist[i], 0);
|
||||
|
||||
pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct attribute sys_write_luns = {
|
||||
.name = "write_luns",
|
||||
.mode = 0444,
|
||||
|
@ -410,6 +572,21 @@ static struct attribute sys_max_sec_per_write = {
|
|||
.mode = 0644,
|
||||
};
|
||||
|
||||
static struct attribute sys_write_amp_mileage = {
|
||||
.name = "write_amp_mileage",
|
||||
.mode = 0444,
|
||||
};
|
||||
|
||||
static struct attribute sys_write_amp_trip = {
|
||||
.name = "write_amp_trip",
|
||||
.mode = 0644,
|
||||
};
|
||||
|
||||
static struct attribute sys_padding_dist = {
|
||||
.name = "padding_dist",
|
||||
.mode = 0644,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
static struct attribute sys_stats_debug_attr = {
|
||||
.name = "stats",
|
||||
|
@ -428,6 +605,9 @@ static struct attribute *pblk_attrs[] = {
|
|||
&sys_stats_ppaf_attr,
|
||||
&sys_lines_attr,
|
||||
&sys_lines_info_attr,
|
||||
&sys_write_amp_mileage,
|
||||
&sys_write_amp_trip,
|
||||
&sys_padding_dist,
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
&sys_stats_debug_attr,
|
||||
#endif
|
||||
|
@ -457,6 +637,12 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
|
|||
return pblk_sysfs_lines_info(pblk, buf);
|
||||
else if (strcmp(attr->name, "max_sec_per_write") == 0)
|
||||
return pblk_sysfs_get_sec_per_write(pblk, buf);
|
||||
else if (strcmp(attr->name, "write_amp_mileage") == 0)
|
||||
return pblk_sysfs_get_write_amp_mileage(pblk, buf);
|
||||
else if (strcmp(attr->name, "write_amp_trip") == 0)
|
||||
return pblk_sysfs_get_write_amp_trip(pblk, buf);
|
||||
else if (strcmp(attr->name, "padding_dist") == 0)
|
||||
return pblk_sysfs_get_padding_dist(pblk, buf);
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
else if (strcmp(attr->name, "stats") == 0)
|
||||
return pblk_sysfs_stats_debug(pblk, buf);
|
||||
|
@ -473,7 +659,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
|
|||
return pblk_sysfs_gc_force(pblk, buf, len);
|
||||
else if (strcmp(attr->name, "max_sec_per_write") == 0)
|
||||
return pblk_sysfs_set_sec_per_write(pblk, buf, len);
|
||||
|
||||
else if (strcmp(attr->name, "write_amp_trip") == 0)
|
||||
return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
|
||||
else if (strcmp(attr->name, "padding_dist") == 0)
|
||||
return pblk_sysfs_set_padding_dist(pblk, buf, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -333,7 +333,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
|
|||
m_ctx = nvm_rq_to_pdu(rqd);
|
||||
m_ctx->private = meta_line;
|
||||
|
||||
rq_len = rq_ppas * geo->sec_size;
|
||||
rq_len = rq_ppas * geo->csecs;
|
||||
data = ((void *)emeta->buf) + emeta->mem;
|
||||
|
||||
bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
|
||||
|
|
|
@ -201,12 +201,6 @@ struct pblk_rb {
|
|||
|
||||
struct pblk_lun {
|
||||
struct ppa_addr bppa;
|
||||
|
||||
u8 *bb_list; /* Bad block list for LUN. Only used on
|
||||
* bring up. Bad blocks are managed
|
||||
* within lines on run-time.
|
||||
*/
|
||||
|
||||
struct semaphore wr_sem;
|
||||
};
|
||||
|
||||
|
@ -303,6 +297,7 @@ enum {
|
|||
PBLK_LINETYPE_DATA = 2,
|
||||
|
||||
/* Line state */
|
||||
PBLK_LINESTATE_NEW = 9,
|
||||
PBLK_LINESTATE_FREE = 10,
|
||||
PBLK_LINESTATE_OPEN = 11,
|
||||
PBLK_LINESTATE_CLOSED = 12,
|
||||
|
@ -320,14 +315,26 @@ enum {
|
|||
};
|
||||
|
||||
#define PBLK_MAGIC 0x70626c6b /*pblk*/
|
||||
#define SMETA_VERSION cpu_to_le16(1)
|
||||
|
||||
/* emeta/smeta persistent storage format versions:
|
||||
* Changes in major version requires offline migration.
|
||||
* Changes in minor version are handled automatically during
|
||||
* recovery.
|
||||
*/
|
||||
|
||||
#define SMETA_VERSION_MAJOR (0)
|
||||
#define SMETA_VERSION_MINOR (1)
|
||||
|
||||
#define EMETA_VERSION_MAJOR (0)
|
||||
#define EMETA_VERSION_MINOR (2)
|
||||
|
||||
struct line_header {
|
||||
__le32 crc;
|
||||
__le32 identifier; /* pblk identifier */
|
||||
__u8 uuid[16]; /* instance uuid */
|
||||
__le16 type; /* line type */
|
||||
__le16 version; /* type version */
|
||||
__u8 version_major; /* version major */
|
||||
__u8 version_minor; /* version minor */
|
||||
__le32 id; /* line id for current line */
|
||||
};
|
||||
|
||||
|
@ -349,11 +356,13 @@ struct line_smeta {
|
|||
__le64 lun_bitmap[];
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Metadata layout in media:
|
||||
* First sector:
|
||||
* 1. struct line_emeta
|
||||
* 2. bad block bitmap (u64 * window_wr_lun)
|
||||
* 3. write amplification counters
|
||||
* Mid sectors (start at lbas_sector):
|
||||
* 3. nr_lbas (u64) forming lba list
|
||||
* Last sectors (start at vsc_sector):
|
||||
|
@ -377,7 +386,15 @@ struct line_emeta {
|
|||
__le32 next_id; /* Line id for next line */
|
||||
__le64 nr_lbas; /* Number of lbas mapped in line */
|
||||
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
|
||||
__le64 bb_bitmap[]; /* Updated bad block bitmap for line */
|
||||
__le64 bb_bitmap[]; /* Updated bad block bitmap for line */
|
||||
};
|
||||
|
||||
|
||||
/* Write amplification counters stored on media */
|
||||
struct wa_counters {
|
||||
__le64 user; /* Number of user written sectors */
|
||||
__le64 gc; /* Number of sectors written by GC*/
|
||||
__le64 pad; /* Number of padded sectors */
|
||||
};
|
||||
|
||||
struct pblk_emeta {
|
||||
|
@ -410,6 +427,8 @@ struct pblk_line {
|
|||
|
||||
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
|
||||
|
||||
struct nvm_chk_meta *chks; /* Chunks forming line */
|
||||
|
||||
struct pblk_smeta *smeta; /* Start metadata */
|
||||
struct pblk_emeta *emeta; /* End medatada */
|
||||
|
||||
|
@ -507,10 +526,11 @@ struct pblk_line_meta {
|
|||
unsigned int smeta_sec; /* Sectors needed for smeta */
|
||||
|
||||
unsigned int emeta_len[4]; /* Lengths for emeta:
|
||||
* [0]: Total length
|
||||
* [1]: struct line_emeta length
|
||||
* [2]: L2P portion length
|
||||
* [3]: vsc list length
|
||||
* [0]: Total
|
||||
* [1]: struct line_emeta +
|
||||
* bb_bitmap + struct wa_counters
|
||||
* [2]: L2P portion
|
||||
* [3]: vsc
|
||||
*/
|
||||
unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
|
||||
* as emeta_len
|
||||
|
@ -534,21 +554,6 @@ struct pblk_line_meta {
|
|||
unsigned int meta_distance; /* Distance between data and metadata */
|
||||
};
|
||||
|
||||
struct pblk_addr_format {
|
||||
u64 ch_mask;
|
||||
u64 lun_mask;
|
||||
u64 pln_mask;
|
||||
u64 blk_mask;
|
||||
u64 pg_mask;
|
||||
u64 sec_mask;
|
||||
u8 ch_offset;
|
||||
u8 lun_offset;
|
||||
u8 pln_offset;
|
||||
u8 blk_offset;
|
||||
u8 pg_offset;
|
||||
u8 sec_offset;
|
||||
};
|
||||
|
||||
enum {
|
||||
PBLK_STATE_RUNNING = 0,
|
||||
PBLK_STATE_STOPPING = 1,
|
||||
|
@ -556,6 +561,18 @@ enum {
|
|||
PBLK_STATE_STOPPED = 3,
|
||||
};
|
||||
|
||||
/* Internal format to support not power-of-2 device formats */
|
||||
struct pblk_addrf {
|
||||
/* gen to dev */
|
||||
int sec_stripe;
|
||||
int ch_stripe;
|
||||
int lun_stripe;
|
||||
|
||||
/* dev to gen */
|
||||
int sec_lun_stripe;
|
||||
int sec_ws_stripe;
|
||||
};
|
||||
|
||||
struct pblk {
|
||||
struct nvm_tgt_dev *dev;
|
||||
struct gendisk *disk;
|
||||
|
@ -568,8 +585,9 @@ struct pblk {
|
|||
struct pblk_line_mgmt l_mg; /* Line management */
|
||||
struct pblk_line_meta lm; /* Line metadata */
|
||||
|
||||
int ppaf_bitsize;
|
||||
struct pblk_addr_format ppaf;
|
||||
struct nvm_addrf addrf; /* Aligned address format */
|
||||
struct pblk_addrf uaddrf; /* Unaligned address format */
|
||||
int addrf_len;
|
||||
|
||||
struct pblk_rb rwb;
|
||||
|
||||
|
@ -592,12 +610,27 @@ struct pblk {
|
|||
int sec_per_write;
|
||||
|
||||
unsigned char instance_uuid[16];
|
||||
|
||||
/* Persistent write amplification counters, 4kb sector I/Os */
|
||||
atomic64_t user_wa; /* Sectors written by user */
|
||||
atomic64_t gc_wa; /* Sectors written by GC */
|
||||
atomic64_t pad_wa; /* Padded sectors written */
|
||||
|
||||
/* Reset values for delta write amplification measurements */
|
||||
u64 user_rst_wa;
|
||||
u64 gc_rst_wa;
|
||||
u64 pad_rst_wa;
|
||||
|
||||
/* Counters used for calculating padding distribution */
|
||||
atomic64_t *pad_dist; /* Padding distribution buckets */
|
||||
u64 nr_flush_rst; /* Flushes reset value for pad dist.*/
|
||||
atomic64_t nr_flush; /* Number of flush/fua I/O */
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
/* All debug counters apply to 4kb sector I/Os */
|
||||
/* Non-persistent debug counters, 4kb sector I/Os */
|
||||
atomic_long_t inflight_writes; /* Inflight writes (user and gc) */
|
||||
atomic_long_t padded_writes; /* Sectors padded due to flush/fua */
|
||||
atomic_long_t padded_wb; /* Sectors padded in write buffer */
|
||||
atomic_long_t nr_flush; /* Number of flush/fua I/O */
|
||||
atomic_long_t req_writes; /* Sectors stored on write buffer */
|
||||
atomic_long_t sub_writes; /* Sectors submitted from buffer */
|
||||
atomic_long_t sync_writes; /* Sectors synced to media */
|
||||
|
@ -712,6 +745,10 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
|
|||
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct pblk_c_ctx *c_ctx);
|
||||
void pblk_discard(struct pblk *pblk, struct bio *bio);
|
||||
struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk);
|
||||
struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
|
||||
struct nvm_chk_meta *lp,
|
||||
struct ppa_addr ppa);
|
||||
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
|
||||
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
|
||||
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
|
||||
|
@ -888,6 +925,12 @@ static inline void *emeta_to_bb(struct line_emeta *emeta)
|
|||
return emeta->bb_bitmap;
|
||||
}
|
||||
|
||||
static inline void *emeta_to_wa(struct pblk_line_meta *lm,
|
||||
struct line_emeta *emeta)
|
||||
{
|
||||
return emeta->bb_bitmap + lm->blk_bitmap_len;
|
||||
}
|
||||
|
||||
static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
|
||||
{
|
||||
return ((void *)emeta + pblk->lm.emeta_len[1]);
|
||||
|
@ -903,38 +946,60 @@ static inline int pblk_line_vsc(struct pblk_line *line)
|
|||
return le32_to_cpu(*line->vsc);
|
||||
}
|
||||
|
||||
#define NVM_MEM_PAGE_WRITE (8)
|
||||
|
||||
static inline int pblk_pad_distance(struct pblk *pblk)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
||||
return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl;
|
||||
return geo->mw_cunits * geo->all_luns * geo->ws_opt;
|
||||
}
|
||||
|
||||
static inline int pblk_ppa_to_line(struct ppa_addr p)
|
||||
{
|
||||
return p.g.blk;
|
||||
return p.a.blk;
|
||||
}
|
||||
|
||||
static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
|
||||
{
|
||||
return p.g.lun * geo->nr_chnls + p.g.ch;
|
||||
return p.a.lun * geo->num_ch + p.a.ch;
|
||||
}
|
||||
|
||||
static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
|
||||
u64 line_id)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct ppa_addr ppa;
|
||||
|
||||
ppa.ppa = 0;
|
||||
ppa.g.blk = line_id;
|
||||
ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset;
|
||||
ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset;
|
||||
ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset;
|
||||
ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset;
|
||||
ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset;
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
|
||||
|
||||
ppa.ppa = 0;
|
||||
ppa.g.blk = line_id;
|
||||
ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset;
|
||||
ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset;
|
||||
ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset;
|
||||
ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset;
|
||||
ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset;
|
||||
} else {
|
||||
struct pblk_addrf *uaddrf = &pblk->uaddrf;
|
||||
int secs, chnls, luns;
|
||||
|
||||
ppa.ppa = 0;
|
||||
|
||||
ppa.m.chk = line_id;
|
||||
|
||||
paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs);
|
||||
ppa.m.sec = secs;
|
||||
|
||||
paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls);
|
||||
ppa.m.grp = chnls;
|
||||
|
||||
paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns);
|
||||
ppa.m.pu = luns;
|
||||
|
||||
ppa.m.sec += uaddrf->sec_stripe * paddr;
|
||||
}
|
||||
|
||||
return ppa;
|
||||
}
|
||||
|
@ -942,13 +1007,30 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
|
|||
static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
|
||||
struct ppa_addr p)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
u64 paddr;
|
||||
|
||||
paddr = (u64)p.g.pg << pblk->ppaf.pg_offset;
|
||||
paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset;
|
||||
paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset;
|
||||
paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset;
|
||||
paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset;
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
|
||||
|
||||
paddr = (u64)p.g.ch << ppaf->ch_offset;
|
||||
paddr |= (u64)p.g.lun << ppaf->lun_offset;
|
||||
paddr |= (u64)p.g.pg << ppaf->pg_offset;
|
||||
paddr |= (u64)p.g.pl << ppaf->pln_offset;
|
||||
paddr |= (u64)p.g.sec << ppaf->sec_offset;
|
||||
} else {
|
||||
struct pblk_addrf *uaddrf = &pblk->uaddrf;
|
||||
u64 secs = p.m.sec;
|
||||
int sec_stripe;
|
||||
|
||||
paddr = (u64)p.m.grp * uaddrf->sec_stripe;
|
||||
paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe;
|
||||
|
||||
secs = div_u64_rem(secs, uaddrf->sec_stripe, &sec_stripe);
|
||||
paddr += secs * uaddrf->sec_ws_stripe;
|
||||
paddr += sec_stripe;
|
||||
}
|
||||
|
||||
return paddr;
|
||||
}
|
||||
|
@ -965,18 +1047,37 @@ static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
|
|||
ppa64.c.line = ppa32 & ((~0U) >> 1);
|
||||
ppa64.c.is_cached = 1;
|
||||
} else {
|
||||
ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >>
|
||||
pblk->ppaf.blk_offset;
|
||||
ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >>
|
||||
pblk->ppaf.pg_offset;
|
||||
ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >>
|
||||
pblk->ppaf.lun_offset;
|
||||
ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >>
|
||||
pblk->ppaf.ch_offset;
|
||||
ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >>
|
||||
pblk->ppaf.pln_offset;
|
||||
ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >>
|
||||
pblk->ppaf.sec_offset;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
struct nvm_addrf_12 *ppaf =
|
||||
(struct nvm_addrf_12 *)&pblk->addrf;
|
||||
|
||||
ppa64.g.ch = (ppa32 & ppaf->ch_mask) >>
|
||||
ppaf->ch_offset;
|
||||
ppa64.g.lun = (ppa32 & ppaf->lun_mask) >>
|
||||
ppaf->lun_offset;
|
||||
ppa64.g.blk = (ppa32 & ppaf->blk_mask) >>
|
||||
ppaf->blk_offset;
|
||||
ppa64.g.pg = (ppa32 & ppaf->pg_mask) >>
|
||||
ppaf->pg_offset;
|
||||
ppa64.g.pl = (ppa32 & ppaf->pln_mask) >>
|
||||
ppaf->pln_offset;
|
||||
ppa64.g.sec = (ppa32 & ppaf->sec_mask) >>
|
||||
ppaf->sec_offset;
|
||||
} else {
|
||||
struct nvm_addrf *lbaf = &pblk->addrf;
|
||||
|
||||
ppa64.m.grp = (ppa32 & lbaf->ch_mask) >>
|
||||
lbaf->ch_offset;
|
||||
ppa64.m.pu = (ppa32 & lbaf->lun_mask) >>
|
||||
lbaf->lun_offset;
|
||||
ppa64.m.chk = (ppa32 & lbaf->chk_mask) >>
|
||||
lbaf->chk_offset;
|
||||
ppa64.m.sec = (ppa32 & lbaf->sec_mask) >>
|
||||
lbaf->sec_offset;
|
||||
}
|
||||
}
|
||||
|
||||
return ppa64;
|
||||
|
@ -992,12 +1093,27 @@ static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
|
|||
ppa32 |= ppa64.c.line;
|
||||
ppa32 |= 1U << 31;
|
||||
} else {
|
||||
ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset;
|
||||
ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset;
|
||||
ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset;
|
||||
ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset;
|
||||
ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset;
|
||||
ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset;
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
struct nvm_addrf_12 *ppaf =
|
||||
(struct nvm_addrf_12 *)&pblk->addrf;
|
||||
|
||||
ppa32 |= ppa64.g.ch << ppaf->ch_offset;
|
||||
ppa32 |= ppa64.g.lun << ppaf->lun_offset;
|
||||
ppa32 |= ppa64.g.blk << ppaf->blk_offset;
|
||||
ppa32 |= ppa64.g.pg << ppaf->pg_offset;
|
||||
ppa32 |= ppa64.g.pl << ppaf->pln_offset;
|
||||
ppa32 |= ppa64.g.sec << ppaf->sec_offset;
|
||||
} else {
|
||||
struct nvm_addrf *lbaf = &pblk->addrf;
|
||||
|
||||
ppa32 |= ppa64.m.grp << lbaf->ch_offset;
|
||||
ppa32 |= ppa64.m.pu << lbaf->lun_offset;
|
||||
ppa32 |= ppa64.m.chk << lbaf->chk_offset;
|
||||
ppa32 |= ppa64.m.sec << lbaf->sec_offset;
|
||||
}
|
||||
}
|
||||
|
||||
return ppa32;
|
||||
|
@ -1008,7 +1124,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
|
|||
{
|
||||
struct ppa_addr ppa;
|
||||
|
||||
if (pblk->ppaf_bitsize < 32) {
|
||||
if (pblk->addrf_len < 32) {
|
||||
u32 *map = (u32 *)pblk->trans_map;
|
||||
|
||||
ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
|
||||
|
@ -1024,7 +1140,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
|
|||
static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
|
||||
struct ppa_addr ppa)
|
||||
{
|
||||
if (pblk->ppaf_bitsize < 32) {
|
||||
if (pblk->addrf_len < 32) {
|
||||
u32 *map = (u32 *)pblk->trans_map;
|
||||
|
||||
map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
|
||||
|
@ -1115,7 +1231,10 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
int flags;
|
||||
|
||||
flags = geo->plane_mode >> 1;
|
||||
if (geo->version == NVM_OCSSD_SPEC_20)
|
||||
return 0;
|
||||
|
||||
flags = geo->pln_mode >> 1;
|
||||
|
||||
if (type == PBLK_WRITE)
|
||||
flags |= NVM_IO_SCRAMBLE_ENABLE;
|
||||
|
@ -1134,9 +1253,12 @@ static inline int pblk_set_read_mode(struct pblk *pblk, int type)
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
int flags;
|
||||
|
||||
if (geo->version == NVM_OCSSD_SPEC_20)
|
||||
return 0;
|
||||
|
||||
flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
|
||||
if (type == PBLK_READ_SEQUENTIAL)
|
||||
flags |= geo->plane_mode >> 1;
|
||||
flags |= geo->pln_mode >> 1;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
@ -1147,16 +1269,21 @@ static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_NVM_DEBUG
|
||||
static inline void print_ppa(struct ppa_addr *p, char *msg, int error)
|
||||
static inline void print_ppa(struct nvm_geo *geo, struct ppa_addr *p,
|
||||
char *msg, int error)
|
||||
{
|
||||
if (p->c.is_cached) {
|
||||
pr_err("ppa: (%s: %x) cache line: %llu\n",
|
||||
msg, error, (u64)p->c.line);
|
||||
} else {
|
||||
} else if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
|
||||
msg, error,
|
||||
p->g.ch, p->g.lun, p->g.blk,
|
||||
p->g.pg, p->g.pl, p->g.sec);
|
||||
} else {
|
||||
pr_err("ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n",
|
||||
msg, error,
|
||||
p->m.grp, p->m.pu, p->m.chk, p->m.sec);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1166,13 +1293,13 @@ static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
|
|||
int bit = -1;
|
||||
|
||||
if (rqd->nr_ppas == 1) {
|
||||
print_ppa(&rqd->ppa_addr, "rqd", error);
|
||||
print_ppa(&pblk->dev->geo, &rqd->ppa_addr, "rqd", error);
|
||||
return;
|
||||
}
|
||||
|
||||
while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
|
||||
bit + 1)) < rqd->nr_ppas) {
|
||||
print_ppa(&rqd->ppa_list[bit], "rqd", error);
|
||||
print_ppa(&pblk->dev->geo, &rqd->ppa_list[bit], "rqd", error);
|
||||
}
|
||||
|
||||
pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
|
||||
|
@ -1188,16 +1315,25 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
|
|||
for (i = 0; i < nr_ppas; i++) {
|
||||
ppa = &ppas[i];
|
||||
|
||||
if (!ppa->c.is_cached &&
|
||||
ppa->g.ch < geo->nr_chnls &&
|
||||
ppa->g.lun < geo->nr_luns &&
|
||||
ppa->g.pl < geo->nr_planes &&
|
||||
ppa->g.blk < geo->nr_chks &&
|
||||
ppa->g.pg < geo->ws_per_chk &&
|
||||
ppa->g.sec < geo->sec_per_pg)
|
||||
continue;
|
||||
if (geo->version == NVM_OCSSD_SPEC_12) {
|
||||
if (!ppa->c.is_cached &&
|
||||
ppa->g.ch < geo->num_ch &&
|
||||
ppa->g.lun < geo->num_lun &&
|
||||
ppa->g.pl < geo->num_pln &&
|
||||
ppa->g.blk < geo->num_chk &&
|
||||
ppa->g.pg < geo->num_pg &&
|
||||
ppa->g.sec < geo->ws_min)
|
||||
continue;
|
||||
} else {
|
||||
if (!ppa->c.is_cached &&
|
||||
ppa->m.grp < geo->num_ch &&
|
||||
ppa->m.pu < geo->num_lun &&
|
||||
ppa->m.chk < geo->num_chk &&
|
||||
ppa->m.sec < geo->clba)
|
||||
continue;
|
||||
}
|
||||
|
||||
print_ppa(ppa, "boundary", i);
|
||||
print_ppa(geo, ppa, "boundary", i);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
if (kthread_should_stop()) { \
if (kthread_should_stop() || \
test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
return 0; \
} \

@@ -188,6 +188,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include "bset.h"
#include "util.h"

@@ -258,10 +259,11 @@ struct bcache_device {
struct gendisk *disk;

unsigned long flags;
#define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
#define BCACHE_DEV_UNLINK_DONE 2

#define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned nr_stripes;
unsigned stripe_size;
atomic_t *stripe_sectors_dirty;

@@ -286,6 +288,12 @@ struct io {
sector_t last;
};

enum stop_on_failure {
BCH_CACHED_DEV_STOP_AUTO = 0,
BCH_CACHED_DEV_STOP_ALWAYS,
BCH_CACHED_DEV_STOP_MODE_MAX,
};

struct cached_dev {
struct list_head list;
struct bcache_device disk;

@@ -359,6 +367,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;

unsigned io_disable:1;
unsigned verify:1;
unsigned bypass_torture_test:1;

@@ -378,6 +387,11 @@ struct cached_dev {
unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_minimum;

enum stop_on_failure stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
atomic_t io_errors;
unsigned error_limit;
};

enum alloc_reserve {

@@ -474,10 +488,15 @@ struct gc_stat {
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
*
* CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all
* external and internal I/O should be denied when this flag is set.
*
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
#define CACHE_SET_IO_DISABLE 3

struct cache_set {
struct closure cl;

@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}

static inline void closure_bio_submit(struct cache_set *c,
struct bio *bio,
struct closure *cl)
{
closure_get(cl);
if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
generic_make_request(bio);
}

/*
* Prevent the kthread exits directly, and make sure when kthread_stop()
* is called to stop a kthread, it is still alive. If a kthread might be
* stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is
* necessary before the kthread returns.
*/
static inline void wait_for_kthread_stop(void)
{
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
}

/* Forward declarations */

void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);

@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
bool bch_cached_dev_error(struct cached_dev *dc);

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);

@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);

extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
extern const char * const bch_stop_on_failure_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
{
struct btree_iter_set unused;
struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;

if (!btree_iter_end(iter)) {

@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
}

if (iter->data->k == iter->data->end)
heap_pop(iter, unused, cmp);
heap_pop(iter, b, cmp);
else
heap_sift(iter, 0, cmp);
}

@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
#ifdef CONFIG_BCACHE_DEBUG

int __bch_count_data(struct btree_keys *);
void __bch_check_keys(struct btree_keys *, const char *, ...);
void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
void bch_dump_bucket(struct btree_keys *);

#else

static inline int __bch_count_data(struct btree_keys *b) { return -1; }
static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void __printf(2, 3)
__bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);

@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
struct btree *b, *t;
unsigned long i, nr = sc->nr_to_scan;
unsigned long freed = 0;
unsigned int btree_cache_used;

if (c->shrinker_disabled)
return SHRINK_STOP;

@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
nr = min_t(unsigned long, nr, mca_can_free(c));

i = 0;
btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
if (freed >= nr)
break;
if (nr <= 0)
goto out;

if (++i > 3 &&
!mca_reap(b, 0, false)) {

@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
rw_unlock(true, b);
freed++;
}
nr--;
}

for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
for (; (nr--) && i < btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
goto out;

@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
}
out:
mutex_unlock(&c->bucket_lock);
return freed;
return freed * c->btree_pages;
}

static unsigned long bch_mca_count(struct shrinker *shrink,

@@ -959,7 +962,7 @@ err:
return b;
}

/**
/*
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*

@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)

btree_gc_start(c);

/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);

@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)

if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
} while (ret);
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));

bch_btree_gc_finish(c);
wake_up_allocators(c);

@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)

while (1) {
wait_event_interruptible(c->gc_wait,
kthread_should_stop() || gc_should_run(c));
kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
gc_should_run(c));

if (kthread_should_stop())
if (kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;

set_gc_sectors(c);
bch_btree_gc(c);
}

wait_for_kthread_stop();
return 0;
}

@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,

if (b->key.ptr[0] != btree_ptr ||
b->seq != seq + 1) {
op->lock = b->level;
op->lock = b->level;
goto out;
}
}

@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
}
EXPORT_SYMBOL(closure_sub);

/**
/*
* closure_put - decrement a closure's refcount
*/
void closure_put(struct closure *cl)

@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
}
EXPORT_SYMBOL(closure_put);

/**
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
*/
void __closure_wake_up(struct closure_waitlist *wait_list)

@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);

/**
* closure_wait - add a closure to a waitlist
*
* @waitlist will own a ref on @cl, which will be released when
* @waitlist: will own a ref on @cl, which will be released when
* closure_wake_up() is called on @waitlist.
* @cl: closure pointer.
*
*/
bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)

@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
}
EXPORT_SYMBOL(closure_debug_destroy);

static struct dentry *debug;
static struct dentry *closure_debug;

static int debug_seq_show(struct seq_file *f, void *data)
{

@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
.release = single_release
};

void __init closure_debug_init(void)
int __init closure_debug_init(void)
{
debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
closure_debug = debugfs_create_file("closures",
0400, bcache_debug, NULL, &debug_ops);
return IS_ERR_OR_NULL(closure_debug);
}

#endif

MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");

@@ -105,6 +105,7 @@
struct closure;
struct closure_syncer;
typedef void (closure_fn) (struct closure *);
extern struct dentry *bcache_debug;

struct closure_waitlist {
struct llist_head list;

@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG

void closure_debug_init(void);
int closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);

#else

static inline void closure_debug_init(void) {}
static inline int closure_debug_init(void) { return 0; }
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}

@@ -17,7 +17,7 @@
#include <linux/random.h>
#include <linux/seq_file.h>

static struct dentry *debug;
struct dentry *bcache_debug;

#ifdef CONFIG_BCACHE_DEBUG

@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {

void bch_debug_init_cache_set(struct cache_set *c)
{
if (!IS_ERR_OR_NULL(debug)) {
if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);

c->debug = debugfs_create_file(name, 0400, debug, c,
c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
}

@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)

void bch_debug_exit(void)
{
if (!IS_ERR_OR_NULL(debug))
debugfs_remove_recursive(debug);
if (!IS_ERR_OR_NULL(bcache_debug))
debugfs_remove_recursive(bcache_debug);
}

int __init bch_debug_init(struct kobject *kobj)
{
debug = debugfs_create_dir("bcache", NULL);
bcache_debug = debugfs_create_dir("bcache", NULL);

return IS_ERR_OR_NULL(debug);
return IS_ERR_OR_NULL(bcache_debug);
}

@@ -534,7 +534,6 @@ err:
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
struct bucket *g;
unsigned i, stale;

if (!KEY_PTRS(k) ||

@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
return false;

for (i = 0; i < KEY_PTRS(k); i++) {
g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);

btree_bug_on(stale > 96, b,

@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);

b->submit_time_us = local_clock_us();
closure_bio_submit(bio, bio->bi_private);
closure_bio_submit(c, bio, bio->bi_private);
}

void bch_submit_bbio(struct bio *bio, struct cache_set *c,

@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
}

/* IO errors */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
char buf[BDEVNAME_SIZE];
unsigned errors;

WARN_ONCE(!dc, "NULL pointer of struct cached_dev");

errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
pr_err("%s: IO error on backing device, unrecoverable",
bio_devname(bio, buf));
else
bch_cached_dev_error(dc);
}

void bch_count_io_errors(struct cache *ca,
blk_status_t error,

@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);

closure_bio_submit(bio, &cl);
closure_bio_submit(ca->set, bio, &cl);
closure_sync(&cl);

/* This function could be simpler now since we no longer write

@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
struct cache *ca;
uint64_t last_seq;
unsigned iter, n = 0;
atomic_t p;
atomic_t p __maybe_unused;

atomic_long_inc(&c->reclaim);

@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
}

static void journal_write_unlock(struct closure *cl)
__releases(&c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);

@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
spin_unlock(&c->journal.lock);

while ((bio = bio_list_pop(&list)))
closure_bio_submit(bio, cl);
closure_bio_submit(c, bio, cl);

continue_at(cl, journal_write_done, NULL);
}

@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)

static struct journal_write *journal_wait_for_write(struct cache_set *c,
unsigned nkeys)
__acquires(&c->journal.lock)
{
size_t sectors;
struct closure cl;

@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
|
|||
}
|
||||
|
||||
op->insert_data_done = true;
|
||||
/* get in bch_data_insert() */
|
||||
bio_put(bio);
|
||||
out:
|
||||
continue_at(cl, bch_data_insert_keys, op->wq);
|
||||
|
@ -295,6 +296,7 @@ err:
|
|||
|
||||
/**
|
||||
* bch_data_insert - stick some data in the cache
|
||||
* @cl: closure pointer.
|
||||
*
|
||||
* This is the starting point for any data to end up in a cache device; it could
|
||||
* be from a normal write, or a writeback write, or a write to a flash only
|
||||
|
@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
|
|||
closure_put(cl);
|
||||
}
|
||||
|
||||
static void backing_request_endio(struct bio *bio)
|
||||
{
|
||||
struct closure *cl = bio->bi_private;
|
||||
|
||||
if (bio->bi_status) {
|
||||
struct search *s = container_of(cl, struct search, cl);
|
||||
struct cached_dev *dc = container_of(s->d,
|
||||
struct cached_dev, disk);
|
||||
/*
|
||||
* If a bio has REQ_PREFLUSH for writeback mode, it is
|
||||
* speically assembled in cached_dev_write() for a non-zero
|
||||
* write request which has REQ_PREFLUSH. we don't set
|
||||
* s->iop.status by this failure, the status will be decided
|
||||
* by result of bch_data_insert() operation.
|
||||
*/
|
||||
if (unlikely(s->iop.writeback &&
|
||||
bio->bi_opf & REQ_PREFLUSH)) {
|
||||
char buf[BDEVNAME_SIZE];
|
||||
|
||||
bio_devname(bio, buf);
|
||||
pr_err("Can't flush %s: returned bi_status %i",
|
||||
buf, bio->bi_status);
|
||||
} else {
|
||||
/* set to orig_bio->bi_status in bio_complete() */
|
||||
s->iop.status = bio->bi_status;
|
||||
}
|
||||
s->recoverable = false;
|
||||
/* should count I/O error for backing device here */
|
||||
bch_count_backing_io_errors(dc, bio);
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
closure_put(cl);
|
||||
}
|
||||
|
||||
static void bio_complete(struct search *s)
|
||||
{
|
||||
if (s->orig_bio) {
|
||||
|
@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
|
|||
}
|
||||
}
|
||||
|
||||
static void do_bio_hook(struct search *s, struct bio *orig_bio)
|
||||
static void do_bio_hook(struct search *s,
|
||||
struct bio *orig_bio,
|
||||
bio_end_io_t *end_io_fn)
|
||||
{
|
||||
struct bio *bio = &s->bio.bio;
|
||||
|
||||
bio_init(bio, NULL, 0);
|
||||
__bio_clone_fast(bio, orig_bio);
|
||||
bio->bi_end_io = request_endio;
|
||||
/*
|
||||
* bi_end_io can be set separately somewhere else, e.g. the
|
||||
* variants in,
|
||||
* - cache_bio->bi_end_io from cached_dev_cache_miss()
|
||||
* - n->bi_end_io from cache_lookup_fn()
|
||||
*/
|
||||
bio->bi_end_io = end_io_fn;
|
||||
bio->bi_private = &s->cl;
|
||||
|
||||
bio_cnt_set(bio, 3);
|
||||
|
@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
|
|||
s = mempool_alloc(d->c->search, GFP_NOIO);
|
||||
|
||||
closure_init(&s->cl, NULL);
|
||||
do_bio_hook(s, bio);
|
||||
do_bio_hook(s, bio, request_endio);
|
||||
|
||||
s->orig_bio = bio;
|
||||
s->cache_miss = NULL;
|
||||
|
@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
|
|||
trace_bcache_read_retry(s->orig_bio);
|
||||
|
||||
s->iop.status = 0;
|
||||
do_bio_hook(s, s->orig_bio);
|
||||
do_bio_hook(s, s->orig_bio, backing_request_endio);
|
||||
|
||||
/* XXX: invalidate cache */
|
||||
|
||||
closure_bio_submit(bio, cl);
|
||||
/* I/O request sent to backing device */
|
||||
closure_bio_submit(s->iop.c, bio, cl);
|
||||
}
|
||||
|
||||
continue_at(cl, cached_dev_cache_miss_done, NULL);
|
||||
|
@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
|||
bio_copy_dev(cache_bio, miss);
|
||||
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
|
||||
|
||||
cache_bio->bi_end_io = request_endio;
|
||||
cache_bio->bi_end_io = backing_request_endio;
|
||||
cache_bio->bi_private = &s->cl;
|
||||
|
||||
bch_bio_map(cache_bio, NULL);
|
||||
|
@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
|
|||
s->cache_miss = miss;
|
||||
s->iop.bio = cache_bio;
|
||||
bio_get(cache_bio);
|
||||
closure_bio_submit(cache_bio, &s->cl);
|
||||
/* I/O request sent to backing device */
|
||||
closure_bio_submit(s->iop.c, cache_bio, &s->cl);
|
||||
|
||||
return ret;
|
||||
out_put:
|
||||
bio_put(cache_bio);
|
||||
out_submit:
|
||||
miss->bi_end_io = request_endio;
|
||||
miss->bi_end_io = backing_request_endio;
|
||||
miss->bi_private = &s->cl;
|
||||
closure_bio_submit(miss, &s->cl);
|
||||
/* I/O request sent to backing device */
|
||||
closure_bio_submit(s->iop.c, miss, &s->cl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
|
|||
s->iop.bio = s->orig_bio;
|
||||
bio_get(s->iop.bio);
|
||||
|
||||
if ((bio_op(bio) != REQ_OP_DISCARD) ||
|
||||
blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
closure_bio_submit(bio, cl);
|
||||
if (bio_op(bio) == REQ_OP_DISCARD &&
|
||||
!blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
goto insert_data;
|
||||
|
||||
/* I/O request sent to backing device */
|
||||
bio->bi_end_io = backing_request_endio;
|
||||
closure_bio_submit(s->iop.c, bio, cl);
|
||||
|
||||
} else if (s->iop.writeback) {
|
||||
bch_writeback_add(dc);
|
||||
s->iop.bio = bio;
|
||||
|
||||
if (bio->bi_opf & REQ_PREFLUSH) {
|
||||
/* Also need to send a flush to the backing device */
|
||||
struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
|
||||
dc->disk.bio_split);
|
||||
/*
|
||||
* Also need to send a flush to the backing
|
||||
* device.
|
||||
*/
|
||||
struct bio *flush;
|
||||
|
||||
flush = bio_alloc_bioset(GFP_NOIO, 0,
|
||||
dc->disk.bio_split);
|
||||
if (!flush) {
|
||||
s->iop.status = BLK_STS_RESOURCE;
|
||||
goto insert_data;
|
||||
}
|
||||
bio_copy_dev(flush, bio);
|
||||
flush->bi_end_io = request_endio;
|
||||
flush->bi_end_io = backing_request_endio;
|
||||
flush->bi_private = cl;
|
||||
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
|
||||
|
||||
closure_bio_submit(flush, cl);
|
||||
/* I/O request sent to backing device */
|
||||
closure_bio_submit(s->iop.c, flush, cl);
|
||||
}
|
||||
} else {
|
||||
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
|
||||
|
||||
closure_bio_submit(bio, cl);
|
||||
/* I/O request sent to backing device */
|
||||
bio->bi_end_io = backing_request_endio;
|
||||
closure_bio_submit(s->iop.c, bio, cl);
|
||||
}
|
||||
|
||||
insert_data:
|
||||
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
|
||||
continue_at(cl, cached_dev_write_complete, NULL);
|
||||
}
|
||||
|
@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
|
|||
bch_journal_meta(s->iop.c, cl);
|
||||
|
||||
/* If it's a flush, we send the flush to the backing device too */
|
||||
closure_bio_submit(bio, cl);
|
||||
bio->bi_end_io = backing_request_endio;
|
||||
closure_bio_submit(s->iop.c, bio, cl);
|
||||
|
||||
continue_at(cl, cached_dev_bio_complete, NULL);
|
||||
}
|
||||
|
||||
struct detached_dev_io_private {
|
||||
struct bcache_device *d;
|
||||
unsigned long start_time;
|
||||
bio_end_io_t *bi_end_io;
|
||||
void *bi_private;
|
||||
};
|
||||
|
||||
static void detached_dev_end_io(struct bio *bio)
|
||||
{
|
||||
struct detached_dev_io_private *ddip;
|
||||
|
||||
ddip = bio->bi_private;
|
||||
bio->bi_end_io = ddip->bi_end_io;
|
||||
bio->bi_private = ddip->bi_private;
|
||||
|
||||
generic_end_io_acct(ddip->d->disk->queue,
|
||||
bio_data_dir(bio),
|
||||
&ddip->d->disk->part0, ddip->start_time);
|
||||
|
||||
if (bio->bi_status) {
|
||||
struct cached_dev *dc = container_of(ddip->d,
|
||||
struct cached_dev, disk);
|
||||
/* should count I/O error for backing device here */
|
||||
bch_count_backing_io_errors(dc, bio);
|
||||
}
|
||||
|
||||
kfree(ddip);
|
||||
bio->bi_end_io(bio);
|
||||
}
|
||||
|
||||
static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
|
||||
{
|
||||
struct detached_dev_io_private *ddip;
|
||||
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
|
||||
|
||||
/*
|
||||
* no need to call closure_get(&dc->disk.cl),
|
||||
* because upper layer had already opened bcache device,
|
||||
* which would call closure_get(&dc->disk.cl)
|
||||
*/
|
||||
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
|
||||
ddip->d = d;
|
||||
ddip->start_time = jiffies;
|
||||
ddip->bi_end_io = bio->bi_end_io;
|
||||
ddip->bi_private = bio->bi_private;
|
||||
bio->bi_end_io = detached_dev_end_io;
|
||||
bio->bi_private = ddip;
|
||||
|
||||
if ((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
bio->bi_end_io(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
}
|
||||
|
||||
/* Cached devices - read & write stuff */
|
||||
|
||||
static blk_qc_t cached_dev_make_request(struct request_queue *q,
|
||||
|
@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
|
|||
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
|
||||
int rw = bio_data_dir(bio);
|
||||
|
||||
if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
|
||||
dc->io_disable)) {
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
bio_endio(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
atomic_set(&dc->backing_idle, 0);
|
||||
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
|
||||
|
||||
|
@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
|
|||
else
|
||||
cached_dev_read(dc, s);
|
||||
}
|
||||
} else {
|
||||
if ((bio_op(bio) == REQ_OP_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(dc->bdev)))
|
||||
bio_endio(bio);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
}
|
||||
} else
|
||||
/* I/O request sent to backing device */
|
||||
detached_dev_do_request(d, bio);
|
||||
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
|
|||
struct bcache_device *d = bio->bi_disk->private_data;
|
||||
int rw = bio_data_dir(bio);
|
||||
|
||||
if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
bio_endio(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
|
||||
|
||||
s = search_alloc(bio, d);
|
||||
|
|
|
@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
|
|||
NULL
|
||||
};
|
||||
|
||||
/* Default is -1; we skip past it for stop_when_cache_set_failed */
|
||||
const char * const bch_stop_on_failure_modes[] = {
|
||||
"default",
|
||||
"auto",
|
||||
"always",
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct kobject *bcache_kobj;
|
||||
struct mutex bch_register_lock;
|
||||
LIST_HEAD(bch_cache_sets);
|
||||
|
@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
|
|||
bio->bi_private = dc;
|
||||
|
||||
closure_get(cl);
|
||||
/* I/O request sent to backing device */
|
||||
__write_super(&dc->sb, bio);
|
||||
|
||||
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
|
||||
|
@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
|
|||
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
|
||||
bch_bio_map(bio, ca->disk_buckets);
|
||||
|
||||
closure_bio_submit(bio, &ca->prio);
|
||||
closure_bio_submit(ca->set, bio, &ca->prio);
|
||||
closure_sync(cl);
|
||||
}
|
||||
|
||||
|
@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
|
|||
sector_t sectors)
|
||||
{
|
||||
struct request_queue *q;
|
||||
const size_t max_stripes = min_t(size_t, INT_MAX,
|
||||
SIZE_MAX / sizeof(atomic_t));
|
||||
size_t n;
|
||||
int idx;
|
||||
|
||||
|
@ -777,9 +788,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
|
|||
|
||||
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
|
||||
|
||||
if (!d->nr_stripes ||
|
||||
d->nr_stripes > INT_MAX ||
|
||||
d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
|
||||
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
|
||||
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
|
||||
(unsigned)d->nr_stripes);
|
||||
return -ENOMEM;
|
||||
|
@ -833,9 +842,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
|
|||
q->limits.io_min = block_size;
|
||||
q->limits.logical_block_size = block_size;
|
||||
q->limits.physical_block_size = block_size;
|
||||
set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
|
||||
clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags);
|
||||
set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
|
||||
|
||||
blk_queue_write_cache(q, true, true);
|
||||
|
||||
|
@ -899,6 +908,31 @@ void bch_cached_dev_run(struct cached_dev *dc)
|
|||
pr_debug("error creating sysfs link");
|
||||
}
|
||||
|
||||
/*
|
||||
* If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
|
||||
* work dc->writeback_rate_update is running. Wait until the routine
|
||||
* quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
|
||||
* cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
|
||||
* seconds, give up waiting here and continue to cancel it too.
|
||||
*/
|
||||
static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
|
||||
{
|
||||
int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
|
||||
|
||||
do {
|
||||
if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
|
||||
&dc->disk.flags))
|
||||
break;
|
||||
time_out--;
|
||||
schedule_timeout_interruptible(1);
|
||||
} while (time_out > 0);
|
||||
|
||||
if (time_out == 0)
|
||||
pr_warn("give up waiting for dc->writeback_write_update to quit");
|
||||
|
||||
cancel_delayed_work_sync(&dc->writeback_rate_update);
|
||||
}
|
||||
|
||||
static void cached_dev_detach_finish(struct work_struct *w)
|
||||
{
|
||||
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
|
||||
|
@ -911,7 +945,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
|
|||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
cancel_delayed_work_sync(&dc->writeback_rate_update);
|
||||
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
|
||||
cancel_writeback_rate_update_dwork(dc);
|
||||
|
||||
if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
|
||||
kthread_stop(dc->writeback_thread);
|
||||
dc->writeback_thread = NULL;
|
||||
|
@ -954,6 +990,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
|
|||
closure_get(&dc->disk.cl);
|
||||
|
||||
bch_writeback_queue(dc);
|
||||
|
||||
cached_dev_put(dc);
|
||||
}
|
||||
|
||||
|
@ -1065,7 +1102,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
|
|||
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
|
||||
bch_sectors_dirty_init(&dc->disk);
|
||||
atomic_set(&dc->has_dirty, 1);
|
||||
refcount_inc(&dc->count);
|
||||
bch_writeback_queue(dc);
|
||||
}
|
||||
|
||||
|
@ -1093,14 +1129,16 @@ static void cached_dev_free(struct closure *cl)
|
|||
{
|
||||
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
|
||||
|
||||
cancel_delayed_work_sync(&dc->writeback_rate_update);
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
|
||||
cancel_writeback_rate_update_dwork(dc);
|
||||
|
||||
if (!IS_ERR_OR_NULL(dc->writeback_thread))
|
||||
kthread_stop(dc->writeback_thread);
|
||||
if (dc->writeback_write_wq)
|
||||
destroy_workqueue(dc->writeback_write_wq);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
if (atomic_read(&dc->running))
|
||||
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
|
||||
bcache_device_free(&dc->disk);
|
||||
|
@ -1170,6 +1208,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
|
|||
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
|
||||
q->backing_dev_info->ra_pages);
|
||||
|
||||
atomic_set(&dc->io_errors, 0);
|
||||
dc->io_disable = false;
|
||||
dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
|
||||
/* default to auto */
|
||||
dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
|
||||
|
||||
bch_cached_dev_request_init(dc);
|
||||
bch_cached_dev_writeback_init(dc);
|
||||
return 0;
|
||||
|
@ -1321,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
|
|||
return flash_dev_run(c, u);
|
||||
}
|
||||
|
||||
bool bch_cached_dev_error(struct cached_dev *dc)
|
||||
{
|
||||
char name[BDEVNAME_SIZE];
|
||||
|
||||
if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
|
||||
return false;
|
||||
|
||||
dc->io_disable = true;
|
||||
/* make others know io_disable is true earlier */
|
||||
smp_mb();
|
||||
|
||||
pr_err("stop %s: too many IO errors on backing device %s\n",
|
||||
dc->disk.disk->disk_name, bdevname(dc->bdev, name));
|
||||
|
||||
bcache_device_stop(&dc->disk);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Cache set */
|
||||
|
||||
__printf(2, 3)
|
||||
|
@ -1332,6 +1394,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
|
|||
test_bit(CACHE_SET_STOPPING, &c->flags))
|
||||
return false;
|
||||
|
||||
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
|
||||
pr_warn("CACHE_SET_IO_DISABLE already set");
|
||||
|
||||
/* XXX: we can be called from atomic context
|
||||
acquire_console_sem();
|
||||
*/
|
||||
|
@ -1443,25 +1508,72 @@ static void cache_set_flush(struct closure *cl)
|
|||
closure_return(cl);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is only called when CACHE_SET_IO_DISABLE is set, which means
|
||||
* cache set is unregistering due to too many I/O errors. In this condition,
|
||||
* the bcache device might be stopped, it depends on stop_when_cache_set_failed
|
||||
* value and whether the broken cache has dirty data:
|
||||
*
|
||||
* dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
|
||||
* BCH_CACHED_STOP_AUTO 0 NO
|
||||
* BCH_CACHED_STOP_AUTO 1 YES
|
||||
* BCH_CACHED_DEV_STOP_ALWAYS 0 YES
|
||||
* BCH_CACHED_DEV_STOP_ALWAYS 1 YES
|
||||
*
|
||||
* The expected behavior is, if stop_when_cache_set_failed is configured to
|
||||
* "auto" via sysfs interface, the bcache device will not be stopped if the
|
||||
* backing device is clean on the broken cache device.
|
||||
*/
|
||||
static void conditional_stop_bcache_device(struct cache_set *c,
|
||||
struct bcache_device *d,
|
||||
struct cached_dev *dc)
|
||||
{
|
||||
if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
|
||||
pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
|
||||
d->disk->disk_name, c->sb.set_uuid);
|
||||
bcache_device_stop(d);
|
||||
} else if (atomic_read(&dc->has_dirty)) {
|
||||
/*
|
||||
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
|
||||
* and dc->has_dirty == 1
|
||||
*/
|
||||
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
|
||||
d->disk->disk_name);
|
||||
bcache_device_stop(d);
|
||||
} else {
|
||||
/*
|
||||
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
|
||||
* and dc->has_dirty == 0
|
||||
*/
|
||||
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
|
||||
d->disk->disk_name);
|
||||
}
|
||||
}
|
||||
|
||||
static void __cache_set_unregister(struct closure *cl)
|
||||
{
|
||||
struct cache_set *c = container_of(cl, struct cache_set, caching);
|
||||
struct cached_dev *dc;
|
||||
struct bcache_device *d;
|
||||
size_t i;
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
for (i = 0; i < c->devices_max_used; i++)
|
||||
if (c->devices[i]) {
|
||||
if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
|
||||
test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
|
||||
dc = container_of(c->devices[i],
|
||||
struct cached_dev, disk);
|
||||
bch_cached_dev_detach(dc);
|
||||
} else {
|
||||
bcache_device_stop(c->devices[i]);
|
||||
}
|
||||
for (i = 0; i < c->devices_max_used; i++) {
|
||||
d = c->devices[i];
|
||||
if (!d)
|
||||
continue;
|
||||
|
||||
if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
|
||||
test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
|
||||
dc = container_of(d, struct cached_dev, disk);
|
||||
bch_cached_dev_detach(dc);
|
||||
if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
|
||||
conditional_stop_bcache_device(c, d, dc);
|
||||
} else {
|
||||
bcache_device_stop(d);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&bch_register_lock);
|
||||
|
||||
|
@ -1567,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
|
|||
c->congested_read_threshold_us = 2000;
|
||||
c->congested_write_threshold_us = 20000;
|
||||
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
|
||||
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
|
||||
|
||||
return c;
|
||||
err:
|
||||
|
@ -2148,7 +2261,6 @@ static int __init bcache_init(void)
|
|||
mutex_init(&bch_register_lock);
|
||||
init_waitqueue_head(&unregister_wait);
|
||||
register_reboot_notifier(&reboot);
|
||||
closure_debug_init();
|
||||
|
||||
bcache_major = register_blkdev(0, "bcache");
|
||||
if (bcache_major < 0) {
|
||||
|
@ -2160,7 +2272,7 @@ static int __init bcache_init(void)
|
|||
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
|
||||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
|
||||
bch_request_init() ||
|
||||
bch_debug_init(bcache_kobj) ||
|
||||
bch_debug_init(bcache_kobj) || closure_debug_init() ||
|
||||
sysfs_create_files(bcache_kobj, files))
|
||||
goto err;
|
||||
|
||||
|
|
|
@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
|
|||
rw_attribute(sequential_cutoff);
|
||||
rw_attribute(data_csum);
|
||||
rw_attribute(cache_mode);
|
||||
rw_attribute(stop_when_cache_set_failed);
|
||||
rw_attribute(writeback_metadata);
|
||||
rw_attribute(writeback_running);
|
||||
rw_attribute(writeback_percent);
|
||||
|
@ -95,6 +96,7 @@ read_attribute(partial_stripes_expensive);
|
|||
|
||||
rw_attribute(synchronous);
|
||||
rw_attribute(journal_delay_ms);
|
||||
rw_attribute(io_disable);
|
||||
rw_attribute(discard);
|
||||
rw_attribute(running);
|
||||
rw_attribute(label);
|
||||
|
@ -125,6 +127,12 @@ SHOW(__bch_cached_dev)
|
|||
bch_cache_modes + 1,
|
||||
BDEV_CACHE_MODE(&dc->sb));
|
||||
|
||||
if (attr == &sysfs_stop_when_cache_set_failed)
|
||||
return bch_snprint_string_list(buf, PAGE_SIZE,
|
||||
bch_stop_on_failure_modes + 1,
|
||||
dc->stop_when_cache_set_failed);
|
||||
|
||||
|
||||
sysfs_printf(data_csum, "%i", dc->disk.data_csum);
|
||||
var_printf(verify, "%i");
|
||||
var_printf(bypass_torture_test, "%i");
|
||||
|
@ -133,7 +141,9 @@ SHOW(__bch_cached_dev)
|
|||
var_print(writeback_delay);
|
||||
var_print(writeback_percent);
|
||||
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
|
||||
|
||||
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
|
||||
sysfs_printf(io_error_limit, "%i", dc->error_limit);
|
||||
sysfs_printf(io_disable, "%i", dc->io_disable);
|
||||
var_print(writeback_rate_update_seconds);
|
||||
var_print(writeback_rate_i_term_inverse);
|
||||
var_print(writeback_rate_p_term_inverse);
|
||||
|
@ -173,7 +183,7 @@ SHOW(__bch_cached_dev)
|
|||
sysfs_hprint(dirty_data,
|
||||
bcache_dev_sectors_dirty(&dc->disk) << 9);
|
||||
|
||||
sysfs_hprint(stripe_size, dc->disk.stripe_size << 9);
|
||||
sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
|
||||
var_printf(partial_stripes_expensive, "%u");
|
||||
|
||||
var_hprint(sequential_cutoff);
|
||||
|
@ -224,6 +234,14 @@ STORE(__cached_dev)
|
|||
d_strtoul(writeback_rate_i_term_inverse);
|
||||
d_strtoul_nonzero(writeback_rate_p_term_inverse);
|
||||
|
||||
sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
|
||||
|
||||
if (attr == &sysfs_io_disable) {
|
||||
int v = strtoul_or_return(buf);
|
||||
|
||||
dc->io_disable = v ? 1 : 0;
|
||||
}
|
||||
|
||||
d_strtoi_h(sequential_cutoff);
|
||||
d_strtoi_h(readahead);
|
||||
|
||||
|
@ -246,6 +264,15 @@ STORE(__cached_dev)
|
|||
}
|
||||
}
|
||||
|
||||
if (attr == &sysfs_stop_when_cache_set_failed) {
|
||||
v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
|
||||
|
||||
if (v < 0)
|
||||
return v;
|
||||
|
||||
dc->stop_when_cache_set_failed = v;
|
||||
}
|
||||
|
||||
if (attr == &sysfs_label) {
|
||||
if (size > SB_LABEL_SIZE)
|
||||
return -EINVAL;
|
||||
|
@@ -309,7 +336,8 @@ STORE(bch_cached_dev)
		bch_writeback_queue(dc);

	if (attr == &sysfs_writeback_percent)
		schedule_delayed_work(&dc->writeback_rate_update,
		if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
			schedule_delayed_work(&dc->writeback_rate_update,
					      dc->writeback_rate_update_seconds * HZ);

	mutex_unlock(&bch_register_lock);
@@ -324,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = {
	&sysfs_data_csum,
#endif
	&sysfs_cache_mode,
	&sysfs_stop_when_cache_set_failed,
	&sysfs_writeback_metadata,
	&sysfs_writeback_running,
	&sysfs_writeback_delay,
@@ -333,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
	&sysfs_writeback_rate_i_term_inverse,
	&sysfs_writeback_rate_p_term_inverse,
	&sysfs_writeback_rate_debug,
	&sysfs_errors,
	&sysfs_io_error_limit,
	&sysfs_io_disable,
	&sysfs_dirty_data,
	&sysfs_stripe_size,
	&sysfs_partial_stripes_expensive,
@@ -590,6 +622,8 @@ SHOW(__bch_cache_set)
	sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
	sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
	sysfs_printf(io_disable, "%i",
		     test_bit(CACHE_SET_IO_DISABLE, &c->flags));

	if (attr == &sysfs_bset_tree_stats)
		return bch_bset_print_stats(c, buf);
@@ -679,6 +713,20 @@ STORE(__bch_cache_set)
	if (attr == &sysfs_io_error_halflife)
		c->error_decay = strtoul_or_return(buf) / 88;

	if (attr == &sysfs_io_disable) {
		int v = strtoul_or_return(buf);

		if (v) {
			if (test_and_set_bit(CACHE_SET_IO_DISABLE,
					     &c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already set");
		} else {
			if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
						&c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already cleared");
		}
	}

	sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
	sysfs_strtoul(verify, c->verify);
	sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
@@ -764,6 +812,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
	&sysfs_gc_always_rewrite,
	&sysfs_btree_shrinker_disabled,
	&sysfs_copy_gc_enabled,
	&sysfs_io_disable,
	NULL
};
KTYPE(bch_cache_set_internal);
@@ -32,20 +32,27 @@ int bch_ ## name ## _h(const char *cp, type *res) \
	case 'y': \
	case 'z': \
		u++; \
		/* fall through */ \
	case 'e': \
		u++; \
		/* fall through */ \
	case 'p': \
		u++; \
		/* fall through */ \
	case 't': \
		u++; \
		/* fall through */ \
	case 'g': \
		u++; \
		/* fall through */ \
	case 'm': \
		u++; \
		/* fall through */ \
	case 'k': \
		u++; \
		if (e++ == cp) \
			return -EINVAL; \
		/* fall through */ \
	case '\n': \
	case '\0': \
		if (*e == '\n') \
@@ -75,10 +82,9 @@ STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)

/**
 * bch_hprint() - formats @v to human readable string for sysfs.
 *
 * @v - signed 64 bit integer
 * @buf - the (at least 8 byte) buffer to format the result into.
 * bch_hprint - formats @v to human readable string for sysfs.
 * @buf: the (at least 8 byte) buffer to format the result into.
 * @v: signed 64 bit integer
 *
 * Returns the number of bytes used by format.
 */
@@ -218,13 +224,12 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
}

/**
 * bch_next_delay() - increment @d by the amount of work done, and return how
 * long to delay until the next time to do some work.
 * bch_next_delay() - update ratelimiting statistics and calculate next delay
 * @d: the struct bch_ratelimit to update
 * @done: the amount of work done, in arbitrary units
 *
 * @d - the struct bch_ratelimit to update
 * @done - the amount of work done, in arbitrary units
 *
 * Returns the amount of time to delay by, in jiffies
 * Increment @d by the amount of work done, and return how long to delay in
 * jiffies until the next time to do some work.
 */
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
	return bdev->bd_inode->i_size >> 9;
}

#define closure_bio_submit(bio, cl) \
do { \
	closure_get(cl); \
	generic_make_request(bio); \
} while (0)

uint64_t bch_crc64_update(uint64_t, const void *, size_t);
uint64_t bch_crc64(const void *, size_t);
@@ -114,6 +114,27 @@ static void update_writeback_rate(struct work_struct *work)
	struct cached_dev *dc = container_of(to_delayed_work(work),
					     struct cached_dev,
					     writeback_rate_update);
	struct cache_set *c = dc->disk.c;

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb();

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
	    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
		/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
		smp_mb();
		return;
	}

	down_read(&dc->writeback_lock);
@@ -123,8 +144,23 @@ static void update_writeback_rate(struct work_struct *work)

	up_read(&dc->writeback_lock);

	schedule_delayed_work(&dc->writeback_rate_update,
	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
	    !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		schedule_delayed_work(&dc->writeback_rate_update,
				      dc->writeback_rate_update_seconds * HZ);
	}

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb();
}

static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
@@ -253,7 +289,8 @@ static void write_dirty(struct closure *cl)
		bio_set_dev(&io->bio, io->dc->bdev);
		io->bio.bi_end_io = dirty_endio;

		closure_bio_submit(&io->bio, cl);
		/* I/O request sent to backing device */
		closure_bio_submit(io->dc->disk.c, &io->bio, cl);
	}

	atomic_set(&dc->writeback_sequence_next, next_sequence);
@@ -279,7 +316,7 @@ static void read_dirty_submit(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	closure_bio_submit(&io->bio, cl);
	closure_bio_submit(io->dc->disk.c, &io->bio, cl);

	continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
@@ -305,7 +342,9 @@ static void read_dirty(struct cached_dev *dc)

	next = bch_keybuf_next(&dc->writeback_keys);

	while (!kthread_should_stop() && next) {
	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
	       next) {
		size = 0;
		nk = 0;
@@ -402,7 +441,9 @@ static void read_dirty(struct cached_dev *dc)
			}
		}

		while (!kthread_should_stop() && delay) {
		while (!kthread_should_stop() &&
		       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
		       delay) {
			schedule_timeout_interruptible(delay);
			delay = writeback_delay(dc, 0);
		}
@@ -558,21 +599,30 @@ static bool refill_dirty(struct cached_dev *dc)
static int bch_writeback_thread(void *arg)
{
	struct cached_dev *dc = arg;
	struct cache_set *c = dc->disk.c;
	bool searched_full_index;

	bch_ratelimit_reset(&dc->writeback_rate);

	while (!kthread_should_stop()) {
	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		down_write(&dc->writeback_lock);
		set_current_state(TASK_INTERRUPTIBLE);
		if (!atomic_read(&dc->has_dirty) ||
		    (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
		     !dc->writeback_running)) {
		/*
		 * If the bache device is detaching, skip here and continue
		 * to perform writeback. Otherwise, if no dirty data on cache,
		 * or there is dirty data on cache but writeback is disabled,
		 * the writeback thread should sleep here and wait for others
		 * to wake up it.
		 */
		if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
		    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
			up_write(&dc->writeback_lock);

			if (kthread_should_stop()) {
			if (kthread_should_stop() ||
			    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
				set_current_state(TASK_RUNNING);
				return 0;
				break;
			}

			schedule();
@@ -585,9 +635,16 @@ static int bch_writeback_thread(void *arg)
		if (searched_full_index &&
		    RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
			atomic_set(&dc->has_dirty, 0);
			cached_dev_put(dc);
			SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
			bch_write_bdev_super(dc, NULL);
			/*
			 * If bcache device is detaching via sysfs interface,
			 * writeback thread should stop after there is no dirty
			 * data on cache. BCACHE_DEV_DETACHING flag is set in
			 * bch_cached_dev_detach().
			 */
			if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
				break;
		}

		up_write(&dc->writeback_lock);
@@ -599,6 +656,7 @@ static int bch_writeback_thread(void *arg)

		while (delay &&
		       !kthread_should_stop() &&
		       !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
		       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
			delay = schedule_timeout_interruptible(delay);
@@ -606,6 +664,9 @@ static int bch_writeback_thread(void *arg)
		}
	}

	cached_dev_put(dc);
	wait_for_kthread_stop();

	return 0;
}
@@ -659,6 +720,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
	dc->writeback_rate_p_term_inverse = 40;
	dc->writeback_rate_i_term_inverse = 10000;

	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}
@@ -669,11 +731,15 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
	if (!dc->writeback_write_wq)
		return -ENOMEM;

	cached_dev_get(dc);
	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
					      "bcache_writeback");
	if (IS_ERR(dc->writeback_thread))
	if (IS_ERR(dc->writeback_thread)) {
		cached_dev_put(dc);
		return PTR_ERR(dc->writeback_thread);
	}

	WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);
@@ -39,7 +39,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)

		if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
			continue;
		ret += bcache_dev_sectors_dirty(d);
		ret += bcache_dev_sectors_dirty(d);
	}

	mutex_unlock(&bch_register_lock);
@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
{
	if (!atomic_read(&dc->has_dirty) &&
	    !atomic_xchg(&dc->has_dirty, 1)) {
		refcount_inc(&dc->count);

		if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
			SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
			/* XXX: should do this synchronously */
@@ -1857,7 +1857,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
|||
q->limits = *limits;
|
||||
|
||||
if (!dm_table_supports_discards(t)) {
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
|
||||
/* Must also clear discard limits... */
|
||||
q->limits.max_discard_sectors = 0;
|
||||
q->limits.max_hw_discard_sectors = 0;
|
||||
|
@ -1865,7 +1865,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
|||
q->limits.discard_alignment = 0;
|
||||
q->limits.discard_misaligned = 0;
|
||||
} else
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
|
||||
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
|
||||
wc = true;
|
||||
|
@ -1875,15 +1875,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
|||
blk_queue_write_cache(q, wc, fua);
|
||||
|
||||
if (dm_table_supports_dax(t))
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
if (dm_table_supports_dax_write_cache(t))
|
||||
dax_write_cache(t->md->dax_dev, true);
|
||||
|
||||
/* Ensure that all underlying devices are non-rotational. */
|
||||
if (dm_table_all_devices_attribute(t, device_is_nonrot))
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
if (!dm_table_supports_write_same(t))
|
||||
q->limits.max_write_same_sectors = 0;
|
||||
|
@ -1891,9 +1891,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
|||
q->limits.max_write_zeroes_sectors = 0;
|
||||
|
||||
if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
|
||||
else
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
|
||||
|
||||
dm_table_verify_integrity(t);
|
||||
|
||||
|
@ -1904,7 +1904,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
|||
* have it set.
|
||||
*/
|
||||
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
}
|
||||
|
||||
unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -1848,7 +1848,7 @@ static struct mapped_device *alloc_dev(int minor)
|
|||
INIT_LIST_HEAD(&md->table_devices);
|
||||
spin_lock_init(&md->uevent_lock);
|
||||
|
||||
md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
|
||||
md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
|
||||
if (!md->queue)
|
||||
goto bad;
|
||||
md->queue->queuedata = md;
@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
|
|||
}
|
||||
|
||||
if (!discard_supported)
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
else
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
|
||||
/*
|
||||
* Here we calculate the device offsets.
@@ -5206,12 +5206,12 @@ static void md_free(struct kobject *ko)
|
|||
if (mddev->sysfs_state)
|
||||
sysfs_put(mddev->sysfs_state);
|
||||
|
||||
if (mddev->gendisk)
|
||||
del_gendisk(mddev->gendisk);
|
||||
if (mddev->queue)
|
||||
blk_cleanup_queue(mddev->queue);
|
||||
if (mddev->gendisk) {
|
||||
del_gendisk(mddev->gendisk);
|
||||
if (mddev->gendisk)
|
||||
put_disk(mddev->gendisk);
|
||||
}
|
||||
percpu_ref_exit(&mddev->writes_pending);
|
||||
|
||||
kfree(mddev);
|
||||
|
@ -5619,9 +5619,9 @@ int md_run(struct mddev *mddev)
|
|||
if (mddev->degraded)
|
||||
nonrot = false;
|
||||
if (nonrot)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
else
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
|
||||
mddev->queue->backing_dev_info->congested_data = mddev;
|
||||
mddev->queue->backing_dev_info->congested_fn = md_congested;
|
||||
}
@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev)
|
|||
discard_supported = true;
|
||||
}
|
||||
if (!discard_supported)
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
else
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
}
|
||||
|
||||
/* calculate array device size */
@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
}
|
||||
}
|
||||
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
print_conf(conf);
|
||||
return err;
|
||||
}
|
||||
|
@ -3110,10 +3110,10 @@ static int raid1_run(struct mddev *mddev)
|
|||
|
||||
if (mddev->queue) {
|
||||
if (discard_supported)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
}
|
||||
|
||||
|
|
|
@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||
break;
|
||||
}
|
||||
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
|
||||
|
||||
print_conf(conf);
|
||||
return err;
|
||||
|
@ -3846,10 +3846,10 @@ static int raid10_run(struct mddev *mddev)
|
|||
|
||||
if (mddev->queue) {
|
||||
if (discard_supported)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
}
|
||||
/* need to check that every block has at least one working mirror */
|
||||
|
|
|
@ -7443,10 +7443,10 @@ static int raid5_run(struct mddev *mddev)
|
|||
if (devices_handle_discard_safely &&
|
||||
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
|
||||
mddev->queue->limits.discard_granularity >= stripe)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
|
||||
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
|
||||
|
|
|
@ -444,12 +444,12 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
|
|||
{
|
||||
u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
|
||||
u64 val;
|
||||
u8 option = SG_VALID | SG_TRANS_DATA;
|
||||
u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA;
|
||||
|
||||
pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len);
|
||||
|
||||
if (end)
|
||||
option |= SG_END;
|
||||
option |= RTSX_SG_END;
|
||||
val = ((u64)addr << 32) | ((u64)len << 12) | option;
|
||||
|
||||
put_unaligned_le64(val, ptr);
|
||||
|
|
|
@ -2659,7 +2659,6 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
|
|||
* from being accepted.
|
||||
*/
|
||||
card = md->queue.card;
|
||||
mmc_cleanup_queue(&md->queue);
|
||||
if (md->disk->flags & GENHD_FL_UP) {
|
||||
device_remove_file(disk_to_dev(md->disk), &md->force_ro);
|
||||
if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
|
||||
|
@ -2669,6 +2668,7 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
|
|||
|
||||
del_gendisk(md->disk);
|
||||
}
|
||||
mmc_cleanup_queue(&md->queue);
|
||||
mmc_blk_put(md);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -185,14 +185,14 @@ static void mmc_queue_setup_discard(struct request_queue *q,
|
|||
if (!max_discard)
|
||||
return;
|
||||
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
|
||||
blk_queue_max_discard_sectors(q, max_discard);
|
||||
q->limits.discard_granularity = card->pref_erase << 9;
|
||||
/* granularity must not be greater than max. discard */
|
||||
if (card->pref_erase > max_discard)
|
||||
q->limits.discard_granularity = 0;
|
||||
if (mmc_can_secure_erase_trim(card))
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -356,8 +356,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
|
|||
if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
|
||||
limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
|
||||
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, mq->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
|
||||
if (mmc_can_erase(card))
|
||||
mmc_queue_setup_discard(mq->queue, card);
|
||||
|
||||
|
|
|
@ -419,11 +419,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
|||
blk_queue_logical_block_size(new->rq, tr->blksize);
|
||||
|
||||
blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
|
||||
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
|
||||
|
||||
if (tr->discard) {
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq);
|
||||
blk_queue_max_discard_sectors(new->rq, UINT_MAX);
|
||||
}
|
||||
|
||||
|
|
|
@ -266,7 +266,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
|
|||
blk_queue_make_request(q, nd_blk_make_request);
|
||||
blk_queue_max_hw_sectors(q, UINT_MAX);
|
||||
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
q->queuedata = nsblk;
|
||||
|
||||
disk = alloc_disk(0);
|
||||
|
|
|
@ -1542,7 +1542,7 @@ static int btt_blk_init(struct btt *btt)
|
|||
blk_queue_make_request(btt->btt_queue, btt_make_request);
|
||||
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
|
||||
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
|
||||
btt->btt_queue->queuedata = btt;
|
||||
|
||||
if (btt_meta_size(btt)) {
|
||||
|
|
|
@ -29,7 +29,6 @@ enum {
|
|||
* BTT instance
|
||||
*/
|
||||
ND_MAX_LANES = 256,
|
||||
SECTOR_SHIFT = 9,
|
||||
INT_LBASIZE_ALIGNMENT = 64,
|
||||
NVDIMM_IO_ATOMIC = 1,
|
||||
};
|
||||
|
|
|
@ -343,7 +343,7 @@ static int pmem_attach_disk(struct device *dev,
|
|||
return -EBUSY;
|
||||
}
|
||||
|
||||
q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
|
||||
q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
|
||||
if (!q)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -387,8 +387,8 @@ static int pmem_attach_disk(struct device *dev,
|
|||
blk_queue_physical_block_size(q, PAGE_SIZE);
|
||||
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
|
||||
blk_queue_max_hw_sectors(q, UINT_MAX);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
q->queuedata = pmem;
|
||||
|
||||
disk = alloc_disk_node(0, nid);
|
||||
|
|
|
@ -12,6 +12,7 @@ nvme-core-y := core.o
|
|||
nvme-core-$(CONFIG_TRACING) += trace.o
|
||||
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
|
||||
nvme-core-$(CONFIG_NVM) += lightnvm.o
|
||||
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
|
||||
|
||||
nvme-y += pci.o
|
||||
|
||||
|
|
|
@ -100,11 +100,6 @@ static struct class *nvme_subsys_class;
|
|||
static void nvme_ns_remove(struct nvme_ns *ns);
|
||||
static int nvme_revalidate_disk(struct gendisk *disk);
|
||||
|
||||
static __le32 nvme_get_log_dw10(u8 lid, size_t size)
|
||||
{
|
||||
return cpu_to_le32((((size / 4) - 1) << 16) | lid);
|
||||
}
|
||||
|
||||
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
|
||||
|
@ -135,6 +130,9 @@ static void nvme_delete_ctrl_work(struct work_struct *work)
|
|||
struct nvme_ctrl *ctrl =
|
||||
container_of(work, struct nvme_ctrl, delete_work);
|
||||
|
||||
dev_info(ctrl->device,
|
||||
"Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn);
|
||||
|
||||
flush_work(&ctrl->reset_work);
|
||||
nvme_stop_ctrl(ctrl);
|
||||
nvme_remove_namespaces(ctrl);
|
||||
|
@ -948,7 +946,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
|
|||
c.identify.opcode = nvme_admin_identify;
|
||||
c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
|
||||
c.identify.nsid = cpu_to_le32(nsid);
|
||||
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
|
||||
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list,
|
||||
NVME_IDENTIFY_DATA_SIZE);
|
||||
}
|
||||
|
||||
static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
|
||||
|
@ -1124,13 +1123,13 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
|
|||
struct nvme_ns *ns, *next;
|
||||
LIST_HEAD(rm_list);
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list) {
|
||||
if (ns->disk && nvme_revalidate_disk(ns->disk)) {
|
||||
list_move_tail(&ns->list, &rm_list);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
|
||||
list_for_each_entry_safe(ns, next, &rm_list, list)
|
||||
nvme_ns_remove(ns);
|
||||
|
@ -1358,7 +1357,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
|
|||
|
||||
blk_queue_max_discard_sectors(queue, UINT_MAX);
|
||||
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
|
||||
|
||||
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
|
||||
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
|
||||
|
@ -1449,6 +1448,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
|
|||
if (ns->noiob)
|
||||
nvme_set_chunk_size(ns);
|
||||
nvme_update_disk_info(disk, ns, id);
|
||||
if (ns->ndev)
|
||||
nvme_nvm_update_nvm_info(ns);
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
if (ns->head->disk)
|
||||
nvme_update_disk_info(ns->head->disk, ns, id);
|
||||
|
@ -2217,16 +2218,33 @@ out_unlock:
|
|||
return ret;
|
||||
}
|
||||
|
||||
int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
u8 log_page, void *log,
|
||||
size_t size, size_t offset)
|
||||
{
|
||||
struct nvme_command c = { };
|
||||
unsigned long dwlen = size / 4 - 1;
|
||||
|
||||
c.get_log_page.opcode = nvme_admin_get_log_page;
|
||||
|
||||
if (ns)
|
||||
c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
|
||||
else
|
||||
c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
|
||||
|
||||
c.get_log_page.lid = log_page;
|
||||
c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
|
||||
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
|
||||
c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1));
|
||||
c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL);
|
||||
|
||||
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
|
||||
}
|
||||
|
||||
static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
|
||||
size_t size)
|
||||
{
|
||||
struct nvme_command c = { };
|
||||
|
||||
c.common.opcode = nvme_admin_get_log_page;
|
||||
c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
|
||||
c.common.cdw10[0] = nvme_get_log_dw10(log_page, size);
|
||||
|
||||
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
|
||||
return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
|
||||
}
|
||||
|
||||
static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
|
||||
|
@ -2440,7 +2458,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
|
|||
struct nvme_ns *ns;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
if (list_empty(&ctrl->namespaces)) {
|
||||
ret = -ENOTTY;
|
||||
goto out_unlock;
|
||||
|
@ -2457,14 +2475,14 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
|
|||
dev_warn(ctrl->device,
|
||||
"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
|
||||
kref_get(&ns->kref);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
|
||||
ret = nvme_user_cmd(ctrl, ns, argp);
|
||||
nvme_put_ns(ns);
|
||||
return ret;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2793,6 +2811,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
|
|||
|
||||
list_for_each_entry(h, &subsys->nsheads, entry) {
|
||||
if (nvme_ns_ids_valid(&new->ids) &&
|
||||
!list_empty(&h->list) &&
|
||||
nvme_ns_ids_equal(&new->ids, &h->ids))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -2893,7 +2912,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
|||
{
|
||||
struct nvme_ns *ns, *ret = NULL;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list) {
|
||||
if (ns->head->ns_id == nsid) {
|
||||
if (!kref_get_unless_zero(&ns->kref))
|
||||
|
@ -2904,7 +2923,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
|||
if (ns->head->ns_id > nsid)
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2949,7 +2968,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
|||
ns->queue = blk_mq_init_queue(ctrl->tagset);
|
||||
if (IS_ERR(ns->queue))
|
||||
goto out_free_ns;
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
|
||||
ns->queue->queuedata = ns;
|
||||
ns->ctrl = ctrl;
|
||||
|
||||
|
@ -3015,9 +3034,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
|||
|
||||
__nvme_revalidate_disk(disk, id);
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_add_tail(&ns->list, &ctrl->namespaces);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
|
||||
nvme_get_ctrl(ctrl);
|
||||
|
||||
|
@ -3033,6 +3052,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
|||
ns->disk->disk_name);
|
||||
|
||||
nvme_mpath_add_disk(ns->head);
|
||||
nvme_fault_inject_init(ns);
|
||||
return;
|
||||
out_unlink_ns:
|
||||
mutex_lock(&ctrl->subsys->lock);
|
||||
|
@ -3051,6 +3071,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
|
|||
if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
|
||||
return;
|
||||
|
||||
nvme_fault_inject_fini(ns);
|
||||
if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
|
||||
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
|
||||
&nvme_ns_id_attr_group);
|
||||
|
@ -3067,9 +3088,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
|
|||
list_del_rcu(&ns->siblings);
|
||||
mutex_unlock(&ns->ctrl->subsys->lock);
|
||||
|
||||
mutex_lock(&ns->ctrl->namespaces_mutex);
|
||||
down_write(&ns->ctrl->namespaces_rwsem);
|
||||
list_del_init(&ns->list);
|
||||
mutex_unlock(&ns->ctrl->namespaces_mutex);
|
||||
up_write(&ns->ctrl->namespaces_rwsem);
|
||||
|
||||
synchronize_srcu(&ns->head->srcu);
|
||||
nvme_mpath_check_last_path(ns);
|
||||
|
@ -3093,11 +3114,18 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
|
|||
unsigned nsid)
|
||||
{
|
||||
struct nvme_ns *ns, *next;
|
||||
LIST_HEAD(rm_list);
|
||||
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
|
||||
if (ns->head->ns_id > nsid)
|
||||
nvme_ns_remove(ns);
|
||||
list_move_tail(&ns->list, &rm_list);
|
||||
}
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
|
||||
list_for_each_entry_safe(ns, next, &rm_list, list)
|
||||
nvme_ns_remove(ns);
|
||||
|
||||
}
|
||||
|
||||
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
|
||||
|
@ -3107,7 +3135,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
|
|||
unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
|
||||
int ret = 0;
|
||||
|
||||
ns_list = kzalloc(0x1000, GFP_KERNEL);
|
||||
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
|
||||
if (!ns_list)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -3173,9 +3201,9 @@ static void nvme_scan_work(struct work_struct *work)
|
|||
}
|
||||
nvme_scan_ns_sequential(ctrl, nn);
|
||||
done:
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_sort(NULL, &ctrl->namespaces, ns_cmp);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
kfree(id);
|
||||
}
|
||||
|
||||
|
@ -3197,6 +3225,7 @@ EXPORT_SYMBOL_GPL(nvme_queue_scan);
|
|||
void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_ns *ns, *next;
|
||||
LIST_HEAD(ns_list);
|
||||
|
||||
/*
|
||||
* The dead states indicates the controller was not gracefully
|
||||
|
@ -3207,7 +3236,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
|
|||
if (ctrl->state == NVME_CTRL_DEAD)
|
||||
nvme_kill_queues(ctrl);
|
||||
|
||||
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
|
||||
down_write(&ctrl->namespaces_rwsem);
|
||||
list_splice_init(&ctrl->namespaces, &ns_list);
|
||||
up_write(&ctrl->namespaces_rwsem);
|
||||
|
||||
list_for_each_entry_safe(ns, next, &ns_list, list)
|
||||
nvme_ns_remove(ns);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
|
||||
|
@ -3337,6 +3370,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
|
|||
flush_work(&ctrl->async_event_work);
|
||||
flush_work(&ctrl->scan_work);
|
||||
cancel_work_sync(&ctrl->fw_act_work);
|
||||
if (ctrl->ops->stop_ctrl)
|
||||
ctrl->ops->stop_ctrl(ctrl);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
|
||||
|
||||
|
@ -3394,7 +3429,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
|
|||
ctrl->state = NVME_CTRL_NEW;
|
||||
spin_lock_init(&ctrl->lock);
|
||||
INIT_LIST_HEAD(&ctrl->namespaces);
|
||||
mutex_init(&ctrl->namespaces_mutex);
|
||||
init_rwsem(&ctrl->namespaces_rwsem);
|
||||
ctrl->dev = dev;
|
||||
ctrl->ops = ops;
|
||||
ctrl->quirks = quirks;
|
||||
|
@ -3455,7 +3490,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
|
||||
/* Forcibly unquiesce queues to avoid blocking dispatch */
|
||||
if (ctrl->admin_q)
|
||||
|
@ -3474,7 +3509,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
|
|||
/* Forcibly unquiesce queues to avoid blocking dispatch */
|
||||
blk_mq_unquiesce_queue(ns->queue);
|
||||
}
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_kill_queues);
|
||||
|
||||
|
@ -3482,10 +3517,10 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list)
|
||||
blk_mq_unfreeze_queue(ns->queue);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_unfreeze);
|
||||
|
||||
|
@ -3493,13 +3528,13 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list) {
|
||||
timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
|
||||
if (timeout <= 0)
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
|
||||
|
||||
|
@ -3507,10 +3542,10 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list)
|
||||
blk_mq_freeze_queue_wait(ns->queue);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_wait_freeze);
|
||||
|
||||
|
@ -3518,10 +3553,10 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list)
|
||||
blk_freeze_queue_start(ns->queue);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_start_freeze);
|
||||
|
||||
|
@ -3529,10 +3564,10 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list)
|
||||
blk_mq_quiesce_queue(ns->queue);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_stop_queues);
|
||||
|
||||
|
@ -3540,10 +3575,10 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
|
|||
{
|
||||
struct nvme_ns *ns;
|
||||
|
||||
mutex_lock(&ctrl->namespaces_mutex);
|
||||
down_read(&ctrl->namespaces_rwsem);
|
||||
list_for_each_entry(ns, &ctrl->namespaces, list)
|
||||
blk_mq_unquiesce_queue(ns->queue);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
up_read(&ctrl->namespaces_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_start_queues);
|
||||
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* fault injection support for nvme.
|
||||
*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/moduleparam.h>
|
||||
#include "nvme.h"
|
||||
|
||||
static DECLARE_FAULT_ATTR(fail_default_attr);
|
||||
/* optional fault injection attributes boot time option:
|
||||
* nvme_core.fail_request=<interval>,<probability>,<space>,<times>
|
||||
*/
|
||||
static char *fail_request;
|
||||
module_param(fail_request, charp, 0000);
|
||||
|
||||
void nvme_fault_inject_init(struct nvme_ns *ns)
|
||||
{
|
||||
struct dentry *dir, *parent;
|
||||
char *name = ns->disk->disk_name;
|
||||
struct nvme_fault_inject *fault_inj = &ns->fault_inject;
|
||||
struct fault_attr *attr = &fault_inj->attr;
|
||||
|
||||
/* set default fault injection attribute */
|
||||
if (fail_request)
|
||||
setup_fault_attr(&fail_default_attr, fail_request);
|
||||
|
||||
/* create debugfs directory and attribute */
|
||||
parent = debugfs_create_dir(name, NULL);
|
||||
if (!parent) {
|
||||
pr_warn("%s: failed to create debugfs directory\n", name);
|
||||
return;
|
||||
}
|
||||
|
||||
*attr = fail_default_attr;
|
||||
dir = fault_create_debugfs_attr("fault_inject", parent, attr);
|
||||
if (IS_ERR(dir)) {
|
||||
pr_warn("%s: failed to create debugfs attr\n", name);
|
||||
debugfs_remove_recursive(parent);
|
||||
return;
|
||||
}
|
||||
ns->fault_inject.parent = parent;
|
||||
|
||||
/* create debugfs for status code and dont_retry */
|
||||
fault_inj->status = NVME_SC_INVALID_OPCODE;
|
||||
fault_inj->dont_retry = true;
|
||||
debugfs_create_x16("status", 0600, dir, &fault_inj->status);
|
||||
debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry);
|
||||
}
|
||||
|
||||
void nvme_fault_inject_fini(struct nvme_ns *ns)
|
||||
{
|
||||
/* remove debugfs directories */
|
||||
debugfs_remove_recursive(ns->fault_inject.parent);
|
||||
}
|
||||
|
||||
void nvme_should_fail(struct request *req)
|
||||
{
|
||||
struct gendisk *disk = req->rq_disk;
|
||||
struct nvme_ns *ns = NULL;
|
||||
u16 status;
|
||||
|
||||
/*
|
||||
* make sure this request is coming from a valid namespace
|
||||
*/
|
||||
if (!disk)
|
||||
return;
|
||||
|
||||
ns = disk->private_data;
|
||||
if (ns && should_fail(&ns->fault_inject.attr, 1)) {
|
||||
/* inject status code and DNR bit */
|
||||
status = ns->fault_inject.status;
|
||||
if (ns->fault_inject.dont_retry)
|
||||
status |= NVME_SC_DNR;
|
||||
nvme_req(req)->status = status;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_should_fail);
|
|
@ -588,6 +588,8 @@ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
|
|||
return ERR_PTR(-ESTALE);
|
||||
}
|
||||
|
||||
rport->remoteport.port_role = pinfo->port_role;
|
||||
rport->remoteport.port_id = pinfo->port_id;
|
||||
rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
|
||||
rport->dev_loss_end = 0;
|
||||
|
||||
|
@ -768,8 +770,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
|
|||
*/
|
||||
if (nvme_reset_ctrl(&ctrl->ctrl)) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Couldn't schedule reset. "
|
||||
"Deleting controller.\n",
|
||||
"NVME-FC{%d}: Couldn't schedule reset.\n",
|
||||
ctrl->cnum);
|
||||
nvme_delete_ctrl(&ctrl->ctrl);
|
||||
}
|
||||
|
@ -836,8 +837,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
|
|||
/* if dev_loss_tmo==0, dev loss is immediate */
|
||||
if (!portptr->dev_loss_tmo) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: controller connectivity lost. "
|
||||
"Deleting controller.\n",
|
||||
"NVME-FC{%d}: controller connectivity lost.\n",
|
||||
ctrl->cnum);
|
||||
nvme_delete_ctrl(&ctrl->ctrl);
|
||||
} else
|
||||
|
@ -2076,20 +2076,10 @@ nvme_fc_timeout(struct request *rq, bool reserved)
|
|||
{
|
||||
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
||||
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
||||
int ret;
|
||||
|
||||
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
|
||||
atomic_read(&op->state) == FCPOP_STATE_ABORTED)
|
||||
return BLK_EH_RESET_TIMER;
|
||||
|
||||
ret = __nvme_fc_abort_op(ctrl, op);
|
||||
if (ret)
|
||||
/* io wasn't active to abort */
|
||||
return BLK_EH_NOT_HANDLED;
|
||||
|
||||
/*
|
||||
* we can't individually ABTS an io without affecting the queue,
|
||||
* thus killing the queue, adn thus the association.
|
||||
* thus killing the queue, and thus the association.
|
||||
* So resolve by performing a controller reset, which will stop
|
||||
* the host/io stack, terminate the association on the link,
|
||||
* and recreate an association on the link.
|
||||
|
@ -2191,7 +2181,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
|
|||
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
|
||||
struct nvme_command *sqe = &cmdiu->sqe;
|
||||
u32 csn;
|
||||
int ret;
|
||||
int ret, opstate;
|
||||
|
||||
/*
|
||||
* before attempting to send the io, check to see if we believe
|
||||
|
@ -2269,6 +2259,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
|
|||
queue->lldd_handle, &op->fcp_req);
|
||||
|
||||
if (ret) {
|
||||
opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
|
||||
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
|
||||
|
||||
if (!(op->flags & FCOP_FLAGS_AEN))
|
||||
nvme_fc_unmap_data(ctrl, op->rq, op);
|
||||
|
||||
|
@ -2889,14 +2882,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
|
|||
if (portptr->port_state == FC_OBJSTATE_ONLINE)
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Max reconnect attempts (%d) "
|
||||
"reached. Removing controller\n",
|
||||
"reached.\n",
|
||||
ctrl->cnum, ctrl->ctrl.nr_reconnects);
|
||||
else
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
|
||||
"while waiting for remoteport connectivity. "
|
||||
"Removing controller\n", ctrl->cnum,
|
||||
portptr->dev_loss_tmo);
|
||||
"while waiting for remoteport connectivity.\n",
|
||||
ctrl->cnum, portptr->dev_loss_tmo);
|
||||
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
|
||||
}
|
||||
}
|
||||
|
@ -3133,6 +3125,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
|
|||
}
|
||||
|
||||
if (ret) {
|
||||
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
|
||||
cancel_work_sync(&ctrl->ctrl.reset_work);
|
||||
cancel_delayed_work_sync(&ctrl->connect_work);
|
||||
|
||||
/* couldn't schedule retry - fail out */
|
||||
dev_err(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
|
||||
|
|
|
@ -35,6 +35,10 @@ enum nvme_nvm_admin_opcode {
|
|||
nvme_nvm_admin_set_bb_tbl = 0xf1,
|
||||
};
|
||||
|
||||
enum nvme_nvm_log_page {
|
||||
NVME_NVM_LOG_REPORT_CHUNK = 0xca,
|
||||
};
|
||||
|
||||
struct nvme_nvm_ph_rw {
|
||||
__u8 opcode;
|
||||
__u8 flags;
|
||||
|
@ -51,6 +55,21 @@ struct nvme_nvm_ph_rw {
|
|||
__le64 resv;
|
||||
};
|
||||
|
||||
struct nvme_nvm_erase_blk {
|
||||
__u8 opcode;
|
||||
__u8 flags;
|
||||
__u16 command_id;
|
||||
__le32 nsid;
|
||||
__u64 rsvd[2];
|
||||
__le64 prp1;
|
||||
__le64 prp2;
|
||||
__le64 spba;
|
||||
__le16 length;
|
||||
__le16 control;
|
||||
__le32 dsmgmt;
|
||||
__le64 resv;
|
||||
};
|
||||
|
||||
struct nvme_nvm_identity {
|
||||
__u8 opcode;
|
||||
__u8 flags;
|
||||
|
@ -59,8 +78,7 @@ struct nvme_nvm_identity {
|
|||
__u64 rsvd[2];
|
||||
__le64 prp1;
|
||||
__le64 prp2;
|
||||
__le32 chnl_off;
|
||||
__u32 rsvd11[5];
|
||||
__u32 rsvd11[6];
|
||||
};
|
||||
|
||||
struct nvme_nvm_getbbtbl {
|
||||
|
@ -90,44 +108,18 @@ struct nvme_nvm_setbbtbl {
|
|||
__u32 rsvd4[3];
|
||||
};
|
||||
|
||||
struct nvme_nvm_erase_blk {
|
||||
__u8 opcode;
|
||||
__u8 flags;
|
||||
__u16 command_id;
|
||||
__le32 nsid;
|
||||
__u64 rsvd[2];
|
||||
__le64 prp1;
|
||||
__le64 prp2;
|
||||
__le64 spba;
|
||||
__le16 length;
|
||||
__le16 control;
|
||||
__le32 dsmgmt;
|
||||
__le64 resv;
|
||||
};
|
||||
|
||||
struct nvme_nvm_command {
|
||||
union {
|
||||
struct nvme_common_command common;
|
||||
struct nvme_nvm_identity identity;
|
||||
struct nvme_nvm_ph_rw ph_rw;
|
||||
struct nvme_nvm_erase_blk erase;
|
||||
struct nvme_nvm_identity identity;
|
||||
struct nvme_nvm_getbbtbl get_bb;
|
||||
struct nvme_nvm_setbbtbl set_bb;
|
||||
struct nvme_nvm_erase_blk erase;
|
||||
};
|
||||
};
|
||||
|
||||
#define NVME_NVM_LP_MLC_PAIRS 886
|
||||
struct nvme_nvm_lp_mlc {
|
||||
__le16 num_pairs;
|
||||
__u8 pairs[NVME_NVM_LP_MLC_PAIRS];
|
||||
};
|
||||
|
||||
struct nvme_nvm_lp_tbl {
|
||||
__u8 id[8];
|
||||
struct nvme_nvm_lp_mlc mlc;
|
||||
};
|
||||
|
||||
struct nvme_nvm_id_group {
|
||||
struct nvme_nvm_id12_grp {
|
||||
__u8 mtype;
|
||||
__u8 fmtype;
|
||||
__le16 res16;
|
||||
|
@ -150,11 +142,10 @@ struct nvme_nvm_id_group {
|
|||
__le32 mpos;
|
||||
__le32 mccap;
|
||||
__le16 cpar;
|
||||
__u8 reserved[10];
|
||||
struct nvme_nvm_lp_tbl lptbl;
|
||||
__u8 reserved[906];
|
||||
} __packed;
|
||||
|
||||
struct nvme_nvm_addr_format {
|
||||
struct nvme_nvm_id12_addrf {
|
||||
__u8 ch_offset;
|
||||
__u8 ch_len;
|
||||
__u8 lun_offset;
|
||||
|
@ -165,21 +156,22 @@ struct nvme_nvm_addr_format {
|
|||
__u8 blk_len;
|
||||
__u8 pg_offset;
|
||||
__u8 pg_len;
|
||||
__u8 sect_offset;
|
||||
__u8 sect_len;
|
||||
__u8 sec_offset;
|
||||
__u8 sec_len;
|
||||
__u8 res[4];
|
||||
} __packed;
|
||||
|
||||
struct nvme_nvm_id {
|
||||
struct nvme_nvm_id12 {
|
||||
__u8 ver_id;
|
||||
__u8 vmnt;
|
||||
__u8 cgrps;
|
||||
__u8 res;
|
||||
__le32 cap;
|
||||
__le32 dom;
|
||||
struct nvme_nvm_addr_format ppaf;
|
||||
struct nvme_nvm_id12_addrf ppaf;
|
||||
__u8 resv[228];
|
||||
struct nvme_nvm_id_group groups[4];
|
||||
struct nvme_nvm_id12_grp grp;
|
||||
__u8 resv2[2880];
|
||||
} __packed;
|
||||
|
||||
struct nvme_nvm_bb_tbl {
|
||||
|
@ -196,6 +188,68 @@ struct nvme_nvm_bb_tbl {
|
|||
__u8 blk[0];
|
||||
};
|
||||
|
||||
struct nvme_nvm_id20_addrf {
|
||||
__u8 grp_len;
|
||||
__u8 pu_len;
|
||||
__u8 chk_len;
|
||||
__u8 lba_len;
|
||||
__u8 resv[4];
|
||||
};
|
||||
|
||||
struct nvme_nvm_id20 {
|
||||
__u8 mjr;
|
||||
__u8 mnr;
|
||||
__u8 resv[6];
|
||||
|
||||
struct nvme_nvm_id20_addrf lbaf;
|
||||
|
||||
__le32 mccap;
|
||||
__u8 resv2[12];
|
||||
|
||||
__u8 wit;
|
||||
__u8 resv3[31];
|
||||
|
||||
/* Geometry */
|
||||
__le16 num_grp;
|
||||
__le16 num_pu;
|
||||
__le32 num_chk;
|
||||
__le32 clba;
|
||||
__u8 resv4[52];
|
||||
|
||||
/* Write data requirements */
|
||||
__le32 ws_min;
|
||||
__le32 ws_opt;
|
||||
__le32 mw_cunits;
|
||||
__le32 maxoc;
|
||||
__le32 maxocpu;
|
||||
__u8 resv5[44];
|
||||
|
||||
/* Performance related metrics */
|
||||
__le32 trdt;
|
||||
__le32 trdm;
|
||||
__le32 twrt;
|
||||
__le32 twrm;
|
||||
__le32 tcrst;
|
||||
__le32 tcrsm;
|
||||
__u8 resv6[40];
|
||||
|
||||
/* Reserved area */
|
||||
__u8 resv7[2816];
|
||||
|
||||
/* Vendor specific */
|
||||
__u8 vs[1024];
|
||||
};
|
||||
|
||||
struct nvme_nvm_chk_meta {
|
||||
__u8 state;
|
||||
__u8 type;
|
||||
__u8 wi;
|
||||
__u8 rsvd[5];
|
||||
__le64 slba;
|
||||
__le64 cnlb;
|
||||
__le64 wp;
|
||||
};
|
||||
|
||||
/*
|
||||
* Check we didn't inadvertently grow the command struct
|
||||
*/
|
||||
|
@ -203,105 +257,238 @@ static inline void _nvme_nvm_check_size(void)
|
|||
{
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) !=
|
||||
sizeof(struct nvm_chk_meta));
|
||||
}
|
||||
|
||||
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
|
||||
static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst,
|
||||
struct nvme_nvm_id12_addrf *src)
|
||||
{
|
||||
struct nvme_nvm_id_group *src;
|
||||
struct nvm_id_group *grp;
|
||||
dst->ch_len = src->ch_len;
|
||||
dst->lun_len = src->lun_len;
|
||||
dst->blk_len = src->blk_len;
|
||||
dst->pg_len = src->pg_len;
|
||||
dst->pln_len = src->pln_len;
|
||||
dst->sec_len = src->sec_len;
|
||||
|
||||
dst->ch_offset = src->ch_offset;
|
||||
dst->lun_offset = src->lun_offset;
|
||||
dst->blk_offset = src->blk_offset;
|
||||
dst->pg_offset = src->pg_offset;
|
||||
dst->pln_offset = src->pln_offset;
|
||||
dst->sec_offset = src->sec_offset;
|
||||
|
||||
dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
|
||||
dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
|
||||
dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset;
|
||||
dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset;
|
||||
dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset;
|
||||
dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
|
||||
}
|
||||
|
||||
static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id,
|
||||
struct nvm_geo *geo)
|
||||
{
|
||||
struct nvme_nvm_id12_grp *src;
|
||||
int sec_per_pg, sec_per_pl, pg_per_blk;
|
||||
|
||||
if (nvme_nvm_id->cgrps != 1)
|
||||
if (id->cgrps != 1)
|
||||
return -EINVAL;
|
||||
|
||||
src = &nvme_nvm_id->groups[0];
|
||||
grp = &nvm_id->grp;
|
||||
src = &id->grp;
|
||||
|
||||
grp->mtype = src->mtype;
|
||||
grp->fmtype = src->fmtype;
|
||||
|
||||
grp->num_ch = src->num_ch;
|
||||
grp->num_lun = src->num_lun;
|
||||
|
||||
grp->num_chk = le16_to_cpu(src->num_chk);
|
||||
grp->csecs = le16_to_cpu(src->csecs);
|
||||
grp->sos = le16_to_cpu(src->sos);
|
||||
|
||||
pg_per_blk = le16_to_cpu(src->num_pg);
|
||||
sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs;
sec_per_pl = sec_per_pg * src->num_pln;
grp->clba = sec_per_pl * pg_per_blk;
grp->ws_per_chk = pg_per_blk;

grp->mpos = le32_to_cpu(src->mpos);
grp->cpar = le16_to_cpu(src->cpar);
grp->mccap = le32_to_cpu(src->mccap);

grp->ws_opt = grp->ws_min = sec_per_pg;
grp->ws_seq = NVM_IO_SNGL_ACCESS;

if (grp->mpos & 0x020202) {
grp->ws_seq = NVM_IO_DUAL_ACCESS;
grp->ws_opt <<= 1;
} else if (grp->mpos & 0x040404) {
grp->ws_seq = NVM_IO_QUAD_ACCESS;
grp->ws_opt <<= 2;
if (src->mtype != 0) {
pr_err("nvm: memory type not supported\n");
return -EINVAL;
}

grp->trdt = le32_to_cpu(src->trdt);
grp->trdm = le32_to_cpu(src->trdm);
grp->tprt = le32_to_cpu(src->tprt);
grp->tprm = le32_to_cpu(src->tprm);
grp->tbet = le32_to_cpu(src->tbet);
grp->tbem = le32_to_cpu(src->tbem);
/* 1.2 spec. only reports a single version id - unfold */
geo->major_ver_id = id->ver_id;
geo->minor_ver_id = 2;

/* Set compacted version for upper layers */
geo->version = NVM_OCSSD_SPEC_12;

geo->num_ch = src->num_ch;
geo->num_lun = src->num_lun;
geo->all_luns = geo->num_ch * geo->num_lun;

geo->num_chk = le16_to_cpu(src->num_chk);

geo->csecs = le16_to_cpu(src->csecs);
geo->sos = le16_to_cpu(src->sos);

pg_per_blk = le16_to_cpu(src->num_pg);
sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs;
sec_per_pl = sec_per_pg * src->num_pln;
geo->clba = sec_per_pl * pg_per_blk;

geo->all_chunks = geo->all_luns * geo->num_chk;
geo->total_secs = geo->clba * geo->all_chunks;

geo->ws_min = sec_per_pg;
geo->ws_opt = sec_per_pg;
geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */

/* Do not impose values for maximum number of open blocks as it is
* unspecified in 1.2. Users of 1.2 must be aware of this and eventually
* specify these values through a quirk if restrictions apply.
*/
geo->maxoc = geo->all_luns * geo->num_chk;
geo->maxocpu = geo->num_chk;

geo->mccap = le32_to_cpu(src->mccap);

geo->trdt = le32_to_cpu(src->trdt);
geo->trdm = le32_to_cpu(src->trdm);
geo->tprt = le32_to_cpu(src->tprt);
geo->tprm = le32_to_cpu(src->tprm);
geo->tbet = le32_to_cpu(src->tbet);
geo->tbem = le32_to_cpu(src->tbem);

/* 1.2 compatibility */
grp->num_pln = src->num_pln;
grp->num_pg = le16_to_cpu(src->num_pg);
grp->fpg_sz = le16_to_cpu(src->fpg_sz);
geo->vmnt = id->vmnt;
geo->cap = le32_to_cpu(id->cap);
geo->dom = le32_to_cpu(id->dom);

geo->mtype = src->mtype;
geo->fmtype = src->fmtype;

geo->cpar = le16_to_cpu(src->cpar);
geo->mpos = le32_to_cpu(src->mpos);

geo->pln_mode = NVM_PLANE_SINGLE;

if (geo->mpos & 0x020202) {
geo->pln_mode = NVM_PLANE_DOUBLE;
geo->ws_opt <<= 1;
} else if (geo->mpos & 0x040404) {
geo->pln_mode = NVM_PLANE_QUAD;
geo->ws_opt <<= 2;
}

geo->num_pln = src->num_pln;
geo->num_pg = le16_to_cpu(src->num_pg);
geo->fpg_sz = le16_to_cpu(src->fpg_sz);

nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf);

return 0;
}

static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
static void nvme_nvm_set_addr_20(struct nvm_addrf *dst,
struct nvme_nvm_id20_addrf *src)
{
dst->ch_len = src->grp_len;
dst->lun_len = src->pu_len;
dst->chk_len = src->chk_len;
dst->sec_len = src->lba_len;

dst->sec_offset = 0;
dst->chk_offset = dst->sec_len;
dst->lun_offset = dst->chk_offset + dst->chk_len;
dst->ch_offset = dst->lun_offset + dst->lun_len;

dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset;
dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset;
dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset;
dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset;
}
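
The helper above turns the 2.0 LBA format descriptor into bit offsets and masks, with the sector field in the lowest bits, then chunk, parallel unit, and group. A minimal user-space sketch of how such offsets and masks pack and unpack an address follows; the struct and function names are illustrative only and are not part of this patch.

/*
 * Illustrative sketch only (not from the patch): packing and unpacking a
 * generic OCSSD 2.0-style address with offsets/masks computed as above.
 * All names here are made up for the example.
 */
struct example_addrf {
	unsigned char ch_len, lun_len, chk_len, sec_len;
	unsigned char ch_offset, lun_offset, chk_offset, sec_offset;
	unsigned long long ch_mask, lun_mask, chk_mask, sec_mask;
};

static unsigned long long example_pack(const struct example_addrf *af,
				       unsigned int grp, unsigned int pu,
				       unsigned int chk, unsigned int sec)
{
	/* each field is shifted into its slot; fields must fit their lengths */
	return ((unsigned long long)grp << af->ch_offset) |
	       ((unsigned long long)pu << af->lun_offset) |
	       ((unsigned long long)chk << af->chk_offset) |
	       ((unsigned long long)sec << af->sec_offset);
}

static unsigned int example_chunk(const struct example_addrf *af,
				  unsigned long long addr)
{
	/* extract one field by masking and shifting back down */
	return (unsigned int)((addr & af->chk_mask) >> af->chk_offset);
}

Precomputing both the shift and the mask, as the driver does, means hot-path address conversions do not have to rebuild ((1ULL << len) - 1) for every I/O.
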

static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id,
struct nvm_geo *geo)
{
geo->major_ver_id = id->mjr;
geo->minor_ver_id = id->mnr;

/* Set compacted version for upper layers */
geo->version = NVM_OCSSD_SPEC_20;

if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) {
pr_err("nvm: OCSSD version not supported (v%d.%d)\n",
geo->major_ver_id, geo->minor_ver_id);
return -EINVAL;
}

geo->num_ch = le16_to_cpu(id->num_grp);
geo->num_lun = le16_to_cpu(id->num_pu);
geo->all_luns = geo->num_ch * geo->num_lun;

geo->num_chk = le32_to_cpu(id->num_chk);
geo->clba = le32_to_cpu(id->clba);

geo->all_chunks = geo->all_luns * geo->num_chk;
geo->total_secs = geo->clba * geo->all_chunks;

geo->ws_min = le32_to_cpu(id->ws_min);
geo->ws_opt = le32_to_cpu(id->ws_opt);
geo->mw_cunits = le32_to_cpu(id->mw_cunits);
geo->maxoc = le32_to_cpu(id->maxoc);
geo->maxocpu = le32_to_cpu(id->maxocpu);

geo->trdt = le32_to_cpu(id->trdt);
geo->trdm = le32_to_cpu(id->trdm);
geo->tprt = le32_to_cpu(id->twrt);
geo->tprm = le32_to_cpu(id->twrm);
geo->tbet = le32_to_cpu(id->tcrst);
geo->tbem = le32_to_cpu(id->tcrsm);

nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf);

return 0;
}

static int nvme_nvm_identity(struct nvm_dev *nvmdev)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
struct nvme_nvm_id *nvme_nvm_id;
struct nvme_nvm_id12 *id;
struct nvme_nvm_command c = {};
int ret;

c.identity.opcode = nvme_nvm_admin_identity;
c.identity.nsid = cpu_to_le32(ns->head->ns_id);
c.identity.chnl_off = 0;

nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
if (!nvme_nvm_id)
id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL);
if (!id)
return -ENOMEM;

ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
nvme_nvm_id, sizeof(struct nvme_nvm_id));
id, sizeof(struct nvme_nvm_id12));
if (ret) {
ret = -EIO;
goto out;
}

nvm_id->ver_id = nvme_nvm_id->ver_id;
nvm_id->vmnt = nvme_nvm_id->vmnt;
nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
sizeof(struct nvm_addr_format));
/*
* The 1.2 and 2.0 specifications share the first byte in their geometry
* command to make it possible to know what version a device implements.
*/
switch (id->ver_id) {
case 1:
ret = nvme_nvm_setup_12(id, &nvmdev->geo);
break;
case 2:
ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id,
&nvmdev->geo);
break;
default:
dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n",
id->ver_id);
ret = -EINVAL;
}

ret = init_grps(nvm_id, nvme_nvm_id);
out:
kfree(nvme_nvm_id);
kfree(id);
return ret;
}
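
As the comment in nvme_nvm_identity() notes, both specifications place the version identifier in the first byte of the identify data, so a single identify command can be issued and the buffer reinterpreted afterwards. A small illustrative sketch of that dispatch on the shared leading byte (hypothetical names, not taken from the patch):

/*
 * Illustrative sketch only: decide how to parse a geometry/identify buffer
 * from the version byte shared by the 1.2 and 2.0 layouts.
 */
static int example_geometry_version(const void *id_buf)
{
	const unsigned char *ver = id_buf;	/* byte 0 in both layouts */

	switch (*ver) {
	case 1:
		return 12;	/* parse the remainder as the 1.2 layout */
	case 2:
		return 20;	/* parse the remainder as the 2.0 layout */
	default:
		return -1;	/* unknown OCSSD revision */
	}
}
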

@@ -314,7 +501,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
int nr_blks = geo->nr_chks * geo->plane_mode;
int nr_blks = geo->num_chk * geo->num_pln;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
int ret = 0;

@@ -355,7 +542,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
goto out;
}

memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode);
memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln);
out:
kfree(bb_tbl);
return ret;

@@ -382,6 +569,61 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
return ret;
}

/*
* Expect the lba in device format
*/
static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
struct nvm_chk_meta *meta,
sector_t slba, int nchks)
{
struct nvm_geo *geo = &ndev->geo;
struct nvme_ns *ns = ndev->q->queuedata;
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta;
struct ppa_addr ppa;
size_t left = nchks * sizeof(struct nvme_nvm_chk_meta);
size_t log_pos, offset, len;
int ret, i;

/* Normalize lba address space to obtain log offset */
ppa.ppa = slba;
ppa = dev_to_generic_addr(ndev, ppa);

log_pos = ppa.m.chk;
log_pos += ppa.m.pu * geo->num_chk;
log_pos += ppa.m.grp * geo->num_lun * geo->num_chk;

offset = log_pos * sizeof(struct nvme_nvm_chk_meta);

while (left) {
len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9);

ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
dev_meta, len, offset);
if (ret) {
dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
break;
}

for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) {
meta->state = dev_meta->state;
meta->type = dev_meta->type;
meta->wi = dev_meta->wi;
meta->slba = le64_to_cpu(dev_meta->slba);
meta->cnlb = le64_to_cpu(dev_meta->cnlb);
meta->wp = le64_to_cpu(dev_meta->wp);

meta++;
dev_meta++;
}

offset += len;
left -= len;
}

return ret;
}
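
The report-chunk log is laid out per chunk within a parallel unit and per parallel unit within a group, which is why the starting log position above is chk + pu * num_chk + grp * num_lun * num_chk, and the transfer is split into pieces no larger than the controller's max_hw_sectors. A standalone sketch of that offset arithmetic, using illustrative names and an invented example geometry rather than anything from the driver, is:

/*
 * Illustrative sketch only: compute the report-chunk log entry index for a
 * (group, parallel unit, chunk) triple. Names and geometry are made up;
 * the kernel code above performs the same arithmetic on struct nvm_geo.
 */
#include <stdio.h>

struct example_geo {
	unsigned int num_lun;	/* parallel units per group */
	unsigned int num_chk;	/* chunks per parallel unit */
};

static unsigned long example_chunk_log_pos(const struct example_geo *geo,
					   unsigned int grp, unsigned int pu,
					   unsigned int chk)
{
	return (unsigned long)chk +
	       (unsigned long)pu * geo->num_chk +
	       (unsigned long)grp * geo->num_lun * geo->num_chk;
}

int main(void)
{
	struct example_geo geo = { .num_lun = 4, .num_chk = 1020 };

	/* entry index of chunk 7 on parallel unit 2 of group 1 */
	printf("%lu\n", example_chunk_log_pos(&geo, 1, 2, 7));
	return 0;
}
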

static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
struct nvme_nvm_command *c)
{

@@ -513,6 +755,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.get_bb_tbl = nvme_nvm_get_bb_tbl,
.set_bb_tbl = nvme_nvm_set_bb_tbl,

.get_chk_meta = nvme_nvm_get_chk_meta,

.submit_io = nvme_nvm_submit_io,
.submit_io_sync = nvme_nvm_submit_io_sync,

@@ -520,8 +764,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
.dev_dma_alloc = nvme_nvm_dev_dma_alloc,
.dev_dma_free = nvme_nvm_dev_dma_free,

.max_phys_sect = 64,
};

static int nvme_nvm_submit_user_cmd(struct request_queue *q,

@@ -722,6 +964,15 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
}
}

void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
{
struct nvm_dev *ndev = ns->ndev;
struct nvm_geo *geo = &ndev->geo;

geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
}

int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
struct request_queue *q = ns->queue;

@@ -748,125 +999,205 @@ void nvme_nvm_unregister(struct nvme_ns *ns)
}

static ssize_t nvm_dev_attr_show(struct device *dev,
struct device_attribute *dattr, char *page)
struct device_attribute *dattr, char *page)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
struct nvm_dev *ndev = ns->ndev;
struct nvm_id *id;
struct nvm_id_group *grp;
struct nvm_geo *geo = &ndev->geo;
struct attribute *attr;

if (!ndev)
return 0;

id = &ndev->identity;
grp = &id->grp;
attr = &dattr->attr;

if (strcmp(attr->name, "version") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id);
} else if (strcmp(attr->name, "vendor_opcode") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt);
if (geo->major_ver_id == 1)
return scnprintf(page, PAGE_SIZE, "%u\n",
geo->major_ver_id);
else
return scnprintf(page, PAGE_SIZE, "%u.%u\n",
geo->major_ver_id,
geo->minor_ver_id);
} else if (strcmp(attr->name, "capabilities") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap);
} else if (strcmp(attr->name, "read_typ") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt);
} else if (strcmp(attr->name, "read_max") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm);
} else {
return scnprintf(page,
PAGE_SIZE,
"Unhandled attr(%s) in `%s`\n",
attr->name, __func__);
}
}

static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page)
{
return scnprintf(page, PAGE_SIZE,
"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
ppaf->ch_offset, ppaf->ch_len,
ppaf->lun_offset, ppaf->lun_len,
ppaf->pln_offset, ppaf->pln_len,
ppaf->blk_offset, ppaf->blk_len,
ppaf->pg_offset, ppaf->pg_len,
ppaf->sec_offset, ppaf->sec_len);
}

static ssize_t nvm_dev_attr_show_12(struct device *dev,
struct device_attribute *dattr, char *page)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
struct nvm_dev *ndev = ns->ndev;
struct nvm_geo *geo = &ndev->geo;
struct attribute *attr;

if (!ndev)
return 0;

attr = &dattr->attr;

if (strcmp(attr->name, "vendor_opcode") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt);
} else if (strcmp(attr->name, "device_mode") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom);
/* kept for compatibility */
} else if (strcmp(attr->name, "media_manager") == 0) {
return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
} else if (strcmp(attr->name, "ppa_format") == 0) {
return scnprintf(page, PAGE_SIZE,
"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
id->ppaf.ch_offset, id->ppaf.ch_len,
id->ppaf.lun_offset, id->ppaf.lun_len,
id->ppaf.pln_offset, id->ppaf.pln_len,
id->ppaf.blk_offset, id->ppaf.blk_len,
id->ppaf.pg_offset, id->ppaf.pg_len,
id->ppaf.sect_offset, id->ppaf.sect_len);
return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page);
} else if (strcmp(attr->name, "media_type") == 0) { /* u8 */
return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype);
} else if (strcmp(attr->name, "flash_media_type") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype);
} else if (strcmp(attr->name, "num_channels") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
} else if (strcmp(attr->name, "num_luns") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
} else if (strcmp(attr->name, "num_planes") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln);
} else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
} else if (strcmp(attr->name, "num_pages") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg);
} else if (strcmp(attr->name, "page_size") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz);
} else if (strcmp(attr->name, "hw_sector_size") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs);
} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos);
} else if (strcmp(attr->name, "read_typ") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
} else if (strcmp(attr->name, "read_max") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos);
} else if (strcmp(attr->name, "prog_typ") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
} else if (strcmp(attr->name, "prog_max") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
} else if (strcmp(attr->name, "erase_typ") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
} else if (strcmp(attr->name, "erase_max") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem);
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
} else if (strcmp(attr->name, "multiplane_modes") == 0) {
return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos);
return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos);
} else if (strcmp(attr->name, "media_capabilities") == 0) {
return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap);
return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap);
} else if (strcmp(attr->name, "max_phys_secs") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n",
ndev->ops->max_phys_sect);
return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA);
} else {
return scnprintf(page,
PAGE_SIZE,
"Unhandled attr(%s) in `nvm_dev_attr_show`\n",
attr->name);
return scnprintf(page, PAGE_SIZE,
"Unhandled attr(%s) in `%s`\n",
attr->name, __func__);
}
}

#define NVM_DEV_ATTR_RO(_name) \
static ssize_t nvm_dev_attr_show_20(struct device *dev,
struct device_attribute *dattr, char *page)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
struct nvm_dev *ndev = ns->ndev;
struct nvm_geo *geo = &ndev->geo;
struct attribute *attr;

if (!ndev)
return 0;

attr = &dattr->attr;

if (strcmp(attr->name, "groups") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch);
} else if (strcmp(attr->name, "punits") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun);
} else if (strcmp(attr->name, "chunks") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk);
} else if (strcmp(attr->name, "clba") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba);
} else if (strcmp(attr->name, "ws_min") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min);
} else if (strcmp(attr->name, "ws_opt") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt);
} else if (strcmp(attr->name, "maxoc") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc);
} else if (strcmp(attr->name, "maxocpu") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu);
} else if (strcmp(attr->name, "mw_cunits") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits);
} else if (strcmp(attr->name, "write_typ") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt);
} else if (strcmp(attr->name, "write_max") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm);
} else if (strcmp(attr->name, "reset_typ") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet);
} else if (strcmp(attr->name, "reset_max") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem);
} else {
return scnprintf(page, PAGE_SIZE,
"Unhandled attr(%s) in `%s`\n",
attr->name, __func__);
}
}

#define NVM_DEV_ATTR_RO(_name) \
DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)
#define NVM_DEV_ATTR_12_RO(_name) \
DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL)
#define NVM_DEV_ATTR_20_RO(_name) \
DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL)

/* general attributes */
static NVM_DEV_ATTR_RO(version);
static NVM_DEV_ATTR_RO(vendor_opcode);
static NVM_DEV_ATTR_RO(capabilities);
static NVM_DEV_ATTR_RO(device_mode);
static NVM_DEV_ATTR_RO(ppa_format);
static NVM_DEV_ATTR_RO(media_manager);

static NVM_DEV_ATTR_RO(media_type);
static NVM_DEV_ATTR_RO(flash_media_type);
static NVM_DEV_ATTR_RO(num_channels);
static NVM_DEV_ATTR_RO(num_luns);
static NVM_DEV_ATTR_RO(num_planes);
static NVM_DEV_ATTR_RO(num_blocks);
static NVM_DEV_ATTR_RO(num_pages);
static NVM_DEV_ATTR_RO(page_size);
static NVM_DEV_ATTR_RO(hw_sector_size);
static NVM_DEV_ATTR_RO(oob_sector_size);
static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);
static NVM_DEV_ATTR_RO(prog_typ);
static NVM_DEV_ATTR_RO(prog_max);
static NVM_DEV_ATTR_RO(erase_typ);
static NVM_DEV_ATTR_RO(erase_max);
static NVM_DEV_ATTR_RO(multiplane_modes);
static NVM_DEV_ATTR_RO(media_capabilities);
static NVM_DEV_ATTR_RO(max_phys_secs);

static struct attribute *nvm_dev_attrs[] = {
/* 1.2 values */
static NVM_DEV_ATTR_12_RO(vendor_opcode);
static NVM_DEV_ATTR_12_RO(device_mode);
static NVM_DEV_ATTR_12_RO(ppa_format);
static NVM_DEV_ATTR_12_RO(media_manager);
static NVM_DEV_ATTR_12_RO(media_type);
static NVM_DEV_ATTR_12_RO(flash_media_type);
static NVM_DEV_ATTR_12_RO(num_channels);
static NVM_DEV_ATTR_12_RO(num_luns);
static NVM_DEV_ATTR_12_RO(num_planes);
static NVM_DEV_ATTR_12_RO(num_blocks);
static NVM_DEV_ATTR_12_RO(num_pages);
static NVM_DEV_ATTR_12_RO(page_size);
static NVM_DEV_ATTR_12_RO(hw_sector_size);
static NVM_DEV_ATTR_12_RO(oob_sector_size);
static NVM_DEV_ATTR_12_RO(prog_typ);
static NVM_DEV_ATTR_12_RO(prog_max);
static NVM_DEV_ATTR_12_RO(erase_typ);
static NVM_DEV_ATTR_12_RO(erase_max);
static NVM_DEV_ATTR_12_RO(multiplane_modes);
static NVM_DEV_ATTR_12_RO(media_capabilities);
static NVM_DEV_ATTR_12_RO(max_phys_secs);

static struct attribute *nvm_dev_attrs_12[] = {
&dev_attr_version.attr,
&dev_attr_vendor_opcode.attr,
&dev_attr_capabilities.attr,

&dev_attr_vendor_opcode.attr,
&dev_attr_device_mode.attr,
&dev_attr_media_manager.attr,

&dev_attr_ppa_format.attr,
&dev_attr_media_type.attr,
&dev_attr_flash_media_type.attr,

@@ -887,22 +1218,92 @@ static struct attribute *nvm_dev_attrs[] = {
&dev_attr_multiplane_modes.attr,
&dev_attr_media_capabilities.attr,
&dev_attr_max_phys_secs.attr,

NULL,
};

static const struct attribute_group nvm_dev_attr_group = {
static const struct attribute_group nvm_dev_attr_group_12 = {
.name = "lightnvm",
.attrs = nvm_dev_attrs,
.attrs = nvm_dev_attrs_12,
};

/* 2.0 values */
static NVM_DEV_ATTR_20_RO(groups);
static NVM_DEV_ATTR_20_RO(punits);
static NVM_DEV_ATTR_20_RO(chunks);
static NVM_DEV_ATTR_20_RO(clba);
static NVM_DEV_ATTR_20_RO(ws_min);
static NVM_DEV_ATTR_20_RO(ws_opt);
static NVM_DEV_ATTR_20_RO(maxoc);
static NVM_DEV_ATTR_20_RO(maxocpu);
static NVM_DEV_ATTR_20_RO(mw_cunits);
static NVM_DEV_ATTR_20_RO(write_typ);
static NVM_DEV_ATTR_20_RO(write_max);
static NVM_DEV_ATTR_20_RO(reset_typ);
static NVM_DEV_ATTR_20_RO(reset_max);

static struct attribute *nvm_dev_attrs_20[] = {
&dev_attr_version.attr,
&dev_attr_capabilities.attr,

&dev_attr_groups.attr,
&dev_attr_punits.attr,
&dev_attr_chunks.attr,
&dev_attr_clba.attr,
&dev_attr_ws_min.attr,
&dev_attr_ws_opt.attr,
&dev_attr_maxoc.attr,
&dev_attr_maxocpu.attr,
&dev_attr_mw_cunits.attr,

&dev_attr_read_typ.attr,
&dev_attr_read_max.attr,
&dev_attr_write_typ.attr,
&dev_attr_write_max.attr,
&dev_attr_reset_typ.attr,
&dev_attr_reset_max.attr,

NULL,
};

static const struct attribute_group nvm_dev_attr_group_20 = {
.name = "lightnvm",
.attrs = nvm_dev_attrs_20,
};

int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group);
struct nvm_dev *ndev = ns->ndev;
struct nvm_geo *geo = &ndev->geo;

if (!ndev)
return -EINVAL;

switch (geo->major_ver_id) {
case 1:
return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group_12);
case 2:
return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group_20);
}

return -EINVAL;
}

void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
{
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group);
struct nvm_dev *ndev = ns->ndev;
struct nvm_geo *geo = &ndev->geo;

switch (geo->major_ver_id) {
case 1:
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group_12);
break;
case 2:
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group_20);
break;
}
}

@@ -44,12 +44,12 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;

mutex_lock(&ctrl->namespaces_mutex);
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->head->disk)
kblockd_schedule_work(&ns->head->requeue_work);
}
mutex_unlock(&ctrl->namespaces_mutex);
up_read(&ctrl->namespaces_rwsem);
}

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)

@@ -162,13 +162,13 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
return 0;

q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
if (!q)
goto out;
q->queuedata = head;
blk_queue_make_request(q, nvme_ns_head_make_request);
q->poll_fn = nvme_ns_head_poll;
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);

@@ -21,6 +21,7 @@
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>

extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)

@@ -140,7 +141,7 @@ struct nvme_ctrl {
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
struct mutex namespaces_mutex;
struct rw_semaphore namespaces_rwsem;
struct device ctrl_device;
struct device *device; /* char device */
struct cdev cdev;

@@ -261,6 +262,15 @@ struct nvme_ns_head {
int instance;
};

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
struct nvme_fault_inject {
struct fault_attr attr;
struct dentry *parent;
bool dont_retry; /* DNR, do not retry */
u16 status; /* status code */
};
#endif

struct nvme_ns {
struct list_head list;

@@ -282,6 +292,11 @@ struct nvme_ns {
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
u16 noiob;

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
struct nvme_fault_inject fault_inject;
#endif

};

struct nvme_ctrl_ops {

@@ -298,8 +313,19 @@ struct nvme_ctrl_ops {
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
int (*reinit_request)(void *data, struct request *rq);
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
};

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_ns *ns);
void nvme_fault_inject_fini(struct nvme_ns *ns);
void nvme_should_fail(struct request *req);
#else
static inline void nvme_fault_inject_init(struct nvme_ns *ns) {}
static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {}
static inline void nvme_should_fail(struct request *req) {}
#endif

static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
{
u32 val = 0;

@@ -336,6 +362,8 @@ static inline void nvme_end_request(struct request *req, __le16 status,

rq->status = le16_to_cpu(status) >> 1;
rq->result = result;
/* inject error when permitted by fault injection framework */
nvme_should_fail(req);
blk_mq_complete_request(req);
}

@@ -401,6 +429,9 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);

int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 log_page, void *log, size_t size, size_t offset);

extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;

@@ -461,12 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_NVM
void nvme_nvm_update_nvm_info(struct nvme_ns *ns);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {};
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
int node)
{

@@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_dev *dev = set->driver_data;

return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0);
}

/**

@@ -2197,7 +2197,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (!dead) {
if (shutdown)
nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
}

nvme_stop_queues(&dev->ctrl);

if (!dead) {
/*
* If the controller is still alive tell it to stop using the
* host memory buffer. In theory the shutdown / reset should

@@ -2206,11 +2210,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
*/
if (dev->host_mem_descs)
nvme_set_host_mem(dev, 0);

}
nvme_stop_queues(&dev->ctrl);

if (!dead) {
nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}

@@ -2416,6 +2415,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
return 0;
}

static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);

return snprintf(buf, size, "%s", dev_name(&pdev->dev));
}

static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,

@@ -2425,6 +2431,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read64 = nvme_pci_reg_read64,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
.get_address = nvme_pci_get_address,
};

static int nvme_dev_map(struct nvme_dev *dev)

@@ -2461,10 +2468,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
/*
* Samsung SSD 960 EVO drops off the PCIe bus after system
* suspend on a Ryzen board, ASUS PRIME B350M-A.
* suspend on a Ryzen board, ASUS PRIME B350M-A, as well as
* within few minutes after bootup on a Coffee Lake board -
* ASUS PRIME Z370-A
*/
if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
dmi_match(DMI_BOARD_NAME, "PRIME B350M-A"))
(dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
return NVME_QUIRK_NO_APST;
}

@@ -867,6 +867,14 @@ out_free_io_queues:
return ret;
}

static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->reconnect_work);
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

@@ -899,7 +907,6 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
nvme_delete_ctrl(&ctrl->ctrl);
}
}

@@ -974,8 +981,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_start_queues(&ctrl->ctrl);

if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure should never happen */
WARN_ON_ONCE(1);
/* state change failure is ok if we're in DELETING state */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
return;
}

@@ -1719,9 +1726,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {

static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
cancel_work_sync(&ctrl->err_work);
cancel_delayed_work_sync(&ctrl->reconnect_work);

if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,

@@ -1799,6 +1803,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
.stop_ctrl = nvme_rdma_stop_ctrl,
};

static inline bool

@@ -2025,15 +2030,26 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
struct nvme_rdma_ctrl *ctrl;
struct nvme_rdma_device *ndev;
bool found = false;

mutex_lock(&device_list_mutex);
list_for_each_entry(ndev, &device_list, entry) {
if (ndev->dev == ib_device) {
found = true;
break;
}
}
mutex_unlock(&device_list_mutex);

if (!found)
return;

/* Delete all controllers using this device */
mutex_lock(&nvme_rdma_ctrl_mutex);
list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
if (ctrl->device->dev != ib_device)
continue;
dev_info(ctrl->ctrl.device,
"Removing ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
nvme_delete_ctrl(&ctrl->ctrl);
}
mutex_unlock(&nvme_rdma_ctrl_mutex);

@@ -23,6 +23,15 @@
static const struct config_item_type nvmet_host_type;
static const struct config_item_type nvmet_subsys_type;

static const struct nvmet_transport_name {
u8 type;
const char *name;
} nvmet_transport_names[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
{ NVMF_TRTYPE_LOOP, "loop" },
};

/*
* nvmet_port Generic ConfigFS definitions.
* Used in any place in the ConfigFS tree that refers to an address.

@@ -208,43 +217,30 @@ CONFIGFS_ATTR(nvmet_, addr_trsvcid);
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
switch (to_nvmet_port(item)->disc_addr.trtype) {
case NVMF_TRTYPE_RDMA:
return sprintf(page, "rdma\n");
case NVMF_TRTYPE_LOOP:
return sprintf(page, "loop\n");
case NVMF_TRTYPE_FC:
return sprintf(page, "fc\n");
default:
return sprintf(page, "\n");
struct nvmet_port *port = to_nvmet_port(item);
int i;

for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
if (port->disc_addr.trtype != nvmet_transport_names[i].type)
continue;
return sprintf(page, "%s\n", nvmet_transport_names[i].name);
}

return sprintf(page, "\n");
}

static void nvmet_port_init_tsas_rdma(struct nvmet_port *port)
{
port->disc_addr.trtype = NVMF_TRTYPE_RDMA;
memset(&port->disc_addr.tsas.rdma, 0, NVMF_TSAS_SIZE);
port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED;
port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED;
port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM;
}

static void nvmet_port_init_tsas_loop(struct nvmet_port *port)
{
port->disc_addr.trtype = NVMF_TRTYPE_LOOP;
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
}

static void nvmet_port_init_tsas_fc(struct nvmet_port *port)
{
port->disc_addr.trtype = NVMF_TRTYPE_FC;
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
}

static ssize_t nvmet_addr_trtype_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
int i;

if (port->enabled) {
pr_err("Cannot modify address while enabled\n");

@@ -252,17 +248,18 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
return -EACCES;
}

if (sysfs_streq(page, "rdma")) {
nvmet_port_init_tsas_rdma(port);
} else if (sysfs_streq(page, "loop")) {
nvmet_port_init_tsas_loop(port);
} else if (sysfs_streq(page, "fc")) {
nvmet_port_init_tsas_fc(port);
} else {
pr_err("Invalid value '%s' for trtype\n", page);
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
if (sysfs_streq(page, nvmet_transport_names[i].name))
goto found;
}

pr_err("Invalid value '%s' for trtype\n", page);
return -EINVAL;
found:
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
port->disc_addr.trtype = nvmet_transport_names[i].type;
if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
nvmet_port_init_tsas_rdma(port);
return count;
}

@@ -333,13 +330,13 @@ out_unlock:
return ret ? ret : count;
}

CONFIGFS_ATTR(nvmet_ns_, device_uuid);

static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
}

CONFIGFS_ATTR(nvmet_ns_, device_uuid);

static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
const char *page, size_t count)
{
Some files were not shown because too many files have changed in this diff.