for-4.20/block-20181021

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAlvNQKgQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgps+8D/9Iy6YIeoPwN10gYsqIh0P2fS3wKzL3kiww
3vFsWO78PzgLxUlNmB7teLtNFc/R5mi8becZmAdvs9za5YFZk56o3Ifv1x+e+z00
VY1/gxhiJD6suLeJ6lECnERGDaiWOZVRMo2TE17vxYGW6GGaa0Ts6PUUXmpla1u5
WKctgt0Qv9WVNyiIdLdeHqzKJwsSSwNTt8fK7eFhy3x8e0CwJr+GtXckbbW3LFkY
lug0npsTli3EmEPMovZhd25SjZmTk5GTM+ADZQ7Tnv5KXoDWB9jn6TcCSAi3G+5d
5WUVwfnDyYJiH8qvlg5tRJ690muIy3xMOmpr7QBQ0YnR/LQ3EW+1CVfqD+qimgLH
TXzlREXQpBP3YlxSDS5nddz4o5z84GZmC9B/43ujPaZKIQ6eBXYdkmQH7tPtSugm
C6VGomR5tHotjxIiAsexh/5hAus+wW8bObKGTPTyINT0ub3XNclwCKLh26CgI9ie
WvbS9g3j/KPvu/7s6weZpgD+cks0YdWe/XdXXxiHwsGI9h3J2aJna5RQt1rKWDm5
wGCgbc/B8eSwiWx+GXlqdB9/Dy/bGXOnSTDnKpEVl1f5zNjeLwUKXbjvkMefWs4m
jEIcquuDETORY+ZYEfa5YbmS4Lhskr0kzMVTVkZ++81tAWpSCU9Xh3IHrR8TNpt+
J0oh0FHBDg==
=LRTT
-----END PGP SIGNATURE-----

Merge tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "This is the main pull request for block changes for 4.20. This
  contains:

   - Series enabling runtime PM for blk-mq (Bart).

   - Two pull requests from Christoph for NVMe, with items such as:
      - Better AEN tracking
      - Multipath improvements
      - RDMA fixes
      - Rework of FC for target removal
      - Fixes for issues identified by static checkers
      - Fabric cleanups, as prep for TCP transport
      - Various cleanups and bug fixes

   - Block merging cleanups (Christoph)

   - Conversion of drivers to generic DMA mapping API (Christoph)

   - Series fixing ref count issues with blkcg (Dennis)

   - Series improving BFQ heuristics (Paolo, et al)

   - Series improving heuristics for the Kyber IO scheduler (Omar)

   - Removal of dangerous bio_rewind_iter() API (Ming)

   - Apply single queue IPI redirection logic to blk-mq (Ming)

   - Set of fixes and improvements for bcache (Coly et al)

   - Series closing a hotplug race with sysfs group attributes (Hannes)

   - Set of patches for lightnvm:
      - pblk trace support (Hans)
      - SPDX license header update (Javier)
      - Tons of refactoring patches to cleanly abstract the 1.2 and 2.0
        specs behind a common core interface. (Javier, Matias)
      - Enable pblk to use a common interface to retrieve chunk
        metadata (Matias)
      - Bug fixes (Various)

   - Set of fixes and updates to the blk IO latency target (Josef)

   - blk-mq queue number updates fixes (Jianchao)

   - Convert a bunch of drivers from the old legacy IO interface to
     blk-mq. This will conclude with the removal of the legacy IO
     interface itself in 4.21, with the rest of the drivers (me, Omar)

   - Removal of the DAC960 driver. The SCSI tree will introduce two
     replacement drivers for this (Hannes)"

* tag 'for-4.20/block-20181021' of git://git.kernel.dk/linux-block: (204 commits)
  block: setup bounce bio_sets properly
  blkcg: reassociate bios when make_request() is called recursively
  blkcg: fix edge case for blk_get_rl() under memory pressure
  nvme-fabrics: move controller options matching to fabrics
  nvme-rdma: always have a valid trsvcid
  mtip32xx: fully switch to the generic DMA API
  rsxx: switch to the generic DMA API
  umem: switch to the generic DMA API
  sx8: switch to the generic DMA API
  sx8: remove dead IF_64BIT_DMA_IS_POSSIBLE code
  skd: switch to the generic DMA API
  ubd: remove use of blk_rq_map_sg
  nvme-pci: remove duplicate check
  drivers/block: Remove DAC960 driver
  nvme-pci: fix hot removal during error handling
  nvmet-fcloop: suppress a compiler warning
  nvme-core: make implicit seed truncation explicit
  nvmet-fc: fix kernel-doc headers
  nvme-fc: rework the request initialization code
  nvme-fc: introduce struct nvme_fcp_op_w_sgl
  ...

commit 6ab9e09238
@@ -1857,8 +1857,10 @@ following two functions.
 
   wbc_init_bio(@wbc, @bio)
 	Should be called for each bio carrying writeback data and
-	associates the bio with the inode's owner cgroup.  Can be
-	called anytime between bio allocation and submission.
+	associates the bio with the inode's owner cgroup and the
+	corresponding request queue.  This must be called after
+	a queue (device) has been associated with the bio and
+	before submission.
 
   wbc_account_io(@wbc, @page, @bytes)
 	Should be called for each data segment being written out.
@@ -1877,7 +1879,7 @@ the configuration, the bio may be executed at a lower priority and if
 the writeback session is holding shared resources, e.g. a journal
 entry, may lead to priority inversion.  There is no one easy solution
 for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_blkcg()
+cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
 directly.
 
 
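To make the new calling contract concrete, here is a minimal, illustrative sketch of a filesystem writeback submission path using these hooks. It is not taken from any in-tree filesystem; the function name example_submit_writeback_page and its arguments are hypothetical and error handling is omitted. The point is the ordering: the bio gets its device (and therefore its request queue) via bio_set_dev() before wbc_init_bio(), and each written segment is accounted with wbc_account_io().

  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/writeback.h>

  static void example_submit_writeback_page(struct writeback_control *wbc,
                                            struct block_device *bdev,
                                            struct page *page, sector_t sector)
  {
          struct bio *bio = bio_alloc(GFP_NOFS, 1);

          bio_set_dev(bio, bdev);                 /* associate the queue first ... */
          bio->bi_iter.bi_sector = sector;
          bio->bi_opf = REQ_OP_WRITE;
          bio_add_page(bio, page, PAGE_SIZE, 0);

          wbc_init_bio(wbc, bio);                 /* ... then tag it with the owner cgroup */
          wbc_account_io(wbc, page, PAGE_SIZE);   /* account each data segment written */

          submit_bio(bio);
  }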
@ -1,756 +0,0 @@
|
|||
Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers
|
||||
|
||||
Version 2.2.11 for Linux 2.2.19
|
||||
Version 2.4.11 for Linux 2.4.12
|
||||
|
||||
PRODUCTION RELEASE
|
||||
|
||||
11 October 2001
|
||||
|
||||
Leonard N. Zubkoff
|
||||
Dandelion Digital
|
||||
lnz@dandelion.com
|
||||
|
||||
Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com>
|
||||
|
||||
|
||||
INTRODUCTION
|
||||
|
||||
Mylex, Inc. designs and manufactures a variety of high performance PCI RAID
|
||||
controllers. Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont,
|
||||
California 94555, USA and can be reached at 510.796.6100 or on the World Wide
|
||||
Web at http://www.mylex.com. Mylex Technical Support can be reached by
|
||||
electronic mail at mylexsup@us.ibm.com, by voice at 510.608.2400, or by FAX at
|
||||
510.745.7715. Contact information for offices in Europe and Japan is available
|
||||
on their Web site.
|
||||
|
||||
The latest information on Linux support for DAC960 PCI RAID Controllers, as
|
||||
well as the most recent release of this driver, will always be available from
|
||||
my Linux Home Page at URL "http://www.dandelion.com/Linux/". The Linux DAC960
|
||||
driver supports all current Mylex PCI RAID controllers including the new
|
||||
eXtremeRAID 2000/3000 and AcceleRAID 352/170/160 models which have an entirely
|
||||
new firmware interface from the older eXtremeRAID 1100, AcceleRAID 150/200/250,
|
||||
and DAC960PJ/PG/PU/PD/PL. See below for a complete controller list as well as
|
||||
minimum firmware version requirements. For simplicity, in most places this
|
||||
documentation refers to DAC960 generically rather than explicitly listing all
|
||||
the supported models.
|
||||
|
||||
Driver bug reports should be sent via electronic mail to "lnz@dandelion.com".
|
||||
Please include with the bug report the complete configuration messages reported
|
||||
by the driver at startup, along with any subsequent system messages relevant to
|
||||
the controller's operation, and a detailed description of your system's
|
||||
hardware configuration. Driver bugs are actually quite rare; if you encounter
|
||||
problems with disks being marked offline, for example, please contact Mylex
|
||||
Technical Support as the problem is related to the hardware configuration
|
||||
rather than the Linux driver.
|
||||
|
||||
Please consult the RAID controller documentation for detailed information
|
||||
regarding installation and configuration of the controllers. This document
|
||||
primarily provides information specific to the Linux support.
|
||||
|
||||
|
||||
DRIVER FEATURES
|
||||
|
||||
The DAC960 RAID controllers are supported solely as high performance RAID
|
||||
controllers, not as interfaces to arbitrary SCSI devices. The Linux DAC960
|
||||
driver operates at the block device level, the same level as the SCSI and IDE
|
||||
drivers. Unlike other RAID controllers currently supported on Linux, the
|
||||
DAC960 driver is not dependent on the SCSI subsystem, and hence avoids all the
|
||||
complexity and unnecessary code that would be associated with an implementation
|
||||
as a SCSI driver. The DAC960 driver is designed for as high a performance as
|
||||
possible with no compromises or extra code for compatibility with lower
|
||||
performance devices. The DAC960 driver includes extensive error logging and
|
||||
online configuration management capabilities. Except for initial configuration
|
||||
of the controller and adding new disk drives, most everything can be handled
|
||||
from Linux while the system is operational.
|
||||
|
||||
The DAC960 driver is architected to support up to 8 controllers per system.
|
||||
Each DAC960 parallel SCSI controller can support up to 15 disk drives per
|
||||
channel, for a maximum of 60 drives on a four channel controller; the fibre
|
||||
channel eXtremeRAID 3000 controller supports up to 125 disk drives per loop for
|
||||
a total of 250 drives. The drives installed on a controller are divided into
|
||||
one or more "Drive Groups", and then each Drive Group is subdivided further
|
||||
into 1 to 32 "Logical Drives". Each Logical Drive has a specific RAID Level
|
||||
and caching policy associated with it, and it appears to Linux as a single
|
||||
block device. Logical Drives are further subdivided into up to 7 partitions
|
||||
through the normal Linux and PC disk partitioning schemes. Logical Drives are
|
||||
also known as "System Drives", and Drive Groups are also called "Packs". Both
|
||||
terms are in use in the Mylex documentation; I have chosen to standardize on
|
||||
the more generic "Logical Drive" and "Drive Group".
|
||||
|
||||
DAC960 RAID disk devices are named in the style of the obsolete Device File
|
||||
System (DEVFS). The device corresponding to Logical Drive D on Controller C
|
||||
is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1
|
||||
through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on
|
||||
Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI
|
||||
disks the device names will not change in the event of a disk drive failure.
|
||||
The DAC960 driver is assigned major numbers 48 - 55 with one major number per
|
||||
controller. The 8 bits of minor number are divided into 5 bits for the Logical
|
||||
Drive and 3 bits for the partition.
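For illustration only (this helper is not part of the driver), the major/minor layout described above can be expressed as a small userspace computation; the name dac960_devt is hypothetical:

  #include <sys/types.h>
  #include <sys/sysmacros.h>

  /*
   * Compose the device number for partition P of Logical Drive D on
   * Controller C: one major per controller starting at 48, and an
   * 8-bit minor split into 5 bits of logical drive and 3 bits of
   * partition (partition 0 means the whole logical drive).
   */
  static dev_t dac960_devt(int controller, int logical_drive, int partition)
  {
          int major = 48 + controller;
          int minor = (logical_drive << 3) | partition;

          return makedev(major, minor);
  }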
|
||||
|
||||
|
||||
SUPPORTED DAC960/AcceleRAID/eXtremeRAID PCI RAID CONTROLLERS
|
||||
|
||||
The following list comprises the supported DAC960, AcceleRAID, and eXtremeRAID
|
||||
PCI RAID Controllers as of the date of this document. It is recommended that
|
||||
anyone purchasing a Mylex PCI RAID Controller not in the following table
|
||||
contact the author beforehand to verify that it is or will be supported.
|
||||
|
||||
eXtremeRAID 3000
|
||||
1 Wide Ultra-2/LVD SCSI channel
|
||||
2 External Fibre FC-AL channels
|
||||
233MHz StrongARM SA 110 Processor
|
||||
64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
|
||||
32MB/64MB ECC SDRAM Memory
|
||||
|
||||
eXtremeRAID 2000
|
||||
4 Wide Ultra-160 LVD SCSI channels
|
||||
233MHz StrongARM SA 110 Processor
|
||||
64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
|
||||
32MB/64MB ECC SDRAM Memory
|
||||
|
||||
AcceleRAID 352
|
||||
2 Wide Ultra-160 LVD SCSI channels
|
||||
100MHz Intel i960RN RISC Processor
|
||||
64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
|
||||
32MB/64MB ECC SDRAM Memory
|
||||
|
||||
AcceleRAID 170
|
||||
1 Wide Ultra-160 LVD SCSI channel
|
||||
100MHz Intel i960RM RISC Processor
|
||||
16MB/32MB/64MB ECC SDRAM Memory
|
||||
|
||||
AcceleRAID 160 (AcceleRAID 170LP)
|
||||
1 Wide Ultra-160 LVD SCSI channel
|
||||
100MHz Intel i960RS RISC Processor
|
||||
Built in 16M ECC SDRAM Memory
|
||||
PCI Low Profile Form Factor - fit for 2U height
|
||||
|
||||
eXtremeRAID 1100 (DAC1164P)
|
||||
3 Wide Ultra-2/LVD SCSI channels
|
||||
233MHz StrongARM SA 110 Processor
|
||||
64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
|
||||
16MB/32MB/64MB Parity SDRAM Memory with Battery Backup
|
||||
|
||||
AcceleRAID 250 (DAC960PTL1)
|
||||
Uses onboard Symbios SCSI chips on certain motherboards
|
||||
Also includes one onboard Wide Ultra-2/LVD SCSI Channel
|
||||
66MHz Intel i960RD RISC Processor
|
||||
4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
|
||||
|
||||
AcceleRAID 200 (DAC960PTL0)
|
||||
Uses onboard Symbios SCSI chips on certain motherboards
|
||||
Includes no onboard SCSI Channels
|
||||
66MHz Intel i960RD RISC Processor
|
||||
4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
|
||||
|
||||
AcceleRAID 150 (DAC960PRL)
|
||||
Uses onboard Symbios SCSI chips on certain motherboards
|
||||
Also includes one onboard Wide Ultra-2/LVD SCSI Channel
|
||||
33MHz Intel i960RP RISC Processor
|
||||
4MB Parity EDO Memory
|
||||
|
||||
DAC960PJ 1/2/3 Wide Ultra SCSI-3 Channels
|
||||
66MHz Intel i960RD RISC Processor
|
||||
4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
|
||||
|
||||
DAC960PG 1/2/3 Wide Ultra SCSI-3 Channels
|
||||
33MHz Intel i960RP RISC Processor
|
||||
4MB/8MB ECC EDO Memory
|
||||
|
||||
DAC960PU 1/2/3 Wide Ultra SCSI-3 Channels
|
||||
Intel i960CF RISC Processor
|
||||
4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory
|
||||
|
||||
DAC960PD 1/2/3 Wide Fast SCSI-2 Channels
|
||||
Intel i960CF RISC Processor
|
||||
4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory
|
||||
|
||||
DAC960PL 1/2/3 Wide Fast SCSI-2 Channels
|
||||
Intel i960 RISC Processor
|
||||
2MB/4MB/8MB/16MB/32MB DRAM Memory
|
||||
|
||||
DAC960P 1/2/3 Wide Fast SCSI-2 Channels
|
||||
Intel i960 RISC Processor
|
||||
2MB/4MB/8MB/16MB/32MB DRAM Memory
|
||||
|
||||
For the eXtremeRAID 2000/3000 and AcceleRAID 352/170/160, firmware version
|
||||
6.00-01 or above is required.
|
||||
|
||||
For the eXtremeRAID 1100, firmware version 5.06-0-52 or above is required.
|
||||
|
||||
For the AcceleRAID 250, 200, and 150, firmware version 4.06-0-57 or above is
|
||||
required.
|
||||
|
||||
For the DAC960PJ and DAC960PG, firmware version 4.06-0-00 or above is required.
|
||||
|
||||
For the DAC960PU, DAC960PD, DAC960PL, and DAC960P, either firmware version
|
||||
3.51-0-04 or above is required (for dual Flash ROM controllers), or firmware
|
||||
version 2.73-0-00 or above is required (for single Flash ROM controllers).
|
||||
|
||||
Please note that not all SCSI disk drives are suitable for use with DAC960
|
||||
controllers, and only particular firmware versions of any given model may
|
||||
actually function correctly. Similarly, not all motherboards have a BIOS that
|
||||
properly initializes the AcceleRAID 250, AcceleRAID 200, AcceleRAID 150,
|
||||
DAC960PJ, and DAC960PG because the Intel i960RD/RP is a multi-function device.
|
||||
If in doubt, contact Mylex RAID Technical Support (mylexsup@us.ibm.com) to
|
||||
verify compatibility. Mylex makes available a hard disk compatibility list at
|
||||
http://www.mylex.com/support/hdcomp/hd-lists.html.
|
||||
|
||||
|
||||
DRIVER INSTALLATION
|
||||
|
||||
This distribution was prepared for Linux kernel version 2.2.19 or 2.4.12.
|
||||
|
||||
To install the DAC960 RAID driver, you may use the following commands,
|
||||
replacing "/usr/src" with wherever you keep your Linux kernel source tree:
|
||||
|
||||
cd /usr/src
|
||||
tar -xvzf DAC960-2.2.11.tar.gz (or DAC960-2.4.11.tar.gz)
|
||||
mv README.DAC960 linux/Documentation
|
||||
mv DAC960.[ch] linux/drivers/block
|
||||
patch -p0 < DAC960.patch (if DAC960.patch is included)
|
||||
cd linux
|
||||
make config
|
||||
make bzImage (or zImage)
|
||||
|
||||
Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your
|
||||
standard kernel, run lilo if appropriate, and reboot.
|
||||
|
||||
To create the necessary devices in /dev, the "make_rd" script included in
|
||||
"DAC960-Utilities.tar.gz" from http://www.dandelion.com/Linux/ may be used.
|
||||
LILO 21 and FDISK v2.9 include DAC960 support; also included in this archive
|
||||
are patches to LILO 20 and FDISK v2.8 that add DAC960 support, along with
|
||||
statically linked executables of LILO and FDISK. This modified version of LILO
|
||||
will allow booting from a DAC960 controller and/or mounting the root file
|
||||
system from a DAC960.
|
||||
|
||||
Red Hat Linux 6.0 and SuSE Linux 6.1 include support for Mylex PCI RAID
|
||||
controllers. Installing directly onto a DAC960 may be problematic from other
|
||||
Linux distributions until their installation utilities are updated.
|
||||
|
||||
|
||||
INSTALLATION NOTES
|
||||
|
||||
Before installing Linux or adding DAC960 logical drives to an existing Linux
|
||||
system, the controller must first be configured to provide one or more logical
|
||||
drives using the BIOS Configuration Utility or DACCF. Please note that since
|
||||
there are only at most 6 usable partitions on each logical drive, systems
|
||||
requiring more partitions should subdivide a drive group into multiple logical
|
||||
drives, each of which can have up to 6 usable partitions. Also, note that with
|
||||
large disk arrays it is advisable to enable the 8GB BIOS Geometry (255/63)
|
||||
rather than accepting the default 2GB BIOS Geometry (128/32); failing to do so
|
||||
will cause the logical drive geometry to have more than 65535 cylinders which
|
||||
will make it impossible for FDISK to be used properly. The 8GB BIOS Geometry
|
||||
can be enabled by configuring the DAC960 BIOS, which is accessible via Alt-M
|
||||
during the BIOS initialization sequence.
|
||||
|
||||
For maximum performance and the most efficient E2FSCK performance, it is
|
||||
recommended that EXT2 file systems be built with a 4KB block size and 16 block
|
||||
stride to match the DAC960 controller's 64KB default stripe size. The command
|
||||
"mke2fs -b 4096 -R stride=16 <device>" is appropriate. Unless there will be a
|
||||
large number of small files on the file systems, it is also beneficial to add
|
||||
the "-i 16384" option to increase the bytes per inode parameter thereby
|
||||
reducing the file system metadata. Finally, on systems that will only be run
|
||||
with Linux 2.2 or later kernels it is beneficial to enable sparse superblocks
|
||||
with the "-s 1" option.
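As a small illustration of the arithmetic behind the recommended command (not part of the driver or its utilities), the stride is simply the stripe size divided by the file system block size; the helper name below is hypothetical:

  #include <stdio.h>

  /* one 64KB stripe divided by 4KB blocks gives the stride of 16 used above */
  static unsigned int ext2_stride(unsigned int stripe_kb, unsigned int block_kb)
  {
          return stripe_kb / block_kb;
  }

  int main(void)
  {
          printf("mke2fs -b 4096 -R stride=%u <device>\n", ext2_stride(64, 4));
          return 0;
  }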
|
||||
|
||||
|
||||
DAC960 ANNOUNCEMENTS MAILING LIST
|
||||
|
||||
The DAC960 Announcements Mailing List provides a forum for informing Linux
|
||||
users of new driver releases and other announcements regarding Linux support
|
||||
for DAC960 PCI RAID Controllers. To join the mailing list, send a message to
|
||||
"dac960-announce-request@dandelion.com" with the line "subscribe" in the
|
||||
message body.
|
||||
|
||||
|
||||
CONTROLLER CONFIGURATION AND STATUS MONITORING
|
||||
|
||||
The DAC960 RAID controllers running firmware 4.06 or above include a Background
|
||||
Initialization facility so that system downtime is minimized both for initial
|
||||
installation and subsequent configuration of additional storage. The BIOS
|
||||
Configuration Utility (accessible via Alt-R during the BIOS initialization
|
||||
sequence) is used to quickly configure the controller, and then the logical
|
||||
drives that have been created are available for immediate use even while they
|
||||
are still being initialized by the controller. The primary need for online
|
||||
configuration and status monitoring is then to avoid system downtime when disk
|
||||
drives fail and must be replaced. Mylex's online monitoring and configuration
|
||||
utilities are being ported to Linux and will become available at some point in
|
||||
the future. Note that with a SAF-TE (SCSI Accessed Fault-Tolerant Enclosure)
|
||||
enclosure, the controller is able to rebuild failed drives automatically as
|
||||
soon as a drive replacement is made available.
|
||||
|
||||
The primary interfaces for controller configuration and status monitoring are
|
||||
special files created in the /proc/rd/... hierarchy along with the normal
|
||||
system console logging mechanism. Whenever the system is operating, the DAC960
|
||||
driver queries each controller for status information every 10 seconds, and
|
||||
checks for additional conditions every 60 seconds. The initial status of each
|
||||
controller is always available for controller N in /proc/rd/cN/initial_status,
|
||||
and the current status as of the last status monitoring query is available in
|
||||
/proc/rd/cN/current_status. In addition, status changes are also logged by the
|
||||
driver to the system console and will appear in the log files maintained by
|
||||
syslog. The progress of asynchronous rebuild or consistency check operations
|
||||
is also available in /proc/rd/cN/current_status, and progress messages are
|
||||
logged to the system console at most every 60 seconds.
|
||||
|
||||
Starting with the 2.2.3/2.0.3 versions of the driver, the status information
|
||||
available in /proc/rd/cN/initial_status and /proc/rd/cN/current_status has been
|
||||
augmented to include the vendor, model, revision, and serial number (if
|
||||
available) for each physical device found connected to the controller:
|
||||
|
||||
***** DAC960 RAID Driver Version 2.2.3 of 19 August 1999 *****
|
||||
Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
|
||||
Configuring Mylex DAC960PRL PCI RAID Controller
|
||||
Firmware Version: 4.07-0-07, Channels: 1, Memory Size: 16MB
|
||||
PCI Bus: 1, Device: 4, Function: 1, I/O Address: Unassigned
|
||||
PCI Address: 0xFE300000 mapped at 0xA0800000, IRQ Channel: 21
|
||||
Controller Queue Depth: 128, Maximum Blocks per Command: 128
|
||||
Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
|
||||
Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
|
||||
SAF-TE Enclosure Management Enabled
|
||||
Physical Devices:
|
||||
0:0 Vendor: IBM Model: DRVS09D Revision: 0270
|
||||
Serial Number: 68016775HA
|
||||
Disk Status: Online, 17928192 blocks
|
||||
0:1 Vendor: IBM Model: DRVS09D Revision: 0270
|
||||
Serial Number: 68004E53HA
|
||||
Disk Status: Online, 17928192 blocks
|
||||
0:2 Vendor: IBM Model: DRVS09D Revision: 0270
|
||||
Serial Number: 13013935HA
|
||||
Disk Status: Online, 17928192 blocks
|
||||
0:3 Vendor: IBM Model: DRVS09D Revision: 0270
|
||||
Serial Number: 13016897HA
|
||||
Disk Status: Online, 17928192 blocks
|
||||
0:4 Vendor: IBM Model: DRVS09D Revision: 0270
|
||||
Serial Number: 68019905HA
|
||||
Disk Status: Online, 17928192 blocks
|
||||
0:5 Vendor: IBM Model: DRVS09D Revision: 0270
|
||||
Serial Number: 68012753HA
|
||||
Disk Status: Online, 17928192 blocks
|
||||
0:6 Vendor: ESG-SHV Model: SCA HSBP M6 Revision: 0.61
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Online, 89640960 blocks, Write Thru
|
||||
No Rebuild or Consistency Check in Progress
|
||||
|
||||
To simplify the monitoring process for custom software, the special file
|
||||
/proc/rd/status returns "OK" when all DAC960 controllers in the system are
|
||||
operating normally and no failures have occurred, or "ALERT" if any logical
|
||||
drives are offline or critical or any non-standby physical drives are dead.
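A minimal userspace sketch of how custom monitoring software might consume /proc/rd/status as described above; the helper name dac960_healthy is hypothetical and error handling is abbreviated:

  #include <stdio.h>
  #include <string.h>

  /* returns 1 when /proc/rd/status reads "OK", 0 on "ALERT", -1 if unavailable */
  static int dac960_healthy(void)
  {
          char buf[16] = "";
          FILE *f = fopen("/proc/rd/status", "r");

          if (!f)
                  return -1;              /* DAC960 driver not loaded */
          if (fgets(buf, sizeof(buf), f) == NULL)
                  buf[0] = '\0';
          fclose(f);

          return strncmp(buf, "OK", 2) == 0;
  }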
|
||||
|
||||
Configuration commands for controller N are available via the special file
|
||||
/proc/rd/cN/user_command. A human readable command can be written to this
|
||||
special file to initiate a configuration operation, and the results of the
|
||||
operation can then be read back from the special file in addition to being
|
||||
logged to the system console. The shell command sequence
|
||||
|
||||
echo "<configuration-command>" > /proc/rd/c0/user_command
|
||||
cat /proc/rd/c0/user_command
|
||||
|
||||
is typically used to execute configuration commands. The configuration
|
||||
commands are:
|
||||
|
||||
flush-cache
|
||||
|
||||
The "flush-cache" command flushes the controller's cache. The system
|
||||
automatically flushes the cache at shutdown or if the driver module is
|
||||
unloaded, so this command is only needed to be certain a write back cache
|
||||
is flushed to disk before the system is powered off by a command to a UPS.
|
||||
Note that the flush-cache command also stops an asynchronous rebuild or
|
||||
consistency check, so it should not be used except when the system is being
|
||||
halted.
|
||||
|
||||
kill <channel>:<target-id>
|
||||
|
||||
The "kill" command marks the physical drive <channel>:<target-id> as DEAD.
|
||||
This command is provided primarily for testing, and should not be used
|
||||
during normal system operation.
|
||||
|
||||
make-online <channel>:<target-id>
|
||||
|
||||
The "make-online" command changes the physical drive <channel>:<target-id>
|
||||
from status DEAD to status ONLINE. In cases where multiple physical drives
|
||||
have been killed simultaneously, this command may be used to bring all but
|
||||
one of them back online, after which a rebuild to the final drive is
|
||||
necessary.
|
||||
|
||||
Warning: make-online should only be used on a dead physical drive that is
|
||||
an active part of a drive group, never on a standby drive. The command
|
||||
should never be used on a dead drive that is part of a critical logical
|
||||
drive; rebuild should be used if only a single drive is dead.
|
||||
|
||||
make-standby <channel>:<target-id>
|
||||
|
||||
The "make-standby" command changes physical drive <channel>:<target-id>
|
||||
from status DEAD to status STANDBY. It should only be used in cases where
|
||||
a dead drive was replaced after an automatic rebuild was performed onto a
|
||||
standby drive. It cannot be used to add a standby drive to the controller
|
||||
configuration if one was not created initially; the BIOS Configuration
|
||||
Utility must be used for that currently.
|
||||
|
||||
rebuild <channel>:<target-id>
|
||||
|
||||
The "rebuild" command initiates an asynchronous rebuild onto physical drive
|
||||
<channel>:<target-id>. It should only be used when a dead drive has been
|
||||
replaced.
|
||||
|
||||
check-consistency <logical-drive-number>
|
||||
|
||||
The "check-consistency" command initiates an asynchronous consistency check
|
||||
of <logical-drive-number> with automatic restoration. It can be used
|
||||
whenever it is desired to verify the consistency of the redundancy
|
||||
information.
|
||||
|
||||
cancel-rebuild
|
||||
cancel-consistency-check
|
||||
|
||||
The "cancel-rebuild" and "cancel-consistency-check" commands cancel any
|
||||
rebuild or consistency check operations previously initiated.
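The shell sequence shown earlier (echo a command into /proc/rd/cN/user_command, then cat the same file) can also be driven from a program. A hypothetical sketch in C, with abbreviated error handling; the command string (e.g. "rebuild 1:1") is caller-supplied:

  #include <stdio.h>

  static int dac960_user_command(int controller, const char *cmd,
                                 char *reply, size_t len)
  {
          char path[64];
          FILE *f;

          snprintf(path, sizeof(path), "/proc/rd/c%d/user_command", controller);

          f = fopen(path, "w");           /* issue the configuration command */
          if (!f)
                  return -1;
          fprintf(f, "%s\n", cmd);
          fclose(f);

          f = fopen(path, "r");           /* read back the resulting status message */
          if (!f)
                  return -1;
          if (!fgets(reply, len, f))
                  reply[0] = '\0';
          fclose(f);
          return 0;
  }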
|
||||
|
||||
|
||||
EXAMPLE I - DRIVE FAILURE WITHOUT A STANDBY DRIVE
|
||||
|
||||
The following annotated logs demonstrate the controller configuration and
|
||||
online status monitoring capabilities of the Linux DAC960 Driver. The test
|
||||
configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a
|
||||
DAC960PJ controller. The physical drives are configured into a single drive
|
||||
group without a standby drive, and the drive group has been configured into two
|
||||
logical drives, one RAID-5 and one RAID-6. Note that these logs are from an
|
||||
earlier version of the driver and the messages have changed somewhat with newer
|
||||
releases, but the functionality remains similar. First, here is the current
|
||||
status of the RAID configuration:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
|
||||
Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
|
||||
Configuring Mylex DAC960PJ PCI RAID Controller
|
||||
Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
|
||||
PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
|
||||
PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
|
||||
Controller Queue Depth: 128, Maximum Blocks per Command: 128
|
||||
Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
|
||||
Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Online, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru
|
||||
No Rebuild or Consistency Check in Progress
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/status
|
||||
OK
|
||||
|
||||
The above messages indicate that everything is healthy, and /proc/rd/status
|
||||
returns "OK" indicating that there are no problems with any DAC960 controller
|
||||
in the system. For demonstration purposes, while I/O is active Physical Drive
|
||||
1:1 is now disconnected, simulating a drive failure. The failure is noted by
|
||||
the driver within 10 seconds of the controller's having detected it, and the
|
||||
driver logs the following console status messages indicating that Logical
|
||||
Drives 0 and 1 are now CRITICAL as a result of Physical Drive 1:1 being DEAD:
|
||||
|
||||
DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
|
||||
DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
|
||||
DAC960#0: Physical Drive 1:1 killed because of timeout on SCSI command
|
||||
DAC960#0: Physical Drive 1:1 is now DEAD
|
||||
DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL
|
||||
DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL
|
||||
|
||||
The Sense Keys logged here are just Check Condition / Unit Attention conditions
|
||||
arising from a SCSI bus reset that is forced by the controller during its error
|
||||
recovery procedures. Concurrently with the above, the driver status available
|
||||
from /proc/rd also reflects the drive failure. The status message in
|
||||
/proc/rd/status has changed from "OK" to "ALERT":
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/status
|
||||
ALERT
|
||||
|
||||
and /proc/rd/c0/current_status has been updated:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Dead, 2201600 blocks
|
||||
1:2 - Disk: Online, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
|
||||
No Rebuild or Consistency Check in Progress
|
||||
|
||||
Since there are no standby drives configured, the system can continue to access
|
||||
the logical drives in a performance degraded mode until the failed drive is
|
||||
replaced and a rebuild operation completed to restore the redundancy of the
|
||||
logical drives. Once Physical Drive 1:1 is replaced with a properly
|
||||
functioning drive, or if the physical drive was killed without having failed
|
||||
(e.g., due to electrical problems on the SCSI bus), the user can instruct the
|
||||
controller to initiate a rebuild operation onto the newly replaced drive:
|
||||
|
||||
gwynedd:/u/lnz# echo "rebuild 1:1" > /proc/rd/c0/user_command
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/user_command
|
||||
Rebuild of Physical Drive 1:1 Initiated
|
||||
|
||||
The echo command instructs the controller to initiate an asynchronous rebuild
|
||||
operation onto Physical Drive 1:1, and the status message that results from the
|
||||
operation is then available for reading from /proc/rd/c0/user_command, as well
|
||||
as being logged to the console by the driver.
|
||||
|
||||
Within 10 seconds of this command the driver logs the initiation of the
|
||||
asynchronous rebuild operation:
|
||||
|
||||
DAC960#0: Rebuild of Physical Drive 1:1 Initiated
|
||||
DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01
|
||||
DAC960#0: Physical Drive 1:1 is now WRITE-ONLY
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 1% completed
|
||||
|
||||
and /proc/rd/c0/current_status is updated:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Write-Only, 2201600 blocks
|
||||
1:2 - Disk: Online, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
|
||||
Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 6% completed
|
||||
|
||||
As the rebuild progresses, the current status in /proc/rd/c0/current_status is
|
||||
updated every 10 seconds:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Write-Only, 2201600 blocks
|
||||
1:2 - Disk: Online, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
|
||||
Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 15% completed
|
||||
|
||||
and every minute a progress message is logged to the console by the driver:
|
||||
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 32% completed
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 63% completed
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 94% completed
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 94% completed
|
||||
|
||||
Finally, the rebuild completes successfully. The driver logs the status of the
|
||||
logical and physical drives and the rebuild completion:
|
||||
|
||||
DAC960#0: Rebuild Completed Successfully
|
||||
DAC960#0: Physical Drive 1:1 is now ONLINE
|
||||
DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE
|
||||
DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE
|
||||
|
||||
/proc/rd/c0/current_status is updated:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Online, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru
|
||||
Rebuild Completed Successfully
|
||||
|
||||
and /proc/rd/status indicates that everything is healthy once again:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/status
|
||||
OK
|
||||
|
||||
|
||||
EXAMPLE II - DRIVE FAILURE WITH A STANDBY DRIVE
|
||||
|
||||
The following annotated logs demonstrate the controller configuration and
|
||||
online status monitoring capabilities of the Linux DAC960 Driver. The test
|
||||
configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a
|
||||
DAC960PJ controller. The physical drives are configured into a single drive
|
||||
group with a standby drive, and the drive group has been configured into two
|
||||
logical drives, one RAID-5 and one RAID-6. Note that these logs are from an
|
||||
earlier version of the driver and the messages have changed somewhat with newer
|
||||
releases, but the functionality remains similar. First, here is the current
|
||||
status of the RAID configuration:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
|
||||
Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
|
||||
Configuring Mylex DAC960PJ PCI RAID Controller
|
||||
Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
|
||||
PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
|
||||
PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
|
||||
Controller Queue Depth: 128, Maximum Blocks per Command: 128
|
||||
Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
|
||||
Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Online, 2201600 blocks
|
||||
1:3 - Disk: Standby, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
|
||||
No Rebuild or Consistency Check in Progress
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/status
|
||||
OK
|
||||
|
||||
The above messages indicate that everything is healthy, and /proc/rd/status
|
||||
returns "OK" indicating that there are no problems with any DAC960 controller
|
||||
in the system. For demonstration purposes, while I/O is active Physical Drive
|
||||
1:2 is now disconnected, simulating a drive failure. The failure is noted by
|
||||
the driver within 10 seconds of the controller's having detected it, and the
|
||||
driver logs the following console status messages:
|
||||
|
||||
DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
|
||||
DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
|
||||
DAC960#0: Physical Drive 1:2 killed because of timeout on SCSI command
|
||||
DAC960#0: Physical Drive 1:2 is now DEAD
|
||||
DAC960#0: Physical Drive 1:2 killed because it was removed
|
||||
DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL
|
||||
DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL
|
||||
|
||||
Since a standby drive is configured, the controller automatically begins
|
||||
rebuilding onto the standby drive:
|
||||
|
||||
DAC960#0: Physical Drive 1:3 is now WRITE-ONLY
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed
|
||||
|
||||
Concurrently with the above, the driver status available from /proc/rd also
|
||||
reflects the drive failure and automatic rebuild. The status message in
|
||||
/proc/rd/status has changed from "OK" to "ALERT":
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/status
|
||||
ALERT
|
||||
|
||||
and /proc/rd/c0/current_status has been updated:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Dead, 2201600 blocks
|
||||
1:3 - Disk: Write-Only, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru
|
||||
Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed
|
||||
|
||||
As the rebuild progresses, the current status in /proc/rd/c0/current_status is
|
||||
updated every 10 seconds:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Dead, 2201600 blocks
|
||||
1:3 - Disk: Write-Only, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru
|
||||
Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed
|
||||
|
||||
and every minute a progress message is logged on the console by the driver:
|
||||
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 76% completed
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 66% completed
|
||||
DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 84% completed
|
||||
|
||||
Finally, the rebuild completes successfully. The driver logs the status of the
|
||||
logical and physical drives and the rebuild completion:
|
||||
|
||||
DAC960#0: Rebuild Completed Successfully
|
||||
DAC960#0: Physical Drive 1:3 is now ONLINE
|
||||
DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE
|
||||
DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE
|
||||
|
||||
/proc/rd/c0/current_status is updated:
|
||||
|
||||
***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
|
||||
Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
|
||||
Configuring Mylex DAC960PJ PCI RAID Controller
|
||||
Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
|
||||
PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
|
||||
PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
|
||||
Controller Queue Depth: 128, Maximum Blocks per Command: 128
|
||||
Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
|
||||
Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Dead, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
|
||||
Rebuild Completed Successfully
|
||||
|
||||
and /proc/rd/status indicates that everything is healthy once again:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/status
|
||||
OK
|
||||
|
||||
Note that the absence of a viable standby drive does not create an "ALERT"
|
||||
status. Once dead Physical Drive 1:2 has been replaced, the controller must be
|
||||
told that this has occurred and that the newly replaced drive should become the
|
||||
new standby drive:
|
||||
|
||||
gwynedd:/u/lnz# echo "make-standby 1:2" > /proc/rd/c0/user_command
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/user_command
|
||||
Make Standby of Physical Drive 1:2 Succeeded
|
||||
|
||||
The echo command instructs the controller to make Physical Drive 1:2 into a
|
||||
standby drive, and the status message that results from the operation is then
|
||||
available for reading from /proc/rd/c0/user_command, as well as being logged to
|
||||
the console by the driver. Within 60 seconds of this command the driver logs:
|
||||
|
||||
DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01
|
||||
DAC960#0: Physical Drive 1:2 is now STANDBY
|
||||
DAC960#0: Make Standby of Physical Drive 1:2 Succeeded
|
||||
|
||||
and /proc/rd/c0/current_status is updated:
|
||||
|
||||
gwynedd:/u/lnz# cat /proc/rd/c0/current_status
|
||||
...
|
||||
Physical Devices:
|
||||
0:1 - Disk: Online, 2201600 blocks
|
||||
0:2 - Disk: Online, 2201600 blocks
|
||||
0:3 - Disk: Online, 2201600 blocks
|
||||
1:1 - Disk: Online, 2201600 blocks
|
||||
1:2 - Disk: Standby, 2201600 blocks
|
||||
1:3 - Disk: Online, 2201600 blocks
|
||||
Logical Drives:
|
||||
/dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
|
||||
/dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
|
||||
Rebuild Completed Successfully
|
@@ -190,7 +190,7 @@ whitespace:
 notify_free       Depending on device usage scenario it may account
                   a) the number of pages freed because of swap slot free
                   notifications or b) the number of pages freed because of
-                  REQ_DISCARD requests sent by bio. The former ones are
+                  REQ_OP_DISCARD requests sent by bio. The former ones are
                   sent to a swap block device when a swap slot is freed,
                   which implies that this disk is being used as a swap disk.
                   The latter ones are sent by filesystem mounted with
@@ -38,7 +38,7 @@ inconsistent file system.
 Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
 they complete as those requests will obviously bypass the device cache.
 
-Any REQ_DISCARD requests are treated like WRITE requests. Otherwise we would
+Any REQ_OP_DISCARD requests are treated like WRITE requests. Otherwise we would
 have all the DISCARD requests, and then the WRITE requests and then the FLUSH
 request. Consider the following example:
 
@@ -28,7 +28,6 @@
 #include <asm/byteorder.h>
 #include <asm/memory.h>
 #include <asm-generic/pci_iomap.h>
-#include <xen/xen.h>
 
 /*
  * ISA I/O bus memory addresses are 1:1 with the physical address.
@@ -459,20 +458,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 
 #include <asm-generic/io.h>
 
-/*
- * can the hardware map this into one segment or not, given no other
- * constraints.
- */
-#define BIOVEC_MERGEABLE(vec1, vec2) \
-	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
-
-struct bio_vec;
-extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
-				      const struct bio_vec *vec2);
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
-	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
-	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-
 #ifdef CONFIG_MMU
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
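For context, the removed ARM definitions above (and the arm64 ones in the next hunk) encoded the segment-merge rule that this series moves into common block layer code: two bio_vecs may share one hardware segment only if the first ends exactly where the second begins in physical memory, with Xen allowed to veto the merge. A minimal sketch of that predicate; the example_* names are illustrative, the Xen hook is omitted, and page_to_phys() comes from the architecture headers:

  #include <linux/bio.h>

  static inline phys_addr_t example_bvec_phys(const struct bio_vec *bv)
  {
          return page_to_phys(bv->bv_page) + bv->bv_offset;
  }

  /* first segment must end exactly where the second begins */
  static inline bool example_bvecs_mergeable(const struct bio_vec *v1,
                                             const struct bio_vec *v2)
  {
          return example_bvec_phys(v1) + v1->bv_len == example_bvec_phys(v2);
  }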
@@ -31,8 +31,6 @@
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
 
-#include <xen/xen.h>
-
 /*
  * Generic IO read/write.  These perform native-endian accesses.
  */
@@ -205,12 +203,5 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
 
 extern int devmem_is_allowed(unsigned long pfn);
 
-struct bio_vec;
-extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
-				      const struct bio_vec *vec2);
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
-	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
-	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_IO_H */
@@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
 		len = bvec.bv_len;
 		len >>= 9;
 		nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
-				bvec_to_phys(&bvec));
+				page_to_phys(bvec.bv_page) + bvec.bv_offset);
 		sec += len;
 	}
 	bio_endio(bio);
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_M68K_FD_H
-#define _ASM_M68K_FD_H
-
-/* Definitions for the Atari Floppy driver */
-
-struct atari_format_descr {
-	int track;		/* to be formatted */
-	int head;		/*   ""     ""     */
-	int sect_offset;	/* offset of first sector */
-};
-
-#endif
@@ -1,80 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_FDREG_H
-#define _LINUX_FDREG_H
-
-/*
-** WD1772 stuff
- */
-
-/* register codes */
-
-#define FDCSELREG_STP	(0x80)	/* command/status register */
-#define FDCSELREG_TRA	(0x82)	/* track register */
-#define FDCSELREG_SEC	(0x84)	/* sector register */
-#define FDCSELREG_DTA	(0x86)	/* data register */
-
-/* register names for FDC_READ/WRITE macros */
-
-#define FDCREG_CMD	0
-#define FDCREG_STATUS	0
-#define FDCREG_TRACK	2
-#define FDCREG_SECTOR	4
-#define FDCREG_DATA	6
-
-/* command opcodes */
-
-#define FDCCMD_RESTORE	(0x00)	/*  -						*/
-#define FDCCMD_SEEK	(0x10)	/*   |						*/
-#define FDCCMD_STEP	(0x20)	/*   |	TYP 1 Commands				*/
-#define FDCCMD_STIN	(0x40)	/*   |						*/
-#define FDCCMD_STOT	(0x60)	/*  -						*/
-#define FDCCMD_RDSEC	(0x80)	/*  -	TYP 2 Commands				*/
-#define FDCCMD_WRSEC	(0xa0)	/*  -         "					*/
-#define FDCCMD_RDADR	(0xc0)	/*  -						*/
-#define FDCCMD_RDTRA	(0xe0)	/*   |	TYP 3 Commands				*/
-#define FDCCMD_WRTRA	(0xf0)	/*  -						*/
-#define FDCCMD_FORCI	(0xd0)	/*  -	TYP 4 Command				*/
-
-/* command modifier bits */
-
-#define FDCCMDADD_SR6	(0x00)	/* step rate settings */
-#define FDCCMDADD_SR12	(0x01)
-#define FDCCMDADD_SR2	(0x02)
-#define FDCCMDADD_SR3	(0x03)
-#define FDCCMDADD_V	(0x04)	/* verify */
-#define FDCCMDADD_H	(0x08)	/* wait for spin-up */
-#define FDCCMDADD_U	(0x10)	/* update track register */
-#define FDCCMDADD_M	(0x10)	/* multiple sector access */
-#define FDCCMDADD_E	(0x04)	/* head settling flag */
-#define FDCCMDADD_P	(0x02)	/* precompensation off */
-#define FDCCMDADD_A0	(0x01)	/* DAM flag */
-
-/* status register bits */
-
-#define	FDCSTAT_MOTORON	(0x80)	/* motor on */
-#define	FDCSTAT_WPROT	(0x40)	/* write protected (FDCCMD_WR*) */
-#define	FDCSTAT_SPINUP	(0x20)	/* motor speed stable (Type I) */
-#define	FDCSTAT_DELDAM	(0x20)	/* sector has deleted DAM (Type II+III) */
-#define	FDCSTAT_RECNF	(0x10)	/* record not found */
-#define	FDCSTAT_CRC	(0x08)	/* CRC error */
-#define	FDCSTAT_TR00	(0x04)	/* Track 00 flag (Type I) */
-#define	FDCSTAT_LOST	(0x04)	/* Lost Data (Type II+III) */
-#define	FDCSTAT_IDX	(0x02)	/* Index status (Type I) */
-#define	FDCSTAT_DRQ	(0x02)	/* DRQ status (Type II+III) */
-#define	FDCSTAT_BUSY	(0x01)	/* FDC is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1  1 -> Side 2 */
-#define DSKSIDE		(0x01)
-
-#define DSKDRVNONE	(0x06)
-#define DSKDRV0		(0x02)
-#define DSKDRV1		(0x04)
-
-/* step rates */
-#define	FDCSTEP_6	0x00
-#define	FDCSTEP_12	0x01
-#define	FDCSTEP_2	0x02
-#define	FDCSTEP_3	0x03
-
-#endif
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/cdrom.h>
@@ -142,7 +143,6 @@ struct cow {
 #define MAX_SG 64
 
 struct ubd {
-	struct list_head restart;
 	/* name (and fd, below) of the file opened for writing, either the
 	 * backing or the cow file. */
 	char *file;
@@ -156,11 +156,8 @@ struct ubd {
 	struct cow cow;
 	struct platform_device pdev;
 	struct request_queue *queue;
+	struct blk_mq_tag_set tag_set;
 	spinlock_t lock;
-	struct scatterlist sg[MAX_SG];
-	struct request *request;
-	int start_sg, end_sg;
-	sector_t rq_pos;
 };
 
 #define DEFAULT_COW { \
@@ -182,10 +179,6 @@ struct ubd {
 	.shared = 0, \
 	.cow = DEFAULT_COW, \
 	.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
-	.request = NULL, \
-	.start_sg = 0, \
-	.end_sg = 0, \
-	.rq_pos = 0, \
 }
 
 /* Protected by ubd_lock */
@@ -196,6 +189,9 @@ static int fake_ide = 0;
 static struct proc_dir_entry *proc_ide_root = NULL;
 static struct proc_dir_entry *proc_ide = NULL;
 
+static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
+				 const struct blk_mq_queue_data *bd);
+
 static void make_proc_ide(void)
 {
 	proc_ide_root = proc_mkdir("ide", NULL);
@@ -436,11 +432,8 @@ __uml_help(udb_setup,
 " in the boot output.\n\n"
 );
 
-static void do_ubd_request(struct request_queue * q);
-
 /* Only changed by ubd_init, which is an initcall. */
 static int thread_fd = -1;
-static LIST_HEAD(restart);
 
 /* Function to read several request pointers at a time
  * handling fractional reads if (and as) needed
@@ -498,9 +491,6 @@ static int bulk_req_safe_read(
 /* Called without dev->lock held, and only in interrupt context. */
 static void ubd_handler(void)
 {
-	struct ubd *ubd;
-	struct list_head *list, *next_ele;
-	unsigned long flags;
 	int n;
 	int count;
 
@@ -520,23 +510,17 @@
 			return;
 		}
 		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
-			blk_end_request(
-				(*irq_req_buffer)[count]->req,
-				BLK_STS_OK,
-				(*irq_req_buffer)[count]->length
-			);
-			kfree((*irq_req_buffer)[count]);
+			struct io_thread_req *io_req = (*irq_req_buffer)[count];
+			int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
+
+			if (!blk_update_request(io_req->req, err, io_req->length))
+				__blk_mq_end_request(io_req->req, err);
+
+			kfree(io_req);
 		}
 	}
+	reactivate_fd(thread_fd, UBD_IRQ);
 
-	list_for_each_safe(list, next_ele, &restart){
-		ubd = container_of(list, struct ubd, restart);
-		list_del_init(&ubd->restart);
-		spin_lock_irqsave(&ubd->lock, flags);
-		do_ubd_request(ubd->queue);
-		spin_unlock_irqrestore(&ubd->lock, flags);
-	}
-	reactivate_fd(thread_fd, UBD_IRQ);
 }
 
 static irqreturn_t ubd_intr(int irq, void *dev)
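The new completion path pairs blk_update_request() with __blk_mq_end_request() so a request is finished only once all of its bytes have been accounted. A minimal sketch of that idiom for a hypothetical blk-mq driver completing one segment at a time (the helper name is illustrative):

  #include <linux/blk-mq.h>

  /*
   * Account 'bytes' of the request as done; blk_update_request()
   * returns false once nothing is left, at which point the request
   * itself is ended.
   */
  static void example_complete_segment(struct request *req, blk_status_t err,
                                       unsigned int bytes)
  {
          if (!blk_update_request(req, err, bytes))
                  __blk_mq_end_request(req, err);
  }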
@@ -857,6 +841,7 @@ static void ubd_device_release(struct device *dev)
 	struct ubd *ubd_dev = dev_get_drvdata(dev);
 
 	blk_cleanup_queue(ubd_dev->queue);
+	blk_mq_free_tag_set(&ubd_dev->tag_set);
 	*ubd_dev = ((struct ubd) DEFAULT_UBD);
 }
 
@@ -891,7 +876,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
 	disk->private_data = &ubd_devs[unit];
 	disk->queue = ubd_devs[unit].queue;
-	device_add_disk(parent, disk);
+	device_add_disk(parent, disk, NULL);
 
 	*disk_out = disk;
 	return 0;
@@ -899,6 +884,10 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
 #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
 
+static const struct blk_mq_ops ubd_mq_ops = {
+	.queue_rq = ubd_queue_rq,
+};
+
 static int ubd_add(int n, char **error_out)
 {
 	struct ubd *ubd_dev = &ubd_devs[n];
@@ -915,15 +904,23 @@ static int ubd_add(int n, char **error_out)
 
 	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 
-	INIT_LIST_HEAD(&ubd_dev->restart);
-	sg_init_table(ubd_dev->sg, MAX_SG);
+	ubd_dev->tag_set.ops = &ubd_mq_ops;
+	ubd_dev->tag_set.queue_depth = 64;
+	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
+	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+	ubd_dev->tag_set.driver_data = ubd_dev;
+	ubd_dev->tag_set.nr_hw_queues = 1;
 
-	err = -ENOMEM;
-	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
-	if (ubd_dev->queue == NULL) {
-		*error_out = "Failed to initialize device queue";
+	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
+	if (err)
 		goto out;
+
+	ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
+	if (IS_ERR(ubd_dev->queue)) {
+		err = PTR_ERR(ubd_dev->queue);
+		goto out_cleanup;
 	}
 
 	ubd_dev->queue->queuedata = ubd_dev;
 	blk_queue_write_cache(ubd_dev->queue, true, false);
@@ -931,7 +928,7 @@ static int ubd_add(int n, char **error_out)
 	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 	if(err){
 		*error_out = "Failed to register device";
-		goto out_cleanup;
+		goto out_cleanup_tags;
 	}
 
 	if (fake_major != UBD_MAJOR)
@@ -949,6 +946,8 @@ static int ubd_add(int n, char **error_out)
 out:
 	return err;
 
+out_cleanup_tags:
+	blk_mq_free_tag_set(&ubd_dev->tag_set);
 out_cleanup:
 	blk_cleanup_queue(ubd_dev->queue);
 	goto out;
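The ubd changes above follow the usual blk-mq registration pattern: fill in a blk_mq_tag_set, allocate it, create the request queue from it, and tear both down on errors. A condensed, illustrative sketch of that pattern for a hypothetical driver (names such as mydev and my_queue_rq are made up; error handling is abbreviated):

  #include <linux/blk-mq.h>
  #include <linux/err.h>

  struct mydev {
          struct blk_mq_tag_set   tag_set;
          struct request_queue    *queue;
  };

  static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                  const struct blk_mq_queue_data *bd)
  {
          /* hand bd->rq to the backend here */
          blk_mq_start_request(bd->rq);
          return BLK_STS_OK;
  }

  static const struct blk_mq_ops my_mq_ops = {
          .queue_rq = my_queue_rq,
  };

  static int mydev_init_queue(struct mydev *dev)
  {
          int err;

          dev->tag_set.ops = &my_mq_ops;
          dev->tag_set.nr_hw_queues = 1;
          dev->tag_set.queue_depth = 64;
          dev->tag_set.numa_node = NUMA_NO_NODE;
          dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
          dev->tag_set.driver_data = dev;

          err = blk_mq_alloc_tag_set(&dev->tag_set);
          if (err)
                  return err;

          dev->queue = blk_mq_init_queue(&dev->tag_set);
          if (IS_ERR(dev->queue)) {
                  blk_mq_free_tag_set(&dev->tag_set);
                  return PTR_ERR(dev->queue);
          }
          dev->queue->queuedata = dev;
          return 0;
  }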
@@ -1290,123 +1289,82 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
req->bitmap_words, bitmap_len);
}

/* Called with dev->lock held */
static void prepare_request(struct request *req, struct io_thread_req *io_req,
unsigned long long offset, int page_offset,
int len, struct page *page)
{
struct gendisk *disk = req->rq_disk;
struct ubd *ubd_dev = disk->private_data;

io_req->req = req;
io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
ubd_dev->fd;
io_req->fds[1] = ubd_dev->fd;
io_req->cow_offset = -1;
io_req->offset = offset;
io_req->length = len;
io_req->error = 0;
io_req->sector_mask = 0;

io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
io_req->offsets[0] = 0;
io_req->offsets[1] = ubd_dev->cow.data_offset;
io_req->buffer = page_address(page) + page_offset;
io_req->sectorsize = 1 << 9;

if(ubd_dev->cow.file != NULL)
cowify_req(io_req, ubd_dev->cow.bitmap,
ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);

}

/* Called with dev->lock held */
static void prepare_flush_request(struct request *req,
struct io_thread_req *io_req)
{
struct gendisk *disk = req->rq_disk;
struct ubd *ubd_dev = disk->private_data;

io_req->req = req;
io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
ubd_dev->fd;
io_req->op = UBD_FLUSH;
}

static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
{
int n = os_write_file(thread_fd, &io_req,
sizeof(io_req));
if (n != sizeof(io_req)) {
if (n != -EAGAIN)
printk("write to io thread failed, "
"errno = %d\n", -n);
else if (list_empty(&dev->restart))
list_add(&dev->restart, &restart);

kfree(io_req);
return false;
}
return true;
}

/* Called with dev->lock held */
static void do_ubd_request(struct request_queue *q)
static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
u64 off, struct bio_vec *bvec)
{
struct ubd *dev = hctx->queue->queuedata;
struct io_thread_req *io_req;
struct request *req;
int ret;

while(1){
struct ubd *dev = q->queuedata;
if(dev->request == NULL){
struct request *req = blk_fetch_request(q);
if(req == NULL)
return;
io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
if (!io_req)
return -ENOMEM;

dev->request = req;
dev->rq_pos = blk_rq_pos(req);
dev->start_sg = 0;
dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
io_req->req = req;
if (dev->cow.file)
io_req->fds[0] = dev->cow.fd;
else
io_req->fds[0] = dev->fd;

if (req_op(req) == REQ_OP_FLUSH) {
io_req->op = UBD_FLUSH;
} else {
io_req->fds[1] = dev->fd;
io_req->cow_offset = -1;
io_req->offset = off;
io_req->length = bvec->bv_len;
io_req->error = 0;
io_req->sector_mask = 0;

io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
io_req->offsets[0] = 0;
io_req->offsets[1] = dev->cow.data_offset;
io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
io_req->sectorsize = 1 << 9;

if (dev->cow.file) {
cowify_req(io_req, dev->cow.bitmap,
dev->cow.bitmap_offset, dev->cow.bitmap_len);
}

req = dev->request;

if (req_op(req) == REQ_OP_FLUSH) {
io_req = kmalloc(sizeof(struct io_thread_req),
GFP_ATOMIC);
if (io_req == NULL) {
if (list_empty(&dev->restart))
list_add(&dev->restart, &restart);
return;
}
prepare_flush_request(req, io_req);
if (submit_request(io_req, dev) == false)
return;
}

while(dev->start_sg < dev->end_sg){
struct scatterlist *sg = &dev->sg[dev->start_sg];

io_req = kmalloc(sizeof(struct io_thread_req),
GFP_ATOMIC);
if(io_req == NULL){
if(list_empty(&dev->restart))
list_add(&dev->restart, &restart);
return;
}
prepare_request(req, io_req,
(unsigned long long)dev->rq_pos << 9,
sg->offset, sg->length, sg_page(sg));

if (submit_request(io_req, dev) == false)
return;

dev->rq_pos += sg->length >> 9;
dev->start_sg++;
}
dev->end_sg = 0;
dev->request = NULL;
}

ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
if (ret != sizeof(io_req)) {
if (ret != -EAGAIN)
pr_err("write to io thread failed: %d\n", -ret);
kfree(io_req);
}

return ret;
}

static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
int ret = 0;

blk_mq_start_request(req);

if (req_op(req) == REQ_OP_FLUSH) {
ret = ubd_queue_one_vec(hctx, req, 0, NULL);
} else {
struct req_iterator iter;
struct bio_vec bvec;
u64 off = (u64)blk_rq_pos(req) << 9;

rq_for_each_segment(bvec, req, iter) {
ret = ubd_queue_one_vec(hctx, req, off, &bvec);
if (ret < 0)
goto out;
off += bvec.bv_len;
}
}
out:
if (ret < 0) {
blk_mq_requeue_request(req, true);
}
return BLK_STS_OK;
}

static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
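The rewritten dispatch path above swaps the pull-style do_ubd_request() loop for a push-style ->queue_rq() handler that walks the request with rq_for_each_segment(). The sketch below shows only the generic shape of such a handler and completes synchronously for brevity; ubd itself forwards each segment to its I/O thread via os_write_file(), as shown above, and finishes the request outside this hunk.

/* Minimal shape of a blk-mq ->queue_rq() handler; illustration only, not taken
 * verbatim from this patch. Real drivers usually complete asynchronously from
 * an interrupt or worker by calling blk_mq_end_request() later. */
static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                     const struct blk_mq_queue_data *bd)
{
        struct request *req = bd->rq;
        struct req_iterator iter;
        struct bio_vec bvec;
        sector_t sector = blk_rq_pos(req);      /* starting sector, 512-byte units */

        blk_mq_start_request(req);

        rq_for_each_segment(bvec, req, iter) {
                /* hand [sector, sector + (bvec.bv_len >> 9)) to the backend */
                sector += bvec.bv_len >> 9;
        }

        blk_mq_end_request(req, BLK_STS_OK);    /* synchronous completion, for the sketch */
        return BLK_STS_OK;
}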
@ -369,18 +369,6 @@ extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size);
|
|||
|
||||
extern bool is_early_ioremap_ptep(pte_t *ptep);
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
#include <xen/xen.h>
|
||||
struct bio_vec;
|
||||
|
||||
extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
|
||||
const struct bio_vec *vec2);
|
||||
|
||||
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
|
||||
(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
|
||||
(!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
|
||||
#endif /* CONFIG_XEN */
|
||||
|
||||
#define IO_SPACE_LIMIT 0xffff
|
||||
|
||||
#include <asm-generic/io.h>
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
#ifndef _ASM_X86_XEN_EVENTS_H
|
||||
#define _ASM_X86_XEN_EVENTS_H
|
||||
|
||||
#include <xen/xen.h>
|
||||
|
||||
enum ipi_vector {
|
||||
XEN_RESCHEDULE_VECTOR,
|
||||
XEN_CALL_FUNCTION_VECTOR,
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <linux/kexec.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/features.h>
|
||||
#include <xen/page.h>
|
||||
#include <xen/interface/memory.h>
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <asm/xen/interface.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/interface/memory.h>
|
||||
#include <xen/interface/hvm/start_info.h>
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <linux/io.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/platform_pci.h>
|
||||
#include "xen-ops.h"
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <linux/interrupt.h>
|
||||
|
||||
#include <asm/xen/hypercall.h>
|
||||
#include <xen/xen.h>
|
||||
#include <xen/page.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <xen/interface/vcpu.h>
|
||||
|
|
|
@ -74,7 +74,6 @@ config BLK_DEV_BSG
|
|||
|
||||
config BLK_DEV_BSGLIB
|
||||
bool "Block layer SG support v4 helper lib"
|
||||
default n
|
||||
select BLK_DEV_BSG
|
||||
select BLK_SCSI_REQUEST
|
||||
help
|
||||
|
@ -107,7 +106,6 @@ config BLK_DEV_ZONED
|
|||
config BLK_DEV_THROTTLING
|
||||
bool "Block layer bio throttling support"
|
||||
depends on BLK_CGROUP=y
|
||||
default n
|
||||
---help---
|
||||
Block layer bio throttling support. It can be used to limit
|
||||
the IO rate to a device. IO rate policies are per cgroup and
|
||||
|
@ -119,7 +117,6 @@ config BLK_DEV_THROTTLING
|
|||
config BLK_DEV_THROTTLING_LOW
|
||||
bool "Block throttling .low limit interface support (EXPERIMENTAL)"
|
||||
depends on BLK_DEV_THROTTLING
|
||||
default n
|
||||
---help---
|
||||
Add .low limit interface for block throttling. The low limit is a best
|
||||
effort limit to prioritize cgroups. Depending on the setting, the limit
|
||||
|
@ -130,7 +127,6 @@ config BLK_DEV_THROTTLING_LOW
|
|||
|
||||
config BLK_CMDLINE_PARSER
|
||||
bool "Block device command line partition parser"
|
||||
default n
|
||||
---help---
|
||||
Enabling this option allows you to specify the partition layout from
|
||||
the kernel boot args. This is typically of use for embedded devices
|
||||
|
@ -141,7 +137,6 @@ config BLK_CMDLINE_PARSER
|
|||
|
||||
config BLK_WBT
|
||||
bool "Enable support for block device writeback throttling"
|
||||
default n
|
||||
---help---
|
||||
Enabling this option enables the block layer to throttle buffered
|
||||
background writeback from the VM, making it more smooth and having
|
||||
|
@ -152,7 +147,6 @@ config BLK_WBT
|
|||
config BLK_CGROUP_IOLATENCY
|
||||
bool "Enable support for latency based cgroup IO protection"
|
||||
depends on BLK_CGROUP=y
|
||||
default n
|
||||
---help---
|
||||
Enabling this option enables the .latency interface for IO throttling.
|
||||
The IO controller will attempt to maintain average IO latencies below
|
||||
|
@ -163,7 +157,6 @@ config BLK_CGROUP_IOLATENCY
|
|||
|
||||
config BLK_WBT_SQ
|
||||
bool "Single queue writeback throttling"
|
||||
default n
|
||||
depends on BLK_WBT
|
||||
---help---
|
||||
Enable writeback throttling by default on legacy single queue devices
|
||||
|
@ -228,4 +221,7 @@ config BLK_MQ_RDMA
|
|||
depends on BLOCK && INFINIBAND
|
||||
default y
|
||||
|
||||
config BLK_PM
|
||||
def_bool BLOCK && PM
|
||||
|
||||
source block/Kconfig.iosched
|
||||
|
|
|
@ -36,7 +36,6 @@ config IOSCHED_CFQ
|
|||
config CFQ_GROUP_IOSCHED
|
||||
bool "CFQ Group Scheduling support"
|
||||
depends on IOSCHED_CFQ && BLK_CGROUP
|
||||
default n
|
||||
---help---
|
||||
Enable group IO scheduling in CFQ.
|
||||
|
||||
|
@ -82,7 +81,6 @@ config MQ_IOSCHED_KYBER
|
|||
|
||||
config IOSCHED_BFQ
|
||||
tristate "BFQ I/O scheduler"
|
||||
default n
|
||||
---help---
|
||||
BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of
|
||||
of the device among all processes according to their weights,
|
||||
|
@ -94,7 +92,6 @@ config IOSCHED_BFQ
|
|||
config BFQ_GROUP_IOSCHED
|
||||
bool "BFQ hierarchical scheduling support"
|
||||
depends on IOSCHED_BFQ && BLK_CGROUP
|
||||
default n
|
||||
---help---
|
||||
|
||||
Enable hierarchical scheduling in BFQ, using the blkio
|
||||
|
|
|
@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o
|
|||
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
|
||||
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
|
||||
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
|
||||
obj-$(CONFIG_BLK_PM) += blk-pm.o
|
||||
|
|
|
@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
|||
uint64_t serial_nr;
|
||||
|
||||
rcu_read_lock();
|
||||
serial_nr = bio_blkcg(bio)->css.serial_nr;
|
||||
serial_nr = __bio_blkcg(bio)->css.serial_nr;
|
||||
|
||||
/*
|
||||
* Check whether blkcg has changed. The condition may trigger
|
||||
|
@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
|||
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
|
||||
goto out;
|
||||
|
||||
bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
|
||||
bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
|
||||
/*
|
||||
* Update blkg_path for bfq_log_* functions. We cache this
|
||||
* path, and update it here, for the following
|
||||
|
|
|
@ -624,12 +624,13 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
|||
}
|
||||
|
||||
/*
|
||||
* Tell whether there are active queues or groups with differentiated weights.
|
||||
* Tell whether there are active queues with different weights or
|
||||
* active groups.
|
||||
*/
|
||||
static bool bfq_differentiated_weights(struct bfq_data *bfqd)
|
||||
static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
|
||||
{
|
||||
/*
|
||||
* For weights to differ, at least one of the trees must contain
|
||||
* For queue weights to differ, queue_weights_tree must contain
|
||||
* at least two nodes.
|
||||
*/
|
||||
return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
|
||||
|
@ -637,9 +638,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
|
|||
bfqd->queue_weights_tree.rb_node->rb_right)
|
||||
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|
||||
) ||
|
||||
(!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
|
||||
(bfqd->group_weights_tree.rb_node->rb_left ||
|
||||
bfqd->group_weights_tree.rb_node->rb_right)
|
||||
(bfqd->num_active_groups > 0
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
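As a rough user-space model of what the reworked check above computes (the real code inspects the queue_weights_tree rb-tree and the new num_active_groups counter; the field names below are invented for the toy):

#include <stdbool.h>

/* Toy model of bfq_varied_queue_weights_or_active_groups(): the scenario may be
 * asymmetric if the active queues do not all share one weight, or if any group
 * is active at all. */
struct toy_bfqd {
        int distinct_active_queue_weights;      /* stands in for the rb-tree node count */
        int num_active_groups;
};

static bool toy_varied_or_grouped(const struct toy_bfqd *d)
{
        return d->distinct_active_queue_weights > 1 || d->num_active_groups > 0;
}

/* {1, 0} -> false: symmetric, no need to idle for service guarantees
 * {1, 2} -> true:  groups active, conservatively treated as asymmetric
 * {3, 0} -> true:  active queues with different weights */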
@ -657,26 +656,25 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
|
|||
* 3) all active groups at the same level in the groups tree have the same
|
||||
* number of children.
|
||||
*
|
||||
* Unfortunately, keeping the necessary state for evaluating exactly the
|
||||
* above symmetry conditions would be quite complex and time-consuming.
|
||||
* Therefore this function evaluates, instead, the following stronger
|
||||
* sub-conditions, for which it is much easier to maintain the needed
|
||||
* state:
|
||||
* Unfortunately, keeping the necessary state for evaluating exactly
|
||||
* the last two symmetry sub-conditions above would be quite complex
|
||||
* and time consuming. Therefore this function evaluates, instead,
|
||||
* only the following stronger two sub-conditions, for which it is
|
||||
* much easier to maintain the needed state:
|
||||
* 1) all active queues have the same weight,
|
||||
* 2) all active groups have the same weight,
|
||||
* 3) all active groups have at most one active child each.
|
||||
* In particular, the last two conditions are always true if hierarchical
|
||||
* support and the cgroups interface are not enabled, thus no state needs
|
||||
* to be maintained in this case.
|
||||
* 2) there are no active groups.
|
||||
* In particular, the last condition is always true if hierarchical
|
||||
* support or the cgroups interface are not enabled, thus no state
|
||||
* needs to be maintained in this case.
|
||||
*/
|
||||
static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
|
||||
{
|
||||
return !bfq_differentiated_weights(bfqd);
|
||||
return !bfq_varied_queue_weights_or_active_groups(bfqd);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the weight-counter tree passed as input contains no counter for
|
||||
* the weight of the input entity, then add that counter; otherwise just
|
||||
* the weight of the input queue, then add that counter; otherwise just
|
||||
* increment the existing counter.
|
||||
*
|
||||
* Note that weight-counter trees contain few nodes in mostly symmetric
|
||||
|
@ -687,25 +685,25 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
|
|||
* In most scenarios, the rate at which nodes are created/destroyed
|
||||
* should be low too.
|
||||
*/
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct rb_root *root)
|
||||
{
|
||||
struct bfq_entity *entity = &bfqq->entity;
|
||||
struct rb_node **new = &(root->rb_node), *parent = NULL;
|
||||
|
||||
/*
|
||||
* Do not insert if the entity is already associated with a
|
||||
* Do not insert if the queue is already associated with a
|
||||
* counter, which happens if:
|
||||
* 1) the entity is associated with a queue,
|
||||
* 2) a request arrival has caused the queue to become both
|
||||
* 1) a request arrival has caused the queue to become both
|
||||
* non-weight-raised, and hence change its weight, and
|
||||
* backlogged; in this respect, each of the two events
|
||||
* causes an invocation of this function,
|
||||
* 3) this is the invocation of this function caused by the
|
||||
* 2) this is the invocation of this function caused by the
|
||||
* second event. This second invocation is actually useless,
|
||||
* and we handle this fact by exiting immediately. More
|
||||
* efficient or clearer solutions might possibly be adopted.
|
||||
*/
|
||||
if (entity->weight_counter)
|
||||
if (bfqq->weight_counter)
|
||||
return;
|
||||
|
||||
while (*new) {
|
||||
|
@ -715,7 +713,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
|||
parent = *new;
|
||||
|
||||
if (entity->weight == __counter->weight) {
|
||||
entity->weight_counter = __counter;
|
||||
bfqq->weight_counter = __counter;
|
||||
goto inc_counter;
|
||||
}
|
||||
if (entity->weight < __counter->weight)
|
||||
|
@ -724,66 +722,67 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
|||
new = &((*new)->rb_right);
|
||||
}
|
||||
|
||||
entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
|
||||
GFP_ATOMIC);
|
||||
bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
|
||||
GFP_ATOMIC);
|
||||
|
||||
/*
|
||||
* In the unlucky event of an allocation failure, we just
|
||||
* exit. This will cause the weight of entity to not be
|
||||
* considered in bfq_differentiated_weights, which, in its
|
||||
* turn, causes the scenario to be deemed wrongly symmetric in
|
||||
* case entity's weight would have been the only weight making
|
||||
* the scenario asymmetric. On the bright side, no unbalance
|
||||
* will however occur when entity becomes inactive again (the
|
||||
* invocation of this function is triggered by an activation
|
||||
* of entity). In fact, bfq_weights_tree_remove does nothing
|
||||
* if !entity->weight_counter.
|
||||
* exit. This will cause the weight of queue to not be
|
||||
* considered in bfq_varied_queue_weights_or_active_groups,
|
||||
* which, in its turn, causes the scenario to be deemed
|
||||
* wrongly symmetric in case bfqq's weight would have been
|
||||
* the only weight making the scenario asymmetric. On the
|
||||
* bright side, no unbalance will however occur when bfqq
|
||||
* becomes inactive again (the invocation of this function
|
||||
* is triggered by an activation of queue). In fact,
|
||||
* bfq_weights_tree_remove does nothing if
|
||||
* !bfqq->weight_counter.
|
||||
*/
|
||||
if (unlikely(!entity->weight_counter))
|
||||
if (unlikely(!bfqq->weight_counter))
|
||||
return;
|
||||
|
||||
entity->weight_counter->weight = entity->weight;
|
||||
rb_link_node(&entity->weight_counter->weights_node, parent, new);
|
||||
rb_insert_color(&entity->weight_counter->weights_node, root);
|
||||
bfqq->weight_counter->weight = entity->weight;
|
||||
rb_link_node(&bfqq->weight_counter->weights_node, parent, new);
|
||||
rb_insert_color(&bfqq->weight_counter->weights_node, root);
|
||||
|
||||
inc_counter:
|
||||
entity->weight_counter->num_active++;
|
||||
bfqq->weight_counter->num_active++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrement the weight counter associated with the entity, and, if the
|
||||
* Decrement the weight counter associated with the queue, and, if the
|
||||
* counter reaches 0, remove the counter from the tree.
|
||||
* See the comments to the function bfq_weights_tree_add() for considerations
|
||||
* about overhead.
|
||||
*/
|
||||
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_entity *entity,
|
||||
struct bfq_queue *bfqq,
|
||||
struct rb_root *root)
|
||||
{
|
||||
if (!entity->weight_counter)
|
||||
if (!bfqq->weight_counter)
|
||||
return;
|
||||
|
||||
entity->weight_counter->num_active--;
|
||||
if (entity->weight_counter->num_active > 0)
|
||||
bfqq->weight_counter->num_active--;
|
||||
if (bfqq->weight_counter->num_active > 0)
|
||||
goto reset_entity_pointer;
|
||||
|
||||
rb_erase(&entity->weight_counter->weights_node, root);
|
||||
kfree(entity->weight_counter);
|
||||
rb_erase(&bfqq->weight_counter->weights_node, root);
|
||||
kfree(bfqq->weight_counter);
|
||||
|
||||
reset_entity_pointer:
|
||||
entity->weight_counter = NULL;
|
||||
bfqq->weight_counter = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke __bfq_weights_tree_remove on bfqq and all its inactive
|
||||
* parent entities.
|
||||
* Invoke __bfq_weights_tree_remove on bfqq and decrement the number
|
||||
* of active groups for each queue's inactive parent entity.
|
||||
*/
|
||||
void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq)
|
||||
{
|
||||
struct bfq_entity *entity = bfqq->entity.parent;
|
||||
|
||||
__bfq_weights_tree_remove(bfqd, &bfqq->entity,
|
||||
__bfq_weights_tree_remove(bfqd, bfqq,
|
||||
&bfqd->queue_weights_tree);
|
||||
|
||||
for_each_entity(entity) {
|
||||
|
@ -797,17 +796,13 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
|||
* next_in_service for details on why
|
||||
* in_service_entity must be checked too).
|
||||
*
|
||||
* As a consequence, the weight of entity is
|
||||
* not to be removed. In addition, if entity
|
||||
* is active, then its parent entities are
|
||||
* active as well, and thus their weights are
|
||||
* not to be removed either. In the end, this
|
||||
* loop must stop here.
|
||||
* As a consequence, its parent entities are
|
||||
* active as well, and thus this loop must
|
||||
* stop here.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
__bfq_weights_tree_remove(bfqd, entity,
|
||||
&bfqd->group_weights_tree);
|
||||
bfqd->num_active_groups--;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3182,6 +3177,13 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
|
|||
jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
|
||||
}
|
||||
|
||||
static bool bfq_bfqq_injectable(struct bfq_queue *bfqq)
|
||||
{
|
||||
return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
|
||||
blk_queue_nonrot(bfqq->bfqd->queue) &&
|
||||
bfqq->bfqd->hw_tag;
|
||||
}
|
||||
|
||||
/**
|
||||
* bfq_bfqq_expire - expire a queue.
|
||||
* @bfqd: device owning the queue.
|
||||
|
@ -3291,6 +3293,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
|
|||
if (ref == 1) /* bfqq is gone, no more actions on it */
|
||||
return;
|
||||
|
||||
bfqq->injected_service = 0;
|
||||
|
||||
/* mark bfqq as waiting a request only if a bic still points to it */
|
||||
if (!bfq_bfqq_busy(bfqq) &&
|
||||
reason != BFQQE_BUDGET_TIMEOUT &&
|
||||
|
@ -3497,9 +3501,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
|||
* symmetric scenario where:
|
||||
* (i) each of these processes must get the same throughput as
|
||||
* the others;
|
||||
* (ii) all these processes have the same I/O pattern
|
||||
(either sequential or random).
|
||||
* In fact, in such a scenario, the drive will tend to treat
|
||||
* (ii) the I/O of each process has the same properties, in
|
||||
* terms of locality (sequential or random), direction
|
||||
* (reads or writes), request sizes, greediness
|
||||
* (from I/O-bound to sporadic), and so on.
|
||||
* In fact, in such a scenario, the drive tends to treat
|
||||
* the requests of each of these processes in about the same
|
||||
* way as the requests of the others, and thus to provide
|
||||
* each of these processes with about the same throughput
|
||||
|
@ -3508,18 +3514,50 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
|||
* certainly needed to guarantee that bfqq receives its
|
||||
* assigned fraction of the device throughput (see [1] for
|
||||
* details).
|
||||
* The problem is that idling may significantly reduce
|
||||
* throughput with certain combinations of types of I/O and
|
||||
* devices. An important example is sync random I/O, on flash
|
||||
* storage with command queueing. So, unless bfqq falls in the
|
||||
* above cases where idling also boosts throughput, it would
|
||||
* be important to check conditions (i) and (ii) accurately,
|
||||
* so as to avoid idling when not strictly needed for service
|
||||
* guarantees.
|
||||
*
|
||||
* We address this issue by controlling, actually, only the
|
||||
* symmetry sub-condition (i), i.e., provided that
|
||||
* sub-condition (i) holds, idling is not performed,
|
||||
* regardless of whether sub-condition (ii) holds. In other
|
||||
* words, only if sub-condition (i) holds, then idling is
|
||||
* Unfortunately, it is extremely difficult to thoroughly
|
||||
* check condition (ii). And, in case there are active groups,
|
||||
* it becomes very difficult to check condition (i) too. In
|
||||
* fact, if there are active groups, then, for condition (i)
|
||||
* to become false, it is enough that an active group contains
|
||||
* more active processes or sub-groups than some other active
|
||||
* group. We address this issue with the following bi-modal
|
||||
* behavior, implemented in the function
|
||||
* bfq_symmetric_scenario().
|
||||
*
|
||||
* If there are active groups, then the scenario is tagged as
|
||||
* asymmetric, conservatively, without checking any of the
|
||||
* conditions (i) and (ii). So the device is idled for bfqq.
|
||||
* This behavior matches also the fact that groups are created
|
||||
* exactly if controlling I/O (to preserve bandwidth and
|
||||
* latency guarantees) is a primary concern.
|
||||
*
|
||||
* On the opposite end, if there are no active groups, then
|
||||
* only condition (i) is actually controlled, i.e., provided
|
||||
* that condition (i) holds, idling is not performed,
|
||||
* regardless of whether condition (ii) holds. In other words,
|
||||
* only if condition (i) does not hold, then idling is
|
||||
* allowed, and the device tends to be prevented from queueing
|
||||
* many requests, possibly of several processes. The reason
|
||||
* for not controlling also sub-condition (ii) is that we
|
||||
* exploit preemption to preserve guarantees in case of
|
||||
* symmetric scenarios, even if (ii) does not hold, as
|
||||
* explained in the next two paragraphs.
|
||||
* many requests, possibly of several processes. Since there
|
||||
* are no active groups, then, to control condition (i) it is
|
||||
* enough to check whether all active queues have the same
|
||||
* weight.
|
||||
*
|
||||
* Not checking condition (ii) evidently exposes bfqq to the
|
||||
* risk of getting less throughput than its fair share.
|
||||
* However, for queues with the same weight, a further
|
||||
* mechanism, preemption, mitigates or even eliminates this
|
||||
* problem. And it does so without consequences on overall
|
||||
* throughput. This mechanism and its benefits are explained
|
||||
* in the next three paragraphs.
|
||||
*
|
||||
* Even if a queue, say Q, is expired when it remains idle, Q
|
||||
* can still preempt the new in-service queue if the next
|
||||
|
@ -3533,11 +3571,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
|||
* idling allows the internal queues of the device to contain
|
||||
* many requests, and thus to reorder requests, we can rather
|
||||
* safely assume that the internal scheduler still preserves a
|
||||
* minimum of mid-term fairness. The motivation for using
|
||||
* preemption instead of idling is that, by not idling,
|
||||
* service guarantees are preserved without minimally
|
||||
* sacrificing throughput. In other words, both a high
|
||||
* throughput and its desired distribution are obtained.
|
||||
* minimum of mid-term fairness.
|
||||
*
|
||||
* More precisely, this preemption-based, idleless approach
|
||||
* provides fairness in terms of IOPS, and not sectors per
|
||||
|
@ -3556,22 +3590,27 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
|||
* 1024/8 times as high as the service received by the other
|
||||
* queue.
|
||||
*
|
||||
* On the other hand, device idling is performed, and thus
|
||||
* pure sector-domain guarantees are provided, for the
|
||||
* following queues, which are likely to need stronger
|
||||
* throughput guarantees: weight-raised queues, and queues
|
||||
* with a higher weight than other queues. When such queues
|
||||
* are active, sub-condition (i) is false, which triggers
|
||||
* device idling.
|
||||
* The motivation for using preemption instead of idling (for
|
||||
* queues with the same weight) is that, by not idling,
|
||||
* service guarantees are preserved (completely or at least in
|
||||
* part) without minimally sacrificing throughput. And, if
|
||||
* there is no active group, then the primary expectation for
|
||||
* this device is probably a high throughput.
|
||||
*
|
||||
* According to the above considerations, the next variable is
|
||||
* true (only) if sub-condition (i) holds. To compute the
|
||||
* value of this variable, we not only use the return value of
|
||||
* the function bfq_symmetric_scenario(), but also check
|
||||
* whether bfqq is being weight-raised, because
|
||||
* bfq_symmetric_scenario() does not take into account also
|
||||
* weight-raised queues (see comments on
|
||||
* bfq_weights_tree_add()).
|
||||
* We are now left only with explaining the additional
|
||||
* compound condition that is checked below for deciding
|
||||
* whether the scenario is asymmetric. To explain this
|
||||
* compound condition, we need to add that the function
|
||||
* bfq_symmetric_scenario checks the weights of only
|
||||
* non-weight-raised queues, for efficiency reasons (see
|
||||
* comments on bfq_weights_tree_add()). Then the fact that
|
||||
* bfqq is weight-raised is checked explicitly here. More
|
||||
* precisely, the compound condition below takes into account
|
||||
* also the fact that, even if bfqq is being weight-raised,
|
||||
* the scenario is still symmetric if all active queues happen
|
||||
* to be weight-raised. Actually, we should be even more
|
||||
* precise here, and differentiate between interactive weight
|
||||
* raising and soft real-time weight raising.
|
||||
*
|
||||
* As a side note, it is worth considering that the above
|
||||
* device-idling countermeasures may however fail in the
|
||||
|
@ -3583,7 +3622,8 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
|||
* to let requests be served in the desired order until all
|
||||
* the requests already queued in the device have been served.
|
||||
*/
|
||||
asymmetric_scenario = bfqq->wr_coeff > 1 ||
|
||||
asymmetric_scenario = (bfqq->wr_coeff > 1 &&
|
||||
bfqd->wr_busy_queues < bfqd->busy_queues) ||
|
||||
!bfq_symmetric_scenario(bfqd);
|
||||
|
||||
/*
|
||||
|
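A worked reading of the reworked asymmetric_scenario condition above, with made-up numbers:

#include <stdbool.h>

/* Illustrative values only; mirrors the shape of the new compound condition. */
static bool toy_asymmetric(void)
{
        bool weight_raised  = true;     /* bfqq->wr_coeff > 1 */
        int  busy_queues    = 5;
        int  wr_busy_queues = 2;        /* only 2 of the 5 busy queues are weight-raised */
        bool symmetric      = true;     /* pretend bfq_symmetric_scenario() returned true */

        return (weight_raised && wr_busy_queues < busy_queues) || !symmetric;
        /* -> true: idle for bfqq, because its weight raising is not shared by
         * all busy queues; with wr_busy_queues == busy_queues the first clause
         * drops out and the weight/active-group check decides on its own. */
}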
@ -3629,6 +3669,30 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
|
|||
return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq);
|
||||
}
|
||||
|
||||
static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd)
|
||||
{
|
||||
struct bfq_queue *bfqq;
|
||||
|
||||
/*
|
||||
* A linear search; but, with a high probability, very few
|
||||
* steps are needed to find a candidate queue, i.e., a queue
|
||||
* with enough budget left for its next request. In fact:
|
||||
* - BFQ dynamically updates the budget of every queue so as
|
||||
* to accommodate the expected backlog of the queue;
|
||||
* - if a queue gets all its requests dispatched as injected
|
||||
* service, then the queue is removed from the active list
|
||||
* (and re-added only if it gets new requests, but with
|
||||
* enough budget for its new backlog).
|
||||
*/
|
||||
list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
|
||||
if (!RB_EMPTY_ROOT(&bfqq->sort_list) &&
|
||||
bfq_serv_to_charge(bfqq->next_rq, bfqq) <=
|
||||
bfq_bfqq_budget_left(bfqq))
|
||||
return bfqq;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Select a queue for service. If we have a current queue in service,
|
||||
* check whether to continue servicing it, or retrieve and set a new one.
|
||||
|
@ -3710,10 +3774,19 @@ check_queue:
|
|||
* No requests pending. However, if the in-service queue is idling
|
||||
* for a new request, or has requests waiting for a completion and
|
||||
* may idle after their completion, then keep it anyway.
|
||||
*
|
||||
* Yet, to boost throughput, inject service from other queues if
|
||||
* possible.
|
||||
*/
|
||||
if (bfq_bfqq_wait_request(bfqq) ||
|
||||
(bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) {
|
||||
bfqq = NULL;
|
||||
if (bfq_bfqq_injectable(bfqq) &&
|
||||
bfqq->injected_service * bfqq->inject_coeff <
|
||||
bfqq->entity.service * 10)
|
||||
bfqq = bfq_choose_bfqq_for_injection(bfqd);
|
||||
else
|
||||
bfqq = NULL;
|
||||
|
||||
goto keep_queue;
|
||||
}
|
||||
|
||||
|
@ -3803,6 +3876,14 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
|
|||
|
||||
bfq_dispatch_remove(bfqd->queue, rq);
|
||||
|
||||
if (bfqq != bfqd->in_service_queue) {
|
||||
if (likely(bfqd->in_service_queue))
|
||||
bfqd->in_service_queue->injected_service +=
|
||||
bfq_serv_to_charge(rq, bfqq);
|
||||
|
||||
goto return_rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* If weight raising has to terminate for bfqq, then next
|
||||
* function causes an immediate update of bfqq's weight,
|
||||
|
@ -3821,13 +3902,12 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
|
|||
* belongs to CLASS_IDLE and other queues are waiting for
|
||||
* service.
|
||||
*/
|
||||
if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
|
||||
goto expire;
|
||||
if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq)))
|
||||
goto return_rq;
|
||||
|
||||
return rq;
|
||||
|
||||
expire:
|
||||
bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED);
|
||||
|
||||
return_rq:
|
||||
return rq;
|
||||
}
|
||||
|
||||
|
@ -4232,6 +4312,13 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
|||
bfq_mark_bfqq_has_short_ttime(bfqq);
|
||||
bfq_mark_bfqq_sync(bfqq);
|
||||
bfq_mark_bfqq_just_created(bfqq);
|
||||
/*
|
||||
* Aggressively inject a lot of service: up to 90%.
|
||||
* This coefficient remains constant during bfqq life,
|
||||
* but this behavior might be changed, after enough
|
||||
* testing and tuning.
|
||||
*/
|
||||
bfqq->inject_coeff = 1;
|
||||
} else
|
||||
bfq_clear_bfqq_sync(bfqq);
|
||||
|
||||
|
@ -4297,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
|
|||
|
||||
rcu_read_lock();
|
||||
|
||||
bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
|
||||
bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
|
||||
if (!bfqg) {
|
||||
bfqq = &bfqd->oom_bfqq;
|
||||
goto out;
|
||||
|
@ -5330,7 +5417,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
|
|||
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
|
||||
|
||||
bfqd->queue_weights_tree = RB_ROOT;
|
||||
bfqd->group_weights_tree = RB_ROOT;
|
||||
bfqd->num_active_groups = 0;
|
||||
|
||||
INIT_LIST_HEAD(&bfqd->active_list);
|
||||
INIT_LIST_HEAD(&bfqd->idle_list);
|
||||
|
|
|
@ -108,15 +108,14 @@ struct bfq_sched_data {
|
|||
};
|
||||
|
||||
/**
|
||||
* struct bfq_weight_counter - counter of the number of all active entities
|
||||
* struct bfq_weight_counter - counter of the number of all active queues
|
||||
* with a given weight.
|
||||
*/
|
||||
struct bfq_weight_counter {
|
||||
unsigned int weight; /* weight of the entities this counter refers to */
|
||||
unsigned int num_active; /* nr of active entities with this weight */
|
||||
unsigned int weight; /* weight of the queues this counter refers to */
|
||||
unsigned int num_active; /* nr of active queues with this weight */
|
||||
/*
|
||||
* Weights tree member (see bfq_data's @queue_weights_tree and
|
||||
* @group_weights_tree)
|
||||
* Weights tree member (see bfq_data's @queue_weights_tree)
|
||||
*/
|
||||
struct rb_node weights_node;
|
||||
};
|
||||
|
@ -151,8 +150,6 @@ struct bfq_weight_counter {
|
|||
struct bfq_entity {
|
||||
/* service_tree member */
|
||||
struct rb_node rb_node;
|
||||
/* pointer to the weight counter associated with this entity */
|
||||
struct bfq_weight_counter *weight_counter;
|
||||
|
||||
/*
|
||||
* Flag, true if the entity is on a tree (either the active or
|
||||
|
@ -266,6 +263,9 @@ struct bfq_queue {
|
|||
/* entity representing this queue in the scheduler */
|
||||
struct bfq_entity entity;
|
||||
|
||||
/* pointer to the weight counter associated with this entity */
|
||||
struct bfq_weight_counter *weight_counter;
|
||||
|
||||
/* maximum budget allowed from the feedback mechanism */
|
||||
int max_budget;
|
||||
/* budget expiration (in jiffies) */
|
||||
|
@@ -351,6 +351,32 @@ struct bfq_queue {
unsigned long split_time; /* time of last split */

unsigned long first_IO_time; /* time of first I/O for this queue */

/* max service rate measured so far */
u32 max_service_rate;
/*
* Ratio between the service received by bfqq while it is in
* service, and the cumulative service (of requests of other
* queues) that may be injected while bfqq is empty but still
* in service. To increase precision, the coefficient is
* measured in tenths of unit. Here are some example of (1)
* ratios, (2) resulting percentages of service injected
* w.r.t. to the total service dispatched while bfqq is in
* service, and (3) corresponding values of the coefficient:
* 1 (50%) -> 10
* 2 (33%) -> 20
* 10 (9%) -> 100
* 9.9 (9%) -> 99
* 1.5 (40%) -> 15
* 0.5 (66%) -> 5
* 0.1 (90%) -> 1
*
* So, if the coefficient is lower than 10, then
* injected service is more than bfqq service.
*/
unsigned int inject_coeff;
/* amount of service injected in current service slot */
unsigned int injected_service;
};

/**
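A hedged worked example of the coefficient encoding documented above, combined with the dispatch-time limit that an earlier hunk in this series adds to bfq_select_queue() (bfqq->injected_service * bfqq->inject_coeff < bfqq->entity.service * 10):

#include <stdio.h>

/* The coefficient is the target ratio service/injected expressed in tenths,
 * and injection is allowed while injected * coeff < service * 10.
 * Service values are made up for illustration. */
int main(void)
{
        unsigned int service = 1000;            /* service received by bfqq so far */
        unsigned int coeff[] = { 10, 20, 1 };   /* ratios 1, 2 and 0.1 from the table above */

        for (int i = 0; i < 3; i++) {
                /* largest injected_service that still passes the check */
                unsigned int max_injected = service * 10 / coeff[i] - 1;
                double share = (double)max_injected / (max_injected + service);

                printf("coeff %3u -> inject up to %u (about %.0f%% of the total)\n",
                       coeff[i], max_injected, share * 100);
        }
        return 0;
        /* coeff 10 -> ~50%, coeff 20 -> ~33%, coeff 1 -> ~90%, matching the
         * "up to 90%" note where inject_coeff is set to 1 in bfq_init_bfqq(). */
}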
@ -423,14 +449,9 @@ struct bfq_data {
|
|||
*/
|
||||
struct rb_root queue_weights_tree;
|
||||
/*
|
||||
* rbtree of non-queue @bfq_entity weight counters, sorted by
|
||||
* weight. Used to keep track of whether all @bfq_groups have
|
||||
* the same weight. The tree contains one counter for each
|
||||
* distinct weight associated to some active @bfq_group (see
|
||||
* the comments to the functions bfq_weights_tree_[add|remove]
|
||||
* for further details).
|
||||
* number of groups with requests still waiting for completion
|
||||
*/
|
||||
struct rb_root group_weights_tree;
|
||||
unsigned int num_active_groups;
|
||||
|
||||
/*
|
||||
* Number of bfq_queues containing requests (including the
|
||||
|
@ -825,10 +846,10 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
|
|||
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
|
||||
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
|
||||
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct rb_root *root);
|
||||
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_entity *entity,
|
||||
struct bfq_queue *bfqq,
|
||||
struct rb_root *root);
|
||||
void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq);
|
||||
|
|
|
@ -788,25 +788,29 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
|
|||
new_weight = entity->orig_weight *
|
||||
(bfqq ? bfqq->wr_coeff : 1);
|
||||
/*
|
||||
* If the weight of the entity changes, remove the entity
|
||||
* from its old weight counter (if there is a counter
|
||||
* associated with the entity), and add it to the counter
|
||||
* associated with its new weight.
|
||||
* If the weight of the entity changes, and the entity is a
|
||||
* queue, remove the entity from its old weight counter (if
|
||||
* there is a counter associated with the entity).
|
||||
*/
|
||||
if (prev_weight != new_weight) {
|
||||
root = bfqq ? &bfqd->queue_weights_tree :
|
||||
&bfqd->group_weights_tree;
|
||||
__bfq_weights_tree_remove(bfqd, entity, root);
|
||||
if (bfqq) {
|
||||
root = &bfqd->queue_weights_tree;
|
||||
__bfq_weights_tree_remove(bfqd, bfqq, root);
|
||||
} else
|
||||
bfqd->num_active_groups--;
|
||||
}
|
||||
entity->weight = new_weight;
|
||||
/*
|
||||
* Add the entity to its weights tree only if it is
|
||||
* not associated with a weight-raised queue.
|
||||
* Add the entity, if it is not a weight-raised queue,
|
||||
* to the counter associated with its new weight.
|
||||
*/
|
||||
if (prev_weight != new_weight &&
|
||||
(bfqq ? bfqq->wr_coeff == 1 : 1))
|
||||
/* If we get here, root has been initialized. */
|
||||
bfq_weights_tree_add(bfqd, entity, root);
|
||||
if (prev_weight != new_weight) {
|
||||
if (bfqq && bfqq->wr_coeff == 1) {
|
||||
/* If we get here, root has been initialized. */
|
||||
bfq_weights_tree_add(bfqd, bfqq, root);
|
||||
} else
|
||||
bfqd->num_active_groups++;
|
||||
}
|
||||
|
||||
new_st->wsum += entity->weight;
|
||||
|
||||
|
@ -1012,9 +1016,9 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
|
|||
if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */
|
||||
struct bfq_group *bfqg =
|
||||
container_of(entity, struct bfq_group, entity);
|
||||
struct bfq_data *bfqd = bfqg->bfqd;
|
||||
|
||||
bfq_weights_tree_add(bfqg->bfqd, entity,
|
||||
&bfqd->group_weights_tree);
|
||||
bfqd->num_active_groups++;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1181,10 +1185,17 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
|
|||
st = bfq_entity_service_tree(entity);
|
||||
is_in_service = entity == sd->in_service_entity;
|
||||
|
||||
if (is_in_service) {
|
||||
bfq_calc_finish(entity, entity->service);
|
||||
bfq_calc_finish(entity, entity->service);
|
||||
|
||||
if (is_in_service)
|
||||
sd->in_service_entity = NULL;
|
||||
}
|
||||
else
|
||||
/*
|
||||
* Non in-service entity: nobody will take care of
|
||||
* resetting its service counter on expiration. Do it
|
||||
* now.
|
||||
*/
|
||||
entity->service = 0;
|
||||
|
||||
if (entity->tree == &st->active)
|
||||
bfq_active_extract(st, entity);
|
||||
|
@ -1685,7 +1696,7 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
|||
|
||||
if (!bfqq->dispatched)
|
||||
if (bfqq->wr_coeff == 1)
|
||||
bfq_weights_tree_add(bfqd, &bfqq->entity,
|
||||
bfq_weights_tree_add(bfqd, bfqq,
|
||||
&bfqd->queue_weights_tree);
|
||||
|
||||
if (bfqq->wr_coeff > 1)
|
||||
|
|
|
@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio)
|
|||
if (bio_data_dir(bio) == WRITE) {
|
||||
bio_integrity_process(bio, &bio->bi_iter,
|
||||
bi->profile->generate_fn);
|
||||
} else {
|
||||
bip->bio_iter = bio->bi_iter;
|
||||
}
|
||||
return true;
|
||||
|
||||
|
@ -331,20 +333,14 @@ static void bio_integrity_verify_fn(struct work_struct *work)
|
|||
container_of(work, struct bio_integrity_payload, bip_work);
|
||||
struct bio *bio = bip->bip_bio;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
|
||||
struct bvec_iter iter = bio->bi_iter;
|
||||
|
||||
/*
|
||||
* At the moment verify is called bio's iterator was advanced
|
||||
* during split and completion, we need to rewind iterator to
|
||||
* it's original position.
|
||||
*/
|
||||
if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
|
||||
bio->bi_status = bio_integrity_process(bio, &iter,
|
||||
bi->profile->verify_fn);
|
||||
} else {
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
bio->bi_status = bio_integrity_process(bio, &bip->bio_iter,
|
||||
bi->profile->verify_fn);
|
||||
bio_integrity_free(bio);
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
|
block/bio.c (250 changed lines)
|
@ -609,7 +609,9 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
|
|||
bio->bi_iter = bio_src->bi_iter;
|
||||
bio->bi_io_vec = bio_src->bi_io_vec;
|
||||
|
||||
bio_clone_blkcg_association(bio, bio_src);
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
|
||||
blkcg_bio_issue_init(bio);
|
||||
}
|
||||
EXPORT_SYMBOL(__bio_clone_fast);
|
||||
|
||||
|
@ -729,7 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
|
|||
}
|
||||
|
||||
/* If we may be able to merge these biovecs, force a recount */
|
||||
if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
|
||||
if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec))
|
||||
bio_clear_flag(bio, BIO_SEG_VALID);
|
||||
|
||||
done:
|
||||
|
@ -827,6 +829,8 @@ int bio_add_page(struct bio *bio, struct page *page,
|
|||
}
|
||||
EXPORT_SYMBOL(bio_add_page);
|
||||
|
||||
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
|
||||
|
||||
/**
|
||||
* __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
|
||||
* @bio: bio to add pages to
|
||||
|
@ -839,38 +843,35 @@ EXPORT_SYMBOL(bio_add_page);
|
|||
*/
|
||||
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
|
||||
{
|
||||
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
|
||||
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
|
||||
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
|
||||
struct page **pages = (struct page **)bv;
|
||||
ssize_t size, left;
|
||||
unsigned len, i;
|
||||
size_t offset;
|
||||
ssize_t size;
|
||||
|
||||
/*
|
||||
* Move page array up in the allocated memory for the bio vecs as far as
|
||||
* possible so that we can start filling biovecs from the beginning
|
||||
* without overwriting the temporary page array.
|
||||
*/
|
||||
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
|
||||
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
|
||||
|
||||
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
|
||||
if (unlikely(size <= 0))
|
||||
return size ? size : -EFAULT;
|
||||
idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
/*
|
||||
* Deep magic below: We need to walk the pinned pages backwards
|
||||
* because we are abusing the space allocated for the bio_vecs
|
||||
* for the page array. Because the bio_vecs are larger than the
|
||||
* page pointers by definition this will always work. But it also
|
||||
* means we can't use bio_add_page, so any changes to it's semantics
|
||||
* need to be reflected here as well.
|
||||
*/
|
||||
bio->bi_iter.bi_size += size;
|
||||
bio->bi_vcnt += nr_pages;
|
||||
for (left = size, i = 0; left > 0; left -= len, i++) {
|
||||
struct page *page = pages[i];
|
||||
|
||||
while (idx--) {
|
||||
bv[idx].bv_page = pages[idx];
|
||||
bv[idx].bv_len = PAGE_SIZE;
|
||||
bv[idx].bv_offset = 0;
|
||||
len = min_t(size_t, PAGE_SIZE - offset, left);
|
||||
if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len))
|
||||
return -EINVAL;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
bv[0].bv_offset += offset;
|
||||
bv[0].bv_len -= offset;
|
||||
bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
|
||||
|
||||
iov_iter_advance(iter, size);
|
||||
return 0;
|
||||
}
|
||||
|
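The rewritten __bio_iov_iter_get_pages() above relies on a space trick: the unused tail of the bio_vec array doubles as the temporary page-pointer array passed to iov_iter_get_pages(), shifted toward the end so that filling bio_vecs from the front only ever overwrites page pointers that have already been consumed. A rough user-space model of the offset calculation (type names are stand-ins; the only assumption is the one the patch itself asserts, that one bio_vec holds at least two page pointers):

#include <assert.h>
#include <stdio.h>

struct toy_page;                        /* opaque, standing in for struct page */
struct toy_bvec {                       /* standing in for struct bio_vec */
        struct toy_page *bv_page;
        unsigned int bv_len;
        unsigned int bv_offset;
};

#define PAGE_PTRS_PER_BVEC (sizeof(struct toy_bvec) / sizeof(struct toy_page *))

int main(void)
{
        struct toy_bvec bv[8];          /* pretend 8 bio_vec slots are still free */
        size_t entries_left = 8;
        struct toy_page **pages = (struct toy_page **)bv;

        assert(PAGE_PTRS_PER_BVEC >= 2);        /* run-time stand-in for the BUILD_BUG_ON */

        /* Same offset as the patch: park the scratch page array at the end of the
         * free bio_vec space. With this layout, filling bv[i] can only touch
         * pages[j] with j <= i, and those entries have already been consumed. */
        pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);

        printf("bio_vec slots start at %p, scratch page array at %p\n",
               (void *)bv, (void *)pages);
        return 0;
}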
@ -1807,7 +1808,6 @@ struct bio *bio_split(struct bio *bio, int sectors,
|
|||
bio_integrity_trim(split);
|
||||
|
||||
bio_advance(bio, split->bi_iter.bi_size);
|
||||
bio->bi_iter.bi_done = 0;
|
||||
|
||||
if (bio_flagged(bio, BIO_TRACE_COMPLETION))
|
||||
bio_set_flag(split, BIO_TRACE_COMPLETION);
|
||||
|
@ -1956,71 +1956,153 @@ EXPORT_SYMBOL(bioset_init_from_src);
|
|||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/**
|
||||
* bio_associate_blkcg_from_page - associate a bio with the page's blkcg
|
||||
* @bio: target bio
|
||||
* @page: the page to lookup the blkcg from
|
||||
*
|
||||
* Associate @bio with the blkcg from @page's owning memcg. This works like
|
||||
* every other associate function wrt references.
|
||||
*/
|
||||
int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
|
||||
{
|
||||
struct cgroup_subsys_state *blkcg_css;
|
||||
|
||||
if (unlikely(bio->bi_css))
|
||||
return -EBUSY;
|
||||
if (!page->mem_cgroup)
|
||||
return 0;
|
||||
blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
|
||||
&io_cgrp_subsys);
|
||||
bio->bi_css = blkcg_css;
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
/**
|
||||
* bio_associate_blkcg - associate a bio with the specified blkcg
|
||||
* @bio: target bio
|
||||
* @blkcg_css: css of the blkcg to associate
|
||||
*
|
||||
* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
|
||||
* treat @bio as if it were issued by a task which belongs to the blkcg.
|
||||
*
|
||||
* This function takes an extra reference of @blkcg_css which will be put
|
||||
* when @bio is released. The caller must own @bio and is responsible for
|
||||
* synchronizing calls to this function.
|
||||
*/
|
||||
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
|
||||
{
|
||||
if (unlikely(bio->bi_css))
|
||||
return -EBUSY;
|
||||
css_get(blkcg_css);
|
||||
bio->bi_css = blkcg_css;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkcg);
|
||||
|
||||
/**
|
||||
* bio_associate_blkg - associate a bio with the specified blkg
|
||||
* bio_associate_blkg - associate a bio with the a blkg
|
||||
* @bio: target bio
|
||||
* @blkg: the blkg to associate
|
||||
*
|
||||
* Associate @bio with the blkg specified by @blkg. This is the queue specific
|
||||
* blkcg information associated with the @bio, a reference will be taken on the
|
||||
* @blkg and will be freed when the bio is freed.
|
||||
* This tries to associate @bio with the specified blkg. Association failure
|
||||
* is handled by walking up the blkg tree. Therefore, the blkg associated can
|
||||
* be anything between @blkg and the root_blkg. This situation only happens
|
||||
* when a cgroup is dying and then the remaining bios will spill to the closest
|
||||
* alive blkg.
|
||||
*
|
||||
* A reference will be taken on the @blkg and will be released when @bio is
|
||||
* freed.
|
||||
*/
|
||||
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
|
||||
{
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
if (!blkg_try_get(blkg))
|
||||
return -ENODEV;
|
||||
bio->bi_blkg = blkg;
|
||||
bio->bi_blkg = blkg_tryget_closest(blkg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* __bio_associate_blkg_from_css - internal blkg association function
|
||||
*
|
||||
* This in the core association function that all association paths rely on.
|
||||
* A blkg reference is taken which is released upon freeing of the bio.
|
||||
*/
|
||||
static int __bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
struct blkcg_gq *blkg;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
if (!css || !css->parent)
|
||||
blkg = q->root_blkg;
|
||||
else
|
||||
blkg = blkg_lookup_create(css_to_blkcg(css), q);
|
||||
|
||||
ret = bio_associate_blkg(bio, blkg);
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_associate_blkg_from_css - associate a bio with a specified css
|
||||
* @bio: target bio
|
||||
* @css: target css
|
||||
*
|
||||
* Associate @bio with the blkg found by combining the css's blkg and the
|
||||
* request_queue of the @bio. This falls back to the queue's root_blkg if
|
||||
* the association fails with the css.
|
||||
*/
|
||||
int bio_associate_blkg_from_css(struct bio *bio,
|
||||
struct cgroup_subsys_state *css)
|
||||
{
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
return __bio_associate_blkg_from_css(bio, css);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
/**
|
||||
* bio_associate_blkg_from_page - associate a bio with the page's blkg
|
||||
* @bio: target bio
|
||||
* @page: the page to lookup the blkcg from
|
||||
*
|
||||
* Associate @bio with the blkg from @page's owning memcg and the respective
|
||||
* request_queue. If cgroup_e_css returns NULL, fall back to the queue's
|
||||
* root_blkg.
|
||||
*
|
||||
* Note: this must be called after bio has an associated device.
|
||||
*/
|
||||
int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
int ret;
|
||||
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
if (!page->mem_cgroup)
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
|
||||
|
||||
ret = __bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
/**
|
||||
* bio_associate_create_blkg - associate a bio with a blkg from q
|
||||
* @q: request_queue where bio is going
|
||||
* @bio: target bio
|
||||
*
|
||||
* Associate @bio with the blkg found from the bio's css and the request_queue.
|
||||
* If one is not found, bio_lookup_blkg creates the blkg. This falls back to
|
||||
* the queue's root_blkg if association fails.
|
||||
*/
|
||||
int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
int ret = 0;
|
||||
|
||||
/* someone has already associated this bio with a blkg */
|
||||
if (bio->bi_blkg)
|
||||
return ret;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
css = blkcg_css();
|
||||
|
||||
ret = __bio_associate_blkg_from_css(bio, css);
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_reassociate_blkg - reassociate a bio with a blkg from q
|
||||
* @q: request_queue where bio is going
|
||||
* @bio: target bio
|
||||
*
|
||||
* When submitting a bio, multiple recursive calls to make_request() may occur.
|
||||
* This causes the initial associate done in blkcg_bio_issue_check() to be
|
||||
* incorrect and reference the prior request_queue. This performs reassociation
|
||||
* when this situation happens.
|
||||
*/
|
||||
int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
if (bio->bi_blkg) {
|
||||
blkg_put(bio->bi_blkg);
|
||||
bio->bi_blkg = NULL;
|
||||
}
|
||||
|
||||
return bio_associate_create_blkg(q, bio);
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_disassociate_task - undo bio_associate_current()
|
||||
* @bio: target bio
|
||||
|
@ -2031,10 +2113,6 @@ void bio_disassociate_task(struct bio *bio)
|
|||
put_io_context(bio->bi_ioc);
|
||||
bio->bi_ioc = NULL;
|
||||
}
|
||||
if (bio->bi_css) {
|
||||
css_put(bio->bi_css);
|
||||
bio->bi_css = NULL;
|
||||
}
|
||||
if (bio->bi_blkg) {
|
||||
blkg_put(bio->bi_blkg);
|
||||
bio->bi_blkg = NULL;
|
||||
|
@ -2042,16 +2120,16 @@ void bio_disassociate_task(struct bio *bio)
|
|||
}
|
||||
|
||||
/**
|
||||
* bio_clone_blkcg_association - clone blkcg association from src to dst bio
|
||||
* bio_clone_blkg_association - clone blkg association from src to dst bio
|
||||
* @dst: destination bio
|
||||
* @src: source bio
|
||||
*/
|
||||
void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
|
||||
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
|
||||
{
|
||||
if (src->bi_css)
|
||||
WARN_ON(bio_associate_blkcg(dst, src->bi_css));
|
||||
if (src->bi_blkg)
|
||||
bio_associate_blkg(dst, src->bi_blkg);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
|
||||
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
|
||||
#endif /* CONFIG_BLK_CGROUP */
|
||||
|
||||
static void __init biovec_init_slabs(void)
|
||||
|
|
|
@@ -84,6 +84,37 @@ static void blkg_free(struct blkcg_gq *blkg)
kfree(blkg);
}

static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);

percpu_ref_exit(&blkg->refcnt);

/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);

wb_congested_put(blkg->wb_congested);

blkg_free(blkg);
}

/*
* A group is RCU protected, but having an rcu lock does not mean that one
* can access all the fields of blkg and assume these are valid. For
* example, don't try to follow throtl_data and request queue links.
*
* Having a reference to blkg under an rcu allows accesses to only values
* local to groups like group stats and group rate limits.
*/
static void blkg_release(struct percpu_ref *ref)
{
struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);

call_rcu(&blkg->rcu_head, __blkg_release);
}

/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with

@@ -110,7 +141,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
atomic_set(&blkg->refcnt, 1);

/* root blkg uses @q->root_rl, init rl only for !root blkgs */
if (blkcg != &blkcg_root) {

@@ -217,6 +247,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_get(blkg->parent);
}

ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
GFP_NOWAIT | __GFP_NOWARN);
if (ret)
goto err_cancel_ref;

/* invoke per-policy init */
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];

@@ -249,6 +284,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);

err_cancel_ref:
percpu_ref_exit(&blkg->refcnt);
err_put_congested:
wb_congested_put(wb_congested);
err_put_css:

@@ -259,7 +296,7 @@ err_free_blkg:
}

/**
* blkg_lookup_create - lookup blkg, try to create one if not there
* __blkg_lookup_create - lookup blkg, try to create one if not there
* @blkcg: blkcg of interest
* @q: request_queue of interest
*

@@ -268,12 +305,11 @@ err_free_blkg:
* that all non-root blkg's have access to the parent blkg. This function
* should be called under RCU read lock and @q->queue_lock.
*
* Returns pointer to the looked up or created blkg on success, ERR_PTR()
* value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
* dead and bypassing, returns ERR_PTR(-EBUSY).
* Returns the blkg or the closest blkg if blkg_create fails as it walks
* down from root.
*/
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q)
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q)
{
struct blkcg_gq *blkg;

@@ -285,7 +321,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
* we shouldn't allow anything to go through for a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)))
return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
return q->root_blkg;

blkg = __blkg_lookup(blkcg, q, true);
if (blkg)

@@ -293,23 +329,58 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,

/*
* Create blkgs walking down from blkcg_root to @blkcg, so that all
* non-root blkgs have access to their parents.
* non-root blkgs have access to their parents. Returns the closest
* blkg to the intended blkg should blkg_create() fail.
*/
while (true) {
struct blkcg *pos = blkcg;
struct blkcg *parent = blkcg_parent(blkcg);
struct blkcg_gq *ret_blkg = q->root_blkg;

while (parent && !__blkg_lookup(parent, q, false)) {
while (parent) {
blkg = __blkg_lookup(parent, q, false);
if (blkg) {
/* remember closest blkg */
ret_blkg = blkg;
break;
}
pos = parent;
parent = blkcg_parent(parent);
}

blkg = blkg_create(pos, q, NULL);
if (pos == blkcg || IS_ERR(blkg))
if (IS_ERR(blkg))
return ret_blkg;
if (pos == blkcg)
return blkg;
}
}

/**
* blkg_lookup_create - find or create a blkg
* @blkcg: target block cgroup
* @q: target request_queue
*
* This looks up or creates the blkg representing the unique pair
* of the blkcg and the request_queue.
*/
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q)
{
struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
unsigned long flags;

if (unlikely(!blkg)) {
spin_lock_irqsave(q->queue_lock, flags);

blkg = __blkg_lookup_create(blkcg, q);

spin_unlock_irqrestore(q->queue_lock, flags);
}

return blkg;
}

static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;

@@ -353,7 +424,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
blkg_put(blkg);
percpu_ref_kill(&blkg->refcnt);
}

/**

@@ -380,29 +451,6 @@ static void blkg_destroy_all(struct request_queue *q)
q->root_rl.blkg = NULL;
}

/*
* A group is RCU protected, but having an rcu lock does not mean that one
* can access all the fields of blkg and assume these are valid. For
* example, don't try to follow throtl_data and request queue links.
*
* Having a reference to blkg under an rcu allows accesses to only values
* local to groups like group stats and group rate limits.
*/
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);

/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);

wb_congested_put(blkg->wb_congested);

blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);

/*
* The next function used by blk_queue_for_each_rl(). It's a bit tricky
* because the root blkg uses @q->root_rl instead of its own rl.

@@ -1748,8 +1796,7 @@ void blkcg_maybe_throttle_current(void)
blkg = blkg_lookup(blkcg, q);
if (!blkg)
goto out;
blkg = blkg_try_get(blkg);
if (!blkg)
if (!blkg_tryget(blkg))
goto out;
rcu_read_unlock();
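The blkcg changes above split lookup/creation in two: __blkg_lookup_create() still expects the RCU read lock and @q->queue_lock to be held, while the new blkg_lookup_create() wrapper takes the queue lock itself when the blkg is missing and never returns an ERR_PTR (on failure it falls back to the closest ancestor, ultimately the root blkg). A minimal sketch of a hypothetical caller, assuming only the interfaces visible in this diff:

/* Hedged sketch: hypothetical caller, not part of this series. */
static struct blkcg_gq *example_get_blkg(struct blkcg *blkcg,
					 struct request_queue *q)
{
	struct blkcg_gq *blkg;

	rcu_read_lock();
	/* may take q->queue_lock internally if the blkg does not exist yet */
	blkg = blkg_lookup_create(blkcg, q);
	rcu_read_unlock();

	/* never an ERR_PTR: worst case this is the root blkg */
	return blkg;
}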
block/blk-core.c

@@ -42,6 +42,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-rq-qos.h"

#ifdef CONFIG_DEBUG_FS

@@ -421,24 +422,25 @@ void blk_sync_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_sync_queue);

/**
* blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
* blk_set_pm_only - increment pm_only counter
* @q: request queue pointer
*
* Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
* set and 1 if the flag was already set.
*/
int blk_set_preempt_only(struct request_queue *q)
void blk_set_pm_only(struct request_queue *q)
{
return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
atomic_inc(&q->pm_only);
}
EXPORT_SYMBOL_GPL(blk_set_preempt_only);
EXPORT_SYMBOL_GPL(blk_set_pm_only);

void blk_clear_preempt_only(struct request_queue *q)
void blk_clear_pm_only(struct request_queue *q)
{
blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
wake_up_all(&q->mq_freeze_wq);
int pm_only;

pm_only = atomic_dec_return(&q->pm_only);
WARN_ON_ONCE(pm_only < 0);
if (pm_only == 0)
wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
EXPORT_SYMBOL_GPL(blk_clear_pm_only);

/**
* __blk_run_queue_uncond - run a queue whether or not it has been stopped

@@ -917,7 +919,7 @@ EXPORT_SYMBOL(blk_alloc_queue);
*/
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
const bool pm = flags & BLK_MQ_REQ_PREEMPT;

while (true) {
bool success = false;

@@ -925,11 +927,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
rcu_read_lock();
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
/*
* The code that sets the PREEMPT_ONLY flag is
* responsible for ensuring that that flag is globally
* visible before the queue is unfrozen.
* The code that increments the pm_only counter is
* responsible for ensuring that that counter is
* globally visible before the queue is unfrozen.
*/
if (preempt || !blk_queue_preempt_only(q)) {
if (pm || !blk_queue_pm_only(q)) {
success = true;
} else {
percpu_ref_put(&q->q_usage_counter);

@@ -954,7 +956,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)

wait_event(q->mq_freeze_wq,
(atomic_read(&q->mq_freeze_depth) == 0 &&
(preempt || !blk_queue_preempt_only(q))) ||
(pm || (blk_pm_request_resume(q),
!blk_queue_pm_only(q)))) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;

@@ -1051,8 +1054,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);

if (!q->mq_ops)
q->queue_lock = lock ? : &q->__queue_lock;
q->queue_lock = lock ? : &q->__queue_lock;

/*
* A queue starts its life with bypass turned on to avoid

@@ -1160,7 +1162,7 @@ int blk_init_allocated_queue(struct request_queue *q)
{
WARN_ON_ONCE(q->mq_ops);

q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size, GFP_KERNEL);
if (!q->fq)
return -ENOMEM;

@@ -1726,16 +1728,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
}
EXPORT_SYMBOL_GPL(part_round_stats);

#ifdef CONFIG_PM
static void blk_pm_put_request(struct request *rq)
{
if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
pm_runtime_mark_last_busy(rq->q->dev);
}
#else
static inline void blk_pm_put_request(struct request *rq) {}
#endif

void __blk_put_request(struct request_queue *q, struct request *req)
{
req_flags_t rq_flags = req->rq_flags;

@@ -1752,6 +1744,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)

blk_req_zone_write_unlock(req);
blk_pm_put_request(req);
blk_pm_mark_last_busy(req);

elv_completed_request(q, req);

@@ -2440,6 +2433,7 @@ blk_qc_t generic_make_request(struct bio *bio)
if (q)
blk_queue_exit(q);
q = bio->bi_disk->queue;
bio_reassociate_blkg(q, bio);
flags = 0;
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;

@@ -2750,30 +2744,6 @@ void blk_account_io_done(struct request *req, u64 now)
}
}

#ifdef CONFIG_PM
/*
* Don't process normal requests when queue is suspended
* or in the process of suspending/resuming
*/
static bool blk_pm_allow_request(struct request *rq)
{
switch (rq->q->rpm_status) {
case RPM_RESUMING:
case RPM_SUSPENDING:
return rq->rq_flags & RQF_PM;
case RPM_SUSPENDED:
return false;
default:
return true;
}
}
#else
static bool blk_pm_allow_request(struct request *rq)
{
return true;
}
#endif

void blk_account_io_start(struct request *rq, bool new_io)
{
struct hd_struct *part;

@@ -2819,11 +2789,14 @@ static struct request *elv_next_request(struct request_queue *q)

while (1) {
list_for_each_entry(rq, &q->queue_head, queuelist) {
if (blk_pm_allow_request(rq))
return rq;

if (rq->rq_flags & RQF_SOFTBARRIER)
break;
#ifdef CONFIG_PM
/*
* If a request gets queued in state RPM_SUSPENDED
* then that's a kernel bug.
*/
WARN_ON_ONCE(q->rpm_status == RPM_SUSPENDED);
#endif
return rq;
}

/*

@@ -3755,191 +3728,6 @@ void blk_finish_plug(struct blk_plug *plug)
}
EXPORT_SYMBOL(blk_finish_plug);

#ifdef CONFIG_PM
/**
* blk_pm_runtime_init - Block layer runtime PM initialization routine
* @q: the queue of the device
* @dev: the device the queue belongs to
*
* Description:
* Initialize runtime-PM-related fields for @q and start auto suspend for
* @dev. Drivers that want to take advantage of request-based runtime PM
* should call this function after @dev has been initialized, and its
* request queue @q has been allocated, and runtime PM for it can not happen
* yet(either due to disabled/forbidden or its usage_count > 0). In most
* cases, driver should call this function before any I/O has taken place.
*
* This function takes care of setting up using auto suspend for the device,
* the autosuspend delay is set to -1 to make runtime suspend impossible
* until an updated value is either set by user or by driver. Drivers do
* not need to touch other autosuspend settings.
*
* The block layer runtime PM is request based, so only works for drivers
* that use request as their IO unit instead of those directly use bio's.
*/
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
/* Don't enable runtime PM for blk-mq until it is ready */
if (q->mq_ops) {
pm_runtime_disable(dev);
return;
}

q->dev = dev;
q->rpm_status = RPM_ACTIVE;
pm_runtime_set_autosuspend_delay(q->dev, -1);
pm_runtime_use_autosuspend(q->dev);
}
EXPORT_SYMBOL(blk_pm_runtime_init);

/**
* blk_pre_runtime_suspend - Pre runtime suspend check
* @q: the queue of the device
*
* Description:
* This function will check if runtime suspend is allowed for the device
* by examining if there are any requests pending in the queue. If there
* are requests pending, the device can not be runtime suspended; otherwise,
* the queue's status will be updated to SUSPENDING and the driver can
* proceed to suspend the device.
*
* For the not allowed case, we mark last busy for the device so that
* runtime PM core will try to autosuspend it some time later.
*
* This function should be called near the start of the device's
* runtime_suspend callback.
*
* Return:
* 0 - OK to runtime suspend the device
* -EBUSY - Device should not be runtime suspended
*/
int blk_pre_runtime_suspend(struct request_queue *q)
{
int ret = 0;

if (!q->dev)
return ret;

spin_lock_irq(q->queue_lock);
if (q->nr_pending) {
ret = -EBUSY;
pm_runtime_mark_last_busy(q->dev);
} else {
q->rpm_status = RPM_SUSPENDING;
}
spin_unlock_irq(q->queue_lock);
return ret;
}
EXPORT_SYMBOL(blk_pre_runtime_suspend);

/**
* blk_post_runtime_suspend - Post runtime suspend processing
* @q: the queue of the device
* @err: return value of the device's runtime_suspend function
*
* Description:
* Update the queue's runtime status according to the return value of the
* device's runtime suspend function and mark last busy for the device so
* that PM core will try to auto suspend the device at a later time.
*
* This function should be called near the end of the device's
* runtime_suspend callback.
*/
void blk_post_runtime_suspend(struct request_queue *q, int err)
{
if (!q->dev)
return;

spin_lock_irq(q->queue_lock);
if (!err) {
q->rpm_status = RPM_SUSPENDED;
} else {
q->rpm_status = RPM_ACTIVE;
pm_runtime_mark_last_busy(q->dev);
}
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_suspend);

/**
* blk_pre_runtime_resume - Pre runtime resume processing
* @q: the queue of the device
*
* Description:
* Update the queue's runtime status to RESUMING in preparation for the
* runtime resume of the device.
*
* This function should be called near the start of the device's
* runtime_resume callback.
*/
void blk_pre_runtime_resume(struct request_queue *q)
{
if (!q->dev)
return;

spin_lock_irq(q->queue_lock);
q->rpm_status = RPM_RESUMING;
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_pre_runtime_resume);

/**
* blk_post_runtime_resume - Post runtime resume processing
* @q: the queue of the device
* @err: return value of the device's runtime_resume function
*
* Description:
* Update the queue's runtime status according to the return value of the
* device's runtime_resume function. If it is successfully resumed, process
* the requests that are queued into the device's queue when it is resuming
* and then mark last busy and initiate autosuspend for it.
*
* This function should be called near the end of the device's
* runtime_resume callback.
*/
void blk_post_runtime_resume(struct request_queue *q, int err)
{
if (!q->dev)
return;

spin_lock_irq(q->queue_lock);
if (!err) {
q->rpm_status = RPM_ACTIVE;
__blk_run_queue(q);
pm_runtime_mark_last_busy(q->dev);
pm_request_autosuspend(q->dev);
} else {
q->rpm_status = RPM_SUSPENDED;
}
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_resume);

/**
* blk_set_runtime_active - Force runtime status of the queue to be active
* @q: the queue of the device
*
* If the device is left runtime suspended during system suspend the resume
* hook typically resumes the device and corrects runtime status
* accordingly. However, that does not affect the queue runtime PM status
* which is still "suspended". This prevents processing requests from the
* queue.
*
* This function can be used in driver's resume hook to correct queue
* runtime PM status and re-enable peeking requests from the queue. It
* should be called before first request is added to the queue.
*/
void blk_set_runtime_active(struct request_queue *q)
{
spin_lock_irq(q->queue_lock);
q->rpm_status = RPM_ACTIVE;
pm_runtime_mark_last_busy(q->dev);
pm_request_autosuspend(q->dev);
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_set_runtime_active);
#endif

int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
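The hunks above replace the single PREEMPT_ONLY queue flag with a nesting pm_only counter: blk_set_pm_only() increments it, blk_clear_pm_only() decrements and wakes mq_freeze_wq when it reaches zero, and blk_queue_enter() only lets BLK_MQ_REQ_PREEMPT callers through while the counter is non-zero. A minimal, purely illustrative sketch of the intended pairing (the surrounding driver code is assumed, not part of this series):

/* Hedged sketch: illustrative only. */
static void example_runtime_suspend_prepare(struct request_queue *q)
{
	/* each blk_set_pm_only() must be balanced by a blk_clear_pm_only() */
	blk_set_pm_only(q);
	/* from here only requests entering with BLK_MQ_REQ_PREEMPT pass blk_queue_enter() */
}

static void example_runtime_resume_done(struct request_queue *q)
{
	/* wakes waiters in q->mq_freeze_wq once the pm_only count drops to zero */
	blk_clear_pm_only(q);
}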
@@ -566,12 +566,12 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
EXPORT_SYMBOL(blkdev_issue_flush);

struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
int node, int cmd_size)
int node, int cmd_size, gfp_t flags)
{
struct blk_flush_queue *fq;
int rq_sz = sizeof(struct request);

fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
fq = kzalloc_node(sizeof(*fq), flags, node);
if (!fq)
goto fail;

@@ -579,7 +579,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
spin_lock_init(&fq->mq_flush_lock);

rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
fq->flush_rq = kzalloc_node(rq_sz, flags, node);
if (!fq->flush_rq)
goto fail_rq;
@@ -49,12 +49,8 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
bio_for_each_integrity_vec(iv, bio, iter) {

if (prev) {
if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;

if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
goto new_segment;

if (seg_size + iv.bv_len > queue_max_segment_size(q))
goto new_segment;

@@ -95,12 +91,8 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
bio_for_each_integrity_vec(iv, bio, iter) {

if (prev) {
if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;

if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
goto new_segment;

if (sg->length + iv.bv_len > queue_max_segment_size(q))
goto new_segment;
@@ -115,9 +115,22 @@ struct child_latency_info {
atomic_t scale_cookie;
};

struct percentile_stats {
u64 total;
u64 missed;
};

struct latency_stat {
union {
struct percentile_stats ps;
struct blk_rq_stat rqs;
};
};

struct iolatency_grp {
struct blkg_policy_data pd;
struct blk_rq_stat __percpu *stats;
struct latency_stat __percpu *stats;
struct latency_stat cur_stat;
struct blk_iolatency *blkiolat;
struct rq_depth rq_depth;
struct rq_wait rq_wait;

@@ -132,6 +145,7 @@ struct iolatency_grp {
/* Our current number of IO's for the last summation. */
u64 nr_samples;

bool ssd;
struct child_latency_info child_lat;
};

@@ -172,6 +186,80 @@ static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
return pd_to_blkg(&iolat->pd);
}

static inline void latency_stat_init(struct iolatency_grp *iolat,
struct latency_stat *stat)
{
if (iolat->ssd) {
stat->ps.total = 0;
stat->ps.missed = 0;
} else
blk_rq_stat_init(&stat->rqs);
}

static inline void latency_stat_sum(struct iolatency_grp *iolat,
struct latency_stat *sum,
struct latency_stat *stat)
{
if (iolat->ssd) {
sum->ps.total += stat->ps.total;
sum->ps.missed += stat->ps.missed;
} else
blk_rq_stat_sum(&sum->rqs, &stat->rqs);
}

static inline void latency_stat_record_time(struct iolatency_grp *iolat,
u64 req_time)
{
struct latency_stat *stat = get_cpu_ptr(iolat->stats);
if (iolat->ssd) {
if (req_time >= iolat->min_lat_nsec)
stat->ps.missed++;
stat->ps.total++;
} else
blk_rq_stat_add(&stat->rqs, req_time);
put_cpu_ptr(stat);
}

static inline bool latency_sum_ok(struct iolatency_grp *iolat,
struct latency_stat *stat)
{
if (iolat->ssd) {
u64 thresh = div64_u64(stat->ps.total, 10);
thresh = max(thresh, 1ULL);
return stat->ps.missed < thresh;
}
return stat->rqs.mean <= iolat->min_lat_nsec;
}

static inline u64 latency_stat_samples(struct iolatency_grp *iolat,
struct latency_stat *stat)
{
if (iolat->ssd)
return stat->ps.total;
return stat->rqs.nr_samples;
}

static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
struct latency_stat *stat)
{
int exp_idx;

if (iolat->ssd)
return;

/*
* CALC_LOAD takes in a number stored in fixed point representation.
* Because we are using this for IO time in ns, the values stored
* are significantly larger than the FIXED_1 denominator (2048).
* Therefore, rounding errors in the calculation are negligible and
* can be ignored.
*/
exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
div64_u64(iolat->cur_win_nsec,
BLKIOLATENCY_EXP_BUCKET_SIZE));
CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean);
}

static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
wait_queue_entry_t *wait,
bool first_block)

@@ -255,7 +343,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
struct child_latency_info *lat_info,
bool up)
{
unsigned long qd = blk_queue_depth(blkiolat->rqos.q);
unsigned long qd = blkiolat->rqos.q->nr_requests;
unsigned long scale = scale_amount(qd, up);
unsigned long old = atomic_read(&lat_info->scale_cookie);
unsigned long max_scale = qd << 1;

@@ -295,10 +383,9 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
*/
static void scale_change(struct iolatency_grp *iolat, bool up)
{
unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q);
unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
unsigned long scale = scale_amount(qd, up);
unsigned long old = iolat->rq_depth.max_depth;
bool changed = false;

if (old > qd)
old = qd;

@@ -308,15 +395,13 @@ static void scale_change(struct iolatency_grp *iolat, bool up)
return;

if (old < qd) {
changed = true;
old += scale;
old = min(old, qd);
iolat->rq_depth.max_depth = old;
wake_up_all(&iolat->rq_wait.wait);
}
} else if (old > 1) {
} else {
old >>= 1;
changed = true;
iolat->rq_depth.max_depth = max(old, 1UL);
}
}

@@ -369,7 +454,7 @@ static void check_scale_change(struct iolatency_grp *iolat)
* scale down event.
*/
samples_thresh = lat_info->nr_samples * 5;
samples_thresh = div64_u64(samples_thresh, 100);
samples_thresh = max(1ULL, div64_u64(samples_thresh, 100));
if (iolat->nr_samples <= samples_thresh)
return;
}

@@ -395,34 +480,12 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
spinlock_t *lock)
{
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
struct blkcg *blkcg;
struct blkcg_gq *blkg;
struct request_queue *q = rqos->q;
struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);

if (!blk_iolatency_enabled(blkiolat))
return;

rcu_read_lock();
blkcg = bio_blkcg(bio);
bio_associate_blkcg(bio, &blkcg->css);
blkg = blkg_lookup(blkcg, q);
if (unlikely(!blkg)) {
if (!lock)
spin_lock_irq(q->queue_lock);
blkg = blkg_lookup_create(blkcg, q);
if (IS_ERR(blkg))
blkg = NULL;
if (!lock)
spin_unlock_irq(q->queue_lock);
}
if (!blkg)
goto out;

bio_issue_init(&bio->bi_issue, bio_sectors(bio));
bio_associate_blkg(bio, blkg);
out:
rcu_read_unlock();
while (blkg && blkg->parent) {
struct iolatency_grp *iolat = blkg_to_lat(blkg);
if (!iolat) {

@@ -443,7 +506,6 @@ static void iolatency_record_time(struct iolatency_grp *iolat,
struct bio_issue *issue, u64 now,
bool issue_as_root)
{
struct blk_rq_stat *rq_stat;
u64 start = bio_issue_time(issue);
u64 req_time;

@@ -469,9 +531,7 @@ static void iolatency_record_time(struct iolatency_grp *iolat,
return;
}

rq_stat = get_cpu_ptr(iolat->stats);
blk_rq_stat_add(rq_stat, req_time);
put_cpu_ptr(rq_stat);
latency_stat_record_time(iolat, req_time);
}

#define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)

@@ -482,17 +542,17 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
struct blkcg_gq *blkg = lat_to_blkg(iolat);
struct iolatency_grp *parent;
struct child_latency_info *lat_info;
struct blk_rq_stat stat;
struct latency_stat stat;
unsigned long flags;
int cpu, exp_idx;
int cpu;

blk_rq_stat_init(&stat);
latency_stat_init(iolat, &stat);
preempt_disable();
for_each_online_cpu(cpu) {
struct blk_rq_stat *s;
struct latency_stat *s;
s = per_cpu_ptr(iolat->stats, cpu);
blk_rq_stat_sum(&stat, s);
blk_rq_stat_init(s);
latency_stat_sum(iolat, &stat, s);
latency_stat_init(iolat, s);
}
preempt_enable();

@@ -502,41 +562,36 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)

lat_info = &parent->child_lat;

/*
* CALC_LOAD takes in a number stored in fixed point representation.
* Because we are using this for IO time in ns, the values stored
* are significantly larger than the FIXED_1 denominator (2048).
* Therefore, rounding errors in the calculation are negligible and
* can be ignored.
*/
exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
div64_u64(iolat->cur_win_nsec,
BLKIOLATENCY_EXP_BUCKET_SIZE));
CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat.mean);
iolat_update_total_lat_avg(iolat, &stat);

/* Everything is ok and we don't need to adjust the scale. */
if (stat.mean <= iolat->min_lat_nsec &&
if (latency_sum_ok(iolat, &stat) &&
atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
return;

/* Somebody beat us to the punch, just bail. */
spin_lock_irqsave(&lat_info->lock, flags);

latency_stat_sum(iolat, &iolat->cur_stat, &stat);
lat_info->nr_samples -= iolat->nr_samples;
lat_info->nr_samples += stat.nr_samples;
iolat->nr_samples = stat.nr_samples;
lat_info->nr_samples += latency_stat_samples(iolat, &iolat->cur_stat);
iolat->nr_samples = latency_stat_samples(iolat, &iolat->cur_stat);

if ((lat_info->last_scale_event >= now ||
now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
lat_info->scale_lat <= iolat->min_lat_nsec)
now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME))
goto out;

if (stat.mean <= iolat->min_lat_nsec &&
stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
if (latency_sum_ok(iolat, &iolat->cur_stat) &&
latency_sum_ok(iolat, &stat)) {
if (latency_stat_samples(iolat, &iolat->cur_stat) <
BLKIOLATENCY_MIN_GOOD_SAMPLES)
goto out;
if (lat_info->scale_grp == iolat) {
lat_info->last_scale_event = now;
scale_cookie_change(iolat->blkiolat, lat_info, true);
}
} else if (stat.mean > iolat->min_lat_nsec) {
} else if (lat_info->scale_lat == 0 ||
lat_info->scale_lat >= iolat->min_lat_nsec) {
lat_info->last_scale_event = now;
if (!lat_info->scale_grp ||
lat_info->scale_lat > iolat->min_lat_nsec) {

@@ -545,6 +600,7 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
}
scale_cookie_change(iolat->blkiolat, lat_info, false);
}
latency_stat_init(iolat, &iolat->cur_stat);
out:
spin_unlock_irqrestore(&lat_info->lock, flags);
}

@@ -650,7 +706,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
* We could be exiting, don't access the pd unless we have a
* ref on the blkg.
*/
if (!blkg_try_get(blkg))
if (!blkg_tryget(blkg))
continue;

iolat = blkg_to_lat(blkg);

@@ -761,7 +817,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
{
struct blkcg *blkcg = css_to_blkcg(of_css(of));
struct blkcg_gq *blkg;
struct blk_iolatency *blkiolat;
struct blkg_conf_ctx ctx;
struct iolatency_grp *iolat;
char *p, *tok;

@@ -774,7 +829,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
return ret;

iolat = blkg_to_lat(ctx.blkg);
blkiolat = iolat->blkiolat;
p = ctx.body;

ret = -EINVAL;

@@ -835,13 +889,43 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}

static size_t iolatency_ssd_stat(struct iolatency_grp *iolat, char *buf,
size_t size)
{
struct latency_stat stat;
int cpu;

latency_stat_init(iolat, &stat);
preempt_disable();
for_each_online_cpu(cpu) {
struct latency_stat *s;
s = per_cpu_ptr(iolat->stats, cpu);
latency_stat_sum(iolat, &stat, s);
}
preempt_enable();

if (iolat->rq_depth.max_depth == UINT_MAX)
return scnprintf(buf, size, " missed=%llu total=%llu depth=max",
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total);
return scnprintf(buf, size, " missed=%llu total=%llu depth=%u",
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total,
iolat->rq_depth.max_depth);
}

static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
size_t size)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
unsigned long long avg_lat;
unsigned long long cur_win;

if (iolat->ssd)
return iolatency_ssd_stat(iolat, buf, size);

avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC);
if (iolat->rq_depth.max_depth == UINT_MAX)
return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
avg_lat, cur_win);

@@ -858,8 +942,8 @@ static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
iolat = kzalloc_node(sizeof(*iolat), gfp, node);
if (!iolat)
return NULL;
iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
__alignof__(struct blk_rq_stat), gfp);
iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
__alignof__(struct latency_stat), gfp);
if (!iolat->stats) {
kfree(iolat);
return NULL;

@@ -876,15 +960,21 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
u64 now = ktime_to_ns(ktime_get());
int cpu;

if (blk_queue_nonrot(blkg->q))
iolat->ssd = true;
else
iolat->ssd = false;

for_each_possible_cpu(cpu) {
struct blk_rq_stat *stat;
struct latency_stat *stat;
stat = per_cpu_ptr(iolat->stats, cpu);
blk_rq_stat_init(stat);
latency_stat_init(iolat, stat);
}

latency_stat_init(iolat, &iolat->cur_stat);
rq_wait_init(&iolat->rq_wait);
spin_lock_init(&iolat->child_lat.lock);
iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q);
iolat->rq_depth.queue_depth = blkg->q->nr_requests;
iolat->rq_depth.max_depth = UINT_MAX;
iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
iolat->blkiolat = blkiolat;
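The io.latency hunks above switch the per-cpu statistics to a latency_stat union that is interpreted per group: on non-rotational devices the code counts total/missed requests against the latency target, on rotational devices it keeps the existing mean-latency accounting. A small self-contained sketch of that dispatch pattern, with hypothetical type and function names (not from this series):

/* Hedged sketch of the union-dispatch idea; names are illustrative. */
struct example_stat {
	union {
		struct { u64 total, missed; } ps;	/* "ssd": percentile counts */
		struct { u64 sum, nr; } mean;		/* rotational: running mean */
	};
};

static void example_stat_record(bool ssd, struct example_stat *s,
				u64 req_time, u64 threshold)
{
	if (ssd) {
		if (req_time >= threshold)
			s->ps.missed++;
		s->ps.total++;
	} else {
		s->mean.sum += req_time;
		s->mean.nr++;
	}
}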
@@ -12,6 +12,69 @@

#include "blk.h"

/*
* Check if the two bvecs from two bios can be merged to one segment. If yes,
* no need to check gap between the two bios since the 1st bio and the 1st bvec
* in the 2nd bio can be handled in one segment.
*/
static inline bool bios_segs_mergeable(struct request_queue *q,
struct bio *prev, struct bio_vec *prev_last_bv,
struct bio_vec *next_first_bv)
{
if (!biovec_phys_mergeable(q, prev_last_bv, next_first_bv))
return false;
if (prev->bi_seg_back_size + next_first_bv->bv_len >
queue_max_segment_size(q))
return false;
return true;
}

static inline bool bio_will_gap(struct request_queue *q,
struct request *prev_rq, struct bio *prev, struct bio *next)
{
struct bio_vec pb, nb;

if (!bio_has_data(prev) || !queue_virt_boundary(q))
return false;

/*
* Don't merge if the 1st bio starts with non-zero offset, otherwise it
* is quite difficult to respect the sg gap limit. We work hard to
* merge a huge number of small single bios in case of mkfs.
*/
if (prev_rq)
bio_get_first_bvec(prev_rq->bio, &pb);
else
bio_get_first_bvec(prev, &pb);
if (pb.bv_offset)
return true;

/*
* We don't need to worry about the situation that the merged segment
* ends in unaligned virt boundary:
*
* - if 'pb' ends aligned, the merged segment ends aligned
* - if 'pb' ends unaligned, the next bio must include
*   one single bvec of 'nb', otherwise the 'nb' can't
*   merge with 'pb'
*/
bio_get_last_bvec(prev, &pb);
bio_get_first_bvec(next, &nb);
if (bios_segs_mergeable(q, prev, &pb, &nb))
return false;
return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
}

static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
return bio_will_gap(req->q, req, req->biotail, bio);
}

static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
return bio_will_gap(req->q, NULL, bio, req->bio);
}

static struct bio *blk_bio_discard_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs,

@@ -134,9 +197,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
if (bvprvp && blk_queue_cluster(q)) {
if (seg_size + bv.bv_len > queue_max_segment_size(q))
goto new_segment;
if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv))
goto new_segment;
if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv))
if (!biovec_phys_mergeable(q, bvprvp, &bv))
goto new_segment;

seg_size += bv.bv_len;

@@ -267,9 +328,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
if (seg_size + bv.bv_len
> queue_max_segment_size(q))
goto new_segment;
if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
goto new_segment;
if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
if (!biovec_phys_mergeable(q, &bvprv, &bv))
goto new_segment;

seg_size += bv.bv_len;

@@ -349,17 +408,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
bio_get_last_bvec(bio, &end_bv);
bio_get_first_bvec(nxt, &nxt_bv);

if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
return 0;

/*
* bio and nxt are contiguous in memory; check if the queue allows
* these two to be merged into one
*/
if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
return 1;

return 0;
return biovec_phys_mergeable(q, &end_bv, &nxt_bv);
}

static inline void

@@ -373,10 +422,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
if (*sg && *cluster) {
if ((*sg)->length + nbytes > queue_max_segment_size(q))
goto new_segment;

if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
goto new_segment;
if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
if (!biovec_phys_mergeable(q, bvprv, bvec))
goto new_segment;

(*sg)->length += nbytes;
@@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
return 0;
}

static int queue_pm_only_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;

seq_printf(m, "%d\n", atomic_read(&q->pm_only));
return 0;
}

#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(QUEUED),

@@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
QUEUE_FLAG_NAME(PREEMPT_ONLY),
};
#undef QUEUE_FLAG_NAME

@@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
{ "poll_stat", 0400, queue_poll_stat_show },
{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
{ "pm_only", 0600, queue_pm_only_show, NULL },
{ "state", 0600, queue_state_show, queue_state_write },
{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },

@@ -423,8 +431,7 @@ static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
{
const struct show_busy_params *params = data;

if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
blk_mq_rq_state(rq) != MQ_RQ_IDLE)
if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx)
__blk_mq_debugfs_rq_show(params->m,
list_entry_rq(&rq->queuelist));
}
@@ -49,12 +49,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
return true;
}

static inline void blk_mq_sched_completed_request(struct request *rq)
static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
{
struct elevator_queue *e = rq->q->elevator;

if (e && e->type->ops.mq.completed_request)
e->type->ops.mq.completed_request(rq);
e->type->ops.mq.completed_request(rq, now);
}

static inline void blk_mq_sched_started_request(struct request *rq)
@@ -232,13 +232,26 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)

/*
* We can hit rq == NULL here, because the tagging functions
* test and set the bit before assining ->rqs[].
* test and set the bit before assigning ->rqs[].
*/
if (rq && rq->q == hctx->queue)
iter_data->fn(hctx, rq, iter_data->data, reserved);
return true;
}

/**
* bt_for_each - iterate over the requests associated with a hardware queue
* @hctx: Hardware queue to examine.
* @bt: sbitmap to examine. This is either the breserved_tags member
* or the bitmap_tags member of struct blk_mq_tags.
* @fn: Pointer to the function that will be called for each request
* associated with @hctx that has been assigned a driver tag.
* @fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
* where rq is a pointer to a request.
* @data: Will be passed as third argument to @fn.
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
*/
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
busy_iter_fn *fn, void *data, bool reserved)
{

@@ -280,6 +293,18 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
return true;
}

/**
* bt_tags_for_each - iterate over the requests in a tag map
* @tags: Tag map to iterate over.
* @bt: sbitmap to examine. This is either the breserved_tags member
* or the bitmap_tags member of struct blk_mq_tags.
* @fn: Pointer to the function that will be called for each started
* request. @fn will be called as follows: @fn(rq, @data,
* @reserved) where rq is a pointer to a request.
* @data: Will be passed as second argument to @fn.
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
*/
static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
busy_tag_iter_fn *fn, void *data, bool reserved)
{

@@ -294,6 +319,15 @@ static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

/**
* blk_mq_all_tag_busy_iter - iterate over all started requests in a tag map
* @tags: Tag map to iterate over.
* @fn: Pointer to the function that will be called for each started
* request. @fn will be called as follows: @fn(rq, @priv,
* reserved) where rq is a pointer to a request. 'reserved'
* indicates whether or not @rq is a reserved request.
* @priv: Will be passed as second argument to @fn.
*/
static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
busy_tag_iter_fn *fn, void *priv)
{

@@ -302,6 +336,15 @@ static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}

/**
* blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
* @tagset: Tag set to iterate over.
* @fn: Pointer to the function that will be called for each started
* request. @fn will be called as follows: @fn(rq, @priv,
* reserved) where rq is a pointer to a request. 'reserved'
* indicates whether or not @rq is a reserved request.
* @priv: Will be passed as second argument to @fn.
*/
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
busy_tag_iter_fn *fn, void *priv)
{

@@ -314,6 +357,20 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);

/**
* blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
* @q: Request queue to examine.
* @fn: Pointer to the function that will be called for each request
* on @q. @fn will be called as follows: @fn(hctx, rq, @priv,
* reserved) where rq is a pointer to a request and hctx points
* to the hardware queue associated with the request. 'reserved'
* indicates whether or not @rq is a reserved request.
* @priv: Will be passed as third argument to @fn.
*
* Note: if @q->tag_set is shared with other request queues then @fn will be
* called for all requests on all queues that share that tag set and not only
* for requests associated with @q.
*/
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv)
{

@@ -321,9 +378,11 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
int i;

/*
* __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
* queue_hw_ctx after freeze the queue, so we use q_usage_counter
* to avoid race with it.
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
* while the queue is frozen. So we can use q_usage_counter to avoid
* racing with it. __blk_mq_update_nr_hw_queues() uses
* synchronize_rcu() to ensure this function left the critical section
* below.
*/
if (!percpu_ref_tryget(&q->q_usage_counter))
return;

@@ -332,7 +391,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
struct blk_mq_tags *tags = hctx->tags;

/*
* If not software queues are currently mapped to this
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
block/blk-mq.c

@@ -33,6 +33,7 @@
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"

@@ -198,7 +199,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
WARN_ON_ONCE(freeze_depth < 0);
if (!freeze_depth) {
percpu_ref_reinit(&q->q_usage_counter);
percpu_ref_resurrect(&q->q_usage_counter);
wake_up_all(&q->mq_freeze_wq);
}
}

@@ -475,6 +476,7 @@ static void __blk_mq_free_request(struct request *rq)
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
const int sched_tag = rq->internal_tag;

blk_pm_mark_last_busy(rq);
if (rq->tag != -1)
blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
if (sched_tag != -1)

@@ -526,6 +528,9 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
blk_stat_add(rq, now);
}

if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq, now);

blk_account_io_done(rq, now);

if (rq->end_io) {

@@ -562,8 +567,20 @@ static void __blk_mq_complete_request(struct request *rq)

if (!blk_mq_mark_complete(rq))
return;
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);

/*
* Most of single queue controllers, there is only one irq vector
* for handling IO completion, and the only irq's affinity is set
* as all possible CPUs. On most of ARCHs, this affinity means the
* irq is handled on one specific CPU.
*
* So complete IO reqeust in softirq context in case of single queue
* for not degrading IO performance by irqsoff latency.
*/
if (rq->q->nr_hw_queues == 1) {
__blk_complete_request(rq);
return;
}

if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
rq->q->softirq_done_fn(rq);

@@ -2137,8 +2154,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
blk_mq_debugfs_unregister_hctx(hctx);

if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_idle(hctx);

@@ -2165,6 +2180,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
queue_for_each_hw_ctx(q, hctx, i) {
if (i == nr_queue)
break;
blk_mq_debugfs_unregister_hctx(hctx);
blk_mq_exit_hctx(q, set, hctx, i);
}
}

@@ -2194,12 +2210,12 @@ static int blk_mq_init_hctx(struct request_queue *q,
* runtime
*/
hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
GFP_KERNEL, node);
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node);
if (!hctx->ctxs)
goto unregister_cpu_notifier;

if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL,
node))
if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node))
goto free_ctxs;

hctx->nr_ctx = 0;

@@ -2212,7 +2228,8 @@ static int blk_mq_init_hctx(struct request_queue *q,
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap;

hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
if (!hctx->fq)
goto exit_hctx;

@@ -2222,8 +2239,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
if (hctx->flags & BLK_MQ_F_BLOCKING)
init_srcu_struct(hctx->srcu);

blk_mq_debugfs_register_hctx(q, hctx);

return 0;

free_fq:

@@ -2492,6 +2507,39 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
}
EXPORT_SYMBOL(blk_mq_init_queue);

/*
* Helper for setting up a queue with mq ops, given queue depth, and
* the passed in mq ops flags.
*/
struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
const struct blk_mq_ops *ops,
unsigned int queue_depth,
unsigned int set_flags)
{
struct request_queue *q;
int ret;

memset(set, 0, sizeof(*set));
set->ops = ops;
set->nr_hw_queues = 1;
set->queue_depth = queue_depth;
set->numa_node = NUMA_NO_NODE;
set->flags = set_flags;

ret = blk_mq_alloc_tag_set(set);
if (ret)
return ERR_PTR(ret);

q = blk_mq_init_queue(set);
if (IS_ERR(q)) {
blk_mq_free_tag_set(set);
return q;
}

return q;
}
EXPORT_SYMBOL(blk_mq_init_sq_queue);
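blk_mq_init_sq_queue() is the helper used by the legacy-to-blk-mq driver conversions in this pull: it fills in a single-hardware-queue tag set and allocates the request queue in one call. A hedged sketch of how a converted driver might call it, with hypothetical names (example_queue_rq and the probe function are assumed, not taken from this series):

/* Hedged sketch: hypothetical driver conversion, names are illustrative. */
static struct blk_mq_tag_set example_tag_set;

static const struct blk_mq_ops example_mq_ops = {
	.queue_rq = example_queue_rq,	/* assumed driver callback */
};

static int example_probe(void)
{
	struct request_queue *q;

	q = blk_mq_init_sq_queue(&example_tag_set, &example_mq_ops,
				 16, BLK_MQ_F_SHOULD_MERGE);
	if (IS_ERR(q))
		return PTR_ERR(q);
	/* ... attach q to the gendisk as usual ... */
	return 0;
}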

static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
{
int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);

@@ -2506,48 +2554,90 @@ static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
return hw_ctx_size;
}

static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
struct blk_mq_tag_set *set, struct request_queue *q,
int hctx_idx, int node)
{
struct blk_mq_hw_ctx *hctx;

hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
node);
if (!hctx)
return NULL;

if (!zalloc_cpumask_var_node(&hctx->cpumask,
GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
node)) {
kfree(hctx);
return NULL;
}

atomic_set(&hctx->nr_active, 0);
hctx->numa_node = node;
hctx->queue_num = hctx_idx;

if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) {
free_cpumask_var(hctx->cpumask);
kfree(hctx);
return NULL;
}
blk_mq_hctx_kobj_init(hctx);

return hctx;
}

static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
int i, j;
int i, j, end;
struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;

blk_mq_sysfs_unregister(q);

/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int node;

if (hctxs[i])
continue;
struct blk_mq_hw_ctx *hctx;

node = blk_mq_hw_queue_to_node(q->mq_map, i);
hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
GFP_KERNEL, node);
if (!hctxs[i])
break;
/*
* If the hw queue has been mapped to another numa node,
* we need to realloc the hctx. If allocation fails, fallback
* to use the previous one.
*/
if (hctxs[i] && (hctxs[i]->numa_node == node))
continue;

if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
node)) {
kfree(hctxs[i]);
hctxs[i] = NULL;
break;
hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
if (hctx) {
if (hctxs[i]) {
blk_mq_exit_hctx(q, set, hctxs[i], i);
kobject_put(&hctxs[i]->kobj);
}
hctxs[i] = hctx;
} else {
if (hctxs[i])
pr_warn("Allocate new hctx on node %d fails,\
fallback to previous one on node %d\n",
node, hctxs[i]->numa_node);
else
break;
}

atomic_set(&hctxs[i]->nr_active, 0);
hctxs[i]->numa_node = node;
hctxs[i]->queue_num = i;

if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
free_cpumask_var(hctxs[i]->cpumask);
kfree(hctxs[i]);
hctxs[i] = NULL;
break;
}
blk_mq_hctx_kobj_init(hctxs[i]);
}
for (j = i; j < q->nr_hw_queues; j++) {
/*
* Increasing nr_hw_queues fails. Free the newly allocated
* hctxs and keep the previous q->nr_hw_queues.
*/
if (i != set->nr_hw_queues) {
j = q->nr_hw_queues;
end = i;
} else {
j = i;
end = q->nr_hw_queues;
q->nr_hw_queues = set->nr_hw_queues;
}

for (; j < end; j++) {
struct blk_mq_hw_ctx *hctx = hctxs[j];

if (hctx) {

@@ -2559,9 +2649,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,

}
}
q->nr_hw_queues = i;
mutex_unlock(&q->sysfs_lock);
blk_mq_sysfs_register(q);
}

struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,

@@ -2659,25 +2747,6 @@ void blk_mq_free_queue(struct request_queue *q)
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
}

/* Basically redo blk_mq_init_queue with queue frozen */
static void blk_mq_queue_reinit(struct request_queue *q)
{
WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));

blk_mq_debugfs_unregister_hctxs(q);
blk_mq_sysfs_unregister(q);

/*
* redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
* we should change hctx numa_node according to the new topology (this
* involves freeing and re-allocating memory, worth doing?)
*/
blk_mq_map_swqueue(q);

blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
}

static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
int i;

@@ -2964,6 +3033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
{
struct request_queue *q;
LIST_HEAD(head);
int prev_nr_hw_queues;

lockdep_assert_held(&set->tag_list_lock);

@@ -2987,11 +3057,30 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (!blk_mq_elv_switch_none(&head, q))
goto switch_back;

list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_debugfs_unregister_hctxs(q);
blk_mq_sysfs_unregister(q);
}

prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
fallback:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
blk_mq_queue_reinit(q);
if (q->nr_hw_queues != set->nr_hw_queues) {
pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
nr_hw_queues, prev_nr_hw_queues);
set->nr_hw_queues = prev_nr_hw_queues;
blk_mq_map_queues(set);
goto fallback;
}
blk_mq_map_swqueue(q);
}

list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
}

switch_back:
|
@ -0,0 +1,216 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-pm.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
|
||||
/**
|
||||
* blk_pm_runtime_init - Block layer runtime PM initialization routine
|
||||
* @q: the queue of the device
|
||||
* @dev: the device the queue belongs to
|
||||
*
|
||||
* Description:
|
||||
* Initialize runtime-PM-related fields for @q and start autosuspend for
* @dev. Drivers that want to take advantage of request-based runtime PM
* should call this function after @dev has been initialized and its
* request queue @q has been allocated, while runtime PM cannot happen yet
* (either because it is disabled/forbidden or because its usage_count > 0).
* In most cases, drivers should call this function before any I/O has
* taken place.
*
* This function sets up autosuspend for the device; the autosuspend delay
* is set to -1 so that runtime suspend is impossible until an updated value
* is set either by the user or by the driver. Drivers do not need to touch
* any other autosuspend settings.
*
* Block layer runtime PM is request based, so it only works for drivers
* that use requests as their I/O unit rather than operating directly on bios.
*/
|
||||
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
|
||||
{
|
||||
q->dev = dev;
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_set_autosuspend_delay(q->dev, -1);
|
||||
pm_runtime_use_autosuspend(q->dev);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_pm_runtime_init);
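A rough sketch of the call sequence described above, as a driver might wire it up at probe time; the foo_* name and the 5-second autosuspend delay are illustrative assumptions, not part of this series:

#include <linux/blk-pm.h>
#include <linux/blkdev.h>
#include <linux/pm_runtime.h>

/*
 * Hypothetical probe-time hook: @q is the driver's already-allocated request
 * queue, @dev the hardware device it belongs to.
 */
static void foo_enable_queue_runtime_pm(struct request_queue *q,
					struct device *dev)
{
	/* Tie the queue to the device before any I/O is issued. */
	blk_pm_runtime_init(q, dev);

	/*
	 * blk_pm_runtime_init() leaves the autosuspend delay at -1 ("never
	 * suspend"); pick a real delay once the device can cope with being
	 * suspended. Dropping the driver's runtime PM references afterwards
	 * remains the driver's job, as usual.
	 */
	pm_runtime_set_autosuspend_delay(dev, 5000);
}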
|
||||
|
||||
/**
|
||||
* blk_pre_runtime_suspend - Pre runtime suspend check
|
||||
* @q: the queue of the device
|
||||
*
|
||||
* Description:
|
||||
* This function will check if runtime suspend is allowed for the device
|
||||
* by examining if there are any requests pending in the queue. If there
|
||||
* are requests pending, the device can not be runtime suspended; otherwise,
|
||||
* the queue's status will be updated to SUSPENDING and the driver can
|
||||
* proceed to suspend the device.
|
||||
*
|
||||
* If suspend is not allowed, we mark the device as last busy so that the
* runtime PM core will try to autosuspend it again some time later.
|
||||
*
|
||||
* This function should be called near the start of the device's
|
||||
* runtime_suspend callback.
|
||||
*
|
||||
* Return:
|
||||
* 0 - OK to runtime suspend the device
|
||||
* -EBUSY - Device should not be runtime suspended
|
||||
*/
|
||||
int blk_pre_runtime_suspend(struct request_queue *q)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (!q->dev)
|
||||
return ret;
|
||||
|
||||
WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE);
|
||||
|
||||
/*
|
||||
* Increase the pm_only counter before checking whether any
|
||||
* non-PM blk_queue_enter() calls are in progress, to prevent any new
* non-PM blk_queue_enter() calls from succeeding before the pm_only
* counter is decreased again.
|
||||
*/
|
||||
blk_set_pm_only(q);
|
||||
ret = -EBUSY;
|
||||
/* Switch q_usage_counter from per-cpu to atomic mode. */
|
||||
blk_freeze_queue_start(q);
|
||||
/*
|
||||
* Wait until atomic mode has been reached. Since that
|
||||
* involves calling call_rcu(), it is guaranteed that later
|
||||
* blk_queue_enter() calls see the pm-only state. See also
|
||||
* http://lwn.net/Articles/573497/.
|
||||
*/
|
||||
percpu_ref_switch_to_atomic_sync(&q->q_usage_counter);
|
||||
if (percpu_ref_is_zero(&q->q_usage_counter))
|
||||
ret = 0;
|
||||
/* Switch q_usage_counter back to per-cpu mode. */
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (ret < 0)
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
else
|
||||
q->rpm_status = RPM_SUSPENDING;
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
if (ret)
|
||||
blk_clear_pm_only(q);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_pre_runtime_suspend);
|
||||
|
||||
/**
|
||||
* blk_post_runtime_suspend - Post runtime suspend processing
|
||||
* @q: the queue of the device
|
||||
* @err: return value of the device's runtime_suspend function
|
||||
*
|
||||
* Description:
|
||||
* Update the queue's runtime status according to the return value of the
|
||||
* device's runtime suspend function and mark last busy for the device so
|
||||
* that PM core will try to auto suspend the device at a later time.
|
||||
*
|
||||
* This function should be called near the end of the device's
|
||||
* runtime_suspend callback.
|
||||
*/
|
||||
void blk_post_runtime_suspend(struct request_queue *q, int err)
|
||||
{
|
||||
if (!q->dev)
|
||||
return;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (!err) {
|
||||
q->rpm_status = RPM_SUSPENDED;
|
||||
} else {
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
}
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
if (err)
|
||||
blk_clear_pm_only(q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_post_runtime_suspend);
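A minimal sketch of the check-then-commit pattern these two helpers describe, placed in a hypothetical driver's runtime_suspend callback; retrieving the queue via dev_get_drvdata() and the foo_hw_suspend() hook are assumptions made for the example:

static int foo_runtime_suspend(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* assumed layout */
	int ret;

	/* Returns -EBUSY and keeps the queue active if requests are pending. */
	ret = blk_pre_runtime_suspend(q);
	if (ret)
		return ret;

	ret = foo_hw_suspend(dev);	/* hypothetical hardware hook */

	/* Records RPM_SUSPENDED on success, rolls back to RPM_ACTIVE on error. */
	blk_post_runtime_suspend(q, ret);
	return ret;
}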
|
||||
|
||||
/**
|
||||
* blk_pre_runtime_resume - Pre runtime resume processing
|
||||
* @q: the queue of the device
|
||||
*
|
||||
* Description:
|
||||
* Update the queue's runtime status to RESUMING in preparation for the
|
||||
* runtime resume of the device.
|
||||
*
|
||||
* This function should be called near the start of the device's
|
||||
* runtime_resume callback.
|
||||
*/
|
||||
void blk_pre_runtime_resume(struct request_queue *q)
|
||||
{
|
||||
if (!q->dev)
|
||||
return;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
q->rpm_status = RPM_RESUMING;
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_pre_runtime_resume);
|
||||
|
||||
/**
|
||||
* blk_post_runtime_resume - Post runtime resume processing
|
||||
* @q: the queue of the device
|
||||
* @err: return value of the device's runtime_resume function
|
||||
*
|
||||
* Description:
|
||||
* Update the queue's runtime status according to the return value of the
* device's runtime_resume function. If the device was successfully resumed,
* process the requests that were queued while it was resuming, then mark the
* device as last busy and initiate autosuspend for it.
|
||||
*
|
||||
* This function should be called near the end of the device's
|
||||
* runtime_resume callback.
|
||||
*/
|
||||
void blk_post_runtime_resume(struct request_queue *q, int err)
|
||||
{
|
||||
if (!q->dev)
|
||||
return;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
if (!err) {
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
pm_request_autosuspend(q->dev);
|
||||
} else {
|
||||
q->rpm_status = RPM_SUSPENDED;
|
||||
}
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
if (!err)
|
||||
blk_clear_pm_only(q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_post_runtime_resume);
|
||||
|
||||
/**
|
||||
* blk_set_runtime_active - Force runtime status of the queue to be active
|
||||
* @q: the queue of the device
|
||||
*
|
||||
* If the device is left runtime suspended during system suspend the resume
|
||||
* hook typically resumes the device and corrects runtime status
|
||||
* accordingly. However, that does not affect the queue runtime PM status
|
||||
* which is still "suspended". This prevents processing requests from the
|
||||
* queue.
|
||||
*
|
||||
* This function can be used in the driver's resume hook to correct the queue's
* runtime PM status and re-enable peeking requests from the queue. It should
* be called before the first request is added to the queue.
|
||||
*/
|
||||
void blk_set_runtime_active(struct request_queue *q)
|
||||
{
|
||||
spin_lock_irq(q->queue_lock);
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
pm_request_autosuspend(q->dev);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_runtime_active);
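The matching resume side, again with hypothetical foo_* pieces. A device that was left runtime suspended across a system suspend would instead have its driver call blk_set_runtime_active() from the system resume hook, before the first request is queued:

static int foo_runtime_resume(struct device *dev)
{
	struct request_queue *q = dev_get_drvdata(dev);	/* assumed layout */
	int ret;

	blk_pre_runtime_resume(q);		/* queue -> RPM_RESUMING */
	ret = foo_hw_resume(dev);		/* hypothetical hardware hook */
	blk_post_runtime_resume(q, ret);	/* RPM_ACTIVE on success, else RPM_SUSPENDED */
	return ret;
}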
|
|
@ -0,0 +1,69 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _BLOCK_BLK_PM_H_
|
||||
#define _BLOCK_BLK_PM_H_
|
||||
|
||||
#include <linux/pm_runtime.h>
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
static inline void blk_pm_request_resume(struct request_queue *q)
|
||||
{
|
||||
if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
|
||||
q->rpm_status == RPM_SUSPENDING))
|
||||
pm_request_resume(q->dev);
|
||||
}
|
||||
|
||||
static inline void blk_pm_mark_last_busy(struct request *rq)
|
||||
{
|
||||
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
|
||||
pm_runtime_mark_last_busy(rq->q->dev);
|
||||
}
|
||||
|
||||
static inline void blk_pm_requeue_request(struct request *rq)
|
||||
{
|
||||
lockdep_assert_held(rq->q->queue_lock);
|
||||
|
||||
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
|
||||
rq->q->nr_pending--;
|
||||
}
|
||||
|
||||
static inline void blk_pm_add_request(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
lockdep_assert_held(q->queue_lock);
|
||||
|
||||
if (q->dev && !(rq->rq_flags & RQF_PM))
|
||||
q->nr_pending++;
|
||||
}
|
||||
|
||||
static inline void blk_pm_put_request(struct request *rq)
|
||||
{
|
||||
lockdep_assert_held(rq->q->queue_lock);
|
||||
|
||||
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
|
||||
--rq->q->nr_pending;
|
||||
}
|
||||
#else
|
||||
static inline void blk_pm_request_resume(struct request_queue *q)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_pm_mark_last_busy(struct request *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_pm_requeue_request(struct request *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_pm_add_request(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void blk_pm_put_request(struct request *rq)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _BLOCK_BLK_PM_H_ */
|
|
@ -97,8 +97,8 @@ static int blk_softirq_cpu_dead(unsigned int cpu)
|
|||
|
||||
void __blk_complete_request(struct request *req)
|
||||
{
|
||||
int ccpu, cpu;
|
||||
struct request_queue *q = req->q;
|
||||
int cpu, ccpu = q->mq_ops ? req->mq_ctx->cpu : req->cpu;
|
||||
unsigned long flags;
|
||||
bool shared = false;
|
||||
|
||||
|
@ -110,8 +110,7 @@ void __blk_complete_request(struct request *req)
|
|||
/*
|
||||
* Select completion CPU
|
||||
*/
|
||||
if (req->cpu != -1) {
|
||||
ccpu = req->cpu;
|
||||
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && ccpu != -1) {
|
||||
if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
|
||||
shared = cpus_share_cache(cpu, ccpu);
|
||||
} else
|
||||
|
|
|
@ -190,6 +190,7 @@ void blk_stat_enable_accounting(struct request_queue *q)
|
|||
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
|
||||
spin_unlock(&q->stats->lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_stat_enable_accounting);
|
||||
|
||||
struct blk_queue_stats *blk_alloc_queue_stats(void)
|
||||
{
|
||||
|
|
|
@ -84,8 +84,7 @@ struct throtl_service_queue {
|
|||
* RB tree of active children throtl_grp's, which are sorted by
|
||||
* their ->disptime.
|
||||
*/
|
||||
struct rb_root pending_tree; /* RB tree of active tgs */
|
||||
struct rb_node *first_pending; /* first node in the tree */
|
||||
struct rb_root_cached pending_tree; /* RB tree of active tgs */
|
||||
unsigned int nr_pending; /* # queued in the tree */
|
||||
unsigned long first_pending_disptime; /* disptime of the first tg */
|
||||
struct timer_list pending_timer; /* fires on first_pending_disptime */
|
||||
|
@ -475,7 +474,7 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
|
|||
{
|
||||
INIT_LIST_HEAD(&sq->queued[0]);
|
||||
INIT_LIST_HEAD(&sq->queued[1]);
|
||||
sq->pending_tree = RB_ROOT;
|
||||
sq->pending_tree = RB_ROOT_CACHED;
|
||||
timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
|
||||
}
|
||||
|
||||
|
@ -616,31 +615,23 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
|
|||
static struct throtl_grp *
|
||||
throtl_rb_first(struct throtl_service_queue *parent_sq)
|
||||
{
|
||||
struct rb_node *n;
|
||||
/* Service tree is empty */
|
||||
if (!parent_sq->nr_pending)
|
||||
return NULL;
|
||||
|
||||
if (!parent_sq->first_pending)
|
||||
parent_sq->first_pending = rb_first(&parent_sq->pending_tree);
|
||||
|
||||
if (parent_sq->first_pending)
|
||||
return rb_entry_tg(parent_sq->first_pending);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void rb_erase_init(struct rb_node *n, struct rb_root *root)
|
||||
{
|
||||
rb_erase(n, root);
|
||||
RB_CLEAR_NODE(n);
|
||||
n = rb_first_cached(&parent_sq->pending_tree);
|
||||
WARN_ON_ONCE(!n);
|
||||
if (!n)
|
||||
return NULL;
|
||||
return rb_entry_tg(n);
|
||||
}
|
||||
|
||||
static void throtl_rb_erase(struct rb_node *n,
|
||||
struct throtl_service_queue *parent_sq)
|
||||
{
|
||||
if (parent_sq->first_pending == n)
|
||||
parent_sq->first_pending = NULL;
|
||||
rb_erase_init(n, &parent_sq->pending_tree);
|
||||
rb_erase_cached(n, &parent_sq->pending_tree);
|
||||
RB_CLEAR_NODE(n);
|
||||
--parent_sq->nr_pending;
|
||||
}
|
||||
|
||||
|
@ -658,11 +649,11 @@ static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
|
|||
static void tg_service_queue_add(struct throtl_grp *tg)
|
||||
{
|
||||
struct throtl_service_queue *parent_sq = tg->service_queue.parent_sq;
|
||||
struct rb_node **node = &parent_sq->pending_tree.rb_node;
|
||||
struct rb_node **node = &parent_sq->pending_tree.rb_root.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct throtl_grp *__tg;
|
||||
unsigned long key = tg->disptime;
|
||||
int left = 1;
|
||||
bool leftmost = true;
|
||||
|
||||
while (*node != NULL) {
|
||||
parent = *node;
|
||||
|
@ -672,15 +663,13 @@ static void tg_service_queue_add(struct throtl_grp *tg)
|
|||
node = &parent->rb_left;
|
||||
else {
|
||||
node = &parent->rb_right;
|
||||
left = 0;
|
||||
leftmost = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (left)
|
||||
parent_sq->first_pending = &tg->rb_node;
|
||||
|
||||
rb_link_node(&tg->rb_node, parent, node);
|
||||
rb_insert_color(&tg->rb_node, &parent_sq->pending_tree);
|
||||
rb_insert_color_cached(&tg->rb_node, &parent_sq->pending_tree,
|
||||
leftmost);
|
||||
}
|
||||
|
||||
static void __throtl_enqueue_tg(struct throtl_grp *tg)
|
||||
|
@ -2126,21 +2115,11 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
|
|||
}
|
||||
#endif
|
||||
|
||||
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
/* fallback to root_blkg if we fail to get a blkg ref */
|
||||
if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
|
||||
bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
|
||||
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
|
||||
#endif
|
||||
}
|
||||
|
||||
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct throtl_qnode *qn = NULL;
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
|
||||
struct throtl_grp *tg = blkg_to_tg(blkg);
|
||||
struct throtl_service_queue *sq;
|
||||
bool rw = bio_data_dir(bio);
|
||||
bool throttled = false;
|
||||
|
@ -2159,7 +2138,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
|||
if (unlikely(blk_queue_bypass(q)))
|
||||
goto out_unlock;
|
||||
|
||||
blk_throtl_assoc_bio(tg, bio);
|
||||
blk_throtl_update_idletime(tg);
|
||||
|
||||
sq = &tg->service_queue;
|
||||
|
|
block/blk.h
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <linux/idr.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <xen/xen.h>
|
||||
#include "blk-mq.h"
|
||||
|
||||
/* Amount of time in which a process may batch requests */
|
||||
|
@ -124,7 +125,7 @@ static inline void __blk_get_queue(struct request_queue *q)
|
|||
}
|
||||
|
||||
struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
|
||||
int node, int cmd_size);
|
||||
int node, int cmd_size, gfp_t flags);
|
||||
void blk_free_flush_queue(struct blk_flush_queue *q);
|
||||
|
||||
int blk_init_rl(struct request_list *rl, struct request_queue *q,
|
||||
|
@ -149,6 +150,41 @@ static inline void blk_queue_enter_live(struct request_queue *q)
|
|||
percpu_ref_get(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
static inline bool biovec_phys_mergeable(struct request_queue *q,
|
||||
struct bio_vec *vec1, struct bio_vec *vec2)
|
||||
{
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
|
||||
phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
|
||||
|
||||
if (addr1 + vec1->bv_len != addr2)
|
||||
return false;
|
||||
if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2))
|
||||
return false;
|
||||
if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool __bvec_gap_to_prev(struct request_queue *q,
|
||||
struct bio_vec *bprv, unsigned int offset)
|
||||
{
|
||||
return offset ||
|
||||
((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if adding a bio_vec after bprv with offset would create a gap in
|
||||
* the SG list. Most drivers don't care about this, but some do.
|
||||
*/
|
||||
static inline bool bvec_gap_to_prev(struct request_queue *q,
|
||||
struct bio_vec *bprv, unsigned int offset)
|
||||
{
|
||||
if (!queue_virt_boundary(q))
|
||||
return false;
|
||||
return __bvec_gap_to_prev(q, bprv, offset);
|
||||
}
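For illustration (numbers invented for the example): with a 4 KiB virt boundary, queue_virt_boundary() is 0xfff, so a previous bio_vec whose bv_offset + bv_len is 0x600 fails the second test (0x600 & 0xfff != 0) and any following vec would create a gap; even when the previous vec ends exactly on the boundary, the follower is only gap-free if its own offset is 0.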
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
void blk_flush_integrity(void);
|
||||
bool __bio_integrity_endio(struct bio *);
|
||||
|
@ -158,7 +194,38 @@ static inline bool bio_integrity_endio(struct bio *bio)
|
|||
return __bio_integrity_endio(bio);
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
|
||||
static inline bool integrity_req_gap_back_merge(struct request *req,
|
||||
struct bio *next)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(req->bio);
|
||||
struct bio_integrity_payload *bip_next = bio_integrity(next);
|
||||
|
||||
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
|
||||
bip_next->bip_vec[0].bv_offset);
|
||||
}
|
||||
|
||||
static inline bool integrity_req_gap_front_merge(struct request *req,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
|
||||
|
||||
return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
|
||||
bip_next->bip_vec[0].bv_offset);
|
||||
}
|
||||
#else /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
static inline bool integrity_req_gap_back_merge(struct request *req,
|
||||
struct bio *next)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool integrity_req_gap_front_merge(struct request *req,
|
||||
struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void blk_flush_integrity(void)
|
||||
{
|
||||
}
|
||||
|
@ -166,7 +233,7 @@ static inline bool bio_integrity_endio(struct bio *bio)
|
|||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_BLK_DEV_INTEGRITY */
|
||||
|
||||
void blk_timeout_work(struct work_struct *work);
|
||||
unsigned long blk_rq_timeout(unsigned long timeout);
|
||||
|
|
|
@ -31,6 +31,24 @@
|
|||
static struct bio_set bounce_bio_set, bounce_bio_split;
|
||||
static mempool_t page_pool, isa_page_pool;
|
||||
|
||||
static void init_bounce_bioset(void)
|
||||
{
|
||||
static bool bounce_bs_setup;
|
||||
int ret;
|
||||
|
||||
if (bounce_bs_setup)
|
||||
return;
|
||||
|
||||
ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
BUG_ON(ret);
|
||||
if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
|
||||
BUG_ON(1);
|
||||
|
||||
ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
|
||||
BUG_ON(ret);
|
||||
bounce_bs_setup = true;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_HIGHMEM)
|
||||
static __init int init_emergency_pool(void)
|
||||
{
|
||||
|
@ -44,14 +62,7 @@ static __init int init_emergency_pool(void)
|
|||
BUG_ON(ret);
|
||||
pr_info("pool size: %d pages\n", POOL_SIZE);
|
||||
|
||||
ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
BUG_ON(ret);
|
||||
if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
|
||||
BUG_ON(1);
|
||||
|
||||
ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
init_bounce_bioset();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -86,6 +97,8 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
|
|||
return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
|
||||
}
|
||||
|
||||
static DEFINE_MUTEX(isa_mutex);
|
||||
|
||||
/*
|
||||
* gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
|
||||
* as the max address, so check if the pool has already been created.
|
||||
|
@ -94,14 +107,20 @@ int init_emergency_isa_pool(void)
|
|||
{
|
||||
int ret;
|
||||
|
||||
if (mempool_initialized(&isa_page_pool))
|
||||
mutex_lock(&isa_mutex);
|
||||
|
||||
if (mempool_initialized(&isa_page_pool)) {
|
||||
mutex_unlock(&isa_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
|
||||
mempool_free_pages, (void *) 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
|
||||
init_bounce_bioset();
|
||||
mutex_unlock(&isa_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -257,7 +276,9 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
|
|||
}
|
||||
}
|
||||
|
||||
bio_clone_blkcg_association(bio, bio_src);
|
||||
bio_clone_blkg_association(bio, bio_src);
|
||||
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
return bio;
|
||||
}
|
||||
|
|
|
@ -1644,14 +1644,20 @@ static void cfq_pd_offline(struct blkg_policy_data *pd)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < IOPRIO_BE_NR; i++) {
|
||||
if (cfqg->async_cfqq[0][i])
|
||||
if (cfqg->async_cfqq[0][i]) {
|
||||
cfq_put_queue(cfqg->async_cfqq[0][i]);
|
||||
if (cfqg->async_cfqq[1][i])
|
||||
cfqg->async_cfqq[0][i] = NULL;
|
||||
}
|
||||
if (cfqg->async_cfqq[1][i]) {
|
||||
cfq_put_queue(cfqg->async_cfqq[1][i]);
|
||||
cfqg->async_cfqq[1][i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (cfqg->async_idle_cfqq)
|
||||
if (cfqg->async_idle_cfqq) {
|
||||
cfq_put_queue(cfqg->async_idle_cfqq);
|
||||
cfqg->async_idle_cfqq = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* @blkg is going offline and will be ignored by
|
||||
|
@ -3753,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
|
|||
uint64_t serial_nr;
|
||||
|
||||
rcu_read_lock();
|
||||
serial_nr = bio_blkcg(bio)->css.serial_nr;
|
||||
serial_nr = __bio_blkcg(bio)->css.serial_nr;
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
|
@ -3818,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
|
|||
struct cfq_group *cfqg;
|
||||
|
||||
rcu_read_lock();
|
||||
cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
|
||||
cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
|
||||
if (!cfqg) {
|
||||
cfqq = &cfqd->oom_cfqq;
|
||||
goto out;
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
|
||||
#include "blk.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-pm.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
static DEFINE_SPINLOCK(elv_list_lock);
|
||||
|
@ -557,27 +558,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
|
|||
e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
static void blk_pm_requeue_request(struct request *rq)
|
||||
{
|
||||
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
|
||||
rq->q->nr_pending--;
|
||||
}
|
||||
|
||||
static void blk_pm_add_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
|
||||
(q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
|
||||
pm_request_resume(q->dev);
|
||||
}
|
||||
#else
|
||||
static inline void blk_pm_requeue_request(struct request *rq) {}
|
||||
static inline void blk_pm_add_request(struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
void elv_requeue_request(struct request_queue *q, struct request *rq)
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -567,7 +567,8 @@ static int exact_lock(dev_t devt, void *data)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void register_disk(struct device *parent, struct gendisk *disk)
|
||||
static void register_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups)
|
||||
{
|
||||
struct device *ddev = disk_to_dev(disk);
|
||||
struct block_device *bdev;
|
||||
|
@ -582,6 +583,10 @@ static void register_disk(struct device *parent, struct gendisk *disk)
|
|||
/* delay uevents, until we scanned partition table */
|
||||
dev_set_uevent_suppress(ddev, 1);
|
||||
|
||||
if (groups) {
|
||||
WARN_ON(ddev->groups);
|
||||
ddev->groups = groups;
|
||||
}
|
||||
if (device_add(ddev))
|
||||
return;
|
||||
if (!sysfs_deprecated) {
|
||||
|
@ -647,6 +652,7 @@ exit:
|
|||
* __device_add_disk - add disk information to kernel list
|
||||
* @parent: parent device for the disk
|
||||
* @disk: per-device partitioning information
|
||||
* @groups: Additional per-device sysfs groups
|
||||
* @register_queue: register the queue if set to true
|
||||
*
|
||||
* This function registers the partitioning information in @disk
|
||||
|
@ -655,6 +661,7 @@ exit:
|
|||
* FIXME: error handling
|
||||
*/
|
||||
static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups,
|
||||
bool register_queue)
|
||||
{
|
||||
dev_t devt;
|
||||
|
@ -698,7 +705,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
|||
blk_register_region(disk_devt(disk), disk->minors, NULL,
|
||||
exact_match, exact_lock, disk);
|
||||
}
|
||||
register_disk(parent, disk);
|
||||
register_disk(parent, disk, groups);
|
||||
if (register_queue)
|
||||
blk_register_queue(disk);
|
||||
|
||||
|
@ -712,15 +719,17 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
|
|||
blk_integrity_add(disk);
|
||||
}
|
||||
|
||||
void device_add_disk(struct device *parent, struct gendisk *disk)
|
||||
void device_add_disk(struct device *parent, struct gendisk *disk,
|
||||
const struct attribute_group **groups)
|
||||
|
||||
{
|
||||
__device_add_disk(parent, disk, true);
|
||||
__device_add_disk(parent, disk, groups, true);
|
||||
}
|
||||
EXPORT_SYMBOL(device_add_disk);
|
||||
|
||||
void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
|
||||
{
|
||||
__device_add_disk(parent, disk, false);
|
||||
__device_add_disk(parent, disk, NULL, false);
|
||||
}
|
||||
EXPORT_SYMBOL(device_add_disk_no_queue_reg);
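To illustrate what the new @groups argument is for, a driver can now hand its disk attributes to device_add_disk() instead of creating them after the fact; the model attribute and the foo_* names below are made up for the example:

#include <linux/device.h>
#include <linux/genhd.h>

static ssize_t model_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	return sprintf(buf, "%s\n", "foo-model");	/* illustrative value */
}
static DEVICE_ATTR_RO(model);

static struct attribute *foo_disk_attrs[] = {
	&dev_attr_model.attr,
	NULL,
};

static const struct attribute_group foo_disk_group = {
	.attrs = foo_disk_attrs,
};

static const struct attribute_group *foo_disk_groups[] = {
	&foo_disk_group,
	NULL,
};

/* In the probe path, the attributes appear together with the disk: */
/* device_add_disk(parent, disk, foo_disk_groups); */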
|
||||
|
||||
|
|
|
@ -29,19 +29,30 @@
|
|||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-mq-tag.h"
|
||||
#include "blk-stat.h"
|
||||
|
||||
/* Scheduling domains. */
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/kyber.h>
|
||||
|
||||
/*
|
||||
* Scheduling domains: the device is divided into multiple domains based on the
|
||||
* request type.
|
||||
*/
|
||||
enum {
|
||||
KYBER_READ,
|
||||
KYBER_SYNC_WRITE,
|
||||
KYBER_OTHER, /* Async writes, discard, etc. */
|
||||
KYBER_WRITE,
|
||||
KYBER_DISCARD,
|
||||
KYBER_OTHER,
|
||||
KYBER_NUM_DOMAINS,
|
||||
};
|
||||
|
||||
enum {
|
||||
KYBER_MIN_DEPTH = 256,
|
||||
static const char *kyber_domain_names[] = {
|
||||
[KYBER_READ] = "READ",
|
||||
[KYBER_WRITE] = "WRITE",
|
||||
[KYBER_DISCARD] = "DISCARD",
|
||||
[KYBER_OTHER] = "OTHER",
|
||||
};
|
||||
|
||||
enum {
|
||||
/*
|
||||
* In order to prevent starvation of synchronous requests by a flood of
|
||||
* asynchronous requests, we reserve 25% of requests for synchronous
|
||||
|
@ -51,25 +62,87 @@ enum {
|
|||
};
|
||||
|
||||
/*
|
||||
* Initial device-wide depths for each scheduling domain.
|
||||
* Maximum device-wide depth for each scheduling domain.
|
||||
*
|
||||
* Even for fast devices with lots of tags like NVMe, you can saturate
|
||||
* the device with only a fraction of the maximum possible queue depth.
|
||||
* So, we cap these to a reasonable value.
|
||||
* Even for fast devices with lots of tags like NVMe, you can saturate the
|
||||
* device with only a fraction of the maximum possible queue depth. So, we cap
|
||||
* these to a reasonable value.
|
||||
*/
|
||||
static const unsigned int kyber_depth[] = {
|
||||
[KYBER_READ] = 256,
|
||||
[KYBER_SYNC_WRITE] = 128,
|
||||
[KYBER_OTHER] = 64,
|
||||
[KYBER_WRITE] = 128,
|
||||
[KYBER_DISCARD] = 64,
|
||||
[KYBER_OTHER] = 16,
|
||||
};
|
||||
|
||||
/*
|
||||
* Scheduling domain batch sizes. We favor reads.
|
||||
* Default latency targets for each scheduling domain.
|
||||
*/
|
||||
static const u64 kyber_latency_targets[] = {
|
||||
[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
|
||||
[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
|
||||
[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
|
||||
};
|
||||
|
||||
/*
|
||||
* Batch size (number of requests we'll dispatch in a row) for each scheduling
|
||||
* domain.
|
||||
*/
|
||||
static const unsigned int kyber_batch_size[] = {
|
||||
[KYBER_READ] = 16,
|
||||
[KYBER_SYNC_WRITE] = 8,
|
||||
[KYBER_OTHER] = 8,
|
||||
[KYBER_WRITE] = 8,
|
||||
[KYBER_DISCARD] = 1,
|
||||
[KYBER_OTHER] = 1,
|
||||
};
|
||||
|
||||
/*
|
||||
* Requests latencies are recorded in a histogram with buckets defined relative
|
||||
* to the target latency:
|
||||
*
|
||||
* <= 1/4 * target latency
|
||||
* <= 1/2 * target latency
|
||||
* <= 3/4 * target latency
|
||||
* <= target latency
|
||||
* <= 1 1/4 * target latency
|
||||
* <= 1 1/2 * target latency
|
||||
* <= 1 3/4 * target latency
|
||||
* > 1 3/4 * target latency
|
||||
*/
|
||||
enum {
|
||||
/*
|
||||
* The width of the latency histogram buckets is
|
||||
* 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
|
||||
*/
|
||||
KYBER_LATENCY_SHIFT = 2,
|
||||
/*
|
||||
* The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
|
||||
* thus, "good".
|
||||
*/
|
||||
KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
|
||||
/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
|
||||
KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
|
||||
};
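A quick standalone illustration of that bucketing, using the 2 ms read target from kyber_latency_targets above and mirroring the arithmetic add_latency_sample() applies further down (plain userspace C; the kernel additionally clamps the divisor to at least 1):

#include <stdio.h>

#define KYBER_LATENCY_SHIFT	2
#define KYBER_LATENCY_BUCKETS	(2 << KYBER_LATENCY_SHIFT)

/* Bucket width is target / 4; everything past the last bucket is clamped. */
static unsigned int bucket_for(unsigned long long target_ns,
			       unsigned long long latency_ns)
{
	unsigned long long divisor = target_ns >> KYBER_LATENCY_SHIFT;
	unsigned long long b = latency_ns ? (latency_ns - 1) / divisor : 0;

	return b < KYBER_LATENCY_BUCKETS - 1 ? b : KYBER_LATENCY_BUCKETS - 1;
}

int main(void)
{
	/* 2 ms target: a 1.8 ms read is bucket 3 ("good"), 3 ms is bucket 5 ("bad"). */
	printf("%u %u\n", bucket_for(2000000ULL, 1800000ULL),
	       bucket_for(2000000ULL, 3000000ULL));
	return 0;
}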
|
||||
|
||||
/*
|
||||
* We measure both the total latency and the I/O latency (i.e., latency after
|
||||
* submitting to the device).
|
||||
*/
|
||||
enum {
|
||||
KYBER_TOTAL_LATENCY,
|
||||
KYBER_IO_LATENCY,
|
||||
};
|
||||
|
||||
static const char *kyber_latency_type_names[] = {
|
||||
[KYBER_TOTAL_LATENCY] = "total",
|
||||
[KYBER_IO_LATENCY] = "I/O",
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-cpu latency histograms: total latency and I/O latency for each scheduling
|
||||
* domain except for KYBER_OTHER.
|
||||
*/
|
||||
struct kyber_cpu_latency {
|
||||
atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -88,12 +161,9 @@ struct kyber_ctx_queue {
|
|||
struct kyber_queue_data {
|
||||
struct request_queue *q;
|
||||
|
||||
struct blk_stat_callback *cb;
|
||||
|
||||
/*
|
||||
* The device is divided into multiple scheduling domains based on the
|
||||
* request type. Each domain has a fixed number of in-flight requests of
|
||||
* that type device-wide, limited by these tokens.
|
||||
* Each scheduling domain has a limited number of in-flight requests
|
||||
* device-wide, limited by these tokens.
|
||||
*/
|
||||
struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
|
||||
|
||||
|
@ -103,8 +173,19 @@ struct kyber_queue_data {
|
|||
*/
|
||||
unsigned int async_depth;
|
||||
|
||||
struct kyber_cpu_latency __percpu *cpu_latency;
|
||||
|
||||
/* Timer for stats aggregation and adjusting domain tokens. */
|
||||
struct timer_list timer;
|
||||
|
||||
unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
|
||||
|
||||
unsigned long latency_timeout[KYBER_OTHER];
|
||||
|
||||
int domain_p99[KYBER_OTHER];
|
||||
|
||||
/* Target latencies in nanoseconds. */
|
||||
u64 read_lat_nsec, write_lat_nsec;
|
||||
u64 latency_targets[KYBER_OTHER];
|
||||
};
|
||||
|
||||
struct kyber_hctx_data {
|
||||
|
@ -124,233 +205,219 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
|
|||
|
||||
static unsigned int kyber_sched_domain(unsigned int op)
|
||||
{
|
||||
if ((op & REQ_OP_MASK) == REQ_OP_READ)
|
||||
switch (op & REQ_OP_MASK) {
|
||||
case REQ_OP_READ:
|
||||
return KYBER_READ;
|
||||
else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
|
||||
return KYBER_SYNC_WRITE;
|
||||
else
|
||||
case REQ_OP_WRITE:
|
||||
return KYBER_WRITE;
|
||||
case REQ_OP_DISCARD:
|
||||
return KYBER_DISCARD;
|
||||
default:
|
||||
return KYBER_OTHER;
|
||||
}
|
||||
}
|
||||
|
||||
enum {
|
||||
NONE = 0,
|
||||
GOOD = 1,
|
||||
GREAT = 2,
|
||||
BAD = -1,
|
||||
AWFUL = -2,
|
||||
};
|
||||
|
||||
#define IS_GOOD(status) ((status) > 0)
|
||||
#define IS_BAD(status) ((status) < 0)
|
||||
|
||||
static int kyber_lat_status(struct blk_stat_callback *cb,
|
||||
unsigned int sched_domain, u64 target)
|
||||
static void flush_latency_buckets(struct kyber_queue_data *kqd,
|
||||
struct kyber_cpu_latency *cpu_latency,
|
||||
unsigned int sched_domain, unsigned int type)
|
||||
{
|
||||
u64 latency;
|
||||
unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
|
||||
atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
|
||||
unsigned int bucket;
|
||||
|
||||
if (!cb->stat[sched_domain].nr_samples)
|
||||
return NONE;
|
||||
|
||||
latency = cb->stat[sched_domain].mean;
|
||||
if (latency >= 2 * target)
|
||||
return AWFUL;
|
||||
else if (latency > target)
|
||||
return BAD;
|
||||
else if (latency <= target / 2)
|
||||
return GREAT;
|
||||
else /* (latency <= target) */
|
||||
return GOOD;
|
||||
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
|
||||
buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust the read or synchronous write depth given the status of reads and
|
||||
* writes. The goal is that the latencies of the two domains are fair (i.e., if
|
||||
* one is good, then the other is good).
|
||||
* Calculate the histogram bucket with the given percentile rank, or -1 if there
|
||||
* aren't enough samples yet.
|
||||
*/
|
||||
static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
|
||||
unsigned int sched_domain, int this_status,
|
||||
int other_status)
|
||||
static int calculate_percentile(struct kyber_queue_data *kqd,
|
||||
unsigned int sched_domain, unsigned int type,
|
||||
unsigned int percentile)
|
||||
{
|
||||
unsigned int orig_depth, depth;
|
||||
unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
|
||||
unsigned int bucket, samples = 0, percentile_samples;
|
||||
|
||||
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
|
||||
samples += buckets[bucket];
|
||||
|
||||
if (!samples)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* If this domain had no samples, or reads and writes are both good or
|
||||
* both bad, don't adjust the depth.
|
||||
* We do the calculation once we have 500 samples or one second passes
|
||||
* since the first sample was recorded, whichever comes first.
|
||||
*/
|
||||
if (this_status == NONE ||
|
||||
(IS_GOOD(this_status) && IS_GOOD(other_status)) ||
|
||||
(IS_BAD(this_status) && IS_BAD(other_status)))
|
||||
return;
|
||||
|
||||
orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
|
||||
|
||||
if (other_status == NONE) {
|
||||
depth++;
|
||||
} else {
|
||||
switch (this_status) {
|
||||
case GOOD:
|
||||
if (other_status == AWFUL)
|
||||
depth -= max(depth / 4, 1U);
|
||||
else
|
||||
depth -= max(depth / 8, 1U);
|
||||
break;
|
||||
case GREAT:
|
||||
if (other_status == AWFUL)
|
||||
depth /= 2;
|
||||
else
|
||||
depth -= max(depth / 4, 1U);
|
||||
break;
|
||||
case BAD:
|
||||
depth++;
|
||||
break;
|
||||
case AWFUL:
|
||||
if (other_status == GREAT)
|
||||
depth += 2;
|
||||
else
|
||||
depth++;
|
||||
break;
|
||||
}
|
||||
if (!kqd->latency_timeout[sched_domain])
|
||||
kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
|
||||
if (samples < 500 &&
|
||||
time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
|
||||
return -1;
|
||||
}
|
||||
kqd->latency_timeout[sched_domain] = 0;
|
||||
|
||||
percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
|
||||
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
|
||||
if (buckets[bucket] >= percentile_samples)
|
||||
break;
|
||||
percentile_samples -= buckets[bucket];
|
||||
}
|
||||
memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));
|
||||
|
||||
trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain],
|
||||
kyber_latency_type_names[type], percentile,
|
||||
bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);
|
||||
|
||||
return bucket;
|
||||
}
|
||||
|
||||
static void kyber_resize_domain(struct kyber_queue_data *kqd,
|
||||
unsigned int sched_domain, unsigned int depth)
|
||||
{
|
||||
depth = clamp(depth, 1U, kyber_depth[sched_domain]);
|
||||
if (depth != orig_depth)
|
||||
if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
|
||||
sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
|
||||
trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain],
|
||||
depth);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust the depth of other requests given the status of reads and synchronous
|
||||
* writes. As long as either domain is doing fine, we don't throttle, but if
|
||||
* both domains are doing badly, we throttle heavily.
|
||||
*/
|
||||
static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
|
||||
int read_status, int write_status,
|
||||
bool have_samples)
|
||||
static void kyber_timer_fn(struct timer_list *t)
|
||||
{
|
||||
unsigned int orig_depth, depth;
|
||||
int status;
|
||||
struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
|
||||
unsigned int sched_domain;
|
||||
int cpu;
|
||||
bool bad = false;
|
||||
|
||||
orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
|
||||
/* Sum all of the per-cpu latency histograms. */
|
||||
for_each_online_cpu(cpu) {
|
||||
struct kyber_cpu_latency *cpu_latency;
|
||||
|
||||
if (read_status == NONE && write_status == NONE) {
|
||||
depth += 2;
|
||||
} else if (have_samples) {
|
||||
if (read_status == NONE)
|
||||
status = write_status;
|
||||
else if (write_status == NONE)
|
||||
status = read_status;
|
||||
else
|
||||
status = max(read_status, write_status);
|
||||
switch (status) {
|
||||
case GREAT:
|
||||
depth += 2;
|
||||
break;
|
||||
case GOOD:
|
||||
depth++;
|
||||
break;
|
||||
case BAD:
|
||||
depth -= max(depth / 4, 1U);
|
||||
break;
|
||||
case AWFUL:
|
||||
depth /= 2;
|
||||
break;
|
||||
cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
|
||||
for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
|
||||
flush_latency_buckets(kqd, cpu_latency, sched_domain,
|
||||
KYBER_TOTAL_LATENCY);
|
||||
flush_latency_buckets(kqd, cpu_latency, sched_domain,
|
||||
KYBER_IO_LATENCY);
|
||||
}
|
||||
}
|
||||
|
||||
depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
|
||||
if (depth != orig_depth)
|
||||
sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
|
||||
}
|
||||
/*
|
||||
* Check if any domains have a high I/O latency, which might indicate
|
||||
* congestion in the device. Note that we use the p90; we don't want to
|
||||
* be too sensitive to outliers here.
|
||||
*/
|
||||
for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
|
||||
int p90;
|
||||
|
||||
/*
|
||||
* Apply heuristics for limiting queue depths based on gathered latency
|
||||
* statistics.
|
||||
*/
|
||||
static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
|
||||
{
|
||||
struct kyber_queue_data *kqd = cb->data;
|
||||
int read_status, write_status;
|
||||
|
||||
read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
|
||||
write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
|
||||
|
||||
kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
|
||||
kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
|
||||
kyber_adjust_other_depth(kqd, read_status, write_status,
|
||||
cb->stat[KYBER_OTHER].nr_samples != 0);
|
||||
p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
|
||||
90);
|
||||
if (p90 >= KYBER_GOOD_BUCKETS)
|
||||
bad = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Continue monitoring latencies if we aren't hitting the targets or
|
||||
* we're still throttling other requests.
|
||||
* Adjust the scheduling domain depths. If we determined that there was
|
||||
* congestion, we throttle all domains with good latencies. Either way,
|
||||
* we ease up on throttling domains with bad latencies.
|
||||
*/
|
||||
if (!blk_stat_is_active(kqd->cb) &&
|
||||
((IS_BAD(read_status) || IS_BAD(write_status) ||
|
||||
kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
|
||||
blk_stat_activate_msecs(kqd->cb, 100);
|
||||
for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
|
||||
unsigned int orig_depth, depth;
|
||||
int p99;
|
||||
|
||||
p99 = calculate_percentile(kqd, sched_domain,
|
||||
KYBER_TOTAL_LATENCY, 99);
|
||||
/*
|
||||
* This is kind of subtle: different domains will not
|
||||
* necessarily have enough samples to calculate the latency
|
||||
* percentiles during the same window, so we have to remember
|
||||
* the p99 for the next time we observe congestion; once we do,
|
||||
* we don't want to throttle again until we get more data, so we
|
||||
* reset it to -1.
|
||||
*/
|
||||
if (bad) {
|
||||
if (p99 < 0)
|
||||
p99 = kqd->domain_p99[sched_domain];
|
||||
kqd->domain_p99[sched_domain] = -1;
|
||||
} else if (p99 >= 0) {
|
||||
kqd->domain_p99[sched_domain] = p99;
|
||||
}
|
||||
if (p99 < 0)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If this domain has bad latency, throttle less. Otherwise,
|
||||
* throttle more iff we determined that there is congestion.
|
||||
*
|
||||
* The new depth is scaled linearly with the p99 latency vs the
|
||||
* latency target. E.g., if the p99 is 3/4 of the target, then
|
||||
* we throttle down to 3/4 of the current depth, and if the p99
|
||||
* is 2x the target, then we double the depth.
|
||||
*/
|
||||
if (bad || p99 >= KYBER_GOOD_BUCKETS) {
|
||||
orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
|
||||
depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
|
||||
kyber_resize_domain(kqd, sched_domain, depth);
|
||||
}
|
||||
}
|
||||
}
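As a concrete instance of that scaling (an illustrative calculation, not part of the patch): the new depth is orig_depth * (p99 bucket + 1) >> KYBER_LATENCY_SHIFT, so a p99 landing in bucket 2 (at most 3/4 of the target) shrinks a 128-token write domain to 96 tokens, while a p99 in the last bucket (beyond 1 3/4 of the target) asks for 256 and is clamped back to kyber_depth[KYBER_WRITE] = 128 by kyber_resize_domain().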
|
||||
|
||||
static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
|
||||
static unsigned int kyber_sched_tags_shift(struct request_queue *q)
|
||||
{
|
||||
/*
|
||||
* All of the hardware queues have the same depth, so we can just grab
|
||||
* the shift of the first one.
|
||||
*/
|
||||
return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
|
||||
}
|
||||
|
||||
static int kyber_bucket_fn(const struct request *rq)
|
||||
{
|
||||
return kyber_sched_domain(rq->cmd_flags);
|
||||
return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
|
||||
}
|
||||
|
||||
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
|
||||
{
|
||||
struct kyber_queue_data *kqd;
|
||||
unsigned int max_tokens;
|
||||
unsigned int shift;
|
||||
int ret = -ENOMEM;
|
||||
int i;
|
||||
|
||||
kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
|
||||
kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
|
||||
if (!kqd)
|
||||
goto err;
|
||||
|
||||
kqd->q = q;
|
||||
|
||||
kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
|
||||
KYBER_NUM_DOMAINS, kqd);
|
||||
if (!kqd->cb)
|
||||
kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!kqd->cpu_latency)
|
||||
goto err_kqd;
|
||||
|
||||
/*
|
||||
* The maximum number of tokens for any scheduling domain is at least
|
||||
* the queue depth of a single hardware queue. If the hardware doesn't
|
||||
* have many tags, still provide a reasonable number.
|
||||
*/
|
||||
max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
|
||||
KYBER_MIN_DEPTH);
|
||||
timer_setup(&kqd->timer, kyber_timer_fn, 0);
|
||||
|
||||
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
|
||||
WARN_ON(!kyber_depth[i]);
|
||||
WARN_ON(!kyber_batch_size[i]);
|
||||
ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
|
||||
max_tokens, -1, false, GFP_KERNEL,
|
||||
q->node);
|
||||
kyber_depth[i], -1, false,
|
||||
GFP_KERNEL, q->node);
|
||||
if (ret) {
|
||||
while (--i >= 0)
|
||||
sbitmap_queue_free(&kqd->domain_tokens[i]);
|
||||
goto err_cb;
|
||||
goto err_buckets;
|
||||
}
|
||||
sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
|
||||
}
|
||||
|
||||
shift = kyber_sched_tags_shift(kqd);
|
||||
kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
|
||||
for (i = 0; i < KYBER_OTHER; i++) {
|
||||
kqd->domain_p99[i] = -1;
|
||||
kqd->latency_targets[i] = kyber_latency_targets[i];
|
||||
}
|
||||
|
||||
kqd->read_lat_nsec = 2000000ULL;
|
||||
kqd->write_lat_nsec = 10000000ULL;
|
||||
shift = kyber_sched_tags_shift(q);
|
||||
kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
|
||||
|
||||
return kqd;
|
||||
|
||||
err_cb:
|
||||
blk_stat_free_callback(kqd->cb);
|
||||
err_buckets:
|
||||
free_percpu(kqd->cpu_latency);
|
||||
err_kqd:
|
||||
kfree(kqd);
|
||||
err:
|
||||
|
@ -372,25 +439,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
|
|||
return PTR_ERR(kqd);
|
||||
}
|
||||
|
||||
blk_stat_enable_accounting(q);
|
||||
|
||||
eq->elevator_data = kqd;
|
||||
q->elevator = eq;
|
||||
|
||||
blk_stat_add_callback(q, kqd->cb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kyber_exit_sched(struct elevator_queue *e)
|
||||
{
|
||||
struct kyber_queue_data *kqd = e->elevator_data;
|
||||
struct request_queue *q = kqd->q;
|
||||
int i;
|
||||
|
||||
blk_stat_remove_callback(q, kqd->cb);
|
||||
del_timer_sync(&kqd->timer);
|
||||
|
||||
for (i = 0; i < KYBER_NUM_DOMAINS; i++)
|
||||
sbitmap_queue_free(&kqd->domain_tokens[i]);
|
||||
blk_stat_free_callback(kqd->cb);
|
||||
free_percpu(kqd->cpu_latency);
|
||||
kfree(kqd);
|
||||
}
|
||||
|
||||
|
@ -558,41 +624,44 @@ static void kyber_finish_request(struct request *rq)
|
|||
rq_clear_domain_token(kqd, rq);
|
||||
}
|
||||
|
||||
static void kyber_completed_request(struct request *rq)
|
||||
static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
|
||||
unsigned int sched_domain, unsigned int type,
|
||||
u64 target, u64 latency)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
struct kyber_queue_data *kqd = q->elevator->elevator_data;
|
||||
unsigned int sched_domain;
|
||||
u64 now, latency, target;
|
||||
unsigned int bucket;
|
||||
u64 divisor;
|
||||
|
||||
/*
|
||||
* Check if this request met our latency goal. If not, quickly gather
|
||||
* some statistics and start throttling.
|
||||
*/
|
||||
sched_domain = kyber_sched_domain(rq->cmd_flags);
|
||||
switch (sched_domain) {
|
||||
case KYBER_READ:
|
||||
target = kqd->read_lat_nsec;
|
||||
break;
|
||||
case KYBER_SYNC_WRITE:
|
||||
target = kqd->write_lat_nsec;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
if (latency > 0) {
|
||||
divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
|
||||
bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
|
||||
KYBER_LATENCY_BUCKETS - 1);
|
||||
} else {
|
||||
bucket = 0;
|
||||
}
|
||||
|
||||
/* If we are already monitoring latencies, don't check again. */
|
||||
if (blk_stat_is_active(kqd->cb))
|
||||
atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
|
||||
}
|
||||
|
||||
static void kyber_completed_request(struct request *rq, u64 now)
|
||||
{
|
||||
struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
|
||||
struct kyber_cpu_latency *cpu_latency;
|
||||
unsigned int sched_domain;
|
||||
u64 target;
|
||||
|
||||
sched_domain = kyber_sched_domain(rq->cmd_flags);
|
||||
if (sched_domain == KYBER_OTHER)
|
||||
return;
|
||||
|
||||
now = ktime_get_ns();
|
||||
if (now < rq->io_start_time_ns)
|
||||
return;
|
||||
cpu_latency = get_cpu_ptr(kqd->cpu_latency);
|
||||
target = kqd->latency_targets[sched_domain];
|
||||
add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
|
||||
target, now - rq->start_time_ns);
|
||||
add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
|
||||
now - rq->io_start_time_ns);
|
||||
put_cpu_ptr(kqd->cpu_latency);
|
||||
|
||||
latency = now - rq->io_start_time_ns;
|
||||
|
||||
if (latency > target)
|
||||
blk_stat_activate_msecs(kqd->cb, 10);
|
||||
timer_reduce(&kqd->timer, jiffies + HZ / 10);
|
||||
}
|
||||
|
||||
struct flush_kcq_data {
|
||||
|
@ -713,6 +782,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
|
|||
rq_set_domain_token(rq, nr);
|
||||
list_del_init(&rq->queuelist);
|
||||
return rq;
|
||||
} else {
|
||||
trace_kyber_throttled(kqd->q,
|
||||
kyber_domain_names[khd->cur_domain]);
|
||||
}
|
||||
} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
|
||||
nr = kyber_get_domain_token(kqd, khd, hctx);
|
||||
|
@ -723,6 +795,9 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
|
|||
rq_set_domain_token(rq, nr);
|
||||
list_del_init(&rq->queuelist);
|
||||
return rq;
|
||||
} else {
|
||||
trace_kyber_throttled(kqd->q,
|
||||
kyber_domain_names[khd->cur_domain]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -790,17 +865,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
|
|||
return false;
|
||||
}
|
||||
|
||||
#define KYBER_LAT_SHOW_STORE(op) \
|
||||
static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \
|
||||
char *page) \
|
||||
#define KYBER_LAT_SHOW_STORE(domain, name) \
|
||||
static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \
|
||||
char *page) \
|
||||
{ \
|
||||
struct kyber_queue_data *kqd = e->elevator_data; \
|
||||
\
|
||||
return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \
|
||||
return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
|
||||
} \
|
||||
\
|
||||
static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
|
||||
const char *page, size_t count) \
|
||||
static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
|
||||
const char *page, size_t count) \
|
||||
{ \
|
||||
struct kyber_queue_data *kqd = e->elevator_data; \
|
||||
unsigned long long nsec; \
|
||||
|
@ -810,12 +885,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
|
|||
if (ret) \
|
||||
return ret; \
|
||||
\
|
||||
kqd->op##_lat_nsec = nsec; \
|
||||
kqd->latency_targets[domain] = nsec; \
|
||||
\
|
||||
return count; \
|
||||
}
|
||||
KYBER_LAT_SHOW_STORE(read);
|
||||
KYBER_LAT_SHOW_STORE(write);
|
||||
KYBER_LAT_SHOW_STORE(KYBER_READ, read);
|
||||
KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
|
||||
#undef KYBER_LAT_SHOW_STORE
|
||||
|
||||
#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
|
||||
|
@ -882,7 +957,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \
|
|||
return 0; \
|
||||
}
|
||||
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
|
||||
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write)
|
||||
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
|
||||
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
|
||||
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
|
||||
#undef KYBER_DEBUGFS_DOMAIN_ATTRS
|
||||
|
||||
|
@ -900,20 +976,7 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m)
|
|||
struct blk_mq_hw_ctx *hctx = data;
|
||||
struct kyber_hctx_data *khd = hctx->sched_data;
|
||||
|
||||
switch (khd->cur_domain) {
|
||||
case KYBER_READ:
|
||||
seq_puts(m, "READ\n");
|
||||
break;
|
||||
case KYBER_SYNC_WRITE:
|
||||
seq_puts(m, "SYNC_WRITE\n");
|
||||
break;
|
||||
case KYBER_OTHER:
|
||||
seq_puts(m, "OTHER\n");
|
||||
break;
|
||||
default:
|
||||
seq_printf(m, "%u\n", khd->cur_domain);
|
||||
break;
|
||||
}
|
||||
seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -930,7 +993,8 @@ static int kyber_batching_show(void *data, struct seq_file *m)
|
|||
{#name "_tokens", 0400, kyber_##name##_tokens_show}
|
||||
static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
|
||||
KYBER_QUEUE_DOMAIN_ATTRS(read),
|
||||
KYBER_QUEUE_DOMAIN_ATTRS(sync_write),
|
||||
KYBER_QUEUE_DOMAIN_ATTRS(write),
|
||||
KYBER_QUEUE_DOMAIN_ATTRS(discard),
|
||||
KYBER_QUEUE_DOMAIN_ATTRS(other),
|
||||
{"async_depth", 0400, kyber_async_depth_show},
|
||||
{},
|
||||
|
@ -942,7 +1006,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
|
|||
{#name "_waiting", 0400, kyber_##name##_waiting_show}
|
||||
static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
|
||||
KYBER_HCTX_DOMAIN_ATTRS(read),
|
||||
KYBER_HCTX_DOMAIN_ATTRS(sync_write),
|
||||
KYBER_HCTX_DOMAIN_ATTRS(write),
|
||||
KYBER_HCTX_DOMAIN_ATTRS(discard),
|
||||
KYBER_HCTX_DOMAIN_ATTRS(other),
|
||||
{"cur_domain", 0400, kyber_cur_domain_show},
|
||||
{"batching", 0400, kyber_batching_show},
|
||||
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -121,18 +121,6 @@ source "drivers/block/mtip32xx/Kconfig"
|
|||
|
||||
source "drivers/block/zram/Kconfig"
|
||||
|
||||
config BLK_DEV_DAC960
|
||||
tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
|
||||
depends on PCI
|
||||
help
|
||||
This driver adds support for the Mylex DAC960, AcceleRAID, and
|
||||
eXtremeRAID PCI RAID controllers. See the file
|
||||
<file:Documentation/blockdev/README.DAC960> for further information
|
||||
about this driver.
|
||||
|
||||
To compile this driver as a module, choose M here: the
|
||||
module will be called DAC960.
|
||||
|
||||
config BLK_DEV_UMEM
|
||||
tristate "Micro Memory MM5415 Battery Backed RAM support"
|
||||
depends on PCI
|
||||
|
@ -461,7 +449,6 @@ config BLK_DEV_RBD
|
|||
select LIBCRC32C
|
||||
select CRYPTO_AES
|
||||
select CRYPTO
|
||||
default n
|
||||
help
|
||||
Say Y here if you want include the Rados block device, which stripes
|
||||
a block device over objects stored in the Ceph distributed object
|
||||
|
|
|
@ -16,7 +16,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
|
|||
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
|
||||
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
|
||||
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
|
||||
obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
|
||||
obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
|
||||
obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
|
||||
obj-$(CONFIG_SUNVDC) += sunvdc.o
|
||||
|
|
|
@ -61,10 +61,8 @@
|
|||
#include <linux/delay.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/amifdreg.h>
|
||||
#include <linux/amifd.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/elevator.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/platform_device.h>
|
||||
|
@ -86,6 +84,126 @@
|
|||
* Defines
|
||||
*/
|
||||
|
||||
/*
|
||||
* CIAAPRA bits (read only)
|
||||
*/
|
||||
|
||||
#define DSKRDY (0x1<<5) /* disk ready when low */
|
||||
#define DSKTRACK0 (0x1<<4) /* head at track zero when low */
|
||||
#define DSKPROT (0x1<<3) /* disk protected when low */
|
||||
#define DSKCHANGE (0x1<<2) /* low when disk removed */
|
||||
|
||||
/*
|
||||
* CIAAPRB bits (read/write)
|
||||
*/
|
||||
|
||||
#define DSKMOTOR (0x1<<7) /* motor on when low */
|
||||
#define DSKSEL3 (0x1<<6) /* select drive 3 when low */
|
||||
#define DSKSEL2 (0x1<<5) /* select drive 2 when low */
|
||||
#define DSKSEL1 (0x1<<4) /* select drive 1 when low */
|
||||
#define DSKSEL0 (0x1<<3) /* select drive 0 when low */
|
||||
#define DSKSIDE (0x1<<2) /* side selection: 0 = upper, 1 = lower */
|
||||
#define DSKDIREC (0x1<<1) /* step direction: 0=in, 1=out (to trk 0) */
|
||||
#define DSKSTEP (0x1) /* pulse low to step head 1 track */
|
||||
|
||||
/*
|
||||
* DSKBYTR bits (read only)
|
||||
*/
|
||||
|
||||
#define DSKBYT (1<<15) /* register contains valid byte when set */
|
||||
#define DMAON (1<<14) /* disk DMA enabled */
|
||||
#define DISKWRITE (1<<13) /* disk write bit in DSKLEN enabled */
|
||||
#define WORDEQUAL (1<<12) /* DSKSYNC register match when true */
|
||||
/* bits 7-0 are data */
|
||||
|
||||
/*
|
||||
* ADKCON/ADKCONR bits
|
||||
*/
|
||||
|
||||
#ifndef SETCLR
|
||||
#define ADK_SETCLR (1<<15) /* control bit */
|
||||
#endif
|
||||
#define ADK_PRECOMP1 (1<<14) /* precompensation selection */
|
||||
#define ADK_PRECOMP0 (1<<13) /* 00=none, 01=140ns, 10=280ns, 11=500ns */
|
||||
#define ADK_MFMPREC (1<<12) /* 0=GCR precomp., 1=MFM precomp. */
|
||||
#define ADK_WORDSYNC (1<<10) /* enable DSKSYNC auto DMA */
|
||||
#define ADK_MSBSYNC (1<<9) /* when 1, enable sync on MSbit (for GCR) */
|
||||
#define ADK_FAST (1<<8) /* bit cell: 0=2us (GCR), 1=1us (MFM) */
|
||||
|
||||
/*
|
||||
* DSKLEN bits
|
||||
*/
|
||||
|
||||
#define DSKLEN_DMAEN (1<<15)
|
||||
#define DSKLEN_WRITE (1<<14)
|
||||
|
||||
/*
|
||||
* INTENA/INTREQ bits
|
||||
*/
|
||||
|
||||
#define DSKINDEX (0x1<<4) /* DSKINDEX bit */
|
||||
|
||||
/*
|
||||
* Misc
|
||||
*/
|
||||
|
||||
#define MFM_SYNC 0x4489 /* standard MFM sync value */
|
||||
|
||||
/* Values for FD_COMMAND */
|
||||
#define FD_RECALIBRATE 0x07 /* move to track 0 */
|
||||
#define FD_SEEK 0x0F /* seek track */
|
||||
#define FD_READ 0xE6 /* read with MT, MFM, SKip deleted */
|
||||
#define FD_WRITE 0xC5 /* write with MT, MFM */
|
||||
#define FD_SENSEI 0x08 /* Sense Interrupt Status */
|
||||
#define FD_SPECIFY 0x03 /* specify HUT etc */
|
||||
#define FD_FORMAT 0x4D /* format one track */
|
||||
#define FD_VERSION 0x10 /* get version code */
|
||||
#define FD_CONFIGURE 0x13 /* configure FIFO operation */
|
||||
#define FD_PERPENDICULAR 0x12 /* perpendicular r/w mode */
|
||||
|
||||
#define FD_MAX_UNITS 4 /* Max. Number of drives */
|
||||
#define FLOPPY_MAX_SECTORS 22 /* Max. Number of sectors per track */
|
||||
|
||||
struct fd_data_type {
|
||||
char *name; /* description of data type */
|
||||
int sects; /* sectors per track */
|
||||
int (*read_fkt)(int); /* read whole track */
|
||||
void (*write_fkt)(int); /* write whole track */
|
||||
};
|
||||
|
||||
struct fd_drive_type {
|
||||
unsigned long code; /* code returned from drive */
|
||||
char *name; /* description of drive */
|
||||
unsigned int tracks; /* number of tracks */
|
||||
unsigned int heads; /* number of heads */
|
||||
unsigned int read_size; /* raw read size for one track */
|
||||
unsigned int write_size; /* raw write size for one track */
|
||||
unsigned int sect_mult; /* sectors and gap multiplier (HD = 2) */
|
||||
unsigned int precomp1; /* start track for precomp 1 */
|
||||
unsigned int precomp2; /* start track for precomp 2 */
|
||||
unsigned int step_delay; /* time (in ms) for delay after step */
|
||||
unsigned int settle_time; /* time to settle after dir change */
|
||||
unsigned int side_time; /* time needed to change sides */
|
||||
};
|
||||
|
||||
struct amiga_floppy_struct {
|
||||
struct fd_drive_type *type; /* type of floppy for this unit */
|
||||
struct fd_data_type *dtype; /* type of floppy for this unit */
|
||||
int track; /* current track (-1 == unknown) */
|
||||
unsigned char *trackbuf; /* current track (kmalloc()'d) */
|
||||
|
||||
int blocks; /* total # blocks on disk */
|
||||
|
||||
int changed; /* true when not known */
|
||||
int disk; /* disk in drive (-1 == unknown) */
|
||||
int motor; /* true when motor is at speed */
|
||||
int busy; /* true when drive is active */
|
||||
int dirty; /* true when trackbuf is not on disk */
|
||||
int status; /* current error code for unit */
|
||||
struct gendisk *gendisk;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
};
|
||||
|
||||
/*
|
||||
* Error codes
|
||||
*/
|
||||
|
@ -164,7 +282,6 @@ static volatile int selected = -1; /* currently selected drive */
|
|||
static int writepending;
|
||||
static int writefromint;
|
||||
static char *raw_buf;
|
||||
static int fdc_queue;
|
||||
|
||||
static DEFINE_SPINLOCK(amiflop_lock);
|
||||
|
||||
|
@ -1337,76 +1454,20 @@ static int get_track(int drive, int track)
|
|||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Round-robin between our available drives, doing one request from each
|
||||
*/
|
||||
static struct request *set_next_request(void)
|
||||
static blk_status_t amiflop_rw_cur_segment(struct amiga_floppy_struct *floppy,
|
||||
struct request *rq)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int cnt = FD_MAX_UNITS;
|
||||
struct request *rq = NULL;
|
||||
|
||||
/* Find next queue we can dispatch from */
|
||||
fdc_queue = fdc_queue + 1;
|
||||
if (fdc_queue == FD_MAX_UNITS)
|
||||
fdc_queue = 0;
|
||||
|
||||
for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) {
|
||||
|
||||
if (unit[fdc_queue].type->code == FD_NODRIVE) {
|
||||
if (++fdc_queue == FD_MAX_UNITS)
|
||||
fdc_queue = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
q = unit[fdc_queue].gendisk->queue;
|
||||
if (q) {
|
||||
rq = blk_fetch_request(q);
|
||||
if (rq)
|
||||
break;
|
||||
}
|
||||
|
||||
if (++fdc_queue == FD_MAX_UNITS)
|
||||
fdc_queue = 0;
|
||||
}
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
static void redo_fd_request(void)
|
||||
{
|
||||
struct request *rq;
|
||||
int drive = floppy - unit;
|
||||
unsigned int cnt, block, track, sector;
|
||||
int drive;
|
||||
struct amiga_floppy_struct *floppy;
|
||||
char *data;
|
||||
unsigned long flags;
|
||||
blk_status_t err;
|
||||
|
||||
next_req:
|
||||
rq = set_next_request();
|
||||
if (!rq) {
|
||||
/* Nothing left to do */
|
||||
return;
|
||||
}
|
||||
|
||||
floppy = rq->rq_disk->private_data;
|
||||
drive = floppy - unit;
|
||||
|
||||
next_segment:
|
||||
/* Here someone could investigate to be more efficient */
|
||||
for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
|
||||
for (cnt = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
|
||||
#ifdef DEBUG
|
||||
printk("fd: sector %ld + %d requested for %s\n",
|
||||
blk_rq_pos(rq), cnt,
|
||||
(rq_data_dir(rq) == READ) ? "read" : "write");
|
||||
#endif
|
||||
block = blk_rq_pos(rq) + cnt;
|
||||
if ((int)block > floppy->blocks) {
|
||||
err = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
track = block / (floppy->dtype->sects * floppy->type->sect_mult);
|
||||
sector = block % (floppy->dtype->sects * floppy->type->sect_mult);
|
||||
data = bio_data(rq->bio) + 512 * cnt;
|
||||
|
@ -1415,10 +1476,8 @@ next_segment:
|
|||
"0x%08lx\n", track, sector, data);
|
||||
#endif
|
||||
|
||||
if (get_track(drive, track) == -1) {
|
||||
err = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
if (get_track(drive, track) == -1)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (rq_data_dir(rq) == READ) {
|
||||
memcpy(data, floppy->trackbuf + sector * 512, 512);
|
||||
|
@ -1426,31 +1485,40 @@ next_segment:
|
|||
memcpy(floppy->trackbuf + sector * 512, data, 512);
|
||||
|
||||
/* keep the drive spinning while writes are scheduled */
|
||||
if (!fd_motor_on(drive)) {
|
||||
err = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
if (!fd_motor_on(drive))
|
||||
return BLK_STS_IOERR;
|
||||
/*
|
||||
* setup a callback to write the track buffer
|
||||
* after a short (1 tick) delay.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
floppy->dirty = 1;
|
||||
/* reset the timer */
|
||||
mod_timer (flush_track_timer + drive, jiffies + 1);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
|
||||
if (__blk_end_request_cur(rq, err))
|
||||
goto next_segment;
|
||||
goto next_req;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void do_fd_request(struct request_queue * q)
|
||||
static blk_status_t amiflop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
redo_fd_request();
|
||||
struct request *rq = bd->rq;
|
||||
struct amiga_floppy_struct *floppy = rq->rq_disk->private_data;
|
||||
blk_status_t err;
|
||||
|
||||
if (!spin_trylock_irq(&amiflop_lock))
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
|
||||
blk_mq_start_request(rq);
|
||||
|
||||
do {
|
||||
err = amiflop_rw_cur_segment(floppy, rq);
|
||||
} while (blk_update_request(rq, err, blk_rq_cur_bytes(rq)));
|
||||
blk_mq_end_request(rq, err);
|
||||
|
||||
spin_unlock_irq(&amiflop_lock);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
|
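The converted amiflop_queue_rq() above feeds every segment of the request through blk_update_request() and only then finishes it with blk_mq_end_request(). A condensed sketch of that completion loop, assuming a hypothetical per-segment helper do_one_segment():

#include <linux/blk-mq.h>

static blk_status_t do_one_segment(struct request *rq);	/* hypothetical helper */

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	blk_status_t err;

	blk_mq_start_request(rq);
	do {
		/* handle blk_rq_cur_bytes(rq) worth of data for this segment */
		err = do_one_segment(rq);
	} while (blk_update_request(rq, err, blk_rq_cur_bytes(rq)));
	blk_mq_end_request(rq, err);

	return BLK_STS_OK;
}

blk_update_request() returns true while the request still has bytes outstanding, so the loop walks the remaining segments before the single end-request call.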
@ -1701,11 +1769,47 @@ static const struct block_device_operations floppy_fops = {
|
|||
.check_events = amiga_check_events,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops amiflop_mq_ops = {
|
||||
.queue_rq = amiflop_queue_rq,
|
||||
};
|
||||
|
||||
static struct gendisk *fd_alloc_disk(int drive)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
goto out;
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops,
|
||||
2, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
disk->queue = NULL;
|
||||
goto out_put_disk;
|
||||
}
|
||||
|
||||
unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL);
|
||||
if (!unit[drive].trackbuf)
|
||||
goto out_cleanup_queue;
|
||||
|
||||
return disk;
|
||||
|
||||
out_cleanup_queue:
|
||||
blk_cleanup_queue(disk->queue);
|
||||
disk->queue = NULL;
|
||||
blk_mq_free_tag_set(&unit[drive].tag_set);
|
||||
out_put_disk:
|
||||
put_disk(disk);
|
||||
out:
|
||||
unit[drive].type->code = FD_NODRIVE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int __init fd_probe_drives(void)
|
||||
{
|
||||
int drive,drives,nomem;
|
||||
|
||||
printk(KERN_INFO "FD: probing units\nfound ");
|
||||
pr_info("FD: probing units\nfound");
|
||||
drives=0;
|
||||
nomem=0;
|
||||
for(drive=0;drive<FD_MAX_UNITS;drive++) {
|
||||
|
@ -1713,27 +1817,17 @@ static int __init fd_probe_drives(void)
|
|||
fd_probe(drive);
|
||||
if (unit[drive].type->code == FD_NODRIVE)
|
||||
continue;
|
||||
disk = alloc_disk(1);
|
||||
|
||||
disk = fd_alloc_disk(drive);
|
||||
if (!disk) {
|
||||
unit[drive].type->code = FD_NODRIVE;
|
||||
pr_cont(" no mem for fd%d", drive);
|
||||
nomem = 1;
|
||||
continue;
|
||||
}
|
||||
unit[drive].gendisk = disk;
|
||||
|
||||
disk->queue = blk_init_queue(do_fd_request, &amiflop_lock);
|
||||
if (!disk->queue) {
|
||||
unit[drive].type->code = FD_NODRIVE;
|
||||
continue;
|
||||
}
|
||||
|
||||
drives++;
|
||||
if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
|
||||
printk("no mem for ");
|
||||
unit[drive].type = &drive_types[num_dr_types - 1]; /* FD_NODRIVE */
|
||||
drives--;
|
||||
nomem = 1;
|
||||
}
|
||||
printk("fd%d ",drive);
|
||||
|
||||
pr_cont(" fd%d",drive);
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = drive;
|
||||
disk->fops = &floppy_fops;
|
||||
|
@ -1744,11 +1838,11 @@ static int __init fd_probe_drives(void)
|
|||
}
|
||||
if ((drives > 0) || (nomem == 0)) {
|
||||
if (drives == 0)
|
||||
printk("no drives");
|
||||
printk("\n");
|
||||
pr_cont(" no drives");
|
||||
pr_cont("\n");
|
||||
return drives;
|
||||
}
|
||||
printk("\n");
|
||||
pr_cont("\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
@ -1831,30 +1925,6 @@ out_blkdev:
|
|||
return ret;
|
||||
}
|
||||
|
||||
#if 0 /* not safe to unload */
|
||||
static int __exit amiga_floppy_remove(struct platform_device *pdev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for( i = 0; i < FD_MAX_UNITS; i++) {
|
||||
if (unit[i].type->code != FD_NODRIVE) {
|
||||
struct request_queue *q = unit[i].gendisk->queue;
|
||||
del_gendisk(unit[i].gendisk);
|
||||
put_disk(unit[i].gendisk);
|
||||
kfree(unit[i].trackbuf);
|
||||
if (q)
|
||||
blk_cleanup_queue(q);
|
||||
}
|
||||
}
|
||||
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
|
||||
free_irq(IRQ_AMIGA_CIAA_TB, NULL);
|
||||
free_irq(IRQ_AMIGA_DSKBLK, NULL);
|
||||
custom.dmacon = DMAF_DISK; /* disable DMA */
|
||||
amiga_chip_free(raw_buf);
|
||||
unregister_blkdev(FLOPPY_MAJOR, "fd");
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct platform_driver amiga_floppy_driver = {
|
||||
.driver = {
|
||||
.name = "amiga-floppy",
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#define VERSION "85"
|
||||
#define AOE_MAJOR 152
|
||||
#define DEVICE_NAME "aoe"
|
||||
|
@ -164,6 +166,8 @@ struct aoedev {
|
|||
struct gendisk *gd;
|
||||
struct dentry *debugfs;
|
||||
struct request_queue *blkq;
|
||||
struct list_head rq_list;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct hd_geometry geo;
|
||||
sector_t ssize;
|
||||
struct timer_list timer;
|
||||
|
@ -201,7 +205,6 @@ int aoeblk_init(void);
|
|||
void aoeblk_exit(void);
|
||||
void aoeblk_gdalloc(void *);
|
||||
void aoedisk_rm_debugfs(struct aoedev *d);
|
||||
void aoedisk_rm_sysfs(struct aoedev *d);
|
||||
|
||||
int aoechr_init(void);
|
||||
void aoechr_exit(void);
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/ioctl.h>
|
||||
|
@ -177,10 +177,15 @@ static struct attribute *aoe_attrs[] = {
|
|||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group attr_group = {
|
||||
static const struct attribute_group aoe_attr_group = {
|
||||
.attrs = aoe_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *aoe_attr_groups[] = {
|
||||
&aoe_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct file_operations aoe_debugfs_fops = {
|
||||
.open = aoe_debugfs_open,
|
||||
.read = seq_read,
|
||||
|
@ -219,17 +224,6 @@ aoedisk_rm_debugfs(struct aoedev *d)
|
|||
d->debugfs = NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
aoedisk_add_sysfs(struct aoedev *d)
|
||||
{
|
||||
return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
|
||||
}
|
||||
void
|
||||
aoedisk_rm_sysfs(struct aoedev *d)
|
||||
{
|
||||
sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
|
||||
}
|
||||
|
||||
static int
|
||||
aoeblk_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
|
@ -274,23 +268,25 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
|
|||
spin_unlock_irqrestore(&d->lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
aoeblk_request(struct request_queue *q)
|
||||
static blk_status_t aoeblk_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct aoedev *d;
|
||||
struct request *rq;
|
||||
struct aoedev *d = hctx->queue->queuedata;
|
||||
|
||||
spin_lock_irq(&d->lock);
|
||||
|
||||
d = q->queuedata;
|
||||
if ((d->flags & DEVFL_UP) == 0) {
|
||||
pr_info_ratelimited("aoe: device %ld.%d is not up\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
while ((rq = blk_peek_request(q))) {
|
||||
blk_start_request(rq);
|
||||
aoe_end_request(d, rq, 1);
|
||||
}
|
||||
return;
|
||||
spin_unlock_irq(&d->lock);
|
||||
blk_mq_start_request(bd->rq);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
list_add_tail(&bd->rq->queuelist, &d->rq_list);
|
||||
aoecmd_work(d);
|
||||
spin_unlock_irq(&d->lock);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int
|
||||
|
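The aoeblk_queue_rq() hunk above uses the deferred-dispatch pattern: the request is parked on a driver list and real I/O is done later by the driver's own machinery. A minimal sketch of that shape, with a hypothetical device struct and worker kick:

#include <linux/blk-mq.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct example_dev {			/* hypothetical driver state */
	spinlock_t lock;
	bool online;
	struct list_head rq_list;	/* requests parked for the worker */
};

static void example_kick_worker(struct example_dev *dev);	/* hypothetical */

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct example_dev *dev = hctx->queue->queuedata;

	spin_lock_irq(&dev->lock);
	if (!dev->online) {
		spin_unlock_irq(&dev->lock);
		blk_mq_start_request(bd->rq);
		return BLK_STS_IOERR;	/* the core completes it with an error */
	}
	list_add_tail(&bd->rq->queuelist, &dev->rq_list);
	example_kick_worker(dev);
	spin_unlock_irq(&dev->lock);

	return BLK_STS_OK;
}

As in aoe's nextbuf(), the worker calls blk_mq_start_request() on each parked request before it actually issues it.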
@ -345,6 +341,10 @@ static const struct block_device_operations aoe_bdops = {
|
|||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops aoeblk_mq_ops = {
|
||||
.queue_rq = aoeblk_queue_rq,
|
||||
};
|
||||
|
||||
/* alloc_disk and add_disk can sleep */
|
||||
void
|
||||
aoeblk_gdalloc(void *vp)
|
||||
|
@ -353,9 +353,11 @@ aoeblk_gdalloc(void *vp)
|
|||
struct gendisk *gd;
|
||||
mempool_t *mp;
|
||||
struct request_queue *q;
|
||||
struct blk_mq_tag_set *set;
|
||||
enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
|
||||
ulong flags;
|
||||
int late = 0;
|
||||
int err;
|
||||
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
if (d->flags & DEVFL_GDALLOC
|
||||
|
@ -382,10 +384,25 @@ aoeblk_gdalloc(void *vp)
|
|||
d->aoemajor, d->aoeminor);
|
||||
goto err_disk;
|
||||
}
|
||||
q = blk_init_queue(aoeblk_request, &d->lock);
|
||||
if (q == NULL) {
|
||||
|
||||
set = &d->tag_set;
|
||||
set->ops = &aoeblk_mq_ops;
|
||||
set->nr_hw_queues = 1;
|
||||
set->queue_depth = 128;
|
||||
set->numa_node = NUMA_NO_NODE;
|
||||
set->flags = BLK_MQ_F_SHOULD_MERGE;
|
||||
err = blk_mq_alloc_tag_set(set);
|
||||
if (err) {
|
||||
pr_err("aoe: cannot allocate tag set for %ld.%d\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
goto err_mempool;
|
||||
}
|
||||
|
||||
q = blk_mq_init_queue(set);
|
||||
if (IS_ERR(q)) {
|
||||
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
|
||||
d->aoemajor, d->aoeminor);
|
||||
blk_mq_free_tag_set(set);
|
||||
goto err_mempool;
|
||||
}
|
||||
|
||||
|
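aoeblk_gdalloc() above fills in the tag set by hand and allocates the queue with blk_mq_init_queue() rather than using the single-queue helper. A sketch of that allocation and its unwind path, with hypothetical structure names:

#include <linux/blk-mq.h>
#include <linux/numa.h>

struct example_dev {			/* hypothetical: owns its tag set */
	struct blk_mq_tag_set tag_set;
};

static struct request_queue *example_alloc_queue(struct example_dev *dev,
						 const struct blk_mq_ops *ops)
{
	struct blk_mq_tag_set *set = &dev->tag_set;
	struct request_queue *q;

	set->ops = ops;
	set->nr_hw_queues = 1;
	set->queue_depth = 128;
	set->numa_node = NUMA_NO_NODE;
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	if (blk_mq_alloc_tag_set(set))
		return NULL;

	q = blk_mq_init_queue(set);
	if (IS_ERR(q)) {
		blk_mq_free_tag_set(set);	/* queue failed, drop the tags */
		return NULL;
	}
	q->queuedata = dev;
	return q;
}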
@ -417,8 +434,7 @@ aoeblk_gdalloc(void *vp)
|
|||
|
||||
spin_unlock_irqrestore(&d->lock, flags);
|
||||
|
||||
add_disk(gd);
|
||||
aoedisk_add_sysfs(d);
|
||||
device_add_disk(NULL, gd, aoe_attr_groups);
|
||||
aoedisk_add_debugfs(d);
|
||||
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
|
|
|
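The aoe hunks above drop the separate sysfs_create_group() call and instead hand an attribute-group array to device_add_disk(), so the attributes are created together with the disk. A minimal sketch of that registration pattern; the attribute itself is hypothetical:

#include <linux/device.h>
#include <linux/genhd.h>

static ssize_t state_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	return sprintf(buf, "online\n");	/* hypothetical attribute value */
}
static DEVICE_ATTR_RO(state);

static struct attribute *example_attrs[] = {
	&dev_attr_state.attr,
	NULL,
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

static const struct attribute_group *example_attr_groups[] = {
	&example_attr_group,
	NULL,
};

static void example_register_disk(struct device *parent, struct gendisk *gd)
{
	/* groups appear atomically with the disk, so userspace never sees the
	 * disk without its attributes */
	device_add_disk(parent, gd, example_attr_groups);
}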
@ -7,7 +7,7 @@
|
|||
#include <linux/ata.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/genhd.h>
|
||||
|
@ -813,7 +813,7 @@ rexmit_timer(struct timer_list *timer)
|
|||
out:
|
||||
if ((d->flags & DEVFL_KICKME) && d->blkq) {
|
||||
d->flags &= ~DEVFL_KICKME;
|
||||
d->blkq->request_fn(d->blkq);
|
||||
blk_mq_run_hw_queues(d->blkq, true);
|
||||
}
|
||||
|
||||
d->timer.expires = jiffies + TIMERTICK;
|
||||
|
@ -857,10 +857,12 @@ nextbuf(struct aoedev *d)
|
|||
return d->ip.buf;
|
||||
rq = d->ip.rq;
|
||||
if (rq == NULL) {
|
||||
rq = blk_peek_request(q);
|
||||
rq = list_first_entry_or_null(&d->rq_list, struct request,
|
||||
queuelist);
|
||||
if (rq == NULL)
|
||||
return NULL;
|
||||
blk_start_request(rq);
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_start_request(rq);
|
||||
d->ip.rq = rq;
|
||||
d->ip.nxbio = rq->bio;
|
||||
rq->special = (void *) rqbiocnt(rq);
|
||||
|
@ -1045,6 +1047,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
|
|||
struct bio *bio;
|
||||
int bok;
|
||||
struct request_queue *q;
|
||||
blk_status_t err = BLK_STS_OK;
|
||||
|
||||
q = d->blkq;
|
||||
if (rq == d->ip.rq)
|
||||
|
@ -1052,11 +1055,15 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
|
|||
do {
|
||||
bio = rq->bio;
|
||||
bok = !fastfail && !bio->bi_status;
|
||||
} while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
|
||||
if (!bok)
|
||||
err = BLK_STS_IOERR;
|
||||
} while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
|
||||
|
||||
__blk_mq_end_request(rq, err);
|
||||
|
||||
/* cf. http://lkml.org/lkml/2006/10/31/28 */
|
||||
if (!fastfail)
|
||||
__blk_run_queue(q);
|
||||
blk_mq_run_hw_queues(q, true);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -197,7 +197,6 @@ aoedev_downdev(struct aoedev *d)
|
|||
{
|
||||
struct aoetgt *t, **tt, **te;
|
||||
struct list_head *head, *pos, *nx;
|
||||
struct request *rq;
|
||||
int i;
|
||||
|
||||
d->flags &= ~DEVFL_UP;
|
||||
|
@ -225,10 +224,11 @@ aoedev_downdev(struct aoedev *d)
|
|||
|
||||
/* fast fail all pending I/O */
|
||||
if (d->blkq) {
|
||||
while ((rq = blk_peek_request(d->blkq))) {
|
||||
blk_start_request(rq);
|
||||
aoe_end_request(d, rq, 1);
|
||||
}
|
||||
/* UP is cleared, freeze+quiesce to insure all are errored */
|
||||
blk_mq_freeze_queue(d->blkq);
|
||||
blk_mq_quiesce_queue(d->blkq);
|
||||
blk_mq_unquiesce_queue(d->blkq);
|
||||
blk_mq_unfreeze_queue(d->blkq);
|
||||
}
|
||||
|
||||
if (d->gd)
|
||||
|
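The aoedev_downdev() hunk above replaces the peek-and-fail loop with a freeze plus quiesce/unquiesce cycle; with DEVFL_UP already cleared, every pending request is pushed back through ->queue_rq(), where it now fails fast. A sketch of that drain pattern:

#include <linux/blk-mq.h>

static void example_fail_pending_io(struct request_queue *q)
{
	blk_mq_freeze_queue(q);		/* wait out requests already in flight */
	blk_mq_quiesce_queue(q);	/* stop dispatch while the state changes */
	blk_mq_unquiesce_queue(q);	/* re-run queued work so it errors out */
	blk_mq_unfreeze_queue(q);
}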
@ -275,9 +275,9 @@ freedev(struct aoedev *d)
|
|||
del_timer_sync(&d->timer);
|
||||
if (d->gd) {
|
||||
aoedisk_rm_debugfs(d);
|
||||
aoedisk_rm_sysfs(d);
|
||||
del_gendisk(d->gd);
|
||||
put_disk(d->gd);
|
||||
blk_mq_free_tag_set(&d->tag_set);
|
||||
blk_cleanup_queue(d->blkq);
|
||||
}
|
||||
t = d->targets;
|
||||
|
@ -464,6 +464,7 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
|
|||
d->ntargets = NTARGETS;
|
||||
INIT_WORK(&d->work, aoecmd_sleepwork);
|
||||
spin_lock_init(&d->lock);
|
||||
INIT_LIST_HEAD(&d->rq_list);
|
||||
skb_queue_head_init(&d->skbpool);
|
||||
timer_setup(&d->timer, dummy_timer, 0);
|
||||
d->timer.expires = jiffies + HZ;
|
||||
|
|
|
@ -66,13 +66,11 @@
|
|||
#include <linux/fd.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#include <asm/atafd.h>
|
||||
#include <asm/atafdreg.h>
|
||||
#include <asm/atariints.h>
|
||||
#include <asm/atari_stdma.h>
|
||||
#include <asm/atari_stram.h>
|
||||
|
@ -83,7 +81,87 @@
|
|||
|
||||
static DEFINE_MUTEX(ataflop_mutex);
|
||||
static struct request *fd_request;
|
||||
static int fdc_queue;
|
||||
|
||||
/*
|
||||
* WD1772 stuff
|
||||
*/
|
||||
|
||||
/* register codes */
|
||||
|
||||
#define FDCSELREG_STP (0x80) /* command/status register */
|
||||
#define FDCSELREG_TRA (0x82) /* track register */
|
||||
#define FDCSELREG_SEC (0x84) /* sector register */
|
||||
#define FDCSELREG_DTA (0x86) /* data register */
|
||||
|
||||
/* register names for FDC_READ/WRITE macros */
|
||||
|
||||
#define FDCREG_CMD 0
|
||||
#define FDCREG_STATUS 0
|
||||
#define FDCREG_TRACK 2
|
||||
#define FDCREG_SECTOR 4
|
||||
#define FDCREG_DATA 6
|
||||
|
||||
/* command opcodes */
|
||||
|
||||
#define FDCCMD_RESTORE (0x00) /* - */
|
||||
#define FDCCMD_SEEK (0x10) /* | */
|
||||
#define FDCCMD_STEP (0x20) /* | TYP 1 Commands */
|
||||
#define FDCCMD_STIN (0x40) /* | */
|
||||
#define FDCCMD_STOT (0x60) /* - */
|
||||
#define FDCCMD_RDSEC (0x80) /* - TYP 2 Commands */
|
||||
#define FDCCMD_WRSEC (0xa0) /* - " */
|
||||
#define FDCCMD_RDADR (0xc0) /* - */
|
||||
#define FDCCMD_RDTRA (0xe0) /* | TYP 3 Commands */
|
||||
#define FDCCMD_WRTRA (0xf0) /* - */
|
||||
#define FDCCMD_FORCI (0xd0) /* - TYP 4 Command */
|
||||
|
||||
/* command modifier bits */
|
||||
|
||||
#define FDCCMDADD_SR6 (0x00) /* step rate settings */
|
||||
#define FDCCMDADD_SR12 (0x01)
|
||||
#define FDCCMDADD_SR2 (0x02)
|
||||
#define FDCCMDADD_SR3 (0x03)
|
||||
#define FDCCMDADD_V (0x04) /* verify */
|
||||
#define FDCCMDADD_H (0x08) /* wait for spin-up */
|
||||
#define FDCCMDADD_U (0x10) /* update track register */
|
||||
#define FDCCMDADD_M (0x10) /* multiple sector access */
|
||||
#define FDCCMDADD_E (0x04) /* head settling flag */
|
||||
#define FDCCMDADD_P (0x02) /* precompensation off */
|
||||
#define FDCCMDADD_A0 (0x01) /* DAM flag */
|
||||
|
||||
/* status register bits */
|
||||
|
||||
#define FDCSTAT_MOTORON (0x80) /* motor on */
|
||||
#define FDCSTAT_WPROT (0x40) /* write protected (FDCCMD_WR*) */
|
||||
#define FDCSTAT_SPINUP (0x20) /* motor speed stable (Type I) */
|
||||
#define FDCSTAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */
|
||||
#define FDCSTAT_RECNF (0x10) /* record not found */
|
||||
#define FDCSTAT_CRC (0x08) /* CRC error */
|
||||
#define FDCSTAT_TR00 (0x04) /* Track 00 flag (Type I) */
|
||||
#define FDCSTAT_LOST (0x04) /* Lost Data (Type II+III) */
|
||||
#define FDCSTAT_IDX (0x02) /* Index status (Type I) */
|
||||
#define FDCSTAT_DRQ (0x02) /* DRQ status (Type II+III) */
|
||||
#define FDCSTAT_BUSY (0x01) /* FDC is busy */
|
||||
|
||||
|
||||
/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */
|
||||
#define DSKSIDE (0x01)
|
||||
|
||||
#define DSKDRVNONE (0x06)
|
||||
#define DSKDRV0 (0x02)
|
||||
#define DSKDRV1 (0x04)
|
||||
|
||||
/* step rates */
|
||||
#define FDCSTEP_6 0x00
|
||||
#define FDCSTEP_12 0x01
|
||||
#define FDCSTEP_2 0x02
|
||||
#define FDCSTEP_3 0x03
|
||||
|
||||
struct atari_format_descr {
|
||||
int track; /* to be formatted */
|
||||
int head; /* "" "" */
|
||||
int sect_offset; /* offset of first sector */
|
||||
};
|
||||
|
||||
/* Disk types: DD, HD, ED */
|
||||
static struct atari_disk_type {
|
||||
|
@ -221,6 +299,7 @@ static struct atari_floppy_struct {
|
|||
struct gendisk *disk;
|
||||
int ref;
|
||||
int type;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
} unit[FD_MAX_UNITS];
|
||||
|
||||
#define UD unit[drive]
|
||||
|
@ -300,9 +379,6 @@ static int IsFormatting = 0, FormatError;
|
|||
static int UserSteprate[FD_MAX_UNITS] = { -1, -1 };
|
||||
module_param_array(UserSteprate, int, NULL, 0);
|
||||
|
||||
/* Synchronization of FDC access. */
|
||||
static volatile int fdc_busy = 0;
|
||||
static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
|
||||
static DECLARE_COMPLETION(format_wait);
|
||||
|
||||
static unsigned long changed_floppies = 0xff, fake_change = 0;
|
||||
|
@ -362,7 +438,6 @@ static void fd_times_out(struct timer_list *unused);
|
|||
static void finish_fdc( void );
|
||||
static void finish_fdc_done( int dummy );
|
||||
static void setup_req_params( int drive );
|
||||
static void redo_fd_request( void);
|
||||
static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
|
||||
cmd, unsigned long param);
|
||||
static void fd_probe( int drive );
|
||||
|
@ -380,8 +455,11 @@ static DEFINE_TIMER(fd_timer, check_change);
|
|||
|
||||
static void fd_end_request_cur(blk_status_t err)
|
||||
{
|
||||
if (!__blk_end_request_cur(fd_request, err))
|
||||
if (!blk_update_request(fd_request, err,
|
||||
blk_rq_cur_bytes(fd_request))) {
|
||||
__blk_mq_end_request(fd_request, err);
|
||||
fd_request = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void start_motor_off_timer(void)
|
||||
|
@ -627,7 +705,6 @@ static void fd_error( void )
|
|||
if (SelectedDrive != -1)
|
||||
SUD.track = -1;
|
||||
}
|
||||
redo_fd_request();
|
||||
}
|
||||
|
||||
|
||||
|
@ -645,14 +722,15 @@ static void fd_error( void )
|
|||
|
||||
static int do_format(int drive, int type, struct atari_format_descr *desc)
|
||||
{
|
||||
struct request_queue *q = unit[drive].disk->queue;
|
||||
unsigned char *p;
|
||||
int sect, nsect;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
DPRINT(("do_format( dr=%d tr=%d he=%d offs=%d )\n",
|
||||
drive, desc->track, desc->head, desc->sect_offset ));
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
|
||||
wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0);
|
||||
local_irq_save(flags);
|
||||
stdma_lock(floppy_irq, NULL);
|
||||
atari_turnon_irq( IRQ_MFP_FDC ); /* should be already, just to be sure */
|
||||
|
@ -661,16 +739,16 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
|
|||
if (type) {
|
||||
if (--type >= NUM_DISK_MINORS ||
|
||||
minor2disktype[type].drive_types > DriveType) {
|
||||
redo_fd_request();
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
type = minor2disktype[type].index;
|
||||
UDT = &atari_disk_type[type];
|
||||
}
|
||||
|
||||
if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) {
|
||||
redo_fd_request();
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
nsect = UDT->spt;
|
||||
|
@ -709,8 +787,11 @@ static int do_format(int drive, int type, struct atari_format_descr *desc)
|
|||
|
||||
wait_for_completion(&format_wait);
|
||||
|
||||
redo_fd_request();
|
||||
return( FormatError ? -EIO : 0 );
|
||||
ret = FormatError ? -EIO : 0;
|
||||
out:
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
@ -740,7 +821,6 @@ static void do_fd_action( int drive )
|
|||
else {
|
||||
/* all sectors finished */
|
||||
fd_end_request_cur(BLK_STS_OK);
|
||||
redo_fd_request();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -1145,7 +1225,6 @@ static void fd_rwsec_done1(int status)
|
|||
else {
|
||||
/* all sectors finished */
|
||||
fd_end_request_cur(BLK_STS_OK);
|
||||
redo_fd_request();
|
||||
}
|
||||
return;
|
||||
|
||||
|
@ -1303,8 +1382,6 @@ static void finish_fdc_done( int dummy )
|
|||
|
||||
local_irq_save(flags);
|
||||
stdma_release();
|
||||
fdc_busy = 0;
|
||||
wake_up( &fdc_wait );
|
||||
local_irq_restore(flags);
|
||||
|
||||
DPRINT(("finish_fdc() finished\n"));
|
||||
|
@ -1394,59 +1471,34 @@ static void setup_req_params( int drive )
|
|||
ReqTrack, ReqSector, (unsigned long)ReqData ));
|
||||
}
|
||||
|
||||
/*
|
||||
* Round-robin between our available drives, doing one request from each
|
||||
*/
|
||||
static struct request *set_next_request(void)
|
||||
static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request_queue *q;
|
||||
int old_pos = fdc_queue;
|
||||
struct request *rq = NULL;
|
||||
struct atari_floppy_struct *floppy = bd->rq->rq_disk->private_data;
|
||||
int drive = floppy - unit;
|
||||
int type = floppy->type;
|
||||
|
||||
do {
|
||||
q = unit[fdc_queue].disk->queue;
|
||||
if (++fdc_queue == FD_MAX_UNITS)
|
||||
fdc_queue = 0;
|
||||
if (q) {
|
||||
rq = blk_fetch_request(q);
|
||||
if (rq) {
|
||||
rq->error_count = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (fdc_queue != old_pos);
|
||||
spin_lock_irq(&ataflop_lock);
|
||||
if (fd_request) {
|
||||
spin_unlock_irq(&ataflop_lock);
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
if (!stdma_try_lock(floppy_irq, NULL)) {
|
||||
spin_unlock_irq(&ataflop_lock);
|
||||
return BLK_STS_RESOURCE;
|
||||
}
|
||||
fd_request = bd->rq;
|
||||
blk_mq_start_request(fd_request);
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
|
||||
static void redo_fd_request(void)
|
||||
{
|
||||
int drive, type;
|
||||
struct atari_floppy_struct *floppy;
|
||||
|
||||
DPRINT(("redo_fd_request: fd_request=%p dev=%s fd_request->sector=%ld\n",
|
||||
fd_request, fd_request ? fd_request->rq_disk->disk_name : "",
|
||||
fd_request ? blk_rq_pos(fd_request) : 0 ));
|
||||
atari_disable_irq( IRQ_MFP_FDC );
|
||||
|
||||
IsFormatting = 0;
|
||||
|
||||
repeat:
|
||||
if (!fd_request) {
|
||||
fd_request = set_next_request();
|
||||
if (!fd_request)
|
||||
goto the_end;
|
||||
}
|
||||
|
||||
floppy = fd_request->rq_disk->private_data;
|
||||
drive = floppy - unit;
|
||||
type = floppy->type;
|
||||
|
||||
if (!UD.connected) {
|
||||
/* drive not connected */
|
||||
printk(KERN_ERR "Unknown Device: fd%d\n", drive );
|
||||
fd_end_request_cur(BLK_STS_IOERR);
|
||||
goto repeat;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (type == 0) {
|
||||
|
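The new ataflop_queue_rq() above distinguishes the two "busy" return codes: BLK_STS_DEV_RESOURCE when the driver itself will re-run the queue (here, once the single in-flight request finishes), and BLK_STS_RESOURCE when a shared resource is contended and the core should retry the dispatch. A sketch of that split, with hypothetical names apart from the BLK_STS_* codes:

#include <linux/blk-mq.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static struct request *example_current_rq;	/* the single in-flight request */

static bool example_try_lock_hardware(void);	/* hypothetical, cf. stdma_try_lock() */

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	spin_lock_irq(&example_lock);
	if (example_current_rq) {
		/* busy: the driver guarantees a re-run when this request ends */
		spin_unlock_irq(&example_lock);
		return BLK_STS_DEV_RESOURCE;
	}
	if (!example_try_lock_hardware()) {
		/* shared controller contended: let the core retry later */
		spin_unlock_irq(&example_lock);
		return BLK_STS_RESOURCE;
	}
	example_current_rq = bd->rq;
	blk_mq_start_request(bd->rq);
	spin_unlock_irq(&example_lock);

	/* ... program the controller here ... */
	return BLK_STS_OK;
}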
@ -1462,23 +1514,18 @@ repeat:
|
|||
if (--type >= NUM_DISK_MINORS) {
|
||||
printk(KERN_WARNING "fd%d: invalid disk format", drive );
|
||||
fd_end_request_cur(BLK_STS_IOERR);
|
||||
goto repeat;
|
||||
goto out;
|
||||
}
|
||||
if (minor2disktype[type].drive_types > DriveType) {
|
||||
printk(KERN_WARNING "fd%d: unsupported disk format", drive );
|
||||
fd_end_request_cur(BLK_STS_IOERR);
|
||||
goto repeat;
|
||||
goto out;
|
||||
}
|
||||
type = minor2disktype[type].index;
|
||||
UDT = &atari_disk_type[type];
|
||||
set_capacity(floppy->disk, UDT->blocks);
|
||||
UD.autoprobe = 0;
|
||||
}
|
||||
|
||||
if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
|
||||
fd_end_request_cur(BLK_STS_IOERR);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* stop deselect timer */
|
||||
del_timer( &motor_off_timer );
|
||||
|
@ -1490,22 +1537,13 @@ repeat:
|
|||
setup_req_params( drive );
|
||||
do_fd_action( drive );
|
||||
|
||||
return;
|
||||
|
||||
the_end:
|
||||
finish_fdc();
|
||||
}
|
||||
|
||||
|
||||
void do_fd_request(struct request_queue * q)
|
||||
{
|
||||
DPRINT(("do_fd_request for pid %d\n",current->pid));
|
||||
wait_event(fdc_wait, cmpxchg(&fdc_busy, 0, 1) == 0);
|
||||
stdma_lock(floppy_irq, NULL);
|
||||
|
||||
atari_disable_irq( IRQ_MFP_FDC );
|
||||
redo_fd_request();
|
||||
if (bd->last)
|
||||
finish_fdc();
|
||||
atari_enable_irq( IRQ_MFP_FDC );
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&ataflop_lock);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
|
@ -1583,7 +1621,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
|
|||
/* what if type > 0 here? Overwrite specified entry ? */
|
||||
if (type) {
|
||||
/* refuse to re-set a predefined type for now */
|
||||
redo_fd_request();
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -1651,10 +1688,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
|
|||
|
||||
/* sanity check */
|
||||
if (setprm.track != dtp->blocks/dtp->spt/2 ||
|
||||
setprm.head != 2) {
|
||||
redo_fd_request();
|
||||
setprm.head != 2)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
UDT = dtp;
|
||||
set_capacity(floppy->disk, UDT->blocks);
|
||||
|
@ -1910,6 +1945,10 @@ static const struct block_device_operations floppy_fops = {
|
|||
.revalidate_disk= floppy_revalidate,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops ataflop_mq_ops = {
|
||||
.queue_rq = ataflop_queue_rq,
|
||||
};
|
||||
|
||||
static struct kobject *floppy_find(dev_t dev, int *part, void *data)
|
||||
{
|
||||
int drive = *part & 3;
|
||||
|
@ -1923,6 +1962,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
|
|||
static int __init atari_floppy_init (void)
|
||||
{
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
if (!MACH_IS_ATARI)
|
||||
/* Amiga, Mac, ... don't have Atari-compatible floppy :-) */
|
||||
|
@ -1933,8 +1973,19 @@ static int __init atari_floppy_init (void)
|
|||
|
||||
for (i = 0; i < FD_MAX_UNITS; i++) {
|
||||
unit[i].disk = alloc_disk(1);
|
||||
if (!unit[i].disk)
|
||||
goto Enomem;
|
||||
if (!unit[i].disk) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set,
|
||||
&ataflop_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(unit[i].disk->queue)) {
|
||||
ret = PTR_ERR(unit[i].disk->queue);
|
||||
unit[i].disk->queue = NULL;
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if (UseTrackbuffer < 0)
|
||||
|
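Both floppy conversions in this series allocate their queues with the single-queue helper shown above. A minimal sketch of that setup and its error handling; struct example_unit is hypothetical:

#include <linux/blk-mq.h>
#include <linux/genhd.h>

struct example_unit {			/* hypothetical per-drive state */
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
};

static int example_setup_queue(struct example_unit *u,
			       const struct blk_mq_ops *ops)
{
	/* one hw queue, depth 2: the helper allocates both tag set and queue */
	u->disk->queue = blk_mq_init_sq_queue(&u->tag_set, ops, 2,
					      BLK_MQ_F_SHOULD_MERGE);
	if (IS_ERR(u->disk->queue)) {
		int ret = PTR_ERR(u->disk->queue);

		u->disk->queue = NULL;
		return ret;
	}
	return 0;
}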
@ -1951,7 +2002,8 @@ static int __init atari_floppy_init (void)
|
|||
DMABuffer = atari_stram_alloc(BUFFER_SIZE+512, "ataflop");
|
||||
if (!DMABuffer) {
|
||||
printk(KERN_ERR "atari_floppy_init: cannot get dma buffer\n");
|
||||
goto Enomem;
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
TrackBuffer = DMABuffer + 512;
|
||||
PhysDMABuffer = atari_stram_to_phys(DMABuffer);
|
||||
|
@ -1966,10 +2018,6 @@ static int __init atari_floppy_init (void)
|
|||
sprintf(unit[i].disk->disk_name, "fd%d", i);
|
||||
unit[i].disk->fops = &floppy_fops;
|
||||
unit[i].disk->private_data = &unit[i];
|
||||
unit[i].disk->queue = blk_init_queue(do_fd_request,
|
||||
&ataflop_lock);
|
||||
if (!unit[i].disk->queue)
|
||||
goto Enomem;
|
||||
set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
|
||||
add_disk(unit[i].disk);
|
||||
}
|
||||
|
@ -1983,17 +2031,23 @@ static int __init atari_floppy_init (void)
|
|||
config_types();
|
||||
|
||||
return 0;
|
||||
Enomem:
|
||||
while (i--) {
|
||||
struct request_queue *q = unit[i].disk->queue;
|
||||
|
||||
put_disk(unit[i].disk);
|
||||
if (q)
|
||||
blk_cleanup_queue(q);
|
||||
}
|
||||
err:
|
||||
do {
|
||||
struct gendisk *disk = unit[i].disk;
|
||||
|
||||
if (disk) {
|
||||
if (disk->queue) {
|
||||
blk_cleanup_queue(disk->queue);
|
||||
disk->queue = NULL;
|
||||
}
|
||||
blk_mq_free_tag_set(&unit[i].tag_set);
|
||||
put_disk(unit[i].disk);
|
||||
}
|
||||
} while (i--);
|
||||
|
||||
unregister_blkdev(FLOPPY_MAJOR, "fd");
|
||||
return -ENOMEM;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef MODULE
|
||||
|
@ -2040,11 +2094,10 @@ static void __exit atari_floppy_exit(void)
|
|||
int i;
|
||||
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
|
||||
for (i = 0; i < FD_MAX_UNITS; i++) {
|
||||
struct request_queue *q = unit[i].disk->queue;
|
||||
|
||||
del_gendisk(unit[i].disk);
|
||||
blk_cleanup_queue(unit[i].disk->queue);
|
||||
blk_mq_free_tag_set(&unit[i].tag_set);
|
||||
put_disk(unit[i].disk);
|
||||
blk_cleanup_queue(q);
|
||||
}
|
||||
unregister_blkdev(FLOPPY_MAJOR, "fd");
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ config BLK_DEV_DRBD
|
|||
depends on PROC_FS && INET
|
||||
select LRU_CACHE
|
||||
select LIBCRC32C
|
||||
default n
|
||||
help
|
||||
|
||||
NOTE: In order to authenticate connections you have to select
|
||||
|
|
|
@ -429,7 +429,7 @@ enum {
|
|||
__EE_CALL_AL_COMPLETE_IO,
|
||||
__EE_MAY_SET_IN_SYNC,
|
||||
|
||||
/* is this a TRIM aka REQ_DISCARD? */
|
||||
/* is this a TRIM aka REQ_OP_DISCARD? */
|
||||
__EE_IS_TRIM,
|
||||
|
||||
/* In case a barrier failed,
|
||||
|
@ -724,10 +724,10 @@ struct drbd_connection {
|
|||
struct list_head transfer_log; /* all requests not yet fully processed */
|
||||
|
||||
struct crypto_shash *cram_hmac_tfm;
|
||||
struct crypto_ahash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */
|
||||
struct crypto_ahash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */
|
||||
struct crypto_ahash *csums_tfm;
|
||||
struct crypto_ahash *verify_tfm;
|
||||
struct crypto_shash *integrity_tfm; /* checksums we compute, updates protected by connection->data->mutex */
|
||||
struct crypto_shash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */
|
||||
struct crypto_shash *csums_tfm;
|
||||
struct crypto_shash *verify_tfm;
|
||||
void *int_dig_in;
|
||||
void *int_dig_vv;
|
||||
|
||||
|
@ -1531,8 +1531,9 @@ static inline void ov_out_of_sync_print(struct drbd_device *device)
|
|||
}
|
||||
|
||||
|
||||
extern void drbd_csum_bio(struct crypto_ahash *, struct bio *, void *);
|
||||
extern void drbd_csum_ee(struct crypto_ahash *, struct drbd_peer_request *, void *);
|
||||
extern void drbd_csum_bio(struct crypto_shash *, struct bio *, void *);
|
||||
extern void drbd_csum_ee(struct crypto_shash *, struct drbd_peer_request *,
|
||||
void *);
|
||||
/* worker callbacks */
|
||||
extern int w_e_end_data_req(struct drbd_work *, int);
|
||||
extern int w_e_end_rsdata_req(struct drbd_work *, int);
|
||||
|
|
|
@ -1377,7 +1377,7 @@ void drbd_send_ack_dp(struct drbd_peer_device *peer_device, enum drbd_packet cmd
|
|||
struct p_data *dp, int data_size)
|
||||
{
|
||||
if (peer_device->connection->peer_integrity_tfm)
|
||||
data_size -= crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
|
||||
data_size -= crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
|
||||
_drbd_send_ack(peer_device, cmd, dp->sector, cpu_to_be32(data_size),
|
||||
dp->block_id);
|
||||
}
|
||||
|
@ -1673,7 +1673,7 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
|
|||
return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
|
||||
}
|
||||
|
||||
/* Used to send write or TRIM aka REQ_DISCARD requests
|
||||
/* Used to send write or TRIM aka REQ_OP_DISCARD requests
|
||||
* R_PRIMARY -> Peer (P_DATA, P_TRIM)
|
||||
*/
|
||||
int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
|
||||
|
@ -1690,7 +1690,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
|
|||
sock = &peer_device->connection->data;
|
||||
p = drbd_prepare_command(peer_device, sock);
|
||||
digest_size = peer_device->connection->integrity_tfm ?
|
||||
crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0;
|
||||
crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0;
|
||||
|
||||
if (!p)
|
||||
return -EIO;
|
||||
|
@ -1796,7 +1796,7 @@ int drbd_send_block(struct drbd_peer_device *peer_device, enum drbd_packet cmd,
|
|||
p = drbd_prepare_command(peer_device, sock);
|
||||
|
||||
digest_size = peer_device->connection->integrity_tfm ?
|
||||
crypto_ahash_digestsize(peer_device->connection->integrity_tfm) : 0;
|
||||
crypto_shash_digestsize(peer_device->connection->integrity_tfm) : 0;
|
||||
|
||||
if (!p)
|
||||
return -EIO;
|
||||
|
@ -2557,11 +2557,11 @@ void conn_free_crypto(struct drbd_connection *connection)
|
|||
{
|
||||
drbd_free_sock(connection);
|
||||
|
||||
crypto_free_ahash(connection->csums_tfm);
|
||||
crypto_free_ahash(connection->verify_tfm);
|
||||
crypto_free_shash(connection->csums_tfm);
|
||||
crypto_free_shash(connection->verify_tfm);
|
||||
crypto_free_shash(connection->cram_hmac_tfm);
|
||||
crypto_free_ahash(connection->integrity_tfm);
|
||||
crypto_free_ahash(connection->peer_integrity_tfm);
|
||||
crypto_free_shash(connection->integrity_tfm);
|
||||
crypto_free_shash(connection->peer_integrity_tfm);
|
||||
kfree(connection->int_dig_in);
|
||||
kfree(connection->int_dig_vv);
|
||||
|
||||
|
|
|
@ -2303,10 +2303,10 @@ check_net_options(struct drbd_connection *connection, struct net_conf *new_net_c
|
|||
}
|
||||
|
||||
struct crypto {
|
||||
struct crypto_ahash *verify_tfm;
|
||||
struct crypto_ahash *csums_tfm;
|
||||
struct crypto_shash *verify_tfm;
|
||||
struct crypto_shash *csums_tfm;
|
||||
struct crypto_shash *cram_hmac_tfm;
|
||||
struct crypto_ahash *integrity_tfm;
|
||||
struct crypto_shash *integrity_tfm;
|
||||
};
|
||||
|
||||
static int
|
||||
|
@ -2324,36 +2324,21 @@ alloc_shash(struct crypto_shash **tfm, char *tfm_name, int err_alg)
|
|||
return NO_ERROR;
|
||||
}
|
||||
|
||||
static int
|
||||
alloc_ahash(struct crypto_ahash **tfm, char *tfm_name, int err_alg)
|
||||
{
|
||||
if (!tfm_name[0])
|
||||
return NO_ERROR;
|
||||
|
||||
*tfm = crypto_alloc_ahash(tfm_name, 0, CRYPTO_ALG_ASYNC);
|
||||
if (IS_ERR(*tfm)) {
|
||||
*tfm = NULL;
|
||||
return err_alg;
|
||||
}
|
||||
|
||||
return NO_ERROR;
|
||||
}
|
||||
|
||||
static enum drbd_ret_code
|
||||
alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
|
||||
{
|
||||
char hmac_name[CRYPTO_MAX_ALG_NAME];
|
||||
enum drbd_ret_code rv;
|
||||
|
||||
rv = alloc_ahash(&crypto->csums_tfm, new_net_conf->csums_alg,
|
||||
rv = alloc_shash(&crypto->csums_tfm, new_net_conf->csums_alg,
|
||||
ERR_CSUMS_ALG);
|
||||
if (rv != NO_ERROR)
|
||||
return rv;
|
||||
rv = alloc_ahash(&crypto->verify_tfm, new_net_conf->verify_alg,
|
||||
rv = alloc_shash(&crypto->verify_tfm, new_net_conf->verify_alg,
|
||||
ERR_VERIFY_ALG);
|
||||
if (rv != NO_ERROR)
|
||||
return rv;
|
||||
rv = alloc_ahash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
|
||||
rv = alloc_shash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
|
||||
ERR_INTEGRITY_ALG);
|
||||
if (rv != NO_ERROR)
|
||||
return rv;
|
||||
|
@ -2371,9 +2356,9 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
|
|||
static void free_crypto(struct crypto *crypto)
|
||||
{
|
||||
crypto_free_shash(crypto->cram_hmac_tfm);
|
||||
crypto_free_ahash(crypto->integrity_tfm);
|
||||
crypto_free_ahash(crypto->csums_tfm);
|
||||
crypto_free_ahash(crypto->verify_tfm);
|
||||
crypto_free_shash(crypto->integrity_tfm);
|
||||
crypto_free_shash(crypto->csums_tfm);
|
||||
crypto_free_shash(crypto->verify_tfm);
|
||||
}
|
||||
|
||||
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
|
||||
|
@ -2450,17 +2435,17 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
|
|||
rcu_assign_pointer(connection->net_conf, new_net_conf);
|
||||
|
||||
if (!rsr) {
|
||||
crypto_free_ahash(connection->csums_tfm);
|
||||
crypto_free_shash(connection->csums_tfm);
|
||||
connection->csums_tfm = crypto.csums_tfm;
|
||||
crypto.csums_tfm = NULL;
|
||||
}
|
||||
if (!ovr) {
|
||||
crypto_free_ahash(connection->verify_tfm);
|
||||
crypto_free_shash(connection->verify_tfm);
|
||||
connection->verify_tfm = crypto.verify_tfm;
|
||||
crypto.verify_tfm = NULL;
|
||||
}
|
||||
|
||||
crypto_free_ahash(connection->integrity_tfm);
|
||||
crypto_free_shash(connection->integrity_tfm);
|
||||
connection->integrity_tfm = crypto.integrity_tfm;
|
||||
if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
|
||||
/* Do this without trying to take connection->data.mutex again. */
|
||||
|
|
|
@ -57,7 +57,7 @@ enum drbd_packet {
|
|||
P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */
|
||||
/* 0x2e to 0x30 reserved, used in drbd 9 */
|
||||
|
||||
/* REQ_DISCARD. We used "discard" in different contexts before,
|
||||
/* REQ_OP_DISCARD. We used "discard" in different contexts before,
|
||||
* which is why I chose TRIM here, to disambiguate. */
|
||||
P_TRIM = 0x31,
|
||||
|
||||
|
@ -126,7 +126,7 @@ struct p_header100 {
|
|||
#define DP_UNPLUG 8 /* not used anymore */
|
||||
#define DP_FUA 16 /* equals REQ_FUA */
|
||||
#define DP_FLUSH 32 /* equals REQ_PREFLUSH */
|
||||
#define DP_DISCARD 64 /* equals REQ_DISCARD */
|
||||
#define DP_DISCARD 64 /* equals REQ_OP_DISCARD */
|
||||
#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
|
||||
#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
|
||||
#define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */
|
||||
|
|
|
@ -1732,7 +1732,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
|
|||
}
|
||||
|
||||
/* quick wrapper in case payload size != request_size (write same) */
|
||||
static void drbd_csum_ee_size(struct crypto_ahash *h,
|
||||
static void drbd_csum_ee_size(struct crypto_shash *h,
|
||||
struct drbd_peer_request *r, void *d,
|
||||
unsigned int payload_size)
|
||||
{
|
||||
|
@ -1769,7 +1769,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
|
|||
|
||||
digest_size = 0;
|
||||
if (!trim && peer_device->connection->peer_integrity_tfm) {
|
||||
digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
|
||||
digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
|
||||
/*
|
||||
* FIXME: Receive the incoming digest into the receive buffer
|
||||
* here, together with its struct p_data?
|
||||
|
@ -1905,7 +1905,7 @@ static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_req
|
|||
|
||||
digest_size = 0;
|
||||
if (peer_device->connection->peer_integrity_tfm) {
|
||||
digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
|
||||
digest_size = crypto_shash_digestsize(peer_device->connection->peer_integrity_tfm);
|
||||
err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -3542,7 +3542,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
|||
int p_proto, p_discard_my_data, p_two_primaries, cf;
|
||||
struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
|
||||
char integrity_alg[SHARED_SECRET_MAX] = "";
|
||||
struct crypto_ahash *peer_integrity_tfm = NULL;
|
||||
struct crypto_shash *peer_integrity_tfm = NULL;
|
||||
void *int_dig_in = NULL, *int_dig_vv = NULL;
|
||||
|
||||
p_proto = be32_to_cpu(p->protocol);
|
||||
|
@ -3623,7 +3623,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
|||
* change.
|
||||
*/
|
||||
|
||||
peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
|
||||
peer_integrity_tfm = crypto_alloc_shash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
|
||||
if (IS_ERR(peer_integrity_tfm)) {
|
||||
peer_integrity_tfm = NULL;
|
||||
drbd_err(connection, "peer data-integrity-alg %s not supported\n",
|
||||
|
@ -3631,7 +3631,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
|||
goto disconnect;
|
||||
}
|
||||
|
||||
hash_size = crypto_ahash_digestsize(peer_integrity_tfm);
|
||||
hash_size = crypto_shash_digestsize(peer_integrity_tfm);
|
||||
int_dig_in = kmalloc(hash_size, GFP_KERNEL);
|
||||
int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
|
||||
if (!(int_dig_in && int_dig_vv)) {
|
||||
|
@ -3661,7 +3661,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
|||
mutex_unlock(&connection->resource->conf_update);
|
||||
mutex_unlock(&connection->data.mutex);
|
||||
|
||||
crypto_free_ahash(connection->peer_integrity_tfm);
|
||||
crypto_free_shash(connection->peer_integrity_tfm);
|
||||
kfree(connection->int_dig_in);
|
||||
kfree(connection->int_dig_vv);
|
||||
connection->peer_integrity_tfm = peer_integrity_tfm;
|
||||
|
@ -3679,7 +3679,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
|||
disconnect_rcu_unlock:
|
||||
rcu_read_unlock();
|
||||
disconnect:
|
||||
crypto_free_ahash(peer_integrity_tfm);
|
||||
crypto_free_shash(peer_integrity_tfm);
|
||||
kfree(int_dig_in);
|
||||
kfree(int_dig_vv);
|
||||
conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
|
||||
|
@ -3691,15 +3691,16 @@ disconnect:
|
|||
* return: NULL (alg name was "")
|
||||
* ERR_PTR(error) if something goes wrong
|
||||
* or the crypto hash ptr, if it worked out ok. */
|
||||
static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
|
||||
static struct crypto_shash *drbd_crypto_alloc_digest_safe(
|
||||
const struct drbd_device *device,
|
||||
const char *alg, const char *name)
|
||||
{
|
||||
struct crypto_ahash *tfm;
|
||||
struct crypto_shash *tfm;
|
||||
|
||||
if (!alg[0])
|
||||
return NULL;
|
||||
|
||||
tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
|
||||
tfm = crypto_alloc_shash(alg, 0, 0);
|
||||
if (IS_ERR(tfm)) {
|
||||
drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
|
||||
alg, name, PTR_ERR(tfm));
|
||||
|
@ -3752,8 +3753,8 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
|
|||
struct drbd_device *device;
|
||||
struct p_rs_param_95 *p;
|
||||
unsigned int header_size, data_size, exp_max_sz;
|
||||
struct crypto_ahash *verify_tfm = NULL;
|
||||
struct crypto_ahash *csums_tfm = NULL;
|
||||
struct crypto_shash *verify_tfm = NULL;
|
||||
struct crypto_shash *csums_tfm = NULL;
|
||||
struct net_conf *old_net_conf, *new_net_conf = NULL;
|
||||
struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
|
||||
const int apv = connection->agreed_pro_version;
|
||||
|
@ -3900,14 +3901,14 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
|
|||
if (verify_tfm) {
|
||||
strcpy(new_net_conf->verify_alg, p->verify_alg);
|
||||
new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
|
||||
crypto_free_ahash(peer_device->connection->verify_tfm);
|
||||
crypto_free_shash(peer_device->connection->verify_tfm);
|
||||
peer_device->connection->verify_tfm = verify_tfm;
|
||||
drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
|
||||
}
|
||||
if (csums_tfm) {
|
||||
strcpy(new_net_conf->csums_alg, p->csums_alg);
|
||||
new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
|
||||
crypto_free_ahash(peer_device->connection->csums_tfm);
|
||||
crypto_free_shash(peer_device->connection->csums_tfm);
|
||||
peer_device->connection->csums_tfm = csums_tfm;
|
||||
drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
|
||||
}
|
||||
|
@ -3951,9 +3952,9 @@ disconnect:
|
|||
mutex_unlock(&connection->resource->conf_update);
|
||||
/* just for completeness: actually not needed,
|
||||
* as this is not reached if csums_tfm was ok. */
|
||||
crypto_free_ahash(csums_tfm);
|
||||
crypto_free_shash(csums_tfm);
|
||||
/* but free the verify_tfm again, if csums_tfm did not work out */
|
||||
crypto_free_ahash(verify_tfm);
|
||||
crypto_free_shash(verify_tfm);
|
||||
conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
|
||||
return -EIO;
|
||||
}
|
||||
|
|
|
@ -650,7 +650,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
|
|||
case DISCARD_COMPLETED_NOTSUPP:
|
||||
case DISCARD_COMPLETED_WITH_ERROR:
|
||||
/* I'd rather not detach from local disk just because it
|
||||
* failed a REQ_DISCARD. */
|
||||
* failed a REQ_OP_DISCARD. */
|
||||
mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
|
||||
break;

@@ -152,7 +152,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

/* FIXME do we want to detach for failed REQ_DISCARD?
/* FIXME do we want to detach for failed REQ_OP_DISCARD?
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

@@ -295,60 +295,61 @@ void drbd_request_endio(struct bio *bio)
complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_ahash *tfm, struct drbd_peer_request *peer_req, void *digest)
void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
AHASH_REQUEST_ON_STACK(req, tfm);
struct scatterlist sg;
SHASH_DESC_ON_STACK(desc, tfm);
struct page *page = peer_req->pages;
struct page *tmp;
unsigned len;
void *src;

ahash_request_set_tfm(req, tfm);
ahash_request_set_callback(req, 0, NULL, NULL);
desc->tfm = tfm;
desc->flags = 0;

sg_init_table(&sg, 1);
crypto_ahash_init(req);
crypto_shash_init(desc);

src = kmap_atomic(page);
while ((tmp = page_chain_next(page))) {
/* all but the last page will be fully used */
sg_set_page(&sg, page, PAGE_SIZE, 0);
ahash_request_set_crypt(req, &sg, NULL, sg.length);
crypto_ahash_update(req);
crypto_shash_update(desc, src, PAGE_SIZE);
kunmap_atomic(src);
page = tmp;
src = kmap_atomic(page);
}
/* and now the last, possibly only partially used page */
len = peer_req->i.size & (PAGE_SIZE - 1);
sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
ahash_request_set_crypt(req, &sg, digest, sg.length);
crypto_ahash_finup(req);
ahash_request_zero(req);
crypto_shash_update(desc, src, len ?: PAGE_SIZE);
kunmap_atomic(src);

crypto_shash_final(desc, digest);
shash_desc_zero(desc);
}

void drbd_csum_bio(struct crypto_ahash *tfm, struct bio *bio, void *digest)
void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
{
AHASH_REQUEST_ON_STACK(req, tfm);
struct scatterlist sg;
SHASH_DESC_ON_STACK(desc, tfm);
struct bio_vec bvec;
struct bvec_iter iter;

ahash_request_set_tfm(req, tfm);
ahash_request_set_callback(req, 0, NULL, NULL);
desc->tfm = tfm;
desc->flags = 0;

sg_init_table(&sg, 1);
crypto_ahash_init(req);
crypto_shash_init(desc);

bio_for_each_segment(bvec, bio, iter) {
sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
ahash_request_set_crypt(req, &sg, NULL, sg.length);
crypto_ahash_update(req);
u8 *src;

src = kmap_atomic(bvec.bv_page);
crypto_shash_update(desc, src + bvec.bv_offset, bvec.bv_len);
kunmap_atomic(src);

/* REQ_OP_WRITE_SAME has only one segment,
* checksum the payload only once. */
if (bio_op(bio) == REQ_OP_WRITE_SAME)
break;
}
ahash_request_set_crypt(req, NULL, digest, 0);
crypto_ahash_final(req);
ahash_request_zero(req);
crypto_shash_final(desc, digest);
shash_desc_zero(desc);
}

/* MAYBE merge common code with w_e_end_ov_req */

@@ -367,7 +368,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel)
if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
goto out;

digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
sector_t sector = peer_req->i.sector;

@@ -1205,7 +1206,7 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
* a real fix would be much more involved,
* introducing more locking mechanisms */
if (peer_device->connection->csums_tfm) {
digest_size = crypto_ahash_digestsize(peer_device->connection->csums_tfm);
digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
D_ASSERT(device, digest_size == di->digest_size);
digest = kmalloc(digest_size, GFP_NOIO);
}

@@ -1255,7 +1256,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
if (unlikely(cancel))
goto out;

digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
digest = kmalloc(digest_size, GFP_NOIO);
if (!digest) {
err = 1; /* terminate the connection in case the allocation failed */

@@ -1327,7 +1328,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
di = peer_req->digest;

if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
digest_size = crypto_ahash_digestsize(peer_device->connection->verify_tfm);
digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
digest = kmalloc(digest_size, GFP_NOIO);
if (digest) {
drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
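The drbd hunks above move from the async crypto_ahash interface to the simpler synchronous crypto_shash interface. A minimal standalone sketch of that shash pattern follows; the helper name, the transform and the buffer are hypothetical, not part of the patch, and the 4.20-era patch also clears desc->flags, a field later kernels removed.

#include <crypto/hash.h>
#include <linux/err.h>

/* Hypothetical helper: hash 'len' bytes at 'buf' with a synchronous hash
 * transform 'tfm' (e.g. allocated with crypto_alloc_shash()).  'digest'
 * must hold at least crypto_shash_digestsize(tfm) bytes. */
static int example_shash_digest(struct crypto_shash *tfm,
				const void *buf, unsigned int len, u8 *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);	/* request state lives on the stack */
	int err;

	desc->tfm = tfm;

	err = crypto_shash_init(desc);
	if (!err)
		err = crypto_shash_update(desc, buf, len);
	if (!err)
		err = crypto_shash_final(desc, digest);

	shash_desc_zero(desc);		/* wipe the on-stack state */
	return err;
}

For a single contiguous buffer the same result can be had with one crypto_shash_digest() call; drbd keeps the init/update/final split because it walks page chains and bio segments.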
|
|
@@ -252,13 +252,13 @@ static int allowed_drive_mask = 0x33;
static int irqdma_allocated;

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blkpg.h>
#include <linux/cdrom.h> /* for the compatibility eject ioctl */
#include <linux/completion.h>

static LIST_HEAD(floppy_reqs);
static struct request *current_req;
static void do_fd_request(struct request_queue *q);
static int set_next_request(void);

#ifndef fd_get_dma_residue

@@ -414,10 +414,10 @@ static struct floppy_drive_struct drive_state[N_DRIVE];
static struct floppy_write_errors write_errors[N_DRIVE];
static struct timer_list motor_off_timer[N_DRIVE];
static struct gendisk *disks[N_DRIVE];
static struct blk_mq_tag_set tag_sets[N_DRIVE];
static struct block_device *opened_bdev[N_DRIVE];
static DEFINE_MUTEX(open_lock);
static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
static int fdc_queue;

/*
* This struct defines the different floppy types.

@@ -2216,8 +2216,9 @@ static void floppy_end_request(struct request *req, blk_status_t error)
/* current_count_sectors can be zero if transfer failed */
if (error)
nr_sectors = blk_rq_cur_sectors(req);
if (__blk_end_request(req, error, nr_sectors << 9))
if (blk_update_request(req, error, nr_sectors << 9))
return;
__blk_mq_end_request(req, error);

/* We're done with the request */
floppy_off(drive);

@@ -2797,27 +2798,14 @@ static int make_raw_rw_request(void)
return 2;
}

/*
* Round-robin between our available drives, doing one request from each
*/
static int set_next_request(void)
{
struct request_queue *q;
int old_pos = fdc_queue;

do {
q = disks[fdc_queue]->queue;
if (++fdc_queue == N_DRIVE)
fdc_queue = 0;
if (q) {
current_req = blk_fetch_request(q);
if (current_req) {
current_req->error_count = 0;
break;
}
}
} while (fdc_queue != old_pos);

current_req = list_first_entry_or_null(&floppy_reqs, struct request,
queuelist);
if (current_req) {
current_req->error_count = 0;
list_del_init(&current_req->queuelist);
}
return current_req != NULL;
}

@@ -2901,29 +2889,38 @@ static void process_fd_request(void)
schedule_bh(redo_fd_request);
}

static void do_fd_request(struct request_queue *q)
static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
blk_mq_start_request(bd->rq);

if (WARN(max_buffer_sectors == 0,
"VFS: %s called on non-open device\n", __func__))
return;
return BLK_STS_IOERR;

if (WARN(atomic_read(&usage_count) == 0,
"warning: usage count=0, current_req=%p sect=%ld flags=%llx\n",
current_req, (long)blk_rq_pos(current_req),
(unsigned long long) current_req->cmd_flags))
return;
return BLK_STS_IOERR;

spin_lock_irq(&floppy_lock);
list_add_tail(&bd->rq->queuelist, &floppy_reqs);
spin_unlock_irq(&floppy_lock);

if (test_and_set_bit(0, &fdc_busy)) {
/* fdc busy, this new request will be treated when the
current one is done */
is_alive(__func__, "old request running");
return;
return BLK_STS_OK;
}

command_status = FD_COMMAND_NONE;
__reschedule_timeout(MAXTIMEOUT, "fd_request");
set_fdc(0);
process_fd_request();
is_alive(__func__, "");
return BLK_STS_OK;
}

static const struct cont_t poll_cont = {

@@ -4486,6 +4483,10 @@ static struct platform_driver floppy_driver = {
},
};

static const struct blk_mq_ops floppy_mq_ops = {
.queue_rq = floppy_queue_rq,
};

static struct platform_device floppy_device[N_DRIVE];

static bool floppy_available(int drive)

@@ -4533,9 +4534,12 @@ static int __init do_floppy_init(void)
goto out_put_disk;
}

disks[drive]->queue = blk_init_queue(do_fd_request, &floppy_lock);
if (!disks[drive]->queue) {
err = -ENOMEM;
disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive],
&floppy_mq_ops, 2,
BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disks[drive]->queue)) {
err = PTR_ERR(disks[drive]->queue);
disks[drive]->queue = NULL;
goto out_put_disk;
}

@@ -4679,7 +4683,7 @@ static int __init do_floppy_init(void)
/* to be cleaned up... */
disks[drive]->private_data = (void *)(long)drive;
disks[drive]->flags |= GENHD_FL_REMOVABLE;
device_add_disk(&floppy_device[drive].dev, disks[drive]);
device_add_disk(&floppy_device[drive].dev, disks[drive], NULL);
}

return 0;

@@ -4708,6 +4712,7 @@ out_put_disk:
del_timer_sync(&motor_off_timer[drive]);
blk_cleanup_queue(disks[drive]->queue);
disks[drive]->queue = NULL;
blk_mq_free_tag_set(&tag_sets[drive]);
}
put_disk(disks[drive]);
}

@@ -4935,6 +4940,7 @@ static void __exit floppy_module_exit(void)
platform_device_unregister(&floppy_device[drive]);
}
blk_cleanup_queue(disks[drive]->queue);
blk_mq_free_tag_set(&tag_sets[drive]);

/*
* These disks have not called add_disk(). Don't put down
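The floppy hunks above, and the pcd/pd/pf/swim conversions further down, all follow the same shape: a blk_mq_ops with a .queue_rq handler that queues the request onto a driver list, a per-segment completion built on blk_update_request()/__blk_mq_end_request(), and a queue created with the 4.20-era blk_mq_init_sq_queue() helper. A minimal sketch of that shape follows; every mydrv_* name is hypothetical and this is not the patch itself.

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static LIST_HEAD(mydrv_reqs);		/* driver-internal queue, like floppy_reqs */
static DEFINE_SPINLOCK(mydrv_lock);
static struct blk_mq_tag_set mydrv_tag_set;

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	blk_mq_start_request(bd->rq);

	spin_lock_irq(&mydrv_lock);
	list_add_tail(&bd->rq->queuelist, &mydrv_reqs);
	spin_unlock_irq(&mydrv_lock);

	/* kick the driver's state machine here */
	return BLK_STS_OK;
}

/* Per-segment completion: keep the request alive while bytes remain. */
static void mydrv_end_cur_segment(struct request *rq, blk_status_t error)
{
	if (blk_update_request(rq, error, blk_rq_cur_bytes(rq)))
		return;			/* more segments remain */
	__blk_mq_end_request(rq, error);
}

static const struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq	= mydrv_queue_rq,
};

static struct request_queue *mydrv_init_queue(void)
{
	/* one hw queue, depth 2, as most of these drivers use */
	return blk_mq_init_sq_queue(&mydrv_tag_set, &mydrv_mq_ops, 2,
				    BLK_MQ_F_SHOULD_MERGE);
}

The blk_update_request()/__blk_mq_end_request() pair replaces the old __blk_end_request_cur() calls: it consumes the current chunk and only finishes the request once nothing is left, which is exactly what floppy_end_request() and next_request() do above and below.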
|
|
@@ -77,6 +77,7 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>

#include "loop.h"

@@ -1760,8 +1761,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,

/* always use the first bio's css */
#ifdef CONFIG_BLK_CGROUP
if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
cmd->css = rq->bio->bi_css;
if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
cmd->css = &bio_blkcg(rq->bio)->css;
css_get(cmd->css);
} else
#endif
|
|
|
@@ -1862,11 +1862,9 @@ static int exec_drive_taskfile(struct driver_data *dd,
if (IS_ERR(outbuf))
return PTR_ERR(outbuf);

outbuf_dma = pci_map_single(dd->pdev,
outbuf,
taskout,
DMA_TO_DEVICE);
if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) {
outbuf_dma = dma_map_single(&dd->pdev->dev, outbuf,
taskout, DMA_TO_DEVICE);
if (dma_mapping_error(&dd->pdev->dev, outbuf_dma)) {
err = -ENOMEM;
goto abort;
}

@@ -1880,10 +1878,9 @@ static int exec_drive_taskfile(struct driver_data *dd,
inbuf = NULL;
goto abort;
}
inbuf_dma = pci_map_single(dd->pdev,
inbuf,
taskin, DMA_FROM_DEVICE);
if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) {
inbuf_dma = dma_map_single(&dd->pdev->dev, inbuf,
taskin, DMA_FROM_DEVICE);
if (dma_mapping_error(&dd->pdev->dev, inbuf_dma)) {
err = -ENOMEM;
goto abort;
}

@@ -2002,11 +1999,11 @@ static int exec_drive_taskfile(struct driver_data *dd,

/* reclaim the DMA buffers.*/
if (inbuf_dma)
pci_unmap_single(dd->pdev, inbuf_dma,
taskin, DMA_FROM_DEVICE);
dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin,
DMA_FROM_DEVICE);
if (outbuf_dma)
pci_unmap_single(dd->pdev, outbuf_dma,
taskout, DMA_TO_DEVICE);
dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout,
DMA_TO_DEVICE);
inbuf_dma = 0;
outbuf_dma = 0;

@@ -2053,11 +2050,11 @@ static int exec_drive_taskfile(struct driver_data *dd,
}
abort:
if (inbuf_dma)
pci_unmap_single(dd->pdev, inbuf_dma,
taskin, DMA_FROM_DEVICE);
dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin,
DMA_FROM_DEVICE);
if (outbuf_dma)
pci_unmap_single(dd->pdev, outbuf_dma,
taskout, DMA_TO_DEVICE);
dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout,
DMA_TO_DEVICE);
kfree(outbuf);
kfree(inbuf);

@@ -3861,7 +3858,7 @@ skip_create_disk:
set_capacity(dd->disk, capacity);

/* Enable the block device and add it to /dev */
device_add_disk(&dd->pdev->dev, dd->disk);
device_add_disk(&dd->pdev->dev, dd->disk, NULL);

dd->bdev = bdget_disk(dd->disk, 0);
/*

@@ -4216,18 +4213,10 @@ static int mtip_pci_probe(struct pci_dev *pdev,
goto iomap_err;
}

if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));

if (rv) {
rv = pci_set_consistent_dma_mask(pdev,
DMA_BIT_MASK(32));
if (rv) {
dev_warn(&pdev->dev,
"64-bit DMA enable failed\n");
goto setmask_err;
}
}
rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (rv) {
dev_warn(&pdev->dev, "64-bit DMA enable failed\n");
goto setmask_err;
}

/* Copy the info we may need later into the private data structure. */
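The mtip32xx hunks above, like the rsxx and skd changes later in this series, drop the legacy pci_set_dma_mask()/pci_map_single() wrappers for the generic DMA API. A minimal sketch of that pattern follows; the function and buffer names are hypothetical and only illustrate the call sequence the conversions use.

#include <linux/dma-mapping.h>
#include <linux/pci.h>

/* Hypothetical probe-time snippet: set the DMA masks, then map one
 * streaming buffer for a device-bound transfer. */
static int example_dma_setup(struct pci_dev *pdev, void *buf, size_t len,
			     dma_addr_t *out)
{
	dma_addr_t addr;
	int rc;

	/* One call now sets both the streaming and the coherent mask. */
	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (rc)
		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
	if (rc)
		return rc;

	addr = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, addr))
		return -ENOMEM;

	*out = addr;
	return 0;
}

/* Tear-down mirrors the mapping:
 * dma_unmap_single(&pdev->dev, *out, len, DMA_TO_DEVICE); */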
||||
|
|
|
@ -606,20 +606,12 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
|
|||
|
||||
static void end_cmd(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct request_queue *q = NULL;
|
||||
int queue_mode = cmd->nq->dev->queue_mode;
|
||||
|
||||
if (cmd->rq)
|
||||
q = cmd->rq->q;
|
||||
|
||||
switch (queue_mode) {
|
||||
case NULL_Q_MQ:
|
||||
blk_mq_end_request(cmd->rq, cmd->error);
|
||||
return;
|
||||
case NULL_Q_RQ:
|
||||
INIT_LIST_HEAD(&cmd->rq->queuelist);
|
||||
blk_end_request_all(cmd->rq, cmd->error);
|
||||
break;
|
||||
case NULL_Q_BIO:
|
||||
cmd->bio->bi_status = cmd->error;
|
||||
bio_endio(cmd->bio);
|
||||
|
@ -627,15 +619,6 @@ static void end_cmd(struct nullb_cmd *cmd)
|
|||
}
|
||||
|
||||
free_cmd(cmd);
|
||||
|
||||
/* Restart queue if needed, as we are freeing a tag */
|
||||
if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
blk_start_queue_async(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
|
||||
|
@ -1136,25 +1119,14 @@ static void null_stop_queue(struct nullb *nullb)
|
|||
|
||||
if (nullb->dev->queue_mode == NULL_Q_MQ)
|
||||
blk_mq_stop_hw_queues(q);
|
||||
else {
|
||||
spin_lock_irq(q->queue_lock);
|
||||
blk_stop_queue(q);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void null_restart_queue_async(struct nullb *nullb)
|
||||
{
|
||||
struct request_queue *q = nullb->q;
|
||||
unsigned long flags;
|
||||
|
||||
if (nullb->dev->queue_mode == NULL_Q_MQ)
|
||||
blk_mq_start_stopped_hw_queues(q, true);
|
||||
else {
|
||||
spin_lock_irqsave(q->queue_lock, flags);
|
||||
blk_start_queue_async(q);
|
||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd)
|
||||
|
@ -1197,17 +1169,8 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
|
|||
/* race with timer */
|
||||
if (atomic_long_read(&nullb->cur_bytes) > 0)
|
||||
null_restart_queue_async(nullb);
|
||||
if (dev->queue_mode == NULL_Q_RQ) {
|
||||
struct request_queue *q = nullb->q;
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
rq->rq_flags |= RQF_DONTPREP;
|
||||
blk_requeue_request(q, rq);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
return BLK_STS_OK;
|
||||
} else
|
||||
/* requeue request */
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
/* requeue request */
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1278,9 +1241,6 @@ out:
|
|||
case NULL_Q_MQ:
|
||||
blk_mq_complete_request(cmd->rq);
|
||||
break;
|
||||
case NULL_Q_RQ:
|
||||
blk_complete_request(cmd->rq);
|
||||
break;
|
||||
case NULL_Q_BIO:
|
||||
/*
|
||||
* XXX: no proper submitting cpu information available.
|
||||
|
@ -1349,30 +1309,6 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
|
|||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
|
||||
{
|
||||
pr_info("null: rq %p timed out\n", rq);
|
||||
__blk_complete_request(rq);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
|
||||
static int null_rq_prep_fn(struct request_queue *q, struct request *req)
|
||||
{
|
||||
struct nullb *nullb = q->queuedata;
|
||||
struct nullb_queue *nq = nullb_to_queue(nullb);
|
||||
struct nullb_cmd *cmd;
|
||||
|
||||
cmd = alloc_cmd(nq, 0);
|
||||
if (cmd) {
|
||||
cmd->rq = req;
|
||||
req->special = cmd;
|
||||
return BLKPREP_OK;
|
||||
}
|
||||
blk_stop_queue(q);
|
||||
|
||||
return BLKPREP_DEFER;
|
||||
}
|
||||
|
||||
static bool should_timeout_request(struct request *rq)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
|
@ -1391,27 +1327,6 @@ static bool should_requeue_request(struct request *rq)
|
|||
return false;
|
||||
}
|
||||
|
||||
static void null_request_fn(struct request_queue *q)
|
||||
{
|
||||
struct request *rq;
|
||||
|
||||
while ((rq = blk_fetch_request(q)) != NULL) {
|
||||
struct nullb_cmd *cmd = rq->special;
|
||||
|
||||
/* just ignore the request */
|
||||
if (should_timeout_request(rq))
|
||||
continue;
|
||||
if (should_requeue_request(rq)) {
|
||||
blk_requeue_request(q, rq);
|
||||
continue;
|
||||
}
|
||||
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
null_handle_cmd(cmd);
|
||||
spin_lock_irq(q->queue_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
|
||||
{
|
||||
pr_info("null: rq %p timed out\n", rq);
|
||||
|
@ -1766,24 +1681,6 @@ static int null_add_dev(struct nullb_device *dev)
|
|||
rv = init_driver_queues(nullb);
|
||||
if (rv)
|
||||
goto out_cleanup_blk_queue;
|
||||
} else {
|
||||
nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock,
|
||||
dev->home_node);
|
||||
if (!nullb->q) {
|
||||
rv = -ENOMEM;
|
||||
goto out_cleanup_queues;
|
||||
}
|
||||
|
||||
if (!null_setup_fault())
|
||||
goto out_cleanup_blk_queue;
|
||||
|
||||
blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
|
||||
blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
|
||||
blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn);
|
||||
nullb->q->rq_timeout = 5 * HZ;
|
||||
rv = init_driver_queues(nullb);
|
||||
if (rv)
|
||||
goto out_cleanup_blk_queue;
|
||||
}
|
||||
|
||||
if (dev->mbps) {
|
||||
|
@ -1865,6 +1762,10 @@ static int __init null_init(void)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_RQ) {
|
||||
pr_err("null_blk: legacy IO path no longer available\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
|
||||
if (g_submit_queues != nr_online_nodes) {
|
||||
pr_warn("null_blk: submit_queues param is set to %u.\n",
|
||||
|
|
|
@ -137,7 +137,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
|
|||
#include <linux/delay.h>
|
||||
#include <linux/cdrom.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
@ -186,7 +186,8 @@ static int pcd_packet(struct cdrom_device_info *cdi,
|
|||
static int pcd_detect(void);
|
||||
static void pcd_probe_capabilities(void);
|
||||
static void do_pcd_read_drq(void);
|
||||
static void do_pcd_request(struct request_queue * q);
|
||||
static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd);
|
||||
static void do_pcd_read(void);
|
||||
|
||||
struct pcd_unit {
|
||||
|
@ -199,6 +200,8 @@ struct pcd_unit {
|
|||
char *name; /* pcd0, pcd1, etc */
|
||||
struct cdrom_device_info info; /* uniform cdrom interface */
|
||||
struct gendisk *disk;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct list_head rq_list;
|
||||
};
|
||||
|
||||
static struct pcd_unit pcd[PCD_UNITS];
|
||||
|
@ -292,6 +295,10 @@ static const struct cdrom_device_ops pcd_dops = {
|
|||
CDC_CD_RW,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops pcd_mq_ops = {
|
||||
.queue_rq = pcd_queue_rq,
|
||||
};
|
||||
|
||||
static void pcd_init_units(void)
|
||||
{
|
||||
struct pcd_unit *cd;
|
||||
|
@ -300,13 +307,19 @@ static void pcd_init_units(void)
|
|||
pcd_drive_count = 0;
|
||||
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
|
||||
struct gendisk *disk = alloc_disk(1);
|
||||
|
||||
if (!disk)
|
||||
continue;
|
||||
disk->queue = blk_init_queue(do_pcd_request, &pcd_lock);
|
||||
if (!disk->queue) {
|
||||
put_disk(disk);
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&cd->tag_set, &pcd_mq_ops,
|
||||
1, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
disk->queue = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&cd->rq_list);
|
||||
disk->queue->queuedata = cd;
|
||||
blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
|
||||
cd->disk = disk;
|
||||
cd->pi = &cd->pia;
|
||||
|
@ -748,18 +761,18 @@ static int pcd_queue;
|
|||
static int set_next_request(void)
|
||||
{
|
||||
struct pcd_unit *cd;
|
||||
struct request_queue *q;
|
||||
int old_pos = pcd_queue;
|
||||
|
||||
do {
|
||||
cd = &pcd[pcd_queue];
|
||||
q = cd->present ? cd->disk->queue : NULL;
|
||||
if (++pcd_queue == PCD_UNITS)
|
||||
pcd_queue = 0;
|
||||
if (q) {
|
||||
pcd_req = blk_fetch_request(q);
|
||||
if (pcd_req)
|
||||
break;
|
||||
if (cd->present && !list_empty(&cd->rq_list)) {
|
||||
pcd_req = list_first_entry(&cd->rq_list, struct request,
|
||||
queuelist);
|
||||
list_del_init(&pcd_req->queuelist);
|
||||
blk_mq_start_request(pcd_req);
|
||||
break;
|
||||
}
|
||||
} while (pcd_queue != old_pos);
|
||||
|
||||
|
@ -768,33 +781,41 @@ static int set_next_request(void)
|
|||
|
||||
static void pcd_request(void)
|
||||
{
|
||||
struct pcd_unit *cd;
|
||||
|
||||
if (pcd_busy)
|
||||
return;
|
||||
while (1) {
|
||||
if (!pcd_req && !set_next_request())
|
||||
return;
|
||||
|
||||
if (rq_data_dir(pcd_req) == READ) {
|
||||
struct pcd_unit *cd = pcd_req->rq_disk->private_data;
|
||||
if (cd != pcd_current)
|
||||
pcd_bufblk = -1;
|
||||
pcd_current = cd;
|
||||
pcd_sector = blk_rq_pos(pcd_req);
|
||||
pcd_count = blk_rq_cur_sectors(pcd_req);
|
||||
pcd_buf = bio_data(pcd_req->bio);
|
||||
pcd_busy = 1;
|
||||
ps_set_intr(do_pcd_read, NULL, 0, nice);
|
||||
return;
|
||||
} else {
|
||||
__blk_end_request_all(pcd_req, BLK_STS_IOERR);
|
||||
pcd_req = NULL;
|
||||
}
|
||||
}
|
||||
if (!pcd_req && !set_next_request())
|
||||
return;
|
||||
|
||||
cd = pcd_req->rq_disk->private_data;
|
||||
if (cd != pcd_current)
|
||||
pcd_bufblk = -1;
|
||||
pcd_current = cd;
|
||||
pcd_sector = blk_rq_pos(pcd_req);
|
||||
pcd_count = blk_rq_cur_sectors(pcd_req);
|
||||
pcd_buf = bio_data(pcd_req->bio);
|
||||
pcd_busy = 1;
|
||||
ps_set_intr(do_pcd_read, NULL, 0, nice);
|
||||
}
|
||||
|
||||
static void do_pcd_request(struct request_queue *q)
|
||||
static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct pcd_unit *cd = hctx->queue->queuedata;
|
||||
|
||||
if (rq_data_dir(bd->rq) != READ) {
|
||||
blk_mq_start_request(bd->rq);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
spin_lock_irq(&pcd_lock);
|
||||
list_add_tail(&bd->rq->queuelist, &cd->rq_list);
|
||||
pcd_request();
|
||||
spin_unlock_irq(&pcd_lock);
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static inline void next_request(blk_status_t err)
|
||||
|
@ -802,8 +823,10 @@ static inline void next_request(blk_status_t err)
|
|||
unsigned long saved_flags;
|
||||
|
||||
spin_lock_irqsave(&pcd_lock, saved_flags);
|
||||
if (!__blk_end_request_cur(pcd_req, err))
|
||||
if (!blk_update_request(pcd_req, err, blk_rq_cur_bytes(pcd_req))) {
|
||||
__blk_mq_end_request(pcd_req, err);
|
||||
pcd_req = NULL;
|
||||
}
|
||||
pcd_busy = 0;
|
||||
pcd_request();
|
||||
spin_unlock_irqrestore(&pcd_lock, saved_flags);
|
||||
|
@ -1011,6 +1034,7 @@ static void __exit pcd_exit(void)
|
|||
unregister_cdrom(&cd->info);
|
||||
}
|
||||
blk_cleanup_queue(cd->disk->queue);
|
||||
blk_mq_free_tag_set(&cd->tag_set);
|
||||
put_disk(cd->disk);
|
||||
}
|
||||
unregister_blkdev(major, name);
|
||||
|
|
|
@ -151,7 +151,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
|
|||
#include <linux/delay.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/cdrom.h> /* for the eject ioctl */
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mutex.h>
|
||||
|
@ -236,6 +236,8 @@ struct pd_unit {
|
|||
int alt_geom;
|
||||
char name[PD_NAMELEN]; /* pda, pdb, etc ... */
|
||||
struct gendisk *gd;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct list_head rq_list;
|
||||
};
|
||||
|
||||
static struct pd_unit pd[PD_UNITS];
|
||||
|
@ -399,9 +401,17 @@ static int set_next_request(void)
|
|||
if (++pd_queue == PD_UNITS)
|
||||
pd_queue = 0;
|
||||
if (q) {
|
||||
pd_req = blk_fetch_request(q);
|
||||
if (pd_req)
|
||||
break;
|
||||
struct pd_unit *disk = q->queuedata;
|
||||
|
||||
if (list_empty(&disk->rq_list))
|
||||
continue;
|
||||
|
||||
pd_req = list_first_entry(&disk->rq_list,
|
||||
struct request,
|
||||
queuelist);
|
||||
list_del_init(&pd_req->queuelist);
|
||||
blk_mq_start_request(pd_req);
|
||||
break;
|
||||
}
|
||||
} while (pd_queue != old_pos);
|
||||
|
||||
|
@ -412,7 +422,6 @@ static void run_fsm(void)
|
|||
{
|
||||
while (1) {
|
||||
enum action res;
|
||||
unsigned long saved_flags;
|
||||
int stop = 0;
|
||||
|
||||
if (!phase) {
|
||||
|
@ -433,19 +442,24 @@ static void run_fsm(void)
|
|||
}
|
||||
|
||||
switch(res = phase()) {
|
||||
case Ok: case Fail:
|
||||
case Ok: case Fail: {
|
||||
blk_status_t err;
|
||||
|
||||
err = res == Ok ? 0 : BLK_STS_IOERR;
|
||||
pi_disconnect(pi_current);
|
||||
pd_claimed = 0;
|
||||
phase = NULL;
|
||||
spin_lock_irqsave(&pd_lock, saved_flags);
|
||||
if (!__blk_end_request_cur(pd_req,
|
||||
res == Ok ? 0 : BLK_STS_IOERR)) {
|
||||
if (!set_next_request())
|
||||
stop = 1;
|
||||
spin_lock_irq(&pd_lock);
|
||||
if (!blk_update_request(pd_req, err,
|
||||
blk_rq_cur_bytes(pd_req))) {
|
||||
__blk_mq_end_request(pd_req, err);
|
||||
pd_req = NULL;
|
||||
stop = !set_next_request();
|
||||
}
|
||||
spin_unlock_irqrestore(&pd_lock, saved_flags);
|
||||
spin_unlock_irq(&pd_lock);
|
||||
if (stop)
|
||||
return;
|
||||
}
|
||||
/* fall through */
|
||||
case Hold:
|
||||
schedule_fsm();
|
||||
|
@ -505,11 +519,17 @@ static int pd_next_buf(void)
|
|||
if (pd_count)
|
||||
return 0;
|
||||
spin_lock_irqsave(&pd_lock, saved_flags);
|
||||
__blk_end_request_cur(pd_req, 0);
|
||||
pd_count = blk_rq_cur_sectors(pd_req);
|
||||
pd_buf = bio_data(pd_req->bio);
|
||||
if (!blk_update_request(pd_req, 0, blk_rq_cur_bytes(pd_req))) {
|
||||
__blk_mq_end_request(pd_req, 0);
|
||||
pd_req = NULL;
|
||||
pd_count = 0;
|
||||
pd_buf = NULL;
|
||||
} else {
|
||||
pd_count = blk_rq_cur_sectors(pd_req);
|
||||
pd_buf = bio_data(pd_req->bio);
|
||||
}
|
||||
spin_unlock_irqrestore(&pd_lock, saved_flags);
|
||||
return 0;
|
||||
return !pd_count;
|
||||
}
|
||||
|
||||
static unsigned long pd_timeout;
|
||||
|
@ -726,15 +746,21 @@ static enum action pd_identify(struct pd_unit *disk)
|
|||
|
||||
/* end of io request engine */
|
||||
|
||||
static void do_pd_request(struct request_queue * q)
|
||||
static blk_status_t pd_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
if (pd_req)
|
||||
return;
|
||||
pd_req = blk_fetch_request(q);
|
||||
if (!pd_req)
|
||||
return;
|
||||
struct pd_unit *disk = hctx->queue->queuedata;
|
||||
|
||||
schedule_fsm();
|
||||
spin_lock_irq(&pd_lock);
|
||||
if (!pd_req) {
|
||||
pd_req = bd->rq;
|
||||
blk_mq_start_request(pd_req);
|
||||
} else
|
||||
list_add_tail(&bd->rq->queuelist, &disk->rq_list);
|
||||
spin_unlock_irq(&pd_lock);
|
||||
|
||||
run_fsm();
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int pd_special_command(struct pd_unit *disk,
|
||||
|
@ -847,23 +873,33 @@ static const struct block_device_operations pd_fops = {
|
|||
|
||||
/* probing */
|
||||
|
||||
static const struct blk_mq_ops pd_mq_ops = {
|
||||
.queue_rq = pd_queue_rq,
|
||||
};
|
||||
|
||||
static void pd_probe_drive(struct pd_unit *disk)
|
||||
{
|
||||
struct gendisk *p = alloc_disk(1 << PD_BITS);
|
||||
struct gendisk *p;
|
||||
|
||||
p = alloc_disk(1 << PD_BITS);
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
strcpy(p->disk_name, disk->name);
|
||||
p->fops = &pd_fops;
|
||||
p->major = major;
|
||||
p->first_minor = (disk - pd) << PD_BITS;
|
||||
disk->gd = p;
|
||||
p->private_data = disk;
|
||||
p->queue = blk_init_queue(do_pd_request, &pd_lock);
|
||||
if (!p->queue) {
|
||||
disk->gd = NULL;
|
||||
put_disk(p);
|
||||
|
||||
p->queue = blk_mq_init_sq_queue(&disk->tag_set, &pd_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
|
||||
if (IS_ERR(p->queue)) {
|
||||
p->queue = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
p->queue->queuedata = disk;
|
||||
blk_queue_max_hw_sectors(p->queue, cluster);
|
||||
blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
|
||||
|
||||
|
@ -895,6 +931,7 @@ static int pd_detect(void)
|
|||
disk->standby = parm[D_SBY];
|
||||
if (parm[D_PRT])
|
||||
pd_drive_count++;
|
||||
INIT_LIST_HEAD(&disk->rq_list);
|
||||
}
|
||||
|
||||
par_drv = pi_register_driver(name);
|
||||
|
@ -972,6 +1009,7 @@ static void __exit pd_exit(void)
|
|||
disk->gd = NULL;
|
||||
del_gendisk(p);
|
||||
blk_cleanup_queue(p->queue);
|
||||
blk_mq_free_tag_set(&disk->tag_set);
|
||||
put_disk(p);
|
||||
pi_release(disk->pi);
|
||||
}
|
||||
|
|
|
@ -152,7 +152,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY};
|
|||
#include <linux/hdreg.h>
|
||||
#include <linux/cdrom.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
@ -206,7 +206,8 @@ module_param_array(drive3, int, NULL, 0);
|
|||
#define ATAPI_WRITE_10 0x2a
|
||||
|
||||
static int pf_open(struct block_device *bdev, fmode_t mode);
|
||||
static void do_pf_request(struct request_queue * q);
|
||||
static blk_status_t pf_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd);
|
||||
static int pf_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
unsigned int cmd, unsigned long arg);
|
||||
static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo);
|
||||
|
@ -238,6 +239,8 @@ struct pf_unit {
|
|||
int present; /* device present ? */
|
||||
char name[PF_NAMELEN]; /* pf0, pf1, ... */
|
||||
struct gendisk *disk;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct list_head rq_list;
|
||||
};
|
||||
|
||||
static struct pf_unit units[PF_UNITS];
|
||||
|
@ -277,6 +280,10 @@ static const struct block_device_operations pf_fops = {
|
|||
.check_events = pf_check_events,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops pf_mq_ops = {
|
||||
.queue_rq = pf_queue_rq,
|
||||
};
|
||||
|
||||
static void __init pf_init_units(void)
|
||||
{
|
||||
struct pf_unit *pf;
|
||||
|
@ -284,14 +291,22 @@ static void __init pf_init_units(void)
|
|||
|
||||
pf_drive_count = 0;
|
||||
for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) {
|
||||
struct gendisk *disk = alloc_disk(1);
|
||||
struct gendisk *disk;
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (!disk)
|
||||
continue;
|
||||
disk->queue = blk_init_queue(do_pf_request, &pf_spin_lock);
|
||||
if (!disk->queue) {
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&pf->tag_set, &pf_mq_ops,
|
||||
1, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
put_disk(disk);
|
||||
return;
|
||||
disk->queue = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&pf->rq_list);
|
||||
disk->queue->queuedata = pf;
|
||||
blk_queue_max_segments(disk->queue, cluster);
|
||||
blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
|
||||
pf->disk = disk;
|
||||
|
@ -784,18 +799,18 @@ static int pf_queue;
|
|||
static int set_next_request(void)
|
||||
{
|
||||
struct pf_unit *pf;
|
||||
struct request_queue *q;
|
||||
int old_pos = pf_queue;
|
||||
|
||||
do {
|
||||
pf = &units[pf_queue];
|
||||
q = pf->present ? pf->disk->queue : NULL;
|
||||
if (++pf_queue == PF_UNITS)
|
||||
pf_queue = 0;
|
||||
if (q) {
|
||||
pf_req = blk_fetch_request(q);
|
||||
if (pf_req)
|
||||
break;
|
||||
if (pf->present && !list_empty(&pf->rq_list)) {
|
||||
pf_req = list_first_entry(&pf->rq_list, struct request,
|
||||
queuelist);
|
||||
list_del_init(&pf_req->queuelist);
|
||||
blk_mq_start_request(pf_req);
|
||||
break;
|
||||
}
|
||||
} while (pf_queue != old_pos);
|
||||
|
||||
|
@ -804,8 +819,12 @@ static int set_next_request(void)
|
|||
|
||||
static void pf_end_request(blk_status_t err)
|
||||
{
|
||||
if (pf_req && !__blk_end_request_cur(pf_req, err))
|
||||
if (!pf_req)
|
||||
return;
|
||||
if (!blk_update_request(pf_req, err, blk_rq_cur_bytes(pf_req))) {
|
||||
__blk_mq_end_request(pf_req, err);
|
||||
pf_req = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void pf_request(void)
|
||||
|
@ -842,9 +861,17 @@ repeat:
|
|||
}
|
||||
}
|
||||
|
||||
static void do_pf_request(struct request_queue *q)
|
||||
static blk_status_t pf_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct pf_unit *pf = hctx->queue->queuedata;
|
||||
|
||||
spin_lock_irq(&pf_spin_lock);
|
||||
list_add_tail(&bd->rq->queuelist, &pf->rq_list);
|
||||
pf_request();
|
||||
spin_unlock_irq(&pf_spin_lock);
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int pf_next_buf(void)
|
||||
|
@ -1024,6 +1051,7 @@ static void __exit pf_exit(void)
|
|||
continue;
|
||||
del_gendisk(pf->disk);
|
||||
blk_cleanup_queue(pf->disk->queue);
|
||||
blk_mq_free_tag_set(&pf->tag_set);
|
||||
put_disk(pf->disk);
|
||||
pi_release(pf->pi);
|
||||
}
|
||||
|
|
|
@ -2645,7 +2645,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
|
|||
*/
|
||||
if (pd->refcnt == 1)
|
||||
pkt_lock_door(pd, 0);
|
||||
/* fallthru */
|
||||
/* fall through */
|
||||
/*
|
||||
* forward selected CDROM ioctls to CD-ROM, for UDF
|
||||
*/
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
|
||||
#include <linux/ata.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
|||
struct ps3disk_private {
|
||||
spinlock_t lock; /* Request queue spinlock */
|
||||
struct request_queue *queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *gendisk;
|
||||
unsigned int blocking_factor;
|
||||
struct request *req;
|
||||
|
@ -118,8 +119,8 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
|
|||
}
|
||||
}
|
||||
|
||||
static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
|
||||
struct request *req)
|
||||
static blk_status_t ps3disk_submit_request_sg(struct ps3_storage_device *dev,
|
||||
struct request *req)
|
||||
{
|
||||
struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
|
||||
int write = rq_data_dir(req), res;
|
||||
|
@ -158,16 +159,15 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
|
|||
if (res) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
|
||||
__LINE__, op, res);
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
return 0;
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
priv->req = req;
|
||||
return 1;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
|
||||
struct request *req)
|
||||
static blk_status_t ps3disk_submit_flush_request(struct ps3_storage_device *dev,
|
||||
struct request *req)
|
||||
{
|
||||
struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
|
||||
u64 res;
|
||||
|
@ -180,50 +180,45 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
|
|||
if (res) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
|
||||
__func__, __LINE__, res);
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
return 0;
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
priv->req = req;
|
||||
return 1;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void ps3disk_do_request(struct ps3_storage_device *dev,
|
||||
struct request_queue *q)
|
||||
static blk_status_t ps3disk_do_request(struct ps3_storage_device *dev,
|
||||
struct request *req)
|
||||
{
|
||||
struct request *req;
|
||||
|
||||
dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
|
||||
|
||||
while ((req = blk_fetch_request(q))) {
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_FLUSH:
|
||||
if (ps3disk_submit_flush_request(dev, req))
|
||||
return;
|
||||
break;
|
||||
case REQ_OP_READ:
|
||||
case REQ_OP_WRITE:
|
||||
if (ps3disk_submit_request_sg(dev, req))
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
blk_dump_rq_flags(req, DEVICE_NAME " bad request");
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
}
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_FLUSH:
|
||||
return ps3disk_submit_flush_request(dev, req);
|
||||
case REQ_OP_READ:
|
||||
case REQ_OP_WRITE:
|
||||
return ps3disk_submit_request_sg(dev, req);
|
||||
default:
|
||||
blk_dump_rq_flags(req, DEVICE_NAME " bad request");
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
}
|
||||
|
||||
static void ps3disk_request(struct request_queue *q)
|
||||
static blk_status_t ps3disk_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct ps3_storage_device *dev = q->queuedata;
|
||||
struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
|
||||
blk_status_t ret;
|
||||
|
||||
if (priv->req) {
|
||||
dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__);
|
||||
return;
|
||||
}
|
||||
blk_mq_start_request(bd->rq);
|
||||
|
||||
ps3disk_do_request(dev, q);
|
||||
spin_lock_irq(&priv->lock);
|
||||
ret = ps3disk_do_request(dev, bd->rq);
|
||||
spin_unlock_irq(&priv->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static irqreturn_t ps3disk_interrupt(int irq, void *data)
|
||||
|
@ -280,11 +275,11 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
|
|||
}
|
||||
|
||||
spin_lock(&priv->lock);
|
||||
__blk_end_request_all(req, error);
|
||||
priv->req = NULL;
|
||||
ps3disk_do_request(dev, priv->queue);
|
||||
blk_mq_end_request(req, error);
|
||||
spin_unlock(&priv->lock);
|
||||
|
||||
blk_mq_run_hw_queues(priv->queue, true);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -404,6 +399,10 @@ static unsigned long ps3disk_mask;
|
|||
|
||||
static DEFINE_MUTEX(ps3disk_mask_mutex);
|
||||
|
||||
static const struct blk_mq_ops ps3disk_mq_ops = {
|
||||
.queue_rq = ps3disk_queue_rq,
|
||||
};
|
||||
|
||||
static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
{
|
||||
struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core);
|
||||
|
@ -454,11 +453,12 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
|||
|
||||
ps3disk_identify(dev);
|
||||
|
||||
queue = blk_init_queue(ps3disk_request, &priv->lock);
|
||||
if (!queue) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n",
|
||||
queue = blk_mq_init_sq_queue(&priv->tag_set, &ps3disk_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(queue)) {
|
||||
dev_err(&dev->sbd.core, "%s:%u: blk_mq_init_queue failed\n",
|
||||
__func__, __LINE__);
|
||||
error = -ENOMEM;
|
||||
error = PTR_ERR(queue);
|
||||
goto fail_teardown;
|
||||
}
|
||||
|
||||
|
@ -500,11 +500,12 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
|||
gendisk->disk_name, priv->model, priv->raw_capacity >> 11,
|
||||
get_capacity(gendisk) >> 11);
|
||||
|
||||
device_add_disk(&dev->sbd.core, gendisk);
|
||||
device_add_disk(&dev->sbd.core, gendisk, NULL);
|
||||
return 0;
|
||||
|
||||
fail_cleanup_queue:
|
||||
blk_cleanup_queue(queue);
|
||||
blk_mq_free_tag_set(&priv->tag_set);
|
||||
fail_teardown:
|
||||
ps3stor_teardown(dev);
|
||||
fail_free_bounce:
|
||||
|
@ -530,6 +531,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev)
|
|||
mutex_unlock(&ps3disk_mask_mutex);
|
||||
del_gendisk(priv->gendisk);
|
||||
blk_cleanup_queue(priv->queue);
|
||||
blk_mq_free_tag_set(&priv->tag_set);
|
||||
put_disk(priv->gendisk);
|
||||
dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
|
||||
ps3disk_sync_cache(dev);
|
||||
|
|
|
@ -769,7 +769,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
|
|||
dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
|
||||
gendisk->disk_name, get_capacity(gendisk) >> 11);
|
||||
|
||||
device_add_disk(&dev->core, gendisk);
|
||||
device_add_disk(&dev->core, gendisk, NULL);
|
||||
return 0;
|
||||
|
||||
fail_cleanup_queue:
|
||||
|
|
|
@ -782,7 +782,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
|
|||
pci_set_master(dev);
|
||||
pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE);
|
||||
|
||||
st = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
|
||||
st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
|
||||
if (st) {
|
||||
dev_err(CARD_TO_DEV(card),
|
||||
"No usable DMA configuration,aborting\n");
|
||||
|
|
|
@ -276,7 +276,7 @@ static void creg_cmd_done(struct work_struct *work)
|
|||
st = -EIO;
|
||||
}
|
||||
|
||||
if ((cmd->op == CREG_OP_READ)) {
|
||||
if (cmd->op == CREG_OP_READ) {
|
||||
unsigned int cnt8 = ioread32(card->regmap + CREG_CNT);
|
||||
|
||||
/* Paranoid Sanity Checks */
|
||||
|
|
|
@ -226,7 +226,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card)
|
|||
set_capacity(card->gendisk, card->size8 >> 9);
|
||||
else
|
||||
set_capacity(card->gendisk, 0);
|
||||
device_add_disk(CARD_TO_DEV(card), card->gendisk);
|
||||
device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL);
|
||||
card->bdev_attached = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -224,12 +224,12 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
|
|||
static void rsxx_free_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma)
|
||||
{
|
||||
if (dma->cmd != HW_CMD_BLK_DISCARD) {
|
||||
if (!pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
|
||||
pci_unmap_page(ctrl->card->dev, dma->dma_addr,
|
||||
if (!dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) {
|
||||
dma_unmap_page(&ctrl->card->dev->dev, dma->dma_addr,
|
||||
get_dma_size(dma),
|
||||
dma->cmd == HW_CMD_BLK_WRITE ?
|
||||
PCI_DMA_TODEVICE :
|
||||
PCI_DMA_FROMDEVICE);
|
||||
DMA_TO_DEVICE :
|
||||
DMA_FROM_DEVICE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -438,23 +438,23 @@ static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
|
|||
|
||||
if (dma->cmd != HW_CMD_BLK_DISCARD) {
|
||||
if (dma->cmd == HW_CMD_BLK_WRITE)
|
||||
dir = PCI_DMA_TODEVICE;
|
||||
dir = DMA_TO_DEVICE;
|
||||
else
|
||||
dir = PCI_DMA_FROMDEVICE;
|
||||
dir = DMA_FROM_DEVICE;
|
||||
|
||||
/*
|
||||
* The function pci_map_page is placed here because we
|
||||
* The function dma_map_page is placed here because we
|
||||
* can only, by design, issue up to 255 commands to the
|
||||
* hardware at one time per DMA channel. So the maximum
|
||||
* amount of mapped memory would be 255 * 4 channels *
|
||||
* 4096 Bytes which is less than 2GB, the limit of a x8
|
||||
* Non-HWWD PCIe slot. This way the pci_map_page
|
||||
* Non-HWWD PCIe slot. This way the dma_map_page
|
||||
* function should never fail because of a lack of
|
||||
* mappable memory.
|
||||
*/
|
||||
dma->dma_addr = pci_map_page(ctrl->card->dev, dma->page,
|
||||
dma->dma_addr = dma_map_page(&ctrl->card->dev->dev, dma->page,
|
||||
dma->pg_off, dma->sub_page.cnt << 9, dir);
|
||||
if (pci_dma_mapping_error(ctrl->card->dev, dma->dma_addr)) {
|
||||
if (dma_mapping_error(&ctrl->card->dev->dev, dma->dma_addr)) {
|
||||
push_tracker(ctrl->trackers, tag);
|
||||
rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
|
||||
continue;
|
||||
|
@ -776,10 +776,10 @@ bvec_err:
|
|||
/*----------------- DMA Engine Initialization & Setup -------------------*/
|
||||
int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl)
|
||||
{
|
||||
ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8,
|
||||
&ctrl->status.dma_addr);
|
||||
ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8,
|
||||
&ctrl->cmd.dma_addr);
|
||||
ctrl->status.buf = dma_alloc_coherent(&dev->dev, STATUS_BUFFER_SIZE8,
|
||||
&ctrl->status.dma_addr, GFP_KERNEL);
|
||||
ctrl->cmd.buf = dma_alloc_coherent(&dev->dev, COMMAND_BUFFER_SIZE8,
|
||||
&ctrl->cmd.dma_addr, GFP_KERNEL);
|
||||
if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -962,12 +962,12 @@ failed_dma_setup:
|
|||
vfree(ctrl->trackers);
|
||||
|
||||
if (ctrl->status.buf)
|
||||
pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
|
||||
ctrl->status.buf,
|
||||
ctrl->status.dma_addr);
|
||||
dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8,
|
||||
ctrl->status.buf,
|
||||
ctrl->status.dma_addr);
|
||||
if (ctrl->cmd.buf)
|
||||
pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
|
||||
ctrl->cmd.buf, ctrl->cmd.dma_addr);
|
||||
dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8,
|
||||
ctrl->cmd.buf, ctrl->cmd.dma_addr);
|
||||
}
|
||||
|
||||
return st;
|
||||
|
@ -1023,10 +1023,10 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
|
|||
|
||||
vfree(ctrl->trackers);
|
||||
|
||||
pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
|
||||
ctrl->status.buf, ctrl->status.dma_addr);
|
||||
pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
|
||||
ctrl->cmd.buf, ctrl->cmd.dma_addr);
|
||||
dma_free_coherent(&card->dev->dev, STATUS_BUFFER_SIZE8,
|
||||
ctrl->status.buf, ctrl->status.dma_addr);
|
||||
dma_free_coherent(&card->dev->dev, COMMAND_BUFFER_SIZE8,
|
||||
ctrl->cmd.buf, ctrl->cmd.dma_addr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1059,11 +1059,11 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
|
|||
card->ctrl[i].stats.reads_issued--;
|
||||
|
||||
if (dma->cmd != HW_CMD_BLK_DISCARD) {
|
||||
pci_unmap_page(card->dev, dma->dma_addr,
|
||||
dma_unmap_page(&card->dev->dev, dma->dma_addr,
|
||||
get_dma_size(dma),
|
||||
dma->cmd == HW_CMD_BLK_WRITE ?
|
||||
PCI_DMA_TODEVICE :
|
||||
PCI_DMA_FROMDEVICE);
|
||||
DMA_TO_DEVICE :
|
||||
DMA_FROM_DEVICE);
|
||||
}
|
||||
|
||||
list_add_tail(&dma->list, &issued_dmas[i]);
|
||||
|
|
|
@ -632,7 +632,7 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
|
|||
* Map scatterlist to PCI bus addresses.
|
||||
* Note PCI might change the number of entries.
|
||||
*/
|
||||
n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir);
|
||||
n_sg = dma_map_sg(&skdev->pdev->dev, sgl, n_sg, skreq->data_dir);
|
||||
if (n_sg <= 0)
|
||||
return false;
|
||||
|
||||
|
@ -682,7 +682,8 @@ static void skd_postop_sg_list(struct skd_device *skdev,
|
|||
skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
|
||||
skreq->sksg_dma_address +
|
||||
((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
|
||||
pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir);
|
||||
dma_unmap_sg(&skdev->pdev->dev, &skreq->sg[0], skreq->n_sg,
|
||||
skreq->data_dir);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1416,7 +1417,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
|
|||
|
||||
case SKD_CHECK_STATUS_BUSY_IMMINENT:
|
||||
skd_log_skreq(skdev, skreq, "retry(busy)");
|
||||
blk_requeue_request(skdev->queue, req);
|
||||
blk_mq_requeue_request(req, true);
|
||||
dev_info(&skdev->pdev->dev, "drive BUSY imminent\n");
|
||||
skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT;
|
||||
skdev->timer_countdown = SKD_TIMER_MINUTES(20);
|
||||
|
@ -1426,7 +1427,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
|
|||
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
|
||||
if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
|
||||
skd_log_skreq(skdev, skreq, "retry");
|
||||
blk_requeue_request(skdev->queue, req);
|
||||
blk_mq_requeue_request(req, true);
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
|
@ -2632,8 +2633,8 @@ static int skd_cons_skcomp(struct skd_device *skdev)
|
|||
"comp pci_alloc, total bytes %zd entries %d\n",
|
||||
SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY);
|
||||
|
||||
skcomp = pci_zalloc_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
|
||||
&skdev->cq_dma_address);
|
||||
skcomp = dma_zalloc_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE,
|
||||
&skdev->cq_dma_address, GFP_KERNEL);
|
||||
|
||||
if (skcomp == NULL) {
|
||||
rc = -ENOMEM;
|
||||
|
@ -2674,10 +2675,10 @@ static int skd_cons_skmsg(struct skd_device *skdev)
|
|||
|
||||
skmsg->id = i + SKD_ID_FIT_MSG;
|
||||
|
||||
skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
|
||||
SKD_N_FITMSG_BYTES,
|
||||
&skmsg->mb_dma_address);
|
||||
|
||||
skmsg->msg_buf = dma_alloc_coherent(&skdev->pdev->dev,
|
||||
SKD_N_FITMSG_BYTES,
|
||||
&skmsg->mb_dma_address,
|
||||
GFP_KERNEL);
|
||||
if (skmsg->msg_buf == NULL) {
|
||||
rc = -ENOMEM;
|
||||
goto err_out;
|
||||
|
@ -2971,8 +2972,8 @@ err_out:
|
|||
static void skd_free_skcomp(struct skd_device *skdev)
|
||||
{
|
||||
if (skdev->skcomp_table)
|
||||
pci_free_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
|
||||
skdev->skcomp_table, skdev->cq_dma_address);
|
||||
dma_free_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE,
|
||||
skdev->skcomp_table, skdev->cq_dma_address);
|
||||
|
||||
skdev->skcomp_table = NULL;
|
||||
skdev->cq_dma_address = 0;
|
||||
|
@ -2991,8 +2992,8 @@ static void skd_free_skmsg(struct skd_device *skdev)
|
|||
skmsg = &skdev->skmsg_table[i];
|
||||
|
||||
if (skmsg->msg_buf != NULL) {
|
||||
pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES,
|
||||
skmsg->msg_buf,
|
||||
dma_free_coherent(&skdev->pdev->dev, SKD_N_FITMSG_BYTES,
|
||||
skmsg->msg_buf,
|
||||
skmsg->mb_dma_address);
|
||||
}
|
||||
skmsg->msg_buf = NULL;
|
||||
|
@ -3104,7 +3105,7 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
|||
static int skd_bdev_attach(struct device *parent, struct skd_device *skdev)
|
||||
{
|
||||
dev_dbg(&skdev->pdev->dev, "add_disk\n");
|
||||
device_add_disk(parent, skdev->disk);
|
||||
device_add_disk(parent, skdev->disk, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3172,18 +3173,12 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
rc = pci_request_regions(pdev, DRV_NAME);
|
||||
if (rc)
|
||||
goto err_out;
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (!rc) {
|
||||
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
dev_err(&pdev->dev, "consistent DMA mask error %d\n",
|
||||
rc);
|
||||
}
|
||||
} else {
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
if (rc) {
|
||||
dev_err(&pdev->dev, "DMA mask error %d\n", rc);
|
||||
goto err_out_regions;
|
||||
}
|
||||
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
|
||||
if (rc)
|
||||
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
|
||||
if (rc) {
|
||||
dev_err(&pdev->dev, "DMA mask error %d\n", rc);
|
||||
goto err_out_regions;
|
||||
}
|
||||
|
||||
if (!skd_major) {
|
||||
|
@ -3367,20 +3362,12 @@ static int skd_pci_resume(struct pci_dev *pdev)
|
|||
rc = pci_request_regions(pdev, DRV_NAME);
|
||||
if (rc)
|
||||
goto err_out;
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (!rc) {
|
||||
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||
|
||||
dev_err(&pdev->dev, "consistent DMA mask error %d\n",
|
||||
rc);
|
||||
}
|
||||
} else {
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
if (rc) {
|
||||
|
||||
dev_err(&pdev->dev, "DMA mask error %d\n", rc);
|
||||
goto err_out_regions;
|
||||
}
|
||||
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
|
||||
if (rc)
|
||||
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
|
||||
if (rc) {
|
||||
dev_err(&pdev->dev, "DMA mask error %d\n", rc);
|
||||
goto err_out_regions;
|
||||
}
|
||||
|
||||
pci_set_master(pdev);
|
||||
|
|
|
@ -857,7 +857,7 @@ static int probe_disk(struct vdc_port *port)
|
|||
port->vdisk_size, (port->vdisk_size >> (20 - 9)),
|
||||
port->vio.ver.major, port->vio.ver.minor);
|
||||
|
||||
device_add_disk(&port->vio.vdev->dev, g);
|
||||
device_add_disk(&port->vio.vdev->dev, g, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/fd.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/kernel.h>
|
||||
|
@ -190,6 +190,7 @@ struct floppy_state {
|
|||
int ref_count;
|
||||
|
||||
struct gendisk *disk;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
|
||||
/* parent controller */
|
||||
|
||||
|
@ -211,7 +212,6 @@ enum head {
|
|||
struct swim_priv {
|
||||
struct swim __iomem *base;
|
||||
spinlock_t lock;
|
||||
int fdc_queue;
|
||||
int floppy_count;
|
||||
struct floppy_state unit[FD_MAX_UNIT];
|
||||
};
|
||||
|
@ -525,58 +525,36 @@ static blk_status_t floppy_read_sectors(struct floppy_state *fs,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct request *swim_next_request(struct swim_priv *swd)
|
||||
static blk_status_t swim_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct request *rq;
|
||||
int old_pos = swd->fdc_queue;
|
||||
struct floppy_state *fs = hctx->queue->queuedata;
|
||||
struct swim_priv *swd = fs->swd;
|
||||
struct request *req = bd->rq;
|
||||
blk_status_t err;
|
||||
|
||||
if (!spin_trylock_irq(&swd->lock))
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
|
||||
blk_mq_start_request(req);
|
||||
|
||||
if (!fs->disk_in || rq_data_dir(req) == WRITE) {
|
||||
err = BLK_STS_IOERR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
q = swd->unit[swd->fdc_queue].disk->queue;
|
||||
if (++swd->fdc_queue == swd->floppy_count)
|
||||
swd->fdc_queue = 0;
|
||||
if (q) {
|
||||
rq = blk_fetch_request(q);
|
||||
if (rq)
|
||||
return rq;
|
||||
}
|
||||
} while (swd->fdc_queue != old_pos);
|
||||
err = floppy_read_sectors(fs, blk_rq_pos(req),
|
||||
blk_rq_cur_sectors(req),
|
||||
bio_data(req->bio));
|
||||
} while (blk_update_request(req, err, blk_rq_cur_bytes(req)));
|
||||
__blk_mq_end_request(req, err);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
err = BLK_STS_OK;
|
||||
out:
|
||||
spin_unlock_irq(&swd->lock);
|
||||
return err;
|
||||
|
||||
static void do_fd_request(struct request_queue *q)
|
||||
{
|
||||
struct swim_priv *swd = q->queuedata;
|
||||
struct request *req;
|
||||
struct floppy_state *fs;
|
||||
|
||||
req = swim_next_request(swd);
|
||||
while (req) {
|
||||
blk_status_t err = BLK_STS_IOERR;
|
||||
|
||||
fs = req->rq_disk->private_data;
|
||||
if (blk_rq_pos(req) >= fs->total_secs)
|
||||
goto done;
|
||||
if (!fs->disk_in)
|
||||
goto done;
|
||||
if (rq_data_dir(req) == WRITE && fs->write_protected)
|
||||
goto done;
|
||||
|
||||
switch (rq_data_dir(req)) {
|
||||
case WRITE:
|
||||
/* NOT IMPLEMENTED */
|
||||
break;
|
||||
case READ:
|
||||
err = floppy_read_sectors(fs, blk_rq_pos(req),
|
||||
blk_rq_cur_sectors(req),
|
||||
bio_data(req->bio));
|
||||
break;
|
||||
}
|
||||
done:
|
||||
if (!__blk_end_request_cur(req, err))
|
||||
req = swim_next_request(swd);
|
||||
}
|
||||
}
|
||||
|
||||
static struct floppy_struct floppy_type[4] = {
|
||||
|
@ -823,6 +801,10 @@ static int swim_add_floppy(struct swim_priv *swd, enum drive_location location)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static const struct blk_mq_ops swim_mq_ops = {
|
||||
.queue_rq = swim_queue_rq,
|
||||
};
|
||||
|
||||
static int swim_floppy_init(struct swim_priv *swd)
|
||||
{
|
||||
int err;
|
||||
|
@ -852,20 +834,25 @@ static int swim_floppy_init(struct swim_priv *swd)
|
|||
spin_lock_init(&swd->lock);
|
||||
|
||||
for (drive = 0; drive < swd->floppy_count; drive++) {
|
||||
struct request_queue *q;
|
||||
|
||||
swd->unit[drive].disk = alloc_disk(1);
|
||||
if (swd->unit[drive].disk == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto exit_put_disks;
|
||||
}
|
||||
swd->unit[drive].disk->queue = blk_init_queue(do_fd_request,
|
||||
&swd->lock);
|
||||
if (!swd->unit[drive].disk->queue) {
|
||||
err = -ENOMEM;
|
||||
|
||||
q = blk_mq_init_sq_queue(&swd->unit[drive].tag_set, &swim_mq_ops,
|
||||
2, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(q)) {
|
||||
err = PTR_ERR(q);
|
||||
goto exit_put_disks;
|
||||
}
|
||||
|
||||
swd->unit[drive].disk->queue = q;
|
||||
blk_queue_bounce_limit(swd->unit[drive].disk->queue,
|
||||
BLK_BOUNCE_HIGH);
|
||||
swd->unit[drive].disk->queue->queuedata = swd;
|
||||
swd->unit[drive].disk->queue->queuedata = &swd->unit[drive];
|
||||
swd->unit[drive].swd = swd;
|
||||
}
|
||||
|
||||
|
@ -887,8 +874,18 @@ static int swim_floppy_init(struct swim_priv *swd)
|
|||
|
||||
exit_put_disks:
|
||||
unregister_blkdev(FLOPPY_MAJOR, "fd");
|
||||
while (drive--)
|
||||
put_disk(swd->unit[drive].disk);
|
||||
do {
|
||||
struct gendisk *disk = swd->unit[drive].disk;
|
||||
|
||||
if (disk) {
|
||||
if (disk->queue) {
|
||||
blk_cleanup_queue(disk->queue);
|
||||
disk->queue = NULL;
|
||||
}
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
put_disk(disk);
|
||||
}
|
||||
} while (drive--);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -961,6 +958,7 @@ static int swim_remove(struct platform_device *dev)
|
|||
for (drive = 0; drive < swd->floppy_count; drive++) {
|
||||
del_gendisk(swd->unit[drive].disk);
|
||||
blk_cleanup_queue(swd->unit[drive].disk->queue);
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
put_disk(swd->unit[drive].disk);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include <linux/delay.h>
|
||||
#include <linux/fd.h>
|
||||
#include <linux/ioctl.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/module.h>
|
||||
|
@ -206,6 +206,7 @@ struct floppy_state {
|
|||
char dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)];
|
||||
int index;
|
||||
struct request *cur_req;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
};
|
||||
|
||||
#define swim3_err(fmt, arg...) dev_err(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg)
|
||||
|
@ -260,16 +261,15 @@ static int floppy_revalidate(struct gendisk *disk);
|
|||
static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
|
||||
{
|
||||
struct request *req = fs->cur_req;
|
||||
int rc;
|
||||
|
||||
swim3_dbg(" end request, err=%d nr_bytes=%d, cur_req=%p\n",
|
||||
err, nr_bytes, req);
|
||||
|
||||
if (err)
|
||||
nr_bytes = blk_rq_cur_bytes(req);
|
||||
rc = __blk_end_request(req, err, nr_bytes);
|
||||
if (rc)
|
||||
if (blk_update_request(req, err, nr_bytes))
|
||||
return true;
|
||||
__blk_mq_end_request(req, err);
|
||||
fs->cur_req = NULL;
|
||||
return false;
|
||||
}
|
||||
|
@ -309,86 +309,58 @@ static int swim3_readbit(struct floppy_state *fs, int bit)
|
|||
return (stat & DATA) == 0;
|
||||
}
|
||||
|
||||
static void start_request(struct floppy_state *fs)
|
||||
static blk_status_t swim3_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *req;
|
||||
struct floppy_state *fs = hctx->queue->queuedata;
|
||||
struct request *req = bd->rq;
|
||||
unsigned long x;
|
||||
|
||||
swim3_dbg("start request, initial state=%d\n", fs->state);
|
||||
|
||||
if (fs->state == idle && fs->wanted) {
|
||||
fs->state = available;
|
||||
wake_up(&fs->wait);
|
||||
return;
|
||||
spin_lock_irq(&swim3_lock);
|
||||
if (fs->cur_req || fs->state != idle) {
|
||||
spin_unlock_irq(&swim3_lock);
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
while (fs->state == idle) {
|
||||
swim3_dbg("start request, idle loop, cur_req=%p\n", fs->cur_req);
|
||||
if (!fs->cur_req) {
|
||||
fs->cur_req = blk_fetch_request(disks[fs->index]->queue);
|
||||
swim3_dbg(" fetched request %p\n", fs->cur_req);
|
||||
if (!fs->cur_req)
|
||||
break;
|
||||
}
|
||||
req = fs->cur_req;
|
||||
|
||||
if (fs->mdev->media_bay &&
|
||||
check_media_bay(fs->mdev->media_bay) != MB_FD) {
|
||||
swim3_dbg("%s", " media bay absent, dropping req\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
#if 0 /* This is really too verbose */
|
||||
swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
|
||||
req->rq_disk->disk_name, req->cmd,
|
||||
(long)blk_rq_pos(req), blk_rq_sectors(req),
|
||||
bio_data(req->bio));
|
||||
swim3_dbg(" current_nr_sectors=%u\n",
|
||||
blk_rq_cur_sectors(req));
|
||||
#endif
|
||||
|
||||
if (blk_rq_pos(req) >= fs->total_secs) {
|
||||
swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
|
||||
(long)blk_rq_pos(req), (long)fs->total_secs);
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
continue;
|
||||
}
|
||||
if (fs->ejected) {
|
||||
swim3_dbg("%s", " disk ejected\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rq_data_dir(req) == WRITE) {
|
||||
if (fs->write_prot < 0)
|
||||
fs->write_prot = swim3_readbit(fs, WRITE_PROT);
|
||||
if (fs->write_prot) {
|
||||
swim3_dbg("%s", " try to write, disk write protected\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do not remove the cast. blk_rq_pos(req) is now a
|
||||
* sector_t and can be 64 bits, but it will never go
|
||||
* past 32 bits for this driver anyway, so we can
|
||||
* safely cast it down and not have to do a 64/32
|
||||
* division
|
||||
*/
|
||||
fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl;
|
||||
x = ((long)blk_rq_pos(req)) % fs->secpercyl;
|
||||
fs->head = x / fs->secpertrack;
|
||||
fs->req_sector = x % fs->secpertrack + 1;
|
||||
fs->state = do_transfer;
|
||||
fs->retries = 0;
|
||||
|
||||
act(fs);
|
||||
blk_mq_start_request(req);
|
||||
fs->cur_req = req;
|
||||
if (fs->mdev->media_bay &&
|
||||
check_media_bay(fs->mdev->media_bay) != MB_FD) {
|
||||
swim3_dbg("%s", " media bay absent, dropping req\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
goto out;
|
||||
}
|
||||
if (fs->ejected) {
|
||||
swim3_dbg("%s", " disk ejected\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
goto out;
|
||||
}
|
||||
if (rq_data_dir(req) == WRITE) {
|
||||
if (fs->write_prot < 0)
|
||||
fs->write_prot = swim3_readbit(fs, WRITE_PROT);
|
||||
if (fs->write_prot) {
|
||||
swim3_dbg("%s", " try to write, disk write protected\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void do_fd_request(struct request_queue * q)
|
||||
{
|
||||
start_request(q->queuedata);
|
||||
/*
|
||||
* Do not remove the cast. blk_rq_pos(req) is now a sector_t and can be
|
||||
* 64 bits, but it will never go past 32 bits for this driver anyway, so
|
||||
* we can safely cast it down and not have to do a 64/32 division
|
||||
*/
|
||||
fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl;
|
||||
x = ((long)blk_rq_pos(req)) % fs->secpercyl;
|
||||
fs->head = x / fs->secpertrack;
|
||||
fs->req_sector = x % fs->secpertrack + 1;
|
||||
fs->state = do_transfer;
|
||||
fs->retries = 0;
|
||||
|
||||
act(fs);
|
||||
|
||||
out:
|
||||
spin_unlock_irq(&swim3_lock);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void set_timeout(struct floppy_state *fs, int nticks,
|
||||
|
@ -585,7 +557,6 @@ static void scan_timeout(struct timer_list *t)
|
|||
if (fs->retries > 5) {
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
} else {
|
||||
fs->state = jogging;
|
||||
act(fs);
|
||||
|
@ -609,7 +580,6 @@ static void seek_timeout(struct timer_list *t)
|
|||
swim3_err("%s", "Seek timeout\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
spin_unlock_irqrestore(&swim3_lock, flags);
|
||||
}
|
||||
|
||||
|
@ -638,7 +608,6 @@ static void settle_timeout(struct timer_list *t)
|
|||
swim3_err("%s", "Seek settle timeout\n");
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&swim3_lock, flags);
|
||||
}
|
||||
|
@ -667,7 +636,6 @@ static void xfer_timeout(struct timer_list *t)
|
|||
(long)blk_rq_pos(fs->cur_req));
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
spin_unlock_irqrestore(&swim3_lock, flags);
|
||||
}
|
||||
|
||||
|
@ -704,7 +672,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
|
|||
if (fs->retries > 5) {
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
} else {
|
||||
fs->state = jogging;
|
||||
act(fs);
|
||||
|
@ -796,7 +763,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
|
|||
fs->state, rq_data_dir(req), intr, err);
|
||||
swim3_end_request(fs, BLK_STS_IOERR, 0);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
break;
|
||||
}
|
||||
fs->retries = 0;
|
||||
|
@ -813,8 +779,6 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
|
|||
} else
|
||||
fs->state = idle;
|
||||
}
|
||||
if (fs->state == idle)
|
||||
start_request(fs);
|
||||
break;
|
||||
default:
|
||||
swim3_err("Don't know what to do in state %d\n", fs->state);
|
||||
|
@ -862,14 +826,19 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state,
|
|||
|
||||
static void release_drive(struct floppy_state *fs)
|
||||
{
|
||||
struct request_queue *q = disks[fs->index]->queue;
|
||||
unsigned long flags;
|
||||
|
||||
swim3_dbg("%s", "-> release drive\n");
|
||||
|
||||
spin_lock_irqsave(&swim3_lock, flags);
|
||||
fs->state = idle;
|
||||
start_request(fs);
|
||||
spin_unlock_irqrestore(&swim3_lock, flags);
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
}
|
||||
|
||||
static int fd_eject(struct floppy_state *fs)
|
||||
|
@ -1089,6 +1058,10 @@ static const struct block_device_operations floppy_fops = {
|
|||
.revalidate_disk= floppy_revalidate,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops swim3_mq_ops = {
|
||||
.queue_rq = swim3_queue_rq,
|
||||
};
|
||||
|
||||
static void swim3_mb_event(struct macio_dev* mdev, int mb_state)
|
||||
{
|
||||
struct floppy_state *fs = macio_get_drvdata(mdev);
|
||||
|
@ -1202,47 +1175,63 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
|
|||
static int swim3_attach(struct macio_dev *mdev,
|
||||
const struct of_device_id *match)
|
||||
{
|
||||
struct floppy_state *fs;
|
||||
struct gendisk *disk;
|
||||
int index, rc;
|
||||
int rc;
|
||||
|
||||
index = floppy_count++;
|
||||
if (index >= MAX_FLOPPIES)
|
||||
if (floppy_count >= MAX_FLOPPIES)
|
||||
return -ENXIO;
|
||||
|
||||
/* Add the drive */
|
||||
rc = swim3_add_device(mdev, index);
|
||||
if (rc)
|
||||
return rc;
|
||||
/* Now register that disk. Same comment about failure handling */
|
||||
disk = disks[index] = alloc_disk(1);
|
||||
if (disk == NULL)
|
||||
return -ENOMEM;
|
||||
disk->queue = blk_init_queue(do_fd_request, &swim3_lock);
|
||||
if (disk->queue == NULL) {
|
||||
put_disk(disk);
|
||||
return -ENOMEM;
|
||||
if (floppy_count == 0) {
|
||||
rc = register_blkdev(FLOPPY_MAJOR, "fd");
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
|
||||
fs = &floppy_states[floppy_count];
|
||||
|
||||
disk = alloc_disk(1);
|
||||
if (disk == NULL) {
|
||||
rc = -ENOMEM;
|
||||
goto out_unregister;
|
||||
}
|
||||
|
||||
disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(disk->queue)) {
|
||||
rc = PTR_ERR(disk->queue);
|
||||
disk->queue = NULL;
|
||||
goto out_put_disk;
|
||||
}
|
||||
blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
|
||||
disk->queue->queuedata = &floppy_states[index];
|
||||
disk->queue->queuedata = fs;
|
||||
|
||||
if (index == 0) {
|
||||
/* If we failed, there isn't much we can do as the driver is still
|
||||
* too dumb to remove the device, just bail out
|
||||
*/
|
||||
if (register_blkdev(FLOPPY_MAJOR, "fd"))
|
||||
return 0;
|
||||
}
|
||||
rc = swim3_add_device(mdev, floppy_count);
|
||||
if (rc)
|
||||
goto out_cleanup_queue;
|
||||
|
||||
disk->major = FLOPPY_MAJOR;
|
||||
disk->first_minor = index;
|
||||
disk->first_minor = floppy_count;
|
||||
disk->fops = &floppy_fops;
|
||||
disk->private_data = &floppy_states[index];
|
||||
disk->private_data = fs;
|
||||
disk->flags |= GENHD_FL_REMOVABLE;
|
||||
sprintf(disk->disk_name, "fd%d", index);
|
||||
sprintf(disk->disk_name, "fd%d", floppy_count);
|
||||
set_capacity(disk, 2880);
|
||||
add_disk(disk);
|
||||
|
||||
disks[floppy_count++] = disk;
|
||||
return 0;
|
||||
|
||||
out_cleanup_queue:
|
||||
blk_cleanup_queue(disk->queue);
|
||||
disk->queue = NULL;
|
||||
blk_mq_free_tag_set(&fs->tag_set);
|
||||
out_put_disk:
|
||||
put_disk(disk);
|
||||
out_unregister:
|
||||
if (floppy_count == 0)
|
||||
unregister_blkdev(FLOPPY_MAJOR, "fd");
|
||||
return rc;
|
||||
}
|
||||
|
||||
static const struct of_device_id swim3_match[] =
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/compiler.h>
|
||||
|
@ -197,7 +197,6 @@ enum {
|
|||
FL_NON_RAID = FW_VER_NON_RAID,
|
||||
FL_4PORT = FW_VER_4PORT,
|
||||
FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
|
||||
FL_DAC = (1 << 16),
|
||||
FL_DYN_MAJOR = (1 << 17),
|
||||
};
|
||||
|
||||
|
@ -244,6 +243,7 @@ struct carm_port {
|
|||
unsigned int port_no;
|
||||
struct gendisk *disk;
|
||||
struct carm_host *host;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
|
||||
/* attached device characteristics */
|
||||
u64 capacity;
|
||||
|
@ -279,6 +279,7 @@ struct carm_host {
|
|||
unsigned int state;
|
||||
u32 fw_ver;
|
||||
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct request_queue *oob_q;
|
||||
unsigned int n_oob;
|
||||
|
||||
|
@ -750,7 +751,7 @@ static inline void carm_end_request_queued(struct carm_host *host,
|
|||
struct request *req = crq->rq;
|
||||
int rc;
|
||||
|
||||
__blk_end_request_all(req, error);
|
||||
blk_mq_end_request(req, error);
|
||||
|
||||
rc = carm_put_request(host, crq);
|
||||
assert(rc == 0);
|
||||
|
@ -760,7 +761,7 @@ static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
|
|||
{
|
||||
unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
|
||||
|
||||
blk_stop_queue(q);
|
||||
blk_mq_stop_hw_queues(q);
|
||||
VPRINTK("STOPPED QUEUE %p\n", q);
|
||||
|
||||
host->wait_q[idx] = q;
|
||||
|
@ -785,7 +786,7 @@ static inline void carm_round_robin(struct carm_host *host)
|
|||
{
|
||||
struct request_queue *q = carm_pop_q(host);
|
||||
if (q) {
|
||||
blk_start_queue(q);
|
||||
blk_mq_start_hw_queues(q);
|
||||
VPRINTK("STARTED QUEUE %p\n", q);
|
||||
}
|
||||
}
|
||||
|
@ -802,82 +803,86 @@ static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
|
|||
}
|
||||
}
|
||||
|
||||
static void carm_oob_rq_fn(struct request_queue *q)
|
||||
static blk_status_t carm_oob_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct carm_host *host = q->queuedata;
|
||||
struct carm_request *crq;
|
||||
struct request *rq;
|
||||
int rc;
|
||||
|
||||
while (1) {
|
||||
DPRINTK("get req\n");
|
||||
rq = blk_fetch_request(q);
|
||||
if (!rq)
|
||||
break;
|
||||
blk_mq_start_request(bd->rq);
|
||||
|
||||
crq = rq->special;
|
||||
assert(crq != NULL);
|
||||
assert(crq->rq == rq);
|
||||
spin_lock_irq(&host->lock);
|
||||
|
||||
crq->n_elem = 0;
|
||||
crq = bd->rq->special;
|
||||
assert(crq != NULL);
|
||||
assert(crq->rq == bd->rq);
|
||||
|
||||
DPRINTK("send req\n");
|
||||
rc = carm_send_msg(host, crq);
|
||||
if (rc) {
|
||||
blk_requeue_request(q, rq);
|
||||
carm_push_q(host, q);
|
||||
return; /* call us again later, eventually */
|
||||
}
|
||||
crq->n_elem = 0;
|
||||
|
||||
DPRINTK("send req\n");
|
||||
rc = carm_send_msg(host, crq);
|
||||
if (rc) {
|
||||
carm_push_q(host, q);
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void carm_rq_fn(struct request_queue *q)
|
||||
static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct carm_port *port = q->queuedata;
|
||||
struct carm_host *host = port->host;
|
||||
struct carm_msg_rw *msg;
|
||||
struct carm_request *crq;
|
||||
struct request *rq;
|
||||
struct request *rq = bd->rq;
|
||||
struct scatterlist *sg;
|
||||
int writing = 0, pci_dir, i, n_elem, rc;
|
||||
u32 tmp;
|
||||
unsigned int msg_size;
|
||||
|
||||
queue_one_request:
|
||||
VPRINTK("get req\n");
|
||||
rq = blk_peek_request(q);
|
||||
if (!rq)
|
||||
return;
|
||||
blk_mq_start_request(rq);
|
||||
|
||||
spin_lock_irq(&host->lock);
|
||||
|
||||
crq = carm_get_request(host);
|
||||
if (!crq) {
|
||||
carm_push_q(host, q);
|
||||
return; /* call us again later, eventually */
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
crq->rq = rq;
|
||||
|
||||
blk_start_request(rq);
|
||||
|
||||
if (rq_data_dir(rq) == WRITE) {
|
||||
writing = 1;
|
||||
pci_dir = PCI_DMA_TODEVICE;
|
||||
pci_dir = DMA_TO_DEVICE;
|
||||
} else {
|
||||
pci_dir = PCI_DMA_FROMDEVICE;
|
||||
pci_dir = DMA_FROM_DEVICE;
|
||||
}
|
||||
|
||||
/* get scatterlist from block layer */
|
||||
sg = &crq->sg[0];
|
||||
n_elem = blk_rq_map_sg(q, rq, sg);
|
||||
if (n_elem <= 0) {
|
||||
/* request with no s/g entries? */
|
||||
carm_end_rq(host, crq, BLK_STS_IOERR);
|
||||
return; /* request with no s/g entries? */
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
/* map scatterlist to PCI bus addresses */
|
||||
n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
|
||||
n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, pci_dir);
|
||||
if (n_elem <= 0) {
|
||||
/* request with no s/g entries? */
|
||||
carm_end_rq(host, crq, BLK_STS_IOERR);
|
||||
return; /* request with no s/g entries? */
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
crq->n_elem = n_elem;
|
||||
crq->port = port;
|
||||
|
@ -927,12 +932,13 @@ queue_one_request:
|
|||
rc = carm_send_msg(host, crq);
|
||||
if (rc) {
|
||||
carm_put_request(host, crq);
|
||||
blk_requeue_request(q, rq);
|
||||
carm_push_q(host, q);
|
||||
return; /* call us again later, eventually */
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
}
|
||||
|
||||
goto queue_one_request;
|
||||
spin_unlock_irq(&host->lock);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void carm_handle_array_info(struct carm_host *host,
|
||||
|
@ -1052,11 +1058,11 @@ static inline void carm_handle_rw(struct carm_host *host,
|
|||
VPRINTK("ENTER\n");
|
||||
|
||||
if (rq_data_dir(crq->rq) == WRITE)
|
||||
pci_dir = PCI_DMA_TODEVICE;
|
||||
pci_dir = DMA_TO_DEVICE;
|
||||
else
|
||||
pci_dir = PCI_DMA_FROMDEVICE;
|
||||
pci_dir = DMA_FROM_DEVICE;
|
||||
|
||||
pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, pci_dir);
|
||||
dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem, pci_dir);
|
||||
|
||||
carm_end_rq(host, crq, error);
|
||||
}
|
||||
|
@ -1485,6 +1491,14 @@ static int carm_init_host(struct carm_host *host)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static const struct blk_mq_ops carm_oob_mq_ops = {
|
||||
.queue_rq = carm_oob_queue_rq,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops carm_mq_ops = {
|
||||
.queue_rq = carm_queue_rq,
|
||||
};
|
||||
|
||||
static int carm_init_disks(struct carm_host *host)
|
||||
{
|
||||
unsigned int i;
|
||||
|
@ -1513,9 +1527,10 @@ static int carm_init_disks(struct carm_host *host)
|
|||
disk->fops = &carm_bd_ops;
|
||||
disk->private_data = port;
|
||||
|
||||
q = blk_init_queue(carm_rq_fn, &host->lock);
|
||||
if (!q) {
|
||||
rc = -ENOMEM;
|
||||
q = blk_mq_init_sq_queue(&port->tag_set, &carm_mq_ops,
|
||||
max_queue, BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(q)) {
|
||||
rc = PTR_ERR(q);
|
||||
break;
|
||||
}
|
||||
disk->queue = q;
|
||||
|
@ -1533,14 +1548,18 @@ static void carm_free_disks(struct carm_host *host)
|
|||
unsigned int i;
|
||||
|
||||
for (i = 0; i < CARM_MAX_PORTS; i++) {
|
||||
struct gendisk *disk = host->port[i].disk;
|
||||
struct carm_port *port = &host->port[i];
|
||||
struct gendisk *disk = port->disk;
|
||||
|
||||
if (disk) {
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
if (disk->flags & GENHD_FL_UP)
|
||||
del_gendisk(disk);
|
||||
if (q)
|
||||
if (q) {
|
||||
blk_mq_free_tag_set(&port->tag_set);
|
||||
blk_cleanup_queue(q);
|
||||
}
|
||||
put_disk(disk);
|
||||
}
|
||||
}
|
||||
|
@ -1548,8 +1567,8 @@ static void carm_free_disks(struct carm_host *host)
|
|||
|
||||
static int carm_init_shm(struct carm_host *host)
|
||||
{
|
||||
host->shm = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE,
|
||||
&host->shm_dma);
|
||||
host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE,
|
||||
&host->shm_dma, GFP_KERNEL);
|
||||
if (!host->shm)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -1565,7 +1584,6 @@ static int carm_init_shm(struct carm_host *host)
|
|||
static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
{
|
||||
struct carm_host *host;
|
||||
unsigned int pci_dac;
|
||||
int rc;
|
||||
struct request_queue *q;
|
||||
unsigned int i;
|
||||
|
@ -1580,28 +1598,12 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
if (rc)
|
||||
goto err_out;
|
||||
|
||||
#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (!rc) {
|
||||
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (rc) {
|
||||
printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n",
|
||||
pci_name(pdev));
|
||||
goto err_out_regions;
|
||||
}
|
||||
pci_dac = 1;
|
||||
} else {
|
||||
#endif
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
if (rc) {
|
||||
printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
|
||||
pci_name(pdev));
|
||||
goto err_out_regions;
|
||||
}
|
||||
pci_dac = 0;
|
||||
#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
|
||||
rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
|
||||
if (rc) {
|
||||
printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
|
||||
pci_name(pdev));
|
||||
goto err_out_regions;
|
||||
}
|
||||
#endif
|
||||
|
||||
host = kzalloc(sizeof(*host), GFP_KERNEL);
|
||||
if (!host) {
|
||||
|
@ -1612,7 +1614,6 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
}
|
||||
|
||||
host->pdev = pdev;
|
||||
host->flags = pci_dac ? FL_DAC : 0;
|
||||
spin_lock_init(&host->lock);
|
||||
INIT_WORK(&host->fsm_task, carm_fsm_task);
|
||||
init_completion(&host->probe_comp);
|
||||
|
@ -1636,12 +1637,13 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
|
|||
goto err_out_iounmap;
|
||||
}
|
||||
|
||||
q = blk_init_queue(carm_oob_rq_fn, &host->lock);
|
||||
if (!q) {
|
||||
q = blk_mq_init_sq_queue(&host->tag_set, &carm_oob_mq_ops, 1,
|
||||
BLK_MQ_F_NO_SCHED);
|
||||
if (IS_ERR(q)) {
|
||||
printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n",
|
||||
pci_name(pdev));
|
||||
rc = -ENOMEM;
|
||||
goto err_out_pci_free;
|
||||
rc = PTR_ERR(q);
|
||||
goto err_out_dma_free;
|
||||
}
|
||||
host->oob_q = q;
|
||||
q->queuedata = host;
|
||||
|
@ -1705,8 +1707,9 @@ err_out_free_majors:
|
|||
else if (host->major == 161)
|
||||
clear_bit(1, &carm_major_alloc);
|
||||
blk_cleanup_queue(host->oob_q);
|
||||
err_out_pci_free:
|
||||
pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
|
||||
blk_mq_free_tag_set(&host->tag_set);
|
||||
err_out_dma_free:
|
||||
dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
|
||||
err_out_iounmap:
|
||||
iounmap(host->mmio);
|
||||
err_out_kfree:
|
||||
|
@ -1736,7 +1739,8 @@ static void carm_remove_one (struct pci_dev *pdev)
|
|||
else if (host->major == 161)
|
||||
clear_bit(1, &carm_major_alloc);
|
||||
blk_cleanup_queue(host->oob_q);
|
||||
pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
|
||||
blk_mq_free_tag_set(&host->tag_set);
|
||||
dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
|
||||
iounmap(host->mmio);
|
||||
kfree(host);
|
||||
pci_release_regions(pdev);
|
||||
|
|
|
@@ -363,12 +363,12 @@ static int add_bio(struct cardinfo *card)

	vec = bio_iter_iovec(bio, card->current_iter);

	dma_handle = pci_map_page(card->dev,
	dma_handle = dma_map_page(&card->dev->dev,
				  vec.bv_page,
				  vec.bv_offset,
				  vec.bv_len,
				  bio_op(bio) == REQ_OP_READ ?
				  PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
				  DMA_FROM_DEVICE : DMA_TO_DEVICE);

	p = &card->mm_pages[card->Ready];
	desc = &p->desc[p->cnt];

@@ -421,7 +421,7 @@ static void process_page(unsigned long data)
	struct cardinfo *card = (struct cardinfo *)data;
	unsigned int dma_status = card->dma_status;

	spin_lock_bh(&card->lock);
	spin_lock(&card->lock);
	if (card->Active < 0)
		goto out_unlock;
	page = &card->mm_pages[card->Active];

@@ -448,10 +448,10 @@ static void process_page(unsigned long data)
			page->iter = page->bio->bi_iter;
		}

		pci_unmap_page(card->dev, desc->data_dma_handle,
		dma_unmap_page(&card->dev->dev, desc->data_dma_handle,
			       vec.bv_len,
			       (control & DMASCR_TRANSFER_READ) ?
			       PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
			       DMA_TO_DEVICE : DMA_FROM_DEVICE);
		if (control & DMASCR_HARD_ERROR) {
			/* error */
			bio->bi_status = BLK_STS_IOERR;

@@ -496,7 +496,7 @@ static void process_page(unsigned long data)
		mm_start_io(card);
	}
 out_unlock:
	spin_unlock_bh(&card->lock);
	spin_unlock(&card->lock);

	while (return_bio) {
		struct bio *bio = return_bio;

@@ -817,8 +817,8 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
	dev_printk(KERN_INFO, &dev->dev,
		"Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n");

	if (pci_set_dma_mask(dev, DMA_BIT_MASK(64)) &&
	    pci_set_dma_mask(dev, DMA_BIT_MASK(32))) {
	if (dma_set_mask(&dev->dev, DMA_BIT_MASK(64)) &&
	    dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) {
		dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n");
		return -ENOMEM;
	}

@@ -871,12 +871,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
		goto failed_magic;
	}

	card->mm_pages[0].desc = pci_alloc_consistent(card->dev,
						PAGE_SIZE * 2,
						&card->mm_pages[0].page_dma);
	card->mm_pages[1].desc = pci_alloc_consistent(card->dev,
						PAGE_SIZE * 2,
						&card->mm_pages[1].page_dma);
	card->mm_pages[0].desc = dma_alloc_coherent(&card->dev->dev,
			PAGE_SIZE * 2, &card->mm_pages[0].page_dma, GFP_KERNEL);
	card->mm_pages[1].desc = dma_alloc_coherent(&card->dev->dev,
			PAGE_SIZE * 2, &card->mm_pages[1].page_dma, GFP_KERNEL);
	if (card->mm_pages[0].desc == NULL ||
	    card->mm_pages[1].desc == NULL) {
		dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n");

@@ -1002,13 +1000,13 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 failed_req_irq:
 failed_alloc:
	if (card->mm_pages[0].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
				    card->mm_pages[0].desc,
				    card->mm_pages[0].page_dma);
		dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
				  card->mm_pages[0].desc,
				  card->mm_pages[0].page_dma);
	if (card->mm_pages[1].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
				    card->mm_pages[1].desc,
				    card->mm_pages[1].page_dma);
		dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
				  card->mm_pages[1].desc,
				  card->mm_pages[1].page_dma);
 failed_magic:
	iounmap(card->csr_remap);
 failed_remap_csr:

@@ -1027,11 +1025,11 @@ static void mm_pci_remove(struct pci_dev *dev)
	iounmap(card->csr_remap);

	if (card->mm_pages[0].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
		dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
				  card->mm_pages[0].desc,
				  card->mm_pages[0].page_dma);
	if (card->mm_pages[1].desc)
		pci_free_consistent(card->dev, PAGE_SIZE*2,
		dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2,
				  card->mm_pages[1].desc,
				  card->mm_pages[1].page_dma);
	blk_cleanup_queue(card->queue);
|
|
|
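The umem hunks above belong to the conversion from the legacy PCI DMA wrappers to the generic DMA mapping API: pci_set_dma_mask()/pci_map_page()/pci_alloc_consistent() become dma_set_mask()/dma_map_page()/dma_alloc_coherent() on &pdev->dev, and DMA_TO_DEVICE/DMA_FROM_DEVICE replace the PCI_DMA_* direction constants. A rough sketch of the new calls for a hypothetical PCI driver (the mydev_dma_setup name and its arguments are assumptions, not from the patch):

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    static int mydev_dma_setup(struct pci_dev *pdev, struct page *page, size_t len)
    {
            dma_addr_t handle;

            /* negotiate an addressing mask, widest first */
            if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
                dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
                    return -EIO;

            /* streaming mapping for a data page, with the error check that
             * old pci_map_page() callers often skipped */
            handle = dma_map_page(&pdev->dev, page, 0, len, DMA_TO_DEVICE);
            if (dma_mapping_error(&pdev->dev, handle))
                    return -ENOMEM;
            /* ... hand 'handle' to the hardware here ... */
            dma_unmap_page(&pdev->dev, handle, len, DMA_TO_DEVICE);
            return 0;
    }

Coherent descriptor memory moves the same way: pci_alloc_consistent(pdev, size, &dma) becomes dma_alloc_coherent(&pdev->dev, size, &dma, GFP_KERNEL), paired with dma_free_coherent() on the error and remove paths, as the umem probe/remove hunks show.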
@@ -351,8 +351,8 @@ static int minor_to_index(int minor)
	return minor >> PART_BITS;
}

static ssize_t virtblk_serial_show(struct device *dev,
				struct device_attribute *attr, char *buf)
static ssize_t serial_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

@@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev,
	return err;
}

static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL);
static DEVICE_ATTR_RO(serial);

/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)

@@ -545,8 +545,8 @@ static const char *const virtblk_cache_types[] = {
};

static ssize_t
virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t count)
cache_type_store(struct device *dev, struct device_attribute *attr,
		 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;

@@ -564,8 +564,7 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
}

static ssize_t
virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
			char *buf)
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;

@@ -575,12 +574,38 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static const struct device_attribute dev_attr_cache_type_ro =
	__ATTR(cache_type, 0444,
	       virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
	__ATTR(cache_type, 0644,
	       virtblk_cache_type_show, virtblk_cache_type_store);
static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};

static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)

@@ -780,24 +805,9 @@ static int virtblk_probe(struct virtio_device *vdev)
	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	device_add_disk(&vdev->dev, vblk->disk);
	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
	if (err)
		goto out_del_disk;

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_rw);
	else
		err = device_create_file(disk_to_dev(vblk->disk),
					 &dev_attr_cache_type_ro);
	if (err)
		goto out_del_disk;
	device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	return 0;

out_del_disk:
	del_gendisk(vblk->disk);
	blk_cleanup_queue(vblk->disk->queue);
out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_put_disk:
|
|
|
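The virtio_blk hunks above show the sysfs side of this series: the show/store handlers are renamed so DEVICE_ATTR_RO()/DEVICE_ATTR_RW() can generate the attribute definitions, the attributes are gathered into an attribute_group with an .is_visible callback, and the group array is passed to device_add_disk() instead of being created with device_create_file() after the disk is already live. A compact sketch of the same shape for a made-up driver (the foo_* names are illustrative only):

    static ssize_t model_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
    {
            struct gendisk *disk = dev_to_disk(dev);
            struct foo_dev *foo = disk->private_data;

            return sprintf(buf, "%s\n", foo->model);
    }
    static DEVICE_ATTR_RO(model);          /* generates dev_attr_model */

    static struct attribute *foo_attrs[] = {
            &dev_attr_model.attr,
            NULL,
    };

    static umode_t foo_attrs_visible(struct kobject *kobj,
                                     struct attribute *a, int n)
    {
            return a->mode;                /* or 0 / S_IRUGO to hide or downgrade */
    }

    static const struct attribute_group foo_attr_group = {
            .attrs      = foo_attrs,
            .is_visible = foo_attrs_visible,
    };
    static const struct attribute_group *foo_attr_groups[] = {
            &foo_attr_group,
            NULL,
    };

    /* probe: the attributes exist before userspace ever sees the disk */
    device_add_disk(parent, foo->disk, foo_attr_groups);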
@@ -2420,7 +2420,7 @@ static void blkfront_connect(struct blkfront_info *info)
	for (i = 0; i < info->nr_rings; i++)
		kick_pending_request_queues(&info->rinfo[i]);

	device_add_disk(&info->xbdev->dev, info->gd);
	device_add_disk(&info->xbdev->dev, info->gd, NULL);

	info->is_ready = 1;
	return;
|
|
|
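The one-line xen-blkfront change reflects the device_add_disk() prototype these patches introduce for 4.20: the function now also takes the attribute groups to register with the disk, and callers that have nothing extra to expose simply pass NULL. Roughly:

    /* new prototype used by the conversions in this pull */
    void device_add_disk(struct device *parent, struct gendisk *disk,
                         const struct attribute_group **groups);

    device_add_disk(&info->xbdev->dev, info->gd, NULL);            /* no extra attrs */
    device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);  /* virtio_blk */

Because the groups are registered before the disk's add uevent is emitted, udev can no longer observe the disk before its attributes exist, which is what lets virtio_blk and zram drop their device_create_file()/sysfs_create_group() calls after add_disk().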
@ -88,7 +88,7 @@
|
|||
#include <linux/kernel.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/ata.h>
|
||||
#include <linux/hdreg.h>
|
||||
|
@ -209,6 +209,8 @@ struct ace_device {
|
|||
struct device *dev;
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gd;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct list_head rq_list;
|
||||
|
||||
/* Inserted CF card parameters */
|
||||
u16 cf_id[ATA_ID_WORDS];
|
||||
|
@ -462,18 +464,26 @@ static inline void ace_fsm_yieldirq(struct ace_device *ace)
|
|||
ace->fsm_continue_flag = 0;
|
||||
}
|
||||
|
||||
static bool ace_has_next_request(struct request_queue *q)
|
||||
{
|
||||
struct ace_device *ace = q->queuedata;
|
||||
|
||||
return !list_empty(&ace->rq_list);
|
||||
}
|
||||
|
||||
/* Get the next read/write request; ending requests that we don't handle */
|
||||
static struct request *ace_get_next_request(struct request_queue *q)
|
||||
{
|
||||
struct request *req;
|
||||
struct ace_device *ace = q->queuedata;
|
||||
struct request *rq;
|
||||
|
||||
while ((req = blk_peek_request(q)) != NULL) {
|
||||
if (!blk_rq_is_passthrough(req))
|
||||
break;
|
||||
blk_start_request(req);
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
rq = list_first_entry_or_null(&ace->rq_list, struct request, queuelist);
|
||||
if (rq) {
|
||||
list_del_init(&rq->queuelist);
|
||||
blk_mq_start_request(rq);
|
||||
}
|
||||
return req;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void ace_fsm_dostate(struct ace_device *ace)
|
||||
|
@ -499,11 +509,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
|
|||
|
||||
/* Drop all in-flight and pending requests */
|
||||
if (ace->req) {
|
||||
__blk_end_request_all(ace->req, BLK_STS_IOERR);
|
||||
blk_mq_end_request(ace->req, BLK_STS_IOERR);
|
||||
ace->req = NULL;
|
||||
}
|
||||
while ((req = blk_fetch_request(ace->queue)) != NULL)
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
while ((req = ace_get_next_request(ace->queue)) != NULL)
|
||||
blk_mq_end_request(req, BLK_STS_IOERR);
|
||||
|
||||
/* Drop back to IDLE state and notify waiters */
|
||||
ace->fsm_state = ACE_FSM_STATE_IDLE;
|
||||
|
@ -517,7 +527,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
|
|||
switch (ace->fsm_state) {
|
||||
case ACE_FSM_STATE_IDLE:
|
||||
/* See if there is anything to do */
|
||||
if (ace->id_req_count || ace_get_next_request(ace->queue)) {
|
||||
if (ace->id_req_count || ace_has_next_request(ace->queue)) {
|
||||
ace->fsm_iter_num++;
|
||||
ace->fsm_state = ACE_FSM_STATE_REQ_LOCK;
|
||||
mod_timer(&ace->stall_timer, jiffies + HZ);
|
||||
|
@ -651,7 +661,6 @@ static void ace_fsm_dostate(struct ace_device *ace)
|
|||
ace->fsm_state = ACE_FSM_STATE_IDLE;
|
||||
break;
|
||||
}
|
||||
blk_start_request(req);
|
||||
|
||||
/* Okay, it's a data request, set it up for transfer */
|
||||
dev_dbg(ace->dev,
|
||||
|
@ -728,7 +737,8 @@ static void ace_fsm_dostate(struct ace_device *ace)
|
|||
}
|
||||
|
||||
/* bio finished; is there another one? */
|
||||
if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
|
||||
if (blk_update_request(ace->req, BLK_STS_OK,
|
||||
blk_rq_cur_bytes(ace->req))) {
|
||||
/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
|
||||
* blk_rq_sectors(ace->req),
|
||||
* blk_rq_cur_sectors(ace->req));
|
||||
|
@ -854,17 +864,23 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id)
|
|||
/* ---------------------------------------------------------------------
|
||||
* Block ops
|
||||
*/
|
||||
static void ace_request(struct request_queue * q)
|
||||
static blk_status_t ace_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *req;
|
||||
struct ace_device *ace;
|
||||
struct ace_device *ace = hctx->queue->queuedata;
|
||||
struct request *req = bd->rq;
|
||||
|
||||
req = ace_get_next_request(q);
|
||||
|
||||
if (req) {
|
||||
ace = req->rq_disk->private_data;
|
||||
tasklet_schedule(&ace->fsm_tasklet);
|
||||
if (blk_rq_is_passthrough(req)) {
|
||||
blk_mq_start_request(req);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
spin_lock_irq(&ace->lock);
|
||||
list_add_tail(&req->queuelist, &ace->rq_list);
|
||||
spin_unlock_irq(&ace->lock);
|
||||
|
||||
tasklet_schedule(&ace->fsm_tasklet);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing)
|
||||
|
@ -957,6 +973,10 @@ static const struct block_device_operations ace_fops = {
|
|||
.getgeo = ace_getgeo,
|
||||
};
|
||||
|
||||
static const struct blk_mq_ops ace_mq_ops = {
|
||||
.queue_rq = ace_queue_rq,
|
||||
};
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* SystemACE device setup/teardown code
|
||||
*/
|
||||
|
@ -972,6 +992,7 @@ static int ace_setup(struct ace_device *ace)
|
|||
|
||||
spin_lock_init(&ace->lock);
|
||||
init_completion(&ace->id_completion);
|
||||
INIT_LIST_HEAD(&ace->rq_list);
|
||||
|
||||
/*
|
||||
* Map the device
|
||||
|
@ -989,9 +1010,15 @@ static int ace_setup(struct ace_device *ace)
|
|||
/*
|
||||
* Initialize the request queue
|
||||
*/
|
||||
ace->queue = blk_init_queue(ace_request, &ace->lock);
|
||||
if (ace->queue == NULL)
|
||||
ace->queue = blk_mq_init_sq_queue(&ace->tag_set, &ace_mq_ops, 2,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(ace->queue)) {
|
||||
rc = PTR_ERR(ace->queue);
|
||||
ace->queue = NULL;
|
||||
goto err_blk_initq;
|
||||
}
|
||||
ace->queue->queuedata = ace;
|
||||
|
||||
blk_queue_logical_block_size(ace->queue, 512);
|
||||
blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
|
||||
|
||||
|
@ -1066,6 +1093,7 @@ err_read:
|
|||
put_disk(ace->gd);
|
||||
err_alloc_disk:
|
||||
blk_cleanup_queue(ace->queue);
|
||||
blk_mq_free_tag_set(&ace->tag_set);
|
||||
err_blk_initq:
|
||||
iounmap(ace->baseaddr);
|
||||
err_ioremap:
|
||||
|
@ -1081,8 +1109,10 @@ static void ace_teardown(struct ace_device *ace)
|
|||
put_disk(ace->gd);
|
||||
}
|
||||
|
||||
if (ace->queue)
|
||||
if (ace->queue) {
|
||||
blk_cleanup_queue(ace->queue);
|
||||
blk_mq_free_tag_set(&ace->tag_set);
|
||||
}
|
||||
|
||||
tasklet_kill(&ace->fsm_tasklet);
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -66,43 +66,44 @@ static DEFINE_SPINLOCK(z2ram_lock);
|
|||
|
||||
static struct gendisk *z2ram_gendisk;
|
||||
|
||||
static void do_z2_request(struct request_queue *q)
|
||||
static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *req;
|
||||
struct request *req = bd->rq;
|
||||
unsigned long start = blk_rq_pos(req) << 9;
|
||||
unsigned long len = blk_rq_cur_bytes(req);
|
||||
|
||||
req = blk_fetch_request(q);
|
||||
while (req) {
|
||||
unsigned long start = blk_rq_pos(req) << 9;
|
||||
unsigned long len = blk_rq_cur_bytes(req);
|
||||
blk_status_t err = BLK_STS_OK;
|
||||
blk_mq_start_request(req);
|
||||
|
||||
if (start + len > z2ram_size) {
|
||||
pr_err(DEVICE_NAME ": bad access: block=%llu, "
|
||||
"count=%u\n",
|
||||
(unsigned long long)blk_rq_pos(req),
|
||||
blk_rq_cur_sectors(req));
|
||||
err = BLK_STS_IOERR;
|
||||
goto done;
|
||||
}
|
||||
while (len) {
|
||||
unsigned long addr = start & Z2RAM_CHUNKMASK;
|
||||
unsigned long size = Z2RAM_CHUNKSIZE - addr;
|
||||
void *buffer = bio_data(req->bio);
|
||||
|
||||
if (len < size)
|
||||
size = len;
|
||||
addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ];
|
||||
if (rq_data_dir(req) == READ)
|
||||
memcpy(buffer, (char *)addr, size);
|
||||
else
|
||||
memcpy((char *)addr, buffer, size);
|
||||
start += size;
|
||||
len -= size;
|
||||
}
|
||||
done:
|
||||
if (!__blk_end_request_cur(req, err))
|
||||
req = blk_fetch_request(q);
|
||||
if (start + len > z2ram_size) {
|
||||
pr_err(DEVICE_NAME ": bad access: block=%llu, "
|
||||
"count=%u\n",
|
||||
(unsigned long long)blk_rq_pos(req),
|
||||
blk_rq_cur_sectors(req));
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
spin_lock_irq(&z2ram_lock);
|
||||
|
||||
while (len) {
|
||||
unsigned long addr = start & Z2RAM_CHUNKMASK;
|
||||
unsigned long size = Z2RAM_CHUNKSIZE - addr;
|
||||
void *buffer = bio_data(req->bio);
|
||||
|
||||
if (len < size)
|
||||
size = len;
|
||||
addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ];
|
||||
if (rq_data_dir(req) == READ)
|
||||
memcpy(buffer, (char *)addr, size);
|
||||
else
|
||||
memcpy((char *)addr, buffer, size);
|
||||
start += size;
|
||||
len -= size;
|
||||
}
|
||||
|
||||
spin_unlock_irq(&z2ram_lock);
|
||||
blk_mq_end_request(req, BLK_STS_OK);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -337,6 +338,11 @@ static struct kobject *z2_find(dev_t dev, int *part, void *data)
|
|||
}
|
||||
|
||||
static struct request_queue *z2_queue;
|
||||
static struct blk_mq_tag_set tag_set;
|
||||
|
||||
static const struct blk_mq_ops z2_mq_ops = {
|
||||
.queue_rq = z2_queue_rq,
|
||||
};
|
||||
|
||||
static int __init
|
||||
z2_init(void)
|
||||
|
@ -355,9 +361,13 @@ z2_init(void)
|
|||
if (!z2ram_gendisk)
|
||||
goto out_disk;
|
||||
|
||||
z2_queue = blk_init_queue(do_z2_request, &z2ram_lock);
|
||||
if (!z2_queue)
|
||||
z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16,
|
||||
BLK_MQ_F_SHOULD_MERGE);
|
||||
if (IS_ERR(z2_queue)) {
|
||||
ret = PTR_ERR(z2_queue);
|
||||
z2_queue = NULL;
|
||||
goto out_queue;
|
||||
}
|
||||
|
||||
z2ram_gendisk->major = Z2RAM_MAJOR;
|
||||
z2ram_gendisk->first_minor = 0;
|
||||
|
@ -387,6 +397,7 @@ static void __exit z2_exit(void)
|
|||
del_gendisk(z2ram_gendisk);
|
||||
put_disk(z2ram_gendisk);
|
||||
blk_cleanup_queue(z2_queue);
|
||||
blk_mq_free_tag_set(&tag_set);
|
||||
|
||||
if ( current_device != -1 )
|
||||
{
|
||||
|
|
|
@@ -3,7 +3,6 @@ config ZRAM
	tristate "Compressed RAM block device support"
	depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO
	select CRYPTO_LZO
	default n
	help
	  Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
	  Pages written to these disks are compressed and stored in memory

@@ -18,7 +17,6 @@ config ZRAM
config ZRAM_WRITEBACK
	bool "Write back incompressible page to backing device"
	depends on ZRAM
	default n
	help
	  With incompressible page, there is no memory saving to keep it
	  in memory. Instead, write it out to backing device.
|
|
|
@@ -1636,6 +1636,11 @@ static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static const struct attribute_group *zram_disk_attr_groups[] = {
	&zram_disk_attr_group,
	NULL,
};

/*
 * Allocate and initialize new zram device. the function returns
 * '>= 0' device_id upon success, and negative value otherwise.

@@ -1716,24 +1721,14 @@ static int zram_add(void)

	zram->disk->queue->backing_dev_info->capabilities |=
			(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
	add_disk(zram->disk);
	device_add_disk(NULL, zram->disk, zram_disk_attr_groups);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:

@@ -1762,15 +1757,6 @@ static int zram_remove(struct zram *zram)
	mutex_unlock(&bdev->bd_mutex);

	zram_debugfs_unregister(zram);
	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the devices. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
|
|
|
@@ -410,10 +410,10 @@ static int cdrom_get_disc_info(struct cdrom_device_info *cdi,
 * hack to have the capability flags defined const, while we can still
 * change it here without gcc complaining at every line.
 */
#define ENSURE(call, bits)			\
do {						\
	if (cdo->call == NULL)			\
		*change_capability &= ~(bits);	\
#define ENSURE(cdo, call, bits)			\
do {						\
	if (cdo->call == NULL)			\
		WARN_ON_ONCE((cdo)->capability & (bits)); \
} while (0)

/*

@@ -589,7 +589,6 @@ int register_cdrom(struct cdrom_device_info *cdi)
{
	static char banner_printed;
	const struct cdrom_device_ops *cdo = cdi->ops;
	int *change_capability = (int *)&cdo->capability; /* hack */

	cd_dbg(CD_OPEN, "entering register_cdrom\n");

@@ -601,16 +600,16 @@ int register_cdrom(struct cdrom_device_info *cdi)
		cdrom_sysctl_register();
	}

	ENSURE(drive_status, CDC_DRIVE_STATUS);
	ENSURE(cdo, drive_status, CDC_DRIVE_STATUS);
	if (cdo->check_events == NULL && cdo->media_changed == NULL)
		*change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC);
	ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY);
	ENSURE(lock_door, CDC_LOCK);
	ENSURE(select_speed, CDC_SELECT_SPEED);
	ENSURE(get_last_session, CDC_MULTI_SESSION);
	ENSURE(get_mcn, CDC_MCN);
	ENSURE(reset, CDC_RESET);
	ENSURE(generic_packet, CDC_GENERIC_PACKET);
		WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC));
	ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY);
	ENSURE(cdo, lock_door, CDC_LOCK);
	ENSURE(cdo, select_speed, CDC_SELECT_SPEED);
	ENSURE(cdo, get_last_session, CDC_MULTI_SESSION);
	ENSURE(cdo, get_mcn, CDC_MCN);
	ENSURE(cdo, reset, CDC_RESET);
	ENSURE(cdo, generic_packet, CDC_GENERIC_PACKET);
	cdi->mc_flags = 0;
	cdi->options = CDO_USE_FFLAGS;

@@ -2445,7 +2444,7 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi,
		return -ENOSYS;

	if (arg != CDSL_CURRENT && arg != CDSL_NONE) {
		if ((int)arg >= cdi->capacity)
		if (arg >= cdi->capacity)
			return -EINVAL;
	}
|
|
|
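With the write-through-const hack removed, register_cdrom() no longer silently strips capability bits when a callback is missing; the reworked ENSURE() only warns once if a driver advertises a capability it cannot service. In practice a driver's ops table now has to be consistent up front, roughly like this (the mycd_* names are hypothetical):

    static const struct cdrom_device_ops mycd_ops = {
            .drive_status = mycd_drive_status,
            .tray_move    = mycd_tray_move,
            /* no .lock_door, so CDC_LOCK must not be advertised */
            .capability   = CDC_DRIVE_STATUS | CDC_OPEN_TRAY | CDC_CLOSE_TRAY,
    };

    /* register_cdrom() would WARN_ON_ONCE() if CDC_LOCK were set above */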
@ -31,12 +31,11 @@
|
|||
#include <linux/cdrom.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <scsi/scsi.h>
|
||||
#include <asm/io.h>
|
||||
|
@ -102,11 +101,6 @@ static int gdrom_major;
|
|||
static DECLARE_WAIT_QUEUE_HEAD(command_queue);
|
||||
static DECLARE_WAIT_QUEUE_HEAD(request_queue);
|
||||
|
||||
static DEFINE_SPINLOCK(gdrom_lock);
|
||||
static void gdrom_readdisk_dma(struct work_struct *work);
|
||||
static DECLARE_WORK(work, gdrom_readdisk_dma);
|
||||
static LIST_HEAD(gdrom_deferred);
|
||||
|
||||
struct gdromtoc {
|
||||
unsigned int entry[99];
|
||||
unsigned int first, last;
|
||||
|
@ -122,6 +116,7 @@ static struct gdrom_unit {
|
|||
char disk_type;
|
||||
struct gdromtoc *toc;
|
||||
struct request_queue *gdrom_rq;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
} gd;
|
||||
|
||||
struct gdrom_id {
|
||||
|
@ -584,103 +579,83 @@ static int gdrom_set_interrupt_handlers(void)
|
|||
* 9 -> sectors >> 8
|
||||
* 10 -> sectors
|
||||
*/
|
||||
static void gdrom_readdisk_dma(struct work_struct *work)
|
||||
static blk_status_t gdrom_readdisk_dma(struct request *req)
|
||||
{
|
||||
int block, block_cnt;
|
||||
blk_status_t err;
|
||||
struct packet_command *read_command;
|
||||
struct list_head *elem, *next;
|
||||
struct request *req;
|
||||
unsigned long timeout;
|
||||
|
||||
if (list_empty(&gdrom_deferred))
|
||||
return;
|
||||
read_command = kzalloc(sizeof(struct packet_command), GFP_KERNEL);
|
||||
if (!read_command)
|
||||
return; /* get more memory later? */
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
read_command->cmd[0] = 0x30;
|
||||
read_command->cmd[1] = 0x20;
|
||||
spin_lock(&gdrom_lock);
|
||||
list_for_each_safe(elem, next, &gdrom_deferred) {
|
||||
req = list_entry(elem, struct request, queuelist);
|
||||
spin_unlock(&gdrom_lock);
|
||||
block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
|
||||
block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
|
||||
__raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG);
|
||||
__raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
|
||||
__raw_writel(1, GDROM_DMA_DIRECTION_REG);
|
||||
__raw_writel(1, GDROM_DMA_ENABLE_REG);
|
||||
read_command->cmd[2] = (block >> 16) & 0xFF;
|
||||
read_command->cmd[3] = (block >> 8) & 0xFF;
|
||||
read_command->cmd[4] = block & 0xFF;
|
||||
read_command->cmd[8] = (block_cnt >> 16) & 0xFF;
|
||||
read_command->cmd[9] = (block_cnt >> 8) & 0xFF;
|
||||
read_command->cmd[10] = block_cnt & 0xFF;
|
||||
/* set for DMA */
|
||||
__raw_writeb(1, GDROM_ERROR_REG);
|
||||
/* other registers */
|
||||
__raw_writeb(0, GDROM_SECNUM_REG);
|
||||
__raw_writeb(0, GDROM_BCL_REG);
|
||||
__raw_writeb(0, GDROM_BCH_REG);
|
||||
__raw_writeb(0, GDROM_DSEL_REG);
|
||||
__raw_writeb(0, GDROM_INTSEC_REG);
|
||||
/* Wait for registers to reset after any previous activity */
|
||||
timeout = jiffies + HZ / 2;
|
||||
while (gdrom_is_busy() && time_before(jiffies, timeout))
|
||||
cpu_relax();
|
||||
__raw_writeb(GDROM_COM_PACKET, GDROM_STATUSCOMMAND_REG);
|
||||
timeout = jiffies + HZ / 2;
|
||||
/* Wait for packet command to finish */
|
||||
while (gdrom_is_busy() && time_before(jiffies, timeout))
|
||||
cpu_relax();
|
||||
gd.pending = 1;
|
||||
gd.transfer = 1;
|
||||
outsw(GDROM_DATA_REG, &read_command->cmd, 6);
|
||||
timeout = jiffies + HZ / 2;
|
||||
/* Wait for any pending DMA to finish */
|
||||
while (__raw_readb(GDROM_DMA_STATUS_REG) &&
|
||||
time_before(jiffies, timeout))
|
||||
cpu_relax();
|
||||
/* start transfer */
|
||||
__raw_writeb(1, GDROM_DMA_STATUS_REG);
|
||||
wait_event_interruptible_timeout(request_queue,
|
||||
gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
|
||||
err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
|
||||
gd.transfer = 0;
|
||||
gd.pending = 0;
|
||||
/* now seek to take the request spinlock
|
||||
* before handling ending the request */
|
||||
spin_lock(&gdrom_lock);
|
||||
list_del_init(&req->queuelist);
|
||||
__blk_end_request_all(req, err);
|
||||
}
|
||||
spin_unlock(&gdrom_lock);
|
||||
block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
|
||||
block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
|
||||
__raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG);
|
||||
__raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
|
||||
__raw_writel(1, GDROM_DMA_DIRECTION_REG);
|
||||
__raw_writel(1, GDROM_DMA_ENABLE_REG);
|
||||
read_command->cmd[2] = (block >> 16) & 0xFF;
|
||||
read_command->cmd[3] = (block >> 8) & 0xFF;
|
||||
read_command->cmd[4] = block & 0xFF;
|
||||
read_command->cmd[8] = (block_cnt >> 16) & 0xFF;
|
||||
read_command->cmd[9] = (block_cnt >> 8) & 0xFF;
|
||||
read_command->cmd[10] = block_cnt & 0xFF;
|
||||
/* set for DMA */
|
||||
__raw_writeb(1, GDROM_ERROR_REG);
|
||||
/* other registers */
|
||||
__raw_writeb(0, GDROM_SECNUM_REG);
|
||||
__raw_writeb(0, GDROM_BCL_REG);
|
||||
__raw_writeb(0, GDROM_BCH_REG);
|
||||
__raw_writeb(0, GDROM_DSEL_REG);
|
||||
__raw_writeb(0, GDROM_INTSEC_REG);
|
||||
/* Wait for registers to reset after any previous activity */
|
||||
timeout = jiffies + HZ / 2;
|
||||
while (gdrom_is_busy() && time_before(jiffies, timeout))
|
||||
cpu_relax();
|
||||
__raw_writeb(GDROM_COM_PACKET, GDROM_STATUSCOMMAND_REG);
|
||||
timeout = jiffies + HZ / 2;
|
||||
/* Wait for packet command to finish */
|
||||
while (gdrom_is_busy() && time_before(jiffies, timeout))
|
||||
cpu_relax();
|
||||
gd.pending = 1;
|
||||
gd.transfer = 1;
|
||||
outsw(GDROM_DATA_REG, &read_command->cmd, 6);
|
||||
timeout = jiffies + HZ / 2;
|
||||
/* Wait for any pending DMA to finish */
|
||||
while (__raw_readb(GDROM_DMA_STATUS_REG) &&
|
||||
time_before(jiffies, timeout))
|
||||
cpu_relax();
|
||||
/* start transfer */
|
||||
__raw_writeb(1, GDROM_DMA_STATUS_REG);
|
||||
wait_event_interruptible_timeout(request_queue,
|
||||
gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
|
||||
err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
|
||||
gd.transfer = 0;
|
||||
gd.pending = 0;
|
||||
|
||||
blk_mq_end_request(req, err);
|
||||
kfree(read_command);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void gdrom_request(struct request_queue *rq)
|
||||
static blk_status_t gdrom_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *req;
|
||||
blk_mq_start_request(bd->rq);
|
||||
|
||||
while ((req = blk_fetch_request(rq)) != NULL) {
|
||||
switch (req_op(req)) {
|
||||
case REQ_OP_READ:
|
||||
/*
|
||||
* Add to list of deferred work and then schedule
|
||||
* workqueue.
|
||||
*/
|
||||
list_add_tail(&req->queuelist, &gdrom_deferred);
|
||||
schedule_work(&work);
|
||||
break;
|
||||
case REQ_OP_WRITE:
|
||||
pr_notice("Read only device - write request ignored\n");
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
|
||||
__blk_end_request_all(req, BLK_STS_IOERR);
|
||||
break;
|
||||
}
|
||||
switch (req_op(bd->rq)) {
|
||||
case REQ_OP_READ:
|
||||
return gdrom_readdisk_dma(bd->rq);
|
||||
case REQ_OP_WRITE:
|
||||
pr_notice("Read only device - write request ignored\n");
|
||||
return BLK_STS_IOERR;
|
||||
default:
|
||||
printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -768,6 +743,10 @@ static int probe_gdrom_setupqueue(void)
|
|||
return gdrom_init_dma_mode();
|
||||
}
|
||||
|
||||
static const struct blk_mq_ops gdrom_mq_ops = {
|
||||
.queue_rq = gdrom_queue_rq,
|
||||
};
|
||||
|
||||
/*
|
||||
* register this as a block device and as compliant with the
|
||||
* universal CD Rom driver interface
|
||||
|
@ -811,11 +790,15 @@ static int probe_gdrom(struct platform_device *devptr)
|
|||
err = gdrom_set_interrupt_handlers();
|
||||
if (err)
|
||||
goto probe_fail_cmdirq_register;
|
||||
gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock);
|
||||
if (!gd.gdrom_rq) {
|
||||
err = -ENOMEM;
|
||||
|
||||
gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1,
|
||||
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
|
||||
if (IS_ERR(gd.gdrom_rq)) {
|
||||
rc = PTR_ERR(gd.gdrom_rq);
|
||||
gd.gdrom_rq = NULL;
|
||||
goto probe_fail_requestq;
|
||||
}
|
||||
|
||||
blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
|
||||
|
||||
err = probe_gdrom_setupqueue();
|
||||
|
@ -832,6 +815,7 @@ static int probe_gdrom(struct platform_device *devptr)
|
|||
|
||||
probe_fail_toc:
|
||||
blk_cleanup_queue(gd.gdrom_rq);
|
||||
blk_mq_free_tag_set(&gd.tag_set);
|
||||
probe_fail_requestq:
|
||||
free_irq(HW_EVENT_GDROM_DMA, &gd);
|
||||
free_irq(HW_EVENT_GDROM_CMD, &gd);
|
||||
|
@ -849,8 +833,8 @@ probe_fail_no_mem:
|
|||
|
||||
static int remove_gdrom(struct platform_device *devptr)
|
||||
{
|
||||
flush_work(&work);
|
||||
blk_cleanup_queue(gd.gdrom_rq);
|
||||
blk_mq_free_tag_set(&gd.tag_set);
|
||||
free_irq(HW_EVENT_GDROM_CMD, &gd);
|
||||
free_irq(HW_EVENT_GDROM_DMA, &gd);
|
||||
del_gendisk(gd.disk);
|
||||
|
|
|
@@ -1784,7 +1784,7 @@ static int ide_cd_probe(ide_drive_t *drive)
	ide_cd_read_toc(drive);
	g->fops = &idecd_ops;
	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
	device_add_disk(&drive->gendev, g);
	device_add_disk(&drive->gendev, g, NULL);
	return 0;

out_free_disk:
||||
|
|
|
@ -416,7 +416,7 @@ static int ide_gd_probe(ide_drive_t *drive)
|
|||
if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
|
||||
g->flags = GENHD_FL_REMOVABLE;
|
||||
g->fops = &ide_gd_ops;
|
||||
device_add_disk(&drive->gendev, g);
|
||||
device_add_disk(&drive->gendev, g, NULL);
|
||||
return 0;
|
||||
|
||||
out_free_disk:
|
||||
|
|
|
@ -4,8 +4,7 @@
|
|||
|
||||
menuconfig NVM
|
||||
bool "Open-Channel SSD target support"
|
||||
depends on BLOCK && PCI
|
||||
select BLK_DEV_NVME
|
||||
depends on BLOCK
|
||||
help
|
||||
Say Y here to get to enable Open-channel SSDs.
|
||||
|
||||
|
|
|
@@ -355,6 +355,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
return -EINVAL;
}

if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) {
pr_err("nvm: device is incompatible with target L2P type.\n");
return -EINVAL;
}

if (nvm_target_exists(create->tgtname)) {
pr_err("nvm: target name already exists (%s)\n",
create->tgtname);

@@ -598,22 +603,16 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,

static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
if (rqd->nr_ppas == 1) {
nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1);
return;
}
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);

nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas);
}

static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
if (rqd->nr_ppas == 1) {
nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1);
return;
}
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);

nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas);
}

int nvm_register_tgt_type(struct nvm_tgt_type *tt)
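
The two helpers above are rewritten around nvm_rq_to_ppa_list(), which hides the fact that a request carries a single inline PPA when nr_ppas == 1 and a separately allocated list otherwise. A minimal sketch of that selection helper is shown below, assuming only the struct fields visible in this diff (ppa_addr, ppa_list, nr_ppas); it is illustrative rather than the exact upstream definition.

/*
 * Illustrative version of the nr_ppas == 1 vs. PPA-list selection:
 * a request with a single PPA stores it inline, larger requests use
 * the DMA-allocated list.
 */
static inline struct ppa_addr *my_rq_to_ppa_list(struct nvm_rq *rqd)
{
	return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
}

The same ternary appears open-coded in several pblk call sites later in this series.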
@@ -712,45 +711,23 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
}

int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
struct ppa_addr ppa, int nchks)
static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
int flags = 0;

nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
if (geo->version == NVM_OCSSD_SPEC_20)
return 0;

return dev->ops->get_chk_meta(tgt_dev->parent, meta,
(sector_t)ppa.ppa, nchks);
if (rqd->is_seq)
flags |= geo->pln_mode >> 1;

if (rqd->opcode == NVM_OP_PREAD)
flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND);
else if (rqd->opcode == NVM_OP_PWRITE)
flags |= NVM_IO_SCRAMBLE_ENABLE;

return flags;
}
EXPORT_SYMBOL(nvm_get_chunk_meta);

int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
int nr_ppas, int type)
{
struct nvm_dev *dev = tgt_dev->parent;
struct nvm_rq rqd;
int ret;

if (nr_ppas > NVM_MAX_VLBA) {
pr_err("nvm: unable to update all blocks atomically\n");
return -EINVAL;
}

memset(&rqd, 0, sizeof(struct nvm_rq));

nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
nvm_rq_tgt_to_dev(tgt_dev, &rqd);

ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
nvm_free_rqd_ppalist(tgt_dev, &rqd);
if (ret) {
pr_err("nvm: failed bb mark\n");
return -EINVAL;
}

return 0;
}
EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);

int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{

@@ -763,6 +740,7 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
nvm_rq_tgt_to_dev(tgt_dev, rqd);

rqd->dev = tgt_dev;
rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);

/* In case of error, fail with right address format */
ret = dev->ops->submit_io(dev, rqd);

@@ -783,6 +761,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
nvm_rq_tgt_to_dev(tgt_dev, rqd);

rqd->dev = tgt_dev;
rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);

/* In case of error, fail with right address format */
ret = dev->ops->submit_io_sync(dev, rqd);
@ -805,27 +784,159 @@ void nvm_end_io(struct nvm_rq *rqd)
|
|||
}
|
||||
EXPORT_SYMBOL(nvm_end_io);
|
||||
|
||||
static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
|
||||
{
|
||||
if (!dev->ops->submit_io_sync)
|
||||
return -ENODEV;
|
||||
|
||||
rqd->flags = nvm_set_flags(&dev->geo, rqd);
|
||||
|
||||
return dev->ops->submit_io_sync(dev, rqd);
|
||||
}
|
||||
|
||||
static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
|
||||
{
|
||||
struct nvm_rq rqd = { NULL };
|
||||
struct bio bio;
|
||||
struct bio_vec bio_vec;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
bio_init(&bio, &bio_vec, 1);
|
||||
bio_add_page(&bio, page, PAGE_SIZE, 0);
|
||||
bio_set_op_attrs(&bio, REQ_OP_READ, 0);
|
||||
|
||||
rqd.bio = &bio;
|
||||
rqd.opcode = NVM_OP_PREAD;
|
||||
rqd.is_seq = 1;
|
||||
rqd.nr_ppas = 1;
|
||||
rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
|
||||
|
||||
ret = nvm_submit_io_sync_raw(dev, &rqd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
__free_page(page);
|
||||
|
||||
return rqd.error;
|
||||
}
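
nvm_bb_chunk_sense() above reads one page and reports its state through a three-way return: a negative value means the submission itself failed, zero means the page holds valid data, and a positive value is the device status word. The scan code that follows interprets that convention; below is a small standalone sketch of the same classification, using made-up status codes in place of the real NVM_RSP_* constants.

#include <stdio.h>

/* Hypothetical status codes standing in for the NVM_RSP_* values. */
#define ERR_EMPTYPAGE  0x42
#define ERR_FAILECC    0x40
#define WARN_HIGHECC   0x41

enum chunk_state { CHK_FREE, CHK_OPEN, CHK_IO_ERROR };

/*
 * Classify a chunk from the result of sensing its first page:
 * ret < 0  -> submission failed, propagate as an I/O error
 * ret == 0 -> the page holds valid data, so the chunk is at least open
 * ret > 0  -> device status: an empty page means free, correctable ECC
 *             trouble still counts as open, anything else is an error
 */
static enum chunk_state classify_first_page(int ret)
{
	if (ret < 0)
		return CHK_IO_ERROR;
	if (ret == 0)
		return CHK_OPEN;
	switch (ret) {
	case ERR_EMPTYPAGE:
		return CHK_FREE;
	case ERR_FAILECC:
	case WARN_HIGHECC:
		return CHK_OPEN;
	default:
		return CHK_IO_ERROR;
	}
}

int main(void)
{
	printf("empty page -> %d, valid data -> %d\n",
	       classify_first_page(ERR_EMPTYPAGE), classify_first_page(0));
	return 0;
}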
|
||||
|
||||
/*
|
||||
* folds a bad block list from its plane representation to its virtual
|
||||
* block representation. The fold is done in place and reduced size is
|
||||
* returned.
|
||||
*
|
||||
* If any of the planes status are bad or grown bad block, the virtual block
|
||||
* is marked bad. If not bad, the first plane state acts as the block state.
|
||||
* Scans a 1.2 chunk first and last page to determine if its state.
|
||||
* If the chunk is found to be open, also scan it to update the write
|
||||
* pointer.
|
||||
*/
|
||||
int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
|
||||
static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa,
|
||||
struct nvm_chk_meta *meta)
|
||||
{
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int blk, offset, pl, blktype;
|
||||
int ret, pg, pl;
|
||||
|
||||
if (nr_blks != geo->num_chk * geo->pln_mode)
|
||||
return -EINVAL;
|
||||
/* sense first page */
|
||||
ret = nvm_bb_chunk_sense(dev, ppa);
|
||||
if (ret < 0) /* io error */
|
||||
return ret;
|
||||
else if (ret == 0) /* valid data */
|
||||
meta->state = NVM_CHK_ST_OPEN;
|
||||
else if (ret > 0) {
|
||||
/*
|
||||
* If empty page, the chunk is free, else it is an
|
||||
* actual io error. In that case, mark it offline.
|
||||
*/
|
||||
switch (ret) {
|
||||
case NVM_RSP_ERR_EMPTYPAGE:
|
||||
meta->state = NVM_CHK_ST_FREE;
|
||||
return 0;
|
||||
case NVM_RSP_ERR_FAILCRC:
|
||||
case NVM_RSP_ERR_FAILECC:
|
||||
case NVM_RSP_WARN_HIGHECC:
|
||||
meta->state = NVM_CHK_ST_OPEN;
|
||||
goto scan;
|
||||
default:
|
||||
return -ret; /* other io error */
|
||||
}
|
||||
}
|
||||
|
||||
/* sense last page */
|
||||
ppa.g.pg = geo->num_pg - 1;
|
||||
ppa.g.pl = geo->num_pln - 1;
|
||||
|
||||
ret = nvm_bb_chunk_sense(dev, ppa);
|
||||
if (ret < 0) /* io error */
|
||||
return ret;
|
||||
else if (ret == 0) { /* Chunk fully written */
|
||||
meta->state = NVM_CHK_ST_CLOSED;
|
||||
meta->wp = geo->clba;
|
||||
return 0;
|
||||
} else if (ret > 0) {
|
||||
switch (ret) {
|
||||
case NVM_RSP_ERR_EMPTYPAGE:
|
||||
case NVM_RSP_ERR_FAILCRC:
|
||||
case NVM_RSP_ERR_FAILECC:
|
||||
case NVM_RSP_WARN_HIGHECC:
|
||||
meta->state = NVM_CHK_ST_OPEN;
|
||||
break;
|
||||
default:
|
||||
return -ret; /* other io error */
|
||||
}
|
||||
}
|
||||
|
||||
scan:
|
||||
/*
|
||||
* chunk is open, we scan sequentially to update the write pointer.
|
||||
* We make the assumption that targets write data across all planes
|
||||
* before moving to the next page.
|
||||
*/
|
||||
for (pg = 0; pg < geo->num_pg; pg++) {
|
||||
for (pl = 0; pl < geo->num_pln; pl++) {
|
||||
ppa.g.pg = pg;
|
||||
ppa.g.pl = pl;
|
||||
|
||||
ret = nvm_bb_chunk_sense(dev, ppa);
|
||||
if (ret < 0) /* io error */
|
||||
return ret;
|
||||
else if (ret == 0) {
|
||||
meta->wp += geo->ws_min;
|
||||
} else if (ret > 0) {
|
||||
switch (ret) {
|
||||
case NVM_RSP_ERR_EMPTYPAGE:
|
||||
return 0;
|
||||
case NVM_RSP_ERR_FAILCRC:
|
||||
case NVM_RSP_ERR_FAILECC:
|
||||
case NVM_RSP_WARN_HIGHECC:
|
||||
meta->wp += geo->ws_min;
|
||||
break;
|
||||
default:
|
||||
return -ret; /* other io error */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* folds a bad block list from its plane representation to its
|
||||
* chunk representation.
|
||||
*
|
||||
* If any of the planes status are bad or grown bad, the chunk is marked
|
||||
* offline. If not bad, the first plane state acts as the chunk state.
|
||||
*/
|
||||
static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
|
||||
u8 *blks, int nr_blks, struct nvm_chk_meta *meta)
|
||||
{
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int ret, blk, pl, offset, blktype;
|
||||
|
||||
for (blk = 0; blk < geo->num_chk; blk++) {
|
||||
offset = blk * geo->pln_mode;
|
||||
blktype = blks[offset];
|
||||
|
||||
/* Bad blocks on any planes take precedence over other types */
|
||||
for (pl = 0; pl < geo->pln_mode; pl++) {
|
||||
if (blks[offset + pl] &
|
||||
(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
|
||||
|
@ -834,23 +945,124 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
|
|||
}
|
||||
}
|
||||
|
||||
blks[blk] = blktype;
|
||||
ppa.g.blk = blk;
|
||||
|
||||
meta->wp = 0;
|
||||
meta->type = NVM_CHK_TP_W_SEQ;
|
||||
meta->wi = 0;
|
||||
meta->slba = generic_to_dev_addr(dev, ppa).ppa;
|
||||
meta->cnlb = dev->geo.clba;
|
||||
|
||||
if (blktype == NVM_BLK_T_FREE) {
|
||||
ret = nvm_bb_chunk_scan(dev, ppa, meta);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
meta->state = NVM_CHK_ST_OFFLINE;
|
||||
}
|
||||
|
||||
meta++;
|
||||
}
|
||||
|
||||
return geo->num_chk;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(nvm_bb_tbl_fold);
|
||||
|
||||
int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
|
||||
u8 *blks)
|
||||
static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba,
|
||||
int nchks, struct nvm_chk_meta *meta)
|
||||
{
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct ppa_addr ppa;
|
||||
u8 *blks;
|
||||
int ch, lun, nr_blks;
|
||||
int ret;
|
||||
|
||||
ppa.ppa = slba;
|
||||
ppa = dev_to_generic_addr(dev, ppa);
|
||||
|
||||
if (ppa.g.blk != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if ((nchks % geo->num_chk) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
nr_blks = geo->num_chk * geo->pln_mode;
|
||||
|
||||
blks = kmalloc(nr_blks, GFP_KERNEL);
|
||||
if (!blks)
|
||||
return -ENOMEM;
|
||||
|
||||
for (ch = ppa.g.ch; ch < geo->num_ch; ch++) {
|
||||
for (lun = ppa.g.lun; lun < geo->num_lun; lun++) {
|
||||
struct ppa_addr ppa_gen, ppa_dev;
|
||||
|
||||
if (!nchks)
|
||||
goto done;
|
||||
|
||||
ppa_gen.ppa = 0;
|
||||
ppa_gen.g.ch = ch;
|
||||
ppa_gen.g.lun = lun;
|
||||
ppa_dev = generic_to_dev_addr(dev, ppa_gen);
|
||||
|
||||
ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks,
|
||||
meta);
|
||||
if (ret)
|
||||
goto done;
|
||||
|
||||
meta += geo->num_chk;
|
||||
nchks -= geo->num_chk;
|
||||
}
|
||||
}
|
||||
done:
|
||||
kfree(blks);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
|
||||
int nchks, struct nvm_chk_meta *meta)
|
||||
{
|
||||
struct nvm_dev *dev = tgt_dev->parent;
|
||||
|
||||
nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
|
||||
|
||||
return dev->ops->get_bb_tbl(dev, ppa, blks);
|
||||
if (dev->geo.version == NVM_OCSSD_SPEC_12)
|
||||
return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta);
|
||||
|
||||
return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta);
|
||||
}
|
||||
EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
|
||||
EXPORT_SYMBOL_GPL(nvm_get_chunk_meta);
|
||||
|
||||
int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
|
||||
int nr_ppas, int type)
|
||||
{
|
||||
struct nvm_dev *dev = tgt_dev->parent;
|
||||
struct nvm_rq rqd;
|
||||
int ret;
|
||||
|
||||
if (dev->geo.version == NVM_OCSSD_SPEC_20)
|
||||
return 0;
|
||||
|
||||
if (nr_ppas > NVM_MAX_VLBA) {
|
||||
pr_err("nvm: unable to update all blocks atomically\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
|
||||
nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
|
||||
nvm_rq_tgt_to_dev(tgt_dev, &rqd);
|
||||
|
||||
ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
|
||||
nvm_free_rqd_ppalist(tgt_dev, &rqd);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvm_set_chunk_meta);
|
||||
|
||||
static int nvm_core_init(struct nvm_dev *dev)
|
||||
{
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
|
@ -16,7 +17,10 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
|
||||
#include "pblk.h"
|
||||
#include "pblk-trace.h"
|
||||
|
||||
static void pblk_line_mark_bb(struct work_struct *work)
|
||||
{
|
||||
|
@ -27,12 +31,12 @@ static void pblk_line_mark_bb(struct work_struct *work)
|
|||
struct ppa_addr *ppa = line_ws->priv;
|
||||
int ret;
|
||||
|
||||
ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
|
||||
ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
|
||||
if (ret) {
|
||||
struct pblk_line *line;
|
||||
int pos;
|
||||
|
||||
line = &pblk->lines[pblk_ppa_to_line(*ppa)];
|
||||
line = pblk_ppa_to_line(pblk, *ppa);
|
||||
pos = pblk_ppa_to_pos(&dev->geo, *ppa);
|
||||
|
||||
pblk_err(pblk, "failed to mark bb, line:%d, pos:%d\n",
|
||||
|
@ -80,19 +84,28 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
|
|||
struct pblk_line *line;
|
||||
int pos;
|
||||
|
||||
line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)];
|
||||
line = pblk_ppa_to_line(pblk, rqd->ppa_addr);
|
||||
pos = pblk_ppa_to_pos(geo, rqd->ppa_addr);
|
||||
chunk = &line->chks[pos];
|
||||
|
||||
atomic_dec(&line->left_seblks);
|
||||
|
||||
if (rqd->error) {
|
||||
trace_pblk_chunk_reset(pblk_disk_name(pblk),
|
||||
&rqd->ppa_addr, PBLK_CHUNK_RESET_FAILED);
|
||||
|
||||
chunk->state = NVM_CHK_ST_OFFLINE;
|
||||
pblk_mark_bb(pblk, line, rqd->ppa_addr);
|
||||
} else {
|
||||
trace_pblk_chunk_reset(pblk_disk_name(pblk),
|
||||
&rqd->ppa_addr, PBLK_CHUNK_RESET_DONE);
|
||||
|
||||
chunk->state = NVM_CHK_ST_FREE;
|
||||
}
|
||||
|
||||
trace_pblk_chunk_state(pblk_disk_name(pblk), &rqd->ppa_addr,
|
||||
chunk->state);
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
}
|
||||
|
||||
|
@ -108,9 +121,9 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
|
|||
/*
|
||||
* Get information for all chunks from the device.
|
||||
*
|
||||
* The caller is responsible for freeing the returned structure
|
||||
* The caller is responsible for freeing (vmalloc) the returned structure
|
||||
*/
|
||||
struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
|
||||
struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
@ -122,11 +135,11 @@ struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
|
|||
ppa.ppa = 0;
|
||||
|
||||
len = geo->all_chunks * sizeof(*meta);
|
||||
meta = kzalloc(len, GFP_KERNEL);
|
||||
meta = vzalloc(len);
|
||||
if (!meta)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks);
|
||||
ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta);
|
||||
if (ret) {
|
||||
kfree(meta);
|
||||
return ERR_PTR(-EIO);
|
||||
|
@ -192,7 +205,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
|
|||
{
|
||||
struct pblk_line *line;
|
||||
u64 paddr;
|
||||
int line_id;
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
/* Callers must ensure that the ppa points to a device address */
|
||||
|
@ -200,8 +212,7 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
|
|||
BUG_ON(pblk_ppa_empty(ppa));
|
||||
#endif
|
||||
|
||||
line_id = pblk_ppa_to_line(ppa);
|
||||
line = &pblk->lines[line_id];
|
||||
line = pblk_ppa_to_line(pblk, ppa);
|
||||
paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
|
||||
|
||||
__pblk_map_invalidate(pblk, line, paddr);
|
||||
|
@ -227,6 +238,33 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
|
|||
spin_unlock(&pblk->trans_lock);
|
||||
}
|
||||
|
||||
int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;

rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
&rqd->dma_meta_list);
if (!rqd->meta_list)
return -ENOMEM;

if (rqd->nr_ppas == 1)
return 0;

rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;

return 0;
}

void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;

if (rqd->meta_list)
nvm_dev_dma_free(dev->parent, rqd->meta_list,
rqd->dma_meta_list);
}
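
pblk_alloc_rqd_meta() above carves a single DMA allocation into two regions: the OOB metadata area at the start and, for multi-sector requests, a PPA list placed pblk_dma_meta_size bytes into the same buffer, so both views share one DMA handle. A small userspace-style sketch of that "one allocation, two views" layout, with hypothetical sizes standing in for the kernel constants, is below.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Hypothetical sizes standing in for pblk_dma_meta_size and friends. */
#define META_REGION_SIZE  1024
#define PPA_LIST_SIZE      512

struct my_rq {
	void     *meta_list;	/* start of the shared buffer */
	uint64_t *ppa_list;	/* second view, META_REGION_SIZE bytes in */
};

static int my_alloc_rq_meta(struct my_rq *rq, int nr_ppas)
{
	rq->meta_list = calloc(1, META_REGION_SIZE + PPA_LIST_SIZE);
	if (!rq->meta_list)
		return -1;

	/* Single-PPA requests keep the address inline; no list needed. */
	if (nr_ppas > 1)
		rq->ppa_list = (uint64_t *)((char *)rq->meta_list +
					    META_REGION_SIZE);
	return 0;
}

int main(void)
{
	struct my_rq rq = { 0 };

	if (my_alloc_rq_meta(&rq, 4))
		return 1;
	printf("meta at %p, ppa list at %p\n", rq.meta_list,
	       (void *)rq.ppa_list);
	free(rq.meta_list);
	return 0;
}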
|
||||
/* Caller must guarantee that the request is a valid type */
|
||||
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
|
||||
{
|
||||
|
@ -258,7 +296,6 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type)
|
|||
/* Typically used on completion path. Cannot guarantee request consistency */
|
||||
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
mempool_t *pool;
|
||||
|
||||
switch (type) {
|
||||
|
@ -279,9 +316,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type)
|
|||
return;
|
||||
}
|
||||
|
||||
if (rqd->meta_list)
|
||||
nvm_dev_dma_free(dev->parent, rqd->meta_list,
|
||||
rqd->dma_meta_list);
|
||||
pblk_free_rqd_meta(pblk, rqd);
|
||||
mempool_free(rqd, pool);
|
||||
}
|
||||
|
||||
|
@ -409,6 +444,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
|
|||
}
|
||||
} else {
|
||||
line->state = PBLK_LINESTATE_CORRUPT;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
|
||||
line->gc_group = PBLK_LINEGC_NONE;
|
||||
move_list = &l_mg->corrupt_list;
|
||||
pblk_err(pblk, "corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
|
||||
|
@ -479,9 +517,30 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
|
|||
return nvm_submit_io(dev, rqd);
|
||||
}
|
||||
|
||||
void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
|
||||
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rqd->nr_ppas; i++) {
|
||||
struct ppa_addr *ppa = &ppa_list[i];
|
||||
struct nvm_chk_meta *chunk = pblk_dev_ppa_to_chunk(pblk, *ppa);
|
||||
u64 caddr = pblk_dev_ppa_to_chunk_addr(pblk, *ppa);
|
||||
|
||||
if (caddr == 0)
|
||||
trace_pblk_chunk_state(pblk_disk_name(pblk),
|
||||
ppa, NVM_CHK_ST_OPEN);
|
||||
else if (caddr == chunk->cnlb)
|
||||
trace_pblk_chunk_state(pblk_disk_name(pblk),
|
||||
ppa, NVM_CHK_ST_CLOSED);
|
||||
}
|
||||
}
|
||||
|
||||
int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
int ret;
|
||||
|
||||
atomic_inc(&pblk->inflight_io);
|
||||
|
||||
|
@ -490,7 +549,27 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
|
|||
return NVM_IO_ERR;
|
||||
#endif
|
||||
|
||||
return nvm_submit_io_sync(dev, rqd);
|
||||
ret = nvm_submit_io_sync(dev, rqd);
|
||||
|
||||
if (trace_pblk_chunk_state_enabled() && !ret &&
|
||||
rqd->opcode == NVM_OP_PWRITE)
|
||||
pblk_check_chunk_state_update(pblk, rqd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct ppa_addr *ppa_list;
|
||||
int ret;
|
||||
|
||||
ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
|
||||
|
||||
pblk_down_chunk(pblk, ppa_list[0]);
|
||||
ret = pblk_submit_io_sync(pblk, rqd);
|
||||
pblk_up_chunk(pblk, ppa_list[0]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void pblk_bio_map_addr_endio(struct bio *bio)
|
||||
|
@ -621,12 +700,129 @@ u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
|
|||
return paddr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
|
||||
* taking the per LUN semaphore.
|
||||
*/
|
||||
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
|
||||
void *emeta_buf, u64 paddr, int dir)
|
||||
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int bit;
|
||||
|
||||
/* This usually only happens on bad lines */
|
||||
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
|
||||
if (bit >= lm->blk_per_line)
|
||||
return -1;
|
||||
|
||||
return bit * geo->ws_opt;
|
||||
}
|
||||
|
||||
int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct bio *bio;
|
||||
struct nvm_rq rqd;
|
||||
u64 paddr = pblk_line_smeta_start(pblk, line);
|
||||
int i, ret;
|
||||
|
||||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
|
||||
ret = pblk_alloc_rqd_meta(pblk, &rqd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = PTR_ERR(bio);
|
||||
goto clear_rqd;
|
||||
}
|
||||
|
||||
bio->bi_iter.bi_sector = 0; /* internal bio */
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, 0);
|
||||
|
||||
rqd.bio = bio;
|
||||
rqd.opcode = NVM_OP_PREAD;
|
||||
rqd.nr_ppas = lm->smeta_sec;
|
||||
rqd.is_seq = 1;
|
||||
|
||||
for (i = 0; i < lm->smeta_sec; i++, paddr++)
|
||||
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
|
||||
ret = pblk_submit_io_sync(pblk, &rqd);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
|
||||
bio_put(bio);
|
||||
goto clear_rqd;
|
||||
}
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error)
|
||||
pblk_log_read_err(pblk, &rqd);
|
||||
|
||||
clear_rqd:
|
||||
pblk_free_rqd_meta(pblk, &rqd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
|
||||
u64 paddr)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct bio *bio;
|
||||
struct nvm_rq rqd;
|
||||
__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
int i, ret;
|
||||
|
||||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
|
||||
ret = pblk_alloc_rqd_meta(pblk, &rqd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = PTR_ERR(bio);
|
||||
goto clear_rqd;
|
||||
}
|
||||
|
||||
bio->bi_iter.bi_sector = 0; /* internal bio */
|
||||
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
|
||||
|
||||
rqd.bio = bio;
|
||||
rqd.opcode = NVM_OP_PWRITE;
|
||||
rqd.nr_ppas = lm->smeta_sec;
|
||||
rqd.is_seq = 1;
|
||||
|
||||
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
|
||||
struct pblk_sec_meta *meta_list = rqd.meta_list;
|
||||
|
||||
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
meta_list[i].lba = lba_list[paddr] = addr_empty;
|
||||
}
|
||||
|
||||
ret = pblk_submit_io_sync_sem(pblk, &rqd);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
|
||||
bio_put(bio);
|
||||
goto clear_rqd;
|
||||
}
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error) {
|
||||
pblk_log_write_err(pblk, &rqd);
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
clear_rqd:
|
||||
pblk_free_rqd_meta(pblk, &rqd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
|
||||
void *emeta_buf)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
@ -635,24 +831,15 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
|
|||
void *ppa_list, *meta_list;
|
||||
struct bio *bio;
|
||||
struct nvm_rq rqd;
|
||||
u64 paddr = line->emeta_ssec;
|
||||
dma_addr_t dma_ppa_list, dma_meta_list;
|
||||
int min = pblk->min_write_pgs;
|
||||
int left_ppas = lm->emeta_sec[0];
|
||||
int id = line->id;
|
||||
int line_id = line->id;
|
||||
int rq_ppas, rq_len;
|
||||
int cmd_op, bio_op;
|
||||
int i, j;
|
||||
int ret;
|
||||
|
||||
if (dir == PBLK_WRITE) {
|
||||
bio_op = REQ_OP_WRITE;
|
||||
cmd_op = NVM_OP_PWRITE;
|
||||
} else if (dir == PBLK_READ) {
|
||||
bio_op = REQ_OP_READ;
|
||||
cmd_op = NVM_OP_PREAD;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
|
||||
&dma_meta_list);
|
||||
if (!meta_list)
|
||||
|
@ -675,66 +862,43 @@ next_rq:
|
|||
}
|
||||
|
||||
bio->bi_iter.bi_sector = 0; /* internal bio */
|
||||
bio_set_op_attrs(bio, bio_op, 0);
|
||||
bio_set_op_attrs(bio, REQ_OP_READ, 0);
|
||||
|
||||
rqd.bio = bio;
|
||||
rqd.meta_list = meta_list;
|
||||
rqd.ppa_list = ppa_list;
|
||||
rqd.dma_meta_list = dma_meta_list;
|
||||
rqd.dma_ppa_list = dma_ppa_list;
|
||||
rqd.opcode = cmd_op;
|
||||
rqd.opcode = NVM_OP_PREAD;
|
||||
rqd.nr_ppas = rq_ppas;
|
||||
|
||||
if (dir == PBLK_WRITE) {
|
||||
struct pblk_sec_meta *meta_list = rqd.meta_list;
|
||||
for (i = 0; i < rqd.nr_ppas; ) {
|
||||
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
|
||||
int pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
|
||||
for (i = 0; i < rqd.nr_ppas; ) {
|
||||
spin_lock(&line->lock);
|
||||
paddr = __pblk_alloc_page(pblk, line, min);
|
||||
spin_unlock(&line->lock);
|
||||
for (j = 0; j < min; j++, i++, paddr++) {
|
||||
meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
|
||||
rqd.ppa_list[i] =
|
||||
addr_to_gen_ppa(pblk, paddr, id);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < rqd.nr_ppas; ) {
|
||||
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
|
||||
int pos = pblk_ppa_to_pos(geo, ppa);
|
||||
int read_type = PBLK_READ_RANDOM;
|
||||
if (pblk_io_aligned(pblk, rq_ppas))
|
||||
rqd.is_seq = 1;
|
||||
|
||||
if (pblk_io_aligned(pblk, rq_ppas))
|
||||
read_type = PBLK_READ_SEQUENTIAL;
|
||||
rqd.flags = pblk_set_read_mode(pblk, read_type);
|
||||
|
||||
while (test_bit(pos, line->blk_bitmap)) {
|
||||
paddr += min;
|
||||
if (pblk_boundary_paddr_checks(pblk, paddr)) {
|
||||
pblk_err(pblk, "corrupt emeta line:%d\n",
|
||||
line->id);
|
||||
bio_put(bio);
|
||||
ret = -EINTR;
|
||||
goto free_rqd_dma;
|
||||
}
|
||||
|
||||
ppa = addr_to_gen_ppa(pblk, paddr, id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
}
|
||||
|
||||
if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
|
||||
pblk_err(pblk, "corrupt emeta line:%d\n",
|
||||
line->id);
|
||||
while (test_bit(pos, line->blk_bitmap)) {
|
||||
paddr += min;
|
||||
if (pblk_boundary_paddr_checks(pblk, paddr)) {
|
||||
bio_put(bio);
|
||||
ret = -EINTR;
|
||||
goto free_rqd_dma;
|
||||
}
|
||||
|
||||
for (j = 0; j < min; j++, i++, paddr++)
|
||||
rqd.ppa_list[i] =
|
||||
addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
ppa = addr_to_gen_ppa(pblk, paddr, line_id);
|
||||
pos = pblk_ppa_to_pos(geo, ppa);
|
||||
}
|
||||
|
||||
if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
|
||||
bio_put(bio);
|
||||
ret = -EINTR;
|
||||
goto free_rqd_dma;
|
||||
}
|
||||
|
||||
for (j = 0; j < min; j++, i++, paddr++)
|
||||
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
|
||||
}
|
||||
|
||||
ret = pblk_submit_io_sync(pblk, &rqd);
|
||||
|
@ -746,155 +910,36 @@ next_rq:
|
|||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error) {
|
||||
if (dir == PBLK_WRITE)
|
||||
pblk_log_write_err(pblk, &rqd);
|
||||
else
|
||||
pblk_log_read_err(pblk, &rqd);
|
||||
}
|
||||
if (rqd.error)
|
||||
pblk_log_read_err(pblk, &rqd);
|
||||
|
||||
emeta_buf += rq_len;
|
||||
left_ppas -= rq_ppas;
|
||||
if (left_ppas)
|
||||
goto next_rq;
|
||||
|
||||
free_rqd_dma:
|
||||
nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
int bit;
|
||||
|
||||
/* This usually only happens on bad lines */
|
||||
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
|
||||
if (bit >= lm->blk_per_line)
|
||||
return -1;
|
||||
|
||||
return bit * geo->ws_opt;
|
||||
}
|
||||
|
||||
static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
|
||||
u64 paddr, int dir)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct bio *bio;
|
||||
struct nvm_rq rqd;
|
||||
__le64 *lba_list = NULL;
|
||||
int i, ret;
|
||||
int cmd_op, bio_op;
|
||||
int flags;
|
||||
|
||||
if (dir == PBLK_WRITE) {
|
||||
bio_op = REQ_OP_WRITE;
|
||||
cmd_op = NVM_OP_PWRITE;
|
||||
flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
|
||||
lba_list = emeta_to_lbas(pblk, line->emeta->buf);
|
||||
} else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) {
|
||||
bio_op = REQ_OP_READ;
|
||||
cmd_op = NVM_OP_PREAD;
|
||||
flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
|
||||
rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
|
||||
&rqd.dma_meta_list);
|
||||
if (!rqd.meta_list)
|
||||
return -ENOMEM;
|
||||
|
||||
rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
|
||||
rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
|
||||
|
||||
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
|
||||
if (IS_ERR(bio)) {
|
||||
ret = PTR_ERR(bio);
|
||||
goto free_ppa_list;
|
||||
}
|
||||
|
||||
bio->bi_iter.bi_sector = 0; /* internal bio */
|
||||
bio_set_op_attrs(bio, bio_op, 0);
|
||||
|
||||
rqd.bio = bio;
|
||||
rqd.opcode = cmd_op;
|
||||
rqd.flags = flags;
|
||||
rqd.nr_ppas = lm->smeta_sec;
|
||||
|
||||
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
|
||||
struct pblk_sec_meta *meta_list = rqd.meta_list;
|
||||
|
||||
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
|
||||
|
||||
if (dir == PBLK_WRITE) {
|
||||
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
|
||||
|
||||
meta_list[i].lba = lba_list[paddr] = addr_empty;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This I/O is sent by the write thread when a line is replace. Since
|
||||
* the write thread is the only one sending write and erase commands,
|
||||
* there is no need to take the LUN semaphore.
|
||||
*/
|
||||
ret = pblk_submit_io_sync(pblk, &rqd);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
|
||||
bio_put(bio);
|
||||
goto free_ppa_list;
|
||||
}
|
||||
|
||||
atomic_dec(&pblk->inflight_io);
|
||||
|
||||
if (rqd.error) {
|
||||
if (dir == PBLK_WRITE) {
|
||||
pblk_log_write_err(pblk, &rqd);
|
||||
ret = 1;
|
||||
} else if (dir == PBLK_READ)
|
||||
pblk_log_read_err(pblk, &rqd);
|
||||
}
|
||||
|
||||
free_ppa_list:
|
||||
nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
u64 bpaddr = pblk_line_smeta_start(pblk, line);
|
||||
|
||||
return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ_RECOV);
|
||||
}
|
||||
|
||||
int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
|
||||
void *emeta_buf)
|
||||
{
|
||||
return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
|
||||
line->emeta_ssec, PBLK_READ);
|
||||
}
|
||||
|
||||
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
|
||||
struct ppa_addr ppa)
|
||||
{
|
||||
rqd->opcode = NVM_OP_ERASE;
|
||||
rqd->ppa_addr = ppa;
|
||||
rqd->nr_ppas = 1;
|
||||
rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE);
|
||||
rqd->is_seq = 1;
|
||||
rqd->bio = NULL;
|
||||
}
|
||||
|
||||
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
|
||||
{
|
||||
struct nvm_rq rqd;
|
||||
int ret = 0;
|
||||
struct nvm_rq rqd = {NULL};
|
||||
int ret;
|
||||
|
||||
memset(&rqd, 0, sizeof(struct nvm_rq));
|
||||
trace_pblk_chunk_reset(pblk_disk_name(pblk), &ppa,
|
||||
PBLK_CHUNK_RESET_START);
|
||||
|
||||
pblk_setup_e_rq(pblk, &rqd, ppa);
|
||||
|
||||
|
@ -902,19 +947,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
|
|||
* with writes. Thus, there is no need to take the LUN semaphore.
|
||||
*/
|
||||
ret = pblk_submit_io_sync(pblk, &rqd);
|
||||
if (ret) {
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
||||
pblk_err(pblk, "could not sync erase line:%d,blk:%d\n",
|
||||
pblk_ppa_to_line(ppa),
|
||||
pblk_ppa_to_pos(geo, ppa));
|
||||
|
||||
rqd.error = ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
rqd.private = pblk;
|
||||
__pblk_end_io_erase(pblk, &rqd);
|
||||
|
||||
|
@ -1008,6 +1040,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
|
|||
spin_lock(&l_mg->free_lock);
|
||||
spin_lock(&line->lock);
|
||||
line->state = PBLK_LINESTATE_BAD;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_add_tail(&line->list, &l_mg->bad_list);
|
||||
|
@ -1071,15 +1105,18 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
|
|||
static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
||||
line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
line->map_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
|
||||
if (!line->map_bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
memset(line->map_bitmap, 0, lm->sec_bitmap_len);
|
||||
|
||||
/* will be initialized using bb info from map_bitmap */
|
||||
line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
|
||||
line->invalid_bitmap = mempool_alloc(l_mg->bitmap_pool, GFP_KERNEL);
|
||||
if (!line->invalid_bitmap) {
|
||||
kfree(line->map_bitmap);
|
||||
mempool_free(line->map_bitmap, l_mg->bitmap_pool);
|
||||
line->map_bitmap = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
@ -1122,7 +1159,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
line->smeta_ssec = off;
|
||||
line->cur_sec = off + lm->smeta_sec;
|
||||
|
||||
if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
|
||||
if (init && pblk_line_smeta_write(pblk, line, off)) {
|
||||
pblk_debug(pblk, "line smeta I/O failed. Retry\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -1152,6 +1189,8 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
|
|||
bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
|
||||
spin_lock(&line->lock);
|
||||
line->state = PBLK_LINESTATE_BAD;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_add_tail(&line->list, &l_mg->bad_list);
|
||||
|
@ -1204,6 +1243,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
|||
if (line->state == PBLK_LINESTATE_NEW) {
|
||||
blk_to_erase = pblk_prepare_new_line(pblk, line);
|
||||
line->state = PBLK_LINESTATE_FREE;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
} else {
|
||||
blk_to_erase = blk_in_line;
|
||||
}
|
||||
|
@ -1221,6 +1262,8 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
|
|||
}
|
||||
|
||||
line->state = PBLK_LINESTATE_OPEN;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
|
||||
atomic_set(&line->left_eblks, blk_to_erase);
|
||||
atomic_set(&line->left_seblks, blk_to_erase);
|
||||
|
@ -1265,7 +1308,9 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
|
|||
|
||||
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
kfree(line->map_bitmap);
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
||||
mempool_free(line->map_bitmap, l_mg->bitmap_pool);
|
||||
line->map_bitmap = NULL;
|
||||
line->smeta = NULL;
|
||||
line->emeta = NULL;
|
||||
|
@ -1283,8 +1328,11 @@ static void pblk_line_reinit(struct pblk_line *line)
|
|||
|
||||
void pblk_line_free(struct pblk_line *line)
|
||||
{
|
||||
kfree(line->map_bitmap);
|
||||
kfree(line->invalid_bitmap);
|
||||
struct pblk *pblk = line->pblk;
|
||||
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
|
||||
|
||||
mempool_free(line->map_bitmap, l_mg->bitmap_pool);
|
||||
mempool_free(line->invalid_bitmap, l_mg->bitmap_pool);
|
||||
|
||||
pblk_line_reinit(line);
|
||||
}
|
||||
|
@ -1312,6 +1360,8 @@ retry:
|
|||
if (unlikely(bit >= lm->blk_per_line)) {
|
||||
spin_lock(&line->lock);
|
||||
line->state = PBLK_LINESTATE_BAD;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_add_tail(&line->list, &l_mg->bad_list);
|
||||
|
@ -1446,12 +1496,32 @@ retry_setup:
|
|||
return line;
|
||||
}
|
||||
|
||||
void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
|
||||
{
|
||||
struct pblk_line *line;
|
||||
|
||||
line = pblk_ppa_to_line(pblk, ppa);
|
||||
kref_put(&line->ref, pblk_line_put_wq);
|
||||
}
|
||||
|
||||
void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
|
||||
{
|
||||
struct ppa_addr *ppa_list;
|
||||
int i;
|
||||
|
||||
ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
|
||||
|
||||
for (i = 0; i < rqd->nr_ppas; i++)
|
||||
pblk_ppa_to_line_put(pblk, ppa_list[i]);
|
||||
}
|
||||
|
||||
static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
|
||||
{
|
||||
lockdep_assert_held(&pblk->l_mg.free_lock);
|
||||
|
||||
pblk_set_space_limit(pblk);
|
||||
pblk->state = PBLK_STATE_STOPPING;
|
||||
trace_pblk_state(pblk_disk_name(pblk), pblk->state);
|
||||
}
|
||||
|
||||
static void pblk_line_close_meta_sync(struct pblk *pblk)
|
||||
|
@ -1501,6 +1571,7 @@ void __pblk_pipeline_flush(struct pblk *pblk)
|
|||
return;
|
||||
}
|
||||
pblk->state = PBLK_STATE_RECOVERING;
|
||||
trace_pblk_state(pblk_disk_name(pblk), pblk->state);
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
pblk_flush_writer(pblk);
|
||||
|
@ -1522,6 +1593,7 @@ void __pblk_pipeline_stop(struct pblk *pblk)
|
|||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
pblk->state = PBLK_STATE_STOPPED;
|
||||
trace_pblk_state(pblk_disk_name(pblk), pblk->state);
|
||||
l_mg->data_line = NULL;
|
||||
l_mg->data_next = NULL;
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
@ -1539,13 +1611,14 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
|
|||
struct pblk_line *cur, *new = NULL;
|
||||
unsigned int left_seblks;
|
||||
|
||||
cur = l_mg->data_line;
|
||||
new = l_mg->data_next;
|
||||
if (!new)
|
||||
goto out;
|
||||
l_mg->data_line = new;
|
||||
|
||||
spin_lock(&l_mg->free_lock);
|
||||
cur = l_mg->data_line;
|
||||
l_mg->data_line = new;
|
||||
|
||||
pblk_line_setup_metadata(new, l_mg, &pblk->lm);
|
||||
spin_unlock(&l_mg->free_lock);
|
||||
|
||||
|
@ -1612,6 +1685,8 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
|
|||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_GC);
|
||||
line->state = PBLK_LINESTATE_FREE;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
line->gc_group = PBLK_LINEGC_NONE;
|
||||
pblk_line_free(line);
|
||||
|
||||
|
@ -1680,6 +1755,9 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
|
|||
rqd->end_io = pblk_end_io_erase;
|
||||
rqd->private = pblk;
|
||||
|
||||
trace_pblk_chunk_reset(pblk_disk_name(pblk),
|
||||
&ppa, PBLK_CHUNK_RESET_START);
|
||||
|
||||
/* The write thread schedules erases so that it minimizes disturbances
|
||||
* with writes. Thus, there is no need to take the LUN semaphore.
|
||||
*/
|
||||
|
@ -1689,7 +1767,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
|
|||
struct nvm_geo *geo = &dev->geo;
|
||||
|
||||
pblk_err(pblk, "could not async erase line:%d,blk:%d\n",
|
||||
pblk_ppa_to_line(ppa),
|
||||
pblk_ppa_to_line_id(ppa),
|
||||
pblk_ppa_to_pos(geo, ppa));
|
||||
}
|
||||
|
||||
|
@ -1741,10 +1819,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
|
|||
WARN_ON(line->state != PBLK_LINESTATE_OPEN);
|
||||
line->state = PBLK_LINESTATE_CLOSED;
|
||||
move_list = pblk_line_gc_list(pblk, line);
|
||||
|
||||
list_add_tail(&line->list, move_list);
|
||||
|
||||
kfree(line->map_bitmap);
|
||||
mempool_free(line->map_bitmap, l_mg->bitmap_pool);
|
||||
line->map_bitmap = NULL;
|
||||
line->smeta = NULL;
|
||||
line->emeta = NULL;
|
||||
|
@ -1760,6 +1837,9 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
|
|||
|
||||
spin_unlock(&line->lock);
|
||||
spin_unlock(&l_mg->gc_lock);
|
||||
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
}
|
||||
|
||||
void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
|
||||
|
@ -1778,6 +1858,17 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
|
|||
wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
|
||||
wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
|
||||
|
||||
if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) {
|
||||
emeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
|
||||
memcpy(emeta_buf->header.uuid, pblk->instance_uuid, 16);
|
||||
emeta_buf->header.id = cpu_to_le32(line->id);
|
||||
emeta_buf->header.type = cpu_to_le16(line->type);
|
||||
emeta_buf->header.version_major = EMETA_VERSION_MAJOR;
|
||||
emeta_buf->header.version_minor = EMETA_VERSION_MINOR;
|
||||
emeta_buf->header.crc = cpu_to_le32(
|
||||
pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
|
||||
}
|
||||
|
||||
emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
|
||||
emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
|
||||
|
||||
|
@ -1795,8 +1886,6 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
|
|||
spin_unlock(&l_mg->close_lock);
|
||||
|
||||
pblk_line_should_sync_meta(pblk);
|
||||
|
||||
|
||||
}
|
||||
|
||||
static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
|
||||
|
@ -1847,8 +1936,7 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
|
|||
queue_work(wq, &line_ws->ws);
|
||||
}
|
||||
|
||||
static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
|
||||
int nr_ppas, int pos)
|
||||
static void __pblk_down_chunk(struct pblk *pblk, int pos)
|
||||
{
|
||||
struct pblk_lun *rlun = &pblk->luns[pos];
|
||||
int ret;
|
||||
|
@ -1857,13 +1945,6 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
|
|||
* Only send one inflight I/O per LUN. Since we map at a page
|
||||
* granurality, all ppas in the I/O will map to the same LUN
|
||||
*/
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
int i;
|
||||
|
||||
for (i = 1; i < nr_ppas; i++)
|
||||
WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
|
||||
ppa_list[0].a.ch != ppa_list[i].a.ch);
|
||||
#endif
|
||||
|
||||
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
|
||||
if (ret == -ETIME || ret == -EINTR)
|
||||
|
@ -1871,21 +1952,21 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
|
|||
-ret);
|
||||
}
|
||||
|
||||
void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
|
||||
void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
|
||||
int pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
|
||||
__pblk_down_chunk(pblk, pos);
|
||||
}
|
||||
|
||||
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
|
||||
void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
|
||||
unsigned long *lun_bitmap)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
|
||||
int pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
/* If the LUN has been locked for this same request, do no attempt to
|
||||
* lock it again
|
||||
|
@ -1893,30 +1974,21 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
|
|||
if (test_and_set_bit(pos, lun_bitmap))
|
||||
return;
|
||||
|
||||
__pblk_down_page(pblk, ppa_list, nr_ppas, pos);
|
||||
__pblk_down_chunk(pblk, pos);
|
||||
}
|
||||
|
||||
void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
|
||||
void pblk_up_chunk(struct pblk *pblk, struct ppa_addr ppa)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_lun *rlun;
|
||||
int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
|
||||
|
||||
#ifdef CONFIG_NVM_PBLK_DEBUG
|
||||
int i;
|
||||
|
||||
for (i = 1; i < nr_ppas; i++)
|
||||
WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun ||
|
||||
ppa_list[0].a.ch != ppa_list[i].a.ch);
|
||||
#endif
|
||||
int pos = pblk_ppa_to_pos(geo, ppa);
|
||||
|
||||
rlun = &pblk->luns[pos];
|
||||
up(&rlun->wr_sem);
|
||||
}
|
||||
|
||||
void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
|
||||
unsigned long *lun_bitmap)
|
||||
void pblk_up_rq(struct pblk *pblk, unsigned long *lun_bitmap)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
|
@ -2060,8 +2132,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
|
|||
|
||||
/* If the L2P entry maps to a line, the reference is valid */
|
||||
if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
|
||||
int line_id = pblk_ppa_to_line(ppa);
|
||||
struct pblk_line *line = &pblk->lines[line_id];
|
||||
struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);
|
||||
|
||||
kref_get(&line->ref);
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
|
||||
|
@ -16,8 +17,10 @@
|
|||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
#include "pblk-trace.h"
|
||||
#include <linux/delay.h>
|
||||
|
||||
|
||||
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
|
||||
{
|
||||
if (gc_rq->data)
|
||||
|
@ -64,6 +67,8 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
|
|||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_GC);
|
||||
line->state = PBLK_LINESTATE_CLOSED;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
move_list = pblk_line_gc_list(pblk, line);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
|
@ -144,7 +149,7 @@ static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
|
|||
if (!emeta_buf)
|
||||
return NULL;
|
||||
|
||||
ret = pblk_line_read_emeta(pblk, line, emeta_buf);
|
||||
ret = pblk_line_emeta_read(pblk, line, emeta_buf);
|
||||
if (ret) {
|
||||
pblk_err(pblk, "line %d read emeta failed (%d)\n",
|
||||
line->id, ret);
|
||||
|
@ -405,6 +410,8 @@ void pblk_gc_free_full_lines(struct pblk *pblk)
|
|||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
|
||||
line->state = PBLK_LINESTATE_GC;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_del(&line->list);
|
||||
|
@ -451,6 +458,8 @@ next_gc_group:
|
|||
spin_lock(&line->lock);
|
||||
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
|
||||
line->state = PBLK_LINESTATE_GC;
|
||||
trace_pblk_line_state(pblk_disk_name(pblk), line->id,
|
||||
line->state);
|
||||
spin_unlock(&line->lock);
|
||||
|
||||
list_del(&line->list);
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
|
||||
* Copyright (C) 2016 CNEX Labs
|
||||
|
@ -19,15 +20,31 @@
|
|||
*/
|
||||
|
||||
#include "pblk.h"
|
||||
#include "pblk-trace.h"
|
||||
|
||||
static unsigned int write_buffer_size;
|
||||
|
||||
module_param(write_buffer_size, uint, 0644);
|
||||
MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
|
||||
|
||||
static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
|
||||
*pblk_w_rq_cache;
|
||||
static DECLARE_RWSEM(pblk_lock);
|
||||
struct pblk_global_caches {
|
||||
struct kmem_cache *ws;
|
||||
struct kmem_cache *rec;
|
||||
struct kmem_cache *g_rq;
|
||||
struct kmem_cache *w_rq;
|
||||
|
||||
struct kref kref;
|
||||
|
||||
struct mutex mutex; /* Ensures consistency between
|
||||
* caches and kref
|
||||
*/
|
||||
};
|
||||
|
||||
static struct pblk_global_caches pblk_caches = {
|
||||
.mutex = __MUTEX_INITIALIZER(pblk_caches.mutex),
|
||||
.kref = KREF_INIT(0),
|
||||
};
|
||||
|
||||
struct bio_set pblk_bio_set;
|
||||
|
||||
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
|
||||
|
@ -168,36 +185,26 @@ static void pblk_rwb_free(struct pblk *pblk)
|
|||
if (pblk_rb_tear_down_check(&pblk->rwb))
|
||||
pblk_err(pblk, "write buffer error on tear down\n");
|
||||
|
||||
pblk_rb_data_free(&pblk->rwb);
|
||||
vfree(pblk_rb_entries_ref(&pblk->rwb));
|
||||
pblk_rb_free(&pblk->rwb);
|
||||
}
|
||||
|
||||
static int pblk_rwb_init(struct pblk *pblk)
|
||||
{
|
||||
struct nvm_tgt_dev *dev = pblk->dev;
|
||||
struct nvm_geo *geo = &dev->geo;
|
||||
struct pblk_rb_entry *entries;
|
||||
unsigned long nr_entries, buffer_size;
|
||||
unsigned int power_size, power_seg_sz;
|
||||
int pgs_in_buffer;
|
||||
unsigned long buffer_size;
|
||||
int pgs_in_buffer, threshold;
|
||||
|
||||
pgs_in_buffer = max(geo->mw_cunits, geo->ws_opt) * geo->all_luns;
|
||||
threshold = geo->mw_cunits * geo->all_luns;
|
||||
pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt)
|
||||
* geo->all_luns;
|
||||
|
||||
if (write_buffer_size && (write_buffer_size > pgs_in_buffer))
|
||||
buffer_size = write_buffer_size;
|
||||
else
|
||||
buffer_size = pgs_in_buffer;
|
||||
|
||||
nr_entries = pblk_rb_calculate_size(buffer_size);
|
||||
|
||||
entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
|
||||
if (!entries)
|
||||
return -ENOMEM;
|
||||
|
||||
power_size = get_count_order(nr_entries);
|
||||
power_seg_sz = get_count_order(geo->csecs);
|
||||
|
||||
return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
|
||||
return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs);
|
||||
}
|
||||
|
||||
/* Minimum pages needed within a lun */
|
||||
|
@ -306,53 +313,80 @@ static int pblk_set_addrf(struct pblk *pblk)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int pblk_init_global_caches(struct pblk *pblk)
|
||||
static int pblk_create_global_caches(void)
|
||||
{
|
||||
down_write(&pblk_lock);
|
||||
pblk_ws_cache = kmem_cache_create("pblk_blk_ws",
|
||||
|
||||
pblk_caches.ws = kmem_cache_create("pblk_blk_ws",
|
||||
sizeof(struct pblk_line_ws), 0, 0, NULL);
|
||||
if (!pblk_ws_cache) {
|
||||
up_write(&pblk_lock);
|
||||
if (!pblk_caches.ws)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pblk_rec_cache = kmem_cache_create("pblk_rec",
|
||||
pblk_caches.rec = kmem_cache_create("pblk_rec",
|
||||
sizeof(struct pblk_rec_ctx), 0, 0, NULL);
|
||||
if (!pblk_rec_cache) {
|
||||
kmem_cache_destroy(pblk_ws_cache);
|
||||
up_write(&pblk_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!pblk_caches.rec)
|
||||
goto fail_destroy_ws;
|
||||
|
||||
pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
|
||||
pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
|
||||
0, 0, NULL);
|
||||
if (!pblk_g_rq_cache) {
|
||||
kmem_cache_destroy(pblk_ws_cache);
|
||||
kmem_cache_destroy(pblk_rec_cache);
|
||||
up_write(&pblk_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!pblk_caches.g_rq)
|
||||
goto fail_destroy_rec;
|
||||
|
||||
pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
|
||||
pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
|
||||
0, 0, NULL);
|
||||
if (!pblk_w_rq_cache) {
|
||||
kmem_cache_destroy(pblk_ws_cache);
|
||||
kmem_cache_destroy(pblk_rec_cache);
|
||||
kmem_cache_destroy(pblk_g_rq_cache);
|
||||
up_write(&pblk_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
up_write(&pblk_lock);
|
||||
if (!pblk_caches.w_rq)
|
||||
goto fail_destroy_g_rq;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_destroy_g_rq:
|
||||
kmem_cache_destroy(pblk_caches.g_rq);
|
||||
fail_destroy_rec:
|
||||
kmem_cache_destroy(pblk_caches.rec);
|
||||
fail_destroy_ws:
|
||||
kmem_cache_destroy(pblk_caches.ws);
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void pblk_free_global_caches(struct pblk *pblk)
static int pblk_get_global_caches(void)
{
kmem_cache_destroy(pblk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
int ret;

mutex_lock(&pblk_caches.mutex);

if (kref_read(&pblk_caches.kref) > 0) {
kref_get(&pblk_caches.kref);
mutex_unlock(&pblk_caches.mutex);
return 0;
}

ret = pblk_create_global_caches();

if (!ret)
kref_get(&pblk_caches.kref);

mutex_unlock(&pblk_caches.mutex);

return ret;
}

static void pblk_destroy_global_caches(struct kref *ref)
{
struct pblk_global_caches *c;

c = container_of(ref, struct pblk_global_caches, kref);

kmem_cache_destroy(c->ws);
kmem_cache_destroy(c->rec);
kmem_cache_destroy(c->g_rq);
kmem_cache_destroy(c->w_rq);
}

static void pblk_put_global_caches(void)
{
mutex_lock(&pblk_caches.mutex);
kref_put(&pblk_caches.kref, pblk_destroy_global_caches);
mutex_unlock(&pblk_caches.mutex);
}
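
The get/put pair above turns the pblk slab caches into a refcounted singleton: the first pblk instance creates the caches, later instances only bump the kref, and the last put destroys them, with the mutex keeping creation atomic with respect to the reference-count check. A self-contained userspace sketch of the same pattern (a pthread mutex and a plain counter standing in for the kref) follows; all names are illustrative.

#include <pthread.h>
#include <stdio.h>

/* Refcounted-singleton sketch: first get creates, last put destroys. */
static pthread_mutex_t caches_lock = PTHREAD_MUTEX_INITIALIZER;
static int caches_refs;		/* stands in for the kref */
static int caches_created;	/* stands in for the kmem caches */

static int create_caches(void)
{
	caches_created = 1;	/* kmem_cache_create() calls would go here */
	return 0;
}

static void destroy_caches(void)
{
	caches_created = 0;	/* kmem_cache_destroy() calls would go here */
}

static int get_caches(void)
{
	int ret = 0;

	pthread_mutex_lock(&caches_lock);
	if (caches_refs == 0)
		ret = create_caches();
	if (!ret)
		caches_refs++;
	pthread_mutex_unlock(&caches_lock);
	return ret;
}

static void put_caches(void)
{
	pthread_mutex_lock(&caches_lock);
	if (--caches_refs == 0)
		destroy_caches();
	pthread_mutex_unlock(&caches_lock);
}

int main(void)
{
	get_caches();	/* first instance creates */
	get_caches();	/* second instance only takes a reference */
	put_caches();
	put_caches();	/* last put tears down */
	printf("caches created: %d\n", caches_created);
	return 0;
}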
|
||||
static int pblk_core_init(struct pblk *pblk)
|
||||
|
@ -371,23 +405,19 @@ static int pblk_core_init(struct pblk *pblk)
|
|||
atomic64_set(&pblk->nr_flush, 0);
|
||||
pblk->nr_flush_rst = 0;
|
||||
|
||||
pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE);
|
||||
pblk->min_write_pgs = geo->ws_opt;
|
||||
max_write_ppas = pblk->min_write_pgs * geo->all_luns;
|
||||
pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
|
||||
pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
|
||||
queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT));
|
||||
pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
|
||||
|
||||
if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
|
||||
pblk_err(pblk, "vector list too big(%u > %u)\n",
|
||||
pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
|
||||
GFP_KERNEL);
|
||||
if (!pblk->pad_dist)
|
||||
return -ENOMEM;
|
||||
|
||||
if (pblk_init_global_caches(pblk))
|
||||
if (pblk_get_global_caches())
|
||||
goto fail_free_pad_dist;
|
||||
|
||||
/* Internal bios can be at most the sectors signaled by the device. */
|
||||
|
@@ -396,27 +426,27 @@ static int pblk_core_init(struct pblk *pblk)
		goto free_global_caches;

	ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
				     pblk_ws_cache);
				     pblk_caches.ws);
	if (ret)
		goto free_page_bio_pool;

	ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
				     pblk_rec_cache);
				     pblk_caches.rec);
	if (ret)
		goto free_gen_ws_pool;

	ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
				     pblk_g_rq_cache);
				     pblk_caches.g_rq);
	if (ret)
		goto free_rec_pool;

	ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
				     pblk_g_rq_cache);
				     pblk_caches.g_rq);
	if (ret)
		goto free_r_rq_pool;

	ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
				     pblk_w_rq_cache);
				     pblk_caches.w_rq);
	if (ret)
		goto free_e_rq_pool;

@@ -462,7 +492,7 @@ free_gen_ws_pool:
free_page_bio_pool:
	mempool_exit(&pblk->page_bio_pool);
free_global_caches:
	pblk_free_global_caches(pblk);
	pblk_put_global_caches();
fail_free_pad_dist:
	kfree(pblk->pad_dist);
	return -ENOMEM;
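The labels above follow the usual goto-based unwind style: each successfully initialized resource gets a label that releases it, and a later failure jumps to the deepest label reached so far so everything is torn down in reverse order; the only functional change in this hunk is that the shared caches are now released by dropping a reference (pblk_put_global_caches) rather than destroyed outright. A compact, self-contained illustration of the unwind pattern, with plain allocations standing in for the pblk pools and caches (names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	char *a, *b, *c;	/* stand-ins for pools/caches */
};

static int ctx_init(struct ctx *x)
{
	x->a = malloc(32);
	if (!x->a)
		return -1;

	x->b = malloc(32);
	if (!x->b)
		goto fail_free_a;

	x->c = malloc(32);
	if (!x->c)
		goto fail_free_b;

	return 0;

fail_free_b:		/* unwind in reverse order of setup */
	free(x->b);
fail_free_a:
	free(x->a);
	return -1;
}

static void ctx_free(struct ctx *x)
{
	free(x->c);
	free(x->b);
	free(x->a);
}

int main(void)
{
	struct ctx x;

	if (ctx_init(&x))
		return 1;
	ctx_free(&x);
	printf("init and teardown ok\n");
	return 0;
}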
@@ -486,7 +516,7 @@ static void pblk_core_free(struct pblk *pblk)
	mempool_exit(&pblk->e_rq_pool);
	mempool_exit(&pblk->w_rq_pool);

	pblk_free_global_caches(pblk);
	pblk_put_global_caches();
	kfree(pblk->pad_dist);
}

@@ -504,6 +534,9 @@ static void pblk_line_mg_free(struct pblk *pblk)
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	}

	mempool_destroy(l_mg->bitmap_pool);
	kmem_cache_destroy(l_mg->bitmap_cache);
}

static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
@@ -540,67 +573,6 @@ static void pblk_lines_free(struct pblk *pblk)
	kfree(pblk->lines);
}

static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun,
			   u8 *blks, int nr_blks)
{
	struct ppa_addr ppa;
	int ret;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;

	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	if (ret)
		return ret;

	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	if (nr_blks < 0)
		return -EIO;

	return 0;
}

static void *pblk_bb_get_meta(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	u8 *meta;
	int i, nr_blks, blk_per_lun;
	int ret;

	blk_per_lun = geo->num_chk * geo->pln_mode;
	nr_blks = blk_per_lun * geo->all_luns;

	meta = kmalloc(nr_blks, GFP_KERNEL);
	if (!meta)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < geo->all_luns; i++) {
		struct pblk_lun *rlun = &pblk->luns[i];
		u8 *meta_pos = meta + i * blk_per_lun;

		ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun);
		if (ret) {
			kfree(meta);
			return ERR_PTR(-EIO);
		}
	}

	return meta;
}

static void *pblk_chunk_get_meta(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	if (geo->version == NVM_OCSSD_SPEC_12)
		return pblk_bb_get_meta(pblk);
	else
		return pblk_chunk_get_info(pblk);
}

static int pblk_luns_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
@@ -699,51 +671,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
}

static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
				   void *chunk_meta)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	int i, chk_per_lun, nr_bad_chks = 0;

	chk_per_lun = geo->num_chk * geo->pln_mode;

	for (i = 0; i < lm->blk_per_line; i++) {
		struct pblk_lun *rlun = &pblk->luns[i];
		struct nvm_chk_meta *chunk;
		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
		u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;

		chunk = &line->chks[pos];

		/*
		 * In 1.2 spec. chunk state is not persisted by the device. Thus
		 * some of the values are reset each time pblk is instantiated,
		 * so we have to assume that the block is closed.
		 */
		if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
			chunk->state = NVM_CHK_ST_CLOSED;
		else
			chunk->state = NVM_CHK_ST_OFFLINE;

		chunk->type = NVM_CHK_TP_W_SEQ;
		chunk->wi = 0;
		chunk->slba = -1;
		chunk->cnlb = geo->clba;
		chunk->wp = 0;

		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
			continue;

		set_bit(pos, line->blk_bitmap);
		nr_bad_chks++;
	}

	return nr_bad_chks;
}

static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
				    struct nvm_chk_meta *meta)
{
	struct nvm_tgt_dev *dev = pblk->dev;
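pblk_setup_line_meta_12() disappears from this file as the 1.2 and 2.0 paths converge on pblk_setup_line_meta_chk(), but its body above documents what has to happen for 1.2 devices: chunk state is not persisted, so a block reported free is assumed closed, anything else is marked offline, and offline chunks are recorded in the line's bad-block bitmap and counted. A standalone sketch of that mapping over a toy bad-block table (the constants, counts, and table contents are illustrative only):

#include <stdio.h>

/* Toy versions of the 1.2 block types and chunk states. */
enum { BLK_T_FREE = 0, BLK_T_BAD = 1 };
enum { CHK_ST_CLOSED = 1 << 0, CHK_ST_OFFLINE = 1 << 1 };

#define BLK_PER_LINE	8

int main(void)
{
	/* One byte of bad-block info per chunk position in the line. */
	unsigned char bb_tbl[BLK_PER_LINE] = {
		BLK_T_FREE, BLK_T_FREE, BLK_T_BAD, BLK_T_FREE,
		BLK_T_FREE, BLK_T_BAD,  BLK_T_FREE, BLK_T_FREE,
	};
	unsigned int blk_bitmap = 0;	/* bit set = bad chunk */
	int state[BLK_PER_LINE];
	int nr_bad_chks = 0;

	for (int pos = 0; pos < BLK_PER_LINE; pos++) {
		/* 1.2 devices do not persist chunk state, so a free
		 * block is assumed closed; anything else is offline. */
		state[pos] = (bb_tbl[pos] == BLK_T_FREE) ?
					CHK_ST_CLOSED : CHK_ST_OFFLINE;

		if (!(state[pos] & CHK_ST_OFFLINE))
			continue;

		blk_bitmap |= 1u << pos;
		nr_bad_chks++;
	}

	printf("bad chunks: %d, bitmap: 0x%02x\n", nr_bad_chks, blk_bitmap);
	return 0;
}

With the toy table above this prints two bad chunks and bitmap 0x24; the real driver returns nr_bad_chks so the caller can decide whether the line still has enough good chunks to be usable.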
@@ -772,6 +700,9 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
		chunk->cnlb = chunk_meta->cnlb;
		chunk->wp = chunk_meta->wp;

		trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa,
					chunk->state);

		if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
			WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
			continue;

@@ -790,8 +721,6 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
				 void *chunk_meta, int line_id)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	long nr_bad_chks, chk_in_line;
@@ -804,10 +733,7 @@ static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
	line->vsc = &l_mg->vsc_list[line_id];
	spin_lock_init(&line->lock);

	if (geo->version == NVM_OCSSD_SPEC_12)
		nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta);
	else
		nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta);
	nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta);

	chk_in_line = lm->blk_per_line - nr_bad_chks;
	if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
@@ -913,6 +839,17 @@ static int pblk_line_mg_init(struct pblk *pblk)
			goto fail_free_smeta;
	}

	l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap",
			lm->sec_bitmap_len, 0, 0, NULL);
	if (!l_mg->bitmap_cache)
		goto fail_free_smeta;

	/* the bitmap pool is used for both valid and map bitmaps */
	l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2,
				l_mg->bitmap_cache);
	if (!l_mg->bitmap_pool)
		goto fail_destroy_bitmap_cache;

	/* emeta allocates three different buffers for managing metadata with
	 * in-memory and in-media layouts
	 */
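The added lines create a dedicated slab cache sized to sec_bitmap_len and a mempool of PBLK_DATA_LINES * 2 preallocated entries on top of it, since, per the comment, every active data line needs both a valid-sector bitmap and a map bitmap, and drawing them from a preallocated reserve avoids large allocations in the I/O path. A rough userspace sketch of such a fixed-size reserve, not the kernel mempool API (sizes and counts are placeholders):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DATA_LINES	4			/* placeholder for PBLK_DATA_LINES */
#define POOL_MIN	(DATA_LINES * 2)	/* one valid + one map bitmap/line */
#define BITMAP_BYTES	64			/* placeholder for sec_bitmap_len */

struct bitmap_pool {
	void *reserve[POOL_MIN];
	int nr_free;
};

/* Preallocate the reserve up front so later requests cannot fail. */
static int pool_init(struct bitmap_pool *p)
{
	p->nr_free = 0;
	for (int i = 0; i < POOL_MIN; i++) {
		void *b = malloc(BITMAP_BYTES);
		if (!b) {
			while (p->nr_free)
				free(p->reserve[--p->nr_free]);
			return -1;
		}
		p->reserve[p->nr_free++] = b;
	}
	return 0;
}

static void *pool_get(struct bitmap_pool *p)
{
	if (!p->nr_free)
		return NULL;		/* all bitmaps are in use */
	void *b = p->reserve[--p->nr_free];
	memset(b, 0, BITMAP_BYTES);	/* hand out a clean bitmap */
	return b;
}

static void pool_put(struct bitmap_pool *p, void *b)
{
	p->reserve[p->nr_free++] = b;	/* return it to the reserve */
}

static void pool_destroy(struct bitmap_pool *p)
{
	while (p->nr_free)
		free(p->reserve[--p->nr_free]);
}

int main(void)
{
	struct bitmap_pool pool;

	if (pool_init(&pool))
		return 1;

	void *valid_bmp = pool_get(&pool);
	void *map_bmp = pool_get(&pool);

	pool_put(&pool, map_bmp);
	pool_put(&pool, valid_bmp);
	pool_destroy(&pool);
	printf("bitmap pool exercised\n");
	return 0;
}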
@@ -965,6 +902,10 @@ fail_free_emeta:
		kfree(l_mg->eline_meta[i]->buf);
		kfree(l_mg->eline_meta[i]);
	}

	mempool_destroy(l_mg->bitmap_pool);
fail_destroy_bitmap_cache:
	kmem_cache_destroy(l_mg->bitmap_cache);
fail_free_smeta:
	for (i = 0; i < PBLK_DATA_LINES; i++)
		kfree(l_mg->sline_meta[i]);
@@ -1058,7 +999,7 @@ static int pblk_lines_init(struct pblk *pblk)
	if (ret)
		goto fail_free_meta;

	chunk_meta = pblk_chunk_get_meta(pblk);
	chunk_meta = pblk_get_chunk_meta(pblk);
	if (IS_ERR(chunk_meta)) {
		ret = PTR_ERR(chunk_meta);
		goto fail_free_luns;
@@ -1079,16 +1020,20 @@ static int pblk_lines_init(struct pblk *pblk)
			goto fail_free_lines;

		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);

		trace_pblk_line_state(pblk_disk_name(pblk), line->id,
								line->state);
	}

	if (!nr_free_chks) {
		pblk_err(pblk, "too many bad blocks prevent for sane instance\n");
		return -EINTR;
		ret = -EINTR;
		goto fail_free_lines;
	}

	pblk_set_provision(pblk, nr_free_chks);

	kfree(chunk_meta);
	vfree(chunk_meta);
	return 0;

fail_free_lines:
@@ -1165,7 +1110,6 @@ static void pblk_exit(void *private, bool graceful)
{
	struct pblk *pblk = private;

	down_write(&pblk_lock);
	pblk_gc_exit(pblk, graceful);
	pblk_tear_down(pblk, graceful);

@@ -1174,7 +1118,6 @@ static void pblk_exit(void *private, bool graceful)
#endif

	pblk_free(pblk);
	up_write(&pblk_lock);
}

static sector_t pblk_capacity(void *private)
@@ -1200,6 +1143,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
	pblk->dev = dev;
	pblk->disk = tdisk;
	pblk->state = PBLK_STATE_RUNNING;
	trace_pblk_state(pblk_disk_name(pblk), pblk->state);
	pblk->gc.gc_enabled = 0;

	if (!(geo->version == NVM_OCSSD_SPEC_12 ||

@@ -1210,13 +1154,6 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		return ERR_PTR(-EINVAL);
	}

	if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) {
		pblk_err(pblk, "host-side L2P table not supported. (%x)\n",
							geo->dom);
		kfree(pblk);
		return ERR_PTR(-EINVAL);
	}

	spin_lock_init(&pblk->resubmit_lock);
	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);