/*
 * Definitions for the NVM Express interface
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#ifndef _LINUX_NVME_H
#define _LINUX_NVME_H

#include <uapi/linux/nvme.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
/*
 * Memory-mapped controller register layout (BAR 0), as laid out by the
 * NVM Express specification.  Accessed through nvme_dev->bar.
 */
struct nvme_bar {
	__u64	cap;	/* Controller Capabilities */
	__u32	vs;	/* Version */
	__u32	intms;	/* Interrupt Mask Set */
	__u32	intmc;	/* Interrupt Mask Clear */
	__u32	cc;	/* Controller Configuration */
	__u32	rsvd1;	/* Reserved */
	__u32	csts;	/* Controller Status */
	__u32	rsvd2;	/* Reserved */
	__u32	aqa;	/* Admin Queue Attributes */
	__u64	asq;	/* Admin SQ Base Address */
	__u64	acq;	/* Admin CQ Base Address */
};
/* Field extractors for the 64-bit CAP (Controller Capabilities) register */
#define NVME_CAP_MQES(cap)	((cap) & 0xffff)	/* max queue entries supported */
#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)	/* worst-case ready timeout */
#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)	/* doorbell stride */
#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)	/* min memory page size */
#define NVME_CAP_MPSMAX(cap)	(((cap) >> 52) & 0xf)	/* max memory page size */
/*
 * Bit definitions for the CC (Controller Configuration) and CSTS
 * (Controller Status) registers.
 */
enum {
	NVME_CC_ENABLE		= 1 << 0,	/* EN: enable the controller */
	NVME_CC_CSS_NVM		= 0 << 4,	/* CSS: select the NVM command set */
	NVME_CC_MPS_SHIFT	= 7,		/* MPS: memory page size field offset */
	NVME_CC_ARB_RR		= 0 << 11,	/* AMS: round-robin arbitration */
	NVME_CC_ARB_WRRU	= 1 << 11,	/* AMS: weighted round-robin w/ urgent */
	NVME_CC_ARB_VS		= 7 << 11,	/* AMS: vendor-specific arbitration */
	NVME_CC_SHN_NONE	= 0 << 14,	/* SHN: no shutdown requested */
	NVME_CC_SHN_NORMAL	= 1 << 14,	/* SHN: normal shutdown */
	NVME_CC_SHN_ABRUPT	= 2 << 14,	/* SHN: abrupt shutdown */
	NVME_CC_SHN_MASK	= 3 << 14,	/* mask covering the SHN field */
	NVME_CC_IOSQES		= 6 << 16,	/* I/O SQ entry size: 2^6 = 64 bytes */
	NVME_CC_IOCQES		= 4 << 20,	/* I/O CQ entry size: 2^4 = 16 bytes */
	NVME_CSTS_RDY		= 1 << 0,	/* RDY: controller is ready */
	NVME_CSTS_CFS		= 1 << 1,	/* CFS: controller fatal status */
	NVME_CSTS_SHST_NORMAL	= 0 << 2,	/* SHST: normal operation */
	NVME_CSTS_SHST_OCCUR	= 1 << 2,	/* SHST: shutdown in progress */
	NVME_CSTS_SHST_CMPLT	= 2 << 2,	/* SHST: shutdown complete */
	NVME_CSTS_SHST_MASK	= 3 << 2,	/* mask covering the SHST field */
};
/* I/O command timeout in seconds; defined by the core driver. */
extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)	/* same timeout, in jiffies */
/*
 * Represents an NVM Express device.  Each nvme_dev is a PCI function.
 */
struct nvme_dev {
	struct list_head node;		/* link in the driver's device list */
	struct nvme_queue **queues;	/* per-device queue pairs */
	struct request_queue *admin_q;	/* blk-mq queue for admin commands */
	struct blk_mq_tag_set tagset;	/* blk-mq tag set for the I/O queues */
	struct blk_mq_tag_set admin_tagset; /* blk-mq tag set for the admin queue */
	u32 __iomem *dbs;		/* mapped doorbell registers */
	struct device *dev;
	struct dma_pool *prp_page_pool;	/* DMA pool for page-sized PRP lists */
	struct dma_pool *prp_small_pool; /* DMA pool for small PRP lists */
	int instance;			/* controller instance number */
	unsigned queue_count;		/* queues allocated */
	unsigned online_queues;		/* queues currently usable */
	unsigned max_qid;		/* highest valid queue id */
	int q_depth;			/* entries per queue */
	u32 db_stride;			/* doorbell stride (from CAP) */
	u32 ctrl_config;		/* cached CC register value */
	struct msix_entry *entry;	/* MSI-X vector table */
	struct nvme_bar __iomem *bar;	/* mapped controller registers */
	struct list_head namespaces;	/* nvme_ns instances on this controller */
	struct kref kref;		/* device reference count */
	struct device *device;		/* NOTE(review): presumably the char/sysfs
					 * device node — confirm against driver core */
	work_func_t reset_workfn;	/* function run by reset_work */
	struct work_struct reset_work;
	struct work_struct probe_work;
	char name[12];
	char serial[20];		/* controller serial number */
	char model[40];			/* controller model string */
	char firmware_rev[8];		/* firmware revision string */
	u32 max_hw_sectors;		/* transfer size limit */
	u32 stripe_size;
	u32 page_size;			/* controller memory page size */
	u16 oncs;			/* Optional NVM Command Support bits */
	u16 abort_limit;		/* max outstanding Abort commands */
	u8 event_limit;			/* max outstanding async event requests */
	u8 vwc;				/* volatile write cache capability */
};
/*
 * An NVM Express namespace is equivalent to a SCSI LUN
 */
struct nvme_ns {
	struct list_head list;		/* link in nvme_dev->namespaces */

	struct nvme_dev *dev;		/* owning controller */
	struct request_queue *queue;	/* blk-mq request queue for this namespace */
	struct gendisk *disk;		/* block device exposed to the system */

	unsigned ns_id;			/* namespace identifier (NSID) */
	int lba_shift;			/* log2 of the logical block size */
	u16 ms;				/* metadata size per block, 0 if none */
	bool ext;			/* metadata interleaved with data (extended LBA) */
	u8 pi_type;			/* T10 protection information type, 0 = none */
	u64 mode_select_num_blocks;	/* capacity reported via SCSI emulation */
	u32 mode_select_block_len;	/* block length reported via SCSI emulation */
};
/*
 * The nvme_iod describes the data in an I/O, including the list of PRP
 * entries.  You can't see it in this data structure because C doesn't let
 * me express that.  Use nvme_alloc_iod to ensure there's enough space
 * allocated to store the PRP list.
 */
struct nvme_iod {
	unsigned long private;	/* For the use of the submitter of the I/O */
	int npages;		/* In the PRP list. 0 means small pool in use */
	int offset;		/* Of PRP list */
	int nents;		/* Used in scatterlist */
	int length;		/* Of data, in bytes */
	dma_addr_t first_dma;	/* first PRP address (see nvme_setup_prps) */
	struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
	struct scatterlist sg[0];	/* data scatterlist, allocated past the struct */
};
/*
 * Convert a 512-byte-unit sector number into this namespace's logical
 * block number (logical block size is 1 << ns->lba_shift bytes).
 */
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
	int sector_to_block_shift = ns->lba_shift - 9;

	return sector >> sector_to_block_shift;
}
/**
 * nvme_free_iod - frees an nvme_iod
 * @dev: The device that the I/O was submitted to
 * @iod: The memory to free
 */
void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);

/* Build the PRP entries in an iod for a transfer of the given length. */
int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t);

/* Map a user-space buffer for DMA; pairs with nvme_unmap_user_pages(). */
struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
				unsigned long addr, unsigned length);
void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
			struct nvme_iod *iod);

/* Submit a command on @q and wait for its completion. */
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd);

/* Synchronous admin commands: Identify, Get Features, Set Features. */
int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
							dma_addr_t dma_addr);
int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
			dma_addr_t dma_addr, u32 *result);
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
			dma_addr_t dma_addr, u32 *result);

struct sg_io_hdr;

/* SCSI-to-NVMe translation entry points (SG_IO ioctl emulation). */
int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip);

#endif /* _LINUX_NVME_H */