OpenCloudOS-Kernel/drivers/scsi/megaraid/megaraid_sas.h

2733 lines
65 KiB
C
Raw Normal View History

treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 13 Based on 2 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details you should have received a copy of the gnu general public license along with this program if not see http www gnu org licenses this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details [based] [from] [clk] [highbank] [c] you should have received a copy of the gnu general public license along with this program if not see http www gnu org licenses extracted by the scancode license scanner the SPDX license identifier GPL-2.0-or-later has been chosen to replace the boilerplate/reference in 355 file(s). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org> Reviewed-by: Jilayne Lovejoy <opensource@jilayne.com> Reviewed-by: Steve Winslow <swinslow@gmail.com> Reviewed-by: Allison Randal <allison@lohutok.net> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190519154041.837383322@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-05-19 21:51:43 +08:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Linux MegaRAID driver for SAS based RAID controllers
*
* Copyright (c) 2003-2013 LSI Corporation
* Copyright (c) 2013-2016 Avago Technologies
* Copyright (c) 2016-2018 Broadcom Inc.
*
* FILE: megaraid_sas.h
*
* Authors: Broadcom Inc.
* Kashyap Desai <kashyap.desai@broadcom.com>
* Sumit Saxena <sumit.saxena@broadcom.com>
*
* Send feedback to: megaraidlinux.pdl@broadcom.com
*/
#ifndef LSI_MEGARAID_SAS_H
#define LSI_MEGARAID_SAS_H
/*
* MegaRAID SAS Driver meta data
*/
#define MEGASAS_VERSION "07.714.04.00-rc1"
#define MEGASAS_RELDATE "Apr 14, 2020"
#define MEGASAS_MSIX_NAME_LEN 32
/*
* Device IDs
*/
#define PCI_DEVICE_ID_LSI_SAS1078R 0x0060
#define PCI_DEVICE_ID_LSI_SAS1078DE 0x007C
#define PCI_DEVICE_ID_LSI_VERDE_ZCR 0x0413
#define PCI_DEVICE_ID_LSI_SAS1078GEN2 0x0078
#define PCI_DEVICE_ID_LSI_SAS0079GEN2 0x0079
#define PCI_DEVICE_ID_LSI_SAS0073SKINNY 0x0073
#define PCI_DEVICE_ID_LSI_SAS0071SKINNY 0x0071
#define PCI_DEVICE_ID_LSI_FUSION 0x005b
#define PCI_DEVICE_ID_LSI_PLASMA 0x002f
#define PCI_DEVICE_ID_LSI_INVADER 0x005d
#define PCI_DEVICE_ID_LSI_FURY 0x005f
#define PCI_DEVICE_ID_LSI_INTRUDER 0x00ce
#define PCI_DEVICE_ID_LSI_INTRUDER_24 0x00cf
#define PCI_DEVICE_ID_LSI_CUTLASS_52 0x0052
#define PCI_DEVICE_ID_LSI_CUTLASS_53 0x0053
#define PCI_DEVICE_ID_LSI_VENTURA 0x0014
#define PCI_DEVICE_ID_LSI_CRUSADER 0x0015
#define PCI_DEVICE_ID_LSI_HARPOON 0x0016
#define PCI_DEVICE_ID_LSI_TOMCAT 0x0017
#define PCI_DEVICE_ID_LSI_VENTURA_4PORT 0x001B
#define PCI_DEVICE_ID_LSI_CRUSADER_4PORT 0x001C
#define PCI_DEVICE_ID_LSI_AERO_10E1 0x10e1
#define PCI_DEVICE_ID_LSI_AERO_10E2 0x10e2
#define PCI_DEVICE_ID_LSI_AERO_10E5 0x10e5
#define PCI_DEVICE_ID_LSI_AERO_10E6 0x10e6
#define PCI_DEVICE_ID_LSI_AERO_10E0 0x10e0
#define PCI_DEVICE_ID_LSI_AERO_10E3 0x10e3
#define PCI_DEVICE_ID_LSI_AERO_10E4 0x10e4
#define PCI_DEVICE_ID_LSI_AERO_10E7 0x10e7
/*
* Intel HBA SSDIDs
*/
#define MEGARAID_INTEL_RS3DC080_SSDID 0x9360
#define MEGARAID_INTEL_RS3DC040_SSDID 0x9362
#define MEGARAID_INTEL_RS3SC008_SSDID 0x9380
#define MEGARAID_INTEL_RS3MC044_SSDID 0x9381
#define MEGARAID_INTEL_RS3WC080_SSDID 0x9341
#define MEGARAID_INTEL_RS3WC040_SSDID 0x9343
#define MEGARAID_INTEL_RMS3BC160_SSDID 0x352B
/*
* Intruder HBA SSDIDs
*/
#define MEGARAID_INTRUDER_SSDID1 0x9371
#define MEGARAID_INTRUDER_SSDID2 0x9390
#define MEGARAID_INTRUDER_SSDID3 0x9370
/*
* Intel HBA branding
*/
#define MEGARAID_INTEL_RS3DC080_BRANDING \
"Intel(R) RAID Controller RS3DC080"
#define MEGARAID_INTEL_RS3DC040_BRANDING \
"Intel(R) RAID Controller RS3DC040"
#define MEGARAID_INTEL_RS3SC008_BRANDING \
"Intel(R) RAID Controller RS3SC008"
#define MEGARAID_INTEL_RS3MC044_BRANDING \
"Intel(R) RAID Controller RS3MC044"
#define MEGARAID_INTEL_RS3WC080_BRANDING \
"Intel(R) RAID Controller RS3WC080"
#define MEGARAID_INTEL_RS3WC040_BRANDING \
"Intel(R) RAID Controller RS3WC040"
#define MEGARAID_INTEL_RMS3BC160_BRANDING \
"Intel(R) Integrated RAID Module RMS3BC160"
/*
* =====================================
* MegaRAID SAS MFI firmware definitions
* =====================================
*/
/*
* MFI stands for MegaRAID SAS FW Interface. This is just a moniker for
* protocol between the software and firmware. Commands are issued using
* "message frames"
*/
/*
* FW posts its state in upper 4 bits of outbound_msg_0 register
*/
#define MFI_STATE_MASK 0xF0000000
#define MFI_STATE_UNDEFINED 0x00000000
#define MFI_STATE_BB_INIT 0x10000000
#define MFI_STATE_FW_INIT 0x40000000
#define MFI_STATE_WAIT_HANDSHAKE 0x60000000
#define MFI_STATE_FW_INIT_2 0x70000000
#define MFI_STATE_DEVICE_SCAN 0x80000000
#define MFI_STATE_BOOT_MESSAGE_PENDING 0x90000000
#define MFI_STATE_FLUSH_CACHE 0xA0000000
#define MFI_STATE_READY 0xB0000000
#define MFI_STATE_OPERATIONAL 0xC0000000
#define MFI_STATE_FAULT 0xF0000000
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
#define MFI_STATE_FORCE_OCR 0x00000080
#define MFI_STATE_DMADONE 0x00000008
#define MFI_STATE_CRASH_DUMP_DONE 0x00000004
#define MFI_RESET_REQUIRED 0x00000001
#define MFI_RESET_ADAPTER 0x00000002
#define MEGAMFI_FRAME_SIZE 64
#define MFI_STATE_FAULT_CODE 0x0FFF0000
#define MFI_STATE_FAULT_SUBCODE 0x0000FF00
/*
* During FW init, clear pending cmds & reset state using inbound_msg_0
*
* ABORT : Abort all pending cmds
* READY : Move from OPERATIONAL to READY state; discard queue info
* MFIMODE : Discard (possible) low MFA posted in 64-bit mode (??)
* CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver
* HOTPLUG : Resume from Hotplug
* MFI_STOP_ADP : Send signal to FW to stop processing
* MFI_ADP_TRIGGER_SNAP_DUMP: Inform firmware to initiate snap dump
*/
#define WRITE_SEQUENCE_OFFSET (0x0000000FC) /* I20 */
#define HOST_DIAGNOSTIC_OFFSET (0x000000F8) /* I20 */
#define DIAG_WRITE_ENABLE (0x00000080)
#define DIAG_RESET_ADAPTER (0x00000004)
#define MFI_ADP_RESET 0x00000040
#define MFI_INIT_ABORT 0x00000001
#define MFI_INIT_READY 0x00000002
#define MFI_INIT_MFIMODE 0x00000004
#define MFI_INIT_CLEAR_HANDSHAKE 0x00000008
#define MFI_INIT_HOTPLUG 0x00000010
#define MFI_STOP_ADP 0x00000020
#define MFI_RESET_FLAGS MFI_INIT_READY| \
MFI_INIT_MFIMODE| \
MFI_INIT_ABORT
#define MFI_ADP_TRIGGER_SNAP_DUMP 0x00000100
#define MPI2_IOCINIT_MSGFLAG_RDPQ_ARRAY_MODE (0x01)
/*
* MFI frame flags
*/
#define MFI_FRAME_POST_IN_REPLY_QUEUE 0x0000
#define MFI_FRAME_DONT_POST_IN_REPLY_QUEUE 0x0001
#define MFI_FRAME_SGL32 0x0000
#define MFI_FRAME_SGL64 0x0002
#define MFI_FRAME_SENSE32 0x0000
#define MFI_FRAME_SENSE64 0x0004
#define MFI_FRAME_DIR_NONE 0x0000
#define MFI_FRAME_DIR_WRITE 0x0008
#define MFI_FRAME_DIR_READ 0x0010
#define MFI_FRAME_DIR_BOTH 0x0018
#define MFI_FRAME_IEEE 0x0020
megaraid_sas : Use Block layer tag support for internal command indexing megaraid_sas driver will use block layer provided tag for indexing internal MPT frames to get any unique MPT frame tied with tag. Each IO request submitted from SCSI mid layer will get associated MPT frame from MPT framepool (retrieved and return back using spinlock inside megaraid_sas driver's submission/completion call back). Getting MPT frame from MPT Frame pool is very expensive operation because of associated spin lock operation (spinlock overhead increase on multi NUMA node). This type of locking in driver is very expensive call considering each IO request need - Acquire and Release of the same lock. With this support, in IO path driver will directly provide the unique command index(which is based on block layer tag) and will get the MPT frame tied to the tag and this way driver can get rid off lock, which synchronizes the access to MPT frame pool while fetching and returning MPT frame from the pool. This support in driver provides siginificant performance improvement(on multi NUMA node system)on latest upstream with SCSI.MQ as well as on existing linux distributions. Here is the data for test executed at Avago- - IO Tool- FIO - 4 Socket SMC server. (4 NUMA node server) - 12 SSDs in JBOD mode . - 4K Rand READ, QD=32 - SCSI MQ x86_64 (Latest Upstream kernel) - upto 300% Performance Improvement. If IOs are running on single Node, perfromance gain is less, but as soon as increase number of nodes, performance improvement is significant. IOs running on all 4 NUMA nodes, with this patch applied IOPs observed was 1170K vs 344K IOPs seen without this patch. Logically, there are two parts of this patch- 1) Block layer tag support 2) changes in calling convention of return_cmd. part 2 will revert the changes done by patch- 90dc9d9 megaraid_sas : MFI MPT linked list corruption fix because changes done in part 1 has fixed the problem of MFI MPT linked list corruption. part 2 is very much dependent on part 1, so we decided to have single patch for these two logical changes. [jejb: remove chatty printk pointed out by hch] Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: James Bottomley <JBottomley@Odin.com>
2015-04-23 19:01:24 +08:00
/* Driver internal */
#define DRV_DCMD_POLLED_MODE 0x1
#define DRV_DCMD_SKIP_REFIRE 0x2
megaraid_sas : Use Block layer tag support for internal command indexing megaraid_sas driver will use block layer provided tag for indexing internal MPT frames to get any unique MPT frame tied with tag. Each IO request submitted from SCSI mid layer will get associated MPT frame from MPT framepool (retrieved and return back using spinlock inside megaraid_sas driver's submission/completion call back). Getting MPT frame from MPT Frame pool is very expensive operation because of associated spin lock operation (spinlock overhead increase on multi NUMA node). This type of locking in driver is very expensive call considering each IO request need - Acquire and Release of the same lock. With this support, in IO path driver will directly provide the unique command index(which is based on block layer tag) and will get the MPT frame tied to the tag and this way driver can get rid off lock, which synchronizes the access to MPT frame pool while fetching and returning MPT frame from the pool. This support in driver provides siginificant performance improvement(on multi NUMA node system)on latest upstream with SCSI.MQ as well as on existing linux distributions. Here is the data for test executed at Avago- - IO Tool- FIO - 4 Socket SMC server. (4 NUMA node server) - 12 SSDs in JBOD mode . - 4K Rand READ, QD=32 - SCSI MQ x86_64 (Latest Upstream kernel) - upto 300% Performance Improvement. If IOs are running on single Node, perfromance gain is less, but as soon as increase number of nodes, performance improvement is significant. IOs running on all 4 NUMA nodes, with this patch applied IOPs observed was 1170K vs 344K IOPs seen without this patch. Logically, there are two parts of this patch- 1) Block layer tag support 2) changes in calling convention of return_cmd. part 2 will revert the changes done by patch- 90dc9d9 megaraid_sas : MFI MPT linked list corruption fix because changes done in part 1 has fixed the problem of MFI MPT linked list corruption. part 2 is very much dependent on part 1, so we decided to have single patch for these two logical changes. [jejb: remove chatty printk pointed out by hch] Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: James Bottomley <JBottomley@Odin.com>
2015-04-23 19:01:24 +08:00
/*
* Definition for cmd_status
*/
#define MFI_CMD_STATUS_POLL_MODE 0xFF
/*
* MFI command opcodes
*/
enum MFI_CMD_OP {
MFI_CMD_INIT = 0x0,
MFI_CMD_LD_READ = 0x1,
MFI_CMD_LD_WRITE = 0x2,
MFI_CMD_LD_SCSI_IO = 0x3,
MFI_CMD_PD_SCSI_IO = 0x4,
MFI_CMD_DCMD = 0x5,
MFI_CMD_ABORT = 0x6,
MFI_CMD_SMP = 0x7,
MFI_CMD_STP = 0x8,
MFI_CMD_NVME = 0x9,
MFI_CMD_TOOLBOX = 0xa,
MFI_CMD_OP_COUNT,
MFI_CMD_INVALID = 0xff
};
#define MR_DCMD_CTRL_GET_INFO 0x01010000
#define MR_DCMD_LD_GET_LIST 0x03010000
#define MR_DCMD_LD_LIST_QUERY 0x03010100
#define MR_DCMD_CTRL_CACHE_FLUSH 0x01101000
#define MR_FLUSH_CTRL_CACHE 0x01
#define MR_FLUSH_DISK_CACHE 0x02
#define MR_DCMD_CTRL_SHUTDOWN 0x01050000
#define MR_DCMD_HIBERNATE_SHUTDOWN 0x01060000
#define MR_ENABLE_DRIVE_SPINDOWN 0x01
#define MR_DCMD_CTRL_EVENT_GET_INFO 0x01040100
#define MR_DCMD_CTRL_EVENT_GET 0x01040300
#define MR_DCMD_CTRL_EVENT_WAIT 0x01040500
#define MR_DCMD_LD_GET_PROPERTIES 0x03030000
#define MR_DCMD_CLUSTER 0x08000000
#define MR_DCMD_CLUSTER_RESET_ALL 0x08010100
#define MR_DCMD_CLUSTER_RESET_LD 0x08010200
#define MR_DCMD_PD_LIST_QUERY 0x02010100
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
#define MR_DCMD_CTRL_SET_CRASH_DUMP_PARAMS 0x01190100
#define MR_DRIVER_SET_APP_CRASHDUMP_MODE (0xF0010000 | 0x0600)
#define MR_DCMD_PD_GET_INFO 0x02020000
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
/*
* Global functions
*/
extern u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id);
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
/*
* MFI command completion codes
*/
enum MFI_STAT {
MFI_STAT_OK = 0x00,
MFI_STAT_INVALID_CMD = 0x01,
MFI_STAT_INVALID_DCMD = 0x02,
MFI_STAT_INVALID_PARAMETER = 0x03,
MFI_STAT_INVALID_SEQUENCE_NUMBER = 0x04,
MFI_STAT_ABORT_NOT_POSSIBLE = 0x05,
MFI_STAT_APP_HOST_CODE_NOT_FOUND = 0x06,
MFI_STAT_APP_IN_USE = 0x07,
MFI_STAT_APP_NOT_INITIALIZED = 0x08,
MFI_STAT_ARRAY_INDEX_INVALID = 0x09,
MFI_STAT_ARRAY_ROW_NOT_EMPTY = 0x0a,
MFI_STAT_CONFIG_RESOURCE_CONFLICT = 0x0b,
MFI_STAT_DEVICE_NOT_FOUND = 0x0c,
MFI_STAT_DRIVE_TOO_SMALL = 0x0d,
MFI_STAT_FLASH_ALLOC_FAIL = 0x0e,
MFI_STAT_FLASH_BUSY = 0x0f,
MFI_STAT_FLASH_ERROR = 0x10,
MFI_STAT_FLASH_IMAGE_BAD = 0x11,
MFI_STAT_FLASH_IMAGE_INCOMPLETE = 0x12,
MFI_STAT_FLASH_NOT_OPEN = 0x13,
MFI_STAT_FLASH_NOT_STARTED = 0x14,
MFI_STAT_FLUSH_FAILED = 0x15,
MFI_STAT_HOST_CODE_NOT_FOUNT = 0x16,
MFI_STAT_LD_CC_IN_PROGRESS = 0x17,
MFI_STAT_LD_INIT_IN_PROGRESS = 0x18,
MFI_STAT_LD_LBA_OUT_OF_RANGE = 0x19,
MFI_STAT_LD_MAX_CONFIGURED = 0x1a,
MFI_STAT_LD_NOT_OPTIMAL = 0x1b,
MFI_STAT_LD_RBLD_IN_PROGRESS = 0x1c,
MFI_STAT_LD_RECON_IN_PROGRESS = 0x1d,
MFI_STAT_LD_WRONG_RAID_LEVEL = 0x1e,
MFI_STAT_MAX_SPARES_EXCEEDED = 0x1f,
MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20,
MFI_STAT_MFC_HW_ERROR = 0x21,
MFI_STAT_NO_HW_PRESENT = 0x22,
MFI_STAT_NOT_FOUND = 0x23,
MFI_STAT_NOT_IN_ENCL = 0x24,
MFI_STAT_PD_CLEAR_IN_PROGRESS = 0x25,
MFI_STAT_PD_TYPE_WRONG = 0x26,
MFI_STAT_PR_DISABLED = 0x27,
MFI_STAT_ROW_INDEX_INVALID = 0x28,
MFI_STAT_SAS_CONFIG_INVALID_ACTION = 0x29,
MFI_STAT_SAS_CONFIG_INVALID_DATA = 0x2a,
MFI_STAT_SAS_CONFIG_INVALID_PAGE = 0x2b,
MFI_STAT_SAS_CONFIG_INVALID_TYPE = 0x2c,
MFI_STAT_SCSI_DONE_WITH_ERROR = 0x2d,
MFI_STAT_SCSI_IO_FAILED = 0x2e,
MFI_STAT_SCSI_RESERVATION_CONFLICT = 0x2f,
MFI_STAT_SHUTDOWN_FAILED = 0x30,
MFI_STAT_TIME_NOT_SET = 0x31,
MFI_STAT_WRONG_STATE = 0x32,
MFI_STAT_LD_OFFLINE = 0x33,
MFI_STAT_PEER_NOTIFICATION_REJECTED = 0x34,
MFI_STAT_PEER_NOTIFICATION_FAILED = 0x35,
MFI_STAT_RESERVATION_IN_PROGRESS = 0x36,
MFI_STAT_I2C_ERRORS_DETECTED = 0x37,
MFI_STAT_PCI_ERRORS_DETECTED = 0x38,
MFI_STAT_CONFIG_SEQ_MISMATCH = 0x67,
MFI_STAT_INVALID_STATUS = 0xFF
};
enum mfi_evt_class {
MFI_EVT_CLASS_DEBUG = -2,
MFI_EVT_CLASS_PROGRESS = -1,
MFI_EVT_CLASS_INFO = 0,
MFI_EVT_CLASS_WARNING = 1,
MFI_EVT_CLASS_CRITICAL = 2,
MFI_EVT_CLASS_FATAL = 3,
MFI_EVT_CLASS_DEAD = 4
};
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
/*
* Crash dump related defines
*/
#define MAX_CRASH_DUMP_SIZE 512
#define CRASH_DMA_BUF_SIZE (1024 * 1024)
enum MR_FW_CRASH_DUMP_STATE {
UNAVAILABLE = 0,
AVAILABLE = 1,
COPYING = 2,
COPIED = 3,
COPY_ERROR = 4,
};
enum _MR_CRASH_BUF_STATUS {
MR_CRASH_BUF_TURN_OFF = 0,
MR_CRASH_BUF_TURN_ON = 1,
};
/*
* Number of mailbox bytes in DCMD message frame
*/
#define MFI_MBOX_SIZE 12
enum MR_EVT_CLASS {
MR_EVT_CLASS_DEBUG = -2,
MR_EVT_CLASS_PROGRESS = -1,
MR_EVT_CLASS_INFO = 0,
MR_EVT_CLASS_WARNING = 1,
MR_EVT_CLASS_CRITICAL = 2,
MR_EVT_CLASS_FATAL = 3,
MR_EVT_CLASS_DEAD = 4,
};
enum MR_EVT_LOCALE {
MR_EVT_LOCALE_LD = 0x0001,
MR_EVT_LOCALE_PD = 0x0002,
MR_EVT_LOCALE_ENCL = 0x0004,
MR_EVT_LOCALE_BBU = 0x0008,
MR_EVT_LOCALE_SAS = 0x0010,
MR_EVT_LOCALE_CTRL = 0x0020,
MR_EVT_LOCALE_CONFIG = 0x0040,
MR_EVT_LOCALE_CLUSTER = 0x0080,
MR_EVT_LOCALE_ALL = 0xffff,
};
enum MR_EVT_ARGS {
MR_EVT_ARGS_NONE,
MR_EVT_ARGS_CDB_SENSE,
MR_EVT_ARGS_LD,
MR_EVT_ARGS_LD_COUNT,
MR_EVT_ARGS_LD_LBA,
MR_EVT_ARGS_LD_OWNER,
MR_EVT_ARGS_LD_LBA_PD_LBA,
MR_EVT_ARGS_LD_PROG,
MR_EVT_ARGS_LD_STATE,
MR_EVT_ARGS_LD_STRIP,
MR_EVT_ARGS_PD,
MR_EVT_ARGS_PD_ERR,
MR_EVT_ARGS_PD_LBA,
MR_EVT_ARGS_PD_LBA_LD,
MR_EVT_ARGS_PD_PROG,
MR_EVT_ARGS_PD_STATE,
MR_EVT_ARGS_PCI,
MR_EVT_ARGS_RATE,
MR_EVT_ARGS_STR,
MR_EVT_ARGS_TIME,
MR_EVT_ARGS_ECC,
MR_EVT_ARGS_LD_PROP,
MR_EVT_ARGS_PD_SPARE,
MR_EVT_ARGS_PD_INDEX,
MR_EVT_ARGS_DIAG_PASS,
MR_EVT_ARGS_DIAG_FAIL,
MR_EVT_ARGS_PD_LBA_LBA,
MR_EVT_ARGS_PORT_PHY,
MR_EVT_ARGS_PD_MISSING,
MR_EVT_ARGS_PD_ADDRESS,
MR_EVT_ARGS_BITMAP,
MR_EVT_ARGS_CONNECTOR,
MR_EVT_ARGS_PD_PD,
MR_EVT_ARGS_PD_FRU,
MR_EVT_ARGS_PD_PATHINFO,
MR_EVT_ARGS_PD_POWER_STATE,
MR_EVT_ARGS_GENERIC,
};
#define SGE_BUFFER_SIZE 4096
#define MEGASAS_CLUSTER_ID_SIZE 16
/*
* define constants for device list query options
*/
enum MR_PD_QUERY_TYPE {
MR_PD_QUERY_TYPE_ALL = 0,
MR_PD_QUERY_TYPE_STATE = 1,
MR_PD_QUERY_TYPE_POWER_STATE = 2,
MR_PD_QUERY_TYPE_MEDIA_TYPE = 3,
MR_PD_QUERY_TYPE_SPEED = 4,
MR_PD_QUERY_TYPE_EXPOSED_TO_HOST = 5,
};
enum MR_LD_QUERY_TYPE {
MR_LD_QUERY_TYPE_ALL = 0,
MR_LD_QUERY_TYPE_EXPOSED_TO_HOST = 1,
MR_LD_QUERY_TYPE_USED_TGT_IDS = 2,
MR_LD_QUERY_TYPE_CLUSTER_ACCESS = 3,
MR_LD_QUERY_TYPE_CLUSTER_LOCALE = 4,
};
#define MR_EVT_CFG_CLEARED 0x0004
#define MR_EVT_LD_STATE_CHANGE 0x0051
#define MR_EVT_PD_INSERTED 0x005b
#define MR_EVT_PD_REMOVED 0x0070
#define MR_EVT_LD_CREATED 0x008a
#define MR_EVT_LD_DELETED 0x008b
#define MR_EVT_FOREIGN_CFG_IMPORTED 0x00db
#define MR_EVT_LD_OFFLINE 0x00fc
#define MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED 0x0152
#define MR_EVT_CTRL_PROP_CHANGED 0x012f
enum MR_PD_STATE {
MR_PD_STATE_UNCONFIGURED_GOOD = 0x00,
MR_PD_STATE_UNCONFIGURED_BAD = 0x01,
MR_PD_STATE_HOT_SPARE = 0x02,
MR_PD_STATE_OFFLINE = 0x10,
MR_PD_STATE_FAILED = 0x11,
MR_PD_STATE_REBUILD = 0x14,
MR_PD_STATE_ONLINE = 0x18,
MR_PD_STATE_COPYBACK = 0x20,
MR_PD_STATE_SYSTEM = 0x40,
};
union MR_PD_REF {
struct {
u16 deviceId;
u16 seqNum;
} mrPdRef;
u32 ref;
};
/*
* define the DDF Type bit structure
*/
union MR_PD_DDF_TYPE {
struct {
union {
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u16 forcedPDGUID:1;
u16 inVD:1;
u16 isGlobalSpare:1;
u16 isSpare:1;
u16 isForeign:1;
u16 reserved:7;
u16 intf:4;
#else
u16 intf:4;
u16 reserved:7;
u16 isForeign:1;
u16 isSpare:1;
u16 isGlobalSpare:1;
u16 inVD:1;
u16 forcedPDGUID:1;
#endif
} pdType;
u16 type;
};
u16 reserved;
} ddf;
struct {
u32 reserved;
} nonDisk;
u32 type;
} __packed;
/*
* defines the progress structure
*/
union MR_PROGRESS {
struct {
u16 progress;
union {
u16 elapsedSecs;
u16 elapsedSecsForLastPercent;
};
} mrProgress;
u32 w;
} __packed;
/*
* defines the physical drive progress structure
*/
struct MR_PD_PROGRESS {
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u32 rbld:1;
u32 patrol:1;
u32 clear:1;
u32 copyBack:1;
u32 erase:1;
u32 locate:1;
u32 reserved:26;
#else
u32 reserved:26;
u32 locate:1;
u32 erase:1;
u32 copyBack:1;
u32 clear:1;
u32 patrol:1;
u32 rbld:1;
#endif
} active;
union MR_PROGRESS rbld;
union MR_PROGRESS patrol;
union {
union MR_PROGRESS clear;
union MR_PROGRESS erase;
};
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u32 rbld:1;
u32 patrol:1;
u32 clear:1;
u32 copyBack:1;
u32 erase:1;
u32 reserved:27;
#else
u32 reserved:27;
u32 erase:1;
u32 copyBack:1;
u32 clear:1;
u32 patrol:1;
u32 rbld:1;
#endif
} pause;
union MR_PROGRESS reserved[3];
} __packed;
struct MR_PD_INFO {
union MR_PD_REF ref;
u8 inquiryData[96];
u8 vpdPage83[64];
u8 notSupported;
u8 scsiDevType;
union {
u8 connectedPortBitmap;
u8 connectedPortNumbers;
};
u8 deviceSpeed;
u32 mediaErrCount;
u32 otherErrCount;
u32 predFailCount;
u32 lastPredFailEventSeqNum;
u16 fwState;
u8 disabledForRemoval;
u8 linkSpeed;
union MR_PD_DDF_TYPE state;
struct {
u8 count;
#ifndef __BIG_ENDIAN_BITFIELD
u8 isPathBroken:4;
u8 reserved3:3;
u8 widePortCapable:1;
#else
u8 widePortCapable:1;
u8 reserved3:3;
u8 isPathBroken:4;
#endif
u8 connectorIndex[2];
u8 reserved[4];
u64 sasAddr[2];
u8 reserved2[16];
} pathInfo;
u64 rawSize;
u64 nonCoercedSize;
u64 coercedSize;
u16 enclDeviceId;
u8 enclIndex;
union {
u8 slotNumber;
u8 enclConnectorIndex;
};
struct MR_PD_PROGRESS progInfo;
u8 badBlockTableFull;
u8 unusableInCurrentConfig;
u8 vpdPage83Ext[64];
u8 powerState;
u8 enclPosition;
u32 allowedOps;
u16 copyBackPartnerId;
u16 enclPartnerDeviceId;
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u16 fdeCapable:1;
u16 fdeEnabled:1;
u16 secured:1;
u16 locked:1;
u16 foreign:1;
u16 needsEKM:1;
u16 reserved:10;
#else
u16 reserved:10;
u16 needsEKM:1;
u16 foreign:1;
u16 locked:1;
u16 secured:1;
u16 fdeEnabled:1;
u16 fdeCapable:1;
#endif
} security;
u8 mediaType;
u8 notCertified;
u8 bridgeVendor[8];
u8 bridgeProductIdentification[16];
u8 bridgeProductRevisionLevel[4];
u8 satBridgeExists;
u8 interfaceType;
u8 temperature;
u8 emulatedBlockSize;
u16 userDataBlockSize;
u16 reserved2;
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u32 piType:3;
u32 piFormatted:1;
u32 piEligible:1;
u32 NCQ:1;
u32 WCE:1;
u32 commissionedSpare:1;
u32 emergencySpare:1;
u32 ineligibleForSSCD:1;
u32 ineligibleForLd:1;
u32 useSSEraseType:1;
u32 wceUnchanged:1;
u32 supportScsiUnmap:1;
u32 reserved:18;
#else
u32 reserved:18;
u32 supportScsiUnmap:1;
u32 wceUnchanged:1;
u32 useSSEraseType:1;
u32 ineligibleForLd:1;
u32 ineligibleForSSCD:1;
u32 emergencySpare:1;
u32 commissionedSpare:1;
u32 WCE:1;
u32 NCQ:1;
u32 piEligible:1;
u32 piFormatted:1;
u32 piType:3;
#endif
} properties;
u64 shieldDiagCompletionTime;
u8 shieldCounter;
u8 linkSpeedOther;
u8 reserved4[2];
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u32 bbmErrCountSupported:1;
u32 bbmErrCount:31;
#else
u32 bbmErrCount:31;
u32 bbmErrCountSupported:1;
#endif
} bbmErr;
u8 reserved1[512-428];
} __packed;
/*
* Definition of structure used to expose attributes of VD or JBOD
* (this structure is to be filled by firmware when MR_DCMD_DRV_GET_TARGET_PROP
* is fired by driver)
*/
struct MR_TARGET_PROPERTIES {
u32 max_io_size_kb;
u32 device_qdepth;
u32 sector_size;
u8 reset_tmo;
u8 reserved[499];
} __packed;
/*
* defines the physical drive address structure
*/
struct MR_PD_ADDRESS {
__le16 deviceId;
u16 enclDeviceId;
union {
struct {
u8 enclIndex;
u8 slotNumber;
} mrPdAddress;
struct {
u8 enclPosition;
u8 enclConnectorIndex;
} mrEnclAddress;
};
u8 scsiDevType;
union {
u8 connectedPortBitmap;
u8 connectedPortNumbers;
};
u64 sasAddr[2];
} __packed;
/*
* defines the physical drive list structure
*/
struct MR_PD_LIST {
__le32 size;
__le32 count;
struct MR_PD_ADDRESS addr[1];
} __packed;
struct megasas_pd_list {
u16 tid;
u8 driveType;
u8 driveState;
} __packed;
/*
* defines the logical drive reference structure
*/
union MR_LD_REF {
struct {
u8 targetId;
u8 reserved;
__le16 seqNum;
};
__le32 ref;
} __packed;
/*
* defines the logical drive list structure
*/
struct MR_LD_LIST {
__le32 ldCount;
__le32 reserved;
struct {
union MR_LD_REF ref;
u8 state;
u8 reserved[3];
__le64 size;
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
} ldList[MAX_LOGICAL_DRIVES_EXT];
} __packed;
struct MR_LD_TARGETID_LIST {
__le32 size;
__le32 count;
u8 pad[3];
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u8 targetId[MAX_LOGICAL_DRIVES_EXT];
};
struct MR_HOST_DEVICE_LIST_ENTRY {
struct {
union {
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u8 reserved:7;
u8 is_sys_pd:1;
#else
u8 is_sys_pd:1;
u8 reserved:7;
#endif
} bits;
u8 byte;
} u;
} flags;
u8 scsi_type;
__le16 target_id;
u8 reserved[4];
__le64 sas_addr[2];
} __packed;
struct MR_HOST_DEVICE_LIST {
__le32 size;
__le32 count;
__le32 reserved[2];
struct MR_HOST_DEVICE_LIST_ENTRY host_device_list[1];
} __packed;
#define HOST_DEVICE_LIST_SZ (sizeof(struct MR_HOST_DEVICE_LIST) + \
(sizeof(struct MR_HOST_DEVICE_LIST_ENTRY) * \
(MEGASAS_MAX_PD + MAX_LOGICAL_DRIVES_EXT - 1)))
/*
* SAS controller properties
*/
struct megasas_ctrl_prop {
u16 seq_num;
u16 pred_fail_poll_interval;
u16 intr_throttle_count;
u16 intr_throttle_timeouts;
u8 rebuild_rate;
u8 patrol_read_rate;
u8 bgi_rate;
u8 cc_rate;
u8 recon_rate;
u8 cache_flush_interval;
u8 spinup_drv_count;
u8 spinup_delay;
u8 cluster_enable;
u8 coercion_mode;
u8 alarm_enable;
u8 disable_auto_rebuild;
u8 disable_battery_warn;
u8 ecc_bucket_size;
u16 ecc_bucket_leak_rate;
u8 restore_hotspare_on_insertion;
u8 expose_encl_devices;
u8 maintainPdFailHistory;
u8 disallowHostRequestReordering;
u8 abortCCOnError;
u8 loadBalanceMode;
u8 disableAutoDetectBackplane;
u8 snapVDSpace;
/*
* Add properties that can be controlled by
* a bit in the following structure.
*/
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u32 reserved:18;
u32 enableJBOD:1;
u32 disableSpinDownHS:1;
u32 allowBootWithPinnedCache:1;
u32 disableOnlineCtrlReset:1;
u32 enableSecretKeyControl:1;
u32 autoEnhancedImport:1;
u32 enableSpinDownUnconfigured:1;
u32 SSDPatrolReadEnabled:1;
u32 SSDSMARTerEnabled:1;
u32 disableNCQ:1;
u32 useFdeOnly:1;
u32 prCorrectUnconfiguredAreas:1;
u32 SMARTerEnabled:1;
u32 copyBackDisabled:1;
#else
u32 copyBackDisabled:1;
u32 SMARTerEnabled:1;
u32 prCorrectUnconfiguredAreas:1;
u32 useFdeOnly:1;
u32 disableNCQ:1;
u32 SSDSMARTerEnabled:1;
u32 SSDPatrolReadEnabled:1;
u32 enableSpinDownUnconfigured:1;
u32 autoEnhancedImport:1;
u32 enableSecretKeyControl:1;
u32 disableOnlineCtrlReset:1;
u32 allowBootWithPinnedCache:1;
u32 disableSpinDownHS:1;
u32 enableJBOD:1;
u32 reserved:18;
#endif
} OnOffProperties;
union {
u8 autoSnapVDSpace;
u8 viewSpace;
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u16 reserved3:9;
u16 enable_fw_dev_list:1;
u16 reserved2:1;
u16 enable_snap_dump:1;
u16 reserved1:4;
#else
u16 reserved1:4;
u16 enable_snap_dump:1;
u16 reserved2:1;
u16 enable_fw_dev_list:1;
u16 reserved3:9;
#endif
} on_off_properties2;
};
__le16 spinDownTime;
u8 reserved[24];
} __packed;
/*
* SAS controller information
*/
struct megasas_ctrl_info {
/*
* PCI device information
*/
struct {
__le16 vendor_id;
__le16 device_id;
__le16 sub_vendor_id;
__le16 sub_device_id;
u8 reserved[24];
} __attribute__ ((packed)) pci;
/*
* Host interface information
*/
struct {
u8 PCIX:1;
u8 PCIE:1;
u8 iSCSI:1;
u8 SAS_3G:1;
u8 SRIOV:1;
u8 reserved_0:3;
u8 reserved_1[6];
u8 port_count;
u64 port_addr[8];
} __attribute__ ((packed)) host_interface;
/*
* Device (backend) interface information
*/
struct {
u8 SPI:1;
u8 SAS_3G:1;
u8 SATA_1_5G:1;
u8 SATA_3G:1;
u8 reserved_0:4;
u8 reserved_1[6];
u8 port_count;
u64 port_addr[8];
} __attribute__ ((packed)) device_interface;
/*
* List of components residing in flash. All str are null terminated
*/
__le32 image_check_word;
__le32 image_component_count;
struct {
char name[8];
char version[32];
char build_date[16];
char built_time[16];
} __attribute__ ((packed)) image_component[8];
/*
* List of flash components that have been flashed on the card, but
* are not in use, pending reset of the adapter. This list will be
* empty if a flash operation has not occurred. All stings are null
* terminated
*/
__le32 pending_image_component_count;
struct {
char name[8];
char version[32];
char build_date[16];
char build_time[16];
} __attribute__ ((packed)) pending_image_component[8];
u8 max_arms;
u8 max_spans;
u8 max_arrays;
u8 max_lds;
char product_name[80];
char serial_no[32];
/*
* Other physical/controller/operation information. Indicates the
* presence of the hardware
*/
struct {
u32 bbu:1;
u32 alarm:1;
u32 nvram:1;
u32 uart:1;
u32 reserved:28;
} __attribute__ ((packed)) hw_present;
__le32 current_fw_time;
/*
* Maximum data transfer sizes
*/
__le16 max_concurrent_cmds;
__le16 max_sge_count;
__le32 max_request_size;
/*
* Logical and physical device counts
*/
__le16 ld_present_count;
__le16 ld_degraded_count;
__le16 ld_offline_count;
__le16 pd_present_count;
__le16 pd_disk_present_count;
__le16 pd_disk_pred_failure_count;
__le16 pd_disk_failed_count;
/*
* Memory size information
*/
__le16 nvram_size;
__le16 memory_size;
__le16 flash_size;
/*
* Error counters
*/
__le16 mem_correctable_error_count;
__le16 mem_uncorrectable_error_count;
/*
* Cluster information
*/
u8 cluster_permitted;
u8 cluster_active;
/*
* Additional max data transfer sizes
*/
__le16 max_strips_per_io;
/*
* Controller capabilities structures
*/
struct {
u32 raid_level_0:1;
u32 raid_level_1:1;
u32 raid_level_5:1;
u32 raid_level_1E:1;
u32 raid_level_6:1;
u32 reserved:27;
} __attribute__ ((packed)) raid_levels;
struct {
u32 rbld_rate:1;
u32 cc_rate:1;
u32 bgi_rate:1;
u32 recon_rate:1;
u32 patrol_rate:1;
u32 alarm_control:1;
u32 cluster_supported:1;
u32 bbu:1;
u32 spanning_allowed:1;
u32 dedicated_hotspares:1;
u32 revertible_hotspares:1;
u32 foreign_config_import:1;
u32 self_diagnostic:1;
u32 mixed_redundancy_arr:1;
u32 global_hot_spares:1;
u32 reserved:17;
} __attribute__ ((packed)) adapter_operations;
struct {
u32 read_policy:1;
u32 write_policy:1;
u32 io_policy:1;
u32 access_policy:1;
u32 disk_cache_policy:1;
u32 reserved:27;
} __attribute__ ((packed)) ld_operations;
struct {
u8 min;
u8 max;
u8 reserved[2];
} __attribute__ ((packed)) stripe_sz_ops;
struct {
u32 force_online:1;
u32 force_offline:1;
u32 force_rebuild:1;
u32 reserved:29;
} __attribute__ ((packed)) pd_operations;
struct {
u32 ctrl_supports_sas:1;
u32 ctrl_supports_sata:1;
u32 allow_mix_in_encl:1;
u32 allow_mix_in_ld:1;
u32 allow_sata_in_cluster:1;
u32 reserved:27;
} __attribute__ ((packed)) pd_mix_support;
/*
* Define ECC single-bit-error bucket information
*/
u8 ecc_bucket_count;
u8 reserved_2[11];
/*
* Include the controller properties (changeable items)
*/
struct megasas_ctrl_prop properties;
/*
* Define FW pkg version (set in envt v'bles on OEM basis)
*/
char package_version[0x60];
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
/*
* If adapterOperations.supportMoreThan8Phys is set,
* and deviceInterface.portCount is greater than 8,
* SAS Addrs for first 8 ports shall be populated in
* deviceInterface.portAddr, and the rest shall be
* populated in deviceInterfacePortAddr2.
*/
__le64 deviceInterfacePortAddr2[8]; /*6a0h */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
u8 reserved3[128]; /*6e0h */
struct { /*760h */
u16 minPdRaidLevel_0:4;
u16 maxPdRaidLevel_0:12;
u16 minPdRaidLevel_1:4;
u16 maxPdRaidLevel_1:12;
u16 minPdRaidLevel_5:4;
u16 maxPdRaidLevel_5:12;
u16 minPdRaidLevel_1E:4;
u16 maxPdRaidLevel_1E:12;
u16 minPdRaidLevel_6:4;
u16 maxPdRaidLevel_6:12;
u16 minPdRaidLevel_10:4;
u16 maxPdRaidLevel_10:12;
u16 minPdRaidLevel_50:4;
u16 maxPdRaidLevel_50:12;
u16 minPdRaidLevel_60:4;
u16 maxPdRaidLevel_60:12;
u16 minPdRaidLevel_1E_RLQ0:4;
u16 maxPdRaidLevel_1E_RLQ0:12;
u16 minPdRaidLevel_1E0_RLQ0:4;
u16 maxPdRaidLevel_1E0_RLQ0:12;
u16 reserved[6];
} pdsForRaidLevels;
__le16 maxPds; /*780h */
__le16 maxDedHSPs; /*782h */
__le16 maxGlobalHSP; /*784h */
__le16 ddfSize; /*786h */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
u8 maxLdsPerArray; /*788h */
u8 partitionsInDDF; /*789h */
u8 lockKeyBinding; /*78ah */
u8 maxPITsPerLd; /*78bh */
u8 maxViewsPerLd; /*78ch */
u8 maxTargetId; /*78dh */
__le16 maxBvlVdSize; /*78eh */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
__le16 maxConfigurableSSCSize; /*790h */
__le16 currentSSCsize; /*792h */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
char expanderFwVersion[12]; /*794h */
__le16 PFKTrialTimeRemaining; /*7A0h */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
__le16 cacheMemorySize; /*7A2h */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
struct { /*7A4h */
#if defined(__BIG_ENDIAN_BITFIELD)
u32 reserved:5;
u32 activePassive:2;
u32 supportConfigAutoBalance:1;
u32 mpio:1;
u32 supportDataLDonSSCArray:1;
u32 supportPointInTimeProgress:1;
u32 supportUnevenSpans:1;
u32 dedicatedHotSparesLimited:1;
u32 headlessMode:1;
u32 supportEmulatedDrives:1;
u32 supportResetNow:1;
u32 realTimeScheduler:1;
u32 supportSSDPatrolRead:1;
u32 supportPerfTuning:1;
u32 disableOnlinePFKChange:1;
u32 supportJBOD:1;
u32 supportBootTimePFKChange:1;
u32 supportSetLinkSpeed:1;
u32 supportEmergencySpares:1;
u32 supportSuspendResumeBGops:1;
u32 blockSSDWriteCacheChange:1;
u32 supportShieldState:1;
u32 supportLdBBMInfo:1;
u32 supportLdPIType3:1;
u32 supportLdPIType2:1;
u32 supportLdPIType1:1;
u32 supportPIcontroller:1;
#else
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
u32 supportPIcontroller:1;
u32 supportLdPIType1:1;
u32 supportLdPIType2:1;
u32 supportLdPIType3:1;
u32 supportLdBBMInfo:1;
u32 supportShieldState:1;
u32 blockSSDWriteCacheChange:1;
u32 supportSuspendResumeBGops:1;
u32 supportEmergencySpares:1;
u32 supportSetLinkSpeed:1;
u32 supportBootTimePFKChange:1;
u32 supportJBOD:1;
u32 disableOnlinePFKChange:1;
u32 supportPerfTuning:1;
u32 supportSSDPatrolRead:1;
u32 realTimeScheduler:1;
u32 supportResetNow:1;
u32 supportEmulatedDrives:1;
u32 headlessMode:1;
u32 dedicatedHotSparesLimited:1;
u32 supportUnevenSpans:1;
u32 supportPointInTimeProgress:1;
u32 supportDataLDonSSCArray:1;
u32 mpio:1;
u32 supportConfigAutoBalance:1;
u32 activePassive:2;
u32 reserved:5;
#endif
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
} adapterOperations2;
u8 driverVersion[32]; /*7A8h */
u8 maxDAPdCountSpinup60; /*7C8h */
u8 temperatureROC; /*7C9h */
u8 temperatureCtrl; /*7CAh */
u8 reserved4; /*7CBh */
__le16 maxConfigurablePds; /*7CCh */
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
u8 reserved5[2]; /*0x7CDh */
/*
* HA cluster information
*/
struct {
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
#if defined(__BIG_ENDIAN_BITFIELD)
u32 reserved:25;
u32 passive:1;
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u32 premiumFeatureMismatch:1;
u32 ctrlPropIncompatible:1;
u32 fwVersionMismatch:1;
u32 hwIncompatible:1;
u32 peerIsIncompatible:1;
u32 peerIsPresent:1;
#else
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
u32 peerIsPresent:1;
u32 peerIsIncompatible:1;
u32 hwIncompatible:1;
u32 fwVersionMismatch:1;
u32 ctrlPropIncompatible:1;
u32 premiumFeatureMismatch:1;
u32 passive:1;
u32 reserved:25;
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
#endif
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
} cluster;
char clusterId[MEGASAS_CLUSTER_ID_SIZE]; /*0x7D4 */
struct {
u8 maxVFsSupported; /*0x7E4*/
u8 numVFsEnabled; /*0x7E5*/
u8 requestorId; /*0x7E6 0:PF, 1:VF1, 2:VF2*/
u8 reserved; /*0x7E7*/
} iov;
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
u32 reserved:7;
u32 useSeqNumJbodFP:1;
u32 supportExtendedSSCSize:1;
u32 supportDiskCacheSettingForSysPDs:1;
u32 supportCPLDUpdate:1;
u32 supportTTYLogCompression:1;
u32 discardCacheDuringLDDelete:1;
u32 supportSecurityonJBOD:1;
u32 supportCacheBypassModes:1;
u32 supportDisableSESMonitoring:1;
u32 supportForceFlash:1;
u32 supportNVDRAM:1;
u32 supportDrvActivityLEDSetting:1;
u32 supportAllowedOpsforDrvRemoval:1;
u32 supportHOQRebuild:1;
u32 supportForceTo512e:1;
u32 supportNVCacheErase:1;
u32 supportDebugQueue:1;
u32 supportSwZone:1;
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
u32 supportCrashDump:1;
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u32 supportMaxExtLDs:1;
u32 supportT10RebuildAssist:1;
u32 supportDisableImmediateIO:1;
u32 supportThermalPollInterval:1;
u32 supportPersonalityChange:2;
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
#else
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u32 supportPersonalityChange:2;
u32 supportThermalPollInterval:1;
u32 supportDisableImmediateIO:1;
u32 supportT10RebuildAssist:1;
u32 supportMaxExtLDs:1;
u32 supportCrashDump:1;
u32 supportSwZone:1;
u32 supportDebugQueue:1;
u32 supportNVCacheErase:1;
u32 supportForceTo512e:1;
u32 supportHOQRebuild:1;
u32 supportAllowedOpsforDrvRemoval:1;
u32 supportDrvActivityLEDSetting:1;
u32 supportNVDRAM:1;
u32 supportForceFlash:1;
u32 supportDisableSESMonitoring:1;
u32 supportCacheBypassModes:1;
u32 supportSecurityonJBOD:1;
u32 discardCacheDuringLDDelete:1;
u32 supportTTYLogCompression:1;
u32 supportCPLDUpdate:1;
u32 supportDiskCacheSettingForSysPDs:1;
u32 supportExtendedSSCSize:1;
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
u32 useSeqNumJbodFP:1;
u32 reserved:7;
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
#endif
} adapterOperations3;
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u8 reserved:7;
/* Indicates whether the CPLD image is part of
* the package and stored in flash
*/
u8 cpld_in_flash:1;
#else
u8 cpld_in_flash:1;
u8 reserved:7;
#endif
u8 reserved1[3];
/* Null terminated string. Has the version
* information if cpld_in_flash = FALSE
*/
u8 userCodeDefinition[12];
} cpld; /* Valid only if upgradableCPLD is TRUE */
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u16 reserved:2;
u16 support_nvme_passthru:1;
u16 support_pl_debug_info:1;
u16 support_flash_comp_info:1;
u16 support_host_info:1;
u16 support_dual_fw_update:1;
u16 support_ssc_rev3:1;
u16 fw_swaps_bbu_vpd_info:1;
u16 support_pd_map_target_id:1;
u16 support_ses_ctrl_in_multipathcfg:1;
u16 image_upload_supported:1;
u16 support_encrypted_mfc:1;
u16 supported_enc_algo:1;
u16 support_ibutton_less:1;
u16 ctrl_info_ext_supported:1;
#else
u16 ctrl_info_ext_supported:1;
u16 support_ibutton_less:1;
u16 supported_enc_algo:1;
u16 support_encrypted_mfc:1;
u16 image_upload_supported:1;
/* FW supports LUN based association and target port based */
u16 support_ses_ctrl_in_multipathcfg:1;
/* association for the SES device connected in multipath mode */
/* FW defines Jbod target Id within MR_PD_CFG_SEQ */
u16 support_pd_map_target_id:1;
/* FW swaps relevant fields in MR_BBU_VPD_INFO_FIXED to
* provide the data in little endian order
*/
u16 fw_swaps_bbu_vpd_info:1;
u16 support_ssc_rev3:1;
/* FW supports CacheCade 3.0, only one SSCD creation allowed */
u16 support_dual_fw_update:1;
/* FW supports dual firmware update feature */
u16 support_host_info:1;
/* FW supports MR_DCMD_CTRL_HOST_INFO_SET/GET */
u16 support_flash_comp_info:1;
/* FW supports MR_DCMD_CTRL_FLASH_COMP_INFO_GET */
u16 support_pl_debug_info:1;
/* FW supports retrieval of PL debug information through apps */
u16 support_nvme_passthru:1;
/* FW supports NVMe passthru commands */
u16 reserved:2;
#endif
} adapter_operations4;
u8 pad[0x800 - 0x7FE]; /* 0x7FE pad to 2K for expansion */
u32 size;
u32 pad1;
u8 reserved6[64];
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u32 reserved:19;
u32 support_pci_lane_margining: 1;
u32 support_psoc_update:1;
u32 support_force_personality_change:1;
u32 support_fde_type_mix:1;
u32 support_snap_dump:1;
u32 support_nvme_tm:1;
u32 support_oce_only:1;
u32 support_ext_mfg_vpd:1;
u32 support_pcie:1;
u32 support_cvhealth_info:1;
u32 support_profile_change:2;
u32 mr_config_ext2_supported:1;
#else
u32 mr_config_ext2_supported:1;
u32 support_profile_change:2;
u32 support_cvhealth_info:1;
u32 support_pcie:1;
u32 support_ext_mfg_vpd:1;
u32 support_oce_only:1;
u32 support_nvme_tm:1;
u32 support_snap_dump:1;
u32 support_fde_type_mix:1;
u32 support_force_personality_change:1;
u32 support_psoc_update:1;
u32 support_pci_lane_margining: 1;
u32 reserved:19;
#endif
} adapter_operations5;
u32 rsvdForAdptOp[63];
u8 reserved7[3];
u8 TaskAbortTO; /* Timeout value in seconds used by Abort Task TM */
u8 MaxResetTO; /* Max Supported Reset timeout in seconds. */
u8 reserved8[3];
} __packed;
/*
* ===============================
* MegaRAID SAS driver definitions
* ===============================
*/
#define MEGASAS_MAX_PD_CHANNELS 2
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
#define MEGASAS_MAX_LD_CHANNELS 2
#define MEGASAS_MAX_CHANNELS (MEGASAS_MAX_PD_CHANNELS + \
MEGASAS_MAX_LD_CHANNELS)
#define MEGASAS_MAX_DEV_PER_CHANNEL 128
#define MEGASAS_DEFAULT_INIT_ID -1
#define MEGASAS_MAX_LUN 8
#define MEGASAS_DEFAULT_CMD_PER_LUN 256
#define MEGASAS_MAX_PD (MEGASAS_MAX_PD_CHANNELS * \
MEGASAS_MAX_DEV_PER_CHANNEL)
#define MEGASAS_MAX_LD_IDS (MEGASAS_MAX_LD_CHANNELS * \
MEGASAS_MAX_DEV_PER_CHANNEL)
#define MEGASAS_MAX_SECTORS (2*1024)
#define MEGASAS_MAX_SECTORS_IEEE (2*128)
#define MEGASAS_DBG_LVL 1
#define MEGASAS_FW_BUSY 1
/* Driver's internal Logging levels*/
#define OCR_DEBUG (1 << 0)
#define TM_DEBUG (1 << 1)
#define LD_PD_DEBUG (1 << 2)
#define SCAN_PD_CHANNEL 0x1
#define SCAN_VD_CHANNEL 0x2
#define MEGASAS_KDUMP_QUEUE_DEPTH 100
#define MR_LARGE_IO_MIN_SIZE (32 * 1024)
#define MR_R1_LDIO_PIGGYBACK_DEFAULT 4
enum MR_SCSI_CMD_TYPE {
READ_WRITE_LDIO = 0,
NON_READ_WRITE_LDIO = 1,
READ_WRITE_SYSPDIO = 2,
NON_READ_WRITE_SYSPDIO = 3,
};
enum DCMD_TIMEOUT_ACTION {
INITIATE_OCR = 0,
KILL_ADAPTER = 1,
IGNORE_TIMEOUT = 2,
};
enum FW_BOOT_CONTEXT {
PROBE_CONTEXT = 0,
OCR_CONTEXT = 1,
};
/* Frame Type */
#define IO_FRAME 0
#define PTHRU_FRAME 1
/*
* When SCSI mid-layer calls driver's reset routine, driver waits for
* MEGASAS_RESET_WAIT_TIME seconds for all outstanding IO to complete. Note
* that the driver cannot _actually_ abort or reset pending commands. While
* it is waiting for the commands to complete, it prints a diagnostic message
* every MEGASAS_RESET_NOTICE_INTERVAL seconds
*/
#define MEGASAS_RESET_WAIT_TIME 180
#define MEGASAS_INTERNAL_CMD_WAIT_TIME 180
#define MEGASAS_RESET_NOTICE_INTERVAL 5
#define MEGASAS_IOCTL_CMD 0
#define MEGASAS_DEFAULT_CMD_TIMEOUT 90
#define MEGASAS_THROTTLE_QUEUE_DEPTH 16
#define MEGASAS_DEFAULT_TM_TIMEOUT 50
/*
* FW reports the maximum of number of commands that it can accept (maximum
* commands that can be outstanding) at any time. The driver must report a
* lower number to the mid layer because it can issue a few internal commands
* itself (E.g, AEN, abort cmd, IOCTLs etc). The number of commands it needs
* is shown below
*/
#define MEGASAS_INT_CMDS 32
#define MEGASAS_SKINNY_INT_CMDS 5
#define MEGASAS_FUSION_INTERNAL_CMDS 8
#define MEGASAS_FUSION_IOCTL_CMDS 3
#define MEGASAS_MFI_IOCTL_CMDS 27
#define MEGASAS_MAX_MSIX_QUEUES 128
/*
* FW can accept both 32 and 64 bit SGLs. We want to allocate 32/64 bit
* SGLs based on the size of dma_addr_t
*/
#define IS_DMA64 (sizeof(dma_addr_t) == 8)
#define MFI_XSCALE_OMR0_CHANGE_INTERRUPT 0x00000001
#define MFI_INTR_FLAG_REPLY_MESSAGE 0x00000001
#define MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE 0x00000002
#define MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT 0x00000004
#define MFI_OB_INTR_STATUS_MASK 0x00000002
#define MFI_POLL_TIMEOUT_SECS 60
#define MFI_IO_TIMEOUT_SECS 180
#define MEGASAS_SRIOV_HEARTBEAT_INTERVAL_VF (5 * HZ)
#define MEGASAS_OCR_SETTLE_TIME_VF (1000 * 30)
#define MEGASAS_SRIOV_MAX_RESET_TRIES_VF 1
#define MEGASAS_ROUTINE_WAIT_TIME_VF 300
#define MFI_REPLY_1078_MESSAGE_INTERRUPT 0x80000000
#define MFI_REPLY_GEN2_MESSAGE_INTERRUPT 0x00000001
#define MFI_GEN2_ENABLE_INTERRUPT_MASK (0x00000001 | 0x00000004)
#define MFI_REPLY_SKINNY_MESSAGE_INTERRUPT 0x40000000
#define MFI_SKINNY_ENABLE_INTERRUPT_MASK (0x00000001)
#define MFI_1068_PCSR_OFFSET 0x84
#define MFI_1068_FW_HANDSHAKE_OFFSET 0x64
#define MFI_1068_FW_READY 0xDDDD0000
#define MR_MAX_REPLY_QUEUES_OFFSET 0X0000001F
#define MR_MAX_REPLY_QUEUES_EXT_OFFSET 0X003FC000
#define MR_MAX_REPLY_QUEUES_EXT_OFFSET_SHIFT 14
#define MR_MAX_MSIX_REG_ARRAY 16
#define MR_RDPQ_MODE_OFFSET 0X00800000
#define MR_MAX_RAID_MAP_SIZE_OFFSET_SHIFT 16
#define MR_MAX_RAID_MAP_SIZE_MASK 0x1FF
#define MR_MIN_MAP_SIZE 0x10000
/* 64k */
scsi: megaraid_sas: Send SYNCHRONIZE_CACHE for VD to firmware Until now the megaraid_sas driver has reported successful completion on SYNCHRONIZE_CACHE commands without sending them down to the controller. The controller firmware has been responsible for taking care of flushing disk caches for all drives that belong to a Virtual Disk at the time of system reboot/shutdown. There may have been a reason to avoid sending SYNCHRONIZE_CACHE to a VD in the past but that no longer appears to be valid. Older versions of MegaRaid firmware (Gen2 and Gen2.5) set the WCE bit for Virtual Disks but the firmware does not report correct completion status for a SYNCHRONIZE_CACHE command. As a result, we must use another method to identify whether it is safe to send the command to the controller. We use the canHandleSyncCache firmware flag in the scratch pad register at offset 0xB4. New SYNCHRONIZE_CACHE behavior: IF 'JBOD' Driver sends SYNCHRONIZE_CACHE command to the firmware Firmware sends SYNCHRONIZE_CACHE to drive Firmware obtains status from drive and returns same status back to driver ELSEIF 'VirtualDisk' IF firmware supports new API bit called canHandleSyncCache Driver sends SYNCHRONIZE_CACHE command to the firmware Firmware does not send SYNCHRONIZE_CACHE to drives Firmware returns SUCCESS ELSE Driver does not send SYNCHRONIZE_CACHE command to the firmware Driver return SUCCESS for that command ENDIF ENDIF [mkp: edited patch description] Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Reviewed-by: Ewan D. Milne <emilne@redhat.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2016-10-21 21:33:33 +08:00
#define MR_CAN_HANDLE_SYNC_CACHE_OFFSET 0X01000000
#define MR_ATOMIC_DESCRIPTOR_SUPPORT_OFFSET (1 << 24)
scsi: megaraid_sas: Add support for 64bit consistent DMA The latest MegaRAID Firmware (for Invader series) has support for 64bit DMA for both streaming and consistent DMA buffers. All Ventura series controller FW always support 64 bit consistent DMA. Also, on a few architectures 32bit DMA is not supported. Current driver always prefers 32bit for consistent DMA and 64bit for streaming DMA. This behavior was unintentional and carried forwarded from legacy controller FW. Need to enhance the driver to support 64bit consistent DMA buffers based on the firmware capability. Below is the DMA setting strategy in driver with this patch. For Ventura series, always try to set 64bit DMA mask. If it fails fall back to 32bit DMA mask. For Invader series and earlier generation controllers, first try to set to 32bit consistent DMA mask irrespective of FW capability. This is needed to ensure firmware downgrades do not break. If 32bit DMA setting fails, check FW capability and try seting to 64bit DMA mask. There are certain restrictions in the hardware for having all sense buffers and all reply descriptors to be in the same 4GB memory region. This limitation is h/w dependent and can not be changed in firmware. This limitation needs to be taken care in driver while allocating the buffers. There was a discussion regarding this - find details at below link. https://www.spinics.net/lists/linux-scsi/msg108251.html Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-10-19 17:49:05 +08:00
#define MR_CAN_HANDLE_64_BIT_DMA_OFFSET (1 << 25)
#define MR_INTR_COALESCING_SUPPORT_OFFSET (1 << 26)
scsi: megaraid_sas: Add support for 64bit consistent DMA The latest MegaRAID Firmware (for Invader series) has support for 64bit DMA for both streaming and consistent DMA buffers. All Ventura series controller FW always support 64 bit consistent DMA. Also, on a few architectures 32bit DMA is not supported. Current driver always prefers 32bit for consistent DMA and 64bit for streaming DMA. This behavior was unintentional and carried forwarded from legacy controller FW. Need to enhance the driver to support 64bit consistent DMA buffers based on the firmware capability. Below is the DMA setting strategy in driver with this patch. For Ventura series, always try to set 64bit DMA mask. If it fails fall back to 32bit DMA mask. For Invader series and earlier generation controllers, first try to set to 32bit consistent DMA mask irrespective of FW capability. This is needed to ensure firmware downgrades do not break. If 32bit DMA setting fails, check FW capability and try seting to 64bit DMA mask. There are certain restrictions in the hardware for having all sense buffers and all reply descriptors to be in the same 4GB memory region. This limitation is h/w dependent and can not be changed in firmware. This limitation needs to be taken care in driver while allocating the buffers. There was a discussion regarding this - find details at below link. https://www.spinics.net/lists/linux-scsi/msg108251.html Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-10-19 17:49:05 +08:00
#define MEGASAS_WATCHDOG_THREAD_INTERVAL 1000
#define MEGASAS_WAIT_FOR_NEXT_DMA_MSECS 20
#define MEGASAS_WATCHDOG_WAIT_COUNT 50
enum MR_ADAPTER_TYPE {
MFI_SERIES = 1,
THUNDERBOLT_SERIES = 2,
INVADER_SERIES = 3,
VENTURA_SERIES = 4,
AERO_SERIES = 5,
};
/*
* register set for both 1068 and 1078 controllers
* structure extended for 1078 registers
*/
struct megasas_register_set {
u32 doorbell; /*0000h*/
u32 fusion_seq_offset; /*0004h*/
u32 fusion_host_diag; /*0008h*/
u32 reserved_01; /*000Ch*/
u32 inbound_msg_0; /*0010h*/
u32 inbound_msg_1; /*0014h*/
u32 outbound_msg_0; /*0018h*/
u32 outbound_msg_1; /*001Ch*/
u32 inbound_doorbell; /*0020h*/
u32 inbound_intr_status; /*0024h*/
u32 inbound_intr_mask; /*0028h*/
u32 outbound_doorbell; /*002Ch*/
u32 outbound_intr_status; /*0030h*/
u32 outbound_intr_mask; /*0034h*/
u32 reserved_1[2]; /*0038h*/
u32 inbound_queue_port; /*0040h*/
u32 outbound_queue_port; /*0044h*/
u32 reserved_2[9]; /*0048h*/
u32 reply_post_host_index; /*006Ch*/
u32 reserved_2_2[12]; /*0070h*/
u32 outbound_doorbell_clear; /*00A0h*/
u32 reserved_3[3]; /*00A4h*/
u32 outbound_scratch_pad_0; /*00B0h*/
u32 outbound_scratch_pad_1; /*00B4h*/
u32 outbound_scratch_pad_2; /*00B8h*/
u32 outbound_scratch_pad_3; /*00BCh*/
u32 inbound_low_queue_port ; /*00C0h*/
u32 inbound_high_queue_port ; /*00C4h*/
u32 inbound_single_queue_port; /*00C8h*/
u32 res_6[11]; /*CCh*/
u32 host_diag;
u32 seq_offset;
u32 index_registers[807]; /*00CCh*/
} __attribute__ ((packed));
struct megasas_sge32 {
__le32 phys_addr;
__le32 length;
} __attribute__ ((packed));
struct megasas_sge64 {
__le64 phys_addr;
__le32 length;
} __attribute__ ((packed));
struct megasas_sge_skinny {
__le64 phys_addr;
__le32 length;
__le32 flag;
} __packed;
union megasas_sgl {
struct megasas_sge32 sge32[1];
struct megasas_sge64 sge64[1];
struct megasas_sge_skinny sge_skinny[1];
} __attribute__ ((packed));
struct megasas_header {
u8 cmd; /*00h */
u8 sense_len; /*01h */
u8 cmd_status; /*02h */
u8 scsi_status; /*03h */
u8 target_id; /*04h */
u8 lun; /*05h */
u8 cdb_len; /*06h */
u8 sge_count; /*07h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 timeout; /*12h */
__le32 data_xferlen; /*14h */
} __attribute__ ((packed));
union megasas_sgl_frame {
struct megasas_sge32 sge32[8];
struct megasas_sge64 sge64[5];
} __attribute__ ((packed));
typedef union _MFI_CAPABILITIES {
struct {
#if defined(__BIG_ENDIAN_BITFIELD)
u32 reserved:16;
u32 support_fw_exposed_dev_list:1;
u32 support_nvme_passthru:1;
scsi: megaraid_sas: Add support for 64bit consistent DMA The latest MegaRAID Firmware (for Invader series) has support for 64bit DMA for both streaming and consistent DMA buffers. All Ventura series controller FW always support 64 bit consistent DMA. Also, on a few architectures 32bit DMA is not supported. Current driver always prefers 32bit for consistent DMA and 64bit for streaming DMA. This behavior was unintentional and carried forwarded from legacy controller FW. Need to enhance the driver to support 64bit consistent DMA buffers based on the firmware capability. Below is the DMA setting strategy in driver with this patch. For Ventura series, always try to set 64bit DMA mask. If it fails fall back to 32bit DMA mask. For Invader series and earlier generation controllers, first try to set to 32bit consistent DMA mask irrespective of FW capability. This is needed to ensure firmware downgrades do not break. If 32bit DMA setting fails, check FW capability and try seting to 64bit DMA mask. There are certain restrictions in the hardware for having all sense buffers and all reply descriptors to be in the same 4GB memory region. This limitation is h/w dependent and can not be changed in firmware. This limitation needs to be taken care in driver while allocating the buffers. There was a discussion regarding this - find details at below link. https://www.spinics.net/lists/linux-scsi/msg108251.html Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-10-19 17:49:05 +08:00
u32 support_64bit_mode:1;
u32 support_pd_map_target_id:1;
u32 support_qd_throttling:1;
u32 support_fp_rlbypass:1;
u32 support_vfid_in_ioframe:1;
u32 support_ext_io_size:1;
u32 support_ext_queue_depth:1;
u32 security_protocol_cmds_fw:1;
u32 support_core_affinity:1;
u32 support_ndrive_r1_lb:1;
u32 support_max_255lds:1;
u32 support_fastpath_wb:1;
u32 support_additional_msix:1;
u32 support_fp_remote_lun:1;
#else
u32 support_fp_remote_lun:1;
u32 support_additional_msix:1;
u32 support_fastpath_wb:1;
u32 support_max_255lds:1;
u32 support_ndrive_r1_lb:1;
u32 support_core_affinity:1;
u32 security_protocol_cmds_fw:1;
u32 support_ext_queue_depth:1;
u32 support_ext_io_size:1;
u32 support_vfid_in_ioframe:1;
u32 support_fp_rlbypass:1;
u32 support_qd_throttling:1;
u32 support_pd_map_target_id:1;
scsi: megaraid_sas: Add support for 64bit consistent DMA The latest MegaRAID Firmware (for Invader series) has support for 64bit DMA for both streaming and consistent DMA buffers. All Ventura series controller FW always support 64 bit consistent DMA. Also, on a few architectures 32bit DMA is not supported. Current driver always prefers 32bit for consistent DMA and 64bit for streaming DMA. This behavior was unintentional and carried forwarded from legacy controller FW. Need to enhance the driver to support 64bit consistent DMA buffers based on the firmware capability. Below is the DMA setting strategy in driver with this patch. For Ventura series, always try to set 64bit DMA mask. If it fails fall back to 32bit DMA mask. For Invader series and earlier generation controllers, first try to set to 32bit consistent DMA mask irrespective of FW capability. This is needed to ensure firmware downgrades do not break. If 32bit DMA setting fails, check FW capability and try seting to 64bit DMA mask. There are certain restrictions in the hardware for having all sense buffers and all reply descriptors to be in the same 4GB memory region. This limitation is h/w dependent and can not be changed in firmware. This limitation needs to be taken care in driver while allocating the buffers. There was a discussion regarding this - find details at below link. https://www.spinics.net/lists/linux-scsi/msg108251.html Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-10-19 17:49:05 +08:00
u32 support_64bit_mode:1;
u32 support_nvme_passthru:1;
u32 support_fw_exposed_dev_list:1;
u32 reserved:16;
#endif
} mfi_capabilities;
__le32 reg;
} MFI_CAPABILITIES;
struct megasas_init_frame {
u8 cmd; /*00h */
u8 reserved_0; /*01h */
u8 cmd_status; /*02h */
u8 reserved_1; /*03h */
MFI_CAPABILITIES driver_operations; /*04h*/
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 replyqueue_mask; /*12h */
__le32 data_xfer_len; /*14h */
__le32 queue_info_new_phys_addr_lo; /*18h */
__le32 queue_info_new_phys_addr_hi; /*1Ch */
__le32 queue_info_old_phys_addr_lo; /*20h */
__le32 queue_info_old_phys_addr_hi; /*24h */
__le32 reserved_4[2]; /*28h */
__le32 system_info_lo; /*30h */
__le32 system_info_hi; /*34h */
__le32 reserved_5[2]; /*38h */
} __attribute__ ((packed));
struct megasas_init_queue_info {
__le32 init_flags; /*00h */
__le32 reply_queue_entries; /*04h */
__le32 reply_queue_start_phys_addr_lo; /*08h */
__le32 reply_queue_start_phys_addr_hi; /*0Ch */
__le32 producer_index_phys_addr_lo; /*10h */
__le32 producer_index_phys_addr_hi; /*14h */
__le32 consumer_index_phys_addr_lo; /*18h */
__le32 consumer_index_phys_addr_hi; /*1Ch */
} __attribute__ ((packed));
struct megasas_io_frame {
u8 cmd; /*00h */
u8 sense_len; /*01h */
u8 cmd_status; /*02h */
u8 scsi_status; /*03h */
u8 target_id; /*04h */
u8 access_byte; /*05h */
u8 reserved_0; /*06h */
u8 sge_count; /*07h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 timeout; /*12h */
__le32 lba_count; /*14h */
__le32 sense_buf_phys_addr_lo; /*18h */
__le32 sense_buf_phys_addr_hi; /*1Ch */
__le32 start_lba_lo; /*20h */
__le32 start_lba_hi; /*24h */
union megasas_sgl sgl; /*28h */
} __attribute__ ((packed));
struct megasas_pthru_frame {
u8 cmd; /*00h */
u8 sense_len; /*01h */
u8 cmd_status; /*02h */
u8 scsi_status; /*03h */
u8 target_id; /*04h */
u8 lun; /*05h */
u8 cdb_len; /*06h */
u8 sge_count; /*07h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 timeout; /*12h */
__le32 data_xfer_len; /*14h */
__le32 sense_buf_phys_addr_lo; /*18h */
__le32 sense_buf_phys_addr_hi; /*1Ch */
u8 cdb[16]; /*20h */
union megasas_sgl sgl; /*30h */
} __attribute__ ((packed));
struct megasas_dcmd_frame {
u8 cmd; /*00h */
u8 reserved_0; /*01h */
u8 cmd_status; /*02h */
u8 reserved_1[4]; /*03h */
u8 sge_count; /*07h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 timeout; /*12h */
__le32 data_xfer_len; /*14h */
__le32 opcode; /*18h */
union { /*1Ch */
u8 b[12];
__le16 s[6];
__le32 w[3];
} mbox;
union megasas_sgl sgl; /*28h */
} __attribute__ ((packed));
struct megasas_abort_frame {
u8 cmd; /*00h */
u8 reserved_0; /*01h */
u8 cmd_status; /*02h */
u8 reserved_1; /*03h */
__le32 reserved_2; /*04h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 reserved_3; /*12h */
__le32 reserved_4; /*14h */
__le32 abort_context; /*18h */
__le32 pad_1; /*1Ch */
__le32 abort_mfi_phys_addr_lo; /*20h */
__le32 abort_mfi_phys_addr_hi; /*24h */
__le32 reserved_5[6]; /*28h */
} __attribute__ ((packed));
struct megasas_smp_frame {
u8 cmd; /*00h */
u8 reserved_1; /*01h */
u8 cmd_status; /*02h */
u8 connection_status; /*03h */
u8 reserved_2[3]; /*04h */
u8 sge_count; /*07h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 timeout; /*12h */
__le32 data_xfer_len; /*14h */
__le64 sas_addr; /*18h */
union {
struct megasas_sge32 sge32[2]; /* [0]: resp [1]: req */
struct megasas_sge64 sge64[2]; /* [0]: resp [1]: req */
} sgl;
} __attribute__ ((packed));
struct megasas_stp_frame {
u8 cmd; /*00h */
u8 reserved_1; /*01h */
u8 cmd_status; /*02h */
u8 reserved_2; /*03h */
u8 target_id; /*04h */
u8 reserved_3[2]; /*05h */
u8 sge_count; /*07h */
__le32 context; /*08h */
__le32 pad_0; /*0Ch */
__le16 flags; /*10h */
__le16 timeout; /*12h */
__le32 data_xfer_len; /*14h */
__le16 fis[10]; /*18h */
__le32 stp_flags;
union {
struct megasas_sge32 sge32[2]; /* [0]: resp [1]: data */
struct megasas_sge64 sge64[2]; /* [0]: resp [1]: data */
} sgl;
} __attribute__ ((packed));
union megasas_frame {
struct megasas_header hdr;
struct megasas_init_frame init;
struct megasas_io_frame io;
struct megasas_pthru_frame pthru;
struct megasas_dcmd_frame dcmd;
struct megasas_abort_frame abort;
struct megasas_smp_frame smp;
struct megasas_stp_frame stp;
u8 raw_bytes[64];
};
/**
* struct MR_PRIV_DEVICE - sdev private hostdata
* @is_tm_capable: firmware managed tm_capable flag
* @tm_busy: TM request is in progress
*/
struct MR_PRIV_DEVICE {
bool is_tm_capable;
bool tm_busy;
atomic_t r1_ldio_hint;
u8 interface_type;
u8 task_abort_tmo;
u8 target_reset_tmo;
};
struct megasas_cmd;
union megasas_evt_class_locale {
struct {
#ifndef __BIG_ENDIAN_BITFIELD
u16 locale;
u8 reserved;
s8 class;
#else
s8 class;
u8 reserved;
u16 locale;
#endif
} __attribute__ ((packed)) members;
u32 word;
} __attribute__ ((packed));
struct megasas_evt_log_info {
__le32 newest_seq_num;
__le32 oldest_seq_num;
__le32 clear_seq_num;
__le32 shutdown_seq_num;
__le32 boot_seq_num;
} __attribute__ ((packed));
struct megasas_progress {
__le16 progress;
__le16 elapsed_seconds;
} __attribute__ ((packed));
struct megasas_evtarg_ld {
u16 target_id;
u8 ld_index;
u8 reserved;
} __attribute__ ((packed));
struct megasas_evtarg_pd {
u16 device_id;
u8 encl_index;
u8 slot_number;
} __attribute__ ((packed));
struct megasas_evt_detail {
__le32 seq_num;
__le32 time_stamp;
__le32 code;
union megasas_evt_class_locale cl;
u8 arg_type;
u8 reserved1[15];
union {
struct {
struct megasas_evtarg_pd pd;
u8 cdb_length;
u8 sense_length;
u8 reserved[2];
u8 cdb[16];
u8 sense[64];
} __attribute__ ((packed)) cdbSense;
struct megasas_evtarg_ld ld;
struct {
struct megasas_evtarg_ld ld;
__le64 count;
} __attribute__ ((packed)) ld_count;
struct {
__le64 lba;
struct megasas_evtarg_ld ld;
} __attribute__ ((packed)) ld_lba;
struct {
struct megasas_evtarg_ld ld;
__le32 prevOwner;
__le32 newOwner;
} __attribute__ ((packed)) ld_owner;
struct {
u64 ld_lba;
u64 pd_lba;
struct megasas_evtarg_ld ld;
struct megasas_evtarg_pd pd;
} __attribute__ ((packed)) ld_lba_pd_lba;
struct {
struct megasas_evtarg_ld ld;
struct megasas_progress prog;
} __attribute__ ((packed)) ld_prog;
struct {
struct megasas_evtarg_ld ld;
u32 prev_state;
u32 new_state;
} __attribute__ ((packed)) ld_state;
struct {
u64 strip;
struct megasas_evtarg_ld ld;
} __attribute__ ((packed)) ld_strip;
struct megasas_evtarg_pd pd;
struct {
struct megasas_evtarg_pd pd;
u32 err;
} __attribute__ ((packed)) pd_err;
struct {
u64 lba;
struct megasas_evtarg_pd pd;
} __attribute__ ((packed)) pd_lba;
struct {
u64 lba;
struct megasas_evtarg_pd pd;
struct megasas_evtarg_ld ld;
} __attribute__ ((packed)) pd_lba_ld;
struct {
struct megasas_evtarg_pd pd;
struct megasas_progress prog;
} __attribute__ ((packed)) pd_prog;
struct {
struct megasas_evtarg_pd pd;
u32 prevState;
u32 newState;
} __attribute__ ((packed)) pd_state;
struct {
u16 vendorId;
__le16 deviceId;
u16 subVendorId;
u16 subDeviceId;
} __attribute__ ((packed)) pci;
u32 rate;
char str[96];
struct {
u32 rtc;
u32 elapsedSeconds;
} __attribute__ ((packed)) time;
struct {
u32 ecar;
u32 elog;
char str[64];
} __attribute__ ((packed)) ecc;
u8 b[96];
__le16 s[48];
__le32 w[24];
__le64 d[12];
} args;
char description[128];
} __attribute__ ((packed));
struct megasas_aen_event {
struct delayed_work hotplug_work;
struct megasas_instance *instance;
};
struct megasas_irq_context {
char name[MEGASAS_MSIX_NAME_LEN];
struct megasas_instance *instance;
u32 MSIxIndex;
scsi: megaraid_sas: IRQ poll to avoid CPU hard lockups Issue Description: We have seen cpu lock up issues from field if system has a large (more than 96) logical cpu count. SAS3.0 controller (Invader series) supports max 96 MSI-X vector and SAS3.5 product (Ventura) supports max 128 MSI-X vectors. This may be a generic issue (if PCI device support completion on multiple reply queues). Let me explain it w.r.t megaraid_sas supported h/w just to simplify the problem and possible changes to handle such issues. MegaRAID controller supports multiple reply queues in completion path. Driver creates MSI-X vectors for controller as "minimum of (FW supported Reply queues, Logical CPUs)". If submitter is not interrupted via completion on same CPU, there is a loop in the IO path. This behavior can cause hard/soft CPU lockups, IO timeout, system sluggish etc. Example - one CPU (e.g. CPU A) is busy submitting the IOs and another CPU (e.g. CPU B) is busy with processing the corresponding IO's reply descriptors from reply descriptor queue upon receiving the interrupts from HBA. If CPU A is continuously pumping the IOs then always CPU B (which is executing the ISR) will see the valid reply descriptors in the reply descriptor queue and it will be continuously processing those reply descriptor in a loop without quitting the ISR handler. megaraid_sas driver will exit ISR handler if it finds unused reply descriptor in the reply descriptor queue. Since CPU A will be continuously sending the IOs, CPU B may always see a valid reply descriptor (posted by HBA Firmware after processing the IO) in the reply descriptor queue. In worst case, driver will not quit from this loop in the ISR handler. Eventually, CPU lockup will be detected by watchdog. Above mentioned behavior is not common if "rq_affinity" set to 2 or affinity_hint is honored by irqbalancer as "exact". If rq_affinity is set to 2, submitter will be always interrupted via completion on same CPU. If irqbalancer is using "exact" policy, interrupt will be delivered to submitter CPU. Problem statement: If CPU count to MSI-X vectors (reply descriptor Queues) count ratio is not 1:1, we still have exposure of issue explained above and for that we don't have any solution. Exposure of soft/hard lockup is seen if CPU count is more than MSI-X supported by device. If CPUs count to MSI-X vectors count ratio is not 1:1, (Other way, if CPU counts to MSI-X vector count ratio is something like X:1, where X > 1) then 'exact' irqbalance policy OR rq_affinity = 2 won't help to avoid CPU hard/soft lockups. There won't be any one to one mapping between CPU to MSI-X vector instead one MSI-X interrupt (or reply descriptor queue) is shared with group/set of CPUs and there is a possibility of having a loop in the IO path within that CPU group and may observe lockups. For example: Consider a system having two NUMA nodes and each node having four logical CPUs and also consider that number of MSI-X vectors enabled on the HBA is two, then CPUs count to MSI-X vector count ratio as 4:1. e.g. MSI-X vector 0 is affinity to CPU 0, CPU 1, CPU 2 & CPU 3 of NUMA node 0 and MSI-X vector 1 is affinity to CPU 4, CPU 5, CPU 6 & CPU 7 of NUMA node 1. numactl --hardware available: 2 nodes (0-1) node 0 cpus: 0 1 2 3 --> MSI-X 0 node 0 size: 65536 MB node 0 free: 63176 MB node 1 cpus: 4 5 6 7 --> MSI-X 1 node 1 size: 65536 MB node 1 free: 63176 MB Assume that user started an application which uses all the CPUs of NUMA node 0 for issuing the IOs. Only one CPU from affinity list (it can be any cpu since this behavior depends upon irqbalance) CPU0 will receive the interrupts from MSI-X 0 for all the IOs. Eventually, CPU 0 IO submission percentage will be decreasing and ISR processing percentage will be increasing as it is more busy with processing the interrupts. Gradually IO submission percentage on CPU 0 will be zero and it's ISR processing percentage will be 100% as IO loop has already formed within the NUMA node 0, i.e. CPU 1, CPU 2 & CPU 3 will be continuously busy with submitting the heavy IOs and only CPU 0 is busy in the ISR path as it always find the valid reply descriptor in the reply descriptor queue. Eventually, we will observe the hard lockup here. Chances of occurring of hard/soft lockups are directly proportional to value of X. If value of X is high, then chances of observing CPU lockups is high. Solution: Use IRQ poll interface defined in "irq_poll.c". megaraid_sas driver will execute ISR routine in softirq context and it will always quit the loop based on budget provided in IRQ poll interface. Driver will switch to IRQ poll only when more than a threshold number of reply descriptors are handled in one ISR. Currently threshold is set as 1/4th of HBA queue depth. In these scenarios (i.e. where CPUs count to MSI-X vectors count ratio is X:1 (where X > 1)), IRQ poll interface will avoid CPU hard lockups due to voluntary exit from the reply queue processing based on budget. Note - Only one MSI-X vector is busy doing processing. Select CONFIG_IRQ_POLL from driver Kconfig for driver compilation. Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-05-08 01:05:35 +08:00
u32 os_irq;
struct irq_poll irqpoll;
bool irq_poll_scheduled;
bool irq_line_enable;
};
struct MR_DRV_SYSTEM_INFO {
u8 infoVersion;
u8 systemIdLength;
u16 reserved0;
u8 systemId[64];
u8 reserved[1980];
};
enum MR_PD_TYPE {
UNKNOWN_DRIVE = 0,
PARALLEL_SCSI = 1,
SAS_PD = 2,
SATA_PD = 3,
FC_PD = 4,
NVME_PD = 5,
};
/* JBOD Queue depth definitions */
#define MEGASAS_SATA_QD 32
#define MEGASAS_SAS_QD 256
#define MEGASAS_DEFAULT_PD_QD 64
#define MEGASAS_NVME_QD 64
#define MR_DEFAULT_NVME_PAGE_SIZE 4096
#define MR_DEFAULT_NVME_PAGE_SHIFT 12
#define MR_DEFAULT_NVME_MDTS_KB 128
#define MR_NVME_PAGE_SIZE_MASK 0x000000FF
/*Aero performance parameters*/
#define MR_HIGH_IOPS_QUEUE_COUNT 8
#define MR_DEVICE_HIGH_IOPS_DEPTH 8
#define MR_HIGH_IOPS_BATCH_COUNT 16
scsi: megaraid_sas: Introduce various Aero performance modes For Aero adapters, driver provides three different performance modes controlled through module parameter named 'perf_mode'. Below are those performance modes: 0: Balanced - Additional high IOPS reply queues will be enabled along with low latency queues. Interrupt coalescing will be enabled only for these high IOPS reply queues. 1: IOPS - No additional high IOPS queues are enabled. Interrupt coalescing will be enabled on all reply queues. 2: Latency - No additional high IOPS queues are enabled. Interrupt coalescing will be disabled on all reply queues. This is a legacy behavior similar to Ventura & Invader Series. Default performance mode settings: - Performance mode set to 'Balanced', if Aero controller is working in 16GT/s PCIe speed. - Performance mode will be set to 'Latency' mode for all other cases. Through module parameter 'perf_mode', user can override default performance mode to desired one. Captured some performance numbers with these performance modes. 4k Random Read IO performance numbers on 24 SAS SSD drives for above three performance modes. Performance data is from Intel Skylake and HGST SS300 (drive model SDLL1DLR400GCCA1). IOPS: ----------------------------------------------------------------------- |perf_mode | qd = 1 | qd = 64 | note | |-------------|--------|---------|------------------------------------- |balanced | 259K | 3061k | Provides max performance numbers | | | | | both on lower QD workload & | | | | | also on higher QD workload | |-------------|--------|---------|------------------------------------- |iops | 220K | 3100k | Provides max performance numbers | | | | | only on higher QD workload. | |-------------|--------|---------|------------------------------------- |latency | 246k | 2226k | Provides good performance numbers | | | | | only on lower QD worklaod. | ----------------------------------------------------------------------- Average Latency: ----------------------------------------------------- |perf_mode | qd = 1 | qd = 64 | |-------------|--------------|----------------------| |balanced | 92.05 usec | 501.12 usec | |-------------|--------------|----------------------| |iops | 108.40 usec | 498.10 usec | |-------------|--------------|----------------------| |latency | 97.10 usec | 689.26 usec | ----------------------------------------------------- Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com> Signed-off-by: Chandrakanth Patil <chandrakanth.patil@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-06-25 19:04:35 +08:00
enum MR_PERF_MODE {
MR_BALANCED_PERF_MODE = 0,
MR_IOPS_PERF_MODE = 1,
MR_LATENCY_PERF_MODE = 2,
};
#define MEGASAS_PERF_MODE_2STR(mode) \
((mode) == MR_BALANCED_PERF_MODE ? "Balanced" : \
(mode) == MR_IOPS_PERF_MODE ? "IOPS" : \
(mode) == MR_LATENCY_PERF_MODE ? "Latency" : \
"Unknown")
struct megasas_instance {
unsigned int *reply_map;
__le32 *producer;
dma_addr_t producer_h;
__le32 *consumer;
dma_addr_t consumer_h;
struct MR_DRV_SYSTEM_INFO *system_info_buf;
dma_addr_t system_info_h;
struct MR_LD_VF_AFFILIATION *vf_affiliation;
dma_addr_t vf_affiliation_h;
struct MR_LD_VF_AFFILIATION_111 *vf_affiliation_111;
dma_addr_t vf_affiliation_111_h;
struct MR_CTRL_HB_HOST_MEM *hb_host_mem;
dma_addr_t hb_host_mem_h;
struct MR_PD_INFO *pd_info;
dma_addr_t pd_info_h;
struct MR_TARGET_PROPERTIES *tgt_prop;
dma_addr_t tgt_prop_h;
__le32 *reply_queue;
dma_addr_t reply_queue_h;
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
u32 *crash_dump_buf;
dma_addr_t crash_dump_h;
struct MR_PD_LIST *pd_list_buf;
dma_addr_t pd_list_buf_h;
struct megasas_ctrl_info *ctrl_info_buf;
dma_addr_t ctrl_info_buf_h;
struct MR_LD_LIST *ld_list_buf;
dma_addr_t ld_list_buf_h;
struct MR_LD_TARGETID_LIST *ld_targetid_list_buf;
dma_addr_t ld_targetid_list_buf_h;
struct MR_HOST_DEVICE_LIST *host_device_list_buf;
dma_addr_t host_device_list_buf_h;
struct MR_SNAPDUMP_PROPERTIES *snapdump_prop;
dma_addr_t snapdump_prop_h;
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
void *crash_buf[MAX_CRASH_DUMP_SIZE];
unsigned int fw_crash_buffer_size;
unsigned int fw_crash_state;
unsigned int fw_crash_buffer_offset;
u32 drv_buf_index;
u32 drv_buf_alloc;
u32 crash_dump_fw_support;
u32 crash_dump_drv_support;
u32 crash_dump_app_support;
u32 secure_jbod_support;
u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
bool use_seqnum_jbod_fp; /* Added for PD sequence */
bool smp_affinity_enable;
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
spinlock_t crashdump_lock;
struct megasas_register_set __iomem *reg_set;
u32 __iomem *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY];
struct megasas_pd_list pd_list[MEGASAS_MAX_PD];
struct megasas_pd_list local_pd_list[MEGASAS_MAX_PD];
u8 ld_ids[MEGASAS_MAX_LD_IDS];
s8 init_id;
u16 max_num_sge;
u16 max_fw_cmds;
u16 max_mpt_cmds;
u16 max_mfi_cmds;
u16 max_scsi_cmds;
u16 ldio_threshold;
u16 cur_can_queue;
u32 max_sectors_per_req;
bool msix_load_balance;
struct megasas_aen_event *ev;
struct megasas_cmd **cmd_list;
struct list_head cmd_pool;
/* used to sync fire the cmd to fw */
spinlock_t mfi_pool_lock;
/* used to sync fire the cmd to fw */
spinlock_t hba_lock;
/* used to synch producer, consumer ptrs in dpc */
spinlock_t stream_lock;
spinlock_t completion_lock;
struct dma_pool *frame_dma_pool;
struct dma_pool *sense_dma_pool;
struct megasas_evt_detail *evt_detail;
dma_addr_t evt_detail_h;
struct megasas_cmd *aen_cmd;
struct semaphore ioctl_sem;
struct Scsi_Host *host;
wait_queue_head_t int_cmd_wait_q;
wait_queue_head_t abort_cmd_wait_q;
struct pci_dev *pdev;
u32 unique_id;
u32 fw_support_ieee;
scsi: megaraid_sas: IRQ poll to avoid CPU hard lockups Issue Description: We have seen cpu lock up issues from field if system has a large (more than 96) logical cpu count. SAS3.0 controller (Invader series) supports max 96 MSI-X vector and SAS3.5 product (Ventura) supports max 128 MSI-X vectors. This may be a generic issue (if PCI device support completion on multiple reply queues). Let me explain it w.r.t megaraid_sas supported h/w just to simplify the problem and possible changes to handle such issues. MegaRAID controller supports multiple reply queues in completion path. Driver creates MSI-X vectors for controller as "minimum of (FW supported Reply queues, Logical CPUs)". If submitter is not interrupted via completion on same CPU, there is a loop in the IO path. This behavior can cause hard/soft CPU lockups, IO timeout, system sluggish etc. Example - one CPU (e.g. CPU A) is busy submitting the IOs and another CPU (e.g. CPU B) is busy with processing the corresponding IO's reply descriptors from reply descriptor queue upon receiving the interrupts from HBA. If CPU A is continuously pumping the IOs then always CPU B (which is executing the ISR) will see the valid reply descriptors in the reply descriptor queue and it will be continuously processing those reply descriptor in a loop without quitting the ISR handler. megaraid_sas driver will exit ISR handler if it finds unused reply descriptor in the reply descriptor queue. Since CPU A will be continuously sending the IOs, CPU B may always see a valid reply descriptor (posted by HBA Firmware after processing the IO) in the reply descriptor queue. In worst case, driver will not quit from this loop in the ISR handler. Eventually, CPU lockup will be detected by watchdog. Above mentioned behavior is not common if "rq_affinity" set to 2 or affinity_hint is honored by irqbalancer as "exact". If rq_affinity is set to 2, submitter will be always interrupted via completion on same CPU. If irqbalancer is using "exact" policy, interrupt will be delivered to submitter CPU. Problem statement: If CPU count to MSI-X vectors (reply descriptor Queues) count ratio is not 1:1, we still have exposure of issue explained above and for that we don't have any solution. Exposure of soft/hard lockup is seen if CPU count is more than MSI-X supported by device. If CPUs count to MSI-X vectors count ratio is not 1:1, (Other way, if CPU counts to MSI-X vector count ratio is something like X:1, where X > 1) then 'exact' irqbalance policy OR rq_affinity = 2 won't help to avoid CPU hard/soft lockups. There won't be any one to one mapping between CPU to MSI-X vector instead one MSI-X interrupt (or reply descriptor queue) is shared with group/set of CPUs and there is a possibility of having a loop in the IO path within that CPU group and may observe lockups. For example: Consider a system having two NUMA nodes and each node having four logical CPUs and also consider that number of MSI-X vectors enabled on the HBA is two, then CPUs count to MSI-X vector count ratio as 4:1. e.g. MSI-X vector 0 is affinity to CPU 0, CPU 1, CPU 2 & CPU 3 of NUMA node 0 and MSI-X vector 1 is affinity to CPU 4, CPU 5, CPU 6 & CPU 7 of NUMA node 1. numactl --hardware available: 2 nodes (0-1) node 0 cpus: 0 1 2 3 --> MSI-X 0 node 0 size: 65536 MB node 0 free: 63176 MB node 1 cpus: 4 5 6 7 --> MSI-X 1 node 1 size: 65536 MB node 1 free: 63176 MB Assume that user started an application which uses all the CPUs of NUMA node 0 for issuing the IOs. Only one CPU from affinity list (it can be any cpu since this behavior depends upon irqbalance) CPU0 will receive the interrupts from MSI-X 0 for all the IOs. Eventually, CPU 0 IO submission percentage will be decreasing and ISR processing percentage will be increasing as it is more busy with processing the interrupts. Gradually IO submission percentage on CPU 0 will be zero and it's ISR processing percentage will be 100% as IO loop has already formed within the NUMA node 0, i.e. CPU 1, CPU 2 & CPU 3 will be continuously busy with submitting the heavy IOs and only CPU 0 is busy in the ISR path as it always find the valid reply descriptor in the reply descriptor queue. Eventually, we will observe the hard lockup here. Chances of occurring of hard/soft lockups are directly proportional to value of X. If value of X is high, then chances of observing CPU lockups is high. Solution: Use IRQ poll interface defined in "irq_poll.c". megaraid_sas driver will execute ISR routine in softirq context and it will always quit the loop based on budget provided in IRQ poll interface. Driver will switch to IRQ poll only when more than a threshold number of reply descriptors are handled in one ISR. Currently threshold is set as 1/4th of HBA queue depth. In these scenarios (i.e. where CPUs count to MSI-X vectors count ratio is X:1 (where X > 1)), IRQ poll interface will avoid CPU hard lockups due to voluntary exit from the reply queue processing based on budget. Note - Only one MSI-X vector is busy doing processing. Select CONFIG_IRQ_POLL from driver Kconfig for driver compilation. Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-05-08 01:05:35 +08:00
u32 threshold_reply_count;
atomic_t fw_outstanding;
atomic_t ldio_outstanding;
atomic_t fw_reset_no_pci_access;
atomic64_t total_io_count;
atomic64_t high_iops_outstanding;
struct megasas_instance_template *instancet;
struct tasklet_struct isr_tasklet;
struct work_struct work_init;
struct delayed_work fw_fault_work;
struct workqueue_struct *fw_fault_work_q;
char fault_handler_work_q_name[48];
u8 flag;
u8 unload;
u8 flag_ieee;
u8 issuepend_done;
u8 disableOnlineCtrlReset;
[SCSI] megaraid_sas: Add support for Uneven Span PRL11 MegaRAID older Firmware does not support uneven span configuration for PRL11. E.g User wants to create 34 Driver PRL11 config, it was not possible using old firmware, since it was not supported configuration in old firmware Old Firmware expect even number of Drives in each span and same number of physical drives at each span. Considering above design, 17 Drives at Span-0 and 17 drives at span-1 was not possible. Now, using this new feature Firmware and Driver both required changes. New Firmware can allow user to create 16 Drives at span-0 and 18 Drives at span-1. This will allow user to create 34 Drives Uneven span PRL11. RAID map is interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. Since legacy RAID map consider Even Span design, there was no place to keep Uneven span information in existing Raid map. Because of this limitation, for Uneven span VD, driver can not use RAID map. This patch address the changes required in Driver to support Uneven span PRL11 support. 1. Driver will find if Firmware has UnevenSpanSupport or not by reading Controller Info. 2. If Firmware has UnvenSpan PRL11 support, then Driver will inform about its capability of handling UnevenSpan PRL11 to the firmware. 3. Driver will update its copy of span info on each time Raid map update is called. 4. Follow different IO path if it is Uneven Span. (For Uneven Span, Driver uses Span Set info to find relavent fields for that particular Virtual Disk) More verbose prints will be available by setting "SPAN_DEBUG" to 1 at compilation time. Signed-off-by: Sumit Saxena <sumit.saxena@lsi.com> Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2013-05-22 15:05:04 +08:00
u8 UnevenSpanSupport;
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u8 supportmax256vd;
u8 pd_list_not_supported;
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u16 fw_supported_vd_count;
u16 fw_supported_pd_count;
u16 drv_supported_vd_count;
u16 drv_supported_pd_count;
atomic_t adprecovery;
unsigned long last_time;
u32 mfiStatus;
u32 last_seq_num;
struct list_head internal_reset_pending_q;
/* Ptr to hba specific information */
void *ctrl_context;
unsigned int msix_vectors;
struct megasas_irq_context irq_context[MEGASAS_MAX_MSIX_QUEUES];
u64 map_id;
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
u64 pd_seq_map_id;
struct megasas_cmd *map_update_cmd;
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
struct megasas_cmd *jbod_seq_cmd;
unsigned long bar;
long reset_flags;
struct mutex reset_mutex;
struct timer_list sriov_heartbeat_timer;
char skip_heartbeat_timer_del;
u8 requestorId;
char PlasmaFW111;
char clusterId[MEGASAS_CLUSTER_ID_SIZE];
u8 peerIsPresent;
u8 passive;
u16 throttlequeuedepth;
u8 mask_interrupts;
u16 max_chain_frame_sz;
u8 is_imr;
u8 is_rdpq;
bool dev_handle;
scsi: megaraid_sas: Send SYNCHRONIZE_CACHE for VD to firmware Until now the megaraid_sas driver has reported successful completion on SYNCHRONIZE_CACHE commands without sending them down to the controller. The controller firmware has been responsible for taking care of flushing disk caches for all drives that belong to a Virtual Disk at the time of system reboot/shutdown. There may have been a reason to avoid sending SYNCHRONIZE_CACHE to a VD in the past but that no longer appears to be valid. Older versions of MegaRaid firmware (Gen2 and Gen2.5) set the WCE bit for Virtual Disks but the firmware does not report correct completion status for a SYNCHRONIZE_CACHE command. As a result, we must use another method to identify whether it is safe to send the command to the controller. We use the canHandleSyncCache firmware flag in the scratch pad register at offset 0xB4. New SYNCHRONIZE_CACHE behavior: IF 'JBOD' Driver sends SYNCHRONIZE_CACHE command to the firmware Firmware sends SYNCHRONIZE_CACHE to drive Firmware obtains status from drive and returns same status back to driver ELSEIF 'VirtualDisk' IF firmware supports new API bit called canHandleSyncCache Driver sends SYNCHRONIZE_CACHE command to the firmware Firmware does not send SYNCHRONIZE_CACHE to drives Firmware returns SUCCESS ELSE Driver does not send SYNCHRONIZE_CACHE command to the firmware Driver return SUCCESS for that command ENDIF ENDIF [mkp: edited patch description] Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Reviewed-by: Ewan D. Milne <emilne@redhat.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2016-10-21 21:33:33 +08:00
bool fw_sync_cache_support;
u32 mfi_frame_size;
bool msix_combined;
u16 max_raid_mapsize;
/* preffered count to send as LDIO irrspective of FP capable.*/
u8 r1_ldio_hint_default;
u32 nvme_page_size;
u8 adapter_type;
scsi: megaraid_sas: Add support for 64bit consistent DMA The latest MegaRAID Firmware (for Invader series) has support for 64bit DMA for both streaming and consistent DMA buffers. All Ventura series controller FW always support 64 bit consistent DMA. Also, on a few architectures 32bit DMA is not supported. Current driver always prefers 32bit for consistent DMA and 64bit for streaming DMA. This behavior was unintentional and carried forwarded from legacy controller FW. Need to enhance the driver to support 64bit consistent DMA buffers based on the firmware capability. Below is the DMA setting strategy in driver with this patch. For Ventura series, always try to set 64bit DMA mask. If it fails fall back to 32bit DMA mask. For Invader series and earlier generation controllers, first try to set to 32bit consistent DMA mask irrespective of FW capability. This is needed to ensure firmware downgrades do not break. If 32bit DMA setting fails, check FW capability and try seting to 64bit DMA mask. There are certain restrictions in the hardware for having all sense buffers and all reply descriptors to be in the same 4GB memory region. This limitation is h/w dependent and can not be changed in firmware. This limitation needs to be taken care in driver while allocating the buffers. There was a discussion regarding this - find details at below link. https://www.spinics.net/lists/linux-scsi/msg108251.html Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-10-19 17:49:05 +08:00
bool consistent_mask_64bit;
bool support_nvme_passthru;
bool enable_sdev_max_qd;
u8 task_abort_tmo;
u8 max_reset_tmo;
u8 snapdump_wait_time;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_root;
struct dentry *raidmap_dump;
#endif
u8 enable_fw_dev_list;
bool atomic_desc_support;
bool support_seqnum_jbod_fp;
bool support_pci_lane_margining;
u8 low_latency_index_start;
scsi: megaraid_sas: Introduce various Aero performance modes For Aero adapters, driver provides three different performance modes controlled through module parameter named 'perf_mode'. Below are those performance modes: 0: Balanced - Additional high IOPS reply queues will be enabled along with low latency queues. Interrupt coalescing will be enabled only for these high IOPS reply queues. 1: IOPS - No additional high IOPS queues are enabled. Interrupt coalescing will be enabled on all reply queues. 2: Latency - No additional high IOPS queues are enabled. Interrupt coalescing will be disabled on all reply queues. This is a legacy behavior similar to Ventura & Invader Series. Default performance mode settings: - Performance mode set to 'Balanced', if Aero controller is working in 16GT/s PCIe speed. - Performance mode will be set to 'Latency' mode for all other cases. Through module parameter 'perf_mode', user can override default performance mode to desired one. Captured some performance numbers with these performance modes. 4k Random Read IO performance numbers on 24 SAS SSD drives for above three performance modes. Performance data is from Intel Skylake and HGST SS300 (drive model SDLL1DLR400GCCA1). IOPS: ----------------------------------------------------------------------- |perf_mode | qd = 1 | qd = 64 | note | |-------------|--------|---------|------------------------------------- |balanced | 259K | 3061k | Provides max performance numbers | | | | | both on lower QD workload & | | | | | also on higher QD workload | |-------------|--------|---------|------------------------------------- |iops | 220K | 3100k | Provides max performance numbers | | | | | only on higher QD workload. | |-------------|--------|---------|------------------------------------- |latency | 246k | 2226k | Provides good performance numbers | | | | | only on lower QD worklaod. | ----------------------------------------------------------------------- Average Latency: ----------------------------------------------------- |perf_mode | qd = 1 | qd = 64 | |-------------|--------------|----------------------| |balanced | 92.05 usec | 501.12 usec | |-------------|--------------|----------------------| |iops | 108.40 usec | 498.10 usec | |-------------|--------------|----------------------| |latency | 97.10 usec | 689.26 usec | ----------------------------------------------------- Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com> Signed-off-by: Chandrakanth Patil <chandrakanth.patil@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-06-25 19:04:35 +08:00
int perf_mode;
};
struct MR_LD_VF_MAP {
u32 size;
union MR_LD_REF ref;
u8 ldVfCount;
u8 reserved[6];
u8 policy[1];
};
struct MR_LD_VF_AFFILIATION {
u32 size;
u8 ldCount;
u8 vfCount;
u8 thisVf;
u8 reserved[9];
struct MR_LD_VF_MAP map[1];
};
/* Plasma 1.11 FW backward compatibility structures */
#define IOV_111_OFFSET 0x7CE
#define MAX_VIRTUAL_FUNCTIONS 8
#define MR_LD_ACCESS_HIDDEN 15
struct IOV_111 {
u8 maxVFsSupported;
u8 numVFsEnabled;
u8 requestorId;
u8 reserved[5];
};
struct MR_LD_VF_MAP_111 {
u8 targetId;
u8 reserved[3];
u8 policy[MAX_VIRTUAL_FUNCTIONS];
};
struct MR_LD_VF_AFFILIATION_111 {
u8 vdCount;
u8 vfCount;
u8 thisVf;
u8 reserved[5];
struct MR_LD_VF_MAP_111 map[MAX_LOGICAL_DRIVES];
};
struct MR_CTRL_HB_HOST_MEM {
struct {
u32 fwCounter; /* Firmware heart beat counter */
struct {
u32 debugmode:1; /* 1=Firmware is in debug mode.
Heart beat will not be updated. */
u32 reserved:31;
} debug;
u32 reserved_fw[6];
u32 driverCounter; /* Driver heart beat counter. 0x20 */
u32 reserved_driver[7];
} HB;
u8 pad[0x400-0x40];
};
enum {
MEGASAS_HBA_OPERATIONAL = 0,
MEGASAS_ADPRESET_SM_INFAULT = 1,
MEGASAS_ADPRESET_SM_FW_RESET_SUCCESS = 2,
MEGASAS_ADPRESET_SM_OPERATIONAL = 3,
MEGASAS_HW_CRITICAL_ERROR = 4,
MEGASAS_ADPRESET_SM_POLLING = 5,
MEGASAS_ADPRESET_INPROG_SIGN = 0xDEADDEAD,
};
struct megasas_instance_template {
void (*fire_cmd)(struct megasas_instance *, dma_addr_t, \
u32, struct megasas_register_set __iomem *);
void (*enable_intr)(struct megasas_instance *);
void (*disable_intr)(struct megasas_instance *);
int (*clear_intr)(struct megasas_instance *);
u32 (*read_fw_status_reg)(struct megasas_instance *);
int (*adp_reset)(struct megasas_instance *, \
struct megasas_register_set __iomem *);
int (*check_reset)(struct megasas_instance *, \
struct megasas_register_set __iomem *);
irqreturn_t (*service_isr)(int irq, void *devp);
void (*tasklet)(unsigned long);
u32 (*init_adapter)(struct megasas_instance *);
u32 (*build_and_issue_cmd) (struct megasas_instance *,
struct scsi_cmnd *);
void (*issue_dcmd)(struct megasas_instance *instance,
struct megasas_cmd *cmd);
};
#define MEGASAS_IS_LOGICAL(sdev) \
((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1)
#define MEGASAS_DEV_INDEX(scp) \
(((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \
scp->device->id)
#define MEGASAS_PD_INDEX(scp) \
((scp->device->channel * MEGASAS_MAX_DEV_PER_CHANNEL) + \
scp->device->id)
struct megasas_cmd {
union megasas_frame *frame;
dma_addr_t frame_phys_addr;
u8 *sense;
dma_addr_t sense_phys_addr;
u32 index;
u8 sync_cmd;
megaraid_sas : Modify return value of megasas_issue_blocked_cmd() and wait_and_poll() to consider command status returned by firmware This patch is rebased on top of recently sent 18 patches(submitted by me) for megaraid_sas driver. Change the return value of wait_and_poll() and megsas_issue_blocked_cmd() based on MFI_STAT returned by firmware for that command. Earlier driver always send return type based on command completion (but never check MFI_STAT_OK for that command), so even if command is failed by firmware still driver will return SUCCESS status from these functions wait_and_poll() and megsas_issue_blocked_cmd() and if caller of these functions does not check command status (MFI_STAT), then it may endup using invalid data returned in DMA buffers(one of the example is megasas_ld_list_query DCMD). Best thing to avoid this type of issue is do error handling and set proper return type from caller function wait_and_poll() and megsas_issue_blocked_cmd(). The change proposed in this patch will fix the regression introduced in patch- "90dc9d9 megaraid_sas : MFI MPT linked list corruption fix" inside function megasas_ld_list_query(). Prior to this MFI MPT linked list corruption fix patch, megasas_ld_list_query() function used to check DCMD status(returned by firmware) but with this linked list corruption fix patch, DCMD status will not be checked inside function megasas_ld_list_query() and introduced this issue of wrong data being used by function megasas_ld_list_query(). Cc: <stable@vger.kernel.org> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: James Bottomley <JBottomley@Odin.com>
2015-05-06 21:31:02 +08:00
u8 cmd_status_drv;
u8 abort_aen;
u8 retry_for_fw_reset;
struct list_head list;
struct scsi_cmnd *scmd;
megaraid_sas : Use Block layer tag support for internal command indexing megaraid_sas driver will use block layer provided tag for indexing internal MPT frames to get any unique MPT frame tied with tag. Each IO request submitted from SCSI mid layer will get associated MPT frame from MPT framepool (retrieved and return back using spinlock inside megaraid_sas driver's submission/completion call back). Getting MPT frame from MPT Frame pool is very expensive operation because of associated spin lock operation (spinlock overhead increase on multi NUMA node). This type of locking in driver is very expensive call considering each IO request need - Acquire and Release of the same lock. With this support, in IO path driver will directly provide the unique command index(which is based on block layer tag) and will get the MPT frame tied to the tag and this way driver can get rid off lock, which synchronizes the access to MPT frame pool while fetching and returning MPT frame from the pool. This support in driver provides siginificant performance improvement(on multi NUMA node system)on latest upstream with SCSI.MQ as well as on existing linux distributions. Here is the data for test executed at Avago- - IO Tool- FIO - 4 Socket SMC server. (4 NUMA node server) - 12 SSDs in JBOD mode . - 4K Rand READ, QD=32 - SCSI MQ x86_64 (Latest Upstream kernel) - upto 300% Performance Improvement. If IOs are running on single Node, perfromance gain is less, but as soon as increase number of nodes, performance improvement is significant. IOs running on all 4 NUMA nodes, with this patch applied IOPs observed was 1170K vs 344K IOPs seen without this patch. Logically, there are two parts of this patch- 1) Block layer tag support 2) changes in calling convention of return_cmd. part 2 will revert the changes done by patch- 90dc9d9 megaraid_sas : MFI MPT linked list corruption fix because changes done in part 1 has fixed the problem of MFI MPT linked list corruption. part 2 is very much dependent on part 1, so we decided to have single patch for these two logical changes. [jejb: remove chatty printk pointed out by hch] Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: James Bottomley <JBottomley@Odin.com>
2015-04-23 19:01:24 +08:00
u8 flags;
struct megasas_instance *instance;
union {
struct {
u16 smid;
u16 resvd;
} context;
u32 frame_count;
};
};
#define MAX_MGMT_ADAPTERS 1024
#define MAX_IOCTL_SGE 16
struct megasas_iocpacket {
u16 host_no;
u16 __pad1;
u32 sgl_off;
u32 sge_count;
u32 sense_off;
u32 sense_len;
union {
u8 raw[128];
struct megasas_header hdr;
} frame;
struct iovec sgl[MAX_IOCTL_SGE];
} __attribute__ ((packed));
struct megasas_aen {
u16 host_no;
u16 __pad1;
u32 seq_num;
u32 class_locale_word;
} __attribute__ ((packed));
#ifdef CONFIG_COMPAT
struct compat_megasas_iocpacket {
u16 host_no;
u16 __pad1;
u32 sgl_off;
u32 sge_count;
u32 sense_off;
u32 sense_len;
union {
u8 raw[128];
struct megasas_header hdr;
} frame;
struct compat_iovec sgl[MAX_IOCTL_SGE];
} __attribute__ ((packed));
#define MEGASAS_IOC_FIRMWARE32 _IOWR('M', 1, struct compat_megasas_iocpacket)
#endif
#define MEGASAS_IOC_FIRMWARE _IOWR('M', 1, struct megasas_iocpacket)
#define MEGASAS_IOC_GET_AEN _IOW('M', 3, struct megasas_aen)
struct megasas_mgmt_info {
u16 count;
struct megasas_instance *instance[MAX_MGMT_ADAPTERS];
int max_index;
};
enum MEGASAS_OCR_CAUSE {
FW_FAULT_OCR = 0,
SCSIIO_TIMEOUT_OCR = 1,
MFI_IO_TIMEOUT_OCR = 2,
};
enum DCMD_RETURN_STATUS {
DCMD_SUCCESS = 0x00,
DCMD_TIMEOUT = 0x01,
DCMD_FAILED = 0x02,
DCMD_BUSY = 0x03,
DCMD_INIT = 0xff,
};
u8
MR_BuildRaidContext(struct megasas_instance *instance,
struct IO_REQUEST_INFO *io_info,
struct RAID_CONTEXT *pRAID_Context,
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
struct MR_DRV_RAID_MAP_ALL *map, u8 **raidLUN);
scsi: megaraid_sas: MR_TargetIdToLdGet u8 to u16 and avoid invalid raid-map access Change MR_TargetIdToLdGet return type from u8 to u16. ld id range check is added at two places in this patch - @megasas_build_ldio_fusion and @megasas_build_ld_nonrw_fusion. Previous driver code used different data type for lds TargetId returned from MR_TargetIdToLdGet. Prior to this change, above two functions was safeguarded due to function always return u8 and maximum value of ld id returned was 255. In below check, fw_supported_vd_count as of today is 64 or 256 and valid range to support is either 0-63 or 0-255. Ideally want to filter accessing raid map for ld ids which are not valid. With the u16 change, invalid ld id value is 0xFFFF and we will see kernel panic due to random memory access in MR_LdRaidGet. The changes will ensure we do not call MR_LdRaidGet if ld id is beyond size of ldSpanMap array. if (ld < instance->fw_supported_vd_count) >From firmware perspective,ld id 0xFF is invalid and even though current driver code forward such command, firmware fails with target not available. ld target id issue occurs mainly whenever driver loops to populate raid map (ea. MR_ValidateMapInfo). These are the only two places where we may see out of range target ids and wants to protect raid map access based on range provided by Firmware API. Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-02-10 16:59:19 +08:00
u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_DRV_RAID_MAP_ALL *map);
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
struct MR_LD_RAID *MR_LdRaidGet(u32 ld, struct MR_DRV_RAID_MAP_ALL *map);
u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_DRV_RAID_MAP_ALL *map);
u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_DRV_RAID_MAP_ALL *map);
__le16 MR_PdDevHandleGet(u32 pd, struct MR_DRV_RAID_MAP_ALL *map);
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u16 MR_GetLDTgtId(u32 ld, struct MR_DRV_RAID_MAP_ALL *map);
__le16 get_updated_dev_handle(struct megasas_instance *instance,
struct LD_LOAD_BALANCE_INFO *lbInfo,
struct IO_REQUEST_INFO *in_info,
struct MR_DRV_RAID_MAP_ALL *drv_map);
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
void mr_update_load_balance_params(struct MR_DRV_RAID_MAP_ALL *map,
struct LD_LOAD_BALANCE_INFO *lbInfo);
int megasas_get_ctrl_info(struct megasas_instance *instance);
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
/* PD sequence */
int
megasas_sync_pd_seq_num(struct megasas_instance *instance, bool pend);
void megasas_set_dynamic_target_properties(struct scsi_device *sdev,
bool is_target_prop);
int megasas_get_target_prop(struct megasas_instance *instance,
struct scsi_device *sdev);
void megasas_get_snapdump_properties(struct megasas_instance *instance);
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
int megasas_set_crash_dump_params(struct megasas_instance *instance,
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
u8 crash_buf_state);
megaraid_sas : Firmware crash dump feature support Resending the patch. Addressed the review comments from Tomas Henzl. Move buff_offset inside spinlock, corrected loop at crash dump buffer free, reset_devices check is added to disable fw crash dump feature in kdump kernel. This feature will provide similar interface as kernel crash dump feature. When megaraid firmware encounter any crash, driver will collect the firmware raw image and dump it into pre-configured location. Driver will allocate two different segment of memory. #1 Non-DMA able large buffer (will be allocated on demand) to capture actual FW crash dump. #2 DMA buffer (persistence allocation) just to do a arbitrator job. Firmware will keep writing Crash dump data in chucks of DMA buffer size into #2, which will be copy back by driver to the host memory as described in #1. Driver-Firmware interface: ================== A.) Host driver can allocate maximum 512MB Host memory to store crash dump data. This memory will be internal to the host and will not be exposed to the Firmware. Driver may not be able to allocate 512 MB. In that case, driver will do possible memory (available at run time) allocation to store crash dump data. Let’s call this buffer as Host Crash Buffer. Host Crash buffer will not be contigious as a whole, but it will have multiple chunk of contigious memory. This will be internal to driver and firmware/application are unaware of it. Partial allocation of Host Crash buffer may have valid information to debug depending upon what was collected in that buffer and depending on nature of failure. Complete Crash dump is the best case, but we do want to capture partial buffer just to grab something rather than nothing. Host Crash buffer will be allocated only when FW Crash dump data is available, and will be deallocated once application copy Host Crash buffer to the file. Host Crash buffer size can be anything between 1MB to 512MB. (It will be multiple of 1MBs) B.) Irrespective of underlying Firmware capability of crash dump support, driver will allocate DMA buffer at start of the day for each MR controllers. Let’s call this buffer as “DMA Crash Buffer”. For this feature, size of DMA crash buffer will be 1MB. (We will not gain much even if DMA buffer size is increased.) C.) Driver will now read Controller Info sending existing dcmd “MR_DCMD_CTRL_GET_INFO”. Driver should extract the information from ctrl info provided by firmware and figure out if firmware support crash dump feature or not. Driver will enable crash dump feature only if “Firmware support Crash dump” + “Driver was able to create DMA Crash Buffer”. If either one from above is not set, Crash dump feature should be disable in driver. Firmware will enable crash dump feature only if “Driver Send DCMD- MR_DCMD_SET_CRASH_BUF_PARA with MR_CRASH_BUF_TURN_ON” Helper application/script should use sysfs parameter fw_crash_xxx to actually copy data from host memory to the filesystem. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:28 +08:00
void megasas_free_host_crash_buffer(struct megasas_instance *instance);
megaraid_sas : Extended VD support Resending the patch. Addressed the review comments from Tomas Henzl. reserved1 field(part of union) of Raid map struct was not required so it is removed. Current MegaRAID firmware and hence the driver only supported 64VDs. E.g: If the user wants to create more than 64VD on a controller, it is not possible on current firmware/driver. New feature and requirement to support upto 256VD, firmware/driver/apps need changes. In addition to that there must be a backward compatibility of the new driver with the older firmware and vice versa. RAID map is the interface between Driver and FW to fetch all required fields(attributes) for each Virtual Drives. In the earlier design driver was using the FW copy of RAID map where as in the new design the Driver will keep the RAID map copy of its own; on which it will operate for any raid map access in fast path. Local driver raid map copy will provide ease of access through out the code and provide generic interface for future FW raid map changes. For the backward compatibility driver will notify FW that it supports 256VD to the FW in driver capability field. Based on the controller properly returned by the FW, the Driver will know whether it supports 256VD or not and will copy the RAID map accordingly. At any given time, driver will always have old or new Raid map. So with this changes, driver can also work in host lock less mode. Please see next patch which enable host lock less mode for megaraid_sas driver. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Tomas Henzl <thenzl@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
2014-09-12 21:27:33 +08:00
void megasas_return_cmd_fusion(struct megasas_instance *instance,
struct megasas_cmd_fusion *cmd);
int megasas_issue_blocked_cmd(struct megasas_instance *instance,
struct megasas_cmd *cmd, int timeout);
void __megasas_return_cmd(struct megasas_instance *instance,
struct megasas_cmd *cmd);
void megasas_return_mfi_mpt_pthr(struct megasas_instance *instance,
struct megasas_cmd *cmd_mfi, struct megasas_cmd_fusion *cmd_fusion);
int megasas_cmd_type(struct scsi_cmnd *cmd);
megaraid_sas: JBOD sequence number support Implemented JBOD map which will provide quick access for JBOD path and also provide sequence number. This will help hardware to fail command to the FW in case of any sequence mismatch. Fast Path I/O for JBOD will refer JBOD map (which has sequence number per JBOD device) instead of RAID map. Previously, the driver used RAID map to get device handle for fast path I/O and this not have sequence number information. Now, driver will use JBOD map instead. As part of error handling, if JBOD map is failed/not supported by firmware, driver will continue using legacy behavior. Now there will be three IO paths for JBOD (syspd): - JBOD map with sequence number (Fast Path) - RAID map without sequence number (Fast Path) - FW path via h/w exception queue deliberately setup devhandle 0xFFFF (FW path). Relevant data structures: - Driver send new DCMD MR_DCMD_SYSTEM_PD_MAP_GET_INFO for this purpose. - struct MR_PD_CFG_SEQ- This structure represent map of single physical device. - struct MR_PD_CFG_SEQ_NUM_SYNC- This structure represent whole JBOD map in general(size, count of sysPDs configured, struct MR_PD_CFG_SEQ of syspD with 0 index). - JBOD sequence map size is: sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)) which is allocated while setting up JBOD map at driver load time. Signed-off-by: Sumit Saxena <sumit.saxena@avagotech.com> Signed-off-by: Kashyap Desai <kashyap.desai@avagotech.com> Reviewed-by: Martin Petersen <martin.petersen@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2015-08-31 19:53:11 +08:00
void megasas_setup_jbod_map(struct megasas_instance *instance);
void megasas_update_sdev_properties(struct scsi_device *sdev);
int megasas_reset_fusion(struct Scsi_Host *shost, int reason);
int megasas_task_abort_fusion(struct scsi_cmnd *scmd);
int megasas_reset_target_fusion(struct scsi_cmnd *scmd);
u32 mega_mod64(u64 dividend, u32 divisor);
int megasas_alloc_fusion_context(struct megasas_instance *instance);
void megasas_free_fusion_context(struct megasas_instance *instance);
int megasas_fusion_start_watchdog(struct megasas_instance *instance);
void megasas_fusion_stop_watchdog(struct megasas_instance *instance);
scsi: megaraid_sas: Add support for 64bit consistent DMA The latest MegaRAID Firmware (for Invader series) has support for 64bit DMA for both streaming and consistent DMA buffers. All Ventura series controller FW always support 64 bit consistent DMA. Also, on a few architectures 32bit DMA is not supported. Current driver always prefers 32bit for consistent DMA and 64bit for streaming DMA. This behavior was unintentional and carried forwarded from legacy controller FW. Need to enhance the driver to support 64bit consistent DMA buffers based on the firmware capability. Below is the DMA setting strategy in driver with this patch. For Ventura series, always try to set 64bit DMA mask. If it fails fall back to 32bit DMA mask. For Invader series and earlier generation controllers, first try to set to 32bit consistent DMA mask irrespective of FW capability. This is needed to ensure firmware downgrades do not break. If 32bit DMA setting fails, check FW capability and try seting to 64bit DMA mask. There are certain restrictions in the hardware for having all sense buffers and all reply descriptors to be in the same 4GB memory region. This limitation is h/w dependent and can not be changed in firmware. This limitation needs to be taken care in driver while allocating the buffers. There was a discussion regarding this - find details at below link. https://www.spinics.net/lists/linux-scsi/msg108251.html Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-10-19 17:49:05 +08:00
void megasas_set_dma_settings(struct megasas_instance *instance,
struct megasas_dcmd_frame *dcmd,
dma_addr_t dma_addr, u32 dma_len);
int megasas_adp_reset_wait_for_ready(struct megasas_instance *instance,
bool do_adp_reset,
int ocr_context);
scsi: megaraid_sas: IRQ poll to avoid CPU hard lockups Issue Description: We have seen cpu lock up issues from field if system has a large (more than 96) logical cpu count. SAS3.0 controller (Invader series) supports max 96 MSI-X vector and SAS3.5 product (Ventura) supports max 128 MSI-X vectors. This may be a generic issue (if PCI device support completion on multiple reply queues). Let me explain it w.r.t megaraid_sas supported h/w just to simplify the problem and possible changes to handle such issues. MegaRAID controller supports multiple reply queues in completion path. Driver creates MSI-X vectors for controller as "minimum of (FW supported Reply queues, Logical CPUs)". If submitter is not interrupted via completion on same CPU, there is a loop in the IO path. This behavior can cause hard/soft CPU lockups, IO timeout, system sluggish etc. Example - one CPU (e.g. CPU A) is busy submitting the IOs and another CPU (e.g. CPU B) is busy with processing the corresponding IO's reply descriptors from reply descriptor queue upon receiving the interrupts from HBA. If CPU A is continuously pumping the IOs then always CPU B (which is executing the ISR) will see the valid reply descriptors in the reply descriptor queue and it will be continuously processing those reply descriptor in a loop without quitting the ISR handler. megaraid_sas driver will exit ISR handler if it finds unused reply descriptor in the reply descriptor queue. Since CPU A will be continuously sending the IOs, CPU B may always see a valid reply descriptor (posted by HBA Firmware after processing the IO) in the reply descriptor queue. In worst case, driver will not quit from this loop in the ISR handler. Eventually, CPU lockup will be detected by watchdog. Above mentioned behavior is not common if "rq_affinity" set to 2 or affinity_hint is honored by irqbalancer as "exact". If rq_affinity is set to 2, submitter will be always interrupted via completion on same CPU. If irqbalancer is using "exact" policy, interrupt will be delivered to submitter CPU. Problem statement: If CPU count to MSI-X vectors (reply descriptor Queues) count ratio is not 1:1, we still have exposure of issue explained above and for that we don't have any solution. Exposure of soft/hard lockup is seen if CPU count is more than MSI-X supported by device. If CPUs count to MSI-X vectors count ratio is not 1:1, (Other way, if CPU counts to MSI-X vector count ratio is something like X:1, where X > 1) then 'exact' irqbalance policy OR rq_affinity = 2 won't help to avoid CPU hard/soft lockups. There won't be any one to one mapping between CPU to MSI-X vector instead one MSI-X interrupt (or reply descriptor queue) is shared with group/set of CPUs and there is a possibility of having a loop in the IO path within that CPU group and may observe lockups. For example: Consider a system having two NUMA nodes and each node having four logical CPUs and also consider that number of MSI-X vectors enabled on the HBA is two, then CPUs count to MSI-X vector count ratio as 4:1. e.g. MSI-X vector 0 is affinity to CPU 0, CPU 1, CPU 2 & CPU 3 of NUMA node 0 and MSI-X vector 1 is affinity to CPU 4, CPU 5, CPU 6 & CPU 7 of NUMA node 1. numactl --hardware available: 2 nodes (0-1) node 0 cpus: 0 1 2 3 --> MSI-X 0 node 0 size: 65536 MB node 0 free: 63176 MB node 1 cpus: 4 5 6 7 --> MSI-X 1 node 1 size: 65536 MB node 1 free: 63176 MB Assume that user started an application which uses all the CPUs of NUMA node 0 for issuing the IOs. Only one CPU from affinity list (it can be any cpu since this behavior depends upon irqbalance) CPU0 will receive the interrupts from MSI-X 0 for all the IOs. Eventually, CPU 0 IO submission percentage will be decreasing and ISR processing percentage will be increasing as it is more busy with processing the interrupts. Gradually IO submission percentage on CPU 0 will be zero and it's ISR processing percentage will be 100% as IO loop has already formed within the NUMA node 0, i.e. CPU 1, CPU 2 & CPU 3 will be continuously busy with submitting the heavy IOs and only CPU 0 is busy in the ISR path as it always find the valid reply descriptor in the reply descriptor queue. Eventually, we will observe the hard lockup here. Chances of occurring of hard/soft lockups are directly proportional to value of X. If value of X is high, then chances of observing CPU lockups is high. Solution: Use IRQ poll interface defined in "irq_poll.c". megaraid_sas driver will execute ISR routine in softirq context and it will always quit the loop based on budget provided in IRQ poll interface. Driver will switch to IRQ poll only when more than a threshold number of reply descriptors are handled in one ISR. Currently threshold is set as 1/4th of HBA queue depth. In these scenarios (i.e. where CPUs count to MSI-X vectors count ratio is X:1 (where X > 1)), IRQ poll interface will avoid CPU hard lockups due to voluntary exit from the reply queue processing based on budget. Note - Only one MSI-X vector is busy doing processing. Select CONFIG_IRQ_POLL from driver Kconfig for driver compilation. Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2019-05-08 01:05:35 +08:00
int megasas_irqpoll(struct irq_poll *irqpoll, int budget);
void megasas_dump_fusion_io(struct scsi_cmnd *scmd);
u32 megasas_readl(struct megasas_instance *instance,
const volatile void __iomem *addr);
struct megasas_cmd *megasas_get_cmd(struct megasas_instance *instance);
void megasas_return_cmd(struct megasas_instance *instance,
struct megasas_cmd *cmd);
int megasas_issue_polled(struct megasas_instance *instance,
struct megasas_cmd *cmd);
void megaraid_sas_kill_hba(struct megasas_instance *instance);
void megasas_check_and_restore_queue_depth(struct megasas_instance *instance);
void megasas_start_timer(struct megasas_instance *instance);
int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
int initial);
int megasas_alloc_cmds(struct megasas_instance *instance);
void megasas_free_cmds(struct megasas_instance *instance);
void megasas_init_debugfs(void);
void megasas_exit_debugfs(void);
void megasas_setup_debugfs(struct megasas_instance *instance);
void megasas_destroy_debugfs(struct megasas_instance *instance);
#endif /*LSI_MEGARAID_SAS_H */