2015-05-20 10:54:31 +08:00
|
|
|
/*
|
|
|
|
* NVDIMM Firmware Interface Table - NFIT
|
|
|
|
*
|
|
|
|
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of version 2 of the GNU General Public License as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*/
|
|
|
|
#ifndef __NFIT_H__
|
|
|
|
#define __NFIT_H__
|
2016-02-20 04:16:34 +08:00
|
|
|
#include <linux/workqueue.h>
|
2015-05-20 10:54:31 +08:00
|
|
|
#include <linux/libnvdimm.h>
|
2016-07-24 12:51:21 +08:00
|
|
|
#include <linux/ndctl.h>
|
2015-05-20 10:54:31 +08:00
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/acpi.h>
|
|
|
|
#include <acpi/acuuid.h>
|
|
|
|
|
2016-04-29 07:23:43 +08:00
|
|
|
/* ACPI 6.1 */
|
2015-05-20 10:54:31 +08:00
|
|
|
#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
|
2016-04-29 07:23:43 +08:00
|
|
|
|
2017-10-21 06:39:43 +08:00
|
|
|
/* http://pmem.io/documents/NVDIMM_DSM_Interface-V1.6.pdf */
|
2015-05-20 10:54:31 +08:00
|
|
|
#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
|
2016-04-29 07:23:43 +08:00
|
|
|
|
|
|
|
/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
|
|
|
|
#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
|
|
|
|
#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
|
|
|
|
|
2016-05-27 00:38:41 +08:00
|
|
|
/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
|
|
|
|
#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
|
|
|
|
|
2015-06-24 08:08:34 +08:00
|
|
|
#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
|
|
|
|
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
|
2017-04-14 10:46:36 +08:00
|
|
|
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
|
2015-05-20 10:54:31 +08:00
|
|
|
|
2017-10-21 06:39:43 +08:00
|
|
|
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
|
|
|
|
|
2017-10-30 03:13:07 +08:00
|
|
|
#define NVDIMM_STANDARD_CMDMASK \
|
|
|
|
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
|
|
|
|
| 1 << ND_CMD_GET_CONFIG_SIZE | 1 << ND_CMD_GET_CONFIG_DATA \
|
|
|
|
| 1 << ND_CMD_SET_CONFIG_DATA | 1 << ND_CMD_VENDOR_EFFECT_LOG_SIZE \
|
|
|
|
| 1 << ND_CMD_VENDOR_EFFECT_LOG | 1 << ND_CMD_VENDOR)
|
|
|
|
|
2017-10-21 06:39:43 +08:00
|
|
|
/*
|
|
|
|
* Command numbers that the kernel needs to know about to handle
|
|
|
|
* non-default DSM revision ids
|
|
|
|
*/
|
|
|
|
enum nvdimm_family_cmds {
|
2017-11-16 02:10:48 +08:00
|
|
|
NVDIMM_INTEL_LATCH_SHUTDOWN = 10,
|
2017-10-21 06:39:43 +08:00
|
|
|
NVDIMM_INTEL_GET_MODES = 11,
|
|
|
|
NVDIMM_INTEL_GET_FWINFO = 12,
|
|
|
|
NVDIMM_INTEL_START_FWUPDATE = 13,
|
|
|
|
NVDIMM_INTEL_SEND_FWUPDATE = 14,
|
|
|
|
NVDIMM_INTEL_FINISH_FWUPDATE = 15,
|
|
|
|
NVDIMM_INTEL_QUERY_FWUPDATE = 16,
|
|
|
|
NVDIMM_INTEL_SET_THRESHOLD = 17,
|
|
|
|
NVDIMM_INTEL_INJECT_ERROR = 18,
|
acpi/nfit: Add support for Intel DSM 1.8 commands
Add command definition for security commands defined in Intel DSM
specification v1.8 [1]. This includes "get security state", "set
passphrase", "unlock unit", "freeze lock", "secure erase", "overwrite",
"overwrite query", "master passphrase enable/disable", and "master
erase", . Since this adds several Intel definitions, move the relevant
bits to their own header.
These commands mutate physical data, but that manipulation is not cache
coherent. The requirement to flush and invalidate caches makes these
commands unsuitable to be called from userspace, so extra logic is added
to detect and block these commands from being submitted via the ioctl
command submission path.
Lastly, the commands may contain sensitive key material that should not
be dumped in a standard debug session. Update the nvdimm-command
payload-dump facility to move security command payloads behind a
default-off compile time switch.
[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2018-12-05 02:31:11 +08:00
|
|
|
NVDIMM_INTEL_GET_SECURITY_STATE = 19,
|
|
|
|
NVDIMM_INTEL_SET_PASSPHRASE = 20,
|
|
|
|
NVDIMM_INTEL_DISABLE_PASSPHRASE = 21,
|
|
|
|
NVDIMM_INTEL_UNLOCK_UNIT = 22,
|
|
|
|
NVDIMM_INTEL_FREEZE_LOCK = 23,
|
|
|
|
NVDIMM_INTEL_SECURE_ERASE = 24,
|
|
|
|
NVDIMM_INTEL_OVERWRITE = 25,
|
|
|
|
NVDIMM_INTEL_QUERY_OVERWRITE = 26,
|
|
|
|
NVDIMM_INTEL_SET_MASTER_PASSPHRASE = 27,
|
|
|
|
NVDIMM_INTEL_MASTER_SECURE_ERASE = 28,
|
2017-10-21 06:39:43 +08:00
|
|
|
};
|
|
|
|
|
acpi/nfit: Add support for Intel DSM 1.8 commands
Add command definition for security commands defined in Intel DSM
specification v1.8 [1]. This includes "get security state", "set
passphrase", "unlock unit", "freeze lock", "secure erase", "overwrite",
"overwrite query", "master passphrase enable/disable", and "master
erase", . Since this adds several Intel definitions, move the relevant
bits to their own header.
These commands mutate physical data, but that manipulation is not cache
coherent. The requirement to flush and invalidate caches makes these
commands unsuitable to be called from userspace, so extra logic is added
to detect and block these commands from being submitted via the ioctl
command submission path.
Lastly, the commands may contain sensitive key material that should not
be dumped in a standard debug session. Update the nvdimm-command
payload-dump facility to move security command payloads behind a
default-off compile time switch.
[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2018-12-05 02:31:11 +08:00
|
|
|
#define NVDIMM_INTEL_SECURITY_CMDMASK \
|
|
|
|
(1 << NVDIMM_INTEL_GET_SECURITY_STATE | 1 << NVDIMM_INTEL_SET_PASSPHRASE \
|
|
|
|
| 1 << NVDIMM_INTEL_DISABLE_PASSPHRASE | 1 << NVDIMM_INTEL_UNLOCK_UNIT \
|
|
|
|
| 1 << NVDIMM_INTEL_FREEZE_LOCK | 1 << NVDIMM_INTEL_SECURE_ERASE \
|
|
|
|
| 1 << NVDIMM_INTEL_OVERWRITE | 1 << NVDIMM_INTEL_QUERY_OVERWRITE \
|
|
|
|
| 1 << NVDIMM_INTEL_SET_MASTER_PASSPHRASE \
|
|
|
|
| 1 << NVDIMM_INTEL_MASTER_SECURE_ERASE)
|
|
|
|
|
2017-10-21 06:39:43 +08:00
|
|
|
#define NVDIMM_INTEL_CMDMASK \
|
|
|
|
(NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \
|
|
|
|
| 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \
|
|
|
|
| 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \
|
|
|
|
| 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \
|
acpi/nfit: Add support for Intel DSM 1.8 commands
Add command definition for security commands defined in Intel DSM
specification v1.8 [1]. This includes "get security state", "set
passphrase", "unlock unit", "freeze lock", "secure erase", "overwrite",
"overwrite query", "master passphrase enable/disable", and "master
erase", . Since this adds several Intel definitions, move the relevant
bits to their own header.
These commands mutate physical data, but that manipulation is not cache
coherent. The requirement to flush and invalidate caches makes these
commands unsuitable to be called from userspace, so extra logic is added
to detect and block these commands from being submitted via the ioctl
command submission path.
Lastly, the commands may contain sensitive key material that should not
be dumped in a standard debug session. Update the nvdimm-command
payload-dump facility to move security command payloads behind a
default-off compile time switch.
[1]: http://pmem.io/documents/NVDIMM_DSM_Interface-V1.8.pdf
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2018-12-05 02:31:11 +08:00
|
|
|
| 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN \
|
|
|
|
| NVDIMM_INTEL_SECURITY_CMDMASK)
|
2017-10-21 06:39:43 +08:00
|
|
|
|
2015-05-20 10:54:31 +08:00
|
|
|
enum nfit_uuids {
|
2016-04-29 07:23:43 +08:00
|
|
|
/* for simplicity alias the uuid index with the family id */
|
|
|
|
NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
|
|
|
|
NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
|
|
|
|
NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
|
2016-05-27 00:38:41 +08:00
|
|
|
NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
|
2015-05-20 10:54:31 +08:00
|
|
|
NFIT_SPA_VOLATILE,
|
|
|
|
NFIT_SPA_PM,
|
|
|
|
NFIT_SPA_DCR,
|
|
|
|
NFIT_SPA_BDW,
|
|
|
|
NFIT_SPA_VDISK,
|
|
|
|
NFIT_SPA_VCD,
|
|
|
|
NFIT_SPA_PDISK,
|
|
|
|
NFIT_SPA_PCD,
|
|
|
|
NFIT_DEV_BUS,
|
|
|
|
NFIT_UUID_MAX,
|
|
|
|
};
|
|
|
|
|
2016-04-29 09:35:23 +08:00
|
|
|
/*
|
2016-06-30 02:19:32 +08:00
|
|
|
* Region format interface codes are stored with the interface as the
|
|
|
|
* LSB and the function as the MSB.
|
2016-04-29 09:35:23 +08:00
|
|
|
*/
|
2016-06-30 02:19:32 +08:00
|
|
|
#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
|
|
|
|
#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
|
|
|
|
#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
|
2016-02-02 09:48:42 +08:00
|
|
|
|
2015-07-11 01:06:14 +08:00
|
|
|
/*
 * Miscellaneous BLK-window flags and Address Range Scrub (ARS)
 * status/limit constants.  The ARS_STATUS_* values live in the upper
 * 16 bits of the firmware status word.
 */
enum {
	NFIT_BLK_READ_FLUSH = 1,
	NFIT_BLK_DCR_LATCH = 2,
	NFIT_ARS_STATUS_DONE = 0,
	NFIT_ARS_STATUS_BUSY = 1 << 16,
	NFIT_ARS_STATUS_NONE = 2 << 16,
	NFIT_ARS_STATUS_INTR = 3 << 16,
	NFIT_ARS_START_BUSY = 6,
	NFIT_ARS_CAP_NONE = 1,
	NFIT_ARS_F_OVERFLOW = 1,
	NFIT_ARS_TIMEOUT = 90,	/* seconds to wait for an ARS slot */
};
|
|
|
|
|
2016-08-20 04:40:58 +08:00
|
|
|
/* ACPI notification values delivered on the NFIT root device */
enum nfit_root_notifiers {
	NFIT_NOTIFY_UPDATE = 0x80,
	NFIT_NOTIFY_UC_MEMORY_ERROR = 0x81,
};
|
|
|
|
|
2016-08-23 10:28:37 +08:00
|
|
|
/* ACPI notification values delivered on an individual NVDIMM device */
enum nfit_dimm_notifiers {
	NFIT_NOTIFY_DIMM_HEALTH = 0x81,
};
|
|
|
|
|
2018-04-03 06:40:30 +08:00
|
|
|
/*
 * Per-range ARS request flags (bit numbers in nfit_spa::ars_state).
 * A REQ flag stays set until the next successful ars_start() for the
 * range; ARS_FAILED marks a range where scrubbing is not viable.
 */
enum nfit_ars_state {
	ARS_REQ_SHORT,
	ARS_REQ_LONG,
	ARS_FAILED,
};
|
|
|
|
|
2015-05-20 10:54:31 +08:00
|
|
|
struct nfit_spa {
|
|
|
|
struct list_head list;
|
2016-02-18 05:01:23 +08:00
|
|
|
struct nd_region *nd_region;
|
2018-04-03 06:40:30 +08:00
|
|
|
unsigned long ars_state;
|
2016-02-18 05:01:23 +08:00
|
|
|
u32 clear_err_unit;
|
|
|
|
u32 max_ars;
|
2016-07-15 08:22:48 +08:00
|
|
|
struct acpi_nfit_system_address spa[0];
|
2015-05-20 10:54:31 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct nfit_dcr {
|
|
|
|
struct list_head list;
|
2016-07-15 08:22:48 +08:00
|
|
|
struct acpi_nfit_control_region dcr[0];
|
2015-05-20 10:54:31 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct nfit_bdw {
|
|
|
|
struct list_head list;
|
2016-07-15 08:22:48 +08:00
|
|
|
struct acpi_nfit_data_region bdw[0];
|
2015-05-20 10:54:31 +08:00
|
|
|
};
|
|
|
|
|
2015-06-25 16:21:02 +08:00
|
|
|
struct nfit_idt {
|
|
|
|
struct list_head list;
|
2016-07-15 08:22:48 +08:00
|
|
|
struct acpi_nfit_interleave idt[0];
|
2015-06-25 16:21:02 +08:00
|
|
|
};
|
|
|
|
|
2015-07-11 01:06:13 +08:00
|
|
|
struct nfit_flush {
|
|
|
|
struct list_head list;
|
2016-07-15 08:22:48 +08:00
|
|
|
struct acpi_nfit_flush_address flush[0];
|
2015-07-11 01:06:13 +08:00
|
|
|
};
|
|
|
|
|
2015-05-20 10:54:31 +08:00
|
|
|
struct nfit_memdev {
|
|
|
|
struct list_head list;
|
2016-07-15 08:22:48 +08:00
|
|
|
struct acpi_nfit_memory_map memdev[0];
|
2015-05-20 10:54:31 +08:00
|
|
|
};
|
|
|
|
|
2018-09-27 01:48:38 +08:00
|
|
|
/* bit numbers for nfit_mem::flags */
enum nfit_mem_flags {
	NFIT_MEM_LSR,		/* label read (_LSR) method available */
	NFIT_MEM_LSW,		/* label write (_LSW) method available */
	NFIT_MEM_DIRTY,		/* DIMM was shut down dirty */
	NFIT_MEM_DIRTY_COUNT,	/* dirty_shutdown count is valid */
};
|
|
|
|
|
2018-12-05 02:31:20 +08:00
|
|
|
#define NFIT_DIMM_ID_LEN 22
|
|
|
|
|
2015-05-20 10:54:31 +08:00
|
|
|
/* assembled tables for a given dimm/memory-device */
|
|
|
|
struct nfit_mem {
|
2015-04-25 15:56:17 +08:00
|
|
|
struct nvdimm *nvdimm;
|
2015-05-20 10:54:31 +08:00
|
|
|
struct acpi_nfit_memory_map *memdev_dcr;
|
|
|
|
struct acpi_nfit_memory_map *memdev_pmem;
|
2015-06-25 16:21:02 +08:00
|
|
|
struct acpi_nfit_memory_map *memdev_bdw;
|
2015-05-20 10:54:31 +08:00
|
|
|
struct acpi_nfit_control_region *dcr;
|
|
|
|
struct acpi_nfit_data_region *bdw;
|
|
|
|
struct acpi_nfit_system_address *spa_dcr;
|
|
|
|
struct acpi_nfit_system_address *spa_bdw;
|
2015-06-25 16:21:02 +08:00
|
|
|
struct acpi_nfit_interleave *idt_dcr;
|
|
|
|
struct acpi_nfit_interleave *idt_bdw;
|
2016-08-23 10:28:37 +08:00
|
|
|
struct kernfs_node *flags_attr;
|
2015-07-11 01:06:13 +08:00
|
|
|
struct nfit_flush *nfit_flush;
|
2015-05-20 10:54:31 +08:00
|
|
|
struct list_head list;
|
2015-06-09 02:27:06 +08:00
|
|
|
struct acpi_device *adev;
|
2016-04-06 06:26:50 +08:00
|
|
|
struct acpi_nfit_desc *acpi_desc;
|
2018-12-05 02:31:20 +08:00
|
|
|
char id[NFIT_DIMM_ID_LEN+1];
|
2016-06-08 08:00:04 +08:00
|
|
|
struct resource *flush_wpq;
|
2015-06-09 02:27:06 +08:00
|
|
|
unsigned long dsm_mask;
|
2018-09-27 01:48:38 +08:00
|
|
|
unsigned long flags;
|
2018-09-27 01:47:15 +08:00
|
|
|
u32 dirty_shutdown;
|
2016-04-29 07:23:43 +08:00
|
|
|
int family;
|
2015-05-20 10:54:31 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct acpi_nfit_desc {
|
|
|
|
struct nvdimm_bus_descriptor nd_desc;
|
2015-11-21 08:05:49 +08:00
|
|
|
struct acpi_table_header acpi_header;
|
2015-10-28 06:58:27 +08:00
|
|
|
struct mutex init_mutex;
|
2015-05-20 10:54:31 +08:00
|
|
|
struct list_head memdevs;
|
2015-07-11 01:06:13 +08:00
|
|
|
struct list_head flushes;
|
2015-05-20 10:54:31 +08:00
|
|
|
struct list_head dimms;
|
|
|
|
struct list_head spas;
|
|
|
|
struct list_head dcrs;
|
|
|
|
struct list_head bdws;
|
2015-06-25 16:21:02 +08:00
|
|
|
struct list_head idts;
|
2015-05-20 10:54:31 +08:00
|
|
|
struct nvdimm_bus *nvdimm_bus;
|
|
|
|
struct device *dev;
|
2016-02-18 05:01:23 +08:00
|
|
|
struct nd_cmd_ars_status *ars_status;
|
acpi, nfit: Fix Address Range Scrub completion tracking
The Address Range Scrub implementation tried to skip running scrubs
against ranges that were already scrubbed by the BIOS. Unfortunately
that support also resulted in early scrub completions as evidenced by
this debug output from nfit_test:
nd_region region9: ARS: range 1 short complete
nd_region region3: ARS: range 1 short complete
nd_region region4: ARS: range 2 ARS start (0)
nd_region region4: ARS: range 2 short complete
...i.e. completions without any indications that the scrub was started.
This state of affairs was hard to see in the code due to the
proliferation of state bits and mistakenly trying to track done state
per-range when the completion is a global property of the bus.
So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and
ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and
ARS_REQ_LONG. The implementation will still complete and reap the
results of BIOS initiated ARS, but it will not attempt to use that
information to affect the completion status of scrubbing the ranges from
a Linux perspective.
Instead, try to synchronously run a short ARS per range at init time and
schedule a long scrub in the background. If ARS is busy with an ARS
request, schedule both a short and a long scrub for when ARS returns to
idle. This logic also satisfies the intent of what ARS_REQ_REDO was
trying to achieve. The new rule is that the REQ flag stays set until the
next successful ars_start() for that range.
With the new policy that the REQ flags are not cleared until the next
start, the implementation no longer loses requests as can be seen from
the following log:
nd_region region3: ARS: range 1 ARS start short (0)
nd_region region9: ARS: range 1 ARS start short (0)
nd_region region3: ARS: range 1 complete
nd_region region4: ARS: range 2 ARS start short (0)
nd_region region9: ARS: range 1 complete
nd_region region9: ARS: range 1 ARS start long (0)
nd_region region4: ARS: range 2 complete
nd_region region3: ARS: range 1 ARS start long (0)
nd_region region9: ARS: range 1 complete
nd_region region3: ARS: range 1 complete
nd_region region4: ARS: range 2 ARS start long (0)
nd_region region4: ARS: range 2 complete
...note that the nfit_test emulated driver provides 2 buses, that is why
some of the range indices are duplicated. Notice that each range
now successfully completes a short and long scrub.
Cc: <stable@vger.kernel.org>
Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce nfit_spa->ars_state")
Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...")
Reported-by: Jacek Zloch <jacek.zloch@intel.com>
Reported-by: Krzysztof Rusocki <krzysztof.rusocki@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2018-10-14 11:32:17 +08:00
|
|
|
struct nfit_spa *scrub_spa;
|
2018-04-06 07:18:55 +08:00
|
|
|
struct delayed_work dwork;
|
2016-07-24 12:51:21 +08:00
|
|
|
struct list_head list;
|
2016-07-24 12:51:42 +08:00
|
|
|
struct kernfs_node *scrub_count_state;
|
2018-04-05 16:25:02 +08:00
|
|
|
unsigned int max_ars;
|
2016-07-24 12:51:42 +08:00
|
|
|
unsigned int scrub_count;
|
2016-10-01 07:19:29 +08:00
|
|
|
unsigned int scrub_mode;
|
2018-07-06 05:58:49 +08:00
|
|
|
unsigned int scrub_busy:1;
|
2016-02-20 04:16:34 +08:00
|
|
|
unsigned int cancel:1;
|
2016-04-29 07:17:07 +08:00
|
|
|
unsigned long dimm_cmd_force_en;
|
|
|
|
unsigned long bus_cmd_force_en;
|
2017-09-22 15:47:40 +08:00
|
|
|
unsigned long bus_nfit_cmd_force_en;
|
2018-02-01 03:45:38 +08:00
|
|
|
unsigned int platform_cap;
|
2018-04-06 07:18:55 +08:00
|
|
|
unsigned int scrub_tmo;
|
2015-06-18 05:23:32 +08:00
|
|
|
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
|
|
|
|
void *iobuf, u64 len, int rw);
|
2015-05-20 10:54:31 +08:00
|
|
|
};
|
|
|
|
|
2016-10-01 07:19:29 +08:00
|
|
|
/* policy for kicking off a scrub in response to an MCE */
enum scrub_mode {
	HW_ERROR_SCRUB_OFF,
	HW_ERROR_SCRUB_ON,
};
|
|
|
|
|
2015-06-25 16:21:02 +08:00
|
|
|
/* index into nfit_blk::mmio[]: block-data-window vs control-region */
enum nd_blk_mmio_selector {
	BDW,
	DCR,
};
|
|
|
|
|
nd_blk: change aperture mapping from WC to WB
This should result in a pretty sizeable performance gain for reads. For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB). This was
done on a random lab machine.
PMEM reads from a write combining mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
100000+0 records in
100000+0 records out
409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s
PMEM reads from a write-back mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
1000000+0 records in
1000000+0 records out
4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s
To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:
http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read. This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM. We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.
In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range(). This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2015-08-28 03:14:20 +08:00
|
|
|
struct nd_blk_addr {
|
|
|
|
union {
|
|
|
|
void __iomem *base;
|
2016-06-04 09:06:47 +08:00
|
|
|
void *aperture;
|
nd_blk: change aperture mapping from WC to WB
This should result in a pretty sizeable performance gain for reads. For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB). This was
done on a random lab machine.
PMEM reads from a write combining mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
100000+0 records in
100000+0 records out
409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s
PMEM reads from a write-back mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
1000000+0 records in
1000000+0 records out
4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s
To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:
http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read. This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM. We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.
In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range(). This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2015-08-28 03:14:20 +08:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2015-06-25 16:21:02 +08:00
|
|
|
struct nfit_blk {
|
|
|
|
struct nfit_blk_mmio {
|
nd_blk: change aperture mapping from WC to WB
This should result in a pretty sizeable performance gain for reads. For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB). This was
done on a random lab machine.
PMEM reads from a write combining mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
100000+0 records in
100000+0 records out
409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s
PMEM reads from a write-back mapping:
# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
1000000+0 records in
1000000+0 records out
4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s
To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:
http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read. This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM. We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.
In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range(). This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2015-08-28 03:14:20 +08:00
|
|
|
struct nd_blk_addr addr;
|
2015-06-25 16:21:02 +08:00
|
|
|
u64 size;
|
|
|
|
u64 base_offset;
|
|
|
|
u32 line_size;
|
|
|
|
u32 num_lines;
|
|
|
|
u32 table_size;
|
|
|
|
struct acpi_nfit_interleave *idt;
|
|
|
|
struct acpi_nfit_system_address *spa;
|
|
|
|
} mmio[2];
|
|
|
|
struct nd_region *nd_region;
|
|
|
|
u64 bdw_offset; /* post interleave offset */
|
|
|
|
u64 stat_offset;
|
|
|
|
u64 cmd_offset;
|
2015-07-11 01:06:14 +08:00
|
|
|
u32 dimm_flags;
|
2015-07-11 01:06:13 +08:00
|
|
|
};
|
|
|
|
|
2016-07-24 12:51:21 +08:00
|
|
|
extern struct list_head acpi_descs;
|
|
|
|
extern struct mutex acpi_desc_lock;
|
acpi, nfit: Fix Address Range Scrub completion tracking
The Address Range Scrub implementation tried to skip running scrubs
against ranges that were already scrubbed by the BIOS. Unfortunately
that support also resulted in early scrub completions as evidenced by
this debug output from nfit_test:
nd_region region9: ARS: range 1 short complete
nd_region region3: ARS: range 1 short complete
nd_region region4: ARS: range 2 ARS start (0)
nd_region region4: ARS: range 2 short complete
...i.e. completions without any indications that the scrub was started.
This state of affairs was hard to see in the code due to the
proliferation of state bits and mistakenly trying to track done state
per-range when the completion is a global property of the bus.
So, kill the four ARS state bits (ARS_REQ, ARS_REQ_REDO, ARS_DONE, and
ARS_SHORT), and replace them with just 2 request flags ARS_REQ_SHORT and
ARS_REQ_LONG. The implementation will still complete and reap the
results of BIOS initiated ARS, but it will not attempt to use that
information to affect the completion status of scrubbing the ranges from
a Linux perspective.
Instead, try to synchronously run a short ARS per range at init time and
schedule a long scrub in the background. If ARS is busy with an ARS
request, schedule both a short and a long scrub for when ARS returns to
idle. This logic also satisfies the intent of what ARS_REQ_REDO was
trying to achieve. The new rule is that the REQ flag stays set until the
next successful ars_start() for that range.
With the new policy that the REQ flags are not cleared until the next
start, the implementation no longer loses requests as can be seen from
the following log:
nd_region region3: ARS: range 1 ARS start short (0)
nd_region region9: ARS: range 1 ARS start short (0)
nd_region region3: ARS: range 1 complete
nd_region region4: ARS: range 2 ARS start short (0)
nd_region region9: ARS: range 1 complete
nd_region region9: ARS: range 1 ARS start long (0)
nd_region region4: ARS: range 2 complete
nd_region region3: ARS: range 1 ARS start long (0)
nd_region region9: ARS: range 1 complete
nd_region region3: ARS: range 1 complete
nd_region region4: ARS: range 2 ARS start long (0)
nd_region region4: ARS: range 2 complete
...note that the nfit_test emulated driver provides 2 buses, that is why
some of the range indices are duplicated. Notice that each range
now successfully completes a short and long scrub.
Cc: <stable@vger.kernel.org>
Fixes: 14c73f997a5e ("nfit, address-range-scrub: introduce nfit_spa->ars_state")
Fixes: cc3d3458d46f ("acpi/nfit: queue issuing of ars when an uc error...")
Reported-by: Jacek Zloch <jacek.zloch@intel.com>
Reported-by: Krzysztof Rusocki <krzysztof.rusocki@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2018-10-14 11:32:17 +08:00
|
|
|
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
|
|
|
|
enum nfit_ars_state req_type);
|
2015-06-25 16:21:02 +08:00
|
|
|
|
2016-07-24 12:51:21 +08:00
|
|
|
#ifdef CONFIG_X86_MCE
void nfit_mce_register(void);
void nfit_mce_unregister(void);
#else
/* no machine-check notifier plumbing without X86_MCE; empty stubs */
static inline void nfit_mce_register(void)
{
}

static inline void nfit_mce_unregister(void)
{
}
#endif
|
|
|
|
|
|
|
|
int nfit_spa_type(struct acpi_nfit_system_address *spa);
|
2015-06-25 16:21:02 +08:00
|
|
|
|
2015-05-20 10:54:31 +08:00
|
|
|
static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
|
|
|
|
struct nfit_mem *nfit_mem)
|
|
|
|
{
|
|
|
|
if (nfit_mem->memdev_dcr)
|
|
|
|
return nfit_mem->memdev_dcr;
|
|
|
|
return nfit_mem->memdev_pmem;
|
|
|
|
}
|
2015-04-27 07:26:48 +08:00
|
|
|
|
|
|
|
static inline struct acpi_nfit_desc *to_acpi_desc(
|
|
|
|
struct nvdimm_bus_descriptor *nd_desc)
|
|
|
|
{
|
|
|
|
return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
|
|
|
|
}
|
2015-06-18 05:23:32 +08:00
|
|
|
|
2017-06-06 00:40:42 +08:00
|
|
|
const guid_t *to_nfit_uuid(enum nfit_uuids id);
|
2016-07-15 07:19:55 +08:00
|
|
|
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
|
2017-04-19 00:56:31 +08:00
|
|
|
void acpi_nfit_shutdown(void *data);
|
2016-08-19 13:15:04 +08:00
|
|
|
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
|
2016-08-23 10:23:25 +08:00
|
|
|
void __acpi_nvdimm_notify(struct device *dev, u32 event);
|
2016-12-06 05:43:25 +08:00
|
|
|
int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
|
|
|
unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc);
|
2016-02-20 04:29:32 +08:00
|
|
|
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
|
2015-05-20 10:54:31 +08:00
|
|
|
#endif /* __NFIT_H__ */
|