sata_mv: implement IRQ coalescing (v2)
Add IRQ coalescing to sata_mv (off by default).

This feature can reduce total interrupt overhead for RAID setups in some situations, by deferring the interrupt signal until one or both of:
a) a specified io_count (completed SATA commands) is achieved, or
b) a specified time interval elapses after an IO completion.

For now, module parameters are used to set the irq_coalescing_io_count and irq_coalescing_usecs (timeout) globally. These may eventually be supplemented with sysfs attributes, so that thresholds can be set on-the-fly and on a per-chip (or even per-host_controller) basis.

Signed-off-by: Mark Lord <mlord@pobox.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
This commit is contained in:
parent
40f21b1124
commit
2b748a0a34
|
@ -34,10 +34,7 @@
|
||||||
*
|
*
|
||||||
* --> Develop a low-power-consumption strategy, and implement it.
|
* --> Develop a low-power-consumption strategy, and implement it.
|
||||||
*
|
*
|
||||||
* --> [Experiment, low priority] Investigate interrupt coalescing.
|
* --> Add sysfs attributes for per-chip / per-HC IRQ coalescing thresholds.
|
||||||
* Quite often, especially with PCI Message Signalled Interrupts (MSI),
|
|
||||||
* the overhead reduced by interrupt mitigation is quite often not
|
|
||||||
* worth the latency cost.
|
|
||||||
*
|
*
|
||||||
* --> [Experiment, Marvell value added] Is it possible to use target
|
* --> [Experiment, Marvell value added] Is it possible to use target
|
||||||
* mode to cross-connect two Linux boxes with Marvell cards? If so,
|
* mode to cross-connect two Linux boxes with Marvell cards? If so,
|
||||||
|
@ -67,7 +64,7 @@
|
||||||
#include <linux/libata.h>
|
#include <linux/libata.h>
|
||||||
|
|
||||||
#define DRV_NAME "sata_mv"
|
#define DRV_NAME "sata_mv"
|
||||||
#define DRV_VERSION "1.26"
|
#define DRV_VERSION "1.27"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* module options
|
* module options
|
||||||
|
@ -79,6 +76,16 @@ module_param(msi, int, S_IRUGO);
|
||||||
MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)");
|
MODULE_PARM_DESC(msi, "Enable use of PCI MSI (0=off, 1=on)");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static int irq_coalescing_io_count;
|
||||||
|
module_param(irq_coalescing_io_count, int, S_IRUGO);
|
||||||
|
MODULE_PARM_DESC(irq_coalescing_io_count,
|
||||||
|
"IRQ coalescing I/O count threshold (0..255)");
|
||||||
|
|
||||||
|
static int irq_coalescing_usecs;
|
||||||
|
module_param(irq_coalescing_usecs, int, S_IRUGO);
|
||||||
|
MODULE_PARM_DESC(irq_coalescing_usecs,
|
||||||
|
"IRQ coalescing time threshold in usecs");
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
/* BAR's are enumerated in terms of pci_resource_start() terms */
|
/* BAR's are enumerated in terms of pci_resource_start() terms */
|
||||||
MV_PRIMARY_BAR = 0, /* offset 0x10: memory space */
|
MV_PRIMARY_BAR = 0, /* offset 0x10: memory space */
|
||||||
|
@ -88,8 +95,33 @@ enum {
|
||||||
MV_MAJOR_REG_AREA_SZ = 0x10000, /* 64KB */
|
MV_MAJOR_REG_AREA_SZ = 0x10000, /* 64KB */
|
||||||
MV_MINOR_REG_AREA_SZ = 0x2000, /* 8KB */
|
MV_MINOR_REG_AREA_SZ = 0x2000, /* 8KB */
|
||||||
|
|
||||||
|
/* For use with both IRQ coalescing methods ("all ports" or "per-HC") */
|
||||||
|
COAL_CLOCKS_PER_USEC = 150, /* for calculating COAL_TIMEs */
|
||||||
|
MAX_COAL_TIME_THRESHOLD = ((1 << 24) - 1), /* internal clocks count */
|
||||||
|
MAX_COAL_IO_COUNT = 255, /* completed I/O count */
|
||||||
|
|
||||||
MV_PCI_REG_BASE = 0,
|
MV_PCI_REG_BASE = 0,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per-chip ("all ports") interrupt coalescing feature.
|
||||||
|
* This is only for GEN_II / GEN_IIE hardware.
|
||||||
|
*
|
||||||
|
* Coalescing defers the interrupt until either the IO_THRESHOLD
|
||||||
|
* (count of completed I/Os) is met, or the TIME_THRESHOLD is met.
|
||||||
|
*/
|
||||||
|
MV_COAL_REG_BASE = 0x18000,
|
||||||
|
MV_IRQ_COAL_CAUSE = (MV_COAL_REG_BASE + 0x08),
|
||||||
|
ALL_PORTS_COAL_IRQ = (1 << 4), /* all ports irq event */
|
||||||
|
|
||||||
|
MV_IRQ_COAL_IO_THRESHOLD = (MV_COAL_REG_BASE + 0xcc),
|
||||||
|
MV_IRQ_COAL_TIME_THRESHOLD = (MV_COAL_REG_BASE + 0xd0),
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Registers for the (unused here) transaction coalescing feature:
|
||||||
|
*/
|
||||||
|
MV_TRAN_COAL_CAUSE_LO = (MV_COAL_REG_BASE + 0x88),
|
||||||
|
MV_TRAN_COAL_CAUSE_HI = (MV_COAL_REG_BASE + 0x8c),
|
||||||
|
|
||||||
MV_SATAHC0_REG_BASE = 0x20000,
|
MV_SATAHC0_REG_BASE = 0x20000,
|
||||||
MV_FLASH_CTL_OFS = 0x1046c,
|
MV_FLASH_CTL_OFS = 0x1046c,
|
||||||
MV_GPIO_PORT_CTL_OFS = 0x104f0,
|
MV_GPIO_PORT_CTL_OFS = 0x104f0,
|
||||||
|
@ -186,6 +218,8 @@ enum {
|
||||||
DONE_IRQ = (1 << 1), /* shift by (2 * port #) */
|
DONE_IRQ = (1 << 1), /* shift by (2 * port #) */
|
||||||
HC0_IRQ_PEND = 0x1ff, /* bits 0-8 = HC0's ports */
|
HC0_IRQ_PEND = 0x1ff, /* bits 0-8 = HC0's ports */
|
||||||
HC_SHIFT = 9, /* bits 9-17 = HC1's ports */
|
HC_SHIFT = 9, /* bits 9-17 = HC1's ports */
|
||||||
|
DONE_IRQ_0_3 = 0x000000aa, /* DONE_IRQ ports 0,1,2,3 */
|
||||||
|
DONE_IRQ_4_7 = (DONE_IRQ_0_3 << HC_SHIFT), /* 4,5,6,7 */
|
||||||
PCI_ERR = (1 << 18),
|
PCI_ERR = (1 << 18),
|
||||||
TRAN_COAL_LO_DONE = (1 << 19), /* transaction coalescing */
|
TRAN_COAL_LO_DONE = (1 << 19), /* transaction coalescing */
|
||||||
TRAN_COAL_HI_DONE = (1 << 20), /* transaction coalescing */
|
TRAN_COAL_HI_DONE = (1 << 20), /* transaction coalescing */
|
||||||
|
@ -207,6 +241,16 @@ enum {
|
||||||
HC_COAL_IRQ = (1 << 4), /* IRQ coalescing */
|
HC_COAL_IRQ = (1 << 4), /* IRQ coalescing */
|
||||||
DEV_IRQ = (1 << 8), /* shift by port # */
|
DEV_IRQ = (1 << 8), /* shift by port # */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Per-HC (Host-Controller) interrupt coalescing feature.
|
||||||
|
* This is present on all chip generations.
|
||||||
|
*
|
||||||
|
* Coalescing defers the interrupt until either the IO_THRESHOLD
|
||||||
|
* (count of completed I/Os) is met, or the TIME_THRESHOLD is met.
|
||||||
|
*/
|
||||||
|
HC_IRQ_COAL_IO_THRESHOLD_OFS = 0x000c,
|
||||||
|
HC_IRQ_COAL_TIME_THRESHOLD_OFS = 0x0010,
|
||||||
|
|
||||||
/* Shadow block registers */
|
/* Shadow block registers */
|
||||||
SHD_BLK_OFS = 0x100,
|
SHD_BLK_OFS = 0x100,
|
||||||
SHD_CTL_AST_OFS = 0x20, /* ofs from SHD_BLK_OFS */
|
SHD_CTL_AST_OFS = 0x20, /* ofs from SHD_BLK_OFS */
|
||||||
|
@ -897,6 +941,23 @@ static void mv_set_edma_ptrs(void __iomem *port_mmio,
|
||||||
port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
|
port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mv_write_main_irq_mask(u32 mask, struct mv_host_priv *hpriv)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* When writing to the main_irq_mask in hardware,
|
||||||
|
* we must ensure exclusivity between the interrupt coalescing bits
|
||||||
|
* and the corresponding individual port DONE_IRQ bits.
|
||||||
|
*
|
||||||
|
* Note that this register is really an "IRQ enable" register,
|
||||||
|
* not an "IRQ mask" register as Marvell's naming might suggest.
|
||||||
|
*/
|
||||||
|
if (mask & (ALL_PORTS_COAL_DONE | PORTS_0_3_COAL_DONE))
|
||||||
|
mask &= ~DONE_IRQ_0_3;
|
||||||
|
if (mask & (ALL_PORTS_COAL_DONE | PORTS_4_7_COAL_DONE))
|
||||||
|
mask &= ~DONE_IRQ_4_7;
|
||||||
|
writelfl(mask, hpriv->main_irq_mask_addr);
|
||||||
|
}
|
||||||
|
|
||||||
static void mv_set_main_irq_mask(struct ata_host *host,
|
static void mv_set_main_irq_mask(struct ata_host *host,
|
||||||
u32 disable_bits, u32 enable_bits)
|
u32 disable_bits, u32 enable_bits)
|
||||||
{
|
{
|
||||||
|
@ -907,7 +968,7 @@ static void mv_set_main_irq_mask(struct ata_host *host,
|
||||||
new_mask = (old_mask & ~disable_bits) | enable_bits;
|
new_mask = (old_mask & ~disable_bits) | enable_bits;
|
||||||
if (new_mask != old_mask) {
|
if (new_mask != old_mask) {
|
||||||
hpriv->main_irq_mask = new_mask;
|
hpriv->main_irq_mask = new_mask;
|
||||||
writelfl(new_mask, hpriv->main_irq_mask_addr);
|
mv_write_main_irq_mask(new_mask, hpriv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -948,6 +1009,64 @@ static void mv_clear_and_enable_port_irqs(struct ata_port *ap,
|
||||||
mv_enable_port_irqs(ap, port_irqs);
|
mv_enable_port_irqs(ap, port_irqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mv_set_irq_coalescing(struct ata_host *host,
|
||||||
|
unsigned int count, unsigned int usecs)
|
||||||
|
{
|
||||||
|
struct mv_host_priv *hpriv = host->private_data;
|
||||||
|
void __iomem *mmio = hpriv->base, *hc_mmio;
|
||||||
|
u32 coal_enable = 0;
|
||||||
|
unsigned long flags;
|
||||||
|
unsigned int clks;
|
||||||
|
const u32 coal_disable = PORTS_0_3_COAL_DONE | PORTS_4_7_COAL_DONE |
|
||||||
|
ALL_PORTS_COAL_DONE;
|
||||||
|
|
||||||
|
/* Disable IRQ coalescing if either threshold is zero */
|
||||||
|
if (!usecs || !count) {
|
||||||
|
clks = count = 0;
|
||||||
|
} else {
|
||||||
|
/* Respect maximum limits of the hardware */
|
||||||
|
clks = usecs * COAL_CLOCKS_PER_USEC;
|
||||||
|
if (clks > MAX_COAL_TIME_THRESHOLD)
|
||||||
|
clks = MAX_COAL_TIME_THRESHOLD;
|
||||||
|
if (count > MAX_COAL_IO_COUNT)
|
||||||
|
count = MAX_COAL_IO_COUNT;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_lock_irqsave(&host->lock, flags);
|
||||||
|
|
||||||
|
#if 0 /* disabled pending functional clarification from Marvell */
|
||||||
|
if (!IS_GEN_I(hpriv)) {
|
||||||
|
/*
|
||||||
|
* GEN_II/GEN_IIE: global thresholds for the entire chip.
|
||||||
|
*/
|
||||||
|
writel(clks, mmio + MV_IRQ_COAL_TIME_THRESHOLD);
|
||||||
|
writel(count, mmio + MV_IRQ_COAL_IO_THRESHOLD);
|
||||||
|
/* clear leftover coal IRQ bit */
|
||||||
|
writelfl(~ALL_PORTS_COAL_IRQ, mmio + MV_IRQ_COAL_CAUSE);
|
||||||
|
clks = count = 0; /* so as to clear the alternate regs below */
|
||||||
|
coal_enable = ALL_PORTS_COAL_DONE;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* All chips: independent thresholds for each HC on the chip.
|
||||||
|
*/
|
||||||
|
hc_mmio = mv_hc_base_from_port(mmio, 0);
|
||||||
|
writel(clks, hc_mmio + HC_IRQ_COAL_TIME_THRESHOLD_OFS);
|
||||||
|
writel(count, hc_mmio + HC_IRQ_COAL_IO_THRESHOLD_OFS);
|
||||||
|
coal_enable |= PORTS_0_3_COAL_DONE;
|
||||||
|
if (hpriv->n_ports > 4) {
|
||||||
|
hc_mmio = mv_hc_base_from_port(mmio, MV_PORTS_PER_HC);
|
||||||
|
writel(clks, hc_mmio + HC_IRQ_COAL_TIME_THRESHOLD_OFS);
|
||||||
|
writel(count, hc_mmio + HC_IRQ_COAL_IO_THRESHOLD_OFS);
|
||||||
|
coal_enable |= PORTS_4_7_COAL_DONE;
|
||||||
|
}
|
||||||
|
if (!count)
|
||||||
|
coal_enable = 0;
|
||||||
|
mv_set_main_irq_mask(host, coal_disable, coal_enable);
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&host->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mv_start_edma - Enable eDMA engine
|
* mv_start_edma - Enable eDMA engine
|
||||||
* @base: port base address
|
* @base: port base address
|
||||||
|
@ -2500,6 +2619,10 @@ static int mv_host_intr(struct ata_host *host, u32 main_irq_cause)
|
||||||
void __iomem *mmio = hpriv->base, *hc_mmio;
|
void __iomem *mmio = hpriv->base, *hc_mmio;
|
||||||
unsigned int handled = 0, port;
|
unsigned int handled = 0, port;
|
||||||
|
|
||||||
|
/* If asserted, clear the "all ports" IRQ coalescing bit */
|
||||||
|
if (main_irq_cause & ALL_PORTS_COAL_DONE)
|
||||||
|
writel(~ALL_PORTS_COAL_IRQ, mmio + MV_IRQ_COAL_CAUSE);
|
||||||
|
|
||||||
for (port = 0; port < hpriv->n_ports; port++) {
|
for (port = 0; port < hpriv->n_ports; port++) {
|
||||||
struct ata_port *ap = host->ports[port];
|
struct ata_port *ap = host->ports[port];
|
||||||
unsigned int p, shift, hardport, port_cause;
|
unsigned int p, shift, hardport, port_cause;
|
||||||
|
@ -2532,6 +2655,8 @@ static int mv_host_intr(struct ata_host *host, u32 main_irq_cause)
|
||||||
* to ack (only) those ports via hc_irq_cause.
|
* to ack (only) those ports via hc_irq_cause.
|
||||||
*/
|
*/
|
||||||
ack_irqs = 0;
|
ack_irqs = 0;
|
||||||
|
if (hc_cause & PORTS_0_3_COAL_DONE)
|
||||||
|
ack_irqs = HC_COAL_IRQ;
|
||||||
for (p = 0; p < MV_PORTS_PER_HC; ++p) {
|
for (p = 0; p < MV_PORTS_PER_HC; ++p) {
|
||||||
if ((port + p) >= hpriv->n_ports)
|
if ((port + p) >= hpriv->n_ports)
|
||||||
break;
|
break;
|
||||||
|
@ -2620,7 +2745,7 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance)
|
||||||
|
|
||||||
/* for MSI: block new interrupts while in here */
|
/* for MSI: block new interrupts while in here */
|
||||||
if (using_msi)
|
if (using_msi)
|
||||||
writel(0, hpriv->main_irq_mask_addr);
|
mv_write_main_irq_mask(0, hpriv);
|
||||||
|
|
||||||
main_irq_cause = readl(hpriv->main_irq_cause_addr);
|
main_irq_cause = readl(hpriv->main_irq_cause_addr);
|
||||||
pending_irqs = main_irq_cause & hpriv->main_irq_mask;
|
pending_irqs = main_irq_cause & hpriv->main_irq_mask;
|
||||||
|
@ -2637,7 +2762,7 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance)
|
||||||
|
|
||||||
/* for MSI: unmask; interrupt cause bits will retrigger now */
|
/* for MSI: unmask; interrupt cause bits will retrigger now */
|
||||||
if (using_msi)
|
if (using_msi)
|
||||||
writel(hpriv->main_irq_mask, hpriv->main_irq_mask_addr);
|
mv_write_main_irq_mask(hpriv->main_irq_mask, hpriv);
|
||||||
|
|
||||||
spin_unlock(&host->lock);
|
spin_unlock(&host->lock);
|
||||||
|
|
||||||
|
@ -3546,6 +3671,8 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx)
|
||||||
* The per-port interrupts get done later as ports are set up.
|
* The per-port interrupts get done later as ports are set up.
|
||||||
*/
|
*/
|
||||||
mv_set_main_irq_mask(host, 0, PCI_ERR);
|
mv_set_main_irq_mask(host, 0, PCI_ERR);
|
||||||
|
mv_set_irq_coalescing(host, irq_coalescing_io_count,
|
||||||
|
irq_coalescing_usecs);
|
||||||
done:
|
done:
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue