From c36e93a0bdc7df31543a6950454dbd26b56c9015 Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Mon, 3 Jun 2013 14:03:49 +0200 Subject: [PATCH 1/3] spi: mpc512x: minor prep before feature change mechanical edits before changing functionality, to reduce diffs - rename variables to later tell remaining TX and RX apart - use size_t for the current TX and RX length (for better compatibility with the min() macro) - remove unused members from the master data (sysclk, eofbyte) - silence a checkpatch warning (braces around single statement) Signed-off-by: Gerhard Sittig Signed-off-by: Mark Brown --- drivers/spi/spi-mpc512x-psc.c | 43 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index dfddf336912d..759a937d5fa7 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -33,7 +33,6 @@ struct mpc512x_psc_spi { void (*cs_control)(struct spi_device *spi, bool on); - u32 sysclk; /* driver internal data */ struct mpc52xx_psc __iomem *psc; @@ -42,7 +41,6 @@ struct mpc512x_psc_spi { u8 bits_per_word; u8 busy; u32 mclk; - u8 eofbyte; struct workqueue_struct *workqueue; struct work_struct work; @@ -50,7 +48,7 @@ struct mpc512x_psc_spi { struct list_head queue; spinlock_t lock; /* Message queue lock */ - struct completion done; + struct completion txisrdone; }; /* controller state */ @@ -138,7 +136,7 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master); struct mpc52xx_psc __iomem *psc = mps->psc; struct mpc512x_psc_fifo __iomem *fifo = mps->fifo; - size_t len = t->len; + size_t tx_len = t->len; u8 *tx_buf = (u8 *)t->tx_buf; u8 *rx_buf = (u8 *)t->rx_buf; @@ -152,58 +150,57 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, /* enable transmiter/receiver */ out_8(&psc->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE); - while (len) { - int count; + while (tx_len) { + size_t txcount; int i; u8 data; size_t fifosz; - int rxcount; + size_t rxcount; /* * The number of bytes that can be sent at a time * depends on the fifo size. */ fifosz = MPC512x_PSC_FIFO_SZ(in_be32(&fifo->txsz)); - count = min(fifosz, len); + txcount = min(fifosz, tx_len); - for (i = count; i > 0; i--) { + for (i = txcount; i > 0; i--) { data = tx_buf ? *tx_buf++ : 0; - if (len == EOFBYTE && t->cs_change) + if (tx_len == EOFBYTE && t->cs_change) setbits32(&fifo->txcmd, MPC512x_PSC_FIFO_EOF); out_8(&fifo->txdata_8, data); - len--; + tx_len--; } - INIT_COMPLETION(mps->done); + INIT_COMPLETION(mps->txisrdone); /* interrupt on tx fifo empty */ out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY); - wait_for_completion(&mps->done); + wait_for_completion(&mps->txisrdone); mdelay(1); - /* rx fifo should have count bytes in it */ + /* rx fifo should have txcount bytes in it */ rxcount = in_be32(&fifo->rxcnt); - if (rxcount != count) + if (rxcount != txcount) mdelay(1); rxcount = in_be32(&fifo->rxcnt); - if (rxcount != count) { + if (rxcount != txcount) { dev_warn(&spi->dev, "expected %d bytes in rx fifo " - "but got %d\n", count, rxcount); + "but got %d\n", txcount, rxcount); } - rxcount = min(rxcount, count); + rxcount = min(rxcount, txcount); for (i = rxcount; i > 0; i--) { data = in_8(&fifo->rxdata_8); if (rx_buf) *rx_buf++ = data; } - while (in_be32(&fifo->rxcnt)) { + while (in_be32(&fifo->rxcnt)) in_8(&fifo->rxdata_8); - } } /* disable transmiter/receiver and fifo interrupt */ out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE); @@ -412,7 +409,7 @@ static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id) in_be32(&fifo->tximr) & MPC512x_PSC_FIFO_EMPTY) { out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); out_be32(&fifo->tximr, 0); - complete(&mps->done); + complete(&mps->txisrdone); return IRQ_HANDLED; } return IRQ_NONE; @@ -444,11 +441,9 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, if (pdata == NULL) { mps->cs_control = mpc512x_spi_cs_control; - mps->sysclk = 0; master->bus_num = bus_num; } else { mps->cs_control = pdata->cs_control; - mps->sysclk = pdata->sysclk; master->bus_num = pdata->bus_num; master->num_chipselect = pdata->max_chipselect; } @@ -479,7 +474,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, goto free_irq; spin_lock_init(&mps->lock); - init_completion(&mps->done); + init_completion(&mps->txisrdone); INIT_WORK(&mps->work, mpc512x_psc_spi_work); INIT_LIST_HEAD(&mps->queue); From 5df24ea63d1eb1b3b0556817bdaf378c19f196ea Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Mon, 3 Jun 2013 14:03:50 +0200 Subject: [PATCH 2/3] spi: mpc512x: improve throughput in the RX/TX func change the MPC512x SPI controller's transmission routine to increase throughput: allow the RX byte counter to "lag behind" the TX byte counter while iterating over the transfer's data, only wait for the remaining RX bytes at the very end of the transfer this approach eliminates delays in the milliseconds range, transfer times for e.g. 16MB of SPI flash data dropped from 31s to 9s, correct operation was tested by continuously transferring and comparing data from an SPI flash (more than 200GB in some 45 hours) background information on the motivation: one might assume that all the RX data should have been received when the TX data was sent, given the fact that we are the SPI master and provide all of the clock, but in practise there's a difference the ISR is triggered when the TX FIFO became empty, while transmission of the last item still occurs (from the TX hold and shift registers), sampling RX data on the opposite clock edge compared to the TX data adds another delay (half a bit period), and RX data needs to propagate from the reception buffer to the RX FIFO depending on the specific SoC implementation to cut it short: a difference between TX and RX byte counters during transmission is not just acceptable but should be considered the regular case, only the very end of the transfer needs to make sure that all of the RX data was received before deasserting the chip select and telling the caller that transmission has completed Signed-off-by: Gerhard Sittig Signed-off-by: Mark Brown --- drivers/spi/spi-mpc512x-psc.c | 137 ++++++++++++++++++++++++++-------- 1 file changed, 105 insertions(+), 32 deletions(-) diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 759a937d5fa7..53c7899a8aba 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -137,6 +137,7 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, struct mpc52xx_psc __iomem *psc = mps->psc; struct mpc512x_psc_fifo __iomem *fifo = mps->fifo; size_t tx_len = t->len; + size_t rx_len = t->len; u8 *tx_buf = (u8 *)t->tx_buf; u8 *rx_buf = (u8 *)t->rx_buf; @@ -150,57 +151,129 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, /* enable transmiter/receiver */ out_8(&psc->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE); - while (tx_len) { + while (rx_len || tx_len) { size_t txcount; - int i; u8 data; size_t fifosz; size_t rxcount; + int rxtries; /* - * The number of bytes that can be sent at a time - * depends on the fifo size. + * send the TX bytes in as large a chunk as possible + * but neither exceed the TX nor the RX FIFOs */ fifosz = MPC512x_PSC_FIFO_SZ(in_be32(&fifo->txsz)); txcount = min(fifosz, tx_len); + fifosz = MPC512x_PSC_FIFO_SZ(in_be32(&fifo->rxsz)); + fifosz -= in_be32(&fifo->rxcnt) + 1; + txcount = min(fifosz, txcount); + if (txcount) { - for (i = txcount; i > 0; i--) { - data = tx_buf ? *tx_buf++ : 0; - if (tx_len == EOFBYTE && t->cs_change) - setbits32(&fifo->txcmd, MPC512x_PSC_FIFO_EOF); - out_8(&fifo->txdata_8, data); - tx_len--; + /* fill the TX FIFO */ + while (txcount-- > 0) { + data = tx_buf ? *tx_buf++ : 0; + if (tx_len == EOFBYTE && t->cs_change) + setbits32(&fifo->txcmd, + MPC512x_PSC_FIFO_EOF); + out_8(&fifo->txdata_8, data); + tx_len--; + } + + /* have the ISR trigger when the TX FIFO is empty */ + INIT_COMPLETION(mps->txisrdone); + out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); + out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY); + wait_for_completion(&mps->txisrdone); } - INIT_COMPLETION(mps->txisrdone); + /* + * consume as much RX data as the FIFO holds, while we + * iterate over the transfer's TX data length + * + * only insist in draining all the remaining RX bytes + * when the TX bytes were exhausted (that's at the very + * end of this transfer, not when still iterating over + * the transfer's chunks) + */ + rxtries = 50; + do { - /* interrupt on tx fifo empty */ - out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); - out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY); + /* + * grab whatever was in the FIFO when we started + * looking, don't bother fetching what was added to + * the FIFO while we read from it -- we'll return + * here eventually and prefer sending out remaining + * TX data + */ + fifosz = in_be32(&fifo->rxcnt); + rxcount = min(fifosz, rx_len); + while (rxcount-- > 0) { + data = in_8(&fifo->rxdata_8); + if (rx_buf) + *rx_buf++ = data; + rx_len--; + } - wait_for_completion(&mps->txisrdone); + /* + * come back later if there still is TX data to send, + * bail out of the RX drain loop if all of the TX data + * was sent and all of the RX data was received (i.e. + * when the transmission has completed) + */ + if (tx_len) + break; + if (!rx_len) + break; - mdelay(1); + /* + * TX data transmission has completed while RX data + * is still pending -- that's a transient situation + * which depends on wire speed and specific + * hardware implementation details (buffering) yet + * should resolve very quickly + * + * just yield for a moment to not hog the CPU for + * too long when running SPI at low speed + * + * the timeout range is rather arbitrary and tries + * to balance throughput against system load; the + * chosen values result in a minimal timeout of 50 + * times 10us and thus work at speeds as low as + * some 20kbps, while the maximum timeout at the + * transfer's end could be 5ms _if_ nothing else + * ticks in the system _and_ RX data still wasn't + * received, which only occurs in situations that + * are exceptional; removing the unpredictability + * of the timeout either decreases throughput + * (longer timeouts), or puts more load on the + * system (fixed short timeouts) or requires the + * use of a timeout API instead of a counter and an + * unknown inner delay + */ + usleep_range(10, 100); - /* rx fifo should have txcount bytes in it */ - rxcount = in_be32(&fifo->rxcnt); - if (rxcount != txcount) - mdelay(1); - - rxcount = in_be32(&fifo->rxcnt); - if (rxcount != txcount) { - dev_warn(&spi->dev, "expected %d bytes in rx fifo " - "but got %d\n", txcount, rxcount); + } while (--rxtries > 0); + if (!tx_len && rx_len && !rxtries) { + /* + * not enough RX bytes even after several retries + * and the resulting rather long timeout? + */ + rxcount = in_be32(&fifo->rxcnt); + dev_warn(&spi->dev, + "short xfer, missing %zd RX bytes, FIFO level %zd\n", + rx_len, rxcount); } - rxcount = min(rxcount, txcount); - for (i = rxcount; i > 0; i--) { - data = in_8(&fifo->rxdata_8); - if (rx_buf) - *rx_buf++ = data; + /* + * drain and drop RX data which "should not be there" in + * the first place, for undisturbed transmission this turns + * into a NOP (except for the FIFO level fetch) + */ + if (!tx_len && !rx_len) { + while (in_be32(&fifo->rxcnt)) + in_8(&fifo->rxdata_8); } - while (in_be32(&fifo->rxcnt)) - in_8(&fifo->rxdata_8); + } /* disable transmiter/receiver and fifo interrupt */ out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE); From 85085898ab0fefc6e181d3b495dc34034dda2fee Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Mon, 3 Jun 2013 14:03:51 +0200 Subject: [PATCH 3/3] spi: mpc512x: use the SPI subsystem's message queue the SPI subsystem recently grew support to queue messages before handing them to the SPI master, and erroneously emitted deprecation warnings when the SPI master's driver did not use the common logic (in fact the master might queue messages, but implement the queue in the master driver's source) [ 0.823015] mpc512x-psc-spi 80011400.psc: master is unqueued, this is deprecated [ 0.854913] mpc512x-psc-spi 80011500.psc: master is unqueued, this is deprecated this change makes the MPC512x PSC SPI driver use the SPI subsystem's support to queue SPI messages and removes the master driver's private code for the queue support Signed-off-by: Gerhard Sittig Signed-off-by: Mark Brown --- drivers/spi/spi-mpc512x-psc.c | 185 ++++++++++++++-------------------- 1 file changed, 74 insertions(+), 111 deletions(-) diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 53c7899a8aba..29fce6af5145 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -39,15 +38,8 @@ struct mpc512x_psc_spi { struct mpc512x_psc_fifo __iomem *fifo; unsigned int irq; u8 bits_per_word; - u8 busy; u32 mclk; - struct workqueue_struct *workqueue; - struct work_struct work; - - struct list_head queue; - spinlock_t lock; /* Message queue lock */ - struct completion txisrdone; }; @@ -134,7 +126,6 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, struct spi_transfer *t) { struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master); - struct mpc52xx_psc __iomem *psc = mps->psc; struct mpc512x_psc_fifo __iomem *fifo = mps->fifo; size_t tx_len = t->len; size_t rx_len = t->len; @@ -144,13 +135,6 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, if (!tx_buf && !rx_buf && t->len) return -EINVAL; - /* Zero MR2 */ - in_8(&psc->mode); - out_8(&psc->mode, 0x0); - - /* enable transmiter/receiver */ - out_8(&psc->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE); - while (rx_len || tx_len) { size_t txcount; u8 data; @@ -275,76 +259,90 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, } } - /* disable transmiter/receiver and fifo interrupt */ - out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE); - out_be32(&fifo->tximr, 0); return 0; } -static void mpc512x_psc_spi_work(struct work_struct *work) +static int mpc512x_psc_spi_msg_xfer(struct spi_master *master, + struct spi_message *m) { - struct mpc512x_psc_spi *mps = container_of(work, - struct mpc512x_psc_spi, - work); + struct spi_device *spi; + unsigned cs_change; + int status; + struct spi_transfer *t; - spin_lock_irq(&mps->lock); - mps->busy = 1; - while (!list_empty(&mps->queue)) { - struct spi_message *m; - struct spi_device *spi; - struct spi_transfer *t = NULL; - unsigned cs_change; - int status; - - m = container_of(mps->queue.next, struct spi_message, queue); - list_del_init(&m->queue); - spin_unlock_irq(&mps->lock); - - spi = m->spi; - cs_change = 1; - status = 0; - list_for_each_entry(t, &m->transfers, transfer_list) { - if (t->bits_per_word || t->speed_hz) { - status = mpc512x_psc_spi_transfer_setup(spi, t); - if (status < 0) - break; - } - - if (cs_change) - mpc512x_psc_spi_activate_cs(spi); - cs_change = t->cs_change; - - status = mpc512x_psc_spi_transfer_rxtx(spi, t); - if (status) + spi = m->spi; + cs_change = 1; + status = 0; + list_for_each_entry(t, &m->transfers, transfer_list) { + if (t->bits_per_word || t->speed_hz) { + status = mpc512x_psc_spi_transfer_setup(spi, t); + if (status < 0) break; - m->actual_length += t->len; - - if (t->delay_usecs) - udelay(t->delay_usecs); - - if (cs_change) - mpc512x_psc_spi_deactivate_cs(spi); } - m->status = status; - m->complete(m->context); + if (cs_change) + mpc512x_psc_spi_activate_cs(spi); + cs_change = t->cs_change; - if (status || !cs_change) + status = mpc512x_psc_spi_transfer_rxtx(spi, t); + if (status) + break; + m->actual_length += t->len; + + if (t->delay_usecs) + udelay(t->delay_usecs); + + if (cs_change) mpc512x_psc_spi_deactivate_cs(spi); - - mpc512x_psc_spi_transfer_setup(spi, NULL); - - spin_lock_irq(&mps->lock); } - mps->busy = 0; - spin_unlock_irq(&mps->lock); + + m->status = status; + m->complete(m->context); + + if (status || !cs_change) + mpc512x_psc_spi_deactivate_cs(spi); + + mpc512x_psc_spi_transfer_setup(spi, NULL); + + spi_finalize_current_message(master); + return status; +} + +static int mpc512x_psc_spi_prep_xfer_hw(struct spi_master *master) +{ + struct mpc512x_psc_spi *mps = spi_master_get_devdata(master); + struct mpc52xx_psc __iomem *psc = mps->psc; + + dev_dbg(&master->dev, "%s()\n", __func__); + + /* Zero MR2 */ + in_8(&psc->mode); + out_8(&psc->mode, 0x0); + + /* enable transmitter/receiver */ + out_8(&psc->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE); + + return 0; +} + +static int mpc512x_psc_spi_unprep_xfer_hw(struct spi_master *master) +{ + struct mpc512x_psc_spi *mps = spi_master_get_devdata(master); + struct mpc52xx_psc __iomem *psc = mps->psc; + struct mpc512x_psc_fifo __iomem *fifo = mps->fifo; + + dev_dbg(&master->dev, "%s()\n", __func__); + + /* disable transmitter/receiver and fifo interrupt */ + out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE); + out_be32(&fifo->tximr, 0); + + return 0; } static int mpc512x_psc_spi_setup(struct spi_device *spi) { - struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master); struct mpc512x_psc_spi_cs *cs = spi->controller_state; - unsigned long flags; int ret; if (spi->bits_per_word % 8) @@ -373,28 +371,6 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi) cs->bits_per_word = spi->bits_per_word; cs->speed_hz = spi->max_speed_hz; - spin_lock_irqsave(&mps->lock, flags); - if (!mps->busy) - mpc512x_psc_spi_deactivate_cs(spi); - spin_unlock_irqrestore(&mps->lock, flags); - - return 0; -} - -static int mpc512x_psc_spi_transfer(struct spi_device *spi, - struct spi_message *m) -{ - struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master); - unsigned long flags; - - m->actual_length = 0; - m->status = -EINPROGRESS; - - spin_lock_irqsave(&mps->lock, flags); - list_add_tail(&m->queue, &mps->queue); - queue_work(mps->workqueue, &mps->work); - spin_unlock_irqrestore(&mps->lock, flags); - return 0; } @@ -477,7 +453,7 @@ static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id) struct mpc512x_psc_spi *mps = (struct mpc512x_psc_spi *)dev_id; struct mpc512x_psc_fifo __iomem *fifo = mps->fifo; - /* clear interrupt and wake up the work queue */ + /* clear interrupt and wake up the rx/tx routine */ if (in_be32(&fifo->txisr) & in_be32(&fifo->tximr) & MPC512x_PSC_FIFO_EMPTY) { out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); @@ -523,7 +499,9 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LSB_FIRST; master->setup = mpc512x_psc_spi_setup; - master->transfer = mpc512x_psc_spi_transfer; + master->prepare_transfer_hardware = mpc512x_psc_spi_prep_xfer_hw; + master->transfer_one_message = mpc512x_psc_spi_msg_xfer; + master->unprepare_transfer_hardware = mpc512x_psc_spi_unprep_xfer_hw; master->cleanup = mpc512x_psc_spi_cleanup; master->dev.of_node = dev->of_node; @@ -541,31 +519,18 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr, "mpc512x-psc-spi", mps); if (ret) goto free_master; + init_completion(&mps->txisrdone); ret = mpc512x_psc_spi_port_config(master, mps); if (ret < 0) goto free_irq; - spin_lock_init(&mps->lock); - init_completion(&mps->txisrdone); - INIT_WORK(&mps->work, mpc512x_psc_spi_work); - INIT_LIST_HEAD(&mps->queue); - - mps->workqueue = - create_singlethread_workqueue(dev_name(master->dev.parent)); - if (mps->workqueue == NULL) { - ret = -EBUSY; - goto free_irq; - } - ret = spi_register_master(master); if (ret < 0) - goto unreg_master; + goto free_irq; return ret; -unreg_master: - destroy_workqueue(mps->workqueue); free_irq: free_irq(mps->irq, mps); free_master: @@ -581,8 +546,6 @@ static int mpc512x_psc_spi_do_remove(struct device *dev) struct spi_master *master = spi_master_get(dev_get_drvdata(dev)); struct mpc512x_psc_spi *mps = spi_master_get_devdata(master); - flush_workqueue(mps->workqueue); - destroy_workqueue(mps->workqueue); spi_unregister_master(master); free_irq(mps->irq, mps); if (mps->psc)