Merge branch 'cxgb4-speed-up-reading-on-chip-memory'
Rahul Lakkireddy says: ==================== cxgb4: speed up reading on-chip memory This series of patches speed up reading on-chip memory (EDC and MC) by reading 64-bits at a time. Patch 1 reworks logic to read EDC and MC. Patch 2 adds logic to read EDC and MC 64-bits at a time. v2: - Dropped AVX CPU intrinsic instructions. - Use readq() to read 64-bits at a time. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
a92ac140fc
|
@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
|
||||||
&payload->start, &payload->end);
|
&payload->start, &payload->end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
|
||||||
|
int mtype, u32 addr, u32 len, void *hbuf)
|
||||||
|
{
|
||||||
|
u32 win_pf, memoffset, mem_aperture, mem_base;
|
||||||
|
struct adapter *adap = pdbg_init->adap;
|
||||||
|
u32 pos, offset, resid;
|
||||||
|
u32 *res_buf;
|
||||||
|
u64 *buf;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Argument sanity checks ...
|
||||||
|
*/
|
||||||
|
if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
buf = (u64 *)hbuf;
|
||||||
|
|
||||||
|
/* Try to do 64-bit reads. Residual will be handled later. */
|
||||||
|
resid = len & 0x7;
|
||||||
|
len -= resid;
|
||||||
|
|
||||||
|
ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
|
||||||
|
&mem_aperture);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
addr = addr + memoffset;
|
||||||
|
win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
|
||||||
|
|
||||||
|
pos = addr & ~(mem_aperture - 1);
|
||||||
|
offset = addr - pos;
|
||||||
|
|
||||||
|
/* Set up initial PCI-E Memory Window to cover the start of our
|
||||||
|
* transfer.
|
||||||
|
*/
|
||||||
|
t4_memory_update_win(adap, win, pos | win_pf);
|
||||||
|
|
||||||
|
/* Transfer data from the adapter */
|
||||||
|
while (len > 0) {
|
||||||
|
*buf++ = le64_to_cpu((__force __le64)
|
||||||
|
t4_read_reg64(adap, mem_base + offset));
|
||||||
|
offset += sizeof(u64);
|
||||||
|
len -= sizeof(u64);
|
||||||
|
|
||||||
|
/* If we've reached the end of our current window aperture,
|
||||||
|
* move the PCI-E Memory Window on to the next.
|
||||||
|
*/
|
||||||
|
if (offset == mem_aperture) {
|
||||||
|
pos += mem_aperture;
|
||||||
|
offset = 0;
|
||||||
|
t4_memory_update_win(adap, win, pos | win_pf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res_buf = (u32 *)buf;
|
||||||
|
/* Read residual in 32-bit multiples */
|
||||||
|
while (resid > sizeof(u32)) {
|
||||||
|
*res_buf++ = le32_to_cpu((__force __le32)
|
||||||
|
t4_read_reg(adap, mem_base + offset));
|
||||||
|
offset += sizeof(u32);
|
||||||
|
resid -= sizeof(u32);
|
||||||
|
|
||||||
|
/* If we've reached the end of our current window aperture,
|
||||||
|
* move the PCI-E Memory Window on to the next.
|
||||||
|
*/
|
||||||
|
if (offset == mem_aperture) {
|
||||||
|
pos += mem_aperture;
|
||||||
|
offset = 0;
|
||||||
|
t4_memory_update_win(adap, win, pos | win_pf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Transfer residual < 32-bits */
|
||||||
|
if (resid)
|
||||||
|
t4_memory_rw_residual(adap, resid, mem_base + offset,
|
||||||
|
(u8 *)res_buf, T4_MEMORY_READ);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#define CUDBG_YIELD_ITERATION 256
|
#define CUDBG_YIELD_ITERATION 256
|
||||||
|
|
||||||
static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
|
static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
|
||||||
|
@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
|
||||||
goto skip_read;
|
goto skip_read;
|
||||||
|
|
||||||
spin_lock(&padap->win0_lock);
|
spin_lock(&padap->win0_lock);
|
||||||
rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
|
rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
|
||||||
bytes_read, bytes,
|
bytes_read, bytes, temp_buff.data);
|
||||||
(__be32 *)temp_buff.data,
|
|
||||||
1);
|
|
||||||
spin_unlock(&padap->win0_lock);
|
spin_unlock(&padap->win0_lock);
|
||||||
if (rc) {
|
if (rc) {
|
||||||
cudbg_err->sys_err = rc;
|
cudbg_err->sys_err = rc;
|
||||||
|
|
|
@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
|
||||||
u32 t4_get_util_window(struct adapter *adap);
|
u32 t4_get_util_window(struct adapter *adap);
|
||||||
void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
|
void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
|
||||||
|
|
||||||
|
int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
|
||||||
|
u32 *mem_base, u32 *mem_aperture);
|
||||||
|
void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
|
||||||
|
void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
|
||||||
|
int dir);
|
||||||
#define T4_MEMORY_WRITE 0
|
#define T4_MEMORY_WRITE 0
|
||||||
#define T4_MEMORY_READ 1
|
#define T4_MEMORY_READ 1
|
||||||
int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
|
int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
|
||||||
|
|
|
@ -483,6 +483,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* t4_memory_rw_init - Get memory window relative offset, base, and size.
|
||||||
|
* @adap: the adapter
|
||||||
|
* @win: PCI-E Memory Window to use
|
||||||
|
* @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
|
||||||
|
* @mem_off: memory relative offset with respect to @mtype.
|
||||||
|
* @mem_base: configured memory base address.
|
||||||
|
* @mem_aperture: configured memory window aperture.
|
||||||
|
*
|
||||||
|
* Get the configured memory window's relative offset, base, and size.
|
||||||
|
*/
|
||||||
|
int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
|
||||||
|
u32 *mem_base, u32 *mem_aperture)
|
||||||
|
{
|
||||||
|
u32 edc_size, mc_size, mem_reg;
|
||||||
|
|
||||||
|
/* Offset into the region of memory which is being accessed
|
||||||
|
* MEM_EDC0 = 0
|
||||||
|
* MEM_EDC1 = 1
|
||||||
|
* MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
|
||||||
|
* MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
|
||||||
|
* MEM_HMA = 4
|
||||||
|
*/
|
||||||
|
edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
|
||||||
|
if (mtype == MEM_HMA) {
|
||||||
|
*mem_off = 2 * (edc_size * 1024 * 1024);
|
||||||
|
} else if (mtype != MEM_MC1) {
|
||||||
|
*mem_off = (mtype * (edc_size * 1024 * 1024));
|
||||||
|
} else {
|
||||||
|
mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
|
||||||
|
MA_EXT_MEMORY0_BAR_A));
|
||||||
|
*mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Each PCI-E Memory Window is programmed with a window size -- or
|
||||||
|
* "aperture" -- which controls the granularity of its mapping onto
|
||||||
|
* adapter memory. We need to grab that aperture in order to know
|
||||||
|
* how to use the specified window. The window is also programmed
|
||||||
|
* with the base address of the Memory Window in BAR0's address
|
||||||
|
* space. For T4 this is an absolute PCI-E Bus Address. For T5
|
||||||
|
* the address is relative to BAR0.
|
||||||
|
*/
|
||||||
|
mem_reg = t4_read_reg(adap,
|
||||||
|
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
|
||||||
|
win));
|
||||||
|
/* a dead adapter will return 0xffffffff for PIO reads */
|
||||||
|
if (mem_reg == 0xffffffff)
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
*mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
|
||||||
|
*mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
|
||||||
|
if (is_t4(adap->params.chip))
|
||||||
|
*mem_base -= adap->t4_bar0;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* t4_memory_update_win - Move memory window to specified address.
|
||||||
|
* @adap: the adapter
|
||||||
|
* @win: PCI-E Memory Window to use
|
||||||
|
* @addr: location to move.
|
||||||
|
*
|
||||||
|
* Move memory window to specified address.
|
||||||
|
*/
|
||||||
|
void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
|
||||||
|
{
|
||||||
|
t4_write_reg(adap,
|
||||||
|
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
|
||||||
|
addr);
|
||||||
|
/* Read it back to ensure that changes propagate before we
|
||||||
|
* attempt to use the new value.
|
||||||
|
*/
|
||||||
|
t4_read_reg(adap,
|
||||||
|
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* t4_memory_rw_residual - Read/Write residual data.
|
||||||
|
* @adap: the adapter
|
||||||
|
* @off: relative offset within residual to start read/write.
|
||||||
|
* @addr: address within indicated memory type.
|
||||||
|
* @buf: host memory buffer
|
||||||
|
* @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
|
||||||
|
*
|
||||||
|
* Read/Write residual data less than 32-bits.
|
||||||
|
*/
|
||||||
|
void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
|
||||||
|
int dir)
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
u32 word;
|
||||||
|
char byte[4];
|
||||||
|
} last;
|
||||||
|
unsigned char *bp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (dir == T4_MEMORY_READ) {
|
||||||
|
last.word = le32_to_cpu((__force __le32)
|
||||||
|
t4_read_reg(adap, addr));
|
||||||
|
for (bp = (unsigned char *)buf, i = off; i < 4; i++)
|
||||||
|
bp[i] = last.byte[i];
|
||||||
|
} else {
|
||||||
|
last.word = *buf;
|
||||||
|
for (i = off; i < 4; i++)
|
||||||
|
last.byte[i] = 0;
|
||||||
|
t4_write_reg(adap, addr,
|
||||||
|
(__force u32)cpu_to_le32(last.word));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
|
* t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
|
||||||
* @adap: the adapter
|
* @adap: the adapter
|
||||||
|
@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
|
||||||
u32 len, void *hbuf, int dir)
|
u32 len, void *hbuf, int dir)
|
||||||
{
|
{
|
||||||
u32 pos, offset, resid, memoffset;
|
u32 pos, offset, resid, memoffset;
|
||||||
u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
|
u32 win_pf, mem_aperture, mem_base;
|
||||||
u32 *buf;
|
u32 *buf;
|
||||||
|
int ret;
|
||||||
|
|
||||||
/* Argument sanity checks ...
|
/* Argument sanity checks ...
|
||||||
*/
|
*/
|
||||||
|
@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
|
||||||
resid = len & 0x3;
|
resid = len & 0x3;
|
||||||
len -= resid;
|
len -= resid;
|
||||||
|
|
||||||
/* Offset into the region of memory which is being accessed
|
ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
|
||||||
* MEM_EDC0 = 0
|
&mem_aperture);
|
||||||
* MEM_EDC1 = 1
|
if (ret)
|
||||||
* MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller
|
return ret;
|
||||||
* MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5)
|
|
||||||
* MEM_HMA = 4
|
|
||||||
*/
|
|
||||||
edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
|
|
||||||
if (mtype == MEM_HMA) {
|
|
||||||
memoffset = 2 * (edc_size * 1024 * 1024);
|
|
||||||
} else if (mtype != MEM_MC1) {
|
|
||||||
memoffset = (mtype * (edc_size * 1024 * 1024));
|
|
||||||
} else {
|
|
||||||
mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
|
|
||||||
MA_EXT_MEMORY0_BAR_A));
|
|
||||||
memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Determine the PCIE_MEM_ACCESS_OFFSET */
|
/* Determine the PCIE_MEM_ACCESS_OFFSET */
|
||||||
addr = addr + memoffset;
|
addr = addr + memoffset;
|
||||||
|
|
||||||
/* Each PCI-E Memory Window is programmed with a window size -- or
|
|
||||||
* "aperture" -- which controls the granularity of its mapping onto
|
|
||||||
* adapter memory. We need to grab that aperture in order to know
|
|
||||||
* how to use the specified window. The window is also programmed
|
|
||||||
* with the base address of the Memory Window in BAR0's address
|
|
||||||
* space. For T4 this is an absolute PCI-E Bus Address. For T5
|
|
||||||
* the address is relative to BAR0.
|
|
||||||
*/
|
|
||||||
mem_reg = t4_read_reg(adap,
|
|
||||||
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
|
|
||||||
win));
|
|
||||||
mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
|
|
||||||
mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
|
|
||||||
if (is_t4(adap->params.chip))
|
|
||||||
mem_base -= adap->t4_bar0;
|
|
||||||
win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
|
win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
|
||||||
|
|
||||||
/* Calculate our initial PCI-E Memory Window Position and Offset into
|
/* Calculate our initial PCI-E Memory Window Position and Offset into
|
||||||
* that Window.
|
* that Window.
|
||||||
*/
|
*/
|
||||||
pos = addr & ~(mem_aperture-1);
|
pos = addr & ~(mem_aperture - 1);
|
||||||
offset = addr - pos;
|
offset = addr - pos;
|
||||||
|
|
||||||
/* Set up initial PCI-E Memory Window to cover the start of our
|
/* Set up initial PCI-E Memory Window to cover the start of our
|
||||||
* transfer. (Read it back to ensure that changes propagate before we
|
* transfer.
|
||||||
* attempt to use the new value.)
|
|
||||||
*/
|
*/
|
||||||
t4_write_reg(adap,
|
t4_memory_update_win(adap, win, pos | win_pf);
|
||||||
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
|
|
||||||
pos | win_pf);
|
|
||||||
t4_read_reg(adap,
|
|
||||||
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
|
|
||||||
|
|
||||||
/* Transfer data to/from the adapter as long as there's an integral
|
/* Transfer data to/from the adapter as long as there's an integral
|
||||||
* number of 32-bit transfers to complete.
|
* number of 32-bit transfers to complete.
|
||||||
|
@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
|
||||||
if (offset == mem_aperture) {
|
if (offset == mem_aperture) {
|
||||||
pos += mem_aperture;
|
pos += mem_aperture;
|
||||||
offset = 0;
|
offset = 0;
|
||||||
t4_write_reg(adap,
|
t4_memory_update_win(adap, win, pos | win_pf);
|
||||||
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
|
|
||||||
win), pos | win_pf);
|
|
||||||
t4_read_reg(adap,
|
|
||||||
PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
|
|
||||||
win));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
|
||||||
* residual amount. The PCI-E Memory Window has already been moved
|
* residual amount. The PCI-E Memory Window has already been moved
|
||||||
* above (if necessary) to cover this final transfer.
|
* above (if necessary) to cover this final transfer.
|
||||||
*/
|
*/
|
||||||
if (resid) {
|
if (resid)
|
||||||
union {
|
t4_memory_rw_residual(adap, resid, mem_base + offset,
|
||||||
u32 word;
|
(u8 *)buf, dir);
|
||||||
char byte[4];
|
|
||||||
} last;
|
|
||||||
unsigned char *bp;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
if (dir == T4_MEMORY_READ) {
|
|
||||||
last.word = le32_to_cpu(
|
|
||||||
(__force __le32)t4_read_reg(adap,
|
|
||||||
mem_base + offset));
|
|
||||||
for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
|
|
||||||
bp[i] = last.byte[i];
|
|
||||||
} else {
|
|
||||||
last.word = *buf;
|
|
||||||
for (i = resid; i < 4; i++)
|
|
||||||
last.byte[i] = 0;
|
|
||||||
t4_write_reg(adap, mem_base + offset,
|
|
||||||
(__force u32)cpu_to_le32(last.word));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue