From 35b1de5579704be0e03454f713dddd6f86eccb7e Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 27 Jun 2014 19:23:47 +0530 Subject: [PATCH 1/5] rdma/cxgb4: Fixes cxgb4 probe failure in VM when PF is exposed through PCI Passthrough Change logic which determines our Physical Function at PCI Probe time. Now we read the PL_WHOAMI register and get the Physical Function. Pass Physical Function to Upper Layer Drivers in lld_info structure in the new field "pf" added to lld_info. This is useful for the cases where the PF, say PF4, is attached to a Virtual Machine via some form of "PCI Pass Through" technology and the PCI Function shows up as PF0 in the VM. Based on original work by Casey Leedom Signed-off-by: Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 18 ++++++++++-------- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5e153f6d4b48..d62a0f9dd11a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -465,7 +465,8 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) 16)) | FW_WR_FLOWID(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; - flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8); + flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN + (ep->com.dev->rdev.lldi.pf)); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index b7154dc9936c..e8e77b806c7a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4053,6 +4053,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) unsigned short i; lli.pdev = adap->pdev; + lli.pf = adap->fn; lli.l2t = adap->l2t; lli.tids = &adap->tids; lli.ports = adap->port; @@ -6294,13 +6295,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return err; } - /* We control everything through one PF */ - func = PCI_FUNC(pdev->devfn); - if (func != ent->driver_data) { - pci_save_state(pdev); /* to restore SR-IOV later */ - goto sriov; - } - err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "cannot enable PCI device\n"); @@ -6344,6 +6338,15 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; } + /* We control everything through one PF */ + func = SOURCEPF_GET(readl(adapter->regs + PL_WHOAMI)); + if ((pdev->device == 0xa000 && func != 0) || + func != ent->driver_data) { + pci_save_state(pdev); /* to restore SR-IOV later */ + err = 0; + goto out_unmap_bar0; + } + adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; adapter->mbox = func; @@ -6507,7 +6510,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (is_offload(adapter)) attach_ulds(adapter); -sriov: #ifdef CONFIG_PCI_IOV if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) if (pci_enable_sriov(pdev, num_vf[func]) == 0) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index d719decbfb28..8f60851b75ad 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -253,6 +253,7 @@ struct cxgb4_lld_info { int dbfifo_int_thresh; /* doorbell fifo int threshold */ unsigned int sge_pktshift; /* Padding between CPL and */ /* packet data */ + unsigned int pf; /* Physical Function we're using */ bool enable_fw_ofld_conn; /* Enable connection through fw */ /* WR */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ From 0abfd1524b655f00597d419c8e63d06ebf637372 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 27 Jun 2014 19:23:48 +0530 Subject: [PATCH 2/5] cxgb4: Use FW interface to get BAR0 value Use the firmware interface to get the BAR0 value since we really don't want to use the PCI-E Configuration Space Backdoor access which is owned by the firmware. Set up PCI-E Memory Window registers using the true values programmed into BAR registers. When the PF4 "Master Function" is exported to a Virtual Machine, the values returned by pci_resource_start() will be for the synthetic PCI-E Configuration Space and not the real addresses. But we need to program the PCI-E Memory Window address decoders with the real addresses that we're going to be using in order to have accesses through the Memory Windows work. Based on origninal work by Casey Leedom Signed-off-by: Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 5 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 72 +++++++++++++++++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 24 +++++++ drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 5 ++ 4 files changed, 98 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 7b5425c9c0fb..4fd215185f93 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -85,7 +85,8 @@ enum { MEMWIN1_BASE_T5 = 0x52000, MEMWIN2_APERTURE = 65536, MEMWIN2_BASE = 0x30000, - MEMWIN2_BASE_T5 = 0x54000, + MEMWIN2_APERTURE_T5 = 131072, + MEMWIN2_BASE_T5 = 0x60000, }; enum dev_master { @@ -608,6 +609,7 @@ struct l2t_data; struct adapter { void __iomem *regs; void __iomem *bar2; + u32 t4_bar0; struct pci_dev *pdev; struct device *pdev_dev; unsigned int mbox; @@ -946,6 +948,7 @@ void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx); +void t4_hw_pci_read_cfg4(struct adapter *adapter, int reg, u32 *val); struct fw_filter_wr; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e8e77b806c7a..524a8b2894e7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4773,20 +4773,75 @@ void t4_fatal_err(struct adapter *adap) dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n"); } +/* Return the specified PCI-E Configuration Space register from our Physical + * Function. We try first via a Firmware LDST Command since we prefer to let + * the firmware own all of these registers, but if that fails we go for it + * directly ourselves. + */ +static u32 t4_read_pcie_cfg4(struct adapter *adap, int reg) +{ + struct fw_ldst_cmd ldst_cmd; + u32 val; + int ret; + + /* Construct and send the Firmware LDST Command to retrieve the + * specified PCI-E Configuration Space register. + */ + memset(&ldst_cmd, 0, sizeof(ldst_cmd)); + ldst_cmd.op_to_addrspace = + htonl(FW_CMD_OP(FW_LDST_CMD) | + FW_CMD_REQUEST | + FW_CMD_READ | + FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FUNC_PCIE)); + ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd)); + ldst_cmd.u.pcie.select_naccess = FW_LDST_CMD_NACCESS(1); + ldst_cmd.u.pcie.ctrl_to_fn = + (FW_LDST_CMD_LC | FW_LDST_CMD_FN(adap->fn)); + ldst_cmd.u.pcie.r = reg; + ret = t4_wr_mbox(adap, adap->mbox, &ldst_cmd, sizeof(ldst_cmd), + &ldst_cmd); + + /* If the LDST Command suucceeded, exctract the returned register + * value. Otherwise read it directly ourself. + */ + if (ret == 0) + val = ntohl(ldst_cmd.u.pcie.data[0]); + else + t4_hw_pci_read_cfg4(adap, reg, &val); + + return val; +} + static void setup_memwin(struct adapter *adap) { - u32 bar0, mem_win0_base, mem_win1_base, mem_win2_base; + u32 mem_win0_base, mem_win1_base, mem_win2_base, mem_win2_aperture; - bar0 = pci_resource_start(adap->pdev, 0); /* truncation intentional */ if (is_t4(adap->params.chip)) { + u32 bar0; + + /* Truncation intentional: we only read the bottom 32-bits of + * the 64-bit BAR0/BAR1 ... We use the hardware backdoor + * mechanism to read BAR0 instead of using + * pci_resource_start() because we could be operating from + * within a Virtual Machine which is trapping our accesses to + * our Configuration Space and we need to set up the PCI-E + * Memory Window decoders with the actual addresses which will + * be coming across the PCI-E link. + */ + bar0 = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_0); + bar0 &= PCI_BASE_ADDRESS_MEM_MASK; + adap->t4_bar0 = bar0; + mem_win0_base = bar0 + MEMWIN0_BASE; mem_win1_base = bar0 + MEMWIN1_BASE; mem_win2_base = bar0 + MEMWIN2_BASE; + mem_win2_aperture = MEMWIN2_APERTURE; } else { /* For T5, only relative offset inside the PCIe BAR is passed */ mem_win0_base = MEMWIN0_BASE; - mem_win1_base = MEMWIN1_BASE_T5; + mem_win1_base = MEMWIN1_BASE; mem_win2_base = MEMWIN2_BASE_T5; + mem_win2_aperture = MEMWIN2_APERTURE_T5; } t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0), mem_win0_base | BIR(0) | @@ -4796,16 +4851,19 @@ static void setup_memwin(struct adapter *adap) WINDOW(ilog2(MEMWIN1_APERTURE) - 10)); t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2), mem_win2_base | BIR(0) | - WINDOW(ilog2(MEMWIN2_APERTURE) - 10)); + WINDOW(ilog2(mem_win2_aperture) - 10)); + t4_read_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2)); } static void setup_memwin_rdma(struct adapter *adap) { if (adap->vres.ocq.size) { - unsigned int start, sz_kb; + u32 start; + unsigned int sz_kb; - start = pci_resource_start(adap->pdev, 2) + - OCQ_WIN_OFFSET(adap->pdev, &adap->vres); + start = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_2); + start &= PCI_BASE_ADDRESS_MEM_MASK; + start += OCQ_WIN_OFFSET(adap->pdev, &adap->vres); sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10; t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3), diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 4536ea9f7018..2c3d00d77ef6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -143,6 +143,30 @@ void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, } } +/* + * Read a 32-bit PCI Configuration Space register via the PCI-E backdoor + * mechanism. This guarantees that we get the real value even if we're + * operating within a Virtual Machine and the Hypervisor is trapping our + * Configuration Space accesses. + */ +void t4_hw_pci_read_cfg4(struct adapter *adap, int reg, u32 *val) +{ + u32 req = ENABLE | FUNCTION(adap->fn) | reg; + + if (is_t4(adap->params.chip)) + req |= F_LOCALCFG; + + t4_write_reg(adap, PCIE_CFG_SPACE_REQ, req); + *val = t4_read_reg(adap, PCIE_CFG_SPACE_DATA); + + /* Reset ENABLE to 0 so reads of PCIE_CFG_SPACE_DATA won't cause a + * Configuration Space read. (None of the other fields matter when + * ENABLE is 0 so a simple register write is easier than a + * read-modify-write via t4_set_reg_field().) + */ + t4_write_reg(adap, PCIE_CFG_SPACE_REQ, 0); +} + /* * Get the reply to a mailbox command and store it in @rpl in big-endian order. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 3360025ad922..f6b2ee13e715 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -387,6 +387,8 @@ #define MSTGRPPERR 0x00000001U #define PCIE_NONFAT_ERR 0x3010 +#define PCIE_CFG_SPACE_REQ 0x3060 +#define PCIE_CFG_SPACE_DATA 0x3064 #define PCIE_MEM_ACCESS_BASE_WIN 0x3068 #define S_PCIEOFST 10 #define M_PCIEOFST 0x3fffffU @@ -399,6 +401,9 @@ #define WINDOW_SHIFT 0 #define WINDOW(x) ((x) << WINDOW_SHIFT) #define PCIE_MEM_ACCESS_OFFSET 0x306c +#define ENABLE (1U << 30) +#define FUNCTION(x) ((x) << 12) +#define F_LOCALCFG (1U << 28) #define S_PFNUM 0 #define V_PFNUM(x) ((x) << S_PFNUM) From fc5ab020965067c457a4b5a9eb15648b6874bef2 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 27 Jun 2014 19:23:49 +0530 Subject: [PATCH 3/5] cxgb4: Replaced the backdoor mechanism to access the HW memory with PCIe Window method Rip out a bunch of redundant PCI-E Memory Window Read/Write routines, collapse the more general purpose routines into a single routine thereby eliminating the need for a large stack frame (and extra data copying) in the outer routine, change everything to use the improved routine t4_memory_rw. Based on origninal work by Casey Leedom and Steve Wise Signed-off-by: Casey Leedom Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 15 +- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 60 +++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 246 ++++++++---------- drivers/net/ethernet/chelsio/cxgb4/t4_regs.h | 1 + 4 files changed, 149 insertions(+), 173 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 4fd215185f93..f338a7fcebf7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -654,6 +654,7 @@ struct adapter { struct dentry *debugfs_root; spinlock_t stats_lock; + spinlock_t win0_lock ____cacheline_aligned_in_smp; }; /* Defined bit width of user definable filter tuples @@ -960,8 +961,17 @@ int t4_wait_dev_ready(struct adapter *adap); int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc); int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); -int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, - __be32 *buf); + +#define T4_MEMORY_WRITE 0 +#define T4_MEMORY_READ 1 +int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, + __be32 *buf, int dir); +static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, + u32 len, __be32 *buf) +{ + return t4_memory_rw(adap, 0, mtype, addr, len, buf, 0); +} + int t4_seeprom_wp(struct adapter *adapter, bool enable); int get_vpd_params(struct adapter *adapter, struct vpd_params *p); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); @@ -1059,7 +1069,6 @@ int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl); void t4_db_full(struct adapter *adapter); void t4_db_dropped(struct adapter *adapter); -int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); void t4_sge_decode_idma_state(struct adapter *adapter, int state); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 524a8b2894e7..74ef0e69cdc4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3066,6 +3066,8 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, loff_t avail = file_inode(file)->i_size; unsigned int mem = (uintptr_t)file->private_data & 3; struct adapter *adap = file->private_data - mem; + __be32 *data; + int ret; if (pos < 0) return -EINVAL; @@ -3074,29 +3076,24 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count, if (count > avail - pos) count = avail - pos; - while (count) { - size_t len; - int ret, ofst; - __be32 data[16]; + data = t4_alloc_mem(count); + if (!data) + return -ENOMEM; - if ((mem == MEM_MC) || (mem == MEM_MC1)) - ret = t4_mc_read(adap, mem % MEM_MC, pos, data, NULL); - else - ret = t4_edc_read(adap, mem, pos, data, NULL); - if (ret) - return ret; - - ofst = pos % sizeof(data); - len = min(count, sizeof(data) - ofst); - if (copy_to_user(buf, (u8 *)data + ofst, len)) - return -EFAULT; - - buf += len; - pos += len; - count -= len; + spin_lock(&adap->win0_lock); + ret = t4_memory_rw(adap, 0, mem, pos, count, data, T4_MEMORY_READ); + spin_unlock(&adap->win0_lock); + if (ret) { + t4_free_mem(data); + return ret; } - count = pos - *ppos; - *ppos = pos; + ret = copy_to_user(buf, data, count); + + t4_free_mem(data); + if (ret) + return -EFAULT; + + *ppos = pos + count; return count; } @@ -3757,7 +3754,11 @@ static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx) __be64 indices; int ret; - ret = t4_mem_win_read_len(adap, addr, (__be32 *)&indices, 8); + spin_lock(&adap->win0_lock); + ret = t4_memory_rw(adap, 0, MEM_EDC0, addr, + sizeof(indices), (__be32 *)&indices, + T4_MEMORY_READ); + spin_unlock(&adap->win0_lock); if (!ret) { *cidx = (be64_to_cpu(indices) >> 25) & 0xffff; *pidx = (be64_to_cpu(indices) >> 9) & 0xffff; @@ -5076,7 +5077,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) adapter->fn, 0, 1, params, val); if (ret == 0) { /* - * For t4_memory_write() below addresses and + * For t4_memory_rw() below addresses and * sizes have to be in terms of multiples of 4 * bytes. So, if the Configuration File isn't * a multiple of 4 bytes in length we'll have @@ -5092,8 +5093,9 @@ static int adap_init0_config(struct adapter *adapter, int reset) mtype = FW_PARAMS_PARAM_Y_GET(val[0]); maddr = FW_PARAMS_PARAM_Z_GET(val[0]) << 16; - ret = t4_memory_write(adapter, mtype, maddr, - size, data); + spin_lock(&adapter->win0_lock); + ret = t4_memory_rw(adapter, 0, mtype, maddr, + size, data, T4_MEMORY_WRITE); if (ret == 0 && resid != 0) { union { __be32 word; @@ -5104,10 +5106,12 @@ static int adap_init0_config(struct adapter *adapter, int reset) last.word = data[size >> 2]; for (i = resid; i < 4; i++) last.buf[i] = 0; - ret = t4_memory_write(adapter, mtype, - maddr + size, - 4, &last.word); + ret = t4_memory_rw(adapter, 0, mtype, + maddr + size, + 4, &last.word, + T4_MEMORY_WRITE); } + spin_unlock(&adapter->win0_lock); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 2c3d00d77ef6..eb5a278e8045 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -413,78 +413,41 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) return 0; } -/* - * t4_mem_win_rw - read/write memory through PCIE memory window - * @adap: the adapter - * @addr: address of first byte requested - * @data: MEMWIN0_APERTURE bytes of data containing the requested address - * @dir: direction of transfer 1 => read, 0 => write - * - * Read/write MEMWIN0_APERTURE bytes of data from MC starting at a - * MEMWIN0_APERTURE-byte-aligned address that covers the requested - * address @addr. - */ -static int t4_mem_win_rw(struct adapter *adap, u32 addr, __be32 *data, int dir) -{ - int i; - u32 win_pf = is_t4(adap->params.chip) ? 0 : V_PFNUM(adap->fn); - - /* - * Setup offset into PCIE memory window. Address must be a - * MEMWIN0_APERTURE-byte-aligned address. (Read back MA register to - * ensure that changes propagate before we attempt to use the new - * values.) - */ - t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, - (addr & ~(MEMWIN0_APERTURE - 1)) | win_pf); - t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); - - /* Collecting data 4 bytes at a time upto MEMWIN0_APERTURE */ - for (i = 0; i < MEMWIN0_APERTURE; i = i+0x4) { - if (dir) - *data++ = (__force __be32) t4_read_reg(adap, - (MEMWIN0_BASE + i)); - else - t4_write_reg(adap, (MEMWIN0_BASE + i), - (__force u32) *data++); - } - - return 0; -} - /** * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window * @adap: the adapter + * @win: PCI-E Memory Window to use * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC * @addr: address within indicated memory type * @len: amount of memory to transfer * @buf: host memory buffer - * @dir: direction of transfer 1 => read, 0 => write + * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0) * * Reads/writes an [almost] arbitrary memory region in the firmware: the - * firmware memory address, length and host buffer must be aligned on - * 32-bit boudaries. The memory is transferred as a raw byte sequence - * from/to the firmware's memory. If this memory contains data - * structures which contain multi-byte integers, it's the callers - * responsibility to perform appropriate byte order conversions. + * firmware memory address and host buffer must be aligned on 32-bit + * boudaries; the length may be arbitrary. The memory is transferred as + * a raw byte sequence from/to the firmware's memory. If this memory + * contains data structures which contain multi-byte integers, it's the + * caller's responsibility to perform appropriate byte order conversions. */ -static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len, - __be32 *buf, int dir) +int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, + u32 len, __be32 *buf, int dir) { - u32 pos, start, end, offset, memoffset; - u32 edc_size, mc_size; - int ret = 0; - __be32 *data; + u32 pos, offset, resid, memoffset; + u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; - /* - * Argument sanity checks ... + /* Argument sanity checks ... */ - if ((addr & 0x3) || (len & 0x3)) + if (addr & 0x3) return -EINVAL; - data = vmalloc(MEMWIN0_APERTURE); - if (!data) - return -ENOMEM; + /* It's convenient to be able to handle lengths which aren't a + * multiple of 32-bits because we often end up transferring files to + * the firmware. So we'll handle that by normalizing the length here + * and then handling any residual transfer at the end. + */ + resid = len & 0x3; + len -= resid; /* Offset into the region of memory which is being accessed * MEM_EDC0 = 0 @@ -505,66 +468,98 @@ static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len, /* Determine the PCIE_MEM_ACCESS_OFFSET */ addr = addr + memoffset; - /* - * The underlaying EDC/MC read routines read MEMWIN0_APERTURE bytes - * at a time so we need to round down the start and round up the end. - * We'll start copying out of the first line at (addr - start) a word - * at a time. + /* Each PCI-E Memory Window is programmed with a window size -- or + * "aperture" -- which controls the granularity of its mapping onto + * adapter memory. We need to grab that aperture in order to know + * how to use the specified window. The window is also programmed + * with the base address of the Memory Window in BAR0's address + * space. For T4 this is an absolute PCI-E Bus Address. For T5 + * the address is relative to BAR0. */ - start = addr & ~(MEMWIN0_APERTURE-1); - end = (addr + len + MEMWIN0_APERTURE-1) & ~(MEMWIN0_APERTURE-1); - offset = (addr - start)/sizeof(__be32); + mem_reg = t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, + win)); + mem_aperture = 1 << (GET_WINDOW(mem_reg) + 10); + mem_base = GET_PCIEOFST(mem_reg) << 10; + if (is_t4(adap->params.chip)) + mem_base -= adap->t4_bar0; + win_pf = is_t4(adap->params.chip) ? 0 : V_PFNUM(adap->fn); - for (pos = start; pos < end; pos += MEMWIN0_APERTURE, offset = 0) { + /* Calculate our initial PCI-E Memory Window Position and Offset into + * that Window. + */ + pos = addr & ~(mem_aperture-1); + offset = addr - pos; - /* - * If we're writing, copy the data from the caller's memory - * buffer + /* Set up initial PCI-E Memory Window to cover the start of our + * transfer. (Read it back to ensure that changes propagate before we + * attempt to use the new value.) + */ + t4_write_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, win), + pos | win_pf); + t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, win)); + + /* Transfer data to/from the adapter as long as there's an integral + * number of 32-bit transfers to complete. + */ + while (len > 0) { + if (dir == T4_MEMORY_READ) + *buf++ = (__force __be32) t4_read_reg(adap, + mem_base + offset); + else + t4_write_reg(adap, mem_base + offset, + (__force u32) *buf++); + offset += sizeof(__be32); + len -= sizeof(__be32); + + /* If we've reached the end of our current window aperture, + * move the PCI-E Memory Window on to the next. Note that + * doing this here after "len" may be 0 allows us to set up + * the PCI-E Memory Window for a possible final residual + * transfer below ... */ - if (!dir) { - /* - * If we're doing a partial write, then we need to do - * a read-modify-write ... - */ - if (offset || len < MEMWIN0_APERTURE) { - ret = t4_mem_win_rw(adap, pos, data, 1); - if (ret) - break; - } - while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) && - len > 0) { - data[offset++] = *buf++; - len -= sizeof(__be32); - } + if (offset == mem_aperture) { + pos += mem_aperture; + offset = 0; + t4_write_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, + win), pos | win_pf); + t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, + win)); } - - /* - * Transfer a block of memory and bail if there's an error. - */ - ret = t4_mem_win_rw(adap, pos, data, dir); - if (ret) - break; - - /* - * If we're reading, copy the data into the caller's memory - * buffer. - */ - if (dir) - while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) && - len > 0) { - *buf++ = data[offset++]; - len -= sizeof(__be32); - } } - vfree(data); - return ret; -} + /* If the original transfer had a length which wasn't a multiple of + * 32-bits, now's where we need to finish off the transfer of the + * residual amount. The PCI-E Memory Window has already been moved + * above (if necessary) to cover this final transfer. + */ + if (resid) { + union { + __be32 word; + char byte[4]; + } last; + unsigned char *bp; + int i; -int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, - __be32 *buf) -{ - return t4_memory_rw(adap, mtype, addr, len, buf, 0); + if (dir == T4_MEMORY_WRITE) { + last.word = (__force __be32) t4_read_reg(adap, + mem_base + offset); + for (bp = (unsigned char *)buf, i = resid; i < 4; i++) + bp[i] = last.byte[i]; + } else { + last.word = *buf; + for (i = resid; i < 4; i++) + last.byte[i] = 0; + t4_write_reg(adap, mem_base + offset, + (__force u32) last.word); + } + } + + return 0; } #define EEPROM_STAT_ADDR 0x7bfc @@ -2528,39 +2523,6 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } -/** - * t4_mem_win_read_len - read memory through PCIE memory window - * @adap: the adapter - * @addr: address of first byte requested aligned on 32b. - * @data: len bytes to hold the data read - * @len: amount of data to read from window. Must be <= - * MEMWIN0_APERATURE after adjusting for 16B for T4 and - * 128B for T5 alignment requirements of the the memory window. - * - * Read len bytes of data from MC starting at @addr. - */ -int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len) -{ - int i, off; - u32 win_pf = is_t4(adap->params.chip) ? 0 : V_PFNUM(adap->fn); - - /* Align on a 2KB boundary. - */ - off = addr & MEMWIN0_APERTURE; - if ((addr & 3) || (len + off) > MEMWIN0_APERTURE) - return -EINVAL; - - t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, - (addr & ~MEMWIN0_APERTURE) | win_pf); - t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); - - for (i = 0; i < len; i += 4) - *data++ = (__force __be32) t4_read_reg(adap, - (MEMWIN0_BASE + off + i)); - - return 0; -} - /** * t4_mdio_rd - read a PHY register through MDIO * @adap: the adapter diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index f6b2ee13e715..ae7776471ceb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -400,6 +400,7 @@ #define WINDOW_MASK 0x000000ffU #define WINDOW_SHIFT 0 #define WINDOW(x) ((x) << WINDOW_SHIFT) +#define GET_WINDOW(x) (((x) >> WINDOW_SHIFT) & WINDOW_MASK) #define PCIE_MEM_ACCESS_OFFSET 0x306c #define ENABLE (1U << 30) #define FUNCTION(x) ((x) << 12) From fb1e933d3c1a7289dc3c3456ff9b97f53ed5f1d9 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 27 Jun 2014 19:23:50 +0530 Subject: [PATCH 4/5] cxgb4: Adds device ID for few more Chelsio T4 Adapters Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 74ef0e69cdc4..921f80500032 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -224,6 +224,17 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = { CH_DEVICE(0x4008, -1), CH_DEVICE(0x4009, -1), CH_DEVICE(0x400a, -1), + CH_DEVICE(0x400d, -1), + CH_DEVICE(0x400e, -1), + CH_DEVICE(0x4080, -1), + CH_DEVICE(0x4081, -1), + CH_DEVICE(0x4082, -1), + CH_DEVICE(0x4083, -1), + CH_DEVICE(0x4084, -1), + CH_DEVICE(0x4085, -1), + CH_DEVICE(0x4086, -1), + CH_DEVICE(0x4087, -1), + CH_DEVICE(0x4088, -1), CH_DEVICE(0x4401, 4), CH_DEVICE(0x4402, 4), CH_DEVICE(0x4403, 4), @@ -236,6 +247,15 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = { CH_DEVICE(0x440a, 4), CH_DEVICE(0x440d, 4), CH_DEVICE(0x440e, 4), + CH_DEVICE(0x4480, 4), + CH_DEVICE(0x4481, 4), + CH_DEVICE(0x4482, 4), + CH_DEVICE(0x4483, 4), + CH_DEVICE(0x4484, 4), + CH_DEVICE(0x4485, 4), + CH_DEVICE(0x4486, 4), + CH_DEVICE(0x4487, 4), + CH_DEVICE(0x4488, 4), CH_DEVICE(0x5001, 4), CH_DEVICE(0x5002, 4), CH_DEVICE(0x5003, 4), From dde3aadf53ce4d5c5857d87619eeb9ff777a9c2f Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 27 Jun 2014 19:23:51 +0530 Subject: [PATCH 5/5] cxgb4vf: Adds device ID for few more Chelsio T4 Adapters Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index ff1cdd1788b5..f002af190a65 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2924,6 +2924,15 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4vf_pci_tbl) = { CH_DEVICE(0x480a, 0), /* T404-bt */ CH_DEVICE(0x480d, 0), /* T480-cr */ CH_DEVICE(0x480e, 0), /* T440-lp-cr */ + CH_DEVICE(0x4880, 0), + CH_DEVICE(0x4880, 1), + CH_DEVICE(0x4880, 2), + CH_DEVICE(0x4880, 3), + CH_DEVICE(0x4880, 4), + CH_DEVICE(0x4880, 5), + CH_DEVICE(0x4880, 6), + CH_DEVICE(0x4880, 7), + CH_DEVICE(0x4880, 8), CH_DEVICE(0x5800, 0), /* T580-dbg */ CH_DEVICE(0x5801, 0), /* T520-cr */ CH_DEVICE(0x5802, 0), /* T522-cr */