From 8c4ac0949f7779cd4cc8c618f1b07e6113682010 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 15 Jan 2008 21:11:28 +0100 Subject: [PATCH 01/26] ieee1394: sbp2: prepare for s/g chaining Signed-off-by: Stefan Richter --- drivers/ieee1394/sbp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c index 1eda11abeb1e..96eac0b53019 100644 --- a/drivers/ieee1394/sbp2.c +++ b/drivers/ieee1394/sbp2.c @@ -1489,7 +1489,7 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb, /* loop through and fill out our SBP-2 page tables * (and split up anything too large) */ - for (i = 0, sg_count = 0 ; i < count; i++, sgpnt++) { + for (i = 0, sg_count = 0; i < count; i++, sgpnt = sg_next(sgpnt)) { sg_len = sg_dma_len(sgpnt); sg_addr = sg_dma_address(sgpnt); while (sg_len) { From 825f1df545ab0289185373b0eaf06fb0b3487422 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 4 Nov 2007 14:59:24 +0100 Subject: [PATCH 02/26] ieee1394: sbp2: s/g list access cosmetics Replace sg->length by sg_dma_len(sg). Rename a variable for shorter line lengths and eliminate some superfluous local variables. Signed-off-by: Stefan Richter --- drivers/ieee1394/sbp2.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c index 96eac0b53019..b91142b8482c 100644 --- a/drivers/ieee1394/sbp2.c +++ b/drivers/ieee1394/sbp2.c @@ -1451,7 +1451,7 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb, struct sbp2_fwhost_info *hi, struct sbp2_command_info *cmd, unsigned int scsi_use_sg, - struct scatterlist *sgpnt, + struct scatterlist *sg, u32 orb_direction, enum dma_data_direction dma_dir) { @@ -1461,12 +1461,12 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb, /* special case if only one element (and less than 64KB in size) */ if ((scsi_use_sg == 1) && - (sgpnt[0].length <= SBP2_MAX_SG_ELEMENT_LENGTH)) { + (sg_dma_len(sg) <= SBP2_MAX_SG_ELEMENT_LENGTH)) { - cmd->dma_size = sgpnt[0].length; + cmd->dma_size = sg_dma_len(sg); cmd->dma_type = CMD_DMA_PAGE; cmd->cmd_dma = dma_map_page(hi->host->device.parent, - sg_page(&sgpnt[0]), sgpnt[0].offset, + sg_page(sg), sg->offset, cmd->dma_size, cmd->dma_dir); orb->data_descriptor_lo = cmd->cmd_dma; @@ -1477,11 +1477,11 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb, &cmd->scatter_gather_element[0]; u32 sg_count, sg_len; dma_addr_t sg_addr; - int i, count = dma_map_sg(hi->host->device.parent, sgpnt, + int i, count = dma_map_sg(hi->host->device.parent, sg, scsi_use_sg, dma_dir); cmd->dma_size = scsi_use_sg; - cmd->sge_buffer = sgpnt; + cmd->sge_buffer = sg; /* use page tables (s/g) */ orb->misc |= ORB_SET_PAGE_TABLE_PRESENT(0x1); @@ -1489,9 +1489,9 @@ static void sbp2_prep_command_orb_sg(struct sbp2_command_orb *orb, /* loop through and fill out our SBP-2 page tables * (and split up anything too large) */ - for (i = 0, sg_count = 0; i < count; i++, sgpnt = sg_next(sgpnt)) { - sg_len = sg_dma_len(sgpnt); - sg_addr = sg_dma_address(sgpnt); + for (i = 0, sg_count = 0; i < count; i++, sg = sg_next(sg)) { + sg_len = sg_dma_len(sg); + sg_addr = sg_dma_address(sg); while (sg_len) { sg_element[sg_count].segment_base_lo = sg_addr; if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) { @@ -1521,11 +1521,10 @@ static void sbp2_create_command_orb(struct sbp2_lu *lu, unchar *scsi_cmd, unsigned int scsi_use_sg, unsigned int scsi_request_bufflen, - void *scsi_request_buffer, + struct scatterlist *sg, enum dma_data_direction dma_dir) { struct sbp2_fwhost_info *hi = lu->hi; - struct scatterlist *sgpnt = (struct scatterlist *)scsi_request_buffer; struct sbp2_command_orb *orb = &cmd->command_orb; u32 orb_direction; @@ -1560,7 +1559,7 @@ static void sbp2_create_command_orb(struct sbp2_lu *lu, orb->data_descriptor_lo = 0x0; orb->misc |= ORB_SET_DIRECTION(1); } else - sbp2_prep_command_orb_sg(orb, hi, cmd, scsi_use_sg, sgpnt, + sbp2_prep_command_orb_sg(orb, hi, cmd, scsi_use_sg, sg, orb_direction, dma_dir); sbp2util_cpu_to_be32_buffer(orb, sizeof(*orb)); @@ -1650,7 +1649,6 @@ static int sbp2_send_command(struct sbp2_lu *lu, struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) { unchar *scsi_cmd = (unchar *)SCpnt->cmnd; - unsigned int request_bufflen = scsi_bufflen(SCpnt); struct sbp2_command_info *cmd; cmd = sbp2util_allocate_command_orb(lu, SCpnt, done); @@ -1658,7 +1656,7 @@ static int sbp2_send_command(struct sbp2_lu *lu, struct scsi_cmnd *SCpnt, return -EIO; sbp2_create_command_orb(lu, cmd, scsi_cmd, scsi_sg_count(SCpnt), - request_bufflen, scsi_sglist(SCpnt), + scsi_bufflen(SCpnt), scsi_sglist(SCpnt), SCpnt->sc_data_direction); sbp2_link_orb_command(lu, cmd); From a5c52df8bce65971b8db95149ebc1c5f1026af45 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 19 Nov 2007 17:48:10 -0800 Subject: [PATCH 03/26] ieee1394: Add missing "space" Signed-off-by: Joe Perches Signed-off-by: Stefan Richter --- drivers/ieee1394/raw1394.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index cadf0479cce5..37e7e109af38 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -858,7 +858,7 @@ static int arm_read(struct hpsb_host *host, int nodeid, quadlet_t * buffer, int found = 0, size = 0, rcode = -1; struct arm_request_response *arm_req_resp = NULL; - DBGMSG("arm_read called by node: %X" + DBGMSG("arm_read called by node: %X " "addr: %4.4x %8.8x length: %Zu", nodeid, (u16) ((addr >> 32) & 0xFFFF), (u32) (addr & 0xFFFFFFFF), length); @@ -1012,7 +1012,7 @@ static int arm_write(struct hpsb_host *host, int nodeid, int destid, int found = 0, size = 0, rcode = -1, length_conflict = 0; struct arm_request_response *arm_req_resp = NULL; - DBGMSG("arm_write called by node: %X" + DBGMSG("arm_write called by node: %X " "addr: %4.4x %8.8x length: %Zu", nodeid, (u16) ((addr >> 32) & 0xFFFF), (u32) (addr & 0xFFFFFFFF), length); From 61db81214bcef33a41325bdc436fb515b697fcdb Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 5 Dec 2007 18:15:53 +1100 Subject: [PATCH 04/26] ieee1394: nopage Convert ieee1394 from nopage to fault. Remove redundant vma range checks (correct resource range check is retained). Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Stefan Richter --- drivers/ieee1394/dma.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c index 7c4eb39b7024..ec024b5d5e5e 100644 --- a/drivers/ieee1394/dma.c +++ b/drivers/ieee1394/dma.c @@ -231,37 +231,32 @@ void dma_region_sync_for_device(struct dma_region *dma, unsigned long offset, #ifdef CONFIG_MMU -/* nopage() handler for mmap access */ +/* fault() handler for mmap access */ -static struct page *dma_region_pagefault(struct vm_area_struct *area, - unsigned long address, int *type) +static int dma_region_pagefault(struct vm_area_struct *vma, + struct vm_fault *vmf) { - unsigned long offset; unsigned long kernel_virt_addr; - struct page *ret = NOPAGE_SIGBUS; - struct dma_region *dma = (struct dma_region *)area->vm_private_data; + struct dma_region *dma = (struct dma_region *)vma->vm_private_data; if (!dma->kvirt) - goto out; + goto error; - if ((address < (unsigned long)area->vm_start) || - (address > - (unsigned long)area->vm_start + (dma->n_pages << PAGE_SHIFT))) - goto out; + if (vmf->pgoff >= dma->n_pages) + goto error; - if (type) - *type = VM_FAULT_MINOR; - offset = address - area->vm_start; - kernel_virt_addr = (unsigned long)dma->kvirt + offset; - ret = vmalloc_to_page((void *)kernel_virt_addr); - get_page(ret); - out: - return ret; + kernel_virt_addr = (unsigned long)dma->kvirt + (vmf->pgoff << PAGE_SHIFT); + vmf->page = vmalloc_to_page((void *)kernel_virt_addr); + get_page(vmf->page); + return 0; + + error: + return VM_FAULT_SIGBUS; } static struct vm_operations_struct dma_region_vm_ops = { - .nopage = dma_region_pagefault, + .fault = dma_region_pagefault, }; /** @@ -275,7 +270,7 @@ int dma_region_mmap(struct dma_region *dma, struct file *file, if (!dma->kvirt) return -EINVAL; - /* must be page-aligned */ + /* must be page-aligned (XXX: comment is wrong, we could allow pgoff) */ if (vma->vm_pgoff != 0) return -EINVAL; From c7ea990f87f9fbe6a7183c03f7993b3360454834 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 15 Dec 2007 14:04:42 +0100 Subject: [PATCH 05/26] ieee1394: small cleanup after "nopage" Signed-off-by: Stefan Richter --- drivers/ieee1394/dma.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c index ec024b5d5e5e..73685e7dc7e4 100644 --- a/drivers/ieee1394/dma.c +++ b/drivers/ieee1394/dma.c @@ -231,28 +231,20 @@ void dma_region_sync_for_device(struct dma_region *dma, unsigned long offset, #ifdef CONFIG_MMU -/* fault() handler for mmap access */ - static int dma_region_pagefault(struct vm_area_struct *vma, - struct vm_fault *vmf) + struct vm_fault *vmf) { - unsigned long kernel_virt_addr; - struct dma_region *dma = (struct dma_region *)vma->vm_private_data; if (!dma->kvirt) - goto error; + return VM_FAULT_SIGBUS; if (vmf->pgoff >= dma->n_pages) - goto error; + return VM_FAULT_SIGBUS; - kernel_virt_addr = (unsigned long)dma->kvirt + (vmf->pgoff << PAGE_SHIFT); - vmf->page = vmalloc_to_page((void *)kernel_virt_addr); + vmf->page = vmalloc_to_page(dma->kvirt + (vmf->pgoff << PAGE_SHIFT)); get_page(vmf->page); return 0; - - error: - return VM_FAULT_SIGBUS; } static struct vm_operations_struct dma_region_vm_ops = { From 3e75b493fbfad5d70831a2f7267c7cd8b8fec71f Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 15 Dec 2007 14:11:41 +0100 Subject: [PATCH 06/26] ieee1394: remove unused code The code has been in "#if 0 - #endif" since Linux 2.6.12. Signed-off-by: Stefan Richter --- drivers/ieee1394/ieee1394_transactions.c | 68 ------------------------ 1 file changed, 68 deletions(-) diff --git a/drivers/ieee1394/ieee1394_transactions.c b/drivers/ieee1394/ieee1394_transactions.c index 677989320951..10c3d9f8c038 100644 --- a/drivers/ieee1394/ieee1394_transactions.c +++ b/drivers/ieee1394/ieee1394_transactions.c @@ -570,71 +570,3 @@ int hpsb_write(struct hpsb_host *host, nodeid_t node, unsigned int generation, return retval; } - -#if 0 - -int hpsb_lock(struct hpsb_host *host, nodeid_t node, unsigned int generation, - u64 addr, int extcode, quadlet_t * data, quadlet_t arg) -{ - struct hpsb_packet *packet; - int retval = 0; - - BUG_ON(in_interrupt()); // We can't be called in an interrupt, yet - - packet = hpsb_make_lockpacket(host, node, addr, extcode, data, arg); - if (!packet) - return -ENOMEM; - - packet->generation = generation; - retval = hpsb_send_packet_and_wait(packet); - if (retval < 0) - goto hpsb_lock_fail; - - retval = hpsb_packet_success(packet); - - if (retval == 0) { - *data = packet->data[0]; - } - - hpsb_lock_fail: - hpsb_free_tlabel(packet); - hpsb_free_packet(packet); - - return retval; -} - -int hpsb_send_gasp(struct hpsb_host *host, int channel, unsigned int generation, - quadlet_t * buffer, size_t length, u32 specifier_id, - unsigned int version) -{ - struct hpsb_packet *packet; - int retval = 0; - u16 specifier_id_hi = (specifier_id & 0x00ffff00) >> 8; - u8 specifier_id_lo = specifier_id & 0xff; - - HPSB_VERBOSE("Send GASP: channel = %d, length = %Zd", channel, length); - - length += 8; - - packet = hpsb_make_streampacket(host, NULL, length, channel, 3, 0); - if (!packet) - return -ENOMEM; - - packet->data[0] = cpu_to_be32((host->node_id << 16) | specifier_id_hi); - packet->data[1] = - cpu_to_be32((specifier_id_lo << 24) | (version & 0x00ffffff)); - - memcpy(&(packet->data[2]), buffer, length - 8); - - packet->generation = generation; - - packet->no_waiter = 1; - - retval = hpsb_send_packet(packet); - if (retval < 0) - hpsb_free_packet(packet); - - return retval; -} - -#endif /* 0 */ From 4e6343a10b6afb5b036db35c4a0f0aa1333232e1 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 16 Dec 2007 17:31:26 +0100 Subject: [PATCH 07/26] ieee1394: sbp2: raise default transfer size limit This patch speeds up sbp2 a little bit --- but more importantly, it brings the behavior of sbp2 and fw-sbp2 closer to each other. Like fw-sbp2, sbp2 now does not limit the size of single transfers to 255 sectors anymore, unless told so by a blacklist flag or by module load parameters. Only very old bridge chips have been known to need the 255 sectors limit, and we have got one such chip in our hardwired blacklist. There certainly is a danger that more bridges need that limit; but I prefer to have this issue present in both fw-sbp2 and sbp2 rather than just one of them. An OXUF922 with 400GB 7200RPM disk on an S400 controller is sped up by this patch from 22.9 to 23.5 MB/s according to hdparm. The same effect could be achieved before by setting a higher max_sectors module parameter. On buses which use 1394b beta mode, sbp2 and fw-sbp2 will now achieve virtually the same bandwidth. Fw-sbp2 only remains faster on 1394a buses due to fw-core's gap count optimization. Signed-off-by: Stefan Richter --- drivers/ieee1394/sbp2.c | 26 +++++++++++++++----------- drivers/ieee1394/sbp2.h | 1 - 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c index b91142b8482c..2b889d91e673 100644 --- a/drivers/ieee1394/sbp2.c +++ b/drivers/ieee1394/sbp2.c @@ -51,6 +51,7 @@ * Grep for inline FIXME comments below. */ +#include #include #include #include @@ -127,17 +128,21 @@ MODULE_PARM_DESC(serialize_io, "Serialize requests coming from SCSI drivers " "(default = Y, faster but buggy = N)"); /* - * Bump up max_sectors if you'd like to support very large sized - * transfers. Please note that some older sbp2 bridge chips are broken for - * transfers greater or equal to 128KB. Default is a value of 255 - * sectors, or just under 128KB (at 512 byte sector size). I can note that - * the Oxsemi sbp2 chipsets have no problems supporting very large - * transfer sizes. + * Adjust max_sectors if you'd like to influence how many sectors each SCSI + * command can transfer at most. Please note that some older SBP-2 bridge + * chips are broken for transfers greater or equal to 128KB, therefore + * max_sectors used to be a safe 255 sectors for many years. We now have a + * default of 0 here which means that we let the SCSI stack choose a limit. + * + * The SBP2_WORKAROUND_128K_MAX_TRANS flag, if set either in the workarounds + * module parameter or in the sbp2_workarounds_table[], will override the + * value of max_sectors. We should use sbp2_workarounds_table[] to cover any + * bridge chip which becomes known to need the 255 sectors limit. */ -static int sbp2_max_sectors = SBP2_MAX_SECTORS; +static int sbp2_max_sectors; module_param_named(max_sectors, sbp2_max_sectors, int, 0444); MODULE_PARM_DESC(max_sectors, "Change max sectors per I/O supported " - "(default = " __stringify(SBP2_MAX_SECTORS) ")"); + "(default = 0 = use SCSI stack's default)"); /* * Exclusive login to sbp2 device? In most cases, the sbp2 driver should @@ -1985,6 +1990,8 @@ static int sbp2scsi_slave_configure(struct scsi_device *sdev) sdev->skip_ms_page_8 = 1; if (lu->workarounds & SBP2_WORKAROUND_FIX_CAPACITY) sdev->fix_capacity = 1; + if (lu->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS) + blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512); return 0; } @@ -2091,9 +2098,6 @@ static int sbp2_module_init(void) sbp2_shost_template.cmd_per_lun = 1; } - if (sbp2_default_workarounds & SBP2_WORKAROUND_128K_MAX_TRANS && - (sbp2_max_sectors * 512) > (128 * 1024)) - sbp2_max_sectors = 128 * 1024 / 512; sbp2_shost_template.max_sectors = sbp2_max_sectors; hpsb_register_highlevel(&sbp2_highlevel); diff --git a/drivers/ieee1394/sbp2.h b/drivers/ieee1394/sbp2.h index 333a4bb76743..d2ecb0d8a1bb 100644 --- a/drivers/ieee1394/sbp2.h +++ b/drivers/ieee1394/sbp2.h @@ -222,7 +222,6 @@ struct sbp2_status_block { */ #define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 -#define SBP2_MAX_SECTORS 255 /* There is no real limitation of the queue depth (i.e. length of the linked * list of command ORBs) at the target. The chosen depth is merely an * implementation detail of the sbp2 driver. */ From 85c5798b09e9248f29edbc42f10b99842661e85c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 16 Dec 2007 20:53:13 +0100 Subject: [PATCH 08/26] ieee1394: ohci1394: don't schedule IT tasklets on IR events Bug noted by Pieter Palmers: Isochronous transmit tasklets were scheduled on isochronous receive events, in addition to the proper isochronous receive tasklets. http://marc.info/?l=linux1394-devel&m=119783196222802 Signed-off-by: Stefan Richter --- drivers/ieee1394/ohci1394.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index 372c5c16eb31..969de2a2d633 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -2126,10 +2126,14 @@ static void ohci_schedule_iso_tasklets(struct ti_ohci *ohci, list_for_each_entry(t, &ohci->iso_tasklet_list, link) { mask = 1 << t->context; - if (t->type == OHCI_ISO_TRANSMIT && tx_event & mask) - tasklet_schedule(&t->tasklet); - else if (rx_event & mask) - tasklet_schedule(&t->tasklet); + if (t->type == OHCI_ISO_TRANSMIT) { + if (tx_event & mask) + tasklet_schedule(&t->tasklet); + } else { + /* OHCI_ISO_RECEIVE or OHCI_ISO_MULTICHANNEL_RECEIVE */ + if (rx_event & mask) + tasklet_schedule(&t->tasklet); + } } spin_unlock_irqrestore(&ohci->iso_tasklet_list_lock, flags); From 285838eb22ef0db77b464da70b7352cdbfffc939 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Wed, 7 Nov 2007 01:12:51 +0100 Subject: [PATCH 09/26] firewire: fw-sbp2: refactor workq and kref handling This somewhat reduces the size of firewire-sbp2.ko. Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 56 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index c2169d215bf7..2108cd92451b 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -628,6 +628,21 @@ static void sbp2_release_target(struct kref *kref) static struct workqueue_struct *sbp2_wq; +/* + * Always get the target's kref when scheduling work on one its units. + * Each workqueue job is responsible to call sbp2_target_put() upon return. + */ +static void sbp2_queue_work(struct sbp2_logical_unit *lu, unsigned long delay) +{ + if (queue_delayed_work(sbp2_wq, &lu->work, delay)) + kref_get(&lu->tgt->kref); +} + +static void sbp2_target_put(struct sbp2_target *tgt) +{ + kref_put(&tgt->kref, sbp2_release_target); +} + static void sbp2_reconnect(struct work_struct *work); static void sbp2_login(struct work_struct *work) @@ -649,16 +664,12 @@ static void sbp2_login(struct work_struct *work) if (sbp2_send_management_orb(lu, node_id, generation, SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) { - if (lu->retries++ < 5) { - if (queue_delayed_work(sbp2_wq, &lu->work, - DIV_ROUND_UP(HZ, 5))) - kref_get(&lu->tgt->kref); - } else { + if (lu->retries++ < 5) + sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); + else fw_error("failed to login to %s LUN %04x\n", unit->device.bus_id, lu->lun); - } - kref_put(&lu->tgt->kref, sbp2_release_target); - return; + goto out; } lu->generation = generation; @@ -700,7 +711,8 @@ static void sbp2_login(struct work_struct *work) lu->sdev = sdev; scsi_device_put(sdev); } - kref_put(&lu->tgt->kref, sbp2_release_target); + out: + sbp2_target_put(lu->tgt); } static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) @@ -865,18 +877,13 @@ static int sbp2_probe(struct device *dev) get_device(&unit->device); - /* - * We schedule work to do the login so we can easily - * reschedule retries. Always get the ref before scheduling - * work. - */ + /* Do the login in a workqueue so we can easily reschedule retries. */ list_for_each_entry(lu, &tgt->lu_list, link) - if (queue_delayed_work(sbp2_wq, &lu->work, 0)) - kref_get(&tgt->kref); + sbp2_queue_work(lu, 0); return 0; fail_tgt_put: - kref_put(&tgt->kref, sbp2_release_target); + sbp2_target_put(tgt); return -ENOMEM; fail_shost_put: @@ -889,7 +896,7 @@ static int sbp2_remove(struct device *dev) struct fw_unit *unit = fw_unit(dev); struct sbp2_target *tgt = unit->device.driver_data; - kref_put(&tgt->kref, sbp2_release_target); + sbp2_target_put(tgt); return 0; } @@ -915,10 +922,8 @@ static void sbp2_reconnect(struct work_struct *work) lu->retries = 0; PREPARE_DELAYED_WORK(&lu->work, sbp2_login); } - if (queue_delayed_work(sbp2_wq, &lu->work, DIV_ROUND_UP(HZ, 5))) - kref_get(&lu->tgt->kref); - kref_put(&lu->tgt->kref, sbp2_release_target); - return; + sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); + goto out; } lu->generation = generation; @@ -930,8 +935,8 @@ static void sbp2_reconnect(struct work_struct *work) sbp2_agent_reset(lu); sbp2_cancel_orbs(lu); - - kref_put(&lu->tgt->kref, sbp2_release_target); + out: + sbp2_target_put(lu->tgt); } static void sbp2_update(struct fw_unit *unit) @@ -947,8 +952,7 @@ static void sbp2_update(struct fw_unit *unit) */ list_for_each_entry(lu, &tgt->lu_list, link) { lu->retries = 0; - if (queue_delayed_work(sbp2_wq, &lu->work, 0)) - kref_get(&tgt->kref); + sbp2_queue_work(lu, 0); } } From b7811da2d94d8e1f77015ec9afa4575ddc9981a4 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 15 Jan 2008 21:10:50 +0100 Subject: [PATCH 10/26] firewire: fw-sbp2: prepare for s/g chaining Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 2108cd92451b..8281ad94bd8c 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -1107,9 +1107,9 @@ sbp2_map_scatterlist(struct sbp2_command_orb *orb, struct fw_device *device, * elements larger than 65535 bytes, some IOMMUs may merge sg elements * during DMA mapping, and Linux currently doesn't prevent this. */ - for (i = 0, j = 0; i < count; i++) { - sg_len = sg_dma_len(sg + i); - sg_addr = sg_dma_address(sg + i); + for (i = 0, j = 0; i < count; i++, sg = sg_next(sg)) { + sg_len = sg_dma_len(sg); + sg_addr = sg_dma_address(sg); while (sg_len) { /* FIXME: This won't get us out of the pinch. */ if (unlikely(j >= ARRAY_SIZE(orb->page_table))) { From 4b11ea96a08a0f97a16edba55a615354c6d846b5 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 16 Dec 2007 17:32:11 +0100 Subject: [PATCH 11/26] firewire: fw-sbp2: remove unused misleading macro SBP2_MAX_SECTORS is nowhere used in fw-sbp2. It merely got copied over from sbp2 where it played a role in the past. Signed-off-by: Stefan Richter --- drivers/firewire/fw-sbp2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 8281ad94bd8c..e5a2571a3671 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -151,9 +151,7 @@ struct sbp2_target { }; #define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 -#define SBP2_MAX_SECTORS 255 /* Max sectors supported */ #define SBP2_ORB_TIMEOUT 2000 /* Timeout in ms */ - #define SBP2_ORB_NULL 0x80000000 #define SBP2_DIRECTION_TO_MEDIA 0x0 From 0642b6577f1d455ed99e2da4a4d9216a866d2449 Mon Sep 17 00:00:00 2001 From: David Moore Date: Wed, 19 Dec 2007 03:09:18 -0500 Subject: [PATCH 12/26] firewire: fw-ohci: Fix for dualbuffer three-or-more buffers This patch fixes the problem where different OHCI 1.1 controllers behave differently when a received iso packet straddles three or more buffers when using the dual-buffer receive mode. Two changes are made in order to handle this situation: 1. The packet sync DMA descriptor is given a non-zero header length and non-zero payload length. This is because zero-payload descriptors are not discussed in the OHCI 1.1 specs and their behavior is thus undefined. Instead we use a header size just large enough for a single header and a payload length of 4 bytes for this first descriptor. 2. As we process received packets in the context's tasklet, read the packet length out of the headers. Keep track of the running total of the packet length as "excess_bytes", so we can ignore any descriptors where no packet starts or ends. These descriptors may not have had their first_res_count or second_res_count fields updated by the controller so we cannot rely on those values. The main drawback of this patch is that the excess_bytes value might get "out of sync" with the packet descriptors if something strange happens to the DMA program. I'm not if such a thing could ever happen, but I appreciate any suggestions in making it more robust. Also, the packet-per-buffer support may need a similar fix to deal with issue 1, but I haven't done any work on that yet. Stefan, I'm hoping that with this patch, all your OHCI 1.1 controllers will work properly with an unmodified version of libdc1394. Signed-off-by: David Moore Signed-off-by: Stefan Richter --- drivers/firewire/fw-ohci.c | 44 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 436a855a4c60..a44d16d0c505 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -125,6 +125,7 @@ struct context { struct iso_context { struct fw_iso_context base; struct context context; + int excess_bytes; void *header; size_t header_length; }; @@ -1408,9 +1409,13 @@ static int handle_ir_dualbuffer_packet(struct context *context, void *p, *end; int i; - if (db->first_res_count > 0 && db->second_res_count > 0) - /* This descriptor isn't done yet, stop iteration. */ - return 0; + if (db->first_res_count > 0 && db->second_res_count > 0) { + if (ctx->excess_bytes <= le16_to_cpu(db->second_req_count)) { + /* This descriptor isn't done yet, stop iteration. */ + return 0; + } + ctx->excess_bytes -= le16_to_cpu(db->second_req_count); + } header_length = le16_to_cpu(db->first_req_count) - le16_to_cpu(db->first_res_count); @@ -1429,11 +1434,15 @@ static int handle_ir_dualbuffer_packet(struct context *context, *(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4)); memcpy(ctx->header + i + 4, p + 8, ctx->base.header_size - 4); i += ctx->base.header_size; + ctx->excess_bytes += + (le32_to_cpu(*(u32 *)(p + 4)) >> 16) & 0xffff; p += ctx->base.header_size + 4; } - ctx->header_length = i; + ctx->excess_bytes -= le16_to_cpu(db->second_req_count) - + le16_to_cpu(db->second_res_count); + if (le16_to_cpu(db->control) & DESCRIPTOR_IRQ_ALWAYS) { ir_header = (__le32 *) (db + 1); ctx->base.callback(&ctx->base, @@ -1775,19 +1784,6 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, * packet, retransmit or terminate.. */ - if (packet->skip) { - d = context_get_descriptors(&ctx->context, 2, &d_bus); - if (d == NULL) - return -ENOMEM; - - db = (struct db_descriptor *) d; - db->control = cpu_to_le16(DESCRIPTOR_STATUS | - DESCRIPTOR_BRANCH_ALWAYS | - DESCRIPTOR_WAIT); - db->first_size = cpu_to_le16(ctx->base.header_size + 4); - context_append(&ctx->context, d, 2, 0); - } - p = packet; z = 2; @@ -1815,11 +1811,18 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, db->control = cpu_to_le16(DESCRIPTOR_STATUS | DESCRIPTOR_BRANCH_ALWAYS); db->first_size = cpu_to_le16(ctx->base.header_size + 4); - db->first_req_count = cpu_to_le16(header_size); + if (p->skip && rest == p->payload_length) { + db->control |= cpu_to_le16(DESCRIPTOR_WAIT); + db->first_req_count = db->first_size; + } else { + db->first_req_count = cpu_to_le16(header_size); + } db->first_res_count = db->first_req_count; db->first_buffer = cpu_to_le32(d_bus + sizeof(*db)); - if (offset + rest < PAGE_SIZE) + if (p->skip && rest == p->payload_length) + length = 4; + else if (offset + rest < PAGE_SIZE) length = rest; else length = PAGE_SIZE - offset; @@ -1835,7 +1838,8 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, context_append(&ctx->context, d, z, header_z); offset = (offset + length) & ~PAGE_MASK; rest -= length; - page++; + if (offset == 0) + page++; } return 0; From bcee893c6cba88badd292b636f33a8677c0dd962 Mon Sep 17 00:00:00 2001 From: David Moore Date: Wed, 19 Dec 2007 15:26:38 -0500 Subject: [PATCH 13/26] firewire: fw-ohci: Bug fixes for packet-per-buffer support This patch corrects a number of bugs in the current OHCI 1.0 packet-per-buffer support: 1. Correctly deal with payloads that cross a page boundary. The previous version would not split the descriptor at such a boundary, potentially corrupting unrelated memory. 2. Allow user-space to specify multiple packets per struct fw_cdev_iso_packet in the same way that dual-buffer allows. This is signaled by header_length being a multiple of header_size. This multiple determines the number of packets. The payload size allocated per packet is determined by dividing the total payload size by the number of packets. 3. Make sync support work properly for packet-per-buffer. I have tested this patch with libdc1394 by forcing my OHCI 1.1 controller to use the packet-per-buffer support instead of dual-buffer. I would greatly appreciate testing by those who have a DV devices and other types of iso streamers to make sure I didn't cause any regressions. Stefan, with this patch, I'm hoping that libdc1394 will work with all your OHCI 1.0 controllers now. The one bit of future work that remains for packet-per-buffer support is the automatic compaction of short payloads that I discussed with Kristian. Signed-off-by: David Moore Signed-off-by: Stefan Richter --- drivers/firewire/fw-ohci.c | 99 +++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index a44d16d0c505..a9f2d07e7c65 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -1461,24 +1461,24 @@ static int handle_ir_packet_per_buffer(struct context *context, { struct iso_context *ctx = container_of(context, struct iso_context, context); - struct descriptor *pd = d + 1; + struct descriptor *pd; __le32 *ir_header; - size_t header_length; - void *p, *end; - int i, z; + void *p; + int i; - if (pd->res_count == pd->req_count) + for (pd = d; pd <= last; pd++) { + if (pd->transfer_status) + break; + } + if (pd > last) /* Descriptor(s) not done yet, stop iteration */ return 0; - header_length = le16_to_cpu(d->req_count); - i = ctx->header_length; - z = le32_to_cpu(pd->branch_address) & 0xf; - p = d + z; - end = p + header_length; + p = last + 1; - while (p < end && i + ctx->base.header_size <= PAGE_SIZE) { + if (ctx->base.header_size > 0 && + i + ctx->base.header_size <= PAGE_SIZE) { /* * The iso header is byteswapped to little endian by * the controller, but the remaining header quadlets @@ -1487,14 +1487,11 @@ static int handle_ir_packet_per_buffer(struct context *context, */ *(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4)); memcpy(ctx->header + i + 4, p + 8, ctx->base.header_size - 4); - i += ctx->base.header_size; - p += ctx->base.header_size + 4; + ctx->header_length += ctx->base.header_size; } - ctx->header_length = i; - - if (le16_to_cpu(pd->control) & DESCRIPTOR_IRQ_ALWAYS) { - ir_header = (__le32 *) (d + z); + if (le16_to_cpu(last->control) & DESCRIPTOR_IRQ_ALWAYS) { + ir_header = (__le32 *) p; ctx->base.callback(&ctx->base, le32_to_cpu(ir_header[0]) & 0xffff, ctx->header_length, ctx->header, @@ -1502,7 +1499,6 @@ static int handle_ir_packet_per_buffer(struct context *context, ctx->header_length = 0; } - return 1; } @@ -1853,67 +1849,70 @@ ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base, { struct iso_context *ctx = container_of(base, struct iso_context, base); struct descriptor *d = NULL, *pd = NULL; - struct fw_iso_packet *p; + struct fw_iso_packet *p = packet; dma_addr_t d_bus, page_bus; u32 z, header_z, rest; - int i, page, offset, packet_count, header_size; - - if (packet->skip) { - d = context_get_descriptors(&ctx->context, 1, &d_bus); - if (d == NULL) - return -ENOMEM; - - d->control = cpu_to_le16(DESCRIPTOR_STATUS | - DESCRIPTOR_INPUT_LAST | - DESCRIPTOR_BRANCH_ALWAYS | - DESCRIPTOR_WAIT); - context_append(&ctx->context, d, 1, 0); - } - - /* one descriptor for header, one for payload */ - /* FIXME: handle cases where we need multiple desc. for payload */ - z = 2; - p = packet; + int i, j, length; + int page, offset, packet_count, header_size, payload_per_buffer; /* * The OHCI controller puts the status word in the * buffer too, so we need 4 extra bytes per packet. */ packet_count = p->header_length / ctx->base.header_size; - header_size = packet_count * (ctx->base.header_size + 4); + header_size = ctx->base.header_size + 4; /* Get header size in number of descriptors. */ header_z = DIV_ROUND_UP(header_size, sizeof(*d)); page = payload >> PAGE_SHIFT; offset = payload & ~PAGE_MASK; - rest = p->payload_length; + payload_per_buffer = p->payload_length / packet_count; for (i = 0; i < packet_count; i++) { /* d points to the header descriptor */ + z = DIV_ROUND_UP(payload_per_buffer + offset, PAGE_SIZE) + 1; d = context_get_descriptors(&ctx->context, - z + header_z, &d_bus); + z + header_z, &d_bus); if (d == NULL) return -ENOMEM; - d->control = cpu_to_le16(DESCRIPTOR_INPUT_MORE); + d->control = cpu_to_le16(DESCRIPTOR_STATUS | + DESCRIPTOR_INPUT_MORE); + if (p->skip && i == 0) + d->control |= cpu_to_le16(DESCRIPTOR_WAIT); d->req_count = cpu_to_le16(header_size); d->res_count = d->req_count; + d->transfer_status = 0; d->data_address = cpu_to_le32(d_bus + (z * sizeof(*d))); - /* pd points to the payload descriptor */ - pd = d + 1; + rest = payload_per_buffer; + for (j = 1; j < z; j++) { + pd = d + j; + pd->control = cpu_to_le16(DESCRIPTOR_STATUS | + DESCRIPTOR_INPUT_MORE); + + if (offset + rest < PAGE_SIZE) + length = rest; + else + length = PAGE_SIZE - offset; + pd->req_count = cpu_to_le16(length); + pd->res_count = pd->req_count; + pd->transfer_status = 0; + + page_bus = page_private(buffer->pages[page]); + pd->data_address = cpu_to_le32(page_bus + offset); + + offset = (offset + length) & ~PAGE_MASK; + rest -= length; + if (offset == 0) + page++; + } pd->control = cpu_to_le16(DESCRIPTOR_STATUS | DESCRIPTOR_INPUT_LAST | DESCRIPTOR_BRANCH_ALWAYS); - if (p->interrupt) + if (p->interrupt && i == packet_count - 1) pd->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS); - pd->req_count = cpu_to_le16(rest); - pd->res_count = pd->req_count; - - page_bus = page_private(buffer->pages[page]); - pd->data_address = cpu_to_le32(page_bus + offset); - context_append(&ctx->context, d, z, header_z); } From 478b233eda81bfe41307512b8336fd688c6553e0 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 21 Dec 2007 23:02:15 +0530 Subject: [PATCH 14/26] firewire: Fix extraction of source node id Fix extraction of the source node id from the packet header. Signed-off-by: Rabin Vincent Signed-off-by: Stefan Richter --- drivers/firewire/fw-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c index c00d4a9b39e5..8018c3b9df0f 100644 --- a/drivers/firewire/fw-transaction.c +++ b/drivers/firewire/fw-transaction.c @@ -650,7 +650,7 @@ fw_core_handle_request(struct fw_card *card, struct fw_packet *p) HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2]; tcode = HEADER_GET_TCODE(p->header[0]); destination = HEADER_GET_DESTINATION(p->header[0]); - source = HEADER_GET_SOURCE(p->header[0]); + source = HEADER_GET_SOURCE(p->header[1]); spin_lock_irqsave(&address_handler_lock, flags); handler = lookup_enclosing_address_handler(&address_handler_list, From bb9f2206b60ace29e49a057fbd9be86d79d86200 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 22 Dec 2007 22:14:52 +0100 Subject: [PATCH 15/26] firewire: fw-ohci: CycleTooLong interrupt management The firewire-ohci driver so far lacked the ability to resume cycle master duty after that condition happened, as added to ohci1394 in Linux 2.6.18 by commit 57fdb58fa5a140bdd52cf4c4ffc30df73676f0a5. This ports this patch to fw-ohci. The "cycle too long" condition has been seen in practice - with IIDC cameras if a mode with packets too large for a speed is chosen, - sporadically when capturing DV on a VIA VT6306 card with ohci1394/ ieee1394/ raw1394/ dvgrab 2. https://bugzilla.redhat.com/show_bug.cgi?id=415841#c7 (This does not fix Fedora bug 415841.) Signed-off-by: Stefan Richter --- drivers/firewire/fw-ohci.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index a9f2d07e7c65..74d5d945f200 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -1079,6 +1079,13 @@ static irqreturn_t irq_handler(int irq, void *data) if (unlikely(event & OHCI1394_postedWriteErr)) fw_error("PCI posted write error\n"); + if (unlikely(event & OHCI1394_cycleTooLong)) { + if (printk_ratelimit()) + fw_notify("isochronous cycle too long\n"); + reg_write(ohci, OHCI1394_LinkControlSet, + OHCI1394_LinkControl_cycleMaster); + } + if (event & OHCI1394_cycle64Seconds) { cycle_time = reg_read(ohci, OHCI1394_IsochronousCycleTimer); if ((cycle_time & 0x80000000) == 0) @@ -1152,8 +1159,8 @@ static int ohci_enable(struct fw_card *card, u32 *config_rom, size_t length) OHCI1394_RQPkt | OHCI1394_RSPkt | OHCI1394_reqTxComplete | OHCI1394_respTxComplete | OHCI1394_isochRx | OHCI1394_isochTx | - OHCI1394_postedWriteErr | OHCI1394_cycle64Seconds | - OHCI1394_masterIntEnable); + OHCI1394_postedWriteErr | OHCI1394_cycleTooLong | + OHCI1394_cycle64Seconds | OHCI1394_masterIntEnable); /* Activate link_on bit and contender bit in our self ID packets.*/ if (ohci_update_phy_reg(card, 4, 0, From fe5ca63430d640c3a922e5d7c6dd411ab6a2e077 Mon Sep 17 00:00:00 2001 From: David Moore Date: Sun, 6 Jan 2008 17:21:41 -0500 Subject: [PATCH 16/26] firewire: fw-ohci: Dynamically allocate buffers for DMA descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the fw-ohci driver used fixed-length buffers for storing descriptors for isochronous receive DMA programs. If an application (such as libdc1394) generated a DMA program that was too large, fw-ohci would reach the limit of its fixed-sized buffer and return an error to userspace. This patch replaces the fixed-length ring-buffer with a linked-list of page-sized buffers. Additional buffers can be dynamically allocated and appended to the list when necessary. For a particular context, buffers are kept around after use and reused as necessary, so there is no allocation taking place after the DMA program is generated for the first time. In addition, the buffers it uses are coherent for DMA so there is no syncing required before and after writes. This syncing wasn't properly done in the previous version of the code. - This is the fourth version of my patch that replaces a fixed-length buffer for DMA descriptors with a dynamically allocated linked-list of buffers. As we discovered with the last attempt, new context programs are sometimes queued from interrupt context, making it unacceptable to call tasklet_disable() from context_get_descriptors(). This version of the patch uses ohci->lock for all locking needs instead of tasklet_disable/enable. There is a new requirement that context_get_descriptors() be called while holding ohci->lock. It was already held for the AT context, so adding the requirement for the iso context did not seem particularly onerous. In addition, this has the side benefit of allowing iso queue to be safely called from concurrent user-space threads, which previously was not safe. Signed-off-by: David Moore Signed-off-by: Kristian Høgsberg Signed-off-by: Jarod Wilson - Fixes the following issues: - Isochronous reception stopped prematurely if an application used a larger buffer. (Reproduced with coriander.) - Isochronous reception stopped after one or a few frames on VT630x in OHCI 1.0 mode. (Fixes reception in coriander, but dvgrab still doesn't work with these chips.) Patch update: struct member alignment, whitespace nits Signed-off-by: Stefan Richter --- drivers/firewire/fw-ohci.c | 232 ++++++++++++++++++++++++------------- 1 file changed, 154 insertions(+), 78 deletions(-) diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index 74d5d945f200..7ebad3c14cb8 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c @@ -98,17 +98,48 @@ struct context; typedef int (*descriptor_callback_t)(struct context *ctx, struct descriptor *d, struct descriptor *last); + +/* + * A buffer that contains a block of DMA-able coherent memory used for + * storing a portion of a DMA descriptor program. + */ +struct descriptor_buffer { + struct list_head list; + dma_addr_t buffer_bus; + size_t buffer_size; + size_t used; + struct descriptor buffer[0]; +}; + struct context { struct fw_ohci *ohci; u32 regs; + int total_allocation; - struct descriptor *buffer; - dma_addr_t buffer_bus; - size_t buffer_size; - struct descriptor *head_descriptor; - struct descriptor *tail_descriptor; - struct descriptor *tail_descriptor_last; - struct descriptor *prev_descriptor; + /* + * List of page-sized buffers for storing DMA descriptors. + * Head of list contains buffers in use and tail of list contains + * free buffers. + */ + struct list_head buffer_list; + + /* + * Pointer to a buffer inside buffer_list that contains the tail + * end of the current DMA program. + */ + struct descriptor_buffer *buffer_tail; + + /* + * The descriptor containing the branch address of the first + * descriptor that has not yet been filled by the device. + */ + struct descriptor *last; + + /* + * The last descriptor in the DMA program. It contains the branch + * address that must be updated upon appending a new descriptor. + */ + struct descriptor *prev; descriptor_callback_t callback; @@ -198,8 +229,6 @@ static inline struct fw_ohci *fw_ohci(struct fw_card *card) #define SELF_ID_BUF_SIZE 0x800 #define OHCI_TCODE_PHY_PACKET 0x0e #define OHCI_VERSION_1_1 0x010010 -#define ISO_BUFFER_SIZE (64 * 1024) -#define AT_BUFFER_SIZE 4096 static char ohci_driver_name[] = KBUILD_MODNAME; @@ -456,71 +485,108 @@ find_branch_descriptor(struct descriptor *d, int z) static void context_tasklet(unsigned long data) { struct context *ctx = (struct context *) data; - struct fw_ohci *ohci = ctx->ohci; struct descriptor *d, *last; u32 address; int z; + struct descriptor_buffer *desc; - dma_sync_single_for_cpu(ohci->card.device, ctx->buffer_bus, - ctx->buffer_size, DMA_TO_DEVICE); - - d = ctx->tail_descriptor; - last = ctx->tail_descriptor_last; - + desc = list_entry(ctx->buffer_list.next, + struct descriptor_buffer, list); + last = ctx->last; while (last->branch_address != 0) { + struct descriptor_buffer *old_desc = desc; address = le32_to_cpu(last->branch_address); z = address & 0xf; - d = ctx->buffer + (address - ctx->buffer_bus) / sizeof(*d); + address &= ~0xf; + + /* If the branch address points to a buffer outside of the + * current buffer, advance to the next buffer. */ + if (address < desc->buffer_bus || + address >= desc->buffer_bus + desc->used) + desc = list_entry(desc->list.next, + struct descriptor_buffer, list); + d = desc->buffer + (address - desc->buffer_bus) / sizeof(*d); last = find_branch_descriptor(d, z); if (!ctx->callback(ctx, d, last)) break; - ctx->tail_descriptor = d; - ctx->tail_descriptor_last = last; + if (old_desc != desc) { + /* If we've advanced to the next buffer, move the + * previous buffer to the free list. */ + unsigned long flags; + old_desc->used = 0; + spin_lock_irqsave(&ctx->ohci->lock, flags); + list_move_tail(&old_desc->list, &ctx->buffer_list); + spin_unlock_irqrestore(&ctx->ohci->lock, flags); + } + ctx->last = last; } } +/* + * Allocate a new buffer and add it to the list of free buffers for this + * context. Must be called with ohci->lock held. + */ +static int +context_add_buffer(struct context *ctx) +{ + struct descriptor_buffer *desc; + dma_addr_t bus_addr; + int offset; + + /* + * 16MB of descriptors should be far more than enough for any DMA + * program. This will catch run-away userspace or DoS attacks. + */ + if (ctx->total_allocation >= 16*1024*1024) + return -ENOMEM; + + desc = dma_alloc_coherent(ctx->ohci->card.device, PAGE_SIZE, + &bus_addr, GFP_ATOMIC); + if (!desc) + return -ENOMEM; + + offset = (void *)&desc->buffer - (void *)desc; + desc->buffer_size = PAGE_SIZE - offset; + desc->buffer_bus = bus_addr + offset; + desc->used = 0; + + list_add_tail(&desc->list, &ctx->buffer_list); + ctx->total_allocation += PAGE_SIZE; + + return 0; +} + static int context_init(struct context *ctx, struct fw_ohci *ohci, - size_t buffer_size, u32 regs, - descriptor_callback_t callback) + u32 regs, descriptor_callback_t callback) { ctx->ohci = ohci; ctx->regs = regs; - ctx->buffer_size = buffer_size; - ctx->buffer = kmalloc(buffer_size, GFP_KERNEL); - if (ctx->buffer == NULL) + ctx->total_allocation = 0; + + INIT_LIST_HEAD(&ctx->buffer_list); + if (context_add_buffer(ctx) < 0) return -ENOMEM; + ctx->buffer_tail = list_entry(ctx->buffer_list.next, + struct descriptor_buffer, list); + tasklet_init(&ctx->tasklet, context_tasklet, (unsigned long)ctx); ctx->callback = callback; - ctx->buffer_bus = - dma_map_single(ohci->card.device, ctx->buffer, - buffer_size, DMA_TO_DEVICE); - if (dma_mapping_error(ctx->buffer_bus)) { - kfree(ctx->buffer); - return -ENOMEM; - } - - ctx->head_descriptor = ctx->buffer; - ctx->prev_descriptor = ctx->buffer; - ctx->tail_descriptor = ctx->buffer; - ctx->tail_descriptor_last = ctx->buffer; - /* * We put a dummy descriptor in the buffer that has a NULL * branch address and looks like it's been sent. That way we - * have a descriptor to append DMA programs to. Also, the - * ring buffer invariant is that it always has at least one - * element so that head == tail means buffer full. + * have a descriptor to append DMA programs to. */ - - memset(ctx->head_descriptor, 0, sizeof(*ctx->head_descriptor)); - ctx->head_descriptor->control = cpu_to_le16(DESCRIPTOR_OUTPUT_LAST); - ctx->head_descriptor->transfer_status = cpu_to_le16(0x8011); - ctx->head_descriptor++; + memset(ctx->buffer_tail->buffer, 0, sizeof(*ctx->buffer_tail->buffer)); + ctx->buffer_tail->buffer->control = cpu_to_le16(DESCRIPTOR_OUTPUT_LAST); + ctx->buffer_tail->buffer->transfer_status = cpu_to_le16(0x8011); + ctx->buffer_tail->used += sizeof(*ctx->buffer_tail->buffer); + ctx->last = ctx->buffer_tail->buffer; + ctx->prev = ctx->buffer_tail->buffer; return 0; } @@ -529,35 +595,42 @@ static void context_release(struct context *ctx) { struct fw_card *card = &ctx->ohci->card; + struct descriptor_buffer *desc, *tmp; - dma_unmap_single(card->device, ctx->buffer_bus, - ctx->buffer_size, DMA_TO_DEVICE); - kfree(ctx->buffer); + list_for_each_entry_safe(desc, tmp, &ctx->buffer_list, list) + dma_free_coherent(card->device, PAGE_SIZE, desc, + desc->buffer_bus - + ((void *)&desc->buffer - (void *)desc)); } +/* Must be called with ohci->lock held */ static struct descriptor * context_get_descriptors(struct context *ctx, int z, dma_addr_t *d_bus) { - struct descriptor *d, *tail, *end; + struct descriptor *d = NULL; + struct descriptor_buffer *desc = ctx->buffer_tail; - d = ctx->head_descriptor; - tail = ctx->tail_descriptor; - end = ctx->buffer + ctx->buffer_size / sizeof(*d); + if (z * sizeof(*d) > desc->buffer_size) + return NULL; - if (d + z <= tail) { - goto has_space; - } else if (d > tail && d + z <= end) { - goto has_space; - } else if (d > tail && ctx->buffer + z <= tail) { - d = ctx->buffer; - goto has_space; + if (z * sizeof(*d) > desc->buffer_size - desc->used) { + /* No room for the descriptor in this buffer, so advance to the + * next one. */ + + if (desc->list.next == &ctx->buffer_list) { + /* If there is no free buffer next in the list, + * allocate one. */ + if (context_add_buffer(ctx) < 0) + return NULL; + } + desc = list_entry(desc->list.next, + struct descriptor_buffer, list); + ctx->buffer_tail = desc; } - return NULL; - - has_space: + d = desc->buffer + desc->used / sizeof(*d); memset(d, 0, z * sizeof(*d)); - *d_bus = ctx->buffer_bus + (d - ctx->buffer) * sizeof(*d); + *d_bus = desc->buffer_bus + desc->used; return d; } @@ -567,7 +640,7 @@ static void context_run(struct context *ctx, u32 extra) struct fw_ohci *ohci = ctx->ohci; reg_write(ohci, COMMAND_PTR(ctx->regs), - le32_to_cpu(ctx->tail_descriptor_last->branch_address)); + le32_to_cpu(ctx->last->branch_address)); reg_write(ohci, CONTROL_CLEAR(ctx->regs), ~0); reg_write(ohci, CONTROL_SET(ctx->regs), CONTEXT_RUN | extra); flush_writes(ohci); @@ -577,15 +650,13 @@ static void context_append(struct context *ctx, struct descriptor *d, int z, int extra) { dma_addr_t d_bus; + struct descriptor_buffer *desc = ctx->buffer_tail; - d_bus = ctx->buffer_bus + (d - ctx->buffer) * sizeof(*d); + d_bus = desc->buffer_bus + (d - desc->buffer) * sizeof(*d); - ctx->head_descriptor = d + z + extra; - ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z); - ctx->prev_descriptor = find_branch_descriptor(d, z); - - dma_sync_single_for_device(ctx->ohci->card.device, ctx->buffer_bus, - ctx->buffer_size, DMA_TO_DEVICE); + desc->used += (z + extra) * sizeof(*d); + ctx->prev->branch_address = cpu_to_le32(d_bus | z); + ctx->prev = find_branch_descriptor(d, z); reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); flush_writes(ctx->ohci); @@ -1571,8 +1642,7 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size) if (ctx->header == NULL) goto out; - retval = context_init(&ctx->context, ohci, ISO_BUFFER_SIZE, - regs, callback); + retval = context_init(&ctx->context, ohci, regs, callback); if (retval < 0) goto out_with_header; @@ -1933,16 +2003,22 @@ ohci_queue_iso(struct fw_iso_context *base, unsigned long payload) { struct iso_context *ctx = container_of(base, struct iso_context, base); + unsigned long flags; + int retval; + spin_lock_irqsave(&ctx->context.ohci->lock, flags); if (base->type == FW_ISO_CONTEXT_TRANSMIT) - return ohci_queue_iso_transmit(base, packet, buffer, payload); + retval = ohci_queue_iso_transmit(base, packet, buffer, payload); else if (ctx->context.ohci->version >= OHCI_VERSION_1_1) - return ohci_queue_iso_receive_dualbuffer(base, packet, + retval = ohci_queue_iso_receive_dualbuffer(base, packet, buffer, payload); else - return ohci_queue_iso_receive_packet_per_buffer(base, packet, + retval = ohci_queue_iso_receive_packet_per_buffer(base, packet, buffer, payload); + spin_unlock_irqrestore(&ctx->context.ohci->lock, flags); + + return retval; } static const struct fw_card_driver ohci_driver = { @@ -2014,10 +2090,10 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) ar_context_init(&ohci->ar_response_ctx, ohci, OHCI1394_AsRspRcvContextControlSet); - context_init(&ohci->at_request_ctx, ohci, AT_BUFFER_SIZE, + context_init(&ohci->at_request_ctx, ohci, OHCI1394_AsReqTrContextControlSet, handle_at_packet); - context_init(&ohci->at_response_ctx, ohci, AT_BUFFER_SIZE, + context_init(&ohci->at_response_ctx, ohci, OHCI1394_AsRspTrContextControlSet, handle_at_packet); reg_write(ohci, OHCI1394_IsoRecvIntMaskSet, ~0); From f148e20cef696e30a370d4f7cb9aeb46273fdd6e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 12 Jan 2008 12:32:44 +0100 Subject: [PATCH 17/26] firewire vs. ieee1394: clarify MAINTAINERS Maintainers like to receive less mail, and submitters like to have to Cc less recipients. Signed-off-by: Stefan Richter --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ba05e8058689..2d5ff3e34699 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1595,7 +1595,7 @@ P: Alexander Viro M: viro@zeniv.linux.org.uk S: Maintained -FIREWIRE SUBSYSTEM +FIREWIRE SUBSYSTEM (drivers/firewire, ) P: Kristian Hoegsberg, Stefan Richter M: krh@redhat.com, stefanr@s5r6.in-berlin.de L: linux1394-devel@lists.sourceforge.net @@ -1917,7 +1917,7 @@ L: linux-ide@vger.kernel.org L: linux-scsi@vger.kernel.org S: Orphan -IEEE 1394 SUBSYSTEM +IEEE 1394 SUBSYSTEM (drivers/ieee1394) P: Ben Collins M: ben.collins@ubuntu.com P: Stefan Richter From 4dccd020d7ca5e673d7804cc4ff80fbf58d8a37e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 20 Jan 2008 01:24:26 +0100 Subject: [PATCH 18/26] firewire: fw-sbp2: skip unnecessary logout Don't attempt to send a logout ORB if the target was already unplugged or had its link switched off. If two targets are attached, this enhances the chance to quickly reconnect to the remaining target when one target is plugged out. Signed-off-by: Stefan Richter Acked-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index e5a2571a3671..661a5b66f661 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -606,13 +606,17 @@ static void sbp2_release_target(struct kref *kref) struct sbp2_logical_unit *lu, *next; struct Scsi_Host *shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]); + struct fw_device *device = fw_device(tgt->unit->device.parent); list_for_each_entry_safe(lu, next, &tgt->lu_list, link) { if (lu->sdev) scsi_remove_device(lu->sdev); - sbp2_send_management_orb(lu, tgt->node_id, lu->generation, - SBP2_LOGOUT_REQUEST, lu->login_id, NULL); + if (!fw_device_is_shutdown(device)) + sbp2_send_management_orb(lu, tgt->node_id, + lu->generation, SBP2_LOGOUT_REQUEST, + lu->login_id, NULL); + fw_core_remove_address_handler(&lu->address_handler); list_del(&lu->link); kfree(lu); From 14dc992aa782f8759c6d117d4322db62f62600ce Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 20 Jan 2008 01:25:31 +0100 Subject: [PATCH 19/26] firewire: fw-sbp2: try to increase reconnect_hold (speed up reconnection) Ask the target to grant 4 seconds instead of the standard and minimum of 1 second window after bus reset for reconnection. This accelerates reconnection if there are more than one targets on the bus: If a login and inquiry to one target blocks the fw-sbp2 workqueue for more than 1s after bus reset, we now still can reconnect to the other target. Before that, fw-sbp2's reconnect attempts would be rejected with "error status: 0:9" (function rejected), and fw-sbp2 would finally re-login. All those futile reconnect attemps cost extra time until the target which needs re-login is ready for I/O again. The reconnect timeout field in the login ORB doesn't have to be honored by the target though. I found that we could get up to - allegedly 32768s from an old OXFW911 firmware - 256s from LSI bridges - 4s from OXUF922 and OXFW912 bridges, - 2s from TI bridges, - only the standard 1s from Initio and Prolific bridges and from Apple OpenFirmware in target mode. We just try to get 4 seconds which already covers the case of a few HDDs on the same bus quite nicely. A minor drawback occurs in the following (rare and impractical) border case: - two initiators are there, initiator 1 holds an exclusive login to a target, - initiator 1 goes off the bus, - target refuses login attempts from initiator 2 until reconnect_hold seconds after bus reset. An alternative approach to the issue at hand would be to parallelize fw-sbp2's reconnect and login work. Signed-off-by: Stefan Richter Acked-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 661a5b66f661..d2fbfc6f6d8a 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -514,9 +514,10 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->request.status_fifo.low = lu->address_handler.offset; if (function == SBP2_LOGIN_REQUEST) { + /* Ask for 2^2 == 4 seconds reconnect grace period */ orb->request.misc |= - MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login) | - MANAGEMENT_ORB_RECONNECT(0); + MANAGEMENT_ORB_RECONNECT(2) | + MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login); } fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request)); From 5a8a1bcd15dfb9f177f3605fe6b9ba2bef2bf55a Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 24 Jan 2008 01:53:19 +0100 Subject: [PATCH 20/26] firewire: fw-sbp2: use device generation, not card generation There was a small window where a login or reconnect job could use an already updated card generation with an outdated node ID. We have to use the fw_device.generation here, not the fw_card.generation, because the generation must never be newer than the node ID when we emit a transaction. This cannot be guaranteed with fw_card.generation. Furthermore, the target's and initiator's node IDs can be obtained from fw_device and fw_card. Dereferencing their underlying topology objects is not necessary. Signed-off-by: Stefan Richter Verified in concert with subsequent memory barriers patch to fix 'giving up on config rom' issues on multiple system and drive combinations that were previously affected. Signed-off-by: Jarod Wilson --- drivers/firewire/fw-sbp2.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index d2fbfc6f6d8a..d406c34fd378 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -661,9 +661,9 @@ static void sbp2_login(struct work_struct *work) struct sbp2_login_response response; int generation, node_id, local_node_id; - generation = device->card->generation; - node_id = device->node->node_id; - local_node_id = device->card->local_node->node_id; + generation = device->generation; + node_id = device->node_id; + local_node_id = device->card->node_id; if (sbp2_send_management_orb(lu, node_id, generation, SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) { @@ -911,9 +911,9 @@ static void sbp2_reconnect(struct work_struct *work) struct fw_device *device = fw_device(unit->device.parent); int generation, node_id, local_node_id; - generation = device->card->generation; - node_id = device->node->node_id; - local_node_id = device->card->local_node->node_id; + generation = device->generation; + node_id = device->node_id; + local_node_id = device->card->node_id; if (sbp2_send_management_orb(lu, node_id, generation, SBP2_RECONNECT_REQUEST, From cf5a56ac8083dd04ffe8b9b2ec7895e9bcff44bc Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 24 Jan 2008 01:53:51 +0100 Subject: [PATCH 21/26] firewire: fw-cdev: use device generation, not card generation We have to use the fw_device.generation here, not the fw_card.generation, because the generation must never be newer than the node ID when we emit a transaction. This cannot be guaranteed with fw_card.generation. Signed-off-by: Stefan Richter Verified in concert with subsequent memory barriers patch to fix 'giving up on config rom' issues on multiple system and drive combinations that were previously affected. Signed-off-by: Jarod Wilson --- drivers/firewire/fw-cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c index 60f1a8924a95..cea8a799799f 100644 --- a/drivers/firewire/fw-cdev.c +++ b/drivers/firewire/fw-cdev.c @@ -206,12 +206,12 @@ fill_bus_reset_event(struct fw_cdev_event_bus_reset *event, event->closure = client->bus_reset_closure; event->type = FW_CDEV_EVENT_BUS_RESET; + event->generation = client->device->generation; event->node_id = client->device->node_id; event->local_node_id = card->local_node->node_id; event->bm_node_id = 0; /* FIXME: We don't track the BM. */ event->irm_node_id = card->irm_node->node_id; event->root_node_id = card->root_node->node_id; - event->generation = card->generation; } static void From b5d2a5e04e6a26cb3f77af8cbc31e74c361d706c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 25 Jan 2008 18:57:41 +0100 Subject: [PATCH 22/26] firewire: enforce access order between generation and node ID, fix "giving up on config rom" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fw_device.node_id and fw_device.generation are accessed without mutexes. We have to ensure that all readers will get to see node_id updates before generation updates. Fixes an inability to recognize devices after "giving up on config rom", https://bugzilla.redhat.com/show_bug.cgi?id=429950 Signed-off-by: Stefan Richter Reviewed by Nick Piggin . Verified to fix 'giving up on config rom' issues on multiple system and drive combinations that were previously affected. Signed-off-by: Jarod Wilson Signed-off-by: Kristian Høgsberg --- drivers/firewire/fw-cdev.c | 1 + drivers/firewire/fw-device.c | 15 +++++++++++++-- drivers/firewire/fw-device.h | 12 ++++++++++++ drivers/firewire/fw-sbp2.c | 3 +++ drivers/firewire/fw-topology.c | 6 ++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/fw-cdev.c b/drivers/firewire/fw-cdev.c index cea8a799799f..7e73cbaa4121 100644 --- a/drivers/firewire/fw-cdev.c +++ b/drivers/firewire/fw-cdev.c @@ -207,6 +207,7 @@ fill_bus_reset_event(struct fw_cdev_event_bus_reset *event, event->closure = client->bus_reset_closure; event->type = FW_CDEV_EVENT_BUS_RESET; event->generation = client->device->generation; + smp_rmb(); /* node_id must not be older than generation */ event->node_id = client->device->node_id; event->local_node_id = card->local_node->node_id; event->bm_node_id = 0; /* FIXME: We don't track the BM. */ diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c index 56681b3b297b..872df2238609 100644 --- a/drivers/firewire/fw-device.c +++ b/drivers/firewire/fw-device.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "fw-transaction.h" #include "fw-topology.h" @@ -182,9 +183,14 @@ static void fw_device_release(struct device *dev) int fw_device_enable_phys_dma(struct fw_device *device) { + int generation = device->generation; + + /* device->node_id, accessed below, must not be older than generation */ + smp_rmb(); + return device->card->driver->enable_phys_dma(device->card, device->node_id, - device->generation); + generation); } EXPORT_SYMBOL(fw_device_enable_phys_dma); @@ -389,12 +395,16 @@ static int read_rom(struct fw_device *device, int index, u32 * data) struct read_quadlet_callback_data callback_data; struct fw_transaction t; u64 offset; + int generation = device->generation; + + /* device->node_id, accessed below, must not be older than generation */ + smp_rmb(); init_completion(&callback_data.done); offset = 0xfffff0000400ULL + index * 4; fw_send_request(device->card, &t, TCODE_READ_QUADLET_REQUEST, - device->node_id, device->generation, device->max_speed, + device->node_id, generation, device->max_speed, offset, NULL, 4, complete_transaction, &callback_data); wait_for_completion(&callback_data.done); @@ -801,6 +811,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) device = node->data; device->node_id = node->node_id; + smp_wmb(); /* update node_id before generation */ device->generation = card->generation; if (atomic_read(&device->state) == FW_DEVICE_RUNNING) { PREPARE_DELAYED_WORK(&device->work, fw_device_update); diff --git a/drivers/firewire/fw-device.h b/drivers/firewire/fw-device.h index 894d4a92a18e..0854fe2bc110 100644 --- a/drivers/firewire/fw-device.h +++ b/drivers/firewire/fw-device.h @@ -35,6 +35,18 @@ struct fw_attribute_group { struct attribute *attrs[11]; }; +/* + * Note, fw_device.generation always has to be read before fw_device.node_id. + * Use SMP memory barriers to ensure this. Otherwise requests will be sent + * to an outdated node_id if the generation was updated in the meantime due + * to a bus reset. + * + * Likewise, fw-core will take care to update .node_id before .generation so + * that whenever fw_device.generation is current WRT the actual bus generation, + * fw_device.node_id is guaranteed to be current too. + * + * The same applies to fw_device.card->node_id vs. fw_device.generation. + */ struct fw_device { atomic_t state; struct fw_node *node; diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index d406c34fd378..705a20ce6b4a 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -662,6 +663,7 @@ static void sbp2_login(struct work_struct *work) int generation, node_id, local_node_id; generation = device->generation; + smp_rmb(); /* node_id must not be older than generation */ node_id = device->node_id; local_node_id = device->card->node_id; @@ -912,6 +914,7 @@ static void sbp2_reconnect(struct work_struct *work) int generation, node_id, local_node_id; generation = device->generation; + smp_rmb(); /* node_id must not be older than generation */ node_id = device->node_id; local_node_id = device->card->node_id; diff --git a/drivers/firewire/fw-topology.c b/drivers/firewire/fw-topology.c index 0fc9b000e99d..172c1867e9aa 100644 --- a/drivers/firewire/fw-topology.c +++ b/drivers/firewire/fw-topology.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fw-transaction.h" #include "fw-topology.h" @@ -518,6 +519,11 @@ fw_core_handle_bus_reset(struct fw_card *card, card->bm_retries = 0; card->node_id = node_id; + /* + * Update node_id before generation to prevent anybody from using + * a stale node_id together with a current generation. + */ + smp_wmb(); card->generation = generation; card->reset_jiffies = jiffies; schedule_delayed_work(&card->work, 0); From f8d2dc39389d6ccc0def290dc4b7eb71d68645a2 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Fri, 25 Jan 2008 17:53:49 +0100 Subject: [PATCH 23/26] firewire: fw-core: react on bus resets while the config ROM is being fetched read_rom() obtained a fresh new fw_device.generation for each read transaction. Hence it was able to continue reading in the middle of the ROM even if a bus reset happened. However the device may have modified the ROM during the reset. We would end up with a corrupt fetched ROM image then. Although all of this is quite unlikely, it is not impossible. Therefore we now restart reading the ROM if the bus generation changed. Note, the memory barrier in read_rom() is still necessary according to tests by Jarod Wilson, despite of the ->generation access being moved up in the call chain. Signed-off-by: Stefan Richter This is essentially what I've been beating on locally, and I've yet to hit another config rom read failure with it. Signed-off-by: Jarod Wilson --- drivers/firewire/fw-device.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c index 872df2238609..de9066e69adf 100644 --- a/drivers/firewire/fw-device.c +++ b/drivers/firewire/fw-device.c @@ -390,12 +390,12 @@ complete_transaction(struct fw_card *card, int rcode, complete(&callback_data->done); } -static int read_rom(struct fw_device *device, int index, u32 * data) +static int +read_rom(struct fw_device *device, int generation, int index, u32 *data) { struct read_quadlet_callback_data callback_data; struct fw_transaction t; u64 offset; - int generation = device->generation; /* device->node_id, accessed below, must not be older than generation */ smp_rmb(); @@ -414,7 +414,14 @@ static int read_rom(struct fw_device *device, int index, u32 * data) return callback_data.rcode; } -static int read_bus_info_block(struct fw_device *device) +/* + * Read the bus info block, perform a speed probe, and read all of the rest of + * the config ROM. We do all this with a cached bus generation. If the bus + * generation changes under us, read_bus_info_block will fail and get retried. + * It's better to start all over in this case because the node from which we + * are reading the ROM may have changed the ROM during the reset. + */ +static int read_bus_info_block(struct fw_device *device, int generation) { static u32 rom[256]; u32 stack[16], sp, key; @@ -424,7 +431,7 @@ static int read_bus_info_block(struct fw_device *device) /* First read the bus info block. */ for (i = 0; i < 5; i++) { - if (read_rom(device, i, &rom[i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, &rom[i]) != RCODE_COMPLETE) return -1; /* * As per IEEE1212 7.2, during power-up, devices can @@ -459,7 +466,8 @@ static int read_bus_info_block(struct fw_device *device) device->max_speed = device->card->link_speed; while (device->max_speed > SCODE_100) { - if (read_rom(device, 0, &dummy) == RCODE_COMPLETE) + if (read_rom(device, generation, 0, &dummy) == + RCODE_COMPLETE) break; device->max_speed--; } @@ -492,7 +500,7 @@ static int read_bus_info_block(struct fw_device *device) return -1; /* Read header quadlet for the block to get the length. */ - if (read_rom(device, i, &rom[i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, &rom[i]) != RCODE_COMPLETE) return -1; end = i + (rom[i] >> 16) + 1; i++; @@ -511,7 +519,8 @@ static int read_bus_info_block(struct fw_device *device) * it references another block, and push it in that case. */ while (i < end) { - if (read_rom(device, i, &rom[i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, &rom[i]) != + RCODE_COMPLETE) return -1; if ((key >> 30) == 3 && (rom[i] >> 30) > 1 && sp < ARRAY_SIZE(stack)) @@ -658,7 +667,7 @@ static void fw_device_init(struct work_struct *work) * device. */ - if (read_bus_info_block(device) < 0) { + if (read_bus_info_block(device, device->generation) < 0) { if (device->config_rom_retries < MAX_RETRIES) { device->config_rom_retries++; schedule_delayed_work(&device->work, RETRY_DELAY); From 8f9f963e5d9853dbc5fa5091f15ae64f423d3d89 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Wed, 23 Jan 2008 16:05:45 -0500 Subject: [PATCH 24/26] firewire: replace subtraction with bitwise and Replace an unnecessary subtraction with a bitwise AND when determining the value of ext_tcode in fw_fill_transaction() to save a cpu cycle or two in a somewhat critical path. Signed-off-by: Jarod Wilson Signed-off-by: Stefan Richter --- drivers/firewire/fw-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c index 8018c3b9df0f..7fcc59dedf08 100644 --- a/drivers/firewire/fw-transaction.c +++ b/drivers/firewire/fw-transaction.c @@ -153,7 +153,7 @@ fw_fill_request(struct fw_packet *packet, int tcode, int tlabel, int ext_tcode; if (tcode > 0x10) { - ext_tcode = tcode - 0x10; + ext_tcode = tcode & ~0x10; tcode = TCODE_LOCK_REQUEST; } else ext_tcode = 0; From a4c379c1979fbc417099cd22ba16735bc3625bbf Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Sat, 19 Jan 2008 13:15:05 +0100 Subject: [PATCH 25/26] firewire: fw-sbp2: increase login orb reply timeout, fix "failed to login" Increase (and rename) the login orb reply timeout value to 20s to match that of the old firewire stack. 2s simply didn't give many devices enough time to spin up and reply. Fixes inability to recognize some devices. Failure mode was "orb reply timed out"/"failed to login". Signed-off-by: Jarod Wilson Signed-off-by: Stefan Richter (style, comments, changelog) --- drivers/firewire/fw-sbp2.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 705a20ce6b4a..794badeee0e2 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -151,9 +151,16 @@ struct sbp2_target { struct list_head lu_list; }; -#define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 +/* + * Per section 7.4.8 of the SBP-2 spec, a mgt_ORB_timeout value can be + * provided in the config rom. A high timeout value really only matters + * on initial login, where we'll just use 20s rather than hassling with + * reading the config rom, since it really wouldn't buy us much. + */ +#define SBP2_LOGIN_ORB_TIMEOUT 20000 /* Timeout in ms */ #define SBP2_ORB_TIMEOUT 2000 /* Timeout in ms */ #define SBP2_ORB_NULL 0x80000000 +#define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 #define SBP2_DIRECTION_TO_MEDIA 0x0 #define SBP2_DIRECTION_FROM_MEDIA 0x1 @@ -488,6 +495,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, { struct fw_device *device = fw_device(lu->tgt->unit->device.parent); struct sbp2_management_orb *orb; + unsigned int timeout; int retval = -ENOMEM; orb = kzalloc(sizeof(*orb), GFP_ATOMIC); @@ -519,6 +527,9 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->request.misc |= MANAGEMENT_ORB_RECONNECT(2) | MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login); + timeout = SBP2_LOGIN_ORB_TIMEOUT; + } else { + timeout = SBP2_ORB_TIMEOUT; } fw_memcpy_to_be32(&orb->request, &orb->request, sizeof(orb->request)); @@ -535,8 +546,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, sbp2_send_orb(&orb->base, lu, node_id, generation, lu->tgt->management_agent_address); - wait_for_completion_timeout(&orb->done, - msecs_to_jiffies(SBP2_ORB_TIMEOUT)); + wait_for_completion_timeout(&orb->done, msecs_to_jiffies(timeout)); retval = -EIO; if (sbp2_cancel_orbs(lu) == 0) { From 384170da9384b7bb3650c0c9b9d17ba0f7bde4ff Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 25 Jan 2008 23:31:12 -0500 Subject: [PATCH 26/26] firewire: fw-sbp2: Use sbp2 device-provided mgt orb timeout for logins To be more compliant with section 7.4.8 of the SBP-2 specification, use the mgt_ORB_timeout specified in the SBP-2 device's config rom for login ORB attempts (though with some sanity checks). A happy side-effect is that certain device and controller combinations that sometimes take more than 20 seconds to get synced up (like my laptop with just about any SBP-2 device) now function more reliably. Signed-off-by: Jarod Wilson Signed-off-by: Stefan Richter (silenced sparse) --- drivers/firewire/fw-sbp2.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c index 794badeee0e2..19ece9b6d742 100644 --- a/drivers/firewire/fw-sbp2.c +++ b/drivers/firewire/fw-sbp2.c @@ -149,15 +149,17 @@ struct sbp2_target { unsigned workarounds; struct list_head lu_list; + + unsigned int mgt_orb_timeout; }; /* * Per section 7.4.8 of the SBP-2 spec, a mgt_ORB_timeout value can be - * provided in the config rom. A high timeout value really only matters - * on initial login, where we'll just use 20s rather than hassling with - * reading the config rom, since it really wouldn't buy us much. + * provided in the config rom. Most devices do provide a value, which + * we'll use for login management orbs, but with some sane limits. */ -#define SBP2_LOGIN_ORB_TIMEOUT 20000 /* Timeout in ms */ +#define SBP2_MIN_LOGIN_ORB_TIMEOUT 5000U /* Timeout in ms */ +#define SBP2_MAX_LOGIN_ORB_TIMEOUT 40000U /* Timeout in ms */ #define SBP2_ORB_TIMEOUT 2000 /* Timeout in ms */ #define SBP2_ORB_NULL 0x80000000 #define SBP2_MAX_SG_ELEMENT_LENGTH 0xf000 @@ -166,6 +168,7 @@ struct sbp2_target { #define SBP2_DIRECTION_FROM_MEDIA 0x1 /* Unit directory keys */ +#define SBP2_CSR_UNIT_CHARACTERISTICS 0x3a #define SBP2_CSR_FIRMWARE_REVISION 0x3c #define SBP2_CSR_LOGICAL_UNIT_NUMBER 0x14 #define SBP2_CSR_LOGICAL_UNIT_DIRECTORY 0xd4 @@ -527,7 +530,7 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id, orb->request.misc |= MANAGEMENT_ORB_RECONNECT(2) | MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login); - timeout = SBP2_LOGIN_ORB_TIMEOUT; + timeout = lu->tgt->mgt_orb_timeout; } else { timeout = SBP2_ORB_TIMEOUT; } @@ -777,6 +780,7 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory, { struct fw_csr_iterator ci; int key, value; + unsigned int timeout; fw_csr_iterator_init(&ci, directory); while (fw_csr_iterator_next(&ci, &key, &value)) { @@ -799,6 +803,21 @@ static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory, *firmware_revision = value; break; + case SBP2_CSR_UNIT_CHARACTERISTICS: + /* the timeout value is stored in 500ms units */ + timeout = ((unsigned int) value >> 8 & 0xff) * 500; + timeout = max(timeout, SBP2_MIN_LOGIN_ORB_TIMEOUT); + tgt->mgt_orb_timeout = + min(timeout, SBP2_MAX_LOGIN_ORB_TIMEOUT); + + if (timeout > tgt->mgt_orb_timeout) + fw_notify("%s: config rom contains %ds " + "management ORB timeout, limiting " + "to %ds\n", tgt->unit->device.bus_id, + timeout / 1000, + tgt->mgt_orb_timeout / 1000); + break; + case SBP2_CSR_LOGICAL_UNIT_NUMBER: if (sbp2_add_logical_unit(tgt, value) < 0) return -ENOMEM;