Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "I've got a few bits pending for 3.8 final, that I better get sent out.
  It's all been sitting for a while, I consider it safe.

  It contains:

   - Two bug fixes for mtip32xx, fixing a driver hang and a crash.

   - A few-liner protocol error fix for drbd.

   - A few fixes for the xen block front/back driver, fixing a potential
     data corruption issue.

   - A fix for a race in disk_clear_events() that caused spurious
     warnings.  Out of the Chrome OS base.

   - A deadlock fix for disk_clear_events(), moving it to an
     unfreezable workqueue.  Also from the Chrome OS base."

* 'for-linus' of git://git.kernel.dk/linux-block:
  drbd: fix potential protocol error and resulting disconnect/reconnect
  mtip32xx: fix for crash when the device surprise removed during rebuild
  mtip32xx: fix for driver hang after a command timeout
  block: prevent race/cleanup
  block: remove deadlock in disk_clear_events
  xen-blkfront: handle bvecs with partial data
  llist/xen-blkfront: implement safe version of llist_for_each_entry
  xen-blkback: implement safe iterator for the list of persistent grants
This commit is contained in:
Linus Torvalds 2013-02-07 08:38:33 +11:00
commit 2110cf029a
8 changed files with 101 additions and 28 deletions

View File

@ -35,6 +35,8 @@ static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type; static struct device_type disk_type;
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr);
static void disk_alloc_events(struct gendisk *disk); static void disk_alloc_events(struct gendisk *disk);
static void disk_add_events(struct gendisk *disk); static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk);
@ -1549,6 +1551,7 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
const struct block_device_operations *bdops = disk->fops; const struct block_device_operations *bdops = disk->fops;
struct disk_events *ev = disk->ev; struct disk_events *ev = disk->ev;
unsigned int pending; unsigned int pending;
unsigned int clearing = mask;
if (!ev) { if (!ev) {
/* for drivers still using the old ->media_changed method */ /* for drivers still using the old ->media_changed method */
@ -1558,34 +1561,53 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
return 0; return 0;
} }
/* tell the workfn about the events being cleared */ disk_block_events(disk);
/*
* store the union of mask and ev->clearing on the stack so that the
* race with disk_flush_events does not cause ambiguity (ev->clearing
* can still be modified even if events are blocked).
*/
spin_lock_irq(&ev->lock); spin_lock_irq(&ev->lock);
ev->clearing |= mask; clearing |= ev->clearing;
ev->clearing = 0;
spin_unlock_irq(&ev->lock); spin_unlock_irq(&ev->lock);
/* uncondtionally schedule event check and wait for it to finish */ disk_check_events(ev, &clearing);
disk_block_events(disk); /*
queue_delayed_work(system_freezable_wq, &ev->dwork, 0); * if ev->clearing is not 0, the disk_flush_events got called in the
flush_delayed_work(&ev->dwork); * middle of this function, so we want to run the workfn without delay.
__disk_unblock_events(disk, false); */
__disk_unblock_events(disk, ev->clearing ? true : false);
/* then, fetch and clear pending events */ /* then, fetch and clear pending events */
spin_lock_irq(&ev->lock); spin_lock_irq(&ev->lock);
WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
pending = ev->pending & mask; pending = ev->pending & mask;
ev->pending &= ~mask; ev->pending &= ~mask;
spin_unlock_irq(&ev->lock); spin_unlock_irq(&ev->lock);
WARN_ON_ONCE(clearing & mask);
return pending; return pending;
} }
/*
* Separate this part out so that a different pointer for clearing_ptr can be
* passed in for disk_clear_events.
*/
static void disk_events_workfn(struct work_struct *work) static void disk_events_workfn(struct work_struct *work)
{ {
struct delayed_work *dwork = to_delayed_work(work); struct delayed_work *dwork = to_delayed_work(work);
struct disk_events *ev = container_of(dwork, struct disk_events, dwork); struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
disk_check_events(ev, &ev->clearing);
}
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr)
{
struct gendisk *disk = ev->disk; struct gendisk *disk = ev->disk;
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
unsigned int clearing = ev->clearing; unsigned int clearing = *clearing_ptr;
unsigned int events; unsigned int events;
unsigned long intv; unsigned long intv;
int nr_events = 0, i; int nr_events = 0, i;
@ -1598,7 +1620,7 @@ static void disk_events_workfn(struct work_struct *work)
events &= ~ev->pending; events &= ~ev->pending;
ev->pending |= events; ev->pending |= events;
ev->clearing &= ~clearing; *clearing_ptr &= ~clearing;
intv = disk_events_poll_jiffies(disk); intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv) if (!ev->block && intv)

View File

@ -168,7 +168,7 @@ static void wake_all_senders(struct drbd_tconn *tconn) {
} }
/* must hold resource->req_lock */ /* must hold resource->req_lock */
static void start_new_tl_epoch(struct drbd_tconn *tconn) void start_new_tl_epoch(struct drbd_tconn *tconn)
{ {
/* no point closing an epoch, if it is empty, anyways. */ /* no point closing an epoch, if it is empty, anyways. */
if (tconn->current_tle_writes == 0) if (tconn->current_tle_writes == 0)

View File

@ -267,6 +267,7 @@ struct bio_and_error {
int error; int error;
}; };
extern void start_new_tl_epoch(struct drbd_tconn *tconn);
extern void drbd_req_destroy(struct kref *kref); extern void drbd_req_destroy(struct kref *kref);
extern void _req_may_be_done(struct drbd_request *req, extern void _req_may_be_done(struct drbd_request *req,
struct bio_and_error *m); struct bio_and_error *m);

View File

@ -931,6 +931,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
enum drbd_state_rv rv = SS_SUCCESS; enum drbd_state_rv rv = SS_SUCCESS;
enum sanitize_state_warnings ssw; enum sanitize_state_warnings ssw;
struct after_state_chg_work *ascw; struct after_state_chg_work *ascw;
bool did_remote, should_do_remote;
os = drbd_read_state(mdev); os = drbd_read_state(mdev);
@ -981,11 +982,17 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
(os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
atomic_inc(&mdev->local_cnt); atomic_inc(&mdev->local_cnt);
did_remote = drbd_should_do_remote(mdev->state);
mdev->state.i = ns.i; mdev->state.i = ns.i;
should_do_remote = drbd_should_do_remote(mdev->state);
mdev->tconn->susp = ns.susp; mdev->tconn->susp = ns.susp;
mdev->tconn->susp_nod = ns.susp_nod; mdev->tconn->susp_nod = ns.susp_nod;
mdev->tconn->susp_fen = ns.susp_fen; mdev->tconn->susp_fen = ns.susp_fen;
/* put replicated vs not-replicated requests in separate epochs */
if (did_remote != should_do_remote)
start_new_tl_epoch(mdev->tconn);
if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
drbd_print_uuids(mdev, "attached to UUIDs"); drbd_print_uuids(mdev, "attached to UUIDs");

View File

@ -626,12 +626,13 @@ static void mtip_timeout_function(unsigned long int data)
} }
} }
if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { if (cmdto_cnt) {
print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
mtip_restart_port(port); mtip_restart_port(port);
wake_up_interruptible(&port->svc_wait);
}
clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
} }
if (port->ic_pause_timer) { if (port->ic_pause_timer) {
@ -3887,7 +3888,12 @@ static int mtip_block_remove(struct driver_data *dd)
* Delete our gendisk structure. This also removes the device * Delete our gendisk structure. This also removes the device
* from /dev * from /dev
*/ */
del_gendisk(dd->disk); if (dd->disk) {
if (dd->disk->queue)
del_gendisk(dd->disk);
else
put_disk(dd->disk);
}
spin_lock(&rssd_index_lock); spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index); ida_remove(&rssd_index_ida, dd->index);
@ -3921,7 +3927,13 @@ static int mtip_block_shutdown(struct driver_data *dd)
"Shutting down %s ...\n", dd->disk->disk_name); "Shutting down %s ...\n", dd->disk->disk_name);
/* Delete our gendisk structure, and cleanup the blk queue. */ /* Delete our gendisk structure, and cleanup the blk queue. */
del_gendisk(dd->disk); if (dd->disk) {
if (dd->disk->queue)
del_gendisk(dd->disk);
else
put_disk(dd->disk);
}
spin_lock(&rssd_index_lock); spin_lock(&rssd_index_lock);
ida_remove(&rssd_index_ida, dd->index); ida_remove(&rssd_index_ida, dd->index);

View File

@ -161,10 +161,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
static void make_response(struct xen_blkif *blkif, u64 id, static void make_response(struct xen_blkif *blkif, u64 id,
unsigned short op, int st); unsigned short op, int st);
#define foreach_grant(pos, rbtree, node) \ #define foreach_grant_safe(pos, n, rbtree, node) \
for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
(n) = rb_next(&(pos)->node); \
&(pos)->node != NULL; \ &(pos)->node != NULL; \
(pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) (pos) = container_of(n, typeof(*(pos)), node), \
(n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
static void add_persistent_gnt(struct rb_root *root, static void add_persistent_gnt(struct rb_root *root,
@ -217,10 +219,11 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt; struct persistent_gnt *persistent_gnt;
struct rb_node *n;
int ret = 0; int ret = 0;
int segs_to_unmap = 0; int segs_to_unmap = 0;
foreach_grant(persistent_gnt, root, node) { foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle == BUG_ON(persistent_gnt->handle ==
BLKBACK_INVALID_HANDLE); BLKBACK_INVALID_HANDLE);
gnttab_set_unmap_op(&unmap[segs_to_unmap], gnttab_set_unmap_op(&unmap[segs_to_unmap],
@ -230,9 +233,6 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
persistent_gnt->handle); persistent_gnt->handle);
pages[segs_to_unmap] = persistent_gnt->page; pages[segs_to_unmap] = persistent_gnt->page;
rb_erase(&persistent_gnt->node, root);
kfree(persistent_gnt);
num--;
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
!rb_next(&persistent_gnt->node)) { !rb_next(&persistent_gnt->node)) {
@ -241,6 +241,10 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
BUG_ON(ret); BUG_ON(ret);
segs_to_unmap = 0; segs_to_unmap = 0;
} }
rb_erase(&persistent_gnt->node, root);
kfree(persistent_gnt);
num--;
} }
BUG_ON(num != 0); BUG_ON(num != 0);
} }

View File

@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
{ {
struct llist_node *all_gnts; struct llist_node *all_gnts;
struct grant *persistent_gnt; struct grant *persistent_gnt;
struct llist_node *n;
/* Prevent new requests being issued until we fix things up. */ /* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock); spin_lock_irq(&info->io_lock);
@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
/* Remove all persistent grants */ /* Remove all persistent grants */
if (info->persistent_gnts_c) { if (info->persistent_gnts_c) {
all_gnts = llist_del_all(&info->persistent_gnts); all_gnts = llist_del_all(&info->persistent_gnts);
llist_for_each_entry(persistent_gnt, all_gnts, node) { llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(pfn_to_page(persistent_gnt->pfn)); __free_page(pfn_to_page(persistent_gnt->pfn));
kfree(persistent_gnt); kfree(persistent_gnt);
@ -835,7 +836,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
struct blkif_response *bret) struct blkif_response *bret)
{ {
int i; int i = 0;
struct bio_vec *bvec; struct bio_vec *bvec;
struct req_iterator iter; struct req_iterator iter;
unsigned long flags; unsigned long flags;
@ -852,7 +853,8 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
*/ */
rq_for_each_segment(bvec, s->request, iter) { rq_for_each_segment(bvec, s->request, iter) {
BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
i = offset >> PAGE_SHIFT; if (bvec->bv_offset < offset)
i++;
BUG_ON(i >= s->req.u.rw.nr_segments); BUG_ON(i >= s->req.u.rw.nr_segments);
shared_data = kmap_atomic( shared_data = kmap_atomic(
pfn_to_page(s->grants_used[i]->pfn)); pfn_to_page(s->grants_used[i]->pfn));
@ -861,7 +863,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
bvec->bv_len); bvec->bv_len);
bvec_kunmap_irq(bvec_data, &flags); bvec_kunmap_irq(bvec_data, &flags);
kunmap_atomic(shared_data); kunmap_atomic(shared_data);
offset += bvec->bv_len; offset = bvec->bv_offset + bvec->bv_len;
} }
} }
/* Add the persistent grant into the list of free grants */ /* Add the persistent grant into the list of free grants */

View File

@ -124,6 +124,31 @@ static inline void init_llist_head(struct llist_head *list)
&(pos)->member != NULL; \ &(pos)->member != NULL; \
(pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
/**
* llist_for_each_entry_safe - iterate over some entries of a lock-less list
* of given type, safe against removal of the current entry.
* @pos: the type * to use as a loop cursor.
* @n: a struct llist_node * used as temporary storage for the next node.
* @node: the first entry of deleted list entries.
* @member: the name of the llist_node within the struct.
*
* In general, some entries of the lock-less list can be traversed
* safely only after being removed from list, so start with an entry
* instead of list head. This variant allows removal of entries
* as we iterate: the next node is saved in @n before the loop body
* runs, so the body may free @pos without breaking the traversal.
*
* If being used on entries deleted from lock-less list directly, the
* traverse order is from the newest to the oldest added entry. If
* you want to traverse from the oldest to the newest, you must
* reverse the order by yourself before traversing.
*/
#define llist_for_each_entry_safe(pos, n, node, member) \
for ((pos) = llist_entry((node), typeof(*(pos)), member), \
(n) = (pos)->member.next; \
&(pos)->member != NULL; \
(pos) = llist_entry(n, typeof(*(pos)), member), \
(n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
/** /**
* llist_empty - tests whether a lock-less list is empty * llist_empty - tests whether a lock-less list is empty
* @head: the list to test * @head: the list to test