From 73a01a18b9c28a0fab1131ece5b0a9bc00a879b8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 27 Oct 2010 14:33:00 +0200 Subject: [PATCH] drbd: New packet for Ahead/Behind mode: P_OUT_OF_SYNC Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 10 ++++--- drivers/block/drbd/drbd_int.h | 14 +++++++++- drivers/block/drbd/drbd_main.c | 10 +++++++ drivers/block/drbd/drbd_receiver.c | 10 +++++++ drivers/block/drbd/drbd_req.c | 44 ++++++++++++++++++++++-------- drivers/block/drbd/drbd_req.h | 4 ++- drivers/block/drbd/drbd_worker.c | 16 +++++++++++ include/linux/drbd.h | 2 +- 8 files changed, 91 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index b4adb58c7472..33f6cc537d08 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -1007,22 +1007,22 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, * called by tl_clear and drbd_send_dblock (==drbd_make_request). * so this can be _any_ process. */ -void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, +int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line) { unsigned long sbnr, ebnr, lbnr, flags; sector_t esector, nr_sectors; - unsigned int enr, count; + unsigned int enr, count = 0; struct lc_element *e; if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); - return; + return 0; } if (!get_ldev(mdev)) - return; /* no disk, no metadata, no bitmap to set bits in */ + return 0; /* no disk, no metadata, no bitmap to set bits in */ nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; @@ -1052,6 +1052,8 @@ void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, out: put_ldev(mdev); + + return count; } static diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 21b7439438cd..471331236826 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -212,6 +212,7 @@ enum drbd_packets { /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ + P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ P_MAX_CMD = 0x28, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ @@ -269,6 +270,7 @@ static inline const char *cmdname(enum drbd_packets cmd) [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", [P_COMPRESSED_BITMAP] = "CBitmap", [P_DELAY_PROBE] = "DelayProbe", + [P_OUT_OF_SYNC] = "OutOfSync", [P_MAX_CMD] = NULL, }; @@ -550,6 +552,13 @@ struct p_discard { u32 pad; } __packed; +struct p_block_desc { + struct p_header80 head; + u64 sector; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __packed; + /* Valid values for the encoding field. * Bump proto version when changing this. */ enum drbd_bitmap_code { @@ -647,6 +656,7 @@ union p_polymorph { struct p_block_req block_req; struct p_delay_probe93 delay_probe93; struct p_rs_uuid rs_uuid; + struct p_block_desc block_desc; } __packed; /**********************************************************************/ @@ -1221,6 +1231,7 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp, int data_size); extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, sector_t sector, int blksize, u64 block_id); +extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, struct drbd_epoch_entry *e); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); @@ -1534,6 +1545,7 @@ extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); +extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); @@ -1626,7 +1638,7 @@ extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_in_sync(mdev, sector, size) \ __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) -extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, +extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 46f27d6c0b21..0dc93f43a476 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2634,6 +2634,16 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, return ok; } +int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) +{ + struct p_block_desc p; + + p.sector = cpu_to_be64(req->sector); + p.blksize = cpu_to_be32(req->size); + + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p)); +} + /* drbd_send distinguishes two cases: diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b19e8b2c4ce5..04a08e7541cc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3562,6 +3562,15 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u return TRUE; } +static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +{ + struct p_block_desc *p = &mdev->data.rbuf.block_desc; + + drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); + + return TRUE; +} + typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); struct data_cmd { @@ -3592,6 +3601,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, + [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, /* anything missing from this table is in * the asender_tbl, see get_asender_cmd */ [P_MAX_CMD] = { 0, 0, NULL }, diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 60288fb3c4d7..a8d1ff2bda27 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -142,7 +142,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, /* before we can signal completion to the upper layers, * we may need to close the current epoch */ - if (mdev->state.conn >= C_CONNECTED && + if (mdev->state.conn >= C_CONNECTED && mdev->state.conn < C_AHEAD && req->epoch == mdev->newest_tle->br_number) queue_barrier(mdev); @@ -545,6 +545,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; + case queue_for_send_oos: + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = w_send_oos; + drbd_queue_work(&mdev->data.work, &req->w); + break; + + case oos_handed_to_network: + /* actually the same */ case send_canceled: /* treat it the same */ case send_failed: @@ -756,7 +764,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) const sector_t sector = bio->bi_sector; struct drbd_tl_epoch *b = NULL; struct drbd_request *req; - int local, remote; + int local, remote, send_oos = 0; int err = -EIO; int ret = 0; @@ -820,8 +828,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) } remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || - (mdev->state.pdsk == D_INCONSISTENT && - mdev->state.conn >= C_CONNECTED)); + (mdev->state.pdsk >= D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED && + mdev->state.conn < C_AHEAD)); + send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD && + mdev->state.pdsk >= D_INCONSISTENT); if (!(local || remote) && !is_susp(mdev->state)) { if (__ratelimit(&drbd_ratelimit_state)) @@ -835,7 +846,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) * but there is a race between testing the bit and pointer outside the * spinlock, and grabbing the spinlock. * if we lost that race, we retry. */ - if (rw == WRITE && remote && + if (rw == WRITE && (remote || send_oos) && mdev->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { allocate_barrier: @@ -860,11 +871,15 @@ allocate_barrier: goto fail_free_complete; } - if (remote) { + if (remote || send_oos) { remote = (mdev->state.pdsk == D_UP_TO_DATE || - (mdev->state.pdsk == D_INCONSISTENT && - mdev->state.conn >= C_CONNECTED)); - if (!remote) + (mdev->state.pdsk >= D_INCONSISTENT && + mdev->state.conn >= C_CONNECTED && + mdev->state.conn < C_AHEAD)); + send_oos = (rw == WRITE && mdev->state.conn == C_AHEAD && + mdev->state.pdsk >= D_INCONSISTENT); + + if (!(remote || send_oos)) dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); @@ -877,7 +892,7 @@ allocate_barrier: mdev->unused_spare_tle = b; b = NULL; } - if (rw == WRITE && remote && + if (rw == WRITE && (remote || send_oos) && mdev->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { /* someone closed the current epoch @@ -900,7 +915,7 @@ allocate_barrier: * barrier packet. To get the write ordering right, we only have to * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. */ - if (remote && mdev->unused_spare_tle && + if ((remote || send_oos) && mdev->unused_spare_tle && test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { _tl_add_barrier(mdev, mdev->unused_spare_tle); mdev->unused_spare_tle = NULL; @@ -948,8 +963,11 @@ allocate_barrier: ? queue_for_net_write : queue_for_net_read); } + if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) + _req_mod(req, queue_for_send_oos); - if (remote && mdev->net_conf->on_congestion != OC_BLOCK) { + if (remote && + mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { int congested = 0; if (mdev->net_conf->cong_fill && @@ -964,6 +982,8 @@ allocate_barrier: } if (congested) { + queue_barrier(mdev); + if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 69d350fe7c1e..40d3dcd8fc81 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -82,14 +82,16 @@ enum drbd_req_event { to_be_submitted, /* XXX yes, now I am inconsistent... - * these two are not "events" but "actions" + * these are not "events" but "actions" * oh, well... */ queue_for_net_write, queue_for_net_read, + queue_for_send_oos, send_canceled, send_failed, handed_over_to_network, + oos_handed_to_network, connection_lost_while_pending, read_retry_remote_canceled, recv_acked_by_peer, diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 782d87237cb4..67499077c482 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1237,6 +1237,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } +int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + int ok; + + if (unlikely(cancel)) { + req_mod(req, send_canceled); + return 1; + } + + ok = drbd_send_oos(mdev, req); + req_mod(req, oos_handed_to_network); + + return ok; +} + /** * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request * @mdev: DRBD device. diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 23f31be6f00d..41da654cc0b1 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.9" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 95 +#define PRO_VERSION_MAX 96 enum drbd_io_error_p {