md/r5cache: r5cache recovery: part 2

1. In previous patch, we: - add new data to r5l_recovery_ctx - add new functions to recovery write-back cache The new functions are not used in this patch, so this patch does not change the behavior of recovery. 2. In this patchpatch, we: - modify main recovery procedure r5l_recovery_log() to call new functions - remove old functions Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Shaohua Li <shli@fb.com>
2016-11-17 15:24:44 -08:00 · 2016-11-17 15:24:44 -08:00 · 5aabf7c49d
parent b4c625c673
commit 5aabf7c49d
2 changed files with 26 additions and 187 deletions
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@ -1393,156 +1393,6 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
 	return 0;
 }

-static int r5l_recovery_flush_one_stripe(struct r5l_log *log,
-					 struct r5l_recovery_ctx *ctx,
-					 sector_t stripe_sect,
-					 int *offset)
-{
-	struct r5conf *conf = log->rdev->mddev->private;
-	struct stripe_head *sh;
-	struct r5l_payload_data_parity *payload;
-	int disk_index;
-
-	sh = raid5_get_active_stripe(conf, stripe_sect, 0, 0, 0);
-	while (1) {
-		sector_t log_offset = r5l_ring_add(log, ctx->pos,
-				ctx->meta_total_blocks);
-		payload = page_address(ctx->meta_page) + *offset;
-
-		if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_DATA) {
-			raid5_compute_sector(conf,
-					     le64_to_cpu(payload->location), 0,
-					     &disk_index, sh);
-
-			sync_page_io(log->rdev, log_offset, PAGE_SIZE,
-				     sh->dev[disk_index].page, REQ_OP_READ, 0,
-				     false);
-			sh->dev[disk_index].log_checksum =
-				le32_to_cpu(payload->checksum[0]);
-			set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
-		} else {
-			disk_index = sh->pd_idx;
-			sync_page_io(log->rdev, log_offset, PAGE_SIZE,
-				     sh->dev[disk_index].page, REQ_OP_READ, 0,
-				     false);
-			sh->dev[disk_index].log_checksum =
-				le32_to_cpu(payload->checksum[0]);
-			set_bit(R5_Wantwrite, &sh->dev[disk_index].flags);
-
-			if (sh->qd_idx >= 0) {
-				disk_index = sh->qd_idx;
-				sync_page_io(log->rdev,
-					     r5l_ring_add(log, log_offset, BLOCK_SECTORS),
-					     PAGE_SIZE, sh->dev[disk_index].page,
-					     REQ_OP_READ, 0, false);
-				sh->dev[disk_index].log_checksum =
-					le32_to_cpu(payload->checksum[1]);
-				set_bit(R5_Wantwrite,
-					&sh->dev[disk_index].flags);
-			}
-		}
-
-		ctx->meta_total_blocks += le32_to_cpu(payload->size);
-		*offset += sizeof(struct r5l_payload_data_parity) +
-			sizeof(__le32) *
-			(le32_to_cpu(payload->size) >> (PAGE_SHIFT - 9));
-		if (le16_to_cpu(payload->header.type) == R5LOG_PAYLOAD_PARITY)
-			break;
-	}
-
-	for (disk_index = 0; disk_index < sh->disks; disk_index++) {
-		void *addr;
-		u32 checksum;
-
-		if (!test_bit(R5_Wantwrite, &sh->dev[disk_index].flags))
-			continue;
-		addr = kmap_atomic(sh->dev[disk_index].page);
-		checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
-		kunmap_atomic(addr);
-		if (checksum != sh->dev[disk_index].log_checksum)
-			goto error;
-	}
-
-	for (disk_index = 0; disk_index < sh->disks; disk_index++) {
-		struct md_rdev *rdev, *rrdev;
-
-		if (!test_and_clear_bit(R5_Wantwrite,
-					&sh->dev[disk_index].flags))
-			continue;
-
-		/* in case device is broken */
-		rcu_read_lock();
-		rdev = rcu_dereference(conf->disks[disk_index].rdev);
-		if (rdev) {
-			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
-			sync_page_io(rdev, stripe_sect, PAGE_SIZE,
-				     sh->dev[disk_index].page, REQ_OP_WRITE, 0,
-				     false);
-			rdev_dec_pending(rdev, rdev->mddev);
-			rcu_read_lock();
-		}
-		rrdev = rcu_dereference(conf->disks[disk_index].replacement);
-		if (rrdev) {
-			atomic_inc(&rrdev->nr_pending);
-			rcu_read_unlock();
-			sync_page_io(rrdev, stripe_sect, PAGE_SIZE,
-				     sh->dev[disk_index].page, REQ_OP_WRITE, 0,
-				     false);
-			rdev_dec_pending(rrdev, rrdev->mddev);
-			rcu_read_lock();
-		}
-		rcu_read_unlock();
-	}
-	raid5_release_stripe(sh);
-	return 0;
-
-error:
-	for (disk_index = 0; disk_index < sh->disks; disk_index++)
-		sh->dev[disk_index].flags = 0;
-	raid5_release_stripe(sh);
-	return -EINVAL;
-}
-
-static int r5l_recovery_flush_one_meta(struct r5l_log *log,
-				       struct r5l_recovery_ctx *ctx)
-{
-	struct r5conf *conf = log->rdev->mddev->private;
-	struct r5l_payload_data_parity *payload;
-	struct r5l_meta_block *mb;
-	int offset;
-	sector_t stripe_sector;
-
-	mb = page_address(ctx->meta_page);
-	offset = sizeof(struct r5l_meta_block);
-
-	while (offset < le32_to_cpu(mb->meta_size)) {
-		int dd;
-
-		payload = (void *)mb + offset;
-		stripe_sector = raid5_compute_sector(conf,
-						     le64_to_cpu(payload->location), 0, &dd, NULL);
-		if (r5l_recovery_flush_one_stripe(log, ctx, stripe_sector,
-						  &offset))
-			return -EINVAL;
-	}
-	return 0;
-}
-
-/* copy data/parity from log to raid disks */
-static void r5l_recovery_flush_log(struct r5l_log *log,
-				   struct r5l_recovery_ctx *ctx)
-{
-	while (1) {
-		if (r5l_recovery_read_meta_block(log, ctx))
-			return;
-		if (r5l_recovery_flush_one_meta(log, ctx))
-			return;
-		ctx->seq++;
-		ctx->pos = r5l_ring_add(log, ctx->pos, ctx->meta_total_blocks);
-	}
-}
-
 static void
 r5l_recovery_create_empty_meta_block(struct r5l_log *log,
 				     struct page *page,
@ -2166,7 +2016,9 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,

 static int r5l_recovery_log(struct r5l_log *log)
 {
+	struct mddev *mddev = log->rdev->mddev;
 	struct r5l_recovery_ctx ctx;
+	int ret;

 	ctx.pos = log->last_checkpoint;
 	ctx.seq = log->last_cp_seq;
@ -2178,47 +2030,33 @@ static int r5l_recovery_log(struct r5l_log *log)
 	if (!ctx.meta_page)
 		return -ENOMEM;

-	r5l_recovery_flush_log(log, &ctx);
+	ret = r5c_recovery_flush_log(log, &ctx);
 	__free_page(ctx.meta_page);

-	/*
-	 * we did a recovery. Now ctx.pos points to an invalid meta block. New
-	 * log will start here. but we can't let superblock point to last valid
-	 * meta block. The log might looks like:
-	 * | meta 1| meta 2| meta 3|
-	 * meta 1 is valid, meta 2 is invalid. meta 3 could be valid. If
-	 * superblock points to meta 1, we write a new valid meta 2n.  if crash
-	 * happens again, new recovery will start from meta 1. Since meta 2n is
-	 * valid now, recovery will think meta 3 is valid, which is wrong.
-	 * The solution is we create a new meta in meta2 with its seq == meta
-	 * 1's seq + 10 and let superblock points to meta2. The same recovery will
-	 * not think meta 3 is a valid meta, because its seq doesn't match
-	 */
-	if (ctx.seq > log->last_cp_seq) {
-		int ret;
+	if (ret)
+		return ret;

-		ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10);
-		if (ret)
-			return ret;
-		log->seq = ctx.seq + 11;
-		log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-		r5l_write_super(log, ctx.pos);
-		log->last_checkpoint = ctx.pos;
-		log->next_checkpoint = ctx.pos;
-	} else {
-		log->log_start = ctx.pos;
-		log->seq = ctx.seq;
-	}
-
-	/*
-	 * This is to suppress "function defined but not used" warning.
-	 * It will be removed when the two functions are used (next patch).
-	 */
-	if (!log) {
-		r5c_recovery_flush_log(log, &ctx);
-		r5c_recovery_rewrite_data_only_stripes(log, &ctx);
+	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
+		pr_debug("md/raid:%s: starting from clean shutdown\n",
+			 mdname(mddev));
+	else {
+		pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
+			 mdname(mddev), ctx.data_only_stripes,
+			 ctx.data_parity_stripes);
+
+		if (ctx.data_only_stripes > 0)
+			if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+				pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+				       mdname(mddev));
+				return -EIO;
+			}
 	}

+	log->log_start = ctx.pos;
+	log->next_checkpoint = ctx.pos;
+	log->seq = ctx.seq;
+	r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq);
+	r5l_write_super(log, ctx.pos);
 	return 0;
 }

--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@ -7100,7 +7100,8 @@ static int raid5_run(struct mddev *mddev)

 		pr_debug("md/raid:%s: using device %s as journal\n",
 			 mdname(mddev), bdevname(journal_dev->bdev, b));
-		r5l_init_log(conf, journal_dev);
+		if (r5l_init_log(conf, journal_dev))
+			goto abort;
 	}

 	return 0;