summary refs log tree commit diff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-08 13:28:33 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-08 13:28:33 -0800
commit2943c833222ef87c111ee0c6b7b8519ad2983e99 (patch)
tree0ef8cc4f72a63b325e7ae858ec68822ec4f3c64f /drivers/md/md.c
parent98793265b429a3f0b3f1750e74d67cd4d740d162 (diff)
parent19d671695e1931ebfd75b2b888778201aefe35ca (diff)
downloadlinux-2943c833222ef87c111ee0c6b7b8519ad2983e99.tar.gz
Merge tag 'md-3.3' of git://neil.brown.name/md
md update for 3.3

Big change is new hot-replacement.
A slot in an array can hold 2 devices - one that
wants-replacement and one that is the replacement.
Once the replacement is built - either from the
original or (in the case of errors) from elsewhere,
the wants-replacement device will be removed.

* tag 'md-3.3' of git://neil.brown.name/md: (36 commits)
  md/raid1: Mark device want_replacement when we see a write error.
  md/raid1: If there is a spare and a want_replacement device, start replacement.
  md/raid1: recognise replacements when assembling arrays.
  md/raid1: handle activation of replacement device when recovery completes.
  md/raid1: Allow a failed replacement device to be removed.
  md/raid1: Allocate spare to store replacement devices and their bios.
  md/raid1:  Replace use of mddev->raid_disks with conf->raid_disks.
  md/raid10: If there is a spare and a want_replacement device, start replacement.
  md/raid10: recognise replacements when assembling array.
  md/raid10: Allow replacement device to be replace old drive.
  md/raid10: handle recovery of replacement devices.
  md/raid10:  Handle replacement devices during resync.
  md/raid10: writes should get directed to replacement as well as original.
  md/raid10: allow removal of failed replacement devices.
  md/raid10: preferentially read from replacement device if possible.
  md/raid10:  change read_balance to return an rdev
  md/raid10: prepare data structures for handling replacement.
  md/raid5: Mark device want_replacement when we see a write error.
  md/raid5: If there is a spare and a want_replacement device, start replacement.
  md/raid5: recognise replacements when assembling array.
  ...
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c107
1 files changed, 80 insertions, 27 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5d1b6762f108..ca8527fe77eb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1713,6 +1713,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 		}
 		if (sb->devflags & WriteMostly1)
 			set_bit(WriteMostly, &rdev->flags);
+		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+			set_bit(Replacement, &rdev->flags);
 	} else /* MULTIPATH are always insync */
 		set_bit(In_sync, &rdev->flags);
 
@@ -1766,6 +1768,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 		sb->recovery_offset =
 			cpu_to_le64(rdev->recovery_offset);
 	}
+	if (test_bit(Replacement, &rdev->flags))
+		sb->feature_map |=
+			cpu_to_le32(MD_FEATURE_REPLACEMENT);
 
 	if (mddev->reshape_position != MaxSector) {
 		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
@@ -2559,6 +2564,15 @@ state_show(struct md_rdev *rdev, char *page)
 		len += sprintf(page+len, "%swrite_error", sep);
 		sep = ",";
 	}
+	if (test_bit(WantReplacement, &rdev->flags)) {
+		len += sprintf(page+len, "%swant_replacement", sep);
+		sep = ",";
+	}
+	if (test_bit(Replacement, &rdev->flags)) {
+		len += sprintf(page+len, "%sreplacement", sep);
+		sep = ",";
+	}
+
 	return len+sprintf(page+len, "\n");
 }
 
@@ -2627,6 +2641,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 	} else if (cmd_match(buf, "-write_error")) {
 		clear_bit(WriteErrorSeen, &rdev->flags);
 		err = 0;
+	} else if (cmd_match(buf, "want_replacement")) {
+		/* Any non-spare device that is not a replacement can
+		 * become want_replacement at any time, but we then need to
+		 * check if recovery is needed.
+		 */
+		if (rdev->raid_disk >= 0 &&
+		    !test_bit(Replacement, &rdev->flags))
+			set_bit(WantReplacement, &rdev->flags);
+		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+		md_wakeup_thread(rdev->mddev->thread);
+		err = 0;
+	} else if (cmd_match(buf, "-want_replacement")) {
+		/* Clearing 'want_replacement' is always allowed.
+		 * Once replacements starts it is too late though.
+		 */
+		err = 0;
+		clear_bit(WantReplacement, &rdev->flags);
+	} else if (cmd_match(buf, "replacement")) {
+		/* Can only set a device as a replacement when array has not
+		 * yet been started.  Once running, replacement is automatic
+		 * from spares, or by assigning 'slot'.
+		 */
+		if (rdev->mddev->pers)
+			err = -EBUSY;
+		else {
+			set_bit(Replacement, &rdev->flags);
+			err = 0;
+		}
+	} else if (cmd_match(buf, "-replacement")) {
+		/* Similarly, can only clear Replacement before start */
+		if (rdev->mddev->pers)
+			err = -EBUSY;
+		else {
+			clear_bit(Replacement, &rdev->flags);
+			err = 0;
+		}
 	}
 	if (!err)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -2688,7 +2738,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 		if (rdev->mddev->pers->hot_remove_disk == NULL)
 			return -EINVAL;
 		err = rdev->mddev->pers->
-			hot_remove_disk(rdev->mddev, rdev->raid_disk);
+			hot_remove_disk(rdev->mddev, rdev);
 		if (err)
 			return err;
 		sysfs_unlink_rdev(rdev->mddev, rdev);
@@ -2696,7 +2746,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
 		md_wakeup_thread(rdev->mddev->thread);
 	} else if (rdev->mddev->pers) {
-		struct md_rdev *rdev2;
 		/* Activating a spare .. or possibly reactivating
 		 * if we ever get bitmaps working here.
 		 */
@@ -2710,10 +2759,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 		if (rdev->mddev->pers->hot_add_disk == NULL)
 			return -EINVAL;
 
-		list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
-			if (rdev2->raid_disk == slot)
-				return -EEXIST;
-
 		if (slot >= rdev->mddev->raid_disks &&
 		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
 			return -ENOSPC;
@@ -6053,8 +6098,15 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 	struct mddev *mddev = NULL;
 	int ro;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
+	switch (cmd) {
+	case RAID_VERSION:
+	case GET_ARRAY_INFO:
+	case GET_DISK_INFO:
+		break;
+	default:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
 
 	/*
 	 * Commands dealing with the RAID driver but not any
@@ -6714,8 +6766,11 @@ static int md_seq_show(struct seq_file *seq, void *v)
 			if (test_bit(Faulty, &rdev->flags)) {
 				seq_printf(seq, "(F)");
 				continue;
-			} else if (rdev->raid_disk < 0)
+			}
+			if (rdev->raid_disk < 0)
 				seq_printf(seq, "(S)"); /* spare */
+			if (test_bit(Replacement, &rdev->flags))
+				seq_printf(seq, "(R)");
 			sectors += rdev->sectors;
 		}
 
@@ -7337,29 +7392,27 @@ static int remove_and_add_spares(struct mddev *mddev)
 		     ! test_bit(In_sync, &rdev->flags)) &&
 		    atomic_read(&rdev->nr_pending)==0) {
 			if (mddev->pers->hot_remove_disk(
-				    mddev, rdev->raid_disk)==0) {
+				    mddev, rdev) == 0) {
 				sysfs_unlink_rdev(mddev, rdev);
 				rdev->raid_disk = -1;
 			}
 		}
 
-	if (mddev->degraded) {
-		list_for_each_entry(rdev, &mddev->disks, same_set) {
-			if (rdev->raid_disk >= 0 &&
-			    !test_bit(In_sync, &rdev->flags) &&
-			    !test_bit(Faulty, &rdev->flags))
+	list_for_each_entry(rdev, &mddev->disks, same_set) {
+		if (rdev->raid_disk >= 0 &&
+		    !test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags))
+			spares++;
+		if (rdev->raid_disk < 0
+		    && !test_bit(Faulty, &rdev->flags)) {
+			rdev->recovery_offset = 0;
+			if (mddev->pers->
+			    hot_add_disk(mddev, rdev) == 0) {
+				if (sysfs_link_rdev(mddev, rdev))
+					/* failure here is OK */;
 				spares++;
-			if (rdev->raid_disk < 0
-			    && !test_bit(Faulty, &rdev->flags)) {
-				rdev->recovery_offset = 0;
-				if (mddev->pers->
-				    hot_add_disk(mddev, rdev) == 0) {
-					if (sysfs_link_rdev(mddev, rdev))
-						/* failure here is OK */;
-					spares++;
-					md_new_event(mddev);
-					set_bit(MD_CHANGE_DEVS, &mddev->flags);
-				}
+				md_new_event(mddev);
+				set_bit(MD_CHANGE_DEVS, &mddev->flags);
 			}
 		}
 	}
@@ -7474,7 +7527,7 @@ void md_check_recovery(struct mddev *mddev)
 				    test_bit(Faulty, &rdev->flags) &&
 				    atomic_read(&rdev->nr_pending)==0) {
 					if (mddev->pers->hot_remove_disk(
-						    mddev, rdev->raid_disk)==0) {
+						    mddev, rdev) == 0) {
 						sysfs_unlink_rdev(mddev, rdev);
 						rdev->raid_disk = -1;
 					}