Defer failing an I/O until we know that every path has been tested
*since* the last valid path failed.  The patch is large because some
functions (iterate_paths, __choose_path, get_current_path and map_io)
were moved above their new callers.
--- diff/drivers/md/dm-mpath.c	2004-02-03 11:58:50.000000000 +0000
+++ source/drivers/md/dm-mpath.c	2004-02-06 15:57:19.000000000 +0000
@@ -30,6 +30,8 @@
 	struct priority_group *pg;
 
 	spinlock_t failed_lock;
+	/* FIXME: put the next 2 fields in a bitset */
+	int tested;
 	int has_failed;
 	unsigned fail_count;
 	unsigned fail_total;
@@ -55,10 +57,14 @@
 	struct list_head list;
 	struct dm_target *ti;
 
+	unsigned nr_paths;
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
 
 	spinlock_t lock;
+	unsigned nr_valid_paths;
+	unsigned nr_tested_paths;
+
 	struct path *current_path;
 	unsigned current_count;
 	unsigned min_io;
@@ -183,22 +189,53 @@
 /*-----------------------------------------------------------------
  * All paths should be tested periodically.
  *---------------------------------------------------------------*/
+static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
+{
+	struct priority_group *pg;
+	struct path *p;
+
+	list_for_each_entry (pg, &m->priority_groups, list) {
+		list_for_each_entry (p, &pg->paths, list)
+			fn(p);
+	}
+}
+
+static void clear_tested(struct path *p)
+{
+	p->tested = 0;
+}
+
 static void fail_path(struct path *path)
 {
 	unsigned long flags;
+	struct multipath *m;
 
 	spin_lock_irqsave(&path->failed_lock, flags);
 
-	if (!path->has_failed) {
-		/* FIXME: this is brain dead */
-		if (!--path->fail_count) {
-			path->has_failed = 1;
-			path->fail_total++;
-			path->pg->ps->type->set_path_state(path->pg->ps,
-							   path, 0);
+	/* FIXME: path->fail_count is brain dead */
+	if (!path->has_failed && !--path->fail_count) {
+		path->has_failed = 1;
+		path->fail_total++;
+		path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
+		path->pg->m->trigger_event = 1;
+
+		m = path->pg->m;
+		spin_lock(&m->lock);
+		m->nr_valid_paths--;
+		if (!m->nr_valid_paths) {
+			iterate_paths(m, clear_tested);
+			m->nr_tested_paths = 0;
 		}
+		spin_unlock(&m->lock);
+	}
 
-		path->pg->m->trigger_event = 1;
+	if (!path->tested) {
+		path->tested = 1;
+
+		m = path->pg->m;
+		spin_lock(&m->lock);
+		m->nr_tested_paths++;
+		spin_unlock(&m->lock);
 	}
 
 	spin_unlock_irqrestore(&path->failed_lock, flags);
@@ -207,6 +244,7 @@
 static void recover_path(struct path *path)
 {
 	unsigned long flags;
+	struct multipath *m = path->pg->m;
 
 	spin_lock_irqsave(&path->failed_lock, flags);
 
@@ -214,7 +252,11 @@
 		path->has_failed = 0;
 		path->fail_count = MPATH_FAIL_COUNT;
 		path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
-		path->pg->m->trigger_event = 1;
+		m->trigger_event = 1;
+
+		spin_lock(&m->lock);
+		m->nr_valid_paths++;
+		spin_unlock(&m->lock);
 	}
 
 	spin_unlock_irqrestore(&path->failed_lock, flags);
@@ -273,26 +315,84 @@
 	}
 }
 
-static void dispatch_failed_ios(struct multipath *m)
+static int __choose_path(struct multipath *m)
 {
+	struct priority_group *pg;
+	struct path *path = NULL;
+
+	if (m->nr_valid_paths) {
+		/* loop through the priority groups until we find a valid path. */
+		list_for_each_entry (pg, &m->priority_groups, list) {
+			path = pg->ps->type->select_path(pg->ps);
+			if (path)
+				break;
+		}
+	}
+
+	m->current_path = path;
+	m->current_count = m->min_io;
+	return 0;
+}
+
+static struct path *get_current_path(struct multipath *m)
+{
+	struct path *path;
 	unsigned long flags;
-	struct bio *bio;
 
 	spin_lock_irqsave(&m->lock, flags);
-	bio = bio_list_get(&m->failed_ios);
+
+	/* Do we need to select a new path? */
+	if (!m->current_path || --m->current_count == 0)
+		__choose_path(m);
+
+	path = m->current_path;
+
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	submit_ios(bio);
+	return path;
 }
 
-static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
+static int map_io(struct multipath *m, struct bio *bio)
 {
-	struct priority_group *pg;
-	struct path *p;
+	struct path *path;
 
-	list_for_each_entry (pg, &m->priority_groups, list) {
-		list_for_each_entry (p, &pg->paths, list)
-			fn(p);
+	path = get_current_path(m);
+	if (!path)
+		return -EIO;
+
+	bio->bi_bdev = path->dev->bdev;
+	return 0;
+}
+
+static void dispatch_failed_ios(struct multipath *m)
+{
+	int r;
+	unsigned long flags;
+	struct bio *bio = NULL, *next;
+
+	spin_lock_irqsave(&m->lock, flags);
+	if (m->nr_valid_paths || (m->nr_tested_paths == m->nr_paths))
+		bio = bio_list_get(&m->failed_ios);
+	spin_unlock_irqrestore(&m->lock, flags);
+
+
+	while (bio) {
+		next = bio->bi_next;
+		bio->bi_next = NULL;
+
+		r = map_io(m, bio);
+		if (r)
+			/*
+			 * This won't loop forever because the
+			 * end_io function will fail the ios if
+			 * we've tested all the paths.
+			 */
+			bio_io_error(bio, bio->bi_size);
+
+		else
+			generic_make_request(bio);
+
+		bio = next;
 	}
 }
 
@@ -310,6 +410,7 @@
 	list_for_each_entry (m, &_mpaths, list) {
 		dispatch_failed_ios(m);
 		iterate_paths(m, test_path);
+		submit_ios(bio_list_get(&m->test_ios));
 
 		spin_lock_irqsave(&m->lock, flags);
 		if (m->trigger_event) {
@@ -575,9 +676,12 @@
 	while (as.argc) {
 		struct priority_group *pg;
 		pg = parse_priority_group(&as, m, ti);
-		if (pg)
+		if (pg) {
+			m->nr_paths += pg->nr_paths;
 			__insert_priority_group(m, pg);
+		}
 	}
+	m->nr_valid_paths = m->nr_paths;
 
 	ti->private = m;
 	m->ti = ti;
@@ -604,53 +708,6 @@
 	free_multipath(m);
 }
 
-static int __choose_path(struct multipath *m)
-{
-	struct priority_group *pg;
-	struct path *path = NULL;
-
-	/* loop through the priority groups until we find a valid path. */
-	list_for_each_entry (pg, &m->priority_groups, list) {
-		path = pg->ps->type->select_path(pg->ps);
-		if (path)
-			break;
-	}
-
-	m->current_path = path;
-	m->current_count = m->min_io;
-	return 0;
-}
-
-static struct path *get_current_path(struct multipath *m)
-{
-	struct path *path;
-	unsigned long flags;
-
-	spin_lock_irqsave(&m->lock, flags);
-
-	/* Do we need to select a new path? */
-	if (!m->current_path || --m->current_count == 0)
-		__choose_path(m);
-
-	path = m->current_path;
-
-	spin_unlock_irqrestore(&m->lock, flags);
-
-	return path;
-}
-
-static int map_io(struct multipath *m, struct bio *bio)
-{
-	struct path *path;
-
-	path = get_current_path(m);
-	if (!path)
-		return -EIO;
-
-	bio->bi_bdev = path->dev->bdev;
-	return 0;
-}
-
 static int multipath_map(struct dm_target *ti, struct bio *bio,
 			 union map_info *map_context)
 {
@@ -684,29 +741,30 @@
 static int multipath_end_io(struct dm_target *ti, struct bio *bio,
 			    int error, union map_info *map_context)
 {
-	int r = 0;
-	unsigned long flags;
+	struct path *path;
 	struct multipath *m = (struct multipath *) ti->private;
 
 	if (error) {
-		struct path *path;
+		spin_lock(&m->lock);
+		if (!m->nr_valid_paths && (m->nr_tested_paths == m->nr_paths)) {
+			spin_unlock(&m->lock);
+			return -EIO;
+		}
+		spin_unlock(&m->lock);
 
 		path = find_path(m, bio->bi_bdev);
 		fail_path(path);
-		r = map_io(m, bio);
 
-		if (!r) {
-			/* queue for the daemon to resubmit */
-			spin_lock_irqsave(&m->lock, flags);
-			bio_list_add(&m->failed_ios, bio);
-			spin_unlock_irqrestore(&m->lock, flags);
+		/* queue for the daemon to resubmit */
+		spin_lock(&m->lock);
+		bio_list_add(&m->failed_ios, bio);
+		spin_unlock(&m->lock);
 
-			dm_daemon_wake(&_kmpathd);
-			r = 1;	/* io not complete */
-		}
+		dm_daemon_wake(&_kmpathd);
+		return 1;	/* io not complete */
 	}
 
-	return r;
+	return 0;
 }
 
 static void lock_path(struct path *p) {down(&p->test_lock);}
