<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
From: NeilBrown &lt;neilb@cse.unsw.edu.au&gt;

linear currently uses division by the size of the smallest component device
to find which device a request goes to.  If that smallest device is larger
than 2 terabytes, then the division will not work on some systems.

So we introduce a pre-shift, and take care not to make the hash table too
large, much like the code in raid0.

Also get rid of conf-&gt;nr_zones, which is not needed.

Signed-off-by: Neil Brown &lt;neilb@cse.unsw.edu.au&gt;
Signed-off-by: Andrew Morton &lt;akpm@osdl.org&gt;
---

 drivers/md/linear.c         |   99 ++++++++++++++++++++++++++++++--------------
 include/linux/raid/linear.h |    4 -
 2 files changed, 70 insertions(+), 33 deletions(-)

diff -puN drivers/md/linear.c~md-support-md-linear-array-with-components-greater-than-2-terabytes drivers/md/linear.c
--- devel/drivers/md/linear.c~md-support-md-linear-array-with-components-greater-than-2-terabytes	2005-08-21 22:35:53.000000000 -0700
+++ devel-akpm/drivers/md/linear.c	2005-08-21 22:35:53.000000000 -0700
@@ -38,7 +38,8 @@ static inline dev_info_t *which_dev(mdde
 	/*
 	 * sector_div(a,b) returns the remainer and sets a to a/b
 	 */
-	(void)sector_div(block, conf-&gt;smallest-&gt;size);
+	block &gt;&gt;= conf-&gt;preshift;
+	(void)sector_div(block, conf-&gt;hash_spacing);
 	hash = conf-&gt;hash_table[block];
 
 	while ((sector&gt;&gt;1) &gt;= (hash-&gt;size + hash-&gt;offset))
@@ -47,7 +48,7 @@ static inline dev_info_t *which_dev(mdde
 }
 
 /**
- *	linear_mergeable_bvec -- tell bio layer if a two requests can be merged
+ *	linear_mergeable_bvec -- tell bio layer if two requests can be merged
  *	@q: request queue
  *	@bio: the buffer head that's been built up so far
  *	@biovec: the request that could be merged to it.
@@ -116,7 +117,7 @@ static int linear_run (mddev_t *mddev)
 	dev_info_t **table;
 	mdk_rdev_t *rdev;
 	int i, nb_zone, cnt;
-	sector_t start;
+	sector_t min_spacing;
 	sector_t curr_offset;
 	struct list_head *tmp;
 
@@ -127,11 +128,6 @@ static int linear_run (mddev_t *mddev)
 	memset(conf, 0, sizeof(*conf) + mddev-&gt;raid_disks*sizeof(dev_info_t));
 	mddev-&gt;private = conf;
 
-	/*
-	 * Find the smallest device.
-	 */
-
-	conf-&gt;smallest = NULL;
 	cnt = 0;
 	mddev-&gt;array_size = 0;
 
@@ -159,8 +155,6 @@ static int linear_run (mddev_t *mddev)
 		disk-&gt;size = rdev-&gt;size;
 		mddev-&gt;array_size += rdev-&gt;size;
 
-		if (!conf-&gt;smallest || (disk-&gt;size &lt; conf-&gt;smallest-&gt;size))
-			conf-&gt;smallest = disk;
 		cnt++;
 	}
 	if (cnt != mddev-&gt;raid_disks) {
@@ -168,6 +162,36 @@ static int linear_run (mddev_t *mddev)
 		goto out;
 	}
 
+	min_spacing = mddev-&gt;array_size;
+	sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));
+
+	/* min_spacing is the minimum spacing that will fit the hash
+	 * table in one PAGE.  This may be much smaller than needed.
+	 * We find the smallest non-terminal set of consecutive devices
+	 * that is larger than min_spacing and use the size of that as
+	 * the actual spacing
+	 */
+	conf-&gt;hash_spacing = mddev-&gt;array_size;
+	for (i=0; i &lt; cnt-1 ; i++) {
+		sector_t sz = 0;
+		int j;
+		for (j=i; i&lt;cnt-1 &amp;&amp; sz &lt; min_spacing ; j++)
+			sz += conf-&gt;disks[j].size;
+		if (sz &gt;= min_spacing &amp;&amp; sz &lt; conf-&gt;hash_spacing)
+			conf-&gt;hash_spacing = sz;
+	}
+
+	/* hash_spacing may be too large for sector_div to work with,
+	 * so we might need to pre-shift
+	 */
+	conf-&gt;preshift = 0;
+	if (sizeof(sector_t) &gt; sizeof(u32)) {
+		sector_t space = conf-&gt;hash_spacing;
+		while (space &gt; (sector_t)(~(u32)0)) {
+			space &gt;&gt;= 1;
+			conf-&gt;preshift++;
+		}
+	}
 	/*
 	 * This code was restructured to work around a gcc-2.95.3 internal
 	 * compiler error.  Alter it with care.
@@ -177,39 +201,52 @@ static int linear_run (mddev_t *mddev)
 		unsigned round;
 		unsigned long base;
 
-		sz = mddev-&gt;array_size;
-		base = conf-&gt;smallest-&gt;size;
+		sz = mddev-&gt;array_size &gt;&gt; conf-&gt;preshift;
+		sz += 1; /* force round-up */
+		base = conf-&gt;hash_spacing &gt;&gt; conf-&gt;preshift;
 		round = sector_div(sz, base);
-		nb_zone = conf-&gt;nr_zones = sz + (round ? 1 : 0);
+		nb_zone = sz + (round ? 1 : 0);
 	}
-			
-	conf-&gt;hash_table = kmalloc (sizeof (dev_info_t*) * nb_zone,
+	BUG_ON(nb_zone &gt; PAGE_SIZE / sizeof(struct dev_info *));
+
+	conf-&gt;hash_table = kmalloc (sizeof (struct dev_info *) * nb_zone,
 					GFP_KERNEL);
 	if (!conf-&gt;hash_table)
 		goto out;
 
 	/*
 	 * Here we generate the linear hash table
+	 * First calculate the device offsets.
 	 */
+	conf-&gt;disks[0].offset = 0;
+	for (i=1; i&lt;mddev-&gt;raid_disks; i++)
+		conf-&gt;disks[i].offset =
+			conf-&gt;disks[i-1].offset +
+			conf-&gt;disks[i-1].size;
+
 	table = conf-&gt;hash_table;
-	start = 0;
 	curr_offset = 0;
-	for (i = 0; i &lt; cnt; i++) {
-		dev_info_t *disk = conf-&gt;disks + i;
-
-		disk-&gt;offset = curr_offset;
-		curr_offset += disk-&gt;size;
-
-		/* 'curr_offset' is the end of this disk
-		 * 'start' is the start of table
+	i = 0;
+	for (curr_offset = 0;
+	     curr_offset &lt; mddev-&gt;array_size;
+	     curr_offset += conf-&gt;hash_spacing) {
+
+		while (i &lt; mddev-&gt;raid_disks-1 &amp;&amp;
+		       curr_offset &gt;= conf-&gt;disks[i+1].offset)
+			i++;
+
+		*table ++ = conf-&gt;disks + i;
+	}
+
+	if (conf-&gt;preshift) {
+		conf-&gt;hash_spacing &gt;&gt;= conf-&gt;preshift;
+		/* round hash_spacing up so that when we divide by it,
+		 * we err on the side of "too-low", which is safest.
 		 */
-		while (start &lt; curr_offset) {
-			*table++ = disk;
-			start += conf-&gt;smallest-&gt;size;
-		}
+		conf-&gt;hash_spacing++;
 	}
-	if (table-conf-&gt;hash_table != nb_zone)
-		BUG();
+
+	BUG_ON(table - conf-&gt;hash_table &gt; nb_zone);
 
 	blk_queue_merge_bvec(mddev-&gt;queue, linear_mergeable_bvec);
 	mddev-&gt;queue-&gt;unplug_fn = linear_unplug;
@@ -299,7 +336,7 @@ static void linear_status (struct seq_fi
 	sector_t s = 0;
   
 	seq_printf(seq, "      ");
-	for (j = 0; j &lt; conf-&gt;nr_zones; j++)
+	for (j = 0; j &lt; mddev-&gt;raid_disks; j++)
 	{
 		char b[BDEVNAME_SIZE];
 		s += conf-&gt;smallest_size;
diff -puN include/linux/raid/linear.h~md-support-md-linear-array-with-components-greater-than-2-terabytes include/linux/raid/linear.h
--- devel/include/linux/raid/linear.h~md-support-md-linear-array-with-components-greater-than-2-terabytes	2005-08-21 22:35:53.000000000 -0700
+++ devel-akpm/include/linux/raid/linear.h	2005-08-21 22:35:53.000000000 -0700
@@ -14,8 +14,8 @@ typedef struct dev_info dev_info_t;
 struct linear_private_data
 {
 	dev_info_t		**hash_table;
-	dev_info_t		*smallest;
-	int			nr_zones;
+	sector_t		hash_spacing;
+	int			preshift; /* shift before dividing by hash_spacing */
 	dev_info_t		disks[0];
 };
 
_
</pre></body></html>