/*
   raid0.c : Multiple Devices driver for Linux
             Copyright (C) 1994-96 Marc ZYNGIER
             <zyngier@ufr-info-p7.ibp.fr> or
             <maz@gloups.fdn.fr>
             Copyright (C) 1999, 2000 Ingo Molnar, Red Hat

   RAID-0 management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/module.h>
#include <linux/raid/raid0.h>
#include <linux/bio.h>

#define MAJOR_NR MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY

#define DEVICE_NR(device) (minor(device))

static int create_strip_zones (mddev_t *mddev)
{
        int i, c, j;
        unsigned long current_offset, curr_zone_offset;
        raid0_conf_t *conf = mddev_to_conf(mddev);
        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
        struct list_head *tmp1, *tmp2;
        struct strip_zone *zone;
        int cnt;

        /*
         * The number of 'same size groups'
         */
        conf->nr_strip_zones = 0;

        ITERATE_RDEV(mddev,rdev1,tmp1) {
                printk("raid0: looking at %s\n",
                        bdev_partition_name(rdev1->bdev));
                c = 0;
                ITERATE_RDEV(mddev,rdev2,tmp2) {
                        printk("raid0: comparing %s(%ld) with %s(%ld)\n",
                                bdev_partition_name(rdev1->bdev), rdev1->size,
                                bdev_partition_name(rdev2->bdev), rdev2->size);
                        if (rdev2 == rdev1) {
                                printk("raid0: END\n");
                                break;
                        }
                        if (rdev2->size == rdev1->size) {
                                /*
                                 * Not unique, don't count it as a new
                                 * group
                                 */
                                printk("raid0: EQUAL\n");
                                c = 1;
                                break;
                        }
                        printk("raid0: NOT EQUAL\n");
                }
                if (!c) {
                        printk("raid0: ==> UNIQUE\n");
                        conf->nr_strip_zones++;
                        printk("raid0: %d zones\n", conf->nr_strip_zones);
                }
        }
        printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);

        conf->strip_zone = vmalloc(sizeof(struct strip_zone)*
                                conf->nr_strip_zones);
        if (!conf->strip_zone)
                return 1;
        memset(conf->strip_zone, 0, sizeof(struct strip_zone)*
                                conf->nr_strip_zones);

        /* The first zone must contain all devices, so here we check that
         * there is a proper alignment of slots to devices and find them all
         */
        zone = &conf->strip_zone[0];
        cnt = 0;
        smallest = NULL;
        ITERATE_RDEV(mddev, rdev1, tmp1) {
                int j = rdev1->raid_disk;

                if (j < 0 || j >= mddev->raid_disks) {
                        printk("raid0: bad disk number %d - aborting!\n", j);
                        goto abort;
                }
                if (zone->dev[j]) {
                        printk("raid0: multiple devices for %d - aborting!\n",
                                j);
                        goto abort;
                }
                zone->dev[j] = rdev1;
                if (!smallest || (rdev1->size < smallest->size))
                        smallest = rdev1;
                cnt++;
        }
        if (cnt != mddev->raid_disks) {
                printk("raid0: too few disks (%d of %d) - aborting!\n",
                        cnt, mddev->raid_disks);
                goto abort;
        }
        zone->nb_dev = cnt;
        zone->size = smallest->size * cnt;
        zone->zone_offset = 0;

        conf->smallest = zone;
        current_offset = smallest->size;
        curr_zone_offset = zone->size;

        /* now do the other zones */
        for (i = 1; i < conf->nr_strip_zones; i++)
        {
                zone = conf->strip_zone + i;
                printk("raid0: zone %d\n", i);
                zone->dev_offset = current_offset;
                smallest = NULL;
                c = 0;

                for (j = 0; j < cnt; j++) {
                        rdev = conf->strip_zone[0].dev[j];
                        printk("raid0: checking %s ...",
                                bdev_partition_name(rdev->bdev));
                        if (rdev->size > current_offset)
                        {
                                printk(" contained as device %d\n", c);
                                zone->dev[c] = rdev;
                                c++;
                                if (!smallest || (rdev->size < smallest->size)) {
                                        smallest = rdev;
                                        printk(" (%ld) is smallest!\n",
                                                rdev->size);
                                }
                        } else
                                printk(" nope.\n");
                }

                zone->nb_dev = c;
                zone->size = (smallest->size - current_offset) * c;
                printk("raid0: zone->nb_dev: %d, size: %ld\n",
                        zone->nb_dev, zone->size);

                if (!conf->smallest || (zone->size < conf->smallest->size))
                        conf->smallest = zone;

                zone->zone_offset = curr_zone_offset;
                curr_zone_offset += zone->size;

                current_offset = smallest->size;
                printk("raid0: current zone offset: %ld\n", current_offset);
        }
        printk("raid0: done.\n");
        return 0;
abort:
        vfree(conf->strip_zone);
        return 1;
}
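/*
 * Worked example (illustrative only -- the device names and sizes below are
 * hypothetical, not taken from any real configuration): suppose an array of
 * three components, sda1 and sdb1 of 100 blocks each and sdc1 of 200 blocks.
 * Two distinct sizes exist, so create_strip_zones() builds two zones:
 *
 *   zone 0: sda1+sdb1+sdc1, dev_offset 0,   size 3 * 100       = 300 blocks,
 *           zone_offset 0
 *   zone 1: sdc1 only,      dev_offset 100, size 1 * (200-100) = 100 blocks,
 *           zone_offset 300
 *
 * conf->smallest ends up pointing at zone 1 (100 blocks), and the array as a
 * whole spans 400 blocks.
 */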
static int raid0_run (mddev_t *mddev)
{
        unsigned long cur=0, i=0, size, zone0_size, nb_zone;
        raid0_conf_t *conf;

        MOD_INC_USE_COUNT;

        conf = vmalloc(sizeof (raid0_conf_t));
        if (!conf)
                goto out;
        mddev->private = (void *)conf;

        if (create_strip_zones (mddev))
                goto out_free_conf;

        printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]);
        printk("raid0 : conf->smallest->size is %ld blocks.\n",
                conf->smallest->size);
        nb_zone = md_size[mdidx(mddev)]/conf->smallest->size +
                (md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0);
        printk("raid0 : nb_zone is %ld.\n", nb_zone);
        conf->nr_zones = nb_zone;

        printk("raid0 : Allocating %ld bytes for hash.\n",
                nb_zone*sizeof(struct raid0_hash));
        conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone);
        if (!conf->hash_table)
                goto out_free_zone_conf;

        /*
         * Build the hash table: one slot per conf->smallest->size blocks.
         * Each slot records the zone its blocks start in (zone0) and, if the
         * slot straddles a zone boundary, the following zone (zone1).
         */
        size = conf->strip_zone[cur].size;

        i = 0;
        while (cur < conf->nr_strip_zones) {
                conf->hash_table[i].zone0 = conf->strip_zone + cur;

                /*
                 * If we completely fill the slot
                 */
                if (size >= conf->smallest->size) {
                        conf->hash_table[i++].zone1 = NULL;
                        size -= conf->smallest->size;

                        if (!size) {
                                if (++cur == conf->nr_strip_zones)
                                        continue;
                                size = conf->strip_zone[cur].size;
                        }
                        continue;
                }
                if (++cur == conf->nr_strip_zones) {
                        /*
                         * Last zone, set zone1 as NULL
                         */
                        conf->hash_table[i].zone1 = NULL;
                        continue;
                }

                /*
                 * Here we use a 2nd zone to fill the slot
                 */
                zone0_size = size;
                size = conf->strip_zone[cur].size;
                conf->hash_table[i++].zone1 = conf->strip_zone + cur;
                size -= (conf->smallest->size - zone0_size);
        }
        return 0;

out_free_zone_conf:
        vfree(conf->strip_zone);
        conf->strip_zone = NULL;
out_free_conf:
        vfree(conf);
        mddev->private = NULL;
out:
        MOD_DEC_USE_COUNT;
        return 1;
}

static int raid0_stop (mddev_t *mddev)
{
        raid0_conf_t *conf = mddev_to_conf(mddev);

        vfree (conf->hash_table);
        conf->hash_table = NULL;
        vfree (conf->strip_zone);
        conf->strip_zone = NULL;
        vfree (conf);
        mddev->private = NULL;

        MOD_DEC_USE_COUNT;
        return 0;
}
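/*
 * Mapping sketch (illustrative only, continuing the hypothetical two-zone
 * layout described above, with a 64k chunk size so chunksize_bits = 6):
 *
 *   raid0_run() allocates one hash slot per conf->smallest->size (here 100)
 *   blocks, so slots 0-2 map to zone 0 and slot 3 maps to zone 1.
 *
 *   For a bio starting at sector 384: block = 384 >> 1 = 192, hash slot
 *   192 / 100 = 1, and since 192 < 300 the request stays in zone 0.  Then
 *   sect_in_chunk = 384 & 127 = 0, chunk = 192 / (3 << 6) = 1, the device
 *   index is (192 >> 6) % 3 = 0, and rsect = (((1 << 6) + 0) << 1) + 0 = 128,
 *   i.e. the bio is redirected to the second 64k chunk of the first
 *   component device.
 */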
/*
 * FIXME - We assume some things here:
 * - requested buffers are NEVER bigger than chunk size,
 * - requested buffers NEVER cross stripe limits.
 * Of course, those facts may not be valid anymore (and surely won't...)
 * Hey guys, there's some work out there ;-)
 */
static int raid0_make_request (request_queue_t *q, struct bio *bio)
{
        mddev_t *mddev = q->queuedata;
        unsigned int sect_in_chunk, chunksize_bits, chunk_size;
        raid0_conf_t *conf = mddev_to_conf(mddev);
        struct raid0_hash *hash;
        struct strip_zone *zone;
        mdk_rdev_t *tmp_dev;
        unsigned long chunk, block, rsect;

        chunk_size = mddev->chunk_size >> 10;   /* chunk size in 1K blocks */
        chunksize_bits = ffz(~chunk_size);      /* log2(chunk_size) */
        block = bio->bi_sector >> 1;            /* request start in 1K blocks */
        hash = conf->hash_table + block / conf->smallest->size;

        /* Sanity check: the request must fit within a single chunk */
        if (chunk_size < (block % chunk_size) + (bio->bi_size >> 10))
                goto bad_map;

        if (!hash)
                goto bad_hash;

        if (!hash->zone0)
                goto bad_zone0;

        if (block >= (hash->zone0->size + hash->zone0->zone_offset)) {
                if (!hash->zone1)
                        goto bad_zone1;
                zone = hash->zone1;
        } else
                zone = hash->zone0;

        sect_in_chunk = bio->bi_sector & ((chunk_size<<1) - 1);
        chunk = (block - zone->zone_offset) / (zone->nb_dev << chunksize_bits);
        tmp_dev = zone->dev[(block >> chunksize_bits) % zone->nb_dev];
        rsect = (((chunk << chunksize_bits) + zone->dev_offset)<<1)
                + sect_in_chunk;

        /*
         * The new BH_Lock semantics in ll_rw_blk.c guarantee that this
         * is the only IO operation happening on this bh.
         */
        bio->bi_bdev = tmp_dev->bdev;
        bio->bi_sector = rsect;

        /*
         * Let the main block layer submit the IO and resolve recursion:
         */
        return 1;

bad_map:
        printk("raid0_make_request bug: can't convert block across chunks"
                " or bigger than %dk %ld %d\n", chunk_size,
                bio->bi_sector, bio->bi_size >> 10);
        goto outerr;
bad_hash:
        printk("raid0_make_request bug: hash==NULL for block %ld\n", block);
        goto outerr;
bad_zone0:
        printk("raid0_make_request bug: hash->zone0==NULL for block %ld\n",
                block);
        goto outerr;
bad_zone1:
        printk("raid0_make_request bug: hash->zone1==NULL for block %ld\n",
                block);
outerr:
        bio_io_error(bio, bio->bi_size);
        return 0;
}

static int raid0_status (char *page, mddev_t *mddev)
{
        int sz = 0;
#undef MD_DEBUG
#ifdef MD_DEBUG
        int j, k;
        raid0_conf_t *conf = mddev_to_conf(mddev);

        sz += sprintf(page + sz, " ");
        for (j = 0; j < conf->nr_zones; j++) {
                sz += sprintf(page + sz, "[z%d",
                        conf->hash_table[j].zone0 - conf->strip_zone);
                if (conf->hash_table[j].zone1)
                        sz += sprintf(page+sz, "/z%d] ",
                                conf->hash_table[j].zone1 - conf->strip_zone);
                else
                        sz += sprintf(page+sz, "] ");
        }
        sz += sprintf(page + sz, "\n");

        for (j = 0; j < conf->nr_strip_zones; j++) {
                sz += sprintf(page + sz, " z%d=[", j);
                for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
                        sz += sprintf(page+sz, "%s/", bdev_partition_name(
                                conf->strip_zone[j].dev[k]->bdev));
                sz--;
                sz += sprintf(page+sz, "] zo=%d do=%d s=%d\n",
                        conf->strip_zone[j].zone_offset,
                        conf->strip_zone[j].dev_offset,
                        conf->strip_zone[j].size);
        }
#endif
        sz += sprintf(page + sz, " %dk chunks", mddev->chunk_size/1024);
        return sz;
}

static mdk_personality_t raid0_personality =
{
        .name           = "raid0",
        .make_request   = raid0_make_request,
        .run            = raid0_run,
        .stop           = raid0_stop,
        .status         = raid0_status,
};

static int __init raid0_init (void)
{
        return register_md_personality (RAID0, &raid0_personality);
}

static void raid0_exit (void)
{
        unregister_md_personality (RAID0);
}

module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");