diff -Naur linux-2.6.16.9-orig/drivers/md/dm-userspace.c linux-2.6.16.9-dmu/drivers/md/dm-userspace.c
--- linux-2.6.16.9-orig/drivers/md/dm-userspace.c	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.16.9-dmu/drivers/md/dm-userspace.c	2006-04-26 14:24:55.121080008 -0700
@@ -0,0 +1,1143 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/types.h>
+
+#include <linux/dm-userspace.h>
+
+#include "dm.h"
+#include "dm-bio-list.h"
+#include "kcopyd.h"
+
+#define DMU_DEBUG 0
+
+/* Page count handed to kcopyd_client_create() for each device */
+#define DMU_COPY_PAGES 256
+/* Size of the dmu_device.key buffer, terminator included */
+#define DMU_KEY_LEN 256
+
+#define DMU_PREFIX "dm-userspace: "
+
+/* Initial use_count given to a remap when it is (re)programmed */
+#define DMU_LIFETIME 128
+
+#if DMU_DEBUG
+#define DPRINTK( s, arg... ) printk(DMU_PREFIX s, ##arg)
+#else
+#define DPRINTK( s, arg... )
+#endif
+
+/* Module parameter: when non-zero, module init schedules watchdog() */
+static int enable_watchdog = 0;
+static struct work_struct wd;
+
+/*
+ * Guards the list of devices.  The map, control read/write and copy
+ * callback paths also hold it while they touch per-device lists.
+ */
+static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(devices);
+
+/* Device number for the control device */
+static dev_t our_dev;
+
+/*
+ * A block device opened on behalf of a dmu_device and linked on its
+ * target_devs list.
+ */
+struct target_device {
+	struct list_head list;
+	struct block_device *bdev;
+	struct kref users;	/* initialised by get_target() */
+};
+
+/* A dm-userspace device, which consists of multiple targets sharing a
+ * common key
+ */
+struct dmu_device {
+	struct list_head list;		/* entry on the global devices list */
+	struct list_head requests;	/* userspace_request entries */
+	struct list_head remaps;	/* dmu_map entries */
+	struct list_head target_devs;	/* target_device entries */
+
+	struct cdev cdev;		/* control character device */
+	dev_t ctl_dev;			/* its device number */
+
+	char key[DMU_KEY_LEN];
+	struct kref users;
+
+	wait_queue_head_t wqueue;	/* woken by add_request() */
+
+	uint64_t block_size;		/* in 512-byte sectors */
+	uint64_t block_mask;		/* block_size - 1 */
+	unsigned int block_shift;	/* log2(block_size) */
+
+	struct kcopyd_client *kcopyd_client;
+};
+
+/*
+ * A message queued for userspace.  @type selects the live member of
+ * @u: the waiting bios for DM_USERSPACE_MAP_BLOCK, the block number
+ * for DM_USERSPACE_COPY_FINISHED.
+ */
+struct userspace_request {
+	struct list_head list;
+	spinlock_t lock;	/* never taken in this file */
+	struct dmu_device *dev;
+	int type;
+	int sent;		/* set once the request was read by userspace */
+	uint32_t flags;
+	union {
+		struct bio_list bios;
+		uint64_t block;
+	} u;
+};
+
+/*
+ * A mapping from one block of the dm-userspace device to a block on a
+ * destination device.
+ *
+ * NOTE(review): org_block, new_block and offset are 32 bits wide while
+ * the matching struct dmu_write fields are 64 bits - larger values
+ * from userspace are truncated on assignment.
+ */
+struct dmu_map {
+	uint32_t org_block; /* Original block */
+	uint32_t new_block; /* Destination block */
+	int32_t offset;
+	uint32_t flags;
+	struct target_device *src;
+	struct target_device *dest;
+	struct bio_list bios;
+	struct list_head list;
+	struct dmu_device *dev;
+
+	uint32_t use_count;
+
+	struct dmu_map *next; /* Next remap that is dependent on this one */
+};
+
+/* Forward declarations */
+static struct file_operations ctl_fops;
+static void copy_block(struct dmu_map *remap);
+
+/*
+ * Return the block number for @sector
+ */
+static inline u64 dmu_block(struct dmu_device *dev,
+			    sector_t sector)
+{
+	return sector >> dev->block_shift;
+}
+
+/*
+ * Return the sector offset in a block for @sector
+ */
+static inline u64 dmu_sector_offset(struct dmu_device *dev,
+				    sector_t sector)
+{
+	return sector & dev->block_mask;
+}
+
+/*
+ * Return the starting sector for @block
+ */
+static inline u64 dmu_sector(struct dmu_device *dev,
+			     uint64_t block)
+{
+	return block << dev->block_shift;
+}
+
+/*
+ * Complete every bio on the bi_next chain starting at @bio with an
+ * I/O error.  Each bio is unlinked from the chain before it is failed.
+ */
+static void error_bios(struct bio *bio)
+{
+	struct bio *biop;
+	int count = 0;
+
+	while (bio) {
+		biop = bio->bi_next;
+		bio->bi_next = NULL;
+		bio_io_error(bio, bio->bi_size);
+		bio = biop;
+		count++;
+	}
+
+	if (count)
+		DPRINTK("*** Failed %i requests\n", count);
+}
+
+/*
+ * Return the target_device for @devno on @dev's target_devs list,
+ * opening the block device and appending a new entry when none exists.
+ *
+ * Returns NULL if the device cannot be opened or the entry cannot be
+ * allocated.
+ *
+ * NOTE(review): this allocates with GFP_KERNEL and calls
+ * open_by_devnum(), while the caller in this file (remap_request(),
+ * reached from dmu_ctl_write()) holds devices_lock - confirm whether
+ * this path may sleep under the spinlock.
+ */
+static inline struct target_device *get_target(struct dmu_device *dev,
+					       dev_t devno)
+{
+
+	struct target_device *target;
+	struct block_device *bdev;
+
+	list_for_each_entry(target, &dev->target_devs, list) {
+		if (target->bdev->bd_dev == devno)
+			return target;
+	}
+
+	bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE);
+	if (IS_ERR(bdev)) {
+		printk(KERN_ERR DMU_PREFIX "Unable to lookup device %x\n",
+		       devno);
+		return NULL;
+	}
+
+	target = kmalloc(sizeof(*target), GFP_KERNEL);
+	if (!target) {
+		printk(KERN_ERR DMU_PREFIX
+		       "Unable to alloc new target device\n");
+		/* Undo the open above, otherwise the device stays pinned */
+		blkdev_put(bdev);
+		return NULL;
+	}
+
+	target->bdev = bdev;
+	kref_init(&target->users);
+	INIT_LIST_HEAD(&target->list);
+
+	list_add_tail(&target->list, &dev->target_devs);
+
+	return target;
+}
+
+/*
+ * Close @target's block device, unlink it from its device's list and
+ * free it.
+ */
+static void put_target(struct target_device *target)
+{
+	/*
+	 * get_target() opens the device without claiming it, so only the
+	 * open is undone here; a bd_release() would have no bd_claim()
+	 * to pair with.
+	 */
+	blkdev_put(target->bdev);
+
+	list_del(&target->list);
+
+	kfree(target);
+}
+
+/*
+ * Add a request to the device's request queue
+ */
+static inline void add_request(struct dmu_device *dev,
+			       struct userspace_request *req)
+{
+	list_add_tail(&req->list, &dev->requests);
+	wake_up_interruptible(&dev->wqueue);
+}
+
+/*
+ * Fail any bios still queued on @remap and free it.  The caller is
+ * responsible for unlinking @remap from the device's remaps list.
+ */
+static void destroy_remap(struct dmu_map *remap)
+{
+	struct bio *bios;
+
+	bios = bio_list_get(&remap->bios);
+	if (bios)
+		error_bios(bios);
+
+	kfree(remap);
+}
+
+/*
+ * This periodically dumps out some debug information. It's really
+ * only useful while developing.
+ */
+static void watchdog(void *data)
+{
+	unsigned int v_remaps, i_remaps, reqs, devs = 0;
+	struct dmu_device *dev;
+	struct dmu_map *remap;
+	struct userspace_request *req;
+
+	spin_lock(&devices_lock);
+
+	list_for_each_entry(dev, &devices, list) {
+
+		v_remaps = i_remaps = reqs = 0;
+
+		list_for_each_entry(remap, &dev->remaps, list) {
+			if (DMU_GET_FLAG(remap->flags, DMU_FLAG_VALID))
+				v_remaps++;
+			else
+				i_remaps++;
+		}
+
+		list_for_each_entry(req, &dev->requests, list) {
+			reqs++;
+		}
+
+		printk("Device %x:%x: "
+		       " reqs: %u "
+		       " inv maps: %u "
+		       " val maps: %u\n",
+		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev),
+		       reqs, i_remaps, v_remaps);
+		devs++;
+	}
+
+	spin_unlock(&devices_lock);
+
+	/* Re-arm: the dump repeats every HZ jiffies until cancelled */
+	schedule_delayed_work(&wd, HZ);
+}
+
+/*
+ * Redirect @bio to the destination described by @remap: the sector
+ * becomes the start of the new block plus the bio's offset within its
+ * original block plus the remap's extra offset.
+ */
+static void bio_remap(struct bio *bio,
+		      struct dmu_map *remap)
+{
+	BUG_ON(remap->dest == NULL);
+
+	bio->bi_sector = dmu_sector(remap->dev, remap->new_block) +
+		dmu_sector_offset(remap->dev, bio->bi_sector) +
+		remap->offset;
+
+	bio->bi_bdev = remap->dest->bdev;
+
+	BUG_ON(bio->bi_bdev == NULL);
+}
+
+/*
+ * Submit every bio queued on @remap to its destination, mark the remap
+ * valid, queue a DM_USERSPACE_COPY_FINISHED notification if the remap
+ * was copy-first, then start the remap chained behind this one (if
+ * any).  A temporary remap is destroyed at the end.
+ *
+ * Every caller in this file holds devices_lock, hence GFP_ATOMIC.
+ *
+ * NOTE(review): generic_make_request() is also called with the lock
+ * held - confirm that is acceptable.
+ */
+static void remap_flusher(struct dmu_map *remap)
+{
+	struct bio *bio;
+	struct userspace_request *req;
+
+	DPRINTK("Flushing bios for block %u:%u\n",
+		remap->org_block, remap->new_block);
+
+	while ((bio = bio_list_pop(&remap->bios))) {
+		bio_remap(bio, remap);
+
+		DPRINTK("Flushed %llu:%llu (%u bytes)\n",
+			dmu_block(remap->dev, bio->bi_sector),
+			dmu_sector_offset(remap->dev, bio->bi_sector),
+			bio->bi_size);
+
+		generic_make_request(bio);
+	}
+
+	/* Now this is a valid mapping */
+	DMU_SET_FLAG(remap->flags, DMU_FLAG_VALID);
+
+	/* Notify userspace */
+	if (DMU_GET_FLAG(remap->flags, DMU_FLAG_COPY_FIRST)) {
+		req = kmalloc(sizeof(*req), GFP_ATOMIC);
+		if (req) {
+			req->type = DM_USERSPACE_COPY_FINISHED;
+			req->sent = 0;
+			req->flags = 0;
+			req->dev = remap->dev;
+			req->u.block = remap->org_block;
+			INIT_LIST_HEAD(&req->list);
+
+			add_request(remap->dev, req);
+		} else {
+			/*
+			 * Only the notification is lost; the chained
+			 * remap and the cleanup below must still run.
+			 */
+			printk(KERN_ERR DMU_PREFIX
+			       "Failed to allocate copy response\n");
+		}
+	}
+
+	if (remap->next) {
+		if (DMU_GET_FLAG(remap->next->flags, DMU_FLAG_COPY_FIRST))
+			copy_block(remap->next);
+		else
+			remap_flusher(remap->next);
+		remap->next = NULL;
+	}
+
+	if (DMU_GET_FLAG(remap->flags, DMU_FLAG_TEMPORARY))
+		destroy_remap(remap);
+}
+
+/*
+ * kref release function for a dmu_device: unlink it from the global
+ * list, release its targets, remaps and pending requests (failing any
+ * bios still waiting), then tear down the kcopyd client and the
+ * control device and free the structure.
+ */
+static void destroy_dmu_device(struct kref *ref)
+{
+	struct dmu_device *dev;
+	struct list_head *cursor, *next;
+	struct bio *bio;
+
+	dev = container_of(ref, struct dmu_device, users);
+
+	DPRINTK("Destroying device: %s\n", dev->key);
+
+	spin_lock(&devices_lock);
+	list_del(&dev->list);
+	spin_unlock(&devices_lock);
+
+	list_for_each_safe(cursor, next, &dev->target_devs) {
+		struct target_device *target;
+
+		target = list_entry(cursor,
+				    struct target_device,
+				    list);
+
+		put_target(target);
+	}
+
+	list_for_each_safe(cursor, next, &dev->remaps) {
+		struct dmu_map *remap;
+
+		remap = list_entry(cursor,
+				   struct dmu_map,
+				   list);
+
+		list_del(&remap->list);
+		destroy_remap(remap);
+	}
+
+	list_for_each_safe(cursor, next, &dev->requests) {
+		struct userspace_request *req;
+
+		req = list_entry(cursor,
+				 struct userspace_request,
+				 list);
+
+		list_del(&req->list);
+
+		/*
+		 * u.bios is only the live union member for map
+		 * requests; copy notifications store a block number
+		 * there.
+		 */
+		if (req->type == DM_USERSPACE_MAP_BLOCK) {
+			bio = bio_list_get(&req->u.bios);
+			if (bio)
+				error_bios(bio);
+		}
+		kfree(req);
+	}
+
+	kcopyd_client_destroy(dev->kcopyd_client);
+
+	cdev_del(&dev->cdev);
+	kfree(dev);
+}
+
+/* Take a reference on @dev */
+static inline void get_dev(struct dmu_device *dev)
+{
+	DPRINTK("get on %s\n", dev->key);
+	kref_get(&dev->users);
+}
+
+/* Drop a reference on @dev; the last put runs destroy_dmu_device() */
+static inline void put_dev(struct dmu_device *dev)
+{
+	DPRINTK("put on %s\n", dev->key);
+	kref_put(&dev->users, destroy_dmu_device);
+}
+
+/*
+ * Return the lowest minor number not used by a device on the global
+ * list.  Relies on the list being sorted by ascending minor.
+ */
+static inline int get_free_minor(void)
+{
+	struct dmu_device *dev;
+	int minor = 0;
+
+	list_for_each_entry(dev, &devices, list) {
+		if (MINOR(dev->ctl_dev) != minor)
+			break;
+		minor++;
+	}
+
+	return minor;
+}
+
+/*
+ * Initialise the fields of a freshly allocated @dev.  @block_size is
+ * in sectors and must be a power of two, since the mask and shift are
+ * derived from it.
+ *
+ * Returns 1 on success, 0 if the kcopyd client cannot be created.
+ */
+static int init_dmu_device(struct dmu_device *dev, u32 block_size)
+{
+	int ret;
+
+	cdev_init(&dev->cdev, &ctl_fops);
+	dev->cdev.owner = THIS_MODULE;
+	dev->cdev.ops = &ctl_fops;
+
+	init_waitqueue_head(&dev->wqueue);
+	INIT_LIST_HEAD(&dev->list);
+	INIT_LIST_HEAD(&dev->requests);
+	INIT_LIST_HEAD(&dev->remaps);
+	INIT_LIST_HEAD(&dev->target_devs);
+	kref_init(&dev->users);
+
+	dev->block_size = block_size;
+	dev->block_mask = block_size - 1;
+	dev->block_shift = ffs(block_size) - 1;
+
+	ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopyd_client);
+	if (ret) {
+		printk(DMU_PREFIX "Failed to initialize kcopyd client\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Allocate a device for @key, register its control character device
+ * on the first free minor and insert it into the global list.
+ *
+ * Returns the new device (holding one reference) or NULL on failure.
+ * @ti is not used.
+ *
+ * NOTE(review): cdev_add() is called with devices_lock held - confirm
+ * it cannot sleep.
+ */
+static struct dmu_device *new_dmu_device(char *key,
+					 struct dm_target *ti,
+					 u32 block_size)
+{
+	struct dmu_device *dev, *ptr;
+	struct list_head *pos;
+	int ret;
+
+	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL) {
+		printk(DMU_PREFIX "Failed to allocate new userspace device\n");
+		return NULL;
+	}
+
+	if (!init_dmu_device(dev, block_size))
+		goto bad1;
+
+	/* strncpy() leaves the buffer unterminated for over-long keys */
+	strncpy(dev->key, key, DMU_KEY_LEN);
+	dev->key[DMU_KEY_LEN - 1] = '\0';
+
+	DPRINTK("New device with size %lu mask 0x%lX shift %u\n",
+		dev->block_size, dev->block_mask, dev->block_shift);
+
+	spin_lock(&devices_lock);
+
+	dev->ctl_dev = MKDEV(MAJOR(our_dev), get_free_minor());
+
+	ret = cdev_add(&dev->cdev, dev->ctl_dev, 1);
+	if (ret < 0) {
+		printk(DMU_PREFIX "Failed to register control device %d:%d\n",
+		       MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
+		goto bad2;
+	}
+
+	DPRINTK("Registered new control interface: %i:%i\n",
+		MAJOR(dev->ctl_dev), MINOR(dev->ctl_dev));
+
+	/*
+	 * Insert exactly once, after the last entry whose minor is not
+	 * larger than ours, so the list stays sorted for
+	 * get_free_minor().
+	 */
+	pos = &devices;
+	list_for_each_entry(ptr, &devices, list) {
+		if (MINOR(ptr->ctl_dev) > MINOR(dev->ctl_dev))
+			break;
+		pos = &ptr->list;
+	}
+	list_add(&dev->list, pos);
+
+	spin_unlock(&devices_lock);
+
+	return dev;
+
+ bad2:
+	spin_unlock(&devices_lock);
+	cdev_del(&dev->cdev);
+	kcopyd_client_destroy(dev->kcopyd_client);
+ bad1:
+	kfree(dev);
+	return NULL;
+}
+
+/*
+ * Look up the device whose key equals @key.  Returns it without
+ * taking a reference, or NULL if there is none.
+ */
+static struct dmu_device *find_dmu_device(const char *key)
+{
+	struct dmu_device *dev;
+	struct dmu_device *match = NULL;
+
+	spin_lock(&devices_lock);
+
+	list_for_each_entry(dev, &devices, list) {
+		if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) {
+			match = dev;
+			break;
+		}
+	}
+
+	spin_unlock(&devices_lock);
+
+	return match;
+}
+
+/*
+ * Target constructor.  Arguments: <key> <block size in bytes>.
+ *
+ * Targets sharing a key share one dmu_device; the first one creates
+ * it, later ones take a reference and must use the same block size.
+ * The block size must come out as a non-zero power of two once
+ * converted to sectors.
+ *
+ * Returns 0 on success, -EINVAL otherwise with ti->error set.
+ */
+static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	uint64_t block_size;
+	struct dmu_device *dev = NULL;
+	char *device_key;
+	char *block_size_param;
+
+	if (argc < 2) {
+		ti->error = DMU_PREFIX "Invalid argument count";
+		return -EINVAL;
+	}
+
+	/* Only touch argv[] once argc is known to cover it */
+	device_key = argv[0];
+	block_size_param = argv[1];
+
+	block_size = simple_strtoul(block_size_param, NULL, 10) / 512;
+
+	/* The mask/shift arithmetic needs a non-zero power of two */
+	if (block_size == 0 || (block_size & (block_size - 1))) {
+		ti->error = DMU_PREFIX "Invalid block size";
+		return -EINVAL;
+	}
+
+	dev = find_dmu_device(device_key);
+	if (dev == NULL) {
+		dev = new_dmu_device(device_key,
+				     ti,
+				     block_size);
+		if (dev == NULL) {
+			ti->error = DMU_PREFIX "Failed to create device";
+			goto bad;
+		}
+	} else {
+		get_dev(dev);
+	}
+
+	if (dev->block_size != block_size) {
+		ti->error = DMU_PREFIX "Invalid block size";
+		goto bad;
+	}
+
+	ti->private = dev;
+	ti->split_io = block_size;
+
+	DPRINTK(" block-shift: %i\n", dev->block_shift);
+	DPRINTK(" block-mask: %lx\n", dev->block_mask);
+
+	return 0;
+
+ bad:
+	if (dev)
+		put_dev(dev);
+
+	return -EINVAL;
+}
+
+/* Target destructor: drop this target's reference on its device */
+static void dmu_dtr(struct dm_target *ti)
+{
+	struct dmu_device *dev = (struct dmu_device *) ti->private;
+
+	put_dev(dev);
+
+	DPRINTK("destroyed %d:%d\n", (int)ti->begin, (int)ti->len);
+}
+
+/*
+ * Search @dev for an existing remap of @block
+ *
+ * Side effect: every valid remap passed over before a match is found
+ * has its use_count decremented and is unlinked and destroyed when the
+ * count reaches zero.
+ */
+static struct dmu_map *find_existing_map(struct dmu_device *dev,
+					 uint64_t block)
+{
+	struct dmu_map *remap, *next;
+
+	list_for_each_entry_safe(remap, next, &dev->remaps, list) {
+		if (remap->org_block == block)
+			return remap;
+		else {
+			if (DMU_GET_FLAG(remap->flags, DMU_FLAG_VALID)
+			    && (--remap->use_count == 0)) {
+				DPRINTK("Removing remap %u:%u\n",
+					remap->org_block,
+					remap->new_block);
+				list_del(&remap->list);
+				destroy_remap(remap);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/* Search @dev for an outstanding request for remapping @block */
+static struct userspace_request *find_existing_req(struct dmu_device *dev,
+						   uint64_t block)
+{
+	struct userspace_request *req;
+
+	list_for_each_entry(req, &dev->requests, list) {
+		/* FIXME: Blech */
+		if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
+		    (dmu_block(dev, req->u.bios.head->bi_sector) == block))
+			return req;
+	}
+
+	return NULL;
+}
+
+/*
+ * Queue a new DM_USERSPACE_MAP_BLOCK request for @bio on @dev.
+ *
+ * Returns 0 on success, -1 if the request cannot be allocated.  All
+ * callers in this file hold devices_lock, hence GFP_ATOMIC.
+ */
+static int make_new_request(struct dmu_device *dev, struct bio *bio)
+{
+	struct userspace_request *req;
+
+	req = kmalloc(sizeof(*req), GFP_ATOMIC);
+	if (req == NULL)
+		goto bad;
+
+	INIT_LIST_HEAD(&req->list);
+	req->type = DM_USERSPACE_MAP_BLOCK;
+	req->sent = 0;
+	/* Start from a known value; the flag macros only touch one bit */
+	req->flags = 0;
+	if (bio_rw(bio))
+		DMU_SET_FLAG(req->flags, DMU_FLAG_RW);
+	else
+		DMU_CLR_FLAG(req->flags, DMU_FLAG_RW);
+	bio_list_init(&req->u.bios);
+	bio_list_add(&req->u.bios, bio);
+	req->dev = dev;
+
+	add_request(dev, req);
+
+	DPRINTK("Queued %s request for sector " SECTOR_FORMAT "\n",
+		DMU_GET_FLAG(req->flags, DMU_FLAG_RW) ? "write" : "read",
+		bio->bi_sector);
+
+	return 0;
+
+ bad:
+	printk(DMU_PREFIX "Failed to queue bio!\n");
+	return -1;
+}
+
+/*
+ * Handle @bio when a remap for its block already exists.
+ *
+ * A write hitting a read-only remap is sent back to userspace as a new
+ * request.  Otherwise the bio is redirected at once if the remap is
+ * valid, or parked on the remap until it becomes valid.
+ *
+ * Returns 1 if @bio was remapped in place, 0 if it was queued, or a
+ * negative value if a new request could not be queued.
+ */
+static int dmu_map_remap_case(struct dmu_device *dev,
+			      struct dmu_map *remap,
+			      struct bio *bio)
+{
+	int ret = 0;
+
+	if (!DMU_GET_FLAG(remap->flags, DMU_FLAG_RW) && bio_rw(bio)) {
+		/* New request */
+		ret = make_new_request(dev, bio);
+	} else {
+		if (DMU_GET_FLAG(remap->flags, DMU_FLAG_VALID)) {
+			bio_remap(bio, remap);
+			ret = 1;
+		} else {
+			bio_list_add(&remap->bios, bio);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Handle @bio when a request for its block is already pending.
+ *
+ * The bio joins the pending request when that request already covers
+ * it (the request is a write, or both are reads).  A write arriving
+ * behind a read request upgrades the request if userspace has not seen
+ * it yet, and otherwise gets a request of its own.
+ *
+ * Returns 0, or a negative value if a new request could not be queued.
+ */
+static int dmu_map_request_case(struct dmu_device *dev,
+				struct userspace_request *req,
+				struct bio *bio)
+{
+	int req_rw = DMU_GET_FLAG(req->flags, DMU_FLAG_RW);
+
+	if (req_rw || !bio_rw(bio)) {
+		bio_list_add(&req->u.bios, bio);
+		return 0;
+	}
+
+	if (req->sent)
+		return make_new_request(dev, bio);
+
+	/* Upgrade the unsent request and let this bio ride along with it */
+	DMU_SET_FLAG(req->flags, DMU_FLAG_RW);
+	bio_list_add(&req->u.bios, bio);
+
+	return 0;
+}
+
+/*
+ * Target map function: remap @bio through an existing mapping, attach
+ * it to a pending request for the same block, or queue a new request
+ * for userspace.
+ *
+ * Returns 1 when @bio was remapped in place, 0 when it was queued, and
+ * a negative value when it could not be queued.
+ */
+static int dmu_map(struct dm_target *ti, struct bio *bio,
+		   union map_info *map_context)
+{
+	struct dmu_device *dev = (struct dmu_device *) ti->private;
+	struct dmu_map *remap;
+	struct userspace_request *req;
+	int ret = 0;
+	u64 block;
+
+	spin_lock(&devices_lock);
+
+	block = dmu_block(dev, bio->bi_sector);
+
+	remap = find_existing_map(dev, block);
+	if (remap) {
+		ret = dmu_map_remap_case(dev, remap, bio);
+		goto done;
+	}
+
+	req = find_existing_req(dev, block);
+	if (req) {
+		ret = dmu_map_request_case(dev, req, bio);
+		goto done;
+	}
+
+	ret = make_new_request(dev, bio);
+
+ done:
+	spin_unlock(&devices_lock);
+
+	return ret;
+}
+
+/*
+ * Target status function.  INFO reports the control device as
+ * major:minor in hex; TABLE reports the key and the block size in
+ * bytes, i.e. the constructor arguments.
+ */
+static int dmu_status(struct dm_target *ti, status_type_t type,
+		      char *result, unsigned int maxlen)
+{
+	struct dmu_device *dev = (struct dmu_device *) ti->private;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		snprintf(result, maxlen, "%x:%x\n",
+			 MAJOR(dev->ctl_dev),
+			 MINOR(dev->ctl_dev));
+		break;
+
+	case STATUSTYPE_TABLE:
+		/* Cast so %llu matches whatever uint64_t is on this arch */
+		snprintf(result, maxlen, "%s %llu",
+			 dev->key,
+			 (unsigned long long)(dev->block_size * 512));
+		break;
+	}
+
+	return 0;
+}
+
+static struct target_type userspace_target = {
+	.name    = "userspace",
+	.version = {0, 1, 0},
+	.module  = THIS_MODULE,
+	.ctr     = dmu_ctr,
+	.dtr     = dmu_dtr,
+	.map     = dmu_map,
+	.status  = dmu_status,
+};
+
+/*
+ * Fill @msg from @req for delivery to userspace.
+ *
+ * Returns 1 if @msg is ready to send, 0 if @req has an unknown type.
+ * COPY_FINISHED and unknown requests are unlinked from the request
+ * list here; the caller owns them afterwards and must free them.
+ */
+static int format_userspace_message(struct dmu_write *msg,
+				    struct userspace_request *req)
+{
+	/*
+	 * The whole structure is copied to userspace, so no field may be
+	 * left holding stale stack contents.
+	 */
+	memset(msg, 0, sizeof(*msg));
+
+	msg->type = req->type;
+	DMU_CPY_FLAG(msg->flags, req->flags, DMU_FLAG_RW);
+
+	if (msg->type == DM_USERSPACE_MAP_BLOCK) {
+		msg->org_block = dmu_block(req->dev,
+					   req->u.bios.head->bi_sector);
+		DPRINTK("Asking userspace to map %u (%c)\n",
+			msg->org_block,
+			DMU_GET_FLAG(msg->flags, DMU_FLAG_RW) ? 'W' : 'R');
+	} else if (msg->type == DM_USERSPACE_COPY_FINISHED) {
+		msg->org_block = req->u.block;
+		/* COPY_FINISHED messages don't get responses, so
+		 * we take them off the request queue here
+		 */
+		list_del(&req->list);
+	} else {
+		printk(DMU_PREFIX "Userspace sent unknown "
+		       "message type %i\n", msg->type);
+		list_del(&req->list);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * read() on the control device: copy as many unsent requests as fit
+ * into @buffer, one struct dmu_write each.
+ *
+ * Returns the number of bytes copied, 0 if nothing is available,
+ * -EINVAL if @size cannot hold a single message, -ERESTARTSYS if a
+ * blocking read was interrupted, or -EFAULT if @buffer is bad.
+ *
+ * NOTE(review): copy_to_user() runs with devices_lock held - confirm
+ * whether it can fault and sleep here; moving it out needs a way to
+ * roll back requests that were already marked as sent.
+ */
+ssize_t dmu_ctl_read(struct file *file, char *buffer,
+		     size_t size, loff_t *offset)
+{
+
+	struct dmu_device *dev = (struct dmu_device *)file->private_data;
+	struct dmu_write msg;
+	struct userspace_request *req = NULL;
+	struct userspace_request *next;
+	int ret = 0;
+	int num_reqs, req_idx = 0;
+	int unlinked;
+
+	num_reqs = size / sizeof(msg);
+
+	if (num_reqs == 0)
+		return -EINVAL;
+
+	/* Quick decision based on whether or not there are requests */
+	if (file->f_flags & O_NONBLOCK) {
+		if (list_empty(&dev->requests))
+			return 0;
+	} else {
+		DPRINTK("Blocking read...\n");
+		if (wait_event_interruptible(dev->wqueue,
+					     !list_empty(&dev->requests)))
+			return -ERESTARTSYS;
+	}
+
+	spin_lock(&devices_lock);
+
+	list_for_each_entry_safe(req, next, &dev->requests, list) {
+
+		DPRINTK("READ loop\n");
+
+		if (req->sent)
+			continue;
+
+		if (!format_userspace_message(&msg, req)) {
+			/* Unknown type: already unlinked, so free it */
+			kfree(req);
+			continue;
+		}
+
+		/* Copy notifications were unlinked and get no reply */
+		unlinked = (msg.type == DM_USERSPACE_COPY_FINISHED);
+
+		if (copy_to_user(buffer+ret, &msg, sizeof(msg))) {
+			DPRINTK("control read copy_to_user failed!\n");
+			if (unlinked)
+				kfree(req);
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (unlinked)
+			kfree(req);
+		else
+			req->sent = 1;
+
+		ret += sizeof(msg);
+		if (++req_idx >= num_reqs)
+			break;
+	}
+
+ out:
+	spin_unlock(&devices_lock);
+
+	return ret;
+}
+
+/*
+ * kcopyd completion callback for copy_block(): flush the bios waiting
+ * on the remap passed as @data.
+ *
+ * NOTE(review): a failed copy is only reported; the waiting bios are
+ * still redirected to the destination block.  Failing them instead
+ * needs a policy for the remap and anything chained behind it.
+ */
+static void copy_callback(int read_err,
+			  unsigned int write_err,
+			  void *data)
+{
+	if (read_err || write_err)
+		printk(KERN_ERR DMU_PREFIX
+		       "Block copy failed (read error %d, write error %u)\n",
+		       read_err, write_err);
+
+	spin_lock(&devices_lock);
+	remap_flusher((struct dmu_map *)data);
+	spin_unlock(&devices_lock);
+}
+
+/*
+ * Start an asynchronous copy of @remap's original block from its
+ * source device to the new block on its destination device.
+ * copy_callback() runs when the copy completes.
+ */
+static void copy_block(struct dmu_map *remap)
+{
+	struct io_region src, dst;
+
+	/*
+	 * Go through dmu_sector() so the shift happens in 64 bits; the
+	 * block numbers themselves are only 32 bits wide.
+	 */
+	src.bdev = remap->src->bdev;
+	src.sector = dmu_sector(remap->dev, remap->org_block);
+	src.count = remap->dev->block_size;
+
+	dst.bdev = remap->dest->bdev;
+	dst.sector = dmu_sector(remap->dev, remap->new_block);
+	dst.sector += remap->offset;
+	dst.count = remap->dev->block_size;
+
+	DPRINTK("Copying:"
+		SECTOR_FORMAT ":" SECTOR_FORMAT " " SECTOR_FORMAT " -> "
+		SECTOR_FORMAT ":" SECTOR_FORMAT " " SECTOR_FORMAT "\n",
+		dmu_block(remap->dev, src.sector),
+		dmu_sector_offset(remap->dev, src.sector),
+		src.count,
+		dmu_block(remap->dev, dst.sector),
+		dmu_sector_offset(remap->dev, dst.sector),
+		dst.count);
+
+	kcopyd_copy(remap->dev->kcopyd_client,
+		    &src, 1, &dst, 0, copy_callback, remap);
+}
+
+/*
+ * Apply a DM_USERSPACE_MAP_BLOCK response from userspace.
+ *
+ * Creates a remap for msg->org_block, reuses an existing valid one, or
+ * - when one exists that is not valid yet - chains a new remap behind
+ * it.  The bios waiting on @req (which may be NULL) move to the remap,
+ * which is then copied first or flushed at once unless it was chained.
+ *
+ * Returns 1 on success, 0 on failure.  On failure nothing has been
+ * changed and @req still owns its bios.
+ *
+ * Called with devices_lock held, hence GFP_ATOMIC.
+ */
+static int remap_request(struct dmu_write *msg,
+			 struct dmu_device *dev,
+			 struct userspace_request *req)
+
+{
+	struct dmu_map *remap = NULL;
+	struct target_device *s_dev = NULL, *d_dev = NULL;
+	int is_chained = 0;
+
+	if (DMU_GET_FLAG(msg->flags, DMU_FLAG_COPY_FIRST)) {
+		s_dev = get_target(dev, MKDEV(msg->src_maj, msg->src_min));
+		if (!s_dev) {
+			printk(KERN_ERR DMU_PREFIX
+			       "Failed to find src device %i:%i\n",
+			       msg->src_maj, msg->src_min);
+			goto bad;
+		}
+	} else {
+		s_dev = NULL;
+	}
+
+	d_dev = get_target(dev, MKDEV(msg->dest_maj, msg->dest_min));
+	if (!d_dev) {
+		printk(KERN_ERR DMU_PREFIX "Failed to find dst device %i:%i\n",
+		       msg->dest_maj, msg->dest_min);
+		goto bad;
+	}
+
+	remap = find_existing_map(dev, msg->org_block);
+	if (!remap) {
+		remap = kmalloc(sizeof(*remap), GFP_ATOMIC);
+		if (!remap) {
+			printk(KERN_ERR DMU_PREFIX "Failed to alloc remap!");
+			goto bad;
+		}
+
+		bio_list_init(&remap->bios);
+		INIT_LIST_HEAD(&remap->list);
+		remap->next = NULL;
+		remap->flags = 0;
+
+		if (!DMU_GET_FLAG(msg->flags, DMU_FLAG_TEMPORARY))
+			list_add_tail(&remap->list, &dev->remaps);
+
+	} else if (!DMU_GET_FLAG(remap->flags, DMU_FLAG_VALID)) {
+		/* Need to create a new mapping and chain it to the
+		   existing one */
+		struct dmu_map *new_map, *tail;
+
+		new_map = kmalloc(sizeof(*new_map), GFP_ATOMIC);
+		if (!new_map) {
+			printk(KERN_ERR DMU_PREFIX "Failed to alloc remap!");
+			goto bad;
+		}
+
+		/*
+		 * Initialise the new remap; the existing one keeps the
+		 * bios it is already holding.
+		 */
+		bio_list_init(&new_map->bios);
+		INIT_LIST_HEAD(&new_map->list);
+		new_map->next = NULL;
+		new_map->flags = 0;
+
+		/* Append so an already chained remap is not dropped */
+		for (tail = remap; tail->next; tail = tail->next)
+			;
+		tail->next = new_map;
+
+		remap = new_map;
+		is_chained = 1;
+		printk("Chaining a remap\n");
+
+		if (!DMU_GET_FLAG(msg->flags, DMU_FLAG_TEMPORARY))
+			list_add_tail(&remap->list, &dev->remaps);
+	}
+
+	remap->org_block = msg->org_block;
+	remap->new_block = msg->new_block;
+	remap->src = s_dev;
+	remap->dest = d_dev;
+	remap->offset = msg->offset;
+	remap->dev = dev;
+	remap->use_count = DMU_LIFETIME;
+
+	DMU_CLR_FLAG(remap->flags, DMU_FLAG_VALID);
+	DMU_CPY_FLAG(remap->flags, msg->flags, DMU_FLAG_TEMPORARY);
+	DMU_CPY_FLAG(remap->flags, msg->flags, DMU_FLAG_RW);
+	DMU_CPY_FLAG(remap->flags, msg->flags, DMU_FLAG_COPY_FIRST);
+
+	if (req)
+		bio_list_merge(&remap->bios, &req->u.bios);
+
+	/* A chained remap is started by remap_flusher() of its predecessor */
+	if (! is_chained) {
+		if (DMU_GET_FLAG(msg->flags, DMU_FLAG_COPY_FIRST))
+			copy_block(remap);
+		else
+			remap_flusher(remap);
+	}
+
+	return 1;
+
+ bad:
+	printk(KERN_ERR DMU_PREFIX "Remap error: chaos may ensue\n");
+
+	return 0;
+}
+
+/*
+ * Unlink and return the pending map request on @dev whose first bio
+ * falls in @block, or NULL if there is none.  Caller holds
+ * devices_lock and owns the returned request.
+ */
+static struct userspace_request *take_pending_request(struct dmu_device *dev,
+						      uint64_t block)
+{
+	struct userspace_request *req, *next;
+
+	list_for_each_entry_safe(req, next, &dev->requests, list) {
+		if ((req->type == DM_USERSPACE_MAP_BLOCK) &&
+		    (dmu_block(dev, req->u.bios.head->bi_sector) == block)) {
+			list_del(&req->list);
+			return req;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * write() on the control device: consume one struct dmu_write per
+ * response in @buffer and apply it.
+ *
+ * Returns the number of bytes consumed, -EINVAL if @size does not hold
+ * a whole message, or -EFAULT if @buffer is bad.
+ *
+ * Each message is copied in before devices_lock is taken.
+ *
+ * NOTE(review): remap_request() still reaches get_target() (which
+ * opens block devices) with the lock held.
+ */
+ssize_t dmu_ctl_write(struct file *file, const char *buffer,
+		      size_t size, loff_t *offset)
+{
+
+	struct dmu_device *dev = (struct dmu_device *)file->private_data;
+	struct dmu_write msg;
+	struct userspace_request *match = NULL;
+	int num_resp, resp_idx;
+	int ret = 0;
+
+	num_resp = size / sizeof(struct dmu_write);
+
+	if (num_resp == 0)
+		return -EINVAL;
+
+	for (resp_idx = 0; resp_idx < num_resp; resp_idx++) {
+		if (copy_from_user(&msg, buffer+ret, sizeof(msg))) {
+			printk(DMU_PREFIX
+			       "control_write copy_from_user failed!\n");
+			ret = -EFAULT;
+			break;
+		}
+
+		ret += sizeof(msg);
+
+		spin_lock(&devices_lock);
+
+		/*
+		 * A pending request is only taken off the queue for
+		 * message types that dispose of its bios.
+		 */
+		match = NULL;
+
+		switch (msg.type) {
+
+		case DM_USERSPACE_MAP_BLOCK:
+			DPRINTK("Got map: %u -> %u:%i (%i:%i)\n",
+				msg.org_block,
+				msg.new_block,
+				msg.offset,
+				msg.dest_maj,
+				msg.dest_min);
+			match = take_pending_request(dev, msg.org_block);
+			/* On failure the bios are still ours: fail them */
+			if (!remap_request(&msg, dev, match) && match)
+				error_bios(bio_list_get(&match->u.bios));
+			break;
+
+		case DM_USERSPACE_MAP_FAILED:
+			match = take_pending_request(dev, msg.org_block);
+			if (match) {
+				printk(KERN_ERR DMU_PREFIX
+				       "userspace reported "
+				       "failure to map sector %lu\n",
+				       (unsigned long)
+				       match->u.bios.head->bi_sector);
+
+				error_bios(match->u.bios.head);
+			}
+			break;
+		default:
+			printk(DMU_PREFIX
+			       "Unknown request!\n");
+		}
+
+		spin_unlock(&devices_lock);
+
+		kfree(match);
+	}
+
+	return ret;
+}
+
+/* open() on the control device: pin the device for this file */
+int dmu_ctl_open(struct inode *inode, struct file *file)
+{
+	struct dmu_device *dev;
+
+	dev = container_of(inode->i_cdev, struct dmu_device, cdev);
+
+	get_dev(dev);
+
+	file->private_data = dev;
+
+	return 0;
+}
+
+/* release() on the control device: drop the reference from open() */
+int dmu_ctl_release(struct inode *inode, struct file *file)
+{
+	struct dmu_device *dev;
+
+	dev = (struct dmu_device *)file->private_data;
+
+	if (dev == NULL) {
+		printk(DMU_PREFIX "Control release on invalid device!\n");
+		return -EINVAL;
+	}
+
+	put_dev(dev);
+
+	return 0;
+}
+
+static struct file_operations ctl_fops = {
+	.open    = dmu_ctl_open,
+	.release = dmu_ctl_release,
+	.read    = dmu_ctl_read,
+	.write   = dmu_ctl_write,
+	.owner   = THIS_MODULE,
+};
+
+/*
+ * Module init: register the "userspace" target, reserve ten minors
+ * for control devices and, if requested, start the debug watchdog.
+ *
+ * Returns 0 on success or the negative error of the step that failed;
+ * a failed step leaves nothing registered.
+ */
+int __init dm_userspace_init(void)
+{
+	int r = dm_register_target(&userspace_target);
+	if (r < 0) {
+		DMERR(DMU_PREFIX "Register failed %d", r);
+		return r;
+	}
+
+	r = alloc_chrdev_region(&our_dev, 0, 10, "dm-userspace");
+	if (r < 0) {
+		DMERR(DMU_PREFIX "Failed to allocate device numbers %d", r);
+		dm_unregister_target(&userspace_target);
+		return r;
+	}
+
+	/* Only arm the watchdog once nothing can fail any more */
+	if (enable_watchdog) {
+		INIT_WORK(&wd, watchdog, NULL);
+		schedule_delayed_work(&wd, HZ);
+	}
+
+	/* DPRINTK() already prepends DMU_PREFIX */
+	DPRINTK("Loaded (major %i)\n", MAJOR(our_dev));
+
+	return 0;
+}
+
+/*
+ * Module exit: stop the watchdog, empty the global device list and
+ * release the device numbers and the target registration.
+ *
+ * The devices themselves are not freed here.
+ */
+void __exit dm_userspace_exit(void)
+{
+	int r;
+	struct list_head *cursor, *next;
+
+	DPRINTK("Unloading\n");
+
+	if (enable_watchdog)
+		if (!cancel_delayed_work(&wd))
+			flush_scheduled_work();
+
+	spin_lock(&devices_lock);
+
+	list_for_each_safe(cursor, next, &devices) {
+		list_del(cursor);
+	}
+
+	spin_unlock(&devices_lock);
+
+	unregister_chrdev_region(our_dev, 10);
+
+	r = dm_unregister_target(&userspace_target);
+	if (r < 0)
+		DMERR(DMU_PREFIX "unregister failed %d", r);
+}
+
+module_init(dm_userspace_init);
+module_exit(dm_userspace_exit);
+
+module_param(enable_watchdog, int, S_IRUGO);
+
+MODULE_DESCRIPTION(DM_NAME " userspace target");
+MODULE_AUTHOR("Dan Smith");
+MODULE_LICENSE("GPL");
diff -Naur linux-2.6.16.9-orig/drivers/md/Kconfig linux-2.6.16.9-dmu/drivers/md/Kconfig
--- linux-2.6.16.9-orig/drivers/md/Kconfig	2006-04-18 23:10:14.000000000 -0700
+++ linux-2.6.16.9-dmu/drivers/md/Kconfig	2006-04-24 11:30:07.000000000 -0700
@@ -210,6 +210,12 @@
 	---help---
 	  Allow volume managers to take writeable snapshots of a device.
 
+config DM_USERSPACE
+	tristate "Userspace target (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	  A target that provides a userspace interface to device-mapper
+
 config DM_MIRROR
 	tristate "Mirror target (EXPERIMENTAL)"
 	depends on BLK_DEV_DM && EXPERIMENTAL
diff -Naur linux-2.6.16.9-orig/drivers/md/Makefile linux-2.6.16.9-dmu/drivers/md/Makefile
--- linux-2.6.16.9-orig/drivers/md/Makefile	2006-04-18 23:10:14.000000000 -0700
+++ linux-2.6.16.9-dmu/drivers/md/Makefile	2006-04-24 11:30:26.000000000 -0700
@@ -37,6 +37,7 @@
 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
 obj-$(CONFIG_DM_ZERO) += dm-zero.o
+obj-$(CONFIG_DM_USERSPACE) += dm-userspace.o
 
 quiet_cmd_unroll = UNROLL $@
  cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
diff -Naur linux-2.6.16.9-orig/include/linux/dm-userspace.h linux-2.6.16.9-dmu/include/linux/dm-userspace.h
--- linux-2.6.16.9-orig/include/linux/dm-userspace.h	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.16.9-dmu/include/linux/dm-userspace.h	2006-04-26 14:24:55.125079400 -0700
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith
+ *
+ * This program is free software; you can redistribute it
and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __DM_USERSPACE_H
+#define __DM_USERSPACE_H
+
+#ifdef __KERNEL__
+# include <linux/types.h>
+#else
+# include <stdint.h>
+#endif
+
+/*
+ * Message Types (value of dmu_write.type)
+ */
+#define DM_USERSPACE_MAP_BLOCK 1 /* map request, and its response */
+#define DM_USERSPACE_MAP_FAILED 2 /* userspace could not map the block */
+#define DM_USERSPACE_MAP_INVALIDATE 3
+#define DM_USERSPACE_COPY_FINISHED 10 /* kernel finished a copy-first */
+
+/*
+ * Flags and associated macros; every argument is parenthesised
+ */
+#define DMU_FLAG_VALID 1 /* kernel-side: remap is ready for use */
+#define DMU_FLAG_RW 2 /* request or mapping covers writes */
+#define DMU_FLAG_COPY_FIRST 4 /* copy src block to dest before use */
+#define DMU_FLAG_TEMPORARY 8 /* kernel drops the mapping after use */
+
+#define DMU_GET_FLAG(x, y) ((x) & (y))
+#define DMU_SET_FLAG(x, y) ((x) |= (y))
+#define DMU_CLR_FLAG(x, y) ((x) &= ~(y))
+#define DMU_CPY_FLAG(x, y, z) ((x) = (((x) & ~(z)) | ((y) & (z))))
+
+/*
+ * This is the message that is passed back and forth between the
+ * kernel and the user application
+ */
+struct dmu_write {
+	uint32_t type; /* Type of request */
+	uint32_t flags; /* Flags */
+
+	uint64_t org_block; /* Block that was accessed */
+	uint64_t new_block; /* The new block it should go to */
+	int64_t offset; /* Sector offset of the block, if needed */
+
+	uint32_t src_maj; /* The source device for copying */
+	uint32_t src_min;
+
+	uint32_t dest_maj; /* Destination device for copying, and */
+	uint32_t dest_min; /* for the block access */
+
+};
+
+#endif