Commit 1737ddec authored by Jens Axboe, committed by Linus Torvalds

[PATCH] bio_copy_user() cleanups and fixes

blk_rq_map_user() is a bit of a hack currently, since it drops back to
kmalloc() if bio_map_user() fails.  This is unfortunate since it means we
do no real segment or size checking (and the request segment counts contain
crap, already found one bug in a scsi lld).  It's also pretty nasty for >
PAGE_SIZE requests, as we attempt to do higher order page allocations.
Even worse still, ide-cd will drop back to PIO for non-sg/bio requests. 
All in all, very suboptimal.

This patch adds bio_copy_user() which simply sets up a bio with kernel
pages and copies data as needed for reads and writes.  It also changes
bio_map_user() to return an error pointer like bio_copy_user(), so we can
return something sane to the user instead of always -ENOMEM.
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 8e7e3e3e
...@@ -1823,50 +1823,43 @@ EXPORT_SYMBOL(blk_insert_request); ...@@ -1823,50 +1823,43 @@ EXPORT_SYMBOL(blk_insert_request);
/*
 * Map user data at @ubuf/@len into a new request on @q for direct io.
 *
 * If the buffer and length satisfy the queue's dma alignment, the user
 * pages are mapped directly for zero-copy dma via bio_map_user().
 * Otherwise kernel bounce pages are set up via bio_copy_user().
 *
 * Returns the request on success, or an ERR_PTR() value on failure.
 * Pair with blk_rq_unmap_user() on completion.
 */
struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf,
				unsigned int len)
{
	unsigned long uaddr;
	struct request *rq;
	struct bio *bio;

	/* reject transfers larger than a single request can carry */
	if (len > (q->max_sectors << 9))
		return ERR_PTR(-EINVAL);
	/* buffer and length must both be given, or both be absent */
	if ((!len && ubuf) || (len && !ubuf))
		return ERR_PTR(-EINVAL);

	rq = blk_get_request(q, rw, __GFP_WAIT);
	if (!rq)
		return ERR_PTR(-ENOMEM);

	/*
	 * if alignment requirement is satisfied, map in user pages for
	 * direct dma. else, set up kernel bounce buffers
	 */
	uaddr = (unsigned long) ubuf;
	if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
		bio = bio_map_user(q, NULL, uaddr, len, rw == READ);
	else
		bio = bio_copy_user(q, uaddr, len, rw == READ);

	if (!IS_ERR(bio)) {
		rq->bio = rq->biotail = bio;
		blk_rq_bio_prep(q, rq, bio);

		/* data lives in the bio, not in a raw kernel buffer */
		rq->buffer = rq->data = NULL;
		rq->data_len = len;
		return rq;
	}

	/*
	 * bio is the err-ptr
	 */
	blk_put_request(rq);
	return (struct request *) bio;
}
EXPORT_SYMBOL(blk_rq_map_user); EXPORT_SYMBOL(blk_rq_map_user);
...@@ -1880,18 +1873,15 @@ EXPORT_SYMBOL(blk_rq_map_user); ...@@ -1880,18 +1873,15 @@ EXPORT_SYMBOL(blk_rq_map_user);
* Description: * Description:
* Unmap a request previously mapped by blk_rq_map_user(). * Unmap a request previously mapped by blk_rq_map_user().
*/ */
int blk_rq_unmap_user(struct request *rq, void __user *ubuf, struct bio *bio, int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen)
unsigned int ulen)
{ {
const int read = rq_data_dir(rq) == READ;
int ret = 0; int ret = 0;
if (bio) if (bio) {
bio_unmap_user(bio, read); if (bio_flagged(bio, BIO_USER_MAPPED))
if (rq->buffer) { bio_unmap_user(bio);
if (read && copy_to_user(ubuf, rq->buffer, ulen)) else
ret = -EFAULT; ret = bio_uncopy_user(bio);
kfree(rq->buffer);
} }
blk_put_request(rq); blk_put_request(rq);
......
...@@ -211,7 +211,7 @@ static int sg_io(request_queue_t *q, struct gendisk *bd_disk, ...@@ -211,7 +211,7 @@ static int sg_io(request_queue_t *q, struct gendisk *bd_disk,
hdr->sb_len_wr = len; hdr->sb_len_wr = len;
} }
if (blk_rq_unmap_user(rq, hdr->dxferp, bio, hdr->dxfer_len)) if (blk_rq_unmap_user(rq, bio, hdr->dxfer_len))
return -EFAULT; return -EFAULT;
/* may not have succeeded, but output values written to control /* may not have succeeded, but output values written to control
......
...@@ -2015,7 +2015,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, ...@@ -2015,7 +2015,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
cdi->last_sense = s->sense_key; cdi->last_sense = s->sense_key;
} }
if (blk_rq_unmap_user(rq, ubuf, bio, len)) if (blk_rq_unmap_user(rq, bio, len))
ret = -EFAULT; ret = -EFAULT;
if (ret) if (ret)
......
...@@ -1959,13 +1959,17 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) ...@@ -1959,13 +1959,17 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
* sg request * sg request
*/ */
if (rq->bio) { if (rq->bio) {
if (rq->data_len & 3) { int mask = drive->queue->dma_alignment;
printk("%s: block pc not aligned, len=%d\n", drive->name, rq->data_len); unsigned long addr = (unsigned long) page_address(bio_page(rq->bio));
cdrom_end_request(drive, 0);
return ide_stopped;
}
info->dma = drive->using_dma;
info->cmd = rq_data_dir(rq); info->cmd = rq_data_dir(rq);
info->dma = drive->using_dma;
/*
* check if dma is safe
*/
if ((rq->data_len & mask) || (addr & mask))
info->dma = 0;
} }
/* Start sending the command to the drive. */ /* Start sending the command to the drive. */
...@@ -3133,7 +3137,7 @@ int ide_cdrom_setup (ide_drive_t *drive) ...@@ -3133,7 +3137,7 @@ int ide_cdrom_setup (ide_drive_t *drive)
int nslots; int nslots;
blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn);
blk_queue_dma_alignment(drive->queue, 3); blk_queue_dma_alignment(drive->queue, 31);
drive->queue->unplug_delay = (1 * HZ) / 1000; drive->queue->unplug_delay = (1 * HZ) / 1000;
if (!drive->queue->unplug_delay) if (!drive->queue->unplug_delay)
drive->queue->unplug_delay = 1; drive->queue->unplug_delay = 1;
......
...@@ -376,6 +376,115 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, ...@@ -376,6 +376,115 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
len, offset); len, offset);
} }
/**
 *	bio_uncopy_user	-	finish previously mapped bio
 *	@bio: bio being terminated
 *
 *	Free pages allocated from bio_copy_user() and write back data
 *	to user space in case of a read.
 */
int bio_uncopy_user(struct bio *bio)
{
	struct bio_vec *bvec;
	char *uaddr = bio->bi_private;	/* user start address stashed by bio_copy_user() */
	int i, ret = 0;

	/*
	 * Walk every segment: for a READ, copy the bounced data back to
	 * user space; in ALL cases free the bounce page.  Freeing only
	 * inside the READ branch (as before) leaked every bounce page of
	 * a WRITE request.
	 */
	__bio_for_each_segment(bvec, bio, i, 0) {
		char *addr = page_address(bvec->bv_page);

		if (bio_data_dir(bio) == READ && !ret &&
		    copy_to_user(uaddr, addr, bvec->bv_len))
			ret = -EFAULT;

		__free_page(bvec->bv_page);
		uaddr += bvec->bv_len;
	}

	bio_put(bio);
	return ret;
}
/**
 *	bio_copy_user	-	copy user data to bio
 *	@q: destination block queue
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with
 *	call bio_uncopy_user() on io completion.
 */
struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr,
			  unsigned int len, int write_to_vm)
{
	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = uaddr >> PAGE_SHIFT;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;

	bio = bio_alloc(GFP_KERNEL, end - start);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = 0;
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		/* bounce page, honouring the queue's allocation mask */
		page = alloc_page(q->bounce_gfp | GFP_KERNEL);
		if (!page) {
			ret = -ENOMEM;
			break;
		}

		if (__bio_add_page(q, bio, page, bytes, 0) < bytes) {
			ret = -EINVAL;
			break;
		}

		len -= bytes;
	}

	/*
	 * success
	 */
	if (!ret) {
		if (!write_to_vm) {
			char *p = (char *) uaddr;

			bio->bi_rw |= (1 << BIO_RW);

			/*
			 * for a write, copy in data to kernel pages.  the
			 * source cursor must advance per segment -- the old
			 * code re-copied from uaddr for every segment, so
			 * any transfer spanning more than one page wrote
			 * the wrong data.
			 */
			ret = -EFAULT;
			bio_for_each_segment(bvec, bio, i) {
				char *addr = page_address(bvec->bv_page);

				if (copy_from_user(addr, p, bvec->bv_len))
					goto cleanup;

				p += bvec->bv_len;
			}
		}

		/* stash the start address for bio_uncopy_user() */
		bio->bi_private = (void *) uaddr;
		return bio;
	}

	/*
	 * cleanup
	 */
cleanup:
	bio_for_each_segment(bvec, bio, i)
		__free_page(bvec->bv_page);

	bio_put(bio);
	return ERR_PTR(ret);
}
static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len, unsigned long uaddr, unsigned int len,
int write_to_vm) int write_to_vm)
...@@ -392,12 +501,13 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -392,12 +501,13 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
* size for now, in the future we can relax this restriction * size for now, in the future we can relax this restriction
*/ */
if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
return NULL; return ERR_PTR(-EINVAL);
bio = bio_alloc(GFP_KERNEL, nr_pages); bio = bio_alloc(GFP_KERNEL, nr_pages);
if (!bio) if (!bio)
return NULL; return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
if (!pages) if (!pages)
goto out; goto out;
...@@ -446,11 +556,12 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -446,11 +556,12 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
if (!write_to_vm) if (!write_to_vm)
bio->bi_rw |= (1 << BIO_RW); bio->bi_rw |= (1 << BIO_RW);
bio->bi_flags |= (1 << BIO_USER_MAPPED);
return bio; return bio;
out: out:
kfree(pages); kfree(pages);
bio_put(bio); bio_put(bio);
return NULL; return ERR_PTR(ret);
} }
/** /**
...@@ -461,7 +572,7 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -461,7 +572,7 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
* @write_to_vm: bool indicating writing to pages or not * @write_to_vm: bool indicating writing to pages or not
* *
* Map the user space address into a bio suitable for io to a block * Map the user space address into a bio suitable for io to a block
* device. * device. Returns an error pointer in case of error.
*/ */
struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len, int write_to_vm) unsigned long uaddr, unsigned int len, int write_to_vm)
...@@ -470,7 +581,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -470,7 +581,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm);
if (bio) { if (IS_ERR(bio))
return bio;
/* /*
* subtle -- if __bio_map_user() ended up bouncing a bio, * subtle -- if __bio_map_user() ended up bouncing a bio,
* it would normally disappear when its bi_end_io is run. * it would normally disappear when its bi_end_io is run.
...@@ -479,17 +592,18 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -479,17 +592,18 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
*/ */
bio_get(bio); bio_get(bio);
if (bio->bi_size < len) { if (bio->bi_size == len)
bio_endio(bio, bio->bi_size, 0);
bio_unmap_user(bio, 0);
return NULL;
}
}
return bio; return bio;
/*
* don't support partial mappings
*/
bio_endio(bio, bio->bi_size, 0);
bio_unmap_user(bio);
return ERR_PTR(-EINVAL);
} }
static void __bio_unmap_user(struct bio *bio, int write_to_vm) static void __bio_unmap_user(struct bio *bio)
{ {
struct bio_vec *bvec; struct bio_vec *bvec;
int i; int i;
...@@ -510,7 +624,7 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm) ...@@ -510,7 +624,7 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm)
* make sure we dirty pages we wrote to * make sure we dirty pages we wrote to
*/ */
__bio_for_each_segment(bvec, bio, i, 0) { __bio_for_each_segment(bvec, bio, i, 0) {
if (write_to_vm) if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page); set_page_dirty_lock(bvec->bv_page);
page_cache_release(bvec->bv_page); page_cache_release(bvec->bv_page);
...@@ -522,17 +636,15 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm) ...@@ -522,17 +636,15 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm)
/**
 *	bio_unmap_user	-	unmap a bio
 *	@bio:		the bio being unmapped
 *
 *	Unmap a bio previously mapped by bio_map_user(). Must be called with
 *	a process context.
 *
 *	bio_unmap_user() may sleep.
 */
void bio_unmap_user(struct bio *bio)
{
	/* release the pinned user pages, then drop the extra reference
	 * taken by bio_map_user() */
	__bio_unmap_user(bio);
	bio_put(bio);
}
...@@ -863,3 +975,5 @@ EXPORT_SYMBOL(bio_unmap_user); ...@@ -863,3 +975,5 @@ EXPORT_SYMBOL(bio_unmap_user);
EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_pair_release);
EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split);
EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_split_pool);
EXPORT_SYMBOL(bio_copy_user);
EXPORT_SYMBOL(bio_uncopy_user);
...@@ -120,6 +120,7 @@ struct bio { ...@@ -120,6 +120,7 @@ struct bio {
#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ #define BIO_SEG_VALID 3 /* nr_hw_seg valid */
#define BIO_CLONED 4 /* doesn't own data */ #define BIO_CLONED 4 /* doesn't own data */
#define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/* /*
...@@ -264,9 +265,11 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); ...@@ -264,9 +265,11 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_get_nr_vecs(struct block_device *); extern int bio_get_nr_vecs(struct block_device *);
extern struct bio *bio_map_user(struct request_queue *, struct block_device *, extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
unsigned long, unsigned int, int); unsigned long, unsigned int, int);
extern void bio_unmap_user(struct bio *, int); extern void bio_unmap_user(struct bio *);
extern void bio_set_pages_dirty(struct bio *bio); extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio);
extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
extern int bio_uncopy_user(struct bio *);
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
/* /*
......
...@@ -524,7 +524,7 @@ extern void __blk_stop_queue(request_queue_t *q); ...@@ -524,7 +524,7 @@ extern void __blk_stop_queue(request_queue_t *q);
extern void blk_run_queue(request_queue_t *); extern void blk_run_queue(request_queue_t *);
extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
extern int blk_rq_unmap_user(struct request *, void __user *, struct bio *, unsigned int); extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
static inline request_queue_t *bdev_get_queue(struct block_device *bdev) static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment