Commit 1737ddec authored by Jens Axboe, committed by Linus Torvalds

[PATCH] bio_copy_user() cleanups and fixes

blk_rq_map_user() is a bit of a hack currently, since it drops back to
kmalloc() if bio_map_user() fails.  This is unfortunate since it means we
do no real segment or size checking (and the request segment counts contain
crap, already found one bug in a scsi lld).  It's also pretty nasty for >
PAGE_SIZE requests, as we attempt to do higher order page allocations.
Even worse still, ide-cd will drop back to PIO for non-sg/bio requests. 
All in all, very suboptimal.

This patch adds bio_copy_user() which simply sets up a bio with kernel
pages and copies data as needed for reads and writes.  It also changes
bio_map_user() to return an error pointer like bio_copy_user(), so we can
return something sane to the user instead of always -ENOMEM.
Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 8e7e3e3e
...@@ -1823,50 +1823,43 @@ EXPORT_SYMBOL(blk_insert_request); ...@@ -1823,50 +1823,43 @@ EXPORT_SYMBOL(blk_insert_request);
/*
 * Map user data at @ubuf/@len into a new request on @q for direct io.
 *
 * If the buffer and length satisfy the queue's dma alignment, the user
 * pages are mapped directly for zero-copy dma via bio_map_user().
 * Otherwise kernel bounce pages are set up via bio_copy_user().
 *
 * Returns the request on success, or an ERR_PTR() value on failure.
 * Pair with blk_rq_unmap_user() on completion.
 */
struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf,
				unsigned int len)
{
	unsigned long uaddr;
	struct request *rq;
	struct bio *bio;

	/* reject transfers larger than a single request can carry */
	if (len > (q->max_sectors << 9))
		return ERR_PTR(-EINVAL);
	/* buffer and length must both be given, or both be absent */
	if ((!len && ubuf) || (len && !ubuf))
		return ERR_PTR(-EINVAL);

	rq = blk_get_request(q, rw, __GFP_WAIT);
	if (!rq)
		return ERR_PTR(-ENOMEM);

	/*
	 * if alignment requirement is satisfied, map in user pages for
	 * direct dma. else, set up kernel bounce buffers
	 */
	uaddr = (unsigned long) ubuf;
	if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
		bio = bio_map_user(q, NULL, uaddr, len, rw == READ);
	else
		bio = bio_copy_user(q, uaddr, len, rw == READ);

	if (!IS_ERR(bio)) {
		rq->bio = rq->biotail = bio;
		blk_rq_bio_prep(q, rq, bio);

		/* data lives in the bio, not in a raw kernel buffer */
		rq->buffer = rq->data = NULL;
		rq->data_len = len;
		return rq;
	}

	/*
	 * bio is the err-ptr
	 */
	blk_put_request(rq);
	return (struct request *) bio;
}
EXPORT_SYMBOL(blk_rq_map_user); EXPORT_SYMBOL(blk_rq_map_user);
...@@ -1880,18 +1873,15 @@ EXPORT_SYMBOL(blk_rq_map_user); ...@@ -1880,18 +1873,15 @@ EXPORT_SYMBOL(blk_rq_map_user);
* Description: * Description:
* Unmap a request previously mapped by blk_rq_map_user(). * Unmap a request previously mapped by blk_rq_map_user().
*/ */
int blk_rq_unmap_user(struct request *rq, void __user *ubuf, struct bio *bio, int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen)
unsigned int ulen)
{ {
const int read = rq_data_dir(rq) == READ;
int ret = 0; int ret = 0;
if (bio) if (bio) {
bio_unmap_user(bio, read); if (bio_flagged(bio, BIO_USER_MAPPED))
if (rq->buffer) { bio_unmap_user(bio);
if (read && copy_to_user(ubuf, rq->buffer, ulen)) else
ret = -EFAULT; ret = bio_uncopy_user(bio);
kfree(rq->buffer);
} }
blk_put_request(rq); blk_put_request(rq);
......
...@@ -211,7 +211,7 @@ static int sg_io(request_queue_t *q, struct gendisk *bd_disk, ...@@ -211,7 +211,7 @@ static int sg_io(request_queue_t *q, struct gendisk *bd_disk,
hdr->sb_len_wr = len; hdr->sb_len_wr = len;
} }
if (blk_rq_unmap_user(rq, hdr->dxferp, bio, hdr->dxfer_len)) if (blk_rq_unmap_user(rq, bio, hdr->dxfer_len))
return -EFAULT; return -EFAULT;
/* may not have succeeded, but output values written to control /* may not have succeeded, but output values written to control
......
...@@ -2015,7 +2015,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, ...@@ -2015,7 +2015,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
cdi->last_sense = s->sense_key; cdi->last_sense = s->sense_key;
} }
if (blk_rq_unmap_user(rq, ubuf, bio, len)) if (blk_rq_unmap_user(rq, bio, len))
ret = -EFAULT; ret = -EFAULT;
if (ret) if (ret)
......
...@@ -1959,13 +1959,17 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) ...@@ -1959,13 +1959,17 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
* sg request * sg request
*/ */
if (rq->bio) { if (rq->bio) {
if (rq->data_len & 3) { int mask = drive->queue->dma_alignment;
printk("%s: block pc not aligned, len=%d\n", drive->name, rq->data_len); unsigned long addr = (unsigned long) page_address(bio_page(rq->bio));
cdrom_end_request(drive, 0);
return ide_stopped;
}
info->dma = drive->using_dma;
info->cmd = rq_data_dir(rq); info->cmd = rq_data_dir(rq);
info->dma = drive->using_dma;
/*
* check if dma is safe
*/
if ((rq->data_len & mask) || (addr & mask))
info->dma = 0;
} }
/* Start sending the command to the drive. */ /* Start sending the command to the drive. */
...@@ -3133,7 +3137,7 @@ int ide_cdrom_setup (ide_drive_t *drive) ...@@ -3133,7 +3137,7 @@ int ide_cdrom_setup (ide_drive_t *drive)
int nslots; int nslots;
blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn);
blk_queue_dma_alignment(drive->queue, 3); blk_queue_dma_alignment(drive->queue, 31);
drive->queue->unplug_delay = (1 * HZ) / 1000; drive->queue->unplug_delay = (1 * HZ) / 1000;
if (!drive->queue->unplug_delay) if (!drive->queue->unplug_delay)
drive->queue->unplug_delay = 1; drive->queue->unplug_delay = 1;
......
...@@ -376,6 +376,115 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, ...@@ -376,6 +376,115 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
len, offset); len, offset);
} }
/**
 *	bio_uncopy_user	-	finish previously mapped bio
 *	@bio: bio being terminated
 *
 *	Free pages allocated from bio_copy_user() and write back data
 *	to user space in case of a read.
 */
int bio_uncopy_user(struct bio *bio)
{
	struct bio_vec *bvec;
	char *uaddr = bio->bi_private;	/* user start address stashed by bio_copy_user() */
	int i, ret = 0;

	/*
	 * Walk every segment: for a READ, copy the bounced data back to
	 * user space; in ALL cases free the bounce page.  Freeing only
	 * inside the READ branch (as before) leaked every bounce page of
	 * a WRITE request.
	 */
	__bio_for_each_segment(bvec, bio, i, 0) {
		char *addr = page_address(bvec->bv_page);

		if (bio_data_dir(bio) == READ && !ret &&
		    copy_to_user(uaddr, addr, bvec->bv_len))
			ret = -EFAULT;

		__free_page(bvec->bv_page);
		uaddr += bvec->bv_len;
	}

	bio_put(bio);
	return ret;
}
/**
 *	bio_copy_user	-	copy user data to bio
 *	@q: destination block queue
 *	@uaddr: start of user address
 *	@len: length in bytes
 *	@write_to_vm: bool indicating writing to pages or not
 *
 *	Prepares and returns a bio for indirect user io, bouncing data
 *	to/from kernel pages as necessary. Must be paired with
 *	call bio_uncopy_user() on io completion.
 */
struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr,
			  unsigned int len, int write_to_vm)
{
	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = uaddr >> PAGE_SHIFT;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;

	bio = bio_alloc(GFP_KERNEL, end - start);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = 0;
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		/* bounce page, honouring the queue's allocation mask */
		page = alloc_page(q->bounce_gfp | GFP_KERNEL);
		if (!page) {
			ret = -ENOMEM;
			break;
		}

		if (__bio_add_page(q, bio, page, bytes, 0) < bytes) {
			ret = -EINVAL;
			break;
		}

		len -= bytes;
	}

	/*
	 * success
	 */
	if (!ret) {
		if (!write_to_vm) {
			char *p = (char *) uaddr;

			bio->bi_rw |= (1 << BIO_RW);

			/*
			 * for a write, copy in data to kernel pages.  the
			 * source cursor must advance per segment -- the old
			 * code re-copied from uaddr for every segment, so
			 * any transfer spanning more than one page wrote
			 * the wrong data.
			 */
			ret = -EFAULT;
			bio_for_each_segment(bvec, bio, i) {
				char *addr = page_address(bvec->bv_page);

				if (copy_from_user(addr, p, bvec->bv_len))
					goto cleanup;

				p += bvec->bv_len;
			}
		}

		/* stash the start address for bio_uncopy_user() */
		bio->bi_private = (void *) uaddr;
		return bio;
	}

	/*
	 * cleanup
	 */
cleanup:
	bio_for_each_segment(bvec, bio, i)
		__free_page(bvec->bv_page);

	bio_put(bio);
	return ERR_PTR(ret);
}
static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len, unsigned long uaddr, unsigned int len,
int write_to_vm) int write_to_vm)
...@@ -392,12 +501,13 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -392,12 +501,13 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
* size for now, in the future we can relax this restriction * size for now, in the future we can relax this restriction
*/ */
if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
return NULL; return ERR_PTR(-EINVAL);
bio = bio_alloc(GFP_KERNEL, nr_pages); bio = bio_alloc(GFP_KERNEL, nr_pages);
if (!bio) if (!bio)
return NULL; return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
if (!pages) if (!pages)
goto out; goto out;
...@@ -446,11 +556,12 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -446,11 +556,12 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
if (!write_to_vm) if (!write_to_vm)
bio->bi_rw |= (1 << BIO_RW); bio->bi_rw |= (1 << BIO_RW);
bio->bi_flags |= (1 << BIO_USER_MAPPED);
return bio; return bio;
out: out:
kfree(pages); kfree(pages);
bio_put(bio); bio_put(bio);
return NULL; return ERR_PTR(ret);
} }
/** /**
...@@ -461,7 +572,7 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -461,7 +572,7 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
* @write_to_vm: bool indicating writing to pages or not * @write_to_vm: bool indicating writing to pages or not
* *
* Map the user space address into a bio suitable for io to a block * Map the user space address into a bio suitable for io to a block
* device. * device. Returns an error pointer in case of error.
*/ */
struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len, int write_to_vm) unsigned long uaddr, unsigned int len, int write_to_vm)
...@@ -470,7 +581,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -470,7 +581,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm);
if (bio) { if (IS_ERR(bio))
return bio;
/* /*
* subtle -- if __bio_map_user() ended up bouncing a bio, * subtle -- if __bio_map_user() ended up bouncing a bio,
* it would normally disappear when its bi_end_io is run. * it would normally disappear when its bi_end_io is run.
...@@ -479,17 +592,18 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, ...@@ -479,17 +592,18 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
*/ */
bio_get(bio); bio_get(bio);
if (bio->bi_size < len) { if (bio->bi_size == len)
bio_endio(bio, bio->bi_size, 0);
bio_unmap_user(bio, 0);
return NULL;
}
}
return bio; return bio;
/*
* don't support partial mappings
*/
bio_endio(bio, bio->bi_size, 0);
bio_unmap_user(bio);
return ERR_PTR(-EINVAL);
} }
static void __bio_unmap_user(struct bio *bio, int write_to_vm) static void __bio_unmap_user(struct bio *bio)
{ {
struct bio_vec *bvec; struct bio_vec *bvec;
int i; int i;
...@@ -510,7 +624,7 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm) ...@@ -510,7 +624,7 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm)
* make sure we dirty pages we wrote to * make sure we dirty pages we wrote to
*/ */
__bio_for_each_segment(bvec, bio, i, 0) { __bio_for_each_segment(bvec, bio, i, 0) {
if (write_to_vm) if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page); set_page_dirty_lock(bvec->bv_page);
page_cache_release(bvec->bv_page); page_cache_release(bvec->bv_page);
...@@ -522,17 +636,15 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm) ...@@ -522,17 +636,15 @@ static void __bio_unmap_user(struct bio *bio, int write_to_vm)
/**
 *	bio_unmap_user	-	unmap a bio
 *	@bio:		the bio being unmapped
 *
 *	Unmap a bio previously mapped by bio_map_user(). Must be called with
 *	a process context.
 *
 *	bio_unmap_user() may sleep.
 */
void bio_unmap_user(struct bio *bio)
{
	/* release the pinned user pages, then drop the extra reference
	 * taken by bio_map_user() */
	__bio_unmap_user(bio);
	bio_put(bio);
}
...@@ -863,3 +975,5 @@ EXPORT_SYMBOL(bio_unmap_user); ...@@ -863,3 +975,5 @@ EXPORT_SYMBOL(bio_unmap_user);
EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_pair_release);
EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split);
EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_split_pool);
EXPORT_SYMBOL(bio_copy_user);
EXPORT_SYMBOL(bio_uncopy_user);
...@@ -120,6 +120,7 @@ struct bio { ...@@ -120,6 +120,7 @@ struct bio {
#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ #define BIO_SEG_VALID 3 /* nr_hw_seg valid */
#define BIO_CLONED 4 /* doesn't own data */ #define BIO_CLONED 4 /* doesn't own data */
#define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_BOUNCED 5 /* bio is a bounce bio */
#define BIO_USER_MAPPED 6 /* contains user pages */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
/* /*
...@@ -264,9 +265,11 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); ...@@ -264,9 +265,11 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_get_nr_vecs(struct block_device *); extern int bio_get_nr_vecs(struct block_device *);
extern struct bio *bio_map_user(struct request_queue *, struct block_device *, extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
unsigned long, unsigned int, int); unsigned long, unsigned int, int);
extern void bio_unmap_user(struct bio *, int); extern void bio_unmap_user(struct bio *);
extern void bio_set_pages_dirty(struct bio *bio); extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio);
extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
extern int bio_uncopy_user(struct bio *);
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
/* /*
......
...@@ -524,7 +524,7 @@ extern void __blk_stop_queue(request_queue_t *q); ...@@ -524,7 +524,7 @@ extern void __blk_stop_queue(request_queue_t *q);
extern void blk_run_queue(request_queue_t *); extern void blk_run_queue(request_queue_t *);
extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
extern int blk_rq_unmap_user(struct request *, void __user *, struct bio *, unsigned int); extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
static inline request_queue_t *bdev_get_queue(struct block_device *bdev) static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment