Commit 275da6a3, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] LOOP_CHANGE_FD ioctl

From: Arjan van de Ven <arjanv@redhat.com>

The patch below (written by Al Viro) solves a nasty chicken-and-egg issue
for operating system installers (well, at least anaconda, but the problem
domain is not exclusive to that).

The basic problem is this:

- The small first stage installer locates the image file of the second
  stage installer (which has X and all the graphical stuff); this image can
  be on the same CD, but it can come via NFS, http or ftp or ...  as well.

- The first stage installer loop-back mounts this image and gives control
  to the second stage installer by calling some binary there.

- The graphical installer then asks the user all those questions and
  starts installing packages.  Again the packages can come from the CD but
  also from NFS or http or ...

Now in case of a CD install, once all requested packages from the first CD
are installed, the installer wants to unmount and eject the CD and prompt
the user to put CD 2 in.......  EXCEPT that the unmount can't work since
the installer is actually running from a loopback mount of this cd.

The solution is a "LOOP_CHANGE_FD" ioctl, where basically the installer
copies the image to the harddisk (which can only be done late, since the
target harddisk is only mkfs'd late in the install) and then magically
switches the backing store FD from underneath the loop device to the one
on the target harddisk (thus unbusying the CD mount).

This is obviously only allowed if the size of the new image is identical
and if the loop image is read-only in the first place.  It's the
responsibility of root to make sure the contents are the same (but that's
of the give-root-enough-rope kind).
parent 0c8846a5
...@@ -66,6 +66,7 @@ ...@@ -66,6 +66,7 @@
#include <linux/suspend.h> #include <linux/suspend.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/buffer_head.h> /* for invalidate_bdev() */ #include <linux/buffer_head.h> /* for invalidate_bdev() */
#include <linux/completion.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -148,14 +149,12 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { ...@@ -148,14 +149,12 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
&xor_funcs &xor_funcs
}; };
static int static loff_t get_loop_size(struct loop_device *lo, struct file *file)
figure_loop_size(struct loop_device *lo)
{ {
loff_t size, offset, loopsize; loff_t size, offset, loopsize;
sector_t x;
/* Compute loopsize in bytes */ /* Compute loopsize in bytes */
size = i_size_read(lo->lo_backing_file->f_mapping->host); size = i_size_read(file->f_mapping->host);
offset = lo->lo_offset; offset = lo->lo_offset;
loopsize = size - offset; loopsize = size - offset;
if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
...@@ -165,8 +164,14 @@ figure_loop_size(struct loop_device *lo) ...@@ -165,8 +164,14 @@ figure_loop_size(struct loop_device *lo)
* Unfortunately, if we want to do I/O on the device, * Unfortunately, if we want to do I/O on the device,
* the number of 512-byte sectors has to fit into a sector_t. * the number of 512-byte sectors has to fit into a sector_t.
*/ */
size = loopsize >> 9; return loopsize >> 9;
x = (sector_t)size; }
static int
figure_loop_size(struct loop_device *lo)
{
loff_t size = get_loop_size(lo, lo->lo_backing_file);
sector_t x = (sector_t)size;
if ((loff_t)x != size) if ((loff_t)x != size)
return -EFBIG; return -EFBIG;
...@@ -429,12 +434,24 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) ...@@ -429,12 +434,24 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
goto out; goto out;
} }
/*
 * Describes one pending backing-file switch. loop_switch() stores a pointer
 * to it in bi_private of the marker bio it queues; do_loop_switch() reads it.
 */
struct switch_request {
/* backing file the loop device should use after the switch */
struct file *file;
/* completed by do_loop_switch() once the new file is installed */
struct completion wait;
};
static void do_loop_switch(struct loop_device *, struct switch_request *);
static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
{ {
int ret; int ret;
if (unlikely(!bio->bi_bdev)) {
do_loop_switch(lo, bio->bi_private);
bio_put(bio);
} else {
ret = do_bio_filebacked(lo, bio); ret = do_bio_filebacked(lo, bio);
bio_endio(bio, bio->bi_size, ret); bio_endio(bio, bio->bi_size, ret);
}
} }
/* /*
...@@ -495,6 +512,103 @@ static int loop_thread(void *data) ...@@ -495,6 +512,103 @@ static int loop_thread(void *data)
return 0; return 0;
} }
/*
* loop_switch performs the hard work of switching a backing store.
* First it needs to flush existing IO, it does this by sending a magic
* BIO down the pipe. The completion of this BIO does the actual switch.
*/
static int loop_switch(struct loop_device *lo, struct file *file)
{
struct switch_request w;
struct bio *bio = bio_alloc(GFP_KERNEL, 1);
if (!bio)
return -ENOMEM;
init_completion(&w.wait);
w.file = file;
bio->bi_private = &w;
bio->bi_bdev = NULL;
loop_make_request(lo->lo_queue, bio);
wait_for_completion(&w.wait);
return 0;
}
/*
 * do_loop_switch - install the backing file carried by a switch request.
 *
 * Called from loop_handle_bio() when it meets the marker bio queued by
 * loop_switch(). Restores the gfp mask that was saved for the outgoing
 * file's mapping, points the device at the new file, saves the new
 * mapping's gfp mask and clears __GFP_IO and __GFP_FS on it, then wakes
 * the waiter in loop_switch().
 */
static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
{
	struct file *outgoing = lo->lo_backing_file;
	struct file *incoming = p->file;
	struct address_space *incoming_mapping = incoming->f_mapping;

	/* Must happen before old_gfp_mask is overwritten below. */
	mapping_set_gfp_mask(outgoing->f_mapping, lo->old_gfp_mask);

	lo->lo_backing_file = incoming;
	lo->lo_blocksize = incoming_mapping->host->i_blksize;

	/* Remember the new mapping's own mask, then restrict it. */
	lo->old_gfp_mask = mapping_gfp_mask(incoming_mapping);
	mapping_set_gfp_mask(incoming_mapping,
			     lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));

	complete(&p->wait);
}
/*
 * loop_change_fd switches the backing store of a loopback device to
 * a new file. This is useful for operating system installers to free up
 * the original file and in High Availability environments to switch to
 * an alternative location for the content in case of server meltdown.
 * This can only work if the loop device is used read-only, and if the
 * new backing store is the same size as the old backing store. Making
 * sure the contents match is left to the caller.
 *
 * @lo:      loop device whose backing file is replaced
 * @lo_file: not used by this function
 * @bdev:    not used by this function
 * @arg:     file descriptor of the new backing file
 *
 * Returns 0 on success, or:
 *   -ENXIO   the loop device is not in the Lo_bound state
 *   -EINVAL  the device is not read-only, the new file is neither a
 *            regular file nor a block device, it has no sendfile
 *            operation, or its loop size differs from the current one
 *   -EBADF   @arg is not a valid file descriptor
 *   any error returned by loop_switch()
 *
 * On success the reference obtained with fget() is kept (the file has
 * become lo->lo_backing_file) and the previous backing file is released
 * with fput(). On failure the new file's reference is dropped instead.
 */
static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
			  struct block_device *bdev, unsigned int arg)
{
	struct file *file, *old_file;
	struct inode *inode;
	int error;

	error = -ENXIO;
	if (lo->lo_state != Lo_bound)
		goto out;

	/*
	 * The loop device has to be read-only. lo_flags is a bit mask, so
	 * test the bit rather than comparing the whole word: an equality
	 * test would wrongly refuse a read-only device that has any other
	 * flag set as well.
	 */
	error = -EINVAL;
	if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
		goto out;

	error = -EBADF;
	file = fget(arg);
	if (!file)
		goto out;

	inode = file->f_mapping->host;
	old_file = lo->lo_backing_file;

	error = -EINVAL;

	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
		goto out_putf;

	/* new backing store needs to support loop (eg sendfile) */
	if (!inode->i_fop->sendfile)
		goto out_putf;

	/* size of the new backing store needs to be the same */
	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
		goto out_putf;

	/* and ... switch */
	error = loop_switch(lo, file);
	if (error)
		goto out_putf;

	/* The device now holds the new file; let go of the old one. */
	fput(old_file);
	return 0;

out_putf:
	fput(file);
out:
	return error;
}
static int loop_set_fd(struct loop_device *lo, struct file *lo_file, static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
struct block_device *bdev, unsigned int arg) struct block_device *bdev, unsigned int arg)
{ {
...@@ -505,6 +619,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, ...@@ -505,6 +619,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
unsigned lo_blocksize; unsigned lo_blocksize;
int lo_flags = 0; int lo_flags = 0;
int error; int error;
loff_t size;
/* This is safe, since we have a reference from open(). */ /* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE); __module_get(THIS_MODULE);
...@@ -543,6 +658,13 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, ...@@ -543,6 +658,13 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
goto out_putf; goto out_putf;
} }
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size) {
error = -EFBIG;
goto out_putf;
}
if (!(lo_file->f_mode & FMODE_WRITE)) if (!(lo_file->f_mode & FMODE_WRITE))
lo_flags |= LO_FLAGS_READ_ONLY; lo_flags |= LO_FLAGS_READ_ONLY;
...@@ -555,10 +677,6 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, ...@@ -555,10 +677,6 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
lo->transfer = NULL; lo->transfer = NULL;
lo->ioctl = NULL; lo->ioctl = NULL;
lo->lo_sizelimit = 0; lo->lo_sizelimit = 0;
if (figure_loop_size(lo)) {
error = -EFBIG;
goto out_putf;
}
lo->old_gfp_mask = mapping_gfp_mask(mapping); lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
...@@ -571,6 +689,8 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, ...@@ -571,6 +689,8 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
blk_queue_make_request(lo->lo_queue, loop_make_request); blk_queue_make_request(lo->lo_queue, loop_make_request);
lo->lo_queue->queuedata = lo; lo->lo_queue->queuedata = lo;
set_capacity(disks[lo->lo_number], size);
set_blocksize(bdev, lo_blocksize); set_blocksize(bdev, lo_blocksize);
kernel_thread(loop_thread, lo, CLONE_KERNEL); kernel_thread(loop_thread, lo, CLONE_KERNEL);
...@@ -881,6 +1001,9 @@ static int lo_ioctl(struct inode * inode, struct file * file, ...@@ -881,6 +1001,9 @@ static int lo_ioctl(struct inode * inode, struct file * file,
case LOOP_SET_FD: case LOOP_SET_FD:
err = loop_set_fd(lo, file, inode->i_bdev, arg); err = loop_set_fd(lo, file, inode->i_bdev, arg);
break; break;
case LOOP_CHANGE_FD:
err = loop_change_fd(lo, file, inode->i_bdev, arg);
break;
case LOOP_CLR_FD: case LOOP_CLR_FD:
err = loop_clr_fd(lo, inode->i_bdev); err = loop_clr_fd(lo, inode->i_bdev);
break; break;
......
...@@ -153,5 +153,6 @@ int loop_unregister_transfer(int number); ...@@ -153,5 +153,6 @@ int loop_unregister_transfer(int number);
#define LOOP_GET_STATUS 0x4C03 #define LOOP_GET_STATUS 0x4C03
#define LOOP_SET_STATUS64 0x4C04 #define LOOP_SET_STATUS64 0x4C04
#define LOOP_GET_STATUS64 0x4C05 #define LOOP_GET_STATUS64 0x4C05
#define LOOP_CHANGE_FD 0x4C06
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment