Commit 06886a5a authored by Boaz Harrosh

exofs: Move all operations to an io_engine

In anticipation of multi-device operations, we separate osd operations
into an abstract I/O API. Currently only one device is used but later
when adding more devices, we will drive all devices in parallel according
to a "data_map" that describes how data is arranged on multiple devices.
The file system level operates, like before, as if there is one object
(inode-number) and an i_size. The io engine will split this to the same
object-number but on multiple devices.

At first we introduce Mirror (raid 1) layout. But at the final outcome
we intend to fully implement the pNFS-Objects data-map, including
raid 0,4,5,6 over mirrored devices, over multiple device-groups. And
more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12

* Define an io_state based API for accessing osd storage devices
  in an abstract way.
  Usage:
	First a caller allocates an io state with:
		exofs_get_io_state(struct exofs_sb_info *sbi,
				   struct exofs_io_state** ios);

	Then calls one of:
		exofs_sbi_create(struct exofs_io_state *ios);
		exofs_sbi_remove(struct exofs_io_state *ios);
		exofs_sbi_write(struct exofs_io_state *ios);
		exofs_sbi_read(struct exofs_io_state *ios);
		exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);

	And when done
		exofs_put_io_state(struct exofs_io_state *ios);

* Convert all source files to use this new API
* Convert from bio_alloc to bio_kmalloc
* In io engine we make use of the now fixed osd_req_decode_sense

There are no functional changes or on disk additions after this patch.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
parent 8ce9bdd1
......@@ -155,22 +155,4 @@ enum {
(((name_len) + offsetof(struct exofs_dir_entry, name) + \
EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
/*************************
* function declarations *
*************************/
/* osd.c */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
/*
 * exofs_check_ok - convenience form of exofs_check_ok_resid() for callers
 * that do not want the residual counts.
 *
 * Passes NULL for both the in_resid and out_resid out-parameters and
 * returns whatever exofs_check_ok_resid() returns for @or.
 */
static inline int exofs_check_ok(struct osd_request *or)
{
	int status = exofs_check_ok_resid(or, NULL, NULL);

	return status;
}
int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
int exofs_async_op(struct osd_request *or,
osd_req_done_fn *async_done, void *caller_context, u8 *cred);
int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
#endif /*ifndef __EXOFS_COM_H__*/
......@@ -30,14 +30,13 @@
* along with exofs; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __EXOFS_H__
#define __EXOFS_H__
#include <linux/fs.h>
#include <linux/time.h>
#include "common.h"
#ifndef __EXOFS_H__
#define __EXOFS_H__
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
#ifdef CONFIG_EXOFS_DEBUG
......@@ -56,6 +55,7 @@
*/
struct exofs_sb_info {
struct osd_dev *s_dev; /* returned by get_osd_dev */
struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
osd_id s_pid; /* partition ID of file system*/
int s_timeout; /* timeout for OSD operations */
uint64_t s_nextid; /* highest object ID used */
......@@ -79,6 +79,50 @@ struct exofs_i_info {
struct inode vfs_inode; /* normal in-memory inode */
};
/*
 * exofs_oi_objno - object id that corresponds to an exofs inode.
 *
 * The id is the VFS inode number of @oi shifted up by EXOFS_OBJ_OFF.
 */
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
{
	const osd_id objno = oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;

	return objno;
}
struct exofs_io_state;
typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
/*
 * State of a single I/O operation handed to the io engine (the
 * exofs_sbi_* and exofs_oi_* calls declared in this header).
 *
 * The structure ends in a flexible per_dev[] array, so it must be sized
 * with exofs_io_state_size(numdevs) rather than sizeof().
 */
struct exofs_io_state {
/* NOTE(review): presumably the reference count behind
 * exofs_get_io_state()/exofs_put_io_state() -- confirm in ios.c.
 */
struct kref kref;
/* Caller context; presumably handed back as the second argument of
 * done() -- confirm in ios.c.
 */
void *private;
/* Completion callback of type exofs_io_done_fn(ios, private). */
exofs_io_done_fn done;
/* NOTE(review): presumably the sbi given to exofs_get_io_state(). */
struct exofs_sb_info *sbi;
/* Target object; callers fill in obj.id before issuing the operation. */
struct osd_obj_id obj;
/* Credential used for the operation; set by the caller
 * (e.g. sbi->s_cred or oi->i_cred).
 */
u8 *cred;
/* Global read/write IO*/
/* Offset and byte count of the transfer within the object. */
loff_t offset;
unsigned long length;
/* Kernel buffer holding the data, e.g. the fscb in exofs_sync_fs(). */
void *kern_buff;
/* NOTE(review): presumably the alternative to kern_buff for page IO. */
struct bio *bio;
/* Attributes */
/* Number of entries in in_attr (callers pass ARRAY_SIZE()). */
unsigned in_attr_len;
/* Attributes to fetch; results are read back with
 * extract_attr_from_ios().
 */
struct osd_attr *in_attr;
/* NOTE(review): out_attr/out_attr_len are presumably attributes to
 * set on the object -- not exercised in the visible code, confirm.
 */
unsigned out_attr_len;
struct osd_attr *out_attr;
/* Variable array of size numdevs */
unsigned numdevs;
/* One osd request and one bio for each device. */
struct exofs_per_dev_state {
struct osd_request *or;
struct bio *bio;
} per_dev[];
};
/*
 * exofs_io_state_size - bytes needed for an exofs_io_state that carries
 * @numdevs entries in its trailing per_dev[] array.
 *
 * Returns the fixed part of the structure plus @numdevs per-device slots.
 */
static inline unsigned exofs_io_state_size(unsigned numdevs)
{
	return numdevs * sizeof(struct exofs_per_dev_state) +
	       sizeof(struct exofs_io_state);
}
/*
* our inode flags
*/
......@@ -130,6 +174,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
/*************************
* function declarations *
*************************/
/* ios.c */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
u64 offset, void *p, unsigned length);
int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
void exofs_put_io_state(struct exofs_io_state *ios);
int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
int exofs_sbi_create(struct exofs_io_state *ios);
int exofs_sbi_remove(struct exofs_io_state *ios);
int exofs_sbi_write(struct exofs_io_state *ios);
int exofs_sbi_read(struct exofs_io_state *ios);
int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
/*
 * exofs_oi_write - write @ios to the object backing inode-info @oi.
 *
 * Points @ios at the inode's object (id from exofs_oi_objno()) and at the
 * inode's credential (@oi->i_cred), then submits it through
 * exofs_sbi_write() and returns that call's result.
 */
static inline int exofs_oi_write(struct exofs_i_info *oi,
				 struct exofs_io_state *ios)
{
	int ret;

	ios->cred = oi->i_cred;
	ios->obj.id = exofs_oi_objno(oi);

	ret = exofs_sbi_write(ios);
	return ret;
}
/*
 * exofs_oi_read - read into @ios from the object backing inode-info @oi.
 *
 * Points @ios at the inode's object (id from exofs_oi_objno()) and at the
 * inode's credential (@oi->i_cred), then submits it through
 * exofs_sbi_read() and returns that call's result.
 */
static inline int exofs_oi_read(struct exofs_i_info *oi,
				struct exofs_io_state *ios)
{
	int ret;

	ios->cred = oi->i_cred;
	ios->obj.id = exofs_oi_objno(oi);

	ret = exofs_sbi_read(ios);
	return ret;
}
/* inode.c */
void exofs_truncate(struct inode *inode);
int exofs_setattr(struct dentry *, struct iattr *);
......@@ -169,6 +249,7 @@ extern const struct file_operations exofs_file_operations;
/* inode.c */
extern const struct address_space_operations exofs_aops;
extern const struct osd_attr g_attr_logical_length;
/* namei.c */
extern const struct inode_operations exofs_dir_inode_operations;
......
This diff is collapsed.
This diff is collapsed.
......@@ -203,49 +203,40 @@ int exofs_sync_fs(struct super_block *sb, int wait)
{
struct exofs_sb_info *sbi;
struct exofs_fscb *fscb;
struct osd_request *or;
struct osd_obj_id obj;
struct exofs_io_state *ios;
int ret = -ENOMEM;
fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
if (!fscb) {
EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
return -ENOMEM;
}
lock_super(sb);
sbi = sb->s_fs_info;
fscb = &sbi->s_fscb;
ret = exofs_get_io_state(sbi, &ios);
if (ret)
goto out;
ios->length = sizeof(*fscb);
memset(fscb, 0, ios->length);
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
fscb->s_magic = cpu_to_le16(sb->s_magic);
fscb->s_newfs = 0;
or = osd_start_request(sbi->s_dev, GFP_KERNEL);
if (unlikely(!or)) {
EXOFS_ERR("exofs_write_super: osd_start_request failed.\n");
goto out;
}
obj.partition = sbi->s_pid;
obj.id = EXOFS_SUPER_ID;
ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
if (unlikely(ret)) {
EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n");
goto out;
}
ios->obj.id = EXOFS_SUPER_ID;
ios->offset = 0;
ios->kern_buff = fscb;
ios->cred = sbi->s_cred;
ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
ret = exofs_sbi_write(ios);
if (unlikely(ret)) {
EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
goto out;
}
sb->s_dirt = 0;
out:
if (or)
osd_end_request(or);
EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
exofs_put_io_state(ios);
unlock_super(sb);
kfree(fscb);
return ret;
}
......@@ -302,24 +293,23 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root;
struct exofs_mountopt *opts = data;
struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */
struct osd_request *or = NULL;
struct osd_obj_id obj;
int ret;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
/* use mount options to fill superblock */
sbi->s_dev = osduld_path_lookup(opts->dev_name);
if (IS_ERR(sbi->s_dev)) {
ret = PTR_ERR(sbi->s_dev);
sbi->s_dev = NULL;
od = osduld_path_lookup(opts->dev_name);
if (IS_ERR(od)) {
ret = PTR_ERR(od);
goto free_sbi;
}
sbi->s_dev = od;
sbi->s_pid = opts->pid;
sbi->s_timeout = opts->timeout;
......@@ -333,35 +323,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_bdev = NULL;
sb->s_dev = 0;
/* read data from on-disk superblock object */
obj.partition = sbi->s_pid;
obj.id = EXOFS_SUPER_ID;
exofs_make_credential(sbi->s_cred, &obj);
or = osd_start_request(sbi->s_dev, GFP_KERNEL);
if (unlikely(!or)) {
if (!silent)
EXOFS_ERR(
"exofs_fill_super: osd_start_request failed.\n");
ret = -ENOMEM;
goto free_sbi;
}
ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
if (unlikely(ret)) {
if (!silent)
EXOFS_ERR(
"exofs_fill_super: osd_req_read_kern failed.\n");
ret = -ENOMEM;
goto free_sbi;
}
ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
if (unlikely(ret)) {
if (!silent)
EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
ret = -EIO;
ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
if (unlikely(ret))
goto free_sbi;
}
sb->s_magic = le16_to_cpu(fscb.s_magic);
sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
......@@ -380,6 +348,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
spin_lock_init(&sbi->s_next_gen_lock);
/* set up operation vectors */
sb->s_fs_info = sbi;
sb->s_op = &exofs_sops;
sb->s_export_op = &exofs_export_ops;
root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
......@@ -406,16 +375,14 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
}
_exofs_print_device("Mounting", opts->dev_name, sbi->s_dev, sbi->s_pid);
ret = 0;
out:
if (or)
osd_end_request(or);
return ret;
return 0;
free_sbi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
opts->dev_name, sbi->s_pid, ret);
osduld_put_device(sbi->s_dev); /* NULL safe */
kfree(sbi);
goto out;
return ret;
}
/*
......@@ -444,7 +411,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
struct osd_obj_id obj = {sbi->s_pid, 0};
struct exofs_io_state *ios;
struct osd_attr attrs[] = {
ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
......@@ -453,26 +420,25 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
};
uint64_t capacity = ULLONG_MAX;
uint64_t used = ULLONG_MAX;
struct osd_request *or;
uint8_t cred_a[OSD_CAP_LEN];
int ret;
/* get used/capacity attributes */
exofs_make_credential(cred_a, &obj);
or = osd_start_request(sbi->s_dev, GFP_KERNEL);
if (unlikely(!or)) {
EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
return -ENOMEM;
ret = exofs_get_io_state(sbi, &ios);
if (ret) {
EXOFS_DBGMSG("exofs_get_io_state failed.\n");
return ret;
}
osd_req_get_attributes(or, &obj);
osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs));
ret = exofs_sync_op(or, sbi->s_timeout, cred_a);
exofs_make_credential(cred_a, &ios->obj);
ios->cred = sbi->s_cred;
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
ret = exofs_sbi_read(ios);
if (unlikely(ret))
goto out;
ret = extract_attr_from_req(or, &attrs[0]);
ret = extract_attr_from_ios(ios, &attrs[0]);
if (likely(!ret)) {
capacity = get_unaligned_be64(attrs[0].val_ptr);
if (unlikely(!capacity))
......@@ -480,7 +446,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
} else
EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
ret = extract_attr_from_req(or, &attrs[1]);
ret = extract_attr_from_ios(ios, &attrs[1]);
if (likely(!ret))
used = get_unaligned_be64(attrs[1].val_ptr);
else
......@@ -497,7 +463,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EXOFS_NAME_LEN;
out:
osd_end_request(or);
exofs_put_io_state(ios);
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment