Commit 06886a5a authored by Boaz Harrosh

exofs: Move all operations to an io_engine

In anticipation of multi-device operations, we separate osd operations
into an abstract I/O API. Currently only one device is used but later
when adding more devices, we will drive all devices in parallel according
to a "data_map" that describes how data is arranged on multiple devices.
The file system level operates, like before, as if there is one object
(inode-number) and an i_size. The io engine will split this into the same
object-number but on multiple devices.

At first we introduce a Mirror (raid 1) layout. But as the final outcome
we intend to fully implement the pNFS-Objects data-map, including
raid 0,4,5,6 over mirrored devices, over multiple device-groups. And
more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12

* Define an io_state based API for accessing osd storage devices
  in an abstract way.
  Usage:
	First a caller allocates an io state with:
		exofs_get_io_state(struct exofs_sb_info *sbi,
				   struct exofs_io_state** ios);

	Then calls one of:
		exofs_sbi_create(struct exofs_io_state *ios);
		exofs_sbi_remove(struct exofs_io_state *ios);
		exofs_sbi_write(struct exofs_io_state *ios);
		exofs_sbi_read(struct exofs_io_state *ios);
		exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);

	And when done
		exofs_put_io_state(struct exofs_io_state *ios);

* Convert all source files to use this new API
* Convert from bio_alloc to bio_kmalloc
* In io engine we make use of the now fixed osd_req_decode_sense

There are no functional changes or on disk additions after this patch.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
parent 8ce9bdd1
...@@ -155,22 +155,4 @@ enum { ...@@ -155,22 +155,4 @@ enum {
(((name_len) + offsetof(struct exofs_dir_entry, name) + \ (((name_len) + offsetof(struct exofs_dir_entry, name) + \
EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
/*************************
* function declarations *
*************************/
/* osd.c */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
static inline int exofs_check_ok(struct osd_request *or)
{
return exofs_check_ok_resid(or, NULL, NULL);
}
int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
int exofs_async_op(struct osd_request *or,
osd_req_done_fn *async_done, void *caller_context, u8 *cred);
int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
#endif /*ifndef __EXOFS_COM_H__*/ #endif /*ifndef __EXOFS_COM_H__*/
...@@ -30,14 +30,13 @@ ...@@ -30,14 +30,13 @@
* along with exofs; if not, write to the Free Software * along with exofs; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#ifndef __EXOFS_H__
#define __EXOFS_H__
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/time.h> #include <linux/time.h>
#include "common.h" #include "common.h"
#ifndef __EXOFS_H__
#define __EXOFS_H__
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
#ifdef CONFIG_EXOFS_DEBUG #ifdef CONFIG_EXOFS_DEBUG
...@@ -56,6 +55,7 @@ ...@@ -56,6 +55,7 @@
*/ */
struct exofs_sb_info { struct exofs_sb_info {
struct osd_dev *s_dev; /* returned by get_osd_dev */ struct osd_dev *s_dev; /* returned by get_osd_dev */
struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
osd_id s_pid; /* partition ID of file system*/ osd_id s_pid; /* partition ID of file system*/
int s_timeout; /* timeout for OSD operations */ int s_timeout; /* timeout for OSD operations */
uint64_t s_nextid; /* highest object ID used */ uint64_t s_nextid; /* highest object ID used */
...@@ -79,6 +79,50 @@ struct exofs_i_info { ...@@ -79,6 +79,50 @@ struct exofs_i_info {
struct inode vfs_inode; /* normal in-memory inode */ struct inode vfs_inode; /* normal in-memory inode */
}; };
/*
 * Return the OSD object id that backs an inode: the VFS inode number
 * (oi->vfs_inode.i_ino) offset by EXOFS_OBJ_OFF.
 */
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
{
return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
}
struct exofs_io_state;
/*
 * Type of the callback stored in exofs_io_state.done. It is handed the io
 * state and a void pointer (presumably the state's ->private field; confirm
 * against the ios.c implementation).
 * NOTE(review): the first parameter is named "or" although it is an
 * exofs_io_state, not an osd_request.
 */
typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
/*
 * State of a single I/O operation issued through the exofs_sbi_*() calls.
 * Obtained with exofs_get_io_state() and released with exofs_put_io_state().
 * The structure ends in a flexible array holding one exofs_per_dev_state
 * per device; use exofs_io_state_size() to compute the full allocation size.
 */
struct exofs_io_state {
/* Reference count (presumably dropped by exofs_put_io_state; confirm) */
struct kref kref;
/* Caller context and callback; see exofs_io_done_fn */
void *private;
exofs_io_done_fn done;
/* Super-block info this operation belongs to */
struct exofs_sb_info *sbi;
/* Target object and the credential used to access it; set by the caller */
struct osd_obj_id obj;
u8 *cred;
/* Global read/write IO*/
loff_t offset;
unsigned long length;
/* Data is supplied either as a kernel buffer or as a bio */
void *kern_buff;
struct bio *bio;
/* Attributes */
/* in_attr: attribute list requested on read (see exofs_statfs) */
unsigned in_attr_len;
struct osd_attr *in_attr;
/* out_attr: presumably attributes to be written; confirm in ios.c */
unsigned out_attr_len;
struct osd_attr *out_attr;
/* Variable array of size numdevs */
unsigned numdevs;
struct exofs_per_dev_state {
/* Per-device OSD request and bio */
struct osd_request *or;
struct bio *bio;
} per_dev[];
};
/*
 * Number of bytes needed for an exofs_io_state followed by @numdevs
 * entries of its trailing per_dev[] array.
 */
static inline unsigned exofs_io_state_size(unsigned numdevs)
{
return sizeof(struct exofs_io_state) +
sizeof(struct exofs_per_dev_state) * numdevs;
}
/* /*
* our inode flags * our inode flags
*/ */
...@@ -130,6 +174,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) ...@@ -130,6 +174,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
/************************* /*************************
* function declarations * * function declarations *
*************************/ *************************/
/* ios.c */
/*
 * io_state based API for accessing osd storage devices in an abstract way.
 * Usage: allocate a state with exofs_get_io_state(), fill it in, call one of
 * exofs_sbi_create/remove/write/read() (or exofs_oi_truncate()), and release
 * the state with exofs_put_io_state() when done.
 */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
/* Read @length bytes at @offset of @obj on device @od into kernel buffer @p */
int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
u64 offset, void *p, unsigned length);
int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
void exofs_put_io_state(struct exofs_io_state *ios);
int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
int exofs_sbi_create(struct exofs_io_state *ios);
int exofs_sbi_remove(struct exofs_io_state *ios);
int exofs_sbi_write(struct exofs_io_state *ios);
int exofs_sbi_read(struct exofs_io_state *ios);
int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
/*
 * Write on behalf of an inode: point @ios at the inode's object id and
 * credential (oi->i_cred), then issue exofs_sbi_write().
 * Returns the result of exofs_sbi_write().
 */
static inline int exofs_oi_write(struct exofs_i_info *oi,
struct exofs_io_state *ios)
{
ios->obj.id = exofs_oi_objno(oi);
ios->cred = oi->i_cred;
return exofs_sbi_write(ios);
}
/*
 * Read on behalf of an inode: point @ios at the inode's object id and
 * credential (oi->i_cred), then issue exofs_sbi_read().
 * Returns the result of exofs_sbi_read().
 */
static inline int exofs_oi_read(struct exofs_i_info *oi,
struct exofs_io_state *ios)
{
ios->obj.id = exofs_oi_objno(oi);
ios->cred = oi->i_cred;
return exofs_sbi_read(ios);
}
/* inode.c */ /* inode.c */
void exofs_truncate(struct inode *inode); void exofs_truncate(struct inode *inode);
int exofs_setattr(struct dentry *, struct iattr *); int exofs_setattr(struct dentry *, struct iattr *);
...@@ -169,6 +249,7 @@ extern const struct file_operations exofs_file_operations; ...@@ -169,6 +249,7 @@ extern const struct file_operations exofs_file_operations;
/* inode.c */ /* inode.c */
extern const struct address_space_operations exofs_aops; extern const struct address_space_operations exofs_aops;
extern const struct osd_attr g_attr_logical_length;
/* namei.c */ /* namei.c */
extern const struct inode_operations exofs_dir_inode_operations; extern const struct inode_operations exofs_dir_inode_operations;
......
This diff is collapsed.
This diff is collapsed.
...@@ -203,49 +203,40 @@ int exofs_sync_fs(struct super_block *sb, int wait) ...@@ -203,49 +203,40 @@ int exofs_sync_fs(struct super_block *sb, int wait)
{ {
struct exofs_sb_info *sbi; struct exofs_sb_info *sbi;
struct exofs_fscb *fscb; struct exofs_fscb *fscb;
struct osd_request *or; struct exofs_io_state *ios;
struct osd_obj_id obj;
int ret = -ENOMEM; int ret = -ENOMEM;
fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
if (!fscb) {
EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
return -ENOMEM;
}
lock_super(sb); lock_super(sb);
sbi = sb->s_fs_info; sbi = sb->s_fs_info;
fscb = &sbi->s_fscb;
ret = exofs_get_io_state(sbi, &ios);
if (ret)
goto out;
ios->length = sizeof(*fscb);
memset(fscb, 0, ios->length);
fscb->s_nextid = cpu_to_le64(sbi->s_nextid); fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
fscb->s_magic = cpu_to_le16(sb->s_magic); fscb->s_magic = cpu_to_le16(sb->s_magic);
fscb->s_newfs = 0; fscb->s_newfs = 0;
or = osd_start_request(sbi->s_dev, GFP_KERNEL); ios->obj.id = EXOFS_SUPER_ID;
if (unlikely(!or)) { ios->offset = 0;
EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); ios->kern_buff = fscb;
goto out; ios->cred = sbi->s_cred;
}
obj.partition = sbi->s_pid; ret = exofs_sbi_write(ios);
obj.id = EXOFS_SUPER_ID;
ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
if (unlikely(ret)) {
EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n");
goto out;
}
ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n"); EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
goto out; goto out;
} }
sb->s_dirt = 0; sb->s_dirt = 0;
out: out:
if (or) EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
osd_end_request(or); exofs_put_io_state(ios);
unlock_super(sb); unlock_super(sb);
kfree(fscb);
return ret; return ret;
} }
...@@ -302,24 +293,23 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -302,24 +293,23 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root; struct inode *root;
struct exofs_mountopt *opts = data; struct exofs_mountopt *opts = data;
struct exofs_sb_info *sbi; /*extended info */ struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */ struct exofs_fscb fscb; /*on-disk superblock info */
struct osd_request *or = NULL;
struct osd_obj_id obj; struct osd_obj_id obj;
int ret; int ret;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi) if (!sbi)
return -ENOMEM; return -ENOMEM;
sb->s_fs_info = sbi;
/* use mount options to fill superblock */ /* use mount options to fill superblock */
sbi->s_dev = osduld_path_lookup(opts->dev_name); od = osduld_path_lookup(opts->dev_name);
if (IS_ERR(sbi->s_dev)) { if (IS_ERR(od)) {
ret = PTR_ERR(sbi->s_dev); ret = PTR_ERR(od);
sbi->s_dev = NULL;
goto free_sbi; goto free_sbi;
} }
sbi->s_dev = od;
sbi->s_pid = opts->pid; sbi->s_pid = opts->pid;
sbi->s_timeout = opts->timeout; sbi->s_timeout = opts->timeout;
...@@ -333,35 +323,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -333,35 +323,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_bdev = NULL; sb->s_bdev = NULL;
sb->s_dev = 0; sb->s_dev = 0;
/* read data from on-disk superblock object */
obj.partition = sbi->s_pid; obj.partition = sbi->s_pid;
obj.id = EXOFS_SUPER_ID; obj.id = EXOFS_SUPER_ID;
exofs_make_credential(sbi->s_cred, &obj); exofs_make_credential(sbi->s_cred, &obj);
or = osd_start_request(sbi->s_dev, GFP_KERNEL); ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
if (unlikely(!or)) { if (unlikely(ret))
if (!silent)
EXOFS_ERR(
"exofs_fill_super: osd_start_request failed.\n");
ret = -ENOMEM;
goto free_sbi;
}
ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
if (unlikely(ret)) {
if (!silent)
EXOFS_ERR(
"exofs_fill_super: osd_req_read_kern failed.\n");
ret = -ENOMEM;
goto free_sbi;
}
ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
if (unlikely(ret)) {
if (!silent)
EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
ret = -EIO;
goto free_sbi; goto free_sbi;
}
sb->s_magic = le16_to_cpu(fscb.s_magic); sb->s_magic = le16_to_cpu(fscb.s_magic);
sbi->s_nextid = le64_to_cpu(fscb.s_nextid); sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
...@@ -380,6 +348,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -380,6 +348,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
spin_lock_init(&sbi->s_next_gen_lock); spin_lock_init(&sbi->s_next_gen_lock);
/* set up operation vectors */ /* set up operation vectors */
sb->s_fs_info = sbi;
sb->s_op = &exofs_sops; sb->s_op = &exofs_sops;
sb->s_export_op = &exofs_export_ops; sb->s_export_op = &exofs_export_ops;
root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
...@@ -406,16 +375,14 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -406,16 +375,14 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
} }
_exofs_print_device("Mounting", opts->dev_name, sbi->s_dev, sbi->s_pid); _exofs_print_device("Mounting", opts->dev_name, sbi->s_dev, sbi->s_pid);
ret = 0; return 0;
out:
if (or)
osd_end_request(or);
return ret;
free_sbi: free_sbi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
opts->dev_name, sbi->s_pid, ret);
osduld_put_device(sbi->s_dev); /* NULL safe */ osduld_put_device(sbi->s_dev); /* NULL safe */
kfree(sbi); kfree(sbi);
goto out; return ret;
} }
/* /*
...@@ -444,7 +411,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -444,7 +411,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{ {
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
struct exofs_sb_info *sbi = sb->s_fs_info; struct exofs_sb_info *sbi = sb->s_fs_info;
struct osd_obj_id obj = {sbi->s_pid, 0}; struct exofs_io_state *ios;
struct osd_attr attrs[] = { struct osd_attr attrs[] = {
ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
...@@ -453,26 +420,25 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -453,26 +420,25 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
}; };
uint64_t capacity = ULLONG_MAX; uint64_t capacity = ULLONG_MAX;
uint64_t used = ULLONG_MAX; uint64_t used = ULLONG_MAX;
struct osd_request *or;
uint8_t cred_a[OSD_CAP_LEN]; uint8_t cred_a[OSD_CAP_LEN];
int ret; int ret;
/* get used/capacity attributes */ ret = exofs_get_io_state(sbi, &ios);
exofs_make_credential(cred_a, &obj); if (ret) {
EXOFS_DBGMSG("exofs_get_io_state failed.\n");
or = osd_start_request(sbi->s_dev, GFP_KERNEL); return ret;
if (unlikely(!or)) {
EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
return -ENOMEM;
} }
osd_req_get_attributes(or, &obj); exofs_make_credential(cred_a, &ios->obj);
osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); ios->cred = sbi->s_cred;
ret = exofs_sync_op(or, sbi->s_timeout, cred_a); ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
ret = exofs_sbi_read(ios);
if (unlikely(ret)) if (unlikely(ret))
goto out; goto out;
ret = extract_attr_from_req(or, &attrs[0]); ret = extract_attr_from_ios(ios, &attrs[0]);
if (likely(!ret)) { if (likely(!ret)) {
capacity = get_unaligned_be64(attrs[0].val_ptr); capacity = get_unaligned_be64(attrs[0].val_ptr);
if (unlikely(!capacity)) if (unlikely(!capacity))
...@@ -480,7 +446,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -480,7 +446,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
} else } else
EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
ret = extract_attr_from_req(or, &attrs[1]); ret = extract_attr_from_ios(ios, &attrs[1]);
if (likely(!ret)) if (likely(!ret))
used = get_unaligned_be64(attrs[1].val_ptr); used = get_unaligned_be64(attrs[1].val_ptr);
else else
...@@ -497,7 +463,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -497,7 +463,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EXOFS_NAME_LEN; buf->f_namelen = EXOFS_NAME_LEN;
out: out:
osd_end_request(or); exofs_put_io_state(ios);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment