Commit d03480a1 authored by Linus Torvalds's avatar Linus Torvalds

Remove intermezzo, per instructions from Peter Braam.

parent b188bcda
#
# Makefile 1.00 Peter Braam <braam@clusterfs.com>
#
obj-$(CONFIG_INTERMEZZO_FS) += intermezzo.o
intermezzo-objs := cache.o dcache.o dir.o ext_attr.o file.o fileset.o \
inode.o journal.o journal_ext2.o journal_ext3.o \
journal_obdfs.o journal_reiserfs.o journal_tmpfs.o journal_xfs.o \
kml_reint.o kml_unpack.o methods.o presto.o psdev.o replicator.o \
super.o sysctl.o upcall.o vfs.o
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/module.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/*
This file contains the routines associated with managing a
cache of files for InterMezzo. These caches have two reqs:
- need to be found fast so they are hashed by the device,
with an attempt to have collision chains of length 1.
The methods for the cache are set up in methods.
*/
/* Slab cache that presto_free_cache() hands root-dentry data back to;
 * only declared here, the definition lives outside this file. */
extern kmem_cache_t * presto_dentry_slab;
/* the intent of this hash is to have collision chains of length 1 */
#define CACHES_BITS 8
#define CACHES_SIZE (1 << CACHES_BITS)
/* Parenthesised so the mask stays one operand inside any larger expression. */
#define CACHES_MASK (CACHES_SIZE - 1)
/* Hash buckets: one list head per slot, indexed by presto_cache_hash(). */
static struct list_head presto_caches[CACHES_SIZE];

/*
 * Pick the bucket for a super block: shift its address right by
 * L1_CACHE_SHIFT and keep the bits selected by CACHES_MASK.
 */
static inline int presto_cache_hash(struct super_block *s)
{
        unsigned long addr = (unsigned long)s;

        return (addr >> L1_CACHE_SHIFT) & (CACHES_MASK);
}
/* Link a cache into the hash bucket selected by its super block. */
inline void presto_cache_add(struct presto_cache *cache)
{
        int bucket = presto_cache_hash(cache->cache_sb);

        list_add(&cache->cache_chain, &presto_caches[bucket]);
}
inline void presto_cache_init_hash(void)
{
int i;
for ( i = 0; i < CACHES_SIZE; i++ ) {
INIT_LIST_HEAD(&presto_caches[i]);
}
}
/*
 * Length of a packed ioctl request: the fixed header plus the two inline
 * buffers, each rounded up by size_round().
 */
int izo_ioctl_packlen(struct izo_ioctl_data *data)
{
        int total = sizeof(struct izo_ioctl_data);

        total += size_round(data->ioc_inllen1);
        total += size_round(data->ioc_inllen2);

        return total;
}
/*
 * Map a super block to its presto_cache.
 *
 * Walks the hash chain selected by presto_cache_hash(s) and returns the
 * entry whose cache_sb equals s, or NULL when the chain holds no such
 * entry.
 */
struct presto_cache *presto_cache_find(struct super_block *s)
{
        struct presto_cache *cache;
        struct list_head *head, *pos;

        head = &presto_caches[presto_cache_hash(s)];
        /*
         * The cursor must advance on every step: the chain is circular and
         * ends when we are back at the bucket head.  Re-reading head->next
         * each time would spin forever on a non-matching first entry.
         */
        for (pos = head->next; pos != head; pos = pos->next) {
                cache = list_entry(pos, struct presto_cache, cache_chain);
                if (cache->cache_sb == s)
                        return cache;
        }
        return NULL;
}
/*
 * Look up the presto_cache belonging to an inode's super block.
 * Logs a warning and returns NULL when none is registered.
 */
struct presto_cache *presto_get_cache(struct inode *inode)
{
        struct presto_cache *found;

        ENTRY;
        found = presto_cache_find(inode->i_sb);
        if (found) {
                EXIT;
                return found;
        }

        CERROR("WARNING: no presto cache for %s, ino %ld\n",
               inode->i_sb->s_id, inode->i_ino);
        EXIT;
        return NULL;
}
/* another debugging routine: check fs is InterMezzo fs */
int presto_ispresto(struct inode *inode)
{
struct presto_cache *cache;
if ( !inode )
return 0;
cache = presto_get_cache(inode);
if ( !cache )
return 0;
return inode->i_sb == cache->cache_sb;
}
/*
 * Allocate a presto_cache and bring it to a known empty state:
 * zeroed, both list heads initialised, lock unlocked, nothing reserved.
 * Returns NULL when the allocation fails.
 */
struct presto_cache *presto_cache_init(void)
{
        struct presto_cache *cache;

        PRESTO_ALLOC(cache, sizeof(struct presto_cache));
        if (!cache)
                return cache;

        memset(cache, 0, sizeof(struct presto_cache));
        INIT_LIST_HEAD(&cache->cache_chain);
        INIT_LIST_HEAD(&cache->cache_fset_list);
        cache->cache_lock = SPIN_LOCK_UNLOCKED;
        cache->cache_reserved = 0;

        return cache;
}
/* free a cache structure and all of the memory it is pointing to */
inline void presto_free_cache(struct presto_cache *cache)
{
if (!cache)
return;
list_del(&cache->cache_chain);
if (cache->cache_sb && cache->cache_sb->s_root &&
presto_d2d(cache->cache_sb->s_root)) {
kmem_cache_free(presto_dentry_slab,
presto_d2d(cache->cache_sb->s_root));
cache->cache_sb->s_root->d_fsdata = NULL;
}
PRESTO_FREE(cache, sizeof(struct presto_cache));
}
/*
 * Try to reserve req bytes of cache space.
 *
 * Returns 0 when the reservation is recorded, and also (without reserving
 * anything) when the cache has no filter, no o_trops or no tr_avail method.
 * Returns -ENOSPC when req plus the amount already reserved exceeds what
 * tr_avail() reports.  cache_reserved is read and updated under cache_lock.
 */
int presto_reserve_space(struct presto_cache *cache, loff_t req)
{
        struct super_block *sb = cache->cache_sb;
        struct filter_fs *filter = cache->cache_filter;
        loff_t avail;
        int rc = 0;

        if (!filter || !filter->o_trops || !filter->o_trops->tr_avail) {
                EXIT;
                return 0;
        }

        spin_lock(&cache->cache_lock);
        avail = filter->o_trops->tr_avail(cache, sb);
        CDEBUG(D_SUPER, "ESC::%ld +++> %ld \n", (long) cache->cache_reserved,
               (long) (cache->cache_reserved + req));
        CDEBUG(D_SUPER, "ESC::Avail::%ld \n", (long) avail);
        if (req + cache->cache_reserved > avail)
                rc = -ENOSPC;
        else
                cache->cache_reserved += req;
        spin_unlock(&cache->cache_lock);

        EXIT;
        return rc;
}
/*
 * Give back req bytes previously reserved with presto_reserve_space().
 *
 * The counter is sampled and updated under cache_lock so the values in the
 * debug message are the ones this call actually saw and produced; the
 * message itself is emitted after the lock is dropped.
 */
void presto_release_space(struct presto_cache *cache, loff_t req)
{
        loff_t before, after;

        spin_lock(&cache->cache_lock);
        before = cache->cache_reserved;
        cache->cache_reserved -= req;
        after = cache->cache_reserved;
        spin_unlock(&cache->cache_lock);

        CDEBUG(D_SUPER, "ESC::%ld ---> %ld \n", (long) before, (long) after);
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Original version: Copyright (C) 1996 P. Braam and M. Callahan
* Rewritten for Linux 2.1. Copyright (C) 1997 Carnegie Mellon University
* d_fsdata and NFS compatibility fixes Copyright (C) 2001 Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Directory operations for InterMezzo filesystem
*/
/* inode dentry alias list walking code adapted from linux/fs/dcache.c
*
* fs/dcache.c
*
* (C) 1997 Thomas Schoebel-Theuer,
* with heavy changes by Linus Torvalds
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include "intermezzo_fs.h"
/* Slab cache for struct presto_dentry_data; created by
 * presto_init_ddata_cache() and destroyed by presto_cleanup_ddata_cache(). */
kmem_cache_t * presto_dentry_slab;
/* called when a cache lookup succeeds */
static int presto_d_revalidate(struct dentry *de, struct nameidata *nd)
{
struct inode *inode = de->d_inode;
struct presto_file_set * root_fset;
ENTRY;
if (!inode) {
EXIT;
return 0;
}
if (is_bad_inode(inode)) {
EXIT;
return 0;
}
if (!presto_d2d(de)) {
presto_set_dd(de);
}
if (!presto_d2d(de)) {
EXIT;
return 0;
}
root_fset = presto_d2d(de->d_inode->i_sb->s_root)->dd_fset;
if (root_fset->fset_flags & FSET_FLAT_BRANCH &&
(presto_d2d(de)->dd_fset != root_fset )) {
presto_d2d(de)->dd_fset = root_fset;
}
EXIT;
return 1;
#if 0
/* The following is needed for metadata on demand. */
if ( S_ISDIR(inode->i_mode) ) {
EXIT;
return (presto_chk(de, PRESTO_DATA) &&
(presto_chk(de, PRESTO_ATTR)));
} else {
EXIT;
return presto_chk(de, PRESTO_ATTR);
}
#endif
}
static void presto_d_release(struct dentry *dentry)
{
if (!presto_d2d(dentry)) {
/* This can happen for dentries from NFSd */
return;
}
presto_d2d(dentry)->dd_count--;
if (!presto_d2d(dentry)->dd_count) {
kmem_cache_free(presto_dentry_slab, presto_d2d(dentry));
dentry->d_fsdata = NULL;
}
}
/*
 * Dentry operations installed on InterMezzo dentries: revalidation on
 * cache hits and release of the reference-counted dentry data.
 */
struct dentry_operations presto_dentry_ops =
{
.d_revalidate = presto_d_revalidate,
.d_release = presto_d_release
};
/*
 * Non-zero when dentry_name_cmp() accepts "ROOT" for this dentry and
 * rejects ".intermezzo" for its parent.
 */
static inline int presto_is_dentry_ROOT (struct dentry *dentry)
{
        if (!dentry_name_cmp(dentry, "ROOT"))
                return 0;

        return !dentry_name_cmp(dentry->d_parent, ".intermezzo");
}
/*
 * Walk from dentry towards the root (following d_parent) and return the
 * dd_fset of the first dentry that has presto_d2d() data, or NULL when the
 * walk reaches a dentry that is its own parent without finding any.
 *
 * *is_under_d_intermezzo is cleared first.  It becomes 1 when a dentry
 * accepted by dentry_name_cmp(..., ".intermezzo") is met before any ROOT
 * dentry; otherwise it is derived from the PRESTO_DONT_JOURNAL flag of the
 * dentry data that ends the walk, unless a ROOT dentry was seen on the way.
 */
static struct presto_file_set* presto_try_find_fset(struct dentry* dentry,
int *is_under_d_intermezzo)
{
struct dentry* temp_dentry;
struct presto_dentry_data *d_data;
int found_root=0;
ENTRY;
CDEBUG(D_FSDATA, "finding fileset for %p:%s\n", dentry,
dentry->d_name.name);
*is_under_d_intermezzo = 0;
/* walk up through the branch to get the fileset */
/* The dentry we are passed presumably does not have the correct
* fset information. However, we still want to start walking up
* the branch from this dentry to get our found_root and
* is_under_d_intermezzo decisions correct
*/
for (temp_dentry = dentry ; ; temp_dentry = temp_dentry->d_parent) {
CDEBUG(D_FSDATA, "--->dentry %p:%*s\n", temp_dentry,
temp_dentry->d_name.len,temp_dentry->d_name.name);
/* found_root is sticky: once set it stays set for the rest of the walk */
if (presto_is_dentry_ROOT(temp_dentry))
found_root = 1;
if (!found_root &&
dentry_name_cmp(temp_dentry, ".intermezzo")) {
*is_under_d_intermezzo = 1;
}
d_data = presto_d2d(temp_dentry);
if (d_data) {
/* If we found a "ROOT" dentry while walking up the
* branch, we will journal regardless of whether
* we are under .intermezzo or not.
* If we are already under d_intermezzo don't reverse
* the decision here...even if we found a "ROOT"
* dentry above .intermezzo (if we were ever to
* modify the directory structure).
*/
if (!*is_under_d_intermezzo)
*is_under_d_intermezzo = !found_root &&
(d_data->dd_flags & PRESTO_DONT_JOURNAL);
EXIT;
return d_data->dd_fset;
}
/* a dentry that is its own parent ends the walk */
if (temp_dentry->d_parent == temp_dentry) {
break;
}
}
EXIT;
return NULL;
}
/*
 * Search the alias list of the dentry's inode for a dentry that already
 * carries InterMezzo dentry data and return that data; NULL when no alias
 * has any.  The list is walked under dcache_lock.
 *
 * Only call this function on positive dentries (dentry->d_inode is
 * dereferenced unconditionally).
 */
static struct presto_dentry_data* presto_try_find_alias_with_dd (
                  struct dentry* dentry)
{
        struct inode *inode = dentry->d_inode;
        struct list_head *head = &inode->i_dentry;
        struct list_head *pos;
        struct presto_dentry_data *dd = NULL;

        spin_lock(&dcache_lock);
        for (pos = head->next; pos != head; pos = pos->next) {
                /*
                 * Stop at the first alias that HAS data.  Testing for the
                 * absence of data here would make the search return NULL
                 * on the first bare alias and never find anything.
                 */
                dd = presto_d2d(list_entry(pos, struct dentry, d_alias));
                if (dd)
                        break;
        }
        spin_unlock(&dcache_lock);

        return dd;
}
/*
 * Share one presto_dentry_data between a dentry and every alias of its
 * inode that has no data yet, taking one reference per dentry updated.
 *
 * Only call this function on positive dentries.
 */
static void presto_set_alias_dd (struct dentry *dentry,
                                 struct presto_dentry_data* dd)
{
        struct list_head *head = &dentry->d_inode->i_dentry;
        struct list_head *pos;

        /* The dentry we were handed always gets the data. */
        dd->dd_count++;
        dentry->d_fsdata = dd;

        /* Then every alias that is still bare. */
        spin_lock(&dcache_lock);
        for (pos = head->next; pos != head; pos = pos->next) {
                struct dentry *alias = list_entry(pos, struct dentry, d_alias);

                if (presto_d2d(alias))
                        continue;
                dd->dd_count++;
                alias->d_fsdata = dd;
        }
        spin_unlock(&dcache_lock);
}
/*
 * Allocate a zeroed presto_dentry_data from the slab with its reference
 * count set to 1.  Logs an error and returns NULL when out of memory.
 */
inline struct presto_dentry_data *izo_alloc_ddata(void)
{
        struct presto_dentry_data *fresh;

        fresh = kmem_cache_alloc(presto_dentry_slab, SLAB_KERNEL);
        if (fresh != NULL) {
                memset(fresh, 0, sizeof(*fresh));
                fresh->dd_count = 1;
        } else {
                CERROR("IZO: out of memory trying to allocate presto_dentry_data\n");
        }

        return fresh;
}
/*
 * Attach InterMezzo dentry data (d_fsdata) to a dentry.
 *
 * For a positive dentry, data already held by another alias of the same
 * inode is shared.  Otherwise the fileset is located by walking up the
 * tree and fresh data is allocated, flagged PRESTO_DONT_JOURNAL when the
 * walk reported the dentry as being under .intermezzo.
 *
 * Returns 0 on success and also when d_fsdata was already set, -EROFS when
 * no fileset could be found (d_fsdata stays NULL), -ENOMEM when the
 * allocation fails.  A NULL dentry triggers BUG().
 *
 * This uses the BKL!
 */
int presto_set_dd(struct dentry * dentry)
{
struct presto_file_set *fset;
struct presto_dentry_data *dd;
int is_under_d_izo;
int error=0;
ENTRY;
if (!dentry)
BUG();
lock_kernel();
/* Did we lose a race? */
if (dentry->d_fsdata) {
CERROR("dentry %p already has d_fsdata set\n", dentry);
if (dentry->d_inode)
CERROR(" inode: %ld\n", dentry->d_inode->i_ino);
EXIT;
/* error is still 0 here: existing data is not treated as a failure */
goto out_unlock;
}
if (dentry->d_inode != NULL) {
/* NFSd runs find_fh_dentry which instantiates disconnected
* dentries which are then connected without a lookup().
* So it is possible to have connected dentries that do not
* have d_fsdata set. So we walk the list trying to find
* an alias which has its d_fsdata set and then use that
* for all the other dentries as well.
* - SHP,Vinny.
*/
/* If there is an alias with d_fsdata use it. */
if ((dd = presto_try_find_alias_with_dd (dentry))) {
presto_set_alias_dd (dentry, dd);
EXIT;
goto out_unlock;
}
} else {
/* Negative dentry */
CDEBUG(D_FSDATA,"negative dentry %p: %*s\n", dentry,
dentry->d_name.len, dentry->d_name.name);
}
/* No pre-existing d_fsdata, we need to construct one.
* First, we must walk up the tree to find the fileset
* If a fileset can't be found, we leave a null fsdata
* and return EROFS to indicate that we can't journal
* updates.
*/
fset = presto_try_find_fset (dentry, &is_under_d_izo);
if (!fset) {
#ifdef PRESTO_NO_NFS
CERROR("No fileset for dentry %p: %*s\n", dentry,
dentry->d_name.len, dentry->d_name.name);
#endif
error = -EROFS;
EXIT;
goto out_unlock;
}
dentry->d_fsdata = izo_alloc_ddata();
if (!presto_d2d(dentry)) {
CERROR ("InterMezzo: out of memory allocating d_fsdata\n");
error = -ENOMEM;
goto out_unlock;
}
presto_d2d(dentry)->dd_fset = fset;
if (is_under_d_izo)
presto_d2d(dentry)->dd_flags |= PRESTO_DONT_JOURNAL;
EXIT;
/* common exit: log the outcome, then drop the BKL taken above */
out_unlock:
CDEBUG(D_FSDATA,"presto_set_dd dentry %p: %*s, d_fsdata %p\n",
dentry, dentry->d_name.len, dentry->d_name.name,
dentry->d_fsdata);
unlock_kernel();
return error;
}
/*
 * Create the slab cache for presto_dentry_data.
 * Note the return convention: 1 on success, 0 on failure.
 */
int presto_init_ddata_cache(void)
{
        ENTRY;
        presto_dentry_slab = kmem_cache_create("presto_cache",
                                               sizeof(struct presto_dentry_data),
                                               0, SLAB_HWCACHE_ALIGN,
                                               NULL, NULL);
        EXIT;

        return presto_dentry_slab ? 1 : 0;
}
/* Destroy the slab cache created by presto_init_ddata_cache(). */
void presto_cleanup_ddata_cache(void)
{
kmem_cache_destroy(presto_dentry_slab);
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Tacitus Systems
* Copyright (C) 2000 Peter J. Braam
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <asm/bitops.h>
#include <asm/termios.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/module.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/*
 * Take dir->i_sem again.  The directory operations in this file call this
 * after presto_unlock() once presto_get_permit() has succeeded.
 */
static inline void presto_relock_sem(struct inode *dir)
{
/* the lock from sys_mkdir / lookup_create */
down(&dir->i_sem);
/* the rest is done by the do_{create,mkdir, ...} */
}
/*
 * Currently a no-op: both lock calls it used to make are commented out.
 * Still called after each presto_do_*() operation in this file.
 */
static inline void presto_relock_other(struct inode *dir)
{
/* vfs_mkdir locks */
// down(&dir->i_zombie);
//lock_kernel();
}
/*
 * Re-acquire everything presto_unlock() dropped; with the other lock calls
 * commented out this is just dir->i_sem.  Used on the paths where
 * presto_get_permit() failed.
 */
static inline void presto_fulllock(struct inode *dir)
{
/* the lock from sys_mkdir / lookup_create */
down(&dir->i_sem);
/* vfs_mkdir locks */
// down(&dir->i_zombie);
//lock_kernel();
}
/*
 * Release dir->i_sem so the caller can go and obtain a permit; the other
 * unlock calls are commented out.
 */
static inline void presto_unlock(struct inode *dir)
{
/* vfs_mkdir locks */
//unlock_kernel();
// up(&dir->i_zombie);
/* the lock from sys_mkdir / lookup_create */
up(&dir->i_sem);
}
/*
* these are initialized in super.c
*/
/* Forward declaration; presto_permission() is defined later in this file. */
extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd);
/*
 * uid compared against current->euid by presto_can_ilookup() and
 * presto_ioctl().
 * NOTE(review): this is static and nothing visible in this file assigns
 * it, so it appears to stay 0 -- confirm against the comment above.
 */
static int izo_authorized_uid;
/*
 * Recognise an ilookup name of the form
 *     ...ino:<inode number>:<generation>
 * (both numbers in hex).
 *
 * Returns 1 and fills *id and *generation when the dentry name has that
 * form; returns 0 when the name is shorter than the 7-character prefix,
 * longer than 64 characters, does not start with PRESTO_ILOOKUP_MAGIC, or
 * lacks the PRESTO_ILOOKUP_SEP separator after the inode number.  *id is
 * written even in that last case.
 */
int izo_dentry_is_ilookup(struct dentry *dentry, ino_t *id,
                          unsigned int *generation)
{
        char tmpname[64];
        char *next;
        unsigned int taillen;

        ENTRY;
        /* prefix is 7 characters: '...ino:' */
        if ( dentry->d_name.len < 7 || dentry->d_name.len > 64 ||
             memcmp(dentry->d_name.name, PRESTO_ILOOKUP_MAGIC, 7) != 0 ) {
                EXIT;
                return 0;
        }

        /* Copy what follows the prefix into a NUL-terminated scratch buffer. */
        taillen = dentry->d_name.len - 7;
        memcpy(tmpname, dentry->d_name.name + 7, taillen);
        tmpname[taillen] = '\0';

        *id = simple_strtoul(tmpname, &next, 16);
        if ( *next != PRESTO_ILOOKUP_SEP ) {
                EXIT;
                return 0;
        }

        *generation = simple_strtoul(next + 1, NULL, 16);
        CDEBUG(D_INODE, "ino string: %s, Id = %lx (%lu), "
               "generation %x (%d)\n",
               tmpname, *id, *id, *generation, *generation);
        EXIT;
        return 1;
}
/*
 * ilookup variant that does no work: it hands back the dentry it was given
 * and ignores dir, ino and generation.
 */
struct dentry *presto_tmpfs_ilookup(struct inode *dir,
struct dentry *dentry,
ino_t ino,
unsigned int generation)
{
return dentry;
}
/*
 * Non-zero when the current task may use ilookup: its euid equals
 * izo_authorized_uid, or it has CAP_DAC_READ_SEARCH.
 */
inline int presto_can_ilookup(void)
{
        if (current->euid == izo_authorized_uid)
                return 1;

        return capable(CAP_DAC_READ_SEARCH) ? 1 : 0;
}
/*
 * Resolve an ilookup name by inode number using iget().
 *
 * On success the inode is instantiated on dentry, the dentry is flagged
 * DCACHE_DISCONNECTED and NULL is returned.  Returns ERR_PTR(-EPERM) when
 * the caller may not use ilookup, and ERR_PTR(-ENOENT) when iget() yields
 * no inode, the inode is bad or unlinked, or its generation differs from
 * the requested one (the inode reference is dropped in those cases).
 */
struct dentry *presto_iget_ilookup(struct inode *dir,
                                   struct dentry *dentry,
                                   ino_t ino,
                                   unsigned int generation)
{
        struct inode *inode;

        ENTRY;
        if ( !presto_can_ilookup() ) {
                CERROR("ilookup denied: euid %u, authorized_uid %u\n",
                       current->euid, izo_authorized_uid);
                return ERR_PTR(-EPERM);
        }

        inode = iget(dir->i_sb, ino);
        if (inode == NULL) {
                CERROR("fatal: NULL inode ino %lu\n", ino);
                return ERR_PTR(-ENOENT);
        }

        if (is_bad_inode(inode) || inode->i_nlink == 0) {
                CERROR("fatal: bad inode ino %lu, links %d\n", ino, inode->i_nlink);
        } else if (inode->i_generation != generation) {
                CERROR("fatal: bad generation %u (want %u)\n",
                       inode->i_generation, generation);
        } else {
                d_instantiate(dentry, inode);
                dentry->d_flags |= DCACHE_DISCONNECTED; /* NFS hack */
                EXIT;
                return NULL;
        }

        /* Every rejection above lands here: drop the reference from iget(). */
        iput(inode);
        return ERR_PTR(-ENOENT);
}
/*
 * Create the "...ino:<ino>:<generation>" dentry for real's inode inside
 * the "..iopen.." directory under parent, and make it share real's
 * presto_dentry_data.
 *
 * Returns the new dentry (with the inode's i_count raised and one more
 * reference taken on the dentry data), or NULL when either lookup fails.
 * BUG()s if real has no dentry data, or if the ilookup dentry is already
 * positive or already has dentry data.
 */
struct dentry *presto_add_ilookup_dentry(struct dentry *parent,
                                         struct dentry *real)
{
        struct inode *inode = real->d_inode;
        struct dentry *de;
        /* "...ino:" (7) + 16 hex digits + ':' + 8 hex digits + NUL = 33 */
        char buf[40];
        struct dentry *inodir;
        struct presto_dentry_data *dd;

        inodir = lookup_one_len("..iopen..", parent,  strlen("..iopen.."));
        if (!inodir || IS_ERR(inodir)) {
                CERROR("%s: bad ..iopen.. lookup\n", __FUNCTION__);
                return NULL;
        }
        if (!inodir->d_inode) {
                CERROR("%s: bad ..iopen.. lookup\n", __FUNCTION__);
                /* a negative dentry still holds a reference: release it */
                dput(inodir);
                return NULL;
        }
        inodir->d_inode->i_op = &presto_dir_iops;

        snprintf(buf, sizeof(buf), "...ino:%lx:%x",
                 inode->i_ino, inode->i_generation);
        de = lookup_one_len(buf, inodir, strlen(buf));
        if (!de || IS_ERR(de)) {
                CERROR("%s: bad ...ino lookup %ld\n",
                       __FUNCTION__, PTR_ERR(de));
                dput(inodir);
                return NULL;
        }

        dd = presto_d2d(real);
        if (!dd)
                BUG();

        /* already exists */
        if (de->d_inode)
                BUG();

        if (presto_d2d(de))
                BUG();

        atomic_inc(&inode->i_count);
        de->d_op = &presto_dentry_ops;
        d_add(de, inode);
        if (!de->d_op)
                CERROR("DD: no ops dentry %p, dd %p\n", de, dd);
        dd->dd_inodentry = de;
        dd->dd_count++;
        de->d_fsdata = dd;

        dput(inodir);
        return de;
}
/*
 * Directory lookup for InterMezzo.
 *
 * Names recognised by izo_dentry_is_ilookup() go to the cache filter's
 * tr_ilookup method; every other name goes to the cache filesystem's own
 * lookup.  Afterwards the inode methods (if the dentry became positive)
 * and the dentry operations are installed and, for ordinary lookups,
 * dentry data is attached with presto_set_dd().
 *
 * Returns NULL on success, and also when no cache or no lookup method is
 * available (after logging).  Returns the ERR_PTR from the underlying
 * lookup, or ERR_PTR(rc) when presto_set_dd() fails with anything other
 * than -EROFS.
 */
struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
int rc = 0;
struct dentry *de;
struct presto_cache *cache;
int minor;
ino_t ino;
unsigned int generation;
struct inode_operations *iops;
int is_ilookup = 0;
ENTRY;
cache = presto_get_cache(dir);
if (cache == NULL) {
CERROR("InterMezzo BUG: no cache in presto_lookup "
"(dir ino: %ld)!\n", dir->i_ino);
EXIT;
return NULL;
}
minor = presto_c2m(cache);
iops = filter_c2cdiops(cache->cache_filter);
if (!iops || !iops->lookup) {
CERROR("InterMezzo BUG: filesystem has no lookup\n");
EXIT;
return NULL;
}
CDEBUG(D_CACHE, "dentry %p, dir ino: %ld, name: %*s, islento: %d\n",
dentry, dir->i_ino, dentry->d_name.len, dentry->d_name.name,
ISLENTO(minor));
/* a dentry entering lookup is expected to be bare; complain, then reset */
if (dentry->d_fsdata)
CERROR("DD -- BAD dentry %p has data\n", dentry);
dentry->d_fsdata = NULL;
#if 0
if (ext2_check_for_iopen(dir, dentry))
de = NULL;
else {
#endif
if ( izo_dentry_is_ilookup(dentry, &ino, &generation) ) {
/* NOTE(review): o_trops and tr_ilookup are not NULL-checked here */
de = cache->cache_filter->o_trops->tr_ilookup
(dir, dentry, ino, generation);
is_ilookup = 1;
} else
de = iops->lookup(dir, dentry, nd);
#if 0
}
#endif
if ( IS_ERR(de) ) {
CERROR("dentry lookup error %ld\n", PTR_ERR(de));
return de;
}
/* some file systems have no read_inode: set methods here */
if (dentry->d_inode)
presto_set_ops(dentry->d_inode, cache->cache_filter);
filter_setup_dentry_ops(cache->cache_filter,
dentry->d_op, &presto_dentry_ops);
dentry->d_op = filter_c2udops(cache->cache_filter);
/* In lookup we will tolerate EROFS return codes from presto_set_dd
* to placate NFS. EROFS indicates that a fileset was not found but
* we should still be able to continue through a lookup.
* Anything else is a hard error and must be returned to VFS. */
if (!is_ilookup)
rc = presto_set_dd(dentry);
if (rc && rc != -EROFS) {
CERROR("presto_set_dd failed (dir %ld, name %*s): %d\n",
dir->i_ino, dentry->d_name.len, dentry->d_name.name, rc);
return ERR_PTR(rc);
}
EXIT;
return NULL;
}
/*
 * Make sure a dentry has InterMezzo dentry data, attaching it with
 * presto_set_dd() when missing.  Returns 0 when data was already present,
 * otherwise whatever presto_set_dd() returns.  With PRESTO_NO_NFS defined
 * a missing d_fsdata is logged and treated as a BUG().
 */
static inline int presto_check_set_fsdata (struct dentry *de)
{
        if (presto_d2d(de) != NULL)
                return 0;

#ifdef PRESTO_NO_NFS
        CERROR("dentry without fsdata: %p: %*s\n", de,
               de->d_name.len, de->d_name.name);
        BUG();
#endif
        return presto_set_dd (de);
}
/*
 * setattr entry point: validate the dentry with presto_prep(), obtain a
 * permit on the inode, and hand the change to presto_do_setattr().
 *
 * Returns the presto_prep() error, -EROFS when no permit could be
 * obtained, or the result of presto_do_setattr().
 */
int presto_setattr(struct dentry *de, struct iattr *iattr)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        int rc;

        ENTRY;
        memset(&info, 0, sizeof(info));

        rc = presto_prep(de, &cache, &fset);
        if ( rc ) {
                EXIT;
                return rc;
        }

        if (!iattr->ia_valid)
                CDEBUG(D_INODE, "presto_setattr: iattr is not valid\n");

        CDEBUG(D_INODE, "valid %#x, mode %#o, uid %u, gid %u, size %Lu, "
               "atime %lu mtime %lu ctime %lu flags %d\n",
               iattr->ia_valid, iattr->ia_mode, iattr->ia_uid, iattr->ia_gid,
               iattr->ia_size, iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec,
               iattr->ia_ctime.tv_sec, iattr->ia_attr_flags);

        if ( presto_get_permit(de->d_inode) < 0 ) {
                EXIT;
                return -EROFS;
        }

        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;

        rc = presto_do_setattr(fset, de, iattr, &info);
        presto_put_permit(de->d_inode);
        return rc;
}
/*
* Now the meat: the fs operations that require journaling
*
*
* XXX: some of these need modifications for hierarchical filesets
*/
/*
 * Common preamble for journaled operations: make sure the dentry has
 * dentry data, then look up its fileset and cache.
 *
 * On return *fset holds the result of presto_fset() (possibly NULL) and,
 * if a fileset was found, *cache holds its fset_cache (possibly NULL).
 * Returns 0 when both exist and the fileset is writable; otherwise the
 * presto_check_set_fsdata() error, -EROFS for a missing or read-only
 * fileset, or -EBADF for a missing cache.
 */
int presto_prep(struct dentry *dentry, struct presto_cache **cache,
                struct presto_file_set **fset)
{
        struct presto_file_set *fs;
        struct presto_cache *pc;
        int rc;

        /* NFS might pass us dentries which have not gone through lookup.
         * Test and set d_fsdata for such dentries
         */
        rc = presto_check_set_fsdata (dentry);
        if (rc)
                return rc;

        fs = presto_fset(dentry);
        *fset = fs;
        if ( fs == NULL ) {
                CERROR("No file set for dentry at %p: %*s\n", dentry,
                       dentry->d_name.len, dentry->d_name.name);
                return -EROFS;
        }

        pc = fs->fset_cache;
        *cache = pc;
        if ( pc == NULL ) {
                CERROR("PRESTO: BAD, BAD: cannot find cache\n");
                return -EBADF;
        }

        CDEBUG(D_PIOCTL, "---> cache flags %x, fset flags %x\n",
               pc->cache_flags, fs->fset_flags);
        if ( presto_is_read_only(fs) ) {
                CERROR("PRESTO: cannot modify read-only fileset, minor %d.\n",
                       presto_c2m(pc));
                return -EROFS;
        }
        return 0;
}
/*
 * create() for InterMezzo directories.
 *
 * Validates the new dentry and its parent, releases dir->i_sem while a
 * permit on dir is obtained, re-takes it, then performs the journaled
 * create through presto_do_create().  Returns the validation error,
 * -EROFS when no permit could be obtained, or presto_do_create()'s result.
 */
static int presto_create(struct inode * dir, struct dentry * dentry, int mode,
                         struct nameidata *nd)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc == 0 )
                rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc != 0 ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);

        /* Open question carried over from the original author: do blocking
         * and non-blocking behaviour need to be checked for?  Without
         * blocking (return 1), the permit was acquired without
         * reintegration.
         */
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }
        presto_relock_sem(dir);

        /* d_parent is read only after the lock has been re-taken */
        parent = dentry->d_parent;
        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;

        rc = presto_do_create(fset, parent, dentry, mode, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        EXIT;
        return rc;
}
/*
 * link() for InterMezzo directories.
 *
 * Both the source dentry and the parent of the new name must resolve to
 * the same fileset, otherwise -EXDEV is returned.  dir->i_sem is released
 * while permits on the source inode and on dir are obtained; it is held
 * again on every return path.  Returns the validation error, -EROFS when
 * a permit could not be obtained, or the result of presto_do_link().
 *
 * No ilookup dentry is added for the new link: the earlier attempt was
 * disabled with the remark that it is not right for links.
 */
static int presto_link(struct dentry *old_dentry, struct inode *dir,
                       struct dentry *new_dentry)
{
        int error;
        struct presto_cache *cache, *new_cache;
        struct presto_file_set *fset, *new_fset;
        struct dentry *parent;
        struct lento_vfs_context info;

        ENTRY;
        error = presto_prep(old_dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }

        error = presto_check_set_fsdata(new_dentry);
        if ( error ) {
                EXIT;
                return error;
        }

        error = presto_prep(new_dentry->d_parent, &new_cache, &new_fset);
        if ( error ) {
                EXIT;
                return error;
        }

        if (fset != new_fset) {
                EXIT;
                return -EXDEV;
        }

        presto_unlock(dir);
        if ( presto_get_permit(old_dentry->d_inode) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        if ( presto_get_permit(dir) < 0 ) {
                /* do not leak the permit already held on the source inode */
                presto_put_permit(old_dentry->d_inode);
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        presto_relock_sem(dir);
        /* d_parent is read only after the lock has been re-taken */
        parent = new_dentry->d_parent;

        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;
        error = presto_do_link(fset, old_dentry, parent,
                               new_dentry, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        presto_put_permit(old_dentry->d_inode);
        return error;
}
/*
 * mkdir() for InterMezzo directories.
 *
 * Validates the new dentry and its parent, releases dir->i_sem while a
 * permit on dir is obtained, re-takes it, then performs the journaled
 * mkdir through presto_do_mkdir().  Returns the validation error, -EROFS
 * when no permit could be obtained, or presto_do_mkdir()'s result.
 */
static int presto_mkdir(struct inode * dir, struct dentry * dentry, int mode)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc == 0 )
                rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc != 0 ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }

        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;

        presto_relock_sem(dir);
        /* d_parent is read only after the lock has been re-taken */
        parent = dentry->d_parent;
        rc = presto_do_mkdir(fset, parent, dentry, mode, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        return rc;
}
/*
 * symlink() for InterMezzo directories.
 *
 * Validates the new dentry and its parent, releases dir->i_sem while a
 * permit on dir is obtained, re-takes it, then creates the journaled
 * symlink through presto_do_symlink().  Returns the validation error,
 * -EROFS when no permit could be obtained, or presto_do_symlink()'s result.
 */
static int presto_symlink(struct inode *dir, struct dentry *dentry,
                          const char *name)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc == 0 )
                rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc != 0 ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }
        presto_relock_sem(dir);

        /* d_parent is read only after the lock has been re-taken */
        parent = dentry->d_parent;
        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;

        rc = presto_do_symlink(fset, parent, dentry, name, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        return rc;
}
/*
 * unlink() for InterMezzo directories.
 *
 * Validates the dentry and its parent, releases dir->i_sem while a permit
 * on dir is obtained, re-takes it, then performs the journaled unlink
 * through presto_do_unlink().  Returns the validation error, -EROFS when
 * no permit could be obtained, or presto_do_unlink()'s result.
 */
int presto_unlink(struct inode *dir, struct dentry *dentry)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc == 0 )
                rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc != 0 ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }
        presto_relock_sem(dir);

        /* d_parent is read only after the lock has been re-taken */
        parent = dentry->d_parent;
        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;

        rc = presto_do_unlink(fset, parent, dentry, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        return rc;
}
/*
 * rmdir() for InterMezzo directories.
 *
 * Validates the dentry and its parent, rehashes the dentry if it is
 * unhashed, releases the i_sem of both the victim and dir while a permit
 * on the parent inode is obtained, re-takes both (dir first), then calls
 * presto_do_rmdir().  Returns the validation error, -EROFS when no permit
 * could be obtained, or presto_do_rmdir()'s result.
 *
 * NOTE(review): both exits after the unlock call lock_kernel(), yet no
 * unlock_kernel() is visible in this function -- presumably a leftover of
 * an older VFS locking scheme; confirm the BKL is balanced by the caller.
 */
static int presto_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
struct presto_cache *cache;
struct presto_file_set *fset;
struct dentry *parent = dentry->d_parent;
struct lento_vfs_context info;
ENTRY;
CDEBUG(D_FILE, "prepping presto\n");
error = presto_check_set_fsdata(dentry);
if ( error ) {
EXIT;
return error;
}
error = presto_prep(dentry->d_parent, &cache, &fset);
if ( error ) {
EXIT;
return error;
}
CDEBUG(D_FILE, "unlocking\n");
/* We need to dget() before the dput in double_unlock, to ensure we
* still have dentry references. double_lock doesn't do dget for us.
*/
if (d_unhashed(dentry))
d_rehash(dentry);
// double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
up(&dentry->d_inode->i_sem);
up(&dir->i_sem);
CDEBUG(D_FILE, "getting permit\n");
/* parent here is still the d_parent sampled at function entry */
if ( presto_get_permit(parent->d_inode) < 0 ) {
EXIT;
down(&dir->i_sem);
down(&dentry->d_inode->i_sem);
// double_down(&dir->i_sem, &dentry->d_inode->i_sem);
// double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
lock_kernel();
return -EROFS;
}
CDEBUG(D_FILE, "locking\n");
down(&dir->i_sem);
down(&dentry->d_inode->i_sem);
/* re-read d_parent now that the semaphores are held again */
parent = dentry->d_parent;
memset(&info, 0, sizeof(info));
if (!ISLENTO(presto_c2m(cache)))
info.flags = LENTO_FL_KML;
info.flags |= LENTO_FL_IGNORE_TIME;
error = presto_do_rmdir(fset, parent, dentry, &info);
presto_put_permit(parent->d_inode);
lock_kernel();
EXIT;
return error;
}
/*
 * mknod() for InterMezzo directories.
 *
 * Rejects device numbers that fail old_valid_dev() with -EINVAL.
 * Otherwise validates the new dentry and its parent, releases dir->i_sem
 * while a permit on dir is obtained, re-takes it, then creates the
 * journaled node through presto_do_mknod().  Returns the validation error,
 * -EROFS when no permit could be obtained, or presto_do_mknod()'s result.
 */
static int presto_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev)
{
        struct lento_vfs_context info;
        struct presto_file_set *fset;
        struct presto_cache *cache;
        struct dentry *parent;
        int rc;

        if (!old_valid_dev(rdev))
                return -EINVAL;

        ENTRY;
        rc = presto_check_set_fsdata(dentry);
        if ( rc == 0 )
                rc = presto_prep(dentry->d_parent, &cache, &fset);
        if ( rc != 0 ) {
                EXIT;
                return rc;
        }

        presto_unlock(dir);
        if ( presto_get_permit(dir) < 0 ) {
                EXIT;
                presto_fulllock(dir);
                return -EROFS;
        }
        presto_relock_sem(dir);

        /* d_parent is read only after the lock has been re-taken */
        parent = dentry->d_parent;
        memset(&info, 0, sizeof(info));
        if (!ISLENTO(presto_c2m(cache)))
                info.flags = LENTO_FL_KML;
        info.flags |= LENTO_FL_IGNORE_TIME;

        rc = presto_do_mknod(fset, parent, dentry, mode, rdev, &info);

        presto_relock_other(dir);
        presto_put_permit(dir);
        EXIT;
        return rc;
}
// XXX this can be optimized: renames across filesets only require
// multiple KML records, but can locally be executed normally.
int presto_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
int error;
struct presto_cache *cache, *new_cache;
struct presto_file_set *fset, *new_fset;
struct lento_vfs_context info;
struct dentry *old_parent = old_dentry->d_parent;
struct dentry *new_parent = new_dentry->d_parent;
int triple;
ENTRY;
error = presto_prep(old_dentry, &cache, &fset);
if ( error ) {
EXIT;
return error;
}
error = presto_prep(new_parent, &new_cache, &new_fset);
if ( error ) {
EXIT;
return error;
}
if ( fset != new_fset ) {
EXIT;
return -EXDEV;
}
/* We need to do dget before the dput in double_unlock, to ensure we
* still have dentry references. double_lock doesn't do dget for us.
*/
triple = (S_ISDIR(old_dentry->d_inode->i_mode) && new_dentry->d_inode)?
1:0;
unlock_rename(new_dentry->d_parent, old_dentry->d_parent);
if ( presto_get_permit(old_dir) < 0 ) {
EXIT;
return -EROFS;
}
if ( presto_get_permit(new_dir) < 0 ) {
EXIT;
return -EROFS;
}
lock_rename(new_dentry->d_parent, old_dentry->d_parent);
memset(&info, 0, sizeof(info));
if (!ISLENTO(presto_c2m(cache)))
info.flags = LENTO_FL_KML;
info.flags |= LENTO_FL_IGNORE_TIME;
error = do_rename(fset, old_parent, old_dentry, new_parent,
new_dentry, &info);
presto_put_permit(new_dir);
presto_put_permit(old_dir);
return error;
}
/* basically this allows the ilookup processes access to all files for
* reading, while not making ilookup totally insecure. This could all
* go away if we could set the CAP_DAC_READ_SEARCH capability for the client.
*/
/* If posix acls are available, the underlying cache fs will export the
* appropriate permission function. Thus we do not worry here about ACLs
* or EAs. -SHP
*/
int presto_permission(struct inode *inode, int mask, struct nameidata *nd)
{
unsigned short mode = inode->i_mode;
struct presto_cache *cache;
int rc;
ENTRY;
if ( presto_can_ilookup() && !(mask & S_IWOTH)) {
CDEBUG(D_CACHE, "ilookup on %ld OK\n", inode->i_ino);
EXIT;
return 0;
}
cache = presto_get_cache(inode);
if ( cache ) {
/* we only override the file/dir permission operations */
struct inode_operations *fiops = filter_c2cfiops(cache->cache_filter);
struct inode_operations *diops = filter_c2cdiops(cache->cache_filter);
if ( S_ISREG(mode) && fiops && fiops->permission ) {
EXIT;
return fiops->permission(inode, mask, nd);
}
if ( S_ISDIR(mode) && diops && diops->permission ) {
EXIT;
return diops->permission(inode, mask, nd);
}
}
/* The cache filesystem doesn't have its own permission function,
* so we call the default one.
*/
rc = vfs_permission(inode, mask);
EXIT;
return rc;
}
int presto_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
char buf[1024];
struct izo_ioctl_data *data = NULL;
struct presto_dentry_data *dd;
int rc;
ENTRY;
/* Try the filesystem's ioctl first, and return if it succeeded. */
dd = presto_d2d(file->f_dentry);
if (dd && dd->dd_fset) {
int (*cache_ioctl)(struct inode *, struct file *, unsigned int, unsigned long ) = filter_c2cdfops(dd->dd_fset->fset_cache->cache_filter)->ioctl;
rc = -ENOTTY;
if (cache_ioctl)
rc = cache_ioctl(inode, file, cmd, arg);
if (rc != -ENOTTY) {
EXIT;
return rc;
}
}
if (current->euid != 0 && current->euid != izo_authorized_uid) {
EXIT;
return -EPERM;
}
memset(buf, 0, sizeof(buf));
if (izo_ioctl_getdata(buf, buf + 1024, (void *)arg)) {
CERROR("intermezzo ioctl: data error\n");
return -EINVAL;
}
data = (struct izo_ioctl_data *)buf;
switch(cmd) {
case IZO_IOC_REINTKML: {
int rc;
int cperr;
rc = kml_reint_rec(file, data);
EXIT;
cperr = copy_to_user((char *)arg, data, sizeof(*data));
if (cperr) {
CERROR("WARNING: cperr %d\n", cperr);
rc = -EFAULT;
}
return rc;
}
case IZO_IOC_GET_RCVD: {
struct izo_rcvd_rec rec;
struct presto_file_set *fset;
int rc;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
rc = izo_rcvd_get(&rec, fset, data->ioc_uuid);
if (rc < 0) {
EXIT;
return rc;
}
EXIT;
return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0;
}
case IZO_IOC_REPSTATUS: {
__u64 client_kmlsize;
struct izo_rcvd_rec *lr_client;
struct izo_rcvd_rec rec;
struct presto_file_set *fset;
int minor;
int rc;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
client_kmlsize = data->ioc_kmlsize;
lr_client = (struct izo_rcvd_rec *) data->ioc_pbuf1;
rc = izo_repstatus(fset, client_kmlsize,
lr_client, &rec);
if (rc < 0) {
EXIT;
return rc;
}
EXIT;
return copy_to_user((char *)arg, &rec, sizeof(rec))? -EFAULT : 0;
}
case IZO_IOC_GET_CHANNEL: {
struct presto_file_set *fset;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
data->ioc_dev = fset->fset_cache->cache_psdev->uc_minor;
CDEBUG(D_PSDEV, "CHANNEL %d\n", data->ioc_dev);
EXIT;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
case IZO_IOC_SET_IOCTL_UID:
izo_authorized_uid = data->ioc_uid;
EXIT;
return 0;
case IZO_IOC_SET_PID:
rc = izo_psdev_setpid(data->ioc_dev);
EXIT;
return rc;
case IZO_IOC_SET_CHANNEL:
rc = izo_psdev_setchannel(file, data->ioc_dev);
EXIT;
return rc;
case IZO_IOC_GET_KML_SIZE: {
struct presto_file_set *fset;
__u64 kmlsize;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
kmlsize = presto_kml_offset(fset) + fset->fset_kml_logical_off;
EXIT;
return copy_to_user((char *)arg, &kmlsize, sizeof(kmlsize))?-EFAULT : 0;
}
case IZO_IOC_PURGE_FILE_DATA: {
struct presto_file_set *fset;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
rc = izo_purge_file(fset, data->ioc_inlbuf1);
EXIT;
return rc;
}
case IZO_IOC_GET_FILEID: {
rc = izo_get_fileid(file, data);
EXIT;
if (rc)
return rc;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
case IZO_IOC_SET_FILEID: {
rc = izo_set_fileid(file, data);
EXIT;
if (rc)
return rc;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
case IZO_IOC_ADJUST_LML: {
struct lento_vfs_context *info;
info = (struct lento_vfs_context *)data->ioc_inlbuf1;
rc = presto_adjust_lml(file, info);
EXIT;
return rc;
}
case IZO_IOC_CONNECT: {
struct presto_file_set *fset;
int minor;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
rc = izo_upc_connect(minor, data->ioc_ino,
data->ioc_generation, data->ioc_uuid,
data->ioc_flags);
EXIT;
return rc;
}
case IZO_IOC_GO_FETCH_KML: {
struct presto_file_set *fset;
int minor;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
rc = izo_upc_go_fetch_kml(minor, fset->fset_name,
data->ioc_uuid, data->ioc_kmlsize);
EXIT;
return rc;
}
case IZO_IOC_REVOKE_PERMIT:
if (data->ioc_flags)
rc = izo_revoke_permit(file->f_dentry, data->ioc_uuid);
else
rc = izo_revoke_permit(file->f_dentry, NULL);
EXIT;
return rc;
case IZO_IOC_CLEAR_FSET:
rc = izo_clear_fsetroot(file->f_dentry);
EXIT;
return rc;
case IZO_IOC_CLEAR_ALL_FSETS: {
struct presto_file_set *fset;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
rc = izo_clear_all_fsetroots(fset->fset_cache);
EXIT;
return rc;
}
case IZO_IOC_SET_FSET:
/*
* Mark this dentry as being a fileset root.
*/
rc = presto_set_fsetroot_from_ioc(file->f_dentry,
data->ioc_inlbuf1,
data->ioc_flags);
EXIT;
return rc;
case IZO_IOC_MARK: {
int res = 0; /* resulting flags - returned to user */
int error;
CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %d\n",
file->f_dentry->d_inode->i_ino, data->ioc_and_flag,
data->ioc_or_flag, data->ioc_mark_what);
switch (data->ioc_mark_what) {
case MARK_DENTRY:
error = izo_mark_dentry(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
break;
case MARK_FSET:
error = izo_mark_fset(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
break;
case MARK_CACHE:
error = izo_mark_cache(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
break;
case MARK_GETFL: {
int fflags, cflags;
data->ioc_and_flag = 0xffffffff;
data->ioc_or_flag = 0;
error = izo_mark_dentry(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &res);
if (error)
break;
error = izo_mark_fset(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag, &fflags);
if (error)
break;
error = izo_mark_cache(file->f_dentry,
data->ioc_and_flag,
data->ioc_or_flag,
&cflags);
if (error)
break;
data->ioc_and_flag = fflags;
data->ioc_or_flag = cflags;
break;
}
default:
error = -EINVAL;
}
if (error) {
EXIT;
return error;
}
data->ioc_mark_what = res;
CDEBUG(D_DOWNCALL, "mark inode: %ld, and: %x, or: %x, what %x\n",
file->f_dentry->d_inode->i_ino, data->ioc_and_flag,
data->ioc_or_flag, data->ioc_mark_what);
EXIT;
return copy_to_user((char *)arg, data, sizeof(*data))? -EFAULT : 0;
}
#if 0
case IZO_IOC_CLIENT_MAKE_BRANCH: {
struct presto_file_set *fset;
int minor;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
rc = izo_upc_client_make_branch(minor, fset->fset_name,
data->ioc_inlbuf1,
data->ioc_inlbuf2);
EXIT;
return rc;
}
#endif
case IZO_IOC_SERVER_MAKE_BRANCH: {
struct presto_file_set *fset;
int minor;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
izo_upc_server_make_branch(minor, data->ioc_inlbuf1);
EXIT;
return 0;
}
case IZO_IOC_SET_KMLSIZE: {
struct presto_file_set *fset;
int minor;
struct izo_rcvd_rec rec;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
rc = izo_upc_set_kmlsize(minor, fset->fset_name, data->ioc_uuid,
data->ioc_kmlsize);
if (rc != 0) {
EXIT;
return rc;
}
rc = izo_rcvd_get(&rec, fset, data->ioc_uuid);
if (rc == -EINVAL) {
/* We don't know anything about this uuid yet; no
* worries. */
memset(&rec, 0, sizeof(rec));
} else if (rc <= 0) {
CERROR("InterMezzo: error reading last_rcvd: %d\n", rc);
EXIT;
return rc;
}
rec.lr_remote_offset = data->ioc_kmlsize;
rc = izo_rcvd_write(fset, &rec);
if (rc <= 0) {
CERROR("InterMezzo: error writing last_rcvd: %d\n", rc);
EXIT;
return rc;
}
EXIT;
return rc;
}
case IZO_IOC_BRANCH_UNDO: {
struct presto_file_set *fset;
int minor;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
rc = izo_upc_branch_undo(minor, fset->fset_name,
data->ioc_inlbuf1);
EXIT;
return rc;
}
case IZO_IOC_BRANCH_REDO: {
struct presto_file_set *fset;
int minor;
fset = presto_fset(file->f_dentry);
if (fset == NULL) {
EXIT;
return -ENODEV;
}
minor = presto_f2m(fset);
rc = izo_upc_branch_redo(minor, fset->fset_name,
data->ioc_inlbuf1);
EXIT;
return rc;
}
default:
EXIT;
return -ENOTTY;
}
EXIT;
return 0;
}
/* File-operations table named for directories: the only hook it sets is
 * .ioctl, which points at presto_ioctl. */
struct file_operations presto_dir_fops = {
.ioctl = presto_ioctl
};
/* Inode-operations table named for directories: every namespace
 * operation listed here is routed to its presto_* implementation.
 * The set_ext_attr hook is only present when CONFIG_FS_EXT_ATTR is
 * defined. */
struct inode_operations presto_dir_iops = {
.create = presto_create,
.lookup = presto_lookup,
.link = presto_link,
.unlink = presto_unlink,
.symlink = presto_symlink,
.mkdir = presto_mkdir,
.rmdir = presto_rmdir,
.mknod = presto_mknod,
.rename = presto_rename,
.permission = presto_permission,
.setattr = presto_setattr,
#ifdef CONFIG_FS_EXT_ATTR
.set_ext_attr = presto_set_ext_attr,
#endif
};
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Tacit Networks, Inc.
* Author: Shirish H. Phatak <shirish@tacitnetworks.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Extended attribute handling for presto.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/segment.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#ifdef CONFIG_FS_EXT_ATTR
#include <linux/ext_attr.h>
extern inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
unsigned long value);
/* VFS interface */
/* XXX! Fixme test for user defined attributes */
/*
 * Set an extended attribute on an inode (inode_operations hook).
 *
 * @inode:      inode to modify
 * @name:       attribute name
 * @buffer:     attribute value; a user-space pointer when
 *              EXT_ATTR_FLAG_USER is set in @flags
 * @buffer_len: length of @buffer in bytes
 * @flags:      EXT_ATTR_* flags
 *
 * When @buffer is a non-empty user-space buffer it is copied into a
 * kernel buffer first, because the journal functions need access to its
 * contents; presto_do_set_ext_attr() is then called with
 * EXT_ATTR_FLAG_USER cleared.
 *
 * Returns the presto_do_set_ext_attr() result, or -1 when the inode has
 * no minor, -EINVAL for a lento minor or an inode without alias,
 * the presto_prep() error, -ENOMEM, -EFAULT, or -EROFS when no permit
 * can be obtained.
 *
 * The kernel copy is released on every exit path, and only when it was
 * actually allocated here.
 */
int presto_set_ext_attr(struct inode *inode,
                        const char *name, void *buffer,
                        size_t buffer_len, int flags)
{
        int error;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct lento_vfs_context info;
        struct dentry *dentry;
        int minor = presto_i2m(inode);
        char *buf = buffer;
        int buf_allocated = 0;

        ENTRY;
        if (minor < 0) {
                EXIT;
                return -1;
        }

        if ( ISLENTO(minor) ) {
                EXIT;
                return -EINVAL;
        }

        /* BAD...vfs should really pass down the dentry to use, especially
         * since every other operation in iops does. But for now
         * we do a reverse mapping from inode to the first dentry
         */
        if (list_empty(&inode->i_dentry)) {
                CERROR("No alias for inode %d\n", (int) inode->i_ino);
                EXIT;
                return -EINVAL;
        }

        dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);

        error = presto_prep(dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }

        /* If buffer is a user space pointer copy it to kernel space
         * and reset the flag. We do this since the journal functions need
         * access to the contents of the buffer, and the file system
         * does not care. When we actually invoke the function, we remove
         * the EXT_ATTR_FLAG_USER flag.
         *
         * XXX:Check if the "fs does not care" assertion is always true -SHP
         * (works for ext3)
         */
        if ((buffer != NULL) && (buffer_len != 0) &&
            (flags & EXT_ATTR_FLAG_USER)) {
                PRESTO_ALLOC(buf, buffer_len);
                if (!buf) {
                        CERROR("InterMezzo: out of memory!!!\n");
                        EXIT;
                        return -ENOMEM;
                }
                buf_allocated = 1;
                if (copy_from_user(buf, buffer, buffer_len)) {
                        error = -EFAULT;
                        goto out_free;
                }
        }

        if ( presto_get_permit(inode) < 0 ) {
                error = -EROFS;
                goto out_free;
        }

        /* Simulate presto_setup_info */
        memset(&info, 0, sizeof(info));
        /* For now redundant..but we keep it around just in case */
        info.flags = LENTO_FL_IGNORE_TIME;
        if (!ISLENTO(cache->cache_psdev->uc_minor))
                info.flags |= LENTO_FL_KML;

        /* We pass in the kernel space pointer and reset the
         * EXT_ATTR_FLAG_USER flag.
         * See comments above.
         */
        /* Note that mode is already set by VFS so we send in a NULL */
        error = presto_do_set_ext_attr(fset, dentry, name, buf,
                                       buffer_len, flags & ~EXT_ATTR_FLAG_USER,
                                       NULL, &info);
        presto_put_permit(inode);

out_free:
        /* free only what was allocated above, never the caller's buffer */
        if (buf_allocated)
                PRESTO_FREE(buf, buffer_len);
        EXIT;
        return error;
}
/* Lento Interface */
/* XXX: ignore flags? We should be forcing these operations through? -SHP*/
/*
 * Lento interface: set an extended attribute on the object at @path.
 *
 * The path is copied in with getname(), resolved with presto_walk(),
 * and the fileset of the resulting dentry is looked up; the attribute
 * is then set through presto_do_set_ext_attr() with the caller's @mode
 * and @info.  A NULL @buffer forces @buffer_len to 0.
 *
 * Runs under the big kernel lock.  Returns 0 or a negative errno
 * (getname()/presto_walk() error, -EINVAL when the dentry has no
 * fileset, or the presto_do_set_ext_attr() result).
 */
int lento_set_ext_attr(const char *path, const char *name,
                       void *buffer, size_t buffer_len, int flags, mode_t mode,
                       struct lento_vfs_context *info)
{
        int error;
        char * pathname;
        struct nameidata nd;
        struct dentry *dentry;
        struct presto_file_set *fset;

        ENTRY;
        lock_kernel();

        pathname=getname(path);
        error = PTR_ERR(pathname);
        if (IS_ERR(pathname)) {
                EXIT;
                goto exit;
        }

        /* Note that ext_attrs apply to both files and directories..*/
        error=presto_walk(pathname,&nd);
        if (error) {
                /* the name obtained from getname() must still be released */
                EXIT;
                goto exit_path;
        }
        dentry = nd.dentry;

        fset = presto_fset(dentry);
        error = -EINVAL;
        if ( !fset ) {
                CERROR("No fileset!\n");
                EXIT;
                goto exit_dentry;
        }

        if (buffer==NULL) buffer_len=0;

        error = presto_do_set_ext_attr(fset, dentry, name, buffer,
                                       buffer_len, flags, &mode, info);
exit_dentry:
        path_release(&nd);
exit_path:
        putname(pathname);
exit:
        unlock_kernel();
        return error;
}
#endif /*CONFIG_FS_EXT_ATTR*/
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory.
* Copyright (C) 2000, 2001 Tacit Networks, Inc.
* Copyright (C) 2000 Peter J. Braam
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Cluster File Systems, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* This file manages file I/O
*
*/
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fsfilter.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/*
* these are initialized in super.c
*/
extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd);
static int presto_open_upcall(int minor, struct dentry *de)
{
int rc = 0;
char *path, *buffer;
struct presto_file_set *fset;
int pathlen;
struct lento_vfs_context info;
struct presto_dentry_data *dd = presto_d2d(de);
PRESTO_ALLOC(buffer, PAGE_SIZE);
if ( !buffer ) {
CERROR("PRESTO: out of memory!\n");
return -ENOMEM;
}
fset = presto_fset(de);
path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
pathlen = MYPATHLEN(buffer, path);
CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);
if (dd->remote_ino == 0) {
rc = presto_get_fileid(minor, fset, de);
}
memset (&info, 0, sizeof(info));
if (dd->remote_ino > 0) {
info.remote_ino = dd->remote_ino;
info.remote_generation = dd->remote_generation;
} else
CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
(unsigned long long) dd->remote_ino);
rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
PRESTO_FREE(buffer, PAGE_SIZE);
return rc;
}
/*
 * Decide whether opening @file needs a data-on-demand (DOD) fetch.
 *
 * Returns 1 when a fetch is needed and 0 as soon as one of the
 * following holds, checked in this order: the cache minor is lento,
 * the dentry is an ilookup dentry, the fileset lacks
 * FSET_DATA_ON_DEMAND, the open has O_TRUNC, the dentry is marked
 * PRESTO_DONT_JOURNAL, the dentry is marked PRESTO_DATA, or the cache
 * filter's tr_all_data() reports that the inode holds all its data.
 */
static inline int open_check_dod(struct file *file,
                                 struct presto_file_set *fset)
{
        struct presto_cache *cache = fset->fset_cache;
        int minor = presto_c2m(cache);
        ino_t ino;
        int generation;

        if ( ISLENTO(minor) ) {
                CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
                return 0;
        }

        /* Files are only ever opened by inode during backfetches, when by
         * definition we have the authoritative copy of the data.  No DOD. */
        if (izo_dentry_is_ilookup(file->f_dentry, &ino, &generation)) {
                CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
                return 0;
        }

        if ((fset->fset_flags & FSET_DATA_ON_DEMAND) == 0) {
                CDEBUG(D_CACHE, "fileset not on demand.\n");
                return 0;
        }

        if ((file->f_flags & O_TRUNC) != 0) {
                CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
                return 0;
        }

        if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
                CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
                return 0;
        }

        if (presto_chk(file->f_dentry, PRESTO_DATA)) {
                CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
                return 0;
        }

        if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
                CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
                return 0;
        }

        return 1;
}
static int presto_file_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct file_operations *fops;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_file_data *fdata;
int writable = (file->f_flags & (O_RDWR | O_WRONLY));
int minor, i;
ENTRY;
if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
EXIT;
return -EBADF;
}
minor = presto_c2m(cache);
CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
ISLENTO(minor));
if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
file->f_flags & O_WRONLY)) {
CDEBUG(D_CACHE, "calling presto_get_permit\n");
if ( presto_get_permit(inode) < 0 ) {
EXIT;
return -EROFS;
}
presto_put_permit(inode);
}
if (open_check_dod(file, fset)) {
CDEBUG(D_CACHE, "presto_open_upcall\n");
CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
rc = presto_open_upcall(minor, file->f_dentry);
if (rc) {
EXIT;
CERROR("%s: returning error %d\n", __FUNCTION__, rc);
return rc;
}
}
/* file was truncated upon open: do not refetch */
if (file->f_flags & O_TRUNC) {
CDEBUG(D_CACHE, "setting DATA, ATTR\n");
presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
}
fops = filter_c2cffops(cache->cache_filter);
if ( fops->open ) {
CDEBUG(D_CACHE, "calling fs open\n");
rc = fops->open(inode, file);
if (rc) {
EXIT;
return rc;
}
}
if (writable) {
PRESTO_ALLOC(fdata, sizeof(*fdata));
if (!fdata) {
EXIT;
return -ENOMEM;
}
/* LOCK: XXX check that the kernel lock protects this alloc */
fdata->fd_do_lml = 0;
fdata->fd_bytes_written = 0;
fdata->fd_fsuid = current->fsuid;
fdata->fd_fsgid = current->fsgid;
fdata->fd_mode = file->f_dentry->d_inode->i_mode;
fdata->fd_uid = file->f_dentry->d_inode->i_uid;
fdata->fd_gid = file->f_dentry->d_inode->i_gid;
fdata->fd_ngroups = current->group_info->ngroups;
for (i=0 ; i < current->group_info->ngroups ; i++)
fdata->fd_groups[i] = GROUP_AT(current->group_info,i);
if (!ISLENTO(minor))
fdata->fd_info.flags = LENTO_FL_KML;
else {
/* this is for the case of DOD,
reint_close will adjust flags if needed */
fdata->fd_info.flags = 0;
}
presto_getversion(&fdata->fd_version, inode);
file->private_data = fdata;
} else {
file->private_data = NULL;
}
EXIT;
return 0;
}
int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
{
struct presto_file_data *fdata =
(struct presto_file_data *) file->private_data;
if (!fdata) {
EXIT;
return -EINVAL;
}
memcpy(&fdata->fd_info, info, sizeof(*info));
EXIT;
return 0;
}
/*
 * release() hook for InterMezzo regular files.
 *
 * Calls the cache filesystem's release method, then, if this open
 * modified the file (fd_do_lml set), obtains a permit and performs the
 * journalled close through presto_do_close().
 *
 * The per-open data is always freed before returning: release is the
 * last call made on this struct file, so keeping it on an error path
 * would only leak it.
 *
 * Returns the presto_prep() error, -EROFS when no permit is available,
 * the presto_do_close() result when the file was modified, or else the
 * lower release result.
 */
static int presto_file_release(struct inode *inode, struct file *file)
{
        int rc;
        struct file_operations *fops;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;

        ENTRY;
        rc = presto_prep(file->f_dentry, &cache, &fset);
        if ( rc )
                goto out_free;

        fops = filter_c2cffops(cache->cache_filter);
        if (fops && fops->release)
                rc = fops->release(inode, file);

        CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
               ISLENTO(cache->cache_psdev->uc_minor),
               cache->cache_psdev->uc_minor, rc, fdata);

        /* this file was modified: ignore close errors, write KML */
        if (fdata && fdata->fd_do_lml) {
                /* XXX: remove when lento gets file granularity cd */
                if ( presto_get_permit(inode) < 0 ) {
                        rc = -EROFS;
                        goto out_free;
                }

                fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
                rc = presto_do_close(fset, file);
                presto_put_permit(inode);
        }

out_free:
        if (fdata) {
                PRESTO_FREE(fdata, sizeof(*fdata));
                file->private_data = NULL;
        }

        EXIT;
        return rc;
}
/*
 * Account @res freshly written bytes against the per-open counter and,
 * once the counter reaches the fileset's fset_file_maxio, journal an
 * intermediate close record.
 *
 * Only active when the fileset has FSET_JCLOSE_ON_WRITE set and the
 * cache is not a lento cache.  The counter is reset only after the
 * close record was journalled successfully.
 */
static void presto_apply_write_policy(struct file *file,
                                      struct presto_file_set *fset, loff_t res)
{
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;
        struct presto_cache *cache = fset->fset_cache;
        struct presto_version cur_version;
        struct rec_info rec;
        int rc;

        /* Here we do a journal close after a fixed or a specified
           amount of KBytes, currently a global parameter set with
           sysctl. If files are open for a long time, this gives added
           protection. (XXX todo: per cache, add ioctl, handle
           journaling in a thread, add more options etc.)
        */
        if (!(fset->fset_flags & FSET_JCLOSE_ON_WRITE))
                return;
        if (ISLENTO(cache->cache_psdev->uc_minor))
                return;

        fdata->fd_bytes_written += res;
        if (fdata->fd_bytes_written < fset->fset_file_maxio)
                return;

        presto_getversion(&cur_version, file->f_dentry->d_inode);

        /* This is really heavy weight and should be fixed
           ASAP. At most we should be recording the number
           of bytes written and not locking the kernel,
           wait for permits, etc, on the write path. SHP
        */
        lock_kernel();
        if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) {
                EXIT;
                /* we must be disconnected, not to worry */
                unlock_kernel();
                return;
        }
        rc = presto_journal_close(&rec, fset, fdata,
                                  file->f_dentry,
                                  &fdata->fd_version,
                                  &cur_version);
        presto_put_permit(file->f_dentry->d_inode);
        unlock_kernel();

        if (rc != 0) {
                CERROR("presto_close: cannot journal close\n");
                /* XXX these errors are really bad */
                /* panic(); */
                return;
        }

        fdata->fd_bytes_written = 0;
}
/*
 * write() hook for InterMezzo regular files.
 *
 * Reserves cache space for the write, and on the first non-empty write
 * through this open (unless the dentry is marked PRESTO_DONT_JOURNAL)
 * writes an LML close record inside its own transaction.  The data is
 * then written through the cache filesystem's write method and the
 * write policy is applied to the number of bytes written.
 *
 * Returns the lower write result, the presto_prep() error, -ENOSPC
 * when space or a transaction cannot be reserved, or the error from
 * writing the LML record.
 *
 * A failure to write the LML record fails the write instead of
 * deliberately storing through a NULL pointer; the transaction is
 * closed and fd_do_lml is cleared so that a later write retries.
 */
static ssize_t presto_file_write(struct file *file, const char *buf,
                                 size_t size, loff_t *off)
{
        struct rec_info rec;
        int error;
        struct presto_cache *cache;
        struct presto_file_set *fset;
        struct file_operations *fops;
        ssize_t res;
        int do_lml_here;
        void *handle = NULL;
        unsigned long blocks;
        struct presto_file_data *fdata;
        loff_t res_size;

        ENTRY;
        error = presto_prep(file->f_dentry, &cache, &fset);
        if ( error ) {
                EXIT;
                return error;
        }

        blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
        /* XXX 3 is for ext2 indirect blocks ... */
        res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
                << file->f_dentry->d_inode->i_sb->s_blocksize_bits);

        error = presto_reserve_space(fset->fset_cache, res_size);
        CDEBUG(D_INODE, "Reserved %Ld for %Zd\n", res_size, size);
        if ( error ) {
                EXIT;
                return -ENOSPC;
        }

        CDEBUG(D_INODE, "islento %d, minor: %d\n",
               ISLENTO(cache->cache_psdev->uc_minor),
               cache->cache_psdev->uc_minor);

        /*
         *  XXX this lock should become a per inode lock when
         *  Vinny's changes are in; we could just use i_sem.
         *
         *  NOTE(review): fd_do_lml is tested and set while holding only
         *  the read side of fd_lock, which does not exclude another
         *  writer on the same struct file -- confirm whether this needs
         *  the write side.
         */
        read_lock(&fset->fset_lml.fd_lock);
        fdata = (struct presto_file_data *)file->private_data;
        /* files without per-open data never start an LML record */
        do_lml_here = size && fdata && (fdata->fd_do_lml == 0) &&
                !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);

        if (do_lml_here)
                fdata->fd_do_lml = 1;
        read_unlock(&fset->fset_lml.fd_lock);

        /* XXX
           There might be a bug here.  We need to make
           absolutely sure that the ext3_file_write commits
           after our transaction that writes the LML record.
           Nesting the file write helps if new blocks are allocated.
        */
        res = 0;
        if (do_lml_here) {
                struct presto_version file_version;

                /* handle different space reqs from file system below! */
                handle = presto_trans_start(fset, file->f_dentry->d_inode,
                                            KML_OPCODE_WRITE);
                if ( IS_ERR(handle) ) {
                        /* no LML record exists: let a later write retry */
                        fdata->fd_do_lml = 0;
                        presto_release_space(fset->fset_cache, res_size);
                        CERROR("presto_write: no space for transaction\n");
                        EXIT;
                        return -ENOSPC;
                }

                presto_getversion(&file_version, file->f_dentry->d_inode);
                res = presto_write_lml_close(&rec, fset, file,
                                             fdata->fd_info.remote_ino,
                                             fdata->fd_info.remote_generation,
                                             &fdata->fd_info.remote_version,
                                             &file_version);
                if ( res ) {
                        CERROR("intermezzo: PANIC failed to write LML\n");
                        fdata->fd_do_lml = 0;
                        /* close the transaction started above */
                        presto_trans_commit(fset, handle);
                        EXIT;
                        goto exit_write;
                }
                fdata->fd_lml_offset = rec.offset;
                presto_trans_commit(fset, handle);
        }

        fops = filter_c2cffops(cache->cache_filter);
        res = fops->write(file, buf, size, off);
        if ( res != size ) {
                CDEBUG(D_FILE, "file write returns short write: size %Zd, res %Zd\n", size, res);
        }

        if ( (res > 0) && fdata )
                presto_apply_write_policy(file, fset, res);

        EXIT;
exit_write:
        presto_release_space(fset->fset_cache, res_size);
        return res;
}
/* File-operations table named for regular files: write, open and
 * release go to the presto_file_* functions defined in this file,
 * ioctl goes to presto_ioctl. */
struct file_operations presto_file_fops = {
.write = presto_file_write,
.open = presto_file_open,
.release = presto_file_release,
.ioctl = presto_ioctl
};
/* Inode-operations table named for regular files: permission and
 * setattr are routed to their presto_* implementations; set_ext_attr is
 * only present when CONFIG_FS_EXT_ATTR is defined. */
struct inode_operations presto_file_iops = {
.permission = presto_permission,
.setattr = presto_setattr,
#ifdef CONFIG_FS_EXT_ATTR
.set_ext_attr = presto_set_ext_attr,
#endif
};
/* FIXME: I bet we want to add a lock here and in presto_file_open. */
/*
 * Purge the cached data of @file within fileset @fset.
 *
 * Currently a stub: the implementation below is compiled out with
 * "#if 0" and the function always returns 0.  The disabled code
 * sketches the intended sequence: look the file up under the cache
 * mount point, give up with -EBUSY if it was accessed within the last
 * five seconds, then truncate it to zero and back to its old size
 * inside a KML_OPCODE_TRUNC transaction.
 */
int izo_purge_file(struct presto_file_set *fset, char *file)
{
#if 0
void *handle = NULL;
char *path = NULL;
struct nameidata nd;
struct dentry *dentry;
int rc = 0, len;
loff_t oldsize;
/* FIXME: not mtpt it's gone */
len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
PRESTO_ALLOC(path, len + 1);
if (path == NULL)
return -1;
sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
rc = izo_lookup_file(fset, path, &nd);
if (rc)
goto error;
dentry = nd.dentry;
/* FIXME: take a lock here */
if (dentry->d_inode->i_atime.tv_sec > get_seconds() - 5) {
/* We lost the race; this file was accessed while we were doing
* ioctls and lookups and whatnot. */
rc = -EBUSY;
goto error_unlock;
}
/* FIXME: Check if this file is open. */
handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
if (IS_ERR(handle)) {
rc = -ENOMEM;
goto error_unlock;
}
/* FIXME: Write LML record */
oldsize = dentry->d_inode->i_size;
rc = izo_do_truncate(fset, dentry, 0, oldsize);
if (rc != 0)
goto error_clear;
rc = izo_do_truncate(fset, dentry, oldsize, 0);
if (rc != 0)
goto error_clear;
error_clear:
/* FIXME: clear LML record */
error_unlock:
/* FIXME: release the lock here */
error:
if (handle != NULL && !IS_ERR(handle))
presto_trans_commit(fset, handle);
if (path != NULL)
PRESTO_FREE(path, len + 1);
return rc;
#else
/* purge is not implemented: report success without doing anything */
return 0;
#endif
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Managing filesets
*
*/
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/module.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
static inline struct presto_file_set *presto_dentry2fset(struct dentry *dentry)
{
if (presto_d2d(dentry) == NULL) {
EXIT;
return NULL;
}
return presto_d2d(dentry)->dd_fset;
}
/* find the fileset dentry for this dentry */
/*
 * Find the fileset @de belongs to: starting at @de, follow d_parent
 * until a dentry with a fileset is found.  Returns NULL when the walk
 * reaches a dentry that is its own parent without finding one.
 */
struct presto_file_set *presto_fset(struct dentry *de)
{
        struct presto_file_set *found;
        struct dentry *cur = de;

        ENTRY;
        if (de->d_inode == NULL) {
                /* FIXME: is this ok to be NULL? */
                CDEBUG(D_INODE,"presto_fset: warning %*s has NULL inode.\n",
                       de->d_name.len, de->d_name.name);
        }

        while (1) {
                found = presto_dentry2fset(cur);
                if (found != NULL)
                        break;
                /* a dentry that is its own parent ends the walk */
                if (cur->d_parent == cur)
                        break;
                cur = cur->d_parent;
        }

        EXIT;
        return found;
}
/*
 * Store the KML record number (fset_kml.fd_recno) of the fileset
 * attached to the dentry at @path in *@recno.
 *
 * Returns 0 on success, the presto_walk() error, -ENXIO when the inode
 * is not a presto inode, or -EINVAL when the dentry itself carries no
 * fileset.
 */
int presto_get_lastrecno(char *path, off_t *recno)
{
        struct nameidata nd;
        struct presto_file_set *fset;
        int rc;

        ENTRY;

        rc = presto_walk(path, &nd);
        if (rc != 0) {
                EXIT;
                return rc;
        }

        if ( !presto_ispresto(nd.dentry->d_inode) ) {
                rc = -ENXIO;
                EXIT;
        } else {
                fset = presto_dentry2fset(nd.dentry);
                if (fset == NULL) {
                        rc = -EINVAL;
                        EXIT;
                } else {
                        *recno = fset->fset_kml.fd_recno;
                        rc = 0;
                }
        }

        path_release(&nd);
        return rc;
}
/*
 * Build "/.intermezzo/<fsetname>/<name>" in a buffer obtained with
 * PRESTO_ALLOC.  The buffer is sized to the string plus its trailing
 * NUL.  Returns NULL when the allocation fails; the caller frees the
 * result.
 */
static char * _izo_make_path(char *fsetname, char *name)
{
        char *result = NULL;
        int nbytes;

        /* prefix + fileset name + '/' + name + terminating NUL */
        nbytes = strlen("/.intermezzo/");
        nbytes += strlen(fsetname);
        nbytes += 1;
        nbytes += strlen(name);
        nbytes += 1;

        PRESTO_ALLOC(result, nbytes);
        if (result != NULL)
                sprintf(result, "/.intermezzo/%s/%s", fsetname, name);

        return result;
}
/* Build the "/.intermezzo/<fileset name>/<name>" path for @name inside
 * fileset @fset; forwards to _izo_make_path() with fset->fset_name and
 * returns its result (NULL on allocation failure). */
char * izo_make_path(struct presto_file_set *fset, char *name)
{
return _izo_make_path(fset->fset_name, name);
}
/*
 * Open "/.intermezzo/<fsetname>/<name>" with filp_open().
 *
 * Returns the struct file from filp_open() (which may be an ERR_PTR),
 * or ERR_PTR(-ENOMEM) when the path cannot be allocated.
 */
static struct file *_izo_fset_open(char *fsetname, char *name, int flags, int mode)
{
        char *path;
        struct file *f;
        int error;

        ENTRY;

        path = _izo_make_path(fsetname, name);
        if (path == NULL) {
                EXIT;
                return ERR_PTR(-ENOMEM);
        }

        CDEBUG(D_INODE, "opening file %s\n", path);
        f = filp_open(path, flags, mode);
        error = PTR_ERR(f);
        if (IS_ERR(f)) {
                CDEBUG(D_INODE, "Error %d\n", error);
        }

        /* _izo_make_path() allocated strlen(path) + 1 bytes (string plus
         * NUL); free with that same size. */
        PRESTO_FREE(path, strlen(path) + 1);

        EXIT;
        return f;
}
/* Open the file @name inside fileset @fset's "/.intermezzo/<fileset
 * name>/" directory; forwards to _izo_fset_open() with fset->fset_name
 * and returns its result unchanged. */
struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode)
{
return _izo_fset_open(fset->fset_name, name, flags, mode);
}
/*
* note: this routine "pins" a dentry for a fileset root
*/
int presto_set_fsetroot(struct dentry *ioctl_dentry, char *fsetname,
unsigned int flags)
{
struct presto_file_set *fset = NULL;
struct presto_cache *cache;
int error;
struct file *fset_root;
struct dentry *dentry;
ENTRY;
fset_root = _izo_fset_open(fsetname, "ROOT", O_RDONLY, 000);
if (IS_ERR(fset_root)) {
CERROR("Can't open %s/ROOT\n", fsetname);
EXIT;
error = PTR_ERR(fset_root);
goto out;
}
dentry = dget(fset_root->f_dentry);
filp_close(fset_root, NULL);
dentry->d_inode->i_op = ioctl_dentry->d_inode->i_op;
dentry->d_inode->i_fop = ioctl_dentry->d_inode->i_fop;
dentry->d_op = ioctl_dentry->d_op;
fset = presto_dentry2fset(dentry);
if (fset && (fset->fset_dentry == dentry) ) {
CERROR("Fsetroot already set (inode %ld)\n",
dentry->d_inode->i_ino);
/* XXX: ignore because clear_fsetroot is broken */
#if 0
dput(dentry);
EXIT;
error = -EEXIST;
goto out;
#endif
}
cache = presto_get_cache(dentry->d_inode);
if (!cache) {
CERROR("No cache found for inode %ld\n",
dentry->d_inode->i_ino);
EXIT;
error = -ENODEV;
goto out_free;
}
PRESTO_ALLOC(fset, sizeof(*fset));
if ( !fset ) {
CERROR("No memory allocating fset for %s\n", fsetname);
EXIT;
error = -ENOMEM;
goto out_free;
}
CDEBUG(D_INODE, "fset at %p\n", fset);
CDEBUG(D_INODE, "InterMezzo: fsetroot: inode %ld, fileset name %s\n",
dentry->d_inode->i_ino, fsetname);
fset->fset_mnt = mntget(current->fs->pwdmnt);
fset->fset_cache = cache;
fset->fset_dentry = dentry;
fset->fset_name = strdup(fsetname);
fset->fset_chunkbits = CHUNK_BITS;
fset->fset_flags = flags;
fset->fset_file_maxio = FSET_DEFAULT_MAX_FILEIO;
fset->fset_permit_lock = SPIN_LOCK_UNLOCKED;
PRESTO_ALLOC(fset->fset_reint_buf, 64 * 1024);
if (fset->fset_reint_buf == NULL) {
EXIT;
error = -ENOMEM;
goto out_free;
}
init_waitqueue_head(&fset->fset_permit_queue);
if (presto_d2d(dentry) == NULL) {
dentry->d_fsdata = izo_alloc_ddata();
}
if (presto_d2d(dentry) == NULL) {
CERROR("InterMezzo: %s: no memory\n", __FUNCTION__);
EXIT;
error = -ENOMEM;
goto out_free;
}
presto_d2d(dentry)->dd_fset = fset;
list_add(&fset->fset_list, &cache->cache_fset_list);
error = izo_init_kml_file(fset, &fset->fset_kml);
if ( error ) {
EXIT;
CDEBUG(D_JOURNAL, "Error init_kml %d\n", error);
goto out_list_del;
}
error = izo_init_lml_file(fset, &fset->fset_lml);
if ( error ) {
int rc;
EXIT;
rc = izo_log_close(&fset->fset_kml);
CDEBUG(D_JOURNAL, "Error init_lml %d, cleanup %d\n", error, rc);
goto out_list_del;
}
/* init_last_rcvd_file could trigger a presto_file_write(), which
* requires that the lml structure be initialized. -phil */
error = izo_init_last_rcvd_file(fset, &fset->fset_rcvd);
if ( error ) {
int rc;
EXIT;
rc = izo_log_close(&fset->fset_kml);
rc = izo_log_close(&fset->fset_lml);
CDEBUG(D_JOURNAL, "Error init_lastrcvd %d, cleanup %d\n", error, rc);
goto out_list_del;
}
CDEBUG(D_PIOCTL, "-------> fset at %p, dentry at %p, mtpt %p,"
"fset %s, cache %p, presto_d2d(dentry)->dd_fset %p\n",
fset, dentry, fset->fset_dentry, fset->fset_name, cache,
presto_d2d(dentry)->dd_fset);
EXIT;
return 0;
out_list_del:
list_del(&fset->fset_list);
presto_d2d(dentry)->dd_fset = NULL;
out_free:
if (fset) {
mntput(fset->fset_mnt);
if (fset->fset_reint_buf != NULL)
PRESTO_FREE(fset->fset_reint_buf, 64 * 1024);
PRESTO_FREE(fset, sizeof(*fset));
}
dput(dentry);
out:
return error;
}
static int izo_cleanup_fset(struct presto_file_set *fset)
{
int error;
struct presto_cache *cache;
ENTRY;
CERROR("Cleaning up fset %s\n", fset->fset_name);
error = izo_log_close(&fset->fset_kml);
if (error)
CERROR("InterMezzo: Closing kml for fset %s: %d\n",
fset->fset_name, error);
error = izo_log_close(&fset->fset_lml);
if (error)
CERROR("InterMezzo: Closing lml for fset %s: %d\n",
fset->fset_name, error);
error = izo_log_close(&fset->fset_rcvd);
if (error)
CERROR("InterMezzo: Closing last_rcvd for fset %s: %d\n",
fset->fset_name, error);
cache = fset->fset_cache;
list_del(&fset->fset_list);
presto_d2d(fset->fset_dentry)->dd_fset = NULL;
dput(fset->fset_dentry);
mntput(fset->fset_mnt);
PRESTO_FREE(fset->fset_name, strlen(fset->fset_name) + 1);
PRESTO_FREE(fset->fset_reint_buf, 64 * 1024);
PRESTO_FREE(fset, sizeof(*fset));
EXIT;
return error;
}
int izo_clear_fsetroot(struct dentry *dentry)
{
struct presto_file_set *fset;
ENTRY;
fset = presto_dentry2fset(dentry);
if (!fset) {
EXIT;
return -EINVAL;
}
izo_cleanup_fset(fset);
EXIT;
return 0;
}
/*
 * Clean up every fileset on cache->cache_fset_list.
 *
 * The successor is fetched before each cleanup because izo_cleanup_fset()
 * unlinks and frees the current entry.  The walk stops at the first
 * cleanup that reports an error, and that error is returned; 0 means the
 * whole list was processed.
 */
int izo_clear_all_fsetroots(struct presto_cache *cache)
{
        struct list_head *head = &cache->cache_fset_list;
        struct list_head *pos, *next;
        int rc = 0;

        for (pos = head->next; pos != head; pos = next) {
                struct presto_file_set *fset;

                next = pos->next;
                fset = list_entry(pos, struct presto_file_set, fset_list);

                rc = izo_cleanup_fset(fset);
                if (rc != 0)
                        break;
        }

        return rc;
}
/*
 * Allocate a private, zeroed struct vfsmount with a reference count of one
 * and empty list heads.  Returns NULL if the allocation fails.
 */
static struct vfsmount *izo_alloc_vfsmnt(void)
{
        struct vfsmount *m;

        PRESTO_ALLOC(m, sizeof(*m));
        if (m == NULL)
                return NULL;

        memset(m, 0, sizeof(struct vfsmount));
        atomic_set(&m->mnt_count,1);

        INIT_LIST_HEAD(&m->mnt_hash);
        INIT_LIST_HEAD(&m->mnt_child);
        INIT_LIST_HEAD(&m->mnt_mounts);
        INIT_LIST_HEAD(&m->mnt_list);

        return m;
}
static void izo_setup_ctxt(struct dentry *root, struct vfsmount *mnt,
struct run_ctxt *save)
{
struct run_ctxt new;
mnt->mnt_root = root;
mnt->mnt_sb = root->d_inode->i_sb;
unlock_super(mnt->mnt_sb);
new.rootmnt = mnt;
new.root = root;
new.pwdmnt = mnt;
new.pwd = root;
new.fsuid = 0;
new.fsgid = 0;
new.fs = get_fs();
/* XXX where can we get the groups from? */
new.group_info = groups_alloc(0);
push_ctxt(save, &new);
}
/*
 * Undo izo_setup_ctxt(): take the superblock lock of 'mnt' again (setup
 * released it) and then restore the task context stored in 'save'.
 */
static void izo_cleanup_ctxt(struct vfsmount *mnt, struct run_ctxt *save)
{
lock_super(mnt->mnt_sb);
pop_ctxt(save);
}
/*
 * Create directory 'name' with permissions 'mode' directly below 'dir'.
 *
 * Returns the lookup error if the name cannot be looked up, -EEXIST if
 * something with that name already exists, otherwise the result of
 * vfs_mkdir().
 */
static int izo_simple_mkdir(struct dentry *dir, char *name, int mode)
{
        struct dentry *child;
        int rc;

        ENTRY;

        child = lookup_one_len(name, dir, strlen(name));
        if (IS_ERR(child)) {
                EXIT;
                return PTR_ERR(child);
        }

        if (child->d_inode != NULL) {
                rc = -EEXIST;
        } else {
                rc = vfs_mkdir(dir->d_inode, child, mode);
        }

        dput(child);
        EXIT;
        return rc;
}
/*
 * Create symlink 'name' pointing at 'tgt' directly below 'dir'.
 *
 * Returns the lookup error if the name cannot be looked up, -EEXIST if
 * something with that name already exists, otherwise the result of
 * vfs_symlink().
 */
static int izo_simple_symlink(struct dentry *dir, char *name, char *tgt)
{
        struct dentry *child;
        int rc;

        ENTRY;

        child = lookup_one_len(name, dir, strlen(name));
        if (IS_ERR(child)) {
                EXIT;
                return PTR_ERR(child);
        }

        if (child->d_inode != NULL) {
                rc = -EEXIST;
        } else {
                rc = vfs_symlink(dir->d_inode, child, tgt);
        }

        dput(child);
        EXIT;
        return rc;
}
/*
 * run set_fsetroot in chroot environment
 *
 * 'root' must be the root dentry of its superblock.  The task context is
 * switched to the cache's private vfsmount for the duration of
 * presto_set_fsetroot() and restored afterwards.
 *
 * Returns -ENODEV if 'root' is not the mount point or has no InterMezzo
 * cache, -ENOMEM if the cache has no vfsmount (izo_prepare_fileset() is
 * what allocates it), otherwise the result of presto_set_fsetroot().
 */
int presto_set_fsetroot_from_ioc(struct dentry *root, char *fsetname,
                                 unsigned int flags)
{
        int rc;
        struct presto_cache *cache;
        struct vfsmount *mnt;
        struct run_ctxt save;

        if (root != root->d_inode->i_sb->s_root) {
                CERROR ("IOC_SET_FSET must be called on mount point\n");
                return -ENODEV;
        }

        cache = presto_get_cache(root->d_inode);
        /* presto_get_cache() can come back empty (presto_set_fsetroot()
         * and presto_read_inode() both handle that), so do not
         * dereference the result blindly. */
        if (!cache) {
                CERROR("No cache found for inode %ld\n",
                       root->d_inode->i_ino);
                EXIT;
                return -ENODEV;
        }

        mnt = cache->cache_vfsmount;
        if (!mnt) {
                EXIT;
                return -ENOMEM;
        }

        izo_setup_ctxt(root, mnt, &save);
        rc = presto_set_fsetroot(root, fsetname, flags);
        izo_cleanup_ctxt(mnt, &save);
        return rc;
}
/* XXX: this function should detect if fsetname is already in use for
the cache under root
*/
int izo_prepare_fileset(struct dentry *root, char *fsetname)
{
int err;
struct dentry *dotizo = NULL, *fsetdir = NULL, *dotiopen = NULL;
struct presto_cache *cache;
struct vfsmount *mnt;
struct run_ctxt save;
cache = presto_get_cache(root->d_inode);
mnt = cache->cache_vfsmount = izo_alloc_vfsmnt();
if (!mnt) {
EXIT;
return -ENOMEM;
}
if (!fsetname)
fsetname = "rootfset";
izo_setup_ctxt(root, mnt, &save);
err = izo_simple_mkdir(root, ".intermezzo", 0755);
CDEBUG(D_CACHE, "mkdir on .intermezzo err %d\n", err);
err = izo_simple_mkdir(root, "..iopen..", 0755);
CDEBUG(D_CACHE, "mkdir on ..iopen.. err %d\n", err);
dotiopen = lookup_one_len("..iopen..", root, strlen("..iopen.."));
if (IS_ERR(dotiopen)) {
EXIT;
goto out;
}
dotiopen->d_inode->i_op = &presto_dir_iops;
dput(dotiopen);
dotizo = lookup_one_len(".intermezzo", root, strlen(".intermezzo"));
if (IS_ERR(dotizo)) {
EXIT;
goto out;
}
err = izo_simple_mkdir(dotizo, fsetname, 0755);
CDEBUG(D_CACHE, "mkdir err %d\n", err);
/* XXX find the dentry of the root of the fileset (root for now) */
fsetdir = lookup_one_len(fsetname, dotizo, strlen(fsetname));
if (IS_ERR(fsetdir)) {
EXIT;
goto out;
}
err = izo_simple_symlink(fsetdir, "ROOT", "../..");
/* XXX read flags from flags file */
err = presto_set_fsetroot(root, fsetname, 0);
CDEBUG(D_CACHE, "set_fsetroot err %d\n", err);
out:
if (dotizo && !IS_ERR(dotizo))
dput(dotizo);
if (fsetdir && !IS_ERR(fsetdir))
dput(fsetdir);
izo_cleanup_ctxt(mnt, &save);
return err;
}
int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data)
{
int rc = 0;
struct presto_cache *cache;
struct vfsmount *mnt;
struct run_ctxt save;
struct nameidata nd;
struct dentry *dentry;
struct presto_dentry_data *dd;
struct dentry *root;
char *buf = NULL;
ENTRY;
root = dir->f_dentry;
/* actually, needs to be called on ROOT of fset, not mount point
if (root != root->d_inode->i_sb->s_root) {
CERROR ("IOC_SET_FSET must be called on mount point\n");
return -ENODEV;
}
*/
cache = presto_get_cache(root->d_inode);
mnt = cache->cache_vfsmount;
if (!mnt) {
EXIT;
return -ENOMEM;
}
izo_setup_ctxt(root, mnt, &save);
PRESTO_ALLOC(buf, data->ioc_plen1);
if (!buf) {
rc = -ENOMEM;
EXIT;
goto out;
}
if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
rc = -EFAULT;
EXIT;
goto out;
}
rc = presto_walk(buf, &nd);
if (rc) {
CERROR("Unable to open: %s\n", buf);
EXIT;
goto out;
}
dentry = nd.dentry;
if (!dentry) {
CERROR("no dentry!\n");
rc = -EINVAL;
EXIT;
goto out_close;
}
dd = presto_d2d(dentry);
if (!dd) {
CERROR("no dentry_data!\n");
rc = -EINVAL;
EXIT;
goto out_close;
}
CDEBUG(D_FILE,"de:%p dd:%p\n", dentry, dd);
if (dd->remote_ino != 0) {
CERROR("remote_ino already set? %Lx:%Lx\n",
(unsigned long long) dd->remote_ino,
(unsigned long long) dd->remote_generation);
rc = 0;
EXIT;
goto out_close;
}
CDEBUG(D_FILE,"setting %p %p, %s to %Lx:%Lx\n", dentry, dd,
buf,
(unsigned long long) data->ioc_ino,
(unsigned long long) data->ioc_generation);
dd->remote_ino = data->ioc_ino;
dd->remote_generation = data->ioc_generation;
EXIT;
out_close:
path_release(&nd);
out:
if (buf)
PRESTO_FREE(buf, data->ioc_plen1);
izo_cleanup_ctxt(mnt, &save);
return rc;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1996 Peter J. Braam <braam@maths.ox.ac.uk> and
* Michael Callahan <callahan@maths.ox.ac.uk>
* Copyright (C) 1999 Carnegie Mellon University
* Rewritten for Linux 2.1. Peter Braam <braam@cs.cmu.edu>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Super block/filesystem wide operations
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/segment.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
extern void presto_free_cache(struct presto_cache *);
void presto_set_ops(struct inode *inode, struct filter_fs *filter)
{
ENTRY;
if (!inode || is_bad_inode(inode))
return;
if (S_ISREG(inode->i_mode)) {
if ( !filter_c2cfiops(filter) ) {
filter_setup_file_ops(filter,
inode, &presto_file_iops,
&presto_file_fops);
}
inode->i_op = filter_c2ufiops(filter);
inode->i_fop = filter_c2uffops(filter);
CDEBUG(D_INODE, "set file methods for %ld to %p\n",
inode->i_ino, inode->i_op);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = filter_c2udiops(filter);
inode->i_fop = filter_c2udfops(filter);
CDEBUG(D_INODE, "set dir methods for %ld to %p ioctl %p\n",
inode->i_ino, inode->i_op, inode->i_fop->ioctl);
} else if (S_ISLNK(inode->i_mode)) {
if ( !filter_c2csiops(filter)) {
filter_setup_symlink_ops(filter,
inode,
&presto_sym_iops,
&presto_sym_fops);
}
inode->i_op = filter_c2usiops(filter);
inode->i_fop = filter_c2usfops(filter);
CDEBUG(D_INODE, "set link methods for %ld to %p\n",
inode->i_ino, inode->i_op);
}
EXIT;
}
/*
 * super_operations.read_inode for InterMezzo: let the cache filesystem
 * read the inode through the filter's cached super operations, then
 * install the InterMezzo methods on it.  An inode without a cache is
 * marked bad.
 */
void presto_read_inode(struct inode *inode)
{
        struct presto_cache *cache = presto_get_cache(inode);

        if (cache == NULL) {
                CERROR("PRESTO: BAD, BAD: cannot find cache\n");
                make_bad_inode(inode);
                return;
        }

        /* the underlying filesystem fills in the inode first */
        filter_c2csops(cache->cache_filter)->read_inode(inode);

        CDEBUG(D_INODE, "presto_read_inode: ino %ld, gid %d\n",
               inode->i_ino, inode->i_gid);

        presto_set_ops(inode, cache->cache_filter);
        /* XXX handle special inodes here or not - probably not? */
}
/*
 * super_operations.put_super for InterMezzo.
 *
 * Looks up the cache for 'sb'; if there is none, nothing is torn down.
 * Otherwise: cleans up all filesets of the cache, frees the cache's
 * private vfsmount, drops the cache root dentry, forwards write_super and
 * put_super to the cache filesystem's own super operations (when they
 * exist), discards pending asynchronous upcall requests on the cache's
 * channel, unlinks the cache from the channel and frees the cache.
 */
static void presto_put_super(struct super_block *sb)
{
struct presto_cache *cache;
struct upc_channel *channel;
struct super_operations *sops;
struct list_head *lh;
int err;
ENTRY;
cache = presto_cache_find(sb);
if (!cache) {
EXIT;
goto exit;
}
/* channel serving this cache and the super operations of the cache fs */
channel = &izo_channels[presto_c2m(cache)];
sops = filter_c2csops(cache->cache_filter);
/* a failure here is only logged; the unmount continues regardless */
err = izo_clear_all_fsetroots(cache);
if (err) {
CERROR("%s: err %d\n", __FUNCTION__, err);
}
/* NOTE(review): cache_vfsmount is only assigned in izo_prepare_fileset();
 * confirm PRESTO_FREE copes with it never having been set. */
PRESTO_FREE(cache->cache_vfsmount, sizeof(struct vfsmount));
/* look at kill_super - fsync_super is not exported GRRR but
probably not needed */
/* the superblock lock is released around the dcache operations and
 * re-taken before calling into the cache filesystem */
unlock_super(sb);
shrink_dcache_parent(cache->cache_root);
dput(cache->cache_root);
//fsync_super(sb);
lock_super(sb);
if (sops->write_super)
sops->write_super(sb);
if (sops->put_super)
sops->put_super(sb);
/* free any remaining async upcalls when the filesystem is unmounted */
spin_lock(&channel->uc_lock);
lh = channel->uc_pending.next;
while ( lh != &channel->uc_pending) {
struct upc_req *req;
req = list_entry(lh, struct upc_req, rq_chain);
/* advance first: req, which contains the current list node, may be
 * freed below */
lh = lh->next;
/* requests without REQ_ASYNC are left on the list */
if ( ! (req->rq_flags & REQ_ASYNC) )
continue;
list_del(&(req->rq_chain));
PRESTO_FREE(req->rq_data, req->rq_bufsize);
PRESTO_FREE(req, sizeof(struct upc_req));
}
list_del(&cache->cache_channel_list);
spin_unlock(&channel->uc_lock);
presto_free_cache(cache);
exit:
/* memory accounting totals, reported on every path */
CDEBUG(D_MALLOC, "after umount: kmem %ld, vmem %ld\n",
presto_kmemory, presto_vmemory);
return ;
}
/*
 * Super operations implemented by InterMezzo itself.  Only read_inode and
 * put_super are provided here; both forward to the cache filesystem's own
 * super operations obtained through the filter.
 */
struct super_operations presto_super_ops = {
.read_inode = presto_read_inode,
.put_super = presto_put_super,
};
/* symlinks can be chowned, so setattr is the one inode operation
 * InterMezzo supplies for them; presto_set_ops() hands this table to
 * filter_setup_symlink_ops(). */
struct inode_operations presto_sym_iops = {
.setattr = presto_setattr
};
/* No symlink file operations for now: the object has no initializer, so
 * every method pointer in it is NULL. */
struct file_operations presto_sym_fops;
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc.
* Copyright (C) 2001 Tacitus Systems, Inc.
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __INTERMEZZO_FS_H_
#define __INTERMEZZO_FS_H_ 1
#include "intermezzo_lib.h"
#include "intermezzo_idl.h"
#ifdef __KERNEL__
typedef __u8 uuid_t[16];
#else
# include <uuid/uuid.h>
#endif
/*
 * Context handed to the lento_* and presto_do_* operations declared in
 * this header (their "info" argument).  It is declared outside the
 * __KERNEL__ section, so the layout is visible to user space as well; in
 * the kernel uuid_t is the 16-byte array typedef'ed above.
 */
struct lento_vfs_context {
__u64 kml_offset;
struct timespec updated_time;
__u64 remote_ino;
__u64 remote_generation;
__u32 slot_offset;
__u32 recno;
/* presumably a mask of the LENTO_FL_* bits defined in this header --
 * TODO confirm */
__u32 flags;
uuid_t uuid;
struct presto_version remote_version;
};
#ifdef __KERNEL__
# include <linux/smp.h>
# include <linux/fsfilter.h>
# include <linux/mount.h>
# include <linux/slab.h>
# include <linux/vmalloc.h>
# include <linux/smp_lock.h>
/* fixups for fs.h */
# ifndef fs_down
# define fs_down(sem) down(sem)
# endif
# ifndef fs_up
# define fs_up(sem) up(sem)
# endif
# define KML_IDLE 0
# define KML_DECODE 1
# define KML_OPTIMIZE 2
# define KML_REINT 3
# define KML_OPEN_REINT 0x0100
# define KML_REINT_BEGIN 0x0200
# define KML_BACKFETCH 0x0400
# define KML_REINT_END 0x0800
# define KML_CLOSE_REINT 0x1000
# define KML_REINT_MAXBUF (64 * 1024)
# define CACHE_CLIENT_RO 0x4
# define CACHE_LENTO_RO 0x8
/* global variables */
extern int presto_debug;
extern int presto_print_entry;
extern long presto_kmemory;
extern long presto_vmemory;
# define PRESTO_DEBUG
# ifdef PRESTO_DEBUG
/* debugging masks */
# define D_SUPER 1
# define D_INODE 2
# define D_FILE 4
# define D_CACHE 8 /* cache debugging */
# define D_MALLOC 16 /* print malloc, de-alloc information */
# define D_JOURNAL 32
# define D_UPCALL 64 /* up and downcall debugging */
# define D_PSDEV 128
# define D_PIOCTL 256
# define D_SPECIAL 512
# define D_TIMING 1024
# define D_DOWNCALL 2048
# define D_KML 4096
# define D_FSDATA 8192
/*
 * Logging and tracing helpers (PRESTO_DEBUG build).
 *
 * CDEBUG(mask, fmt, ...) prints only when presto_debug has a bit of
 *                        'mask' set; 'mask' is parenthesized so that a
 *                        compound argument such as D_INODE | D_FILE is
 *                        tested as a whole.
 * CERROR(fmt, ...)       always prints.
 * ENTRY / EXIT           trace function entry/exit when presto_print_entry
 *                        is set.
 *
 * All four expand to a single do { } while (0) statement, so each can be
 * used as the unbraced body of an if/else without capturing a following
 * "else", matching the shape of the non-debug variants.
 */
# define CDEBUG(mask, format, a...) \
do { \
        if (presto_debug & (mask)) { \
                printk("(%s:%s,l. %d %d): " format, __FILE__, \
                       __FUNCTION__, __LINE__, current->pid \
                       , ## a); \
        } \
} while (0)
#define CERROR(format, a...) \
do { \
        printk("(%s:%s,l. %d %d): " format, __FILE__, __FUNCTION__, \
               __LINE__, current->pid , ## a); \
} while (0)
# define ENTRY \
do { \
        if (presto_print_entry) \
                printk("Process %d entered %s\n", current->pid, \
                       __FUNCTION__); \
} while (0)
# define EXIT \
do { \
        if (presto_print_entry) \
                printk("Process %d leaving %s at %d\n", current->pid, \
                       __FUNCTION__, __LINE__); \
} while (0)
# define presto_kmem_inc(ptr, size) presto_kmemory += (size)
# define presto_kmem_dec(ptr, size) presto_kmemory -= (size)
# define presto_vmem_inc(ptr, size) presto_vmemory += (size)
# define presto_vmem_dec(ptr, size) presto_vmemory -= (size)
# else /* !PRESTO_DEBUG */
# define CDEBUG(mask, format, a...) do {} while (0)
# define ENTRY do {} while (0)
# define EXIT do {} while (0)
# define presto_kmem_inc(ptr, size) do {} while (0)
# define presto_kmem_dec(ptr, size) do {} while (0)
# define presto_vmem_inc(ptr, size) do {} while (0)
# define presto_vmem_dec(ptr, size) do {} while (0)
# endif /* PRESTO_DEBUG */
/*
 * Snapshot of the per-task state that push_ctxt() replaces and pop_ctxt()
 * restores: working directory and root (dentry plus mount), filesystem
 * uid/gid, the address-limit segment and the supplementary group list.
 */
struct run_ctxt {
struct vfsmount *pwdmnt;
struct dentry *pwd;
struct vfsmount *rootmnt;
struct dentry *root;
uid_t fsuid;
gid_t fsgid;
mm_segment_t fs;
struct group_info * group_info;
/* int ngroups;
gid_t groups[NGROUPS];*/
};
/*
 * Save the current task's filesystem context in 'save' and switch to
 * 'new': address limit, working directory, root (only when new->root is
 * set), fsuid/fsgid and the supplementary group list.
 *
 * The saved working directory AND the saved root are pinned with
 * dget()/mntget() until pop_ctxt().  Previously only pwd was pinned while
 * root/rootmnt were kept as bare pointers, although set_fs_root() replaces
 * the task's root just as set_fs_pwd() replaces its pwd.
 * NOTE(review): this assumes set_fs_root() drops the old root reference
 * the way the existing pwd handling implies for set_fs_pwd(); the extra
 * get/put pair is balanced either way.
 */
static inline void push_ctxt(struct run_ctxt *save, struct run_ctxt *new)
{
        save->fs = get_fs();
        save->pwd = dget(current->fs->pwd);
        save->pwdmnt = mntget(current->fs->pwdmnt);
        save->fsgid = current->fsgid;
        save->fsuid = current->fsuid;
        save->root = dget(current->fs->root);
        save->rootmnt = mntget(current->fs->rootmnt);
        /* supplementary groups travel as a single group_info pointer */
        save->group_info = current->group_info;

        set_fs(new->fs);
        lock_kernel();
        set_fs_pwd(current->fs, new->pwdmnt, new->pwd);
        if (new->root)
                set_fs_root(current->fs, new->rootmnt, new->root);
        unlock_kernel();
        current->fsuid = new->fsuid;
        current->fsgid = new->fsgid;
        current->group_info = new->group_info;
}

/*
 * Restore the context captured by push_ctxt() and drop the references it
 * took on the saved working directory and root.
 */
static inline void pop_ctxt(struct run_ctxt *saved)
{
        set_fs(saved->fs);
        lock_kernel();
        set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
        if (saved->root)
                set_fs_root(current->fs, saved->rootmnt, saved->root);
        unlock_kernel();
        current->fsuid = saved->fsuid;
        current->fsgid = saved->fsgid;
        current->group_info = saved->group_info;

        /* release the pins taken in push_ctxt() */
        mntput(saved->pwdmnt);
        dput(saved->pwd);
        mntput(saved->rootmnt);
        dput(saved->root);
}
/* Return the InterMezzo per-dentry data stored in dentry->d_fsdata; the
 * result is NULL when d_fsdata has not been set. */
static inline struct presto_dentry_data *presto_d2d(struct dentry *dentry)
{
return (struct presto_dentry_data *)(dentry->d_fsdata);
}
/*
 * One InterMezzo cache; the fileset and unmount code looks it up per
 * inode (presto_get_cache) or per superblock (presto_cache_find).
 */
struct presto_cache {
spinlock_t cache_lock;
loff_t cache_reserved;
/* private mount allocated by izo_prepare_fileset() and freed in
 * presto_put_super(); used to build the context for fileset operations */
struct vfsmount *cache_vfsmount;
struct super_block *cache_sb;
/* root dentry; released in presto_put_super() */
struct dentry *cache_root;
struct list_head cache_chain; /* for the dev/cache hash */
int cache_flags;
char *cache_type; /* filesystem type of cache */
/* filter providing the cache filesystem's and InterMezzo's method tables */
struct filter_fs *cache_filter;
struct upc_channel *cache_psdev; /* points to channel used */
/* unlinked under the channel's uc_lock in presto_put_super() */
struct list_head cache_channel_list;
struct list_head cache_fset_list; /* filesets mounted in cache */
};
/*
 * State of one open log file.  A fileset embeds three of these (kml, lml
 * and last_rcvd); they are closed with izo_log_close().
 */
struct presto_log_fd {
rwlock_t fd_lock;
loff_t fd_offset; /* offset where next record should go */
struct file *fd_file;
int fd_truncating;
unsigned int fd_recno; /* last recno written */
struct list_head fd_reservations;
};
/* file sets */
# define CHUNK_BITS 16
/*
 * A fileset.  Instances are created by presto_set_fsetroot() and destroyed
 * by izo_cleanup_fset().
 */
struct presto_file_set {
/* link in presto_cache.cache_fset_list */
struct list_head fset_list;
/* the three logs opened when the fileset root is set */
struct presto_log_fd fset_kml;
struct presto_log_fd fset_lml;
struct presto_log_fd fset_rcvd;
struct list_head *fset_clients; /* cache of clients */
/* pinned fileset root dentry and the mount reference taken with it */
struct dentry *fset_dentry;
struct vfsmount *fset_mnt;
struct presto_cache *fset_cache;
unsigned int fset_lento_recno; /* last recno mentioned to lento */
loff_t fset_lento_off; /* last offset mentioned to lento */
loff_t fset_kml_logical_off; /* logical offset of kml file byte 0 */
/* private copy of the fileset name; freed as strlen + 1 bytes */
char * fset_name;
int fset_flags;
/* initialised to CHUNK_BITS */
int fset_chunkbits;
/* allocated as 64 * 1024 bytes in presto_set_fsetroot() */
char *fset_reint_buf; /* temporary buffer holds kml during reint */
spinlock_t fset_permit_lock;
int fset_permit_count;
int fset_permit_upcall_count;
/* This queue is used both for processes waiting for the kernel to give
* up the permit as well as processes waiting for the kernel to be given
* the permit, depending on the state of FSET_HASPERMIT. */
wait_queue_head_t fset_permit_queue;
/* initialised to FSET_DEFAULT_MAX_FILEIO */
loff_t fset_file_maxio; /* writing more than this causes a close */
unsigned long int kml_truncate_size;
};
/* This is the default number of bytes written before a close is recorded*/
#define FSET_DEFAULT_MAX_FILEIO (1024<<10)
struct dentry *presto_tmpfs_ilookup(struct inode *dir, struct dentry *dentry,
ino_t ino, unsigned int generation);
struct dentry *presto_iget_ilookup(struct inode *dir, struct dentry *dentry,
ino_t ino, unsigned int generation);
struct dentry *presto_add_ilookup_dentry(struct dentry *parent,
struct dentry *real);
/*
 * Per-filesystem journalling hooks.  One instance is declared below for
 * each supported cache filesystem (ext2, ext3, tmpfs, xfs, reiserfs,
 * obdfs).
 */
struct journal_ops {
int (*tr_all_data)(struct inode *);
loff_t (*tr_avail)(struct presto_cache *fset, struct super_block *);
/* tr_start returns an opaque handle that is later passed to tr_commit */
void *(*tr_start)(struct presto_file_set *, struct inode *, int op);
void (*tr_commit)(struct presto_file_set *, void *handle);
void (*tr_journal_data)(struct inode *);
/* lookup by inode number and generation; signature matches
 * presto_tmpfs_ilookup()/presto_iget_ilookup() declared above */
struct dentry *(*tr_ilookup)(struct inode *dir, struct dentry *dentry, ino_t ino, unsigned int generation);
/* signature matches presto_add_ilookup_dentry() declared above */
struct dentry *(*tr_add_ilookup)(struct dentry *parent, struct dentry *real);
};
extern struct journal_ops presto_ext2_journal_ops;
extern struct journal_ops presto_ext3_journal_ops;
extern struct journal_ops presto_tmpfs_journal_ops;
extern struct journal_ops presto_xfs_journal_ops;
extern struct journal_ops presto_reiserfs_journal_ops;
extern struct journal_ops presto_obdfs_journal_ops;
# define LENTO_FL_KML 0x0001
# define LENTO_FL_EXPECT 0x0002
# define LENTO_FL_VFSCHECK 0x0004
# define LENTO_FL_JUSTLOG 0x0008
# define LENTO_FL_WRITE_KML 0x0010
# define LENTO_FL_CANCEL_LML 0x0020
# define LENTO_FL_WRITE_EXPECT 0x0040
# define LENTO_FL_IGNORE_TIME 0x0080
# define LENTO_FL_TOUCH_PARENT 0x0100
# define LENTO_FL_TOUCH_NEWOBJ 0x0200
# define LENTO_FL_SET_DDFILEID 0x0400
struct presto_cache *presto_get_cache(struct inode *inode);
int presto_sprint_mounts(char *buf, int buflen, int minor);
struct presto_file_set *presto_fset(struct dentry *de);
int presto_journal(struct dentry *dentry, char *buf, size_t size);
int presto_fwrite(struct file *file, const char *str, int len, loff_t *off);
int presto_ispresto(struct inode *);
/* super.c */
extern struct file_system_type presto_fs_type;
extern int init_intermezzo_fs(void);
/* fileset.c */
extern int izo_prepare_fileset(struct dentry *root, char *fsetname);
char * izo_make_path(struct presto_file_set *fset, char *name);
struct file *izo_fset_open(struct presto_file_set *fset, char *name, int flags, int mode);
/* psdev.c */
int izo_psdev_get_free_channel(void);
int presto_psdev_init(void);
int izo_psdev_setpid(int minor);
extern void presto_psdev_cleanup(void);
int presto_lento_up(int minor);
int izo_psdev_setchannel(struct file *file, int fd);
/* inode.c */
extern struct super_operations presto_super_ops;
void presto_set_ops(struct inode *inode, struct filter_fs *filter);
/* dcache.c */
void presto_frob_dop(struct dentry *de);
char *presto_path(struct dentry *dentry, struct dentry *root,
char *buffer, int buflen);
struct presto_dentry_data *izo_alloc_ddata(void);
int presto_set_dd(struct dentry *);
int presto_init_ddata_cache(void);
void presto_cleanup_ddata_cache(void);
extern struct dentry_operations presto_dentry_ops;
/* dir.c */
extern struct inode_operations presto_dir_iops;
extern struct inode_operations presto_file_iops;
extern struct inode_operations presto_sym_iops;
extern struct file_operations presto_dir_fops;
extern struct file_operations presto_file_fops;
extern struct file_operations presto_sym_fops;
int presto_setattr(struct dentry *de, struct iattr *iattr);
int presto_settime(struct presto_file_set *fset, struct dentry *newobj,
struct dentry *parent, struct dentry *target,
struct lento_vfs_context *ctx, int valid);
int presto_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
extern int presto_ilookup_uid;
# define PRESTO_ILOOKUP_MAGIC "...ino:"
# define PRESTO_ILOOKUP_SEP ':'
int izo_dentry_is_ilookup(struct dentry *, ino_t *id, unsigned int *generation);
struct dentry *presto_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd);
/*
 * InterMezzo data attached to a dentry through d_fsdata (see presto_d2d()).
 */
struct presto_dentry_data {
int dd_count; /* how many dentries are using this dentry */
/* fileset rooted at this dentry; set by presto_set_fsetroot() and reset
 * to NULL by izo_cleanup_fset() */
struct presto_file_set *dd_fset;
struct dentry *dd_inodentry;
loff_t dd_kml_offset;
int dd_flags;
/* remote identity recorded by izo_set_fileid(); remote_ino == 0 means
 * "not set yet" there */
__u64 remote_ino;
__u64 remote_generation;
};
/*
 * Per-open-file bookkeeping.  NOTE(review): presumably hung off
 * file->private_data by the file open path, which is not visible here --
 * confirm.
 */
struct presto_file_data {
int fd_do_lml;
loff_t fd_lml_offset;
size_t fd_bytes_written;
/* authorization related data of file at open time */
uid_t fd_uid;
gid_t fd_gid;
mode_t fd_mode;
/* identification data of calling process */
uid_t fd_fsuid;
gid_t fd_fsgid;
int fd_ngroups;
gid_t fd_groups[NGROUPS_SMALL];
/* information how to complete the close operation */
struct lento_vfs_context fd_info;
struct presto_version fd_version;
};
/* presto.c and Lento::Downcall */
int presto_walk(const char *name, struct nameidata *nd);
int izo_clear_fsetroot(struct dentry *dentry);
int izo_clear_all_fsetroots(struct presto_cache *cache);
int presto_get_kmlsize(char *path, __u64 *size);
int presto_get_lastrecno(char *path, off_t *size);
int presto_set_fsetroot(struct dentry *dentry, char *fsetname,
unsigned int flags);
int presto_set_fsetroot_from_ioc(struct dentry *dentry, char *fsetname,
unsigned int flags);
int presto_is_read_only(struct presto_file_set *);
int presto_truncate_lml(struct presto_file_set *fset);
int lento_write_lml(char *path,
__u64 remote_ino,
__u32 remote_generation,
__u32 remote_version,
struct presto_version *remote_file_version);
int lento_complete_closes(char *path);
int presto_f2m(struct presto_file_set *fset);
int presto_prep(struct dentry *, struct presto_cache **,
struct presto_file_set **);
/* cache.c */
extern struct presto_cache *presto_cache_init(void);
extern void presto_cache_add(struct presto_cache *cache);
extern void presto_cache_init_hash(void);
struct presto_cache *presto_cache_find(struct super_block *sb);
#define PRESTO_REQLOW (3 * 4096)
#define PRESTO_REQHIGH (6 * 4096)
void presto_release_space(struct presto_cache *cache, loff_t req);
int presto_reserve_space(struct presto_cache *cache, loff_t req);
#define PRESTO_DATA 0x00000002 /* cached data is valid */
#define PRESTO_ATTR 0x00000004 /* attributes cached */
#define PRESTO_DONT_JOURNAL 0x00000008 /* things like .intermezzo/ */
struct presto_file_set *presto_path2fileset(const char *name);
int izo_revoke_permit(struct dentry *, uuid_t uuid);
int presto_chk(struct dentry *dentry, int flag);
void presto_set(struct dentry *dentry, int flag);
int presto_get_permit(struct inode *inode);
int presto_put_permit(struct inode *inode);
int presto_set_max_kml_size(const char *path, unsigned long max_size);
int izo_mark_dentry(struct dentry *dentry, int and, int or, int *res);
int izo_mark_cache(struct dentry *dentry, int and_bits, int or_bits, int *);
int izo_mark_fset(struct dentry *dentry, int and_bits, int or_bits, int *);
void presto_getversion(struct presto_version *pv, struct inode *inode);
int presto_i2m(struct inode *inode);
int presto_c2m(struct presto_cache *cache);
/* file.c */
int izo_purge_file(struct presto_file_set *fset, char *file);
int presto_adjust_lml(struct file *file, struct lento_vfs_context *info);
/* journal.c */
/*
 * Describes one log record; passed to the presto_journal_* and
 * presto_write_* routines declared in this header.
 */
struct rec_info {
loff_t offset;
int size;
int recno;
/* presumably non-zero when the record belongs to the KML rather than
 * another log -- TODO confirm */
int is_kml;
};
void presto_trans_commit(struct presto_file_set *fset, void *handle);
void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
int op);
int presto_fread(struct file *file, char *str, int len, loff_t *off);
int presto_clear_lml_close(struct presto_file_set *fset,
loff_t lml_offset);
int presto_complete_lml(struct presto_file_set *fset);
int presto_read_kml_logical_offset(struct rec_info *recinfo,
struct presto_file_set *fset);
int presto_write_kml_logical_offset(struct presto_file_set *fset);
struct file *presto_copy_kml_tail(struct presto_file_set *fset,
unsigned long int start);
int presto_finish_kml_truncate(struct presto_file_set *fset,
unsigned long int offset);
int izo_lookup_file(struct presto_file_set *fset, char *path,
struct nameidata *nd);
int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
loff_t length, loff_t size_check);
int izo_log_close(struct presto_log_fd *logfd);
struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags);
int izo_init_kml_file(struct presto_file_set *, struct presto_log_fd *);
int izo_init_lml_file(struct presto_file_set *, struct presto_log_fd *);
int izo_init_last_rcvd_file(struct presto_file_set *, struct presto_log_fd *);
/* vfs.c */
/* Extra data needed in the KML for rollback operations; this structure is
* passed around during the KML-writing process. */
/* Filled in from an inode by izo_get_rollback_data(); the fields use fixed
 * widths (__u32/__u64). */
struct izo_rollback_data {
__u32 rb_mode;
__u32 rb_rdev;
__u64 rb_uid;
__u64 rb_gid;
};
int presto_write_last_rcvd(struct rec_info *recinfo,
struct presto_file_set *fset,
struct lento_vfs_context *info);
void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb);
int presto_do_close(struct presto_file_set *fset, struct file *file);
int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry,
struct iattr *iattr, struct lento_vfs_context *info);
int presto_do_create(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, int mode,
struct lento_vfs_context *info);
int presto_do_link(struct presto_file_set *fset, struct dentry *dir,
struct dentry *old_dentry, struct dentry *new_dentry,
struct lento_vfs_context *info);
int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, struct lento_vfs_context *info);
int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, const char *name,
struct lento_vfs_context *info);
int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, int mode,
struct lento_vfs_context *info);
int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, struct lento_vfs_context *info);
int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, int mode, dev_t dev,
struct lento_vfs_context *info);
int do_rename(struct presto_file_set *fset, struct dentry *old_dir,
struct dentry *old_dentry, struct dentry *new_dir,
struct dentry *new_dentry, struct lento_vfs_context *info);
int presto_do_statfs (struct presto_file_set *fset,
struct kstatfs * buf);
int lento_setattr(const char *name, struct iattr *iattr,
struct lento_vfs_context *info);
int lento_create(const char *name, int mode, struct lento_vfs_context *info);
int lento_link(const char *oldname, const char *newname,
struct lento_vfs_context *info);
int lento_unlink(const char *name, struct lento_vfs_context *info);
int lento_symlink(const char *oldname,const char *newname,
struct lento_vfs_context *info);
int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info);
int lento_rmdir(const char *name, struct lento_vfs_context *info);
int lento_mknod(const char *name, int mode, dev_t dev,
struct lento_vfs_context *info);
int lento_rename(const char *oldname, const char *newname,
struct lento_vfs_context *info);
int lento_iopen(const char *name, ino_t ino, unsigned int generation,int flags);
/* journal.c */
#define JOURNAL_PAGE_SZ PAGE_SIZE
int presto_no_journal(struct presto_file_set *fset);
int journal_fetch(int minor);
int presto_log(struct presto_file_set *fset, struct rec_info *rec,
const char *buf, size_t size,
const char *string1, int len1,
const char *string2, int len2,
const char *string3, int len3);
int presto_get_fileid(int minor, struct presto_file_set *fset,
struct dentry *dentry);
int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry, struct presto_version *old_ver,
struct izo_rollback_data *, struct iattr *iattr);
int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry,
struct presto_version *tgt_dir_ver,
struct presto_version *new_file_ver, int mode);
int presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *src, struct dentry *tgt,
struct presto_version *tgt_dir_ver,
struct presto_version *new_link_ver);
int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dir,
struct presto_version *tgt_dir_ver,
struct presto_version *old_file_ver,
struct izo_rollback_data *, struct dentry *dentry,
char *old_target, int old_targetlen);
int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry, const char *target,
struct presto_version *tgt_dir_ver,
struct presto_version *new_link_ver);
int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry,
struct presto_version *tgt_dir_ver,
struct presto_version *new_dir_ver, int mode);
int presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry,
struct presto_version *tgt_dir_ver,
struct presto_version *old_dir_ver,
struct izo_rollback_data *, int len, const char *name);
int presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry,
struct presto_version *tgt_dir_ver,
struct presto_version *new_node_ver, int mode,
int dmajor, int dminor);
int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *src, struct dentry *tgt,
struct presto_version *src_dir_ver,
struct presto_version *tgt_dir_ver);
int presto_journal_open(struct rec_info *, struct presto_file_set *,
struct dentry *, struct presto_version *old_ver);
int presto_journal_close(struct rec_info *rec, struct presto_file_set *,
struct presto_file_data *, struct dentry *,
struct presto_version *old_file_ver,
struct presto_version *new_file_ver);
int presto_write_lml_close(struct rec_info *rec,
struct presto_file_set *fset,
struct file *file,
__u64 remote_ino,
__u64 remote_generation,
struct presto_version *remote_version,
struct presto_version *new_file_ver);
void presto_log_op(void *data, int len);
loff_t presto_kml_offset(struct presto_file_set *fset);
/* upcall.c */
#define SYNCHRONOUS 0
#define ASYNCHRONOUS 1
/* asynchronous calls */
int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length,
__u32 last_recno, char *fsetname);
int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno,
char *fsetname);
int izo_upc_go_fetch_kml(int minor, char *fsetname, uuid_t uuid, __u64 kmlsize);
int izo_upc_backfetch(int minor, char *path, char *fileset,
struct lento_vfs_context *);
/* synchronous calls */
int izo_upc_get_fileid(int minor, __u32 reclen, char *rec,
__u32 pathlen, char *path, char *fsetname);
int izo_upc_permit(int minor, struct dentry *, __u32 pathlen, char *path,
char *fset);
int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname,
struct lento_vfs_context *info);
int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16],
int client_flag);
int izo_upc_revoke_permit(int minor, char *fsetname, uuid_t uuid);
int izo_upc_set_kmlsize(int minor, char *fsetname, uuid_t uuid, __u64 kmlsize);
int izo_upc_client_make_branch(int minor, char *fsetname);
int izo_upc_server_make_branch(int minor, char *fsetname);
int izo_upc_branch_undo(int minor, char *fsetname, char *branchname);
int izo_upc_branch_redo(int minor, char *fsetname, char *branchname);
int izo_upc_repstatus(int minor, char * fsetname, struct izo_rcvd_rec *lr_server);
/* general mechanism */
int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *, int async);
/* replicator.c */
int izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize,
struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server);
int izo_rep_cache_init(struct presto_file_set *);
loff_t izo_rcvd_get(struct izo_rcvd_rec *, struct presto_file_set *, char *uuid);
loff_t izo_rcvd_write(struct presto_file_set *, struct izo_rcvd_rec *);
loff_t izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid, __u64 remote_recno,
__u64 remote_offset);
int izo_ioctl_packlen(struct izo_ioctl_data *data);
/* sysctl.c */
int init_intermezzo_sysctl(void);
void cleanup_intermezzo_sysctl(void);
/* ext_attr.c */
/* We will be more tolerant than the default ea patch with attr name sizes and
* the size of value. If these come via VFS from the default ea patches, the
* corresponding character strings will be truncated anyway. During journalling
* we journal length for both name and value. See journal_set_ext_attr.
*/
#define PRESTO_EXT_ATTR_NAME_MAX 128
#define PRESTO_EXT_ATTR_VALUE_MAX 8192
/*
 * PRESTO_ALLOC - allocate `size` zeroed bytes with kmalloc(GFP_KERNEL).
 *
 * On success the allocation is zero-filled and accounted through
 * presto_kmem_inc(); on failure an error is logged and (ptr) is left NULL,
 * so callers must test (ptr) afterwards.  A CDEBUG trace line is emitted in
 * both cases.
 *
 * `ptr` must be an lvalue and is evaluated several times; `size` is
 * evaluated exactly once.
 *
 * The size temporary carries a macro-specific name so it cannot capture a
 * caller variable: with a temporary called `s`, PRESTO_ALLOC(s, n) would
 * assign the new buffer to the temporary instead of the caller's pointer.
 */
#define PRESTO_ALLOC(ptr, size)                                         \
do {                                                                    \
        long presto_alloc_sz_ = (size);                                 \
        (ptr) = kmalloc(presto_alloc_sz_, GFP_KERNEL);                  \
        if ((ptr) == NULL)                                              \
                CERROR("IZO: out of memory at %s:%d (trying to "        \
                       "allocate %ld)\n", __FILE__, __LINE__,           \
                       presto_alloc_sz_);                               \
        else {                                                          \
                presto_kmem_inc((ptr), presto_alloc_sz_);               \
                memset((ptr), 0, presto_alloc_sz_);                     \
        }                                                               \
        CDEBUG(D_MALLOC, "kmalloced: %ld at %p (tot %ld).\n",           \
               presto_alloc_sz_, (ptr), presto_kmemory);                \
} while (0)
/*
 * PRESTO_FREE - release a buffer obtained with PRESTO_ALLOC.
 *
 * A NULL (ptr) is reported with CERROR and otherwise ignored.  For a
 * non-NULL pointer the buffer is kfree()d, a CDEBUG trace line is emitted
 * and `size` bytes are subtracted from the accounting via presto_kmem_dec().
 * After kfree() only the pointer *value* is used (for logging/accounting);
 * the memory itself is not touched.
 *
 * `ptr` is evaluated several times; `size` is evaluated exactly once.
 *
 * The size temporary carries a macro-specific name so it cannot capture a
 * caller variable: with a temporary called `s`, PRESTO_FREE(s, n) would
 * test and kfree() the size instead of the caller's pointer.
 */
#define PRESTO_FREE(ptr, size)                                          \
do {                                                                    \
        long presto_free_sz_ = (size);                                  \
        if ((ptr) == NULL) {                                            \
                CERROR("IZO: free NULL pointer (%ld bytes) at "         \
                       "%s:%d\n", presto_free_sz_, __FILE__, __LINE__); \
                break;                                                  \
        }                                                               \
        kfree(ptr);                                                     \
        CDEBUG(D_MALLOC, "kfreed: %ld at %p (tot %ld).\n",              \
               presto_free_sz_, (ptr), presto_kmemory);                 \
        presto_kmem_dec((ptr), presto_free_sz_);                        \
} while (0)
/*
 * Compare a NUL-terminated string against a dentry's name.
 *
 * Returns non-zero when `name` has exactly d_name.len bytes and those bytes
 * equal d_name.name, and 0 otherwise.  Note the sense is the opposite of
 * strcmp(): a match yields a true value.
 */
static inline int dentry_name_cmp(struct dentry *dentry, char *name)
{
        /* Different lengths can never match; skip the byte comparison. */
        if (strlen(name) != dentry->d_name.len)
                return 0;

        return memcmp(name, dentry->d_name.name, dentry->d_name.len) == 0;
}
/*
 * Duplicate a NUL-terminated string into memory obtained from
 * kmalloc(GFP_KERNEL).  Returns the copy, or NULL if the allocation failed.
 */
static inline char *strdup(char *str)
{
        size_t bytes = strlen(str) + 1;         /* include the terminator */
        char *copy = kmalloc(bytes, GFP_KERNEL);

        if (!copy)
                return copy;

        memcpy(copy, str, bytes);
        return copy;
}
static inline int izo_ioctl_is_invalid(struct izo_ioctl_data *data)
{
if (data->ioc_len > (1<<30)) {
CERROR("IZO ioctl: ioc_len larger than 1<<30\n");
return 1;
}
if (data->ioc_inllen1 > (1<<30)) {
CERROR("IZO ioctl: ioc_inllen1 larger than 1<<30\n");
return 1;
}
if (data->ioc_inllen2 > (1<<30)) {
CERROR("IZO ioctl: ioc_inllen2 larger than 1<<30\n");
return 1;
}
if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
CERROR("IZO ioctl: inlbuf1 pointer but 0 length\n");
return 1;
}
if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
CERROR("IZO ioctl: inlbuf2 pointer but 0 length\n");
return 1;
}
if (data->ioc_pbuf1 && !data->ioc_plen1) {
CERROR("IZO ioctl: pbuf1 pointer but 0 length\n");
return 1;
}
if (data->ioc_pbuf2 && !data->ioc_plen2) {
CERROR("IZO ioctl: pbuf2 pointer but 0 length\n");
return 1;
}
if (izo_ioctl_packlen(data) != data->ioc_len ) {
CERROR("IZO ioctl: packlen exceeds ioc_len\n");
return 1;
}
if (data->ioc_inllen1 &&
data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
CERROR("IZO ioctl: inlbuf1 not 0 terminated\n");
return 1;
}
if (data->ioc_inllen2 &&
data->ioc_bulk[size_round(data->ioc_inllen1) + data->ioc_inllen2
- 1] != '\0') {
CERROR("IZO ioctl: inlbuf2 not 0 terminated\n");
return 1;
}
return 0;
}
/* buffer MUST be at least the size of izo_ioctl_hdr */
static inline int izo_ioctl_getdata(char *buf, char *end, void *arg)
{
struct izo_ioctl_hdr *hdr;
struct izo_ioctl_data *data;
int err;
ENTRY;
hdr = (struct izo_ioctl_hdr *)buf;
data = (struct izo_ioctl_data *)buf;
err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
if ( err ) {
EXIT;
return err;
}
if (hdr->ioc_version != IZO_IOCTL_VERSION) {
CERROR("IZO: version mismatch kernel vs application\n");
return -EINVAL;
}
if (hdr->ioc_len + buf >= end) {
CERROR("IZO: user buffer exceeds kernel buffer\n");
return -EINVAL;
}
if (hdr->ioc_len < sizeof(struct izo_ioctl_data)) {
CERROR("IZO: user buffer too small for ioctl\n");
return -EINVAL;
}
err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
if ( err ) {
EXIT;
return err;
}
if (izo_ioctl_is_invalid(data)) {
CERROR("IZO: ioctl not correctly formatted\n");
return -EINVAL;
}
if (data->ioc_inllen1) {
data->ioc_inlbuf1 = &data->ioc_bulk[0];
}
if (data->ioc_inllen2) {
data->ioc_inlbuf2 = &data->ioc_bulk[0] +
size_round(data->ioc_inllen1);
}
EXIT;
return 0;
}
/*
 * Expands to (buffer) + PAGE_SIZE - (path).
 * NOTE(review): presumably the number of bytes between `path` and the end
 * of a PAGE_SIZE-sized `buffer` -- confirm against the callers.
 */
# define MYPATHLEN(buffer, path) ((buffer) + PAGE_SIZE - (path))
/*
 * Map the userland allocator and print names onto kernel equivalents:
 * free -> kfree, malloc(a) -> kmalloc(a, GFP_KERNEL), printf -> printk.
 */
# define free kfree
# define malloc(a) kmalloc(a, GFP_KERNEL)
# define printf printk
int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data);
int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data);
int izo_set_fileid(struct file *dir, struct izo_ioctl_data *data);
#else /* __KERNEL__ */
# include <stdlib.h>
# include <stdio.h>
# include <sys/types.h>
# include <sys/ioctl.h>
# include <string.h>
# define printk printf
# ifndef CERROR
# define CERROR printf
# endif
# define kmalloc(a,b) malloc(a)
void init_fsreintdata (void);
int kml_fsreint(struct kml_rec *rec, char *basedir);
int kml_iocreint(__u32 size, char *ptr, __u32 offset, int dird,
uuid_t uuid, __u32 generate_kml);
/*
 * Reset an ioctl request to its empty state: every byte of the fixed part
 * is zeroed, then ioc_len is set to the size of the fixed part and
 * ioc_version to IZO_IOCTL_VERSION.
 */
static inline void izo_ioctl_init(struct izo_ioctl_data *data)
{
        const size_t fixed_size = sizeof(*data);

        memset(data, 0, fixed_size);
        data->ioc_version = IZO_IOCTL_VERSION;
        data->ioc_len = fixed_size;
}
/*
 * Serialise an ioctl request into one contiguous buffer.
 *
 * data: request to pack; its ioc_len and ioc_version fields are updated.
 * pbuf: in/out buffer pointer.  If *pbuf is NULL a buffer of ioc_len bytes
 *       is malloc()ed and stored there; otherwise the caller's buffer is
 *       used and must hold at least `max` bytes.
 * max:  capacity of a caller-supplied buffer (ignored when *pbuf is NULL).
 *
 * The fixed part of `data` is copied first, followed by the two inline
 * buffers (when their pointers are set) appended with LOGL.
 *
 * Returns 0 on success and 1 on failure (request larger than `max`,
 * allocation failure, or the packed result fails izo_ioctl_is_invalid()).
 * A buffer allocated here is not released on failure.
 */
static inline int
izo_ioctl_pack(struct izo_ioctl_data *data, char **pbuf, int max)
{
        struct izo_ioctl_data *packed;
        char *cursor;

        data->ioc_len = izo_ioctl_packlen(data);
        data->ioc_version = IZO_IOCTL_VERSION;

        if (*pbuf) {
                /* Caller-supplied buffer: make sure the request fits. */
                if (izo_ioctl_packlen(data) > max)
                        return 1;
        } else {
                *pbuf = malloc(data->ioc_len);
                if (*pbuf == NULL)
                        return 1;
        }

        packed = (struct izo_ioctl_data *)*pbuf;
        memcpy(*pbuf, data, sizeof(*data));

        /* Inline buffers follow the fixed part, each padded by LOGL. */
        cursor = packed->ioc_bulk;
        if (data->ioc_inlbuf1) {
                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, cursor);
        }
        if (data->ioc_inlbuf2) {
                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, cursor);
        }

        return izo_ioctl_is_invalid(packed) ? 1 : 0;
}
#endif /* __KERNEL__*/
/* Positive conflict codes understood by izo_error(). */
#define IZO_ERROR_NAME 1
#define IZO_ERROR_UPDATE 2
#define IZO_ERROR_DELETE 3
#define IZO_ERROR_RENAME 4

/*
 * Translate an InterMezzo error code into a human-readable message.
 *
 * Outside the kernel, zero and negative values are treated as negated errno
 * values and passed to strerror().  The four IZO_ERROR_* codes map to fixed
 * conflict descriptions; anything else yields "Unknown InterMezzo error".
 * The returned string must not be modified or freed by the caller.
 */
static inline char *izo_error(int err)
{
        /* Indexed by (err - IZO_ERROR_NAME). */
        static char *const conflict_text[] = {
                "InterMezzo name/name conflict",
                "InterMezzo update/update conflict",
                "InterMezzo update/delete conflict",
                "InterMezzo rename/rename conflict",
        };

#ifndef __KERNEL__
        if (err <= 0)
                return strerror(-err);
#endif

        if (err >= IZO_ERROR_NAME && err <= IZO_ERROR_RENAME)
                return conflict_text[err - IZO_ERROR_NAME];

        return "Unknown InterMezzo error";
}
/* kml_unpack.c */
char *kml_print_rec(struct kml_rec *rec, int brief);
int kml_unpack(struct kml_rec *rec, char **buf, char *end);
/* fs 2.5 compat */
/* is_read_only() is replaced by bdev_read_only which takes struct
block_device *. Since this is only needed for debugging, it can be
safely ignored now.
*/
#define is_read_only(dev) 0
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc.
* Copyright (C) 2001 Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __INTERMEZZO_IDL_H__
#define __INTERMEZZO_IDL_H__
#include <linux/ioctl.h>
#include <linux/types.h>
/* this file contains all data structures used in InterMezzo's interfaces:
* - upcalls
* - ioctl's
* - KML records
* - RCVD records
* - rpc's
*/
/* UPCALL */
#define INTERMEZZO_MINOR 248
#define IZO_UPC_VERSION 0x00010002
#define IZO_UPC_PERMIT 1
#define IZO_UPC_CONNECT 2
#define IZO_UPC_GO_FETCH_KML 3
#define IZO_UPC_OPEN 4
#define IZO_UPC_REVOKE_PERMIT 5
#define IZO_UPC_KML 6
#define IZO_UPC_BACKFETCH 7
#define IZO_UPC_KML_TRUNC 8
#define IZO_UPC_SET_KMLSIZE 9
#define IZO_UPC_BRANCH_UNDO 10
#define IZO_UPC_BRANCH_REDO 11
#define IZO_UPC_GET_FILEID 12
#define IZO_UPC_CLIENT_MAKE_BRANCH 13
#define IZO_UPC_SERVER_MAKE_BRANCH 14
#define IZO_UPC_REPSTATUS 15
#define IZO_UPC_LARGEST_OPCODE 15
struct izo_upcall_hdr {
__u32 u_len;
__u32 u_version;
__u32 u_opc;
__u32 u_uniq;
__u32 u_pid;
__u32 u_uid;
__u32 u_pathlen;
__u32 u_fsetlen;
__u64 u_offset;
__u64 u_length;
__u32 u_first_recno;
__u32 u_last_recno;
__u32 u_async;
__u32 u_reclen;
__u8 u_uuid[16];
};
/* This structure _must_ sit at the beginning of the buffer */
struct izo_upcall_resp {
__u32 opcode;
__u32 unique;
__u32 result;
};
/* IOCTL */
#define IZO_IOCTL_VERSION 0x00010003
/* maximum size supported for ioc_pbuf1 */
#define KML_MAX_BUF (64*1024)
/*
 * Leading fields of an ioctl request.  izo_ioctl_getdata() copies just this
 * much from user space first, checks ioc_version against IZO_IOCTL_VERSION
 * and uses ioc_len as the size of the full request.  The same buffer is
 * then viewed as struct izo_ioctl_data, so these two fields must stay
 * identical to the first two fields of that structure.
 */
struct izo_ioctl_hdr {
__u32 ioc_len;
__u32 ioc_version;
};
/*
 * Full ioctl request: a fixed part followed by variable-length inline data
 * in ioc_bulk.  The first two fields mirror struct izo_ioctl_hdr.
 */
struct izo_ioctl_data {
/* total request length; must equal izo_ioctl_packlen() to be accepted */
__u32 ioc_len;
/* checked against IZO_IOCTL_VERSION by izo_ioctl_getdata() */
__u32 ioc_version;
__u32 ioc_izodev;
__u32 ioc_kmlrecno;
__u64 ioc_kmlsize;
__u32 ioc_flags;
__s32 ioc_inofd;
__u64 ioc_ino;
__u64 ioc_generation;
__u32 ioc_mark_what;
__u32 ioc_and_flag;
__u32 ioc_or_flag;
__u32 ioc_dev;
__u32 ioc_offset;
__u32 ioc_slot;
__u64 ioc_uid;
__u8 ioc_uuid[16];
/*
 * Inline buffers.  After izo_ioctl_getdata() a non-empty inlbuf1 points at
 * ioc_bulk[0] and a non-empty inlbuf2 at ioc_bulk + size_round(ioc_inllen1);
 * izo_ioctl_is_invalid() requires each non-empty one to end in a NUL byte.
 */
__u32 ioc_inllen1; /* path */
char *ioc_inlbuf1;
__u32 ioc_inllen2; /* fileset */
char *ioc_inlbuf2;
__u32 ioc_plen1; /* buffers in user space (KML) */
char *ioc_pbuf1;
__u32 ioc_plen2; /* buffers in user space (KML) */
char *ioc_pbuf2;
/* start of the packed inline data that follows the fixed part */
char ioc_bulk[0];
};
#define IZO_IOC_DEVICE _IOW ('p',0x50, void *)
#define IZO_IOC_REINTKML _IOW ('p',0x51, void *)
#define IZO_IOC_GET_RCVD _IOW ('p',0x52, void *)
#define IZO_IOC_SET_IOCTL_UID _IOW ('p',0x53, void *)
#define IZO_IOC_GET_KML_SIZE _IOW ('p',0x54, void *)
#define IZO_IOC_PURGE_FILE_DATA _IOW ('p',0x55, void *)
#define IZO_IOC_CONNECT _IOW ('p',0x56, void *)
#define IZO_IOC_GO_FETCH_KML _IOW ('p',0x57, void *)
#define IZO_IOC_MARK _IOW ('p',0x58, void *)
#define IZO_IOC_CLEAR_FSET _IOW ('p',0x59, void *)
#define IZO_IOC_CLEAR_ALL_FSETS _IOW ('p',0x60, void *)
#define IZO_IOC_SET_FSET _IOW ('p',0x61, void *)
#define IZO_IOC_REVOKE_PERMIT _IOW ('p',0x62, void *)
#define IZO_IOC_SET_KMLSIZE _IOW ('p',0x63, void *)
#define IZO_IOC_CLIENT_MAKE_BRANCH _IOW ('p',0x64, void *)
#define IZO_IOC_SERVER_MAKE_BRANCH _IOW ('p',0x65, void *)
#define IZO_IOC_BRANCH_UNDO _IOW ('p',0x66, void *)
#define IZO_IOC_BRANCH_REDO _IOW ('p',0x67, void *)
#define IZO_IOC_SET_PID _IOW ('p',0x68, void *)
#define IZO_IOC_SET_CHANNEL _IOW ('p',0x69, void *)
#define IZO_IOC_GET_CHANNEL _IOW ('p',0x70, void *)
#define IZO_IOC_GET_FILEID _IOW ('p',0x71, void *)
#define IZO_IOC_ADJUST_LML _IOW ('p',0x72, void *)
#define IZO_IOC_SET_FILEID _IOW ('p',0x73, void *)
#define IZO_IOC_REPSTATUS _IOW ('p',0x74, void *)
/* marking flags for fsets */
#define FSET_CLIENT_RO 0x00000001
#define FSET_LENTO_RO 0x00000002
#define FSET_HASPERMIT 0x00000004 /* we have a permit to WB */
#define FSET_INSYNC 0x00000008 /* this fileset is in sync */
#define FSET_PERMIT_WAITING 0x00000010 /* Lento is waiting for permit */
#define FSET_STEAL_PERMIT 0x00000020 /* take permit if Lento is dead */
#define FSET_JCLOSE_ON_WRITE 0x00000040 /* Journal closes on writes */
#define FSET_DATA_ON_DEMAND 0x00000080 /* update data on file_open() */
#define FSET_PERMIT_EXCLUSIVE 0x00000100 /* only one permitholder allowed */
#define FSET_HAS_BRANCHES 0x00000200 /* this fileset contains branches */
#define FSET_IS_BRANCH 0x00000400 /* this fileset is a branch */
#define FSET_FLAT_BRANCH 0x00000800 /* this fileset is ROOT with branches */
/* what to mark indicator (ioctl parameter) */
#define MARK_DENTRY 101
#define MARK_FSET 102
#define MARK_CACHE 103
#define MARK_GETFL 104
/* KML */
#define KML_MAJOR_VERSION 0x00010000
#define KML_MINOR_VERSION 0x00000002
#define KML_OPCODE_NOOP 0
#define KML_OPCODE_CREATE 1
#define KML_OPCODE_MKDIR 2
#define KML_OPCODE_UNLINK 3
#define KML_OPCODE_RMDIR 4
#define KML_OPCODE_CLOSE 5
#define KML_OPCODE_SYMLINK 6
#define KML_OPCODE_RENAME 7
#define KML_OPCODE_SETATTR 8
#define KML_OPCODE_LINK 9
#define KML_OPCODE_OPEN 10
#define KML_OPCODE_MKNOD 11
#define KML_OPCODE_WRITE 12
#define KML_OPCODE_RELEASE 13
#define KML_OPCODE_TRUNC 14
#define KML_OPCODE_SETEXTATTR 15
#define KML_OPCODE_DELEXTATTR 16
#define KML_OPCODE_KML_TRUNC 17
#define KML_OPCODE_GET_FILEID 18
#define KML_OPCODE_NUM 19
/* new stuff */
struct presto_version {
__u32 pv_mtime_sec;
__u32 pv_mtime_nsec;
__u32 pv_ctime_sec;
__u32 pv_ctime_nsec;
__u64 pv_size;
};
struct kml_prefix_hdr {
__u32 len;
__u32 version;
__u32 pid;
__u32 auid;
__u32 fsuid;
__u32 fsgid;
__u32 opcode;
__u32 ngroups;
};
struct kml_prefix {
struct kml_prefix_hdr *hdr;
__u32 *groups;
};
struct kml_suffix {
__u32 prevrec;
__u32 recno;
__u32 time;
__u32 len;
};
struct kml_rec {
char *buf;
struct kml_prefix prefix;
__u64 offset;
char *path;
int pathlen;
char *name;
int namelen;
char *target;
int targetlen;
struct presto_version *old_objectv;
struct presto_version *new_objectv;
struct presto_version *old_parentv;
struct presto_version *new_parentv;
struct presto_version *old_targetv;
struct presto_version *new_targetv;
__u32 valid;
__u32 mode;
__u32 uid;
__u32 gid;
__u64 size;
__u32 mtime_sec;
__u32 mtime_nsec;
__u32 ctime_sec;
__u32 ctime_nsec;
__u32 flags;
__u32 ino;
__u32 rdev;
__u32 major;
__u32 minor;
__u32 generation;
__u32 old_mode;
__u32 old_rdev;
__u64 old_uid;
__u64 old_gid;
char *old_target;
int old_targetlen;
struct kml_suffix *suffix;
};
/* RCVD */
/* izo_rcvd_rec fills the .intermezzo/fset/last_rcvd file and provides data about
* our view of reintegration offsets for a given peer.
*
* The only exception is the last_rcvd record which has a UUID consisting of all
* zeroes; this record's lr_local_offset field is the logical byte offset of our
* KML, which is updated when KML truncation takes place. All other fields are
* reserved. */
/* XXX - document how clean shutdowns are recorded */
struct izo_rcvd_rec {
__u8 lr_uuid[16]; /* which peer? */
__u64 lr_remote_recno; /* last confirmed remote recno */
__u64 lr_remote_offset; /* last confirmed remote offset */
__u64 lr_local_recno; /* last locally reinted recno */
__u64 lr_local_offset; /* last locally reinted offset */
__u64 lr_last_ctime; /* the largest ctime that has reintegrated */
};
/* Cache purge database
*
* Each DB entry is this structure followed by the path name, no trailing NUL. */
struct izo_purge_entry {
__u64 p_atime;
__u32 p_pathlen;
};
/* RPC */
#endif
#ifndef __PRESTO_JOURNAL_H
#define __PRESTO_JOURNAL_H
struct journal_prefix {
int len;
u32 version;
int pid;
int uid;
int fsuid;
int fsgid;
int opcode;
u32 ngroups;
u32 groups[0];
};
struct journal_suffix {
unsigned long prevrec; /* offset of previous record for dentry */
int recno;
int time;
int len;
};
#endif
#ifndef __INTERMEZZO_KML_H
#define __INTERMEZZO_KML_H
#include "intermezzo_psdev.h"
#include <linux/fs.h>
#include "intermezzo_journal.h"
#define PRESTO_KML_MAJOR_VERSION 0x00010000
#define PRESTO_KML_MINOR_VERSION 0x00002001
#define PRESTO_OP_NOOP 0
#define PRESTO_OP_CREATE 1
#define PRESTO_OP_MKDIR 2
#define PRESTO_OP_UNLINK 3
#define PRESTO_OP_RMDIR 4
#define PRESTO_OP_CLOSE 5
#define PRESTO_OP_SYMLINK 6
#define PRESTO_OP_RENAME 7
#define PRESTO_OP_SETATTR 8
#define PRESTO_OP_LINK 9
#define PRESTO_OP_OPEN 10
#define PRESTO_OP_MKNOD 11
#define PRESTO_OP_WRITE 12
#define PRESTO_OP_RELEASE 13
#define PRESTO_OP_TRUNC 14
#define PRESTO_OP_SETEXTATTR 15
#define PRESTO_OP_DELEXTATTR 16
#define PRESTO_LML_DONE 1 /* flag to get first write to do LML */
#define KML_KOP_MARK 0xffff
struct presto_lml_data {
loff_t rec_offset;
};
struct big_journal_prefix {
u32 len;
u32 version;
u32 pid;
u32 uid;
u32 fsuid;
u32 fsgid;
u32 opcode;
u32 ngroups;
u32 groups[NGROUPS_SMALL];
};
enum kml_opcode {
KML_CREATE = 1,
KML_MKDIR,
KML_UNLINK,
KML_RMDIR,
KML_CLOSE,
KML_SYMLINK,
KML_RENAME,
KML_SETATTR,
KML_LINK,
KML_OPEN,
KML_MKNOD,
KML_ENDMARK = 0xff
};
struct kml_create {
char *path;
struct presto_version new_objectv,
old_parentv,
new_parentv;
int mode;
int uid;
int gid;
};
struct kml_open {
};
struct kml_mkdir {
char *path;
struct presto_version new_objectv,
old_parentv,
new_parentv;
int mode;
int uid;
int gid;
};
struct kml_unlink {
char *path,
*name;
struct presto_version old_tgtv,
old_parentv,
new_parentv;
};
struct kml_rmdir {
char *path,
*name;
struct presto_version old_tgtv,
old_parentv,
new_parentv;
};
struct kml_close {
int open_mode,
open_uid,
open_gid;
char *path;
struct presto_version new_objectv;
__u64 ino;
int generation;
};
struct kml_symlink {
char *sourcepath,
*targetpath;
struct presto_version new_objectv,
old_parentv,
new_parentv;
int uid;
int gid;
};
struct kml_rename {
char *sourcepath,
*targetpath;
struct presto_version old_objectv,
new_objectv,
old_tgtv,
new_tgtv;
};
struct kml_setattr {
char *path;
struct presto_version old_objectv;
struct iattr iattr;
};
struct kml_link {
char *sourcepath,
*targetpath;
struct presto_version new_objectv,
old_parentv,
new_parentv;
};
struct kml_mknod {
char *path;
struct presto_version new_objectv,
old_parentv,
new_parentv;
int mode;
int uid;
int gid;
int major;
int minor;
};
/* kml record items for optimizing */
struct kml_kop_node
{
u32 kml_recno;
u32 kml_flag;
u32 kml_op;
nlink_t i_nlink;
u32 i_ino;
};
struct kml_kop_lnode
{
struct list_head chains;
struct kml_kop_node node;
};
struct kml_endmark {
u32 total;
struct kml_kop_node *kop;
};
/* kml_flag */
#define KML_REC_DELETE 1
#define KML_REC_EXIST 0
struct kml_optimize {
struct list_head kml_chains;
u32 kml_flag;
u32 kml_op;
nlink_t i_nlink;
u32 i_ino;
};
struct kml_rec {
/* attribute of this record */
int rec_size;
int rec_kml_offset;
struct big_journal_prefix rec_head;
union {
struct kml_create create;
struct kml_open open;
struct kml_mkdir mkdir;
struct kml_unlink unlink;
struct kml_rmdir rmdir;
struct kml_close close;
struct kml_symlink symlink;
struct kml_rename rename;
struct kml_setattr setattr;
struct kml_mknod mknod;
struct kml_link link;
struct kml_endmark endmark;
} rec_kml;
struct journal_suffix rec_tail;
/* for kml optimize only */
struct kml_optimize kml_optimize;
};
/* kml record items for optimizing */
extern void kml_kop_init (struct presto_file_set *fset);
extern void kml_kop_addrec (struct presto_file_set *fset,
struct inode *ino, u32 op, u32 flag);
extern int kml_kop_flush (struct presto_file_set *fset);
/* defined in kml_setup.c */
extern int kml_init (struct presto_file_set *fset);
extern int kml_cleanup (struct presto_file_set *fset);
/* defined in kml.c */
extern int begin_kml_reint (struct file *file, unsigned long arg);
extern int do_kml_reint (struct file *file, unsigned long arg);
extern int end_kml_reint (struct file *file, unsigned long arg);
/* kml_utils.c */
extern char *dlogit (void *tbuf, const void *sbuf, int size);
extern char * bdup_printf (char *format, ...);
/* defined in kml_decode.c */
/* printop */
#define PRINT_KML_PREFIX 0x1
#define PRINT_KML_SUFFIX 0x2
#define PRINT_KML_REC 0x4
#define PRINT_KML_OPTIMIZE 0x8
#define PRINT_KML_EXIST 0x10
#define PRINT_KML_DELETE 0x20
extern void kml_printrec (struct kml_rec *rec, int printop);
extern int print_allkmlrec (struct list_head *head, int printop);
extern int delete_kmlrec (struct list_head *head);
extern int kml_decoderec (char *buf, int pos, int buflen, int *size,
struct kml_rec **newrec);
extern int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen);
extern void kml_freerec (struct kml_rec *rec);
/* defined in kml_reint.c */
#define KML_CLOSE_BACKFETCH 1
extern int kml_reintbuf (struct kml_fsdata *kml_fsdata,
char *mtpt, struct kml_rec **rec);
/* defined in kml_setup.c */
extern int kml_init (struct presto_file_set *fset);
extern int kml_cleanup (struct presto_file_set *fset);
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Data structures unpacking/packing macros & inlines
*
*/
#ifndef _INTERMEZZO_LIB_H
#define _INTERMEZZO_LIB_H
#ifdef __KERNEL__
# include <linux/types.h>
#else
# include <string.h>
# include <sys/types.h>
#endif
/*
 * Round val up to the next multiple of 4; values that are already a
 * multiple of 4 are returned unchanged.
 */
static inline int size_round (int val)
{
        const int low_bits = 0x3;

        return (val + low_bits) & ~low_bits;
}
/*
 * Like size_round(), but reserves one extra byte before rounding (room for
 * a terminating NUL): returns val + 1 rounded up to a multiple of 4, except
 * that 0 stays 0.
 */
static inline int size_round0(int val)
{
        return val ? ((val + 1 + 3) & (~0x3)) : 0;
}
/*
 * Space needed to store the string `fset` including its terminating NUL,
 * rounded up to a multiple of 4 by size_round().
 */
static inline size_t round_strlen(char *fset)
{
        const size_t with_nul = strlen(fset) + 1;

        return size_round(with_nul);
}
#ifdef __KERNEL__
# define NTOH__u32(var) le32_to_cpu(var)
# define NTOH__u64(var) le64_to_cpu(var)
# define HTON__u32(var) cpu_to_le32(var)
# define HTON__u64(var) cpu_to_le64(var)
#else
# include <glib.h>
# define NTOH__u32(var) GUINT32_FROM_LE(var)
# define NTOH__u64(var) GUINT64_FROM_LE(var)
# define HTON__u32(var) GUINT32_TO_LE(var)
# define HTON__u64(var) GUINT64_TO_LE(var)
#endif
/*
 * Copy sizeof(type) bytes from ptr into var and move ptr forward.
 *
 * Makes the ENCLOSING FUNCTION return -EFAULT if fewer than sizeof(type)
 * bytes remain between ptr and end.  The bounds check is performed before
 * the load, so a truncated record is never read past `end`; on failure
 * neither var nor ptr is modified.
 *
 * `ptr` must be a modifiable char pointer lvalue.
 */
#define UNLOGV(var,type,ptr,end)                                \
do {                                                            \
        if ((ptr) > (end) ||                                    \
            (size_t)((end) - (ptr)) < sizeof(type))             \
                return -EFAULT;                                 \
        var = *(type *)(ptr);                                   \
        ptr += sizeof(type);                                    \
} while (0)
/*
 * Like UNLOGV, but the stored value is little endian and is converted to
 * CPU byte order with NTOH##type while being read.
 * type MUST be __u32 or __u64.
 *
 * Makes the ENCLOSING FUNCTION return -EFAULT if fewer than sizeof(type)
 * bytes remain between ptr and end.  The bounds check is performed before
 * the load, so a truncated record is never read past `end`.
 */
#define LUNLOGV(var,type,ptr,end)                               \
do {                                                            \
        if ((ptr) > (end) ||                                    \
            (size_t)((end) - (ptr)) < sizeof(type))             \
                return -EFAULT;                                 \
        var = NTOH##type(*(type *)(ptr));                       \
        ptr += sizeof(type);                                    \
} while (0)
/*
 * Log a value: store var as a `type` at ptr, then advance ptr by
 * sizeof(type).  No bounds checking is done; the caller must have reserved
 * the space.
 */
#define LOGV(var,type,ptr)                                      \
do {                                                            \
        *(type *)(ptr) = (var);                                 \
        (ptr) += sizeof(type);                                  \
} while (0)
/*
 * Log a value in network order: convert var with HTON##type, store it as a
 * `type` at ptr, then advance ptr by sizeof(type).  No bounds checking.
 */
#define LLOGV(var,type,ptr)                                     \
do {                                                            \
        *(type *)(ptr) = HTON##type(var);                       \
        (ptr) += sizeof(type);                                  \
} while (0)
/*
 * Set var to point at ptr viewed as a `type`, then move ptr forward by
 * sizeof(type).
 *
 * Makes the ENCLOSING FUNCTION return -EFAULT if fewer than sizeof(type)
 * bytes remain between ptr and end.  The check happens first, so var is
 * never left pointing at a truncated object and ptr is never advanced
 * beyond end.
 */
#define UNLOGP(var,type,ptr,end)                                \
do {                                                            \
        if ((ptr) > (end) ||                                    \
            (size_t)((end) - (ptr)) < sizeof(type))             \
                return -EFAULT;                                 \
        var = (type *)(ptr);                                    \
        ptr += sizeof(type);                                    \
} while (0)
/*
 * Log an object by pointer: copy sizeof(type) bytes from var to ptr, then
 * advance ptr by sizeof(type).  No bounds checking is done.
 */
#define LOGP(var,type,ptr)                                      \
do {                                                            \
        memcpy((ptr), (var), sizeof(type));                     \
        (ptr) += sizeof(type);                                  \
} while (0)
/*
 * Set var to point at `len` elements of `type` starting at ptr, then move
 * ptr forward by len * sizeof(type) rounded up to a multiple of 4.
 * When len is 0, var is set to a null pointer and ptr is left alone.
 *
 * Makes the ENCLOSING FUNCTION return -EFAULT if ptr is already beyond end
 * or if the rounded length does not fit between ptr and end.
 *
 * The length is validated in size_t arithmetic against the space that is
 * really left.  Doing the rounding in int (as size_round() does) lets a
 * negative or very large length wrap, move ptr backwards and slip past a
 * plain "ptr > end" test.
 */
#define UNLOGL(var,type,len,ptr,end)                                    \
do {                                                                    \
        if ((ptr) > (end))                                              \
                return -EFAULT;                                         \
        if ((len) == 0) {                                               \
                var = (type *)0;                                        \
        } else {                                                        \
                size_t unlogl_room_ = (size_t)((end) - (ptr));          \
                size_t unlogl_need_;                                    \
                /* reject before multiplying so it cannot overflow */   \
                if ((size_t)(len) > unlogl_room_ / sizeof(type))        \
                        return -EFAULT;                                 \
                unlogl_need_ = (size_t)(len) * sizeof(type);            \
                unlogl_need_ = (unlogl_need_ + 3) & ~(size_t)0x3;       \
                if (unlogl_need_ > unlogl_room_)                        \
                        return -EFAULT;                                 \
                var = (type *)(ptr);                                    \
                ptr += unlogl_need_;                                    \
        }                                                               \
} while (0)
/*
 * Unpack a NUL-terminated byte string of `len` characters: UNLOGL is used
 * to consume len + 1 bytes (rounded up to a multiple of 4), then the byte
 * at offset len is required to be '\0'.  Intended for `type` == char.
 *
 * Makes the ENCLOSING FUNCTION return -EFAULT if UNLOGL fails, if the
 * string is not terminated, or if len + 1 evaluates to 0.  In that last
 * case UNLOGL hands back a null pointer and nothing was consumed, so there
 * is no terminator to inspect; looking for one would read outside the
 * buffer.
 */
#define UNLOGL0(var,type,len,ptr,end)                                   \
do {                                                                    \
        UNLOGL(var,type,((len) + 1),ptr,end);                           \
        if ((var) == (type *)0 || ((char *)(var))[(len)] != '\0')       \
                return -EFAULT;                                         \
} while (0)
/*
 * Log `len` bytes from var at ptr and advance ptr by size_round(len).
 * The whole rounded region is cleared first so the padding bytes never
 * carry stale memory contents.  No terminator is added and no bounds
 * checking is done.
 */
#define LOGL(var,len,ptr)                                       \
do {                                                            \
        size_t logl_padded_ = size_round(len);                  \
        /* Prevent data leakage. */                             \
        if (logl_padded_ > 0)                                   \
                memset((char *)(ptr), 0, logl_padded_);         \
        memcpy((char *)(ptr), (const char *)(var), (len));      \
        (ptr) += logl_padded_;                                  \
} while (0)
/*
 * Log `len` bytes from var at ptr followed by a terminating NUL, and
 * advance ptr by size_round(len + 1).  Nothing is written and ptr is left
 * alone when len is 0.  No bounds checking is done.
 *
 * The whole rounded region is cleared before the copy, as LOGL does, so the
 * up to three padding bytes after the terminator do not leak stale memory
 * contents into the log.
 */
#define LOGL0(var,len,ptr)                                      \
do {                                                            \
        size_t logl0_padded_;                                   \
        if (!(len))                                             \
                break;                                          \
        logl0_padded_ = size_round((len) + 1);                  \
        /* also provides the terminating NUL at offset len */   \
        memset((char *)(ptr), 0, logl0_padded_);                \
        memcpy((char *)(ptr), (const char *)(var), (len));      \
        (ptr) += logl0_padded_;                                 \
} while (0)
#endif /* _INTERMEZZO_LIB_H */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
#ifndef __PRESTO_PSDEV_H
#define __PRESTO_PSDEV_H
#define MAX_CHANNEL 16
#define PROCNAME_SIZE 32
#include <linux/smp_lock.h>
/* represents state of an instance reached with /dev/intermezzo */
/* communication pending & processing queues */
struct upc_channel {
unsigned int uc_seq;
wait_queue_head_t uc_waitq; /* Lento wait queue */
struct list_head uc_pending;
struct list_head uc_processing;
spinlock_t uc_lock;
int uc_pid; /* Lento's pid */
int uc_hard; /* allows signals during upcalls */
int uc_no_filter;
int uc_no_journal;
int uc_no_upcall;
int uc_timeout; /* . sec: signals will dequeue upc */
long uc_errorval; /* for testing I/O failures */
struct list_head uc_cache_list;
int uc_minor;
};
/*
 * True when the current task, its real_parent, or its real_parent's
 * real_parent has the pid stored in izo_channels[minor].uc_pid (the field
 * documented as "Lento's pid").
 * NOTE(review): `minor` is expanded unparenthesized and is not range-checked
 * against MAX_CHANNEL here; callers are presumably expected to pass a plain,
 * valid channel index -- confirm.
 */
#define ISLENTO(minor) (current->pid == izo_channels[minor].uc_pid \
|| current->real_parent->pid == izo_channels[minor].uc_pid \
|| current->real_parent->real_parent->pid == izo_channels[minor].uc_pid)
extern struct upc_channel izo_channels[MAX_CHANNEL];
/* message types between presto filesystem in kernel */
#define REQ_READ 1
#define REQ_WRITE 2
#define REQ_ASYNC 4
#define REQ_DEAD 8
struct upc_req {
struct list_head rq_chain;
caddr_t rq_data;
int rq_flags;
int rq_bufsize;
int rq_rep_size;
int rq_opcode; /* copied from data to save lookup */
int rq_unique;
wait_queue_head_t rq_sleep; /* process' wait queue */
unsigned long rq_posttime;
};
#endif
/*
* Based on cfs.h from Coda, but revamped for increased simplicity.
* Linux modifications by Peter Braam, Aug 1996
* Rewritten for InterMezzo
*/
#ifndef _PRESTO_HEADER_
#define _PRESTO_HEADER_
/* upcall.c */
#define SYNCHRONOUS 0
#define ASYNCHRONOUS 1
int lento_permit(int minor, int pathlen, int fsetnamelen, char *path, char *fset);
int lento_opendir(int minor, int pathlen, char *path, int async);
int lento_kml(int minor, unsigned int offset, unsigned int first_recno,
unsigned int length, unsigned int last_recno, int namelen,
char *fsetname);
int lento_open(int minor, int pathlen, char *path);
int lento_journal(int minor, char *page, int async);
int lento_release_permit(int minor, int cookie);
/*
* Kernel <--> Lento communications.
*/
/* upcalls */
#define LENTO_PERMIT 1
#define LENTO_JOURNAL 2
#define LENTO_OPENDIR 3
#define LENTO_OPEN 4
#define LENTO_SIGNAL 5
#define LENTO_KML 6
#define LENTO_COOKIE 7
/* Lento <-> Presto RPC arguments */
struct lento_up_hdr {
unsigned int opcode;
unsigned int unique; /* Keep multiple outstanding msgs distinct */
u_short pid; /* Common to all */
u_short uid;
};
/* This structure _must_ sit at the beginning of the buffer */
struct lento_down_hdr {
unsigned int opcode;
unsigned int unique;
unsigned int result;
};
/* lento_permit: */
struct lento_permit_in {
struct lento_up_hdr uh;
int pathlen;
int fsetnamelen;
char path[0];
};
struct lento_permit_out {
struct lento_down_hdr dh;
};
/* lento_opendir: */
struct lento_opendir_in {
struct lento_up_hdr uh;
int async;
int pathlen;
char path[0];
};
struct lento_opendir_out {
struct lento_down_hdr dh;
};
/* lento_kml: */
struct lento_kml_in {
struct lento_up_hdr uh;
unsigned int offset;
unsigned int first_recno;
unsigned int length;
unsigned int last_recno;
int namelen;
char fsetname[0];
};
struct lento_kml_out {
struct lento_down_hdr dh;
};
/* lento_open: */
struct lento_open_in {
struct lento_up_hdr uh;
int pathlen;
char path[0];
};
struct lento_open_out {
struct lento_down_hdr dh;
};
/* lento_response_cookie */
struct lento_response_cookie_in {
struct lento_up_hdr uh;
int cookie;
};
struct lento_response_cookie_out {
struct lento_down_hdr dh;
};
/* Message that starts with the reply (down) header and then carries device
 * numbers, a mode and a zero-length trailing array for the path.
 * NOTE(review): not a member of union down_args; its users are not visible
 * in this part of the file. */
struct lento_mknod {
struct lento_down_hdr dh;
int major;
int minor;
int mode;
char path[0];
};
/* NB: every struct below begins with an up_hdr */
/* Overlay of all upcall argument layouts.  Because every member starts with
 * a struct lento_up_hdr, the header can be read through "uh" regardless of
 * which member was filled in. */
union up_args {
struct lento_up_hdr uh;
struct lento_permit_in lento_permit;
struct lento_open_in lento_open;
struct lento_opendir_in lento_opendir;
struct lento_kml_in lento_kml;
struct lento_response_cookie_in lento_response_cookie;
};
/* Overlay of all reply layouts.  Every member starts with a
 * struct lento_down_hdr, reachable through "dh". */
union down_args {
struct lento_down_hdr dh;
struct lento_permit_out lento_permit;
struct lento_open_out lento_open;
struct lento_opendir_out lento_opendir;
struct lento_kml_out lento_kml;
struct lento_response_cookie_out lento_response_cookie;
};
#include "intermezzo_psdev.h"
/* Generic upcall entry point taking a union up_args buffer, an in/out size
 * pair, an async flag and a request descriptor.
 * NOTE(review): exact semantics of read_size/rep_size are defined by the
 * implementation, which is not visible here. */
int lento_upcall(int minor, int read_size, int *rep_size,
union up_args *buffer, int async,
struct upc_req *rq );
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam
* Copyright (C) 2001 Cluster File Systems, Inc.
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* Support for journalling extended attributes
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/time.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/* One outstanding reservation of log space.  Filled in by
 * presto_reserve_record() and linked on presto_log_fd.fd_reservations until
 * presto_release_record() removes it. */
struct presto_reservation_data {
unsigned int ri_recno; /* record number assigned to the reservation */
loff_t ri_offset; /* fd_offset at which the reserved record starts */
loff_t ri_size; /* size of the reserved record (rec->size) */
struct list_head ri_list; /* link in fd->fd_reservations */
};
/*
* Locking Semantics
*
* write lock in struct presto_log_fd:
* - name: fd_lock
* - required for: accessing any field in a presto_log_fd
* - may not be held across I/O
* -
*
*/
/*
 * Reserve space for a record and/or atomically sample the state of the log.
 * On return, rec holds the location (offset and record number) of the
 * reserved record, and the reservation has been appended to the queue of
 * outstanding reservations.
 */
/*
 * Reserve rec->size bytes at the current end of the log described by fd.
 *
 * Under fd->fd_lock this bumps fd_recno, picks the start offset, records the
 * reservation in rd, queues rd on fd->fd_reservations and advances
 * fd_offset past the record.  On return rec->offset and rec->recno describe
 * the reserved record; for KML records rec->offset additionally includes
 * fset->fset_kml_logical_off.
 *
 * rd is caller-provided storage and stays linked on the list until
 * presto_release_record() is called for it.
 */
static void presto_reserve_record(struct presto_file_set *fset,
struct presto_log_fd *fd,
struct rec_info *rec,
struct presto_reservation_data *rd)
{
int chunked_record = 0;
ENTRY;
write_lock(&fd->fd_lock);
if ( rec->is_kml ) {
int chunk = 1 << fset->fset_chunkbits;
int chunk_mask = ~(chunk -1);
loff_t boundary;
/* round fd_offset up to the next multiple of the chunk size */
boundary = (fd->fd_offset + chunk - 1) & chunk_mask;
/* a record that would reach the boundary starts at the boundary */
if ( fd->fd_offset + rec->size >= boundary ) {
chunked_record = 1;
fd->fd_offset = boundary;
}
}
fd->fd_recno++;
/* this moves the fd_offset back after truncation */
if ( list_empty(&fd->fd_reservations) &&
!chunked_record) {
fd->fd_offset = fd->fd_file->f_dentry->d_inode->i_size;
}
rec->offset = fd->fd_offset;
if (rec->is_kml)
rec->offset += fset->fset_kml_logical_off;
rec->recno = fd->fd_recno;
/* add the reservation data to the end of the list */
rd->ri_offset = fd->fd_offset;
rd->ri_size = rec->size;
rd->ri_recno = rec->recno;
/* inserting after the current last entry (head.prev) appends to the tail */
list_add(&rd->ri_list, fd->fd_reservations.prev);
fd->fd_offset += rec->size;
write_unlock(&fd->fd_lock);
EXIT;
}
/*
 * Drop a reservation made by presto_reserve_record(): unlink rd from
 * fd->fd_reservations while holding fd->fd_lock.
 */
static inline void presto_release_record(struct presto_log_fd *fd,
struct presto_reservation_data *rd)
{
write_lock(&fd->fd_lock);
list_del(&rd->ri_list);
write_unlock(&fd->fd_lock);
}
/* XXX should we ask for do_truncate to be exported? */
/*
 * Truncate the file behind @dentry to @length bytes, but only if its size is
 * still @size_check when the inode semaphore has been taken.
 *
 * Returns 0 on success, -EINVAL for a negative @length, -EALREADY when the
 * size no longer matches @size_check, or the error reported by the setattr
 * operation.  @fset is unused.
 *
 * XXX should we ask for do_truncate to be exported?
 */
int izo_do_truncate(struct presto_file_set *fset, struct dentry *dentry,
                    loff_t length, loff_t size_check)
{
        struct inode *inode = dentry->d_inode;
        struct iattr newattrs;
        int error;

        ENTRY;

        if (length < 0) {
                EXIT;
                return -EINVAL;
        }

        down(&inode->i_sem);
        lock_kernel();

        if (size_check != inode->i_size) {
                /* somebody changed the file since the caller sampled it */
                error = -EALREADY;
                goto out_unlock;
        }

        newattrs.ia_size = length;
        /* ATTR_CTIME makes the setattr code copy ia_ctime into the inode.
         * We bypass notify_change(), which normally fills it in, so set it
         * here instead of handing over uninitialised stack contents. */
        newattrs.ia_ctime = CURRENT_TIME;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;

        if (inode->i_op && inode->i_op->setattr)
                error = inode->i_op->setattr(dentry, &newattrs);
        else
                /* propagate failures (e.g. from vmtruncate) to the caller */
                error = inode_setattr(inode, &newattrs);

out_unlock:
        unlock_kernel();
        up(&inode->i_sem);
        EXIT;
        return error;
}
/*
 * Ask userspace (via izo_upc_kml_truncate) to start truncating the KML of
 * @fset.  fd_truncating is used as a "truncation in progress" flag: it is
 * claimed under fd_lock, and given back if the upcall fails.
 */
static void presto_kml_truncate(struct presto_file_set *fset)
{
        int rc;
        int in_progress;

        ENTRY;

        /* test-and-set of the flag under the log lock */
        write_lock(&fset->fset_kml.fd_lock);
        in_progress = (fset->fset_kml.fd_truncating == 1);
        if (!in_progress)
                fset->fset_kml.fd_truncating = 1;
        write_unlock(&fset->fset_kml.fd_lock);

        if (in_progress) {
                EXIT;
                return;
        }

        CERROR("islento: %d, count: %d\n",
               ISLENTO(presto_i2m(fset->fset_dentry->d_inode)),
               fset->fset_permit_count);

        rc = izo_upc_kml_truncate(fset->fset_cache->cache_psdev->uc_minor,
                                  fset->fset_lento_off, fset->fset_lento_recno,
                                  fset->fset_name);

        /* Userspace is the only permitholder now, and will retain an exclusive
         * hold on the permit until KML truncation completes. */
        /* FIXME: double check this code path now that the precise semantics of
         * fset->fset_permit_count have changed. */
        if (rc != 0) {
                /* upcall failed: nobody will finish the truncation */
                write_lock(&fset->fset_kml.fd_lock);
                fset->fset_kml.fd_truncating = 0;
                write_unlock(&fset->fset_kml.fd_lock);
        }

        EXIT;
}
/*
 * Start a transaction through the cache filter's transaction operations.
 * Returns whatever tr_start returns, or NULL when the filter has no
 * transaction operations.
 */
void *presto_trans_start(struct presto_file_set *fset, struct inode *inode,
                         int op)
{
        ENTRY;

        if (fset->fset_cache->cache_filter->o_trops) {
                EXIT;
                return fset->fset_cache->cache_filter->o_trops->tr_start
                        (fset, inode, op);
        }

        EXIT;
        return NULL;
}
/*
 * Commit the transaction @handle through the cache filter's transaction
 * operations (no-op when the filter has none), then kick off a KML
 * truncation if the KML has grown past fset->kml_truncate_size.
 */
void presto_trans_commit(struct presto_file_set *fset, void *handle)
{
        ENTRY;

        if (fset->fset_cache->cache_filter->o_trops) {
                fset->fset_cache->cache_filter->o_trops->tr_commit(fset,
                                                                   handle);

                /* Check to see if the KML needs truncated. */
                if (fset->kml_truncate_size > 0) {
                        if (!fset->fset_kml.fd_truncating &&
                            fset->fset_kml.fd_offset >
                                        fset->kml_truncate_size) {
                                CDEBUG(D_JOURNAL, "kml size: %lu; truncating\n",
                                       (unsigned long)fset->fset_kml.fd_offset);
                                presto_kml_truncate(fset);
                        }
                }
        }

        EXIT;
}
/*
 * Return the uc_no_journal flag of the izo_channels[] entry selected by the
 * minor of @fset's psdev.
 */
inline int presto_no_journal(struct presto_file_set *fset)
{
        return izo_channels[fset->fset_cache->cache_psdev->uc_minor]
                .uc_no_journal;
}
/* Round x up to the next multiple of 4. */
#define size_round(x) (((x)+3) & ~0x3)
/* Release a PAGE_SIZE buffer obtained with BUFF_ALLOC. */
#define BUFF_FREE(buf) PRESTO_FREE(buf, PAGE_SIZE)
/*
 * Allocate a PAGE_SIZE buffer into newbuf.  On failure, free oldbuf (if
 * non-NULL) and make the *calling function* return -ENOMEM.
 *
 * NOTE(review): this expands to two statements and is not wrapped in
 * do { } while (0), so it must only be used as a full statement inside a
 * braced block, in a function returning int.
 */
#define BUFF_ALLOC(newbuf, oldbuf) \
PRESTO_ALLOC(newbuf, PAGE_SIZE); \
if ( !newbuf ) { \
if (oldbuf) \
BUFF_FREE(oldbuf); \
return -ENOMEM; \
}
/*
 * Build the path of a dentry relative to the root of its fileset.
 * "buflen" should be PAGE_SIZE or more.
 */
/*
 * Build, right-aligned at the end of @buffer, the '/'-separated path that
 * leads from @root down to @dentry, and return a pointer to its first
 * character.  " (deleted)" is appended for an unhashed, non-root dentry.
 * If @dentry is @root (or its own parent) the result is "/".  Components
 * that no longer fit are silently left out.
 */
char * presto_path(struct dentry *dentry, struct dentry *root,
                   char *buffer, int buflen)
{
        char *cursor = buffer + buflen - 1;     /* slot for the final NUL */
        char *start;
        int room = buflen - 1;

        *cursor = '\0';

        if (dentry->d_parent != dentry && d_unhashed(dentry)) {
                room -= 10;
                cursor -= 10;
                memcpy(cursor, " (deleted)", 10);
        }

        /* Get '/' right: this is the answer when no component is added */
        start = cursor - 1;
        *start = '/';

        while (dentry != root) {
                struct dentry *up = dentry->d_parent;
                int n;

                if (up == dentry)
                        break;

                n = dentry->d_name.len;
                room -= n + 1;
                if (room < 0)
                        break;

                /* prepend "/<name>" in front of what has been built so far */
                cursor -= n + 1;
                *cursor = '/';
                memcpy(cursor + 1, dentry->d_name.name, n);

                start = cursor;
                dentry = up;
        }

        return start;
}
/*
 * Append @size bytes taken from @value to the record being built at @buf
 * and return the position just past the copied bytes.
 */
static inline char *logit(char *buf, const void *value, int size)
{
        memcpy(buf, value, size);
        return buf + size;
}
/*
 * Write the record prefix into @buf: a struct kml_prefix_hdr followed by
 * @ngroups group ids, each stored as a little-endian 32-bit value.
 * Returns the position just past the prefix.
 *
 * The group ids are converted and appended one at a time.  The previous
 * version staged them in a fixed NGROUPS_SMALL-sized stack array, which was
 * overrun whenever @ngroups exceeded NGROUPS_SMALL.  The bytes produced are
 * unchanged.  The caller remains responsible for @buf being large enough.
 */
static inline char *
journal_log_prefix_with_groups_and_ids(char *buf, int opcode,
                                       struct rec_info *rec,
                                       __u32 ngroups, gid_t *groups,
                                       __u32 fsuid, __u32 fsgid)
{
        struct kml_prefix_hdr p;
        __u32 i;

        p.len = cpu_to_le32(rec->size);
        p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION;
        p.pid = cpu_to_le32(current->pid);
        p.auid = cpu_to_le32(current->uid);
        p.fsuid = cpu_to_le32(fsuid);
        p.fsgid = cpu_to_le32(fsgid);
        p.ngroups = cpu_to_le32(ngroups);
        p.opcode = cpu_to_le32(opcode);

        buf = logit(buf, &p, sizeof(struct kml_prefix_hdr));

        for (i = 0; i < ngroups; i++) {
                __u32 gid = cpu_to_le32((__u32) groups[i]);

                buf = logit(buf, &gid, sizeof(gid));
        }

        return buf;
}
static inline char *
journal_log_prefix(char *buf, int opcode, struct rec_info *rec)
{
__u32 groups[NGROUPS_SMALL];
int i;
/* convert 16 bit gid's to 32 bit gid's */
for (i=0; i<current->group_info->ngroups; i++)
groups[i] = GROUP_AT(current->group_info,i);
return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
(__u32)current->group_info->ngroups,
groups,
(__u32)current->fsuid,
(__u32)current->fsgid);
}
/*
 * Write the record prefix using the caller-supplied group list together
 * with the current task's fsuid and fsgid.
 */
static inline char *
journal_log_prefix_with_groups(char *buf, int opcode, struct rec_info *rec,
                               __u32 ngroups, gid_t *groups)
{
        __u32 fsuid = (__u32)current->fsuid;
        __u32 fsgid = (__u32)current->fsgid;

        return journal_log_prefix_with_groups_and_ids(buf, opcode, rec,
                                                      ngroups, groups,
                                                      fsuid, fsgid);
}
static inline char *log_dentry_version(char *buf, struct dentry *dentry)
{
struct presto_version version;
presto_getversion(&version, dentry->d_inode);
version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
version.pv_size = HTON__u64(version.pv_size);
return logit(buf, &version, sizeof(version));
}
static inline char *log_version(char *buf, struct presto_version *pv)
{
struct presto_version version;
memcpy(&version, pv, sizeof(version));
version.pv_mtime_sec = HTON__u32(version.pv_mtime_sec);
version.pv_mtime_nsec = HTON__u32(version.pv_mtime_nsec);
version.pv_ctime_sec = HTON__u32(version.pv_ctime_sec);
version.pv_ctime_nsec = HTON__u32(version.pv_ctime_nsec);
version.pv_size = HTON__u64(version.pv_size);
return logit(buf, &version, sizeof(version));
}
static inline char *log_rollback(char *buf, struct izo_rollback_data *rb)
{
struct izo_rollback_data rollback;
rollback.rb_mode = HTON__u32(rb->rb_mode);
rollback.rb_rdev = HTON__u32(rb->rb_rdev);
rollback.rb_uid = HTON__u64(rb->rb_uid);
rollback.rb_gid = HTON__u64(rb->rb_gid);
return logit(buf, &rollback, sizeof(rollback));
}
static inline char *journal_log_suffix(char *buf, char *log,
struct presto_file_set *fset,
struct dentry *dentry,
struct rec_info *rec)
{
struct kml_suffix s;
struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log;
#if 0
/* XXX needs to be done after reservation,
disable ths until version 1.2 */
if ( dentry ) {
s.prevrec = cpu_to_le32(rec->offset -
presto_d2d(dentry)->dd_kml_offset);
presto_d2d(dentry)->dd_kml_offset = rec->offset;
} else {
s.prevrec = -1;
}
#endif
s.prevrec = 0;
/* record number needs to be filled in after reservation
s.recno = cpu_to_le32(rec->recno); */
s.time = cpu_to_le32(get_seconds());
s.len = p->len;
return logit(buf, &s, sizeof(s));
}
/*
 * Close the file attached to @logfd, if any, and clear logfd->fd_file.
 * Returns the result of filp_close(), or 0 when no file was attached (an
 * error message is logged in that case).
 */
int izo_log_close(struct presto_log_fd *logfd)
{
        int rc;

        if (!logfd->fd_file) {
                CERROR("InterMezzo: %s: no filp\n", __FUNCTION__);
                return 0;
        }

        rc = filp_close(logfd->fd_file, 0);
        logfd->fd_file = NULL;

        if (rc != 0)
                CERROR("InterMezzo: close files: filp won't close: %d\n", rc);

        return rc;
}
/*
 * Write @len bytes from the kernel buffer @str to @file at *@off through
 * the file's own write method.
 *
 * Returns @len on success, -EINVAL when @off, @file or its write method is
 * missing, and -EIO when the write method returns anything other than @len.
 */
int presto_fwrite(struct file *file, const char *str, int len, loff_t *off)
{
        mm_segment_t saved_fs;
        int rc;

        ENTRY;

        if (!off || !file || !file->f_op || !file->f_op->write) {
                EXIT;
                return -EINVAL;
        }

        /* let the write method accept a kernel-space buffer */
        saved_fs = get_fs();
        set_fs(get_ds());

        rc = file->f_op->write(file, str, len, off);
        if (rc != len) {
                CERROR("presto_fwrite: wrote %d bytes instead of "
                       "%d at %ld\n", rc, len, (long)*off);
                rc = -EIO;
        }

        set_fs(saved_fs);

        EXIT;
        return rc;
}
/*
 * Read @len bytes from @file at *@off into the kernel buffer @str through
 * the file's own read method.
 *
 * Returns @len on success, -EINVAL when @off, @file or its read method is
 * missing, and -EIO when the read method returns anything other than @len.
 *
 * The argument checks come first: the diagnostic for large reads
 * dereferences both @off and @file, and used to run before they had been
 * validated.
 */
int presto_fread(struct file *file, char *str, int len, loff_t *off)
{
        int rc;
        mm_segment_t old_fs;

        ENTRY;

        if (!off || !file || !file->f_op || !file->f_op->read) {
                EXIT;
                return -EINVAL;
        }

        if (len > 512)
                CERROR("presto_fread: read at %Ld for %d bytes, ino %ld\n",
                       *off, len, file->f_dentry->d_inode->i_ino);

        /* let the read method accept a kernel-space buffer */
        old_fs = get_fs();
        set_fs(get_ds());

        rc = file->f_op->read(file, str, len, off);
        if (rc != len) {
                CDEBUG(D_FILE, "presto_fread: read %d bytes instead of "
                       "%d at %Ld\n", rc, len, *off);
                rc = -EIO;
        }

        set_fs(old_fs);

        EXIT;
        return rc;
}
loff_t presto_kml_offset(struct presto_file_set *fset)
{
unsigned int kml_recno;
struct presto_log_fd *fd = &fset->fset_kml;
loff_t offset;
ENTRY;
write_lock(&fd->fd_lock);
/* Determine the largest valid offset, i.e. up until the first
* reservation held on the file. */
if ( !list_empty(&fd->fd_reservations) ) {
struct presto_reservation_data *rd;
rd = list_entry(fd->fd_reservations.next,
struct presto_reservation_data,
ri_list);
offset = rd->ri_offset;
kml_recno = rd->ri_recno;
} else {
offset = fd->fd_file->f_dentry->d_inode->i_size;
kml_recno = fset->fset_kml.fd_recno;
}
write_unlock(&fd->fd_lock);
return offset;
}
/*
 * Tell Lento about KML records it has not seen yet.
 *
 * Samples the end of the valid KML (start of the first outstanding
 * reservation, or the file size) and the matching record number.  If that
 * record number is ahead of fset_lento_recno, issues izo_upc_kml() and, on
 * success, remembers the new position in fset_lento_off / fset_lento_recno.
 *
 * Returns 0 when there is nothing to send or the upcall succeeded, otherwise
 * the upcall's return value.
 *
 * NOTE(review): fd_lock is held across izo_upc_kml(); whether that upcall
 * can block is not visible here -- confirm against the locking rules stated
 * at the top of this file.
 * NOTE(review): the upcall is given offset + fset_kml_logical_off, but
 * fset_lento_off is stored without the logical offset -- confirm intended.
 */
static int presto_kml_dispatch(struct presto_file_set *fset)
{
int rc = 0;
unsigned int kml_recno;
struct presto_log_fd *fd = &fset->fset_kml;
loff_t offset;
ENTRY;
write_lock(&fd->fd_lock);
/* Determine the largest valid offset, i.e. up until the first
* reservation held on the file. */
if ( !list_empty(&fd->fd_reservations) ) {
struct presto_reservation_data *rd;
rd = list_entry(fd->fd_reservations.next,
struct presto_reservation_data,
ri_list);
offset = rd->ri_offset;
kml_recno = rd->ri_recno;
} else {
offset = fd->fd_file->f_dentry->d_inode->i_size;
kml_recno = fset->fset_kml.fd_recno;
}
/* the log position must never be behind what Lento was last told */
if ( kml_recno < fset->fset_lento_recno ) {
CERROR("presto_kml_dispatch: smoke is coming\n");
write_unlock(&fd->fd_lock);
EXIT;
return 0;
} else if ( kml_recno == fset->fset_lento_recno ) {
/* nothing new since the last upcall */
write_unlock(&fd->fd_lock);
EXIT;
return 0;
/* XXX add a further "if" here to delay the KML upcall */
#if 0
} else if ( kml_recno < fset->fset_lento_recno + 100) {
write_unlock(&fd->fd_lock);
EXIT;
return 0;
#endif
}
CDEBUG(D_PIOCTL, "fset: %s\n", fset->fset_name);
rc = izo_upc_kml(fset->fset_cache->cache_psdev->uc_minor,
fset->fset_lento_off, fset->fset_lento_recno,
offset + fset->fset_kml_logical_off, kml_recno,
fset->fset_name);
if ( rc ) {
write_unlock(&fd->fd_lock);
EXIT;
return rc;
}
/* upcall accepted: advance the "already sent" markers */
fset->fset_lento_off = offset;
fset->fset_lento_recno = kml_recno;
write_unlock(&fd->fd_lock);
EXIT;
return 0;
}
/*
 * Look up the parent directory of @path (LOOKUP_PARENT) into @nd.
 * Returns 0 on success or the error from path_lookup().  @fset is unused.
 */
int izo_lookup_file(struct presto_file_set *fset, char *path,
                    struct nameidata *nd)
{
        int rc;

        CDEBUG(D_CACHE, "looking up: %s\n", path);

        rc = path_lookup(path, LOOKUP_PARENT, nd);
        if (rc != 0) {
                EXIT;
        }

        return rc;
}
/* FIXME: this function is a mess of locking and error handling. There's got to
* be a better way. */
/*
 * Rename the fileset-internal file @oldname to @newname via presto_rename().
 *
 * Both names are turned into paths with izo_make_path(), their parent
 * directories are looked up, the two directories are locked with
 * lock_rename(), and the final components are resolved with lookup_hash().
 *
 * Returns 0 on success, -ENOENT when a path cannot be built or the source
 * does not exist, or the error from the lookup / rename step that failed.
 *
 * The labels at the end unwind in reverse order of acquisition; each failure
 * jumps to the label that releases exactly what has been acquired so far.
 */
static int do_truncate_rename(struct presto_file_set *fset, char *oldname,
char *newname)
{
struct dentry *old_dentry, *new_dentry;
struct nameidata oldnd, newnd;
char *oldpath, *newpath;
int error;
ENTRY;
oldpath = izo_make_path(fset, oldname);
if (oldpath == NULL) {
EXIT;
return -ENOENT;
}
newpath = izo_make_path(fset, newname);
if (newpath == NULL) {
error = -ENOENT;
EXIT;
goto exit;
}
if ((error = izo_lookup_file(fset, oldpath, &oldnd)) != 0) {
EXIT;
goto exit1;
}
if ((error = izo_lookup_file(fset, newpath, &newnd)) != 0) {
EXIT;
goto exit2;
}
/* lock both parent directories before resolving the last components */
lock_rename(newnd.dentry, oldnd.dentry);
old_dentry = lookup_hash(&oldnd.last, oldnd.dentry);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry)) {
EXIT;
goto exit3;
}
/* a negative dentry means the source file does not exist */
error = -ENOENT;
if (!old_dentry->d_inode) {
EXIT;
goto exit4;
}
new_dentry = lookup_hash(&newnd.last, newnd.dentry);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry)) {
EXIT;
goto exit4;
}
{
extern int presto_rename(struct inode *old_dir,struct dentry *old_dentry,
struct inode *new_dir,struct dentry *new_dentry);
error = presto_rename(old_dentry->d_parent->d_inode, old_dentry,
new_dentry->d_parent->d_inode, new_dentry);
}
dput(new_dentry);
EXIT;
exit4:
dput(old_dentry);
exit3:
unlock_rename(newnd.dentry, oldnd.dentry);
path_release(&newnd);
exit2:
path_release(&oldnd);
exit1:
/* the free size is passed as string length plus the terminating NUL */
PRESTO_FREE(newpath, strlen(newpath) + 1);
exit:
PRESTO_FREE(oldpath, strlen(oldpath) + 1);
return error;
}
/* This function is called with the fset->fset_kml.fd_lock held */
int presto_finish_kml_truncate(struct presto_file_set *fset,
unsigned long int offset)
{
struct lento_vfs_context info;
void *handle;
struct file *f;
struct dentry *dentry;
int error = 0, len;
struct nameidata nd;
char *kmlpath = NULL, *smlpath = NULL;
ENTRY;
if (offset == 0) {
/* Lento couldn't do what it needed to; abort the truncation. */
fset->fset_kml.fd_truncating = 0;
EXIT;
return 0;
}
/* someone is about to write to the end of the KML; try again later. */
if ( !list_empty(&fset->fset_kml.fd_reservations) ) {
EXIT;
return -EAGAIN;
}
f = presto_copy_kml_tail(fset, offset);
if (IS_ERR(f)) {
EXIT;
return PTR_ERR(f);
}
/* In a single transaction:
*
* - unlink 'kml'
* - rename 'kml_tmp' to 'kml'
* - unlink 'sml'
* - rename 'sml_tmp' to 'sml'
* - rewrite the first record of last_rcvd with the new kml
* offset.
*/
handle = presto_trans_start(fset, fset->fset_dentry->d_inode,
KML_OPCODE_KML_TRUNC);
if (IS_ERR(handle)) {
presto_release_space(fset->fset_cache, PRESTO_REQLOW);
CERROR("ERROR: presto_finish_kml_truncate: no space for transaction\n");
EXIT;
return -ENOMEM;
}
memset(&info, 0, sizeof(info));
info.flags = LENTO_FL_IGNORE_TIME;
kmlpath = izo_make_path(fset, "kml");
if (kmlpath == NULL) {
error = -ENOMEM;
CERROR("make_path failed: ENOMEM\n");
EXIT;
goto exit_commit;
}
if ((error = izo_lookup_file(fset, kmlpath, &nd)) != 0) {
CERROR("izo_lookup_file(kml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
CERROR("lookup_hash failed\n");
EXIT;
goto exit_commit;
}
error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
dput(dentry);
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
if (error != 0) {
CERROR("presto_do_unlink(kml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}
smlpath = izo_make_path(fset, "sml");
if (smlpath == NULL) {
error = -ENOMEM;
CERROR("make_path() failed: ENOMEM\n");
EXIT;
goto exit_commit;
}
if ((error = izo_lookup_file(fset, smlpath, &nd)) != 0) {
CERROR("izo_lookup_file(sml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
CERROR("lookup_hash failed\n");
EXIT;
goto exit_commit;
}
error = presto_do_unlink(fset, dentry->d_parent, dentry, &info);
dput(dentry);
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
if (error != 0) {
CERROR("presto_do_unlink(sml) failed: %d.\n", error);
EXIT;
goto exit_commit;
}
error = do_truncate_rename(fset, "kml_tmp", "kml");
if (error != 0)
CERROR("do_truncate_rename(kml_tmp, kml) failed: %d\n", error);
error = do_truncate_rename(fset, "sml_tmp", "sml");
if (error != 0)
CERROR("do_truncate_rename(sml_tmp, sml) failed: %d\n", error);
/* Write a new 'last_rcvd' record with the new KML offset */
fset->fset_kml_logical_off += offset;
CDEBUG(D_CACHE, "new kml_logical_offset: %Lu\n",
fset->fset_kml_logical_off);
if (presto_write_kml_logical_offset(fset) != 0) {
CERROR("presto_write_kml_logical_offset failed\n");
}
presto_trans_commit(fset, handle);
/* Everything was successful, so swap the KML file descriptors */
filp_close(fset->fset_kml.fd_file, NULL);
fset->fset_kml.fd_file = f;
fset->fset_kml.fd_offset -= offset;
fset->fset_kml.fd_truncating = 0;
EXIT;
return 0;
exit_commit:
presto_trans_commit(fset, handle);
len = strlen("/.intermezzo/") + strlen(fset->fset_name) +strlen("sml");
if (kmlpath != NULL)
PRESTO_FREE(kmlpath, len);
if (smlpath != NULL)
PRESTO_FREE(smlpath, len);
return error;
}
/* structure of an extended log record:
buf-prefix buf-body [string1 [string2 [string3]]] buf-suffix
note: moves offset forward
*/
/*
 * Write one extended log record to @f at *@off (which is advanced):
 *
 *   buf-prefix buf-body [string1 [string2 [string3]]] buf-suffix
 *
 * @buf holds prefix, body and suffix back to back in @size bytes; the last
 * sizeof(struct kml_suffix) bytes are the suffix, which goes out after the
 * optional strings.  A string is skipped when its pointer is NULL or its
 * length is 0.
 *
 * Returns 0 on success and -EIO as soon as one write fails.
 */
static inline int presto_write_record(struct file *f, loff_t *off,
                                      const char *buf, size_t size,
                                      const char *string1, int len1,
                                      const char *string2, int len2,
                                      const char *string3, int len3)
{
        const char *strings[3];
        int lengths[3];
        size_t prefix_size = size - sizeof(struct kml_suffix);
        int i;

        strings[0] = string1;
        lengths[0] = len1;
        strings[1] = string2;
        lengths[1] = len2;
        strings[2] = string3;
        lengths[2] = len3;

        /* prefix and body */
        if (presto_fwrite(f, buf, prefix_size, off) != prefix_size)
                goto write_error;

        /* optional strings, in order */
        for (i = 0; i < 3; i++) {
                if (!strings[i] || !lengths[i])
                        continue;
                if (presto_fwrite(f, strings[i], lengths[i], off) !=
                    lengths[i])
                        goto write_error;
        }

        /* suffix */
        if (presto_fwrite(f, buf + prefix_size,
                          sizeof(struct kml_suffix), off) !=
            sizeof(struct kml_suffix))
                goto write_error;

        return 0;

write_error:
        CERROR("Write error!\n");
        EXIT;
        return -EIO;
}
/*
* rec->size must be valid prior to calling this function.
*
* had to export this for branch_reinter in kml_reint.c
*/
/*
 * Append a record to the KML or LML of @fset (selected by rec->is_kml).
 *
 * rec->size must be valid prior to calling this function.  Space is
 * reserved first, which determines rec->offset and rec->recno; the record
 * number is then patched into the suffix at the end of @buf and the record
 * is written out.  Finally presto_kml_dispatch() is called.
 *
 * Returns -EINVAL when @buf is NULL, the write error if writing fails, and
 * otherwise the result of presto_kml_dispatch().
 *
 * The reservation lives on this function's stack, so it is released on
 * every path after the write.  It used to stay linked on fd_reservations
 * when the write failed, leaving a dangling list entry.
 *
 * had to export this for branch_reinter in kml_reint.c
 */
int presto_log(struct presto_file_set *fset, struct rec_info *rec,
               const char *buf, size_t size,
               const char *string1, int len1,
               const char *string2, int len2,
               const char *string3, int len3)
{
        int rc;
        struct presto_reservation_data rd;
        loff_t offset;
        struct presto_log_fd *fd;
        struct kml_suffix *s;
        int prefix_size;

        ENTRY;

        /* buf is NULL when no_journal is in effect */
        if (!buf) {
                EXIT;
                return -EINVAL;
        }

        if (rec->is_kml) {
                fd = &fset->fset_kml;
        } else {
                fd = &fset->fset_lml;
        }

        presto_reserve_record(fset, fd, rec, &rd);

        if (rec->is_kml) {
                if (rec->offset < fset->fset_kml_logical_off) {
                        CERROR("record with pre-trunc offset.  tell phil.\n");
                        BUG();
                }
                /* rec->offset is logical; the file position is relative to
                 * the current start of the KML file */
                offset = rec->offset - fset->fset_kml_logical_off;
        } else {
                offset = rec->offset;
        }

        /* now we know the record number */
        prefix_size = size - sizeof(struct kml_suffix);
        s = (struct kml_suffix *) (buf + prefix_size);
        s->recno = cpu_to_le32(rec->recno);

        rc = presto_write_record(fd->fd_file, &offset, buf, size,
                                 string1, len1, string2, len2, string3, len3);

        /* always unlink rd before it goes out of scope */
        presto_release_record(fd, &rd);

        if (rc) {
                CERROR("presto: error writing record to %s\n",
                       rec->is_kml ? "KML" : "LML");
                EXIT;
                return rc;
        }

        rc = presto_kml_dispatch(fset);

        EXIT;
        return rc;
}
/* read from the record at tail */
/*
 * Locate the last record of the log @fd, whose file is @tail bytes long.
 *
 * Trailing zero words are skipped by reading backwards one int at a time;
 * the record suffix found in front of them is then read.  On success
 * *@recno and *@size receive the record number and length stored in that
 * suffix and *@tail_offset the file position reached after reading it.  All
 * three are 0 when the file is too short to hold a record or contains only
 * zeroes.
 *
 * Returns 0 on success (including "no record") and -EIO on a read error.
 *
 * The suffix fields are stored little-endian (see presto_log() and
 * journal_log_suffix()), so they are converted with le32_to_cpu() here.
 */
static int presto_last_record(struct presto_log_fd *fd, loff_t *size,
                              loff_t *tail_offset, __u32 *recno, loff_t tail)
{
        struct kml_suffix suffix;
        int rc;
        loff_t zeroes;
        __u32 len;

        *recno = 0;
        *tail_offset = 0;
        *size = 0;

        if (tail < sizeof(struct kml_prefix_hdr) + sizeof(suffix)) {
                EXIT;
                return 0;
        }

        zeroes = tail - sizeof(int);
        while (zeroes >= 0) {
                int data;

                /* presto_fread() advances zeroes past the word it read */
                rc = presto_fread(fd->fd_file, (char *)&data, sizeof(data),
                                  &zeroes);
                if (rc != sizeof(data)) {
                        rc = -EIO;
                        return rc;
                }

                if (data)
                        break;
                /* step back over this word and the one before it */
                zeroes -= 2 * sizeof(data);
        }

        /* zeroes at the beginning of file. this is needed to prevent
           presto_fread errors  -SHP
        */
        if (zeroes <= 0)
                return 0;

        zeroes -= sizeof(suffix) + sizeof(int);
        rc = presto_fread(fd->fd_file, (char *)&suffix, sizeof(suffix), &zeroes);
        if (rc != sizeof(suffix)) {
                EXIT;
                return rc;
        }

        len = le32_to_cpu(suffix.len);
        if (len > 500) {
                CERROR("InterMezzo: Warning long record tail at %ld, rec tail_offset at %ld (size %d)\n",
                       (long) zeroes, (long)*tail_offset, (int)len);
        }

        *recno = le32_to_cpu(suffix.recno);
        *size = len;
        *tail_offset = zeroes;
        return 0;
}
static int izo_kml_last_recno(struct presto_log_fd *logfd)
{
int rc;
loff_t size;
loff_t tail_offset;
int recno;
loff_t tail = logfd->fd_file->f_dentry->d_inode->i_size;
rc = presto_last_record(logfd, &size, &tail_offset, &recno, tail);
if (rc != 0) {
EXIT;
return rc;
}
logfd->fd_offset = tail_offset;
logfd->fd_recno = recno;
CDEBUG(D_JOURNAL, "setting fset_kml->fd_recno to %d, offset %Ld\n",
recno, tail_offset);
EXIT;
return 0;
}
/*
 * Open the fileset-internal log file @name with @flags (mode 0644) and
 * switch on data journaling for it when the cache filter provides
 * tr_journal_data.
 *
 * Returns the open file or an ERR_PTR() value; never NULL.  A file that does
 * not live in the fileset's own cache is rejected with ERR_PTR(-EINVAL).
 * That case used to return NULL, which the callers -- all of which only
 * test IS_ERR() -- treated as success and then dereferenced.
 */
struct file *izo_log_open(struct presto_file_set *fset, char *name, int flags)
{
        struct presto_cache *cache = fset->fset_cache;
        struct file *f;

        ENTRY;

        f = izo_fset_open(fset, name, flags, 0644);
        if (IS_ERR(f)) {
                EXIT;
                return f;
        }

        if (cache != presto_get_cache(f->f_dentry->d_inode)) {
                CERROR("InterMezzo: %s cache does not match fset cache!\n",name);
                /* NOTE(review): this clears the KML file pointer whichever
                 * log is being opened, without closing it; kept as is. */
                fset->fset_kml.fd_file = NULL;
                filp_close(f, NULL);
                EXIT;
                return ERR_PTR(-EINVAL);
        }

        if (cache->cache_filter && cache->cache_filter->o_trops &&
            cache->cache_filter->o_trops->tr_journal_data) {
                cache->cache_filter->o_trops->tr_journal_data
                        (f->f_dentry->d_inode);
        } else {
                CERROR("InterMezzo WARNING: no file data logging!\n");
        }

        EXIT;
        return f;
}
/*
 * Open the 'kml' file of @fset (creating it if needed), attach it to @logfd
 * and position the log state after the last record.  fset_lento_off and
 * fset_lento_recno start out at that same position.
 *
 * Returns 0 on success, or when @logfd already has a file; otherwise the
 * error from opening or scanning the file.
 */
int izo_init_kml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
{
        struct file *kml;
        int rc;

        ENTRY;

        if (logfd->fd_file) {
                CDEBUG(D_INODE, "fset already has KML open\n");
                EXIT;
                return 0;
        }

        logfd->fd_lock = RW_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&logfd->fd_reservations);

        kml = izo_log_open(fset, "kml", O_RDWR | O_CREAT);
        if (IS_ERR(kml))
                return PTR_ERR(kml);

        logfd->fd_file = kml;

        rc = izo_kml_last_recno(logfd);
        if (rc) {
                /* undo the attach before reporting the failure */
                logfd->fd_file = NULL;
                filp_close(kml, NULL);
                CERROR("InterMezzo: IO error in KML of fset %s\n",
                       fset->fset_name);
                EXIT;
                return rc;
        }

        fset->fset_lento_off = logfd->fd_offset;
        fset->fset_lento_recno = logfd->fd_recno;

        EXIT;
        return 0;
}
/*
 * Open the 'last_rcvd' file of @fset (creating it if needed), attach it to
 * @logfd, initialise the replicator cache and load the logical KML offset.
 * When no offset record can be read, the offset is set to 0 and written out.
 *
 * Returns 0 when @logfd already has a file, the open error if the file
 * cannot be opened, and otherwise the result of izo_rep_cache_init().
 */
int izo_init_last_rcvd_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
{
        struct rec_info recinfo;
        struct file *rcvd;
        int rc;

        ENTRY;

        if (logfd->fd_file != NULL) {
                CDEBUG(D_INODE, "fset already has last_rcvd open\n");
                EXIT;
                return 0;
        }

        logfd->fd_lock = RW_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&logfd->fd_reservations);

        rcvd = izo_log_open(fset, "last_rcvd", O_RDWR | O_CREAT);
        if (IS_ERR(rcvd))
                return PTR_ERR(rcvd);

        logfd->fd_file = rcvd;
        logfd->fd_offset = rcvd->f_dentry->d_inode->i_size;

        rc = izo_rep_cache_init(fset);

        if (presto_read_kml_logical_offset(&recinfo, fset) != 0) {
                /* The 'last_rcvd' file doesn't contain a kml offset record,
                 * probably because we just created 'last_rcvd'.  Write one. */
                fset->fset_kml_logical_off = 0;
                presto_write_kml_logical_offset(fset);
        } else {
                fset->fset_kml_logical_off = recinfo.offset;
        }

        EXIT;
        return rc;
}
/*
 * Open the 'lml' file of @fset (creating it if needed) and attach it to
 * @logfd with fd_offset set to the current file size.
 *
 * Returns 0 on success, or when @logfd already has a file; otherwise the
 * error from opening the file.
 */
int izo_init_lml_file(struct presto_file_set *fset, struct presto_log_fd *logfd)
{
        struct file *lml;

        ENTRY;

        if (logfd->fd_file) {
                CDEBUG(D_INODE, "fset already has lml open\n");
                EXIT;
                return 0;
        }

        logfd->fd_lock = RW_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&logfd->fd_reservations);

        lml = izo_log_open(fset, "lml", O_RDWR | O_CREAT);
        if (IS_ERR(lml))
                return PTR_ERR(lml);

        logfd->fd_file = lml;
        logfd->fd_offset = lml->f_dentry->d_inode->i_size;

        EXIT;
        return 0;
}
/* Get the KML-offset record from the last_rcvd file */
int presto_read_kml_logical_offset(struct rec_info *recinfo,
struct presto_file_set *fset)
{
loff_t off;
struct izo_rcvd_rec rec;
char uuid[16] = {0};
off = izo_rcvd_get(&rec, fset, uuid);
if (off < 0)
return -1;
recinfo->offset = rec.lr_local_offset;
return 0;
}
int presto_write_kml_logical_offset(struct presto_file_set *fset)
{
loff_t rc;
struct izo_rcvd_rec rec;
char uuid[16] = {0};
rc = izo_rcvd_get(&rec, fset, uuid);
if (rc < 0)
memset(&rec, 0, sizeof(rec));
rec.lr_local_offset =
cpu_to_le64(fset->fset_kml_logical_off);
return izo_rcvd_write(fset, &rec);
}
struct file * presto_copy_kml_tail(struct presto_file_set *fset,
unsigned long int start)
{
struct file *f;
int len;
loff_t read_off, write_off, bytes;
ENTRY;
/* Copy the tail of 'kml' to 'kml_tmp' */
f = izo_log_open(fset, "kml_tmp", O_RDWR);
if (IS_ERR(f)) {
EXIT;
return f;
}
write_off = 0;
read_off = start;
bytes = fset->fset_kml.fd_offset - start;
while (bytes > 0) {
char buf[4096];
int toread;
if (bytes > sizeof(buf))
toread = sizeof(buf);
else
toread = bytes;
len = presto_fread(fset->fset_kml.fd_file, buf, toread,
&read_off);
if (len <= 0)
break;
if (presto_fwrite(f, buf, len, &write_off) != len) {
filp_close(f, NULL);
EXIT;
return ERR_PTR(-EIO);
}
bytes -= len;
}
EXIT;
return f;
}
/* LML records here */
/* this writes an LML record to the LML file (rec->is_kml =0) */
/*
 * Write a CLOSE record for @file to the LML (rec->is_kml = 0).
 *
 * The fixed part of the record is assembled in a local buffer: prefix with
 * groups, new file version, inode number, generation, path length, remote
 * inode number and generation, remote version, rec->offset and the suffix.
 * The path itself, padded to a multiple of 4, is written between body and
 * suffix by presto_log().
 *
 * Returns 0 when journaling is switched off, -ENOMEM when no path buffer
 * can be allocated, -EINVAL when the fixed part would not fit into the
 * local buffer, and otherwise the result of presto_log().
 *
 * Changes from the earlier version:
 *  - the size of the remote version is sizeof(*remote_version); it used to
 *    be sizeof(remote_version), the size of a pointer, so the record size
 *    was too small and the suffix was placed inside the record body;
 *  - a record that does not fit is refused, where before the overflow was
 *    only logged and the buffer was overrun anyway.
 */
int presto_write_lml_close(struct rec_info *rec,
                           struct presto_file_set *fset,
                           struct file *file,
                           __u64 remote_ino,
                           __u64 remote_generation,
                           struct presto_version *remote_version,
                           struct presto_version *new_file_ver)
{
        int opcode = KML_OPCODE_CLOSE;
        char *buffer;
        struct dentry *dentry = file->f_dentry;
        __u64 ino;
        __u32 pathlen;
        char *path;
        __u32 generation;
        int size;
        char *logrecord;
        char record[292];
        struct dentry *root;
        int error;

        ENTRY;

        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        CDEBUG(D_INODE, "Path: %s\n", path);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        ino = cpu_to_le64(dentry->d_inode->i_ino);
        generation = cpu_to_le32(dentry->d_inode->i_generation);

        /* must add up to exactly what is logged into record[] below */
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
                sizeof(ino) + sizeof(generation) + sizeof(pathlen) +
                sizeof(remote_ino) + sizeof(remote_generation) +
                sizeof(*remote_version) + sizeof(rec->offset) +
                sizeof(struct kml_suffix);

        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                BUFF_FREE(buffer);
                EXIT;
                return -EINVAL;
        }

        rec->is_kml = 0;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &ino, sizeof(ino));
        logrecord = logit(logrecord, &generation, sizeof(generation));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &remote_ino, sizeof(remote_ino));
        logrecord = logit(logrecord, &remote_generation,
                          sizeof(remote_generation));
        logrecord = log_version(logrecord, remote_version);
        logrecord = logit(logrecord, &rec->offset, sizeof(rec->offset));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        BUFF_FREE(buffer);

        EXIT;
        return error;
}
/*
* Check if the given record is at the end of the file. If it is, truncate
* the lml to the record's offset, removing it. Repeat on prior record,
* until we reach an active record or a reserved record (as defined by the
* reservations list).
*/
static int presto_truncate_lml_tail(struct presto_file_set *fset)
{
loff_t lml_tail;
loff_t lml_last_rec;
loff_t lml_last_recsize;
loff_t local_offset;
int recno;
struct kml_prefix_hdr prefix;
struct inode *inode = fset->fset_lml.fd_file->f_dentry->d_inode;
void *handle;
int rc;
ENTRY;
/* If someone else is already truncating the LML, return. */
write_lock(&fset->fset_lml.fd_lock);
if (fset->fset_lml.fd_truncating == 1 ) {
write_unlock(&fset->fset_lml.fd_lock);
EXIT;
return 0;
}
/* someone is about to write to the end of the LML */
if ( !list_empty(&fset->fset_lml.fd_reservations) ) {
write_unlock(&fset->fset_lml.fd_lock);
EXIT;
return 0;
}
lml_tail = fset->fset_lml.fd_file->f_dentry->d_inode->i_size;
/* Nothing to truncate?*/
if (lml_tail == 0) {
write_unlock(&fset->fset_lml.fd_lock);
EXIT;
return 0;
}
fset->fset_lml.fd_truncating = 1;
write_unlock(&fset->fset_lml.fd_lock);
presto_last_record(&fset->fset_lml, &lml_last_recsize,
&lml_last_rec, &recno, lml_tail);
/* Do we have a record to check? If not we have zeroes at the
beginning of the file. -SHP
*/
if (lml_last_recsize != 0) {
local_offset = lml_last_rec - lml_last_recsize;
rc = presto_fread(fset->fset_lml.fd_file, (char *)&prefix,
sizeof(prefix), &local_offset);
if (rc != sizeof(prefix)) {
EXIT;
goto tr_out;
}
if ( prefix.opcode != KML_OPCODE_NOOP ) {
EXIT;
rc = 0;
/* We may have zeroes at the end of the file, should
we clear them out? -SHP
*/
goto tr_out;
}
} else
lml_last_rec=0;
handle = presto_trans_start(fset, inode, KML_OPCODE_TRUNC);
if ( IS_ERR(handle) ) {
EXIT;
rc = -ENOMEM;
goto tr_out;
}
rc = izo_do_truncate(fset, fset->fset_lml.fd_file->f_dentry,
lml_last_rec - lml_last_recsize, lml_tail);
presto_trans_commit(fset, handle);
if ( rc == 0 ) {
rc = 1;
}
EXIT;
tr_out:
CDEBUG(D_JOURNAL, "rc = %d\n", rc);
write_lock(&fset->fset_lml.fd_lock);
fset->fset_lml.fd_truncating = 0;
write_unlock(&fset->fset_lml.fd_lock);
return rc;
}
/*
 * Repeatedly trim the tail of the LML until presto_truncate_lml_tail()
 * stops reporting progress (a positive return value).
 *
 * Returns the last value from presto_truncate_lml_tail(): 0 or a negative
 * errno.  Negative results other than -EALREADY are logged.
 */
int presto_truncate_lml(struct presto_file_set *fset)
{
        int rc;

        ENTRY;
        do {
                rc = presto_truncate_lml_tail(fset);
        } while (rc > 0);

        if (rc < 0) {
                if (rc != -EALREADY)
                        CERROR("truncate_lml error %d\n", rc);
        }

        EXIT;
        return rc;
}
/*
 * Turn the LML record at @lml_offset into a NOOP by reading its prefix
 * header, changing the opcode, and writing the header back in place.
 *
 * Returns 0 on success or when journaling is disabled for @fset, and -EIO
 * if the header could not be read or written in full.
 */
int presto_clear_lml_close(struct presto_file_set *fset, loff_t lml_offset)
{
        struct kml_prefix_hdr hdr;
        loff_t pos = lml_offset;
        int result = 0;
        int nbytes;

        ENTRY;
        if (presto_no_journal(fset))
                goto done;

        CDEBUG(D_JOURNAL, "reading prefix: off %ld, size %Zd\n",
               (long)lml_offset, sizeof(hdr));
        nbytes = presto_fread(fset->fset_lml.fd_file, (char *)&hdr,
                              sizeof(hdr), &pos);
        if (nbytes != sizeof(hdr)) {
                CERROR("presto: clear_lml io error %d\n", nbytes);
                result = -EIO;
                goto done;
        }

        /* overwrite the prefix */
        CDEBUG(D_JOURNAL, "overwriting prefix: off %ld\n", (long)lml_offset);
        hdr.opcode = KML_OPCODE_NOOP;
        /* the read advanced pos, so rewind to the start of the record */
        pos = lml_offset;
        /* note: this does just a single transaction in the cache */
        nbytes = presto_fwrite(fset->fset_lml.fd_file, (char *)&hdr,
                               sizeof(hdr), &pos);
        if (nbytes != sizeof(hdr))
                result = -EIO;

done:
        EXIT;
        return result;
}
/* now a journal function for every operation */
int presto_journal_setattr(struct rec_info *rec, struct presto_file_set *fset,
struct dentry *dentry, struct presto_version *old_ver,
struct izo_rollback_data *rb, struct iattr *iattr)
{
int opcode = KML_OPCODE_SETATTR;
char *buffer, *path, *logrecord, record[316];
struct dentry *root;
__u32 uid, gid, mode, valid, flags, pathlen;
__u64 fsize, mtime, ctime;
int error, size;
ENTRY;
if ( presto_no_journal(fset) ) {
EXIT;
return 0;
}
if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
|| ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
EXIT;
return 0;
}
root = fset->fset_dentry;
BUFF_ALLOC(buffer, NULL);
path = presto_path(dentry, root, buffer, PAGE_SIZE);
pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
size = sizeof(__u32) * current->group_info->ngroups +
sizeof(struct kml_prefix_hdr) + sizeof(*old_ver) +
sizeof(valid) + sizeof(mode) + sizeof(uid) + sizeof(gid) +
sizeof(fsize) + sizeof(mtime) + sizeof(ctime) + sizeof(flags) +
sizeof(pathlen) + sizeof(*rb) + sizeof(struct kml_suffix);
if ( size > sizeof(record) )
CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
/* Only journal one kind of mtime, and not atime at all. Also don't
* journal bogus data in iattr, to make the journal more compressible.
*/
if (iattr->ia_valid & ATTR_MTIME_SET)
iattr->ia_valid = iattr->ia_valid | ATTR_MTIME;
valid = cpu_to_le32(iattr->ia_valid & ~(ATTR_ATIME | ATTR_MTIME_SET |
ATTR_ATIME_SET));
mode = iattr->ia_valid & ATTR_MODE ? cpu_to_le32(iattr->ia_mode): 0;
uid = iattr->ia_valid & ATTR_UID ? cpu_to_le32(iattr->ia_uid): 0;
gid = iattr->ia_valid & ATTR_GID ? cpu_to_le32(iattr->ia_gid): 0;
fsize = iattr->ia_valid & ATTR_SIZE ? cpu_to_le64(iattr->ia_size): 0;
mtime = iattr->ia_valid & ATTR_MTIME ? cpu_to_le64(iattr->ia_mtime.tv_sec): 0;
ctime = iattr->ia_valid & ATTR_CTIME ? cpu_to_le64(iattr->ia_ctime.tv_sec): 0;
flags = iattr->ia_valid & ATTR_ATTR_FLAG ?
cpu_to_le32(iattr->ia_attr_flags): 0;
rec->is_kml = 1;
rec->size = size + size_round(le32_to_cpu(pathlen));
logrecord = journal_log_prefix(record, opcode, rec);
logrecord = log_version(logrecord, old_ver);
logrecord = logit(logrecord, &valid, sizeof(valid));
logrecord = logit(logrecord, &mode, sizeof(mode));
logrecord = logit(logrecord, &uid, sizeof(uid));
logrecord = logit(logrecord, &gid, sizeof(gid));
logrecord = logit(logrecord, &fsize, sizeof(fsize));
logrecord = logit(logrecord, &mtime, sizeof(mtime));
logrecord = logit(logrecord, &ctime, sizeof(ctime));
logrecord = logit(logrecord, &flags, sizeof(flags));
logrecord = log_rollback(logrecord, rb);
logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);
error = presto_log(fset, rec, record, size,
path, size_round(le32_to_cpu(pathlen)),
NULL, 0, NULL, 0);
BUFF_FREE(buffer);
EXIT;
return error;
}
/*
 * Build a KML_OPCODE_GET_FILEID record for @dentry and pass it to
 * izo_upc_get_fileid() on @minor.
 *
 * Unlike the presto_journal_*() functions, the path is copied into the
 * record itself (prefix, path length, path, suffix), and the suffix's recno
 * is forced to 0 afterwards.
 *
 * Returns -EOVERFLOW if the record (including the path) would not fit the
 * on-stack buffer, otherwise the result of izo_upc_get_fileid().
 */
int presto_get_fileid(int minor, struct presto_file_set *fset,
                      struct dentry *dentry)
{
        int opcode = KML_OPCODE_GET_FILEID;
        struct rec_info rec;
        char *buffer, *path, *logrecord, record[4096]; /*include path*/
        struct dentry *root;
        __u32 uid, gid, pathlen;
        int error, size;
        struct kml_suffix *suffix;

        ENTRY;
        root = fset->fset_dentry;

        /* uid/gid are only used for the debug message below */
        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(pathlen) +
                size_round(le32_to_cpu(pathlen)) +
                sizeof(struct kml_suffix);

        CDEBUG(D_FILE, "kml size: %d\n", size);
        /* The path is copied into record[], so a long path can overflow it. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        memset(&rec, 0, sizeof(rec));
        rec.is_kml = 1;
        rec.size = size;

        logrecord = journal_log_prefix(record, opcode, &rec);
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, path, size_round(le32_to_cpu(pathlen)));
        suffix = (struct kml_suffix *)logrecord;
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, &rec);
        /* journal_log_suffix expects journal_log to set this */
        suffix->recno = 0;

        CDEBUG(D_FILE, "actual kml size: %Zd\n", logrecord - record);
        CDEBUG(D_FILE, "get fileid: uid %d, gid %d, path: %s\n", uid, gid,path);

        error = izo_upc_get_fileid(minor, size, record,
                                   size_round(le32_to_cpu(pathlen)), path,
                                   fset->fset_name);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the creation of the regular file @dentry.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for dentry->d_parent, @new_file_ver, mode, uid, gid,
 * path length, suffix.  The path is handed to presto_log() separately.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int presto_journal_create(struct rec_info *rec, struct presto_file_set *fset,
                          struct dentry *dentry,
                          struct presto_version *tgt_dir_ver,
                          struct presto_version *new_file_ver, int mode)
{
        int opcode = KML_OPCODE_CREATE;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, lmode, pathlen;
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        lmode = cpu_to_le32(mode);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &lmode, sizeof(lmode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the creation of the symlink @dentry pointing at @target.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for dentry->d_parent, @new_link_ver, uid, gid, path
 * length, target length, suffix.  The path and the target string are handed
 * to presto_log() separately.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int presto_journal_symlink(struct rec_info *rec, struct presto_file_set *fset,
                           struct dentry *dentry, const char *target,
                           struct presto_version *tgt_dir_ver,
                           struct presto_version *new_link_ver)
{
        int opcode = KML_OPCODE_SYMLINK;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, pathlen;
        __u32 targetlen = cpu_to_le32(strlen(target));
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
                sizeof(targetlen) + sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(targetlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_link_ver);
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &targetlen, sizeof(targetlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           target, size_round(le32_to_cpu(targetlen)),
                           NULL, 0);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the creation of the directory @dentry.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for dentry->d_parent, @new_dir_ver, mode, uid, gid,
 * path length, suffix.  The path is handed to presto_log() separately.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int presto_journal_mkdir(struct rec_info *rec, struct presto_file_set *fset,
                         struct dentry *dentry,
                         struct presto_version *tgt_dir_ver,
                         struct presto_version *new_dir_ver, int mode)
{
        int opcode = KML_OPCODE_MKDIR;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, lmode, pathlen;
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        lmode = cpu_to_le32(mode);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_dir_ver);
        logrecord = logit(logrecord, &lmode, sizeof(lmode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the removal of the directory entry @name (of @len bytes) from
 * the directory @dir.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for @dir, @old_dir_ver, rollback data (@rb), path
 * length, name length, suffix.  The path of @dir and @name are handed to
 * presto_log() separately.
 *
 * NOTE(review): @rb is written with logit() here, whereas setattr and
 * unlink use log_rollback() -- confirm this difference is intended.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int
presto_journal_rmdir(struct rec_info *rec, struct presto_file_set *fset,
                     struct dentry *dir, struct presto_version *tgt_dir_ver,
                     struct presto_version *old_dir_ver,
                     struct izo_rollback_data *rb, int len, const char *name)
{
        int opcode = KML_OPCODE_RMDIR;
        char *buffer, *path, *logrecord, record[316];
        __u32 pathlen, llen;
        struct dentry *root;
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        llen = cpu_to_le32(len);
        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dir, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        CDEBUG(D_JOURNAL, "path: %s (%d), name: %s (%d), size %d\n",
               path, pathlen, name, len, size);

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(len);

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dir);
        logrecord = log_version(logrecord, old_dir_ver);
        logrecord = logit(logrecord, rb, sizeof(*rb));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &llen, sizeof(llen));
        logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           name, size_round(len),
                           NULL, 0);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the creation of the device node @dentry.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for dentry->d_parent, @new_node_ver, mode, uid, gid,
 * device major, device minor, path length, suffix.  The path is handed to
 * presto_log() separately.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int
presto_journal_mknod(struct rec_info *rec, struct presto_file_set *fset,
                     struct dentry *dentry, struct presto_version *tgt_dir_ver,
                     struct presto_version *new_node_ver, int mode,
                     int dmajor, int dminor )
{
        int opcode = KML_OPCODE_MKNOD;
        char *buffer, *path, *logrecord, record[292];
        struct dentry *root;
        __u32 uid, gid, lmode, lmajor, lminor, pathlen;
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        uid = cpu_to_le32(dentry->d_inode->i_uid);
        gid = cpu_to_le32(dentry->d_inode->i_gid);
        lmode = cpu_to_le32(mode);
        lmajor = cpu_to_le32(dmajor);
        lminor = cpu_to_le32(dminor);

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(lmajor) +
                sizeof(lminor) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dentry->d_parent);
        logrecord = log_version(logrecord, new_node_ver);
        logrecord = logit(logrecord, &lmode, sizeof(lmode));
        logrecord = logit(logrecord, &uid, sizeof(uid));
        logrecord = logit(logrecord, &gid, sizeof(gid));
        logrecord = logit(logrecord, &lmajor, sizeof(lmajor));
        logrecord = logit(logrecord, &lminor, sizeof(lminor));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the creation of the hard link @tgt to the existing file @src.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for tgt->d_parent, @new_link_ver, source path length,
 * target path length, suffix.  The source and target paths are handed to
 * presto_log() separately, in that order.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int
presto_journal_link(struct rec_info *rec, struct presto_file_set *fset,
                    struct dentry *src, struct dentry *tgt,
                    struct presto_version *tgt_dir_ver,
                    struct presto_version *new_link_ver)
{
        int opcode = KML_OPCODE_LINK;
        char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
        __u32 pathlen, srcpathlen;
        struct dentry *root;
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(srcbuffer, NULL);
        srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
        srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));

        BUFF_ALLOC(buffer, srcbuffer);
        path = presto_path(tgt, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(srcpathlen) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(srcpathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, tgt->d_parent);
        logrecord = log_version(logrecord, new_link_ver);
        logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);

        error = presto_log(fset, rec, record, size,
                           srcpath, size_round(le32_to_cpu(srcpathlen)),
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0);

out:
        BUFF_FREE(srcbuffer);
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the rename of @src to @tgt.
 *
 * Fixed record part, in order: prefix, @src_dir_ver, the version logged by
 * log_dentry_version() for src->d_parent, @tgt_dir_ver, the version logged
 * for tgt->d_parent, source path length, target path length, suffix.  The
 * source and target paths are handed to presto_log() separately, in that
 * order.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int presto_journal_rename(struct rec_info *rec, struct presto_file_set *fset,
                          struct dentry *src, struct dentry *tgt,
                          struct presto_version *src_dir_ver,
                          struct presto_version *tgt_dir_ver)
{
        int opcode = KML_OPCODE_RENAME;
        char *buffer, *srcbuffer, *path, *srcpath, *logrecord, record[292];
        __u32 pathlen, srcpathlen;
        struct dentry *root;
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(srcbuffer, NULL);
        srcpath = presto_path(src, root, srcbuffer, PAGE_SIZE);
        srcpathlen = cpu_to_le32(MYPATHLEN(srcbuffer, srcpath));

        BUFF_ALLOC(buffer, srcbuffer);
        path = presto_path(tgt, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 4 * sizeof(*src_dir_ver) +
                sizeof(srcpathlen) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(srcpathlen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, src_dir_ver);
        logrecord = log_dentry_version(logrecord, src->d_parent);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, tgt->d_parent);
        logrecord = logit(logrecord, &srcpathlen, sizeof(srcpathlen));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, tgt, rec);

        error = presto_log(fset, rec, record, size,
                           srcpath, size_round(le32_to_cpu(srcpathlen)),
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0);

out:
        BUFF_FREE(buffer);
        BUFF_FREE(srcbuffer);
        EXIT;
        return error;
}
/*
 * Journal the unlink of @dentry from the directory @dir.
 *
 * Fixed record part, in order: prefix, @tgt_dir_ver, the version logged by
 * log_dentry_version() for @dir, @old_file_ver, rollback data (@rb), path
 * length, name length, old target length, suffix.  The path of @dir, the
 * entry name and @old_target are handed to presto_log() separately.
 *
 * NOTE(review): @old_targetlen is logged as passed in (no cpu_to_le32),
 * unlike the path and name lengths -- confirm what readers of the record
 * expect.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if the record would not
 * fit the on-stack buffer, otherwise the result of presto_log().
 */
int presto_journal_unlink(struct rec_info *rec, struct presto_file_set *fset,
                          struct dentry *dir, struct presto_version *tgt_dir_ver,
                          struct presto_version *old_file_ver,
                          struct izo_rollback_data *rb, struct dentry *dentry,
                          char *old_target, int old_targetlen)
{
        int opcode = KML_OPCODE_UNLINK;
        char *buffer, *path, *logrecord, record[316];
        const char *name;
        __u32 pathlen, llen;
        struct dentry *root;
        int error, size, len;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        name = dentry->d_name.name;
        len = dentry->d_name.len;

        llen = cpu_to_le32(len);
        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dir, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) +
                sizeof(pathlen) + sizeof(llen) + sizeof(*rb) +
                sizeof(old_targetlen) + sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen)) + size_round(len) +
                size_round(old_targetlen);

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, tgt_dir_ver);
        logrecord = log_dentry_version(logrecord, dir);
        logrecord = log_version(logrecord, old_file_ver);
        logrecord = log_rollback(logrecord, rb);
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &llen, sizeof(llen));
        logrecord = logit(logrecord, &old_targetlen, sizeof(old_targetlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dir, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           name, size_round(len),
                           old_target, size_round(old_targetlen));

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Journal the close of the file @dentry.
 *
 * The credentials written into the record prefix come from @fd (captured
 * in the presto_file_data) when it is non-NULL, otherwise from the current
 * task and the inode.
 *
 * Fixed record part, in order: prefix (with groups, fsuid, fsgid), open
 * mode, open uid, open gid, @old_file_ver, @new_file_ver, inode number,
 * generation, path length, suffix.  The path is handed to presto_log()
 * separately.
 *
 * Returns 0 if journaling is disabled or the dentry has no inode, has no
 * links, or is unhashed (and is not its own parent); -EOVERFLOW if there
 * are more groups than NGROUPS_SMALL or the record would not fit the
 * on-stack buffer; otherwise the result of presto_log().
 */
int
presto_journal_close(struct rec_info *rec, struct presto_file_set *fset,
                     struct presto_file_data *fd, struct dentry *dentry,
                     struct presto_version *old_file_ver,
                     struct presto_version *new_file_ver)
{
        int opcode = KML_OPCODE_CLOSE;
        char *buffer, *path, *logrecord, record[316];
        struct dentry *root;
        int error, size;
        __u32 i;
        __u32 pathlen, generation;
        __u64 ino;
        __u32 open_fsuid;
        __u32 open_fsgid;
        __u32 open_ngroups;
        __u32 open_groups[NGROUPS_SMALL];
        __u32 open_mode;
        __u32 open_uid;
        __u32 open_gid;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
            || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        /*
         * Validate the group count before copying: open_groups[] only has
         * room for NGROUPS_SMALL entries.
         */
        open_ngroups = fd ? fd->fd_ngroups : current->group_info->ngroups;
        if (open_ngroups > NGROUPS_SMALL) {
                CERROR("InterMezzo: too many groups (%u) in %s!\n",
                       open_ngroups, __FUNCTION__);
                EXIT;
                return -EOVERFLOW;
        }

        if (fd) {
                for (i = 0; i < open_ngroups; i++)
                        open_groups[i] = (__u32) fd->fd_groups[i];
                open_mode = fd->fd_mode;
                open_uid = fd->fd_uid;
                open_gid = fd->fd_gid;
                open_fsuid = fd->fd_fsuid;
                open_fsgid = fd->fd_fsgid;
        } else {
                for (i = 0; i < open_ngroups; i++)
                        open_groups[i] = (__u32) GROUP_AT(current->group_info,i);
                open_mode = dentry->d_inode->i_mode;
                open_uid = dentry->d_inode->i_uid;
                open_gid = dentry->d_inode->i_gid;
                open_fsuid = current->fsuid;
                open_fsgid = current->fsgid;
        }

        BUFF_ALLOC(buffer, NULL);
        path = presto_path(dentry, root, buffer, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(buffer, path));
        ino = cpu_to_le64(dentry->d_inode->i_ino);
        generation = cpu_to_le32(dentry->d_inode->i_generation);
        size = sizeof(__u32) * open_ngroups +
                sizeof(open_mode) + sizeof(open_uid) + sizeof(open_gid) +
                sizeof(struct kml_prefix_hdr) + sizeof(*old_file_ver) +
                sizeof(*new_file_ver) + sizeof(ino) + sizeof(generation) +
                sizeof(pathlen) + sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix_with_groups_and_ids(
                record, opcode, rec, open_ngroups, open_groups,
                open_fsuid, open_fsgid);
        logrecord = logit(logrecord, &open_mode, sizeof(open_mode));
        logrecord = logit(logrecord, &open_uid, sizeof(open_uid));
        logrecord = logit(logrecord, &open_gid, sizeof(open_gid));
        logrecord = log_version(logrecord, old_file_ver);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &ino, sizeof(ino));
        logrecord = logit(logrecord, &generation, sizeof(generation));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

out:
        BUFF_FREE(buffer);
        EXIT;
        return error;
}
/*
 * Write a KML_OPCODE_CLOSE record from already-decoded pieces rather than
 * from a dentry.
 *
 * @pathlen, @ino and @generation are logged exactly as passed in; @pathlen
 * is run through le32_to_cpu() when sizes are computed, so it is expected
 * in little-endian form.  The suffix is written with a NULL dentry.
 *
 * Fixed record part, in order: prefix (with @groups), @new_file_ver, @ino,
 * @generation, @pathlen, suffix.  @path is handed to presto_log()
 * separately.
 *
 * Returns 0 if journaling is disabled, -EOVERFLOW if @ngroups is negative
 * or the record would not fit the on-stack buffer, otherwise the result of
 * presto_log().
 */
int presto_rewrite_close(struct rec_info *rec, struct presto_file_set *fset,
                         char *path, __u32 pathlen,
                         int ngroups, __u32 *groups,
                         __u64 ino, __u32 generation,
                         struct presto_version *new_file_ver)
{
        int opcode = KML_OPCODE_CLOSE;
        char *logrecord, record[292];
        int error, size;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        size = sizeof(__u32) * ngroups +
                sizeof(struct kml_prefix_hdr) + sizeof(*new_file_ver) +
                sizeof(ino) + sizeof(generation) +
                sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (ngroups < 0 || size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                EXIT;
                return -EOVERFLOW;
        }

        rec->is_kml = 1;
        rec->size = size + size_round(le32_to_cpu(pathlen));

        logrecord = journal_log_prefix_with_groups(record, opcode, rec,
                                                   ngroups, groups);
        logrecord = log_version(logrecord, new_file_ver);
        logrecord = logit(logrecord, &ino, sizeof(ino));
        logrecord = logit(logrecord, &generation, sizeof(generation));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = journal_log_suffix(logrecord, record, fset, NULL, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           NULL, 0, NULL, 0);

        EXIT;
        return error;
}
/* write closes for the local close records in the LML */
int presto_complete_lml(struct presto_file_set *fset)
{
__u32 groups[NGROUPS_SMALL];
loff_t lml_offset;
loff_t read_offset;
char *buffer;
void *handle;
struct rec_info rec;
struct close_rec {
struct presto_version new_file_ver;
__u64 ino;
__u32 generation;
__u32 pathlen;
__u64 remote_ino;
__u32 remote_generation;
__u32 remote_version;
__u64 lml_offset;
} close_rec;
struct file *file = fset->fset_lml.fd_file;
struct kml_prefix_hdr prefix;
int rc = 0;
ENTRY;
lml_offset = 0;
again:
if (lml_offset >= file->f_dentry->d_inode->i_size) {
EXIT;
return rc;
}
read_offset = lml_offset;
rc = presto_fread(file, (char *)&prefix,
sizeof(prefix), &read_offset);
if ( rc != sizeof(prefix) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 1, tell Peter\n");
return -EIO;
}
if ( prefix.opcode == KML_OPCODE_NOOP ) {
lml_offset += prefix.len;
goto again;
}
rc = presto_fread(file, (char *)groups,
prefix.ngroups * sizeof(__u32), &read_offset);
if ( rc != prefix.ngroups * sizeof(__u32) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 2, tell Peter\n");
return -EIO;
}
rc = presto_fread(file, (char *)&close_rec,
sizeof(close_rec), &read_offset);
if ( rc != sizeof(close_rec) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 3, tell Peter\n");
return -EIO;
}
/* is this a backfetch or a close record? */
if ( le64_to_cpu(close_rec.remote_ino) != 0 ) {
lml_offset += prefix.len;
goto again;
}
BUFF_ALLOC(buffer, NULL);
rc = presto_fread(file, (char *)buffer,
le32_to_cpu(close_rec.pathlen), &read_offset);
if ( rc != le32_to_cpu(close_rec.pathlen) ) {
EXIT;
CERROR("presto_complete_lml: ioerror - 4, tell Peter\n");
return -EIO;
}
handle = presto_trans_start(fset, file->f_dentry->d_inode,
KML_OPCODE_RELEASE);
if ( IS_ERR(handle) ) {
EXIT;
return -ENOMEM;
}
rc = presto_clear_lml_close(fset, lml_offset);
if ( rc ) {
CERROR("error during clearing: %d\n", rc);
presto_trans_commit(fset, handle);
EXIT;
return rc;
}
rc = presto_rewrite_close(&rec, fset, buffer, close_rec.pathlen,
prefix.ngroups, groups,
close_rec.ino, close_rec.generation,
&close_rec.new_file_ver);
if ( rc ) {
CERROR("error during rewrite close: %d\n", rc);
presto_trans_commit(fset, handle);
EXIT;
return rc;
}
presto_trans_commit(fset, handle);
if ( rc ) {
CERROR("error during truncation: %d\n", rc);
EXIT;
return rc;
}
lml_offset += prefix.len;
CDEBUG(D_JOURNAL, "next LML record at: %ld\n", (long)lml_offset);
goto again;
EXIT;
return -EINVAL;
}
#ifdef CONFIG_FS_EXT_ATTR
/*
 * Journal an extended attribute (EA) operation on @dentry.
 *
 * A NULL @buffer implies the attribute is getting deleted.  In that case
 * the opcode becomes KML_OPCODE_DELEXTATTR and the logged value length is
 * 0; nothing else is affected.
 *
 * Fixed record part, in order: prefix, @ver, the version logged by
 * log_dentry_version() for @dentry, flags, inode mode, path length, name
 * length, value length, suffix.  The path, @name and @buffer are handed to
 * presto_log() separately.
 *
 * Returns 0 if journaling is disabled or the dentry has no inode, has no
 * links, or is unhashed (and is not its own parent); -EOVERFLOW if the
 * record would not fit the on-stack buffer; otherwise the result of
 * presto_log().
 */
int presto_journal_set_ext_attr (struct rec_info *rec,
                                 struct presto_file_set *fset,
                                 struct dentry *dentry,
                                 struct presto_version *ver, const char *name,
                                 const char *buffer, int buffer_len,
                                 int flags)
{
        int opcode = (buffer == NULL) ?
                     KML_OPCODE_DELEXTATTR :
                     KML_OPCODE_SETEXTATTR ;
        char *temp, *path, *logrecord, record[292];
        struct dentry *root;
        int error, size;
        __u32 namelen=cpu_to_le32(strnlen(name,PRESTO_EXT_ATTR_NAME_MAX));
        __u32 buflen=(buffer != NULL)? cpu_to_le32(buffer_len): cpu_to_le32(0);
        __u32 mode, pathlen;

        ENTRY;
        if (presto_no_journal(fset)) {
                EXIT;
                return 0;
        }

        if (!dentry->d_inode || (dentry->d_inode->i_nlink == 0)
            || ((dentry->d_parent != dentry) && d_unhashed(dentry))) {
                EXIT;
                return 0;
        }

        root = fset->fset_dentry;

        BUFF_ALLOC(temp, NULL);
        path = presto_path(dentry, root, temp, PAGE_SIZE);
        pathlen = cpu_to_le32(MYPATHLEN(temp, path));

        flags=cpu_to_le32(flags);
        /* Ugly, but needed. posix ACLs change the mode without using
         * setattr, we need to record these changes. The EA code per se
         * is not really affected.
         */
        mode=cpu_to_le32(dentry->d_inode->i_mode);

        size = sizeof(__u32) * current->group_info->ngroups +
                sizeof(struct kml_prefix_hdr) +
                2 * sizeof(struct presto_version) +
                sizeof(flags) + sizeof(mode) + sizeof(namelen) +
                sizeof(buflen) + sizeof(pathlen) +
                sizeof(struct kml_suffix);

        /* Refuse to build a record that would run past the stack buffer. */
        if (size > sizeof(record)) {
                CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__);
                error = -EOVERFLOW;
                goto out;
        }

        rec->is_kml = 1;
        /* Make space for a path, a attr name and value*/
        /* We use the buflen instead of buffer_len to make sure that we
         * journal the right length. This may be a little paranoid, but
         * with 64 bits round the corner, I would rather be safe than sorry!
         * Also this handles deletes with non-zero buffer_lengths correctly.
         * SHP
         */
        rec->size = size + size_round(le32_to_cpu(pathlen)) +
                size_round(le32_to_cpu(namelen)) +
                size_round(le32_to_cpu(buflen));

        logrecord = journal_log_prefix(record, opcode, rec);
        logrecord = log_version(logrecord, ver);
        logrecord = log_dentry_version(logrecord, dentry);
        logrecord = logit(logrecord, &flags, sizeof(flags));
        /* length must describe the object being logged (mode, not flags) */
        logrecord = logit(logrecord, &mode, sizeof(mode));
        logrecord = logit(logrecord, &pathlen, sizeof(pathlen));
        logrecord = logit(logrecord, &namelen, sizeof(namelen));
        logrecord = logit(logrecord, &buflen, sizeof(buflen));
        logrecord = journal_log_suffix(logrecord, record, fset, dentry, rec);

        error = presto_log(fset, rec, record, size,
                           path, size_round(le32_to_cpu(pathlen)),
                           name, size_round(le32_to_cpu(namelen)),
                           buffer, size_round(le32_to_cpu(buflen)));

out:
        BUFF_FREE(temp);
        EXIT;
        return error;
}
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include <linux/ext2_fs.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#if defined(CONFIG_EXT2_FS)
/* EXT2 has no journalling, so commit is a no-op and trans_start only checks
 * that enough free blocks remain. */
/*
 * Report the space available on the ext2 filesystem @sb, in bytes: the
 * superblock's free block count minus its reserved block count, scaled by
 * the block size.  @cache is unused.
 *
 * The arithmetic is done in loff_t, as in presto_e3_freespace(), so the
 * byte count is not truncated to the width of unsigned long on 32-bit
 * machines.  The result is negative when the reserved count exceeds the
 * free count.
 */
static loff_t presto_e2_freespace(struct presto_cache *cache,
                                  struct super_block *sb)
{
        loff_t freebl = le32_to_cpu(EXT2_SB(sb)->s_es->s_free_blocks_count);
        loff_t avail = freebl -
                le32_to_cpu(EXT2_SB(sb)->s_es->s_r_blocks_count);

        return (avail << EXT2_BLOCK_SIZE_BITS(sb));
}
/*
 * Start the filesystem journal operations for ext2.
 *
 * ext2 has no journal, so this only checks free space.
 *
 * Returns NULL if journaling is disabled for @fset or the cache type is
 * not "ext2"; ERR_PTR(-ENOSPC) if fewer than 3 blocks are free, or fewer
 * than 6 for any operation other than unlink/rmdir; otherwise the dummy
 * non-NULL handle (void *)1.
 */
static void *presto_e2_trans_start(struct presto_file_set *fset, struct inode *inode, int op)
{
        __u32 avail_kmlblocks;

        if (presto_no_journal(fset) ||
            strcmp(fset->fset_cache->cache_type, "ext2"))
                return NULL;

        /*
         * The superblock field is little-endian on disk; convert it as
         * presto_e2_freespace() does, so the thresholds below also hold on
         * big-endian hosts.
         */
        avail_kmlblocks =
                le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_free_blocks_count);

        if (avail_kmlblocks < 3)
                return ERR_PTR(-ENOSPC);

        /* Operations that do not free space need more headroom. */
        if ((op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
            && avail_kmlblocks < 6)
                return ERR_PTR(-ENOSPC);

        return (void *) 1;
}
/* Commit hook for ext2: there is no transaction to commit, so do nothing. */
static void presto_e2_trans_commit(struct presto_file_set *fset, void *handle)
{
        /* intentionally empty */
}
/* tr_all_data hook for ext2: calling it triggers BUG(). */
static int presto_e2_has_all_data(struct inode *inode)
{
BUG();
/* presumably never reached; gives the function a return value */
return 0;
}
/* Journal operations table for ext2 caches; no tr_journal_data hook. */
struct journal_ops presto_ext2_journal_ops = {
        .tr_start        = presto_e2_trans_start,
        .tr_commit       = presto_e2_trans_commit,
        .tr_avail        = presto_e2_freespace,
        .tr_all_data     = presto_e2_has_all_data,
        .tr_journal_data = NULL
};
#endif /* CONFIG_EXT2_FS */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/param.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#endif
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
/*
 * Number of whole ext3 blocks spanned by PATH_MAX (resp. NAME_MAX) bytes on
 * the filesystem of `inode`: the byte limit shifted right by that
 * superblock's block-size bits, so the result is rounded down.
 */
#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
/* space requirements:
presto_do_truncate:
used to truncate the KML forward to next fset->chunksize boundary
- zero partial block
- update inode
presto_write_record:
write header (< one block)
write one path (< MAX_PATHLEN)
possibly write another path (< MAX_PATHLEN)
write suffix (< one block)
presto_update_last_rcvd
write one block
*/
/*
 * Report the space available on an ext3 cache, in bytes: the superblock's
 * free-block count minus its reserved-block count, shifted left by the
 * block-size bits.  The `cache` argument is not used.
 */
static loff_t presto_e3_freespace(struct presto_cache *cache,
                                  struct super_block *sb)
{
        loff_t free_blocks;
        loff_t reserved_blocks;

        free_blocks = le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
        reserved_blocks = le32_to_cpu(EXT3_SB(sb)->s_es->s_r_blocks_count);

        return (free_blocks - reserved_blocks) << EXT3_BLOCK_SIZE_BITS(sb);
}
/*
 * Start an ext3 journal transaction sized for one InterMezzo operation.
 *
 * fset:  fileset the operation belongs to
 * inode: inode whose superblock identifies the ext3 filesystem and journal
 * op:    KML_OPCODE_* value used to estimate the journal blocks needed
 *
 * Returns NULL when journaling is disabled for the fileset, the cache type
 * is not "ext3", or op is not a known opcode; ERR_PTR(-ENOSPC) when too few
 * blocks are free; otherwise the value returned by journal_start().
 */
static void *presto_e3_trans_start(struct presto_file_set *fset,
                                   struct inode *inode,
                                   int op)
{
        int jblocks;
        int trunc_blks, one_path_blks, extra_path_blks,
                extra_name_blks, lml_blks;
        __u32 avail_kmlblocks;
        handle_t *handle;

        if ( presto_no_journal(fset) ||
             strcmp(fset->fset_cache->cache_type, "ext3"))
        {
                CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
                       fset->fset_cache->cache_type);
                return NULL;
        }

        /* s_es is the on-disk superblock; its counters are little-endian,
         * so convert before comparing (presto_e3_freespace does the same). */
        avail_kmlblocks =
                le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_free_blocks_count);

        if ( avail_kmlblocks < 3 ) {
                return ERR_PTR(-ENOSPC);
        }

        /* Every operation except unlink and rmdir needs a larger reserve. */
        if ( (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
             && avail_kmlblocks < 6 ) {
                return ERR_PTR(-ENOSPC);
        }

        /* Need journal space for:
             at least three writes to KML (two one block writes, one a path)
             possibly a second name (unlink, rmdir)
             possibly a second path (symlink, rename)
             a one block write to the last rcvd file
        */

        trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1;
        one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
        /* lml_blks is only referenced by the disabled RELEASE formula below. */
        lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
        extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode);
        extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode);

        /* additional blocks appear for "two pathname" operations
           and operations involving the LML records
        */
        switch (op) {
        case KML_OPCODE_TRUNC:
                jblocks = one_path_blks + extra_name_blks + trunc_blks
                        + EXT3_DELETE_TRANS_BLOCKS;
                break;
        case KML_OPCODE_KML_TRUNC:
                /* Hopefully this is a little better, but I'm still mostly
                 * guessing here. */
                /* unlink 1 */
                jblocks = extra_name_blks + trunc_blks +
                        EXT3_DELETE_TRANS_BLOCKS + 2;
                /* unlink 2 */
                jblocks += extra_name_blks + trunc_blks +
                        EXT3_DELETE_TRANS_BLOCKS + 2;
                /* rename 1 */
                jblocks += 2 * extra_path_blks + trunc_blks +
                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
                /* rename 2 */
                jblocks += 2 * extra_path_blks + trunc_blks +
                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
                break;
        case KML_OPCODE_RELEASE:
                /*
                jblocks = one_path_blks + lml_blks + 2*trunc_blks;
                */
                jblocks = one_path_blks;
                break;
        case KML_OPCODE_SETATTR:
                jblocks = one_path_blks + trunc_blks + 1 ;
                break;
        case KML_OPCODE_CREATE:
                jblocks = one_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 3 + 2;
                break;
        case KML_OPCODE_LINK:
                jblocks = one_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 2;
                break;
        case KML_OPCODE_UNLINK:
                jblocks = one_path_blks + extra_name_blks + trunc_blks
                        + EXT3_DELETE_TRANS_BLOCKS + 2;
                break;
        case KML_OPCODE_SYMLINK:
                jblocks = one_path_blks + extra_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 5;
                break;
        case KML_OPCODE_MKDIR:
                jblocks = one_path_blks + trunc_blks
                        + EXT3_DATA_TRANS_BLOCKS + 4 + 2;
                break;
        case KML_OPCODE_RMDIR:
                jblocks = one_path_blks + extra_name_blks + trunc_blks
                        + EXT3_DELETE_TRANS_BLOCKS + 1;
                break;
        case KML_OPCODE_MKNOD:
                jblocks = one_path_blks + trunc_blks +
                        EXT3_DATA_TRANS_BLOCKS + 3 + 2;
                break;
        case KML_OPCODE_RENAME:
                jblocks = one_path_blks + extra_path_blks + trunc_blks +
                        2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
                break;
        case KML_OPCODE_WRITE:
                jblocks = one_path_blks;
                /*  add this when we can wrap our transaction with
                    that of ext3_file_write (ordered writes)
                    +  EXT3_DATA_TRANS_BLOCKS;
                */
                break;
        default:
                CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
                return NULL;
        }

        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n",
               jblocks, op);

        /* journal_start/stop does not do its own locking while updating
         * the handle/transaction information. Hence we create our own
         * critical section to protect these calls. -SHP
         */
        lock_kernel();
        handle = journal_start(EXT3_JOURNAL(inode), jblocks);
        unlock_kernel();
        return handle;
}
/*
 * Stop an ext3 journal transaction.  Does nothing when journaling is
 * disabled for the fileset or when no handle was handed out.
 */
static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
{
        if ( !presto_no_journal(fset) && handle ) {
                /* journal_stop() runs under the big kernel lock, like the
                 * matching journal_start() call. -SHP */
                lock_kernel();
                journal_stop(handle);
                unlock_kernel();
        }
}
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3 inode flags of `inode`.  When the
 * flag is not defined at build time the function body is empty and a
 * compile-time warning is emitted instead.
 */
static void presto_e3_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
#endif
}
/*
 * Decide whether all data of `inode` appears to be present locally.
 *
 * The file size is converted to a block count, 0 to 3 blocks are added
 * depending on which mapping tier that count falls into (direct, single,
 * double or triple indirect), and the total is scaled by blocksize/512
 * before being compared with inode->i_blocks.
 * The tier logic is a slightly modified version of
 * ext3/inode.c:block_to_path.
 *
 * Returns 1 for an empty file or when i_blocks covers the estimate,
 * 0 otherwise.
 */
static int presto_e3_has_all_data(struct inode *inode)
{
        int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
        int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
        const long direct_limit = EXT3_NDIR_BLOCKS;
        const long single_limit = direct_limit + ptrs;
        const long double_limit = single_limit + (1 << (ptrs_bits * 2));
        long needed = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
                inode->i_sb->s_blocksize_bits;
        long extra = 0;
        int have_all;

        ENTRY;

        if (inode->i_size == 0) {
                EXIT;
                return 1;
        }

        /* Pick the number of additional blocks for the tier reached. */
        if (needed < direct_limit)
                extra = 0;      /* No indirect blocks, no problem. */
        else if (needed < single_limit)
                extra = 1;
        else if (needed < double_limit)
                extra = 2;
        else if (((needed - double_limit) >> (ptrs_bits * 2)) < ptrs)
                extra = 3;
        needed += extra;

        /* Scale to 512-byte units before comparing with i_blocks. */
        needed *= (inode->i_sb->s_blocksize / 512);

        CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", needed, inode->i_blocks);

        have_all = !(needed > inode->i_blocks);
        EXIT;
        return have_all;
}
/* Journal operations table for ext3 caches. */
struct journal_ops presto_ext3_journal_ops = {
        .tr_start        = presto_e3_trans_start,
        .tr_commit       = presto_e3_trans_commit,
        .tr_avail        = presto_e3_freespace,
        .tr_all_data     = presto_e3_has_all_data,
        .tr_journal_data = presto_e3_journal_file_data,
        .tr_ilookup      = presto_iget_ilookup,
};
#endif /* CONFIG_EXT3_FS */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/param.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#ifdef CONFIG_OBDFS_FS
#include /usr/src/obd/include/linux/obdfs.h
#endif
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#ifdef CONFIG_OBDFS_FS
/*
 * Free-space query for obdfs caches.  Both arguments are ignored and the
 * fixed value 0x0fffff is returned.
 */
static unsigned long presto_obdfs_freespace(struct presto_file_set *fset,
struct super_block *sb)
{
return 0x0fffff;
}
/*
 * Start the filesystem journal operations for obdfs.  No checks are made
 * and no transaction is opened: every call returns the dummy non-NULL
 * handle (void *) 1.
 */
static void *presto_obdfs_trans_start(struct presto_file_set *fset,
struct inode *inode,
int op)
{
return (void *) 1;
}
#if 0
int jblocks;
int trunc_blks, one_path_blks, extra_path_blks,
extra_name_blks, lml_blks;
__u32 avail_kmlblocks;
if ( presto_no_journal(fset) ||
strcmp(fset->fset_cache->cache_type, "ext3"))
{
CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
fset->fset_cache->cache_type);
return NULL;
}
avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count;
if ( avail_kmlblocks < 3 ) {
return ERR_PTR(-ENOSPC);
}
if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR)
&& avail_kmlblocks < 6 ) {
return ERR_PTR(-ENOSPC);
}
/* Need journal space for:
at least three writes to KML (two one block writes, one a path)
possibly a second name (unlink, rmdir)
possibly a second path (symlink, rename)
a one block write to the last rcvd file
*/
trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1;
one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode);
extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode);
/* additional blocks appear for "two pathname" operations
and operations involving the LML records
*/
switch (op) {
case PRESTO_OP_TRUNC:
jblocks = one_path_blks + extra_name_blks + trunc_blks
+ EXT3_DELETE_TRANS_BLOCKS;
break;
case PRESTO_OP_RELEASE:
/*
jblocks = one_path_blks + lml_blks + 2*trunc_blks;
*/
jblocks = one_path_blks;
break;
case PRESTO_OP_SETATTR:
jblocks = one_path_blks + trunc_blks + 1 ;
break;
case PRESTO_OP_CREATE:
jblocks = one_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS + 3;
break;
case PRESTO_OP_LINK:
jblocks = one_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS;
break;
case PRESTO_OP_UNLINK:
jblocks = one_path_blks + extra_name_blks + trunc_blks
+ EXT3_DELETE_TRANS_BLOCKS;
break;
case PRESTO_OP_SYMLINK:
jblocks = one_path_blks + extra_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS + 5;
break;
case PRESTO_OP_MKDIR:
jblocks = one_path_blks + trunc_blks
+ EXT3_DATA_TRANS_BLOCKS + 4;
break;
case PRESTO_OP_RMDIR:
jblocks = one_path_blks + extra_name_blks + trunc_blks
+ EXT3_DELETE_TRANS_BLOCKS;
break;
case PRESTO_OP_MKNOD:
jblocks = one_path_blks + trunc_blks +
EXT3_DATA_TRANS_BLOCKS + 3;
break;
case PRESTO_OP_RENAME:
jblocks = one_path_blks + extra_path_blks + trunc_blks +
2 * EXT3_DATA_TRANS_BLOCKS + 2;
break;
case PRESTO_OP_WRITE:
jblocks = one_path_blks;
/* add this when we can wrap our transaction with
that of ext3_file_write (ordered writes)
+ EXT3_DATA_TRANS_BLOCKS;
*/
break;
default:
CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
return NULL;
}
CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks);
return journal_start(EXT3_JOURNAL(inode), jblocks);
}
#endif
/*
 * Commit hook for obdfs.  The whole body is compiled out with #if 0, so the
 * function currently does nothing with either argument.
 */
void presto_obdfs_trans_commit(struct presto_file_set *fset, void *handle)
{
#if 0
if ( presto_no_journal(fset) || !handle)
return;
journal_stop(handle);
#endif
}
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3 part of the inode union.  When the
 * flag is not defined at build time the body is empty and a compile-time
 * warning is emitted instead.
 * NOTE(review): this touches ext3 inode data from the obdfs back-end;
 * confirm that is intended.
 */
void presto_obdfs_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
#endif
}
/* Journal operations table for obdfs caches. */
struct journal_ops presto_obdfs_journal_ops = {
        .tr_start        = presto_obdfs_trans_start,
        .tr_commit       = presto_obdfs_trans_commit,
        .tr_avail        = presto_obdfs_freespace,
        .tr_journal_data = presto_obdfs_journal_file_data,
};
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/param.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#if 0
#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE)
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_fs_sb.h>
#include <linux/reiserfs_fs_i.h>
#endif
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE)
static loff_t presto_reiserfs_freespace(struct presto_cache *cache,
struct super_block *sb)
{
struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (sb);
loff_t avail;
avail = le32_to_cpu(rs->s_free_blocks) *
le16_to_cpu(rs->s_blocksize);
return avail;
}
/*
 * Start a reiserfs journal transaction for one InterMezzo operation.
 *
 * fset:  fileset the operation belongs to
 * inode: inode whose superblock identifies the reiserfs filesystem
 * op:    PRESTO_OP_* value of the operation being started
 *
 * Returns NULL when journaling is disabled for the fileset, the cache type
 * is not "reiserfs", or the handle cannot be allocated;
 * ERR_PTR(-ENOSPC) when the free-space check fails; otherwise a newly
 * allocated transaction handle that presto_reiserfs_trans_commit() frees.
 */
static void *presto_reiserfs_trans_start(struct presto_file_set *fset,
                                         struct inode *inode,
                                         int op)
{
        int jblocks;
        __u32 avail_kmlblocks;
        struct reiserfs_transaction_handle *th ;

        if ( presto_no_journal(fset) ||
             strcmp(fset->fset_cache->cache_type, "reiserfs"))
        {
                CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
                       fset->fset_cache->cache_type);
                return NULL;
        }

        /* NOTE(review): presto_reiserfs_freespace() returns bytes, yet the
         * thresholds below look like block counts -- confirm the units. */
        avail_kmlblocks = presto_reiserfs_freespace(fset->fset_cache,
                                                    inode->i_sb);

        if ( avail_kmlblocks < 3 ) {
                return ERR_PTR(-ENOSPC);
        }

        if ( (op != PRESTO_OP_UNLINK && op != PRESTO_OP_RMDIR)
             && avail_kmlblocks < 6 ) {
                return ERR_PTR(-ENOSPC);
        }

        /* Allocate the handle only after every early return above, so
         * that none of those paths can leak it. */
        PRESTO_ALLOC(th, sizeof(*th));
        if (!th) {
                CERROR("presto: No memory for trans handle\n");
                return NULL;
        }

        jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4;
        CDEBUG(D_JOURNAL, "creating journal handle (%d blocks)\n", jblocks);

        lock_kernel();
        journal_begin(th, inode->i_sb, jblocks);
        unlock_kernel();
        return th;
}
/*
 * End a reiserfs journal transaction and free its handle.
 *
 * handle is the value returned by presto_reiserfs_trans_start(); that
 * function returns NULL when no transaction was opened, in which case
 * there is nothing to end or free.
 */
static void presto_reiserfs_trans_commit(struct presto_file_set *fset,
                                         void *handle)
{
        int jblocks;

        if (!handle)
                return;

        /* Must match the block count passed to journal_begin(). */
        jblocks = 3 + JOURNAL_PER_BALANCE_CNT * 4;

        lock_kernel();
        journal_end(handle, fset->fset_cache->cache_sb, jblocks);
        unlock_kernel();
        PRESTO_FREE(handle, sizeof(struct reiserfs_transaction_handle));
}
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3 part of the inode union.  When the
 * flag is not defined at build time the body is empty and a compile-time
 * warning is emitted instead.
 * NOTE(review): this touches ext3 inode data from the reiserfs back-end;
 * confirm that is intended.
 */
static void presto_reiserfs_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
#endif
}
/*
 * The "is all file data present" query is not implemented for reiserfs: the
 * body invokes BUG().  The return statement after it only supplies a value
 * for the int return type.
 */
static int presto_reiserfs_has_all_data(struct inode *inode)
{
BUG();
return 0;
}
/* Journal operations table for reiserfs caches. */
struct journal_ops presto_reiserfs_journal_ops = {
        .tr_start        = presto_reiserfs_trans_start,
        .tr_commit       = presto_reiserfs_trans_commit,
        .tr_avail        = presto_reiserfs_freespace,
        .tr_all_data     = presto_reiserfs_has_all_data,
        .tr_journal_data = presto_reiserfs_journal_file_data,
};
#endif
#endif
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Los Alamos National Laboratory
* Copyright (C) 2000 TurboLinux, Inc.
* Copyright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/param.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#if defined(CONFIG_TMPFS)
#include <linux/jbd.h>
#if defined(CONFIG_EXT3)
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#endif
#endif
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#if defined(CONFIG_TMPFS)
/* space requirements:
presto_do_truncate:
used to truncate the KML forward to next fset->chunksize boundary
- zero partial block
- update inode
presto_write_record:
write header (< one block)
write one path (< MAX_PATHLEN)
possibly write another path (< MAX_PATHLEN)
write suffix (< one block)
presto_update_last_rcvd
write one block
*/
/*
 * Free-space query for tmpfs caches.  Both arguments are ignored and the
 * constant 1<<30 is returned.
 */
static loff_t presto_tmpfs_freespace(struct presto_cache *cache,
struct super_block *sb)
{
return (1<<30);
}
/*
 * Start the filesystem journal operations for tmpfs.  No transaction is
 * opened: every call returns the dummy non-NULL handle (void *)1.
 */
static void *presto_tmpfs_trans_start(struct presto_file_set *fset,
struct inode *inode,
int op)
{
return (void *)1;
}
/* Commit hook for tmpfs: intentionally empty, both arguments are unused. */
static void presto_tmpfs_trans_commit(struct presto_file_set *fset, void *handle)
{
        /* nothing to commit */
}
/* Data-journaling hook for tmpfs: intentionally empty, `inode` is unused. */
static void presto_tmpfs_journal_file_data(struct inode *inode)
{
        /* nothing to flag */
}
/*
 * "Is all file data present" query for tmpfs.  No block accounting is
 * inspected here: the inode is ignored and 0 is always returned.
 */
static int presto_tmpfs_has_all_data(struct inode *inode)
{
return 0;
}
/* Journal operations table for tmpfs caches. */
struct journal_ops presto_tmpfs_journal_ops = {
        .tr_start        = presto_tmpfs_trans_start,
        .tr_commit       = presto_tmpfs_trans_commit,
        .tr_avail        = presto_tmpfs_freespace,
        .tr_all_data     = presto_tmpfs_has_all_data,
        .tr_journal_data = presto_tmpfs_journal_file_data,
        .tr_ilookup      = presto_tmpfs_ilookup,
        .tr_add_ilookup  = presto_add_ilookup_dentry,
};
#endif /* CONFIG_TMPFS */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#if 0
/* XFS Support not there yet */
#ifdef CONFIG_FS_XFS
#include <linux/xfs_fs.h>
#endif
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#include "intermezzo_journal.h"
#if 0
/* XFS has journalling, but these functions do nothing yet... */
/*
 * Free-space query for XFS caches.  The statvfs-based lookup is compiled
 * out with #if 0, so both arguments are ignored and the fixed value
 * 0x0fffffff is returned.
 */
static unsigned long presto_xfs_freespace(struct presto_file_set *fset,
struct super_block *sb)
{
#if 0
vfs_t *vfsp = LINVFS_GET_VFS(sb);
struct statvfs_t stat;
bhv_desc_t *bdp;
unsigned long avail;
int rc;
VFS_STATVFS(vfsp, &stat, NULL, rc);
avail = statp.f_bfree;
return sbp->sb_fdblocks;
#endif
return 0x0fffffff;
}
/*
 * Start the filesystem journal operations for XFS.
 *
 * Maps the InterMezzo opcode `op` to one of the XFS_TRANS_* values defined
 * locally below and passes it, with `inode`, to xfs_trans_start().  `fset`
 * is not used.
 *
 * Returns the result of xfs_trans_start(), or NULL when `op` has no
 * mapping.
 */
static void *
presto_xfs_trans_start(struct presto_file_set *fset,
struct inode *inode, int op)
{
int xfs_op;
/* do a free blocks check as in journal_ext3? does anything protect
* the space in that case or can it disappear out from under us
* anyway? */
/* copied from xfs_trans.h, skipping header maze for now */
#define XFS_TRANS_SETATTR_NOT_SIZE 1
#define XFS_TRANS_SETATTR_SIZE 2
#define XFS_TRANS_INACTIVE 3
#define XFS_TRANS_CREATE 4
#define XFS_TRANS_CREATE_TRUNC 5
#define XFS_TRANS_TRUNCATE_FILE 6
#define XFS_TRANS_REMOVE 7
#define XFS_TRANS_LINK 8
#define XFS_TRANS_RENAME 9
#define XFS_TRANS_MKDIR 10
#define XFS_TRANS_RMDIR 11
#define XFS_TRANS_SYMLINK 12
/* map the op onto the values for XFS so it can do reservation. if
* we don't have enough info to differentiate between e.g. setattr
* with or without size, what do we do? will it adjust? */
switch (op) {
case PRESTO_OP_SETATTR:
/* or XFS_TRANS_SETATTR_NOT_SIZE? */
xfs_op = XFS_TRANS_SETATTR_SIZE;
break;
case PRESTO_OP_CREATE:
/* or CREATE_TRUNC? */
xfs_op = XFS_TRANS_CREATE;
break;
case PRESTO_OP_LINK:
xfs_op = XFS_TRANS_LINK;
break;
case PRESTO_OP_UNLINK:
xfs_op = XFS_TRANS_REMOVE;
break;
case PRESTO_OP_SYMLINK:
xfs_op = XFS_TRANS_SYMLINK;
break;
case PRESTO_OP_MKDIR:
xfs_op = XFS_TRANS_MKDIR;
break;
case PRESTO_OP_RMDIR:
xfs_op = XFS_TRANS_RMDIR;
break;
case PRESTO_OP_MKNOD:
/* XXX can't find an analog for mknod? */
xfs_op = XFS_TRANS_CREATE;
break;
case PRESTO_OP_RENAME:
xfs_op = XFS_TRANS_RENAME;
break;
default:
/* opcodes without a mapping get no transaction handle */
CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
return NULL;
}
return xfs_trans_start(inode, xfs_op);
}
/*
 * Commit hook for XFS: passes `handle` straight to xfs_trans_stop().
 * `fset` is not used and `handle` is not checked for NULL.
 */
static void presto_xfs_trans_commit(struct presto_file_set *fset, void *handle)
{
/* assert (handle == current->j_handle) */
xfs_trans_stop(handle);
}
/* Data-journaling hook for XFS: intentionally empty, `inode` is unused. */
static void presto_xfs_journal_file_data(struct inode *inode)
{
        /* nothing to flag */
}
/*
 * The "is all file data present" query is not implemented for XFS: the body
 * invokes BUG().  The return statement after it only supplies a value for
 * the int return type.
 */
static int presto_xfs_has_all_data(struct inode *inode)
{
BUG();
return 0;
}
/* Journal operations table for XFS caches. */
struct journal_ops presto_xfs_journal_ops = {
        .tr_start        = presto_xfs_trans_start,
        .tr_commit       = presto_xfs_trans_commit,
        .tr_avail        = presto_xfs_freespace,
        .tr_all_data     = presto_xfs_has_all_data,
        .tr_journal_data = presto_xfs_journal_file_data,
};
#endif
#endif /* CONFIG_XFS_FS */
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include "intermezzo_fs.h"
#include "intermezzo_upcall.h"
#include "intermezzo_psdev.h"
#include "intermezzo_kml.h"
/*
 * Look up the fileset for `path`.  Thin wrapper that forwards to
 * presto_path2fileset() and returns its result unchanged.
 */
static struct presto_file_set * kml_getfset (char *path)
{
return presto_path2fileset(path);
}
/*
 * BEGIN_KML_REINT: send the KML buffer and related volume info into the
 * kernel.
 *
 * arg is a user-space pointer to a structure holding the volume name
 * (volname, namelen) and the KML records (recbuf, reclen).  The records are
 * copied into the fileset's kml_buf, decoded with decode_kmlrec(), and the
 * reintegration cursor is reset.  `file` is not used.
 *
 * Returns 0 on success, -EFAULT when user memory cannot be read, -EINVAL
 * for a negative length, and -ENOMEM when the name buffer cannot be
 * allocated or the records exceed kml_maxsize.
 */
int begin_kml_reint (struct file *file, unsigned long arg)
{
        struct {
                char *volname;
                int namelen;
                char *recbuf;
                int reclen; /* int newpos; */
        } input;
        struct kml_fsdata *kml_fsdata = NULL;
        struct presto_file_set *fset = NULL;
        char *path;

        ENTRY;
        /* allocate buffer & copy it to kernel space */
        if (copy_from_user(&input, (char *)arg, sizeof(input))) {
                EXIT;
                return -EFAULT;
        }

        /* Both lengths come from user space; reject negative values before
         * they are used as allocation and copy sizes. */
        if (input.namelen < 0 || input.reclen < 0) {
                EXIT;
                return -EINVAL;
        }

        PRESTO_ALLOC(path, char *, input.namelen + 1);
        if ( !path ) {
                EXIT;
                return -ENOMEM;
        }
        if (copy_from_user(path, input.volname, input.namelen)) {
                PRESTO_FREE(path, input.namelen + 1);
                EXIT;
                return -EFAULT;
        }
        path[input.namelen] = '\0';
        /* NOTE(review): the lookup result is used unchecked; confirm how
         * presto_path2fileset() reports an unknown volume. */
        fset = kml_getfset (path);
        PRESTO_FREE(path, input.namelen + 1);

        kml_fsdata = FSET_GET_KMLDATA(fset);

        /* The size limit lives in kml_fsdata, so it can only be checked
         * once the fileset has been looked up. */
        if (input.reclen > kml_fsdata->kml_maxsize) {
                EXIT;
                return -ENOMEM; /* we'll find solution to this in the future */
        }

        /* read the buf from user memory here */
        if (copy_from_user(kml_fsdata->kml_buf, input.recbuf, input.reclen)) {
                EXIT;
                return -EFAULT;
        }
        kml_fsdata->kml_len = input.reclen;

        decode_kmlrec (&kml_fsdata->kml_reint_cache,
                       kml_fsdata->kml_buf, kml_fsdata->kml_len);

        kml_fsdata->kml_reint_current = kml_fsdata->kml_reint_cache.next;
        kml_fsdata->kml_reintpos = 0;
        kml_fsdata->kml_count = 0;
        EXIT;
        return 0;
}
/*
 * DO_KML_REINT: reintegrate the KML records previously loaded for a volume.
 *
 * arg is a user-space pointer to a structure holding the volume name
 * (volname, namelen) and an output area.  When kml_reintbuf() returns
 * KML_CLOSE_BACKFETCH together with a close record, that record's path,
 * record number, offset, size, generation and inode number are written
 * back to the caller's structure.  `file` is not used.
 *
 * Returns the kml_reintbuf() result, -EFAULT when user memory cannot be
 * accessed, -EINVAL for a negative name length, or -ENOMEM when a buffer
 * cannot be allocated or the caller's path buffer is too small.
 */
int do_kml_reint (struct file *file, unsigned long arg)
{
        struct {
                char *volname;
                int namelen;
                char *path;
                int pathlen;
                int recno;
                int offset;
                int len;
                int generation;
                __u64 ino;
        } input;
        int error;
        char *path;
        struct kml_rec *close_rec = NULL;
        struct kml_fsdata *kml_fsdata;
        struct presto_file_set *fset;

        ENTRY;
        if (copy_from_user(&input, (char *)arg, sizeof(input))) {
                EXIT;
                return -EFAULT;
        }

        /* namelen comes from user space; reject a negative value before
         * it is used as an allocation and copy size. */
        if (input.namelen < 0) {
                EXIT;
                return -EINVAL;
        }

        PRESTO_ALLOC(path, char *, input.namelen + 1);
        if ( !path ) {
                EXIT;
                return -ENOMEM;
        }
        if (copy_from_user(path, input.volname, input.namelen)) {
                PRESTO_FREE(path, input.namelen + 1);
                EXIT;
                return -EFAULT;
        }
        path[input.namelen] = '\0';
        /* NOTE(review): the lookup result is used unchecked; confirm how
         * presto_path2fileset() reports an unknown volume. */
        fset = kml_getfset (path);
        PRESTO_FREE(path, input.namelen + 1);

        kml_fsdata = FSET_GET_KMLDATA(fset);

        error = kml_reintbuf(kml_fsdata,
                             fset->fset_mtpt->d_name.name,
                             &close_rec);

        if (error == KML_CLOSE_BACKFETCH && close_rec != NULL) {
                struct kml_close *close = &close_rec->rec_kml.close;
                /* bytes needed for the path including its terminator */
                int needed = strlen (close->path) + 1;

                input.ino = close->ino;
                input.generation = close->generation;
                if (needed < input.pathlen) {
                        /* input.path is a user-space pointer, so the path
                         * must go through copy_to_user(), not strcpy(). */
                        if (copy_to_user(input.path, close->path, needed)) {
                                EXIT;
                                return -EFAULT;
                        }
                        input.pathlen = needed;
                        input.recno = close_rec->rec_tail.recno;
                        input.offset = close_rec->rec_kml_offset;
                        input.len = close_rec->rec_size;
                        input.generation = close->generation;
                        input.ino = close->ino;
                }
                else {
                        CDEBUG(D_KML, "KML_DO_REINT::no space to save:%d < %d",
                               needed, input.pathlen);
                        error = -ENOMEM;
                }
                if (copy_to_user((char *)arg, &input, sizeof (input))) {
                        EXIT;
                        return -EFAULT;
                }
        }
        EXIT;
        return error;
}
/*
 * END_KML_REINT: free the decoded KML records of a volume and reset its
 * reintegration state.
 *
 * arg is a user-space pointer to a structure holding the volume name
 * (volname, namelen).  `file` is not used.
 *
 * Returns 0 on success, -EFAULT when user memory cannot be read, -EINVAL
 * for a negative name length, and -ENOMEM when the name buffer cannot be
 * allocated.
 */
int end_kml_reint (struct file *file, unsigned long arg)
{
        /* Free KML buffer and related volume info */
        struct {
                char *volname;
                int namelen;
#if 0
                int count;
                int newpos;
#endif
        } input;
        struct presto_file_set *fset = NULL;
        struct kml_fsdata *kml_fsdata = NULL;
        char *path;

        ENTRY;
        if (copy_from_user(&input, (char *)arg, sizeof(input))) {
                EXIT;
                return -EFAULT;
        }

        /* namelen comes from user space; reject a negative value before
         * it is used as an allocation and copy size. */
        if (input.namelen < 0) {
                EXIT;
                return -EINVAL;
        }

        PRESTO_ALLOC(path, char *, input.namelen + 1);
        if ( !path ) {
                EXIT;
                return -ENOMEM;
        }
        if (copy_from_user(path, input.volname, input.namelen)) {
                PRESTO_FREE(path, input.namelen + 1);
                EXIT;
                return -EFAULT;
        }
        path[input.namelen] = '\0';
        /* NOTE(review): the lookup result is used unchecked; confirm how
         * presto_path2fileset() reports an unknown volume. */
        fset = kml_getfset (path);
        PRESTO_FREE(path, input.namelen + 1);

        kml_fsdata = FSET_GET_KMLDATA(fset);
        delete_kmlrec (&kml_fsdata->kml_reint_cache);

        /* kml reint support */
        kml_fsdata->kml_reint_current = NULL;
        kml_fsdata->kml_len = 0;
        kml_fsdata->kml_reintpos = 0;
        kml_fsdata->kml_count = 0;
#if 0
        input.newpos = kml_upc->newpos;
        input.count = kml_upc->count;
        if (copy_to_user((char *)arg, &input, sizeof (input)))
                return -EFAULT;
#endif
        EXIT;
        return 0;
}
/*
* KML Decoding
*
* Copyright (C) 1996 Arthur Ma <arthur.ma@mountainviewdata.com>
*
* Copyright (C) 2001 Mountainview Data, Inc.
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include "intermezzo_fs.h"
#include "intermezzo_kml.h"
static int size_round (int val);
static int unpack_create (struct kml_create *rec, char *buf,
int pos, int *rec_offs);
static int unpack_open (struct kml_open *rec, char *buf,
int pos, int *rec_offs);
static int unpack_symlink (struct kml_symlink *rec, char *buf,
int pos, int *rec_offs);
static int unpack_mknod (struct kml_mknod *rec, char *buf,
int pos, int *rec_offs);
static int unpack_link (struct kml_link *rec, char *buf,
int pos, int *rec_offs);
static int unpack_rename (struct kml_rename *rec, char *buf,
int pos, int *rec_offs);
static int unpack_unlink (struct kml_unlink *rec, char *buf,
int pos, int *rec_offs);
static int unpack_rmdir (struct kml_rmdir *rec, char *buf,
int pos, int *rec_offs);
static int unpack_setattr (struct kml_setattr *rec, char *buf,
int pos, int *rec_offs);
static int unpack_close (struct kml_close *rec, char *buf,
int pos, int *rec_offs);
static int unpack_mkdir (struct kml_mkdir *rec, char *buf,
int pos, int *rec_offs);
#if 0
static int unpack_endmark (struct kml_endmark *rec, char *buf,
int pos, int *rec_offs);
static void print_kml_endmark (struct kml_endmark *rec);
#endif
static int kml_unpack (char *kml_buf, int rec_size, int kml_offset,
struct kml_rec **newrec);
static char *kml_version (struct presto_version *ver);
static void print_kml_prefix (struct big_journal_prefix *head);
static void print_kml_create (struct kml_create *rec);
static void print_kml_mkdir (struct kml_mkdir *rec);
static void print_kml_unlink (struct kml_unlink *rec);
static void print_kml_rmdir (struct kml_rmdir *rec);
static void print_kml_close (struct kml_close *rec);
static void print_kml_symlink (struct kml_symlink *rec);
static void print_kml_rename (struct kml_rename *rec);
static void print_kml_setattr (struct kml_setattr *rec);
static void print_kml_link (struct kml_link *rec);
static void print_kml_mknod (struct kml_mknod *rec);
static void print_kml_open (struct kml_open *rec);
static void print_kml_suffix (struct journal_suffix *tail);
static char *readrec (char *recbuf, int reclen, int pos, int *size);
#define KML_PREFIX_WORDS 8
static int kml_unpack (char *kml_buf, int rec_size, int kml_offset,
struct kml_rec **newrec)
{
struct kml_rec *rec;
char *p;
int pos, rec_offs;
int error;
ENTRY;
if (rec_size < sizeof (struct journal_prefix) +
sizeof (struct journal_suffix))
return -EBADF;
PRESTO_ALLOC(rec, struct kml_rec *, sizeof (struct kml_rec));
if (rec == NULL) {
EXIT;
return -ENOMEM;
}
rec->rec_kml_offset = kml_offset;
rec->rec_size = rec_size;
p = kml_buf;
p = dlogit (&rec->rec_head, p, KML_PREFIX_WORDS * sizeof (int));
p = dlogit (&rec->rec_head.groups, p,
sizeof (int) * rec->rec_head.ngroups);
pos = sizeof (struct journal_prefix) +
sizeof (int) * rec->rec_head.ngroups;
switch (rec->rec_head.opcode)
{
case KML_CREATE:
error = unpack_create (&rec->rec_kml.create,
kml_buf, pos, &rec_offs);
break;
case KML_MKDIR:
error = unpack_mkdir (&rec->rec_kml.mkdir,
kml_buf, pos, &rec_offs);
break;
case KML_UNLINK:
error = unpack_unlink (&rec->rec_kml.unlink,
kml_buf, pos, &rec_offs);
break;
case KML_RMDIR:
error = unpack_rmdir (&rec->rec_kml.rmdir,
kml_buf, pos, &rec_offs);
break;
case KML_CLOSE:
error = unpack_close (&rec->rec_kml.close,
kml_buf, pos, &rec_offs);
break;
case KML_SYMLINK:
error = unpack_symlink (&rec->rec_kml.symlink,
kml_buf, pos, &rec_offs);
break;
case KML_RENAME:
error = unpack_rename (&rec->rec_kml.rename,
kml_buf, pos, &rec_offs);
break;
case KML_SETATTR:
error = unpack_setattr (&rec->rec_kml.setattr,
kml_buf, pos, &rec_offs);
break;
case KML_LINK:
error = unpack_link (&rec->rec_kml.link,
kml_buf, pos, &rec_offs);
break;
case KML_OPEN:
error = unpack_open (&rec->rec_kml.open,
kml_buf, pos, &rec_offs);
break;
case KML_MKNOD:
error = unpack_mknod (&rec->rec_kml.mknod,
kml_buf, pos, &rec_offs);
break;
#if 0
case KML_ENDMARK:
error = unpack_endmark (&rec->rec_kml.endmark,
kml_buf, pos, &rec_offs);
break;
#endif
default:
CDEBUG (D_KML, "wrong opcode::%u\n",
rec->rec_head.opcode);
EXIT;
return -EINVAL;
}
if (error) {
PRESTO_FREE (rec, sizeof (struct kml_rec));
return -EINVAL;
}
p = kml_buf + rec_offs;
p = dlogit (&rec->rec_tail, p, sizeof (struct journal_suffix));
memset (&rec->kml_optimize, 0, sizeof (struct kml_optimize));
*newrec = rec;
EXIT;
return 0;
}
/* Round val up to the next multiple of 4 (values already aligned are kept). */
static int size_round (int val)
{
        const int align_mask = 0x3;

        return (val + align_mask) & ~align_mask;
}
/*
 * Unpack the payload of a KML create record.
 *
 * rec:      destination record; rec->path receives a newly allocated,
 *           NUL-terminated copy of the path
 * buf:      start of the packed record
 * pos:      offset of the create payload within buf
 * rec_offs: receives the offset just past the payload (fixed part plus the
 *           path rounded up to 4 bytes)
 *
 * Returns 0 on success, -EINVAL when the stored path length is negative,
 * and -ENOMEM when the path cannot be allocated.
 */
static int unpack_create (struct kml_create *rec, char *buf,
                          int pos, int *rec_offs)
{
        char *p, *q;
        int unpack_size = 88;
        int pathlen;

        ENTRY;
        p = buf + pos;
        p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
        p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
        p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
        p = dlogit (&rec->mode, p, sizeof (int));
        p = dlogit (&rec->uid, p, sizeof (int));
        p = dlogit (&rec->gid, p, sizeof (int));
        p = dlogit (&pathlen, p, sizeof (int));

        /* pathlen comes from the record; a negative value would make the
         * terminator write below land outside the allocation. */
        if (pathlen < 0) {
                EXIT;
                return -EINVAL;
        }

        PRESTO_ALLOC(q, char *, pathlen + 1);
        if (q == NULL) {
                EXIT;
                return -ENOMEM;
        }

        memcpy (q, p, pathlen);
        q[pathlen] = '\0';
        rec->path = q;

        *rec_offs = pos + unpack_size + size_round(pathlen);
        EXIT;
        return 0;
}
/*
 * Unpack the payload of a KML open record.  The code reads nothing from
 * buf and leaves rec untouched: the end offset reported through rec_offs
 * equals pos.  Always returns 0.
 */
static int unpack_open (struct kml_open *rec, char *buf,
                        int pos, int *rec_offs)
{
        const int payload_end = pos;

        *rec_offs = payload_end;
        return 0;
}
static int unpack_symlink (struct kml_symlink *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 88;
int pathlen, targetlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
p = dlogit (&rec->uid, p, sizeof (int));
p = dlogit (&rec->gid, p, sizeof (int));
p = dlogit (&pathlen, p, sizeof (int));
p = dlogit (&targetlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->sourcepath = q;
PRESTO_ALLOC(q, char *, targetlen + 1);
if (q == NULL) {
PRESTO_FREE (rec->sourcepath, pathlen + 1);
EXIT;
return -ENOMEM;
}
memcpy (q, p, targetlen);
q[targetlen] = '\0';
rec->targetpath = q;
*rec_offs = pos + unpack_size + size_round(pathlen) +
size_round(targetlen);
EXIT;
return 0;
}
static int unpack_mknod (struct kml_mknod *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 96;
int pathlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
p = dlogit (&rec->mode, p, sizeof (int));
p = dlogit (&rec->uid, p, sizeof (int));
p = dlogit (&rec->gid, p, sizeof (int));
p = dlogit (&rec->major, p, sizeof (int));
p = dlogit (&rec->minor, p, sizeof (int));
p = dlogit (&pathlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->path = q;
*rec_offs = pos + unpack_size + size_round(pathlen);
EXIT;
return 0;
}
static int unpack_link (struct kml_link *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 80;
int pathlen, targetlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
p = dlogit (&pathlen, p, sizeof (int));
p = dlogit (&targetlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->sourcepath = q;
p += size_round (pathlen);
PRESTO_ALLOC(q, char *, targetlen + 1);
if (q == NULL) {
PRESTO_FREE (rec->sourcepath, pathlen + 1);
EXIT;
return -ENOMEM;
}
memcpy (q, p, targetlen);
q[targetlen] = '\0';
rec->targetpath = q;
*rec_offs = pos + unpack_size + size_round(pathlen) +
size_round(targetlen);
EXIT;
return 0;
}
static int unpack_rename (struct kml_rename *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 104;
int pathlen, targetlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_tgtv, p, sizeof (struct presto_version));
p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version));
p = dlogit (&pathlen, p, sizeof (int));
p = dlogit (&targetlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->sourcepath = q;
p += size_round (pathlen);
PRESTO_ALLOC(q, char *, targetlen + 1);
if (q == NULL) {
PRESTO_FREE (rec->sourcepath, pathlen + 1);
EXIT;
return -ENOMEM;
}
memcpy (q, p, targetlen);
q[targetlen] = '\0';
rec->targetpath = q;
*rec_offs = pos + unpack_size + size_round(pathlen) +
size_round(targetlen);
EXIT;
return 0;
}
static int unpack_unlink (struct kml_unlink *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 80;
int pathlen, targetlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version));
p = dlogit (&pathlen, p, sizeof (int));
p = dlogit (&targetlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->path = q;
p += size_round (pathlen);
PRESTO_ALLOC(q, char *, targetlen + 1);
if (q == NULL) {
PRESTO_FREE (rec->path, pathlen + 1);
EXIT;
return -ENOMEM;
}
memcpy (q, p, targetlen);
q[targetlen] = '\0';
rec->name = q;
/* fix the presto_journal_unlink problem */
*rec_offs = pos + unpack_size + size_round(pathlen) +
size_round(targetlen);
EXIT;
return 0;
}
static int unpack_rmdir (struct kml_rmdir *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 80;
int pathlen, targetlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->old_tgtv, p, sizeof (struct presto_version));
p = dlogit (&pathlen, p, sizeof (int));
p = dlogit (&targetlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->path = q;
p += size_round (pathlen);
PRESTO_ALLOC(q, char *, targetlen + 1);
if (q == NULL) {
PRESTO_FREE (rec->path, pathlen + 1);
EXIT;
return -ENOMEM;
}
memcpy (q, p, targetlen);
q[targetlen] = '\0';
rec->name = q;
*rec_offs = pos + unpack_size + size_round(pathlen) +
size_round(targetlen);
EXIT;
return 0;
}
static int unpack_setattr (struct kml_setattr *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 72;
struct kml_attr {
__u64 size, mtime, ctime;
} objattr;
int valid, mode, uid, gid, flags;
int pathlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_objectv, p, sizeof (struct presto_version));
p = dlogit (&valid, p, sizeof (int));
p = dlogit (&mode, p, sizeof (int));
p = dlogit (&uid, p, sizeof (int));
p = dlogit (&gid, p, sizeof (int));
p = dlogit (&objattr, p, sizeof (struct kml_attr));
p = dlogit (&flags, p, sizeof (int));
p = dlogit (&pathlen, p, sizeof (int));
rec->iattr.ia_valid = valid;
rec->iattr.ia_mode = mode;
rec->iattr.ia_uid = uid;
rec->iattr.ia_gid = gid;
rec->iattr.ia_size = objattr.size;
rec->iattr.ia_mtime = objattr.mtime;
rec->iattr.ia_ctime = objattr.ctime;
rec->iattr.ia_atime = 0;
rec->iattr.ia_attr_flags = flags;
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->path = q;
p += pathlen;
*rec_offs = pos + unpack_size + size_round(pathlen);
EXIT;
return 0;
}
static int unpack_close (struct kml_close *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 52;
int pathlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->open_mode, p, sizeof (int));
p = dlogit (&rec->open_uid, p, sizeof (int));
p = dlogit (&rec->open_gid, p, sizeof (int));
p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
p = dlogit (&rec->ino, p, sizeof (__u64));
p = dlogit (&rec->generation, p, sizeof (int));
p = dlogit (&pathlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->path = q;
p += pathlen;
*rec_offs = pos + unpack_size + size_round(pathlen);
EXIT;
return 0;
}
static int unpack_mkdir (struct kml_mkdir *rec, char *buf,
int pos, int *rec_offs)
{
char *p, *q;
int unpack_size = 88;
int pathlen;
ENTRY;
p = buf + pos;
p = dlogit (&rec->old_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_parentv, p, sizeof (struct presto_version));
p = dlogit (&rec->new_objectv, p, sizeof (struct presto_version));
p = dlogit (&rec->mode, p, sizeof (int));
p = dlogit (&rec->uid, p, sizeof (int));
p = dlogit (&rec->gid, p, sizeof (int));
p = dlogit (&pathlen, p, sizeof (int));
PRESTO_ALLOC(q, char *, pathlen + 1);
if (q == NULL) {
EXIT;
return -ENOMEM;
}
memcpy (q, p, pathlen);
q[pathlen] = '\0';
rec->path = q;
p += pathlen;
*rec_offs = pos + unpack_size + size_round(pathlen);
EXIT;
return 0;
}
#if 0
/*
 * Disabled: decode an ENDMARK record found at buf + pos.
 *
 * Reads the entry count into rec->total, allocates rec->kop as an array
 * of that many struct kml_kop_node and fills it from the buffer.
 * Returns 0 on success (with *rec_offs advanced past the count and the
 * array) or -ENOMEM if the array cannot be allocated.
 *
 * NOTE(review): rec->total is used as an allocation count without any
 * range check - confirm before re-enabling.
 */
static int unpack_endmark (struct kml_endmark *rec, char *buf,
int pos, int *rec_offs)
{
char *p;
p = buf + pos;
p = dlogit (&rec->total, p, sizeof (int));
PRESTO_ALLOC (rec->kop, struct kml_kop_node *,
sizeof (struct kml_kop_node) * rec->total);
if (rec->kop == NULL) {
EXIT;
return -ENOMEM;
}
p = dlogit (rec->kop, p, sizeof (struct kml_kop_node) * rec->total);
*rec_offs = pos + sizeof (int) + sizeof (struct kml_kop_node) * rec->total;
return 0;
}
#endif
/*
 * Render a presto_version (mtime, ctime, size) as text for debug output.
 *
 * The result lives in a function-static buffer, so each call overwrites
 * the string returned by the previous one.
 */
static char *kml_version (struct presto_version *ver)
{
        static char text[256];

        sprintf (text, "mt::%lld, ct::%lld, size::%lld",
                 ver->pv_mtime, ver->pv_ctime, ver->pv_size);

        return text;
}
/*
 * Dump every field of a record prefix, including the group list, to the
 * D_KML debug channel.
 */
static void print_kml_prefix (struct big_journal_prefix *head)
{
        int grp = 0;

        CDEBUG (D_KML, " === KML PREFIX\n");
        CDEBUG (D_KML, " len = %u\n", head->len);
        CDEBUG (D_KML, " version = %u\n", head->version);
        CDEBUG (D_KML, " pid = %u\n", head->pid);
        CDEBUG (D_KML, " uid = %u\n", head->uid);
        CDEBUG (D_KML, " fsuid = %u\n", head->fsuid);
        CDEBUG (D_KML, " fsgid = %u\n", head->fsgid);
        CDEBUG (D_KML, " opcode = %u\n", head->opcode);
        CDEBUG (D_KML, " ngroup = %u", head->ngroups);

        /* the group ids follow the count on the same output line */
        while (grp < head->ngroups) {
                CDEBUG (D_KML, "%u ", head->groups[grp]);
                grp++;
        }
        CDEBUG (D_KML, "\n");
}
/* Dump a decoded CREATE record to the D_KML debug channel. */
static void print_kml_create (struct kml_create *rec)
{
        struct presto_version *objv = &rec->new_objectv;
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *new_pv = &rec->new_parentv;

        CDEBUG (D_KML, " === CREATE\n");
        CDEBUG (D_KML, " path::%s\n", rec->path);
        CDEBUG (D_KML, " new_objv::%s\n", kml_version (objv));
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
        CDEBUG (D_KML, " mode::%o\n", rec->mode);
        CDEBUG (D_KML, " uid::%d\n", rec->uid);
        CDEBUG (D_KML, " gid::%d\n", rec->gid);
}
/* Dump a decoded MKDIR record to the D_KML debug channel. */
static void print_kml_mkdir (struct kml_mkdir *rec)
{
        struct presto_version *objv = &rec->new_objectv;
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *new_pv = &rec->new_parentv;

        CDEBUG (D_KML, " === MKDIR\n");
        CDEBUG (D_KML, " path::%s\n", rec->path);
        CDEBUG (D_KML, " new_objv::%s\n", kml_version (objv));
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
        CDEBUG (D_KML, " mode::%o\n", rec->mode);
        CDEBUG (D_KML, " uid::%d\n", rec->uid);
        CDEBUG (D_KML, " gid::%d\n", rec->gid);
}
/* Dump a decoded UNLINK record to the D_KML debug channel. */
static void print_kml_unlink (struct kml_unlink *rec)
{
        struct presto_version *tgtv = &rec->old_tgtv;
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *new_pv = &rec->new_parentv;

        CDEBUG (D_KML, " === UNLINK\n");
        CDEBUG (D_KML, " path::%s/%s\n", rec->path, rec->name);
        CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (tgtv));
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
}
/* Dump a decoded RMDIR record to the D_KML debug channel. */
static void print_kml_rmdir (struct kml_rmdir *rec)
{
        struct presto_version *tgtv = &rec->old_tgtv;
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *new_pv = &rec->new_parentv;

        CDEBUG (D_KML, " === RMDIR\n");
        CDEBUG (D_KML, " path::%s/%s\n", rec->path, rec->name);
        CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (tgtv));
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
}
/* Dump a decoded CLOSE record to the D_KML debug channel. */
static void print_kml_close (struct kml_close *rec)
{
        struct presto_version *objv = &rec->new_objectv;

        CDEBUG (D_KML, " === CLOSE\n");
        CDEBUG (D_KML, " mode::%o\n", rec->open_mode);
        CDEBUG (D_KML, " uid::%d\n", rec->open_uid);
        CDEBUG (D_KML, " gid::%d\n", rec->open_gid);
        CDEBUG (D_KML, " path::%s\n", rec->path);
        CDEBUG (D_KML, " new_objv::%s\n", kml_version (objv));
        CDEBUG (D_KML, " ino::%lld\n", rec->ino);
        CDEBUG (D_KML, " gen::%u\n", rec->generation);
}
/* Dump a decoded SYMLINK record to the D_KML debug channel. */
static void print_kml_symlink (struct kml_symlink *rec)
{
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *new_pv = &rec->new_parentv;
        struct presto_version *objv = &rec->new_objectv;

        CDEBUG (D_KML, " === SYMLINK\n");
        CDEBUG (D_KML, " s-path::%s\n", rec->sourcepath);
        CDEBUG (D_KML, " t-path::%s\n", rec->targetpath);
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
        CDEBUG (D_KML, " new_objv::%s\n", kml_version (objv));
        CDEBUG (D_KML, " uid::%d\n", rec->uid);
        CDEBUG (D_KML, " gid::%d\n", rec->gid);
}
/* Dump a decoded RENAME record to the D_KML debug channel. */
static void print_kml_rename (struct kml_rename *rec)
{
        struct presto_version *old_tgt = &rec->old_tgtv;
        struct presto_version *new_tgt = &rec->new_tgtv;
        struct presto_version *new_obj = &rec->new_objectv;
        struct presto_version *old_obj = &rec->old_objectv;

        CDEBUG (D_KML, " === RENAME\n");
        CDEBUG (D_KML, " s-path::%s\n", rec->sourcepath);
        CDEBUG (D_KML, " t-path::%s\n", rec->targetpath);
        CDEBUG (D_KML, " old_tgtv::%s\n", kml_version (old_tgt));
        CDEBUG (D_KML, " new_tgtv::%s\n", kml_version (new_tgt));
        CDEBUG (D_KML, " new_objv::%s\n", kml_version (new_obj));
        CDEBUG (D_KML, " old_objv::%s\n", kml_version (old_obj));
}
/*
 * Dump a decoded SETATTR record to the D_KML debug channel.  Size,
 * times and flags are printed after a cast to u32.
 */
static void print_kml_setattr (struct kml_setattr *rec)
{
        struct presto_version *old_obj = &rec->old_objectv;

        CDEBUG (D_KML, " === SETATTR\n");
        CDEBUG (D_KML, " path::%s\n", rec->path);
        CDEBUG (D_KML, " old_objv::%s\n", kml_version (old_obj));

        /* attribute block */
        CDEBUG (D_KML, " valid::0x%x\n", rec->iattr.ia_valid);
        CDEBUG (D_KML, " mode::%o\n", rec->iattr.ia_mode);
        CDEBUG (D_KML, " uid::%d\n", rec->iattr.ia_uid);
        CDEBUG (D_KML, " gid::%d\n", rec->iattr.ia_gid);
        CDEBUG (D_KML, " size::%u\n", (u32) rec->iattr.ia_size);
        CDEBUG (D_KML, " mtime::%u\n", (u32) rec->iattr.ia_mtime);
        CDEBUG (D_KML, " ctime::%u\n", (u32) rec->iattr.ia_ctime);
        CDEBUG (D_KML, " flags::%u\n", (u32) rec->iattr.ia_attr_flags);
}
/* Dump a decoded LINK record to the D_KML debug channel. */
static void print_kml_link (struct kml_link *rec)
{
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *objv = &rec->new_objectv;
        struct presto_version *new_pv = &rec->new_parentv;

        CDEBUG (D_KML, " === LINK\n");
        CDEBUG (D_KML, " path::%s ==> %s\n", rec->sourcepath, rec->targetpath);
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_obj::%s\n", kml_version (objv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
}
/* Dump a decoded MKNOD record to the D_KML debug channel. */
static void print_kml_mknod (struct kml_mknod *rec)
{
        struct presto_version *objv = &rec->new_objectv;
        struct presto_version *old_pv = &rec->old_parentv;
        struct presto_version *new_pv = &rec->new_parentv;

        CDEBUG (D_KML, " === MKNOD\n");
        CDEBUG (D_KML, " path::%s\n", rec->path);
        CDEBUG (D_KML, " new_obj::%s\n", kml_version (objv));
        CDEBUG (D_KML, " old_parv::%s\n", kml_version (old_pv));
        CDEBUG (D_KML, " new_parv::%s\n", kml_version (new_pv));
        CDEBUG (D_KML, " mode::%o\n", rec->mode);
        CDEBUG (D_KML, " uid::%d\n", rec->uid);
        CDEBUG (D_KML, " gid::%d\n", rec->gid);
        CDEBUG (D_KML, " major::%d\n", rec->major);
        CDEBUG (D_KML, " minor::%d\n", rec->minor);
}
/*
 * OPEN records have no fields to show; only the section banner is
 * written to the D_KML debug channel.
 */
static void print_kml_open (struct kml_open *rec)
{
        CDEBUG (D_KML, " === OPEN\n");
        return;
}
#if 0
/*
 * Disabled: dump an ENDMARK record - the entry count followed by one
 * line per rec->kop[] entry (recno, flag, op, inode number, link count).
 */
static void print_kml_endmark (struct kml_endmark *rec)
{
int i;
CDEBUG (D_KML, " === ENDMARK\n");
CDEBUG (D_KML, " total::%u\n", rec->total);
for (i = 0; i < rec->total; i++)
{
CDEBUG (D_KML, " recno=%ld::flag=%ld,op=%ld, i_ino=%ld, \
i_nlink=%ld\n", (long) rec->kop[i].kml_recno,
(long) rec->kop[i].kml_flag, (long) rec->kop[i].kml_op,
(long) rec->kop[i].i_ino, (long) rec->kop[i].i_nlink);
}
}
#endif
/*
 * Dump the optimisation state attached to a record: whether it is
 * flagged KML_REC_DELETE, plus its op, link count and inode number.
 */
static void print_kml_optimize (struct kml_optimize *rec)
{
        CDEBUG (D_KML, " === OPTIMIZE\n");

        if (rec->kml_flag != KML_REC_DELETE) {
                CDEBUG (D_KML, " kml_flag::exist\n");
        } else {
                CDEBUG (D_KML, " kml_flag::deleted\n");
        }

        CDEBUG (D_KML, " kml_op::%u\n", rec->kml_op);
        CDEBUG (D_KML, " i_nlink::%d\n", rec->i_nlink);
        CDEBUG (D_KML, " i_ino::%u\n", rec->i_ino);
}
/* Dump a record suffix (prevrec, recno, time, len) to the D_KML channel. */
static void print_kml_suffix (struct journal_suffix *tail)
{
        struct journal_suffix *sfx = tail;

        CDEBUG (D_KML, " === KML SUFFIX\n");
        CDEBUG (D_KML, " prevrec::%ld\n", sfx->prevrec);
        CDEBUG (D_KML, " recno::%ld\n", (long) sfx->recno);
        CDEBUG (D_KML, " time::%d\n", sfx->time);
        CDEBUG (D_KML, " len::%d\n", sfx->len);
}
void kml_printrec (struct kml_rec *rec, int kml_printop)
{
if (kml_printop & PRINT_KML_PREFIX)
print_kml_prefix (&rec->rec_head);
if (kml_printop & PRINT_KML_REC)
{
switch (rec->rec_head.opcode)
{
case KML_CREATE:
print_kml_create (&rec->rec_kml.create);
break;
case KML_MKDIR:
print_kml_mkdir (&rec->rec_kml.mkdir);
break;
case KML_UNLINK:
print_kml_unlink (&rec->rec_kml.unlink);
break;
case KML_RMDIR:
print_kml_rmdir (&rec->rec_kml.rmdir);
break;
case KML_CLOSE:
print_kml_close (&rec->rec_kml.close);
break;
case KML_SYMLINK:
print_kml_symlink (&rec->rec_kml.symlink);
break;
case KML_RENAME:
print_kml_rename (&rec->rec_kml.rename);
break;
case KML_SETATTR:
print_kml_setattr (&rec->rec_kml.setattr);
break;
case KML_LINK:
print_kml_link (&rec->rec_kml.link);
break;
case KML_OPEN:
print_kml_open (&rec->rec_kml.open);
break;
case KML_MKNOD:
print_kml_mknod (&rec->rec_kml.mknod);
break;
#if 0
case KML_ENDMARK:
print_kml_endmark (&rec->rec_kml.endmark);
#endif
break;
default:
CDEBUG (D_KML, " === BAD RECORD, opcode=%u\n",
rec->rec_head.opcode);
break;
}
}
if (kml_printop & PRINT_KML_SUFFIX)
print_kml_suffix (&rec->rec_tail);
if (kml_printop & PRINT_KML_OPTIMIZE)
print_kml_optimize (&rec->kml_optimize);
}
/*
 * Release the path strings owned by a decoded record.
 *
 * Which union member holds the strings depends on the record's opcode;
 * records with one string free only that one, records with two free
 * both, and OPEN or unknown opcodes free nothing.  The sizes handed to
 * PRESTO_FREE are recomputed from the strings with strlen().  The
 * kml_rec structure itself is not freed here.
 */
void kml_freerec (struct kml_rec *rec)
{
        char *sourcepath = NULL,
             *targetpath = NULL;

        switch (rec->rec_head.opcode)
        {
        case KML_CREATE:
                sourcepath = rec->rec_kml.create.path;
                break;
        case KML_MKDIR:
                /* read the member that matches the opcode rather than
                 * relying on create and mkdir sharing a layout */
                sourcepath = rec->rec_kml.mkdir.path;
                break;
        case KML_UNLINK:
                sourcepath = rec->rec_kml.unlink.path;
                targetpath = rec->rec_kml.unlink.name;
                break;
        case KML_RMDIR:
                sourcepath = rec->rec_kml.rmdir.path;
                targetpath = rec->rec_kml.rmdir.name;
                break;
        case KML_CLOSE:
                sourcepath = rec->rec_kml.close.path;
                break;
        case KML_SYMLINK:
                sourcepath = rec->rec_kml.symlink.sourcepath;
                targetpath = rec->rec_kml.symlink.targetpath;
                break;
        case KML_RENAME:
                sourcepath = rec->rec_kml.rename.sourcepath;
                targetpath = rec->rec_kml.rename.targetpath;
                break;
        case KML_SETATTR:
                sourcepath = rec->rec_kml.setattr.path;
                break;
        case KML_LINK:
                sourcepath = rec->rec_kml.link.sourcepath;
                targetpath = rec->rec_kml.link.targetpath;
                break;
        case KML_OPEN:
                break;
        case KML_MKNOD:
                sourcepath = rec->rec_kml.mknod.path;
                break;
#if 0
        case KML_ENDMARK:
                PRESTO_FREE (rec->rec_kml.endmark.kop, sizeof (int) +
                        sizeof (struct kml_kop_node) *
                        rec->rec_kml.endmark.total);
                break;
#endif
        default:
                break;
        }

        if (sourcepath != NULL)
                PRESTO_FREE (sourcepath, strlen (sourcepath) + 1);
        if (targetpath != NULL)
                PRESTO_FREE (targetpath, strlen (targetpath) + 1);
}
/*
 * Locate the record that starts at offset pos inside recbuf.
 *
 * recbuf  buffer holding the records
 * reclen  number of valid bytes in recbuf
 * pos     offset of the record; its first sizeof(int) bytes hold the
 *         record size
 * size    out: the size read from the record, or 0 when the size word
 *         itself does not fit inside the buffer
 *
 * Returns a pointer to the start of the record, or NULL when pos is
 * out of range, the size word is truncated, the size is not positive,
 * or the record would extend past the end of the buffer.
 */
char *readrec (char *recbuf, int reclen, int pos, int *size)
{
        char *p;
        int len;

        *size = 0;

        /* the size word itself has to lie inside the buffer */
        if (pos < 0 || reclen < 0 || reclen - pos < (int) sizeof (int))
                return NULL;

        p = recbuf + pos;
        /* copy instead of dereferencing: pos need not be int-aligned */
        memcpy (&len, p, sizeof (len));
        *size = len;

        /* a non-positive size can never describe a record, and would stop
         * a caller that advances by *size from making progress */
        if (len <= 0 || len > reclen - pos)
                return NULL;

        return p;
}
/*
 * Decode the single record at offset pos of buf.
 *
 * readrec() locates the record and reports its length through *size;
 * kml_unpack() then builds the decoded record and returns it through
 * *newrec.  Returns -EBADF when readrec() rejects the record, otherwise
 * whatever kml_unpack() returns.
 */
int kml_decoderec (char *buf, int pos, int buflen, int *size,
                        struct kml_rec **newrec)
{
        char *rec_start = readrec (buf, buflen, pos, size);

        if (rec_start == NULL)
                return -EBADF;

        return kml_unpack (rec_start, *size, pos, newrec);
}
#if 0
/*
 * Disabled: copy the per-record optimisation data carried by an ENDMARK
 * record (optrec) onto the records already queued on head.
 *
 * The list is walked backwards from its tail while the endmark's kop[]
 * array is consumed from its last entry towards the first; whenever a
 * queued record's recno matches the current kop entry, that entry's
 * flag, op, link count and inode number are copied into the record.
 * If entries are left unmatched when the walk ends, an error is logged.
 */
static void fill_kmlrec_optimize (struct list_head *head,
struct kml_rec *optrec)
{
struct kml_rec *kmlrec;
struct list_head *tmp;
struct kml_endmark *km;
struct kml_optimize *ko;
int n;
/* nothing to distribute */
if (optrec->rec_kml.endmark.total == 0)
return;
n = optrec->rec_kml.endmark.total - 1;
tmp = head->prev;
km = &optrec->rec_kml.endmark;
while ( n >= 0 && tmp != head )
{
kmlrec = list_entry(tmp, struct kml_rec,
kml_optimize.kml_chains);
tmp = tmp->prev;
if (kmlrec->rec_tail.recno == km->kop[n].kml_recno)
{
ko = &kmlrec->kml_optimize;
ko->kml_flag = km->kop[n].kml_flag;
ko->kml_op = km->kop[n].kml_op;
ko->i_nlink = km->kop[n].i_nlink;
ko->i_ino = km->kop[n].i_ino;
n --;
}
}
/* n reaches -1 only when every kop entry found its record */
if (n != -1)
CDEBUG (D_KML, "Yeah!!!, KML optimize error, recno=%d, n=%d\n",
optrec->rec_tail.recno, n);
}
#endif
int decode_kmlrec (struct list_head *head, char *kml_buf, int buflen)
{
struct kml_rec *rec;
int pos = 0, size;
int err;
while (pos < buflen) {
err = kml_decoderec (kml_buf, pos, buflen, &size, &rec);
if (err != 0)
break;
#if 0
if (rec->rec_head.opcode == KML_ENDMARK) {
fill_kmlrec_optimize (head, rec);
mark_rec_deleted (rec);
}
#endif
list_add_tail (&rec->kml_optimize.kml_chains, head);
pos += size;
}
return err;
}
int delete_kmlrec (struct list_head *head)
{
struct kml_rec *rec;
struct list_head *tmp;
if (list_empty(head))
return 0;
tmp = head->next;
while ( tmp != head ) {
rec = list_entry(tmp, struct kml_rec,
kml_optimize.kml_chains);
tmp = tmp->next;
kml_freerec (rec);
}
INIT_LIST_HEAD(head);
return 0;
}
/*
 * Print every record queued on head using kml_printrec(), passing
 * printop through as the selection mask.  The list is left untouched so
 * the caller can still use or delete its records.  Always returns 0.
 */
int print_allkmlrec (struct list_head *head, int printop)
{
        struct kml_rec *rec;
        struct list_head *tmp;

        if (list_empty(head))
                return 0;
        tmp = head->next;
        while ( tmp != head ) {
                rec = list_entry(tmp, struct kml_rec,
                                 kml_optimize.kml_chains);
                tmp = tmp->next;
#if 0
                if (printop & PRINT_KML_EXIST) {
                        if (is_deleted_node (rec))
                                continue;
                }
                else if (printop & PRINT_KML_DELETE) {
                        if (! is_deleted_node (rec))
                                continue;
                }
#endif
                kml_printrec (rec, printop);
        }
        /* No INIT_LIST_HEAD() here: printing must not empty the list,
         * which would orphan every record still queued on it. */
        return 0;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Reintegration of KML records
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/*
 * Build a run context from a KML record and make it current.
 *
 * The context takes fsuid/fsgid and the group list from the record's
 * prefix, uses KERNEL_DS, and sets both pwd and root to the dentry and
 * mount of the fileset that dir belongs to.  push_ctxt() is then called
 * with saved and the new context.
 *
 * NOTE(review): the groups_alloc() results are used without a NULL
 * check - confirm how allocation failure should be handled.
 */
static void kmlreint_pre_secure(struct kml_rec *rec, struct file *dir,
                                struct run_ctxt *saved)
{
        struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
        struct run_ctxt ctxt;
        int idx;

        ctxt.fs = KERNEL_DS;
        ctxt.fsuid = rec->prefix.hdr->fsuid;
        ctxt.fsgid = rec->prefix.hdr->fsgid;

        /* both the working directory and the root are the fileset root */
        ctxt.pwd = dd->dd_fset->fset_dentry;
        ctxt.pwdmnt = dd->dd_fset->fset_mnt;
        ctxt.root = ctxt.pwd;
        ctxt.rootmnt = ctxt.pwdmnt;

        if (rec->prefix.hdr->ngroups <= 0) {
                ctxt.group_info = groups_alloc(0);
        } else {
                ctxt.group_info = groups_alloc(rec->prefix.hdr->ngroups);
                for (idx = 0; idx < ctxt.group_info->ngroups; idx++)
                        GROUP_AT(ctxt.group_info, idx) = rec->prefix.groups[idx];
        }

        push_ctxt(saved, &ctxt);
}
/*
 * Join two path components into one newly allocated string.
 *
 * p1/p1len and p2/p2len are the components and their lengths; neither
 * needs to be NUL-terminated.  A '/' is inserted between them unless p1
 * is empty or already ends in one.  The result is NUL-terminated and
 * must be released by the caller with kfree().
 *
 * Returns NULL if either length is negative or the allocation fails.
 */
static char * path_join(char *p1, int p1len, char *p2, int p2len)
{
        int size;
        char *path;

        /* negative lengths would turn into huge memcpy() sizes */
        if (p1len < 0 || p2len < 0)
                return NULL;

        size = p1len + p2len + 2; /* possibly one extra /, one NUL */
        path = kmalloc(size, GFP_KERNEL);
        if (path == NULL)
                return NULL;

        memcpy(path, p1, p1len);
        /* only inspect the last byte of p1 when there is one */
        if (p1len > 0 && path[p1len - 1] != '/') {
                path[p1len] = '/';
                p1len++;
        }
        memcpy(path + p1len, p2, p2len);
        path[p1len + p2len] = '\0';

        return path;
}
/*
 * Return non-zero when the record's recno is exactly one greater than
 * the fileset's fset_lento_recno.
 */
static inline int kml_recno_equal(struct kml_rec *rec,
                                  struct presto_file_set *fset)
{
        return (fset->fset_lento_recno + 1 == rec->suffix->recno);
}
/*
 * Compare a recorded version against an inode.
 *
 * Returns 1 when a is NULL, or when the inode's mtime (seconds and
 * nanoseconds) matches and, for anything that is not a directory, the
 * size matches too.  Returns 0 otherwise, including when inode is NULL
 * (which is also logged).
 */
static inline int version_equal(struct presto_version *a, struct inode *inode)
{
        if (a == NULL)
                return 1;

        if (inode == NULL) {
                CERROR("InterMezzo: NULL inode in version_equal()\n");
                return 0;
        }

        if (inode->i_mtime.tv_sec != a->pv_mtime_sec)
                return 0;
        if (inode->i_mtime.tv_nsec != a->pv_mtime_nsec)
                return 0;

        /* sizes are only compared for non-directories */
        if (!S_ISDIR(inode->i_mode) && inode->i_size != a->pv_size)
                return 0;

        return 1;
}
static int reint_close(struct kml_rec *rec, struct file *file,
struct lento_vfs_context *given_info)
{
struct run_ctxt saved_ctxt;
int error;
struct presto_file_set *fset;
struct lento_vfs_context info;
ENTRY;
memcpy(&info, given_info, sizeof(*given_info));
CDEBUG (D_KML, "=====REINT_CLOSE::%s\n", rec->path);
fset = presto_fset(file->f_dentry);
if (fset->fset_flags & FSET_DATA_ON_DEMAND) {
struct iattr iattr;
iattr.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_SIZE;
iattr.ia_mtime.tv_sec = (time_t)rec->new_objectv->pv_mtime_sec;
iattr.ia_mtime.tv_nsec = (time_t)rec->new_objectv->pv_mtime_nsec;
iattr.ia_ctime.tv_sec = (time_t)rec->new_objectv->pv_ctime_sec;
iattr.ia_ctime.tv_nsec = (time_t)rec->new_objectv->pv_ctime_nsec;
iattr.ia_size = (time_t)rec->new_objectv->pv_size;
/* no kml record, but update last rcvd */
/* save fileid in dentry for later backfetch */
info.flags |= LENTO_FL_EXPECT | LENTO_FL_SET_DDFILEID;
info.remote_ino = rec->ino;
info.remote_generation = rec->generation;
info.flags &= ~LENTO_FL_KML;
kmlreint_pre_secure(rec, file, &saved_ctxt);
error = lento_setattr(rec->path, &iattr, &info);
pop_ctxt(&saved_ctxt);
presto_d2d(file->f_dentry)->dd_flags &= ~PRESTO_DATA;
} else {
int minor = presto_f2m(fset);
info.updated_time.tv_sec = rec->new_objectv->pv_mtime_sec;
info.updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec;
memcpy(&info.remote_version, rec->old_objectv,
sizeof(*rec->old_objectv));
info.remote_ino = rec->ino;
info.remote_generation = rec->generation;
error = izo_upc_backfetch(minor, rec->path, fset->fset_name,
&info);
if (error) {
CERROR("backfetch error %d\n", error);
/* if file doesn't exist anymore, then ignore the CLOSE
* and just update the last_rcvd.
*/
if (error == ENOENT) {
CDEBUG(D_KML, "manually updating remote offset uuid %s"
"recno %d offset %Lu\n", info.uuid, info.recno,
(unsigned long long) info.kml_offset);
error = izo_rcvd_upd_remote(fset, info.uuid, info.recno, info.kml_offset);
if(error)
CERROR("izo_rcvd_upd_remote error %d\n", error);
}
}
/* propagate error to avoid further reint */
}
EXIT;
return error;
}
/*
 * Reintegrate a CREATE record: create rec->path with rec->mode under
 * the credentials taken from the record.  The new object's ctime is
 * stored in info->updated_time.  Returns the result of lento_create().
 */
static int reint_create(struct kml_rec *rec, struct file *dir,
                        struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int rc;

        ENTRY;
        CDEBUG (D_KML, "=====REINT_CREATE::%s\n", rec->path);

        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;

        /* run the operation inside the record's security context */
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_create(rec->path, rec->mode, info);
        pop_ctxt(&saved_ctxt);

        EXIT;
        return rc;
}
/*
 * Reintegrate a LINK record by calling lento_link() with rec->path and
 * rec->target under the record's credentials.  The new object's mtime
 * is stored in info->updated_time.  Returns the lento_link() result.
 */
static int reint_link(struct kml_rec *rec, struct file *dir,
                      struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int rc;

        ENTRY;
        CDEBUG (D_KML, "=====REINT_LINK::%s -> %s\n", rec->path, rec->target);

        info->updated_time.tv_sec = rec->new_objectv->pv_mtime_sec;
        info->updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec;

        /* run the operation inside the record's security context */
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_link(rec->path, rec->target, info);
        pop_ctxt(&saved_ctxt);

        EXIT;
        return rc;
}
/*
 * Reintegrate a MKDIR record: create directory rec->path with rec->mode
 * under the record's credentials.  The new object's ctime is stored in
 * info->updated_time.  Returns the result of lento_mkdir().
 */
static int reint_mkdir(struct kml_rec *rec, struct file *dir,
                       struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int rc;

        ENTRY;
        CDEBUG (D_KML, "=====REINT_MKDIR::%s\n", rec->path);

        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;

        /* run the operation inside the record's security context */
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_mkdir(rec->path, rec->mode, info);
        pop_ctxt(&saved_ctxt);

        EXIT;
        return rc;
}
/*
 * Reintegrate a MKNOD record: create the node rec->path with rec->mode
 * under the record's credentials.  The device number is decoded from
 * rec->rdev when that is non-zero, otherwise built from rec->major and
 * rec->minor.  The new object's ctime is stored in info->updated_time.
 * Returns the result of lento_mknod().
 */
static int reint_mknod(struct kml_rec *rec, struct file *dir,
                       struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        dev_t devno;
        int rc;

        ENTRY;
        CDEBUG (D_KML, "=====REINT_MKNOD::%s\n", rec->path);

        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;

        kmlreint_pre_secure(rec, dir, &saved_ctxt);

        if (rec->rdev)
                devno = old_decode_dev(rec->rdev);
        else
                devno = MKDEV(rec->major, rec->minor);

        rc = lento_mknod(rec->path, rec->mode, devno, info);
        pop_ctxt(&saved_ctxt);

        EXIT;
        return rc;
}
/* Reintegration handler that does nothing and reports success. */
static int reint_noop(struct kml_rec *rec, struct file *dir,
                      struct lento_vfs_context *info)
{
        const int success = 0;

        return success;
}
/*
 * Reintegrate a RENAME record by calling lento_rename() with rec->path
 * and rec->target under the record's credentials.  The new object's
 * mtime is stored in info->updated_time.  Returns the lento_rename()
 * result.
 */
static int reint_rename(struct kml_rec *rec, struct file *dir,
                        struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int rc;

        ENTRY;
        CDEBUG (D_KML, "=====REINT_RENAME::%s -> %s\n", rec->path, rec->target);

        info->updated_time.tv_sec = rec->new_objectv->pv_mtime_sec;
        info->updated_time.tv_nsec = rec->new_objectv->pv_mtime_nsec;

        /* run the operation inside the record's security context */
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_rename(rec->path, rec->target, info);
        pop_ctxt(&saved_ctxt);

        EXIT;
        return rc;
}
/*
 * Reintegrate an RMDIR record.
 *
 * The directory to remove is named by joining rec->path (using
 * rec->pathlen - 1 bytes of it) with rec->target.  The new parent's
 * mtime is stored in info->updated_time and lento_rmdir() is run under
 * the record's credentials.
 *
 * Returns -ENOMEM if the joined path cannot be built, otherwise the
 * result of lento_rmdir().
 */
static int reint_rmdir(struct kml_rec *rec, struct file *dir,
                       struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        char *victim;
        int rc;

        ENTRY;
        victim = path_join(rec->path, rec->pathlen - 1,
                           rec->target, rec->targetlen);
        if (victim == NULL) {
                EXIT;
                return -ENOMEM;
        }

        CDEBUG (D_KML, "=====REINT_RMDIR::%s\n", victim);

        info->updated_time.tv_sec = rec->new_parentv->pv_mtime_sec;
        info->updated_time.tv_nsec = rec->new_parentv->pv_mtime_nsec;

        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_rmdir(victim, info);
        pop_ctxt(&saved_ctxt);

        kfree(victim);
        EXIT;
        return rc;
}
static int reint_setattr(struct kml_rec *rec, struct file *dir,
struct lento_vfs_context *info)
{
struct run_ctxt saved_ctxt;
struct iattr iattr;
int error;
ENTRY;
iattr.ia_valid = rec->valid;
iattr.ia_mode = (umode_t)rec->mode;
iattr.ia_uid = (uid_t)rec->uid;
iattr.ia_gid = (gid_t)rec->gid;
iattr.ia_size = (off_t)rec->size;
iattr.ia_ctime.tv_sec = rec->ctime_sec;
iattr.ia_ctime.tv_nsec = rec->ctime_nsec;
iattr.ia_mtime.tv_sec = rec->mtime_sec;
iattr.ia_mtime.tv_nsec = rec->mtime_nsec;
iattr.ia_atime = iattr.ia_mtime; /* We don't track atimes. */
iattr.ia_attr_flags = rec->flags;
CDEBUG (D_KML, "=====REINT_SETATTR::%s (%d)\n", rec->path, rec->valid);
kmlreint_pre_secure(rec, dir, &saved_ctxt);
error = lento_setattr(rec->path, &iattr, info);
pop_ctxt(&saved_ctxt);
EXIT;
return error;
}
/*
 * Reintegrate a SYMLINK record by calling lento_symlink() with
 * rec->target first and rec->path second, under the record's
 * credentials.  The new object's ctime is stored in info->updated_time.
 * Returns the lento_symlink() result.
 */
static int reint_symlink(struct kml_rec *rec, struct file *dir,
                         struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        int rc;

        ENTRY;
        CDEBUG (D_KML, "=====REINT_SYMLINK::%s -> %s\n", rec->path, rec->target);

        info->updated_time.tv_sec = rec->new_objectv->pv_ctime_sec;
        info->updated_time.tv_nsec = rec->new_objectv->pv_ctime_nsec;

        /* run the operation inside the record's security context */
        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_symlink(rec->target, rec->path, info);
        pop_ctxt(&saved_ctxt);

        EXIT;
        return rc;
}
/*
 * Reintegrate an UNLINK record.
 *
 * The file to remove is named by joining rec->path (using
 * rec->pathlen - 1 bytes of it) with rec->target.  The new parent's
 * mtime is stored in info->updated_time and lento_unlink() is run under
 * the record's credentials.
 *
 * Returns -ENOMEM if the joined path cannot be built, otherwise the
 * result of lento_unlink().
 */
static int reint_unlink(struct kml_rec *rec, struct file *dir,
                        struct lento_vfs_context *info)
{
        struct run_ctxt saved_ctxt;
        char *victim;
        int rc;

        ENTRY;
        victim = path_join(rec->path, rec->pathlen - 1,
                           rec->target, rec->targetlen);
        if (victim == NULL) {
                EXIT;
                return -ENOMEM;
        }

        CDEBUG (D_KML, "=====REINT_UNLINK::%s\n", victim);

        info->updated_time.tv_sec = rec->new_parentv->pv_mtime_sec;
        info->updated_time.tv_nsec = rec->new_parentv->pv_mtime_nsec;

        kmlreint_pre_secure(rec, dir, &saved_ctxt);
        rc = lento_unlink(victim, info);
        pop_ctxt(&saved_ctxt);

        kfree(victim);
        EXIT;
        return rc;
}
/*
 * RENAME handling for branch filesets.
 *
 * First tries a normal reint_rename().  Only when that fails with
 * -ENOENT is the raw KML data (kml_data, kml_size) appended to the
 * fileset's log with presto_log() and, if that succeeds, last_rcvd
 * updated with presto_write_last_rcvd().
 *
 * Returns the result of the last step performed.
 */
static int branch_reint_rename(struct presto_file_set *fset, struct kml_rec *rec,
                               struct file *dir, struct lento_vfs_context *info,
                               char * kml_data, __u64 kml_size)
{
        struct rec_info log_rec;
        int rc;

        ENTRY;
        rc = reint_rename(rec, dir, info);
        if (rc != -ENOENT) {
                EXIT;
                return rc;
        }

        /* normal reint failed because path was not found */
        CDEBUG(D_KML, "saving branch rename kml\n");
        log_rec.is_kml = 1;
        log_rec.size = kml_size;
        rc = presto_log(fset, &log_rec, kml_data, kml_size,
                        NULL, 0, NULL, 0, NULL, 0);
        if (rc == 0)
                rc = presto_write_last_rcvd(&log_rec, fset, info);

        EXIT;
        return rc;
}
/*
 * Reintegrate one record into a branch fileset.
 *
 * CLOSE records go through reint_close(), RENAME records through
 * branch_reint_rename().  Every other opcode is not replayed: its raw
 * KML data is appended to the fileset's log with presto_log() and, on
 * success, last_rcvd is updated with presto_write_last_rcvd().
 *
 * Returns the result of the step that was run.
 */
int branch_reinter(struct presto_file_set *fset, struct kml_rec *rec,
                   struct file *dir, struct lento_vfs_context *info,
                   char * kml_data, __u64 kml_size)
{
        struct rec_info log_rec;
        int opcode = rec->prefix.hdr->opcode;
        int rc = 0;

        /* regular close and backfetch */
        if (opcode == KML_OPCODE_CLOSE)
                return reint_close(rec, dir, info);

        /* rename only if name already exists */
        if (opcode == KML_OPCODE_RENAME)
                return branch_reint_rename(fset, rec, dir, info,
                                           kml_data, kml_size);

        /* just rewrite kml into branch/kml and update last_rcvd */
        CDEBUG(D_KML, "Saving branch kml\n");
        log_rec.is_kml = 1;
        log_rec.size = kml_size;
        rc = presto_log(fset, &log_rec, kml_data, kml_size,
                        NULL, 0, NULL, 0, NULL, 0);
        if (rc == 0)
                rc = presto_write_last_rcvd(&log_rec, fset, info);

        return rc;
}
/*
 * Signature shared by all reintegration handlers: the decoded record,
 * the directory file the reintegration runs against, and the lento VFS
 * context for the operation.  Handlers return 0 or an error code.
 */
typedef int (*reinter_t)(struct kml_rec *rec, struct file *basedir,
struct lento_vfs_context *info);
/*
 * Handler table indexed by KML opcode.  Opcodes without an entry are
 * left NULL by the designated initialisers.
 */
static reinter_t presto_reinters[KML_OPCODE_NUM] =
{
[KML_OPCODE_CLOSE] = reint_close,
[KML_OPCODE_CREATE] = reint_create,
[KML_OPCODE_LINK] = reint_link,
[KML_OPCODE_MKDIR] = reint_mkdir,
[KML_OPCODE_MKNOD] = reint_mknod,
[KML_OPCODE_NOOP] = reint_noop,
[KML_OPCODE_RENAME] = reint_rename,
[KML_OPCODE_RMDIR] = reint_rmdir,
[KML_OPCODE_SETATTR] = reint_setattr,
[KML_OPCODE_SYMLINK] = reint_symlink,
[KML_OPCODE_UNLINK] = reint_unlink,
};
/*
 * Look up the reintegration handler for opcode op.  Returns NULL when
 * op lies outside the presto_reinters table; the table slot itself may
 * also be NULL.
 */
static inline reinter_t get_reinter(int op)
{
        if (op >= 0 && op < sizeof(presto_reinters) / sizeof(reinter_t))
                return presto_reinters[op];

        return NULL;
}
/*
 * Reintegrate one KML record supplied from user space.
 *
 * dir   must be the root of a fileset; anything else is rejected with
 *       -EINVAL.
 * data  ioctl argument: ioc_pbuf1/ioc_plen1 give the user buffer with
 *       the record (at most 64 KiB, else -ENOSPC), ioc_uuid selects the
 *       last_rcvd record, ioc_offset and ioc_flags feed the lento
 *       context.  ioc_kmlsize is written back with the distance ptr
 *       moved from the start of the reint buffer.
 *
 * The record is copied into fset->fset_reint_buf, unpacked, and handed
 * to the handler for its opcode (or to branch_reinter() for branch
 * filesets when ioc_flags is set).
 *
 * Returns 0 on success; -EFAULT if the copy from user space or the
 * unpacking fails; -EINVAL for an unknown opcode; otherwise the error
 * of presto_prep() or of the handler.
 */
int kml_reint_rec(struct file *dir, struct izo_ioctl_data *data)
{
char *ptr;
char *end;
struct kml_rec rec;
int error = 0;
struct lento_vfs_context info;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
int op;
reinter_t reinter;
struct izo_rcvd_rec lr_rec;
int off;
ENTRY;
error = presto_prep(dir->f_dentry, &cache, &fset);
if ( error ) {
CERROR("intermezzo: Reintegration on invalid file\n");
return error;
}
/* only the dentry that is its fileset's own root is accepted */
if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) {
CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n",
dir->f_dentry->d_inode->i_ino);
return -EINVAL;
}
/* NOTE(review): presumably 64 KiB is the size of fset_reint_buf - confirm */
if (data->ioc_plen1 > 64 * 1024) {
EXIT;
return -ENOSPC;
}
ptr = fset->fset_reint_buf;
end = ptr + data->ioc_plen1;
if (copy_from_user(ptr, data->ioc_pbuf1, data->ioc_plen1)) {
EXIT;
error = -EFAULT;
goto out;
}
/* ptr is passed by address; presumably kml_unpack() advances it past
 * the record - confirm */
error = kml_unpack(&rec, &ptr, end);
if (error) {
EXIT;
/* whatever kml_unpack() returned is reported as -EFAULT */
error = -EFAULT;
goto out;
}
off = izo_rcvd_get(&lr_rec, fset, data->ioc_uuid);
if (off < 0) {
CERROR("No last_rcvd record, setting to 0\n");
memset(&lr_rec, 0, sizeof(lr_rec));
}
data->ioc_kmlsize = ptr - fset->fset_reint_buf;
/* an unexpected record number is only logged; processing continues */
if (rec.suffix->recno != lr_rec.lr_remote_recno + 1) {
CERROR("KML record number %Lu expected, not %d\n",
(unsigned long long) (lr_rec.lr_remote_recno + 1),
rec.suffix->recno);
#if 0
if (!version_check(&rec, dd->dd_fset, &info)) {
/* FIXME: do an upcall to resolve conflicts */
CERROR("intermezzo: would be a conflict!\n");
error = -EINVAL;
EXIT;
goto out;
}
#endif
}
op = rec.prefix.hdr->opcode;
reinter = get_reinter(op);
if (!reinter) {
CERROR("%s: Unrecognized KML opcode %d\n", __FUNCTION__, op);
error = -EINVAL;
EXIT;
goto out;
}
/* offset just past this record, handed on in the context */
info.kml_offset = data->ioc_offset + data->ioc_kmlsize;
info.recno = rec.suffix->recno;
info.flags = LENTO_FL_EXPECT;
if (data->ioc_flags)
info.flags |= LENTO_FL_KML;
memcpy(info.uuid, data->ioc_uuid, sizeof(info.uuid));
/* branch filesets take a separate path when ioc_flags is set */
if (fset->fset_flags & FSET_IS_BRANCH && data->ioc_flags)
error = branch_reinter(fset, &rec, dir, &info, fset->fset_reint_buf,
data->ioc_kmlsize);
else
error = reinter(&rec, dir, &info);
out:
EXIT;
return error;
}
int izo_get_fileid(struct file *dir, struct izo_ioctl_data *data)
{
char *buf = NULL;
char *ptr;
char *end;
struct kml_rec rec;
struct file *file;
struct presto_cache *cache;
struct presto_file_set *fset;
struct presto_dentry_data *dd = presto_d2d(dir->f_dentry);
struct run_ctxt saved_ctxt;
int error;
ENTRY;
error = presto_prep(dir->f_dentry, &cache, &fset);
if ( error ) {
CERROR("intermezzo: Reintegration on invalid file\n");
return error;
}
if (!dd || !dd->dd_fset || dd->dd_fset->fset_dentry != dir->f_dentry) {
CERROR("intermezzo: reintegration on non-fset root (ino %ld)\n",
dir->f_dentry->d_inode->i_ino);
return -EINVAL;
}
PRESTO_ALLOC(buf, data->ioc_plen1);
if (!buf) {
EXIT;
return -ENOMEM;
}
ptr = buf;
end = buf + data->ioc_plen1;
if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) {
EXIT;
PRESTO_FREE(buf, data->ioc_plen1);
return -EFAULT;
}
error = kml_unpack(&rec, &ptr, end);
if (error) {
EXIT;
PRESTO_FREE(buf, data->ioc_plen1);
return -EFAULT;
}
kmlreint_pre_secure(&rec, dir, &saved_ctxt);
file = filp_open(rec.path, O_RDONLY, 0);
if (!file || IS_ERR(file)) {
error = PTR_ERR(file);
goto out;
}
data->ioc_ino = file->f_dentry->d_inode->i_ino;
data->ioc_generation = file->f_dentry->d_inode->i_generation;
filp_close(file, 0);
CDEBUG(D_FILE, "%s ino %Lx, gen %Lx\n", rec.path,
(unsigned long long) data->ioc_ino,
(unsigned long long) data->ioc_generation);
out:
if (buf)
PRESTO_FREE(buf, data->ioc_plen1);
pop_ctxt(&saved_ctxt);
EXIT;
return error;
}
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include "intermezzo_fs.h"
#include "intermezzo_upcall.h"
#include "intermezzo_psdev.h"
#include "intermezzo_kml.h"
/*
 * Allocate and attach the per-fileset KML bookkeeping structure.
 *
 * Allocates a struct kml_fsdata plus a KML_REINT_MAXBUF byte buffer,
 * initializes the two cache lists and the counters, and stores the
 * result in fset->fset_kmldata.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails (nothing
 * stays allocated in that case).
 */
int kml_init (struct presto_file_set *fset)
{
        struct kml_fsdata *data;
        int rc = -ENOMEM;

        ENTRY;
        PRESTO_ALLOC (data, struct kml_fsdata *, sizeof (struct kml_fsdata));
        if (data == NULL)
                goto out;

        INIT_LIST_HEAD (&data->kml_reint_cache);
        INIT_LIST_HEAD (&data->kml_kop_cache);

        PRESTO_ALLOC (data->kml_buf, char *, KML_REINT_MAXBUF);
        if (data->kml_buf == NULL) {
                /* undo the first allocation before reporting failure */
                PRESTO_FREE (data, sizeof (struct kml_fsdata));
                goto out;
        }

        data->kml_maxsize = KML_REINT_MAXBUF;
        data->kml_len = 0;
        data->kml_reintpos = 0;
        data->kml_count = 0;
        fset->fset_kmldata = data;
        rc = 0;
out:
        EXIT;
        return rc;
}
/*
 * Detach and release the KML bookkeeping structure of @fset, if any.
 * The pointer in the fileset is cleared before the memory is freed.
 * Always returns 0.
 */
int kml_cleanup (struct presto_file_set *fset)
{
        struct kml_fsdata *data = fset->fset_kmldata;

        if (data != NULL) {
                fset->fset_kmldata = NULL;
#if 0
                kml_sop_cleanup (&data->kml_reint_cache);
                kml_kop_cleanup (&data->kml_kop_cache);
#endif
                PRESTO_FREE (data->kml_buf, KML_REINT_MAXBUF);
                PRESTO_FREE (data, sizeof (struct kml_fsdata));
        }

        return 0;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Unpacking of KML records
*
*/
#ifdef __KERNEL__
# include <linux/module.h>
# include <linux/errno.h>
# include <linux/kernel.h>
# include <linux/major.h>
# include <linux/sched.h>
# include <linux/lp.h>
# include <linux/slab.h>
# include <linux/ioport.h>
# include <linux/fcntl.h>
# include <linux/delay.h>
# include <linux/skbuff.h>
# include <linux/proc_fs.h>
# include <linux/vmalloc.h>
# include <linux/fs.h>
# include <linux/poll.h>
# include <linux/init.h>
# include <linux/list.h>
# include <linux/stat.h>
# include <asm/io.h>
# include <asm/segment.h>
# include <asm/system.h>
# include <asm/poll.h>
# include <asm/uaccess.h>
#else
# include <time.h>
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
# include <errno.h>
# include <sys/stat.h>
# include <glib.h>
#endif
#include "intermezzo_lib.h"
#include "intermezzo_idl.h"
#include "intermezzo_fs.h"
/*
 * Unpack one struct presto_version from the record buffer.
 *
 * UNLOGP stores a pointer for the version in *ver; the five fields are
 * then converted in place with NTOH__u32 / NTOH__u64.  On the path
 * visible here *buf is advanced to the position after the version and 0
 * is returned.
 *
 * NOTE(review): UNLOGP is defined elsewhere; presumably it bounds-checks
 * against @end and may return early with an error -- confirm.
 */
int kml_unpack_version(struct presto_version **ver, char **buf, char *end)
{
char *ptr = *buf;
struct presto_version *pv;
UNLOGP(*ver, struct presto_version, ptr, end);
pv = *ver;
/* convert each field in place */
pv->pv_mtime_sec = NTOH__u32(pv->pv_mtime_sec);
pv->pv_mtime_nsec = NTOH__u32(pv->pv_mtime_nsec);
pv->pv_ctime_sec = NTOH__u32(pv->pv_ctime_sec);
pv->pv_ctime_nsec = NTOH__u32(pv->pv_ctime_nsec);
pv->pv_size = NTOH__u64(pv->pv_size);
*buf = ptr;
return 0;
}
/*
 * Unpacker for NOOP records: there is no body, so nothing is consumed
 * and *buf is left where it is.  Always returns 0.
 */
static int kml_unpack_noop(struct kml_rec *rec, char **buf, char *end)
{
return 0;
}
/*
 * Unpack the body of a GET_FILEID record: a 32-bit path length followed
 * by the path itself.  *buf is advanced past the consumed bytes.
 *
 * NOTE(review): LUNLOGV / UNLOGL are defined elsewhere; presumably they
 * bounds-check against @end -- confirm.
 */
static int kml_unpack_get_fileid(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;
LUNLOGV(rec->pathlen, __u32, ptr, end);
UNLOGL(rec->path, char, rec->pathlen, ptr, end);
*buf = ptr;
return 0;
}
/*
 * Unpack the body of a CREATE record into @rec: three version blocks
 * (old parent, new parent, new object), then mode, uid, gid and path
 * length as 32-bit values, then the path.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_create(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a MKDIR record into @rec: three version blocks
 * (old parent, new parent, new object), then mode, uid, gid and path
 * length as 32-bit values, then the path.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_mkdir(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of an UNLINK record into @rec: three version blocks
 * (old parent, new parent, old object), the old mode and rdev (32 bit),
 * old uid and gid (64 bit), the three string lengths, and finally the
 * path, target and old target strings.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_unlink(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->old_mode, __u32, ptr, end);
        LUNLOGV(rec->old_rdev, __u32, ptr, end);
        LUNLOGV(rec->old_uid, __u64, ptr, end);
        LUNLOGV(rec->old_gid, __u64, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        LUNLOGV(rec->old_targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);
        UNLOGL(rec->old_target, char, rec->old_targetlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of an RMDIR record into @rec: three version blocks
 * (old parent, new parent, old object), the old mode and rdev (32 bit),
 * old uid and gid (64 bit), path and target lengths, then the path and
 * target strings.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_rmdir(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->old_mode, __u32, ptr, end);
        LUNLOGV(rec->old_rdev, __u32, ptr, end);
        LUNLOGV(rec->old_uid, __u64, ptr, end);
        LUNLOGV(rec->old_gid, __u64, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a CLOSE record into @rec: the open mode, uid and
 * gid (stored in rec->mode / uid / gid), two version blocks (old object,
 * new object), the 64-bit inode number, the 32-bit generation, and the
 * path length followed by the path.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_close(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        LUNLOGV(rec->mode, __u32, ptr, end);  // used for open_mode
        LUNLOGV(rec->uid, __u32, ptr, end);   // used for open_uid
        LUNLOGV(rec->gid, __u32, ptr, end);   // used for open_gid

        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->ino, __u64, ptr, end);
        LUNLOGV(rec->generation, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a SYMLINK record into @rec: three version blocks
 * (old parent, new parent, new object), uid, gid, path length and target
 * length as 32-bit values, then the path and target strings.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_symlink(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a RENAME record into @rec: four version blocks
 * (old object, new object, old parent, new parent), the path and target
 * lengths as 32-bit values, then the path and target strings.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_rename(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a SETATTR record into @rec: the old object version,
 * the new attribute values (valid mask, mode, uid, gid, 64-bit size,
 * mtime and ctime as sec/nsec pairs, flags), the previous mode, rdev,
 * uid and gid, and the path length followed by the path.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_setattr(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->valid, __u32, ptr, end);
        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->size, __u64, ptr, end);
        LUNLOGV(rec->mtime_sec, __u32, ptr, end);
        LUNLOGV(rec->mtime_nsec, __u32, ptr, end);
        LUNLOGV(rec->ctime_sec, __u32, ptr, end);
        LUNLOGV(rec->ctime_nsec, __u32, ptr, end);
        LUNLOGV(rec->flags, __u32, ptr, end);
        LUNLOGV(rec->old_mode, __u32, ptr, end);
        LUNLOGV(rec->old_rdev, __u32, ptr, end);
        LUNLOGV(rec->old_uid, __u64, ptr, end);
        LUNLOGV(rec->old_gid, __u64, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a LINK record into @rec: three version blocks
 * (old parent, new parent, new object), the path and target lengths as
 * 32-bit values, then the path and target strings.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_link(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a MKNOD record into @rec: three version blocks
 * (old parent, new parent, new object), then mode, uid, gid, device
 * major and minor and the path length as 32-bit values, then the path.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_mknod(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_parentv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->uid, __u32, ptr, end);
        LUNLOGV(rec->gid, __u32, ptr, end);
        LUNLOGV(rec->major, __u32, ptr, end);
        LUNLOGV(rec->minor, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Placeholder unpacker for WRITE records: emits "NOT IMPLEMENTED" via
 * printf and returns 0 without reading @rec or advancing *buf.
 * NOTE(review): returning success for an unparsed body looks
 * unintended -- confirm whether an error code would be more appropriate.
 */
static int kml_unpack_write(struct kml_rec *rec, char **buf, char *end)
{
printf("NOT IMPLEMENTED");
return 0;
}
/*
 * Placeholder unpacker for RELEASE records: emits "NOT IMPLEMENTED" via
 * printf and returns 0 without reading @rec or advancing *buf.
 */
static int kml_unpack_release(struct kml_rec *rec, char **buf, char *end)
{
printf("NOT IMPLEMENTED");
return 0;
}
/*
 * Placeholder unpacker for TRUNC records: emits "NOT IMPLEMENTED" via
 * printf and returns 0 without reading @rec or advancing *buf.
 */
static int kml_unpack_trunc(struct kml_rec *rec, char **buf, char *end)
{
printf("NOT IMPLEMENTED");
return 0;
}
/*
 * Unpack the body of a SETEXTATTR record into @rec: two version blocks
 * (old object, new object), flags, mode and the path, name and target
 * lengths as 32-bit values, then the path, name and target strings.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_setextattr(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->flags, __u32, ptr, end);
        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->namelen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->name, char, rec->namelen, ptr, end);
        UNLOGL(rec->target, char, rec->targetlen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Unpack the body of a DELEXTATTR record into @rec: two version blocks
 * (old object, new object), flags, mode and the path, name and target
 * lengths as 32-bit values, then the path and name strings.  The target
 * length is read but no target string follows it.
 *
 * Returns 0 with *buf advanced past the body.  A non-zero result from
 * kml_unpack_version() is returned as is and *buf is left unchanged.
 */
static int kml_unpack_delextattr(struct kml_rec *rec, char **buf, char *end)
{
        char *ptr = *buf;
        int err;

        err = kml_unpack_version(&rec->old_objectv, &ptr, end);
        if (err)
                return err;
        err = kml_unpack_version(&rec->new_objectv, &ptr, end);
        if (err)
                return err;

        LUNLOGV(rec->flags, __u32, ptr, end);
        LUNLOGV(rec->mode, __u32, ptr, end);
        LUNLOGV(rec->pathlen, __u32, ptr, end);
        LUNLOGV(rec->namelen, __u32, ptr, end);
        LUNLOGV(rec->targetlen, __u32, ptr, end);
        UNLOGL(rec->path, char, rec->pathlen, ptr, end);
        UNLOGL(rec->name, char, rec->namelen, ptr, end);

        *buf = ptr;
        return 0;
}
/*
 * Placeholder unpacker for OPEN records: emits "NOT IMPLEMENTED" via
 * printf and returns 0 without reading @rec or advancing *buf.
 */
static int kml_unpack_open(struct kml_rec *rec, char **buf, char *end)
{
printf("NOT IMPLEMENTED");
return 0;
}
/*
 * Placeholder unpacker for KML_TRUNC records: emits "NOT IMPLEMENTED"
 * via printf and returns 0 without reading @rec or advancing *buf.
 */
static int kml_unpack_kml_trunc(struct kml_rec *rec, char **buf, char *end)
{
printf("NOT IMPLEMENTED");
return 0;
}
/*
 * Signature shared by the per-opcode body unpackers: parse the record
 * body starting at *buf (bounded by @end) into @rec and advance *buf.
 */
typedef int (*unpacker)(struct kml_rec *rec, char **buf, char *end);
/*
 * Opcode -> body unpacker table, indexed by KML_OPCODE_*; kml_unpack()
 * dispatches through it using the opcode from the record prefix.
 */
static unpacker unpackers[KML_OPCODE_NUM] =
{
[KML_OPCODE_NOOP] = kml_unpack_noop,
[KML_OPCODE_CREATE] = kml_unpack_create,
[KML_OPCODE_MKDIR] = kml_unpack_mkdir,
[KML_OPCODE_UNLINK] = kml_unpack_unlink,
[KML_OPCODE_RMDIR] = kml_unpack_rmdir,
[KML_OPCODE_CLOSE] = kml_unpack_close,
[KML_OPCODE_SYMLINK] = kml_unpack_symlink,
[KML_OPCODE_RENAME] = kml_unpack_rename,
[KML_OPCODE_SETATTR] = kml_unpack_setattr,
[KML_OPCODE_LINK] = kml_unpack_link,
[KML_OPCODE_OPEN] = kml_unpack_open,
[KML_OPCODE_MKNOD] = kml_unpack_mknod,
[KML_OPCODE_WRITE] = kml_unpack_write,
[KML_OPCODE_RELEASE] = kml_unpack_release,
[KML_OPCODE_TRUNC] = kml_unpack_trunc,
[KML_OPCODE_SETEXTATTR] = kml_unpack_setextattr,
[KML_OPCODE_DELEXTATTR] = kml_unpack_delextattr,
[KML_OPCODE_KML_TRUNC] = kml_unpack_kml_trunc,
[KML_OPCODE_GET_FILEID] = kml_unpack_get_fileid
};
int kml_unpack_prefix(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;
int n;
UNLOGP(rec->prefix.hdr, struct kml_prefix_hdr, ptr, end);
rec->prefix.hdr->len = NTOH__u32(rec->prefix.hdr->len);
rec->prefix.hdr->version = NTOH__u32(rec->prefix.hdr->version);
rec->prefix.hdr->pid = NTOH__u32(rec->prefix.hdr->pid);
rec->prefix.hdr->auid = NTOH__u32(rec->prefix.hdr->auid);
rec->prefix.hdr->fsuid = NTOH__u32(rec->prefix.hdr->fsuid);
rec->prefix.hdr->fsgid = NTOH__u32(rec->prefix.hdr->fsgid);
rec->prefix.hdr->opcode = NTOH__u32(rec->prefix.hdr->opcode);
rec->prefix.hdr->ngroups = NTOH__u32(rec->prefix.hdr->ngroups);
UNLOGL(rec->prefix.groups, __u32, rec->prefix.hdr->ngroups, ptr, end);
for (n = 0; n < rec->prefix.hdr->ngroups; n++) {
rec->prefix.groups[n] = NTOH__u32(rec->prefix.groups[n]);
}
*buf = ptr;
return 0;
}
int kml_unpack_suffix(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;
UNLOGP(rec->suffix, struct kml_suffix, ptr, end);
rec->suffix->prevrec = NTOH__u32(rec->suffix->prevrec);
rec->suffix->recno = NTOH__u32(rec->suffix->recno);
rec->suffix->time = NTOH__u32(rec->suffix->time);
rec->suffix->len = NTOH__u32(rec->suffix->len);
*buf = ptr;
return 0;
}
int kml_unpack(struct kml_rec *rec, char **buf, char *end)
{
char *ptr = *buf;
int err;
if (((unsigned long)ptr % 4) != 0) {
printf("InterMezzo: %s: record misaligned.\n", __FUNCTION__);
return -EINVAL;
}
while (ptr < end) {
__u32 *i = (__u32 *)ptr;
if (*i)
break;
ptr += sizeof(*i);
}
*buf = ptr;
memset(rec, 0, sizeof(*rec));
err = kml_unpack_prefix(rec, &ptr, end);
if (err) {
printf("InterMezzo: %s: unpack_prefix failed: %d\n",
__FUNCTION__, err);
return err;
}
if (rec->prefix.hdr->opcode < 0 ||
rec->prefix.hdr->opcode >= KML_OPCODE_NUM) {
printf("InterMezzo: %s: invalid opcode (%d)\n",
__FUNCTION__, rec->prefix.hdr->opcode);
return -EINVAL;
}
err = unpackers[rec->prefix.hdr->opcode](rec, &ptr, end);
if (err) {
printf("InterMezzo: %s: unpacker failed: %d\n",
__FUNCTION__, err);
return err;
}
err = kml_unpack_suffix(rec, &ptr, end);
if (err) {
printf("InterMezzo: %s: unpack_suffix failed: %d\n",
__FUNCTION__, err);
return err;
}
if (rec->prefix.hdr->len != rec->suffix->len) {
printf("InterMezzo: %s: lengths don't match\n",
__FUNCTION__);
return -EINVAL;
}
if ((rec->prefix.hdr->len % 4) != 0) {
printf("InterMezzo: %s: record length not a "
"multiple of 4.\n", __FUNCTION__);
return -EINVAL;
}
if (ptr - *buf != rec->prefix.hdr->len) {
printf("InterMezzo: %s: unpacking error\n",
__FUNCTION__);
return -EINVAL;
}
while (ptr < end) {
__u32 *i = (__u32 *)ptr;
if (*i)
break;
ptr += sizeof(*i);
}
*buf = ptr;
return 0;
}
#ifndef __KERNEL__
/*
 * STR(ptr): yield @ptr, or "" when @ptr is NULL.  The whole expansion is
 * parenthesized so the macro can be used inside larger expressions.
 * Note that @ptr is evaluated twice.
 */
#define STR(ptr) ((ptr) ? (ptr) : "")
/*
 * OPNAME(n) builds one designated initializer mapping KML_OPCODE_<n> to
 * the string "<n>"; it is only needed while the table below is defined.
 */
#define OPNAME(n) [KML_OPCODE_##n] = #n
/* Printable opcode names, indexed by KML_OPCODE_*. */
static char *opnames[KML_OPCODE_NUM] = {
OPNAME(NOOP),
OPNAME(CREATE),
OPNAME(MKDIR),
OPNAME(UNLINK),
OPNAME(RMDIR),
OPNAME(CLOSE),
OPNAME(SYMLINK),
OPNAME(RENAME),
OPNAME(SETATTR),
OPNAME(LINK),
OPNAME(OPEN),
OPNAME(MKNOD),
OPNAME(WRITE),
OPNAME(RELEASE),
OPNAME(TRUNC),
OPNAME(SETEXTATTR),
OPNAME(DELEXTATTR),
OPNAME(KML_TRUNC),
OPNAME(GET_FILEID)
};
#undef OPNAME
/*
 * Return the printable name for opcode @op from opnames[], or NULL when
 * @op lies outside the table.  The returned string is not a copy and
 * must not be freed.
 */
static char *print_opname(int op)
{
        if (op >= 0 && (size_t)op < sizeof (opnames) / sizeof (opnames[0]))
                return opnames[op];

        return NULL;
}
/*
 * Format the timestamp @i (seconds) as "YYYY/MM/DD HH:MM:SS" in UTC.
 *
 * Returns a strdup()ed string the caller must free(); the string is
 * empty if the value cannot be converted by gmtime().
 *
 * The value is first copied into a real time_t: handing gmtime() a
 * pointer to the 64-bit argument reinterpreted as time_t reads the wrong
 * bytes wherever time_t is not a 64-bit type.  This function is only
 * compiled outside the kernel, so no in-kernel variant is kept.
 */
static char *print_time(__u64 i)
{
        char buf[128];
        time_t when = (time_t)i;
        struct tm *utc;

        memset(buf, 0, sizeof(buf));

        utc = gmtime(&when);
        if (utc != NULL)
                strftime(buf, sizeof(buf), "%Y/%m/%d %H:%M:%S", utc);

        return strdup(buf);
}
/*
 * Render a presto_version as "mtime ..., ctime ..., len ...".
 *
 * Returns a strdup()ed string the caller must free().  A NULL @ver or a
 * version whose pv_ctime is zero yields an empty string.
 */
static char *print_version(struct presto_version *ver)
{
        char ver_buf[128];
        char *mtime_str;
        char *ctime_str;
        char *result;

        if (ver == NULL || ver->pv_ctime == 0)
                return strdup("");

        mtime_str = print_time(ver->pv_mtime);
        ctime_str = print_time(ver->pv_ctime);

        sprintf(ver_buf, "mtime %s, ctime %s, len %lld",
                mtime_str, ctime_str, ver->pv_size);
        result = strdup(ver_buf);

        /* the formatted times are no longer needed once copied */
        free(mtime_str);
        free(ctime_str);

        return result;
}
char *kml_print_rec(struct kml_rec *rec, int brief)
{
char *str;
char *nov, *oov, *ntv, *otv, *npv, *opv;
char *rectime, *mtime, *ctime;
if (brief) {
str = g_strdup_printf(" %08d %7s %*s %*s",
rec->suffix->recno,
print_opname (rec->prefix.hdr->opcode),
rec->pathlen, STR(rec->path),
rec->targetlen, STR(rec->target));
return str;
}
rectime = print_time(rec->suffix->time);
mtime = print_time(rec->mtime);
ctime = print_time(rec->ctime);
nov = print_version(rec->new_objectv);
oov = print_version(rec->old_objectv);
ntv = print_version(rec->new_targetv);
otv = print_version(rec->old_targetv);
npv = print_version(rec->new_parentv);
opv = print_version(rec->old_parentv);
str = g_strdup_printf("\n -- Record:\n"
" Recno %d\n"
" KML off %lld\n"
" Version %d\n"
" Len %d\n"
" Suf len %d\n"
" Time %s\n"
" Opcode %d\n"
" Op %s\n"
" Pid %d\n"
" AUid %d\n"
" Fsuid %d\n"
" Fsgid %d\n"
" Prevrec %d\n"
" Ngroups %d\n"
//" Groups @{$self->{groups}}\n"
" -- Path:\n"
" Inode %d\n"
" Gen num %u\n"
" Old mode %o\n"
" Old rdev %x\n"
" Old uid %llu\n"
" Old gid %llu\n"
" Path %*s\n"
//" Open_mode %o\n",
" Pathlen %d\n"
" Tgt %*s\n"
" Tgtlen %d\n"
" Old Tgt %*s\n"
" Old Tgtln %d\n"
" -- Attr:\n"
" Valid %x\n"
" mode %o, uid %d, gid %d, size %lld, mtime %s, ctime %s rdev %x (%d:%d)\n"
" -- Versions:\n"
" New object %s\n"
" Old object %s\n"
" New target %s\n"
" Old target %s\n"
" New parent %s\n"
" Old parent %s\n",
rec->suffix->recno,
rec->offset,
rec->prefix.hdr->version,
rec->prefix.hdr->len,
rec->suffix->len,
rectime,
rec->prefix.hdr->opcode,
print_opname (rec->prefix.hdr->opcode),
rec->prefix.hdr->pid,
rec->prefix.hdr->auid,
rec->prefix.hdr->fsuid,
rec->prefix.hdr->fsgid,
rec->suffix->prevrec,
rec->prefix.hdr->ngroups,
rec->ino,
rec->generation,
rec->old_mode,
rec->old_rdev,
rec->old_uid,
rec->old_gid,
rec->pathlen,
STR(rec->path),
rec->pathlen,
rec->targetlen,
STR(rec->target),
rec->targetlen,
rec->old_targetlen,
STR(rec->old_target),
rec->old_targetlen,
rec->valid,
rec->mode,
rec->uid,
rec->gid,
rec->size,
mtime,
ctime,
rec->rdev, rec->major, rec->minor,
nov, oov, ntv, otv, npv, opv);
free(nov);
free(oov);
free(ntv);
free(otv);
free(npv);
free(opv);
free(rectime);
free(ctime);
free(mtime);
return str;
}
#endif
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "intermezzo_fs.h"
#include "intermezzo_kml.h"
// dlogit -- the opposite of logit ():
// copy `size' bytes from sbuf into tbuf and
// return sbuf + size, i.e. the first source byte not yet consumed.
char *dlogit (void *tbuf, const void *sbuf, int size)
{
        memcpy(tbuf, sbuf, size);
        return (char *)sbuf + size;
}
/* Serializes use of the shared formatting buffer below. */
static spinlock_t kml_lock = SPIN_LOCK_UNLOCKED;
/* Scratch buffer for bdup_printf(); only touched with kml_lock held. */
static char buf[1024];

/*
 * Format a string and return it in a freshly allocated buffer.
 *
 * The text is formatted into the shared scratch buffer under kml_lock
 * and then copied into memory obtained with PRESTO_ALLOC.  Output longer
 * than the scratch buffer is truncated rather than overrunning it.
 *
 * Returns the new string, or NULL if the allocation fails.  The lock is
 * released on every path.
 *
 * NOTE(review): the allocation happens with the spinlock held and
 * interrupts disabled; presumably PRESTO_ALLOC must not sleep here --
 * confirm.
 */
char * bdup_printf (char *format, ...)
{
        va_list args;
        int i;
        char *path;
        unsigned long flags;

        spin_lock_irqsave(&kml_lock, flags);

        va_start(args, format);
        i = vsnprintf(buf, sizeof(buf), format, args);
        va_end(args);

        /* Keep i equal to the length of what is actually stored in buf. */
        if (i < 0) {
                buf[0] = '\0';
                i = 0;
        } else if (i >= (int)sizeof(buf)) {
                i = sizeof(buf) - 1;
        }

        PRESTO_ALLOC (path, char *, i + 1);
        if (path != NULL)
                strcpy (path, buf);

        spin_unlock_irqrestore(&kml_lock, flags);
        return path;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
* Copyright (C) 2000 Mountain View Data, Inc.
*
* Extended Attribute Support
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fsfilter.h>
#include "intermezzo_fs.h"
/* Debug switch, off by default. */
int filter_print_entry = 0;
/* Debug mask; the default 0xfffffff has the low 28 bits set. */
int filter_debug = 0xfffffff;
/*
 * The functions in this file are responsible for setting up the
 * correct methods for layered file systems like InterMezzo and snapfs.
 */
/* One filter_fs slot per supported cache file system type (FILTER_FS_*). */
static struct filter_fs filter_oppar[FILTER_FS_TYPES];
/* get to the upper methods (intermezzo, snapfs) */
inline struct super_operations *filter_c2usops(struct filter_fs *cache)
{
return &cache->o_fops.filter_sops;
}
inline struct inode_operations *filter_c2udiops(struct filter_fs *cache)
{
return &cache->o_fops.filter_dir_iops;
}
inline struct inode_operations *filter_c2ufiops(struct filter_fs *cache)
{
return &cache->o_fops.filter_file_iops;
}
inline struct inode_operations *filter_c2usiops(struct filter_fs *cache)
{
return &cache->o_fops.filter_sym_iops;
}
inline struct file_operations *filter_c2udfops(struct filter_fs *cache)
{
return &cache->o_fops.filter_dir_fops;
}
inline struct file_operations *filter_c2uffops(struct filter_fs *cache)
{
return &cache->o_fops.filter_file_fops;
}
inline struct file_operations *filter_c2usfops(struct filter_fs *cache)
{
return &cache->o_fops.filter_sym_fops;
}
inline struct dentry_operations *filter_c2udops(struct filter_fs *cache)
{
return &cache->o_fops.filter_dentry_ops;
}
/* get to the cache (lower) methods */
inline struct super_operations *filter_c2csops(struct filter_fs *cache)
{
return cache->o_caops.cache_sops;
}
inline struct inode_operations *filter_c2cdiops(struct filter_fs *cache)
{
return cache->o_caops.cache_dir_iops;
}
inline struct inode_operations *filter_c2cfiops(struct filter_fs *cache)
{
return cache->o_caops.cache_file_iops;
}
inline struct inode_operations *filter_c2csiops(struct filter_fs *cache)
{
return cache->o_caops.cache_sym_iops;
}
inline struct file_operations *filter_c2cdfops(struct filter_fs *cache)
{
return cache->o_caops.cache_dir_fops;
}
inline struct file_operations *filter_c2cffops(struct filter_fs *cache)
{
return cache->o_caops.cache_file_fops;
}
inline struct file_operations *filter_c2csfops(struct filter_fs *cache)
{
return cache->o_caops.cache_sym_fops;
}
inline struct dentry_operations *filter_c2cdops(struct filter_fs *cache)
{
return cache->o_caops.cache_dentry_ops;
}
/*
 * Select the journal operations for the cache file system named by
 * @cache_type and store them in ops->o_trops.
 *
 * For a recognized name whose support is not compiled in (or is
 * disabled with "#if 0"), o_trops is set to NULL.  An unrecognized name
 * leaves o_trops untouched.
 *
 * The obdfs branch takes the address of presto_obdfs_journal_ops like
 * every other branch does for its ops object.
 * NOTE(review): this assumes presto_obdfs_journal_ops is declared as an
 * object like its siblings rather than as a pointer -- confirm.
 */
void filter_setup_journal_ops(struct filter_fs *ops, char *cache_type)
{
        if (strcmp(cache_type, "ext2") == 0) {
#ifdef CONFIG_EXT2_FS
                ops->o_trops = &presto_ext2_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "ext3") == 0) {
#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
                ops->o_trops = &presto_ext3_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "tmpfs") == 0) {
#if defined(CONFIG_TMPFS)
                ops->o_trops = &presto_tmpfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "reiserfs") == 0) {
#if 0
                /* #if defined(CONFIG_REISERFS_FS) || defined(CONFIG_REISERFS_FS_MODULE) */
                ops->o_trops = &presto_reiserfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "xfs") == 0) {
#if 0
                /*#if defined(CONFIG_XFS_FS) || defined (CONFIG_XFS_FS_MODULE) */
                ops->o_trops = &presto_xfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else if (strcmp(cache_type, "obdfs") == 0) {
#if defined(CONFIG_OBDFS_FS) || defined (CONFIG_OBDFS_FS_MODULE)
                ops->o_trops = &presto_obdfs_journal_ops;
#else
                ops->o_trops = NULL;
#endif
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        }
}
/*
 * Find the filter_fs slot for the cache file system named @cache_type.
 *
 * Returns a pointer into filter_oppar[] for "ext2", "xfs", "ext3",
 * "tmpfs", "reiserfs" and "obdfs"; any other name is reported with
 * CERROR and NULL is returned.
 */
struct filter_fs *filter_get_filter_fs(const char *cache_type)
{
        struct filter_fs *ops = NULL;

        FENTRY;

        if (strcmp(cache_type, "ext2") == 0)
                ops = &filter_oppar[FILTER_FS_EXT2];
        else if (strcmp(cache_type, "xfs") == 0)
                ops = &filter_oppar[FILTER_FS_XFS];
        else if (strcmp(cache_type, "ext3") == 0)
                ops = &filter_oppar[FILTER_FS_EXT3];
        else if (strcmp(cache_type, "tmpfs") == 0)
                ops = &filter_oppar[FILTER_FS_TMPFS];
        else if (strcmp(cache_type, "reiserfs") == 0)
                ops = &filter_oppar[FILTER_FS_REISERFS];
        else if (strcmp(cache_type, "obdfs") == 0)
                ops = &filter_oppar[FILTER_FS_OBDFS];

        if (ops != NULL) {
                FDEBUG(D_SUPER, "ops at %p\n", ops);
        } else {
                CERROR("prepare to die: unrecognized cache type for Filter\n");
        }

        FEXIT;
        return ops;
}
/*
 * Frobnicate the InterMezzo operations:
 * this establishes the link between the InterMezzo file system
 * and the underlying file system used for the cache.
 *
 * Runs at most once per filter_fs (guarded by FILTER_DID_SUPER_OPS).
 * @cache_sops is remembered as the lower super_operations and copied
 * into the filter's own table, after which selected methods are
 * replaced by those from @filter_sops.
 */
void filter_setup_super_ops(struct filter_fs *cache, struct super_operations *cache_sops, struct super_operations *filter_sops)
{
        /* Get ptr to the shared struct snapfs_ops structure. */
        struct filter_ops *props = &cache->o_fops;
        /* Get ptr to the shared struct cache_ops structure. */
        struct cache_ops *caops = &cache->o_caops;

        FENTRY;

        if ( cache->o_flags & FILTER_DID_SUPER_OPS ) {
                FEXIT;
                return;
        }
        cache->o_flags |= FILTER_DID_SUPER_OPS;

        /* Set the cache superblock operations to point to the
           superblock operations of the underlying file system. */
        caops->cache_sops = cache_sops;

        /*
         * Copy the cache (real fs) superblock ops to the "filter"
         * superblock ops as defaults. Some will be changed below
         */
        memcpy(&props->filter_sops, cache_sops, sizeof(*cache_sops));

        /* 'put_super' is that of the filter whenever the filter has one */
        if (filter_sops->put_super) {
                props->filter_sops.put_super = filter_sops->put_super;
        }

        /* read_inode and remount_fs are filtered only if the cache fs has them */
        if (cache_sops->read_inode) {
                props->filter_sops.read_inode = filter_sops->read_inode;
                /* print the cache_ops pointer under its own label */
                FDEBUG(D_INODE, "setting filter_read_inode, cache_ops %p, cache %p, ri at %p\n",
                       caops, cache, props->filter_sops.read_inode);
        }

        if (cache_sops->remount_fs)
                props->filter_sops.remount_fs = filter_sops->remount_fs;
        FEXIT;
}
/*
 * Build the filter's directory inode and file operations from those
 * currently installed on @inode.
 *
 * Runs at most once per filter_fs (guarded by FILTER_DID_DIR_OPS).  The
 * inode's present i_op / i_fop are remembered as the cache (lower)
 * operations and copied into the filter's tables; individual methods are
 * then replaced by the ones from @filter_iops / @filter_fops.
 */
void filter_setup_dir_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
{
        struct inode_operations *cache_iops = inode->i_op;
        struct file_operations *cache_fops = inode->i_fop;
        struct inode_operations *upper_iops = filter_c2udiops(cache);

/* Take the filter's method only when both layers provide one. */
#define FILTER_TAKE_IF_BOTH(method)                                     \
        do {                                                            \
                if (cache_iops->method && filter_iops->method)          \
                        upper_iops->method = filter_iops->method;       \
        } while (0)

        FENTRY;

        if ( cache->o_flags & FILTER_DID_DIR_OPS ) {
                FEXIT;
                return;
        }
        cache->o_flags |= FILTER_DID_DIR_OPS;

        /* former ops become cache_ops */
        cache->o_caops.cache_dir_iops = cache_iops;
        cache->o_caops.cache_dir_fops = cache_fops;
        FDEBUG(D_SUPER, "filter at %p, cache iops %p, iops %p\n",
               cache, cache_iops, upper_iops);

        /* setup our dir iops: copy and modify */
        memcpy(upper_iops, cache_iops, sizeof(*cache_iops));

        /* methods that filter if cache filesystem has these ops */
        FILTER_TAKE_IF_BOTH(lookup);
        FILTER_TAKE_IF_BOTH(create);
        FILTER_TAKE_IF_BOTH(link);
        FILTER_TAKE_IF_BOTH(unlink);
        FILTER_TAKE_IF_BOTH(mkdir);
        FILTER_TAKE_IF_BOTH(rmdir);
        FILTER_TAKE_IF_BOTH(symlink);
        FILTER_TAKE_IF_BOTH(rename);
        FILTER_TAKE_IF_BOTH(mknod);
        FILTER_TAKE_IF_BOTH(permission);

        /* getattr depends only on the cache fs having one */
        if (cache_iops->getattr)
                upper_iops->getattr = filter_iops->getattr;

        /* Some filesystems do not use a setattr method of their own
           instead relying on inode_setattr/write_inode. We still need to
           journal these so we make setattr an unconditional operation.
           XXX: we should probably check for write_inode. SHP
        */
        /*if (cache_iops->setattr)*/
        upper_iops->setattr = filter_iops->setattr;

#ifdef CONFIG_FS_EXT_ATTR
        /* For now we assume that posix acls are handled through extended
         * attributes. If this is not the case, we must explicitly trap
         * posix_set_acl. SHP
         */
        FILTER_TAKE_IF_BOTH(set_ext_attr);
#endif

        /* copy dir fops */
        memcpy(filter_c2udfops(cache), cache_fops, sizeof(*cache_fops));

        /* unconditional filtering operations */
        filter_c2udfops(cache)->ioctl = filter_fops->ioctl;

        FEXIT;
#undef FILTER_TAKE_IF_BOTH
}
/*
 * Build the filter's inode and file operation tables for regular files.
 *
 * The operations currently attached to @inode are recorded in
 * cache->o_caops and copied into the filter's own tables; selected methods
 * are then replaced by the ones supplied in @filter_iops and @filter_fops.
 * The work is done at most once per filter: FILTER_DID_FILE_OPS in
 * cache->o_flags marks it as done.
 */
void filter_setup_file_ops(struct filter_fs *cache, struct inode *inode,
                           struct inode_operations *filter_iops,
                           struct file_operations *filter_fops)
{
        struct inode_operations *orig_iops = inode->i_op;
        struct file_operations *orig_fops = inode->i_fop;
        struct inode_operations *file_iops;

        FENTRY;

        /* nothing to do when the file tables were built earlier */
        if ( cache->o_flags & FILTER_DID_FILE_OPS ) {
                FEXIT;
                return;
        }
        cache->o_flags |= FILTER_DID_FILE_OPS;

        /* steal the old ops: the former ops become the cache ops */
        cache->o_caops.cache_file_iops = orig_iops;
        cache->o_caops.cache_file_fops = orig_fops;

        /* start from verbatim copies of the inode's current tables */
        file_iops = filter_c2ufiops(cache);
        memcpy(file_iops, orig_iops, sizeof(*orig_iops));
        CERROR("*** cache file ops at %p\n", orig_fops);
        memcpy(filter_c2uffops(cache), orig_fops, sizeof(*orig_fops));

        /* setattr is always filtered; see the comments in
         * filter_setup_dir_ops. SHP */
        file_iops->setattr = filter_iops->setattr;

        /* getattr is filtered only when the original table had one */
        if (orig_iops->getattr)
                file_iops->getattr = filter_iops->getattr;

        /* XXX Should this be conditional rmr ? */
        file_iops->permission = filter_iops->permission;

#ifdef CONFIG_FS_EXT_ATTR
        /* For now we assume that posix acls are handled through extended
         * attributes. If this is not the case, we must explicitly trap and
         * posix_set_acl
         */
        if (orig_iops->set_ext_attr && filter_iops->set_ext_attr)
                file_iops->set_ext_attr = filter_iops->set_ext_attr;
#endif

        /* file methods that are filtered unconditionally */
        filter_c2uffops(cache)->open = filter_fops->open;
        filter_c2uffops(cache)->release = filter_fops->release;
        filter_c2uffops(cache)->write = filter_fops->write;
        filter_c2uffops(cache)->ioctl = filter_fops->ioctl;

        FEXIT;
}
/* XXX in 2.3 there are "fast" and "slow" symlink ops for ext2 XXX */
void filter_setup_symlink_ops(struct filter_fs *cache, struct inode *inode, struct inode_operations *filter_iops, struct file_operations *filter_fops)
{
struct inode_operations *pr_iops;
struct inode_operations *cache_iops = inode->i_op;
struct file_operations *cache_fops = inode->i_fop;
FENTRY;
if ( cache->o_flags & FILTER_DID_SYMLINK_OPS ) {
FEXIT;
return;
}
cache->o_flags |= FILTER_DID_SYMLINK_OPS;
/* steal the old ops */
cache->o_caops.cache_sym_iops = cache_iops;
cache->o_caops.cache_sym_fops = cache_fops;
/* abbreviate */
pr_iops = filter_c2usiops(cache);
/* setup our dir iops: copy and modify */
memcpy(pr_iops, cache_iops, sizeof(*cache_iops));
/* See comments above in filter_setup_dir_ops. SHP */
/* if (cache_iops->setattr) */
pr_iops->setattr = filter_iops->setattr;
if (cache_iops->getattr)
pr_iops->getattr = filter_iops->getattr;
/* assign */
/* copy fops - careful for symlinks they might be NULL */
if ( cache_fops ) {
memcpy(filter_c2usfops(cache), cache_fops, sizeof(*cache_fops));
}
FEXIT;
}
void filter_setup_dentry_ops(struct filter_fs *cache,
struct dentry_operations *cache_dop,
struct dentry_operations *filter_dop)
{
if ( cache->o_flags & FILTER_DID_DENTRY_OPS ) {
FEXIT;
return;
}
cache->o_flags |= FILTER_DID_DENTRY_OPS;
cache->o_caops.cache_dentry_ops = cache_dop;
memcpy(&cache->o_fops.filter_dentry_ops,
filter_dop, sizeof(*filter_dop));
if (cache_dop && cache_dop != filter_dop && cache_dop->d_revalidate){
CERROR("WARNING: filter overriding revalidation!\n");
}
return;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Author: Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 1998 Stelias Computing Inc
* Copyright (C) 1999 Red Hat Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* This file implements basic routines supporting the semantics
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/string.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/*
 * Look up @name and fill in @nd, without following a trailing symlink.
 *
 * Returns whatever path_lookup() returns.
 */
int presto_walk(const char *name, struct nameidata *nd)
{
        /* we do not follow symlinks to support symlink operations
           correctly. The vfs should always hand us resolved dentries
           so we should not be required to use LOOKUP_FOLLOW. At the
           reintegrating end, lento again should be working with the
           resolved pathname and not the symlink. SHP
           XXX: This code implies that direct symlinks do not work. SHP
        */
        unsigned int lookup_flags = 0; //LOOKUP_POSITIVE;

        ENTRY;
        return path_lookup(name, lookup_flags, nd);
}
/* find the presto minor device for this inode */
int presto_i2m(struct inode *inode)
{
struct presto_cache *cache;
ENTRY;
cache = presto_get_cache(inode);
CDEBUG(D_PSDEV, "\n");
if ( !cache ) {
CERROR("PRESTO: BAD: cannot find cache for dev %s, ino %ld\n",
inode->i_sb->s_id, inode->i_ino);
EXIT;
return -1;
}
EXIT;
return cache->cache_psdev->uc_minor;
}
inline int presto_f2m(struct presto_file_set *fset)
{
return fset->fset_cache->cache_psdev->uc_minor;
}
/* Minor number of the psdev channel serving @cache. */
inline int presto_c2m(struct presto_cache *cache)
{
        int minor = cache->cache_psdev->uc_minor;

        return minor;
}
/* XXX check this out */
struct presto_file_set *presto_path2fileset(const char *name)
{
struct nameidata nd;
struct presto_file_set *fileset;
int error;
ENTRY;
error = presto_walk(name, &nd);
if (!error) {
#if 0
error = do_revalidate(nd.dentry);
#endif
if (!error)
fileset = presto_fset(nd.dentry);
path_release(&nd);
EXIT;
} else
fileset = ERR_PTR(error);
EXIT;
return fileset;
}
/* check a flag on this dentry or fset root.  Semantics:
   - if the channel of this dentry has uc_no_filter set: return ~0
   - PRESTO_ATTR, PRESTO_DATA return 1 if FSET_INSYNC is set on the fileset
   - otherwise: test if the flag is set in the dentry's dd_flags

   presto_i2m() returns -1 when no cache is found; that value must not be
   used as an index into izo_channels[], so the no-filter test is skipped
   in that case.
*/
int presto_chk(struct dentry *dentry, int flag)
{
        int minor;
        struct presto_file_set *fset = presto_fset(dentry);

        ENTRY;
        minor = presto_i2m(dentry->d_inode);
        if ( minor >= 0 && izo_channels[minor].uc_no_filter ) {
                EXIT;
                return ~0;
        }

        /* if the fileset is in sync DATA and ATTR are OK */
        if ( fset &&
             (flag == PRESTO_ATTR || flag == PRESTO_DATA) &&
             (fset->fset_flags & FSET_INSYNC) ) {
                CDEBUG(D_INODE, "fset in sync (ino %ld)!\n",
                       fset->fset_dentry->d_inode->i_ino);
                EXIT;
                return 1;
        }

        EXIT;
        return (presto_d2d(dentry)->dd_flags & flag);
}
/*
 * Set bits in the dentry flags: ORs @flag into the dd_flags of the data
 * presto_d2d() returns for @dentry.  Calls BUG() when the dentry has no
 * such data.
 */
void presto_set(struct dentry *dentry, int flag)
{
        ENTRY;

        /* negative dentries have no inode number to report */
        if ( dentry->d_inode != NULL ) {
                CDEBUG(D_INODE, "SET ino %ld, flag %x\n",
                       dentry->d_inode->i_ino, flag);
        }

        if ( !presto_d2d(dentry) ) {
                CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry,
                       dentry->d_name.len, dentry->d_name.name);
                BUG();
        }

        presto_d2d(dentry)->dd_flags |= flag;
        EXIT;
}
/* given a path: complete the closes on the fset */
int lento_complete_closes(char *path)
{
struct nameidata nd;
struct dentry *dentry;
int error;
struct presto_file_set *fset;
ENTRY;
error = presto_walk(path, &nd);
if (error) {
EXIT;
return error;
}
dentry = nd.dentry;
error = -ENXIO;
if ( !presto_ispresto(dentry->d_inode) ) {
EXIT;
goto out_complete;
}
fset = presto_fset(dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto out_complete;
}
/* transactions and locking are internal to this function */
error = presto_complete_lml(fset);
EXIT;
out_complete:
path_release(&nd);
return error;
}
#if 0
/* NOTE: this whole function is compiled out by the surrounding "#if 0". */
/* given a path: write a close record and cancel an LML record, finally
call truncate LML. Lento is doing this so it goes in with uid/gid's
root.

Which steps run is selected by bits in info->flags:
LENTO_FL_CANCEL_LML   - presto_clear_lml_close() inside the transaction and
presto_truncate_lml() after the commit
LENTO_FL_WRITE_KML    - presto_journal_close() with the inode's current
version
LENTO_FL_WRITE_EXPECT - presto_write_last_rcvd()

Returns the path-walk error, -ENXIO if the inode is not a presto inode,
-EINVAL if there is no fileset, -ENOMEM if the transaction cannot be
started, or the error of the first failing step.
*/
int lento_cancel_lml(char *path,
__u64 lml_offset,
__u64 remote_ino,
__u32 remote_generation,
__u32 remote_version,
struct lento_vfs_context *info)
{
struct nameidata nd;
struct rec_info rec;
struct dentry *dentry;
int error;
struct presto_file_set *fset;
void *handle;
struct presto_version new_ver;
ENTRY;
error = presto_walk(path, &nd);
if (error) {
EXIT;
return error;
}
dentry = nd.dentry;
error = -ENXIO;
if ( !presto_ispresto(dentry->d_inode) ) {
EXIT;
goto out_cancel_lml;
}
fset = presto_fset(dentry);
error=-EINVAL;
if (fset==NULL) {
CERROR("No fileset!\n");
EXIT;
goto out_cancel_lml;
}
/* this only requires a transaction below which is automatic */
handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE);
if ( IS_ERR(handle) ) {
/* the error encoded in handle is replaced by -ENOMEM */
error = -ENOMEM;
EXIT;
goto out_cancel_lml;
}
if (info->flags & LENTO_FL_CANCEL_LML) {
error = presto_clear_lml_close(fset, lml_offset);
if ( error ) {
/* the transaction is committed even on failure */
presto_trans_commit(fset, handle);
EXIT;
goto out_cancel_lml;
}
}
if (info->flags & LENTO_FL_WRITE_KML) {
presto_getversion(&new_ver, dentry->d_inode);
error = presto_journal_close(&rec, fset, NULL, dentry,
&new_ver);
if ( error ) {
EXIT;
presto_trans_commit(fset, handle);
goto out_cancel_lml;
}
}
if (info->flags & LENTO_FL_WRITE_EXPECT) {
error = presto_write_last_rcvd(&rec, fset, info);
/* only negative values are treated as failures here */
if ( error < 0 ) {
EXIT;
presto_trans_commit(fset, handle);
goto out_cancel_lml;
}
}
presto_trans_commit(fset, handle);
if (info->flags & LENTO_FL_CANCEL_LML) {
presto_truncate_lml(fset);
}
out_cancel_lml:
/* NOTE(review): the error paths above already emitted EXIT before jumping
here, so EXIT is emitted twice on those paths. */
EXIT;
path_release(&nd);
return error;
}
#endif
/*
 * Given a dentry, operate on the flags in its dentry data.  Used by
 * downcalls.
 *
 * dd_flags is first ANDed with @and_flag and then ORed with @or_flag.  When
 * @res is non-NULL the resulting flags are stored through it.  Returns 0,
 * or -EINVAL when the dentry has no presto data.
 */
int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag,
                    int *res)
{
        if (!presto_d2d(dentry)) {
                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
                       dentry->d_inode->i_ino, __FUNCTION__);
                return -EINVAL;
        }

        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
               dentry->d_inode->i_ino, and_flag, or_flag,
               presto_d2d(dentry)->dd_flags);

        /* clear first, then set */
        presto_d2d(dentry)->dd_flags &= and_flag;
        presto_d2d(dentry)->dd_flags |= or_flag;

        if (res != NULL)
                *res = presto_d2d(dentry)->dd_flags;

        return 0;
}
/*
 * Given a dentry, operate on the flags of the cache its inode belongs to.
 * Used by mark_ioctl.
 *
 * cache_flags is first ANDed with @and_flag and then ORed with @or_flag.
 * When @res is non-NULL the resulting flags are stored through it.
 * Returns 0, -EINVAL when the dentry has no presto data, or -EBADF when no
 * cache is found for the inode.
 */
int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag,
                   int *res)
{
        struct presto_cache *cache;

        if (!presto_d2d(dentry)) {
                CERROR("InterMezzo: no ddata for inode %ld in %s\n",
                       dentry->d_inode->i_ino, __FUNCTION__);
                return -EINVAL;
        }

        CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
               dentry->d_inode->i_ino, and_flag, or_flag,
               presto_d2d(dentry)->dd_flags);

        cache = presto_get_cache(dentry->d_inode);
        if ( cache == NULL ) {
                CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
                return -EBADF;
        }

        /* clear first, then set */
        cache->cache_flags &= and_flag;
        cache->cache_flags |= or_flag;

        if (res != NULL)
                *res = (int)cache->cache_flags;

        return 0;
}
int presto_set_max_kml_size(const char *path, unsigned long max_size)
{
struct presto_file_set *fset;
ENTRY;
fset = presto_path2fileset(path);
if (IS_ERR(fset)) {
EXIT;
return PTR_ERR(fset);
}
fset->kml_truncate_size = max_size;
CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n",
max_size, path);
EXIT;
return 0;
}
int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag,
int * res)
{
struct presto_file_set *fset;
fset = presto_fset(dentry);
if ( !fset ) {
CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
make_bad_inode(dentry->d_inode);
return -EBADF;
}
fset->fset_flags &= and_flag;
fset->fset_flags |= or_flag;
if (res)
*res = (int)fset->fset_flags;
return 0;
}
/*
 * Talk to Lento about the permit.
 *
 * Returns -EINVAL when no minor is found for the dentry's inode, -ENOTCONN
 * when the dentry has no fileset or when Lento is not up and the fileset
 * does not have FSET_STEAL_PERMIT set, 0 when Lento is not up but
 * FSET_STEAL_PERMIT is set, -ENOMEM when the path buffer cannot be
 * allocated, and otherwise the result of izo_upc_permit().
 *
 * The unused fsetnamelen local (and the strlen() that fed it) was removed.
 */
static int presto_permit_upcall(struct dentry *dentry)
{
        int rc;
        char *path, *buffer;
        int pathlen;
        int minor;
        struct presto_file_set *fset = NULL;

        ENTRY;

        if ( (minor = presto_i2m(dentry->d_inode)) < 0) {
                EXIT;
                return -EINVAL;
        }

        fset = presto_fset(dentry);
        if (!fset) {
                EXIT;
                return -ENOTCONN;
        }

        if ( !presto_lento_up(minor) ) {
                if ( fset->fset_flags & FSET_STEAL_PERMIT ) {
                        EXIT;
                        return 0;
                } else {
                        EXIT;
                        return -ENOTCONN;
                }
        }

        /* one page holds the path of the dentry relative to the fset root */
        PRESTO_ALLOC(buffer, PAGE_SIZE);
        if ( !buffer ) {
                CERROR("PRESTO: out of memory!\n");
                EXIT;
                return -ENOMEM;
        }
        path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
        pathlen = MYPATHLEN(buffer, path);
        rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name);
        PRESTO_FREE(buffer, PAGE_SIZE);

        EXIT;
        return rc;
}
/* get a write permit for the fileset of this inode
* - if this returns a negative value there was an error
* - if 0 is returned the permit was already in the kernel -- or --
* Lento gave us the permit without reintegration
* - lento returns the number of records it reintegrated
*
* Note that if this fileset has branches, a permit will -never- to a normal
* process for writing in the data area (ie, outside of .intermezzo)
*/
int presto_get_permit(struct inode * inode)
{
struct dentry *de;
struct presto_file_set *fset;
int minor = presto_i2m(inode);
int rc = 0;
ENTRY;
if (minor < 0) {
EXIT;
return -1;
}
if ( ISLENTO(minor) ) {
EXIT;
return 0;
}
if (list_empty(&inode->i_dentry)) {
CERROR("No alias for inode %d\n", (int) inode->i_ino);
EXIT;
return -EINVAL;
}
de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
EXIT;
return 0;
}
fset = presto_fset(de);
if ( !fset ) {
CERROR("Presto: no fileset in presto_get_permit!\n");
EXIT;
return -EINVAL;
}
if (fset->fset_flags & FSET_HAS_BRANCHES) {
EXIT;
return -EROFS;
}
spin_lock(&fset->fset_permit_lock);
if (fset->fset_flags & FSET_HASPERMIT) {
fset->fset_permit_count++;
CDEBUG(D_INODE, "permit count now %d, inode %lx\n",
fset->fset_permit_count, inode->i_ino);
spin_unlock(&fset->fset_permit_lock);
EXIT;
return 0;
}
/* Allow reintegration to proceed without locks -SHP */
fset->fset_permit_upcall_count++;
if (fset->fset_permit_upcall_count == 1) {
spin_unlock(&fset->fset_permit_lock);
rc = presto_permit_upcall(fset->fset_dentry);
spin_lock(&fset->fset_permit_lock);
fset->fset_permit_upcall_count--;
if (rc == 0) {
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
NULL);
fset->fset_permit_count++;
} else if (rc == ENOTCONN) {
CERROR("InterMezzo: disconnected operation. stealing permit.\n");
izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
NULL);
fset->fset_permit_count++;
/* set a disconnected flag here to stop upcalls */
rc = 0;
} else {
CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc);
rc = -EROFS;
/* go to sleep here and try again? */
}
wake_up_interruptible(&fset->fset_permit_queue);
} else {
/* Someone is already doing an upcall; go to sleep. */
DECLARE_WAITQUEUE(wait, current);
spin_unlock(&fset->fset_permit_lock);
add_wait_queue(&fset->fset_permit_queue, &wait);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
spin_lock(&fset->fset_permit_lock);
if (fset->fset_permit_upcall_count == 0)
break;
spin_unlock(&fset->fset_permit_lock);
if (signal_pending(current)) {
remove_wait_queue(&fset->fset_permit_queue,
&wait);
return -ERESTARTSYS;
}
schedule();
}
remove_wait_queue(&fset->fset_permit_queue, &wait);
/* We've been woken up: do we have the permit? */
if (fset->fset_flags & FSET_HASPERMIT)
/* FIXME: Is this the right thing? */
rc = -EAGAIN;
}
CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), "
"rc %d\n", fset->fset_permit_count, inode->i_ino, rc);
spin_unlock(&fset->fset_permit_lock);
EXIT;
return rc;
}
int presto_put_permit(struct inode * inode)
{
struct dentry *de;
struct presto_file_set *fset;
int minor = presto_i2m(inode);
ENTRY;
if (minor < 0) {
EXIT;
return -1;
}
if ( ISLENTO(minor) ) {
EXIT;
return 0;
}
if (list_empty(&inode->i_dentry)) {
CERROR("No alias for inode %d\n", (int) inode->i_ino);
EXIT;
return -1;
}
de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
fset = presto_fset(de);
if ( !fset ) {
CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__);
EXIT;
return -1;
}
if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
EXIT;
return 0;
}
spin_lock(&fset->fset_permit_lock);
if (fset->fset_flags & FSET_HASPERMIT) {
if (fset->fset_permit_count > 0)
fset->fset_permit_count--;
else
CERROR("Put permit while permit count is 0, "
"inode %ld!\n", inode->i_ino);
} else {
fset->fset_permit_count = 0;
CERROR("InterMezzo: put permit while no permit, inode %ld, "
"flags %x!\n", inode->i_ino, fset->fset_flags);
}
CDEBUG(D_INODE, "permit count now %d, inode %ld\n",
fset->fset_permit_count, inode->i_ino);
if (fset->fset_flags & FSET_PERMIT_WAITING &&
fset->fset_permit_count == 0) {
CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n",
inode->i_ino);
wake_up_interruptible(&fset->fset_permit_queue);
}
spin_unlock(&fset->fset_permit_lock);
EXIT;
return 0;
}
/*
 * Fill @presto_version from @inode: the seconds and nanoseconds of its
 * ctime and mtime, and its size.
 */
void presto_getversion(struct presto_version * presto_version,
                       struct inode * inode)
{
        /* size */
        presto_version->pv_size = (__u64)inode->i_size;

        /* change time */
        presto_version->pv_ctime_sec = inode->i_ctime.tv_sec;
        presto_version->pv_ctime_nsec = inode->i_ctime.tv_nsec;

        /* modification time */
        presto_version->pv_mtime_sec = inode->i_mtime.tv_sec;
        presto_version->pv_mtime_nsec = inode->i_mtime.tv_nsec;
}
/* If uuid is non-null, it is the uuid of the peer that's making the revocation
 * request.  If it is null, this request was made locally, without external
 * pressure to give up the permit.  This most often occurs when a client
 * starts up.
 *
 * Waits (interruptibly) until the permit count of the dentry's fileset is
 * zero, optionally tells Lento through izo_upc_revoke_permit(), and then
 * clears FSET_HASPERMIT and FSET_PERMIT_WAITING.
 *
 * Returns 0 on success, -ENODEV when no minor or fileset is found, -EINVAL
 * when another process is already waiting for this permit, -ERESTARTSYS
 * when interrupted by a signal, or the error from izo_mark_fset() or
 * izo_upc_revoke_permit().
 *
 * FSET_PERMIT_WAITING is cleared on every exit taken after it may have
 * been set (previously the signal and upcall-failure exits left it set, so
 * every later revoke was refused with -EINVAL), and the task state is put
 * back to TASK_RUNNING when leaving the wait loop.
 *
 * FIXME: this function needs to be refactored slightly once we start handling
 * multiple clients.
 */
int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16])
{
        struct presto_file_set *fset;
        DECLARE_WAITQUEUE(wait, current);
        int minor, rc;

        ENTRY;

        minor = presto_i2m(dentry->d_inode);
        if (minor < 0) {
                EXIT;
                return -ENODEV;
        }

        fset = presto_fset(dentry);
        if (fset == NULL) {
                EXIT;
                return -ENODEV;
        }

        spin_lock(&fset->fset_permit_lock);
        if (fset->fset_flags & FSET_PERMIT_WAITING) {
                CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported!  Aborting this particular permit request...\n");
                EXIT;
                spin_unlock(&fset->fset_permit_lock);
                return -EINVAL;
        }

        if (fset->fset_permit_count == 0)
                goto got_permit;

        /* Something is still using this permit.  Mark that we're waiting for it
         * and go to sleep. */
        rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL);
        spin_unlock(&fset->fset_permit_lock);
        if (rc < 0) {
                EXIT;
                return rc;
        }

        add_wait_queue(&fset->fset_permit_queue, &wait);
        while (1) {
                set_current_state(TASK_INTERRUPTIBLE);

                spin_lock(&fset->fset_permit_lock);
                if (fset->fset_permit_count == 0)
                        break;
                spin_unlock(&fset->fset_permit_lock);

                if (signal_pending(current)) {
                        /* FIXME: there must be a better thing to return... */
                        set_current_state(TASK_RUNNING);
                        remove_wait_queue(&fset->fset_permit_queue, &wait);
                        /* we are giving up: nobody is waiting any more */
                        spin_lock(&fset->fset_permit_lock);
                        fset->fset_flags &= ~FSET_PERMIT_WAITING;
                        spin_unlock(&fset->fset_permit_lock);
                        EXIT;
                        return -ERESTARTSYS;
                }

                /* FIXME: maybe there should be a timeout here. */

                schedule();
        }

        /* the lock is held here; do not stay marked as sleeping */
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fset->fset_permit_queue, &wait);
 got_permit:
        /* By this point fset->fset_permit_count is zero and we're holding the
         * lock. */
        CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n",
               dentry->d_inode->i_ino);

        if (uuid != NULL) {
                /* NOTE(review): this upcall is made with fset_permit_lock
                 * held; if izo_upc_revoke_permit() can sleep, the lock
                 * should be dropped around it -- confirm. */
                rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid);
                if (rc < 0) {
                        /* the permit stays, but we stop waiting for it */
                        fset->fset_flags &= ~FSET_PERMIT_WAITING;
                        spin_unlock(&fset->fset_permit_lock);
                        EXIT;
                        return rc;
                }
        }

        izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL);
        izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL);
        spin_unlock(&fset->fset_permit_lock);
        EXIT;
        return 0;
}
/*
 * Return 1 when @fset is read-only for the calling process, else 0.
 *
 * Depending on ISLENTO() for the cache's minor, the LENTO_RO or the
 * CLIENT_RO bit is tested, first in the fileset flags and then in the
 * cache flags.
 */
inline int presto_is_read_only(struct presto_file_set * fset)
{
        struct presto_cache *cache = fset->fset_cache;
        int minor = cache->cache_psdev->uc_minor;
        int mask;

        /* fileset-level flag first */
        if (ISLENTO(minor))
                mask = FSET_LENTO_RO;
        else
                mask = FSET_CLIENT_RO;
        if ( fset->fset_flags & mask )
                return 1;

        /* then the flag on the whole cache */
        if (ISLENTO(minor))
                mask = CACHE_LENTO_RO;
        else
                mask = CACHE_CLIENT_RO;
        if (cache->cache_flags & mask)
                return 1;
        return 0;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* An implementation of a loadable kernel mode driver providing
* multiple kernel/user space bidirectional communications links.
*
* Author: Alan Cox <alan@cymru.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
* Adapted to become the Linux 2.0 Coda pseudo device
* Peter Braam <braam@maths.ox.ac.uk>
* Michael Callahan <mjc@emmy.smith.edu>
*
* Changes for Linux 2.1
* Copyright (c) 1997 Carnegie-Mellon University
*
* Redone again for InterMezzo
* Copyright (c) 1998 Peter J. Braam
* Copyright (c) 2000 Mountain View Data, Inc.
* Copyright (c) 2000 Tacitus Systems, Inc.
* Copyright (c) 2001 Cluster File Systems, Inc.
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/lp.h>
#include <linux/slab.h>
#include <linux/ioport.h>
#include <linux/fcntl.h>
#include <linux/delay.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/devfs_fs_kernel.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/system.h>
#include <asm/poll.h>
#include <asm/uaccess.h>
#include <linux/miscdevice.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/* Default values of the debugging knobs: when PRESTO_DEVEL is defined,
entry/exit printing starts enabled and presto_debug starts at 4095;
otherwise both start at 0. */
#ifdef PRESTO_DEVEL
int presto_print_entry = 1;
int presto_debug = 4095;
#else
int presto_print_entry = 0;
int presto_debug = 0;
#endif
/* Like inode.c (presto_sym_iops), the initializer is just to prevent
izo_channels from appearing as a COMMON symbol (and therefore
interfering with other modules that use the same variable name). */
/* Table of upcall channels, indexed by minor number; it holds MAX_CHANNEL
entries and is fully (re)initialized in presto_psdev_init(). */
struct upc_channel izo_channels[MAX_CHANNEL] = {{0}};
/*
 * Return the index of the first channel whose uc_cache_list is empty, or
 * -1 when every channel has at least one cache attached.
 */
int izo_psdev_get_free_channel(void)
{
        int minor;

        for (minor = 0; minor < MAX_CHANNEL; minor++) {
                if (list_empty(&(izo_channels[minor].uc_cache_list)))
                        return minor;
        }

        return -1;
}
/*
 * Record the calling process as the Lento of channel @minor.
 *
 * Stores current->pid in the channel's uc_pid and wakes every request that
 * is still on the channel's processing list.  Returns 0, or -EINVAL when
 * @minor is outside [0, MAX_CHANNEL).
 */
int izo_psdev_setpid(int minor)
{
        struct upc_channel *channel;
        struct list_head *pos;
        struct upc_req *req;

        if (minor < 0 || minor >= MAX_CHANNEL)
                return -EINVAL;

        channel = &(izo_channels[minor]);
        /*
         * This ioctl is performed by each Lento that starts up
         * and wants to do further communication with presto.
         */
        CDEBUG(D_PSDEV, "Setting current pid to %d channel %d\n",
               current->pid, minor);
        channel->uc_pid = current->pid;

        spin_lock(&channel->uc_lock);
        if ( !list_empty(&channel->uc_processing) ) {
                CERROR("WARNING: setpid & processing not empty!\n");
                list_for_each(pos, &channel->uc_processing) {
                        req = list_entry(pos, struct upc_req, rq_chain);
                        /* freeing of req and data is done by the sleeper */
                        wake_up(&req->rq_sleep);
                }
        }
        /* nothing above unlinks entries, so this repeats the same test */
        if ( !list_empty(&channel->uc_processing) ) {
                CERROR("BAD: FAILDED TO CLEAN PROCESSING LIST!\n");
        }
        spin_unlock(&channel->uc_lock);

        EXIT;
        return 0;
}
int izo_psdev_setchannel(struct file *file, int fd)
{
struct file *psdev_file = fget(fd);
struct presto_cache *cache = presto_get_cache(file->f_dentry->d_inode);
if (!psdev_file) {
CERROR("%s: no psdev_file!\n", __FUNCTION__);
return -EINVAL;
}
if (!cache) {
CERROR("%s: no cache!\n", __FUNCTION__);
fput(psdev_file);
return -EINVAL;
}
if (psdev_file->private_data) {
CERROR("%s: channel already set!\n", __FUNCTION__);
fput(psdev_file);
return -EINVAL;
}
psdev_file->private_data = cache->cache_psdev;
fput(psdev_file);
EXIT;
return 0;
}
inline int presto_lento_up(int minor)
{
return izo_channels[minor].uc_pid;
}
/*
 * poll method of the psdev.
 *
 * The device is always reported writable; it is additionally reported
 * readable while the channel's pending list is non-empty.  A file that has
 * no channel bound to it reports POLLERR: the return value is an event
 * mask, so the negative errno returned previously was read by the caller
 * as a mask with nearly every event bit set.
 */
static unsigned int presto_psdev_poll(struct file *file, poll_table * wait)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        unsigned int mask = POLLOUT | POLLWRNORM;

        /* ENTRY; this will flood you */
        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                return POLLERR;
        }

        poll_wait(file, &(channel->uc_waitq), wait);

        spin_lock(&channel->uc_lock);
        if (!list_empty(&channel->uc_pending)) {
                CDEBUG(D_PSDEV, "Non-empty pending list.\n");
                mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock(&channel->uc_lock);

        /* EXIT; will flood you */
        return mask;
}
/*
* Receive a message written by Lento to the psdev
*
* The message must start with a struct izo_upcall_resp header.  Its
* "unique" field is matched against the requests on the channel's
* processing list; the matching request is unlinked, the message is copied
* into its rq_data buffer (truncated to rq_bufsize), REQ_WRITE is set and
* the sleeper on rq_sleep is woken.
*
* Returns the number of bytes accepted, -EBADF when the file has no
* channel, -EINVAL when fewer than sizeof(hdr) bytes were written, -EFAULT
* when the user buffer cannot be read, or -ESRCH when no request matches.
*/
static ssize_t presto_psdev_write(struct file *file, const char *buf,
size_t count, loff_t *off)
{
struct upc_channel *channel = (struct upc_channel *)file->private_data;
struct upc_req *req = NULL;
struct upc_req *tmp;
struct list_head *lh;
struct izo_upcall_resp hdr;
int error;
if ( ! channel ) {
CERROR("%s: bad psdev file\n", __FUNCTION__);
return -EBADF;
}
/* Peek at the opcode, uniquefier */
if ( count < sizeof(hdr) ) {
CERROR("presto_psdev_write: Lento didn't write full hdr.\n");
return -EINVAL;
}
error = copy_from_user(&hdr, buf, sizeof(hdr));
if ( error )
return -EFAULT;
CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n",
current->pid, hdr.opcode, hdr.unique);
spin_lock(&channel->uc_lock);
/* Look for the message on the processing queue. */
list_for_each(lh, &channel->uc_processing) {
tmp = list_entry(lh, struct upc_req , rq_chain);
if (tmp->rq_unique == hdr.unique) {
req = tmp;
/* unlink here: keeps search length minimal */
list_del_init(&req->rq_chain);
CDEBUG(D_PSDEV,"Eureka opc %d uniq %d!\n",
hdr.opcode, hdr.unique);
break;
}
}
spin_unlock(&channel->uc_lock);
if (!req) {
CERROR("psdev_write: msg (%d, %d) not found\n",
hdr.opcode, hdr.unique);
return(-ESRCH);
}
/* NOTE(review): from here on req is used without uc_lock; the sleeper
that owns req presumably must not free it before being woken below --
confirm. */
/* move data into response buffer. */
if (req->rq_bufsize < count) {
CERROR("psdev_write: too much cnt: %d, cnt: %Zd, "
"opc: %d, uniq: %d.\n",
req->rq_bufsize, count, hdr.opcode, hdr.unique);
count = req->rq_bufsize; /* don't have more space! */
}
/* the whole message, header included, is copied again from user space */
error = copy_from_user(req->rq_data, buf, count);
/* NOTE(review): on this failure req has already been unlinked from the
processing list and its sleeper is not woken here -- confirm that the
sleeper still terminates. */
if ( error )
return -EFAULT;
/* adjust outsize: good upcalls can be aware of this */
req->rq_rep_size = count;
req->rq_flags |= REQ_WRITE;
wake_up(&req->rq_sleep);
return(count);
}
/*
 * Read a message from the kernel to Lento
 *
 * Takes the first request off the channel's pending list, moves it to the
 * processing list unless it is asynchronous, and copies its rq_data to
 * @buf (at most @count bytes; a short buffer is logged and gets a
 * truncated message).
 *
 * Returns the number of bytes copied, -EBADF when the file has no channel,
 * -EINVAL when nothing is pending, or -EFAULT when @buf cannot be written.
 *
 * A faulting user buffer is a caller error, so it no longer triggers
 * BUG().  On that path an asynchronous request, which has no sleeper that
 * would otherwise own it, is put back at the head of the pending list
 * instead of being leaked; a synchronous request stays on the processing
 * list, where its sleeper still owns it.
 */
static ssize_t presto_psdev_read(struct file * file, char * buf,
                                 size_t count, loff_t *off)
{
        struct upc_channel *channel = (struct upc_channel *)file->private_data;
        struct upc_req *req;
        int result = count;

        if ( ! channel ) {
                CERROR("%s: bad psdev file\n", __FUNCTION__);
                return -EBADF;
        }

        spin_lock(&channel->uc_lock);
        if (list_empty(&(channel->uc_pending))) {
                CDEBUG(D_UPCALL, "Empty pending list in read, not good\n");
                spin_unlock(&channel->uc_lock);
                return -EINVAL;
        }
        req = list_entry((channel->uc_pending.next), struct upc_req, rq_chain);
        list_del(&(req->rq_chain));
        /* synchronous requests wait on the processing list for the reply */
        if (! (req->rq_flags & REQ_ASYNC) ) {
                list_add(&(req->rq_chain), channel->uc_processing.prev);
        }
        spin_unlock(&channel->uc_lock);

        req->rq_flags |= REQ_READ;

        /* Move the input args into userspace */
        CDEBUG(D_PSDEV, "\n");
        if (req->rq_bufsize <= count) {
                result = req->rq_bufsize;
        }

        if (count < req->rq_bufsize) {
                CERROR ("psdev_read: buffer too small, read %Zd of %d bytes\n",
                        count, req->rq_bufsize);
        }

        if ( copy_to_user(buf, req->rq_data, result) ) {
                if (req->rq_flags & REQ_ASYNC) {
                        /* keep the message for the next read */
                        req->rq_flags &= ~REQ_READ;
                        spin_lock(&channel->uc_lock);
                        list_add(&(req->rq_chain), &channel->uc_pending);
                        spin_unlock(&channel->uc_lock);
                }
                return -EFAULT;
        }

        /* If request was asynchronous don't enqueue, but free */
        if (req->rq_flags & REQ_ASYNC) {
                CDEBUG(D_PSDEV, "psdev_read: async msg (%d, %d), result %d\n",
                       req->rq_opcode, req->rq_unique, result);
                PRESTO_FREE(req->rq_data, req->rq_bufsize);
                PRESTO_FREE(req, sizeof(*req));
        }

        return result;
}
/*
 * open method of the psdev: the file starts without a channel
 * (private_data is NULL until izo_psdev_setchannel() binds one).
 * Always returns 0.
 */
static int presto_psdev_open(struct inode * inode, struct file * file)
{
        ENTRY;

        /* no channel is associated with a freshly opened file */
        file->private_data = NULL;

        CDEBUG(D_PSDEV, "Psdev_open: caller: %d, flags: %d\n",
               current->pid, file->f_flags);

        EXIT;
        return 0;
}
static int presto_psdev_release(struct inode * inode, struct file * file)
{
struct upc_channel *channel = (struct upc_channel *)file->private_data;
struct upc_req *req;
struct list_head *lh;
ENTRY;
if ( ! channel ) {
CERROR("%s: bad psdev file\n", __FUNCTION__);
return -EBADF;
}
CDEBUG(D_PSDEV, "Lento: pid %d\n", current->pid);
channel->uc_pid = 0;
/* Wake up clients so they can return. */
CDEBUG(D_PSDEV, "Wake up clients sleeping for pending.\n");
spin_lock(&channel->uc_lock);
list_for_each(lh, &channel->uc_pending) {
req = list_entry(lh, struct upc_req, rq_chain);
/* Async requests stay around for a new lento */
if (req->rq_flags & REQ_ASYNC) {
continue;
}
/* the sleeper will free the req and data */
req->rq_flags |= REQ_DEAD;
wake_up(&req->rq_sleep);
}
CDEBUG(D_PSDEV, "Wake up clients sleeping for processing\n");
list_for_each(lh, &channel->uc_processing) {
req = list_entry(lh, struct upc_req, rq_chain);
/* freeing of req and data is done by the sleeper */
req->rq_flags |= REQ_DEAD;
wake_up(&req->rq_sleep);
}
spin_unlock(&channel->uc_lock);
CDEBUG(D_PSDEV, "Done.\n");
EXIT;
return 0;
}
/* File operations of the InterMezzo pseudo device. */
static struct file_operations presto_psdev_fops = {
        .owner   = THIS_MODULE,
        /* lifetime of an open file */
        .open    = presto_psdev_open,
        .release = presto_psdev_release,
        /* message exchange with Lento */
        .read    = presto_psdev_read,
        .write   = presto_psdev_write,
        .poll    = presto_psdev_poll,
};
/* modules setup */
static struct miscdevice intermezzo_psdev = {
INTERMEZZO_MINOR,
"intermezzo",
&presto_psdev_fops
};
int presto_psdev_init(void)
{
int i;
int err;
if ( (err = misc_register(&intermezzo_psdev)) ) {
CERROR("%s: cannot register %d err %d\n",
__FUNCTION__, INTERMEZZO_MINOR, err);
return -EIO;
}
memset(&izo_channels, 0, sizeof(izo_channels));
for ( i = 0 ; i < MAX_CHANNEL ; i++ ) {
struct upc_channel *channel = &(izo_channels[i]);
INIT_LIST_HEAD(&channel->uc_pending);
INIT_LIST_HEAD(&channel->uc_processing);
INIT_LIST_HEAD(&channel->uc_cache_list);
init_waitqueue_head(&channel->uc_waitq);
channel->uc_lock = SPIN_LOCK_UNLOCKED;
channel->uc_hard = 0;
channel->uc_no_filter = 0;
channel->uc_no_journal = 0;
channel->uc_no_upcall = 0;
channel->uc_timeout = 30;
channel->uc_errorval = 0;
channel->uc_minor = i;
}
return 0;
}
/*
 * Deregister the psdev and dispose of whatever is still queued on the
 * channels.
 *
 * Asynchronous pending requests are unlinked and freed here.  Synchronous
 * pending requests are marked REQ_DEAD and their sleepers woken; requests
 * on the processing list are unlinked, marked REQ_DEAD and woken.
 *
 * The processing list is walked with list_for_each_safe() because entries
 * are removed during the walk, and they are removed with list_del_init()
 * because the woken sleeper (lento_waitfor_upcall) calls list_del_init()
 * on the same node afterwards, which requires the node to be in a sane,
 * self-linked state.
 */
void presto_psdev_cleanup(void)
{
        int i;

        misc_deregister(&intermezzo_psdev);

        for ( i = 0 ; i < MAX_CHANNEL ; i++ ) {
                struct upc_channel *channel = &(izo_channels[i]);
                struct list_head *lh, *next;

                spin_lock(&channel->uc_lock);
                if ( ! list_empty(&channel->uc_pending)) {
                        CERROR("Weird, tell Peter: module cleanup and pending list not empty dev %d\n", i);
                }
                if ( ! list_empty(&channel->uc_processing)) {
                        CERROR("Weird, tell Peter: module cleanup and processing list not empty dev %d\n", i);
                }
                if ( ! list_empty(&channel->uc_cache_list)) {
                        CERROR("Weird, tell Peter: module cleanup and cache listnot empty dev %d\n", i);
                }
                list_for_each_safe(lh, next, &channel->uc_pending) {
                        struct upc_req *req;

                        req = list_entry(lh, struct upc_req, rq_chain);
                        if ( req->rq_flags & REQ_ASYNC ) {
                                /* nobody sleeps on async requests */
                                list_del(&(req->rq_chain));
                                CDEBUG(D_UPCALL, "free pending upcall type %d\n",
                                       req->rq_opcode);
                                PRESTO_FREE(req->rq_data, req->rq_bufsize);
                                PRESTO_FREE(req, sizeof(struct upc_req));
                        } else {
                                req->rq_flags |= REQ_DEAD;
                                wake_up(&req->rq_sleep);
                        }
                }
                list_for_each_safe(lh, next, &channel->uc_processing) {
                        struct upc_req *req;

                        req = list_entry(lh, struct upc_req, rq_chain);
                        list_del_init(&(req->rq_chain));
                        req->rq_flags |= REQ_DEAD;
                        wake_up(&req->rq_sleep);
                }
                spin_unlock(&channel->uc_lock);
        }
}
/*
* lento_upcall and lento_downcall routines
*/
/*
* Sleep on req->rq_sleep until the request has been answered (REQ_WRITE)
* or declared dead (REQ_DEAD).
*
* When the channel's uc_hard is 0 the sleep is interruptible; with a signal
* pending the wait is abandoned if SIGKILL or SIGINT is among the pending
* signals, or once uc_timeout seconds have passed since the request was
* posted.  When uc_hard is non-zero the sleep is uninterruptible.
*
* Before returning, the request is unlinked (under uc_lock) from whatever
* list it is on.  Returns the number of jiffies spent in this function.
*/
static inline unsigned long lento_waitfor_upcall
(struct upc_channel *channel, struct upc_req *req, int minor)
{
DECLARE_WAITQUEUE(wait, current);
unsigned long posttime;
/* remember when the request was posted; used for the timeout below */
req->rq_posttime = posttime = jiffies;
add_wait_queue(&req->rq_sleep, &wait);
for (;;) {
/* the task state is set before the flags are tested, so a wake_up that
arrives in between is not lost */
if ( izo_channels[minor].uc_hard == 0 )
set_current_state(TASK_INTERRUPTIBLE);
else
set_current_state(TASK_UNINTERRUPTIBLE);
/* got a reply */
if ( req->rq_flags & (REQ_WRITE | REQ_DEAD) )
break;
/* these cases only apply when TASK_INTERRUPTIBLE */
if ( !izo_channels[minor].uc_hard && signal_pending(current) ) {
/* if this process really wants to die, let it go */
if (sigismember(&(current->pending.signal), SIGKILL)||
sigismember(&(current->pending.signal), SIGINT) )
break;
/* signal is present: after timeout always return
really smart idea, probably useless ... */
if ( time_after(jiffies, req->rq_posttime +
izo_channels[minor].uc_timeout * HZ) )
break;
}
schedule();
}
/* list_del_init leaves the node self-linked, so this is harmless when the
request was already unlinked with list_del_init elsewhere */
spin_lock(&channel->uc_lock);
list_del_init(&req->rq_chain);
spin_unlock(&channel->uc_lock);
remove_wait_queue(&req->rq_sleep, &wait);
set_current_state(TASK_RUNNING);
CDEBUG(D_SPECIAL, "posttime: %ld, returned: %ld\n",
posttime, jiffies-posttime);
return (jiffies - posttime);
}
/*
* lento_upcall will return an error in the case of
* failed communication with Lento _or_ will peek at Lento
* reply and return Lento's error.
*
* As lento has 2 types of errors, normal errors (positive) and internal
* errors (negative), normal errors are negated, while internal errors
* are all mapped to -EINTR, while showing a nice warning message. (jh)
*
* lento_upcall will always free buffer, either directly, when an upcall
* is read (in presto_psdev_read), when the filesystem is unmounted, or
* when the module is unloaded.
*/
/*
 * Queue an upcall on channel 'minor' and, unless 'async' is set, wait for
 * the reply.
 *
 * minor:  index into izo_channels[].
 * size:   in: byte size of 'buffer'; on a successful reply it is overwritten
 *         with req->rq_rep_size.
 * buffer: the upcall message. On every synchronous path it is released with
 *         PRESTO_FREE before returning; on the async path it is left attached
 *         to the queued request.
 * async:  non-zero queues the request and returns 0 without waiting.
 *
 * Returns 0 when upcalls are disabled (uc_no_upcall) or the async request was
 * queued, -ENXIO when no daemon is attached (uc_pid == 0) for a synchronous
 * call, -ENOMEM, the negated result reported in the reply, -EINTR when the
 * wait ended without a reply, or -ENODEV when uc_pid is zero after the wait.
 *
 * NOTE(review): on the reply path *size is overwritten with rq_rep_size
 * before PRESTO_FREE(buffer,*size) runs at exit_buf; confirm that the size
 * handed to PRESTO_FREE is meant to be the reply size rather than the
 * original allocation size.
 * NOTE(review): channel->uc_seq is incremented before uc_lock is taken.
 */
int izo_upc_upcall(int minor, int *size, struct izo_upcall_hdr *buffer,
int async)
{
unsigned long runtime;
struct upc_channel *channel;
struct izo_upcall_resp *out;
struct upc_req *req;
int error = 0;
ENTRY;
channel = &(izo_channels[minor]);
/* upcalls switched off for this channel: drop the message, report success */
if (channel->uc_no_upcall) {
EXIT;
goto exit_buf;
}
/* nobody to answer a synchronous request */
if (!channel->uc_pid && !async) {
EXIT;
error = -ENXIO;
goto exit_buf;
}
/* Format the request message. */
PRESTO_ALLOC(req, sizeof(struct upc_req));
if ( !req ) {
EXIT;
error = -ENOMEM;
goto exit_buf;
}
req->rq_data = (void *)buffer;
req->rq_flags = 0;
req->rq_bufsize = *size;
req->rq_rep_size = 0;
req->rq_opcode = buffer->u_opc;
req->rq_unique = ++channel->uc_seq;
init_waitqueue_head(&req->rq_sleep);
/* Fill in the common input args. */
buffer->u_uniq = req->rq_unique;
buffer->u_async = async;
/* Set REQ_ASYNC before the request becomes visible on the queue */
if ( async )
req->rq_flags = REQ_ASYNC;
spin_lock(&channel->uc_lock);
/* Append msg to pending queue and poke Lento. */
/* adding after uc_pending.prev places the request at the tail of the list */
list_add(&req->rq_chain, channel->uc_pending.prev);
spin_unlock(&channel->uc_lock);
CDEBUG(D_UPCALL,
"Proc %d waking Lento %d for(opc,uniq) =(%d,%d) msg at %p.\n",
current->pid, channel->uc_pid, req->rq_opcode,
req->rq_unique, req);
wake_up_interruptible(&channel->uc_waitq);
if ( async ) {
/* req, rq_data are freed in presto_psdev_read for async */
/* req->rq_flags = REQ_ASYNC;*/
EXIT;
return 0;
}
/* We can be interrupted while we wait for Lento to process
* our request. If the interrupt occurs before Lento has read
* the request, we dequeue and return. If it occurs after the
* read but before the reply, we dequeue, send a signal
* message, and return. If it occurs after the reply we ignore
* it. In no case do we want to restart the syscall. If it
* was interrupted by a lento shutdown (psdev_close), return
* ENODEV. */
/* NOTE(review): no signal message is sent anywhere in this function; both
* interrupted cases below only set -EINTR. */
/* Go to sleep. Wake up on signals only after the timeout. */
runtime = lento_waitfor_upcall(channel, req, minor);
CDEBUG(D_TIMING, "opc: %d time: %ld uniq: %d size: %d\n",
req->rq_opcode, jiffies - req->rq_posttime,
req->rq_unique, req->rq_rep_size);
CDEBUG(D_UPCALL,
"..process %d woken up by Lento for req at 0x%p, data at %p\n",
current->pid, req, req->rq_data);
if (channel->uc_pid) { /* i.e. Lento is still alive */
/* Op went through, interrupt or not we go on */
if (req->rq_flags & REQ_WRITE) {
out = (struct izo_upcall_resp *)req->rq_data;
/* here we map positive Lento errors to kernel errors */
if ( out->result < 0 ) {
CERROR("Tell Peter: Lento returns negative error %d, for oc %d!\n",
out->result, out->opcode);
out->result = EINVAL;
}
error = -out->result;
CDEBUG(D_UPCALL, "upcall: (u,o,r) (%d, %d, %d) out at %p\n",
out->unique, out->opcode, out->result, out);
*size = req->rq_rep_size;
EXIT;
goto exit_req;
}
/* Interrupted before lento read it. */
if ( !(req->rq_flags & REQ_READ) && signal_pending(current)) {
CDEBUG(D_UPCALL,
"Interrupt before read: (op,un)=(%d,%d), flags %x\n",
req->rq_opcode, req->rq_unique, req->rq_flags);
/* perhaps the best way to convince the app to give up? */
error = -EINTR;
EXIT;
goto exit_req;
}
/* interrupted after Lento did its read, send signal */
if ( (req->rq_flags & REQ_READ) && signal_pending(current) ) {
CDEBUG(D_UPCALL,"Interrupt after read: op = %d.%d, flags = %x\n",
req->rq_opcode, req->rq_unique, req->rq_flags);
error = -EINTR;
} else {
/* no reply and no pending signal */
CERROR("Lento: Strange interruption - tell Peter.\n");
error = -EINTR;
}
} else { /* If lento died i.e. !UC_OPEN(channel) */
CERROR("lento_upcall: Lento dead on (op,un) (%d.%d) flags %d\n",
req->rq_opcode, req->rq_unique, req->rq_flags);
error = -ENODEV;
}
/* synchronous paths: release the request, then the message buffer */
exit_req:
PRESTO_FREE(req, sizeof(struct upc_req));
exit_buf:
PRESTO_FREE(buffer,*size);
return error;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Manage RCVD records for clients in the kernel
*
*/
#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/fsfilter.h>
#include "intermezzo_fs.h"
/*
* this file contains a hash table of replicators/clients for a
* fileset. It allows fast lookup and update of reintegration status
*/
/* One cached client: maps a client uuid to the position of its record in
 * the fileset's last_rcvd file. */
struct izo_offset_rec {
struct list_head or_list; /* link in the hash bucket chosen by izo_rep_hash() */
char or_uuid[16]; /* client uuid, compared with memcmp() on lookup */
loff_t or_offset; /* file offset at which the client's record starts */
};
/* Geometry of the replicator hash table: 2^RCACHE_BITS buckets, and the mask
 * that reduces a hash value to a bucket index. */
#define RCACHE_BITS 8
#define RCACHE_SIZE (1 << RCACHE_BITS)
#define RCACHE_MASK (RCACHE_SIZE - 1)
static struct list_head *
izo_rep_cache(void)
{
int i;
struct list_head *cache;
PRESTO_ALLOC(cache, sizeof(struct list_head) * RCACHE_SIZE);
if (cache == NULL) {
CERROR("intermezzo-fatal: no memory for replicator cache\n");
return NULL;
}
memset(cache, 0, sizeof(struct list_head) * RCACHE_SIZE);
for (i = 0; i < RCACHE_SIZE; i++)
INIT_LIST_HEAD(&cache[i]);
return cache;
}
static struct list_head *
izo_rep_hash(struct list_head *cache, char *uuid)
{
return &cache[(RCACHE_MASK & uuid[1])];
}
static void
izo_rep_cache_clean(struct presto_file_set *fset)
{
int i;
struct list_head *bucket;
struct list_head *tmp;
if (fset->fset_clients == NULL)
return;
for (i = 0; i < RCACHE_SIZE; i++) {
tmp = bucket = &fset->fset_clients[i];
tmp = tmp->next;
while (tmp != bucket) {
struct izo_offset_rec *offrec;
tmp = tmp->next;
list_del(tmp);
offrec = list_entry(tmp, struct izo_offset_rec,
or_list);
PRESTO_FREE(offrec, sizeof(struct izo_offset_rec));
}
}
}
struct izo_offset_rec *
izo_rep_cache_find(struct presto_file_set *fset, char *uuid)
{
struct list_head *tmp, *buck = izo_rep_hash(fset->fset_clients, uuid);
struct izo_offset_rec *rec = NULL;
list_for_each(tmp, buck) {
rec = list_entry(tmp, struct izo_offset_rec, or_list);
if ( memcmp(rec->or_uuid, uuid, sizeof(rec->or_uuid)) == 0 )
return rec;
}
return NULL;
}
/*
 * Insert a new client into the hash table.
 *
 * rec supplies the client's uuid; offset is the position of its record in
 * the last_rcvd file. Returns 0 on success, -EINVAL when the uuid is
 * already cached, -ENOMEM when no entry could be allocated.
 */
static int
izo_rep_cache_add(struct presto_file_set *fset, struct izo_rcvd_rec *rec,
                  loff_t offset)
{
        struct izo_offset_rec *entry;
        struct list_head *bucket;

        if (izo_rep_cache_find(fset, rec->lr_uuid) != NULL) {
                CERROR("izo: duplicate client entry %s off %Ld\n",
                       fset->fset_name, offset);
                return -EINVAL;
        }

        PRESTO_ALLOC(entry, sizeof(*entry));
        if (!entry) {
                CERROR("izo: cannot allocate offrec\n");
                return -ENOMEM;
        }

        entry->or_offset = offset;
        memcpy(entry->or_uuid, rec->lr_uuid, sizeof(rec->lr_uuid));

        bucket = izo_rep_hash(fset->fset_clients, rec->lr_uuid);
        list_add(&entry->or_list, bucket);

        return 0;
}
/*
 * Build the client hash table for a fileset from its last_rcvd file.
 *
 * Reads fixed-size izo_rcvd_rec records from the start of the file until a
 * read returns anything other than a full record, caching each record's
 * uuid together with the offset at which that record began.
 *
 * Returns 0 on success, -ENOMEM when the table cannot be allocated, or the
 * error from izo_rep_cache_add() (after emptying the table again).
 */
int
izo_rep_cache_init(struct presto_file_set *fset)
{
        struct izo_rcvd_rec rec;
        loff_t pos = 0;         /* advanced by presto_fread() */
        loff_t rec_start = 0;   /* where the record being read began */
        int rc;

        fset->fset_clients = izo_rep_cache();
        if (!fset->fset_clients) {
                CERROR("Error initializing client cache\n");
                return -ENOMEM;
        }

        for (;;) {
                if (presto_fread(fset->fset_rcvd.fd_file, (char *)&rec,
                                 sizeof(rec), &pos) != sizeof(rec))
                        break;

                rc = izo_rep_cache_add(fset, &rec, rec_start);
                if (rc < 0) {
                        izo_rep_cache_clean(fset);
                        return rc;
                }
                rec_start = pos;
        }

        return 0;
}
/*
* Return local last_rcvd record for the client. Update or create
* if necessary.
*
* XXX: After this call, any -EINVAL from izo_rcvd_get is a real error.
*/
/*
 * Fetch the server-side last_rcvd record for the client described by
 * lr_client into lr_server, creating or resetting it when needed, then
 * report it through izo_upc_repstatus().
 *
 * The record is (re)written as all zeroes plus the client's uuid when the
 * uuid is not known yet (izo_rcvd_get returned -EINVAL) or when the client
 * reports both a zero KML size and a zero remote offset.
 *
 * Returns a negative errno from izo_rcvd_get/izo_rcvd_write, otherwise the
 * result of izo_upc_repstatus().
 */
int
izo_repstatus(struct presto_file_set *fset, __u64 client_kmlsize,
              struct izo_rcvd_rec *lr_client, struct izo_rcvd_rec *lr_server)
{
        /* izo_rcvd_get/izo_rcvd_write return a loff_t (file offset or
         * negative errno); keep it in a loff_t so that a large offset is
         * never narrowed into a negative int and mistaken for an error */
        loff_t off;
        int rc;

        off = izo_rcvd_get(lr_server, fset, lr_client->lr_uuid);
        if (off < 0 && off != -EINVAL) {
                return off;
        }

        /* client is new or has been reset. */
        if (off < 0 ||
            (client_kmlsize == 0 && lr_client->lr_remote_offset == 0)) {
                memset(lr_server, 0, sizeof(*lr_server));
                memcpy(lr_server->lr_uuid, lr_client->lr_uuid,
                       sizeof(lr_server->lr_uuid));
                off = izo_rcvd_write(fset, lr_server);
                if (off < 0)
                        return off;
        }

        /* update intersync */
        rc = izo_upc_repstatus(presto_f2m(fset), fset->fset_name, lr_server);
        return rc;
}
loff_t
izo_rcvd_get(struct izo_rcvd_rec *rec, struct presto_file_set *fset, char *uuid)
{
struct izo_offset_rec *offrec;
struct izo_rcvd_rec tmprec;
loff_t offset;
offrec = izo_rep_cache_find(fset, uuid);
if (offrec == NULL) {
CDEBUG(D_SPECIAL, "izo_get_rcvd: uuid not in hash.\n");
return -EINVAL;
}
offset = offrec->or_offset;
if (rec == NULL)
return offset;
if (presto_fread(fset->fset_rcvd.fd_file, (char *)&tmprec,
sizeof(tmprec), &offset) != sizeof(tmprec)) {
CERROR("izo_get_rcvd: Unable to read from last_rcvd file offset "
"%Lu\n", offset);
return -EIO;
}
memcpy(rec->lr_uuid, tmprec.lr_uuid, sizeof(tmprec.lr_uuid));
rec->lr_remote_recno = le64_to_cpu(tmprec.lr_remote_recno);
rec->lr_remote_offset = le64_to_cpu(tmprec.lr_remote_offset);
rec->lr_local_recno = le64_to_cpu(tmprec.lr_local_recno);
rec->lr_local_offset = le64_to_cpu(tmprec.lr_local_offset);
rec->lr_last_ctime = le64_to_cpu(tmprec.lr_last_ctime);
return offrec->or_offset;
}
/* Try to lookup the UUID in the hash. Insert it if it isn't found. Write the
* data to the file.
*
* Returns the offset of the beginning of the record in the last_rcvd file. */
loff_t
izo_rcvd_write(struct presto_file_set *fset, struct izo_rcvd_rec *rec)
{
struct izo_offset_rec *offrec;
loff_t offset, rc;
ENTRY;
offrec = izo_rep_cache_find(fset, rec->lr_uuid);
if (offrec == NULL) {
/* I don't think it should be possible for an entry to be not in
* the hash table without also having an invalid offset, but we
* handle it gracefully regardless. */
write_lock(&fset->fset_rcvd.fd_lock);
offset = fset->fset_rcvd.fd_offset;
fset->fset_rcvd.fd_offset += sizeof(*rec);
write_unlock(&fset->fset_rcvd.fd_lock);
rc = izo_rep_cache_add(fset, rec, offset);
if (rc < 0) {
EXIT;
return rc;
}
} else
offset = offrec->or_offset;
rc = presto_fwrite(fset->fset_rcvd.fd_file, (char *)rec, sizeof(*rec),
&offset);
if (rc == sizeof(*rec))
/* presto_fwrite() advances 'offset' */
rc = offset - sizeof(*rec);
EXIT;
return rc;
}
/*
 * Update the remote record number and remote offset stored for a client.
 *
 * Reads the client's current record, replaces lr_remote_recno and
 * lr_remote_offset, and writes it back.
 *
 * Returns 0 on success or the negative value returned by izo_rcvd_get() /
 * izo_rcvd_write().
 */
loff_t
izo_rcvd_upd_remote(struct presto_file_set *fset, char * uuid, __u64 remote_recno,
                    __u64 remote_offset)
{
        struct izo_rcvd_rec cur;
        loff_t ret;

        ENTRY;

        ret = izo_rcvd_get(&cur, fset, uuid);
        if (ret < 0)
                return ret;

        cur.lr_remote_offset = remote_offset;
        cur.lr_remote_recno = remote_recno;

        ret = izo_rcvd_write(fset, &cur);
        EXIT;

        /* callers only want success/failure, not the record offset */
        return (ret < 0) ? ret : 0;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* presto's super.c
*/
/* RCS identification string; marked unused because nothing references it. */
static char rcsid[] __attribute ((unused)) = "$Id: super.c,v 1.4 2002/10/12 02:16:19 rread Exp $";
/* Revision string spliced into the load-time banner and MODULE_DESCRIPTION. */
#define INTERMEZZO_VERSION "$Revision: 1.4 $"
#include <asm/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/module.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#ifdef PRESTO_DEBUG
/* Memory accounting counters, defined only in debug builds. They are printed
 * as "vmem" and "kmem" on the mount error path and at module exit;
 * presumably maintained by PRESTO_ALLOC/PRESTO_FREE -- TODO confirm. */
long presto_vmemory = 0;
long presto_kmemory = 0;
#endif
/* returns an allocated string, copied out from data if opt is found */
/*
 * Extract the value of mount option 'opt' from the token 'data'.
 *
 * 'data' matches when it begins with 'opt' and contains an '='; everything
 * after the first '=' is the value.
 *
 * Returns a freshly allocated copy of the value (the caller owns it), or
 * NULL when the token does not match or memory is exhausted.
 */
static char *opt_read(const char *opt, char *data)
{
        char *eq;
        char *copy;

        CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);

        if (strncmp(opt, data, strlen(opt)) != 0)
                return NULL;

        eq = strchr(data, '=');
        if (eq == NULL)
                return NULL;

        PRESTO_ALLOC(copy, strlen(eq + 1) + 1);
        if (copy == NULL) {
                CERROR("InterMezzo: Out of memory!\n");
                return NULL;
        }

        strcpy(copy, eq + 1);
        CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, copy);
        return copy;
}
/*
 * Store an allocated option string in *dst, releasing whatever string was
 * stored there before. Ownership of 'opt' passes to *dst.
 *
 * When dst is NULL there is nowhere to keep the option, so it is logged
 * and discarded (freed) rather than dereferencing the NULL pointer.
 */
static void opt_store(char **dst, char *opt)
{
        if (!dst) {
                CERROR("intermezzo: store_opt, error dst == NULL\n");
                if (opt)
                        PRESTO_FREE(opt, strlen(opt) + 1);
                return;
        }

        if (*dst)
                PRESTO_FREE(*dst, strlen(*dst) + 1);
        *dst = opt;
}
/*
 * Replace *dst with an allocated copy of 'defval'.
 *
 * Any string previously held in *dst is freed. When defval is NULL or the
 * copy cannot be allocated, *dst is left NULL, never pointing at the
 * string that was just freed. A NULL dst is logged and ignored.
 */
static void opt_set_default(char **dst, char *defval)
{
        char *def_alloced;

        if (!dst) {
                CERROR("intermezzo: store_opt, error dst == NULL\n");
                return;
        }

        if (*dst) {
                PRESTO_FREE(*dst, strlen(*dst) + 1);
                /* do not leave a dangling pointer behind */
                *dst = NULL;
        }

        if (!defval)
                return;

        PRESTO_ALLOC(def_alloced, strlen(defval) + 1);
        if (!def_alloced) {
                CERROR("InterMezzo: Out of memory!\n");
                return;
        }
        strcpy(def_alloced, defval);
        *dst = def_alloced;
}
/* Find the options for InterMezzo in "options", saving them into the
* passed pointers. If the pointer is null, the option is discarded.
* Copy out all non-InterMezzo options into cache_data (to be passed
* to the read_super operation of the cache). The return value will
* be a pointer to the end of the cache_data.
*/
/*
 * Split the mount option string into InterMezzo options and options for
 * the cache filesystem.
 *
 * The default cache type ("ext3" for the "intermezzo" fs type, "tmpfs"
 * otherwise) is stored in *cache_type first. Then each comma separated
 * token of 'options' (which strsep() modifies in place) is examined:
 * "fileset", "cache_type" and "channel" values are stored through the
 * corresponding pointers, every other token is appended to cache_data,
 * separated by commas.
 *
 * Returns a pointer to the end of what was written to cache_data; it equals
 * cache_data when nothing was written.
 */
static char *presto_options(struct file_system_type *fstype,
                            char *options, char *cache_data,
                            char **cache_type, char **fileset,
                            char **channel)
{
        char *cursor = options;
        char *out = cache_data;
        char *token;

        /* set the defaults */
        if (strcmp(fstype->name, "intermezzo") != 0)
                opt_set_default(cache_type, "tmpfs");
        else
                opt_set_default(cache_type, "ext3");

        if (options == NULL || cache_data == NULL)
                return out;

        CDEBUG(D_SUPER, "parsing options\n");
        for (token = strsep(&cursor, ","); token != NULL;
             token = strsep(&cursor, ",")) {
                char *value;
                char *sep;

                /* skip empty tokens such as those produced by ",," */
                if (*token == '\0')
                        continue;

                CDEBUG(D_SUPER, "this_char %s\n", token);

                value = opt_read("fileset", token);
                if (value) {
                        opt_store(fileset, value);
                        continue;
                }

                value = opt_read("cache_type", token);
                if (value) {
                        opt_store(cache_type, value);
                        continue;
                }

                value = opt_read("channel", token);
                if (value) {
                        opt_store(channel, value);
                        continue;
                }

                /* not ours: pass it on to the cache filesystem */
                sep = (out == cache_data) ? "" : ",";
                out += sprintf(out, "%s%s", sep, token);
        }

        return out;
}
/*
 * Attach a cache to an upcall channel.
 *
 * channel: textual minor number from the mount options, or NULL to take
 *          whatever izo_psdev_get_free_channel() hands out.
 *
 * On success the cache is linked onto the channel's uc_cache_list and the
 * minor number is returned; -EINVAL is returned when the minor is negative
 * or not below MAX_CHANNEL.
 */
static int presto_set_channel(struct presto_cache *cache, char *channel)
{
        int minor;

        ENTRY;

        if (channel) {
                minor = simple_strtoul(channel, NULL, 0);
        } else {
                minor = izo_psdev_get_free_channel();
        }

        if (minor < 0 || minor >= MAX_CHANNEL) {
                CERROR("all channels in use or channel too large %d\n",
                       minor);
                return -EINVAL;
        }

        cache->cache_psdev = &izo_channels[minor];
        list_add(&cache->cache_channel_list,
                 &cache->cache_psdev->uc_cache_list);

        EXIT;
        return minor;
}
/* We always need to remove the presto options before passing
mount options to cache FS */
/*
 * get_sb method shared by the "intermezzo" and "vintermezzo" filesystem
 * types.
 *
 * Strips the InterMezzo options (fileset, cache_type, channel) from the
 * mount data, mounts the underlying cache filesystem with the remaining
 * options, and then replaces the super, directory and dentry operations of
 * the resulting super block with the InterMezzo filter operations.
 *
 * Returns the super block of the cache filesystem, or ERR_PTR(-EINVAL) for
 * every failure.
 *
 * NOTE(review): the "vintermezzo must not be used with a block device"
 * test compares the name of the *cache* filesystem type (fstype), not of
 * izo_type; confirm which was intended. That error path also does not
 * release the super block it has just obtained.
 */
struct super_block *
presto_get_sb(struct file_system_type *izo_type, int flags,
              const char *devname, void *data)
{
        struct file_system_type *fstype;
        struct presto_cache *cache = NULL;
        char *cache_data = NULL;
        char *cache_data_end;
        char *cache_type = NULL;
        char *fileset = NULL;
        char *channel = NULL;
        struct super_block *sb;
        int err;
        /* signed: presto_set_channel() reports failure as a negative value */
        int minor;
        /* set once the cache has been linked onto its channel's list */
        int on_channel_list = 0;

        ENTRY;

        /* reserve space for the cache's data */
        PRESTO_ALLOC(cache_data, PAGE_SIZE);
        if ( !cache_data ) {
                CERROR("presto_read_super: Cannot allocate data page.\n");
                EXIT;
                goto out_err;
        }

        /* read and validate options */
        cache_data_end = presto_options(izo_type, data, cache_data, &cache_type,
                                        &fileset, &channel);

        /* the default cache type is an allocated copy and may be missing */
        if (!cache_type) {
                CERROR("presto_read_super: no cache type available.\n");
                EXIT;
                goto out_err;
        }

        /* was there anything for the cache filesystem in the data? */
        if (cache_data_end == cache_data) {
                PRESTO_FREE(cache_data, PAGE_SIZE);
                cache_data_end = cache_data = NULL;
        } else {
                CDEBUG(D_SUPER, "cache_data at %p is: %s\n", cache_data,
                       cache_data);
        }

        /* set up the cache */
        cache = presto_cache_init();
        if ( !cache ) {
                CERROR("presto_read_super: failure allocating cache.\n");
                EXIT;
                goto out_err;
        }
        cache->cache_type = cache_type;

        /* link cache to channel */
        minor = presto_set_channel(cache, channel);
        if (minor < 0) {
                EXIT;
                goto out_err;
        }
        on_channel_list = 1;

        CDEBUG(D_SUPER, "Presto: type=%s, fset=%s, dev= %d, flags %x\n",
               cache_type, fileset?fileset:"NULL", minor, cache->cache_flags);

        /* get the filter for the cache */
        fstype = get_fs_type(cache_type);
        cache->cache_filter = filter_get_filter_fs((const char *)cache_type);
        if ( !fstype || !cache->cache_filter) {
                CERROR("Presto: unrecognized fs type or cache type\n");
                EXIT;
                goto out_err;
        }

        sb = fstype->get_sb(fstype, flags, devname, cache_data);

        if ( !sb || IS_ERR(sb)) {
                CERROR("InterMezzo: cache mount failure.\n");
                EXIT;
                goto out_err;
        }

        /* can we in fact mount the cache */
        if (sb->s_bdev && (strcmp(fstype->name, "vintermezzo") == 0)) {
                CERROR("vintermezzo must not be used with a block device\n");
                EXIT;
                goto out_err;
        }

        /* this might have been freed above */
        if (cache_data) {
                PRESTO_FREE(cache_data, PAGE_SIZE);
                cache_data = NULL;
        }

        cache->cache_sb = sb;
        cache->cache_root = dget(sb->s_root);

        /* we now know the dev of the cache: hash the cache */
        presto_cache_add(cache);
        err = izo_prepare_fileset(sb->s_root, fileset);

        filter_setup_journal_ops(cache->cache_filter, cache->cache_type);

        /* make sure we have our own super operations: sb
           still contains the cache operations */
        filter_setup_super_ops(cache->cache_filter, sb->s_op,
                               &presto_super_ops);
        sb->s_op = filter_c2usops(cache->cache_filter);

        /* get izo directory operations: sb->s_root->d_inode exists now */
        filter_setup_dir_ops(cache->cache_filter, sb->s_root->d_inode,
                             &presto_dir_iops, &presto_dir_fops);
        filter_setup_dentry_ops(cache->cache_filter, sb->s_root->d_op,
                                &presto_dentry_ops);
        sb->s_root->d_inode->i_op = filter_c2udiops(cache->cache_filter);
        sb->s_root->d_inode->i_fop = filter_c2udfops(cache->cache_filter);
        sb->s_root->d_op = filter_c2udops(cache->cache_filter);

        EXIT;
        return sb;

 out_err:
        CDEBUG(D_SUPER, "out_err called\n");
        if (cache) {
                /* the channel must not keep a pointer to freed memory */
                if (on_channel_list)
                        list_del(&cache->cache_channel_list);
                PRESTO_FREE(cache, sizeof(struct presto_cache));
        }
        if (cache_data)
                PRESTO_FREE(cache_data, PAGE_SIZE);
        if (fileset)
                PRESTO_FREE(fileset, strlen(fileset) + 1);
        if (channel)
                PRESTO_FREE(channel, strlen(channel) + 1);
        if (cache_type)
                PRESTO_FREE(cache_type, strlen(cache_type) + 1);

        CDEBUG(D_MALLOC, "mount error exit: kmem %ld, vmem %ld\n",
               presto_kmemory, presto_vmemory);
        return ERR_PTR(-EINVAL);
}
/*
 * The two filesystem types registered by init_intermezzo_fs().
 *
 * NOTE(review): the PRESTO_DEVEL variant refers to presto_read_super, which
 * is not defined in the visible part of this file -- confirm it still
 * builds.
 */
#ifdef PRESTO_DEVEL
static DECLARE_FSTYPE(presto_fs_type, "izo", presto_read_super, FS_REQUIRES_DEV);
static DECLARE_FSTYPE(vpresto_fs_type, "vintermezzo", presto_read_super, FS_LITTER);
#else
/* "vintermezzo": same get_sb method, torn down with kill_litter_super */
static struct file_system_type vpresto_fs_type = {
.owner = THIS_MODULE,
.name = "vintermezzo",
.get_sb = presto_get_sb,
.kill_sb = kill_litter_super,
};
/* "intermezzo": requires a device (FS_REQUIRES_DEV), torn down with
 * kill_block_super */
static struct file_system_type presto_fs_type = {
.owner = THIS_MODULE,
.name = "intermezzo",
.get_sb = presto_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
#endif
/*
 * Module entry point: bring up the psdev channels, the sysctl/proc entries,
 * the cache hash and the dentry-data cache, then register both filesystem
 * types.
 *
 * A failure of init_intermezzo_sysctl() is logged but not fatal. Any later
 * failure undoes the steps completed so far and is returned to the caller,
 * so a failed first register_filesystem() is no longer hidden by the
 * result of the second one.
 *
 * NOTE(review): the unwind path calls the same cleanup routines as
 * exit_intermezzo_fs(); confirm none of them is placed in a section that is
 * discarded for built-in kernels.
 */
int __init init_intermezzo_fs(void)
{
        int status;
        int sysctl_ok;

        printk(KERN_INFO "InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION
               " info@clusterfs.com\n");

        status = presto_psdev_init();
        if ( status ) {
                CERROR("Problem (%d) in init_intermezzo_psdev\n", status);
                return status;
        }

        status = init_intermezzo_sysctl();
        sysctl_ok = (status == 0);
        if (!sysctl_ok) {
                CERROR("presto: failed in init_intermezzo_sysctl!\n");
        }

        presto_cache_init_hash();

        if (!presto_init_ddata_cache()) {
                CERROR("presto out of memory!\n");
                status = -ENOMEM;
                goto out_sysctl;
        }

        status = register_filesystem(&presto_fs_type);
        if (status) {
                CERROR("presto: failed in register_filesystem!\n");
                goto out_ddata;
        }

        status = register_filesystem(&vpresto_fs_type);
        if (status) {
                CERROR("vpresto: failed in register_filesystem!\n");
                goto out_unregister;
        }

        return 0;

 out_unregister:
        unregister_filesystem(&presto_fs_type);
 out_ddata:
        presto_cleanup_ddata_cache();
 out_sysctl:
        /* only tear down the sysctl tables if they were really set up */
        if (sysctl_ok)
                cleanup_intermezzo_sysctl();
        presto_psdev_cleanup();
        return status;
}
/*
 * Module exit: unregister both filesystem types (logging, but otherwise
 * ignoring, failures), then tear down the psdev channels, the sysctl/proc
 * entries and the dentry-data cache, and report the memory counters.
 */
void __exit exit_intermezzo_fs(void)
{
        int err;

        ENTRY;

        err = unregister_filesystem(&presto_fs_type);
        if (err != 0) {
                CERROR("presto: failed to unregister filesystem\n");
        }

        err = unregister_filesystem(&vpresto_fs_type);
        if (err != 0) {
                CERROR("vpresto: failed to unregister filesystem\n");
        }

        presto_psdev_cleanup();
        cleanup_intermezzo_sysctl();
        presto_cleanup_ddata_cache();

        CERROR("after cleanup: kmem %ld, vmem %ld\n",
               presto_kmemory, presto_vmemory);
}
/* Module metadata and the hooks that make init_intermezzo_fs() and
 * exit_intermezzo_fs() the module's load and unload entry points. */
MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("InterMezzo Kernel/Intersync communications " INTERMEZZO_VERSION);
MODULE_LICENSE("GPL");
module_init(init_intermezzo_fs)
module_exit(exit_intermezzo_fs)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 1999 Peter J. Braam <braam@clusterfs.com>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Sysctrl entries for Intermezzo!
*/
#include <linux/config.h> /* for CONFIG_PROC_FS */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/stat.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <asm/bitops.h>
#include <asm/segment.h>
#include <asm/uaccess.h>
#include <linux/utsname.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
/* /proc entries */
#ifdef CONFIG_PROC_FS
/* The "intermezzo" proc directory created under proc_root_fs by
 * init_intermezzo_sysctl(); parent of the "mounts" entry. */
struct proc_dir_entry *proc_fs_intermezzo;
/*
 * get_info handler for the "mounts" proc entry.
 *
 * No text is produced: *start is pointed at buffer + offset and the amount
 * of data is taken to be zero, so the result is 0 - offset, or -EINVAL
 * when that is negative. 'length' is not used.
 */
int intermezzo_mount_get_info( char * buffer, char ** start, off_t offset,
                               int length)
{
        /* nothing has been written, so what is left is 0 minus the offset */
        int remaining = (int)(0 - offset);

        /* this works as long as we are below 1024 characters! */
        *start = buffer + offset;

        return (remaining < 0) ? -EINVAL : remaining;
}
#endif
/* SYSCTL below */
/* Handle returned by register_sysctl_table(); NULL while not registered. */
static struct ctl_table_header *intermezzo_table_header = NULL;
/* 0x100 to avoid any chance of collisions at any point in the tree with
* non-directories
*/
#define PSDEV_INTERMEZZO (0x100)
/* ctl_name identifiers of the individual entries; the same values are used
 * as option names by dosetopt()/dogetopt(). The value 10 is not assigned. */
#define PSDEV_DEBUG 1 /* control debugging */
#define PSDEV_TRACE 2 /* control enter/leave pattern */
#define PSDEV_TIMEOUT 3 /* timeout on upcalls to become interruptible */
#define PSDEV_HARD 4 /* mount type "hard" or "soft" */
#define PSDEV_NO_FILTER 5 /* controls presto_chk */
#define PSDEV_NO_JOURNAL 6 /* controls presto_chk */
#define PSDEV_NO_UPCALL 7 /* controls lento_upcall */
#define PSDEV_ERRORVAL 8 /* controls presto_debug_fail_blkdev */
#define PSDEV_EXCL_GID 9 /* which GID is ignored by presto */
#define PSDEV_BYTES_TO_CLOSE 11 /* bytes to write before close */
/* These are global presto control options */
/* Number of global entries at the front of presto_table. */
#define PRESTO_PRIMARY_CTLCNT 2
/*
 * Contents of the "intermezzo" sysctl directory: the two global entries
 * initialised here, followed by MAX_CHANNEL per-channel directory entries
 * that init_intermezzo_sysctl() fills in at run time. The one additional
 * slot is never written and stays zeroed.
 */
static struct ctl_table presto_table[ PRESTO_PRIMARY_CTLCNT + MAX_CHANNEL + 1] =
{
{PSDEV_DEBUG, "debug", &presto_debug, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_TRACE, "trace", &presto_print_entry, sizeof(int), 0644, NULL, &proc_dointvec},
};
/*
* Installing the sysctl entries: strategy
* - have templates for each /proc/sys/intermezzo/ entry
* such an entry exists for each /dev/presto
* (proto_channel_entry)
* - have a template for the contents of such directories
* (proto_psdev_table)
* - have the master table (presto_table)
*
* When installing, malloc, memcpy and fix up the pointers to point to
* the appropriate constants in izo_channels[your_minor]
*/
/*
 * Template for the entries inside one per-channel directory. The data
 * pointers are left empty here; init_intermezzo_sysctl() copies this table
 * once per channel and fills them in BY INDEX, so the order of the rows
 * must stay in step with the psdev_entries[n].data assignments there.
 */
static ctl_table proto_psdev_table[] = {
{PSDEV_HARD, "hard", 0, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_NO_FILTER, "no_filter", 0, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_NO_JOURNAL, "no_journal", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_NO_UPCALL, "no_upcall", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
{PSDEV_TIMEOUT, "timeout", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
#ifdef PRESTO_DEBUG
{PSDEV_ERRORVAL, "errorval", NULL, sizeof(int), 0644, NULL, &proc_dointvec},
#endif
{ 0 } /* terminator */
};
/* Template for one per-channel directory entry (mode 0555).
 * init_intermezzo_sysctl() copies it and then sets ctl_name, procname and
 * child for each channel. */
static ctl_table proto_channel_entry = {
PSDEV_INTERMEZZO, 0, NULL, 0, 0555, 0,
};
/* Root table passed to register_sysctl_table(): a single "intermezzo"
 * directory whose children are presto_table, followed by the terminator. */
static ctl_table intermezzo_table[2] = {
{PSDEV_INTERMEZZO, "intermezzo", NULL, 0, 0555, presto_table},
{0}
};
/* support for external setting and getting of opts. */
/* particularly via ioctl. The Right way to do this is via sysctl,
* but that will have to wait until intermezzo gets its own nice set of
* sysctl IDs
*/
/* we made these separate as setting may in future be more restricted
* than getting
*/
#ifdef RON_MINNICH
int dosetopt(int minor, struct psdev_opt *opt)
{
int retval = 0;
int newval = opt->optval;
ENTRY;
switch(opt->optname) {
case PSDEV_TIMEOUT:
izo_channels[minor].uc_timeout = newval;
break;
case PSDEV_HARD:
izo_channels[minor].uc_hard = newval;
break;
case PSDEV_NO_FILTER:
izo_channels[minor].uc_no_filter = newval;
break;
case PSDEV_NO_JOURNAL:
izo_channels[minor].uc_no_journal = newval;
break;
case PSDEV_NO_UPCALL:
izo_channels[minor].uc_no_upcall = newval;
break;
#ifdef PRESTO_DEBUG
case PSDEV_ERRORVAL: {
/* If we have a positive arg, set a breakpoint for that
* value. If we have a negative arg, make that device
* read-only. FIXME It would be much better to only
* allow setting the underlying device read-only for the
* current presto cache.
*/
int errorval = izo_channels[minor].uc_errorval;
if (errorval < 0) {
if (newval == 0)
set_device_ro(-errorval, 0);
else
CERROR("device %s already read only\n",
kdevname(-errorval));
} else {
if (newval < 0)
set_device_ro(-newval, 1);
izo_channels[minor].uc_errorval = newval;
CDEBUG(D_PSDEV, "setting errorval to %d\n", newval);
}
break;
}
#endif
case PSDEV_TRACE:
case PSDEV_DEBUG:
case PSDEV_BYTES_TO_CLOSE:
default:
CDEBUG(D_PSDEV,
"ioctl: dosetopt: minor %d, bad optname 0x%x, \n",
minor, opt->optname);
retval = -EINVAL;
}
EXIT;
return retval;
}
/*
 * Read one per-channel option.
 *
 * minor: index into izo_channels[].
 * opt:   optname selects the option (PSDEV_* id); optval receives its
 *        current value.
 *
 * Returns 0 on success or -EINVAL for an option that cannot be read this
 * way (PSDEV_TRACE, PSDEV_DEBUG, PSDEV_BYTES_TO_CLOSE and unknown ids).
 */
int dogetopt(int minor, struct psdev_opt *opt)
{
        int retval = 0;

        ENTRY;

        switch(opt->optname) {

        case PSDEV_TIMEOUT:
                opt->optval = izo_channels[minor].uc_timeout;
                break;

        case PSDEV_HARD:
                opt->optval = izo_channels[minor].uc_hard;
                break;

        case PSDEV_NO_FILTER:
                opt->optval = izo_channels[minor].uc_no_filter;
                break;

        case PSDEV_NO_JOURNAL:
                opt->optval = izo_channels[minor].uc_no_journal;
                break;

        case PSDEV_NO_UPCALL:
                opt->optval = izo_channels[minor].uc_no_upcall;
                break;

/* Guarded by PRESTO_DEBUG, like the matching case in dosetopt() and the
 * "errorval" sysctl entry. PSDEV_DEBUG is a sysctl id that is always
 * defined, so it cannot serve as a build-time switch. */
#ifdef PRESTO_DEBUG
        case PSDEV_ERRORVAL: {
                int errorval = izo_channels[minor].uc_errorval;
                if (errorval < 0 && is_read_only(-errorval))
                        CERROR("device %s has been set read-only\n",
                               kdevname(-errorval));
                opt->optval = izo_channels[minor].uc_errorval;
                break;
        }
#endif

        case PSDEV_TRACE:
        case PSDEV_DEBUG:
        case PSDEV_BYTES_TO_CLOSE:
        default:
                CDEBUG(D_PSDEV,
                       "ioctl: dogetopt: minor %d, bad optval 0x%x, \n",
                       minor, opt->optname);
                retval = -EINVAL;
        }

        EXIT;
        return retval;
}
#endif
/*
 * Build and register the sysctl tables and create the proc entries.
 *
 * One block of sizeof(proto_psdev_table)/sizeof(proto_psdev_table[0])
 * ctl_table entries is needed per channel; all MAX_CHANNEL blocks are
 * allocated as a single array (dev_ctl_table). For every channel a
 * directory entry "intermezzo<i>" is set up in presto_table, its child
 * pointing at that channel's block, whose data pointers are aimed at the
 * matching fields of izo_channels[i].
 *
 * Returns 0 on success or -ENOMEM. On failure everything allocated here is
 * released again and the per-channel slots of presto_table are cleared, so
 * no pointer to freed memory is left behind. Failure to create the proc
 * directory is logged and otherwise ignored.
 */
int /* __init */ init_intermezzo_sysctl(void)
{
        int i;
        int total_dev = MAX_CHANNEL;
        int entries_per_dev = sizeof(proto_psdev_table) /
                sizeof(proto_psdev_table[0]);
        int total_entries = entries_per_dev * total_dev;
        ctl_table *dev_ctl_table;

        PRESTO_ALLOC(dev_ctl_table, sizeof(ctl_table) * total_entries);

        if (! dev_ctl_table) {
                CERROR("WARNING: presto couldn't allocate dev_ctl_table\n");
                return -ENOMEM;
        }

        /* now fill in the entries ... we put the individual presto<x>
         * entries at the end of the table, and the per-presto stuff
         * starting at the front. We assume that the compiler makes
         * this code more efficient, but really, who cares ... it
         * happens once per reboot.
         */
        for(i = 0; i < total_dev; i++) {
                void *p;

                /* entry for this /proc/sys/intermezzo/intermezzo"i" */
                ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];
                /* entries for the individual "files" in this "directory" */
                ctl_table *psdev_entries = &dev_ctl_table[i * entries_per_dev];

                /* allocate the name first, so that on failure slot i has
                 * not been touched yet */
                PRESTO_ALLOC(p, PROCNAME_SIZE);
                if (!p)
                        goto out_nomem;

                /* init the psdev and psdev_entries with the prototypes */
                *psdev = proto_channel_entry;
                memcpy(psdev_entries, proto_psdev_table,
                       sizeof(proto_psdev_table));

                /* now specialize them ... */
                /* the psdev has to point to psdev_entries, and fix the number */
                psdev->ctl_name = psdev->ctl_name + i + 1; /* sorry */

                sprintf((char *) p, "intermezzo%d", i);
                psdev->procname = p;

                /* hook presto into */
                psdev->child = psdev_entries;

                /* now for each psdev entry ... */
                psdev_entries[0].data = &(izo_channels[i].uc_hard);
                psdev_entries[1].data = &(izo_channels[i].uc_no_filter);
                psdev_entries[2].data = &(izo_channels[i].uc_no_journal);
                psdev_entries[3].data = &(izo_channels[i].uc_no_upcall);
                psdev_entries[4].data = &(izo_channels[i].uc_timeout);
#ifdef PRESTO_DEBUG
                psdev_entries[5].data = &(izo_channels[i].uc_errorval);
#endif
        }

#ifdef CONFIG_SYSCTL
        if ( !intermezzo_table_header )
                intermezzo_table_header =
                        register_sysctl_table(intermezzo_table, 0);
#endif
#ifdef CONFIG_PROC_FS
        proc_fs_intermezzo = proc_mkdir("intermezzo", proc_root_fs);
        if (proc_fs_intermezzo) {
                proc_fs_intermezzo->owner = THIS_MODULE;
                create_proc_info_entry("mounts", 0, proc_fs_intermezzo,
                                       intermezzo_mount_get_info);
        } else {
                CERROR("presto: proc_mkdir(\"intermezzo\") failed\n");
        }
#endif
        return 0;

 out_nomem:
        /* undo the slots 0 .. i-1 that were completely set up */
        while (--i >= 0) {
                ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];

                PRESTO_FREE(psdev->procname, PROCNAME_SIZE);
                memset(psdev, 0, sizeof(*psdev));
        }
        PRESTO_FREE(dev_ctl_table, sizeof(ctl_table) * total_entries);
        return -ENOMEM;
}
void cleanup_intermezzo_sysctl(void)
{
int total_dev = MAX_CHANNEL;
int entries_per_dev = sizeof(proto_psdev_table) /
sizeof(proto_psdev_table[0]);
int total_entries = entries_per_dev * total_dev;
int i;
#ifdef CONFIG_SYSCTL
if ( intermezzo_table_header )
unregister_sysctl_table(intermezzo_table_header);
intermezzo_table_header = NULL;
#endif
for(i = 0; i < total_dev; i++) {
/* entry for this /proc/sys/intermezzo/intermezzo"i" */
ctl_table *psdev = &presto_table[i + PRESTO_PRIMARY_CTLCNT];
PRESTO_FREE(psdev->procname, PROCNAME_SIZE);
}
/* presto_table[PRESTO_PRIMARY_CTLCNT].child points to the
* dev_ctl_table previously allocated in init_intermezzo_psdev()
*/
PRESTO_FREE(presto_table[PRESTO_PRIMARY_CTLCNT].child, sizeof(ctl_table) * total_entries);
#ifdef CONFIG_PROC_FS
remove_proc_entry("mounts", proc_fs_intermezzo);
remove_proc_entry("intermezzo", proc_root_fs);
#endif
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc. <braam@clusterfs.com>
* Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Mostly platform independent upcall operations to a cache manager:
* -- upcalls
* -- upcall routines
*
*/
#include <asm/system.h>
#include <asm/segment.h>
#include <asm/signal.h>
#include <linux/signal.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <asm/uaccess.h>
#include "intermezzo_lib.h"
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#include "intermezzo_idl.h"
/*
At present:
-- Asynchronous calls:
- kml: give a "more" kml indication to userland
- kml_truncate: initiate KML truncation
- release_permit: kernel is done with permit
-- Synchronous
- open: fetch file
- permit: get a permit
Errors returned by user level code are positive
*/
/*
 * Allocate and fill an upcall message for the given opcode.
 *
 * The buffer holds a zeroed struct izo_upcall_hdr followed by the optional
 * path, fileset name and record payloads (copied in that order by the
 * LOGL0/LOGL macros).  The total buffer length is stored in *size before
 * the allocation is attempted.
 *
 * The pathlen argument is not consulted here: the length recorded in the
 * header is strlen(path).
 *
 * Returns the new message, or NULL when PRESTO_ALLOC fails.
 */
static struct izo_upcall_hdr *upc_pack(__u32 opcode, int pathlen, char *path,
                                       char *fsetname, int reclen, char *rec,
                                       int *size)
{
        struct izo_upcall_hdr *msg;
        char *cursor;
        ENTRY;

        /* Header plus the rounded length of every payload that is present. */
        *size = sizeof(struct izo_upcall_hdr);
        if (fsetname != NULL)
                *size += round_strlen(fsetname);
        if (path != NULL)
                *size += round_strlen(path);
        if (rec != NULL)
                *size += size_round(reclen);

        PRESTO_ALLOC(msg, *size);
        if (msg == NULL) {
                CERROR("intermezzo upcall: out of memory (opc %d)\n", opcode);
                EXIT;
                return NULL;
        }
        memset(msg, 0, *size);

        /* XXX do we need fsuid ? */
        msg->u_len = *size;
        msg->u_version = IZO_UPC_VERSION;
        msg->u_opc = opcode;
        msg->u_pid = current->pid;
        msg->u_uid = current->fsuid;

        /* Payloads start right behind the fixed header. */
        cursor = (char *)msg + sizeof(*msg);

        if (path != NULL) {
                /*XXX Robert: please review what len to pass in for
                  NUL terminated strings */
                msg->u_pathlen = strlen(path);
                LOGL0(path, msg->u_pathlen, cursor);
        }
        if (fsetname != NULL) {
                msg->u_fsetlen = strlen(fsetname);
                LOGL0(fsetname, strlen(fsetname), cursor);
        }
        if (rec != NULL) {
                msg->u_reclen = reclen;
                LOGL(rec, reclen, cursor);
        }

        EXIT;
        return msg;
}
/* the upcalls */
/*
 * Asynchronous upcall giving userland a "more KML" indication for fileset
 * fsetname; the KML range is described by offset/length and the first and
 * last record numbers.
 *
 * Returns 0 without sending anything when presto_lento_up(minor) is false,
 * -ENOMEM when the message cannot be allocated, and otherwise the negated
 * result of izo_upc_upcall().
 */
int izo_upc_kml(int minor, __u64 offset, __u32 first_recno, __u64 length, __u32 last_recno, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return 0;
        }

        hdr = upc_pack(IZO_UPC_KML, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_offset = offset;
        hdr->u_first_recno = first_recno;
        hdr->u_length = length;
        hdr->u_last_recno = last_recno;

        CDEBUG(D_UPCALL, "KML: fileset %s, offset %Lu, length %Lu, "
               "first %u, last %d; minor %d\n",
               fsetname,
               (unsigned long long) hdr->u_offset,
               (unsigned long long) hdr->u_length,
               hdr->u_first_recno,
               hdr->u_last_recno, minor);

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);

        EXIT;
        return -error;
}
/*
 * Asynchronous upcall that initiates KML truncation for fileset fsetname,
 * passing the new length and the last record number.
 *
 * Returns 0 without sending anything when presto_lento_up(minor) is false,
 * -ENOMEM when the message cannot be allocated, and otherwise the result of
 * izo_upc_upcall() unchanged.
 * NOTE(review): unlike most sibling upcalls the result is not negated here;
 * confirm against the callers which sign they expect.
 */
int izo_upc_kml_truncate(int minor, __u64 length, __u32 last_recno, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return 0;
        }

        hdr = upc_pack(IZO_UPC_KML_TRUNC, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_length = length;
        hdr->u_last_recno = last_recno;

        CDEBUG(D_UPCALL, "KML TRUNCATE: fileset %s, length %Lu, "
               "last recno %d, minor %d\n",
               fsetname,
               (unsigned long long) hdr->u_length,
               hdr->u_last_recno, minor);

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);

        EXIT;
        return error;
}
/*
 * Synchronous "open" upcall asking the cache manager to fetch the file at
 * path in fileset fsetname; *info is sent along as the record payload.
 *
 * Returns -EIO when presto_lento_up(minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated result of
 * izo_upc_upcall().
 */
int izo_upc_open(int minor, __u32 pathlen, char *path, char *fsetname, struct lento_vfs_context *info)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_OPEN, pathlen, path, fsetname,
                       sizeof(*info), (char*)info, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        CDEBUG(D_UPCALL, "path %s\n", path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
/*
 * Synchronous IZO_UPC_GET_FILEID upcall for path in fileset fsetname, with
 * rec/reclen sent as the record payload.
 *
 * Returns -EIO when presto_lento_up(minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated result of
 * izo_upc_upcall().
 */
int izo_upc_get_fileid(int minor, __u32 reclen, char *rec,
                       __u32 pathlen, char *path, char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_GET_FILEID, pathlen, path, fsetname, reclen, rec, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        CDEBUG(D_UPCALL, "path %s\n", path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
int izo_upc_backfetch(int minor, char *path, char *fsetname, struct lento_vfs_context *info)
{
int size;
int error;
struct izo_upcall_hdr *hdr;
ENTRY;
if (!presto_lento_up(minor)) {
EXIT;
return -EIO;
}
hdr = upc_pack(IZO_UPC_BACKFETCH, strlen(path), path, fsetname,
sizeof(*info), (char *)info, &size);
if (!hdr || IS_ERR(hdr)) {
EXIT;
return -PTR_ERR(hdr);
}
/* This is currently synchronous, kml_reint_record blocks */
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
if (error)
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
EXIT;
return -error;
}
/*
 * Synchronous upcall requesting a permit for path in fileset fsetname.
 *
 * When the upcall fails with -EROFS the cache is marked CACHE_CLIENT_RO
 * through izo_mark_cache() on dentry.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * result of izo_upc_upcall() unchanged.  Unlike most sibling upcalls this
 * one does not test presto_lento_up() first.
 */
int izo_upc_permit(int minor, struct dentry *dentry, __u32 pathlen, char *path,
                   char *fsetname)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        hdr = upc_pack(IZO_UPC_PERMIT, pathlen, path, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        CDEBUG(D_UPCALL, "Permit minor %d path %s\n", minor, path);

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error == -EROFS) {
                int err;
                CERROR("InterMezzo: ERROR - requested permit for read-only "
                       "fileset.\n Setting \"%s\" read-only!\n", path);
                err = izo_mark_cache(dentry, 0xFFFFFFFF, CACHE_CLIENT_RO, NULL);
                if (err)
                        CERROR("InterMezzo ERROR: mark_cache %d\n", err);
        } else if (error) {
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
        }

        EXIT;
        return error;
}
/*
 * This is a ping-pong upcall handled on the server when a client (uuid)
 * requests the permit for itself.
 *
 * Sends a synchronous IZO_UPC_REVOKE_PERMIT message for fileset fsetname
 * carrying the 16-byte uuid.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * negated result of izo_upc_upcall().
 */
int izo_upc_revoke_permit(int minor, char *fsetname, __u8 uuid[16])
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        hdr = upc_pack(IZO_UPC_REVOKE_PERMIT, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
/*
 * Asynchronous IZO_UPC_GO_FETCH_KML upcall for fileset fsetname.  The
 * message carries the 16-byte uuid and stores kmlsize in the header's
 * u_offset field.
 *
 * Returns -EIO when presto_lento_up(minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated result of
 * izo_upc_upcall().
 */
int izo_upc_go_fetch_kml(int minor, char *fsetname, __u8 uuid[16],
                         __u64 kmlsize)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_GO_FETCH_KML, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_offset = kmlsize;
        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));

        error = izo_upc_upcall(minor, &size, hdr, ASYNCHRONOUS);
        if (error)
                CERROR("%s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
/*
 * Synchronous IZO_UPC_CONNECT upcall.  The header fields are reused to
 * carry the arguments: u_offset holds ip_address, u_length holds port,
 * u_first_recno holds client_flag and u_uuid holds the 16-byte uuid.
 *
 * Returns -EIO when presto_lento_up(minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated result of
 * izo_upc_upcall().
 */
int izo_upc_connect(int minor, __u64 ip_address, __u64 port, __u8 uuid[16],
                    int client_flag)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_CONNECT, 0, NULL, NULL, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        hdr->u_offset = ip_address;
        hdr->u_length = port;
        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
        hdr->u_first_recno = client_flag;

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error) {
                CERROR("%s: error %d\n", __FUNCTION__, error);
        }

        EXIT;
        return -error;
}
/*
 * Synchronous IZO_UPC_SET_KMLSIZE upcall for fileset fsetname.  The message
 * carries the 16-byte uuid and stores kmlsize in the header's u_length
 * field.
 *
 * Returns -EIO when presto_lento_up(minor) is false, -ENOMEM when the
 * message cannot be allocated, and otherwise the negated result of
 * izo_upc_upcall().
 */
int izo_upc_set_kmlsize(int minor, char *fsetname, __u8 uuid[16], __u64 kmlsize)
{
        int size;
        int error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        if (!presto_lento_up(minor)) {
                EXIT;
                return -EIO;
        }

        hdr = upc_pack(IZO_UPC_SET_KMLSIZE, 0, NULL, fsetname, 0, NULL, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        memcpy(hdr->u_uuid, uuid, sizeof(hdr->u_uuid));
        hdr->u_length = kmlsize;

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("%s: error %d\n", __FUNCTION__, error);

        EXIT;
        return -error;
}
int izo_upc_repstatus(int minor, char * fsetname, struct izo_rcvd_rec *lr_server)
{
int size;
int error;
struct izo_upcall_hdr *hdr;
ENTRY;
if (!presto_lento_up(minor)) {
EXIT;
return -EIO;
}
hdr = upc_pack(IZO_UPC_REPSTATUS, 0, NULL, fsetname,
sizeof(*lr_server), (char*)lr_server,
&size);
if (!hdr || IS_ERR(hdr)) {
EXIT;
return -PTR_ERR(hdr);
}
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
if (error)
CERROR("%s: error %d\n", __FUNCTION__, error);
EXIT;
return -error;
}
#if 0
/*
 * Compiled out.  Synchronous IZO_UPC_CLIENT_MAKE_BRANCH upcall: tagname is
 * sent in the path slot and branchname (including its terminating NUL) as
 * the record payload.
 *
 * Returns -ENOMEM when the message cannot be allocated, otherwise the
 * result of izo_upc_upcall() unchanged.
 */
int izo_upc_client_make_branch(int minor, char *fsetname, char *tagname,
                               char *branchname)
{
        int size, error;
        struct izo_upcall_hdr *hdr;
        ENTRY;

        hdr = upc_pack(IZO_UPC_CLIENT_MAKE_BRANCH, strlen(tagname), tagname,
                       fsetname, strlen(branchname) + 1, branchname, &size);
        if (!hdr) {
                /* upc_pack() signals allocation failure with NULL; PTR_ERR(NULL)
                 * is 0, which would report the failure as success. */
                EXIT;
                return -ENOMEM;
        }
        if (IS_ERR(hdr)) {
                EXIT;
                return -PTR_ERR(hdr);
        }

        error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
        if (error)
                CERROR("InterMezzo: error %d\n", error);

        /* The former PRESTO_FREE(path, pathlen) here operated on locals that
         * were never assigned; nothing is allocated in this function. */
        EXIT;
        return error;
}
#endif
int izo_upc_server_make_branch(int minor, char *fsetname)
{
int size, error;
struct izo_upcall_hdr *hdr;
ENTRY;
hdr = upc_pack(IZO_UPC_SERVER_MAKE_BRANCH, 0, NULL, fsetname, 0, NULL, &size);
if (!hdr || IS_ERR(hdr)) {
error = -PTR_ERR(hdr);
goto error;
}
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
if (error)
CERROR("InterMezzo: error %d\n", error);
error:
EXIT;
return -error;
}
int izo_upc_branch_undo(int minor, char *fsetname, char *branchname)
{
int size;
int error;
struct izo_upcall_hdr *hdr;
ENTRY;
if (!presto_lento_up(minor)) {
EXIT;
return -EIO;
}
hdr = upc_pack(IZO_UPC_BRANCH_UNDO, strlen(branchname), branchname,
fsetname, 0, NULL, &size);
if (!hdr || IS_ERR(hdr)) {
EXIT;
return -PTR_ERR(hdr);
}
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
if (error)
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
EXIT;
return -error;
}
int izo_upc_branch_redo(int minor, char *fsetname, char *branchname)
{
int size;
int error;
struct izo_upcall_hdr *hdr;
ENTRY;
if (!presto_lento_up(minor)) {
EXIT;
return -EIO;
}
hdr = upc_pack(IZO_UPC_BRANCH_REDO, strlen(branchname) + 1, branchname,
fsetname, 0, NULL, &size);
if (!hdr || IS_ERR(hdr)) {
EXIT;
return -PTR_ERR(hdr);
}
error = izo_upc_upcall(minor, &size, hdr, SYNCHRONOUS);
if (error)
CERROR("InterMezzo: %s: error %d\n", __FUNCTION__, error);
EXIT;
return -error;
}
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc.
* Copyright (C) 2000 Stelias Computing, Inc.
* Copyright (C) 2000 Red Hat, Inc.
*
* This file is part of InterMezzo, http://www.inter-mezzo.org.
*
* InterMezzo is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* InterMezzo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with InterMezzo; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* vfs.c
*
* This file implements kernel downcalls from lento.
*
* Author: Rob Simmonds <simmonds@stelias.com>
* Andreas Dilger <adilger@stelias.com>
* Copyright (C) 2000 Stelias Computing Inc
* Copyright (C) 2000 Red Hat Inc.
*
* Extended attribute support
* Copyright (C) 2001 Shirish H. Phatak, Tacit Networks, Inc.
*
* This code is based on code from namei.c in the linux file system;
* see copyright notice below.
*/
/** namei.c copyright **/
/*
* linux/fs/namei.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
/*
* Some corrections by tytso.
*/
/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
* lookup logic.
*/
/** end of namei.c copyright **/
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/semaphore.h>
#include <asm/pgtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/genhd.h>
#include "intermezzo_fs.h"
#include "intermezzo_psdev.h"
#ifdef CONFIG_FS_EXT_ATTR
# include <linux/ext_attr.h>
# if 0 /* was a broken check for Posix ACLs */
# include <linux/posix_acl.h>
# endif
#endif
extern struct inode_operations presto_sym_iops;
/* Write the last_rcvd values to the last_rcvd file. We don't know what the
* UUID or last_ctime values are, so we have to read from the file first
* (sigh).
* exported for branch_reinter in kml_reint.c*/
int presto_write_last_rcvd(struct rec_info *recinfo,
struct presto_file_set *fset,
struct lento_vfs_context *info)
{
int rc;
struct izo_rcvd_rec rcvd_rec;
ENTRY;
memset(&rcvd_rec, 0, sizeof(rcvd_rec));
memcpy(rcvd_rec.lr_uuid, info->uuid, sizeof(rcvd_rec.lr_uuid));
rcvd_rec.lr_remote_recno = HTON__u64(info->recno);
rcvd_rec.lr_remote_offset = HTON__u64(info->kml_offset);
rcvd_rec.lr_local_recno = HTON__u64(recinfo->recno);
rcvd_rec.lr_local_offset = HTON__u64(recinfo->offset + recinfo->size);
rc = izo_rcvd_write(fset, &rcvd_rec);
if (rc < 0) {
/* izo_rcvd_write returns negative errors and non-negative
* offsets */
CERROR("InterMezzo: izo_rcvd_write failed: %d\n", rc);
EXIT;
return rc;
}
EXIT;
return 0;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
*/
/*
 * Sticky-bit check for removing inode from dir.  Returns 0 when the
 * directory is not sticky, when the caller's fsuid owns the inode or the
 * directory, or when the caller has CAP_FOWNER; returns 1 otherwise.
 */
static inline int check_sticky(struct inode *dir, struct inode *inode)
{
        int sticky = (dir->i_mode & S_ISVTX) != 0;

        if (!sticky ||
            inode->i_uid == current->fsuid ||
            dir->i_uid == current->fsuid)
                return 0;

        /* capable() is consulted only after all ownership tests failed */
        return capable(CAP_FOWNER) ? 0 : 1;
}
/* from linux/fs/namei.c */
static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
{
int error;
if (!victim->d_inode || victim->d_parent->d_inode != dir)
return -ENOENT;
error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
if (error)
return error;
if (IS_APPEND(dir))
return -EPERM;
if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
IS_IMMUTABLE(victim->d_inode))
return -EPERM;
if (isdir) {
if (!S_ISDIR(victim->d_inode->i_mode))
return -ENOTDIR;
if (IS_ROOT(victim))
return -EBUSY;
} else if (S_ISDIR(victim->d_inode->i_mode))
return -EISDIR;
return 0;
}
/* from linux/fs/namei.c */
/*
 * Permission check for creating child in dir (from linux/fs/namei.c).
 * Returns -EEXIST when child already has an inode, -ENOENT when dir is
 * dead, and otherwise the result of the write+exec permission() check.
 */
static inline int may_create(struct inode *dir, struct dentry *child)
{
        if (child->d_inode != NULL)
                return -EEXIST;

        return IS_DEADDIR(dir) ? -ENOENT
                               : permission(dir, MAY_WRITE | MAY_EXEC, NULL);
}
#ifdef PRESTO_DEBUG
/* loop_discard_io() comes from a kernel patch to the loop block device: it
 * accepts writes but throws them away instead of writing them to disk.
 * The older approach, setting the underlying device read-only, kept dirty
 * buffers in memory, which ext3 did not cope with.
 */
#ifdef CONFIG_LOOP_DISCARD
#define BLKDEV_FAIL(dev,fail) loop_discard_io(dev,fail)
#else
#define BLKDEV_FAIL(dev,fail) set_device_ro(dev, 1)
#endif
/* Debug breakpoint: when the errorval configured for this fileset's channel
 * (via /proc/sys/intermezzo/intermezzoX/errorval) is non-zero and equals
 * "value", and the backing device is not already read-only, make the
 * device "fail" and store -bd_dev in the channel's errorval.
 */
inline void presto_debug_fail_blkdev(struct presto_file_set *fset,
                                     unsigned long value)
{
        int minor = presto_f2m(fset);
        int errorval = izo_channels[minor].uc_errorval;
        struct block_device *bdev = fset->fset_dentry->d_inode->i_sb->s_bdev;
        char b[BDEVNAME_SIZE];

        /* Nothing to do unless the breakpoint matches and the device is
         * still writable. */
        if (!errorval || errorval != (long)value || bdev_read_only(bdev))
                return;

        CDEBUG(D_SUPER, "setting device %s read only\n",
               bdevname(bdev, b));
        BLKDEV_FAIL(bdev, 1);
        izo_channels[minor].uc_errorval = -bdev->bd_dev;
}
#else
#define presto_debug_fail_blkdev(dev,value) do {} while (0)
#endif
/*
 * Returns 1 when the operation should be journalled to the KML: the context
 * has LENTO_FL_KML set and the dentry is not marked PRESTO_DONT_JOURNAL.
 * Returns 0 otherwise.
 */
static inline int presto_do_kml(struct lento_vfs_context *info,
                                struct dentry *dentry)
{
        return (info->flags & LENTO_FL_KML) &&
               !presto_chk(dentry, PRESTO_DONT_JOURNAL);
}
/*
 * Returns 1 when a last_rcvd record should be written: the context has
 * LENTO_FL_EXPECT set and the dentry is not marked PRESTO_DONT_JOURNAL.
 * Returns 0 otherwise.
 */
static inline int presto_do_rcvd(struct lento_vfs_context *info,
                                 struct dentry *dentry)
{
        return (info->flags & LENTO_FL_EXPECT) &&
               !presto_chk(dentry, PRESTO_DONT_JOURNAL);
}
/* XXX fixme: this should not fail, all these dentries are in memory
when _we_ call this */
/*
 * Stamp ctx->updated_time as ctime/mtime (as selected by "valid") on up to
 * three dentries:
 *   - parent, only when ctx->flags has LENTO_FL_TOUCH_PARENT,
 *   - newobj, only when ctx->flags has LENTO_FL_TOUCH_NEWOBJ,
 *   - target, whenever it is non-NULL.
 * Any of the three may be NULL.  Nothing is done and 0 is returned when
 * ctx->flags has LENTO_FL_IGNORE_TIME.
 *
 * Returns -EROFS / -EPERM immediately when a dentry's inode is read-only,
 * immutable or append-only, or when the cache filter has no inode
 * operations.  Otherwise every selected dentry is updated and the first
 * setattr failure (if any) is returned; previously a later successful
 * update silently overwrote an earlier failure.
 */
int presto_settime(struct presto_file_set *fset,
                   struct dentry *newobj,
                   struct dentry *parent,
                   struct dentry *target,
                   struct lento_vfs_context *ctx,
                   int valid)
{
        int error = 0;
        int rc;
        struct dentry *dentry;
        struct inode *inode;
        struct inode_operations *iops;
        struct iattr iattr;
        ENTRY;

        if (ctx->flags & LENTO_FL_IGNORE_TIME) {
                EXIT;
                return 0;
        }

        iattr.ia_ctime = ctx->updated_time;
        iattr.ia_mtime = ctx->updated_time;
        iattr.ia_valid = valid;

        while (1) {
                /* Pick the next dentry to touch; each one is consumed by
                 * clearing its pointer. */
                if (parent && (ctx->flags & LENTO_FL_TOUCH_PARENT)) {
                        dentry = parent;
                        parent = NULL;
                } else if (newobj && (ctx->flags & LENTO_FL_TOUCH_NEWOBJ)) {
                        dentry = newobj;
                        newobj = NULL;
                } else if (target) {
                        dentry = target;
                        target = NULL;
                } else
                        break;

                inode = dentry->d_inode;

                if (IS_RDONLY(inode)) {
                        EXIT;
                        return -EROFS;
                }

                if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
                        EXIT;
                        return -EPERM;
                }

                iops = filter_c2cdiops(fset->fset_cache->cache_filter);
                if (!iops) {
                        EXIT;
                        return -EPERM;
                }

                if (iops->setattr != NULL)
                        rc = iops->setattr(dentry, &iattr);
                else {
                        rc = 0;
                        inode_setattr(dentry->d_inode, &iattr);
                }

                /* Keep going, but remember the first failure. */
                if (rc && !error)
                        error = rc;
        }

        EXIT;
        return error;
}
/*
 * Snapshot the inode attributes kept for rollback into *rb: mode, device
 * number (encoded with old_encode_dev), uid and gid.
 */
void izo_get_rollback_data(struct inode *inode, struct izo_rollback_data *rb)
{
        /* ownership, widened to 64 bits */
        rb->rb_uid = (__u64)inode->i_uid;
        rb->rb_gid = (__u64)inode->i_gid;

        /* type/permission bits and device number, as 32-bit values */
        rb->rb_mode = (__u32)inode->i_mode;
        rb->rb_rdev = (__u32)old_encode_dev(inode->i_rdev);
}
/*
 * Journal the close of file in fset and cancel its LML record.
 *
 * Phase 1 (KML_OPCODE_RELEASE transaction): journal the close when the
 * file's context has LENTO_FL_KML, and write last_rcvd when it has
 * LENTO_FL_EXPECT.
 * Phase 2 (KML_OPCODE_WRITE transaction): clear the LML close record at
 * fd_lml_offset and truncate the LML.
 *
 * Returns 0 (or a non-negative value from presto_clear_lml_close) on
 * success and a negative error otherwise.  The PRESTO_REQHIGH reservation
 * is released on every path after it has been taken; previously both
 * transaction-start failure paths leaked it, and the first one returned 0.
 */
int presto_do_close(struct presto_file_set *fset, struct file *file)
{
        struct rec_info rec;
        int rc;
        void *handle;
        struct inode *inode = file->f_dentry->d_inode;
        struct presto_file_data *fdata =
                (struct presto_file_data *)file->private_data;
        ENTRY;

        presto_getversion(&fdata->fd_info.remote_version, inode);

        rc = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (rc) {
                EXIT;
                return rc;
        }

        handle = presto_trans_start(fset, file->f_dentry->d_inode,
                                    KML_OPCODE_RELEASE);
        if (IS_ERR(handle)) {
                CERROR("presto_release: no space for transaction\n");
                /* rc is still 0 here; report the failure and give the
                 * reservation back. */
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                EXIT;
                return -ENOSPC;
        }

        if (fdata->fd_info.flags & LENTO_FL_KML)
                rc = presto_journal_close(&rec, fset, fdata, file->f_dentry,
                                          &fdata->fd_version,
                                          &fdata->fd_info.remote_version);
        if (rc) {
                CERROR("presto_close: cannot journal close\n");
                goto out;
        }

        if (fdata->fd_info.flags & LENTO_FL_EXPECT)
                rc = presto_write_last_rcvd(&rec, fset, &fdata->fd_info);
        if (rc) {
                CERROR("presto_close: cannot journal last_rcvd\n");
                goto out;
        }
        presto_trans_commit(fset, handle);

        /* cancel the LML record */
        handle = presto_trans_start(fset, inode, KML_OPCODE_WRITE);
        if (IS_ERR(handle)) {
                CERROR("presto_release: no space for clear\n");
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                EXIT;
                return -ENOSPC;
        }

        rc = presto_clear_lml_close(fset, fdata->fd_lml_offset);
        if (rc < 0) {
                CERROR("presto_close: cannot journal close\n");
                goto out;
        }
        presto_truncate_lml(fset);

out:
        /* handle is whichever transaction is currently open */
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        presto_trans_commit(fset, handle);
        EXIT;
        return rc;
}
/*
 * Apply iattr to dentry's inode inside a KML transaction and, depending on
 * info->flags, journal the change and update last_rcvd.
 *
 * Returns -EROFS / -EPERM for read-only, immutable or append-only inodes,
 * the error from presto_reserve_space(), -ENOSPC when no transaction can be
 * started, or the last error produced by the setattr / journalling steps
 * (0 on success).
 */
int presto_do_setattr(struct presto_file_set *fset, struct dentry *dentry,
struct iattr *iattr, struct lento_vfs_context *info)
{
struct rec_info rec;
struct inode *inode = dentry->d_inode;
struct inode_operations *iops;
int error;
struct presto_version old_ver, new_ver;
struct izo_rollback_data rb;
void *handle;
/* remembered so a size change can be detected after the setattr */
loff_t old_size=inode->i_size;
ENTRY;
error = -EROFS;
if (IS_RDONLY(inode)) {
EXIT;
return -EROFS;
}
if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
EXIT;
return -EPERM;
}
/* capture the pre-change version and rollback attributes for the journal */
presto_getversion(&old_ver, dentry->d_inode);
izo_get_rollback_data(dentry->d_inode, &rb);
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
error = presto_reserve_space(fset->fset_cache, 2*PRESTO_REQHIGH);
if (error) {
EXIT;
return error;
}
/* a size change is journalled as a truncate and clears the PRESTO_DATA mark */
if (iattr->ia_valid & ATTR_SIZE) {
if (izo_mark_dentry(dentry, ~PRESTO_DATA, 0, NULL) != 0)
CERROR("izo_mark_dentry(inode %ld, ~PRESTO_DATA) "
"failed\n", dentry->d_inode->i_ino);
handle = presto_trans_start(fset, dentry->d_inode,
KML_OPCODE_TRUNC);
} else {
handle = presto_trans_start(fset, dentry->d_inode,
KML_OPCODE_SETATTR);
}
if ( IS_ERR(handle) ) {
CERROR("presto_do_setattr: no space for transaction\n");
presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH);
return -ENOSPC;
}
/* use the cache filter's setattr when there is one, else the generic path */
if (dentry->d_inode && iops && iops->setattr) {
error = iops->setattr(dentry, iattr);
} else {
error = inode_change_ok(dentry->d_inode, iattr);
if (!error)
inode_setattr(inode, iattr);
}
if (!error && (iattr->ia_valid & ATTR_SIZE))
vmtruncate(inode, iattr->ia_size);
if (error) {
EXIT;
goto exit;
}
presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x10);
if ( presto_do_kml(info, dentry) ) {
if ((iattr->ia_valid & ATTR_SIZE) && (old_size != inode->i_size)) {
/* Journal a close whenever we see a potential truncate
* At the receiving end, lento should explicitly remove
* ATTR_SIZE from the list of valid attributes */
presto_getversion(&new_ver, inode);
error = presto_journal_close(&rec, fset, NULL, dentry,
&old_ver, &new_ver);
}
if (!error)
error = presto_journal_setattr(&rec, fset, dentry,
&old_ver, &rb, iattr);
}
presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x20);
/* NOTE(review): this assignment overwrites any journalling error from the
 * block above, and rec is only filled in when that block ran - confirm
 * both are intended. */
if ( presto_do_rcvd(info, dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
presto_debug_fail_blkdev(fset, KML_OPCODE_SETATTR | 0x30);
EXIT;
exit:
/* common exit once the transaction is open: drop the reservation, commit */
presto_release_space(fset->fset_cache, 2*PRESTO_REQHIGH);
presto_trans_commit(fset, handle);
return error;
}
/*
 * Downcall entry point: look up name with presto_walk(), find its fileset
 * and apply iattr through presto_do_setattr(), all under the big kernel
 * lock.
 *
 * Returns the error from presto_walk(), -EINVAL when the dentry has no
 * fileset, or the result of presto_do_setattr().
 */
int lento_setattr(const char *name, struct iattr *iattr,
struct lento_vfs_context *info)
{
struct nameidata nd;
struct dentry *dentry;
struct presto_file_set *fset;
int error;
#if 0 /* was a broken check for Posix ACLs */
int (*set_posix_acl)(struct inode *, int type, posix_acl_t *)=NULL;
#endif
ENTRY;
CDEBUG(D_PIOCTL,"name %s, valid %#x, mode %#o, uid %d, gid %d, size %Ld\n",
name, iattr->ia_valid, iattr->ia_mode, iattr->ia_uid,
iattr->ia_gid, iattr->ia_size);
CDEBUG(D_PIOCTL, "atime %#lx, mtime %#lx, ctime %#lx, attr_flags %#x\n",
iattr->ia_atime.tv_sec, iattr->ia_mtime.tv_sec, iattr->ia_ctime.tv_sec,
iattr->ia_attr_flags);
CDEBUG(D_PIOCTL, "offset %d, recno %d, flags %#x\n",
info->slot_offset, info->recno, info->flags);
lock_kernel();
error = presto_walk(name, &nd);
if (error) {
EXIT;
goto exit;
}
dentry = nd.dentry;
fset = presto_fset(dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto exit_lock;
}
/* NOTE: this prevents us from changing the filetype on setattr,
* as we normally only want to change permission bits.
* If this is not correct, then we need to fix the perl code
* to always send the file type OR'ed with the permission.
*/
if (iattr->ia_valid & ATTR_MODE) {
/* keep the requested permission bits, take the file type from the inode */
int set_mode = iattr->ia_mode;
iattr->ia_mode = (iattr->ia_mode & S_IALLUGO) |
(dentry->d_inode->i_mode & ~S_IALLUGO);
CDEBUG(D_PIOCTL, "chmod: orig %#o, set %#o, result %#o\n",
dentry->d_inode->i_mode, set_mode, iattr->ia_mode);
#if 0 /* was a broken check for Posix ACLs */
/* ACl code interacts badly with setattr
* since it tries to modify the ACL using
* set_ext_attr which recurses back into presto.
* This only happens if ATTR_MODE is set.
* Here we are doing a "forced" mode set
* (initiated by lento), so we disable the
* set_posix_acl operation which
* prevents such recursion. -SHP
*
* This will probably still be required when native
* acl journalling is in place.
*/
set_posix_acl=dentry->d_inode->i_op->set_posix_acl;
dentry->d_inode->i_op->set_posix_acl=NULL;
#endif
}
error = presto_do_setattr(fset, dentry, iattr, info);
/* record the remote inode number/generation in the dentry data when asked
 * to; note this runs whether or not presto_do_setattr() succeeded */
if (info->flags & LENTO_FL_SET_DDFILEID) {
struct presto_dentry_data *dd = presto_d2d(dentry);
if (dd) {
dd->remote_ino = info->remote_ino;
dd->remote_generation = info->remote_generation;
}
}
#if 0 /* was a broken check for Posix ACLs */
/* restore the inode_operations if we changed them*/
if (iattr->ia_valid & ATTR_MODE)
dentry->d_inode->i_op->set_posix_acl=set_posix_acl;
#endif
EXIT;
exit_lock:
/* drop the references taken by presto_walk() */
path_release(&nd);
exit:
unlock_kernel();
return error;
}
int presto_do_create(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, int mode,
struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct presto_version tgt_dir_ver, new_file_ver;
struct inode_operations *iops;
void *handle;
ENTRY;
mode &= S_IALLUGO;
mode |= S_IFREG;
// down(&dir->d_inode->i_zombie);
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
if (error) {
EXIT;
// up(&dir->d_inode->i_zombie);
return error;
}
error = may_create(dir->d_inode, dentry);
if (error) {
EXIT;
goto exit_pre_lock;
}
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->create) {
EXIT;
goto exit_pre_lock;
}
presto_getversion(&tgt_dir_ver, dir->d_inode);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_CREATE);
if ( IS_ERR(handle) ) {
EXIT;
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
CERROR("presto_do_create: no space for transaction\n");
error=-ENOSPC;
goto exit_pre_lock;
}
DQUOT_INIT(dir->d_inode);
lock_kernel();
error = iops->create(dir->d_inode, dentry, mode, NULL);
if (error) {
EXIT;
goto exit_lock;
}
if (dentry->d_inode) {
struct presto_cache *cache = fset->fset_cache;
/* was this already done? */
presto_set_ops(dentry->d_inode, cache->cache_filter);
filter_setup_dentry_ops(cache->cache_filter,
dentry->d_op,
&presto_dentry_ops);
dentry->d_op = filter_c2udops(cache->cache_filter);
/* if Lento creates this file, we won't have data */
if ( ISLENTO(presto_c2m(cache)) ) {
presto_set(dentry, PRESTO_ATTR);
} else {
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
}
}
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, dir, dentry,
info, ATTR_CTIME | ATTR_MTIME);
if (error) {
EXIT;
goto exit_lock;
}
presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x10);
if ( presto_do_kml(info, dentry) ) {
presto_getversion(&new_file_ver, dentry->d_inode);
error = presto_journal_create(&rec, fset, dentry, &tgt_dir_ver,
&new_file_ver,
dentry->d_inode->i_mode);
}
presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x20);
if ( presto_do_rcvd(info, dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
presto_debug_fail_blkdev(fset, KML_OPCODE_CREATE | 0x30);
/* add inode dentry */
if (fset->fset_cache->cache_filter->o_trops->tr_add_ilookup ) {
struct dentry *d;
d = fset->fset_cache->cache_filter->o_trops->tr_add_ilookup
(dir->d_inode->i_sb->s_root, dentry);
}
EXIT;
exit_lock:
unlock_kernel();
presto_trans_commit(fset, handle);
exit_pre_lock:
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
// up(&dir->d_inode->i_zombie);
return error;
}
int lento_create(const char *name, int mode, struct lento_vfs_context *info)
{
int error;
struct nameidata nd;
char * pathname;
struct dentry *dentry;
struct presto_file_set *fset;
ENTRY;
pathname = getname(name);
error = PTR_ERR(pathname);
if (IS_ERR(pathname)) {
EXIT;
goto exit;
}
/* this looks up the parent */
error = path_lookup(pathname, LOOKUP_PARENT, &nd);
if (error) {
EXIT;
goto exit;
}
dentry = lookup_create(&nd, 0);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
EXIT;
goto exit_lock;
}
fset = presto_fset(dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto exit_lock;
}
error = presto_do_create(fset, dentry->d_parent, dentry, (mode&S_IALLUGO)|S_IFREG,
info);
EXIT;
exit_lock:
path_release (&nd);
dput(dentry);
up(&dentry->d_parent->d_inode->i_sem);
putname(pathname);
exit:
return error;
}
int presto_do_link(struct presto_file_set *fset, struct dentry *old_dentry,
struct dentry *dir, struct dentry *new_dentry,
struct lento_vfs_context *info)
{
struct rec_info rec;
struct inode *inode;
int error;
struct inode_operations *iops;
struct presto_version tgt_dir_ver;
struct presto_version new_link_ver;
void *handle;
// down(&dir->d_inode->i_zombie);
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
if (error) {
EXIT;
// up(&dir->d_inode->i_zombie);
return error;
}
error = -ENOENT;
inode = old_dentry->d_inode;
if (!inode)
goto exit_lock;
error = may_create(dir->d_inode, new_dentry);
if (error)
goto exit_lock;
error = -EXDEV;
if (dir->d_inode->i_sb != inode->i_sb)
goto exit_lock;
/*
* A link to an append-only or immutable file cannot be created.
*/
error = -EPERM;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
EXIT;
goto exit_lock;
}
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->link) {
EXIT;
goto exit_lock;
}
presto_getversion(&tgt_dir_ver, dir->d_inode);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_LINK);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
CERROR("presto_do_link: no space for transaction\n");
return -ENOSPC;
}
DQUOT_INIT(dir->d_inode);
lock_kernel();
error = iops->link(old_dentry, dir->d_inode, new_dentry);
unlock_kernel();
if (error) {
EXIT;
goto exit_lock;
}
/* link dd data to that of existing dentry */
old_dentry->d_op->d_release(new_dentry);
if (!presto_d2d(old_dentry))
BUG();
presto_d2d(old_dentry)->dd_count++;
new_dentry->d_fsdata = presto_d2d(old_dentry);
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, dir, new_dentry,
info, ATTR_CTIME);
if (error) {
EXIT;
goto exit_lock;
}
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x10);
presto_getversion(&new_link_ver, new_dentry->d_inode);
if ( presto_do_kml(info, old_dentry) )
error = presto_journal_link(&rec, fset, old_dentry, new_dentry,
&tgt_dir_ver, &new_link_ver);
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x20);
if ( presto_do_rcvd(info, old_dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
presto_debug_fail_blkdev(fset, KML_OPCODE_LINK | 0x30);
EXIT;
presto_trans_commit(fset, handle);
exit_lock:
presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
// up(&dir->d_inode->i_zombie);
return error;
}
/*
 * Downcall entry point: create the hard link newname to oldname via
 * presto_do_link().
 *
 * Returns the error from getname(), __user_walk(), path_lookup() or
 * lookup_create(); -EXDEV when the two paths are on different mounts;
 * -EINVAL when the new dentry has no fileset; or the result of
 * presto_do_link().
 *
 * Cleanup is unwound in reverse order of acquisition.  Fixes over the
 * previous version: the -EXDEV path now releases nd, and the "no fileset"
 * path now drops the reference on new_dentry.
 */
int lento_link(const char * oldname, const char * newname,
               struct lento_vfs_context *info)
{
        int error;
        char *to;
        struct presto_file_set *fset;
        struct dentry *new_dentry;
        struct nameidata nd, old_nd;

        to = getname(newname);
        if (IS_ERR(to))
                return PTR_ERR(to);

        error = __user_walk(oldname, 0, &old_nd);
        if (error)
                goto exit;

        error = path_lookup(to, LOOKUP_PARENT, &nd);
        if (error)
                goto out;

        error = -EXDEV;
        if (old_nd.mnt != nd.mnt)
                goto out_release;

        /* The parent's i_sem is released below whether or not
         * lookup_create() returned an error pointer, as before. */
        new_dentry = lookup_create(&nd, 0);
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
                goto out_unlock;

        fset = presto_fset(new_dentry);
        if (!fset) {
                CERROR("No fileset!\n");
                EXIT;
                error = -EINVAL;
        } else {
                error = presto_do_link(fset, old_nd.dentry,
                                       nd.dentry,
                                       new_dentry, info);
        }
        dput(new_dentry);

out_unlock:
        up(&nd.dentry->d_inode->i_sem);
out_release:
        path_release(&nd);
out:
        path_release(&old_nd);
exit:
        putname(to);
        return error;
}
/*
 * Unlink dentry from directory dir through the cache filter's unlink
 * operation, inside a KML_OPCODE_UNLINK transaction.  On success the
 * timestamps are touched and, depending on info->flags, the unlink is
 * journalled (including the old symlink target, when dentry was a symlink
 * and readlink succeeded) and last_rcvd is updated.
 *
 * Returns 0 on success; the error from may_delete() or
 * presto_reserve_space(); -EPERM when the filter has no unlink operation;
 * -ENOSPC when no transaction can be started; -EBUSY for a mount point;
 * -ENOMEM when the symlink buffer cannot be allocated; or the error of the
 * failing step.
 *
 * Fixes over the previous version:
 *  - -EBUSY and errors from iops->unlink() are returned instead of being
 *    overwritten by presto_settime(), so a failed unlink is no longer
 *    journalled;
 *  - the allocation-failure path drops the big kernel lock and the extra
 *    dentry reference before leaving;
 *  - the readlink failure message prints the error code rather than the
 *    buffer pointer.
 */
int presto_do_unlink(struct presto_file_set *fset, struct dentry *dir,
                     struct dentry *dentry, struct lento_vfs_context *info)
{
        struct rec_info rec;
        struct inode_operations *iops;
        struct presto_version tgt_dir_ver, old_file_ver;
        struct izo_rollback_data rb;
        void *handle;
        int do_kml = 0, do_rcvd = 0, linkno = 0, error, old_targetlen = 0;
        char *old_target = NULL;

        ENTRY;
        error = may_delete(dir->d_inode, dentry, 0);
        if (error) {
                EXIT;
                return error;
        }

        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops || !iops->unlink) {
                EXIT;
                return error;
        }

        error = presto_reserve_space(fset->fset_cache, PRESTO_REQLOW);
        if (error) {
                EXIT;
                return error;
        }

        /* When this is the inode's last link and the presto dentry data
         * refers to an inode-dentry, detach and drop that dentry. */
        if (presto_d2d(dentry)) {
                struct presto_dentry_data *dd = presto_d2d(dentry);
                struct dentry *de = dd->dd_inodentry;
                if (de && dentry->d_inode->i_nlink == 1) {
                        dd->dd_count--;
                        dd->dd_inodentry = NULL;
                        de->d_fsdata = NULL;
                        atomic_dec(&de->d_inode->i_count);
                        de->d_inode = NULL;
                        dput(de);
                }
        }

        presto_getversion(&tgt_dir_ver, dir->d_inode);
        presto_getversion(&old_file_ver, dentry->d_inode);
        izo_get_rollback_data(dentry->d_inode, &rb);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_UNLINK);
        if (IS_ERR(handle)) {
                presto_release_space(fset->fset_cache, PRESTO_REQLOW);
                CERROR("ERROR: presto_do_unlink: no space for transaction. Tell Peter.\n");
                return -ENOSPC;
        }
        DQUOT_INIT(dir->d_inode);

        if (d_mountpoint(dentry)) {
                error = -EBUSY;
                EXIT;
                goto exit;
        }

        lock_kernel();
        linkno = dentry->d_inode->i_nlink;
        /* With other links remaining the inode survives the unlink; hold
         * the dentry so its ctime can be updated afterwards. */
        if (linkno > 1) {
                dget(dentry);
        }

        if (S_ISLNK(dentry->d_inode->i_mode)) {
                mm_segment_t old_fs;
                struct inode_operations *riops;
                riops = filter_c2csiops(fset->fset_cache->cache_filter);

                PRESTO_ALLOC(old_target, PATH_MAX);
                if (old_target == NULL) {
                        unlock_kernel();
                        if (linkno > 1)
                                dput(dentry);
                        error = -ENOMEM;
                        EXIT;
                        goto exit;
                }

                /* readlink writes through a user-space pointer; widen the
                 * address limit so the kernel buffer is accepted */
                old_fs = get_fs();
                set_fs(get_ds());

                if (riops->readlink == NULL)
                        CERROR("InterMezzo %s: no readlink iops.\n",
                               __FUNCTION__);
                else
                        old_targetlen =
                                riops->readlink(dentry, old_target,
                                                PATH_MAX);
                if (old_targetlen < 0) {
                        CERROR("InterMezzo: readlink failed: %d\n",
                               old_targetlen);
                        PRESTO_FREE(old_target, PATH_MAX);
                        old_target = NULL;
                        old_targetlen = 0;
                }
                set_fs(old_fs);
        }

        do_kml = presto_do_kml(info, dir);
        do_rcvd = presto_do_rcvd(info, dir);
        error = iops->unlink(dir->d_inode, dentry);
        unlock_kernel();
        if (error) {
                if (linkno > 1)
                        dput(dentry);
                EXIT;
                goto exit;
        }

        if (linkno > 1) {
                /* FIXME: Combine this with the next call? */
                error = presto_settime(fset, NULL, NULL, dentry,
                                       info, ATTR_CTIME);
                dput(dentry);
                if (error) {
                        EXIT;
                        goto exit;
                }
        }

        error = presto_settime(fset, NULL, NULL, dir,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x10);
        if (do_kml)
                error = presto_journal_unlink(&rec, fset, dir, &tgt_dir_ver,
                                              &old_file_ver, &rb, dentry,
                                              old_target, old_targetlen);
        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x20);
        /* NOTE(review): this overwrites a journalling error from the step
         * above; kept as in the original. */
        if (do_rcvd) {
                error = presto_write_last_rcvd(&rec, fset, info);
        }
        presto_debug_fail_blkdev(fset, KML_OPCODE_UNLINK | 0x30);
        EXIT;

exit:
        presto_release_space(fset->fset_cache, PRESTO_REQLOW);
        presto_trans_commit(fset, handle);
        if (old_target != NULL)
                PRESTO_FREE(old_target, PATH_MAX);
        return error;
}
/*
 * Unlink the file named by @pathname on behalf of Lento.
 *
 * Looks up the parent directory, takes its i_sem, resolves the last
 * component and hands it to presto_do_unlink().  Returns 0 or a negative
 * errno.
 */
int lento_unlink(const char *pathname, struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *victim;
        char *kname;
        int rc = 0;

        ENTRY;
        kname = getname(pathname);
        if (IS_ERR(kname))
                return PTR_ERR(kname);

        rc = path_lookup(kname, LOOKUP_PARENT, &nd);
        if (rc)
                goto out_name;

        if (nd.last_type != LAST_NORM) {
                rc = -EISDIR;
                goto out_path;
        }

        down(&nd.dentry->d_inode->i_sem);
        victim = lookup_hash(&nd.last, nd.dentry);
        if (IS_ERR(victim)) {
                rc = PTR_ERR(victim);
                goto out_unlock;
        }

        fset = presto_fset(victim);
        if (!fset) {
                rc = -EINVAL;
                CERROR("No fileset!\n");
                EXIT;
        } else if (nd.last.name[nd.last.len]) {
                /*
                 * Trailing slashes: checked only now so that the error
                 * value reflects what the last component really is.
                 */
                if (!victim->d_inode)
                        rc = -ENOENT;
                else if (S_ISDIR(victim->d_inode->i_mode))
                        rc = -EISDIR;
                else
                        rc = -ENOTDIR;
        } else {
                rc = presto_do_unlink(fset, nd.dentry, victim, info);
                if (rc == 0)
                        d_delete(victim);
        }
        EXIT;
        dput(victim);

out_unlock:
        up(&nd.dentry->d_inode->i_sem);
out_path:
        path_release(&nd);
out_name:
        putname(kname);
        return rc;
}
/*
 * Create the symlink @dentry in directory @dir with target @oldname using
 * the cache filesystem's directory inode operations, inside a presto
 * transaction, and journal the result.
 *
 * Space is reserved first and released on every exit path; the transaction
 * is committed on every path that started one.  Returns 0 or a negative
 * errno.
 */
int presto_do_symlink(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, const char *oldname,
struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct presto_version tgt_dir_ver, new_link_ver;
struct inode_operations *iops;
void *handle;
ENTRY;
// down(&dir->d_inode->i_zombie);
/* record + max path len + space to free */
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
if (error) {
EXIT;
// up(&dir->d_inode->i_zombie);
return error;
}
error = may_create(dir->d_inode, dentry);
if (error) {
EXIT;
goto exit_lock;
}
/* the cache filesystem must provide a symlink method */
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->symlink) {
EXIT;
goto exit_lock;
}
/* remember the parent's version before it is modified */
presto_getversion(&tgt_dir_ver, dir->d_inode);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_SYMLINK);
if ( IS_ERR(handle) ) {
/* released here, so return directly instead of going through exit_lock */
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
CERROR("ERROR: presto_do_symlink: no space for transaction. Tell Peter.\n");
EXIT;
// up(&dir->d_inode->i_zombie);
return -ENOSPC;
}
DQUOT_INIT(dir->d_inode);
lock_kernel();
error = iops->symlink(dir->d_inode, dentry, oldname);
if (error) {
EXIT;
goto exit;
}
/* install the presto inode and dentry operations on the new object */
if (dentry->d_inode) {
struct presto_cache *cache = fset->fset_cache;
presto_set_ops(dentry->d_inode, cache->cache_filter);
filter_setup_dentry_ops(cache->cache_filter, dentry->d_op,
&presto_dentry_ops);
dentry->d_op = filter_c2udops(cache->cache_filter);
/* XXX ? Cache state ? if Lento creates a symlink */
if ( ISLENTO(presto_c2m(cache)) ) {
presto_set(dentry, PRESTO_ATTR);
} else {
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
}
}
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, dir, dentry,
info, ATTR_CTIME | ATTR_MTIME);
if (error) {
EXIT;
goto exit;
}
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x10);
/* NOTE(review): d_inode is used unconditionally here although it was
 * tested for NULL above - confirm ->symlink always instantiates it. */
presto_getversion(&new_link_ver, dentry->d_inode);
if ( presto_do_kml(info, dentry) )
error = presto_journal_symlink(&rec, fset, dentry, oldname,
&tgt_dir_ver, &new_link_ver);
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x20);
/* NOTE(review): a journal error from the call above is overwritten by
 * the result of presto_write_last_rcvd() - confirm this is intended. */
if ( presto_do_rcvd(info, dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
presto_debug_fail_blkdev(fset, KML_OPCODE_SYMLINK | 0x30);
EXIT;
exit:
unlock_kernel();
presto_trans_commit(fset, handle);
exit_lock:
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
// up(&dir->d_inode->i_zombie);
return error;
}
int lento_symlink(const char *oldname, const char *newname,
struct lento_vfs_context *info)
{
int error;
char *from;
char *to;
struct dentry *dentry;
struct presto_file_set *fset;
struct nameidata nd;
ENTRY;
lock_kernel();
from = getname(oldname);
error = PTR_ERR(from);
if (IS_ERR(from)) {
EXIT;
goto exit;
}
to = getname(newname);
error = PTR_ERR(to);
if (IS_ERR(to)) {
EXIT;
goto exit_from;
}
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error) {
EXIT;
goto exit_to;
}
dentry = lookup_create(&nd, 0);
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
path_release(&nd);
EXIT;
goto exit_to;
}
fset = presto_fset(dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
path_release(&nd);
EXIT;
goto exit_lock;
}
error = presto_do_symlink(fset, nd.dentry,
dentry, from, info);
path_release(&nd);
EXIT;
exit_lock:
up(&nd.dentry->d_inode->i_sem);
dput(dentry);
exit_to:
putname(to);
exit_from:
putname(from);
exit:
unlock_kernel();
return error;
}
int presto_do_mkdir(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, int mode,
struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct presto_version tgt_dir_ver, new_dir_ver;
void *handle;
ENTRY;
// down(&dir->d_inode->i_zombie);
/* one journal record + directory block + room for removals*/
error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
if (error) {
EXIT;
// up(&dir->d_inode->i_zombie);
return error;
}
error = may_create(dir->d_inode, dentry);
if (error) {
EXIT;
goto exit_lock;
}
error = -EPERM;
if (!filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir) {
EXIT;
goto exit_lock;
}
error = -ENOSPC;
presto_getversion(&tgt_dir_ver, dir->d_inode);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKDIR);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
CERROR("presto_do_mkdir: no space for transaction\n");
goto exit_lock;
}
DQUOT_INIT(dir->d_inode);
mode &= (S_IRWXUGO|S_ISVTX);
lock_kernel();
error = filter_c2cdiops(fset->fset_cache->cache_filter)->mkdir(dir->d_inode, dentry, mode);
if (error) {
EXIT;
goto exit;
}
if ( dentry->d_inode && !error) {
struct presto_cache *cache = fset->fset_cache;
presto_set_ops(dentry->d_inode, cache->cache_filter);
filter_setup_dentry_ops(cache->cache_filter,
dentry->d_op,
&presto_dentry_ops);
dentry->d_op = filter_c2udops(cache->cache_filter);
/* if Lento does this, we won't have data */
if ( ISLENTO(presto_c2m(cache)) ) {
presto_set(dentry, PRESTO_ATTR);
} else {
presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
}
}
info->flags |= LENTO_FL_TOUCH_PARENT;
error = presto_settime(fset, NULL, dir, dentry,
info, ATTR_CTIME | ATTR_MTIME);
if (error) {
EXIT;
goto exit;
}
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x10);
presto_getversion(&new_dir_ver, dentry->d_inode);
if ( presto_do_kml(info, dir) )
error = presto_journal_mkdir(&rec, fset, dentry, &tgt_dir_ver,
&new_dir_ver,
dentry->d_inode->i_mode);
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x20);
if ( presto_do_rcvd(info, dentry) )
error = presto_write_last_rcvd(&rec, fset, info);
presto_debug_fail_blkdev(fset, KML_OPCODE_MKDIR | 0x30);
EXIT;
exit:
unlock_kernel();
presto_trans_commit(fset, handle);
exit_lock:
presto_release_space(fset->fset_cache, PRESTO_REQHIGH + 4096);
// up(&dir->d_inode->i_zombie);
return error;
}
/*
 * Create the directory @name with permission bits @mode on behalf of
 * Lento.  Returns 0 or a negative errno.
 *
 * Look out: this function may change a normal dentry
 * into a directory dentry (different size)..
 */
int lento_mkdir(const char *name, int mode, struct lento_vfs_context *info)
{
        int error;
        char *pathname;
        struct dentry *dentry;
        struct presto_file_set *fset;
        struct nameidata nd;

        ENTRY;
        pathname = getname(name);
        error = PTR_ERR(pathname);
        if (IS_ERR(pathname)) {
                EXIT;
                return error;
        }

        /*
         * Log the copy made by getname(); @name itself is only ever handed
         * to getname() and is not dereferenced directly.
         */
        CDEBUG(D_PIOCTL, "name: %s, mode %o, offset %d, recno %d, flags %x\n",
               pathname, mode, info->slot_offset, info->recno, info->flags);

        error = path_lookup(pathname, LOOKUP_PARENT, &nd);
        if (error)
                goto out_name;

        dentry = lookup_create(&nd, 1);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                fset = presto_fset(dentry);
                error = -EINVAL;
                if (!fset) {
                        CERROR("No fileset!\n");
                        EXIT;
                        goto out_dput;
                }

                error = presto_do_mkdir(fset, nd.dentry, dentry,
                                        mode & S_IALLUGO, info);
out_dput:
                dput(dentry);
        }
        /* i_sem is released whether or not lookup_create() succeeded */
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
out_name:
        EXIT;
        putname(pathname);
        CDEBUG(D_PIOCTL, "error: %d\n", error);
        return error;
}
/*
 * Take an extra reference on @dentry and drop it from the dcache hash if
 * nobody else is using it.  When the count is not exactly 2 after the
 * dget(), the children are shrunk first and the count is checked again;
 * the dentry stays hashed if it is still not 2.  The extra reference is
 * left for the caller to dput().
 */
static void d_unhash(struct dentry *dentry)
{
        dget(dentry);
        if (atomic_read(&dentry->d_count) != 2) {
                shrink_dcache_parent(dentry);
                if (atomic_read(&dentry->d_count) != 2)
                        return;
        }
        d_drop(dentry);
}
/*
 * Remove directory @dentry from @dir using the cache filesystem's
 * directory inode operations, inside a presto transaction, and journal
 * the result when the removal succeeded.
 *
 * Returns 0 or a negative errno.
 */
int presto_do_rmdir(struct presto_file_set *fset, struct dentry *dir,
struct dentry *dentry, struct lento_vfs_context *info)
{
struct rec_info rec;
int error;
struct presto_version tgt_dir_ver, old_dir_ver;
struct izo_rollback_data rb;
struct inode_operations *iops;
void *handle;
int do_kml, do_rcvd;
int size;
ENTRY;
error = may_delete(dir->d_inode, dentry, 1);
if (error)
return error;
/* the cache filesystem must provide an rmdir method */
error = -EPERM;
iops = filter_c2cdiops(fset->fset_cache->cache_filter);
if (!iops->rmdir) {
EXIT;
return error;
}
/* reservation is reduced by the size of the directory being removed */
size = PRESTO_REQHIGH - dentry->d_inode->i_size;
error = presto_reserve_space(fset->fset_cache, size);
if (error) {
EXIT;
return error;
}
/* sample versions and rollback data before anything is modified */
presto_getversion(&tgt_dir_ver, dir->d_inode);
presto_getversion(&old_dir_ver, dentry->d_inode);
izo_get_rollback_data(dentry->d_inode, &rb);
handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_RMDIR);
if ( IS_ERR(handle) ) {
presto_release_space(fset->fset_cache, size);
CERROR("ERROR: presto_do_rmdir: no space for transaction. Tell Peter.\n");
return -ENOSPC;
}
DQUOT_INIT(dir->d_inode);
/* journalling decisions are taken before the directory is removed */
do_kml = presto_do_kml(info, dir);
do_rcvd = presto_do_rcvd(info, dir);
// double_down(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
/* d_unhash() takes an extra reference; it is dropped by the dput() below */
d_unhash(dentry);
if (IS_DEADDIR(dir->d_inode))
error = -ENOENT;
else if (d_mountpoint(dentry)) {
CERROR("foo: d_mountpoint(dentry): ino %ld\n",
dentry->d_inode->i_ino);
error = -EBUSY;
} else {
lock_kernel();
error = iops->rmdir(dir->d_inode, dentry);
unlock_kernel();
if (!error) {
dentry->d_inode->i_flags |= S_DEAD;
error = presto_settime(fset, NULL, NULL, dir, info,
ATTR_CTIME | ATTR_MTIME);
}
}
// double_up(&dir->d_inode->i_zombie, &dentry->d_inode->i_zombie);
if (!error)
d_delete(dentry);
dput(dentry);
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x10);
/* only journal when the removal (and the time update) succeeded */
if ( !error && do_kml )
error = presto_journal_rmdir(&rec, fset, dir, &tgt_dir_ver,
&old_dir_ver, &rb,
dentry->d_name.len,
dentry->d_name.name);
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x20);
if ( !error && do_rcvd )
error = presto_write_last_rcvd(&rec, fset, info);
presto_debug_fail_blkdev(fset, KML_OPCODE_RMDIR | 0x30);
EXIT;
presto_trans_commit(fset, handle);
presto_release_space(fset->fset_cache, size);
return error;
}
/*
 * Remove the directory named by @pathname on behalf of Lento.
 *
 * "..", "." and the root are rejected before the parent's i_sem is taken;
 * otherwise the last component is looked up and passed to
 * presto_do_rmdir().  Returns 0 or a negative errno.
 */
int lento_rmdir(const char *pathname, struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *victim;
        char *kname;
        int rc = 0;

        ENTRY;
        kname = getname(pathname);
        if (IS_ERR(kname)) {
                EXIT;
                return PTR_ERR(kname);
        }

        rc = path_lookup(kname, LOOKUP_PARENT, &nd);
        if (rc) {
                EXIT;
                goto out_name;
        }

        if (nd.last_type == LAST_DOTDOT) {
                rc = -ENOTEMPTY;
                EXIT;
                goto out_path;
        }
        if (nd.last_type == LAST_ROOT || nd.last_type == LAST_DOT) {
                rc = -EBUSY;
                EXIT;
                goto out_path;
        }

        down(&nd.dentry->d_inode->i_sem);
        victim = lookup_hash(&nd.last, nd.dentry);
        if (IS_ERR(victim)) {
                rc = PTR_ERR(victim);
                goto out_unlock;
        }

        fset = presto_fset(victim);
        if (fset) {
                rc = presto_do_rmdir(fset, nd.dentry, victim, info);
        } else {
                rc = -EINVAL;
                CERROR("No fileset!\n");
                EXIT;
        }
        dput(victim);

out_unlock:
        up(&nd.dentry->d_inode->i_sem);
out_path:
        path_release(&nd);
out_name:
        putname(kname);
        EXIT;
        return rc;
}
/*
 * Create the special file @dentry in @dir with @mode and device number
 * @dev using the cache filesystem's directory inode operations, inside a
 * presto transaction, and journal the result.
 *
 * Returns 0 or a negative errno: -EPERM when a character/block device is
 * requested without CAP_MKNOD or the cache has no mknod method, -ENOSPC
 * when the transaction cannot be started.  The space reservation is
 * released exactly once on every path after it succeeded.
 */
int presto_do_mknod(struct presto_file_set *fset, struct dentry *dir,
                    struct dentry *dentry, int mode, dev_t dev,
                    struct lento_vfs_context *info)
{
        struct rec_info rec;
        int error;
        struct presto_version tgt_dir_ver, new_node_ver;
        struct inode_operations *iops;
        void *handle;

        ENTRY;
        /* one KML entry */
        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                return error;
        }

        /* error was just set to 0 above, so the failure code must be set here */
        if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) {
                error = -EPERM;
                EXIT;
                goto exit_lock;
        }

        error = may_create(dir->d_inode, dentry);
        if (error) {
                EXIT;
                goto exit_lock;
        }

        /* the cache filesystem must provide a mknod method */
        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops->mknod) {
                EXIT;
                goto exit_lock;
        }

        DQUOT_INIT(dir->d_inode);
        lock_kernel();

        error = -ENOSPC;
        presto_getversion(&tgt_dir_ver, dir->d_inode);
        handle = presto_trans_start(fset, dir->d_inode, KML_OPCODE_MKNOD);
        if (IS_ERR(handle)) {
                /* exit_lock drops the reservation; do not release it here too */
                CERROR("presto_do_mknod: no space for transaction\n");
                EXIT;
                goto exit_lock2;
        }

        error = iops->mknod(dir->d_inode, dentry, mode, dev);
        if (error) {
                EXIT;
                goto exit_commit;
        }

        /* install the presto inode and dentry operations on the new object */
        if (dentry->d_inode) {
                struct presto_cache *cache = fset->fset_cache;

                presto_set_ops(dentry->d_inode, cache->cache_filter);
                filter_setup_dentry_ops(cache->cache_filter, dentry->d_op,
                                        &presto_dentry_ops);
                dentry->d_op = filter_c2udops(cache->cache_filter);
                /* if Lento does this, we won't have data */
                if (ISLENTO(presto_c2m(cache))) {
                        presto_set(dentry, PRESTO_ATTR);
                } else {
                        presto_set(dentry, PRESTO_ATTR | PRESTO_DATA);
                }
        }

        /* a failed time update aborts the operation, as in mkdir/symlink */
        error = presto_settime(fset, NULL, NULL, dir,
                               info, ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit_commit;
        }
        error = presto_settime(fset, NULL, NULL, dentry,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit_commit;
        }

        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x10);
        presto_getversion(&new_node_ver, dentry->d_inode);
        if (presto_do_kml(info, dentry))
                error = presto_journal_mknod(&rec, fset, dentry, &tgt_dir_ver,
                                             &new_node_ver,
                                             dentry->d_inode->i_mode,
                                             MAJOR(dev), MINOR(dev));
        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x20);
        if (presto_do_rcvd(info, dentry))
                error = presto_write_last_rcvd(&rec, fset, info);
        presto_debug_fail_blkdev(fset, KML_OPCODE_MKNOD | 0x30);
        EXIT;
exit_commit:
        presto_trans_commit(fset, handle);
exit_lock2:
        unlock_kernel();
exit_lock:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        return error;
}
/*
 * Create a special file @filename of type/permissions @mode and device
 * number @dev on behalf of Lento.
 *
 * Only character, block, FIFO and socket nodes are passed on to
 * presto_do_mknod(); regular files yield -EOPNOTSUPP, directories -EPERM
 * and any other type -EINVAL.  Returns 0 or a negative errno.
 */
int lento_mknod(const char *filename, int mode, dev_t dev,
                struct lento_vfs_context *info)
{
        struct presto_file_set *fset;
        struct nameidata nd;
        struct dentry *node;
        char *kname;
        int type;
        int rc = 0;

        ENTRY;
        if (S_ISDIR(mode))
                return -EPERM;

        kname = getname(filename);
        if (IS_ERR(kname))
                return PTR_ERR(kname);

        rc = path_lookup(kname, LOOKUP_PARENT, &nd);
        if (rc)
                goto out_name;

        node = lookup_create(&nd, 0);
        if (IS_ERR(node)) {
                rc = PTR_ERR(node);
                goto out_unlock;
        }

        fset = presto_fset(node);
        if (!fset) {
                rc = -EINVAL;
                CERROR("No fileset!\n");
                EXIT;
                goto out_dput;
        }

        type = mode & S_IFMT;
        if (type == 0 || type == S_IFREG)
                rc = -EOPNOTSUPP;
        else if (type == S_IFCHR || type == S_IFBLK ||
                 type == S_IFIFO || type == S_IFSOCK)
                rc = presto_do_mknod(fset, nd.dentry, node, mode, dev, info);
        else if (type == S_IFDIR)
                rc = -EPERM;
        else
                rc = -EINVAL;

out_dput:
        dput(node);
out_unlock:
        /* i_sem is released whether or not lookup_create() succeeded */
        up(&nd.dentry->d_inode->i_sem);
        path_release(&nd);
out_name:
        putname(kname);
        return rc;
}
/*
 * Perform the rename of @old_dentry (in @old_parent) to @new_dentry (in
 * @new_parent) through the cache filesystem's directory inode operations,
 * inside a presto transaction, and journal the result.
 *
 * When the target inode exists and has more than one link, an extra
 * reference is taken on @new_dentry before the rename and dropped again
 * on every path afterwards.  Returns 0 or a negative errno.
 */
int do_rename(struct presto_file_set *fset,
              struct dentry *old_parent, struct dentry *old_dentry,
              struct dentry *new_parent, struct dentry *new_dentry,
              struct lento_vfs_context *info)
{
        struct rec_info rec;
        int error;
        struct inode_operations *iops;
        struct presto_version src_dir_ver, tgt_dir_ver;
        void *handle;
        int new_inode_unlink = 0;
        struct inode *old_dir = old_parent->d_inode;
        struct inode *new_dir = new_parent->d_inode;

        ENTRY;
        /* remember both directory versions before they are modified */
        presto_getversion(&src_dir_ver, old_dir);
        presto_getversion(&tgt_dir_ver, new_dir);

        error = -EPERM;
        iops = filter_c2cdiops(fset->fset_cache->cache_filter);
        if (!iops || !iops->rename) {
                EXIT;
                return error;
        }

        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                return error;
        }

        handle = presto_trans_start(fset, old_dir, KML_OPCODE_RENAME);
        if (IS_ERR(handle)) {
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                CERROR("presto_do_rename: no space for transaction\n");
                return -ENOSPC;
        }

        if (new_dentry->d_inode && new_dentry->d_inode->i_nlink > 1) {
                dget(new_dentry);
                new_inode_unlink = 1;
        }

        error = iops->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (error) {
                /* balance the dget() above */
                if (new_inode_unlink)
                        dput(new_dentry);
                EXIT;
                goto exit;
        }

        if (new_inode_unlink) {
                /*
                 * NOTE(review): the ctime update is applied to old_dentry
                 * although the extra reference was taken on new_dentry -
                 * confirm which inode is meant to be touched here.
                 */
                error = presto_settime(fset, NULL, NULL, old_dentry,
                                       info, ATTR_CTIME);
                /* release the reference on the dentry that was dget()'d */
                dput(new_dentry);
                if (error) {
                        EXIT;
                        goto exit;
                }
        }

        info->flags |= LENTO_FL_TOUCH_PARENT;
        error = presto_settime(fset, NULL, new_parent, old_parent,
                               info, ATTR_CTIME | ATTR_MTIME);
        if (error) {
                EXIT;
                goto exit;
        }

        /* XXX make a distinction between cross file set
         * and intra file set renames here
         */
        presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x10);
        if (presto_do_kml(info, old_dentry))
                error = presto_journal_rename(&rec, fset, old_dentry,
                                              new_dentry,
                                              &src_dir_ver, &tgt_dir_ver);
        presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x20);
        if (presto_do_rcvd(info, old_dentry))
                error = presto_write_last_rcvd(&rec, fset, info);
        presto_debug_fail_blkdev(fset, KML_OPCODE_RENAME | 0x30);
        EXIT;
exit:
        presto_trans_commit(fset, handle);
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        return error;
}
/*
 * Rename the directory @old_dentry (in @old_parent) to @new_dentry (in
 * @new_parent).
 *
 * Performs the permission and sanity checks, serialises against other
 * directory renames on the superblock with s_vfs_rename_sem, and calls
 * do_rename() for the actual work.  An existing target is unhashed for
 * the duration of the operation and marked S_DEAD on success.  On success
 * the dcache is updated with d_move() whether or not a target existed.
 *
 * Returns 0 or a negative errno.
 */
static
int presto_rename_dir(struct presto_file_set *fset, struct dentry *old_parent,
                      struct dentry *old_dentry, struct dentry *new_parent,
                      struct dentry *new_dentry, struct lento_vfs_context *info)
{
        int error;
        struct inode *target;
        struct inode *old_dir = old_parent->d_inode;
        struct inode *new_dir = new_parent->d_inode;

        if (old_dentry->d_inode == new_dentry->d_inode)
                return 0;

        error = may_delete(old_dir, old_dentry, 1);
        if (error)
                return error;

        if (new_dir->i_sb != old_dir->i_sb)
                return -EXDEV;

        if (!new_dentry->d_inode)
                error = may_create(new_dir, new_dentry);
        else
                error = may_delete(new_dir, new_dentry, 1);
        if (error)
                return error;

        if (!old_dir->i_op || !old_dir->i_op->rename)
                return -EPERM;

        /*
         * If we are going to change the parent - check write permissions,
         * we'll need to flip '..'.
         */
        if (new_dir != old_dir) {
                error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
        }
        if (error)
                return error;

        DQUOT_INIT(old_dir);
        DQUOT_INIT(new_dir);
        down(&old_dir->i_sb->s_vfs_rename_sem);

        /* a directory may not be moved into its own subtree */
        error = -EINVAL;
        if (is_subdir(new_dentry, old_dentry))
                goto out_unlock;

        target = new_dentry->d_inode;
        /* d_unhash() takes an extra reference, dropped by the dput() below */
        if (target)
                d_unhash(new_dentry);

        /*
         * These checks and the rename itself run whether or not a target
         * exists; they must not hang off the "if (target)" above.
         */
        if (IS_DEADDIR(old_dir) || IS_DEADDIR(new_dir))
                error = -ENOENT;
        else if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
                error = -EBUSY;
        else
                error = do_rename(fset, old_parent, old_dentry,
                                  new_parent, new_dentry, info);

        if (target) {
                if (!error)
                        target->i_flags |= S_DEAD;
                if (d_unhashed(new_dentry))
                        d_rehash(new_dentry);
                dput(new_dentry);
        }

        if (!error)
                d_move(old_dentry, new_dentry);
out_unlock:
        up(&old_dir->i_sb->s_vfs_rename_sem);
        return error;
}
/*
 * Rename a non-directory @old_dentry (in @old_parent) to @new_dentry (in
 * @new_parent).
 *
 * After the permission and same-superblock checks the work is done by
 * do_rename(); on success the dcache is updated with d_move() unless the
 * filesystem type sets FS_ODD_RENAME.  Returns 0 or a negative errno.
 */
static
int presto_rename_other(struct presto_file_set *fset, struct dentry *old_parent,
                        struct dentry *old_dentry, struct dentry *new_parent,
                        struct dentry *new_dentry, struct lento_vfs_context *info)
{
        struct inode *src_dir = old_parent->d_inode;
        struct inode *dst_dir = new_parent->d_inode;
        int rc;

        /* renaming something onto itself is a no-op */
        if (old_dentry->d_inode == new_dentry->d_inode)
                return 0;

        rc = may_delete(src_dir, old_dentry, 0);
        if (rc)
                return rc;

        if (dst_dir->i_sb != src_dir->i_sb)
                return -EXDEV;

        if (new_dentry->d_inode)
                rc = may_delete(dst_dir, new_dentry, 0);
        else
                rc = may_create(dst_dir, new_dentry);
        if (rc)
                return rc;

        if (!src_dir->i_op || !src_dir->i_op->rename)
                return -EPERM;

        DQUOT_INIT(src_dir);
        DQUOT_INIT(dst_dir);

        if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
                return -EBUSY;

        rc = do_rename(fset, old_parent, old_dentry,
                       new_parent, new_dentry, info);
        if (rc)
                return rc;

        /* The following d_move() should become unconditional */
        if (!(src_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
                d_move(old_dentry, new_dentry);

        return 0;
}
/*
 * Rename @old_dentry to @new_dentry, dispatching on whether the source
 * is a directory.  Returns whatever the selected helper returns.
 */
int presto_do_rename(struct presto_file_set *fset,
                     struct dentry *old_parent, struct dentry *old_dentry,
                     struct dentry *new_parent, struct dentry *new_dentry,
                     struct lento_vfs_context *info)
{
        if (!S_ISDIR(old_dentry->d_inode->i_mode))
                return presto_rename_other(fset, old_parent, old_dentry,
                                           new_parent, new_dentry, info);

        return presto_rename_dir(fset, old_parent, old_dentry,
                                 new_parent, new_dentry, info);
}
/*
 * Rename @oldname to @newname; both are used directly as path strings for
 * path_lookup().
 *
 * Looks up both parent directories, requires them to be on the same mount
 * and to end in a normal last component, locks both with lock_rename(),
 * resolves source and target dentries and calls presto_do_rename() under
 * the big kernel lock.  Returns 0 or a negative errno.
 */
int lento_do_rename(const char *oldname, const char *newname,
struct lento_vfs_context *info)
{
int error = 0;
struct dentry * old_dir, * new_dir;
struct dentry * old_dentry, *new_dentry;
struct nameidata oldnd, newnd;
struct presto_file_set *fset;
ENTRY;
error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
if (error)
goto exit;
error = path_lookup(newname, LOOKUP_PARENT, &newnd);
if (error)
goto exit1;
/* source and target parents must live on the same mount */
error = -EXDEV;
if (oldnd.mnt != newnd.mnt)
goto exit2;
old_dir = oldnd.dentry;
/* ".", ".." and the root cannot be renamed or renamed onto */
error = -EBUSY;
if (oldnd.last_type != LAST_NORM)
goto exit2;
new_dir = newnd.dentry;
if (newnd.last_type != LAST_NORM)
goto exit2;
lock_rename(new_dir, old_dir);
old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
/* source must exist */
error = -ENOENT;
if (!old_dentry->d_inode)
goto exit4;
fset = presto_fset(old_dentry);
error = -EINVAL;
if ( !fset ) {
CERROR("No fileset!\n");
EXIT;
goto exit4;
}
/* unless the source is a directory trailing slashes give -ENOTDIR */
if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
error = -ENOTDIR;
if (oldnd.last.name[oldnd.last.len])
goto exit4;
if (newnd.last.name[newnd.last.len])
goto exit4;
}
new_dentry = lookup_hash(&newnd.last, new_dir);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto exit4;
lock_kernel();
error = presto_do_rename(fset, old_dir, old_dentry,
new_dir, new_dentry, info);
unlock_kernel();
dput(new_dentry);
/* labels unwind in reverse order of acquisition */
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
exit2:
path_release(&newnd);
exit1:
path_release(&oldnd);
exit:
return error;
}
/*
 * Rename @oldname to @newname on behalf of Lento: copy both names in with
 * getname() and pass the copies to lento_do_rename().  Returns 0 or a
 * negative errno.
 */
int lento_rename(const char * oldname, const char * newname,
                 struct lento_vfs_context *info)
{
        char *src;
        char *dst;
        int rc;

        src = getname(oldname);
        if (IS_ERR(src))
                return PTR_ERR(src);

        dst = getname(newname);
        if (IS_ERR(dst)) {
                rc = PTR_ERR(dst);
        } else {
                rc = lento_do_rename(src, dst, info);
                putname(dst);
        }

        putname(src);
        return rc;
}
/*
 * Return a dentry for the inode identified by @ino / @generation.
 *
 * If @dentry already refers to that inode it is returned unchanged.
 * Otherwise @dentry is only used to find the fileset and is dput() here;
 * a lookup name is built from PRESTO_ILOOKUP_MAGIC, the inode number,
 * PRESTO_ILOOKUP_SEP and the generation, and looked up under the
 * fileset's root dentry.
 *
 * Returns the dentry, or an ERR_PTR: -EINVAL when no fileset is found,
 * -ENAMETOOLONG when the lookup name does not fit the local buffer, or
 * whatever lookup_one_len() returns.
 */
struct dentry *presto_iopen(struct dentry *dentry,
                            ino_t ino, unsigned int generation)
{
        struct presto_file_set *fset;
        char name[48];
        int len;

        ENTRY;
        /* see if we already have the dentry we want */
        if (dentry->d_inode && dentry->d_inode->i_ino == ino &&
            dentry->d_inode->i_generation == generation) {
                EXIT;
                return dentry;
        }

        /* Make sure we have a cache beneath us.  We should always find at
         * least one dentry inside the cache (if it exists), otherwise not
         * even the cache root exists, or we passed in a bad name.
         */
        fset = presto_fset(dentry);
        if (!fset) {
                /* precision, not width: print exactly d_name.len bytes */
                CERROR("No fileset for %.*s!\n",
                       (int)dentry->d_name.len, dentry->d_name.name);
                EXIT;
                dput(dentry);
                return ERR_PTR(-EINVAL);
        }
        dput(dentry);

        /* bounded formatting: the magic prefix length is defined elsewhere */
        len = snprintf(name, sizeof(name), "%s%#lx%c%#x",
                       PRESTO_ILOOKUP_MAGIC, ino, PRESTO_ILOOKUP_SEP,
                       generation);
        if (len < 0 || len >= (int)sizeof(name)) {
                EXIT;
                return ERR_PTR(-ENAMETOOLONG);
        }

        CDEBUG(D_PIOCTL, "opening %ld by number (as %s)\n", ino, name);
        return lookup_one_len(name, fset->fset_dentry, len);
}
/*
 * Build an open struct file for @dentry with open flags @flags.
 *
 * Takes write access on the inode when the derived f_mode includes
 * FMODE_WRITE, points the file at the inode's file operations and calls
 * their ->open method if present.  Returns the file, or an ERR_PTR:
 * -ENFILE when no file structure is available, otherwise the error from
 * get_write_access() or ->open.
 */
static struct file *presto_filp_dopen(struct dentry *dentry, int flags)
{
        struct inode *inode = dentry->d_inode;
        struct file *filp;
        int rc;

        ENTRY;
        filp = get_empty_filp();
        if (filp == NULL) {
                CDEBUG(D_PIOCTL, "error getting file pointer\n");
                EXIT;
                return ERR_PTR(-ENFILE);
        }

        filp->f_flags = flags;
        filp->f_mode = (flags + 1) & O_ACCMODE;

        if (filp->f_mode & FMODE_WRITE) {
                rc = get_write_access(inode);
                if (rc) {
                        CDEBUG(D_PIOCTL, "error getting write access\n");
                        EXIT;
                        goto drop_filp;
                }
        }

        /* XXX: f_vfsmnt is never filled in here */
        filp->f_dentry = dentry;
        filp->f_mapping = inode->i_mapping;
        filp->f_pos = 0;
        /* XXX should we set to presto ops, or leave at cache ops? */
        filp->f_op = inode->i_op ? inode->i_fop : NULL;

        if (filp->f_op != NULL && filp->f_op->open != NULL) {
                rc = filp->f_op->open(inode, filp);
                if (rc) {
                        CDEBUG(D_PIOCTL, "error calling cache 'open'\n");
                        EXIT;
                        /* undo the write access taken above */
                        if (filp->f_mode & FMODE_WRITE)
                                put_write_access(inode);
                        goto drop_filp;
                }
        }

        /* these flags only matter at open time */
        filp->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
        return filp;

drop_filp:
        put_filp(filp);
        return ERR_PTR(rc);
}
/* Open an inode by number. We pass in the cache root name (or a subdirectory
* from the cache that is guaranteed to exist) to be able to access the cache.
*
* @name is walked with presto_walk(); if that yields -ENOENT the last path
* component is stripped and the walk retried.  The resulting dentry is
* handed to presto_iopen() and the returned dentry opened with
* presto_filp_dopen().  Returns a new file descriptor on success or a
* negative errno; O_CREAT is rejected with -EACCES.
*/
int lento_iopen(const char *name, ino_t ino, unsigned int generation,
int flags)
{
char * tmp;
struct dentry *dentry;
struct nameidata nd;
int fd;
int error;
ENTRY;
/* NOTE(review): name is printed with %s before getname() has copied it -
 * confirm it is safe to dereference directly here. */
CDEBUG(D_PIOCTL,
"open %s:inode %#lx (%ld), generation %x (%d), flags %d \n",
name, ino, ino, generation, generation, flags);
/* We don't allow creation of files by number only, as it would
* lead to a dangling files not in any directory. We could also
* just turn off the flag and ignore it.
*/
if (flags & O_CREAT) {
CERROR("%s: create file by inode number (%ld) not allowed\n",
__FUNCTION__, ino);
EXIT;
return -EACCES;
}
tmp = getname(name);
if (IS_ERR(tmp)) {
EXIT;
return PTR_ERR(tmp);
}
lock_kernel();
again: /* look the named file or a parent directory so we can get the cache */
error = presto_walk(tmp, &nd);
if ( error && error != -ENOENT ) {
EXIT;
unlock_kernel();
putname(tmp);
return error;
}
if (error == -ENOENT)
dentry = NULL;
else
dentry = nd.dentry;
/* we didn't find the named file, so see if a parent exists */
if (!dentry) {
char *slash;
slash = strrchr(tmp, '/');
/* strip the last component in place and retry with the parent path */
if (slash && slash != tmp) {
*slash = '\0';
/* NOTE(review): nd is released although presto_walk() just returned
 * -ENOENT - confirm presto_walk() leaves nd valid on that error. */
path_release(&nd);
goto again;
}
/* we should never get here... */
CDEBUG(D_PIOCTL, "no more path components to try!\n");
fd = -ENOENT;
/* NOTE(review): exit also calls path_release(&nd) after the failed walk -
 * same question as above. */
goto exit;
}
CDEBUG(D_PIOCTL, "returned dentry %p\n", dentry);
/* presto_iopen() dput()s the dentry passed in unless it returns that same
 * dentry.  NOTE(review): nd.dentry is released again by path_release() at
 * exit - confirm the reference counts balance. */
dentry = presto_iopen(dentry, ino, generation);
fd = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
EXIT;
goto exit;
}
/* XXX start of code that might be replaced by something like:
* if (flags & (O_WRONLY | O_RDWR)) {
* error = get_write_access(dentry->d_inode);
* if (error) {
* EXIT;
* goto cleanup_dput;
* }
* }
* fd = open_dentry(dentry, flags);
*
* including the presto_filp_dopen() function (check dget counts!)
*/
fd = get_unused_fd();
if (fd < 0) {
/* NOTE(review): the dentry returned by presto_iopen() is not released on
 * this path - confirm whether a dput() is missing. */
EXIT;
goto exit;
}
{
/* this inner error shadows the function-level variable of the same name */
int error;
struct file * f = presto_filp_dopen(dentry, flags);
error = PTR_ERR(f);
if (IS_ERR(f)) {
/* give the descriptor back and report the open error instead */
put_unused_fd(fd);
fd = error;
} else {
fd_install(fd, f);
}
}
/* end of code that might be replaced by open_dentry */
EXIT;
exit:
unlock_kernel();
path_release(&nd);
putname(tmp);
return fd;
}
#ifdef CONFIG_FS_EXT_ATTR
#if 0 /* was a broken check for Posix ACLs */
/* Posix ACL code changes i_mode without using a notify_change (or
* a mark_inode_dirty!). We need to duplicate this at the reintegrator
* which is done by this function. This function also takes care of
* resetting the cached posix acls in this inode. If we don't reset these
* VFS continues using the old acl information, which by now may be out of
* date.
*
* Note: this function sits inside an "#if 0" region and is not compiled.
* It drops the cached i_acl / i_default_acl, marks both as not cached,
* stores @mode in i_mode, marks the inode dirty and returns 0.
*/
int presto_setmode(struct presto_file_set *fset, struct dentry *dentry,
mode_t mode)
{
struct inode *inode = dentry->d_inode;
ENTRY;
/* The extended attributes for this inode were modified.
* At this point we can not be sure if any of the ACL
* information for this inode was updated. So we will
* force VFS to reread the acls. Note that we do this
* only when called from the SETEXTATTR ioctl, which is why we
* do this while setting the mode of the file. Also note
* that mark_inode_dirty is not be needed for i_*acl only
* to force i_mode info to disk, and should be removed once
* we use notify_change to update the mode.
* XXX: is mode setting really needed? Just setting acl's should
* be enough! VFS should change the i_mode as needed? SHP
*/
if (inode->i_acl &&
inode->i_acl != POSIX_ACL_NOT_CACHED)
posix_acl_release(inode->i_acl);
if (inode->i_default_acl &&
inode->i_default_acl != POSIX_ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
inode->i_acl = POSIX_ACL_NOT_CACHED;
inode->i_default_acl = POSIX_ACL_NOT_CACHED;
inode->i_mode = mode;
/* inode should already be dirty...but just in case */
mark_inode_dirty(inode);
return 0;
#if 0
/* XXX: The following code is the preferred way to set mode,
* however, I need to carefully go through possible recursion
* paths back into presto. See comments in presto_do_setattr.
*/
{
int error=0;
struct super_operations *sops;
struct iattr iattr;
iattr.ia_mode = mode;
iattr.ia_valid = ATTR_MODE|ATTR_FORCE;
error = -EPERM;
sops = filter_c2csops(fset->fset_cache->cache_filter);
/* NOTE(review): "!sops && !sops->notify_change" dereferences sops exactly
 * when it is NULL; "||" looks intended - fix before re-enabling. */
if (!sops &&
!sops->notify_change) {
EXIT;
return error;
}
error = sops->notify_change(dentry, &iattr);
EXIT;
return error;
}
#endif
}
#endif
/* setextattr Interface to cache filesystem
 *
 * Set extended attribute @name on @dentry to @buffer (@buffer_len bytes)
 * through the cache filesystem's inode operations, inside a presto
 * transaction, and journal the result.
 *
 * The inode operations are chosen by file type (symlink, directory,
 * everything else).  @mode is only referenced by code that is currently
 * compiled out.  Returns 0 or a negative errno: -EROFS for a read-only
 * inode, -EPERM for immutable/append-only inodes or when the cache has no
 * set_ext_attr method, -ENOSPC when the transaction cannot be started.
 */
int presto_do_set_ext_attr(struct presto_file_set *fset,
                           struct dentry *dentry,
                           const char *name, void *buffer,
                           size_t buffer_len, int flags, mode_t *mode,
                           struct lento_vfs_context *info)
{
        struct rec_info rec;
        struct inode *inode = dentry->d_inode;
        struct inode_operations *iops;
        int error;
        struct presto_version ver;
        void *handle;
        char temp[PRESTO_EXT_ATTR_NAME_MAX+1];

        ENTRY;
        if (IS_RDONLY(inode)) {
                EXIT;
                return -EROFS;
        }

        if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
                EXIT;
                return -EPERM;
        }

        presto_getversion(&ver, inode);
        error = -EPERM;
        /* We need to invoke different filters based on whether
         * this dentry is a regular file, directory or symlink.
         */
        switch (inode->i_mode & S_IFMT) {
        case S_IFLNK: /* symlink */
                iops = filter_c2csiops(fset->fset_cache->cache_filter);
                break;
        case S_IFDIR: /* directory */
                iops = filter_c2cdiops(fset->fset_cache->cache_filter);
                break;
        case S_IFREG:
        default: /* everything else including regular files */
                iops = filter_c2cfiops(fset->fset_cache->cache_filter);
        }

        /* either a missing table or a missing method means "not supported" */
        if (!iops || !iops->set_ext_attr) {
                EXIT;
                return error;
        }

        error = presto_reserve_space(fset->fset_cache, PRESTO_REQHIGH);
        if (error) {
                EXIT;
                return error;
        }

        handle = presto_trans_start(fset,dentry->d_inode,KML_OPCODE_SETEXTATTR);
        if (IS_ERR(handle)) {
                CERROR("presto_do_set_ext_attr: no space for transaction\n");
                presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
                return -ENOSPC;
        }

        /* We first "truncate" name to the maximum allowable in presto */
        /* This simulates the strncpy_from_use code in fs/ext_attr.c */
        strlcpy(temp,name,sizeof(temp));

        /* Pass down to cache*/
        error = iops->set_ext_attr(inode,temp,buffer,buffer_len,flags);
        if (error) {
                EXIT;
                goto exit;
        }

#if 0 /* was a broken check for Posix ACLs */
        /* Reset mode if specified*/
        /* XXX: when we do native acl support, move this code out! */
        if (mode != NULL) {
                error = presto_setmode(fset, dentry, *mode);
                if (error) {
                        EXIT;
                        goto exit;
                }
        }
#endif

        /* Reset ctime. Only inode change time (ctime) is affected */
        error = presto_settime(fset, NULL, NULL, dentry, info, ATTR_CTIME);
        if (error) {
                EXIT;
                goto exit;
        }

        if (flags & EXT_ATTR_FLAG_USER) {
                CERROR(" USER flag passed to presto_do_set_ext_attr!\n");
                BUG();
        }

        /* We are here, so set_ext_attr succeeded. We no longer need to keep
         * track of EXT_ATTR_FLAG_{EXISTS,CREATE}, instead, we will force
         * the attribute value during log replay. -SHP
         */
        flags &= ~(EXT_ATTR_FLAG_EXISTS | EXT_ATTR_FLAG_CREATE);

        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x10);
        if (presto_do_kml(info, dentry))
                error = presto_journal_set_ext_attr
                        (&rec, fset, dentry, &ver, name, buffer,
                         buffer_len, flags);
        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x20);
        if (presto_do_rcvd(info, dentry))
                error = presto_write_last_rcvd(&rec, fset, info);
        presto_debug_fail_blkdev(fset, KML_OPCODE_SETEXTATTR | 0x30);
        EXIT;
exit:
        presto_release_space(fset->fset_cache, PRESTO_REQHIGH);
        presto_trans_commit(fset, handle);
        return error;
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment