Commit 31143d5d authored by David Howells, committed by Linus Torvalds

AFS: implement basic file write support

Implement support for writing to regular AFS files, including:

 (1) write

 (2) truncate

 (3) fsync, fdatasync

 (4) chmod, chown, chgrp, utime.

AFS writeback attempts to batch writes into chunks as large as it can manage
up to the point that it writes back 65535 pages in one chunk or it meets a
locked page.

Furthermore, if a page has been written to using a particular key, then should
another write to that page use some other key, the first write will be flushed
before the second is allowed to take place.  If the first write fails due to a
security error, then the page will be scrapped and reread before the second
write takes place.

If a page is dirty and the callback on it is broken by the server, then the
dirty data is not discarded (same behaviour as NFS).

Shared-writable mappings are not supported by this patch.

[akpm@linux-foundation.org: fix a bunch of warnings]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 416351f2
......@@ -22,6 +22,7 @@ kafs-objs := \
vlclient.o \
vlocation.o \
vnode.o \
volume.o
volume.o \
write.o
obj-$(CONFIG_AFS_FS) := kafs.o
......@@ -18,6 +18,8 @@
enum AFS_FS_Operations {
FSFETCHDATA = 130, /* AFS Fetch file data */
FSFETCHSTATUS = 132, /* AFS Fetch file status */
FSSTOREDATA = 133, /* AFS Store file data */
FSSTORESTATUS = 135, /* AFS Store file status */
FSREMOVEFILE = 136, /* AFS Remove a file */
FSCREATEFILE = 137, /* AFS Create a file */
FSRENAME = 138, /* AFS Rename or move a file or directory */
......
......@@ -56,6 +56,7 @@ const struct inode_operations afs_dir_inode_operations = {
.rename = afs_rename,
.permission = afs_permission,
.getattr = afs_getattr,
.setattr = afs_setattr,
};
static struct dentry_operations afs_fs_dentry_operations = {
......
......@@ -15,32 +15,43 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include "internal.h"
static int afs_readpage(struct file *file, struct page *page);
static void afs_invalidatepage(struct page *page, unsigned long offset);
static int afs_releasepage(struct page *page, gfp_t gfp_flags);
static int afs_launder_page(struct page *page);
/*
 * file operations for regular AFS files
 * - asynchronous writes are routed through afs_file_write() and fsync through
 *   afs_fsync(), both declared in the write.c section of internal.h
 * - reads, llseek and sendfile use the generic_file_* helpers
 * - mmap is wired to generic_file_readonly_mmap
 *   NOTE(review): presumably because shared-writable mappings are not yet
 *   supported - confirm before changing
 */
const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = afs_file_write,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
.fsync = afs_fsync,
};
/*
 * inode operations for regular AFS files
 * - attribute retrieval, attribute changes (afs_setattr) and permission
 *   checking are all handled by AFS-specific routines
 */
const struct inode_operations afs_file_inode_operations = {
.getattr = afs_getattr,
.setattr = afs_setattr,
.permission = afs_permission,
};
/*
 * address space operations for AFS file data
 * - page dirtying goes through afs_set_page_dirty() (write.c); the table must
 *   name .set_page_dirty exactly once, as a second designated initializer for
 *   the same member silently overrides the first
 * - prepare_write/commit_write/writepage/writepages are the write.c entry
 *   points used to get modified pages back to the server
 */
const struct address_space_operations afs_fs_aops = {
	.readpage	= afs_readpage,
	.set_page_dirty	= afs_set_page_dirty,
	.launder_page	= afs_launder_page,
	.releasepage	= afs_releasepage,
	.invalidatepage	= afs_invalidatepage,
	.prepare_write	= afs_prepare_write,
	.commit_write	= afs_commit_write,
	.writepage	= afs_writepage,
	.writepages	= afs_writepages,
};
/*
......@@ -230,11 +241,6 @@ static void afs_invalidatepage(struct page *page, unsigned long offset)
BUG_ON(!PageLocked(page));
if (PagePrivate(page)) {
#ifdef AFS_CACHING_SUPPORT
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
cachefs_uncache_page(vnode->cache,page);
#endif
/* We release buffers only if the entire page is being
* invalidated.
* The get_block cached value has been unconditionally
......@@ -254,20 +260,34 @@ static void afs_invalidatepage(struct page *page, unsigned long offset)
_leave(" = %d", ret);
}
/*
 * launder a dirty page (the ->launder_page address space operation)
 * - currently a stub: it only emits the entry trace for the page index and
 *   reports success; nothing is written back here
 * - NOTE(review): confirm that callers which rely on ->launder_page to flush
 *   a dirty page before invalidating it cannot lose data because of this
 */
static int afs_launder_page(struct page *page)
{
_enter("{%lu}", page->index);
return 0;
}
/*
* release a page and cleanup its private data
*/
static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
struct afs_writeback *wb;
_enter("{{%x:%u}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
gfp_flags);
if (PagePrivate(page)) {
wb = (struct afs_writeback *) page_private(page);
ASSERT(wb != NULL);
set_page_private(page, 0);
ClearPagePrivate(page);
afs_put_writeback(wb);
}
_leave(" = 0");
......
This diff is collapsed.
......@@ -209,7 +209,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
*/
void afs_zap_data(struct afs_vnode *vnode)
{
kenter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode);
_enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode);
/* nuke all the non-dirty pages that aren't locked, mapped or being
* written back */
......@@ -334,6 +334,7 @@ void afs_clear_inode(struct inode *inode)
vnode->server = NULL;
}
ASSERT(list_empty(&vnode->writebacks));
ASSERT(!vnode->cb_promised);
#ifdef AFS_CACHING_SUPPORT
......@@ -350,3 +351,47 @@ void afs_clear_inode(struct inode *inode)
_leave("");
}
/*
 * set the attributes of an inode
 * - a request that changes none of size, mode, uid, gid or mtime is accepted
 *   without contacting the server
 * - any dirty data outstanding on a regular file is flushed first
 * - the key is borrowed from the open file when the caller supplied one
 *   (ATTR_FILE); otherwise a key is requested for the volume's cell and
 *   released again once the operation has been attempted
 * - returns the result of afs_vnode_setattr(), or the error from the key
 *   request
 */
int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
	const unsigned int handled =
		ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME;
	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
	struct key *key;
	int ret;

	_enter("{%x:%u},{n=%s},%x",
	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
	       attr->ia_valid);

	/* nothing we know how to pass on to the server */
	if ((attr->ia_valid & handled) == 0) {
		_leave(" = 0 [unsupported]");
		return 0;
	}

	/* flush any dirty data outstanding on a regular file */
	if (S_ISREG(vnode->vfs_inode.i_mode)) {
		filemap_write_and_wait(vnode->vfs_inode.i_mapping);
		afs_writeback_all(vnode);
	}

	/* pick the key to authenticate with */
	if (!(attr->ia_valid & ATTR_FILE)) {
		key = afs_request_key(vnode->volume->cell);
		if (IS_ERR(key)) {
			ret = PTR_ERR(key);
			goto error;
		}
	} else {
		key = attr->ia_file->private_data;
	}

	ret = afs_vnode_setattr(vnode, key, attr);

	/* only a key we requested ourselves is ours to release */
	if (!(attr->ia_valid & ATTR_FILE))
		key_put(key);

error:
	_leave(" = %d", ret);
	return ret;
}
......@@ -21,6 +21,7 @@
#define AFS_CELL_MAX_ADDRS 15
struct pagevec;
struct afs_call;
typedef enum {
......@@ -75,12 +76,15 @@ struct afs_call {
struct key *key; /* security for this call */
struct afs_server *server; /* server affected by incoming CM call */
void *request; /* request data (first part) */
void *request2; /* request data (second part) */
struct address_space *mapping; /* page set */
struct afs_writeback *wb; /* writeback being performed */
void *buffer; /* reply receive buffer */
void *reply; /* reply buffer (first part) */
void *reply2; /* reply buffer (second part) */
void *reply3; /* reply buffer (third part) */
void *reply4; /* reply buffer (fourth part) */
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
enum { /* call state */
AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
......@@ -97,14 +101,18 @@ struct afs_call {
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned reply_size; /* current size of reply */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
unsigned short offset; /* offset into received data store */
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
__be32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
afs_dataversion_t store_version; /* updated version expected from store */
};
struct afs_call_type {
......@@ -123,6 +131,32 @@ struct afs_call_type {
void (*destructor)(struct afs_call *call);
};
/*
 * record of an outstanding writeback on a vnode
 * - queued on the owning vnode's writebacks list through ->link
 * - first, point and last are page indices (last is inclusive); offset_first
 *   and to_last are offsets within the first and last pages respectively
 * - state tracks progress from synchronisation through to the record being
 *   unlinked (AFS_WBACK_COMPLETE)
 * - NOTE(review): usage looks like the reference count dropped by
 *   afs_put_writeback(), and the list is presumably guarded by
 *   vnode->writeback_lock - confirm both against write.c
 */
struct afs_writeback {
struct list_head link; /* link in vnode->writebacks */
struct work_struct writer; /* work item to perform the writeback */
struct afs_vnode *vnode; /* vnode to which this write applies */
struct key *key; /* owner of this write */
wait_queue_head_t waitq; /* completion and ready wait queue */
pgoff_t first; /* first page in batch */
pgoff_t point; /* last page in current store op */
pgoff_t last; /* last page in batch (inclusive) */
unsigned offset_first; /* offset into first page of start of write */
unsigned to_last; /* offset into last page of end of write */
int num_conflicts; /* count of conflicting writes in list */
int usage; /* presumably a reference count - see afs_put_writeback() */
bool conflicts; /* T if has dependent conflicts */
enum {
AFS_WBACK_SYNCING, /* synchronisation being performed */
AFS_WBACK_PENDING, /* write pending */
AFS_WBACK_CONFLICTING, /* conflicting writes posted */
AFS_WBACK_WRITING, /* writing back */
AFS_WBACK_COMPLETE /* the writeback record has been unlinked */
} state __attribute__((packed));
};
/*
* AFS superblock private data
* - there's one superblock per volume
......@@ -305,6 +339,7 @@ struct afs_vnode {
wait_queue_head_t update_waitq; /* status fetch waitqueue */
int update_cnt; /* number of outstanding ops that will update the
* status */
spinlock_t writeback_lock; /* lock for writebacks */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */
......@@ -316,6 +351,8 @@ struct afs_vnode {
long acl_order; /* ACL check count (callback break count) */
struct list_head writebacks; /* alterations in pagecache that need writing */
/* outstanding callback notification on this file */
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
......@@ -463,6 +500,12 @@ extern int afs_fs_rename(struct afs_server *, struct key *,
struct afs_vnode *, const char *,
struct afs_vnode *, const char *,
const struct afs_wait_mode *);
extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
pgoff_t, pgoff_t, unsigned, unsigned,
const struct afs_wait_mode *);
extern int afs_fs_setattr(struct afs_server *, struct key *,
struct afs_vnode *, struct iattr *,
const struct afs_wait_mode *);
/*
* inode.c
......@@ -473,6 +516,7 @@ extern struct inode *afs_iget(struct super_block *, struct key *,
extern void afs_zap_data(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int afs_setattr(struct dentry *, struct iattr *);
extern void afs_clear_inode(struct inode *);
/*
......@@ -625,6 +669,9 @@ extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
struct afs_file_status *, struct afs_server **);
extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
struct key *, const char *, const char *);
extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
unsigned, unsigned);
extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
/*
* volume.c
......@@ -641,6 +688,23 @@ extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
extern int afs_volume_release_fileserver(struct afs_vnode *,
struct afs_server *, int);
/*
* write.c
*/
extern int afs_set_page_dirty(struct page *);
extern void afs_put_writeback(struct afs_writeback *);
extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned);
extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned);
extern int afs_writepage(struct page *, struct writeback_control *);
extern int afs_writepages(struct address_space *, struct writeback_control *);
extern int afs_write_inode(struct inode *, int);
extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern int afs_writeback_all(struct afs_vnode *);
extern int afs_fsync(struct file *, struct dentry *, int);
/*****************************************************************************/
/*
* debug tracing
......
......@@ -236,6 +236,70 @@ void afs_flat_call_destructor(struct afs_call *call)
call->buffer = NULL;
}
/*
 * attach the data from a bunch of pages on an inode to a call
 * - sends pages call->first to call->last (inclusive) of call->mapping, in
 *   batches of up to ARRAY_SIZE(pages)
 * - transmission starts call->first_offset bytes into the first page and
 *   stops call->last_to bytes into the last page
 * - every fragment but the last is sent with MSG_MORE; the call is moved to
 *   AFS_CALL_AWAIT_REPLY just before the last fragment is handed to RxRPC
 * - returns the result of the last rxrpc_kernel_send_data() call, which is
 *   negative on failure
 */
int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov)
{
	struct page *pages[8];
	unsigned count, n, loop, offset, to, size;
	pgoff_t first = call->first, last = call->last;
	int ret;

	_enter("");

	/* only the very first page may start part-way in */
	offset = call->first_offset;
	call->first_offset = 0;

	do {
		_debug("attach %lx-%lx", first, last);

		count = last - first + 1;
		if (count > ARRAY_SIZE(pages))
			count = ARRAY_SIZE(pages);
		n = find_get_pages_contig(call->mapping, first, count, pages);
		ASSERTCMP(n, ==, count);

		loop = 0;
		do {
			msg->msg_flags = 0;
			to = PAGE_SIZE;
			if (first + loop >= last)
				to = call->last_to;
			else
				msg->msg_flags = MSG_MORE;

			/* work out the fragment length once, before the
			 * offset is reset, so that the iovec length and the
			 * length given to RxRPC always agree */
			size = to - offset;
			iov->iov_base = kmap(pages[loop]) + offset;
			iov->iov_len = size;

			_debug("- range %u-%u%s",
			       offset, to, msg->msg_flags ? " [more]" : "");
			offset = 0;

			msg->msg_iov = (struct iovec *) iov;
			msg->msg_iovlen = 1;

			/* have to change the state *before* sending the last
			 * packet as RxRPC might give us the reply before it
			 * returns from sending the request */
			if (first + loop >= last)
				call->state = AFS_CALL_AWAIT_REPLY;
			ret = rxrpc_kernel_send_data(call->rxcall, msg, size);
			kunmap(pages[loop]);
			if (ret < 0)
				break;
		} while (++loop < count);
		first += count;

		/* drop the refs taken by find_get_pages_contig() on the whole
		 * batch, even if sending stopped early */
		for (loop = 0; loop < count; loop++)
			put_page(pages[loop]);
		if (ret < 0)
			break;

		/* must be <= rather than <: when the final page begins a new
		 * batch, first == last here and that page still has to go */
	} while (first <= last);

	_leave(" = %d", ret);
	return ret;
}
/*
* initiate a call
*/
......@@ -253,8 +317,9 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
ASSERT(call->type != NULL);
ASSERT(call->type->name != NULL);
_debug("MAKE %p{%s} [%d]",
call, call->type->name, atomic_read(&afs_outstanding_calls));
_debug("____MAKE %p{%s,%x} [%d]____",
call, call->type->name, key_serial(call->key),
atomic_read(&afs_outstanding_calls));
call->wait_mode = wait_mode;
INIT_WORK(&call->async_work, afs_process_async_call);
......@@ -289,16 +354,23 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
msg.msg_iovlen = 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
/* have to change the state *before* sending the last packet as RxRPC
* might give us the reply before it returns from sending the
* request */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
if (ret < 0)
goto error_do_abort;
if (call->send_pages) {
ret = afs_send_pages(call, &msg, iov);
if (ret < 0)
goto error_do_abort;
}
/* at this point, an async call may no longer exist as it may have
* already completed */
return wait_mode->wait(call);
......
......@@ -50,6 +50,7 @@ static const struct super_operations afs_super_ops = {
.statfs = simple_statfs,
.alloc_inode = afs_alloc_inode,
.drop_inode = generic_delete_inode,
.write_inode = afs_write_inode,
.destroy_inode = afs_destroy_inode,
.clear_inode = afs_clear_inode,
.umount_begin = afs_umount_begin,
......@@ -66,7 +67,7 @@ enum {
afs_opt_vol,
};
static const match_table_t afs_options_list = {
static match_table_t afs_options_list = {
{ afs_opt_cell, "cell=%s" },
{ afs_opt_rwpath, "rwpath" },
{ afs_opt_vol, "vol=%s" },
......@@ -459,7 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
init_waitqueue_head(&vnode->update_waitq);
mutex_init(&vnode->permits_lock);
mutex_init(&vnode->validate_lock);
spin_lock_init(&vnode->writeback_lock);
spin_lock_init(&vnode->lock);
INIT_LIST_HEAD(&vnode->writebacks);
INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
}
}
......
......@@ -753,3 +753,110 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
_leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
return PTR_ERR(server);
}
/*
 * write to a file
 * - stores pages first to last of the writeback's vnode, beginning at offset
 *   in the first page and ending at to in the last page
 * - servers are tried in turn until afs_volume_release_fileserver() says to
 *   stop
 * - returns the result of the store operation, or the error from picking a
 *   server if none could be found
 */
int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
			 unsigned offset, unsigned to)
{
	struct afs_vnode *vnode = wb->vnode;
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(wb->key),
	       first, last, offset, to);

	/* this op will fetch the status */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	for (;;) {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server)) {
			/* no server available: retract the update we
			 * announced above */
			spin_lock(&vnode->lock);
			vnode->update_cnt--;
			ASSERTCMP(vnode->update_cnt, >=, 0);
			spin_unlock(&vnode->lock);
			return PTR_ERR(server);
		}

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_store_data(server, wb, first, last, offset, to,
					&afs_sync_call);

		if (afs_volume_release_fileserver(vnode, server, ret))
			break;
	}

	/* adjust the flags */
	if (ret != 0) {
		afs_vnode_status_update_failed(vnode, ret);
	} else {
		afs_vnode_finalise_status_update(vnode, server);
		afs_put_server(server);
	}

	_leave(" = %d", ret);
	return ret;
}
/*
 * set the attributes on a file
 * - passes attr to a fileserver using the given key, trying servers in turn
 *   until afs_volume_release_fileserver() says to stop
 * - returns the result of the set-attributes operation, or the error from
 *   picking a server if none could be found
 */
int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
		      struct iattr *attr)
{
	struct afs_server *server;
	int ret;

	_enter("%s{%x:%u.%u},%x",
	       vnode->volume->vlocation->vldb.name,
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique,
	       key_serial(key));

	/* this op will fetch the status */
	spin_lock(&vnode->lock);
	vnode->update_cnt++;
	spin_unlock(&vnode->lock);

	do {
		/* pick a server to query */
		server = afs_volume_pick_fileserver(vnode);
		if (IS_ERR(server)) {
			/* nothing was sent, so undo the accounting above */
			spin_lock(&vnode->lock);
			vnode->update_cnt--;
			ASSERTCMP(vnode->update_cnt, >=, 0);
			spin_unlock(&vnode->lock);
			return PTR_ERR(server);
		}

		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));

		ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call);

	} while (!afs_volume_release_fileserver(vnode, server, ret));

	/* adjust the flags */
	if (ret != 0) {
		afs_vnode_status_update_failed(vnode, ret);
		goto out;
	}

	afs_vnode_finalise_status_update(vnode, server);
	afs_put_server(server);

out:
	_leave(" = %d", ret);
	return ret;
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment