Commit 8b2a464c authored by David Howells

afs: Add an address list concept

Add an RCU replaceable address list structure to hold a list of server
addresses.  The list also holds the index of the address currently in use.

To this end:

 (1) A cell's VL server address list can be loaded directly via insmod or
     echo to /proc/fs/afs/cells or dynamically from a DNS query for AFSDB
     or SRV records.

 (2) Anyone wanting to use a cell's VL server address must wait until the
     cell record comes online and has tried to obtain some addresses.

 (3) An FS server's address list, for the moment, has a single entry that
     is the key to the server list.  This will change in the future when a
     server is instead keyed on its UUID and the VL.GetAddrsU operation is
     used.

 (4) An 'address cursor' concept is introduced to handle iteration through
     the address list.  This is passed to the afs_make_call() as, in the
     future, stuff (such as abort code) that doesn't outlast the call will
     be returned in it.

In the future, we might want to annotate the list with information about
how each address fares.  We might then want to propagate such annotations
over address list replacement.

Whilst we're at it, we allow IPv6 addresses to be specified in
colon-delimited lists by enclosing them in square brackets.
Signed-off-by: David Howells <dhowells@redhat.com>
parent 989782dc
......@@ -7,6 +7,7 @@ afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
kafs-objs := \
$(afs-cache-y) \
addr_list.o \
callback.o \
cell.o \
cmservice.o \
......
/* Server address list management
*
* Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
*/
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/dns_resolver.h>
#include <linux/inet.h>
#include <keys/rxrpc-type.h>
#include "internal.h"
#include "afs_fs.h"
#define AFS_MAX_ADDRESSES \
((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / \
sizeof(struct sockaddr_rxrpc)))
/*
* Release an address list.
*/
void afs_put_addrlist(struct afs_addr_list *alist)
{
if (alist && refcount_dec_and_test(&alist->usage))
call_rcu(&alist->rcu, (rcu_callback_t)kfree);
}
/*
* Allocate an address list.
*/
struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
unsigned short service,
unsigned short port)
{
struct afs_addr_list *alist;
unsigned int i;
_enter("%u,%u,%u", nr, service, port);
alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
GFP_KERNEL);
if (!alist)
return NULL;
refcount_set(&alist->usage, 1);
for (i = 0; i < nr; i++) {
struct sockaddr_rxrpc *srx = &alist->addrs[i];
srx->srx_family = AF_RXRPC;
srx->srx_service = service;
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = htons(port);
}
return alist;
}
/*
 * Parse a text string consisting of delimited addresses.
 *
 * @text, @len: The string to parse.  It need not be NUL-terminated, but a NUL
 *	found where an address should start is rejected.
 * @delim: The separator between addresses.  A ':' separator is switched to
 *	',' if the text contains a ',' or does not contain a '.'.
 * @service, @port: Defaults stored in every entry.  A "+<port>" suffix
 *	following an address overrides @port for that entry.
 *
 * Each address may be IPv4 (stored as an IPv4-mapped IPv6 address) or IPv6
 * and may be enclosed in square brackets.  Empty entries (leading or adjacent
 * delimiters) are skipped.  At most AFS_MAX_ADDRESSES addresses are taken.
 *
 * Returns a new list holding one reference, or an ERR_PTR(): -EDESTADDRREQ
 * if @len is zero, -EINVAL if the text is malformed and -ENOMEM if the list
 * could not be allocated.
 */
struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
					   char delim,
					   unsigned short service,
					   unsigned short port)
{
	struct afs_addr_list *alist;
	const char *p, *end = text + len;
	unsigned int nr = 0;

	_enter("%*.*s,%c", (int)len, (int)len, text, delim);

	if (!len)
		return ERR_PTR(-EDESTADDRREQ);

	if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
		delim = ',';

	/* Count the addresses */
	p = text;
	do {
		if (!*p)
			return ERR_PTR(-EINVAL);
		if (*p == delim) {
			/* Empty entry: step over the delimiter, otherwise the
			 * loop would never make progress.
			 */
			p++;
			continue;
		}
		nr++;
		if (*p == '[') {
			p++;
			if (p == end)
				return ERR_PTR(-EINVAL);
			p = memchr(p, ']', end - p);
			if (!p)
				return ERR_PTR(-EINVAL);
			p++;
			if (p >= end)
				break;
		}

		p = memchr(p, delim, end - p);
		if (!p)
			break;
		p++;
	} while (p < end);

	_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
	if (nr > AFS_MAX_ADDRESSES)
		nr = AFS_MAX_ADDRESSES;

	alist = afs_alloc_addrlist(nr, service, port);
	if (!alist)
		return ERR_PTR(-ENOMEM);

	/* Extract the addresses.  The loop is bounded by the number of slots
	 * actually allocated so that it can never write beyond addrs[nr - 1].
	 */
	p = text;
	while (p < end && alist->nr_addrs < nr) {
		struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
		const char *stop;
		char tdelim = delim;

		if (*p == delim) {
			p++;
			continue;
		}

		if (*p == '[') {
			p++;
			tdelim = ']';
		}

		/* Parse into a separate end pointer: a failed IPv4 attempt
		 * may move its end pointer, and the IPv6 attempt must start
		 * from the beginning of the address again.
		 */
		if (in4_pton(p, end - p,
			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
			     tdelim, &stop)) {
			/* Turn it into an IPv4-mapped IPv6 address */
			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
		} else if (in6_pton(p, end - p,
				    srx->transport.sin6.sin6_addr.s6_addr,
				    tdelim, &stop)) {
			/* Nothing to do */
		} else {
			goto bad_address;
		}
		p = stop;

		if (tdelim == ']') {
			if (p == end || *p != ']')
				goto bad_address;
			p++;
		}

		if (p < end) {
			if (*p == '+') {
				/* Port number specification "+1234" */
				unsigned int xport = 0;

				p++;
				if (p >= end || !isdigit(*p))
					goto bad_address;
				do {
					xport *= 10;
					xport += *p - '0';
					if (xport > 65535)
						goto bad_address;
					p++;
				} while (p < end && isdigit(*p));
				srx->transport.sin6.sin6_port = htons(xport);

				/* Only a delimiter or the end of the text may
				 * follow the port number.
				 */
				if (p < end && *p != delim)
					goto bad_address;
			} else if (*p == delim) {
				p++;
			} else {
				goto bad_address;
			}
		}

		alist->nr_addrs++;
	}

	_leave(" = [nr %u]", alist->nr_addrs);
	return alist;

bad_address:
	/* The list was never published, so it can be freed immediately. */
	kfree(alist);
	return ERR_PTR(-EINVAL);
}
/*
* Compare old and new address lists to see if there's been any change.
* - How to do this in better than O(Nlog(N)) time?
* - We don't really want to sort the address list, but would rather take the
* list as we got it so as not to undo record rotation by the DNS server.
*/
#if 0
static int afs_cmp_addr_list(const struct afs_addr_list *a1,
const struct afs_addr_list *a2)
{
}
#endif
/*
 * Perform a DNS query for a cell's VL servers and build up an address list
 * from the comma-separated text that the query returns.
 *
 * The expiry time of the DNS record is passed back through @_expiry by
 * dns_query().  Returns the new address list, or an ERR_PTR() carrying
 * either the error from the DNS query or the error from parsing its result.
 */
struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
{
	struct afs_addr_list *alist;
	char *vllist = NULL;
	int ret;

	_enter("%s", cell->name);

	ret = dns_query("afsdb", cell->name, cell->name_len,
			"ipv4", &vllist, _expiry);
	if (ret < 0)
		return ERR_PTR(ret);

	alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
				     VL_SERVICE, AFS_VL_PORT);

	/* The text is no longer needed whether or not the parse succeeded. */
	kfree(vllist);

	if (IS_ERR(alist) && PTR_ERR(alist) != -ENOMEM)
		pr_err("Failed to parse DNS data\n");

	return alist;
}
/*
* Get an address to try.
*/
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
{
_enter("%hu+%hd", ac->start, (short)ac->index);
if (!ac->alist)
return false;
if (ac->begun) {
ac->index++;
if (ac->index == ac->alist->nr_addrs)
ac->index = 0;
if (ac->index == ac->start) {
ac->error = -EDESTADDRREQ;
return false;
}
}
ac->begun = true;
ac->responded = false;
ac->addr = &ac->alist->addrs[ac->index];
return true;
}
/*
* Release an address list cursor.
*/
int afs_end_cursor(struct afs_addr_cursor *ac)
{
if (ac->responded && ac->index != ac->start)
WRITE_ONCE(ac->alist->index, ac->index);
afs_put_addrlist(ac->alist);
ac->alist = NULL;
return ac->error;
}
/*
 * Set the address cursor for iterating over VL servers.
 *
 * If the cell has no address list yet, wait (interruptibly) for the
 * AFS_CELL_FL_NO_LOOKUP_YET flag to be cleared.  If there is still no list
 * after that and the recorded DNS expiry time has not passed, the cell's
 * stored error is returned.
 *
 * On success the cursor holds a reference on the cell's current address list
 * and is positioned at the address the list records as currently in use.
 *
 * Returns 0 on success, the error from an interrupted wait, the cell's stored
 * error, or -EDESTADDRREQ if the cell has no list or an empty one.
 */
int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
{
	struct afs_addr_list *alist;
	int ret;

	if (!rcu_access_pointer(cell->vl_addrs)) {
		ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
				  TASK_INTERRUPTIBLE);
		if (ret < 0)
			return ret;

		if (!rcu_access_pointer(cell->vl_addrs) &&
		    ktime_get_real_seconds() < cell->dns_expiry)
			return cell->error;
	}

	read_lock(&cell->vl_addrs_lock);
	alist = rcu_dereference_protected(cell->vl_addrs,
					  lockdep_is_held(&cell->vl_addrs_lock));
	/* The list pointer may still be NULL here if the lookup produced no
	 * list and the expiry time has already passed.
	 */
	if (alist && alist->nr_addrs > 0)
		afs_get_addrlist(alist);
	else
		alist = NULL;
	read_unlock(&cell->vl_addrs_lock);

	if (!alist)
		return -EDESTADDRREQ;

	ac->alist = alist;
	ac->addr = NULL;
	ac->start = READ_ONCE(alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;
	ac->responded = false;
	return 0;
}
......@@ -9,7 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/key.h>
#include <linux/ctype.h>
......@@ -152,68 +151,33 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
init_rwsem(&cell->vl_sem);
INIT_LIST_HEAD(&cell->vl_list);
spin_lock_init(&cell->vl_lock);
seqlock_init(&cell->vl_addrs_lock);
cell->flags = (1 << AFS_CELL_FL_NOT_READY);
for (i = 0; i < AFS_CELL_MAX_ADDRS; i++) {
struct sockaddr_rxrpc *srx = &cell->vl_addrs[i];
srx->srx_family = AF_RXRPC;
srx->srx_service = VL_SERVICE;
srx->transport_type = SOCK_DGRAM;
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = htons(AFS_VL_PORT);
}
cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
(1 << AFS_CELL_FL_NO_LOOKUP_YET));
rwlock_init(&cell->vl_addrs_lock);
/* Fill in the VL server list if we were given a list of addresses to
* use.
*/
if (vllist) {
char delim = ':';
if (strchr(vllist, ',') || !strchr(vllist, '.'))
delim = ',';
do {
struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
if (in4_pton(vllist, -1,
(u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
delim, &vllist)) {
srx->transport_len = sizeof(struct sockaddr_in6);
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
} else if (in6_pton(vllist, -1,
srx->transport.sin6.sin6_addr.s6_addr,
delim, &vllist)) {
srx->transport_len = sizeof(struct sockaddr_in6);
srx->transport.sin6.sin6_family = AF_INET6;
} else {
goto bad_address;
}
struct afs_addr_list *alist;
cell->vl_naddrs++;
if (!*vllist)
break;
vllist++;
} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && vllist);
alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
VL_SERVICE, AFS_VL_PORT);
if (IS_ERR(alist)) {
ret = PTR_ERR(alist);
goto parse_failed;
}
/* Disable DNS refresh for manually-specified cells */
rcu_assign_pointer(cell->vl_addrs, alist);
cell->dns_expiry = TIME64_MAX;
} else {
/* We're going to need to 'refresh' this cell's VL server list
* from the DNS before we can use it.
*/
cell->dns_expiry = S64_MIN;
}
_leave(" = %p", cell);
return cell;
bad_address:
printk(KERN_ERR "kAFS: bad VL server IP address\n");
ret = -EINVAL;
parse_failed:
if (ret == -EINVAL)
printk(KERN_ERR "kAFS: bad VL server IP address\n");
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
......@@ -325,7 +289,6 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
if (excl) {
ret = -EEXIST;
} else {
ASSERTCMP(atomic_read(&cursor->usage), >=, 1);
afs_get_cell(cursor);
ret = 0;
}
......@@ -333,8 +296,10 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
kfree(candidate);
if (ret == 0)
goto wait_for_cell;
goto error_noput;
error:
afs_put_cell(net, cell);
error_noput:
_leave(" = %d [error]", ret);
return ERR_PTR(ret);
}
......@@ -396,78 +361,50 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
*/
static void afs_update_cell(struct afs_cell *cell)
{
struct afs_addr_list *alist, *old;
time64_t now, expiry;
char *vllist = NULL;
int ret;
_enter("%s", cell->name);
ret = dns_query("afsdb", cell->name, cell->name_len,
"ipv4", &vllist, &expiry);
_debug("query %d", ret);
switch (ret) {
case 0 ... INT_MAX:
clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
goto parse_dns_data;
alist = afs_dns_query(cell, &expiry);
if (IS_ERR(alist)) {
switch (PTR_ERR(alist)) {
case -ENODATA:
/* The DNS said that the cell does not exist */
set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = ktime_get_real_seconds() + 61;
break;
case -ENODATA:
clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
cell->dns_expiry = ktime_get_real_seconds() + 61;
cell->error = -EDESTADDRREQ;
goto out;
case -EAGAIN:
case -ECONNREFUSED:
default:
set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = ktime_get_real_seconds() + 10;
break;
}
case -EAGAIN:
case -ECONNREFUSED:
default:
/* Unable to query DNS. */
set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = ktime_get_real_seconds() + 10;
cell->error = -EDESTADDRREQ;
goto out;
}
parse_dns_data:
write_seqlock(&cell->vl_addrs_lock);
ret = -EINVAL;
do {
struct sockaddr_rxrpc *srx = &cell->vl_addrs[cell->vl_naddrs];
if (in4_pton(vllist, -1,
(u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
',', (const char **)&vllist)) {
srx->transport_len = sizeof(struct sockaddr_in6);
srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
} else if (in6_pton(vllist, -1,
srx->transport.sin6.sin6_addr.s6_addr,
',', (const char **)&vllist)) {
srx->transport_len = sizeof(struct sockaddr_in6);
srx->transport.sin6.sin6_family = AF_INET6;
} else {
goto bad_address;
}
} else {
clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
cell->vl_naddrs++;
if (!*vllist)
break;
vllist++;
/* Exclusion on changing vl_addrs is achieved by a
* non-reentrant work item.
*/
old = rcu_dereference_protected(cell->vl_addrs, true);
rcu_assign_pointer(cell->vl_addrs, alist);
cell->dns_expiry = expiry;
} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS);
if (old)
afs_put_addrlist(old);
}
if (cell->vl_naddrs < AFS_CELL_MAX_ADDRS)
memset(cell->vl_addrs + cell->vl_naddrs, 0,
(AFS_CELL_MAX_ADDRS - cell->vl_naddrs) * sizeof(cell->vl_addrs[0]));
if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
now = ktime_get_real_seconds();
cell->dns_expiry = expiry;
afs_set_cell_timer(cell->net, expiry - now);
bad_address:
write_sequnlock(&cell->vl_addrs_lock);
out:
afs_set_cell_timer(cell->net, cell->dns_expiry - now);
_leave("");
}
......@@ -482,6 +419,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
ASSERTCMP(atomic_read(&cell->usage), ==, 0);
afs_put_addrlist(cell->vl_addrs);
key_put(cell->anonymous_key);
kfree(cell);
......@@ -514,6 +452,15 @@ void afs_cells_timer(struct timer_list *timer)
afs_dec_cells_outstanding(net);
}
/*
 * Get a reference on a cell record.
 *
 * A NULL @cell is passed straight through, as it was by the inline helper
 * that this function replaces, so callers may hand in an optional cell.
 * Returns @cell.
 */
struct afs_cell *afs_get_cell(struct afs_cell *cell)
{
	if (cell)
		atomic_inc(&cell->usage);
	return cell;
}
/*
* Drop a reference on a cell record.
*/
......
This diff is collapsed.
......@@ -70,6 +70,17 @@ enum afs_call_state {
AFS_CALL_COMPLETE, /* Completed or failed */
};
/*
 * List of server addresses.
 *
 * The list is reference counted; afs_put_addrlist() frees it by RCU once the
 * last reference has been dropped.
 */
struct afs_addr_list {
struct rcu_head rcu; /* Must be first; used to free the list by RCU */
refcount_t usage; /* Reference count on the list */
unsigned short nr_addrs; /* Number of entries in use in addrs[] */
unsigned short index; /* Address currently in use */
struct sockaddr_rxrpc addrs[]; /* The addresses, allocated with the list */
};
/*
* a record of an in-progress RxRPC call
*/
......@@ -283,16 +294,15 @@ struct afs_cell {
#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */
#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */
#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */
#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */
enum afs_cell_state state;
short error;
spinlock_t vl_lock; /* vl_list lock */
/* VLDB server list. */
seqlock_t vl_addrs_lock;
unsigned short vl_naddrs; /* number of VL servers in addr list */
unsigned short vl_curr_svix; /* current server index */
struct sockaddr_rxrpc vl_addrs[AFS_CELL_MAX_ADDRS]; /* cell VL server addresses */
rwlock_t vl_addrs_lock; /* Lock on vl_addrs */
struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */
u8 name_len; /* Length of name */
char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
};
......@@ -343,7 +353,7 @@ struct afs_vlocation {
struct afs_server {
atomic_t usage;
time64_t time_of_death; /* time at which put reduced usage to 0 */
struct sockaddr_rxrpc addr; /* server address */
struct afs_addr_list __rcu *addrs; /* List of addresses for this server */
struct afs_net *net; /* Network namespace in which the server resides */
struct afs_cell *cell; /* cell in which server resides */
struct list_head link; /* link in cell's server list */
......@@ -485,7 +495,48 @@ struct afs_interface {
unsigned mtu; /* MTU of interface */
};
/*
 * Cursor for iterating over a server's address list.
 *
 * afs_iterate_addresses() steps the cursor through the list and
 * afs_end_cursor() releases it and returns the stored error.
 */
struct afs_addr_cursor {
struct afs_addr_list *alist; /* Current address list (pins ref) */
struct sockaddr_rxrpc *addr; /* Address selected by the last iteration step (in alist->addrs[]) */
unsigned short start; /* Starting point in alist->addrs[] */
unsigned short index; /* Index in alist->addrs[] of the current addr; wraps at nr_addrs */
short error; /* Error to be returned by afs_end_cursor() */
bool begun; /* T if we've begun iteration */
bool responded; /* T if the current address responded */
};
/*
 * Cursor for iterating over a set of fileservers.
 *
 * Embeds an address cursor for the addresses of the server being tried.
 */
struct afs_fs_cursor {
struct afs_addr_cursor ac; /* Address cursor for the current server */
struct afs_server *server; /* Current server (pins ref) */
};
/*****************************************************************************/
/*
* addr_list.c
*/
static inline struct afs_addr_list *afs_get_addrlist(struct afs_addr_list *alist)
{
if (alist)
refcount_inc(&alist->usage);
return alist;
}
extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
unsigned short,
unsigned short);
extern void afs_put_addrlist(struct afs_addr_list *);
extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
unsigned short, unsigned short);
extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
extern bool afs_iterate_addresses(struct afs_addr_cursor *);
extern int afs_end_cursor(struct afs_addr_cursor *);
extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
/*
* cache.c
*/
......@@ -521,17 +572,11 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest
/*
* cell.c
*/
static inline struct afs_cell *afs_get_cell(struct afs_cell *cell)
{
if (cell)
atomic_inc(&cell->usage);
return cell;
}
extern int afs_cell_init(struct afs_net *, const char *);
extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
const char *, bool);
extern struct afs_cell *afs_get_cell(struct afs_cell *);
extern void afs_put_cell(struct afs_net *, struct afs_cell *);
extern void afs_manage_cells(struct work_struct *);
extern void afs_cells_timer(struct timer_list *);
......@@ -574,40 +619,41 @@ extern int afs_flock(struct file *, int, struct file_lock *);
/*
* fsclient.c
*/
extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, struct afs_volsync *,
bool);
extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *, bool);
extern int afs_fs_fetch_data(struct afs_server *, struct key *,
extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, struct afs_read *, bool);
extern int afs_fs_create(struct afs_server *, struct key *,
extern int afs_fs_create(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, const char *, umode_t,
struct afs_fid *, struct afs_file_status *,
struct afs_callback *, bool);
extern int afs_fs_remove(struct afs_server *, struct key *,
extern int afs_fs_remove(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, const char *, bool, bool);
extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
extern int afs_fs_link(struct afs_fs_cursor *, struct key *, struct afs_vnode *,
struct afs_vnode *, const char *, bool);
extern int afs_fs_symlink(struct afs_server *, struct key *,
extern int afs_fs_symlink(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, const char *, const char *,
struct afs_fid *, struct afs_file_status *, bool);
extern int afs_fs_rename(struct afs_server *, struct key *,
extern int afs_fs_rename(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, const char *,
struct afs_vnode *, const char *, bool);
extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
extern int afs_fs_store_data(struct afs_fs_cursor *, struct afs_writeback *,
pgoff_t, pgoff_t, unsigned, unsigned, bool);
extern int afs_fs_setattr(struct afs_server *, struct key *,
extern int afs_fs_setattr(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, struct iattr *, bool);
extern int afs_fs_get_volume_status(struct afs_server *, struct key *,
extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct key *,
struct afs_vnode *,
struct afs_volume_status *, bool);
extern int afs_fs_set_lock(struct afs_server *, struct key *,
extern int afs_fs_set_lock(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, afs_lock_type_t, bool);
extern int afs_fs_extend_lock(struct afs_server *, struct key *,
extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, bool);
extern int afs_fs_release_lock(struct afs_server *, struct key *,
extern int afs_fs_release_lock(struct afs_fs_cursor *, struct key *,
struct afs_vnode *, bool);
extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct key *, bool);
extern int afs_fs_give_up_all_callbacks(struct afs_server *, struct afs_addr_cursor *,
struct key *, bool);
/*
* inode.c
......@@ -697,7 +743,7 @@ extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
extern int afs_queue_call_work(struct afs_call *);
extern long afs_make_call(struct sockaddr_rxrpc *, struct afs_call *, gfp_t, bool);
extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
size_t, size_t);
......@@ -751,13 +797,11 @@ extern void __exit afs_fs_exit(void);
/*
* vlclient.c
*/
extern int afs_vl_get_entry_by_name(struct afs_net *,
struct sockaddr_rxrpc *, struct key *,
const char *, struct afs_cache_vlocation *,
bool);
extern int afs_vl_get_entry_by_id(struct afs_net *,
struct sockaddr_rxrpc *, struct key *,
afs_volid_t, afs_voltype_t,
extern int afs_vl_get_entry_by_name(struct afs_net *, struct afs_addr_cursor *,
struct key *, const char *,
struct afs_cache_vlocation *, bool);
extern int afs_vl_get_entry_by_id(struct afs_net *, struct afs_addr_cursor *,
struct key *, afs_volid_t, afs_voltype_t,
struct afs_cache_vlocation *, bool);
/*
......@@ -828,9 +872,11 @@ static inline struct afs_volume *afs_get_volume(struct afs_volume *volume)
extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
extern int afs_volume_release_fileserver(struct afs_vnode *,
struct afs_server *, int);
extern void afs_init_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
extern int afs_set_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
extern bool afs_volume_pick_fileserver(struct afs_fs_cursor *, struct afs_vnode *);
extern bool afs_iterate_fs_cursor(struct afs_fs_cursor *, struct afs_vnode *);
extern int afs_end_fs_cursor(struct afs_fs_cursor *, struct afs_net *);
/*
* write.c
......
......@@ -514,23 +514,23 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
*/
static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
{
struct afs_addr_list *alist;
struct afs_cell *cell = m->private;
loff_t pos = *_pos;
_enter("cell=%p pos=%Ld", cell, *_pos);
rcu_read_lock();
/* lock the list against modification */
down_read(&cell->vl_sem);
alist = rcu_dereference(cell->vl_addrs);
/* allow for the header line */
if (!pos)
return (void *) 1;
pos--;
if (pos >= cell->vl_naddrs)
if (!alist || pos >= alist->nr_addrs)
return NULL;
return &cell->vl_addrs[pos];
return alist->addrs + pos;
}
/*
......@@ -539,17 +539,18 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
loff_t *_pos)
{
struct afs_addr_list *alist;
struct afs_cell *cell = p->private;
loff_t pos;
_enter("cell=%p{nad=%u} pos=%Ld", cell, cell->vl_naddrs, *_pos);
alist = rcu_dereference(cell->vl_addrs);
pos = *_pos;
(*_pos)++;
if (pos >= cell->vl_naddrs)
if (!alist || pos >= alist->nr_addrs)
return NULL;
return &cell->vl_addrs[pos];
return alist->addrs + pos;
}
/*
......@@ -557,9 +558,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
*/
static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
{
struct afs_cell *cell = p->private;
up_read(&cell->vl_sem);
rcu_read_unlock();
}
/*
......@@ -658,7 +657,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
}
/* display one cell per line on subsequent lines */
sprintf(ipaddr, "%pISp", &server->addr.transport);
sprintf(ipaddr, "%pISp", &server->addrs->addrs[0].transport);
seq_printf(m, "%3d %-15s %5d\n",
atomic_read(&server->usage), ipaddr, server->fs_state);
......
......@@ -321,9 +321,10 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
/*
* initiate a call
*/
long afs_make_call(struct sockaddr_rxrpc *srx, struct afs_call *call,
long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
gfp_t gfp, bool async)
{
struct sockaddr_rxrpc *srx = ac->addr;
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
......
......@@ -56,7 +56,9 @@ static int afs_install_server(struct afs_server *server)
p = *pp;
_debug("- consider %p", p);
xserver = rb_entry(p, struct afs_server, master_rb);
diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr));
/* Order servers by their whole primary address.  sizeof(sizeof(x)) would
 * only compare sizeof(size_t) bytes and so disagree with the full-size
 * comparison used when searching the tree.
 */
diff = memcmp(&server->addrs->addrs[0],
	      &xserver->addrs->addrs[0],
	      sizeof(server->addrs->addrs[0]));
if (diff < 0)
pp = &(*pp)->rb_left;
else if (diff > 0)
......@@ -85,25 +87,38 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
_enter("");
server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
if (server) {
atomic_set(&server->usage, 1);
server->net = cell->net;
server->cell = cell;
INIT_LIST_HEAD(&server->link);
INIT_LIST_HEAD(&server->grave);
init_rwsem(&server->sem);
spin_lock_init(&server->fs_lock);
INIT_LIST_HEAD(&server->cb_interests);
rwlock_init(&server->cb_break_lock);
server->addr = *addr;
afs_inc_servers_outstanding(cell->net);
_leave(" = %p{%d}", server, atomic_read(&server->usage));
} else {
_leave(" = NULL [nomem]");
}
if (!server)
goto enomem;
server->addrs = kzalloc(sizeof(struct afs_addr_list) +
sizeof(struct sockaddr_rxrpc),
GFP_KERNEL);
if (!server->addrs)
goto enomem_server;
atomic_set(&server->usage, 1);
server->net = cell->net;
server->cell = cell;
INIT_LIST_HEAD(&server->link);
INIT_LIST_HEAD(&server->grave);
init_rwsem(&server->sem);
spin_lock_init(&server->fs_lock);
INIT_LIST_HEAD(&server->cb_interests);
rwlock_init(&server->cb_break_lock);
refcount_set(&server->addrs->usage, 1);
server->addrs->nr_addrs = 1;
server->addrs->addrs[0] = *addr;
afs_inc_servers_outstanding(cell->net);
_leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
enomem_server:
kfree(server);
enomem:
_leave(" = NULL [nomem]");
return NULL;
}
/*
......@@ -120,7 +135,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
read_lock(&cell->servers_lock);
list_for_each_entry(server, &cell->servers, link) {
if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
goto found_server_quickly;
}
read_unlock(&cell->servers_lock);
......@@ -135,7 +150,7 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell,
/* check the cell's server list again */
list_for_each_entry(server, &cell->servers, link) {
if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
if (memcmp(&server->addrs->addrs[0], addr, sizeof(*addr)) == 0)
goto found_server;
}
......@@ -204,7 +219,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
_debug("- consider %p", p);
diff = memcmp(srx, &server->addr, sizeof(*srx));
diff = memcmp(srx, &server->addrs->addrs[0], sizeof(*srx));
if (diff < 0) {
p = p->rb_left;
} else if (diff > 0) {
......@@ -269,10 +284,19 @@ void afs_put_server(struct afs_net *net, struct afs_server *server)
*/
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
struct afs_addr_list *alist = server->addrs;
struct afs_addr_cursor ac = {
.alist = alist,
.addr = &alist->addrs[0],
.start = alist->index,
.index = alist->index,
.error = 0,
};
_enter("%p", server);
afs_fs_give_up_all_callbacks(server, NULL, false);
afs_fs_give_up_all_callbacks(server, &ac, NULL, false);
afs_put_cell(net, server->cell);
afs_put_addrlist(server->addrs);
kfree(server);
afs_dec_servers_outstanding(net);
}
......
......@@ -114,7 +114,7 @@ static const struct afs_call_type afs_RXVLGetEntryById = {
* dispatch a get volume entry by name operation
*/
int afs_vl_get_entry_by_name(struct afs_net *net,
struct sockaddr_rxrpc *addr,
struct afs_addr_cursor *ac,
struct key *key,
const char *volname,
struct afs_cache_vlocation *entry,
......@@ -146,14 +146,14 @@ int afs_vl_get_entry_by_name(struct afs_net *net,
memset((void *) bp + volnamesz, 0, padsz);
/* initiate the call */
return afs_make_call(addr, call, GFP_KERNEL, async);
return afs_make_call(ac, call, GFP_KERNEL, async);
}
/*
* dispatch a get volume entry by ID operation
*/
int afs_vl_get_entry_by_id(struct afs_net *net,
struct sockaddr_rxrpc *addr,
struct afs_addr_cursor *ac,
struct key *key,
afs_volid_t volid,
afs_voltype_t voltype,
......@@ -179,5 +179,5 @@ int afs_vl_get_entry_by_id(struct afs_net *net,
*bp = htonl(voltype);
/* initiate the call */
return afs_make_call(addr, call, GFP_KERNEL, async);
return afs_make_call(ac, call, GFP_KERNEL, async);
}
......@@ -29,22 +29,25 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
struct key *key,
struct afs_cache_vlocation *vldb)
{
struct afs_cell *cell = vl->cell;
int count, ret;
struct afs_addr_cursor ac;
int ret;
_enter("%s,%s", cell->name, vl->vldb.name);
_enter("%s,%s", vl->cell->name, vl->vldb.name);
ret = afs_set_vl_cursor(&ac, vl->cell);
if (ret < 0)
return ret;
down_write(&vl->cell->vl_sem);
ret = -ENOMEDIUM;
for (count = cell->vl_naddrs; count > 0; count--) {
struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
while (afs_iterate_addresses(&ac)) {
_debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport);
/* attempt to access the VL server */
ret = afs_vl_get_entry_by_name(cell->net, addr, key,
vl->vldb.name, vldb, false);
switch (ret) {
ac.error = afs_vl_get_entry_by_name(vl->cell->net, &ac, key,
vl->vldb.name, vldb, false);
switch (ac.error) {
case 0:
goto out;
case -ENOMEM:
......@@ -52,26 +55,24 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
case -ENETUNREACH:
case -EHOSTUNREACH:
case -ECONNREFUSED:
if (ret == -ENOMEM || ret == -ENONET)
if (ac.error == -ENOMEM || ac.error == -ENONET)
goto out;
goto rotate;
break;
case -ENOMEDIUM:
case -EKEYREJECTED:
case -EKEYEXPIRED:
ac.responded = true;
goto out;
default:
ret = -EIO;
goto rotate;
ac.responded = true;
ac.error = -EIO;
break;
}
/* rotate the server records upon lookup failure */
rotate:
cell->vl_curr_svix++;
cell->vl_curr_svix %= cell->vl_naddrs;
}
out:
up_write(&vl->cell->vl_sem);
ret = afs_end_cursor(&ac);
_leave(" = %d", ret);
return ret;
}
......@@ -86,22 +87,24 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
afs_voltype_t voltype,
struct afs_cache_vlocation *vldb)
{
struct afs_cell *cell = vl->cell;
int count, ret;
struct afs_addr_cursor ac;
int ret;
_enter("%s,%x,%d,", cell->name, volid, voltype);
_enter("%s,%x,%d,", vl->cell->name, volid, voltype);
ret = afs_set_vl_cursor(&ac, vl->cell);
if (ret < 0)
return ret;
down_write(&vl->cell->vl_sem);
ret = -ENOMEDIUM;
for (count = cell->vl_naddrs; count > 0; count--) {
struct sockaddr_rxrpc *addr = &cell->vl_addrs[cell->vl_curr_svix];
_debug("CellServ[%hu]: %pIS", cell->vl_curr_svix, &addr->transport);
while (afs_iterate_addresses(&ac)) {
_debug("CellServ[%hu]: %pIS", ac.index, &ac.addr->transport);
/* attempt to access the VL server */
ret = afs_vl_get_entry_by_id(cell->net, addr, key, volid,
voltype, vldb, false);
switch (ret) {
ac.error = afs_vl_get_entry_by_id(vl->cell->net, &ac, key, volid,
voltype, vldb, false);
switch (ac.error) {
case 0:
goto out;
case -ENOMEM:
......@@ -109,10 +112,11 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
case -ENETUNREACH:
case -EHOSTUNREACH:
case -ECONNREFUSED:
if (ret == -ENOMEM || ret == -ENONET)
if (ac.error == -ENOMEM || ac.error == -ENONET)
goto out;
goto rotate;
case -EBUSY:
ac.responded = true;
vl->upd_busy_cnt++;
if (vl->upd_busy_cnt <= 3) {
if (vl->upd_busy_cnt > 1) {
......@@ -124,30 +128,31 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
}
break;
case -ENOMEDIUM:
ac.responded = true;
vl->upd_rej_cnt++;
goto rotate;
default:
ret = -EIO;
ac.responded = true;
ac.error = -EIO;
goto rotate;
}
/* rotate the server records upon lookup failure */
rotate:
cell->vl_curr_svix++;
cell->vl_curr_svix %= cell->vl_naddrs;
vl->upd_busy_cnt = 0;
}
out:
if (ret < 0 && vl->upd_rej_cnt > 0) {
if (ac.error < 0 && vl->upd_rej_cnt > 0) {
printk(KERN_NOTICE "kAFS:"
" Active volume no longer valid '%s'\n",
vl->vldb.name);
vl->valid = 0;
ret = -ENOMEDIUM;
ac.error = -ENOMEDIUM;
}
up_write(&vl->cell->vl_sem);
ret = afs_end_cursor(&ac);
_leave(" = %d", ret);
return ret;
}
......
This diff is collapsed.
......@@ -209,11 +209,45 @@ void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume)
_leave(" [destroyed]");
}
/*
 * Initialise a filesystem server cursor for iterating over FS servers.
 *
 * Puts the cursor into its empty state: no address list, no current address,
 * start and index at 0, no error recorded, no response seen and no server
 * attached.
 *
 * @fc:    the cursor to reset
 * @vnode: not used by this function
 */
void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
{
	fc->ac.alist = NULL;
	fc->ac.addr = NULL;
	fc->ac.start = 0;
	fc->ac.index = 0;
	fc->ac.error = 0;
	/* Users of an address cursor only ever set ->responded to true (see
	 * the VL lookup loops), so it must start out false rather than hold
	 * whatever was in the caller's storage.
	 * NOTE(review): confirm the address cursor has no further fields that
	 * need resetting here.
	 */
	fc->ac.responded = false;
	fc->server = NULL;
}
/*
* Set a filesystem server cursor for using a specific FS server.
*/
int afs_set_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
{
afs_init_fs_cursor(fc, vnode);
read_seqlock_excl(&vnode->cb_lock);
if (vnode->cb_interest) {
if (vnode->cb_interest->server->fs_state == 0)
fc->server = afs_get_server(vnode->cb_interest->server);
else
fc->ac.error = vnode->cb_interest->server->fs_state;
} else {
fc->ac.error = -ESTALE;
}
read_sequnlock_excl(&vnode->cb_lock);
return fc->ac.error;
}
/*
* pick a server to use to try accessing this volume
* - returns with an elevated usage count on the server chosen
*/
struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
bool afs_volume_pick_fileserver(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
{
struct afs_volume *volume = vnode->volume;
struct afs_server *server;
......@@ -223,19 +257,18 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
/* stick with the server we're already using if we can */
if (vnode->cb_interest && vnode->cb_interest->server->fs_state == 0) {
afs_get_server(vnode->cb_interest->server);
_leave(" = %p [current]", vnode->cb_interest->server);
return vnode->cb_interest->server;
fc->server = afs_get_server(vnode->cb_interest->server);
goto set_server;
}
down_read(&volume->server_sem);
/* handle the no-server case */
if (volume->nservers == 0) {
ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
fc->ac.error = volume->rjservers ? -ENOMEDIUM : -ESTALE;
up_read(&volume->server_sem);
_leave(" = %d [no servers]", ret);
return ERR_PTR(ret);
_leave(" = f [no servers %d]", fc->ac.error);
return false;
}
/* basically, just search the list for the first live server and use
......@@ -280,13 +313,15 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
}
}
error:
fc->ac.error = ret;
/* no available servers
* - TODO: handle the no active servers case better
*/
error:
up_read(&volume->server_sem);
_leave(" = %d", ret);
return ERR_PTR(ret);
_leave(" = f [%d]", fc->ac.error);
return false;
picked_server:
/* Found an apparently healthy server. We need to register an interest
......@@ -296,37 +331,41 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
&volume->cb_interests[loop], server);
if (ret < 0)
goto error;
afs_get_server(server);
fc->server = afs_get_server(server);
up_read(&volume->server_sem);
_leave(" = %p (picked %pIS)",
server, &server->addr.transport);
return server;
set_server:
fc->ac.alist = afs_get_addrlist(fc->server->addrs);
fc->ac.addr = &fc->ac.alist->addrs[0];
_debug("USING SERVER: %pIS\n", &fc->ac.addr->transport);
_leave(" = t (picked %pIS)", &fc->ac.addr->transport);
return true;
}
/*
* release a server after use
* - releases the ref on the server struct that was acquired by picking
* - records result of using a particular server to access a volume
* - return 0 to try again, 1 if okay or to issue error
* - the caller must release the server struct if result was 0
* - return true to try again, false if okay or to issue error
* - the caller must release the server struct if result was false
*/
int afs_volume_release_fileserver(struct afs_vnode *vnode,
struct afs_server *server,
int result)
bool afs_iterate_fs_cursor(struct afs_fs_cursor *fc,
struct afs_vnode *vnode)
{
struct afs_volume *volume = vnode->volume;
struct afs_server *server = fc->server;
unsigned loop;
_enter("%s,%pIS,%d",
volume->vlocation->vldb.name, &server->addr.transport, result);
volume->vlocation->vldb.name, &fc->ac.addr->transport,
fc->ac.error);
switch (result) {
switch (fc->ac.error) {
/* success */
case 0:
server->fs_state = 0;
_leave("");
return 1;
_leave(" = f");
return false;
/* the fileserver denied all knowledge of the volume */
case -ENOMEDIUM:
......@@ -363,8 +402,9 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
*/
up_write(&volume->server_sem);
afs_put_server(afs_v2net(vnode), server);
_leave(" [completely rejected]");
return 1;
fc->server = NULL;
_leave(" = f [completely rejected]");
return false;
/* problem reaching the server */
case -ENETUNREACH:
......@@ -378,8 +418,8 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
*/
spin_lock(&server->fs_lock);
if (!server->fs_state) {
server->fs_state = result;
printk("kAFS: SERVER DEAD state=%d\n", result);
server->fs_state = fc->ac.error;
printk("kAFS: SERVER DEAD state=%d\n", fc->ac.error);
}
spin_unlock(&server->fs_lock);
goto try_next_server;
......@@ -390,8 +430,9 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
case -ENONET:
/* tell the caller to accept the result */
afs_put_server(afs_v2net(vnode), server);
_leave(" [local failure]");
return 1;
fc->server = NULL;
_leave(" = f [local failure]");
return false;
}
/* tell the caller to loop around and try the next server */
......@@ -399,6 +440,16 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
up_write(&volume->server_sem);
try_next_server:
afs_put_server(afs_v2net(vnode), server);
_leave(" [try next server]");
return 0;
_leave(" = t [try next server]");
return true;
}
/*
 * Clean up a fileserver cursor.
 *
 * Ends the embedded address cursor with afs_end_cursor(), passes the
 * cursor's server to afs_put_server() and clears fc->server.
 *
 * @fc:  the cursor to clean up
 * @net: passed through to afs_put_server()
 *
 * Returns fc->ac.error as it stands after the address cursor has been ended.
 */
int afs_end_fs_cursor(struct afs_fs_cursor *fc, struct afs_net *net)
{
	afs_end_cursor(&fc->ac);

	afs_put_server(net, fc->server);
	/* Clear the pointer once it has been put, as the rejection and
	 * local-failure paths of afs_iterate_fs_cursor() do, so that cleaning
	 * up the same cursor again cannot put the server a second time.
	 */
	fc->server = NULL;

	return fc->ac.error;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment