Commit 01b1df28 authored by Dave Kleikamp's avatar Dave Kleikamp

Merge jfs@jfs.bkbits.net:linux-2.5

into kleikamp.austin.ibm.com:/home/shaggy/bk/jfs-2.5
parents 648044ef 0d84f0ac
VERSION = 2
PATCHLEVEL = 5
SUBLEVEL = 25
SUBLEVEL = 26
EXTRAVERSION =
# *DOCUMENTATION*
......
......@@ -561,11 +561,10 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
#define nlmclt_decode_norep NULL
#define PROC(proc, argtype, restype) \
{ "nlm_" #proc, \
(kxdrproc_t) nlmclt_encode_##argtype, \
(kxdrproc_t) nlmclt_decode_##restype, \
MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
0 \
{ .p_procname = "nlm_" #proc, \
.p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
.p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
}
static struct rpc_procinfo nlm_procedures[] = {
......
......@@ -567,11 +567,10 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
#define nlm4clt_decode_norep NULL
#define PROC(proc, argtype, restype) \
{ "nlm4_" #proc, \
(kxdrproc_t) nlm4clt_encode_##argtype, \
(kxdrproc_t) nlm4clt_decode_##restype, \
MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
0 \
{ .p_procname = "nlm4_" #proc, \
.p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
.p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
.p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \
}
static struct rpc_procinfo nlm4_procedures[] = {
......
......@@ -671,33 +671,32 @@ nfs_stat_to_errno(int stat)
# define MAX(a, b) (((a) > (b))? (a) : (b))
#endif
#define PROC(proc, argtype, restype) \
{ "nfs_" #proc, \
(kxdrproc_t) nfs_xdr_##argtype, \
(kxdrproc_t) nfs_xdr_##restype, \
MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
0 \
#define PROC(proc, argtype, restype, timer) \
{ .p_procname = "nfs_" #proc, \
.p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs_xdr_##restype, \
.p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
.p_timer = timer \
}
static struct rpc_procinfo nfs_procedures[18] = {
PROC(null, enc_void, dec_void),
PROC(getattr, fhandle, attrstat),
PROC(setattr, sattrargs, attrstat),
PROC(root, enc_void, dec_void),
PROC(lookup, diropargs, diropres),
PROC(readlink, readlinkargs, readlinkres),
PROC(read, readargs, readres),
PROC(writecache, enc_void, dec_void),
PROC(write, writeargs, writeres),
PROC(create, createargs, diropres),
PROC(remove, diropargs, stat),
PROC(rename, renameargs, stat),
PROC(link, linkargs, stat),
PROC(symlink, symlinkargs, stat),
PROC(mkdir, createargs, diropres),
PROC(rmdir, diropargs, stat),
PROC(readdir, readdirargs, readdirres),
PROC(statfs, fhandle, statfsres),
PROC(null, enc_void, dec_void, 0),
PROC(getattr, fhandle, attrstat, 1),
PROC(setattr, sattrargs, attrstat, 0),
PROC(root, enc_void, dec_void, 0),
PROC(lookup, diropargs, diropres, 2),
PROC(readlink, readlinkargs, readlinkres, 3),
PROC(read, readargs, readres, 3),
PROC(writecache, enc_void, dec_void, 0),
PROC(write, writeargs, writeres, 4),
PROC(create, createargs, diropres, 0),
PROC(remove, diropargs, stat, 0),
PROC(rename, renameargs, stat, 0),
PROC(link, linkargs, stat, 0),
PROC(symlink, symlinkargs, stat, 0),
PROC(mkdir, createargs, diropres, 0),
PROC(rmdir, diropargs, stat, 0),
PROC(readdir, readdirargs, readdirres, 3),
PROC(statfs, fhandle, statfsres, 0),
};
struct rpc_version nfs_version2 = {
......
......@@ -988,37 +988,37 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
# define MAX(a, b) (((a) > (b))? (a) : (b))
#endif
#define PROC(proc, argtype, restype) \
{ "nfs3_" #proc, \
(kxdrproc_t) nfs3_xdr_##argtype, \
(kxdrproc_t) nfs3_xdr_##restype, \
MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
0 \
#define PROC(proc, argtype, restype, timer) \
{ .p_procname = "nfs3_" #proc, \
.p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
.p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
.p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
.p_timer = timer \
}
static struct rpc_procinfo nfs3_procedures[22] = {
PROC(null, enc_void, dec_void),
PROC(getattr, fhandle, attrstat),
PROC(setattr, sattrargs, wccstat),
PROC(lookup, diropargs, lookupres),
PROC(access, accessargs, accessres),
PROC(readlink, readlinkargs, readlinkres),
PROC(read, readargs, readres),
PROC(write, writeargs, writeres),
PROC(create, createargs, createres),
PROC(mkdir, mkdirargs, createres),
PROC(symlink, symlinkargs, createres),
PROC(mknod, mknodargs, createres),
PROC(remove, diropargs, wccstat),
PROC(rmdir, diropargs, wccstat),
PROC(rename, renameargs, renameres),
PROC(link, linkargs, linkres),
PROC(readdir, readdirargs, readdirres),
PROC(readdirplus, readdirargs, readdirres),
PROC(fsstat, fhandle, fsstatres),
PROC(fsinfo, fhandle, fsinfores),
PROC(pathconf, fhandle, pathconfres),
PROC(commit, commitargs, commitres),
PROC(null, enc_void, dec_void, 0),
PROC(getattr, fhandle, attrstat, 1),
PROC(setattr, sattrargs, wccstat, 0),
PROC(lookup, diropargs, lookupres, 2),
PROC(access, accessargs, accessres, 1),
PROC(readlink, readlinkargs, readlinkres, 3),
PROC(read, readargs, readres, 3),
PROC(write, writeargs, writeres, 4),
PROC(create, createargs, createres, 0),
PROC(mkdir, mkdirargs, createres, 0),
PROC(symlink, symlinkargs, createres, 0),
PROC(mknod, mknodargs, createres, 0),
PROC(remove, diropargs, wccstat, 0),
PROC(rmdir, diropargs, wccstat, 0),
PROC(rename, renameargs, renameres, 0),
PROC(link, linkargs, linkres, 0),
PROC(readdir, readdirargs, readdirres, 3),
PROC(readdirplus, readdirargs, readdirres, 3),
PROC(fsstat, fhandle, fsstatres, 0),
PROC(fsinfo, fhandle, fsinfores, 0),
PROC(pathconf, fhandle, pathconfres, 0),
PROC(commit, commitargs, commitres, 5),
};
struct rpc_version nfs_version3 = {
......
......@@ -15,6 +15,7 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/timer.h>
#include <asm/signal.h>
/*
......@@ -52,6 +53,8 @@ struct rpc_clnt {
unsigned int cl_flags; /* misc client flags */
unsigned long cl_hardmax; /* max hard timeout */
struct rpc_rtt cl_rtt; /* RTO estimator data */
struct rpc_portmap cl_pmap; /* port mapping */
struct rpc_wait_queue cl_bindwait; /* waiting on getport() */
......@@ -91,6 +94,7 @@ struct rpc_procinfo {
kxdrproc_t p_decode; /* XDR decode function */
unsigned int p_bufsiz; /* req. buffer size */
unsigned int p_count; /* call count */
unsigned int p_timer; /* Which RTT timer to use */
};
#define rpcproc_bufsiz(clnt, proc) ((clnt)->cl_procinfo[proc].p_bufsiz)
......@@ -98,6 +102,7 @@ struct rpc_procinfo {
#define rpcproc_decode(clnt, proc) ((clnt)->cl_procinfo[proc].p_decode)
#define rpcproc_name(clnt, proc) ((clnt)->cl_procinfo[proc].p_procname)
#define rpcproc_count(clnt, proc) ((clnt)->cl_procinfo[proc].p_count)
#define rpcproc_timer(clnt, proc) ((clnt)->cl_procinfo[proc].p_timer)
#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt))
#define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr)
......
/*
* linux/include/linux/sunrpc/timer.h
*
* Declarations for the RPC transport timer.
*
* Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no>
*/
#ifndef _LINUX_SUNRPC_TIMER_H
#define _LINUX_SUNRPC_TIMER_H
#include <asm/atomic.h>
struct rpc_rtt {
long timeo; /* default timeout value */
long srtt[5]; /* smoothed round trip time << 3 */
long sdrtt[5]; /* soothed medium deviation of RTT */
atomic_t ntimeouts; /* Global count of the number of timeouts */
};
extern void rpc_init_rtt(struct rpc_rtt *rt, long timeo);
extern void rpc_update_rtt(struct rpc_rtt *rt, int timer, long m);
extern long rpc_calc_rto(struct rpc_rtt *rt, int timer);
static inline void rpc_inc_timeo(struct rpc_rtt *rt)
{
atomic_inc(&rt->ntimeouts);
}
static inline void rpc_clear_timeo(struct rpc_rtt *rt)
{
atomic_set(&rt->ntimeouts, 0);
}
static inline int rpc_ntimeo(struct rpc_rtt *rt)
{
return atomic_read(&rt->ntimeouts);
}
#endif /* _LINUX_SUNRPC_TIMER_H */
......@@ -19,7 +19,7 @@
* The transport code maintains an estimate on the maximum number of out-
* standing RPC requests, using a smoothed version of the congestion
* avoidance implemented in 44BSD. This is basically the Van Jacobson
* slow start algorithm: If a retransmit occurs, the congestion window is
* congestion algorithm: If a retransmit occurs, the congestion window is
* halved; otherwise, it is incremented by 1/cwnd when
*
* - a reply is received and
......@@ -32,15 +32,13 @@
* Note: on machines with low memory we should probably use a smaller
* MAXREQS value: At 32 outstanding reqs with 8 megs of RAM, fragment
* reassembly will frequently run out of memory.
* Come Linux 2.3, we'll handle fragments directly.
*/
#define RPC_MAXCONG 16
#define RPC_MAXREQS (RPC_MAXCONG + 1)
#define RPC_CWNDSCALE 256
#define RPC_MAXCONG (16)
#define RPC_MAXREQS RPC_MAXCONG
#define RPC_CWNDSCALE (256)
#define RPC_MAXCWND (RPC_MAXCONG * RPC_CWNDSCALE)
#define RPC_INITCWND RPC_CWNDSCALE
#define RPCXPRT_CONGESTED(xprt) \
((xprt)->cong >= (xprt)->cwnd)
#define RPC_INITCWND (RPC_MAXCWND >> 1)
#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
/* Default timeout values */
#define RPC_MAX_UDP_TIMEOUT (60*HZ)
......@@ -83,6 +81,7 @@ struct rpc_rqst {
struct rpc_task * rq_task; /* RPC task data */
__u32 rq_xid; /* request XID */
struct rpc_rqst * rq_next; /* free list */
int rq_cong; /* has incremented xprt->cong */
int rq_received; /* receive completed */
struct list_head rq_list;
......@@ -98,9 +97,9 @@ struct rpc_rqst {
u32 rq_bytes_sent; /* Bytes we have sent */
#ifdef RPC_PROFILE
unsigned long rq_xtime; /* when transmitted */
#endif
long rq_xtime; /* when transmitted */
int rq_ntimeo;
int rq_nresend;
};
#define rq_svec rq_snd_buf.head
#define rq_slen rq_snd_buf.len
......@@ -122,9 +121,9 @@ struct rpc_xprt {
unsigned long cong; /* current congestion */
unsigned long cwnd; /* congestion window */
unsigned long congtime; /* hold cwnd until then */
struct rpc_wait_queue sending; /* requests waiting to send */
struct rpc_wait_queue resend; /* requests waiting to resend */
struct rpc_wait_queue pending; /* requests in flight */
struct rpc_wait_queue backlog; /* waiting for slot */
struct rpc_rqst * free; /* free slots */
......
......@@ -9,7 +9,8 @@ export-objs := sunrpc_syms.o
sunrpc-y := clnt.o xprt.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o \
pmap_clnt.o xdr.o sunrpc_syms.o
pmap_clnt.o timer.o xdr.o \
sunrpc_syms.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
sunrpc-objs := $(sunrpc-y)
......
......@@ -104,6 +104,8 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
if (!clnt->cl_port)
clnt->cl_autobind = 1;
rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval);
if (!rpcauth_create(flavor, clnt))
goto out_no_auth;
......@@ -669,7 +671,7 @@ call_timeout(struct rpc_task *task)
rpc_exit(task, -EIO);
return;
}
if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN) && rpc_ntimeo(&clnt->cl_rtt) > 7) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
if (req)
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
......
#include <linux/version.h>
#include <linux/types.h>
#include <linux/unistd.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/timer.h>
#define RPC_RTO_MAX (60*HZ)
#define RPC_RTO_INIT (HZ/5)
#define RPC_RTO_MIN (2)
void
rpc_init_rtt(struct rpc_rtt *rt, long timeo)
{
long t = (timeo - RPC_RTO_INIT) << 3;
int i;
rt->timeo = timeo;
if (t < 0)
t = 0;
for (i = 0; i < 5; i++) {
rt->srtt[i] = t;
rt->sdrtt[i] = RPC_RTO_INIT;
}
atomic_set(&rt->ntimeouts, 0);
}
void
rpc_update_rtt(struct rpc_rtt *rt, int timer, long m)
{
long *srtt, *sdrtt;
if (timer-- == 0)
return;
if (m == 0)
m = 1;
srtt = &rt->srtt[timer];
m -= *srtt >> 3;
*srtt += m;
if (m < 0)
m = -m;
sdrtt = &rt->sdrtt[timer];
m -= *sdrtt >> 2;
*sdrtt += m;
/* Set lower bound on the variance */
if (*sdrtt < RPC_RTO_MIN)
*sdrtt = RPC_RTO_MIN;
}
/*
* Estimate rto for an nfs rpc sent via. an unreliable datagram.
* Use the mean and mean deviation of rtt for the appropriate type of rpc
* for the frequent rpcs and a default for the others.
* The justification for doing "other" this way is that these rpcs
* happen so infrequently that timer est. would probably be stale.
* Also, since many of these rpcs are
* non-idempotent, a conservative timeout is desired.
* getattr, lookup,
* read, write, commit - A+4D
* other - timeo
*/
long
rpc_calc_rto(struct rpc_rtt *rt, int timer)
{
long res;
if (timer-- == 0)
return rt->timeo;
res = (rt->srtt[timer] >> 3) + rt->sdrtt[timer];
if (res > RPC_RTO_MAX)
res = RPC_RTO_MAX;
return res;
}
......@@ -242,11 +242,11 @@ void xdr_kunmap(struct xdr_buf *xdr, size_t base)
return;
if (base || xdr->page_base) {
pglen -= base;
base += xdr->page_base;
ppage += base >> PAGE_CACHE_SHIFT;
}
for (;;) {
flush_dcache_page(*ppage);
flush_page_to_ram(*ppage);
kunmap(*ppage);
if (pglen <= PAGE_CACHE_SIZE)
break;
......
......@@ -77,6 +77,8 @@
# define RPCDBG_FACILITY RPCDBG_XPRT
#endif
#define XPRT_MAX_BACKOFF (8)
/*
* Local functions
*/
......@@ -87,6 +89,7 @@ static void xprt_disconnect(struct rpc_xprt *);
static void xprt_reconn_status(struct rpc_task *task);
static struct socket *xprt_create_socket(int, struct rpc_timeout *);
static int xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
#ifdef RPC_DEBUG_DATA
/*
......@@ -138,13 +141,18 @@ xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
int retval;
spin_lock_bh(&xprt->sock_lock);
if (!xprt->snd_task)
if (!xprt->snd_task) {
if (xprt->nocong || __xprt_get_cong(xprt, task))
xprt->snd_task = task;
else if (xprt->snd_task != task) {
}
if (xprt->snd_task != task) {
dprintk("RPC: %4d TCP write queue full (task %d)\n",
task->tk_pid, xprt->snd_task->tk_pid);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (task->tk_rqstp->rq_nresend)
rpc_sleep_on(&xprt->resend, task, NULL, NULL);
else
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
}
retval = xprt->snd_task == task;
......@@ -152,17 +160,41 @@ xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
return retval;
}
static void
__xprt_lock_write_next(struct rpc_xprt *xprt)
{
struct rpc_task *task;
if (xprt->snd_task)
return;
if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
return;
task = rpc_wake_up_next(&xprt->resend);
if (!task) {
task = rpc_wake_up_next(&xprt->sending);
if (!task)
return;
}
if (xprt->nocong || __xprt_get_cong(xprt, task))
xprt->snd_task = task;
}
/*
* Releases the socket for use by other requests.
*/
static void
__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task)
xprt->snd_task = NULL;
__xprt_lock_write_next(xprt);
}
static inline void
xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task == task) {
xprt->snd_task = NULL;
rpc_wake_up_next(&xprt->sending);
}
__xprt_release_write(xprt, task);
spin_unlock_bh(&xprt->sock_lock);
}
......@@ -233,6 +265,40 @@ xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
return result;
}
/*
* Van Jacobson congestion avoidance. Check if the congestion window
* overflowed. Put the task to sleep if this is the case.
*/
static int
__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
if (req->rq_cong)
return 1;
dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
task->tk_pid, xprt->cong, xprt->cwnd);
if (RPCXPRT_CONGESTED(xprt))
return 0;
req->rq_cong = 1;
xprt->cong += RPC_CWNDSCALE;
return 1;
}
/*
* Adjust the congestion window, and wake up the next task
* that has been sleeping due to congestion
*/
static void
__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
if (!req->rq_cong)
return;
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
__xprt_lock_write_next(xprt);
}
/*
* Adjust RPC congestion window
* We use a time-smoothed congestion estimator to avoid heavy oscillation.
......@@ -242,40 +308,22 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
{
unsigned long cwnd;
if (xprt->nocong)
return;
/*
* Note: we're in a BH context
*/
spin_lock(&xprt->xprt_lock);
cwnd = xprt->cwnd;
if (result >= 0) {
if (xprt->cong < cwnd || time_before(jiffies, xprt->congtime))
goto out;
if (result >= 0 && xprt->cong <= cwnd) {
/* The (cwnd >> 1) term makes sure
* the result gets rounded properly. */
cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
if (cwnd > RPC_MAXCWND)
cwnd = RPC_MAXCWND;
else
pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd);
xprt->congtime = jiffies + ((cwnd * HZ) << 2) / RPC_CWNDSCALE;
dprintk("RPC: cong %08lx, cwnd was %08lx, now %08lx, "
"time %ld ms\n", xprt->cong, xprt->cwnd, cwnd,
(xprt->congtime-jiffies)*1000/HZ);
__xprt_lock_write_next(xprt);
} else if (result == -ETIMEDOUT) {
if ((cwnd >>= 1) < RPC_CWNDSCALE)
cwnd >>= 1;
if (cwnd < RPC_CWNDSCALE)
cwnd = RPC_CWNDSCALE;
xprt->congtime = jiffies + ((cwnd * HZ) << 3) / RPC_CWNDSCALE;
dprintk("RPC: cong %ld, cwnd was %ld, now %ld, "
"time %ld ms\n", xprt->cong, xprt->cwnd, cwnd,
(xprt->congtime-jiffies)*1000/HZ);
pprintk("RPC: %lu %ld cwnd\n", jiffies, cwnd);
}
dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
xprt->cong, xprt->cwnd, cwnd);
xprt->cwnd = cwnd;
out:
spin_unlock(&xprt->xprt_lock);
}
/*
......@@ -427,7 +475,7 @@ xprt_reconnect(struct rpc_task *task)
/* if the socket is already closing, delay 5 secs */
if ((1<<inet->state) & ~(TCP_SYN_SENT|TCP_SYN_RECV))
task->tk_timeout = 5*HZ;
rpc_sleep_on(&xprt->sending, task, xprt_reconn_status, NULL);
rpc_sleep_on(&xprt->pending, task, xprt_reconn_status, NULL);
release_sock(inet);
return;
}
......@@ -480,13 +528,23 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
* Complete reply received.
* The TCP code relies on us to remove the request from xprt->pending.
*/
static inline void
static void
xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
{
struct rpc_task *task = req->rq_task;
struct rpc_clnt *clnt = task->tk_client;
/* Adjust congestion window */
if (!xprt->nocong) {
xprt_adjust_cwnd(xprt, copied);
__xprt_put_cong(xprt, req);
if (!req->rq_nresend) {
int timer = rpcproc_timer(clnt, task->tk_msg.rpc_proc);
if (timer)
rpc_update_rtt(&clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime);
}
rpc_clear_timeo(&clnt->cl_rtt);
}
#ifdef RPC_PROFILE
/* Profile only reads for now */
......@@ -872,7 +930,7 @@ tcp_state_change(struct sock *sk)
xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
spin_lock(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending)
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
rpc_wake_up_task(xprt->snd_task);
spin_unlock(&xprt->sock_lock);
break;
......@@ -909,7 +967,7 @@ xprt_write_space(struct sock *sk)
if (!xprt_test_and_set_wspace(xprt)) {
spin_lock(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->sending)
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
rpc_wake_up_task(xprt->snd_task);
spin_unlock(&xprt->sock_lock);
}
......@@ -922,6 +980,21 @@ xprt_write_space(struct sock *sk)
}
}
/*
* Exponential backoff for UDP retries
*/
static inline int
xprt_expbackoff(struct rpc_task *task, struct rpc_rqst *req)
{
int backoff;
req->rq_ntimeo++;
backoff = min(rpc_ntimeo(&task->tk_client->cl_rtt), XPRT_MAX_BACKOFF);
if (req->rq_ntimeo < (1 << backoff))
return 1;
return 0;
}
/*
* RPC receive timeout handler.
*/
......@@ -934,7 +1007,17 @@ xprt_timer(struct rpc_task *task)
spin_lock(&xprt->sock_lock);
if (req->rq_received)
goto out;
xprt_adjust_cwnd(xprt, -ETIMEDOUT);
if (!xprt->nocong) {
if (xprt_expbackoff(task, req)) {
rpc_add_timer(task, xprt_timer);
goto out_unlock;
}
rpc_inc_timeo(&task->tk_client->cl_rtt);
xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
__xprt_put_cong(xprt, req);
}
req->rq_nresend++;
dprintk("RPC: %4d xprt_timer (%s request)\n",
task->tk_pid, req ? "pending" : "backlogged");
......@@ -943,6 +1026,7 @@ xprt_timer(struct rpc_task *task)
out:
task->tk_timeout = 0;
rpc_wake_up_task(task);
out_unlock:
spin_unlock(&xprt->sock_lock);
}
......@@ -982,15 +1066,13 @@ xprt_transmit(struct rpc_task *task)
if (!xprt_lock_write(xprt, task))
return;
#ifdef RPC_PROFILE
req->rq_xtime = jiffies;
#endif
do_xprt_transmit(task);
}
static void
do_xprt_transmit(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
int status, retry = 0;
......@@ -1002,6 +1084,7 @@ do_xprt_transmit(struct rpc_task *task)
*/
while (1) {
xprt_clear_wspace(xprt);
req->rq_xtime = jiffies;
status = xprt_sendmsg(xprt, req);
if (status < 0)
......@@ -1043,7 +1126,7 @@ do_xprt_transmit(struct rpc_task *task)
spin_lock_bh(&xprt->sock_lock);
if (!xprt_wspace(xprt)) {
task->tk_timeout = req->rq_timeout.to_current;
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
rpc_sleep_on(&xprt->pending, task, NULL, NULL);
}
spin_unlock_bh(&xprt->sock_lock);
return;
......@@ -1059,19 +1142,29 @@ do_xprt_transmit(struct rpc_task *task)
if (xprt->stream)
xprt_disconnect(xprt);
req->rq_bytes_sent = 0;
goto out_release;
}
out_release:
spin_lock_bh(&xprt->sock_lock);
__xprt_release_write(xprt, task);
__xprt_put_cong(xprt, req);
spin_unlock_bh(&xprt->sock_lock);
return;
out_receive:
dprintk("RPC: %4d xmit complete\n", task->tk_pid);
/* Set the task's receive timeout value */
if (!xprt->nocong) {
task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt,
rpcproc_timer(clnt, task->tk_msg.rpc_proc));
req->rq_ntimeo = 0;
if (task->tk_timeout > req->rq_timeout.to_maxval)
task->tk_timeout = req->rq_timeout.to_maxval;
} else
task->tk_timeout = req->rq_timeout.to_current;
spin_lock_bh(&xprt->sock_lock);
if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
__xprt_release_write(xprt, task);
spin_unlock_bh(&xprt->sock_lock);
out_release:
xprt_release_write(xprt, task);
}
/*
......@@ -1086,9 +1179,7 @@ xprt_reserve(struct rpc_task *task)
if (task->tk_rqstp)
return 0;
dprintk("RPC: %4d xprt_reserve cong = %ld cwnd = %ld\n",
task->tk_pid, xprt->cong, xprt->cwnd);
spin_lock_bh(&xprt->xprt_lock);
spin_lock(&xprt->xprt_lock);
xprt_reserve_status(task);
if (task->tk_rqstp) {
task->tk_timeout = 0;
......@@ -1099,7 +1190,7 @@ xprt_reserve(struct rpc_task *task)
task->tk_status = -EAGAIN;
rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
}
spin_unlock_bh(&xprt->xprt_lock);
spin_unlock(&xprt->xprt_lock);
dprintk("RPC: %4d xprt_reserve returns %d\n",
task->tk_pid, task->tk_status);
return task->tk_status;
......@@ -1121,18 +1212,13 @@ xprt_reserve_status(struct rpc_task *task)
} else if (task->tk_rqstp) {
/* We've already been given a request slot: NOP */
} else {
if (RPCXPRT_CONGESTED(xprt) || !(req = xprt->free))
if (!(req = xprt->free))
goto out_nofree;
/* OK: There's room for us. Grab a free slot and bump
* congestion value */
/* OK: There's room for us. Grab a free slot */
xprt->free = req->rq_next;
req->rq_next = NULL;
xprt->cong += RPC_CWNDSCALE;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
if (xprt->free)
xprt_clear_backlog(xprt);
}
return;
......@@ -1176,14 +1262,11 @@ xprt_release(struct rpc_task *task)
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req;
if (xprt->snd_task == task) {
if (xprt->stream)
xprt_disconnect(xprt);
xprt_release_write(xprt, task);
}
if (!(req = task->tk_rqstp))
return;
spin_lock_bh(&xprt->sock_lock);
__xprt_release_write(xprt, task);
__xprt_put_cong(xprt, req);
if (!list_empty(&req->rq_list))
list_del(&req->rq_list);
spin_unlock_bh(&xprt->sock_lock);
......@@ -1192,15 +1275,12 @@ xprt_release(struct rpc_task *task)
dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
spin_lock_bh(&xprt->xprt_lock);
spin_lock(&xprt->xprt_lock);
req->rq_next = xprt->free;
xprt->free = req;
/* Decrease congestion value. */
xprt->cong -= RPC_CWNDSCALE;
xprt_clear_backlog(xprt);
spin_unlock_bh(&xprt->xprt_lock);
spin_unlock(&xprt->xprt_lock);
}
/*
......@@ -1256,7 +1336,6 @@ xprt_setup(struct socket *sock, int proto,
xprt->nocong = 1;
} else
xprt->cwnd = RPC_INITCWND;
xprt->congtime = jiffies;
spin_lock_init(&xprt->sock_lock);
spin_lock_init(&xprt->xprt_lock);
init_waitqueue_head(&xprt->cong_wait);
......@@ -1273,6 +1352,7 @@ xprt_setup(struct socket *sock, int proto,
INIT_RPC_WAITQ(&xprt->pending, "xprt_pending");
INIT_RPC_WAITQ(&xprt->sending, "xprt_sending");
INIT_RPC_WAITQ(&xprt->resend, "xprt_resend");
INIT_RPC_WAITQ(&xprt->backlog, "xprt_backlog");
/* initialize free list */
......@@ -1404,6 +1484,7 @@ xprt_shutdown(struct rpc_xprt *xprt)
{
xprt->shutdown = 1;
rpc_wake_up(&xprt->sending);
rpc_wake_up(&xprt->resend);
rpc_wake_up(&xprt->pending);
rpc_wake_up(&xprt->backlog);
if (waitqueue_active(&xprt->cong_wait))
......@@ -1415,8 +1496,6 @@ xprt_shutdown(struct rpc_xprt *xprt)
*/
int
xprt_clear_backlog(struct rpc_xprt *xprt) {
if (RPCXPRT_CONGESTED(xprt))
return 0;
rpc_wake_up_next(&xprt->backlog);
if (waitqueue_active(&xprt->cong_wait))
wake_up(&xprt->cong_wait);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment