Commit 63e9b66e authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://linux-nfs.org/~bfields/linux

* 'for-linus' of git://linux-nfs.org/~bfields/linux: (100 commits)
  SUNRPC: RPC program information is stored in unsigned integers
  SUNRPC: Move exported symbol definitions after function declaration part 2
  NLM: tear down RPC clients in nlm_shutdown_hosts
  SUNRPC: spin svc_rqst initialization to its own function
  nfsd: more careful input validation in nfsctl write methods
  lockd: minor log message fix
  knfsd: don't bother mapping putrootfh enoent to eperm
  rdma: makefile
  rdma: ONCRPC RDMA protocol marshalling
  rdma: SVCRDMA sendto
  rdma: SVCRDMA recvfrom
  rdma: SVCRDMA Core Transport Services
  rdma: SVCRDMA Transport Module
  rdma: SVCRMDA Header File
  svc: Add svc_xprt_names service to replace svc_sock_names
  knfsd: Support adding transports by writing portlist file
  svc: Add svc API that queries for a transport instance
  svc: Add /proc/sys/sunrpc/transport files
  svc: Add transport hdr size for defer/revisit
  svc: Move the xprt independent code to the svc_xprt.c file
  ...
parents 687fcdf7 ea339d46
......@@ -10,42 +10,16 @@
#define SUNRPC_SVCSOCK_H
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
/*
* RPC server socket.
*/
struct svc_sock {
struct list_head sk_ready; /* list of ready sockets */
struct list_head sk_list; /* list of all sockets */
struct svc_xprt sk_xprt;
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
struct svc_pool * sk_pool; /* current pool iff queued */
struct svc_serv * sk_server; /* service for this socket */
atomic_t sk_inuse; /* use count */
unsigned long sk_flags;
#define SK_BUSY 0 /* enqueued/receiving */
#define SK_CONN 1 /* conn pending */
#define SK_CLOSE 2 /* dead or dying */
#define SK_DATA 3 /* data pending */
#define SK_TEMP 4 /* temp (TCP) socket */
#define SK_DEAD 6 /* socket closed */
#define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */
#define SK_DEFERRED 8 /* request on sk_deferred */
#define SK_OLD 9 /* used for temp socket aging mark+sweep */
#define SK_DETACHED 10 /* detached from tempsocks list */
atomic_t sk_reserved; /* space on outq that is reserved */
spinlock_t sk_lock; /* protects sk_deferred and
* sk_info_authunix */
struct list_head sk_deferred; /* deferred requests that need to
* be revisted */
struct mutex sk_mutex; /* to serialize sending data */
int (*sk_recvfrom)(struct svc_rqst *rqstp);
int (*sk_sendto)(struct svc_rqst *rqstp);
/* We keep the old state_change and data_ready CB's here */
void (*sk_ostate)(struct sock *);
void (*sk_odata)(struct sock *, int bytes);
......@@ -54,21 +28,12 @@ struct svc_sock {
/* private TCP part */
int sk_reclen; /* length of record */
int sk_tcplen; /* current read length */
time_t sk_lastrecv; /* time of last received request */
/* cache of various info for TCP sockets */
void *sk_info_authunix;
struct sockaddr_storage sk_local; /* local address */
struct sockaddr_storage sk_remote; /* remote peer's address */
int sk_remotelen; /* length of address */
};
/*
* Function prototypes.
*/
int svc_makesock(struct svc_serv *, int, unsigned short, int flags);
void svc_force_close_socket(struct svc_sock *);
void svc_close_all(struct list_head *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
......@@ -78,6 +43,8 @@ int svc_addsock(struct svc_serv *serv,
int fd,
char *name_return,
int *proto);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
/*
* svc_makesock socket characteristics
......
......@@ -112,7 +112,8 @@ struct xdr_buf {
__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len);
__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len);
__be32 *xdr_encode_string(__be32 *p, const char *s);
__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen);
__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp,
unsigned int maxlen);
__be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *);
__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);
......
......@@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o
sunrpc_syms.o cache.o rpc_pipe.o \
svc_xprt.o
sunrpc-$(CONFIG_PROC_FS) += stats.o
sunrpc-$(CONFIG_SYSCTL) += sysctl.o
......@@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd,
/* major/minor */
len = qword_get(&mesg, buf, mlen);
if (len < 0)
if (len <= 0)
goto out;
if (len == 0) {
rsii.major_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.minor_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
} else {
rsii.major_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
goto out;
rsii.minor_status = simple_strtoul(buf, &ep, 10);
if (*ep)
goto out;
/* out_handle */
len = qword_get(&mesg, buf, mlen);
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_handle, buf, len))
goto out;
/* out_handle */
len = qword_get(&mesg, buf, mlen);
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_handle, buf, len))
goto out;
/* out_token */
len = qword_get(&mesg, buf, mlen);
status = -EINVAL;
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_token, buf, len))
goto out;
}
/* out_token */
len = qword_get(&mesg, buf, mlen);
status = -EINVAL;
if (len < 0)
goto out;
status = -ENOMEM;
if (dup_to_netobj(&rsii.out_token, buf, len))
goto out;
rsii.h.expiry_time = expiry;
rsip = rsi_update(&rsii, rsip);
status = 0;
......@@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj tmpobj;
struct rsi *rsip, rsikey;
int ret;
/* Read the verifier; should be NULL: */
*authp = rpc_autherr_badverf;
......@@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
/* No upcall result: */
return SVC_DROP;
case 0:
ret = SVC_DROP;
/* Got an answer to the upcall; use it: */
if (gss_write_init_verf(rqstp, rsip))
return SVC_DROP;
goto out;
if (resv->iov_len + 4 > PAGE_SIZE)
return SVC_DROP;
goto out;
svc_putnl(resv, RPC_SUCCESS);
if (svc_safe_putnetobj(resv, &rsip->out_handle))
return SVC_DROP;
goto out;
if (resv->iov_len + 3 * 4 > PAGE_SIZE)
return SVC_DROP;
goto out;
svc_putnl(resv, rsip->major_status);
svc_putnl(resv, rsip->minor_status);
svc_putnl(resv, GSS_SEQ_WIN);
if (svc_safe_putnetobj(resv, &rsip->out_token))
return SVC_DROP;
goto out;
}
return SVC_COMPLETE;
ret = SVC_COMPLETE;
out:
cache_put(&rsip->h, &rsi_cache);
return ret;
}
/*
......@@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
rsci->h.expiry_time = get_seconds();
set_bit(CACHE_NEGATIVE, &rsci->h.flags);
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
......@@ -1386,19 +1388,26 @@ int
gss_svc_init(void)
{
int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
if (rv == 0) {
cache_register(&rsc_cache);
cache_register(&rsi_cache);
}
if (rv)
return rv;
rv = cache_register(&rsc_cache);
if (rv)
goto out1;
rv = cache_register(&rsi_cache);
if (rv)
goto out2;
return 0;
out2:
cache_unregister(&rsc_cache);
out1:
svc_auth_unregister(RPC_AUTH_GSS);
return rv;
}
void
gss_svc_shutdown(void)
{
if (cache_unregister(&rsc_cache))
printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n");
if (cache_unregister(&rsi_cache))
printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n");
cache_unregister(&rsc_cache);
cache_unregister(&rsi_cache);
svc_auth_unregister(RPC_AUTH_GSS);
}
......@@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail,
cache_put(h, detail);
return rv;
}
EXPORT_SYMBOL(cache_check);
/*
* caches need to be periodically cleaned.
......@@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations;
static void do_cache_clean(struct work_struct *work);
static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
void cache_register(struct cache_detail *cd)
static void remove_cache_proc_entries(struct cache_detail *cd)
{
cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
if (cd->proc_ent) {
struct proc_dir_entry *p;
cd->proc_ent->owner = cd->owner;
cd->channel_ent = cd->content_ent = NULL;
if (cd->proc_ent == NULL)
return;
if (cd->flush_ent)
remove_proc_entry("flush", cd->proc_ent);
if (cd->channel_ent)
remove_proc_entry("channel", cd->proc_ent);
if (cd->content_ent)
remove_proc_entry("content", cd->proc_ent);
cd->proc_ent = NULL;
remove_proc_entry(cd->name, proc_net_rpc);
}
p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->flush_ent = p;
if (p) {
p->proc_fops = &cache_flush_operations;
p->owner = cd->owner;
p->data = cd;
}
#ifdef CONFIG_PROC_FS
static int create_cache_proc_entries(struct cache_detail *cd)
{
struct proc_dir_entry *p;
if (cd->cache_request || cd->cache_parse) {
p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->channel_ent = p;
if (p) {
p->proc_fops = &cache_file_operations;
p->owner = cd->owner;
p->data = cd;
}
}
if (cd->cache_show) {
p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->content_ent = p;
if (p) {
p->proc_fops = &content_file_operations;
p->owner = cd->owner;
p->data = cd;
}
}
cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
if (cd->proc_ent == NULL)
goto out_nomem;
cd->proc_ent->owner = cd->owner;
cd->channel_ent = cd->content_ent = NULL;
p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent);
cd->flush_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &cache_flush_operations;
p->owner = cd->owner;
p->data = cd;
if (cd->cache_request || cd->cache_parse) {
p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->channel_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &cache_file_operations;
p->owner = cd->owner;
p->data = cd;
}
if (cd->cache_show) {
p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
cd->proc_ent);
cd->content_ent = p;
if (p == NULL)
goto out_nomem;
p->proc_fops = &content_file_operations;
p->owner = cd->owner;
p->data = cd;
}
return 0;
out_nomem:
remove_cache_proc_entries(cd);
return -ENOMEM;
}
#else /* CONFIG_PROC_FS */
static int create_cache_proc_entries(struct cache_detail *cd)
{
return 0;
}
#endif
int cache_register(struct cache_detail *cd)
{
int ret;
ret = create_cache_proc_entries(cd);
if (ret)
return ret;
rwlock_init(&cd->hash_lock);
INIT_LIST_HEAD(&cd->queue);
spin_lock(&cache_list_lock);
......@@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd)
/* start the cleaning process */
schedule_delayed_work(&cache_cleaner, 0);
return 0;
}
EXPORT_SYMBOL(cache_register);
int cache_unregister(struct cache_detail *cd)
void cache_unregister(struct cache_detail *cd)
{
cache_purge(cd);
spin_lock(&cache_list_lock);
......@@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd)
if (cd->entries || atomic_read(&cd->inuse)) {
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
return -EBUSY;
goto out;
}
if (current_detail == cd)
current_detail = NULL;
list_del_init(&cd->others);
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
if (cd->proc_ent) {
if (cd->flush_ent)
remove_proc_entry("flush", cd->proc_ent);
if (cd->channel_ent)
remove_proc_entry("channel", cd->proc_ent);
if (cd->content_ent)
remove_proc_entry("content", cd->proc_ent);
cd->proc_ent = NULL;
remove_proc_entry(cd->name, proc_net_rpc);
}
remove_cache_proc_entries(cd);
if (list_empty(&cache_list)) {
/* module must be being unloaded so its safe to kill the worker */
cancel_delayed_work_sync(&cache_cleaner);
}
return 0;
return;
out:
printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL(cache_unregister);
/* clean cache tries to find something to clean
* and cleans it.
......@@ -489,6 +519,7 @@ void cache_flush(void)
while (cache_clean() != -1)
cond_resched();
}
EXPORT_SYMBOL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
......@@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail)
cache_flush();
detail->flush_time = 1;
}
EXPORT_SYMBOL(cache_purge);
/*
......@@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner)
/*
* communicate with user-space
*
* We have a magic /proc file - /proc/sunrpc/cache
* On read, you get a full request, or block
* On write, an update request is processed
* Poll works if anything to read, and always allows write
* We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
* On read, you get a full request, or block.
* On write, an update request is processed.
* Poll works if anything to read, and always allows write.
*
* Implemented by linked list of requests. Each open file has
* a ->private that also exists in this list. New request are added
* a ->private that also exists in this list. New requests are added
* to the end and may wakeup and preceding readers.
* New readers are added to the head. If, on read, an item is found with
* CACHE_UPCALLING clear, we free it from the list.
......@@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str)
*bpp = bp;
*lp = len;
}
EXPORT_SYMBOL(qword_add);
void qword_addhex(char **bpp, int *lp, char *buf, int blen)
{
......@@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen)
*bpp = bp;
*lp = len;
}
EXPORT_SYMBOL(qword_addhex);
static void warn_no_listener(struct cache_detail *detail)
{
......@@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
*dest = '\0';
return len;
}
EXPORT_SYMBOL(qword_get);
/*
......@@ -1244,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf,
struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data;
char tbuf[20];
unsigned long p = *ppos;
int len;
size_t len;
sprintf(tbuf, "%lu\n", cd->flush_time);
len = strlen(tbuf);
if (p >= len)
return 0;
len -= p;
if (len > count) len = count;
if (len > count)
len = count;
if (copy_to_user(buf, (void*)(tbuf+p), len))
len = -EFAULT;
else
*ppos += len;
return -EFAULT;
*ppos += len;
return len;
}
......
......@@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL;
static int rpc_proc_show(struct seq_file *seq, void *v) {
const struct rpc_stat *statp = seq->private;
const struct rpc_program *prog = statp->program;
int i, j;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
......@@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
const struct svc_program *prog = statp->program;
const struct svc_procedure *proc;
const struct svc_version *vers;
int i, j;
unsigned int i, j;
seq_printf(seq,
"net %u %u %u %u\n",
......@@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
seq_putc(seq, '\n');
}
}
EXPORT_SYMBOL(svc_seq_show);
/**
* rpc_alloc_iostats - allocate an rpc_iostats structure
......@@ -255,12 +256,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
{
return do_register(statp->program->pg_name, statp, fops);
}
EXPORT_SYMBOL(svc_proc_register);
void
svc_proc_unregister(const char *name)
{
remove_proc_entry(name, proc_net_rpc);
}
EXPORT_SYMBOL(svc_proc_unregister);
void
rpc_proc_init(void)
......
......@@ -22,48 +22,6 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
/* RPC server stuff */
EXPORT_SYMBOL(svc_create);
EXPORT_SYMBOL(svc_create_thread);
EXPORT_SYMBOL(svc_create_pooled);
EXPORT_SYMBOL(svc_set_num_threads);
EXPORT_SYMBOL(svc_exit_thread);
EXPORT_SYMBOL(svc_destroy);
EXPORT_SYMBOL(svc_drop);
EXPORT_SYMBOL(svc_process);
EXPORT_SYMBOL(svc_recv);
EXPORT_SYMBOL(svc_wake_up);
EXPORT_SYMBOL(svc_makesock);
EXPORT_SYMBOL(svc_reserve);
EXPORT_SYMBOL(svc_auth_register);
EXPORT_SYMBOL(auth_domain_lookup);
EXPORT_SYMBOL(svc_authenticate);
EXPORT_SYMBOL(svc_set_client);
/* RPC statistics */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(svc_proc_register);
EXPORT_SYMBOL(svc_proc_unregister);
EXPORT_SYMBOL(svc_seq_show);
#endif
/* caching... */
EXPORT_SYMBOL(auth_domain_find);
EXPORT_SYMBOL(auth_domain_put);
EXPORT_SYMBOL(auth_unix_add_addr);
EXPORT_SYMBOL(auth_unix_forget_old);
EXPORT_SYMBOL(auth_unix_lookup);
EXPORT_SYMBOL(cache_check);
EXPORT_SYMBOL(cache_flush);
EXPORT_SYMBOL(cache_purge);
EXPORT_SYMBOL(cache_register);
EXPORT_SYMBOL(cache_unregister);
EXPORT_SYMBOL(qword_add);
EXPORT_SYMBOL(qword_addhex);
EXPORT_SYMBOL(qword_get);
EXPORT_SYMBOL(svcauth_unix_purge);
EXPORT_SYMBOL(unix_domain_find);
extern struct cache_detail ip_map_cache, unix_gid_cache;
static int __init
......@@ -85,7 +43,8 @@ init_sunrpc(void)
#endif
cache_register(&ip_map_cache);
cache_register(&unix_gid_cache);
init_socket_xprt();
svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
rpcauth_init_module();
out:
return err;
......@@ -96,12 +55,11 @@ cleanup_sunrpc(void)
{
rpcauth_remove_module();
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
unregister_rpc_pipefs();
rpc_destroy_mempool();
if (cache_unregister(&ip_map_cache))
printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n");
if (cache_unregister(&unix_gid_cache))
printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n");
cache_unregister(&ip_map_cache);
cache_unregister(&unix_gid_cache);
#ifdef RPC_DEBUG
rpc_unregister_sysctl();
#endif
......
......@@ -364,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
int vers;
unsigned int vers;
unsigned int xdrsize;
unsigned int i;
......@@ -433,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize,
{
return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
......@@ -452,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
return serv;
}
EXPORT_SYMBOL(svc_create_pooled);
/*
* Destroy an RPC service. Should be called with the BKL held
......@@ -459,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
serv->sv_nrthreads);
......@@ -476,14 +475,12 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer);
list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
svc_force_close_socket(svsk);
svc_close_all(&serv->sv_tempsocks);
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
svc_force_close_socket(svsk);
svc_close_all(&serv->sv_permsocks);
BUG_ON(!list_empty(&serv->sv_permsocks));
BUG_ON(!list_empty(&serv->sv_tempsocks));
......@@ -498,6 +495,7 @@ svc_destroy(struct svc_serv *serv)
kfree(serv->sv_pools);
kfree(serv);
}
EXPORT_SYMBOL(svc_destroy);
/*
* Allocate an RPC server's buffer space.
......@@ -536,31 +534,17 @@ svc_release_buffer(struct svc_rqst *rqstp)
put_page(rqstp->rq_pages[i]);
}
/*
* Create a thread in the given pool. Caller must hold BKL.
* On a NUMA or SMP machine, with a multi-pool serv, the thread
* will be restricted to run on the cpus belonging to the pool.
*/
static int
__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
struct svc_pool *pool)
struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
int have_oldmask = 0;
cpumask_t oldmask;
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
goto out;
goto out_enomem;
init_waitqueue_head(&rqstp->rq_wait);
if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !svc_init_buffer(rqstp, serv->sv_max_mesg))
goto out_thread;
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
......@@ -569,6 +553,45 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
if (!rqstp->rq_argp)
goto out_thread;
rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
if (!rqstp->rq_resp)
goto out_thread;
if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
goto out_thread;
return rqstp;
out_thread:
svc_exit_thread(rqstp);
out_enomem:
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(svc_prepare_thread);
/*
* Create a thread in the given pool. Caller must hold BKL.
* On a NUMA or SMP machine, with a multi-pool serv, the thread
* will be restricted to run on the cpus belonging to the pool.
*/
static int
__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
struct svc_pool *pool)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
int have_oldmask = 0;
cpumask_t oldmask;
rqstp = svc_prepare_thread(serv, pool);
if (IS_ERR(rqstp)) {
error = PTR_ERR(rqstp);
goto out;
}
if (serv->sv_nrpools > 1)
have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
......@@ -597,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
{
return __svc_create_thread(func, serv, &serv->sv_pools[0]);
}
EXPORT_SYMBOL(svc_create_thread);
/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
......@@ -700,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return error;
}
EXPORT_SYMBOL(svc_set_num_threads);
/*
* Called from a server thread as it's exiting. Caller must hold BKL.
......@@ -726,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
if (serv)
svc_destroy(serv);
}
EXPORT_SYMBOL(svc_exit_thread);
/*
* Register an RPC service with the local portmapper.
......@@ -737,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
{
struct svc_program *progp;
unsigned long flags;
int i, error = 0, dummy;
unsigned int i;
int error = 0, dummy;
if (!port)
clear_thread_flag(TIF_SIGPENDING);
......@@ -840,9 +867,9 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_len = 0;
/* Will be turned off only in gss privacy case: */
rqstp->rq_splice_ok = 1;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putnl(resv, 0);
/* Setup reply header */
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid);
......@@ -1049,16 +1076,15 @@ err_bad:
svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
EXPORT_SYMBOL(svc_process);
/*
* Return (transport-specific) limit on the rpc payload.
*/
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
int max = RPCSVC_MAXPAYLOAD_TCP;
u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
max = RPCSVC_MAXPAYLOAD_UDP;
if (rqstp->rq_server->sv_max_payload < max)
max = rqstp->rq_server->sv_max_payload;
return max;
......
This diff is collapsed.
......@@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
EXPORT_SYMBOL(svc_authenticate);
int svc_set_client(struct svc_rqst *rqstp)
{
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL(svc_set_client);
/* A request, which was authenticated, has now executed.
* Time to finalise the credentials and verifier
......@@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
spin_unlock(&authtab_lock);
return rv;
}
EXPORT_SYMBOL(svc_auth_register);
void
svc_auth_unregister(rpc_authflavor_t flavor)
......@@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom)
spin_unlock(&auth_domain_lock);
}
}
EXPORT_SYMBOL(auth_domain_put);
struct auth_domain *
auth_domain_lookup(char *name, struct auth_domain *new)
......@@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new)
spin_unlock(&auth_domain_lock);
return new;
}
EXPORT_SYMBOL(auth_domain_lookup);
struct auth_domain *auth_domain_find(char *name)
{
return auth_domain_lookup(name, NULL);
}
EXPORT_SYMBOL(auth_domain_find);
......@@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name)
rv = auth_domain_lookup(name, &new->h);
}
}
EXPORT_SYMBOL(unix_domain_find);
static void svcauth_unix_domain_release(struct auth_domain *dom)
{
......@@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
else
return -ENOMEM;
}
EXPORT_SYMBOL(auth_unix_add_addr);
int auth_unix_forget_old(struct auth_domain *dom)
{
......@@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
udom->addr_changes++;
return 0;
}
EXPORT_SYMBOL(auth_unix_forget_old);
struct auth_domain *auth_unix_lookup(struct in_addr addr)
{
......@@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr)
cache_put(&ipm->h, &ip_map_cache);
return rv;
}
EXPORT_SYMBOL(auth_unix_lookup);
void svcauth_unix_purge(void)
{
cache_purge(&ip_map_cache);
}
EXPORT_SYMBOL(svcauth_unix_purge);
static inline struct ip_map *
ip_map_cached_get(struct svc_rqst *rqstp)
{
struct ip_map *ipm;
struct svc_sock *svsk = rqstp->rq_sock;
spin_lock(&svsk->sk_lock);
ipm = svsk->sk_info_authunix;
if (ipm != NULL) {
if (!cache_valid(&ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
svsk->sk_info_authunix = NULL;
spin_unlock(&svsk->sk_lock);
cache_put(&ipm->h, &ip_map_cache);
return NULL;
struct ip_map *ipm = NULL;
struct svc_xprt *xprt = rqstp->rq_xprt;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
ipm = xprt->xpt_auth_cache;
if (ipm != NULL) {
if (!cache_valid(&ipm->h)) {
/*
* The entry has been invalidated since it was
* remembered, e.g. by a second mount from the
* same IP address.
*/
xprt->xpt_auth_cache = NULL;
spin_unlock(&xprt->xpt_lock);
cache_put(&ipm->h, &ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
}
cache_get(&ipm->h);
spin_unlock(&xprt->xpt_lock);
}
spin_unlock(&svsk->sk_lock);
return ipm;
}
static inline void
ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
{
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_xprt *xprt = rqstp->rq_xprt;
spin_lock(&svsk->sk_lock);
if (svsk->sk_sock->type == SOCK_STREAM &&
svsk->sk_info_authunix == NULL) {
/* newly cached, keep the reference */
svsk->sk_info_authunix = ipm;
ipm = NULL;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
if (xprt->xpt_auth_cache == NULL) {
/* newly cached, keep the reference */
xprt->xpt_auth_cache = ipm;
ipm = NULL;
}
spin_unlock(&xprt->xpt_lock);
}
spin_unlock(&svsk->sk_lock);
if (ipm)
cache_put(&ipm->h, &ip_map_cache);
}
......
This diff is collapsed.
......@@ -18,6 +18,7 @@
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
/*
* Declare the debug flags here
......@@ -55,6 +56,30 @@ rpc_unregister_sysctl(void)
}
}
static int proc_do_xprt(ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
int len;
if ((*ppos && !write) || !*lenp) {
*lenp = 0;
return 0;
}
if (write)
return -EINVAL;
else {
len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
if (!access_ok(VERIFY_WRITE, buffer, len))
return -EFAULT;
if (__copy_to_user(buffer, tmpbuf, len))
return -EFAULT;
}
*lenp -= len;
*ppos += len;
return 0;
}
static int
proc_dodebug(ctl_table *table, int write, struct file *file,
void __user *buffer, size_t *lenp, loff_t *ppos)
......@@ -147,6 +172,12 @@ static ctl_table debug_table[] = {
.mode = 0644,
.proc_handler = &proc_dodebug
},
{
.procname = "transports",
.maxlen = 256,
.mode = 0444,
.proc_handler = &proc_do_xprt,
},
{ .ctl_name = 0 }
};
......
......@@ -96,11 +96,13 @@ xdr_encode_string(__be32 *p, const char *string)
EXPORT_SYMBOL(xdr_encode_string);
__be32 *
xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen)
xdr_decode_string_inplace(__be32 *p, char **sp,
unsigned int *lenp, unsigned int maxlen)
{
unsigned int len;
u32 len;
if ((len = ntohl(*p++)) > maxlen)
len = ntohl(*p++);
if (len > maxlen)
return NULL;
*lenp = len;
*sp = (char *) p;
......
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
* resets the associated statistic to zero. Any read returns it's
* current value.
*/
static int read_reset_stat(ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos)
{
atomic_t *stat = (atomic_t *)table->data;
if (!stat)
return -EINVAL;
if (write)
atomic_set(stat, 0);
else {
char str_buf[32];
char *data;
int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
if (len >= 32)
return -EFAULT;
len = strlen(str_buf);
if (*ppos > len) {
*lenp = 0;
return 0;
}
data = &str_buf[*ppos];
len -= *ppos;
if (len > *lenp)
len = *lenp;
if (len && copy_to_user(buffer, str_buf, len))
return -EFAULT;
*lenp = len;
*ppos += len;
}
return 0;
}
static struct ctl_table_header *svcrdma_table_header;
static ctl_table svcrdma_parm_table[] = {
{
.procname = "max_requests",
.data = &svcrdma_max_requests,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_max_requests,
.extra2 = &max_max_requests
},
{
.procname = "max_req_size",
.data = &svcrdma_max_req_size,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_max_inline,
.extra2 = &max_max_inline
},
{
.procname = "max_outbound_read_requests",
.data = &svcrdma_ord,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &min_ord,
.extra2 = &max_ord,
},
{
.procname = "rdma_stat_read",
.data = &rdma_stat_read,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_recv",
.data = &rdma_stat_recv,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_write",
.data = &rdma_stat_write,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_starve",
.data = &rdma_stat_sq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_starve",
.data = &rdma_stat_rq_starve,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_poll",
.data = &rdma_stat_rq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_rq_prod",
.data = &rdma_stat_rq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_poll",
.data = &rdma_stat_sq_poll,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.procname = "rdma_stat_sq_prod",
.data = &rdma_stat_sq_prod,
.maxlen = sizeof(atomic_t),
.mode = 0644,
.proc_handler = &read_reset_stat,
},
{
.ctl_name = 0,
},
};
static ctl_table svcrdma_table[] = {
{
.procname = "svc_rdma",
.mode = 0555,
.child = svcrdma_parm_table
},
{
.ctl_name = 0,
},
};
static ctl_table svcrdma_root_table[] = {
{
.ctl_name = CTL_SUNRPC,
.procname = "sunrpc",
.mode = 0555,
.child = svcrdma_table
},
{
.ctl_name = 0,
},
};
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
svc_unreg_xprt_class(&svc_rdma_class);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
dprintk("\tsq_depth : %d\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
return 0;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);
/*
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the BSD-type
* license below:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of the Network Appliance, Inc. nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <asm/unaligned.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : u32 offset into XDR stream
* handle : u32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static u32 *decode_read_list(u32 *va, u32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
while (ch->rc_discrim != xdr_zero) {
u64 ch_offset;
if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
(unsigned long)vaend) {
dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
return NULL;
}
ch->rc_discrim = ntohl(ch->rc_discrim);
ch->rc_position = ntohl(ch->rc_position);
ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle);
ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length);
va = (u32 *)&ch->rc_target.rs_offset;
xdr_decode_hyper(va, &ch_offset);
put_unaligned(ch_offset, (u64 *)va);
ch++;
}
return (u32 *)&ch->rc_position;
}
/*
* Determine number of chunks and total bytes in chunk list. The chunk
* list has already been verified to fit within the RPCRDMA header.
*/
void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
int *ch_count, int *byte_count)
{
/* compute the number of bytes represented by read chunks */
*byte_count = 0;
*ch_count = 0;
for (; ch->rc_discrim != 0; ch++) {
*byte_count = *byte_count + ch->rc_target.rs_length;
*ch_count = *ch_count + 1;
}
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : u32 RKEY ---+
* length : u32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
int ch_no;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
ary->wc_discrim = ntohl(ary->wc_discrim);
ary->wc_nchunks = ntohl(ary->wc_nchunks);
if (((unsigned long)&ary->wc_array[0] +
(sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, ary->wc_nchunks, vaend);
return NULL;
}
for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
u64 ch_offset;
ary->wc_array[ch_no].wc_target.rs_handle =
ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
ary->wc_array[ch_no].wc_target.rs_length =
ntohl(ary->wc_array[ch_no].wc_target.rs_length);
va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
xdr_decode_hyper(va, &ch_offset);
put_unaligned(ch_offset, (u64 *)va);
}
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length;
}
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
int ch_no;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
ary->wc_discrim = ntohl(ary->wc_discrim);
ary->wc_nchunks = ntohl(ary->wc_nchunks);
if (((unsigned long)&ary->wc_array[0] +
(sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, ary->wc_nchunks, vaend);
return NULL;
}
for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) {
u64 ch_offset;
ary->wc_array[ch_no].wc_target.rs_handle =
ntohl(ary->wc_array[ch_no].wc_target.rs_handle);
ary->wc_array[ch_no].wc_target.rs_length =
ntohl(ary->wc_array[ch_no].wc_target.rs_length);
va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset;
xdr_decode_hyper(va, &ch_offset);
put_unaligned(ch_offset, (u64 *)va);
}
return (u32 *)&ary->wc_array[ch_no];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
u32 *va;
u32 *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Verify that there's enough bytes for header + something */
if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
dprintk("svcrdma: header too short = %d\n",
rqstp->rq_arg.len);
return -EINVAL;
}
/* Decode the header */
rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
rmsgp->rm_type = ntohl(rmsgp->rm_type);
if (rmsgp->rm_vers != RPCRDMA_VERSION)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
int hdrlen;
rmsgp->rm_body.rm_padded.rm_align =
ntohl(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
if (hdrlen > rqstp->rq_arg.len)
return -EINVAL;
return hdrlen;
}
/* The chunk list may contain either a read chunk list or a write
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_write_list(va, vaend);
if (!va)
return -EINVAL;
va = decode_reply_array(va, vaend);
if (!va)
return -EINVAL;
rqstp->rq_arg.head[0].iov_base = va;
hdr_len = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdr_len;
*rdma_req = rmsgp;
return hdr_len;
}
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
struct rpcrdma_read_chunk *ch;
struct rpcrdma_write_array *ary;
u32 *va;
u32 hdrlen;
dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
rqstp);
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
/*
* Skip all chunks to find RPC msg. These were previously processed
*/
va = &rmsgp->rm_body.rm_chunks[0];
/* Skip read-list */
for (ch = (struct rpcrdma_read_chunk *)va;
ch->rc_discrim != xdr_zero; ch++);
va = (u32 *)&ch->rc_position;
/* Skip write-list */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
/* Skip reply-array */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
va = (u32 *)&ary->wc_array[ary->wc_nchunks];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err, u32 *va)
{
u32 *startp = va;
*va++ = htonl(rmsgp->rm_xid);
*va++ = htonl(rmsgp->rm_vers);
*va++ = htonl(xprt->sc_max_requests);
*va++ = htonl(RDMA_ERROR);
*va++ = htonl(err);
if (err == ERR_VERS) {
*va++ = htonl(RPCRDMA_VERSION);
*va++ = htonl(RPCRDMA_VERSION);
}
return (int)((unsigned long)va - (unsigned long)startp);
}
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
struct rpcrdma_write_array *wr_ary;
/* There is no read-list in a reply */
/* skip write list */
wr_ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
return (unsigned long) wr_ary - (unsigned long) rmsgp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
{
struct rpcrdma_write_array *ary;
/* no read-list */
rmsgp->rm_body.rm_chunks[0] = xdr_zero;
/* write-array discrim */
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
/* reply-array discriminator */
ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
}
void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
int chunk_no,
u32 rs_handle, u64 rs_offset,
u32 write_len)
{
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = htonl(rs_handle);
seg->rs_length = htonl(write_len);
xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_argp,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
rdma_resp->rm_type = htonl(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment