summary refs log tree commit diff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/client.c166
-rw-r--r--net/9p/protocol.c44
-rw-r--r--net/9p/trans_common.c97
-rw-r--r--net/9p/trans_common.h32
-rw-r--r--net/9p/trans_virtio.c129
6 files changed, 419 insertions, 50 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a6..a0874cc1f718 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
 	util.o \
 	protocol.o \
 	trans_fd.o \
+	trans_common.o \
 
 9pnet_virtio-objs := \
 	trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbff..347ec0cd2718 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 			return ERR_PTR(-ENOMEM);
 		}
 		init_waitqueue_head(req->wq);
-		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-								GFP_KERNEL);
-		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-								GFP_KERNEL);
+		if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+				P9_TRANS_PREF_PAYLOAD_SEP) {
+			int alloc_msize = min(c->msize, 4096);
+			req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+					GFP_KERNEL);
+			req->tc->capacity = alloc_msize;
+			req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+					GFP_KERNEL);
+			req->rc->capacity = alloc_msize;
+		} else {
+			req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+					GFP_KERNEL);
+			req->tc->capacity = c->msize;
+			req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+					GFP_KERNEL);
+			req->rc->capacity = c->msize;
+		}
 		if ((!req->tc) || (!req->rc)) {
 			printk(KERN_ERR "Couldn't grow tag array\n");
 			kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 			return ERR_PTR(-ENOMEM);
 		}
 		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
-		req->tc->capacity = c->msize;
 		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
-		req->rc->capacity = c->msize;
 	}
 
 	p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 {
 	int8_t type;
 	int err;
+	int ecode;
 
 	err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
 	if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 		return err;
 	}
 
-	if (type == P9_RERROR || type == P9_RLERROR) {
-		int ecode;
-
-		if (!p9_is_proto_dotl(c)) {
-			char *ename;
+	if (type != P9_RERROR && type != P9_RLERROR)
+		return 0;
 
-			err = p9pdu_readf(req->rc, c->proto_version, "s?d",
-								&ename, &ecode);
-			if (err)
-				goto out_err;
+	if (!p9_is_proto_dotl(c)) {
+		char *ename;
+
+		if (req->tc->pbuf_size) {
+			/* Handle user buffers */
+			size_t len = req->rc->size - req->rc->offset;
+			if (req->tc->pubuf) {
+				/* User Buffer */
+				err = copy_from_user(
+					&req->rc->sdata[req->rc->offset],
+					req->tc->pubuf, len);
+				if (err) {
+					err = -EFAULT;
+					goto out_err;
+				}
+			} else {
+				/* Kernel Buffer */
+				memmove(&req->rc->sdata[req->rc->offset],
+						req->tc->pkbuf, len);
+			}
+		}
+		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+				&ename, &ecode);
+		if (err)
+			goto out_err;
 
-			if (p9_is_proto_dotu(c))
-				err = -ecode;
+		if (p9_is_proto_dotu(c))
+			err = -ecode;
 
-			if (!err || !IS_ERR_VALUE(err)) {
-				err = p9_errstr2errno(ename, strlen(ename));
+		if (!err || !IS_ERR_VALUE(err)) {
+			err = p9_errstr2errno(ename, strlen(ename));
 
-				P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+			P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
+					ename);
 
-				kfree(ename);
-			}
-		} else {
-			err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
-			err = -ecode;
-
-			P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+			kfree(ename);
 		}
+	} else {
+		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+		err = -ecode;
+
+		P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+	}
 
-	} else
-		err = 0;
 
 	return err;
 
@@ -1191,6 +1220,27 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fsync);
 
+int p9_client_sync_fs(struct p9_fid *fid)
+{
+	int err = 0;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
+
+	clnt = fid->clnt;
+	req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_sync_fs);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
@@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 	if (count < rsize)
 		rsize = count;
 
-	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+	/* Don't bother zerocopy form small IO (< 1024) */
+	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+		req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
+				rsize, data, udata);
+	} else {
+		req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
+				rsize);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
 
-	if (data) {
-		memmove(data, dataptr, count);
-	} else {
-		err = copy_to_user(udata, dataptr, count);
-		if (err) {
-			err = -EFAULT;
-			goto free_and_error;
+	if (!req->tc->pbuf_size) {
+		if (data) {
+			memmove(data, dataptr, count);
+		} else {
+			err = copy_to_user(udata, dataptr, count);
+			if (err) {
+				err = -EFAULT;
+				goto free_and_error;
+			}
 		}
 	}
 	p9_free_req(clnt, req);
@@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 
 	if (count < rsize)
 		rsize = count;
-	if (data)
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
-								rsize, data);
-	else
-		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
-								rsize, udata);
+
+	/* Don't bother zerocopy form small IO (< 1024) */
+	if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+				P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+		req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
+				rsize, data, udata);
+	} else {
+
+		if (data)
+			req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
+					offset, rsize, data);
+		else
+			req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
+					offset, rsize, udata);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 	if (count < rsize)
 		rsize = count;
 
-	req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
+	if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+			P9_TRANS_PREF_PAYLOAD_SEP) {
+		req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
+				offset, rsize, data);
+	} else {
+		req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
+				offset, rsize);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
@@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
 
-	if (data)
+	if (!req->tc->pbuf_size && data)
 		memmove(data, dataptr, count);
 
 	p9_free_req(clnt, req);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f210928..2ce515b859b3 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 	return size - len;
 }
 
+static size_t
+pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
+		size_t size)
+{
+	BUG_ON(pdu->size > P9_IOHDRSZ);
+	pdu->pubuf = (char __user *)udata;
+	pdu->pkbuf = (char *)kdata;
+	pdu->pbuf_size = size;
+	return 0;
+}
+
+static size_t
+pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
+{
+	BUG_ON(pdu->size > P9_READDIRHDRSZ);
+	pdu->pkbuf = (char *)kdata;
+	pdu->pbuf_size = size;
+	return 0;
+}
+
 /*
 	b - int8_t
 	w - int16_t
@@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					errcode = -EFAULT;
 			}
 			break;
+		case 'E':{
+				 int32_t cnt = va_arg(ap, int32_t);
+				 const char *k = va_arg(ap, const void *);
+				 const char *u = va_arg(ap, const void *);
+				 errcode = p9pdu_writef(pdu, proto_version, "d",
+						 cnt);
+				 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
+					errcode = -EFAULT;
+			 }
+			 break;
+		case 'F':{
+				 int32_t cnt = va_arg(ap, int32_t);
+				 const char *k = va_arg(ap, const void *);
+				 errcode = p9pdu_writef(pdu, proto_version, "d",
+						 cnt);
+				 if (!errcode && pdu_write_readdir(pdu, k, cnt))
+					errcode = -EFAULT;
+			 }
+			 break;
 		case 'U':{
 				int32_t count = va_arg(ap, int32_t);
 				const char __user *udata =
@@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read);
 
 int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
 {
+	pdu->id = type;
 	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
 }
 
@@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
 {
 	pdu->offset = 0;
 	pdu->size = 0;
+	pdu->private = NULL;
+	pdu->pubuf = NULL;
+	pdu->pkbuf = NULL;
+	pdu->pbuf_size = 0;
 }
 
 int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 000000000000..d62b9aa58df8
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/scatterlist.h>
+#include "trans_common.h"
+
+/**
+ *  p9_release_req_pages - Release pages after the transaction.
+ *  @*private: PDU's private page of struct trans_rpage_info
+ */
+void
+p9_release_req_pages(struct trans_rpage_info *rpinfo)
+{
+	int i = 0;
+
+	while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) {
+		put_page(rpinfo->rp_data[i]);
+		i++;
+	}
+}
+EXPORT_SYMBOL(p9_release_req_pages);
+
+/**
+ * p9_nr_pages - Return number of pages needed to accomodate the payload.
+ */
+int
+p9_nr_pages(struct p9_req_t *req)
+{
+	int start_page, end_page;
+	start_page =  (unsigned long long)req->tc->pubuf >> PAGE_SHIFT;
+	end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size +
+			PAGE_SIZE - 1) >> PAGE_SHIFT;
+	return end_page - start_page;
+}
+EXPORT_SYMBOL(p9_nr_pages);
+
+/**
+ * payload_gup - Translates user buffer into kernel pages and
+ * pins them either for read/write through get_user_pages_fast().
+ * @req: Request to be sent to server.
+ * @pdata_off: data offset into the first page after translation (gup).
+ * @pdata_len: Total length of the IO. gup may not return requested # of pages.
+ * @nr_pages: number of pages to accomodate the payload
+ * @rw: Indicates if the pages are for read or write.
+ */
+int
+p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
+		int nr_pages, u8 rw)
+{
+	uint32_t first_page_bytes = 0;
+	uint32_t pdata_mapped_pages;
+	struct trans_rpage_info  *rpinfo;
+
+	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+
+	if (*pdata_off)
+		first_page_bytes = min((PAGE_SIZE - *pdata_off),
+				req->tc->pbuf_size);
+
+	rpinfo = req->tc->private;
+	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
+			nr_pages, rw, &rpinfo->rp_data[0]);
+
+	if (pdata_mapped_pages < 0) {
+		printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
+				"nr_pages:%d\n", pdata_mapped_pages,
+				req->tc->pubuf, nr_pages);
+		pdata_mapped_pages = 0;
+		return -EIO;
+	}
+	rpinfo->rp_nr_pages = pdata_mapped_pages;
+	if (*pdata_off) {
+		*pdata_len = first_page_bytes;
+		*pdata_len += min((req->tc->pbuf_size - *pdata_len),
+				((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
+	} else {
+		*pdata_len = min(req->tc->pbuf_size,
+				(size_t)pdata_mapped_pages << PAGE_SHIFT);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 000000000000..76309223bb02
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* TRUE if it is user context */
+#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
+
+/**
+ * struct trans_rpage_info - To store mapped page information in PDU.
+ * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
+ * @rp_nr_pages: Number of mapped pages
+ * @rp_data: Array of page pointers
+ */
+struct trans_rpage_info {
+	u8 rp_alloc;
+	int rp_nr_pages;
+	struct page *rp_data[0];
+};
+
+void p9_release_req_pages(struct trans_rpage_info *);
+int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
+int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20e..9b550ed9c711 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -45,6 +45,7 @@
 #include <linux/scatterlist.h>
 #include <linux/virtio.h>
 #include <linux/virtio_9p.h>
+#include "trans_common.h"
 
 #define VIRTQUEUE_NUM	128
 
@@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq)
 					rc->tag);
 			req = p9_tag_lookup(chan->client, rc->tag);
 			req->status = REQ_STATUS_RCVD;
+			if (req->tc->private) {
+				struct trans_rpage_info *rp = req->tc->private;
+				/*Release pages */
+				p9_release_req_pages(rp);
+				if (rp->rp_alloc)
+					kfree(rp);
+				req->tc->private = NULL;
+			}
 			p9_client_cb(chan->client, req);
 		} else {
 			spin_unlock_irqrestore(&chan->lock, flags);
@@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
 }
 
 /**
+ * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
+ * this takes a list of pages.
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @pdata_off: Offset into the first page
+ * @**pdata: a list of pages to add into sg.
+ * @count: amount of data to pack into the scatter/gather list
+ */
+static int
+pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
+		struct page **pdata, int count)
+{
+	int s;
+	int i = 0;
+	int index = start;
+
+	if (pdata_off) {
+		s = min((int)(PAGE_SIZE - pdata_off), count);
+		sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
+		count -= s;
+	}
+
+	while (count) {
+		BUG_ON(index > limit);
+		s = min((int)PAGE_SIZE, count);
+		sg_set_page(&sg[index++], pdata[i++], s, 0);
+		count -= s;
+	}
+	return index-start;
+}
+
+/**
  * p9_virtio_request - issue a request
  * @client: client instance issuing the request
  * @req: request to be issued
@@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
 static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
-	int in, out;
+	int in, out, inp, outp;
 	struct virtio_chan *chan = client->trans;
 	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 	unsigned long flags;
-	int err;
+	size_t pdata_off = 0;
+	struct trans_rpage_info *rpinfo = NULL;
+	int err, pdata_len = 0;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
 req_retry:
 	req->status = REQ_STATUS_SENT;
 
+	if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
+		int nr_pages = p9_nr_pages(req);
+		int rpinfo_size = sizeof(struct trans_rpage_info) +
+			sizeof(struct page *) * nr_pages;
+
+		if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
+			/* We can use sdata */
+			req->tc->private = req->tc->sdata + req->tc->size;
+			rpinfo = (struct trans_rpage_info *)req->tc->private;
+			rpinfo->rp_alloc = 0;
+		} else {
+			req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
+			if (!req->tc->private) {
+				P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
+					"private kmalloc returned NULL");
+				return -ENOMEM;
+			}
+			rpinfo = (struct trans_rpage_info *)req->tc->private;
+			rpinfo->rp_alloc = 1;
+		}
+
+		err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
+				req->tc->id == P9_TREAD ? 1 : 0);
+		if (err < 0) {
+			if (rpinfo->rp_alloc)
+				kfree(rpinfo);
+			return err;
+		}
+	}
+
 	spin_lock_irqsave(&chan->lock, flags);
+
+	/* Handle out VirtIO ring buffers */
 	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
-								req->tc->size);
-	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
-								client->msize);
+			req->tc->size);
+
+	if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
+		/* We have additional write payload buffer to take care */
+		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+			outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+					pdata_off, rpinfo->rp_data, pdata_len);
+		} else {
+			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+								req->tc->pkbuf;
+			outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
+					req->tc->pbuf_size);
+		}
+		out += outp;
+	}
+
+	/* Handle in VirtIO ring buffers */
+	if (req->tc->pbuf_size &&
+		((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
+		/*
+		 * Take care of additional Read payload.
+		 * 11 is the read/write header = PDU Header(7) + IO Size (4).
+		 * Arrange in such a way that server places header in the
+		 * alloced memory and payload onto the user buffer.
+		 */
+		inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+		/*
+		 * Running executables in the filesystem may result in
+		 * a read request with kernel buffer as opposed to user buffer.
+		 */
+		if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+			in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
+					pdata_off, rpinfo->rp_data, pdata_len);
+		} else {
+			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+								req->tc->pkbuf;
+			in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
+					pbuf, req->tc->pbuf_size);
+		}
+		in += inp;
+	} else {
+		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
+				client->msize);
+	}
 
 	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
 	if (err < 0) {
@@ -246,6 +362,8 @@ req_retry:
 			P9_DPRINTK(P9_DEBUG_TRANS,
 					"9p debug: "
 					"virtio rpc add_buf returned failure");
+			if (rpinfo && rpinfo->rp_alloc)
+				kfree(rpinfo);
 			return -EIO;
 		}
 	}
@@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.request = p9_virtio_request,
 	.cancel = p9_virtio_cancel,
 	.maxsize = PAGE_SIZE*16,
+	.pref = P9_TRANS_PREF_PAYLOAD_SEP,
 	.def = 0,
 	.owner = THIS_MODULE,
 };