Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/agent.c                 |  19
-rw-r--r--  drivers/infiniband/core/cm.c                    |  42
-rw-r--r--  drivers/infiniband/core/fmr_pool.c              |   6
-rw-r--r--  drivers/infiniband/core/mad.c                   | 195
-rw-r--r--  drivers/infiniband/core/mad_priv.h              |  16
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c              | 148
-rw-r--r--  drivers/infiniband/core/smi.h                   |   9
-rw-r--r--  drivers/infiniband/core/sysfs.c                 |  36
-rw-r--r--  drivers/infiniband/core/user_mad.c              | 225
-rw-r--r--  drivers/infiniband/core/uverbs.h                |   5
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c            | 202
-rw-r--r--  drivers/infiniband/core/uverbs_main.c           |   6
-rw-r--r--  drivers/infiniband/core/verbs.c                 | 259
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c          |  33
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c         | 323
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h         |  14
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c          | 161
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h         |  33
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c          |   6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c         |  17
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c        |  23
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c         |   2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c     |  29
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h     |  10
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c          |  42
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_pd.c          |   3
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c     |  10
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c    | 170
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h    |  53
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c          | 448
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c         |  43
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_user.h        |   7
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h            |  12
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c         |  10
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c       |  31
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  |  25
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c      |   2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c             |  85
38 files changed, 1959 insertions(+), 801 deletions(-)
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 34b724afd28d..ecd1a3057c61 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -78,25 +78,6 @@ ib_get_agent_port(struct ib_device *device, int port_num)
 	return entry;
 }
 
-int smi_check_local_dr_smp(struct ib_smp *smp,
-			   struct ib_device *device,
-			   int port_num)
-{
-	struct ib_agent_port_private *port_priv;
-
-	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		return 1;
-
-	port_priv = ib_get_agent_port(device, port_num);
-	if (!port_priv) {
-		printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
-		       "not open\n", device->name, port_num);
-		return 1;
-	}
-
-	return smi_check_local_smp(port_priv->agent[0], smp);
-}
-
 int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 			struct ib_wc *wc, struct ib_device *device,
 			int port_num, int qpn)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2514de3480d8..7cfedb8d9bcd 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -121,7 +121,7 @@ struct cm_id_private {
 
 	struct rb_node service_node;
 	struct rb_node sidr_id_node;
-	spinlock_t lock;
+	spinlock_t lock;	/* Do not acquire inside cm.lock */
 	wait_queue_head_t wait;
 	atomic_t refcount;
 
@@ -1547,40 +1547,46 @@ static int cm_rep_handler(struct cm_work *work)
 		return -EINVAL;
 	}
 
+	cm_format_rep_event(work);
+
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	switch (cm_id_priv->id.state) {
+	case IB_CM_REQ_SENT:
+	case IB_CM_MRA_REQ_RCVD:
+		break;
+	default:
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ret = -EINVAL;
+		goto error;
+	}
+
 	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
 	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
 
-	spin_lock_irqsave(&cm.lock, flags);
+	spin_lock(&cm.lock);
 	/* Check for duplicate REP. */
 	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock(&cm.lock);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		ret = -EINVAL;
 		goto error;
 	}
 	/* Check for a stale connection. */
 	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
-		spin_unlock_irqrestore(&cm.lock, flags);
+		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
+			 &cm.remote_id_table);
+		cm_id_priv->timewait_info->inserted_remote_id = 0;
+		spin_unlock(&cm.lock);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
 			     NULL, 0);
 		ret = -EINVAL;
 		goto error;
 	}
-	spin_unlock_irqrestore(&cm.lock, flags);
-
-	cm_format_rep_event(work);
+	spin_unlock(&cm.lock);
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	switch (cm_id_priv->id.state) {
-	case IB_CM_REQ_SENT:
-	case IB_CM_MRA_REQ_RCVD:
-		break;
-	default:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		ret = -EINVAL;
-		goto error;
-	}
 	cm_id_priv->id.state = IB_CM_REP_RCVD;
 	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
 	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
@@ -1603,7 +1609,7 @@ static int cm_rep_handler(struct cm_work *work)
 		cm_deref_id(cm_id_priv);
 	return 0;
 
-error:	cm_cleanup_timewait(cm_id_priv->timewait_info);
+error:
 	cm_deref_id(cm_id_priv);
 	return ret;
 }
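
The cm.c changes reorder lock acquisition in cm_rep_handler(): the per-connection cm_id_priv->lock is now taken before the global cm.lock, matching the rule documented by the new comment on struct cm_id_private, and the state check moves ahead of the table inserts so a REP arriving in the wrong state never leaves a stale remote-id entry behind. A minimal sketch of the nesting discipline (identifiers illustrative, not the kernel's):

    spin_lock_irqsave(&id_priv->lock, flags);  /* outer: per-object lock */
    spin_lock(&cm.lock);                       /* inner: plain spin_lock, irqs already off */
    /* ... insert into the remote-id / remote-qpn tables ... */
    spin_unlock(&cm.lock);
    spin_unlock_irqrestore(&id_priv->lock, flags);

Releasing in reverse order keeps the nesting well-formed; any path that took cm.lock first and then a cm_id_priv->lock would deadlock against this one.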
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index d34a6f1c4f4c..838bf54458d2 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -278,9 +278,9 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 	{
 		struct ib_pool_fmr *fmr;
 		struct ib_fmr_attr attr = {
-			.max_pages = params->max_pages_per_fmr,
-			.max_maps  = IB_FMR_MAX_REMAPS,
-			.page_size = PAGE_SHIFT
+			.max_pages  = params->max_pages_per_fmr,
+			.max_maps   = IB_FMR_MAX_REMAPS,
+			.page_shift = params->page_shift
 		};
 
 		for (i = 0; i < params->pool_size; ++i) {
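
The fmr_pool.c change stops hard-coding the CPU's PAGE_SHIFT into the FMR attributes and instead honors the page shift requested by the pool's creator (the attribute field is also renamed from page_size to page_shift). A hedged sketch of a caller under the post-change ib_fmr_pool_param layout; the field values are illustrative:

    struct ib_fmr_pool_param params = {
            .max_pages_per_fmr = 64,
            .page_shift        = 12,        /* request 4 KB pages */
            .access            = IB_ACCESS_LOCAL_WRITE,
            .pool_size         = 32,
            .dirty_watermark   = 8,
    };
    struct ib_fmr_pool *pool = ib_create_fmr_pool(pd, &params);
    if (IS_ERR(pool))
            return PTR_ERR(pool);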
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c82f47a66e48..f7854b65fd55 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
+ * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 #include <linux/dma-mapping.h>
 
@@ -679,8 +679,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 		goto out;
 	}
 	/* Check to post send on QP or process locally */
-	ret = smi_check_local_dr_smp(smp, device, port_num);
-	if (!ret || !device->process_mad)
+	ret = smi_check_local_smp(smp, device);
+	if (!ret)
 		goto out;
 
 	local = kmalloc(sizeof *local, GFP_ATOMIC);
@@ -765,18 +765,67 @@ out:
 	return ret;
 }
 
-static int get_buf_length(int hdr_len, int data_len)
+static int get_pad_size(int hdr_len, int data_len)
 {
 	int seg_size, pad;
 
 	seg_size = sizeof(struct ib_mad) - hdr_len;
 	if (data_len && seg_size) {
 		pad = seg_size - data_len % seg_size;
-		if (pad == seg_size)
-			pad = 0;
+		return pad == seg_size ? 0 : pad;
 	} else
-		pad = seg_size;
-	return hdr_len + data_len + pad;
+		return seg_size;
+}
+
+static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	struct ib_rmpp_segment *s, *t;
+
+	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
+		list_del(&s->list);
+		kfree(s);
+	}
+}
+
+static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
+				gfp_t gfp_mask)
+{
+	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
+	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
+	struct ib_rmpp_segment *seg = NULL;
+	int left, seg_size, pad;
+
+	send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
+	seg_size = send_buf->seg_size;
+	pad = send_wr->pad;
+
+	/* Allocate data segments. */
+	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
+		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+		if (!seg) {
+			printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
+			       "alloc failed for len %zd, gfp %#x\n",
+			       sizeof (*seg) + seg_size, gfp_mask);
+			free_send_rmpp_list(send_wr);
+			return -ENOMEM;
+		}
+		seg->num = ++send_buf->seg_count;
+		list_add_tail(&seg->list, &send_wr->rmpp_list);
+	}
+
+	/* Zero any padding */
+	if (pad)
+		memset(seg->data + seg_size - pad, 0, pad);
+
+	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
+					  agent.rmpp_version;
+	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
+	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+
+	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
+					struct ib_rmpp_segment, list);
+	send_wr->last_ack_seg = send_wr->cur_seg;
+	return 0;
 }
 
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
@@ -787,32 +836,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
-	int buf_size;
+	int pad, message_size, ret, size;
 	void *buf;
 
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
 				      agent);
-	buf_size = get_buf_length(hdr_len, data_len);
+	pad = get_pad_size(hdr_len, data_len);
+	message_size = hdr_len + data_len + pad;
 
 	if ((!mad_agent->rmpp_version &&
-	     (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
-	    (!rmpp_active && buf_size > sizeof(struct ib_mad)))
+	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
+	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
 		return ERR_PTR(-EINVAL);
 
-	buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
+	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
+	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 
-	mad_send_wr = buf + buf_size;
+	mad_send_wr = buf + size;
+	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
 	mad_send_wr->send_buf.mad = buf;
+	mad_send_wr->send_buf.hdr_len = hdr_len;
+	mad_send_wr->send_buf.data_len = data_len;
+	mad_send_wr->pad = pad;
 
 	mad_send_wr->mad_agent_priv = mad_agent_priv;
-	mad_send_wr->sg_list[0].length = buf_size;
+	mad_send_wr->sg_list[0].length = hdr_len;
 	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+	mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
+	mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
 
 	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
 	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-	mad_send_wr->send_wr.num_sge = 1;
+	mad_send_wr->send_wr.num_sge = 2;
 	mad_send_wr->send_wr.opcode = IB_WR_SEND;
 	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
 	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
@@ -820,13 +877,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
 
 	if (rmpp_active) {
-		struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
-		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
-						   IB_MGMT_RMPP_HDR + data_len);
-		rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
-		rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
-		ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
-				  IB_MGMT_RMPP_FLAG_ACTIVE);
+		ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
+		if (ret) {
+			kfree(buf);
+			return ERR_PTR(ret);
+		}
 	}
 
 	mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 }
 EXPORT_SYMBOL(ib_create_send_mad);
 
+void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
+{
+	struct ib_mad_send_wr_private *mad_send_wr;
+	struct list_head *list;
+
+	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+				   send_buf);
+	list = &mad_send_wr->cur_seg->list;
+
+	if (mad_send_wr->cur_seg->num < seg_num) {
+		list_for_each_entry(mad_send_wr->cur_seg, list, list)
+			if (mad_send_wr->cur_seg->num == seg_num)
+				break;
+	} else if (mad_send_wr->cur_seg->num > seg_num) {
+		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
+			if (mad_send_wr->cur_seg->num == seg_num)
+				break;
+	}
+	return mad_send_wr->cur_seg->data;
+}
+EXPORT_SYMBOL(ib_get_rmpp_segment);
+
+static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
+{
+	if (mad_send_wr->send_buf.seg_count)
+		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
+					   mad_send_wr->seg_num);
+	else
+		return mad_send_wr->send_buf.mad +
+		       mad_send_wr->send_buf.hdr_len;
+}
+
 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
+	struct ib_mad_send_wr_private *mad_send_wr;
 
 	mad_agent_priv = container_of(send_buf->mad_agent,
 				      struct ib_mad_agent_private, agent);
-	kfree(send_buf->mad);
+	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+				   send_buf);
 
+	free_send_rmpp_list(mad_send_wr);
+	kfree(send_buf->mad);
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
 		wake_up(&mad_agent_priv->wait);
 }
@@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
 	mad_agent = mad_send_wr->send_buf.mad_agent;
 	sge = mad_send_wr->sg_list;
-	sge->addr = dma_map_single(mad_agent->device->dma_device,
-				   mad_send_wr->send_buf.mad, sge->length,
-				   DMA_TO_DEVICE);
-	pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+	sge[0].addr = dma_map_single(mad_agent->device->dma_device,
+				     mad_send_wr->send_buf.mad,
+				     sge[0].length,
+				     DMA_TO_DEVICE);
+	pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
+
+	sge[1].addr = dma_map_single(mad_agent->device->dma_device,
+				     ib_get_payload(mad_send_wr),
+				     sge[1].length,
+				     DMA_TO_DEVICE);
+	pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
 
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 		list_add_tail(&mad_send_wr->mad_list.list, list);
 	}
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
-	if (ret)
+	if (ret) {
 		dma_unmap_single(mad_agent->device->dma_device,
-				 pci_unmap_addr(mad_send_wr, mapping),
-				 sge->length, DMA_TO_DEVICE);
-
+				 pci_unmap_addr(mad_send_wr, header_mapping),
+				 sge[0].length, DMA_TO_DEVICE);
+		dma_unmap_single(mad_agent->device->dma_device,
+				 pci_unmap_addr(mad_send_wr, payload_mapping),
+				 sge[1].length, DMA_TO_DEVICE);
+	}
 	return ret;
 }
 
@@ -1661,9 +1762,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 					    port_priv->device->node_type,
 					    port_priv->port_num))
 			goto out;
-		if (!smi_check_local_dr_smp(&recv->mad.smp,
-					    port_priv->device,
-					    port_priv->port_num))
+		if (!smi_check_local_smp(&recv->mad.smp, port_priv->device))
 			goto out;
 	}
 
@@ -1862,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
 
 retry:
 	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
-			 pci_unmap_addr(mad_send_wr, mapping),
+			 pci_unmap_addr(mad_send_wr, header_mapping),
 			 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+			 pci_unmap_addr(mad_send_wr, payload_mapping),
+			 mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
 	queued_send_wr = NULL;
 	spin_lock_irqsave(&send_queue->lock, flags);
 	list_del(&mad_list->list);
@@ -2262,8 +2364,12 @@ static void timeout_sends(void *data)
 static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
 {
 	struct ib_mad_port_private *port_priv = cq->cq_context;
+	unsigned long flags;
 
-	queue_work(port_priv->wq, &port_priv->work);
+	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+	if (!list_empty(&port_priv->port_list))
+		queue_work(port_priv->wq, &port_priv->work);
+	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 }
 
 /*
@@ -2575,18 +2681,23 @@ static int ib_mad_port_open(struct ib_device *device,
 	}
 	INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
 
+	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
+	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
 	ret = ib_mad_port_start(port_priv);
 	if (ret) {
 		printk(KERN_ERR PFX "Couldn't start port\n");
 		goto error9;
 	}
 
-	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
-	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
-	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 	return 0;
 
 error9:
+	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
+	list_del_init(&port_priv->port_list);
+	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
+
 	destroy_workqueue(port_priv->wq);
 error8:
 	destroy_mad_qp(&port_priv->qp_info[1]);
@@ -2623,11 +2734,9 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
 		printk(KERN_ERR PFX "Port %d not found\n", port_num);
 		return -ENODEV;
 	}
-	list_del(&port_priv->port_list);
+	list_del_init(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 
-	/* Stop processing completions. */
-	flush_workqueue(port_priv->wq);
 	destroy_workqueue(port_priv->wq);
 	destroy_mad_qp(&port_priv->qp_info[1]);
 	destroy_mad_qp(&port_priv->qp_info[0]);
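
Taken together, the mad.c hunks replace the single flat send buffer with a header allocation plus a linked list of RMPP data segments (alloc_send_rmpp_list), posted as two scatter/gather entries and DMA-mapped separately as header_mapping and payload_mapping. A sketch of how a sender might fill such a buffer through the new accessor, assuming the seg_size and seg_count fields this patch adds to struct ib_mad_send_buf; locals other than the API calls are illustrative:

    msg = ib_create_send_mad(agent, remote_qpn, pkey_index,
                             1 /* rmpp_active */, hdr_len, data_len,
                             GFP_KERNEL);
    if (IS_ERR(msg))
            return PTR_ERR(msg);

    /* Segments no longer share one flat allocation, so each one is
     * located through ib_get_rmpp_segment() before being filled. */
    for (left = data_len, seg_num = 1; left > 0;
         left -= msg->seg_size, src += msg->seg_size, seg_num++)
            memcpy(ib_get_rmpp_segment(msg, seg_num), src,
                   min_t(int, left, msg->seg_size));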
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 570f78682af3..a7125d4b5ccf 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $
+ * $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 
 #ifndef __IB_MAD_PRIV_H__
@@ -85,6 +85,12 @@ struct ib_mad_private {
 	} mad;
 } __attribute__ ((packed));
 
+struct ib_rmpp_segment {
+	struct list_head list;
+	u32 num;
+	u8 data[0];
+};
+
 struct ib_mad_agent_private {
 	struct list_head agent_list;
 	struct ib_mad_agent agent;
@@ -119,7 +125,8 @@ struct ib_mad_send_wr_private {
 	struct list_head agent_list;
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_buf send_buf;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	DECLARE_PCI_UNMAP_ADDR(header_mapping)
+	DECLARE_PCI_UNMAP_ADDR(payload_mapping)
 	struct ib_send_wr send_wr;
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
@@ -130,11 +137,12 @@ struct ib_mad_send_wr_private {
 	enum ib_wc_status status;
 
 	/* RMPP control */
+	struct list_head rmpp_list;
+	struct ib_rmpp_segment *last_ack_seg;
+	struct ib_rmpp_segment *cur_seg;
 	int last_ack;
 	int seg_num;
 	int newwin;
-	int total_seg;
-	int data_offset;
 	int pad;
 };
 
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 3249e1d8c07b..bacfdd5bddad 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -111,14 +111,14 @@ static int data_offset(u8 mgmt_class)
 		return IB_MGMT_RMPP_HDR;
 }
 
-static void format_ack(struct ib_rmpp_mad *ack,
+static void format_ack(struct ib_mad_send_buf *msg,
 		       struct ib_rmpp_mad *data,
 		       struct mad_rmpp_recv *rmpp_recv)
 {
+	struct ib_rmpp_mad *ack = msg->mad;
 	unsigned long flags;
 
-	memcpy(&ack->mad_hdr, &data->mad_hdr,
-	       data_offset(data->mad_hdr.mgmt_class));
+	memcpy(ack, &data->mad_hdr, msg->hdr_len);
 
 	ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
 	ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
@@ -135,16 +135,16 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
 		     struct ib_mad_recv_wc *recv_wc)
 {
 	struct ib_mad_send_buf *msg;
-	int ret;
+	int ret, hdr_len;
 
+	hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
 	msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
-				 recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
-				 IB_MGMT_RMPP_DATA, GFP_KERNEL);
+				 recv_wc->wc->pkey_index, 1, hdr_len,
+				 0, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
-		   rmpp_recv);
+	format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
 	msg->ah = rmpp_recv->ah;
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
@@ -156,16 +156,17 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
 {
 	struct ib_mad_send_buf *msg;
 	struct ib_ah *ah;
+	int hdr_len;
 
 	ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
 				  recv_wc->recv_buf.grh, agent->port_num);
 	if (IS_ERR(ah))
 		return (void *) ah;
 
+	hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
 	msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
 				 recv_wc->wc->pkey_index, 1,
-				 IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
-				 GFP_KERNEL);
+				 hdr_len, 0, GFP_KERNEL);
 	if (IS_ERR(msg))
 		ib_destroy_ah(ah);
 	else
@@ -195,8 +196,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
 		return;
 
 	rmpp_mad = msg->mad;
-	memcpy(rmpp_mad, recv_wc->recv_buf.mad,
-	       data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
+	memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
 
 	rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
 	rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
@@ -433,44 +433,6 @@ static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
 	return rmpp_wc;
 }
 
-void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf)
-{
-	struct ib_mad_recv_buf *seg_buf;
-	struct ib_rmpp_mad *rmpp_mad;
-	void *data;
-	int size, len, offset;
-	u8 flags;
-
-	len = mad_recv_wc->mad_len;
-	if (len <= sizeof(struct ib_mad)) {
-		memcpy(buf, mad_recv_wc->recv_buf.mad, len);
-		return;
-	}
-
-	offset = data_offset(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
-
-	list_for_each_entry(seg_buf, &mad_recv_wc->rmpp_list, list) {
-		rmpp_mad = (struct ib_rmpp_mad *)seg_buf->mad;
-		flags = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr);
-
-		if (flags & IB_MGMT_RMPP_FLAG_FIRST) {
-			data = rmpp_mad;
-			size = sizeof(*rmpp_mad);
-		} else {
-			data = (void *) rmpp_mad + offset;
-			if (flags & IB_MGMT_RMPP_FLAG_LAST)
-				size = len;
-			else
-				size = sizeof(*rmpp_mad) - offset;
-		}
-
-		memcpy(buf, data, size);
-		len -= size;
-		buf += size;
-	}
-}
-EXPORT_SYMBOL(ib_coalesce_recv_mad);
-
 static struct ib_mad_recv_wc *
 continue_rmpp(struct ib_mad_agent_private *agent,
 	      struct ib_mad_recv_wc *mad_recv_wc)
@@ -570,50 +532,33 @@ start_rmpp(struct ib_mad_agent_private *agent,
 	return mad_recv_wc;
 }
 
-static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
-{
-	return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
-	       (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
-	       (mad_send_wr->seg_num - 1);
-}
-
 static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
 	int timeout;
-	u32 paylen;
+	u32 paylen = 0;
 
 	rmpp_mad = mad_send_wr->send_buf.mad;
 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
-	rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
+	rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
 
 	if (mad_send_wr->seg_num == 1) {
 		rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
-		paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA -
+		paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
 			 mad_send_wr->pad;
-		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
-		mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
-	} else {
-		mad_send_wr->send_wr.num_sge = 2;
-		mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
-		mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
-		mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
-						 mad_send_wr->data_offset;
-		mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
-		rmpp_mad->rmpp_hdr.paylen_newwin = 0;
 	}
 
-	if (mad_send_wr->seg_num == mad_send_wr->total_seg) {
+	if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
 		rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
 		paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
-		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
 	}
+	rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
 
 	/* 2 seconds for an ACK until we can find the packet lifetime */
 	timeout = mad_send_wr->send_buf.timeout_ms;
 	if (!timeout || timeout > 2000)
 		mad_send_wr->timeout = msecs_to_jiffies(2000);
-	mad_send_wr->seg_num++;
+
 	return ib_send_mad(mad_send_wr);
 }
 
@@ -629,7 +574,7 @@ static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
 	if (!mad_send_wr)
 		goto out;	/* Unmatched send */
 
-	if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
 		goto out;	/* Send is already done */
 
@@ -645,6 +590,18 @@ out:
 	spin_unlock_irqrestore(&agent->lock, flags);
 }
 
+static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
+				   int seg_num)
+{
+	struct list_head *list;
+
+	wr->last_ack = seg_num;
+	list = &wr->last_ack_seg->list;
+	list_for_each_entry(wr->last_ack_seg, list, list)
+		if (wr->last_ack_seg->num == seg_num)
+			break;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 			     struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -675,11 +632,12 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 	if (!mad_send_wr)
 		goto out;	/* Unmatched ACK */
 
-	if ((mad_send_wr->last_ack == mad_send_wr->total_seg) ||
+	if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
 	    (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
 		goto out;	/* Send is already done */
 
-	if (seg_num > mad_send_wr->total_seg || seg_num > mad_send_wr->newwin) {
+	if (seg_num > mad_send_wr->send_buf.seg_count ||
+	    seg_num > mad_send_wr->newwin) {
 		spin_unlock_irqrestore(&agent->lock, flags);
 		abort_send(agent, rmpp_mad->mad_hdr.tid,
 			   IB_MGMT_RMPP_STATUS_S2B);
@@ -691,11 +649,11 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 		goto out;	/* Old ACK */
 
 	if (seg_num > mad_send_wr->last_ack) {
-		mad_send_wr->last_ack = seg_num;
+		adjust_last_ack(mad_send_wr, seg_num);
 		mad_send_wr->retries = mad_send_wr->send_buf.retries;
 	}
 	mad_send_wr->newwin = newwin;
-	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
+	if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
 		/* If no response is expected, the ACK completes the send */
 		if (!mad_send_wr->send_buf.timeout_ms) {
 			struct ib_mad_send_wc wc;
@@ -714,7 +672,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 					     mad_send_wr->send_buf.timeout_ms);
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
-		   mad_send_wr->seg_num <= mad_send_wr->total_seg) {
+		   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
 		/* Send failure will just result in a timeout/retry */
 		ret = send_next_seg(mad_send_wr);
 		if (ret)
@@ -838,31 +796,19 @@ out:
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
-	int i, total_len, ret;
+	int ret;
 
 	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED;
 
-	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
+	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
+		mad_send_wr->seg_num = 1;
 		return IB_RMPP_RESULT_INTERNAL;
+	}
 
-	if (mad_send_wr->send_wr.num_sge > 1)
-		return -EINVAL;		/* TODO: support num_sge > 1 */
-
-	mad_send_wr->seg_num = 1;
 	mad_send_wr->newwin = 1;
-	mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
-
-	total_len = 0;
-	for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
-		total_len += mad_send_wr->send_wr.sg_list[i].length;
-
-        mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
-			(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
-	mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
-			   be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 
 	/* We need to wait for the final ACK even if there isn't a response */
 	mad_send_wr->refcount += (mad_send_wr->timeout == 0);
@@ -893,14 +839,14 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
 	if (!mad_send_wr->timeout)
 		return IB_RMPP_RESULT_PROCESSED; /* Response received */
 
-	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
+	if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
 		mad_send_wr->timeout =
 			msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 		return IB_RMPP_RESULT_PROCESSED; /* Send done */
 	}
 
-	if (mad_send_wr->seg_num > mad_send_wr->newwin ||
-	    mad_send_wr->seg_num > mad_send_wr->total_seg)
+	if (mad_send_wr->seg_num == mad_send_wr->newwin ||
+	    mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
 		return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
 
 	ret = send_next_seg(mad_send_wr);
@@ -921,10 +867,12 @@ int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
 
-	if (mad_send_wr->last_ack == mad_send_wr->total_seg)
+	if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
 		return IB_RMPP_RESULT_PROCESSED;
 
-	mad_send_wr->seg_num = mad_send_wr->last_ack + 1;
+	mad_send_wr->seg_num = mad_send_wr->last_ack;
+	mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
+
 	ret = send_next_seg(mad_send_wr);
 	if (ret)
 		return IB_RMPP_RESULT_PROCESSED;
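
The mad_rmpp.c changes move the sender from offset arithmetic over one buffer (the removed get_seg_addr()) to walking the segment list, with last_ack_seg and cur_seg tracking position alongside the numeric counters. On retry, both cursors rewind to the last acknowledged segment, and send_next_seg() now pre-increments seg_num, so the first segment resent is last_ack + 1; subsequent segments go out from the send-completion path while seg_num stays below both newwin and seg_count. A condensed view of the rewind, mirroring ib_retry_rmpp above:

    wr->seg_num = wr->last_ack;       /* numeric cursor */
    wr->cur_seg = wr->last_ack_seg;   /* list cursor into rmpp_list */
    ret = send_next_seg(wr);          /* bumps seg_num, then sends that segment */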
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 2b3c40198f81..3011bfd86dc5 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -49,19 +49,16 @@ extern int smi_check_forward_dr_smp(struct ib_smp *smp);
 extern int smi_handle_dr_smp_send(struct ib_smp *smp,
 				  u8 node_type,
 				  int port_num);
-extern int smi_check_local_dr_smp(struct ib_smp *smp,
-				  struct ib_device *device,
-				  int port_num);
 
 /*
  * Return 1 if the SMP should be handled by the local SMA/SM via process_mad
  */
-static inline int smi_check_local_smp(struct ib_mad_agent *mad_agent,
-                         	      struct ib_smp *smp)
+static inline int smi_check_local_smp(struct ib_smp *smp,
+				      struct ib_device *device)
 {
 	/* C14-9:3 -- We're at the end of the DR segment of path */
 	/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
-	return ((mad_agent->device->process_mad &&
+	return ((device->process_mad &&
 		!ib_get_smp_direction(smp) &&
 		(smp->hop_ptr == smp->hop_cnt + 1)));
 }
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 5982d687a000..15121cb5a1f6 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -112,7 +112,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
 		return ret;
 
 	return sprintf(buf, "%d: %s\n", attr.state,
-		       attr.state >= 0 && attr.state <= ARRAY_SIZE(state_name) ?
+		       attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
 		       state_name[attr.state] : "UNKNOWN");
 }
 
@@ -472,8 +472,10 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
 			goto err;
 
 		if (snprintf(element->name, sizeof(element->name),
-			     "%d", i) >= sizeof(element->name))
+			     "%d", i) >= sizeof(element->name)) {
+			kfree(element);
 			goto err;
+		}
 
 		element->attr.attr.name  = element->name;
 		element->attr.attr.mode  = S_IRUGO;
@@ -628,14 +630,42 @@ static ssize_t show_node_guid(struct class_device *cdev, char *buf)
 		       be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
 }
 
+static ssize_t show_node_desc(struct class_device *cdev, char *buf)
+{
+	struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+
+	return sprintf(buf, "%.64s\n", dev->node_desc);
+}
+
+static ssize_t set_node_desc(struct class_device *cdev, const char *buf,
+			      size_t count)
+{
+	struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
+	struct ib_device_modify desc = {};
+	int ret;
+
+	if (!dev->modify_device)
+		return -EIO;
+
+	memcpy(desc.node_desc, buf, min_t(int, count, 64));
+	ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
 static CLASS_DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
 static CLASS_DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
 static CLASS_DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
+static CLASS_DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc,
+			 set_node_desc);
 
 static struct class_device_attribute *ib_class_attributes[] = {
 	&class_device_attr_node_type,
 	&class_device_attr_sys_image_guid,
-	&class_device_attr_node_guid
+	&class_device_attr_node_guid,
+	&class_device_attr_node_desc
 };
 
 static struct class ib_class = {
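
The sysfs.c additions expose the 64-byte NodeDescription as a writable node_desc attribute backed by ib_modify_device(..., IB_DEVICE_MODIFY_NODE_DESC, ...), and also fix an off-by-one in state_show() and a leak of the group-attribute element on the snprintf overflow path. A userspace sketch of driving the new attribute; the device name mthca0 is an assumption for illustration:

    #include <fcntl.h>
    #include <unistd.h>

    int set_node_desc(void)
    {
            int fd = open("/sys/class/infiniband/mthca0/node_desc", O_WRONLY);
            if (fd < 0)
                    return -1;
            /* the kernel copies at most 64 bytes of what is written */
            write(fd, "fabric-node-7", 13);
            close(fd);
            return 0;
    }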
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c908de8db5a9..fb6cd42601f9 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $
+ * $Id: user_mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
  */
 
 #include <linux/module.h>
@@ -121,6 +121,7 @@ struct ib_umad_file {
 
 struct ib_umad_packet {
 	struct ib_mad_send_buf *msg;
+	struct ib_mad_recv_wc  *recv_wc;
 	struct list_head   list;
 	int		   length;
 	struct ib_user_mad mad;
@@ -176,31 +177,32 @@ static int queue_packet(struct ib_umad_file *file,
 	return ret;
 }
 
+static int data_offset(u8 mgmt_class)
+{
+	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
+		return IB_MGMT_SA_HDR;
+	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
+		return IB_MGMT_VENDOR_HDR;
+	else
+		return IB_MGMT_RMPP_HDR;
+}
+
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *send_wc)
 {
 	struct ib_umad_file *file = agent->context;
-	struct ib_umad_packet *timeout;
 	struct ib_umad_packet *packet = send_wc->send_buf->context[0];
 
 	ib_destroy_ah(packet->msg->ah);
 	ib_free_send_mad(packet->msg);
 
 	if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
-		timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
-		if (!timeout)
-			goto out;
-
-		timeout->length 	= IB_MGMT_MAD_HDR;
-		timeout->mad.hdr.id 	= packet->mad.hdr.id;
-		timeout->mad.hdr.status = ETIMEDOUT;
-		memcpy(timeout->mad.data, packet->mad.data,
-		       sizeof (struct ib_mad_hdr));
-
-		if (queue_packet(file, agent, timeout))
-			kfree(timeout);
+		packet->length = IB_MGMT_MAD_HDR;
+		packet->mad.hdr.status = ETIMEDOUT;
+		if (!queue_packet(file, agent, packet))
+			return;
 	}
-out:
 	kfree(packet);
 }
 
@@ -209,22 +211,20 @@ static void recv_handler(struct ib_mad_agent *agent,
 {
 	struct ib_umad_file *file = agent->context;
 	struct ib_umad_packet *packet;
-	int length;
 
 	if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
-		goto out;
+		goto err1;
 
-	length = mad_recv_wc->mad_len;
-	packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
+	packet = kzalloc(sizeof *packet, GFP_KERNEL);
 	if (!packet)
-		goto out;
+		goto err1;
 
-	packet->length = length;
-
-	ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
+	packet->length = mad_recv_wc->mad_len;
+	packet->recv_wc = mad_recv_wc;
 
 	packet->mad.hdr.status    = 0;
-	packet->mad.hdr.length    = length + sizeof (struct ib_user_mad);
+	packet->mad.hdr.length    = sizeof (struct ib_user_mad) +
+				    mad_recv_wc->mad_len;
 	packet->mad.hdr.qpn 	  = cpu_to_be32(mad_recv_wc->wc->src_qp);
 	packet->mad.hdr.lid 	  = cpu_to_be16(mad_recv_wc->wc->slid);
 	packet->mad.hdr.sl  	  = mad_recv_wc->wc->sl;
@@ -240,12 +240,79 @@ static void recv_handler(struct ib_mad_agent *agent,
 	}
 
 	if (queue_packet(file, agent, packet))
-		kfree(packet);
+		goto err2;
+	return;
 
-out:
+err2:
+	kfree(packet);
+err1:
 	ib_free_recv_mad(mad_recv_wc);
 }
 
+static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
+			     size_t count)
+{
+	struct ib_mad_recv_buf *recv_buf;
+	int left, seg_payload, offset, max_seg_payload;
+
+	/* We need enough room to copy the first (or only) MAD segment. */
+	recv_buf = &packet->recv_wc->recv_buf;
+	if ((packet->length <= sizeof (*recv_buf->mad) &&
+	     count < sizeof (packet->mad) + packet->length) ||
+	    (packet->length > sizeof (*recv_buf->mad) &&
+	     count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
+		return -EINVAL;
+
+	if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
+		return -EFAULT;
+
+	buf += sizeof (packet->mad);
+	seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
+	if (copy_to_user(buf, recv_buf->mad, seg_payload))
+		return -EFAULT;
+
+	if (seg_payload < packet->length) {
+		/*
+		 * Multipacket RMPP MAD message. Copy remainder of message.
+		 * Note that last segment may have a shorter payload.
+		 */
+		if (count < sizeof (packet->mad) + packet->length) {
+			/*
+			 * The buffer is too small, return the first RMPP segment,
+			 * which includes the RMPP message length.
+			 */
+			return -ENOSPC;
+		}
+		offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class);
+		max_seg_payload = sizeof (struct ib_mad) - offset;
+
+		for (left = packet->length - seg_payload, buf += seg_payload;
+		     left; left -= seg_payload, buf += seg_payload) {
+			recv_buf = container_of(recv_buf->list.next,
+						struct ib_mad_recv_buf, list);
+			seg_payload = min(left, max_seg_payload);
+			if (copy_to_user(buf, ((void *) recv_buf->mad) + offset,
+					 seg_payload))
+				return -EFAULT;
+		}
+	}
+	return sizeof (packet->mad) + packet->length;
+}
+
+static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
+			     size_t count)
+{
+	ssize_t size = sizeof (packet->mad) + packet->length;
+
+	if (count < size)
+		return -EINVAL;
+
+	if (copy_to_user(buf, &packet->mad, size))
+		return -EFAULT;
+
+	return size;
+}
+
 static ssize_t ib_umad_read(struct file *filp, char __user *buf,
 			    size_t count, loff_t *pos)
 {
@@ -253,7 +320,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
 	struct ib_umad_packet *packet;
 	ssize_t ret;
 
-	if (count < sizeof (struct ib_user_mad) + sizeof (struct ib_mad))
+	if (count < sizeof (struct ib_user_mad))
 		return -EINVAL;
 
 	spin_lock_irq(&file->recv_lock);
@@ -276,28 +343,44 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
 
 	spin_unlock_irq(&file->recv_lock);
 
-	if (count < packet->length + sizeof (struct ib_user_mad)) {
-		/* Return length needed (and first RMPP segment) if too small */
-		if (copy_to_user(buf, &packet->mad,
-				 sizeof (struct ib_user_mad) + sizeof (struct ib_mad)))
-			ret = -EFAULT;
-		else
-			ret = -ENOSPC;
-	} else if (copy_to_user(buf, &packet->mad,
-				packet->length + sizeof (struct ib_user_mad)))
-		ret = -EFAULT;
+	if (packet->recv_wc)
+		ret = copy_recv_mad(buf, packet, count);
 	else
-		ret = packet->length + sizeof (struct ib_user_mad);
+		ret = copy_send_mad(buf, packet, count);
+
 	if (ret < 0) {
 		/* Requeue packet */
 		spin_lock_irq(&file->recv_lock);
 		list_add(&packet->list, &file->recv_list);
 		spin_unlock_irq(&file->recv_lock);
-	} else
+	} else {
+		if (packet->recv_wc)
+			ib_free_recv_mad(packet->recv_wc);
 		kfree(packet);
+	}
 	return ret;
 }
 
+static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf)
+{
+	int left, seg;
+
+	/* Copy class specific header */
+	if ((msg->hdr_len > IB_MGMT_RMPP_HDR) &&
+	    copy_from_user(msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR,
+			   msg->hdr_len - IB_MGMT_RMPP_HDR))
+		return -EFAULT;
+
+	/* All headers are in place.  Copy data segments. */
+	for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0;
+	     seg++, left -= msg->seg_size, buf += msg->seg_size) {
+		if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf,
+				   min(left, msg->seg_size)))
+			return -EFAULT;
+	}
+	return 0;
+}
+
 static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 			     size_t count, loff_t *pos)
 {
@@ -309,14 +392,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 	struct ib_rmpp_mad *rmpp_mad;
 	u8 method;
 	__be64 *tid;
-	int ret, length, hdr_len, copy_offset;
-	int rmpp_active, has_rmpp_header;
+	int ret, data_len, hdr_len, copy_offset, rmpp_active;
 
 	if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
 		return -EINVAL;
 
-	length = count - sizeof (struct ib_user_mad);
-	packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+	packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
 	if (!packet)
 		return -ENOMEM;
 
@@ -363,35 +444,25 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 	if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
 		hdr_len = IB_MGMT_SA_HDR;
 		copy_offset = IB_MGMT_RMPP_HDR;
-		has_rmpp_header = 1;
+		rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+			      IB_MGMT_RMPP_FLAG_ACTIVE;
 	} else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START &&
 		   rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) {
-			hdr_len = IB_MGMT_VENDOR_HDR;
-			copy_offset = IB_MGMT_RMPP_HDR;
-			has_rmpp_header = 1;
+		hdr_len = IB_MGMT_VENDOR_HDR;
+		copy_offset = IB_MGMT_RMPP_HDR;
+		rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+			      IB_MGMT_RMPP_FLAG_ACTIVE;
 	} else {
 		hdr_len = IB_MGMT_MAD_HDR;
 		copy_offset = IB_MGMT_MAD_HDR;
-		has_rmpp_header = 0;
-	}
-
-	if (has_rmpp_header)
-		rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
-			      IB_MGMT_RMPP_FLAG_ACTIVE;
-	else
 		rmpp_active = 0;
-
-	/* Validate that the management class can support RMPP */
-	if (rmpp_active && !agent->rmpp_version) {
-		ret = -EINVAL;
-		goto err_ah;
 	}
 
+	data_len = count - sizeof (struct ib_user_mad) - hdr_len;
 	packet->msg = ib_create_send_mad(agent,
 					 be32_to_cpu(packet->mad.hdr.qpn),
-					 0, rmpp_active,
-					 hdr_len, length - hdr_len,
-					 GFP_KERNEL);
+					 0, rmpp_active, hdr_len,
+					 data_len, GFP_KERNEL);
 	if (IS_ERR(packet->msg)) {
 		ret = PTR_ERR(packet->msg);
 		goto err_ah;
@@ -402,14 +473,21 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 	packet->msg->retries 	= packet->mad.hdr.retries;
 	packet->msg->context[0] = packet;
 
-	/* Copy MAD headers (RMPP header in place) */
+	/* Copy MAD header.  Any RMPP header is already in place. */
 	memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
-	/* Now, copy rest of message from user into send buffer */
-	if (copy_from_user(packet->msg->mad + copy_offset,
-			   buf + sizeof (struct ib_user_mad) + copy_offset,
-			   length - copy_offset)) {
-		ret = -EFAULT;
-		goto err_msg;
+	buf += sizeof (struct ib_user_mad);
+
+	if (!rmpp_active) {
+		if (copy_from_user(packet->msg->mad + copy_offset,
+				   buf + copy_offset,
+				   hdr_len + data_len - copy_offset)) {
+			ret = -EFAULT;
+			goto err_msg;
+		}
+	} else {
+		ret = copy_rmpp_mad(packet->msg, buf);
+		if (ret)
+			goto err_msg;
 	}
 
 	/*
@@ -433,18 +511,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 		goto err_msg;
 
 	up_read(&file->port->mutex);
-
 	return count;
 
 err_msg:
 	ib_free_send_mad(packet->msg);
-
 err_ah:
 	ib_destroy_ah(ah);
-
 err_up:
 	up_read(&file->port->mutex);
-
 err:
 	kfree(packet);
 	return ret;
@@ -627,8 +701,11 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
 	already_dead = file->agents_dead;
 	file->agents_dead = 1;
 
-	list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
+	list_for_each_entry_safe(packet, tmp, &file->recv_list, list) {
+		if (packet->recv_wc)
+			ib_free_recv_mad(packet->recv_wc);
 		kfree(packet);
+	}
 
 	list_del(&file->port_list);
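
user_mad.c stops coalescing received RMPP messages into one contiguous kernel buffer (ib_coalesce_recv_mad is gone); copy_recv_mad() now streams the kernel's segment list straight to userspace. A read() whose buffer fits only the first 256-byte segment fails with ENOSPC after delivering that segment, whose header reports the total length, and the kernel requeues the MAD so it can be reread. A userspace sketch of that contract, assuming the ib_user_mad ABI of this era:

    n = read(fd, umad, sizeof (struct ib_user_mad) + 256);
    if (n < 0 && errno == ENOSPC) {
            /* first segment landed in umad; hdr.length holds the total */
            size_t total = ((struct ib_user_mad *) umad)->hdr.length;
            umad = realloc(umad, total);
            n = read(fd, umad, total);
    }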
 
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index f7eecbc6af6c..3372d67ff139 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 PathScale, Inc. All rights reserved.
@@ -178,10 +178,12 @@ IB_UVERBS_DECLARE_CMD(reg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
 IB_UVERBS_DECLARE_CMD(create_comp_channel);
 IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(resize_cq);
 IB_UVERBS_DECLARE_CMD(poll_cq);
 IB_UVERBS_DECLARE_CMD(req_notify_cq);
 IB_UVERBS_DECLARE_CMD(destroy_cq);
 IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(query_qp);
 IB_UVERBS_DECLARE_CMD(modify_qp);
 IB_UVERBS_DECLARE_CMD(destroy_qp);
 IB_UVERBS_DECLARE_CMD(post_send);
@@ -193,6 +195,7 @@ IB_UVERBS_DECLARE_CMD(attach_mcast);
 IB_UVERBS_DECLARE_CMD(detach_mcast);
 IB_UVERBS_DECLARE_CMD(create_srq);
 IB_UVERBS_DECLARE_CMD(modify_srq);
+IB_UVERBS_DECLARE_CMD(query_srq);
 IB_UVERBS_DECLARE_CMD(destroy_srq);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 407b6284d7d5..9f69bd48eb1b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,7 +1,8 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -675,6 +676,46 @@ err:
 	return ret;
 }
 
+ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_resize_cq	cmd;
+	struct ib_uverbs_resize_cq_resp	resp;
+	struct ib_udata                 udata;
+	struct ib_cq			*cq;
+	int				ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	mutex_lock(&ib_uverbs_idr_mutex);
+
+	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+	if (!cq || cq->uobject->context != file->ucontext || !cq->device->resize_cq)
+		goto out;
+
+	ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+	if (ret)
+		goto out;
+
+	memset(&resp, 0, sizeof resp);
+	resp.cqe = cq->cqe;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	mutex_unlock(&ib_uverbs_idr_mutex);
+
+	return ret ? ret : in_len;
+}
+
 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 			  const char __user *buf, int in_len,
 			  int out_len)
@@ -956,6 +997,106 @@ err_up:
 	return ret;
 }
 
+ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
+			   const char __user *buf, int in_len,
+			   int out_len)
+{
+	struct ib_uverbs_query_qp      cmd;
+	struct ib_uverbs_query_qp_resp resp;
+	struct ib_qp                   *qp;
+	struct ib_qp_attr              *attr;
+	struct ib_qp_init_attr         *init_attr;
+	int                            ret;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	attr      = kmalloc(sizeof *attr, GFP_KERNEL);
+	init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
+	if (!attr || !init_attr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mutex_lock(&ib_uverbs_idr_mutex);
+
+	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+	if (qp && qp->uobject->context == file->ucontext)
+		ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
+	else
+		ret = -EINVAL;
+
+	mutex_unlock(&ib_uverbs_idr_mutex);
+
+	if (ret)
+		goto out;
+
+	memset(&resp, 0, sizeof resp);
+
+	resp.qp_state               = attr->qp_state;
+	resp.cur_qp_state           = attr->cur_qp_state;
+	resp.path_mtu               = attr->path_mtu;
+	resp.path_mig_state         = attr->path_mig_state;
+	resp.qkey                   = attr->qkey;
+	resp.rq_psn                 = attr->rq_psn;
+	resp.sq_psn                 = attr->sq_psn;
+	resp.dest_qp_num            = attr->dest_qp_num;
+	resp.qp_access_flags        = attr->qp_access_flags;
+	resp.pkey_index             = attr->pkey_index;
+	resp.alt_pkey_index         = attr->alt_pkey_index;
+	resp.en_sqd_async_notify    = attr->en_sqd_async_notify;
+	resp.max_rd_atomic          = attr->max_rd_atomic;
+	resp.max_dest_rd_atomic     = attr->max_dest_rd_atomic;
+	resp.min_rnr_timer          = attr->min_rnr_timer;
+	resp.port_num               = attr->port_num;
+	resp.timeout                = attr->timeout;
+	resp.retry_cnt              = attr->retry_cnt;
+	resp.rnr_retry              = attr->rnr_retry;
+	resp.alt_port_num           = attr->alt_port_num;
+	resp.alt_timeout            = attr->alt_timeout;
+
+	memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+	resp.dest.flow_label        = attr->ah_attr.grh.flow_label;
+	resp.dest.sgid_index        = attr->ah_attr.grh.sgid_index;
+	resp.dest.hop_limit         = attr->ah_attr.grh.hop_limit;
+	resp.dest.traffic_class     = attr->ah_attr.grh.traffic_class;
+	resp.dest.dlid              = attr->ah_attr.dlid;
+	resp.dest.sl                = attr->ah_attr.sl;
+	resp.dest.src_path_bits     = attr->ah_attr.src_path_bits;
+	resp.dest.static_rate       = attr->ah_attr.static_rate;
+	resp.dest.is_global         = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
+	resp.dest.port_num          = attr->ah_attr.port_num;
+
+	memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+	resp.alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
+	resp.alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
+	resp.alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
+	resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+	resp.alt_dest.dlid          = attr->alt_ah_attr.dlid;
+	resp.alt_dest.sl            = attr->alt_ah_attr.sl;
+	resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+	resp.alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
+	resp.alt_dest.is_global     = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
+	resp.alt_dest.port_num      = attr->alt_ah_attr.port_num;
+
+	resp.max_send_wr            = init_attr->cap.max_send_wr;
+	resp.max_recv_wr            = init_attr->cap.max_recv_wr;
+	resp.max_send_sge           = init_attr->cap.max_send_sge;
+	resp.max_recv_sge           = init_attr->cap.max_recv_sge;
+	resp.max_inline_data        = init_attr->cap.max_inline_data;
+	resp.sq_sig_all             = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	kfree(attr);
+	kfree(init_attr);
+
+	return ret ? ret : in_len;
+}
+
 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 			    const char __user *buf, int in_len,
 			    int out_len)
@@ -990,7 +1131,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	attr->dest_qp_num 	  = cmd.dest_qp_num;
 	attr->qp_access_flags 	  = cmd.qp_access_flags;
 	attr->pkey_index 	  = cmd.pkey_index;
-	attr->alt_pkey_index 	  = cmd.pkey_index;
+	attr->alt_pkey_index 	  = cmd.alt_pkey_index;
 	attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
 	attr->max_rd_atomic 	  = cmd.max_rd_atomic;
 	attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
@@ -1094,8 +1235,8 @@ out:
 }
 
 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+			    const char __user *buf, int in_len,
+			    int out_len)
 {
 	struct ib_uverbs_post_send      cmd;
 	struct ib_uverbs_post_send_resp resp;
@@ -1323,8 +1464,8 @@ err:
 }
 
 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+			    const char __user *buf, int in_len,
+			    int out_len)
 {
 	struct ib_uverbs_post_recv      cmd;
 	struct ib_uverbs_post_recv_resp resp;
@@ -1374,8 +1515,8 @@ out:
 }
 
 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
-                            const char __user *buf, int in_len,
-                            int out_len)
+				const char __user *buf, int in_len,
+				int out_len)
 {
 	struct ib_uverbs_post_srq_recv      cmd;
 	struct ib_uverbs_post_srq_recv_resp resp;
@@ -1723,6 +1864,8 @@ retry:
 		goto err_destroy;
 
 	resp.srq_handle = uobj->uobject.id;
+	resp.max_wr     = attr.attr.max_wr;
+	resp.max_sge    = attr.attr.max_sge;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
@@ -1783,6 +1926,49 @@ out:
 	return ret ? ret : in_len;
 }
 
+ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
+			    const char __user *buf,
+			    int in_len, int out_len)
+{
+	struct ib_uverbs_query_srq      cmd;
+	struct ib_uverbs_query_srq_resp resp;
+	struct ib_srq_attr              attr;
+	struct ib_srq                   *srq;
+	int                             ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	mutex_lock(&ib_uverbs_idr_mutex);
+
+	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+	if (srq && srq->uobject->context == file->ucontext)
+		ret = ib_query_srq(srq, &attr);
+	else
+		ret = -EINVAL;
+
+	mutex_unlock(&ib_uverbs_idr_mutex);
+
+	if (ret)
+		goto out;
+
+	memset(&resp, 0, sizeof resp);
+
+	resp.max_wr    = attr.max_wr;
+	resp.max_sge   = attr.max_sge;
+	resp.srq_limit = attr.srq_limit;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	return ret ? ret : in_len;
+}
+
 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 			      const char __user *buf, int in_len,
 			      int out_len)
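
uverbs_cmd.c adds kernel-side handlers for three new userspace verbs (resize_cq, query_qp, query_srq), fixes modify_qp to take alt_pkey_index from the correct command field, and reports the actual max_wr/max_sge back from create_srq. From userspace these commands surface through the matching libibverbs entry points; a hedged sketch, assuming a libibverbs of the same vintage:

    struct ibv_qp_attr      attr;
    struct ibv_qp_init_attr init_attr;

    if (ibv_resize_cq(cq, new_cqe))                          /* IB_USER_VERBS_CMD_RESIZE_CQ */
            perror("ibv_resize_cq");
    if (!ibv_query_qp(qp, &attr, IBV_QP_STATE, &init_attr))  /* IB_USER_VERBS_CMD_QUERY_QP */
            printf("qp state: %d\n", attr.qp_state);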
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 903f85a4bc0c..ff092a0a94da 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 PathScale, Inc. All rights reserved.
@@ -91,10 +91,12 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_DEREG_MR]      	= ib_uverbs_dereg_mr,
 	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
 	[IB_USER_VERBS_CMD_CREATE_CQ]     	= ib_uverbs_create_cq,
+	[IB_USER_VERBS_CMD_RESIZE_CQ]     	= ib_uverbs_resize_cq,
 	[IB_USER_VERBS_CMD_POLL_CQ]     	= ib_uverbs_poll_cq,
 	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]     	= ib_uverbs_req_notify_cq,
 	[IB_USER_VERBS_CMD_DESTROY_CQ]    	= ib_uverbs_destroy_cq,
 	[IB_USER_VERBS_CMD_CREATE_QP]     	= ib_uverbs_create_qp,
+	[IB_USER_VERBS_CMD_QUERY_QP]     	= ib_uverbs_query_qp,
 	[IB_USER_VERBS_CMD_MODIFY_QP]     	= ib_uverbs_modify_qp,
 	[IB_USER_VERBS_CMD_DESTROY_QP]    	= ib_uverbs_destroy_qp,
 	[IB_USER_VERBS_CMD_POST_SEND]    	= ib_uverbs_post_send,
@@ -106,6 +108,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_DETACH_MCAST]  	= ib_uverbs_detach_mcast,
 	[IB_USER_VERBS_CMD_CREATE_SRQ]    	= ib_uverbs_create_srq,
 	[IB_USER_VERBS_CMD_MODIFY_SRQ]    	= ib_uverbs_modify_srq,
+	[IB_USER_VERBS_CMD_QUERY_SRQ]     	= ib_uverbs_query_srq,
 	[IB_USER_VERBS_CMD_DESTROY_SRQ]   	= ib_uverbs_destroy_srq,
 };
 
@@ -461,7 +464,6 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
 				event->event, &uobj->async_list,
 				&uobj->async_events_reported);
-				
 }
 
 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c857361be449..cae0845f472a 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -5,7 +5,7 @@
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -245,6 +245,258 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
 }
 EXPORT_SYMBOL(ib_create_qp);
 
+static const struct {
+	int			valid;
+	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETY + 1];
+	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETY + 1];
+} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+	[IB_QPS_RESET] = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR]   = { .valid = 1 },
+		[IB_QPS_INIT]  = {
+			.valid = 1,
+			.req_param = {
+				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_QKEY),
+				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+			}
+		},
+	},
+	[IB_QPS_INIT]  = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 },
+		[IB_QPS_INIT]  = {
+			.valid = 1,
+			.opt_param = {
+				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_QKEY),
+				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+			}
+		},
+		[IB_QPS_RTR]   = {
+			.valid = 1,
+			.req_param = {
+				[IB_QPT_UC]  = (IB_QP_AV			|
+						IB_QP_PATH_MTU			|
+						IB_QP_DEST_QPN			|
+						IB_QP_RQ_PSN),
+				[IB_QPT_RC]  = (IB_QP_AV			|
+						IB_QP_PATH_MTU			|
+						IB_QP_DEST_QPN			|
+						IB_QP_RQ_PSN			|
+						IB_QP_MAX_DEST_RD_ATOMIC	|
+						IB_QP_MIN_RNR_TIMER),
+			},
+			.opt_param = {
+				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
+						 IB_QP_QKEY),
+				 [IB_QPT_UC]  = (IB_QP_ALT_PATH			|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_PKEY_INDEX),
+				 [IB_QPT_RC]  = (IB_QP_ALT_PATH			|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_PKEY_INDEX),
+				 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
+						 IB_QP_QKEY),
+				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
+						 IB_QP_QKEY),
+			 }
+		}
+	},
+	[IB_QPS_RTR]   = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 },
+		[IB_QPS_RTS]   = {
+			.valid = 1,
+			.req_param = {
+				[IB_QPT_UD]  = IB_QP_SQ_PSN,
+				[IB_QPT_UC]  = IB_QP_SQ_PSN,
+				[IB_QPT_RC]  = (IB_QP_TIMEOUT			|
+						IB_QP_RETRY_CNT			|
+						IB_QP_RNR_RETRY			|
+						IB_QP_SQ_PSN			|
+						IB_QP_MAX_QP_RD_ATOMIC),
+				[IB_QPT_SMI] = IB_QP_SQ_PSN,
+				[IB_QPT_GSI] = IB_QP_SQ_PSN,
+			},
+			.opt_param = {
+				 [IB_QPT_UD]  = (IB_QP_CUR_STATE		|
+						 IB_QP_QKEY),
+				 [IB_QPT_UC]  = (IB_QP_CUR_STATE		|
+						 IB_QP_ALT_PATH			|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_PATH_MIG_STATE),
+				 [IB_QPT_RC]  = (IB_QP_CUR_STATE		|
+						 IB_QP_ALT_PATH			|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_MIN_RNR_TIMER		|
+						 IB_QP_PATH_MIG_STATE),
+				 [IB_QPT_SMI] = (IB_QP_CUR_STATE		|
+						 IB_QP_QKEY),
+				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		|
+						 IB_QP_QKEY),
+			 }
+		}
+	},
+	[IB_QPS_RTS]   = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 },
+		[IB_QPS_RTS]   = {
+			.valid = 1,
+			.opt_param = {
+				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_ALT_PATH			|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_RC]  = (IB_QP_CUR_STATE			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_ALT_PATH			|
+						IB_QP_PATH_MIG_STATE		|
+						IB_QP_MIN_RNR_TIMER),
+				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+			}
+		},
+		[IB_QPS_SQD]   = {
+			.valid = 1,
+			.opt_param = {
+				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
+			}
+		},
+	},
+	[IB_QPS_SQD]   = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 },
+		[IB_QPS_RTS]   = {
+			.valid = 1,
+			.opt_param = {
+				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_RC]  = (IB_QP_CUR_STATE			|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+			}
+		},
+		[IB_QPS_SQD]   = {
+			.valid = 1,
+			.opt_param = {
+				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+				[IB_QPT_UC]  = (IB_QP_AV			|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PKEY_INDEX		|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_RC]  = (IB_QP_PORT			|
+						IB_QP_AV			|
+						IB_QP_TIMEOUT			|
+						IB_QP_RETRY_CNT			|
+						IB_QP_RNR_RETRY			|
+						IB_QP_MAX_QP_RD_ATOMIC		|
+						IB_QP_MAX_DEST_RD_ATOMIC	|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PKEY_INDEX		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_QKEY),
+			}
+		}
+	},
+	[IB_QPS_SQE]   = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 },
+		[IB_QPS_RTS]   = {
+			.valid = 1,
+			.opt_param = {
+				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
+						IB_QP_QKEY),
+			}
+		}
+	},
+	[IB_QPS_ERR] = {
+		[IB_QPS_RESET] = { .valid = 1 },
+		[IB_QPS_ERR] =   { .valid = 1 }
+	}
+};
+
+int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+		       enum ib_qp_type type, enum ib_qp_attr_mask mask)
+{
+	enum ib_qp_attr_mask req_param, opt_param;
+
+	if (cur_state  < 0 || cur_state  > IB_QPS_ERR ||
+	    next_state < 0 || next_state > IB_QPS_ERR)
+		return 0;
+
+	if (mask & IB_QP_CUR_STATE  &&
+	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+		return 0;
+
+	if (!qp_state_table[cur_state][next_state].valid)
+		return 0;
+
+	req_param = qp_state_table[cur_state][next_state].req_param[type];
+	opt_param = qp_state_table[cur_state][next_state].opt_param[type];
+
+	if ((mask & req_param) != req_param)
+		return 0;
+
+	if (mask & ~(req_param | opt_param | IB_QP_STATE))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL(ib_modify_qp_is_ok);
+
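A minimal sketch of the intended call site in a driver's modify_qp
method (hypothetical variable names, mirroring how the mthca rework
later in this patch uses it):

	enum ib_qp_state cur_state, new_state;

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask))
		return -EINVAL;

Note that IB_QP_STATE never appears in the table itself; the final mask
test treats it as implicitly optional for every valid transition.
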
 int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
@@ -322,11 +574,10 @@ int ib_destroy_cq(struct ib_cq *cq)
 }
 EXPORT_SYMBOL(ib_destroy_cq);
 
-int ib_resize_cq(struct ib_cq *cq,
-                 int           cqe)
+int ib_resize_cq(struct ib_cq *cq, int cqe)
 {
 	return cq->device->resize_cq ?
-		cq->device->resize_cq(cq, cqe) : -ENOSYS;
+		cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
 }
 EXPORT_SYMBOL(ib_resize_cq);
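
The extra NULL reflects the new provider method signature (from the
matching ib_verbs.h change, not shown in this diff), which grew a udata
pointer so userspace-initiated resizes can pass provider-specific data;
in-kernel callers pass NULL:

	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);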
 
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index a19e0ed03d7c..f023d3936518 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -147,7 +147,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
 	switch (ah->type) {
 	case MTHCA_AH_ON_HCA:
 		mthca_free(&dev->av_table.alloc,
- 			   (ah->avdma - dev->av_table.ddr_av_base) /
+			   (ah->avdma - dev->av_table.ddr_av_base) /
 			   MTHCA_AV_SIZE);
 		break;
 
@@ -193,6 +193,37 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
 	return 0;
 }
 
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+	struct mthca_ah *ah   = to_mah(ibah);
+	struct mthca_dev *dev = to_mdev(ibah->device);
+
+	/* Only implement for MAD and memfree ah for now. */
+	if (ah->type == MTHCA_AH_ON_HCA)
+		return -ENOSYS;
+
+	memset(attr, 0, sizeof *attr);
+	attr->dlid          = be16_to_cpu(ah->av->dlid);
+	attr->sl            = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
+	attr->static_rate   = ah->av->msg_sr & 0x7;
+	attr->src_path_bits = ah->av->g_slid & 0x7F;
+	attr->port_num      = be32_to_cpu(ah->av->port_pd) >> 24;
+	attr->ah_flags      = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
+
+	if (attr->ah_flags) {
+		attr->grh.traffic_class =
+			be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
+		attr->grh.flow_label =
+			be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
+		attr->grh.hop_limit  = ah->av->hop_limit;
+		attr->grh.sgid_index = ah->av->gid_index &
+				       (dev->limits.gid_table_len - 1);
+		memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+	}
+
+	return 0;
+}
+
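The shifts above imply the packing of sl_tclass_flowlabel: SL in bits
31-28, GRH traffic class in bits 27-20 and flow label in bits 19-0.
As a worked example, a host-order value of 0x5a012345 decodes to
sl = 5, traffic_class = 0xa0 and flow_label = 0x12345; the u8
destination truncates the >> 20 result, so no explicit 0xff mask is
needed.
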
 int __devinit mthca_init_av_table(struct mthca_dev *dev)
 {
 	int err;
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 2825615ce81c..343eca507870 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -182,25 +182,58 @@ struct mthca_cmd_context {
 	u8                status;
 };
 
+static int fw_cmd_doorbell = 1;
+module_param(fw_cmd_doorbell, int, 0644);
+MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero "
+		 "(and supported by FW)");
+
 static inline int go_bit(struct mthca_dev *dev)
 {
 	return readl(dev->hcr + HCR_STATUS_OFFSET) &
 		swab32(1 << HCR_GO_BIT);
 }
 
-static int mthca_cmd_post(struct mthca_dev *dev,
-			  u64 in_param,
-			  u64 out_param,
-			  u32 in_modifier,
-			  u8 op_modifier,
-			  u16 op,
-			  u16 token,
-			  int event)
+static void mthca_cmd_post_dbell(struct mthca_dev *dev,
+				 u64 in_param,
+				 u64 out_param,
+				 u32 in_modifier,
+				 u8 op_modifier,
+				 u16 op,
+				 u16 token)
 {
-	int err = 0;
+	void __iomem *ptr = dev->cmd.dbell_map;
+	u16 *offs = dev->cmd.dbell_offsets;
 
-	mutex_lock(&dev->cmd.hcr_mutex);
+	__raw_writel((__force u32) cpu_to_be32(in_param >> 32),           ptr + offs[0]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful),  ptr + offs[1]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(in_modifier),              ptr + offs[2]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(out_param >> 32),          ptr + offs[3]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32(token << 16),              ptr + offs[5]);
+	wmb();
+	__raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT)                |
+					       (1 << HCA_E_BIT)                 |
+					       (op_modifier << HCR_OPMOD_SHIFT) |
+					        op),                      ptr + offs[6]);
+	wmb();
+	__raw_writel((__force u32) 0,                                     ptr + offs[7]);
+	wmb();
+}
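
The eight doorbell dwords mirror the HCR layout: offs[0]/offs[1] carry
in_param, offs[2] the in_modifier, offs[3]/offs[4] out_param, offs[5]
the token, offs[6] the opcode plus the GO and event bits, and offs[7]
is cleared last.  The wmb() after every write keeps the HCA from
sampling the GO bit before the rest of the command is visible.  Since
fw_cmd_doorbell is registered with mode 0644, it can also be toggled at
runtime via /sys/module/ib_mthca/parameters/fw_cmd_doorbell.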
 
+static int mthca_cmd_post_hcr(struct mthca_dev *dev,
+			      u64 in_param,
+			      u64 out_param,
+			      u32 in_modifier,
+			      u8 op_modifier,
+			      u16 op,
+			      u16 token,
+			      int event)
+{
 	if (event) {
 		unsigned long end = jiffies + GO_BIT_TIMEOUT;
 
@@ -210,10 +243,8 @@ static int mthca_cmd_post(struct mthca_dev *dev,
 		}
 	}
 
-	if (go_bit(dev)) {
-		err = -EAGAIN;
-		goto out;
-	}
+	if (go_bit(dev))
+		return -EAGAIN;
 
 	/*
 	 * We use writel (instead of something like memcpy_toio)
@@ -236,7 +267,29 @@ static int mthca_cmd_post(struct mthca_dev *dev,
 					       (op_modifier << HCR_OPMOD_SHIFT) |
 					       op),                       dev->hcr + 6 * 4);
 
-out:
+	return 0;
+}
+
+static int mthca_cmd_post(struct mthca_dev *dev,
+			  u64 in_param,
+			  u64 out_param,
+			  u32 in_modifier,
+			  u8 op_modifier,
+			  u16 op,
+			  u16 token,
+			  int event)
+{
+	int err = 0;
+
+	mutex_lock(&dev->cmd.hcr_mutex);
+
+	if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell)
+		mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier,
+				     op_modifier, op, token);
+	else
+		err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
+					 op_modifier, op, token, event);
+
 	mutex_unlock(&dev->cmd.hcr_mutex);
 	return err;
 }
@@ -275,7 +328,7 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
 	}
 
 	if (out_is_imm)
-		*out_param = 
+		*out_param =
 			(u64) be32_to_cpu((__force __be32)
 					  __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
 			(u64) be32_to_cpu((__force __be32)
@@ -386,7 +439,7 @@ static int mthca_cmd_box(struct mthca_dev *dev,
 			 unsigned long timeout,
 			 u8 *status)
 {
-	if (dev->cmd.use_events)
+	if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
 		return mthca_cmd_wait(dev, in_param, &out_param, 0,
 				      in_modifier, op_modifier, op,
 				      timeout, status);
@@ -423,7 +476,7 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
 			 unsigned long timeout,
 			 u8 *status)
 {
-	if (dev->cmd.use_events)
+	if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
 		return mthca_cmd_wait(dev, in_param, out_param, 1,
 				      in_modifier, op_modifier, op,
 				      timeout, status);
@@ -437,7 +490,7 @@ int mthca_cmd_init(struct mthca_dev *dev)
 {
 	mutex_init(&dev->cmd.hcr_mutex);
 	sema_init(&dev->cmd.poll_sem, 1);
-	dev->cmd.use_events = 0;
+	dev->cmd.flags = 0;
 
 	dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
 			   MTHCA_HCR_SIZE);
@@ -461,6 +514,8 @@ void mthca_cmd_cleanup(struct mthca_dev *dev)
 {
 	pci_pool_destroy(dev->cmd.pool);
 	iounmap(dev->hcr);
+	if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
+		iounmap(dev->cmd.dbell_map);
 }
 
 /*
@@ -498,7 +553,8 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
 		; /* nothing */
 	--dev->cmd.token_mask;
 
-	dev->cmd.use_events = 1;
+	dev->cmd.flags |= MTHCA_CMD_USE_EVENTS;
+
 	down(&dev->cmd.poll_sem);
 
 	return 0;
@@ -511,7 +567,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
 {
 	int i;
 
-	dev->cmd.use_events = 0;
+	dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
 
 	for (i = 0; i < dev->cmd.max_cmds; ++i)
 		down(&dev->cmd.event_sem);
@@ -596,8 +652,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 		 * address or size and use that as our log2 size.
 		 */
 		lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
-		if (lg < 12) {
-			mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n",
+		if (lg < MTHCA_ICM_PAGE_SHIFT) {
+			mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
+				   MTHCA_ICM_PAGE_SIZE,
 				   (unsigned long long) mthca_icm_addr(&iter),
 				   mthca_icm_size(&iter));
 			err = -EINVAL;
@@ -609,8 +666,9 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 				virt += 1 << lg;
 			}
 
-			pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) +
-							   (i << lg)) | (lg - 12));
+			pages[nent * 2 + 1] =
+				cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) |
+					    (lg - MTHCA_ICM_PAGE_SHIFT));
 			ts += 1 << (lg - 10);
 			++tc;
 
@@ -661,12 +719,41 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
 	return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
 }
 
+static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
+{
+	unsigned long addr;
+	u16 max_off = 0;
+	int i;
+
+	for (i = 0; i < 8; ++i)
+		max_off = max(max_off, dev->cmd.dbell_offsets[i]);
+
+	if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) {
+		mthca_warn(dev, "Firmware doorbell region at 0x%016llx, "
+			   "length 0x%x crosses a page boundary\n",
+			   (unsigned long long) base, max_off);
+		return;
+	}
+
+	addr = pci_resource_start(dev->pdev, 2) +
+		((pci_resource_len(dev->pdev, 2) - 1) & base);
+	dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32));
+	if (!dev->cmd.dbell_map)
+		return;
+
+	dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS;
+	mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
+}
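
A quick example of the boundary test above, assuming 4K pages: a
doorbell region reported at base ...0xff0 with max_off 0x1c would have
its last dword start at ...0x100c, so the two (base & PAGE_MASK)
values differ, the region is never mapped, and command posting quietly
stays on the HCR path.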
+
 int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 {
 	struct mthca_mailbox *mailbox;
 	u32 *outbox;
+	u64 base;
+	u32 tmp;
 	int err = 0;
 	u8 lg;
+	int i;
 
 #define QUERY_FW_OUT_SIZE             0x100
 #define QUERY_FW_VER_OFFSET            0x00
@@ -674,6 +761,10 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 #define QUERY_FW_ERR_START_OFFSET      0x30
 #define QUERY_FW_ERR_SIZE_OFFSET       0x38
 
+#define QUERY_FW_CMD_DB_EN_OFFSET      0x10
+#define QUERY_FW_CMD_DB_OFFSET         0x50
+#define QUERY_FW_CMD_DB_BASE           0x60
+
 #define QUERY_FW_START_OFFSET          0x20
 #define QUERY_FW_END_OFFSET            0x28
 
@@ -702,16 +793,28 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 		((dev->fw_ver & 0xffff0000ull) >> 16) |
 		((dev->fw_ver & 0x0000ffffull) << 16);
 
 	MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
 	dev->cmd.max_cmds = 1 << lg;
 	MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
 	MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
 
 	mthca_dbg(dev, "FW version %012llx, max commands %d\n",
 		  (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
 	mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
 		  (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
 
+	MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET);
+	if (tmp & 0x1) {
+		mthca_dbg(dev, "FW supports commands through doorbells\n");
+
+		MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE);
+		for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i)
+			MTHCA_GET(dev->cmd.dbell_offsets[i], outbox,
+				  QUERY_FW_CMD_DB_OFFSET + (i << 1));
+
+		mthca_setup_cmd_doorbells(dev, base);
+	}
+
 	if (mthca_is_memfree(dev)) {
 		MTHCA_GET(dev->fw.arbel.fw_pages,       outbox, QUERY_FW_SIZE_OFFSET);
 		MTHCA_GET(dev->fw.arbel.clr_int_base,   outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
@@ -720,12 +824,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 		mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
 
 		/*
-		 * Arbel page size is always 4 KB; round up number of
-		 * system pages needed.
+		 * Round up number of system pages needed in case
+		 * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
 		 */
 		dev->fw.arbel.fw_pages =
-			ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE >> 12) >>
-				(PAGE_SHIFT - 12);
+			ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+				(PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
 
 		mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
 			  (unsigned long long) dev->fw.arbel.clr_int_base,
@@ -1173,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 	int err;
 
 #define INIT_HCA_IN_SIZE             	 0x200
-#define INIT_HCA_FLAGS_OFFSET        	 0x014
+#define INIT_HCA_FLAGS1_OFFSET           0x00c
+#define INIT_HCA_FLAGS2_OFFSET           0x014
 #define INIT_HCA_QPC_OFFSET          	 0x020
 #define  INIT_HCA_QPC_BASE_OFFSET    	 (INIT_HCA_QPC_OFFSET + 0x10)
 #define  INIT_HCA_LOG_QP_OFFSET      	 (INIT_HCA_QPC_OFFSET + 0x17)
@@ -1216,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 
 	memset(inbox, 0, INIT_HCA_IN_SIZE);
 
+	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
+
 #if defined(__LITTLE_ENDIAN)
-	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+	*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
 #elif defined(__BIG_ENDIAN)
-	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+	*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
 #else
 #error Host endianness not defined
 #endif
 	/* Check port for UD address vector: */
-	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+	*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
 
 	/* We leave wqe_quota, responder_exu, etc as 0 (default) */
 
@@ -1438,11 +1546,11 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
 		return ret;
 
 	/*
-	 * Arbel page size is always 4 KB; round up number of system
-	 * pages needed.
+	 * Round up number of system pages needed in case
+	 * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
 	 */
-	*aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12);
-	*aux_pages = ALIGN(*aux_pages, PAGE_SIZE >> 12) >> (PAGE_SHIFT - 12);
+	*aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+		(PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
 
 	return 0;
 }
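
Worked example of the rounding above: with 64K system pages
(PAGE_SHIFT = 16), PAGE_SIZE / MTHCA_ICM_PAGE_SIZE = 16 and
PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT = 4, so ALIGN(n, 16) >> 4 turns a
count n of 4K ICM pages into ceil(n / 16) system pages; on 4K-page
systems both factors collapse and the expression is the identity.
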
@@ -1514,6 +1622,37 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 			     CMD_TIME_CLASS_A, status);
 }
 
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
+		    u8 *status)
+{
+	struct mthca_mailbox *mailbox;
+	__be32 *inbox;
+	int err;
+
+#define RESIZE_CQ_IN_SIZE		0x40
+#define RESIZE_CQ_LOG_SIZE_OFFSET	0x0c
+#define RESIZE_CQ_LKEY_OFFSET		0x1c
+
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;
+
+	memset(inbox, 0, RESIZE_CQ_IN_SIZE);
+	/*
+	 * Leave start address fields zeroed out -- mthca assumes that
+	 * MRs for CQs always start at virtual address 0.
+	 */
+	MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET);
+	MTHCA_PUT(inbox, lkey,     RESIZE_CQ_LKEY_OFFSET);
+
+	err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
+			CMD_TIME_CLASS_B, status);
+
+	mthca_free_mailbox(dev, mailbox);
+	return err;
+}
+
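A simplified sketch of how a kernel CQ resize is expected to use the
new command together with the mthca_cq.c helpers below (the
authoritative sequence is mthca_resize_cq() in mthca_provider.c later
in this patch; locking, the CQ_RESIZE_READY/SWAPPED handshake with the
poll path, and error unwinding are omitted, and log_cq_size, old_buf
and old_cqe are hypothetical locals):

	err = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, new_nent);
	...
	err = mthca_RESIZE_CQ(dev, cq->cqn, cq->resize_buf->buf.mr.ibmr.lkey,
			      log_cq_size, &status);
	...
	mthca_cq_resize_copy_cqes(cq);	/* carry unpolled CQEs across */
	/* swap cq->buf with resize_buf->buf and publish the new cqe count */
	mthca_free_cq_buf(dev, &old_buf, old_cqe);
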
 int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int srq_num, u8 *status)
 {
@@ -1529,37 +1668,69 @@ int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 			     CMD_TIME_CLASS_A, status);
 }
 
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+		    struct mthca_mailbox *mailbox, u8 *status)
+{
+	return mthca_cmd_box(dev, 0, mailbox->dma, num, 0,
+			     CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status);
+}
+
 int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status)
 {
 	return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
 			 CMD_TIME_CLASS_B, status);
 }
 
-int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
-		    int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+		    enum ib_qp_state next, u32 num, int is_ee,
+		    struct mthca_mailbox *mailbox, u32 optmask,
 		    u8 *status)
 {
-	static const u16 op[] = {
-		[MTHCA_TRANS_RST2INIT]  = CMD_RST2INIT_QPEE,
-		[MTHCA_TRANS_INIT2INIT] = CMD_INIT2INIT_QPEE,
-		[MTHCA_TRANS_INIT2RTR]  = CMD_INIT2RTR_QPEE,
-		[MTHCA_TRANS_RTR2RTS]   = CMD_RTR2RTS_QPEE,
-		[MTHCA_TRANS_RTS2RTS]   = CMD_RTS2RTS_QPEE,
-		[MTHCA_TRANS_SQERR2RTS] = CMD_SQERR2RTS_QPEE,
-		[MTHCA_TRANS_ANY2ERR]   = CMD_2ERR_QPEE,
-		[MTHCA_TRANS_RTS2SQD]   = CMD_RTS2SQD_QPEE,
-		[MTHCA_TRANS_SQD2SQD]   = CMD_SQD2SQD_QPEE,
-		[MTHCA_TRANS_SQD2RTS]   = CMD_SQD2RTS_QPEE,
-		[MTHCA_TRANS_ANY2RST]   = CMD_ERR2RST_QPEE
+	static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+		[IB_QPS_RESET] = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+			[IB_QPS_INIT]	= CMD_RST2INIT_QPEE,
+		},
+		[IB_QPS_INIT]  = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+			[IB_QPS_INIT]	= CMD_INIT2INIT_QPEE,
+			[IB_QPS_RTR]	= CMD_INIT2RTR_QPEE,
+		},
+		[IB_QPS_RTR]   = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+			[IB_QPS_RTS]	= CMD_RTR2RTS_QPEE,
+		},
+		[IB_QPS_RTS]   = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+			[IB_QPS_RTS]	= CMD_RTS2RTS_QPEE,
+			[IB_QPS_SQD]	= CMD_RTS2SQD_QPEE,
+		},
+		[IB_QPS_SQD] = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+			[IB_QPS_RTS]	= CMD_SQD2RTS_QPEE,
+			[IB_QPS_SQD]	= CMD_SQD2SQD_QPEE,
+		},
+		[IB_QPS_SQE] = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+			[IB_QPS_RTS]	= CMD_SQERR2RTS_QPEE,
+		},
+		[IB_QPS_ERR] = {
+			[IB_QPS_RESET]	= CMD_ERR2RST_QPEE,
+			[IB_QPS_ERR]	= CMD_2ERR_QPEE,
+		}
 	};
+
 	u8 op_mod = 0;
 	int my_mailbox = 0;
 	int err;
 
-	if (trans < 0 || trans >= ARRAY_SIZE(op))
-		return -EINVAL;
-
-	if (trans == MTHCA_TRANS_ANY2RST) {
+	if (op[cur][next] == CMD_ERR2RST_QPEE) {
 		op_mod = 3;	/* don't write outbox, any->reset */
 
 		/* For debugging */
@@ -1571,34 +1742,35 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
 			} else
 				mailbox = NULL;
 		}
-	} else {
-		if (0) {
+
+		err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+				    (!!is_ee << 24) | num, op_mod,
+				    op[cur][next], CMD_TIME_CLASS_C, status);
+
+		if (0 && mailbox) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk("  opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
+			printk(" %08x\n", be32_to_cpup(mailbox->buf));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
-					printk("  [%02x] ", i * 4);
+					printk("[%02x] ", i * 4);
 				printk(" %08x",
 				       be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
 				if ((i + 1) % 8 == 0)
 					printk("\n");
 			}
 		}
-	}
-
-	if (trans == MTHCA_TRANS_ANY2RST) {
-		err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
-				    (!!is_ee << 24) | num, op_mod,
-				    op[trans], CMD_TIME_CLASS_C, status);
 
-		if (0 && mailbox) {
+		if (my_mailbox)
+			mthca_free_mailbox(dev, mailbox);
+	} else {
+		if (0) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk(" %08x\n", be32_to_cpup(mailbox->buf));
+			printk("  opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
-					printk("[%02x] ", i * 4);
+					printk("  [%02x] ", i * 4);
 				printk(" %08x",
 				       be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
 				if ((i + 1) % 8 == 0)
@@ -1606,12 +1778,9 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
 			}
 		}
 
-	} else
-		err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num,
-				op_mod, op[trans], CMD_TIME_CLASS_C, status);
-
-	if (my_mailbox)
-		mthca_free_mailbox(dev, mailbox);
+		err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num,
+				op_mod, op[cur][next], CMD_TIME_CLASS_C, status);
+	}
 
 	return err;
 }
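
Callers now pass plain IB verbs states instead of the retired
MTHCA_TRANS_* codes; roughly, from the reworked call site in
mthca_qp.c later in this patch:

	err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
			      mailbox, sqd_event, &status);

An invalid (cur, next) pair indexes a zero entry of op[][]; such
transitions are expected to have been rejected already by
ib_modify_qp_is_ok(), which is why no range check replaces the old
test on trans.
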
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 18175bec84c2..e4ec35c40dd3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006 Cisco Systems.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -73,9 +74,9 @@ enum {
 	MTHCA_CMD_STAT_REG_BOUND      = 0x21,
 	/* HCA local attached memory not present: */
 	MTHCA_CMD_STAT_LAM_NOT_PRE    = 0x22,
-        /* Bad management packet (silently discarded): */
+	/* Bad management packet (silently discarded): */
 	MTHCA_CMD_STAT_BAD_PKT 	      = 0x30,
-        /* More outstanding CQEs in CQ than new CQ size: */
+	/* More outstanding CQEs in CQ than new CQ size: */
 	MTHCA_CMD_STAT_BAD_SIZE       = 0x40
 };
 
@@ -298,13 +299,18 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status);
 int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status);
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
+		    u8 *status);
 int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int srq_num, u8 *status);
 int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int srq_num, u8 *status);
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+		    struct mthca_mailbox *mailbox, u8 *status);
 int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
-int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
-		    int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+		    enum ib_qp_state next, u32 num, int is_ee,
+		    struct mthca_mailbox *mailbox, u32 optmask,
 		    u8 *status);
 int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
 		   struct mthca_mailbox *mailbox, u8 *status);
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 96f1a86bf049..76aabc5bf371 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
@@ -150,24 +150,29 @@ struct mthca_err_cqe {
 #define MTHCA_ARBEL_CQ_DB_REQ_NOT      (2 << 24)
 #define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
 
-static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
+						 int entry)
 {
-	if (cq->is_direct)
-		return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
+	if (buf->is_direct)
+		return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
 	else
-		return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+		return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
 			+ (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
 }
 
-static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
+static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+{
+	return get_cqe_from_buf(&cq->buf, entry);
+}
+
+static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
 {
-	struct mthca_cqe *cqe = get_cqe(cq, i);
 	return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
 }
 
 static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
 {
-	return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
+	return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
 }
 
 static inline void set_cqe_hw(struct mthca_cqe *cqe)
@@ -289,7 +294,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 	 * from our QP and therefore don't need to be checked.
 	 */
 	for (prod_index = cq->cons_index;
-	     cqe_sw(cq, prod_index & cq->ibcq.cqe);
+	     cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
 	     ++prod_index)
 		if (prod_index == cq->cons_index + cq->ibcq.cqe)
 			break;
@@ -324,12 +329,58 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 		wake_up(&cq->wait);
 }
 
-static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
-			    struct mthca_qp *qp, int wqe_index, int is_send,
-			    struct mthca_err_cqe *cqe,
-			    struct ib_wc *entry, int *free_cqe)
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
+{
+	int i;
+
+	/*
+	 * In Tavor mode, the hardware keeps the consumer and producer
+	 * indices mod the CQ size.  Since we might be making the CQ
+	 * bigger, we need to deal with the case where the producer
+	 * index wrapped around before the CQ was resized.
+	 */
+	if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
+	    cq->ibcq.cqe < cq->resize_buf->cqe) {
+		cq->cons_index &= cq->ibcq.cqe;
+		if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
+			cq->cons_index -= cq->ibcq.cqe + 1;
+	}
+
+	for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
+		memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
+					i & cq->resize_buf->cqe),
+		       get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
+}
+
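The re-basing above matters because the copy reads with the old mask
(i & cq->ibcq.cqe) but writes with the larger new mask
(i & cq->resize_buf->cqe), so the free-running index i must be on the
right lap of the old ring.  When the last old entry is still
software-owned, the un-polled region wraps past the end of the old
buffer, and pulling cons_index back by one old ring length (cqe + 1)
restores that alignment before the loop starts.
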
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
+{
+	int ret;
+	int i;
+
+	ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
+			      MTHCA_MAX_DIRECT_CQ_SIZE,
+			      &buf->queue, &buf->is_direct,
+			      &dev->driver_pd, 1, &buf->mr);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nent; ++i)
+		set_cqe_hw(get_cqe_from_buf(buf, i));
+
+	return 0;
+}
+
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
+{
+	mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
+		       buf->is_direct, &buf->mr);
+}
+
+static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
+			     struct mthca_qp *qp, int wqe_index, int is_send,
+			     struct mthca_err_cqe *cqe,
+			     struct ib_wc *entry, int *free_cqe)
 {
-	int err;
 	int dbd;
 	__be32 new_wqe;
 
@@ -412,11 +463,9 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	 * error case, so we don't have to check the doorbell count, etc.
 	 */
 	if (mthca_is_memfree(dev))
-		return 0;
+		return;
 
-	err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
-	if (err)
-		return err;
+	mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
 
 	/*
 	 * If we're at the end of the WQE chain, or we've used up our
@@ -424,15 +473,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	 * the next poll operation.
 	 */
 	if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
-		return 0;
+		return;
 
 	cqe->db_cnt   = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd);
 	cqe->wqe      = new_wqe;
 	cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
 
 	*free_cqe = 0;
-
-	return 0;
 }
 
 static inline int mthca_poll_one(struct mthca_dev *dev,
@@ -518,9 +565,9 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 	}
 
 	if (is_error) {
-		err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
-				       (struct mthca_err_cqe *) cqe,
-				       entry, &free_cqe);
+		handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
+				 (struct mthca_err_cqe *) cqe,
+				 entry, &free_cqe);
 		goto out;
 	}
 
@@ -614,11 +661,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	for (npolled = 0; npolled < num_entries; ++npolled) {
+	npolled = 0;
+repoll:
+	while (npolled < num_entries) {
 		err = mthca_poll_one(dev, cq, &qp,
 				     &freed, entry + npolled);
 		if (err)
 			break;
+		++npolled;
 	}
 
 	if (freed) {
@@ -626,6 +676,42 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
 		update_cons_index(dev, cq, freed);
 	}
 
+	/*
+	 * If a CQ resize is in progress and we discovered that the
+	 * old buffer is empty, then peek in the new buffer, and if
+	 * it's not empty, switch to the new buffer and continue
+	 * polling there.
+	 */
+	if (unlikely(err == -EAGAIN && cq->resize_buf &&
+		     cq->resize_buf->state == CQ_RESIZE_READY)) {
+		/*
+		 * In Tavor mode, the hardware keeps the producer
+		 * index modulo the CQ size.  Since we might be making
+		 * the CQ bigger, we need to mask our consumer index
+		 * using the size of the old CQ buffer before looking
+		 * in the new CQ buffer.
+		 */
+		if (!mthca_is_memfree(dev))
+			cq->cons_index &= cq->ibcq.cqe;
+
+		if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
+					    cq->cons_index & cq->resize_buf->cqe))) {
+			struct mthca_cq_buf tbuf;
+			int tcqe;
+
+			tbuf         = cq->buf;
+			tcqe         = cq->ibcq.cqe;
+			cq->buf      = cq->resize_buf->buf;
+			cq->ibcq.cqe = cq->resize_buf->cqe;
+
+			cq->resize_buf->buf   = tbuf;
+			cq->resize_buf->cqe   = tcqe;
+			cq->resize_buf->state = CQ_RESIZE_SWAPPED;
+
+			goto repoll;
+		}
+	}
+
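After the swap the old buffer is parked in cq->resize_buf and the
state becomes CQ_RESIZE_SWAPPED, presumably so the resize path can
tell that polling already switched buffers and only the old buffer
remains to be freed; goto repoll then keeps draining completions from
the new buffer without dropping the CQ lock.
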
 	spin_unlock_irqrestore(&cq->lock, flags);
 
 	return err == 0 || err == -EAGAIN ? npolled : err;
@@ -684,24 +770,14 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
 	return 0;
 }
 
-static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
-{
-	mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
-		       &cq->queue, cq->is_direct, &cq->mr);
-}
-
 int mthca_init_cq(struct mthca_dev *dev, int nent,
 		  struct mthca_ucontext *ctx, u32 pdn,
 		  struct mthca_cq *cq)
 {
-	int size = nent * MTHCA_CQ_ENTRY_SIZE;
 	struct mthca_mailbox *mailbox;
 	struct mthca_cq_context *cq_context;
 	int err = -ENOMEM;
 	u8 status;
-	int i;
-
-	might_sleep();
 
 	cq->ibcq.cqe  = nent - 1;
 	cq->is_kernel = !ctx;
@@ -739,14 +815,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 	cq_context = mailbox->buf;
 
 	if (cq->is_kernel) {
-		err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE,
-				      &cq->queue, &cq->is_direct,
-				      &dev->driver_pd, 1, &cq->mr);
+		err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
 		if (err)
 			goto err_out_mailbox;
-
-		for (i = 0; i < nent; ++i)
-			set_cqe_hw(get_cqe(cq, i));
 	}
 
 	spin_lock_init(&cq->lock);
@@ -765,7 +836,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 	cq_context->error_eqn       = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
 	cq_context->comp_eqn        = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
 	cq_context->pd              = cpu_to_be32(pdn);
-	cq_context->lkey            = cpu_to_be32(cq->mr.ibmr.lkey);
+	cq_context->lkey            = cpu_to_be32(cq->buf.mr.ibmr.lkey);
 	cq_context->cqn             = cpu_to_be32(cq->cqn);
 
 	if (mthca_is_memfree(dev)) {
@@ -803,7 +874,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 
 err_out_free_mr:
 	if (cq->is_kernel)
-		mthca_free_cq_buf(dev, cq);
+		mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
 
 err_out_mailbox:
 	mthca_free_mailbox(dev, mailbox);
@@ -832,8 +903,6 @@ void mthca_free_cq(struct mthca_dev *dev,
 	int err;
 	u8 status;
 
-	might_sleep();
-
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 	if (IS_ERR(mailbox)) {
 		mthca_warn(dev, "No memory for mailbox to free CQ.\n");
@@ -871,7 +940,7 @@ void mthca_free_cq(struct mthca_dev *dev,
 	wait_event(cq->wait, !atomic_read(&cq->refcount));
 
 	if (cq->is_kernel) {
-		mthca_free_cq_buf(dev, cq);
+		mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
 		if (mthca_is_memfree(dev)) {
 			mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
 			mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e481037288d6..ad52edbefe98 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
@@ -53,8 +53,8 @@
 
 #define DRV_NAME	"ib_mthca"
 #define PFX		DRV_NAME ": "
-#define DRV_VERSION	"0.07"
-#define DRV_RELDATE	"February 13, 2006"
+#define DRV_VERSION	"0.08"
+#define DRV_RELDATE	"February 14, 2006"
 
 enum {
 	MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -64,7 +64,8 @@ enum {
 	MTHCA_FLAG_NO_LAM     = 1 << 5,
 	MTHCA_FLAG_FMR        = 1 << 6,
 	MTHCA_FLAG_MEMFREE    = 1 << 7,
-	MTHCA_FLAG_PCIE       = 1 << 8
+	MTHCA_FLAG_PCIE       = 1 << 8,
+	MTHCA_FLAG_SINAI_OPT  = 1 << 9
 };
 
 enum {
@@ -110,9 +111,17 @@ enum {
 	MTHCA_OPCODE_INVALID        = 0xff
 };
 
+enum {
+	MTHCA_CMD_USE_EVENTS         = 1 << 0,
+	MTHCA_CMD_POST_DOORBELLS     = 1 << 1
+};
+
+enum {
+	MTHCA_CMD_NUM_DBELL_DWORDS = 8
+};
+
 struct mthca_cmd {
 	struct pci_pool          *pool;
-	int                       use_events;
 	struct mutex              hcr_mutex;
 	struct semaphore 	  poll_sem;
 	struct semaphore 	  event_sem;
@@ -121,6 +130,9 @@ struct mthca_cmd {
 	int                       free_head;
 	struct mthca_cmd_context *context;
 	u16                       token_mask;
+	u32                       flags;
+	void __iomem             *dbell_map;
+	u16                       dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
 };
 
 struct mthca_limits {
@@ -470,12 +482,16 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 		    enum ib_event_type event_type);
 void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 		    struct mthca_srq *srq);
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
 
 int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 		    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		     enum ib_srq_attr_mask attr_mask);
+int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
@@ -486,6 +502,8 @@ int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
 
 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		    enum ib_event_type event_type);
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+		   struct ib_qp_init_attr *qp_init_attr);
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr);
@@ -495,8 +513,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			  struct ib_send_wr **bad_wr);
 int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			     struct ib_recv_wr **bad_wr);
-int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
-		       int index, int *dbd, __be32 *new_wqe);
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+			int index, int *dbd, __be32 *new_wqe);
 int mthca_alloc_qp(struct mthca_dev *dev,
 		   struct mthca_pd *pd,
 		   struct mthca_cq *send_cq,
@@ -522,6 +540,7 @@ int mthca_create_ah(struct mthca_dev *dev,
 int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
 int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
 		  struct ib_ud_header *header);
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
 int mthca_ah_grh_present(struct mthca_ah *ah);
 
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 2eabb27804cd..cbdc348fb689 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -497,7 +497,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 
 	eq->dev  = dev;
 	eq->nent = roundup_pow_of_two(max(nent, 2));
- 	npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
+	npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -825,7 +825,7 @@ void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev)
 {
 	u8 status;
 
-	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status);
+	mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
 	pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
 		       PCI_DMA_BIDIRECTIONAL);
 	__free_page(dev->eq_table.icm_page);
@@ -928,7 +928,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev)
 		mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
 			   dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
 
-	for (i = 0; i < MTHCA_EQ_CMD; ++i)
+	for (i = 0; i < MTHCA_NUM_EQ; ++i)
 		if (mthca_is_memfree(dev))
 			arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
 		else
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 1229c604c6e0..4ace6a392f41 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -109,6 +109,19 @@ static void smp_snoop(struct ib_device *ibdev,
 	}
 }
 
+static void node_desc_override(struct ib_device *dev,
+			       struct ib_mad *mad)
+{
+	if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+	     mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+	    mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
+	    mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
+		mutex_lock(&to_mdev(dev)->cap_mask_mutex);
+		memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+		mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
+	}
+}
+
 static void forward_trap(struct mthca_dev *dev,
 			 u8 port_num,
 			 struct ib_mad *mad)
@@ -207,8 +220,10 @@ int mthca_process_mad(struct ib_device *ibdev,
 		return IB_MAD_RESULT_FAILURE;
 	}
 
-	if (!out_mad->mad_hdr.status)
+	if (!out_mad->mad_hdr.status) {
 		smp_snoop(ibdev, port_num, in_mad);
+		node_desc_override(ibdev, out_mad);
+	}
 
 	/* set return bit in status of directed route responses */
 	if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 9c849d27b06e..266f347c6707 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -935,13 +935,19 @@ enum {
 
 static struct {
 	u64 latest_fw;
-	int is_memfree;
-	int is_pcie;
+	u32 flags;
 } mthca_hca_table[] = {
-	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 },
-	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 },
-	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 },
-	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
+	[TAVOR]        = { .latest_fw = MTHCA_FW_VER(3, 4, 0),
+			   .flags     = 0 },
+	[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 400),
+			   .flags     = MTHCA_FLAG_PCIE },
+	[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0),
+			   .flags     = MTHCA_FLAG_MEMFREE |
+					MTHCA_FLAG_PCIE },
+	[SINAI]        = { .latest_fw = MTHCA_FW_VER(1, 0, 800),
+			   .flags     = MTHCA_FLAG_MEMFREE |
+					MTHCA_FLAG_PCIE    |
+					MTHCA_FLAG_SINAI_OPT }
 };
 
 static int __devinit mthca_init_one(struct pci_dev *pdev,
@@ -1031,12 +1037,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 
 	mdev->pdev = pdev;
 
+	mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
-	if (mthca_hca_table[id->driver_data].is_memfree)
-		mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
-	if (mthca_hca_table[id->driver_data].is_pcie)
-		mdev->mthca_flags |= MTHCA_FLAG_PCIE;
 
 	/*
 	 * Now reset the HCA before we touch the PCI capabilities or
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 321f11e707f2..9965bda9afed 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -187,7 +187,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 	for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
 		if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
-			mthca_dbg(dev, "QP %06x already a member of MGM\n", 
+			mthca_dbg(dev, "QP %06x already a member of MGM\n",
 				  ibqp->qp_num);
 			err = 0;
 			goto out;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index d709cb162a72..15cc2f6eb475 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -202,7 +202,8 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o
 
 	if (--table->icm[i]->refcount == 0) {
 		mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
-				MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+				MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+				&status);
 		mthca_free_icm(dev, table->icm[i]);
 		table->icm[i] = NULL;
 	}
@@ -336,7 +337,8 @@ err:
 	for (i = 0; i < num_icm; ++i)
 		if (table->icm[i]) {
 			mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
-					MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+					&status);
 			mthca_free_icm(dev, table->icm[i]);
 		}
 
@@ -353,7 +355,8 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
 	for (i = 0; i < table->num_icm; ++i)
 		if (table->icm[i]) {
 			mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
-					MTHCA_TABLE_CHUNK_SIZE >> 12, &status);
+					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+					&status);
 			mthca_free_icm(dev, table->icm[i]);
 		}
 
@@ -364,7 +367,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
 {
 	return dev->uar_table.uarc_base +
 		uar->index * dev->uar_table.uarc_size +
-		page * 4096;
+		page * MTHCA_ICM_PAGE_SIZE;
 }
 
 int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
@@ -401,7 +404,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
 	if (ret < 0)
 		goto out;
 
-	db_tab->page[i].mem.length = 4096;
+	db_tab->page[i].mem.length = MTHCA_ICM_PAGE_SIZE;
 	db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
 
 	ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
@@ -455,7 +458,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
 	if (!mthca_is_memfree(dev))
 		return NULL;
 
-	npages = dev->uar_table.uarc_size / 4096;
+	npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
 	db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
 	if (!db_tab)
 		return ERR_PTR(-ENOMEM);
@@ -478,7 +481,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
 	if (!mthca_is_memfree(dev))
 		return;
 
-	for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
+	for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
 		if (db_tab->page[i].uvirt) {
 			mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
 			pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
@@ -551,20 +554,20 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
 	page = dev->db_tab->page + end;
 
 alloc:
-	page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096,
+	page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
 					  &page->mapping, GFP_KERNEL);
 	if (!page->db_rec) {
 		ret = -ENOMEM;
 		goto out;
 	}
-	memset(page->db_rec, 0, 4096);
+	memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
 
 	ret = mthca_MAP_ICM_page(dev, page->mapping,
 				 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
 	if (!ret && status)
 		ret = -EINVAL;
 	if (ret) {
-		dma_free_coherent(&dev->pdev->dev, 4096,
+		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
 				  page->db_rec, page->mapping);
 		goto out;
 	}
@@ -612,7 +615,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
 	    i >= dev->db_tab->max_group1 - 1) {
 		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
 
-		dma_free_coherent(&dev->pdev->dev, 4096,
+		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
 				  page->db_rec, page->mapping);
 		page->db_rec = NULL;
 
@@ -640,7 +643,7 @@ int mthca_init_db_tab(struct mthca_dev *dev)
 
 	mutex_init(&dev->db_tab->mutex);
 
-	dev->db_tab->npages     = dev->uar_table.uarc_size / 4096;
+	dev->db_tab->npages     = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
 	dev->db_tab->max_group1 = 0;
 	dev->db_tab->min_group2 = dev->db_tab->npages - 1;
 
@@ -681,7 +684,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
 
 		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
 
-		dma_free_coherent(&dev->pdev->dev, 4096,
+		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
 				  dev->db_tab->page[i].db_rec,
 				  dev->db_tab->page[i].mapping);
 	}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 36f1141a08aa..6d42947e1dc4 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -45,6 +45,12 @@
 	((256 - sizeof (struct list_head) - 2 * sizeof (int)) /		\
 	 (sizeof (struct scatterlist)))
 
+enum {
+	MTHCA_ICM_PAGE_SHIFT	= 12,
+	MTHCA_ICM_PAGE_SIZE	= 1 << MTHCA_ICM_PAGE_SHIFT,
+	MTHCA_DB_REC_PER_PAGE	= MTHCA_ICM_PAGE_SIZE / 8
+};
+
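Pinning the firmware (ICM) page size to an explicit 4K constant is
what the many 4096-to-MTHCA_ICM_PAGE_SIZE substitutions in this patch
rely on: ICM pages stay 4K even when the CPU PAGE_SIZE is 16K or 64K.
Each doorbell record is 8 bytes, hence:

	MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8 = 512
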
 struct mthca_icm_chunk {
 	struct list_head   list;
 	int                npages;
@@ -131,10 +137,6 @@ static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
 	return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
 }
 
-enum {
-	MTHCA_DB_REC_PER_PAGE = 4096 / 8
-};
-
 struct mthca_db_page {
 	DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
 	__be64    *db_rec;
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index e995e2aa016d..698b62125765 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -76,6 +76,8 @@ struct mthca_mpt_entry {
 #define MTHCA_MPT_STATUS_SW 0xF0
 #define MTHCA_MPT_STATUS_HW 0x00
 
+#define SINAI_FMR_KEY_INC 0x1000000
+
 /*
  * Buddy allocator for MTT segments (currently not very efficient
  * since it doesn't keep a free list and just searches linearly
@@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
 		return tavor_key_to_hw_index(key);
 }
 
+static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
+{
+	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		return ((key << 20) & 0x800000) | (key & 0x7fffff);
+	else
+		return key;
+}
+
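Worked example of the adjustment above: bit 3 of the allocated index
is replicated into bit 23 of the key and the low 23 bits are kept, so
index 0x8 becomes key 0x800008 while index 0x7 stays 0x7.  FMR remaps
on Sinai then advance the key by SINAI_FMR_KEY_INC (bit 24, used in
mthca_arbel_map_phys_fmr() below) rather than by num_mpts, leaving the
low bits stable.
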
 int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
 		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
 {
@@ -340,13 +350,12 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
 	int err;
 	u8 status;
 
-	might_sleep();
-
 	WARN_ON(buffer_size_shift >= 32);
 
 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
 	if (key == -1)
 		return -ENOMEM;
+	key = adjust_key(dev, key);
 	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
 
 	if (mthca_is_memfree(dev)) {
@@ -467,8 +476,6 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
 	int err;
 	u8 status;
 
-	might_sleep();
-
 	err = mthca_HW2SW_MPT(dev, NULL,
 			      key_to_hw_index(dev, mr->ibmr.lkey) &
 			      (dev->limits.num_mpts - 1),
@@ -495,9 +502,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 	int err = -ENOMEM;
 	int i;
 
-	might_sleep();
-
-	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
+	if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
 		return -EINVAL;
 
 	/* For Arbel, all MTTs must fit in the same page. */
@@ -510,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
 	if (key == -1)
 		return -ENOMEM;
+	key = adjust_key(dev, key);
 
 	idx = key & (dev->limits.num_mpts - 1);
 	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
@@ -523,7 +529,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		BUG_ON(!mr->mem.arbel.mpt);
 	} else
 		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
-		       	sizeof *(mr->mem.tavor.mpt) * idx;
+			sizeof *(mr->mem.tavor.mpt) * idx;
 
 	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
 	if (IS_ERR(mr->mtt))
@@ -549,7 +555,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 				       MTHCA_MPT_FLAG_REGION      |
 				       access);
 
-	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
+	mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
 	mpt_entry->key       = cpu_to_be32(key);
 	mpt_entry->pd        = cpu_to_be32(pd);
 	memset(&mpt_entry->start, 0,
@@ -617,7 +623,7 @@ static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
 	if (list_len > fmr->attr.max_pages)
 		return -EINVAL;
 
-	page_mask = (1 << fmr->attr.page_size) - 1;
+	page_mask = (1 << fmr->attr.page_shift) - 1;
 
 	/* We are getting page lists, so va must be page aligned. */
 	if (iova & page_mask)
@@ -665,7 +671,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 	}
 
 	mpt_entry.lkey   = cpu_to_be32(key);
-	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
 	mpt_entry.start  = cpu_to_be64(iova);
 
 	__raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
@@ -693,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 	++fmr->maps;
 
 	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
-	key += dev->limits.num_mpts;
+	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		key += SINAI_FMR_KEY_INC;
+	else
+		key += dev->limits.num_mpts;
 	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
 
 	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
@@ -706,7 +715,7 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 
 	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
 	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
-	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
 	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);
 
 	wmb();
@@ -766,6 +775,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 	else
 		dev->mthca_flags |= MTHCA_FLAG_FMR;
 
+	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+		mthca_dbg(dev, "Memory key throughput optimization activated.\n");
+
 	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
 			       fls(dev->limits.num_mtt_segs - 1));
 
@@ -785,7 +797,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 		}
 
 		dev->mr_table.tavor_fmr.mpt_base =
-		       	ioremap(dev->mr_table.mpt_base,
+			ioremap(dev->mr_table.mpt_base,
 				(1 << i) * sizeof (struct mthca_mpt_entry));
 
 		if (!dev->mr_table.tavor_fmr.mpt_base) {
@@ -813,7 +825,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 			goto err_reserve_fmr;
 
 		dev->mr_table.fmr_mtt_buddy =
-		       	&dev->mr_table.tavor_fmr.mtt_buddy;
+			&dev->mr_table.tavor_fmr.mtt_buddy;
 	} else
 		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
index 3dbf06a6e6f4..105fc5faaddf 100644
--- a/drivers/infiniband/hw/mthca/mthca_pd.c
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -43,8 +43,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
 {
 	int err = 0;
 
-	might_sleep();
-
 	pd->privileged = privileged;
 
 	atomic_set(&pd->sqp_count, 0);
@@ -66,7 +64,6 @@ int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
 
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
 {
-	might_sleep();
 	if (pd->privileged)
 		mthca_free_mr(dev, &pd->ntmr);
 	mthca_free(&dev->pd_table.alloc, pd->pd_num);
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 08a909371b0a..58d44aa3c302 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -152,7 +152,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
 		}
 		if (total_size > mem_avail) {
 			mthca_err(dev, "Profile requires 0x%llx bytes; "
-				  "won't in 0x%llx bytes of context memory.\n",
+				  "won't fit in 0x%llx bytes of context memory.\n",
 				  (unsigned long long) total_size,
 				  (unsigned long long) mem_avail);
 			kfree(profile);
@@ -262,6 +262,14 @@ u64 mthca_make_profile(struct mthca_dev *dev,
 	 */
 	dev->limits.num_pds = MTHCA_NUM_PDS;
 
+	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
+	    init_hca->log_mpt_sz > 23) {
+		mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
+			   init_hca->log_mpt_sz);
+		mthca_warn(dev, "Disabling memory key throughput optimization.\n");
+		dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
+	}
+
 	/*
 	 * For Tavor, FMRs use ioremapped PCI memory. For 32 bit
 	 * systems it may use too much vmalloc space to map all MTT
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index e88e39aef85a..2c250bc11c33 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
@@ -108,12 +108,12 @@ static int mthca_query_device(struct ib_device *ibdev,
 	props->max_srq_wr          = mdev->limits.max_srq_wqes;
 	props->max_srq_sge         = mdev->limits.max_sg;
 	props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
-	props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? 
+	props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
 					IB_ATOMIC_HCA : IB_ATOMIC_NONE;
 	props->max_pkeys           = mdev->limits.pkey_table_len;
 	props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
 	props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 
+	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 					   props->max_mcast_grp;
 
 	err = 0;
@@ -176,6 +176,23 @@ static int mthca_query_port(struct ib_device *ibdev,
 	return err;
 }
 
+static int mthca_modify_device(struct ib_device *ibdev,
+			       int mask,
+			       struct ib_device_modify *props)
+{
+	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+		return -EOPNOTSUPP;
+
+	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+		if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+			return -ERESTARTSYS;
+		memcpy(ibdev->node_desc, props->node_desc, 64);
+		mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+	}
+
+	return 0;
+}
+
 static int mthca_modify_port(struct ib_device *ibdev,
 			     u8 port, int port_modify_mask,
 			     struct ib_port_modify *props)
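
mthca_modify_device() is registered as the .modify_device method below,
so consumers reach it through the core ib_modify_device() entry point
added in the same series; only the node description is modifiable.  A
hedged caller sketch (set_node_desc is a hypothetical helper, assuming
the 64-byte node_desc field of struct ib_device_modify):

	static int set_node_desc(struct ib_device *dev, const char desc[64])
	{
		struct ib_device_modify mod;

		memset(&mod, 0, sizeof mod);
		memcpy(mod.node_desc, desc, 64);

		return ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &mod);
	}
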
@@ -669,9 +686,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
 	}
 
 	if (context) {
-		cq->mr.ibmr.lkey    = ucmd.lkey;
-		cq->set_ci_db_index = ucmd.set_db_index;
-		cq->arm_db_index    = ucmd.arm_db_index;
+		cq->buf.mr.ibmr.lkey = ucmd.lkey;
+		cq->set_ci_db_index  = ucmd.set_db_index;
+		cq->arm_db_index     = ucmd.arm_db_index;
 	}
 
 	for (nent = 1; nent <= entries; nent <<= 1)
@@ -689,6 +706,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
 		goto err_free;
 	}
 
+	cq->resize_buf = NULL;
+
 	return &cq->ibcq;
 
 err_free:
@@ -707,6 +726,121 @@ err_unmap_set:
 	return ERR_PTR(err);
 }
 
+static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
+				  int entries)
+{
+	int ret;
+
+	spin_lock_irq(&cq->lock);
+	if (cq->resize_buf) {
+		ret = -EBUSY;
+		goto unlock;
+	}
+
+	cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+	if (!cq->resize_buf) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	cq->resize_buf->state = CQ_RESIZE_ALLOC;
+
+	ret = 0;
+
+unlock:
+	spin_unlock_irq(&cq->lock);
+
+	if (ret)
+		return ret;
+
+	ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+	if (ret) {
+		spin_lock_irq(&cq->lock);
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		spin_unlock_irq(&cq->lock);
+		return ret;
+	}
+
+	cq->resize_buf->cqe = entries - 1;
+
+	spin_lock_irq(&cq->lock);
+	cq->resize_buf->state = CQ_RESIZE_READY;
+	spin_unlock_irq(&cq->lock);
+
+	return 0;
+}
+
+static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+	struct mthca_dev *dev = to_mdev(ibcq->device);
+	struct mthca_cq *cq = to_mcq(ibcq);
+	struct mthca_resize_cq ucmd;
+	u32 lkey;
+	u8 status;
+	int ret;
+
+	if (entries < 1 || entries > dev->limits.max_cqes)
+		return -EINVAL;
+
+	entries = roundup_pow_of_two(entries + 1);
+	if (entries == ibcq->cqe + 1)
+		return 0;
+
+	if (cq->is_kernel) {
+		ret = mthca_alloc_resize_buf(dev, cq, entries);
+		if (ret)
+			return ret;
+		lkey = cq->resize_buf->buf.mr.ibmr.lkey;
+	} else {
+		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+			return -EFAULT;
+		lkey = ucmd.lkey;
+	}
+
+	ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
+	if (status)
+		ret = -EINVAL;
+
+	if (ret) {
+		if (cq->resize_buf) {
+			mthca_free_cq_buf(dev, &cq->resize_buf->buf,
+					  cq->resize_buf->cqe);
+			kfree(cq->resize_buf);
+			spin_lock_irq(&cq->lock);
+			cq->resize_buf = NULL;
+			spin_unlock_irq(&cq->lock);
+		}
+		return ret;
+	}
+
+	if (cq->is_kernel) {
+		struct mthca_cq_buf tbuf;
+		int tcqe;
+
+		spin_lock_irq(&cq->lock);
+		if (cq->resize_buf->state == CQ_RESIZE_READY) {
+			mthca_cq_resize_copy_cqes(cq);
+			tbuf         = cq->buf;
+			tcqe         = cq->ibcq.cqe;
+			cq->buf      = cq->resize_buf->buf;
+			cq->ibcq.cqe = cq->resize_buf->cqe;
+		} else {
+			tbuf = cq->resize_buf->buf;
+			tcqe = cq->resize_buf->cqe;
+		}
+
+		kfree(cq->resize_buf);
+		cq->resize_buf = NULL;
+		spin_unlock_irq(&cq->lock);
+
+		mthca_free_cq_buf(dev, &tbuf, tcqe);
+	} else
+		ibcq->cqe = entries - 1;
+
+	return 0;
+}
+
 static int mthca_destroy_cq(struct ib_cq *cq)
 {
 	if (cq->uobject) {
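
The resize path allocates the new buffer outside the CQ lock
(mthca_alloc_resize_buf() publishes it in the CQ_RESIZE_READY state),
issues the RESIZE_CQ firmware command, and then swaps buffers under
cq->lock; if the CQ event path has already performed the swap, the
state is no longer CQ_RESIZE_READY and only the stale buffer is freed.
Consumers drive all of this through the core verbs layer; a minimal
sketch (grow_cq is a hypothetical caller):

	static int grow_cq(struct ib_cq *cq, int wanted_entries)
	{
		int ret = ib_resize_cq(cq, wanted_entries);

		if (ret)
			return ret; /* e.g. -EBUSY if a resize is in flight */

		/* cq->cqe now holds the new (power of two minus one) size */
		return 0;
	}
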
@@ -1070,6 +1204,20 @@ static int mthca_init_node_data(struct mthca_dev *dev)
 		goto out;
 
 	init_query_mad(in_mad);
+	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+	err = mthca_MAD_IFC(dev, 1, 1,
+			    1, NULL, NULL, in_mad, out_mad,
+			    &status);
+	if (err)
+		goto out;
+	if (status) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
 	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 
 	err = mthca_MAD_IFC(dev, 1, 1,
@@ -1113,14 +1261,17 @@ int mthca_register_device(struct mthca_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 	dev->ib_dev.node_type            = IB_NODE_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
@@ -1128,6 +1279,7 @@ int mthca_register_device(struct mthca_dev *dev)
 	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
 	dev->ib_dev.query_device         = mthca_query_device;
 	dev->ib_dev.query_port           = mthca_query_port;
+	dev->ib_dev.modify_device        = mthca_modify_device;
 	dev->ib_dev.modify_port          = mthca_modify_port;
 	dev->ib_dev.query_pkey           = mthca_query_pkey;
 	dev->ib_dev.query_gid            = mthca_query_gid;
@@ -1137,11 +1289,13 @@ int mthca_register_device(struct mthca_dev *dev)
 	dev->ib_dev.alloc_pd             = mthca_alloc_pd;
 	dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
 	dev->ib_dev.create_ah            = mthca_ah_create;
+	dev->ib_dev.query_ah             = mthca_ah_query;
 	dev->ib_dev.destroy_ah           = mthca_ah_destroy;
 
 	if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
 		dev->ib_dev.create_srq           = mthca_create_srq;
-		dev->ib_dev.modify_srq		 = mthca_modify_srq;
+		dev->ib_dev.modify_srq           = mthca_modify_srq;
+		dev->ib_dev.query_srq            = mthca_query_srq;
 		dev->ib_dev.destroy_srq          = mthca_destroy_srq;
 
 		if (mthca_is_memfree(dev))
@@ -1152,8 +1306,10 @@ int mthca_register_device(struct mthca_dev *dev)
 
 	dev->ib_dev.create_qp            = mthca_create_qp;
 	dev->ib_dev.modify_qp            = mthca_modify_qp;
+	dev->ib_dev.query_qp             = mthca_query_qp;
 	dev->ib_dev.destroy_qp           = mthca_destroy_qp;
 	dev->ib_dev.create_cq            = mthca_create_cq;
+	dev->ib_dev.resize_cq            = mthca_resize_cq;
 	dev->ib_dev.destroy_cq           = mthca_destroy_cq;
 	dev->ib_dev.poll_cq              = mthca_poll_cq;
 	dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1e73947b4702..2e7f52136965 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ struct mthca_ah {
  * - wait_event until ref count is zero
  *
 * It is the consumer's responsibility to make sure that no QP
- * operations (WQE posting or state modification) are pending when the
+ * operations (WQE posting or state modification) are pending when a
  * QP is destroyed.  Also, the consumer must make sure that calls to
- * qp_modify are serialized.
+ * qp_modify are serialized.  Similarly, the consumer is responsible
+ * for ensuring that no CQ resize operations are pending when a CQ
+ * is destroyed.
  *
  * Possible optimizations (wait for profile data to see if/where we
  * have locks bouncing between CPUs):
@@ -176,25 +178,40 @@ struct mthca_ah {
  *   send queue and one for the receive queue)
  */
 
+struct mthca_cq_buf {
+	union mthca_buf		queue;
+	struct mthca_mr		mr;
+	int			is_direct;
+};
+
+struct mthca_cq_resize {
+	struct mthca_cq_buf	buf;
+	int			cqe;
+	enum {
+		CQ_RESIZE_ALLOC,
+		CQ_RESIZE_READY,
+		CQ_RESIZE_SWAPPED
+	}			state;
+};
+
 struct mthca_cq {
-	struct ib_cq           ibcq;
-	spinlock_t             lock;
-	atomic_t               refcount;
-	int                    cqn;
-	u32                    cons_index;
-	int                    is_direct;
-	int                    is_kernel;
+	struct ib_cq		ibcq;
+	spinlock_t		lock;
+	atomic_t		refcount;
+	int			cqn;
+	u32			cons_index;
+	struct mthca_cq_buf	buf;
+	struct mthca_cq_resize *resize_buf;
+	int			is_kernel;
 
 	/* Next fields are Arbel only */
-	int                    set_ci_db_index;
-	__be32                *set_ci_db;
-	int                    arm_db_index;
-	__be32                *arm_db;
-	int                    arm_sn;
+	int			set_ci_db_index;
+	__be32		       *set_ci_db;
+	int			arm_db_index;
+	__be32		       *arm_db;
+	int			arm_sn;
 
-	union mthca_buf        queue;
-	struct mthca_mr        mr;
-	wait_queue_head_t      wait;
+	wait_queue_head_t	wait;
 };
 
 struct mthca_srq {
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index fba608ed7df2..f673c461e30b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -2,7 +2,7 @@
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems. All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2004 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -286,207 +286,6 @@ static int to_mthca_st(int transport)
 	}
 }
 
-static const struct {
-	int trans;
-	u32 req_param[NUM_TRANS];
-	u32 opt_param[NUM_TRANS];
-} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
-		[IB_QPS_INIT]  = {
-			.trans = MTHCA_TRANS_RST2INIT,
-			.req_param = {
-				[UD]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_PORT       |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_PORT       |
-					 IB_QP_ACCESS_FLAGS),
-				[RC]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_PORT       |
-					 IB_QP_ACCESS_FLAGS),
-				[MLX] = (IB_QP_PKEY_INDEX |
-					 IB_QP_QKEY),
-			},
-			/* bug-for-bug compatibility with VAPI: */
-			.opt_param = {
-				[MLX] = IB_QP_PORT
-			}
-		},
-	},
-	[IB_QPS_INIT]  = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
-		[IB_QPS_INIT]  = {
-			.trans = MTHCA_TRANS_INIT2INIT,
-			.opt_param = {
-				[UD]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_PORT       |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_PORT       |
-					 IB_QP_ACCESS_FLAGS),
-				[RC]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_PORT       |
-					 IB_QP_ACCESS_FLAGS),
-				[MLX] = (IB_QP_PKEY_INDEX |
-					 IB_QP_QKEY),
-			}
-		},
-		[IB_QPS_RTR]   = {
-			.trans = MTHCA_TRANS_INIT2RTR,
-			.req_param = {
-				[UC]  = (IB_QP_AV                  |
-					 IB_QP_PATH_MTU            |
-					 IB_QP_DEST_QPN            |
-					 IB_QP_RQ_PSN),
-				[RC]  = (IB_QP_AV                  |
-					 IB_QP_PATH_MTU            |
-					 IB_QP_DEST_QPN            |
-					 IB_QP_RQ_PSN              |
-					 IB_QP_MAX_DEST_RD_ATOMIC  |
-					 IB_QP_MIN_RNR_TIMER),
-			},
-			.opt_param = {
-				[UD]  = (IB_QP_PKEY_INDEX |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_ALT_PATH     |
-					 IB_QP_ACCESS_FLAGS |
-					 IB_QP_PKEY_INDEX),
-				[RC]  = (IB_QP_ALT_PATH     |
-					 IB_QP_ACCESS_FLAGS |
-					 IB_QP_PKEY_INDEX),
-				[MLX] = (IB_QP_PKEY_INDEX |
-					 IB_QP_QKEY),
-			}
-		}
-	},
-	[IB_QPS_RTR]   = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
-		[IB_QPS_RTS]   = {
-			.trans = MTHCA_TRANS_RTR2RTS,
-			.req_param = {
-				[UD]  = IB_QP_SQ_PSN,
-				[UC]  = IB_QP_SQ_PSN,
-				[RC]  = (IB_QP_TIMEOUT           |
-					 IB_QP_RETRY_CNT         |
-					 IB_QP_RNR_RETRY         |
-					 IB_QP_SQ_PSN            |
-					 IB_QP_MAX_QP_RD_ATOMIC),
-				[MLX] = IB_QP_SQ_PSN,
-			},
-			.opt_param = {
-				[UD]  = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_CUR_STATE             |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_PATH_MIG_STATE),
-				[RC]  = (IB_QP_CUR_STATE             |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_MIN_RNR_TIMER         |
-					 IB_QP_PATH_MIG_STATE),
-				[MLX] = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-			}
-		}
-	},
-	[IB_QPS_RTS]   = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
-		[IB_QPS_RTS]   = {
-			.trans = MTHCA_TRANS_RTS2RTS,
-			.opt_param = {
-				[UD]  = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_ACCESS_FLAGS          |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_PATH_MIG_STATE),
-				[RC]  = (IB_QP_ACCESS_FLAGS          |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_PATH_MIG_STATE        |
-					 IB_QP_MIN_RNR_TIMER),
-				[MLX] = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-			}
-		},
-		[IB_QPS_SQD]   = {
-			.trans = MTHCA_TRANS_RTS2SQD,
-		},
-	},
-	[IB_QPS_SQD]   = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
-		[IB_QPS_RTS]   = {
-			.trans = MTHCA_TRANS_SQD2RTS,
-			.opt_param = {
-				[UD]  = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_CUR_STATE             |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_PATH_MIG_STATE),
-				[RC]  = (IB_QP_CUR_STATE             |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_MIN_RNR_TIMER         |
-					 IB_QP_PATH_MIG_STATE),
-				[MLX] = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-			}
-		},
-		[IB_QPS_SQD]   = {
-			.trans = MTHCA_TRANS_SQD2SQD,
-			.opt_param = {
-				[UD]  = (IB_QP_PKEY_INDEX            |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_AV                    |
-					 IB_QP_CUR_STATE             |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_PKEY_INDEX            |
-					 IB_QP_PATH_MIG_STATE),
-				[RC]  = (IB_QP_AV                    |
-					 IB_QP_TIMEOUT               |
-					 IB_QP_RETRY_CNT             |
-					 IB_QP_RNR_RETRY             |
-					 IB_QP_MAX_QP_RD_ATOMIC      |
-					 IB_QP_MAX_DEST_RD_ATOMIC    |
-					 IB_QP_CUR_STATE             |
-					 IB_QP_ALT_PATH              |
-					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_PKEY_INDEX            |
-					 IB_QP_MIN_RNR_TIMER         |
-					 IB_QP_PATH_MIG_STATE),
-				[MLX] = (IB_QP_PKEY_INDEX            |
-					 IB_QP_QKEY),
-			}
-		}
-	},
-	[IB_QPS_SQE]   = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR },
-		[IB_QPS_RTS]   = {
-			.trans = MTHCA_TRANS_SQERR2RTS,
-			.opt_param = {
-				[UD]  = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-				[UC]  = (IB_QP_CUR_STATE             |
-					 IB_QP_ACCESS_FLAGS),
-				[MLX] = (IB_QP_CUR_STATE             |
-					 IB_QP_QKEY),
-			}
-		}
-	},
-	[IB_QPS_ERR] = {
-		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
-		[IB_QPS_ERR] = { .trans = MTHCA_TRANS_ANY2ERR }
-	}
-};
-
 static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
 			int attr_mask)
 {
@@ -549,6 +348,141 @@ static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
 	return cpu_to_be32(hw_access_flags);
 }
 
+static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
+{
+	switch (mthca_state) {
+	case MTHCA_QP_STATE_RST:      return IB_QPS_RESET;
+	case MTHCA_QP_STATE_INIT:     return IB_QPS_INIT;
+	case MTHCA_QP_STATE_RTR:      return IB_QPS_RTR;
+	case MTHCA_QP_STATE_RTS:      return IB_QPS_RTS;
+	case MTHCA_QP_STATE_DRAINING:
+	case MTHCA_QP_STATE_SQD:      return IB_QPS_SQD;
+	case MTHCA_QP_STATE_SQE:      return IB_QPS_SQE;
+	case MTHCA_QP_STATE_ERR:      return IB_QPS_ERR;
+	default:                      return -1;
+	}
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
+{
+	switch (mthca_mig_state) {
+	case 0:  return IB_MIG_ARMED;
+	case 1:  return IB_MIG_REARM;
+	case 3:  return IB_MIG_MIGRATED;
+	default: return -1;
+	}
+}
+
+static int to_ib_qp_access_flags(int mthca_flags)
+{
+	int ib_flags = 0;
+
+	if (mthca_flags & MTHCA_QP_BIT_RRE)
+		ib_flags |= IB_ACCESS_REMOTE_READ;
+	if (mthca_flags & MTHCA_QP_BIT_RWE)
+		ib_flags |= IB_ACCESS_REMOTE_WRITE;
+	if (mthca_flags & MTHCA_QP_BIT_RAE)
+		ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+	return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
+				struct mthca_qp_path *path)
+{
+	memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
+	ib_ah_attr->port_num 	  = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+	ib_ah_attr->dlid     	  = be16_to_cpu(path->rlid);
+	ib_ah_attr->sl       	  = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
+	ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
+	ib_ah_attr->static_rate   = path->static_rate & 0x7;
+	ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+	if (ib_ah_attr->ah_flags) {
+		ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
+		ib_ah_attr->grh.hop_limit  = path->hop_limit;
+		ib_ah_attr->grh.traffic_class =
+			(be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
+		ib_ah_attr->grh.flow_label =
+			be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
+		memcpy(ib_ah_attr->grh.dgid.raw,
+			path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+	}
+}
+
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+		   struct ib_qp_init_attr *qp_init_attr)
+{
+	struct mthca_dev *dev = to_mdev(ibqp->device);
+	struct mthca_qp *qp = to_mqp(ibqp);
+	int err;
+	struct mthca_mailbox *mailbox;
+	struct mthca_qp_param *qp_param;
+	struct mthca_qp_context *context;
+	int mthca_state;
+	u8 status;
+
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
+	if (err)
+		goto out;
+	if (status) {
+		mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
+		err = -EINVAL;
+		goto out;
+	}
+
+	qp_param    = mailbox->buf;
+	context     = &qp_param->context;
+	mthca_state = be32_to_cpu(context->flags) >> 28;
+
+	qp_attr->qp_state 	     = to_ib_qp_state(mthca_state);
+	qp_attr->cur_qp_state 	     = qp_attr->qp_state;
+	qp_attr->path_mtu 	     = context->mtu_msgmax >> 5;
+	qp_attr->path_mig_state      =
+		to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
+	qp_attr->qkey 		     = be32_to_cpu(context->qkey);
+	qp_attr->rq_psn 	     = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
+	qp_attr->sq_psn 	     = be32_to_cpu(context->next_send_psn) & 0xffffff;
+	qp_attr->dest_qp_num 	     = be32_to_cpu(context->remote_qpn) & 0xffffff;
+	qp_attr->qp_access_flags     =
+		to_ib_qp_access_flags(be32_to_cpu(context->params2));
+	qp_attr->cap.max_send_wr     = qp->sq.max;
+	qp_attr->cap.max_recv_wr     = qp->rq.max;
+	qp_attr->cap.max_send_sge    = qp->sq.max_gs;
+	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+	to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+	to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+
+	qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+	qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+
+	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+	qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
+
+	qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+
+	qp_attr->max_dest_rd_atomic =
+		1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
+	qp_attr->min_rnr_timer 	    =
+		(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
+	qp_attr->port_num 	    = qp_attr->ah_attr.port_num;
+	qp_attr->timeout 	    = context->pri_path.ackto >> 3;
+	qp_attr->retry_cnt 	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
+	qp_attr->rnr_retry 	    = context->pri_path.rnr_retry >> 5;
+	qp_attr->alt_port_num 	    = qp_attr->alt_ah_attr.port_num;
+	qp_attr->alt_timeout 	    = context->alt_path.ackto >> 3;
+	qp_init_attr->cap 	    = qp_attr->cap;
+
+out:
+	mthca_free_mailbox(dev, mailbox);
+	return err;
+}
+
 static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path)
 {
 	path->g_mylmc     = ah->src_path_bits & 0x7f;
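
mthca_query_qp() decodes the firmware QP context back into an
ib_qp_attr and is exported as the query_qp method (registered in
mthca_provider.c above), so consumers reach it through ib_query_qp().
A hedged caller-side sketch (dump_qp_state is hypothetical; error
handling trimmed):

	static void dump_qp_state(struct ib_qp *qp)
	{
		struct ib_qp_attr attr;
		struct ib_qp_init_attr init_attr;

		if (!ib_query_qp(qp, &attr, 0, &init_attr))
			printk(KERN_DEBUG "QP 0x%06x: state %d, path MTU %d\n",
			       qp->qp_num, attr.qp_state, attr.path_mtu);
	}
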
@@ -559,9 +493,9 @@ static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path)
 		path->g_mylmc   |= 1 << 7;
 		path->mgid_index = ah->grh.sgid_index;
 		path->hop_limit  = ah->grh.hop_limit;
-		path->sl_tclass_flowlabel = 
+		path->sl_tclass_flowlabel =
 			cpu_to_be32((ah->sl << 28)                |
-				    (ah->grh.traffic_class << 20) | 
+				    (ah->grh.traffic_class << 20) |
 				    (ah->grh.flow_label));
 		memcpy(path->rgid, ah->grh.dgid.raw, 16);
 	} else
@@ -576,18 +510,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	struct mthca_mailbox *mailbox;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *qp_context;
-	u32 req_param, opt_param;
+	u32 sqd_event = 0;
 	u8 status;
 	int err;
 
 	if (attr_mask & IB_QP_CUR_STATE) {
-		if (attr->cur_qp_state != IB_QPS_RTR &&
-		    attr->cur_qp_state != IB_QPS_RTS &&
-		    attr->cur_qp_state != IB_QPS_SQD &&
-		    attr->cur_qp_state != IB_QPS_SQE)
-			return -EINVAL;
-		else
-			cur_state = attr->cur_qp_state;
+		cur_state = attr->cur_qp_state;
 	} else {
 		spin_lock_irq(&qp->sq.lock);
 		spin_lock(&qp->rq.lock);
@@ -596,44 +524,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		spin_unlock_irq(&qp->sq.lock);
 	}
 
-	if (attr_mask & IB_QP_STATE) {
-               if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
-			return -EINVAL;
-		new_state = attr->qp_state;
-	} else
-		new_state = cur_state;
-
-	if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
-		mthca_dbg(dev, "Illegal QP transition "
-			  "%d->%d\n", cur_state, new_state);
-		return -EINVAL;
-	}
-
-	req_param = state_table[cur_state][new_state].req_param[qp->transport];
-	opt_param = state_table[cur_state][new_state].opt_param[qp->transport];
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
-	if ((req_param & attr_mask) != req_param) {
-		mthca_dbg(dev, "QP transition "
-			  "%d->%d missing req attr 0x%08x\n",
-			  cur_state, new_state,
-			  req_param & ~attr_mask);
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+		mthca_dbg(dev, "Bad QP transition (transport %d) "
+			  "%d->%d with attr 0x%08x\n",
+			  qp->transport, cur_state, new_state,
+			  attr_mask);
 		return -EINVAL;
 	}
 
-	if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
-		mthca_dbg(dev, "QP transition (transport %d) "
-			  "%d->%d has extra attr 0x%08x\n",
-			  qp->transport,
-			  cur_state, new_state,
-			  attr_mask & ~(req_param | opt_param |
-						 IB_QP_STATE));
-		return -EINVAL;
-	}
-
-	if ((attr_mask & IB_QP_PKEY_INDEX) && 
+	if ((attr_mask & IB_QP_PKEY_INDEX) &&
 	     attr->pkey_index >= dev->limits.pkey_table_len) {
-		mthca_dbg(dev, "PKey index (%u) too large. max is %d\n",
-			  attr->pkey_index,dev->limits.pkey_table_len-1); 
+		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+			  attr->pkey_index, dev->limits.pkey_table_len-1);
 		return -EINVAL;
 	}
 
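
With the driver-private state_table gone, transition legality and the
required/optional attribute masks are enforced by the shared
ib_modify_qp_is_ok() helper in the core, so every driver applies the
same IB-spec rules.  The attribute sets themselves do not change; for
example, a RESET to INIT transition on a UD QP still requires the P_Key
index, port, and Q_Key (qp_to_init is a hypothetical caller):

	static int qp_to_init(struct ib_qp *qp, u16 pkey_index, u8 port, u32 qkey)
	{
		struct ib_qp_attr attr = {
			.qp_state   = IB_QPS_INIT,
			.pkey_index = pkey_index,
			.port_num   = port,
			.qkey       = qkey,
		};

		return ib_modify_qp(qp, &attr,
				    IB_QP_STATE | IB_QP_PKEY_INDEX |
				    IB_QP_PORT  | IB_QP_QKEY);
	}
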
@@ -733,7 +637,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (attr_mask & IB_QP_RNR_RETRY) {
 		qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
 			attr->rnr_retry << 5;
-		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | 
+		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
 							MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
 	}
 
@@ -748,14 +652,20 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 
 	if (attr_mask & IB_QP_ALT_PATH) {
+		if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
+			mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
+				  attr->alt_pkey_index, dev->limits.pkey_table_len-1);
+			return -EINVAL;
+		}
+
 		if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
-			mthca_dbg(dev, "Alternate port number (%u) is invalid\n", 
+			mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
 				attr->alt_port_num);
 			return -EINVAL;
 		}
 
 		mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path);
-		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | 
+		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
 							      attr->alt_port_num << 24);
 		qp_context->alt_path.ackto = attr->alt_timeout << 3;
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
@@ -841,11 +751,16 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_context->srqn = cpu_to_be32(1 << 24 |
 					       to_msrq(ibqp->srq)->srqn);
 
-	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
-			      qp->qpn, 0, mailbox, 0, &status);
+	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
+	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY		&&
+	    attr->en_sqd_async_notify)
+		sqd_event = 1 << 31;
+
+	err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
+			      mailbox, sqd_event, &status);
 	if (status) {
-		mthca_warn(dev, "modify QP %d returned status %02x.\n",
-			   state_table[cur_state][new_state].trans, status);
+		mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
+			   cur_state, new_state, status);
 		err = -EINVAL;
 	}
 
@@ -1078,10 +993,10 @@ static int mthca_map_memfree(struct mthca_dev *dev,
 		if (ret)
 			goto err_qpc;
 
- 		ret = mthca_table_get(dev, dev->qp_table.rdb_table,
- 				      qp->qpn << dev->qp_table.rdb_shift);
- 		if (ret)
- 			goto err_eqpc;
+		ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+				      qp->qpn << dev->qp_table.rdb_shift);
+		if (ret)
+			goto err_eqpc;
 
 	}
 
@@ -1393,7 +1308,8 @@ void mthca_free_qp(struct mthca_dev *dev,
 	wait_event(qp->wait, !atomic_read(&qp->refcount));
 
 	if (qp->state != IB_QPS_RESET)
-		mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);
+		mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
+				NULL, 0, &status);
 
 	/*
 	 * If this is a userspace QP, the buffers, MR, CQs and so on
@@ -1699,7 +1615,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				    mthca_opcode[wr->opcode]);
 		wmb();
 		((struct mthca_next_seg *) prev_wqe)->ee_nds =
-			cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
+			cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
+				    ((wr->send_flags & IB_SEND_FENCE) ?
+				    MTHCA_NEXT_FENCE : 0));
 
 		if (!size0) {
 			size0 = size;
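
Both the Tavor and Arbel post-send paths now propagate IB_SEND_FENCE
into the WQE's ee_nds word as MTHCA_NEXT_FENCE, letting a consumer
order a work request behind its outstanding RDMA reads.  A hedged
sketch of posting a fenced send (post_fenced_send and the single-SGE
setup are hypothetical):

	static int post_fenced_send(struct ib_qp *qp, struct ib_sge *sge)
	{
		struct ib_send_wr *bad_wr;
		struct ib_send_wr wr = {
			.opcode     = IB_WR_SEND,
			.send_flags = IB_SEND_FENCE | IB_SEND_SIGNALED,
			.sg_list    = sge,
			.num_sge    = 1,
		};

		return ib_post_send(qp, &wr, &bad_wr);
	}
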
@@ -2061,7 +1979,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				    mthca_opcode[wr->opcode]);
 		wmb();
 		((struct mthca_next_seg *) prev_wqe)->ee_nds =
-			cpu_to_be32(MTHCA_NEXT_DBD | size);
+			cpu_to_be32(MTHCA_NEXT_DBD | size |
+				    ((wr->send_flags & IB_SEND_FENCE) ?
+				    MTHCA_NEXT_FENCE : 0));
 
 		if (!size0) {
 			size0 = size;
@@ -2115,7 +2035,7 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	int i;
 	void *wqe;
 
- 	spin_lock_irqsave(&qp->rq.lock, flags);
+	spin_lock_irqsave(&qp->rq.lock, flags);
 
 	/* XXX check that state is OK to post receive */
 
@@ -2182,8 +2102,8 @@ out:
 	return err;
 }
 
-int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
-		       int index, int *dbd, __be32 *new_wqe)
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+			int index, int *dbd, __be32 *new_wqe)
 {
 	struct mthca_next_seg *next;
 
@@ -2193,7 +2113,7 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
 	 */
 	if (qp->ibqp.srq) {
 		*new_wqe = 0;
-		return 0;
+		return;
 	}
 
 	if (is_send)
@@ -2207,8 +2127,6 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
 			(next->ee_nds & cpu_to_be32(0x3f));
 	else
 		*new_wqe = 0;
-
-	return 0;
 }
 
 int __devinit mthca_init_qp_table(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index e7e153d9c4c6..47a6a754a591 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -49,7 +49,8 @@ struct mthca_tavor_srq_context {
 	__be32 state_pd;
 	__be32 lkey;
 	__be32 uar;
-	__be32 wqe_cnt;
+	__be16 limit_watermark;
+	__be16 wqe_cnt;
 	u32    reserved[2];
 };
 
@@ -271,6 +272,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 	srq->first_free = 0;
 	srq->last_free  = srq->max - 1;
 
+	attr->max_wr    = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
+	attr->max_sge   = srq->max_gs;
+
 	return 0;
 
 err_out_free_srq:
@@ -339,7 +343,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
 
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		     enum ib_srq_attr_mask attr_mask)
-{	
+{
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
 	int ret;
@@ -360,6 +364,41 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 	return 0;
 }
 
+int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+	struct mthca_dev *dev = to_mdev(ibsrq->device);
+	struct mthca_srq *srq = to_msrq(ibsrq);
+	struct mthca_mailbox *mailbox;
+	struct mthca_arbel_srq_context *arbel_ctx;
+	struct mthca_tavor_srq_context *tavor_ctx;
+	u8 status;
+	int err;
+
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status);
+	if (err)
+		goto out;
+	if (status) {
+		mthca_warn(dev, "QUERY_SRQ returned status %02x\n", status);
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (mthca_is_memfree(dev)) {
+		arbel_ctx = mailbox->buf;
+		srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
+	} else {
+		tavor_ctx = mailbox->buf;
+		srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
+	}
+
+	srq_attr->max_wr  = (mthca_is_memfree(dev)) ? srq->max - 1 : srq->max;
+	srq_attr->max_sge = srq->max_gs;
+
+out:
+	mthca_free_mailbox(dev, mailbox);
+
+	return err;
+}
+
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type)
 {
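
mthca_query_srq() reads the limit watermark back out of the firmware
context (which is why the Tavor context above gains a distinct
limit_watermark field), and consumers reach it through ib_query_srq().
A hedged caller-side sketch (check_srq_limit is hypothetical):

	static void check_srq_limit(struct ib_srq *srq)
	{
		struct ib_srq_attr attr;

		if (!ib_query_srq(srq, &attr))
			printk(KERN_DEBUG "SRQ: max_wr %u, max_sge %u, limit %u\n",
			       attr.max_wr, attr.max_sge, attr.srq_limit);
	}
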
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index bb015c6494c4..02cc0a766f3a 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -75,6 +75,11 @@ struct mthca_create_cq_resp {
 	__u32 reserved;
 };
 
+struct mthca_resize_cq {
+	__u32 lkey;
+	__u32 reserved;
+};
+
 struct mthca_create_srq {
 	__u32 lkey;
 	__u32 db_index;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 2f85a9a831b1..1251f86ec856 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -217,10 +217,16 @@ struct ipoib_neigh {
 	struct list_head    list;
 };
 
+/*
+ * We stash a pointer to our private neighbour information after our
+ * hardware address in neigh->ha.  The ALIGN() expression here makes
+ * sure that this pointer is stored aligned so that an unaligned
+ * load is not needed to dereference it.
+ */
 static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
 {
-	return (struct ipoib_neigh **) (neigh->ha + 24 -
-					(offsetof(struct neighbour, ha) & 4));
+	return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
+				     INFINIBAND_ALEN, sizeof(void *));
 }
 
 extern struct workqueue_struct *ipoib_workqueue;
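
The old expression hard-coded an offset of 24 bytes (INFINIBAND_ALEN
rounded up) past the start of ha and only compensated for a possible
4-byte misalignment; the new one computes the first pointer-aligned
offset past the hardware address, which stays correct for any layout of
struct neighbour and any word size.  A runnable user-space sketch of
the ALIGN() arithmetic (the offset value is made up for illustration):

	#include <stdio.h>
	#include <stddef.h>

	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
	#define INFINIBAND_ALEN	20	/* IPoIB hardware address length */

	int main(void)
	{
		size_t ha_off  = 30;	/* made-up offsetof(struct neighbour, ha) */
		size_t ptr_off = ALIGN(ha_off + INFINIBAND_ALEN, sizeof(void *));

		/* rounds 50 up to 56 on an LP64 machine */
		printf("pointer stashed at offset %zu\n", ptr_off);
		return 0;
	}
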
@@ -253,7 +259,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_down(struct net_device *dev, int flush);
 int ipoib_ib_dev_stop(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 86bcdd72a107..a1f5a05f2f36 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -416,6 +416,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
 	ret = ipoib_ib_post_receives(dev);
 	if (ret) {
 		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
+		ipoib_ib_dev_stop(dev);
 		return -1;
 	}
 
@@ -434,7 +435,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
 	return ipoib_mcast_start_thread(dev);
 }
 
-int ipoib_ib_dev_down(struct net_device *dev)
+int ipoib_ib_dev_down(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -449,10 +450,11 @@ int ipoib_ib_dev_down(struct net_device *dev)
 		set_bit(IPOIB_PKEY_STOP, &priv->flags);
 		cancel_delayed_work(&priv->pkey_task);
 		mutex_unlock(&pkey_mutex);
-		flush_workqueue(ipoib_workqueue);
+		if (flush)
+			flush_workqueue(ipoib_workqueue);
 	}
 
-	ipoib_mcast_stop_thread(dev, 1);
+	ipoib_mcast_stop_thread(dev, flush);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_flush_paths(dev);
@@ -590,7 +592,7 @@ void ipoib_ib_dev_flush(void *_dev)
 
 	ipoib_dbg(priv, "flushing\n");
 
-	ipoib_ib_dev_down(dev);
+	ipoib_ib_dev_down(dev, 0);
 
 	/*
 	 * The device could have been brought down between the start and when
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c3b5f79d1168..37da8d3dc388 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -133,7 +133,13 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	ipoib_ib_dev_down(dev);
+	/*
+	 * Now flush workqueue to make sure a scheduled task doesn't
+	 * bring our internal state back up.
+	 */
+	flush_workqueue(ipoib_workqueue);
+
+	ipoib_ib_dev_down(dev, 1);
 	ipoib_ib_dev_stop(dev);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
@@ -247,7 +253,6 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
 		*to_ipoib_neigh(neigh->neighbour) = NULL;
-		neigh->neighbour->ops->destructor = NULL;
 		kfree(neigh);
 	}
 
@@ -513,12 +518,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 			   be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
 	} else {
 		neigh->ah  = NULL;
-		if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-			__skb_queue_tail(&neigh->queue, skb);
-		} else {
-			++priv->stats.tx_dropped;
-			dev_kfree_skb_any(skb);
-		}
+		__skb_queue_tail(&neigh->queue, skb);
 
 		if (!path->query && path_rec_start(dev, path))
 			goto err;
@@ -530,7 +530,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
 err:
 	*to_ipoib_neigh(skb->dst->neighbour) = NULL;
 	list_del(&neigh->list);
-	neigh->neighbour->ops->destructor = NULL;
 	kfree(neigh);
 
 	++priv->stats.tx_dropped;
@@ -769,21 +768,9 @@ static void ipoib_neigh_destructor(struct neighbour *n)
 		ipoib_put_ah(ah);
 }
 
-static int ipoib_neigh_setup(struct neighbour *neigh)
-{
-	/*
-	 * Is this kosher?  I can't find anybody in the kernel that
-	 * sets neigh->destructor, so we should be able to set it here
-	 * without trouble.
-	 */
-	neigh->ops->destructor = ipoib_neigh_destructor;
-
-	return 0;
-}
-
 static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
 {
-	parms->neigh_setup = ipoib_neigh_setup;
+	parms->neigh_destructor = ipoib_neigh_destructor;
 
 	return 0;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a2408d7ec598..93c462eaf4fd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -115,7 +115,6 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 		if (neigh->ah)
 			ipoib_put_ah(neigh->ah);
 		*to_ipoib_neigh(neigh->neighbour) = NULL;
-		neigh->neighbour->ops->destructor = NULL;
 		kfree(neigh);
 	}
 
@@ -213,6 +212,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 {
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct ipoib_ah *ah;
 	int ret;
 
 	mcast->mcmember = *mcmember;
@@ -269,8 +269,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 				av.static_rate, priv->local_rate,
 				ib_sa_rate_enum_to_int(mcast->mcmember.rate));
 
-		mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
-		if (!mcast->ah) {
+		ah = ipoib_create_ah(dev, priv->pd, &av);
+		if (!ah) {
 			ipoib_warn(priv, "ib_address_create failed\n");
 		} else {
 			ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
@@ -280,6 +280,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 					be16_to_cpu(mcast->mcmember.mlid),
 					mcast->mcmember.sl);
 		}
+
+		spin_lock_irq(&priv->lock);
+		mcast->ah = ah;
+		spin_unlock_irq(&priv->lock);
 	}
 
 	/* actually send any queued packets */
@@ -432,9 +436,11 @@ static void ipoib_mcast_join_complete(int status,
 	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
 		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
+	mutex_lock(&mcast_mutex);
+
+	spin_lock_irq(&priv->lock);
 	mcast->query = NULL;
 
-	mutex_lock(&mcast_mutex);
 	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
 		if (status == -ETIMEDOUT)
 			queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -443,6 +449,7 @@ static void ipoib_mcast_join_complete(int status,
 					   mcast->backoff * HZ);
 	} else
 		complete(&mcast->done);
+	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
 
 	return;
@@ -630,21 +637,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 	if (flush)
 		flush_workqueue(ipoib_workqueue);
 
+	spin_lock_irq(&priv->lock);
 	if (priv->broadcast && priv->broadcast->query) {
 		ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
 		priv->broadcast->query = NULL;
+		spin_unlock_irq(&priv->lock);
 		ipoib_dbg_mcast(priv, "waiting for bcast\n");
 		wait_for_completion(&priv->broadcast->done);
-	}
+	} else
+		spin_unlock_irq(&priv->lock);
 
 	list_for_each_entry(mcast, &priv->multicast_list, list) {
+		spin_lock_irq(&priv->lock);
 		if (mcast->query) {
 			ib_sa_cancel_query(mcast->query_id, mcast->query);
 			mcast->query = NULL;
+			spin_unlock_irq(&priv->lock);
 			ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
 					IPOIB_GID_ARG(mcast->mcmember.mgid));
 			wait_for_completion(&mcast->done);
-		}
+		} else
+			spin_unlock_irq(&priv->lock);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index faaf10e5fc7b..18d2f53ec34c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -255,6 +255,6 @@ void ipoib_event(struct ib_event_handler *handler,
 	    record->event == IB_EVENT_LID_CHANGE  ||
 	    record->event == IB_EVENT_SM_CHANGE) {
 		ipoib_dbg(priv, "Port active event\n");
-		schedule_work(&priv->flush_task);
+		queue_work(ipoib_workqueue, &priv->flush_task);
 	}
 }
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 960dae5c87d1..a13dcdf90a4f 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1237,6 +1237,87 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
 	return ret;
 }
 
+static ssize_t show_id_ext(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "0x%016llx\n",
+		       (unsigned long long) be64_to_cpu(target->id_ext));
+}
+
+static ssize_t show_ioc_guid(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "0x%016llx\n",
+		       (unsigned long long) be64_to_cpu(target->ioc_guid));
+}
+
+static ssize_t show_service_id(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "0x%016llx\n",
+		       (unsigned long long) be64_to_cpu(target->service_id));
+}
+
+static ssize_t show_pkey(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
+}
+
+static ssize_t show_dgid(struct class_device *cdev, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(cdev));
+
+	if (target->state == SRP_TARGET_DEAD ||
+	    target->state == SRP_TARGET_REMOVED)
+		return -ENODEV;
+
+	return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[0]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[1]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[2]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[3]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[4]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[5]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[6]),
+		       be16_to_cpu(((__be16 *) target->path.dgid.raw)[7]));
+}
+
+static CLASS_DEVICE_ATTR(id_ext,	S_IRUGO, show_id_ext,		NULL);
+static CLASS_DEVICE_ATTR(ioc_guid,	S_IRUGO, show_ioc_guid,		NULL);
+static CLASS_DEVICE_ATTR(service_id,	S_IRUGO, show_service_id,	NULL);
+static CLASS_DEVICE_ATTR(pkey,		S_IRUGO, show_pkey,		NULL);
+static CLASS_DEVICE_ATTR(dgid,		S_IRUGO, show_dgid,		NULL);
+
+static struct class_device_attribute *srp_host_attrs[] = {
+	&class_device_attr_id_ext,
+	&class_device_attr_ioc_guid,
+	&class_device_attr_service_id,
+	&class_device_attr_pkey,
+	&class_device_attr_dgid,
+	NULL
+};
+
 static struct scsi_host_template srp_template = {
 	.module				= THIS_MODULE,
 	.name				= DRV_NAME,
@@ -1249,7 +1330,8 @@ static struct scsi_host_template srp_template = {
 	.this_id			= -1,
 	.sg_tablesize			= SRP_MAX_INDIRECT,
 	.cmd_per_lun			= SRP_SQ_SIZE,
-	.use_clustering			= ENABLE_CLUSTERING
+	.use_clustering			= ENABLE_CLUSTERING,
+	.shost_attrs			= srp_host_attrs
 };
 
 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
@@ -1366,6 +1448,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 				strlcpy(dgid, p + i * 2, 3);
 				target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16);
 			}
+			kfree(p);
 			break;
 
 		case SRP_OPT_PKEY: