-rw-r--r--  Documentation/ABI/stable/sysfs-driver-dma-ioatdma  30
-rw-r--r--  Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt  1
-rw-r--r--  Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt  1
-rw-r--r--  Documentation/devicetree/bindings/dma/sun6i-dma.txt  1
-rw-r--r--  Documentation/dmaengine/provider.txt  7
-rw-r--r--  drivers/dma/Kconfig  6
-rw-r--r--  drivers/dma/Makefile  1
-rw-r--r--  drivers/dma/altera-msgdma.c  927
-rw-r--r--  drivers/dma/amba-pl08x.c  2
-rw-r--r--  drivers/dma/at_xdmac.c  13
-rw-r--r--  drivers/dma/bcm-sba-raid.c  544
-rw-r--r--  drivers/dma/ioat/dma.c  10
-rw-r--r--  drivers/dma/ioat/dma.h  3
-rw-r--r--  drivers/dma/ioat/init.c  2
-rw-r--r--  drivers/dma/ioat/sysfs.c  42
-rw-r--r--  drivers/dma/k3dma.c  12
-rw-r--r--  drivers/dma/of-dma.c  8
-rw-r--r--  drivers/dma/pl330.c  2
-rw-r--r--  drivers/dma/ppc4xx/adma.c  37
-rw-r--r--  drivers/dma/qcom/bam_dma.c  6
-rw-r--r--  drivers/dma/qcom/hidma.c  37
-rw-r--r--  drivers/dma/qcom/hidma.h  7
-rw-r--r--  drivers/dma/qcom/hidma_ll.c  11
-rw-r--r--  drivers/dma/qcom/hidma_mgmt.c  16
-rw-r--r--  drivers/dma/sh/rcar-dmac.c  85
-rw-r--r--  drivers/dma/ste_dma40.c  4
-rw-r--r--  drivers/dma/sun6i-dma.c  33
-rw-r--r--  drivers/dma/ti-dma-crossbar.c  2
-rw-r--r--  drivers/dma/xilinx/xilinx_dma.c  30
-rw-r--r--  include/linux/dma/qcom_bam_dma.h  79
-rw-r--r--  include/linux/dmaengine.h  4
31 files changed, 1587 insertions, 376 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-ioatdma b/Documentation/ABI/stable/sysfs-driver-dma-ioatdma
new file mode 100644
index 000000000000..420c1d09e42f
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-dma-ioatdma
@@ -0,0 +1,30 @@
+What:           sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/cap
+Date:           December 3, 2009
+KernelVersion:  2.6.32
+Contact:        dmaengine@vger.kernel.org
+Description:	Capabilities the DMA supports. Currently these are DMA_PQ,
+		DMA_PQ_VAL, DMA_XOR, DMA_XOR_VAL and DMA_INTERRUPT.
+
+What:           sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/ring_active
+Date:           December 3, 2009
+KernelVersion:  2.6.32
+Contact:        dmaengine@vger.kernel.org
+Description:	The number of descriptors active in the ring.
+
+What:           sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/ring_size
+Date:           December 3, 2009
+KernelVersion:  2.6.32
+Contact:        dmaengine@vger.kernel.org
+Description:	Descriptor ring size, total number of descriptors available.
+
+What:           sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/version
+Date:           December 3, 2009
+KernelVersion:  2.6.32
+Contact:        dmaengine@vger.kernel.org
+Description:	Version of ioatdma device.
+
+What:           sys/devices/pciXXXX:XX/0000:XX:XX.X/dma/dma<n>chan<n>/quickdata/intr_coalesce
+Date:           August 8, 2017
+KernelVersion:  4.14
+Contact:        dmaengine@vger.kernel.org
+Description:	Tunable interrupt delay value, on a per-channel basis.
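These attributes are ordinary sysfs files, so they can be read like any other file. A minimal monitoring sketch in C, assuming a hypothetical channel path (the pciXXXX:XX placeholders above must be replaced with a real ioatdma channel directory on the target system):

#include <stdio.h>

/* Hypothetical path; substitute a real ioatdma channel directory. */
#define QD_DIR "/sys/devices/pci0000:00/0000:00:04.0/dma/dma0chan0/quickdata"

int main(void)
{
	char buf[64];
	FILE *f = fopen(QD_DIR "/ring_size", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("descriptor ring size: %s", buf);
	fclose(f);
	return 0;
}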
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index 79a204d50234..891db41e9420 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -25,6 +25,7 @@ Required Properties:
 		- "renesas,dmac-r8a7794" (R-Car E2)
 		- "renesas,dmac-r8a7795" (R-Car H3)
 		- "renesas,dmac-r8a7796" (R-Car M3-W)
+		- "renesas,dmac-r8a77970" (R-Car V3M)
 
 - reg: base address and length of the registers block for the DMAC
 
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
index e7780a186a36..1be6941ac1e5 100644
--- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -8,6 +8,7 @@ Required Properties:
 	  - "renesas,r8a7793-usb-dmac" (R-Car M2-N)
 	  - "renesas,r8a7794-usb-dmac" (R-Car E2)
 	  - "renesas,r8a7795-usb-dmac" (R-Car H3)
+	  - "renesas,r8a7796-usb-dmac" (R-Car M3-W)
 - reg: base address and length of the registers block for the DMAC
 - interrupts: interrupt specifiers for the DMAC, one for each entry in
   interrupt-names.
diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
index 6b267045f522..98fbe1a5c6dd 100644
--- a/Documentation/devicetree/bindings/dma/sun6i-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
@@ -9,6 +9,7 @@ Required properties:
 		  "allwinner,sun8i-a23-dma"
 		  "allwinner,sun8i-a83t-dma"
 		  "allwinner,sun8i-h3-dma"
+		  "allwinner,sun8i-v3s-dma"
 - reg:		Should contain the registers base address and length
 - interrupts:	Should contain a reference to the interrupt used by this device
 - clocks:	Should contain a reference to the parent AHB clock
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
index a75f52ff2e49..5dbe054a40ad 100644
--- a/Documentation/dmaengine/provider.txt
+++ b/Documentation/dmaengine/provider.txt
@@ -388,6 +388,13 @@ where to put them)
 	  when DMA_CTRL_REUSE is already set
 	- Terminating the channel
 
+  * DMA_PREP_CMD
+    - If set, the client driver tells the DMA controller that the data
+      passed to the DMA API is command data.
+    - Interpretation of command data is DMA controller specific. It can be
+      used for issuing commands to other peripherals, or for register reads
+      or register writes for which the descriptor should be in a format
+      different from that of normal data descriptors.
 
 General Design Notes
 --------------------
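For context, a client exercises the DMA_PREP_CMD flag documented above through the regular dmaengine slave API. A minimal sketch, assuming a hypothetical pre-mapped command buffer (cmd_dma, cmd_len) and a channel chan obtained earlier via dma_request_chan():

	/* Sketch only: cmd_dma, cmd_len and chan are assumptions. */
	struct dma_async_tx_descriptor *tx;

	tx = dmaengine_prep_slave_single(chan, cmd_dma, cmd_len,
					 DMA_MEM_TO_DEV, DMA_PREP_CMD);
	if (!tx)
		return -EIO;	/* controller could not queue the command */

	dmaengine_submit(tx);
	dma_async_issue_pending(chan);

How the controller interprets the buffer contents is, as noted above, entirely controller specific.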
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index fa8f9c07ce73..fadc4d8783bd 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -56,6 +56,12 @@ config DMA_OF
 	select DMA_ENGINE
 
 #devices
+config ALTERA_MSGDMA
+	tristate "Altera / Intel mSGDMA Engine"
+	select DMA_ENGINE
+	help
+	  Enable support for Altera / Intel mSGDMA controller.
+
 config AMBA_PL08X
 	bool "ARM PrimeCell PL080 or PL081 support"
 	depends on ARM_AMBA
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index d12ab2985ed1..f08f8de1b567 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_DMA_OF) += of-dma.o
 obj-$(CONFIG_DMATEST) += dmatest.o
 
 #devices
+obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
new file mode 100644
index 000000000000..32905d5606ac
--- /dev/null
+++ b/drivers/dma/altera-msgdma.c
@@ -0,0 +1,927 @@
+/*
+ * DMA driver for Altera mSGDMA IP core
+ *
+ * Copyright (C) 2017 Stefan Roese <sr@denx.de>
+ *
+ * Based on drivers/dma/xilinx/zynqmp_dma.c, which is:
+ * Copyright (C) 2016 Xilinx, Inc. All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+
+#define MSGDMA_MAX_TRANS_LEN		U32_MAX
+#define MSGDMA_DESC_NUM			1024
+
+/**
+ * struct msgdma_extended_desc - implements an extended descriptor
+ * @read_addr_lo: data buffer source address low bits
+ * @write_addr_lo: data buffer destination address low bits
+ * @len: the number of bytes to transfer per descriptor
+ * @burst_seq_num: bit 31:24 write burst
+ *		   bit 23:16 read burst
+ *		   bit 15:00 sequence number
+ * @stride: bit 31:16 write stride
+ *	    bit 15:00 read stride
+ * @read_addr_hi: data buffer source address high bits
+ * @write_addr_hi: data buffer destination address high bits
+ * @control: characteristics of the transfer
+ */
+struct msgdma_extended_desc {
+	u32 read_addr_lo;
+	u32 write_addr_lo;
+	u32 len;
+	u32 burst_seq_num;
+	u32 stride;
+	u32 read_addr_hi;
+	u32 write_addr_hi;
+	u32 control;
+};
+
+/* mSGDMA descriptor control field bit definitions */
+#define MSGDMA_DESC_CTL_SET_CH(x)	((x) & 0xff)
+#define MSGDMA_DESC_CTL_GEN_SOP		BIT(8)
+#define MSGDMA_DESC_CTL_GEN_EOP		BIT(9)
+#define MSGDMA_DESC_CTL_PARK_READS	BIT(10)
+#define MSGDMA_DESC_CTL_PARK_WRITES	BIT(11)
+#define MSGDMA_DESC_CTL_END_ON_EOP	BIT(12)
+#define MSGDMA_DESC_CTL_END_ON_LEN	BIT(13)
+#define MSGDMA_DESC_CTL_TR_COMP_IRQ	BIT(14)
+#define MSGDMA_DESC_CTL_EARLY_IRQ	BIT(15)
+#define MSGDMA_DESC_CTL_TR_ERR_IRQ	GENMASK(23, 16)
+#define MSGDMA_DESC_CTL_EARLY_DONE	BIT(24)
+
+/*
+ * Writing "1" to the "go" bit commits the entire descriptor into the
+ * descriptor FIFO(s).
+ */
+#define MSGDMA_DESC_CTL_GO		BIT(31)
+
+/* Tx buffer control flags */
+#define MSGDMA_DESC_CTL_TX_FIRST	(MSGDMA_DESC_CTL_GEN_SOP |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_MIDDLE	(MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_LAST		(MSGDMA_DESC_CTL_GEN_EOP |	\
+					 MSGDMA_DESC_CTL_TR_COMP_IRQ |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_SINGLE	(MSGDMA_DESC_CTL_GEN_SOP |	\
+					 MSGDMA_DESC_CTL_GEN_EOP |	\
+					 MSGDMA_DESC_CTL_TR_COMP_IRQ |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_RX_SINGLE	(MSGDMA_DESC_CTL_END_ON_EOP |	\
+					 MSGDMA_DESC_CTL_END_ON_LEN |	\
+					 MSGDMA_DESC_CTL_TR_COMP_IRQ |	\
+					 MSGDMA_DESC_CTL_EARLY_IRQ |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+/* mSGDMA extended descriptor stride definitions */
+#define MSGDMA_DESC_STRIDE_RD		0x00000001
+#define MSGDMA_DESC_STRIDE_WR		0x00010000
+#define MSGDMA_DESC_STRIDE_RW		0x00010001
+
+/* mSGDMA dispatcher control and status register map */
+#define MSGDMA_CSR_STATUS		0x00	/* Read / Clear */
+#define MSGDMA_CSR_CONTROL		0x04	/* Read / Write */
+#define MSGDMA_CSR_RW_FILL_LEVEL	0x08	/* 31:16 - write fill level */
+						/* 15:00 - read fill level */
+#define MSGDMA_CSR_RESP_FILL_LEVEL	0x0c	/* response FIFO fill level */
+#define MSGDMA_CSR_RW_SEQ_NUM		0x10	/* 31:16 - write seq number */
+						/* 15:00 - read seq number */
+
+/* mSGDMA CSR status register bit definitions */
+#define MSGDMA_CSR_STAT_BUSY			BIT(0)
+#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY		BIT(1)
+#define MSGDMA_CSR_STAT_DESC_BUF_FULL		BIT(2)
+#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY		BIT(3)
+#define MSGDMA_CSR_STAT_RESP_BUF_FULL		BIT(4)
+#define MSGDMA_CSR_STAT_STOPPED			BIT(5)
+#define MSGDMA_CSR_STAT_RESETTING		BIT(6)
+#define MSGDMA_CSR_STAT_STOPPED_ON_ERR		BIT(7)
+#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY	BIT(8)
+#define MSGDMA_CSR_STAT_IRQ			BIT(9)
+#define MSGDMA_CSR_STAT_MASK			GENMASK(9, 0)
+#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ	GENMASK(8, 0)
+
+#define DESC_EMPTY	(MSGDMA_CSR_STAT_DESC_BUF_EMPTY | \
+			 MSGDMA_CSR_STAT_RESP_BUF_EMPTY)
+
+/* mSGDMA CSR control register bit definitions */
+#define MSGDMA_CSR_CTL_STOP			BIT(0)
+#define MSGDMA_CSR_CTL_RESET			BIT(1)
+#define MSGDMA_CSR_CTL_STOP_ON_ERR		BIT(2)
+#define MSGDMA_CSR_CTL_STOP_ON_EARLY		BIT(3)
+#define MSGDMA_CSR_CTL_GLOBAL_INTR		BIT(4)
+#define MSGDMA_CSR_CTL_STOP_DESCS		BIT(5)
+
+/* mSGDMA CSR fill level bits */
+#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v)		(((v) & 0xffff0000) >> 16)
+#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v)		((v) & 0x0000ffff)
+#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v)	((v) & 0x0000ffff)
+
+#define MSGDMA_CSR_SEQ_NUM_GET(v)		(((v) & 0xffff0000) >> 16)
+
+/* mSGDMA response register map */
+#define MSGDMA_RESP_BYTES_TRANSFERRED	0x00
+#define MSGDMA_RESP_STATUS		0x04
+
+/* mSGDMA response register bit definitions */
+#define MSGDMA_RESP_EARLY_TERM	BIT(8)
+#define MSGDMA_RESP_ERR_MASK	0xff
+
+/**
+ * struct msgdma_sw_desc - implements a sw descriptor
+ * @async_tx: support for the async_tx api
+ * @hw_desc: associated HW descriptor
+ * @node: node within the device's descriptor lists
+ * @tx_list: list of descriptors chained to this transaction
+ */
+struct msgdma_sw_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct msgdma_extended_desc hw_desc;
+	struct list_head node;
+	struct list_head tx_list;
+};
+
+/**
+ * struct msgdma_device - DMA device structure
+ */
+struct msgdma_device {
+	spinlock_t lock;
+	struct device *dev;
+	struct tasklet_struct irq_tasklet;
+	struct list_head pending_list;
+	struct list_head free_list;
+	struct list_head active_list;
+	struct list_head done_list;
+	u32 desc_free_cnt;
+	bool idle;
+
+	struct dma_device dmadev;
+	struct dma_chan	dmachan;
+	dma_addr_t hw_desq;
+	struct msgdma_sw_desc *sw_desq;
+	unsigned int npendings;
+
+	struct dma_slave_config slave_cfg;
+
+	int irq;
+
+	/* mSGDMA controller */
+	void __iomem *csr;
+
+	/* mSGDMA descriptors */
+	void __iomem *desc;
+
+	/* mSGDMA response */
+	void __iomem *resp;
+};
+
+#define to_mdev(chan)	container_of(chan, struct msgdma_device, dmachan)
+#define tx_to_desc(tx)	container_of(tx, struct msgdma_sw_desc, async_tx)
+
+/**
+ * msgdma_get_descriptor - Get the sw descriptor from the pool
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ *
+ * Return: The sw descriptor
+ */
+static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc;
+
+	spin_lock_bh(&mdev->lock);
+	desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node);
+	list_del(&desc->node);
+	spin_unlock_bh(&mdev->lock);
+
+	INIT_LIST_HEAD(&desc->tx_list);
+
+	return desc;
+}
+
+/**
+ * msgdma_free_descriptor - Free descriptor back to the free list
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @desc: Transaction descriptor pointer
+ */
+static void msgdma_free_descriptor(struct msgdma_device *mdev,
+				   struct msgdma_sw_desc *desc)
+{
+	struct msgdma_sw_desc *child, *next;
+
+	mdev->desc_free_cnt++;
+	list_add_tail(&desc->node, &mdev->free_list);
+	list_for_each_entry_safe(child, next, &desc->tx_list, node) {
+		mdev->desc_free_cnt++;
+		list_move_tail(&child->node, &mdev->free_list);
+	}
+}
+
+/**
+ * msgdma_free_desc_list - Free descriptors list
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @list: List of descriptors to free
+ */
+static void msgdma_free_desc_list(struct msgdma_device *mdev,
+				  struct list_head *list)
+{
+	struct msgdma_sw_desc *desc, *next;
+
+	list_for_each_entry_safe(desc, next, list, node)
+		msgdma_free_descriptor(mdev, desc);
+}
+
+/**
+ * msgdma_desc_config - Configure the descriptor
+ * @desc: Hw descriptor pointer
+ * @dst: Destination buffer address
+ * @src: Source buffer address
+ * @len: Transfer length
+ * @stride: Read/write stride to program into the descriptor
+ */
+static void msgdma_desc_config(struct msgdma_extended_desc *desc,
+			       dma_addr_t dst, dma_addr_t src, size_t len,
+			       u32 stride)
+{
+	/* Set lower 32bits of src & dst addresses in the descriptor */
+	desc->read_addr_lo = lower_32_bits(src);
+	desc->write_addr_lo = lower_32_bits(dst);
+
+	/* Set upper 32bits of src & dst addresses in the descriptor */
+	desc->read_addr_hi = upper_32_bits(src);
+	desc->write_addr_hi = upper_32_bits(dst);
+
+	desc->len = len;
+	desc->stride = stride;
+	desc->burst_seq_num = 0;	/* 0 will result in max burst length */
+
+	/*
+	 * Don't set interrupt on xfer end yet, this will be done later
+	 * for the "last" descriptor
+	 */
+	desc->control = MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO |
+		MSGDMA_DESC_CTL_END_ON_LEN;
+}
+
+/**
+ * msgdma_desc_config_eod - Mark the descriptor as end descriptor
+ * @desc: Hw descriptor pointer
+ */
+static void msgdma_desc_config_eod(struct msgdma_extended_desc *desc)
+{
+	desc->control |= MSGDMA_DESC_CTL_TR_COMP_IRQ;
+}
+
+/**
+ * msgdma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor pointer
+ *
+ * Return: cookie value
+ */
+static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct msgdma_device *mdev = to_mdev(tx->chan);
+	struct msgdma_sw_desc *new;
+	dma_cookie_t cookie;
+
+	new = tx_to_desc(tx);
+	spin_lock_bh(&mdev->lock);
+	cookie = dma_cookie_assign(tx);
+
+	list_add_tail(&new->node, &mdev->pending_list);
+	spin_unlock_bh(&mdev->lock);
+
+	return cookie;
+}
+
+/**
+ * msgdma_prep_memcpy - prepare descriptors for memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: Destination buffer address
+ * @dma_src: Source buffer address
+ * @len: Transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
+		   dma_addr_t dma_src, size_t len, ulong flags)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	struct msgdma_sw_desc *new, *first = NULL;
+	struct msgdma_extended_desc *desc;
+	size_t copy;
+	u32 desc_cnt;
+
+	desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN);
+
+	spin_lock_bh(&mdev->lock);
+	if (desc_cnt > mdev->desc_free_cnt) {
+		spin_unlock_bh(&mdev->lock);
+		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
+		return NULL;
+	}
+	mdev->desc_free_cnt -= desc_cnt;
+	spin_unlock_bh(&mdev->lock);
+
+	do {
+		/* Allocate and populate the descriptor */
+		new = msgdma_get_descriptor(mdev);
+
+		copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
+		desc = &new->hw_desc;
+		msgdma_desc_config(desc, dma_dst, dma_src, copy,
+				   MSGDMA_DESC_STRIDE_RW);
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+		if (!first)
+			first = new;
+		else
+			list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	msgdma_desc_config_eod(desc);
+	async_tx_ack(&first->async_tx);
+	first->async_tx.flags = flags;
+
+	return &first->async_tx;
+}
+
+/**
+ * msgdma_prep_slave_sg - prepare descriptors for a slave sg transaction
+ *
+ * @dchan: DMA channel
+ * @sgl: Memory-side scatter-gather list
+ * @sg_len: Number of entries in the scatter-gather list
+ * @dir: DMA transfer direction
+ * @flags: transfer ack flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *
+msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+		     unsigned int sg_len, enum dma_transfer_direction dir,
+		     unsigned long flags, void *context)
+
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	struct dma_slave_config *cfg = &mdev->slave_cfg;
+	struct msgdma_sw_desc *new, *first = NULL;
+	void *desc = NULL;
+	size_t len, avail;
+	dma_addr_t dma_dst, dma_src;
+	u32 desc_cnt = 0, i;
+	struct scatterlist *sg;
+	u32 stride;
+
+	for_each_sg(sgl, sg, sg_len, i)
+		desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN);
+
+	spin_lock_bh(&mdev->lock);
+	if (desc_cnt > mdev->desc_free_cnt) {
+		spin_unlock_bh(&mdev->lock);
+		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
+		return NULL;
+	}
+	mdev->desc_free_cnt -= desc_cnt;
+	spin_unlock_bh(&mdev->lock);
+
+	avail = sg_dma_len(sgl);
+
+	/* Run until we are out of scatterlist entries */
+	while (true) {
+		/* Allocate and populate the descriptor */
+		new = msgdma_get_descriptor(mdev);
+
+		desc = &new->hw_desc;
+		len = min_t(size_t, avail, MSGDMA_MAX_TRANS_LEN);
+
+		if (dir == DMA_MEM_TO_DEV) {
+			dma_src = sg_dma_address(sgl) + sg_dma_len(sgl) - avail;
+			dma_dst = cfg->dst_addr;
+			stride = MSGDMA_DESC_STRIDE_RD;
+		} else {
+			dma_src = cfg->src_addr;
+			dma_dst = sg_dma_address(sgl) + sg_dma_len(sgl) - avail;
+			stride = MSGDMA_DESC_STRIDE_WR;
+		}
+		msgdma_desc_config(desc, dma_dst, dma_src, len, stride);
+		avail -= len;
+
+		if (!first)
+			first = new;
+		else
+			list_add_tail(&new->node, &first->tx_list);
+
+		/* Fetch the next scatterlist entry */
+		if (avail == 0) {
+			if (sg_len == 0)
+				break;
+			sgl = sg_next(sgl);
+			if (sgl == NULL)
+				break;
+			sg_len--;
+			avail = sg_dma_len(sgl);
+		}
+	}
+
+	msgdma_desc_config_eod(desc);
+	first->async_tx.flags = flags;
+
+	return &first->async_tx;
+}
+
+static int msgdma_dma_config(struct dma_chan *dchan,
+			     struct dma_slave_config *config)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+
+	memcpy(&mdev->slave_cfg, config, sizeof(*config));
+
+	return 0;
+}
+
+static void msgdma_reset(struct msgdma_device *mdev)
+{
+	u32 val;
+	int ret;
+
+	/* Reset mSGDMA */
+	iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS);
+	iowrite32(MSGDMA_CSR_CTL_RESET, mdev->csr + MSGDMA_CSR_CONTROL);
+
+	ret = readl_poll_timeout(mdev->csr + MSGDMA_CSR_STATUS, val,
+				 (val & MSGDMA_CSR_STAT_RESETTING) == 0,
+				 1, 10000);
+	if (ret)
+		dev_err(mdev->dev, "DMA channel did not reset\n");
+
+	/* Clear all status bits */
+	iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS);
+
+	/* Enable the DMA controller including interrupts */
+	iowrite32(MSGDMA_CSR_CTL_STOP_ON_ERR | MSGDMA_CSR_CTL_STOP_ON_EARLY |
+		  MSGDMA_CSR_CTL_GLOBAL_INTR, mdev->csr + MSGDMA_CSR_CONTROL);
+
+	mdev->idle = true;
+}
+
+static void msgdma_copy_one(struct msgdma_device *mdev,
+			    struct msgdma_sw_desc *desc)
+{
+	void __iomem *hw_desc = mdev->desc;
+
+	/*
+	 * Check if the DESC FIFO is not full. If it is full, we need to
+	 * wait for at least one entry to become free again.
+	 */
+	while (ioread32(mdev->csr + MSGDMA_CSR_STATUS) &
+	       MSGDMA_CSR_STAT_DESC_BUF_FULL)
+		mdelay(1);
+
+	/*
+	 * The descriptor needs to get copied into the descriptor FIFO
+	 * of the DMA controller. The descriptor will get flushed to the
+	 * FIFO, once the last word (control word) is written. Since we
+	 * are not 100% sure that memcpy() writes all words in the "correct"
+	 * order (address from low to high) on all architectures, we make
+	 * sure this control word is written last by coding it separately and
+	 * adding some write-barriers here.
+	 */
+	memcpy((void __force *)hw_desc, &desc->hw_desc,
+	       sizeof(desc->hw_desc) - sizeof(u32));
+
+	/* Write control word last to flush this descriptor into the FIFO */
+	mdev->idle = false;
+	wmb();
+	iowrite32(desc->hw_desc.control, hw_desc +
+		  offsetof(struct msgdma_extended_desc, control));
+	wmb();
+}
+
+/**
+ * msgdma_copy_desc_to_fifo - copy descriptor(s) into controller FIFO
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @desc: Transaction descriptor pointer
+ */
+static void msgdma_copy_desc_to_fifo(struct msgdma_device *mdev,
+				     struct msgdma_sw_desc *desc)
+{
+	struct msgdma_sw_desc *sdesc, *next;
+
+	msgdma_copy_one(mdev, desc);
+
+	list_for_each_entry_safe(sdesc, next, &desc->tx_list, node)
+		msgdma_copy_one(mdev, sdesc);
+}
+
+/**
+ * msgdma_start_transfer - Initiate the new transfer
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_start_transfer(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc;
+
+	if (!mdev->idle)
+		return;
+
+	desc = list_first_entry_or_null(&mdev->pending_list,
+					struct msgdma_sw_desc, node);
+	if (!desc)
+		return;
+
+	list_splice_tail_init(&mdev->pending_list, &mdev->active_list);
+	msgdma_copy_desc_to_fifo(mdev, desc);
+}
+
+/**
+ * msgdma_issue_pending - Issue pending transactions
+ * @chan: DMA channel pointer
+ */
+static void msgdma_issue_pending(struct dma_chan *chan)
+{
+	struct msgdma_device *mdev = to_mdev(chan);
+
+	spin_lock_bh(&mdev->lock);
+	msgdma_start_transfer(mdev);
+	spin_unlock_bh(&mdev->lock);
+}
+
+/**
+ * msgdma_chan_desc_cleanup - Cleanup the completed descriptors
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_chan_desc_cleanup(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc, *next;
+
+	list_for_each_entry_safe(desc, next, &mdev->done_list, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		list_del(&desc->node);
+
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+		if (callback) {
+			spin_unlock(&mdev->lock);
+			callback(callback_param);
+			spin_lock(&mdev->lock);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		msgdma_free_descriptor(mdev, desc);
+	}
+}
+
+/**
+ * msgdma_complete_descriptor - Mark the active descriptor as complete
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_complete_descriptor(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc;
+
+	desc = list_first_entry_or_null(&mdev->active_list,
+					struct msgdma_sw_desc, node);
+	if (!desc)
+		return;
+	list_del(&desc->node);
+	dma_cookie_complete(&desc->async_tx);
+	list_add_tail(&desc->node, &mdev->done_list);
+}
+
+/**
+ * msgdma_free_descriptors - Free channel descriptors
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_free_descriptors(struct msgdma_device *mdev)
+{
+	msgdma_free_desc_list(mdev, &mdev->active_list);
+	msgdma_free_desc_list(mdev, &mdev->pending_list);
+	msgdma_free_desc_list(mdev, &mdev->done_list);
+}
+
+/**
+ * msgdma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel pointer
+ */
+static void msgdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+
+	spin_lock_bh(&mdev->lock);
+	msgdma_free_descriptors(mdev);
+	spin_unlock_bh(&mdev->lock);
+	kfree(mdev->sw_desq);
+}
+
+/**
+ * msgdma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: Number of descriptors on success, negative error value on failure
+ */
+static int msgdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	struct msgdma_sw_desc *desc;
+	int i;
+
+	mdev->sw_desq = kcalloc(MSGDMA_DESC_NUM, sizeof(*desc), GFP_NOWAIT);
+	if (!mdev->sw_desq)
+		return -ENOMEM;
+
+	mdev->idle = true;
+	mdev->desc_free_cnt = MSGDMA_DESC_NUM;
+
+	INIT_LIST_HEAD(&mdev->free_list);
+
+	for (i = 0; i < MSGDMA_DESC_NUM; i++) {
+		desc = mdev->sw_desq + i;
+		dma_async_tx_descriptor_init(&desc->async_tx, &mdev->dmachan);
+		desc->async_tx.tx_submit = msgdma_tx_submit;
+		list_add_tail(&desc->node, &mdev->free_list);
+	}
+
+	return MSGDMA_DESC_NUM;
+}
+
+/**
+ * msgdma_tasklet - Completion tasklet handler
+ * @data: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_tasklet(unsigned long data)
+{
+	struct msgdma_device *mdev = (struct msgdma_device *)data;
+	u32 count;
+	u32 __maybe_unused size;
+	u32 __maybe_unused status;
+
+	spin_lock(&mdev->lock);
+
+	/* Read number of responses that are available */
+	count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
+	dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
+		__func__, __LINE__, count);
+
+	while (count--) {
+		/*
+		 * Read both longwords to purge this response from the FIFO.
+		 * On Avalon-MM implementations, size and status do not
+		 * have any real values, like transferred bytes or error
+		 * bits. So we need to just drop these values.
+		 */
+		size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED);
+		status = ioread32(mdev->resp + MSGDMA_RESP_STATUS);
+
+		msgdma_complete_descriptor(mdev);
+		msgdma_chan_desc_cleanup(mdev);
+	}
+
+	spin_unlock(&mdev->lock);
+}
+
+/**
+ * msgdma_irq_handler - Altera mSGDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Altera mSGDMA device structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t msgdma_irq_handler(int irq, void *data)
+{
+	struct msgdma_device *mdev = data;
+	u32 status;
+
+	status = ioread32(mdev->csr + MSGDMA_CSR_STATUS);
+	if ((status & MSGDMA_CSR_STAT_BUSY) == 0) {
+		/* Start next transfer if the DMA controller is idle */
+		spin_lock(&mdev->lock);
+		mdev->idle = true;
+		msgdma_start_transfer(mdev);
+		spin_unlock(&mdev->lock);
+	}
+
+	tasklet_schedule(&mdev->irq_tasklet);
+
+	/* Clear interrupt in mSGDMA controller */
+	iowrite32(MSGDMA_CSR_STAT_IRQ, mdev->csr + MSGDMA_CSR_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * msgdma_dev_remove - Device remove function
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_dev_remove(struct msgdma_device *mdev)
+{
+	if (!mdev)
+		return;
+
+	devm_free_irq(mdev->dev, mdev->irq, mdev);
+	tasklet_kill(&mdev->irq_tasklet);
+	list_del(&mdev->dmachan.device_node);
+}
+
+static int request_and_map(struct platform_device *pdev, const char *name,
+			   struct resource **res, void __iomem **ptr)
+{
+	struct resource *region;
+	struct device *device = &pdev->dev;
+
+	*res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+	if (*res == NULL) {
+		dev_err(device, "resource %s not defined\n", name);
+		return -ENODEV;
+	}
+
+	region = devm_request_mem_region(device, (*res)->start,
+					 resource_size(*res), dev_name(device));
+	if (region == NULL) {
+		dev_err(device, "unable to request %s\n", name);
+		return -EBUSY;
+	}
+
+	*ptr = devm_ioremap_nocache(device, region->start,
+				    resource_size(region));
+	if (*ptr == NULL) {
+		dev_err(device, "ioremap_nocache of %s failed!", name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * msgdma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int msgdma_probe(struct platform_device *pdev)
+{
+	struct msgdma_device *mdev;
+	struct dma_device *dma_dev;
+	struct resource *dma_res;
+	int ret;
+
+	mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_NOWAIT);
+	if (!mdev)
+		return -ENOMEM;
+
+	mdev->dev = &pdev->dev;
+
+	/* Map CSR space */
+	ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr);
+	if (ret)
+		return ret;
+
+	/* Map (extended) descriptor space */
+	ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc);
+	if (ret)
+		return ret;
+
+	/* Map response space */
+	ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, mdev);
+
+	/* Get interrupt nr from platform data */
+	mdev->irq = platform_get_irq(pdev, 0);
+	if (mdev->irq < 0)
+		return -ENXIO;
+
+	ret = devm_request_irq(&pdev->dev, mdev->irq, msgdma_irq_handler,
+			       0, dev_name(&pdev->dev), mdev);
+	if (ret)
+		return ret;
+
+	tasklet_init(&mdev->irq_tasklet, msgdma_tasklet, (unsigned long)mdev);
+
+	dma_cookie_init(&mdev->dmachan);
+
+	spin_lock_init(&mdev->lock);
+
+	INIT_LIST_HEAD(&mdev->active_list);
+	INIT_LIST_HEAD(&mdev->pending_list);
+	INIT_LIST_HEAD(&mdev->done_list);
+	INIT_LIST_HEAD(&mdev->free_list);
+
+	dma_dev = &mdev->dmadev;
+
+	/* Set DMA capabilities */
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM) |
+		BIT(DMA_MEM_TO_MEM);
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	/* Init DMA link list */
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Set base routines */
+	dma_dev->device_tx_status = dma_cookie_status;
+	dma_dev->device_issue_pending = msgdma_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	dma_dev->copy_align = DMAENGINE_ALIGN_4_BYTES;
+	dma_dev->device_prep_dma_memcpy = msgdma_prep_memcpy;
+	dma_dev->device_prep_slave_sg = msgdma_prep_slave_sg;
+	dma_dev->device_config = msgdma_dma_config;
+
+	dma_dev->device_alloc_chan_resources = msgdma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = msgdma_free_chan_resources;
+
+	mdev->dmachan.device = dma_dev;
+	list_add_tail(&mdev->dmachan.device_node, &dma_dev->channels);
+
+	/* Set DMA mask to 64 bits */
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		dev_warn(&pdev->dev, "unable to set coherent mask to 64\n");
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (ret)
+			goto fail;
+	}
+
+	msgdma_reset(mdev);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto fail;
+
+	dev_notice(&pdev->dev, "Altera mSGDMA driver probe success\n");
+
+	return 0;
+
+fail:
+	msgdma_dev_remove(mdev);
+
+	return ret;
+}
+
+/**
+ * msgdma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int msgdma_remove(struct platform_device *pdev)
+{
+	struct msgdma_device *mdev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&mdev->dmadev);
+	msgdma_dev_remove(mdev);
+
+	dev_notice(&pdev->dev, "Altera mSGDMA driver removed\n");
+
+	return 0;
+}
+
+static struct platform_driver msgdma_driver = {
+	.driver = {
+		.name = "altera-msgdma",
+	},
+	.probe = msgdma_probe,
+	.remove = msgdma_remove,
+};
+
+module_platform_driver(msgdma_driver);
+
+MODULE_ALIAS("platform:altera-msgdma");
+MODULE_DESCRIPTION("Altera mSGDMA driver");
+MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
+MODULE_LICENSE("GPL");
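From a consumer's point of view, the new driver is exercised through the generic dmaengine memcpy API. A minimal sketch, assuming hypothetical pre-mapped DMA addresses dst and src and a transfer length len:

	/* Sketch only: dst, src and len are assumptions. */
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	chan = dma_request_chan_by_mask(&mask);
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len, DMA_PREP_INTERRUPT);
	if (!tx) {
		dma_release_channel(chan);
		return -EIO;
	}
	cookie = dmaengine_submit(tx);
	dma_async_issue_pending(chan);
	/* ...completion can later be polled via dma_async_is_tx_complete()... */

Note that msgdma_prep_memcpy() above acks the transaction itself and splits transfers larger than MSGDMA_MAX_TRANS_LEN across multiple hardware descriptors.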
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 13cc95c0474c..b52b0d55247e 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -3033,7 +3033,7 @@ static struct vendor_data vendor_ftdmac020 = {
 	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
-static struct amba_id pl08x_ids[] = {
+static const struct amba_id pl08x_ids[] = {
 	/* Samsung PL080S variant */
 	{
 		.id	= 0x0a141080,
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 7d4e0bcda9af..c00e3923d7d8 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -875,7 +875,7 @@ at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
 	dwidth = at_xdmac_align_width(chan, src | dst | chunk->size);
 	if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
 		dev_dbg(chan2dev(chan),
-			"%s: chunk too big (%d, max size %lu)...\n",
+			"%s: chunk too big (%zu, max size %lu)...\n",
 			__func__, chunk->size,
 			AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth);
 		return NULL;
@@ -956,7 +956,7 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
 	if ((xt->numf > 1) && (xt->frame_size > 1))
 		return NULL;
 
-	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
+	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n",
 		__func__, &xt->src_start, &xt->dst_start,	xt->numf,
 		xt->frame_size, flags);
 
@@ -990,7 +990,7 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
 			dst_skip = chunk->size + dst_icg;
 
 			dev_dbg(chan2dev(chan),
-				"%s: chunk size=%d, src icg=%d, dst icg=%d\n",
+				"%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n",
 				__func__, chunk->size, src_icg, dst_icg);
 
 			desc = at_xdmac_interleaved_queue_desc(chan, atchan,
@@ -1207,7 +1207,7 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
 	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
 	struct at_xdmac_desc	*desc;
 
-	dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+	dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n",
 		__func__, &dest, len, value, flags);
 
 	if (unlikely(!len))
@@ -1883,8 +1883,11 @@ static int atmel_xdmac_resume(struct device *dev)
 	struct at_xdmac_chan	*atchan;
 	struct dma_chan		*chan, *_chan;
 	int			i;
+	int ret;
 
-	clk_prepare_enable(atxdmac->clk);
+	ret = clk_prepare_enable(atxdmac->clk);
+	if (ret)
+		return ret;
 
 	/* Clear pending interrupts. */
 	for (i = 0; i < atxdmac->dma.chancnt; i++) {
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index e41bbc7cb094..6c2c44724637 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -36,6 +36,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/list.h>
@@ -48,7 +49,8 @@
 
 #include "dmaengine.h"
 
-/* SBA command related defines */
+/* ====== Driver macros and defines ===== */
+
 #define SBA_TYPE_SHIFT					48
 #define SBA_TYPE_MASK					GENMASK(1, 0)
 #define SBA_TYPE_A					0x0
@@ -82,39 +84,40 @@
 #define SBA_CMD_WRITE_BUFFER				0xc
 #define SBA_CMD_GALOIS					0xe
 
+#define SBA_MAX_REQ_PER_MBOX_CHANNEL			8192
+
 /* Driver helper macros */
 #define to_sba_request(tx)		\
 	container_of(tx, struct sba_request, tx)
 #define to_sba_device(dchan)		\
 	container_of(dchan, struct sba_device, dma_chan)
 
-enum sba_request_state {
-	SBA_REQUEST_STATE_FREE = 1,
-	SBA_REQUEST_STATE_ALLOCED = 2,
-	SBA_REQUEST_STATE_PENDING = 3,
-	SBA_REQUEST_STATE_ACTIVE = 4,
-	SBA_REQUEST_STATE_RECEIVED = 5,
-	SBA_REQUEST_STATE_COMPLETED = 6,
-	SBA_REQUEST_STATE_ABORTED = 7,
+/* ===== Driver data structures ===== */
+
+enum sba_request_flags {
+	SBA_REQUEST_STATE_FREE		= 0x001,
+	SBA_REQUEST_STATE_ALLOCED	= 0x002,
+	SBA_REQUEST_STATE_PENDING	= 0x004,
+	SBA_REQUEST_STATE_ACTIVE	= 0x008,
+	SBA_REQUEST_STATE_ABORTED	= 0x010,
+	SBA_REQUEST_STATE_MASK		= 0x0ff,
+	SBA_REQUEST_FENCE		= 0x100,
 };
 
 struct sba_request {
 	/* Global state */
 	struct list_head node;
 	struct sba_device *sba;
-	enum sba_request_state state;
-	bool fence;
+	u32 flags;
 	/* Chained requests management */
 	struct sba_request *first;
 	struct list_head next;
-	unsigned int next_count;
 	atomic_t next_pending_count;
 	/* BRCM message data */
-	void *resp;
-	dma_addr_t resp_dma;
-	struct brcm_sba_command *cmds;
 	struct brcm_message msg;
 	struct dma_async_tx_descriptor tx;
+	/* SBA commands */
+	struct brcm_sba_command cmds[0];
 };
 
 enum sba_version {
@@ -152,19 +155,18 @@ struct sba_device {
 	void *cmds_base;
 	dma_addr_t cmds_dma_base;
 	spinlock_t reqs_lock;
-	struct sba_request *reqs;
 	bool reqs_fence;
 	struct list_head reqs_alloc_list;
 	struct list_head reqs_pending_list;
 	struct list_head reqs_active_list;
-	struct list_head reqs_received_list;
-	struct list_head reqs_completed_list;
 	struct list_head reqs_aborted_list;
 	struct list_head reqs_free_list;
-	int reqs_free_count;
+	/* DebugFS directory entries */
+	struct dentry *root;
+	struct dentry *stats;
 };
 
-/* ====== SBA command helper routines ===== */
+/* ====== Command helper routines ===== */
 
 static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
 {
@@ -196,32 +198,50 @@ static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
 	       ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
 }
 
-/* ====== Channel resource management routines ===== */
+/* ====== General helper routines ===== */
+
+static void sba_peek_mchans(struct sba_device *sba)
+{
+	int mchan_idx;
+
+	for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
+		mbox_client_peek_data(sba->mchans[mchan_idx]);
+}
 
 static struct sba_request *sba_alloc_request(struct sba_device *sba)
 {
+	bool found = false;
 	unsigned long flags;
 	struct sba_request *req = NULL;
 
 	spin_lock_irqsave(&sba->reqs_lock, flags);
+	list_for_each_entry(req, &sba->reqs_free_list, node) {
+		if (async_tx_test_ack(&req->tx)) {
+			list_move_tail(&req->node, &sba->reqs_alloc_list);
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
 
-	req = list_first_entry_or_null(&sba->reqs_free_list,
-				       struct sba_request, node);
-	if (req) {
-		list_move_tail(&req->node, &sba->reqs_alloc_list);
-		req->state = SBA_REQUEST_STATE_ALLOCED;
-		req->fence = false;
-		req->first = req;
-		INIT_LIST_HEAD(&req->next);
-		req->next_count = 1;
-		atomic_set(&req->next_pending_count, 1);
-
-		sba->reqs_free_count--;
-
-		dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+	if (!found) {
+		/*
+		 * We have no more free requests, so we peek the
+		 * mailbox channels hoping a few active requests
+		 * have completed, which would create more room
+		 * for new requests.
+		 */
+		sba_peek_mchans(sba);
+		return NULL;
 	}
 
-	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+	req->flags = SBA_REQUEST_STATE_ALLOCED;
+	req->first = req;
+	INIT_LIST_HEAD(&req->next);
+	atomic_set(&req->next_pending_count, 1);
+
+	dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+	async_tx_ack(&req->tx);
 
 	return req;
 }
@@ -231,7 +251,8 @@ static void _sba_pending_request(struct sba_device *sba,
 				 struct sba_request *req)
 {
 	lockdep_assert_held(&sba->reqs_lock);
-	req->state = SBA_REQUEST_STATE_PENDING;
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_PENDING;
 	list_move_tail(&req->node, &sba->reqs_pending_list);
 	if (list_empty(&sba->reqs_active_list))
 		sba->reqs_fence = false;
@@ -246,9 +267,10 @@ static bool _sba_active_request(struct sba_device *sba,
 		sba->reqs_fence = false;
 	if (sba->reqs_fence)
 		return false;
-	req->state = SBA_REQUEST_STATE_ACTIVE;
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_ACTIVE;
 	list_move_tail(&req->node, &sba->reqs_active_list);
-	if (req->fence)
+	if (req->flags & SBA_REQUEST_FENCE)
 		sba->reqs_fence = true;
 	return true;
 }
@@ -258,7 +280,8 @@ static void _sba_abort_request(struct sba_device *sba,
 			       struct sba_request *req)
 {
 	lockdep_assert_held(&sba->reqs_lock);
-	req->state = SBA_REQUEST_STATE_ABORTED;
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_ABORTED;
 	list_move_tail(&req->node, &sba->reqs_aborted_list);
 	if (list_empty(&sba->reqs_active_list))
 		sba->reqs_fence = false;
@@ -269,42 +292,11 @@ static void _sba_free_request(struct sba_device *sba,
 			      struct sba_request *req)
 {
 	lockdep_assert_held(&sba->reqs_lock);
-	req->state = SBA_REQUEST_STATE_FREE;
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_FREE;
 	list_move_tail(&req->node, &sba->reqs_free_list);
 	if (list_empty(&sba->reqs_active_list))
 		sba->reqs_fence = false;
-	sba->reqs_free_count++;
-}
-
-static void sba_received_request(struct sba_request *req)
-{
-	unsigned long flags;
-	struct sba_device *sba = req->sba;
-
-	spin_lock_irqsave(&sba->reqs_lock, flags);
-	req->state = SBA_REQUEST_STATE_RECEIVED;
-	list_move_tail(&req->node, &sba->reqs_received_list);
-	spin_unlock_irqrestore(&sba->reqs_lock, flags);
-}
-
-static void sba_complete_chained_requests(struct sba_request *req)
-{
-	unsigned long flags;
-	struct sba_request *nreq;
-	struct sba_device *sba = req->sba;
-
-	spin_lock_irqsave(&sba->reqs_lock, flags);
-
-	req->state = SBA_REQUEST_STATE_COMPLETED;
-	list_move_tail(&req->node, &sba->reqs_completed_list);
-	list_for_each_entry(nreq, &req->next, next) {
-		nreq->state = SBA_REQUEST_STATE_COMPLETED;
-		list_move_tail(&nreq->node, &sba->reqs_completed_list);
-	}
-	if (list_empty(&sba->reqs_active_list))
-		sba->reqs_fence = false;
-
-	spin_unlock_irqrestore(&sba->reqs_lock, flags);
 }
 
 static void sba_free_chained_requests(struct sba_request *req)
@@ -332,8 +324,7 @@ static void sba_chain_request(struct sba_request *first,
 
 	list_add_tail(&req->next, &first->next);
 	req->first = first;
-	first->next_count++;
-	atomic_set(&first->next_pending_count, first->next_count);
+	atomic_inc(&first->next_pending_count);
 
 	spin_unlock_irqrestore(&sba->reqs_lock, flags);
 }
@@ -349,14 +340,6 @@ static void sba_cleanup_nonpending_requests(struct sba_device *sba)
 	list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node)
 		_sba_free_request(sba, req);
 
-	/* Freeup all received request */
-	list_for_each_entry_safe(req, req1, &sba->reqs_received_list, node)
-		_sba_free_request(sba, req);
-
-	/* Freeup all completed request */
-	list_for_each_entry_safe(req, req1, &sba->reqs_completed_list, node)
-		_sba_free_request(sba, req);
-
 	/* Set all active requests as aborted */
 	list_for_each_entry_safe(req, req1, &sba->reqs_active_list, node)
 		_sba_abort_request(sba, req);
@@ -383,26 +366,6 @@ static void sba_cleanup_pending_requests(struct sba_device *sba)
 	spin_unlock_irqrestore(&sba->reqs_lock, flags);
 }
 
-/* ====== DMAENGINE callbacks ===== */
-
-static void sba_free_chan_resources(struct dma_chan *dchan)
-{
-	/*
-	 * Channel resources are pre-alloced so we just free-up
-	 * whatever we can so that we can re-use pre-alloced
-	 * channel resources next time.
-	 */
-	sba_cleanup_nonpending_requests(to_sba_device(dchan));
-}
-
-static int sba_device_terminate_all(struct dma_chan *dchan)
-{
-	/* Cleanup all pending requests */
-	sba_cleanup_pending_requests(to_sba_device(dchan));
-
-	return 0;
-}
-
 static int sba_send_mbox_request(struct sba_device *sba,
 				 struct sba_request *req)
 {
@@ -419,42 +382,156 @@ static int sba_send_mbox_request(struct sba_device *sba,
 		dev_err(sba->dev, "send message failed with error %d", ret);
 		return ret;
 	}
+
+	/* Check error returned by mailbox controller */
 	ret = req->msg.error;
 	if (ret < 0) {
 		dev_err(sba->dev, "message error %d", ret);
-		return ret;
 	}
 
-	return 0;
+	/* Signal txdone for mailbox channel */
+	mbox_client_txdone(sba->mchans[mchans_idx], ret);
+
+	return ret;
 }
 
-static void sba_issue_pending(struct dma_chan *dchan)
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_process_pending_requests(struct sba_device *sba)
 {
 	int ret;
-	unsigned long flags;
-	struct sba_request *req, *req1;
-	struct sba_device *sba = to_sba_device(dchan);
+	u32 count;
+	struct sba_request *req;
 
-	spin_lock_irqsave(&sba->reqs_lock, flags);
+	/*
+	 * Process a few pending requests
+	 *
+	 * For now, we process (<number_of_mailbox_channels> * 8)
+	 * requests at a time.
+	 */
+	count = sba->mchans_count * 8;
+	while (!list_empty(&sba->reqs_pending_list) && count) {
+		/* Get the first pending request */
+		req = list_first_entry(&sba->reqs_pending_list,
+				       struct sba_request, node);
 
-	/* Process all pending request */
-	list_for_each_entry_safe(req, req1, &sba->reqs_pending_list, node) {
 		/* Try to make request active */
 		if (!_sba_active_request(sba, req))
 			break;
 
 		/* Send request to mailbox channel */
-		spin_unlock_irqrestore(&sba->reqs_lock, flags);
 		ret = sba_send_mbox_request(sba, req);
-		spin_lock_irqsave(&sba->reqs_lock, flags);
-
-		/* If something went wrong then keep request pending */
 		if (ret < 0) {
 			_sba_pending_request(sba, req);
 			break;
 		}
+
+		count--;
+	}
+}
+
+static void sba_process_received_request(struct sba_device *sba,
+					 struct sba_request *req)
+{
+	unsigned long flags;
+	struct dma_async_tx_descriptor *tx;
+	struct sba_request *nreq, *first = req->first;
+
+	/* Process only after all chained requests are received */
+	if (!atomic_dec_return(&first->next_pending_count)) {
+		tx = &first->tx;
+
+		WARN_ON(tx->cookie < 0);
+		if (tx->cookie > 0) {
+			dma_cookie_complete(tx);
+			dmaengine_desc_get_callback_invoke(tx, NULL);
+			dma_descriptor_unmap(tx);
+			tx->callback = NULL;
+			tx->callback_result = NULL;
+		}
+
+		dma_run_dependencies(tx);
+
+		spin_lock_irqsave(&sba->reqs_lock, flags);
+
+		/* Free all requests chained to first request */
+		list_for_each_entry(nreq, &first->next, next)
+			_sba_free_request(sba, nreq);
+		INIT_LIST_HEAD(&first->next);
+
+		/* Free the first request */
+		_sba_free_request(sba, first);
+
+		/* Process pending requests */
+		_sba_process_pending_requests(sba);
+
+		spin_unlock_irqrestore(&sba->reqs_lock, flags);
 	}
+}
+
+static void sba_write_stats_in_seqfile(struct sba_device *sba,
+				       struct seq_file *file)
+{
+	unsigned long flags;
+	struct sba_request *req;
+	u32 free_count = 0, alloced_count = 0;
+	u32 pending_count = 0, active_count = 0, aborted_count = 0;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	list_for_each_entry(req, &sba->reqs_free_list, node)
+		if (async_tx_test_ack(&req->tx))
+			free_count++;
+
+	list_for_each_entry(req, &sba->reqs_alloc_list, node)
+		alloced_count++;
+
+	list_for_each_entry(req, &sba->reqs_pending_list, node)
+		pending_count++;
+
+	list_for_each_entry(req, &sba->reqs_active_list, node)
+		active_count++;
 
+	list_for_each_entry(req, &sba->reqs_aborted_list, node)
+		aborted_count++;
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+	seq_printf(file, "maximum requests   = %d\n", sba->max_req);
+	seq_printf(file, "free requests      = %d\n", free_count);
+	seq_printf(file, "alloced requests   = %d\n", alloced_count);
+	seq_printf(file, "pending requests   = %d\n", pending_count);
+	seq_printf(file, "active requests    = %d\n", active_count);
+	seq_printf(file, "aborted requests   = %d\n", aborted_count);
+}
+
+/* ====== DMAENGINE callbacks ===== */
+
+static void sba_free_chan_resources(struct dma_chan *dchan)
+{
+	/*
+	 * Channel resources are pre-allocated, so we just free up
+	 * whatever we can, allowing the pre-allocated channel
+	 * resources to be re-used next time.
+	 */
+	sba_cleanup_nonpending_requests(to_sba_device(dchan));
+}
+
+static int sba_device_terminate_all(struct dma_chan *dchan)
+{
+	/* Cleanup all pending requests */
+	sba_cleanup_pending_requests(to_sba_device(dchan));
+
+	return 0;
+}
+
+static void sba_issue_pending(struct dma_chan *dchan)
+{
+	unsigned long flags;
+	struct sba_device *sba = to_sba_device(dchan);
+
+	/* Process pending requests */
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+	_sba_process_pending_requests(sba);
 	spin_unlock_irqrestore(&sba->reqs_lock, flags);
 }
 
@@ -486,17 +563,15 @@ static enum dma_status sba_tx_status(struct dma_chan *dchan,
 				     dma_cookie_t cookie,
 				     struct dma_tx_state *txstate)
 {
-	int mchan_idx;
 	enum dma_status ret;
 	struct sba_device *sba = to_sba_device(dchan);
 
-	for (mchan_idx = 0; mchan_idx < sba->mchans_count; mchan_idx++)
-		mbox_client_peek_data(sba->mchans[mchan_idx]);
-
 	ret = dma_cookie_status(dchan, cookie, txstate);
 	if (ret == DMA_COMPLETE)
 		return ret;
 
+	sba_peek_mchans(sba);
+
 	return dma_cookie_status(dchan, cookie, txstate);
 }
 
@@ -506,6 +581,7 @@ static void sba_fillup_interrupt_msg(struct sba_request *req,
 {
 	u64 cmd;
 	u32 c_mdata;
+	dma_addr_t resp_dma = req->tx.phys;
 	struct brcm_sba_command *cmdsp = cmds;
 
 	/* Type-B command to load dummy data into buf0 */
@@ -521,7 +597,7 @@ static void sba_fillup_interrupt_msg(struct sba_request *req,
 	cmdsp->cmd = cmd;
 	*cmdsp->cmd_dma = cpu_to_le64(cmd);
 	cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
-	cmdsp->data = req->resp_dma;
+	cmdsp->data = resp_dma;
 	cmdsp->data_len = req->sba->hw_resp_size;
 	cmdsp++;
 
@@ -542,11 +618,11 @@ static void sba_fillup_interrupt_msg(struct sba_request *req,
 	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 	if (req->sba->hw_resp_size) {
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-		cmdsp->resp = req->resp_dma;
+		cmdsp->resp = resp_dma;
 		cmdsp->resp_len = req->sba->hw_resp_size;
 	}
 	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
-	cmdsp->data = req->resp_dma;
+	cmdsp->data = resp_dma;
 	cmdsp->data_len = req->sba->hw_resp_size;
 	cmdsp++;
 
@@ -573,7 +649,7 @@ sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags)
 	 * Force fence so that no requests are submitted
 	 * until DMA callback for this request is invoked.
 	 */
-	req->fence = true;
+	req->flags |= SBA_REQUEST_FENCE;
 
 	/* Fillup request message */
 	sba_fillup_interrupt_msg(req, req->cmds, &req->msg);
@@ -593,6 +669,7 @@ static void sba_fillup_memcpy_msg(struct sba_request *req,
 {
 	u64 cmd;
 	u32 c_mdata;
+	dma_addr_t resp_dma = req->tx.phys;
 	struct brcm_sba_command *cmdsp = cmds;
 
 	/* Type-B command to load data into buf0 */
@@ -629,7 +706,7 @@ static void sba_fillup_memcpy_msg(struct sba_request *req,
 	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 	if (req->sba->hw_resp_size) {
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-		cmdsp->resp = req->resp_dma;
+		cmdsp->resp = resp_dma;
 		cmdsp->resp_len = req->sba->hw_resp_size;
 	}
 	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -656,7 +733,8 @@ sba_prep_dma_memcpy_req(struct sba_device *sba,
 	req = sba_alloc_request(sba);
 	if (!req)
 		return NULL;
-	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
 
 	/* Fillup request message */
 	sba_fillup_memcpy_msg(req, req->cmds, &req->msg,
@@ -711,6 +789,7 @@ static void sba_fillup_xor_msg(struct sba_request *req,
 	u64 cmd;
 	u32 c_mdata;
 	unsigned int i;
+	dma_addr_t resp_dma = req->tx.phys;
 	struct brcm_sba_command *cmdsp = cmds;
 
 	/* Type-B command to load data into buf0 */
@@ -766,7 +845,7 @@ static void sba_fillup_xor_msg(struct sba_request *req,
 	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 	if (req->sba->hw_resp_size) {
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-		cmdsp->resp = req->resp_dma;
+		cmdsp->resp = resp_dma;
 		cmdsp->resp_len = req->sba->hw_resp_size;
 	}
 	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -782,7 +861,7 @@ static void sba_fillup_xor_msg(struct sba_request *req,
 	msg->error = 0;
 }
 
-struct sba_request *
+static struct sba_request *
 sba_prep_dma_xor_req(struct sba_device *sba,
 		     dma_addr_t off, dma_addr_t dst, dma_addr_t *src,
 		     u32 src_cnt, size_t len, unsigned long flags)
@@ -793,7 +872,8 @@ sba_prep_dma_xor_req(struct sba_device *sba,
 	req = sba_alloc_request(sba);
 	if (!req)
 		return NULL;
-	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
 
 	/* Fillup request message */
 	sba_fillup_xor_msg(req, req->cmds, &req->msg,
@@ -854,6 +934,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
 	u64 cmd;
 	u32 c_mdata;
 	unsigned int i;
+	dma_addr_t resp_dma = req->tx.phys;
 	struct brcm_sba_command *cmdsp = cmds;
 
 	if (pq_continue) {
@@ -947,7 +1028,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
 		cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 		if (req->sba->hw_resp_size) {
 			cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-			cmdsp->resp = req->resp_dma;
+			cmdsp->resp = resp_dma;
 			cmdsp->resp_len = req->sba->hw_resp_size;
 		}
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -974,7 +1055,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
 		cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 		if (req->sba->hw_resp_size) {
 			cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-			cmdsp->resp = req->resp_dma;
+			cmdsp->resp = resp_dma;
 			cmdsp->resp_len = req->sba->hw_resp_size;
 		}
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -991,7 +1072,7 @@ static void sba_fillup_pq_msg(struct sba_request *req,
 	msg->error = 0;
 }
 
-struct sba_request *
+static struct sba_request *
 sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off,
 		    dma_addr_t *dst_p, dma_addr_t *dst_q, dma_addr_t *src,
 		    u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
@@ -1002,7 +1083,8 @@ sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off,
 	req = sba_alloc_request(sba);
 	if (!req)
 		return NULL;
-	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
 
 	/* Fillup request messages */
 	sba_fillup_pq_msg(req, dmaf_continue(flags),
@@ -1027,6 +1109,7 @@ static void sba_fillup_pq_single_msg(struct sba_request *req,
 	u64 cmd;
 	u32 c_mdata;
 	u8 pos, dpos = raid6_gflog[scf];
+	dma_addr_t resp_dma = req->tx.phys;
 	struct brcm_sba_command *cmdsp = cmds;
 
 	if (!dst_p)
@@ -1105,7 +1188,7 @@ static void sba_fillup_pq_single_msg(struct sba_request *req,
 	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 	if (req->sba->hw_resp_size) {
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-		cmdsp->resp = req->resp_dma;
+		cmdsp->resp = resp_dma;
 		cmdsp->resp_len = req->sba->hw_resp_size;
 	}
 	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -1226,7 +1309,7 @@ skip_q_computation:
 	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
 	if (req->sba->hw_resp_size) {
 		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
-		cmdsp->resp = req->resp_dma;
+		cmdsp->resp = resp_dma;
 		cmdsp->resp_len = req->sba->hw_resp_size;
 	}
 	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
@@ -1243,7 +1326,7 @@ skip_q:
 	msg->error = 0;
 }
 
-struct sba_request *
+static struct sba_request *
 sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off,
 			   dma_addr_t *dst_p, dma_addr_t *dst_q,
 			   dma_addr_t src, u8 scf, size_t len,
@@ -1255,7 +1338,8 @@ sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off,
 	req = sba_alloc_request(sba);
 	if (!req)
 		return NULL;
-	req->fence = (flags & DMA_PREP_FENCE) ? true : false;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
 
 	/* Fillup request messages */
 	sba_fillup_pq_single_msg(req,  dmaf_continue(flags),
@@ -1370,40 +1454,10 @@ fail:
 
 /* ====== Mailbox callbacks ===== */
 
-static void sba_dma_tx_actions(struct sba_request *req)
-{
-	struct dma_async_tx_descriptor *tx = &req->tx;
-
-	WARN_ON(tx->cookie < 0);
-
-	if (tx->cookie > 0) {
-		dma_cookie_complete(tx);
-
-		/*
-		 * Call the callback (must not sleep or submit new
-		 * operations to this channel)
-		 */
-		if (tx->callback)
-			tx->callback(tx->callback_param);
-
-		dma_descriptor_unmap(tx);
-	}
-
-	/* Run dependent operations */
-	dma_run_dependencies(tx);
-
-	/* If waiting for 'ack' then move to completed list */
-	if (!async_tx_test_ack(&req->tx))
-		sba_complete_chained_requests(req);
-	else
-		sba_free_chained_requests(req);
-}
-
 static void sba_receive_message(struct mbox_client *cl, void *msg)
 {
-	unsigned long flags;
 	struct brcm_message *m = msg;
-	struct sba_request *req = m->ctx, *req1;
+	struct sba_request *req = m->ctx;
 	struct sba_device *sba = req->sba;
 
 	/* Report error if message has error */
@@ -1411,52 +1465,37 @@ static void sba_receive_message(struct mbox_client *cl, void *msg)
 		dev_err(sba->dev, "%s got message with error %d",
 			dma_chan_name(&sba->dma_chan), m->error);
 
-	/* Mark request as received */
-	sba_received_request(req);
-
-	/* Wait for all chained requests to be completed */
-	if (atomic_dec_return(&req->first->next_pending_count))
-		goto done;
-
-	/* Point to first request */
-	req = req->first;
-
-	/* Update request */
-	if (req->state == SBA_REQUEST_STATE_RECEIVED)
-		sba_dma_tx_actions(req);
-	else
-		sba_free_chained_requests(req);
+	/* Process received request */
+	sba_process_received_request(sba, req);
+}
 
-	spin_lock_irqsave(&sba->reqs_lock, flags);
+/* ====== Debugfs callbacks ====== */
 
-	/* Re-check all completed request waiting for 'ack' */
-	list_for_each_entry_safe(req, req1, &sba->reqs_completed_list, node) {
-		spin_unlock_irqrestore(&sba->reqs_lock, flags);
-		sba_dma_tx_actions(req);
-		spin_lock_irqsave(&sba->reqs_lock, flags);
-	}
+static int sba_debugfs_stats_show(struct seq_file *file, void *offset)
+{
+	struct platform_device *pdev = to_platform_device(file->private);
+	struct sba_device *sba = platform_get_drvdata(pdev);
 
-	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+	/* Write stats in file */
+	sba_write_stats_in_seqfile(sba, file);
 
-done:
-	/* Try to submit pending request */
-	sba_issue_pending(&sba->dma_chan);
+	return 0;
 }
 
 /* ====== Platform driver routines ===== */
 
 static int sba_prealloc_channel_resources(struct sba_device *sba)
 {
-	int i, j, p, ret = 0;
+	int i, j, ret = 0;
 	struct sba_request *req = NULL;
 
-	sba->resp_base = dma_alloc_coherent(sba->dma_dev.dev,
+	sba->resp_base = dma_alloc_coherent(sba->mbox_dev,
 					    sba->max_resp_pool_size,
 					    &sba->resp_dma_base, GFP_KERNEL);
 	if (!sba->resp_base)
 		return -ENOMEM;
 
-	sba->cmds_base = dma_alloc_coherent(sba->dma_dev.dev,
+	sba->cmds_base = dma_alloc_coherent(sba->mbox_dev,
 					    sba->max_cmds_pool_size,
 					    &sba->cmds_dma_base, GFP_KERNEL);
 	if (!sba->cmds_base) {
@@ -1469,36 +1508,23 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
 	INIT_LIST_HEAD(&sba->reqs_alloc_list);
 	INIT_LIST_HEAD(&sba->reqs_pending_list);
 	INIT_LIST_HEAD(&sba->reqs_active_list);
-	INIT_LIST_HEAD(&sba->reqs_received_list);
-	INIT_LIST_HEAD(&sba->reqs_completed_list);
 	INIT_LIST_HEAD(&sba->reqs_aborted_list);
 	INIT_LIST_HEAD(&sba->reqs_free_list);
 
-	sba->reqs = devm_kcalloc(sba->dev, sba->max_req,
-				 sizeof(*req), GFP_KERNEL);
-	if (!sba->reqs) {
-		ret = -ENOMEM;
-		goto fail_free_cmds_pool;
-	}
-
-	for (i = 0, p = 0; i < sba->max_req; i++) {
-		req = &sba->reqs[i];
+	for (i = 0; i < sba->max_req; i++) {
+		req = devm_kzalloc(sba->dev,
+				sizeof(*req) +
+				sba->max_cmd_per_req * sizeof(req->cmds[0]),
+				GFP_KERNEL);
+		if (!req) {
+			ret = -ENOMEM;
+			goto fail_free_cmds_pool;
+		}
 		INIT_LIST_HEAD(&req->node);
 		req->sba = sba;
-		req->state = SBA_REQUEST_STATE_FREE;
+		req->flags = SBA_REQUEST_STATE_FREE;
 		INIT_LIST_HEAD(&req->next);
-		req->next_count = 1;
 		atomic_set(&req->next_pending_count, 0);
-		req->fence = false;
-		req->resp = sba->resp_base + p;
-		req->resp_dma = sba->resp_dma_base + p;
-		p += sba->hw_resp_size;
-		req->cmds = devm_kcalloc(sba->dev, sba->max_cmd_per_req,
-					 sizeof(*req->cmds), GFP_KERNEL);
-		if (!req->cmds) {
-			ret = -ENOMEM;
-			goto fail_free_cmds_pool;
-		}
 		for (j = 0; j < sba->max_cmd_per_req; j++) {
 			req->cmds[j].cmd = 0;
 			req->cmds[j].cmd_dma = sba->cmds_base +
@@ -1509,21 +1535,20 @@ static int sba_prealloc_channel_resources(struct sba_device *sba)
 		}
 		memset(&req->msg, 0, sizeof(req->msg));
 		dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+		async_tx_ack(&req->tx);
 		req->tx.tx_submit = sba_tx_submit;
-		req->tx.phys = req->resp_dma;
+		req->tx.phys = sba->resp_dma_base + i * sba->hw_resp_size;
 		list_add_tail(&req->node, &sba->reqs_free_list);
 	}
 
-	sba->reqs_free_count = sba->max_req;
-
 	return 0;
 
 fail_free_cmds_pool:
-	dma_free_coherent(sba->dma_dev.dev,
+	dma_free_coherent(sba->mbox_dev,
 			  sba->max_cmds_pool_size,
 			  sba->cmds_base, sba->cmds_dma_base);
 fail_free_resp_pool:
-	dma_free_coherent(sba->dma_dev.dev,
+	dma_free_coherent(sba->mbox_dev,
 			  sba->max_resp_pool_size,
 			  sba->resp_base, sba->resp_dma_base);
 	return ret;
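
Editorial aside: the allocation loop above switches to the trailing-array
idiom, sizing one devm_kzalloc() to cover struct sba_request plus its
cmds[] array. On later kernels the overflow-checked struct_size() helper
from <linux/overflow.h> expresses the same computation; a minimal sketch,
assuming cmds[] is the trailing array member of struct sba_request:

	req = devm_kzalloc(sba->dev,
			   struct_size(req, cmds, sba->max_cmd_per_req),
			   GFP_KERNEL);
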
@@ -1532,9 +1557,9 @@ fail_free_resp_pool:
 static void sba_freeup_channel_resources(struct sba_device *sba)
 {
 	dmaengine_terminate_all(&sba->dma_chan);
-	dma_free_coherent(sba->dma_dev.dev, sba->max_cmds_pool_size,
+	dma_free_coherent(sba->mbox_dev, sba->max_cmds_pool_size,
 			  sba->cmds_base, sba->cmds_dma_base);
-	dma_free_coherent(sba->dma_dev.dev, sba->max_resp_pool_size,
+	dma_free_coherent(sba->mbox_dev, sba->max_resp_pool_size,
 			  sba->resp_base, sba->resp_dma_base);
 	sba->resp_base = NULL;
 	sba->resp_dma_base = 0;
@@ -1625,6 +1650,13 @@ static int sba_probe(struct platform_device *pdev)
 	sba->dev = &pdev->dev;
 	platform_set_drvdata(pdev, sba);
 
+	/* Number of channels equals number of mailbox channels */
+	ret = of_count_phandle_with_args(pdev->dev.of_node,
+					 "mboxes", "#mbox-cells");
+	if (ret <= 0)
+		return -ENODEV;
+	mchans_count = ret;
+
 	/* Determine SBA version from DT compatible string */
 	if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba"))
 		sba->ver = SBA_VER_1;
@@ -1637,14 +1669,12 @@ static int sba_probe(struct platform_device *pdev)
 	/* Derived Configuration parameters */
 	switch (sba->ver) {
 	case SBA_VER_1:
-		sba->max_req = 1024;
 		sba->hw_buf_size = 4096;
 		sba->hw_resp_size = 8;
 		sba->max_pq_coefs = 6;
 		sba->max_pq_srcs = 6;
 		break;
 	case SBA_VER_2:
-		sba->max_req = 1024;
 		sba->hw_buf_size = 4096;
 		sba->hw_resp_size = 8;
 		sba->max_pq_coefs = 30;
@@ -1658,6 +1688,7 @@ static int sba_probe(struct platform_device *pdev)
 	default:
 		return -EINVAL;
 	}
+	sba->max_req = SBA_MAX_REQ_PER_MBOX_CHANNEL * mchans_count;
 	sba->max_cmd_per_req = sba->max_pq_srcs + 3;
 	sba->max_xor_srcs = sba->max_cmd_per_req - 1;
 	sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size;
@@ -1668,25 +1699,17 @@ static int sba_probe(struct platform_device *pdev)
 	sba->client.dev			= &pdev->dev;
 	sba->client.rx_callback		= sba_receive_message;
 	sba->client.tx_block		= false;
-	sba->client.knows_txdone	= false;
+	sba->client.knows_txdone	= true;
 	sba->client.tx_tout		= 0;
 
-	/* Number of channels equals number of mailbox channels */
-	ret = of_count_phandle_with_args(pdev->dev.of_node,
-					 "mboxes", "#mbox-cells");
-	if (ret <= 0)
-		return -ENODEV;
-	mchans_count = ret;
-	sba->mchans_count = 0;
-	atomic_set(&sba->mchans_current, 0);
-
 	/* Allocate mailbox channel array */
-	sba->mchans = devm_kcalloc(&pdev->dev, sba->mchans_count,
+	sba->mchans = devm_kcalloc(&pdev->dev, mchans_count,
 				   sizeof(*sba->mchans), GFP_KERNEL);
 	if (!sba->mchans)
 		return -ENOMEM;
 
 	/* Request mailbox channels */
+	sba->mchans_count = 0;
 	for (i = 0; i < mchans_count; i++) {
 		sba->mchans[i] = mbox_request_channel(&sba->client, i);
 		if (IS_ERR(sba->mchans[i])) {
@@ -1695,6 +1718,7 @@ static int sba_probe(struct platform_device *pdev)
 		}
 		sba->mchans_count++;
 	}
+	atomic_set(&sba->mchans_current, 0);
 
 	/* Find out the underlying mailbox device */
 	ret = of_parse_phandle_with_args(pdev->dev.of_node,
@@ -1723,15 +1747,34 @@ static int sba_probe(struct platform_device *pdev)
 		}
 	}
 
-	/* Register DMA device with linux async framework */
-	ret = sba_async_register(sba);
+	/* Prealloc channel resources */
+	ret = sba_prealloc_channel_resources(sba);
 	if (ret)
 		goto fail_free_mchans;
 
-	/* Prealloc channel resource */
-	ret = sba_prealloc_channel_resources(sba);
+	/* Check availability of debugfs */
+	if (!debugfs_initialized())
+		goto skip_debugfs;
+
+	/* Create debugfs root entry */
+	sba->root = debugfs_create_dir(dev_name(sba->dev), NULL);
+	if (IS_ERR_OR_NULL(sba->root)) {
+		dev_err(sba->dev, "failed to create debugfs root entry\n");
+		sba->root = NULL;
+		goto skip_debugfs;
+	}
+
+	/* Create debugfs stats entry */
+	sba->stats = debugfs_create_devm_seqfile(sba->dev, "stats", sba->root,
+						 sba_debugfs_stats_show);
+	if (IS_ERR_OR_NULL(sba->stats))
+		dev_err(sba->dev, "failed to create debugfs stats file\n");
+skip_debugfs:
+
+	/* Register DMA device with Linux async framework */
+	ret = sba_async_register(sba);
 	if (ret)
-		goto fail_async_dev_unreg;
+		goto fail_free_resources;
 
 	/* Print device info */
 	dev_info(sba->dev, "%s using SBAv%d and %d mailbox channels",
@@ -1740,8 +1783,9 @@ static int sba_probe(struct platform_device *pdev)
 
 	return 0;
 
-fail_async_dev_unreg:
-	dma_async_device_unregister(&sba->dma_dev);
+fail_free_resources:
+	debugfs_remove_recursive(sba->root);
+	sba_freeup_channel_resources(sba);
 fail_free_mchans:
 	for (i = 0; i < sba->mchans_count; i++)
 		mbox_free_channel(sba->mchans[i]);
@@ -1753,10 +1797,12 @@ static int sba_remove(struct platform_device *pdev)
 	int i;
 	struct sba_device *sba = platform_get_drvdata(pdev);
 
-	sba_freeup_channel_resources(sba);
-
 	dma_async_device_unregister(&sba->dma_dev);
 
+	debugfs_remove_recursive(sba->root);
+
+	sba_freeup_channel_resources(sba);
+
 	for (i = 0; i < sba->mchans_count; i++)
 		mbox_free_channel(sba->mchans[i]);
 
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index a371b07a0981..f70cc74032ea 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -644,9 +644,13 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
 		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
 	}
 
-	/* 5 microsecond delay per pending descriptor */
-	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
-	       ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET);
+	/* Per-descriptor interrupt delay in microseconds, tunable via sysfs */
+	if (ioat_chan->intr_coalesce != ioat_chan->prev_intr_coalesce) {
+		writew(min((ioat_chan->intr_coalesce * (active - i)),
+		       IOAT_INTRDELAY_MASK),
+		       ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET);
+		ioat_chan->prev_intr_coalesce = ioat_chan->intr_coalesce;
+	}
 }
 
 static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index a9bc1a15b0d1..56200eefcf5e 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -142,11 +142,14 @@ struct ioatdma_chan {
 	spinlock_t prep_lock;
 	struct ioat_descs descs[2];
 	int desc_chunks;
+	int intr_coalesce;
+	int prev_intr_coalesce;
 };
 
 struct ioat_sysfs_entry {
 	struct attribute attr;
 	ssize_t (*show)(struct dma_chan *, char *);
+	ssize_t (*store)(struct dma_chan *, const char *, size_t);
 };
 
 /**
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index ed8ed1192775..93e006c3441d 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -39,7 +39,7 @@ MODULE_VERSION(IOAT_DMA_VERSION);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel Corporation");
 
-static struct pci_device_id ioat_pci_tbl[] = {
+static const struct pci_device_id ioat_pci_tbl[] = {
 	/* I/OAT v3 platforms */
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c
index cb4a857ee21b..3ac677f29e8f 100644
--- a/drivers/dma/ioat/sysfs.c
+++ b/drivers/dma/ioat/sysfs.c
@@ -64,8 +64,24 @@ ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	return entry->show(&ioat_chan->dma_chan, page);
 }
 
+static ssize_t
+ioat_attr_store(struct kobject *kobj, struct attribute *attr,
+		const char *page, size_t count)
+{
+	struct ioat_sysfs_entry *entry;
+	struct ioatdma_chan *ioat_chan;
+
+	entry = container_of(attr, struct ioat_sysfs_entry, attr);
+	ioat_chan = container_of(kobj, struct ioatdma_chan, kobj);
+
+	if (!entry->store)
+		return -EIO;
+	return entry->store(&ioat_chan->dma_chan, page, count);
+}
+
 const struct sysfs_ops ioat_sysfs_ops = {
 	.show	= ioat_attr_show,
+	.store  = ioat_attr_store,
 };
 
 void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type)
@@ -121,11 +137,37 @@ static ssize_t ring_active_show(struct dma_chan *c, char *page)
 }
 static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
 
+static ssize_t intr_coalesce_show(struct dma_chan *c, char *page)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	return sprintf(page, "%d\n", ioat_chan->intr_coalesce);
+}
+
+static ssize_t intr_coalesce_store(struct dma_chan *c, const char *page,
+				   size_t count)
+{
+	int intr_coalesce = 0;
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	if (sscanf(page, "%d", &intr_coalesce) != 1)
+		return -EINVAL;
+
+	if (intr_coalesce < 0 || intr_coalesce > IOAT_INTRDELAY_MASK)
+		return -EINVAL;
+
+	ioat_chan->intr_coalesce = intr_coalesce;
+
+	return count;
+}
+
+static struct ioat_sysfs_entry intr_coalesce_attr = __ATTR_RW(intr_coalesce);
+
 static struct attribute *ioat_attrs[] = {
 	&ring_size_attr.attr,
 	&ring_active_attr.attr,
 	&ioat_cap_attr.attr,
 	&ioat_version_attr.attr,
+	&intr_coalesce_attr.attr,
 	NULL,
 };
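
Editorial aside: with the store hook wired in, the per-channel interrupt
delay becomes tunable from user space; values above IOAT_INTRDELAY_MASK
are rejected with -EINVAL, and the new value takes effect in __cleanup()
on the next descriptor completion. A hedged userspace sketch (the PCI
path is illustrative; see the ABI document for the naming pattern):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/sys/devices/pci0000:00/0000:00:04.0"
				   "/dma/dma0chan0/quickdata/intr_coalesce";
		const char *val = "5"; /* microseconds per pending descriptor */
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, val, strlen(val)) < 0)
			perror("write");
		close(fd);
		return 0;
	}
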
 
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index 01e25c68dd5a..01d2a750a621 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -223,7 +223,6 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
 			if (c && (tc1 & BIT(i))) {
 				spin_lock_irqsave(&c->vc.lock, flags);
 				vchan_cookie_complete(&p->ds_run->vd);
-				WARN_ON_ONCE(p->ds_done);
 				p->ds_done = p->ds_run;
 				p->ds_run = NULL;
 				spin_unlock_irqrestore(&c->vc.lock, flags);
@@ -274,13 +273,14 @@ static int k3_dma_start_txd(struct k3_dma_chan *c)
 		 */
 		list_del(&ds->vd.node);
 
-		WARN_ON_ONCE(c->phy->ds_run);
-		WARN_ON_ONCE(c->phy->ds_done);
 		c->phy->ds_run = ds;
+		c->phy->ds_done = NULL;
 		/* start dma */
 		k3_dma_set_desc(c->phy, &ds->desc_hw[0]);
 		return 0;
 	}
+	c->phy->ds_run = NULL;
+	c->phy->ds_done = NULL;
 	return -EAGAIN;
 }
 
@@ -722,11 +722,7 @@ static int k3_dma_terminate_all(struct dma_chan *chan)
 			k3_dma_free_desc(&p->ds_run->vd);
 			p->ds_run = NULL;
 		}
-		if (p->ds_done) {
-			k3_dma_free_desc(&p->ds_done->vd);
-			p->ds_done = NULL;
-		}
-
+		p->ds_done = NULL;
 	}
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 	vchan_dma_desc_free_list(&c->vc, &head);
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index faae0bfe1109..91fd395c90c4 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -38,8 +38,8 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec)
 		if (ofdma->of_node == dma_spec->np)
 			return ofdma;
 
-	pr_debug("%s: can't find DMA controller %s\n", __func__,
-		 dma_spec->np->full_name);
+	pr_debug("%s: can't find DMA controller %pOF\n", __func__,
+		 dma_spec->np);
 
 	return NULL;
 }
@@ -255,8 +255,8 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 
 	count = of_property_count_strings(np, "dma-names");
 	if (count < 0) {
-		pr_err("%s: dma-names property of node '%s' missing or empty\n",
-			__func__, np->full_name);
+		pr_err("%s: dma-names property of node '%pOF' missing or empty\n",
+			__func__, np);
 		return ERR_PTR(-ENODEV);
 	}
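
Editorial aside: these conversions use the %pOF printk format specifier,
which prints a struct device_node by its full path and avoids relying on
the full_name string. A minimal sketch of the pattern:

	#include <linux/of.h>
	#include <linux/printk.h>

	static void example_report_node(struct device_node *np)
	{
		/* Prints e.g. "DMA node /soc/dma-controller@7e007000" */
		pr_info("DMA node %pOF\n", np);
	}
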
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index b19ee04567b5..f122c2a7b9f0 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -3023,7 +3023,7 @@ static int pl330_remove(struct amba_device *adev)
 	return 0;
 }
 
-static struct amba_id pl330_ids[] = {
+static const struct amba_id pl330_ids[] = {
 	{
 		.id	= 0x00041330,
 		.mask	= 0x000fffff,
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index b1535b1fe95c..4cf0d4d0cecf 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -4040,9 +4040,9 @@ static int ppc440spe_adma_probe(struct platform_device *ofdev)
 		/* it is DMA0 or DMA1 */
 		idx = of_get_property(np, "cell-index", &len);
 		if (!idx || (len != sizeof(u32))) {
-			dev_err(&ofdev->dev, "Device node %s has missing "
+			dev_err(&ofdev->dev, "Device node %pOF has missing "
 				"or invalid cell-index property\n",
-				np->full_name);
+				np);
 			return -EINVAL;
 		}
 		id = *idx;
@@ -4307,7 +4307,7 @@ static int ppc440spe_adma_remove(struct platform_device *ofdev)
  * "poly" allows setting/checking used polynomial (for PPC440SPe only).
  */
 
-static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
+static ssize_t devices_show(struct device_driver *dev, char *buf)
 {
 	ssize_t size = 0;
 	int i;
@@ -4321,16 +4321,17 @@ static ssize_t show_ppc440spe_devices(struct device_driver *dev, char *buf)
 	}
 	return size;
 }
+static DRIVER_ATTR_RO(devices);
 
-static ssize_t show_ppc440spe_r6enable(struct device_driver *dev, char *buf)
+static ssize_t enable_show(struct device_driver *dev, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE,
 			"PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
 			ppc440spe_r6_enabled ? "EN" : "DIS");
 }
 
-static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
-					const char *buf, size_t count)
+static ssize_t enable_store(struct device_driver *dev, const char *buf,
+			    size_t count)
 {
 	unsigned long val;
 
@@ -4357,8 +4358,9 @@ static ssize_t store_ppc440spe_r6enable(struct device_driver *dev,
 	}
 	return count;
 }
+static DRIVER_ATTR_RW(enable);
 
-static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf)
+static ssize_t poly_show(struct device_driver *dev, char *buf)
 {
 	ssize_t size = 0;
 	u32 reg;
@@ -4377,8 +4379,8 @@ static ssize_t show_ppc440spe_r6poly(struct device_driver *dev, char *buf)
 	return size;
 }
 
-static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
-				      const char *buf, size_t count)
+static ssize_t poly_store(struct device_driver *dev, const char *buf,
+			  size_t count)
 {
 	unsigned long reg, val;
 
@@ -4404,12 +4406,7 @@ static ssize_t store_ppc440spe_r6poly(struct device_driver *dev,
 
 	return count;
 }
-
-static DRIVER_ATTR(devices, S_IRUGO, show_ppc440spe_devices, NULL);
-static DRIVER_ATTR(enable, S_IRUGO | S_IWUSR, show_ppc440spe_r6enable,
-		   store_ppc440spe_r6enable);
-static DRIVER_ATTR(poly, S_IRUGO | S_IWUSR, show_ppc440spe_r6poly,
-		   store_ppc440spe_r6poly);
+static DRIVER_ATTR_RW(poly);
 
 /*
  * Common initialisation for RAID engines; allocate memory for
@@ -4448,8 +4445,7 @@ static int ppc440spe_configure_raid_devices(void)
 	dcr_base = dcr_resource_start(np, 0);
 	dcr_len = dcr_resource_len(np, 0);
 	if (!dcr_base && !dcr_len) {
-		pr_err("%s: can't get DCR registers base/len!\n",
-			np->full_name);
+		pr_err("%pOF: can't get DCR registers base/len!\n", np);
 		of_node_put(np);
 		iounmap(i2o_reg);
 		return -ENODEV;
@@ -4457,7 +4453,7 @@ static int ppc440spe_configure_raid_devices(void)
 
 	i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
 	if (!DCR_MAP_OK(i2o_dcr_host)) {
-		pr_err("%s: failed to map DCRs!\n", np->full_name);
+		pr_err("%pOF: failed to map DCRs!\n", np);
 		of_node_put(np);
 		iounmap(i2o_reg);
 		return -ENODEV;
@@ -4518,15 +4514,14 @@ static int ppc440spe_configure_raid_devices(void)
 	dcr_base = dcr_resource_start(np, 0);
 	dcr_len = dcr_resource_len(np, 0);
 	if (!dcr_base && !dcr_len) {
-		pr_err("%s: can't get DCR registers base/len!\n",
-			np->full_name);
+		pr_err("%pOF: can't get DCR registers base/len!\n", np);
 		ret = -ENODEV;
 		goto out_mq;
 	}
 
 	ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
 	if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
-		pr_err("%s: failed to map DCRs!\n", np->full_name);
+		pr_err("%pOF: failed to map DCRs!\n", np);
 		ret = -ENODEV;
 		goto out_mq;
 	}
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 03c4eb3fd314..6d89fb6a6a92 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -65,6 +65,7 @@ struct bam_desc_hw {
 #define DESC_FLAG_EOT BIT(14)
 #define DESC_FLAG_EOB BIT(13)
 #define DESC_FLAG_NWD BIT(12)
+#define DESC_FLAG_CMD BIT(11)
 
 struct bam_async_desc {
 	struct virt_dma_desc vd;
@@ -645,6 +646,9 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
 		unsigned int curr_offset = 0;
 
 		do {
+			if (flags & DMA_PREP_CMD)
+				desc->flags |= cpu_to_le16(DESC_FLAG_CMD);
+
 			desc->addr = cpu_to_le32(sg_dma_address(sg) +
 						 curr_offset);
 
@@ -960,7 +964,7 @@ static void bam_start_dma(struct bam_chan *bchan)
 
 	/* set any special flags on the last descriptor */
 	if (async_desc->num_desc == async_desc->xfer_len)
-		desc[async_desc->xfer_len - 1].flags =
+		desc[async_desc->xfer_len - 1].flags |=
 					cpu_to_le16(async_desc->flags);
 	else
 		desc[async_desc->xfer_len - 1].flags |=
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
index 34fb6afd229b..e3669850aef4 100644
--- a/drivers/dma/qcom/hidma.c
+++ b/drivers/dma/qcom/hidma.c
@@ -411,7 +411,40 @@ hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dest, dma_addr_t src,
 		return NULL;
 
 	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
-				     src, dest, len, flags);
+				     src, dest, len, flags,
+				     HIDMA_TRE_MEMCPY);
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return &mdesc->desc;
+}
+
+static struct dma_async_tx_descriptor *
+hidma_prep_dma_memset(struct dma_chan *dmach, dma_addr_t dest, int value,
+		size_t len, unsigned long flags)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_desc *mdesc = NULL;
+	struct hidma_dev *mdma = mchan->dmadev;
+	unsigned long irqflags;
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct hidma_desc, node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	if (!mdesc)
+		return NULL;
+
+	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
+				     value, dest, len, flags,
+				     HIDMA_TRE_MEMSET);
 
 	/* Place descriptor in prepared list */
 	spin_lock_irqsave(&mchan->lock, irqflags);
@@ -776,6 +809,7 @@ static int hidma_probe(struct platform_device *pdev)
 	pm_runtime_get_sync(dmadev->ddev.dev);
 
 	dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
+	dma_cap_set(DMA_MEMSET, dmadev->ddev.cap_mask);
 	if (WARN_ON(!pdev->dev.dma_mask)) {
 		rc = -ENXIO;
 		goto dmafree;
@@ -786,6 +820,7 @@ static int hidma_probe(struct platform_device *pdev)
 	dmadev->dev_trca = trca;
 	dmadev->trca_resource = trca_resource;
 	dmadev->ddev.device_prep_dma_memcpy = hidma_prep_dma_memcpy;
+	dmadev->ddev.device_prep_dma_memset = hidma_prep_dma_memset;
 	dmadev->ddev.device_alloc_chan_resources = hidma_alloc_chan_resources;
 	dmadev->ddev.device_free_chan_resources = hidma_free_chan_resources;
 	dmadev->ddev.device_tx_status = hidma_tx_status;
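
Editorial aside: with DMA_MEMSET now advertised in the capability mask,
clients can fill a mapped buffer through the generic dmaengine helper,
which routes to hidma_prep_dma_memset() on a HIDMA channel. A hedged
sketch, assuming the caller already requested the channel and DMA-mapped
the buffer (names are illustrative):

	#include <linux/dmaengine.h>

	static int example_fill_buffer(struct dma_chan *chan,
				       dma_addr_t buf, size_t len)
	{
		struct dma_async_tx_descriptor *txd;
		dma_cookie_t cookie;

		/* Fill 'len' bytes at 'buf' with the byte 0xA5 */
		txd = dmaengine_prep_dma_memset(chan, buf, 0xA5, len,
						DMA_PREP_INTERRUPT |
						DMA_CTRL_ACK);
		if (!txd)
			return -EIO;

		cookie = dmaengine_submit(txd);
		if (dma_submit_error(cookie))
			return -EIO;

		dma_async_issue_pending(chan);
		return 0;
	}
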
diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h
index 41e0aa283828..5f9966e82c0b 100644
--- a/drivers/dma/qcom/hidma.h
+++ b/drivers/dma/qcom/hidma.h
@@ -28,6 +28,11 @@
 #define HIDMA_TRE_DEST_LOW_IDX		4
 #define HIDMA_TRE_DEST_HI_IDX		5
 
+enum tre_type {
+	HIDMA_TRE_MEMCPY = 3,
+	HIDMA_TRE_MEMSET = 4,
+};
+
 struct hidma_tre {
 	atomic_t allocated;		/* if this channel is allocated	    */
 	bool queued;			/* flag whether this is pending     */
@@ -150,7 +155,7 @@ void hidma_ll_start(struct hidma_lldev *llhndl);
 int hidma_ll_disable(struct hidma_lldev *lldev);
 int hidma_ll_enable(struct hidma_lldev *llhndl);
 void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch,
-	dma_addr_t src, dma_addr_t dest, u32 len, u32 flags);
+	dma_addr_t src, dma_addr_t dest, u32 len, u32 flags, u32 txntype);
 void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi);
 int hidma_ll_setup(struct hidma_lldev *lldev);
 struct hidma_lldev *hidma_ll_init(struct device *dev, u32 max_channels,
diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
index 1530a661518d..4999e266b2de 100644
--- a/drivers/dma/qcom/hidma_ll.c
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -105,10 +105,6 @@ enum ch_state {
 	HIDMA_CH_STOPPED = 4,
 };
 
-enum tre_type {
-	HIDMA_TRE_MEMCPY = 3,
-};
-
 enum err_code {
 	HIDMA_EVRE_STATUS_COMPLETE = 1,
 	HIDMA_EVRE_STATUS_ERROR = 4,
@@ -174,8 +170,7 @@ int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name,
 	tre->err_info = 0;
 	tre->lldev = lldev;
 	tre_local = &tre->tre_local[0];
-	tre_local[HIDMA_TRE_CFG_IDX] = HIDMA_TRE_MEMCPY;
-	tre_local[HIDMA_TRE_CFG_IDX] |= (lldev->chidx & 0xFF) << 8;
+	tre_local[HIDMA_TRE_CFG_IDX] = (lldev->chidx & 0xFF) << 8;
 	tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16);	/* set IEOB */
 	*tre_ch = i;
 	if (callback)
@@ -607,7 +602,7 @@ int hidma_ll_disable(struct hidma_lldev *lldev)
 
 void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
 				  dma_addr_t src, dma_addr_t dest, u32 len,
-				  u32 flags)
+				  u32 flags, u32 txntype)
 {
 	struct hidma_tre *tre;
 	u32 *tre_local;
@@ -626,6 +621,8 @@ void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
 	}
 
 	tre_local = &tre->tre_local[0];
+	tre_local[HIDMA_TRE_CFG_IDX] &= ~GENMASK(7, 0);
+	tre_local[HIDMA_TRE_CFG_IDX] |= txntype;
 	tre_local[HIDMA_TRE_LEN_IDX] = len;
 	tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src);
 	tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src);
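
Editorial aside: combining this hunk with the hidma_ll_request() change
above, the TRE configuration word is assembled as sketched below; the
field positions are inferred from the masks and shifts in the code, not
from a datasheet.

	#include <linux/bitops.h>
	#include <linux/types.h>

	/* bits 7:0 transaction type (HIDMA_TRE_MEMCPY = 3,
	 * HIDMA_TRE_MEMSET = 4), bits 15:8 channel index,
	 * bit 16 IEOB (interrupt at end of block) */
	static u32 example_tre_cfg(u32 txntype, u32 chidx)
	{
		return (txntype & 0xFF) | ((chidx & 0xFF) << 8) | BIT(16);
	}
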
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index 5a0991bc4787..7335e2eb9b72 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -28,7 +28,7 @@
 
 #include "hidma_mgmt.h"
 
-#define HIDMA_QOS_N_OFFSET		0x300
+#define HIDMA_QOS_N_OFFSET		0x700
 #define HIDMA_CFG_OFFSET		0x400
 #define HIDMA_MAX_BUS_REQ_LEN_OFFSET	0x41C
 #define HIDMA_MAX_XACTIONS_OFFSET	0x420
@@ -227,7 +227,8 @@ static int hidma_mgmt_probe(struct platform_device *pdev)
 		goto out;
 	}
 
-	if (max_write_request) {
+	if (max_write_request &&
+			(max_write_request != mgmtdev->max_write_request)) {
 		dev_info(&pdev->dev, "overriding max-write-burst-bytes: %d\n",
 			max_write_request);
 		mgmtdev->max_write_request = max_write_request;
@@ -240,7 +241,8 @@ static int hidma_mgmt_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "max-read-burst-bytes missing\n");
 		goto out;
 	}
-	if (max_read_request) {
+	if (max_read_request &&
+			(max_read_request != mgmtdev->max_read_request)) {
 		dev_info(&pdev->dev, "overriding max-read-burst-bytes: %d\n",
 			max_read_request);
 		mgmtdev->max_read_request = max_read_request;
@@ -253,7 +255,8 @@ static int hidma_mgmt_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "max-write-transactions missing\n");
 		goto out;
 	}
-	if (max_wr_xactions) {
+	if (max_wr_xactions &&
+			(max_wr_xactions != mgmtdev->max_wr_xactions)) {
 		dev_info(&pdev->dev, "overriding max-write-transactions: %d\n",
 			max_wr_xactions);
 		mgmtdev->max_wr_xactions = max_wr_xactions;
@@ -266,7 +269,8 @@ static int hidma_mgmt_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "max-read-transactions missing\n");
 		goto out;
 	}
-	if (max_rd_xactions) {
+	if (max_rd_xactions &&
+			(max_rd_xactions != mgmtdev->max_rd_xactions)) {
 		dev_info(&pdev->dev, "overriding max-read-transactions: %d\n",
 			max_rd_xactions);
 		mgmtdev->max_rd_xactions = max_rd_xactions;
@@ -354,7 +358,7 @@ static int __init hidma_mgmt_of_populate_channels(struct device_node *np)
 	struct platform_device_info pdevinfo;
 	struct of_phandle_args out_irq;
 	struct device_node *child;
-	struct resource *res;
+	struct resource *res = NULL;
 	const __be32 *cell;
 	int ret = 0, size, i, num;
 	u64 addr, addr_size;
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index ffcadca53243..2b2c7db3e480 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1690,6 +1690,15 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
 	if (!irqname)
 		return -ENOMEM;
 
+	/*
+	 * Initialize the DMA engine channel and add it to the DMA engine
+	 * channels list.
+	 */
+	chan->device = &dmac->engine;
+	dma_cookie_init(chan);
+
+	list_add_tail(&chan->device_node, &dmac->engine.channels);
+
 	ret = devm_request_threaded_irq(dmac->dev, rchan->irq,
 					rcar_dmac_isr_channel,
 					rcar_dmac_isr_channel_thread, 0,
@@ -1700,15 +1709,6 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
 		return ret;
 	}
 
-	/*
-	 * Initialize the DMA engine channel and add it to the DMA engine
-	 * channels list.
-	 */
-	chan->device = &dmac->engine;
-	dma_cookie_init(chan);
-
-	list_add_tail(&chan->device_node, &dmac->engine.channels);
-
 	return 0;
 }
 
@@ -1794,14 +1794,6 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 	if (!irqname)
 		return -ENOMEM;
 
-	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
-			       irqname, dmac);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
-			irq, ret);
-		return ret;
-	}
-
 	/* Enable runtime PM and initialize the device. */
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
@@ -1818,8 +1810,32 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 		goto error;
 	}
 
-	/* Initialize the channels. */
-	INIT_LIST_HEAD(&dmac->engine.channels);
+	/* Initialize engine */
+	engine = &dmac->engine;
+
+	dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+	dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+	engine->dev		= &pdev->dev;
+	engine->copy_align	= ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE);
+
+	engine->src_addr_widths	= widths;
+	engine->dst_addr_widths	= widths;
+	engine->directions	= BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	engine->device_alloc_chan_resources	= rcar_dmac_alloc_chan_resources;
+	engine->device_free_chan_resources	= rcar_dmac_free_chan_resources;
+	engine->device_prep_dma_memcpy		= rcar_dmac_prep_dma_memcpy;
+	engine->device_prep_slave_sg		= rcar_dmac_prep_slave_sg;
+	engine->device_prep_dma_cyclic		= rcar_dmac_prep_dma_cyclic;
+	engine->device_config			= rcar_dmac_device_config;
+	engine->device_terminate_all		= rcar_dmac_chan_terminate_all;
+	engine->device_tx_status		= rcar_dmac_tx_status;
+	engine->device_issue_pending		= rcar_dmac_issue_pending;
+	engine->device_synchronize		= rcar_dmac_device_synchronize;
+
+	INIT_LIST_HEAD(&engine->channels);
 
 	for (i = 0; i < dmac->n_channels; ++i) {
 		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i],
@@ -1828,6 +1844,14 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 			goto error;
 	}
 
+	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
+			       irqname, dmac);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
+			irq, ret);
+		goto error;
+	}
+
 	/* Register the DMAC as a DMA provider for DT. */
 	ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate,
 					 NULL);
@@ -1839,29 +1863,6 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 	 *
 	 * Default transfer size of 32 bytes requires 32-byte alignment.
 	 */
-	engine = &dmac->engine;
-	dma_cap_set(DMA_MEMCPY, engine->cap_mask);
-	dma_cap_set(DMA_SLAVE, engine->cap_mask);
-
-	engine->dev = &pdev->dev;
-	engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE);
-
-	engine->src_addr_widths = widths;
-	engine->dst_addr_widths = widths;
-	engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
-	engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
-
-	engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources;
-	engine->device_free_chan_resources = rcar_dmac_free_chan_resources;
-	engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy;
-	engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg;
-	engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic;
-	engine->device_config = rcar_dmac_device_config;
-	engine->device_terminate_all = rcar_dmac_chan_terminate_all;
-	engine->device_tx_status = rcar_dmac_tx_status;
-	engine->device_issue_pending = rcar_dmac_issue_pending;
-	engine->device_synchronize = rcar_dmac_device_synchronize;
-
 	ret = dma_async_device_register(engine);
 	if (ret < 0)
 		goto error;
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 97e1d8b00e22..c2b089af0420 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -79,7 +79,7 @@ static int dma40_memcpy_channels[] = {
 };
 
 /* Default configuration for physical memcpy */
-static struct stedma40_chan_cfg dma40_memcpy_conf_phy = {
+static const struct stedma40_chan_cfg dma40_memcpy_conf_phy = {
 	.mode = STEDMA40_MODE_PHYSICAL,
 	.dir = DMA_MEM_TO_MEM,
 
@@ -93,7 +93,7 @@ static struct stedma40_chan_cfg dma40_memcpy_conf_phy = {
 };
 
 /* Default configuration for logical memcpy */
-static struct stedma40_chan_cfg dma40_memcpy_conf_log = {
+static const struct stedma40_chan_cfg dma40_memcpy_conf_log = {
 	.mode = STEDMA40_MODE_LOGICAL,
 	.dir = DMA_MEM_TO_MEM,
 
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
index a2358780ab2c..bcd496edc70f 100644
--- a/drivers/dma/sun6i-dma.c
+++ b/drivers/dma/sun6i-dma.c
@@ -101,6 +101,17 @@ struct sun6i_dma_config {
 	u32 nr_max_channels;
 	u32 nr_max_requests;
 	u32 nr_max_vchans;
+	/*
+	 * The datasheets/user manuals of newer Allwinner SoCs document a
+	 * special bit (bit 2 of register 0x20), named the "DMA MCLK
+	 * interface circuit auto gating bit"; a footnote for that register
+	 * says the bit should be set when initializing the DMA controller.
+	 * The Allwinner A23/A33 user manuals do not document this bit, but
+	 * these SoCs do have it and need it set, as seen in the BSP kernel
+	 * source code.
+	 */
+	bool gate_needed;
 };
 
 /*
@@ -1009,6 +1020,7 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = {
 	.nr_max_channels = 8,
 	.nr_max_requests = 24,
 	.nr_max_vchans   = 37,
+	.gate_needed	 = true,
 };
 
 static struct sun6i_dma_config sun8i_a83t_dma_cfg = {
@@ -1028,11 +1040,24 @@ static struct sun6i_dma_config sun8i_h3_dma_cfg = {
 	.nr_max_vchans   = 34,
 };
 
+/*
+ * The V3s has only 8 physical channels, a maximum DRQ port id of 23,
+ * and a total of 24 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_v3s_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 23,
+	.nr_max_vchans   = 24,
+	.gate_needed	 = true,
+};
+
 static const struct of_device_id sun6i_dma_match[] = {
 	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
 	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
 	{ .compatible = "allwinner,sun8i-a83t-dma", .data = &sun8i_a83t_dma_cfg },
 	{ .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg },
+	{ .compatible = "allwinner,sun8i-v3s-dma", .data = &sun8i_v3s_dma_cfg },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, sun6i_dma_match);
@@ -1174,13 +1199,7 @@ static int sun6i_dma_probe(struct platform_device *pdev)
 		goto err_dma_unregister;
 	}
 
-	/*
-	 * sun8i variant requires us to toggle a dma gating register,
-	 * as seen in Allwinner's SDK. This register is not documented
-	 * in the A23 user manual.
-	 */
-	if (of_device_is_compatible(pdev->dev.of_node,
-				    "allwinner,sun8i-a23-dma"))
+	if (sdc->cfg->gate_needed)
 		writel(SUN8I_DMA_GATE_ENABLE, sdc->base + SUN8I_DMA_GATE);
 
 	return 0;
diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c
index 2403475a37cf..2f65a8fde21d 100644
--- a/drivers/dma/ti-dma-crossbar.c
+++ b/drivers/dma/ti-dma-crossbar.c
@@ -308,7 +308,7 @@ static const struct of_device_id ti_dra7_master_match[] = {
 static inline void ti_dra7_xbar_reserve(int offset, int len, unsigned long *p)
 {
 	for (; len > 0; len--)
-		clear_bit(offset + (len - 1), p);
+		set_bit(offset + (len - 1), p);
 }
 
 static int ti_dra7_xbar_probe(struct platform_device *pdev)
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 8cf87b1a284b..8722bcba489d 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -2124,7 +2124,7 @@ static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
 	if (IS_ERR(*axi_clk)) {
 		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
 		return err;
 	}
 
@@ -2142,25 +2142,25 @@ static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 
 	err = clk_prepare_enable(*axi_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
 		return err;
 	}
 
 	err = clk_prepare_enable(*tx_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
 		goto err_disable_axiclk;
 	}
 
 	err = clk_prepare_enable(*rx_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
 		goto err_disable_txclk;
 	}
 
 	err = clk_prepare_enable(*sg_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable sg_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable sg_clk (%d)\n", err);
 		goto err_disable_rxclk;
 	}
 
@@ -2189,26 +2189,26 @@ static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
 	if (IS_ERR(*axi_clk)) {
 		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to get axi_clk (%d)\n", err);
 		return err;
 	}
 
 	*dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk");
 	if (IS_ERR(*dev_clk)) {
 		err = PTR_ERR(*dev_clk);
-		dev_err(&pdev->dev, "failed to get dev_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to get dev_clk (%d)\n", err);
 		return err;
 	}
 
 	err = clk_prepare_enable(*axi_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
 		return err;
 	}
 
 	err = clk_prepare_enable(*dev_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable dev_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable dev_clk (%d)\n", err);
 		goto err_disable_axiclk;
 	}
 
@@ -2229,7 +2229,7 @@ static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
 	if (IS_ERR(*axi_clk)) {
 		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_aclk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
 		return err;
 	}
 
@@ -2251,31 +2251,31 @@ static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
 
 	err = clk_prepare_enable(*axi_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable axi_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
 		return err;
 	}
 
 	err = clk_prepare_enable(*tx_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
 		goto err_disable_axiclk;
 	}
 
 	err = clk_prepare_enable(*txs_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable txs_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable txs_clk (%d)\n", err);
 		goto err_disable_txclk;
 	}
 
 	err = clk_prepare_enable(*rx_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
 		goto err_disable_txsclk;
 	}
 
 	err = clk_prepare_enable(*rxs_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable rxs_clk (%u)\n", err);
+		dev_err(&pdev->dev, "failed to enable rxs_clk (%d)\n", err);
 		goto err_disable_rxclk;
 	}
 
diff --git a/include/linux/dma/qcom_bam_dma.h b/include/linux/dma/qcom_bam_dma.h
new file mode 100644
index 000000000000..077d43a358e5
--- /dev/null
+++ b/include/linux/dma/qcom_bam_dma.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _QCOM_BAM_DMA_H
+#define _QCOM_BAM_DMA_H
+
+#include <asm/byteorder.h>
+
+/*
+ * This data type corresponds to the native Command Element
+ * supported by the BAM DMA engine.
+ *
+ * @cmd_and_addr - command in the upper 8 bits, register address in the
+ *		   lower 24 bits.
+ * @data - for a write command: content to be written into the peripheral
+ *	   register.
+ *	   for a read command: destination address the peripheral register
+ *	   value is written to.
+ * @mask - register mask.
+ * @reserved - reserved for future use.
+ */
+struct bam_cmd_element {
+	__le32 cmd_and_addr;
+	__le32 data;
+	__le32 mask;
+	__le32 reserved;
+};
+
+/*
+ * This enum indicates the command type in a command element
+ */
+enum bam_command_type {
+	BAM_WRITE_COMMAND = 0,
+	BAM_READ_COMMAND,
+};
+
+/*
+ * bam_prep_ce_le32 - Wrapper function to prepare a single BAM command
+ * element with the data already in le32 format.
+ *
+ * @bam_ce: bam command element
+ * @addr: target address
+ * @cmd: BAM command
+ * @data: actual data for write and dest addr for read in le32
+ */
+static inline void
+bam_prep_ce_le32(struct bam_cmd_element *bam_ce, u32 addr,
+		 enum bam_command_type cmd, __le32 data)
+{
+	bam_ce->cmd_and_addr =
+		cpu_to_le32((addr & 0xffffff) | ((cmd & 0xff) << 24));
+	bam_ce->data = data;
+	bam_ce->mask = cpu_to_le32(0xffffffff);
+}
+
+/*
+ * bam_prep_ce - Wrapper function to prepare a single BAM command element
+ * with the data given in CPU byte order (converted to le32 internally).
+ *
+ * @bam_ce: BAM command element
+ * @addr: target address
+ * @cmd: BAM command
+ * @data: actual data for write and dest addr for read
+ */
+static inline void
+bam_prep_ce(struct bam_cmd_element *bam_ce, u32 addr,
+	    enum bam_command_type cmd, u32 data)
+{
+	bam_prep_ce_le32(bam_ce, addr, cmd, cpu_to_le32(data));
+}
+#endif /* _QCOM_BAM_DMA_H */
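
Editorial aside: this header is meant to pair with the DMA_PREP_CMD flag
added below - a peripheral driver fills Command Elements and submits them
as a slave transfer flagged as command data, which makes the BAM driver
set DESC_FLAG_CMD on the hardware descriptors. A hedged sketch, assuming
the channel was already requested (the helper name is illustrative):

	#include <linux/dma-mapping.h>
	#include <linux/dmaengine.h>
	#include <linux/scatterlist.h>
	#include <linux/dma/qcom_bam_dma.h>

	/* Write 'val' into peripheral register 'reg' via a BAM command */
	static int example_bam_reg_write(struct device *dev,
					 struct dma_chan *chan,
					 u32 reg, u32 val)
	{
		struct dma_async_tx_descriptor *txd;
		struct bam_cmd_element *ce;
		struct scatterlist sg;
		dma_addr_t ce_dma;

		ce = dma_alloc_coherent(dev, sizeof(*ce), &ce_dma,
					GFP_KERNEL);
		if (!ce)
			return -ENOMEM;

		bam_prep_ce(ce, reg, BAM_WRITE_COMMAND, val);

		/* Coherent buffer: fill in the DMA address by hand */
		sg_init_one(&sg, ce, sizeof(*ce));
		sg_dma_address(&sg) = ce_dma;
		sg_dma_len(&sg) = sizeof(*ce);

		/* DMA_PREP_CMD marks the payload as command data */
		txd = dmaengine_prep_slave_sg(chan, &sg, 1, DMA_MEM_TO_DEV,
					      DMA_PREP_CMD);
		if (!txd) {
			dma_free_coherent(dev, sizeof(*ce), ce, ce_dma);
			return -EIO;
		}

		dmaengine_submit(txd);
		dma_async_issue_pending(chan);
		return 0;
	}
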
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 64fbd380c430..8319101170fc 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -185,6 +185,9 @@ struct dma_interleaved_template {
  *  on the result of this operation
  * @DMA_CTRL_REUSE: client can reuse the descriptor and submit again till
  *  cleared or freed
+ * @DMA_PREP_CMD: tell the driver that the data passed to the DMA API is
+ *  command data, and that the descriptor should be in a different format
+ *  from normal data descriptors.
  */
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
@@ -194,6 +197,7 @@ enum dma_ctrl_flags {
 	DMA_PREP_CONTINUE = (1 << 4),
 	DMA_PREP_FENCE = (1 << 5),
 	DMA_CTRL_REUSE = (1 << 6),
+	DMA_PREP_CMD = (1 << 7),
 };
 
 /**