summary refs log tree commit diff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-29 15:34:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-29 15:34:57 -0700
commitef08e78268423fc4d7fbc3e54bd9a67fc8da7cc5 (patch)
treed0561d3ef89c9cd277a38168e33850666cbd33c4 /drivers
parent71db34fc4330f7c784397acb9f1e6ee7f7b32eb2 (diff)
parent5b2e02e401deb44e7f5befe19404d8b2688efea4 (diff)
downloadlinux-ef08e78268423fc4d7fbc3e54bd9a67fc8da7cc5.tar.gz
Merge branch 'next' of git://git.infradead.org/users/vkoul/slave-dma
Pull slave-dmaengine update from Vinod Koul:
 "This includes the cookie cleanup by Russell, the addition of context
  parameter for dmaengine APIs, more arm dmaengine driver cleanup by
  moving code to dmaengine, this time for imx by Javier and pl330 by
  Boojin along with the usual driver fixes."

Fix up some fairly trivial conflicts with various other cleanups.

* 'next' of git://git.infradead.org/users/vkoul/slave-dma: (67 commits)
  dmaengine: imx: fix the build failure on x86_64
  dmaengine: i.MX: Fix merge of cookie branch.
  dmaengine: i.MX: Add support for interleaved transfers.
  dmaengine: imx-dma: use 'dev_dbg' and 'dev_warn' for messages.
  dmaengine: imx-dma: remove 'imx_dmav1_baseaddr' and 'dma_clk'.
  dmaengine: imx-dma: remove unused arg of imxdma_sg_next.
  dmaengine: imx-dma: remove internal structure.
  dmaengine: imx-dma: remove 'resbytes' field of 'internal' structure.
  dmaengine: imx-dma: remove 'in_use' field of 'internal' structure.
  dmaengine: imx-dma: remove sg member from internal structure.
  dmaengine: imx-dma: remove 'imxdma_setup_sg_hw' function.
  dmaengine: imx-dma: remove 'imxdma_config_channel_hw' function.
  dmaengine: imx-dma: remove 'imxdma_setup_mem2mem_hw' function.
  dmaengine: imx-dma: remove dma_mode member of internal structure.
  dmaengine: imx-dma: remove data member from internal structure.
  dmaengine: imx-dma: merge old dma-v1.c with imx-dma.c
  dmaengine: at_hdmac: add slave config operation
  dmaengine: add context parameter to prep_slave_sg and prep_dma_cyclic
  dmaengine/dma_slave: introduce inline wrappers
  dma: imx-sdma: Treat firmware messages as warnings instead of erros
  ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/dma/Kconfig3
-rw-r--r--drivers/dma/amba-pl08x.c46
-rw-r--r--drivers/dma/at_hdmac.c111
-rw-r--r--drivers/dma/at_hdmac_regs.h34
-rw-r--r--drivers/dma/coh901318.c41
-rw-r--r--drivers/dma/dmaengine.c8
-rw-r--r--drivers/dma/dmaengine.h89
-rw-r--r--drivers/dma/dw_dmac.c228
-rw-r--r--drivers/dma/dw_dmac_regs.h16
-rw-r--r--drivers/dma/ep93xx_dma.c31
-rw-r--r--drivers/dma/fsldma.c28
-rw-r--r--drivers/dma/fsldma.h1
-rw-r--r--drivers/dma/imx-dma.c950
-rw-r--r--drivers/dma/imx-sdma.c187
-rw-r--r--drivers/dma/intel_mid_dma.c46
-rw-r--r--drivers/dma/intel_mid_dma_regs.h2
-rw-r--r--drivers/dma/ioat/dma.c21
-rw-r--r--drivers/dma/ioat/dma.h23
-rw-r--r--drivers/dma/ioat/dma_v2.c13
-rw-r--r--drivers/dma/ioat/dma_v3.c12
-rw-r--r--drivers/dma/iop-adma.c52
-rw-r--r--drivers/dma/ipu/ipu_idmac.c25
-rw-r--r--drivers/dma/mpc512x_dma.c25
-rw-r--r--drivers/dma/mv_xor.c34
-rw-r--r--drivers/dma/mv_xor.h3
-rw-r--r--drivers/dma/mxs-dma.c28
-rw-r--r--drivers/dma/pch_dma.c37
-rw-r--r--drivers/dma/pl330.c2149
-rw-r--r--drivers/dma/ppc4xx/adma.c49
-rw-r--r--drivers/dma/ppc4xx/adma.h2
-rw-r--r--drivers/dma/shdma.c33
-rw-r--r--drivers/dma/shdma.h1
-rw-r--r--drivers/dma/sirf-dma.c27
-rw-r--r--drivers/dma/ste_dma40.c41
-rw-r--r--drivers/dma/timb_dma.c37
-rw-r--r--drivers/dma/txx9dmac.c43
-rw-r--r--drivers/dma/txx9dmac.h1
-rw-r--r--drivers/media/video/mx3_camera.c2
-rw-r--r--drivers/media/video/timblogiw.c2
-rw-r--r--drivers/mmc/host/atmel-mci.c21
-rw-r--r--drivers/mmc/host/mmci.c4
-rw-r--r--drivers/mmc/host/mxcmmc.c5
-rw-r--r--drivers/mmc/host/mxs-mmc.c2
-rw-r--r--drivers/mmc/host/sh_mmcif.c4
-rw-r--r--drivers/mmc/host/tmio_mmc_dma.c4
-rw-r--r--drivers/mtd/nand/gpmi-nand/gpmi-lib.c25
-rw-r--r--drivers/net/ethernet/micrel/ks8842.c4
-rw-r--r--drivers/spi/spi-dw-mid.c7
-rw-r--r--drivers/spi/spi-ep93xx.c4
-rw-r--r--drivers/spi/spi-pl022.c6
-rw-r--r--drivers/spi/spi-topcliff-pch.c4
-rw-r--r--drivers/tty/serial/amba-pl011.c9
-rw-r--r--drivers/tty/serial/pch_uart.c4
-rw-r--r--drivers/tty/serial/sh-sci.c4
-rw-r--r--drivers/usb/musb/ux500_dma.c4
-rw-r--r--drivers/usb/renesas_usbhs/fifo.c5
-rw-r--r--drivers/video/mx3fb.c4
57 files changed, 3616 insertions, 985 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 4a6c46dea8a0..cf9da362d64f 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -201,7 +201,6 @@ config PL330_DMA
 	tristate "DMA API Driver for PL330"
 	select DMA_ENGINE
 	depends on ARM_AMBA
-	select PL330
 	help
 	  Select if your platform has one or more PL330 DMACs.
 	  You need to provide platform specific settings via
@@ -231,7 +230,7 @@ config IMX_SDMA
 
 config IMX_DMA
 	tristate "i.MX DMA support"
-	depends on IMX_HAVE_DMA_V1
+	depends on ARCH_MXC
 	select DMA_ENGINE
 	help
 	  Support the i.MX DMA engine. This engine is integrated into
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 8a281584458b..c301a8ec31aa 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -85,6 +85,8 @@
 #include <linux/slab.h>
 #include <asm/hardware/pl080.h>
 
+#include "dmaengine.h"
+
 #define DRIVER_NAME	"pl08xdmac"
 
 static struct amba_driver pl08x_amba_driver;
@@ -649,7 +651,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 			}
 
 			if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
-					(bd.srcbus.addr % bd.srcbus.buswidth)) {
+					(bd.dstbus.addr % bd.dstbus.buswidth)) {
 				dev_err(&pl08x->adev->dev,
 					"%s src & dst address must be aligned to src"
 					" & dst width if peripheral is flow controller",
@@ -919,13 +921,10 @@ static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan);
 	struct pl08x_txd *txd = to_pl08x_txd(tx);
 	unsigned long flags;
+	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&plchan->lock, flags);
-
-	plchan->chan.cookie += 1;
-	if (plchan->chan.cookie < 0)
-		plchan->chan.cookie = 1;
-	tx->cookie = plchan->chan.cookie;
+	cookie = dma_cookie_assign(tx);
 
 	/* Put this onto the pending list */
 	list_add_tail(&txd->node, &plchan->pend_list);
@@ -945,7 +944,7 @@ static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	spin_unlock_irqrestore(&plchan->lock, flags);
 
-	return tx->cookie;
+	return cookie;
 }
 
 static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
@@ -965,31 +964,17 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
 		dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status ret;
-	u32 bytesleft = 0;
 
-	last_used = plchan->chan.cookie;
-	last_complete = plchan->lc;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
-	if (ret == DMA_SUCCESS) {
-		dma_set_tx_state(txstate, last_complete, last_used, 0);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS)
 		return ret;
-	}
 
 	/*
 	 * This cookie not complete yet
+	 * Get number of bytes left in the active transactions and queue
 	 */
-	last_used = plchan->chan.cookie;
-	last_complete = plchan->lc;
-
-	/* Get number of bytes left in the active transactions and queue */
-	bytesleft = pl08x_getbytes_chan(plchan);
-
-	dma_set_tx_state(txstate, last_complete, last_used,
-			 bytesleft);
+	dma_set_residue(txstate, pl08x_getbytes_chan(plchan));
 
 	if (plchan->state == PL08X_CHAN_PAUSED)
 		return DMA_PAUSED;
@@ -1139,6 +1124,8 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
 	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
 
+	plchan->device_fc = config->device_fc;
+
 	if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
 		plchan->src_addr = config->src_addr;
 		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
@@ -1326,7 +1313,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
@@ -1370,7 +1357,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		return NULL;
 	}
 
-	if (plchan->cd->device_fc)
+	if (plchan->device_fc)
 		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
 			PL080_FLOW_PER2MEM_PER;
 	else
@@ -1541,7 +1528,7 @@ static void pl08x_tasklet(unsigned long data)
 
 	if (txd) {
 		/* Update last completed */
-		plchan->lc = txd->tx.cookie;
+		dma_cookie_complete(&txd->tx);
 	}
 
 	/* If a new descriptor is queued, set it up plchan->at is NULL here */
@@ -1722,8 +1709,7 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 			 chan->name);
 
 		chan->chan.device = dmadev;
-		chan->chan.cookie = 0;
-		chan->lc = 0;
+		dma_cookie_init(&chan->chan);
 
 		spin_lock_init(&chan->lock);
 		INIT_LIST_HEAD(&chan->pend_list);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index f4aed5fc2cb6..7aa58d204892 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -27,6 +27,7 @@
 #include <linux/of_device.h>
 
 #include "at_hdmac_regs.h"
+#include "dmaengine.h"
 
 /*
  * Glossary
@@ -192,27 +193,6 @@ static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
 }
 
 /**
- * atc_assign_cookie - compute and assign new cookie
- * @atchan: channel we work on
- * @desc: descriptor to assign cookie for
- *
- * Called with atchan->lock held and bh disabled
- */
-static dma_cookie_t
-atc_assign_cookie(struct at_dma_chan *atchan, struct at_desc *desc)
-{
-	dma_cookie_t cookie = atchan->chan_common.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	atchan->chan_common.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
-/**
  * atc_dostart - starts the DMA engine for real
  * @atchan: the channel we want to start
  * @first: first descriptor in the list we want to begin with
@@ -269,7 +249,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 	dev_vdbg(chan2dev(&atchan->chan_common),
 		"descriptor %u complete\n", txd->cookie);
 
-	atchan->completed_cookie = txd->cookie;
+	dma_cookie_complete(txd);
 
 	/* move children to free_list */
 	list_splice_init(&desc->tx_list, &atchan->free_list);
@@ -547,7 +527,7 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
 	unsigned long		flags;
 
 	spin_lock_irqsave(&atchan->lock, flags);
-	cookie = atc_assign_cookie(atchan, desc);
+	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&atchan->active_list)) {
 		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
@@ -659,14 +639,16 @@ err_desc_get:
  * @sg_len: number of entries in @scatterlist
  * @direction: DMA direction
  * @flags: tx descriptor status flags
+ * @context: transaction context (ignored)
  */
 static struct dma_async_tx_descriptor *
 atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
 	struct at_desc		*first = NULL;
 	struct at_desc		*prev = NULL;
 	u32			ctrla;
@@ -688,19 +670,18 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 	}
 
-	reg_width = atslave->reg_width;
-
 	ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
 	ctrlb = ATC_IEN;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
 		ctrla |=  ATC_DST_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
 			| ATC_FC_MEM2PER
 			| ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF);
-		reg = atslave->tx_reg;
+		reg = sconfig->dst_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
 			u32		len;
@@ -728,13 +709,14 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		}
 		break;
 	case DMA_DEV_TO_MEM:
+		reg_width = convert_buswidth(sconfig->src_addr_width);
 		ctrla |=  ATC_SRC_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
 			| ATC_FC_PER2MEM
 			| ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF);
 
-		reg = atslave->rx_reg;
+		reg = sconfig->src_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
 			u32		len;
@@ -810,12 +792,15 @@ err_out:
  * atc_dma_cyclic_fill_desc - Fill one period decriptor
  */
 static int
-atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
+atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
 		unsigned int period_index, dma_addr_t buf_addr,
-		size_t period_len, enum dma_transfer_direction direction)
+		unsigned int reg_width, size_t period_len,
+		enum dma_transfer_direction direction)
 {
-	u32		ctrla;
-	unsigned int	reg_width = atslave->reg_width;
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	u32			ctrla;
 
 	/* prepare common CRTLA value */
 	ctrla =   ATC_DEFAULT_CTRLA | atslave->ctrla
@@ -826,7 +811,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
 		desc->lli.saddr = buf_addr + (period_len * period_index);
-		desc->lli.daddr = atslave->tx_reg;
+		desc->lli.daddr = sconfig->dst_addr;
 		desc->lli.ctrla = ctrla;
 		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
 				| ATC_SRC_ADDR_MODE_INCR
@@ -836,7 +821,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		break;
 
 	case DMA_DEV_TO_MEM:
-		desc->lli.saddr = atslave->rx_reg;
+		desc->lli.saddr = sconfig->src_addr;
 		desc->lli.daddr = buf_addr + (period_len * period_index);
 		desc->lli.ctrla = ctrla;
 		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
@@ -860,16 +845,20 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
  * @buf_len: total number of bytes for the entire buffer
  * @period_len: number of bytes for each period
  * @direction: transfer direction, to or from device
+ * @context: transfer context (ignored)
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
 	struct at_desc		*first = NULL;
 	struct at_desc		*prev = NULL;
 	unsigned long		was_cyclic;
+	unsigned int		reg_width;
 	unsigned int		periods = buf_len / period_len;
 	unsigned int		i;
 
@@ -889,8 +878,13 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		return NULL;
 	}
 
+	if (sconfig->direction == DMA_MEM_TO_DEV)
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
+	else
+		reg_width = convert_buswidth(sconfig->src_addr_width);
+
 	/* Check for too big/unaligned periods and unaligned DMA buffer */
-	if (atc_dma_cyclic_check_values(atslave->reg_width, buf_addr,
+	if (atc_dma_cyclic_check_values(reg_width, buf_addr,
 					period_len, direction))
 		goto err_out;
 
@@ -902,8 +896,8 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		if (!desc)
 			goto err_desc_get;
 
-		if (atc_dma_cyclic_fill_desc(atslave, desc, i, buf_addr,
-						period_len, direction))
+		if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr,
+					     reg_width, period_len, direction))
 			goto err_desc_get;
 
 		atc_desc_chain(&first, &prev, desc);
@@ -926,6 +920,23 @@ err_out:
 	return NULL;
 }
 
+static int set_runtime_config(struct dma_chan *chan,
+			      struct dma_slave_config *sconfig)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+
+	/* Check if it is chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&atchan->dma_sconfig.src_maxburst);
+	convert_burst(&atchan->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
 
 static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		       unsigned long arg)
@@ -986,6 +997,8 @@ static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		clear_bit(ATC_IS_CYCLIC, &atchan->status);
 
 		spin_unlock_irqrestore(&atchan->lock, flags);
+	} else if (cmd == DMA_SLAVE_CONFIG) {
+		return set_runtime_config(chan, (struct dma_slave_config *)arg);
 	} else {
 		return -ENXIO;
 	}
@@ -1016,26 +1029,20 @@ atc_tx_status(struct dma_chan *chan,
 
 	spin_lock_irqsave(&atchan->lock, flags);
 
-	last_complete = atchan->completed_cookie;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		atc_cleanup_descriptors(atchan);
 
-		last_complete = atchan->completed_cookie;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
+	last_complete = chan->completed_cookie;
+	last_used = chan->cookie;
+
 	spin_unlock_irqrestore(&atchan->lock, flags);
 
 	if (ret != DMA_SUCCESS)
-		dma_set_tx_state(txstate, last_complete, last_used,
-			atc_first_active(atchan)->len);
-	else
-		dma_set_tx_state(txstate, last_complete, last_used, 0);
+		dma_set_residue(txstate, atc_first_active(atchan)->len);
 
 	if (atc_chan_is_paused(atchan))
 		ret = DMA_PAUSED;
@@ -1129,7 +1136,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 	spin_lock_irqsave(&atchan->lock, flags);
 	atchan->descs_allocated = i;
 	list_splice(&tmp_list, &atchan->free_list);
-	atchan->completed_cookie = chan->cookie = 1;
+	dma_cookie_init(chan);
 	spin_unlock_irqrestore(&atchan->lock, flags);
 
 	/* channel parameters */
@@ -1329,7 +1336,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 		struct at_dma_chan	*atchan = &atdma->chan[i];
 
 		atchan->chan_common.device = &atdma->dma_common;
-		atchan->chan_common.cookie = atchan->completed_cookie = 1;
+		dma_cookie_init(&atchan->chan_common);
 		list_add_tail(&atchan->chan_common.device_node,
 				&atdma->dma_common.channels);
 
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index a8d3277d60b5..897a8bcaec90 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -207,8 +207,8 @@ enum atc_status {
  * @save_cfg: configuration register that is saved on suspend/resume cycle
  * @save_dscr: for cyclic operations, preserve next descriptor address in
  *             the cyclic list on suspend/resume cycle
+ * @dma_sconfig: configuration for slave transfers, passed via DMA_SLAVE_CONFIG
  * @lock: serializes enqueue/dequeue operations to descriptors lists
- * @completed_cookie: identifier for the most recently completed operation
  * @active_list: list of descriptors dmaengine is being running on
  * @queue: list of descriptors ready to be submitted to engine
  * @free_list: list of descriptors usable by the channel
@@ -223,11 +223,11 @@ struct at_dma_chan {
 	struct tasklet_struct	tasklet;
 	u32			save_cfg;
 	u32			save_dscr;
+	struct dma_slave_config dma_sconfig;
 
 	spinlock_t		lock;
 
 	/* these other elements are all protected by lock */
-	dma_cookie_t		completed_cookie;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
@@ -245,6 +245,36 @@ static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
 	return container_of(dchan, struct at_dma_chan, chan_common);
 }
 
+/*
+ * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
+ *
+ * This can be done by finding most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+/*
+ * Fix sconfig's bus width according to at_hdmac.
+ * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
+ */
+static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return 1;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return 2;
+	default:
+		/* For 1 byte width or fallback */
+		return 0;
+	}
+}
 
 /*--  Controller  ------------------------------------------------------*/
 
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index d65a718c0f9b..dc89455f5550 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -24,6 +24,7 @@
 #include <mach/coh901318.h>
 
 #include "coh901318_lli.h"
+#include "dmaengine.h"
 
 #define COHC_2_DEV(cohc) (&cohc->chan.dev->device)
 
@@ -59,7 +60,6 @@ struct coh901318_base {
 struct coh901318_chan {
 	spinlock_t lock;
 	int allocated;
-	int completed;
 	int id;
 	int stopped;
 
@@ -318,20 +318,6 @@ static int coh901318_prep_linked_list(struct coh901318_chan *cohc,
 
 	return 0;
 }
-static dma_cookie_t
-coh901318_assign_cookie(struct coh901318_chan *cohc,
-			struct coh901318_desc *cohd)
-{
-	dma_cookie_t cookie = cohc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	cohc->chan.cookie = cookie;
-	cohd->desc.cookie = cookie;
-
-	return cookie;
-}
 
 static struct coh901318_desc *
 coh901318_desc_get(struct coh901318_chan *cohc)
@@ -705,7 +691,7 @@ static void dma_tasklet(unsigned long data)
 	callback_param = cohd_fin->desc.callback_param;
 
 	/* sign this job as completed on the channel */
-	cohc->completed = cohd_fin->desc.cookie;
+	dma_cookie_complete(&cohd_fin->desc);
 
 	/* release the lli allocation and remove the descriptor */
 	coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli);
@@ -929,7 +915,7 @@ static int coh901318_alloc_chan_resources(struct dma_chan *chan)
 	coh901318_config(cohc, NULL);
 
 	cohc->allocated = 1;
-	cohc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	spin_unlock_irqrestore(&cohc->lock, flags);
 
@@ -966,16 +952,16 @@ coh901318_tx_submit(struct dma_async_tx_descriptor *tx)
 						   desc);
 	struct coh901318_chan *cohc = to_coh901318_chan(tx->chan);
 	unsigned long flags;
+	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&cohc->lock, flags);
-
-	tx->cookie = coh901318_assign_cookie(cohc, cohd);
+	cookie = dma_cookie_assign(tx);
 
 	coh901318_desc_queue(cohc, cohd);
 
 	spin_unlock_irqrestore(&cohc->lock, flags);
 
-	return tx->cookie;
+	return cookie;
 }
 
 static struct dma_async_tx_descriptor *
@@ -1035,7 +1021,7 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 static struct dma_async_tx_descriptor *
 coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			unsigned int sg_len, enum dma_transfer_direction direction,
-			unsigned long flags)
+			unsigned long flags, void *context)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
 	struct coh901318_lli *lli;
@@ -1165,17 +1151,12 @@ coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		 struct dma_tx_state *txstate)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	int ret;
-
-	last_complete = cohc->completed;
-	last_used = chan->cookie;
+	enum dma_status ret;
 
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	/* FIXME: should be conditional on ret != DMA_SUCCESS? */
+	dma_set_residue(txstate, coh901318_get_bytes_left(chan));
 
-	dma_set_tx_state(txstate, last_complete, last_used,
-			 coh901318_get_bytes_left(chan));
 	if (ret == DMA_IN_PROGRESS && cohc->stopped)
 		ret = DMA_PAUSED;
 
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index a6c6051ec858..767bcc31b365 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -510,8 +510,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 					 dma_chan_name(chan));
 				list_del_rcu(&device->global_node);
 			} else if (err)
-				pr_debug("dmaengine: failed to get %s: (%d)\n",
-					 dma_chan_name(chan), err);
+				pr_debug("%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
 			else
 				break;
 			if (--device->privatecnt == 0)
@@ -564,8 +564,8 @@ void dmaengine_get(void)
 				list_del_rcu(&device->global_node);
 				break;
 			} else if (err)
-				pr_err("dmaengine: failed to get %s: (%d)\n",
-				       dma_chan_name(chan), err);
+				pr_err("%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
 		}
 	}
 
diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
new file mode 100644
index 000000000000..17f983a4e9ba
--- /dev/null
+++ b/drivers/dma/dmaengine.h
@@ -0,0 +1,89 @@
+/*
+ * The contents of this file are private to DMA engine drivers, and is not
+ * part of the API to be used by DMA engine users.
+ */
+#ifndef DMAENGINE_H
+#define DMAENGINE_H
+
+#include <linux/bug.h>
+#include <linux/dmaengine.h>
+
+/**
+ * dma_cookie_init - initialize the cookies for a DMA channel
+ * @chan: dma channel to initialize
+ */
+static inline void dma_cookie_init(struct dma_chan *chan)
+{
+	chan->cookie = DMA_MIN_COOKIE;
+	chan->completed_cookie = DMA_MIN_COOKIE;
+}
+
+/**
+ * dma_cookie_assign - assign a DMA engine cookie to the descriptor
+ * @tx: descriptor needing cookie
+ *
+ * Assign a unique non-zero per-channel cookie to the descriptor.
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *chan = tx->chan;
+	dma_cookie_t cookie;
+
+	cookie = chan->cookie + 1;
+	if (cookie < DMA_MIN_COOKIE)
+		cookie = DMA_MIN_COOKIE;
+	tx->cookie = chan->cookie = cookie;
+
+	return cookie;
+}
+
+/**
+ * dma_cookie_complete - complete a descriptor
+ * @tx: descriptor to complete
+ *
+ * Mark this descriptor complete by updating the channels completed
+ * cookie marker.  Zero the descriptors cookie to prevent accidental
+ * repeated completions.
+ *
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
+{
+	BUG_ON(tx->cookie < DMA_MIN_COOKIE);
+	tx->chan->completed_cookie = tx->cookie;
+	tx->cookie = 0;
+}
+
+/**
+ * dma_cookie_status - report cookie status
+ * @chan: dma channel
+ * @cookie: cookie we are interested in
+ * @state: dma_tx_state structure to return last/used cookies
+ *
+ * Report the status of the cookie, filling in the state structure if
+ * non-NULL.  No locking is required.
+ */
+static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	dma_cookie_t used, complete;
+
+	used = chan->cookie;
+	complete = chan->completed_cookie;
+	barrier();
+	if (state) {
+		state->last = complete;
+		state->used = used;
+		state->residue = 0;
+	}
+	return dma_async_is_complete(cookie, complete, used);
+}
+
+static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
+{
+	if (state)
+		state->residue = residue;
+}
+
+#endif
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9b592b02b5f4..7439079f5eed 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -9,6 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
@@ -22,6 +23,7 @@
 #include <linux/slab.h>
 
 #include "dw_dmac_regs.h"
+#include "dmaengine.h"
 
 /*
  * This supports the Synopsys "DesignWare AHB Central DMA Controller",
@@ -33,19 +35,23 @@
  * which does not support descriptor writeback.
  */
 
-#define DWC_DEFAULT_CTLLO(private) ({				\
-		struct dw_dma_slave *__slave = (private);	\
-		int dms = __slave ? __slave->dst_master : 0;	\
-		int sms = __slave ? __slave->src_master : 1;	\
-		u8 smsize = __slave ? __slave->src_msize : DW_DMA_MSIZE_16; \
-		u8 dmsize = __slave ? __slave->dst_msize : DW_DMA_MSIZE_16; \
+#define DWC_DEFAULT_CTLLO(_chan) ({				\
+		struct dw_dma_slave *__slave = (_chan->private);	\
+		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
+		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
+		int _dms = __slave ? __slave->dst_master : 0;	\
+		int _sms = __slave ? __slave->src_master : 1;	\
+		u8 _smsize = __slave ? _sconfig->src_maxburst :	\
+			DW_DMA_MSIZE_16;			\
+		u8 _dmsize = __slave ? _sconfig->dst_maxburst :	\
+			DW_DMA_MSIZE_16;			\
 								\
-		(DWC_CTLL_DST_MSIZE(dmsize)			\
-		 | DWC_CTLL_SRC_MSIZE(smsize)			\
+		(DWC_CTLL_DST_MSIZE(_dmsize)			\
+		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
 		 | DWC_CTLL_LLP_D_EN				\
 		 | DWC_CTLL_LLP_S_EN				\
-		 | DWC_CTLL_DMS(dms)				\
-		 | DWC_CTLL_SMS(sms));				\
+		 | DWC_CTLL_DMS(_dms)				\
+		 | DWC_CTLL_SMS(_sms));				\
 	})
 
 /*
@@ -151,21 +157,6 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	}
 }
 
-/* Called with dwc->lock held and bh disabled */
-static dma_cookie_t
-dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
-{
-	dma_cookie_t cookie = dwc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	dwc->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
 static void dwc_initialize(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
@@ -192,7 +183,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
-	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
 	dwc->initialized = true;
@@ -245,7 +235,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
 	dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	dwc->completed = txd->cookie;
+	dma_cookie_complete(txd);
 	if (callback_required) {
 		callback = txd->callback;
 		param = txd->callback_param;
@@ -329,12 +319,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	unsigned long flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	/*
-	 * Clear block interrupt flag before scanning so that we don't
-	 * miss any, and read LLP before RAW_XFER to ensure it is
-	 * valid if we decide to scan the list.
-	 */
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 	llp = channel_readl(dwc, LLP);
 	status_xfer = dma_readl(dw, RAW.XFER);
 
@@ -470,17 +454,16 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr);
 
 /* called with dwc->lock held and all DMAC interrupts disabled */
 static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
-		u32 status_block, u32 status_err, u32 status_xfer)
+		u32 status_err, u32 status_xfer)
 {
 	unsigned long flags;
 
-	if (status_block & dwc->mask) {
+	if (dwc->mask) {
 		void (*callback)(void *param);
 		void *callback_param;
 
 		dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n",
 				channel_readl(dwc, LLP));
-		dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 
 		callback = dwc->cdesc->period_callback;
 		callback_param = dwc->cdesc->period_callback_param;
@@ -520,7 +503,6 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 		channel_writel(dwc, CTL_LO, 0);
 		channel_writel(dwc, CTL_HI, 0);
 
-		dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 		dma_writel(dw, CLEAR.ERROR, dwc->mask);
 		dma_writel(dw, CLEAR.XFER, dwc->mask);
 
@@ -537,36 +519,29 @@ static void dw_dma_tasklet(unsigned long data)
 {
 	struct dw_dma *dw = (struct dw_dma *)data;
 	struct dw_dma_chan *dwc;
-	u32 status_block;
 	u32 status_xfer;
 	u32 status_err;
 	int i;
 
-	status_block = dma_readl(dw, RAW.BLOCK);
 	status_xfer = dma_readl(dw, RAW.XFER);
 	status_err = dma_readl(dw, RAW.ERROR);
 
-	dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
-			status_block, status_err);
+	dev_vdbg(dw->dma.dev, "tasklet: status_err=%x\n", status_err);
 
 	for (i = 0; i < dw->dma.chancnt; i++) {
 		dwc = &dw->chan[i];
 		if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
-			dwc_handle_cyclic(dw, dwc, status_block, status_err,
-					status_xfer);
+			dwc_handle_cyclic(dw, dwc, status_err, status_xfer);
 		else if (status_err & (1 << i))
 			dwc_handle_error(dw, dwc);
-		else if ((status_block | status_xfer) & (1 << i))
+		else if (status_xfer & (1 << i))
 			dwc_scan_descriptors(dw, dwc);
 	}
 
 	/*
-	 * Re-enable interrupts. Block Complete interrupts are only
-	 * enabled if the INT_EN bit in the descriptor is set. This
-	 * will trigger a scan before the whole list is done.
+	 * Re-enable interrupts.
 	 */
 	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
 }
 
@@ -583,7 +558,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 	 * softirq handler.
 	 */
 	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
 
 	status = dma_readl(dw, STATUS_INT);
@@ -594,7 +568,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 
 		/* Try to recover */
 		channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
-		channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
 		channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
 		channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
 		channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
@@ -615,7 +588,7 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
 	unsigned long		flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	cookie = dwc_assign_cookie(dwc, desc);
+	cookie = dma_cookie_assign(tx);
 
 	/*
 	 * REVISIT: We should attempt to chain as many descriptors as
@@ -674,7 +647,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	else
 		src_width = dst_width = 0;
 
-	ctllo = DWC_DEFAULT_CTLLO(chan->private)
+	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)
 			| DWC_CTLL_SRC_WIDTH(src_width)
 			| DWC_CTLL_DST_INC
@@ -731,10 +704,11 @@ err_desc_get:
 static struct dma_async_tx_descriptor *
 dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma_slave	*dws = chan->private;
+	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
 	u32			ctllo;
@@ -750,25 +724,34 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (unlikely(!dws || !sg_len))
 		return NULL;
 
-	reg_width = dws->reg_width;
 	prev = first = NULL;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
-		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+		reg_width = __fls(sconfig->dst_addr_width);
+		reg = sconfig->dst_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
-				| DWC_CTLL_SRC_INC
-				| DWC_CTLL_FC(dws->fc));
-		reg = dws->tx_reg;
+				| DWC_CTLL_SRC_INC);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
 
 			mem = sg_phys(sg);
 			len = sg_dma_len(sg);
-			mem_width = 2;
-			if (unlikely(mem & 3 || len & 3))
+
+			if (!((mem | len) & 7))
+				mem_width = 3;
+			else if (!((mem | len) & 3))
+				mem_width = 2;
+			else if (!((mem | len) & 1))
+				mem_width = 1;
+			else
 				mem_width = 0;
 
 slave_sg_todev_fill_desc:
@@ -812,21 +795,30 @@ slave_sg_todev_fill_desc:
 		}
 		break;
 	case DMA_DEV_TO_MEM:
-		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+		reg_width = __fls(sconfig->src_addr_width);
+		reg = sconfig->src_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
-				| DWC_CTLL_SRC_FIX
-				| DWC_CTLL_FC(dws->fc));
+				| DWC_CTLL_SRC_FIX);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
 
-		reg = dws->rx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
 
 			mem = sg_phys(sg);
 			len = sg_dma_len(sg);
-			mem_width = 2;
-			if (unlikely(mem & 3 || len & 3))
+
+			if (!((mem | len) & 7))
+				mem_width = 3;
+			else if (!((mem | len) & 3))
+				mem_width = 2;
+			else if (!((mem | len) & 1))
+				mem_width = 1;
+			else
 				mem_width = 0;
 
 slave_sg_fromdev_fill_desc:
@@ -890,6 +882,39 @@ err_desc_get:
 	return NULL;
 }
 
+/*
+ * Fix sconfig's burst size according to dw_dmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+ *
+ * NOTE: burst size 2 is not supported by controller.
+ *
+ * This can be done by finding least significant bit set: n & (n - 1)
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+static int
+set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+	/* Check if it is chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&dwc->dma_sconfig.src_maxburst);
+	convert_burst(&dwc->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
 static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		       unsigned long arg)
 {
@@ -939,8 +964,11 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		/* Flush all pending and queued descriptors */
 		list_for_each_entry_safe(desc, _desc, &list, desc_node)
 			dwc_descriptor_complete(dwc, desc, false);
-	} else
+	} else if (cmd == DMA_SLAVE_CONFIG) {
+		return set_runtime_config(chan, (struct dma_slave_config *)arg);
+	} else {
 		return -ENXIO;
+	}
 
 	return 0;
 }
@@ -951,28 +979,17 @@ dwc_tx_status(struct dma_chan *chan,
 	      struct dma_tx_state *txstate)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
-	dma_cookie_t		last_used;
-	dma_cookie_t		last_complete;
-	int			ret;
-
-	last_complete = dwc->completed;
-	last_used = chan->cookie;
+	enum dma_status		ret;
 
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
-		last_complete = dwc->completed;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
 	if (ret != DMA_SUCCESS)
-		dma_set_tx_state(txstate, last_complete, last_used,
-				dwc_first_active(dwc)->len);
-	else
-		dma_set_tx_state(txstate, last_complete, last_used, 0);
+		dma_set_residue(txstate, dwc_first_active(dwc)->len);
 
 	if (dwc->paused)
 		return DMA_PAUSED;
@@ -1004,7 +1021,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 		return -EIO;
 	}
 
-	dwc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	/*
 	 * NOTE: some controllers may have additional features that we
@@ -1068,7 +1085,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 
 	/* Disable interrupts */
 	channel_clear_bit(dw, MASK.XFER, dwc->mask);
-	channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_clear_bit(dw, MASK.ERROR, dwc->mask);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
@@ -1120,7 +1136,6 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
 		return -EBUSY;
 	}
 
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
 
@@ -1175,11 +1190,11 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		enum dma_transfer_direction direction)
 {
 	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
+	struct dma_slave_config		*sconfig = &dwc->dma_sconfig;
 	struct dw_cyclic_desc		*cdesc;
 	struct dw_cyclic_desc		*retval = NULL;
 	struct dw_desc			*desc;
 	struct dw_desc			*last = NULL;
-	struct dw_dma_slave		*dws = chan->private;
 	unsigned long			was_cyclic;
 	unsigned int			reg_width;
 	unsigned int			periods;
@@ -1203,7 +1218,12 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	}
 
 	retval = ERR_PTR(-EINVAL);
-	reg_width = dws->reg_width;
+
+	if (direction == DMA_MEM_TO_DEV)
+		reg_width = __ffs(sconfig->dst_addr_width);
+	else
+		reg_width = __ffs(sconfig->src_addr_width);
+
 	periods = buf_len / period_len;
 
 	/* Check for too big/unaligned periods and unaligned DMA buffer. */
@@ -1236,26 +1256,34 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 
 		switch (direction) {
 		case DMA_MEM_TO_DEV:
-			desc->lli.dar = dws->tx_reg;
+			desc->lli.dar = sconfig->dst_addr;
 			desc->lli.sar = buf_addr + (period_len * i);
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
 					| DWC_CTLL_DST_WIDTH(reg_width)
 					| DWC_CTLL_SRC_WIDTH(reg_width)
 					| DWC_CTLL_DST_FIX
 					| DWC_CTLL_SRC_INC
-					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
+
+			desc->lli.ctllo |= sconfig->device_fc ?
+				DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+				DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
 			break;
 		case DMA_DEV_TO_MEM:
 			desc->lli.dar = buf_addr + (period_len * i);
-			desc->lli.sar = dws->rx_reg;
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+			desc->lli.sar = sconfig->src_addr;
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
 					| DWC_CTLL_SRC_WIDTH(reg_width)
 					| DWC_CTLL_DST_WIDTH(reg_width)
 					| DWC_CTLL_DST_INC
 					| DWC_CTLL_SRC_FIX
-					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
+
+			desc->lli.ctllo |= sconfig->device_fc ?
+				DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+				DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
 			break;
 		default:
 			break;
@@ -1322,7 +1350,6 @@ void dw_dma_cyclic_free(struct dma_chan *chan)
 	while (dma_readl(dw, CH_EN) & dwc->mask)
 		cpu_relax();
 
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
 
@@ -1347,7 +1374,6 @@ static void dw_dma_off(struct dw_dma *dw)
 	dma_writel(dw, CFG, 0);
 
 	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
@@ -1369,7 +1395,7 @@ static int __init dw_probe(struct platform_device *pdev)
 	int			err;
 	int			i;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
 		return -EINVAL;
 
@@ -1423,7 +1449,7 @@ static int __init dw_probe(struct platform_device *pdev)
 		struct dw_dma_chan	*dwc = &dw->chan[i];
 
 		dwc->chan.device = &dw->dma;
-		dwc->chan.cookie = dwc->completed = 1;
+		dma_cookie_init(&dwc->chan);
 		if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING)
 			list_add_tail(&dwc->chan.device_node,
 					&dw->dma.channels);
@@ -1432,7 +1458,7 @@ static int __init dw_probe(struct platform_device *pdev)
 
 		/* 7 is highest priority & 0 is lowest. */
 		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
-			dwc->priority = 7 - i;
+			dwc->priority = pdata->nr_channels - i - 1;
 		else
 			dwc->priority = i;
 
@@ -1449,13 +1475,11 @@ static int __init dw_probe(struct platform_device *pdev)
 
 	/* Clear/disable all interrupts on all channels. */
 	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
-	dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
 
 	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
@@ -1562,6 +1586,10 @@ static int dw_resume_noirq(struct device *dev)
 static const struct dev_pm_ops dw_dev_pm_ops = {
 	.suspend_noirq = dw_suspend_noirq,
 	.resume_noirq = dw_resume_noirq,
+	.freeze_noirq = dw_suspend_noirq,
+	.thaw_noirq = dw_resume_noirq,
+	.restore_noirq = dw_resume_noirq,
+	.poweroff_noirq = dw_suspend_noirq,
 };
 
 static struct platform_driver dw_driver = {
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 5eef6946a367..f298f69ecbf9 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -13,6 +13,18 @@
 
 #define DW_DMA_MAX_NR_CHANNELS	8
 
+/* flow controller */
+enum dw_dma_fc {
+	DW_DMA_FC_D_M2M,
+	DW_DMA_FC_D_M2P,
+	DW_DMA_FC_D_P2M,
+	DW_DMA_FC_D_P2P,
+	DW_DMA_FC_P_P2M,
+	DW_DMA_FC_SP_P2P,
+	DW_DMA_FC_P_M2P,
+	DW_DMA_FC_DP_P2P,
+};
+
 /*
  * Redefine this macro to handle differences between 32- and 64-bit
  * addressing, big vs. little endian, etc.
@@ -146,13 +158,15 @@ struct dw_dma_chan {
 
 	/* these other elements are all protected by lock */
 	unsigned long		flags;
-	dma_cookie_t		completed;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
 	struct dw_cyclic_desc	*cdesc;
 
 	unsigned int		descs_allocated;
+
+	/* configuration passed via DMA_SLAVE_CONFIG */
+	struct dma_slave_config dma_sconfig;
 };
 
 static inline struct dw_dma_chan_regs __iomem *
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 59e7a965772b..e6f133b78dc2 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -28,6 +28,8 @@
 
 #include <mach/dma.h>
 
+#include "dmaengine.h"
+
 /* M2P registers */
 #define M2P_CONTROL			0x0000
 #define M2P_CONTROL_STALLINT		BIT(0)
@@ -122,7 +124,6 @@ struct ep93xx_dma_desc {
  * @lock: lock protecting the fields following
  * @flags: flags for the channel
  * @buffer: which buffer to use next (0/1)
- * @last_completed: last completed cookie value
  * @active: flattened chain of descriptors currently being processed
  * @queue: pending descriptors which are handled next
  * @free_list: list of free descriptors which can be used
@@ -157,7 +158,6 @@ struct ep93xx_dma_chan {
 #define EP93XX_DMA_IS_CYCLIC		0
 
 	int				buffer;
-	dma_cookie_t			last_completed;
 	struct list_head		active;
 	struct list_head		queue;
 	struct list_head		free_list;
@@ -703,7 +703,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
 	desc = ep93xx_dma_get_active(edmac);
 	if (desc) {
 		if (desc->complete) {
-			edmac->last_completed = desc->txd.cookie;
+			dma_cookie_complete(&desc->txd);
 			list_splice_init(&edmac->active, &list);
 		}
 		callback = desc->txd.callback;
@@ -783,17 +783,10 @@ static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	unsigned long flags;
 
 	spin_lock_irqsave(&edmac->lock, flags);
-
-	cookie = edmac->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
+	cookie = dma_cookie_assign(tx);
 
 	desc = container_of(tx, struct ep93xx_dma_desc, txd);
 
-	edmac->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
 	/*
 	 * If nothing is currently prosessed, we push this descriptor
 	 * directly to the hardware. Otherwise we put the descriptor
@@ -861,8 +854,7 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 		goto fail_clk_disable;
 
 	spin_lock_irq(&edmac->lock);
-	edmac->last_completed = 1;
-	edmac->chan.cookie = 1;
+	dma_cookie_init(&edmac->chan);
 	ret = edmac->edma->hw_setup(edmac);
 	spin_unlock_irq(&edmac->lock);
 
@@ -983,13 +975,14 @@ fail:
  * @sg_len: number of entries in @sgl
  * @dir: direction of tha DMA transfer
  * @flags: flags for the descriptor
+ * @context: operation context (ignored)
  *
  * Returns a valid DMA descriptor or %NULL in case of failure.
  */
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			 unsigned int sg_len, enum dma_transfer_direction dir,
-			 unsigned long flags)
+			 unsigned long flags, void *context)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1056,6 +1049,7 @@ fail:
  * @buf_len: length of the buffer (in bytes)
  * @period_len: lenght of a single period
  * @dir: direction of the operation
+ * @context: operation context (ignored)
  *
  * Prepares a descriptor for cyclic DMA operation. This means that once the
  * descriptor is submitted, we will be submitting in a @period_len sized
@@ -1068,7 +1062,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_transfer_direction dir)
+			   enum dma_transfer_direction dir, void *context)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1248,18 +1242,13 @@ static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan,
 					    struct dma_tx_state *state)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
-	dma_cookie_t last_used, last_completed;
 	enum dma_status ret;
 	unsigned long flags;
 
 	spin_lock_irqsave(&edmac->lock, flags);
-	last_used = chan->cookie;
-	last_completed = edmac->last_completed;
+	ret = dma_cookie_status(chan, cookie, state);
 	spin_unlock_irqrestore(&edmac->lock, flags);
 
-	ret = dma_async_is_complete(cookie, last_completed, last_used);
-	dma_set_tx_state(state, last_completed, last_used, 0);
-
 	return ret;
 }
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index b98070c33ca9..8f84761f98ba 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -35,6 +35,7 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
+#include "dmaengine.h"
 #include "fsldma.h"
 
 #define chan_dbg(chan, fmt, arg...)					\
@@ -413,17 +414,10 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	 * assign cookies to all of the software descriptors
 	 * that make up this transaction
 	 */
-	cookie = chan->common.cookie;
 	list_for_each_entry(child, &desc->tx_list, node) {
-		cookie++;
-		if (cookie < DMA_MIN_COOKIE)
-			cookie = DMA_MIN_COOKIE;
-
-		child->async_tx.cookie = cookie;
+		cookie = dma_cookie_assign(&child->async_tx);
 	}
 
-	chan->common.cookie = cookie;
-
 	/* put this transaction onto the tail of the pending queue */
 	append_ld_queue(chan, desc);
 
@@ -765,6 +759,7 @@ fail:
  * @sg_len: number of entries in @scatterlist
  * @direction: DMA direction
  * @flags: DMAEngine flags
+ * @context: transaction context (ignored)
  *
  * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
  * DMA_SLAVE API, this gets the device-specific information from the
@@ -772,7 +767,8 @@ fail:
  */
 static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
 {
 	/*
 	 * This operation is not supported on the Freescale DMA controller
@@ -984,19 +980,14 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 					struct dma_tx_state *txstate)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	dma_cookie_t last_complete;
-	dma_cookie_t last_used;
+	enum dma_status ret;
 	unsigned long flags;
 
 	spin_lock_irqsave(&chan->desc_lock, flags);
-
-	last_complete = chan->completed_cookie;
-	last_used = dchan->cookie;
-
+	ret = dma_cookie_status(dchan, cookie, txstate);
 	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return ret;
 }
 
 /*----------------------------------------------------------------------------*/
@@ -1087,8 +1078,8 @@ static void dma_do_tasklet(unsigned long data)
 
 		desc = to_fsl_desc(chan->ld_running.prev);
 		cookie = desc->async_tx.cookie;
+		dma_cookie_complete(&desc->async_tx);
 
-		chan->completed_cookie = cookie;
 		chan_dbg(chan, "completed_cookie=%d\n", cookie);
 	}
 
@@ -1303,6 +1294,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
 	chan->idle = true;
 
 	chan->common.device = &fdev->common;
+	dma_cookie_init(&chan->common);
 
 	/* find the IRQ line, if it exists in the device tree */
 	chan->irq = irq_of_parse_and_map(node, 0);
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 9cb5aa57c677..f5c38791fc74 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -137,7 +137,6 @@ struct fsldma_device {
 struct fsldma_chan {
 	char name[8];			/* Channel name */
 	struct fsldma_chan_regs __iomem *regs;
-	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
 	spinlock_t desc_lock;		/* Descriptor operation lock */
 	struct list_head ld_pending;	/* Link descriptors queue */
 	struct list_head ld_running;	/* Link descriptors queue */
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 38586ba8da91..a45b5d2a5987 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -5,6 +5,7 @@
  * found on i.MX1/21/27
  *
  * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
@@ -22,37 +23,159 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
+#include <linux/clk.h>
 #include <linux/dmaengine.h>
 #include <linux/module.h>
 
 #include <asm/irq.h>
-#include <mach/dma-v1.h>
+#include <mach/dma.h>
 #include <mach/hardware.h>
 
+#include "dmaengine.h"
+#define IMXDMA_MAX_CHAN_DESCRIPTORS	16
+#define IMX_DMA_CHANNELS  16
+
+#define IMX_DMA_2D_SLOTS	2
+#define IMX_DMA_2D_SLOT_A	0
+#define IMX_DMA_2D_SLOT_B	1
+
+#define IMX_DMA_LENGTH_LOOP	((unsigned int)-1)
+#define IMX_DMA_MEMSIZE_32	(0 << 4)
+#define IMX_DMA_MEMSIZE_8	(1 << 4)
+#define IMX_DMA_MEMSIZE_16	(2 << 4)
+#define IMX_DMA_TYPE_LINEAR	(0 << 10)
+#define IMX_DMA_TYPE_2D		(1 << 10)
+#define IMX_DMA_TYPE_FIFO	(2 << 10)
+
+#define IMX_DMA_ERR_BURST     (1 << 0)
+#define IMX_DMA_ERR_REQUEST   (1 << 1)
+#define IMX_DMA_ERR_TRANSFER  (1 << 2)
+#define IMX_DMA_ERR_BUFFER    (1 << 3)
+#define IMX_DMA_ERR_TIMEOUT   (1 << 4)
+
+#define DMA_DCR     0x00		/* Control Register */
+#define DMA_DISR    0x04		/* Interrupt status Register */
+#define DMA_DIMR    0x08		/* Interrupt mask Register */
+#define DMA_DBTOSR  0x0c		/* Burst timeout status Register */
+#define DMA_DRTOSR  0x10		/* Request timeout Register */
+#define DMA_DSESR   0x14		/* Transfer Error Status Register */
+#define DMA_DBOSR   0x18		/* Buffer overflow status Register */
+#define DMA_DBTOCR  0x1c		/* Burst timeout control Register */
+#define DMA_WSRA    0x40		/* W-Size Register A */
+#define DMA_XSRA    0x44		/* X-Size Register A */
+#define DMA_YSRA    0x48		/* Y-Size Register A */
+#define DMA_WSRB    0x4c		/* W-Size Register B */
+#define DMA_XSRB    0x50		/* X-Size Register B */
+#define DMA_YSRB    0x54		/* Y-Size Register B */
+#define DMA_SAR(x)  (0x80 + ((x) << 6))	/* Source Address Registers */
+#define DMA_DAR(x)  (0x84 + ((x) << 6))	/* Destination Address Registers */
+#define DMA_CNTR(x) (0x88 + ((x) << 6))	/* Count Registers */
+#define DMA_CCR(x)  (0x8c + ((x) << 6))	/* Control Registers */
+#define DMA_RSSR(x) (0x90 + ((x) << 6))	/* Request source select Registers */
+#define DMA_BLR(x)  (0x94 + ((x) << 6))	/* Burst length Registers */
+#define DMA_RTOR(x) (0x98 + ((x) << 6))	/* Request timeout Registers */
+#define DMA_BUCR(x) (0x98 + ((x) << 6))	/* Bus Utilization Registers */
+#define DMA_CCNR(x) (0x9C + ((x) << 6))	/* Channel counter Registers */
+
+#define DCR_DRST           (1<<1)
+#define DCR_DEN            (1<<0)
+#define DBTOCR_EN          (1<<15)
+#define DBTOCR_CNT(x)      ((x) & 0x7fff)
+#define CNTR_CNT(x)        ((x) & 0xffffff)
+#define CCR_ACRPT          (1<<14)
+#define CCR_DMOD_LINEAR    (0x0 << 12)
+#define CCR_DMOD_2D        (0x1 << 12)
+#define CCR_DMOD_FIFO      (0x2 << 12)
+#define CCR_DMOD_EOBFIFO   (0x3 << 12)
+#define CCR_SMOD_LINEAR    (0x0 << 10)
+#define CCR_SMOD_2D        (0x1 << 10)
+#define CCR_SMOD_FIFO      (0x2 << 10)
+#define CCR_SMOD_EOBFIFO   (0x3 << 10)
+#define CCR_MDIR_DEC       (1<<9)
+#define CCR_MSEL_B         (1<<8)
+#define CCR_DSIZ_32        (0x0 << 6)
+#define CCR_DSIZ_8         (0x1 << 6)
+#define CCR_DSIZ_16        (0x2 << 6)
+#define CCR_SSIZ_32        (0x0 << 4)
+#define CCR_SSIZ_8         (0x1 << 4)
+#define CCR_SSIZ_16        (0x2 << 4)
+#define CCR_REN            (1<<3)
+#define CCR_RPT            (1<<2)
+#define CCR_FRC            (1<<1)
+#define CCR_CEN            (1<<0)
+#define RTOR_EN            (1<<15)
+#define RTOR_CLK           (1<<14)
+#define RTOR_PSC           (1<<13)
+
+enum  imxdma_prep_type {
+	IMXDMA_DESC_MEMCPY,
+	IMXDMA_DESC_INTERLEAVED,
+	IMXDMA_DESC_SLAVE_SG,
+	IMXDMA_DESC_CYCLIC,
+};
+
+struct imx_dma_2d_config {
+	u16		xsr;
+	u16		ysr;
+	u16		wsr;
+	int		count;
+};
+
+struct imxdma_desc {
+	struct list_head		node;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	dma_addr_t			src;
+	dma_addr_t			dest;
+	size_t				len;
+	enum dma_transfer_direction	direction;
+	enum imxdma_prep_type		type;
+	/* For memcpy and interleaved */
+	unsigned int			config_port;
+	unsigned int			config_mem;
+	/* For interleaved transfers */
+	unsigned int			x;
+	unsigned int			y;
+	unsigned int			w;
+	/* For slave sg and cyclic */
+	struct scatterlist		*sg;
+	unsigned int			sgcount;
+};
+
 struct imxdma_channel {
+	int				hw_chaining;
+	struct timer_list		watchdog;
 	struct imxdma_engine		*imxdma;
 	unsigned int			channel;
-	unsigned int			imxdma_channel;
 
+	struct tasklet_struct		dma_tasklet;
+	struct list_head		ld_free;
+	struct list_head		ld_queue;
+	struct list_head		ld_active;
+	int				descs_allocated;
 	enum dma_slave_buswidth		word_size;
 	dma_addr_t			per_address;
 	u32				watermark_level;
 	struct dma_chan			chan;
-	spinlock_t			lock;
 	struct dma_async_tx_descriptor	desc;
-	dma_cookie_t			last_completed;
 	enum dma_status			status;
 	int				dma_request;
 	struct scatterlist		*sg_list;
+	u32				ccr_from_device;
+	u32				ccr_to_device;
+	bool				enabled_2d;
+	int				slot_2d;
 };
 
-#define MAX_DMA_CHANNELS 8
-
 struct imxdma_engine {
 	struct device			*dev;
 	struct device_dma_parameters	dma_parms;
 	struct dma_device		dma_device;
-	struct imxdma_channel		channel[MAX_DMA_CHANNELS];
+	void __iomem			*base;
+	struct clk			*dma_clk;
+	spinlock_t			lock;
+	struct imx_dma_2d_config	slots_2d[IMX_DMA_2D_SLOTS];
+	struct imxdma_channel		channel[IMX_DMA_CHANNELS];
 };
 
 static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
@@ -60,36 +183,418 @@ static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
 	return container_of(chan, struct imxdma_channel, chan);
 }
 
-static void imxdma_handle(struct imxdma_channel *imxdmac)
+static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_desc *desc;
+
+	if (!list_empty(&imxdmac->ld_active)) {
+		desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc,
+					node);
+		if (desc->type == IMXDMA_DESC_CYCLIC)
+			return true;
+	}
+	return false;
+}
+
+
+
+static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val,
+			     unsigned offset)
+{
+	__raw_writel(val, imxdma->base + offset);
+}
+
+static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset)
+{
+	return __raw_readl(imxdma->base + offset);
+}
+
+static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
+{
+	if (cpu_is_mx27())
+		return imxdmac->hw_chaining;
+	else
+		return 0;
+}
+
+/*
+ * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation
+ */
+static inline int imxdma_sg_next(struct imxdma_desc *d)
 {
-	if (imxdmac->desc.callback)
-		imxdmac->desc.callback(imxdmac->desc.callback_param);
-	imxdmac->last_completed = imxdmac->desc.cookie;
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct scatterlist *sg = d->sg;
+	unsigned long now;
+
+	now = min(d->len, sg->length);
+	if (d->len != IMX_DMA_LENGTH_LOOP)
+		d->len -= now;
+
+	if (d->direction == DMA_DEV_TO_MEM)
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_DAR(imxdmac->channel));
+	else
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_SAR(imxdmac->channel));
+
+	imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel));
+
+	dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, "
+		"size 0x%08x\n", __func__, imxdmac->channel,
+		 imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel)));
+
+	return now;
 }
 
-static void imxdma_irq_handler(int channel, void *data)
+static void imxdma_enable_hw(struct imxdma_desc *d)
 {
-	struct imxdma_channel *imxdmac = data;
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	local_irq_save(flags);
+
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) &
+			 ~(1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) |
+			 CCR_CEN | CCR_ACRPT, DMA_CCR(channel));
+
+	if ((cpu_is_mx21() || cpu_is_mx27()) &&
+			d->sg && imxdma_hw_chain(imxdmac)) {
+		d->sg = sg_next(d->sg);
+		if (d->sg) {
+			u32 tmp;
+			imxdma_sg_next(d);
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel));
+			imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT,
+					 DMA_CCR(channel));
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+static void imxdma_disable_hw(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	if (imxdma_hw_chain(imxdmac))
+		del_timer(&imxdmac->watchdog);
+
+	local_irq_save(flags);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) |
+			 (1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) &
+			 ~CCR_CEN, DMA_CCR(channel));
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	local_irq_restore(flags);
+}
+
+static void imxdma_watchdog(unsigned long data)
+{
+	struct imxdma_channel *imxdmac = (struct imxdma_channel *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(channel));
 
-	imxdmac->status = DMA_SUCCESS;
-	imxdma_handle(imxdmac);
+	/* Tasklet watchdog error handler */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+	dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n",
+		imxdmac->channel);
 }
 
-static void imxdma_err_handler(int channel, void *data, int error)
+static irqreturn_t imxdma_err_handler(int irq, void *dev_id)
 {
-	struct imxdma_channel *imxdmac = data;
+	struct imxdma_engine *imxdma = dev_id;
+	unsigned int err_mask;
+	int i, disr;
+	int errcode;
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DRTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DSESR)  |
+		   imx_dmav1_readl(imxdma, DMA_DBOSR);
+
+	if (!err_mask)
+		return IRQ_HANDLED;
+
+	imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR);
+
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (!(err_mask & (1 << i)))
+			continue;
+		errcode = 0;
+
+		if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR);
+			errcode |= IMX_DMA_ERR_BURST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR);
+			errcode |= IMX_DMA_ERR_REQUEST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR);
+			errcode |= IMX_DMA_ERR_TRANSFER;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR);
+			errcode |= IMX_DMA_ERR_BUFFER;
+		}
+		/* Tasklet error handler */
+		tasklet_schedule(&imxdma->channel[i].dma_tasklet);
+
+		printk(KERN_WARNING
+		       "DMA timeout on channel %d -%s%s%s%s\n", i,
+		       errcode & IMX_DMA_ERR_BURST ?    " burst" : "",
+		       errcode & IMX_DMA_ERR_REQUEST ?  " request" : "",
+		       errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "",
+		       errcode & IMX_DMA_ERR_BUFFER ?   " buffer" : "");
+	}
+	return IRQ_HANDLED;
+}
+
+static void dma_irq_handle_channel(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int chno = imxdmac->channel;
+	struct imxdma_desc *desc;
+
+	spin_lock(&imxdma->lock);
+	if (list_empty(&imxdmac->ld_active)) {
+		spin_unlock(&imxdma->lock);
+		goto out;
+	}
+
+	desc = list_first_entry(&imxdmac->ld_active,
+				struct imxdma_desc,
+				node);
+	spin_unlock(&imxdma->lock);
+
+	if (desc->sg) {
+		u32 tmp;
+		desc->sg = sg_next(desc->sg);
+
+		if (desc->sg) {
+			imxdma_sg_next(desc);
+
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno));
+
+			if (imxdma_hw_chain(imxdmac)) {
+				/* FIXME: The timeout should probably be
+				 * configurable
+				 */
+				mod_timer(&imxdmac->watchdog,
+					jiffies + msecs_to_jiffies(500));
+
+				tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT;
+				imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+			} else {
+				imx_dmav1_writel(imxdma, tmp & ~CCR_CEN,
+						 DMA_CCR(chno));
+				tmp |= CCR_CEN;
+			}
+
+			imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+
+			if (imxdma_chan_is_doing_cyclic(imxdmac))
+				/* Tasklet progression */
+				tasklet_schedule(&imxdmac->dma_tasklet);
+
+			return;
+		}
+
+		if (imxdma_hw_chain(imxdmac)) {
+			del_timer(&imxdmac->watchdog);
+			return;
+		}
+	}
+
+out:
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(chno));
+	/* Tasklet irq */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+}
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+	struct imxdma_engine *imxdma = dev_id;
+	int i, disr;
+
+	if (cpu_is_mx21() || cpu_is_mx27())
+		imxdma_err_handler(irq, dev_id);
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr);
+
+	imx_dmav1_writel(imxdma, disr, DMA_DISR);
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (disr & (1 << i))
+			dma_irq_handle_channel(&imxdma->channel[i]);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int imxdma_xfer_desc(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
+	int slot = -1;
+	int i;
+
+	/* Configure and enable */
+	switch (d->type) {
+	case IMXDMA_DESC_INTERLEAVED:
+		/* Try to get a free 2D slot */
+		spin_lock_irqsave(&imxdma->lock, flags);
+		for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
+			if ((imxdma->slots_2d[i].count > 0) &&
+			((imxdma->slots_2d[i].xsr != d->x) ||
+			(imxdma->slots_2d[i].ysr != d->y) ||
+			(imxdma->slots_2d[i].wsr != d->w)))
+				continue;
+			slot = i;
+			break;
+		}
+		if (slot < 0)
+			return -EBUSY;
+
+		imxdma->slots_2d[slot].xsr = d->x;
+		imxdma->slots_2d[slot].ysr = d->y;
+		imxdma->slots_2d[slot].wsr = d->w;
+		imxdma->slots_2d[slot].count++;
+
+		imxdmac->slot_2d = slot;
+		imxdmac->enabled_2d = true;
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+
+		if (slot == IMX_DMA_2D_SLOT_A) {
+			d->config_mem &= ~CCR_MSEL_B;
+			d->config_port &= ~CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRA);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRA);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRA);
+		} else {
+			d->config_mem |= CCR_MSEL_B;
+			d->config_port |= CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRB);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRB);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRB);
+		}
+		/*
+		 * We fall-through here intentionally, since a 2D transfer is
+		 * similar to MEMCPY just adding the 2D slot configuration.
+		 */
+	case IMXDMA_DESC_MEMCPY:
+		imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2),
+			 DMA_CCR(imxdmac->channel));
+
+		imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
+
+		dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
+			"dma_length=%d\n", __func__, imxdmac->channel,
+			d->dest, d->src, d->len);
+
+		break;
+	/* Cyclic transfer is the same as slave_sg with special sg configuration. */
+	case IMXDMA_DESC_CYCLIC:
+	case IMXDMA_DESC_SLAVE_SG:
+		if (d->direction == DMA_DEV_TO_MEM) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_SAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
+				"total length=%d dev_addr=0x%08x (dev2mem)\n",
+				__func__, imxdmac->channel, d->sg, d->sgcount,
+				d->len, imxdmac->per_address);
+		} else if (d->direction == DMA_MEM_TO_DEV) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_DAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
+				"total length=%d dev_addr=0x%08x (mem2dev)\n",
+				__func__, imxdmac->channel, d->sg, d->sgcount,
+				d->len, imxdmac->per_address);
+		} else {
+			dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
+				__func__, imxdmac->channel);
+			return -EINVAL;
+		}
+
+		imxdma_sg_next(d);
 
-	imxdmac->status = DMA_ERROR;
-	imxdma_handle(imxdmac);
+		break;
+	default:
+		return -EINVAL;
+	}
+	imxdma_enable_hw(d);
+	return 0;
 }
 
-static void imxdma_progression(int channel, void *data,
-		struct scatterlist *sg)
+static void imxdma_tasklet(unsigned long data)
 {
-	struct imxdma_channel *imxdmac = data;
+	struct imxdma_channel *imxdmac = (void *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
 
-	imxdmac->status = DMA_SUCCESS;
-	imxdma_handle(imxdmac);
+	spin_lock(&imxdma->lock);
+
+	if (list_empty(&imxdmac->ld_active)) {
+		/* Someone might have called terminate all */
+		goto out;
+	}
+	desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node);
+
+	if (desc->desc.callback)
+		desc->desc.callback(desc->desc.callback_param);
+
+	dma_cookie_complete(&desc->desc);
+
+	/* If we are dealing with a cyclic descriptor keep it on ld_active */
+	if (imxdma_chan_is_doing_cyclic(imxdmac))
+		goto out;
+
+	/* Free 2D slot if it was an interleaved transfer */
+	if (imxdmac->enabled_2d) {
+		imxdma->slots_2d[imxdmac->slot_2d].count--;
+		imxdmac->enabled_2d = false;
+	}
+
+	list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
+
+	if (!list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc,
+					node);
+		list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active);
+		if (imxdma_xfer_desc(desc) < 0)
+			dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n",
+				 __func__, imxdmac->channel);
+	}
+out:
+	spin_unlock(&imxdma->lock);
 }
 
 static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -97,13 +602,18 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct dma_slave_config *dmaengine_cfg = (void *)arg;
-	int ret;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
 	unsigned int mode = 0;
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
-		imxdmac->status = DMA_ERROR;
-		imx_dma_disable(imxdmac->imxdma_channel);
+		imxdma_disable_hw(imxdmac);
+
+		spin_lock_irqsave(&imxdma->lock, flags);
+		list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+		list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+		spin_unlock_irqrestore(&imxdma->lock, flags);
 		return 0;
 	case DMA_SLAVE_CONFIG:
 		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
@@ -128,16 +638,22 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 			mode = IMX_DMA_MEMSIZE_32;
 			break;
 		}
-		ret = imx_dma_config_channel(imxdmac->imxdma_channel,
-				mode | IMX_DMA_TYPE_FIFO,
-				IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR,
-				imxdmac->dma_request, 1);
-
-		if (ret)
-			return ret;
 
-		imx_dma_config_burstlen(imxdmac->imxdma_channel,
-				imxdmac->watermark_level * imxdmac->word_size);
+		imxdmac->hw_chaining = 1;
+		if (!imxdma_hw_chain(imxdmac))
+			return -EINVAL;
+		imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) |
+			((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) |
+			CCR_REN;
+		imxdmac->ccr_to_device =
+			(IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) |
+			((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN;
+		imx_dmav1_writel(imxdma, imxdmac->dma_request,
+				 DMA_RSSR(imxdmac->channel));
+
+		/* Set burst length */
+		imx_dmav1_writel(imxdma, imxdmac->watermark_level *
+				imxdmac->word_size, DMA_BLR(imxdmac->channel));
 
 		return 0;
 	default:
@@ -151,43 +667,20 @@ static enum dma_status imxdma_tx_status(struct dma_chan *chan,
 					    dma_cookie_t cookie,
 					    struct dma_tx_state *txstate)
 {
-	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
-	dma_cookie_t last_used;
-	enum dma_status ret;
-
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, imxdmac->last_completed, last_used);
-	dma_set_tx_state(txstate, imxdmac->last_completed, last_used, 0);
-
-	return ret;
-}
-
-static dma_cookie_t imxdma_assign_cookie(struct imxdma_channel *imxdma)
-{
-	dma_cookie_t cookie = imxdma->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	imxdma->chan.cookie = cookie;
-	imxdma->desc.cookie = cookie;
-
-	return cookie;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	dma_cookie_t cookie;
+	unsigned long flags;
 
-	spin_lock_irq(&imxdmac->lock);
-
-	cookie = imxdma_assign_cookie(imxdmac);
-
-	imx_dma_enable(imxdmac->imxdma_channel);
-
-	spin_unlock_irq(&imxdmac->lock);
+	spin_lock_irqsave(&imxdma->lock, flags);
+	list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue);
+	cookie = dma_cookie_assign(tx);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 
 	return cookie;
 }
@@ -197,23 +690,52 @@ static int imxdma_alloc_chan_resources(struct dma_chan *chan)
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imx_dma_data *data = chan->private;
 
-	imxdmac->dma_request = data->dma_request;
+	if (data != NULL)
+		imxdmac->dma_request = data->dma_request;
 
-	dma_async_tx_descriptor_init(&imxdmac->desc, chan);
-	imxdmac->desc.tx_submit = imxdma_tx_submit;
-	/* txd.flags will be overwritten in prep funcs */
-	imxdmac->desc.flags = DMA_CTRL_ACK;
+	while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) {
+		struct imxdma_desc *desc;
 
-	imxdmac->status = DMA_SUCCESS;
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			break;
+		__memzero(&desc->desc, sizeof(struct dma_async_tx_descriptor));
+		dma_async_tx_descriptor_init(&desc->desc, chan);
+		desc->desc.tx_submit = imxdma_tx_submit;
+		/* txd.flags will be overwritten in prep funcs */
+		desc->desc.flags = DMA_CTRL_ACK;
+		desc->status = DMA_SUCCESS;
+
+		list_add_tail(&desc->node, &imxdmac->ld_free);
+		imxdmac->descs_allocated++;
+	}
 
-	return 0;
+	if (!imxdmac->descs_allocated)
+		return -ENOMEM;
+
+	return imxdmac->descs_allocated;
 }
 
 static void imxdma_free_chan_resources(struct dma_chan *chan)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc, *_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+
+	imxdma_disable_hw(imxdmac);
+	list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+	list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
 
-	imx_dma_disable(imxdmac->imxdma_channel);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) {
+		kfree(desc);
+		imxdmac->descs_allocated--;
+	}
+	INIT_LIST_HEAD(&imxdmac->ld_free);
 
 	if (imxdmac->sg_list) {
 		kfree(imxdmac->sg_list);
@@ -224,27 +746,23 @@ static void imxdma_free_chan_resources(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct scatterlist *sg;
-	int i, ret, dma_length = 0;
-	unsigned int dmamode;
+	int i, dma_length = 0;
+	struct imxdma_desc *desc;
 
-	if (imxdmac->status == DMA_IN_PROGRESS)
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
 		return NULL;
 
-	imxdmac->status = DMA_IN_PROGRESS;
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
 
 	for_each_sg(sgl, sg, sg_len, i) {
 		dma_length += sg->length;
 	}
 
-	if (direction == DMA_DEV_TO_MEM)
-		dmamode = DMA_MODE_READ;
-	else
-		dmamode = DMA_MODE_WRITE;
-
 	switch (imxdmac->word_size) {
 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
 		if (sgl->length & 3 || sgl->dma_address & 3)
@@ -260,37 +778,41 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		return NULL;
 	}
 
-	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, sgl, sg_len,
-		 dma_length, imxdmac->per_address, dmamode);
-	if (ret)
-		return NULL;
+	desc->type = IMXDMA_DESC_SLAVE_SG;
+	desc->sg = sgl;
+	desc->sgcount = sg_len;
+	desc->len = dma_length;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
 
-	return &imxdmac->desc;
+	return &desc->desc;
 }
 
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
-	int i, ret;
+	struct imxdma_desc *desc;
+	int i;
 	unsigned int periods = buf_len / period_len;
-	unsigned int dmamode;
 
 	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
 			__func__, imxdmac->channel, buf_len, period_len);
 
-	if (imxdmac->status == DMA_IN_PROGRESS)
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
 		return NULL;
-	imxdmac->status = DMA_IN_PROGRESS;
 
-	ret = imx_dma_setup_progression_handler(imxdmac->imxdma_channel,
-			imxdma_progression);
-	if (ret) {
-		dev_err(imxdma->dev, "Failed to setup the DMA handler\n");
-		return NULL;
-	}
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
 
 	if (imxdmac->sg_list)
 		kfree(imxdmac->sg_list);
@@ -316,62 +838,221 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 	imxdmac->sg_list[periods].page_link =
 		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
 
-	if (direction == DMA_DEV_TO_MEM)
-		dmamode = DMA_MODE_READ;
-	else
-		dmamode = DMA_MODE_WRITE;
+	desc->type = IMXDMA_DESC_CYCLIC;
+	desc->sg = imxdmac->sg_list;
+	desc->sgcount = periods;
+	desc->len = IMX_DMA_LENGTH_LOOP;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest,
+	dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
 
-	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, imxdmac->sg_list, periods,
-		 IMX_DMA_LENGTH_LOOP, imxdmac->per_address, dmamode);
-	if (ret)
+	dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
+			__func__, imxdmac->channel, src, dest, len);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
 		return NULL;
 
-	return &imxdmac->desc;
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_MEMCPY;
+	desc->src = src;
+	desc->dest = dest;
+	desc->len = len;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
+		"   src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
+		imxdmac->channel, xt->src_start, xt->dst_start,
+		xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
+		xt->numf, xt->frame_size);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM)
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_INTERLEAVED;
+	desc->src = xt->src_start;
+	desc->dest = xt->dst_start;
+	desc->x = xt->sgl[0].size;
+	desc->y = xt->numf;
+	desc->w = xt->sgl[0].icg + desc->x;
+	desc->len = desc->x * desc->y;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32;
+	desc->config_mem = IMX_DMA_MEMSIZE_32;
+	if (xt->src_sgl)
+		desc->config_mem |= IMX_DMA_TYPE_2D;
+	if (xt->dst_sgl)
+		desc->config_port |= IMX_DMA_TYPE_2D;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
 }
 
 static void imxdma_issue_pending(struct dma_chan *chan)
 {
-	/*
-	 * Nothing to do. We only have a single descriptor
-	 */
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	if (list_empty(&imxdmac->ld_active) &&
+	    !list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue,
+					struct imxdma_desc, node);
+
+		if (imxdma_xfer_desc(desc) < 0) {
+			dev_warn(imxdma->dev,
+				 "%s: channel: %d couldn't issue DMA xfer\n",
+				 __func__, imxdmac->channel);
+		} else {
+			list_move_tail(imxdmac->ld_queue.next,
+				       &imxdmac->ld_active);
+		}
+	}
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 }
 
 static int __init imxdma_probe(struct platform_device *pdev)
-{
+	{
 	struct imxdma_engine *imxdma;
 	int ret, i;
 
+
 	imxdma = kzalloc(sizeof(*imxdma), GFP_KERNEL);
 	if (!imxdma)
 		return -ENOMEM;
 
+	if (cpu_is_mx1()) {
+		imxdma->base = MX1_IO_ADDRESS(MX1_DMA_BASE_ADDR);
+	} else if (cpu_is_mx21()) {
+		imxdma->base = MX21_IO_ADDRESS(MX21_DMA_BASE_ADDR);
+	} else if (cpu_is_mx27()) {
+		imxdma->base = MX27_IO_ADDRESS(MX27_DMA_BASE_ADDR);
+	} else {
+		kfree(imxdma);
+		return 0;
+	}
+
+	imxdma->dma_clk = clk_get(NULL, "dma");
+	if (IS_ERR(imxdma->dma_clk))
+		return PTR_ERR(imxdma->dma_clk);
+	clk_enable(imxdma->dma_clk);
+
+	/* reset DMA module */
+	imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR);
+
+	if (cpu_is_mx1()) {
+		ret = request_irq(MX1_DMA_INT, dma_irq_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register IRQ for DMA\n");
+			kfree(imxdma);
+			return ret;
+		}
+
+		ret = request_irq(MX1_DMA_ERR, imxdma_err_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n");
+			free_irq(MX1_DMA_INT, NULL);
+			kfree(imxdma);
+			return ret;
+		}
+	}
+
+	/* enable DMA module */
+	imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR);
+
+	/* clear all interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR);
+
+	/* disable interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR);
+
 	INIT_LIST_HEAD(&imxdma->dma_device.channels);
 
 	dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask);
+
+	/* Initialize 2D global parameters */
+	for (i = 0; i < IMX_DMA_2D_SLOTS; i++)
+		imxdma->slots_2d[i].count = 0;
+
+	spin_lock_init(&imxdma->lock);
 
 	/* Initialize channel parameters */
-	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
 		struct imxdma_channel *imxdmac = &imxdma->channel[i];
 
-		imxdmac->imxdma_channel = imx_dma_request_by_prio("dmaengine",
-				DMA_PRIO_MEDIUM);
-		if ((int)imxdmac->channel < 0) {
-			ret = -ENODEV;
-			goto err_init;
+		if (cpu_is_mx21() || cpu_is_mx27()) {
+			ret = request_irq(MX2x_INT_DMACH0 + i,
+					dma_irq_handler, 0, "DMA", imxdma);
+			if (ret) {
+				dev_warn(imxdma->dev, "Can't register IRQ %d "
+					 "for DMA channel %d\n",
+					 MX2x_INT_DMACH0 + i, i);
+				goto err_init;
+			}
+			init_timer(&imxdmac->watchdog);
+			imxdmac->watchdog.function = &imxdma_watchdog;
+			imxdmac->watchdog.data = (unsigned long)imxdmac;
 		}
 
-		imx_dma_setup_handlers(imxdmac->imxdma_channel,
-		       imxdma_irq_handler, imxdma_err_handler, imxdmac);
-
 		imxdmac->imxdma = imxdma;
-		spin_lock_init(&imxdmac->lock);
 
+		INIT_LIST_HEAD(&imxdmac->ld_queue);
+		INIT_LIST_HEAD(&imxdmac->ld_free);
+		INIT_LIST_HEAD(&imxdmac->ld_active);
+
+		tasklet_init(&imxdmac->dma_tasklet, imxdma_tasklet,
+			     (unsigned long)imxdmac);
 		imxdmac->chan.device = &imxdma->dma_device;
+		dma_cookie_init(&imxdmac->chan);
 		imxdmac->channel = i;
 
 		/* Add the channel to the DMAC list */
-		list_add_tail(&imxdmac->chan.device_node, &imxdma->dma_device.channels);
+		list_add_tail(&imxdmac->chan.device_node,
+			      &imxdma->dma_device.channels);
 	}
 
 	imxdma->dev = &pdev->dev;
@@ -382,11 +1063,14 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	imxdma->dma_device.device_tx_status = imxdma_tx_status;
 	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
 	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+	imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy;
+	imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved;
 	imxdma->dma_device.device_control = imxdma_control;
 	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
 
 	platform_set_drvdata(pdev, imxdma);
 
+	imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */
 	imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms;
 	dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff);
 
@@ -399,9 +1083,13 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	return 0;
 
 err_init:
-	while (--i >= 0) {
-		struct imxdma_channel *imxdmac = &imxdma->channel[i];
-		imx_dma_free(imxdmac->imxdma_channel);
+
+	if (cpu_is_mx21() || cpu_is_mx27()) {
+		while (--i >= 0)
+			free_irq(MX2x_INT_DMACH0 + i, NULL);
+	} else if cpu_is_mx1() {
+		free_irq(MX1_DMA_INT, NULL);
+		free_irq(MX1_DMA_ERR, NULL);
 	}
 
 	kfree(imxdma);
@@ -415,10 +1103,12 @@ static int __exit imxdma_remove(struct platform_device *pdev)
 
         dma_async_device_unregister(&imxdma->dma_device);
 
-	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
-		struct imxdma_channel *imxdmac = &imxdma->channel[i];
-
-		 imx_dma_free(imxdmac->imxdma_channel);
+	if (cpu_is_mx21() || cpu_is_mx27()) {
+		for (i = 0; i < IMX_DMA_CHANNELS; i++)
+			free_irq(MX2x_INT_DMACH0 + i, NULL);
+	} else if cpu_is_mx1() {
+		free_irq(MX1_DMA_INT, NULL);
+		free_irq(MX1_DMA_ERR, NULL);
 	}
 
         kfree(imxdma);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 63540d3e2153..d3e38e28bb6b 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/bitops.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
@@ -41,6 +42,8 @@
 #include <mach/dma.h>
 #include <mach/hardware.h>
 
+#include "dmaengine.h"
+
 /* SDMA registers */
 #define SDMA_H_C0PTR		0x000
 #define SDMA_H_INTR		0x004
@@ -259,19 +262,18 @@ struct sdma_channel {
 	unsigned int			pc_from_device, pc_to_device;
 	unsigned long			flags;
 	dma_addr_t			per_address;
-	u32				event_mask0, event_mask1;
-	u32				watermark_level;
+	unsigned long			event_mask[2];
+	unsigned long			watermark_level;
 	u32				shp_addr, per_addr;
 	struct dma_chan			chan;
 	spinlock_t			lock;
 	struct dma_async_tx_descriptor	desc;
-	dma_cookie_t			last_completed;
 	enum dma_status			status;
 	unsigned int			chn_count;
 	unsigned int			chn_real_count;
 };
 
-#define IMX_DMA_SG_LOOP		(1 << 0)
+#define IMX_DMA_SG_LOOP		BIT(0)
 
 #define MAX_DMA_CHANNELS 32
 #define MXC_SDMA_DEFAULT_PRIORITY 1
@@ -345,9 +347,9 @@ static const struct of_device_id sdma_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, sdma_dt_ids);
 
-#define SDMA_H_CONFIG_DSPDMA	(1 << 12) /* indicates if the DSPDMA is used */
-#define SDMA_H_CONFIG_RTD_PINS	(1 << 11) /* indicates if Real-Time Debug pins are enabled */
-#define SDMA_H_CONFIG_ACR	(1 << 4)  /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_DSPDMA	BIT(12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS	BIT(11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR	BIT(4)  /* indicates if AHB freq /core freq = 2 or 1 */
 #define SDMA_H_CONFIG_CSM	(3)       /* indicates which context switch mode is selected*/
 
 static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
@@ -362,37 +364,42 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 {
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
-	u32 evt, mcu, dsp;
+	unsigned long evt, mcu, dsp;
 
 	if (event_override && mcu_override && dsp_override)
 		return -EINVAL;
 
-	evt = __raw_readl(sdma->regs + SDMA_H_EVTOVR);
-	mcu = __raw_readl(sdma->regs + SDMA_H_HOSTOVR);
-	dsp = __raw_readl(sdma->regs + SDMA_H_DSPOVR);
+	evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR);
+	mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR);
+	dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR);
 
 	if (dsp_override)
-		dsp &= ~(1 << channel);
+		__clear_bit(channel, &dsp);
 	else
-		dsp |= (1 << channel);
+		__set_bit(channel, &dsp);
 
 	if (event_override)
-		evt &= ~(1 << channel);
+		__clear_bit(channel, &evt);
 	else
-		evt |= (1 << channel);
+		__set_bit(channel, &evt);
 
 	if (mcu_override)
-		mcu &= ~(1 << channel);
+		__clear_bit(channel, &mcu);
 	else
-		mcu |= (1 << channel);
+		__set_bit(channel, &mcu);
 
-	__raw_writel(evt, sdma->regs + SDMA_H_EVTOVR);
-	__raw_writel(mcu, sdma->regs + SDMA_H_HOSTOVR);
-	__raw_writel(dsp, sdma->regs + SDMA_H_DSPOVR);
+	writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR);
+	writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR);
+	writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR);
 
 	return 0;
 }
 
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
+	writel(BIT(channel), sdma->regs + SDMA_H_START);
+}
+
 /*
  * sdma_run_channel - run a channel and wait till it's done
  */
@@ -404,7 +411,7 @@ static int sdma_run_channel(struct sdma_channel *sdmac)
 
 	init_completion(&sdmac->done);
 
-	__raw_writel(1 << channel, sdma->regs + SDMA_H_START);
+	sdma_enable_channel(sdma, channel);
 
 	ret = wait_for_completion_timeout(&sdmac->done, HZ);
 
@@ -451,12 +458,12 @@ static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
 {
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
-	u32 val;
+	unsigned long val;
 	u32 chnenbl = chnenbl_ofs(sdma, event);
 
-	val = __raw_readl(sdma->regs + chnenbl);
-	val |= (1 << channel);
-	__raw_writel(val, sdma->regs + chnenbl);
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__set_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
 }
 
 static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
@@ -464,11 +471,11 @@ static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
 	u32 chnenbl = chnenbl_ofs(sdma, event);
-	u32 val;
+	unsigned long val;
 
-	val = __raw_readl(sdma->regs + chnenbl);
-	val &= ~(1 << channel);
-	__raw_writel(val, sdma->regs + chnenbl);
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__clear_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
 }
 
 static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
@@ -522,7 +529,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
 	else
 		sdmac->status = DMA_SUCCESS;
 
-	sdmac->last_completed = sdmac->desc.cookie;
+	dma_cookie_complete(&sdmac->desc);
 	if (sdmac->desc.callback)
 		sdmac->desc.callback(sdmac->desc.callback_param);
 }
@@ -544,10 +551,10 @@ static void mxc_sdma_handle_channel(struct sdma_channel *sdmac)
 static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 {
 	struct sdma_engine *sdma = dev_id;
-	u32 stat;
+	unsigned long stat;
 
-	stat = __raw_readl(sdma->regs + SDMA_H_INTR);
-	__raw_writel(stat, sdma->regs + SDMA_H_INTR);
+	stat = readl_relaxed(sdma->regs + SDMA_H_INTR);
+	writel_relaxed(stat, sdma->regs + SDMA_H_INTR);
 
 	while (stat) {
 		int channel = fls(stat) - 1;
@@ -555,7 +562,7 @@ static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 
 		mxc_sdma_handle_channel(sdmac);
 
-		stat &= ~(1 << channel);
+		__clear_bit(channel, &stat);
 	}
 
 	return IRQ_HANDLED;
@@ -663,11 +670,11 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 		return load_address;
 
 	dev_dbg(sdma->dev, "load_address = %d\n", load_address);
-	dev_dbg(sdma->dev, "wml = 0x%08x\n", sdmac->watermark_level);
+	dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level);
 	dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
 	dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
-	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", sdmac->event_mask0);
-	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", sdmac->event_mask1);
+	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]);
+	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]);
 
 	mutex_lock(&sdma->channel_0_lock);
 
@@ -677,8 +684,8 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	/* Send by context the event mask,base address for peripheral
 	 * and watermark level
 	 */
-	context->gReg[0] = sdmac->event_mask1;
-	context->gReg[1] = sdmac->event_mask0;
+	context->gReg[0] = sdmac->event_mask[1];
+	context->gReg[1] = sdmac->event_mask[0];
 	context->gReg[2] = sdmac->per_addr;
 	context->gReg[6] = sdmac->shp_addr;
 	context->gReg[7] = sdmac->watermark_level;
@@ -701,7 +708,7 @@ static void sdma_disable_channel(struct sdma_channel *sdmac)
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
 
-	__raw_writel(1 << channel, sdma->regs + SDMA_H_STATSTOP);
+	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
 	sdmac->status = DMA_ERROR;
 }
 
@@ -711,13 +718,13 @@ static int sdma_config_channel(struct sdma_channel *sdmac)
 
 	sdma_disable_channel(sdmac);
 
-	sdmac->event_mask0 = 0;
-	sdmac->event_mask1 = 0;
+	sdmac->event_mask[0] = 0;
+	sdmac->event_mask[1] = 0;
 	sdmac->shp_addr = 0;
 	sdmac->per_addr = 0;
 
 	if (sdmac->event_id0) {
-		if (sdmac->event_id0 > 32)
+		if (sdmac->event_id0 >= sdmac->sdma->num_events)
 			return -EINVAL;
 		sdma_event_enable(sdmac, sdmac->event_id0);
 	}
@@ -740,15 +747,14 @@ static int sdma_config_channel(struct sdma_channel *sdmac)
 			(sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
 		/* Handle multiple event channels differently */
 		if (sdmac->event_id1) {
-			sdmac->event_mask1 = 1 << (sdmac->event_id1 % 32);
+			sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32);
 			if (sdmac->event_id1 > 31)
-				sdmac->watermark_level |= 1 << 31;
-			sdmac->event_mask0 = 1 << (sdmac->event_id0 % 32);
+				__set_bit(31, &sdmac->watermark_level);
+			sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32);
 			if (sdmac->event_id0 > 31)
-				sdmac->watermark_level |= 1 << 30;
+				__set_bit(30, &sdmac->watermark_level);
 		} else {
-			sdmac->event_mask0 = 1 << sdmac->event_id0;
-			sdmac->event_mask1 = 1 << (sdmac->event_id0 - 32);
+			__set_bit(sdmac->event_id0, sdmac->event_mask);
 		}
 		/* Watermark Level */
 		sdmac->watermark_level |= sdmac->watermark_level;
@@ -774,7 +780,7 @@ static int sdma_set_channel_priority(struct sdma_channel *sdmac,
 		return -EINVAL;
 	}
 
-	__raw_writel(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+	writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
 
 	return 0;
 }
@@ -796,8 +802,6 @@ static int sdma_request_channel(struct sdma_channel *sdmac)
 	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
 	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
 
-	clk_enable(sdma->clk);
-
 	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
 
 	init_completion(&sdmac->done);
@@ -810,24 +814,6 @@ out:
 	return ret;
 }
 
-static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
-{
-	__raw_writel(1 << channel, sdma->regs + SDMA_H_START);
-}
-
-static dma_cookie_t sdma_assign_cookie(struct sdma_channel *sdmac)
-{
-	dma_cookie_t cookie = sdmac->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	sdmac->chan.cookie = cookie;
-	sdmac->desc.cookie = cookie;
-
-	return cookie;
-}
-
 static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
 {
 	return container_of(chan, struct sdma_channel, chan);
@@ -837,14 +823,11 @@ static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	unsigned long flags;
 	struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
-	struct sdma_engine *sdma = sdmac->sdma;
 	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&sdmac->lock, flags);
 
-	cookie = sdma_assign_cookie(sdmac);
-
-	sdma_enable_channel(sdma, sdmac->channel);
+	cookie = dma_cookie_assign(tx);
 
 	spin_unlock_irqrestore(&sdmac->lock, flags);
 
@@ -875,11 +858,14 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan)
 
 	sdmac->peripheral_type = data->peripheral_type;
 	sdmac->event_id0 = data->dma_request;
-	ret = sdma_set_channel_priority(sdmac, prio);
+
+	clk_enable(sdmac->sdma->clk);
+
+	ret = sdma_request_channel(sdmac);
 	if (ret)
 		return ret;
 
-	ret = sdma_request_channel(sdmac);
+	ret = sdma_set_channel_priority(sdmac, prio);
 	if (ret)
 		return ret;
 
@@ -916,7 +902,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1014,7 +1000,8 @@ err_out:
 
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1128,7 +1115,7 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 
 	last_used = chan->cookie;
 
-	dma_set_tx_state(txstate, sdmac->last_completed, last_used,
+	dma_set_tx_state(txstate, chan->completed_cookie, last_used,
 			sdmac->chn_count - sdmac->chn_real_count);
 
 	return sdmac->status;
@@ -1136,9 +1123,11 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 
 static void sdma_issue_pending(struct dma_chan *chan)
 {
-	/*
-	 * Nothing to do. We only have a single descriptor
-	 */
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	if (sdmac->status == DMA_IN_PROGRESS)
+		sdma_enable_channel(sdma, sdmac->channel);
 }
 
 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1	34
@@ -1230,7 +1219,7 @@ static int __init sdma_init(struct sdma_engine *sdma)
 	clk_enable(sdma->clk);
 
 	/* Be sure SDMA has not started yet */
-	__raw_writel(0, sdma->regs + SDMA_H_C0PTR);
+	writel_relaxed(0, sdma->regs + SDMA_H_C0PTR);
 
 	sdma->channel_control = dma_alloc_coherent(NULL,
 			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) +
@@ -1253,11 +1242,11 @@ static int __init sdma_init(struct sdma_engine *sdma)
 
 	/* disable all channels */
 	for (i = 0; i < sdma->num_events; i++)
-		__raw_writel(0, sdma->regs + chnenbl_ofs(sdma, i));
+		writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
 
 	/* All channels have priority 0 */
 	for (i = 0; i < MAX_DMA_CHANNELS; i++)
-		__raw_writel(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+		writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
 
 	ret = sdma_request_channel(&sdma->channel[0]);
 	if (ret)
@@ -1266,16 +1255,16 @@ static int __init sdma_init(struct sdma_engine *sdma)
 	sdma_config_ownership(&sdma->channel[0], false, true, false);
 
 	/* Set Command Channel (Channel Zero) */
-	__raw_writel(0x4050, sdma->regs + SDMA_CHN0ADDR);
+	writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR);
 
 	/* Set bits of CONFIG register but with static context switching */
 	/* FIXME: Check whether to set ACR bit depending on clock ratios */
-	__raw_writel(0, sdma->regs + SDMA_H_CONFIG);
+	writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
 
-	__raw_writel(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+	writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
 
 	/* Set bits of CONFIG register with given context switching mode */
-	__raw_writel(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+	writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
 
 	/* Initializes channel's priorities */
 	sdma_set_channel_priority(&sdma->channel[0], 7);
@@ -1367,6 +1356,7 @@ static int __init sdma_probe(struct platform_device *pdev)
 		spin_lock_init(&sdmac->lock);
 
 		sdmac->chan.device = &sdma->dma_device;
+		dma_cookie_init(&sdmac->chan);
 		sdmac->channel = i;
 
 		/*
@@ -1387,7 +1377,9 @@ static int __init sdma_probe(struct platform_device *pdev)
 		sdma_add_scripts(sdma, pdata->script_addrs);
 
 	if (pdata) {
-		sdma_get_firmware(sdma, pdata->fw_name);
+		ret = sdma_get_firmware(sdma, pdata->fw_name);
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
 	} else {
 		/*
 		 * Because that device tree does not encode ROM script address,
@@ -1396,15 +1388,12 @@ static int __init sdma_probe(struct platform_device *pdev)
 		 */
 		ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
 					      &fw_name);
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get firmware name\n");
-			goto err_init;
-		}
-
-		ret = sdma_get_firmware(sdma, fw_name);
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get firmware\n");
-			goto err_init;
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware name\n");
+		else {
+			ret = sdma_get_firmware(sdma, fw_name);
+			if (ret)
+				dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
 		}
 	}
 
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 74f70aadf9e4..c900ca7aaec4 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -29,6 +29,8 @@
 #include <linux/intel_mid_dma.h>
 #include <linux/module.h>
 
+#include "dmaengine.h"
+
 #define MAX_CHAN	4 /*max ch across controllers*/
 #include "intel_mid_dma_regs.h"
 
@@ -288,7 +290,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
 	struct intel_mid_dma_lli	*llitem;
 	void *param_txd = NULL;
 
-	midc->completed = txd->cookie;
+	dma_cookie_complete(txd);
 	callback_txd = txd->callback;
 	param_txd = txd->callback_param;
 
@@ -434,14 +436,7 @@ static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	dma_cookie_t		cookie;
 
 	spin_lock_bh(&midc->lock);
-	cookie = midc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	midc->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
+	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&midc->active_list))
 		list_add_tail(&desc->desc_node, &midc->active_list);
@@ -482,31 +477,18 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
 						dma_cookie_t cookie,
 						struct dma_tx_state *txstate)
 {
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	dma_cookie_t		last_used;
-	dma_cookie_t		last_complete;
-	int				ret;
+	struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan);
+	enum dma_status ret;
 
-	last_complete = midc->completed;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		spin_lock_bh(&midc->lock);
 		midc_scan_descriptors(to_middma_device(chan->device), midc);
 		spin_unlock_bh(&midc->lock);
 
-		last_complete = midc->completed;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
-	if (txstate) {
-		txstate->last = last_complete;
-		txstate->used = last_used;
-		txstate->residue = 0;
-	}
 	return ret;
 }
 
@@ -732,13 +714,14 @@ err_desc_get:
  * @sg_len: length of sg txn
  * @direction: DMA transfer dirtn
  * @flags: DMA flags
+ * @context: transfer context (ignored)
  *
  * Prepares LLI based periphral transfer
  */
 static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 			struct dma_chan *chan, struct scatterlist *sgl,
 			unsigned int sg_len, enum dma_transfer_direction direction,
-			unsigned long flags)
+			unsigned long flags, void *context)
 {
 	struct intel_mid_dma_chan *midc = NULL;
 	struct intel_mid_dma_slave *mids = NULL;
@@ -832,7 +815,6 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
 		/*trying to free ch in use!!!!!*/
 		pr_err("ERR_MDMA: trying to free ch in use\n");
 	}
-	pm_runtime_put(&mid->pdev->dev);
 	spin_lock_bh(&midc->lock);
 	midc->descs_allocated = 0;
 	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
@@ -853,6 +835,7 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
 	/* Disable CH interrupts */
 	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
 	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
+	pm_runtime_put(&mid->pdev->dev);
 }
 
 /**
@@ -886,7 +869,7 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
 		pm_runtime_put(&mid->pdev->dev);
 		return -EIO;
 	}
-	midc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	spin_lock_bh(&midc->lock);
 	while (midc->descs_allocated < DESCS_PER_CHANNEL) {
@@ -1056,7 +1039,8 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
 	}
 	err_status &= mid->intr_mask;
 	if (err_status) {
-		iowrite32(MASK_INTR_REG(err_status), mid->dma_base + MASK_ERR);
+		iowrite32((err_status << INT_MASK_WE),
+			  mid->dma_base + MASK_ERR);
 		call_tasklet = 1;
 	}
 	if (call_tasklet)
@@ -1118,7 +1102,7 @@ static int mid_setup_dma(struct pci_dev *pdev)
 		struct intel_mid_dma_chan *midch = &dma->ch[i];
 
 		midch->chan.device = &dma->common;
-		midch->chan.cookie =  1;
+		dma_cookie_init(&midch->chan);
 		midch->ch_id = dma->chan_base + i;
 		pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id);
 
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index c83d35b97bd8..1bfa9268feaf 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -165,7 +165,6 @@ union intel_mid_dma_cfg_hi {
  * @dma_base: MMIO register space DMA engine base pointer
  * @ch_id: DMA channel id
  * @lock: channel spinlock
- * @completed: DMA cookie
  * @active_list: current active descriptors
  * @queue: current queued up descriptors
  * @free_list: current free descriptors
@@ -183,7 +182,6 @@ struct intel_mid_dma_chan {
 	void __iomem		*dma_base;
 	int			ch_id;
 	spinlock_t		lock;
-	dma_cookie_t		completed;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index a4d6cb0c0343..31493d80e0e9 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -40,6 +40,8 @@
 #include "registers.h"
 #include "hw.h"
 
+#include "../dmaengine.h"
+
 int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
@@ -107,6 +109,7 @@ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *c
 	chan->reg_base = device->reg_base + (0x80 * (idx + 1));
 	spin_lock_init(&chan->cleanup_lock);
 	chan->common.device = dma;
+	dma_cookie_init(&chan->common);
 	list_add_tail(&chan->common.device_node, &dma->channels);
 	device->idx[idx] = chan;
 	init_timer(&chan->timer);
@@ -235,12 +238,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	spin_lock_bh(&ioat->desc_lock);
 	/* cookie incr and addition to used_list must be atomic */
-	cookie = c->cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	c->cookie = cookie;
-	tx->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
 
 	/* write address into NextDescriptor field of last desc in chain */
@@ -603,8 +601,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
 		 */
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
-			chan->completed_cookie = tx->cookie;
-			tx->cookie = 0;
+			dma_cookie_complete(tx);
 			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
 			ioat->active -= desc->hw->tx_cnt;
 			if (tx->callback) {
@@ -733,13 +730,15 @@ ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 {
 	struct ioat_chan_common *chan = to_chan_common(c);
 	struct ioatdma_device *device = chan->device;
+	enum dma_status ret;
 
-	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
-		return DMA_SUCCESS;
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
 
 	device->cleanup_fn((unsigned long) c);
 
-	return ioat_tx_status(c, cookie, txstate);
+	return dma_cookie_status(c, cookie, txstate);
 }
 
 static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 5216c8a92a21..c7888bccd974 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -90,7 +90,6 @@ struct ioat_chan_common {
 	void __iomem *reg_base;
 	unsigned long last_completion;
 	spinlock_t cleanup_lock;
-	dma_cookie_t completed_cookie;
 	unsigned long state;
 	#define IOAT_COMPLETION_PENDING 0
 	#define IOAT_COMPLETION_ACK 1
@@ -143,28 +142,6 @@ static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
 	return container_of(chan, struct ioat_dma_chan, base);
 }
 
-/**
- * ioat_tx_status - poll the status of an ioat transaction
- * @c: channel handle
- * @cookie: transaction identifier
- * @txstate: if set, updated with the transaction state
- */
-static inline enum dma_status
-ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
-		 struct dma_tx_state *txstate)
-{
-	struct ioat_chan_common *chan = to_chan_common(c);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-
-	last_used = c->cookie;
-	last_complete = chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	return dma_async_is_complete(cookie, last_complete, last_used);
-}
-
 /* wrapper around hardware descriptor format + additional software fields */
 
 /**
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 5d65f8377971..e8e110ff3d96 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -41,6 +41,8 @@
 #include "registers.h"
 #include "hw.h"
 
+#include "../dmaengine.h"
+
 int ioat_ring_alloc_order = 8;
 module_param(ioat_ring_alloc_order, int, 0644);
 MODULE_PARM_DESC(ioat_ring_alloc_order,
@@ -147,8 +149,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
 			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
-			chan->completed_cookie = tx->cookie;
-			tx->cookie = 0;
+			dma_cookie_complete(tx);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
 				tx->callback = NULL;
@@ -398,13 +399,9 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 	struct dma_chan *c = tx->chan;
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 	struct ioat_chan_common *chan = &ioat->base;
-	dma_cookie_t cookie = c->cookie;
+	dma_cookie_t cookie;
 
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	tx->cookie = cookie;
-	c->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
 
 	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index f519c93a61e7..2c4476c0e405 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -61,6 +61,7 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/prefetch.h>
+#include "../dmaengine.h"
 #include "registers.h"
 #include "hw.h"
 #include "dma.h"
@@ -277,9 +278,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 		dump_desc_dbg(ioat, desc);
 		tx = &desc->txd;
 		if (tx->cookie) {
-			chan->completed_cookie = tx->cookie;
+			dma_cookie_complete(tx);
 			ioat3_dma_unmap(ioat, desc, idx + i);
-			tx->cookie = 0;
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
 				tx->callback = NULL;
@@ -411,13 +411,15 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 		struct dma_tx_state *txstate)
 {
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	enum dma_status ret;
 
-	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
-		return DMA_SUCCESS;
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
 
 	ioat3_cleanup(ioat);
 
-	return ioat_tx_status(c, cookie, txstate);
+	return dma_cookie_status(c, cookie, txstate);
 }
 
 static struct dma_async_tx_descriptor *
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index faf88b7e1e71..da6c4c2c066a 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -36,6 +36,8 @@
 
 #include <mach/adma.h>
 
+#include "dmaengine.h"
+
 #define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
 #define to_iop_adma_device(dev) \
 	container_of(dev, struct iop_adma_device, common)
@@ -317,7 +319,7 @@ static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
 	}
 
 	if (cookie > 0) {
-		iop_chan->completed_cookie = cookie;
+		iop_chan->common.completed_cookie = cookie;
 		pr_debug("\tcompleted cookie %d\n", cookie);
 	}
 }
@@ -438,18 +440,6 @@ retry:
 	return NULL;
 }
 
-static dma_cookie_t
-iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
-	struct iop_adma_desc_slot *desc)
-{
-	dma_cookie_t cookie = iop_chan->common.cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	iop_chan->common.cookie = desc->async_tx.cookie = cookie;
-	return cookie;
-}
-
 static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
 {
 	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
@@ -477,7 +467,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 	slots_per_op = grp_start->slots_per_op;
 
 	spin_lock_bh(&iop_chan->lock);
-	cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
+	cookie = dma_cookie_assign(tx);
 
 	old_chain_tail = list_entry(iop_chan->chain.prev,
 		struct iop_adma_desc_slot, chain_node);
@@ -904,24 +894,15 @@ static enum dma_status iop_adma_status(struct dma_chan *chan,
 					struct dma_tx_state *txstate)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	enum dma_status ret;
-
-	last_used = chan->cookie;
-	last_complete = iop_chan->completed_cookie;
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	int ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_SUCCESS)
 		return ret;
 
 	iop_adma_slot_cleanup(iop_chan);
 
-	last_used = chan->cookie;
-	last_complete = iop_chan->completed_cookie;
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static irqreturn_t iop_adma_eot_handler(int irq, void *data)
@@ -1565,6 +1546,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&iop_chan->chain);
 	INIT_LIST_HEAD(&iop_chan->all_slots);
 	iop_chan->common.device = dma_dev;
+	dma_cookie_init(&iop_chan->common);
 	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
 
 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
@@ -1642,16 +1624,12 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
 		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
 		iop_desc_set_memcpy_src_addr(grp_start, 0);
 
-		cookie = iop_chan->common.cookie;
-		cookie++;
-		if (cookie <= 1)
-			cookie = 2;
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
 
 		/* initialize the completed cookie to be less than
 		 * the most recently used cookie
 		 */
-		iop_chan->completed_cookie = cookie - 1;
-		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+		iop_chan->common.completed_cookie = cookie - 1;
 
 		/* channel should not be busy */
 		BUG_ON(iop_chan_is_busy(iop_chan));
@@ -1699,16 +1677,12 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
 		iop_desc_set_xor_src_addr(grp_start, 0, 0);
 		iop_desc_set_xor_src_addr(grp_start, 1, 0);
 
-		cookie = iop_chan->common.cookie;
-		cookie++;
-		if (cookie <= 1)
-			cookie = 2;
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
 
 		/* initialize the completed cookie to be less than
 		 * the most recently used cookie
 		 */
-		iop_chan->completed_cookie = cookie - 1;
-		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+		iop_chan->common.completed_cookie = cookie - 1;
 
 		/* channel should not be busy */
 		BUG_ON(iop_chan_is_busy(iop_chan));
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 6212b16e8cf2..62e3f8ec2461 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -25,6 +25,7 @@
 
 #include <mach/ipu.h>
 
+#include "../dmaengine.h"
 #include "ipu_intern.h"
 
 #define FS_VF_IN_VALID	0x00000002
@@ -866,14 +867,7 @@ static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
 
-	cookie = ichan->dma_chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	/* from dmaengine.h: "last cookie value returned to client" */
-	ichan->dma_chan.cookie = cookie;
-	tx->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 
 	/* ipu->lock can be taken under ichan->lock, but not v.v. */
 	spin_lock_irqsave(&ichan->lock, flags);
@@ -1295,7 +1289,7 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
 	/* Flip the active buffer - even if update above failed */
 	ichan->active_buffer = !ichan->active_buffer;
 	if (done)
-		ichan->completed = desc->txd.cookie;
+		dma_cookie_complete(&desc->txd);
 
 	callback = desc->txd.callback;
 	callback_param = desc->txd.callback_param;
@@ -1341,7 +1335,8 @@ static void ipu_gc_tasklet(unsigned long arg)
 /* Allocate and initialise a transfer descriptor. */
 static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
 		struct scatterlist *sgl, unsigned int sg_len,
-		enum dma_transfer_direction direction, unsigned long tx_flags)
+		enum dma_transfer_direction direction, unsigned long tx_flags,
+		void *context)
 {
 	struct idmac_channel *ichan = to_idmac_chan(chan);
 	struct idmac_tx_desc *desc = NULL;
@@ -1510,8 +1505,7 @@ static int idmac_alloc_chan_resources(struct dma_chan *chan)
 	BUG_ON(chan->client_count > 1);
 	WARN_ON(ichan->status != IPU_CHANNEL_FREE);
 
-	chan->cookie		= 1;
-	ichan->completed	= -ENXIO;
+	dma_cookie_init(chan);
 
 	ret = ipu_irq_map(chan->chan_id);
 	if (ret < 0)
@@ -1600,9 +1594,7 @@ static void idmac_free_chan_resources(struct dma_chan *chan)
 static enum dma_status idmac_tx_status(struct dma_chan *chan,
 		       dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
-	struct idmac_channel *ichan = to_idmac_chan(chan);
-
-	dma_set_tx_state(txstate, ichan->completed, chan->cookie, 0);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
 	if (cookie != chan->cookie)
 		return DMA_ERROR;
 	return DMA_SUCCESS;
@@ -1638,11 +1630,10 @@ static int __init ipu_idmac_init(struct ipu *ipu)
 
 		ichan->status		= IPU_CHANNEL_FREE;
 		ichan->sec_chan_en	= false;
-		ichan->completed	= -ENXIO;
 		snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i);
 
 		dma_chan->device	= &idmac->dma;
-		dma_chan->cookie	= 1;
+		dma_cookie_init(dma_chan);
 		dma_chan->chan_id	= i;
 		list_add_tail(&dma_chan->device_node, &dma->channels);
 	}
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 4d6d4cf66949..2ab0a3d0eed5 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -44,6 +44,8 @@
 
 #include <linux/random.h>
 
+#include "dmaengine.h"
+
 /* Number of DMA Transfer descriptors allocated per channel */
 #define MPC_DMA_DESCRIPTORS	64
 
@@ -188,7 +190,6 @@ struct mpc_dma_chan {
 	struct list_head		completed;
 	struct mpc_dma_tcd		*tcd;
 	dma_addr_t			tcd_paddr;
-	dma_cookie_t			completed_cookie;
 
 	/* Lock for this structure */
 	spinlock_t			lock;
@@ -365,7 +366,7 @@ static void mpc_dma_process_completed(struct mpc_dma *mdma)
 		/* Free descriptors */
 		spin_lock_irqsave(&mchan->lock, flags);
 		list_splice_tail_init(&list, &mchan->free);
-		mchan->completed_cookie = last_cookie;
+		mchan->chan.completed_cookie = last_cookie;
 		spin_unlock_irqrestore(&mchan->lock, flags);
 	}
 }
@@ -438,13 +439,7 @@ static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
 		mpc_dma_execute(mchan);
 
 	/* Update cookie */
-	cookie = mchan->chan.cookie + 1;
-	if (cookie <= 0)
-		cookie = 1;
-
-	mchan->chan.cookie = cookie;
-	mdesc->desc.cookie = cookie;
-
+	cookie = dma_cookie_assign(txd);
 	spin_unlock_irqrestore(&mchan->lock, flags);
 
 	return cookie;
@@ -562,17 +557,14 @@ mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	       struct dma_tx_state *txstate)
 {
 	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	enum dma_status ret;
 	unsigned long flags;
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 
 	spin_lock_irqsave(&mchan->lock, flags);
-	last_used = mchan->chan.cookie;
-	last_complete = mchan->completed_cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
 	spin_unlock_irqrestore(&mchan->lock, flags);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return ret;
 }
 
 /* Prepare descriptor for memory to memory copy */
@@ -741,8 +733,7 @@ static int __devinit mpc_dma_probe(struct platform_device *op)
 		mchan = &mdma->channels[i];
 
 		mchan->chan.device = dma;
-		mchan->chan.cookie = 1;
-		mchan->completed_cookie = mchan->chan.cookie;
+		dma_cookie_init(&mchan->chan);
 
 		INIT_LIST_HEAD(&mchan->free);
 		INIT_LIST_HEAD(&mchan->prepared);
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index e779b434af45..fa5d55fea46c 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -26,6 +26,8 @@
 #include <linux/platform_device.h>
 #include <linux/memory.h>
 #include <plat/mv_xor.h>
+
+#include "dmaengine.h"
 #include "mv_xor.h"
 
 static void mv_xor_issue_pending(struct dma_chan *chan);
@@ -435,7 +437,7 @@ static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
 	}
 
 	if (cookie > 0)
-		mv_chan->completed_cookie = cookie;
+		mv_chan->common.completed_cookie = cookie;
 }
 
 static void
@@ -534,18 +536,6 @@ retry:
 	return NULL;
 }
 
-static dma_cookie_t
-mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
-		      struct mv_xor_desc_slot *desc)
-{
-	dma_cookie_t cookie = mv_chan->common.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-	mv_chan->common.cookie = desc->async_tx.cookie = cookie;
-	return cookie;
-}
-
 /************************ DMA engine API functions ****************************/
 static dma_cookie_t
 mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
@@ -563,7 +553,7 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
 	grp_start = sw_desc->group_head;
 
 	spin_lock_bh(&mv_chan->lock);
-	cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&mv_chan->chain))
 		list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
@@ -820,27 +810,16 @@ static enum dma_status mv_xor_status(struct dma_chan *chan,
 					  struct dma_tx_state *txstate)
 {
 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status ret;
 
-	last_used = chan->cookie;
-	last_complete = mv_chan->completed_cookie;
-	mv_chan->is_complete_cookie = cookie;
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_SUCCESS) {
 		mv_xor_clean_completed_slots(mv_chan);
 		return ret;
 	}
 	mv_xor_slot_cleanup(mv_chan);
 
-	last_used = chan->cookie;
-	last_complete = mv_chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void mv_dump_xor_regs(struct mv_xor_chan *chan)
@@ -1214,6 +1193,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&mv_chan->completed_slots);
 	INIT_LIST_HEAD(&mv_chan->all_slots);
 	mv_chan->common.device = dma_dev;
+	dma_cookie_init(&mv_chan->common);
 
 	list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
 
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 977b592e976b..654876b7ba1d 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -78,7 +78,6 @@ struct mv_xor_device {
 /**
  * struct mv_xor_chan - internal representation of a XOR channel
  * @pending: allows batching of hardware operations
- * @completed_cookie: identifier for the most recently completed operation
  * @lock: serializes enqueue/dequeue operations to the descriptors pool
  * @mmr_base: memory mapped register base
  * @idx: the index of the xor channel
@@ -93,7 +92,6 @@ struct mv_xor_device {
  */
 struct mv_xor_chan {
 	int			pending;
-	dma_cookie_t		completed_cookie;
 	spinlock_t		lock; /* protects the descriptor slot pool */
 	void __iomem		*mmr_base;
 	unsigned int		idx;
@@ -109,7 +107,6 @@ struct mv_xor_chan {
 #ifdef USE_TIMER
 	unsigned long		cleanup_time;
 	u32			current_on_last_cleanup;
-	dma_cookie_t		is_complete_cookie;
 #endif
 };
 
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index b06cd4ca626f..65334c49b71e 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -28,6 +28,8 @@
 #include <mach/dma.h>
 #include <mach/common.h>
 
+#include "dmaengine.h"
+
 /*
  * NOTE: The term "PIO" throughout the mxs-dma implementation means
  * PIO mode of mxs apbh-dma and apbx-dma.  With this working mode,
@@ -111,7 +113,6 @@ struct mxs_dma_chan {
 	struct mxs_dma_ccw		*ccw;
 	dma_addr_t			ccw_phys;
 	int				desc_count;
-	dma_cookie_t			last_completed;
 	enum dma_status			status;
 	unsigned int			flags;
 #define MXS_DMA_SG_LOOP			(1 << 0)
@@ -193,19 +194,6 @@ static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan)
 	mxs_chan->status = DMA_IN_PROGRESS;
 }
 
-static dma_cookie_t mxs_dma_assign_cookie(struct mxs_dma_chan *mxs_chan)
-{
-	dma_cookie_t cookie = mxs_chan->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	mxs_chan->chan.cookie = cookie;
-	mxs_chan->desc.cookie = cookie;
-
-	return cookie;
-}
-
 static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
 {
 	return container_of(chan, struct mxs_dma_chan, chan);
@@ -217,7 +205,7 @@ static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	mxs_dma_enable_chan(mxs_chan);
 
-	return mxs_dma_assign_cookie(mxs_chan);
+	return dma_cookie_assign(tx);
 }
 
 static void mxs_dma_tasklet(unsigned long data)
@@ -274,7 +262,7 @@ static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
 		stat1 &= ~(1 << channel);
 
 		if (mxs_chan->status == DMA_SUCCESS)
-			mxs_chan->last_completed = mxs_chan->desc.cookie;
+			dma_cookie_complete(&mxs_chan->desc);
 
 		/* schedule tasklet on this channel */
 		tasklet_schedule(&mxs_chan->tasklet);
@@ -352,7 +340,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long append)
+		unsigned long append, void *context)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -447,7 +435,8 @@ err_out:
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -538,7 +527,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
 	dma_cookie_t last_used;
 
 	last_used = chan->cookie;
-	dma_set_tx_state(txstate, mxs_chan->last_completed, last_used, 0);
+	dma_set_tx_state(txstate, chan->completed_cookie, last_used, 0);
 
 	return mxs_chan->status;
 }
@@ -630,6 +619,7 @@ static int __init mxs_dma_probe(struct platform_device *pdev)
 
 		mxs_chan->mxs_dma = mxs_dma;
 		mxs_chan->chan.device = &mxs_dma->dma_device;
+		dma_cookie_init(&mxs_chan->chan);
 
 		tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet,
 			     (unsigned long) mxs_chan);
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 823f58179f9d..65c0495a6d40 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -25,6 +25,8 @@
 #include <linux/module.h>
 #include <linux/pch_dma.h>
 
+#include "dmaengine.h"
+
 #define DRV_NAME "pch-dma"
 
 #define DMA_CTL0_DISABLE		0x0
@@ -105,7 +107,6 @@ struct pch_dma_chan {
 
 	spinlock_t		lock;
 
-	dma_cookie_t		completed_cookie;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
@@ -416,20 +417,6 @@ static void pdc_advance_work(struct pch_dma_chan *pd_chan)
 	}
 }
 
-static dma_cookie_t pdc_assign_cookie(struct pch_dma_chan *pd_chan,
-				      struct pch_dma_desc *desc)
-{
-	dma_cookie_t cookie = pd_chan->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	pd_chan->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
 static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
 {
 	struct pch_dma_desc *desc = to_pd_desc(txd);
@@ -437,7 +424,7 @@ static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
 	dma_cookie_t cookie;
 
 	spin_lock(&pd_chan->lock);
-	cookie = pdc_assign_cookie(pd_chan, desc);
+	cookie = dma_cookie_assign(txd);
 
 	if (list_empty(&pd_chan->active_list)) {
 		list_add_tail(&desc->desc_node, &pd_chan->active_list);
@@ -544,7 +531,7 @@ static int pd_alloc_chan_resources(struct dma_chan *chan)
 	spin_lock_irq(&pd_chan->lock);
 	list_splice(&tmp_list, &pd_chan->free_list);
 	pd_chan->descs_allocated = i;
-	pd_chan->completed_cookie = chan->cookie = 1;
+	dma_cookie_init(chan);
 	spin_unlock_irq(&pd_chan->lock);
 
 	pdc_enable_irq(chan, 1);
@@ -578,19 +565,12 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				    struct dma_tx_state *txstate)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_completed;
-	int ret;
+	enum dma_status ret;
 
 	spin_lock_irq(&pd_chan->lock);
-	last_completed = pd_chan->completed_cookie;
-	last_used = chan->cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
 	spin_unlock_irq(&pd_chan->lock);
 
-	ret = dma_async_is_complete(cookie, last_completed, last_used);
-
-	dma_set_tx_state(txstate, last_completed, last_used, 0);
-
 	return ret;
 }
 
@@ -607,7 +587,8 @@ static void pd_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 			struct scatterlist *sgl, unsigned int sg_len,
-			enum dma_transfer_direction direction, unsigned long flags)
+			enum dma_transfer_direction direction, unsigned long flags,
+			void *context)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma_slave *pd_slave = chan->private;
@@ -932,7 +913,7 @@ static int __devinit pch_dma_probe(struct pci_dev *pdev,
 		struct pch_dma_chan *pd_chan = &pd->channels[i];
 
 		pd_chan->chan.device = &pd->dma;
-		pd_chan->chan.cookie = 1;
+		dma_cookie_init(&pd_chan->chan);
 
 		pd_chan->membase = &regs->desc[i];
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 16b66c827f19..282caf118be8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1,4 +1,6 @@
-/* linux/drivers/dma/pl330.c
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
  *
  * Copyright (C) 2010 Samsung Electronics Co. Ltd.
  *	Jaswinder Singh <jassi.brar@samsung.com>
@@ -9,10 +11,15 @@
  * (at your option) any later version.
  */
 
+#include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 #include <linux/amba/bus.h>
@@ -21,8 +28,497 @@
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 
+#include "dmaengine.h"
+#define PL330_MAX_CHAN		8
+#define PL330_MAX_IRQS		32
+#define PL330_MAX_PERI		32
+
+enum pl330_srccachectrl {
+	SCCTRL0,	/* Noncacheable and nonbufferable */
+	SCCTRL1,	/* Bufferable only */
+	SCCTRL2,	/* Cacheable, but do not allocate */
+	SCCTRL3,	/* Cacheable and bufferable, but do not allocate */
+	SINVALID1,
+	SINVALID2,
+	SCCTRL6,	/* Cacheable write-through, allocate on reads only */
+	SCCTRL7,	/* Cacheable write-back, allocate on reads only */
+};
+
+enum pl330_dstcachectrl {
+	DCCTRL0,	/* Noncacheable and nonbufferable */
+	DCCTRL1,	/* Bufferable only */
+	DCCTRL2,	/* Cacheable, but do not allocate */
+	DCCTRL3,	/* Cacheable and bufferable, but do not allocate */
+	DINVALID1,	/* AWCACHE = 0x1000 */
+	DINVALID2,
+	DCCTRL6,	/* Cacheable write-through, allocate on writes only */
+	DCCTRL7,	/* Cacheable write-back, allocate on writes only */
+};
+
+enum pl330_byteswap {
+	SWAP_NO,
+	SWAP_2,
+	SWAP_4,
+	SWAP_8,
+	SWAP_16,
+};
+
+enum pl330_reqtype {
+	MEMTOMEM,
+	MEMTODEV,
+	DEVTOMEM,
+	DEVTODEV,
+};
+
+/* Register and Bit field Definitions */
+#define DS			0x0
+#define DS_ST_STOP		0x0
+#define DS_ST_EXEC		0x1
+#define DS_ST_CMISS		0x2
+#define DS_ST_UPDTPC		0x3
+#define DS_ST_WFE		0x4
+#define DS_ST_ATBRR		0x5
+#define DS_ST_QBUSY		0x6
+#define DS_ST_WFP		0x7
+#define DS_ST_KILL		0x8
+#define DS_ST_CMPLT		0x9
+#define DS_ST_FLTCMP		0xe
+#define DS_ST_FAULT		0xf
+
+#define DPC			0x4
+#define INTEN			0x20
+#define ES			0x24
+#define INTSTATUS		0x28
+#define INTCLR			0x2c
+#define FSM			0x30
+#define FSC			0x34
+#define FTM			0x38
+
+#define _FTC			0x40
+#define FTC(n)			(_FTC + (n)*0x4)
+
+#define _CS			0x100
+#define CS(n)			(_CS + (n)*0x8)
+#define CS_CNS			(1 << 21)
+
+#define _CPC			0x104
+#define CPC(n)			(_CPC + (n)*0x8)
+
+#define _SA			0x400
+#define SA(n)			(_SA + (n)*0x20)
+
+#define _DA			0x404
+#define DA(n)			(_DA + (n)*0x20)
+
+#define _CC			0x408
+#define CC(n)			(_CC + (n)*0x20)
+
+#define CC_SRCINC		(1 << 0)
+#define CC_DSTINC		(1 << 14)
+#define CC_SRCPRI		(1 << 8)
+#define CC_DSTPRI		(1 << 22)
+#define CC_SRCNS		(1 << 9)
+#define CC_DSTNS		(1 << 23)
+#define CC_SRCIA		(1 << 10)
+#define CC_DSTIA		(1 << 24)
+#define CC_SRCBRSTLEN_SHFT	4
+#define CC_DSTBRSTLEN_SHFT	18
+#define CC_SRCBRSTSIZE_SHFT	1
+#define CC_DSTBRSTSIZE_SHFT	15
+#define CC_SRCCCTRL_SHFT	11
+#define CC_SRCCCTRL_MASK	0x7
+#define CC_DSTCCTRL_SHFT	25
+#define CC_DRCCCTRL_MASK	0x7
+#define CC_SWAP_SHFT		28
+
+#define _LC0			0x40c
+#define LC0(n)			(_LC0 + (n)*0x20)
+
+#define _LC1			0x410
+#define LC1(n)			(_LC1 + (n)*0x20)
+
+#define DBGSTATUS		0xd00
+#define DBG_BUSY		(1 << 0)
+
+#define DBGCMD			0xd04
+#define DBGINST0		0xd08
+#define DBGINST1		0xd0c
+
+#define CR0			0xe00
+#define CR1			0xe04
+#define CR2			0xe08
+#define CR3			0xe0c
+#define CR4			0xe10
+#define CRD			0xe14
+
+#define PERIPH_ID		0xfe0
+#define PERIPH_REV_SHIFT	20
+#define PERIPH_REV_MASK		0xf
+#define PERIPH_REV_R0P0		0
+#define PERIPH_REV_R1P0		1
+#define PERIPH_REV_R1P1		2
+#define PCELL_ID		0xff0
+
+#define CR0_PERIPH_REQ_SET	(1 << 0)
+#define CR0_BOOT_EN_SET		(1 << 1)
+#define CR0_BOOT_MAN_NS		(1 << 2)
+#define CR0_NUM_CHANS_SHIFT	4
+#define CR0_NUM_CHANS_MASK	0x7
+#define CR0_NUM_PERIPH_SHIFT	12
+#define CR0_NUM_PERIPH_MASK	0x1f
+#define CR0_NUM_EVENTS_SHIFT	17
+#define CR0_NUM_EVENTS_MASK	0x1f
+
+#define CR1_ICACHE_LEN_SHIFT	0
+#define CR1_ICACHE_LEN_MASK	0x7
+#define CR1_NUM_ICACHELINES_SHIFT	4
+#define CR1_NUM_ICACHELINES_MASK	0xf
+
+#define CRD_DATA_WIDTH_SHIFT	0
+#define CRD_DATA_WIDTH_MASK	0x7
+#define CRD_WR_CAP_SHIFT	4
+#define CRD_WR_CAP_MASK		0x7
+#define CRD_WR_Q_DEP_SHIFT	8
+#define CRD_WR_Q_DEP_MASK	0xf
+#define CRD_RD_CAP_SHIFT	12
+#define CRD_RD_CAP_MASK		0x7
+#define CRD_RD_Q_DEP_SHIFT	16
+#define CRD_RD_Q_DEP_MASK	0xf
+#define CRD_DATA_BUFF_SHIFT	20
+#define CRD_DATA_BUFF_MASK	0x3ff
+
+#define PART			0x330
+#define DESIGNER		0x41
+#define REVISION		0x0
+#define INTEG_CFG		0x0
+#define PERIPH_ID_VAL		((PART << 0) | (DESIGNER << 12))
+
+#define PCELL_ID_VAL		0xb105f00d
+
+#define PL330_STATE_STOPPED		(1 << 0)
+#define PL330_STATE_EXECUTING		(1 << 1)
+#define PL330_STATE_WFE			(1 << 2)
+#define PL330_STATE_FAULTING		(1 << 3)
+#define PL330_STATE_COMPLETING		(1 << 4)
+#define PL330_STATE_WFP			(1 << 5)
+#define PL330_STATE_KILLING		(1 << 6)
+#define PL330_STATE_FAULT_COMPLETING	(1 << 7)
+#define PL330_STATE_CACHEMISS		(1 << 8)
+#define PL330_STATE_UPDTPC		(1 << 9)
+#define PL330_STATE_ATBARRIER		(1 << 10)
+#define PL330_STATE_QUEUEBUSY		(1 << 11)
+#define PL330_STATE_INVALID		(1 << 15)
+
+#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
+				| PL330_STATE_WFE | PL330_STATE_FAULTING)
+
+#define CMD_DMAADDH		0x54
+#define CMD_DMAEND		0x00
+#define CMD_DMAFLUSHP		0x35
+#define CMD_DMAGO		0xa0
+#define CMD_DMALD		0x04
+#define CMD_DMALDP		0x25
+#define CMD_DMALP		0x20
+#define CMD_DMALPEND		0x28
+#define CMD_DMAKILL		0x01
+#define CMD_DMAMOV		0xbc
+#define CMD_DMANOP		0x18
+#define CMD_DMARMB		0x12
+#define CMD_DMASEV		0x34
+#define CMD_DMAST		0x08
+#define CMD_DMASTP		0x29
+#define CMD_DMASTZ		0x0c
+#define CMD_DMAWFE		0x36
+#define CMD_DMAWFP		0x30
+#define CMD_DMAWMB		0x13
+
+#define SZ_DMAADDH		3
+#define SZ_DMAEND		1
+#define SZ_DMAFLUSHP		2
+#define SZ_DMALD		1
+#define SZ_DMALDP		2
+#define SZ_DMALP		2
+#define SZ_DMALPEND		2
+#define SZ_DMAKILL		1
+#define SZ_DMAMOV		6
+#define SZ_DMANOP		1
+#define SZ_DMARMB		1
+#define SZ_DMASEV		2
+#define SZ_DMAST		1
+#define SZ_DMASTP		2
+#define SZ_DMASTZ		1
+#define SZ_DMAWFE		2
+#define SZ_DMAWFP		2
+#define SZ_DMAWMB		1
+#define SZ_DMAGO		6
+
+#define BRST_LEN(ccr)		((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
+#define BRST_SIZE(ccr)		(1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
+
+#define BYTE_TO_BURST(b, ccr)	((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
+#define BURST_TO_BYTE(c, ccr)	((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))
+
+/*
+ * With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
+ * at 1byte/burst for P<->M and M<->M respectively.
+ * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req
+ * should be enough for P<->M and M<->M respectively.
+ */
+#define MCODE_BUFF_PER_REQ	256
+
+/* If the _pl330_req is available to the client */
+#define IS_FREE(req)	(*((u8 *)((req)->mc_cpu)) == CMD_DMAEND)
+
+/* Use this _only_ to wait on transient states */
+#define UNTIL(t, s)	while (!(_state(t) & (s))) cpu_relax();
+
+#ifdef PL330_DEBUG_MCGEN
+static unsigned cmd_line;
+#define PL330_DBGCMD_DUMP(off, x...)	do { \
+						printk("%x:", cmd_line); \
+						printk(x); \
+						cmd_line += off; \
+					} while (0)
+#define PL330_DBGMC_START(addr)		(cmd_line = addr)
+#else
+#define PL330_DBGCMD_DUMP(off, x...)	do {} while (0)
+#define PL330_DBGMC_START(addr)		do {} while (0)
+#endif
+
+/* The number of default descriptors */
+
 #define NR_DEFAULT_DESC	16
 
+/* Populated by the PL330 core driver for DMA API driver's info */
+struct pl330_config {
+	u32	periph_id;
+	u32	pcell_id;
+#define DMAC_MODE_NS	(1 << 0)
+	unsigned int	mode;
+	unsigned int	data_bus_width:10; /* In number of bits */
+	unsigned int	data_buf_dep:10;
+	unsigned int	num_chan:4;
+	unsigned int	num_peri:6;
+	u32		peri_ns;
+	unsigned int	num_events:6;
+	u32		irq_ns;
+};
+
+/* Handle to the DMAC provided to the PL330 core */
+struct pl330_info {
+	/* Owning device */
+	struct device *dev;
+	/* Size of MicroCode buffers for each channel. */
+	unsigned mcbufsz;
+	/* ioremap'ed address of PL330 registers. */
+	void __iomem	*base;
+	/* Client can freely use it. */
+	void	*client_data;
+	/* PL330 core data, Client must not touch it. */
+	void	*pl330_data;
+	/* Populated by the PL330 core driver during pl330_add */
+	struct pl330_config	pcfg;
+	/*
+	 * If the DMAC has some reset mechanism, then the
+	 * client may want to provide pointer to the method.
+	 */
+	void (*dmac_reset)(struct pl330_info *pi);
+};
+
+/**
+ * Request Configuration.
+ * The PL330 core does not modify this and uses the last
+ * working configuration if the request doesn't provide any.
+ *
+ * The Client may want to provide this info only for the
+ * first request and a request with new settings.
+ */
+struct pl330_reqcfg {
+	/* Address Incrementing */
+	unsigned dst_inc:1;
+	unsigned src_inc:1;
+
+	/*
+	 * For now, the SRC & DST protection levels
+	 * and burst size/length are assumed same.
+	 */
+	bool nonsecure;
+	bool privileged;
+	bool insnaccess;
+	unsigned brst_len:5;
+	unsigned brst_size:3; /* in power of 2 */
+
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+	enum pl330_byteswap swap;
+	struct pl330_config *pcfg;
+};
+
+/*
+ * One cycle of DMAC operation.
+ * There may be more than one xfer in a request.
+ */
+struct pl330_xfer {
+	u32 src_addr;
+	u32 dst_addr;
+	/* Size to xfer */
+	u32 bytes;
+	/*
+	 * Pointer to next xfer in the list.
+	 * The last xfer in the req must point to NULL.
+	 */
+	struct pl330_xfer *next;
+};
+
+/* The xfer callbacks are made with one of these arguments. */
+enum pl330_op_err {
+	/* The all xfers in the request were success. */
+	PL330_ERR_NONE,
+	/* If req aborted due to global error. */
+	PL330_ERR_ABORT,
+	/* If req failed due to problem with Channel. */
+	PL330_ERR_FAIL,
+};
+
+/* A request defining Scatter-Gather List ending with NULL xfer. */
+struct pl330_req {
+	enum pl330_reqtype rqtype;
+	/* Index of peripheral for the xfer. */
+	unsigned peri:5;
+	/* Unique token for this xfer, set by the client. */
+	void *token;
+	/* Callback to be called after xfer. */
+	void (*xfer_cb)(void *token, enum pl330_op_err err);
+	/* If NULL, req will be done at last set parameters. */
+	struct pl330_reqcfg *cfg;
+	/* Pointer to first xfer in the request. */
+	struct pl330_xfer *x;
+};
+
+/*
+ * To know the status of the channel and DMAC, the client
+ * provides a pointer to this structure. The PL330 core
+ * fills it with current information.
+ */
+struct pl330_chanstatus {
+	/*
+	 * If the DMAC engine halted due to some error,
+	 * the client should remove-add DMAC.
+	 */
+	bool dmac_halted;
+	/*
+	 * If channel is halted due to some error,
+	 * the client should ABORT/FLUSH and START the channel.
+	 */
+	bool faulting;
+	/* Location of last load */
+	u32 src_addr;
+	/* Location of last store */
+	u32 dst_addr;
+	/*
+	 * Pointer to the currently active req, NULL if channel is
+	 * inactive, even though the requests may be present.
+	 */
+	struct pl330_req *top_req;
+	/* Pointer to req waiting second in the queue if any. */
+	struct pl330_req *wait_req;
+};
+
+enum pl330_chan_op {
+	/* Start the channel */
+	PL330_OP_START,
+	/* Abort the active xfer */
+	PL330_OP_ABORT,
+	/* Stop xfer and flush queue */
+	PL330_OP_FLUSH,
+};
+
+struct _xfer_spec {
+	u32 ccr;
+	struct pl330_req *r;
+	struct pl330_xfer *x;
+};
+
+enum dmamov_dst {
+	SAR = 0,
+	CCR,
+	DAR,
+};
+
+enum pl330_dst {
+	SRC = 0,
+	DST,
+};
+
+enum pl330_cond {
+	SINGLE,
+	BURST,
+	ALWAYS,
+};
+
+struct _pl330_req {
+	u32 mc_bus;
+	void *mc_cpu;
+	/* Number of bytes taken to setup MC for the req */
+	u32 mc_len;
+	struct pl330_req *r;
+	/* Hook to attach to DMAC's list of reqs with due callback */
+	struct list_head rqd;
+};
+
+/* ToBeDone for tasklet */
+struct _pl330_tbd {
+	bool reset_dmac;
+	bool reset_mngr;
+	u8 reset_chan;
+};
+
+/* A DMAC Thread */
+struct pl330_thread {
+	u8 id;
+	int ev;
+	/* If the channel is not yet acquired by any client */
+	bool free;
+	/* Parent DMAC */
+	struct pl330_dmac *dmac;
+	/* Only two at a time */
+	struct _pl330_req req[2];
+	/* Index of the last enqueued request */
+	unsigned lstenq;
+	/* Index of the last submitted request or -1 if the DMA is stopped */
+	int req_running;
+};
+
+enum pl330_dmac_state {
+	UNINIT,
+	INIT,
+	DYING,
+};
+
+/* A DMAC */
+struct pl330_dmac {
+	spinlock_t		lock;
+	/* Holds list of reqs with due callbacks */
+	struct list_head	req_done;
+	/* Pointer to platform specific stuff */
+	struct pl330_info	*pinfo;
+	/* Maximum possible events/irqs */
+	int			events[32];
+	/* BUS address of MicroCode buffer */
+	u32			mcode_bus;
+	/* CPU address of MicroCode buffer */
+	void			*mcode_cpu;
+	/* List of all Channel threads */
+	struct pl330_thread	*channels;
+	/* Pointer to the MANAGER thread */
+	struct pl330_thread	*manager;
+	/* To handle bad news in interrupt */
+	struct tasklet_struct	tasks;
+	struct _pl330_tbd	dmac_tbd;
+	/* State of DMAC operation */
+	enum pl330_dmac_state	state;
+};
+
 enum desc_status {
 	/* In the DMAC pool */
 	FREE,
@@ -51,9 +547,6 @@ struct dma_pl330_chan {
 	/* DMA-Engine Channel */
 	struct dma_chan chan;
 
-	/* Last completed cookie */
-	dma_cookie_t completed;
-
 	/* List of to be xfered descriptors */
 	struct list_head work_list;
 
@@ -117,6 +610,1599 @@ struct dma_pl330_desc {
 	struct dma_pl330_chan *pchan;
 };
 
+static inline void _callback(struct pl330_req *r, enum pl330_op_err err)
+{
+	if (r && r->xfer_cb)
+		r->xfer_cb(r->token, err);
+}
+
+static inline bool _queue_empty(struct pl330_thread *thrd)
+{
+	return (IS_FREE(&thrd->req[0]) && IS_FREE(&thrd->req[1]))
+		? true : false;
+}
+
+static inline bool _queue_full(struct pl330_thread *thrd)
+{
+	return (IS_FREE(&thrd->req[0]) || IS_FREE(&thrd->req[1]))
+		? false : true;
+}
+
+static inline bool is_manager(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	/* MANAGER is indexed at the end */
+	if (thrd->id == pl330->pinfo->pcfg.num_chan)
+		return true;
+	else
+		return false;
+}
+
+/* If manager of the thread is in Non-Secure mode */
+static inline bool _manager_ns(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false;
+}
+
+static inline u32 get_id(struct pl330_info *pi, u32 off)
+{
+	void __iomem *regs = pi->base;
+	u32 id = 0;
+
+	id |= (readb(regs + off + 0x0) << 0);
+	id |= (readb(regs + off + 0x4) << 8);
+	id |= (readb(regs + off + 0x8) << 16);
+	id |= (readb(regs + off + 0xc) << 24);
+
+	return id;
+}
+
+static inline u32 get_revision(u32 periph_id)
+{
+	return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK;
+}
+
+static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
+		enum pl330_dst da, u16 val)
+{
+	if (dry_run)
+		return SZ_DMAADDH;
+
+	buf[0] = CMD_DMAADDH;
+	buf[0] |= (da << 1);
+	*((u16 *)&buf[1]) = val;
+
+	PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n",
+		da == 1 ? "DA" : "SA", val);
+
+	return SZ_DMAADDH;
+}
+
+static inline u32 _emit_END(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAEND;
+
+	buf[0] = CMD_DMAEND;
+
+	PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n");
+
+	return SZ_DMAEND;
+}
+
+static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAFLUSHP;
+
+	buf[0] = CMD_DMAFLUSHP;
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3);
+
+	return SZ_DMAFLUSHP;
+}
+
+static inline u32 _emit_LD(unsigned dry_run, u8 buf[],	enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMALD;
+
+	buf[0] = CMD_DMALD;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMALD;
+}
+
+static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMALDP;
+
+	buf[0] = CMD_DMALDP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMALDP;
+}
+
+static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
+		unsigned loop, u8 cnt)
+{
+	if (dry_run)
+		return SZ_DMALP;
+
+	buf[0] = CMD_DMALP;
+
+	if (loop)
+		buf[0] |= (1 << 1);
+
+	cnt--; /* DMAC increments by 1 internally */
+	buf[1] = cnt;
+
+	PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt);
+
+	return SZ_DMALP;
+}
+
+struct _arg_LPEND {
+	enum pl330_cond cond;
+	bool forever;
+	unsigned loop;
+	u8 bjump;
+};
+
+static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
+		const struct _arg_LPEND *arg)
+{
+	enum pl330_cond cond = arg->cond;
+	bool forever = arg->forever;
+	unsigned loop = arg->loop;
+	u8 bjump = arg->bjump;
+
+	if (dry_run)
+		return SZ_DMALPEND;
+
+	buf[0] = CMD_DMALPEND;
+
+	if (loop)
+		buf[0] |= (1 << 2);
+
+	if (!forever)
+		buf[0] |= (1 << 4);
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	buf[1] = bjump;
+
+	PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n",
+			forever ? "FE" : "END",
+			cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'),
+			loop ? '1' : '0',
+			bjump);
+
+	return SZ_DMALPEND;
+}
+
+static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAKILL;
+
+	buf[0] = CMD_DMAKILL;
+
+	return SZ_DMAKILL;
+}
+
+static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
+		enum dmamov_dst dst, u32 val)
+{
+	if (dry_run)
+		return SZ_DMAMOV;
+
+	buf[0] = CMD_DMAMOV;
+	buf[1] = dst;
+	*((u32 *)&buf[2]) = val;
+
+	PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
+		dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
+
+	return SZ_DMAMOV;
+}
+
+static inline u32 _emit_NOP(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMANOP;
+
+	buf[0] = CMD_DMANOP;
+
+	PL330_DBGCMD_DUMP(SZ_DMANOP, "\tDMANOP\n");
+
+	return SZ_DMANOP;
+}
+
+static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMARMB;
+
+	buf[0] = CMD_DMARMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n");
+
+	return SZ_DMARMB;
+}
+
+static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
+{
+	if (dry_run)
+		return SZ_DMASEV;
+
+	buf[0] = CMD_DMASEV;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3);
+
+	return SZ_DMASEV;
+}
+
+static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMAST;
+
+	buf[0] = CMD_DMAST;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMAST;
+}
+
+static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMASTP;
+
+	buf[0] = CMD_DMASTP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMASTP;
+}
+
+static inline u32 _emit_STZ(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMASTZ;
+
+	buf[0] = CMD_DMASTZ;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTZ, "\tDMASTZ\n");
+
+	return SZ_DMASTZ;
+}
+
+static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev,
+		unsigned invalidate)
+{
+	if (dry_run)
+		return SZ_DMAWFE;
+
+	buf[0] = CMD_DMAWFE;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	if (invalidate)
+		buf[1] |= (1 << 1);
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFE, "\tDMAWFE %u%s\n",
+		ev >> 3, invalidate ? ", I" : "");
+
+	return SZ_DMAWFE;
+}
+
+static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAWFP;
+
+	buf[0] = CMD_DMAWFP;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (0 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (0 << 0);
+	else
+		buf[0] |= (0 << 1) | (1 << 0);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3);
+
+	return SZ_DMAWFP;
+}
+
+static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAWMB;
+
+	buf[0] = CMD_DMAWMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n");
+
+	return SZ_DMAWMB;
+}
+
+struct _arg_GO {
+	u8 chan;
+	u32 addr;
+	unsigned ns;
+};
+
+static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
+		const struct _arg_GO *arg)
+{
+	u8 chan = arg->chan;
+	u32 addr = arg->addr;
+	unsigned ns = arg->ns;
+
+	if (dry_run)
+		return SZ_DMAGO;
+
+	buf[0] = CMD_DMAGO;
+	buf[0] |= (ns << 1);
+
+	buf[1] = chan & 0x7;
+
+	*((u32 *)&buf[2]) = addr;
+
+	return SZ_DMAGO;
+}
+
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
+
+/* Returns Time-Out */
+static bool _until_dmac_idle(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	unsigned long loops = msecs_to_loops(5);
+
+	do {
+		/* Until Manager is Idle */
+		if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
+			break;
+
+		cpu_relax();
+	} while (--loops);
+
+	if (!loops)
+		return true;
+
+	return false;
+}
+
+static inline void _execute_DBGINSN(struct pl330_thread *thrd,
+		u8 insn[], bool as_manager)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u32 val;
+
+	val = (insn[0] << 16) | (insn[1] << 24);
+	if (!as_manager) {
+		val |= (1 << 0);
+		val |= (thrd->id << 8); /* Channel Number */
+	}
+	writel(val, regs + DBGINST0);
+
+	val = *((u32 *)&insn[2]);
+	writel(val, regs + DBGINST1);
+
+	/* If timed out due to halted state-machine */
+	if (_until_dmac_idle(thrd)) {
+		dev_err(thrd->dmac->pinfo->dev, "DMAC halted!\n");
+		return;
+	}
+
+	/* Get going */
+	writel(0, regs + DBGCMD);
+}
+
+/*
+ * Mark a _pl330_req as free.
+ * We do it by writing DMAEND as the first instruction
+ * because no valid request is going to have DMAEND as
+ * its first instruction to execute.
+ */
+static void mark_free(struct pl330_thread *thrd, int idx)
+{
+	struct _pl330_req *req = &thrd->req[idx];
+
+	_emit_END(0, req->mc_cpu);
+	req->mc_len = 0;
+
+	thrd->req_running = -1;
+}
+
+static inline u32 _state(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u32 val;
+
+	if (is_manager(thrd))
+		val = readl(regs + DS) & 0xf;
+	else
+		val = readl(regs + CS(thrd->id)) & 0xf;
+
+	switch (val) {
+	case DS_ST_STOP:
+		return PL330_STATE_STOPPED;
+	case DS_ST_EXEC:
+		return PL330_STATE_EXECUTING;
+	case DS_ST_CMISS:
+		return PL330_STATE_CACHEMISS;
+	case DS_ST_UPDTPC:
+		return PL330_STATE_UPDTPC;
+	case DS_ST_WFE:
+		return PL330_STATE_WFE;
+	case DS_ST_FAULT:
+		return PL330_STATE_FAULTING;
+	case DS_ST_ATBRR:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_ATBARRIER;
+	case DS_ST_QBUSY:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_QUEUEBUSY;
+	case DS_ST_WFP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_WFP;
+	case DS_ST_KILL:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_KILLING;
+	case DS_ST_CMPLT:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_COMPLETING;
+	case DS_ST_FLTCMP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_FAULT_COMPLETING;
+	default:
+		return PL330_STATE_INVALID;
+	}
+}
+
+static void _stop(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+
+	if (_state(thrd) == PL330_STATE_FAULT_COMPLETING)
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+	/* Return if nothing needs to be done */
+	if (_state(thrd) == PL330_STATE_COMPLETING
+		  || _state(thrd) == PL330_STATE_KILLING
+		  || _state(thrd) == PL330_STATE_STOPPED)
+		return;
+
+	_emit_KILL(0, insn);
+
+	/* Stop generating interrupts for SEV */
+	writel(readl(regs + INTEN) & ~(1 << thrd->ev), regs + INTEN);
+
+	_execute_DBGINSN(thrd, insn, is_manager(thrd));
+}
+
+/* Start doing req 'idx' of thread 'thrd' */
+static bool _trigger(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	struct _pl330_req *req;
+	struct pl330_req *r;
+	struct _arg_GO go;
+	unsigned ns;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+	int idx;
+
+	/* Return if already ACTIVE */
+	if (_state(thrd) != PL330_STATE_STOPPED)
+		return true;
+
+	idx = 1 - thrd->lstenq;
+	if (!IS_FREE(&thrd->req[idx]))
+		req = &thrd->req[idx];
+	else {
+		idx = thrd->lstenq;
+		if (!IS_FREE(&thrd->req[idx]))
+			req = &thrd->req[idx];
+		else
+			req = NULL;
+	}
+
+	/* Return if no request */
+	if (!req || !req->r)
+		return true;
+
+	r = req->r;
+
+	if (r->cfg)
+		ns = r->cfg->nonsecure ? 1 : 0;
+	else if (readl(regs + CS(thrd->id)) & CS_CNS)
+		ns = 1;
+	else
+		ns = 0;
+
+	/* See 'Abort Sources' point-4 at Page 2-25 */
+	if (_manager_ns(thrd) && !ns)
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d Recipe for ABORT!\n",
+			__func__, __LINE__);
+
+	go.chan = thrd->id;
+	go.addr = req->mc_bus;
+	go.ns = ns;
+	_emit_GO(0, insn, &go);
+
+	/* Set to generate interrupts for SEV */
+	writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
+
+	/* Only manager can execute GO */
+	_execute_DBGINSN(thrd, insn, true);
+
+	thrd->req_running = idx;
+
+	return true;
+}
+
+static bool _start(struct pl330_thread *thrd)
+{
+	switch (_state(thrd)) {
+	case PL330_STATE_FAULT_COMPLETING:
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+		if (_state(thrd) == PL330_STATE_KILLING)
+			UNTIL(thrd, PL330_STATE_STOPPED)
+
+	case PL330_STATE_FAULTING:
+		_stop(thrd);
+
+	case PL330_STATE_KILLING:
+	case PL330_STATE_COMPLETING:
+		UNTIL(thrd, PL330_STATE_STOPPED)
+
+	case PL330_STATE_STOPPED:
+		return _trigger(thrd);
+
+	case PL330_STATE_WFP:
+	case PL330_STATE_QUEUEBUSY:
+	case PL330_STATE_ATBARRIER:
+	case PL330_STATE_UPDTPC:
+	case PL330_STATE_CACHEMISS:
+	case PL330_STATE_EXECUTING:
+		return true;
+
+	case PL330_STATE_WFE: /* For RESUME, nothing yet */
+	default:
+		return false;
+	}
+}
+
+static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+	struct pl330_config *pcfg = pxs->r->cfg->pcfg;
+
+	/* check lock-up free version */
+	if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		}
+	} else {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_RMB(dry_run, &buf[off]);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+			off += _emit_WMB(dry_run, &buf[off]);
+		}
+	}
+
+	return off;
+}
+
+static inline int _ldst_devtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	}
+
+	return off;
+}
+
+static inline int _ldst_memtodev(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_LD(dry_run, &buf[off], ALWAYS);
+		off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	}
+
+	return off;
+}
+
+static int _bursts(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	switch (pxs->r->rqtype) {
+	case MEMTODEV:
+		off += _ldst_memtodev(dry_run, &buf[off], pxs, cyc);
+		break;
+	case DEVTOMEM:
+		off += _ldst_devtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+	case MEMTOMEM:
+		off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+	default:
+		off += 0x40000000; /* Scare off the Client */
+		break;
+	}
+
+	return off;
+}
+
+/* Returns bytes consumed and updates bursts */
+static inline int _loop(unsigned dry_run, u8 buf[],
+		unsigned long *bursts, const struct _xfer_spec *pxs)
+{
+	int cyc, cycmax, szlp, szlpend, szbrst, off;
+	unsigned lcnt0, lcnt1, ljmp0, ljmp1;
+	struct _arg_LPEND lpend;
+
+	/* Max iterations possible in DMALP is 256 */
+	if (*bursts >= 256*256) {
+		lcnt1 = 256;
+		lcnt0 = 256;
+		cyc = *bursts / lcnt1 / lcnt0;
+	} else if (*bursts > 256) {
+		lcnt1 = 256;
+		lcnt0 = *bursts / lcnt1;
+		cyc = 1;
+	} else {
+		lcnt1 = *bursts;
+		lcnt0 = 0;
+		cyc = 1;
+	}
+
+	szlp = _emit_LP(1, buf, 0, 0);
+	szbrst = _bursts(1, buf, pxs, 1);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 0;
+	lpend.bjump = 0;
+	szlpend = _emit_LPEND(1, buf, &lpend);
+
+	if (lcnt0) {
+		szlp *= 2;
+		szlpend *= 2;
+	}
+
+	/*
+	 * Max bursts that we can unroll due to limit on the
+	 * size of backward jump that can be encoded in DMALPEND
+	 * which is 8-bits and hence 255
+	 */
+	cycmax = (255 - (szlp + szlpend)) / szbrst;
+
+	cyc = (cycmax < cyc) ? cycmax : cyc;
+
+	off = 0;
+
+	if (lcnt0) {
+		off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
+		ljmp0 = off;
+	}
+
+	off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
+	ljmp1 = off;
+
+	off += _bursts(dry_run, &buf[off], pxs, cyc);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 1;
+	lpend.bjump = off - ljmp1;
+	off += _emit_LPEND(dry_run, &buf[off], &lpend);
+
+	if (lcnt0) {
+		lpend.cond = ALWAYS;
+		lpend.forever = false;
+		lpend.loop = 0;
+		lpend.bjump = off - ljmp0;
+		off += _emit_LPEND(dry_run, &buf[off], &lpend);
+	}
+
+	*bursts = lcnt1 * cyc;
+	if (lcnt0)
+		*bursts *= lcnt0;
+
+	return off;
+}
+
+static inline int _setup_loops(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	u32 ccr = pxs->ccr;
+	unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
+	int off = 0;
+
+	while (bursts) {
+		c = bursts;
+		off += _loop(dry_run, &buf[off], &c, pxs);
+		bursts -= c;
+	}
+
+	return off;
+}
+
+static inline int _setup_xfer(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	int off = 0;
+
+	/* DMAMOV SAR, x->src_addr */
+	off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr);
+	/* DMAMOV DAR, x->dst_addr */
+	off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr);
+
+	/* Setup Loop(s) */
+	off += _setup_loops(dry_run, &buf[off], pxs);
+
+	return off;
+}
+
+/*
+ * A req is a sequence of one or more xfer units.
+ * Returns the number of bytes taken to setup the MC for the req.
+ */
+static int _setup_req(unsigned dry_run, struct pl330_thread *thrd,
+		unsigned index, struct _xfer_spec *pxs)
+{
+	struct _pl330_req *req = &thrd->req[index];
+	struct pl330_xfer *x;
+	u8 *buf = req->mc_cpu;
+	int off = 0;
+
+	PL330_DBGMC_START(req->mc_bus);
+
+	/* DMAMOV CCR, ccr */
+	off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
+
+	x = pxs->r->x;
+	do {
+		/* Error if xfer length is not aligned at burst size */
+		if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr)))
+			return -EINVAL;
+
+		pxs->x = x;
+		off += _setup_xfer(dry_run, &buf[off], pxs);
+
+		x = x->next;
+	} while (x);
+
+	/* DMASEV peripheral/event */
+	off += _emit_SEV(dry_run, &buf[off], thrd->ev);
+	/* DMAEND */
+	off += _emit_END(dry_run, &buf[off]);
+
+	return off;
+}
+
+static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
+{
+	u32 ccr = 0;
+
+	if (rqc->src_inc)
+		ccr |= CC_SRCINC;
+
+	if (rqc->dst_inc)
+		ccr |= CC_DSTINC;
+
+	/* We set same protection levels for Src and DST for now */
+	if (rqc->privileged)
+		ccr |= CC_SRCPRI | CC_DSTPRI;
+	if (rqc->nonsecure)
+		ccr |= CC_SRCNS | CC_DSTNS;
+	if (rqc->insnaccess)
+		ccr |= CC_SRCIA | CC_DSTIA;
+
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+
+	ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
+	ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+
+	ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
+	ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
+
+	ccr |= (rqc->swap << CC_SWAP_SHFT);
+
+	return ccr;
+}
+
+static inline bool _is_valid(u32 ccr)
+{
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+
+	dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK;
+	scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK;
+
+	if (dcctl == DINVALID1 || dcctl == DINVALID2
+			|| scctl == SINVALID1 || scctl == SINVALID2)
+		return false;
+	else
+		return true;
+}
+
+/*
+ * Submit a list of xfers after which the client wants notification.
+ * Client is not notified after each xfer unit, just once after all
+ * xfer units are done or some error occurs.
+ */
+static int pl330_submit_req(void *ch_id, struct pl330_req *r)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	struct pl330_info *pi;
+	struct _xfer_spec xs;
+	unsigned long flags;
+	void __iomem *regs;
+	unsigned idx;
+	u32 ccr;
+	int ret = 0;
+
+	/* No Req or Unacquired Channel or DMAC */
+	if (!r || !thrd || thrd->free)
+		return -EINVAL;
+
+	pl330 = thrd->dmac;
+	pi = pl330->pinfo;
+	regs = pi->base;
+
+	if (pl330->state == DYING
+		|| pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d\n",
+			__func__, __LINE__);
+		return -EAGAIN;
+	}
+
+	/* If request for non-existing peripheral */
+	if (r->rqtype != MEMTOMEM && r->peri >= pi->pcfg.num_peri) {
+		dev_info(thrd->dmac->pinfo->dev,
+				"%s:%d Invalid peripheral(%u)!\n",
+				__func__, __LINE__, r->peri);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	if (_queue_full(thrd)) {
+		ret = -EAGAIN;
+		goto xfer_exit;
+	}
+
+	/* Prefer Secure Channel */
+	if (!_manager_ns(thrd))
+		r->cfg->nonsecure = 0;
+	else
+		r->cfg->nonsecure = 1;
+
+	/* Use last settings, if not provided */
+	if (r->cfg)
+		ccr = _prepare_ccr(r->cfg);
+	else
+		ccr = readl(regs + CC(thrd->id));
+
+	/* If this req doesn't have valid xfer settings */
+	if (!_is_valid(ccr)) {
+		ret = -EINVAL;
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d Invalid CCR(%x)!\n",
+			__func__, __LINE__, ccr);
+		goto xfer_exit;
+	}
+
+	idx = IS_FREE(&thrd->req[0]) ? 0 : 1;
+
+	xs.ccr = ccr;
+	xs.r = r;
+
+	/* First dry run to check if req is acceptable */
+	ret = _setup_req(1, thrd, idx, &xs);
+	if (ret < 0)
+		goto xfer_exit;
+
+	if (ret > pi->mcbufsz / 2) {
+		dev_info(thrd->dmac->pinfo->dev,
+			"%s:%d Trying increasing mcbufsz\n",
+				__func__, __LINE__);
+		ret = -ENOMEM;
+		goto xfer_exit;
+	}
+
+	/* Hook the request */
+	thrd->lstenq = idx;
+	thrd->req[idx].mc_len = _setup_req(0, thrd, idx, &xs);
+	thrd->req[idx].r = r;
+
+	ret = 0;
+
+xfer_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return ret;
+}
+
+static void pl330_dotask(unsigned long data)
+{
+	struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
+	struct pl330_info *pi = pl330->pinfo;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	/* The DMAC itself gone nuts */
+	if (pl330->dmac_tbd.reset_dmac) {
+		pl330->state = DYING;
+		/* Reset the manager too */
+		pl330->dmac_tbd.reset_mngr = true;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_dmac = false;
+	}
+
+	if (pl330->dmac_tbd.reset_mngr) {
+		_stop(pl330->manager);
+		/* Reset all channels */
+		pl330->dmac_tbd.reset_chan = (1 << pi->pcfg.num_chan) - 1;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_mngr = false;
+	}
+
+	for (i = 0; i < pi->pcfg.num_chan; i++) {
+
+		if (pl330->dmac_tbd.reset_chan & (1 << i)) {
+			struct pl330_thread *thrd = &pl330->channels[i];
+			void __iomem *regs = pi->base;
+			enum pl330_op_err err;
+
+			_stop(thrd);
+
+			if (readl(regs + FSC) & (1 << thrd->id))
+				err = PL330_ERR_FAIL;
+			else
+				err = PL330_ERR_ABORT;
+
+			spin_unlock_irqrestore(&pl330->lock, flags);
+
+			_callback(thrd->req[1 - thrd->lstenq].r, err);
+			_callback(thrd->req[thrd->lstenq].r, err);
+
+			spin_lock_irqsave(&pl330->lock, flags);
+
+			thrd->req[0].r = NULL;
+			thrd->req[1].r = NULL;
+			mark_free(thrd, 0);
+			mark_free(thrd, 1);
+
+			/* Clear the reset flag */
+			pl330->dmac_tbd.reset_chan &= ~(1 << i);
+		}
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return;
+}
+
+/* Returns 1 if state was updated, 0 otherwise */
+static int pl330_update(const struct pl330_info *pi)
+{
+	struct _pl330_req *rqdone;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	void __iomem *regs;
+	u32 val;
+	int id, ev, ret = 0;
+
+	if (!pi || !pi->pl330_data)
+		return 0;
+
+	regs = pi->base;
+	pl330 = pi->pl330_data;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	val = readl(regs + FSM) & 0x1;
+	if (val)
+		pl330->dmac_tbd.reset_mngr = true;
+	else
+		pl330->dmac_tbd.reset_mngr = false;
+
+	val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1);
+	pl330->dmac_tbd.reset_chan |= val;
+	if (val) {
+		int i = 0;
+		while (i < pi->pcfg.num_chan) {
+			if (val & (1 << i)) {
+				dev_info(pi->dev,
+					"Reset Channel-%d\t CS-%x FTC-%x\n",
+						i, readl(regs + CS(i)),
+						readl(regs + FTC(i)));
+				_stop(&pl330->channels[i]);
+			}
+			i++;
+		}
+	}
+
+	/* Check which event happened i.e, thread notified */
+	val = readl(regs + ES);
+	if (pi->pcfg.num_events < 32
+			&& val & ~((1 << pi->pcfg.num_events) - 1)) {
+		pl330->dmac_tbd.reset_dmac = true;
+		dev_err(pi->dev, "%s:%d Unexpected!\n", __func__, __LINE__);
+		ret = 1;
+		goto updt_exit;
+	}
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++) {
+		if (val & (1 << ev)) { /* Event occurred */
+			struct pl330_thread *thrd;
+			u32 inten = readl(regs + INTEN);
+			int active;
+
+			/* Clear the event */
+			if (inten & (1 << ev))
+				writel(1 << ev, regs + INTCLR);
+
+			ret = 1;
+
+			id = pl330->events[ev];
+
+			thrd = &pl330->channels[id];
+
+			active = thrd->req_running;
+			if (active == -1) /* Aborted */
+				continue;
+
+			rqdone = &thrd->req[active];
+			mark_free(thrd, active);
+
+			/* Get going again ASAP */
+			_start(thrd);
+
+			/* For now, just make a list of callbacks to be done */
+			list_add_tail(&rqdone->rqd, &pl330->req_done);
+		}
+	}
+
+	/* Now that we are in no hurry, do the callbacks */
+	while (!list_empty(&pl330->req_done)) {
+		struct pl330_req *r;
+
+		rqdone = container_of(pl330->req_done.next,
+					struct _pl330_req, rqd);
+
+		list_del_init(&rqdone->rqd);
+
+		/* Detach the req */
+		r = rqdone->r;
+		rqdone->r = NULL;
+
+		spin_unlock_irqrestore(&pl330->lock, flags);
+		_callback(r, PL330_ERR_NONE);
+		spin_lock_irqsave(&pl330->lock, flags);
+	}
+
+updt_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	if (pl330->dmac_tbd.reset_dmac
+			|| pl330->dmac_tbd.reset_mngr
+			|| pl330->dmac_tbd.reset_chan) {
+		ret = 1;
+		tasklet_schedule(&pl330->tasks);
+	}
+
+	return ret;
+}
+
+static int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	int ret = 0, active;
+
+	if (!thrd || thrd->free || thrd->dmac->state == DYING)
+		return -EINVAL;
+
+	pl330 = thrd->dmac;
+	active = thrd->req_running;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	switch (op) {
+	case PL330_OP_FLUSH:
+		/* Make sure the channel is stopped */
+		_stop(thrd);
+
+		thrd->req[0].r = NULL;
+		thrd->req[1].r = NULL;
+		mark_free(thrd, 0);
+		mark_free(thrd, 1);
+		break;
+
+	case PL330_OP_ABORT:
+		/* Make sure the channel is stopped */
+		_stop(thrd);
+
+		/* ABORT is only for the active req */
+		if (active == -1)
+			break;
+
+		thrd->req[active].r = NULL;
+		mark_free(thrd, active);
+
+		/* Start the next */
+	case PL330_OP_START:
+		if ((active == -1) && !_start(thrd))
+			ret = -EIO;
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+	return ret;
+}
+
+/* Reserve an event */
+static inline int _alloc_event(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+	int ev;
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++)
+		if (pl330->events[ev] == -1) {
+			pl330->events[ev] = thrd->id;
+			return ev;
+		}
+
+	return -1;
+}
+
+static bool _chan_ns(const struct pl330_info *pi, int i)
+{
+	return pi->pcfg.irq_ns & (1 << i);
+}
+
+/* Upon success, returns IdentityToken for the
+ * allocated channel, NULL otherwise.
+ */
+static void *pl330_request_channel(const struct pl330_info *pi)
+{
+	struct pl330_thread *thrd = NULL;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	int chans, i;
+
+	if (!pi || !pi->pl330_data)
+		return NULL;
+
+	pl330 = pi->pl330_data;
+
+	if (pl330->state == DYING)
+		return NULL;
+
+	chans = pi->pcfg.num_chan;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		if ((thrd->free) && (!_manager_ns(thrd) ||
+					_chan_ns(pi, i))) {
+			thrd->ev = _alloc_event(thrd);
+			if (thrd->ev >= 0) {
+				thrd->free = false;
+				thrd->lstenq = 1;
+				thrd->req[0].r = NULL;
+				mark_free(thrd, 0);
+				thrd->req[1].r = NULL;
+				mark_free(thrd, 1);
+				break;
+			}
+		}
+		thrd = NULL;
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return thrd;
+}
+
+/* Release an event */
+static inline void _free_event(struct pl330_thread *thrd, int ev)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+
+	/* If the event is valid and was held by the thread */
+	if (ev >= 0 && ev < pi->pcfg.num_events
+			&& pl330->events[ev] == thrd->id)
+		pl330->events[ev] = -1;
+}
+
+static void pl330_release_channel(void *ch_id)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+
+	if (!thrd || thrd->free)
+		return;
+
+	_stop(thrd);
+
+	_callback(thrd->req[1 - thrd->lstenq].r, PL330_ERR_ABORT);
+	_callback(thrd->req[thrd->lstenq].r, PL330_ERR_ABORT);
+
+	pl330 = thrd->dmac;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+	_free_event(thrd, thrd->ev);
+	thrd->free = true;
+	spin_unlock_irqrestore(&pl330->lock, flags);
+}
+
+/* Initialize the structure for PL330 configuration, that can be used
+ * by the client driver the make best use of the DMAC
+ */
+static void read_dmac_config(struct pl330_info *pi)
+{
+	void __iomem *regs = pi->base;
+	u32 val;
+
+	val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
+	val &= CRD_DATA_WIDTH_MASK;
+	pi->pcfg.data_bus_width = 8 * (1 << val);
+
+	val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT;
+	val &= CRD_DATA_BUFF_MASK;
+	pi->pcfg.data_buf_dep = val + 1;
+
+	val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
+	val &= CR0_NUM_CHANS_MASK;
+	val += 1;
+	pi->pcfg.num_chan = val;
+
+	val = readl(regs + CR0);
+	if (val & CR0_PERIPH_REQ_SET) {
+		val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
+		val += 1;
+		pi->pcfg.num_peri = val;
+		pi->pcfg.peri_ns = readl(regs + CR4);
+	} else {
+		pi->pcfg.num_peri = 0;
+	}
+
+	val = readl(regs + CR0);
+	if (val & CR0_BOOT_MAN_NS)
+		pi->pcfg.mode |= DMAC_MODE_NS;
+	else
+		pi->pcfg.mode &= ~DMAC_MODE_NS;
+
+	val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
+	val &= CR0_NUM_EVENTS_MASK;
+	val += 1;
+	pi->pcfg.num_events = val;
+
+	pi->pcfg.irq_ns = readl(regs + CR3);
+
+	pi->pcfg.periph_id = get_id(pi, PERIPH_ID);
+	pi->pcfg.pcell_id = get_id(pi, PCELL_ID);
+}
+
+static inline void _reset_thread(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+
+	thrd->req[0].mc_cpu = pl330->mcode_cpu
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].mc_bus = pl330->mcode_bus
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].r = NULL;
+	mark_free(thrd, 0);
+
+	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+				+ pi->mcbufsz / 2;
+	thrd->req[1].mc_bus = thrd->req[0].mc_bus
+				+ pi->mcbufsz / 2;
+	thrd->req[1].r = NULL;
+	mark_free(thrd, 1);
+}
+
+static int dmac_alloc_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Allocate 1 Manager and 'chans' Channel threads */
+	pl330->channels = kzalloc((1 + chans) * sizeof(*thrd),
+					GFP_KERNEL);
+	if (!pl330->channels)
+		return -ENOMEM;
+
+	/* Init Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		thrd->id = i;
+		thrd->dmac = pl330;
+		_reset_thread(thrd);
+		thrd->free = true;
+	}
+
+	/* MANAGER is indexed at the end */
+	thrd = &pl330->channels[chans];
+	thrd->id = chans;
+	thrd->dmac = pl330;
+	thrd->free = false;
+	pl330->manager = thrd;
+
+	return 0;
+}
+
+static int dmac_alloc_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	int ret;
+
+	/*
+	 * Alloc MicroCode buffer for 'chans' Channel threads.
+	 * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
+	 */
+	pl330->mcode_cpu = dma_alloc_coherent(pi->dev,
+				chans * pi->mcbufsz,
+				&pl330->mcode_bus, GFP_KERNEL);
+	if (!pl330->mcode_cpu) {
+		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	ret = dmac_alloc_threads(pl330);
+	if (ret) {
+		dev_err(pi->dev, "%s:%d Can't to create channels for DMAC!\n",
+			__func__, __LINE__);
+		dma_free_coherent(pi->dev,
+				chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pl330_add(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+	void __iomem *regs;
+	int i, ret;
+
+	if (!pi || !pi->dev)
+		return -EINVAL;
+
+	/* If already added */
+	if (pi->pl330_data)
+		return -EINVAL;
+
+	/*
+	 * If the SoC can perform reset on the DMAC, then do it
+	 * before reading its configuration.
+	 */
+	if (pi->dmac_reset)
+		pi->dmac_reset(pi);
+
+	regs = pi->base;
+
+	/* Check if we can handle this DMAC */
+	if ((get_id(pi, PERIPH_ID) & 0xfffff) != PERIPH_ID_VAL
+	   || get_id(pi, PCELL_ID) != PCELL_ID_VAL) {
+		dev_err(pi->dev, "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
+			get_id(pi, PERIPH_ID), get_id(pi, PCELL_ID));
+		return -EINVAL;
+	}
+
+	/* Read the configuration of the DMAC */
+	read_dmac_config(pi);
+
+	if (pi->pcfg.num_events == 0) {
+		dev_err(pi->dev, "%s:%d Can't work without events!\n",
+			__func__, __LINE__);
+		return -EINVAL;
+	}
+
+	pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL);
+	if (!pl330) {
+		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	/* Assign the info structure and private data */
+	pl330->pinfo = pi;
+	pi->pl330_data = pl330;
+
+	spin_lock_init(&pl330->lock);
+
+	INIT_LIST_HEAD(&pl330->req_done);
+
+	/* Use default MC buffer size if not provided */
+	if (!pi->mcbufsz)
+		pi->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+
+	/* Mark all events as free */
+	for (i = 0; i < pi->pcfg.num_events; i++)
+		pl330->events[i] = -1;
+
+	/* Allocate resources needed by the DMAC */
+	ret = dmac_alloc_resources(pl330);
+	if (ret) {
+		dev_err(pi->dev, "Unable to create channels for DMAC\n");
+		kfree(pl330);
+		return ret;
+	}
+
+	tasklet_init(&pl330->tasks, pl330_dotask, (unsigned long) pl330);
+
+	pl330->state = INIT;
+
+	return 0;
+}
+
+static int dmac_free_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Release Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		pl330_release_channel((void *)thrd);
+	}
+
+	/* Free memory */
+	kfree(pl330->channels);
+
+	return 0;
+}
+
+static void dmac_free_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+
+	dmac_free_threads(pl330);
+
+	dma_free_coherent(pi->dev, chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+}
+
+static void pl330_del(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+
+	if (!pi || !pi->pl330_data)
+		return;
+
+	pl330 = pi->pl330_data;
+
+	pl330->state = UNINIT;
+
+	tasklet_kill(&pl330->tasks);
+
+	/* Free DMAC resources */
+	dmac_free_resources(pl330);
+
+	kfree(pl330);
+	pi->pl330_data = NULL;
+}
+
 /* forward declaration */
 static struct amba_driver pl330_driver;
 
@@ -234,7 +2320,7 @@ static void pl330_tasklet(unsigned long data)
 	/* Pick up ripe tomatoes */
 	list_for_each_entry_safe(desc, _dt, &pch->work_list, node)
 		if (desc->status == DONE) {
-			pch->completed = desc->txd.cookie;
+			dma_cookie_complete(&desc->txd);
 			list_move_tail(&desc->node, &list);
 		}
 
@@ -305,7 +2391,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 
 	spin_lock_irqsave(&pch->lock, flags);
 
-	pch->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 	pch->cyclic = false;
 
 	pch->pl330_chid = pl330_request_channel(&pdmac->pif);
@@ -340,7 +2426,6 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 		/* Mark all desc done */
 		list_for_each_entry_safe(desc, _dt, &pch->work_list , node) {
 			desc->status = DONE;
-			pch->completed = desc->txd.cookie;
 			list_move_tail(&desc->node, &list);
 		}
 
@@ -396,18 +2481,7 @@ static enum dma_status
 pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		 struct dma_tx_state *txstate)
 {
-	struct dma_pl330_chan *pch = to_pchan(chan);
-	dma_cookie_t last_done, last_used;
-	int ret;
-
-	last_done = pch->completed;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_done, last_used);
-
-	dma_set_tx_state(txstate, last_done, last_used, 0);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void pl330_issue_pending(struct dma_chan *chan)
@@ -430,26 +2504,16 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_irqsave(&pch->lock, flags);
 
 	/* Assign cookies to all nodes */
-	cookie = tx->chan->cookie;
-
 	while (!list_empty(&last->node)) {
 		desc = list_entry(last->node.next, struct dma_pl330_desc, node);
 
-		if (++cookie < 0)
-			cookie = 1;
-		desc->txd.cookie = cookie;
+		dma_cookie_assign(&desc->txd);
 
 		list_move_tail(&desc->node, &pch->work_list);
 	}
 
-	if (++cookie < 0)
-		cookie = 1;
-	last->txd.cookie = cookie;
-
+	cookie = dma_cookie_assign(&last->txd);
 	list_add_tail(&last->node, &pch->work_list);
-
-	tx->chan->cookie = cookie;
-
 	spin_unlock_irqrestore(&pch->lock, flags);
 
 	return cookie;
@@ -553,6 +2617,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 	async_tx_ack(&desc->txd);
 
 	desc->req.peri = peri_id ? pch->chan.chan_id : 0;
+	desc->rqcfg.pcfg = &pch->dmac->pif.pcfg;
 
 	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
 
@@ -621,7 +2686,8 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -711,7 +2777,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flg)
+		unsigned long flg, void *context)
 {
 	struct dma_pl330_desc *first, *desc = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -829,7 +2895,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	if (IS_ERR(pdmac->clk)) {
 		dev_err(&adev->dev, "Cannot get operation clock.\n");
 		ret = -EINVAL;
-		goto probe_err1;
+		goto probe_err2;
 	}
 
 	amba_set_drvdata(adev, pdmac);
@@ -843,11 +2909,11 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	ret = request_irq(irq, pl330_irq_handler, 0,
 			dev_name(&adev->dev), pi);
 	if (ret)
-		goto probe_err2;
+		goto probe_err3;
 
 	ret = pl330_add(pi);
 	if (ret)
-		goto probe_err3;
+		goto probe_err4;
 
 	INIT_LIST_HEAD(&pdmac->desc_pool);
 	spin_lock_init(&pdmac->pool_lock);
@@ -904,7 +2970,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	ret = dma_async_device_register(pd);
 	if (ret) {
 		dev_err(&adev->dev, "unable to register DMAC\n");
-		goto probe_err4;
+		goto probe_err5;
 	}
 
 	dev_info(&adev->dev,
@@ -917,10 +2983,15 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	return 0;
 
-probe_err4:
+probe_err5:
 	pl330_del(pi);
-probe_err3:
+probe_err4:
 	free_irq(irq, pi);
+probe_err3:
+#ifndef CONFIG_PM_RUNTIME
+	clk_disable(pdmac->clk);
+#endif
+	clk_put(pdmac->clk);
 probe_err2:
 	iounmap(pi->base);
 probe_err1:
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index fc457a7e8832..ced98826684a 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -46,6 +46,7 @@
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 #include "adma.h"
+#include "../dmaengine.h"
 
 enum ppc_adma_init_code {
 	PPC_ADMA_INIT_OK = 0,
@@ -1930,7 +1931,7 @@ static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
 				if (end_of_chain && slot_cnt) {
 					/* Should wait for ZeroSum completion */
 					if (cookie > 0)
-						chan->completed_cookie = cookie;
+						chan->common.completed_cookie = cookie;
 					return;
 				}
 
@@ -1960,7 +1961,7 @@ static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
 	BUG_ON(!seen_current);
 
 	if (cookie > 0) {
-		chan->completed_cookie = cookie;
+		chan->common.completed_cookie = cookie;
 		pr_debug("\tcompleted cookie %d\n", cookie);
 	}
 
@@ -2150,22 +2151,6 @@ static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
 }
 
 /**
- * ppc440spe_desc_assign_cookie - assign a cookie
- */
-static dma_cookie_t ppc440spe_desc_assign_cookie(
-		struct ppc440spe_adma_chan *chan,
-		struct ppc440spe_adma_desc_slot *desc)
-{
-	dma_cookie_t cookie = chan->common.cookie;
-
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	chan->common.cookie = desc->async_tx.cookie = cookie;
-	return cookie;
-}
-
-/**
  * ppc440spe_rxor_set_region_data -
  */
 static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
@@ -2235,8 +2220,7 @@ static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 	slots_per_op = group_start->slots_per_op;
 
 	spin_lock_bh(&chan->lock);
-
-	cookie = ppc440spe_desc_assign_cookie(chan, sw_desc);
+	cookie = dma_cookie_assign(tx);
 
 	if (unlikely(list_empty(&chan->chain))) {
 		/* first peer */
@@ -3944,28 +3928,16 @@ static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct ppc440spe_adma_chan *ppc440spe_chan;
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status ret;
 
 	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
-	last_used = chan->cookie;
-	last_complete = ppc440spe_chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_SUCCESS)
 		return ret;
 
 	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
 
-	last_used = chan->cookie;
-	last_complete = ppc440spe_chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 /**
@@ -4050,16 +4022,12 @@ static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
 		async_tx_ack(&sw_desc->async_tx);
 		ppc440spe_desc_init_null_xor(group_start);
 
-		cookie = chan->common.cookie;
-		cookie++;
-		if (cookie <= 1)
-			cookie = 2;
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
 
 		/* initialize the completed cookie to be less than
 		 * the most recently used cookie
 		 */
-		chan->completed_cookie = cookie - 1;
-		chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+		chan->common.completed_cookie = cookie - 1;
 
 		/* channel should not be busy */
 		BUG_ON(ppc440spe_chan_is_busy(chan));
@@ -4529,6 +4497,7 @@ static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev)
 	INIT_LIST_HEAD(&chan->all_slots);
 	chan->device = adev;
 	chan->common.device = &adev->common;
+	dma_cookie_init(&chan->common);
 	list_add_tail(&chan->common.device_node, &adev->common.channels);
 	tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
 		     (unsigned long)chan);
diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h
index 8ada5a812e3b..26b7a5ed9ac7 100644
--- a/drivers/dma/ppc4xx/adma.h
+++ b/drivers/dma/ppc4xx/adma.h
@@ -81,7 +81,6 @@ struct ppc440spe_adma_device {
  * @common: common dmaengine channel object members
  * @all_slots: complete domain of slots usable by the channel
  * @pending: allows batching of hardware operations
- * @completed_cookie: identifier for the most recently completed operation
  * @slots_allocated: records the actual size of the descriptor slot pool
  * @hw_chain_inited: h/w descriptor chain initialization flag
  * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs
@@ -99,7 +98,6 @@ struct ppc440spe_adma_chan {
 	struct list_head all_slots;
 	struct ppc440spe_adma_desc_slot *last_used;
 	int pending;
-	dma_cookie_t completed_cookie;
 	int slots_allocated;
 	int hw_chain_inited;
 	struct tasklet_struct irq_tasklet;
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 812fd76e9c18..19d7a8d3975d 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -30,6 +30,8 @@
 #include <linux/kdebug.h>
 #include <linux/spinlock.h>
 #include <linux/rculist.h>
+
+#include "dmaengine.h"
 #include "shdma.h"
 
 /* DMA descriptor control */
@@ -296,13 +298,7 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 	else
 		power_up = false;
 
-	cookie = sh_chan->common.cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-
-	sh_chan->common.cookie = cookie;
-	tx->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 
 	/* Mark all chunks of this descriptor as submitted, move to the queue */
 	list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
@@ -673,7 +669,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
 
 static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
 {
 	struct sh_dmae_slave *param;
 	struct sh_dmae_chan *sh_chan;
@@ -764,12 +761,12 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all
 			cookie = tx->cookie;
 
 		if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
-			if (sh_chan->completed_cookie != desc->cookie - 1)
+			if (sh_chan->common.completed_cookie != desc->cookie - 1)
 				dev_dbg(sh_chan->dev,
 					"Completing cookie %d, expected %d\n",
 					desc->cookie,
-					sh_chan->completed_cookie + 1);
-			sh_chan->completed_cookie = desc->cookie;
+					sh_chan->common.completed_cookie + 1);
+			sh_chan->common.completed_cookie = desc->cookie;
 		}
 
 		/* Call callback on the last chunk */
@@ -823,7 +820,7 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all
 		 * Terminating and the loop completed normally: forgive
 		 * uncompleted cookies
 		 */
-		sh_chan->completed_cookie = sh_chan->common.cookie;
+		sh_chan->common.completed_cookie = sh_chan->common.cookie;
 
 	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
 
@@ -883,23 +880,14 @@ static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
 					struct dma_tx_state *txstate)
 {
 	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status status;
 	unsigned long flags;
 
 	sh_dmae_chan_ld_cleanup(sh_chan, false);
 
-	/* First read completed cookie to avoid a skew */
-	last_complete = sh_chan->completed_cookie;
-	rmb();
-	last_used = chan->cookie;
-	BUG_ON(last_complete < 0);
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
 	spin_lock_irqsave(&sh_chan->desc_lock, flags);
 
-	status = dma_async_is_complete(cookie, last_complete, last_used);
+	status = dma_cookie_status(chan, cookie, txstate);
 
 	/*
 	 * If we don't find cookie on the queue, it has been aborted and we have
@@ -1102,6 +1090,7 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
 
 	/* reference struct dma_device */
 	new_sh_chan->common.device = &shdev->common;
+	dma_cookie_init(&new_sh_chan->common);
 
 	new_sh_chan->dev = shdev->common.dev;
 	new_sh_chan->id = id;
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
index 2b55a276dc5b..0b1d2c105f02 100644
--- a/drivers/dma/shdma.h
+++ b/drivers/dma/shdma.h
@@ -30,7 +30,6 @@ enum dmae_pm_state {
 };
 
 struct sh_dmae_chan {
-	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
 	spinlock_t desc_lock;		/* Descriptor operation lock */
 	struct list_head ld_queue;	/* Link descriptors queue */
 	struct list_head ld_free;	/* Link descriptors free */
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 2333810d1688..434ad31174f2 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -18,6 +18,8 @@
 #include <linux/of_platform.h>
 #include <linux/sirfsoc_dma.h>
 
+#include "dmaengine.h"
+
 #define SIRFSOC_DMA_DESCRIPTORS                 16
 #define SIRFSOC_DMA_CHANNELS                    16
 
@@ -59,7 +61,6 @@ struct sirfsoc_dma_chan {
 	struct list_head		queued;
 	struct list_head		active;
 	struct list_head		completed;
-	dma_cookie_t			completed_cookie;
 	unsigned long			happened_cyclic;
 	unsigned long			completed_cyclic;
 
@@ -208,7 +209,7 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
 			/* Free descriptors */
 			spin_lock_irqsave(&schan->lock, flags);
 			list_splice_tail_init(&list, &schan->free);
-			schan->completed_cookie = last_cookie;
+			schan->chan.completed_cookie = last_cookie;
 			spin_unlock_irqrestore(&schan->lock, flags);
 		} else {
 			/* for cyclic channel, desc is always in active list */
@@ -258,13 +259,7 @@ static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
 	/* Move descriptor to queue */
 	list_move_tail(&sdesc->node, &schan->queued);
 
-	/* Update cookie */
-	cookie = schan->chan.cookie + 1;
-	if (cookie <= 0)
-		cookie = 1;
-
-	schan->chan.cookie = cookie;
-	sdesc->desc.cookie = cookie;
+	cookie = dma_cookie_assign(txd);
 
 	spin_unlock_irqrestore(&schan->lock, flags);
 
@@ -414,16 +409,13 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
 	unsigned long flags;
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
+	enum dma_status ret;
 
 	spin_lock_irqsave(&schan->lock, flags);
-	last_used = schan->chan.cookie;
-	last_complete = schan->completed_cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
 	spin_unlock_irqrestore(&schan->lock, flags);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return ret;
 }
 
 static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
@@ -497,7 +489,7 @@ err_dir:
 static struct dma_async_tx_descriptor *
 sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
 	size_t buf_len, size_t period_len,
-	enum dma_transfer_direction direction)
+	enum dma_transfer_direction direction, void *context)
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
 	struct sirfsoc_dma_desc *sdesc = NULL;
@@ -635,8 +627,7 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 		schan = &sdma->channels[i];
 
 		schan->chan.device = dma;
-		schan->chan.cookie = 1;
-		schan->completed_cookie = schan->chan.cookie;
+		dma_cookie_init(&schan->chan);
 
 		INIT_LIST_HEAD(&schan->free);
 		INIT_LIST_HEAD(&schan->prepared);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index cc5ecbc067a3..bdd41d4bfa8d 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -21,6 +21,7 @@
 
 #include <plat/ste_dma40.h>
 
+#include "dmaengine.h"
 #include "ste_dma40_ll.h"
 
 #define D40_NAME "dma40"
@@ -220,8 +221,6 @@ struct d40_base;
  *
  * @lock: A spinlock to protect this struct.
  * @log_num: The logical number, if any of this channel.
- * @completed: Starts with 1, after first interrupt it is set to dma engine's
- * current cookie.
  * @pending_tx: The number of pending transfers. Used between interrupt handler
  * and tasklet.
  * @busy: Set to true when transfer is ongoing on this channel.
@@ -250,8 +249,6 @@ struct d40_base;
 struct d40_chan {
 	spinlock_t			 lock;
 	int				 log_num;
-	/* ID of the most recent completed transfer */
-	int				 completed;
 	int				 pending_tx;
 	bool				 busy;
 	struct d40_phy_res		*phy_chan;
@@ -1223,21 +1220,14 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
 					     chan);
 	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
 	unsigned long flags;
+	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&d40c->lock, flags);
-
-	d40c->chan.cookie++;
-
-	if (d40c->chan.cookie < 0)
-		d40c->chan.cookie = 1;
-
-	d40d->txd.cookie = d40c->chan.cookie;
-
+	cookie = dma_cookie_assign(tx);
 	d40_desc_queue(d40c, d40d);
-
 	spin_unlock_irqrestore(&d40c->lock, flags);
 
-	return tx->cookie;
+	return cookie;
 }
 
 static int d40_start(struct d40_chan *d40c)
@@ -1357,7 +1347,7 @@ static void dma_tasklet(unsigned long data)
 		goto err;
 
 	if (!d40d->cyclic)
-		d40c->completed = d40d->txd.cookie;
+		dma_cookie_complete(&d40d->txd);
 
 	/*
 	 * If terminating a channel pending_tx is set to zero.
@@ -2182,7 +2172,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
 	bool is_free_phy;
 	spin_lock_irqsave(&d40c->lock, flags);
 
-	d40c->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	/* If no dma configuration is set use default configuration (memcpy) */
 	if (!d40c->configured) {
@@ -2299,7 +2289,8 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 							 struct scatterlist *sgl,
 							 unsigned int sg_len,
 							 enum dma_transfer_direction direction,
-							 unsigned long dma_flags)
+							 unsigned long dma_flags,
+							 void *context)
 {
 	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
 		return NULL;
@@ -2310,7 +2301,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_transfer_direction direction)
+		     enum dma_transfer_direction direction, void *context)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
@@ -2342,25 +2333,19 @@ static enum dma_status d40_tx_status(struct dma_chan *chan,
 				     struct dma_tx_state *txstate)
 {
 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	int ret;
+	enum dma_status ret;
 
 	if (d40c->phy_chan == NULL) {
 		chan_err(d40c, "Cannot read status of unallocated channel\n");
 		return -EINVAL;
 	}
 
-	last_complete = d40c->completed;
-	last_used = chan->cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS)
+		dma_set_residue(txstate, stedma40_residue(chan));
 
 	if (d40_is_paused(d40c))
 		ret = DMA_PAUSED;
-	else
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
-
-	dma_set_tx_state(txstate, last_complete, last_used,
-			 stedma40_residue(chan));
 
 	return ret;
 }
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index a6f9c1684a0f..4e0dff59901d 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -31,6 +31,8 @@
 
 #include <linux/timb_dma.h>
 
+#include "dmaengine.h"
+
 #define DRIVER_NAME "timb-dma"
 
 /* Global DMA registers */
@@ -84,7 +86,6 @@ struct timb_dma_chan {
 					especially the lists and descriptors,
 					from races between the tasklet and calls
 					from above */
-	dma_cookie_t		last_completed_cookie;
 	bool			ongoing;
 	struct list_head	active_list;
 	struct list_head	queue;
@@ -284,7 +285,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 	else
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR);
 */
-	td_chan->last_completed_cookie = txd->cookie;
+	dma_cookie_complete(txd);
 	td_chan->ongoing = false;
 
 	callback = txd->callback;
@@ -349,12 +350,7 @@ static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd)
 	dma_cookie_t cookie;
 
 	spin_lock_bh(&td_chan->lock);
-
-	cookie = txd->chan->cookie;
-	if (++cookie < 0)
-		cookie = 1;
-	txd->chan->cookie = cookie;
-	txd->cookie = cookie;
+	cookie = dma_cookie_assign(txd);
 
 	if (list_empty(&td_chan->active_list)) {
 		dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__,
@@ -481,8 +477,7 @@ static int td_alloc_chan_resources(struct dma_chan *chan)
 	}
 
 	spin_lock_bh(&td_chan->lock);
-	td_chan->last_completed_cookie = 1;
-	chan->cookie = 1;
+	dma_cookie_init(chan);
 	spin_unlock_bh(&td_chan->lock);
 
 	return 0;
@@ -515,24 +510,13 @@ static void td_free_chan_resources(struct dma_chan *chan)
 static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				    struct dma_tx_state *txstate)
 {
-	struct timb_dma_chan *td_chan =
-		container_of(chan, struct timb_dma_chan, chan);
-	dma_cookie_t		last_used;
-	dma_cookie_t		last_complete;
-	int			ret;
+	enum dma_status ret;
 
 	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
 
-	last_complete = td_chan->last_completed_cookie;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	ret = dma_cookie_status(chan, cookie, txstate);
 
-	dev_dbg(chan2dev(chan),
-		"%s: exit, ret: %d, last_complete: %d, last_used: %d\n",
-		__func__, ret, last_complete, last_used);
+	dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", 	__func__, ret);
 
 	return ret;
 }
@@ -558,7 +542,8 @@ static void td_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
 {
 	struct timb_dma_chan *td_chan =
 		container_of(chan, struct timb_dma_chan, chan);
@@ -766,7 +751,7 @@ static int __devinit td_probe(struct platform_device *pdev)
 		}
 
 		td_chan->chan.device = &td->dma;
-		td_chan->chan.cookie = 1;
+		dma_cookie_init(&td_chan->chan);
 		spin_lock_init(&td_chan->lock);
 		INIT_LIST_HEAD(&td_chan->active_list);
 		INIT_LIST_HEAD(&td_chan->queue);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 6122c364cf11..913f55c76c99 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -15,6 +15,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
+
+#include "dmaengine.h"
 #include "txx9dmac.h"
 
 static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan)
@@ -279,21 +281,6 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
 	}
 }
 
-/* Called with dc->lock held and bh disabled */
-static dma_cookie_t
-txx9dmac_assign_cookie(struct txx9dmac_chan *dc, struct txx9dmac_desc *desc)
-{
-	dma_cookie_t cookie = dc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	dc->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
 /*----------------------------------------------------------------------*/
 
 static void txx9dmac_dump_regs(struct txx9dmac_chan *dc)
@@ -424,7 +411,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
 	dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n",
 		 txd->cookie, desc);
 
-	dc->completed = txd->cookie;
+	dma_cookie_complete(txd);
 	callback = txd->callback;
 	param = txd->callback_param;
 
@@ -738,7 +725,7 @@ static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx)
 	dma_cookie_t cookie;
 
 	spin_lock_bh(&dc->lock);
-	cookie = txx9dmac_assign_cookie(dc, desc);
+	cookie = dma_cookie_assign(tx);
 
 	dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n",
 		 desc->txd.cookie, desc);
@@ -846,7 +833,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 static struct dma_async_tx_descriptor *
 txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
 	struct txx9dmac_dev *ddev = dc->ddev;
@@ -972,27 +959,17 @@ txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		   struct dma_tx_state *txstate)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	int ret;
+	enum dma_status ret;
 
-	last_complete = dc->completed;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		spin_lock_bh(&dc->lock);
 		txx9dmac_scan_descriptors(dc);
 		spin_unlock_bh(&dc->lock);
 
-		last_complete = dc->completed;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
 	return ret;
 }
 
@@ -1057,7 +1034,7 @@ static int txx9dmac_alloc_chan_resources(struct dma_chan *chan)
 		return -EIO;
 	}
 
-	dc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE;
 	txx9dmac_chan_set_SMPCHN(dc);
@@ -1186,7 +1163,7 @@ static int __init txx9dmac_chan_probe(struct platform_device *pdev)
 	dc->ddev->chan[ch] = dc;
 	dc->chan.device = &dc->dma;
 	list_add_tail(&dc->chan.device_node, &dc->chan.device->channels);
-	dc->chan.cookie = dc->completed = 1;
+	dma_cookie_init(&dc->chan);
 
 	if (is_dmac64(dc))
 		dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch];
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
index 365d42366b9f..f5a760598882 100644
--- a/drivers/dma/txx9dmac.h
+++ b/drivers/dma/txx9dmac.h
@@ -172,7 +172,6 @@ struct txx9dmac_chan {
 	spinlock_t		lock;
 
 	/* these other elements are all protected by lock */
-	dma_cookie_t		completed;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c
index 74522773e934..93c35ef5f0ad 100644
--- a/drivers/media/video/mx3_camera.c
+++ b/drivers/media/video/mx3_camera.c
@@ -286,7 +286,7 @@ static void mx3_videobuf_queue(struct vb2_buffer *vb)
 		sg_dma_address(sg)	= vb2_dma_contig_plane_dma_addr(vb, 0);
 		sg_dma_len(sg)		= new_size;
 
-		txd = ichan->dma_chan.device->device_prep_slave_sg(
+		txd = dmaengine_prep_slave_sg(
 			&ichan->dma_chan, sg, 1, DMA_DEV_TO_MEM,
 			DMA_PREP_INTERRUPT);
 		if (!txd)
diff --git a/drivers/media/video/timblogiw.c b/drivers/media/video/timblogiw.c
index 4ed1c7c28ae7..02194c056b00 100644
--- a/drivers/media/video/timblogiw.c
+++ b/drivers/media/video/timblogiw.c
@@ -564,7 +564,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
 
 	spin_unlock_irq(&fh->queue_lock);
 
-	desc = fh->chan->device->device_prep_slave_sg(fh->chan,
+	desc = dmaengine_prep_slave_sg(fh->chan,
 		buf->sg, sg_elems, DMA_DEV_TO_MEM,
 		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
 	if (!desc) {
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 390863e7efbd..9819dc09ce08 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -24,6 +24,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
+#include <linux/types.h>
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/sdio.h>
@@ -173,6 +174,7 @@ struct atmel_mci {
 
 	struct atmel_mci_dma	dma;
 	struct dma_chan		*data_chan;
+	struct dma_slave_config	dma_conf;
 
 	u32			cmd_status;
 	u32			data_status;
@@ -863,16 +865,17 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
 
 	if (data->flags & MMC_DATA_READ) {
 		direction = DMA_FROM_DEVICE;
-		slave_dirn = DMA_DEV_TO_MEM;
+		host->dma_conf.direction = slave_dirn = DMA_DEV_TO_MEM;
 	} else {
 		direction = DMA_TO_DEVICE;
-		slave_dirn = DMA_MEM_TO_DEV;
+		host->dma_conf.direction = slave_dirn = DMA_MEM_TO_DEV;
 	}
 
 	sglen = dma_map_sg(chan->device->dev, data->sg,
 			data->sg_len, direction);
 
-	desc = chan->device->device_prep_slave_sg(chan,
+	dmaengine_slave_config(chan, &host->dma_conf);
+	desc = dmaengine_prep_slave_sg(chan,
 			data->sg, sglen, slave_dirn,
 			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc)
@@ -1960,10 +1963,6 @@ static bool atmci_configure_dma(struct atmel_mci *host)
 	if (pdata && find_slave_dev(pdata->dma_slave)) {
 		dma_cap_mask_t mask;
 
-		setup_dma_addr(pdata->dma_slave,
-			       host->mapbase + ATMCI_TDR,
-			       host->mapbase + ATMCI_RDR);
-
 		/* Try to grab a DMA channel */
 		dma_cap_zero(mask);
 		dma_cap_set(DMA_SLAVE, mask);
@@ -1977,6 +1976,14 @@ static bool atmci_configure_dma(struct atmel_mci *host)
 		dev_info(&host->pdev->dev,
 					"using %s for DMA transfers\n",
 					dma_chan_name(host->dma.chan));
+
+		host->dma_conf.src_addr = host->mapbase + ATMCI_RDR;
+		host->dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		host->dma_conf.src_maxburst = 1;
+		host->dma_conf.dst_addr = host->mapbase + ATMCI_TDR;
+		host->dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		host->dma_conf.dst_maxburst = 1;
+		host->dma_conf.device_fc = false;
 		return true;
 	}
 }
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 983e244eca76..032b84791a16 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -30,6 +30,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/amba/mmci.h>
 #include <linux/pm_runtime.h>
+#include <linux/types.h>
 
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -400,6 +401,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
 		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
 		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
+		.device_fc = false,
 	};
 	struct dma_chan *chan;
 	struct dma_device *device;
@@ -441,7 +443,7 @@ static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 		return -EINVAL;
 
 	dmaengine_slave_config(chan, &conf);
-	desc = device->device_prep_slave_sg(chan, data->sg, nr_sg,
+	desc = dmaengine_prep_slave_sg(chan, data->sg, nr_sg,
 					    conf.direction, DMA_CTRL_ACK);
 	if (!desc)
 		goto unmap_exit;
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 4184b7946bbf..b2058b432320 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -33,6 +33,7 @@
 #include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
 #include <linux/dmaengine.h>
+#include <linux/types.h>
 
 #include <asm/dma.h>
 #include <asm/irq.h>
@@ -254,7 +255,7 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 	if (nents != data->sg_len)
 		return -EINVAL;
 
-	host->desc = host->dma->device->device_prep_slave_sg(host->dma,
+	host->desc = dmaengine_prep_slave_sg(host->dma,
 		data->sg, data->sg_len, slave_dirn,
 		DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -267,6 +268,7 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 	wmb();
 
 	dmaengine_submit(host->desc);
+	dma_async_issue_pending(host->dma);
 
 	return 0;
 }
@@ -710,6 +712,7 @@ static int mxcmci_setup_dma(struct mmc_host *mmc)
 	config->src_addr_width = 4;
 	config->dst_maxburst = host->burstlen;
 	config->src_maxburst = host->burstlen;
+	config->device_fc = false;
 
 	return dmaengine_slave_config(host->dma, config);
 }
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 382c835d217c..65f36cf2ff33 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -324,7 +324,7 @@ static struct dma_async_tx_descriptor *mxs_mmc_prep_dma(
 		sg_len = SSP_PIO_NUM;
 	}
 
-	desc = host->dmach->device->device_prep_slave_sg(host->dmach,
+	desc = dmaengine_prep_slave_sg(host->dmach,
 				sgl, sg_len, host->slave_dirn, append);
 	if (desc) {
 		desc->callback = mxs_mmc_dma_irq_callback;
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 60f205708f54..aafaf0b6eb1c 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -286,7 +286,7 @@ static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host)
 			 DMA_FROM_DEVICE);
 	if (ret > 0) {
 		host->dma_active = true;
-		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+		desc = dmaengine_prep_slave_sg(chan, sg, ret,
 			DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	}
 
@@ -335,7 +335,7 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
 			 DMA_TO_DEVICE);
 	if (ret > 0) {
 		host->dma_active = true;
-		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+		desc = dmaengine_prep_slave_sg(chan, sg, ret,
 			DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	}
 
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 8253ec12003e..fff928604859 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -88,7 +88,7 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 
 	ret = dma_map_sg(chan->device->dev, sg, host->sg_len, DMA_FROM_DEVICE);
 	if (ret > 0)
-		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+		desc = dmaengine_prep_slave_sg(chan, sg, ret,
 			DMA_DEV_TO_MEM, DMA_CTRL_ACK);
 
 	if (desc) {
@@ -169,7 +169,7 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 
 	ret = dma_map_sg(chan->device->dev, sg, host->sg_len, DMA_TO_DEVICE);
 	if (ret > 0)
-		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+		desc = dmaengine_prep_slave_sg(chan, sg, ret,
 			DMA_MEM_TO_DEV, DMA_CTRL_ACK);
 
 	if (desc) {
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 7db6555ed3ba..590dd5cceed6 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -835,7 +835,7 @@ int gpmi_send_command(struct gpmi_nand_data *this)
 		| BM_GPMI_CTRL0_ADDRESS_INCREMENT
 		| BF_GPMI_CTRL0_XFER_COUNT(this->command_length);
 	pio[1] = pio[2] = 0;
-	desc = channel->device->device_prep_slave_sg(channel,
+	desc = dmaengine_prep_slave_sg(channel,
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
 	if (!desc) {
@@ -848,8 +848,7 @@ int gpmi_send_command(struct gpmi_nand_data *this)
 
 	sg_init_one(sgl, this->cmd_buffer, this->command_length);
 	dma_map_sg(this->dev, sgl, 1, DMA_TO_DEVICE);
-	desc = channel->device->device_prep_slave_sg(channel,
-					sgl, 1, DMA_MEM_TO_DEV, 1);
+	desc = dmaengine_prep_slave_sg(channel, sgl, 1, DMA_MEM_TO_DEV, 1);
 	if (!desc) {
 		pr_err("step 2 error\n");
 		return -1;
@@ -880,8 +879,7 @@ int gpmi_send_data(struct gpmi_nand_data *this)
 		| BF_GPMI_CTRL0_ADDRESS(address)
 		| BF_GPMI_CTRL0_XFER_COUNT(this->upper_len);
 	pio[1] = 0;
-	desc = channel->device->device_prep_slave_sg(channel,
-					(struct scatterlist *)pio,
+	desc = dmaengine_prep_slave_sg(channel, (struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
 	if (!desc) {
 		pr_err("step 1 error\n");
@@ -890,7 +888,7 @@ int gpmi_send_data(struct gpmi_nand_data *this)
 
 	/* [2] send DMA request */
 	prepare_data_dma(this, DMA_TO_DEVICE);
-	desc = channel->device->device_prep_slave_sg(channel, &this->data_sgl,
+	desc = dmaengine_prep_slave_sg(channel, &this->data_sgl,
 						1, DMA_MEM_TO_DEV, 1);
 	if (!desc) {
 		pr_err("step 2 error\n");
@@ -916,7 +914,7 @@ int gpmi_read_data(struct gpmi_nand_data *this)
 		| BF_GPMI_CTRL0_ADDRESS(BV_GPMI_CTRL0_ADDRESS__NAND_DATA)
 		| BF_GPMI_CTRL0_XFER_COUNT(this->upper_len);
 	pio[1] = 0;
-	desc = channel->device->device_prep_slave_sg(channel,
+	desc = dmaengine_prep_slave_sg(channel,
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
 	if (!desc) {
@@ -926,8 +924,8 @@ int gpmi_read_data(struct gpmi_nand_data *this)
 
 	/* [2] : send DMA request */
 	prepare_data_dma(this, DMA_FROM_DEVICE);
-	desc = channel->device->device_prep_slave_sg(channel, &this->data_sgl,
-						1, DMA_DEV_TO_MEM, 1);
+	desc = dmaengine_prep_slave_sg(channel, &this->data_sgl,
+					1, DMA_DEV_TO_MEM, 1);
 	if (!desc) {
 		pr_err("step 2 error\n");
 		return -1;
@@ -972,8 +970,7 @@ int gpmi_send_page(struct gpmi_nand_data *this,
 	pio[4] = payload;
 	pio[5] = auxiliary;
 
-	desc = channel->device->device_prep_slave_sg(channel,
-					(struct scatterlist *)pio,
+	desc = dmaengine_prep_slave_sg(channel, (struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
 	if (!desc) {
 		pr_err("step 2 error\n");
@@ -1007,7 +1004,7 @@ int gpmi_read_page(struct gpmi_nand_data *this,
 		| BF_GPMI_CTRL0_ADDRESS(address)
 		| BF_GPMI_CTRL0_XFER_COUNT(0);
 	pio[1] = 0;
-	desc = channel->device->device_prep_slave_sg(channel,
+	desc = dmaengine_prep_slave_sg(channel,
 				(struct scatterlist *)pio, 2,
 				DMA_TRANS_NONE, 0);
 	if (!desc) {
@@ -1036,7 +1033,7 @@ int gpmi_read_page(struct gpmi_nand_data *this,
 	pio[3] = geo->page_size;
 	pio[4] = payload;
 	pio[5] = auxiliary;
-	desc = channel->device->device_prep_slave_sg(channel,
+	desc = dmaengine_prep_slave_sg(channel,
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 1);
 	if (!desc) {
@@ -1055,7 +1052,7 @@ int gpmi_read_page(struct gpmi_nand_data *this,
 		| BF_GPMI_CTRL0_ADDRESS(address)
 		| BF_GPMI_CTRL0_XFER_COUNT(geo->page_size);
 	pio[1] = 0;
-	desc = channel->device->device_prep_slave_sg(channel,
+	desc = dmaengine_prep_slave_sg(channel,
 				(struct scatterlist *)pio, 2,
 				DMA_TRANS_NONE, 1);
 	if (!desc) {
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 0686b93f1857..f84dd2dc82b6 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -458,7 +458,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev)
 	if (sg_dma_len(&ctl->sg) % 4)
 		sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
 
-	ctl->adesc = ctl->chan->device->device_prep_slave_sg(ctl->chan,
+	ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
 		&ctl->sg, 1, DMA_MEM_TO_DEV,
 		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
 	if (!ctl->adesc)
@@ -570,7 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev)
 
 		sg_dma_len(sg) = DMA_BUFFER_SIZE;
 
-		ctl->adesc = ctl->chan->device->device_prep_slave_sg(ctl->chan,
+		ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
 			sg, 1, DMA_DEV_TO_MEM,
 			DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
 
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 8418eb036651..b9f0192758d6 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
+#include <linux/types.h>
 
 #include "spi-dw.h"
 
@@ -136,6 +137,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 	txconf.dst_maxburst = LNW_DMA_MSIZE_16;
 	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 	txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	txconf.device_fc = false;
 
 	txchan->device->device_control(txchan, DMA_SLAVE_CONFIG,
 				       (unsigned long) &txconf);
@@ -144,7 +146,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 	dws->tx_sgl.dma_address = dws->tx_dma;
 	dws->tx_sgl.length = dws->len;
 
-	txdesc = txchan->device->device_prep_slave_sg(txchan,
+	txdesc = dmaengine_prep_slave_sg(txchan,
 				&dws->tx_sgl,
 				1,
 				DMA_MEM_TO_DEV,
@@ -158,6 +160,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 	rxconf.src_maxburst = LNW_DMA_MSIZE_16;
 	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 	rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	rxconf.device_fc = false;
 
 	rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG,
 				       (unsigned long) &rxconf);
@@ -166,7 +169,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
 	dws->rx_sgl.dma_address = dws->rx_dma;
 	dws->rx_sgl.length = dws->len;
 
-	rxdesc = rxchan->device->device_prep_slave_sg(rxchan,
+	rxdesc = dmaengine_prep_slave_sg(rxchan,
 				&dws->rx_sgl,
 				1,
 				DMA_DEV_TO_MEM,
diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index d46e55c720b7..6db2887852d6 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -633,8 +633,8 @@ ep93xx_spi_dma_prepare(struct ep93xx_spi *espi, enum dma_data_direction dir)
 	if (!nents)
 		return ERR_PTR(-ENOMEM);
 
-	txd = chan->device->device_prep_slave_sg(chan, sgt->sgl, nents,
-						 slave_dirn, DMA_CTRL_ACK);
+	txd = dmaengine_prep_slave_sg(chan, sgt->sgl, nents,
+					slave_dirn, DMA_CTRL_ACK);
 	if (!txd) {
 		dma_unmap_sg(chan->device->dev, sgt->sgl, sgt->nents, dir);
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index dc8485d1e883..96f0da66b185 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -880,10 +880,12 @@ static int configure_dma(struct pl022 *pl022)
 	struct dma_slave_config rx_conf = {
 		.src_addr = SSP_DR(pl022->phybase),
 		.direction = DMA_DEV_TO_MEM,
+		.device_fc = false,
 	};
 	struct dma_slave_config tx_conf = {
 		.dst_addr = SSP_DR(pl022->phybase),
 		.direction = DMA_MEM_TO_DEV,
+		.device_fc = false,
 	};
 	unsigned int pages;
 	int ret;
@@ -1017,7 +1019,7 @@ static int configure_dma(struct pl022 *pl022)
 		goto err_tx_sgmap;
 
 	/* Send both scatterlists */
-	rxdesc = rxchan->device->device_prep_slave_sg(rxchan,
+	rxdesc = dmaengine_prep_slave_sg(rxchan,
 				      pl022->sgt_rx.sgl,
 				      rx_sglen,
 				      DMA_DEV_TO_MEM,
@@ -1025,7 +1027,7 @@ static int configure_dma(struct pl022 *pl022)
 	if (!rxdesc)
 		goto err_rxdesc;
 
-	txdesc = txchan->device->device_prep_slave_sg(txchan,
+	txdesc = dmaengine_prep_slave_sg(txchan,
 				      pl022->sgt_tx.sgl,
 				      tx_sglen,
 				      DMA_MEM_TO_DEV,
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 5c6fa5ed3366..ec47d3bdfd13 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -1099,7 +1099,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
 		sg_dma_address(sg) = dma->rx_buf_dma + sg->offset;
 	}
 	sg = dma->sg_rx_p;
-	desc_rx = dma->chan_rx->device->device_prep_slave_sg(dma->chan_rx, sg,
+	desc_rx = dmaengine_prep_slave_sg(dma->chan_rx, sg,
 					num, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc_rx) {
@@ -1158,7 +1158,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
 		sg_dma_address(sg) = dma->tx_buf_dma + sg->offset;
 	}
 	sg = dma->sg_tx_p;
-	desc_tx = dma->chan_tx->device->device_prep_slave_sg(dma->chan_tx,
+	desc_tx = dmaengine_prep_slave_sg(dma->chan_tx,
 					sg, num, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc_tx) {
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 20d795d9b591..0c65c9e66986 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -51,6 +51,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include <linux/delay.h>
+#include <linux/types.h>
 
 #include <asm/io.h>
 #include <asm/sizes.h>
@@ -271,6 +272,7 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
 		.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
 		.direction = DMA_MEM_TO_DEV,
 		.dst_maxburst = uap->fifosize >> 1,
+		.device_fc = false,
 	};
 	struct dma_chan *chan;
 	dma_cap_mask_t mask;
@@ -304,6 +306,7 @@ static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
 			.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
 			.direction = DMA_DEV_TO_MEM,
 			.src_maxburst = uap->fifosize >> 1,
+			.device_fc = false,
 		};
 
 		chan = dma_request_channel(mask, plat->dma_filter, plat->dma_rx_param);
@@ -481,7 +484,7 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap)
 		return -EBUSY;
 	}
 
-	desc = dma_dev->device_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV,
+	desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV,
 					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc) {
 		dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE);
@@ -664,7 +667,6 @@ static void pl011_dma_rx_callback(void *data);
 static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
 {
 	struct dma_chan *rxchan = uap->dmarx.chan;
-	struct dma_device *dma_dev;
 	struct pl011_dmarx_data *dmarx = &uap->dmarx;
 	struct dma_async_tx_descriptor *desc;
 	struct pl011_sgbuf *sgbuf;
@@ -675,8 +677,7 @@ static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
 	/* Start the RX DMA job */
 	sgbuf = uap->dmarx.use_buf_b ?
 		&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
-	dma_dev = rxchan->device;
-	desc = rxchan->device->device_prep_slave_sg(rxchan, &sgbuf->sg, 1,
+	desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1,
 					DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	/*
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 332f2eb8abbc..e825460478be 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -844,7 +844,7 @@ static int dma_handle_rx(struct eg20t_port *priv)
 
 	sg_dma_address(sg) = priv->rx_buf_dma;
 
-	desc = priv->chan_rx->device->device_prep_slave_sg(priv->chan_rx,
+	desc = dmaengine_prep_slave_sg(priv->chan_rx,
 			sg, 1, DMA_DEV_TO_MEM,
 			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -1003,7 +1003,7 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv)
 			sg_dma_len(sg) = size;
 	}
 
-	desc = priv->chan_tx->device->device_prep_slave_sg(priv->chan_tx,
+	desc = dmaengine_prep_slave_sg(priv->chan_tx,
 					priv->sg_tx_p, nent, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc) {
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 61b7fd2729cd..f8db8a70c14e 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1338,7 +1338,7 @@ static void sci_submit_rx(struct sci_port *s)
 		struct scatterlist *sg = &s->sg_rx[i];
 		struct dma_async_tx_descriptor *desc;
 
-		desc = chan->device->device_prep_slave_sg(chan,
+		desc = dmaengine_prep_slave_sg(chan,
 			sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 
 		if (desc) {
@@ -1453,7 +1453,7 @@ static void work_fn_tx(struct work_struct *work)
 
 	BUG_ON(!sg_dma_len(sg));
 
-	desc = chan->device->device_prep_slave_sg(chan,
+	desc = dmaengine_prep_slave_sg(chan,
 			sg, s->sg_len_tx, DMA_MEM_TO_DEV,
 			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc) {
diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c
index 97cb45916c43..d05c7fbbb703 100644
--- a/drivers/usb/musb/ux500_dma.c
+++ b/drivers/usb/musb/ux500_dma.c
@@ -115,12 +115,12 @@ static bool ux500_configure_channel(struct dma_channel *channel,
 	slave_conf.dst_addr = usb_fifo_addr;
 	slave_conf.dst_addr_width = addr_width;
 	slave_conf.dst_maxburst = 16;
+	slave_conf.device_fc = false;
 
 	dma_chan->device->device_control(dma_chan, DMA_SLAVE_CONFIG,
 					     (unsigned long) &slave_conf);
 
-	dma_desc = dma_chan->device->
-			device_prep_slave_sg(dma_chan, &sg, 1, direction,
+	dma_desc = dmaengine_prep_slave_sg(dma_chan, &sg, 1, direction,
 					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!dma_desc)
 		return false;
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 3648c82a17fe..6ec7f838d7fa 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -786,9 +786,8 @@ static void xfer_work(struct work_struct *work)
 	sg_dma_address(&sg) = pkt->dma + pkt->actual;
 	sg_dma_len(&sg) = pkt->trans;
 
-	desc = chan->device->device_prep_slave_sg(chan, &sg, 1, dir,
-						  DMA_PREP_INTERRUPT |
-						  DMA_CTRL_ACK);
+	desc = dmaengine_prep_slave_sg(chan, &sg, 1, dir,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc)
 		return;
 
diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c
index 727a5149d818..eec0d7b748eb 100644
--- a/drivers/video/mx3fb.c
+++ b/drivers/video/mx3fb.c
@@ -337,7 +337,7 @@ static void sdc_enable_channel(struct mx3fb_info *mx3_fbi)
 
 	/* This enables the channel */
 	if (mx3_fbi->cookie < 0) {
-		mx3_fbi->txd = dma_chan->device->device_prep_slave_sg(dma_chan,
+		mx3_fbi->txd = dmaengine_prep_slave_sg(dma_chan,
 		      &mx3_fbi->sg[0], 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 		if (!mx3_fbi->txd) {
 			dev_err(mx3fb->dev, "Cannot allocate descriptor on %d\n",
@@ -1091,7 +1091,7 @@ static int mx3fb_pan_display(struct fb_var_screeninfo *var,
 	if (mx3_fbi->txd)
 		async_tx_ack(mx3_fbi->txd);
 
-	txd = dma_chan->device->device_prep_slave_sg(dma_chan, sg +
+	txd = dmaengine_prep_slave_sg(dma_chan, sg +
 		mx3_fbi->cur_ipu_buf, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 	if (!txd) {
 		dev_err(fbi->device,