Diffstat (limited to 'drivers/dma')
-rw-r--r--  drivers/dma/Kconfig               |    3
-rw-r--r--  drivers/dma/amba-pl08x.c          |   46
-rw-r--r--  drivers/dma/at_hdmac.c            |  111
-rw-r--r--  drivers/dma/at_hdmac_regs.h       |   34
-rw-r--r--  drivers/dma/coh901318.c           |   41
-rw-r--r--  drivers/dma/dmaengine.c           |    8
-rw-r--r--  drivers/dma/dmaengine.h           |   89
-rw-r--r--  drivers/dma/dw_dmac.c             |  228
-rw-r--r--  drivers/dma/dw_dmac_regs.h        |   16
-rw-r--r--  drivers/dma/ep93xx_dma.c          |   31
-rw-r--r--  drivers/dma/fsldma.c              |   28
-rw-r--r--  drivers/dma/fsldma.h              |    1
-rw-r--r--  drivers/dma/imx-dma.c             |  950
-rw-r--r--  drivers/dma/imx-sdma.c            |  187
-rw-r--r--  drivers/dma/intel_mid_dma.c       |   46
-rw-r--r--  drivers/dma/intel_mid_dma_regs.h  |    2
-rw-r--r--  drivers/dma/ioat/dma.c            |   21
-rw-r--r--  drivers/dma/ioat/dma.h            |   23
-rw-r--r--  drivers/dma/ioat/dma_v2.c         |   13
-rw-r--r--  drivers/dma/ioat/dma_v3.c         |   12
-rw-r--r--  drivers/dma/iop-adma.c            |   52
-rw-r--r--  drivers/dma/ipu/ipu_idmac.c       |   25
-rw-r--r--  drivers/dma/mpc512x_dma.c         |   25
-rw-r--r--  drivers/dma/mv_xor.c              |   34
-rw-r--r--  drivers/dma/mv_xor.h              |    3
-rw-r--r--  drivers/dma/mxs-dma.c             |   28
-rw-r--r--  drivers/dma/pch_dma.c             |   37
-rw-r--r--  drivers/dma/pl330.c               | 2149
-rw-r--r--  drivers/dma/ppc4xx/adma.c         |   49
-rw-r--r--  drivers/dma/ppc4xx/adma.h         |    2
-rw-r--r--  drivers/dma/shdma.c               |   33
-rw-r--r--  drivers/dma/shdma.h               |    1
-rw-r--r--  drivers/dma/sirf-dma.c            |   27
-rw-r--r--  drivers/dma/ste_dma40.c           |   41
-rw-r--r--  drivers/dma/timb_dma.c            |   37
-rw-r--r--  drivers/dma/txx9dmac.c            |   43
-rw-r--r--  drivers/dma/txx9dmac.h            |    1
37 files changed, 3547 insertions(+), 930 deletions(-)
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 4a6c46dea8a0..cf9da362d64f 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -201,7 +201,6 @@ config PL330_DMA
 	tristate "DMA API Driver for PL330"
 	select DMA_ENGINE
 	depends on ARM_AMBA
-	select PL330
 	help
 	  Select if your platform has one or more PL330 DMACs.
 	  You need to provide platform specific settings via
@@ -231,7 +230,7 @@ config IMX_SDMA
 
 config IMX_DMA
 	tristate "i.MX DMA support"
-	depends on IMX_HAVE_DMA_V1
+	depends on ARCH_MXC
 	select DMA_ENGINE
 	help
 	  Support the i.MX DMA engine. This engine is integrated into
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 8a281584458b..c301a8ec31aa 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -85,6 +85,8 @@
 #include <linux/slab.h>
 #include <asm/hardware/pl080.h>
 
+#include "dmaengine.h"
+
 #define DRIVER_NAME	"pl08xdmac"
 
 static struct amba_driver pl08x_amba_driver;
@@ -649,7 +651,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 			}
 
 			if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
-					(bd.srcbus.addr % bd.srcbus.buswidth)) {
+					(bd.dstbus.addr % bd.dstbus.buswidth)) {
 				dev_err(&pl08x->adev->dev,
 					"%s src & dst address must be aligned to src"
 					" & dst width if peripheral is flow controller",
@@ -919,13 +921,10 @@ static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan);
 	struct pl08x_txd *txd = to_pl08x_txd(tx);
 	unsigned long flags;
+	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&plchan->lock, flags);
-
-	plchan->chan.cookie += 1;
-	if (plchan->chan.cookie < 0)
-		plchan->chan.cookie = 1;
-	tx->cookie = plchan->chan.cookie;
+	cookie = dma_cookie_assign(tx);
 
 	/* Put this onto the pending list */
 	list_add_tail(&txd->node, &plchan->pend_list);
@@ -945,7 +944,7 @@ static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	spin_unlock_irqrestore(&plchan->lock, flags);
 
-	return tx->cookie;
+	return cookie;
 }
 
 static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
@@ -965,31 +964,17 @@ static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
 		dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status ret;
-	u32 bytesleft = 0;
 
-	last_used = plchan->chan.cookie;
-	last_complete = plchan->lc;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
-	if (ret == DMA_SUCCESS) {
-		dma_set_tx_state(txstate, last_complete, last_used, 0);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_SUCCESS)
 		return ret;
-	}
 
 	/*
 	 * This cookie is not complete yet
+	 * Get number of bytes left in the active transactions and queue
 	 */
-	last_used = plchan->chan.cookie;
-	last_complete = plchan->lc;
-
-	/* Get number of bytes left in the active transactions and queue */
-	bytesleft = pl08x_getbytes_chan(plchan);
-
-	dma_set_tx_state(txstate, last_complete, last_used,
-			 bytesleft);
+	dma_set_residue(txstate, pl08x_getbytes_chan(plchan));
 
 	if (plchan->state == PL08X_CHAN_PAUSED)
 		return DMA_PAUSED;
@@ -1139,6 +1124,8 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
 	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
 
+	plchan->device_fc = config->device_fc;
+
 	if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
 		plchan->src_addr = config->src_addr;
 		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
@@ -1326,7 +1313,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
@@ -1370,7 +1357,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		return NULL;
 	}
 
-	if (plchan->cd->device_fc)
+	if (plchan->device_fc)
 		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
 			PL080_FLOW_PER2MEM_PER;
 	else
@@ -1541,7 +1528,7 @@ static void pl08x_tasklet(unsigned long data)
 
 	if (txd) {
 		/* Update last completed */
-		plchan->lc = txd->tx.cookie;
+		dma_cookie_complete(&txd->tx);
 	}
 
 	/* If a new descriptor is queued, set it up: plchan->at is NULL here */
@@ -1722,8 +1709,7 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 			 chan->name);
 
 		chan->chan.device = dmadev;
-		chan->chan.cookie = 0;
-		chan->lc = 0;
+		dma_cookie_init(&chan->chan);
 
 		spin_lock_init(&chan->lock);
 		INIT_LIST_HEAD(&chan->pend_list);
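
The tx_status conversion above is the pattern this series applies across every driver: the open-coded last_used/last_complete bookkeeping collapses into dma_cookie_status(), with dma_set_residue() called only while the transfer is still in flight. A minimal sketch of the resulting shape, using hypothetical mydrv_* names (the helpers come from the new drivers/dma/dmaengine.h added later in this series; mydrv_bytes_left() stands in for a driver-specific residue calculation such as pl08x_getbytes_chan()):

#include "dmaengine.h"	/* dma_cookie_status(), dma_set_residue() */

static u32 mydrv_bytes_left(struct dma_chan *chan);	/* hypothetical */

static enum dma_status mydrv_tx_status(struct dma_chan *chan,
		dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	enum dma_status ret;

	/* Fills txstate->last/used from the channel's cookie counters */
	ret = dma_cookie_status(chan, cookie, txstate);
	if (ret == DMA_SUCCESS)
		return ret;

	/* Not complete yet: report the bytes still outstanding */
	dma_set_residue(txstate, mydrv_bytes_left(chan));
	return ret;
}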
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index f4aed5fc2cb6..7aa58d204892 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -27,6 +27,7 @@
 #include <linux/of_device.h>
 
 #include "at_hdmac_regs.h"
+#include "dmaengine.h"
 
 /*
  * Glossary
@@ -192,27 +193,6 @@ static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
 }
 
 /**
- * atc_assign_cookie - compute and assign new cookie
- * @atchan: channel we work on
- * @desc: descriptor to assign cookie for
- *
- * Called with atchan->lock held and bh disabled
- */
-static dma_cookie_t
-atc_assign_cookie(struct at_dma_chan *atchan, struct at_desc *desc)
-{
-	dma_cookie_t cookie = atchan->chan_common.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	atchan->chan_common.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
-/**
  * atc_dostart - starts the DMA engine for real
  * @atchan: the channel we want to start
  * @first: first descriptor in the list we want to begin with
@@ -269,7 +249,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 	dev_vdbg(chan2dev(&atchan->chan_common),
 		"descriptor %u complete\n", txd->cookie);
 
-	atchan->completed_cookie = txd->cookie;
+	dma_cookie_complete(txd);
 
 	/* move children to free_list */
 	list_splice_init(&desc->tx_list, &atchan->free_list);
@@ -547,7 +527,7 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
 	unsigned long		flags;
 
 	spin_lock_irqsave(&atchan->lock, flags);
-	cookie = atc_assign_cookie(atchan, desc);
+	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&atchan->active_list)) {
 		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
@@ -659,14 +639,16 @@ err_desc_get:
  * @sg_len: number of entries in @scatterlist
  * @direction: DMA direction
  * @flags: tx descriptor status flags
+ * @context: transaction context (ignored)
  */
 static struct dma_async_tx_descriptor *
 atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
 	struct at_desc		*first = NULL;
 	struct at_desc		*prev = NULL;
 	u32			ctrla;
@@ -688,19 +670,18 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 	}
 
-	reg_width = atslave->reg_width;
-
 	ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
 	ctrlb = ATC_IEN;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
 		ctrla |=  ATC_DST_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
 			| ATC_FC_MEM2PER
 			| ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF);
-		reg = atslave->tx_reg;
+		reg = sconfig->dst_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
 			u32		len;
@@ -728,13 +709,14 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		}
 		break;
 	case DMA_DEV_TO_MEM:
+		reg_width = convert_buswidth(sconfig->src_addr_width);
 		ctrla |=  ATC_SRC_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
 			| ATC_FC_PER2MEM
 			| ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF);
 
-		reg = atslave->rx_reg;
+		reg = sconfig->src_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
 			u32		len;
@@ -810,12 +792,15 @@ err_out:
 * atc_dma_cyclic_fill_desc - Fill one period descriptor
  */
 static int
-atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
+atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
 		unsigned int period_index, dma_addr_t buf_addr,
-		size_t period_len, enum dma_transfer_direction direction)
+		unsigned int reg_width, size_t period_len,
+		enum dma_transfer_direction direction)
 {
-	u32		ctrla;
-	unsigned int	reg_width = atslave->reg_width;
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	u32			ctrla;
 
 	/* prepare common CRTLA value */
 	ctrla =   ATC_DEFAULT_CTRLA | atslave->ctrla
@@ -826,7 +811,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
 		desc->lli.saddr = buf_addr + (period_len * period_index);
-		desc->lli.daddr = atslave->tx_reg;
+		desc->lli.daddr = sconfig->dst_addr;
 		desc->lli.ctrla = ctrla;
 		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
 				| ATC_SRC_ADDR_MODE_INCR
@@ -836,7 +821,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		break;
 
 	case DMA_DEV_TO_MEM:
-		desc->lli.saddr = atslave->rx_reg;
+		desc->lli.saddr = sconfig->src_addr;
 		desc->lli.daddr = buf_addr + (period_len * period_index);
 		desc->lli.ctrla = ctrla;
 		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
@@ -860,16 +845,20 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
  * @buf_len: total number of bytes for the entire buffer
  * @period_len: number of bytes for each period
  * @direction: transfer direction, to or from device
+ * @context: transfer context (ignored)
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
 	struct at_desc		*first = NULL;
 	struct at_desc		*prev = NULL;
 	unsigned long		was_cyclic;
+	unsigned int		reg_width;
 	unsigned int		periods = buf_len / period_len;
 	unsigned int		i;
 
@@ -889,8 +878,13 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		return NULL;
 	}
 
+	if (sconfig->direction == DMA_MEM_TO_DEV)
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
+	else
+		reg_width = convert_buswidth(sconfig->src_addr_width);
+
 	/* Check for too big/unaligned periods and unaligned DMA buffer */
-	if (atc_dma_cyclic_check_values(atslave->reg_width, buf_addr,
+	if (atc_dma_cyclic_check_values(reg_width, buf_addr,
 					period_len, direction))
 		goto err_out;
 
@@ -902,8 +896,8 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		if (!desc)
 			goto err_desc_get;
 
-		if (atc_dma_cyclic_fill_desc(atslave, desc, i, buf_addr,
-						period_len, direction))
+		if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr,
+					     reg_width, period_len, direction))
 			goto err_desc_get;
 
 		atc_desc_chain(&first, &prev, desc);
@@ -926,6 +920,23 @@ err_out:
 	return NULL;
 }
 
+static int set_runtime_config(struct dma_chan *chan,
+			      struct dma_slave_config *sconfig)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+
+	/* Check if chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&atchan->dma_sconfig.src_maxburst);
+	convert_burst(&atchan->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
 
 static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		       unsigned long arg)
@@ -986,6 +997,8 @@ static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		clear_bit(ATC_IS_CYCLIC, &atchan->status);
 
 		spin_unlock_irqrestore(&atchan->lock, flags);
+	} else if (cmd == DMA_SLAVE_CONFIG) {
+		return set_runtime_config(chan, (struct dma_slave_config *)arg);
 	} else {
 		return -ENXIO;
 	}
@@ -1016,26 +1029,20 @@ atc_tx_status(struct dma_chan *chan,
 
 	spin_lock_irqsave(&atchan->lock, flags);
 
-	last_complete = atchan->completed_cookie;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		atc_cleanup_descriptors(atchan);
 
-		last_complete = atchan->completed_cookie;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
+	last_complete = chan->completed_cookie;
+	last_used = chan->cookie;
+
 	spin_unlock_irqrestore(&atchan->lock, flags);
 
 	if (ret != DMA_SUCCESS)
-		dma_set_tx_state(txstate, last_complete, last_used,
-			atc_first_active(atchan)->len);
-	else
-		dma_set_tx_state(txstate, last_complete, last_used, 0);
+		dma_set_residue(txstate, atc_first_active(atchan)->len);
 
 	if (atc_chan_is_paused(atchan))
 		ret = DMA_PAUSED;
@@ -1129,7 +1136,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 	spin_lock_irqsave(&atchan->lock, flags);
 	atchan->descs_allocated = i;
 	list_splice(&tmp_list, &atchan->free_list);
-	atchan->completed_cookie = chan->cookie = 1;
+	dma_cookie_init(chan);
 	spin_unlock_irqrestore(&atchan->lock, flags);
 
 	/* channel parameters */
@@ -1329,7 +1336,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 		struct at_dma_chan	*atchan = &atdma->chan[i];
 
 		atchan->chan_common.device = &atdma->dma_common;
-		atchan->chan_common.cookie = atchan->completed_cookie = 1;
+		dma_cookie_init(&atchan->chan_common);
 		list_add_tail(&atchan->chan_common.device_node,
 				&atdma->dma_common.channels);
 
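
With DMA_SLAVE_CONFIG handled in atc_control() above, the FIFO address and bus width now arrive through a generic struct dma_slave_config instead of the old at_dma_slave tx_reg/rx_reg/reg_width fields. A hedged sketch of what a client peripheral driver would issue — fifo_addr and mydev_setup_tx_dma() are placeholders; dmaengine_slave_config() is the standard wrapper that lands in atc_control(..., DMA_SLAVE_CONFIG, ...):

#include <linux/dmaengine.h>

static int mydev_setup_tx_dma(struct dma_chan *chan, dma_addr_t fifo_addr)
{
	struct dma_slave_config cfg = {
		.direction	= DMA_MEM_TO_DEV,
		.dst_addr	= fifo_addr,	/* peripheral TX FIFO */
		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst	= 16,	/* stored as 3 by convert_burst() */
	};

	return dmaengine_slave_config(chan, &cfg);
}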
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index a8d3277d60b5..897a8bcaec90 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -207,8 +207,8 @@ enum atc_status {
  * @save_cfg: configuration register that is saved on suspend/resume cycle
  * @save_dscr: for cyclic operations, preserve next descriptor address in
  *             the cyclic list on suspend/resume cycle
+ * @dma_sconfig: configuration for slave transfers, passed via DMA_SLAVE_CONFIG
  * @lock: serializes enqueue/dequeue operations to descriptors lists
- * @completed_cookie: identifier for the most recently completed operation
 * @active_list: list of descriptors the dmaengine is running on
  * @queue: list of descriptors ready to be submitted to engine
  * @free_list: list of descriptors usable by the channel
@@ -223,11 +223,11 @@ struct at_dma_chan {
 	struct tasklet_struct	tasklet;
 	u32			save_cfg;
 	u32			save_dscr;
+	struct dma_slave_config dma_sconfig;
 
 	spinlock_t		lock;
 
 	/* these other elements are all protected by lock */
-	dma_cookie_t		completed_cookie;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
@@ -245,6 +245,36 @@ static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
 	return container_of(dchan, struct at_dma_chan, chan_common);
 }
 
+/*
+ * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
+ *
+ * This can be done by finding the most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+/*
+ * Fix sconfig's bus width according to at_hdmac.
+ * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
+ */
+static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return 1;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return 2;
+	default:
+		/* For 1 byte width or fallback */
+		return 0;
+	}
+}
 
 /*--  Controller  ------------------------------------------------------*/
 
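
To make the two encodings above concrete, here is a quick standalone check of the helpers (an illustrative sketch; fls() is the kernel's find-last-set, so fls(16) == 5):

static void atc_conversion_examples(void)
{
	u32 burst = 16;

	convert_burst(&burst);		/* fls(16) - 2 == 3 */
	BUG_ON(burst != 3);

	/* 4-byte bus width encodes as 2; 1-byte (and undefined) as 0 */
	BUG_ON(convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES) != 2);
	BUG_ON(convert_buswidth(DMA_SLAVE_BUSWIDTH_1_BYTE) != 0);
}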
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index d65a718c0f9b..dc89455f5550 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -24,6 +24,7 @@
 #include <mach/coh901318.h>
 
 #include "coh901318_lli.h"
+#include "dmaengine.h"
 
 #define COHC_2_DEV(cohc) (&cohc->chan.dev->device)
 
@@ -59,7 +60,6 @@ struct coh901318_base {
 struct coh901318_chan {
 	spinlock_t lock;
 	int allocated;
-	int completed;
 	int id;
 	int stopped;
 
@@ -318,20 +318,6 @@ static int coh901318_prep_linked_list(struct coh901318_chan *cohc,
 
 	return 0;
 }
-static dma_cookie_t
-coh901318_assign_cookie(struct coh901318_chan *cohc,
-			struct coh901318_desc *cohd)
-{
-	dma_cookie_t cookie = cohc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	cohc->chan.cookie = cookie;
-	cohd->desc.cookie = cookie;
-
-	return cookie;
-}
 
 static struct coh901318_desc *
 coh901318_desc_get(struct coh901318_chan *cohc)
@@ -705,7 +691,7 @@ static void dma_tasklet(unsigned long data)
 	callback_param = cohd_fin->desc.callback_param;
 
 	/* sign this job as completed on the channel */
-	cohc->completed = cohd_fin->desc.cookie;
+	dma_cookie_complete(&cohd_fin->desc);
 
 	/* release the lli allocation and remove the descriptor */
 	coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli);
@@ -929,7 +915,7 @@ static int coh901318_alloc_chan_resources(struct dma_chan *chan)
 	coh901318_config(cohc, NULL);
 
 	cohc->allocated = 1;
-	cohc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	spin_unlock_irqrestore(&cohc->lock, flags);
 
@@ -966,16 +952,16 @@ coh901318_tx_submit(struct dma_async_tx_descriptor *tx)
 						   desc);
 	struct coh901318_chan *cohc = to_coh901318_chan(tx->chan);
 	unsigned long flags;
+	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&cohc->lock, flags);
-
-	tx->cookie = coh901318_assign_cookie(cohc, cohd);
+	cookie = dma_cookie_assign(tx);
 
 	coh901318_desc_queue(cohc, cohd);
 
 	spin_unlock_irqrestore(&cohc->lock, flags);
 
-	return tx->cookie;
+	return cookie;
 }
 
 static struct dma_async_tx_descriptor *
@@ -1035,7 +1021,7 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 static struct dma_async_tx_descriptor *
 coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			unsigned int sg_len, enum dma_transfer_direction direction,
-			unsigned long flags)
+			unsigned long flags, void *context)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
 	struct coh901318_lli *lli;
@@ -1165,17 +1151,12 @@ coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		 struct dma_tx_state *txstate)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	int ret;
-
-	last_complete = cohc->completed;
-	last_used = chan->cookie;
+	enum dma_status ret;
 
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	/* FIXME: should be conditional on ret != DMA_SUCCESS? */
+	dma_set_residue(txstate, coh901318_get_bytes_left(chan));
 
-	dma_set_tx_state(txstate, last_complete, last_used,
-			 coh901318_get_bytes_left(chan));
 	if (ret == DMA_IN_PROGRESS && cohc->stopped)
 		ret = DMA_PAUSED;
 
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index a6c6051ec858..767bcc31b365 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -510,8 +510,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 					 dma_chan_name(chan));
 				list_del_rcu(&device->global_node);
 			} else if (err)
-				pr_debug("dmaengine: failed to get %s: (%d)\n",
-					 dma_chan_name(chan), err);
+				pr_debug("%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
 			else
 				break;
 			if (--device->privatecnt == 0)
@@ -564,8 +564,8 @@ void dmaengine_get(void)
 				list_del_rcu(&device->global_node);
 				break;
 			} else if (err)
-				pr_err("dmaengine: failed to get %s: (%d)\n",
-				       dma_chan_name(chan), err);
+				pr_err("%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
 		}
 	}
 
diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
new file mode 100644
index 000000000000..17f983a4e9ba
--- /dev/null
+++ b/drivers/dma/dmaengine.h
@@ -0,0 +1,89 @@
+/*
+ * The contents of this file are private to DMA engine drivers, and are not
+ * part of the API to be used by DMA engine users.
+ */
+#ifndef DMAENGINE_H
+#define DMAENGINE_H
+
+#include <linux/bug.h>
+#include <linux/dmaengine.h>
+
+/**
+ * dma_cookie_init - initialize the cookies for a DMA channel
+ * @chan: dma channel to initialize
+ */
+static inline void dma_cookie_init(struct dma_chan *chan)
+{
+	chan->cookie = DMA_MIN_COOKIE;
+	chan->completed_cookie = DMA_MIN_COOKIE;
+}
+
+/**
+ * dma_cookie_assign - assign a DMA engine cookie to the descriptor
+ * @tx: descriptor needing cookie
+ *
+ * Assign a unique non-zero per-channel cookie to the descriptor.
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *chan = tx->chan;
+	dma_cookie_t cookie;
+
+	cookie = chan->cookie + 1;
+	if (cookie < DMA_MIN_COOKIE)
+		cookie = DMA_MIN_COOKIE;
+	tx->cookie = chan->cookie = cookie;
+
+	return cookie;
+}
+
+/**
+ * dma_cookie_complete - complete a descriptor
+ * @tx: descriptor to complete
+ *
+ * Mark this descriptor complete by updating the channel's completed
+ * cookie marker.  Zero the descriptor's cookie to prevent accidental
+ * repeated completions.
+ *
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
+{
+	BUG_ON(tx->cookie < DMA_MIN_COOKIE);
+	tx->chan->completed_cookie = tx->cookie;
+	tx->cookie = 0;
+}
+
+/**
+ * dma_cookie_status - report cookie status
+ * @chan: dma channel
+ * @cookie: cookie we are interested in
+ * @state: dma_tx_state structure to return last/used cookies
+ *
+ * Report the status of the cookie, filling in the state structure if
+ * non-NULL.  No locking is required.
+ */
+static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	dma_cookie_t used, complete;
+
+	used = chan->cookie;
+	complete = chan->completed_cookie;
+	barrier();
+	if (state) {
+		state->last = complete;
+		state->used = used;
+		state->residue = 0;
+	}
+	return dma_async_is_complete(cookie, complete, used);
+}
+
+static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
+{
+	if (state)
+		state->residue = residue;
+}
+
+#endif
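
Taken together, these helpers replace three patterns that every driver used to open-code: cookie assignment in ->tx_submit(), completion marking in the tasklet or interrupt path, and cookie setup at channel init. A condensed sketch with hypothetical mydrv_* types, observing the locking rules from the kernel-doc above:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include "dmaengine.h"

struct mydrv_desc {			/* hypothetical driver types */
	struct dma_async_tx_descriptor	txd;
	struct list_head		node;
};

struct mydrv_chan {
	struct dma_chan			chan;
	spinlock_t			lock;
	struct list_head		pending;
};

static dma_cookie_t mydrv_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct mydrv_chan *mc = container_of(tx->chan, struct mydrv_chan, chan);
	struct mydrv_desc *d = container_of(tx, struct mydrv_desc, txd);
	unsigned long flags;
	dma_cookie_t cookie;

	spin_lock_irqsave(&mc->lock, flags);
	cookie = dma_cookie_assign(tx);		/* unique non-zero, under lock */
	list_add_tail(&d->node, &mc->pending);
	spin_unlock_irqrestore(&mc->lock, flags);

	return cookie;
}

At channel init the driver calls dma_cookie_init(&mc->chan); on completion, still under the channel lock, dma_cookie_complete(&d->txd) advances chan->completed_cookie and zeroes the descriptor's cookie so it cannot complete twice.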
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9b592b02b5f4..7439079f5eed 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -9,6 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
@@ -22,6 +23,7 @@
 #include <linux/slab.h>
 
 #include "dw_dmac_regs.h"
+#include "dmaengine.h"
 
 /*
  * This supports the Synopsys "DesignWare AHB Central DMA Controller",
@@ -33,19 +35,23 @@
  * which does not support descriptor writeback.
  */
 
-#define DWC_DEFAULT_CTLLO(private) ({				\
-		struct dw_dma_slave *__slave = (private);	\
-		int dms = __slave ? __slave->dst_master : 0;	\
-		int sms = __slave ? __slave->src_master : 1;	\
-		u8 smsize = __slave ? __slave->src_msize : DW_DMA_MSIZE_16; \
-		u8 dmsize = __slave ? __slave->dst_msize : DW_DMA_MSIZE_16; \
+#define DWC_DEFAULT_CTLLO(_chan) ({				\
+		struct dw_dma_slave *__slave = (_chan->private);	\
+		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
+		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
+		int _dms = __slave ? __slave->dst_master : 0;	\
+		int _sms = __slave ? __slave->src_master : 1;	\
+		u8 _smsize = __slave ? _sconfig->src_maxburst :	\
+			DW_DMA_MSIZE_16;			\
+		u8 _dmsize = __slave ? _sconfig->dst_maxburst :	\
+			DW_DMA_MSIZE_16;			\
 								\
-		(DWC_CTLL_DST_MSIZE(dmsize)			\
-		 | DWC_CTLL_SRC_MSIZE(smsize)			\
+		(DWC_CTLL_DST_MSIZE(_dmsize)			\
+		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
 		 | DWC_CTLL_LLP_D_EN				\
 		 | DWC_CTLL_LLP_S_EN				\
-		 | DWC_CTLL_DMS(dms)				\
-		 | DWC_CTLL_SMS(sms));				\
+		 | DWC_CTLL_DMS(_dms)				\
+		 | DWC_CTLL_SMS(_sms));				\
 	})
 
 /*
@@ -151,21 +157,6 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	}
 }
 
-/* Called with dwc->lock held and bh disabled */
-static dma_cookie_t
-dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
-{
-	dma_cookie_t cookie = dwc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	dwc->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
 static void dwc_initialize(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
@@ -192,7 +183,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
-	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_set_bit(dw, MASK.ERROR, dwc->mask);
 
 	dwc->initialized = true;
@@ -245,7 +235,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
 	dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	dwc->completed = txd->cookie;
+	dma_cookie_complete(txd);
 	if (callback_required) {
 		callback = txd->callback;
 		param = txd->callback_param;
@@ -329,12 +319,6 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	unsigned long flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	/*
-	 * Clear block interrupt flag before scanning so that we don't
-	 * miss any, and read LLP before RAW_XFER to ensure it is
-	 * valid if we decide to scan the list.
-	 */
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 	llp = channel_readl(dwc, LLP);
 	status_xfer = dma_readl(dw, RAW.XFER);
 
@@ -470,17 +454,16 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr);
 
 /* called with dwc->lock held and all DMAC interrupts disabled */
 static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
-		u32 status_block, u32 status_err, u32 status_xfer)
+		u32 status_err, u32 status_xfer)
 {
 	unsigned long flags;
 
-	if (status_block & dwc->mask) {
+	if (dwc->mask) {
 		void (*callback)(void *param);
 		void *callback_param;
 
 		dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n",
 				channel_readl(dwc, LLP));
-		dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 
 		callback = dwc->cdesc->period_callback;
 		callback_param = dwc->cdesc->period_callback_param;
@@ -520,7 +503,6 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
 		channel_writel(dwc, CTL_LO, 0);
 		channel_writel(dwc, CTL_HI, 0);
 
-		dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 		dma_writel(dw, CLEAR.ERROR, dwc->mask);
 		dma_writel(dw, CLEAR.XFER, dwc->mask);
 
@@ -537,36 +519,29 @@ static void dw_dma_tasklet(unsigned long data)
 {
 	struct dw_dma *dw = (struct dw_dma *)data;
 	struct dw_dma_chan *dwc;
-	u32 status_block;
 	u32 status_xfer;
 	u32 status_err;
 	int i;
 
-	status_block = dma_readl(dw, RAW.BLOCK);
 	status_xfer = dma_readl(dw, RAW.XFER);
 	status_err = dma_readl(dw, RAW.ERROR);
 
-	dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
-			status_block, status_err);
+	dev_vdbg(dw->dma.dev, "tasklet: status_err=%x\n", status_err);
 
 	for (i = 0; i < dw->dma.chancnt; i++) {
 		dwc = &dw->chan[i];
 		if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
-			dwc_handle_cyclic(dw, dwc, status_block, status_err,
-					status_xfer);
+			dwc_handle_cyclic(dw, dwc, status_err, status_xfer);
 		else if (status_err & (1 << i))
 			dwc_handle_error(dw, dwc);
-		else if ((status_block | status_xfer) & (1 << i))
+		else if (status_xfer & (1 << i))
 			dwc_scan_descriptors(dw, dwc);
 	}
 
 	/*
-	 * Re-enable interrupts. Block Complete interrupts are only
-	 * enabled if the INT_EN bit in the descriptor is set. This
-	 * will trigger a scan before the whole list is done.
+	 * Re-enable interrupts.
 	 */
 	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
 }
 
@@ -583,7 +558,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 	 * softirq handler.
 	 */
 	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
 
 	status = dma_readl(dw, STATUS_INT);
@@ -594,7 +568,6 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 
 		/* Try to recover */
 		channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
-		channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
 		channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
 		channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
 		channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
@@ -615,7 +588,7 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
 	unsigned long		flags;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	cookie = dwc_assign_cookie(dwc, desc);
+	cookie = dma_cookie_assign(tx);
 
 	/*
 	 * REVISIT: We should attempt to chain as many descriptors as
@@ -674,7 +647,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	else
 		src_width = dst_width = 0;
 
-	ctllo = DWC_DEFAULT_CTLLO(chan->private)
+	ctllo = DWC_DEFAULT_CTLLO(chan)
 			| DWC_CTLL_DST_WIDTH(dst_width)
 			| DWC_CTLL_SRC_WIDTH(src_width)
 			| DWC_CTLL_DST_INC
@@ -731,10 +704,11 @@ err_desc_get:
 static struct dma_async_tx_descriptor *
 dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma_slave	*dws = chan->private;
+	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
 	u32			ctllo;
@@ -750,25 +724,34 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (unlikely(!dws || !sg_len))
 		return NULL;
 
-	reg_width = dws->reg_width;
 	prev = first = NULL;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
-		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+		reg_width = __fls(sconfig->dst_addr_width);
+		reg = sconfig->dst_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
-				| DWC_CTLL_SRC_INC
-				| DWC_CTLL_FC(dws->fc));
-		reg = dws->tx_reg;
+				| DWC_CTLL_SRC_INC);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
 
 			mem = sg_phys(sg);
 			len = sg_dma_len(sg);
-			mem_width = 2;
-			if (unlikely(mem & 3 || len & 3))
+
+			if (!((mem | len) & 7))
+				mem_width = 3;
+			else if (!((mem | len) & 3))
+				mem_width = 2;
+			else if (!((mem | len) & 1))
+				mem_width = 1;
+			else
 				mem_width = 0;
 
 slave_sg_todev_fill_desc:
@@ -812,21 +795,30 @@ slave_sg_todev_fill_desc:
 		}
 		break;
 	case DMA_DEV_TO_MEM:
-		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+		reg_width = __fls(sconfig->src_addr_width);
+		reg = sconfig->src_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
-				| DWC_CTLL_SRC_FIX
-				| DWC_CTLL_FC(dws->fc));
+				| DWC_CTLL_SRC_FIX);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
 
-		reg = dws->rx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
 			u32		len, dlen, mem;
 
 			mem = sg_phys(sg);
 			len = sg_dma_len(sg);
-			mem_width = 2;
-			if (unlikely(mem & 3 || len & 3))
+
+			if (!((mem | len) & 7))
+				mem_width = 3;
+			else if (!((mem | len) & 3))
+				mem_width = 2;
+			else if (!((mem | len) & 1))
+				mem_width = 1;
+			else
 				mem_width = 0;
 
 slave_sg_fromdev_fill_desc:
@@ -890,6 +882,39 @@ err_desc_get:
 	return NULL;
 }
 
+/*
+ * Fix sconfig's burst size according to dw_dmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+ *
+ * NOTE: burst size 2 is not supported by the controller.
+ *
+ * This can be done by finding the most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+static int
+set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+	/* Check if chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&dwc->dma_sconfig.src_maxburst);
+	convert_burst(&dwc->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
 static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		       unsigned long arg)
 {
@@ -939,8 +964,11 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		/* Flush all pending and queued descriptors */
 		list_for_each_entry_safe(desc, _desc, &list, desc_node)
 			dwc_descriptor_complete(dwc, desc, false);
-	} else
+	} else if (cmd == DMA_SLAVE_CONFIG) {
+		return set_runtime_config(chan, (struct dma_slave_config *)arg);
+	} else {
 		return -ENXIO;
+	}
 
 	return 0;
 }
@@ -951,28 +979,17 @@ dwc_tx_status(struct dma_chan *chan,
 	      struct dma_tx_state *txstate)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
-	dma_cookie_t		last_used;
-	dma_cookie_t		last_complete;
-	int			ret;
-
-	last_complete = dwc->completed;
-	last_used = chan->cookie;
+	enum dma_status		ret;
 
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
-		last_complete = dwc->completed;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
 	if (ret != DMA_SUCCESS)
-		dma_set_tx_state(txstate, last_complete, last_used,
-				dwc_first_active(dwc)->len);
-	else
-		dma_set_tx_state(txstate, last_complete, last_used, 0);
+		dma_set_residue(txstate, dwc_first_active(dwc)->len);
 
 	if (dwc->paused)
 		return DMA_PAUSED;
@@ -1004,7 +1021,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 		return -EIO;
 	}
 
-	dwc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	/*
 	 * NOTE: some controllers may have additional features that we
@@ -1068,7 +1085,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 
 	/* Disable interrupts */
 	channel_clear_bit(dw, MASK.XFER, dwc->mask);
-	channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
 	channel_clear_bit(dw, MASK.ERROR, dwc->mask);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
@@ -1120,7 +1136,6 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
 		return -EBUSY;
 	}
 
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
 
@@ -1175,11 +1190,11 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		enum dma_transfer_direction direction)
 {
 	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
+	struct dma_slave_config		*sconfig = &dwc->dma_sconfig;
 	struct dw_cyclic_desc		*cdesc;
 	struct dw_cyclic_desc		*retval = NULL;
 	struct dw_desc			*desc;
 	struct dw_desc			*last = NULL;
-	struct dw_dma_slave		*dws = chan->private;
 	unsigned long			was_cyclic;
 	unsigned int			reg_width;
 	unsigned int			periods;
@@ -1203,7 +1218,12 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 	}
 
 	retval = ERR_PTR(-EINVAL);
-	reg_width = dws->reg_width;
+
+	if (direction == DMA_MEM_TO_DEV)
+		reg_width = __ffs(sconfig->dst_addr_width);
+	else
+		reg_width = __ffs(sconfig->src_addr_width);
+
 	periods = buf_len / period_len;
 
 	/* Check for too big/unaligned periods and unaligned DMA buffer. */
@@ -1236,26 +1256,34 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 
 		switch (direction) {
 		case DMA_MEM_TO_DEV:
-			desc->lli.dar = dws->tx_reg;
+			desc->lli.dar = sconfig->dst_addr;
 			desc->lli.sar = buf_addr + (period_len * i);
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
 					| DWC_CTLL_DST_WIDTH(reg_width)
 					| DWC_CTLL_SRC_WIDTH(reg_width)
 					| DWC_CTLL_DST_FIX
 					| DWC_CTLL_SRC_INC
-					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
+
+			desc->lli.ctllo |= sconfig->device_fc ?
+				DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+				DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
 			break;
 		case DMA_DEV_TO_MEM:
 			desc->lli.dar = buf_addr + (period_len * i);
-			desc->lli.sar = dws->rx_reg;
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
+			desc->lli.sar = sconfig->src_addr;
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
 					| DWC_CTLL_SRC_WIDTH(reg_width)
 					| DWC_CTLL_DST_WIDTH(reg_width)
 					| DWC_CTLL_DST_INC
 					| DWC_CTLL_SRC_FIX
-					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
+
+			desc->lli.ctllo |= sconfig->device_fc ?
+				DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+				DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
 			break;
 		default:
 			break;
@@ -1322,7 +1350,6 @@ void dw_dma_cyclic_free(struct dma_chan *chan)
 	while (dma_readl(dw, CH_EN) & dwc->mask)
 		cpu_relax();
 
-	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
 	dma_writel(dw, CLEAR.XFER, dwc->mask);
 
@@ -1347,7 +1374,6 @@ static void dw_dma_off(struct dw_dma *dw)
 	dma_writel(dw, CFG, 0);
 
 	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
@@ -1369,7 +1395,7 @@ static int __init dw_probe(struct platform_device *pdev)
 	int			err;
 	int			i;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
 		return -EINVAL;
 
@@ -1423,7 +1449,7 @@ static int __init dw_probe(struct platform_device *pdev)
 		struct dw_dma_chan	*dwc = &dw->chan[i];
 
 		dwc->chan.device = &dw->dma;
-		dwc->chan.cookie = dwc->completed = 1;
+		dma_cookie_init(&dwc->chan);
 		if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING)
 			list_add_tail(&dwc->chan.device_node,
 					&dw->dma.channels);
@@ -1432,7 +1458,7 @@ static int __init dw_probe(struct platform_device *pdev)
 
 		/* 7 is highest priority & 0 is lowest. */
 		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
-			dwc->priority = 7 - i;
+			dwc->priority = pdata->nr_channels - i - 1;
 		else
 			dwc->priority = i;
 
@@ -1449,13 +1475,11 @@ static int __init dw_probe(struct platform_device *pdev)
 
 	/* Clear/disable all interrupts on all channels. */
 	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
-	dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
 	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
 
 	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
-	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
 	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
@@ -1562,6 +1586,10 @@ static int dw_resume_noirq(struct device *dev)
 static const struct dev_pm_ops dw_dev_pm_ops = {
 	.suspend_noirq = dw_suspend_noirq,
 	.resume_noirq = dw_resume_noirq,
+	.freeze_noirq = dw_suspend_noirq,
+	.thaw_noirq = dw_resume_noirq,
+	.restore_noirq = dw_resume_noirq,
+	.poweroff_noirq = dw_suspend_noirq,
 };
 
 static struct platform_driver dw_driver = {
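
Two behavioural changes in the dw_dmac conversion are easy to miss. The memory-side access width is no longer hard-wired to 32-bit: the (mem | len) tests in the slave_sg hunks pick the widest power-of-two access that both the buffer address and the length allow. And the flow-controller field now derives from sconfig->device_fc instead of the old dws->fc. A standalone restatement of the width selection (same logic as the hunks above; the helper name is hypothetical):

/* Widest transfer both address and length are aligned to: 2^mem_width bytes */
static unsigned int dwc_fast_mem_width(u32 mem, u32 len)
{
	if (!((mem | len) & 7))
		return 3;		/* 8-byte accesses */
	else if (!((mem | len) & 3))
		return 2;		/* 4-byte accesses */
	else if (!((mem | len) & 1))
		return 1;		/* 2-byte accesses */
	return 0;			/* fall back to byte accesses */
}

For example, dwc_fast_mem_width(0x1004, 64) returns 2: the length is 8-byte aligned but the address is only 4-byte aligned.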
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 5eef6946a367..f298f69ecbf9 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -13,6 +13,18 @@
 
 #define DW_DMA_MAX_NR_CHANNELS	8
 
+/* flow controller */
+enum dw_dma_fc {
+	DW_DMA_FC_D_M2M,
+	DW_DMA_FC_D_M2P,
+	DW_DMA_FC_D_P2M,
+	DW_DMA_FC_D_P2P,
+	DW_DMA_FC_P_P2M,
+	DW_DMA_FC_SP_P2P,
+	DW_DMA_FC_P_M2P,
+	DW_DMA_FC_DP_P2P,
+};
+
 /*
  * Redefine this macro to handle differences between 32- and 64-bit
  * addressing, big vs. little endian, etc.
@@ -146,13 +158,15 @@ struct dw_dma_chan {
 
 	/* these other elements are all protected by lock */
 	unsigned long		flags;
-	dma_cookie_t		completed;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
 	struct dw_cyclic_desc	*cdesc;
 
 	unsigned int		descs_allocated;
+
+	/* configuration passed via DMA_SLAVE_CONFIG */
+	struct dma_slave_config dma_sconfig;
 };
 
 static inline struct dw_dma_chan_regs __iomem *
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 59e7a965772b..e6f133b78dc2 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -28,6 +28,8 @@
 
 #include <mach/dma.h>
 
+#include "dmaengine.h"
+
 /* M2P registers */
 #define M2P_CONTROL			0x0000
 #define M2P_CONTROL_STALLINT		BIT(0)
@@ -122,7 +124,6 @@ struct ep93xx_dma_desc {
  * @lock: lock protecting the fields following
  * @flags: flags for the channel
  * @buffer: which buffer to use next (0/1)
- * @last_completed: last completed cookie value
  * @active: flattened chain of descriptors currently being processed
  * @queue: pending descriptors which are handled next
  * @free_list: list of free descriptors which can be used
@@ -157,7 +158,6 @@ struct ep93xx_dma_chan {
 #define EP93XX_DMA_IS_CYCLIC		0
 
 	int				buffer;
-	dma_cookie_t			last_completed;
 	struct list_head		active;
 	struct list_head		queue;
 	struct list_head		free_list;
@@ -703,7 +703,7 @@ static void ep93xx_dma_tasklet(unsigned long data)
 	desc = ep93xx_dma_get_active(edmac);
 	if (desc) {
 		if (desc->complete) {
-			edmac->last_completed = desc->txd.cookie;
+			dma_cookie_complete(&desc->txd);
 			list_splice_init(&edmac->active, &list);
 		}
 		callback = desc->txd.callback;
@@ -783,17 +783,10 @@ static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	unsigned long flags;
 
 	spin_lock_irqsave(&edmac->lock, flags);
-
-	cookie = edmac->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
+	cookie = dma_cookie_assign(tx);
 
 	desc = container_of(tx, struct ep93xx_dma_desc, txd);
 
-	edmac->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
 	/*
 	 * If nothing is currently processed, we push this descriptor
 	 * directly to the hardware. Otherwise we put the descriptor
@@ -861,8 +854,7 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 		goto fail_clk_disable;
 
 	spin_lock_irq(&edmac->lock);
-	edmac->last_completed = 1;
-	edmac->chan.cookie = 1;
+	dma_cookie_init(&edmac->chan);
 	ret = edmac->edma->hw_setup(edmac);
 	spin_unlock_irq(&edmac->lock);
 
@@ -983,13 +975,14 @@ fail:
  * @sg_len: number of entries in @sgl
 * @dir: direction of the DMA transfer
  * @flags: flags for the descriptor
+ * @context: operation context (ignored)
  *
  * Returns a valid DMA descriptor or %NULL in case of failure.
  */
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			 unsigned int sg_len, enum dma_transfer_direction dir,
-			 unsigned long flags)
+			 unsigned long flags, void *context)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1056,6 +1049,7 @@ fail:
  * @buf_len: length of the buffer (in bytes)
 * @period_len: length of a single period
  * @dir: direction of the operation
+ * @context: operation context (ignored)
  *
  * Prepares a descriptor for cyclic DMA operation. This means that once the
  * descriptor is submitted, we will be submitting in a @period_len sized
@@ -1068,7 +1062,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_transfer_direction dir)
+			   enum dma_transfer_direction dir, void *context)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1248,18 +1242,13 @@ static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan,
 					    struct dma_tx_state *state)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
-	dma_cookie_t last_used, last_completed;
 	enum dma_status ret;
 	unsigned long flags;
 
 	spin_lock_irqsave(&edmac->lock, flags);
-	last_used = chan->cookie;
-	last_completed = edmac->last_completed;
+	ret = dma_cookie_status(chan, cookie, state);
 	spin_unlock_irqrestore(&edmac->lock, flags);
 
-	ret = dma_async_is_complete(cookie, last_completed, last_used);
-	dma_set_tx_state(state, last_completed, last_used, 0);
-
 	return ret;
 }
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index b98070c33ca9..8f84761f98ba 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -35,6 +35,7 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
+#include "dmaengine.h"
 #include "fsldma.h"
 
 #define chan_dbg(chan, fmt, arg...)					\
@@ -413,17 +414,10 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	 * assign cookies to all of the software descriptors
 	 * that make up this transaction
 	 */
-	cookie = chan->common.cookie;
 	list_for_each_entry(child, &desc->tx_list, node) {
-		cookie++;
-		if (cookie < DMA_MIN_COOKIE)
-			cookie = DMA_MIN_COOKIE;
-
-		child->async_tx.cookie = cookie;
+		cookie = dma_cookie_assign(&child->async_tx);
 	}
 
-	chan->common.cookie = cookie;
-
 	/* put this transaction onto the tail of the pending queue */
 	append_ld_queue(chan, desc);
 
@@ -765,6 +759,7 @@ fail:
  * @sg_len: number of entries in @scatterlist
  * @direction: DMA direction
  * @flags: DMAEngine flags
+ * @context: transaction context (ignored)
  *
  * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
  * DMA_SLAVE API, this gets the device-specific information from the
@@ -772,7 +767,8 @@ fail:
  */
 static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
 {
 	/*
 	 * This operation is not supported on the Freescale DMA controller
@@ -984,19 +980,14 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 					struct dma_tx_state *txstate)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	dma_cookie_t last_complete;
-	dma_cookie_t last_used;
+	enum dma_status ret;
 	unsigned long flags;
 
 	spin_lock_irqsave(&chan->desc_lock, flags);
-
-	last_complete = chan->completed_cookie;
-	last_used = dchan->cookie;
-
+	ret = dma_cookie_status(dchan, cookie, txstate);
 	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return ret;
 }
 
 /*----------------------------------------------------------------------------*/
@@ -1087,8 +1078,8 @@ static void dma_do_tasklet(unsigned long data)
 
 		desc = to_fsl_desc(chan->ld_running.prev);
 		cookie = desc->async_tx.cookie;
+		dma_cookie_complete(&desc->async_tx);
 
-		chan->completed_cookie = cookie;
 		chan_dbg(chan, "completed_cookie=%d\n", cookie);
 	}
 
@@ -1303,6 +1294,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
 	chan->idle = true;
 
 	chan->common.device = &fdev->common;
+	dma_cookie_init(&chan->common);
 
 	/* find the IRQ line, if it exists in the device tree */
 	chan->irq = irq_of_parse_and_map(node, 0);
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 9cb5aa57c677..f5c38791fc74 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -137,7 +137,6 @@ struct fsldma_device {
 struct fsldma_chan {
 	char name[8];			/* Channel name */
 	struct fsldma_chan_regs __iomem *regs;
-	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
 	spinlock_t desc_lock;		/* Descriptor operation lock */
 	struct list_head ld_pending;	/* Link descriptors queue */
 	struct list_head ld_running;	/* Link descriptors queue */
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 38586ba8da91..a45b5d2a5987 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -5,6 +5,7 @@
  * found on i.MX1/21/27
  *
  * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
@@ -22,37 +23,159 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
+#include <linux/clk.h>
 #include <linux/dmaengine.h>
 #include <linux/module.h>
 
 #include <asm/irq.h>
-#include <mach/dma-v1.h>
+#include <mach/dma.h>
 #include <mach/hardware.h>
 
+#include "dmaengine.h"
+#define IMXDMA_MAX_CHAN_DESCRIPTORS	16
+#define IMX_DMA_CHANNELS  16
+
+#define IMX_DMA_2D_SLOTS	2
+#define IMX_DMA_2D_SLOT_A	0
+#define IMX_DMA_2D_SLOT_B	1
+
+#define IMX_DMA_LENGTH_LOOP	((unsigned int)-1)
+#define IMX_DMA_MEMSIZE_32	(0 << 4)
+#define IMX_DMA_MEMSIZE_8	(1 << 4)
+#define IMX_DMA_MEMSIZE_16	(2 << 4)
+#define IMX_DMA_TYPE_LINEAR	(0 << 10)
+#define IMX_DMA_TYPE_2D		(1 << 10)
+#define IMX_DMA_TYPE_FIFO	(2 << 10)
+
+#define IMX_DMA_ERR_BURST     (1 << 0)
+#define IMX_DMA_ERR_REQUEST   (1 << 1)
+#define IMX_DMA_ERR_TRANSFER  (1 << 2)
+#define IMX_DMA_ERR_BUFFER    (1 << 3)
+#define IMX_DMA_ERR_TIMEOUT   (1 << 4)
+
+#define DMA_DCR     0x00		/* Control Register */
+#define DMA_DISR    0x04		/* Interrupt status Register */
+#define DMA_DIMR    0x08		/* Interrupt mask Register */
+#define DMA_DBTOSR  0x0c		/* Burst timeout status Register */
+#define DMA_DRTOSR  0x10		/* Request timeout Register */
+#define DMA_DSESR   0x14		/* Transfer Error Status Register */
+#define DMA_DBOSR   0x18		/* Buffer overflow status Register */
+#define DMA_DBTOCR  0x1c		/* Burst timeout control Register */
+#define DMA_WSRA    0x40		/* W-Size Register A */
+#define DMA_XSRA    0x44		/* X-Size Register A */
+#define DMA_YSRA    0x48		/* Y-Size Register A */
+#define DMA_WSRB    0x4c		/* W-Size Register B */
+#define DMA_XSRB    0x50		/* X-Size Register B */
+#define DMA_YSRB    0x54		/* Y-Size Register B */
+#define DMA_SAR(x)  (0x80 + ((x) << 6))	/* Source Address Registers */
+#define DMA_DAR(x)  (0x84 + ((x) << 6))	/* Destination Address Registers */
+#define DMA_CNTR(x) (0x88 + ((x) << 6))	/* Count Registers */
+#define DMA_CCR(x)  (0x8c + ((x) << 6))	/* Control Registers */
+#define DMA_RSSR(x) (0x90 + ((x) << 6))	/* Request source select Registers */
+#define DMA_BLR(x)  (0x94 + ((x) << 6))	/* Burst length Registers */
+#define DMA_RTOR(x) (0x98 + ((x) << 6))	/* Request timeout Registers */
+#define DMA_BUCR(x) (0x98 + ((x) << 6))	/* Bus Utilization Registers */
+#define DMA_CCNR(x) (0x9C + ((x) << 6))	/* Channel counter Registers */
+
+#define DCR_DRST           (1<<1)
+#define DCR_DEN            (1<<0)
+#define DBTOCR_EN          (1<<15)
+#define DBTOCR_CNT(x)      ((x) & 0x7fff)
+#define CNTR_CNT(x)        ((x) & 0xffffff)
+#define CCR_ACRPT          (1<<14)
+#define CCR_DMOD_LINEAR    (0x0 << 12)
+#define CCR_DMOD_2D        (0x1 << 12)
+#define CCR_DMOD_FIFO      (0x2 << 12)
+#define CCR_DMOD_EOBFIFO   (0x3 << 12)
+#define CCR_SMOD_LINEAR    (0x0 << 10)
+#define CCR_SMOD_2D        (0x1 << 10)
+#define CCR_SMOD_FIFO      (0x2 << 10)
+#define CCR_SMOD_EOBFIFO   (0x3 << 10)
+#define CCR_MDIR_DEC       (1<<9)
+#define CCR_MSEL_B         (1<<8)
+#define CCR_DSIZ_32        (0x0 << 6)
+#define CCR_DSIZ_8         (0x1 << 6)
+#define CCR_DSIZ_16        (0x2 << 6)
+#define CCR_SSIZ_32        (0x0 << 4)
+#define CCR_SSIZ_8         (0x1 << 4)
+#define CCR_SSIZ_16        (0x2 << 4)
+#define CCR_REN            (1<<3)
+#define CCR_RPT            (1<<2)
+#define CCR_FRC            (1<<1)
+#define CCR_CEN            (1<<0)
+#define RTOR_EN            (1<<15)
+#define RTOR_CLK           (1<<14)
+#define RTOR_PSC           (1<<13)
+
+enum  imxdma_prep_type {
+	IMXDMA_DESC_MEMCPY,
+	IMXDMA_DESC_INTERLEAVED,
+	IMXDMA_DESC_SLAVE_SG,
+	IMXDMA_DESC_CYCLIC,
+};
+
+struct imx_dma_2d_config {
+	u16		xsr;
+	u16		ysr;
+	u16		wsr;
+	int		count;
+};
+
+struct imxdma_desc {
+	struct list_head		node;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	dma_addr_t			src;
+	dma_addr_t			dest;
+	size_t				len;
+	enum dma_transfer_direction	direction;
+	enum imxdma_prep_type		type;
+	/* For memcpy and interleaved */
+	unsigned int			config_port;
+	unsigned int			config_mem;
+	/* For interleaved transfers */
+	unsigned int			x;
+	unsigned int			y;
+	unsigned int			w;
+	/* For slave sg and cyclic */
+	struct scatterlist		*sg;
+	unsigned int			sgcount;
+};
+
 struct imxdma_channel {
+	int				hw_chaining;
+	struct timer_list		watchdog;
 	struct imxdma_engine		*imxdma;
 	unsigned int			channel;
-	unsigned int			imxdma_channel;
 
+	struct tasklet_struct		dma_tasklet;
+	struct list_head		ld_free;
+	struct list_head		ld_queue;
+	struct list_head		ld_active;
+	int				descs_allocated;
 	enum dma_slave_buswidth		word_size;
 	dma_addr_t			per_address;
 	u32				watermark_level;
 	struct dma_chan			chan;
-	spinlock_t			lock;
 	struct dma_async_tx_descriptor	desc;
-	dma_cookie_t			last_completed;
 	enum dma_status			status;
 	int				dma_request;
 	struct scatterlist		*sg_list;
+	u32				ccr_from_device;
+	u32				ccr_to_device;
+	bool				enabled_2d;
+	int				slot_2d;
 };
 
-#define MAX_DMA_CHANNELS 8
-
 struct imxdma_engine {
 	struct device			*dev;
 	struct device_dma_parameters	dma_parms;
 	struct dma_device		dma_device;
-	struct imxdma_channel		channel[MAX_DMA_CHANNELS];
+	void __iomem			*base;
+	struct clk			*dma_clk;
+	spinlock_t			lock;
+	struct imx_dma_2d_config	slots_2d[IMX_DMA_2D_SLOTS];
+	struct imxdma_channel		channel[IMX_DMA_CHANNELS];
 };
 
 static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
@@ -60,36 +183,418 @@ static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
 	return container_of(chan, struct imxdma_channel, chan);
 }
 
-static void imxdma_handle(struct imxdma_channel *imxdmac)
+static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_desc *desc;
+
+	if (!list_empty(&imxdmac->ld_active)) {
+		desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc,
+					node);
+		if (desc->type == IMXDMA_DESC_CYCLIC)
+			return true;
+	}
+	return false;
+}
+
+static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val,
+			     unsigned offset)
+{
+	__raw_writel(val, imxdma->base + offset);
+}
+
+static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset)
+{
+	return __raw_readl(imxdma->base + offset);
+}
+
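+/*
+ * Hardware chaining lets the DMAC load the next segment on its own
+ * (CCR_RPT/CCR_ACRPT); it is only usable on i.MX27, so on the other
+ * SoCs the next segment is always programmed from the irq handler.
+ */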
+static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
+{
+	if (cpu_is_mx27())
+		return imxdmac->hw_chaining;
+	else
+		return 0;
+}
+
+/*
+ * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation
+ */
+static inline int imxdma_sg_next(struct imxdma_desc *d)
 {
-	if (imxdmac->desc.callback)
-		imxdmac->desc.callback(imxdmac->desc.callback_param);
-	imxdmac->last_completed = imxdmac->desc.cookie;
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct scatterlist *sg = d->sg;
+	unsigned long now;
+
+	now = min(d->len, sg->length);
+	if (d->len != IMX_DMA_LENGTH_LOOP)
+		d->len -= now;
+
+	if (d->direction == DMA_DEV_TO_MEM)
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_DAR(imxdmac->channel));
+	else
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_SAR(imxdmac->channel));
+
+	imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel));
+
+	dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, "
+		"size 0x%08x\n", __func__, imxdmac->channel,
+		 imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel)));
+
+	return now;
 }
 
-static void imxdma_irq_handler(int channel, void *data)
+static void imxdma_enable_hw(struct imxdma_desc *d)
 {
-	struct imxdma_channel *imxdmac = data;
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	local_irq_save(flags);
+
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) &
+			 ~(1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) |
+			 CCR_CEN | CCR_ACRPT, DMA_CCR(channel));
+
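+	/*
+	 * With hardware chaining enabled, pre-program the next segment
+	 * and set the repeat bits so the controller restarts by itself.
+	 */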
+	if ((cpu_is_mx21() || cpu_is_mx27()) &&
+			d->sg && imxdma_hw_chain(imxdmac)) {
+		d->sg = sg_next(d->sg);
+		if (d->sg) {
+			u32 tmp;
+			imxdma_sg_next(d);
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel));
+			imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT,
+					 DMA_CCR(channel));
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+static void imxdma_disable_hw(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	if (imxdma_hw_chain(imxdmac))
+		del_timer(&imxdmac->watchdog);
+
+	local_irq_save(flags);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) |
+			 (1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) &
+			 ~CCR_CEN, DMA_CCR(channel));
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	local_irq_restore(flags);
+}
+
+static void imxdma_watchdog(unsigned long data)
+{
+	struct imxdma_channel *imxdmac = (struct imxdma_channel *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(channel));
 
-	imxdmac->status = DMA_SUCCESS;
-	imxdma_handle(imxdmac);
+	/* Tasklet watchdog error handler */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+	dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n",
+		imxdmac->channel);
 }
 
-static void imxdma_err_handler(int channel, void *data, int error)
+static irqreturn_t imxdma_err_handler(int irq, void *dev_id)
 {
-	struct imxdma_channel *imxdmac = data;
+	struct imxdma_engine *imxdma = dev_id;
+	unsigned int err_mask;
+	int i, disr;
+	int errcode;
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
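+	/* Merge the four error status registers into one per-channel mask */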
+	err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DRTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DSESR)  |
+		   imx_dmav1_readl(imxdma, DMA_DBOSR);
+
+	if (!err_mask)
+		return IRQ_HANDLED;
+
+	imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR);
+
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (!(err_mask & (1 << i)))
+			continue;
+		errcode = 0;
+
+		if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR);
+			errcode |= IMX_DMA_ERR_BURST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR);
+			errcode |= IMX_DMA_ERR_REQUEST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR);
+			errcode |= IMX_DMA_ERR_TRANSFER;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR);
+			errcode |= IMX_DMA_ERR_BUFFER;
+		}
+		/* Tasklet error handler */
+		tasklet_schedule(&imxdma->channel[i].dma_tasklet);
+
+		printk(KERN_WARNING
+		       "DMA timeout on channel %d -%s%s%s%s\n", i,
+		       errcode & IMX_DMA_ERR_BURST ?    " burst" : "",
+		       errcode & IMX_DMA_ERR_REQUEST ?  " request" : "",
+		       errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "",
+		       errcode & IMX_DMA_ERR_BUFFER ?   " buffer" : "");
+	}
+	return IRQ_HANDLED;
+}
+
+static void dma_irq_handle_channel(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int chno = imxdmac->channel;
+	struct imxdma_desc *desc;
+
+	spin_lock(&imxdma->lock);
+	if (list_empty(&imxdmac->ld_active)) {
+		spin_unlock(&imxdma->lock);
+		goto out;
+	}
+
+	desc = list_first_entry(&imxdmac->ld_active,
+				struct imxdma_desc,
+				node);
+	spin_unlock(&imxdma->lock);
+
+	if (desc->sg) {
+		u32 tmp;
+		desc->sg = sg_next(desc->sg);
+
+		if (desc->sg) {
+			imxdma_sg_next(desc);
+
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno));
+
+			if (imxdma_hw_chain(imxdmac)) {
+				/* FIXME: The timeout should probably be
+				 * configurable
+				 */
+				mod_timer(&imxdmac->watchdog,
+					jiffies + msecs_to_jiffies(500));
+
+				tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT;
+				imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+			} else {
+				imx_dmav1_writel(imxdma, tmp & ~CCR_CEN,
+						 DMA_CCR(chno));
+				tmp |= CCR_CEN;
+			}
+
+			imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+
+			if (imxdma_chan_is_doing_cyclic(imxdmac))
+				/* Tasklet progression */
+				tasklet_schedule(&imxdmac->dma_tasklet);
+
+			return;
+		}
+
+		if (imxdma_hw_chain(imxdmac)) {
+			del_timer(&imxdmac->watchdog);
+			return;
+		}
+	}
+
+out:
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(chno));
+	/* Tasklet irq */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+}
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+	struct imxdma_engine *imxdma = dev_id;
+	int i, disr;
+
+	if (cpu_is_mx21() || cpu_is_mx27())
+		imxdma_err_handler(irq, dev_id);
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr);
+
+	imx_dmav1_writel(imxdma, disr, DMA_DISR);
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (disr & (1 << i))
+			dma_irq_handle_channel(&imxdma->channel[i]);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int imxdma_xfer_desc(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
+	int slot = -1;
+	int i;
+
+	/* Configure and enable */
+	switch (d->type) {
+	case IMXDMA_DESC_INTERLEAVED:
+		/* Try to get a free 2D slot */
+		spin_lock_irqsave(&imxdma->lock, flags);
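+		/*
+		 * A 2D slot can be shared by transfers with identical
+		 * x/y/w geometry; skip slots busy with a different one.
+		 */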
+		for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
+			if ((imxdma->slots_2d[i].count > 0) &&
+			((imxdma->slots_2d[i].xsr != d->x) ||
+			(imxdma->slots_2d[i].ysr != d->y) ||
+			(imxdma->slots_2d[i].wsr != d->w)))
+				continue;
+			slot = i;
+			break;
+		}
+		if (slot < 0) {
+			spin_unlock_irqrestore(&imxdma->lock, flags);
+			return -EBUSY;
+		}
+
+		imxdma->slots_2d[slot].xsr = d->x;
+		imxdma->slots_2d[slot].ysr = d->y;
+		imxdma->slots_2d[slot].wsr = d->w;
+		imxdma->slots_2d[slot].count++;
+
+		imxdmac->slot_2d = slot;
+		imxdmac->enabled_2d = true;
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+
+		if (slot == IMX_DMA_2D_SLOT_A) {
+			d->config_mem &= ~CCR_MSEL_B;
+			d->config_port &= ~CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRA);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRA);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRA);
+		} else {
+			d->config_mem |= CCR_MSEL_B;
+			d->config_port |= CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRB);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRB);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRB);
+		}
+		/*
+		 * We intentionally fall through here: a 2D transfer is just
+		 * a MEMCPY with the 2D slot configuration added.
+		 */
+	case IMXDMA_DESC_MEMCPY:
+		imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2),
+			 DMA_CCR(imxdmac->channel));
+
+		imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
+
+		dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
+			"dma_length=%d\n", __func__, imxdmac->channel,
+			d->dest, d->src, d->len);
+
+		break;
+	/* Cyclic transfer is the same as slave_sg with special sg configuration. */
+	case IMXDMA_DESC_CYCLIC:
+	case IMXDMA_DESC_SLAVE_SG:
+		if (d->direction == DMA_DEV_TO_MEM) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_SAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
+				"total length=%d dev_addr=0x%08x (dev2mem)\n",
+				__func__, imxdmac->channel, d->sg, d->sgcount,
+				d->len, imxdmac->per_address);
+		} else if (d->direction == DMA_MEM_TO_DEV) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_DAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
+				"total length=%d dev_addr=0x%08x (mem2dev)\n",
+				__func__, imxdmac->channel, d->sg, d->sgcount,
+				d->len, imxdmac->per_address);
+		} else {
+			dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
+				__func__, imxdmac->channel);
+			return -EINVAL;
+		}
+
+		imxdma_sg_next(d);
 
-	imxdmac->status = DMA_ERROR;
-	imxdma_handle(imxdmac);
+		break;
+	default:
+		return -EINVAL;
+	}
+	imxdma_enable_hw(d);
+	return 0;
 }
 
-static void imxdma_progression(int channel, void *data,
-		struct scatterlist *sg)
+static void imxdma_tasklet(unsigned long data)
 {
-	struct imxdma_channel *imxdmac = data;
+	struct imxdma_channel *imxdmac = (void *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
 
-	imxdmac->status = DMA_SUCCESS;
-	imxdma_handle(imxdmac);
+	spin_lock(&imxdma->lock);
+
+	if (list_empty(&imxdmac->ld_active)) {
+		/* Someone might have called terminate all */
+		goto out;
+	}
+	desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node);
+
+	if (desc->desc.callback)
+		desc->desc.callback(desc->desc.callback_param);
+
+	dma_cookie_complete(&desc->desc);
+
+	/* If we are dealing with a cyclic descriptor keep it on ld_active */
+	if (imxdma_chan_is_doing_cyclic(imxdmac))
+		goto out;
+
+	/* Free 2D slot if it was an interleaved transfer */
+	if (imxdmac->enabled_2d) {
+		imxdma->slots_2d[imxdmac->slot_2d].count--;
+		imxdmac->enabled_2d = false;
+	}
+
+	list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
+
+	if (!list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc,
+					node);
+		list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active);
+		if (imxdma_xfer_desc(desc) < 0)
+			dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n",
+				 __func__, imxdmac->channel);
+	}
+out:
+	spin_unlock(&imxdma->lock);
 }
 
 static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -97,13 +602,18 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct dma_slave_config *dmaengine_cfg = (void *)arg;
-	int ret;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
 	unsigned int mode = 0;
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
-		imxdmac->status = DMA_ERROR;
-		imx_dma_disable(imxdmac->imxdma_channel);
+		imxdma_disable_hw(imxdmac);
+
+		spin_lock_irqsave(&imxdma->lock, flags);
+		list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+		list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+		spin_unlock_irqrestore(&imxdma->lock, flags);
 		return 0;
 	case DMA_SLAVE_CONFIG:
 		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
@@ -128,16 +638,22 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 			mode = IMX_DMA_MEMSIZE_32;
 			break;
 		}
-		ret = imx_dma_config_channel(imxdmac->imxdma_channel,
-				mode | IMX_DMA_TYPE_FIFO,
-				IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR,
-				imxdmac->dma_request, 1);
-
-		if (ret)
-			return ret;
 
-		imx_dma_config_burstlen(imxdmac->imxdma_channel,
-				imxdmac->watermark_level * imxdmac->word_size);
+		imxdmac->hw_chaining = 1;
+		if (!imxdma_hw_chain(imxdmac))
+			return -EINVAL;
+		imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) |
+			((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) |
+			CCR_REN;
+		imxdmac->ccr_to_device =
+			(IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) |
+			((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN;
+		imx_dmav1_writel(imxdma, imxdmac->dma_request,
+				 DMA_RSSR(imxdmac->channel));
+
+		/* Set burst length */
+		imx_dmav1_writel(imxdma, imxdmac->watermark_level *
+				imxdmac->word_size, DMA_BLR(imxdmac->channel));
 
 		return 0;
 	default:
@@ -151,43 +667,20 @@ static enum dma_status imxdma_tx_status(struct dma_chan *chan,
 					    dma_cookie_t cookie,
 					    struct dma_tx_state *txstate)
 {
-	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
-	dma_cookie_t last_used;
-	enum dma_status ret;
-
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, imxdmac->last_completed, last_used);
-	dma_set_tx_state(txstate, imxdmac->last_completed, last_used, 0);
-
-	return ret;
-}
-
-static dma_cookie_t imxdma_assign_cookie(struct imxdma_channel *imxdma)
-{
-	dma_cookie_t cookie = imxdma->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	imxdma->chan.cookie = cookie;
-	imxdma->desc.cookie = cookie;
-
-	return cookie;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	dma_cookie_t cookie;
+	unsigned long flags;
 
-	spin_lock_irq(&imxdmac->lock);
-
-	cookie = imxdma_assign_cookie(imxdmac);
-
-	imx_dma_enable(imxdmac->imxdma_channel);
-
-	spin_unlock_irq(&imxdmac->lock);
+	spin_lock_irqsave(&imxdma->lock, flags);
+	list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue);
+	cookie = dma_cookie_assign(tx);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 
 	return cookie;
 }
@@ -197,23 +690,52 @@ static int imxdma_alloc_chan_resources(struct dma_chan *chan)
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imx_dma_data *data = chan->private;
 
-	imxdmac->dma_request = data->dma_request;
+	if (data != NULL)
+		imxdmac->dma_request = data->dma_request;
 
-	dma_async_tx_descriptor_init(&imxdmac->desc, chan);
-	imxdmac->desc.tx_submit = imxdma_tx_submit;
-	/* txd.flags will be overwritten in prep funcs */
-	imxdmac->desc.flags = DMA_CTRL_ACK;
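+	/* Pre-allocate a fixed pool of descriptors for this channel */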
+	while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) {
+		struct imxdma_desc *desc;
 
-	imxdmac->status = DMA_SUCCESS;
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			break;
+		__memzero(&desc->desc, sizeof(struct dma_async_tx_descriptor));
+		dma_async_tx_descriptor_init(&desc->desc, chan);
+		desc->desc.tx_submit = imxdma_tx_submit;
+		/* txd.flags will be overwritten in prep funcs */
+		desc->desc.flags = DMA_CTRL_ACK;
+		desc->status = DMA_SUCCESS;
+
+		list_add_tail(&desc->node, &imxdmac->ld_free);
+		imxdmac->descs_allocated++;
+	}
 
-	return 0;
+	if (!imxdmac->descs_allocated)
+		return -ENOMEM;
+
+	return imxdmac->descs_allocated;
 }
 
 static void imxdma_free_chan_resources(struct dma_chan *chan)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc, *_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+
+	imxdma_disable_hw(imxdmac);
+	list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+	list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
 
-	imx_dma_disable(imxdmac->imxdma_channel);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) {
+		kfree(desc);
+		imxdmac->descs_allocated--;
+	}
+	INIT_LIST_HEAD(&imxdmac->ld_free);
 
 	if (imxdmac->sg_list) {
 		kfree(imxdmac->sg_list);
@@ -224,27 +746,23 @@ static void imxdma_free_chan_resources(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct scatterlist *sg;
-	int i, ret, dma_length = 0;
-	unsigned int dmamode;
+	int i, dma_length = 0;
+	struct imxdma_desc *desc;
 
-	if (imxdmac->status == DMA_IN_PROGRESS)
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
 		return NULL;
 
-	imxdmac->status = DMA_IN_PROGRESS;
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
 
 	for_each_sg(sgl, sg, sg_len, i) {
 		dma_length += sg->length;
 	}
 
-	if (direction == DMA_DEV_TO_MEM)
-		dmamode = DMA_MODE_READ;
-	else
-		dmamode = DMA_MODE_WRITE;
-
 	switch (imxdmac->word_size) {
 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
 		if (sgl->length & 3 || sgl->dma_address & 3)
@@ -260,37 +778,41 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		return NULL;
 	}
 
-	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, sgl, sg_len,
-		 dma_length, imxdmac->per_address, dmamode);
-	if (ret)
-		return NULL;
+	desc->type = IMXDMA_DESC_SLAVE_SG;
+	desc->sg = sgl;
+	desc->sgcount = sg_len;
+	desc->len = dma_length;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
 
-	return &imxdmac->desc;
+	return &desc->desc;
 }
 
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
-	int i, ret;
+	struct imxdma_desc *desc;
+	int i;
 	unsigned int periods = buf_len / period_len;
-	unsigned int dmamode;
 
 	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
 			__func__, imxdmac->channel, buf_len, period_len);
 
-	if (imxdmac->status == DMA_IN_PROGRESS)
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
 		return NULL;
-	imxdmac->status = DMA_IN_PROGRESS;
 
-	ret = imx_dma_setup_progression_handler(imxdmac->imxdma_channel,
-			imxdma_progression);
-	if (ret) {
-		dev_err(imxdma->dev, "Failed to setup the DMA handler\n");
-		return NULL;
-	}
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
 
 	if (imxdmac->sg_list)
 		kfree(imxdmac->sg_list);
@@ -316,62 +838,221 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
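+	/*
+	 * The extra entry links back to the list head (chain bit set, end
+	 * bit cleared), so the emulated scatter list loops for cyclic DMA.
+	 */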
 	imxdmac->sg_list[periods].page_link =
 		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
 
-	if (direction == DMA_DEV_TO_MEM)
-		dmamode = DMA_MODE_READ;
-	else
-		dmamode = DMA_MODE_WRITE;
+	desc->type = IMXDMA_DESC_CYCLIC;
+	desc->sg = imxdmac->sg_list;
+	desc->sgcount = periods;
+	desc->len = IMX_DMA_LENGTH_LOOP;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest,
+	dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
 
-	ret = imx_dma_setup_sg(imxdmac->imxdma_channel, imxdmac->sg_list, periods,
-		 IMX_DMA_LENGTH_LOOP, imxdmac->per_address, dmamode);
-	if (ret)
+	dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
+			__func__, imxdmac->channel, src, dest, len);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
 		return NULL;
 
-	return &imxdmac->desc;
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_MEMCPY;
+	desc->src = src;
+	desc->dest = dest;
+	desc->len = len;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
+		"   src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
+		imxdmac->channel, xt->src_start, xt->dst_start,
+		xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
+		xt->numf, xt->frame_size);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM)
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_INTERLEAVED;
+	desc->src = xt->src_start;
+	desc->dest = xt->dst_start;
+	desc->x = xt->sgl[0].size;
+	desc->y = xt->numf;
+	desc->w = xt->sgl[0].icg + desc->x;
+	desc->len = desc->x * desc->y;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32;
+	desc->config_mem = IMX_DMA_MEMSIZE_32;
+	if (xt->src_sgl)
+		desc->config_mem |= IMX_DMA_TYPE_2D;
+	if (xt->dst_sgl)
+		desc->config_port |= IMX_DMA_TYPE_2D;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
 }
 
 static void imxdma_issue_pending(struct dma_chan *chan)
 {
-	/*
-	 * Nothing to do. We only have a single descriptor
-	 */
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
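+	/* Start the first queued descriptor if the channel is idle */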
+	if (list_empty(&imxdmac->ld_active) &&
+	    !list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue,
+					struct imxdma_desc, node);
+
+		if (imxdma_xfer_desc(desc) < 0) {
+			dev_warn(imxdma->dev,
+				 "%s: channel: %d couldn't issue DMA xfer\n",
+				 __func__, imxdmac->channel);
+		} else {
+			list_move_tail(imxdmac->ld_queue.next,
+				       &imxdmac->ld_active);
+		}
+	}
+	spin_unlock_irqrestore(&imxdma->lock, flags);
 }
 
 static int __init imxdma_probe(struct platform_device *pdev)
 {
 	struct imxdma_engine *imxdma;
 	int ret, i;
 
 
 	imxdma = kzalloc(sizeof(*imxdma), GFP_KERNEL);
 	if (!imxdma)
 		return -ENOMEM;
 
+	if (cpu_is_mx1()) {
+		imxdma->base = MX1_IO_ADDRESS(MX1_DMA_BASE_ADDR);
+	} else if (cpu_is_mx21()) {
+		imxdma->base = MX21_IO_ADDRESS(MX21_DMA_BASE_ADDR);
+	} else if (cpu_is_mx27()) {
+		imxdma->base = MX27_IO_ADDRESS(MX27_DMA_BASE_ADDR);
+	} else {
+		kfree(imxdma);
+		return -ENODEV;
+	}
+
+	imxdma->dma_clk = clk_get(NULL, "dma");
+	if (IS_ERR(imxdma->dma_clk)) {
+		ret = PTR_ERR(imxdma->dma_clk);
+		kfree(imxdma);
+		return ret;
+	}
+	clk_enable(imxdma->dma_clk);
+
+	/* reset DMA module */
+	imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR);
+
+	if (cpu_is_mx1()) {
+		ret = request_irq(MX1_DMA_INT, dma_irq_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register IRQ for DMA\n");
+			kfree(imxdma);
+			return ret;
+		}
+
+		ret = request_irq(MX1_DMA_ERR, imxdma_err_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n");
+			free_irq(MX1_DMA_INT, imxdma);
+			kfree(imxdma);
+			return ret;
+		}
+	}
+
+	/* enable DMA module */
+	imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR);
+
+	/* clear all interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR);
+
+	/* disable interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR);
+
 	INIT_LIST_HEAD(&imxdma->dma_device.channels);
 
 	dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask);
+
+	/* Initialize 2D global parameters */
+	for (i = 0; i < IMX_DMA_2D_SLOTS; i++)
+		imxdma->slots_2d[i].count = 0;
+
+	spin_lock_init(&imxdma->lock);
 
 	/* Initialize channel parameters */
-	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
 		struct imxdma_channel *imxdmac = &imxdma->channel[i];
 
-		imxdmac->imxdma_channel = imx_dma_request_by_prio("dmaengine",
-				DMA_PRIO_MEDIUM);
-		if ((int)imxdmac->channel < 0) {
-			ret = -ENODEV;
-			goto err_init;
+		if (cpu_is_mx21() || cpu_is_mx27()) {
+			ret = request_irq(MX2x_INT_DMACH0 + i,
+					dma_irq_handler, 0, "DMA", imxdma);
+			if (ret) {
+				dev_warn(imxdma->dev, "Can't register IRQ %d "
+					 "for DMA channel %d\n",
+					 MX2x_INT_DMACH0 + i, i);
+				goto err_init;
+			}
+			init_timer(&imxdmac->watchdog);
+			imxdmac->watchdog.function = &imxdma_watchdog;
+			imxdmac->watchdog.data = (unsigned long)imxdmac;
 		}
 
-		imx_dma_setup_handlers(imxdmac->imxdma_channel,
-		       imxdma_irq_handler, imxdma_err_handler, imxdmac);
-
 		imxdmac->imxdma = imxdma;
-		spin_lock_init(&imxdmac->lock);
 
+		INIT_LIST_HEAD(&imxdmac->ld_queue);
+		INIT_LIST_HEAD(&imxdmac->ld_free);
+		INIT_LIST_HEAD(&imxdmac->ld_active);
+
+		tasklet_init(&imxdmac->dma_tasklet, imxdma_tasklet,
+			     (unsigned long)imxdmac);
 		imxdmac->chan.device = &imxdma->dma_device;
+		dma_cookie_init(&imxdmac->chan);
 		imxdmac->channel = i;
 
 		/* Add the channel to the DMAC list */
-		list_add_tail(&imxdmac->chan.device_node, &imxdma->dma_device.channels);
+		list_add_tail(&imxdmac->chan.device_node,
+			      &imxdma->dma_device.channels);
 	}
 
 	imxdma->dev = &pdev->dev;
@@ -382,11 +1063,14 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	imxdma->dma_device.device_tx_status = imxdma_tx_status;
 	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
 	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+	imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy;
+	imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved;
 	imxdma->dma_device.device_control = imxdma_control;
 	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
 
 	platform_set_drvdata(pdev, imxdma);
 
+	imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */
 	imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms;
 	dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff);
 
@@ -399,9 +1083,13 @@ static int __init imxdma_probe(struct platform_device *pdev)
 	return 0;
 
 err_init:
-	while (--i >= 0) {
-		struct imxdma_channel *imxdmac = &imxdma->channel[i];
-		imx_dma_free(imxdmac->imxdma_channel);
+
+	if (cpu_is_mx21() || cpu_is_mx27()) {
+		while (--i >= 0)
+			free_irq(MX2x_INT_DMACH0 + i, imxdma);
+	} else if (cpu_is_mx1()) {
+		free_irq(MX1_DMA_INT, imxdma);
+		free_irq(MX1_DMA_ERR, imxdma);
 	}
 
 	kfree(imxdma);
@@ -415,10 +1103,12 @@ static int __exit imxdma_remove(struct platform_device *pdev)
 
         dma_async_device_unregister(&imxdma->dma_device);
 
-	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
-		struct imxdma_channel *imxdmac = &imxdma->channel[i];
-
-		 imx_dma_free(imxdmac->imxdma_channel);
+	if (cpu_is_mx21() || cpu_is_mx27()) {
+		for (i = 0; i < IMX_DMA_CHANNELS; i++)
+			free_irq(MX2x_INT_DMACH0 + i, imxdma);
+	} else if (cpu_is_mx1()) {
+		free_irq(MX1_DMA_INT, imxdma);
+		free_irq(MX1_DMA_ERR, imxdma);
 	}
 
         kfree(imxdma);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 63540d3e2153..d3e38e28bb6b 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/bitops.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
@@ -41,6 +42,8 @@
 #include <mach/dma.h>
 #include <mach/hardware.h>
 
+#include "dmaengine.h"
+
 /* SDMA registers */
 #define SDMA_H_C0PTR		0x000
 #define SDMA_H_INTR		0x004
@@ -259,19 +262,18 @@ struct sdma_channel {
 	unsigned int			pc_from_device, pc_to_device;
 	unsigned long			flags;
 	dma_addr_t			per_address;
-	u32				event_mask0, event_mask1;
-	u32				watermark_level;
+	unsigned long			event_mask[2];
+	unsigned long			watermark_level;
 	u32				shp_addr, per_addr;
 	struct dma_chan			chan;
 	spinlock_t			lock;
 	struct dma_async_tx_descriptor	desc;
-	dma_cookie_t			last_completed;
 	enum dma_status			status;
 	unsigned int			chn_count;
 	unsigned int			chn_real_count;
 };
 
-#define IMX_DMA_SG_LOOP		(1 << 0)
+#define IMX_DMA_SG_LOOP		BIT(0)
 
 #define MAX_DMA_CHANNELS 32
 #define MXC_SDMA_DEFAULT_PRIORITY 1
@@ -345,9 +347,9 @@ static const struct of_device_id sdma_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, sdma_dt_ids);
 
-#define SDMA_H_CONFIG_DSPDMA	(1 << 12) /* indicates if the DSPDMA is used */
-#define SDMA_H_CONFIG_RTD_PINS	(1 << 11) /* indicates if Real-Time Debug pins are enabled */
-#define SDMA_H_CONFIG_ACR	(1 << 4)  /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_DSPDMA	BIT(12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS	BIT(11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR	BIT(4)  /* indicates if AHB freq /core freq = 2 or 1 */
 #define SDMA_H_CONFIG_CSM	(3)       /* indicates which context switch mode is selected*/
 
 static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
@@ -362,37 +364,42 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 {
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
-	u32 evt, mcu, dsp;
+	unsigned long evt, mcu, dsp;
 
 	if (event_override && mcu_override && dsp_override)
 		return -EINVAL;
 
-	evt = __raw_readl(sdma->regs + SDMA_H_EVTOVR);
-	mcu = __raw_readl(sdma->regs + SDMA_H_HOSTOVR);
-	dsp = __raw_readl(sdma->regs + SDMA_H_DSPOVR);
+	evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR);
+	mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR);
+	dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR);
 
 	if (dsp_override)
-		dsp &= ~(1 << channel);
+		__clear_bit(channel, &dsp);
 	else
-		dsp |= (1 << channel);
+		__set_bit(channel, &dsp);
 
 	if (event_override)
-		evt &= ~(1 << channel);
+		__clear_bit(channel, &evt);
 	else
-		evt |= (1 << channel);
+		__set_bit(channel, &evt);
 
 	if (mcu_override)
-		mcu &= ~(1 << channel);
+		__clear_bit(channel, &mcu);
 	else
-		mcu |= (1 << channel);
+		__set_bit(channel, &mcu);
 
-	__raw_writel(evt, sdma->regs + SDMA_H_EVTOVR);
-	__raw_writel(mcu, sdma->regs + SDMA_H_HOSTOVR);
-	__raw_writel(dsp, sdma->regs + SDMA_H_DSPOVR);
+	writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR);
+	writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR);
+	writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR);
 
 	return 0;
 }
 
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
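+	/* Start the channel by setting its bit in the HSTART register */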
+	writel(BIT(channel), sdma->regs + SDMA_H_START);
+}
+
 /*
  * sdma_run_channel - run a channel and wait till it's done
  */
@@ -404,7 +411,7 @@ static int sdma_run_channel(struct sdma_channel *sdmac)
 
 	init_completion(&sdmac->done);
 
-	__raw_writel(1 << channel, sdma->regs + SDMA_H_START);
+	sdma_enable_channel(sdma, channel);
 
 	ret = wait_for_completion_timeout(&sdmac->done, HZ);
 
@@ -451,12 +458,12 @@ static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
 {
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
-	u32 val;
+	unsigned long val;
 	u32 chnenbl = chnenbl_ofs(sdma, event);
 
-	val = __raw_readl(sdma->regs + chnenbl);
-	val |= (1 << channel);
-	__raw_writel(val, sdma->regs + chnenbl);
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__set_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
 }
 
 static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
@@ -464,11 +471,11 @@ static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
 	u32 chnenbl = chnenbl_ofs(sdma, event);
-	u32 val;
+	unsigned long val;
 
-	val = __raw_readl(sdma->regs + chnenbl);
-	val &= ~(1 << channel);
-	__raw_writel(val, sdma->regs + chnenbl);
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__clear_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
 }
 
 static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
@@ -522,7 +529,7 @@ static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
 	else
 		sdmac->status = DMA_SUCCESS;
 
-	sdmac->last_completed = sdmac->desc.cookie;
+	dma_cookie_complete(&sdmac->desc);
 	if (sdmac->desc.callback)
 		sdmac->desc.callback(sdmac->desc.callback_param);
 }
@@ -544,10 +551,10 @@ static void mxc_sdma_handle_channel(struct sdma_channel *sdmac)
 static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 {
 	struct sdma_engine *sdma = dev_id;
-	u32 stat;
+	unsigned long stat;
 
-	stat = __raw_readl(sdma->regs + SDMA_H_INTR);
-	__raw_writel(stat, sdma->regs + SDMA_H_INTR);
+	stat = readl_relaxed(sdma->regs + SDMA_H_INTR);
+	writel_relaxed(stat, sdma->regs + SDMA_H_INTR);
 
 	while (stat) {
 		int channel = fls(stat) - 1;
@@ -555,7 +562,7 @@ static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 
 		mxc_sdma_handle_channel(sdmac);
 
-		stat &= ~(1 << channel);
+		__clear_bit(channel, &stat);
 	}
 
 	return IRQ_HANDLED;
@@ -663,11 +670,11 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 		return load_address;
 
 	dev_dbg(sdma->dev, "load_address = %d\n", load_address);
-	dev_dbg(sdma->dev, "wml = 0x%08x\n", sdmac->watermark_level);
+	dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level);
 	dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
 	dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
-	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", sdmac->event_mask0);
-	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", sdmac->event_mask1);
+	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]);
+	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]);
 
 	mutex_lock(&sdma->channel_0_lock);
 
@@ -677,8 +684,8 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	/* Send by context the event mask, base address for peripheral
 	 * and watermark level
 	 */
-	context->gReg[0] = sdmac->event_mask1;
-	context->gReg[1] = sdmac->event_mask0;
+	context->gReg[0] = sdmac->event_mask[1];
+	context->gReg[1] = sdmac->event_mask[0];
 	context->gReg[2] = sdmac->per_addr;
 	context->gReg[6] = sdmac->shp_addr;
 	context->gReg[7] = sdmac->watermark_level;
@@ -701,7 +708,7 @@ static void sdma_disable_channel(struct sdma_channel *sdmac)
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
 
-	__raw_writel(1 << channel, sdma->regs + SDMA_H_STATSTOP);
+	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
 	sdmac->status = DMA_ERROR;
 }
 
@@ -711,13 +718,13 @@ static int sdma_config_channel(struct sdma_channel *sdmac)
 
 	sdma_disable_channel(sdmac);
 
-	sdmac->event_mask0 = 0;
-	sdmac->event_mask1 = 0;
+	sdmac->event_mask[0] = 0;
+	sdmac->event_mask[1] = 0;
 	sdmac->shp_addr = 0;
 	sdmac->per_addr = 0;
 
 	if (sdmac->event_id0) {
-		if (sdmac->event_id0 > 32)
+		if (sdmac->event_id0 >= sdmac->sdma->num_events)
 			return -EINVAL;
 		sdma_event_enable(sdmac, sdmac->event_id0);
 	}
@@ -740,15 +747,14 @@ static int sdma_config_channel(struct sdma_channel *sdmac)
 			(sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
 		/* Handle multiple event channels differently */
 		if (sdmac->event_id1) {
-			sdmac->event_mask1 = 1 << (sdmac->event_id1 % 32);
+			sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32);
 			if (sdmac->event_id1 > 31)
-				sdmac->watermark_level |= 1 << 31;
-			sdmac->event_mask0 = 1 << (sdmac->event_id0 % 32);
+				__set_bit(31, &sdmac->watermark_level);
+			sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32);
 			if (sdmac->event_id0 > 31)
-				sdmac->watermark_level |= 1 << 30;
+				__set_bit(30, &sdmac->watermark_level);
 		} else {
-			sdmac->event_mask0 = 1 << sdmac->event_id0;
-			sdmac->event_mask1 = 1 << (sdmac->event_id0 - 32);
+			__set_bit(sdmac->event_id0, sdmac->event_mask);
 		}
 		/* Watermark Level */
 		sdmac->watermark_level |= sdmac->watermark_level;
@@ -774,7 +780,7 @@ static int sdma_set_channel_priority(struct sdma_channel *sdmac,
 		return -EINVAL;
 	}
 
-	__raw_writel(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+	writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
 
 	return 0;
 }
@@ -796,8 +802,6 @@ static int sdma_request_channel(struct sdma_channel *sdmac)
 	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
 	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
 
-	clk_enable(sdma->clk);
-
 	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
 
 	init_completion(&sdmac->done);
@@ -810,24 +814,6 @@ out:
 	return ret;
 }
 
-static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
-{
-	__raw_writel(1 << channel, sdma->regs + SDMA_H_START);
-}
-
-static dma_cookie_t sdma_assign_cookie(struct sdma_channel *sdmac)
-{
-	dma_cookie_t cookie = sdmac->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	sdmac->chan.cookie = cookie;
-	sdmac->desc.cookie = cookie;
-
-	return cookie;
-}
-
 static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
 {
 	return container_of(chan, struct sdma_channel, chan);
@@ -837,14 +823,11 @@ static dma_cookie_t sdma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	unsigned long flags;
 	struct sdma_channel *sdmac = to_sdma_chan(tx->chan);
-	struct sdma_engine *sdma = sdmac->sdma;
 	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&sdmac->lock, flags);
 
-	cookie = sdma_assign_cookie(sdmac);
-
-	sdma_enable_channel(sdma, sdmac->channel);
+	cookie = dma_cookie_assign(tx);
 
 	spin_unlock_irqrestore(&sdmac->lock, flags);
 
@@ -875,11 +858,14 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan)
 
 	sdmac->peripheral_type = data->peripheral_type;
 	sdmac->event_id0 = data->dma_request;
-	ret = sdma_set_channel_priority(sdmac, prio);
+
+	clk_enable(sdmac->sdma->clk);
+
+	ret = sdma_request_channel(sdmac);
 	if (ret)
 		return ret;
 
-	ret = sdma_request_channel(sdmac);
+	ret = sdma_set_channel_priority(sdmac, prio);
 	if (ret)
 		return ret;
 
@@ -916,7 +902,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1014,7 +1000,8 @@ err_out:
 
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1128,7 +1115,7 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 
 	last_used = chan->cookie;
 
-	dma_set_tx_state(txstate, sdmac->last_completed, last_used,
+	dma_set_tx_state(txstate, chan->completed_cookie, last_used,
 			sdmac->chn_count - sdmac->chn_real_count);
 
 	return sdmac->status;
@@ -1136,9 +1123,11 @@ static enum dma_status sdma_tx_status(struct dma_chan *chan,
 
 static void sdma_issue_pending(struct dma_chan *chan)
 {
-	/*
-	 * Nothing to do. We only have a single descriptor
-	 */
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
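+	/*
+	 * tx_submit now only queues work; per the dmaengine API the
+	 * channel is actually started here.
+	 */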
+	if (sdmac->status == DMA_IN_PROGRESS)
+		sdma_enable_channel(sdma, sdmac->channel);
 }
 
 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1	34
@@ -1230,7 +1219,7 @@ static int __init sdma_init(struct sdma_engine *sdma)
 	clk_enable(sdma->clk);
 
 	/* Be sure SDMA has not started yet */
-	__raw_writel(0, sdma->regs + SDMA_H_C0PTR);
+	writel_relaxed(0, sdma->regs + SDMA_H_C0PTR);
 
 	sdma->channel_control = dma_alloc_coherent(NULL,
 			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) +
@@ -1253,11 +1242,11 @@ static int __init sdma_init(struct sdma_engine *sdma)
 
 	/* disable all channels */
 	for (i = 0; i < sdma->num_events; i++)
-		__raw_writel(0, sdma->regs + chnenbl_ofs(sdma, i));
+		writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
 
 	/* All channels have priority 0 */
 	for (i = 0; i < MAX_DMA_CHANNELS; i++)
-		__raw_writel(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+		writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
 
 	ret = sdma_request_channel(&sdma->channel[0]);
 	if (ret)
@@ -1266,16 +1255,16 @@ static int __init sdma_init(struct sdma_engine *sdma)
 	sdma_config_ownership(&sdma->channel[0], false, true, false);
 
 	/* Set Command Channel (Channel Zero) */
-	__raw_writel(0x4050, sdma->regs + SDMA_CHN0ADDR);
+	writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR);
 
 	/* Set bits of CONFIG register but with static context switching */
 	/* FIXME: Check whether to set ACR bit depending on clock ratios */
-	__raw_writel(0, sdma->regs + SDMA_H_CONFIG);
+	writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
 
-	__raw_writel(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+	writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
 
 	/* Set bits of CONFIG register with given context switching mode */
-	__raw_writel(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+	writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
 
 	/* Initializes channel's priorities */
 	sdma_set_channel_priority(&sdma->channel[0], 7);
@@ -1367,6 +1356,7 @@ static int __init sdma_probe(struct platform_device *pdev)
 		spin_lock_init(&sdmac->lock);
 
 		sdmac->chan.device = &sdma->dma_device;
+		dma_cookie_init(&sdmac->chan);
 		sdmac->channel = i;
 
 		/*
@@ -1387,7 +1377,9 @@ static int __init sdma_probe(struct platform_device *pdev)
 		sdma_add_scripts(sdma, pdata->script_addrs);
 
 	if (pdata) {
-		sdma_get_firmware(sdma, pdata->fw_name);
+		ret = sdma_get_firmware(sdma, pdata->fw_name);
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
 	} else {
 		/*
 		 * Because that device tree does not encode ROM script address,
@@ -1396,15 +1388,12 @@ static int __init sdma_probe(struct platform_device *pdev)
 		 */
 		ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
 					      &fw_name);
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get firmware name\n");
-			goto err_init;
-		}
-
-		ret = sdma_get_firmware(sdma, fw_name);
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get firmware\n");
-			goto err_init;
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware name\n");
+		else {
+			ret = sdma_get_firmware(sdma, fw_name);
+			if (ret)
+				dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
 		}
 	}
 
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 74f70aadf9e4..c900ca7aaec4 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -29,6 +29,8 @@
 #include <linux/intel_mid_dma.h>
 #include <linux/module.h>
 
+#include "dmaengine.h"
+
 #define MAX_CHAN	4 /*max ch across controllers*/
 #include "intel_mid_dma_regs.h"
 
@@ -288,7 +290,7 @@ static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
 	struct intel_mid_dma_lli	*llitem;
 	void *param_txd = NULL;
 
-	midc->completed = txd->cookie;
+	dma_cookie_complete(txd);
 	callback_txd = txd->callback;
 	param_txd = txd->callback_param;
 
@@ -434,14 +436,7 @@ static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	dma_cookie_t		cookie;
 
 	spin_lock_bh(&midc->lock);
-	cookie = midc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	midc->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
+	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&midc->active_list))
 		list_add_tail(&desc->desc_node, &midc->active_list);
@@ -482,31 +477,18 @@ static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
 						dma_cookie_t cookie,
 						struct dma_tx_state *txstate)
 {
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	dma_cookie_t		last_used;
-	dma_cookie_t		last_complete;
-	int				ret;
+	struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan);
+	enum dma_status ret;
 
-	last_complete = midc->completed;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		spin_lock_bh(&midc->lock);
 		midc_scan_descriptors(to_middma_device(chan->device), midc);
 		spin_unlock_bh(&midc->lock);
 
-		last_complete = midc->completed;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
-	if (txstate) {
-		txstate->last = last_complete;
-		txstate->used = last_used;
-		txstate->residue = 0;
-	}
 	return ret;
 }
 
@@ -732,13 +714,14 @@ err_desc_get:
  * @sg_len: length of sg txn
  * @direction: DMA transfer direction
  * @flags: DMA flags
+ * @context: transfer context (ignored)
  *
  * Prepares LLI based peripheral transfer
  */
 static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 			struct dma_chan *chan, struct scatterlist *sgl,
 			unsigned int sg_len, enum dma_transfer_direction direction,
-			unsigned long flags)
+			unsigned long flags, void *context)
 {
 	struct intel_mid_dma_chan *midc = NULL;
 	struct intel_mid_dma_slave *mids = NULL;
@@ -832,7 +815,6 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
 		/*trying to free ch in use!!!!!*/
 		pr_err("ERR_MDMA: trying to free ch in use\n");
 	}
-	pm_runtime_put(&mid->pdev->dev);
 	spin_lock_bh(&midc->lock);
 	midc->descs_allocated = 0;
 	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
@@ -853,6 +835,7 @@ static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
 	/* Disable CH interrupts */
 	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
 	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
+	pm_runtime_put(&mid->pdev->dev);
 }
 
 /**
@@ -886,7 +869,7 @@ static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
 		pm_runtime_put(&mid->pdev->dev);
 		return -EIO;
 	}
-	midc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	spin_lock_bh(&midc->lock);
 	while (midc->descs_allocated < DESCS_PER_CHANNEL) {
@@ -1056,7 +1039,8 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
 	}
 	err_status &= mid->intr_mask;
 	if (err_status) {
-		iowrite32(MASK_INTR_REG(err_status), mid->dma_base + MASK_ERR);
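+		/* The mask registers take paired write-enable bits, so
+		 * shifting the status into the WE field masks exactly the
+		 * channels that reported an error.
+		 */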
+		iowrite32((err_status << INT_MASK_WE),
+			  mid->dma_base + MASK_ERR);
 		call_tasklet = 1;
 	}
 	if (call_tasklet)
@@ -1118,7 +1102,7 @@ static int mid_setup_dma(struct pci_dev *pdev)
 		struct intel_mid_dma_chan *midch = &dma->ch[i];
 
 		midch->chan.device = &dma->common;
-		midch->chan.cookie =  1;
+		dma_cookie_init(&midch->chan);
 		midch->ch_id = dma->chan_base + i;
 		pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id);
 
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index c83d35b97bd8..1bfa9268feaf 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -165,7 +165,6 @@ union intel_mid_dma_cfg_hi {
  * @dma_base: MMIO register space DMA engine base pointer
  * @ch_id: DMA channel id
  * @lock: channel spinlock
- * @completed: DMA cookie
  * @active_list: current active descriptors
  * @queue: current queued up descriptors
  * @free_list: current free descriptors
@@ -183,7 +182,6 @@ struct intel_mid_dma_chan {
 	void __iomem		*dma_base;
 	int			ch_id;
 	spinlock_t		lock;
-	dma_cookie_t		completed;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index a4d6cb0c0343..31493d80e0e9 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -40,6 +40,8 @@
 #include "registers.h"
 #include "hw.h"
 
+#include "../dmaengine.h"
+
 int ioat_pending_level = 4;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
@@ -107,6 +109,7 @@ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *c
 	chan->reg_base = device->reg_base + (0x80 * (idx + 1));
 	spin_lock_init(&chan->cleanup_lock);
 	chan->common.device = dma;
+	dma_cookie_init(&chan->common);
 	list_add_tail(&chan->common.device_node, &dma->channels);
 	device->idx[idx] = chan;
 	init_timer(&chan->timer);
@@ -235,12 +238,7 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	spin_lock_bh(&ioat->desc_lock);
 	/* cookie incr and addition to used_list must be atomic */
-	cookie = c->cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	c->cookie = cookie;
-	tx->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
 
 	/* write address into NextDescriptor field of last desc in chain */
@@ -603,8 +601,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
 		 */
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
-			chan->completed_cookie = tx->cookie;
-			tx->cookie = 0;
+			dma_cookie_complete(tx);
 			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
 			ioat->active -= desc->hw->tx_cnt;
 			if (tx->callback) {
@@ -733,13 +730,15 @@ ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 {
 	struct ioat_chan_common *chan = to_chan_common(c);
 	struct ioatdma_device *device = chan->device;
+	enum dma_status ret;
 
-	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
-		return DMA_SUCCESS;
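+	/* Fast path first; only run cleanup and re-check when the
+	 * transaction is not complete yet.
+	 */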
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
 
 	device->cleanup_fn((unsigned long) c);
 
-	return ioat_tx_status(c, cookie, txstate);
+	return dma_cookie_status(c, cookie, txstate);
 }
 
 static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 5216c8a92a21..c7888bccd974 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -90,7 +90,6 @@ struct ioat_chan_common {
 	void __iomem *reg_base;
 	unsigned long last_completion;
 	spinlock_t cleanup_lock;
-	dma_cookie_t completed_cookie;
 	unsigned long state;
 	#define IOAT_COMPLETION_PENDING 0
 	#define IOAT_COMPLETION_ACK 1
@@ -143,28 +142,6 @@ static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
 	return container_of(chan, struct ioat_dma_chan, base);
 }
 
-/**
- * ioat_tx_status - poll the status of an ioat transaction
- * @c: channel handle
- * @cookie: transaction identifier
- * @txstate: if set, updated with the transaction state
- */
-static inline enum dma_status
-ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
-		 struct dma_tx_state *txstate)
-{
-	struct ioat_chan_common *chan = to_chan_common(c);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-
-	last_used = c->cookie;
-	last_complete = chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	return dma_async_is_complete(cookie, last_complete, last_used);
-}
-
 /* wrapper around hardware descriptor format + additional software fields */
 
 /**
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 5d65f8377971..e8e110ff3d96 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -41,6 +41,8 @@
 #include "registers.h"
 #include "hw.h"
 
+#include "../dmaengine.h"
+
 int ioat_ring_alloc_order = 8;
 module_param(ioat_ring_alloc_order, int, 0644);
 MODULE_PARM_DESC(ioat_ring_alloc_order,
@@ -147,8 +149,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
 			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
-			chan->completed_cookie = tx->cookie;
-			tx->cookie = 0;
+			dma_cookie_complete(tx);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
 				tx->callback = NULL;
@@ -398,13 +399,9 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
 	struct dma_chan *c = tx->chan;
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 	struct ioat_chan_common *chan = &ioat->base;
-	dma_cookie_t cookie = c->cookie;
+	dma_cookie_t cookie;
 
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	tx->cookie = cookie;
-	c->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 	dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
 
 	if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index f519c93a61e7..2c4476c0e405 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -61,6 +61,7 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/prefetch.h>
+#include "../dmaengine.h"
 #include "registers.h"
 #include "hw.h"
 #include "dma.h"
@@ -277,9 +278,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 		dump_desc_dbg(ioat, desc);
 		tx = &desc->txd;
 		if (tx->cookie) {
-			chan->completed_cookie = tx->cookie;
+			dma_cookie_complete(tx);
 			ioat3_dma_unmap(ioat, desc, idx + i);
-			tx->cookie = 0;
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
 				tx->callback = NULL;
@@ -411,13 +411,15 @@ ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 		struct dma_tx_state *txstate)
 {
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	enum dma_status ret;
 
-	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
-		return DMA_SUCCESS;
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_SUCCESS)
+		return ret;
 
 	ioat3_cleanup(ioat);
 
-	return ioat_tx_status(c, cookie, txstate);
+	return dma_cookie_status(c, cookie, txstate);
 }
 
 static struct dma_async_tx_descriptor *
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index faf88b7e1e71..da6c4c2c066a 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -36,6 +36,8 @@
 
 #include <mach/adma.h>
 
+#include "dmaengine.h"
+
 #define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
 #define to_iop_adma_device(dev) \
 	container_of(dev, struct iop_adma_device, common)
@@ -317,7 +319,7 @@ static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
 	}
 
 	if (cookie > 0) {
-		iop_chan->completed_cookie = cookie;
+		iop_chan->common.completed_cookie = cookie;
 		pr_debug("\tcompleted cookie %d\n", cookie);
 	}
 }
@@ -438,18 +440,6 @@ retry:
 	return NULL;
 }
 
-static dma_cookie_t
-iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
-	struct iop_adma_desc_slot *desc)
-{
-	dma_cookie_t cookie = iop_chan->common.cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	iop_chan->common.cookie = desc->async_tx.cookie = cookie;
-	return cookie;
-}
-
 static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
 {
 	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
@@ -477,7 +467,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 	slots_per_op = grp_start->slots_per_op;
 
 	spin_lock_bh(&iop_chan->lock);
-	cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
+	cookie = dma_cookie_assign(tx);
 
 	old_chain_tail = list_entry(iop_chan->chain.prev,
 		struct iop_adma_desc_slot, chain_node);
@@ -904,24 +894,15 @@ static enum dma_status iop_adma_status(struct dma_chan *chan,
 					struct dma_tx_state *txstate)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	enum dma_status ret;
-
-	last_used = chan->cookie;
-	last_complete = iop_chan->completed_cookie;
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_SUCCESS)
 		return ret;
 
 	iop_adma_slot_cleanup(iop_chan);
 
-	last_used = chan->cookie;
-	last_complete = iop_chan->completed_cookie;
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static irqreturn_t iop_adma_eot_handler(int irq, void *data)
@@ -1565,6 +1546,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&iop_chan->chain);
 	INIT_LIST_HEAD(&iop_chan->all_slots);
 	iop_chan->common.device = dma_dev;
+	dma_cookie_init(&iop_chan->common);
 	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
 
 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
@@ -1642,16 +1624,12 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
 		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
 		iop_desc_set_memcpy_src_addr(grp_start, 0);
 
-		cookie = iop_chan->common.cookie;
-		cookie++;
-		if (cookie <= 1)
-			cookie = 2;
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
 
 		/* initialize the completed cookie to be less than
 		 * the most recently used cookie
 		 */
-		iop_chan->completed_cookie = cookie - 1;
-		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+		iop_chan->common.completed_cookie = cookie - 1;
 
 		/* channel should not be busy */
 		BUG_ON(iop_chan_is_busy(iop_chan));
@@ -1699,16 +1677,12 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
 		iop_desc_set_xor_src_addr(grp_start, 0, 0);
 		iop_desc_set_xor_src_addr(grp_start, 1, 0);
 
-		cookie = iop_chan->common.cookie;
-		cookie++;
-		if (cookie <= 1)
-			cookie = 2;
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
 
 		/* initialize the completed cookie to be less than
 		 * the most recently used cookie
 		 */
-		iop_chan->completed_cookie = cookie - 1;
-		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+		iop_chan->common.completed_cookie = cookie - 1;
 
 		/* channel should not be busy */
 		BUG_ON(iop_chan_is_busy(iop_chan));
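dma_cookie_init(), called from the probe paths converted in this patch, simply
seeds both per-channel counters; a sketch:

	static inline void dma_cookie_init(struct dma_chan *chan)
	{
		chan->cookie = DMA_MIN_COOKIE;
		chan->completed_cookie = DMA_MIN_COOKIE;
	}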
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 6212b16e8cf2..62e3f8ec2461 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -25,6 +25,7 @@
 
 #include <mach/ipu.h>
 
+#include "../dmaengine.h"
 #include "ipu_intern.h"
 
 #define FS_VF_IN_VALID	0x00000002
@@ -866,14 +867,7 @@ static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
 
-	cookie = ichan->dma_chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	/* from dmaengine.h: "last cookie value returned to client" */
-	ichan->dma_chan.cookie = cookie;
-	tx->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 
 	/* ipu->lock can be taken under ichan->lock, but not v.v. */
 	spin_lock_irqsave(&ichan->lock, flags);
@@ -1295,7 +1289,7 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
 	/* Flip the active buffer - even if update above failed */
 	ichan->active_buffer = !ichan->active_buffer;
 	if (done)
-		ichan->completed = desc->txd.cookie;
+		dma_cookie_complete(&desc->txd);
 
 	callback = desc->txd.callback;
 	callback_param = desc->txd.callback_param;
@@ -1341,7 +1335,8 @@ static void ipu_gc_tasklet(unsigned long arg)
 /* Allocate and initialise a transfer descriptor. */
 static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
 		struct scatterlist *sgl, unsigned int sg_len,
-		enum dma_transfer_direction direction, unsigned long tx_flags)
+		enum dma_transfer_direction direction, unsigned long tx_flags,
+		void *context)
 {
 	struct idmac_channel *ichan = to_idmac_chan(chan);
 	struct idmac_tx_desc *desc = NULL;
@@ -1510,8 +1505,7 @@ static int idmac_alloc_chan_resources(struct dma_chan *chan)
 	BUG_ON(chan->client_count > 1);
 	WARN_ON(ichan->status != IPU_CHANNEL_FREE);
 
-	chan->cookie		= 1;
-	ichan->completed	= -ENXIO;
+	dma_cookie_init(chan);
 
 	ret = ipu_irq_map(chan->chan_id);
 	if (ret < 0)
@@ -1600,9 +1594,7 @@ static void idmac_free_chan_resources(struct dma_chan *chan)
 static enum dma_status idmac_tx_status(struct dma_chan *chan,
 		       dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
-	struct idmac_channel *ichan = to_idmac_chan(chan);
-
-	dma_set_tx_state(txstate, ichan->completed, chan->cookie, 0);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
 	if (cookie != chan->cookie)
 		return DMA_ERROR;
 	return DMA_SUCCESS;
@@ -1638,11 +1630,10 @@ static int __init ipu_idmac_init(struct ipu *ipu)
 
 		ichan->status		= IPU_CHANNEL_FREE;
 		ichan->sec_chan_en	= false;
-		ichan->completed	= -ENXIO;
 		snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i);
 
 		dma_chan->device	= &idmac->dma;
-		dma_chan->cookie	= 1;
+		dma_cookie_init(dma_chan);
 		dma_chan->chan_id	= i;
 		list_add_tail(&dma_chan->device_node, &dma->channels);
 	}
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 4d6d4cf66949..2ab0a3d0eed5 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -44,6 +44,8 @@
 
 #include <linux/random.h>
 
+#include "dmaengine.h"
+
 /* Number of DMA Transfer descriptors allocated per channel */
 #define MPC_DMA_DESCRIPTORS	64
 
@@ -188,7 +190,6 @@ struct mpc_dma_chan {
 	struct list_head		completed;
 	struct mpc_dma_tcd		*tcd;
 	dma_addr_t			tcd_paddr;
-	dma_cookie_t			completed_cookie;
 
 	/* Lock for this structure */
 	spinlock_t			lock;
@@ -365,7 +366,7 @@ static void mpc_dma_process_completed(struct mpc_dma *mdma)
 		/* Free descriptors */
 		spin_lock_irqsave(&mchan->lock, flags);
 		list_splice_tail_init(&list, &mchan->free);
-		mchan->completed_cookie = last_cookie;
+		mchan->chan.completed_cookie = last_cookie;
 		spin_unlock_irqrestore(&mchan->lock, flags);
 	}
 }
@@ -438,13 +439,7 @@ static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
 		mpc_dma_execute(mchan);
 
 	/* Update cookie */
-	cookie = mchan->chan.cookie + 1;
-	if (cookie <= 0)
-		cookie = 1;
-
-	mchan->chan.cookie = cookie;
-	mdesc->desc.cookie = cookie;
-
+	cookie = dma_cookie_assign(txd);
 	spin_unlock_irqrestore(&mchan->lock, flags);
 
 	return cookie;
@@ -562,17 +557,14 @@ mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	       struct dma_tx_state *txstate)
 {
 	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	enum dma_status ret;
 	unsigned long flags;
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 
 	spin_lock_irqsave(&mchan->lock, flags);
-	last_used = mchan->chan.cookie;
-	last_complete = mchan->completed_cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
 	spin_unlock_irqrestore(&mchan->lock, flags);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return ret;
 }
 
 /* Prepare descriptor for memory to memory copy */
@@ -741,8 +733,7 @@ static int __devinit mpc_dma_probe(struct platform_device *op)
 		mchan = &mdma->channels[i];
 
 		mchan->chan.device = dma;
-		mchan->chan.cookie = 1;
-		mchan->completed_cookie = mchan->chan.cookie;
+		dma_cookie_init(&mchan->chan);
 
 		INIT_LIST_HEAD(&mchan->free);
 		INIT_LIST_HEAD(&mchan->prepared);
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index e779b434af45..fa5d55fea46c 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -26,6 +26,8 @@
 #include <linux/platform_device.h>
 #include <linux/memory.h>
 #include <plat/mv_xor.h>
+
+#include "dmaengine.h"
 #include "mv_xor.h"
 
 static void mv_xor_issue_pending(struct dma_chan *chan);
@@ -435,7 +437,7 @@ static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
 	}
 
 	if (cookie > 0)
-		mv_chan->completed_cookie = cookie;
+		mv_chan->common.completed_cookie = cookie;
 }
 
 static void
@@ -534,18 +536,6 @@ retry:
 	return NULL;
 }
 
-static dma_cookie_t
-mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
-		      struct mv_xor_desc_slot *desc)
-{
-	dma_cookie_t cookie = mv_chan->common.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-	mv_chan->common.cookie = desc->async_tx.cookie = cookie;
-	return cookie;
-}
-
 /************************ DMA engine API functions ****************************/
 static dma_cookie_t
 mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
@@ -563,7 +553,7 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
 	grp_start = sw_desc->group_head;
 
 	spin_lock_bh(&mv_chan->lock);
-	cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+	cookie = dma_cookie_assign(tx);
 
 	if (list_empty(&mv_chan->chain))
 		list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
@@ -820,27 +810,16 @@ static enum dma_status mv_xor_status(struct dma_chan *chan,
 					  struct dma_tx_state *txstate)
 {
 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status ret;
 
-	last_used = chan->cookie;
-	last_complete = mv_chan->completed_cookie;
-	mv_chan->is_complete_cookie = cookie;
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_SUCCESS) {
 		mv_xor_clean_completed_slots(mv_chan);
 		return ret;
 	}
 	mv_xor_slot_cleanup(mv_chan);
 
-	last_used = chan->cookie;
-	last_complete = mv_chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void mv_dump_xor_regs(struct mv_xor_chan *chan)
@@ -1214,6 +1193,7 @@ static int __devinit mv_xor_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&mv_chan->completed_slots);
 	INIT_LIST_HEAD(&mv_chan->all_slots);
 	mv_chan->common.device = dma_dev;
+	dma_cookie_init(&mv_chan->common);
 
 	list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
 
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 977b592e976b..654876b7ba1d 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -78,7 +78,6 @@ struct mv_xor_device {
 /**
  * struct mv_xor_chan - internal representation of a XOR channel
  * @pending: allows batching of hardware operations
- * @completed_cookie: identifier for the most recently completed operation
  * @lock: serializes enqueue/dequeue operations to the descriptors pool
  * @mmr_base: memory mapped register base
  * @idx: the index of the xor channel
@@ -93,7 +92,6 @@ struct mv_xor_device {
  */
 struct mv_xor_chan {
 	int			pending;
-	dma_cookie_t		completed_cookie;
 	spinlock_t		lock; /* protects the descriptor slot pool */
 	void __iomem		*mmr_base;
 	unsigned int		idx;
@@ -109,7 +107,6 @@ struct mv_xor_chan {
 #ifdef USE_TIMER
 	unsigned long		cleanup_time;
 	u32			current_on_last_cleanup;
-	dma_cookie_t		is_complete_cookie;
 #endif
 };
 
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index b06cd4ca626f..65334c49b71e 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -28,6 +28,8 @@
 #include <mach/dma.h>
 #include <mach/common.h>
 
+#include "dmaengine.h"
+
 /*
  * NOTE: The term "PIO" throughout the mxs-dma implementation means
  * PIO mode of mxs apbh-dma and apbx-dma.  With this working mode,
@@ -111,7 +113,6 @@ struct mxs_dma_chan {
 	struct mxs_dma_ccw		*ccw;
 	dma_addr_t			ccw_phys;
 	int				desc_count;
-	dma_cookie_t			last_completed;
 	enum dma_status			status;
 	unsigned int			flags;
 #define MXS_DMA_SG_LOOP			(1 << 0)
@@ -193,19 +194,6 @@ static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan)
 	mxs_chan->status = DMA_IN_PROGRESS;
 }
 
-static dma_cookie_t mxs_dma_assign_cookie(struct mxs_dma_chan *mxs_chan)
-{
-	dma_cookie_t cookie = mxs_chan->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	mxs_chan->chan.cookie = cookie;
-	mxs_chan->desc.cookie = cookie;
-
-	return cookie;
-}
-
 static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
 {
 	return container_of(chan, struct mxs_dma_chan, chan);
@@ -217,7 +205,7 @@ static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	mxs_dma_enable_chan(mxs_chan);
 
-	return mxs_dma_assign_cookie(mxs_chan);
+	return dma_cookie_assign(tx);
 }
 
 static void mxs_dma_tasklet(unsigned long data)
@@ -274,7 +262,7 @@ static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
 		stat1 &= ~(1 << channel);
 
 		if (mxs_chan->status == DMA_SUCCESS)
-			mxs_chan->last_completed = mxs_chan->desc.cookie;
+			dma_cookie_complete(&mxs_chan->desc);
 
 		/* schedule tasklet on this channel */
 		tasklet_schedule(&mxs_chan->tasklet);
@@ -352,7 +340,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long append)
+		unsigned long append, void *context)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -447,7 +435,8 @@ err_out:
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -538,7 +527,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
 	dma_cookie_t last_used;
 
 	last_used = chan->cookie;
-	dma_set_tx_state(txstate, mxs_chan->last_completed, last_used, 0);
+	dma_set_tx_state(txstate, chan->completed_cookie, last_used, 0);
 
 	return mxs_chan->status;
 }
@@ -630,6 +619,7 @@ static int __init mxs_dma_probe(struct platform_device *pdev)
 
 		mxs_chan->mxs_dma = mxs_dma;
 		mxs_chan->chan.device = &mxs_dma->dma_device;
+		dma_cookie_init(&mxs_chan->chan);
 
 		tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet,
 			     (unsigned long) mxs_chan);
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 823f58179f9d..65c0495a6d40 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -25,6 +25,8 @@
 #include <linux/module.h>
 #include <linux/pch_dma.h>
 
+#include "dmaengine.h"
+
 #define DRV_NAME "pch-dma"
 
 #define DMA_CTL0_DISABLE		0x0
@@ -105,7 +107,6 @@ struct pch_dma_chan {
 
 	spinlock_t		lock;
 
-	dma_cookie_t		completed_cookie;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;
@@ -416,20 +417,6 @@ static void pdc_advance_work(struct pch_dma_chan *pd_chan)
 	}
 }
 
-static dma_cookie_t pdc_assign_cookie(struct pch_dma_chan *pd_chan,
-				      struct pch_dma_desc *desc)
-{
-	dma_cookie_t cookie = pd_chan->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	pd_chan->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
 static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
 {
 	struct pch_dma_desc *desc = to_pd_desc(txd);
@@ -437,7 +424,7 @@ static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
 	dma_cookie_t cookie;
 
 	spin_lock(&pd_chan->lock);
-	cookie = pdc_assign_cookie(pd_chan, desc);
+	cookie = dma_cookie_assign(txd);
 
 	if (list_empty(&pd_chan->active_list)) {
 		list_add_tail(&desc->desc_node, &pd_chan->active_list);
@@ -544,7 +531,7 @@ static int pd_alloc_chan_resources(struct dma_chan *chan)
 	spin_lock_irq(&pd_chan->lock);
 	list_splice(&tmp_list, &pd_chan->free_list);
 	pd_chan->descs_allocated = i;
-	pd_chan->completed_cookie = chan->cookie = 1;
+	dma_cookie_init(chan);
 	spin_unlock_irq(&pd_chan->lock);
 
 	pdc_enable_irq(chan, 1);
@@ -578,19 +565,12 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				    struct dma_tx_state *txstate)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_completed;
-	int ret;
+	enum dma_status ret;
 
 	spin_lock_irq(&pd_chan->lock);
-	last_completed = pd_chan->completed_cookie;
-	last_used = chan->cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
 	spin_unlock_irq(&pd_chan->lock);
 
-	ret = dma_async_is_complete(cookie, last_completed, last_used);
-
-	dma_set_tx_state(txstate, last_completed, last_used, 0);
-
 	return ret;
 }
 
@@ -607,7 +587,8 @@ static void pd_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 			struct scatterlist *sgl, unsigned int sg_len,
-			enum dma_transfer_direction direction, unsigned long flags)
+			enum dma_transfer_direction direction, unsigned long flags,
+			void *context)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma_slave *pd_slave = chan->private;
@@ -932,7 +913,7 @@ static int __devinit pch_dma_probe(struct pci_dev *pdev,
 		struct pch_dma_chan *pd_chan = &pd->channels[i];
 
 		pd_chan->chan.device = &pd->dma;
-		pd_chan->chan.cookie = 1;
+		dma_cookie_init(&pd_chan->chan);
 
 		pd_chan->membase = &regs->desc[i];
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 16b66c827f19..282caf118be8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1,4 +1,6 @@
-/* linux/drivers/dma/pl330.c
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
  *
  * Copyright (C) 2010 Samsung Electronics Co. Ltd.
  *	Jaswinder Singh <jassi.brar@samsung.com>
@@ -9,10 +11,15 @@
  * (at your option) any later version.
  */
 
+#include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 #include <linux/amba/bus.h>
@@ -21,8 +28,497 @@
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 
+#include "dmaengine.h"
+
+#define PL330_MAX_CHAN		8
+#define PL330_MAX_IRQS		32
+#define PL330_MAX_PERI		32
+
+enum pl330_srccachectrl {
+	SCCTRL0,	/* Noncacheable and nonbufferable */
+	SCCTRL1,	/* Bufferable only */
+	SCCTRL2,	/* Cacheable, but do not allocate */
+	SCCTRL3,	/* Cacheable and bufferable, but do not allocate */
+	SINVALID1,
+	SINVALID2,
+	SCCTRL6,	/* Cacheable write-through, allocate on reads only */
+	SCCTRL7,	/* Cacheable write-back, allocate on reads only */
+};
+
+enum pl330_dstcachectrl {
+	DCCTRL0,	/* Noncacheable and nonbufferable */
+	DCCTRL1,	/* Bufferable only */
+	DCCTRL2,	/* Cacheable, but do not allocate */
+	DCCTRL3,	/* Cacheable and bufferable, but do not allocate */
+	DINVALID1,	/* AWCACHE = 0x1000 */
+	DINVALID2,
+	DCCTRL6,	/* Cacheable write-through, allocate on writes only */
+	DCCTRL7,	/* Cacheable write-back, allocate on writes only */
+};
+
+enum pl330_byteswap {
+	SWAP_NO,
+	SWAP_2,
+	SWAP_4,
+	SWAP_8,
+	SWAP_16,
+};
+
+enum pl330_reqtype {
+	MEMTOMEM,
+	MEMTODEV,
+	DEVTOMEM,
+	DEVTODEV,
+};
+
+/* Register and Bit field Definitions */
+#define DS			0x0
+#define DS_ST_STOP		0x0
+#define DS_ST_EXEC		0x1
+#define DS_ST_CMISS		0x2
+#define DS_ST_UPDTPC		0x3
+#define DS_ST_WFE		0x4
+#define DS_ST_ATBRR		0x5
+#define DS_ST_QBUSY		0x6
+#define DS_ST_WFP		0x7
+#define DS_ST_KILL		0x8
+#define DS_ST_CMPLT		0x9
+#define DS_ST_FLTCMP		0xe
+#define DS_ST_FAULT		0xf
+
+#define DPC			0x4
+#define INTEN			0x20
+#define ES			0x24
+#define INTSTATUS		0x28
+#define INTCLR			0x2c
+#define FSM			0x30
+#define FSC			0x34
+#define FTM			0x38
+
+#define _FTC			0x40
+#define FTC(n)			(_FTC + (n)*0x4)
+
+#define _CS			0x100
+#define CS(n)			(_CS + (n)*0x8)
+#define CS_CNS			(1 << 21)
+
+#define _CPC			0x104
+#define CPC(n)			(_CPC + (n)*0x8)
+
+#define _SA			0x400
+#define SA(n)			(_SA + (n)*0x20)
+
+#define _DA			0x404
+#define DA(n)			(_DA + (n)*0x20)
+
+#define _CC			0x408
+#define CC(n)			(_CC + (n)*0x20)
+
+#define CC_SRCINC		(1 << 0)
+#define CC_DSTINC		(1 << 14)
+#define CC_SRCPRI		(1 << 8)
+#define CC_DSTPRI		(1 << 22)
+#define CC_SRCNS		(1 << 9)
+#define CC_DSTNS		(1 << 23)
+#define CC_SRCIA		(1 << 10)
+#define CC_DSTIA		(1 << 24)
+#define CC_SRCBRSTLEN_SHFT	4
+#define CC_DSTBRSTLEN_SHFT	18
+#define CC_SRCBRSTSIZE_SHFT	1
+#define CC_DSTBRSTSIZE_SHFT	15
+#define CC_SRCCCTRL_SHFT	11
+#define CC_SRCCCTRL_MASK	0x7
+#define CC_DSTCCTRL_SHFT	25
+#define CC_DSTCCTRL_MASK	0x7
+#define CC_SWAP_SHFT		28
+
+#define _LC0			0x40c
+#define LC0(n)			(_LC0 + (n)*0x20)
+
+#define _LC1			0x410
+#define LC1(n)			(_LC1 + (n)*0x20)
+
+#define DBGSTATUS		0xd00
+#define DBG_BUSY		(1 << 0)
+
+#define DBGCMD			0xd04
+#define DBGINST0		0xd08
+#define DBGINST1		0xd0c
+
+#define CR0			0xe00
+#define CR1			0xe04
+#define CR2			0xe08
+#define CR3			0xe0c
+#define CR4			0xe10
+#define CRD			0xe14
+
+#define PERIPH_ID		0xfe0
+#define PERIPH_REV_SHIFT	20
+#define PERIPH_REV_MASK		0xf
+#define PERIPH_REV_R0P0		0
+#define PERIPH_REV_R1P0		1
+#define PERIPH_REV_R1P1		2
+#define PCELL_ID		0xff0
+
+#define CR0_PERIPH_REQ_SET	(1 << 0)
+#define CR0_BOOT_EN_SET		(1 << 1)
+#define CR0_BOOT_MAN_NS		(1 << 2)
+#define CR0_NUM_CHANS_SHIFT	4
+#define CR0_NUM_CHANS_MASK	0x7
+#define CR0_NUM_PERIPH_SHIFT	12
+#define CR0_NUM_PERIPH_MASK	0x1f
+#define CR0_NUM_EVENTS_SHIFT	17
+#define CR0_NUM_EVENTS_MASK	0x1f
+
+#define CR1_ICACHE_LEN_SHIFT	0
+#define CR1_ICACHE_LEN_MASK	0x7
+#define CR1_NUM_ICACHELINES_SHIFT	4
+#define CR1_NUM_ICACHELINES_MASK	0xf
+
+#define CRD_DATA_WIDTH_SHIFT	0
+#define CRD_DATA_WIDTH_MASK	0x7
+#define CRD_WR_CAP_SHIFT	4
+#define CRD_WR_CAP_MASK		0x7
+#define CRD_WR_Q_DEP_SHIFT	8
+#define CRD_WR_Q_DEP_MASK	0xf
+#define CRD_RD_CAP_SHIFT	12
+#define CRD_RD_CAP_MASK		0x7
+#define CRD_RD_Q_DEP_SHIFT	16
+#define CRD_RD_Q_DEP_MASK	0xf
+#define CRD_DATA_BUFF_SHIFT	20
+#define CRD_DATA_BUFF_MASK	0x3ff
+
+#define PART			0x330
+#define DESIGNER		0x41
+#define REVISION		0x0
+#define INTEG_CFG		0x0
+#define PERIPH_ID_VAL		((PART << 0) | (DESIGNER << 12))
+
+#define PCELL_ID_VAL		0xb105f00d
+
+#define PL330_STATE_STOPPED		(1 << 0)
+#define PL330_STATE_EXECUTING		(1 << 1)
+#define PL330_STATE_WFE			(1 << 2)
+#define PL330_STATE_FAULTING		(1 << 3)
+#define PL330_STATE_COMPLETING		(1 << 4)
+#define PL330_STATE_WFP			(1 << 5)
+#define PL330_STATE_KILLING		(1 << 6)
+#define PL330_STATE_FAULT_COMPLETING	(1 << 7)
+#define PL330_STATE_CACHEMISS		(1 << 8)
+#define PL330_STATE_UPDTPC		(1 << 9)
+#define PL330_STATE_ATBARRIER		(1 << 10)
+#define PL330_STATE_QUEUEBUSY		(1 << 11)
+#define PL330_STATE_INVALID		(1 << 15)
+
+#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
+				| PL330_STATE_WFE | PL330_STATE_FAULTING)
+
+#define CMD_DMAADDH		0x54
+#define CMD_DMAEND		0x00
+#define CMD_DMAFLUSHP		0x35
+#define CMD_DMAGO		0xa0
+#define CMD_DMALD		0x04
+#define CMD_DMALDP		0x25
+#define CMD_DMALP		0x20
+#define CMD_DMALPEND		0x28
+#define CMD_DMAKILL		0x01
+#define CMD_DMAMOV		0xbc
+#define CMD_DMANOP		0x18
+#define CMD_DMARMB		0x12
+#define CMD_DMASEV		0x34
+#define CMD_DMAST		0x08
+#define CMD_DMASTP		0x29
+#define CMD_DMASTZ		0x0c
+#define CMD_DMAWFE		0x36
+#define CMD_DMAWFP		0x30
+#define CMD_DMAWMB		0x13
+
+#define SZ_DMAADDH		3
+#define SZ_DMAEND		1
+#define SZ_DMAFLUSHP		2
+#define SZ_DMALD		1
+#define SZ_DMALDP		2
+#define SZ_DMALP		2
+#define SZ_DMALPEND		2
+#define SZ_DMAKILL		1
+#define SZ_DMAMOV		6
+#define SZ_DMANOP		1
+#define SZ_DMARMB		1
+#define SZ_DMASEV		2
+#define SZ_DMAST		1
+#define SZ_DMASTP		2
+#define SZ_DMASTZ		1
+#define SZ_DMAWFE		2
+#define SZ_DMAWFP		2
+#define SZ_DMAWMB		1
+#define SZ_DMAGO		6
+
+#define BRST_LEN(ccr)		((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
+#define BRST_SIZE(ccr)		(1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
+
+#define BYTE_TO_BURST(b, ccr)	((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
+#define BURST_TO_BYTE(c, ccr)	((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))
+
+/*
+ * With a 256-byte buffer, we can do more than 2.5MB and 5MB xfers per req
+ * at 1 byte/burst for P<->M and M<->M respectively.
+ * For a typical scenario, at 1 word/burst, 10MB and 20MB xfers per req
+ * should be enough for P<->M and M<->M respectively.
+ */
+#define MCODE_BUFF_PER_REQ	256
+
+/* If the _pl330_req is available to the client */
+#define IS_FREE(req)	(*((u8 *)((req)->mc_cpu)) == CMD_DMAEND)
+
+/* Use this _only_ to wait on transient states */
+#define UNTIL(t, s)	while (!(_state(t) & (s))) cpu_relax();
+
+#ifdef PL330_DEBUG_MCGEN
+static unsigned cmd_line;
+#define PL330_DBGCMD_DUMP(off, x...)	do { \
+						printk("%x:", cmd_line); \
+						printk(x); \
+						cmd_line += off; \
+					} while (0)
+#define PL330_DBGMC_START(addr)		(cmd_line = addr)
+#else
+#define PL330_DBGCMD_DUMP(off, x...)	do {} while (0)
+#define PL330_DBGMC_START(addr)		do {} while (0)
+#endif
+
+/* The number of default descriptors */
+
 #define NR_DEFAULT_DESC	16
 
+/* Populated by the PL330 core driver, describing the DMAC to the DMA API driver */
+struct pl330_config {
+	u32	periph_id;
+	u32	pcell_id;
+#define DMAC_MODE_NS	(1 << 0)
+	unsigned int	mode;
+	unsigned int	data_bus_width:10; /* In number of bits */
+	unsigned int	data_buf_dep:10;
+	unsigned int	num_chan:4;
+	unsigned int	num_peri:6;
+	u32		peri_ns;
+	unsigned int	num_events:6;
+	u32		irq_ns;
+};
+
+/* Handle to the DMAC provided to the PL330 core */
+struct pl330_info {
+	/* Owning device */
+	struct device *dev;
+	/* Size of MicroCode buffers for each channel. */
+	unsigned mcbufsz;
+	/* ioremap'ed address of PL330 registers. */
+	void __iomem	*base;
+	/* Client can freely use it. */
+	void	*client_data;
+	/* PL330 core data, Client must not touch it. */
+	void	*pl330_data;
+	/* Populated by the PL330 core driver during pl330_add */
+	struct pl330_config	pcfg;
+	/*
+	 * If the DMAC has some reset mechanism, then the
+	 * client may want to provide a pointer to the method.
+	 */
+	void (*dmac_reset)(struct pl330_info *pi);
+};
+
+/*
+ * Request Configuration.
+ * The PL330 core does not modify this and uses the last
+ * working configuration if the request doesn't provide any.
+ *
+ * The client may want to provide this info only for the
+ * first request and for any request with new settings.
+ */
+struct pl330_reqcfg {
+	/* Address Incrementing */
+	unsigned dst_inc:1;
+	unsigned src_inc:1;
+
+	/*
+	 * For now, the SRC & DST protection levels
+	 * and burst size/length are assumed same.
+	 */
+	bool nonsecure;
+	bool privileged;
+	bool insnaccess;
+	unsigned brst_len:5;
+	unsigned brst_size:3; /* in power of 2 */
+
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+	enum pl330_byteswap swap;
+	struct pl330_config *pcfg;
+};
+
+/*
+ * One cycle of DMAC operation.
+ * There may be more than one xfer in a request.
+ */
+struct pl330_xfer {
+	u32 src_addr;
+	u32 dst_addr;
+	/* Size to xfer */
+	u32 bytes;
+	/*
+	 * Pointer to next xfer in the list.
+	 * The last xfer in the req must point to NULL.
+	 */
+	struct pl330_xfer *next;
+};
+
+/* The xfer callbacks are made with one of these arguments. */
+enum pl330_op_err {
+	/* All xfers in the request were successful. */
+	PL330_ERR_NONE,
+	/* The req was aborted due to a global error. */
+	PL330_ERR_ABORT,
+	/* The req failed due to a problem with the channel. */
+	PL330_ERR_FAIL,
+};
+
+/* A request defining a scatter-gather list ending with a NULL xfer. */
+struct pl330_req {
+	enum pl330_reqtype rqtype;
+	/* Index of peripheral for the xfer. */
+	unsigned peri:5;
+	/* Unique token for this xfer, set by the client. */
+	void *token;
+	/* Callback to be called after xfer. */
+	void (*xfer_cb)(void *token, enum pl330_op_err err);
+	/* If NULL, req will be done at last set parameters. */
+	struct pl330_reqcfg *cfg;
+	/* Pointer to first xfer in the request. */
+	struct pl330_xfer *x;
+};
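To make the request interface above concrete, a minimal memcpy submission
might look like the following. This is a hypothetical sketch (example_submit
and the example_* variables are illustrative, not part of the driver); it
relies on pl330_request_channel(), pl330_submit_req() and pl330_chan_ctrl()
defined further down, and the req and xfer must stay alive until the transfer
completes, hence the statics:

	static struct pl330_xfer example_x;
	static struct pl330_req example_r;

	static int example_submit(struct pl330_info *pi, struct pl330_reqcfg *cfg,
				  u32 src, u32 dst, u32 len)
	{
		void *ch = pl330_request_channel(pi);

		if (!ch)
			return -EBUSY;

		example_x.src_addr = src;
		example_x.dst_addr = dst;
		example_x.bytes = len;		/* must be burst aligned */
		example_x.next = NULL;		/* single xfer unit */

		example_r.rqtype = MEMTOMEM;
		example_r.cfg = cfg;		/* or NULL to reuse last settings */
		example_r.xfer_cb = NULL;	/* no completion callback here */
		example_r.x = &example_x;

		if (pl330_submit_req(ch, &example_r))
			return -EIO;

		return pl330_chan_ctrl(ch, PL330_OP_START);
	}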
+
+/*
+ * To know the status of the channel and DMAC, the client
+ * provides a pointer to this structure. The PL330 core
+ * fills it with current information.
+ */
+struct pl330_chanstatus {
+	/*
+	 * If the DMAC engine has halted due to some error,
+	 * the client should remove and re-add the DMAC.
+	 */
+	bool dmac_halted;
+	/*
+	 * If channel is halted due to some error,
+	 * the client should ABORT/FLUSH and START the channel.
+	 */
+	bool faulting;
+	/* Location of last load */
+	u32 src_addr;
+	/* Location of last store */
+	u32 dst_addr;
+	/*
+	 * Pointer to the currently active req, NULL if channel is
+	 * inactive, even though the requests may be present.
+	 */
+	struct pl330_req *top_req;
+	/* Pointer to req waiting second in the queue if any. */
+	struct pl330_req *wait_req;
+};
+
+enum pl330_chan_op {
+	/* Start the channel */
+	PL330_OP_START,
+	/* Abort the active xfer */
+	PL330_OP_ABORT,
+	/* Stop xfer and flush queue */
+	PL330_OP_FLUSH,
+};
+
+struct _xfer_spec {
+	u32 ccr;
+	struct pl330_req *r;
+	struct pl330_xfer *x;
+};
+
+enum dmamov_dst {
+	SAR = 0,
+	CCR,
+	DAR,
+};
+
+enum pl330_dst {
+	SRC = 0,
+	DST,
+};
+
+enum pl330_cond {
+	SINGLE,
+	BURST,
+	ALWAYS,
+};
+
+struct _pl330_req {
+	u32 mc_bus;
+	void *mc_cpu;
+	/* Number of bytes taken to set up the MC for the req */
+	u32 mc_len;
+	struct pl330_req *r;
+	/* Hook to attach to DMAC's list of reqs with due callback */
+	struct list_head rqd;
+};
+
+/* ToBeDone for tasklet */
+struct _pl330_tbd {
+	bool reset_dmac;
+	bool reset_mngr;
+	u8 reset_chan;
+};
+
+/* A DMAC Thread */
+struct pl330_thread {
+	u8 id;
+	int ev;
+	/* If the channel is not yet acquired by any client */
+	bool free;
+	/* Parent DMAC */
+	struct pl330_dmac *dmac;
+	/* Only two at a time */
+	struct _pl330_req req[2];
+	/* Index of the last enqueued request */
+	unsigned lstenq;
+	/* Index of the last submitted request or -1 if the DMA is stopped */
+	int req_running;
+};
+
+enum pl330_dmac_state {
+	UNINIT,
+	INIT,
+	DYING,
+};
+
+/* A DMAC */
+struct pl330_dmac {
+	spinlock_t		lock;
+	/* Holds list of reqs with due callbacks */
+	struct list_head	req_done;
+	/* Pointer to platform specific stuff */
+	struct pl330_info	*pinfo;
+	/* Maps event/irq number to owning channel (max 32 events/irqs) */
+	int			events[32];
+	/* BUS address of MicroCode buffer */
+	u32			mcode_bus;
+	/* CPU address of MicroCode buffer */
+	void			*mcode_cpu;
+	/* List of all Channel threads */
+	struct pl330_thread	*channels;
+	/* Pointer to the MANAGER thread */
+	struct pl330_thread	*manager;
+	/* To handle bad news in interrupt */
+	struct tasklet_struct	tasks;
+	struct _pl330_tbd	dmac_tbd;
+	/* State of DMAC operation */
+	enum pl330_dmac_state	state;
+};
+
 enum desc_status {
 	/* In the DMAC pool */
 	FREE,
@@ -51,9 +547,6 @@ struct dma_pl330_chan {
 	/* DMA-Engine Channel */
 	struct dma_chan chan;
 
-	/* Last completed cookie */
-	dma_cookie_t completed;
-
 	/* List of to be xfered descriptors */
 	struct list_head work_list;
 
@@ -117,6 +610,1599 @@ struct dma_pl330_desc {
 	struct dma_pl330_chan *pchan;
 };
 
+static inline void _callback(struct pl330_req *r, enum pl330_op_err err)
+{
+	if (r && r->xfer_cb)
+		r->xfer_cb(r->token, err);
+}
+
+static inline bool _queue_empty(struct pl330_thread *thrd)
+{
+	return (IS_FREE(&thrd->req[0]) && IS_FREE(&thrd->req[1]))
+		? true : false;
+}
+
+static inline bool _queue_full(struct pl330_thread *thrd)
+{
+	return (IS_FREE(&thrd->req[0]) || IS_FREE(&thrd->req[1]))
+		? false : true;
+}
+
+static inline bool is_manager(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	/* MANAGER is indexed at the end */
+	if (thrd->id == pl330->pinfo->pcfg.num_chan)
+		return true;
+	else
+		return false;
+}
+
+/* If manager of the thread is in Non-Secure mode */
+static inline bool _manager_ns(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false;
+}
+
+static inline u32 get_id(struct pl330_info *pi, u32 off)
+{
+	void __iomem *regs = pi->base;
+	u32 id = 0;
+
+	id |= (readb(regs + off + 0x0) << 0);
+	id |= (readb(regs + off + 0x4) << 8);
+	id |= (readb(regs + off + 0x8) << 16);
+	id |= (readb(regs + off + 0xc) << 24);
+
+	return id;
+}
+
+static inline u32 get_revision(u32 periph_id)
+{
+	return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK;
+}
+
+static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
+		enum pl330_dst da, u16 val)
+{
+	if (dry_run)
+		return SZ_DMAADDH;
+
+	buf[0] = CMD_DMAADDH;
+	buf[0] |= (da << 1);
+	*((u16 *)&buf[1]) = val;
+
+	PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n",
+		da == 1 ? "DA" : "SA", val);
+
+	return SZ_DMAADDH;
+}
+
+static inline u32 _emit_END(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAEND;
+
+	buf[0] = CMD_DMAEND;
+
+	PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n");
+
+	return SZ_DMAEND;
+}
+
+static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAFLUSHP;
+
+	buf[0] = CMD_DMAFLUSHP;
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3);
+
+	return SZ_DMAFLUSHP;
+}
+
+static inline u32 _emit_LD(unsigned dry_run, u8 buf[],	enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMALD;
+
+	buf[0] = CMD_DMALD;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMALD;
+}
+
+static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMALDP;
+
+	buf[0] = CMD_DMALDP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMALDP;
+}
+
+static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
+		unsigned loop, u8 cnt)
+{
+	if (dry_run)
+		return SZ_DMALP;
+
+	buf[0] = CMD_DMALP;
+
+	if (loop)
+		buf[0] |= (1 << 1);
+
+	cnt--; /* DMAC increments by 1 internally */
+	buf[1] = cnt;
+
+	PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt);
+
+	return SZ_DMALP;
+}
+
+struct _arg_LPEND {
+	enum pl330_cond cond;
+	bool forever;
+	unsigned loop;
+	u8 bjump;
+};
+
+static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
+		const struct _arg_LPEND *arg)
+{
+	enum pl330_cond cond = arg->cond;
+	bool forever = arg->forever;
+	unsigned loop = arg->loop;
+	u8 bjump = arg->bjump;
+
+	if (dry_run)
+		return SZ_DMALPEND;
+
+	buf[0] = CMD_DMALPEND;
+
+	if (loop)
+		buf[0] |= (1 << 2);
+
+	if (!forever)
+		buf[0] |= (1 << 4);
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	buf[1] = bjump;
+
+	PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n",
+			forever ? "FE" : "END",
+			cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'),
+			loop ? '1' : '0',
+			bjump);
+
+	return SZ_DMALPEND;
+}
+
+static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAKILL;
+
+	buf[0] = CMD_DMAKILL;
+
+	return SZ_DMAKILL;
+}
+
+static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
+		enum dmamov_dst dst, u32 val)
+{
+	if (dry_run)
+		return SZ_DMAMOV;
+
+	buf[0] = CMD_DMAMOV;
+	buf[1] = dst;
+	*((u32 *)&buf[2]) = val;
+
+	PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
+		dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
+
+	return SZ_DMAMOV;
+}
+
+static inline u32 _emit_NOP(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMANOP;
+
+	buf[0] = CMD_DMANOP;
+
+	PL330_DBGCMD_DUMP(SZ_DMANOP, "\tDMANOP\n");
+
+	return SZ_DMANOP;
+}
+
+static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMARMB;
+
+	buf[0] = CMD_DMARMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n");
+
+	return SZ_DMARMB;
+}
+
+static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
+{
+	if (dry_run)
+		return SZ_DMASEV;
+
+	buf[0] = CMD_DMASEV;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3);
+
+	return SZ_DMASEV;
+}
+
+static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMAST;
+
+	buf[0] = CMD_DMAST;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMAST;
+}
+
+static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMASTP;
+
+	buf[0] = CMD_DMASTP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMASTP;
+}
+
+static inline u32 _emit_STZ(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMASTZ;
+
+	buf[0] = CMD_DMASTZ;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTZ, "\tDMASTZ\n");
+
+	return SZ_DMASTZ;
+}
+
+static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev,
+		unsigned invalidate)
+{
+	if (dry_run)
+		return SZ_DMAWFE;
+
+	buf[0] = CMD_DMAWFE;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	if (invalidate)
+		buf[1] |= (1 << 1);
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFE, "\tDMAWFE %u%s\n",
+		ev >> 3, invalidate ? ", I" : "");
+
+	return SZ_DMAWFE;
+}
+
+static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAWFP;
+
+	buf[0] = CMD_DMAWFP;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (0 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (0 << 0);
+	else
+		buf[0] |= (0 << 1) | (1 << 0);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3);
+
+	return SZ_DMAWFP;
+}
+
+static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAWMB;
+
+	buf[0] = CMD_DMAWMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n");
+
+	return SZ_DMAWMB;
+}
+
+struct _arg_GO {
+	u8 chan;
+	u32 addr;
+	unsigned ns;
+};
+
+static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
+		const struct _arg_GO *arg)
+{
+	u8 chan = arg->chan;
+	u32 addr = arg->addr;
+	unsigned ns = arg->ns;
+
+	if (dry_run)
+		return SZ_DMAGO;
+
+	buf[0] = CMD_DMAGO;
+	buf[0] |= (ns << 1);
+
+	buf[1] = chan & 0x7;
+
+	*((u32 *)&buf[2]) = addr;
+
+	return SZ_DMAGO;
+}
+
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * (t))
+
+/* Returns true on timeout */
+static bool _until_dmac_idle(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	unsigned long loops = msecs_to_loops(5);
+
+	do {
+		/* Until Manager is Idle */
+		if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
+			break;
+
+		cpu_relax();
+	} while (--loops);
+
+	if (!loops)
+		return true;
+
+	return false;
+}
+
+static inline void _execute_DBGINSN(struct pl330_thread *thrd,
+		u8 insn[], bool as_manager)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u32 val;
+
+	val = (insn[0] << 16) | (insn[1] << 24);
+	if (!as_manager) {
+		val |= (1 << 0);
+		val |= (thrd->id << 8); /* Channel Number */
+	}
+	writel(val, regs + DBGINST0);
+
+	val = *((u32 *)&insn[2]);
+	writel(val, regs + DBGINST1);
+
+	/* If timed out due to halted state-machine */
+	if (_until_dmac_idle(thrd)) {
+		dev_err(thrd->dmac->pinfo->dev, "DMAC halted!\n");
+		return;
+	}
+
+	/* Get going */
+	writel(0, regs + DBGCMD);
+}
+
+/*
+ * Mark a _pl330_req as free.
+ * We do it by writing DMAEND as the first instruction
+ * because no valid request is going to have DMAEND as
+ * its first instruction to execute.
+ */
+static void mark_free(struct pl330_thread *thrd, int idx)
+{
+	struct _pl330_req *req = &thrd->req[idx];
+
+	_emit_END(0, req->mc_cpu);
+	req->mc_len = 0;
+
+	thrd->req_running = -1;
+}
+
+static inline u32 _state(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u32 val;
+
+	if (is_manager(thrd))
+		val = readl(regs + DS) & 0xf;
+	else
+		val = readl(regs + CS(thrd->id)) & 0xf;
+
+	switch (val) {
+	case DS_ST_STOP:
+		return PL330_STATE_STOPPED;
+	case DS_ST_EXEC:
+		return PL330_STATE_EXECUTING;
+	case DS_ST_CMISS:
+		return PL330_STATE_CACHEMISS;
+	case DS_ST_UPDTPC:
+		return PL330_STATE_UPDTPC;
+	case DS_ST_WFE:
+		return PL330_STATE_WFE;
+	case DS_ST_FAULT:
+		return PL330_STATE_FAULTING;
+	case DS_ST_ATBRR:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_ATBARRIER;
+	case DS_ST_QBUSY:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_QUEUEBUSY;
+	case DS_ST_WFP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_WFP;
+	case DS_ST_KILL:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_KILLING;
+	case DS_ST_CMPLT:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_COMPLETING;
+	case DS_ST_FLTCMP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_FAULT_COMPLETING;
+	default:
+		return PL330_STATE_INVALID;
+	}
+}
+
+static void _stop(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+
+	if (_state(thrd) == PL330_STATE_FAULT_COMPLETING)
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+	/* Return if nothing needs to be done */
+	if (_state(thrd) == PL330_STATE_COMPLETING
+		  || _state(thrd) == PL330_STATE_KILLING
+		  || _state(thrd) == PL330_STATE_STOPPED)
+		return;
+
+	_emit_KILL(0, insn);
+
+	/* Stop generating interrupts for SEV */
+	writel(readl(regs + INTEN) & ~(1 << thrd->ev), regs + INTEN);
+
+	_execute_DBGINSN(thrd, insn, is_manager(thrd));
+}
+
+/* Start executing the next queued req of thread 'thrd' */
+static bool _trigger(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo->base;
+	struct _pl330_req *req;
+	struct pl330_req *r;
+	struct _arg_GO go;
+	unsigned ns;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+	int idx;
+
+	/* Return if already ACTIVE */
+	if (_state(thrd) != PL330_STATE_STOPPED)
+		return true;
+
+	idx = 1 - thrd->lstenq;
+	if (!IS_FREE(&thrd->req[idx]))
+		req = &thrd->req[idx];
+	else {
+		idx = thrd->lstenq;
+		if (!IS_FREE(&thrd->req[idx]))
+			req = &thrd->req[idx];
+		else
+			req = NULL;
+	}
+
+	/* Return if no request */
+	if (!req || !req->r)
+		return true;
+
+	r = req->r;
+
+	if (r->cfg)
+		ns = r->cfg->nonsecure ? 1 : 0;
+	else if (readl(regs + CS(thrd->id)) & CS_CNS)
+		ns = 1;
+	else
+		ns = 0;
+
+	/* See 'Abort Sources' point-4 at Page 2-25 */
+	if (_manager_ns(thrd) && !ns)
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d Recipe for ABORT!\n",
+			__func__, __LINE__);
+
+	go.chan = thrd->id;
+	go.addr = req->mc_bus;
+	go.ns = ns;
+	_emit_GO(0, insn, &go);
+
+	/* Set to generate interrupts for SEV */
+	writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
+
+	/* Only manager can execute GO */
+	_execute_DBGINSN(thrd, insn, true);
+
+	thrd->req_running = idx;
+
+	return true;
+}
+
+static bool _start(struct pl330_thread *thrd)
+{
+	switch (_state(thrd)) {
+	case PL330_STATE_FAULT_COMPLETING:
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+		if (_state(thrd) == PL330_STATE_KILLING)
+			UNTIL(thrd, PL330_STATE_STOPPED)
+
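+		/* fall through */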
+	case PL330_STATE_FAULTING:
+		_stop(thrd);
+
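+		/* fall through */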
+	case PL330_STATE_KILLING:
+	case PL330_STATE_COMPLETING:
+		UNTIL(thrd, PL330_STATE_STOPPED)
+
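+		/* fall through */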
+	case PL330_STATE_STOPPED:
+		return _trigger(thrd);
+
+	case PL330_STATE_WFP:
+	case PL330_STATE_QUEUEBUSY:
+	case PL330_STATE_ATBARRIER:
+	case PL330_STATE_UPDTPC:
+	case PL330_STATE_CACHEMISS:
+	case PL330_STATE_EXECUTING:
+		return true;
+
+	case PL330_STATE_WFE: /* For RESUME, nothing yet */
+	default:
+		return false;
+	}
+}
+
+static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+	struct pl330_config *pcfg = pxs->r->cfg->pcfg;
+
+	/* check lock-up free version */
+	if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		}
+	} else {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_RMB(dry_run, &buf[off]);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+			off += _emit_WMB(dry_run, &buf[off]);
+		}
+	}
+
+	return off;
+}
+
+static inline int _ldst_devtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	}
+
+	return off;
+}
+
+static inline int _ldst_memtodev(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_LD(dry_run, &buf[off], ALWAYS);
+		off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	}
+
+	return off;
+}
+
+static int _bursts(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+
+	switch (pxs->r->rqtype) {
+	case MEMTODEV:
+		off += _ldst_memtodev(dry_run, &buf[off], pxs, cyc);
+		break;
+	case DEVTOMEM:
+		off += _ldst_devtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+	case MEMTOMEM:
+		off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+	default:
+		off += 0x40000000; /* Scare off the Client */
+		break;
+	}
+
+	return off;
+}
+
+/* Returns bytes consumed and updates bursts */
+static inline int _loop(unsigned dry_run, u8 buf[],
+		unsigned long *bursts, const struct _xfer_spec *pxs)
+{
+	int cyc, cycmax, szlp, szlpend, szbrst, off;
+	unsigned lcnt0, lcnt1, ljmp0, ljmp1;
+	struct _arg_LPEND lpend;
+
+	/* Max iterations possible in DMALP is 256 */
+	if (*bursts >= 256*256) {
+		lcnt1 = 256;
+		lcnt0 = 256;
+		cyc = *bursts / lcnt1 / lcnt0;
+	} else if (*bursts > 256) {
+		lcnt1 = 256;
+		lcnt0 = *bursts / lcnt1;
+		cyc = 1;
+	} else {
+		lcnt1 = *bursts;
+		lcnt0 = 0;
+		cyc = 1;
+	}
+
+	szlp = _emit_LP(1, buf, 0, 0);
+	szbrst = _bursts(1, buf, pxs, 1);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 0;
+	lpend.bjump = 0;
+	szlpend = _emit_LPEND(1, buf, &lpend);
+
+	if (lcnt0) {
+		szlp *= 2;
+		szlpend *= 2;
+	}
+
+	/*
+	 * Max bursts that we can unroll due to the limit on the
+	 * size of the backward jump that can be encoded in DMALPEND,
+	 * which is 8 bits and hence 255
+	 */
+	cycmax = (255 - (szlp + szlpend)) / szbrst;
+
+	cyc = (cycmax < cyc) ? cycmax : cyc;
+
+	off = 0;
+
+	if (lcnt0) {
+		off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
+		ljmp0 = off;
+	}
+
+	off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
+	ljmp1 = off;
+
+	off += _bursts(dry_run, &buf[off], pxs, cyc);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 1;
+	lpend.bjump = off - ljmp1;
+	off += _emit_LPEND(dry_run, &buf[off], &lpend);
+
+	if (lcnt0) {
+		lpend.cond = ALWAYS;
+		lpend.forever = false;
+		lpend.loop = 0;
+		lpend.bjump = off - ljmp0;
+		off += _emit_LPEND(dry_run, &buf[off], &lpend);
+	}
+
+	*bursts = lcnt1 * cyc;
+	if (lcnt0)
+		*bursts *= lcnt0;
+
+	return off;
+}
+
+static inline int _setup_loops(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	u32 ccr = pxs->ccr;
+	unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
+	int off = 0;
+
+	while (bursts) {
+		c = bursts;
+		off += _loop(dry_run, &buf[off], &c, pxs);
+		bursts -= c;
+	}
+
+	return off;
+}
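As a worked example of the two functions above (ignoring the cycmax unrolling
cap): a request of 70000 bursts takes three _loop() passes. The first sees
*bursts >= 256*256, so lcnt0 = lcnt1 = 256 and cyc = 70000/65536 = 1,
consuming 65536 bursts; the second sees 4464 > 256, so lcnt1 = 256 and
lcnt0 = 4464/256 = 17, consuming 4352; the final pass takes lcnt1 = 112 with
the outer loop elided.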
+
+static inline int _setup_xfer(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	int off = 0;
+
+	/* DMAMOV SAR, x->src_addr */
+	off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr);
+	/* DMAMOV DAR, x->dst_addr */
+	off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr);
+
+	/* Setup Loop(s) */
+	off += _setup_loops(dry_run, &buf[off], pxs);
+
+	return off;
+}
+
+/*
+ * A req is a sequence of one or more xfer units.
+ * Returns the number of bytes taken to set up the MC for the req.
+ */
+static int _setup_req(unsigned dry_run, struct pl330_thread *thrd,
+		unsigned index, struct _xfer_spec *pxs)
+{
+	struct _pl330_req *req = &thrd->req[index];
+	struct pl330_xfer *x;
+	u8 *buf = req->mc_cpu;
+	int off = 0;
+
+	PL330_DBGMC_START(req->mc_bus);
+
+	/* DMAMOV CCR, ccr */
+	off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
+
+	x = pxs->r->x;
+	do {
+		/* Error if xfer length is not aligned at burst size */
+		if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr)))
+			return -EINVAL;
+
+		pxs->x = x;
+		off += _setup_xfer(dry_run, &buf[off], pxs);
+
+		x = x->next;
+	} while (x);
+
+	/* DMASEV peripheral/event */
+	off += _emit_SEV(dry_run, &buf[off], thrd->ev);
+	/* DMAEND */
+	off += _emit_END(dry_run, &buf[off]);
+
+	return off;
+}
+
+static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
+{
+	u32 ccr = 0;
+
+	if (rqc->src_inc)
+		ccr |= CC_SRCINC;
+
+	if (rqc->dst_inc)
+		ccr |= CC_DSTINC;
+
+	/* We set the same protection levels for SRC and DST for now */
+	if (rqc->privileged)
+		ccr |= CC_SRCPRI | CC_DSTPRI;
+	if (rqc->nonsecure)
+		ccr |= CC_SRCNS | CC_DSTNS;
+	if (rqc->insnaccess)
+		ccr |= CC_SRCIA | CC_DSTIA;
+
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+
+	ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
+	ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+
+	ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
+	ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
+
+	ccr |= (rqc->swap << CC_SWAP_SHFT);
+
+	return ccr;
+}
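As an example of the encoding above: brst_size = 2 (4-byte beats) and
brst_len = 8 store 2 at CC_SRCBRSTSIZE_SHFT and 7 at CC_SRCBRSTLEN_SHFT, so
the earlier macros recover BRST_SIZE(ccr) = 1 << 2 = 4 and
BRST_LEN(ccr) = 7 + 1 = 8, giving BYTE_TO_BURST(1024, ccr) = 1024/4/8 = 32
bursts.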
+
+static inline bool _is_valid(u32 ccr)
+{
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+
+	dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DSTCCTRL_MASK;
+	scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK;
+
+	if (dcctl == DINVALID1 || dcctl == DINVALID2
+			|| scctl == SINVALID1 || scctl == SINVALID2)
+		return false;
+	else
+		return true;
+}
+
+/*
+ * Submit a list of xfers after which the client wants notification.
+ * The client is not notified after each xfer unit, just once after all
+ * xfer units are done or some error occurs.
+ */
+static int pl330_submit_req(void *ch_id, struct pl330_req *r)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	struct pl330_info *pi;
+	struct _xfer_spec xs;
+	unsigned long flags;
+	void __iomem *regs;
+	unsigned idx;
+	u32 ccr;
+	int ret = 0;
+
+	/* No Req or Unacquired Channel or DMAC */
+	if (!r || !thrd || thrd->free)
+		return -EINVAL;
+
+	pl330 = thrd->dmac;
+	pi = pl330->pinfo;
+	regs = pi->base;
+
+	if (pl330->state == DYING
+		|| pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d\n",
+			__func__, __LINE__);
+		return -EAGAIN;
+	}
+
+	/* If request is for a non-existent peripheral */
+	if (r->rqtype != MEMTOMEM && r->peri >= pi->pcfg.num_peri) {
+		dev_info(thrd->dmac->pinfo->dev,
+				"%s:%d Invalid peripheral(%u)!\n",
+				__func__, __LINE__, r->peri);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	if (_queue_full(thrd)) {
+		ret = -EAGAIN;
+		goto xfer_exit;
+	}
+
+	/* Prefer Secure Channel; cfg may be NULL if reusing last settings */
+	if (r->cfg) {
+		if (!_manager_ns(thrd))
+			r->cfg->nonsecure = 0;
+		else
+			r->cfg->nonsecure = 1;
+	}
+
+	/* Use last settings, if not provided */
+	if (r->cfg)
+		ccr = _prepare_ccr(r->cfg);
+	else
+		ccr = readl(regs + CC(thrd->id));
+
+	/* If this req doesn't have valid xfer settings */
+	if (!_is_valid(ccr)) {
+		ret = -EINVAL;
+		dev_info(thrd->dmac->pinfo->dev, "%s:%d Invalid CCR(%x)!\n",
+			__func__, __LINE__, ccr);
+		goto xfer_exit;
+	}
+
+	idx = IS_FREE(&thrd->req[0]) ? 0 : 1;
+
+	xs.ccr = ccr;
+	xs.r = r;
+
+	/* First dry run to check if req is acceptable */
+	ret = _setup_req(1, thrd, idx, &xs);
+	if (ret < 0)
+		goto xfer_exit;
+
+	if (ret > pi->mcbufsz / 2) {
+		dev_info(thrd->dmac->pinfo->dev,
+			"%s:%d Try increasing mcbufsz\n",
+				__func__, __LINE__);
+		ret = -ENOMEM;
+		goto xfer_exit;
+	}
+
+	/* Hook the request */
+	thrd->lstenq = idx;
+	thrd->req[idx].mc_len = _setup_req(0, thrd, idx, &xs);
+	thrd->req[idx].r = r;
+
+	ret = 0;
+
+xfer_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return ret;
+}
+
+static void pl330_dotask(unsigned long data)
+{
+	struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
+	struct pl330_info *pi = pl330->pinfo;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	/* The DMAC itself has gone nuts */
+	if (pl330->dmac_tbd.reset_dmac) {
+		pl330->state = DYING;
+		/* Reset the manager too */
+		pl330->dmac_tbd.reset_mngr = true;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_dmac = false;
+	}
+
+	if (pl330->dmac_tbd.reset_mngr) {
+		_stop(pl330->manager);
+		/* Reset all channels */
+		pl330->dmac_tbd.reset_chan = (1 << pi->pcfg.num_chan) - 1;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_mngr = false;
+	}
+
+	for (i = 0; i < pi->pcfg.num_chan; i++) {
+
+		if (pl330->dmac_tbd.reset_chan & (1 << i)) {
+			struct pl330_thread *thrd = &pl330->channels[i];
+			void __iomem *regs = pi->base;
+			enum pl330_op_err err;
+
+			_stop(thrd);
+
+			if (readl(regs + FSC) & (1 << thrd->id))
+				err = PL330_ERR_FAIL;
+			else
+				err = PL330_ERR_ABORT;
+
+			spin_unlock_irqrestore(&pl330->lock, flags);
+
+			_callback(thrd->req[1 - thrd->lstenq].r, err);
+			_callback(thrd->req[thrd->lstenq].r, err);
+
+			spin_lock_irqsave(&pl330->lock, flags);
+
+			thrd->req[0].r = NULL;
+			thrd->req[1].r = NULL;
+			mark_free(thrd, 0);
+			mark_free(thrd, 1);
+
+			/* Clear the reset flag */
+			pl330->dmac_tbd.reset_chan &= ~(1 << i);
+		}
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return;
+}
+
+/* Returns 1 if state was updated, 0 otherwise */
+static int pl330_update(const struct pl330_info *pi)
+{
+	struct _pl330_req *rqdone;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	void __iomem *regs;
+	u32 val;
+	int id, ev, ret = 0;
+
+	if (!pi || !pi->pl330_data)
+		return 0;
+
+	regs = pi->base;
+	pl330 = pi->pl330_data;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	val = readl(regs + FSM) & 0x1;
+	if (val)
+		pl330->dmac_tbd.reset_mngr = true;
+	else
+		pl330->dmac_tbd.reset_mngr = false;
+
+	val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1);
+	pl330->dmac_tbd.reset_chan |= val;
+	if (val) {
+		int i = 0;
+		while (i < pi->pcfg.num_chan) {
+			if (val & (1 << i)) {
+				dev_info(pi->dev,
+					"Reset Channel-%d\t CS-%x FTC-%x\n",
+						i, readl(regs + CS(i)),
+						readl(regs + FTC(i)));
+				_stop(&pl330->channels[i]);
+			}
+			i++;
+		}
+	}
+
+	/* Check which event happened, i.e. which thread notified */
+	val = readl(regs + ES);
+	if (pi->pcfg.num_events < 32
+			&& val & ~((1 << pi->pcfg.num_events) - 1)) {
+		pl330->dmac_tbd.reset_dmac = true;
+		dev_err(pi->dev, "%s:%d Unexpected!\n", __func__, __LINE__);
+		ret = 1;
+		goto updt_exit;
+	}
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++) {
+		if (val & (1 << ev)) { /* Event occurred */
+			struct pl330_thread *thrd;
+			u32 inten = readl(regs + INTEN);
+			int active;
+
+			/* Clear the event */
+			if (inten & (1 << ev))
+				writel(1 << ev, regs + INTCLR);
+
+			ret = 1;
+
+			id = pl330->events[ev];
+
+			thrd = &pl330->channels[id];
+
+			active = thrd->req_running;
+			if (active == -1) /* Aborted */
+				continue;
+
+			rqdone = &thrd->req[active];
+			mark_free(thrd, active);
+
+			/* Get going again ASAP */
+			_start(thrd);
+
+			/* For now, just make a list of callbacks to be done */
+			list_add_tail(&rqdone->rqd, &pl330->req_done);
+		}
+	}
+
+	/* Now that we are in no hurry, do the callbacks */
+	while (!list_empty(&pl330->req_done)) {
+		struct pl330_req *r;
+
+		rqdone = container_of(pl330->req_done.next,
+					struct _pl330_req, rqd);
+
+		list_del_init(&rqdone->rqd);
+
+		/* Detach the req */
+		r = rqdone->r;
+		rqdone->r = NULL;
+
+		spin_unlock_irqrestore(&pl330->lock, flags);
+		_callback(r, PL330_ERR_NONE);
+		spin_lock_irqsave(&pl330->lock, flags);
+	}
+
+updt_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	if (pl330->dmac_tbd.reset_dmac
+			|| pl330->dmac_tbd.reset_mngr
+			|| pl330->dmac_tbd.reset_chan) {
+		ret = 1;
+		tasklet_schedule(&pl330->tasks);
+	}
+
+	return ret;
+}
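
pl330_update() is designed to be called straight from interrupt context:
it returns nonzero when it consumed an event or queued recovery work for
the tasklet. The pl330_irq_handler() registered later in this patch (see
pl330_probe() below, where request_irq() passes pi as the cookie) is
essentially just this thin wrapper:

	static irqreturn_t pl330_irq_handler(int irq, void *data)
	{
		if (pl330_update(data))
			return IRQ_HANDLED;
		else
			return IRQ_NONE;
	}
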
+
+static int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	int ret = 0, active;
+
+	if (!thrd || thrd->free || thrd->dmac->state == DYING)
+		return -EINVAL;
+
+	pl330 = thrd->dmac;
+	active = thrd->req_running;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	switch (op) {
+	case PL330_OP_FLUSH:
+		/* Make sure the channel is stopped */
+		_stop(thrd);
+
+		thrd->req[0].r = NULL;
+		thrd->req[1].r = NULL;
+		mark_free(thrd, 0);
+		mark_free(thrd, 1);
+		break;
+
+	case PL330_OP_ABORT:
+		/* Make sure the channel is stopped */
+		_stop(thrd);
+
+		/* ABORT is only for the active req */
+		if (active == -1)
+			break;
+
+		thrd->req[active].r = NULL;
+		mark_free(thrd, active);
+
+		/* Fall through to start the next request */
+	case PL330_OP_START:
+		if ((active == -1) && !_start(thrd))
+			ret = -EIO;
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+	return ret;
+}
+
+/* Reserve an event */
+static inline int _alloc_event(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+	int ev;
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++)
+		if (pl330->events[ev] == -1) {
+			pl330->events[ev] = thrd->id;
+			return ev;
+		}
+
+	return -1;
+}
+
+static bool _chan_ns(const struct pl330_info *pi, int i)
+{
+	return pi->pcfg.irq_ns & (1 << i);
+}
+
+/* Upon success, returns an identity token for the
+ * allocated channel; NULL otherwise.
+ */
+static void *pl330_request_channel(const struct pl330_info *pi)
+{
+	struct pl330_thread *thrd = NULL;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+	int chans, i;
+
+	if (!pi || !pi->pl330_data)
+		return NULL;
+
+	pl330 = pi->pl330_data;
+
+	if (pl330->state == DYING)
+		return NULL;
+
+	chans = pi->pcfg.num_chan;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		if ((thrd->free) && (!_manager_ns(thrd) ||
+					_chan_ns(pi, i))) {
+			thrd->ev = _alloc_event(thrd);
+			if (thrd->ev >= 0) {
+				thrd->free = false;
+				thrd->lstenq = 1;
+				thrd->req[0].r = NULL;
+				mark_free(thrd, 0);
+				thrd->req[1].r = NULL;
+				mark_free(thrd, 1);
+				break;
+			}
+		}
+		thrd = NULL;
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return thrd;
+}
+
+/* Release an event */
+static inline void _free_event(struct pl330_thread *thrd, int ev)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+
+	/* If the event is valid and was held by the thread */
+	if (ev >= 0 && ev < pi->pcfg.num_events
+			&& pl330->events[ev] == thrd->id)
+		pl330->events[ev] = -1;
+}
+
+static void pl330_release_channel(void *ch_id)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	unsigned long flags;
+
+	if (!thrd || thrd->free)
+		return;
+
+	_stop(thrd);
+
+	_callback(thrd->req[1 - thrd->lstenq].r, PL330_ERR_ABORT);
+	_callback(thrd->req[thrd->lstenq].r, PL330_ERR_ABORT);
+
+	pl330 = thrd->dmac;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+	_free_event(thrd, thrd->ev);
+	thrd->free = true;
+	spin_unlock_irqrestore(&pl330->lock, flags);
+}
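
pl330_request_channel() and pl330_release_channel() bracket a channel's
lifetime for the DMA API glue below. A hedged sketch of the pairing
(submission itself goes through the core's pl330_submit_req(), defined
earlier in this file):

	void *ch = pl330_request_channel(&pl330_info);
	if (!ch)
		return -EAGAIN;
	/* ... queue work on 'ch' via pl330_submit_req() ... */
	pl330_release_channel(ch);	/* aborts whatever is still queued */
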
+
+/* Initialize the structure for PL330 configuration so that the
+ * client driver can make the best use of the DMAC.
+ */
+static void read_dmac_config(struct pl330_info *pi)
+{
+	void __iomem *regs = pi->base;
+	u32 val;
+
+	val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
+	val &= CRD_DATA_WIDTH_MASK;
+	pi->pcfg.data_bus_width = 8 * (1 << val);
+
+	val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT;
+	val &= CRD_DATA_BUFF_MASK;
+	pi->pcfg.data_buf_dep = val + 1;
+
+	val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
+	val &= CR0_NUM_CHANS_MASK;
+	val += 1;
+	pi->pcfg.num_chan = val;
+
+	val = readl(regs + CR0);
+	if (val & CR0_PERIPH_REQ_SET) {
+		val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
+		val += 1;
+		pi->pcfg.num_peri = val;
+		pi->pcfg.peri_ns = readl(regs + CR4);
+	} else {
+		pi->pcfg.num_peri = 0;
+	}
+
+	val = readl(regs + CR0);
+	if (val & CR0_BOOT_MAN_NS)
+		pi->pcfg.mode |= DMAC_MODE_NS;
+	else
+		pi->pcfg.mode &= ~DMAC_MODE_NS;
+
+	val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
+	val &= CR0_NUM_EVENTS_MASK;
+	val += 1;
+	pi->pcfg.num_events = val;
+
+	pi->pcfg.irq_ns = readl(regs + CR3);
+
+	pi->pcfg.periph_id = get_id(pi, PERIPH_ID);
+	pi->pcfg.pcell_id = get_id(pi, PCELL_ID);
+}
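
The CR0/CRD fields are stored in encoded form: the data-width field holds
log2 of the bus width in bytes, and the channel/peripheral/event counts are
stored minus one. For example, a CRD width field of 2 decodes as
8 * (1 << 2) = 32 bits, and a CR0 num_chan field of 7 decodes as 8 channels.
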
+
+static inline void _reset_thread(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = pl330->pinfo;
+
+	thrd->req[0].mc_cpu = pl330->mcode_cpu
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].mc_bus = pl330->mcode_bus
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].r = NULL;
+	mark_free(thrd, 0);
+
+	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+				+ pi->mcbufsz / 2;
+	thrd->req[1].mc_bus = thrd->req[0].mc_bus
+				+ pi->mcbufsz / 2;
+	thrd->req[1].r = NULL;
+	mark_free(thrd, 1);
+}
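
Each channel thread therefore owns one mcbufsz-sized slice of the shared
microcode buffer, split evenly between its two request slots.
Schematically, for channel id i:

	req[0].mc_bus = mcode_bus + i * mcbufsz
	req[1].mc_bus = mcode_bus + i * mcbufsz + mcbufsz / 2
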
+
+static int dmac_alloc_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Allocate 1 Manager and 'chans' Channel threads */
+	pl330->channels = kzalloc((1 + chans) * sizeof(*thrd),
+					GFP_KERNEL);
+	if (!pl330->channels)
+		return -ENOMEM;
+
+	/* Init Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		thrd->id = i;
+		thrd->dmac = pl330;
+		_reset_thread(thrd);
+		thrd->free = true;
+	}
+
+	/* MANAGER is indexed at the end */
+	thrd = &pl330->channels[chans];
+	thrd->id = chans;
+	thrd->dmac = pl330;
+	thrd->free = false;
+	pl330->manager = thrd;
+
+	return 0;
+}
+
+static int dmac_alloc_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	int ret;
+
+	/*
+	 * Alloc MicroCode buffer for 'chans' Channel threads.
+	 * A channel's buffer offset is (Channel_Id * pi->mcbufsz).
+	 */
+	pl330->mcode_cpu = dma_alloc_coherent(pi->dev,
+				chans * pi->mcbufsz,
+				&pl330->mcode_bus, GFP_KERNEL);
+	if (!pl330->mcode_cpu) {
+		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	ret = dmac_alloc_threads(pl330);
+	if (ret) {
+		dev_err(pi->dev, "%s:%d Can't create channels for DMAC!\n",
+			__func__, __LINE__);
+		dma_free_coherent(pi->dev,
+				chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pl330_add(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+	void __iomem *regs;
+	int i, ret;
+
+	if (!pi || !pi->dev)
+		return -EINVAL;
+
+	/* If already added */
+	if (pi->pl330_data)
+		return -EINVAL;
+
+	/*
+	 * If the SoC can perform reset on the DMAC, then do it
+	 * before reading its configuration.
+	 */
+	if (pi->dmac_reset)
+		pi->dmac_reset(pi);
+
+	regs = pi->base;
+
+	/* Check if we can handle this DMAC */
+	if ((get_id(pi, PERIPH_ID) & 0xfffff) != PERIPH_ID_VAL
+	   || get_id(pi, PCELL_ID) != PCELL_ID_VAL) {
+		dev_err(pi->dev, "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
+			get_id(pi, PERIPH_ID), get_id(pi, PCELL_ID));
+		return -EINVAL;
+	}
+
+	/* Read the configuration of the DMAC */
+	read_dmac_config(pi);
+
+	if (pi->pcfg.num_events == 0) {
+		dev_err(pi->dev, "%s:%d Can't work without events!\n",
+			__func__, __LINE__);
+		return -EINVAL;
+	}
+
+	pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL);
+	if (!pl330) {
+		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	/* Assign the info structure and private data */
+	pl330->pinfo = pi;
+	pi->pl330_data = pl330;
+
+	spin_lock_init(&pl330->lock);
+
+	INIT_LIST_HEAD(&pl330->req_done);
+
+	/* Use default MC buffer size if not provided */
+	if (!pi->mcbufsz)
+		pi->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+
+	/* Mark all events as free */
+	for (i = 0; i < pi->pcfg.num_events; i++)
+		pl330->events[i] = -1;
+
+	/* Allocate resources needed by the DMAC */
+	ret = dmac_alloc_resources(pl330);
+	if (ret) {
+		dev_err(pi->dev, "Unable to create channels for DMAC\n");
+		kfree(pl330);
+		return ret;
+	}
+
+	tasklet_init(&pl330->tasks, pl330_dotask, (unsigned long) pl330);
+
+	pl330->state = INIT;
+
+	return 0;
+}
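
pl330_add() pairs with pl330_del() below; the AMBA glue later in this file
calls them from probe and remove. Roughly (hypothetical sketch, unwinding
elided):

	pi->dev = &adev->dev;
	pi->base = regs;
	pi->mcbufsz = 0;	/* 0 picks the default buffer size */
	ret = pl330_add(pi);
	if (ret)
		return ret;
	/* ... register channels with the dmaengine core ... */
	pl330_del(pi);		/* on remove, after unregistering */
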
+
+static int dmac_free_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Release Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		pl330_release_channel((void *)thrd);
+	}
+
+	/* Free memory */
+	kfree(pl330->channels);
+
+	return 0;
+}
+
+static void dmac_free_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+
+	dmac_free_threads(pl330);
+
+	dma_free_coherent(pi->dev, chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+}
+
+static void pl330_del(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+
+	if (!pi || !pi->pl330_data)
+		return;
+
+	pl330 = pi->pl330_data;
+
+	pl330->state = UNINIT;
+
+	tasklet_kill(&pl330->tasks);
+
+	/* Free DMAC resources */
+	dmac_free_resources(pl330);
+
+	kfree(pl330);
+	pi->pl330_data = NULL;
+}
+
 /* forward declaration */
 static struct amba_driver pl330_driver;
 
@@ -234,7 +2320,7 @@ static void pl330_tasklet(unsigned long data)
 	/* Pick up ripe tomatoes */
 	list_for_each_entry_safe(desc, _dt, &pch->work_list, node)
 		if (desc->status == DONE) {
-			pch->completed = desc->txd.cookie;
+			dma_cookie_complete(&desc->txd);
 			list_move_tail(&desc->node, &list);
 		}
 
@@ -305,7 +2391,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 
 	spin_lock_irqsave(&pch->lock, flags);
 
-	pch->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 	pch->cyclic = false;
 
 	pch->pl330_chid = pl330_request_channel(&pdmac->pif);
@@ -340,7 +2426,6 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 		/* Mark all desc done */
 		list_for_each_entry_safe(desc, _dt, &pch->work_list , node) {
 			desc->status = DONE;
-			pch->completed = desc->txd.cookie;
 			list_move_tail(&desc->node, &list);
 		}
 
@@ -396,18 +2481,7 @@ static enum dma_status
 pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		 struct dma_tx_state *txstate)
 {
-	struct dma_pl330_chan *pch = to_pchan(chan);
-	dma_cookie_t last_done, last_used;
-	int ret;
-
-	last_done = pch->completed;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_done, last_used);
-
-	dma_set_tx_state(txstate, last_done, last_used, 0);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void pl330_issue_pending(struct dma_chan *chan)
@@ -430,26 +2504,16 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
 	spin_lock_irqsave(&pch->lock, flags);
 
 	/* Assign cookies to all nodes */
-	cookie = tx->chan->cookie;
-
 	while (!list_empty(&last->node)) {
 		desc = list_entry(last->node.next, struct dma_pl330_desc, node);
 
-		if (++cookie < 0)
-			cookie = 1;
-		desc->txd.cookie = cookie;
+		dma_cookie_assign(&desc->txd);
 
 		list_move_tail(&desc->node, &pch->work_list);
 	}
 
-	if (++cookie < 0)
-		cookie = 1;
-	last->txd.cookie = cookie;
-
+	cookie = dma_cookie_assign(&last->txd);
 	list_add_tail(&last->node, &pch->work_list);
-
-	tx->chan->cookie = cookie;
-
 	spin_unlock_irqrestore(&pch->lock, flags);
 
 	return cookie;
@@ -553,6 +2617,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 	async_tx_ack(&desc->txd);
 
 	desc->req.peri = peri_id ? pch->chan.chan_id : 0;
+	desc->rqcfg.pcfg = &pch->dmac->pif.pcfg;
 
 	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
 
@@ -621,7 +2686,8 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
-		size_t period_len, enum dma_transfer_direction direction)
+		size_t period_len, enum dma_transfer_direction direction,
+		void *context)
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -711,7 +2777,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flg)
+		unsigned long flg, void *context)
 {
 	struct dma_pl330_desc *first, *desc = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -829,7 +2895,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	if (IS_ERR(pdmac->clk)) {
 		dev_err(&adev->dev, "Cannot get operation clock.\n");
 		ret = -EINVAL;
-		goto probe_err1;
+		goto probe_err2;
 	}
 
 	amba_set_drvdata(adev, pdmac);
@@ -843,11 +2909,11 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	ret = request_irq(irq, pl330_irq_handler, 0,
 			dev_name(&adev->dev), pi);
 	if (ret)
-		goto probe_err2;
+		goto probe_err3;
 
 	ret = pl330_add(pi);
 	if (ret)
-		goto probe_err3;
+		goto probe_err4;
 
 	INIT_LIST_HEAD(&pdmac->desc_pool);
 	spin_lock_init(&pdmac->pool_lock);
@@ -904,7 +2970,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	ret = dma_async_device_register(pd);
 	if (ret) {
 		dev_err(&adev->dev, "unable to register DMAC\n");
-		goto probe_err4;
+		goto probe_err5;
 	}
 
 	dev_info(&adev->dev,
@@ -917,10 +2983,15 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	return 0;
 
-probe_err4:
+probe_err5:
 	pl330_del(pi);
-probe_err3:
+probe_err4:
 	free_irq(irq, pi);
+probe_err3:
+#ifndef CONFIG_PM_RUNTIME
+	clk_disable(pdmac->clk);
+#endif
+	clk_put(pdmac->clk);
 probe_err2:
 	iounmap(pi->base);
 probe_err1:
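
Every driver converted below leans on the small cookie helpers this series
introduces in drivers/dma/dmaengine.h (included near the top of each file).
Reconstructed from the conversions in this patch, the helpers look roughly
like this; treat it as a sketch, not a verbatim copy of the header:

	static inline void dma_cookie_init(struct dma_chan *chan)
	{
		chan->cookie = DMA_MIN_COOKIE;
		chan->completed_cookie = DMA_MIN_COOKIE;
	}

	static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx)
	{
		struct dma_chan *chan = tx->chan;
		dma_cookie_t cookie = chan->cookie + 1;

		if (cookie < DMA_MIN_COOKIE)
			cookie = DMA_MIN_COOKIE;
		tx->cookie = chan->cookie = cookie;
		return cookie;
	}

	static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
	{
		tx->chan->completed_cookie = tx->cookie;
		tx->cookie = 0;
	}

	static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
			dma_cookie_t cookie, struct dma_tx_state *state)
	{
		dma_cookie_t used = chan->cookie;
		dma_cookie_t complete = chan->completed_cookie;

		if (state) {
			state->last = complete;
			state->used = used;
			state->residue = 0;
		}
		return dma_async_is_complete(cookie, complete, used);
	}

This replaces the per-driver open-coded pattern (bump chan->cookie, wrap to
1 on overflow, mirror it into the descriptor, and track completed_cookie by
hand) that each hunk below deletes.
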
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index fc457a7e8832..ced98826684a 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -46,6 +46,7 @@
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 #include "adma.h"
+#include "../dmaengine.h"
 
 enum ppc_adma_init_code {
 	PPC_ADMA_INIT_OK = 0,
@@ -1930,7 +1931,7 @@ static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
 				if (end_of_chain && slot_cnt) {
 					/* Should wait for ZeroSum completion */
 					if (cookie > 0)
-						chan->completed_cookie = cookie;
+						chan->common.completed_cookie = cookie;
 					return;
 				}
 
@@ -1960,7 +1961,7 @@ static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
 	BUG_ON(!seen_current);
 
 	if (cookie > 0) {
-		chan->completed_cookie = cookie;
+		chan->common.completed_cookie = cookie;
 		pr_debug("\tcompleted cookie %d\n", cookie);
 	}
 
@@ -2150,22 +2151,6 @@ static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
 }
 
 /**
- * ppc440spe_desc_assign_cookie - assign a cookie
- */
-static dma_cookie_t ppc440spe_desc_assign_cookie(
-		struct ppc440spe_adma_chan *chan,
-		struct ppc440spe_adma_desc_slot *desc)
-{
-	dma_cookie_t cookie = chan->common.cookie;
-
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-	chan->common.cookie = desc->async_tx.cookie = cookie;
-	return cookie;
-}
-
-/**
  * ppc440spe_rxor_set_region_data -
  */
 static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
@@ -2235,8 +2220,7 @@ static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 	slots_per_op = group_start->slots_per_op;
 
 	spin_lock_bh(&chan->lock);
-
-	cookie = ppc440spe_desc_assign_cookie(chan, sw_desc);
+	cookie = dma_cookie_assign(tx);
 
 	if (unlikely(list_empty(&chan->chain))) {
 		/* first peer */
@@ -3944,28 +3928,16 @@ static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct ppc440spe_adma_chan *ppc440spe_chan;
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status ret;
 
 	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
-	last_used = chan->cookie;
-	last_complete = ppc440spe_chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_SUCCESS)
 		return ret;
 
 	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
 
-	last_used = chan->cookie;
-	last_complete = ppc440spe_chan->completed_cookie;
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 /**
@@ -4050,16 +4022,12 @@ static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
 		async_tx_ack(&sw_desc->async_tx);
 		ppc440spe_desc_init_null_xor(group_start);
 
-		cookie = chan->common.cookie;
-		cookie++;
-		if (cookie <= 1)
-			cookie = 2;
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
 
 		/* initialize the completed cookie to be less than
 		 * the most recently used cookie
 		 */
-		chan->completed_cookie = cookie - 1;
-		chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+		chan->common.completed_cookie = cookie - 1;
 
 		/* channel should not be busy */
 		BUG_ON(ppc440spe_chan_is_busy(chan));
@@ -4529,6 +4497,7 @@ static int __devinit ppc440spe_adma_probe(struct platform_device *ofdev)
 	INIT_LIST_HEAD(&chan->all_slots);
 	chan->device = adev;
 	chan->common.device = &adev->common;
+	dma_cookie_init(&chan->common);
 	list_add_tail(&chan->common.device_node, &adev->common.channels);
 	tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
 		     (unsigned long)chan);
diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h
index 8ada5a812e3b..26b7a5ed9ac7 100644
--- a/drivers/dma/ppc4xx/adma.h
+++ b/drivers/dma/ppc4xx/adma.h
@@ -81,7 +81,6 @@ struct ppc440spe_adma_device {
  * @common: common dmaengine channel object members
  * @all_slots: complete domain of slots usable by the channel
  * @pending: allows batching of hardware operations
- * @completed_cookie: identifier for the most recently completed operation
  * @slots_allocated: records the actual size of the descriptor slot pool
  * @hw_chain_inited: h/w descriptor chain initialization flag
  * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs
@@ -99,7 +98,6 @@ struct ppc440spe_adma_chan {
 	struct list_head all_slots;
 	struct ppc440spe_adma_desc_slot *last_used;
 	int pending;
-	dma_cookie_t completed_cookie;
 	int slots_allocated;
 	int hw_chain_inited;
 	struct tasklet_struct irq_tasklet;
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 812fd76e9c18..19d7a8d3975d 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -30,6 +30,8 @@
 #include <linux/kdebug.h>
 #include <linux/spinlock.h>
 #include <linux/rculist.h>
+
+#include "dmaengine.h"
 #include "shdma.h"
 
 /* DMA descriptor control */
@@ -296,13 +298,7 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 	else
 		power_up = false;
 
-	cookie = sh_chan->common.cookie;
-	cookie++;
-	if (cookie < 0)
-		cookie = 1;
-
-	sh_chan->common.cookie = cookie;
-	tx->cookie = cookie;
+	cookie = dma_cookie_assign(tx);
 
 	/* Mark all chunks of this descriptor as submitted, move to the queue */
 	list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
@@ -673,7 +669,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
 
 static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
 {
 	struct sh_dmae_slave *param;
 	struct sh_dmae_chan *sh_chan;
@@ -764,12 +761,12 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all
 			cookie = tx->cookie;
 
 		if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
-			if (sh_chan->completed_cookie != desc->cookie - 1)
+			if (sh_chan->common.completed_cookie != desc->cookie - 1)
 				dev_dbg(sh_chan->dev,
 					"Completing cookie %d, expected %d\n",
 					desc->cookie,
-					sh_chan->completed_cookie + 1);
-			sh_chan->completed_cookie = desc->cookie;
+					sh_chan->common.completed_cookie + 1);
+			sh_chan->common.completed_cookie = desc->cookie;
 		}
 
 		/* Call callback on the last chunk */
@@ -823,7 +820,7 @@ static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all
 		 * Terminating and the loop completed normally: forgive
 		 * uncompleted cookies
 		 */
-		sh_chan->completed_cookie = sh_chan->common.cookie;
+		sh_chan->common.completed_cookie = sh_chan->common.cookie;
 
 	spin_unlock_irqrestore(&sh_chan->desc_lock, flags);
 
@@ -883,23 +880,14 @@ static enum dma_status sh_dmae_tx_status(struct dma_chan *chan,
 					struct dma_tx_state *txstate)
 {
 	struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
 	enum dma_status status;
 	unsigned long flags;
 
 	sh_dmae_chan_ld_cleanup(sh_chan, false);
 
-	/* First read completed cookie to avoid a skew */
-	last_complete = sh_chan->completed_cookie;
-	rmb();
-	last_used = chan->cookie;
-	BUG_ON(last_complete < 0);
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
 	spin_lock_irqsave(&sh_chan->desc_lock, flags);
 
-	status = dma_async_is_complete(cookie, last_complete, last_used);
+	status = dma_cookie_status(chan, cookie, txstate);
 
 	/*
 	 * If we don't find cookie on the queue, it has been aborted and we have
@@ -1102,6 +1090,7 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
 
 	/* reference struct dma_device */
 	new_sh_chan->common.device = &shdev->common;
+	dma_cookie_init(&new_sh_chan->common);
 
 	new_sh_chan->dev = shdev->common.dev;
 	new_sh_chan->id = id;
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
index 2b55a276dc5b..0b1d2c105f02 100644
--- a/drivers/dma/shdma.h
+++ b/drivers/dma/shdma.h
@@ -30,7 +30,6 @@ enum dmae_pm_state {
 };
 
 struct sh_dmae_chan {
-	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
 	spinlock_t desc_lock;		/* Descriptor operation lock */
 	struct list_head ld_queue;	/* Link descriptors queue */
 	struct list_head ld_free;	/* Link descriptors free */
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 2333810d1688..434ad31174f2 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -18,6 +18,8 @@
 #include <linux/of_platform.h>
 #include <linux/sirfsoc_dma.h>
 
+#include "dmaengine.h"
+
 #define SIRFSOC_DMA_DESCRIPTORS                 16
 #define SIRFSOC_DMA_CHANNELS                    16
 
@@ -59,7 +61,6 @@ struct sirfsoc_dma_chan {
 	struct list_head		queued;
 	struct list_head		active;
 	struct list_head		completed;
-	dma_cookie_t			completed_cookie;
 	unsigned long			happened_cyclic;
 	unsigned long			completed_cyclic;
 
@@ -208,7 +209,7 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
 			/* Free descriptors */
 			spin_lock_irqsave(&schan->lock, flags);
 			list_splice_tail_init(&list, &schan->free);
-			schan->completed_cookie = last_cookie;
+			schan->chan.completed_cookie = last_cookie;
 			spin_unlock_irqrestore(&schan->lock, flags);
 		} else {
 			/* for cyclic channel, desc is always in active list */
@@ -258,13 +259,7 @@ static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
 	/* Move descriptor to queue */
 	list_move_tail(&sdesc->node, &schan->queued);
 
-	/* Update cookie */
-	cookie = schan->chan.cookie + 1;
-	if (cookie <= 0)
-		cookie = 1;
-
-	schan->chan.cookie = cookie;
-	sdesc->desc.cookie = cookie;
+	cookie = dma_cookie_assign(txd);
 
 	spin_unlock_irqrestore(&schan->lock, flags);
 
@@ -414,16 +409,13 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
 	unsigned long flags;
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
+	enum dma_status ret;
 
 	spin_lock_irqsave(&schan->lock, flags);
-	last_used = schan->chan.cookie;
-	last_complete = schan->completed_cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
 	spin_unlock_irqrestore(&schan->lock, flags);
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-	return dma_async_is_complete(cookie, last_complete, last_used);
+	return ret;
 }
 
 static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
@@ -497,7 +489,7 @@ err_dir:
 static struct dma_async_tx_descriptor *
 sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
 	size_t buf_len, size_t period_len,
-	enum dma_transfer_direction direction)
+	enum dma_transfer_direction direction, void *context)
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
 	struct sirfsoc_dma_desc *sdesc = NULL;
@@ -635,8 +627,7 @@ static int __devinit sirfsoc_dma_probe(struct platform_device *op)
 		schan = &sdma->channels[i];
 
 		schan->chan.device = dma;
-		schan->chan.cookie = 1;
-		schan->completed_cookie = schan->chan.cookie;
+		dma_cookie_init(&schan->chan);
 
 		INIT_LIST_HEAD(&schan->free);
 		INIT_LIST_HEAD(&schan->prepared);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index cc5ecbc067a3..bdd41d4bfa8d 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -21,6 +21,7 @@
 
 #include <plat/ste_dma40.h>
 
+#include "dmaengine.h"
 #include "ste_dma40_ll.h"
 
 #define D40_NAME "dma40"
@@ -220,8 +221,6 @@ struct d40_base;
  *
  * @lock: A spinlock to protect this struct.
  * @log_num: The logical number, if any of this channel.
- * @completed: Starts with 1, after first interrupt it is set to dma engine's
- * current cookie.
  * @pending_tx: The number of pending transfers. Used between interrupt handler
  * and tasklet.
  * @busy: Set to true when transfer is ongoing on this channel.
@@ -250,8 +249,6 @@ struct d40_base;
 struct d40_chan {
 	spinlock_t			 lock;
 	int				 log_num;
-	/* ID of the most recent completed transfer */
-	int				 completed;
 	int				 pending_tx;
 	bool				 busy;
 	struct d40_phy_res		*phy_chan;
@@ -1223,21 +1220,14 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
 					     chan);
 	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
 	unsigned long flags;
+	dma_cookie_t cookie;
 
 	spin_lock_irqsave(&d40c->lock, flags);
-
-	d40c->chan.cookie++;
-
-	if (d40c->chan.cookie < 0)
-		d40c->chan.cookie = 1;
-
-	d40d->txd.cookie = d40c->chan.cookie;
-
+	cookie = dma_cookie_assign(tx);
 	d40_desc_queue(d40c, d40d);
-
 	spin_unlock_irqrestore(&d40c->lock, flags);
 
-	return tx->cookie;
+	return cookie;
 }
 
 static int d40_start(struct d40_chan *d40c)
@@ -1357,7 +1347,7 @@ static void dma_tasklet(unsigned long data)
 		goto err;
 
 	if (!d40d->cyclic)
-		d40c->completed = d40d->txd.cookie;
+		dma_cookie_complete(&d40d->txd);
 
 	/*
 	 * If terminating a channel pending_tx is set to zero.
@@ -2182,7 +2172,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
 	bool is_free_phy;
 	spin_lock_irqsave(&d40c->lock, flags);
 
-	d40c->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	/* If no dma configuration is set use default configuration (memcpy) */
 	if (!d40c->configured) {
@@ -2299,7 +2289,8 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 							 struct scatterlist *sgl,
 							 unsigned int sg_len,
 							 enum dma_transfer_direction direction,
-							 unsigned long dma_flags)
+							 unsigned long dma_flags,
+							 void *context)
 {
 	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
 		return NULL;
@@ -2310,7 +2301,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_transfer_direction direction)
+		     enum dma_transfer_direction direction, void *context)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
@@ -2342,25 +2333,19 @@ static enum dma_status d40_tx_status(struct dma_chan *chan,
 				     struct dma_tx_state *txstate)
 {
 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	int ret;
+	enum dma_status ret;
 
 	if (d40c->phy_chan == NULL) {
 		chan_err(d40c, "Cannot read status of unallocated channel\n");
 		return -EINVAL;
 	}
 
-	last_complete = d40c->completed;
-	last_used = chan->cookie;
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_SUCCESS)
+		dma_set_residue(txstate, stedma40_residue(chan));
 
 	if (d40_is_paused(d40c))
 		ret = DMA_PAUSED;
-	else
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
-
-	dma_set_tx_state(txstate, last_complete, last_used,
-			 stedma40_residue(chan));
 
 	return ret;
 }
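
The ste_dma40 conversion also shows the new residue reporting: rather than
passing the residue into dma_set_tx_state(), the driver fills it in after
the generic status check via dma_set_residue(), which is presumably no more
than:

	static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
	{
		if (state)
			state->residue = residue;
	}
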
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index a6f9c1684a0f..4e0dff59901d 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -31,6 +31,8 @@
 
 #include <linux/timb_dma.h>
 
+#include "dmaengine.h"
+
 #define DRIVER_NAME "timb-dma"
 
 /* Global DMA registers */
@@ -84,7 +86,6 @@ struct timb_dma_chan {
 					especially the lists and descriptors,
 					from races between the tasklet and calls
 					from above */
-	dma_cookie_t		last_completed_cookie;
 	bool			ongoing;
 	struct list_head	active_list;
 	struct list_head	queue;
@@ -284,7 +285,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 	else
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR);
 */
-	td_chan->last_completed_cookie = txd->cookie;
+	dma_cookie_complete(txd);
 	td_chan->ongoing = false;
 
 	callback = txd->callback;
@@ -349,12 +350,7 @@ static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd)
 	dma_cookie_t cookie;
 
 	spin_lock_bh(&td_chan->lock);
-
-	cookie = txd->chan->cookie;
-	if (++cookie < 0)
-		cookie = 1;
-	txd->chan->cookie = cookie;
-	txd->cookie = cookie;
+	cookie = dma_cookie_assign(txd);
 
 	if (list_empty(&td_chan->active_list)) {
 		dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__,
@@ -481,8 +477,7 @@ static int td_alloc_chan_resources(struct dma_chan *chan)
 	}
 
 	spin_lock_bh(&td_chan->lock);
-	td_chan->last_completed_cookie = 1;
-	chan->cookie = 1;
+	dma_cookie_init(chan);
 	spin_unlock_bh(&td_chan->lock);
 
 	return 0;
@@ -515,24 +510,13 @@ static void td_free_chan_resources(struct dma_chan *chan)
 static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				    struct dma_tx_state *txstate)
 {
-	struct timb_dma_chan *td_chan =
-		container_of(chan, struct timb_dma_chan, chan);
-	dma_cookie_t		last_used;
-	dma_cookie_t		last_complete;
-	int			ret;
+	enum dma_status ret;
 
 	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
 
-	last_complete = td_chan->last_completed_cookie;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
-
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	ret = dma_cookie_status(chan, cookie, txstate);
 
-	dev_dbg(chan2dev(chan),
-		"%s: exit, ret: %d, last_complete: %d, last_used: %d\n",
-		__func__, ret, last_complete, last_used);
+	dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", __func__, ret);
 
 	return ret;
 }
@@ -558,7 +542,8 @@ static void td_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
 {
 	struct timb_dma_chan *td_chan =
 		container_of(chan, struct timb_dma_chan, chan);
@@ -766,7 +751,7 @@ static int __devinit td_probe(struct platform_device *pdev)
 		}
 
 		td_chan->chan.device = &td->dma;
-		td_chan->chan.cookie = 1;
+		dma_cookie_init(&td_chan->chan);
 		spin_lock_init(&td_chan->lock);
 		INIT_LIST_HEAD(&td_chan->active_list);
 		INIT_LIST_HEAD(&td_chan->queue);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 6122c364cf11..913f55c76c99 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -15,6 +15,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
+
+#include "dmaengine.h"
 #include "txx9dmac.h"
 
 static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan)
@@ -279,21 +281,6 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
 	}
 }
 
-/* Called with dc->lock held and bh disabled */
-static dma_cookie_t
-txx9dmac_assign_cookie(struct txx9dmac_chan *dc, struct txx9dmac_desc *desc)
-{
-	dma_cookie_t cookie = dc->chan.cookie;
-
-	if (++cookie < 0)
-		cookie = 1;
-
-	dc->chan.cookie = cookie;
-	desc->txd.cookie = cookie;
-
-	return cookie;
-}
-
 /*----------------------------------------------------------------------*/
 
 static void txx9dmac_dump_regs(struct txx9dmac_chan *dc)
@@ -424,7 +411,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
 	dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n",
 		 txd->cookie, desc);
 
-	dc->completed = txd->cookie;
+	dma_cookie_complete(txd);
 	callback = txd->callback;
 	param = txd->callback_param;
 
@@ -738,7 +725,7 @@ static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx)
 	dma_cookie_t cookie;
 
 	spin_lock_bh(&dc->lock);
-	cookie = txx9dmac_assign_cookie(dc, desc);
+	cookie = dma_cookie_assign(tx);
 
 	dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n",
 		 desc->txd.cookie, desc);
@@ -846,7 +833,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 static struct dma_async_tx_descriptor *
 txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags)
+		unsigned long flags, void *context)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
 	struct txx9dmac_dev *ddev = dc->ddev;
@@ -972,27 +959,17 @@ txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		   struct dma_tx_state *txstate)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
-	dma_cookie_t last_used;
-	dma_cookie_t last_complete;
-	int ret;
+	enum dma_status ret;
 
-	last_complete = dc->completed;
-	last_used = chan->cookie;
-
-	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS) {
 		spin_lock_bh(&dc->lock);
 		txx9dmac_scan_descriptors(dc);
 		spin_unlock_bh(&dc->lock);
 
-		last_complete = dc->completed;
-		last_used = chan->cookie;
-
-		ret = dma_async_is_complete(cookie, last_complete, last_used);
+		ret = dma_cookie_status(chan, cookie, txstate);
 	}
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
-
 	return ret;
 }
 
@@ -1057,7 +1034,7 @@ static int txx9dmac_alloc_chan_resources(struct dma_chan *chan)
 		return -EIO;
 	}
 
-	dc->completed = chan->cookie = 1;
+	dma_cookie_init(chan);
 
 	dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE;
 	txx9dmac_chan_set_SMPCHN(dc);
@@ -1186,7 +1163,7 @@ static int __init txx9dmac_chan_probe(struct platform_device *pdev)
 	dc->ddev->chan[ch] = dc;
 	dc->chan.device = &dc->dma;
 	list_add_tail(&dc->chan.device_node, &dc->chan.device->channels);
-	dc->chan.cookie = dc->completed = 1;
+	dma_cookie_init(&dc->chan);
 
 	if (is_dmac64(dc))
 		dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch];
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
index 365d42366b9f..f5a760598882 100644
--- a/drivers/dma/txx9dmac.h
+++ b/drivers/dma/txx9dmac.h
@@ -172,7 +172,6 @@ struct txx9dmac_chan {
 	spinlock_t		lock;
 
 	/* these other elements are all protected by lock */
-	dma_cookie_t		completed;
 	struct list_head	active_list;
 	struct list_head	queue;
 	struct list_head	free_list;