Diffstat (limited to 'drivers')
-rw-r--r--  drivers/dma/Kconfig        |   12
-rw-r--r--  drivers/dma/Makefile       |    1
-rw-r--r--  drivers/dma/dmatest.c      |   14
-rw-r--r--  drivers/dma/dw_dmac.c      |  103
-rw-r--r--  drivers/dma/dw_dmac_regs.h |   12
-rw-r--r--  drivers/dma/fsldma.c       |  551
-rw-r--r--  drivers/dma/fsldma.h       |    6
-rw-r--r--  drivers/dma/mxs-dma.c      |  724
-rw-r--r--  drivers/dma/pch_dma.c      |   35
-rw-r--r--  drivers/dma/ste_dma40.c    | 1402
-rw-r--r--  drivers/dma/ste_dma40_ll.c |  218
-rw-r--r--  drivers/dma/ste_dma40_ll.h |   66
12 files changed, 1902 insertions(+), 1242 deletions(-)
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 1c28816152fa..a572600e44eb 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -82,7 +82,7 @@ config INTEL_IOP_ADMA
 
 config DW_DMAC
 	tristate "Synopsys DesignWare AHB DMA support"
-	depends on AVR32
+	depends on HAVE_CLK
 	select DMA_ENGINE
 	default y if CPU_AT32AP7000
 	help
@@ -221,12 +221,20 @@ config IMX_SDMA
 
 config IMX_DMA
 	tristate "i.MX DMA support"
-	depends on ARCH_MX1 || ARCH_MX21 || MACH_MX27
+	depends on IMX_HAVE_DMA_V1
 	select DMA_ENGINE
 	help
 	  Support the i.MX DMA engine. This engine is integrated into
 	  Freescale i.MX1/21/27 chips.
 
+config MXS_DMA
+	bool "MXS DMA support"
+	depends on SOC_IMX23 || SOC_IMX28
+	select DMA_ENGINE
+	help
+	  Support the MXS DMA engine. This engine, comprising APBH-DMA
+	  and APBX-DMA, is integrated into Freescale i.MX23/28 chips.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 1be065a62f8c..836095ab3c5c 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
+obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 5589358b684d..e0888cb538d4 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -54,6 +54,11 @@ module_param(pq_sources, uint, S_IRUGO);
 MODULE_PARM_DESC(pq_sources,
 		"Number of p+q source buffers (default: 3)");
 
+static int timeout = 3000;
+module_param(timeout, int, S_IRUGO);
+MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
+		"Pass -1 for infinite timeout");
+
 /*
  * Initialization patterns. All bytes in the source buffer have bit 7
  * set, all bytes in the destination buffer have bit 7 cleared.
@@ -285,7 +290,12 @@ static int dmatest_func(void *data)
 
 	set_user_nice(current, 10);
 
-	flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT;
+	/*
+	 * src buffers are freed by the DMAEngine code with dma_unmap_single();
+	 * dst buffers are freed by ourselves below
+	 */
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
+	      | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
 
 	while (!kthread_should_stop()
 	       && !(iterations && total_tests >= iterations)) {
@@ -294,7 +304,7 @@ static int dmatest_func(void *data)
 		dma_addr_t dma_srcs[src_cnt];
 		dma_addr_t dma_dsts[dst_cnt];
 		struct completion cmp;
-		unsigned long tmo = msecs_to_jiffies(3000);
+		unsigned long tmo = msecs_to_jiffies(timeout);
 		u8 align = 0;
 
 		total_tests++;
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index a3991ab0d67e..9c25c7d099e4 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -32,26 +32,30 @@
  * which does not support descriptor writeback.
  */
 
-/* NOTE:  DMS+SMS is system-specific. We should get this information
- * from the platform code somehow.
- */
-#define DWC_DEFAULT_CTLLO	(DWC_CTLL_DST_MSIZE(0)		\
-				| DWC_CTLL_SRC_MSIZE(0)		\
-				| DWC_CTLL_DMS(0)		\
-				| DWC_CTLL_SMS(1)		\
-				| DWC_CTLL_LLP_D_EN		\
-				| DWC_CTLL_LLP_S_EN)
+#define DWC_DEFAULT_CTLLO(private) ({				\
+		struct dw_dma_slave *__slave = (private);	\
+		int dms = __slave ? __slave->dst_master : 0;	\
+		int sms = __slave ? __slave->src_master : 1;	\
+		u8 smsize = __slave ? __slave->src_msize : DW_DMA_MSIZE_16; \
+		u8 dmsize = __slave ? __slave->dst_msize : DW_DMA_MSIZE_16; \
+								\
+		(DWC_CTLL_DST_MSIZE(dmsize)			\
+		 | DWC_CTLL_SRC_MSIZE(smsize)			\
+		 | DWC_CTLL_LLP_D_EN				\
+		 | DWC_CTLL_LLP_S_EN				\
+		 | DWC_CTLL_DMS(dms)				\
+		 | DWC_CTLL_SMS(sms));				\
+	})
 
 /*
  * This is configuration-dependent and usually a funny size like 4095.
- * Let's round it down to the nearest power of two.
  *
  * Note that this is a transfer count, i.e. if we transfer 32-bit
- * words, we can do 8192 bytes per descriptor.
+ * words, we can do 16380 bytes per descriptor.
  *
  * This parameter is also system-specific.
  */
-#define DWC_MAX_COUNT	2048U
+#define DWC_MAX_COUNT	4095U
 
 /*
  * Number of descriptors to allocate for each channel. This should be
@@ -84,11 +88,6 @@ static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
 	return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
 }
 
-static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc)
-{
-	return list_entry(dwc->queue.next, struct dw_desc, desc_node);
-}
-
 static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
 {
 	struct dw_desc *desc, *_desc;
@@ -201,6 +200,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	dma_async_tx_callback		callback;
 	void				*param;
 	struct dma_async_tx_descriptor	*txd = &desc->txd;
+	struct dw_desc			*child;
 
 	dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
 
@@ -209,6 +209,12 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	param = txd->callback_param;
 
 	dwc_sync_desc_for_cpu(dwc, desc);
+
+	/* async_tx_ack */
+	list_for_each_entry(child, &desc->tx_list, desc_node)
+		async_tx_ack(&child->txd);
+	async_tx_ack(&desc->txd);
+
 	list_splice_init(&desc->tx_list, &dwc->free_list);
 	list_move(&desc->desc_node, &dwc->free_list);
 
@@ -259,10 +265,11 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	 * Submit queued descriptors ASAP, i.e. before we go through
 	 * the completed ones.
 	 */
-	if (!list_empty(&dwc->queue))
-		dwc_dostart(dwc, dwc_first_queued(dwc));
 	list_splice_init(&dwc->active_list, &list);
-	list_splice_init(&dwc->queue, &dwc->active_list);
+	if (!list_empty(&dwc->queue)) {
+		list_move(dwc->queue.next, &dwc->active_list);
+		dwc_dostart(dwc, dwc_first_active(dwc));
+	}
 
 	list_for_each_entry_safe(desc, _desc, &list, desc_node)
 		dwc_descriptor_complete(dwc, desc);
@@ -291,6 +298,9 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		return;
 	}
 
+	if (list_empty(&dwc->active_list))
+		return;
+
 	dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp);
 
 	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
@@ -319,8 +329,8 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		cpu_relax();
 
 	if (!list_empty(&dwc->queue)) {
-		dwc_dostart(dwc, dwc_first_queued(dwc));
-		list_splice_init(&dwc->queue, &dwc->active_list);
+		list_move(dwc->queue.next, &dwc->active_list);
+		dwc_dostart(dwc, dwc_first_active(dwc));
 	}
 }
 
@@ -346,7 +356,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	 */
 	bad_desc = dwc_first_active(dwc);
 	list_del_init(&bad_desc->desc_node);
-	list_splice_init(&dwc->queue, dwc->active_list.prev);
+	list_move(dwc->queue.next, dwc->active_list.prev);
 
 	/* Clear the error flag and try to restart the controller */
 	dma_writel(dw, CLEAR.ERROR, dwc->mask);
@@ -541,8 +551,8 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
 	if (list_empty(&dwc->active_list)) {
 		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
 				desc->txd.cookie);
-		dwc_dostart(dwc, desc);
 		list_add_tail(&desc->desc_node, &dwc->active_list);
+		dwc_dostart(dwc, dwc_first_active(dwc));
 	} else {
 		dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
 				desc->txd.cookie);
@@ -581,14 +591,16 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	 * We can be a lot more clever here, but this should take care
 	 * of the most common optimization.
 	 */
-	if (!((src | dest  | len) & 3))
+	if (!((src | dest  | len) & 7))
+		src_width = dst_width = 3;
+	else if (!((src | dest  | len) & 3))
 		src_width = dst_width = 2;
 	else if (!((src | dest | len) & 1))
 		src_width = dst_width = 1;
 	else
 		src_width = dst_width = 0;
 
-	ctllo = DWC_DEFAULT_CTLLO
+	ctllo = DWC_DEFAULT_CTLLO(chan->private)
 			| DWC_CTLL_DST_WIDTH(dst_width)
 			| DWC_CTLL_SRC_WIDTH(src_width)
 			| DWC_CTLL_DST_INC
@@ -669,11 +681,11 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	switch (direction) {
 	case DMA_TO_DEVICE:
-		ctllo = (DWC_DEFAULT_CTLLO
+		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
 				| DWC_CTLL_SRC_INC
-				| DWC_CTLL_FC_M2P);
+				| DWC_CTLL_FC(dws->fc));
 		reg = dws->tx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
@@ -714,11 +726,11 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		}
 		break;
 	case DMA_FROM_DEVICE:
-		ctllo = (DWC_DEFAULT_CTLLO
+		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
 				| DWC_CTLL_SRC_FIX
-				| DWC_CTLL_FC_P2M);
+				| DWC_CTLL_FC(dws->fc));
 
 		reg = dws->rx_reg;
 		for_each_sg(sgl, sg, sg_len, i) {
@@ -834,7 +846,9 @@ dwc_tx_status(struct dma_chan *chan,
 
 	ret = dma_async_is_complete(cookie, last_complete, last_used);
 	if (ret != DMA_SUCCESS) {
+		spin_lock_bh(&dwc->lock);
 		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+		spin_unlock_bh(&dwc->lock);
 
 		last_complete = dwc->completed;
 		last_used = chan->cookie;
@@ -889,8 +903,11 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 		BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
 
 		cfghi = dws->cfg_hi;
-		cfglo = dws->cfg_lo;
+		cfglo = dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK;
 	}
+
+	cfglo |= DWC_CFGL_CH_PRIOR(dwc->priority);
+
 	channel_writel(dwc, CFG_LO, cfglo);
 	channel_writel(dwc, CFG_HI, cfghi);
 
@@ -1126,23 +1143,23 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		case DMA_TO_DEVICE:
 			desc->lli.dar = dws->tx_reg;
 			desc->lli.sar = buf_addr + (period_len * i);
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 					| DWC_CTLL_DST_WIDTH(reg_width)
 					| DWC_CTLL_SRC_WIDTH(reg_width)
 					| DWC_CTLL_DST_FIX
 					| DWC_CTLL_SRC_INC
-					| DWC_CTLL_FC_M2P
+					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
 			break;
 		case DMA_FROM_DEVICE:
 			desc->lli.dar = buf_addr + (period_len * i);
 			desc->lli.sar = dws->rx_reg;
-			desc->lli.ctllo = (DWC_DEFAULT_CTLLO
+			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 					| DWC_CTLL_SRC_WIDTH(reg_width)
 					| DWC_CTLL_DST_WIDTH(reg_width)
 					| DWC_CTLL_DST_INC
 					| DWC_CTLL_SRC_FIX
-					| DWC_CTLL_FC_P2M
+					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
 			break;
 		default:
@@ -1307,7 +1324,17 @@ static int __init dw_probe(struct platform_device *pdev)
 		dwc->chan.device = &dw->dma;
 		dwc->chan.cookie = dwc->completed = 1;
 		dwc->chan.chan_id = i;
-		list_add_tail(&dwc->chan.device_node, &dw->dma.channels);
+		if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING)
+			list_add_tail(&dwc->chan.device_node,
+					&dw->dma.channels);
+		else
+			list_add(&dwc->chan.device_node, &dw->dma.channels);
+
+		/* 7 is highest priority & 0 is lowest. */
+		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
+			dwc->priority = 7 - i;
+		else
+			dwc->priority = i;
 
 		dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
 		spin_lock_init(&dwc->lock);
@@ -1335,6 +1362,8 @@ static int __init dw_probe(struct platform_device *pdev)
 
 	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+	if (pdata->is_private)
+		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
 	dw->dma.dev = &pdev->dev;
 	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
 	dw->dma.device_free_chan_resources = dwc_free_chan_resources;
@@ -1447,7 +1476,7 @@ static int __init dw_init(void)
 {
 	return platform_driver_probe(&dw_driver, dw_probe);
 }
-module_init(dw_init);
+subsys_initcall(dw_init);
 
 static void __exit dw_exit(void)
 {
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index d9a939f67f46..720f821527f8 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -86,6 +86,7 @@ struct dw_dma_regs {
 #define DWC_CTLL_SRC_MSIZE(n)	((n)<<14)
 #define DWC_CTLL_S_GATH_EN	(1 << 17)	/* src gather, !FIX */
 #define DWC_CTLL_D_SCAT_EN	(1 << 18)	/* dst scatter, !FIX */
+#define DWC_CTLL_FC(n)		((n) << 20)
 #define DWC_CTLL_FC_M2M		(0 << 20)	/* mem-to-mem */
 #define DWC_CTLL_FC_M2P		(1 << 20)	/* mem-to-periph */
 #define DWC_CTLL_FC_P2M		(2 << 20)	/* periph-to-mem */
@@ -101,6 +102,8 @@ struct dw_dma_regs {
 #define DWC_CTLH_BLOCK_TS_MASK	0x00000fff
 
 /* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGL_CH_PRIOR_MASK	(0x7 << 5)	/* priority mask */
+#define DWC_CFGL_CH_PRIOR(x)	((x) << 5)	/* priority */
 #define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
 #define DWC_CFGL_FIFO_EMPTY	(1 << 9)	/* pause xfer */
 #define DWC_CFGL_HS_DST		(1 << 10)	/* handshake w/dst */
@@ -134,6 +137,7 @@ struct dw_dma_chan {
 	struct dma_chan		chan;
 	void __iomem		*ch_regs;
 	u8			mask;
+	u8			priority;
 
 	spinlock_t		lock;
 
@@ -155,9 +159,9 @@ __dwc_regs(struct dw_dma_chan *dwc)
 }
 
 #define channel_readl(dwc, name) \
-	__raw_readl(&(__dwc_regs(dwc)->name))
+	readl(&(__dwc_regs(dwc)->name))
 #define channel_writel(dwc, name, val) \
-	__raw_writel((val), &(__dwc_regs(dwc)->name))
+	writel((val), &(__dwc_regs(dwc)->name))
 
 static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
 {
@@ -181,9 +185,9 @@ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
 }
 
 #define dma_readl(dw, name) \
-	__raw_readl(&(__dw_regs(dw)->name))
+	readl(&(__dw_regs(dw)->name))
 #define dma_writel(dw, name, val) \
-	__raw_writel((val), &(__dw_regs(dw)->name))
+	writel((val), &(__dw_regs(dw)->name))
 
 #define channel_set_bit(dw, reg, mask) \
 	dma_writel(dw, reg, ((mask) << 8) | (mask))
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index e3854a8f0de0..6b396759e7f5 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -37,35 +37,16 @@
 
 #include "fsldma.h"
 
-static const char msg_ld_oom[] = "No free memory for link descriptor\n";
+#define chan_dbg(chan, fmt, arg...)					\
+	dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...)					\
+	dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
 
-static void dma_init(struct fsldma_chan *chan)
-{
-	/* Reset the channel */
-	DMA_OUT(chan, &chan->regs->mr, 0, 32);
+static const char msg_ld_oom[] = "No free memory for link descriptor";
 
-	switch (chan->feature & FSL_DMA_IP_MASK) {
-	case FSL_DMA_IP_85XX:
-		/* Set the channel to below modes:
-		 * EIE - Error interrupt enable
-		 * EOSIE - End of segments interrupt enable (basic mode)
-		 * EOLNIE - End of links interrupt enable
-		 * BWC - Bandwidth sharing among channels
-		 */
-		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC
-				| FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE
-				| FSL_DMA_MR_EOSIE, 32);
-		break;
-	case FSL_DMA_IP_83XX:
-		/* Set the channel to below modes:
-		 * EOTIE - End-of-transfer interrupt enable
-		 * PRC_RM - PCI read multiple
-		 */
-		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE
-				| FSL_DMA_MR_PRC_RM, 32);
-		break;
-	}
-}
+/*
+ * Register Helpers
+ */
 
 static void set_sr(struct fsldma_chan *chan, u32 val)
 {
@@ -77,14 +58,38 @@ static u32 get_sr(struct fsldma_chan *chan)
 	return DMA_IN(chan, &chan->regs->sr, 32);
 }
 
+static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
+{
+	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static u32 get_bcr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->bcr, 32);
+}
+
+/*
+ * Descriptor Helpers
+ */
+
 static void set_desc_cnt(struct fsldma_chan *chan,
 				struct fsl_dma_ld_hw *hw, u32 count)
 {
 	hw->count = CPU_TO_DMA(chan, count, 32);
 }
 
+static u32 get_desc_cnt(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+	return DMA_TO_CPU(chan, desc->hw.count, 32);
+}
+
 static void set_desc_src(struct fsldma_chan *chan,
-				struct fsl_dma_ld_hw *hw, dma_addr_t src)
+			 struct fsl_dma_ld_hw *hw, dma_addr_t src)
 {
 	u64 snoop_bits;
 
@@ -93,8 +98,18 @@ static void set_desc_src(struct fsldma_chan *chan,
 	hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64);
 }
 
+static dma_addr_t get_desc_src(struct fsldma_chan *chan,
+			       struct fsl_desc_sw *desc)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+	return DMA_TO_CPU(chan, desc->hw.src_addr, 64) & ~snoop_bits;
+}
+
 static void set_desc_dst(struct fsldma_chan *chan,
-				struct fsl_dma_ld_hw *hw, dma_addr_t dst)
+			 struct fsl_dma_ld_hw *hw, dma_addr_t dst)
 {
 	u64 snoop_bits;
 
@@ -103,8 +118,18 @@ static void set_desc_dst(struct fsldma_chan *chan,
 	hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64);
 }
 
+static dma_addr_t get_desc_dst(struct fsldma_chan *chan,
+			       struct fsl_desc_sw *desc)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+	return DMA_TO_CPU(chan, desc->hw.dst_addr, 64) & ~snoop_bits;
+}
+
 static void set_desc_next(struct fsldma_chan *chan,
-				struct fsl_dma_ld_hw *hw, dma_addr_t next)
+			  struct fsl_dma_ld_hw *hw, dma_addr_t next)
 {
 	u64 snoop_bits;
 
@@ -113,24 +138,46 @@ static void set_desc_next(struct fsldma_chan *chan,
 	hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64);
 }
 
-static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
+static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
 {
-	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
-}
+	u64 snoop_bits;
 
-static dma_addr_t get_cdar(struct fsldma_chan *chan)
-{
-	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
-}
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
 
-static dma_addr_t get_ndar(struct fsldma_chan *chan)
-{
-	return DMA_IN(chan, &chan->regs->ndar, 64);
+	desc->hw.next_ln_addr = CPU_TO_DMA(chan,
+		DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+			| snoop_bits, 64);
 }
 
-static u32 get_bcr(struct fsldma_chan *chan)
+/*
+ * DMA Engine Hardware Control Helpers
+ */
+
+static void dma_init(struct fsldma_chan *chan)
 {
-	return DMA_IN(chan, &chan->regs->bcr, 32);
+	/* Reset the channel */
+	DMA_OUT(chan, &chan->regs->mr, 0, 32);
+
+	switch (chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		/* Set the channel to below modes:
+		 * EIE - Error interrupt enable
+		 * EOLNIE - End of links interrupt enable
+		 * BWC - Bandwidth sharing among channels
+		 */
+		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC
+				| FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32);
+		break;
+	case FSL_DMA_IP_83XX:
+		/* Set the channel to below modes:
+		 * EOTIE - End-of-transfer interrupt enable
+		 * PRC_RM - PCI read multiple
+		 */
+		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE
+				| FSL_DMA_MR_PRC_RM, 32);
+		break;
+	}
 }
 
 static int dma_is_idle(struct fsldma_chan *chan)
@@ -139,25 +186,32 @@ static int dma_is_idle(struct fsldma_chan *chan)
 	return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
 }
 
+/*
+ * Start the DMA controller
+ *
+ * Preconditions:
+ * - the CDAR register must point to the start descriptor
+ * - the MRn[CS] bit must be cleared
+ */
 static void dma_start(struct fsldma_chan *chan)
 {
 	u32 mode;
 
 	mode = DMA_IN(chan, &chan->regs->mr, 32);
 
-	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
-			DMA_OUT(chan, &chan->regs->bcr, 0, 32);
-			mode |= FSL_DMA_MR_EMP_EN;
-		} else {
-			mode &= ~FSL_DMA_MR_EMP_EN;
-		}
+	if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+		DMA_OUT(chan, &chan->regs->bcr, 0, 32);
+		mode |= FSL_DMA_MR_EMP_EN;
+	} else {
+		mode &= ~FSL_DMA_MR_EMP_EN;
 	}
 
-	if (chan->feature & FSL_DMA_CHAN_START_EXT)
+	if (chan->feature & FSL_DMA_CHAN_START_EXT) {
 		mode |= FSL_DMA_MR_EMS_EN;
-	else
+	} else {
+		mode &= ~FSL_DMA_MR_EMS_EN;
 		mode |= FSL_DMA_MR_CS;
+	}
 
 	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 }
@@ -167,13 +221,26 @@ static void dma_halt(struct fsldma_chan *chan)
 	u32 mode;
 	int i;
 
+	/* read the mode register */
 	mode = DMA_IN(chan, &chan->regs->mr, 32);
-	mode |= FSL_DMA_MR_CA;
-	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 
-	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA);
+	/*
+	 * The 85xx controller supports channel abort, which will stop
+	 * the current transfer. On 83xx, this bit is the transfer error
+	 * mask bit, which should not be changed.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		mode |= FSL_DMA_MR_CA;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+
+		mode &= ~FSL_DMA_MR_CA;
+	}
+
+	/* stop the DMA controller */
+	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN);
 	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 
+	/* wait for the DMA controller to become idle */
 	for (i = 0; i < 100; i++) {
 		if (dma_is_idle(chan))
 			return;
@@ -182,20 +249,7 @@ static void dma_halt(struct fsldma_chan *chan)
 	}
 
 	if (!dma_is_idle(chan))
-		dev_err(chan->dev, "DMA halt timeout!\n");
-}
-
-static void set_ld_eol(struct fsldma_chan *chan,
-			struct fsl_desc_sw *desc)
-{
-	u64 snoop_bits;
-
-	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
-		? FSL_DMA_SNEN : 0;
-
-	desc->hw.next_ln_addr = CPU_TO_DMA(chan,
-		DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
-			| snoop_bits, 64);
+		chan_err(chan, "DMA halt timeout!\n");
 }
 
 /**
@@ -321,8 +375,7 @@ static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable)
 		chan->feature &= ~FSL_DMA_CHAN_START_EXT;
 }
 
-static void append_ld_queue(struct fsldma_chan *chan,
-			    struct fsl_desc_sw *desc)
+static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
 {
 	struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev);
 
@@ -363,8 +416,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	cookie = chan->common.cookie;
 	list_for_each_entry(child, &desc->tx_list, node) {
 		cookie++;
-		if (cookie < 0)
-			cookie = 1;
+		if (cookie < DMA_MIN_COOKIE)
+			cookie = DMA_MIN_COOKIE;
 
 		child->async_tx.cookie = cookie;
 	}
@@ -385,15 +438,14 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
  *
  * Return - The descriptor allocated. NULL for failed.
  */
-static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
-					struct fsldma_chan *chan)
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
 {
 	struct fsl_desc_sw *desc;
 	dma_addr_t pdesc;
 
 	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
 	if (!desc) {
-		dev_dbg(chan->dev, "out of memory for link desc\n");
+		chan_dbg(chan, "out of memory for link descriptor\n");
 		return NULL;
 	}
 
@@ -403,10 +455,13 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
 	desc->async_tx.tx_submit = fsl_dma_tx_submit;
 	desc->async_tx.phys = pdesc;
 
+#ifdef FSL_DMA_LD_DEBUG
+	chan_dbg(chan, "LD %p allocated\n", desc);
+#endif
+
 	return desc;
 }
 
-
 /**
  * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
  * @chan : Freescale DMA channel
@@ -427,13 +482,11 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan)
 	 * We need the descriptor to be aligned to 32bytes
 	 * for meeting FSL DMA specification requirement.
 	 */
-	chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool",
-					  chan->dev,
+	chan->desc_pool = dma_pool_create(chan->name, chan->dev,
 					  sizeof(struct fsl_desc_sw),
 					  __alignof__(struct fsl_desc_sw), 0);
 	if (!chan->desc_pool) {
-		dev_err(chan->dev, "unable to allocate channel %d "
-				   "descriptor pool\n", chan->id);
+		chan_err(chan, "unable to allocate descriptor pool\n");
 		return -ENOMEM;
 	}
 
@@ -455,6 +508,9 @@ static void fsldma_free_desc_list(struct fsldma_chan *chan,
 
 	list_for_each_entry_safe(desc, _desc, list, node) {
 		list_del(&desc->node);
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p free\n", desc);
+#endif
 		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
 	}
 }
@@ -466,6 +522,9 @@ static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
 
 	list_for_each_entry_safe_reverse(desc, _desc, list, node) {
 		list_del(&desc->node);
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p free\n", desc);
+#endif
 		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
 	}
 }
@@ -479,7 +538,7 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
 	unsigned long flags;
 
-	dev_dbg(chan->dev, "Free all channel resources.\n");
+	chan_dbg(chan, "free all channel resources\n");
 	spin_lock_irqsave(&chan->desc_lock, flags);
 	fsldma_free_desc_list(chan, &chan->ld_pending);
 	fsldma_free_desc_list(chan, &chan->ld_running);
@@ -502,7 +561,7 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
 
 	new = fsl_dma_alloc_descriptor(chan);
 	if (!new) {
-		dev_err(chan->dev, msg_ld_oom);
+		chan_err(chan, "%s\n", msg_ld_oom);
 		return NULL;
 	}
 
@@ -512,14 +571,15 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
 	/* Insert the link descriptor to the LD ring */
 	list_add_tail(&new->node, &new->tx_list);
 
-	/* Set End-of-link to the last link descriptor of new list*/
+	/* Set End-of-link to the last link descriptor of new list */
 	set_ld_eol(chan, new);
 
 	return &new->async_tx;
 }
 
-static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
-	struct dma_chan *dchan, dma_addr_t dma_dst, dma_addr_t dma_src,
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_memcpy(struct dma_chan *dchan,
+	dma_addr_t dma_dst, dma_addr_t dma_src,
 	size_t len, unsigned long flags)
 {
 	struct fsldma_chan *chan;
@@ -539,12 +599,9 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 		/* Allocate the link descriptor from DMA pool */
 		new = fsl_dma_alloc_descriptor(chan);
 		if (!new) {
-			dev_err(chan->dev, msg_ld_oom);
+			chan_err(chan, "%s\n", msg_ld_oom);
 			goto fail;
 		}
-#ifdef FSL_DMA_LD_DEBUG
-		dev_dbg(chan->dev, "new link desc alloc %p\n", new);
-#endif
 
 		copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
 
@@ -572,7 +629,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
 	new->async_tx.flags = flags; /* client is in control of this ack */
 	new->async_tx.cookie = -EBUSY;
 
-	/* Set End-of-link to the last link descriptor of new list*/
+	/* Set End-of-link to the last link descriptor of new list */
 	set_ld_eol(chan, new);
 
 	return &first->async_tx;
@@ -627,12 +684,9 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan,
 		/* allocate and populate the descriptor */
 		new = fsl_dma_alloc_descriptor(chan);
 		if (!new) {
-			dev_err(chan->dev, msg_ld_oom);
+			chan_err(chan, "%s\n", msg_ld_oom);
 			goto fail;
 		}
-#ifdef FSL_DMA_LD_DEBUG
-		dev_dbg(chan->dev, "new link desc alloc %p\n", new);
-#endif
 
 		set_desc_cnt(chan, &new->hw, len);
 		set_desc_src(chan, &new->hw, src);
@@ -744,14 +798,15 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
+		spin_lock_irqsave(&chan->desc_lock, flags);
+
 		/* Halt the DMA engine */
 		dma_halt(chan);
 
-		spin_lock_irqsave(&chan->desc_lock, flags);
-
 		/* Remove and free all of the descriptors in the LD queue */
 		fsldma_free_desc_list(chan, &chan->ld_pending);
 		fsldma_free_desc_list(chan, &chan->ld_running);
+		chan->idle = true;
 
 		spin_unlock_irqrestore(&chan->desc_lock, flags);
 		return 0;
@@ -789,140 +844,87 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 }
 
 /**
- * fsl_dma_update_completed_cookie - Update the completed cookie.
- * @chan : Freescale DMA channel
- *
- * CONTEXT: hardirq
- */
-static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc;
-	unsigned long flags;
-	dma_cookie_t cookie;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
-
-	if (list_empty(&chan->ld_running)) {
-		dev_dbg(chan->dev, "no running descriptors\n");
-		goto out_unlock;
-	}
-
-	/* Get the last descriptor, update the cookie to that */
-	desc = to_fsl_desc(chan->ld_running.prev);
-	if (dma_is_idle(chan))
-		cookie = desc->async_tx.cookie;
-	else {
-		cookie = desc->async_tx.cookie - 1;
-		if (unlikely(cookie < DMA_MIN_COOKIE))
-			cookie = DMA_MAX_COOKIE;
-	}
-
-	chan->completed_cookie = cookie;
-
-out_unlock:
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-}
-
-/**
- * fsldma_desc_status - Check the status of a descriptor
+ * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
  * @chan: Freescale DMA channel
- * @desc: DMA SW descriptor
- *
- * This function will return the status of the given descriptor
- */
-static enum dma_status fsldma_desc_status(struct fsldma_chan *chan,
-					  struct fsl_desc_sw *desc)
-{
-	return dma_async_is_complete(desc->async_tx.cookie,
-				     chan->completed_cookie,
-				     chan->common.cookie);
-}
-
-/**
- * fsl_chan_ld_cleanup - Clean up link descriptors
- * @chan : Freescale DMA channel
+ * @desc: descriptor to cleanup and free
  *
- * This function clean up the ld_queue of DMA channel.
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor.
  */
-static void fsl_chan_ld_cleanup(struct fsldma_chan *chan)
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
+				      struct fsl_desc_sw *desc)
 {
-	struct fsl_desc_sw *desc, *_desc;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
-
-	dev_dbg(chan->dev, "chan completed_cookie = %d\n", chan->completed_cookie);
-	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
-		dma_async_tx_callback callback;
-		void *callback_param;
-
-		if (fsldma_desc_status(chan, desc) == DMA_IN_PROGRESS)
-			break;
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	struct device *dev = chan->common.device->dev;
+	dma_addr_t src = get_desc_src(chan, desc);
+	dma_addr_t dst = get_desc_dst(chan, desc);
+	u32 len = get_desc_cnt(chan, desc);
+
+	/* Run the link descriptor callback function */
+	if (txd->callback) {
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p callback\n", desc);
+#endif
+		txd->callback(txd->callback_param);
+	}
 
-		/* Remove from the list of running transactions */
-		list_del(&desc->node);
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
 
-		/* Run the link descriptor callback function */
-		callback = desc->async_tx.callback;
-		callback_param = desc->async_tx.callback_param;
-		if (callback) {
-			spin_unlock_irqrestore(&chan->desc_lock, flags);
-			dev_dbg(chan->dev, "LD %p callback\n", desc);
-			callback(callback_param);
-			spin_lock_irqsave(&chan->desc_lock, flags);
-		}
+	/* Unmap the dst buffer, if requested */
+	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
+		else
+			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
+	}
 
-		/* Run any dependencies, then free the descriptor */
-		dma_run_dependencies(&desc->async_tx);
-		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	/* Unmap the src buffer, if requested */
+	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
 	}
 
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+#ifdef FSL_DMA_LD_DEBUG
+	chan_dbg(chan, "LD %p free\n", desc);
+#endif
+	dma_pool_free(chan->desc_pool, desc, txd->phys);
 }
 
 /**
  * fsl_chan_xfer_ld_queue - transfer any pending transactions
  * @chan : Freescale DMA channel
  *
- * This will make sure that any pending transactions will be run.
- * If the DMA controller is idle, it will be started. Otherwise,
- * the DMA controller's interrupt handler will start any pending
- * transactions when it becomes idle.
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
  */
 static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
 {
 	struct fsl_desc_sw *desc;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
 
 	/*
 	 * If the list of pending descriptors is empty, then we
 	 * don't need to do any work at all
 	 */
 	if (list_empty(&chan->ld_pending)) {
-		dev_dbg(chan->dev, "no pending LDs\n");
-		goto out_unlock;
+		chan_dbg(chan, "no pending LDs\n");
+		return;
 	}
 
 	/*
-	 * The DMA controller is not idle, which means the interrupt
-	 * handler will start any queued transactions when it runs
-	 * at the end of the current transaction
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
 	 */
-	if (!dma_is_idle(chan)) {
-		dev_dbg(chan->dev, "DMA controller still busy\n");
-		goto out_unlock;
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
 	}
 
 	/*
-	 * TODO:
-	 * make sure the dma_halt() function really un-wedges the
-	 * controller as much as possible
-	 */
-	dma_halt(chan);
-
-	/*
 	 * If there are some link descriptors which have not been
 	 * transferred, we need to start the controller
 	 */
@@ -931,18 +933,32 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
 	 * Move all elements from the queue of pending transactions
 	 * onto the list of running transactions
 	 */
+	chan_dbg(chan, "idle, starting controller\n");
 	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
 	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
 
 	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = DMA_IN(chan, &chan->regs->mr, 32);
+		mode &= ~FSL_DMA_MR_CS;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	}
+
+	/*
 	 * Program the descriptor's address into the DMA controller,
 	 * then start the DMA transaction
 	 */
 	set_cdar(chan, desc->async_tx.phys);
-	dma_start(chan);
+	get_cdar(chan);
 
-out_unlock:
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	dma_start(chan);
+	chan->idle = false;
 }
 
 /**
@@ -952,7 +968,11 @@ out_unlock:
 static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
 	fsl_chan_xfer_ld_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 }
 
 /**
@@ -964,16 +984,18 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 					struct dma_tx_state *txstate)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	dma_cookie_t last_used;
 	dma_cookie_t last_complete;
+	dma_cookie_t last_used;
+	unsigned long flags;
 
-	fsl_chan_ld_cleanup(chan);
+	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	last_used = dchan->cookie;
 	last_complete = chan->completed_cookie;
+	last_used = dchan->cookie;
 
-	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
+	dma_set_tx_state(txstate, last_complete, last_used, 0);
 	return dma_async_is_complete(cookie, last_complete, last_used);
 }
 
@@ -984,21 +1006,20 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 static irqreturn_t fsldma_chan_irq(int irq, void *data)
 {
 	struct fsldma_chan *chan = data;
-	int update_cookie = 0;
-	int xfer_ld_q = 0;
 	u32 stat;
 
 	/* save and clear the status register */
 	stat = get_sr(chan);
 	set_sr(chan, stat);
-	dev_dbg(chan->dev, "irq: channel %d, stat = 0x%x\n", chan->id, stat);
+	chan_dbg(chan, "irq: stat = 0x%x\n", stat);
 
+	/* check that this was really our device */
 	stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
 	if (!stat)
 		return IRQ_NONE;
 
 	if (stat & FSL_DMA_SR_TE)
-		dev_err(chan->dev, "Transfer Error!\n");
+		chan_err(chan, "Transfer Error!\n");
 
 	/*
 	 * Programming Error
@@ -1006,29 +1027,10 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 	 * trigger a PE interrupt.
 	 */
 	if (stat & FSL_DMA_SR_PE) {
-		dev_dbg(chan->dev, "irq: Programming Error INT\n");
-		if (get_bcr(chan) == 0) {
-			/* BCR register is 0, this is a DMA_INTERRUPT async_tx.
-			 * Now, update the completed cookie, and continue the
-			 * next uncompleted transfer.
-			 */
-			update_cookie = 1;
-			xfer_ld_q = 1;
-		}
+		chan_dbg(chan, "irq: Programming Error INT\n");
 		stat &= ~FSL_DMA_SR_PE;
-	}
-
-	/*
-	 * If the link descriptor segment transfer finishes,
-	 * we will recycle the used descriptor.
-	 */
-	if (stat & FSL_DMA_SR_EOSI) {
-		dev_dbg(chan->dev, "irq: End-of-segments INT\n");
-		dev_dbg(chan->dev, "irq: clndar 0x%llx, nlndar 0x%llx\n",
-			(unsigned long long)get_cdar(chan),
-			(unsigned long long)get_ndar(chan));
-		stat &= ~FSL_DMA_SR_EOSI;
-		update_cookie = 1;
+		if (get_bcr(chan) != 0)
+			chan_err(chan, "Programming Error!\n");
 	}
 
 	/*
@@ -1036,10 +1038,8 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 	 * and start the next transfer if it exists.
 	 */
 	if (stat & FSL_DMA_SR_EOCDI) {
-		dev_dbg(chan->dev, "irq: End-of-Chain link INT\n");
+		chan_dbg(chan, "irq: End-of-Chain link INT\n");
 		stat &= ~FSL_DMA_SR_EOCDI;
-		update_cookie = 1;
-		xfer_ld_q = 1;
 	}
 
 	/*
@@ -1048,27 +1048,79 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 	 * prepare next transfer.
 	 */
 	if (stat & FSL_DMA_SR_EOLNI) {
-		dev_dbg(chan->dev, "irq: End-of-link INT\n");
+		chan_dbg(chan, "irq: End-of-link INT\n");
 		stat &= ~FSL_DMA_SR_EOLNI;
-		xfer_ld_q = 1;
 	}
 
-	if (update_cookie)
-		fsl_dma_update_completed_cookie(chan);
-	if (xfer_ld_q)
-		fsl_chan_xfer_ld_queue(chan);
+	/* check that the DMA controller is really idle */
+	if (!dma_is_idle(chan))
+		chan_err(chan, "irq: controller not idle!\n");
+
+	/* check that we handled all of the bits */
 	if (stat)
-		dev_dbg(chan->dev, "irq: unhandled sr 0x%02x\n", stat);
+		chan_err(chan, "irq: unhandled sr 0x%08x\n", stat);
 
-	dev_dbg(chan->dev, "irq: Exit\n");
+	/*
+	 * Schedule the tasklet to handle all cleanup of the current
+	 * transaction. It will start a new transaction if there is
+	 * one pending.
+	 */
 	tasklet_schedule(&chan->tasklet);
+	chan_dbg(chan, "irq: Exit\n");
 	return IRQ_HANDLED;
 }
 
 static void dma_do_tasklet(unsigned long data)
 {
 	struct fsldma_chan *chan = (struct fsldma_chan *)data;
-	fsl_chan_ld_cleanup(chan);
+	struct fsl_desc_sw *desc, *_desc;
+	LIST_HEAD(ld_cleanup);
+	unsigned long flags;
+
+	chan_dbg(chan, "tasklet entry\n");
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	/* update the cookie if we have some descriptors to clean up */
+	if (!list_empty(&chan->ld_running)) {
+		dma_cookie_t cookie;
+
+		desc = to_fsl_desc(chan->ld_running.prev);
+		cookie = desc->async_tx.cookie;
+
+		chan->completed_cookie = cookie;
+		chan_dbg(chan, "completed_cookie=%d\n", cookie);
+	}
+
+	/*
+	 * move the descriptors to a temporary list so we can drop the lock
+	 * during the entire cleanup operation
+	 */
+	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
+
+	/* the hardware is now idle and ready for more */
+	chan->idle = true;
+
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
+
+		/* Remove from the list of transactions */
+		list_del(&desc->node);
+
+		/* Run all cleanup for this descriptor */
+		fsldma_cleanup_descriptor(chan, desc);
+	}
+
+	chan_dbg(chan, "tasklet exit\n");
 }
 
 static irqreturn_t fsldma_ctrl_irq(int irq, void *data)
@@ -1116,7 +1168,7 @@ static void fsldma_free_irqs(struct fsldma_device *fdev)
 	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
 		chan = fdev->chan[i];
 		if (chan && chan->irq != NO_IRQ) {
-			dev_dbg(fdev->dev, "free channel %d IRQ\n", chan->id);
+			chan_dbg(chan, "free per-channel IRQ\n");
 			free_irq(chan->irq, chan);
 		}
 	}
@@ -1143,19 +1195,16 @@ static int fsldma_request_irqs(struct fsldma_device *fdev)
 			continue;
 
 		if (chan->irq == NO_IRQ) {
-			dev_err(fdev->dev, "no interrupts property defined for "
-					   "DMA channel %d. Please fix your "
-					   "device tree\n", chan->id);
+			chan_err(chan, "interrupts property missing in device tree\n");
 			ret = -ENODEV;
 			goto out_unwind;
 		}
 
-		dev_dbg(fdev->dev, "request channel %d IRQ\n", chan->id);
+		chan_dbg(chan, "request per-channel IRQ\n");
 		ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED,
 				  "fsldma-chan", chan);
 		if (ret) {
-			dev_err(fdev->dev, "unable to request IRQ for DMA "
-					   "channel %d\n", chan->id);
+			chan_err(chan, "unable to request per-channel IRQ\n");
 			goto out_unwind;
 		}
 	}
@@ -1230,6 +1279,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
 
 	fdev->chan[chan->id] = chan;
 	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+	snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id);
 
 	/* Initialize the channel */
 	dma_init(chan);
@@ -1250,6 +1300,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
 	spin_lock_init(&chan->desc_lock);
 	INIT_LIST_HEAD(&chan->ld_pending);
 	INIT_LIST_HEAD(&chan->ld_running);
+	chan->idle = true;
 
 	chan->common.device = &fdev->common;
 
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index ba9f403c0fbe..9cb5aa57c677 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -102,8 +102,8 @@ struct fsl_desc_sw {
 } __attribute__((aligned(32)));
 
 struct fsldma_chan_regs {
-	u32 mr;	/* 0x00 - Mode Register */
-	u32 sr;	/* 0x04 - Status Register */
+	u32 mr;		/* 0x00 - Mode Register */
+	u32 sr;		/* 0x04 - Status Register */
 	u64 cdar;	/* 0x08 - Current descriptor address register */
 	u64 sar;	/* 0x10 - Source Address Register */
 	u64 dar;	/* 0x18 - Destination Address Register */
@@ -135,6 +135,7 @@ struct fsldma_device {
 #define FSL_DMA_CHAN_START_EXT	0x00002000
 
 struct fsldma_chan {
+	char name[8];			/* Channel name */
 	struct fsldma_chan_regs __iomem *regs;
 	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
 	spinlock_t desc_lock;		/* Descriptor operation lock */
@@ -147,6 +148,7 @@ struct fsldma_chan {
 	int id;				/* Raw id of this channel */
 	struct tasklet_struct tasklet;
 	u32 feature;
+	bool idle;			/* DMA controller is idle */
 
 	void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
 	void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
new file mode 100644
index 000000000000..88aad4f54002
--- /dev/null
+++ b/drivers/dma/mxs-dma.c
@@ -0,0 +1,724 @@
+/*
+ * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * Refer to drivers/dma/imx-sdma.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+
+#include <asm/irq.h>
+#include <mach/mxs.h>
+#include <mach/dma.h>
+#include <mach/common.h>
+
+/*
+ * NOTE: The term "PIO" throughout the mxs-dma implementation means
+ * the PIO mode of mxs apbh-dma and apbx-dma.  In this mode, the DMA
+ * can program the controller registers of peripheral devices.
+ */
+
+#define MXS_DMA_APBH		0
+#define MXS_DMA_APBX		1
+#define dma_is_apbh()		(mxs_dma->dev_id == MXS_DMA_APBH)
+
+#define APBH_VERSION_LATEST	3
+#define apbh_is_old()		(mxs_dma->version < APBH_VERSION_LATEST)
+
+#define HW_APBHX_CTRL0				0x000
+#define BM_APBH_CTRL0_APB_BURST8_EN		(1 << 29)
+#define BM_APBH_CTRL0_APB_BURST_EN		(1 << 28)
+#define BP_APBH_CTRL0_CLKGATE_CHANNEL		8
+#define BP_APBH_CTRL0_RESET_CHANNEL		16
+#define HW_APBHX_CTRL1				0x010
+#define HW_APBHX_CTRL2				0x020
+#define HW_APBHX_CHANNEL_CTRL			0x030
+#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL	16
+#define HW_APBH_VERSION				(cpu_is_mx23() ? 0x3f0 : 0x800)
+#define HW_APBX_VERSION				0x800
+#define BP_APBHX_VERSION_MAJOR			24
+#define HW_APBHX_CHn_NXTCMDAR(n) \
+	(((dma_is_apbh() && apbh_is_old()) ? 0x050 : 0x110) + (n) * 0x70)
+#define HW_APBHX_CHn_SEMA(n) \
+	(((dma_is_apbh() && apbh_is_old()) ? 0x080 : 0x140) + (n) * 0x70)
+
+/*
+ * ccw bits definitions
+ *
+ * COMMAND:		0..1	(2)
+ * CHAIN:		2	(1)
+ * IRQ:			3	(1)
+ * NAND_LOCK:		4	(1) - not implemented
+ * NAND_WAIT4READY:	5	(1) - not implemented
+ * DEC_SEM:		6	(1)
+ * WAIT4END:		7	(1)
+ * HALT_ON_TERMINATE:	8	(1)
+ * TERMINATE_FLUSH:	9	(1)
+ * RESERVED:		10..11	(2)
+ * PIO_NUM:		12..15	(4)
+ */
+#define BP_CCW_COMMAND		0
+#define BM_CCW_COMMAND		(3 << 0)
+#define CCW_CHAIN		(1 << 2)
+#define CCW_IRQ			(1 << 3)
+#define CCW_DEC_SEM		(1 << 6)
+#define CCW_WAIT4END		(1 << 7)
+#define CCW_HALT_ON_TERM	(1 << 8)
+#define CCW_TERM_FLUSH		(1 << 9)
+#define BP_CCW_PIO_NUM		12
+#define BM_CCW_PIO_NUM		(0xf << 12)
+
+#define BF_CCW(value, field)	(((value) << BP_CCW_##field) & BM_CCW_##field)
+
+#define MXS_DMA_CMD_NO_XFER	0
+#define MXS_DMA_CMD_WRITE	1
+#define MXS_DMA_CMD_READ	2
+#define MXS_DMA_CMD_DMA_SENSE	3	/* not implemented */
+
+struct mxs_dma_ccw {
+	u32		next;
+	u16		bits;
+	u16		xfer_bytes;
+#define MAX_XFER_BYTES	0xff00
+	u32		bufaddr;
+#define MXS_PIO_WORDS	16
+	u32		pio_words[MXS_PIO_WORDS];
+};
+
+#define NUM_CCW	(int)(PAGE_SIZE / sizeof(struct mxs_dma_ccw))
+
+struct mxs_dma_chan {
+	struct mxs_dma_engine		*mxs_dma;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	struct tasklet_struct		tasklet;
+	int				chan_irq;
+	struct mxs_dma_ccw		*ccw;
+	dma_addr_t			ccw_phys;
+	dma_cookie_t			last_completed;
+	enum dma_status			status;
+	unsigned int			flags;
+#define MXS_DMA_SG_LOOP			(1 << 0)
+};
+
+#define MXS_DMA_CHANNELS		16
+#define MXS_DMA_CHANNELS_MASK		0xffff
+
+struct mxs_dma_engine {
+	int				dev_id;
+	unsigned int			version;
+	void __iomem			*base;
+	struct clk			*clk;
+	struct dma_device		dma_device;
+	struct device_dma_parameters	dma_parms;
+	struct mxs_dma_chan		mxs_chans[MXS_DMA_CHANNELS];
+};
+
+static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	if (dma_is_apbh() && apbh_is_old())
+		writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	else
+		writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR);
+}
+
+static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* set cmd_addr up */
+	writel(mxs_chan->ccw_phys,
+		mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(chan_id));
+
+	/* enable apbh channel clock */
+	if (dma_is_apbh()) {
+		if (apbh_is_old())
+			writel(1 << (chan_id + BP_APBH_CTRL0_CLKGATE_CHANNEL),
+				mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR);
+		else
+			writel(1 << chan_id,
+				mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR);
+	}
+
+	/* write 1 to SEMA to kick off the channel */
+	writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(chan_id));
+}
+
+static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* disable apbh channel clock */
+	if (dma_is_apbh()) {
+		if (apbh_is_old())
+			writel(1 << (chan_id + BP_APBH_CTRL0_CLKGATE_CHANNEL),
+				mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+		else
+			writel(1 << chan_id,
+				mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	}
+
+	mxs_chan->status = DMA_SUCCESS;
+}
+
+static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* freeze the channel */
+	if (dma_is_apbh() && apbh_is_old())
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	else
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR);
+
+	mxs_chan->status = DMA_PAUSED;
+}
+
+static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan)
+{
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* unfreeze the channel */
+	if (dma_is_apbh() && apbh_is_old())
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR);
+	else
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_CLR_ADDR);
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+}
+
+static dma_cookie_t mxs_dma_assign_cookie(struct mxs_dma_chan *mxs_chan)
+{
+	dma_cookie_t cookie = mxs_chan->chan.cookie;
+
+	if (++cookie < 0)
+		cookie = 1;
+
+	mxs_chan->chan.cookie = cookie;
+	mxs_chan->desc.cookie = cookie;
+
+	return cookie;
+}
+
+static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mxs_dma_chan, chan);
+}
+
+static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(tx->chan);
+
+	mxs_dma_enable_chan(mxs_chan);
+
+	return mxs_dma_assign_cookie(mxs_chan);
+}
+
+static void mxs_dma_tasklet(unsigned long data)
+{
+	struct mxs_dma_chan *mxs_chan = (struct mxs_dma_chan *) data;
+
+	if (mxs_chan->desc.callback)
+		mxs_chan->desc.callback(mxs_chan->desc.callback_param);
+}
+
+static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
+{
+	struct mxs_dma_engine *mxs_dma = dev_id;
+	u32 stat1, stat2;
+
+	/* completion status */
+	stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1);
+	stat1 &= MXS_DMA_CHANNELS_MASK;
+	writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + MXS_CLR_ADDR);
+
+	/* error status */
+	stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2);
+	writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + MXS_CLR_ADDR);
+
+	/*
+	 * When both the completion and error-of-termination bits are set
+	 * at the same time, we do not take it as an error.  IOW, it only
+	 * becomes an error we need to handle here if it is either (1) a
+	 * bus error or (2) a termination error with no completion.
+	 */
+	stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
+		(~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */
+
+	/* combine error and completion status for checking */
+	stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1;
+	while (stat1) {
+		int channel = fls(stat1) - 1;
+		struct mxs_dma_chan *mxs_chan =
+			&mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS];
+
+		if (channel >= MXS_DMA_CHANNELS) {
+			dev_dbg(mxs_dma->dma_device.dev,
+				"%s: error in channel %d\n", __func__,
+				channel - MXS_DMA_CHANNELS);
+			mxs_chan->status = DMA_ERROR;
+			mxs_dma_reset_chan(mxs_chan);
+		} else {
+			if (mxs_chan->flags & MXS_DMA_SG_LOOP)
+				mxs_chan->status = DMA_IN_PROGRESS;
+			else
+				mxs_chan->status = DMA_SUCCESS;
+		}
+
+		stat1 &= ~(1 << channel);
+
+		if (mxs_chan->status == DMA_SUCCESS)
+			mxs_chan->last_completed = mxs_chan->desc.cookie;
+
+		/* schedule tasklet on this channel */
+		tasklet_schedule(&mxs_chan->tasklet);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_data *data = chan->private;
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int ret;
+
+	if (!data)
+		return -EINVAL;
+
+	mxs_chan->chan_irq = data->chan_irq;
+
+	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+				&mxs_chan->ccw_phys, GFP_KERNEL);
+	if (!mxs_chan->ccw) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	memset(mxs_chan->ccw, 0, PAGE_SIZE);
+
+	ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
+				0, "mxs-dma", mxs_dma);
+	if (ret)
+		goto err_irq;
+
+	ret = clk_enable(mxs_dma->clk);
+	if (ret)
+		goto err_clk;
+
+	mxs_dma_reset_chan(mxs_chan);
+
+	dma_async_tx_descriptor_init(&mxs_chan->desc, chan);
+	mxs_chan->desc.tx_submit = mxs_dma_tx_submit;
+
+	/* the descriptor is ready */
+	async_tx_ack(&mxs_chan->desc);
+
+	return 0;
+
+err_clk:
+	free_irq(mxs_chan->chan_irq, mxs_dma);
+err_irq:
+	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+			mxs_chan->ccw, mxs_chan->ccw_phys);
+err_alloc:
+	return ret;
+}
+
+static void mxs_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+
+	mxs_dma_disable_chan(mxs_chan);
+
+	free_irq(mxs_chan->chan_irq, mxs_dma);
+
+	dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE,
+			mxs_chan->ccw, mxs_chan->ccw_phys);
+
+	clk_disable(mxs_dma->clk);
+}
+
+static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long append)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	struct mxs_dma_ccw *ccw;
+	struct scatterlist *sg;
+	int i, j;
+	u32 *pio;
+	static int idx;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS && !append)
+		return NULL;
+
+	if (sg_len + (append ? idx : 0) > NUM_CCW) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum number of sg exceeded: %d > %d\n",
+				sg_len, NUM_CCW);
+		goto err_out;
+	}
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	mxs_chan->flags = 0;
+
+	/*
+	 * If the sg is prepared with the append flag set, the sg
+	 * will be appended to the last prepared sg.
+	 */
+	if (append) {
+		BUG_ON(idx < 1);
+		ccw = &mxs_chan->ccw[idx - 1];
+		ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+		ccw->bits |= CCW_CHAIN;
+		ccw->bits &= ~CCW_IRQ;
+		ccw->bits &= ~CCW_DEC_SEM;
+		ccw->bits &= ~CCW_WAIT4END;
+	} else {
+		idx = 0;
+	}
+
+	if (direction == DMA_NONE) {
+		ccw = &mxs_chan->ccw[idx++];
+		pio = (u32 *) sgl;
+
+		for (j = 0; j < sg_len;)
+			ccw->pio_words[j++] = *pio++;
+
+		ccw->bits = 0;
+		ccw->bits |= CCW_IRQ;
+		ccw->bits |= CCW_DEC_SEM;
+		ccw->bits |= CCW_WAIT4END;
+		ccw->bits |= CCW_HALT_ON_TERM;
+		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= BF_CCW(sg_len, PIO_NUM);
+		ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND);
+	} else {
+		for_each_sg(sgl, sg, sg_len, i) {
+			if (sg->length > MAX_XFER_BYTES) {
+				dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n",
+						sg->length, MAX_XFER_BYTES);
+				goto err_out;
+			}
+
+			ccw = &mxs_chan->ccw[idx++];
+
+			ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+			ccw->bufaddr = sg->dma_address;
+			ccw->xfer_bytes = sg->length;
+
+			ccw->bits = 0;
+			ccw->bits |= CCW_CHAIN;
+			ccw->bits |= CCW_HALT_ON_TERM;
+			ccw->bits |= CCW_TERM_FLUSH;
+			ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+					MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
+					COMMAND);
+
+			if (i + 1 == sg_len) {
+				ccw->bits &= ~CCW_CHAIN;
+				ccw->bits |= CCW_IRQ;
+				ccw->bits |= CCW_DEC_SEM;
+				ccw->bits |= CCW_WAIT4END;
+			}
+		}
+	}
+
+	return &mxs_chan->desc;
+
+err_out:
+	mxs_chan->status = DMA_ERROR;
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_data_direction direction)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int num_periods = buf_len / period_len;
+	int i = 0, buf = 0;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	mxs_chan->flags |= MXS_DMA_SG_LOOP;
+
+	if (num_periods > NUM_CCW) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum number of sg exceeded: %d > %d\n",
+				num_periods, NUM_CCW);
+		goto err_out;
+	}
+
+	if (period_len > MAX_XFER_BYTES) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum period size exceeded: %d > %d\n",
+				period_len, MAX_XFER_BYTES);
+		goto err_out;
+	}
+
+	while (buf < buf_len) {
+		struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i];
+
+		if (i + 1 == num_periods)
+			ccw->next = mxs_chan->ccw_phys;
+		else
+			ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1);
+
+		ccw->bufaddr = dma_addr;
+		ccw->xfer_bytes = period_len;
+
+		ccw->bits = 0;
+		ccw->bits |= CCW_CHAIN;
+		ccw->bits |= CCW_IRQ;
+		ccw->bits |= CCW_HALT_ON_TERM;
+		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
+
+		dma_addr += period_len;
+		buf += period_len;
+
+		i++;
+	}
+
+	return &mxs_chan->desc;
+
+err_out:
+	mxs_chan->status = DMA_ERROR;
+	return NULL;
+}
+
+static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		unsigned long arg)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	int ret = 0;
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		mxs_dma_disable_chan(mxs_chan);
+		break;
+	case DMA_PAUSE:
+		mxs_dma_pause_chan(mxs_chan);
+		break;
+	case DMA_RESUME:
+		mxs_dma_resume_chan(mxs_chan);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
+static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	dma_cookie_t last_used;
+
+	last_used = chan->cookie;
+	dma_set_tx_state(txstate, mxs_chan->last_completed, last_used, 0);
+
+	return mxs_chan->status;
+}
+
+static void mxs_dma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * Nothing to do. We only have a single descriptor.
+	 */
+}
+
+static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+{
+	int ret;
+
+	ret = clk_enable(mxs_dma->clk);
+	if (ret)
+		goto err_out;
+
+	ret = mxs_reset_block(mxs_dma->base);
+	if (ret)
+		goto err_out;
+
+	/* only major version matters */
+	mxs_dma->version = readl(mxs_dma->base +
+				((mxs_dma->dev_id == MXS_DMA_APBX) ?
+				HW_APBX_VERSION : HW_APBH_VERSION)) >>
+				BP_APBHX_VERSION_MAJOR;
+
+	/* enable apbh burst */
+	if (dma_is_apbh()) {
+		writel(BM_APBH_CTRL0_APB_BURST_EN,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+		writel(BM_APBH_CTRL0_APB_BURST8_EN,
+			mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR);
+	}
+
+	/* enable irq for all the channels */
+	writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS,
+		mxs_dma->base + HW_APBHX_CTRL1 + MXS_SET_ADDR);
+
+	clk_disable(mxs_dma->clk);
+
+	return 0;
+
+err_out:
+	return ret;
+}
+
+static int __init mxs_dma_probe(struct platform_device *pdev)
+{
+	const struct platform_device_id *id_entry =
+				platform_get_device_id(pdev);
+	struct mxs_dma_engine *mxs_dma;
+	struct resource *iores;
+	int ret, i;
+
+	mxs_dma = kzalloc(sizeof(*mxs_dma), GFP_KERNEL);
+	if (!mxs_dma)
+		return -ENOMEM;
+
+	mxs_dma->dev_id = id_entry->driver_data;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores) {
+		ret = -ENODEV;
+		goto err_request_region;
+	}
+
+	if (!request_mem_region(iores->start, resource_size(iores),
+				pdev->name)) {
+		ret = -EBUSY;
+		goto err_request_region;
+	}
+
+	mxs_dma->base = ioremap(iores->start, resource_size(iores));
+	if (!mxs_dma->base) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	mxs_dma->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mxs_dma->clk)) {
+		ret = PTR_ERR(mxs_dma->clk);
+		goto err_clk;
+	}
+
+	dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask);
+
+	INIT_LIST_HEAD(&mxs_dma->dma_device.channels);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < MXS_DMA_CHANNELS; i++) {
+		struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i];
+
+		mxs_chan->mxs_dma = mxs_dma;
+		mxs_chan->chan.device = &mxs_dma->dma_device;
+
+		tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet,
+			     (unsigned long) mxs_chan);
+
+		/* Add the channel to mxs_chan list */
+		list_add_tail(&mxs_chan->chan.device_node,
+			&mxs_dma->dma_device.channels);
+	}
+
+	ret = mxs_dma_init(mxs_dma);
+	if (ret)
+		goto err_init;
+
+	mxs_dma->dma_device.dev = &pdev->dev;
+
+	/* limit each sg segment to the MAX_XFER_BYTES the hardware supports */
+	mxs_dma->dma_device.dev->dma_parms = &mxs_dma->dma_parms;
+	dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES);
+
+	mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources;
+	mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources;
+	mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status;
+	mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg;
+	mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic;
+	mxs_dma->dma_device.device_control = mxs_dma_control;
+	mxs_dma->dma_device.device_issue_pending = mxs_dma_issue_pending;
+
+	ret = dma_async_device_register(&mxs_dma->dma_device);
+	if (ret) {
+		dev_err(mxs_dma->dma_device.dev, "unable to register\n");
+		goto err_init;
+	}
+
+	dev_info(mxs_dma->dma_device.dev, "initialized\n");
+
+	return 0;
+
+err_init:
+	clk_put(mxs_dma->clk);
+err_clk:
+	iounmap(mxs_dma->base);
+err_ioremap:
+	release_mem_region(iores->start, resource_size(iores));
+err_request_region:
+	kfree(mxs_dma);
+	return ret;
+}
+
+static struct platform_device_id mxs_dma_type[] = {
+	{
+		.name = "mxs-dma-apbh",
+		.driver_data = MXS_DMA_APBH,
+	}, {
+		.name = "mxs-dma-apbx",
+		.driver_data = MXS_DMA_APBX,
+	}, {
+		/* sentinel */
+	}
+};
+
+static struct platform_driver mxs_dma_driver = {
+	.driver		= {
+		.name	= "mxs-dma",
+	},
+	.id_table	= mxs_dma_type,
+};
+
+static int __init mxs_dma_module_init(void)
+{
+	return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
+}
+subsys_initcall(mxs_dma_module_init);
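
The driver above plugs into the generic dmaengine framework, so clients never call it directly. For orientation, here is a minimal sketch of driving it through the slave API of this kernel generation; run_one_rx(), the filter-less channel request, and the completion plumbing are hypothetical illustrations, not part of this patch, and the sg list is assumed to be DMA-mapped already.

/*
 * Hypothetical client sketch (not part of this patch): grab a slave
 * channel, run one device-to-memory transfer, wait for completion.
 */
#include <linux/completion.h>
#include <linux/dmaengine.h>
#include <linux/scatterlist.h>

static void xfer_done(void *param)
{
	complete(param);		/* completion supplied by the caller */
}

static int run_one_rx(struct scatterlist *sgl, unsigned int nents)
{
	struct dma_async_tx_descriptor *desc;
	struct completion done;
	struct dma_chan *chan;
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	chan = dma_request_channel(mask, NULL, NULL);	/* platform filter omitted */
	if (!chan)
		return -ENODEV;

	/* sgl entries must already carry valid dma_address values */
	desc = chan->device->device_prep_slave_sg(chan, sgl, nents,
						  DMA_FROM_DEVICE, 0);
	if (!desc) {
		dma_release_channel(chan);
		return -EINVAL;
	}

	init_completion(&done);
	desc->callback = xfer_done;
	desc->callback_param = &done;
	desc->tx_submit(desc);		/* hands the descriptor to the driver */
	dma_async_issue_pending(chan);
	wait_for_completion(&done);

	dma_release_channel(chan);
	return 0;
}

Note that for this driver the issue_pending step is a no-op, as the comment in mxs_dma_issue_pending() states: each channel manages a single reusable descriptor.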
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 1c38418ae61f..8d8fef1480a9 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -82,7 +82,7 @@ struct pch_dma_regs {
 	u32	dma_sts1;
 	u32	reserved2;
 	u32	reserved3;
-	struct pch_dma_desc_regs desc[0];
+	struct pch_dma_desc_regs desc[MAX_CHAN_NR];
 };
 
 struct pch_dma_desc {
@@ -124,7 +124,7 @@ struct pch_dma {
 	struct pci_pool		*pool;
 	struct pch_dma_regs	regs;
 	struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR];
-	struct pch_dma_chan	channels[0];
+	struct pch_dma_chan	channels[MAX_CHAN_NR];
 };
 
 #define PCH_DMA_CTL0	0x00
@@ -366,7 +366,7 @@ static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
 	struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan);
 	dma_cookie_t cookie;
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock(&pd_chan->lock);
 	cookie = pdc_assign_cookie(pd_chan, desc);
 
 	if (list_empty(&pd_chan->active_list)) {
@@ -376,7 +376,7 @@ static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
 		list_add_tail(&desc->desc_node, &pd_chan->queue);
 	}
 
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock(&pd_chan->lock);
 	return 0;
 }
 
@@ -386,7 +386,7 @@ static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags)
 	struct pch_dma *pd = to_pd(chan->device);
 	dma_addr_t addr;
 
-	desc = pci_pool_alloc(pd->pool, GFP_KERNEL, &addr);
+	desc = pci_pool_alloc(pd->pool, flags, &addr);
 	if (desc) {
 		memset(desc, 0, sizeof(struct pch_dma_desc));
 		INIT_LIST_HEAD(&desc->tx_list);
@@ -405,7 +405,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan)
 	struct pch_dma_desc *ret = NULL;
 	int i;
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock(&pd_chan->lock);
 	list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) {
 		i++;
 		if (async_tx_test_ack(&desc->txd)) {
@@ -415,15 +415,15 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan)
 		}
 		dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc);
 	}
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock(&pd_chan->lock);
 	dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i);
 
 	if (!ret) {
 		ret = pdc_alloc_desc(&pd_chan->chan, GFP_NOIO);
 		if (ret) {
-			spin_lock_bh(&pd_chan->lock);
+			spin_lock(&pd_chan->lock);
 			pd_chan->descs_allocated++;
-			spin_unlock_bh(&pd_chan->lock);
+			spin_unlock(&pd_chan->lock);
 		} else {
 			dev_err(chan2dev(&pd_chan->chan),
 				"failed to alloc desc\n");
@@ -437,10 +437,10 @@ static void pdc_desc_put(struct pch_dma_chan *pd_chan,
 			 struct pch_dma_desc *desc)
 {
 	if (desc) {
-		spin_lock_bh(&pd_chan->lock);
+		spin_lock(&pd_chan->lock);
 		list_splice_init(&desc->tx_list, &pd_chan->free_list);
 		list_add(&desc->desc_node, &pd_chan->free_list);
-		spin_unlock_bh(&pd_chan->lock);
+		spin_unlock(&pd_chan->lock);
 	}
 }
 
@@ -530,9 +530,9 @@ static void pd_issue_pending(struct dma_chan *chan)
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 
 	if (pdc_is_idle(pd_chan)) {
-		spin_lock_bh(&pd_chan->lock);
+		spin_lock(&pd_chan->lock);
 		pdc_advance_work(pd_chan);
-		spin_unlock_bh(&pd_chan->lock);
+		spin_unlock(&pd_chan->lock);
 	}
 }
 
@@ -592,7 +592,6 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 			goto err_desc_get;
 		}
 
-
 		if (!first) {
 			first = desc;
 		} else {
@@ -641,13 +640,13 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 
 	spin_unlock_bh(&pd_chan->lock);
 
-
 	return 0;
 }
 
 static void pdc_tasklet(unsigned long data)
 {
 	struct pch_dma_chan *pd_chan = (struct pch_dma_chan *)data;
+	unsigned long flags;
 
 	if (!pdc_is_idle(pd_chan)) {
 		dev_err(chan2dev(&pd_chan->chan),
@@ -655,12 +654,12 @@ static void pdc_tasklet(unsigned long data)
 		return;
 	}
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irqsave(&pd_chan->lock, flags);
 	if (test_and_clear_bit(0, &pd_chan->err_status))
 		pdc_handle_error(pd_chan);
 	else
 		pdc_advance_work(pd_chan);
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irqrestore(&pd_chan->lock, flags);
 }
 
 static irqreturn_t pd_irq(int irq, void *devid)
@@ -694,6 +693,7 @@ static irqreturn_t pd_irq(int irq, void *devid)
 	return ret;
 }
 
+#ifdef	CONFIG_PM
 static void pch_dma_save_regs(struct pch_dma *pd)
 {
 	struct pch_dma_chan *pd_chan;
@@ -771,6 +771,7 @@ static int pch_dma_resume(struct pci_dev *pdev)
 
 	return 0;
 }
+#endif
 
 static int __devinit pch_dma_probe(struct pci_dev *pdev,
 				   const struct pci_device_id *id)
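
The spin_lock_bh conversions above follow a general rule rather than anything pch-specific: bottom-half-only protection is insufficient once the same lock can be contended from a context where interrupts are involved, and the tasklet side then has to mask interrupts while holding it. A condensed sketch of the pattern, with hypothetical names:

/*
 * Hypothetical sketch of the locking rule applied above: a path that can
 * race with an interrupt-context acquirer of the lock must disable
 * interrupts while holding it, not just bottom halves.
 */
#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

static void my_tasklet(unsigned long data)
{
	unsigned long flags;

	spin_lock_irqsave(&my_lock, flags);	/* was: spin_lock_bh() */
	/* ... update state shared with the interrupt path ... */
	spin_unlock_irqrestore(&my_lock, flags);
}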
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 6e1d46a65d0e..af955de035f4 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -68,6 +68,7 @@ enum d40_command {
  * @base: Pointer to memory area when the pre_alloc_lli's are not large
  * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if
  * pre_alloc_lli is used.
+ * @dma_addr: DMA address, if mapped
  * @size: The size in bytes of the memory at base or the size of pre_alloc_lli.
  * @pre_alloc_lli: Pre allocated area for the most common case of transfers,
  * one buffer to one buffer.
@@ -75,6 +76,7 @@ enum d40_command {
 struct d40_lli_pool {
 	void	*base;
 	int	 size;
+	dma_addr_t	dma_addr;
 	/* Space for dst and src, plus an extra for padding */
 	u8	 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
 };
@@ -94,7 +96,6 @@ struct d40_lli_pool {
  * during a transfer.
  * @node: List entry.
  * @is_in_client_list: true if the client owns this descriptor.
- * @is_hw_linked: true if this job will automatically be continued for
- * the previous one.
+ * @cyclic: true if this is a cyclic job
  *
  * This descriptor is used for both logical and physical transfers.
@@ -114,7 +115,7 @@ struct d40_desc {
 	struct list_head		 node;
 
 	bool				 is_in_client_list;
-	bool				 is_hw_linked;
+	bool				 cyclic;
 };
 
 /**
@@ -130,6 +131,7 @@ struct d40_desc {
  */
 struct d40_lcla_pool {
 	void		*base;
+	dma_addr_t	dma_addr;
 	void		*base_unaligned;
 	int		 pages;
 	spinlock_t	 lock;
@@ -303,9 +305,37 @@ struct d40_reg_val {
 	unsigned int val;
 };
 
-static int d40_pool_lli_alloc(struct d40_desc *d40d,
-			      int lli_len, bool is_log)
+static struct device *chan2dev(struct d40_chan *d40c)
 {
+	return &d40c->chan.dev->device;
+}
+
+static bool chan_is_physical(struct d40_chan *chan)
+{
+	return chan->log_num == D40_PHY_CHAN;
+}
+
+static bool chan_is_logical(struct d40_chan *chan)
+{
+	return !chan_is_physical(chan);
+}
+
+static void __iomem *chan_base(struct d40_chan *chan)
+{
+	return chan->base->virtbase + D40_DREG_PCBASE +
+	       chan->phy_chan->num * D40_DREG_PCDELTA;
+}
+
+#define d40_err(dev, format, arg...)		\
+	dev_err(dev, "[%s] " format, __func__, ## arg)
+
+#define chan_err(d40c, format, arg...)		\
+	d40_err(chan2dev(d40c), format, ## arg)
+
+static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d,
+			      int lli_len)
+{
+	bool is_log = chan_is_logical(d40c);
 	u32 align;
 	void *base;
 
@@ -319,7 +349,7 @@ static int d40_pool_lli_alloc(struct d40_desc *d40d,
 		d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli);
 		d40d->lli_pool.base = NULL;
 	} else {
-		d40d->lli_pool.size = ALIGN(lli_len * 2 * align, align);
+		d40d->lli_pool.size = lli_len * 2 * align;
 
 		base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT);
 		d40d->lli_pool.base = base;
@@ -329,22 +359,37 @@ static int d40_pool_lli_alloc(struct d40_desc *d40d,
 	}
 
 	if (is_log) {
-		d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base,
-					      align);
-		d40d->lli_log.dst = PTR_ALIGN(d40d->lli_log.src + lli_len,
-					      align);
+		d40d->lli_log.src = PTR_ALIGN(base, align);
+		d40d->lli_log.dst = d40d->lli_log.src + lli_len;
+
+		d40d->lli_pool.dma_addr = 0;
 	} else {
-		d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base,
-					      align);
-		d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len,
-					      align);
+		d40d->lli_phy.src = PTR_ALIGN(base, align);
+		d40d->lli_phy.dst = d40d->lli_phy.src + lli_len;
+
+		d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev,
+							 d40d->lli_phy.src,
+							 d40d->lli_pool.size,
+							 DMA_TO_DEVICE);
+
+		if (dma_mapping_error(d40c->base->dev,
+				      d40d->lli_pool.dma_addr)) {
+			kfree(d40d->lli_pool.base);
+			d40d->lli_pool.base = NULL;
+			d40d->lli_pool.dma_addr = 0;
+			return -ENOMEM;
+		}
 	}
 
 	return 0;
 }
 
-static void d40_pool_lli_free(struct d40_desc *d40d)
+static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d)
 {
+	if (d40d->lli_pool.dma_addr)
+		dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr,
+				 d40d->lli_pool.size, DMA_TO_DEVICE);
+
 	kfree(d40d->lli_pool.base);
 	d40d->lli_pool.base = NULL;
 	d40d->lli_pool.size = 0;
@@ -391,7 +436,7 @@ static int d40_lcla_free_all(struct d40_chan *d40c,
 	int i;
 	int ret = -EINVAL;
 
-	if (d40c->log_num == D40_PHY_CHAN)
+	if (chan_is_physical(d40c))
 		return 0;
 
 	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
@@ -430,7 +475,7 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
 
 		list_for_each_entry_safe(d, _d, &d40c->client, node)
 			if (async_tx_test_ack(&d->txd)) {
-				d40_pool_lli_free(d);
+				d40_pool_lli_free(d40c, d);
 				d40_desc_remove(d);
 				desc = d;
 				memset(desc, 0, sizeof(*desc));
@@ -450,6 +495,7 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
 static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
 {
 
+	d40_pool_lli_free(d40c, d40d);
 	d40_lcla_free_all(d40c, d40d);
 	kmem_cache_free(d40c->base->desc_slab, d40d);
 }
@@ -459,57 +505,128 @@ static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc)
 	list_add_tail(&desc->node, &d40c->active);
 }
 
-static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc)
 {
-	int curr_lcla = -EINVAL, next_lcla;
+	struct d40_phy_lli *lli_dst = desc->lli_phy.dst;
+	struct d40_phy_lli *lli_src = desc->lli_phy.src;
+	void __iomem *base = chan_base(chan);
+
+	writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG);
+	writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT);
+	writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR);
+	writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK);
+
+	writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG);
+	writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT);
+	writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR);
+	writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK);
+}
 
-	if (d40c->log_num == D40_PHY_CHAN) {
-		d40_phy_lli_write(d40c->base->virtbase,
-				  d40c->phy_chan->num,
-				  d40d->lli_phy.dst,
-				  d40d->lli_phy.src);
-		d40d->lli_current = d40d->lli_len;
-	} else {
+static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
+{
+	struct d40_lcla_pool *pool = &chan->base->lcla_pool;
+	struct d40_log_lli_bidir *lli = &desc->lli_log;
+	int lli_current = desc->lli_current;
+	int lli_len = desc->lli_len;
+	bool cyclic = desc->cyclic;
+	int curr_lcla = -EINVAL;
+	int first_lcla = 0;
+	bool linkback;
 
-		if ((d40d->lli_len - d40d->lli_current) > 1)
-			curr_lcla = d40_lcla_alloc_one(d40c, d40d);
+	/*
+	 * We may have partially running cyclic transfers if we didn't get
+	 * enough LCLA entries.
+	 */
+	linkback = cyclic && lli_current == 0;
 
-		d40_log_lli_lcpa_write(d40c->lcpa,
-				       &d40d->lli_log.dst[d40d->lli_current],
-				       &d40d->lli_log.src[d40d->lli_current],
-				       curr_lcla);
+	/*
+	 * For linkback, we need one LCLA even with only one link, because we
+	 * can't link back to the one in LCPA space
+	 */
+	if (linkback || (lli_len - lli_current > 1)) {
+		curr_lcla = d40_lcla_alloc_one(chan, desc);
+		first_lcla = curr_lcla;
+	}
 
-		d40d->lli_current++;
-		for (; d40d->lli_current < d40d->lli_len; d40d->lli_current++) {
-			struct d40_log_lli *lcla;
+	/*
+	 * For linkback, we normally load the LCPA in the loop since we need to
+	 * link it to the second LCLA and not the first.  However, if we
+	 * couldn't even get a first LCLA, then we have to run in LCPA and
+	 * reload manually.
+	 */
+	if (!linkback || curr_lcla == -EINVAL) {
+		unsigned int flags = 0;
 
-			if (d40d->lli_current + 1 < d40d->lli_len)
-				next_lcla = d40_lcla_alloc_one(d40c, d40d);
-			else
-				next_lcla = -EINVAL;
+		if (curr_lcla == -EINVAL)
+			flags |= LLI_TERM_INT;
 
-			lcla = d40c->base->lcla_pool.base +
-				d40c->phy_chan->num * 1024 +
-				8 * curr_lcla * 2;
+		d40_log_lli_lcpa_write(chan->lcpa,
+				       &lli->dst[lli_current],
+				       &lli->src[lli_current],
+				       curr_lcla,
+				       flags);
+		lli_current++;
+	}
 
-			d40_log_lli_lcla_write(lcla,
-					       &d40d->lli_log.dst[d40d->lli_current],
-					       &d40d->lli_log.src[d40d->lli_current],
-					       next_lcla);
+	if (curr_lcla < 0)
+		goto out;
 
-			(void) dma_map_single(d40c->base->dev, lcla,
-					      2 * sizeof(struct d40_log_lli),
-					      DMA_TO_DEVICE);
+	for (; lli_current < lli_len; lli_current++) {
+		unsigned int lcla_offset = chan->phy_chan->num * 1024 +
+					   8 * curr_lcla * 2;
+		struct d40_log_lli *lcla = pool->base + lcla_offset;
+		unsigned int flags = 0;
+		int next_lcla;
 
-			curr_lcla = next_lcla;
+		if (lli_current + 1 < lli_len)
+			next_lcla = d40_lcla_alloc_one(chan, desc);
+		else
+			next_lcla = linkback ? first_lcla : -EINVAL;
 
-			if (curr_lcla == -EINVAL) {
-				d40d->lli_current++;
-				break;
-			}
+		if (cyclic || next_lcla == -EINVAL)
+			flags |= LLI_TERM_INT;
+
+		if (linkback && curr_lcla == first_lcla) {
+			/* First link goes in both LCPA and LCLA */
+			d40_log_lli_lcpa_write(chan->lcpa,
+					       &lli->dst[lli_current],
+					       &lli->src[lli_current],
+					       next_lcla, flags);
+		}
+
+		/*
+		 * This leaves one LCLA unused in the cyclic case if the
+		 * very first next_lcla allocation fails...
+		 */
+		d40_log_lli_lcla_write(lcla,
+				       &lli->dst[lli_current],
+				       &lli->src[lli_current],
+				       next_lcla, flags);
+
+		dma_sync_single_range_for_device(chan->base->dev,
+					pool->dma_addr, lcla_offset,
+					2 * sizeof(struct d40_log_lli),
+					DMA_TO_DEVICE);
 
+		curr_lcla = next_lcla;
+
+		if (curr_lcla == -EINVAL || curr_lcla == first_lcla) {
+			lli_current++;
+			break;
 		}
 	}
+
+out:
+	desc->lli_current = lli_current;
+}
+
+static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	if (chan_is_physical(d40c)) {
+		d40_phy_lli_load(d40c, d40d);
+		d40d->lli_current = d40d->lli_len;
+	} else
+		d40_log_lli_to_lcxa(d40c, d40d);
 }
 
 static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
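
The loading code above is easier to follow with the underlying structure in mind: a job is lowered into a chain of hardware link items, and a cyclic job is simply a chain whose last link points back to the first so the controller loops without CPU help. A minimal sketch of that shape, with hypothetical types:

/*
 * Hypothetical sketch: chain n link items so the hardware walks them in
 * order; for a cyclic job the last entry links back to entry 0.
 */
#include <linux/types.h>

struct lli {
	dma_addr_t next;	/* bus address of the next link */
	/* ... source, destination, element count, flags ... */
};

static void chain_llis(struct lli *lli, dma_addr_t lli_phys, int n, bool cyclic)
{
	int i;

	for (i = 0; i < n - 1; i++)
		lli[i].next = lli_phys + (i + 1) * sizeof(*lli);

	lli[n - 1].next = cyclic ? lli_phys : 0;	/* 0 terminates the chain */
}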
@@ -543,18 +660,6 @@ static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
 	return d;
 }
 
-static struct d40_desc *d40_last_queued(struct d40_chan *d40c)
-{
-	struct d40_desc *d;
-
-	if (list_empty(&d40c->queue))
-		return NULL;
-	list_for_each_entry(d, &d40c->queue, node)
-		if (list_is_last(&d->node, &d40c->queue))
-			break;
-	return d;
-}
-
 static int d40_psize_2_burst_size(bool is_log, int psize)
 {
 	if (is_log) {
@@ -666,9 +771,9 @@ static int d40_channel_execute_command(struct d40_chan *d40c,
 		}
 
 		if (i == D40_SUSPEND_MAX_IT) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s]: unable to suspend the chl %d (log: %d) status %x\n",
-				__func__, d40c->phy_chan->num, d40c->log_num,
+			chan_err(d40c,
+				"unable to suspend the chl %d (log: %d) status %x\n",
+				d40c->phy_chan->num, d40c->log_num,
 				status);
 			dump_stack();
 			ret = -EBUSY;
@@ -701,17 +806,45 @@ static void d40_term_all(struct d40_chan *d40c)
 	d40c->busy = false;
 }
 
+static void __d40_config_set_event(struct d40_chan *d40c, bool enable,
+				   u32 event, int reg)
+{
+	void __iomem *addr = chan_base(d40c) + reg;
+	int tries;
+
+	if (!enable) {
+		writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event))
+		       | ~D40_EVENTLINE_MASK(event), addr);
+		return;
+	}
+
+	/*
+	 * The hardware sometimes doesn't register the enable when src and dst
+	 * event lines are active on the same logical channel.  Retry to ensure
+	 * it does.  Usually only one retry is sufficient.
+	 */
+	tries = 100;
+	while (--tries) {
+		writel((D40_ACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event))
+		       | ~D40_EVENTLINE_MASK(event), addr);
+
+		if (readl(addr) & D40_EVENTLINE_MASK(event))
+			break;
+	}
+
+	if (tries != 99)
+		dev_dbg(chan2dev(d40c),
+			"[%s] workaround enable S%cLNK (%d tries)\n",
+			__func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D',
+			100 - tries);
+
+	WARN_ON(!tries);
+}
+
 static void d40_config_set_event(struct d40_chan *d40c, bool do_enable)
 {
-	u32 val;
 	unsigned long flags;
 
-	/* Notice, that disable requires the physical channel to be stopped */
-	if (do_enable)
-		val = D40_ACTIVATE_EVENTLINE;
-	else
-		val = D40_DEACTIVATE_EVENTLINE;
-
 	spin_lock_irqsave(&d40c->phy_chan->lock, flags);
 
 	/* Enable event line connected to device (or memcpy) */
@@ -719,20 +852,15 @@ static void d40_config_set_event(struct d40_chan *d40c, bool do_enable)
 	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) {
 		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
 
-		writel((val << D40_EVENTLINE_POS(event)) |
-		       ~D40_EVENTLINE_MASK(event),
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SSLNK);
+		__d40_config_set_event(d40c, do_enable, event,
+				       D40_CHAN_REG_SSLNK);
 	}
+
 	if (d40c->dma_cfg.dir !=  STEDMA40_PERIPH_TO_MEM) {
 		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
 
-		writel((val << D40_EVENTLINE_POS(event)) |
-		       ~D40_EVENTLINE_MASK(event),
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SDLNK);
+		__d40_config_set_event(d40c, do_enable, event,
+				       D40_CHAN_REG_SDLNK);
 	}
 
 	spin_unlock_irqrestore(&d40c->phy_chan->lock, flags);
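
The retry loop in __d40_config_set_event() is an instance of a write-and-verify pattern for registers the hardware may silently drop. A generic sketch of the same idea, with a hypothetical helper name:

/*
 * Hypothetical write-and-verify helper: rewrite the register until the
 * read-back shows the bit latched, bounded by a retry budget.
 */
#include <linux/errno.h>
#include <linux/io.h>

static int write_verified(void __iomem *addr, u32 val, u32 mask, int tries)
{
	while (tries--) {
		writel(val, addr);
		if (readl(addr) & mask)
			return 0;	/* hardware accepted the write */
	}

	return -EIO;	/* never latched; the caller decides how loud to be */
}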
@@ -740,15 +868,12 @@ static void d40_config_set_event(struct d40_chan *d40c, bool do_enable)
 
 static u32 d40_chan_has_events(struct d40_chan *d40c)
 {
+	void __iomem *chanbase = chan_base(d40c);
 	u32 val;
 
-	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-		    d40c->phy_chan->num * D40_DREG_PCDELTA +
-		    D40_CHAN_REG_SSLNK);
+	val = readl(chanbase + D40_CHAN_REG_SSLNK);
+	val |= readl(chanbase + D40_CHAN_REG_SDLNK);
 
-	val |= readl(d40c->base->virtbase + D40_DREG_PCBASE +
-		     d40c->phy_chan->num * D40_DREG_PCDELTA +
-		     D40_CHAN_REG_SDLNK);
 	return val;
 }
 
@@ -771,7 +896,7 @@ static u32 d40_get_prmo(struct d40_chan *d40c)
 			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG,
 	};
 
-	if (d40c->log_num == D40_PHY_CHAN)
+	if (chan_is_physical(d40c))
 		return phy_map[d40c->dma_cfg.mode_opt];
 	else
 		return log_map[d40c->dma_cfg.mode_opt];
@@ -785,7 +910,7 @@ static void d40_config_write(struct d40_chan *d40c)
 	/* Odd addresses are even addresses + 4 */
 	addr_base = (d40c->phy_chan->num % 2) * 4;
 	/* Setup channel mode to logical or physical */
-	var = ((u32)(d40c->log_num != D40_PHY_CHAN) + 1) <<
+	var = ((u32)(chan_is_logical(d40c)) + 1) <<
 		D40_CHAN_POS(d40c->phy_chan->num);
 	writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base);
 
@@ -794,30 +919,18 @@ static void d40_config_write(struct d40_chan *d40c)
 
 	writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base);
 
-	if (d40c->log_num != D40_PHY_CHAN) {
+	if (chan_is_logical(d40c)) {
+		int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS)
+			   & D40_SREG_ELEM_LOG_LIDX_MASK;
+		void __iomem *chanbase = chan_base(d40c);
+
 		/* Set default config for CFG reg */
-		writel(d40c->src_def_cfg,
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SSCFG);
-		writel(d40c->dst_def_cfg,
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SDCFG);
+		writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG);
+		writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG);
 
 		/* Set LIDX for lcla */
-		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
-		       D40_SREG_ELEM_LOG_LIDX_MASK,
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SDELT);
-
-		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
-		       D40_SREG_ELEM_LOG_LIDX_MASK,
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SSELT);
-
+		writel(lidx, chanbase + D40_CHAN_REG_SSELT);
+		writel(lidx, chanbase + D40_CHAN_REG_SDELT);
 	}
 }
 
@@ -825,15 +938,15 @@ static u32 d40_residue(struct d40_chan *d40c)
 {
 	u32 num_elt;
 
-	if (d40c->log_num != D40_PHY_CHAN)
+	if (chan_is_logical(d40c))
 		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
 			>> D40_MEM_LCSP2_ECNT_POS;
-	else
-		num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE +
-				 d40c->phy_chan->num * D40_DREG_PCDELTA +
-				 D40_CHAN_REG_SDELT) &
-			   D40_SREG_ELEM_PHY_ECNT_MASK) >>
-			D40_SREG_ELEM_PHY_ECNT_POS;
+	else {
+		u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT);
+		num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK)
+			  >> D40_SREG_ELEM_PHY_ECNT_POS;
+	}
+
 	return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);
 }
 
@@ -841,20 +954,17 @@ static bool d40_tx_is_linked(struct d40_chan *d40c)
 {
 	bool is_link;
 
-	if (d40c->log_num != D40_PHY_CHAN)
+	if (chan_is_logical(d40c))
 		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
 	else
-		is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-				d40c->phy_chan->num * D40_DREG_PCDELTA +
-				D40_CHAN_REG_SDLNK) &
-			D40_SREG_LNK_PHYS_LNK_MASK;
+		is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK)
+			  & D40_SREG_LNK_PHYS_LNK_MASK;
+
 	return is_link;
 }
 
-static int d40_pause(struct dma_chan *chan)
+static int d40_pause(struct d40_chan *d40c)
 {
-	struct d40_chan *d40c =
-		container_of(chan, struct d40_chan, chan);
 	int res = 0;
 	unsigned long flags;
 
@@ -865,7 +975,7 @@ static int d40_pause(struct dma_chan *chan)
 
 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
 	if (res == 0) {
-		if (d40c->log_num != D40_PHY_CHAN) {
+		if (chan_is_logical(d40c)) {
 			d40_config_set_event(d40c, false);
 			/* Resume the other logical channels if any */
 			if (d40_chan_has_events(d40c))
@@ -878,10 +988,8 @@ static int d40_pause(struct dma_chan *chan)
 	return res;
 }
 
-static int d40_resume(struct dma_chan *chan)
+static int d40_resume(struct d40_chan *d40c)
 {
-	struct d40_chan *d40c =
-		container_of(chan, struct d40_chan, chan);
 	int res = 0;
 	unsigned long flags;
 
@@ -891,7 +999,7 @@ static int d40_resume(struct dma_chan *chan)
 	spin_lock_irqsave(&d40c->lock, flags);
 
 	if (d40c->base->rev == 0)
-		if (d40c->log_num != D40_PHY_CHAN) {
+		if (chan_is_logical(d40c)) {
 			res = d40_channel_execute_command(d40c,
 							  D40_DMA_SUSPEND_REQ);
 			goto no_suspend;
@@ -900,7 +1008,7 @@ static int d40_resume(struct dma_chan *chan)
 	/* If bytes left to transfer or linked tx resume job */
 	if (d40_residue(d40c) || d40_tx_is_linked(d40c)) {
 
-		if (d40c->log_num != D40_PHY_CHAN)
+		if (chan_is_logical(d40c))
 			d40_config_set_event(d40c, true);
 
 		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
@@ -911,75 +1019,20 @@ no_suspend:
 	return res;
 }
 
-static void d40_tx_submit_log(struct d40_chan *d40c, struct d40_desc *d40d)
+static int d40_terminate_all(struct d40_chan *chan)
 {
-	/* TODO: Write */
-}
-
-static void d40_tx_submit_phy(struct d40_chan *d40c, struct d40_desc *d40d)
-{
-	struct d40_desc *d40d_prev = NULL;
-	int i;
-	u32 val;
-
-	if (!list_empty(&d40c->queue))
-		d40d_prev = d40_last_queued(d40c);
-	else if (!list_empty(&d40c->active))
-		d40d_prev = d40_first_active_get(d40c);
-
-	if (!d40d_prev)
-		return;
-
-	/* Here we try to join this job with previous jobs */
-	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-		    d40c->phy_chan->num * D40_DREG_PCDELTA +
-		    D40_CHAN_REG_SSLNK);
-
-	/* Figure out which link we're currently transmitting */
-	for (i = 0; i < d40d_prev->lli_len; i++)
-		if (val == d40d_prev->lli_phy.src[i].reg_lnk)
-			break;
-
-	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-		    d40c->phy_chan->num * D40_DREG_PCDELTA +
-		    D40_CHAN_REG_SSELT) >> D40_SREG_ELEM_LOG_ECNT_POS;
-
-	if (i == (d40d_prev->lli_len - 1) && val > 0) {
-		/* Change the current one */
-		writel(virt_to_phys(d40d->lli_phy.src),
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SSLNK);
-		writel(virt_to_phys(d40d->lli_phy.dst),
-		       d40c->base->virtbase + D40_DREG_PCBASE +
-		       d40c->phy_chan->num * D40_DREG_PCDELTA +
-		       D40_CHAN_REG_SDLNK);
-
-		d40d->is_hw_linked = true;
-
-	} else if (i < d40d_prev->lli_len) {
-		(void) dma_unmap_single(d40c->base->dev,
-					virt_to_phys(d40d_prev->lli_phy.src),
-					d40d_prev->lli_pool.size,
-					DMA_TO_DEVICE);
+	unsigned long flags;
+	int ret = 0;
 
-		/* Keep the settings */
-		val = d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk &
-			~D40_SREG_LNK_PHYS_LNK_MASK;
-		d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk =
-			val | virt_to_phys(d40d->lli_phy.src);
+	ret = d40_pause(chan);
+	if (!ret && chan_is_physical(chan))
+		ret = d40_channel_execute_command(chan, D40_DMA_STOP);
 
-		val = d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk &
-			~D40_SREG_LNK_PHYS_LNK_MASK;
-		d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk =
-			val | virt_to_phys(d40d->lli_phy.dst);
+	spin_lock_irqsave(&chan->lock, flags);
+	d40_term_all(chan);
+	spin_unlock_irqrestore(&chan->lock, flags);
 
-		(void) dma_map_single(d40c->base->dev,
-				      d40d_prev->lli_phy.src,
-				      d40d_prev->lli_pool.size,
-				      DMA_TO_DEVICE);
-		d40d->is_hw_linked = true;
-	}
+	return ret;
 }
 
 static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
@@ -990,8 +1043,6 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
 	unsigned long flags;
 
-	(void) d40_pause(&d40c->chan);
-
 	spin_lock_irqsave(&d40c->lock, flags);
 
 	d40c->chan.cookie++;
@@ -1001,17 +1052,10 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	d40d->txd.cookie = d40c->chan.cookie;
 
-	if (d40c->log_num == D40_PHY_CHAN)
-		d40_tx_submit_phy(d40c, d40d);
-	else
-		d40_tx_submit_log(d40c, d40d);
-
 	d40_desc_queue(d40c, d40d);
 
 	spin_unlock_irqrestore(&d40c->lock, flags);
 
-	(void) d40_resume(&d40c->chan);
-
 	return tx->cookie;
 }
 
@@ -1020,7 +1064,7 @@ static int d40_start(struct d40_chan *d40c)
 	if (d40c->base->rev == 0) {
 		int err;
 
-		if (d40c->log_num != D40_PHY_CHAN) {
+		if (chan_is_logical(d40c)) {
 			err = d40_channel_execute_command(d40c,
 							  D40_DMA_SUSPEND_REQ);
 			if (err)
@@ -1028,7 +1072,7 @@ static int d40_start(struct d40_chan *d40c)
 		}
 	}
 
-	if (d40c->log_num != D40_PHY_CHAN)
+	if (chan_is_logical(d40c))
 		d40_config_set_event(d40c, true);
 
 	return d40_channel_execute_command(d40c, D40_DMA_RUN);
@@ -1051,21 +1095,14 @@ static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
 		/* Add to active queue */
 		d40_desc_submit(d40c, d40d);
 
-		/*
-		 * If this job is already linked in hw,
-		 * do not submit it.
-		 */
-
-		if (!d40d->is_hw_linked) {
-			/* Initiate DMA job */
-			d40_desc_load(d40c, d40d);
+		/* Initiate DMA job */
+		d40_desc_load(d40c, d40d);
 
-			/* Start dma job */
-			err = d40_start(d40c);
+		/* Start dma job */
+		err = d40_start(d40c);
 
-			if (err)
-				return NULL;
-		}
+		if (err)
+			return NULL;
 	}
 
 	return d40d;
@@ -1082,17 +1119,36 @@ static void dma_tc_handle(struct d40_chan *d40c)
 	if (d40d == NULL)
 		return;
 
-	d40_lcla_free_all(d40c, d40d);
+	if (d40d->cyclic) {
+		/*
+		 * If this was a partially loaded list, we need to reload
+		 * it, but only once the whole list has completed.  We check
+		 * for done because the interrupt will hit for every link,
+		 * not just the last one.
+		 */
+		if (d40d->lli_current < d40d->lli_len
+		    && !d40_tx_is_linked(d40c)
+		    && !d40_residue(d40c)) {
+			d40_lcla_free_all(d40c, d40d);
+			d40_desc_load(d40c, d40d);
+			(void) d40_start(d40c);
 
-	if (d40d->lli_current < d40d->lli_len) {
-		d40_desc_load(d40c, d40d);
-		/* Start dma job */
-		(void) d40_start(d40c);
-		return;
-	}
+			if (d40d->lli_current == d40d->lli_len)
+				d40d->lli_current = 0;
+		}
+	} else {
+		d40_lcla_free_all(d40c, d40d);
 
-	if (d40_queue_start(d40c) == NULL)
-		d40c->busy = false;
+		if (d40d->lli_current < d40d->lli_len) {
+			d40_desc_load(d40c, d40d);
+			/* Start dma job */
+			(void) d40_start(d40c);
+			return;
+		}
+
+		if (d40_queue_start(d40c) == NULL)
+			d40c->busy = false;
+	}
 
 	d40c->pending_tx++;
 	tasklet_schedule(&d40c->tasklet);
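
A cyclic chain never completes in the dmaengine sense, so the terminal-count interrupt doubles as the per-period notification: the tasklet invokes the client callback each round and leaves the descriptor active. A condensed sketch of that contract, with hypothetical names:

/*
 * Hypothetical sketch of cyclic completion handling: notify the client
 * once per period and keep the descriptor, since the chain links back
 * to its own head.
 */
#include <linux/dmaengine.h>

static void cyclic_period_done(struct dma_async_tx_descriptor *txd)
{
	if (txd->callback)
		txd->callback(txd->callback_param);
	/* no cookie completion: the transfer keeps looping in hardware */
}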
@@ -1111,11 +1167,11 @@ static void dma_tasklet(unsigned long data)
 
 	/* Get first active entry from list */
 	d40d = d40_first_active_get(d40c);
-
 	if (d40d == NULL)
 		goto err;
 
-	d40c->completed = d40d->txd.cookie;
+	if (!d40d->cyclic)
+		d40c->completed = d40d->txd.cookie;
 
 	/*
 	 * If terminating a channel pending_tx is set to zero.
@@ -1130,16 +1186,18 @@ static void dma_tasklet(unsigned long data)
 	callback = d40d->txd.callback;
 	callback_param = d40d->txd.callback_param;
 
-	if (async_tx_test_ack(&d40d->txd)) {
-		d40_pool_lli_free(d40d);
-		d40_desc_remove(d40d);
-		d40_desc_free(d40c, d40d);
-	} else {
-		if (!d40d->is_in_client_list) {
+	if (!d40d->cyclic) {
+		if (async_tx_test_ack(&d40d->txd)) {
+			d40_pool_lli_free(d40c, d40d);
 			d40_desc_remove(d40d);
-			d40_lcla_free_all(d40c, d40d);
-			list_add_tail(&d40d->node, &d40c->client);
-			d40d->is_in_client_list = true;
+			d40_desc_free(d40c, d40d);
+		} else {
+			if (!d40d->is_in_client_list) {
+				d40_desc_remove(d40d);
+				d40_lcla_free_all(d40c, d40d);
+				list_add_tail(&d40d->node, &d40c->client);
+				d40d->is_in_client_list = true;
+			}
 		}
 	}
 
@@ -1216,9 +1274,8 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data)
 		if (!il[row].is_error)
 			dma_tc_handle(d40c);
 		else
-			dev_err(base->dev,
-				"[%s] IRQ chan: %ld offset %d idx %d\n",
-				__func__, chan, il[row].offset, idx);
+			d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n",
+				chan, il[row].offset, idx);
 
 		spin_unlock(&d40c->lock);
 	}
@@ -1237,8 +1294,7 @@ static int d40_validate_conf(struct d40_chan *d40c,
 	bool is_log = conf->mode == STEDMA40_MODE_LOGICAL;
 
 	if (!conf->dir) {
-		dev_err(&d40c->chan.dev->device, "[%s] Invalid direction.\n",
-			__func__);
+		chan_err(d40c, "Invalid direction.\n");
 		res = -EINVAL;
 	}
 
@@ -1246,46 +1302,40 @@ static int d40_validate_conf(struct d40_chan *d40c,
 	    d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 &&
 	    d40c->runtime_addr == 0) {
 
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Invalid TX channel address (%d)\n",
-			__func__, conf->dst_dev_type);
+		chan_err(d40c, "Invalid TX channel address (%d)\n",
+			 conf->dst_dev_type);
 		res = -EINVAL;
 	}
 
 	if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY &&
 	    d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 &&
 	    d40c->runtime_addr == 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Invalid RX channel address (%d)\n",
-			__func__, conf->src_dev_type);
+		chan_err(d40c, "Invalid RX channel address (%d)\n",
+			conf->src_dev_type);
 		res = -EINVAL;
 	}
 
 	if (conf->dir == STEDMA40_MEM_TO_PERIPH &&
 	    dst_event_group == STEDMA40_DEV_DST_MEMORY) {
-		dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n",
-			__func__);
+		chan_err(d40c, "Invalid dst\n");
 		res = -EINVAL;
 	}
 
 	if (conf->dir == STEDMA40_PERIPH_TO_MEM &&
 	    src_event_group == STEDMA40_DEV_SRC_MEMORY) {
-		dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n",
-			__func__);
+		chan_err(d40c, "Invalid src\n");
 		res = -EINVAL;
 	}
 
 	if (src_event_group == STEDMA40_DEV_SRC_MEMORY &&
 	    dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] No event line\n", __func__);
+		chan_err(d40c, "No event line\n");
 		res = -EINVAL;
 	}
 
 	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH &&
 	    (src_event_group != dst_event_group)) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Invalid event group\n", __func__);
+		chan_err(d40c, "Invalid event group\n");
 		res = -EINVAL;
 	}
 
@@ -1294,9 +1344,7 @@ static int d40_validate_conf(struct d40_chan *d40c,
 		 * DMAC HW supports it. Will be added to this driver,
 		 * in case any dma client requires it.
 		 */
-		dev_err(&d40c->chan.dev->device,
-			"[%s] periph to periph not supported\n",
-			__func__);
+		chan_err(d40c, "periph to periph not supported\n");
 		res = -EINVAL;
 	}
 
@@ -1309,9 +1357,7 @@ static int d40_validate_conf(struct d40_chan *d40c,
 		 * src (burst x width) == dst (burst x width)
 		 */
 
-		dev_err(&d40c->chan.dev->device,
-			"[%s] src (burst x width) != dst (burst x width)\n",
-			__func__);
+		chan_err(d40c, "src (burst x width) != dst (burst x width)\n");
 		res = -EINVAL;
 	}
 
@@ -1514,8 +1560,7 @@ static int d40_config_memcpy(struct d40_chan *d40c)
 		   dma_has_cap(DMA_SLAVE, cap)) {
 		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy;
 	} else {
-		dev_err(&d40c->chan.dev->device, "[%s] No memcpy\n",
-			__func__);
+		chan_err(d40c, "No memcpy\n");
 		return -EINVAL;
 	}
 
@@ -1540,21 +1585,19 @@ static int d40_free_dma(struct d40_chan *d40c)
 	/* Release client owned descriptors */
 	if (!list_empty(&d40c->client))
 		list_for_each_entry_safe(d, _d, &d40c->client, node) {
-			d40_pool_lli_free(d);
+			d40_pool_lli_free(d40c, d);
 			d40_desc_remove(d);
 			d40_desc_free(d40c, d);
 		}
 
 	if (phy == NULL) {
-		dev_err(&d40c->chan.dev->device, "[%s] phy == null\n",
-			__func__);
+		chan_err(d40c, "phy == null\n");
 		return -EINVAL;
 	}
 
 	if (phy->allocated_src == D40_ALLOC_FREE &&
 	    phy->allocated_dst == D40_ALLOC_FREE) {
-		dev_err(&d40c->chan.dev->device, "[%s] channel already free\n",
-			__func__);
+		chan_err(d40c, "channel already free\n");
 		return -EINVAL;
 	}
 
@@ -1566,19 +1609,17 @@ static int d40_free_dma(struct d40_chan *d40c)
 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
 		is_src = true;
 	} else {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unknown direction\n", __func__);
+		chan_err(d40c, "Unknown direction\n");
 		return -EINVAL;
 	}
 
 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
 	if (res) {
-		dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n",
-			__func__);
+		chan_err(d40c, "suspend failed\n");
 		return res;
 	}
 
-	if (d40c->log_num != D40_PHY_CHAN) {
+	if (chan_is_logical(d40c)) {
 		/* Release logical channel, deactivate the event line */
 
 		d40_config_set_event(d40c, false);
@@ -1594,9 +1635,8 @@ static int d40_free_dma(struct d40_chan *d40c)
 				res = d40_channel_execute_command(d40c,
 								  D40_DMA_RUN);
 				if (res) {
-					dev_err(&d40c->chan.dev->device,
-						"[%s] Executing RUN command\n",
-						__func__);
+					chan_err(d40c,
+						"Executing RUN command\n");
 					return res;
 				}
 			}
@@ -1609,8 +1649,7 @@ static int d40_free_dma(struct d40_chan *d40c)
 	/* Release physical channel */
 	res = d40_channel_execute_command(d40c, D40_DMA_STOP);
 	if (res) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Failed to stop channel\n", __func__);
+		chan_err(d40c, "Failed to stop channel\n");
 		return res;
 	}
 	d40c->phy_chan = NULL;
@@ -1622,6 +1661,7 @@ static int d40_free_dma(struct d40_chan *d40c)
 
 static bool d40_is_paused(struct d40_chan *d40c)
 {
+	void __iomem *chanbase = chan_base(d40c);
 	bool is_paused = false;
 	unsigned long flags;
 	void __iomem *active_reg;
@@ -1630,7 +1670,7 @@ static bool d40_is_paused(struct d40_chan *d40c)
 
 	spin_lock_irqsave(&d40c->lock, flags);
 
-	if (d40c->log_num == D40_PHY_CHAN) {
+	if (chan_is_physical(d40c)) {
 		if (d40c->phy_chan->num % 2 == 0)
 			active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
 		else
@@ -1648,17 +1688,12 @@ static bool d40_is_paused(struct d40_chan *d40c)
 	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
 	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
-		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-			       d40c->phy_chan->num * D40_DREG_PCDELTA +
-			       D40_CHAN_REG_SDLNK);
+		status = readl(chanbase + D40_CHAN_REG_SDLNK);
 	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
-		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
-			       d40c->phy_chan->num * D40_DREG_PCDELTA +
-			       D40_CHAN_REG_SSLNK);
+		status = readl(chanbase + D40_CHAN_REG_SSLNK);
 	} else {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unknown direction\n", __func__);
+		chan_err(d40c, "Unknown direction\n");
 		goto _exit;
 	}
 
@@ -1688,114 +1723,184 @@ static u32 stedma40_residue(struct dma_chan *chan)
 	return bytes_left;
 }
 
-struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
-						   struct scatterlist *sgl_dst,
-						   struct scatterlist *sgl_src,
-						   unsigned int sgl_len,
-						   unsigned long dma_flags)
+static int
+d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc,
+		struct scatterlist *sg_src, struct scatterlist *sg_dst,
+		unsigned int sg_len, dma_addr_t src_dev_addr,
+		dma_addr_t dst_dev_addr)
 {
-	int res;
-	struct d40_desc *d40d;
-	struct d40_chan *d40c = container_of(chan, struct d40_chan,
-					     chan);
-	unsigned long flags;
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct stedma40_half_channel_info *src_info = &cfg->src_info;
+	struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+	int ret;
 
-	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unallocated channel.\n", __func__);
-		return ERR_PTR(-EINVAL);
-	}
+	ret = d40_log_sg_to_lli(sg_src, sg_len,
+				src_dev_addr,
+				desc->lli_log.src,
+				chan->log_def.lcsp1,
+				src_info->data_width,
+				dst_info->data_width);
 
-	spin_lock_irqsave(&d40c->lock, flags);
-	d40d = d40_desc_get(d40c);
+	ret = d40_log_sg_to_lli(sg_dst, sg_len,
+				dst_dev_addr,
+				desc->lli_log.dst,
+				chan->log_def.lcsp3,
+				dst_info->data_width,
+				src_info->data_width);
 
-	if (d40d == NULL)
+	return ret < 0 ? ret : 0;
+}
+
+static int
+d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc,
+		struct scatterlist *sg_src, struct scatterlist *sg_dst,
+		unsigned int sg_len, dma_addr_t src_dev_addr,
+		dma_addr_t dst_dev_addr)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct stedma40_half_channel_info *src_info = &cfg->src_info;
+	struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+	unsigned long flags = 0;
+	int ret;
+
+	if (desc->cyclic)
+		flags |= LLI_CYCLIC | LLI_TERM_INT;
+
+	ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr,
+				desc->lli_phy.src,
+				virt_to_phys(desc->lli_phy.src),
+				chan->src_def_cfg,
+				src_info, dst_info, flags);
+
+	ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr,
+				desc->lli_phy.dst,
+				virt_to_phys(desc->lli_phy.dst),
+				chan->dst_def_cfg,
+				dst_info, src_info, flags);
+
+	dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr,
+				   desc->lli_pool.size, DMA_TO_DEVICE);
+
+	return ret < 0 ? ret : 0;
+}
+
+
+static struct d40_desc *
+d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg,
+	      unsigned int sg_len, unsigned long dma_flags)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct d40_desc *desc;
+	int ret;
+
+	desc = d40_desc_get(chan);
+	if (!desc)
+		return NULL;
+
+	desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width,
+					cfg->dst_info.data_width);
+	if (desc->lli_len < 0) {
+		chan_err(chan, "Unaligned size\n");
 		goto err;
+	}
 
-	d40d->lli_len = d40_sg_2_dmalen(sgl_dst, sgl_len,
-					d40c->dma_cfg.src_info.data_width,
-					d40c->dma_cfg.dst_info.data_width);
-	if (d40d->lli_len < 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unaligned size\n", __func__);
+	ret = d40_pool_lli_alloc(chan, desc, desc->lli_len);
+	if (ret < 0) {
+		chan_err(chan, "Could not allocate lli\n");
 		goto err;
 	}
 
-	d40d->lli_current = 0;
-	d40d->txd.flags = dma_flags;
 
-	if (d40c->log_num != D40_PHY_CHAN) {
+	desc->lli_current = 0;
+	desc->txd.flags = dma_flags;
+	desc->txd.tx_submit = d40_tx_submit;
 
-		if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s] Out of memory\n", __func__);
-			goto err;
-		}
+	dma_async_tx_descriptor_init(&desc->txd, &chan->chan);
 
-		(void) d40_log_sg_to_lli(sgl_src,
-					 sgl_len,
-					 d40d->lli_log.src,
-					 d40c->log_def.lcsp1,
-					 d40c->dma_cfg.src_info.data_width,
-					 d40c->dma_cfg.dst_info.data_width);
-
-		(void) d40_log_sg_to_lli(sgl_dst,
-					 sgl_len,
-					 d40d->lli_log.dst,
-					 d40c->log_def.lcsp3,
-					 d40c->dma_cfg.dst_info.data_width,
-					 d40c->dma_cfg.src_info.data_width);
-	} else {
-		if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s] Out of memory\n", __func__);
-			goto err;
-		}
+	return desc;
+
+err:
+	d40_desc_free(chan, desc);
+	return NULL;
+}
+
+static dma_addr_t
+d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
+{
+	struct stedma40_platform_data *plat = chan->base->plat_data;
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	dma_addr_t addr = 0;
 
-		res = d40_phy_sg_to_lli(sgl_src,
-					sgl_len,
-					0,
-					d40d->lli_phy.src,
-					virt_to_phys(d40d->lli_phy.src),
-					d40c->src_def_cfg,
-					d40c->dma_cfg.src_info.data_width,
-					d40c->dma_cfg.dst_info.data_width,
-					d40c->dma_cfg.src_info.psize);
+	if (chan->runtime_addr)
+		return chan->runtime_addr;
 
-		if (res < 0)
-			goto err;
+	if (direction == DMA_FROM_DEVICE)
+		addr = plat->dev_rx[cfg->src_dev_type];
+	else if (direction == DMA_TO_DEVICE)
+		addr = plat->dev_tx[cfg->dst_dev_type];
 
-		res = d40_phy_sg_to_lli(sgl_dst,
-					sgl_len,
-					0,
-					d40d->lli_phy.dst,
-					virt_to_phys(d40d->lli_phy.dst),
-					d40c->dst_def_cfg,
-					d40c->dma_cfg.dst_info.data_width,
-					d40c->dma_cfg.src_info.data_width,
-					d40c->dma_cfg.dst_info.psize);
+	return addr;
+}
 
-		if (res < 0)
-			goto err;
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
+	    struct scatterlist *sg_dst, unsigned int sg_len,
+	    enum dma_data_direction direction, unsigned long dma_flags)
+{
+	struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
+	dma_addr_t src_dev_addr = 0;
+	dma_addr_t dst_dev_addr = 0;
+	struct d40_desc *desc;
+	unsigned long flags;
+	int ret;
 
-		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
-				      d40d->lli_pool.size, DMA_TO_DEVICE);
+	if (!chan->phy_chan) {
+		chan_err(chan, "Cannot prepare unallocated channel\n");
+		return NULL;
 	}
 
-	dma_async_tx_descriptor_init(&d40d->txd, chan);
 
-	d40d->txd.tx_submit = d40_tx_submit;
+	spin_lock_irqsave(&chan->lock, flags);
 
-	spin_unlock_irqrestore(&d40c->lock, flags);
+	desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags);
+	if (desc == NULL)
+		goto err;
+
+	if (sg_next(&sg_src[sg_len - 1]) == sg_src)
+		desc->cyclic = true;
+
+	if (direction != DMA_NONE) {
+		dma_addr_t dev_addr = d40_get_dev_addr(chan, direction);
+
+		if (direction == DMA_FROM_DEVICE)
+			src_dev_addr = dev_addr;
+		else if (direction == DMA_TO_DEVICE)
+			dst_dev_addr = dev_addr;
+	}
+
+	if (chan_is_logical(chan))
+		ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst,
+				      sg_len, src_dev_addr, dst_dev_addr);
+	else
+		ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst,
+				      sg_len, src_dev_addr, dst_dev_addr);
+
+	if (ret) {
+		chan_err(chan, "Failed to prepare %s sg job: %d\n",
+			 chan_is_logical(chan) ? "log" : "phy", ret);
+		goto err;
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return &desc->txd;
 
-	return &d40d->txd;
 err:
-	if (d40d)
-		d40_desc_free(d40c, d40d);
-	spin_unlock_irqrestore(&d40c->lock, flags);
+	if (desc)
+		d40_desc_free(chan, desc);
+	spin_unlock_irqrestore(&chan->lock, flags);
 	return NULL;
 }
-EXPORT_SYMBOL(stedma40_memcpy_sg);
 
 bool stedma40_filter(struct dma_chan *chan, void *data)
 {
@@ -1818,6 +1923,38 @@ bool stedma40_filter(struct dma_chan *chan, void *data)
 }
 EXPORT_SYMBOL(stedma40_filter);
 
+static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src)
+{
+	bool realtime = d40c->dma_cfg.realtime;
+	bool highprio = d40c->dma_cfg.high_priority;
+	u32 prioreg = highprio ? D40_DREG_PSEG1 : D40_DREG_PCEG1;
+	u32 rtreg = realtime ? D40_DREG_RSEG1 : D40_DREG_RCEG1;
+	u32 event = D40_TYPE_TO_EVENT(dev_type);
+	u32 group = D40_TYPE_TO_GROUP(dev_type);
+	u32 bit = 1 << event;
+
+	/* Destination event lines are stored in the upper halfword */
+	if (!src)
+		bit <<= 16;
+
+	writel(bit, d40c->base->virtbase + prioreg + group * 4);
+	writel(bit, d40c->base->virtbase + rtreg + group * 4);
+}
+
+static void d40_set_prio_realtime(struct d40_chan *d40c)
+{
+	if (d40c->base->rev < 3)
+		return;
+
+	if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) ||
+	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH))
+		__d40_set_prio_rt(d40c, d40c->dma_cfg.src_dev_type, true);
+
+	if ((d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH) ||
+	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH))
+		__d40_set_prio_rt(d40c, d40c->dma_cfg.dst_dev_type, false);
+}
+
 /* DMA ENGINE functions */
 static int d40_alloc_chan_resources(struct dma_chan *chan)
 {
@@ -1834,9 +1971,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
 	if (!d40c->configured) {
 		err = d40_config_memcpy(d40c);
 		if (err) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s] Failed to configure memcpy channel\n",
-				__func__);
+			chan_err(d40c, "Failed to configure memcpy channel\n");
 			goto fail;
 		}
 	}
@@ -1844,16 +1979,17 @@ static int d40_alloc_chan_resources(struct dma_chan *chan)
 
 	err = d40_allocate_channel(d40c);
 	if (err) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Failed to allocate channel\n", __func__);
+		chan_err(d40c, "Failed to allocate channel\n");
 		goto fail;
 	}
 
 	/* Fill in basic CFG register values */
 	d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg,
-		    &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN);
+		    &d40c->dst_def_cfg, chan_is_logical(d40c));
 
-	if (d40c->log_num != D40_PHY_CHAN) {
+	d40_set_prio_realtime(d40c);
+
+	if (chan_is_logical(d40c)) {
 		d40_log_cfg(&d40c->dma_cfg,
 			    &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
 
@@ -1886,8 +2022,7 @@ static void d40_free_chan_resources(struct dma_chan *chan)
 	unsigned long flags;
 
 	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Cannot free unallocated channel\n", __func__);
+		chan_err(d40c, "Cannot free unallocated channel\n");
 		return;
 	}
 
@@ -1897,8 +2032,7 @@ static void d40_free_chan_resources(struct dma_chan *chan)
 	err = d40_free_dma(d40c);
 
 	if (err)
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Failed to free channel\n", __func__);
+		chan_err(d40c, "Failed to free channel\n");
 	spin_unlock_irqrestore(&d40c->lock, flags);
 }
 
@@ -1908,251 +2042,31 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 						       size_t size,
 						       unsigned long dma_flags)
 {
-	struct d40_desc *d40d;
-	struct d40_chan *d40c = container_of(chan, struct d40_chan,
-					     chan);
-	unsigned long flags;
-
-	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Channel is not allocated.\n", __func__);
-		return ERR_PTR(-EINVAL);
-	}
-
-	spin_lock_irqsave(&d40c->lock, flags);
-	d40d = d40_desc_get(d40c);
-
-	if (d40d == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Descriptor is NULL\n", __func__);
-		goto err;
-	}
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
 
-	d40d->txd.flags = dma_flags;
-	d40d->lli_len = d40_size_2_dmalen(size,
-					  d40c->dma_cfg.src_info.data_width,
-					  d40c->dma_cfg.dst_info.data_width);
-	if (d40d->lli_len < 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unaligned size\n", __func__);
-		goto err;
-	}
+	sg_init_table(&dst_sg, 1);
+	sg_init_table(&src_sg, 1);
 
+	sg_dma_address(&dst_sg) = dst;
+	sg_dma_address(&src_sg) = src;
 
-	dma_async_tx_descriptor_init(&d40d->txd, chan);
+	sg_dma_len(&dst_sg) = size;
+	sg_dma_len(&src_sg) = size;
 
-	d40d->txd.tx_submit = d40_tx_submit;
-
-	if (d40c->log_num != D40_PHY_CHAN) {
-
-		if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s] Out of memory\n", __func__);
-			goto err;
-		}
-		d40d->lli_current = 0;
-
-		if (d40_log_buf_to_lli(d40d->lli_log.src,
-				       src,
-				       size,
-				       d40c->log_def.lcsp1,
-				       d40c->dma_cfg.src_info.data_width,
-				       d40c->dma_cfg.dst_info.data_width,
-				       true) == NULL)
-			goto err;
-
-		if (d40_log_buf_to_lli(d40d->lli_log.dst,
-				       dst,
-				       size,
-				       d40c->log_def.lcsp3,
-				       d40c->dma_cfg.dst_info.data_width,
-				       d40c->dma_cfg.src_info.data_width,
-				       true) == NULL)
-			goto err;
-
-	} else {
-
-		if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
-			dev_err(&d40c->chan.dev->device,
-				"[%s] Out of memory\n", __func__);
-			goto err;
-		}
-
-		if (d40_phy_buf_to_lli(d40d->lli_phy.src,
-				       src,
-				       size,
-				       d40c->dma_cfg.src_info.psize,
-				       0,
-				       d40c->src_def_cfg,
-				       true,
-				       d40c->dma_cfg.src_info.data_width,
-				       d40c->dma_cfg.dst_info.data_width,
-				       false) == NULL)
-			goto err;
-
-		if (d40_phy_buf_to_lli(d40d->lli_phy.dst,
-				       dst,
-				       size,
-				       d40c->dma_cfg.dst_info.psize,
-				       0,
-				       d40c->dst_def_cfg,
-				       true,
-				       d40c->dma_cfg.dst_info.data_width,
-				       d40c->dma_cfg.src_info.data_width,
-				       false) == NULL)
-			goto err;
-
-		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
-				      d40d->lli_pool.size, DMA_TO_DEVICE);
-	}
-
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return &d40d->txd;
-
-err:
-	if (d40d)
-		d40_desc_free(d40c, d40d);
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return NULL;
+	return d40_prep_sg(chan, &src_sg, &dst_sg, 1, DMA_NONE, dma_flags);
 }
 
 static struct dma_async_tx_descriptor *
-d40_prep_sg(struct dma_chan *chan,
-	    struct scatterlist *dst_sg, unsigned int dst_nents,
-	    struct scatterlist *src_sg, unsigned int src_nents,
-	    unsigned long dma_flags)
+d40_prep_memcpy_sg(struct dma_chan *chan,
+		   struct scatterlist *dst_sg, unsigned int dst_nents,
+		   struct scatterlist *src_sg, unsigned int src_nents,
+		   unsigned long dma_flags)
 {
 	if (dst_nents != src_nents)
 		return NULL;
 
-	return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags);
-}
-
-static int d40_prep_slave_sg_log(struct d40_desc *d40d,
-				 struct d40_chan *d40c,
-				 struct scatterlist *sgl,
-				 unsigned int sg_len,
-				 enum dma_data_direction direction,
-				 unsigned long dma_flags)
-{
-	dma_addr_t dev_addr = 0;
-	int total_size;
-
-	d40d->lli_len = d40_sg_2_dmalen(sgl, sg_len,
-					d40c->dma_cfg.src_info.data_width,
-					d40c->dma_cfg.dst_info.data_width);
-	if (d40d->lli_len < 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unaligned size\n", __func__);
-		return -EINVAL;
-	}
-
-	if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Out of memory\n", __func__);
-		return -ENOMEM;
-	}
-
-	d40d->lli_current = 0;
-
-	if (direction == DMA_FROM_DEVICE)
-		if (d40c->runtime_addr)
-			dev_addr = d40c->runtime_addr;
-		else
-			dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type];
-	else if (direction == DMA_TO_DEVICE)
-		if (d40c->runtime_addr)
-			dev_addr = d40c->runtime_addr;
-		else
-			dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type];
-
-	else
-		return -EINVAL;
-
-	total_size = d40_log_sg_to_dev(sgl, sg_len,
-				       &d40d->lli_log,
-				       &d40c->log_def,
-				       d40c->dma_cfg.src_info.data_width,
-				       d40c->dma_cfg.dst_info.data_width,
-				       direction,
-				       dev_addr);
-
-	if (total_size < 0)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int d40_prep_slave_sg_phy(struct d40_desc *d40d,
-				 struct d40_chan *d40c,
-				 struct scatterlist *sgl,
-				 unsigned int sgl_len,
-				 enum dma_data_direction direction,
-				 unsigned long dma_flags)
-{
-	dma_addr_t src_dev_addr;
-	dma_addr_t dst_dev_addr;
-	int res;
-
-	d40d->lli_len = d40_sg_2_dmalen(sgl, sgl_len,
-					d40c->dma_cfg.src_info.data_width,
-					d40c->dma_cfg.dst_info.data_width);
-	if (d40d->lli_len < 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Unaligned size\n", __func__);
-		return -EINVAL;
-	}
-
-	if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Out of memory\n", __func__);
-		return -ENOMEM;
-	}
-
-	d40d->lli_current = 0;
-
-	if (direction == DMA_FROM_DEVICE) {
-		dst_dev_addr = 0;
-		if (d40c->runtime_addr)
-			src_dev_addr = d40c->runtime_addr;
-		else
-			src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type];
-	} else if (direction == DMA_TO_DEVICE) {
-		if (d40c->runtime_addr)
-			dst_dev_addr = d40c->runtime_addr;
-		else
-			dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type];
-		src_dev_addr = 0;
-	} else
-		return -EINVAL;
-
-	res = d40_phy_sg_to_lli(sgl,
-				sgl_len,
-				src_dev_addr,
-				d40d->lli_phy.src,
-				virt_to_phys(d40d->lli_phy.src),
-				d40c->src_def_cfg,
-				d40c->dma_cfg.src_info.data_width,
-				d40c->dma_cfg.dst_info.data_width,
-				d40c->dma_cfg.src_info.psize);
-	if (res < 0)
-		return res;
-
-	res = d40_phy_sg_to_lli(sgl,
-				sgl_len,
-				dst_dev_addr,
-				d40d->lli_phy.dst,
-				virt_to_phys(d40d->lli_phy.dst),
-				d40c->dst_def_cfg,
-				d40c->dma_cfg.dst_info.data_width,
-				d40c->dma_cfg.src_info.data_width,
-				d40c->dma_cfg.dst_info.psize);
-	if (res < 0)
-		return res;
-
-	(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
-			      d40d->lli_pool.size, DMA_TO_DEVICE);
-	return 0;
+	return d40_prep_sg(chan, src_sg, dst_sg, src_nents, DMA_NONE, dma_flags);
 }
 
 static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
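The rewritten d40_prep_memcpy() above reduces a memcpy to a pair of one-entry scatterlists and hands them to the common d40_prep_sg() path, so one sg-based core serves every transfer type. A minimal standalone sketch of that pattern, with wrap_buf_as_sg() as a hypothetical helper:

	/* hypothetical helper: present one DMA buffer as a one-entry sg table */
	static void wrap_buf_as_sg(struct scatterlist *sg, dma_addr_t buf,
				   size_t len)
	{
		sg_init_table(sg, 1);	/* also sets the end marker */
		sg_dma_address(sg) = buf;
		sg_dma_len(sg) = len;
	}
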
@@ -2161,52 +2075,40 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 							 enum dma_data_direction direction,
 							 unsigned long dma_flags)
 {
-	struct d40_desc *d40d;
-	struct d40_chan *d40c = container_of(chan, struct d40_chan,
-					     chan);
-	unsigned long flags;
-	int err;
-
-	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Cannot prepare unallocated channel\n", __func__);
-		return ERR_PTR(-EINVAL);
-	}
+	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE)
+		return NULL;
 
-	spin_lock_irqsave(&d40c->lock, flags);
-	d40d = d40_desc_get(d40c);
+	return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
+}
 
-	if (d40d == NULL)
-		goto err;
+static struct dma_async_tx_descriptor *
+dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+		     size_t buf_len, size_t period_len,
+		     enum dma_data_direction direction)
+{
+	unsigned int periods = buf_len / period_len;
+	struct dma_async_tx_descriptor *txd;
+	struct scatterlist *sg;
+	int i;
 
-	if (d40c->log_num != D40_PHY_CHAN)
-		err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len,
-					    direction, dma_flags);
-	else
-		err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len,
-					    direction, dma_flags);
-	if (err) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Failed to prepare %s slave sg job: %d\n",
-			__func__,
-			d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err);
-		goto err;
+	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL);
+	for (i = 0; i < periods; i++) {
+		sg_dma_address(&sg[i]) = dma_addr;
+		sg_dma_len(&sg[i]) = period_len;
+		dma_addr += period_len;
 	}
 
-	d40d->txd.flags = dma_flags;
+	sg[periods].offset = 0;
+	sg[periods].length = 0;
+	sg[periods].page_link =
+		((unsigned long)sg | 0x01) & ~0x02;
 
-	dma_async_tx_descriptor_init(&d40d->txd, chan);
+	txd = d40_prep_sg(chan, sg, sg, periods, direction,
+			  DMA_PREP_INTERRUPT);
 
-	d40d->txd.tx_submit = d40_tx_submit;
+	kfree(sg);
 
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return &d40d->txd;
-
-err:
-	if (d40d)
-		d40_desc_free(d40c, d40d);
-	spin_unlock_irqrestore(&d40c->lock, flags);
-	return NULL;
+	return txd;
 }
 
 static enum dma_status d40_tx_status(struct dma_chan *chan,
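dma40_prep_dma_cyclic() above turns the buffer into one scatterlist entry per period and closes the ring with a spare entry: in a scatterlist, bit 0 of page_link marks a chain to another table and bit 1 marks the end of the table, so pointing the spare entry back at sg[0] with the end bit cleared makes iteration wrap around. A minimal sketch of the same construction, with make_period_ring() as a hypothetical helper; unlike the patch, the sketch also checks the allocation:

	/* hypothetical helper mirroring dma40_prep_dma_cyclic()'s ring setup */
	static struct scatterlist *make_period_ring(dma_addr_t buf,
						    size_t buf_len,
						    size_t period_len)
	{
		unsigned int periods = buf_len / period_len;
		struct scatterlist *sg;
		unsigned int i;

		sg = kcalloc(periods + 1, sizeof(*sg), GFP_KERNEL);
		if (!sg)
			return NULL;

		for (i = 0; i < periods; i++) {
			sg_dma_address(&sg[i]) = buf + i * period_len;
			sg_dma_len(&sg[i]) = period_len;
		}

		/* spare entry: chain back to sg[0], end marker cleared */
		sg[periods].offset = 0;
		sg[periods].length = 0;
		sg[periods].page_link = ((unsigned long)sg | 0x01) & ~0x02;

		return sg;
	}
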
@@ -2219,9 +2121,7 @@ static enum dma_status d40_tx_status(struct dma_chan *chan,
 	int ret;
 
 	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Cannot read status of unallocated channel\n",
-			__func__);
+		chan_err(d40c, "Cannot read status of unallocated channel\n");
 		return -EINVAL;
 	}
 
@@ -2245,8 +2145,7 @@ static void d40_issue_pending(struct dma_chan *chan)
 	unsigned long flags;
 
 	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Channel is not allocated!\n", __func__);
+		chan_err(d40c, "Channel is not allocated!\n");
 		return;
 	}
 
@@ -2339,7 +2238,7 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 		return;
 	}
 
-	if (d40c->log_num != D40_PHY_CHAN) {
+	if (chan_is_logical(d40c)) {
 		if (config_maxburst >= 16)
 			psize = STEDMA40_PSIZE_LOG_16;
 		else if (config_maxburst >= 8)
@@ -2372,7 +2271,7 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 	cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
 
 	/* Fill in register values */
-	if (d40c->log_num != D40_PHY_CHAN)
+	if (chan_is_logical(d40c))
 		d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
 	else
 		d40_phy_cfg(cfg, &d40c->src_def_cfg,
@@ -2393,25 +2292,20 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		       unsigned long arg)
 {
-	unsigned long flags;
 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
 
 	if (d40c->phy_chan == NULL) {
-		dev_err(&d40c->chan.dev->device,
-			"[%s] Channel is not allocated!\n", __func__);
+		chan_err(d40c, "Channel is not allocated!\n");
 		return -EINVAL;
 	}
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
-		spin_lock_irqsave(&d40c->lock, flags);
-		d40_term_all(d40c);
-		spin_unlock_irqrestore(&d40c->lock, flags);
-		return 0;
+		return d40_terminate_all(d40c);
 	case DMA_PAUSE:
-		return d40_pause(chan);
+		return d40_pause(d40c);
 	case DMA_RESUME:
-		return d40_resume(chan);
+		return d40_resume(d40c);
 	case DMA_SLAVE_CONFIG:
 		d40_set_runtime_config(chan,
 			(struct dma_slave_config *) arg);
@@ -2456,6 +2350,35 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
 	}
 }
 
+static void d40_ops_init(struct d40_base *base, struct dma_device *dev)
+{
+	if (dma_has_cap(DMA_SLAVE, dev->cap_mask))
+		dev->device_prep_slave_sg = d40_prep_slave_sg;
+
+	if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) {
+		dev->device_prep_dma_memcpy = d40_prep_memcpy;
+
+		/*
+		 * This controller can only access addresses at even
+		 * 32-bit boundaries, i.e. 2^2.
+		 */
+		dev->copy_align = 2;
+	}
+
+	if (dma_has_cap(DMA_SG, dev->cap_mask))
+		dev->device_prep_dma_sg = d40_prep_memcpy_sg;
+
+	if (dma_has_cap(DMA_CYCLIC, dev->cap_mask))
+		dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic;
+
+	dev->device_alloc_chan_resources = d40_alloc_chan_resources;
+	dev->device_free_chan_resources = d40_free_chan_resources;
+	dev->device_issue_pending = d40_issue_pending;
+	dev->device_tx_status = d40_tx_status;
+	dev->device_control = d40_control;
+	dev->dev = base->dev;
+}
+
 static int __init d40_dmaengine_init(struct d40_base *base,
 				     int num_reserved_chans)
 {
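d40_ops_init() keys each device_prep_* hook off the capability mask, so the three dma_device instances registered below differ only in the capabilities they advertise. On the client side the same mask drives channel selection; a minimal sketch, assuming any channel advertising DMA_MEMCPY is acceptable:

	static struct dma_chan *get_memcpy_chan(void)
	{
		dma_cap_mask_t mask;

		dma_cap_zero(mask);
		dma_cap_set(DMA_MEMCPY, mask);

		/* NULL filter: take the first channel with the capability */
		return dma_request_channel(mask, NULL, NULL);
	}
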
@@ -2466,23 +2389,14 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 
 	dma_cap_zero(base->dma_slave.cap_mask);
 	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
 
-	base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources;
-	base->dma_slave.device_free_chan_resources = d40_free_chan_resources;
-	base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy;
-	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
-	base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg;
-	base->dma_slave.device_tx_status = d40_tx_status;
-	base->dma_slave.device_issue_pending = d40_issue_pending;
-	base->dma_slave.device_control = d40_control;
-	base->dma_slave.dev = base->dev;
+	d40_ops_init(base, &base->dma_slave);
 
 	err = dma_async_device_register(&base->dma_slave);
 
 	if (err) {
-		dev_err(base->dev,
-			"[%s] Failed to register slave channels\n",
-			__func__);
+		d40_err(base->dev, "Failed to register slave channels\n");
 		goto failure1;
 	}
 
@@ -2491,29 +2405,15 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 
 	dma_cap_zero(base->dma_memcpy.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
-	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
-
-	base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources;
-	base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources;
-	base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy;
-	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
-	base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg;
-	base->dma_memcpy.device_tx_status = d40_tx_status;
-	base->dma_memcpy.device_issue_pending = d40_issue_pending;
-	base->dma_memcpy.device_control = d40_control;
-	base->dma_memcpy.dev = base->dev;
-	/*
-	 * This controller can only access address at even
-	 * 32bit boundaries, i.e. 2^2
-	 */
-	base->dma_memcpy.copy_align = 2;
+	dma_cap_set(DMA_SG, base->dma_memcpy.cap_mask);
+
+	d40_ops_init(base, &base->dma_memcpy);
 
 	err = dma_async_device_register(&base->dma_memcpy);
 
 	if (err) {
-		dev_err(base->dev,
-			"[%s] Failed to regsiter memcpy only channels\n",
-			__func__);
+		d40_err(base->dev,
+		"Failed to register memcpy-only channels\n");
 		goto failure2;
 	}
 
@@ -2523,24 +2423,15 @@ static int __init d40_dmaengine_init(struct d40_base *base,
 	dma_cap_zero(base->dma_both.cap_mask);
 	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
 	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
-	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
-
-	base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources;
-	base->dma_both.device_free_chan_resources = d40_free_chan_resources;
-	base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy;
-	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
-	base->dma_both.device_prep_slave_sg = d40_prep_slave_sg;
-	base->dma_both.device_tx_status = d40_tx_status;
-	base->dma_both.device_issue_pending = d40_issue_pending;
-	base->dma_both.device_control = d40_control;
-	base->dma_both.dev = base->dev;
-	base->dma_both.copy_align = 2;
+	dma_cap_set(DMA_SG, base->dma_both.cap_mask);
+	dma_cap_set(DMA_CYCLIC, base->dma_both.cap_mask);
+
+	d40_ops_init(base, &base->dma_both);
 	err = dma_async_device_register(&base->dma_both);
 
 	if (err) {
-		dev_err(base->dev,
-			"[%s] Failed to register logical and physical capable channels\n",
-			__func__);
+		d40_err(base->dev,
+			"Failed to register logical and physical capable channels\n");
 		goto failure3;
 	}
 	return 0;
@@ -2616,9 +2507,10 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 		{ .reg = D40_DREG_PERIPHID1, .val = 0x0000},
 		/*
 		 * D40_DREG_PERIPHID2 Depends on HW revision:
-		 *  MOP500/HREF ED has 0x0008,
+		 *  DB8500ed has 0x0008,
 		 *  ? has 0x0018,
-		 *  HREF V1 has 0x0028
+		 *  DB8500v1 has 0x0028,
+		 *  DB8500v2 has 0x0038
 		 */
 		{ .reg = D40_DREG_PERIPHID3, .val = 0x0000},
 
@@ -2642,8 +2534,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	clk = clk_get(&pdev->dev, NULL);
 
 	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "[%s] No matching clock found\n",
-			__func__);
+		d40_err(&pdev->dev, "No matching clock found\n");
 		goto failure;
 	}
 
@@ -2666,9 +2557,8 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) {
 		if (dma_id_regs[i].val !=
 		    readl(virtbase + dma_id_regs[i].reg)) {
-			dev_err(&pdev->dev,
-				"[%s] Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n",
-				__func__,
+			d40_err(&pdev->dev,
+				"Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n",
 				dma_id_regs[i].val,
 				dma_id_regs[i].reg,
 				readl(virtbase + dma_id_regs[i].reg));
@@ -2681,9 +2571,8 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 
 	if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) !=
 	    D40_HW_DESIGNER) {
-		dev_err(&pdev->dev,
-			"[%s] Unknown designer! Got %x wanted %x\n",
-			__func__, val & D40_DREG_PERIPHID2_DESIGNER_MASK,
+		d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n",
+			val & D40_DREG_PERIPHID2_DESIGNER_MASK,
 			D40_HW_DESIGNER);
 		goto failure;
 	}
@@ -2713,7 +2602,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 		       sizeof(struct d40_chan), GFP_KERNEL);
 
 	if (base == NULL) {
-		dev_err(&pdev->dev, "[%s] Out of memory\n", __func__);
+		d40_err(&pdev->dev, "Out of memory\n");
 		goto failure;
 	}
 
@@ -2860,6 +2749,7 @@ static void __init d40_hw_init(struct d40_base *base)
 
 static int __init d40_lcla_allocate(struct d40_base *base)
 {
+	struct d40_lcla_pool *pool = &base->lcla_pool;
 	unsigned long *page_list;
 	int i, j;
 	int ret = 0;
@@ -2885,9 +2775,8 @@ static int __init d40_lcla_allocate(struct d40_base *base)
 						base->lcla_pool.pages);
 		if (!page_list[i]) {
 
-			dev_err(base->dev,
-				"[%s] Failed to allocate %d pages.\n",
-				__func__, base->lcla_pool.pages);
+			d40_err(base->dev, "Failed to allocate %d pages.\n",
+				base->lcla_pool.pages);
 
 			for (j = 0; j < i; j++)
 				free_pages(page_list[j], base->lcla_pool.pages);
@@ -2925,6 +2814,15 @@ static int __init d40_lcla_allocate(struct d40_base *base)
 						 LCLA_ALIGNMENT);
 	}
 
+	pool->dma_addr = dma_map_single(base->dev, pool->base,
+					SZ_1K * base->num_phy_chans,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(base->dev, pool->dma_addr)) {
+		pool->dma_addr = 0;
+		ret = -ENOMEM;
+		goto failure;
+	}
+
 	writel(virt_to_phys(base->lcla_pool.base),
 	       base->virtbase + D40_DREG_LCLA);
 failure:
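The LCLA pool is now mapped once for the device with dma_map_single() and the mapping verified with dma_mapping_error(); the error path in d40_probe() below unmaps it with the same size and direction. The pattern in isolation, as a sketch; map_lcla() is a hypothetical helper and, as in the patch, 0 serves as the "not mapped" sentinel:

	/* hypothetical helper: streaming-map a pool the device only reads */
	static dma_addr_t map_lcla(struct device *dev, void *base, size_t len)
	{
		dma_addr_t addr = dma_map_single(dev, base, len, DMA_TO_DEVICE);

		/* a streaming mapping may fail; never use it unchecked */
		if (dma_mapping_error(dev, addr))
			return 0;

		return addr;
	}
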
@@ -2957,9 +2855,7 @@ static int __init d40_probe(struct platform_device *pdev)
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa");
 	if (!res) {
 		ret = -ENOENT;
-		dev_err(&pdev->dev,
-			"[%s] No \"lcpa\" memory resource\n",
-			__func__);
+		d40_err(&pdev->dev, "No \"lcpa\" memory resource\n");
 		goto failure;
 	}
 	base->lcpa_size = resource_size(res);
@@ -2968,9 +2864,9 @@ static int __init d40_probe(struct platform_device *pdev)
 	if (request_mem_region(res->start, resource_size(res),
 			       D40_NAME " I/O lcpa") == NULL) {
 		ret = -EBUSY;
-		dev_err(&pdev->dev,
-			"[%s] Failed to request LCPA region 0x%x-0x%x\n",
-			__func__, res->start, res->end);
+		d40_err(&pdev->dev,
+			"Failed to request LCPA region 0x%x-0x%x\n",
+			res->start, res->end);
 		goto failure;
 	}
 
@@ -2986,16 +2882,13 @@ static int __init d40_probe(struct platform_device *pdev)
 	base->lcpa_base = ioremap(res->start, resource_size(res));
 	if (!base->lcpa_base) {
 		ret = -ENOMEM;
-		dev_err(&pdev->dev,
-			"[%s] Failed to ioremap LCPA region\n",
-			__func__);
+		d40_err(&pdev->dev, "Failed to ioremap LCPA region\n");
 		goto failure;
 	}
 
 	ret = d40_lcla_allocate(base);
 	if (ret) {
-		dev_err(&pdev->dev, "[%s] Failed to allocate LCLA area\n",
-			__func__);
+		d40_err(&pdev->dev, "Failed to allocate LCLA area\n");
 		goto failure;
 	}
 
@@ -3004,9 +2897,8 @@ static int __init d40_probe(struct platform_device *pdev)
 	base->irq = platform_get_irq(pdev, 0);
 
 	ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
-
 	if (ret) {
-		dev_err(&pdev->dev, "[%s] No IRQ defined\n", __func__);
+		d40_err(&pdev->dev, "No IRQ defined\n");
 		goto failure;
 	}
 
@@ -3025,6 +2917,12 @@ failure:
 			kmem_cache_destroy(base->desc_slab);
 		if (base->virtbase)
 			iounmap(base->virtbase);
+
+		if (base->lcla_pool.dma_addr)
+			dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
+					 SZ_1K * base->num_phy_chans,
+					 DMA_TO_DEVICE);
+
 		if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
 			free_pages((unsigned long)base->lcla_pool.base,
 				   base->lcla_pool.pages);
@@ -3049,7 +2947,7 @@ failure:
 		kfree(base);
 	}
 
-	dev_err(&pdev->dev, "[%s] probe failed\n", __func__);
+	d40_err(&pdev->dev, "probe failed\n");
 	return ret;
 }
 
@@ -3060,7 +2958,7 @@ static struct platform_driver d40_driver = {
 	},
 };
 
-int __init stedma40_init(void)
+static int __init stedma40_init(void)
 {
 	return platform_driver_probe(&d40_driver, d40_probe);
 }
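The d40_err()/chan_err() wrappers that replace the open-coded dev_err() calls throughout this file are introduced earlier in the patch, outside the hunks shown; presumably they fold in the "[%s] ... __func__" boilerplate the old call sites repeated, roughly along these lines:

	/* sketch; chan2dev() is assumed to be the driver's chan-to-device helper */
	#define d40_err(dev, format, arg...) \
		dev_err(dev, "[%s] " format, __func__, ##arg)

	#define chan_err(d40c, format, arg...) \
		d40_err(chan2dev(d40c), format, ##arg)
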
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
index 0b096a38322d..cad9e1daedff 100644
--- a/drivers/dma/ste_dma40_ll.c
+++ b/drivers/dma/ste_dma40_ll.c
@@ -125,13 +125,15 @@ void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
 static int d40_phy_fill_lli(struct d40_phy_lli *lli,
 			    dma_addr_t data,
 			    u32 data_size,
-			    int psize,
 			    dma_addr_t next_lli,
 			    u32 reg_cfg,
-			    bool term_int,
-			    u32 data_width,
-			    bool is_device)
+			    struct stedma40_half_channel_info *info,
+			    unsigned int flags)
 {
+	bool addr_inc = flags & LLI_ADDR_INC;
+	bool term_int = flags & LLI_TERM_INT;
+	unsigned int data_width = info->data_width;
+	int psize = info->psize;
 	int num_elems;
 
 	if (psize == STEDMA40_PSIZE_PHY_1)
@@ -154,7 +156,7 @@ static int d40_phy_fill_lli(struct d40_phy_lli *lli,
 	 * Distance to next element sized entry.
 	 * Usually the size of the element unless you want gaps.
 	 */
-	if (!is_device)
+	if (addr_inc)
 		lli->reg_elt |= (0x1 << data_width) <<
 			D40_SREG_ELEM_PHY_EIDX_POS;
 
@@ -198,47 +200,51 @@ static int d40_seg_size(int size, int data_width1, int data_width2)
 	return seg_max;
 }
 
-struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli,
-				       dma_addr_t addr,
-				       u32 size,
-				       int psize,
-				       dma_addr_t lli_phys,
-				       u32 reg_cfg,
-				       bool term_int,
-				       u32 data_width1,
-				       u32 data_width2,
-				       bool is_device)
+static struct d40_phy_lli *
+d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size,
+		   dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg,
+		   struct stedma40_half_channel_info *info,
+		   struct stedma40_half_channel_info *otherinfo,
+		   unsigned long flags)
 {
+	bool lastlink = flags & LLI_LAST_LINK;
+	bool addr_inc = flags & LLI_ADDR_INC;
+	bool term_int = flags & LLI_TERM_INT;
+	bool cyclic = flags & LLI_CYCLIC;
 	int err;
 	dma_addr_t next = lli_phys;
 	int size_rest = size;
 	int size_seg = 0;
 
+	/*
+	 * This piece may be split up based on d40_seg_size(); we only want the
+	 * term int on the last part.
+	 */
+	if (term_int)
+		flags &= ~LLI_TERM_INT;
+
 	do {
-		size_seg = d40_seg_size(size_rest, data_width1, data_width2);
+		size_seg = d40_seg_size(size_rest, info->data_width,
+					otherinfo->data_width);
 		size_rest -= size_seg;
 
-		if (term_int && size_rest == 0)
-			next = 0;
+		if (size_rest == 0 && term_int)
+			flags |= LLI_TERM_INT;
+
+		if (size_rest == 0 && lastlink)
+			next = cyclic ? first_phys : 0;
 		else
 			next = ALIGN(next + sizeof(struct d40_phy_lli),
 				     D40_LLI_ALIGN);
 
-		err = d40_phy_fill_lli(lli,
-				       addr,
-				       size_seg,
-				       psize,
-				       next,
-				       reg_cfg,
-				       !next,
-				       data_width1,
-				       is_device);
+		err = d40_phy_fill_lli(lli, addr, size_seg, next,
+				       reg_cfg, info, flags);
 
 		if (err)
 			goto err;
 
 		lli++;
-		if (!is_device)
+		if (addr_inc)
 			addr += size_seg;
 	} while (size_rest);
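The loop above splits one request into as many LLIs as d40_seg_size() dictates, holding the completion interrupt back so that only the final segment carries it. The flag dance in miniature, as a sketch:

	/* fill_one_lli() is a hypothetical sink for one LLI's worth of setup */
	static void split_with_term_int(int size, int seg_max,
					unsigned long flags)
	{
		bool term_int = flags & LLI_TERM_INT;

		flags &= ~LLI_TERM_INT;

		while (size > 0) {
			int seg = min(size, seg_max);

			size -= seg;
			if (size == 0 && term_int)
				flags |= LLI_TERM_INT;	/* last LLI only */

			fill_one_lli(seg, flags);
		}
	}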
 
@@ -254,39 +260,35 @@ int d40_phy_sg_to_lli(struct scatterlist *sg,
 		      struct d40_phy_lli *lli_sg,
 		      dma_addr_t lli_phys,
 		      u32 reg_cfg,
-		      u32 data_width1,
-		      u32 data_width2,
-		      int psize)
+		      struct stedma40_half_channel_info *info,
+		      struct stedma40_half_channel_info *otherinfo,
+		      unsigned long flags)
 {
 	int total_size = 0;
 	int i;
 	struct scatterlist *current_sg = sg;
-	dma_addr_t dst;
 	struct d40_phy_lli *lli = lli_sg;
 	dma_addr_t l_phys = lli_phys;
 
+	if (!target)
+		flags |= LLI_ADDR_INC;
+
 	for_each_sg(sg, current_sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(current_sg);
+		unsigned int len = sg_dma_len(current_sg);
+		dma_addr_t dst = target ?: sg_addr;
 
 		total_size += sg_dma_len(current_sg);
 
-		if (target)
-			dst = target;
-		else
-			dst = sg_phys(current_sg);
+		if (i == sg_len - 1)
+			flags |= LLI_TERM_INT | LLI_LAST_LINK;
 
 		l_phys = ALIGN(lli_phys + (lli - lli_sg) *
 			       sizeof(struct d40_phy_lli), D40_LLI_ALIGN);
 
-		lli = d40_phy_buf_to_lli(lli,
-					 dst,
-					 sg_dma_len(current_sg),
-					 psize,
-					 l_phys,
-					 reg_cfg,
-					 sg_len - 1 == i,
-					 data_width1,
-					 data_width2,
-					 target == dst);
+		lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys,
+					 reg_cfg, info, otherinfo, flags);
+
 		if (lli == NULL)
 			return -EINVAL;
 	}
@@ -295,45 +297,22 @@ int d40_phy_sg_to_lli(struct scatterlist *sg,
 }
 
 
-void d40_phy_lli_write(void __iomem *virtbase,
-		       u32 phy_chan_num,
-		       struct d40_phy_lli *lli_dst,
-		       struct d40_phy_lli *lli_src)
-{
-
-	writel(lli_src->reg_cfg, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSCFG);
-	writel(lli_src->reg_elt, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSELT);
-	writel(lli_src->reg_ptr, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSPTR);
-	writel(lli_src->reg_lnk, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSLNK);
-
-	writel(lli_dst->reg_cfg, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDCFG);
-	writel(lli_dst->reg_elt, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT);
-	writel(lli_dst->reg_ptr, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDPTR);
-	writel(lli_dst->reg_lnk, virtbase + D40_DREG_PCBASE +
-	       phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDLNK);
-
-}
-
 /* DMA logical lli operations */
 
 static void d40_log_lli_link(struct d40_log_lli *lli_dst,
 			     struct d40_log_lli *lli_src,
-			     int next)
+			     int next, unsigned int flags)
 {
+	bool interrupt = flags & LLI_TERM_INT;
 	u32 slos = 0;
 	u32 dlos = 0;
 
 	if (next != -EINVAL) {
 		slos = next * 2;
 		dlos = next * 2 + 1;
-	} else {
+	}
+
+	if (interrupt) {
 		lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK;
 		lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK;
 	}
@@ -348,9 +327,9 @@ static void d40_log_lli_link(struct d40_log_lli *lli_dst,
 void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
 			   struct d40_log_lli *lli_dst,
 			   struct d40_log_lli *lli_src,
-			   int next)
+			   int next, unsigned int flags)
 {
-	d40_log_lli_link(lli_dst, lli_src, next);
+	d40_log_lli_link(lli_dst, lli_src, next, flags);
 
 	writel(lli_src->lcsp02, &lcpa[0].lcsp0);
 	writel(lli_src->lcsp13, &lcpa[0].lcsp1);
@@ -361,9 +340,9 @@ void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
 void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
 			   struct d40_log_lli *lli_dst,
 			   struct d40_log_lli *lli_src,
-			   int next)
+			   int next, unsigned int flags)
 {
-	d40_log_lli_link(lli_dst, lli_src, next);
+	d40_log_lli_link(lli_dst, lli_src, next, flags);
 
 	writel(lli_src->lcsp02, &lcla[0].lcsp02);
 	writel(lli_src->lcsp13, &lcla[0].lcsp13);
@@ -375,8 +354,10 @@ static void d40_log_fill_lli(struct d40_log_lli *lli,
 			     dma_addr_t data, u32 data_size,
 			     u32 reg_cfg,
 			     u32 data_width,
-			     bool addr_inc)
+			     unsigned int flags)
 {
+	bool addr_inc = flags & LLI_ADDR_INC;
+
 	lli->lcsp13 = reg_cfg;
 
 	/* The number of elements to transfer */
@@ -395,67 +376,15 @@ static void d40_log_fill_lli(struct d40_log_lli *lli,
 
 }
 
-int d40_log_sg_to_dev(struct scatterlist *sg,
-		      int sg_len,
-		      struct d40_log_lli_bidir *lli,
-		      struct d40_def_lcsp *lcsp,
-		      u32 src_data_width,
-		      u32 dst_data_width,
-		      enum dma_data_direction direction,
-		      dma_addr_t dev_addr)
-{
-	int total_size = 0;
-	struct scatterlist *current_sg = sg;
-	int i;
-	struct d40_log_lli *lli_src = lli->src;
-	struct d40_log_lli *lli_dst = lli->dst;
-
-	for_each_sg(sg, current_sg, sg_len, i) {
-		total_size += sg_dma_len(current_sg);
-
-		if (direction == DMA_TO_DEVICE) {
-			lli_src =
-				d40_log_buf_to_lli(lli_src,
-						   sg_phys(current_sg),
-						   sg_dma_len(current_sg),
-						   lcsp->lcsp1, src_data_width,
-						   dst_data_width,
-						   true);
-			lli_dst =
-				d40_log_buf_to_lli(lli_dst,
-						   dev_addr,
-						   sg_dma_len(current_sg),
-						   lcsp->lcsp3, dst_data_width,
-						   src_data_width,
-						   false);
-		} else {
-			lli_dst =
-				d40_log_buf_to_lli(lli_dst,
-						   sg_phys(current_sg),
-						   sg_dma_len(current_sg),
-						   lcsp->lcsp3, dst_data_width,
-						   src_data_width,
-						   true);
-			lli_src =
-				d40_log_buf_to_lli(lli_src,
-						   dev_addr,
-						   sg_dma_len(current_sg),
-						   lcsp->lcsp1, src_data_width,
-						   dst_data_width,
-						   false);
-		}
-	}
-	return total_size;
-}
-
-struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
+static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
 				       dma_addr_t addr,
 				       int size,
 				       u32 lcsp13, /* src or dst*/
 				       u32 data_width1,
 				       u32 data_width2,
-				       bool addr_inc)
+				       unsigned int flags)
 {
+	bool addr_inc = flags & LLI_ADDR_INC;
 	struct d40_log_lli *lli = lli_sg;
 	int size_rest = size;
 	int size_seg = 0;
@@ -468,7 +397,7 @@ struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
 				 addr,
 				 size_seg,
 				 lcsp13, data_width1,
-				 addr_inc);
+				 flags);
 		if (addr_inc)
 			addr += size_seg;
 		lli++;
@@ -479,6 +408,7 @@ struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
 
 int d40_log_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
+		      dma_addr_t dev_addr,
 		      struct d40_log_lli *lli_sg,
 		      u32 lcsp13, /* src or dst*/
 		      u32 data_width1, u32 data_width2)
@@ -487,14 +417,24 @@ int d40_log_sg_to_lli(struct scatterlist *sg,
 	struct scatterlist *current_sg = sg;
 	int i;
 	struct d40_log_lli *lli = lli_sg;
+	unsigned long flags = 0;
+
+	if (!dev_addr)
+		flags |= LLI_ADDR_INC;
 
 	for_each_sg(sg, current_sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(current_sg);
+		unsigned int len = sg_dma_len(current_sg);
+		dma_addr_t addr = dev_addr ?: sg_addr;
+
 		total_size += sg_dma_len(current_sg);
-		lli = d40_log_buf_to_lli(lli,
-					 sg_phys(current_sg),
-					 sg_dma_len(current_sg),
+
+		lli = d40_log_buf_to_lli(lli, addr, len,
 					 lcsp13,
-					 data_width1, data_width2, true);
+					 data_width1,
+					 data_width2,
+					 flags);
 	}
+
 	return total_size;
 }
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
index 9cc43495bea2..195ee65ee7f3 100644
--- a/drivers/dma/ste_dma40_ll.h
+++ b/drivers/dma/ste_dma40_ll.h
@@ -163,6 +163,22 @@
 #define D40_DREG_LCEIS1		0x0B4
 #define D40_DREG_LCEIS2		0x0B8
 #define D40_DREG_LCEIS3		0x0BC
+#define D40_DREG_PSEG1		0x110
+#define D40_DREG_PSEG2		0x114
+#define D40_DREG_PSEG3		0x118
+#define D40_DREG_PSEG4		0x11C
+#define D40_DREG_PCEG1		0x120
+#define D40_DREG_PCEG2		0x124
+#define D40_DREG_PCEG3		0x128
+#define D40_DREG_PCEG4		0x12C
+#define D40_DREG_RSEG1		0x130
+#define D40_DREG_RSEG2		0x134
+#define D40_DREG_RSEG3		0x138
+#define D40_DREG_RSEG4		0x13C
+#define D40_DREG_RCEG1		0x140
+#define D40_DREG_RCEG2		0x144
+#define D40_DREG_RCEG3		0x148
+#define D40_DREG_RCEG4		0x14C
 #define D40_DREG_STFU		0xFC8
 #define D40_DREG_ICFG		0xFCC
 #define D40_DREG_PERIPHID0	0xFE0
@@ -277,6 +293,13 @@ struct d40_def_lcsp {
 
 /* Physical channels */
 
+enum d40_lli_flags {
+	LLI_ADDR_INC	= 1 << 0,
+	LLI_TERM_INT	= 1 << 1,
+	LLI_CYCLIC	= 1 << 2,
+	LLI_LAST_LINK	= 1 << 3,
+};
+
 void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
 		 u32 *src_cfg,
 		 u32 *dst_cfg,
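The prep paths in ste_dma40.c compose these flags per half-channel. A sketch of typical combinations, inferred from the helpers in ste_dma40_ll.c rather than spelled out by the patch:

	/* memory side of a transfer: walk the buffer, irq from the last link */
	unsigned long mem_flags = LLI_ADDR_INC | LLI_TERM_INT | LLI_LAST_LINK;

	/* device side: fixed FIFO address, so no increment */
	unsigned long dev_flags = LLI_TERM_INT | LLI_LAST_LINK;

	/* cyclic: additionally relink the last LLI back to the first */
	unsigned long cyc_flags = mem_flags | LLI_CYCLIC;
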
@@ -292,46 +315,15 @@ int d40_phy_sg_to_lli(struct scatterlist *sg,
 		      struct d40_phy_lli *lli,
 		      dma_addr_t lli_phys,
 		      u32 reg_cfg,
-		      u32 data_width1,
-		      u32 data_width2,
-		      int psize);
-
-struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli,
-				       dma_addr_t data,
-				       u32 data_size,
-				       int psize,
-				       dma_addr_t next_lli,
-				       u32 reg_cfg,
-				       bool term_int,
-				       u32 data_width1,
-				       u32 data_width2,
-				       bool is_device);
-
-void d40_phy_lli_write(void __iomem *virtbase,
-		       u32 phy_chan_num,
-		       struct d40_phy_lli *lli_dst,
-		       struct d40_phy_lli *lli_src);
+		      struct stedma40_half_channel_info *info,
+		      struct stedma40_half_channel_info *otherinfo,
+		      unsigned long flags);
 
 /* Logical channels */
 
-struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
-				       dma_addr_t addr,
-				       int size,
-				       u32 lcsp13, /* src or dst*/
-				       u32 data_width1, u32 data_width2,
-				       bool addr_inc);
-
-int d40_log_sg_to_dev(struct scatterlist *sg,
-		      int sg_len,
-		      struct d40_log_lli_bidir *lli,
-		      struct d40_def_lcsp *lcsp,
-		      u32 src_data_width,
-		      u32 dst_data_width,
-		      enum dma_data_direction direction,
-		      dma_addr_t dev_addr);
-
 int d40_log_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
+		      dma_addr_t dev_addr,
 		      struct d40_log_lli *lli_sg,
 		      u32 lcsp13, /* src or dst*/
 		      u32 data_width1, u32 data_width2);
@@ -339,11 +331,11 @@ int d40_log_sg_to_lli(struct scatterlist *sg,
 void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
 			    struct d40_log_lli *lli_dst,
 			    struct d40_log_lli *lli_src,
-			    int next);
+			    int next, unsigned int flags);
 
 void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
 			    struct d40_log_lli *lli_dst,
 			    struct d40_log_lli *lli_src,
-			    int next);
+			    int next, unsigned int flags);
 
 #endif /* STE_DMA40_LLI_H */
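
Finally, a client-side sketch of driving the new cyclic capability (not part of the patch): there is as yet no generic dmaengine wrapper for cyclic preps, so the device op is called directly. Here chan is assumed to come from dma_request_channel() with DMA_CYCLIC in the mask, and start_pcm_ring() is a hypothetical caller such as an audio driver:

	/* hypothetical client helper; chan must advertise DMA_CYCLIC */
	static struct dma_async_tx_descriptor *
	start_pcm_ring(struct dma_chan *chan, dma_addr_t buf,
		       size_t buf_len, size_t period_len)
	{
		struct dma_async_tx_descriptor *txd;

		txd = chan->device->device_prep_dma_cyclic(chan, buf, buf_len,
							   period_len,
							   DMA_TO_DEVICE);
		if (!txd)
			return NULL;

		txd->tx_submit(txd);		/* queue the descriptor */
		dma_async_issue_pending(chan);	/* kick the engine */

		return txd;
	}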