summary refs log tree commit diff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-09 15:40:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-09 15:40:28 -0700
commit4879b7ae05431ebcd228a4ff25a81120b3d85891 (patch)
tree921267ebdc9757ee770db6ef74032a6524d94bfa
parentecc721a72c121e8b641d68efd24a225abedb9a30 (diff)
parentbe13ec668d043e5313985014d34735ec6ede074a (diff)
downloadlinux-4879b7ae05431ebcd228a4ff25a81120b3d85891.tar.gz
Merge tag 'dmaengine-4.12-rc1' of git://git.infradead.org/users/vkoul/slave-dma
Pull dmaengine updates from Vinod Koul:
 "This time again a smaller update consisting of:

   - support for TI DA8xx dma controller and updates to the cppi driver

   - updates on bunch of drivers like xilinx, pl08x, stm32-dma, mv_xor,
     ioat, dmatest"

* tag 'dmaengine-4.12-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (35 commits)
  dmaengine: pl08x: remove lock documentation
  dmaengine: pl08x: fix pl08x_dma_chan_state documentation
  dmaengine: pl08x: Use the BIT() macro consistently
  dmaengine: pl080: Fix some missing kerneldoc
  dmaengine: pl080: Cut some unused defines
  dmaengine: dmatest: Add check for supported buffer count (sg_buffers)
  dmaengine: dmatest: Select DMA_ENGINE_RAID as its needed for the slave_sg test
  dmaengine: virt-dma: Convert to use list_for_each_entry_safe()
  dma-debug: use offset_in_page() macro
  dmaengine: mv_xor: use offset_in_page() macro
  dmaengine: dmatest: use offset_in_page() macro
  dmaengine: sun4i: fix invalid argument
  dmaengine: ioat: use setup_timer
  dmaengine: cppi41: Fix an Oops happening in cppi41_dma_probe()
  dmaengine: pl330: remove pdata based initialization
  dmaengine: cppi: fix build error due to bad variable
  dmaengine: imx-sdma: add 1ms delay to ensure SDMA channel is stopped
  dmaengine: cppi41: use managed functions devm_*()
  dmaengine: cppi41: fix cppi41_dma_tx_status() logic
  dmaengine: qcom_hidma: pause the channel on shutdown
  ...
-rw-r--r--Documentation/devicetree/bindings/usb/da8xx-usb.txt41
-rw-r--r--arch/arm/plat-samsung/devs.c1
-rw-r--r--drivers/dma/Kconfig7
-rw-r--r--drivers/dma/amba-pl08x.c20
-rw-r--r--drivers/dma/cppi41.c168
-rw-r--r--drivers/dma/dmatest.c11
-rw-r--r--drivers/dma/imx-sdma.c19
-rw-r--r--drivers/dma/ioat/init.c4
-rw-r--r--drivers/dma/mv_xor.c9
-rw-r--r--drivers/dma/pl330.c42
-rw-r--r--drivers/dma/qcom/hidma.c15
-rw-r--r--drivers/dma/qcom/hidma_ll.c6
-rw-r--r--drivers/dma/sh/rcar-dmac.c52
-rw-r--r--drivers/dma/stm32-dma.c2
-rw-r--r--drivers/dma/sun4i-dma.c2
-rw-r--r--drivers/dma/virt-dma.c11
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c63
-rw-r--r--include/linux/amba/pl080.h50
-rw-r--r--include/linux/amba/pl330.h35
-rw-r--r--lib/dma-debug.c4
20 files changed, 304 insertions, 258 deletions
diff --git a/Documentation/devicetree/bindings/usb/da8xx-usb.txt b/Documentation/devicetree/bindings/usb/da8xx-usb.txt
index ccb844aba7d4..717c5f656237 100644
--- a/Documentation/devicetree/bindings/usb/da8xx-usb.txt
+++ b/Documentation/devicetree/bindings/usb/da8xx-usb.txt
@@ -18,10 +18,26 @@ Required properties:
 
  - phy-names: Should be "usb-phy"
 
+ - dmas: specifies the dma channels
+
+ - dma-names: specifies the names of the channels. Use "rxN" for receive
+   and "txN" for transmit endpoints. N specifies the endpoint number.
+
 Optional properties:
 ~~~~~~~~~~~~~~~~~~~~
  - vbus-supply: Phandle to a regulator providing the USB bus power.
 
+DMA
+~~~
+- compatible: ti,da830-cppi41
+- reg: offset and length of the following register spaces: CPPI DMA Controller,
+  CPPI DMA Scheduler, Queue Manager
+- reg-names: "controller", "scheduler", "queuemgr"
+- #dma-cells: should be set to 2. The first number represents the
+  channel number (0 … 3 for endpoints 1 … 4).
+  The second number is 0 for RX and 1 for TX transfers.
+- #dma-channels: should be set to 4 representing the 4 endpoints.
+
 Example:
 	usb_phy: usb-phy {
 		compatible = "ti,da830-usb-phy";
@@ -30,7 +46,10 @@ Example:
 	};
 	usb0: usb@200000 {
 		compatible = "ti,da830-musb";
-		reg =   <0x00200000 0x10000>;
+		reg = <0x00200000 0x1000>;
+		ranges;
+		#address-cells = <1>;
+		#size-cells = <1>;
 		interrupts = <58>;
 		interrupt-names = "mc";
 
@@ -39,5 +58,25 @@ Example:
 		phys = <&usb_phy 0>;
 		phy-names = "usb-phy";
 
+		dmas = <&cppi41dma 0 0 &cppi41dma 1 0
+			&cppi41dma 2 0 &cppi41dma 3 0
+			&cppi41dma 0 1 &cppi41dma 1 1
+			&cppi41dma 2 1 &cppi41dma 3 1>;
+		dma-names =
+			"rx1", "rx2", "rx3", "rx4",
+			"tx1", "tx2", "tx3", "tx4";
+
 		status = "okay";
+
+		cppi41dma: dma-controller@201000 {
+			compatible = "ti,da830-cppi41";
+			reg =  <0x201000 0x1000
+				0x202000 0x1000
+				0x204000 0x4000>;
+			reg-names = "controller", "scheduler", "queuemgr";
+			interrupts = <58>;
+			#dma-cells = <2>;
+			#dma-channels = <4>;
+		};
+
 	};
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 03fac123676d..dc269d9143bc 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -10,7 +10,6 @@
  * published by the Free Software Foundation.
 */
 
-#include <linux/amba/pl330.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/interrupt.h>
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d01d59812cf3..24e8597b2c3e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -514,12 +514,12 @@ config TIMB_DMA
 	  Enable support for the Timberdale FPGA DMA engine.
 
 config TI_CPPI41
-	tristate "AM33xx CPPI41 DMA support"
-	depends on ARCH_OMAP
+	tristate "CPPI 4.1 DMA support"
+	depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX)
 	select DMA_ENGINE
 	help
 	  The Communications Port Programming Interface (CPPI) 4.1 DMA engine
-	  is currently used by the USB driver on AM335x platforms.
+	  is currently used by the USB driver on AM335x and DA8xx platforms.
 
 config TI_DMA_CROSSBAR
 	bool
@@ -608,6 +608,7 @@ config ASYNC_TX_DMA
 config DMATEST
 	tristate "DMA Test client"
 	depends on DMA_ENGINE
+	select DMA_ENGINE_RAID
 	help
 	  Simple DMA test client. Say N unless you're debugging a
 	  DMA Device driver.
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 0b7c6ce629a6..6bb8813ca275 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -106,6 +106,7 @@ struct pl08x_driver_data;
 
 /**
  * struct vendor_data - vendor-specific config parameters for PL08x derivatives
+ * @config_offset: offset to the configuration register
  * @channels: the number of channels available in this variant
  * @signals: the number of request signals available from the hardware
  * @dualmaster: whether this version supports dual AHB masters or not.
@@ -145,6 +146,8 @@ struct pl08x_bus_data {
 /**
  * struct pl08x_phy_chan - holder for the physical channels
  * @id: physical index to this channel
+ * @base: memory base address for this physical channel
+ * @reg_config: configuration address for this physical channel
  * @lock: a lock to use when altering an instance of this struct
  * @serving: the virtual channel currently being served by this physical
  * channel
@@ -203,7 +206,7 @@ struct pl08x_txd {
 };
 
 /**
- * struct pl08x_dma_chan_state - holds the PL08x specific virtual channel
+ * enum pl08x_dma_chan_state - holds the PL08x specific virtual channel
  * states
  * @PL08X_CHAN_IDLE: the channel is idle
  * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
@@ -226,9 +229,8 @@ enum pl08x_dma_chan_state {
  * @phychan: the physical channel utilized by this channel, if there is one
  * @name: name of channel
  * @cd: channel platform data
- * @runtime_addr: address for RX/TX according to the runtime config
+ * @cfg: slave configuration
  * @at: active transaction on this channel
- * @lock: a lock for this channel data
  * @host: a pointer to the host (internal use)
  * @state: whether the channel is idle, paused, running etc
  * @slave: whether this channel is a device (slave) or for memcpy
@@ -262,7 +264,7 @@ struct pl08x_dma_chan {
  * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI
  * fetches
  * @mem_buses: set to indicate memory transfers on AHB2.
- * @lock: a spinlock for this struct
+ * @lli_words: how many words are used in each LLI item for this variant
  */
 struct pl08x_driver_data {
 	struct dma_device slave;
@@ -417,7 +419,7 @@ static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
 
 	/* Enable the DMA channel */
 	/* Do not access config register until channel shows as disabled */
-	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << phychan->id))
+	while (readl(pl08x->base + PL080_EN_CHAN) & BIT(phychan->id))
 		cpu_relax();
 
 	/* Do not access config register until channel shows as inactive */
@@ -484,8 +486,8 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
 
 	writel(val, ch->reg_config);
 
-	writel(1 << ch->id, pl08x->base + PL080_ERR_CLEAR);
-	writel(1 << ch->id, pl08x->base + PL080_TC_CLEAR);
+	writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
+	writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
 }
 
 static inline u32 get_bytes_in_cctl(u32 cctl)
@@ -1834,7 +1836,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 		return IRQ_NONE;
 
 	for (i = 0; i < pl08x->vd->channels; i++) {
-		if (((1 << i) & err) || ((1 << i) & tc)) {
+		if ((BIT(i) & err) || (BIT(i) & tc)) {
 			/* Locate physical channel */
 			struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
 			struct pl08x_dma_chan *plchan = phychan->serving;
@@ -1872,7 +1874,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 			}
 			spin_unlock(&plchan->vc.lock);
 
-			mask |= (1 << i);
+			mask |= BIT(i);
 		}
 	}
 
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index d74cee077842..f7e965f63274 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -68,7 +68,6 @@
 #define QMGR_MEMCTRL_IDX_SH	16
 #define QMGR_MEMCTRL_DESC_SH	8
 
-#define QMGR_NUM_PEND	5
 #define QMGR_PEND(x)	(0x90 + (x) * 4)
 
 #define QMGR_PENDING_SLOT_Q(x)	(x / 32)
@@ -131,7 +130,6 @@ struct cppi41_dd {
 	u32 first_td_desc;
 	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];
 
-	void __iomem *usbss_mem;
 	void __iomem *ctrl_mem;
 	void __iomem *sched_mem;
 	void __iomem *qmgr_mem;
@@ -139,6 +137,10 @@ struct cppi41_dd {
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
 	struct chan_queues td_queue;
+	u16 first_completion_queue;
+	u16 qmgr_num_pend;
+	u32 n_chans;
+	u8 platform;
 
 	struct list_head pending;	/* Pending queued transfers */
 	spinlock_t lock;		/* Lock for pending list */
@@ -149,8 +151,7 @@ struct cppi41_dd {
 	bool is_suspended;
 };
 
-#define FIST_COMPLETION_QUEUE	93
-static struct chan_queues usb_queues_tx[] = {
+static struct chan_queues am335x_usb_queues_tx[] = {
 	/* USB0 ENDP 1 */
 	[ 0] = { .submit = 32, .complete =  93},
 	[ 1] = { .submit = 34, .complete =  94},
@@ -186,7 +187,7 @@ static struct chan_queues usb_queues_tx[] = {
 	[29] = { .submit = 90, .complete = 139},
 };
 
-static const struct chan_queues usb_queues_rx[] = {
+static const struct chan_queues am335x_usb_queues_rx[] = {
 	/* USB0 ENDP 1 */
 	[ 0] = { .submit =  1, .complete = 109},
 	[ 1] = { .submit =  2, .complete = 110},
@@ -222,11 +223,26 @@ static const struct chan_queues usb_queues_rx[] = {
 	[29] = { .submit = 30, .complete = 155},
 };
 
+static const struct chan_queues da8xx_usb_queues_tx[] = {
+	[0] = { .submit =  16, .complete = 24},
+	[1] = { .submit =  18, .complete = 24},
+	[2] = { .submit =  20, .complete = 24},
+	[3] = { .submit =  22, .complete = 24},
+};
+
+static const struct chan_queues da8xx_usb_queues_rx[] = {
+	[0] = { .submit =  1, .complete = 26},
+	[1] = { .submit =  3, .complete = 26},
+	[2] = { .submit =  5, .complete = 26},
+	[3] = { .submit =  7, .complete = 26},
+};
+
 struct cppi_glue_infos {
-	irqreturn_t (*isr)(int irq, void *data);
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
 	struct chan_queues td_queue;
+	u16 first_completion_queue;
+	u16 qmgr_num_pend;
 };
 
 static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
@@ -285,19 +301,21 @@ static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
 static irqreturn_t cppi41_irq(int irq, void *data)
 {
 	struct cppi41_dd *cdd = data;
+	u16 first_completion_queue = cdd->first_completion_queue;
+	u16 qmgr_num_pend = cdd->qmgr_num_pend;
 	struct cppi41_channel *c;
 	int i;
 
-	for (i = QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE); i < QMGR_NUM_PEND;
+	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
 			i++) {
 		u32 val;
 		u32 q_num;
 
 		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
-		if (i == QMGR_PENDING_SLOT_Q(FIST_COMPLETION_QUEUE) && val) {
+		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
 			u32 mask;
 			/* set corresponding bit for completetion Q 93 */
-			mask = 1 << QMGR_PENDING_BIT_Q(FIST_COMPLETION_QUEUE);
+			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
 			/* not set all bits for queues less than Q 93 */
 			mask--;
 			/* now invert and keep only Q 93+ set */
@@ -402,11 +420,9 @@ static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
 	struct cppi41_channel *c = to_cpp41_chan(chan);
 	enum dma_status ret;
 
-	/* lock */
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (txstate && ret == DMA_COMPLETE)
-		txstate->residue = c->residue;
-	/* unlock */
+
+	dma_set_residue(txstate, c->residue);
 
 	return ret;
 }
@@ -630,7 +646,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 		if (!c->is_tx) {
 			reg |= GCR_STARV_RETRY;
 			reg |= GCR_DESC_TYPE_HOST;
-			reg |= c->q_comp_num;
+			reg |= cdd->td_queue.complete;
 		}
 		reg |= GCR_TEARDOWN;
 		cppi_writel(reg, c->gcr_reg);
@@ -641,7 +657,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 	if (!c->td_seen || !c->td_desc_seen) {
 
 		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
-		if (!desc_phys)
+		if (!desc_phys && c->is_tx)
 			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
 
 		if (desc_phys == c->desc_phys) {
@@ -723,39 +739,24 @@ static int cppi41_stop_chan(struct dma_chan *chan)
 	return 0;
 }
 
-static void cleanup_chans(struct cppi41_dd *cdd)
-{
-	while (!list_empty(&cdd->ddev.channels)) {
-		struct cppi41_channel *cchan;
-
-		cchan = list_first_entry(&cdd->ddev.channels,
-				struct cppi41_channel, chan.device_node);
-		list_del(&cchan->chan.device_node);
-		kfree(cchan);
-	}
-}
-
 static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 {
-	struct cppi41_channel *cchan;
+	struct cppi41_channel *cchan, *chans;
 	int i;
-	int ret;
-	u32 n_chans;
+	u32 n_chans = cdd->n_chans;
 
-	ret = of_property_read_u32(dev->of_node, "#dma-channels",
-			&n_chans);
-	if (ret)
-		return ret;
 	/*
 	 * The channels can only be used as TX or as RX. So we add twice
 	 * that much dma channels because USB can only do RX or TX.
 	 */
 	n_chans *= 2;
 
+	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
+	if (!chans)
+		return -ENOMEM;
+
 	for (i = 0; i < n_chans; i++) {
-		cchan = kzalloc(sizeof(*cchan), GFP_KERNEL);
-		if (!cchan)
-			goto err;
+		cchan = &chans[i];
 
 		cchan->cdd = cdd;
 		if (i & 1) {
@@ -775,9 +776,6 @@ static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 	cdd->first_td_desc = n_chans;
 
 	return 0;
-err:
-	cleanup_chans(cdd);
-	return -ENOMEM;
 }
 
 static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
@@ -859,7 +857,7 @@ static void init_sched(struct cppi41_dd *cdd)
 
 	word = 0;
 	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
-	for (ch = 0; ch < 15 * 2; ch += 2) {
+	for (ch = 0; ch < cdd->n_chans; ch += 2) {
 
 		reg = SCHED_ENTRY0_CHAN(ch);
 		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;
@@ -869,7 +867,7 @@ static void init_sched(struct cppi41_dd *cdd)
 		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
 		word++;
 	}
-	reg = 15 * 2 * 2 - 1;
+	reg = cdd->n_chans * 2 - 1;
 	reg |= DMA_SCHED_CTRL_EN;
 	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
 }
@@ -885,7 +883,7 @@ static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 		return -ENOMEM;
 
 	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
-	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
 	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
 
 	ret = init_descs(dev, cdd);
@@ -894,6 +892,7 @@ static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 
 	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
 	init_sched(cdd);
+
 	return 0;
 err_td:
 	deinit_cppi41(dev, cdd);
@@ -933,8 +932,9 @@ static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
 	else
 		queues = cdd->queues_rx;
 
-	BUILD_BUG_ON(ARRAY_SIZE(usb_queues_rx) != ARRAY_SIZE(usb_queues_tx));
-	if (WARN_ON(cchan->port_num > ARRAY_SIZE(usb_queues_rx)))
+	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
+		     ARRAY_SIZE(am335x_usb_queues_tx));
+	if (WARN_ON(cchan->port_num > ARRAY_SIZE(am335x_usb_queues_rx)))
 		return false;
 
 	cchan->q_num = queues[cchan->port_num].submit;
@@ -962,15 +962,25 @@ static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
 			&dma_spec->args[0]);
 }
 
-static const struct cppi_glue_infos usb_infos = {
-	.isr = cppi41_irq,
-	.queues_rx = usb_queues_rx,
-	.queues_tx = usb_queues_tx,
+static const struct cppi_glue_infos am335x_usb_infos = {
+	.queues_rx = am335x_usb_queues_rx,
+	.queues_tx = am335x_usb_queues_tx,
 	.td_queue = { .submit = 31, .complete = 0 },
+	.first_completion_queue = 93,
+	.qmgr_num_pend = 5,
+};
+
+static const struct cppi_glue_infos da8xx_usb_infos = {
+	.queues_rx = da8xx_usb_queues_rx,
+	.queues_tx = da8xx_usb_queues_tx,
+	.td_queue = { .submit = 31, .complete = 0 },
+	.first_completion_queue = 24,
+	.qmgr_num_pend = 2,
 };
 
 static const struct of_device_id cppi41_dma_ids[] = {
-	{ .compatible = "ti,am3359-cppi41", .data = &usb_infos},
+	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
+	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
 	{},
 };
 MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
@@ -995,6 +1005,8 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	struct cppi41_dd *cdd;
 	struct device *dev = &pdev->dev;
 	const struct cppi_glue_infos *glue_info;
+	struct resource *mem;
+	int index;
 	int irq;
 	int ret;
 
@@ -1021,19 +1033,31 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&cdd->ddev.channels);
 	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
 
-	cdd->usbss_mem = of_iomap(dev->of_node, 0);
-	cdd->ctrl_mem = of_iomap(dev->of_node, 1);
-	cdd->sched_mem = of_iomap(dev->of_node, 2);
-	cdd->qmgr_mem = of_iomap(dev->of_node, 3);
+	index = of_property_match_string(dev->of_node,
+					 "reg-names", "controller");
+	if (index < 0)
+		return index;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
+	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->ctrl_mem))
+		return PTR_ERR(cdd->ctrl_mem);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
+	cdd->sched_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->sched_mem))
+		return PTR_ERR(cdd->sched_mem);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
+	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->qmgr_mem))
+		return PTR_ERR(cdd->qmgr_mem);
+
 	spin_lock_init(&cdd->lock);
 	INIT_LIST_HEAD(&cdd->pending);
 
 	platform_set_drvdata(pdev, cdd);
 
-	if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
-			!cdd->qmgr_mem)
-		return -ENXIO;
-
 	pm_runtime_enable(dev);
 	pm_runtime_set_autosuspend_delay(dev, 100);
 	pm_runtime_use_autosuspend(dev);
@@ -1044,6 +1068,13 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	cdd->queues_rx = glue_info->queues_rx;
 	cdd->queues_tx = glue_info->queues_tx;
 	cdd->td_queue = glue_info->td_queue;
+	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
+	cdd->first_completion_queue = glue_info->first_completion_queue;
+
+	ret = of_property_read_u32(dev->of_node,
+				   "#dma-channels", &cdd->n_chans);
+	if (ret)
+		goto err_get_n_chans;
 
 	ret = init_cppi41(dev, cdd);
 	if (ret)
@@ -1056,18 +1087,18 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	irq = irq_of_parse_and_map(dev->of_node, 0);
 	if (!irq) {
 		ret = -EINVAL;
-		goto err_irq;
+		goto err_chans;
 	}
 
-	ret = devm_request_irq(&pdev->dev, irq, glue_info->isr, IRQF_SHARED,
+	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
 			dev_name(dev), cdd);
 	if (ret)
-		goto err_irq;
+		goto err_chans;
 	cdd->irq = irq;
 
 	ret = dma_async_device_register(&cdd->ddev);
 	if (ret)
-		goto err_dma_reg;
+		goto err_chans;
 
 	ret = of_dma_controller_register(dev->of_node,
 			cppi41_dma_xlate, &cpp41_dma_info);
@@ -1080,20 +1111,14 @@ static int cppi41_dma_probe(struct platform_device *pdev)
 	return 0;
 err_of:
 	dma_async_device_unregister(&cdd->ddev);
-err_dma_reg:
-err_irq:
-	cleanup_chans(cdd);
 err_chans:
 	deinit_cppi41(dev, cdd);
 err_init_cppi:
 	pm_runtime_dont_use_autosuspend(dev);
+err_get_n_chans:
 err_get_sync:
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
-	iounmap(cdd->usbss_mem);
-	iounmap(cdd->ctrl_mem);
-	iounmap(cdd->sched_mem);
-	iounmap(cdd->qmgr_mem);
 	return ret;
 }
 
@@ -1110,12 +1135,7 @@ static int cppi41_dma_remove(struct platform_device *pdev)
 	dma_async_device_unregister(&cdd->ddev);
 
 	devm_free_irq(&pdev->dev, cdd->irq, cdd);
-	cleanup_chans(cdd);
 	deinit_cppi41(&pdev->dev, cdd);
-	iounmap(cdd->usbss_mem);
-	iounmap(cdd->ctrl_mem);
-	iounmap(cdd->sched_mem);
-	iounmap(cdd->qmgr_mem);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 54d581d407aa..a07ef3d6b3ec 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -535,6 +535,13 @@ static int dmatest_func(void *data)
 
 		total_tests++;
 
+		/* Check if buffer count fits into map count variable (u8) */
+		if ((src_cnt + dst_cnt) >= 255) {
+			pr_err("too many buffers (%d of 255 supported)\n",
+			       src_cnt + dst_cnt);
+			break;
+		}
+
 		if (1 << align > params->buf_size) {
 			pr_err("%u-byte buffer too small for %d-byte alignment\n",
 			       params->buf_size, 1 << align);
@@ -585,7 +592,7 @@ static int dmatest_func(void *data)
 		for (i = 0; i < src_cnt; i++) {
 			void *buf = thread->srcs[i];
 			struct page *pg = virt_to_page(buf);
-			unsigned pg_off = (unsigned long) buf & ~PAGE_MASK;
+			unsigned long pg_off = offset_in_page(buf);
 
 			um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
 						   um->len, DMA_TO_DEVICE);
@@ -605,7 +612,7 @@ static int dmatest_func(void *data)
 		for (i = 0; i < dst_cnt; i++) {
 			void *buf = thread->dsts[i];
 			struct page *pg = virt_to_page(buf);
-			unsigned pg_off = (unsigned long) buf & ~PAGE_MASK;
+			unsigned long pg_off = offset_in_page(buf);
 
 			dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
 					       DMA_BIDIRECTIONAL);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index d1651a50c349..085993cb2ccc 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -937,6 +937,21 @@ static int sdma_disable_channel(struct dma_chan *chan)
 	return 0;
 }
 
+static int sdma_disable_channel_with_delay(struct dma_chan *chan)
+{
+	sdma_disable_channel(chan);
+
+	/*
+	 * According to NXP R&D team a delay of one BD SDMA cost time
+	 * (maximum is 1ms) should be added after disable of the channel
+	 * bit, to ensure SDMA core has really been stopped after SDMA
+	 * clients call .device_terminate_all.
+	 */
+	mdelay(1);
+
+	return 0;
+}
+
 static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
 {
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1828,11 +1843,11 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
 	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
 	sdma->dma_device.device_config = sdma_config;
-	sdma->dma_device.device_terminate_all = sdma_disable_channel;
+	sdma->dma_device.device_terminate_all = sdma_disable_channel_with_delay;
 	sdma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	sdma->dma_device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	sdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-	sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 	sdma->dma_device.device_issue_pending = sdma_issue_pending;
 	sdma->dma_device.dev->dma_parms = &sdma->dma_parms;
 	dma_set_max_seg_size(sdma->dma_device.dev, 65535);
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index cc5259b881d4..6ad4384b3fa8 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -760,9 +760,7 @@ ioat_init_channel(struct ioatdma_device *ioat_dma,
 	dma_cookie_init(&ioat_chan->dma_chan);
 	list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels);
 	ioat_dma->idx[idx] = ioat_chan;
-	init_timer(&ioat_chan->timer);
-	ioat_chan->timer.function = ioat_timer_event;
-	ioat_chan->timer.data = data;
+	setup_timer(&ioat_chan->timer, ioat_timer_event, data);
 	tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data);
 }
 
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 0cb951b743a6..25bc5b103aa2 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -960,7 +960,7 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	}
 
 	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
-			       (size_t)src & ~PAGE_MASK, PAGE_SIZE,
+			       offset_in_page(src), PAGE_SIZE,
 			       DMA_TO_DEVICE);
 	unmap->addr[0] = src_dma;
 
@@ -972,7 +972,7 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	unmap->to_cnt = 1;
 
 	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
-				(size_t)dest & ~PAGE_MASK, PAGE_SIZE,
+				offset_in_page(dest), PAGE_SIZE,
 				DMA_FROM_DEVICE);
 	unmap->addr[1] = dest_dma;
 
@@ -1580,11 +1580,6 @@ static int mv_xor_probe(struct platform_device *pdev)
 			int irq;
 
 			cd = &pdata->channels[i];
-			if (!cd) {
-				ret = -ENODEV;
-				goto err_channel_add;
-			}
-
 			irq = platform_get_irq(pdev, i);
 			if (irq < 0) {
 				ret = irq;
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index f37f4978dabb..8b0da7fa520d 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -22,7 +22,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/amba/bus.h>
-#include <linux/amba/pl330.h>
 #include <linux/scatterlist.h>
 #include <linux/of.h>
 #include <linux/of_dma.h>
@@ -2077,18 +2076,6 @@ static void pl330_tasklet(unsigned long data)
 	}
 }
 
-bool pl330_filter(struct dma_chan *chan, void *param)
-{
-	u8 *peri_id;
-
-	if (chan->device->dev->driver != &pl330_driver.drv)
-		return false;
-
-	peri_id = chan->private;
-	return *peri_id == (unsigned long)param;
-}
-EXPORT_SYMBOL(pl330_filter);
-
 static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec,
 						struct of_dma *ofdma)
 {
@@ -2833,7 +2820,6 @@ static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
-	struct dma_pl330_platdata *pdat;
 	struct pl330_config *pcfg;
 	struct pl330_dmac *pl330;
 	struct dma_pl330_chan *pch, *_p;
@@ -2843,8 +2829,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	int num_chan;
 	struct device_node *np = adev->dev.of_node;
 
-	pdat = dev_get_platdata(&adev->dev);
-
 	ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
@@ -2857,7 +2841,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	pd = &pl330->ddma;
 	pd->dev = &adev->dev;
 
-	pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
+	pl330->mcbufsz = 0;
 
 	/* get quirk */
 	for (i = 0; i < ARRAY_SIZE(of_quirks); i++)
@@ -2901,10 +2885,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	INIT_LIST_HEAD(&pd->channels);
 
 	/* Initialize channel parameters */
-	if (pdat)
-		num_chan = max_t(int, pdat->nr_valid_peri, pcfg->num_chan);
-	else
-		num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
+	num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
 
 	pl330->num_peripherals = num_chan;
 
@@ -2916,11 +2897,8 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	for (i = 0; i < num_chan; i++) {
 		pch = &pl330->peripherals[i];
-		if (!adev->dev.of_node)
-			pch->chan.private = pdat ? &pdat->peri_id[i] : NULL;
-		else
-			pch->chan.private = adev->dev.of_node;
 
+		pch->chan.private = adev->dev.of_node;
 		INIT_LIST_HEAD(&pch->submitted_list);
 		INIT_LIST_HEAD(&pch->work_list);
 		INIT_LIST_HEAD(&pch->completed_list);
@@ -2933,15 +2911,11 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		list_add_tail(&pch->chan.device_node, &pd->channels);
 	}
 
-	if (pdat) {
-		pd->cap_mask = pdat->cap_mask;
-	} else {
-		dma_cap_set(DMA_MEMCPY, pd->cap_mask);
-		if (pcfg->num_peri) {
-			dma_cap_set(DMA_SLAVE, pd->cap_mask);
-			dma_cap_set(DMA_CYCLIC, pd->cap_mask);
-			dma_cap_set(DMA_PRIVATE, pd->cap_mask);
-		}
+	dma_cap_set(DMA_MEMCPY, pd->cap_mask);
+	if (pcfg->num_peri) {
+		dma_cap_set(DMA_SLAVE, pd->cap_mask);
+		dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+		dma_cap_set(DMA_PRIVATE, pd->cap_mask);
 	}
 
 	pd->device_alloc_chan_resources = pl330_alloc_chan_resources;
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
index 3c982c96b4b7..5072a7d306d4 100644
--- a/drivers/dma/qcom/hidma.c
+++ b/drivers/dma/qcom/hidma.c
@@ -865,6 +865,20 @@ bailout:
 	return rc;
 }
 
+static void hidma_shutdown(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+	dev_info(dmadev->ddev.dev, "HI-DMA engine shutdown\n");
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	if (hidma_ll_disable(dmadev->lldev))
+		dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+
+}
+
 static int hidma_remove(struct platform_device *pdev)
 {
 	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
@@ -908,6 +922,7 @@ MODULE_DEVICE_TABLE(of, hidma_match);
 static struct platform_driver hidma_driver = {
 	.probe = hidma_probe,
 	.remove = hidma_remove,
+	.shutdown = hidma_shutdown,
 	.driver = {
 		   .name = "hidma",
 		   .of_match_table = hidma_match,
diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
index 6645bdf0d151..1530a661518d 100644
--- a/drivers/dma/qcom/hidma_ll.c
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -499,6 +499,9 @@ int hidma_ll_enable(struct hidma_lldev *lldev)
 	lldev->trch_state = HIDMA_CH_ENABLED;
 	lldev->evch_state = HIDMA_CH_ENABLED;
 
+	/* enable irqs */
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
 	return 0;
 }
 
@@ -596,6 +599,9 @@ int hidma_ll_disable(struct hidma_lldev *lldev)
 
 	lldev->trch_state = HIDMA_CH_SUSPENDED;
 	lldev->evch_state = HIDMA_CH_SUSPENDED;
+
+	/* disable interrupts */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
 	return 0;
 }
 
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 48b22d5c8602..db41795fe42a 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -344,13 +344,19 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
 
 	if (desc->hwdescs.use) {
-		struct rcar_dmac_xfer_chunk *chunk;
+		struct rcar_dmac_xfer_chunk *chunk =
+			list_first_entry(&desc->chunks,
+					 struct rcar_dmac_xfer_chunk, node);
 
 		dev_dbg(chan->chan.device->dev,
 			"chan%u: queue desc %p: %u@%pad\n",
 			chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+				     chunk->src_addr >> 32);
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+				     chunk->dst_addr >> 32);
 		rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
 				     desc->hwdescs.dma >> 32);
 #endif
@@ -368,8 +374,6 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
 		 * should. Initialize it manually with the destination address
 		 * of the first chunk.
 		 */
-		chunk = list_first_entry(&desc->chunks,
-					 struct rcar_dmac_xfer_chunk, node);
 		rcar_dmac_chan_write(chan, RCAR_DMADAR,
 				     chunk->dst_addr & 0xffffffff);
 
@@ -855,8 +859,12 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 	unsigned int nchunks = 0;
 	unsigned int max_chunk_size;
 	unsigned int full_size = 0;
-	bool highmem = false;
+	bool cross_boundary = false;
 	unsigned int i;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u32 high_dev_addr;
+	u32 high_mem_addr;
+#endif
 
 	desc = rcar_dmac_desc_get(chan);
 	if (!desc)
@@ -882,6 +890,16 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 
 		full_size += len;
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (i == 0) {
+			high_dev_addr = dev_addr >> 32;
+			high_mem_addr = mem_addr >> 32;
+		}
+
+		if ((dev_addr >> 32 != high_dev_addr) ||
+		    (mem_addr >> 32 != high_mem_addr))
+			cross_boundary = true;
+#endif
 		while (len) {
 			unsigned int size = min(len, max_chunk_size);
 
@@ -890,18 +908,14 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 			 * Prevent individual transfers from crossing 4GB
 			 * boundaries.
 			 */
-			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
+			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
 				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
-			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
+				cross_boundary = true;
+			}
+			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
 				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
-
-			/*
-			 * Check if either of the source or destination address
-			 * can't be expressed in 32 bits. If so we can't use
-			 * hardware descriptor lists.
-			 */
-			if (dev_addr >> 32 || mem_addr >> 32)
-				highmem = true;
+				cross_boundary = true;
+			}
 #endif
 
 			chunk = rcar_dmac_xfer_chunk_get(chan);
@@ -943,13 +957,11 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
 	 * Use hardware descriptor lists if possible when more than one chunk
 	 * needs to be transferred (otherwise they don't make much sense).
 	 *
-	 * The highmem check currently covers the whole transfer. As an
-	 * optimization we could use descriptor lists for consecutive lowmem
-	 * chunks and direct manual mode for highmem chunks. Whether the
-	 * performance improvement would be significant enough compared to the
-	 * additional complexity remains to be investigated.
+	 * Source/Destination address should be located in same 4GiB region
+	 * in the 40bit address space when it uses Hardware descriptor,
+	 * and cross_boundary is checking it.
 	 */
-	desc->hwdescs.use = !highmem && nchunks > 1;
+	desc->hwdescs.use = !cross_boundary && nchunks > 1;
 	if (desc->hwdescs.use) {
 		if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
 			desc->hwdescs.use = false;
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 49f86cabcfec..786fc8fcc38e 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -1008,7 +1008,7 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 
 	c = dma_get_slave_channel(&chan->vchan.chan);
 	if (!c) {
-		dev_err(dev, "No more channel avalaible\n");
+		dev_err(dev, "No more channels available\n");
 		return NULL;
 	}
 
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index 57aa227bfadb..f4ed3f17607c 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -238,7 +238,7 @@ static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv,
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
-	for_each_clear_bit_from(i, &priv->pchans_used, max) {
+	for_each_clear_bit_from(i, priv->pchans_used, max) {
 		pchan = &pchans[i];
 		pchan->vchan = vchan;
 		set_bit(i, priv->pchans_used);
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index e47fc9b0944f..545e97279083 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(vchan_find_desc);
 static void vchan_complete(unsigned long arg)
 {
 	struct virt_dma_chan *vc = (struct virt_dma_chan *)arg;
-	struct virt_dma_desc *vd;
+	struct virt_dma_desc *vd, *_vd;
 	struct dmaengine_desc_callback cb;
 	LIST_HEAD(head);
 
@@ -103,8 +103,7 @@ static void vchan_complete(unsigned long arg)
 
 	dmaengine_desc_callback_invoke(&cb, NULL);
 
-	while (!list_empty(&head)) {
-		vd = list_first_entry(&head, struct virt_dma_desc, node);
+	list_for_each_entry_safe(vd, _vd, &head, node) {
 		dmaengine_desc_get_callback(&vd->tx, &cb);
 
 		list_del(&vd->node);
@@ -119,9 +118,9 @@ static void vchan_complete(unsigned long arg)
 
 void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
 {
-	while (!list_empty(head)) {
-		struct virt_dma_desc *vd = list_first_entry(head,
-			struct virt_dma_desc, node);
+	struct virt_dma_desc *vd, *_vd;
+
+	list_for_each_entry_safe(vd, _vd, head, node) {
 		if (dmaengine_desc_test_reuse(&vd->tx)) {
 			list_move_tail(&vd->node, &vc->desc_allocated);
 		} else {
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 8288fe4d17c3..8cf87b1a284b 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -331,6 +331,7 @@ struct xilinx_dma_tx_descriptor {
  * @seg_v: Statically allocated segments base
  * @cyclic_seg_v: Statically allocated segment base for cyclic transfers
  * @start_transfer: Differentiate b/w DMA IP's transfer
+ * @stop_transfer: Differentiate b/w DMA IP's quiesce
  */
 struct xilinx_dma_chan {
 	struct xilinx_dma_device *xdev;
@@ -361,6 +362,7 @@ struct xilinx_dma_chan {
 	struct xilinx_axidma_tx_segment *seg_v;
 	struct xilinx_axidma_tx_segment *cyclic_seg_v;
 	void (*start_transfer)(struct xilinx_dma_chan *chan);
+	int (*stop_transfer)(struct xilinx_dma_chan *chan);
 	u16 tdest;
 };
 
@@ -946,26 +948,32 @@ static bool xilinx_dma_is_idle(struct xilinx_dma_chan *chan)
 }
 
 /**
- * xilinx_dma_halt - Halt DMA channel
+ * xilinx_dma_stop_transfer - Halt DMA channel
  * @chan: Driver specific DMA channel
  */
-static void xilinx_dma_halt(struct xilinx_dma_chan *chan)
+static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
 {
-	int err;
 	u32 val;
 
 	dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
 
 	/* Wait for the hardware to halt */
-	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
-				      (val & XILINX_DMA_DMASR_HALTED), 0,
-				      XILINX_DMA_LOOP_COUNT);
+	return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				       val & XILINX_DMA_DMASR_HALTED, 0,
+				       XILINX_DMA_LOOP_COUNT);
+}
 
-	if (err) {
-		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
-			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
-		chan->err = true;
-	}
+/**
+ * xilinx_cdma_stop_transfer - Wait for the current transfer to complete
+ * @chan: Driver specific DMA channel
+ */
+static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
+{
+	u32 val;
+
+	return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				       val & XILINX_DMA_DMASR_IDLE, 0,
+				       XILINX_DMA_LOOP_COUNT);
 }
 
 /**
@@ -1653,7 +1661,7 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	struct xilinx_dma_tx_descriptor *desc;
-	struct xilinx_cdma_tx_segment *segment, *prev;
+	struct xilinx_cdma_tx_segment *segment;
 	struct xilinx_cdma_desc_hw *hw;
 
 	if (!len || len > XILINX_DMA_MAX_TRANS_LEN)
@@ -1680,21 +1688,11 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
 		hw->dest_addr_msb = upper_32_bits(dma_dst);
 	}
 
-	/* Fill the previous next descriptor with current */
-	prev = list_last_entry(&desc->segments,
-			       struct xilinx_cdma_tx_segment, node);
-	prev->hw.next_desc = segment->phys;
-
 	/* Insert the segment into the descriptor segments list. */
 	list_add_tail(&segment->node, &desc->segments);
 
-	prev = segment;
-
-	/* Link the last hardware descriptor with the first. */
-	segment = list_first_entry(&desc->segments,
-				struct xilinx_cdma_tx_segment, node);
 	desc->async_tx.phys = segment->phys;
-	prev->hw.next_desc = segment->phys;
+	hw->next_desc = segment->phys;
 
 	return &desc->async_tx;
 
@@ -2003,12 +2001,17 @@ static int xilinx_dma_terminate_all(struct dma_chan *dchan)
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	u32 reg;
+	int err;
 
 	if (chan->cyclic)
 		xilinx_dma_chan_reset(chan);
 
-	/* Halt the DMA engine */
-	xilinx_dma_halt(chan);
+	err = chan->stop_transfer(chan);
+	if (err) {
+		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+		chan->err = true;
+	}
 
 	/* Remove and free all of the descriptors in the lists */
 	xilinx_dma_free_descriptors(chan);
@@ -2397,12 +2400,16 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		return err;
 	}
 
-	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		chan->start_transfer = xilinx_dma_start_transfer;
-	else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
+		chan->stop_transfer = xilinx_dma_stop_transfer;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		chan->start_transfer = xilinx_cdma_start_transfer;
-	else
+		chan->stop_transfer = xilinx_cdma_stop_transfer;
+	} else {
 		chan->start_transfer = xilinx_vdma_start_transfer;
+		chan->stop_transfer = xilinx_dma_stop_transfer;
+	}
 
 	/* Initialize the tasklet */
 	tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet,
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 91b84a7f0539..580b5323a717 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -38,24 +38,16 @@
 #define PL080_SOFT_LSREQ			(0x2C)
 
 #define PL080_CONFIG				(0x30)
-#define PL080_CONFIG_M2_BE			(1 << 2)
-#define PL080_CONFIG_M1_BE			(1 << 1)
-#define PL080_CONFIG_ENABLE			(1 << 0)
+#define PL080_CONFIG_M2_BE			BIT(2)
+#define PL080_CONFIG_M1_BE			BIT(1)
+#define PL080_CONFIG_ENABLE			BIT(0)
 
 #define PL080_SYNC				(0x34)
 
 /* Per channel configuration registers */
 
-#define PL080_Cx_STRIDE				(0x20)
+/* Per channel configuration registers */
 #define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
-#define PL080_Cx_SRC_ADDR(x)			((0x100 + (x * 0x20)))
-#define PL080_Cx_DST_ADDR(x)			((0x104 + (x * 0x20)))
-#define PL080_Cx_LLI(x)				((0x108 + (x * 0x20)))
-#define PL080_Cx_CONTROL(x)			((0x10C + (x * 0x20)))
-#define PL080_Cx_CONFIG(x)			((0x110 + (x * 0x20)))
-#define PL080S_Cx_CONTROL2(x)			((0x110 + (x * 0x20)))
-#define PL080S_Cx_CONFIG(x)			((0x114 + (x * 0x20)))
-
 #define PL080_CH_SRC_ADDR			(0x00)
 #define PL080_CH_DST_ADDR			(0x04)
 #define PL080_CH_LLI				(0x08)
@@ -66,18 +58,18 @@
 
 #define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
 #define PL080_LLI_ADDR_SHIFT			(2)
-#define PL080_LLI_LM_AHB2			(1 << 0)
+#define PL080_LLI_LM_AHB2			BIT(0)
 
-#define PL080_CONTROL_TC_IRQ_EN			(1 << 31)
+#define PL080_CONTROL_TC_IRQ_EN			BIT(31)
 #define PL080_CONTROL_PROT_MASK			(0x7 << 28)
 #define PL080_CONTROL_PROT_SHIFT		(28)
-#define PL080_CONTROL_PROT_CACHE		(1 << 30)
-#define PL080_CONTROL_PROT_BUFF			(1 << 29)
-#define PL080_CONTROL_PROT_SYS			(1 << 28)
-#define PL080_CONTROL_DST_INCR			(1 << 27)
-#define PL080_CONTROL_SRC_INCR			(1 << 26)
-#define PL080_CONTROL_DST_AHB2			(1 << 25)
-#define PL080_CONTROL_SRC_AHB2			(1 << 24)
+#define PL080_CONTROL_PROT_CACHE		BIT(30)
+#define PL080_CONTROL_PROT_BUFF			BIT(29)
+#define PL080_CONTROL_PROT_SYS			BIT(28)
+#define PL080_CONTROL_DST_INCR			BIT(27)
+#define PL080_CONTROL_SRC_INCR			BIT(26)
+#define PL080_CONTROL_DST_AHB2			BIT(25)
+#define PL080_CONTROL_SRC_AHB2			BIT(24)
 #define PL080_CONTROL_DWIDTH_MASK		(0x7 << 21)
 #define PL080_CONTROL_DWIDTH_SHIFT		(21)
 #define PL080_CONTROL_SWIDTH_MASK		(0x7 << 18)
@@ -103,20 +95,20 @@
 #define PL080_WIDTH_16BIT			(0x1)
 #define PL080_WIDTH_32BIT			(0x2)
 
-#define PL080N_CONFIG_ITPROT			(1 << 20)
-#define PL080N_CONFIG_SECPROT			(1 << 19)
-#define PL080_CONFIG_HALT			(1 << 18)
-#define PL080_CONFIG_ACTIVE			(1 << 17)  /* RO */
-#define PL080_CONFIG_LOCK			(1 << 16)
-#define PL080_CONFIG_TC_IRQ_MASK		(1 << 15)
-#define PL080_CONFIG_ERR_IRQ_MASK		(1 << 14)
+#define PL080N_CONFIG_ITPROT			BIT(20)
+#define PL080N_CONFIG_SECPROT			BIT(19)
+#define PL080_CONFIG_HALT			BIT(18)
+#define PL080_CONFIG_ACTIVE			BIT(17)  /* RO */
+#define PL080_CONFIG_LOCK			BIT(16)
+#define PL080_CONFIG_TC_IRQ_MASK		BIT(15)
+#define PL080_CONFIG_ERR_IRQ_MASK		BIT(14)
 #define PL080_CONFIG_FLOW_CONTROL_MASK		(0x7 << 11)
 #define PL080_CONFIG_FLOW_CONTROL_SHIFT		(11)
 #define PL080_CONFIG_DST_SEL_MASK		(0xf << 6)
 #define PL080_CONFIG_DST_SEL_SHIFT		(6)
 #define PL080_CONFIG_SRC_SEL_MASK		(0xf << 1)
 #define PL080_CONFIG_SRC_SEL_SHIFT		(1)
-#define PL080_CONFIG_ENABLE			(1 << 0)
+#define PL080_CONFIG_ENABLE			BIT(0)
 
 #define PL080_FLOW_MEM2MEM			(0x0)
 #define PL080_FLOW_MEM2PER			(0x1)
diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h
deleted file mode 100644
index fe93758e8403..000000000000
--- a/include/linux/amba/pl330.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* linux/include/linux/amba/pl330.h
- *
- * Copyright (C) 2010 Samsung Electronics Co. Ltd.
- *	Jaswinder Singh <jassi.brar@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef	__AMBA_PL330_H_
-#define	__AMBA_PL330_H_
-
-#include <linux/dmaengine.h>
-
-struct dma_pl330_platdata {
-	/*
-	 * Number of valid peripherals connected to DMAC.
-	 * This may be different from the value read from
-	 * CR0, as the PL330 implementation might have 'holes'
-	 * in the peri list or the peri could also be reached
-	 * from another DMAC which the platform prefers.
-	 */
-	u8 nr_valid_peri;
-	/* Array of valid peripherals */
-	u8 *peri_id;
-	/* Operational capabilities */
-	dma_cap_mask_t cap_mask;
-	/* Bytes to allocate for MC buffer */
-	unsigned mcbuf_sz;
-};
-
-extern bool pl330_filter(struct dma_chan *chan, void *param);
-#endif	/* __AMBA_PL330_H_ */
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fe4d50c992df..ea4cc3dde4f1 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1498,7 +1498,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 	entry->type      = dma_debug_coherent;
 	entry->dev       = dev;
 	entry->pfn	 = page_to_pfn(virt_to_page(virt));
-	entry->offset	 = (size_t) virt & ~PAGE_MASK;
+	entry->offset	 = offset_in_page(virt);
 	entry->size      = size;
 	entry->dev_addr  = dma_addr;
 	entry->direction = DMA_BIDIRECTIONAL;
@@ -1514,7 +1514,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 		.type           = dma_debug_coherent,
 		.dev            = dev,
 		.pfn		= page_to_pfn(virt_to_page(virt)),
-		.offset		= (size_t) virt & ~PAGE_MASK,
+		.offset		= offset_in_page(virt),
 		.dev_addr       = addr,
 		.size           = size,
 		.direction      = DMA_BIDIRECTIONAL,