author		Linus Torvalds <torvalds@linux-foundation.org>	2022-10-07 15:56:34 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-07 15:56:34 -0700
commit		416a2f4f91525fcdec821320bc4608cf012d418e (patch)
tree		a54e571a108fa801cca22409ef18db417ba7b43a
parent		62e6e5940c0c09433efa52d0fa9a11623a4704b2 (diff)
parent		b957df98469240d459bcfae6904b36d6ecea9bee (diff)
download	linux-416a2f4f91525fcdec821320bc4608cf012d418e.tar.gz
Merge tag 'dmaengine-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine
Pull dmaengine updates from Vinod Koul:
 "New Support:

   - MT6795 SoC dma controller (AngeloGioacchino Del Regno)

   - qcom-adm controller yaml binding (Christian Marangi)

   - Renesas r8a779g0 dma controller yaml binding (Geert Uytterhoeven)

   - Qualcomm SM6350 GPI dma controller (Luca Weiss)

  Updates:

   - STM32 DMA-MDMA chaining support (Amelie Delaunay)

   - make hsu driver use managed resources (Andy Shevchenko)

   - the usual round of idxd driver updates (Dave Jiang & Jerry
     Snitselaar)

   - apple dma driver iommu and pd properties and remove use
     of devres for irqs (Janne Grunau & Martin Povišer)

   - device_synchronize support for Xilinx zynqmp driver (Swati
     Agarwal)"

* tag 'dmaengine-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine: (60 commits)
  dmaengine: ioat: remove unused declarations in dma.h
  dmaengine: ti: k3-udma: Respond TX done if DMA_PREP_INTERRUPT is not requested
  dmaengine: zynqmp_dma: Add device_synchronize support
  dt-bindings: dma: add additional pbus reset to qcom,adm
  dt-bindings: dma: rework qcom,adm Documentation to yaml schema
  dt-bindings: dma: apple,admac: Add iommus and power-domains properties
  dmaengine: dw-edma: Remove runtime PM support
  dmaengine: idxd: add configuration for concurrent batch descriptor processing
  dmaengine: idxd: add configuration for concurrent work descriptor processing
  dmaengine: idxd: add WQ operation cap restriction support
  dmanegine: idxd: reformat opcap output to match bitmap_parse() input
  dmaengine: idxd: convert ats_dis to a wq flag
  dmaengine: ioat: stop mod_timer from resurrecting deleted timer in __cleanup()
  dmaengine: qcom-adm: fix wrong calling convention for prep_slave_sg
  dmaengine: qcom-adm: fix wrong sizeof config in slave_config
  dmaengine: ti: k3-psil: add additional TX threads for j721e
  dmaengine: ti: k3-psil: add additional TX threads for j7200
  dmaengine: apple-admac: Trigger shared reset
  dmaengine: apple-admac: Do not use devres for IRQs
  dmaengine: ti: edma: Remove some unused functions
  ...
-rw-r--r--  Documentation/ABI/stable/sysfs-driver-dma-idxd | 35
-rw-r--r--  Documentation/arm/index.rst | 1
-rw-r--r--  Documentation/arm/stm32/stm32-dma-mdma-chaining.rst | 415
-rw-r--r--  Documentation/devicetree/bindings/dma/apple,admac.yaml | 7
-rw-r--r--  Documentation/devicetree/bindings/dma/arm,pl330.yaml | 6
-rw-r--r--  Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml | 1
-rw-r--r--  Documentation/devicetree/bindings/dma/qcom,adm.yaml | 99
-rw-r--r--  Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml | 8
-rw-r--r--  Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1
-rw-r--r--  Documentation/devicetree/bindings/dma/qcom_adm.txt | 61
-rw-r--r--  Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml | 1
-rw-r--r--  Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt | 2
-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  drivers/dma/Kconfig | 2
-rw-r--r--  drivers/dma/amba-pl08x.c | 2
-rw-r--r--  drivers/dma/apple-admac.c | 45
-rw-r--r--  drivers/dma/at_xdmac.c | 5
-rw-r--r--  drivers/dma/dw-edma/dw-edma-core.c | 12
-rw-r--r--  drivers/dma/hisi_dma.c | 650
-rw-r--r--  drivers/dma/hsu/hsu.c | 8
-rw-r--r--  drivers/dma/hsu/hsu.h | 12
-rw-r--r--  drivers/dma/hsu/pci.c | 47
-rw-r--r--  drivers/dma/idxd/device.c | 38
-rw-r--r--  drivers/dma/idxd/idxd.h | 10
-rw-r--r--  drivers/dma/idxd/init.c | 36
-rw-r--r--  drivers/dma/idxd/irq.c | 13
-rw-r--r--  drivers/dma/idxd/registers.h | 35
-rw-r--r--  drivers/dma/idxd/sysfs.c | 187
-rw-r--r--  drivers/dma/ioat/dma.c | 6
-rw-r--r--  drivers/dma/ioat/dma.h | 2
-rw-r--r--  drivers/dma/mxs-dma.c | 11
-rw-r--r--  drivers/dma/pl330.c | 4
-rw-r--r--  drivers/dma/qcom/gpi.c | 7
-rw-r--r--  drivers/dma/qcom/qcom_adm.c | 22
-rw-r--r--  drivers/dma/s3c24xx-dma.c | 2
-rw-r--r--  drivers/dma/sf-pdma/sf-pdma.c | 8
-rw-r--r--  drivers/dma/sh/rcar-dmac.c | 4
-rw-r--r--  drivers/dma/stm32-dma.c | 136
-rw-r--r--  drivers/dma/stm32-dmamux.c | 12
-rw-r--r--  drivers/dma/stm32-mdma.c | 70
-rw-r--r--  drivers/dma/ti/edma.c | 40
-rw-r--r--  drivers/dma/ti/k3-psil-j7200.c | 67
-rw-r--r--  drivers/dma/ti/k3-psil-j721e.c | 79
-rw-r--r--  drivers/dma/ti/k3-udma.c | 37
-rw-r--r--  drivers/dma/xilinx/zynqmp_dma.c | 12
-rw-r--r--  include/linux/dma/hsu.h | 6
-rw-r--r--  include/linux/platform_data/dma-hsu.h | 2
47 files changed, 1854 insertions, 413 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index 0c2b613f2373..8e2c2c405db2 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -227,6 +227,17 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicate the number of retires for an enqcmds submission on a sharedwq.
 		A max value to set attribute is capped at 64.
 
+What:		/sys/bus/dsa/devices/wq<m>.<n>/op_config
+Date:		Sept 14, 2022
+KernelVersion:	6.0.0
+Contact:	dmaengine@vger.kernel.org
+Description:	Shows the operation capability bits displayed in bitmap format
+		presented by %*pb printk() output format specifier.
+		The attribute can be configured when the WQ is disabled in
+		order to configure the WQ to accept specific bits that
+		correlate to the operations allowed. It's visible only
+		on platforms that support the capability.
+
 What:           /sys/bus/dsa/devices/engine<m>.<n>/group_id
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
@@ -255,3 +266,27 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicates the number of Read Buffers reserved for the use of
 		engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers
 		Reserved.
+
+What:		/sys/bus/dsa/devices/group<m>.<n>/desc_progress_limit
+Date:		Sept 14, 2022
+KernelVersion:	6.0.0
+Contact:	dmaengine@vger.kernel.org
+Description:	Allows control of the number of work descriptors that can be
+		concurrently processed by an engine in the group as a fraction
+		of the Maximum Work Descriptors in Progress value specified in
+		the ENGCAP register. The acceptable values are 0 (default),
+		1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of
+		the max value). It's visible only on platforms that support
+		the capability.
+
+What:		/sys/bus/dsa/devices/group<m>.<n>/batch_progress_limit
+Date:		Sept 14, 2022
+KernelVersion:	6.0.0
+Contact:	dmaengine@vger.kernel.org
+Description:	Allows control of the number of batch descriptors that can be
+		concurrently processed by an engine in the group as a fraction
+		of the Maximum Batch Descriptors in Progress value specified in
+		the ENGCAP register. The acceptable values are 0 (default),
+		1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of
+		the max value). It's visible only on platforms that support
+		the capability.
diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst
index 495ada7915e1..8c636d4a061f 100644
--- a/Documentation/arm/index.rst
+++ b/Documentation/arm/index.rst
@@ -59,6 +59,7 @@ SoC-specific documents
    stm32/stm32f429-overview
    stm32/stm32mp13-overview
    stm32/stm32mp157-overview
+   stm32/stm32-dma-mdma-chaining
 
    sunxi
 
diff --git a/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst b/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst
new file mode 100644
index 000000000000..2945e0e33104
--- /dev/null
+++ b/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst
@@ -0,0 +1,415 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+STM32 DMA-MDMA chaining
+=======================
+
+
+Introduction
+------------
+
+  This document describes the STM32 DMA-MDMA chaining feature. But before going
+  further, let's introduce the peripherals involved.
+
+  To offload data transfers from the CPU, STM32 microprocessors (MPUs) embed
+  direct memory access controllers (DMA).
+
+  STM32MP1 SoCs embed both STM32 DMA and STM32 MDMA controllers. STM32 DMA
+  request routing capabilities are enhanced by a DMA request multiplexer
+  (STM32 DMAMUX).
+
+  **STM32 DMAMUX**
+
+  STM32 DMAMUX routes any DMA request from a given peripheral to any channel of
+  the STM32 DMA controllers (STM32MP1 has two STM32 DMA controllers).
+
+  **STM32 DMA**
+
+  STM32 DMA is mainly used to implement central data buffer storage (usually in
+  the system SRAM) for different peripherals. It can access external RAMs, but
+  without the ability to generate the convenient burst transfers that ensure the
+  best use of the AXI bus.
+
+  **STM32 MDMA**
+
+  STM32 MDMA (Master DMA) is mainly used to manage direct data transfers between
+  RAM data buffers without CPU intervention. It can also be used in a
+  hierarchical structure that uses STM32 DMA as first level data buffer
+  interfaces for AHB peripherals, while the STM32 MDMA acts as a second level
+  DMA with better performance. As an AXI/AHB master, STM32 MDMA can take control
+  of the AXI/AHB bus.
+
+
+Principles
+----------
+
+  STM32 DMA-MDMA chaining feature relies on the strengths of STM32 DMA and
+  STM32 MDMA controllers.
+
+  STM32 DMA has a circular Double Buffer Mode (DBM). At each end of transaction
+  (when DMA data counter - DMA_SxNDTR - reaches 0), the memory pointers
+  (configured with DMA_SxM0AR and DMA_SxM1AR) are swapped and the DMA data
+  counter is automatically reloaded. This allows the SW or the STM32 MDMA to
+  process one memory area while the second memory area is being filled/used by
+  the STM32 DMA transfer.
+
+  With STM32 MDMA linked-list mode, a single request initiates the transfer of
+  the data array (collection of nodes), until the linked-list pointer for the
+  channel is null. The transfer complete of the last node marks the end of the
+  transfer, unless the first and last nodes are linked to each other, in which
+  case the linked-list loops to create a circular MDMA transfer.
+
+  STM32 MDMA has direct connections with STM32 DMA. This enables autonomous
+  communication and synchronization between peripherals, thus saving CPU
+  resources and reducing bus congestion. The Transfer Complete signal of an
+  STM32 DMA channel can trigger an STM32 MDMA transfer. STM32 MDMA can clear
+  the request generated by the STM32 DMA by writing to its Interrupt Clear
+  register (whose address is stored in MDMA_CxMAR, and bit mask in MDMA_CxMDR).
+
+  .. table:: STM32 MDMA interconnect table with STM32 DMA
+
+    +--------------+----------------+-----------+------------+
+    | STM32 DMAMUX | STM32 DMA      | STM32 DMA | STM32 MDMA |
+    | channels     | channels       | Transfer  | request    |
+    |              |                | complete  |            |
+    |              |                | signal    |            |
+    +==============+================+===========+============+
+    | Channel *0*  | DMA1 channel 0 | dma1_tcf0 | *0x00*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *1*  | DMA1 channel 1 | dma1_tcf1 | *0x01*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *2*  | DMA1 channel 2 | dma1_tcf2 | *0x02*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *3*  | DMA1 channel 3 | dma1_tcf3 | *0x03*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *4*  | DMA1 channel 4 | dma1_tcf4 | *0x04*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *5*  | DMA1 channel 5 | dma1_tcf5 | *0x05*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *6*  | DMA1 channel 6 | dma1_tcf6 | *0x06*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *7*  | DMA1 channel 7 | dma1_tcf7 | *0x07*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *8*  | DMA2 channel 0 | dma2_tcf0 | *0x08*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *9*  | DMA2 channel 1 | dma2_tcf1 | *0x09*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *10* | DMA2 channel 2 | dma2_tcf2 | *0x0A*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *11* | DMA2 channel 3 | dma2_tcf3 | *0x0B*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *12* | DMA2 channel 4 | dma2_tcf4 | *0x0C*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *13* | DMA2 channel 5 | dma2_tcf5 | *0x0D*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *14* | DMA2 channel 6 | dma2_tcf6 | *0x0E*     |
+    +--------------+----------------+-----------+------------+
+    | Channel *15* | DMA2 channel 7 | dma2_tcf7 | *0x0F*     |
+    +--------------+----------------+-----------+------------+
+
+  The STM32 DMA-MDMA chaining feature then uses an SRAM buffer. STM32MP1 SoCs
+  embed three fast-access static internal RAMs of various sizes, used for data
+  storage. Because of the STM32 DMA legacy (it comes from microcontrollers),
+  STM32 DMA performance is poor with DDR, while it is optimal with SRAM. Hence
+  the SRAM buffer used between STM32 DMA and STM32 MDMA. This buffer is split
+  into two equal periods: STM32 DMA uses one period while STM32 MDMA uses the
+  other one simultaneously.
+  ::
+
+                    dma[1:2]-tcf[0:7]
+                   .----------------.
+     ____________ '    _________     V____________
+    | STM32 DMA  |    /  __|>_  \    | STM32 MDMA |
+    |------------|   |  /     \  |   |------------|
+    | DMA_SxM0AR |<=>| | SRAM  | |<=>| []-[]...[] |
+    | DMA_SxM1AR |   |  \_____/  |   |            |
+    |____________|    \___<|____/    |____________|
+
+  STM32 DMA-MDMA chaining uses (struct dma_slave_config).peripheral_config to
+  exchange the parameters needed to configure MDMA. These parameters are
+  gathered into a u32 array with three values:
+
+  * the STM32 MDMA request (which is actually the DMAMUX channel ID),
+  * the address of the STM32 DMA register to clear the Transfer Complete
+    interrupt flag,
+  * the mask of the Transfer Complete interrupt flag of the STM32 DMA channel.
+
+Device Tree updates for STM32 DMA-MDMA chaining support
+-------------------------------------------------------
+
+  **1. Allocate a SRAM buffer**
+
+    SRAM device tree node is defined in SoC device tree. You can refer to it in
+    your board device tree to define your SRAM pool.
+    ::
+
+          &sram {
+                  my_foo_device_dma_pool: dma-sram@0 {
+                          reg = <0x0 0x1000>;
+                  };
+          };
+
+    Be careful of the start index, in case there are other SRAM consumers.
+    Define your pool size strategically: to optimise chaining, the idea is that
+    STM32 DMA and STM32 MDMA can work simultaneously, on each buffer of the
+    SRAM.
+    If the SRAM period is greater than the expected DMA transfer, then STM32 DMA
+    and STM32 MDMA will work sequentially instead of simultaneously. It is not a
+    functional issue but it is not optimal.
+
+    Don't forget to refer to your SRAM pool in your device node. You need to
+    define a new property.
+    ::
+
+          &my_foo_device {
+                  ...
+                  my_dma_pool = &my_foo_device_dma_pool;
+          };
+
+    Then get this SRAM pool in your foo driver and allocate your SRAM buffer.
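+
+    As an illustration, here is a minimal sketch of this step based on the
+    generic SRAM pool helpers (the my_dma_pool property comes from the example
+    above; dev, the pool size and the error handling are assumptions of this
+    sketch):
+    ::
+
+      #include <linux/genalloc.h>
+      #include <linux/of.h>
+
+      struct gen_pool *sram_pool;
+      void *sram_buf;
+      dma_addr_t sram_dma_buf;
+      size_t sram_buf_size = 0x1000, sram_period;
+
+      /* Get the SRAM pool referenced by the my_dma_pool property */
+      sram_pool = of_gen_pool_get(dev->of_node, "my_dma_pool", 0);
+      if (!sram_pool)
+              return -ENODEV;
+
+      /* Allocate the SRAM buffer and retrieve its DMA address */
+      sram_buf = gen_pool_dma_alloc(sram_pool, sram_buf_size, &sram_dma_buf);
+      if (!sram_buf)
+              return -ENOMEM;
+
+      /* The buffer is used as two halves, in a ping-pong fashion */
+      sram_period = sram_buf_size / 2;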
+
+  **2. Allocate a STM32 DMA channel and a STM32 MDMA channel**
+
+    You need to define an extra channel in your device tree node, in addition to
+    the one you should already have for "classic" DMA operation.
+
+    This new channel must be taken from the STM32 MDMA channels, so the phandle
+    of the DMA controller to use is the MDMA controller's.
+    ::
+
+          &my_foo_device {
+                  [...]
+                  my_dma_pool = &my_foo_device_dma_pool;
+                  dmas = <&dmamux1 ...>,                // STM32 DMA channel
+                         <&mdma1 0 0x3 0x1200000a 0 0>; // + STM32 MDMA channel
+          };
+
+    Concerning STM32 MDMA bindings:
+
+    1. The request line number: whatever the value here, it will be overwritten
+    by the MDMA driver with the STM32 DMAMUX channel ID passed through
+    (struct dma_slave_config).peripheral_config
+
+    2. The priority level: choose Very High (0x3) so that your channel will
+    take priority over the others during request arbitration
+
+    3. A 32-bit mask specifying the DMA channel configuration: source and
+    destination address increment, block transfer with 128 bytes per single
+    transfer
+
+    4. The 32-bit value specifying the register to be used to acknowledge the
+    request: it will be overwritten by the MDMA driver with the DMA channel
+    interrupt flag clear register address passed through
+    (struct dma_slave_config).peripheral_config
+
+    5. The 32-bit mask specifying the value to be written to acknowledge the
+    request: it will be overwritten by the MDMA driver with the DMA channel
+    Transfer Complete flag mask passed through
+    (struct dma_slave_config).peripheral_config
+
+Driver updates for STM32 DMA-MDMA chaining support in foo driver
+----------------------------------------------------------------
+
+  **0. (optional) Refactor the original sg_table if dmaengine_prep_slave_sg()**
+
+    In case of dmaengine_prep_slave_sg(), the original sg_table can't be used as
+    is. Two new sg_tables must be created from the original one: one for the
+    STM32 DMA transfer (where the memory address now targets the SRAM buffer
+    instead of the DDR buffer) and one for the STM32 MDMA transfer (where the
+    memory address targets the DDR buffer).
+
+    The new sg_list items must fit the SRAM period length. Here is an example
+    for DMA_DEV_TO_MEM:
+    ::
+
+      /*
+       * Assuming sgl and nents, respectively the initial scatterlist and its
+       * length.
+       * Assuming sram_dma_buf and sram_period, respectively the memory
+       * allocated from the pool for DMA usage, and the length of the period,
+       * which is half of the sram_buf size.
+       */
+      struct sg_table new_dma_sgt, new_mdma_sgt;
+      struct scatterlist *s, *_sgl;
+      dma_addr_t ddr_dma_buf;
+      u32 new_nents = 0, len;
+      int i;
+
+      /* Count the number of entries needed */
+      for_each_sg(sgl, s, nents, i)
+              if (sg_dma_len(s) > sram_period)
+                      new_nents += DIV_ROUND_UP(sg_dma_len(s), sram_period);
+              else
+                      new_nents++;
+
+      /* Create sg table for STM32 DMA channel */
+      ret = sg_alloc_table(&new_dma_sgt, new_nents, GFP_ATOMIC);
+      if (ret)
+              dev_err(dev, "DMA sg table alloc failed\n");
+
+      _sgl = sgl;
+      len = sg_dma_len(sgl);
+      for_each_sg(new_dma_sgt.sgl, s, new_dma_sgt.nents, i) {
+              sg_dma_len(s) = min_t(u32, len, sram_period);
+              /* Targets the beginning = first half of the sram_buf */
+              sg_dma_address(s) = sram_dma_buf;
+              /*
+               * Targets the second half of the sram_buf
+               * for odd indexes of the items of the sg_list
+               */
+              if (i & 1)
+                      sg_dma_address(s) += sram_period;
+
+              /* Walk the original sg_list as its entries are consumed */
+              len -= sg_dma_len(s);
+              if (!len && sg_next(_sgl)) {
+                      _sgl = sg_next(_sgl);
+                      len = sg_dma_len(_sgl);
+              }
+      }
+
+      /* Create sg table for STM32 MDMA channel */
+      ret = sg_alloc_table(&new_mdma_sgt, new_nents, GFP_ATOMIC);
+      if (ret)
+              dev_err(dev, "MDMA sg_table alloc failed\n");
+
+      _sgl = sgl;
+      len = sg_dma_len(sgl);
+      ddr_dma_buf = sg_dma_address(sgl);
+      for_each_sg(new_mdma_sgt.sgl, s, new_mdma_sgt.nents, i) {
+              size_t bytes = min_t(size_t, len, sram_period);
+
+              sg_dma_len(s) = bytes;
+              sg_dma_address(s) = ddr_dma_buf;
+              len -= bytes;
+
+              if (!len && sg_next(_sgl)) {
+                      _sgl = sg_next(_sgl);
+                      len = sg_dma_len(_sgl);
+                      ddr_dma_buf = sg_dma_address(_sgl);
+              } else {
+                      ddr_dma_buf += bytes;
+              }
+      }
+
+    Don't forget to release these new sg_tables after getting the descriptors
+    with dmaengine_prep_slave_sg().
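+
+    For instance, reusing the names of the sketch above:
+    ::
+
+      sg_free_table(&new_dma_sgt);
+      sg_free_table(&new_mdma_sgt);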
+
+  **1. Set controller specific parameters**
+
+    First, use dmaengine_slave_config() with a struct dma_slave_config to
+    configure the STM32 DMA channel. You just have to take care of the DMA
+    addresses: the memory address (depending on the transfer direction) must
+    point to your SRAM buffer. Also set
+    (struct dma_slave_config).peripheral_size != 0.
+
+    The STM32 DMA driver checks (struct dma_slave_config).peripheral_size to
+    determine whether chaining is being used. If it is, the STM32 DMA driver
+    fills (struct dma_slave_config).peripheral_config with an array of three
+    u32: the first one containing the STM32 DMAMUX channel ID, the second one
+    the channel interrupt flag clear register address, and the third one the
+    channel Transfer Complete flag mask.
+
+    Then, use dmaengine_slave_config() with another struct dma_slave_config to
+    configure the STM32 MDMA channel. Take care of the DMA addresses: the device
+    address (depending on the transfer direction) must point to your SRAM
+    buffer, and the memory address must point to the buffer originally used for
+    the "classic" DMA operation. Use the .peripheral_size and .peripheral_config
+    values that the STM32 DMA driver has just updated to set .peripheral_size
+    and .peripheral_config of the struct dma_slave_config used to configure the
+    STM32 MDMA channel.
+    ::
+
+      struct dma_slave_config dma_conf;
+      struct dma_slave_config mdma_conf;
+
+      memset(&dma_conf, 0, sizeof(dma_conf));
+      [...]
+      dma_conf.direction = DMA_DEV_TO_MEM;
+      dma_conf.dst_addr = sram_dma_buf;      // SRAM buffer
+      dma_conf.peripheral_size = 1;          // peripheral_size != 0 => chaining
+
+      dmaengine_slave_config(dma_chan, &dma_conf);
+
+      memset(&mdma_conf, 0, sizeof(mdma_conf));
+      mdma_conf.direction = DMA_DEV_TO_MEM;
+      mdma_conf.src_addr = sram_dma_buf;     // SRAM buffer
+      mdma_conf.dst_addr = rx_dma_buf;       // original memory buffer
+      mdma_conf.peripheral_size = dma_conf.peripheral_size;     // <- dma_conf
+      mdma_conf.peripheral_config = dma_conf.peripheral_config; // <- dma_conf
+
+      dmaengine_slave_config(mdma_chan, &mdma_conf);
+
+  **2. Get a descriptor for STM32 DMA channel transaction**
+
+    In the same way you get your descriptor for your "classic" DMA operation,
+    you just have to replace the original sg_list (in case of
+    dmaengine_prep_slave_sg()) with the new sg_list using the SRAM buffer, or
+    replace the original buffer address, length and period (in case of
+    dmaengine_prep_dma_cyclic()) with the new SRAM buffer.
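+
+    For instance, in the dmaengine_prep_slave_sg() case, a DMA_DEV_TO_MEM sketch
+    reusing the new_dma_sgt table built above (dma_chan is assumed to be the
+    STM32 DMA channel requested earlier):
+    ::
+
+      struct dma_async_tx_descriptor *dma_desc;
+
+      dma_desc = dmaengine_prep_slave_sg(dma_chan, new_dma_sgt.sgl,
+                                         new_dma_sgt.nents, DMA_DEV_TO_MEM,
+                                         DMA_PREP_INTERRUPT);
+      if (!dma_desc)
+              return -EINVAL;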
+
+  **3. Get a descriptor for STM32 MDMA channel transaction**
+
+    If you previously got the descriptor (for STM32 DMA) with
+
+    * dmaengine_prep_slave_sg(), then use dmaengine_prep_slave_sg() for
+      STM32 MDMA;
+    * dmaengine_prep_dma_cyclic(), then use dmaengine_prep_dma_cyclic() for
+      STM32 MDMA.
+
+    Use the new sg_list using SRAM buffer (in case of dmaengine_prep_slave_sg())
+    or, depending on the transfer direction, either the original DDR buffer (in
+    case of DMA_DEV_TO_MEM) or the SRAM buffer (in case of DMA_MEM_TO_DEV), the
+    source address being previously set with dmaengine_slave_config().
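+
+    Continuing the DMA_DEV_TO_MEM sketch, with mdma_chan assumed to be the
+    STM32 MDMA channel and new_mdma_sgt targeting the original DDR buffer:
+    ::
+
+      struct dma_async_tx_descriptor *mdma_desc;
+
+      mdma_desc = dmaengine_prep_slave_sg(mdma_chan, new_mdma_sgt.sgl,
+                                          new_mdma_sgt.nents, DMA_DEV_TO_MEM,
+                                          DMA_PREP_INTERRUPT);
+      if (!mdma_desc)
+              return -EINVAL;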
+
+  **4. Submit both transactions**
+
+    Before submitting your transactions, you may need to define on which
+    descriptor you want a callback to be called at the end of the transfer
+    (dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()).
+    Depending on the direction, set the callback on the descriptor that finishes
+    the overall transfer:
+
+    * DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor
+    * DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor
+
+    Then, submit the descriptors in any order, with dmaengine_submit().
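+
+    For the DMA_DEV_TO_MEM sketch above, this could look as follows (the
+    callback and its parameter are placeholders):
+    ::
+
+      /* DMA_DEV_TO_MEM: the MDMA descriptor completes the overall transfer */
+      mdma_desc->callback = foo_dma_complete;
+      mdma_desc->callback_param = foo;
+
+      dmaengine_submit(dma_desc);
+      dmaengine_submit(mdma_desc);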
+
+  **5. Issue pending requests (and wait for callback notification)**
+
+  As the STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue
+  the pending requests of the STM32 MDMA channel before those of the STM32 DMA
+  channel.
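+
+  Following the sketch above, this could be:
+  ::
+
+    /* MDMA first: it waits for the STM32 DMA Transfer Complete trigger */
+    dma_async_issue_pending(mdma_chan);
+    dma_async_issue_pending(dma_chan);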
+
+  If set, your callback will be called to notify you about the end of the
+  overall transfer or the period completion.
+
+  Don't forget to terminate both channels. The STM32 DMA channel is configured
+  in cyclic Double-Buffer mode, so it won't be disabled by hardware: you need to
+  terminate it. The STM32 MDMA channel will be stopped by hardware in case of sg
+  transfer, but not in case of cyclic transfer. You can terminate it whatever
+  the kind of transfer.
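+
+  A possible teardown sketch, reusing the names introduced above:
+  ::
+
+    dmaengine_terminate_sync(dma_chan);
+    dmaengine_terminate_sync(mdma_chan);
+
+    /* Give the SRAM buffer back to the pool */
+    gen_pool_free(sram_pool, (unsigned long)sram_buf, sram_buf_size);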
+
+  **STM32 DMA-MDMA chaining DMA_MEM_TO_DEV special case**
+
+  STM32 DMA-MDMA chaining in DMA_MEM_TO_DEV is a special case. Indeed, the
+  STM32 MDMA feeds the SRAM buffer with the DDR data, and the STM32 DMA reads
+  data from the SRAM buffer. So some data (the first period) has to be copied
+  into the SRAM buffer before the STM32 DMA starts to read.
+
+  A trick could be pausing the STM32 DMA channel (that will raise a Transfer
+  Complete signal, triggering the STM32 MDMA channel), but the first data read
+  by the STM32 DMA could be "wrong". The proper way is to prepare the first SRAM
+  period with dmaengine_prep_dma_memcpy(). Then this first period should be
+  "removed" from the sg or the cyclic transfer.
+
+  Due to this complexity, it is better to use STM32 DMA-MDMA chaining for
+  DMA_DEV_TO_MEM and to keep the "classic" DMA usage for DMA_MEM_TO_DEV, unless
+  you're not afraid.
+
+Resources
+---------
+
+  Application notes, datasheets and reference manuals are available on the ST
+  website (STM32MP1_).
+
+  Three application notes (AN5224_, AN4031_ & AN5001_) specifically deal with
+  STM32 DMAMUX, STM32 DMA and STM32 MDMA.
+
+.. _STM32MP1: https://www.st.com/en/microcontrollers-microprocessors/stm32mp1-series.html
+.. _AN5224: https://www.st.com/resource/en/application_note/an5224-stm32-dmamux-the-dma-request-router-stmicroelectronics.pdf
+.. _AN4031: https://www.st.com/resource/en/application_note/dm00046011-using-the-stm32f2-stm32f4-and-stm32f7-series-dma-controller-stmicroelectronics.pdf
+.. _AN5001: https://www.st.com/resource/en/application_note/an5001-stm32cube-expansion-package-for-stm32h7-series-mdma-stmicroelectronics.pdf
+
+:Authors:
+
+- Amelie Delaunay <amelie.delaunay@foss.st.com>
\ No newline at end of file
diff --git a/Documentation/devicetree/bindings/dma/apple,admac.yaml b/Documentation/devicetree/bindings/dma/apple,admac.yaml
index bdc8c129c4f5..3b1e667f7ea0 100644
--- a/Documentation/devicetree/bindings/dma/apple,admac.yaml
+++ b/Documentation/devicetree/bindings/dma/apple,admac.yaml
@@ -49,6 +49,13 @@ properties:
       in an interrupts-extended list the disconnected positions will contain
       an empty phandle reference <0>.
 
+  iommus:
+    minItems: 1
+    maxItems: 2
+
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/dma/arm,pl330.yaml b/Documentation/devicetree/bindings/dma/arm,pl330.yaml
index 2bec69b308f8..4a3dd6f5309b 100644
--- a/Documentation/devicetree/bindings/dma/arm,pl330.yaml
+++ b/Documentation/devicetree/bindings/dma/arm,pl330.yaml
@@ -55,6 +55,12 @@ properties:
 
   dma-coherent: true
 
+  iommus:
+    minItems: 1
+    maxItems: 9
+    description: Up to 1 IOMMU entry per DMA channel for writes and 1
+      IOMMU entry for reads.
+
   power-domains:
     maxItems: 1
 
diff --git a/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml b/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml
index 19ea8dcbcbce..9ab4d81ead35 100644
--- a/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml
@@ -22,6 +22,7 @@ properties:
       - items:
           - enum:
               - mediatek,mt2712-uart-dma
+              - mediatek,mt6795-uart-dma
               - mediatek,mt8365-uart-dma
               - mediatek,mt8516-uart-dma
           - const: mediatek,mt6577-uart-dma
diff --git a/Documentation/devicetree/bindings/dma/qcom,adm.yaml b/Documentation/devicetree/bindings/dma/qcom,adm.yaml
new file mode 100644
index 000000000000..6a9d7bc74aff
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/qcom,adm.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/qcom,adm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm ADM DMA Controller
+
+maintainers:
+  - Christian Marangi <ansuelsmth@gmail.com>
+  - Bjorn Andersson <bjorn.andersson@linaro.org>
+
+description: |
+  QCOM ADM DMA controller provides DMA capabilities for
+  peripheral buses such as NAND and SPI.
+
+properties:
+  compatible:
+    const: qcom,adm
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  "#dma-cells":
+    const: 1
+
+  clocks:
+    items:
+      - description: phandle to the core clock
+      - description: phandle to the iface clock
+
+  clock-names:
+    items:
+      - const: core
+      - const: iface
+
+  resets:
+    items:
+      - description: phandle to the clk reset
+      - description: phandle to the pbus reset
+      - description: phandle to the c0 reset
+      - description: phandle to the c1 reset
+      - description: phandle to the c2 reset
+
+  reset-names:
+    items:
+      - const: clk
+      - const: pbus
+      - const: c0
+      - const: c1
+      - const: c2
+
+  qcom,ee:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: indicates the security domain identifier used in the secure world.
+    minimum: 0
+    maximum: 255
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - "#dma-cells"
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - qcom,ee
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-ipq806x.h>
+    #include <dt-bindings/reset/qcom,gcc-ipq806x.h>
+
+    adm_dma: dma-controller@18300000 {
+        compatible = "qcom,adm";
+        reg = <0x18300000 0x100000>;
+        interrupts = <0 170 0>;
+        #dma-cells = <1>;
+
+        clocks = <&gcc ADM0_CLK>,
+                  <&gcc ADM0_PBUS_CLK>;
+        clock-names = "core", "iface";
+
+        resets = <&gcc ADM0_RESET>,
+                  <&gcc ADM0_PBUS_RESET>,
+                  <&gcc ADM0_C0_RESET>,
+                  <&gcc ADM0_C1_RESET>,
+                  <&gcc ADM0_C2_RESET>;
+        reset-names = "clk", "pbus", "c0", "c1", "c2";
+        qcom,ee = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml
index 9bf3a1b164f1..003098caf709 100644
--- a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml
@@ -8,7 +8,7 @@ title: Qualcomm Technologies Inc BAM DMA controller
 
 maintainers:
   - Andy Gross <agross@kernel.org>
-  - Bjorn Andersson <bjorn.andersson@linaro.org>
+  - Bjorn Andersson <andersson@kernel.org>
 
 allOf:
   - $ref: "dma-controller.yaml#"
@@ -20,7 +20,7 @@ properties:
       - qcom,bam-v1.3.0
         # MSM8974, APQ8074 and APQ8084
       - qcom,bam-v1.4.0
-        # MSM8916
+        # MSM8916 and SDM845
       - qcom,bam-v1.7.0
 
   clocks:
@@ -90,8 +90,8 @@ examples:
 
     dma-controller@f9944000 {
         compatible = "qcom,bam-v1.4.0";
-        reg = <0xf9944000 0x15000>;
-        interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+        reg = <0xf9944000 0x19000>;
+        interrupts = <GIC_SPI 239 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&gcc GCC_BLSP2_AHB_CLK>;
         clock-names = "bam_clk";
         #dma-cells = <1>;
diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index 7d2fc4eb5530..eabf8a76d3a0 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -21,6 +21,7 @@ properties:
     enum:
       - qcom,sc7280-gpi-dma
       - qcom,sdm845-gpi-dma
+      - qcom,sm6350-gpi-dma
       - qcom,sm8150-gpi-dma
       - qcom,sm8250-gpi-dma
       - qcom,sm8350-gpi-dma
diff --git a/Documentation/devicetree/bindings/dma/qcom_adm.txt b/Documentation/devicetree/bindings/dma/qcom_adm.txt
deleted file mode 100644
index 9d3b2f917b7b..000000000000
--- a/Documentation/devicetree/bindings/dma/qcom_adm.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-QCOM ADM DMA Controller
-
-Required properties:
-- compatible: must contain "qcom,adm" for IPQ/APQ8064 and MSM8960
-- reg: Address range for DMA registers
-- interrupts: Should contain one interrupt shared by all channels
-- #dma-cells: must be <2>.  First cell denotes the channel number.  Second cell
-  denotes CRCI (client rate control interface) flow control assignment.
-- clocks: Should contain the core clock and interface clock.
-- clock-names: Must contain "core" for the core clock and "iface" for the
-  interface clock.
-- resets: Must contain an entry for each entry in reset names.
-- reset-names: Must include the following entries:
-  - clk
-  - c0
-  - c1
-  - c2
-- qcom,ee: indicates the security domain identifier used in the secure world.
-
-Example:
-		adm_dma: dma@18300000 {
-			compatible = "qcom,adm";
-			reg = <0x18300000 0x100000>;
-			interrupts = <0 170 0>;
-			#dma-cells = <2>;
-
-			clocks = <&gcc ADM0_CLK>, <&gcc ADM0_PBUS_CLK>;
-			clock-names = "core", "iface";
-
-			resets = <&gcc ADM0_RESET>,
-				<&gcc ADM0_C0_RESET>,
-				<&gcc ADM0_C1_RESET>,
-				<&gcc ADM0_C2_RESET>;
-			reset-names = "clk", "c0", "c1", "c2";
-			qcom,ee = <0>;
-		};
-
-DMA clients must use the format descripted in the dma.txt file, using a three
-cell specifier for each channel.
-
-Each dmas request consists of 3 cells:
- 1. phandle pointing to the DMA controller
- 2. channel number
- 3. CRCI assignment, if applicable.  If no CRCI flow control is required, use 0.
-    The CRCI is used for flow control.  It identifies the peripheral device that
-    is the source/destination for the transferred data.
-
-Example:
-
-	spi4: spi@1a280000 {
-		spi-max-frequency = <50000000>;
-
-		pinctrl-0 = <&spi_pins>;
-		pinctrl-names = "default";
-
-		cs-gpios = <&qcom_pinmux 20 0>;
-
-		dmas = <&adm_dma 6 9>,
-			<&adm_dma 5 10>;
-		dma-names = "rx", "tx";
-	};
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
index 7202cd68e759..89b591a05bce 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
@@ -45,6 +45,7 @@ properties:
           - enum:
               - renesas,dmac-r8a779a0     # R-Car V3U
               - renesas,dmac-r8a779f0     # R-Car S4-8
+              - renesas,dmac-r8a779g0     # R-Car V4H
           - const: renesas,rcar-gen4-dmac # R-Car Gen4
 
   reg: true
diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
index b849a1ed389d..47e477cce6d2 100644
--- a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
+++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt
@@ -4,7 +4,7 @@ Required properties:
 - compatible:	"ti,dra7-dma-crossbar" for DRA7xx DMA crossbar
 		"ti,am335x-edma-crossbar" for AM335x and AM437x
 - reg:		Memory map for accessing module
-- #dma-cells:	Should be set to to match with the DMA controller's dma-cells
+- #dma-cells:	Should be set to match with the DMA controller's dma-cells
 		for ti,dra7-dma-crossbar and <3> for ti,am335x-edma-crossbar.
 - dma-requests:	Number of DMA requests the crossbar can receive
 - dma-masters:	phandle pointing to the DMA controller
diff --git a/MAINTAINERS b/MAINTAINERS
index a79d3f97a914..20fbb44b184e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9157,6 +9157,7 @@ F:	net/dsa/tag_hellcreek.c
 
 HISILICON DMA DRIVER
 M:	Zhou Wang <wangzhou1@hisilicon.com>
+M:	Jie Hai <haijie1@hisilicon.com>
 L:	dmaengine@vger.kernel.org
 S:	Maintained
 F:	drivers/dma/hisi_dma.c
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a06d2a7627aa..7524b62a8870 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -180,7 +180,7 @@ config DMA_SUN6I
 
 config DW_AXI_DMAC
 	tristate "Synopsys DesignWare AXI DMA support"
-	depends on OF || COMPILE_TEST
+	depends on OF
 	depends on HAS_IOMEM
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 487a01aa207d..eea8bd33b4b7 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -2367,7 +2367,7 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 	INIT_LIST_HEAD(&dmadev->channels);
 
 	/*
-	 * Register as many many memcpy as we have physical channels,
+	 * Register as many memcpy as we have physical channels,
 	 * we won't always be able to use all but the code will have
 	 * to cope with that situation.
 	 */
diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
index d1f74a3aa999..317ca76ccafd 100644
--- a/drivers/dma/apple-admac.c
+++ b/drivers/dma/apple-admac.c
@@ -12,8 +12,9 @@
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
-#include <linux/interrupt.h>
+#include <linux/reset.h>
 #include <linux/spinlock.h>
+#include <linux/interrupt.h>
 
 #include "dmaengine.h"
 
@@ -95,7 +96,9 @@ struct admac_data {
 	struct dma_device dma;
 	struct device *dev;
 	__iomem void *base;
+	struct reset_control *rstc;
 
+	int irq;
 	int irq_index;
 	int nchannels;
 	struct admac_chan channels[];
@@ -724,18 +727,17 @@ static int admac_probe(struct platform_device *pdev)
 
 	if (irq < 0)
 		return dev_err_probe(&pdev->dev, irq, "no usable interrupt\n");
-
-	err = devm_request_irq(&pdev->dev, irq, admac_interrupt,
-			       0, dev_name(&pdev->dev), ad);
-	if (err)
-		return dev_err_probe(&pdev->dev, err,
-				     "unable to register interrupt\n");
+	ad->irq = irq;
 
 	ad->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ad->base))
 		return dev_err_probe(&pdev->dev, PTR_ERR(ad->base),
 				     "unable to obtain MMIO resource\n");
 
+	ad->rstc = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
+	if (IS_ERR(ad->rstc))
+		return PTR_ERR(ad->rstc);
+
 	dma = &ad->dma;
 
 	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
@@ -774,17 +776,38 @@ static int admac_probe(struct platform_device *pdev)
 		tasklet_setup(&adchan->tasklet, admac_chan_tasklet);
 	}
 
-	err = dma_async_device_register(&ad->dma);
+	err = reset_control_reset(ad->rstc);
 	if (err)
-		return dev_err_probe(&pdev->dev, err, "failed to register DMA device\n");
+		return dev_err_probe(&pdev->dev, err,
+				     "unable to trigger reset\n");
+
+	err = request_irq(irq, admac_interrupt, 0, dev_name(&pdev->dev), ad);
+	if (err) {
+		dev_err_probe(&pdev->dev, err,
+				"unable to register interrupt\n");
+		goto free_reset;
+	}
+
+	err = dma_async_device_register(&ad->dma);
+	if (err) {
+		dev_err_probe(&pdev->dev, err, "failed to register DMA device\n");
+		goto free_irq;
+	}
 
 	err = of_dma_controller_register(pdev->dev.of_node, admac_dma_of_xlate, ad);
 	if (err) {
 		dma_async_device_unregister(&ad->dma);
-		return dev_err_probe(&pdev->dev, err, "failed to register with OF\n");
+		dev_err_probe(&pdev->dev, err, "failed to register with OF\n");
+		goto free_irq;
 	}
 
 	return 0;
+
+free_irq:
+	free_irq(ad->irq, ad);
+free_reset:
+	reset_control_rearm(ad->rstc);
+	return err;
 }
 
 static int admac_remove(struct platform_device *pdev)
@@ -793,6 +816,8 @@ static int admac_remove(struct platform_device *pdev)
 
 	of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(&ad->dma);
+	free_irq(ad->irq, ad);
+	reset_control_rearm(ad->rstc);
 
 	return 0;
 }
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index b102d8eb5d83..d6c9781cd46a 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1470,10 +1470,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	bool			initd;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_COMPLETE)
-		return ret;
-
-	if (!txstate)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
 	spin_lock_irqsave(&atchan->lock, flags);
diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
index 07f756479663..c54b24ff5206 100644
--- a/drivers/dma/dw-edma/dw-edma-core.c
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -9,7 +9,6 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
-#include <linux/pm_runtime.h>
 #include <linux/dmaengine.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -682,15 +681,12 @@ static int dw_edma_alloc_chan_resources(struct dma_chan *dchan)
 	if (chan->status != EDMA_ST_IDLE)
 		return -EBUSY;
 
-	pm_runtime_get(chan->dw->chip->dev);
-
 	return 0;
 }
 
 static void dw_edma_free_chan_resources(struct dma_chan *dchan)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(5000);
-	struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
 	int ret;
 
 	while (time_before(jiffies, timeout)) {
@@ -703,8 +699,6 @@ static void dw_edma_free_chan_resources(struct dma_chan *dchan)
 
 		cpu_relax();
 	}
-
-	pm_runtime_put(chan->dw->chip->dev);
 }
 
 static int dw_edma_channel_setup(struct dw_edma *dw, bool write,
@@ -977,9 +971,6 @@ int dw_edma_probe(struct dw_edma_chip *chip)
 	if (err)
 		goto err_irq_free;
 
-	/* Power management */
-	pm_runtime_enable(dev);
-
 	/* Turn debugfs on */
 	dw_edma_v0_core_debugfs_on(dw);
 
@@ -1009,9 +1000,6 @@ int dw_edma_remove(struct dw_edma_chip *chip)
 	for (i = (dw->nr_irqs - 1); i >= 0; i--)
 		free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]);
 
-	/* Power management */
-	pm_runtime_disable(dev);
-
 	/* Deregister eDMA device */
 	dma_async_device_unregister(&dw->wr_edma);
 	list_for_each_entry_safe(chan, _chan, &dw->wr_edma.channels,
diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
index 43817ced3a3e..c1350a36fddd 100644
--- a/drivers/dma/hisi_dma.c
+++ b/drivers/dma/hisi_dma.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2019 HiSilicon Limited. */
+/* Copyright(c) 2019-2022 HiSilicon Limited. */
+
 #include <linux/bitfield.h>
 #include <linux/dmaengine.h>
 #include <linux/init.h>
@@ -9,32 +10,87 @@
 #include <linux/spinlock.h>
 #include "virt-dma.h"
 
-#define HISI_DMA_SQ_BASE_L		0x0
-#define HISI_DMA_SQ_BASE_H		0x4
-#define HISI_DMA_SQ_DEPTH		0x8
-#define HISI_DMA_SQ_TAIL_PTR		0xc
-#define HISI_DMA_CQ_BASE_L		0x10
-#define HISI_DMA_CQ_BASE_H		0x14
-#define HISI_DMA_CQ_DEPTH		0x18
-#define HISI_DMA_CQ_HEAD_PTR		0x1c
-#define HISI_DMA_CTRL0			0x20
-#define HISI_DMA_CTRL0_QUEUE_EN_S	0
-#define HISI_DMA_CTRL0_QUEUE_PAUSE_S	4
-#define HISI_DMA_CTRL1			0x24
-#define HISI_DMA_CTRL1_QUEUE_RESET_S	0
-#define HISI_DMA_Q_FSM_STS		0x30
-#define HISI_DMA_FSM_STS_MASK		GENMASK(3, 0)
-#define HISI_DMA_INT_STS		0x40
-#define HISI_DMA_INT_STS_MASK		GENMASK(12, 0)
-#define HISI_DMA_INT_MSK		0x44
-#define HISI_DMA_MODE			0x217c
-#define HISI_DMA_OFFSET			0x100
-
-#define HISI_DMA_MSI_NUM		32
-#define HISI_DMA_CHAN_NUM		30
-#define HISI_DMA_Q_DEPTH_VAL		1024
-
-#define PCI_BAR_2			2
+/* HiSilicon DMA register common field define */
+#define HISI_DMA_Q_SQ_BASE_L			0x0
+#define HISI_DMA_Q_SQ_BASE_H			0x4
+#define HISI_DMA_Q_SQ_DEPTH			0x8
+#define HISI_DMA_Q_SQ_TAIL_PTR			0xc
+#define HISI_DMA_Q_CQ_BASE_L			0x10
+#define HISI_DMA_Q_CQ_BASE_H			0x14
+#define HISI_DMA_Q_CQ_DEPTH			0x18
+#define HISI_DMA_Q_CQ_HEAD_PTR			0x1c
+#define HISI_DMA_Q_CTRL0			0x20
+#define HISI_DMA_Q_CTRL0_QUEUE_EN		BIT(0)
+#define HISI_DMA_Q_CTRL0_QUEUE_PAUSE		BIT(4)
+#define HISI_DMA_Q_CTRL1			0x24
+#define HISI_DMA_Q_CTRL1_QUEUE_RESET		BIT(0)
+#define HISI_DMA_Q_FSM_STS			0x30
+#define HISI_DMA_Q_FSM_STS_MASK			GENMASK(3, 0)
+#define HISI_DMA_Q_ERR_INT_NUM0			0x84
+#define HISI_DMA_Q_ERR_INT_NUM1			0x88
+#define HISI_DMA_Q_ERR_INT_NUM2			0x8c
+
+/* HiSilicon IP08 DMA register and field define */
+#define HISI_DMA_HIP08_MODE			0x217C
+#define HISI_DMA_HIP08_Q_BASE			0x0
+#define HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN	BIT(2)
+#define HISI_DMA_HIP08_Q_INT_STS		0x40
+#define HISI_DMA_HIP08_Q_INT_MSK		0x44
+#define HISI_DMA_HIP08_Q_INT_STS_MASK		GENMASK(14, 0)
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM3		0x90
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM4		0x94
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM5		0x98
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM6		0x48
+#define HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT	BIT(24)
+
+/* HiSilicon IP09 DMA register and field define */
+#define HISI_DMA_HIP09_DMA_FLR_DISABLE		0xA00
+#define HISI_DMA_HIP09_DMA_FLR_DISABLE_B	BIT(0)
+#define HISI_DMA_HIP09_Q_BASE			0x2000
+#define HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN	GENMASK(31, 28)
+#define HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT		BIT(26)
+#define HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT		BIT(27)
+#define HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE	BIT(2)
+#define HISI_DMA_HIP09_Q_INT_STS		0x40
+#define HISI_DMA_HIP09_Q_INT_MSK		0x44
+#define HISI_DMA_HIP09_Q_INT_STS_MASK		0x1
+#define HISI_DMA_HIP09_Q_ERR_INT_STS		0x48
+#define HISI_DMA_HIP09_Q_ERR_INT_MSK		0x4C
+#define HISI_DMA_HIP09_Q_ERR_INT_STS_MASK	GENMASK(18, 1)
+#define HISI_DMA_HIP09_PORT_CFG_REG(port_id)	(0x800 + \
+						(port_id) * 0x20)
+#define HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B	BIT(16)
+
+#define HISI_DMA_HIP09_MAX_PORT_NUM		16
+
+#define HISI_DMA_HIP08_MSI_NUM			32
+#define HISI_DMA_HIP08_CHAN_NUM			30
+#define HISI_DMA_HIP09_MSI_NUM			4
+#define HISI_DMA_HIP09_CHAN_NUM			4
+#define HISI_DMA_REVISION_HIP08B		0x21
+#define HISI_DMA_REVISION_HIP09A		0x30
+
+#define HISI_DMA_Q_OFFSET			0x100
+#define HISI_DMA_Q_DEPTH_VAL			1024
+
+#define PCI_BAR_2				2
+
+#define HISI_DMA_POLL_Q_STS_DELAY_US		10
+#define HISI_DMA_POLL_Q_STS_TIME_OUT_US		1000
+
+#define HISI_DMA_MAX_DIR_NAME_LEN		128
+
+/*
+ * The HIP08B(HiSilicon IP08) and HIP09A(HiSilicon IP09) are DMA iEPs, they
+ * have the same pci device id but different pci revision.
+ * Unfortunately, they have different register layouts, so two layout
+ * enumerations are defined.
+ */
+enum hisi_dma_reg_layout {
+	HISI_DMA_REG_LAYOUT_INVALID = 0,
+	HISI_DMA_REG_LAYOUT_HIP08,
+	HISI_DMA_REG_LAYOUT_HIP09
+};
 
 enum hisi_dma_mode {
 	EP = 0,
@@ -105,9 +161,162 @@ struct hisi_dma_dev {
 	struct dma_device dma_dev;
 	u32 chan_num;
 	u32 chan_depth;
+	enum hisi_dma_reg_layout reg_layout;
+	void __iomem *queue_base; /* queue region start of register */
 	struct hisi_dma_chan chan[];
 };
 
+#ifdef CONFIG_DEBUG_FS
+
+static const struct debugfs_reg32 hisi_dma_comm_chan_regs[] = {
+	{"DMA_QUEUE_SQ_DEPTH                ", 0x0008ull},
+	{"DMA_QUEUE_SQ_TAIL_PTR             ", 0x000Cull},
+	{"DMA_QUEUE_CQ_DEPTH                ", 0x0018ull},
+	{"DMA_QUEUE_CQ_HEAD_PTR             ", 0x001Cull},
+	{"DMA_QUEUE_CTRL0                   ", 0x0020ull},
+	{"DMA_QUEUE_CTRL1                   ", 0x0024ull},
+	{"DMA_QUEUE_FSM_STS                 ", 0x0030ull},
+	{"DMA_QUEUE_SQ_STS                  ", 0x0034ull},
+	{"DMA_QUEUE_CQ_TAIL_PTR             ", 0x003Cull},
+	{"DMA_QUEUE_INT_STS                 ", 0x0040ull},
+	{"DMA_QUEUE_INT_MSK                 ", 0x0044ull},
+	{"DMA_QUEUE_INT_RO                  ", 0x006Cull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip08_chan_regs[] = {
+	{"DMA_QUEUE_BYTE_CNT                ", 0x0038ull},
+	{"DMA_ERR_INT_NUM6                  ", 0x0048ull},
+	{"DMA_QUEUE_DESP0                   ", 0x0050ull},
+	{"DMA_QUEUE_DESP1                   ", 0x0054ull},
+	{"DMA_QUEUE_DESP2                   ", 0x0058ull},
+	{"DMA_QUEUE_DESP3                   ", 0x005Cull},
+	{"DMA_QUEUE_DESP4                   ", 0x0074ull},
+	{"DMA_QUEUE_DESP5                   ", 0x0078ull},
+	{"DMA_QUEUE_DESP6                   ", 0x007Cull},
+	{"DMA_QUEUE_DESP7                   ", 0x0080ull},
+	{"DMA_ERR_INT_NUM0                  ", 0x0084ull},
+	{"DMA_ERR_INT_NUM1                  ", 0x0088ull},
+	{"DMA_ERR_INT_NUM2                  ", 0x008Cull},
+	{"DMA_ERR_INT_NUM3                  ", 0x0090ull},
+	{"DMA_ERR_INT_NUM4                  ", 0x0094ull},
+	{"DMA_ERR_INT_NUM5                  ", 0x0098ull},
+	{"DMA_QUEUE_SQ_STS2                 ", 0x00A4ull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip09_chan_regs[] = {
+	{"DMA_QUEUE_ERR_INT_STS             ", 0x0048ull},
+	{"DMA_QUEUE_ERR_INT_MSK             ", 0x004Cull},
+	{"DFX_SQ_READ_ERR_PTR               ", 0x0068ull},
+	{"DFX_DMA_ERR_INT_NUM0              ", 0x0084ull},
+	{"DFX_DMA_ERR_INT_NUM1              ", 0x0088ull},
+	{"DFX_DMA_ERR_INT_NUM2              ", 0x008Cull},
+	{"DFX_DMA_QUEUE_SQ_STS2             ", 0x00A4ull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip08_comm_regs[] = {
+	{"DMA_ECC_ERR_ADDR                  ", 0x2004ull},
+	{"DMA_ECC_ECC_CNT                   ", 0x2014ull},
+	{"COMMON_AND_CH_ERR_STS             ", 0x2030ull},
+	{"LOCAL_CPL_ID_STS_0                ", 0x20E0ull},
+	{"LOCAL_CPL_ID_STS_1                ", 0x20E4ull},
+	{"LOCAL_CPL_ID_STS_2                ", 0x20E8ull},
+	{"LOCAL_CPL_ID_STS_3                ", 0x20ECull},
+	{"LOCAL_TLP_NUM                     ", 0x2158ull},
+	{"SQCQ_TLP_NUM                      ", 0x2164ull},
+	{"CPL_NUM                           ", 0x2168ull},
+	{"INF_BACK_PRESS_STS                ", 0x2170ull},
+	{"DMA_CH_RAS_LEVEL                  ", 0x2184ull},
+	{"DMA_CM_RAS_LEVEL                  ", 0x2188ull},
+	{"DMA_CH_ERR_STS                    ", 0x2190ull},
+	{"DMA_CH_DONE_STS                   ", 0x2194ull},
+	{"DMA_SQ_TAG_STS_0                  ", 0x21A0ull},
+	{"DMA_SQ_TAG_STS_1                  ", 0x21A4ull},
+	{"DMA_SQ_TAG_STS_2                  ", 0x21A8ull},
+	{"DMA_SQ_TAG_STS_3                  ", 0x21ACull},
+	{"LOCAL_P_ID_STS_0                  ", 0x21B0ull},
+	{"LOCAL_P_ID_STS_1                  ", 0x21B4ull},
+	{"LOCAL_P_ID_STS_2                  ", 0x21B8ull},
+	{"LOCAL_P_ID_STS_3                  ", 0x21BCull},
+	{"DMA_PREBUFF_INFO_0                ", 0x2200ull},
+	{"DMA_CM_TABLE_INFO_0               ", 0x2220ull},
+	{"DMA_CM_CE_RO                      ", 0x2244ull},
+	{"DMA_CM_NFE_RO                     ", 0x2248ull},
+	{"DMA_CM_FE_RO                      ", 0x224Cull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip09_comm_regs[] = {
+	{"COMMON_AND_CH_ERR_STS             ", 0x0030ull},
+	{"DMA_PORT_IDLE_STS                 ", 0x0150ull},
+	{"DMA_CH_RAS_LEVEL                  ", 0x0184ull},
+	{"DMA_CM_RAS_LEVEL                  ", 0x0188ull},
+	{"DMA_CM_CE_RO                      ", 0x0244ull},
+	{"DMA_CM_NFE_RO                     ", 0x0248ull},
+	{"DMA_CM_FE_RO                      ", 0x024Cull},
+	{"DFX_INF_BACK_PRESS_STS0           ", 0x1A40ull},
+	{"DFX_INF_BACK_PRESS_STS1           ", 0x1A44ull},
+	{"DFX_INF_BACK_PRESS_STS2           ", 0x1A48ull},
+	{"DFX_DMA_WRR_DISABLE               ", 0x1A4Cull},
+	{"DFX_PA_REQ_TLP_NUM                ", 0x1C00ull},
+	{"DFX_PA_BACK_TLP_NUM               ", 0x1C04ull},
+	{"DFX_PA_RETRY_TLP_NUM              ", 0x1C08ull},
+	{"DFX_LOCAL_NP_TLP_NUM              ", 0x1C0Cull},
+	{"DFX_LOCAL_CPL_HEAD_TLP_NUM        ", 0x1C10ull},
+	{"DFX_LOCAL_CPL_DATA_TLP_NUM        ", 0x1C14ull},
+	{"DFX_LOCAL_CPL_EXT_DATA_TLP_NUM    ", 0x1C18ull},
+	{"DFX_LOCAL_P_HEAD_TLP_NUM          ", 0x1C1Cull},
+	{"DFX_LOCAL_P_ACK_TLP_NUM           ", 0x1C20ull},
+	{"DFX_BUF_ALOC_PORT_REQ_NUM         ", 0x1C24ull},
+	{"DFX_BUF_ALOC_PORT_RESULT_NUM      ", 0x1C28ull},
+	{"DFX_BUF_FAIL_SIZE_NUM             ", 0x1C2Cull},
+	{"DFX_BUF_ALOC_SIZE_NUM             ", 0x1C30ull},
+	{"DFX_BUF_NP_RELEASE_SIZE_NUM       ", 0x1C34ull},
+	{"DFX_BUF_P_RELEASE_SIZE_NUM        ", 0x1C38ull},
+	{"DFX_BUF_PORT_RELEASE_SIZE_NUM     ", 0x1C3Cull},
+	{"DFX_DMA_PREBUF_MEM0_ECC_ERR_ADDR  ", 0x1CA8ull},
+	{"DFX_DMA_PREBUF_MEM0_ECC_CNT       ", 0x1CACull},
+	{"DFX_DMA_LOC_NP_OSTB_ECC_ERR_ADDR  ", 0x1CB0ull},
+	{"DFX_DMA_LOC_NP_OSTB_ECC_CNT       ", 0x1CB4ull},
+	{"DFX_DMA_PREBUF_MEM1_ECC_ERR_ADDR  ", 0x1CC0ull},
+	{"DFX_DMA_PREBUF_MEM1_ECC_CNT       ", 0x1CC4ull},
+	{"DMA_CH_DONE_STS                   ", 0x02E0ull},
+	{"DMA_CH_ERR_STS                    ", 0x0320ull},
+};
+#endif /* CONFIG_DEBUG_FS */
+
+static enum hisi_dma_reg_layout hisi_dma_get_reg_layout(struct pci_dev *pdev)
+{
+	if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+		return HISI_DMA_REG_LAYOUT_HIP08;
+	else if (pdev->revision >= HISI_DMA_REVISION_HIP09A)
+		return HISI_DMA_REG_LAYOUT_HIP09;
+
+	return HISI_DMA_REG_LAYOUT_INVALID;
+}
+
+static u32 hisi_dma_get_chan_num(struct pci_dev *pdev)
+{
+	if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+		return HISI_DMA_HIP08_CHAN_NUM;
+
+	return HISI_DMA_HIP09_CHAN_NUM;
+}
+
+static u32 hisi_dma_get_msi_num(struct pci_dev *pdev)
+{
+	if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+		return HISI_DMA_HIP08_MSI_NUM;
+
+	return HISI_DMA_HIP09_MSI_NUM;
+}
+
+static u32 hisi_dma_get_queue_base(struct pci_dev *pdev)
+{
+	if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+		return HISI_DMA_HIP08_Q_BASE;
+
+	return HISI_DMA_HIP09_Q_BASE;
+}
+
 static inline struct hisi_dma_chan *to_hisi_dma_chan(struct dma_chan *c)
 {
 	return container_of(c, struct hisi_dma_chan, vc.chan);
@@ -121,7 +330,7 @@ static inline struct hisi_dma_desc *to_hisi_dma_desc(struct virt_dma_desc *vd)
 static inline void hisi_dma_chan_write(void __iomem *base, u32 reg, u32 index,
 				       u32 val)
 {
-	writel_relaxed(val, base + reg + index * HISI_DMA_OFFSET);
+	writel_relaxed(val, base + reg + index * HISI_DMA_Q_OFFSET);
 }
 
 static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
@@ -129,70 +338,103 @@ static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
 	u32 tmp;
 
 	tmp = readl_relaxed(addr);
-	tmp = val ? tmp | BIT(pos) : tmp & ~BIT(pos);
+	tmp = val ? tmp | pos : tmp & ~pos;
 	writel_relaxed(tmp, addr);
 }
 
 static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index,
 			       bool pause)
 {
-	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index *
-			     HISI_DMA_OFFSET;
+	void __iomem *addr;
 
-	hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_PAUSE_S, pause);
+	addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 +
+	       index * HISI_DMA_Q_OFFSET;
+	hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_PAUSE, pause);
 }
 
 static void hisi_dma_enable_dma(struct hisi_dma_dev *hdma_dev, u32 index,
 				bool enable)
 {
-	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL0 + index *
-			     HISI_DMA_OFFSET;
+	void __iomem *addr;
 
-	hisi_dma_update_bit(addr, HISI_DMA_CTRL0_QUEUE_EN_S, enable);
+	addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 +
+	       index * HISI_DMA_Q_OFFSET;
+	hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_EN, enable);
 }
 
 static void hisi_dma_mask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
 {
-	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_INT_MSK, qp_index,
-			    HISI_DMA_INT_STS_MASK);
+	void __iomem *q_base = hdma_dev->queue_base;
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK,
+				    qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK);
+	else {
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK,
+				    qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK,
+				    qp_index,
+				    HISI_DMA_HIP09_Q_ERR_INT_STS_MASK);
+	}
 }
 
 static void hisi_dma_unmask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
 {
-	void __iomem *base = hdma_dev->base;
-
-	hisi_dma_chan_write(base, HISI_DMA_INT_STS, qp_index,
-			    HISI_DMA_INT_STS_MASK);
-	hisi_dma_chan_write(base, HISI_DMA_INT_MSK, qp_index, 0);
+	void __iomem *q_base = hdma_dev->queue_base;
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) {
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_STS,
+				    qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK,
+				    qp_index, 0);
+	} else {
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_STS,
+				    qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_STS,
+				    qp_index,
+				    HISI_DMA_HIP09_Q_ERR_INT_STS_MASK);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK,
+				    qp_index, 0);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK,
+				    qp_index, 0);
+	}
 }
 
 static void hisi_dma_do_reset(struct hisi_dma_dev *hdma_dev, u32 index)
 {
-	void __iomem *addr = hdma_dev->base + HISI_DMA_CTRL1 + index *
-			     HISI_DMA_OFFSET;
+	void __iomem *addr;
 
-	hisi_dma_update_bit(addr, HISI_DMA_CTRL1_QUEUE_RESET_S, 1);
+	addr = hdma_dev->queue_base +
+	       HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET;
+	hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL1_QUEUE_RESET, 1);
 }
 
 static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index)
 {
-	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, index, 0);
-	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+	void __iomem *q_base = hdma_dev->queue_base;
+
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0);
 }
 
-static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan)
+static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan,
+					      bool disable)
 {
 	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
 	u32 index = chan->qp_num, tmp;
+	void __iomem *addr;
 	int ret;
 
 	hisi_dma_pause_dma(hdma_dev, index, true);
 	hisi_dma_enable_dma(hdma_dev, index, false);
 	hisi_dma_mask_irq(hdma_dev, index);
 
-	ret = readl_relaxed_poll_timeout(hdma_dev->base +
-		HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
-		FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) != RUN, 10, 1000);
+	addr = hdma_dev->queue_base +
+	       HISI_DMA_Q_FSM_STS + index * HISI_DMA_Q_OFFSET;
+
+	ret = readl_relaxed_poll_timeout(addr, tmp,
+		FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) != RUN,
+		HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US);
 	if (ret) {
 		dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n");
 		WARN_ON(1);
@@ -201,12 +443,15 @@ static void hisi_dma_reset_hw_chan(struct hisi_dma_chan *chan)
 	hisi_dma_do_reset(hdma_dev, index);
 	hisi_dma_reset_qp_point(hdma_dev, index);
 	hisi_dma_pause_dma(hdma_dev, index, false);
-	hisi_dma_enable_dma(hdma_dev, index, true);
-	hisi_dma_unmask_irq(hdma_dev, index);
 
-	ret = readl_relaxed_poll_timeout(hdma_dev->base +
-		HISI_DMA_Q_FSM_STS + index * HISI_DMA_OFFSET, tmp,
-		FIELD_GET(HISI_DMA_FSM_STS_MASK, tmp) == IDLE, 10, 1000);
+	if (!disable) {
+		hisi_dma_enable_dma(hdma_dev, index, true);
+		hisi_dma_unmask_irq(hdma_dev, index);
+	}
+
+	ret = readl_relaxed_poll_timeout(addr, tmp,
+		FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) == IDLE,
+		HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US);
 	if (ret) {
 		dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n");
 		WARN_ON(1);
@@ -218,7 +463,7 @@ static void hisi_dma_free_chan_resources(struct dma_chan *c)
 	struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
 	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
 
-	hisi_dma_reset_hw_chan(chan);
+	hisi_dma_reset_or_disable_hw_chan(chan, false);
 	vchan_free_chan_resources(&chan->vc);
 
 	memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth);
@@ -267,7 +512,6 @@ static void hisi_dma_start_transfer(struct hisi_dma_chan *chan)
 
 	vd = vchan_next_desc(&chan->vc);
 	if (!vd) {
-		dev_err(&hdma_dev->pdev->dev, "no issued task!\n");
 		chan->desc = NULL;
 		return;
 	}
@@ -288,8 +532,8 @@ static void hisi_dma_start_transfer(struct hisi_dma_chan *chan)
 	chan->sq_tail = (chan->sq_tail + 1) % hdma_dev->chan_depth;
 
 	/* update sq_tail to trigger a new task */
-	hisi_dma_chan_write(hdma_dev->base, HISI_DMA_SQ_TAIL_PTR, chan->qp_num,
-			    chan->sq_tail);
+	hisi_dma_chan_write(hdma_dev->queue_base, HISI_DMA_Q_SQ_TAIL_PTR,
+			    chan->qp_num, chan->sq_tail);
 }
 
 static void hisi_dma_issue_pending(struct dma_chan *c)
@@ -299,7 +543,7 @@ static void hisi_dma_issue_pending(struct dma_chan *c)
 
 	spin_lock_irqsave(&chan->vc.lock, flags);
 
-	if (vchan_issue_pending(&chan->vc))
+	if (vchan_issue_pending(&chan->vc) && !chan->desc)
 		hisi_dma_start_transfer(chan);
 
 	spin_unlock_irqrestore(&chan->vc.lock, flags);
@@ -363,26 +607,86 @@ static int hisi_dma_alloc_qps_mem(struct hisi_dma_dev *hdma_dev)
 static void hisi_dma_init_hw_qp(struct hisi_dma_dev *hdma_dev, u32 index)
 {
 	struct hisi_dma_chan *chan = &hdma_dev->chan[index];
+	void __iomem *q_base = hdma_dev->queue_base;
 	u32 hw_depth = hdma_dev->chan_depth - 1;
-	void __iomem *base = hdma_dev->base;
+	void __iomem *addr;
+	u32 tmp;
 
 	/* set sq, cq base */
-	hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_L, index,
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_L, index,
 			    lower_32_bits(chan->sq_dma));
-	hisi_dma_chan_write(base, HISI_DMA_SQ_BASE_H, index,
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_H, index,
 			    upper_32_bits(chan->sq_dma));
-	hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_L, index,
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_L, index,
 			    lower_32_bits(chan->cq_dma));
-	hisi_dma_chan_write(base, HISI_DMA_CQ_BASE_H, index,
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_H, index,
 			    upper_32_bits(chan->cq_dma));
 
 	/* set sq, cq depth */
-	hisi_dma_chan_write(base, HISI_DMA_SQ_DEPTH, index, hw_depth);
-	hisi_dma_chan_write(base, HISI_DMA_CQ_DEPTH, index, hw_depth);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_DEPTH, index, hw_depth);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_DEPTH, index, hw_depth);
 
 	/* init sq tail and cq head */
-	hisi_dma_chan_write(base, HISI_DMA_SQ_TAIL_PTR, index, 0);
-	hisi_dma_chan_write(base, HISI_DMA_CQ_HEAD_PTR, index, 0);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0);
+
+	/* init error interrupt stats */
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM0, index, 0);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM1, index, 0);
+	hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM2, index, 0);
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) {
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM3,
+				    index, 0);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM4,
+				    index, 0);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM5,
+				    index, 0);
+		hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM6,
+				    index, 0);
+		/*
+		 * init SQ/CQ direction selecting register.
+		 * "0" selects the local side and "1" selects the remote side.
+		 */
+		addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET;
+		hisi_dma_update_bit(addr, HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT, 0);
+
+		/*
+		 * 0 - Continue to next descriptor if error occurs.
+		 * 1 - Abort the DMA queue if error occurs.
+		 */
+		hisi_dma_update_bit(addr,
+				    HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN, 0);
+	} else {
+		addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET;
+
+		/*
+		 * init SQ/CQ direction selecting register.
+		 * "0" selects the local side and "1" selects the remote side.
+		 */
+		hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT, 0);
+		hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT, 0);
+
+		/*
+		 * 0 - Continue to next descriptor if error occurs.
+		 * 1 - Abort the DMA queue if error occurs.
+		 */
+
+		tmp = readl_relaxed(addr);
+		tmp &= ~HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN;
+		writel_relaxed(tmp, addr);
+
+		/*
+		 * 0 - DMA processes FLR together with the CPU.
+		 * 1 - DMA does not process FLR; only the CPU handles FLR.
+		 */
+		addr = q_base + HISI_DMA_HIP09_DMA_FLR_DISABLE +
+		       index * HISI_DMA_Q_OFFSET;
+		hisi_dma_update_bit(addr, HISI_DMA_HIP09_DMA_FLR_DISABLE_B, 0);
+
+		addr = q_base + HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET;
+		hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE, 1);
+	}
 }
 
 static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
@@ -394,7 +698,7 @@ static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
 
 static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
 {
-	hisi_dma_reset_hw_chan(&hdma_dev->chan[qp_index]);
+	hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true);
 }
 
 static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev)
@@ -426,24 +730,23 @@ static irqreturn_t hisi_dma_irq(int irq, void *data)
 	struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
 	struct hisi_dma_desc *desc;
 	struct hisi_dma_cqe *cqe;
+	void __iomem *q_base;
 
 	spin_lock(&chan->vc.lock);
 
 	desc = chan->desc;
 	cqe = chan->cq + chan->cq_head;
+	q_base = hdma_dev->queue_base;
 	if (desc) {
+		chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth;
+		hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR,
+				    chan->qp_num, chan->cq_head);
 		if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) {
-			chan->cq_head = (chan->cq_head + 1) %
-					hdma_dev->chan_depth;
-			hisi_dma_chan_write(hdma_dev->base,
-					    HISI_DMA_CQ_HEAD_PTR, chan->qp_num,
-					    chan->cq_head);
 			vchan_cookie_complete(&desc->vd);
+			hisi_dma_start_transfer(chan);
 		} else {
 			dev_err(&hdma_dev->pdev->dev, "task error!\n");
 		}
-
-		chan->desc = NULL;
 	}
 
 	spin_unlock(&chan->vc.lock);
@@ -497,16 +800,169 @@ static void hisi_dma_disable_hw_channels(void *data)
 static void hisi_dma_set_mode(struct hisi_dma_dev *hdma_dev,
 			      enum hisi_dma_mode mode)
 {
-	writel_relaxed(mode == RC ? 1 : 0, hdma_dev->base + HISI_DMA_MODE);
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+		writel_relaxed(mode == RC ? 1 : 0,
+			       hdma_dev->base + HISI_DMA_HIP08_MODE);
 }
 
+static void hisi_dma_init_hw(struct hisi_dma_dev *hdma_dev)
+{
+	void __iomem *addr;
+	int i;
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP09) {
+		for (i = 0; i < HISI_DMA_HIP09_MAX_PORT_NUM; i++) {
+			addr = hdma_dev->base + HISI_DMA_HIP09_PORT_CFG_REG(i);
+			hisi_dma_update_bit(addr,
+				HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B, 1);
+		}
+	}
+}
+
+static void hisi_dma_init_dma_dev(struct hisi_dma_dev *hdma_dev)
+{
+	struct dma_device *dma_dev;
+
+	dma_dev = &hdma_dev->dma_dev;
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy;
+	dma_dev->device_tx_status = hisi_dma_tx_status;
+	dma_dev->device_issue_pending = hisi_dma_issue_pending;
+	dma_dev->device_terminate_all = hisi_dma_terminate_all;
+	dma_dev->device_synchronize = hisi_dma_synchronize;
+	dma_dev->directions = BIT(DMA_MEM_TO_MEM);
+	dma_dev->dev = &hdma_dev->pdev->dev;
+	INIT_LIST_HEAD(&dma_dev->channels);
+}
+
+/* --- debugfs implementation --- */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+static struct debugfs_reg32 *hisi_dma_get_ch_regs(struct hisi_dma_dev *hdma_dev,
+						  u32 *regs_sz)
+{
+	struct device *dev = &hdma_dev->pdev->dev;
+	struct debugfs_reg32 *regs;
+	u32 regs_sz_comm;
+
+	regs_sz_comm = ARRAY_SIZE(hisi_dma_comm_chan_regs);
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+		*regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip08_chan_regs);
+	else
+		*regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip09_chan_regs);
+
+	regs = devm_kcalloc(dev, *regs_sz, sizeof(struct debugfs_reg32),
+			    GFP_KERNEL);
+	if (!regs)
+		return NULL;
+	memcpy(regs, hisi_dma_comm_chan_regs, sizeof(hisi_dma_comm_chan_regs));
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+		memcpy(regs + regs_sz_comm, hisi_dma_hip08_chan_regs,
+		       sizeof(hisi_dma_hip08_chan_regs));
+	else
+		memcpy(regs + regs_sz_comm, hisi_dma_hip09_chan_regs,
+		       sizeof(hisi_dma_hip09_chan_regs));
+
+	return regs;
+}
+
+static int hisi_dma_create_chan_dir(struct hisi_dma_dev *hdma_dev)
+{
+	char dir_name[HISI_DMA_MAX_DIR_NAME_LEN];
+	struct debugfs_regset32 *regsets;
+	struct debugfs_reg32 *regs;
+	struct dentry *chan_dir;
+	struct device *dev;
+	u32 regs_sz;
+	int ret;
+	int i;
+
+	dev = &hdma_dev->pdev->dev;
+
+	regsets = devm_kcalloc(dev, hdma_dev->chan_num,
+			       sizeof(*regsets), GFP_KERNEL);
+	if (!regsets)
+		return -ENOMEM;
+
+	regs = hisi_dma_get_ch_regs(hdma_dev, &regs_sz);
+	if (!regs)
+		return -ENOMEM;
+
+	for (i = 0; i < hdma_dev->chan_num; i++) {
+		regsets[i].regs = regs;
+		regsets[i].nregs = regs_sz;
+		regsets[i].base = hdma_dev->queue_base + i * HISI_DMA_Q_OFFSET;
+		regsets[i].dev = dev;
+
+		memset(dir_name, 0, HISI_DMA_MAX_DIR_NAME_LEN);
+		ret = sprintf(dir_name, "channel%d", i);
+		if (ret < 0)
+			return ret;
+
+		chan_dir = debugfs_create_dir(dir_name,
+					      hdma_dev->dma_dev.dbg_dev_root);
+		debugfs_create_regset32("regs", 0444, chan_dir, &regsets[i]);
+	}
+
+	return 0;
+}
+
+static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev)
+{
+	struct debugfs_regset32 *regset;
+	struct device *dev;
+	int ret;
+
+	dev = &hdma_dev->pdev->dev;
+
+	if (hdma_dev->dma_dev.dbg_dev_root == NULL)
+		return;
+
+	regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return;
+
+	if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) {
+		regset->regs = hisi_dma_hip08_comm_regs;
+		regset->nregs = ARRAY_SIZE(hisi_dma_hip08_comm_regs);
+	} else {
+		regset->regs = hisi_dma_hip09_comm_regs;
+		regset->nregs = ARRAY_SIZE(hisi_dma_hip09_comm_regs);
+	}
+	regset->base = hdma_dev->base;
+	regset->dev = dev;
+
+	debugfs_create_regset32("regs", 0444,
+				hdma_dev->dma_dev.dbg_dev_root, regset);
+
+	ret = hisi_dma_create_chan_dir(hdma_dev);
+	if (ret < 0)
+		dev_info(&hdma_dev->pdev->dev, "failed to create debugfs for channels!\n");
+}
+#else
+static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev) { }
+#endif /* CONFIG_DEBUG_FS */
+/* --- debugfs implementation --- */
+
 static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	enum hisi_dma_reg_layout reg_layout;
 	struct device *dev = &pdev->dev;
 	struct hisi_dma_dev *hdma_dev;
 	struct dma_device *dma_dev;
+	u32 chan_num;
+	u32 msi_num;
 	int ret;
 
+	reg_layout = hisi_dma_get_reg_layout(pdev);
+	if (reg_layout == HISI_DMA_REG_LAYOUT_INVALID) {
+		dev_err(dev, "unsupported device!\n");
+		return -EINVAL;
+	}
+
 	ret = pcim_enable_device(pdev);
 	if (ret) {
 		dev_err(dev, "failed to enable device mem!\n");
@@ -523,40 +979,37 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		return ret;
 
-	hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, HISI_DMA_CHAN_NUM), GFP_KERNEL);
+	chan_num = hisi_dma_get_chan_num(pdev);
+	hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, chan_num),
+				GFP_KERNEL);
 	if (!hdma_dev)
 		return -EINVAL;
 
 	hdma_dev->base = pcim_iomap_table(pdev)[PCI_BAR_2];
 	hdma_dev->pdev = pdev;
-	hdma_dev->chan_num = HISI_DMA_CHAN_NUM;
 	hdma_dev->chan_depth = HISI_DMA_Q_DEPTH_VAL;
+	hdma_dev->chan_num = chan_num;
+	hdma_dev->reg_layout = reg_layout;
+	hdma_dev->queue_base = hdma_dev->base + hisi_dma_get_queue_base(pdev);
 
 	pci_set_drvdata(pdev, hdma_dev);
 	pci_set_master(pdev);
 
+	msi_num = hisi_dma_get_msi_num(pdev);
+
 	/* This will be freed by 'pcim_release()'. See 'pcim_enable_device()' */
-	ret = pci_alloc_irq_vectors(pdev, HISI_DMA_MSI_NUM, HISI_DMA_MSI_NUM,
-				    PCI_IRQ_MSI);
+	ret = pci_alloc_irq_vectors(pdev, msi_num, msi_num, PCI_IRQ_MSI);
 	if (ret < 0) {
 		dev_err(dev, "Failed to allocate MSI vectors!\n");
 		return ret;
 	}
 
-	dma_dev = &hdma_dev->dma_dev;
-	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
-	dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
-	dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy;
-	dma_dev->device_tx_status = hisi_dma_tx_status;
-	dma_dev->device_issue_pending = hisi_dma_issue_pending;
-	dma_dev->device_terminate_all = hisi_dma_terminate_all;
-	dma_dev->device_synchronize = hisi_dma_synchronize;
-	dma_dev->directions = BIT(DMA_MEM_TO_MEM);
-	dma_dev->dev = dev;
-	INIT_LIST_HEAD(&dma_dev->channels);
+	hisi_dma_init_dma_dev(hdma_dev);
 
 	hisi_dma_set_mode(hdma_dev, RC);
 
+	hisi_dma_init_hw(hdma_dev);
+
 	ret = hisi_dma_enable_hw_channels(hdma_dev);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable hw channel!\n");
@@ -568,11 +1021,16 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		return ret;
 
+	dma_dev = &hdma_dev->dma_dev;
 	ret = dmaenginem_async_device_register(dma_dev);
-	if (ret < 0)
+	if (ret < 0) {
 		dev_err(dev, "failed to register device!\n");
+		return ret;
+	}
+
+	hisi_dma_create_debugfs(hdma_dev);
 
-	return ret;
+	return 0;
 }
 
 static const struct pci_device_id hisi_dma_pci_tbl[] = {
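
Editor's note: the hisi_dma hunks above funnel every per-queue access through one addressing scheme, queue_base + reg + index * HISI_DMA_Q_OFFSET, and change hisi_dma_update_bit() to take a ready-made mask instead of a bit position. The short user-space analogue below only illustrates that pattern; the offsets, names, and the plain array standing in for MMIO are hypothetical and not part of the patch.

#include <stdint.h>
#include <stdio.h>

#define Q_OFFSET	0x20		/* hypothetical per-queue register stride */
#define Q_CTRL0		0x08		/* hypothetical control register offset */
#define QUEUE_PAUSE	(1u << 4)	/* hypothetical pause bit mask */

/* Masked read-modify-write, mirroring the mask-based update helper. */
static void update_bit(uint32_t *addr, uint32_t mask, int set)
{
	uint32_t tmp = *addr;

	tmp = set ? (tmp | mask) : (tmp & ~mask);
	*addr = tmp;
}

int main(void)
{
	uint32_t regs[64] = { 0 };	/* fake register file in place of MMIO */
	unsigned int index = 2;
	/* Per-queue register address: base + reg + index * stride. */
	uint32_t *ctrl0 = &regs[(Q_CTRL0 + index * Q_OFFSET) / sizeof(uint32_t)];

	update_bit(ctrl0, QUEUE_PAUSE, 1);
	printf("queue %u ctrl0 = %#x\n", index, *ctrl0);
	update_bit(ctrl0, QUEUE_PAUSE, 0);
	printf("queue %u ctrl0 = %#x\n", index, *ctrl0);
	return 0;
}
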
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 92caae55aece..af5a2e252c25 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -16,12 +16,20 @@
  *    port 3, and so on.
  */
 
+#include <linux/bits.h>
 #include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
 #include <linux/module.h>
+#include <linux/percpu-defs.h>
+#include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
 
 #include "hsu.h"
 
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index 9e5956345748..3bca577b98a1 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -10,7 +10,11 @@
 #ifndef __DMA_HSU_H__
 #define __DMA_HSU_H__
 
-#include <linux/spinlock.h>
+#include <linux/bits.h>
+#include <linux/container_of.h>
+#include <linux/io.h>
+#include <linux/types.h>
+
 #include <linux/dma/hsu.h>
 
 #include "../virt-dma.h"
@@ -36,11 +40,11 @@
 
 /* Bits in HSU_CH_SR */
 #define HSU_CH_SR_DESCTO(x)	BIT(8 + (x))
-#define HSU_CH_SR_DESCTO_ANY	(BIT(11) | BIT(10) | BIT(9) | BIT(8))
+#define HSU_CH_SR_DESCTO_ANY	GENMASK(11, 8)
 #define HSU_CH_SR_CHE		BIT(15)
 #define HSU_CH_SR_DESCE(x)	BIT(16 + (x))
-#define HSU_CH_SR_DESCE_ANY	(BIT(19) | BIT(18) | BIT(17) | BIT(16))
-#define HSU_CH_SR_CDESC_ANY	(BIT(31) | BIT(30))
+#define HSU_CH_SR_DESCE_ANY	GENMASK(19, 16)
+#define HSU_CH_SR_CDESC_ANY	GENMASK(31, 30)
 
 /* Bits in HSU_CH_CR */
 #define HSU_CH_CR_CHA		BIT(0)
diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
index 6a2df3dd78d0..0fcc0c0c22fc 100644
--- a/drivers/dma/hsu/pci.c
+++ b/drivers/dma/hsu/pci.c
@@ -10,6 +10,7 @@
 
 #include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 
@@ -26,29 +27,32 @@
 static irqreturn_t hsu_pci_irq(int irq, void *dev)
 {
 	struct hsu_dma_chip *chip = dev;
-	u32 dmaisr;
-	u32 status;
+	unsigned long dmaisr;
 	unsigned short i;
+	u32 status;
 	int ret = 0;
 	int err;
 
 	dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
-	for (i = 0; i < chip->hsu->nr_channels; i++) {
-		if (dmaisr & 0x1) {
-			err = hsu_dma_get_status(chip, i, &status);
-			if (err > 0)
-				ret |= 1;
-			else if (err == 0)
-				ret |= hsu_dma_do_irq(chip, i, status);
-		}
-		dmaisr >>= 1;
+	for_each_set_bit(i, &dmaisr, chip->hsu->nr_channels) {
+		err = hsu_dma_get_status(chip, i, &status);
+		if (err > 0)
+			ret |= 1;
+		else if (err == 0)
+			ret |= hsu_dma_do_irq(chip, i, status);
 	}
 
 	return IRQ_RETVAL(ret);
 }
 
+static void hsu_pci_dma_remove(void *chip)
+{
+	hsu_dma_remove(chip);
+}
+
 static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct device *dev = &pdev->dev;
 	struct hsu_dma_chip *chip;
 	int ret;
 
@@ -87,9 +91,13 @@ static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		return ret;
 
-	ret = request_irq(chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip);
+	ret = devm_add_action_or_reset(dev, hsu_pci_dma_remove, chip);
 	if (ret)
-		goto err_register_irq;
+		return ret;
+
+	ret = devm_request_irq(dev, chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip);
+	if (ret)
+		return ret;
 
 	/*
 	 * On Intel Tangier B0 and Anniedale the interrupt line, disregarding
@@ -105,18 +113,6 @@ static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, chip);
 
 	return 0;
-
-err_register_irq:
-	hsu_dma_remove(chip);
-	return ret;
-}
-
-static void hsu_pci_remove(struct pci_dev *pdev)
-{
-	struct hsu_dma_chip *chip = pci_get_drvdata(pdev);
-
-	free_irq(chip->irq, chip);
-	hsu_dma_remove(chip);
 }
 
 static const struct pci_device_id hsu_pci_id_table[] = {
@@ -130,7 +126,6 @@ static struct pci_driver hsu_pci_driver = {
 	.name		= "hsu_dma_pci",
 	.id_table	= hsu_pci_id_table,
 	.probe		= hsu_pci_probe,
-	.remove		= hsu_pci_remove,
 };
 
 module_pci_driver(hsu_pci_driver);
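
Editor's note: the hsu/pci.c hunk above drops the explicit ->remove() callback and the hand-rolled request_irq()/free_irq() pair in favour of managed resources. Ordering matters here: devm_add_action_or_reset() is registered before devm_request_irq(), and because devres callbacks run in reverse order on unbind or probe failure, the IRQ is released before hsu_dma_remove() runs, matching the old error path. A minimal sketch of that pattern (hypothetical my_* names, not part of the patch) could look like:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/pci.h>

static irqreturn_t my_irq(int irq, void *data)
{
	return IRQ_HANDLED;
}

static void my_core_remove(void *core)
{
	/* Runs last on unwind: the IRQ below has already been freed. */
}

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct device *dev = &pdev->dev;
	void *core = pdev;	/* stand-in for the chip/core object */
	int ret;

	/* 1. Register core teardown first ... */
	ret = devm_add_action_or_reset(dev, my_core_remove, core);
	if (ret)
		return ret;

	/* 2. ... then the IRQ, so devres releases it before the core. */
	return devm_request_irq(dev, pci_irq_vector(pdev, 0), my_irq, 0,
				"my_dev", core);
}
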
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 5a8cc52c1abf..2c1e6f6daa62 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -196,6 +196,7 @@ int idxd_wq_enable(struct idxd_wq *wq)
 	}
 
 	wq->state = IDXD_WQ_ENABLED;
+	set_bit(wq->id, idxd->wq_enable_map);
 	dev_dbg(dev, "WQ %d enabled\n", wq->id);
 	return 0;
 }
@@ -223,6 +224,7 @@ int idxd_wq_disable(struct idxd_wq *wq, bool reset_config)
 
 	if (reset_config)
 		idxd_wq_disable_cleanup(wq);
+	clear_bit(wq->id, idxd->wq_enable_map);
 	wq->state = IDXD_WQ_DISABLED;
 	dev_dbg(dev, "WQ %d disabled\n", wq->id);
 	return 0;
@@ -258,7 +260,6 @@ void idxd_wq_reset(struct idxd_wq *wq)
 	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
 	idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
 	idxd_wq_disable_cleanup(wq);
-	wq->state = IDXD_WQ_DISABLED;
 }
 
 int idxd_wq_map_portal(struct idxd_wq *wq)
@@ -378,17 +379,20 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
 	struct idxd_device *idxd = wq->idxd;
 
 	lockdep_assert_held(&wq->wq_lock);
+	wq->state = IDXD_WQ_DISABLED;
 	memset(wq->wqcfg, 0, idxd->wqcfg_size);
 	wq->type = IDXD_WQT_NONE;
 	wq->threshold = 0;
 	wq->priority = 0;
-	wq->ats_dis = 0;
 	wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
 	clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
 	clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+	clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
 	memset(wq->name, 0, WQ_NAME_SIZE);
 	wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
 	wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
+	if (wq->opcap_bmap)
+		bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
 }
 
 static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
@@ -705,6 +709,8 @@ static void idxd_groups_clear_state(struct idxd_device *idxd)
 			group->tc_a = -1;
 			group->tc_b = -1;
 		}
+		group->desc_progress_limit = 0;
+		group->batch_progress_limit = 0;
 	}
 }
 
@@ -761,10 +767,10 @@ static void idxd_group_config_write(struct idxd_group *group)
 
 	/* setup GRPFLAGS */
 	grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
-	iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset);
-	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+	iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n",
 		group->id, grpcfg_offset,
-		ioread32(idxd->reg_base + grpcfg_offset));
+		ioread64(idxd->reg_base + grpcfg_offset));
 }
 
 static int idxd_groups_config_write(struct idxd_device *idxd)
@@ -807,7 +813,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
 	u32 wq_offset;
-	int i;
+	int i, n;
 
 	if (!wq->group)
 		return 0;
@@ -859,12 +865,23 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
 		wq->wqcfg->bof = 1;
 
 	if (idxd->hw.wq_cap.wq_ats_support)
-		wq->wqcfg->wq_ats_disable = wq->ats_dis;
+		wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
 
 	/* bytes 12-15 */
 	wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes);
 	wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size);
 
+	/* bytes 32-63 */
+	if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) {
+		memset(wq->wqcfg->op_config, 0, IDXD_MAX_OPCAP_BITS / 8);
+		for_each_set_bit(n, wq->opcap_bmap, IDXD_MAX_OPCAP_BITS) {
+			int pos = n % BITS_PER_LONG_LONG;
+			int idx = n / BITS_PER_LONG_LONG;
+
+			wq->wqcfg->op_config[idx] |= BIT(pos);
+		}
+	}
+
 	dev_dbg(dev, "WQ %d CFGs\n", wq->id);
 	for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
 		wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
@@ -914,6 +931,9 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
 			group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
 		else
 			group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
+
+		group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit;
+		group->grpcfg.flags.batch_progress_limit = group->batch_progress_limit;
 	}
 }
 
@@ -1096,8 +1116,8 @@ static void idxd_group_load_config(struct idxd_group *group)
 	}
 
 	grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
-	group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset);
-	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+	group->grpcfg.flags.bits = ioread64(idxd->reg_base + grpcfg_offset);
+	dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n",
 		group->id, grpcfg_offset, group->grpcfg.flags.bits);
 }
 
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index fed0dfc1eaa8..1196ab342f01 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -11,6 +11,7 @@
 #include <linux/idr.h>
 #include <linux/pci.h>
 #include <linux/ioasid.h>
+#include <linux/bitmap.h>
 #include <linux/perf_event.h>
 #include <uapi/linux/idxd.h>
 #include "registers.h"
@@ -95,6 +96,8 @@ struct idxd_group {
 	u8 rdbufs_reserved;
 	int tc_a;
 	int tc_b;
+	int desc_progress_limit;
+	int batch_progress_limit;
 };
 
 struct idxd_pmu {
@@ -132,6 +135,7 @@ enum idxd_wq_state {
 enum idxd_wq_flag {
 	WQ_FLAG_DEDICATED = 0,
 	WQ_FLAG_BLOCK_ON_FAULT,
+	WQ_FLAG_ATS_DISABLE,
 };
 
 enum idxd_wq_type {
@@ -194,6 +198,8 @@ struct idxd_wq {
 	enum idxd_wq_state state;
 	unsigned long flags;
 	union wqcfg *wqcfg;
+	unsigned long *opcap_bmap;
+
 	struct dsa_hw_desc **hw_descs;
 	int num_descs;
 	union {
@@ -208,7 +214,6 @@ struct idxd_wq {
 	char name[WQ_NAME_SIZE + 1];
 	u64 max_xfer_bytes;
 	u32 max_batch_size;
-	bool ats_dis;
 };
 
 struct idxd_engine {
@@ -299,6 +304,7 @@ struct idxd_device {
 	int rdbuf_limit;
 	int nr_rdbufs;		/* non-reserved read buffers */
 	unsigned int wqcfg_size;
+	unsigned long *wq_enable_map;
 
 	union sw_err_reg sw_err;
 	wait_queue_head_t cmd_waitq;
@@ -308,6 +314,8 @@ struct idxd_device {
 	struct work_struct work;
 
 	struct idxd_pmu *idxd_pmu;
+
+	unsigned long *opcap_bmap;
 };
 
 /* IDXD software descriptor */
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index aa3478257ddb..2b18d512cbfc 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -151,6 +151,12 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 	if (!idxd->wqs)
 		return -ENOMEM;
 
+	idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev));
+	if (!idxd->wq_enable_map) {
+		kfree(idxd->wqs);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < idxd->max_wqs; i++) {
 		wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
 		if (!wq) {
@@ -185,6 +191,16 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 			rc = -ENOMEM;
 			goto err;
 		}
+
+		if (idxd->hw.wq_cap.op_config) {
+			wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
+			if (!wq->opcap_bmap) {
+				put_device(conf_dev);
+				rc = -ENOMEM;
+				goto err;
+			}
+			bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
+		}
 		idxd->wqs[i] = wq;
 	}
 
@@ -369,6 +385,19 @@ static void idxd_read_table_offsets(struct idxd_device *idxd)
 	dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
 }
 
+static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count)
+{
+	int i, j, nr;
+
+	for (i = 0, nr = 0; i < count; i++) {
+		for (j = 0; j < BITS_PER_LONG_LONG; j++) {
+			if (val[i] & BIT(j))
+				set_bit(nr, bmap);
+			nr++;
+		}
+	}
+}
+
 static void idxd_read_caps(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
@@ -427,6 +456,7 @@ static void idxd_read_caps(struct idxd_device *idxd)
 				IDXD_OPCAP_OFFSET + i * sizeof(u64));
 		dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
 	}
+	multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4);
 }
 
 static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
@@ -448,6 +478,12 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
 	if (idxd->id < 0)
 		return NULL;
 
+	idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev));
+	if (!idxd->opcap_bmap) {
+		ida_free(&idxd_ida, idxd->id);
+		return NULL;
+	}
+
 	device_initialize(conf_dev);
 	conf_dev->parent = dev;
 	conf_dev->bus = &dsa_bus_type;
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 743ead5ebc57..aa314ebec587 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -17,12 +17,6 @@ enum irq_work_type {
 	IRQ_WORK_PROCESS_FAULT,
 };
 
-struct idxd_fault {
-	struct work_struct work;
-	u64 addr;
-	struct idxd_device *idxd;
-};
-
 struct idxd_resubmit {
 	struct work_struct work;
 	struct idxd_desc *desc;
@@ -49,11 +43,12 @@ static void idxd_device_reinit(struct work_struct *work)
 		goto out;
 
 	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = idxd->wqs[i];
+		if (test_bit(i, idxd->wq_enable_map)) {
+			struct idxd_wq *wq = idxd->wqs[i];
 
-		if (wq->state == IDXD_WQ_ENABLED) {
 			rc = idxd_wq_enable(wq);
 			if (rc < 0) {
+				clear_bit(i, idxd->wq_enable_map);
 				dev_warn(dev, "Unable to re-enable wq %s\n",
 					 dev_name(wq_confdev(wq)));
 			}
@@ -324,13 +319,11 @@ halt:
 			idxd->state = IDXD_DEV_HALTED;
 			idxd_wqs_quiesce(idxd);
 			idxd_wqs_unmap_portal(idxd);
-			spin_lock(&idxd->dev_lock);
 			idxd_device_clear_state(idxd);
 			dev_err(&idxd->pdev->dev,
 				"idxd halted, need %s.\n",
 				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
 				"FLR" : "system reset");
-			spin_unlock(&idxd->dev_lock);
 			return -ENXIO;
 		}
 	}
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 02449aa9c454..fe3b8d04f9db 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -54,7 +54,8 @@ union wq_cap_reg {
 		u64 priority:1;
 		u64 occupancy:1;
 		u64 occupancy_int:1;
-		u64 rsvd3:10;
+		u64 op_config:1;
+		u64 rsvd3:9;
 	};
 	u64 bits;
 } __packed;
@@ -67,7 +68,8 @@ union group_cap_reg {
 		u64 total_rdbufs:8;	/* formerly total_tokens */
 		u64 rdbuf_ctrl:1;	/* formerly token_en */
 		u64 rdbuf_limit:1;	/* formerly token_limit */
-		u64 rsvd:46;
+		u64 progress_limit:1;	/* descriptor and batch descriptor */
+		u64 rsvd:45;
 	};
 	u64 bits;
 } __packed;
@@ -90,6 +92,8 @@ struct opcap {
 	u64 bits[4];
 };
 
+#define IDXD_MAX_OPCAP_BITS		256U
+
 #define IDXD_OPCAP_OFFSET		0x40
 
 #define IDXD_TABLE_OFFSET		0x60
@@ -285,16 +289,20 @@ union msix_perm {
 
 union group_flags {
 	struct {
-		u32 tc_a:3;
-		u32 tc_b:3;
-		u32 rsvd:1;
-		u32 use_rdbuf_limit:1;
-		u32 rdbufs_reserved:8;
-		u32 rsvd2:4;
-		u32 rdbufs_allowed:8;
-		u32 rsvd3:4;
+		u64 tc_a:3;
+		u64 tc_b:3;
+		u64 rsvd:1;
+		u64 use_rdbuf_limit:1;
+		u64 rdbufs_reserved:8;
+		u64 rsvd2:4;
+		u64 rdbufs_allowed:8;
+		u64 rsvd3:4;
+		u64 desc_progress_limit:2;
+		u64 rsvd4:2;
+		u64 batch_progress_limit:2;
+		u64 rsvd5:26;
 	};
-	u32 bits;
+	u64 bits;
 } __packed;
 
 struct grpcfg {
@@ -348,8 +356,11 @@ union wqcfg {
 
 		/* bytes 28-31 */
 		u32 rsvd8;
+
+		/* bytes 32-63 */
+		u64 op_config[4];
 	};
-	u32 bits[8];
+	u32 bits[16];
 } __packed;
 
 #define WQCFG_PASID_IDX                2
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 3f262a57441b..bdaccf9e0436 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -443,6 +443,67 @@ static struct device_attribute dev_attr_group_traffic_class_b =
 		__ATTR(traffic_class_b, 0644, group_traffic_class_b_show,
 		       group_traffic_class_b_store);
 
+static ssize_t group_desc_progress_limit_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	struct idxd_group *group = confdev_to_group(dev);
+
+	return sysfs_emit(buf, "%d\n", group->desc_progress_limit);
+}
+
+static ssize_t group_desc_progress_limit_store(struct device *dev,
+					       struct device_attribute *attr,
+					       const char *buf, size_t count)
+{
+	struct idxd_group *group = confdev_to_group(dev);
+	int val, rc;
+
+	rc = kstrtoint(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (val & ~GENMASK(1, 0))
+		return -EINVAL;
+
+	group->desc_progress_limit = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_desc_progress_limit =
+		__ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show,
+		       group_desc_progress_limit_store);
+
+static ssize_t group_batch_progress_limit_show(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buf)
+{
+	struct idxd_group *group = confdev_to_group(dev);
+
+	return sysfs_emit(buf, "%d\n", group->batch_progress_limit);
+}
+
+static ssize_t group_batch_progress_limit_store(struct device *dev,
+						struct device_attribute *attr,
+						const char *buf, size_t count)
+{
+	struct idxd_group *group = confdev_to_group(dev);
+	int val, rc;
+
+	rc = kstrtoint(buf, 10, &val);
+	if (rc < 0)
+		return -EINVAL;
+
+	if (val & ~GENMASK(1, 0))
+		return -EINVAL;
+
+	group->batch_progress_limit = val;
+	return count;
+}
+
+static struct device_attribute dev_attr_group_batch_progress_limit =
+		__ATTR(batch_progress_limit, 0644, group_batch_progress_limit_show,
+		       group_batch_progress_limit_store);
 static struct attribute *idxd_group_attributes[] = {
 	&dev_attr_group_work_queues.attr,
 	&dev_attr_group_engines.attr,
@@ -454,11 +515,35 @@ static struct attribute *idxd_group_attributes[] = {
 	&dev_attr_group_read_buffers_reserved.attr,
 	&dev_attr_group_traffic_class_a.attr,
 	&dev_attr_group_traffic_class_b.attr,
+	&dev_attr_group_desc_progress_limit.attr,
+	&dev_attr_group_batch_progress_limit.attr,
 	NULL,
 };
 
+static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr,
+						     struct idxd_device *idxd)
+{
+	return (attr == &dev_attr_group_desc_progress_limit.attr ||
+		attr == &dev_attr_group_batch_progress_limit.attr) &&
+		!idxd->hw.group_cap.progress_limit;
+}
+
+static umode_t idxd_group_attr_visible(struct kobject *kobj,
+				       struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct idxd_group *group = confdev_to_group(dev);
+	struct idxd_device *idxd = group->idxd;
+
+	if (idxd_group_attr_progress_limit_invisible(attr, idxd))
+		return 0;
+
+	return attr->mode;
+}
+
 static const struct attribute_group idxd_group_attribute_group = {
 	.attrs = idxd_group_attributes,
+	.is_visible = idxd_group_attr_visible,
 };
 
 static const struct attribute_group *idxd_group_attribute_groups[] = {
@@ -973,7 +1058,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *
 {
 	struct idxd_wq *wq = confdev_to_wq(dev);
 
-	return sysfs_emit(buf, "%u\n", wq->ats_dis);
+	return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags));
 }
 
 static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr,
@@ -994,7 +1079,10 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute
 	if (rc < 0)
 		return rc;
 
-	wq->ats_dis = ats_dis;
+	if (ats_dis)
+		set_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+	else
+		clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
 
 	return count;
 }
@@ -1055,6 +1143,68 @@ static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attrib
 static struct device_attribute dev_attr_wq_enqcmds_retries =
 		__ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store);
 
+static ssize_t wq_op_config_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct idxd_wq *wq = confdev_to_wq(dev);
+
+	return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap);
+}
+
+static int idxd_verify_supported_opcap(struct idxd_device *idxd, unsigned long *opmask)
+{
+	int bit;
+
+	/*
+	 * OPCAP is a 256-bit field in which each bit represents one operation the
+	 * device supports. Walk every bit set in the input mask and return an
+	 * error if any of them requests an operation whose OPCAP bit is not set
+	 * for this device.
+	 */
+	for_each_set_bit(bit, opmask, IDXD_MAX_OPCAP_BITS) {
+		if (!test_bit(bit, idxd->opcap_bmap))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static ssize_t wq_op_config_store(struct device *dev, struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct idxd_wq *wq = confdev_to_wq(dev);
+	struct idxd_device *idxd = wq->idxd;
+	unsigned long *opmask;
+	int rc;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	opmask = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
+	if (!opmask)
+		return -ENOMEM;
+
+	rc = bitmap_parse(buf, count, opmask, IDXD_MAX_OPCAP_BITS);
+	if (rc < 0)
+		goto err;
+
+	rc = idxd_verify_supported_opcap(idxd, opmask);
+	if (rc < 0)
+		goto err;
+
+	bitmap_copy(wq->opcap_bmap, opmask, IDXD_MAX_OPCAP_BITS);
+
+	bitmap_free(opmask);
+	return count;
+
+err:
+	bitmap_free(opmask);
+	return rc;
+}
+
+static struct device_attribute dev_attr_wq_op_config =
+		__ATTR(op_config, 0644, wq_op_config_show, wq_op_config_store);
+
 static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_clients.attr,
 	&dev_attr_wq_state.attr,
@@ -1072,11 +1222,33 @@ static struct attribute *idxd_wq_attributes[] = {
 	&dev_attr_wq_ats_disable.attr,
 	&dev_attr_wq_occupancy.attr,
 	&dev_attr_wq_enqcmds_retries.attr,
+	&dev_attr_wq_op_config.attr,
 	NULL,
 };
 
+static bool idxd_wq_attr_op_config_invisible(struct attribute *attr,
+					     struct idxd_device *idxd)
+{
+	return attr == &dev_attr_wq_op_config.attr &&
+	       !idxd->hw.wq_cap.op_config;
+}
+
+static umode_t idxd_wq_attr_visible(struct kobject *kobj,
+				    struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct idxd_wq *wq = confdev_to_wq(dev);
+	struct idxd_device *idxd = wq->idxd;
+
+	if (idxd_wq_attr_op_config_invisible(attr, idxd))
+		return 0;
+
+	return attr->mode;
+}
+
 static const struct attribute_group idxd_wq_attribute_group = {
 	.attrs = idxd_wq_attributes,
+	.is_visible = idxd_wq_attr_visible,
 };
 
 static const struct attribute_group *idxd_wq_attribute_groups[] = {
@@ -1088,6 +1260,7 @@ static void idxd_conf_wq_release(struct device *dev)
 {
 	struct idxd_wq *wq = confdev_to_wq(dev);
 
+	bitmap_free(wq->opcap_bmap);
 	kfree(wq->wqcfg);
 	kfree(wq);
 }
@@ -1177,14 +1350,8 @@ static ssize_t op_cap_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
 	struct idxd_device *idxd = confdev_to_idxd(dev);
-	int i, rc = 0;
-
-	for (i = 0; i < 4; i++)
-		rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]);
 
-	rc--;
-	rc += sysfs_emit_at(buf, rc, "\n");
-	return rc;
+	return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, idxd->opcap_bmap);
 }
 static DEVICE_ATTR_RO(op_cap);
 
@@ -1405,9 +1572,11 @@ static void idxd_conf_device_release(struct device *dev)
 	struct idxd_device *idxd = confdev_to_idxd(dev);
 
 	kfree(idxd->groups);
+	bitmap_free(idxd->wq_enable_map);
 	kfree(idxd->wqs);
 	kfree(idxd->engines);
 	ida_free(&idxd_ida, idxd->id);
+	bitmap_free(idxd->opcap_bmap);
 	kfree(idxd);
 }
 
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 37ff4ec7db76..e2070df6cad2 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -656,7 +656,7 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
 	if (active - i == 0) {
 		dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n",
 			__func__);
-		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+		mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
 	}
 
 	/* microsecond delay by sysfs variable  per pending descriptor */
@@ -682,7 +682,7 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
 
 		if (chanerr &
 		    (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
-			mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+			mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
 			ioat_eh(ioat_chan);
 		}
 	}
@@ -879,7 +879,7 @@ static void check_active(struct ioatdma_chan *ioat_chan)
 	}
 
 	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
-		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+		mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
 }
 
 static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan)
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 140cfe3782fb..35e06b382603 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -196,10 +196,8 @@ extern const struct sysfs_ops ioat_sysfs_ops;
 extern struct ioat_sysfs_entry ioat_version_attr;
 extern struct ioat_sysfs_entry ioat_cap_attr;
 extern int ioat_pending_level;
-extern int ioat_ring_alloc_order;
 extern struct kobj_type ioat_ktype;
 extern struct kmem_cache *ioat_cache;
-extern int ioat_ring_max_alloc_order;
 extern struct kmem_cache *ioat_sed_cache;
 
 static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c)
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 994fc4d2aca4..dc147cc2436e 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -670,7 +670,7 @@ static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
 	return mxs_chan->status;
 }
 
-static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+static int mxs_dma_init(struct mxs_dma_engine *mxs_dma)
 {
 	int ret;
 
@@ -741,7 +741,7 @@ static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
 				     ofdma->of_node);
 }
 
-static int __init mxs_dma_probe(struct platform_device *pdev)
+static int mxs_dma_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
 	const struct mxs_dma_type *dma_type;
@@ -839,10 +839,7 @@ static struct platform_driver mxs_dma_driver = {
 		.name	= "mxs-dma",
 		.of_match_table = mxs_dma_dt_ids,
 	},
+	.probe = mxs_dma_probe,
 };
 
-static int __init mxs_dma_module_init(void)
-{
-	return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
-}
-subsys_initcall(mxs_dma_module_init);
+builtin_platform_driver(mxs_dma_driver);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 09915a5cba3e..0d9257fbdfb0 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2752,7 +2752,6 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		return NULL;
 
 	pch->cyclic = true;
-	desc->txd.flags = flags;
 
 	return &desc->txd;
 }
@@ -2804,8 +2803,6 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 
 	desc->bytes_requested = len;
 
-	desc->txd.flags = flags;
-
 	return &desc->txd;
 }
 
@@ -2889,7 +2886,6 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	}
 
 	/* Return the last desc in the chain */
-	desc->txd.flags = flg;
 	return &desc->txd;
 }
 
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 8f0c9c4e2efd..3f56514bbef8 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1150,9 +1150,9 @@ static void gpi_ev_tasklet(unsigned long data)
 {
 	struct gpii *gpii = (struct gpii *)data;
 
-	read_lock_bh(&gpii->pm_lock);
+	read_lock(&gpii->pm_lock);
 	if (!REG_ACCESS_VALID(gpii->pm_state)) {
-		read_unlock_bh(&gpii->pm_lock);
+		read_unlock(&gpii->pm_lock);
 		dev_err(gpii->gpi_dev->dev, "not processing any events, pm_state:%s\n",
 			TO_GPI_PM_STR(gpii->pm_state));
 		return;
@@ -1163,7 +1163,7 @@ static void gpi_ev_tasklet(unsigned long data)
 
 	/* enable IEOB, switching back to interrupts */
 	gpi_config_interrupts(gpii, MASK_IEOB_SETTINGS, 1);
-	read_unlock_bh(&gpii->pm_lock);
+	read_unlock(&gpii->pm_lock);
 }
 
 /* marks all pending events for the channel as stale */
@@ -2288,6 +2288,7 @@ static int gpi_probe(struct platform_device *pdev)
 static const struct of_device_id gpi_of_match[] = {
 	{ .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 },
 	{ .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 },
+	{ .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 },
 	{ .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 },
diff --git a/drivers/dma/qcom/qcom_adm.c b/drivers/dma/qcom/qcom_adm.c
index facdacf8aede..d56caf1681ff 100644
--- a/drivers/dma/qcom/qcom_adm.c
+++ b/drivers/dma/qcom/qcom_adm.c
@@ -379,13 +379,13 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan,
 		if (blk_size < 0) {
 			dev_err(adev->dev, "invalid burst value: %d\n",
 				burst);
-			return ERR_PTR(-EINVAL);
+			return NULL;
 		}
 
 		crci = achan->crci & 0xf;
 		if (!crci || achan->crci > 0x1f) {
 			dev_err(adev->dev, "invalid crci value\n");
-			return ERR_PTR(-EINVAL);
+			return NULL;
 		}
 	}
 
@@ -403,8 +403,10 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan,
 	}
 
 	async_desc = kzalloc(sizeof(*async_desc), GFP_NOWAIT);
-	if (!async_desc)
-		return ERR_PTR(-ENOMEM);
+	if (!async_desc) {
+		dev_err(adev->dev, "not enough memory for async_desc struct\n");
+		return NULL;
+	}
 
 	async_desc->mux = achan->mux ? ADM_CRCI_CTL_MUX_SEL : 0;
 	async_desc->crci = crci;
@@ -414,8 +416,10 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan,
 				sizeof(*cple) + 2 * ADM_DESC_ALIGN;
 
 	async_desc->cpl = kzalloc(async_desc->dma_len, GFP_NOWAIT);
-	if (!async_desc->cpl)
+	if (!async_desc->cpl) {
+		dev_err(adev->dev, "not enough memory for cpl struct\n");
 		goto free;
+	}
 
 	async_desc->adev = adev;
 
@@ -437,8 +441,10 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan,
 	async_desc->dma_addr = dma_map_single(adev->dev, async_desc->cpl,
 					      async_desc->dma_len,
 					      DMA_TO_DEVICE);
-	if (dma_mapping_error(adev->dev, async_desc->dma_addr))
+	if (dma_mapping_error(adev->dev, async_desc->dma_addr)) {
+		dev_err(adev->dev, "dma mapping error for cpl\n");
 		goto free;
+	}
 
 	cple_addr = async_desc->dma_addr + ((void *)cple - async_desc->cpl);
 
@@ -454,7 +460,7 @@ static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan,
 
 free:
 	kfree(async_desc);
-	return ERR_PTR(-ENOMEM);
+	return NULL;
 }
 
 /**
@@ -494,7 +500,7 @@ static int adm_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
 
 	spin_lock_irqsave(&achan->vc.lock, flag);
 	memcpy(&achan->slave, cfg, sizeof(struct dma_slave_config));
-	if (cfg->peripheral_size == sizeof(config))
+	if (cfg->peripheral_size == sizeof(*config))
 		achan->crci = config->crci;
 	spin_unlock_irqrestore(&achan->vc.lock, flag);
 
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index f6ed7e889781..a09eeb545f7d 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -1094,7 +1094,7 @@ static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
 	INIT_LIST_HEAD(&dmadev->channels);
 
 	/*
-	 * Register as many many memcpy as we have physical channels,
+	 * Register as many memcpy as we have physical channels,
 	 * we won't always be able to use all but the code will have
 	 * to cope with that situation.
 	 */
diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c
index 4f8b8498c5c6..6b524eb6bcf3 100644
--- a/drivers/dma/sf-pdma/sf-pdma.c
+++ b/drivers/dma/sf-pdma/sf-pdma.c
@@ -405,10 +405,8 @@ static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma)
 		chan = &pdma->chans[i];
 
 		irq = platform_get_irq(pdev, i * 2);
-		if (irq < 0) {
-			dev_err(&pdev->dev, "ch(%d) Can't get done irq.\n", i);
+		if (irq < 0)
 			return -EINVAL;
-		}
 
 		r = devm_request_irq(&pdev->dev, irq, sf_pdma_done_isr, 0,
 				     dev_name(&pdev->dev), (void *)chan);
@@ -420,10 +418,8 @@ static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma)
 		chan->txirq = irq;
 
 		irq = platform_get_irq(pdev, (i * 2) + 1);
-		if (irq < 0) {
-			dev_err(&pdev->dev, "ch(%d) Can't get err irq.\n", i);
+		if (irq < 0)
 			return -EINVAL;
-		}
 
 		r = devm_request_irq(&pdev->dev, irq, sf_pdma_err_isr, 0,
 				     dev_name(&pdev->dev), (void *)chan);
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 13d12d660cc2..641d689d17ff 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -103,8 +103,8 @@ struct rcar_dmac_desc_page {
 	struct list_head node;
 
 	union {
-		struct rcar_dmac_desc descs[0];
-		struct rcar_dmac_xfer_chunk chunks[0];
+		DECLARE_FLEX_ARRAY(struct rcar_dmac_desc, descs);
+		DECLARE_FLEX_ARRAY(struct rcar_dmac_xfer_chunk, chunks);
 	};
 };
 
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index adb25a11c70f..4891a1767e5a 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -9,6 +9,7 @@
  *         Pierre-Yves Mordret <pierre-yves.mordret@st.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
@@ -32,8 +33,10 @@
 
 #define STM32_DMA_LISR			0x0000 /* DMA Low Int Status Reg */
 #define STM32_DMA_HISR			0x0004 /* DMA High Int Status Reg */
+#define STM32_DMA_ISR(n)		(((n) & 4) ? STM32_DMA_HISR : STM32_DMA_LISR)
 #define STM32_DMA_LIFCR			0x0008 /* DMA Low Int Flag Clear Reg */
 #define STM32_DMA_HIFCR			0x000c /* DMA High Int Flag Clear Reg */
+#define STM32_DMA_IFCR(n)		(((n) & 4) ? STM32_DMA_HIFCR : STM32_DMA_LIFCR)
 #define STM32_DMA_TCI			BIT(5) /* Transfer Complete Interrupt */
 #define STM32_DMA_HTI			BIT(4) /* Half Transfer Interrupt */
 #define STM32_DMA_TEI			BIT(3) /* Transfer Error Interrupt */
@@ -43,23 +46,22 @@
 					 | STM32_DMA_TEI \
 					 | STM32_DMA_DMEI \
 					 | STM32_DMA_FEI)
+/*
+ * If (chan->id % 4) is 2 or 3, left shift the mask by 16 bits;
+ * if (chan->id % 4) is 1 or 3, additionally left shift the mask by 6 bits.
+ */
+#define STM32_DMA_FLAGS_SHIFT(n)	({ typeof(n) (_n) = (n); \
+					   (((_n) & 2) << 3) | (((_n) & 1) * 6); })
 
 /* DMA Stream x Configuration Register */
 #define STM32_DMA_SCR(x)		(0x0010 + 0x18 * (x)) /* x = 0..7 */
-#define STM32_DMA_SCR_REQ(n)		((n & 0x7) << 25)
+#define STM32_DMA_SCR_REQ_MASK		GENMASK(27, 25)
 #define STM32_DMA_SCR_MBURST_MASK	GENMASK(24, 23)
-#define STM32_DMA_SCR_MBURST(n)	        ((n & 0x3) << 23)
 #define STM32_DMA_SCR_PBURST_MASK	GENMASK(22, 21)
-#define STM32_DMA_SCR_PBURST(n)	        ((n & 0x3) << 21)
 #define STM32_DMA_SCR_PL_MASK		GENMASK(17, 16)
-#define STM32_DMA_SCR_PL(n)		((n & 0x3) << 16)
 #define STM32_DMA_SCR_MSIZE_MASK	GENMASK(14, 13)
-#define STM32_DMA_SCR_MSIZE(n)		((n & 0x3) << 13)
 #define STM32_DMA_SCR_PSIZE_MASK	GENMASK(12, 11)
-#define STM32_DMA_SCR_PSIZE(n)		((n & 0x3) << 11)
-#define STM32_DMA_SCR_PSIZE_GET(n)	((n & STM32_DMA_SCR_PSIZE_MASK) >> 11)
 #define STM32_DMA_SCR_DIR_MASK		GENMASK(7, 6)
-#define STM32_DMA_SCR_DIR(n)		((n & 0x3) << 6)
 #define STM32_DMA_SCR_TRBUFF		BIT(20) /* Bufferable transfer for USART/UART */
 #define STM32_DMA_SCR_CT		BIT(19) /* Target in double buffer */
 #define STM32_DMA_SCR_DBM		BIT(18) /* Double Buffer Mode */
@@ -96,7 +98,6 @@
 /* DMA stream x FIFO control register */
 #define STM32_DMA_SFCR(x)		(0x0024 + 0x18 * (x))
 #define STM32_DMA_SFCR_FTH_MASK		GENMASK(1, 0)
-#define STM32_DMA_SFCR_FTH(n)		(n & STM32_DMA_SFCR_FTH_MASK)
 #define STM32_DMA_SFCR_FEIE		BIT(7) /* FIFO error interrupt enable */
 #define STM32_DMA_SFCR_DMDIS		BIT(2) /* Direct mode disable */
 #define STM32_DMA_SFCR_MASK		(STM32_DMA_SFCR_FEIE \
@@ -137,11 +138,9 @@
 
 /* DMA Features */
 #define STM32_DMA_THRESHOLD_FTR_MASK	GENMASK(1, 0)
-#define STM32_DMA_THRESHOLD_FTR_GET(n)	((n) & STM32_DMA_THRESHOLD_FTR_MASK)
 #define STM32_DMA_DIRECT_MODE_MASK	BIT(2)
-#define STM32_DMA_DIRECT_MODE_GET(n)	(((n) & STM32_DMA_DIRECT_MODE_MASK) >> 2)
 #define STM32_DMA_ALT_ACK_MODE_MASK	BIT(4)
-#define STM32_DMA_ALT_ACK_MODE_GET(n)	(((n) & STM32_DMA_ALT_ACK_MODE_MASK) >> 4)
+#define STM32_DMA_MDMA_STREAM_ID_MASK	GENMASK(19, 16)
 
 enum stm32_dma_width {
 	STM32_DMA_BYTE,
@@ -195,6 +194,19 @@ struct stm32_dma_desc {
 	struct stm32_dma_sg_req sg_req[];
 };
 
+/**
+ * struct stm32_dma_mdma_config - STM32 DMA MDMA configuration
+ * @stream_id: DMA request to trigger STM32 MDMA transfer
+ * @ifcr: DMA interrupt flag clear register address,
+ *        used by STM32 MDMA to clear DMA Transfer Complete flag
+ * @tcf: DMA Transfer Complete flag
+ */
+struct stm32_dma_mdma_config {
+	u32 stream_id;
+	u32 ifcr;
+	u32 tcf;
+};
+
 struct stm32_dma_chan {
 	struct virt_dma_chan vchan;
 	bool config_init;
@@ -209,6 +221,8 @@ struct stm32_dma_chan {
 	u32 mem_burst;
 	u32 mem_width;
 	enum dma_status status;
+	bool trig_mdma;
+	struct stm32_dma_mdma_config mdma_config;
 };
 
 struct stm32_dma_device {
@@ -388,6 +402,13 @@ static int stm32_dma_slave_config(struct dma_chan *c,
 
 	memcpy(&chan->dma_sconfig, config, sizeof(*config));
 
+	/* Check if user is requesting DMA to trigger STM32 MDMA */
+	if (config->peripheral_size) {
+		config->peripheral_config = &chan->mdma_config;
+		config->peripheral_size = sizeof(chan->mdma_config);
+		chan->trig_mdma = true;
+	}
+
 	chan->config_init = true;
 
 	return 0;
@@ -401,17 +422,10 @@ static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
 	/*
 	 * Read "flags" from DMA_xISR register corresponding to the selected
 	 * DMA channel at the correct bit offset inside that register.
-	 *
-	 * If (ch % 4) is 2 or 3, left shift the mask by 16 bits.
-	 * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
 	 */
 
-	if (chan->id & 4)
-		dma_isr = stm32_dma_read(dmadev, STM32_DMA_HISR);
-	else
-		dma_isr = stm32_dma_read(dmadev, STM32_DMA_LISR);
-
-	flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
+	dma_isr = stm32_dma_read(dmadev, STM32_DMA_ISR(chan->id));
+	flags = dma_isr >> STM32_DMA_FLAGS_SHIFT(chan->id);
 
 	return flags & STM32_DMA_MASKI;
 }
@@ -424,17 +438,11 @@ static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
 	/*
 	 * Write "flags" to the DMA_xIFCR register corresponding to the selected
 	 * DMA channel at the correct bit offset inside that register.
-	 *
-	 * If (ch % 4) is 2 or 3, left shift the mask by 16 bits.
-	 * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
 	 */
 	flags &= STM32_DMA_MASKI;
-	dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
+	dma_ifcr = flags << STM32_DMA_FLAGS_SHIFT(chan->id);
 
-	if (chan->id & 4)
-		stm32_dma_write(dmadev, STM32_DMA_HIFCR, dma_ifcr);
-	else
-		stm32_dma_write(dmadev, STM32_DMA_LIFCR, dma_ifcr);
+	stm32_dma_write(dmadev, STM32_DMA_IFCR(chan->id), dma_ifcr);
 }
 
 static int stm32_dma_disable_chan(struct stm32_dma_chan *chan)
@@ -576,6 +584,10 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 	sg_req = &chan->desc->sg_req[chan->next_sg];
 	reg = &sg_req->chan_reg;
 
+	/* When DMA triggers STM32 MDMA, DMA Transfer Complete is managed by STM32 MDMA */
+	if (chan->trig_mdma && chan->dma_sconfig.direction != DMA_MEM_TO_DEV)
+		reg->dma_scr &= ~STM32_DMA_SCR_TCIE;
+
 	reg->dma_scr &= ~STM32_DMA_SCR_EN;
 	stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
 	stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar);
@@ -725,6 +737,8 @@ static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan, u32 scr)
 
 	if (chan->desc->cyclic) {
 		vchan_cyclic_callback(&chan->desc->vdesc);
+		if (chan->trig_mdma)
+			return;
 		stm32_dma_sg_inc(chan);
 		/* cyclic while CIRC/DBM disable => post resume reconfiguration needed */
 		if (!(scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM)))
@@ -861,7 +875,8 @@ static int stm32_dma_resume(struct dma_chan *c)
 		sg_req = &chan->desc->sg_req[chan->next_sg - 1];
 
 	ndtr = sg_req->chan_reg.dma_sndtr;
-	offset = (ndtr - chan_reg.dma_sndtr) << STM32_DMA_SCR_PSIZE_GET(chan_reg.dma_scr);
+	offset = (ndtr - chan_reg.dma_sndtr);
+	offset <<= FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, chan_reg.dma_scr);
 	spar = sg_req->chan_reg.dma_spar;
 	sm0ar = sg_req->chan_reg.dma_sm0ar;
 	sm1ar = sg_req->chan_reg.dma_sm1ar;
@@ -973,16 +988,16 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 		if (src_burst_size < 0)
 			return src_burst_size;
 
-		dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_DEV) |
-			STM32_DMA_SCR_PSIZE(dst_bus_width) |
-			STM32_DMA_SCR_MSIZE(src_bus_width) |
-			STM32_DMA_SCR_PBURST(dst_burst_size) |
-			STM32_DMA_SCR_MBURST(src_burst_size);
+		dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_DEV) |
+			FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, dst_bus_width) |
+			FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, src_bus_width) |
+			FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dst_burst_size) |
+			FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, src_burst_size);
 
 		/* Set FIFO threshold */
 		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
 		if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE)
-			chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(fifoth);
+			chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth);
 
 		/* Set peripheral address */
 		chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr;
@@ -1030,16 +1045,16 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 		if (dst_burst_size < 0)
 			return dst_burst_size;
 
-		dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_DEV_TO_MEM) |
-			STM32_DMA_SCR_PSIZE(src_bus_width) |
-			STM32_DMA_SCR_MSIZE(dst_bus_width) |
-			STM32_DMA_SCR_PBURST(src_burst_size) |
-			STM32_DMA_SCR_MBURST(dst_burst_size);
+		dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_DEV_TO_MEM) |
+			FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, src_bus_width) |
+			FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, dst_bus_width) |
+			FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, src_burst_size) |
+			FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dst_burst_size);
 
 		/* Set FIFO threshold */
 		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
 		if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE)
-			chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(fifoth);
+			chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth);
 
 		/* Set peripheral address */
 		chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr;
@@ -1099,6 +1114,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
 	else
 		chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
 
+	/* Activate Double Buffer Mode if DMA triggers STM32 MDMA and more than 1 sg */
+	if (chan->trig_mdma && sg_len > 1)
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM;
+
 	for_each_sg(sgl, sg, sg_len, i) {
 		ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
 					       sg_dma_len(sg),
@@ -1120,6 +1139,8 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
 		desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
 		desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg);
 		desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg);
+		if (chan->trig_mdma)
+			desc->sg_req[i].chan_reg.dma_sm1ar += sg_dma_len(sg);
 		desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
 	}
 
@@ -1207,8 +1228,11 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
 		desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
 		desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr;
 		desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr;
+		if (chan->trig_mdma)
+			desc->sg_req[i].chan_reg.dma_sm1ar += period_len;
 		desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
-		buf_addr += period_len;
+		if (!chan->trig_mdma)
+			buf_addr += period_len;
 	}
 
 	desc->num_sgs = num_periods;
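When the transfer is chained to STM32 MDMA, the stream runs in Double Buffer Mode: SM0AR targets one half of the intermediate buffer and SM1AR the other (hence the sm1ar adjustments above), and buf_addr is no longer advanced because the hardware alternates between the two memory targets itself. A conceptual helper, not part of the driver, showing which half is used for element i:

  #include <linux/types.h>

  /* Double Buffer Mode ping-pong between the two memory targets. */
  static dma_addr_t dbm_target(dma_addr_t buf, size_t period_len, unsigned int i)
  {
  	return (i & 1) ? buf + period_len : buf;
  }

The MDMA side mirrors this in the stm32-mdma hunks below, where the device address is bumped by one period on odd elements.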
@@ -1247,16 +1271,15 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
 
 		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
 		desc->sg_req[i].chan_reg.dma_scr =
-			STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
-			STM32_DMA_SCR_PBURST(dma_burst) |
-			STM32_DMA_SCR_MBURST(dma_burst) |
+			FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_MEM) |
+			FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dma_burst) |
+			FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dma_burst) |
 			STM32_DMA_SCR_MINC |
 			STM32_DMA_SCR_PINC |
 			STM32_DMA_SCR_TCIE |
 			STM32_DMA_SCR_TEIE;
 		desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
-		desc->sg_req[i].chan_reg.dma_sfcr |=
-			STM32_DMA_SFCR_FTH(threshold);
+		desc->sg_req[i].chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, threshold);
 		desc->sg_req[i].chan_reg.dma_spar = src + offset;
 		desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
 		desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
@@ -1275,7 +1298,7 @@ static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan)
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
 
 	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
-	width = STM32_DMA_SCR_PSIZE_GET(dma_scr);
+	width = FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, dma_scr);
 	ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
 
 	return ndtr << width;
@@ -1481,16 +1504,17 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan,
 	stm32_dma_clear_reg(&chan->chan_reg);
 
 	chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK;
-	chan->chan_reg.dma_scr |= STM32_DMA_SCR_REQ(cfg->request_line);
+	chan->chan_reg.dma_scr |= FIELD_PREP(STM32_DMA_SCR_REQ_MASK, cfg->request_line);
 
 	/* Enable Interrupts  */
 	chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
 
-	chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
-	if (STM32_DMA_DIRECT_MODE_GET(cfg->features))
+	chan->threshold = FIELD_GET(STM32_DMA_THRESHOLD_FTR_MASK, cfg->features);
+	if (FIELD_GET(STM32_DMA_DIRECT_MODE_MASK, cfg->features))
 		chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE;
-	if (STM32_DMA_ALT_ACK_MODE_GET(cfg->features))
+	if (FIELD_GET(STM32_DMA_ALT_ACK_MODE_MASK, cfg->features))
 		chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF;
+	chan->mdma_config.stream_id = FIELD_GET(STM32_DMA_MDMA_STREAM_ID_MASK, cfg->features);
 }
 
 static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
@@ -1630,6 +1654,12 @@ static int stm32_dma_probe(struct platform_device *pdev)
 		chan->id = i;
 		chan->vchan.desc_free = stm32_dma_desc_free;
 		vchan_init(&chan->vchan, dd);
+
+		chan->mdma_config.ifcr = res->start;
+		chan->mdma_config.ifcr += STM32_DMA_IFCR(chan->id);
+
+		chan->mdma_config.tcf = STM32_DMA_TCI;
+		chan->mdma_config.tcf <<= STM32_DMA_FLAGS_SHIFT(chan->id);
 	}
 
 	ret = dma_async_device_register(dd);
diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
index eee0c5aa5fb5..ee3cbbf51006 100644
--- a/drivers/dma/stm32-dmamux.c
+++ b/drivers/dma/stm32-dmamux.c
@@ -39,13 +39,13 @@ struct stm32_dmamux_data {
 	u32 dma_requests; /* Number of DMA requests connected to DMAMUX */
 	u32 dmamux_requests; /* Number of DMA requests routed toward DMAs */
 	spinlock_t lock; /* Protects register access */
-	unsigned long *dma_inuse; /* Used DMA channel */
+	DECLARE_BITMAP(dma_inuse, STM32_DMAMUX_MAX_DMA_REQUESTS); /* Used DMA channel */
 	u32 ccr[STM32_DMAMUX_MAX_DMA_REQUESTS]; /* Used to backup CCR register
 						 * in suspend
 						 */
 	u32 dma_reqs[]; /* Number of DMA Request per DMA masters.
 			 *  [0] holds number of DMA Masters.
-			 *  To be kept at very end end of this structure
+			 *  To be kept at the very end of this structure
 			 */
 };
 
@@ -147,7 +147,7 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec,
 	mux->request = dma_spec->args[0];
 
 	/*  craft DMA spec */
-	dma_spec->args[3] = dma_spec->args[2];
+	dma_spec->args[3] = dma_spec->args[2] | mux->chan_id << 16;
 	dma_spec->args[2] = dma_spec->args[1];
 	dma_spec->args[1] = 0;
 	dma_spec->args[0] = mux->chan_id - min;
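The crafted DMA spec now carries the DMAMUX channel number (the DMA stream that will trigger STM32 MDMA) in the upper bits of its last cell; stm32_dma_set_config() above recovers it with FIELD_GET(STM32_DMA_MDMA_STREAM_ID_MASK, cfg->features) and stores it as mdma_config.stream_id. A hedged sketch of the round trip (mask definition assumed, the real one lives in stm32-dma.c):

  #include <linux/bitfield.h>
  #include <linux/bits.h>

  #define EXAMPLE_MDMA_STREAM_ID_MASK	GENMASK(19, 16)	/* assumed field */

  static u32 example_encode_features(u32 features, u32 chan_id)
  {
  	return features | (chan_id << 16);		/* dmamux side */
  }

  static u32 example_decode_stream_id(u32 features)
  {
  	return FIELD_GET(EXAMPLE_MDMA_STREAM_ID_MASK, features);	/* stm32-dma side */
  }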
@@ -229,12 +229,6 @@ static int stm32_dmamux_probe(struct platform_device *pdev)
 
 	stm32_dmamux->dma_requests = dma_req;
 	stm32_dmamux->dma_reqs[0] = count;
-	stm32_dmamux->dma_inuse = devm_kcalloc(&pdev->dev,
-					       BITS_TO_LONGS(dma_req),
-					       sizeof(unsigned long),
-					       GFP_KERNEL);
-	if (!stm32_dmamux->dma_inuse)
-		return -ENOMEM;
 
 	if (device_property_read_u32(&pdev->dev, "dma-requests",
 				     &stm32_dmamux->dmamux_requests)) {
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index b11927ed4367..e28acbcb53f4 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -199,6 +199,7 @@ struct stm32_mdma_chan_config {
 	u32 transfer_config;
 	u32 mask_addr;
 	u32 mask_data;
+	bool m2m_hw; /* True when MDMA is triggered by STM32 DMA */
 };
 
 struct stm32_mdma_hwdesc {
@@ -227,6 +228,12 @@ struct stm32_mdma_desc {
 	struct stm32_mdma_desc_node node[];
 };
 
+struct stm32_mdma_dma_config {
+	u32 request;	/* STM32 DMA channel stream id, triggering MDMA */
+	u32 cmar;	/* STM32 DMA interrupt flag clear register address */
+	u32 cmdr;	/* STM32 DMA Transfer Complete flag */
+};
+
 struct stm32_mdma_chan {
 	struct virt_dma_chan vchan;
 	struct dma_pool *desc_pool;
@@ -539,13 +546,23 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan,
 		dst_addr = chan->dma_config.dst_addr;
 
 		/* Set device data size */
+		if (chan_config->m2m_hw)
+			dst_addr_width = stm32_mdma_get_max_width(dst_addr, buf_len,
+								  STM32_MDMA_MAX_BUF_LEN);
 		dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width);
 		if (dst_bus_width < 0)
 			return dst_bus_width;
 		ctcr &= ~STM32_MDMA_CTCR_DSIZE_MASK;
 		ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width);
+		if (chan_config->m2m_hw) {
+			ctcr &= ~STM32_MDMA_CTCR_DINCOS_MASK;
+			ctcr |= STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+		}
 
 		/* Set device burst value */
+		if (chan_config->m2m_hw)
+			dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width;
+
 		dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
 							   dst_maxburst,
 							   dst_addr_width);
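When the MDMA is triggered by STM32 DMA (m2m_hw), the destination side no longer takes dst_addr_width from the slave config as-is: the widest width compatible with the address and buffer length is picked, and dst_maxburst is raised to what the MDMA internal buffer allows for that width. A worked example with assumed values (STM32_MDMA_MAX_BUF_LEN taken as 128 bytes for the illustration):

  /* 4-byte aligned SRAM address, 128-byte buffer length:
   * dst_addr_width -> 4 bytes
   * dst_maxburst   -> 128 / 4 = 32 beats
   */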
@@ -588,13 +605,24 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan,
 		src_addr = chan->dma_config.src_addr;
 
 		/* Set device data size */
+		if (chan_config->m2m_hw)
+			src_addr_width = stm32_mdma_get_max_width(src_addr, buf_len,
+								  STM32_MDMA_MAX_BUF_LEN);
+
 		src_bus_width = stm32_mdma_get_width(chan, src_addr_width);
 		if (src_bus_width < 0)
 			return src_bus_width;
 		ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK;
 		ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width);
+		if (chan_config->m2m_hw) {
+			ctcr &= ~STM32_MDMA_CTCR_SINCOS_MASK;
+			ctcr |= STM32_MDMA_CTCR_SINCOS(src_bus_width);
+		}
 
 		/* Set device burst value */
+		if (chan_config->m2m_hw)
+			src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width;
+
 		src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
 							   src_maxburst,
 							   src_addr_width);
@@ -702,11 +730,15 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
 {
 	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
 	struct dma_slave_config *dma_config = &chan->dma_config;
+	struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
 	struct scatterlist *sg;
 	dma_addr_t src_addr, dst_addr;
-	u32 ccr, ctcr, ctbr;
+	u32 m2m_hw_period, ccr, ctcr, ctbr;
 	int i, ret = 0;
 
+	if (chan_config->m2m_hw)
+		m2m_hw_period = sg_dma_len(sgl);
+
 	for_each_sg(sgl, sg, sg_len, i) {
 		if (sg_dma_len(sg) > STM32_MDMA_MAX_BLOCK_LEN) {
 			dev_err(chan2dev(chan), "Invalid block len\n");
@@ -716,6 +748,8 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
 		if (direction == DMA_MEM_TO_DEV) {
 			src_addr = sg_dma_address(sg);
 			dst_addr = dma_config->dst_addr;
+			if (chan_config->m2m_hw && (i & 1))
+				dst_addr += m2m_hw_period;
 			ret = stm32_mdma_set_xfer_param(chan, direction, &ccr,
 							&ctcr, &ctbr, src_addr,
 							sg_dma_len(sg));
@@ -723,6 +757,8 @@ static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
 					   src_addr);
 		} else {
 			src_addr = dma_config->src_addr;
+			if (chan_config->m2m_hw && (i & 1))
+				src_addr += m2m_hw_period;
 			dst_addr = sg_dma_address(sg);
 			ret = stm32_mdma_set_xfer_param(chan, direction, &ccr,
 							&ctcr, &ctbr, dst_addr,
@@ -755,6 +791,7 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
 			 unsigned long flags, void *context)
 {
 	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
 	struct stm32_mdma_desc *desc;
 	int i, ret;
 
@@ -777,6 +814,21 @@ stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
 	if (ret < 0)
 		goto xfer_setup_err;
 
+	/*
+	 * In case of an M2M HW transfer triggered by STM32 DMA, the transfer complete
+	 * flag is not cleared by hardware, so that the CPU can rearm the STM32 DMA with
+	 * the next sg element and update some data in the dmaengine framework.
+	 */
+	if (chan_config->m2m_hw && direction == DMA_MEM_TO_DEV) {
+		struct stm32_mdma_hwdesc *hwdesc;
+
+		for (i = 0; i < sg_len; i++) {
+			hwdesc = desc->node[i].hwdesc;
+			hwdesc->cmar = 0;
+			hwdesc->cmdr = 0;
+		}
+	}
+
 	desc->cyclic = false;
 
 	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
@@ -798,6 +850,7 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr,
 	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
 	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
 	struct dma_slave_config *dma_config = &chan->dma_config;
+	struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
 	struct stm32_mdma_desc *desc;
 	dma_addr_t src_addr, dst_addr;
 	u32 ccr, ctcr, ctbr, count;
@@ -858,8 +911,12 @@ stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr,
 		if (direction == DMA_MEM_TO_DEV) {
 			src_addr = buf_addr + i * period_len;
 			dst_addr = dma_config->dst_addr;
+			if (chan_config->m2m_hw && (i & 1))
+				dst_addr += period_len;
 		} else {
 			src_addr = dma_config->src_addr;
+			if (chan_config->m2m_hw && (i & 1))
+				src_addr += period_len;
 			dst_addr = buf_addr + i * period_len;
 		}
 
@@ -1244,6 +1301,17 @@ static int stm32_mdma_slave_config(struct dma_chan *c,
 
 	memcpy(&chan->dma_config, config, sizeof(*config));
 
+	/* Check if user is requesting STM32 DMA to trigger MDMA */
+	if (config->peripheral_size) {
+		struct stm32_mdma_dma_config *mdma_config;
+
+		mdma_config = (struct stm32_mdma_dma_config *)chan->dma_config.peripheral_config;
+		chan->chan_config.request = mdma_config->request;
+		chan->chan_config.mask_addr = mdma_config->cmar;
+		chan->chan_config.mask_data = mdma_config->cmdr;
+		chan->chan_config.m2m_hw = true;
+	}
+
 	return 0;
 }
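stm32_mdma_slave_config() now accepts an optional struct stm32_mdma_dma_config passed through the generic peripheral_config/peripheral_size members of dma_slave_config; this is how the STM32 DMA driver tells MDMA which stream triggers it and which Transfer Complete flag is involved. A minimal sketch of attaching such a config through the standard dmaengine call (values are placeholders, not taken from the patch):

  #include <linux/dmaengine.h>

  static int example_chain_to_mdma(struct dma_chan *mdma_chan, u32 stream_id,
  				 u32 dma_ifcr_addr, u32 dma_tc_flag)
  {
  	struct stm32_mdma_dma_config mdma_cfg = {
  		.request = stream_id,		/* DMA stream triggering MDMA */
  		.cmar	 = dma_ifcr_addr,	/* DMA IFCR register address  */
  		.cmdr	 = dma_tc_flag,		/* Transfer Complete flag     */
  	};
  	struct dma_slave_config cfg = {
  		.direction	   = DMA_DEV_TO_MEM,
  		.peripheral_config = &mdma_cfg,
  		.peripheral_size   = sizeof(mdma_cfg),
  	};

  	return dmaengine_slave_config(mdma_chan, &cfg);
  }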
 
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index 4cbca80ee16e..fa06d7e6d8e3 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -352,12 +352,6 @@ static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i,
 	edma_modify(ecc, offset + (i << 2), and, or);
 }
 
-static inline void edma_or_array(struct edma_cc *ecc, int offset, int i,
-				 unsigned or)
-{
-	edma_or(ecc, offset + (i << 2), or);
-}
-
 static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j,
 				  unsigned or)
 {
@@ -370,11 +364,6 @@ static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i,
 	edma_write(ecc, offset + ((i * 2 + j) << 2), val);
 }
 
-static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset)
-{
-	return edma_read(ecc, EDMA_SHADOW0 + offset);
-}
-
 static inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc,
 						   int offset, int i)
 {
@@ -393,36 +382,12 @@ static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset,
 	edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val);
 }
 
-static inline unsigned int edma_param_read(struct edma_cc *ecc, int offset,
-					   int param_no)
-{
-	return edma_read(ecc, EDMA_PARM + offset + (param_no << 5));
-}
-
-static inline void edma_param_write(struct edma_cc *ecc, int offset,
-				    int param_no, unsigned val)
-{
-	edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val);
-}
-
 static inline void edma_param_modify(struct edma_cc *ecc, int offset,
 				     int param_no, unsigned and, unsigned or)
 {
 	edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or);
 }
 
-static inline void edma_param_and(struct edma_cc *ecc, int offset, int param_no,
-				  unsigned and)
-{
-	edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and);
-}
-
-static inline void edma_param_or(struct edma_cc *ecc, int offset, int param_no,
-				 unsigned or)
-{
-	edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or);
-}
-
 static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no,
 					  int priority)
 {
@@ -743,11 +708,6 @@ static void edma_free_channel(struct edma_chan *echan)
 	edma_setup_interrupt(echan, false);
 }
 
-static inline struct edma_cc *to_edma_cc(struct dma_device *d)
-{
-	return container_of(d, struct edma_cc, dma_slave);
-}
-
 static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
 {
 	return container_of(c, struct edma_chan, vchan.chan);
diff --git a/drivers/dma/ti/k3-psil-j7200.c b/drivers/dma/ti/k3-psil-j7200.c
index 5ea63ea74822..e3feff869991 100644
--- a/drivers/dma/ti/k3-psil-j7200.c
+++ b/drivers/dma/ti/k3-psil-j7200.c
@@ -143,6 +143,57 @@ static struct psil_ep j7200_src_ep_map[] = {
 
 /* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
 static struct psil_ep j7200_dst_ep_map[] = {
+	/* PDMA_MCASP - McASP0-2 */
+	PSIL_PDMA_MCASP(0xc400),
+	PSIL_PDMA_MCASP(0xc401),
+	PSIL_PDMA_MCASP(0xc402),
+	/* PDMA_SPI_G0 - SPI0-3 */
+	PSIL_PDMA_XY_PKT(0xc600),
+	PSIL_PDMA_XY_PKT(0xc601),
+	PSIL_PDMA_XY_PKT(0xc602),
+	PSIL_PDMA_XY_PKT(0xc603),
+	PSIL_PDMA_XY_PKT(0xc604),
+	PSIL_PDMA_XY_PKT(0xc605),
+	PSIL_PDMA_XY_PKT(0xc606),
+	PSIL_PDMA_XY_PKT(0xc607),
+	PSIL_PDMA_XY_PKT(0xc608),
+	PSIL_PDMA_XY_PKT(0xc609),
+	PSIL_PDMA_XY_PKT(0xc60a),
+	PSIL_PDMA_XY_PKT(0xc60b),
+	PSIL_PDMA_XY_PKT(0xc60c),
+	PSIL_PDMA_XY_PKT(0xc60d),
+	PSIL_PDMA_XY_PKT(0xc60e),
+	PSIL_PDMA_XY_PKT(0xc60f),
+	/* PDMA_SPI_G1 - SPI4-7 */
+	PSIL_PDMA_XY_PKT(0xc610),
+	PSIL_PDMA_XY_PKT(0xc611),
+	PSIL_PDMA_XY_PKT(0xc612),
+	PSIL_PDMA_XY_PKT(0xc613),
+	PSIL_PDMA_XY_PKT(0xc614),
+	PSIL_PDMA_XY_PKT(0xc615),
+	PSIL_PDMA_XY_PKT(0xc616),
+	PSIL_PDMA_XY_PKT(0xc617),
+	PSIL_PDMA_XY_PKT(0xc618),
+	PSIL_PDMA_XY_PKT(0xc619),
+	PSIL_PDMA_XY_PKT(0xc61a),
+	PSIL_PDMA_XY_PKT(0xc61b),
+	PSIL_PDMA_XY_PKT(0xc61c),
+	PSIL_PDMA_XY_PKT(0xc61d),
+	PSIL_PDMA_XY_PKT(0xc61e),
+	PSIL_PDMA_XY_PKT(0xc61f),
+	/* PDMA_USART_G0 - UART0-1 */
+	PSIL_PDMA_XY_PKT(0xc700),
+	PSIL_PDMA_XY_PKT(0xc701),
+	/* PDMA_USART_G1 - UART2-3 */
+	PSIL_PDMA_XY_PKT(0xc702),
+	PSIL_PDMA_XY_PKT(0xc703),
+	/* PDMA_USART_G2 - UART4-9 */
+	PSIL_PDMA_XY_PKT(0xc704),
+	PSIL_PDMA_XY_PKT(0xc705),
+	PSIL_PDMA_XY_PKT(0xc706),
+	PSIL_PDMA_XY_PKT(0xc707),
+	PSIL_PDMA_XY_PKT(0xc708),
+	PSIL_PDMA_XY_PKT(0xc709),
 	/* CPSW5 */
 	PSIL_ETHERNET(0xca00),
 	PSIL_ETHERNET(0xca01),
@@ -161,6 +212,22 @@ static struct psil_ep j7200_dst_ep_map[] = {
 	PSIL_ETHERNET(0xf005),
 	PSIL_ETHERNET(0xf006),
 	PSIL_ETHERNET(0xf007),
+	/* MCU_PDMA_MISC_G0 - SPI0 */
+	PSIL_PDMA_XY_PKT(0xf100),
+	PSIL_PDMA_XY_PKT(0xf101),
+	PSIL_PDMA_XY_PKT(0xf102),
+	PSIL_PDMA_XY_PKT(0xf103),
+	/* MCU_PDMA_MISC_G1 - SPI1-2 */
+	PSIL_PDMA_XY_PKT(0xf200),
+	PSIL_PDMA_XY_PKT(0xf201),
+	PSIL_PDMA_XY_PKT(0xf202),
+	PSIL_PDMA_XY_PKT(0xf203),
+	PSIL_PDMA_XY_PKT(0xf204),
+	PSIL_PDMA_XY_PKT(0xf205),
+	PSIL_PDMA_XY_PKT(0xf206),
+	PSIL_PDMA_XY_PKT(0xf207),
+	/* MCU_PDMA_MISC_G2 - UART0 */
+	PSIL_PDMA_XY_PKT(0xf300),
 	/* SA2UL */
 	PSIL_SA2UL(0xf500, 1),
 	PSIL_SA2UL(0xf501, 1),
diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c
index 34e3fc565a37..e7c83d668bb6 100644
--- a/drivers/dma/ti/k3-psil-j721e.c
+++ b/drivers/dma/ti/k3-psil-j721e.c
@@ -266,6 +266,69 @@ static struct psil_ep j721e_dst_ep_map[] = {
 	PSIL_ETHERNET(0xc205),
 	PSIL_ETHERNET(0xc206),
 	PSIL_ETHERNET(0xc207),
+	/* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */
+	PSIL_PDMA_MCASP(0xc400),
+	PSIL_PDMA_MCASP(0xc401),
+	PSIL_PDMA_MCASP(0xc402),
+	/* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */
+	PSIL_PDMA_MCASP(0xc500),
+	PSIL_PDMA_MCASP(0xc501),
+	PSIL_PDMA_MCASP(0xc502),
+	PSIL_PDMA_MCASP(0xc503),
+	PSIL_PDMA_MCASP(0xc504),
+	PSIL_PDMA_MCASP(0xc505),
+	PSIL_PDMA_MCASP(0xc506),
+	PSIL_PDMA_MCASP(0xc507),
+	PSIL_PDMA_MCASP(0xc508),
+	/* PDMA8 (PDMA_MISC_G0) - SPI0-1 */
+	PSIL_PDMA_XY_PKT(0xc600),
+	PSIL_PDMA_XY_PKT(0xc601),
+	PSIL_PDMA_XY_PKT(0xc602),
+	PSIL_PDMA_XY_PKT(0xc603),
+	PSIL_PDMA_XY_PKT(0xc604),
+	PSIL_PDMA_XY_PKT(0xc605),
+	PSIL_PDMA_XY_PKT(0xc606),
+	PSIL_PDMA_XY_PKT(0xc607),
+	/* PDMA9 (PDMA_MISC_G1) - SPI2-3 */
+	PSIL_PDMA_XY_PKT(0xc60c),
+	PSIL_PDMA_XY_PKT(0xc60d),
+	PSIL_PDMA_XY_PKT(0xc60e),
+	PSIL_PDMA_XY_PKT(0xc60f),
+	PSIL_PDMA_XY_PKT(0xc610),
+	PSIL_PDMA_XY_PKT(0xc611),
+	PSIL_PDMA_XY_PKT(0xc612),
+	PSIL_PDMA_XY_PKT(0xc613),
+	/* PDMA10 (PDMA_MISC_G2) - SPI4-5 */
+	PSIL_PDMA_XY_PKT(0xc618),
+	PSIL_PDMA_XY_PKT(0xc619),
+	PSIL_PDMA_XY_PKT(0xc61a),
+	PSIL_PDMA_XY_PKT(0xc61b),
+	PSIL_PDMA_XY_PKT(0xc61c),
+	PSIL_PDMA_XY_PKT(0xc61d),
+	PSIL_PDMA_XY_PKT(0xc61e),
+	PSIL_PDMA_XY_PKT(0xc61f),
+	/* PDMA11 (PDMA_MISC_G3) */
+	PSIL_PDMA_XY_PKT(0xc624),
+	PSIL_PDMA_XY_PKT(0xc625),
+	PSIL_PDMA_XY_PKT(0xc626),
+	PSIL_PDMA_XY_PKT(0xc627),
+	PSIL_PDMA_XY_PKT(0xc628),
+	PSIL_PDMA_XY_PKT(0xc629),
+	PSIL_PDMA_XY_PKT(0xc630),
+	PSIL_PDMA_XY_PKT(0xc63a),
+	/* PDMA13 (PDMA_USART_G0) - UART0-1 */
+	PSIL_PDMA_XY_PKT(0xc700),
+	PSIL_PDMA_XY_PKT(0xc701),
+	/* PDMA14 (PDMA_USART_G1) - UART2-3 */
+	PSIL_PDMA_XY_PKT(0xc702),
+	PSIL_PDMA_XY_PKT(0xc703),
+	/* PDMA15 (PDMA_USART_G2) - UART4-9 */
+	PSIL_PDMA_XY_PKT(0xc704),
+	PSIL_PDMA_XY_PKT(0xc705),
+	PSIL_PDMA_XY_PKT(0xc706),
+	PSIL_PDMA_XY_PKT(0xc707),
+	PSIL_PDMA_XY_PKT(0xc708),
+	PSIL_PDMA_XY_PKT(0xc709),
 	/* CPSW9 */
 	PSIL_ETHERNET(0xca00),
 	PSIL_ETHERNET(0xca01),
@@ -284,6 +347,22 @@ static struct psil_ep j721e_dst_ep_map[] = {
 	PSIL_ETHERNET(0xf005),
 	PSIL_ETHERNET(0xf006),
 	PSIL_ETHERNET(0xf007),
+	/* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+	PSIL_PDMA_XY_PKT(0xf100),
+	PSIL_PDMA_XY_PKT(0xf101),
+	PSIL_PDMA_XY_PKT(0xf102),
+	PSIL_PDMA_XY_PKT(0xf103),
+	/* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+	PSIL_PDMA_XY_PKT(0xf200),
+	PSIL_PDMA_XY_PKT(0xf201),
+	PSIL_PDMA_XY_PKT(0xf202),
+	PSIL_PDMA_XY_PKT(0xf203),
+	PSIL_PDMA_XY_PKT(0xf204),
+	PSIL_PDMA_XY_PKT(0xf205),
+	PSIL_PDMA_XY_PKT(0xf206),
+	PSIL_PDMA_XY_PKT(0xf207),
+	/* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+	PSIL_PDMA_XY_PKT(0xf300),
 	/* SA2UL */
 	PSIL_SA2UL(0xf500, 1),
 	PSIL_SA2UL(0xf501, 1),
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 2f0d2c68c93c..7b5081989b3d 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -263,6 +263,7 @@ struct udma_chan_config {
 	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
 
 	u32 tr_trigger_type;
+	unsigned long tx_flags;
 
 	/* PKDMA mapped channel */
 	int mapped_channel_id;
@@ -300,8 +301,6 @@ struct udma_chan {
 
 	struct udma_tx_drain tx_drain;
 
-	u32 bcnt; /* number of bytes completed since the start of the channel */
-
 	/* Channel configuration parameters */
 	struct udma_chan_config config;
 
@@ -757,6 +756,20 @@ static void udma_reset_rings(struct udma_chan *uc)
 	}
 }
 
+static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val)
+{
+	if (uc->desc->dir == DMA_DEV_TO_MEM) {
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+	} else {
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
+		if (!uc->bchan)
+			udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+	}
+}
+
 static void udma_reset_counters(struct udma_chan *uc)
 {
 	u32 val;
@@ -790,8 +803,6 @@ static void udma_reset_counters(struct udma_chan *uc)
 		val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
 		udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
 	}
-
-	uc->bcnt = 0;
 }
 
 static int udma_reset_chan(struct udma_chan *uc, bool hard)
@@ -1045,9 +1056,14 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
 {
 	u32 peer_bcnt, bcnt;
 
-	/* Only TX towards PDMA is affected */
+	/*
+	 * Only TX towards PDMA is affected.
+	 * If DMA_PREP_INTERRUPT is not set by the consumer, skip the transfer
+	 * completion calculation; in that case the consumer must ensure that
+	 * there is no stale data left in the DMA fabric.
+	 */
 	if (uc->config.ep_type == PSIL_EP_NATIVE ||
-	    uc->config.dir != DMA_MEM_TO_DEV)
+	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
 		return true;
 
 	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
@@ -1115,7 +1131,7 @@ static void udma_check_tx_completion(struct work_struct *work)
 		if (uc->desc) {
 			struct udma_desc *d = uc->desc;
 
-			uc->bcnt += d->residue;
+			udma_decrement_byte_counters(uc, d->residue);
 			udma_start(uc);
 			vchan_cookie_complete(&d->vd);
 			break;
@@ -1168,7 +1184,7 @@ static irqreturn_t udma_ring_irq_handler(int irq, void *data)
 				vchan_cyclic_callback(&d->vd);
 			} else {
 				if (udma_is_desc_really_done(uc, d)) {
-					uc->bcnt += d->residue;
+					udma_decrement_byte_counters(uc, d->residue);
 					udma_start(uc);
 					vchan_cookie_complete(&d->vd);
 				} else {
@@ -1204,7 +1220,7 @@ static irqreturn_t udma_udma_irq_handler(int irq, void *data)
 			vchan_cyclic_callback(&d->vd);
 		} else {
 			/* TODO: figure out the real amount of data */
-			uc->bcnt += d->residue;
+			udma_decrement_byte_counters(uc, d->residue);
 			udma_start(uc);
 			vchan_cookie_complete(&d->vd);
 		}
@@ -3408,6 +3424,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (!burst)
 		burst = 1;
 
+	uc->config.tx_flags = tx_flags;
+
 	if (uc->config.pkt_mode)
 		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
 					   context);
@@ -3809,7 +3827,6 @@ static enum dma_status udma_tx_status(struct dma_chan *chan,
 			bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
 		}
 
-		bcnt -= uc->bcnt;
 		if (bcnt && !(bcnt % uc->desc->residue))
 			residue = 0;
 		else
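Two related changes here: the prep flags are cached in uc->config.tx_flags so that udma_is_desc_really_done() can skip the PDMA peer byte-count check when the consumer did not request DMA_PREP_INTERRUPT, and the removed uc->bcnt accumulator is replaced by writing the completed residue back into the hardware byte counters (udma_decrement_byte_counters()), letting udma_tx_status() use BCNT directly. A sketch of the consumer side of the first change, using only standard dmaengine API (hypothetical helper name):

  #include <linux/dmaengine.h>

  static dma_cookie_t example_submit_without_irq(struct dma_chan *chan,
  						struct scatterlist *sgl,
  						unsigned int sg_len)
  {
  	struct dma_async_tx_descriptor *d;

  	/* flags == 0: no DMA_PREP_INTERRUPT, so the driver reports the
  	 * descriptor done without waiting for the peer byte counter; the
  	 * caller must tolerate data still in flight in the DMA fabric.
  	 */
  	d = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_MEM_TO_DEV, 0);
  	if (!d)
  		return -EINVAL;

  	return dmaengine_submit(d);
  }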
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index 3f4ee3954384..21472a5d7636 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -796,6 +796,17 @@ static int zynqmp_dma_device_terminate_all(struct dma_chan *dchan)
 }
 
 /**
+ * zynqmp_dma_synchronize - Synchronizes the termination of transfers to the current context.
+ * @dchan: DMA channel pointer
+ */
+static void zynqmp_dma_synchronize(struct dma_chan *dchan)
+{
+	struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+	tasklet_kill(&chan->tasklet);
+}
+
+/**
  * zynqmp_dma_prep_memcpy - prepare descriptors for memcpy transaction
  * @dchan: DMA channel
  * @dma_dst: Destination buffer address
@@ -1057,6 +1068,7 @@ static int zynqmp_dma_probe(struct platform_device *pdev)
 	p = &zdev->common;
 	p->device_prep_dma_memcpy = zynqmp_dma_prep_memcpy;
 	p->device_terminate_all = zynqmp_dma_device_terminate_all;
+	p->device_synchronize = zynqmp_dma_synchronize;
 	p->device_issue_pending = zynqmp_dma_issue_pending;
 	p->device_alloc_chan_resources = zynqmp_dma_alloc_chan_resources;
 	p->device_free_chan_resources = zynqmp_dma_free_chan_resources;
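With device_synchronize wired up, dmaengine clients can follow the usual two-step teardown and be certain that zynqmp_dma's tasklet, and any callback it runs, has finished. Generic client-side usage, not code from this patch:

  #include <linux/dmaengine.h>

  static void example_channel_teardown(struct dma_chan *chan)
  {
  	dmaengine_terminate_async(chan);	/* stop issuing new transfers    */
  	dmaengine_synchronize(chan);		/* backed by zynqmp_dma_synchronize() */
  }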
diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h
index a6b7bc707356..77ea602c287c 100644
--- a/include/linux/dma/hsu.h
+++ b/include/linux/dma/hsu.h
@@ -8,11 +8,13 @@
 #ifndef _DMA_HSU_H
 #define _DMA_HSU_H
 
-#include <linux/device.h>
-#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+#include <linux/types.h>
 
 #include <linux/platform_data/dma-hsu.h>
 
+struct device;
 struct hsu_dma;
 
 /**
diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h
index c65b412b2b33..611bae193c1c 100644
--- a/include/linux/platform_data/dma-hsu.h
+++ b/include/linux/platform_data/dma-hsu.h
@@ -8,7 +8,7 @@
 #ifndef _PLATFORM_DATA_DMA_HSU_H
 #define _PLATFORM_DATA_DMA_HSU_H
 
-#include <linux/device.h>
+struct device;
 
 struct hsu_dma_slave {
 	struct device	*dma_dev;