Diffstat (limited to 'drivers/spi')
-rw-r--r--  drivers/spi/Kconfig               |   33
-rw-r--r--  drivers/spi/Makefile              |    3
-rw-r--r--  drivers/spi/spi-atmel.c           |  728
-rw-r--r--  drivers/spi/spi-bcm2835.c         |  422
-rw-r--r--  drivers/spi/spi-bcm63xx.c         |   81
-rw-r--r--  drivers/spi/spi-fsl-cpm.c         |  387
-rw-r--r--  drivers/spi/spi-fsl-cpm.h         |   43
-rw-r--r--  drivers/spi/spi-fsl-lib.c         |    8
-rw-r--r--  drivers/spi/spi-fsl-lib.h         |   15
-rw-r--r--  drivers/spi/spi-fsl-spi.c         |  601
-rw-r--r--  drivers/spi/spi-fsl-spi.h         |   72
-rw-r--r--  drivers/spi/spi-gpio.c            |    2
-rw-r--r--  drivers/spi/spi-mpc512x-psc.c     |   43
-rw-r--r--  drivers/spi/spi-mxs.c             |    1
-rw-r--r--  drivers/spi/spi-oc-tiny.c         |    4
-rw-r--r--  drivers/spi/spi-omap2-mcspi.c     |   43
-rw-r--r--  drivers/spi/spi-pxa2xx-pci.c      |    6
-rw-r--r--  drivers/spi/spi-pxa2xx.c          |   15
-rw-r--r--  drivers/spi/spi-s3c64xx.c         |  333
-rw-r--r--  drivers/spi/spi-sh-msiof.c        |    4
-rw-r--r--  drivers/spi/spi-sirf.c            |    2
-rw-r--r--  drivers/spi/spi-tegra114.c        | 1246
-rw-r--r--  drivers/spi/spi-tegra20-sflash.c  |   41
-rw-r--r--  drivers/spi/spi-tegra20-slink.c   |  109
-rw-r--r--  drivers/spi/spi-topcliff-pch.c    |   16
-rw-r--r--  drivers/spi/spi.c                 |   25
-rw-r--r--  drivers/spi/spidev.c              |    2
27 files changed, 3417 insertions, 868 deletions
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index f80eee74a311..141d8c10b764 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -55,6 +55,7 @@ comment "SPI Master Controller Drivers"
 
 config SPI_ALTERA
 	tristate "Altera SPI Controller"
+	depends on GENERIC_HARDIRQS
 	select SPI_BITBANG
 	help
 	  This is the driver for the Altera SPI Controller.
@@ -74,6 +75,17 @@ config SPI_ATMEL
 	  This selects a driver for the Atmel SPI Controller, present on
 	  many AT32 (AVR32) and AT91 (ARM) chips.
 
+config SPI_BCM2835
+	tristate "BCM2835 SPI controller"
+	depends on ARCH_BCM2835
+	help
+	  This selects a driver for the Broadcom BCM2835 SPI master.
+
+	  The BCM2835 contains two types of SPI master controller: the
+	  "universal SPI master", and the regular SPI controller. This driver
+	  is for the regular SPI controller. Slave mode operation is not
+	  supported.
+
 config SPI_BFIN5XX
 	tristate "SPI controller driver for ADI Blackfin5xx"
 	depends on BLACKFIN
@@ -218,16 +230,23 @@ config SPI_MPC512x_PSC
 
 config SPI_FSL_LIB
 	tristate
+	depends on OF
+
+config SPI_FSL_CPM
+	tristate
 	depends on FSL_SOC
 
 config SPI_FSL_SPI
-	bool "Freescale SPI controller"
-	depends on FSL_SOC
+	bool "Freescale SPI controller and Aeroflex Gaisler GRLIB SPI controller"
+	depends on OF
 	select SPI_FSL_LIB
+	select SPI_FSL_CPM if FSL_SOC
 	help
 	  This enables using the Freescale SPI controllers in master mode.
 	  MPC83xx platform uses the controller in cpu mode or CPM/QE mode.
 	  MPC8569 uses the controller in QE mode, MPC8610 in cpu mode.
+	  This also enables using the Aeroflex Gaisler GRLIB SPI controller in
+	  master mode.
 
 config SPI_FSL_ESPI
 	bool "Freescale eSPI controller"
@@ -310,7 +329,7 @@ config SPI_PXA2XX_DMA
 
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
-	depends on ARCH_PXA || PCI || ACPI
+	depends on (ARCH_PXA || PCI || ACPI) && GENERIC_HARDIRQS
 	select PXA_SSP if ARCH_PXA
 	help
 	  This enables using a PXA2xx or Sodaville SSP port as a SPI master
@@ -397,6 +416,14 @@ config SPI_MXS
 	help
 	  SPI driver for Freescale MXS devices.
 
+config SPI_TEGRA114
+	tristate "NVIDIA Tegra114 SPI Controller"
+	depends on ARCH_TEGRA && TEGRA20_APB_DMA
+	help
+	  SPI driver for the NVIDIA Tegra114 SPI controller. This controller
+	  differs from the SPI controller on older Tegra SoCs and has a
+	  different register interface.
+
 config SPI_TEGRA20_SFLASH
 	tristate "Nvidia Tegra20 Serial flash Controller"
 	depends on ARCH_TEGRA
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index e53c30941340..33f9c09561e7 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_SPI_ALTERA)		+= spi-altera.o
 obj-$(CONFIG_SPI_ATMEL)			+= spi-atmel.o
 obj-$(CONFIG_SPI_ATH79)			+= spi-ath79.o
 obj-$(CONFIG_SPI_AU1550)		+= spi-au1550.o
+obj-$(CONFIG_SPI_BCM2835)		+= spi-bcm2835.o
 obj-$(CONFIG_SPI_BCM63XX)		+= spi-bcm63xx.o
 obj-$(CONFIG_SPI_BFIN5XX)		+= spi-bfin5xx.o
 obj-$(CONFIG_SPI_BFIN_SPORT)		+= spi-bfin-sport.o
@@ -28,6 +29,7 @@ obj-$(CONFIG_SPI_DW_PCI)		+= spi-dw-midpci.o
 spi-dw-midpci-objs			:= spi-dw-pci.o spi-dw-mid.o
 obj-$(CONFIG_SPI_EP93XX)		+= spi-ep93xx.o
 obj-$(CONFIG_SPI_FALCON)		+= spi-falcon.o
+obj-$(CONFIG_SPI_FSL_CPM)		+= spi-fsl-cpm.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
 obj-$(CONFIG_SPI_FSL_ESPI)		+= spi-fsl-espi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
@@ -63,6 +65,7 @@ obj-$(CONFIG_SPI_SH_HSPI)		+= spi-sh-hspi.o
 obj-$(CONFIG_SPI_SH_MSIOF)		+= spi-sh-msiof.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi-sh-sci.o
 obj-$(CONFIG_SPI_SIRF)		+= spi-sirf.o
+obj-$(CONFIG_SPI_TEGRA114)		+= spi-tegra114.o
 obj-$(CONFIG_SPI_TEGRA20_SFLASH)	+= spi-tegra20-sflash.o
 obj-$(CONFIG_SPI_TEGRA20_SLINK)		+= spi-tegra20-slink.o
 obj-$(CONFIG_SPI_TI_SSP)		+= spi-ti-ssp.o
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 656d137db253..787bd2c22bca 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -15,16 +15,17 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/spi/spi.h>
 #include <linux/slab.h>
 #include <linux/platform_data/atmel.h>
+#include <linux/platform_data/dma-atmel.h>
 #include <linux/of.h>
 
-#include <asm/io.h>
-#include <asm/gpio.h>
-#include <mach/cpu.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
 
 /* SPI register offsets */
 #define SPI_CR					0x0000
@@ -39,6 +40,7 @@
 #define SPI_CSR1				0x0034
 #define SPI_CSR2				0x0038
 #define SPI_CSR3				0x003c
+#define SPI_VERSION				0x00fc
 #define SPI_RPR					0x0100
 #define SPI_RCR					0x0104
 #define SPI_TPR					0x0108
@@ -71,6 +73,8 @@
 #define SPI_FDIV_SIZE				1
 #define SPI_MODFDIS_OFFSET			4
 #define SPI_MODFDIS_SIZE			1
+#define SPI_WDRBT_OFFSET			5
+#define SPI_WDRBT_SIZE				1
 #define SPI_LLB_OFFSET				7
 #define SPI_LLB_SIZE				1
 #define SPI_PCS_OFFSET				16
@@ -180,6 +184,27 @@
 #define spi_writel(port,reg,value) \
 	__raw_writel((value), (port)->regs + SPI_##reg)
 
+/* use PIO for small transfers, avoiding DMA setup/teardown overhead and
+ * cache operations; better heuristics consider wordsize and bitrate.
+ */
+#define DMA_MIN_BYTES	16
+
+struct atmel_spi_dma {
+	struct dma_chan			*chan_rx;
+	struct dma_chan			*chan_tx;
+	struct scatterlist		sgrx;
+	struct scatterlist		sgtx;
+	struct dma_async_tx_descriptor	*data_desc_rx;
+	struct dma_async_tx_descriptor	*data_desc_tx;
+
+	struct at_dma_slave	dma_slave;
+};
+
+struct atmel_spi_caps {
+	bool	is_spi2;
+	bool	has_wdrbt;
+	bool	has_dma_support;
+};
 
 /*
  * The core SPI transfer engine just talks to a register bank to set up
@@ -188,7 +213,9 @@
  */
 struct atmel_spi {
 	spinlock_t		lock;
+	unsigned long		flags;
 
+	phys_addr_t		phybase;
 	void __iomem		*regs;
 	int			irq;
 	struct clk		*clk;
@@ -197,13 +224,23 @@ struct atmel_spi {
 
 	u8			stopping;
 	struct list_head	queue;
+	struct tasklet_struct	tasklet;
 	struct spi_transfer	*current_transfer;
 	unsigned long		current_remaining_bytes;
 	struct spi_transfer	*next_transfer;
 	unsigned long		next_remaining_bytes;
+	int			done_status;
 
+	/* scratch buffer */
 	void			*buffer;
 	dma_addr_t		buffer_dma;
+
+	struct atmel_spi_caps	caps;
+
+	bool			use_dma;
+	bool			use_pdc;
+	/* dmaengine data */
+	struct atmel_spi_dma	dma;
 };
 
 /* Controller-specific per-slave state */
@@ -222,14 +259,10 @@ struct atmel_spi_device {
  *  - SPI_SR.TXEMPTY, SPI_SR.NSSR (and corresponding irqs)
  *  - SPI_CSRx.CSAAT
  *  - SPI_CSRx.SBCR allows faster clocking
- *
- * We can determine the controller version by reading the VERSION
- * register, but I haven't checked that it exists on all chips, and
- * this is cheaper anyway.
  */
-static bool atmel_spi_is_v2(void)
+static bool atmel_spi_is_v2(struct atmel_spi *as)
 {
-	return !cpu_is_at91rm9200();
+	return as->caps.is_spi2;
 }
 
 /*
@@ -250,11 +283,6 @@ static bool atmel_spi_is_v2(void)
  * Master on Chip Select 0.")  No workaround exists for that ... so for
  * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH,
  * and (c) will trigger that first erratum in some cases.
- *
- * TODO: Test if the atmel_spi_is_v2() branch below works on
- * AT91RM9200 if we use some other register than CSR0. However, don't
- * do this unconditionally since AP7000 has an errata where the BITS
- * field in CSR0 overrides all other CSRs.
  */
 
 static void cs_activate(struct atmel_spi *as, struct spi_device *spi)
@@ -263,15 +291,25 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi)
 	unsigned active = spi->mode & SPI_CS_HIGH;
 	u32 mr;
 
-	if (atmel_spi_is_v2()) {
-		/*
-		 * Always use CSR0. This ensures that the clock
-		 * switches to the correct idle polarity before we
-		 * toggle the CS.
+	if (atmel_spi_is_v2(as)) {
+		spi_writel(as, CSR0 + 4 * spi->chip_select, asd->csr);
+		/* For lower SPI versions, there is an issue where a PDC transfer
+		 * on CS1,2,3 needs SPI_CSR0.BITS configured the same as SPI_CSR1,2,3.BITS
 		 */
 		spi_writel(as, CSR0, asd->csr);
-		spi_writel(as, MR, SPI_BF(PCS, 0x0e) | SPI_BIT(MODFDIS)
-				| SPI_BIT(MSTR));
+		if (as->caps.has_wdrbt) {
+			spi_writel(as, MR,
+					SPI_BF(PCS, ~(0x01 << spi->chip_select))
+					| SPI_BIT(WDRBT)
+					| SPI_BIT(MODFDIS)
+					| SPI_BIT(MSTR));
+		} else {
+			spi_writel(as, MR,
+					SPI_BF(PCS, ~(0x01 << spi->chip_select))
+					| SPI_BIT(MODFDIS)
+					| SPI_BIT(MSTR));
+		}
+
 		mr = spi_readl(as, MR);
 		gpio_set_value(asd->npcs_pin, active);
 	} else {
@@ -318,10 +356,26 @@ static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi)
 			asd->npcs_pin, active ? " (low)" : "",
 			mr);
 
-	if (atmel_spi_is_v2() || spi->chip_select != 0)
+	if (atmel_spi_is_v2(as) || spi->chip_select != 0)
 		gpio_set_value(asd->npcs_pin, !active);
 }
 
+static void atmel_spi_lock(struct atmel_spi *as)
+{
+	spin_lock_irqsave(&as->lock, as->flags);
+}
+
+static void atmel_spi_unlock(struct atmel_spi *as)
+{
+	spin_unlock_irqrestore(&as->lock, as->flags);
+}
+
+static inline bool atmel_spi_use_dma(struct atmel_spi *as,
+				struct spi_transfer *xfer)
+{
+	return as->use_dma && xfer->len >= DMA_MIN_BYTES;
+}
+
 static inline int atmel_spi_xfer_is_last(struct spi_message *msg,
 					struct spi_transfer *xfer)
 {
@@ -333,6 +387,265 @@ static inline int atmel_spi_xfer_can_be_chained(struct spi_transfer *xfer)
 	return xfer->delay_usecs == 0 && !xfer->cs_change;
 }
 
+static int atmel_spi_dma_slave_config(struct atmel_spi *as,
+				struct dma_slave_config *slave_config,
+				u8 bits_per_word)
+{
+	int err = 0;
+
+	if (bits_per_word > 8) {
+		slave_config->dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		slave_config->src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	} else {
+		slave_config->dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		slave_config->src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	}
+
+	slave_config->dst_addr = (dma_addr_t)as->phybase + SPI_TDR;
+	slave_config->src_addr = (dma_addr_t)as->phybase + SPI_RDR;
+	slave_config->src_maxburst = 1;
+	slave_config->dst_maxburst = 1;
+	slave_config->device_fc = false;
+
+	slave_config->direction = DMA_MEM_TO_DEV;
+	if (dmaengine_slave_config(as->dma.chan_tx, slave_config)) {
+		dev_err(&as->pdev->dev,
+			"failed to configure tx dma channel\n");
+		err = -EINVAL;
+	}
+
+	slave_config->direction = DMA_DEV_TO_MEM;
+	if (dmaengine_slave_config(as->dma.chan_rx, slave_config)) {
+		dev_err(&as->pdev->dev,
+			"failed to configure rx dma channel\n");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static bool filter(struct dma_chan *chan, void *slave)
+{
+	struct	at_dma_slave *sl = slave;
+
+	if (sl->dma_dev == chan->device->dev) {
+		chan->private = sl;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+static int atmel_spi_configure_dma(struct atmel_spi *as)
+{
+	struct at_dma_slave *sdata = &as->dma.dma_slave;
+	struct dma_slave_config	slave_config;
+	int err;
+
+	if (sdata && sdata->dma_dev) {
+		dma_cap_mask_t mask;
+
+		/* Try to grab two DMA channels */
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+		as->dma.chan_tx = dma_request_channel(mask, filter, sdata);
+		if (as->dma.chan_tx)
+			as->dma.chan_rx =
+				dma_request_channel(mask, filter, sdata);
+	}
+	if (!as->dma.chan_rx || !as->dma.chan_tx) {
+		dev_err(&as->pdev->dev,
+			"DMA channel not available, SPI unable to use DMA\n");
+		err = -EBUSY;
+		goto error;
+	}
+
+	err = atmel_spi_dma_slave_config(as, &slave_config, 8);
+	if (err)
+		goto error;
+
+	dev_info(&as->pdev->dev,
+			"Using %s (tx) and %s (rx) for DMA transfers\n",
+			dma_chan_name(as->dma.chan_tx),
+			dma_chan_name(as->dma.chan_rx));
+	return 0;
+error:
+	if (as->dma.chan_rx)
+		dma_release_channel(as->dma.chan_rx);
+	if (as->dma.chan_tx)
+		dma_release_channel(as->dma.chan_tx);
+	return err;
+}
+
+static void atmel_spi_stop_dma(struct atmel_spi *as)
+{
+	if (as->dma.chan_rx)
+		as->dma.chan_rx->device->device_control(as->dma.chan_rx,
+							DMA_TERMINATE_ALL, 0);
+	if (as->dma.chan_tx)
+		as->dma.chan_tx->device->device_control(as->dma.chan_tx,
+							DMA_TERMINATE_ALL, 0);
+}
+
+static void atmel_spi_release_dma(struct atmel_spi *as)
+{
+	if (as->dma.chan_rx)
+		dma_release_channel(as->dma.chan_rx);
+	if (as->dma.chan_tx)
+		dma_release_channel(as->dma.chan_tx);
+}
+
+/* This function is called by the DMA driver from tasklet context */
+static void dma_callback(void *data)
+{
+	struct spi_master	*master = data;
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+
+	/* trigger SPI tasklet */
+	tasklet_schedule(&as->tasklet);
+}
+
+/*
+ * Next transfer using PIO.
+ * lock is held, spi tasklet is blocked
+ */
+static void atmel_spi_next_xfer_pio(struct spi_master *master,
+				struct spi_transfer *xfer)
+{
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+
+	dev_vdbg(master->dev.parent, "atmel_spi_next_xfer_pio\n");
+
+	as->current_remaining_bytes = xfer->len;
+
+	/* Make sure data is not remaining in RDR */
+	spi_readl(as, RDR);
+	while (spi_readl(as, SR) & SPI_BIT(RDRF)) {
+		spi_readl(as, RDR);
+		cpu_relax();
+	}
+
+	if (xfer->tx_buf)
+		spi_writel(as, TDR, *(u8 *)(xfer->tx_buf));
+	else
+		spi_writel(as, TDR, 0);
+
+	dev_dbg(master->dev.parent,
+		"  start pio xfer %p: len %u tx %p rx %p\n",
+		xfer, xfer->len, xfer->tx_buf, xfer->rx_buf);
+
+	/* Enable relevant interrupts */
+	spi_writel(as, IER, SPI_BIT(RDRF) | SPI_BIT(OVRES));
+}
+
+/*
+ * Submit next transfer for DMA.
+ * lock is held, spi tasklet is blocked
+ */
+static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
+				struct spi_transfer *xfer,
+				u32 *plen)
+{
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+	struct dma_chan		*rxchan = as->dma.chan_rx;
+	struct dma_chan		*txchan = as->dma.chan_tx;
+	struct dma_async_tx_descriptor *rxdesc;
+	struct dma_async_tx_descriptor *txdesc;
+	struct dma_slave_config	slave_config;
+	dma_cookie_t		cookie;
+	u32	len = *plen;
+
+	dev_vdbg(master->dev.parent, "atmel_spi_next_xfer_dma_submit\n");
+
+	/* Check that the channels are available */
+	if (!rxchan || !txchan)
+		return -ENODEV;
+
+	/* release lock for DMA operations */
+	atmel_spi_unlock(as);
+
+	/* prepare the RX dma transfer */
+	sg_init_table(&as->dma.sgrx, 1);
+	if (xfer->rx_buf) {
+		as->dma.sgrx.dma_address = xfer->rx_dma + xfer->len - *plen;
+	} else {
+		as->dma.sgrx.dma_address = as->buffer_dma;
+		if (len > BUFFER_SIZE)
+			len = BUFFER_SIZE;
+	}
+
+	/* prepare the TX dma transfer */
+	sg_init_table(&as->dma.sgtx, 1);
+	if (xfer->tx_buf) {
+		as->dma.sgtx.dma_address = xfer->tx_dma + xfer->len - *plen;
+	} else {
+		as->dma.sgtx.dma_address = as->buffer_dma;
+		if (len > BUFFER_SIZE)
+			len = BUFFER_SIZE;
+		memset(as->buffer, 0, len);
+	}
+
+	sg_dma_len(&as->dma.sgtx) = len;
+	sg_dma_len(&as->dma.sgrx) = len;
+
+	*plen = len;
+
+	if (atmel_spi_dma_slave_config(as, &slave_config, 8))
+		goto err_exit;
+
+	/* Send both scatterlists */
+	rxdesc = rxchan->device->device_prep_slave_sg(rxchan,
+					&as->dma.sgrx,
+					1,
+					DMA_FROM_DEVICE,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK,
+					NULL);
+	if (!rxdesc)
+		goto err_dma;
+
+	txdesc = txchan->device->device_prep_slave_sg(txchan,
+					&as->dma.sgtx,
+					1,
+					DMA_TO_DEVICE,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK,
+					NULL);
+	if (!txdesc)
+		goto err_dma;
+
+	dev_dbg(master->dev.parent,
+		"  start dma xfer %p: len %u tx %p/%08x rx %p/%08x\n",
+		xfer, xfer->len, xfer->tx_buf, xfer->tx_dma,
+		xfer->rx_buf, xfer->rx_dma);
+
+	/* Enable relevant interrupts */
+	spi_writel(as, IER, SPI_BIT(OVRES));
+
+	/* Put the callback on the RX transfer only, that should finish last */
+	rxdesc->callback = dma_callback;
+	rxdesc->callback_param = master;
+
+	/* Submit and fire RX and TX with TX last so we're ready to read! */
+	cookie = rxdesc->tx_submit(rxdesc);
+	if (dma_submit_error(cookie))
+		goto err_dma;
+	cookie = txdesc->tx_submit(txdesc);
+	if (dma_submit_error(cookie))
+		goto err_dma;
+	rxchan->device->device_issue_pending(rxchan);
+	txchan->device->device_issue_pending(txchan);
+
+	/* take back lock */
+	atmel_spi_lock(as);
+	return 0;
+
+err_dma:
+	spi_writel(as, IDR, SPI_BIT(OVRES));
+	atmel_spi_stop_dma(as);
+err_exit:
+	atmel_spi_lock(as);
+	return -ENOMEM;
+}
+
 static void atmel_spi_next_xfer_data(struct spi_master *master,
 				struct spi_transfer *xfer,
 				dma_addr_t *tx_dma,
@@ -350,6 +663,7 @@ static void atmel_spi_next_xfer_data(struct spi_master *master,
 		if (len > BUFFER_SIZE)
 			len = BUFFER_SIZE;
 	}
+
 	if (xfer->tx_buf)
 		*tx_dma = xfer->tx_dma + xfer->len - *plen;
 	else {
@@ -365,10 +679,10 @@ static void atmel_spi_next_xfer_data(struct spi_master *master,
 }
 
 /*
- * Submit next transfer for DMA.
+ * Submit next transfer for PDC.
  * lock is held, spi irq is blocked
  */
-static void atmel_spi_next_xfer(struct spi_master *master,
+static void atmel_spi_pdc_next_xfer(struct spi_master *master,
 				struct spi_message *msg)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
@@ -465,6 +779,48 @@ static void atmel_spi_next_xfer(struct spi_master *master,
 	spi_writel(as, PTCR, SPI_BIT(TXTEN) | SPI_BIT(RXTEN));
 }
 
+/*
+ * Choose way to submit next transfer and start it.
+ * lock is held, spi tasklet is blocked
+ */
+static void atmel_spi_dma_next_xfer(struct spi_master *master,
+				struct spi_message *msg)
+{
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+	struct spi_transfer	*xfer;
+	u32	remaining, len;
+
+	remaining = as->current_remaining_bytes;
+	if (remaining) {
+		xfer = as->current_transfer;
+		len = remaining;
+	} else {
+		if (!as->current_transfer)
+			xfer = list_entry(msg->transfers.next,
+				struct spi_transfer, transfer_list);
+		else
+			xfer = list_entry(
+				as->current_transfer->transfer_list.next,
+					struct spi_transfer, transfer_list);
+
+		as->current_transfer = xfer;
+		len = xfer->len;
+	}
+
+	if (atmel_spi_use_dma(as, xfer)) {
+		u32 total = len;
+		if (!atmel_spi_next_xfer_dma_submit(master, xfer, &len)) {
+			as->current_remaining_bytes = total - len;
+			return;
+		} else {
+			dev_err(&msg->spi->dev, "unable to use DMA, fallback to PIO\n");
+		}
+	}
+
+	/* use PIO if an error happened while using DMA */
+	atmel_spi_next_xfer_pio(master, xfer);
+}
+
 static void atmel_spi_next_message(struct spi_master *master)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
@@ -489,7 +845,10 @@ static void atmel_spi_next_message(struct spi_master *master)
 	} else
 		cs_activate(as, spi);
 
-	atmel_spi_next_xfer(master, msg);
+	if (as->use_pdc)
+		atmel_spi_pdc_next_xfer(master, msg);
+	else
+		atmel_spi_dma_next_xfer(master, msg);
 }
 
 /*
@@ -542,38 +901,213 @@ static void atmel_spi_dma_unmap_xfer(struct spi_master *master,
 				 xfer->len, DMA_FROM_DEVICE);
 }
 
+static void atmel_spi_disable_pdc_transfer(struct atmel_spi *as)
+{
+	spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
+}
+
 static void
 atmel_spi_msg_done(struct spi_master *master, struct atmel_spi *as,
-		struct spi_message *msg, int status, int stay)
+		struct spi_message *msg, int stay)
 {
-	if (!stay || status < 0)
+	if (!stay || as->done_status < 0)
 		cs_deactivate(as, msg->spi);
 	else
 		as->stay = msg->spi;
 
 	list_del(&msg->queue);
-	msg->status = status;
+	msg->status = as->done_status;
 
 	dev_dbg(master->dev.parent,
 		"xfer complete: %u bytes transferred\n",
 		msg->actual_length);
 
-	spin_unlock(&as->lock);
+	atmel_spi_unlock(as);
 	msg->complete(msg->context);
-	spin_lock(&as->lock);
+	atmel_spi_lock(as);
 
 	as->current_transfer = NULL;
 	as->next_transfer = NULL;
+	as->done_status = 0;
 
 	/* continue if needed */
-	if (list_empty(&as->queue) || as->stopping)
-		spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
-	else
+	if (list_empty(&as->queue) || as->stopping) {
+		if (as->use_pdc)
+			atmel_spi_disable_pdc_transfer(as);
+	} else {
 		atmel_spi_next_message(master);
+	}
+}
+
+/* Called from IRQ
+ * lock is held
+ *
+ * Must update "current_remaining_bytes" to keep track of data
+ * to transfer.
+ */
+static void
+atmel_spi_pump_pio_data(struct atmel_spi *as, struct spi_transfer *xfer)
+{
+	u8		*txp;
+	u8		*rxp;
+	unsigned long	xfer_pos = xfer->len - as->current_remaining_bytes;
+
+	if (xfer->rx_buf) {
+		rxp = ((u8 *)xfer->rx_buf) + xfer_pos;
+		*rxp = spi_readl(as, RDR);
+	} else {
+		spi_readl(as, RDR);
+	}
+
+	as->current_remaining_bytes--;
+
+	if (as->current_remaining_bytes) {
+		if (xfer->tx_buf) {
+			txp = ((u8 *)xfer->tx_buf) + xfer_pos + 1;
+			spi_writel(as, TDR, *txp);
+		} else {
+			spi_writel(as, TDR, 0);
+		}
+	}
+}
+
+/* Tasklet
+ * Scheduled from the DMA callback, and from the PIO transfer and overrun IRQ paths.
+ */
+static void atmel_spi_tasklet_func(unsigned long data)
+{
+	struct spi_master	*master = (struct spi_master *)data;
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+	struct spi_message	*msg;
+	struct spi_transfer	*xfer;
+
+	dev_vdbg(master->dev.parent, "atmel_spi_tasklet_func\n");
+
+	atmel_spi_lock(as);
+
+	xfer = as->current_transfer;
+
+	if (xfer == NULL)
+		/* already been there */
+		goto tasklet_out;
+
+	msg = list_entry(as->queue.next, struct spi_message, queue);
+
+	if (as->current_remaining_bytes == 0) {
+		if (as->done_status < 0) {
+			/* error happened (overrun) */
+			if (atmel_spi_use_dma(as, xfer))
+				atmel_spi_stop_dma(as);
+		} else {
+			/* only update length if no error */
+			msg->actual_length += xfer->len;
+		}
+
+		if (atmel_spi_use_dma(as, xfer))
+			if (!msg->is_dma_mapped)
+				atmel_spi_dma_unmap_xfer(master, xfer);
+
+		if (xfer->delay_usecs)
+			udelay(xfer->delay_usecs);
+
+		if (atmel_spi_xfer_is_last(msg, xfer) || as->done_status < 0) {
+			/* report completed (or erroneous) message */
+			atmel_spi_msg_done(master, as, msg, xfer->cs_change);
+		} else {
+			if (xfer->cs_change) {
+				cs_deactivate(as, msg->spi);
+				udelay(1);
+				cs_activate(as, msg->spi);
+			}
+
+			/*
+			 * Not done yet. Submit the next transfer.
+			 *
+			 * FIXME handle protocol options for xfer
+			 */
+			atmel_spi_dma_next_xfer(master, msg);
+		}
+	} else {
+		/*
+		 * Keep going, we still have data to send in
+		 * the current transfer.
+		 */
+		atmel_spi_dma_next_xfer(master, msg);
+	}
+
+tasklet_out:
+	atmel_spi_unlock(as);
 }
 
+/* Interrupt
+ *
+ * No need for locking in this interrupt handler: done_status is the
+ * only field modified. All we need is for it to be updated before the
+ * tasklet runs; the memory barrier below ensures that.
+ */
 static irqreturn_t
-atmel_spi_interrupt(int irq, void *dev_id)
+atmel_spi_pio_interrupt(int irq, void *dev_id)
+{
+	struct spi_master	*master = dev_id;
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+	u32			status, pending, imr;
+	struct spi_transfer	*xfer;
+	int			ret = IRQ_NONE;
+
+	imr = spi_readl(as, IMR);
+	status = spi_readl(as, SR);
+	pending = status & imr;
+
+	if (pending & SPI_BIT(OVRES)) {
+		ret = IRQ_HANDLED;
+		spi_writel(as, IDR, SPI_BIT(OVRES));
+		dev_warn(master->dev.parent, "overrun\n");
+
+		/*
+		 * When we get an overrun, we disregard the current
+		 * transfer. Data will not be copied back from any
+		 * bounce buffer and msg->actual_len will not be
+		 * updated with the last xfer.
+		 *
+		 * We will also not process any remaining transfers in
+		 * the message.
+		 *
+		 * All handling is done in the tasklet, with done_status indicating the error.
+		 */
+		as->done_status = -EIO;
+		smp_wmb();
+
+		/* Clear any overrun happening while cleaning up */
+		spi_readl(as, SR);
+
+		tasklet_schedule(&as->tasklet);
+
+	} else if (pending & SPI_BIT(RDRF)) {
+		atmel_spi_lock(as);
+
+		if (as->current_remaining_bytes) {
+			ret = IRQ_HANDLED;
+			xfer = as->current_transfer;
+			atmel_spi_pump_pio_data(as, xfer);
+			if (!as->current_remaining_bytes) {
+				/* no more data to xfer, kick tasklet */
+				spi_writel(as, IDR, pending);
+				tasklet_schedule(&as->tasklet);
+			}
+		}
+
+		atmel_spi_unlock(as);
+	} else {
+		WARN_ONCE(pending, "IRQ not handled, pending = %x\n", pending);
+		ret = IRQ_HANDLED;
+		spi_writel(as, IDR, pending);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+atmel_spi_pdc_interrupt(int irq, void *dev_id)
 {
 	struct spi_master	*master = dev_id;
 	struct atmel_spi	*as = spi_master_get_devdata(master);
@@ -582,7 +1116,7 @@ atmel_spi_interrupt(int irq, void *dev_id)
 	u32			status, pending, imr;
 	int			ret = IRQ_NONE;
 
-	spin_lock(&as->lock);
+	atmel_spi_lock(as);
 
 	xfer = as->current_transfer;
 	msg = list_entry(as->queue.next, struct spi_message, queue);
@@ -641,7 +1175,8 @@ atmel_spi_interrupt(int irq, void *dev_id)
 		/* Clear any overrun happening while cleaning up */
 		spi_readl(as, SR);
 
-		atmel_spi_msg_done(master, as, msg, -EIO, 0);
+		as->done_status = -EIO;
+		atmel_spi_msg_done(master, as, msg, 0);
 	} else if (pending & (SPI_BIT(RXBUFF) | SPI_BIT(ENDRX))) {
 		ret = IRQ_HANDLED;
 
@@ -659,7 +1194,7 @@ atmel_spi_interrupt(int irq, void *dev_id)
 
 			if (atmel_spi_xfer_is_last(msg, xfer)) {
 				/* report completed message */
-				atmel_spi_msg_done(master, as, msg, 0,
+				atmel_spi_msg_done(master, as, msg,
 						xfer->cs_change);
 			} else {
 				if (xfer->cs_change) {
@@ -673,18 +1208,18 @@ atmel_spi_interrupt(int irq, void *dev_id)
 				 *
 				 * FIXME handle protocol options for xfer
 				 */
-				atmel_spi_next_xfer(master, msg);
+				atmel_spi_pdc_next_xfer(master, msg);
 			}
 		} else {
 			/*
 			 * Keep going, we still have data to send in
 			 * the current transfer.
 			 */
-			atmel_spi_next_xfer(master, msg);
+			atmel_spi_pdc_next_xfer(master, msg);
 		}
 	}
 
-	spin_unlock(&as->lock);
+	atmel_spi_unlock(as);
 
 	return ret;
 }
@@ -719,7 +1254,7 @@ static int atmel_spi_setup(struct spi_device *spi)
 	}
 
 	/* see notes above re chipselect */
-	if (!atmel_spi_is_v2()
+	if (!atmel_spi_is_v2(as)
 			&& spi->chip_select == 0
 			&& (spi->mode & SPI_CS_HIGH)) {
 		dev_dbg(&spi->dev, "setup: can't be active-high\n");
@@ -728,7 +1263,7 @@ static int atmel_spi_setup(struct spi_device *spi)
 
 	/* v1 chips start out at half the peripheral bus speed. */
 	bus_hz = clk_get_rate(as->clk);
-	if (!atmel_spi_is_v2())
+	if (!atmel_spi_is_v2(as))
 		bus_hz /= 2;
 
 	if (spi->max_speed_hz) {
@@ -789,13 +1324,11 @@ static int atmel_spi_setup(struct spi_device *spi)
 		spi->controller_state = asd;
 		gpio_direction_output(npcs_pin, !(spi->mode & SPI_CS_HIGH));
 	} else {
-		unsigned long		flags;
-
-		spin_lock_irqsave(&as->lock, flags);
+		atmel_spi_lock(as);
 		if (as->stay == spi)
 			as->stay = NULL;
 		cs_deactivate(as, spi);
-		spin_unlock_irqrestore(&as->lock, flags);
+		atmel_spi_unlock(as);
 	}
 
 	asd->csr = csr;
@@ -804,7 +1337,7 @@ static int atmel_spi_setup(struct spi_device *spi)
 		"setup: %lu Hz bpw %u mode 0x%x -> csr%d %08x\n",
 		bus_hz / scbr, bits, spi->mode, spi->chip_select, csr);
 
-	if (!atmel_spi_is_v2())
+	if (!atmel_spi_is_v2(as))
 		spi_writel(as, CSR0 + 4 * spi->chip_select, csr);
 
 	return 0;
@@ -814,7 +1347,6 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg)
 {
 	struct atmel_spi	*as;
 	struct spi_transfer	*xfer;
-	unsigned long		flags;
 	struct device		*controller = spi->master->dev.parent;
 	u8			bits;
 	struct atmel_spi_device	*asd;
@@ -854,13 +1386,10 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg)
 
 		/*
 		 * DMA map early, for performance (empties dcache ASAP) and
-		 * better fault reporting.  This is a DMA-only driver.
-		 *
-		 * NOTE that if dma_unmap_single() ever starts to do work on
-		 * platforms supported by this driver, we would need to clean
-		 * up mappings for previously-mapped transfers.
+		 * better fault reporting.
 		 */
-		if (!msg->is_dma_mapped) {
+		if ((!msg->is_dma_mapped) && (atmel_spi_use_dma(as, xfer)
+			|| as->use_pdc)) {
 			if (atmel_spi_dma_map_xfer(as, xfer) < 0)
 				return -ENOMEM;
 		}
@@ -879,11 +1408,11 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg)
 	msg->status = -EINPROGRESS;
 	msg->actual_length = 0;
 
-	spin_lock_irqsave(&as->lock, flags);
+	atmel_spi_lock(as);
 	list_add_tail(&msg->queue, &as->queue);
 	if (!as->current_transfer)
 		atmel_spi_next_message(spi->master);
-	spin_unlock_irqrestore(&as->lock, flags);
+	atmel_spi_unlock(as);
 
 	return 0;
 }
@@ -893,23 +1422,39 @@ static void atmel_spi_cleanup(struct spi_device *spi)
 	struct atmel_spi	*as = spi_master_get_devdata(spi->master);
 	struct atmel_spi_device	*asd = spi->controller_state;
 	unsigned		gpio = (unsigned) spi->controller_data;
-	unsigned long		flags;
 
 	if (!asd)
 		return;
 
-	spin_lock_irqsave(&as->lock, flags);
+	atmel_spi_lock(as);
 	if (as->stay == spi) {
 		as->stay = NULL;
 		cs_deactivate(as, spi);
 	}
-	spin_unlock_irqrestore(&as->lock, flags);
+	atmel_spi_unlock(as);
 
 	spi->controller_state = NULL;
 	gpio_free(gpio);
 	kfree(asd);
 }
 
+static inline unsigned int atmel_get_version(struct atmel_spi *as)
+{
+	return spi_readl(as, VERSION) & 0x00000fff;
+}
+
+static void atmel_get_caps(struct atmel_spi *as)
+{
+	unsigned int version;
+
+	version = atmel_get_version(as);
+	dev_info(&as->pdev->dev, "version: 0x%x\n", version);
+
+	as->caps.is_spi2 = version > 0x121;
+	as->caps.has_wdrbt = version >= 0x210;
+	as->caps.has_dma_support = version >= 0x212;
+}
+
 /*-------------------------------------------------------------------------*/
 
 static int atmel_spi_probe(struct platform_device *pdev)
@@ -963,15 +1508,39 @@ static int atmel_spi_probe(struct platform_device *pdev)
 
 	spin_lock_init(&as->lock);
 	INIT_LIST_HEAD(&as->queue);
+
 	as->pdev = pdev;
 	as->regs = ioremap(regs->start, resource_size(regs));
 	if (!as->regs)
 		goto out_free_buffer;
+	as->phybase = regs->start;
 	as->irq = irq;
 	as->clk = clk;
 
-	ret = request_irq(irq, atmel_spi_interrupt, 0,
-			dev_name(&pdev->dev), master);
+	atmel_get_caps(as);
+
+	as->use_dma = false;
+	as->use_pdc = false;
+	if (as->caps.has_dma_support) {
+		if (atmel_spi_configure_dma(as) == 0)
+			as->use_dma = true;
+	} else {
+		as->use_pdc = true;
+	}
+
+	if (as->caps.has_dma_support && !as->use_dma)
+		dev_info(&pdev->dev, "Atmel SPI Controller using PIO only\n");
+
+	if (as->use_pdc) {
+		ret = request_irq(irq, atmel_spi_pdc_interrupt, 0,
+					dev_name(&pdev->dev), master);
+	} else {
+		tasklet_init(&as->tasklet, atmel_spi_tasklet_func,
+					(unsigned long)master);
+
+		ret = request_irq(irq, atmel_spi_pio_interrupt, 0,
+					dev_name(&pdev->dev), master);
+	}
 	if (ret)
 		goto out_unmap_regs;
 
@@ -979,8 +1548,15 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	clk_enable(clk);
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
-	spi_writel(as, MR, SPI_BIT(MSTR) | SPI_BIT(MODFDIS));
-	spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
+	if (as->caps.has_wdrbt) {
+		spi_writel(as, MR, SPI_BIT(WDRBT) | SPI_BIT(MODFDIS)
+				| SPI_BIT(MSTR));
+	} else {
+		spi_writel(as, MR, SPI_BIT(MSTR) | SPI_BIT(MODFDIS));
+	}
+
+	if (as->use_pdc)
+		spi_writel(as, PTCR, SPI_BIT(RXTDIS) | SPI_BIT(TXTDIS));
 	spi_writel(as, CR, SPI_BIT(SPIEN));
 
 	/* go! */
@@ -989,11 +1565,14 @@ static int atmel_spi_probe(struct platform_device *pdev)
 
 	ret = spi_register_master(master);
 	if (ret)
-		goto out_reset_hw;
+		goto out_free_dma;
 
 	return 0;
 
-out_reset_hw:
+out_free_dma:
+	if (as->use_dma)
+		atmel_spi_release_dma(as);
+
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
 	clk_disable(clk);
@@ -1001,6 +1580,8 @@ out_reset_hw:
 out_unmap_regs:
 	iounmap(as->regs);
 out_free_buffer:
+	if (!as->use_pdc)
+		tasklet_kill(&as->tasklet);
 	dma_free_coherent(&pdev->dev, BUFFER_SIZE, as->buffer,
 			as->buffer_dma);
 out_free:
@@ -1014,10 +1595,16 @@ static int atmel_spi_remove(struct platform_device *pdev)
 	struct spi_master	*master = platform_get_drvdata(pdev);
 	struct atmel_spi	*as = spi_master_get_devdata(master);
 	struct spi_message	*msg;
+	struct spi_transfer	*xfer;
 
 	/* reset the hardware and block queue progress */
 	spin_lock_irq(&as->lock);
 	as->stopping = 1;
+	if (as->use_dma) {
+		atmel_spi_stop_dma(as);
+		atmel_spi_release_dma(as);
+	}
+
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
 	spi_readl(as, SR);
@@ -1025,13 +1612,18 @@ static int atmel_spi_remove(struct platform_device *pdev)
 
 	/* Terminate remaining queued transfers */
 	list_for_each_entry(msg, &as->queue, queue) {
-		/* REVISIT unmapping the dma is a NOP on ARM and AVR32
-		 * but we shouldn't depend on that...
-		 */
+		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+			if (!msg->is_dma_mapped
+				&& (atmel_spi_use_dma(as, xfer)
+					|| as->use_pdc))
+				atmel_spi_dma_unmap_xfer(master, xfer);
+		}
 		msg->status = -ESHUTDOWN;
 		msg->complete(msg->context);
 	}
 
+	if (!as->use_pdc)
+		tasklet_kill(&as->tasklet);
 	dma_free_coherent(&pdev->dev, BUFFER_SIZE, as->buffer,
 			as->buffer_dma);
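
The spi-atmel changes above split the transfer path three ways: the legacy PDC path on controllers whose VERSION register does not advertise dmaengine support, the dmaengine path for transfers of at least DMA_MIN_BYTES when both channels were acquired at probe time, and PIO otherwise or as a fallback. A condensed, non-authoritative sketch of that selection, using only identifiers from the hunks above (the helper name is hypothetical; the patch itself spreads this logic across atmel_spi_next_message() and atmel_spi_dma_next_xfer()):

static void atmel_spi_start_next(struct spi_master *master,
				 struct spi_message *msg,
				 struct spi_transfer *xfer)
{
	struct atmel_spi *as = spi_master_get_devdata(master);

	if (as->use_pdc) {
		/* older IP: the peripheral DMA controller drives the message */
		atmel_spi_pdc_next_xfer(master, msg);
		return;
	}

	if (as->use_dma && xfer->len >= DMA_MIN_BYTES) {
		u32 len = xfer->len;

		/* dmaengine path; on submit failure fall through to PIO */
		if (!atmel_spi_next_xfer_dma_submit(master, xfer, &len))
			return;
	}

	/* short transfers, or no usable DMA channels */
	atmel_spi_next_xfer_pio(master, xfer);
}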
 
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
new file mode 100644
index 000000000000..89c0b5033114
--- /dev/null
+++ b/drivers/spi/spi-bcm2835.c
@@ -0,0 +1,422 @@
+/*
+ * Driver for Broadcom BCM2835 SPI Controllers
+ *
+ * Copyright (C) 2012 Chris Boot
+ * Copyright (C) 2013 Stephen Warren
+ *
+ * This driver is inspired by:
+ * spi-ath79.c, Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
+ * spi-atmel.c, Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+
+/* SPI register offsets */
+#define BCM2835_SPI_CS			0x00
+#define BCM2835_SPI_FIFO		0x04
+#define BCM2835_SPI_CLK			0x08
+#define BCM2835_SPI_DLEN		0x0c
+#define BCM2835_SPI_LTOH		0x10
+#define BCM2835_SPI_DC			0x14
+
+/* Bitfields in CS */
+#define BCM2835_SPI_CS_LEN_LONG		0x02000000
+#define BCM2835_SPI_CS_DMA_LEN		0x01000000
+#define BCM2835_SPI_CS_CSPOL2		0x00800000
+#define BCM2835_SPI_CS_CSPOL1		0x00400000
+#define BCM2835_SPI_CS_CSPOL0		0x00200000
+#define BCM2835_SPI_CS_RXF		0x00100000
+#define BCM2835_SPI_CS_RXR		0x00080000
+#define BCM2835_SPI_CS_TXD		0x00040000
+#define BCM2835_SPI_CS_RXD		0x00020000
+#define BCM2835_SPI_CS_DONE		0x00010000
+#define BCM2835_SPI_CS_LEN		0x00002000
+#define BCM2835_SPI_CS_REN		0x00001000
+#define BCM2835_SPI_CS_ADCS		0x00000800
+#define BCM2835_SPI_CS_INTR		0x00000400
+#define BCM2835_SPI_CS_INTD		0x00000200
+#define BCM2835_SPI_CS_DMAEN		0x00000100
+#define BCM2835_SPI_CS_TA		0x00000080
+#define BCM2835_SPI_CS_CSPOL		0x00000040
+#define BCM2835_SPI_CS_CLEAR_RX		0x00000020
+#define BCM2835_SPI_CS_CLEAR_TX		0x00000010
+#define BCM2835_SPI_CS_CPOL		0x00000008
+#define BCM2835_SPI_CS_CPHA		0x00000004
+#define BCM2835_SPI_CS_CS_10		0x00000002
+#define BCM2835_SPI_CS_CS_01		0x00000001
+
+#define BCM2835_SPI_TIMEOUT_MS	30000
+#define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_NO_CS)
+
+#define DRV_NAME	"spi-bcm2835"
+
+struct bcm2835_spi {
+	void __iomem *regs;
+	struct clk *clk;
+	int irq;
+	struct completion done;
+	const u8 *tx_buf;
+	u8 *rx_buf;
+	int len;
+};
+
+static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
+{
+	return readl(bs->regs + reg);
+}
+
+static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned reg, u32 val)
+{
+	writel(val, bs->regs + reg);
+}
+
+static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs, int len)
+{
+	u8 byte;
+
+	while (len--) {
+		byte = bcm2835_rd(bs, BCM2835_SPI_FIFO);
+		if (bs->rx_buf)
+			*bs->rx_buf++ = byte;
+	}
+}
+
+static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs, int len)
+{
+	u8 byte;
+
+	if (len > bs->len)
+		len = bs->len;
+
+	while (len--) {
+		byte = bs->tx_buf ? *bs->tx_buf++ : 0;
+		bcm2835_wr(bs, BCM2835_SPI_FIFO, byte);
+		bs->len--;
+	}
+}
+
+static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
+{
+	struct spi_master *master = dev_id;
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+	/*
+	 * RXR - RX needs Reading. This means 12 (or more) bytes have been
+	 * transmitted and hence 12 (or more) bytes have been received.
+	 *
+	 * The FIFO is 16-bytes deep. We check for this interrupt to keep the
+	 * FIFO full; we have a 4-byte-time buffer for IRQ latency. We check
+	 * this before DONE (TX empty) just in case we delayed processing this
+	 * interrupt for some reason.
+	 *
+	 * We only check for this case if we have more bytes to TX; at the end
+	 * of the transfer, we ignore this pipelining optimization, and let
+	 * bcm2835_spi_finish_transfer() drain the RX FIFO.
+	 */
+	if (bs->len && (cs & BCM2835_SPI_CS_RXR)) {
+		/* Read 12 bytes of data */
+		bcm2835_rd_fifo(bs, 12);
+
+		/* Write up to 12 bytes */
+		bcm2835_wr_fifo(bs, 12);
+
+		/*
+		 * We must have written something to the TX FIFO due to the
+		 * bs->len check above, so we cannot be DONE. Hence, return
+		 * early. Note that DONE could also be set if we serviced an
+		 * RXR interrupt really late.
+		 */
+		return IRQ_HANDLED;
+	}
+
+	/*
+	 * DONE - TX empty. This occurs when we first enable the transfer
+	 * since we do not pre-fill the TX FIFO. At any other time, given that
+	 * we refill the TX FIFO above based on RXR, and hence ignore DONE if
+	 * RXR is set, DONE really does mean end-of-transfer.
+	 */
+	if (cs & BCM2835_SPI_CS_DONE) {
+		if (bs->len) { /* First interrupt in a transfer */
+			bcm2835_wr_fifo(bs, 16);
+		} else { /* Transfer complete */
+			/* Disable SPI interrupts */
+			cs &= ~(BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD);
+			bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+
+			/*
+			 * Wake up bcm2835_spi_transfer_one(), which will call
+			 * bcm2835_spi_finish_transfer(), to drain the RX FIFO.
+			 */
+			complete(&bs->done);
+		}
+
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int bcm2835_spi_start_transfer(struct spi_device *spi,
+		struct spi_transfer *tfr)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
+	unsigned long spi_hz, clk_hz, cdiv;
+	u32 cs = BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
+
+	spi_hz = tfr->speed_hz;
+	clk_hz = clk_get_rate(bs->clk);
+
+	if (spi_hz >= clk_hz / 2) {
+		cdiv = 2; /* clk_hz/2 is the fastest we can go */
+	} else if (spi_hz) {
+		/* CDIV must be a power of two */
+		cdiv = roundup_pow_of_two(DIV_ROUND_UP(clk_hz, spi_hz));
+
+		if (cdiv >= 65536)
+			cdiv = 0; /* 0 is the slowest we can go */
+	} else
+		cdiv = 0; /* 0 is the slowest we can go */
+
+	if (spi->mode & SPI_CPOL)
+		cs |= BCM2835_SPI_CS_CPOL;
+	if (spi->mode & SPI_CPHA)
+		cs |= BCM2835_SPI_CS_CPHA;
+
+	if (!(spi->mode & SPI_NO_CS)) {
+		if (spi->mode & SPI_CS_HIGH) {
+			cs |= BCM2835_SPI_CS_CSPOL;
+			cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
+		}
+
+		cs |= spi->chip_select;
+	}
+
+	INIT_COMPLETION(bs->done);
+	bs->tx_buf = tfr->tx_buf;
+	bs->rx_buf = tfr->rx_buf;
+	bs->len = tfr->len;
+
+	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
+	/*
+	 * Enable the HW block. This will immediately trigger a DONE (TX
+	 * empty) interrupt, upon which we will fill the TX FIFO with the
+	 * first TX bytes. Pre-filling the TX FIFO here to avoid the
+	 * interrupt doesn't work:-(
+	 */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+
+	return 0;
+}
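
For a concrete feel of the divisor logic above, assume a 250 MHz core clock (illustrative only; the real rate comes from clk_get_rate(bs->clk)) and a requested 10 MHz transfer:

	cdiv = roundup_pow_of_two(DIV_ROUND_UP(250000000, 10000000));	/* 25 -> 32 */
	/* effective SCK = 250 MHz / 32 = 7.8125 MHz, the fastest power-of-two
	 * divided rate that does not exceed the requested speed */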
+
+static int bcm2835_spi_finish_transfer(struct spi_device *spi,
+		struct spi_transfer *tfr, bool cs_change)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+	/* Drain RX FIFO */
+	while (cs & BCM2835_SPI_CS_RXD) {
+		bcm2835_rd_fifo(bs, 1);
+		cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+	}
+
+	if (tfr->delay_usecs)
+		udelay(tfr->delay_usecs);
+
+	if (cs_change)
+		/* Clear TA flag */
+		bcm2835_wr(bs, BCM2835_SPI_CS, cs & ~BCM2835_SPI_CS_TA);
+
+	return 0;
+}
+
+static int bcm2835_spi_transfer_one(struct spi_master *master,
+		struct spi_message *mesg)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	struct spi_transfer *tfr;
+	struct spi_device *spi = mesg->spi;
+	int err = 0;
+	unsigned int timeout;
+	bool cs_change;
+
+	list_for_each_entry(tfr, &mesg->transfers, transfer_list) {
+		err = bcm2835_spi_start_transfer(spi, tfr);
+		if (err)
+			goto out;
+
+		timeout = wait_for_completion_timeout(&bs->done,
+				msecs_to_jiffies(BCM2835_SPI_TIMEOUT_MS));
+		if (!timeout) {
+			err = -ETIMEDOUT;
+			goto out;
+		}
+
+		cs_change = tfr->cs_change ||
+			list_is_last(&tfr->transfer_list, &mesg->transfers);
+
+		err = bcm2835_spi_finish_transfer(spi, tfr, cs_change);
+		if (err)
+			goto out;
+
+		mesg->actual_length += (tfr->len - bs->len);
+	}
+
+out:
+	/* Clear FIFOs, and disable the HW block */
+	bcm2835_wr(bs, BCM2835_SPI_CS,
+		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
+	mesg->status = err;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static int bcm2835_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master *master;
+	struct bcm2835_spi *bs;
+	struct resource *res;
+	int err;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
+	if (!master) {
+		dev_err(&pdev->dev, "spi_alloc_master() failed\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, master);
+
+	master->mode_bits = BCM2835_SPI_MODE_BITS;
+	master->bits_per_word_mask = BIT(8 - 1);
+	master->bus_num = -1;
+	master->num_chipselect = 3;
+	master->transfer_one_message = bcm2835_spi_transfer_one;
+	master->dev.of_node = pdev->dev.of_node;
+
+	bs = spi_master_get_devdata(master);
+
+	init_completion(&bs->done);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "could not get memory resource\n");
+		err = -ENODEV;
+		goto out_master_put;
+	}
+
+	bs->regs = devm_request_and_ioremap(&pdev->dev, res);
+	if (!bs->regs) {
+		dev_err(&pdev->dev, "could not request/map memory region\n");
+		err = -ENODEV;
+		goto out_master_put;
+	}
+
+	bs->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(bs->clk)) {
+		err = PTR_ERR(bs->clk);
+		dev_err(&pdev->dev, "could not get clk: %d\n", err);
+		goto out_master_put;
+	}
+
+	bs->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (bs->irq <= 0) {
+		dev_err(&pdev->dev, "could not get IRQ: %d\n", bs->irq);
+		err = bs->irq ? bs->irq : -ENODEV;
+		goto out_master_put;
+	}
+
+	clk_prepare_enable(bs->clk);
+
+	err = request_irq(bs->irq, bcm2835_spi_interrupt, 0,
+			dev_name(&pdev->dev), master);
+	if (err) {
+		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
+		goto out_clk_disable;
+	}
+
+	/* initialise the hardware */
+	bcm2835_wr(bs, BCM2835_SPI_CS,
+		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
+
+	err = spi_register_master(master);
+	if (err) {
+		dev_err(&pdev->dev, "could not register SPI master: %d\n", err);
+		goto out_free_irq;
+	}
+
+	return 0;
+
+out_free_irq:
+	free_irq(bs->irq, master);
+out_clk_disable:
+	clk_disable_unprepare(bs->clk);
+out_master_put:
+	spi_master_put(master);
+	return err;
+}
+
+static int bcm2835_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+
+	free_irq(bs->irq, master);
+	spi_unregister_master(master);
+
+	/* Clear FIFOs, and disable the HW block */
+	bcm2835_wr(bs, BCM2835_SPI_CS,
+		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
+
+	clk_disable_unprepare(bs->clk);
+	spi_master_put(master);
+
+	return 0;
+}
+
+static const struct of_device_id bcm2835_spi_match[] = {
+	{ .compatible = "brcm,bcm2835-spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, bcm2835_spi_match);
+
+static struct platform_driver bcm2835_spi_driver = {
+	.driver		= {
+		.name		= DRV_NAME,
+		.owner		= THIS_MODULE,
+		.of_match_table	= bcm2835_spi_match,
+	},
+	.probe		= bcm2835_spi_probe,
+	.remove		= bcm2835_spi_remove,
+};
+module_platform_driver(bcm2835_spi_driver);
+
+MODULE_DESCRIPTION("SPI controller driver for Broadcom BCM2835");
+MODULE_AUTHOR("Chris Boot <bootc@bootc.net>");
+MODULE_LICENSE("GPL v2");
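
The new driver implements transfer_one_message(), so it is exercised entirely through the standard SPI core API. The snippet below is not part of the patch; it is only a minimal sketch of how a protocol driver bound to this controller would end up in bcm2835_spi_transfer_one() (the function name and payload bytes are made up for illustration):

#include <linux/spi/spi.h>

static int example_duplex_xfer(struct spi_device *spi)
{
	u8 tx[4] = { 0x9f, 0x00, 0x00, 0x00 };	/* arbitrary example bytes */
	u8 rx[4];
	struct spi_transfer t = {
		.tx_buf	= tx,
		.rx_buf	= rx,
		.len	= sizeof(tx),
	};
	struct spi_message m;

	spi_message_init(&m);
	spi_message_add_tail(&t, &m);

	/* blocks until the controller's transfer_one_message() has completed */
	return spi_sync(spi, &m);
}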
diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index 9578af782a77..a4ec5f4ec817 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -46,7 +46,6 @@ struct bcm63xx_spi {
 	int			irq;
 
 	/* Platform data */
-	u32			speed_hz;
 	unsigned		fifo_size;
 	unsigned int		msg_type_shift;
 	unsigned int		msg_ctl_width;
@@ -93,40 +92,16 @@ static const unsigned bcm63xx_spi_freq_table[SPI_CLK_MASK][2] = {
 	{   391000, SPI_CLK_0_391MHZ }
 };
 
-static int bcm63xx_spi_check_transfer(struct spi_device *spi,
-					struct spi_transfer *t)
-{
-	u8 bits_per_word;
-
-	bits_per_word = (t) ? t->bits_per_word : spi->bits_per_word;
-	if (bits_per_word != 8) {
-		dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
-			__func__, bits_per_word);
-		return -EINVAL;
-	}
-
-	if (spi->chip_select > spi->master->num_chipselect) {
-		dev_err(&spi->dev, "%s, unsupported slave %d\n",
-			__func__, spi->chip_select);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static void bcm63xx_spi_setup_transfer(struct spi_device *spi,
 				      struct spi_transfer *t)
 {
 	struct bcm63xx_spi *bs = spi_master_get_devdata(spi->master);
-	u32 hz;
 	u8 clk_cfg, reg;
 	int i;
 
-	hz = (t) ? t->speed_hz : spi->max_speed_hz;
-
 	/* Find the closest clock configuration */
 	for (i = 0; i < SPI_CLK_MASK; i++) {
-		if (hz >= bcm63xx_spi_freq_table[i][0]) {
+		if (t->speed_hz >= bcm63xx_spi_freq_table[i][0]) {
 			clk_cfg = bcm63xx_spi_freq_table[i][1];
 			break;
 		}
@@ -143,7 +118,7 @@ static void bcm63xx_spi_setup_transfer(struct spi_device *spi,
 
 	bcm_spi_writeb(bs, reg, SPI_CLK_CFG);
 	dev_dbg(&spi->dev, "Setting clock register to %02x (hz %d)\n",
-		clk_cfg, hz);
+		clk_cfg, t->speed_hz);
 }
 
 /* the spi->mode bits understood by this driver: */
@@ -151,23 +126,12 @@ static void bcm63xx_spi_setup_transfer(struct spi_device *spi,
 
 static int bcm63xx_spi_setup(struct spi_device *spi)
 {
-	struct bcm63xx_spi *bs;
-	int ret;
-
-	bs = spi_master_get_devdata(spi->master);
-
-	if (!spi->bits_per_word)
-		spi->bits_per_word = 8;
-
-	if (spi->mode & ~MODEBITS) {
-		dev_err(&spi->dev, "%s, unsupported mode bits %x\n",
-			__func__, spi->mode & ~MODEBITS);
+	if (spi->bits_per_word != 8) {
+		dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
+			__func__, spi->bits_per_word);
 		return -EINVAL;
 	}
 
-	dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n",
-		__func__, spi->mode & MODEBITS, spi->bits_per_word, 0);
-
 	return 0;
 }
 
@@ -313,9 +277,12 @@ static int bcm63xx_spi_transfer_one(struct spi_master *master,
 	 * full-duplex transfers.
 	 */
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		status = bcm63xx_spi_check_transfer(spi, t);
-		if (status < 0)
+		if (t->bits_per_word != 8) {
+			dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n",
+				__func__, t->bits_per_word);
+			status = -EINVAL;
 			goto exit;
+		}
 
 		if (!first)
 			first = t;
@@ -444,18 +411,9 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, master);
 	bs->pdev = pdev;
 
-	if (!devm_request_mem_region(&pdev->dev, r->start,
-					resource_size(r), PFX)) {
-		dev_err(dev, "iomem request failed\n");
-		ret = -ENXIO;
-		goto out_err;
-	}
-
-	bs->regs = devm_ioremap_nocache(&pdev->dev, r->start,
-							resource_size(r));
-	if (!bs->regs) {
-		dev_err(dev, "unable to ioremap regs\n");
-		ret = -ENOMEM;
+	bs->regs = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(bs->regs)) {
+		ret = PTR_ERR(bs->regs);
 		goto out_err;
 	}
 
@@ -477,7 +435,6 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	master->unprepare_transfer_hardware = bcm63xx_spi_unprepare_transfer;
 	master->transfer_one_message = bcm63xx_spi_transfer_one;
 	master->mode_bits = MODEBITS;
-	bs->speed_hz = pdata->speed_hz;
 	bs->msg_type_shift = pdata->msg_type_shift;
 	bs->msg_ctl_width = pdata->msg_ctl_width;
 	bs->tx_io = (u8 *)(bs->regs + bcm63xx_spireg(SPI_MSG_DATA));
@@ -490,11 +447,11 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	default:
 		dev_err(dev, "unsupported MSG_CTL width: %d\n",
 			 bs->msg_ctl_width);
-		goto out_clk_disable;
+		goto out_err;
 	}
 
 	/* Initialize hardware */
-	clk_enable(bs->clk);
+	clk_prepare_enable(bs->clk);
 	bcm_spi_writeb(bs, SPI_INTR_CLEAR_ALL, SPI_INT_STATUS);
 
 	/* register and we are done */
@@ -510,7 +467,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
 	return 0;
 
 out_clk_disable:
-	clk_disable(clk);
+	clk_disable_unprepare(clk);
 out_err:
 	platform_set_drvdata(pdev, NULL);
 	spi_master_put(master);
@@ -531,7 +488,7 @@ static int bcm63xx_spi_remove(struct platform_device *pdev)
 	bcm_spi_writeb(bs, 0, SPI_INT_MASK);
 
 	/* HW shutdown */
-	clk_disable(bs->clk);
+	clk_disable_unprepare(bs->clk);
 	clk_put(bs->clk);
 
 	platform_set_drvdata(pdev, 0);
@@ -550,7 +507,7 @@ static int bcm63xx_spi_suspend(struct device *dev)
 
 	spi_master_suspend(master);
 
-	clk_disable(bs->clk);
+	clk_disable_unprepare(bs->clk);
 
 	return 0;
 }
@@ -561,7 +518,7 @@ static int bcm63xx_spi_resume(struct device *dev)
 			platform_get_drvdata(to_platform_device(dev));
 	struct bcm63xx_spi *bs = spi_master_get_devdata(master);
 
-	clk_enable(bs->clk);
+	clk_prepare_enable(bs->clk);
 
 	spi_master_resume(master);
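
The bcm63xx changes above convert bare clk_enable()/clk_disable() calls to their prepared variants. A minimal sketch of the pairing this assumes under the common clock framework (illustrative, not taken from the patch):

#include <linux/clk.h>

static int example_clk_bringup(struct clk *clk)
{
	int ret;

	/* clk_prepare_enable() = clk_prepare() (may sleep) + clk_enable() */
	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;

	/* ... program the hardware ... */

	/* clk_disable_unprepare() undoes both steps in one call */
	clk_disable_unprepare(clk);
	return 0;
}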
 
diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
new file mode 100644
index 000000000000..07971e3fe58b
--- /dev/null
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -0,0 +1,387 @@
+/*
+ * Freescale SPI controller driver cpm functions.
+ *
+ * Maintainer: Kumar Gala
+ *
+ * Copyright (C) 2006 Polycom, Inc.
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ *
+ * CPM SPI and QE buffer descriptors mode support:
+ * Copyright (c) 2009  MontaVista Software, Inc.
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/fsl_devices.h>
+#include <linux/dma-mapping.h>
+#include <asm/cpm.h>
+#include <asm/qe.h>
+
+#include "spi-fsl-lib.h"
+#include "spi-fsl-cpm.h"
+#include "spi-fsl-spi.h"
+
+/* CPM1 and CPM2 are mutually exclusive. */
+#ifdef CONFIG_CPM1
+#include <asm/cpm1.h>
+#define CPM_SPI_CMD mk_cr_cmd(CPM_CR_CH_SPI, 0)
+#else
+#include <asm/cpm2.h>
+#define CPM_SPI_CMD mk_cr_cmd(CPM_CR_SPI_PAGE, CPM_CR_SPI_SBLOCK, 0, 0)
+#endif
+
+#define	SPIE_TXB	0x00000200	/* Last char is written to tx fifo */
+#define	SPIE_RXB	0x00000100	/* Last char is written to rx buf */
+
+/* SPCOM register values */
+#define	SPCOM_STR	(1 << 23)	/* Start transmit */
+
+#define	SPI_PRAM_SIZE	0x100
+#define	SPI_MRBLR	((unsigned int)PAGE_SIZE)
+
+static void *fsl_dummy_rx;
+static DEFINE_MUTEX(fsl_dummy_rx_lock);
+static int fsl_dummy_rx_refcnt;
+
+void fsl_spi_cpm_reinit_txrx(struct mpc8xxx_spi *mspi)
+{
+	if (mspi->flags & SPI_QE) {
+		qe_issue_cmd(QE_INIT_TX_RX, mspi->subblock,
+			     QE_CR_PROTOCOL_UNSPECIFIED, 0);
+	} else {
+		cpm_command(CPM_SPI_CMD, CPM_CR_INIT_TRX);
+		if (mspi->flags & SPI_CPM1) {
+			out_be16(&mspi->pram->rbptr,
+				 in_be16(&mspi->pram->rbase));
+			out_be16(&mspi->pram->tbptr,
+				 in_be16(&mspi->pram->tbase));
+		}
+	}
+}
+
+static void fsl_spi_cpm_bufs_start(struct mpc8xxx_spi *mspi)
+{
+	struct cpm_buf_desc __iomem *tx_bd = mspi->tx_bd;
+	struct cpm_buf_desc __iomem *rx_bd = mspi->rx_bd;
+	unsigned int xfer_len = min(mspi->count, SPI_MRBLR);
+	unsigned int xfer_ofs;
+	struct fsl_spi_reg *reg_base = mspi->reg_base;
+
+	xfer_ofs = mspi->xfer_in_progress->len - mspi->count;
+
+	if (mspi->rx_dma == mspi->dma_dummy_rx)
+		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma);
+	else
+		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs);
+	out_be16(&rx_bd->cbd_datlen, 0);
+	out_be16(&rx_bd->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP);
+
+	if (mspi->tx_dma == mspi->dma_dummy_tx)
+		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma);
+	else
+		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs);
+	out_be16(&tx_bd->cbd_datlen, xfer_len);
+	out_be16(&tx_bd->cbd_sc, BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP |
+				 BD_SC_LAST);
+
+	/* start transfer */
+	mpc8xxx_spi_write_reg(&reg_base->command, SPCOM_STR);
+}
+
+int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
+		     struct spi_transfer *t, bool is_dma_mapped)
+{
+	struct device *dev = mspi->dev;
+	struct fsl_spi_reg *reg_base = mspi->reg_base;
+
+	if (is_dma_mapped) {
+		mspi->map_tx_dma = 0;
+		mspi->map_rx_dma = 0;
+	} else {
+		mspi->map_tx_dma = 1;
+		mspi->map_rx_dma = 1;
+	}
+
+	if (!t->tx_buf) {
+		mspi->tx_dma = mspi->dma_dummy_tx;
+		mspi->map_tx_dma = 0;
+	}
+
+	if (!t->rx_buf) {
+		mspi->rx_dma = mspi->dma_dummy_rx;
+		mspi->map_rx_dma = 0;
+	}
+
+	if (mspi->map_tx_dma) {
+		void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */
+
+		mspi->tx_dma = dma_map_single(dev, nonconst_tx, t->len,
+					      DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, mspi->tx_dma)) {
+			dev_err(dev, "unable to map tx dma\n");
+			return -ENOMEM;
+		}
+	} else if (t->tx_buf) {
+		mspi->tx_dma = t->tx_dma;
+	}
+
+	if (mspi->map_rx_dma) {
+		mspi->rx_dma = dma_map_single(dev, mspi->rx, t->len,
+					      DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, mspi->rx_dma)) {
+			dev_err(dev, "unable to map rx dma\n");
+			goto err_rx_dma;
+		}
+	} else if (t->rx_buf) {
+		mspi->rx_dma = t->rx_dma;
+	}
+
+	/* enable rx ints */
+	mpc8xxx_spi_write_reg(&reg_base->mask, SPIE_RXB);
+
+	mspi->xfer_in_progress = t;
+	mspi->count = t->len;
+
+	/* start CPM transfers */
+	fsl_spi_cpm_bufs_start(mspi);
+
+	return 0;
+
+err_rx_dma:
+	if (mspi->map_tx_dma)
+		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
+	return -ENOMEM;
+}
+
+void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+	struct spi_transfer *t = mspi->xfer_in_progress;
+
+	if (mspi->map_tx_dma)
+		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
+	if (mspi->map_rx_dma)
+		dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
+	mspi->xfer_in_progress = NULL;
+}
+
+void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events)
+{
+	u16 len;
+	struct fsl_spi_reg *reg_base = mspi->reg_base;
+
+	dev_dbg(mspi->dev, "%s: bd datlen %d, count %d\n", __func__,
+		in_be16(&mspi->rx_bd->cbd_datlen), mspi->count);
+
+	len = in_be16(&mspi->rx_bd->cbd_datlen);
+	if (len > mspi->count) {
+		WARN_ON(1);
+		len = mspi->count;
+	}
+
+	/* Clear the events */
+	mpc8xxx_spi_write_reg(&reg_base->event, events);
+
+	mspi->count -= len;
+	if (mspi->count)
+		fsl_spi_cpm_bufs_start(mspi);
+	else
+		complete(&mspi->done);
+}
+
+static void *fsl_spi_alloc_dummy_rx(void)
+{
+	mutex_lock(&fsl_dummy_rx_lock);
+
+	if (!fsl_dummy_rx)
+		fsl_dummy_rx = kmalloc(SPI_MRBLR, GFP_KERNEL);
+	if (fsl_dummy_rx)
+		fsl_dummy_rx_refcnt++;
+
+	mutex_unlock(&fsl_dummy_rx_lock);
+
+	return fsl_dummy_rx;
+}
+
+static void fsl_spi_free_dummy_rx(void)
+{
+	mutex_lock(&fsl_dummy_rx_lock);
+
+	switch (fsl_dummy_rx_refcnt) {
+	case 0:
+		WARN_ON(1);
+		break;
+	case 1:
+		kfree(fsl_dummy_rx);
+		fsl_dummy_rx = NULL;
+		/* fall through */
+	default:
+		fsl_dummy_rx_refcnt--;
+		break;
+	}
+
+	mutex_unlock(&fsl_dummy_rx_lock);
+}
+
+static unsigned long fsl_spi_cpm_get_pram(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+	struct device_node *np = dev->of_node;
+	const u32 *iprop;
+	int size;
+	void __iomem *spi_base;
+	unsigned long pram_ofs = -ENOMEM;
+
+	/* Can't use of_address_to_resource(), QE muram isn't at 0. */
+	iprop = of_get_property(np, "reg", &size);
+
+	/* QE with a fixed pram location? */
+	if (mspi->flags & SPI_QE && iprop && size == sizeof(*iprop) * 4)
+		return cpm_muram_alloc_fixed(iprop[2], SPI_PRAM_SIZE);
+
+	/* QE but with a dynamic pram location? */
+	if (mspi->flags & SPI_QE) {
+		pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
+		qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, mspi->subblock,
+			     QE_CR_PROTOCOL_UNSPECIFIED, pram_ofs);
+		return pram_ofs;
+	}
+
+	spi_base = of_iomap(np, 1);
+	if (spi_base == NULL)
+		return -EINVAL;
+
+	if (mspi->flags & SPI_CPM2) {
+		pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
+		out_be16(spi_base, pram_ofs);
+	} else {
+		struct spi_pram __iomem *pram = spi_base;
+		u16 rpbase = in_be16(&pram->rpbase);
+
+		/* Microcode relocation patch applied? */
+		if (rpbase) {
+			pram_ofs = rpbase;
+		} else {
+			pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
+			out_be16(spi_base, pram_ofs);
+		}
+	}
+
+	iounmap(spi_base);
+	return pram_ofs;
+}
+
+int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+	struct device_node *np = dev->of_node;
+	const u32 *iprop;
+	int size;
+	unsigned long pram_ofs;
+	unsigned long bds_ofs;
+
+	if (!(mspi->flags & SPI_CPM_MODE))
+		return 0;
+
+	if (!fsl_spi_alloc_dummy_rx())
+		return -ENOMEM;
+
+	if (mspi->flags & SPI_QE) {
+		iprop = of_get_property(np, "cell-index", &size);
+		if (iprop && size == sizeof(*iprop))
+			mspi->subblock = *iprop;
+
+		switch (mspi->subblock) {
+		default:
+			dev_warn(dev, "cell-index unspecified, assuming SPI1");
+			/* fall through */
+		case 0:
+			mspi->subblock = QE_CR_SUBBLOCK_SPI1;
+			break;
+		case 1:
+			mspi->subblock = QE_CR_SUBBLOCK_SPI2;
+			break;
+		}
+	}
+
+	pram_ofs = fsl_spi_cpm_get_pram(mspi);
+	if (IS_ERR_VALUE(pram_ofs)) {
+		dev_err(dev, "can't allocate spi parameter ram\n");
+		goto err_pram;
+	}
+
+	bds_ofs = cpm_muram_alloc(sizeof(*mspi->tx_bd) +
+				  sizeof(*mspi->rx_bd), 8);
+	if (IS_ERR_VALUE(bds_ofs)) {
+		dev_err(dev, "can't allocate bds\n");
+		goto err_bds;
+	}
+
+	mspi->dma_dummy_tx = dma_map_single(dev, empty_zero_page, PAGE_SIZE,
+					    DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, mspi->dma_dummy_tx)) {
+		dev_err(dev, "unable to map dummy tx buffer\n");
+		goto err_dummy_tx;
+	}
+
+	mspi->dma_dummy_rx = dma_map_single(dev, fsl_dummy_rx, SPI_MRBLR,
+					    DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, mspi->dma_dummy_rx)) {
+		dev_err(dev, "unable to map dummy rx buffer\n");
+		goto err_dummy_rx;
+	}
+
+	mspi->pram = cpm_muram_addr(pram_ofs);
+
+	mspi->tx_bd = cpm_muram_addr(bds_ofs);
+	mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
+
+	/* Initialize parameter ram. */
+	out_be16(&mspi->pram->tbase, cpm_muram_offset(mspi->tx_bd));
+	out_be16(&mspi->pram->rbase, cpm_muram_offset(mspi->rx_bd));
+	out_8(&mspi->pram->tfcr, CPMFCR_EB | CPMFCR_GBL);
+	out_8(&mspi->pram->rfcr, CPMFCR_EB | CPMFCR_GBL);
+	out_be16(&mspi->pram->mrblr, SPI_MRBLR);
+	out_be32(&mspi->pram->rstate, 0);
+	out_be32(&mspi->pram->rdp, 0);
+	out_be16(&mspi->pram->rbptr, 0);
+	out_be16(&mspi->pram->rbc, 0);
+	out_be32(&mspi->pram->rxtmp, 0);
+	out_be32(&mspi->pram->tstate, 0);
+	out_be32(&mspi->pram->tdp, 0);
+	out_be16(&mspi->pram->tbptr, 0);
+	out_be16(&mspi->pram->tbc, 0);
+	out_be32(&mspi->pram->txtmp, 0);
+
+	return 0;
+
+err_dummy_rx:
+	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
+err_dummy_tx:
+	cpm_muram_free(bds_ofs);
+err_bds:
+	cpm_muram_free(pram_ofs);
+err_pram:
+	fsl_spi_free_dummy_rx();
+	return -ENOMEM;
+}
+
+void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi)
+{
+	struct device *dev = mspi->dev;
+
+	if (!(mspi->flags & SPI_CPM_MODE))
+		return;
+
+	dma_unmap_single(dev, mspi->dma_dummy_rx, SPI_MRBLR, DMA_FROM_DEVICE);
+	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
+	cpm_muram_free(cpm_muram_offset(mspi->tx_bd));
+	cpm_muram_free(cpm_muram_offset(mspi->pram));
+	fsl_spi_free_dummy_rx();
+}
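
The CPM helpers collected above form a small transfer engine: fsl_spi_cpm_bufs() maps the buffers and kicks the first buffer-descriptor pair, fsl_spi_cpm_irq() re-arms the descriptors until the whole length has been clocked out, and fsl_spi_cpm_bufs_complete() undoes the mapping. A minimal caller sketch, assuming an mspi already prepared by fsl_spi_cpm_init(); the example_* name is illustrative and not part of the patch:

static int example_cpm_transfer(struct mpc8xxx_spi *mspi,
				struct spi_transfer *t, bool is_dma_mapped)
{
	int ret;

	/* Map (or reuse) the DMA buffers and start the first SPI_MRBLR chunk.
	 * Assumes mspi->done has been (re)initialized for this transfer. */
	ret = fsl_spi_cpm_bufs(mspi, t, is_dma_mapped);
	if (ret)
		return ret;

	/* fsl_spi_cpm_irq() signals &mspi->done once mspi->count reaches 0. */
	wait_for_completion(&mspi->done);

	/* Unmap whatever fsl_spi_cpm_bufs() mapped. */
	fsl_spi_cpm_bufs_complete(mspi);
	return 0;
}
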
diff --git a/drivers/spi/spi-fsl-cpm.h b/drivers/spi/spi-fsl-cpm.h
new file mode 100644
index 000000000000..c71115805485
--- /dev/null
+++ b/drivers/spi/spi-fsl-cpm.h
@@ -0,0 +1,43 @@
+/*
+ * Freescale SPI controller driver cpm functions.
+ *
+ * Maintainer: Kumar Gala
+ *
+ * Copyright (C) 2006 Polycom, Inc.
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ *
+ * CPM SPI and QE buffer descriptors mode support:
+ * Copyright (c) 2009  MontaVista Software, Inc.
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __SPI_FSL_CPM_H__
+#define __SPI_FSL_CPM_H__
+
+#include "spi-fsl-lib.h"
+
+#ifdef CONFIG_FSL_SOC
+extern void fsl_spi_cpm_reinit_txrx(struct mpc8xxx_spi *mspi);
+extern int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
+			    struct spi_transfer *t, bool is_dma_mapped);
+extern void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi);
+extern void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events);
+extern int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi);
+extern void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi);
+#else
+static inline void fsl_spi_cpm_reinit_txrx(struct mpc8xxx_spi *mspi) { }
+static inline int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
+				   struct spi_transfer *t,
+				   bool is_dma_mapped) { return 0; }
+static inline void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi) { }
+static inline void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events) { }
+static inline int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) { return 0; }
+static inline void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi) { }
+#endif
+
+#endif /* __SPI_FSL_CPM_H__ */
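
Because the !CONFIG_FSL_SOC branch provides empty static inline stubs, the shared spi-fsl-spi.c code can call the CPM hooks unconditionally and the compiler simply discards them on builds without Freescale SoC support. The probe path later in this series relies on exactly that, for instance:

	/* Compiles to a no-op returning 0 when CONFIG_FSL_SOC is not set. */
	ret = fsl_spi_cpm_init(mpc8xxx_spi);
	if (ret)
		goto err_cpm_init;
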
diff --git a/drivers/spi/spi-fsl-lib.c b/drivers/spi/spi-fsl-lib.c
index 8ade675a04f1..a91db0e57b23 100644
--- a/drivers/spi/spi-fsl-lib.c
+++ b/drivers/spi/spi-fsl-lib.c
@@ -23,7 +23,9 @@
 #include <linux/mm.h>
 #include <linux/of_platform.h>
 #include <linux/spi/spi.h>
+#ifdef CONFIG_FSL_SOC
 #include <sysdev/fsl_soc.h>
+#endif
 
 #include "spi-fsl-lib.h"
 
@@ -208,6 +210,7 @@ int of_mpc8xxx_spi_probe(struct platform_device *ofdev)
 	/* Allocate bus num dynamically. */
 	pdata->bus_num = -1;
 
+#ifdef CONFIG_FSL_SOC
 	/* SPI controller is either clocked from QE or SoC clock. */
 	pdata->sysclk = get_brgfreq();
 	if (pdata->sysclk == -1) {
@@ -217,6 +220,11 @@ int of_mpc8xxx_spi_probe(struct platform_device *ofdev)
 			goto err;
 		}
 	}
+#else
+	ret = of_property_read_u32(np, "clock-frequency", &pdata->sysclk);
+	if (ret)
+		goto err;
+#endif
 
 	prop = of_get_property(np, "mode", NULL);
 	if (prop && !strcmp(prop, "cpu-qe"))
diff --git a/drivers/spi/spi-fsl-lib.h b/drivers/spi/spi-fsl-lib.h
index cbe881b9ea76..52db6936778e 100644
--- a/drivers/spi/spi-fsl-lib.h
+++ b/drivers/spi/spi-fsl-lib.h
@@ -34,8 +34,10 @@ struct mpc8xxx_spi {
 
 	int subblock;
 	struct spi_pram __iomem *pram;
+#ifdef CONFIG_FSL_SOC
 	struct cpm_buf_desc __iomem *tx_bd;
 	struct cpm_buf_desc __iomem *rx_bd;
+#endif
 
 	struct spi_transfer *xfer_in_progress;
 
@@ -67,6 +69,15 @@ struct mpc8xxx_spi {
 
 	unsigned int flags;
 
+#ifdef CONFIG_SPI_FSL_SPI
+	int type;
+	int native_chipselects;
+	u8 max_bits_per_word;
+
+	void (*set_shifts)(u32 *rx_shift, u32 *tx_shift,
+			   int bits_per_word, int msb_first);
+#endif
+
 	struct workqueue_struct *workqueue;
 	struct work_struct work;
 
@@ -87,12 +98,12 @@ struct spi_mpc8xxx_cs {
 
 static inline void mpc8xxx_spi_write_reg(__be32 __iomem *reg, u32 val)
 {
-	out_be32(reg, val);
+	iowrite32be(val, reg);
 }
 
 static inline u32 mpc8xxx_spi_read_reg(__be32 __iomem *reg)
 {
-	return in_be32(reg);
+	return ioread32be(reg);
 }
 
 struct mpc8xxx_spi_probe_info {
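
Switching the register accessors from out_be32()/in_be32() to iowrite32be()/ioread32be() drops the dependency on PowerPC-only I/O helpers, so the same header can build on other architectures such as the SPARC/LEON systems that use the GRLIB controller. A sketch of a read-modify-write through the new accessors, using the fsl_spi_reg layout and SPMODE_LOOP bit defined later in spi-fsl-spi.h; the example_* helper is illustrative only:

static void example_set_loopback(struct fsl_spi_reg *reg_base, bool enable)
{
	u32 mode = mpc8xxx_spi_read_reg(&reg_base->mode);

	if (enable)
		mode |= SPMODE_LOOP;
	else
		mode &= ~SPMODE_LOOP;

	mpc8xxx_spi_write_reg(&reg_base->mode, mode);
}
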
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 086a9eef2e05..14e202ee7036 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -10,6 +10,10 @@
  * Copyright (c) 2009  MontaVista Software, Inc.
  * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
  *
+ * GRLIB support:
+ * Copyright (c) 2012 Aeroflex Gaisler AB.
+ * Author: Andreas Larsson <andreas@gaisler.com>
+ *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
@@ -30,75 +34,54 @@
 #include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/gpio.h>
 #include <linux/of_gpio.h>
 
-#include <sysdev/fsl_soc.h>
-#include <asm/cpm.h>
-#include <asm/qe.h>
-
 #include "spi-fsl-lib.h"
+#include "spi-fsl-cpm.h"
+#include "spi-fsl-spi.h"
 
-/* CPM1 and CPM2 are mutually exclusive. */
-#ifdef CONFIG_CPM1
-#include <asm/cpm1.h>
-#define CPM_SPI_CMD mk_cr_cmd(CPM_CR_CH_SPI, 0)
-#else
-#include <asm/cpm2.h>
-#define CPM_SPI_CMD mk_cr_cmd(CPM_CR_SPI_PAGE, CPM_CR_SPI_SBLOCK, 0, 0)
-#endif
-
-/* SPI Controller registers */
-struct fsl_spi_reg {
-	u8 res1[0x20];
-	__be32 mode;
-	__be32 event;
-	__be32 mask;
-	__be32 command;
-	__be32 transmit;
-	__be32 receive;
-};
-
-/* SPI Controller mode register definitions */
-#define	SPMODE_LOOP		(1 << 30)
-#define	SPMODE_CI_INACTIVEHIGH	(1 << 29)
-#define	SPMODE_CP_BEGIN_EDGECLK	(1 << 28)
-#define	SPMODE_DIV16		(1 << 27)
-#define	SPMODE_REV		(1 << 26)
-#define	SPMODE_MS		(1 << 25)
-#define	SPMODE_ENABLE		(1 << 24)
-#define	SPMODE_LEN(x)		((x) << 20)
-#define	SPMODE_PM(x)		((x) << 16)
-#define	SPMODE_OP		(1 << 14)
-#define	SPMODE_CG(x)		((x) << 7)
+#define TYPE_FSL	0
+#define TYPE_GRLIB	1
 
-/*
- * Default for SPI Mode:
- *	SPI MODE 0 (inactive low, phase middle, MSB, 8-bit length, slow clk
- */
-#define	SPMODE_INIT_VAL (SPMODE_CI_INACTIVEHIGH | SPMODE_DIV16 | SPMODE_REV | \
-			 SPMODE_MS | SPMODE_LEN(7) | SPMODE_PM(0xf))
-
-/* SPIE register values */
-#define	SPIE_NE		0x00000200	/* Not empty */
-#define	SPIE_NF		0x00000100	/* Not full */
+struct fsl_spi_match_data {
+	int type;
+};
 
-/* SPIM register values */
-#define	SPIM_NE		0x00000200	/* Not empty */
-#define	SPIM_NF		0x00000100	/* Not full */
+static struct fsl_spi_match_data of_fsl_spi_fsl_config = {
+	.type = TYPE_FSL,
+};
 
-#define	SPIE_TXB	0x00000200	/* Last char is written to tx fifo */
-#define	SPIE_RXB	0x00000100	/* Last char is written to rx buf */
+static struct fsl_spi_match_data of_fsl_spi_grlib_config = {
+	.type = TYPE_GRLIB,
+};
 
-/* SPCOM register values */
-#define	SPCOM_STR	(1 << 23)	/* Start transmit */
+static struct of_device_id of_fsl_spi_match[] = {
+	{
+		.compatible = "fsl,spi",
+		.data = &of_fsl_spi_fsl_config,
+	},
+	{
+		.compatible = "aeroflexgaisler,spictrl",
+		.data = &of_fsl_spi_grlib_config,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, of_fsl_spi_match);
 
-#define	SPI_PRAM_SIZE	0x100
-#define	SPI_MRBLR	((unsigned int)PAGE_SIZE)
+static int fsl_spi_get_type(struct device *dev)
+{
+	const struct of_device_id *match;
 
-static void *fsl_dummy_rx;
-static DEFINE_MUTEX(fsl_dummy_rx_lock);
-static int fsl_dummy_rx_refcnt;
+	if (dev->of_node) {
+		match = of_match_node(of_fsl_spi_match, dev->of_node);
+		if (match && match->data)
+			return ((struct fsl_spi_match_data *)match->data)->type;
+	}
+	return TYPE_FSL;
+}
 
 static void fsl_spi_change_mode(struct spi_device *spi)
 {
@@ -119,18 +102,7 @@ static void fsl_spi_change_mode(struct spi_device *spi)
 
 	/* When in CPM mode, we need to reinit tx and rx. */
 	if (mspi->flags & SPI_CPM_MODE) {
-		if (mspi->flags & SPI_QE) {
-			qe_issue_cmd(QE_INIT_TX_RX, mspi->subblock,
-				     QE_CR_PROTOCOL_UNSPECIFIED, 0);
-		} else {
-			cpm_command(CPM_SPI_CMD, CPM_CR_INIT_TRX);
-			if (mspi->flags & SPI_CPM1) {
-				out_be16(&mspi->pram->rbptr,
-					 in_be16(&mspi->pram->rbase));
-				out_be16(&mspi->pram->tbptr,
-					 in_be16(&mspi->pram->tbase));
-			}
-		}
+		fsl_spi_cpm_reinit_txrx(mspi);
 	}
 	mpc8xxx_spi_write_reg(mode, cs->hw_mode);
 	local_irq_restore(flags);
@@ -163,6 +135,40 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value)
 	}
 }
 
+static void fsl_spi_qe_cpu_set_shifts(u32 *rx_shift, u32 *tx_shift,
+				      int bits_per_word, int msb_first)
+{
+	*rx_shift = 0;
+	*tx_shift = 0;
+	if (msb_first) {
+		if (bits_per_word <= 8) {
+			*rx_shift = 16;
+			*tx_shift = 24;
+		} else if (bits_per_word <= 16) {
+			*rx_shift = 16;
+			*tx_shift = 16;
+		}
+	} else {
+		if (bits_per_word <= 8)
+			*rx_shift = 8;
+	}
+}
+
+static void fsl_spi_grlib_set_shifts(u32 *rx_shift, u32 *tx_shift,
+				     int bits_per_word, int msb_first)
+{
+	*rx_shift = 0;
+	*tx_shift = 0;
+	if (bits_per_word <= 16) {
+		if (msb_first) {
+			*rx_shift = 16; /* LSB in bit 16 */
+			*tx_shift = 32 - bits_per_word; /* MSB in bit 31 */
+		} else {
+			*rx_shift = 16 - bits_per_word; /* MSB in bit 15 */
+		}
+	}
+}
+
 static int mspi_apply_cpu_mode_quirks(struct spi_mpc8xxx_cs *cs,
 				struct spi_device *spi,
 				struct mpc8xxx_spi *mpc8xxx_spi,
@@ -173,31 +179,20 @@ static int mspi_apply_cpu_mode_quirks(struct spi_mpc8xxx_cs *cs,
 	if (bits_per_word <= 8) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u8;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u8;
-		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
-			cs->rx_shift = 16;
-			cs->tx_shift = 24;
-		}
 	} else if (bits_per_word <= 16) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u16;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u16;
-		if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
-			cs->rx_shift = 16;
-			cs->tx_shift = 16;
-		}
 	} else if (bits_per_word <= 32) {
 		cs->get_rx = mpc8xxx_spi_rx_buf_u32;
 		cs->get_tx = mpc8xxx_spi_tx_buf_u32;
 	} else
 		return -EINVAL;
 
-	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE &&
-	    spi->mode & SPI_LSB_FIRST) {
-		cs->tx_shift = 0;
-		if (bits_per_word <= 8)
-			cs->rx_shift = 8;
-		else
-			cs->rx_shift = 0;
-	}
+	if (mpc8xxx_spi->set_shifts)
+		mpc8xxx_spi->set_shifts(&cs->rx_shift, &cs->tx_shift,
+					bits_per_word,
+					!(spi->mode & SPI_LSB_FIRST));
+
 	mpc8xxx_spi->rx_shift = cs->rx_shift;
 	mpc8xxx_spi->tx_shift = cs->tx_shift;
 	mpc8xxx_spi->get_rx = cs->get_rx;
@@ -246,7 +241,8 @@ static int fsl_spi_setup_transfer(struct spi_device *spi,
 
 	/* Make sure its a bit width we support [4..16, 32] */
 	if ((bits_per_word < 4)
-	    || ((bits_per_word > 16) && (bits_per_word != 32)))
+	    || ((bits_per_word > 16) && (bits_per_word != 32))
+	    || (bits_per_word > mpc8xxx_spi->max_bits_per_word))
 		return -EINVAL;
 
 	if (!hz)
@@ -295,112 +291,6 @@ static int fsl_spi_setup_transfer(struct spi_device *spi,
 	return 0;
 }
 
-static void fsl_spi_cpm_bufs_start(struct mpc8xxx_spi *mspi)
-{
-	struct cpm_buf_desc __iomem *tx_bd = mspi->tx_bd;
-	struct cpm_buf_desc __iomem *rx_bd = mspi->rx_bd;
-	unsigned int xfer_len = min(mspi->count, SPI_MRBLR);
-	unsigned int xfer_ofs;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
-
-	xfer_ofs = mspi->xfer_in_progress->len - mspi->count;
-
-	if (mspi->rx_dma == mspi->dma_dummy_rx)
-		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma);
-	else
-		out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs);
-	out_be16(&rx_bd->cbd_datlen, 0);
-	out_be16(&rx_bd->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP);
-
-	if (mspi->tx_dma == mspi->dma_dummy_tx)
-		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma);
-	else
-		out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs);
-	out_be16(&tx_bd->cbd_datlen, xfer_len);
-	out_be16(&tx_bd->cbd_sc, BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP |
-				 BD_SC_LAST);
-
-	/* start transfer */
-	mpc8xxx_spi_write_reg(&reg_base->command, SPCOM_STR);
-}
-
-static int fsl_spi_cpm_bufs(struct mpc8xxx_spi *mspi,
-				struct spi_transfer *t, bool is_dma_mapped)
-{
-	struct device *dev = mspi->dev;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
-
-	if (is_dma_mapped) {
-		mspi->map_tx_dma = 0;
-		mspi->map_rx_dma = 0;
-	} else {
-		mspi->map_tx_dma = 1;
-		mspi->map_rx_dma = 1;
-	}
-
-	if (!t->tx_buf) {
-		mspi->tx_dma = mspi->dma_dummy_tx;
-		mspi->map_tx_dma = 0;
-	}
-
-	if (!t->rx_buf) {
-		mspi->rx_dma = mspi->dma_dummy_rx;
-		mspi->map_rx_dma = 0;
-	}
-
-	if (mspi->map_tx_dma) {
-		void *nonconst_tx = (void *)mspi->tx; /* shut up gcc */
-
-		mspi->tx_dma = dma_map_single(dev, nonconst_tx, t->len,
-					      DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, mspi->tx_dma)) {
-			dev_err(dev, "unable to map tx dma\n");
-			return -ENOMEM;
-		}
-	} else if (t->tx_buf) {
-		mspi->tx_dma = t->tx_dma;
-	}
-
-	if (mspi->map_rx_dma) {
-		mspi->rx_dma = dma_map_single(dev, mspi->rx, t->len,
-					      DMA_FROM_DEVICE);
-		if (dma_mapping_error(dev, mspi->rx_dma)) {
-			dev_err(dev, "unable to map rx dma\n");
-			goto err_rx_dma;
-		}
-	} else if (t->rx_buf) {
-		mspi->rx_dma = t->rx_dma;
-	}
-
-	/* enable rx ints */
-	mpc8xxx_spi_write_reg(&reg_base->mask, SPIE_RXB);
-
-	mspi->xfer_in_progress = t;
-	mspi->count = t->len;
-
-	/* start CPM transfers */
-	fsl_spi_cpm_bufs_start(mspi);
-
-	return 0;
-
-err_rx_dma:
-	if (mspi->map_tx_dma)
-		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
-	return -ENOMEM;
-}
-
-static void fsl_spi_cpm_bufs_complete(struct mpc8xxx_spi *mspi)
-{
-	struct device *dev = mspi->dev;
-	struct spi_transfer *t = mspi->xfer_in_progress;
-
-	if (mspi->map_tx_dma)
-		dma_unmap_single(dev, mspi->tx_dma, t->len, DMA_TO_DEVICE);
-	if (mspi->map_rx_dma)
-		dma_unmap_single(dev, mspi->rx_dma, t->len, DMA_FROM_DEVICE);
-	mspi->xfer_in_progress = NULL;
-}
-
 static int fsl_spi_cpu_bufs(struct mpc8xxx_spi *mspi,
 				struct spi_transfer *t, unsigned int len)
 {
@@ -565,31 +455,45 @@ static int fsl_spi_setup(struct spi_device *spi)
 		cs->hw_mode = hw_mode; /* Restore settings */
 		return retval;
 	}
-	return 0;
-}
 
-static void fsl_spi_cpm_irq(struct mpc8xxx_spi *mspi, u32 events)
-{
-	u16 len;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	if (mpc8xxx_spi->type == TYPE_GRLIB) {
+		if (gpio_is_valid(spi->cs_gpio)) {
+			int desel;
 
-	dev_dbg(mspi->dev, "%s: bd datlen %d, count %d\n", __func__,
-		in_be16(&mspi->rx_bd->cbd_datlen), mspi->count);
+			retval = gpio_request(spi->cs_gpio,
+					      dev_name(&spi->dev));
+			if (retval)
+				return retval;
 
-	len = in_be16(&mspi->rx_bd->cbd_datlen);
-	if (len > mspi->count) {
-		WARN_ON(1);
-		len = mspi->count;
+			desel = !(spi->mode & SPI_CS_HIGH);
+			retval = gpio_direction_output(spi->cs_gpio, desel);
+			if (retval) {
+				gpio_free(spi->cs_gpio);
+				return retval;
+			}
+		} else if (spi->cs_gpio != -ENOENT) {
+			if (spi->cs_gpio < 0)
+				return spi->cs_gpio;
+			return -EINVAL;
+		}
+		/* When spi->cs_gpio == -ENOENT, a hole in the phandle list
+		 * indicates that the native chipselect should be used if
+		 * present, or that the chip is always selected.
+		 */
 	}
 
-	/* Clear the events */
-	mpc8xxx_spi_write_reg(&reg_base->event, events);
+	/* Initialize chipselect - might be active for SPI_CS_HIGH mode */
+	fsl_spi_chipselect(spi, BITBANG_CS_INACTIVE);
 
-	mspi->count -= len;
-	if (mspi->count)
-		fsl_spi_cpm_bufs_start(mspi);
-	else
-		complete(&mspi->done);
+	return 0;
+}
+
+static void fsl_spi_cleanup(struct spi_device *spi)
+{
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
+
+	if (mpc8xxx_spi->type == TYPE_GRLIB && gpio_is_valid(spi->cs_gpio))
+		gpio_free(spi->cs_gpio);
 }
 
 static void fsl_spi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
@@ -646,201 +550,51 @@ static irqreturn_t fsl_spi_irq(s32 irq, void *context_data)
 	return ret;
 }
 
-static void *fsl_spi_alloc_dummy_rx(void)
-{
-	mutex_lock(&fsl_dummy_rx_lock);
-
-	if (!fsl_dummy_rx)
-		fsl_dummy_rx = kmalloc(SPI_MRBLR, GFP_KERNEL);
-	if (fsl_dummy_rx)
-		fsl_dummy_rx_refcnt++;
-
-	mutex_unlock(&fsl_dummy_rx_lock);
-
-	return fsl_dummy_rx;
-}
-
-static void fsl_spi_free_dummy_rx(void)
+static void fsl_spi_remove(struct mpc8xxx_spi *mspi)
 {
-	mutex_lock(&fsl_dummy_rx_lock);
-
-	switch (fsl_dummy_rx_refcnt) {
-	case 0:
-		WARN_ON(1);
-		break;
-	case 1:
-		kfree(fsl_dummy_rx);
-		fsl_dummy_rx = NULL;
-		/* fall through */
-	default:
-		fsl_dummy_rx_refcnt--;
-		break;
-	}
-
-	mutex_unlock(&fsl_dummy_rx_lock);
+	iounmap(mspi->reg_base);
+	fsl_spi_cpm_free(mspi);
 }
 
-static unsigned long fsl_spi_cpm_get_pram(struct mpc8xxx_spi *mspi)
+static void fsl_spi_grlib_cs_control(struct spi_device *spi, bool on)
 {
-	struct device *dev = mspi->dev;
-	struct device_node *np = dev->of_node;
-	const u32 *iprop;
-	int size;
-	void __iomem *spi_base;
-	unsigned long pram_ofs = -ENOMEM;
-
-	/* Can't use of_address_to_resource(), QE muram isn't at 0. */
-	iprop = of_get_property(np, "reg", &size);
-
-	/* QE with a fixed pram location? */
-	if (mspi->flags & SPI_QE && iprop && size == sizeof(*iprop) * 4)
-		return cpm_muram_alloc_fixed(iprop[2], SPI_PRAM_SIZE);
-
-	/* QE but with a dynamic pram location? */
-	if (mspi->flags & SPI_QE) {
-		pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
-		qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, mspi->subblock,
-				QE_CR_PROTOCOL_UNSPECIFIED, pram_ofs);
-		return pram_ofs;
-	}
-
-	spi_base = of_iomap(np, 1);
-	if (spi_base == NULL)
-		return -EINVAL;
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	struct fsl_spi_reg *reg_base = mpc8xxx_spi->reg_base;
+	u32 slvsel;
+	u16 cs = spi->chip_select;
 
-	if (mspi->flags & SPI_CPM2) {
-		pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
-		out_be16(spi_base, pram_ofs);
-	} else {
-		struct spi_pram __iomem *pram = spi_base;
-		u16 rpbase = in_be16(&pram->rpbase);
-
-		/* Microcode relocation patch applied? */
-		if (rpbase)
-			pram_ofs = rpbase;
-		else {
-			pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
-			out_be16(spi_base, pram_ofs);
-		}
+	if (gpio_is_valid(spi->cs_gpio)) {
+		gpio_set_value(spi->cs_gpio, on);
+	} else if (cs < mpc8xxx_spi->native_chipselects) {
+		slvsel = mpc8xxx_spi_read_reg(&reg_base->slvsel);
+		slvsel = on ? (slvsel | (1 << cs)) : (slvsel & ~(1 << cs));
+		mpc8xxx_spi_write_reg(&reg_base->slvsel, slvsel);
 	}
-
-	iounmap(spi_base);
-	return pram_ofs;
 }
 
-static int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
+static void fsl_spi_grlib_probe(struct device *dev)
 {
-	struct device *dev = mspi->dev;
-	struct device_node *np = dev->of_node;
-	const u32 *iprop;
-	int size;
-	unsigned long pram_ofs;
-	unsigned long bds_ofs;
-
-	if (!(mspi->flags & SPI_CPM_MODE))
-		return 0;
-
-	if (!fsl_spi_alloc_dummy_rx())
-		return -ENOMEM;
-
-	if (mspi->flags & SPI_QE) {
-		iprop = of_get_property(np, "cell-index", &size);
-		if (iprop && size == sizeof(*iprop))
-			mspi->subblock = *iprop;
-
-		switch (mspi->subblock) {
-		default:
-			dev_warn(dev, "cell-index unspecified, assuming SPI1");
-			/* fall through */
-		case 0:
-			mspi->subblock = QE_CR_SUBBLOCK_SPI1;
-			break;
-		case 1:
-			mspi->subblock = QE_CR_SUBBLOCK_SPI2;
-			break;
-		}
-	}
-
-	pram_ofs = fsl_spi_cpm_get_pram(mspi);
-	if (IS_ERR_VALUE(pram_ofs)) {
-		dev_err(dev, "can't allocate spi parameter ram\n");
-		goto err_pram;
-	}
+	struct fsl_spi_platform_data *pdata = dev->platform_data;
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
+	struct fsl_spi_reg *reg_base = mpc8xxx_spi->reg_base;
+	int mbits;
+	u32 capabilities;
 
-	bds_ofs = cpm_muram_alloc(sizeof(*mspi->tx_bd) +
-				  sizeof(*mspi->rx_bd), 8);
-	if (IS_ERR_VALUE(bds_ofs)) {
-		dev_err(dev, "can't allocate bds\n");
-		goto err_bds;
-	}
+	capabilities = mpc8xxx_spi_read_reg(&reg_base->cap);
 
-	mspi->dma_dummy_tx = dma_map_single(dev, empty_zero_page, PAGE_SIZE,
-					    DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, mspi->dma_dummy_tx)) {
-		dev_err(dev, "unable to map dummy tx buffer\n");
-		goto err_dummy_tx;
-	}
+	mpc8xxx_spi->set_shifts = fsl_spi_grlib_set_shifts;
+	mbits = SPCAP_MAXWLEN(capabilities);
+	if (mbits)
+		mpc8xxx_spi->max_bits_per_word = mbits + 1;
 
-	mspi->dma_dummy_rx = dma_map_single(dev, fsl_dummy_rx, SPI_MRBLR,
-					    DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, mspi->dma_dummy_rx)) {
-		dev_err(dev, "unable to map dummy rx buffer\n");
-		goto err_dummy_rx;
+	mpc8xxx_spi->native_chipselects = 0;
+	if (SPCAP_SSEN(capabilities)) {
+		mpc8xxx_spi->native_chipselects = SPCAP_SSSZ(capabilities);
+		mpc8xxx_spi_write_reg(&reg_base->slvsel, 0xffffffff);
 	}
-
-	mspi->pram = cpm_muram_addr(pram_ofs);
-
-	mspi->tx_bd = cpm_muram_addr(bds_ofs);
-	mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
-
-	/* Initialize parameter ram. */
-	out_be16(&mspi->pram->tbase, cpm_muram_offset(mspi->tx_bd));
-	out_be16(&mspi->pram->rbase, cpm_muram_offset(mspi->rx_bd));
-	out_8(&mspi->pram->tfcr, CPMFCR_EB | CPMFCR_GBL);
-	out_8(&mspi->pram->rfcr, CPMFCR_EB | CPMFCR_GBL);
-	out_be16(&mspi->pram->mrblr, SPI_MRBLR);
-	out_be32(&mspi->pram->rstate, 0);
-	out_be32(&mspi->pram->rdp, 0);
-	out_be16(&mspi->pram->rbptr, 0);
-	out_be16(&mspi->pram->rbc, 0);
-	out_be32(&mspi->pram->rxtmp, 0);
-	out_be32(&mspi->pram->tstate, 0);
-	out_be32(&mspi->pram->tdp, 0);
-	out_be16(&mspi->pram->tbptr, 0);
-	out_be16(&mspi->pram->tbc, 0);
-	out_be32(&mspi->pram->txtmp, 0);
-
-	return 0;
-
-err_dummy_rx:
-	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
-err_dummy_tx:
-	cpm_muram_free(bds_ofs);
-err_bds:
-	cpm_muram_free(pram_ofs);
-err_pram:
-	fsl_spi_free_dummy_rx();
-	return -ENOMEM;
-}
-
-static void fsl_spi_cpm_free(struct mpc8xxx_spi *mspi)
-{
-	struct device *dev = mspi->dev;
-
-	if (!(mspi->flags & SPI_CPM_MODE))
-		return;
-
-	dma_unmap_single(dev, mspi->dma_dummy_rx, SPI_MRBLR, DMA_FROM_DEVICE);
-	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
-	cpm_muram_free(cpm_muram_offset(mspi->tx_bd));
-	cpm_muram_free(cpm_muram_offset(mspi->pram));
-	fsl_spi_free_dummy_rx();
-}
-
-static void fsl_spi_remove(struct mpc8xxx_spi *mspi)
-{
-	iounmap(mspi->reg_base);
-	fsl_spi_cpm_free(mspi);
+	master->num_chipselect = mpc8xxx_spi->native_chipselects;
+	pdata->cs_control = fsl_spi_grlib_cs_control;
 }
 
 static struct spi_master * fsl_spi_probe(struct device *dev,
@@ -866,27 +620,35 @@ static struct spi_master * fsl_spi_probe(struct device *dev,
 		goto err_probe;
 
 	master->setup = fsl_spi_setup;
+	master->cleanup = fsl_spi_cleanup;
 
 	mpc8xxx_spi = spi_master_get_devdata(master);
 	mpc8xxx_spi->spi_do_one_msg = fsl_spi_do_one_msg;
 	mpc8xxx_spi->spi_remove = fsl_spi_remove;
-
+	mpc8xxx_spi->max_bits_per_word = 32;
+	mpc8xxx_spi->type = fsl_spi_get_type(dev);
 
 	ret = fsl_spi_cpm_init(mpc8xxx_spi);
 	if (ret)
 		goto err_cpm_init;
 
-	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
-		mpc8xxx_spi->rx_shift = 16;
-		mpc8xxx_spi->tx_shift = 24;
-	}
-
 	mpc8xxx_spi->reg_base = ioremap(mem->start, resource_size(mem));
 	if (mpc8xxx_spi->reg_base == NULL) {
 		ret = -ENOMEM;
 		goto err_ioremap;
 	}
 
+	if (mpc8xxx_spi->type == TYPE_GRLIB)
+		fsl_spi_grlib_probe(dev);
+
+	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
+		mpc8xxx_spi->set_shifts = fsl_spi_qe_cpu_set_shifts;
+
+	if (mpc8xxx_spi->set_shifts)
+		/* 8 bits per word and MSB first */
+		mpc8xxx_spi->set_shifts(&mpc8xxx_spi->rx_shift,
+					&mpc8xxx_spi->tx_shift, 8, 1);
+
 	/* Register for SPI Interrupt */
 	ret = request_irq(mpc8xxx_spi->irq, fsl_spi_irq,
 			  0, "fsl_spi", mpc8xxx_spi);
@@ -904,6 +666,10 @@ static struct spi_master * fsl_spi_probe(struct device *dev,
 
 	/* Enable SPI interface */
 	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
+	if (mpc8xxx_spi->max_bits_per_word < 8) {
+		regval &= ~SPMODE_LEN(0xF);
+		regval |= SPMODE_LEN(mpc8xxx_spi->max_bits_per_word - 1);
+	}
 	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE)
 		regval |= SPMODE_OP;
 
@@ -1047,28 +813,31 @@ static int of_fsl_spi_probe(struct platform_device *ofdev)
 	struct device_node *np = ofdev->dev.of_node;
 	struct spi_master *master;
 	struct resource mem;
-	struct resource irq;
+	int irq, type;
 	int ret = -ENOMEM;
 
 	ret = of_mpc8xxx_spi_probe(ofdev);
 	if (ret)
 		return ret;
 
-	ret = of_fsl_spi_get_chipselects(dev);
-	if (ret)
-		goto err;
+	type = fsl_spi_get_type(&ofdev->dev);
+	if (type == TYPE_FSL) {
+		ret = of_fsl_spi_get_chipselects(dev);
+		if (ret)
+			goto err;
+	}
 
 	ret = of_address_to_resource(np, 0, &mem);
 	if (ret)
 		goto err;
 
-	ret = of_irq_to_resource(np, 0, &irq);
-	if (!ret) {
+	irq = irq_of_parse_and_map(np, 0);
+	if (!irq) {
 		ret = -EINVAL;
 		goto err;
 	}
 
-	master = fsl_spi_probe(dev, &mem, irq.start);
+	master = fsl_spi_probe(dev, &mem, irq);
 	if (IS_ERR(master)) {
 		ret = PTR_ERR(master);
 		goto err;
@@ -1077,27 +846,25 @@ static int of_fsl_spi_probe(struct platform_device *ofdev)
 	return 0;
 
 err:
-	of_fsl_spi_free_chipselects(dev);
+	if (type == TYPE_FSL)
+		of_fsl_spi_free_chipselects(dev);
 	return ret;
 }
 
 static int of_fsl_spi_remove(struct platform_device *ofdev)
 {
+	struct spi_master *master = dev_get_drvdata(&ofdev->dev);
+	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
 	int ret;
 
 	ret = mpc8xxx_spi_remove(&ofdev->dev);
 	if (ret)
 		return ret;
-	of_fsl_spi_free_chipselects(&ofdev->dev);
+	if (mpc8xxx_spi->type == TYPE_FSL)
+		of_fsl_spi_free_chipselects(&ofdev->dev);
 	return 0;
 }
 
-static const struct of_device_id of_fsl_spi_match[] = {
-	{ .compatible = "fsl,spi" },
-	{}
-};
-MODULE_DEVICE_TABLE(of, of_fsl_spi_match);
-
 static struct platform_driver of_fsl_spi_driver = {
 	.driver = {
 		.name = "fsl_spi",
@@ -1134,9 +901,7 @@ static int plat_mpc8xxx_spi_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	master = fsl_spi_probe(&pdev->dev, mem, irq);
-	if (IS_ERR(master))
-		return PTR_ERR(master);
-	return 0;
+	return PTR_RET(master);
 }
 
 static int plat_mpc8xxx_spi_remove(struct platform_device *pdev)
diff --git a/drivers/spi/spi-fsl-spi.h b/drivers/spi/spi-fsl-spi.h
new file mode 100644
index 000000000000..9a6dae00e3f3
--- /dev/null
+++ b/drivers/spi/spi-fsl-spi.h
@@ -0,0 +1,72 @@
+/*
+ * Freescale SPI controller driver.
+ *
+ * Maintainer: Kumar Gala
+ *
+ * Copyright (C) 2006 Polycom, Inc.
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ *
+ * CPM SPI and QE buffer descriptors mode support:
+ * Copyright (c) 2009  MontaVista Software, Inc.
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * GRLIB support:
+ * Copyright (c) 2012 Aeroflex Gaisler AB.
+ * Author: Andreas Larsson <andreas@gaisler.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __SPI_FSL_SPI_H__
+#define __SPI_FSL_SPI_H__
+
+/* SPI Controller registers */
+struct fsl_spi_reg {
+	__be32 cap; /* TYPE_GRLIB specific */
+	u8 res1[0x1C];
+	__be32 mode;
+	__be32 event;
+	__be32 mask;
+	__be32 command;
+	__be32 transmit;
+	__be32 receive;
+	__be32 slvsel; /* TYPE_GRLIB specific */
+};
+
+/* SPI Controller mode register definitions */
+#define	SPMODE_LOOP		(1 << 30)
+#define	SPMODE_CI_INACTIVEHIGH	(1 << 29)
+#define	SPMODE_CP_BEGIN_EDGECLK	(1 << 28)
+#define	SPMODE_DIV16		(1 << 27)
+#define	SPMODE_REV		(1 << 26)
+#define	SPMODE_MS		(1 << 25)
+#define	SPMODE_ENABLE		(1 << 24)
+#define	SPMODE_LEN(x)		((x) << 20)
+#define	SPMODE_PM(x)		((x) << 16)
+#define	SPMODE_OP		(1 << 14)
+#define	SPMODE_CG(x)		((x) << 7)
+
+/* TYPE_GRLIB SPI Controller capability register definitions */
+#define SPCAP_SSEN(x)		(((x) >> 16) & 0x1)
+#define SPCAP_SSSZ(x)		(((x) >> 24) & 0xff)
+#define SPCAP_MAXWLEN(x)	(((x) >> 20) & 0xf)
+
+/*
+ * Default for SPI Mode:
+ *	SPI MODE 0 (inactive low, phase middle, MSB, 8-bit length, slow clk)
+ */
+#define	SPMODE_INIT_VAL (SPMODE_CI_INACTIVEHIGH | SPMODE_DIV16 | SPMODE_REV | \
+			 SPMODE_MS | SPMODE_LEN(7) | SPMODE_PM(0xf))
+
+/* SPIE register values */
+#define	SPIE_NE		0x00000200	/* Not empty */
+#define	SPIE_NF		0x00000100	/* Not full */
+
+/* SPIM register values */
+#define	SPIM_NE		0x00000200	/* Not empty */
+#define	SPIM_NF		0x00000100	/* Not full */
+
+#endif /* __SPI_FSL_SPI_H__ */
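
The SPCAP_* macros above unpack the GRLIB-only capability register read by fsl_spi_grlib_probe(): SSSZ holds the number of native slave-select lines, SSEN says whether slave-select support is present at all, and the probe code uses MAXWLEN + 1 as the maximum word length, treating 0 as "no limit". A small decode sketch; both example_decode_grlib_cap and the 0x04f10000 value are made up for illustration:

static void example_decode_grlib_cap(u32 cap)
{
	/* e.g. cap == 0x04f10000: slvsel present, 4 lines, 16-bit words max. */
	pr_info("slvsel: %u, slvsel lines: %u, max word length: %u bits\n",
		SPCAP_SSEN(cap), SPCAP_SSSZ(cap),
		SPCAP_MAXWLEN(cap) ? SPCAP_MAXWLEN(cap) + 1 : 32);
}
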
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 9ddef55a7165..0021fc4c45bc 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -265,9 +265,9 @@ static int spi_gpio_setup(struct spi_device *spi)
 		}
 	}
 	if (!status) {
-		status = spi_bitbang_setup(spi);
 		/* in case it was initialized from static board data */
 		spi_gpio->cs_gpios[spi->chip_select] = cs;
+		status = spi_bitbang_setup(spi);
 	}
 
 	if (status) {
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index 89480b281d74..dfddf336912d 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -28,6 +28,7 @@
 #include <linux/clk.h>
 #include <linux/spi/spi.h>
 #include <linux/fsl_devices.h>
+#include <linux/gpio.h>
 #include <asm/mpc52xx_psc.h>
 
 struct mpc512x_psc_spi {
@@ -113,7 +114,7 @@ static void mpc512x_psc_spi_activate_cs(struct spi_device *spi)
 	out_be32(&psc->ccr, ccr);
 	mps->bits_per_word = cs->bits_per_word;
 
-	if (mps->cs_control)
+	if (mps->cs_control && gpio_is_valid(spi->cs_gpio))
 		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 1 : 0);
 }
 
@@ -121,7 +122,7 @@ static void mpc512x_psc_spi_deactivate_cs(struct spi_device *spi)
 {
 	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
 
-	if (mps->cs_control)
+	if (mps->cs_control && gpio_is_valid(spi->cs_gpio))
 		mps->cs_control(spi, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
 
 }
@@ -148,6 +149,9 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi,
 	in_8(&psc->mode);
 	out_8(&psc->mode, 0x0);
 
+	/* enable transmitter/receiver */
+	out_8(&psc->command, MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
+
 	while (len) {
 		int count;
 		int i;
@@ -164,7 +168,7 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi,
 
 		for (i = count; i > 0; i--) {
 			data = tx_buf ? *tx_buf++ : 0;
-			if (len == EOFBYTE)
+			if (len == EOFBYTE && t->cs_change)
 				setbits32(&fifo->txcmd, MPC512x_PSC_FIFO_EOF);
 			out_8(&fifo->txdata_8, data);
 			len--;
@@ -176,10 +180,6 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi,
 		out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY);
 		out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY);
 
-		/* enable transmiter/receiver */
-		out_8(&psc->command,
-		      MPC52xx_PSC_TX_ENABLE | MPC52xx_PSC_RX_ENABLE);
-
 		wait_for_completion(&mps->done);
 
 		mdelay(1);
@@ -204,9 +204,6 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi,
 		while (in_be32(&fifo->rxcnt)) {
 			in_8(&fifo->rxdata_8);
 		}
-
-		out_8(&psc->command,
-		      MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE);
 	}
 	/* disable transmiter/receiver and fifo interrupt */
 	out_8(&psc->command, MPC52xx_PSC_TX_DISABLE | MPC52xx_PSC_RX_DISABLE);
@@ -278,6 +275,7 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
 	struct mpc512x_psc_spi *mps = spi_master_get_devdata(spi->master);
 	struct mpc512x_psc_spi_cs *cs = spi->controller_state;
 	unsigned long flags;
+	int ret;
 
 	if (spi->bits_per_word % 8)
 		return -EINVAL;
@@ -286,6 +284,19 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
 		cs = kzalloc(sizeof *cs, GFP_KERNEL);
 		if (!cs)
 			return -ENOMEM;
+
+		if (gpio_is_valid(spi->cs_gpio)) {
+			ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev));
+			if (ret) {
+				dev_err(&spi->dev, "can't get CS gpio: %d\n",
+					ret);
+				kfree(cs);
+				return ret;
+			}
+			gpio_direction_output(spi->cs_gpio,
+					spi->mode & SPI_CS_HIGH ? 0 : 1);
+		}
+
 		spi->controller_state = cs;
 	}
 
@@ -319,6 +330,8 @@ static int mpc512x_psc_spi_transfer(struct spi_device *spi,
 
 static void mpc512x_psc_spi_cleanup(struct spi_device *spi)
 {
+	if (gpio_is_valid(spi->cs_gpio))
+		gpio_free(spi->cs_gpio);
 	kfree(spi->controller_state);
 }
 
@@ -405,6 +418,11 @@ static irqreturn_t mpc512x_psc_spi_isr(int irq, void *dev_id)
 	return IRQ_NONE;
 }
 
+static void mpc512x_spi_cs_control(struct spi_device *spi, bool onoff)
+{
+	gpio_set_value(spi->cs_gpio, onoff);
+}
+
 /* bus_num is used only for the case dev->platform_data == NULL */
 static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 					      u32 size, unsigned int irq,
@@ -425,12 +443,9 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	mps->irq = irq;
 
 	if (pdata == NULL) {
-		dev_err(dev, "probe called without platform data, no "
-			"cs_control function will be called\n");
-		mps->cs_control = NULL;
+		mps->cs_control = mpc512x_spi_cs_control;
 		mps->sysclk = 0;
 		master->bus_num = bus_num;
-		master->num_chipselect = 255;
 	} else {
 		mps->cs_control = pdata->cs_control;
 		mps->sysclk = pdata->sysclk;
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 22a0af0147fb..a1d5778e2bbb 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -612,6 +612,7 @@ static int mxs_spi_probe(struct platform_device *pdev)
 	ssp->dmach = dma_request_channel(mask, mxs_ssp_dma_filter, ssp);
 	if (!ssp->dmach) {
 		dev_err(ssp->dev, "Failed to request DMA\n");
+		ret = -ENODEV;
 		goto out_master_free;
 	}
 
diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c
index cb2e284bd814..e60a776ed2d4 100644
--- a/drivers/spi/spi-oc-tiny.c
+++ b/drivers/spi/spi-oc-tiny.c
@@ -393,8 +393,6 @@ static const struct of_device_id tiny_spi_match[] = {
 	{},
 };
 MODULE_DEVICE_TABLE(of, tiny_spi_match);
-#else /* CONFIG_OF */
-#define tiny_spi_match NULL
 #endif /* CONFIG_OF */
 
 static struct platform_driver tiny_spi_driver = {
@@ -404,7 +402,7 @@ static struct platform_driver tiny_spi_driver = {
 		.name = DRV_NAME,
 		.owner = THIS_MODULE,
 		.pm = NULL,
-		.of_match_table = tiny_spi_match,
+		.of_match_table = of_match_ptr(tiny_spi_match),
 	},
 };
 module_platform_driver(tiny_spi_driver);
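
Dropping the "#define tiny_spi_match NULL" fallback works because of_match_ptr() already collapses to NULL when CONFIG_OF is disabled, so the driver structure can name the table without ever referencing the symbol on non-OF builds. Roughly, paraphrasing the definition in <linux/of.h> for context only:

#ifdef CONFIG_OF
#define of_match_ptr(_ptr)	(_ptr)
#else
#define of_match_ptr(_ptr)	NULL
#endif
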
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 893c3d78e426..86d2158946bb 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -285,8 +285,12 @@ static int mcspi_wait_for_reg_bit(void __iomem *reg, unsigned long bit)
 
 	timeout = jiffies + msecs_to_jiffies(1000);
 	while (!(__raw_readl(reg) & bit)) {
-		if (time_after(jiffies, timeout))
-			return -1;
+		if (time_after(jiffies, timeout)) {
+			if (!(__raw_readl(reg) & bit))
+				return -ETIMEDOUT;
+			else
+				return 0;
+		}
 		cpu_relax();
 	}
 	return 0;
@@ -805,6 +809,10 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	return 0;
 }
 
+/*
+ * Note that we currently allow DMA only if we get a channel
+ * for both rx and tx. Otherwise we'll do PIO for both rx and tx.
+ */
 static int omap2_mcspi_request_dma(struct spi_device *spi)
 {
 	struct spi_master	*master = spi->master;
@@ -823,21 +831,22 @@ static int omap2_mcspi_request_dma(struct spi_device *spi)
 	dma_cap_set(DMA_SLAVE, mask);
 	sig = mcspi_dma->dma_rx_sync_dev;
 	mcspi_dma->dma_rx = dma_request_channel(mask, omap_dma_filter_fn, &sig);
-	if (!mcspi_dma->dma_rx) {
-		dev_err(&spi->dev, "no RX DMA engine channel for McSPI\n");
-		return -EAGAIN;
-	}
+	if (!mcspi_dma->dma_rx)
+		goto no_dma;
 
 	sig = mcspi_dma->dma_tx_sync_dev;
 	mcspi_dma->dma_tx = dma_request_channel(mask, omap_dma_filter_fn, &sig);
 	if (!mcspi_dma->dma_tx) {
-		dev_err(&spi->dev, "no TX DMA engine channel for McSPI\n");
 		dma_release_channel(mcspi_dma->dma_rx);
 		mcspi_dma->dma_rx = NULL;
-		return -EAGAIN;
+		goto no_dma;
 	}
 
 	return 0;
+
+no_dma:
+	dev_warn(&spi->dev, "not using DMA for McSPI\n");
+	return -EAGAIN;
 }
 
 static int omap2_mcspi_setup(struct spi_device *spi)
@@ -870,7 +879,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 
 	if (!mcspi_dma->dma_rx || !mcspi_dma->dma_tx) {
 		ret = omap2_mcspi_request_dma(spi);
-		if (ret < 0)
+		if (ret < 0 && ret != -EAGAIN)
 			return ret;
 	}
 
@@ -928,6 +937,7 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 	struct spi_device		*spi;
 	struct spi_transfer		*t = NULL;
 	struct spi_master		*master;
+	struct omap2_mcspi_dma		*mcspi_dma;
 	int				cs_active = 0;
 	struct omap2_mcspi_cs		*cs;
 	struct omap2_mcspi_device_config *cd;
@@ -937,6 +947,7 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 
 	spi = m->spi;
 	master = spi->master;
+	mcspi_dma = mcspi->dma_channels + spi->chip_select;
 	cs = spi->controller_state;
 	cd = spi->controller_data;
 
@@ -993,7 +1004,8 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 				__raw_writel(0, cs->base
 						+ OMAP2_MCSPI_TX0);
 
-			if (m->is_dma_mapped || t->len >= DMA_MIN_BYTES)
+			if ((mcspi_dma->dma_rx && mcspi_dma->dma_tx) &&
+			    (m->is_dma_mapped || t->len >= DMA_MIN_BYTES))
 				count = omap2_mcspi_txrx_dma(spi, t);
 			else
 				count = omap2_mcspi_txrx_pio(spi, t);
@@ -1040,10 +1052,14 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 static int omap2_mcspi_transfer_one_message(struct spi_master *master,
 		struct spi_message *m)
 {
+	struct spi_device	*spi;
 	struct omap2_mcspi	*mcspi;
+	struct omap2_mcspi_dma	*mcspi_dma;
 	struct spi_transfer	*t;
 
+	spi = m->spi;
 	mcspi = spi_master_get_devdata(master);
+	mcspi_dma = mcspi->dma_channels + spi->chip_select;
 	m->actual_length = 0;
 	m->status = 0;
 
@@ -1078,7 +1094,7 @@ static int omap2_mcspi_transfer_one_message(struct spi_master *master,
 		if (m->is_dma_mapped || len < DMA_MIN_BYTES)
 			continue;
 
-		if (tx_buf != NULL) {
+		if (mcspi_dma->dma_tx && tx_buf != NULL) {
 			t->tx_dma = dma_map_single(mcspi->dev, (void *) tx_buf,
 					len, DMA_TO_DEVICE);
 			if (dma_mapping_error(mcspi->dev, t->tx_dma)) {
@@ -1087,7 +1103,7 @@ static int omap2_mcspi_transfer_one_message(struct spi_master *master,
 				return -EINVAL;
 			}
 		}
-		if (rx_buf != NULL) {
+		if (mcspi_dma->dma_rx && rx_buf != NULL) {
 			t->rx_dma = dma_map_single(mcspi->dev, rx_buf, t->len,
 					DMA_FROM_DEVICE);
 			if (dma_mapping_error(mcspi->dev, t->rx_dma)) {
@@ -1277,7 +1293,8 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
 	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
 	pm_runtime_enable(&pdev->dev);
 
-	if (status || omap2_mcspi_master_setup(mcspi) < 0)
+	status = omap2_mcspi_master_setup(mcspi);
+	if (status < 0)
 		goto disable_pm;
 
 	status = spi_register_master(master);
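
The mcspi_wait_for_reg_bit() change replaces the bare -1 with -ETIMEDOUT and, more importantly, re-reads the bit once after the deadline expires, so a long preemption between the last poll and the time_after() check is not misreported as a hardware timeout. The same idiom, condensed into a standalone helper equivalent to the hunk above (example_wait_for_bit is illustrative only):

static int example_wait_for_bit(void __iomem *reg, unsigned long bit,
				unsigned long timeout)
{
	while (!(__raw_readl(reg) & bit)) {
		if (time_after(jiffies, timeout)) {
			/* Re-check once: the bit may have been set while we
			 * were preempted between the poll and the deadline
			 * test. */
			return (__raw_readl(reg) & bit) ? 0 : -ETIMEDOUT;
		}
		cpu_relax();
	}
	return 0;
}
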
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 364964d2ed04..74bc18775658 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -22,7 +22,7 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 		return ret;
 
 	ret = pcim_iomap_regions(dev, 1 << 0, "PXA2xx SPI");
-	if (!ret)
+	if (ret)
 		return ret;
 
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
@@ -47,8 +47,8 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	pi.size_data = sizeof(spi_pdata);
 
 	pdev = platform_device_register_full(&pi);
-	if (!pdev)
-		return -ENOMEM;
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
 
 	pci_set_drvdata(dev, pdev);
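
Both fixes in spi-pxa2xx-pci.c are about return conventions: pcim_iomap_regions() returns 0 on success, so the old "!ret" test bailed out on the success path, and platform_device_register_full() reports failure with an ERR_PTR() rather than NULL. The corrected pattern in isolation, with ret, dev, pi and pdev taken from the probe above:

	ret = pcim_iomap_regions(dev, 1 << 0, "PXA2xx SPI");
	if (ret)		/* non-zero means the BAR could not be mapped */
		return ret;

	pdev = platform_device_register_full(&pi);
	if (IS_ERR(pdev))	/* failure is encoded as ERR_PTR(), not NULL */
		return PTR_ERR(pdev);
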
 
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 90b27a3508a6..f5d84d6f8222 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -22,6 +22,7 @@
 #include <linux/device.h>
 #include <linux/ioport.h>
 #include <linux/errno.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
@@ -68,6 +69,7 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 #define LPSS_TX_HITHRESH_DFLT	224
 
 /* Offset from drv_data->lpss_base */
+#define SSP_REG			0x0c
 #define SPI_CS_CONTROL		0x18
 #define SPI_CS_CONTROL_SW_MODE	BIT(0)
 #define SPI_CS_CONTROL_CS_HIGH	BIT(1)
@@ -138,6 +140,10 @@ detection_done:
 	/* Enable software chip select control */
 	value = SPI_CS_CONTROL_SW_MODE | SPI_CS_CONTROL_CS_HIGH;
 	__lpss_ssp_write_priv(drv_data, SPI_CS_CONTROL, value);
+
+	/* Enable multiblock DMA transfers */
+	if (drv_data->master_info->enable_dma)
+		__lpss_ssp_write_priv(drv_data, SSP_REG, 1);
 }
 
 static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable)
@@ -1083,11 +1089,9 @@ pxa2xx_spi_acpi_get_pdata(struct platform_device *pdev)
 	ssp = &pdata->ssp;
 
 	ssp->phys_base = res->start;
-	ssp->mmio_base = devm_request_and_ioremap(&pdev->dev, res);
-	if (!ssp->mmio_base) {
-		dev_err(&pdev->dev, "failed to ioremap mmio_base\n");
-		return NULL;
-	}
+	ssp->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ssp->mmio_base))
+		return PTR_ERR(ssp->mmio_base);
 
 	ssp->clk = devm_clk_get(&pdev->dev, NULL);
 	ssp->irq = platform_get_irq(pdev, 0);
@@ -1168,7 +1172,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 
 	master->dev.parent = &pdev->dev;
 	master->dev.of_node = pdev->dev.of_node;
-	ACPI_HANDLE_SET(&master->dev, ACPI_HANDLE(&pdev->dev));
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
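
devm_ioremap_resource() folds the region request, the ioremap and the error reporting into one managed call and signals failure with an ERR_PTR(), which is why the NULL check and the dev_err() disappear in the hunk above. A minimal probe-style sketch of the pattern; example_probe is illustrative and not from the patch:

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))	/* devm_ioremap_resource() logs the reason itself */
		return PTR_ERR(base);

	return 0;
}
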
 
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index e862ab8853aa..5000586cb98d 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -24,6 +24,7 @@
 #include <linux/delay.h>
 #include <linux/clk.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
@@ -31,9 +32,12 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 
-#include <mach/dma.h>
 #include <linux/platform_data/spi-s3c64xx.h>
 
+#ifdef CONFIG_S3C_DMA
+#include <mach/dma.h>
+#endif
+
 #define MAX_SPI_PORTS		3
 
 /* Registers and bit-fields */
@@ -131,9 +135,9 @@
 #define TXBUSY    (1<<3)
 
 struct s3c64xx_spi_dma_data {
-	unsigned		ch;
+	struct dma_chan *ch;
 	enum dma_transfer_direction direction;
-	enum dma_ch	dmach;
+	unsigned int dmach;
 };
 
 /**
@@ -195,16 +199,14 @@ struct s3c64xx_spi_driver_data {
 	unsigned                        cur_speed;
 	struct s3c64xx_spi_dma_data	rx_dma;
 	struct s3c64xx_spi_dma_data	tx_dma;
+#ifdef CONFIG_S3C_DMA
 	struct samsung_dma_ops		*ops;
+#endif
 	struct s3c64xx_spi_port_config	*port_conf;
 	unsigned int			port_id;
 	unsigned long			gpios[4];
 };
 
-static struct s3c2410_dma_client s3c64xx_spi_dma_client = {
-	.name = "samsung-spi-dma",
-};
-
 static void flush_fifo(struct s3c64xx_spi_driver_data *sdd)
 {
 	void __iomem *regs = sdd->regs;
@@ -281,6 +283,13 @@ static void s3c64xx_spi_dmacb(void *data)
 	spin_unlock_irqrestore(&sdd->lock, flags);
 }
 
+#ifdef CONFIG_S3C_DMA
+/* FIXME: remove this section once arch/arm/mach-s3c64xx uses dmaengine */
+
+static struct s3c2410_dma_client s3c64xx_spi_dma_client = {
+	.name = "samsung-spi-dma",
+};
+
 static void prepare_dma(struct s3c64xx_spi_dma_data *dma,
 					unsigned len, dma_addr_t buf)
 {
@@ -294,14 +303,14 @@ static void prepare_dma(struct s3c64xx_spi_dma_data *dma,
 		config.direction = sdd->rx_dma.direction;
 		config.fifo = sdd->sfr_start + S3C64XX_SPI_RX_DATA;
 		config.width = sdd->cur_bpw / 8;
-		sdd->ops->config(sdd->rx_dma.ch, &config);
+		sdd->ops->config((enum dma_ch)sdd->rx_dma.ch, &config);
 	} else {
 		sdd = container_of((void *)dma,
 			struct s3c64xx_spi_driver_data, tx_dma);
 		config.direction =  sdd->tx_dma.direction;
 		config.fifo = sdd->sfr_start + S3C64XX_SPI_TX_DATA;
 		config.width = sdd->cur_bpw / 8;
-		sdd->ops->config(sdd->tx_dma.ch, &config);
+		sdd->ops->config((enum dma_ch)sdd->tx_dma.ch, &config);
 	}
 
 	info.cap = DMA_SLAVE;
@@ -311,8 +320,8 @@ static void prepare_dma(struct s3c64xx_spi_dma_data *dma,
 	info.direction = dma->direction;
 	info.buf = buf;
 
-	sdd->ops->prepare(dma->ch, &info);
-	sdd->ops->trigger(dma->ch);
+	sdd->ops->prepare((enum dma_ch)dma->ch, &info);
+	sdd->ops->trigger((enum dma_ch)dma->ch);
 }
 
 static int acquire_dma(struct s3c64xx_spi_driver_data *sdd)
@@ -325,12 +334,150 @@ static int acquire_dma(struct s3c64xx_spi_driver_data *sdd)
 	req.cap = DMA_SLAVE;
 	req.client = &s3c64xx_spi_dma_client;
 
-	sdd->rx_dma.ch = sdd->ops->request(sdd->rx_dma.dmach, &req, dev, "rx");
-	sdd->tx_dma.ch = sdd->ops->request(sdd->tx_dma.dmach, &req, dev, "tx");
+	sdd->rx_dma.ch = (void *)sdd->ops->request(sdd->rx_dma.dmach, &req, dev, "rx");
+	sdd->tx_dma.ch = (void *)sdd->ops->request(sdd->tx_dma.dmach, &req, dev, "tx");
 
 	return 1;
 }
 
+static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
+{
+	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
+
+	/* Acquire DMA channels */
+	while (!acquire_dma(sdd))
+		usleep_range(10000, 11000);
+
+	pm_runtime_get_sync(&sdd->pdev->dev);
+
+	return 0;
+}
+
+static int s3c64xx_spi_unprepare_transfer(struct spi_master *spi)
+{
+	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
+
+	/* Free DMA channels */
+	sdd->ops->release((enum dma_ch)sdd->rx_dma.ch, &s3c64xx_spi_dma_client);
+	sdd->ops->release((enum dma_ch)sdd->tx_dma.ch, &s3c64xx_spi_dma_client);
+
+	pm_runtime_put(&sdd->pdev->dev);
+
+	return 0;
+}
+
+static void s3c64xx_spi_dma_stop(struct s3c64xx_spi_driver_data *sdd,
+				 struct s3c64xx_spi_dma_data *dma)
+{
+	sdd->ops->stop((enum dma_ch)dma->ch);
+}
+#else
+
+static void prepare_dma(struct s3c64xx_spi_dma_data *dma,
+					unsigned len, dma_addr_t buf)
+{
+	struct s3c64xx_spi_driver_data *sdd;
+	struct dma_slave_config config;
+	struct scatterlist sg;
+	struct dma_async_tx_descriptor *desc;
+
+	if (dma->direction == DMA_DEV_TO_MEM) {
+		sdd = container_of((void *)dma,
+			struct s3c64xx_spi_driver_data, rx_dma);
+		config.direction = dma->direction;
+		config.src_addr = sdd->sfr_start + S3C64XX_SPI_RX_DATA;
+		config.src_addr_width = sdd->cur_bpw / 8;
+		config.src_maxburst = 1;
+		dmaengine_slave_config(dma->ch, &config);
+	} else {
+		sdd = container_of((void *)dma,
+			struct s3c64xx_spi_driver_data, tx_dma);
+		config.direction = dma->direction;
+		config.dst_addr = sdd->sfr_start + S3C64XX_SPI_TX_DATA;
+		config.dst_addr_width = sdd->cur_bpw / 8;
+		config.dst_maxburst = 1;
+		dmaengine_slave_config(dma->ch, &config);
+	}
+
+	sg_init_table(&sg, 1);
+	sg_dma_len(&sg) = len;
+	sg_set_page(&sg, pfn_to_page(PFN_DOWN(buf)),
+		    len, offset_in_page(buf));
+	sg_dma_address(&sg) = buf;
+
+	desc = dmaengine_prep_slave_sg(dma->ch,
+		&sg, 1, dma->direction, DMA_PREP_INTERRUPT);
+
+	desc->callback = s3c64xx_spi_dmacb;
+	desc->callback_param = dma;
+
+	dmaengine_submit(desc);
+	dma_async_issue_pending(dma->ch);
+}
+
+static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
+{
+	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
+	dma_filter_fn filter = sdd->cntrlr_info->filter;
+	struct device *dev = &sdd->pdev->dev;
+	dma_cap_mask_t mask;
+	int ret;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/* Acquire DMA channels */
+	sdd->rx_dma.ch = dma_request_slave_channel_compat(mask, filter,
+				(void*)sdd->rx_dma.dmach, dev, "rx");
+	if (!sdd->rx_dma.ch) {
+		dev_err(dev, "Failed to get RX DMA channel\n");
+		ret = -EBUSY;
+		goto out;
+	}
+
+	sdd->tx_dma.ch = dma_request_slave_channel_compat(mask, filter,
+				(void*)sdd->tx_dma.dmach, dev, "tx");
+	if (!sdd->tx_dma.ch) {
+		dev_err(dev, "Failed to get TX DMA channel\n");
+		ret = -EBUSY;
+		goto out_rx;
+	}
+
+	ret = pm_runtime_get_sync(&sdd->pdev->dev);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable device: %d\n", ret);
+		goto out_tx;
+	}
+
+	return 0;
+
+out_tx:
+	dma_release_channel(sdd->tx_dma.ch);
+out_rx:
+	dma_release_channel(sdd->rx_dma.ch);
+out:
+	return ret;
+}
+
+static int s3c64xx_spi_unprepare_transfer(struct spi_master *spi)
+{
+	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
+
+	/* Free DMA channels */
+	dma_release_channel(sdd->rx_dma.ch);
+	dma_release_channel(sdd->tx_dma.ch);
+
+	pm_runtime_put(&sdd->pdev->dev);
+	return 0;
+}
+
+static void s3c64xx_spi_dma_stop(struct s3c64xx_spi_driver_data *sdd,
+				 struct s3c64xx_spi_dma_data *dma)
+{
+	dmaengine_terminate_all(dma->ch);
+}
+#endif
+
 static void enable_datapath(struct s3c64xx_spi_driver_data *sdd,
 				struct spi_device *spi,
 				struct spi_transfer *xfer, int dma_mode)
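
The dmaengine-based prepare_dma() above builds a one-entry scatterlist just to describe a single contiguous buffer; an equivalent, slightly shorter form could use dmaengine_prep_slave_single(), which wraps the same slave preparation for that case. A sketch under that assumption, not part of the patch, with the channel and slave config assumed to be set up exactly as in prepare_dma():

static void example_prepare_dma_single(struct s3c64xx_spi_dma_data *dma,
				       unsigned len, dma_addr_t buf)
{
	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_slave_single(dma->ch, buf, len, dma->direction,
					   DMA_PREP_INTERRUPT);
	if (!desc)
		return;

	desc->callback = s3c64xx_spi_dmacb;
	desc->callback_param = dma;

	dmaengine_submit(desc);
	dma_async_issue_pending(dma->ch);
}
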
@@ -713,9 +860,9 @@ static int s3c64xx_spi_transfer_one_message(struct spi_master *master,
 		}
 
 		/* Polling method for xfers not bigger than FIFO capacity */
-		if (xfer->len <= ((FIFO_LVL_MASK(sdd) >> 1) + 1))
-			use_dma = 0;
-		else
+		use_dma = 0;
+		if (sdd->rx_dma.ch && sdd->tx_dma.ch &&
+		    (xfer->len > ((FIFO_LVL_MASK(sdd) >> 1) + 1)))
 			use_dma = 1;
 
 		spin_lock_irqsave(&sdd->lock, flags);
@@ -750,10 +897,10 @@ static int s3c64xx_spi_transfer_one_message(struct spi_master *master,
 			if (use_dma) {
 				if (xfer->tx_buf != NULL
 						&& (sdd->state & TXBUSY))
-					sdd->ops->stop(sdd->tx_dma.ch);
+					s3c64xx_spi_dma_stop(sdd, &sdd->tx_dma);
 				if (xfer->rx_buf != NULL
 						&& (sdd->state & RXBUSY))
-					sdd->ops->stop(sdd->rx_dma.ch);
+					s3c64xx_spi_dma_stop(sdd, &sdd->rx_dma);
 			}
 
 			goto out;
@@ -790,34 +937,7 @@ out:
 	return 0;
 }
 
-static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
-{
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
-
-	/* Acquire DMA channels */
-	while (!acquire_dma(sdd))
-		usleep_range(10000, 11000);
-
-	pm_runtime_get_sync(&sdd->pdev->dev);
-
-	return 0;
-}
-
-static int s3c64xx_spi_unprepare_transfer(struct spi_master *spi)
-{
-	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
-
-	/* Free DMA channels */
-	sdd->ops->release(sdd->rx_dma.ch, &s3c64xx_spi_dma_client);
-	sdd->ops->release(sdd->tx_dma.ch, &s3c64xx_spi_dma_client);
-
-	pm_runtime_put(&sdd->pdev->dev);
-
-	return 0;
-}
-
 static struct s3c64xx_spi_csinfo *s3c64xx_get_slave_ctrldata(
-				struct s3c64xx_spi_driver_data *sdd,
 				struct spi_device *spi)
 {
 	struct s3c64xx_spi_csinfo *cs;
@@ -874,7 +994,7 @@ static int s3c64xx_spi_setup(struct spi_device *spi)
 
 	sdd = spi_master_get_devdata(spi->master);
 	if (!cs && spi->dev.of_node) {
-		cs = s3c64xx_get_slave_ctrldata(sdd, spi);
+		cs = s3c64xx_get_slave_ctrldata(spi);
 		spi->controller_data = cs;
 	}
 
@@ -912,15 +1032,6 @@ static int s3c64xx_spi_setup(struct spi_device *spi)
 
 	spin_unlock_irqrestore(&sdd->lock, flags);
 
-	if (spi->bits_per_word != 8
-			&& spi->bits_per_word != 16
-			&& spi->bits_per_word != 32) {
-		dev_err(&spi->dev, "setup: %dbits/wrd not supported!\n",
-							spi->bits_per_word);
-		err = -EINVAL;
-		goto setup_exit;
-	}
-
 	pm_runtime_get_sync(&sdd->pdev->dev);
 
 	/* Check if we can provide the requested rate */
@@ -994,25 +1105,30 @@ static irqreturn_t s3c64xx_spi_irq(int irq, void *data)
 {
 	struct s3c64xx_spi_driver_data *sdd = data;
 	struct spi_master *spi = sdd->master;
-	unsigned int val;
-
-	val = readl(sdd->regs + S3C64XX_SPI_PENDING_CLR);
+	unsigned int val, clr = 0;
 
-	val &= S3C64XX_SPI_PND_RX_OVERRUN_CLR |
-		S3C64XX_SPI_PND_RX_UNDERRUN_CLR |
-		S3C64XX_SPI_PND_TX_OVERRUN_CLR |
-		S3C64XX_SPI_PND_TX_UNDERRUN_CLR;
+	val = readl(sdd->regs + S3C64XX_SPI_STATUS);
 
-	writel(val, sdd->regs + S3C64XX_SPI_PENDING_CLR);
-
-	if (val & S3C64XX_SPI_PND_RX_OVERRUN_CLR)
+	if (val & S3C64XX_SPI_ST_RX_OVERRUN_ERR) {
+		clr = S3C64XX_SPI_PND_RX_OVERRUN_CLR;
 		dev_err(&spi->dev, "RX overrun\n");
-	if (val & S3C64XX_SPI_PND_RX_UNDERRUN_CLR)
+	}
+	if (val & S3C64XX_SPI_ST_RX_UNDERRUN_ERR) {
+		clr |= S3C64XX_SPI_PND_RX_UNDERRUN_CLR;
 		dev_err(&spi->dev, "RX underrun\n");
-	if (val & S3C64XX_SPI_PND_TX_OVERRUN_CLR)
+	}
+	if (val & S3C64XX_SPI_ST_TX_OVERRUN_ERR) {
+		clr |= S3C64XX_SPI_PND_TX_OVERRUN_CLR;
 		dev_err(&spi->dev, "TX overrun\n");
-	if (val & S3C64XX_SPI_PND_TX_UNDERRUN_CLR)
+	}
+	if (val & S3C64XX_SPI_ST_TX_UNDERRUN_ERR) {
+		clr |= S3C64XX_SPI_PND_TX_UNDERRUN_CLR;
 		dev_err(&spi->dev, "TX underrun\n");
+	}
+
+	/* Clear the pending irq by setting and then clearing it */
+	writel(clr, sdd->regs + S3C64XX_SPI_PENDING_CLR);
+	writel(0, sdd->regs + S3C64XX_SPI_PENDING_CLR);
 
 	return IRQ_HANDLED;
 }
@@ -1036,9 +1152,13 @@ static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd, int channel)
 	writel(0, regs + S3C64XX_SPI_MODE_CFG);
 	writel(0, regs + S3C64XX_SPI_PACKET_CNT);
 
-	/* Clear any irq pending bits */
-	writel(readl(regs + S3C64XX_SPI_PENDING_CLR),
-				regs + S3C64XX_SPI_PENDING_CLR);
+	/* Clear any pending irq bits; they must be set and then cleared */
+	val = S3C64XX_SPI_PND_RX_OVERRUN_CLR |
+		S3C64XX_SPI_PND_RX_UNDERRUN_CLR |
+		S3C64XX_SPI_PND_TX_OVERRUN_CLR |
+		S3C64XX_SPI_PND_TX_UNDERRUN_CLR;
+	writel(val, regs + S3C64XX_SPI_PENDING_CLR);
+	writel(0, regs + S3C64XX_SPI_PENDING_CLR);
 
 	writel(0, regs + S3C64XX_SPI_SWAP_CFG);
 
@@ -1052,41 +1172,6 @@ static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd, int channel)
 }
 
 #ifdef CONFIG_OF
-static int s3c64xx_spi_parse_dt_gpio(struct s3c64xx_spi_driver_data *sdd)
-{
-	struct device *dev = &sdd->pdev->dev;
-	int idx, gpio, ret;
-
-	/* find gpios for mosi, miso and clock lines */
-	for (idx = 0; idx < 3; idx++) {
-		gpio = of_get_gpio(dev->of_node, idx);
-		if (!gpio_is_valid(gpio)) {
-			dev_err(dev, "invalid gpio[%d]: %d\n", idx, gpio);
-			goto free_gpio;
-		}
-		sdd->gpios[idx] = gpio;
-		ret = gpio_request(gpio, "spi-bus");
-		if (ret) {
-			dev_err(dev, "gpio [%d] request failed: %d\n",
-				gpio, ret);
-			goto free_gpio;
-		}
-	}
-	return 0;
-
-free_gpio:
-	while (--idx >= 0)
-		gpio_free(sdd->gpios[idx]);
-	return -EINVAL;
-}
-
-static void s3c64xx_spi_dt_gpio_free(struct s3c64xx_spi_driver_data *sdd)
-{
-	unsigned int idx;
-	for (idx = 0; idx < 3; idx++)
-		gpio_free(sdd->gpios[idx]);
-}
-
 static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
 {
 	struct s3c64xx_spi_info *sci;
@@ -1119,15 +1204,6 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev)
 {
 	return dev->platform_data;
 }
-
-static int s3c64xx_spi_parse_dt_gpio(struct s3c64xx_spi_driver_data *sdd)
-{
-	return -EINVAL;
-}
-
-static void s3c64xx_spi_dt_gpio_free(struct s3c64xx_spi_driver_data *sdd)
-{
-}
 #endif
 
 static const struct of_device_id s3c64xx_spi_dt_match[];
@@ -1238,6 +1314,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	master->unprepare_transfer_hardware = s3c64xx_spi_unprepare_transfer;
 	master->num_chipselect = sci->num_cs;
 	master->dma_alignment = 8;
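+	/* Replaces the open-coded 8/16/32 bits_per_word check; the core validates transfers against this mask */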
+	master->bits_per_word_mask = BIT(32 - 1) | BIT(16 - 1) | BIT(8 - 1);
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 
@@ -1247,10 +1324,7 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 		goto err0;
 	}
 
-	if (!sci->cfg_gpio && pdev->dev.of_node) {
-		if (s3c64xx_spi_parse_dt_gpio(sdd))
-			return -EBUSY;
-	} else if (sci->cfg_gpio == NULL || sci->cfg_gpio()) {
+	if (sci->cfg_gpio && sci->cfg_gpio()) {
 		dev_err(&pdev->dev, "Unable to config gpio\n");
 		ret = -EBUSY;
 		goto err0;
@@ -1261,13 +1335,13 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
 	if (IS_ERR(sdd->clk)) {
 		dev_err(&pdev->dev, "Unable to acquire clock 'spi'\n");
 		ret = PTR_ERR(sdd->clk);
-		goto err1;
+		goto err0;
 	}
 
 	if (clk_prepare_enable(sdd->clk)) {
 		dev_err(&pdev->dev, "Couldn't enable clock 'spi'\n");
 		ret = -EBUSY;
-		goto err1;
+		goto err0;
 	}
 
 	sprintf(clk_name, "spi_busclk%d", sci->src_clk_nr);
@@ -1324,9 +1398,6 @@ err3:
 	clk_disable_unprepare(sdd->src_clk);
 err2:
 	clk_disable_unprepare(sdd->clk);
-err1:
-	if (!sdd->cntrlr_info->cfg_gpio && pdev->dev.of_node)
-		s3c64xx_spi_dt_gpio_free(sdd);
 err0:
 	platform_set_drvdata(pdev, NULL);
 	spi_master_put(master);
@@ -1349,16 +1420,13 @@ static int s3c64xx_spi_remove(struct platform_device *pdev)
 
 	clk_disable_unprepare(sdd->clk);
 
-	if (!sdd->cntrlr_info->cfg_gpio && pdev->dev.of_node)
-		s3c64xx_spi_dt_gpio_free(sdd);
-
 	platform_set_drvdata(pdev, NULL);
 	spi_master_put(master);
 
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int s3c64xx_spi_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -1370,9 +1438,6 @@ static int s3c64xx_spi_suspend(struct device *dev)
 	clk_disable_unprepare(sdd->src_clk);
 	clk_disable_unprepare(sdd->clk);
 
-	if (!sdd->cntrlr_info->cfg_gpio && dev->of_node)
-		s3c64xx_spi_dt_gpio_free(sdd);
-
 	sdd->cur_speed = 0; /* Output Clock is stopped */
 
 	return 0;
@@ -1384,9 +1449,7 @@ static int s3c64xx_spi_resume(struct device *dev)
 	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
 	struct s3c64xx_spi_info *sci = sdd->cntrlr_info;
 
-	if (!sci->cfg_gpio && dev->of_node)
-		s3c64xx_spi_parse_dt_gpio(sdd);
-	else
+	if (sci->cfg_gpio)
 		sci->cfg_gpio();
 
 	/* Enable the clock */
@@ -1399,7 +1462,7 @@ static int s3c64xx_spi_resume(struct device *dev)
 
 	return 0;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM_RUNTIME
 static int s3c64xx_spi_runtime_suspend(struct device *dev)
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 8b40d0884f8b..2bc5a6b86300 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -764,8 +764,6 @@ static const struct of_device_id sh_msiof_match[] = {
 	{},
 };
 MODULE_DEVICE_TABLE(of, sh_msiof_match);
-#else
-#define sh_msiof_match NULL
 #endif
 
 static struct dev_pm_ops sh_msiof_spi_dev_pm_ops = {
@@ -780,7 +778,7 @@ static struct platform_driver sh_msiof_spi_drv = {
 		.name		= "spi_sh_msiof",
 		.owner		= THIS_MODULE,
 		.pm		= &sh_msiof_spi_dev_pm_ops,
-		.of_match_table = sh_msiof_match,
+		.of_match_table = of_match_ptr(sh_msiof_match),
 	},
 };
 module_platform_driver(sh_msiof_spi_drv);
diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c
index f59d4177b419..0808cd56bf8d 100644
--- a/drivers/spi/spi-sirf.c
+++ b/drivers/spi/spi-sirf.c
@@ -660,7 +660,7 @@ static const struct of_device_id spi_sirfsoc_of_match[] = {
 	{ .compatible = "sirf,marco-spi", },
 	{}
 };
-MODULE_DEVICE_TABLE(of, sirfsoc_spi_of_match);
+MODULE_DEVICE_TABLE(of, spi_sirfsoc_of_match);
 
 static struct platform_driver spi_sirfsoc_driver = {
 	.driver = {
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
new file mode 100644
index 000000000000..598eb45e8008
--- /dev/null
+++ b/drivers/spi/spi-tegra114.c
@@ -0,0 +1,1246 @@
+/*
+ * SPI driver for NVIDIA's Tegra114 SPI Controller.
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk/tegra.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+
+#define SPI_COMMAND1				0x000
+#define SPI_BIT_LENGTH(x)			(((x) & 0x1f) << 0)
+#define SPI_PACKED				(1 << 5)
+#define SPI_TX_EN				(1 << 11)
+#define SPI_RX_EN				(1 << 12)
+#define SPI_BOTH_EN_BYTE			(1 << 13)
+#define SPI_BOTH_EN_BIT				(1 << 14)
+#define SPI_LSBYTE_FE				(1 << 15)
+#define SPI_LSBIT_FE				(1 << 16)
+#define SPI_BIDIROE				(1 << 17)
+#define SPI_IDLE_SDA_DRIVE_LOW			(0 << 18)
+#define SPI_IDLE_SDA_DRIVE_HIGH			(1 << 18)
+#define SPI_IDLE_SDA_PULL_LOW			(2 << 18)
+#define SPI_IDLE_SDA_PULL_HIGH			(3 << 18)
+#define SPI_IDLE_SDA_MASK			(3 << 18)
+#define SPI_CS_SS_VAL				(1 << 20)
+#define SPI_CS_SW_HW				(1 << 21)
+/* SPI_CS_POL_INACTIVE bits are default high */
+#define SPI_CS_POL_INACTIVE			22
+#define SPI_CS_POL_INACTIVE_0			(1 << 22)
+#define SPI_CS_POL_INACTIVE_1			(1 << 23)
+#define SPI_CS_POL_INACTIVE_2			(1 << 24)
+#define SPI_CS_POL_INACTIVE_3			(1 << 25)
+#define SPI_CS_POL_INACTIVE_MASK		(0xF << 22)
+
+#define SPI_CS_SEL_0				(0 << 26)
+#define SPI_CS_SEL_1				(1 << 26)
+#define SPI_CS_SEL_2				(2 << 26)
+#define SPI_CS_SEL_3				(3 << 26)
+#define SPI_CS_SEL_MASK				(3 << 26)
+#define SPI_CS_SEL(x)				(((x) & 0x3) << 26)
+#define SPI_CONTROL_MODE_0			(0 << 28)
+#define SPI_CONTROL_MODE_1			(1 << 28)
+#define SPI_CONTROL_MODE_2			(2 << 28)
+#define SPI_CONTROL_MODE_3			(3 << 28)
+#define SPI_CONTROL_MODE_MASK			(3 << 28)
+#define SPI_MODE_SEL(x)				(((x) & 0x3) << 28)
+#define SPI_M_S					(1 << 30)
+#define SPI_PIO					(1 << 31)
+
+#define SPI_COMMAND2				0x004
+#define SPI_TX_TAP_DELAY(x)			(((x) & 0x3F) << 6)
+#define SPI_RX_TAP_DELAY(x)			(((x) & 0x3F) << 0)
+
+#define SPI_CS_TIMING1				0x008
+#define SPI_SETUP_HOLD(setup, hold)		(((setup) << 4) | (hold))
+#define SPI_CS_SETUP_HOLD(reg, cs, val)			\
+		((((val) & 0xFFu) << ((cs) * 8)) |	\
+		((reg) & ~(0xFFu << ((cs) * 8))))
+
+#define SPI_CS_TIMING2				0x00C
+#define CYCLES_BETWEEN_PACKETS_0(x)		(((x) & 0x1F) << 0)
+#define CS_ACTIVE_BETWEEN_PACKETS_0		(1 << 5)
+#define CYCLES_BETWEEN_PACKETS_1(x)		(((x) & 0x1F) << 8)
+#define CS_ACTIVE_BETWEEN_PACKETS_1		(1 << 13)
+#define CYCLES_BETWEEN_PACKETS_2(x)		(((x) & 0x1F) << 16)
+#define CS_ACTIVE_BETWEEN_PACKETS_2		(1 << 21)
+#define CYCLES_BETWEEN_PACKETS_3(x)		(((x) & 0x1F) << 24)
+#define CS_ACTIVE_BETWEEN_PACKETS_3		(1 << 29)
+#define SPI_SET_CS_ACTIVE_BETWEEN_PACKETS(reg, cs, val)		\
+		(reg = (((val) & 0x1) << ((cs) * 8 + 5)) |	\
+			((reg) & ~(1 << ((cs) * 8 + 5))))
+#define SPI_SET_CYCLES_BETWEEN_PACKETS(reg, cs, val)		\
+		(reg = (((val) & 0xF) << ((cs) * 8)) |		\
+			((reg) & ~(0xF << ((cs) * 8))))
+
+#define SPI_TRANS_STATUS			0x010
+#define SPI_BLK_CNT(val)			(((val) >> 0) & 0xFFFF)
+#define SPI_SLV_IDLE_COUNT(val)			(((val) >> 16) & 0xFF)
+#define SPI_RDY					(1 << 30)
+
+#define SPI_FIFO_STATUS				0x014
+#define SPI_RX_FIFO_EMPTY			(1 << 0)
+#define SPI_RX_FIFO_FULL			(1 << 1)
+#define SPI_TX_FIFO_EMPTY			(1 << 2)
+#define SPI_TX_FIFO_FULL			(1 << 3)
+#define SPI_RX_FIFO_UNF				(1 << 4)
+#define SPI_RX_FIFO_OVF				(1 << 5)
+#define SPI_TX_FIFO_UNF				(1 << 6)
+#define SPI_TX_FIFO_OVF				(1 << 7)
+#define SPI_ERR					(1 << 8)
+#define SPI_TX_FIFO_FLUSH			(1 << 14)
+#define SPI_RX_FIFO_FLUSH			(1 << 15)
+#define SPI_TX_FIFO_EMPTY_COUNT(val)		(((val) >> 16) & 0x7F)
+#define SPI_RX_FIFO_FULL_COUNT(val)		(((val) >> 23) & 0x7F)
+#define SPI_FRAME_END				(1 << 30)
+#define SPI_CS_INACTIVE				(1 << 31)
+
+#define SPI_FIFO_ERROR				(SPI_RX_FIFO_UNF | \
+			SPI_RX_FIFO_OVF | SPI_TX_FIFO_UNF | SPI_TX_FIFO_OVF)
+#define SPI_FIFO_EMPTY			(SPI_RX_FIFO_EMPTY | SPI_TX_FIFO_EMPTY)
+
+#define SPI_TX_DATA				0x018
+#define SPI_RX_DATA				0x01C
+
+#define SPI_DMA_CTL				0x020
+#define SPI_TX_TRIG_1				(0 << 15)
+#define SPI_TX_TRIG_4				(1 << 15)
+#define SPI_TX_TRIG_8				(2 << 15)
+#define SPI_TX_TRIG_16				(3 << 15)
+#define SPI_TX_TRIG_MASK			(3 << 15)
+#define SPI_RX_TRIG_1				(0 << 19)
+#define SPI_RX_TRIG_4				(1 << 19)
+#define SPI_RX_TRIG_8				(2 << 19)
+#define SPI_RX_TRIG_16				(3 << 19)
+#define SPI_RX_TRIG_MASK			(3 << 19)
+#define SPI_IE_TX				(1 << 28)
+#define SPI_IE_RX				(1 << 29)
+#define SPI_CONT				(1 << 30)
+#define SPI_DMA					(1 << 31)
+#define SPI_DMA_EN				SPI_DMA
+
+#define SPI_DMA_BLK				0x024
+#define SPI_DMA_BLK_SET(x)			(((x) & 0xFFFF) << 0)
+
+#define SPI_TX_FIFO				0x108
+#define SPI_RX_FIFO				0x188
+#define MAX_CHIP_SELECT				4
+#define SPI_FIFO_DEPTH				64
+#define DATA_DIR_TX				(1 << 0)
+#define DATA_DIR_RX				(1 << 1)
+
+#define SPI_DMA_TIMEOUT				(msecs_to_jiffies(1000))
+#define DEFAULT_SPI_DMA_BUF_LEN			(16*1024)
+#define TX_FIFO_EMPTY_COUNT_MAX			SPI_TX_FIFO_EMPTY_COUNT(0x40)
+#define RX_FIFO_FULL_COUNT_ZERO			SPI_RX_FIFO_FULL_COUNT(0)
+#define MAX_HOLD_CYCLES				16
+#define SPI_DEFAULT_SPEED			25000000
+
+struct tegra_spi_data {
+	struct device				*dev;
+	struct spi_master			*master;
+	spinlock_t				lock;
+
+	struct clk				*clk;
+	void __iomem				*base;
+	phys_addr_t				phys;
+	unsigned				irq;
+	int					dma_req_sel;
+	u32					spi_max_frequency;
+	u32					cur_speed;
+
+	struct spi_device			*cur_spi;
+	unsigned				cur_pos;
+	unsigned				cur_len;
+	unsigned				words_per_32bit;
+	unsigned				bytes_per_word;
+	unsigned				curr_dma_words;
+	unsigned				cur_direction;
+
+	unsigned				cur_rx_pos;
+	unsigned				cur_tx_pos;
+
+	unsigned				dma_buf_size;
+	unsigned				max_buf_size;
+	bool					is_curr_dma_xfer;
+
+	struct completion			rx_dma_complete;
+	struct completion			tx_dma_complete;
+
+	u32					tx_status;
+	u32					rx_status;
+	u32					status_reg;
+	bool					is_packed;
+	unsigned long				packed_size;
+
+	u32					command1_reg;
+	u32					dma_control_reg;
+	u32					def_command1_reg;
+	u32					spi_cs_timing;
+
+	struct completion			xfer_completion;
+	struct spi_transfer			*curr_xfer;
+	struct dma_chan				*rx_dma_chan;
+	u32					*rx_dma_buf;
+	dma_addr_t				rx_dma_phys;
+	struct dma_async_tx_descriptor		*rx_dma_desc;
+
+	struct dma_chan				*tx_dma_chan;
+	u32					*tx_dma_buf;
+	dma_addr_t				tx_dma_phys;
+	struct dma_async_tx_descriptor		*tx_dma_desc;
+};
+
+static int tegra_spi_runtime_suspend(struct device *dev);
+static int tegra_spi_runtime_resume(struct device *dev);
+
+static inline unsigned long tegra_spi_readl(struct tegra_spi_data *tspi,
+		unsigned long reg)
+{
+	return readl(tspi->base + reg);
+}
+
+static inline void tegra_spi_writel(struct tegra_spi_data *tspi,
+		unsigned long val, unsigned long reg)
+{
+	writel(val, tspi->base + reg);
+
+	/* Read back register to make sure that register writes completed */
+	if (reg != SPI_TX_FIFO)
+		readl(tspi->base + SPI_COMMAND1);
+}
+
+static void tegra_spi_clear_status(struct tegra_spi_data *tspi)
+{
+	unsigned long val;
+
+	/* Write 1 to clear status register */
+	val = tegra_spi_readl(tspi, SPI_TRANS_STATUS);
+	tegra_spi_writel(tspi, val, SPI_TRANS_STATUS);
+
+	/* Clear fifo status error if any */
+	val = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+	if (val & SPI_ERR)
+		tegra_spi_writel(tspi, SPI_ERR | SPI_FIFO_ERROR,
+				SPI_FIFO_STATUS);
+}
+
+static unsigned tegra_spi_calculate_curr_xfer_param(
+	struct spi_device *spi, struct tegra_spi_data *tspi,
+	struct spi_transfer *t)
+{
+	unsigned remain_len = t->len - tspi->cur_pos;
+	unsigned max_word;
+	unsigned bits_per_word = t->bits_per_word;
+	unsigned max_len;
+	unsigned total_fifo_words;
+
+	tspi->bytes_per_word = (bits_per_word - 1) / 8 + 1;
+
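+	/* Packed mode fits multiple words into each 32-bit FIFO entry and is only usable for 8- and 16-bit words */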
+	if (bits_per_word == 8 || bits_per_word == 16) {
+		tspi->is_packed = 1;
+		tspi->words_per_32bit = 32/bits_per_word;
+	} else {
+		tspi->is_packed = 0;
+		tspi->words_per_32bit = 1;
+	}
+
+	if (tspi->is_packed) {
+		max_len = min(remain_len, tspi->max_buf_size);
+		tspi->curr_dma_words = max_len/tspi->bytes_per_word;
+		total_fifo_words = (max_len + 3) / 4;
+	} else {
+		max_word = (remain_len - 1) / tspi->bytes_per_word + 1;
+		max_word = min(max_word, tspi->max_buf_size/4);
+		tspi->curr_dma_words = max_word;
+		total_fifo_words = max_word;
+	}
+	return total_fifo_words;
+}
+
+static unsigned tegra_spi_fill_tx_fifo_from_client_txbuf(
+	struct tegra_spi_data *tspi, struct spi_transfer *t)
+{
+	unsigned nbytes;
+	unsigned tx_empty_count;
+	unsigned long fifo_status;
+	unsigned max_n_32bit;
+	unsigned i, count;
+	unsigned long x;
+	unsigned int written_words;
+	unsigned fifo_words_left;
+	u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
+
+	fifo_status = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+	tx_empty_count = SPI_TX_FIFO_EMPTY_COUNT(fifo_status);
+
+	if (tspi->is_packed) {
+		fifo_words_left = tx_empty_count * tspi->words_per_32bit;
+		written_words = min(fifo_words_left, tspi->curr_dma_words);
+		nbytes = written_words * tspi->bytes_per_word;
+		max_n_32bit = DIV_ROUND_UP(nbytes, 4);
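+		/* Assemble up to 4 bytes, LSB first, into each 32-bit FIFO word */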
+		for (count = 0; count < max_n_32bit; count++) {
+			x = 0;
+			for (i = 0; (i < 4) && nbytes; i++, nbytes--)
+				x |= (*tx_buf++) << (i*8);
+			tegra_spi_writel(tspi, x, SPI_TX_FIFO);
+		}
+	} else {
+		max_n_32bit = min(tspi->curr_dma_words,  tx_empty_count);
+		written_words = max_n_32bit;
+		nbytes = written_words * tspi->bytes_per_word;
+		for (count = 0; count < max_n_32bit; count++) {
+			x = 0;
+			for (i = 0; nbytes && (i < tspi->bytes_per_word);
+							i++, nbytes--)
+				x |= ((*tx_buf++) << i*8);
+			tegra_spi_writel(tspi, x, SPI_TX_FIFO);
+		}
+	}
+	tspi->cur_tx_pos += written_words * tspi->bytes_per_word;
+	return written_words;
+}
+
+static unsigned int tegra_spi_read_rx_fifo_to_client_rxbuf(
+		struct tegra_spi_data *tspi, struct spi_transfer *t)
+{
+	unsigned rx_full_count;
+	unsigned long fifo_status;
+	unsigned i, count;
+	unsigned long x;
+	unsigned int read_words = 0;
+	unsigned len;
+	u8 *rx_buf = (u8 *)t->rx_buf + tspi->cur_rx_pos;
+
+	fifo_status = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+	rx_full_count = SPI_RX_FIFO_FULL_COUNT(fifo_status);
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		for (count = 0; count < rx_full_count; count++) {
+			x = tegra_spi_readl(tspi, SPI_RX_FIFO);
+			for (i = 0; len && (i < 4); i++, len--)
+				*rx_buf++ = (x >> i*8) & 0xFF;
+		}
+		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+		read_words += tspi->curr_dma_words;
+	} else {
+		unsigned int rx_mask;
+		unsigned int bits_per_word = t->bits_per_word;
+
+		rx_mask = (1 << bits_per_word) - 1;
+		for (count = 0; count < rx_full_count; count++) {
+			x = tegra_spi_readl(tspi, SPI_RX_FIFO);
+			x &= rx_mask;
+			for (i = 0; (i < tspi->bytes_per_word); i++)
+				*rx_buf++ = (x >> (i*8)) & 0xFF;
+		}
+		tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word;
+		read_words += rx_full_count;
+	}
+	return read_words;
+}
+
+static void tegra_spi_copy_client_txbuf_to_spi_txbuf(
+		struct tegra_spi_data *tspi, struct spi_transfer *t)
+{
+	unsigned len;
+
+	/* Make the dma buffer accessible to the cpu */
+	dma_sync_single_for_cpu(tspi->dev, tspi->tx_dma_phys,
+				tspi->dma_buf_size, DMA_TO_DEVICE);
+
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len);
+	} else {
+		unsigned int i;
+		unsigned int count;
+		u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
+		unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word;
+		unsigned int x;
+
+		for (count = 0; count < tspi->curr_dma_words; count++) {
+			x = 0;
+			for (i = 0; consume && (i < tspi->bytes_per_word);
+							i++, consume--)
+				x |= ((*tx_buf++) << i * 8);
+			tspi->tx_dma_buf[count] = x;
+		}
+	}
+	tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+
+	/* Make the dma buffer accessible to dma */
+	dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys,
+				tspi->dma_buf_size, DMA_TO_DEVICE);
+}
+
+static void tegra_spi_copy_spi_rxbuf_to_client_rxbuf(
+		struct tegra_spi_data *tspi, struct spi_transfer *t)
+{
+	unsigned len;
+
+	/* Make the dma buffer accessible to the cpu */
+	dma_sync_single_for_cpu(tspi->dev, tspi->rx_dma_phys,
+		tspi->dma_buf_size, DMA_FROM_DEVICE);
+
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len);
+	} else {
+		unsigned int i;
+		unsigned int count;
+		unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos;
+		unsigned int x;
+		unsigned int rx_mask;
+		unsigned int bits_per_word = t->bits_per_word;
+
+		rx_mask = (1 << bits_per_word) - 1;
+		for (count = 0; count < tspi->curr_dma_words; count++) {
+			x = tspi->rx_dma_buf[count];
+			x &= rx_mask;
+			for (i = 0; (i < tspi->bytes_per_word); i++)
+				*rx_buf++ = (x >> (i*8)) & 0xFF;
+		}
+	}
+	tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+
+	/* Make the dma buffer accessible to dma */
+	dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
+		tspi->dma_buf_size, DMA_FROM_DEVICE);
+}
+
+static void tegra_spi_dma_complete(void *args)
+{
+	struct completion *dma_complete = args;
+
+	complete(dma_complete);
+}
+
+static int tegra_spi_start_tx_dma(struct tegra_spi_data *tspi, int len)
+{
+	INIT_COMPLETION(tspi->tx_dma_complete);
+	tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan,
+				tspi->tx_dma_phys, len, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
+	if (!tspi->tx_dma_desc) {
+		dev_err(tspi->dev, "Not able to get desc for Tx\n");
+		return -EIO;
+	}
+
+	tspi->tx_dma_desc->callback = tegra_spi_dma_complete;
+	tspi->tx_dma_desc->callback_param = &tspi->tx_dma_complete;
+
+	dmaengine_submit(tspi->tx_dma_desc);
+	dma_async_issue_pending(tspi->tx_dma_chan);
+	return 0;
+}
+
+static int tegra_spi_start_rx_dma(struct tegra_spi_data *tspi, int len)
+{
+	INIT_COMPLETION(tspi->rx_dma_complete);
+	tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan,
+				tspi->rx_dma_phys, len, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
+	if (!tspi->rx_dma_desc) {
+		dev_err(tspi->dev, "Not able to get desc for Rx\n");
+		return -EIO;
+	}
+
+	tspi->rx_dma_desc->callback = tegra_spi_dma_complete;
+	tspi->rx_dma_desc->callback_param = &tspi->rx_dma_complete;
+
+	dmaengine_submit(tspi->rx_dma_desc);
+	dma_async_issue_pending(tspi->rx_dma_chan);
+	return 0;
+}
+
+static int tegra_spi_start_dma_based_transfer(
+		struct tegra_spi_data *tspi, struct spi_transfer *t)
+{
+	unsigned long val;
+	unsigned int len;
+	int ret = 0;
+	unsigned long status;
+
+	/* Make sure that Rx and Tx fifo are empty */
+	status = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+	if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) {
+		dev_err(tspi->dev,
+			"Rx/Tx fifo are not empty, status 0x%08lx\n", status);
+		return -EIO;
+	}
+
+	val = SPI_DMA_BLK_SET(tspi->curr_dma_words - 1);
+	tegra_spi_writel(tspi, val, SPI_DMA_BLK);
+
+	if (tspi->is_packed)
+		len = DIV_ROUND_UP(tspi->curr_dma_words * tspi->bytes_per_word,
+					4) * 4;
+	else
+		len = tspi->curr_dma_words * 4;
+
+	/* Set attention level based on length of transfer */
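+	/* The FIFO trigger (DMA burst) must evenly divide the transfer, so unaligned lengths fall back to smaller bursts */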
+	if (len & 0xF)
+		val |= SPI_TX_TRIG_1 | SPI_RX_TRIG_1;
+	else if (((len) >> 4) & 0x1)
+		val |= SPI_TX_TRIG_4 | SPI_RX_TRIG_4;
+	else
+		val |= SPI_TX_TRIG_8 | SPI_RX_TRIG_8;
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		val |= SPI_IE_TX;
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		val |= SPI_IE_RX;
+
+	tegra_spi_writel(tspi, val, SPI_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	if (tspi->cur_direction & DATA_DIR_TX) {
+		tegra_spi_copy_client_txbuf_to_spi_txbuf(tspi, t);
+		ret = tegra_spi_start_tx_dma(tspi, len);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"Starting tx dma failed, err %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX) {
+		/* Make the dma buffer accessible to dma */
+		dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
+				tspi->dma_buf_size, DMA_FROM_DEVICE);
+
+		ret = tegra_spi_start_rx_dma(tspi, len);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"Starting rx dma failed, err %d\n", ret);
+			if (tspi->cur_direction & DATA_DIR_TX)
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+			return ret;
+		}
+	}
+	tspi->is_curr_dma_xfer = true;
+	tspi->dma_control_reg = val;
+
+	val |= SPI_DMA_EN;
+	tegra_spi_writel(tspi, val, SPI_DMA_CTL);
+	return ret;
+}
+
+static int tegra_spi_start_cpu_based_transfer(
+		struct tegra_spi_data *tspi, struct spi_transfer *t)
+{
+	unsigned long val;
+	unsigned cur_words;
+
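+	/* PIO mode still programs the block count and DMA_EN bit to start the transfer; only the data is moved by the CPU */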
+	if (tspi->cur_direction & DATA_DIR_TX)
+		cur_words = tegra_spi_fill_tx_fifo_from_client_txbuf(tspi, t);
+	else
+		cur_words = tspi->curr_dma_words;
+
+	val = SPI_DMA_BLK_SET(cur_words - 1);
+	tegra_spi_writel(tspi, val, SPI_DMA_BLK);
+
+	val = 0;
+	if (tspi->cur_direction & DATA_DIR_TX)
+		val |= SPI_IE_TX;
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		val |= SPI_IE_RX;
+
+	tegra_spi_writel(tspi, val, SPI_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	tspi->is_curr_dma_xfer = false;
+
+	val |= SPI_DMA_EN;
+	tegra_spi_writel(tspi, val, SPI_DMA_CTL);
+	return 0;
+}
+
+static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi,
+			bool dma_to_memory)
+{
+	struct dma_chan *dma_chan;
+	u32 *dma_buf;
+	dma_addr_t dma_phys;
+	int ret;
+	struct dma_slave_config dma_sconfig;
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_chan = dma_request_channel(mask, NULL, NULL);
+	if (!dma_chan) {
+		dev_err(tspi->dev,
+			"Dma channel is not available, will try later\n");
+		return -EPROBE_DEFER;
+	}
+
+	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
+				&dma_phys, GFP_KERNEL);
+	if (!dma_buf) {
+		dev_err(tspi->dev, "Not able to allocate the dma buffer\n");
+		dma_release_channel(dma_chan);
+		return -ENOMEM;
+	}
+
+	dma_sconfig.slave_id = tspi->dma_req_sel;
+	if (dma_to_memory) {
+		dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO;
+		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.src_maxburst = 0;
+	} else {
+		dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO;
+		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.dst_maxburst = 0;
+	}
+
+	ret = dmaengine_slave_config(dma_chan, &dma_sconfig);
+	if (ret)
+		goto scrub;
+	if (dma_to_memory) {
+		tspi->rx_dma_chan = dma_chan;
+		tspi->rx_dma_buf = dma_buf;
+		tspi->rx_dma_phys = dma_phys;
+	} else {
+		tspi->tx_dma_chan = dma_chan;
+		tspi->tx_dma_buf = dma_buf;
+		tspi->tx_dma_phys = dma_phys;
+	}
+	return 0;
+
+scrub:
+	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
+	dma_release_channel(dma_chan);
+	return ret;
+}
+
+static void tegra_spi_deinit_dma_param(struct tegra_spi_data *tspi,
+	bool dma_to_memory)
+{
+	u32 *dma_buf;
+	dma_addr_t dma_phys;
+	struct dma_chan *dma_chan;
+
+	if (dma_to_memory) {
+		dma_buf = tspi->rx_dma_buf;
+		dma_chan = tspi->rx_dma_chan;
+		dma_phys = tspi->rx_dma_phys;
+		tspi->rx_dma_chan = NULL;
+		tspi->rx_dma_buf = NULL;
+	} else {
+		dma_buf = tspi->tx_dma_buf;
+		dma_chan = tspi->tx_dma_chan;
+		dma_phys = tspi->tx_dma_phys;
+		tspi->tx_dma_buf = NULL;
+		tspi->tx_dma_chan = NULL;
+	}
+	if (!dma_chan)
+		return;
+
+	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
+	dma_release_channel(dma_chan);
+}
+
+static int tegra_spi_start_transfer_one(struct spi_device *spi,
+		struct spi_transfer *t, bool is_first_of_msg,
+		bool is_single_xfer)
+{
+	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	u32 speed = t->speed_hz;
+	u8 bits_per_word = t->bits_per_word;
+	unsigned total_fifo_words;
+	int ret;
+	unsigned long command1;
+	int req_mode;
+
+	if (speed != tspi->cur_speed) {
+		clk_set_rate(tspi->clk, speed);
+		tspi->cur_speed = speed;
+	}
+
+	tspi->cur_spi = spi;
+	tspi->cur_pos = 0;
+	tspi->cur_rx_pos = 0;
+	tspi->cur_tx_pos = 0;
+	tspi->curr_xfer = t;
+	total_fifo_words = tegra_spi_calculate_curr_xfer_param(spi, tspi, t);
+
+	if (is_first_of_msg) {
+		tegra_spi_clear_status(tspi);
+
+		command1 = tspi->def_command1_reg;
+		command1 |= SPI_BIT_LENGTH(bits_per_word - 1);
+
+		command1 &= ~SPI_CONTROL_MODE_MASK;
+		req_mode = spi->mode & 0x3;
+		if (req_mode == SPI_MODE_0)
+			command1 |= SPI_CONTROL_MODE_0;
+		else if (req_mode == SPI_MODE_1)
+			command1 |= SPI_CONTROL_MODE_1;
+		else if (req_mode == SPI_MODE_2)
+			command1 |= SPI_CONTROL_MODE_2;
+		else if (req_mode == SPI_MODE_3)
+			command1 |= SPI_CONTROL_MODE_3;
+
+		tegra_spi_writel(tspi, command1, SPI_COMMAND1);
+
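+		/* Use software-controlled chip select and drive it to its active level for this transfer */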
+		command1 |= SPI_CS_SW_HW;
+		if (spi->mode & SPI_CS_HIGH)
+			command1 |= SPI_CS_SS_VAL;
+		else
+			command1 &= ~SPI_CS_SS_VAL;
+
+		tegra_spi_writel(tspi, 0, SPI_COMMAND2);
+	} else {
+		command1 = tspi->command1_reg;
+		command1 &= ~SPI_BIT_LENGTH(~0);
+		command1 |= SPI_BIT_LENGTH(bits_per_word - 1);
+	}
+
+	if (tspi->is_packed)
+		command1 |= SPI_PACKED;
+
+	command1 &= ~(SPI_CS_SEL_MASK | SPI_TX_EN | SPI_RX_EN);
+	tspi->cur_direction = 0;
+	if (t->rx_buf) {
+		command1 |= SPI_RX_EN;
+		tspi->cur_direction |= DATA_DIR_RX;
+	}
+	if (t->tx_buf) {
+		command1 |= SPI_TX_EN;
+		tspi->cur_direction |= DATA_DIR_TX;
+	}
+	command1 |= SPI_CS_SEL(spi->chip_select);
+	tegra_spi_writel(tspi, command1, SPI_COMMAND1);
+	tspi->command1_reg = command1;
+
+	dev_dbg(tspi->dev, "Default command1 0x%x, written 0x%lx\n",
+				tspi->def_command1_reg, command1);
+
+	if (total_fifo_words > SPI_FIFO_DEPTH)
+		ret = tegra_spi_start_dma_based_transfer(tspi, t);
+	else
+		ret = tegra_spi_start_cpu_based_transfer(tspi, t);
+	return ret;
+}
+
+static int tegra_spi_setup(struct spi_device *spi)
+{
+	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	unsigned long val;
+	unsigned long flags;
+	int ret;
+	unsigned int cs_pol_bit[MAX_CHIP_SELECT] = {
+			SPI_CS_POL_INACTIVE_0,
+			SPI_CS_POL_INACTIVE_1,
+			SPI_CS_POL_INACTIVE_2,
+			SPI_CS_POL_INACTIVE_3,
+	};
+
+	dev_dbg(&spi->dev, "setup %d bpw, %scpol, %scpha, %dHz\n",
+		spi->bits_per_word,
+		spi->mode & SPI_CPOL ? "" : "~",
+		spi->mode & SPI_CPHA ? "" : "~",
+		spi->max_speed_hz);
+
+	BUG_ON(spi->chip_select >= MAX_CHIP_SELECT);
+
+	/* Set speed to the spi max frequency if the spi device has not set it */
+	spi->max_speed_hz = spi->max_speed_hz ? : tspi->spi_max_frequency;
+
+	ret = pm_runtime_get_sync(tspi->dev);
+	if (ret < 0) {
+		dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret);
+		return ret;
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
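+	/* Program the chip select inactive level: clear the bit for SPI_CS_HIGH devices so CS idles low */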
+	val = tspi->def_command1_reg;
+	if (spi->mode & SPI_CS_HIGH)
+		val &= ~cs_pol_bit[spi->chip_select];
+	else
+		val |= cs_pol_bit[spi->chip_select];
+	tspi->def_command1_reg = val;
+	tegra_spi_writel(tspi, tspi->def_command1_reg, SPI_COMMAND1);
+	spin_unlock_irqrestore(&tspi->lock, flags);
+
+	pm_runtime_put(tspi->dev);
+	return 0;
+}
+
+static int tegra_spi_transfer_one_message(struct spi_master *master,
+			struct spi_message *msg)
+{
+	bool is_first_msg = true;
+	int single_xfer;
+	struct tegra_spi_data *tspi = spi_master_get_devdata(master);
+	struct spi_transfer *xfer;
+	struct spi_device *spi = msg->spi;
+	int ret;
+
+	msg->status = 0;
+	msg->actual_length = 0;
+
+	ret = pm_runtime_get_sync(tspi->dev);
+	if (ret < 0) {
+		dev_err(tspi->dev, "runtime PM get failed: %d\n", ret);
+		msg->status = ret;
+		spi_finalize_current_message(master);
+		return ret;
+	}
+
+	single_xfer = list_is_singular(&msg->transfers);
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		INIT_COMPLETION(tspi->xfer_completion);
+		ret = tegra_spi_start_transfer_one(spi, xfer,
+					is_first_msg, single_xfer);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"spi cannot start transfer, err %d\n", ret);
+			goto exit;
+		}
+		is_first_msg = false;
+		ret = wait_for_completion_timeout(&tspi->xfer_completion,
+						SPI_DMA_TIMEOUT);
+		if (WARN_ON(ret == 0)) {
+			dev_err(tspi->dev,
+				"spi transfer timeout, err %d\n", ret);
+			ret = -EIO;
+			goto exit;
+		}
+
+		if (tspi->tx_status ||  tspi->rx_status) {
+			dev_err(tspi->dev, "Error in Transfer\n");
+			ret = -EIO;
+			goto exit;
+		}
+		msg->actual_length += xfer->len;
+		if (xfer->cs_change && xfer->delay_usecs) {
+			tegra_spi_writel(tspi, tspi->def_command1_reg,
+					SPI_COMMAND1);
+			udelay(xfer->delay_usecs);
+		}
+	}
+	ret = 0;
+exit:
+	tegra_spi_writel(tspi, tspi->def_command1_reg, SPI_COMMAND1);
+	pm_runtime_put(tspi->dev);
+	msg->status = ret;
+	spi_finalize_current_message(master);
+	return ret;
+}
+
+static irqreturn_t handle_cpu_based_xfer(struct tegra_spi_data *tspi)
+{
+	struct spi_transfer *t = tspi->curr_xfer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	if (tspi->tx_status ||  tspi->rx_status) {
+		dev_err(tspi->dev, "CpuXfer ERROR bit set 0x%x\n",
+			tspi->status_reg);
+		dev_err(tspi->dev, "CpuXfer 0x%08x:0x%08x\n",
+			tspi->command1_reg, tspi->dma_control_reg);
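+		/* Reset the controller to clear the error condition before completing */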
+		tegra_periph_reset_assert(tspi->clk);
+		udelay(2);
+		tegra_periph_reset_deassert(tspi->clk);
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tegra_spi_read_rx_fifo_to_client_rxbuf(tspi, t);
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->cur_pos = tspi->cur_tx_pos;
+	else
+		tspi->cur_pos = tspi->cur_rx_pos;
+
+	if (tspi->cur_pos == t->len) {
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	tegra_spi_calculate_curr_xfer_param(tspi->cur_spi, tspi, t);
+	tegra_spi_start_cpu_based_transfer(tspi, t);
+exit:
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t handle_dma_based_xfer(struct tegra_spi_data *tspi)
+{
+	struct spi_transfer *t = tspi->curr_xfer;
+	long wait_status;
+	int err = 0;
+	unsigned total_fifo_words;
+	unsigned long flags;
+
+	/* Abort dmas if any error */
+	if (tspi->cur_direction & DATA_DIR_TX) {
+		if (tspi->tx_status) {
+			dmaengine_terminate_all(tspi->tx_dma_chan);
+			err += 1;
+		} else {
+			wait_status = wait_for_completion_interruptible_timeout(
+				&tspi->tx_dma_complete, SPI_DMA_TIMEOUT);
+			if (wait_status <= 0) {
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+				dev_err(tspi->dev, "TxDma Xfer failed\n");
+				err += 1;
+			}
+		}
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX) {
+		if (tspi->rx_status) {
+			dmaengine_terminate_all(tspi->rx_dma_chan);
+			err += 2;
+		} else {
+			wait_status = wait_for_completion_interruptible_timeout(
+				&tspi->rx_dma_complete, SPI_DMA_TIMEOUT);
+			if (wait_status <= 0) {
+				dmaengine_terminate_all(tspi->rx_dma_chan);
+				dev_err(tspi->dev, "RxDma Xfer failed\n");
+				err += 2;
+			}
+		}
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	if (err) {
+		dev_err(tspi->dev, "DmaXfer: ERROR bit set 0x%x\n",
+			tspi->status_reg);
+		dev_err(tspi->dev, "DmaXfer 0x%08x:0x%08x\n",
+			tspi->command1_reg, tspi->dma_control_reg);
+		tegra_periph_reset_assert(tspi->clk);
+		udelay(2);
+		tegra_periph_reset_deassert(tspi->clk);
+		complete(&tspi->xfer_completion);
+		spin_unlock_irqrestore(&tspi->lock, flags);
+		return IRQ_HANDLED;
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tegra_spi_copy_spi_rxbuf_to_client_rxbuf(tspi, t);
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->cur_pos = tspi->cur_tx_pos;
+	else
+		tspi->cur_pos = tspi->cur_rx_pos;
+
+	if (tspi->cur_pos == t->len) {
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	/* Continue transfer in current message */
+	total_fifo_words = tegra_spi_calculate_curr_xfer_param(tspi->cur_spi,
+							tspi, t);
+	if (total_fifo_words > SPI_FIFO_DEPTH)
+		err = tegra_spi_start_dma_based_transfer(tspi, t);
+	else
+		err = tegra_spi_start_cpu_based_transfer(tspi, t);
+
+exit:
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t tegra_spi_isr_thread(int irq, void *context_data)
+{
+	struct tegra_spi_data *tspi = context_data;
+
+	if (!tspi->is_curr_dma_xfer)
+		return handle_cpu_based_xfer(tspi);
+	return handle_dma_based_xfer(tspi);
+}
+
+static irqreturn_t tegra_spi_isr(int irq, void *context_data)
+{
+	struct tegra_spi_data *tspi = context_data;
+
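+	/* Latch the FIFO error status in hard irq context; the threaded handler completes the transfer */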
+	tspi->status_reg = tegra_spi_readl(tspi, SPI_FIFO_STATUS);
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->tx_status = tspi->status_reg &
+					(SPI_TX_FIFO_UNF | SPI_TX_FIFO_OVF);
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tspi->rx_status = tspi->status_reg &
+					(SPI_RX_FIFO_OVF | SPI_RX_FIFO_UNF);
+	tegra_spi_clear_status(tspi);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static void tegra_spi_parse_dt(struct platform_device *pdev,
+	struct tegra_spi_data *tspi)
+{
+	struct device_node *np = pdev->dev.of_node;
+	u32 of_dma[2];
+
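+	/* The second cell of nvidia,dma-request-selector is the APB DMA request number */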
+	if (of_property_read_u32_array(np, "nvidia,dma-request-selector",
+				of_dma, 2) >= 0)
+		tspi->dma_req_sel = of_dma[1];
+
+	if (of_property_read_u32(np, "spi-max-frequency",
+				&tspi->spi_max_frequency))
+		tspi->spi_max_frequency = 25000000; /* 25MHz */
+}
+
+static struct of_device_id tegra_spi_of_match[] = {
+	{ .compatible = "nvidia,tegra114-spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, tegra_spi_of_match);
+
+static int tegra_spi_probe(struct platform_device *pdev)
+{
+	struct spi_master	*master;
+	struct tegra_spi_data	*tspi;
+	struct resource		*r;
+	int ret, spi_irq;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
+	if (!master) {
+		dev_err(&pdev->dev, "master allocation failed\n");
+		return -ENOMEM;
+	}
+	dev_set_drvdata(&pdev->dev, master);
+	tspi = spi_master_get_devdata(master);
+
+	/* Parse DT */
+	tegra_spi_parse_dt(pdev, tspi);
+
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->setup = tegra_spi_setup;
+	master->transfer_one_message = tegra_spi_transfer_one_message;
+	master->num_chipselect = MAX_CHIP_SELECT;
+	master->bus_num = -1;
+
+	tspi->master = master;
+	tspi->dev = &pdev->dev;
+	spin_lock_init(&tspi->lock);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "No IO memory resource\n");
+		ret = -ENODEV;
+		goto exit_free_master;
+	}
+	tspi->phys = r->start;
+	tspi->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(tspi->base)) {
+		ret = PTR_ERR(tspi->base);
+		dev_err(&pdev->dev, "ioremap failed: err = %d\n", ret);
+		goto exit_free_master;
+	}
+
+	spi_irq = platform_get_irq(pdev, 0);
+	tspi->irq = spi_irq;
+	ret = request_threaded_irq(tspi->irq, tegra_spi_isr,
+			tegra_spi_isr_thread, IRQF_ONESHOT,
+			dev_name(&pdev->dev), tspi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
+					tspi->irq);
+		goto exit_free_master;
+	}
+
+	tspi->clk = devm_clk_get(&pdev->dev, "spi");
+	if (IS_ERR(tspi->clk)) {
+		dev_err(&pdev->dev, "cannot get clock\n");
+		ret = PTR_ERR(tspi->clk);
+		goto exit_free_irq;
+	}
+
+	tspi->max_buf_size = SPI_FIFO_DEPTH << 2;
+	tspi->dma_buf_size = DEFAULT_SPI_DMA_BUF_LEN;
+
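+	/* DMA is optional; without a request selector all transfers fall back to PIO through the FIFO */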
+	if (tspi->dma_req_sel) {
+		ret = tegra_spi_init_dma_param(tspi, true);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "RxDma Init failed, err %d\n", ret);
+			goto exit_free_irq;
+		}
+
+		ret = tegra_spi_init_dma_param(tspi, false);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "TxDma Init failed, err %d\n", ret);
+			goto exit_rx_dma_free;
+		}
+		tspi->max_buf_size = tspi->dma_buf_size;
+		init_completion(&tspi->tx_dma_complete);
+		init_completion(&tspi->rx_dma_complete);
+	}
+
+	init_completion(&tspi->xfer_completion);
+
+	pm_runtime_enable(&pdev->dev);
+	if (!pm_runtime_enabled(&pdev->dev)) {
+		ret = tegra_spi_runtime_resume(&pdev->dev);
+		if (ret)
+			goto exit_pm_disable;
+	}
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+		goto exit_pm_disable;
+	}
+	tspi->def_command1_reg  = SPI_M_S;
+	tegra_spi_writel(tspi, tspi->def_command1_reg, SPI_COMMAND1);
+	pm_runtime_put(&pdev->dev);
+
+	master->dev.of_node = pdev->dev.of_node;
+	ret = spi_register_master(master);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cannot register spi master, err %d\n", ret);
+		goto exit_pm_disable;
+	}
+	return ret;
+
+exit_pm_disable:
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_spi_runtime_suspend(&pdev->dev);
+	tegra_spi_deinit_dma_param(tspi, false);
+exit_rx_dma_free:
+	tegra_spi_deinit_dma_param(tspi, true);
+exit_free_irq:
+	free_irq(spi_irq, tspi);
+exit_free_master:
+	spi_master_put(master);
+	return ret;
+}
+
+static int tegra_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = dev_get_drvdata(&pdev->dev);
+	struct tegra_spi_data	*tspi = spi_master_get_devdata(master);
+
+	free_irq(tspi->irq, tspi);
+	spi_unregister_master(master);
+
+	if (tspi->tx_dma_chan)
+		tegra_spi_deinit_dma_param(tspi, false);
+
+	if (tspi->rx_dma_chan)
+		tegra_spi_deinit_dma_param(tspi, true);
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_spi_runtime_suspend(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra_spi_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+
+	return spi_master_suspend(master);
+}
+
+static int tegra_spi_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_spi_data *tspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm runtime failed, e = %d\n", ret);
+		return ret;
+	}
+	tegra_spi_writel(tspi, tspi->command1_reg, SPI_COMMAND1);
+	pm_runtime_put(dev);
+
+	return spi_master_resume(master);
+}
+#endif
+
+static int tegra_spi_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_spi_data *tspi = spi_master_get_devdata(master);
+
+	/* Flush all writes which are in the PPSB queue by reading back */
+	tegra_spi_readl(tspi, SPI_COMMAND1);
+
+	clk_disable_unprepare(tspi->clk);
+	return 0;
+}
+
+static int tegra_spi_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_spi_data *tspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(tspi->clk);
+	if (ret < 0) {
+		dev_err(tspi->dev, "clk_prepare failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops tegra_spi_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_spi_runtime_suspend,
+		tegra_spi_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_spi_suspend, tegra_spi_resume)
+};
+static struct platform_driver tegra_spi_driver = {
+	.driver = {
+		.name		= "spi-tegra114",
+		.owner		= THIS_MODULE,
+		.pm		= &tegra_spi_pm_ops,
+		.of_match_table	= tegra_spi_of_match,
+	},
+	.probe =	tegra_spi_probe,
+	.remove =	tegra_spi_remove,
+};
+module_platform_driver(tegra_spi_driver);
+
+MODULE_ALIAS("platform:spi-tegra114");
+MODULE_DESCRIPTION("NVIDIA Tegra114 SPI Controller Driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index 3d6a12b2af04..d65c000efe35 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -33,7 +33,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/spi-tegra.h>
 #include <linux/clk/tegra.h>
 
 #define SPI_COMMAND				0x000
@@ -439,23 +438,13 @@ static irqreturn_t tegra_sflash_isr(int irq, void *context_data)
 	return handle_cpu_based_xfer(tsd);
 }
 
-static struct tegra_spi_platform_data *tegra_sflash_parse_dt(
-		struct platform_device *pdev)
+static void tegra_sflash_parse_dt(struct tegra_sflash_data *tsd)
 {
-	struct tegra_spi_platform_data *pdata;
-	struct device_node *np = pdev->dev.of_node;
-	u32 max_freq;
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "Memory alloc for pdata failed\n");
-		return NULL;
-	}
-
-	if (!of_property_read_u32(np, "spi-max-frequency", &max_freq))
-		pdata->spi_max_frequency = max_freq;
+	struct device_node *np = tsd->dev->of_node;
 
-	return pdata;
+	if (of_property_read_u32(np, "spi-max-frequency",
+					&tsd->spi_max_frequency))
+		tsd->spi_max_frequency = 25000000; /* 25MHz */
 }
 
 static struct of_device_id tegra_sflash_of_match[] = {
@@ -469,28 +458,15 @@ static int tegra_sflash_probe(struct platform_device *pdev)
 	struct spi_master	*master;
 	struct tegra_sflash_data	*tsd;
 	struct resource		*r;
-	struct tegra_spi_platform_data *pdata = pdev->dev.platform_data;
 	int ret;
 	const struct of_device_id *match;
 
-	match = of_match_device(of_match_ptr(tegra_sflash_of_match),
-					&pdev->dev);
+	match = of_match_device(tegra_sflash_of_match, &pdev->dev);
 	if (!match) {
 		dev_err(&pdev->dev, "Error: No device match found\n");
 		return -ENODEV;
 	}
 
-	if (!pdata && pdev->dev.of_node)
-		pdata = tegra_sflash_parse_dt(pdev);
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "No platform data, exiting\n");
-		return -ENODEV;
-	}
-
-	if (!pdata->spi_max_frequency)
-		pdata->spi_max_frequency = 25000000; /* 25MHz */
-
 	master = spi_alloc_master(&pdev->dev, sizeof(*tsd));
 	if (!master) {
 		dev_err(&pdev->dev, "master allocation failed\n");
@@ -510,6 +486,8 @@ static int tegra_sflash_probe(struct platform_device *pdev)
 	tsd->dev = &pdev->dev;
 	spin_lock_init(&tsd->lock);
 
+	tegra_sflash_parse_dt(tsd);
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!r) {
 		dev_err(&pdev->dev, "No IO memory resource\n");
@@ -538,7 +516,6 @@ static int tegra_sflash_probe(struct platform_device *pdev)
 		goto exit_free_irq;
 	}
 
-	tsd->spi_max_frequency = pdata->spi_max_frequency;
 	init_completion(&tsd->xfer_completion);
 	pm_runtime_enable(&pdev->dev);
 	if (!pm_runtime_enabled(&pdev->dev)) {
@@ -658,7 +635,7 @@ static struct platform_driver tegra_sflash_driver = {
 		.name		= "spi-tegra-sflash",
 		.owner		= THIS_MODULE,
 		.pm		= &slink_pm_ops,
-		.of_match_table	= of_match_ptr(tegra_sflash_of_match),
+		.of_match_table	= tegra_sflash_of_match,
 	},
 	.probe =	tegra_sflash_probe,
 	.remove =	tegra_sflash_remove,
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index b8698b389ef3..3faf88d003de 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -34,7 +34,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
-#include <linux/spi/spi-tegra.h>
 #include <linux/clk/tegra.h>
 
 #define SLINK_COMMAND			0x000
@@ -189,7 +188,6 @@ struct tegra_slink_data {
 	unsigned				dma_buf_size;
 	unsigned				max_buf_size;
 	bool					is_curr_dma_xfer;
-	bool					is_hw_based_cs;
 
 	struct completion			rx_dma_complete;
 	struct completion			tx_dma_complete;
@@ -375,9 +373,6 @@ static unsigned int tegra_slink_read_rx_fifo_to_client_rxbuf(
 		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
 		read_words += tspi->curr_dma_words;
 	} else {
-		unsigned int bits_per_word;
-
-		bits_per_word = t->bits_per_word;
 		for (count = 0; count < rx_full_count; count++) {
 			x = tegra_slink_readl(tspi, SLINK_RX_FIFO);
 			for (i = 0; (i < tspi->bytes_per_word); i++)
@@ -720,7 +715,6 @@ static int tegra_slink_start_transfer_one(struct spi_device *spi,
 	u8 bits_per_word;
 	unsigned total_fifo_words;
 	int ret;
-	struct tegra_spi_device_controller_data *cdata = spi->controller_data;
 	unsigned long command;
 	unsigned long command2;
 
@@ -743,39 +737,11 @@ static int tegra_slink_start_transfer_one(struct spi_device *spi,
 
 		command = tspi->def_command_reg;
 		command |= SLINK_BIT_LENGTH(bits_per_word - 1);
+		command |= SLINK_CS_SW | SLINK_CS_VALUE;
 
 		command2 = tspi->def_command2_reg;
 		command2 |= SLINK_SS_EN_CS(spi->chip_select);
 
-		/* possibly use the hw based chip select */
-		tspi->is_hw_based_cs = false;
-		if (cdata && cdata->is_hw_based_cs && is_single_xfer &&
-			((tspi->curr_dma_words * tspi->bytes_per_word) ==
-						(t->len - tspi->cur_pos))) {
-			int setup_count;
-			int sts2;
-
-			setup_count = cdata->cs_setup_clk_count >> 1;
-			setup_count = max(setup_count, 3);
-			command2 |= SLINK_SS_SETUP(setup_count);
-			if (tspi->chip_data->cs_hold_time) {
-				int hold_count;
-
-				hold_count = cdata->cs_hold_clk_count;
-				hold_count = max(hold_count, 0xF);
-				sts2 = tegra_slink_readl(tspi, SLINK_STATUS2);
-				sts2 &= ~SLINK_SS_HOLD_TIME(0xF);
-				sts2 |= SLINK_SS_HOLD_TIME(hold_count);
-				tegra_slink_writel(tspi, sts2, SLINK_STATUS2);
-			}
-			tspi->is_hw_based_cs = true;
-		}
-
-		if (tspi->is_hw_based_cs)
-			command &= ~SLINK_CS_SW;
-		else
-			command |= SLINK_CS_SW | SLINK_CS_VALUE;
-
 		command &= ~SLINK_MODES;
 		if (spi->mode & SPI_CPHA)
 			command |= SLINK_CK_SDA;
@@ -858,21 +824,6 @@ static int tegra_slink_setup(struct spi_device *spi)
 	return 0;
 }
 
-static int tegra_slink_prepare_transfer(struct spi_master *master)
-{
-	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
-
-	return pm_runtime_get_sync(tspi->dev);
-}
-
-static int tegra_slink_unprepare_transfer(struct spi_master *master)
-{
-	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
-
-	pm_runtime_put(tspi->dev);
-	return 0;
-}
-
 static int tegra_slink_transfer_one_message(struct spi_master *master,
 			struct spi_message *msg)
 {
@@ -885,6 +836,12 @@ static int tegra_slink_transfer_one_message(struct spi_master *master,
 
 	msg->status = 0;
 	msg->actual_length = 0;
+	ret = pm_runtime_get_sync(tspi->dev);
+	if (ret < 0) {
+		dev_err(tspi->dev, "runtime get failed: %d\n", ret);
+		goto done;
+	}
+
 	single_xfer = list_is_singular(&msg->transfers);
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
 		INIT_COMPLETION(tspi->xfer_completion);
@@ -921,6 +878,8 @@ static int tegra_slink_transfer_one_message(struct spi_master *master,
 exit:
 	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
 	tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2);
+	pm_runtime_put(tspi->dev);
+done:
 	msg->status = ret;
 	spi_finalize_current_message(master);
 	return ret;
@@ -1072,36 +1031,25 @@ static irqreturn_t tegra_slink_isr(int irq, void *context_data)
 	return IRQ_WAKE_THREAD;
 }
 
-static struct tegra_spi_platform_data *tegra_slink_parse_dt(
-		struct platform_device *pdev)
+static void tegra_slink_parse_dt(struct tegra_slink_data *tspi)
 {
-	struct tegra_spi_platform_data *pdata;
-	const unsigned int *prop;
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = tspi->dev->of_node;
 	u32 of_dma[2];
 
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "Memory alloc for pdata failed\n");
-		return NULL;
-	}
-
 	if (of_property_read_u32_array(np, "nvidia,dma-request-selector",
 				of_dma, 2) >= 0)
-		pdata->dma_req_sel = of_dma[1];
+		tspi->dma_req_sel = of_dma[1];
 
-	prop = of_get_property(np, "spi-max-frequency", NULL);
-	if (prop)
-		pdata->spi_max_frequency = be32_to_cpup(prop);
-
-	return pdata;
+	if (of_property_read_u32(np, "spi-max-frequency",
+					&tspi->spi_max_frequency))
+		tspi->spi_max_frequency = 25000000; /* 25MHz */
 }
 
-const struct tegra_slink_chip_data tegra30_spi_cdata = {
+static const struct tegra_slink_chip_data tegra30_spi_cdata = {
 	.cs_hold_time = true,
 };
 
-const struct tegra_slink_chip_data tegra20_spi_cdata = {
+static const struct tegra_slink_chip_data tegra20_spi_cdata = {
 	.cs_hold_time = false,
 };
 
@@ -1117,27 +1065,16 @@ static int tegra_slink_probe(struct platform_device *pdev)
 	struct spi_master	*master;
 	struct tegra_slink_data	*tspi;
 	struct resource		*r;
-	struct tegra_spi_platform_data *pdata = pdev->dev.platform_data;
 	int ret, spi_irq;
 	const struct tegra_slink_chip_data *cdata = NULL;
 	const struct of_device_id *match;
 
-	match = of_match_device(of_match_ptr(tegra_slink_of_match), &pdev->dev);
+	match = of_match_device(tegra_slink_of_match, &pdev->dev);
 	if (!match) {
 		dev_err(&pdev->dev, "Error: No device match found\n");
 		return -ENODEV;
 	}
 	cdata = match->data;
-	if (!pdata && pdev->dev.of_node)
-		pdata = tegra_slink_parse_dt(pdev);
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "No platform data, exiting\n");
-		return -ENODEV;
-	}
-
-	if (!pdata->spi_max_frequency)
-		pdata->spi_max_frequency = 25000000; /* 25MHz */
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
 	if (!master) {
@@ -1148,20 +1085,19 @@ static int tegra_slink_probe(struct platform_device *pdev)
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	master->setup = tegra_slink_setup;
-	master->prepare_transfer_hardware = tegra_slink_prepare_transfer;
 	master->transfer_one_message = tegra_slink_transfer_one_message;
-	master->unprepare_transfer_hardware = tegra_slink_unprepare_transfer;
 	master->num_chipselect = MAX_CHIP_SELECT;
 	master->bus_num = -1;
 
 	dev_set_drvdata(&pdev->dev, master);
 	tspi = spi_master_get_devdata(master);
 	tspi->master = master;
-	tspi->dma_req_sel = pdata->dma_req_sel;
 	tspi->dev = &pdev->dev;
 	tspi->chip_data = cdata;
 	spin_lock_init(&tspi->lock);
 
+	tegra_slink_parse_dt(tspi);
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!r) {
 		dev_err(&pdev->dev, "No IO memory resource\n");
@@ -1195,9 +1131,8 @@ static int tegra_slink_probe(struct platform_device *pdev)
 
 	tspi->max_buf_size = SLINK_FIFO_DEPTH << 2;
 	tspi->dma_buf_size = DEFAULT_SPI_DMA_BUF_LEN;
-	tspi->spi_max_frequency = pdata->spi_max_frequency;
 
-	if (pdata->dma_req_sel) {
+	if (tspi->dma_req_sel) {
 		ret = tegra_slink_init_dma_param(tspi, true);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "RxDma Init failed, err %d\n", ret);
@@ -1340,7 +1275,7 @@ static struct platform_driver tegra_slink_driver = {
 		.name		= "spi-tegra-slink",
 		.owner		= THIS_MODULE,
 		.pm		= &slink_pm_ops,
-		.of_match_table	= of_match_ptr(tegra_slink_of_match),
+		.of_match_table	= tegra_slink_of_match,
 	},
 	.probe =	tegra_slink_probe,
 	.remove =	tegra_slink_remove,
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index f756481b0fea..35f60bd252dd 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -615,7 +615,7 @@ static void pch_spi_set_tx(struct pch_spi_data *data, int *bpw)
 	int size;
 	u32 n_writes;
 	int j;
-	struct spi_message *pmsg;
+	struct spi_message *pmsg, *tmp;
 	const u8 *tx_buf;
 	const u16 *tx_sbuf;
 
@@ -656,7 +656,7 @@ static void pch_spi_set_tx(struct pch_spi_data *data, int *bpw)
 	if (!data->pkt_rx_buff) {
 		/* flush queue and set status of all transfers to -ENOMEM */
 		dev_err(&data->master->dev, "%s :kzalloc failed\n", __func__);
-		list_for_each_entry(pmsg, data->queue.next, queue) {
+		list_for_each_entry_safe(pmsg, tmp, data->queue.next, queue) {
 			pmsg->status = -ENOMEM;
 
 			if (pmsg->complete != 0)
@@ -703,7 +703,7 @@ static void pch_spi_set_tx(struct pch_spi_data *data, int *bpw)
 
 static void pch_spi_nomore_transfer(struct pch_spi_data *data)
 {
-	struct spi_message *pmsg;
+	struct spi_message *pmsg, *tmp;
 	dev_dbg(&data->master->dev, "%s called\n", __func__);
 	/* Invoke complete callback
 	 * [To the spi core..indicating end of transfer] */
@@ -740,7 +740,7 @@ static void pch_spi_nomore_transfer(struct pch_spi_data *data)
 		dev_dbg(&data->master->dev,
 			"%s suspend/remove initiated, flushing queue\n",
 			__func__);
-		list_for_each_entry(pmsg, data->queue.next, queue) {
+		list_for_each_entry_safe(pmsg, tmp, data->queue.next, queue) {
 			pmsg->status = -EIO;
 
 			if (pmsg->complete)
@@ -1187,7 +1187,7 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw)
 
 static void pch_spi_process_messages(struct work_struct *pwork)
 {
-	struct spi_message *pmsg;
+	struct spi_message *pmsg, *tmp;
 	struct pch_spi_data *data;
 	int bpw;
 
@@ -1199,7 +1199,7 @@ static void pch_spi_process_messages(struct work_struct *pwork)
 	if (data->board_dat->suspend_sts || (data->status == STATUS_EXITING)) {
 		dev_dbg(&data->master->dev, "%s suspend/remove initiated,"
 			"flushing queue\n", __func__);
-		list_for_each_entry(pmsg, data->queue.next, queue) {
+		list_for_each_entry_safe(pmsg, tmp, data->queue.next, queue) {
 			pmsg->status = -EIO;
 
 			if (pmsg->complete != 0) {
@@ -1789,8 +1789,10 @@ static int __init pch_spi_init(void)
 		return ret;
 
 	ret = pci_register_driver(&pch_spi_pcidev_driver);
-	if (ret)
+	if (ret) {
+		platform_driver_unregister(&pch_spi_pd_driver);
 		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f996c600eb8c..163fd802b7ac 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -543,17 +543,16 @@ static void spi_pump_messages(struct kthread_work *work)
 	/* Lock queue and check for queue work */
 	spin_lock_irqsave(&master->queue_lock, flags);
 	if (list_empty(&master->queue) || !master->running) {
-		if (master->busy && master->unprepare_transfer_hardware) {
-			ret = master->unprepare_transfer_hardware(master);
-			if (ret) {
-				spin_unlock_irqrestore(&master->queue_lock, flags);
-				dev_err(&master->dev,
-					"failed to unprepare transfer hardware\n");
-				return;
-			}
+		if (!master->busy) {
+			spin_unlock_irqrestore(&master->queue_lock, flags);
+			return;
 		}
 		master->busy = false;
 		spin_unlock_irqrestore(&master->queue_lock, flags);
+		if (master->unprepare_transfer_hardware &&
+		    master->unprepare_transfer_hardware(master))
+			dev_err(&master->dev,
+				"failed to unprepare transfer hardware\n");
 		return;
 	}
 
@@ -984,7 +983,7 @@ static void acpi_register_spi_devices(struct spi_master *master)
 	acpi_status status;
 	acpi_handle handle;
 
-	handle = ACPI_HANDLE(&master->dev);
+	handle = ACPI_HANDLE(master->dev.parent);
 	if (!handle)
 		return;
 
@@ -1377,6 +1376,14 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message)
 			xfer->bits_per_word = spi->bits_per_word;
 		if (!xfer->speed_hz)
 			xfer->speed_hz = spi->max_speed_hz;
+		if (master->bits_per_word_mask) {
+			/* Only 32 bits fit in the mask */
+			if (xfer->bits_per_word > 32)
+				return -EINVAL;
+			if (!(master->bits_per_word_mask &
+					BIT(xfer->bits_per_word - 1)))
+				return -EINVAL;
+		}
 	}
 
 	message->spi = spi;
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 2e0655dbe070..911e9e0711d2 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -603,7 +603,7 @@ static int spidev_probe(struct spi_device *spi)
 		dev = device_create(spidev_class, &spi->dev, spidev->devt,
 				    spidev, "spidev%d.%d",
 				    spi->master->bus_num, spi->chip_select);
-		status = IS_ERR(dev) ? PTR_ERR(dev) : 0;
+		status = PTR_RET(dev);
 	} else {
 		dev_dbg(&spi->dev, "no minor number available!\n");
 		status = -ENODEV;