Diffstat (limited to 'drivers/mmc/host')
-rw-r--r--  drivers/mmc/host/Kconfig           |   41
-rw-r--r--  drivers/mmc/host/Makefile          |    3
-rw-r--r--  drivers/mmc/host/davinci_mmc.c     |   80
-rw-r--r--  drivers/mmc/host/dw_mmc.c          | 1796
-rw-r--r--  drivers/mmc/host/dw_mmc.h          |  168
-rw-r--r--  drivers/mmc/host/mmci.c            |  207
-rw-r--r--  drivers/mmc/host/mmci.h            |    9
-rw-r--r--  drivers/mmc/host/msm_sdcc.c        |  139
-rw-r--r--  drivers/mmc/host/msm_sdcc.h        |    8
-rw-r--r--  drivers/mmc/host/mxcmmc.c          |   53
-rw-r--r--  drivers/mmc/host/omap.c            |   24
-rw-r--r--  drivers/mmc/host/omap_hsmmc.c      |    2
-rw-r--r--  drivers/mmc/host/sdhci-dove.c      |   70
-rw-r--r--  drivers/mmc/host/sdhci-of-core.c   |    4
-rw-r--r--  drivers/mmc/host/sdhci-pci.c       |  161
-rw-r--r--  drivers/mmc/host/sdhci-pltfm.c     |    6
-rw-r--r--  drivers/mmc/host/sdhci-pltfm.h     |    2
-rw-r--r--  drivers/mmc/host/sdhci-s3c.c       |   66
-rw-r--r--  drivers/mmc/host/sdhci-tegra.c     |  257
-rw-r--r--  drivers/mmc/host/sdhci.c           |   45
-rw-r--r--  drivers/mmc/host/sdhci.h           |    3
-rw-r--r--  drivers/mmc/host/sh_mmcif.c        |  429
-rw-r--r--  drivers/mmc/host/tmio_mmc.c        |  561
-rw-r--r--  drivers/mmc/host/tmio_mmc.h        |  228
24 files changed, 3835 insertions(+), 527 deletions(-)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index d618e8673996..c22a4c039988 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -83,7 +83,7 @@ config MMC_RICOH_MMC
 
 config MMC_SDHCI_OF
 	tristate "SDHCI support on OpenFirmware platforms"
-	depends on MMC_SDHCI && PPC_OF
+	depends on MMC_SDHCI && OF
 	help
 	  This selects the OF support for Secure Digital Host Controller
 	  Interfaces.
@@ -93,6 +93,7 @@ config MMC_SDHCI_OF
 config MMC_SDHCI_OF_ESDHC
 	bool "SDHCI OF support for the Freescale eSDHC controller"
 	depends on MMC_SDHCI_OF
+	depends on PPC_OF
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
 	  This selects the Freescale eSDHC controller support.
@@ -102,6 +103,7 @@ config MMC_SDHCI_OF_ESDHC
 config MMC_SDHCI_OF_HLWD
 	bool "SDHCI OF support for the Nintendo Wii SDHCI controllers"
 	depends on MMC_SDHCI_OF
+	depends on PPC_OF
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
 	  This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -140,6 +142,27 @@ config MMC_SDHCI_ESDHC_IMX
 
 	  If unsure, say N.
 
+config MMC_SDHCI_DOVE
+	bool "SDHCI support on Marvell's Dove SoC"
+	depends on ARCH_DOVE
+	depends on MMC_SDHCI_PLTFM
+	select MMC_SDHCI_IO_ACCESSORS
+	help
+	  This selects the Secure Digital Host Controller Interface in
+	  Marvell's Dove SoC.
+
+	  If unsure, say N.
+
+config MMC_SDHCI_TEGRA
+	tristate "SDHCI platform support for the Tegra SD/MMC Controller"
+	depends on MMC_SDHCI_PLTFM && ARCH_TEGRA
+	select MMC_SDHCI_IO_ACCESSORS
+	help
+	  This selects the Tegra SD/MMC controller. If you have a Tegra
+	  platform with SD or MMC devices, say Y or M here.
+
+	  If unsure, say N.
+
 config MMC_SDHCI_S3C
 	tristate "SDHCI support on Samsung S3C SoC"
 	depends on MMC_SDHCI && PLAT_SAMSUNG
@@ -458,6 +481,22 @@ config SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND
 	help
 	  If you say yes here SD-Cards may work on the EZkit.
 
+config MMC_DW
+	tristate "Synopsys DesignWare Memory Card Interface"
+	depends on ARM
+	help
+	  This selects support for the Synopsys DesignWare Mobile Storage IP
+	  block; this provides host support for SD and MMC interfaces, in both
+	  PIO and external DMA modes.
+
+config MMC_DW_IDMAC
+	bool "Internal DMAC interface"
+	depends on MMC_DW
+	help
+	  This selects support for the internal DMAC block within the Synopsys
+	  DesignWare Mobile Storage IP block. This disables the external DMA
+	  interface.
+
 config MMC_SH_MMCIF
 	tristate "SuperH Internal MMCIF support"
 	depends on MMC_BLOCK && (SUPERH || ARCH_SHMOBILE)
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 7b645ff43b30..e834fb223e9a 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
 obj-$(CONFIG_MMC_CB710)	+= cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)	+= via-sdmmc.o
 obj-$(CONFIG_SDH_BFIN)		+= bfin_sdh.o
+obj-$(CONFIG_MMC_DW)		+= dw_mmc.o
 obj-$(CONFIG_MMC_SH_MMCIF)	+= sh_mmcif.o
 obj-$(CONFIG_MMC_JZ4740)	+= jz4740_mmc.o
 obj-$(CONFIG_MMC_USHC)		+= ushc.o
@@ -39,6 +40,8 @@ obj-$(CONFIG_MMC_SDHCI_PLTFM)			+= sdhci-platform.o
 sdhci-platform-y				:= sdhci-pltfm.o
 sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX)	+= sdhci-cns3xxx.o
 sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX)	+= sdhci-esdhc-imx.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE)		+= sdhci-dove.o
+sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA)	+= sdhci-tegra.o
 
 obj-$(CONFIG_MMC_SDHCI_OF)	+= sdhci-of.o
 sdhci-of-y				:= sdhci-of-core.o
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index e15547cf701f..0076c7448fe6 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -66,8 +66,8 @@
 #define DAVINCI_MMCBLNC      0x60
 #define DAVINCI_SDIOCTL      0x64
 #define DAVINCI_SDIOST0      0x68
-#define DAVINCI_SDIOEN       0x6C
-#define DAVINCI_SDIOST       0x70
+#define DAVINCI_SDIOIEN      0x6C
+#define DAVINCI_SDIOIST      0x70
 #define DAVINCI_MMCFIFOCTL   0x74 /* FIFO Control Register             */
 
 /* DAVINCI_MMCCTL definitions */
@@ -131,6 +131,14 @@
 #define MMCFIFOCTL_ACCWD_2    (2 << 3) /* access width of 2 bytes    */
 #define MMCFIFOCTL_ACCWD_1    (3 << 3) /* access width of 1 byte     */
 
+/* DAVINCI_SDIOST0 definitions */
+#define SDIOST0_DAT1_HI       BIT(0)
+
+/* DAVINCI_SDIOIEN definitions */
+#define SDIOIEN_IOINTEN       BIT(0)
+
+/* DAVINCI_SDIOIST definitions */
+#define SDIOIST_IOINT         BIT(0)
 
 /* MMCSD Init clock in Hz in opendrain mode */
 #define MMCSD_INIT_CLOCK		200000
@@ -164,7 +172,7 @@ struct mmc_davinci_host {
 	unsigned int mmc_input_clk;
 	void __iomem *base;
 	struct resource *mem_res;
-	int irq;
+	int mmc_irq, sdio_irq;
 	unsigned char bus_mode;
 
 #define DAVINCI_MMC_DATADIR_NONE	0
@@ -184,6 +192,7 @@ struct mmc_davinci_host {
 	u32 rxdma, txdma;
 	bool use_dma;
 	bool do_dma;
+	bool sdio_int;
 
 	/* Scatterlist DMA uses one or more parameter RAM entries:
 	 * the main one (associated with rxdma or txdma) plus zero or
@@ -480,7 +489,7 @@ static void mmc_davinci_send_dma_request(struct mmc_davinci_host *host,
 	struct scatterlist	*sg;
 	unsigned		sg_len;
 	unsigned		bytes_left = host->bytes_left;
-	const unsigned		shift = ffs(rw_threshold) - 1;;
+	const unsigned		shift = ffs(rw_threshold) - 1;
 
 	if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE) {
 		template = &host->tx_template;
@@ -866,6 +875,19 @@ mmc_davinci_xfer_done(struct mmc_davinci_host *host, struct mmc_data *data)
 {
 	host->data = NULL;
 
+	if (host->mmc->caps & MMC_CAP_SDIO_IRQ) {
+		/*
+		 * SDIO Interrupt Detection work-around as suggested by
+		 * Davinci Errata (TMS320DM355 Silicon Revision 1.1 Errata
+		 * 2.1.6): Signal SDIO interrupt only if it is enabled by core
+		 */
+		if (host->sdio_int && !(readl(host->base + DAVINCI_SDIOST0) &
+					SDIOST0_DAT1_HI)) {
+			writel(SDIOIST_IOINT, host->base + DAVINCI_SDIOIST);
+			mmc_signal_sdio_irq(host->mmc);
+		}
+	}
+
 	if (host->do_dma) {
 		davinci_abort_dma(host);
 
@@ -932,6 +954,21 @@ davinci_abort_data(struct mmc_davinci_host *host, struct mmc_data *data)
 	mmc_davinci_reset_ctrl(host, 0);
 }
 
+static irqreturn_t mmc_davinci_sdio_irq(int irq, void *dev_id)
+{
+	struct mmc_davinci_host *host = dev_id;
+	unsigned int status;
+
+	status = readl(host->base + DAVINCI_SDIOIST);
+	if (status & SDIOIST_IOINT) {
+		dev_dbg(mmc_dev(host->mmc),
+			"SDIO interrupt status %x\n", status);
+		writel(status | SDIOIST_IOINT, host->base + DAVINCI_SDIOIST);
+		mmc_signal_sdio_irq(host->mmc);
+	}
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t mmc_davinci_irq(int irq, void *dev_id)
 {
 	struct mmc_davinci_host *host = (struct mmc_davinci_host *)dev_id;
@@ -1076,11 +1113,32 @@ static int mmc_davinci_get_ro(struct mmc_host *mmc)
 	return config->get_ro(pdev->id);
 }
 
+static void mmc_davinci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct mmc_davinci_host *host = mmc_priv(mmc);
+
+	if (enable) {
+		if (!(readl(host->base + DAVINCI_SDIOST0) & SDIOST0_DAT1_HI)) {
+			writel(SDIOIST_IOINT, host->base + DAVINCI_SDIOIST);
+			mmc_signal_sdio_irq(host->mmc);
+		} else {
+			host->sdio_int = true;
+			writel(readl(host->base + DAVINCI_SDIOIEN) |
+			       SDIOIEN_IOINTEN, host->base + DAVINCI_SDIOIEN);
+		}
+	} else {
+		host->sdio_int = false;
+		writel(readl(host->base + DAVINCI_SDIOIEN) & ~SDIOIEN_IOINTEN,
+		       host->base + DAVINCI_SDIOIEN);
+	}
+}
+
 static struct mmc_host_ops mmc_davinci_ops = {
 	.request	= mmc_davinci_request,
 	.set_ios	= mmc_davinci_set_ios,
 	.get_cd		= mmc_davinci_get_cd,
 	.get_ro		= mmc_davinci_get_ro,
+	.enable_sdio_irq = mmc_davinci_enable_sdio_irq,
 };
 
 /*----------------------------------------------------------------------*/
@@ -1209,7 +1267,8 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev)
 		host->nr_sg = MAX_NR_SG;
 
 	host->use_dma = use_dma;
-	host->irq = irq;
+	host->mmc_irq = irq;
+	host->sdio_irq = platform_get_irq(pdev, 1);
 
 	if (host->use_dma && davinci_acquire_dma_channels(host) != 0)
 		host->use_dma = 0;
@@ -1270,6 +1329,13 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev)
 	if (ret)
 		goto out;
 
+	if (host->sdio_irq >= 0) {
+		ret = request_irq(host->sdio_irq, mmc_davinci_sdio_irq, 0,
+				  mmc_hostname(mmc), host);
+		if (!ret)
+			mmc->caps |= MMC_CAP_SDIO_IRQ;
+	}
+
 	rename_region(mem, mmc_hostname(mmc));
 
 	dev_info(mmc_dev(host->mmc), "Using %s, %d-bit mode\n",
@@ -1313,7 +1379,9 @@ static int __exit davinci_mmcsd_remove(struct platform_device *pdev)
 		mmc_davinci_cpufreq_deregister(host);
 
 		mmc_remove_host(host->mmc);
-		free_irq(host->irq, host);
+		free_irq(host->mmc_irq, host);
+		if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+			free_irq(host->sdio_irq, host);
 
 		davinci_release_dma_channels(host);
 
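The davinci change above fetches a second interrupt with platform_get_irq(pdev, 1) and only advertises MMC_CAP_SDIO_IRQ when that interrupt exists and request_irq() of mmc_davinci_sdio_irq() succeeds. As a hypothetical sketch (register window and interrupt numbers are placeholders, and the platform device name is defined elsewhere in the board code, not in this diff), a board wanting SDIO interrupts would list the SDIO line as the second IRQ resource:

#include <linux/ioport.h>

static struct resource board_mmcsd0_resources[] = {
	{
		.start	= 0x01e10000,		/* placeholder register window */
		.end	= 0x01e10000 + 0xff,
		.flags	= IORESOURCE_MEM,
	},
	{
		/* IRQ resource 0: MMC/SD interrupt, seen by platform_get_irq(pdev, 0) */
		.start	= 16,			/* placeholder */
		.flags	= IORESOURCE_IRQ,
	},
	{
		/* IRQ resource 1: SDIO interrupt, seen by platform_get_irq(pdev, 1) */
		.start	= 17,			/* placeholder */
		.flags	= IORESOURCE_IRQ,
	},
};

Boards that omit the second IRQ resource keep working unchanged; platform_get_irq(pdev, 1) then returns a negative value and the driver simply never sets MMC_CAP_SDIO_IRQ.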
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
new file mode 100644
index 000000000000..2fcc82577c1b
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc.c
@@ -0,0 +1,1796 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ *  (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/dw_mmc.h>
+#include <linux/bitops.h>
+
+#include "dw_mmc.h"
+
+/* Common flag combinations */
+#define DW_MCI_DATA_ERROR_FLAGS	(SDMMC_INT_DTO | SDMMC_INT_DCRC | \
+				 SDMMC_INT_HTO | SDMMC_INT_SBE  | \
+				 SDMMC_INT_EBE)
+#define DW_MCI_CMD_ERROR_FLAGS	(SDMMC_INT_RTO | SDMMC_INT_RCRC | \
+				 SDMMC_INT_RESP_ERR)
+#define DW_MCI_ERROR_FLAGS	(DW_MCI_DATA_ERROR_FLAGS | \
+				 DW_MCI_CMD_ERROR_FLAGS  | SDMMC_INT_HLE)
+#define DW_MCI_SEND_STATUS	1
+#define DW_MCI_RECV_STATUS	2
+#define DW_MCI_DMA_THRESHOLD	16
+
+#ifdef CONFIG_MMC_DW_IDMAC
+struct idmac_desc {
+	u32		des0;	/* Control Descriptor */
+#define IDMAC_DES0_DIC	BIT(1)
+#define IDMAC_DES0_LD	BIT(2)
+#define IDMAC_DES0_FD	BIT(3)
+#define IDMAC_DES0_CH	BIT(4)
+#define IDMAC_DES0_ER	BIT(5)
+#define IDMAC_DES0_CES	BIT(30)
+#define IDMAC_DES0_OWN	BIT(31)
+
+	u32		des1;	/* Buffer sizes */
+#define IDMAC_SET_BUFFER1_SIZE(d, s) \
+	((d)->des1 = ((d)->des1 & 0x03ffc000) | ((s) & 0x3fff))
+
+	u32		des2;	/* buffer 1 physical address */
+
+	u32		des3;	/* buffer 2 physical address */
+};
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+/**
+ * struct dw_mci_slot - MMC slot state
+ * @mmc: The mmc_host representing this slot.
+ * @host: The MMC controller this slot is using.
+ * @ctype: Card type for this slot.
+ * @mrq: mmc_request currently being processed or waiting to be
+ *	processed, or NULL when the slot is idle.
+ * @queue_node: List node for placing this node in the @queue list of
+ *	&struct dw_mci.
+ * @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @flags: Random state bits associated with the slot.
+ * @id: Number of this slot.
+ * @last_detect_state: Most recently observed card detect state.
+ */
+struct dw_mci_slot {
+	struct mmc_host		*mmc;
+	struct dw_mci		*host;
+
+	u32			ctype;
+
+	struct mmc_request	*mrq;
+	struct list_head	queue_node;
+
+	unsigned int		clock;
+	unsigned long		flags;
+#define DW_MMC_CARD_PRESENT	0
+#define DW_MMC_CARD_NEED_INIT	1
+	int			id;
+	int			last_detect_state;
+};
+
+#if defined(CONFIG_DEBUG_FS)
+static int dw_mci_req_show(struct seq_file *s, void *v)
+{
+	struct dw_mci_slot *slot = s->private;
+	struct mmc_request *mrq;
+	struct mmc_command *cmd;
+	struct mmc_command *stop;
+	struct mmc_data	*data;
+
+	/* Make sure we get a consistent snapshot */
+	spin_lock_bh(&slot->host->lock);
+	mrq = slot->mrq;
+
+	if (mrq) {
+		cmd = mrq->cmd;
+		data = mrq->data;
+		stop = mrq->stop;
+
+		if (cmd)
+			seq_printf(s,
+				   "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n",
+				   cmd->opcode, cmd->arg, cmd->flags,
+				   cmd->resp[0], cmd->resp[1], cmd->resp[2],
+				   cmd->resp[3], cmd->error);
+		if (data)
+			seq_printf(s, "DATA %u / %u * %u flg %x err %d\n",
+				   data->bytes_xfered, data->blocks,
+				   data->blksz, data->flags, data->error);
+		if (stop)
+			seq_printf(s,
+				   "CMD%u(0x%x) flg %x rsp %x %x %x %x err %d\n",
+				   stop->opcode, stop->arg, stop->flags,
+				   stop->resp[0], stop->resp[1], stop->resp[2],
+				   stop->resp[3], stop->error);
+	}
+
+	spin_unlock_bh(&slot->host->lock);
+
+	return 0;
+}
+
+static int dw_mci_req_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dw_mci_req_show, inode->i_private);
+}
+
+static const struct file_operations dw_mci_req_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dw_mci_req_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int dw_mci_regs_show(struct seq_file *s, void *v)
+{
+	struct dw_mci *host = s->private;
+
+	seq_printf(s, "STATUS:\t0x%08x\n", mci_readl(host, STATUS));
+	seq_printf(s, "RINTSTS:\t0x%08x\n", mci_readl(host, RINTSTS));
+	seq_printf(s, "CMD:\t0x%08x\n", mci_readl(host, CMD));
+	seq_printf(s, "CTRL:\t0x%08x\n", mci_readl(host, CTRL));
+	seq_printf(s, "INTMASK:\t0x%08x\n", mci_readl(host, INTMASK));
+	seq_printf(s, "CLKENA:\t0x%08x\n", mci_readl(host, CLKENA));
+
+	return 0;
+}
+
+static int dw_mci_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dw_mci_regs_show, inode->i_private);
+}
+
+static const struct file_operations dw_mci_regs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dw_mci_regs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void dw_mci_init_debugfs(struct dw_mci_slot *slot)
+{
+	struct mmc_host	*mmc = slot->mmc;
+	struct dw_mci *host = slot->host;
+	struct dentry *root;
+	struct dentry *node;
+
+	root = mmc->debugfs_root;
+	if (!root)
+		return;
+
+	node = debugfs_create_file("regs", S_IRUSR, root, host,
+				   &dw_mci_regs_fops);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_file("req", S_IRUSR, root, slot,
+				   &dw_mci_req_fops);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_u32("state", S_IRUSR, root, (u32 *)&host->state);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_x32("pending_events", S_IRUSR, root,
+				  (u32 *)&host->pending_events);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_x32("completed_events", S_IRUSR, root,
+				  (u32 *)&host->completed_events);
+	if (!node)
+		goto err;
+
+	return;
+
+err:
+	dev_err(&mmc->class_dev, "failed to initialize debugfs for slot\n");
+}
+#endif /* defined(CONFIG_DEBUG_FS) */
+
+static void dw_mci_set_timeout(struct dw_mci *host)
+{
+	/* timeout (maximum) */
+	mci_writel(host, TMOUT, 0xffffffff);
+}
+
+static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
+{
+	struct mmc_data	*data;
+	u32 cmdr;
+	cmd->error = -EINPROGRESS;
+
+	cmdr = cmd->opcode;
+
+	if (cmdr == MMC_STOP_TRANSMISSION)
+		cmdr |= SDMMC_CMD_STOP;
+	else
+		cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
+
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		/* We expect a response, so set this bit */
+		cmdr |= SDMMC_CMD_RESP_EXP;
+		if (cmd->flags & MMC_RSP_136)
+			cmdr |= SDMMC_CMD_RESP_LONG;
+	}
+
+	if (cmd->flags & MMC_RSP_CRC)
+		cmdr |= SDMMC_CMD_RESP_CRC;
+
+	data = cmd->data;
+	if (data) {
+		cmdr |= SDMMC_CMD_DAT_EXP;
+		if (data->flags & MMC_DATA_STREAM)
+			cmdr |= SDMMC_CMD_STRM_MODE;
+		if (data->flags & MMC_DATA_WRITE)
+			cmdr |= SDMMC_CMD_DAT_WR;
+	}
+
+	return cmdr;
+}
+
+static void dw_mci_start_command(struct dw_mci *host,
+				 struct mmc_command *cmd, u32 cmd_flags)
+{
+	host->cmd = cmd;
+	dev_vdbg(&host->pdev->dev,
+		 "start command: ARGR=0x%08x CMDR=0x%08x\n",
+		 cmd->arg, cmd_flags);
+
+	mci_writel(host, CMDARG, cmd->arg);
+	wmb();
+
+	mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START);
+}
+
+static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data)
+{
+	dw_mci_start_command(host, data->stop, host->stop_cmdr);
+}
+
+/* DMA interface functions */
+static void dw_mci_stop_dma(struct dw_mci *host)
+{
+	if (host->use_dma) {
+		host->dma_ops->stop(host);
+		host->dma_ops->cleanup(host);
+	} else {
+		/* Data transfer was stopped by the interrupt handler */
+		set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+	}
+}
+
+#ifdef CONFIG_MMC_DW_IDMAC
+static void dw_mci_dma_cleanup(struct dw_mci *host)
+{
+	struct mmc_data *data = host->data;
+
+	if (data)
+		dma_unmap_sg(&host->pdev->dev, data->sg, data->sg_len,
+			     ((data->flags & MMC_DATA_WRITE)
+			      ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+}
+
+static void dw_mci_idmac_stop_dma(struct dw_mci *host)
+{
+	u32 temp;
+
+	/* Disable and reset the IDMAC interface */
+	temp = mci_readl(host, CTRL);
+	temp &= ~SDMMC_CTRL_USE_IDMAC;
+	temp |= SDMMC_CTRL_DMA_RESET;
+	mci_writel(host, CTRL, temp);
+
+	/* Stop the IDMAC running */
+	temp = mci_readl(host, BMOD);
+	temp &= ~SDMMC_IDMAC_ENABLE;
+	mci_writel(host, BMOD, temp);
+}
+
+static void dw_mci_idmac_complete_dma(struct dw_mci *host)
+{
+	struct mmc_data *data = host->data;
+
+	dev_vdbg(&host->pdev->dev, "DMA complete\n");
+
+	host->dma_ops->cleanup(host);
+
+	/*
+	 * If the card was removed, data will be NULL. No point in trying to
+	 * send the stop command or waiting for NBUSY in this case.
+	 */
+	if (data) {
+		set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+		tasklet_schedule(&host->tasklet);
+	}
+}
+
+static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data,
+				    unsigned int sg_len)
+{
+	int i;
+	struct idmac_desc *desc = host->sg_cpu;
+
+	for (i = 0; i < sg_len; i++, desc++) {
+		unsigned int length = sg_dma_len(&data->sg[i]);
+		u32 mem_addr = sg_dma_address(&data->sg[i]);
+
+		/* Set the OWN bit and disable interrupts for this descriptor */
+		desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC | IDMAC_DES0_CH;
+
+		/* Buffer length */
+		IDMAC_SET_BUFFER1_SIZE(desc, length);
+
+		/* Physical address to DMA to/from */
+		desc->des2 = mem_addr;
+	}
+
+	/* Set first descriptor */
+	desc = host->sg_cpu;
+	desc->des0 |= IDMAC_DES0_FD;
+
+	/* Set last descriptor */
+	desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc);
+	desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC);
+	desc->des0 |= IDMAC_DES0_LD;
+
+	wmb();
+}
+
+static void dw_mci_idmac_start_dma(struct dw_mci *host, unsigned int sg_len)
+{
+	u32 temp;
+
+	dw_mci_translate_sglist(host, host->data, sg_len);
+
+	/* Select IDMAC interface */
+	temp = mci_readl(host, CTRL);
+	temp |= SDMMC_CTRL_USE_IDMAC;
+	mci_writel(host, CTRL, temp);
+
+	wmb();
+
+	/* Enable the IDMAC */
+	temp = mci_readl(host, BMOD);
+	temp |= SDMMC_IDMAC_ENABLE;
+	mci_writel(host, BMOD, temp);
+
+	/* Start it running */
+	mci_writel(host, PLDMND, 1);
+}
+
+static int dw_mci_idmac_init(struct dw_mci *host)
+{
+	struct idmac_desc *p;
+	int i;
+
+	/* Number of descriptors in the ring buffer */
+	host->ring_size = PAGE_SIZE / sizeof(struct idmac_desc);
+
+	/* Forward link the descriptor list */
+	for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++)
+		p->des3 = host->sg_dma + (sizeof(struct idmac_desc) * (i + 1));
+
+	/* Set the last descriptor as the end-of-ring descriptor */
+	p->des3 = host->sg_dma;
+	p->des0 = IDMAC_DES0_ER;
+
+	/* Mask out interrupts - get Tx & Rx complete only */
+	mci_writel(host, IDINTEN, SDMMC_IDMAC_INT_NI | SDMMC_IDMAC_INT_RI |
+		   SDMMC_IDMAC_INT_TI);
+
+	/* Set the descriptor base address */
+	mci_writel(host, DBADDR, host->sg_dma);
+	return 0;
+}
+
+static struct dw_mci_dma_ops dw_mci_idmac_ops = {
+	.init = dw_mci_idmac_init,
+	.start = dw_mci_idmac_start_dma,
+	.stop = dw_mci_idmac_stop_dma,
+	.complete = dw_mci_idmac_complete_dma,
+	.cleanup = dw_mci_dma_cleanup,
+};
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
+{
+	struct scatterlist *sg;
+	unsigned int i, direction, sg_len;
+	u32 temp;
+
+	/* If we don't have a channel, we can't do DMA */
+	if (!host->use_dma)
+		return -ENODEV;
+
+	/*
+	 * We don't do DMA on "complex" transfers, i.e. with
+	 * non-word-aligned buffers or lengths. Also, we don't bother
+	 * with all the DMA setup overhead for short transfers.
+	 */
+	if (data->blocks * data->blksz < DW_MCI_DMA_THRESHOLD)
+		return -EINVAL;
+	if (data->blksz & 3)
+		return -EINVAL;
+
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	if (data->flags & MMC_DATA_READ)
+		direction = DMA_FROM_DEVICE;
+	else
+		direction = DMA_TO_DEVICE;
+
+	sg_len = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len,
+			    direction);
+
+	dev_vdbg(&host->pdev->dev,
+		 "sd sg_cpu: %#lx sg_dma: %#lx sg_len: %d\n",
+		 (unsigned long)host->sg_cpu, (unsigned long)host->sg_dma,
+		 sg_len);
+
+	/* Enable the DMA interface */
+	temp = mci_readl(host, CTRL);
+	temp |= SDMMC_CTRL_DMA_ENABLE;
+	mci_writel(host, CTRL, temp);
+
+	/* Disable RX/TX IRQs, let DMA handle it */
+	temp = mci_readl(host, INTMASK);
+	temp  &= ~(SDMMC_INT_RXDR | SDMMC_INT_TXDR);
+	mci_writel(host, INTMASK, temp);
+
+	host->dma_ops->start(host, sg_len);
+
+	return 0;
+}
+
+static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
+{
+	u32 temp;
+
+	data->error = -EINPROGRESS;
+
+	WARN_ON(host->data);
+	host->sg = NULL;
+	host->data = data;
+
+	if (dw_mci_submit_data_dma(host, data)) {
+		host->sg = data->sg;
+		host->pio_offset = 0;
+		if (data->flags & MMC_DATA_READ)
+			host->dir_status = DW_MCI_RECV_STATUS;
+		else
+			host->dir_status = DW_MCI_SEND_STATUS;
+
+		temp = mci_readl(host, INTMASK);
+		temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR;
+		mci_writel(host, INTMASK, temp);
+
+		temp = mci_readl(host, CTRL);
+		temp &= ~SDMMC_CTRL_DMA_ENABLE;
+		mci_writel(host, CTRL, temp);
+	}
+}
+
+static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg)
+{
+	struct dw_mci *host = slot->host;
+	unsigned long timeout = jiffies + msecs_to_jiffies(500);
+	unsigned int cmd_status = 0;
+
+	mci_writel(host, CMDARG, arg);
+	wmb();
+	mci_writel(host, CMD, SDMMC_CMD_START | cmd);
+
+	while (time_before(jiffies, timeout)) {
+		cmd_status = mci_readl(host, CMD);
+		if (!(cmd_status & SDMMC_CMD_START))
+			return;
+	}
+	dev_err(&slot->mmc->class_dev,
+		"Timeout sending command (cmd %#x arg %#x status %#x)\n",
+		cmd, arg, cmd_status);
+}
+
+static void dw_mci_setup_bus(struct dw_mci_slot *slot)
+{
+	struct dw_mci *host = slot->host;
+	u32 div;
+
+	if (slot->clock != host->current_speed) {
+		if (host->bus_hz % slot->clock)
+			/*
+			 * move the + 1 after the divide to prevent
+			 * over-clocking the card.
+			 */
+			div = ((host->bus_hz / slot->clock) >> 1) + 1;
+		else
+			div = (host->bus_hz  / slot->clock) >> 1;
+
+		dev_info(&slot->mmc->class_dev,
+			 "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHz"
+			 " div = %d)\n", slot->id, host->bus_hz, slot->clock,
+			 div ? ((host->bus_hz / div) >> 1) : host->bus_hz, div);
+
+		/* disable clock */
+		mci_writel(host, CLKENA, 0);
+		mci_writel(host, CLKSRC, 0);
+
+		/* inform CIU */
+		mci_send_cmd(slot,
+			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+		/* set clock to desired speed */
+		mci_writel(host, CLKDIV, div);
+
+		/* inform CIU */
+		mci_send_cmd(slot,
+			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+		/* enable clock */
+		mci_writel(host, CLKENA, SDMMC_CLKEN_ENABLE);
+
+		/* inform CIU */
+		mci_send_cmd(slot,
+			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
+
+		host->current_speed = slot->clock;
+	}
+
+	/* Set the current slot bus width */
+	mci_writel(host, CTYPE, slot->ctype);
+}
+
+static void dw_mci_start_request(struct dw_mci *host,
+				 struct dw_mci_slot *slot)
+{
+	struct mmc_request *mrq;
+	struct mmc_command *cmd;
+	struct mmc_data	*data;
+	u32 cmdflags;
+
+	mrq = slot->mrq;
+	if (host->pdata->select_slot)
+		host->pdata->select_slot(slot->id);
+
+	/* Slot specific timing and width adjustment */
+	dw_mci_setup_bus(slot);
+
+	host->cur_slot = slot;
+	host->mrq = mrq;
+
+	host->pending_events = 0;
+	host->completed_events = 0;
+	host->data_status = 0;
+
+	data = mrq->data;
+	if (data) {
+		dw_mci_set_timeout(host);
+		mci_writel(host, BYTCNT, data->blksz*data->blocks);
+		mci_writel(host, BLKSIZ, data->blksz);
+	}
+
+	cmd = mrq->cmd;
+	cmdflags = dw_mci_prepare_command(slot->mmc, cmd);
+
+	/* this is the first command, send the initialization clock */
+	if (test_and_clear_bit(DW_MMC_CARD_NEED_INIT, &slot->flags))
+		cmdflags |= SDMMC_CMD_INIT;
+
+	if (data) {
+		dw_mci_submit_data(host, data);
+		wmb();
+	}
+
+	dw_mci_start_command(host, cmd, cmdflags);
+
+	if (mrq->stop)
+		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
+}
+
+static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
+				 struct mmc_request *mrq)
+{
+	dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
+		 host->state);
+
+	spin_lock_bh(&host->lock);
+	slot->mrq = mrq;
+
+	if (host->state == STATE_IDLE) {
+		host->state = STATE_SENDING_CMD;
+		dw_mci_start_request(host, slot);
+	} else {
+		list_add_tail(&slot->queue_node, &host->queue);
+	}
+
+	spin_unlock_bh(&host->lock);
+}
+
+static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
+
+	WARN_ON(slot->mrq);
+
+	if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	/* We don't support multiple blocks of weird lengths. */
+	dw_mci_queue_request(host, slot, mrq);
+}
+
+static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+
+	/* set default 1 bit mode */
+	slot->ctype = SDMMC_CTYPE_1BIT;
+
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_1:
+		slot->ctype = SDMMC_CTYPE_1BIT;
+		break;
+	case MMC_BUS_WIDTH_4:
+		slot->ctype = SDMMC_CTYPE_4BIT;
+		break;
+	}
+
+	if (ios->clock) {
+		/*
+		 * Use mirror of ios->clock to prevent race with mmc
+		 * core ios update when finding the minimum.
+		 */
+		slot->clock = ios->clock;
+	}
+
+	switch (ios->power_mode) {
+	case MMC_POWER_UP:
+		set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
+		break;
+	default:
+		break;
+	}
+}
+
+static int dw_mci_get_ro(struct mmc_host *mmc)
+{
+	int read_only;
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci_board *brd = slot->host->pdata;
+
+	/* Use platform get_ro function, else try on board write protect */
+	if (brd->get_ro)
+		read_only = brd->get_ro(slot->id);
+	else
+		read_only =
+			mci_readl(slot->host, WRTPRT) & (1 << slot->id) ? 1 : 0;
+
+	dev_dbg(&mmc->class_dev, "card is %s\n",
+		read_only ? "read-only" : "read-write");
+
+	return read_only;
+}
+
+static int dw_mci_get_cd(struct mmc_host *mmc)
+{
+	int present;
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci_board *brd = slot->host->pdata;
+
+	/* Use platform get_cd function, else try onboard card detect */
+	if (brd->get_cd)
+		present = !brd->get_cd(slot->id);
+	else
+		present = (mci_readl(slot->host, CDETECT) & (1 << slot->id))
+			== 0 ? 1 : 0;
+
+	if (present)
+		dev_dbg(&mmc->class_dev, "card is present\n");
+	else
+		dev_dbg(&mmc->class_dev, "card is not present\n");
+
+	return present;
+}
+
+static const struct mmc_host_ops dw_mci_ops = {
+	.request	= dw_mci_request,
+	.set_ios	= dw_mci_set_ios,
+	.get_ro		= dw_mci_get_ro,
+	.get_cd		= dw_mci_get_cd,
+};
+
+static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
+	__releases(&host->lock)
+	__acquires(&host->lock)
+{
+	struct dw_mci_slot *slot;
+	struct mmc_host	*prev_mmc = host->cur_slot->mmc;
+
+	WARN_ON(host->cmd || host->data);
+
+	host->cur_slot->mrq = NULL;
+	host->mrq = NULL;
+	if (!list_empty(&host->queue)) {
+		slot = list_entry(host->queue.next,
+				  struct dw_mci_slot, queue_node);
+		list_del(&slot->queue_node);
+		dev_vdbg(&host->pdev->dev, "list not empty: %s is next\n",
+			 mmc_hostname(slot->mmc));
+		host->state = STATE_SENDING_CMD;
+		dw_mci_start_request(host, slot);
+	} else {
+		dev_vdbg(&host->pdev->dev, "list empty\n");
+		host->state = STATE_IDLE;
+	}
+
+	spin_unlock(&host->lock);
+	mmc_request_done(prev_mmc, mrq);
+	spin_lock(&host->lock);
+}
+
+static void dw_mci_command_complete(struct dw_mci *host, struct mmc_command *cmd)
+{
+	u32 status = host->cmd_status;
+
+	host->cmd_status = 0;
+
+	/* Read the response from the card (up to 16 bytes) */
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_136) {
+			cmd->resp[3] = mci_readl(host, RESP0);
+			cmd->resp[2] = mci_readl(host, RESP1);
+			cmd->resp[1] = mci_readl(host, RESP2);
+			cmd->resp[0] = mci_readl(host, RESP3);
+		} else {
+			cmd->resp[0] = mci_readl(host, RESP0);
+			cmd->resp[1] = 0;
+			cmd->resp[2] = 0;
+			cmd->resp[3] = 0;
+		}
+	}
+
+	if (status & SDMMC_INT_RTO)
+		cmd->error = -ETIMEDOUT;
+	else if ((cmd->flags & MMC_RSP_CRC) && (status & SDMMC_INT_RCRC))
+		cmd->error = -EILSEQ;
+	else if (status & SDMMC_INT_RESP_ERR)
+		cmd->error = -EIO;
+	else
+		cmd->error = 0;
+
+	if (cmd->error) {
+		/* newer ip versions need a delay between retries */
+		if (host->quirks & DW_MCI_QUIRK_RETRY_DELAY)
+			mdelay(20);
+
+		if (cmd->data) {
+			host->data = NULL;
+			dw_mci_stop_dma(host);
+		}
+	}
+}
+
+static void dw_mci_tasklet_func(unsigned long priv)
+{
+	struct dw_mci *host = (struct dw_mci *)priv;
+	struct mmc_data	*data;
+	struct mmc_command *cmd;
+	enum dw_mci_state state;
+	enum dw_mci_state prev_state;
+	u32 status;
+
+	spin_lock(&host->lock);
+
+	state = host->state;
+	data = host->data;
+
+	do {
+		prev_state = state;
+
+		switch (state) {
+		case STATE_IDLE:
+			break;
+
+		case STATE_SENDING_CMD:
+			if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
+						&host->pending_events))
+				break;
+
+			cmd = host->cmd;
+			host->cmd = NULL;
+			set_bit(EVENT_CMD_COMPLETE, &host->completed_events);
+			dw_mci_command_complete(host, host->mrq->cmd);
+			if (!host->mrq->data || cmd->error) {
+				dw_mci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
+			prev_state = state = STATE_SENDING_DATA;
+			/* fall through */
+
+		case STATE_SENDING_DATA:
+			if (test_and_clear_bit(EVENT_DATA_ERROR,
+					       &host->pending_events)) {
+				dw_mci_stop_dma(host);
+				if (data->stop)
+					send_stop_cmd(host, data);
+				state = STATE_DATA_ERROR;
+				break;
+			}
+
+			if (!test_and_clear_bit(EVENT_XFER_COMPLETE,
+						&host->pending_events))
+				break;
+
+			set_bit(EVENT_XFER_COMPLETE, &host->completed_events);
+			prev_state = state = STATE_DATA_BUSY;
+			/* fall through */
+
+		case STATE_DATA_BUSY:
+			if (!test_and_clear_bit(EVENT_DATA_COMPLETE,
+						&host->pending_events))
+				break;
+
+			host->data = NULL;
+			set_bit(EVENT_DATA_COMPLETE, &host->completed_events);
+			status = host->data_status;
+
+			if (status & DW_MCI_DATA_ERROR_FLAGS) {
+				if (status & SDMMC_INT_DTO) {
+					dev_err(&host->pdev->dev,
+						"data timeout error\n");
+					data->error = -ETIMEDOUT;
+				} else if (status & SDMMC_INT_DCRC) {
+					dev_err(&host->pdev->dev,
+						"data CRC error\n");
+					data->error = -EILSEQ;
+				} else {
+					dev_err(&host->pdev->dev,
+						"data FIFO error "
+						"(status=%08x)\n",
+						status);
+					data->error = -EIO;
+				}
+			} else {
+				data->bytes_xfered = data->blocks * data->blksz;
+				data->error = 0;
+			}
+
+			if (!data->stop) {
+				dw_mci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
+			prev_state = state = STATE_SENDING_STOP;
+			if (!data->error)
+				send_stop_cmd(host, data);
+			/* fall through */
+
+		case STATE_SENDING_STOP:
+			if (!test_and_clear_bit(EVENT_CMD_COMPLETE,
+						&host->pending_events))
+				break;
+
+			host->cmd = NULL;
+			dw_mci_command_complete(host, host->mrq->stop);
+			dw_mci_request_end(host, host->mrq);
+			goto unlock;
+
+		case STATE_DATA_ERROR:
+			if (!test_and_clear_bit(EVENT_XFER_COMPLETE,
+						&host->pending_events))
+				break;
+
+			state = STATE_DATA_BUSY;
+			break;
+		}
+	} while (state != prev_state);
+
+	host->state = state;
+unlock:
+	spin_unlock(&host->lock);
+
+}
+
+static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
+{
+	u16 *pdata = (u16 *)buf;
+
+	WARN_ON(cnt % 2 != 0);
+
+	cnt = cnt >> 1;
+	while (cnt > 0) {
+		mci_writew(host, DATA, *pdata++);
+		cnt--;
+	}
+}
+
+static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
+{
+	u16 *pdata = (u16 *)buf;
+
+	WARN_ON(cnt % 2 != 0);
+
+	cnt = cnt >> 1;
+	while (cnt > 0) {
+		*pdata++ = mci_readw(host, DATA);
+		cnt--;
+	}
+}
+
+static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
+{
+	u32 *pdata = (u32 *)buf;
+
+	WARN_ON(cnt % 4 != 0);
+	WARN_ON((unsigned long)pdata & 0x3);
+
+	cnt = cnt >> 2;
+	while (cnt > 0) {
+		mci_writel(host, DATA, *pdata++);
+		cnt--;
+	}
+}
+
+static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
+{
+	u32 *pdata = (u32 *)buf;
+
+	WARN_ON(cnt % 4 != 0);
+	WARN_ON((unsigned long)pdata & 0x3);
+
+	cnt = cnt >> 2;
+	while (cnt > 0) {
+		*pdata++ = mci_readl(host, DATA);
+		cnt--;
+	}
+}
+
+static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
+{
+	u64 *pdata = (u64 *)buf;
+
+	WARN_ON(cnt % 8 != 0);
+
+	cnt = cnt >> 3;
+	while (cnt > 0) {
+		mci_writeq(host, DATA, *pdata++);
+		cnt--;
+	}
+}
+
+static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
+{
+	u64 *pdata = (u64 *)buf;
+
+	WARN_ON(cnt % 8 != 0);
+
+	cnt = cnt >> 3;
+	while (cnt > 0) {
+		*pdata++ = mci_readq(host, DATA);
+		cnt--;
+	}
+}
+
+static void dw_mci_read_data_pio(struct dw_mci *host)
+{
+	struct scatterlist *sg = host->sg;
+	void *buf = sg_virt(sg);
+	unsigned int offset = host->pio_offset;
+	struct mmc_data	*data = host->data;
+	int shift = host->data_shift;
+	u32 status;
+	unsigned int nbytes = 0, len, old_len, count = 0;
+
+	do {
+		len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
+		if (count == 0)
+			old_len = len;
+
+		if (offset + len <= sg->length) {
+			host->pull_data(host, (void *)(buf + offset), len);
+
+			offset += len;
+			nbytes += len;
+
+			if (offset == sg->length) {
+				flush_dcache_page(sg_page(sg));
+				host->sg = sg = sg_next(sg);
+				if (!sg)
+					goto done;
+
+				offset = 0;
+				buf = sg_virt(sg);
+			}
+		} else {
+			unsigned int remaining = sg->length - offset;
+			host->pull_data(host, (void *)(buf + offset),
+					remaining);
+			nbytes += remaining;
+
+			flush_dcache_page(sg_page(sg));
+			host->sg = sg = sg_next(sg);
+			if (!sg)
+				goto done;
+
+			offset = len - remaining;
+			buf = sg_virt(sg);
+			host->pull_data(host, buf, offset);
+			nbytes += offset;
+		}
+
+		status = mci_readl(host, MINTSTS);
+		mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
+		if (status & DW_MCI_DATA_ERROR_FLAGS) {
+			host->data_status = status;
+			data->bytes_xfered += nbytes;
+			smp_wmb();
+
+			set_bit(EVENT_DATA_ERROR, &host->pending_events);
+
+			tasklet_schedule(&host->tasklet);
+			return;
+		}
+		count++;
+	} while (status & SDMMC_INT_RXDR); /* if RXDR is still set, read again */
+	len = SDMMC_GET_FCNT(mci_readl(host, STATUS));
+	host->pio_offset = offset;
+	data->bytes_xfered += nbytes;
+	return;
+
+done:
+	data->bytes_xfered += nbytes;
+	smp_wmb();
+	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+}
+
+static void dw_mci_write_data_pio(struct dw_mci *host)
+{
+	struct scatterlist *sg = host->sg;
+	void *buf = sg_virt(sg);
+	unsigned int offset = host->pio_offset;
+	struct mmc_data	*data = host->data;
+	int shift = host->data_shift;
+	u32 status;
+	unsigned int nbytes = 0, len;
+
+	do {
+		len = SDMMC_FIFO_SZ -
+			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
+		if (offset + len <= sg->length) {
+			host->push_data(host, (void *)(buf + offset), len);
+
+			offset += len;
+			nbytes += len;
+			if (offset == sg->length) {
+				host->sg = sg = sg_next(sg);
+				if (!sg)
+					goto done;
+
+				offset = 0;
+				buf = sg_virt(sg);
+			}
+		} else {
+			unsigned int remaining = sg->length - offset;
+
+			host->push_data(host, (void *)(buf + offset),
+					remaining);
+			nbytes += remaining;
+
+			host->sg = sg = sg_next(sg);
+			if (!sg)
+				goto done;
+
+			offset = len - remaining;
+			buf = sg_virt(sg);
+			host->push_data(host, (void *)buf, offset);
+			nbytes += offset;
+		}
+
+		status = mci_readl(host, MINTSTS);
+		mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
+		if (status & DW_MCI_DATA_ERROR_FLAGS) {
+			host->data_status = status;
+			data->bytes_xfered += nbytes;
+
+			smp_wmb();
+
+			set_bit(EVENT_DATA_ERROR, &host->pending_events);
+
+			tasklet_schedule(&host->tasklet);
+			return;
+		}
+	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
+
+	host->pio_offset = offset;
+	data->bytes_xfered += nbytes;
+
+	return;
+
+done:
+	data->bytes_xfered += nbytes;
+	smp_wmb();
+	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
+}
+
+static void dw_mci_cmd_interrupt(struct dw_mci *host, u32 status)
+{
+	if (!host->cmd_status)
+		host->cmd_status = status;
+
+	smp_wmb();
+
+	set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+	tasklet_schedule(&host->tasklet);
+}
+
+static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
+{
+	struct dw_mci *host = dev_id;
+	u32 status, pending;
+	unsigned int pass_count = 0;
+
+	do {
+		status = mci_readl(host, RINTSTS);
+		pending = mci_readl(host, MINTSTS); /* read-only mask reg */
+
+		/*
+		 * DTO fix - version 2.10a and below, and only if internal DMA
+		 * is configured.
+		 */
+		if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) {
+			if (!pending &&
+			    ((mci_readl(host, STATUS) >> 17) & 0x1fff))
+				pending |= SDMMC_INT_DATA_OVER;
+		}
+
+		if (!pending)
+			break;
+
+		if (pending & DW_MCI_CMD_ERROR_FLAGS) {
+			mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS);
+			host->cmd_status = status;
+			smp_wmb();
+			set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
+		}
+
+		if (pending & DW_MCI_DATA_ERROR_FLAGS) {
+			/* if there is an error report DATA_ERROR */
+			mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS);
+			host->data_status = status;
+			smp_wmb();
+			set_bit(EVENT_DATA_ERROR, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
+		}
+
+		if (pending & SDMMC_INT_DATA_OVER) {
+			mci_writel(host, RINTSTS, SDMMC_INT_DATA_OVER);
+			if (!host->data_status)
+				host->data_status = status;
+			smp_wmb();
+			if (host->dir_status == DW_MCI_RECV_STATUS) {
+				if (host->sg != NULL)
+					dw_mci_read_data_pio(host);
+			}
+			set_bit(EVENT_DATA_COMPLETE, &host->pending_events);
+			tasklet_schedule(&host->tasklet);
+		}
+
+		if (pending & SDMMC_INT_RXDR) {
+			mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
+			if (host->sg)
+				dw_mci_read_data_pio(host);
+		}
+
+		if (pending & SDMMC_INT_TXDR) {
+			mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
+			if (host->sg)
+				dw_mci_write_data_pio(host);
+		}
+
+		if (pending & SDMMC_INT_CMD_DONE) {
+			mci_writel(host, RINTSTS, SDMMC_INT_CMD_DONE);
+			dw_mci_cmd_interrupt(host, status);
+		}
+
+		if (pending & SDMMC_INT_CD) {
+			mci_writel(host, RINTSTS, SDMMC_INT_CD);
+			tasklet_schedule(&host->card_tasklet);
+		}
+
+	} while (pass_count++ < 5);
+
+#ifdef CONFIG_MMC_DW_IDMAC
+	/* Handle DMA interrupts */
+	pending = mci_readl(host, IDSTS);
+	if (pending & (SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI)) {
+		mci_writel(host, IDSTS, SDMMC_IDMAC_INT_TI | SDMMC_IDMAC_INT_RI);
+		mci_writel(host, IDSTS, SDMMC_IDMAC_INT_NI);
+		set_bit(EVENT_DATA_COMPLETE, &host->pending_events);
+		host->dma_ops->complete(host);
+	}
+#endif
+
+	return IRQ_HANDLED;
+}
+
+static void dw_mci_tasklet_card(unsigned long data)
+{
+	struct dw_mci *host = (struct dw_mci *)data;
+	int i;
+
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+		struct mmc_host *mmc = slot->mmc;
+		struct mmc_request *mrq;
+		int present;
+		u32 ctrl;
+
+		present = dw_mci_get_cd(mmc);
+		while (present != slot->last_detect_state) {
+			spin_lock(&host->lock);
+
+			dev_dbg(&slot->mmc->class_dev, "card %s\n",
+				present ? "inserted" : "removed");
+
+			/* Card change detected */
+			slot->last_detect_state = present;
+
+			/* Power up slot */
+			if (present != 0) {
+				if (host->pdata->setpower)
+					host->pdata->setpower(slot->id,
+							      mmc->ocr_avail);
+
+				set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+			}
+
+			/* Clean up queue if present */
+			mrq = slot->mrq;
+			if (mrq) {
+				if (mrq == host->mrq) {
+					host->data = NULL;
+					host->cmd = NULL;
+
+					switch (host->state) {
+					case STATE_IDLE:
+						break;
+					case STATE_SENDING_CMD:
+						mrq->cmd->error = -ENOMEDIUM;
+						if (!mrq->data)
+							break;
+						/* fall through */
+					case STATE_SENDING_DATA:
+						mrq->data->error = -ENOMEDIUM;
+						dw_mci_stop_dma(host);
+						break;
+					case STATE_DATA_BUSY:
+					case STATE_DATA_ERROR:
+						if (mrq->data->error == -EINPROGRESS)
+							mrq->data->error = -ENOMEDIUM;
+						if (!mrq->stop)
+							break;
+						/* fall through */
+					case STATE_SENDING_STOP:
+						mrq->stop->error = -ENOMEDIUM;
+						break;
+					}
+
+					dw_mci_request_end(host, mrq);
+				} else {
+					list_del(&slot->queue_node);
+					mrq->cmd->error = -ENOMEDIUM;
+					if (mrq->data)
+						mrq->data->error = -ENOMEDIUM;
+					if (mrq->stop)
+						mrq->stop->error = -ENOMEDIUM;
+
+					spin_unlock(&host->lock);
+					mmc_request_done(slot->mmc, mrq);
+					spin_lock(&host->lock);
+				}
+			}
+
+			/* Power down slot */
+			if (present == 0) {
+				if (host->pdata->setpower)
+					host->pdata->setpower(slot->id, 0);
+				clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+
+				/*
+				 * Clear down the FIFO - doing so generates a
+				 * block interrupt, hence setting the
+				 * scatter-gather pointer to NULL.
+				 */
+				host->sg = NULL;
+
+				ctrl = mci_readl(host, CTRL);
+				ctrl |= SDMMC_CTRL_FIFO_RESET;
+				mci_writel(host, CTRL, ctrl);
+
+#ifdef CONFIG_MMC_DW_IDMAC
+				ctrl = mci_readl(host, BMOD);
+				ctrl |= 0x01; /* Software reset of DMA */
+				mci_writel(host, BMOD, ctrl);
+#endif
+
+			}
+
+			spin_unlock(&host->lock);
+			present = dw_mci_get_cd(mmc);
+		}
+
+		mmc_detect_change(slot->mmc,
+			msecs_to_jiffies(host->pdata->detect_delay_ms));
+	}
+}
+
+static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
+{
+	struct mmc_host *mmc;
+	struct dw_mci_slot *slot;
+
+	mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), &host->pdev->dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	slot = mmc_priv(mmc);
+	slot->id = id;
+	slot->mmc = mmc;
+	slot->host = host;
+
+	mmc->ops = &dw_mci_ops;
+	mmc->f_min = DIV_ROUND_UP(host->bus_hz, 510);
+	mmc->f_max = host->bus_hz;
+
+	if (host->pdata->get_ocr)
+		mmc->ocr_avail = host->pdata->get_ocr(id);
+	else
+		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+
+	/*
+	 * Start with slot power disabled, it will be enabled when a card
+	 * is detected.
+	 */
+	if (host->pdata->setpower)
+		host->pdata->setpower(id, 0);
+
+	mmc->caps = 0;
+	if (host->pdata->get_bus_wd)
+		if (host->pdata->get_bus_wd(slot->id) >= 4)
+			mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+	if (host->pdata->quirks & DW_MCI_QUIRK_HIGHSPEED)
+		mmc->caps |= MMC_CAP_SD_HIGHSPEED;
+
+#ifdef CONFIG_MMC_DW_IDMAC
+	mmc->max_segs = host->ring_size;
+	mmc->max_blk_size = 65536;
+	mmc->max_blk_count = host->ring_size;
+	mmc->max_seg_size = 0x1000;
+	mmc->max_req_size = mmc->max_seg_size * mmc->max_blk_count;
+#else
+	if (host->pdata->blk_settings) {
+		mmc->max_segs = host->pdata->blk_settings->max_segs;
+		mmc->max_blk_size = host->pdata->blk_settings->max_blk_size;
+		mmc->max_blk_count = host->pdata->blk_settings->max_blk_count;
+		mmc->max_req_size = host->pdata->blk_settings->max_req_size;
+		mmc->max_seg_size = host->pdata->blk_settings->max_seg_size;
+	} else {
+		/* Useful defaults if platform data is unset. */
+		mmc->max_segs = 64;
+		mmc->max_blk_size = 65536; /* BLKSIZ is 16 bits */
+		mmc->max_blk_count = 512;
+		mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+		mmc->max_seg_size = mmc->max_req_size;
+	}
+#endif /* CONFIG_MMC_DW_IDMAC */
+
+	if (dw_mci_get_cd(mmc))
+		set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+	else
+		clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+
+	host->slot[id] = slot;
+	mmc_add_host(mmc);
+
+#if defined(CONFIG_DEBUG_FS)
+	dw_mci_init_debugfs(slot);
+#endif
+
+	/* Card initially undetected */
+	slot->last_detect_state = 0;
+
+	return 0;
+}
+
+static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
+{
+	/* Shutdown detect IRQ */
+	if (slot->host->pdata->exit)
+		slot->host->pdata->exit(id);
+
+	/* Debugfs stuff is cleaned up by mmc core */
+	mmc_remove_host(slot->mmc);
+	slot->host->slot[id] = NULL;
+	mmc_free_host(slot->mmc);
+}
+
+static void dw_mci_init_dma(struct dw_mci *host)
+{
+	/* Alloc memory for sg translation */
+	host->sg_cpu = dma_alloc_coherent(&host->pdev->dev, PAGE_SIZE,
+					  &host->sg_dma, GFP_KERNEL);
+	if (!host->sg_cpu) {
+		dev_err(&host->pdev->dev, "%s: could not alloc DMA memory\n",
+			__func__);
+		goto no_dma;
+	}
+
+	/* Determine which DMA interface to use */
+#ifdef CONFIG_MMC_DW_IDMAC
+	host->dma_ops = &dw_mci_idmac_ops;
+	dev_info(&host->pdev->dev, "Using internal DMA controller.\n");
+#endif
+
+	if (!host->dma_ops)
+		goto no_dma;
+
+	if (host->dma_ops->init) {
+		if (host->dma_ops->init(host)) {
+			dev_err(&host->pdev->dev, "%s: Unable to initialize "
+				"DMA Controller.\n", __func__);
+			goto no_dma;
+		}
+	} else {
+		dev_err(&host->pdev->dev, "DMA initialization not found.\n");
+		goto no_dma;
+	}
+
+	host->use_dma = 1;
+	return;
+
+no_dma:
+	dev_info(&host->pdev->dev, "Using PIO mode.\n");
+	host->use_dma = 0;
+	return;
+}
+
+static bool mci_wait_reset(struct device *dev, struct dw_mci *host)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(500);
+	unsigned int ctrl;
+
+	mci_writel(host, CTRL, (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET |
+				SDMMC_CTRL_DMA_RESET));
+
+	/* wait till resets clear */
+	do {
+		ctrl = mci_readl(host, CTRL);
+		if (!(ctrl & (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET |
+			      SDMMC_CTRL_DMA_RESET)))
+			return true;
+	} while (time_before(jiffies, timeout));
+
+	dev_err(dev, "Timeout resetting block (ctrl %#x)\n", ctrl);
+
+	return false;
+}
+
+static int dw_mci_probe(struct platform_device *pdev)
+{
+	struct dw_mci *host;
+	struct resource	*regs;
+	struct dw_mci_board *pdata;
+	int irq, ret, i, width;
+	u32 fifo_size;
+
+	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!regs)
+		return -ENXIO;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	host = kzalloc(sizeof(struct dw_mci), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	host->pdev = pdev;
+	host->pdata = pdata = pdev->dev.platform_data;
+	if (!pdata || !pdata->init) {
+		dev_err(&pdev->dev,
+			"Platform data must supply init function\n");
+		ret = -ENODEV;
+		goto err_freehost;
+	}
+
+	if (!pdata->select_slot && pdata->num_slots > 1) {
+		dev_err(&pdev->dev,
+			"Platform data must supply select_slot function\n");
+		ret = -ENODEV;
+		goto err_freehost;
+	}
+
+	if (!pdata->bus_hz) {
+		dev_err(&pdev->dev,
+			"Platform data must supply bus speed\n");
+		ret = -ENODEV;
+		goto err_freehost;
+	}
+
+	host->bus_hz = pdata->bus_hz;
+	host->quirks = pdata->quirks;
+
+	spin_lock_init(&host->lock);
+	INIT_LIST_HEAD(&host->queue);
+
+	ret = -ENOMEM;
+	host->regs = ioremap(regs->start, regs->end - regs->start + 1);
+	if (!host->regs)
+		goto err_freehost;
+
+	host->dma_ops = pdata->dma_ops;
+	dw_mci_init_dma(host);
+
+	/*
+	 * Get the host data width - this assumes that HCON has been set with
+	 * the correct values.
+	 */
+	i = (mci_readl(host, HCON) >> 7) & 0x7;
+	if (!i) {
+		host->push_data = dw_mci_push_data16;
+		host->pull_data = dw_mci_pull_data16;
+		width = 16;
+		host->data_shift = 1;
+	} else if (i == 2) {
+		host->push_data = dw_mci_push_data64;
+		host->pull_data = dw_mci_pull_data64;
+		width = 64;
+		host->data_shift = 3;
+	} else {
+		/* Check for a reserved value, and warn if it is */
+		WARN((i != 1),
+		     "HCON reports a reserved host data width!\n"
+		     "Defaulting to 32-bit access.\n");
+		host->push_data = dw_mci_push_data32;
+		host->pull_data = dw_mci_pull_data32;
+		width = 32;
+		host->data_shift = 2;
+	}
+
+	/* Reset all blocks */
+	if (!mci_wait_reset(&pdev->dev, host)) {
+		ret = -ENODEV;
+		goto err_dmaunmap;
+	}
+
+	/* Clear the interrupts for the host controller */
+	mci_writel(host, RINTSTS, 0xFFFFFFFF);
+	mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */
+
+	/* Put in max timeout */
+	mci_writel(host, TMOUT, 0xFFFFFFFF);
+
+	/*
+	 * FIFO threshold settings  RxMark  = fifo_size / 2 - 1,
+	 *                          Tx Mark = fifo_size / 2 DMA Size = 8
+	 */
+	fifo_size = mci_readl(host, FIFOTH);
+	fifo_size = (fifo_size >> 16) & 0x7ff;
+	mci_writel(host, FIFOTH, ((0x2 << 28) | ((fifo_size/2 - 1) << 16) |
+				  ((fifo_size/2) << 0)));
+
+	/* disable clock to CIU */
+	mci_writel(host, CLKENA, 0);
+	mci_writel(host, CLKSRC, 0);
+
+	tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
+	tasklet_init(&host->card_tasklet,
+		     dw_mci_tasklet_card, (unsigned long)host);
+
+	ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host);
+	if (ret)
+		goto err_dmaunmap;
+
+	platform_set_drvdata(pdev, host);
+
+	if (host->pdata->num_slots)
+		host->num_slots = host->pdata->num_slots;
+	else
+		host->num_slots = ((mci_readl(host, HCON) >> 1) & 0x1F) + 1;
+
+	/* We need at least one slot to succeed */
+	for (i = 0; i < host->num_slots; i++) {
+		ret = dw_mci_init_slot(host, i);
+		if (ret) {
+			ret = -ENODEV;
+			goto err_init_slot;
+		}
+	}
+
+	/*
+	 * Enable interrupts for command done, data over, data empty, card det,
+	 * receive ready and error such as transmit, receive timeout, crc error
+	 */
+	mci_writel(host, RINTSTS, 0xFFFFFFFF);
+	mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
+		   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
+		   DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+	mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
+
+	dev_info(&pdev->dev, "DW MMC controller at irq %d, "
+		 "%d bit host data width\n", irq, width);
+	if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
+		dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n");
+
+	return 0;
+
+err_init_slot:
+	/* De-init any initialized slots */
+	while (i > 0) {
+		i--;
+		if (host->slot[i])
+			dw_mci_cleanup_slot(host->slot[i], i);
+	}
+	free_irq(irq, host);
+
+err_dmaunmap:
+	if (host->use_dma && host->dma_ops->exit)
+		host->dma_ops->exit(host);
+	dma_free_coherent(&host->pdev->dev, PAGE_SIZE,
+			  host->sg_cpu, host->sg_dma);
+	iounmap(host->regs);
+
+err_freehost:
+	kfree(host);
+	return ret;
+}
+
+static int __exit dw_mci_remove(struct platform_device *pdev)
+{
+	struct dw_mci *host = platform_get_drvdata(pdev);
+	int i;
+
+	mci_writel(host, RINTSTS, 0xFFFFFFFF);
+	mci_writel(host, INTMASK, 0); /* disable all mmc interrupt first */
+
+	platform_set_drvdata(pdev, NULL);
+
+	for (i = 0; i < host->num_slots; i++) {
+		dev_dbg(&pdev->dev, "remove slot %d\n", i);
+		if (host->slot[i])
+			dw_mci_cleanup_slot(host->slot[i], i);
+	}
+
+	/* disable clock to CIU */
+	mci_writel(host, CLKENA, 0);
+	mci_writel(host, CLKSRC, 0);
+
+	free_irq(platform_get_irq(pdev, 0), host);
+	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
+
+	if (host->use_dma && host->dma_ops->exit)
+		host->dma_ops->exit(host);
+
+	iounmap(host->regs);
+
+	kfree(host);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * TODO: we should probably disable the clock to the card in the suspend path.
+ */
+static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg)
+{
+	int i, ret;
+	struct dw_mci *host = platform_get_drvdata(pdev);
+
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+		if (!slot)
+			continue;
+		ret = mmc_suspend_host(slot->mmc);
+		if (ret < 0) {
+			while (--i >= 0) {
+				slot = host->slot[i];
+				if (slot)
+					mmc_resume_host(host->slot[i]->mmc);
+			}
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int dw_mci_resume(struct platform_device *pdev)
+{
+	int i, ret;
+	struct dw_mci *host = platform_get_drvdata(pdev);
+
+	for (i = 0; i < host->num_slots; i++) {
+		struct dw_mci_slot *slot = host->slot[i];
+		if (!slot)
+			continue;
+		ret = mmc_resume_host(host->slot[i]->mmc);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+#else
+#define dw_mci_suspend	NULL
+#define dw_mci_resume	NULL
+#endif /* CONFIG_PM */
+
+static struct platform_driver dw_mci_driver = {
+	.remove		= __exit_p(dw_mci_remove),
+	.suspend	= dw_mci_suspend,
+	.resume		= dw_mci_resume,
+	.driver		= {
+		.name		= "dw_mmc",
+	},
+};
+
+static int __init dw_mci_init(void)
+{
+	return platform_driver_probe(&dw_mci_driver, dw_mci_probe);
+}
+
+static void __exit dw_mci_exit(void)
+{
+	platform_driver_unregister(&dw_mci_driver);
+}
+
+module_init(dw_mci_init);
+module_exit(dw_mci_exit);
+
+MODULE_DESCRIPTION("DW Multimedia Card Interface driver");
+MODULE_AUTHOR("NXP Semiconductor VietNam");
+MODULE_AUTHOR("Imagination Technologies Ltd");
+MODULE_LICENSE("GPL v2");
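dw_mci_probe() above refuses to bind unless platform data supplies at least .init and .bus_hz, plus .select_slot when more than one slot is populated. The fragment below is a hypothetical board-file sketch, not part of this commit: the addresses, interrupt number and clock rate are placeholders, and the full struct dw_mci_board definition lives in <linux/mmc/dw_mmc.h>, which this diff does not add.

#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/mmc/dw_mmc.h>

static struct resource board_dw_mci_resources[] = {
	{
		.start	= 0x12340000,			/* placeholder register base */
		.end	= 0x12340000 + 0xfff,
		.flags	= IORESOURCE_MEM,
	},
	{
		.start	= 42,				/* placeholder interrupt line */
		.flags	= IORESOURCE_IRQ,
	},
};

static struct dw_mci_board board_dw_mci_pdata = {
	.num_slots	 = 1,
	.bus_hz		 = 80 * 1000 * 1000,		/* placeholder controller clock */
	.detect_delay_ms = 200,
	.quirks		 = DW_MCI_QUIRK_HIGHSPEED,
	/* .init is mandatory; its prototype comes from <linux/mmc/dw_mmc.h> */
};

static struct platform_device board_dw_mci_device = {
	.name		= "dw_mmc",			/* matches dw_mci_driver above */
	.id		= -1,
	.resource	= board_dw_mci_resources,
	.num_resources	= ARRAY_SIZE(board_dw_mci_resources),
	.dev		= {
		.platform_data	= &board_dw_mci_pdata,
	},
};

Registering board_dw_mci_device with platform_device_register() from board init code would then let the "dw_mmc" driver bind against it.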
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
new file mode 100644
index 000000000000..5dd55a75233d
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc.h
@@ -0,0 +1,168 @@
+/*
+ * Synopsys DesignWare Multimedia Card Interface driver
+ *  (Based on NXP driver for lpc 31xx)
+ *
+ * Copyright (C) 2009 NXP Semiconductors
+ * Copyright (C) 2009, 2010 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _DW_MMC_H_
+#define _DW_MMC_H_
+
+#define SDMMC_CTRL		0x000
+#define SDMMC_PWREN		0x004
+#define SDMMC_CLKDIV		0x008
+#define SDMMC_CLKSRC		0x00c
+#define SDMMC_CLKENA		0x010
+#define SDMMC_TMOUT		0x014
+#define SDMMC_CTYPE		0x018
+#define SDMMC_BLKSIZ		0x01c
+#define SDMMC_BYTCNT		0x020
+#define SDMMC_INTMASK		0x024
+#define SDMMC_CMDARG		0x028
+#define SDMMC_CMD		0x02c
+#define SDMMC_RESP0		0x030
+#define SDMMC_RESP1		0x034
+#define SDMMC_RESP2		0x038
+#define SDMMC_RESP3		0x03c
+#define SDMMC_MINTSTS		0x040
+#define SDMMC_RINTSTS		0x044
+#define SDMMC_STATUS		0x048
+#define SDMMC_FIFOTH		0x04c
+#define SDMMC_CDETECT		0x050
+#define SDMMC_WRTPRT		0x054
+#define SDMMC_GPIO		0x058
+#define SDMMC_TCBCNT		0x05c
+#define SDMMC_TBBCNT		0x060
+#define SDMMC_DEBNCE		0x064
+#define SDMMC_USRID		0x068
+#define SDMMC_VERID		0x06c
+#define SDMMC_HCON		0x070
+#define SDMMC_BMOD		0x080
+#define SDMMC_PLDMND		0x084
+#define SDMMC_DBADDR		0x088
+#define SDMMC_IDSTS		0x08c
+#define SDMMC_IDINTEN		0x090
+#define SDMMC_DSCADDR		0x094
+#define SDMMC_BUFADDR		0x098
+#define SDMMC_DATA		0x100
+#define SDMMC_DATA_ADR		0x100
+
+/* shift bit field */
+#define _SBF(f, v)		((v) << (f))
+
+/* Control register defines */
+#define SDMMC_CTRL_USE_IDMAC		BIT(25)
+#define SDMMC_CTRL_CEATA_INT_EN		BIT(11)
+#define SDMMC_CTRL_SEND_AS_CCSD		BIT(10)
+#define SDMMC_CTRL_SEND_CCSD		BIT(9)
+#define SDMMC_CTRL_ABRT_READ_DATA	BIT(8)
+#define SDMMC_CTRL_SEND_IRQ_RESP	BIT(7)
+#define SDMMC_CTRL_READ_WAIT		BIT(6)
+#define SDMMC_CTRL_DMA_ENABLE		BIT(5)
+#define SDMMC_CTRL_INT_ENABLE		BIT(4)
+#define SDMMC_CTRL_DMA_RESET		BIT(2)
+#define SDMMC_CTRL_FIFO_RESET		BIT(1)
+#define SDMMC_CTRL_RESET		BIT(0)
+/* Clock Enable register defines */
+#define SDMMC_CLKEN_LOW_PWR		BIT(16)
+#define SDMMC_CLKEN_ENABLE		BIT(0)
+/* time-out register defines */
+#define SDMMC_TMOUT_DATA(n)		_SBF(8, (n))
+#define SDMMC_TMOUT_DATA_MSK		0xFFFFFF00
+#define SDMMC_TMOUT_RESP(n)		((n) & 0xFF)
+#define SDMMC_TMOUT_RESP_MSK		0xFF
+/* card-type register defines */
+#define SDMMC_CTYPE_8BIT		BIT(16)
+#define SDMMC_CTYPE_4BIT		BIT(0)
+#define SDMMC_CTYPE_1BIT		0
+/* Interrupt status & mask register defines */
+#define SDMMC_INT_SDIO			BIT(16)
+#define SDMMC_INT_EBE			BIT(15)
+#define SDMMC_INT_ACD			BIT(14)
+#define SDMMC_INT_SBE			BIT(13)
+#define SDMMC_INT_HLE			BIT(12)
+#define SDMMC_INT_FRUN			BIT(11)
+#define SDMMC_INT_HTO			BIT(10)
+#define SDMMC_INT_DTO			BIT(9)
+#define SDMMC_INT_RTO			BIT(8)
+#define SDMMC_INT_DCRC			BIT(7)
+#define SDMMC_INT_RCRC			BIT(6)
+#define SDMMC_INT_RXDR			BIT(5)
+#define SDMMC_INT_TXDR			BIT(4)
+#define SDMMC_INT_DATA_OVER		BIT(3)
+#define SDMMC_INT_CMD_DONE		BIT(2)
+#define SDMMC_INT_RESP_ERR		BIT(1)
+#define SDMMC_INT_CD			BIT(0)
+#define SDMMC_INT_ERROR			0xbfc2
+/* Command register defines */
+#define SDMMC_CMD_START			BIT(31)
+#define SDMMC_CMD_CCS_EXP		BIT(23)
+#define SDMMC_CMD_CEATA_RD		BIT(22)
+#define SDMMC_CMD_UPD_CLK		BIT(21)
+#define SDMMC_CMD_INIT			BIT(15)
+#define SDMMC_CMD_STOP			BIT(14)
+#define SDMMC_CMD_PRV_DAT_WAIT		BIT(13)
+#define SDMMC_CMD_SEND_STOP		BIT(12)
+#define SDMMC_CMD_STRM_MODE		BIT(11)
+#define SDMMC_CMD_DAT_WR		BIT(10)
+#define SDMMC_CMD_DAT_EXP		BIT(9)
+#define SDMMC_CMD_RESP_CRC		BIT(8)
+#define SDMMC_CMD_RESP_LONG		BIT(7)
+#define SDMMC_CMD_RESP_EXP		BIT(6)
+#define SDMMC_CMD_INDX(n)		((n) & 0x1F)
+/* Status register defines */
+#define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FF)
+#define SDMMC_FIFO_SZ			32
+/* Internal DMAC interrupt defines */
+#define SDMMC_IDMAC_INT_AI		BIT(9)
+#define SDMMC_IDMAC_INT_NI		BIT(8)
+#define SDMMC_IDMAC_INT_CES		BIT(5)
+#define SDMMC_IDMAC_INT_DU		BIT(4)
+#define SDMMC_IDMAC_INT_FBE		BIT(2)
+#define SDMMC_IDMAC_INT_RI		BIT(1)
+#define SDMMC_IDMAC_INT_TI		BIT(0)
+/* Internal DMAC bus mode bits */
+#define SDMMC_IDMAC_ENABLE		BIT(7)
+#define SDMMC_IDMAC_FB			BIT(1)
+#define SDMMC_IDMAC_SWRESET		BIT(0)
+
+/* Register access macros */
+#define mci_readl(dev, reg)			\
+	__raw_readl(dev->regs + SDMMC_##reg)
+#define mci_writel(dev, reg, value)			\
+	__raw_writel((value), dev->regs + SDMMC_##reg)
+
+/* 16-bit FIFO access macros */
+#define mci_readw(dev, reg)			\
+	__raw_readw(dev->regs + SDMMC_##reg)
+#define mci_writew(dev, reg, value)			\
+	__raw_writew((value), dev->regs + SDMMC_##reg)
+
+/* 64-bit FIFO access macros */
+#ifdef readq
+#define mci_readq(dev, reg)			\
+	__raw_readq(dev->regs + SDMMC_##reg)
+#define mci_writeq(dev, reg, value)			\
+	__raw_writeq((value), dev->regs + SDMMC_##reg)
+#else
+/*
+ * Dummy readq implementation for architectures that don't define it.
+ *
+ * We would assume that none of these architectures would configure
+ * the IP block with a 64bit FIFO width, so this code will never be
+ * executed on those machines. Defining these macros here keeps the
+ * rest of the code free from ifdefs.
+ */
+#define mci_readq(dev, reg)			\
+	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg))
+#define mci_writeq(dev, reg, value)			\
+	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value)
+#endif
+
+#endif /* _DW_MMC_H_ */
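For orientation, a minimal sketch (not part of the patch) of how the register accessors and bit definitions above are meant to be combined; the helper name is made up for illustration and it relies only on the struct dw_mci 'regs' pointer that the macros themselves dereference:

	/* Sketch: request a FIFO reset and wait for the controller to clear it. */
	static void example_fifo_reset(struct dw_mci *host)
	{
		u32 ctrl = mci_readl(host, CTRL);

		mci_writel(host, CTRL, ctrl | SDMMC_CTRL_FIFO_RESET);
		while (mci_readl(host, CTRL) & SDMMC_CTRL_FIFO_RESET)
			cpu_relax();
	}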
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 87b4fc6c98c2..563022825667 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -19,6 +19,7 @@
 #include <linux/highmem.h>
 #include <linux/log2.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
 #include <linux/amba/bus.h>
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
@@ -45,6 +46,12 @@ static unsigned int fmax = 515633;
  *	      is asserted (likewise for RX)
  * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
  *		  is asserted (likewise for RX)
+ * @broken_blockend: the MCI_DATABLOCKEND flag is broken on the hardware
+ *		and will not work at all.
+ * @broken_blockend_dma: the MCI_DATABLOCKEND flag is broken on the hardware
+ *		when using DMA.
+ * @sdio: variant supports SDIO
+ * @st_clkdiv: true if using an ST-specific clock divider algorithm
  */
 struct variant_data {
 	unsigned int		clkreg;
@@ -52,6 +59,10 @@ struct variant_data {
 	unsigned int		datalength_bits;
 	unsigned int		fifosize;
 	unsigned int		fifohalfsize;
+	bool			broken_blockend;
+	bool			broken_blockend_dma;
+	bool			sdio;
+	bool			st_clkdiv;
 };
 
 static struct variant_data variant_arm = {
@@ -65,6 +76,8 @@ static struct variant_data variant_u300 = {
 	.fifohalfsize		= 8 * 4,
 	.clkreg_enable		= 1 << 13, /* HWFCEN */
 	.datalength_bits	= 16,
+	.broken_blockend_dma	= true,
+	.sdio			= true,
 };
 
 static struct variant_data variant_ux500 = {
@@ -73,7 +86,11 @@ static struct variant_data variant_ux500 = {
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= 1 << 14, /* HWFCEN */
 	.datalength_bits	= 24,
+	.broken_blockend	= true,
+	.sdio			= true,
+	.st_clkdiv		= true,
 };
+
 /*
  * This must be called with host->lock held
  */
@@ -86,7 +103,22 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
 		if (desired >= host->mclk) {
 			clk = MCI_CLK_BYPASS;
 			host->cclk = host->mclk;
+		} else if (variant->st_clkdiv) {
+			/*
+			 * DB8500 TRM says f = mclk / (clkdiv + 2)
+			 * => clkdiv = (mclk / f) - 2
+			 * Round the divider up so we don't exceed the max
+			 * frequency
+			 */
+			clk = DIV_ROUND_UP(host->mclk, desired) - 2;
+			if (clk >= 256)
+				clk = 255;
+			host->cclk = host->mclk / (clk + 2);
 		} else {
+			/*
+			 * PL180 TRM says f = mclk / (2 * (clkdiv + 1))
+			 * => clkdiv = mclk / (2 * f) - 1
+			 */
 			clk = host->mclk / (2 * desired) - 1;
 			if (clk >= 256)
 				clk = 255;
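For concreteness, the two divider formulas above work out as follows (illustrative numbers only, not taken from any particular board):

	/* mclk = 100 MHz, desired = 400 kHz
	 * ST (DB8500) path: clk  = DIV_ROUND_UP(100000000, 400000) - 2 = 248
	 *                   cclk = 100000000 / (248 + 2)               = 400 kHz
	 * PL180 path:       clk  = 100000000 / (2 * 400000) - 1        = 124
	 *                   cclk = 100000000 / (2 * (124 + 1))         = 400 kHz
	 */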
@@ -129,10 +161,26 @@ mmci_request_end(struct mmci_host *host, struct mmc_request *mrq)
 	spin_lock(&host->lock);
 }
 
+static void mmci_set_mask1(struct mmci_host *host, unsigned int mask)
+{
+	void __iomem *base = host->base;
+
+	if (host->singleirq) {
+		unsigned int mask0 = readl(base + MMCIMASK0);
+
+		mask0 &= ~MCI_IRQ1MASK;
+		mask0 |= mask;
+
+		writel(mask0, base + MMCIMASK0);
+	}
+
+	writel(mask, base + MMCIMASK1);
+}
+
 static void mmci_stop_data(struct mmci_host *host)
 {
 	writel(0, host->base + MMCIDATACTRL);
-	writel(0, host->base + MMCIMASK1);
+	mmci_set_mask1(host, 0);
 	host->data = NULL;
 }
 
@@ -162,6 +210,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	host->data = data;
 	host->size = data->blksz * data->blocks;
 	host->data_xfered = 0;
+	host->blockend = false;
+	host->dataend = false;
 
 	mmci_init_sg(host, data);
 
@@ -196,9 +246,14 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 		irqmask = MCI_TXFIFOHALFEMPTYMASK;
 	}
 
+	/* The ST Micro variants have a special bit to enable SDIO */
+	if (variant->sdio && host->mmc->card)
+		if (mmc_card_sdio(host->mmc->card))
+			datactrl |= MCI_ST_DPSM_SDIOEN;
+
 	writel(datactrl, base + MMCIDATACTRL);
 	writel(readl(base + MMCIMASK0) & ~MCI_DATAENDMASK, base + MMCIMASK0);
-	writel(irqmask, base + MMCIMASK1);
+	mmci_set_mask1(host, irqmask);
 }
 
 static void
@@ -233,20 +288,9 @@ static void
 mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 	      unsigned int status)
 {
-	if (status & MCI_DATABLOCKEND) {
-		host->data_xfered += data->blksz;
-#ifdef CONFIG_ARCH_U300
-		/*
-		 * On the U300 some signal or other is
-		 * badly routed so that a data write does
-		 * not properly terminate with a MCI_DATAEND
-		 * status flag. This quirk will make writes
-		 * work again.
-		 */
-		if (data->flags & MMC_DATA_WRITE)
-			status |= MCI_DATAEND;
-#endif
-	}
+	struct variant_data *variant = host->variant;
+
+	/* First check for errors */
 	if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
 		dev_dbg(mmc_dev(host->mmc), "MCI ERROR IRQ (status %08x)\n", status);
 		if (status & MCI_DATACRCFAIL)
@@ -255,7 +299,10 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 			data->error = -ETIMEDOUT;
 		else if (status & (MCI_TXUNDERRUN|MCI_RXOVERRUN))
 			data->error = -EIO;
-		status |= MCI_DATAEND;
+
+		/* Force-complete the transaction */
+		host->blockend = true;
+		host->dataend = true;
 
 		/*
 		 * We hit an error condition.  Ensure that any data
@@ -273,9 +320,64 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 			local_irq_restore(flags);
 		}
 	}
-	if (status & MCI_DATAEND) {
+
+	/*
+	 * On ARM variants in PIO mode, MCI_DATABLOCKEND
+	 * is always sent first, and we increase the
+	 * transferred number of bytes for that IRQ. Then
+	 * MCI_DATAEND follows and we conclude the transaction.
+	 *
+	 * On the Ux500 single-IRQ variant MCI_DATABLOCKEND
+	 * doesn't seem to immediately clear from the status,
+	 * so we can't use it to keep count when only one irq is
+	 * used because the irq will hit for other reasons, and
+	 * then the flag is still up. So we use the MCI_DATAEND
+	 * IRQ at the end of the entire transfer because
+	 * MCI_DATABLOCKEND is broken.
+	 *
+	 * In the U300, the IRQs can arrive out-of-order,
+	 * e.g. MCI_DATABLOCKEND sometimes arrives after MCI_DATAEND,
+	 * so for this case we use the flags "blockend" and
+	 * "dataend" to make sure both IRQs have arrived before
+	 * concluding the transaction. (This does not apply
+	 * to the Ux500 which doesn't fire MCI_DATABLOCKEND
+	 * at all.) In DMA mode it suffers from the same problem
+	 * as the Ux500.
+	 */
+	if (status & MCI_DATABLOCKEND) {
+		/*
+		 * Just being a little over-cautious, we do not
+		 * use this progressive update if the hardware blockend
+		 * flag is unreliable: since it can stay high between
+		 * IRQs it will corrupt the transfer counter.
+		 */
+		if (!variant->broken_blockend)
+			host->data_xfered += data->blksz;
+		host->blockend = true;
+	}
+
+	if (status & MCI_DATAEND)
+		host->dataend = true;
+
+	/*
+	 * On variants with broken blockend we shall only wait for dataend,
+	 * on others we must sync with the blockend signal since they can
+	 * appear out-of-order.
+	 */
+	if (host->dataend && (host->blockend || variant->broken_blockend)) {
 		mmci_stop_data(host);
 
+		/* Reset these flags */
+		host->blockend = false;
+		host->dataend = false;
+
+		/*
+		 * Variants with broken blockend flags need to handle the
+		 * end of the entire transfer here.
+		 */
+		if (variant->broken_blockend && !data->error)
+			host->data_xfered += data->blksz * data->blocks;
+
 		if (!data->stop) {
 			mmci_request_end(host, data->mrq);
 		} else {
@@ -356,7 +458,32 @@ static int mmci_pio_write(struct mmci_host *host, char *buffer, unsigned int rem
 			 variant->fifosize : variant->fifohalfsize;
 		count = min(remain, maxcnt);
 
-		writesl(base + MMCIFIFO, ptr, count >> 2);
+		/*
+		 * On the ST Micro variant, SDIO transfers of fewer
+		 * than 8 bytes should have the clock H/W flow
+		 * control disabled.
+		 */
+		if (variant->sdio &&
+		    mmc_card_sdio(host->mmc->card)) {
+			if (count < 8)
+				writel(readl(host->base + MMCICLOCK) &
+					~variant->clkreg_enable,
+					host->base + MMCICLOCK);
+			else
+				writel(readl(host->base + MMCICLOCK) |
+					variant->clkreg_enable,
+					host->base + MMCICLOCK);
+		}
+
+		/*
+		 * SDIO especially may want to send something that is
+		 * not divisible by 4 (as opposed to card sectors
+		 * etc), and the FIFO only accepts full 32-bit writes.
+		 * Compensate by adding 3 to the count: a single byte
+		 * becomes one 32-bit write, 7 bytes become two 32-bit
+		 * writes, etc.
+		 */
+		writesl(base + MMCIFIFO, ptr, (count + 3) >> 2);
 
 		ptr += count;
 		remain -= count;
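A quick check of the "+3" rounding above (illustrative byte counts):

	/* count = 1: (1 + 3) >> 2 = 1 32-bit FIFO write
	 * count = 7: (7 + 3) >> 2 = 2 32-bit FIFO writes
	 * count = 8: (8 + 3) >> 2 = 2 32-bit FIFO writes
	 */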
@@ -437,7 +564,7 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
 	 * "any data available" mode.
 	 */
 	if (status & MCI_RXACTIVE && host->size < variant->fifosize)
-		writel(MCI_RXDATAAVLBLMASK, base + MMCIMASK1);
+		mmci_set_mask1(host, MCI_RXDATAAVLBLMASK);
 
 	/*
 	 * If we run out of data, disable the data IRQs; this
@@ -446,7 +573,7 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id)
 	 * stops us racing with our data end IRQ.
 	 */
 	if (host->size == 0) {
-		writel(0, base + MMCIMASK1);
+		mmci_set_mask1(host, 0);
 		writel(readl(base + MMCIMASK0) | MCI_DATAENDMASK, base + MMCIMASK0);
 	}
 
@@ -469,6 +596,14 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
 		struct mmc_data *data;
 
 		status = readl(host->base + MMCISTATUS);
+
+		if (host->singleirq) {
+			if (status & readl(host->base + MMCIMASK1))
+				mmci_pio_irq(irq, dev_id);
+
+			status &= ~MCI_IRQ1MASK;
+		}
+
 		status &= readl(host->base + MMCIMASK0);
 		writel(status, host->base + MMCICLEAR);
 
@@ -635,6 +770,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	struct variant_data *variant = id->data;
 	struct mmci_host *host;
 	struct mmc_host *mmc;
+	unsigned int mask;
 	int ret;
 
 	/* must have platform data */
@@ -806,20 +942,30 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	if (ret)
 		goto unmap;
 
-	ret = request_irq(dev->irq[1], mmci_pio_irq, IRQF_SHARED, DRIVER_NAME " (pio)", host);
-	if (ret)
-		goto irq0_free;
+	if (dev->irq[1] == NO_IRQ)
+		host->singleirq = true;
+	else {
+		ret = request_irq(dev->irq[1], mmci_pio_irq, IRQF_SHARED,
+				  DRIVER_NAME " (pio)", host);
+		if (ret)
+			goto irq0_free;
+	}
 
-	writel(MCI_IRQENABLE, host->base + MMCIMASK0);
+	mask = MCI_IRQENABLE;
+	/* Don't use the datablockend flag if it's broken */
+	if (variant->broken_blockend)
+		mask &= ~MCI_DATABLOCKEND;
 
-	amba_set_drvdata(dev, mmc);
+	writel(mask, host->base + MMCIMASK0);
 
-	mmc_add_host(mmc);
+	amba_set_drvdata(dev, mmc);
 
-	dev_info(&dev->dev, "%s: MMCI rev %x cfg %02x at 0x%016llx irq %d,%d\n",
-		mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
+	dev_info(&dev->dev, "%s: PL%03x rev%u at 0x%08llx irq %d,%d\n",
+		mmc_hostname(mmc), amba_part(dev), amba_rev(dev),
 		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
 
+	mmc_add_host(mmc);
+
 	return 0;
 
  irq0_free:
@@ -864,7 +1010,8 @@ static int __devexit mmci_remove(struct amba_device *dev)
 		writel(0, host->base + MMCIDATACTRL);
 
 		free_irq(dev->irq[0], host);
-		free_irq(dev->irq[1], host);
+		if (!host->singleirq)
+			free_irq(dev->irq[1], host);
 
 		if (host->gpio_wp != -ENOSYS)
 			gpio_free(host->gpio_wp);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 4ae887fc0189..df06f01aac89 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -139,6 +139,11 @@
 	MCI_DATATIMEOUTMASK|MCI_TXUNDERRUNMASK|MCI_RXOVERRUNMASK|	\
 	MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATABLOCKENDMASK)
 
+/* These interrupts are directed to IRQ1 when two IRQ lines are available */
+#define MCI_IRQ1MASK \
+	(MCI_RXFIFOHALFFULLMASK | MCI_RXDATAAVLBLMASK | \
+	 MCI_TXFIFOHALFEMPTYMASK)
+
 #define NR_SG		16
 
 struct clk;
@@ -154,6 +159,7 @@ struct mmci_host {
 	int			gpio_cd;
 	int			gpio_wp;
 	int			gpio_cd_irq;
+	bool			singleirq;
 
 	unsigned int		data_xfered;
 
@@ -171,6 +177,9 @@ struct mmci_host {
 	struct timer_list	timer;
 	unsigned int		oldstat;
 
+	bool			blockend;
+	bool			dataend;
+
 	/* pio stuff */
 	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index 1290d14c5839..5decfd0bd61d 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c
@@ -44,6 +44,7 @@
 #include <mach/mmc.h>
 #include <mach/msm_iomap.h>
 #include <mach/dma.h>
+#include <mach/clk.h>
 
 #include "msm_sdcc.h"
 
@@ -126,6 +127,40 @@ static void
 msmsdcc_start_command(struct msmsdcc_host *host, struct mmc_command *cmd,
 		      u32 c);
 
+static void msmsdcc_reset_and_restore(struct msmsdcc_host *host)
+{
+	u32	mci_clk = 0;
+	u32	mci_mask0 = 0;
+	int	ret = 0;
+
+	/* Save the controller state */
+	mci_clk = readl(host->base + MMCICLOCK);
+	mci_mask0 = readl(host->base + MMCIMASK0);
+
+	/* Reset the controller */
+	ret = clk_reset(host->clk, CLK_RESET_ASSERT);
+	if (ret)
+		pr_err("%s: Clock assert failed at %u Hz with err %d\n",
+				mmc_hostname(host->mmc), host->clk_rate, ret);
+
+	ret = clk_reset(host->clk, CLK_RESET_DEASSERT);
+	if (ret)
+		pr_err("%s: Clock deassert failed at %u Hz with err %d\n",
+				mmc_hostname(host->mmc), host->clk_rate, ret);
+
+	pr_info("%s: Controller has been re-initialiazed\n",
+			mmc_hostname(host->mmc));
+
+	/* Restore the controller state */
+	writel(host->pwr, host->base + MMCIPOWER);
+	writel(mci_clk, host->base + MMCICLOCK);
+	writel(mci_mask0, host->base + MMCIMASK0);
+	ret = clk_set_rate(host->clk, host->clk_rate);
+	if (ret)
+		pr_err("%s: Failed to set clk rate %u Hz (%d)\n",
+				mmc_hostname(host->mmc), host->clk_rate, ret);
+}
+
 static void
 msmsdcc_request_end(struct msmsdcc_host *host, struct mmc_request *mrq)
 {
@@ -155,7 +190,7 @@ static void
 msmsdcc_stop_data(struct msmsdcc_host *host)
 {
 	host->curr.data = NULL;
-	host->curr.got_dataend = host->curr.got_datablkend = 0;
+	host->curr.got_dataend = 0;
 }
 
 uint32_t msmsdcc_fifo_addr(struct msmsdcc_host *host)
@@ -189,42 +224,42 @@ msmsdcc_dma_exec_func(struct msm_dmov_cmd *cmd)
 }
 
 static void
-msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
-			  unsigned int result,
-			  struct msm_dmov_errdata *err)
+msmsdcc_dma_complete_tlet(unsigned long data)
 {
-	struct msmsdcc_dma_data	*dma_data =
-		container_of(cmd, struct msmsdcc_dma_data, hdr);
-	struct msmsdcc_host	*host = dma_data->host;
+	struct msmsdcc_host *host = (struct msmsdcc_host *)data;
 	unsigned long		flags;
 	struct mmc_request	*mrq;
+	struct msm_dmov_errdata err;
 
 	spin_lock_irqsave(&host->lock, flags);
 	host->dma.active = 0;
 
+	err = host->dma.err;
 	mrq = host->curr.mrq;
 	BUG_ON(!mrq);
 	WARN_ON(!mrq->data);
 
-	if (!(result & DMOV_RSLT_VALID)) {
+	if (!(host->dma.result & DMOV_RSLT_VALID)) {
 		pr_err("msmsdcc: Invalid DataMover result\n");
 		goto out;
 	}
 
-	if (result & DMOV_RSLT_DONE) {
+	if (host->dma.result & DMOV_RSLT_DONE) {
 		host->curr.data_xfered = host->curr.xfer_size;
 	} else {
 		/* Error or flush  */
-		if (result & DMOV_RSLT_ERROR)
+		if (host->dma.result & DMOV_RSLT_ERROR)
 			pr_err("%s: DMA error (0x%.8x)\n",
-			       mmc_hostname(host->mmc), result);
-		if (result & DMOV_RSLT_FLUSH)
+			       mmc_hostname(host->mmc), host->dma.result);
+		if (host->dma.result & DMOV_RSLT_FLUSH)
 			pr_err("%s: DMA channel flushed (0x%.8x)\n",
-			       mmc_hostname(host->mmc), result);
-		if (err)
-			pr_err("Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n",
-			       err->flush[0], err->flush[1], err->flush[2],
-			       err->flush[3], err->flush[4], err->flush[5]);
+			       mmc_hostname(host->mmc), host->dma.result);
+
+		pr_err("Flush data: %.8x %.8x %.8x %.8x %.8x %.8x\n",
+		       err.flush[0], err.flush[1], err.flush[2],
+		       err.flush[3], err.flush[4], err.flush[5]);
+
+		msmsdcc_reset_and_restore(host);
 		if (!mrq->data->error)
 			mrq->data->error = -EIO;
 	}
@@ -242,8 +277,7 @@ msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
 	host->dma.sg = NULL;
 	host->dma.busy = 0;
 
-	if ((host->curr.got_dataend && host->curr.got_datablkend)
-	     || mrq->data->error) {
+	if (host->curr.got_dataend || mrq->data->error) {
 
 		/*
 		 * If we've already gotten our DATAEND / DATABLKEND
@@ -273,6 +307,22 @@ out:
 	return;
 }
 
+static void
+msmsdcc_dma_complete_func(struct msm_dmov_cmd *cmd,
+			  unsigned int result,
+			  struct msm_dmov_errdata *err)
+{
+	struct msmsdcc_dma_data	*dma_data =
+		container_of(cmd, struct msmsdcc_dma_data, hdr);
+	struct msmsdcc_host *host = dma_data->host;
+
+	dma_data->result = result;
+	if (err)
+		memcpy(&dma_data->err, err, sizeof(struct msm_dmov_errdata));
+
+	tasklet_schedule(&host->dma_tlet);
+}
+
 static int validate_dma(struct msmsdcc_host *host, struct mmc_data *data)
 {
 	if (host->dma.channel == -1)
@@ -424,6 +474,11 @@ msmsdcc_start_command_deferred(struct msmsdcc_host *host,
 	      (cmd->opcode == 53))
 		*c |= MCI_CSPM_DATCMD;
 
+	if (host->prog_scan && (cmd->opcode == 12)) {
+		*c |= MCI_CPSM_PROGENA;
+		host->prog_enable = true;
+	}
+
 	if (cmd == cmd->mrq->stop)
 		*c |= MCI_CSPM_MCIABORT;
 
@@ -450,7 +505,6 @@ msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data,
 	host->curr.xfer_remain = host->curr.xfer_size;
 	host->curr.data_xfered = 0;
 	host->curr.got_dataend = 0;
-	host->curr.got_datablkend = 0;
 
 	memset(&host->pio, 0, sizeof(host->pio));
 
@@ -494,6 +548,8 @@ msmsdcc_start_data(struct msmsdcc_host *host, struct mmc_data *data,
 			host->cmd_c = c;
 		}
 		msm_dmov_enqueue_cmd(host->dma.channel, &host->dma.hdr);
+		if (data->flags & MMC_DATA_WRITE)
+			host->prog_scan = true;
 	} else {
 		msmsdcc_writel(host, timeout, MMCIDATATIMER);
 
@@ -555,6 +611,9 @@ msmsdcc_pio_read(struct msmsdcc_host *host, char *buffer, unsigned int remain)
 	uint32_t	*ptr = (uint32_t *) buffer;
 	int		count = 0;
 
+	if (remain % 4)
+		remain = ((remain >> 2) + 1) << 2;
+
 	while (msmsdcc_readl(host, MMCISTATUS) & MCI_RXDATAAVLBL) {
 		*ptr = msmsdcc_readl(host, MMCIFIFO + (count % MCI_FIFOSIZE));
 		ptr++;
@@ -575,13 +634,14 @@ msmsdcc_pio_write(struct msmsdcc_host *host, char *buffer,
 	char *ptr = buffer;
 
 	do {
-		unsigned int count, maxcnt;
+		unsigned int count, maxcnt, sz;
 
 		maxcnt = status & MCI_TXFIFOEMPTY ? MCI_FIFOSIZE :
 						    MCI_FIFOHALFSIZE;
 		count = min(remain, maxcnt);
 
-		writesl(base + MMCIFIFO, ptr, count >> 2);
+		sz = count % 4 ? (count >> 2) + 1 : (count >> 2);
+		writesl(base + MMCIFIFO, ptr, sz);
 		ptr += count;
 		remain -= count;
 
@@ -702,10 +762,26 @@ static void msmsdcc_do_cmdirq(struct msmsdcc_host *host, uint32_t status)
 			msm_dmov_stop_cmd(host->dma.channel,
 					  &host->dma.hdr, 0);
 		else if (host->curr.data) { /* Non DMA */
+			msmsdcc_reset_and_restore(host);
 			msmsdcc_stop_data(host);
 			msmsdcc_request_end(host, cmd->mrq);
-		} else /* host->data == NULL */
-			msmsdcc_request_end(host, cmd->mrq);
+		} else { /* host->data == NULL */
+			if (!cmd->error && host->prog_enable) {
+				if (status & MCI_PROGDONE) {
+					host->prog_scan = false;
+					host->prog_enable = false;
+					msmsdcc_request_end(host, cmd->mrq);
+				} else {
+					host->curr.cmd = cmd;
+				}
+			} else {
+				if (host->prog_enable) {
+					host->prog_scan = false;
+					host->prog_enable = false;
+				}
+				msmsdcc_request_end(host, cmd->mrq);
+			}
+		}
 	} else if (cmd->data)
 		if (!(cmd->data->flags & MMC_DATA_READ))
 			msmsdcc_start_data(host, cmd->data,
@@ -719,7 +795,7 @@ msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status,
 	struct mmc_data *data = host->curr.data;
 
 	if (status & (MCI_CMDSENT | MCI_CMDRESPEND | MCI_CMDCRCFAIL |
-	              MCI_CMDTIMEOUT) && host->curr.cmd) {
+			MCI_CMDTIMEOUT | MCI_PROGDONE) && host->curr.cmd) {
 		msmsdcc_do_cmdirq(host, status);
 	}
 
@@ -735,6 +811,7 @@ msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status,
 			msm_dmov_stop_cmd(host->dma.channel,
 					  &host->dma.hdr, 0);
 		else {
+			msmsdcc_reset_and_restore(host);
 			if (host->curr.data)
 				msmsdcc_stop_data(host);
 			if (!data->stop)
@@ -748,14 +825,10 @@ msmsdcc_handle_irq_data(struct msmsdcc_host *host, u32 status,
 	if (!host->curr.got_dataend && (status & MCI_DATAEND))
 		host->curr.got_dataend = 1;
 
-	if (!host->curr.got_datablkend && (status & MCI_DATABLOCKEND))
-		host->curr.got_datablkend = 1;
-
 	/*
 	 * If DMA is still in progress, we complete via the completion handler
 	 */
-	if (host->curr.got_dataend && host->curr.got_datablkend &&
-	    !host->dma.busy) {
+	if (host->curr.got_dataend && !host->dma.busy) {
 		/*
 		 * There appears to be an issue in the controller where
 		 * if you request a small block transfer (< fifo size),
@@ -792,8 +865,7 @@ msmsdcc_irq(int irq, void *dev_id)
 
 	do {
 		status = msmsdcc_readl(host, MMCISTATUS);
-		status &= (msmsdcc_readl(host, MMCIMASK0) |
-					      MCI_DATABLOCKENDMASK);
+		status &= msmsdcc_readl(host, MMCIMASK0);
 		msmsdcc_writel(host, status, MMCICLEAR);
 
 		if (status & MCI_SDIOINTR)
@@ -1118,6 +1190,9 @@ msmsdcc_probe(struct platform_device *pdev)
 	host->dmares = dmares;
 	spin_lock_init(&host->lock);
 
+	tasklet_init(&host->dma_tlet, msmsdcc_dma_complete_tlet,
+			(unsigned long)host);
+
 	/*
 	 * Setup DMA
 	 */
diff --git a/drivers/mmc/host/msm_sdcc.h b/drivers/mmc/host/msm_sdcc.h
index ff2b0f74f6f4..939557af266d 100644
--- a/drivers/mmc/host/msm_sdcc.h
+++ b/drivers/mmc/host/msm_sdcc.h
@@ -138,7 +138,7 @@
 #define MCI_IRQENABLE	\
 	(MCI_CMDCRCFAILMASK|MCI_DATACRCFAILMASK|MCI_CMDTIMEOUTMASK|	\
 	MCI_DATATIMEOUTMASK|MCI_TXUNDERRUNMASK|MCI_RXOVERRUNMASK|	\
-	MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK)
+	MCI_CMDRESPENDMASK|MCI_CMDSENTMASK|MCI_DATAENDMASK|MCI_PROGDONEMASK)
 
 /*
  * The size of the FIFO in bytes.
@@ -172,6 +172,8 @@ struct msmsdcc_dma_data {
 	struct msmsdcc_host		*host;
 	int				busy; /* Set if DM is busy */
 	int				active;
+	unsigned int			result;
+	struct msm_dmov_errdata		err;
 };
 
 struct msmsdcc_pio_data {
@@ -188,7 +190,6 @@ struct msmsdcc_curr_req {
 	unsigned int		xfer_remain;	/* Bytes remaining to send */
 	unsigned int		data_xfered;	/* Bytes acked by BLKEND irq */
 	int			got_dataend;
-	int			got_datablkend;
 	int			user_pages;
 };
 
@@ -235,6 +236,7 @@ struct msmsdcc_host {
 	int			cmdpoll;
 	struct msmsdcc_stats	stats;
 
+	struct tasklet_struct	dma_tlet;
 	/* Command parameters */
 	unsigned int		cmd_timeout;
 	unsigned int		cmd_pio_irqmask;
@@ -242,6 +244,8 @@ struct msmsdcc_host {
 	struct mmc_command	*cmd_cmd;
 	u32			cmd_c;
 
+	bool prog_scan;
+	bool prog_enable;
 };
 
 #endif
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index bdd2cbb87cba..4428594261c5 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -31,6 +31,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
+#include <linux/regulator/consumer.h>
 
 #include <asm/dma.h>
 #include <asm/irq.h>
@@ -141,10 +142,49 @@ struct mxcmci_host {
 
 	struct work_struct	datawork;
 	spinlock_t		lock;
+
+	struct regulator	*vcc;
 };
 
 static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
 
+static inline void mxcmci_init_ocr(struct mxcmci_host *host)
+{
+	host->vcc = regulator_get(mmc_dev(host->mmc), "vmmc");
+
+	if (IS_ERR(host->vcc)) {
+		host->vcc = NULL;
+	} else {
+		host->mmc->ocr_avail = mmc_regulator_get_ocrmask(host->vcc);
+		if (host->pdata && host->pdata->ocr_avail)
+			dev_warn(mmc_dev(host->mmc),
+				"pdata->ocr_avail will not be used\n");
+	}
+
+	if (host->vcc == NULL) {
+		/* fall-back to platform data */
+		if (host->pdata && host->pdata->ocr_avail)
+			host->mmc->ocr_avail = host->pdata->ocr_avail;
+		else
+			host->mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+	}
+}
+
+static inline void mxcmci_set_power(struct mxcmci_host *host,
+				    unsigned char power_mode,
+				    unsigned int vdd)
+{
+	if (host->vcc) {
+		if (power_mode == MMC_POWER_UP)
+			mmc_regulator_set_ocr(host->mmc, host->vcc, vdd);
+		else if (power_mode == MMC_POWER_OFF)
+			mmc_regulator_set_ocr(host->mmc, host->vcc, 0);
+	}
+
+	if (host->pdata && host->pdata->setpower)
+		host->pdata->setpower(mmc_dev(host->mmc), vdd);
+}
+
 static inline int mxcmci_use_dma(struct mxcmci_host *host)
 {
 	return host->do_dma;
@@ -680,9 +720,9 @@ static void mxcmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		host->cmdat &= ~CMD_DAT_CONT_BUS_WIDTH_4;
 
 	if (host->power_mode != ios->power_mode) {
-		if (host->pdata && host->pdata->setpower)
-			host->pdata->setpower(mmc_dev(mmc), ios->vdd);
+		mxcmci_set_power(host, ios->power_mode, ios->vdd);
 		host->power_mode = ios->power_mode;
+
 		if (ios->power_mode == MMC_POWER_ON)
 			host->cmdat |= CMD_DAT_CONT_INIT;
 	}
@@ -807,10 +847,7 @@ static int mxcmci_probe(struct platform_device *pdev)
 	host->pdata = pdev->dev.platform_data;
 	spin_lock_init(&host->lock);
 
-	if (host->pdata && host->pdata->ocr_avail)
-		mmc->ocr_avail = host->pdata->ocr_avail;
-	else
-		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+	mxcmci_init_ocr(host);
 
 	if (host->pdata && host->pdata->dat3_card_detect)
 		host->default_irq_mask =
@@ -915,6 +952,9 @@ static int mxcmci_remove(struct platform_device *pdev)
 
 	mmc_remove_host(mmc);
 
+	if (host->vcc)
+		regulator_put(host->vcc);
+
 	if (host->pdata && host->pdata->exit)
 		host->pdata->exit(&pdev->dev, mmc);
 
@@ -927,7 +967,6 @@ static int mxcmci_remove(struct platform_device *pdev)
 	clk_put(host->clk);
 
 	release_mem_region(host->res->start, resource_size(host->res));
-	release_resource(host->res);
 
 	mmc_free_host(mmc);
 
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 0c7e37f496ef..379d2ffe4c87 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -173,6 +173,8 @@ struct mmc_omap_host {
 	struct omap_mmc_platform_data *pdata;
 };
 
+static struct workqueue_struct *mmc_omap_wq;
+
 static void mmc_omap_fclk_offdelay(struct mmc_omap_slot *slot)
 {
 	unsigned long tick_ns;
@@ -289,7 +291,7 @@ static void mmc_omap_release_slot(struct mmc_omap_slot *slot, int clk_enabled)
 		host->next_slot = new_slot;
 		host->mmc = new_slot->mmc;
 		spin_unlock_irqrestore(&host->slot_lock, flags);
-		schedule_work(&host->slot_release_work);
+		queue_work(mmc_omap_wq, &host->slot_release_work);
 		return;
 	}
 
@@ -457,7 +459,7 @@ mmc_omap_xfer_done(struct mmc_omap_host *host, struct mmc_data *data)
 	}
 
 	host->stop_data = data;
-	schedule_work(&host->send_stop_work);
+	queue_work(mmc_omap_wq, &host->send_stop_work);
 }
 
 static void
@@ -637,7 +639,7 @@ mmc_omap_cmd_timer(unsigned long data)
 		OMAP_MMC_WRITE(host, IE, 0);
 		disable_irq(host->irq);
 		host->abort = 1;
-		schedule_work(&host->cmd_abort_work);
+		queue_work(mmc_omap_wq, &host->cmd_abort_work);
 	}
 	spin_unlock_irqrestore(&host->slot_lock, flags);
 }
@@ -826,7 +828,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
 		host->abort = 1;
 		OMAP_MMC_WRITE(host, IE, 0);
 		disable_irq_nosync(host->irq);
-		schedule_work(&host->cmd_abort_work);
+		queue_work(mmc_omap_wq, &host->cmd_abort_work);
 		return IRQ_HANDLED;
 	}
 
@@ -1387,7 +1389,7 @@ static void mmc_omap_remove_slot(struct mmc_omap_slot *slot)
 
 	tasklet_kill(&slot->cover_tasklet);
 	del_timer_sync(&slot->cover_timer);
-	flush_scheduled_work();
+	flush_workqueue(mmc_omap_wq);
 
 	mmc_remove_host(mmc);
 	mmc_free_host(mmc);
@@ -1608,12 +1610,22 @@ static struct platform_driver mmc_omap_driver = {
 
 static int __init mmc_omap_init(void)
 {
-	return platform_driver_probe(&mmc_omap_driver, mmc_omap_probe);
+	int ret;
+
+	mmc_omap_wq = alloc_workqueue("mmc_omap", 0, 0);
+	if (!mmc_omap_wq)
+		return -ENOMEM;
+
+	ret = platform_driver_probe(&mmc_omap_driver, mmc_omap_probe);
+	if (ret)
+		destroy_workqueue(mmc_omap_wq);
+	return ret;
 }
 
 static void __exit mmc_omap_exit(void)
 {
 	platform_driver_unregister(&mmc_omap_driver);
+	destroy_workqueue(mmc_omap_wq);
 }
 
 module_init(mmc_omap_init);
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 5d46021cbb57..078fdf11af03 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2290,7 +2290,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 		free_irq(host->irq, host);
 		if (mmc_slot(host).card_detect_irq)
 			free_irq(mmc_slot(host).card_detect_irq, host);
-		flush_scheduled_work();
+		flush_work_sync(&host->mmc_carddetect_work);
 
 		mmc_host_disable(host->mmc);
 		clk_disable(host->iclk);
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
new file mode 100644
index 000000000000..2aeef4ffed8c
--- /dev/null
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -0,0 +1,70 @@
+/*
+ * sdhci-dove.c Support for SDHCI on Marvell's Dove SoC
+ *
+ * Author: Saeed Bishara <saeed@marvell.com>
+ *	   Mike Rapoport <mike@compulab.co.il>
+ * Based on sdhci-cns3xxx.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/mmc/host.h>
+
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
+{
+	u16 ret;
+
+	switch (reg) {
+	case SDHCI_HOST_VERSION:
+	case SDHCI_SLOT_INT_STATUS:
+		/* those registers don't exist */
+		return 0;
+	default:
+		ret = readw(host->ioaddr + reg);
+	}
+	return ret;
+}
+
+static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
+{
+	u32 ret;
+
+	switch (reg) {
+	case SDHCI_CAPABILITIES:
+		ret = readl(host->ioaddr + reg);
+		/* Mask the support for 3.0V */
+		ret &= ~SDHCI_CAN_VDD_300;
+		break;
+	default:
+		ret = readl(host->ioaddr + reg);
+	}
+	return ret;
+}
+
+static struct sdhci_ops sdhci_dove_ops = {
+	.read_w	= sdhci_dove_readw,
+	.read_l	= sdhci_dove_readl,
+};
+
+struct sdhci_pltfm_data sdhci_dove_pdata = {
+	.ops	= &sdhci_dove_ops,
+	.quirks	= SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
+		  SDHCI_QUIRK_NO_BUSY_IRQ |
+		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+		  SDHCI_QUIRK_FORCE_DMA,
+};
diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c
index c51b71174c1d..fa19d849a920 100644
--- a/drivers/mmc/host/sdhci-of-core.c
+++ b/drivers/mmc/host/sdhci-of-core.c
@@ -122,7 +122,7 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev,
 	struct sdhci_of_data *sdhci_of_data = match->data;
 	struct sdhci_host *host;
 	struct sdhci_of_host *of_host;
-	const u32 *clk;
+	const __be32 *clk;
 	int size;
 	int ret;
 
@@ -166,7 +166,7 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev,
 
 	clk = of_get_property(np, "clock-frequency", &size);
 	if (clk && size == sizeof(*clk) && *clk)
-		of_host->clock = *clk;
+		of_host->clock = be32_to_cpup(clk);
 
 	ret = sdhci_add_host(host);
 	if (ret)
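The switch to be32_to_cpup() above matters because device-tree property data is stored big-endian; a worked value (illustrative):

	/* A 50 MHz "clock-frequency" property is stored as the bytes 02 fa f0 80.
	 * Dereferencing the pointer directly on a little-endian CPU would yield
	 * 0x80f0fa02; be32_to_cpup() returns the intended 0x02faf080 (50000000).
	 */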
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 3d9c2460d437..0dc905b20eee 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -176,6 +176,74 @@ static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 };
 
+/* O2Micro extra registers */
+#define O2_SD_LOCK_WP		0xD3
+#define O2_SD_MULTI_VCC3V	0xEE
+#define O2_SD_CLKREQ		0xEC
+#define O2_SD_CAPS		0xE0
+#define O2_SD_ADMA1		0xE2
+#define O2_SD_ADMA2		0xE7
+#define O2_SD_INF_MOD		0xF1
+
+static int o2_probe(struct sdhci_pci_chip *chip)
+{
+	int ret;
+	u8 scratch;
+
+	switch (chip->pdev->device) {
+	case PCI_DEVICE_ID_O2_8220:
+	case PCI_DEVICE_ID_O2_8221:
+	case PCI_DEVICE_ID_O2_8320:
+	case PCI_DEVICE_ID_O2_8321:
+		/* This extra setup is required due to broken ADMA. */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+		if (ret)
+			return ret;
+		scratch &= 0x7f;
+		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+
+		/* Set Multi 3 to VCC3V# */
+		pci_write_config_byte(chip->pdev, O2_SD_MULTI_VCC3V, 0x08);
+
+		/* Disable CLK_REQ# support after media DET */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_CLKREQ, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x20;
+		pci_write_config_byte(chip->pdev, O2_SD_CLKREQ, scratch);
+
+		/* Choose capabilities, enable SDMA.  We have to write 0x01
+		 * to the capabilities register first to unlock it.
+		 */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_CAPS, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x01;
+		pci_write_config_byte(chip->pdev, O2_SD_CAPS, scratch);
+		pci_write_config_byte(chip->pdev, O2_SD_CAPS, 0x73);
+
+		/* Disable ADMA1/2 */
+		pci_write_config_byte(chip->pdev, O2_SD_ADMA1, 0x39);
+		pci_write_config_byte(chip->pdev, O2_SD_ADMA2, 0x08);
+
+		/* Disable the infinite transfer mode */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_INF_MOD, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x08;
+		pci_write_config_byte(chip->pdev, O2_SD_INF_MOD, scratch);
+
+		/* Lock WP */
+		ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch);
+		if (ret)
+			return ret;
+		scratch |= 0x80;
+		pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+	}
+
+	return 0;
+}
+
 static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
 {
 	u8 scratch;
@@ -204,6 +272,7 @@ static int jmicron_pmos(struct sdhci_pci_chip *chip, int on)
 static int jmicron_probe(struct sdhci_pci_chip *chip)
 {
 	int ret;
+	u16 mmcdev = 0;
 
 	if (chip->pdev->revision == 0) {
 		chip->quirks |= SDHCI_QUIRK_32BIT_DMA_ADDR |
@@ -225,12 +294,17 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
 	 * 2. The MMC interface has a lower subfunction number
 	 *    than the SD interface.
 	 */
-	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD) {
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_SD)
+		mmcdev = PCI_DEVICE_ID_JMICRON_JMB38X_MMC;
+	else if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_SD)
+		mmcdev = PCI_DEVICE_ID_JMICRON_JMB388_ESD;
+
+	if (mmcdev) {
 		struct pci_dev *sd_dev;
 
 		sd_dev = NULL;
 		while ((sd_dev = pci_get_device(PCI_VENDOR_ID_JMICRON,
-			PCI_DEVICE_ID_JMICRON_JMB38X_MMC, sd_dev)) != NULL) {
+						mmcdev, sd_dev)) != NULL) {
 			if ((PCI_SLOT(chip->pdev->devfn) ==
 				PCI_SLOT(sd_dev->devfn)) &&
 				(chip->pdev->bus == sd_dev->bus))
@@ -290,13 +364,25 @@ static int jmicron_probe_slot(struct sdhci_pci_slot *slot)
 			slot->host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
 	}
 
+	/* JM388 MMC doesn't support 1.8V while SD supports it */
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
+		slot->host->ocr_avail_sd = MMC_VDD_32_33 | MMC_VDD_33_34 |
+			MMC_VDD_29_30 | MMC_VDD_30_31 |
+			MMC_VDD_165_195; /* allow 1.8V */
+		slot->host->ocr_avail_mmc = MMC_VDD_32_33 | MMC_VDD_33_34 |
+			MMC_VDD_29_30 | MMC_VDD_30_31; /* no 1.8V for MMC */
+	}
+
 	/*
 	 * The secondary interface requires a bit set to get the
 	 * interrupts.
 	 */
-	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
 		jmicron_enable_mmc(slot->host, 1);
 
+	slot->host->mmc->caps |= MMC_CAP_BUS_WIDTH_TEST;
+
 	return 0;
 }
 
@@ -305,7 +391,8 @@ static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead)
 	if (dead)
 		return;
 
-	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC)
+	if (slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    slot->chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD)
 		jmicron_enable_mmc(slot->host, 0);
 }
 
@@ -313,7 +400,8 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state)
 {
 	int i;
 
-	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
 		for (i = 0;i < chip->num_slots;i++)
 			jmicron_enable_mmc(chip->slots[i]->host, 0);
 	}
@@ -325,7 +413,8 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
 {
 	int ret, i;
 
-	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC) {
+	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
+	    chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
 		for (i = 0;i < chip->num_slots;i++)
 			jmicron_enable_mmc(chip->slots[i]->host, 1);
 	}
@@ -339,6 +428,10 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
 	return 0;
 }
 
+static const struct sdhci_pci_fixes sdhci_o2 = {
+	.probe		= o2_probe,
+};
+
 static const struct sdhci_pci_fixes sdhci_jmicron = {
 	.probe		= jmicron_probe,
 
@@ -510,6 +603,22 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 	},
 
 	{
+		.vendor		= PCI_VENDOR_ID_JMICRON,
+		.device		= PCI_DEVICE_ID_JMICRON_JMB388_SD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_jmicron,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_JMICRON,
+		.device		= PCI_DEVICE_ID_JMICRON_JMB388_ESD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_jmicron,
+	},
+
+	{
 		.vendor		= PCI_VENDOR_ID_SYSKONNECT,
 		.device		= 0x8000,
 		.subvendor	= PCI_ANY_ID,
@@ -589,6 +698,46 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
 	},
 
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8120,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8220,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8221,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8320,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_O2,
+		.device		= PCI_DEVICE_ID_O2_8321,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_o2,
+	},
+
 	{	/* Generic SD host controller */
 		PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
 	},
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 0502f89f662b..dbab0407f4b6 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -170,6 +170,12 @@ static const struct platform_device_id sdhci_pltfm_ids[] = {
 #ifdef CONFIG_MMC_SDHCI_ESDHC_IMX
 	{ "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata },
 #endif
+#ifdef CONFIG_MMC_SDHCI_DOVE
+	{ "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata },
+#endif
+#ifdef CONFIG_MMC_SDHCI_TEGRA
+	{ "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata },
+#endif
 	{ },
 };
 MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids);
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index c1bfe48af56a..ea2e44d9be5e 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -22,5 +22,7 @@ struct sdhci_pltfm_host {
 
 extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata;
 extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata;
+extern struct sdhci_pltfm_data sdhci_dove_pdata;
+extern struct sdhci_pltfm_data sdhci_tegra_pdata;
 
 #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index aacb862ecc8a..17203586305c 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -130,6 +130,15 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost,
 	if (!clksrc)
 		return UINT_MAX;
 
+	/*
+	 * When the external clock divider is used (clk_type is
+	 * S3C_SDHCI_CLK_DIV_EXTERNAL), the divider step differs from the
+	 * host controller's internal divider, so ask the clock framework
+	 * for the closest achievable rate instead.
+	 */
+	if (ourhost->pdata->clk_type) {
+		rate = clk_round_rate(clksrc, wanted);
+		return wanted - rate;
+	}
+
 	rate = clk_get_rate(clksrc);
 
 	for (div = 1; div < 256; div *= 2) {
@@ -232,6 +241,42 @@ static unsigned int sdhci_s3c_get_min_clock(struct sdhci_host *host)
 	return min;
 }
 
+/* sdhci_cmu_get_max_clock - callback to get maximum clock frequency. */
+static unsigned int sdhci_cmu_get_max_clock(struct sdhci_host *host)
+{
+	struct sdhci_s3c *ourhost = to_s3c(host);
+
+	return clk_round_rate(ourhost->clk_bus[ourhost->cur_clk], UINT_MAX);
+}
+
+/* sdhci_cmu_get_min_clock - callback to get minimal supported clock value. */
+static unsigned int sdhci_cmu_get_min_clock(struct sdhci_host *host)
+{
+	struct sdhci_s3c *ourhost = to_s3c(host);
+
+	/*
+	 * The initialization clock must lie in the 100 kHz-400 kHz
+	 * range, so report the rate closest to 400 kHz as the minimum.
+	 */
+	return clk_round_rate(ourhost->clk_bus[ourhost->cur_clk], 400000);
+}
+
+/* sdhci_cmu_set_clock - callback on clock change. */
+static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+	struct sdhci_s3c *ourhost = to_s3c(host);
+
+	/* don't bother if the clock is going off */
+	if (clock == 0)
+		return;
+
+	sdhci_s3c_set_clock(host, clock);
+
+	clk_set_rate(ourhost->clk_bus[ourhost->cur_clk], clock);
+
+	host->clock = clock;
+}
+
 static struct sdhci_ops sdhci_s3c_ops = {
 	.get_max_clock		= sdhci_s3c_get_max_clk,
 	.set_clock		= sdhci_s3c_set_clock,
@@ -361,6 +406,13 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
 
 		clks++;
 		sc->clk_bus[ptr] = clk;
+
+		/*
+		 * Save the current clock index so the overriding
+		 * callbacks know which clock bus is in use.
+		 */
+		sc->cur_clk = ptr;
+
 		clk_enable(clk);
 
 		dev_info(dev, "clock source %d: %s (%ld Hz)\n",
@@ -427,6 +479,20 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
 	/* HSMMC on Samsung SoCs uses SDCLK as timeout clock */
 	host->quirks |= SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK;
 
+	/*
+	 * If controller does not have internal clock divider,
+	 * we can use overriding functions instead of default.
+	 */
+	if (pdata->clk_type) {
+		sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock;
+		sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock;
+		sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock;
+	}
+
+	/* Add any extra host capabilities requested via platform data */
+	if (pdata->host_caps)
+		host->mmc->caps |= pdata->host_caps;
+
 	ret = sdhci_add_host(host);
 	if (ret) {
 		dev_err(dev, "sdhci_add_host() failed\n");
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
new file mode 100644
index 000000000000..4823ee94a63f
--- /dev/null
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+
+#include <mach/gpio.h>
+#include <mach/sdhci.h>
+
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
+{
+	u32 val;
+
+	if (unlikely(reg == SDHCI_PRESENT_STATE)) {
+		/* Use wp_gpio here instead? */
+		val = readl(host->ioaddr + reg);
+		return val | SDHCI_WRITE_PROTECT;
+	}
+
+	return readl(host->ioaddr + reg);
+}
+
+static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
+{
+	if (unlikely(reg == SDHCI_HOST_VERSION)) {
+		/* Erratum: Version register is invalid in HW. */
+		return SDHCI_SPEC_200;
+	}
+
+	return readw(host->ioaddr + reg);
+}
+
+static void tegra_sdhci_writel(struct sdhci_host *host, u32 val, int reg)
+{
+	/* Seems like we're getting spurious timeout and crc errors, so
+	 * disable signalling of them. In case of real errors software
+	 * timers should take care of eventually detecting them.
+	 */
+	if (unlikely(reg == SDHCI_SIGNAL_ENABLE))
+		val &= ~(SDHCI_INT_TIMEOUT|SDHCI_INT_CRC);
+
+	writel(val, host->ioaddr + reg);
+
+	if (unlikely(reg == SDHCI_INT_ENABLE)) {
+		/* Erratum: Must enable block gap interrupt detection */
+		u8 gap_ctrl = readb(host->ioaddr + SDHCI_BLOCK_GAP_CONTROL);
+		if (val & SDHCI_INT_CARD_INT)
+			gap_ctrl |= 0x8;
+		else
+			gap_ctrl &= ~0x8;
+		writeb(gap_ctrl, host->ioaddr + SDHCI_BLOCK_GAP_CONTROL);
+	}
+}
+
+static unsigned int tegra_sdhci_get_ro(struct sdhci_host *sdhci)
+{
+	struct platform_device *pdev = to_platform_device(mmc_dev(sdhci->mmc));
+	struct tegra_sdhci_platform_data *plat;
+
+	plat = pdev->dev.platform_data;
+
+	if (!gpio_is_valid(plat->wp_gpio))
+		return -1;
+
+	return gpio_get_value(plat->wp_gpio);
+}
+
+static irqreturn_t carddetect_irq(int irq, void *data)
+{
+	struct sdhci_host *sdhost = (struct sdhci_host *)data;
+
+	tasklet_schedule(&sdhost->card_tasklet);
+	return IRQ_HANDLED;
+};
+
+static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width)
+{
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+	struct tegra_sdhci_platform_data *plat;
+	u32 ctrl;
+
+	plat = pdev->dev.platform_data;
+
+	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+	if (plat->is_8bit && bus_width == MMC_BUS_WIDTH_8) {
+		ctrl &= ~SDHCI_CTRL_4BITBUS;
+		ctrl |= SDHCI_CTRL_8BITBUS;
+	} else {
+		ctrl &= ~SDHCI_CTRL_8BITBUS;
+		if (bus_width == MMC_BUS_WIDTH_4)
+			ctrl |= SDHCI_CTRL_4BITBUS;
+		else
+			ctrl &= ~SDHCI_CTRL_4BITBUS;
+	}
+	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+	return 0;
+}
+
+
+static int tegra_sdhci_pltfm_init(struct sdhci_host *host,
+				  struct sdhci_pltfm_data *pdata)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+	struct tegra_sdhci_platform_data *plat;
+	struct clk *clk;
+	int rc;
+
+	plat = pdev->dev.platform_data;
+	if (plat == NULL) {
+		dev_err(mmc_dev(host->mmc), "missing platform data\n");
+		return -ENXIO;
+	}
+
+	if (gpio_is_valid(plat->power_gpio)) {
+		rc = gpio_request(plat->power_gpio, "sdhci_power");
+		if (rc) {
+			dev_err(mmc_dev(host->mmc),
+				"failed to allocate power gpio\n");
+			goto out;
+		}
+		tegra_gpio_enable(plat->power_gpio);
+		gpio_direction_output(plat->power_gpio, 1);
+	}
+
+	if (gpio_is_valid(plat->cd_gpio)) {
+		rc = gpio_request(plat->cd_gpio, "sdhci_cd");
+		if (rc) {
+			dev_err(mmc_dev(host->mmc),
+				"failed to allocate cd gpio\n");
+			goto out_power;
+		}
+		tegra_gpio_enable(plat->cd_gpio);
+		gpio_direction_input(plat->cd_gpio);
+
+		rc = request_irq(gpio_to_irq(plat->cd_gpio), carddetect_irq,
+				 IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+				 mmc_hostname(host->mmc), host);
+
+		if (rc)	{
+			dev_err(mmc_dev(host->mmc), "request irq error\n");
+			goto out_cd;
+		}
+
+	}
+
+	if (gpio_is_valid(plat->wp_gpio)) {
+		rc = gpio_request(plat->wp_gpio, "sdhci_wp");
+		if (rc) {
+			dev_err(mmc_dev(host->mmc),
+				"failed to allocate wp gpio\n");
+			goto out_cd;
+		}
+		tegra_gpio_enable(plat->wp_gpio);
+		gpio_direction_input(plat->wp_gpio);
+	}
+
+	clk = clk_get(mmc_dev(host->mmc), NULL);
+	if (IS_ERR(clk)) {
+		dev_err(mmc_dev(host->mmc), "clk err\n");
+		rc = PTR_ERR(clk);
+		goto out_wp;
+	}
+	clk_enable(clk);
+	pltfm_host->clk = clk;
+
+	if (plat->is_8bit)
+		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+
+	return 0;
+
+out_wp:
+	if (gpio_is_valid(plat->wp_gpio)) {
+		tegra_gpio_disable(plat->wp_gpio);
+		gpio_free(plat->wp_gpio);
+	}
+
+out_cd:
+	if (gpio_is_valid(plat->cd_gpio)) {
+		tegra_gpio_disable(plat->cd_gpio);
+		gpio_free(plat->cd_gpio);
+	}
+
+out_power:
+	if (gpio_is_valid(plat->power_gpio)) {
+		tegra_gpio_disable(plat->power_gpio);
+		gpio_free(plat->power_gpio);
+	}
+
+out:
+	return rc;
+}
+
+static void tegra_sdhci_pltfm_exit(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+	struct tegra_sdhci_platform_data *plat;
+
+	plat = pdev->dev.platform_data;
+
+	if (gpio_is_valid(plat->wp_gpio)) {
+		tegra_gpio_disable(plat->wp_gpio);
+		gpio_free(plat->wp_gpio);
+	}
+
+	if (gpio_is_valid(plat->cd_gpio)) {
+		tegra_gpio_disable(plat->cd_gpio);
+		gpio_free(plat->cd_gpio);
+	}
+
+	if (gpio_is_valid(plat->power_gpio)) {
+		tegra_gpio_disable(plat->power_gpio);
+		gpio_free(plat->power_gpio);
+	}
+
+	clk_disable(pltfm_host->clk);
+	clk_put(pltfm_host->clk);
+}
+
+static struct sdhci_ops tegra_sdhci_ops = {
+	.get_ro     = tegra_sdhci_get_ro,
+	.read_l     = tegra_sdhci_readl,
+	.read_w     = tegra_sdhci_readw,
+	.write_l    = tegra_sdhci_writel,
+	.platform_8bit_width = tegra_sdhci_8bit,
+};
+
+struct sdhci_pltfm_data sdhci_tegra_pdata = {
+	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
+		  SDHCI_QUIRK_NO_HISPD_BIT |
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+	.ops  = &tegra_sdhci_ops,
+	.init = tegra_sdhci_pltfm_init,
+	.exit = tegra_sdhci_pltfm_exit,
+};
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a25db426c910..9e15f41f87be 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -23,6 +23,7 @@
 
 #include <linux/leds.h>
 
+#include <linux/mmc/mmc.h>
 #include <linux/mmc/host.h>
 
 #include "sdhci.h"
@@ -77,8 +78,11 @@ static void sdhci_dumpregs(struct sdhci_host *host)
 	printk(KERN_DEBUG DRIVER_NAME ": AC12 err: 0x%08x | Slot int: 0x%08x\n",
 		sdhci_readw(host, SDHCI_ACMD12_ERR),
 		sdhci_readw(host, SDHCI_SLOT_INT_STATUS));
-	printk(KERN_DEBUG DRIVER_NAME ": Caps:     0x%08x | Max curr: 0x%08x\n",
+	printk(KERN_DEBUG DRIVER_NAME ": Caps:     0x%08x | Caps_1:   0x%08x\n",
 		sdhci_readl(host, SDHCI_CAPABILITIES),
+		sdhci_readl(host, SDHCI_CAPABILITIES_1));
+	printk(KERN_DEBUG DRIVER_NAME ": Cmd:      0x%08x | Max curr: 0x%08x\n",
+		sdhci_readw(host, SDHCI_COMMAND),
 		sdhci_readl(host, SDHCI_MAX_CURRENT));
 
 	if (host->flags & SDHCI_USE_ADMA)
@@ -1518,7 +1522,11 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 
 	if (intmask & SDHCI_INT_DATA_TIMEOUT)
 		host->data->error = -ETIMEDOUT;
-	else if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_END_BIT))
+	else if (intmask & SDHCI_INT_DATA_END_BIT)
+		host->data->error = -EILSEQ;
+	else if ((intmask & SDHCI_INT_DATA_CRC) &&
+		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
+			!= MMC_BUS_TEST_R)
 		host->data->error = -EILSEQ;
 	else if (intmask & SDHCI_INT_ADMA_ERROR) {
 		printk(KERN_ERR "%s: ADMA error\n", mmc_hostname(host->mmc));
@@ -1736,7 +1744,7 @@ EXPORT_SYMBOL_GPL(sdhci_alloc_host);
 int sdhci_add_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc;
-	unsigned int caps;
+	unsigned int caps, ocr_avail;
 	int ret;
 
 	WARN_ON(host == NULL);
@@ -1890,13 +1898,26 @@ int sdhci_add_host(struct sdhci_host *host)
 	    mmc_card_is_removable(mmc))
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
-	mmc->ocr_avail = 0;
+	ocr_avail = 0;
 	if (caps & SDHCI_CAN_VDD_330)
-		mmc->ocr_avail |= MMC_VDD_32_33|MMC_VDD_33_34;
+		ocr_avail |= MMC_VDD_32_33 | MMC_VDD_33_34;
 	if (caps & SDHCI_CAN_VDD_300)
-		mmc->ocr_avail |= MMC_VDD_29_30|MMC_VDD_30_31;
+		ocr_avail |= MMC_VDD_29_30 | MMC_VDD_30_31;
 	if (caps & SDHCI_CAN_VDD_180)
-		mmc->ocr_avail |= MMC_VDD_165_195;
+		ocr_avail |= MMC_VDD_165_195;
+
+	mmc->ocr_avail = ocr_avail;
+	mmc->ocr_avail_sdio = ocr_avail;
+	if (host->ocr_avail_sdio)
+		mmc->ocr_avail_sdio &= host->ocr_avail_sdio;
+	mmc->ocr_avail_sd = ocr_avail;
+	if (host->ocr_avail_sd)
+		mmc->ocr_avail_sd &= host->ocr_avail_sd;
+	else /* normal SD controllers don't support 1.8V */
+		mmc->ocr_avail_sd &= ~MMC_VDD_165_195;
+	mmc->ocr_avail_mmc = ocr_avail;
+	if (host->ocr_avail_mmc)
+		mmc->ocr_avail_mmc &= host->ocr_avail_mmc;
 
 	if (mmc->ocr_avail == 0) {
 		printk(KERN_ERR "%s: Hardware doesn't report any "
@@ -1928,10 +1949,14 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * of bytes. When doing hardware scatter/gather, each entry cannot
 	 * be larger than 64 KiB though.
 	 */
-	if (host->flags & SDHCI_USE_ADMA)
-		mmc->max_seg_size = 65536;
-	else
+	if (host->flags & SDHCI_USE_ADMA) {
+		if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC)
+			mmc->max_seg_size = 65535;
+		else
+			mmc->max_seg_size = 65536;
+	} else {
 		mmc->max_seg_size = mmc->max_req_size;
+	}
 
 	/*
 	 * Maximum block size. This varies from controller to controller and
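To make the per-bus OCR bookkeeping above concrete, one worked case (illustrative; a controller whose capability register advertises 3.3 V and 1.8 V, with no host-specific overrides set):

	/* caps: SDHCI_CAN_VDD_330 | SDHCI_CAN_VDD_180
	 * ocr_avail           = MMC_VDD_32_33 | MMC_VDD_33_34 | MMC_VDD_165_195
	 * mmc->ocr_avail_sdio = ocr_avail                      (no override)
	 * mmc->ocr_avail_sd   = ocr_avail & ~MMC_VDD_165_195   (plain SD hosts don't do 1.8 V)
	 * mmc->ocr_avail_mmc  = ocr_avail                      (no override)
	 */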
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index e42d7f00c060..6e0969e40650 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -52,6 +52,7 @@
 #define  SDHCI_CMD_RESP_SHORT_BUSY 0x03
 
 #define SDHCI_MAKE_CMD(c, f) (((c & 0xff) << 8) | (f & 0xff))
+#define SDHCI_GET_CMD(c) ((c>>8) & 0x3f)
 
 #define SDHCI_RESPONSE		0x10
 
@@ -165,7 +166,7 @@
 #define  SDHCI_CAN_VDD_180	0x04000000
 #define  SDHCI_CAN_64BIT	0x10000000
 
-/* 44-47 reserved for more caps */
+#define SDHCI_CAPABILITIES_1	0x44
 
 #define SDHCI_MAX_CURRENT	0x48
 
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index ddd09840520b..12884c270171 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -16,16 +16,19 @@
  *
  */
 
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
 #include <linux/dma-mapping.h>
-#include <linux/mmc/host.h>
+#include <linux/dmaengine.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/core.h>
+#include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
 #include <linux/mmc/sh_mmcif.h>
+#include <linux/pagemap.h>
+#include <linux/platform_device.h>
 
 #define DRIVER_NAME	"sh_mmcif"
 #define DRIVER_VERSION	"2010-04-28"
@@ -62,25 +65,6 @@
 /* CE_BLOCK_SET */
 #define BLOCK_SIZE_MASK		0x0000ffff
 
-/* CE_CLK_CTRL */
-#define CLK_ENABLE		(1 << 24) /* 1: output mmc clock */
-#define CLK_CLEAR		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
-#define CLK_SUP_PCLK		((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
-#define SRSPTO_256		((1 << 13) | (0 << 12)) /* resp timeout */
-#define SRBSYTO_29		((1 << 11) | (1 << 10) |	\
-				 (1 << 9) | (1 << 8)) /* resp busy timeout */
-#define SRWDTO_29		((1 << 7) | (1 << 6) |		\
-				 (1 << 5) | (1 << 4)) /* read/write timeout */
-#define SCCSTO_29		((1 << 3) | (1 << 2) |		\
-				 (1 << 1) | (1 << 0)) /* ccs timeout */
-
-/* CE_BUF_ACC */
-#define BUF_ACC_DMAWEN		(1 << 25)
-#define BUF_ACC_DMAREN		(1 << 24)
-#define BUF_ACC_BUSW_32		(0 << 17)
-#define BUF_ACC_BUSW_16		(1 << 17)
-#define BUF_ACC_ATYP		(1 << 16)
-
 /* CE_INT */
 #define INT_CCSDE		(1 << 29)
 #define INT_CMD12DRE		(1 << 26)
@@ -165,10 +149,6 @@
 				 STS2_AC12BSYTO | STS2_RSPBSYTO |	\
 				 STS2_AC12RSPTO | STS2_RSPTO)
 
-/* CE_VERSION */
-#define SOFT_RST_ON		(1 << 31)
-#define SOFT_RST_OFF		(0 << 31)
-
 #define CLKDEV_EMMC_DATA	52000000 /* 52MHz */
 #define CLKDEV_MMC_DATA		20000000 /* 20MHz */
 #define CLKDEV_INIT		400000   /* 400 KHz */
@@ -176,18 +156,21 @@
 struct sh_mmcif_host {
 	struct mmc_host *mmc;
 	struct mmc_data *data;
-	struct mmc_command *cmd;
 	struct platform_device *pd;
 	struct clk *hclk;
 	unsigned int clk;
 	int bus_width;
-	u16 wait_int;
-	u16 sd_error;
+	bool sd_error;
 	long timeout;
 	void __iomem *addr;
-	wait_queue_head_t intr_wait;
-};
+	struct completion intr_wait;
 
+	/* DMA support */
+	struct dma_chan		*chan_rx;
+	struct dma_chan		*chan_tx;
+	struct completion	dma_complete;
+	unsigned int            dma_sglen;
+};
 
 static inline void sh_mmcif_bitset(struct sh_mmcif_host *host,
 					unsigned int reg, u32 val)
@@ -201,6 +184,188 @@ static inline void sh_mmcif_bitclr(struct sh_mmcif_host *host,
 	writel(~val & readl(host->addr + reg), host->addr + reg);
 }
 
+static void mmcif_dma_complete(void *arg)
+{
+	struct sh_mmcif_host *host = arg;
+	dev_dbg(&host->pd->dev, "Command completed\n");
+
+	if (WARN(!host->data, "%s: NULL data in DMA completion!\n",
+		 dev_name(&host->pd->dev)))
+		return;
+
+	if (host->data->flags & MMC_DATA_READ)
+		dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen,
+			     DMA_FROM_DEVICE);
+	else
+		dma_unmap_sg(&host->pd->dev, host->data->sg, host->dma_sglen,
+			     DMA_TO_DEVICE);
+
+	complete(&host->dma_complete);
+}
+
+static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host)
+{
+	struct scatterlist *sg = host->data->sg;
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan = host->chan_rx;
+	dma_cookie_t cookie = -EINVAL;
+	int ret;
+
+	ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_FROM_DEVICE);
+	if (ret > 0) {
+		host->dma_sglen = ret;
+		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+			DMA_FROM_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	}
+
+	if (desc) {
+		desc->callback = mmcif_dma_complete;
+		desc->callback_param = host;
+		cookie = desc->tx_submit(desc);
+		if (cookie < 0) {
+			desc = NULL;
+			ret = cookie;
+		} else {
+			sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN);
+			chan->device->device_issue_pending(chan);
+		}
+	}
+	dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n",
+		__func__, host->data->sg_len, ret, cookie);
+
+	if (!desc) {
+		/* DMA failed, fall back to PIO */
+		if (ret >= 0)
+			ret = -EIO;
+		host->chan_rx = NULL;
+		host->dma_sglen = 0;
+		dma_release_channel(chan);
+		/* Free the Tx channel too */
+		chan = host->chan_tx;
+		if (chan) {
+			host->chan_tx = NULL;
+			dma_release_channel(chan);
+		}
+		dev_warn(&host->pd->dev,
+			 "DMA failed: %d, falling back to PIO\n", ret);
+		sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	}
+
+	dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
+		desc, cookie, host->data->sg_len);
+}
+
+static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
+{
+	struct scatterlist *sg = host->data->sg;
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan = host->chan_tx;
+	dma_cookie_t cookie = -EINVAL;
+	int ret;
+
+	ret = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, DMA_TO_DEVICE);
+	if (ret > 0) {
+		host->dma_sglen = ret;
+		desc = chan->device->device_prep_slave_sg(chan, sg, ret,
+			DMA_TO_DEVICE, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	}
+
+	if (desc) {
+		desc->callback = mmcif_dma_complete;
+		desc->callback_param = host;
+		cookie = desc->tx_submit(desc);
+		if (cookie < 0) {
+			desc = NULL;
+			ret = cookie;
+		} else {
+			sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAWEN);
+			chan->device->device_issue_pending(chan);
+		}
+	}
+	dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n",
+		__func__, host->data->sg_len, ret, cookie);
+
+	if (!desc) {
+		/* DMA failed, fall back to PIO */
+		if (ret >= 0)
+			ret = -EIO;
+		host->chan_tx = NULL;
+		host->dma_sglen = 0;
+		dma_release_channel(chan);
+		/* Free the Rx channel too */
+		chan = host->chan_rx;
+		if (chan) {
+			host->chan_rx = NULL;
+			dma_release_channel(chan);
+		}
+		dev_warn(&host->pd->dev,
+			 "DMA failed: %d, falling back to PIO\n", ret);
+		sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	}
+
+	dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d\n", __func__,
+		desc, cookie);
+}
+
+static bool sh_mmcif_filter(struct dma_chan *chan, void *arg)
+{
+	dev_dbg(chan->device->dev, "%s: slave data %p\n", __func__, arg);
+	chan->private = arg;
+	return true;
+}
+
+static void sh_mmcif_request_dma(struct sh_mmcif_host *host,
+				 struct sh_mmcif_plat_data *pdata)
+{
+	host->dma_sglen = 0;
+
+	/* We can only either use DMA for both Tx and Rx or not use it at all */
+	if (pdata->dma) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+
+		host->chan_tx = dma_request_channel(mask, sh_mmcif_filter,
+						    &pdata->dma->chan_priv_tx);
+		dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__,
+			host->chan_tx);
+
+		if (!host->chan_tx)
+			return;
+
+		host->chan_rx = dma_request_channel(mask, sh_mmcif_filter,
+						    &pdata->dma->chan_priv_rx);
+		dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__,
+			host->chan_rx);
+
+		if (!host->chan_rx) {
+			dma_release_channel(host->chan_tx);
+			host->chan_tx = NULL;
+			return;
+		}
+
+		init_completion(&host->dma_complete);
+	}
+}
+
+static void sh_mmcif_release_dma(struct sh_mmcif_host *host)
+{
+	sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC, BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	/* Descriptors are freed automatically */
+	if (host->chan_tx) {
+		struct dma_chan *chan = host->chan_tx;
+		host->chan_tx = NULL;
+		dma_release_channel(chan);
+	}
+	if (host->chan_rx) {
+		struct dma_chan *chan = host->chan_rx;
+		host->chan_rx = NULL;
+		dma_release_channel(chan);
+	}
+
+	host->dma_sglen = 0;
+}
 
 static void sh_mmcif_clock_control(struct sh_mmcif_host *host, unsigned int clk)
 {
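
Both sh_mmcif_start_dma_rx() and sh_mmcif_start_dma_tx() above run the same dmaengine slave sequence: map the scatterlist, have the channel prepare a slave descriptor, attach the completion callback, submit, then issue. A condensed sketch of that shared path, assuming the surrounding driver context and the same 2.6.37-era dmaengine API the patch calls; the helper name is ours, and the BUF_ACC programming and PIO fallback are deliberately left out:

/* Sketch only: the common submit path behind both start_dma functions. */
static int sh_mmcif_submit_slave_sg(struct sh_mmcif_host *host,
				    struct dma_chan *chan,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg = host->data->sg;
	struct dma_async_tx_descriptor *desc;
	dma_cookie_t cookie;
	int nents;

	/* 1. Map the request's scatterlist for the device. */
	nents = dma_map_sg(&host->pd->dev, sg, host->data->sg_len, dir);
	if (nents <= 0)
		return -EIO;
	host->dma_sglen = nents;

	/* 2. Ask the channel to build a slave transfer descriptor. */
	desc = chan->device->device_prep_slave_sg(chan, sg, nents, dir,
					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!desc)
		return -EIO;

	/* 3. Attach the completion callback and queue the descriptor. */
	desc->callback = mmcif_dma_complete;
	desc->callback_param = host;
	cookie = desc->tx_submit(desc);
	if (cookie < 0)
		return cookie;

	/* 4. Kick the engine; completion arrives via mmcif_dma_complete(). */
	chan->device->device_issue_pending(chan);
	return 0;
}
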
@@ -239,13 +404,12 @@ static int sh_mmcif_error_manage(struct sh_mmcif_host *host)
 	u32 state1, state2;
 	int ret, timeout = 10000000;
 
-	host->sd_error = 0;
-	host->wait_int = 0;
+	host->sd_error = false;
 
 	state1 = sh_mmcif_readl(host->addr, MMCIF_CE_HOST_STS1);
 	state2 = sh_mmcif_readl(host->addr, MMCIF_CE_HOST_STS2);
-	pr_debug("%s: ERR HOST_STS1 = %08x\n", DRIVER_NAME, state1);
-	pr_debug("%s: ERR HOST_STS2 = %08x\n", DRIVER_NAME, state2);
+	dev_dbg(&host->pd->dev, "ERR HOST_STS1 = %08x\n", state1);
+	dev_dbg(&host->pd->dev, "ERR HOST_STS2 = %08x\n", state2);
 
 	if (state1 & STS1_CMDSEQ) {
 		sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, CMD_CTRL_BREAK);
@@ -253,8 +417,8 @@ static int sh_mmcif_error_manage(struct sh_mmcif_host *host)
 		while (1) {
 			timeout--;
 			if (timeout < 0) {
-				pr_err(DRIVER_NAME": Forceed end of " \
-					"command sequence timeout err\n");
+				dev_err(&host->pd->dev,
+					"Forced end of command sequence timeout err\n");
 				return -EIO;
 			}
 			if (!(sh_mmcif_readl(host->addr, MMCIF_CE_HOST_STS1)
@@ -263,18 +427,18 @@ static int sh_mmcif_error_manage(struct sh_mmcif_host *host)
 			mdelay(1);
 		}
 		sh_mmcif_sync_reset(host);
-		pr_debug(DRIVER_NAME": Forced end of command sequence\n");
+		dev_dbg(&host->pd->dev, "Forced end of command sequence\n");
 		return -EIO;
 	}
 
 	if (state2 & STS2_CRC_ERR) {
-		pr_debug(DRIVER_NAME": Happened CRC error\n");
+		dev_dbg(&host->pd->dev, "CRC error occurred\n");
 		ret = -EIO;
 	} else if (state2 & STS2_TIMEOUT_ERR) {
-		pr_debug(DRIVER_NAME": Happened Timeout error\n");
+		dev_dbg(&host->pd->dev, "Timeout error occurred\n");
 		ret = -ETIMEDOUT;
 	} else {
-		pr_debug(DRIVER_NAME": Happened End/Index error\n");
+		dev_dbg(&host->pd->dev, "End/Index error occurred\n");
 		ret = -EIO;
 	}
 	return ret;
@@ -287,17 +451,13 @@ static int sh_mmcif_single_read(struct sh_mmcif_host *host,
 	long time;
 	u32 blocksize, i, *p = sg_virt(data->sg);
 
-	host->wait_int = 0;
-
 	/* buf read enable */
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
-	time = wait_event_interruptible_timeout(host->intr_wait,
-			host->wait_int == 1 ||
-			host->sd_error == 1, host->timeout);
-	if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+			host->timeout);
+	if (time <= 0 || host->sd_error)
 		return sh_mmcif_error_manage(host);
 
-	host->wait_int = 0;
 	blocksize = (BLOCK_SIZE_MASK &
 			sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET)) + 3;
 	for (i = 0; i < blocksize / 4; i++)
@@ -305,13 +465,11 @@ static int sh_mmcif_single_read(struct sh_mmcif_host *host,
 
 	/* buffer read end */
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFRE);
-	time = wait_event_interruptible_timeout(host->intr_wait,
-			host->wait_int == 1 ||
-			host->sd_error == 1, host->timeout);
-	if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+			host->timeout);
+	if (time <= 0 || host->sd_error)
 		return sh_mmcif_error_manage(host);
 
-	host->wait_int = 0;
 	return 0;
 }
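
Each of these conversions leans on the return convention of wait_for_completion_interruptible_timeout(): a positive value is the number of jiffies left when the completion fired, zero means the wait timed out, and a negative value (-ERESTARTSYS) means a signal interrupted it, so the single test "time <= 0 || host->sd_error" covers every failure path. The pattern written out once, as a sketch; the helper name is illustrative and not part of the patch:

/* Sketch: the post-conversion wait pattern used throughout this file. */
static int sh_mmcif_wait_for_irq(struct sh_mmcif_host *host)
{
	long time = wait_for_completion_interruptible_timeout(&host->intr_wait,
							      host->timeout);

	/*
	 *  > 0  -> completed, 'time' jiffies were left on the timeout
	 * == 0  -> timed out without completion
	 *  < 0  -> interrupted by a signal (-ERESTARTSYS)
	 */
	if (time <= 0 || host->sd_error)
		return sh_mmcif_error_manage(host);

	return 0;
}
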
 
@@ -326,19 +484,15 @@ static int sh_mmcif_multi_read(struct sh_mmcif_host *host,
 						     MMCIF_CE_BLOCK_SET);
 	for (j = 0; j < data->sg_len; j++) {
 		p = sg_virt(data->sg);
-		host->wait_int = 0;
 		for (sec = 0; sec < data->sg->length / blocksize; sec++) {
 			sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
 			/* buf read enable */
-			time = wait_event_interruptible_timeout(host->intr_wait,
-				host->wait_int == 1 ||
-				host->sd_error == 1, host->timeout);
+			time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+				host->timeout);
 
-			if (host->wait_int != 1 &&
-			    (time == 0 || host->sd_error != 0))
+			if (time <= 0 || host->sd_error)
 				return sh_mmcif_error_manage(host);
 
-			host->wait_int = 0;
 			for (i = 0; i < blocksize / 4; i++)
 				*p++ = sh_mmcif_readl(host->addr,
 						      MMCIF_CE_DATA);
@@ -356,17 +510,14 @@ static int sh_mmcif_single_write(struct sh_mmcif_host *host,
 	long time;
 	u32 blocksize, i, *p = sg_virt(data->sg);
 
-	host->wait_int = 0;
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
 
 	/* buf write enable */
-	time = wait_event_interruptible_timeout(host->intr_wait,
-			host->wait_int == 1 ||
-			host->sd_error == 1, host->timeout);
-	if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+			host->timeout);
+	if (time <= 0 || host->sd_error)
 		return sh_mmcif_error_manage(host);
 
-	host->wait_int = 0;
 	blocksize = (BLOCK_SIZE_MASK &
 			sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET)) + 3;
 	for (i = 0; i < blocksize / 4; i++)
@@ -375,13 +526,11 @@ static int sh_mmcif_single_write(struct sh_mmcif_host *host,
 	/* buffer write end */
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MDTRANE);
 
-	time = wait_event_interruptible_timeout(host->intr_wait,
-			host->wait_int == 1 ||
-			host->sd_error == 1, host->timeout);
-	if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+			host->timeout);
+	if (time <= 0 || host->sd_error)
 		return sh_mmcif_error_manage(host);
 
-	host->wait_int = 0;
 	return 0;
 }
 
@@ -397,19 +546,15 @@ static int sh_mmcif_multi_write(struct sh_mmcif_host *host,
 
 	for (j = 0; j < data->sg_len; j++) {
 		p = sg_virt(data->sg);
-		host->wait_int = 0;
 		for (sec = 0; sec < data->sg->length / blocksize; sec++) {
 			sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
 			/* buf write enable*/
-			time = wait_event_interruptible_timeout(host->intr_wait,
-				host->wait_int == 1 ||
-				host->sd_error == 1, host->timeout);
+			time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+				host->timeout);
 
-			if (host->wait_int != 1 &&
-			    (time == 0 || host->sd_error != 0))
+			if (time <= 0 || host->sd_error)
 				return sh_mmcif_error_manage(host);
 
-			host->wait_int = 0;
 			for (i = 0; i < blocksize / 4; i++)
 				sh_mmcif_writel(host->addr,
 						MMCIF_CE_DATA, *p++);
@@ -457,7 +602,7 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host,
 		tmp |= CMD_SET_RTYP_17B;
 		break;
 	default:
-		pr_err(DRIVER_NAME": Not support type response.\n");
+		dev_err(&host->pd->dev, "Unsupported response type.\n");
 		break;
 	}
 	switch (opc) {
@@ -485,7 +630,7 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host,
 			tmp |= CMD_SET_DATW_8;
 			break;
 		default:
-			pr_err(DRIVER_NAME": Not support bus width.\n");
+			dev_err(&host->pd->dev, "Unsupported bus width.\n");
 			break;
 		}
 	}
@@ -513,10 +658,10 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host,
 	return opc = ((opc << 24) | tmp);
 }
 
-static u32 sh_mmcif_data_trans(struct sh_mmcif_host *host,
+static int sh_mmcif_data_trans(struct sh_mmcif_host *host,
 				struct mmc_request *mrq, u32 opc)
 {
-	u32 ret;
+	int ret;
 
 	switch (opc) {
 	case MMC_READ_MULTIPLE_BLOCK:
@@ -533,7 +678,7 @@ static u32 sh_mmcif_data_trans(struct sh_mmcif_host *host,
 		ret = sh_mmcif_single_read(host, mrq);
 		break;
 	default:
-		pr_err(DRIVER_NAME": NOT SUPPORT CMD = d'%08d\n", opc);
+		dev_err(&host->pd->dev, "UNSUPPORTED CMD = d'%08d\n", opc);
 		ret = -EINVAL;
 		break;
 	}
@@ -547,8 +692,6 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
 	int ret = 0, mask = 0;
 	u32 opc = cmd->opcode;
 
-	host->cmd = cmd;
-
 	switch (opc) {
 	/* respons busy check */
 	case MMC_SWITCH:
@@ -579,13 +722,12 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, mask);
 	/* set arg */
 	sh_mmcif_writel(host->addr, MMCIF_CE_ARG, cmd->arg);
-	host->wait_int = 0;
 	/* set cmd */
 	sh_mmcif_writel(host->addr, MMCIF_CE_CMD_SET, opc);
 
-	time = wait_event_interruptible_timeout(host->intr_wait,
-		host->wait_int == 1 || host->sd_error == 1, host->timeout);
-	if (host->wait_int != 1 && time == 0) {
+	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+		host->timeout);
+	if (time <= 0) {
 		cmd->error = sh_mmcif_error_manage(host);
 		return;
 	}
@@ -597,26 +739,34 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
 			cmd->error = -ETIMEDOUT;
 			break;
 		default:
-			pr_debug("%s: Cmd(d'%d) err\n",
-					DRIVER_NAME, cmd->opcode);
+			dev_dbg(&host->pd->dev, "Cmd(d'%d) err\n",
+					cmd->opcode);
 			cmd->error = sh_mmcif_error_manage(host);
 			break;
 		}
-		host->sd_error = 0;
-		host->wait_int = 0;
+		host->sd_error = false;
 		return;
 	}
 	if (!(cmd->flags & MMC_RSP_PRESENT)) {
-		cmd->error = ret;
-		host->wait_int = 0;
+		cmd->error = 0;
 		return;
 	}
-	if (host->wait_int == 1) {
-		sh_mmcif_get_response(host, cmd);
-		host->wait_int = 0;
-	}
+	sh_mmcif_get_response(host, cmd);
 	if (host->data) {
-		ret = sh_mmcif_data_trans(host, mrq, cmd->opcode);
+		if (!host->dma_sglen) {
+			ret = sh_mmcif_data_trans(host, mrq, cmd->opcode);
+		} else {
+			long time =
+				wait_for_completion_interruptible_timeout(&host->dma_complete,
+									  host->timeout);
+			if (!time)
+				ret = -ETIMEDOUT;
+			else if (time < 0)
+				ret = time;
+			sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC,
+					BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+			host->dma_sglen = 0;
+		}
 		if (ret < 0)
 			mrq->data->bytes_xfered = 0;
 		else
@@ -636,20 +786,18 @@ static void sh_mmcif_stop_cmd(struct sh_mmcif_host *host,
 	else if (mrq->cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
 		sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE);
 	else {
-		pr_err(DRIVER_NAME": not support stop cmd\n");
+		dev_err(&host->pd->dev, "unsupported stop cmd\n");
 		cmd->error = sh_mmcif_error_manage(host);
 		return;
 	}
 
-	time = wait_event_interruptible_timeout(host->intr_wait,
-			host->wait_int == 1 ||
-			host->sd_error == 1, host->timeout);
-	if (host->wait_int != 1 && (time == 0 || host->sd_error != 0)) {
+	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
+			host->timeout);
+	if (time <= 0 || host->sd_error) {
 		cmd->error = sh_mmcif_error_manage(host);
 		return;
 	}
 	sh_mmcif_get_cmd12response(host, cmd);
-	host->wait_int = 0;
 	cmd->error = 0;
 }
 
@@ -676,6 +824,15 @@ static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		break;
 	}
 	host->data = mrq->data;
+	if (mrq->data) {
+		if (mrq->data->flags & MMC_DATA_READ) {
+			if (host->chan_rx)
+				sh_mmcif_start_dma_rx(host);
+		} else {
+			if (host->chan_tx)
+				sh_mmcif_start_dma_tx(host);
+		}
+	}
 	sh_mmcif_start_cmd(host, mrq, mrq->cmd);
 	host->data = NULL;
 
@@ -735,7 +892,7 @@ static void sh_mmcif_detect(struct mmc_host *mmc)
 static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
 {
 	struct sh_mmcif_host *host = dev_id;
-	u32 state = 0;
+	u32 state;
 	int err = 0;
 
 	state = sh_mmcif_readl(host->addr, MMCIF_CE_INT);
@@ -774,17 +931,19 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
 		sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state);
 		err = 1;
 	} else {
-		pr_debug("%s: Not support int\n", DRIVER_NAME);
+		dev_dbg(&host->pd->dev, "Unsupported interrupt\n");
 		sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state);
 		sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state);
 		err = 1;
 	}
 	if (err) {
-		host->sd_error = 1;
-		pr_debug("%s: int err state = %08x\n", DRIVER_NAME, state);
+		host->sd_error = true;
+		dev_dbg(&host->pd->dev, "int err state = %08x\n", state);
 	}
-	host->wait_int = 1;
-	wake_up(&host->intr_wait);
+	if (state & ~(INT_CMD12RBE | INT_CMD12CRE))
+		complete(&host->intr_wait);
+	else
+		dev_dbg(&host->pd->dev, "Unexpected IRQ 0x%x\n", state);
 
 	return IRQ_HANDLED;
 }
@@ -793,8 +952,8 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 {
 	int ret = 0, irq[2];
 	struct mmc_host *mmc;
-	struct sh_mmcif_host *host = NULL;
-	struct sh_mmcif_plat_data *pd = NULL;
+	struct sh_mmcif_host *host;
+	struct sh_mmcif_plat_data *pd;
 	struct resource *res;
 	void __iomem *reg;
 	char clk_name[8];
@@ -802,7 +961,7 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 	irq[0] = platform_get_irq(pdev, 0);
 	irq[1] = platform_get_irq(pdev, 1);
 	if (irq[0] < 0 || irq[1] < 0) {
-		pr_err(DRIVER_NAME": Get irq error\n");
+		dev_err(&pdev->dev, "Get irq error\n");
 		return -ENXIO;
 	}
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -815,7 +974,7 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "ioremap error.\n");
 		return -ENOMEM;
 	}
-	pd = (struct sh_mmcif_plat_data *)(pdev->dev.platform_data);
+	pd = pdev->dev.platform_data;
 	if (!pd) {
 		dev_err(&pdev->dev, "sh_mmcif plat data error.\n");
 		ret = -ENXIO;
@@ -842,7 +1001,7 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 	host->clk = clk_get_rate(host->hclk);
 	host->pd = pdev;
 
-	init_waitqueue_head(&host->intr_wait);
+	init_completion(&host->intr_wait);
 
 	mmc->ops = &sh_mmcif_ops;
 	mmc->f_max = host->clk;
@@ -858,33 +1017,37 @@ static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 	mmc->caps = MMC_CAP_MMC_HIGHSPEED;
 	if (pd->caps)
 		mmc->caps |= pd->caps;
-	mmc->max_segs = 128;
+	mmc->max_segs = 32;
 	mmc->max_blk_size = 512;
-	mmc->max_blk_count = 65535;
-	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+	mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs;
+	mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size;
 	mmc->max_seg_size = mmc->max_req_size;
 
 	sh_mmcif_sync_reset(host);
 	platform_set_drvdata(pdev, host);
+
+	/* See if we also get DMA */
+	sh_mmcif_request_dma(host, pd);
+
 	mmc_add_host(mmc);
 
 	ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host);
 	if (ret) {
-		pr_err(DRIVER_NAME": request_irq error (sh_mmc:error)\n");
+		dev_err(&pdev->dev, "request_irq error (sh_mmc:error)\n");
 		goto clean_up2;
 	}
 	ret = request_irq(irq[1], sh_mmcif_intr, 0, "sh_mmc:int", host);
 	if (ret) {
 		free_irq(irq[0], host);
-		pr_err(DRIVER_NAME": request_irq error (sh_mmc:int)\n");
+		dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n");
 		goto clean_up2;
 	}
 
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
 	sh_mmcif_detect(host->mmc);
 
-	pr_info("%s: driver version %s\n", DRIVER_NAME, DRIVER_VERSION);
-	pr_debug("%s: chip ver H'%04x\n", DRIVER_NAME,
+	dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION);
+	dev_dbg(&pdev->dev, "chip ver H'%04x\n",
 		sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff);
 	return ret;
 
@@ -903,20 +1066,22 @@ static int __devexit sh_mmcif_remove(struct platform_device *pdev)
 	struct sh_mmcif_host *host = platform_get_drvdata(pdev);
 	int irq[2];
 
-	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
-
-	irq[0] = platform_get_irq(pdev, 0);
-	irq[1] = platform_get_irq(pdev, 1);
+	mmc_remove_host(host->mmc);
+	sh_mmcif_release_dma(host);
 
 	if (host->addr)
 		iounmap(host->addr);
 
-	platform_set_drvdata(pdev, NULL);
-	mmc_remove_host(host->mmc);
+	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
+
+	irq[0] = platform_get_irq(pdev, 0);
+	irq[1] = platform_get_irq(pdev, 1);
 
 	free_irq(irq[0], host);
 	free_irq(irq[1], host);
 
+	platform_set_drvdata(pdev, NULL);
+
 	clk_disable(host->hclk);
 	mmc_free_host(host->mmc);
 
@@ -947,5 +1112,5 @@ module_exit(sh_mmcif_exit);
 
 MODULE_DESCRIPTION("SuperH on-chip MMC/eMMC interface driver");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
 MODULE_AUTHOR("Yusuke Goda <yusuke.goda.sx@renesas.com>");
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index e7765a89593e..e3c6ef208391 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -25,16 +25,261 @@
  *   double buffer support
  *
  */
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/device.h>
+
 #include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/dmaengine.h>
-#include <linux/mmc/host.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tmio.h>
+#include <linux/mmc/host.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+#define CTL_SD_CMD 0x00
+#define CTL_ARG_REG 0x04
+#define CTL_STOP_INTERNAL_ACTION 0x08
+#define CTL_XFER_BLK_COUNT 0xa
+#define CTL_RESPONSE 0x0c
+#define CTL_STATUS 0x1c
+#define CTL_IRQ_MASK 0x20
+#define CTL_SD_CARD_CLK_CTL 0x24
+#define CTL_SD_XFER_LEN 0x26
+#define CTL_SD_MEM_CARD_OPT 0x28
+#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
+#define CTL_SD_DATA_PORT 0x30
+#define CTL_TRANSACTION_CTL 0x34
+#define CTL_SDIO_STATUS 0x36
+#define CTL_SDIO_IRQ_MASK 0x38
+#define CTL_RESET_SD 0xe0
+#define CTL_SDIO_REGS 0x100
+#define CTL_CLK_AND_WAIT_CTL 0x138
+#define CTL_RESET_SDIO 0x1e0
+
+/* Definitions for values the CTRL_STATUS register can take. */
+#define TMIO_STAT_CMDRESPEND    0x00000001
+#define TMIO_STAT_DATAEND       0x00000004
+#define TMIO_STAT_CARD_REMOVE   0x00000008
+#define TMIO_STAT_CARD_INSERT   0x00000010
+#define TMIO_STAT_SIGSTATE      0x00000020
+#define TMIO_STAT_WRPROTECT     0x00000080
+#define TMIO_STAT_CARD_REMOVE_A 0x00000100
+#define TMIO_STAT_CARD_INSERT_A 0x00000200
+#define TMIO_STAT_SIGSTATE_A    0x00000400
+#define TMIO_STAT_CMD_IDX_ERR   0x00010000
+#define TMIO_STAT_CRCFAIL       0x00020000
+#define TMIO_STAT_STOPBIT_ERR   0x00040000
+#define TMIO_STAT_DATATIMEOUT   0x00080000
+#define TMIO_STAT_RXOVERFLOW    0x00100000
+#define TMIO_STAT_TXUNDERRUN    0x00200000
+#define TMIO_STAT_CMDTIMEOUT    0x00400000
+#define TMIO_STAT_RXRDY         0x01000000
+#define TMIO_STAT_TXRQ          0x02000000
+#define TMIO_STAT_ILL_FUNC      0x20000000
+#define TMIO_STAT_CMD_BUSY      0x40000000
+#define TMIO_STAT_ILL_ACCESS    0x80000000
+
+/* Definitions for values the CTRL_SDIO_STATUS register can take. */
+#define TMIO_SDIO_STAT_IOIRQ	0x0001
+#define TMIO_SDIO_STAT_EXPUB52	0x4000
+#define TMIO_SDIO_STAT_EXWT	0x8000
+#define TMIO_SDIO_MASK_ALL	0xc007
+
+/* Define some IRQ masks */
+/* This is the mask used at reset by the chip */
+#define TMIO_MASK_ALL           0x837f031d
+#define TMIO_MASK_READOP  (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)
+#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)
+#define TMIO_MASK_CMD     (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \
+		TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)
+#define TMIO_MASK_IRQ     (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)
+
+#define enable_mmc_irqs(host, i) \
+	do { \
+		u32 mask;\
+		mask  = sd_ctrl_read32((host), CTL_IRQ_MASK); \
+		mask &= ~((i) & TMIO_MASK_IRQ); \
+		sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
+	} while (0)
+
+#define disable_mmc_irqs(host, i) \
+	do { \
+		u32 mask;\
+		mask  = sd_ctrl_read32((host), CTL_IRQ_MASK); \
+		mask |= ((i) & TMIO_MASK_IRQ); \
+		sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
+	} while (0)
+
+#define ack_mmc_irqs(host, i) \
+	do { \
+		sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
+	} while (0)
+
+/* This is arbitrary; no one has needed any higher alignment yet */
+#define MAX_ALIGN 4
+
+struct tmio_mmc_host {
+	void __iomem *ctl;
+	unsigned long bus_shift;
+	struct mmc_command      *cmd;
+	struct mmc_request      *mrq;
+	struct mmc_data         *data;
+	struct mmc_host         *mmc;
+	int                     irq;
+	unsigned int		sdio_irq_enabled;
+
+	/* Callbacks for clock / power control */
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
+
+	/* pio related stuff */
+	struct scatterlist      *sg_ptr;
+	struct scatterlist      *sg_orig;
+	unsigned int            sg_len;
+	unsigned int            sg_off;
+
+	struct platform_device *pdev;
+
+	/* DMA support */
+	struct dma_chan		*chan_rx;
+	struct dma_chan		*chan_tx;
+	struct tasklet_struct	dma_complete;
+	struct tasklet_struct	dma_issue;
+#ifdef CONFIG_TMIO_MMC_DMA
+	unsigned int            dma_sglen;
+	u8			bounce_buf[PAGE_CACHE_SIZE] __attribute__((aligned(MAX_ALIGN)));
+	struct scatterlist	bounce_sg;
+#endif
+
+	/* Track lost interrupts */
+	struct delayed_work	delayed_reset_work;
+	spinlock_t		lock;
+	unsigned long		last_req_ts;
+};
+
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host);
+
+static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift));
+}
+
+static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	readsw(host->ctl + (addr << host->bus_shift), buf, count);
+}
 
-#include "tmio_mmc.h"
+static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift)) |
+	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
+}
+
+static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+}
+
+static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	writesw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
+}
+
+static void tmio_mmc_init_sg(struct tmio_mmc_host *host, struct mmc_data *data)
+{
+	host->sg_len = data->sg_len;
+	host->sg_ptr = data->sg;
+	host->sg_orig = data->sg;
+	host->sg_off = 0;
+}
+
+static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
+{
+	host->sg_ptr = sg_next(host->sg_ptr);
+	host->sg_off = 0;
+	return --host->sg_len;
+}
+
+static char *tmio_mmc_kmap_atomic(struct scatterlist *sg, unsigned long *flags)
+{
+	local_irq_save(*flags);
+	return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+}
+
+static void tmio_mmc_kunmap_atomic(void *virt, unsigned long *flags)
+{
+	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+	local_irq_restore(*flags);
+}
+
+#ifdef CONFIG_MMC_DEBUG
+
+#define STATUS_TO_TEXT(a) \
+	do { \
+		if (status & TMIO_STAT_##a) \
+			printk(#a); \
+	} while (0)
+
+void pr_debug_status(u32 status)
+{
+	printk(KERN_DEBUG "status: %08x = ", status);
+	STATUS_TO_TEXT(CARD_REMOVE);
+	STATUS_TO_TEXT(CARD_INSERT);
+	STATUS_TO_TEXT(SIGSTATE);
+	STATUS_TO_TEXT(WRPROTECT);
+	STATUS_TO_TEXT(CARD_REMOVE_A);
+	STATUS_TO_TEXT(CARD_INSERT_A);
+	STATUS_TO_TEXT(SIGSTATE_A);
+	STATUS_TO_TEXT(CMD_IDX_ERR);
+	STATUS_TO_TEXT(STOPBIT_ERR);
+	STATUS_TO_TEXT(ILL_FUNC);
+	STATUS_TO_TEXT(CMD_BUSY);
+	STATUS_TO_TEXT(CMDRESPEND);
+	STATUS_TO_TEXT(DATAEND);
+	STATUS_TO_TEXT(CRCFAIL);
+	STATUS_TO_TEXT(DATATIMEOUT);
+	STATUS_TO_TEXT(CMDTIMEOUT);
+	STATUS_TO_TEXT(RXOVERFLOW);
+	STATUS_TO_TEXT(TXUNDERRUN);
+	STATUS_TO_TEXT(RXRDY);
+	STATUS_TO_TEXT(TXRQ);
+	STATUS_TO_TEXT(ILL_ACCESS);
+	printk("\n");
+}
+
+#else
+#define pr_debug_status(s)  do { } while (0)
+#endif
+
+static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (enable) {
+		host->sdio_irq_enabled = 1;
+		sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001);
+		sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK,
+			(TMIO_SDIO_MASK_ALL & ~TMIO_SDIO_STAT_IOIRQ));
+	} else {
+		sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, TMIO_SDIO_MASK_ALL);
+		sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0000);
+		host->sdio_irq_enabled = 0;
+	}
+}
 
 static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 {
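
enable_mmc_irqs()/disable_mmc_irqs() above are read-modify-write helpers on CTL_IRQ_MASK, where a set bit masks the interrupt and a cleared bit lets it through. The same logic written out as functions, purely as a sketch; these function names do not exist in the patch:

/* Sketch: the IRQ mask macros above, expanded into functions. */
static void tmio_mmc_enable_irqs(struct tmio_mmc_host *host, u32 i)
{
	u32 mask = sd_ctrl_read32(host, CTL_IRQ_MASK);

	mask &= ~(i & TMIO_MASK_IRQ);		/* cleared bit = unmasked */
	sd_ctrl_write32(host, CTL_IRQ_MASK, mask);
}

static void tmio_mmc_disable_irqs(struct tmio_mmc_host *host, u32 i)
{
	u32 mask = sd_ctrl_read32(host, CTL_IRQ_MASK);

	mask |= (i & TMIO_MASK_IRQ);		/* set bit = masked */
	sd_ctrl_write32(host, CTL_IRQ_MASK, mask);
}
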
@@ -55,8 +300,23 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 
 static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 {
+	struct mfd_cell *cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
+
+	/*
+	 * Testing on sh-mobile showed that SDIO IRQs are unmasked when
+	 * CTL_CLK_AND_WAIT_CTL gets written, so we have to disable the
+	 * device IRQ here and restore the SDIO IRQ mask before
+	 * re-enabling the device IRQ.
+	 */
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+		disable_irq(host->irq);
 	sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0000);
 	msleep(10);
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ) {
+		tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled);
+		enable_irq(host->irq);
+	}
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 &
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
 	msleep(10);
@@ -64,11 +324,21 @@ static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 
 static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
 {
+	struct mfd_cell *cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
+
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 |
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
 	msleep(10);
+	/* see comment in tmio_mmc_clk_stop above */
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+		disable_irq(host->irq);
 	sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
 	msleep(10);
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ) {
+		tmio_mmc_enable_sdio_irq(host->mmc, host->sdio_irq_enabled);
+		enable_irq(host->irq);
+	}
 }
 
 static void reset(struct tmio_mmc_host *host)
@@ -82,15 +352,60 @@ static void reset(struct tmio_mmc_host *host)
 	msleep(10);
 }
 
+static void tmio_mmc_reset_work(struct work_struct *work)
+{
+	struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host,
+						  delayed_reset_work.work);
+	struct mmc_request *mrq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+	mrq = host->mrq;
+
+	/* request already finished */
+	if (!mrq
+	    || time_is_after_jiffies(host->last_req_ts +
+		msecs_to_jiffies(2000))) {
+		spin_unlock_irqrestore(&host->lock, flags);
+		return;
+	}
+
+	dev_warn(&host->pdev->dev,
+		"timeout waiting for hardware interrupt (CMD%u)\n",
+		mrq->cmd->opcode);
+
+	if (host->data)
+		host->data->error = -ETIMEDOUT;
+	else if (host->cmd)
+		host->cmd->error = -ETIMEDOUT;
+	else
+		mrq->cmd->error = -ETIMEDOUT;
+
+	host->cmd = NULL;
+	host->data = NULL;
+	host->mrq = NULL;
+
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	reset(host);
+
+	mmc_request_done(host->mmc, mrq);
+}
+
 static void
 tmio_mmc_finish_request(struct tmio_mmc_host *host)
 {
 	struct mmc_request *mrq = host->mrq;
 
+	if (!mrq)
+		return;
+
 	host->mrq = NULL;
 	host->cmd = NULL;
 	host->data = NULL;
 
+	cancel_delayed_work(&host->delayed_reset_work);
+
 	mmc_request_done(host->mmc, mrq);
 }
 
@@ -200,6 +515,7 @@ static void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
 	return;
 }
 
+/* needs to be called with host->lock held */
 static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
 {
 	struct mmc_data *data = host->data;
@@ -233,6 +549,8 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
 	if (data->flags & MMC_DATA_READ) {
 		if (!host->chan_rx)
 			disable_mmc_irqs(host, TMIO_MASK_READOP);
+		else
+			tmio_check_bounce_buffer(host);
 		dev_dbg(&host->pdev->dev, "Complete Rx request %p\n",
 			host->mrq);
 	} else {
@@ -254,10 +572,12 @@ static void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
 
 static void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 {
-	struct mmc_data *data = host->data;
+	struct mmc_data *data;
+	spin_lock(&host->lock);
+	data = host->data;
 
 	if (!data)
-		return;
+		goto out;
 
 	if (host->chan_tx && (data->flags & MMC_DATA_WRITE)) {
 		/*
@@ -278,6 +598,8 @@ static void tmio_mmc_data_irq(struct tmio_mmc_host *host)
 	} else {
 		tmio_mmc_do_data_irq(host);
 	}
+out:
+	spin_unlock(&host->lock);
 }
 
 static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
@@ -286,9 +608,11 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 	struct mmc_command *cmd = host->cmd;
 	int i, addr;
 
+	spin_lock(&host->lock);
+
 	if (!host->cmd) {
 		pr_debug("Spurious CMD irq\n");
-		return;
+		goto out;
 	}
 
 	host->cmd = NULL;
@@ -324,8 +648,7 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 			if (!host->chan_rx)
 				enable_mmc_irqs(host, TMIO_MASK_READOP);
 		} else {
-			struct dma_chan *chan = host->chan_tx;
-			if (!chan)
+			if (!host->chan_tx)
 				enable_mmc_irqs(host, TMIO_MASK_WRITEOP);
 			else
 				tasklet_schedule(&host->dma_issue);
@@ -334,13 +657,19 @@ static void tmio_mmc_cmd_irq(struct tmio_mmc_host *host,
 		tmio_mmc_finish_request(host);
 	}
 
+out:
+	spin_unlock(&host->lock);
+
 	return;
 }
 
 static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
+	struct mfd_cell	*cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
 	unsigned int ireg, irq_mask, status;
+	unsigned int sdio_ireg, sdio_irq_mask, sdio_status;
 
 	pr_debug("MMC IRQ begin\n");
 
@@ -348,6 +677,29 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
 	irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
 	ireg = status & TMIO_MASK_IRQ & ~irq_mask;
 
+	sdio_ireg = 0;
+	if (!ireg && pdata->flags & TMIO_MMC_SDIO_IRQ) {
+		sdio_status = sd_ctrl_read16(host, CTL_SDIO_STATUS);
+		sdio_irq_mask = sd_ctrl_read16(host, CTL_SDIO_IRQ_MASK);
+		sdio_ireg = sdio_status & TMIO_SDIO_MASK_ALL & ~sdio_irq_mask;
+
+		sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status & ~TMIO_SDIO_MASK_ALL);
+
+		if (sdio_ireg && !host->sdio_irq_enabled) {
+			pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n",
+				   sdio_status, sdio_irq_mask, sdio_ireg);
+			tmio_mmc_enable_sdio_irq(host->mmc, 0);
+			goto out;
+		}
+
+		if (host->mmc->caps & MMC_CAP_SDIO_IRQ &&
+			sdio_ireg & TMIO_SDIO_STAT_IOIRQ)
+			mmc_signal_sdio_irq(host->mmc);
+
+		if (sdio_ireg)
+			goto out;
+	}
+
 	pr_debug_status(status);
 	pr_debug_status(ireg);
 
@@ -375,8 +727,10 @@ static irqreturn_t tmio_mmc_irq(int irq, void *devid)
  */
 
 		/* Command completion */
-		if (ireg & TMIO_MASK_CMD) {
-			ack_mmc_irqs(host, TMIO_MASK_CMD);
+		if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
+			ack_mmc_irqs(host,
+				     TMIO_STAT_CMDRESPEND |
+				     TMIO_STAT_CMDTIMEOUT);
 			tmio_mmc_cmd_irq(host, status);
 		}
 
@@ -407,6 +761,16 @@ out:
 }
 
 #ifdef CONFIG_TMIO_MMC_DMA
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host)
+{
+	if (host->sg_ptr == &host->bounce_sg) {
+		unsigned long flags;
+		void *sg_vaddr = tmio_mmc_kmap_atomic(host->sg_orig, &flags);
+		memcpy(sg_vaddr, host->bounce_buf, host->bounce_sg.length);
+		tmio_mmc_kunmap_atomic(sg_vaddr, &flags);
+	}
+}
+
 static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
 {
 #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE)
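
Both tmio_mmc_start_dma_rx() and _tx() below gate DMA on scatterlist alignment against (1 << pdata->dma->alignment_shift) - 1: any misaligned length forces PIO, while a single misaligned element may still go through the bounce buffer that tmio_check_bounce_buffer() copies back after a read. A stand-alone, simplified sketch of that gate; the MAX_ALIGN guard and the page-size cap are omitted:

/* Sketch: the alignment gate applied before mapping a request for DMA. */
#include <stdbool.h>
#include <stddef.h>

struct sg_ent { size_t offset, length; };

static bool tmio_sg_dma_ok(const struct sg_ent *sg, int nents,
			   size_t align, size_t bounce_size)
{
	bool aligned = true;
	int i;

	for (i = 0; i < nents; i++) {
		if (sg[i].offset & align)
			aligned = false;
		if (sg[i].length & align)
			return false;	/* lengths must always be aligned */
	}

	/* One unaligned element is tolerated if it fits the bounce buffer. */
	if (!aligned)
		return nents == 1 && sg[0].length <= bounce_size;

	return true;
}
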
@@ -427,12 +791,39 @@ static void tmio_dma_complete(void *arg)
 		enable_mmc_irqs(host, TMIO_STAT_DATAEND);
 }
 
-static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
+static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 {
-	struct scatterlist *sg = host->sg_ptr;
+	struct scatterlist *sg = host->sg_ptr, *sg_tmp;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *chan = host->chan_rx;
-	int ret;
+	struct mfd_cell	*cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
+	dma_cookie_t cookie;
+	int ret, i;
+	bool aligned = true, multiple = true;
+	unsigned int align = (1 << pdata->dma->alignment_shift) - 1;
+
+	for_each_sg(sg, sg_tmp, host->sg_len, i) {
+		if (sg_tmp->offset & align)
+			aligned = false;
+		if (sg_tmp->length & align) {
+			multiple = false;
+			break;
+		}
+	}
+
+	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+			  align >= MAX_ALIGN)) || !multiple) {
+		ret = -EINVAL;
+		goto pio;
+	}
+
+	/* Only a single sg element may be unaligned; use the bounce buffer for it */
+	if (!aligned) {
+		sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length);
+		host->sg_ptr = &host->bounce_sg;
+		sg = host->sg_ptr;
+	}
 
 	ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_FROM_DEVICE);
 	if (ret > 0) {
@@ -442,21 +833,21 @@ static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 	}
 
 	if (desc) {
-		host->desc = desc;
 		desc->callback = tmio_dma_complete;
 		desc->callback_param = host;
-		host->cookie = desc->tx_submit(desc);
-		if (host->cookie < 0) {
-			host->desc = NULL;
-			ret = host->cookie;
+		cookie = desc->tx_submit(desc);
+		if (cookie < 0) {
+			desc = NULL;
+			ret = cookie;
 		} else {
 			chan->device->device_issue_pending(chan);
 		}
 	}
 	dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-		__func__, host->sg_len, ret, host->cookie, host->mrq);
+		__func__, host->sg_len, ret, cookie, host->mrq);
 
-	if (!host->desc) {
+pio:
+	if (!desc) {
 		/* DMA failed, fall back to PIO */
 		if (ret >= 0)
 			ret = -EIO;
@@ -471,24 +862,49 @@ static int tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 		dev_warn(&host->pdev->dev,
 			 "DMA failed: %d, falling back to PIO\n", ret);
 		tmio_mmc_enable_dma(host, false);
-		reset(host);
-		/* Fail this request, let above layers recover */
-		host->mrq->cmd->error = ret;
-		tmio_mmc_finish_request(host);
 	}
 
 	dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
-		desc, host->cookie, host->sg_len);
-
-	return ret > 0 ? 0 : ret;
+		desc, cookie, host->sg_len);
 }
 
-static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
+static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 {
-	struct scatterlist *sg = host->sg_ptr;
+	struct scatterlist *sg = host->sg_ptr, *sg_tmp;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *chan = host->chan_tx;
-	int ret;
+	struct mfd_cell	*cell = host->pdev->dev.platform_data;
+	struct tmio_mmc_data *pdata = cell->driver_data;
+	dma_cookie_t cookie;
+	int ret, i;
+	bool aligned = true, multiple = true;
+	unsigned int align = (1 << pdata->dma->alignment_shift) - 1;
+
+	for_each_sg(sg, sg_tmp, host->sg_len, i) {
+		if (sg_tmp->offset & align)
+			aligned = false;
+		if (sg_tmp->length & align) {
+			multiple = false;
+			break;
+		}
+	}
+
+	if ((!aligned && (host->sg_len > 1 || sg->length > PAGE_CACHE_SIZE ||
+			  align >= MAX_ALIGN)) || !multiple) {
+		ret = -EINVAL;
+		goto pio;
+	}
+
+	/* Only a single sg element may be unaligned; use the bounce buffer for it */
+	if (!aligned) {
+		unsigned long flags;
+		void *sg_vaddr = tmio_mmc_kmap_atomic(sg, &flags);
+		sg_init_one(&host->bounce_sg, host->bounce_buf, sg->length);
+		memcpy(host->bounce_buf, sg_vaddr, host->bounce_sg.length);
+		tmio_mmc_kunmap_atomic(sg_vaddr, &flags);
+		host->sg_ptr = &host->bounce_sg;
+		sg = host->sg_ptr;
+	}
 
 	ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, DMA_TO_DEVICE);
 	if (ret > 0) {
@@ -498,19 +914,19 @@ static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 	}
 
 	if (desc) {
-		host->desc = desc;
 		desc->callback = tmio_dma_complete;
 		desc->callback_param = host;
-		host->cookie = desc->tx_submit(desc);
-		if (host->cookie < 0) {
-			host->desc = NULL;
-			ret = host->cookie;
+		cookie = desc->tx_submit(desc);
+		if (cookie < 0) {
+			desc = NULL;
+			ret = cookie;
 		}
 	}
 	dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-		__func__, host->sg_len, ret, host->cookie, host->mrq);
+		__func__, host->sg_len, ret, cookie, host->mrq);
 
-	if (!host->desc) {
+pio:
+	if (!desc) {
 		/* DMA failed, fall back to PIO */
 		if (ret >= 0)
 			ret = -EIO;
@@ -525,30 +941,22 @@ static int tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 		dev_warn(&host->pdev->dev,
 			 "DMA failed: %d, falling back to PIO\n", ret);
 		tmio_mmc_enable_dma(host, false);
-		reset(host);
-		/* Fail this request, let above layers recover */
-		host->mrq->cmd->error = ret;
-		tmio_mmc_finish_request(host);
 	}
 
 	dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
-		desc, host->cookie);
-
-	return ret > 0 ? 0 : ret;
+		desc, cookie);
 }
 
-static int tmio_mmc_start_dma(struct tmio_mmc_host *host,
+static void tmio_mmc_start_dma(struct tmio_mmc_host *host,
 			       struct mmc_data *data)
 {
 	if (data->flags & MMC_DATA_READ) {
 		if (host->chan_rx)
-			return tmio_mmc_start_dma_rx(host);
+			tmio_mmc_start_dma_rx(host);
 	} else {
 		if (host->chan_tx)
-			return tmio_mmc_start_dma_tx(host);
+			tmio_mmc_start_dma_tx(host);
 	}
-
-	return 0;
 }
 
 static void tmio_issue_tasklet_fn(unsigned long priv)
@@ -562,6 +970,12 @@ static void tmio_issue_tasklet_fn(unsigned long priv)
 static void tmio_tasklet_fn(unsigned long arg)
 {
 	struct tmio_mmc_host *host = (struct tmio_mmc_host *)arg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	if (!host->data)
+		goto out;
 
 	if (host->data->flags & MMC_DATA_READ)
 		dma_unmap_sg(&host->pdev->dev, host->sg_ptr, host->dma_sglen,
@@ -571,6 +985,8 @@ static void tmio_tasklet_fn(unsigned long arg)
 			     DMA_TO_DEVICE);
 
 	tmio_mmc_do_data_irq(host);
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
 }
 
 /* It might be necessary to make filter MFD specific */
@@ -584,9 +1000,6 @@ static bool tmio_mmc_filter(struct dma_chan *chan, void *arg)
 static void tmio_mmc_request_dma(struct tmio_mmc_host *host,
 				 struct tmio_mmc_data *pdata)
 {
-	host->cookie = -EINVAL;
-	host->desc = NULL;
-
 	/* We can only either use DMA for both Tx and Rx or not use it at all */
 	if (pdata->dma) {
 		dma_cap_mask_t mask;
@@ -632,15 +1045,15 @@ static void tmio_mmc_release_dma(struct tmio_mmc_host *host)
 		host->chan_rx = NULL;
 		dma_release_channel(chan);
 	}
-
-	host->cookie = -EINVAL;
-	host->desc = NULL;
 }
 #else
-static int tmio_mmc_start_dma(struct tmio_mmc_host *host,
+static void tmio_check_bounce_buffer(struct tmio_mmc_host *host)
+{
+}
+
+static void tmio_mmc_start_dma(struct tmio_mmc_host *host,
 			       struct mmc_data *data)
 {
-	return 0;
 }
 
 static void tmio_mmc_request_dma(struct tmio_mmc_host *host,
@@ -682,7 +1095,9 @@ static int tmio_mmc_start_data(struct tmio_mmc_host *host,
 	sd_ctrl_write16(host, CTL_SD_XFER_LEN, data->blksz);
 	sd_ctrl_write16(host, CTL_XFER_BLK_COUNT, data->blocks);
 
-	return tmio_mmc_start_dma(host, data);
+	tmio_mmc_start_dma(host, data);
+
+	return 0;
 }
 
 /* Process requests from the MMC layer */
@@ -694,6 +1109,8 @@ static void tmio_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	if (host->mrq)
 		pr_debug("request not null\n");
 
+	host->last_req_ts = jiffies;
+	wmb();
 	host->mrq = mrq;
 
 	if (mrq->data) {
@@ -703,10 +1120,14 @@ static void tmio_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	}
 
 	ret = tmio_mmc_start_command(host, mrq->cmd);
-	if (!ret)
+	if (!ret) {
+		schedule_delayed_work(&host->delayed_reset_work,
+				      msecs_to_jiffies(2000));
 		return;
+	}
 
 fail:
+	host->mrq = NULL;
 	mrq->cmd->error = ret;
 	mmc_request_done(mmc, mrq);
 }
@@ -780,6 +1201,7 @@ static const struct mmc_host_ops tmio_mmc_ops = {
 	.set_ios	= tmio_mmc_set_ios,
 	.get_ro         = tmio_mmc_get_ro,
 	.get_cd		= tmio_mmc_get_cd,
+	.enable_sdio_irq = tmio_mmc_enable_sdio_irq,
 };
 
 #ifdef CONFIG_PM
@@ -864,10 +1286,15 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 		goto host_free;
 
 	mmc->ops = &tmio_mmc_ops;
-	mmc->caps = MMC_CAP_4_BIT_DATA;
-	mmc->caps |= pdata->capabilities;
+	mmc->caps = MMC_CAP_4_BIT_DATA | pdata->capabilities;
 	mmc->f_max = pdata->hclk;
 	mmc->f_min = mmc->f_max / 512;
+	mmc->max_segs = 32;
+	mmc->max_blk_size = 512;
+	mmc->max_blk_count = (PAGE_CACHE_SIZE / mmc->max_blk_size) *
+		mmc->max_segs;
+	mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+	mmc->max_seg_size = mmc->max_req_size;
 	if (pdata->ocr_mask)
 		mmc->ocr_avail = pdata->ocr_mask;
 	else
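
The new geometry bounds a request by max_segs page-sized segments. A quick stand-alone check of the resulting limits, assuming PAGE_CACHE_SIZE is the common 4 KiB page size:

/* Sketch: the request geometry set above, evaluated for 4 KiB pages. */
#include <stdio.h>

int main(void)
{
	unsigned int page_cache_size = 4096;	/* assumed PAGE_CACHE_SIZE */
	unsigned int max_segs = 32;
	unsigned int max_blk_size = 512;
	unsigned int max_blk_count =
		(page_cache_size / max_blk_size) * max_segs;	   /* 256 */
	unsigned int max_req_size = max_blk_size * max_blk_count; /* 131072 */

	printf("max_blk_count=%u max_req_size=%u (%u KiB)\n",
	       max_blk_count, max_req_size, max_req_size / 1024);
	return 0;
}
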
@@ -890,12 +1317,19 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 		goto cell_disable;
 
 	disable_mmc_irqs(host, TMIO_MASK_ALL);
+	if (pdata->flags & TMIO_MMC_SDIO_IRQ)
+		tmio_mmc_enable_sdio_irq(mmc, 0);
 
 	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
 		IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host);
 	if (ret)
 		goto cell_disable;
 
+	spin_lock_init(&host->lock);
+
+	/* Init delayed work for request timeouts */
+	INIT_DELAYED_WORK(&host->delayed_reset_work, tmio_mmc_reset_work);
+
 	/* See if we also get DMA */
 	tmio_mmc_request_dma(host, pdata);
 
@@ -934,6 +1368,7 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev)
 	if (mmc) {
 		struct tmio_mmc_host *host = mmc_priv(mmc);
 		mmc_remove_host(mmc);
+		cancel_delayed_work_sync(&host->delayed_reset_work);
 		tmio_mmc_release_dma(host);
 		free_irq(host->irq, host);
 		if (cell->disable)
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
deleted file mode 100644
index 0fedc78e3ea5..000000000000
--- a/drivers/mmc/host/tmio_mmc.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/* Definitons for use with the tmio_mmc.c
- *
- * (c) 2004 Ian Molton <spyro@f2s.com>
- * (c) 2007 Ian Molton <spyro@f2s.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/highmem.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-
-#define CTL_SD_CMD 0x00
-#define CTL_ARG_REG 0x04
-#define CTL_STOP_INTERNAL_ACTION 0x08
-#define CTL_XFER_BLK_COUNT 0xa
-#define CTL_RESPONSE 0x0c
-#define CTL_STATUS 0x1c
-#define CTL_IRQ_MASK 0x20
-#define CTL_SD_CARD_CLK_CTL 0x24
-#define CTL_SD_XFER_LEN 0x26
-#define CTL_SD_MEM_CARD_OPT 0x28
-#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
-#define CTL_SD_DATA_PORT 0x30
-#define CTL_TRANSACTION_CTL 0x34
-#define CTL_RESET_SD 0xe0
-#define CTL_SDIO_REGS 0x100
-#define CTL_CLK_AND_WAIT_CTL 0x138
-#define CTL_RESET_SDIO 0x1e0
-
-/* Definitions for values the CTRL_STATUS register can take. */
-#define TMIO_STAT_CMDRESPEND    0x00000001
-#define TMIO_STAT_DATAEND       0x00000004
-#define TMIO_STAT_CARD_REMOVE   0x00000008
-#define TMIO_STAT_CARD_INSERT   0x00000010
-#define TMIO_STAT_SIGSTATE      0x00000020
-#define TMIO_STAT_WRPROTECT     0x00000080
-#define TMIO_STAT_CARD_REMOVE_A 0x00000100
-#define TMIO_STAT_CARD_INSERT_A 0x00000200
-#define TMIO_STAT_SIGSTATE_A    0x00000400
-#define TMIO_STAT_CMD_IDX_ERR   0x00010000
-#define TMIO_STAT_CRCFAIL       0x00020000
-#define TMIO_STAT_STOPBIT_ERR   0x00040000
-#define TMIO_STAT_DATATIMEOUT   0x00080000
-#define TMIO_STAT_RXOVERFLOW    0x00100000
-#define TMIO_STAT_TXUNDERRUN    0x00200000
-#define TMIO_STAT_CMDTIMEOUT    0x00400000
-#define TMIO_STAT_RXRDY         0x01000000
-#define TMIO_STAT_TXRQ          0x02000000
-#define TMIO_STAT_ILL_FUNC      0x20000000
-#define TMIO_STAT_CMD_BUSY      0x40000000
-#define TMIO_STAT_ILL_ACCESS    0x80000000
-
-/* Define some IRQ masks */
-/* This is the mask used at reset by the chip */
-#define TMIO_MASK_ALL           0x837f031d
-#define TMIO_MASK_READOP  (TMIO_STAT_RXRDY | TMIO_STAT_DATAEND)
-#define TMIO_MASK_WRITEOP (TMIO_STAT_TXRQ | TMIO_STAT_DATAEND)
-#define TMIO_MASK_CMD     (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT | \
-		TMIO_STAT_CARD_REMOVE | TMIO_STAT_CARD_INSERT)
-#define TMIO_MASK_IRQ     (TMIO_MASK_READOP | TMIO_MASK_WRITEOP | TMIO_MASK_CMD)
-
-
-#define enable_mmc_irqs(host, i) \
-	do { \
-		u32 mask;\
-		mask  = sd_ctrl_read32((host), CTL_IRQ_MASK); \
-		mask &= ~((i) & TMIO_MASK_IRQ); \
-		sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
-	} while (0)
-
-#define disable_mmc_irqs(host, i) \
-	do { \
-		u32 mask;\
-		mask  = sd_ctrl_read32((host), CTL_IRQ_MASK); \
-		mask |= ((i) & TMIO_MASK_IRQ); \
-		sd_ctrl_write32((host), CTL_IRQ_MASK, mask); \
-	} while (0)
-
-#define ack_mmc_irqs(host, i) \
-	do { \
-		sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
-	} while (0)
-
-
-struct tmio_mmc_host {
-	void __iomem *ctl;
-	unsigned long bus_shift;
-	struct mmc_command      *cmd;
-	struct mmc_request      *mrq;
-	struct mmc_data         *data;
-	struct mmc_host         *mmc;
-	int                     irq;
-
-	/* Callbacks for clock / power control */
-	void (*set_pwr)(struct platform_device *host, int state);
-	void (*set_clk_div)(struct platform_device *host, int state);
-
-	/* pio related stuff */
-	struct scatterlist      *sg_ptr;
-	unsigned int            sg_len;
-	unsigned int            sg_off;
-
-	struct platform_device *pdev;
-
-	/* DMA support */
-	struct dma_chan		*chan_rx;
-	struct dma_chan		*chan_tx;
-	struct tasklet_struct	dma_complete;
-	struct tasklet_struct	dma_issue;
-#ifdef CONFIG_TMIO_MMC_DMA
-	struct dma_async_tx_descriptor *desc;
-	unsigned int            dma_sglen;
-	dma_cookie_t		cookie;
-#endif
-};
-
-#include <linux/io.h>
-
-static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
-{
-	return readw(host->ctl + (addr << host->bus_shift));
-}
-
-static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
-{
-	readsw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
-{
-	return readw(host->ctl + (addr << host->bus_shift)) |
-	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
-}
-
-static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr,
-		u16 val)
-{
-	writew(val, host->ctl + (addr << host->bus_shift));
-}
-
-static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
-{
-	writesw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr,
-		u32 val)
-{
-	writew(val, host->ctl + (addr << host->bus_shift));
-	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
-}
-
-#include <linux/scatterlist.h>
-#include <linux/blkdev.h>
-
-static inline void tmio_mmc_init_sg(struct tmio_mmc_host *host,
-	struct mmc_data *data)
-{
-	host->sg_len = data->sg_len;
-	host->sg_ptr = data->sg;
-	host->sg_off = 0;
-}
-
-static inline int tmio_mmc_next_sg(struct tmio_mmc_host *host)
-{
-	host->sg_ptr = sg_next(host->sg_ptr);
-	host->sg_off = 0;
-	return --host->sg_len;
-}
-
-static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg,
-	unsigned long *flags)
-{
-	local_irq_save(*flags);
-	return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
-}
-
-static inline void tmio_mmc_kunmap_atomic(void *virt,
-	unsigned long *flags)
-{
-	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
-	local_irq_restore(*flags);
-}
-
-#ifdef CONFIG_MMC_DEBUG
-
-#define STATUS_TO_TEXT(a) \
-	do { \
-		if (status & TMIO_STAT_##a) \
-			printk(#a); \
-	} while (0)
-
-void pr_debug_status(u32 status)
-{
-	printk(KERN_DEBUG "status: %08x = ", status);
-	STATUS_TO_TEXT(CARD_REMOVE);
-	STATUS_TO_TEXT(CARD_INSERT);
-	STATUS_TO_TEXT(SIGSTATE);
-	STATUS_TO_TEXT(WRPROTECT);
-	STATUS_TO_TEXT(CARD_REMOVE_A);
-	STATUS_TO_TEXT(CARD_INSERT_A);
-	STATUS_TO_TEXT(SIGSTATE_A);
-	STATUS_TO_TEXT(CMD_IDX_ERR);
-	STATUS_TO_TEXT(STOPBIT_ERR);
-	STATUS_TO_TEXT(ILL_FUNC);
-	STATUS_TO_TEXT(CMD_BUSY);
-	STATUS_TO_TEXT(CMDRESPEND);
-	STATUS_TO_TEXT(DATAEND);
-	STATUS_TO_TEXT(CRCFAIL);
-	STATUS_TO_TEXT(DATATIMEOUT);
-	STATUS_TO_TEXT(CMDTIMEOUT);
-	STATUS_TO_TEXT(RXOVERFLOW);
-	STATUS_TO_TEXT(TXUNDERRUN);
-	STATUS_TO_TEXT(RXRDY);
-	STATUS_TO_TEXT(TXRQ);
-	STATUS_TO_TEXT(ILL_ACCESS);
-	printk("\n");
-}
-
-#else
-#define pr_debug_status(s)  do { } while (0)
-#endif