author     Linus Torvalds <torvalds@linux-foundation.org>  2017-05-02 17:34:32 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-05-02 17:34:32 -0700
commit     be580e7522eecfcf31c70abdf6fa0ae77b2e293b
tree       1137d880a002ef342f9b1ab77331144c9ed956cf
parent     8d65b08debc7e62b2c6032d7fe7389d895b92cbc
parent     a627f025eb0534052ff451427c16750b3530634c
Merge tag 'mmc-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
Pull MMC updates from Ulf Hansson:
 "MMC core:
   - Continue to re-factor code to prepare for eMMC CMDQ and blkmq support
   - Introduce queue semantics to prepare for eMMC CMDQ and blkmq support
   - Add helper functions to manage temporary enable/disable of eMMC CMDQ
   - Improve wait-busy detection for SDIO

  MMC host:
   - cavium: Add driver to support Cavium controllers
   - cavium: Extend Cavium driver to support Octeon and ThunderX SOCs
   - bcm2835: Add new driver for Broadcom BCM2835 controller
   - sdhci-xenon: Add driver to support Marvell Xenon SDHCI controller
   - sdhci-tegra: Add support for the Tegra186 variant
   - sdhci-of-esdhc: Support for UHS-I SD cards
   - sdhci-of-esdhc: Support for eMMC HS200 cards
   - sdhci-cadence: Add eMMC HS400 enhanced strobe support
   - sdhci-esdhc-imx: Reset tuning circuit when needed
   - sdhci-pci: Modernize and clean-up some PM related code
   - sdhci-pci: Avoid re-tuning at runtime PM for some Intel devices
   - sdhci-pci|acpi: Use aggressive PM for some Intel BYT controllers
   - sdhci: Re-factoring and modernizations
   - sdhci: Optimize delay loops
   - sdhci: Improve register dump print format
   - sdhci: Add support for the Command Queue Engine
   - meson-gx: Various improvements and clean-ups
   - meson-gx: Add support for CMD23
   - meson-gx: Basic tuning support to avoid CRC errors
   - s3cmci: Enable probing via DT
   - mediatek: Improve tuning support for eMMC HS200 and HS400 mode
   - tmio: Improve DMA support
   - tmio: Use correct response for CMD12
   - dw_mmc: Minor improvements and clean-ups"
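
Two of the core items above are worth a closer look before the diff. The
helper functions for temporary enable/disable of eMMC CMDQ land as
mmc_cmdq_enable() and mmc_cmdq_disable() in mmc_ops.c below, and are meant to
bracket operations that cannot run while the Command Queue is on (RPMB access,
mmc_test). A minimal sketch of the intended call pattern, modelled on the
mmc_test and mmc_blk_part_switch hunks in this pull; my_with_cmdq_paused() and
my_do_non_cmdq_work() are hypothetical placeholders, not part of the series:

	/*
	 * Sketch only: temporarily disable the eMMC Command Queue around an
	 * operation that is incompatible with CMDQ. my_do_non_cmdq_work()
	 * is a hypothetical placeholder.
	 */
	static int my_with_cmdq_paused(struct mmc_card *card)
	{
		bool reenable = card->ext_csd.cmdq_en;
		int err;

		mmc_claim_host(card->host);

		if (card->ext_csd.cmdq_en) {
			err = mmc_cmdq_disable(card);
			if (err)
				goto out;
		}

		err = my_do_non_cmdq_work(card);

		/* Best effort: restore the CMDQ state on the way out */
		if (reenable)
			mmc_cmdq_enable(card);
	out:
		mmc_release_host(card->host);
		return err;
	}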

* tag 'mmc-v4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc: (148 commits)
  mmc: sdhci-of-esdhc: limit SD clock for ls1012a/ls1046a
  mmc: sdhci-of-esdhc: poll ESDHC_CLOCK_STABLE bit with udelay
  mmc: sdhci-xenon: Fix default value of LOGIC_TIMING_ADJUST for eMMC5.0 PHY
  mmc: sdhci-xenon: Fix the work flow in xenon_remove().
  MIPS: Octeon: cavium_octeon_defconfig: Enable Octeon MMC
  mmc: sdhci-xenon: Remove redundant dev_err call in get_dt_pad_ctrl_data()
  mmc: cavium: Use module_pci_driver to simplify the code
  mmc: cavium: Add MMC support for Octeon SOCs.
  mmc: cavium: Fix detection of block or byte addressing.
  mmc: core: Export API to allow hosts to get the card address
  mmc: sdio: Fix sdio wait busy implement limitation
  mmc: sdhci-esdhc-imx: reset tuning circuit when power on mmc card
  clk: apn806: fix spelling mistake: "mising" -> "missing"
  mmc: sdhci-of-esdhc: add delay between tuning cycles
  mmc: sdhci: Control the delay between tuning commands
  mmc: sdhci-of-esdhc: add tuning support
  mmc: sdhci-of-esdhc: add support for signal voltage switch
  mmc: sdhci-of-esdhc: add peripheral clock support
  mmc: sdhci-pci: Allow for 3 bytes from Intel DSM
  mmc: cavium: Fix a shift wrapping bug
  ...
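
The queue-semantics commits replace the fixed mqrq_cur/mqrq_prev request pair
with a small slot allocator, mmc_queue_req_find()/mmc_queue_req_free() in
queue.c below, which tracks busy slots in the new qslots bitmap. The pattern
is a plain find-first-zero bitmap; a self-contained sketch of the same idea
follows (the my_* names are illustrative, not the kernel API):

	/*
	 * Illustrative slot allocator, same shape as mmc_queue_req_find():
	 * find the first free slot with ffz(), mark it busy, bump the count.
	 */
	struct my_slots {
		unsigned long	bitmap;	/* one bit per slot, 0 = free */
		int		depth;	/* number of usable slots */
		int		count;	/* slots currently in use */
	};

	static int my_slot_get(struct my_slots *s)
	{
		int i = ffz(s->bitmap);	/* index of the first zero bit */

		if (i >= s->depth)
			return -1;	/* all slots busy */

		__set_bit(i, &s->bitmap);
		s->count++;
		return i;
	}

	static void my_slot_put(struct my_slots *s, int i)
	{
		__clear_bit(i, &s->bitmap);
		s->count--;
	}
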
Diffstat (limited to 'drivers')
 drivers/clk/mvebu/ap806-system-controller.c | 21
 drivers/mmc/core/block.c | 300
 drivers/mmc/core/core.c | 193
 drivers/mmc/core/mmc.c | 9
 drivers/mmc/core/mmc_ops.c | 36
 drivers/mmc/core/mmc_ops.h | 2
 drivers/mmc/core/mmc_test.c | 14
 drivers/mmc/core/queue.c | 307
 drivers/mmc/core/queue.h | 12
 drivers/mmc/core/sd.c | 4
 drivers/mmc/core/sd_ops.c | 19
 drivers/mmc/core/sd_ops.h | 2
 drivers/mmc/core/sdio_io.c | 54
 drivers/mmc/core/sdio_ops.c | 9
 drivers/mmc/core/sdio_ops.h | 10
 drivers/mmc/host/Kconfig | 43
 drivers/mmc/host/Makefile | 8
 drivers/mmc/host/android-goldfish.c | 10
 drivers/mmc/host/atmel-mci.c | 30
 drivers/mmc/host/bcm2835.c | 1466
 drivers/mmc/host/cavium-octeon.c | 351
 drivers/mmc/host/cavium-thunderx.c | 187
 drivers/mmc/host/cavium.c | 1090
 drivers/mmc/host/cavium.h | 215
 drivers/mmc/host/davinci_mmc.c | 14
 drivers/mmc/host/dw_mmc.c | 397
 drivers/mmc/host/jz4740_mmc.c | 9
 drivers/mmc/host/meson-gx-mmc.c | 590
 drivers/mmc/host/mmc_spi.c | 5
 drivers/mmc/host/mmci.c | 20
 drivers/mmc/host/moxart-mmc.c | 8
 drivers/mmc/host/mtk-sd.c | 176
 drivers/mmc/host/mvsdio.c | 11
 drivers/mmc/host/omap_hsmmc.c | 21
 drivers/mmc/host/s3cmci.c | 261
 drivers/mmc/host/sdhci-acpi.c | 18
 drivers/mmc/host/sdhci-brcmstb.c | 3
 drivers/mmc/host/sdhci-cadence.c | 129
 drivers/mmc/host/sdhci-esdhc-imx.c | 32
 drivers/mmc/host/sdhci-esdhc.h | 7
 drivers/mmc/host/sdhci-msm.c | 8
 drivers/mmc/host/sdhci-of-arasan.c | 26
 drivers/mmc/host/sdhci-of-at91.c | 5
 drivers/mmc/host/sdhci-of-esdhc.c | 194
 drivers/mmc/host/sdhci-pci-core.c | 562
 drivers/mmc/host/sdhci-pci-data.c | 3
 drivers/mmc/host/sdhci-pci-o2micro.c | 4
 drivers/mmc/host/sdhci-pci.h | 24
 drivers/mmc/host/sdhci-pltfm.c | 3
 drivers/mmc/host/sdhci-pxav2.c | 9
 drivers/mmc/host/sdhci-pxav3.c | 12
 drivers/mmc/host/sdhci-s3c.c | 10
 drivers/mmc/host/sdhci-sirf.c | 3
 drivers/mmc/host/sdhci-spear.c | 3
 drivers/mmc/host/sdhci-st.c | 8
 drivers/mmc/host/sdhci-tegra.c | 59
 drivers/mmc/host/sdhci-xenon-phy.c | 837
 drivers/mmc/host/sdhci-xenon.c | 548
 drivers/mmc/host/sdhci-xenon.h | 101
 drivers/mmc/host/sdhci.c | 453
 drivers/mmc/host/sdhci.h | 65
 drivers/mmc/host/sunxi-mmc.c | 16
 drivers/mmc/host/tmio_mmc.h | 12
 drivers/mmc/host/tmio_mmc_dma.c | 61
 drivers/mmc/host/tmio_mmc_pio.c | 36
 65 files changed, 7542 insertions(+), 1613 deletions(-)
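
The SDIO wait-busy fix in this pull replaces mmc_is_io_op() with
sdio_is_io_busy() (see the sdio_ops.h hunk below), which decodes the target
register address out of the CMD52/CMD53 argument so that I/O-abort and
bus-suspend writes are no longer stalled behind a card-busy poll. For
reference, a sketch of the argument decode the helper relies on; the layout
follows the SD I/O specification and my_sdio_reg_addr() is an illustrative
name:

	/*
	 * CMD52 (SD_IO_RW_DIRECT) argument layout:
	 *   [31]    R/W flag
	 *   [30:28] function number
	 *   [27]    RAW (read-after-write) flag
	 *   [25:9]  17-bit register address (what sdio_is_io_busy() extracts)
	 *   [7:0]   write data
	 * Writes to SDIO_CCCR_ABORT (0x06) or SDIO_CCCR_SUSPEND (0x0c) must
	 * not busy-wait, hence the address check in sdio_is_io_busy().
	 */
	static inline u32 my_sdio_reg_addr(u32 arg)
	{
		return (arg >> 9) & 0x1FFFF;
	}
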
diff --git a/drivers/clk/mvebu/ap806-system-controller.c b/drivers/clk/mvebu/ap806-system-controller.c
index f17702107ac5..8155baccc98e 100644
--- a/drivers/clk/mvebu/ap806-system-controller.c
+++ b/drivers/clk/mvebu/ap806-system-controller.c
@@ -23,7 +23,7 @@
 #define AP806_SAR_REG			0x400
 #define AP806_SAR_CLKFREQ_MODE_MASK	0x1f
 
-#define AP806_CLK_NUM			4
+#define AP806_CLK_NUM			5
 
 static struct clk *ap806_clks[AP806_CLK_NUM];
 
@@ -135,6 +135,22 @@ static int ap806_syscon_clk_probe(struct platform_device *pdev)
 		goto fail3;
 	}
 
+	/* eMMC Clock is fixed clock divided by 3 */
+	if (of_property_read_string_index(np, "clock-output-names",
+					  4, &name)) {
+		ap806_clk_data.clk_num--;
+		dev_warn(&pdev->dev,
+			 "eMMC clock missing: update the device tree!\n");
+	} else {
+		ap806_clks[4] = clk_register_fixed_factor(NULL, name,
+							  fixedclk_name,
+							  0, 1, 3);
+		if (IS_ERR(ap806_clks[4])) {
+			ret = PTR_ERR(ap806_clks[4]);
+			goto fail4;
+		}
+	}
+
 	ret = of_clk_add_provider(np, of_clk_src_onecell_get, &ap806_clk_data);
 	if (ret)
 		goto fail_clk_add;
@@ -142,6 +158,8 @@ static int ap806_syscon_clk_probe(struct platform_device *pdev)
 	return 0;
 
 fail_clk_add:
+	clk_unregister_fixed_factor(ap806_clks[4]);
+fail4:
 	clk_unregister_fixed_factor(ap806_clks[3]);
 fail3:
 	clk_unregister_fixed_rate(ap806_clks[2]);
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index ff3da960c473..8273b078686d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -129,6 +129,13 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md);
 static int get_card_status(struct mmc_card *card, u32 *status, int retries);
 
+static void mmc_blk_requeue(struct request_queue *q, struct request *req)
+{
+	spin_lock_irq(q->queue_lock);
+	blk_requeue_request(q, req);
+	spin_unlock_irq(q->queue_lock);
+}
+
 static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
 {
 	struct mmc_blk_data *md;
@@ -721,10 +728,41 @@ static const struct block_device_operations mmc_bdops = {
 #endif
 };
 
+static int mmc_blk_part_switch_pre(struct mmc_card *card,
+				   unsigned int part_type)
+{
+	int ret = 0;
+
+	if (part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) {
+		if (card->ext_csd.cmdq_en) {
+			ret = mmc_cmdq_disable(card);
+			if (ret)
+				return ret;
+		}
+		mmc_retune_pause(card->host);
+	}
+
+	return ret;
+}
+
+static int mmc_blk_part_switch_post(struct mmc_card *card,
+				    unsigned int part_type)
+{
+	int ret = 0;
+
+	if (part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) {
+		mmc_retune_unpause(card->host);
+		if (card->reenable_cmdq && !card->ext_csd.cmdq_en)
+			ret = mmc_cmdq_enable(card);
+	}
+
+	return ret;
+}
+
 static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md)
 {
-	int ret;
+	int ret = 0;
 	struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev);
 
 	if (main_md->part_curr == md->part_type)
@@ -733,8 +771,9 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
 	if (mmc_card_mmc(card)) {
 		u8 part_config = card->ext_csd.part_config;
 
-		if (md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB)
-			mmc_retune_pause(card->host);
+		ret = mmc_blk_part_switch_pre(card, md->part_type);
+		if (ret)
+			return ret;
 
 		part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK;
 		part_config |= md->part_type;
@@ -743,19 +782,17 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
 				 EXT_CSD_PART_CONFIG, part_config,
 				 card->ext_csd.part_time);
 		if (ret) {
-			if (md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB)
-				mmc_retune_unpause(card->host);
+			mmc_blk_part_switch_post(card, md->part_type);
 			return ret;
 		}
 
 		card->ext_csd.part_config = part_config;
 
-		if (main_md->part_curr == EXT_CSD_PART_CONFIG_ACC_RPMB)
-			mmc_retune_unpause(card->host);
+		ret = mmc_blk_part_switch_post(card, main_md->part_curr);
 	}
 
 	main_md->part_curr = md->part_type;
-	return 0;
+	return ret;
 }
 
 static int mmc_sd_num_wr_blocks(struct mmc_card *card, u32 *written_blocks)
@@ -1272,7 +1309,7 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
 {
 	if (!(card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN)) {
 		/* Legacy mode imposes restrictions on transfers. */
-		if (!IS_ALIGNED(brq->cmd.arg, card->ext_csd.rel_sectors))
+		if (!IS_ALIGNED(blk_rq_pos(req), card->ext_csd.rel_sectors))
 			brq->data.blocks = 1;
 
 		if (brq->data.blocks > card->ext_csd.rel_sectors)
@@ -1396,36 +1433,39 @@ static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card,
 	return MMC_BLK_SUCCESS;
 }
 
-static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
-			       struct mmc_card *card,
-			       int disable_multi,
-			       struct mmc_queue *mq)
+static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq,
+			      int disable_multi, bool *do_rel_wr,
+			      bool *do_data_tag)
 {
-	u32 readcmd, writecmd;
+	struct mmc_blk_data *md = mq->blkdata;
+	struct mmc_card *card = md->queue.card;
 	struct mmc_blk_request *brq = &mqrq->brq;
 	struct request *req = mqrq->req;
-	struct mmc_blk_data *md = mq->blkdata;
-	bool do_data_tag;
 
 	/*
 	 * Reliable writes are used to implement Forced Unit Access and
 	 * are supported only on MMCs.
 	 */
-	bool do_rel_wr = (req->cmd_flags & REQ_FUA) &&
-		(rq_data_dir(req) == WRITE) &&
-		(md->flags & MMC_BLK_REL_WR);
+	*do_rel_wr = (req->cmd_flags & REQ_FUA) &&
+		     rq_data_dir(req) == WRITE &&
+		     (md->flags & MMC_BLK_REL_WR);
 
 	memset(brq, 0, sizeof(struct mmc_blk_request));
-	brq->mrq.cmd = &brq->cmd;
+
 	brq->mrq.data = &brq->data;
 
-	brq->cmd.arg = blk_rq_pos(req);
-	if (!mmc_card_blockaddr(card))
-		brq->cmd.arg <<= 9;
-	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-	brq->data.blksz = 512;
 	brq->stop.opcode = MMC_STOP_TRANSMISSION;
 	brq->stop.arg = 0;
+
+	if (rq_data_dir(req) == READ) {
+		brq->data.flags = MMC_DATA_READ;
+		brq->stop.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
+	} else {
+		brq->data.flags = MMC_DATA_WRITE;
+		brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+	}
+
+	brq->data.blksz = 512;
 	brq->data.blocks = blk_rq_sectors(req);
 
 	/*
@@ -1456,6 +1496,68 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 						brq->data.blocks);
 	}
 
+	if (*do_rel_wr)
+		mmc_apply_rel_rw(brq, card, req);
+
+	/*
+	 * Data tag is used only during writing meta data to speed
+	 * up write and any subsequent read of this meta data
+	 */
+	*do_data_tag = card->ext_csd.data_tag_unit_size &&
+		       (req->cmd_flags & REQ_META) &&
+		       (rq_data_dir(req) == WRITE) &&
+		       ((brq->data.blocks * brq->data.blksz) >=
+			card->ext_csd.data_tag_unit_size);
+
+	mmc_set_data_timeout(&brq->data, card);
+
+	brq->data.sg = mqrq->sg;
+	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
+
+	/*
+	 * Adjust the sg list so it is the same size as the
+	 * request.
+	 */
+	if (brq->data.blocks != blk_rq_sectors(req)) {
+		int i, data_size = brq->data.blocks << 9;
+		struct scatterlist *sg;
+
+		for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
+			data_size -= sg->length;
+			if (data_size <= 0) {
+				sg->length += data_size;
+				i++;
+				break;
+			}
+		}
+		brq->data.sg_len = i;
+	}
+
+	mqrq->areq.mrq = &brq->mrq;
+
+	mmc_queue_bounce_pre(mqrq);
+}
+
+static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+			       struct mmc_card *card,
+			       int disable_multi,
+			       struct mmc_queue *mq)
+{
+	u32 readcmd, writecmd;
+	struct mmc_blk_request *brq = &mqrq->brq;
+	struct request *req = mqrq->req;
+	struct mmc_blk_data *md = mq->blkdata;
+	bool do_rel_wr, do_data_tag;
+
+	mmc_blk_data_prep(mq, mqrq, disable_multi, &do_rel_wr, &do_data_tag);
+
+	brq->mrq.cmd = &brq->cmd;
+
+	brq->cmd.arg = blk_rq_pos(req);
+	if (!mmc_card_blockaddr(card))
+		brq->cmd.arg <<= 9;
+	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+
 	if (brq->data.blocks > 1 || do_rel_wr) {
 		/* SPI multiblock writes terminate using a special
 		 * token, not a STOP_TRANSMISSION request.
@@ -1470,32 +1572,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 		readcmd = MMC_READ_SINGLE_BLOCK;
 		writecmd = MMC_WRITE_BLOCK;
 	}
-	if (rq_data_dir(req) == READ) {
-		brq->cmd.opcode = readcmd;
-		brq->data.flags = MMC_DATA_READ;
-		if (brq->mrq.stop)
-			brq->stop.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 |
-					MMC_CMD_AC;
-	} else {
-		brq->cmd.opcode = writecmd;
-		brq->data.flags = MMC_DATA_WRITE;
-		if (brq->mrq.stop)
-			brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B |
-					MMC_CMD_AC;
-	}
-
-	if (do_rel_wr)
-		mmc_apply_rel_rw(brq, card, req);
-
-	/*
-	 * Data tag is used only during writing meta data to speed
-	 * up write and any subsequent read of this meta data
-	 */
-	do_data_tag = (card->ext_csd.data_tag_unit_size) &&
-		(req->cmd_flags & REQ_META) &&
-		(rq_data_dir(req) == WRITE) &&
-		((brq->data.blocks * brq->data.blksz) >=
-		 card->ext_csd.data_tag_unit_size);
+	brq->cmd.opcode = rq_data_dir(req) == READ ? readcmd : writecmd;
 
 	/*
 	 * Pre-defined multi-block transfers are preferable to
@@ -1526,34 +1603,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 		brq->mrq.sbc = &brq->sbc;
 	}
 
-	mmc_set_data_timeout(&brq->data, card);
-
-	brq->data.sg = mqrq->sg;
-	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
-
-	/*
-	 * Adjust the sg list so it is the same size as the
-	 * request.
-	 */
-	if (brq->data.blocks != blk_rq_sectors(req)) {
-		int i, data_size = brq->data.blocks << 9;
-		struct scatterlist *sg;
-
-		for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
-			data_size -= sg->length;
-			if (data_size <= 0) {
-				sg->length += data_size;
-				i++;
-				break;
-			}
-		}
-		brq->data.sg_len = i;
-	}
-
-	mqrq->areq.mrq = &brq->mrq;
 	mqrq->areq.err_check = mmc_blk_err_check;
-
-	mmc_queue_bounce_pre(mqrq);
 }
 
 static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
@@ -1585,11 +1635,14 @@ static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
 	return req_pending;
 }
 
-static void mmc_blk_rw_cmd_abort(struct mmc_card *card, struct request *req)
+static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
+				 struct request *req,
+				 struct mmc_queue_req *mqrq)
 {
 	if (mmc_card_removed(card))
 		req->rq_flags |= RQF_QUIET;
 	while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+	mmc_queue_req_free(mq, mqrq);
 }
 
 /**
@@ -1597,7 +1650,8 @@ static void mmc_blk_rw_cmd_abort(struct mmc_card *card, struct request *req)
  * @mq: the queue with the card and host to restart
  * @req: a new request that want to be started after the current one
  */
-static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req)
+static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
+				   struct mmc_queue_req *mqrq)
 {
 	if (!req)
 		return;
@@ -1608,11 +1662,12 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req)
 	if (mmc_card_removed(mq->card)) {
 		req->rq_flags |= RQF_QUIET;
 		blk_end_request_all(req, -EIO);
+		mmc_queue_req_free(mq, mqrq);
 		return;
 	}
 	/* Else proceed and try to restart the current async request */
-	mmc_blk_rw_rq_prep(mq->mqrq_cur, mq->card, 0, mq);
-	mmc_start_areq(mq->card->host, &mq->mqrq_cur->areq, NULL);
+	mmc_blk_rw_rq_prep(mqrq, mq->card, 0, mq);
+	mmc_start_areq(mq->card->host, &mqrq->areq, NULL);
 }
 
 static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
@@ -1622,13 +1677,23 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 	struct mmc_blk_request *brq;
 	int disable_multi = 0, retry = 0, type, retune_retry_done = 0;
 	enum mmc_blk_status status;
+	struct mmc_queue_req *mqrq_cur = NULL;
 	struct mmc_queue_req *mq_rq;
 	struct request *old_req;
 	struct mmc_async_req *new_areq;
 	struct mmc_async_req *old_areq;
 	bool req_pending = true;
 
-	if (!new_req && !mq->mqrq_prev->req)
+	if (new_req) {
+		mqrq_cur = mmc_queue_req_find(mq, new_req);
+		if (!mqrq_cur) {
+			WARN_ON(1);
+			mmc_blk_requeue(mq->queue, new_req);
+			new_req = NULL;
+		}
+	}
+
+	if (!mq->qcnt)
 		return;
 
 	do {
@@ -1641,12 +1706,12 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 				!IS_ALIGNED(blk_rq_sectors(new_req), 8)) {
 				pr_err("%s: Transfer size is not 4KB sector size aligned\n",
 					new_req->rq_disk->disk_name);
-				mmc_blk_rw_cmd_abort(card, new_req);
+				mmc_blk_rw_cmd_abort(mq, card, new_req, mqrq_cur);
 				return;
 			}
 
-			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
-			new_areq = &mq->mqrq_cur->areq;
+			mmc_blk_rw_rq_prep(mqrq_cur, card, 0, mq);
+			new_areq = &mqrq_cur->areq;
 		} else
 			new_areq = NULL;
 
@@ -1657,8 +1722,6 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			 * and there is nothing more to do until it is
 			 * complete.
 			 */
-			if (status == MMC_BLK_NEW_REQUEST)
-				mq->new_request = true;
 			return;
 		}
 
@@ -1691,7 +1754,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 				pr_err("%s BUG rq_tot %d d_xfer %d\n",
 				       __func__, blk_rq_bytes(old_req),
 				       brq->data.bytes_xfered);
-				mmc_blk_rw_cmd_abort(card, old_req);
+				mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
 				return;
 			}
 			break;
@@ -1699,12 +1762,15 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending);
 			if (mmc_blk_reset(md, card->host, type)) {
 				if (req_pending)
-					mmc_blk_rw_cmd_abort(card, old_req);
-				mmc_blk_rw_try_restart(mq, new_req);
+					mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
+				else
+					mmc_queue_req_free(mq, mq_rq);
+				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
 			if (!req_pending) {
-				mmc_blk_rw_try_restart(mq, new_req);
+				mmc_queue_req_free(mq, mq_rq);
+				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
 			break;
@@ -1716,8 +1782,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 		case MMC_BLK_ABORT:
 			if (!mmc_blk_reset(md, card->host, type))
 				break;
-			mmc_blk_rw_cmd_abort(card, old_req);
-			mmc_blk_rw_try_restart(mq, new_req);
+			mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
+			mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 			return;
 		case MMC_BLK_DATA_ERR: {
 			int err;
@@ -1726,8 +1792,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			if (!err)
 				break;
 			if (err == -ENODEV) {
-				mmc_blk_rw_cmd_abort(card, old_req);
-				mmc_blk_rw_try_restart(mq, new_req);
+				mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
+				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
 			/* Fall through */
@@ -1748,19 +1814,20 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			req_pending = blk_end_request(old_req, -EIO,
 						      brq->data.blksz);
 			if (!req_pending) {
-				mmc_blk_rw_try_restart(mq, new_req);
+				mmc_queue_req_free(mq, mq_rq);
+				mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 				return;
 			}
 			break;
 		case MMC_BLK_NOMEDIUM:
-			mmc_blk_rw_cmd_abort(card, old_req);
-			mmc_blk_rw_try_restart(mq, new_req);
+			mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
+			mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 			return;
 		default:
 			pr_err("%s: Unhandled return value (%d)",
 					old_req->rq_disk->disk_name, status);
-			mmc_blk_rw_cmd_abort(card, old_req);
-			mmc_blk_rw_try_restart(mq, new_req);
+			mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq);
+			mmc_blk_rw_try_restart(mq, new_req, mqrq_cur);
 			return;
 		}
 
@@ -1776,6 +1843,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
 			mq_rq->brq.retune_retry_done = retune_retry_done;
 		}
 	} while (req_pending);
+
+	mmc_queue_req_free(mq, mq_rq);
 }
 
 void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
@@ -1783,9 +1852,8 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	int ret;
 	struct mmc_blk_data *md = mq->blkdata;
 	struct mmc_card *card = md->queue.card;
-	bool req_is_special = mmc_req_is_special(req);
 
-	if (req && !mq->mqrq_prev->req)
+	if (req && !mq->qcnt)
 		/* claim host only for the first request */
 		mmc_get_card(card);
 
@@ -1797,20 +1865,19 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		goto out;
 	}
 
-	mq->new_request = false;
 	if (req && req_op(req) == REQ_OP_DISCARD) {
 		/* complete ongoing async transfer before issuing discard */
-		if (card->host->areq)
+		if (mq->qcnt)
 			mmc_blk_issue_rw_rq(mq, NULL);
 		mmc_blk_issue_discard_rq(mq, req);
 	} else if (req && req_op(req) == REQ_OP_SECURE_ERASE) {
 		/* complete ongoing async transfer before issuing secure erase*/
-		if (card->host->areq)
+		if (mq->qcnt)
 			mmc_blk_issue_rw_rq(mq, NULL);
 		mmc_blk_issue_secdiscard_rq(mq, req);
 	} else if (req && req_op(req) == REQ_OP_FLUSH) {
 		/* complete ongoing async transfer before issuing flush */
-		if (card->host->areq)
+		if (mq->qcnt)
 			mmc_blk_issue_rw_rq(mq, NULL);
 		mmc_blk_issue_flush(mq, req);
 	} else {
@@ -1819,13 +1886,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	}
 
 out:
-	if ((!req && !mq->new_request) || req_is_special)
-		/*
-		 * Release host when there are no more requests
-		 * and after special request(discard, flush) is done.
-		 * In case sepecial request, there is no reentry to
-		 * the 'mmc_blk_issue_rq' with 'mqrq_prev->req'.
-		 */
+	if (!mq->qcnt)
 		mmc_put_card(card);
 }
 
@@ -2105,6 +2166,7 @@ static int mmc_blk_probe(struct mmc_card *card)
 {
 	struct mmc_blk_data *md, *part_md;
 	char cap_str[10];
+	int ret;
 
 	/*
 	 * Check that the card supports the command class(es) we need.
@@ -2114,9 +2176,15 @@ static int mmc_blk_probe(struct mmc_card *card)
 
 	mmc_fixup_device(card, mmc_blk_fixups);
 
+	ret = mmc_queue_alloc_shared_queue(card);
+	if (ret)
+		return ret;
+
 	md = mmc_blk_alloc(card);
-	if (IS_ERR(md))
+	if (IS_ERR(md)) {
+		mmc_queue_free_shared_queue(card);
 		return PTR_ERR(md);
+	}
 
 	string_get_size((u64)get_capacity(md->disk), 512, STRING_UNITS_2,
 			cap_str, sizeof(cap_str));
@@ -2154,6 +2222,7 @@ static int mmc_blk_probe(struct mmc_card *card)
  out:
 	mmc_blk_remove_parts(card, md);
 	mmc_blk_remove_req(md);
+	mmc_queue_free_shared_queue(card);
 	return 0;
 }
 
@@ -2171,6 +2240,7 @@ static void mmc_blk_remove(struct mmc_card *card)
 	pm_runtime_put_noidle(&card->dev);
 	mmc_blk_remove_req(md);
 	dev_set_drvdata(&card->dev, NULL);
+	mmc_queue_free_shared_queue(card);
 }
 
 static int _mmc_blk_suspend(struct mmc_card *card)
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 926e0fde07d7..82c45ddfa202 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -172,14 +172,16 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 
 	trace_mmc_request_done(host, mrq);
 
-	if (err && cmd->retries && !mmc_card_removed(host->card)) {
-		/*
-		 * Request starter must handle retries - see
-		 * mmc_wait_for_req_done().
-		 */
-		if (mrq->done)
-			mrq->done(mrq);
-	} else {
+	/*
+	 * We list various conditions for the command to be considered
+	 * properly done:
+	 *
+	 * - There was no error, OK fine then
+	 * - We are not doing some kind of retry
+	 * - The card was removed (...so just complete everything no matter
+	 *   if there are errors or retries)
+	 */
+	if (!err || !cmd->retries || mmc_card_removed(host->card)) {
 		mmc_should_fail_request(host, mrq);
 
 		if (!host->ongoing_mrq)
@@ -211,10 +213,13 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 				mrq->stop->resp[0], mrq->stop->resp[1],
 				mrq->stop->resp[2], mrq->stop->resp[3]);
 		}
-
-		if (mrq->done)
-			mrq->done(mrq);
 	}
+	/*
+	 * Request starter must handle retries - see
+	 * mmc_wait_for_req_done().
+	 */
+	if (mrq->done)
+		mrq->done(mrq);
 }
 
 EXPORT_SYMBOL(mmc_request_done);
@@ -234,8 +239,10 @@ static void __mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 	/*
 	 * For sdio rw commands we must wait for card busy otherwise some
 	 * sdio devices won't work properly.
+	 * And bypass I/O abort, reset and bus suspend operations.
 	 */
-	if (mmc_is_io_op(mrq->cmd->opcode) && host->ops->card_busy) {
+	if (sdio_is_io_busy(mrq->cmd->opcode, mrq->cmd->arg) &&
+	    host->ops->card_busy) {
 		int tries = 500; /* Wait aprox 500ms at maximum */
 
 		while (host->ops->card_busy(host) && --tries)
@@ -262,26 +269,19 @@ static void __mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 	host->ops->request(host, mrq);
 }
 
-static int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
+static void mmc_mrq_pr_debug(struct mmc_host *host, struct mmc_request *mrq)
 {
-#ifdef CONFIG_MMC_DEBUG
-	unsigned int i, sz;
-	struct scatterlist *sg;
-#endif
-	mmc_retune_hold(host);
-
-	if (mmc_card_removed(host->card))
-		return -ENOMEDIUM;
-
 	if (mrq->sbc) {
 		pr_debug("<%s: starting CMD%u arg %08x flags %08x>\n",
 			 mmc_hostname(host), mrq->sbc->opcode,
 			 mrq->sbc->arg, mrq->sbc->flags);
 	}
 
-	pr_debug("%s: starting CMD%u arg %08x flags %08x\n",
-		 mmc_hostname(host), mrq->cmd->opcode,
-		 mrq->cmd->arg, mrq->cmd->flags);
+	if (mrq->cmd) {
+		pr_debug("%s: starting CMD%u arg %08x flags %08x\n",
+			 mmc_hostname(host), mrq->cmd->opcode, mrq->cmd->arg,
+			 mrq->cmd->flags);
+	}
 
 	if (mrq->data) {
 		pr_debug("%s:     blksz %d blocks %d flags %08x "
@@ -297,11 +297,20 @@ static int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 			 mmc_hostname(host), mrq->stop->opcode,
 			 mrq->stop->arg, mrq->stop->flags);
 	}
+}
 
-	WARN_ON(!host->claimed);
+static int mmc_mrq_prep(struct mmc_host *host, struct mmc_request *mrq)
+{
+#ifdef CONFIG_MMC_DEBUG
+	unsigned int i, sz;
+	struct scatterlist *sg;
+#endif
 
-	mrq->cmd->error = 0;
-	mrq->cmd->mrq = mrq;
+	if (mrq->cmd) {
+		mrq->cmd->error = 0;
+		mrq->cmd->mrq = mrq;
+		mrq->cmd->data = mrq->data;
+	}
 	if (mrq->sbc) {
 		mrq->sbc->error = 0;
 		mrq->sbc->mrq = mrq;
@@ -318,8 +327,6 @@ static int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 		if (sz != mrq->data->blocks * mrq->data->blksz)
 			return -EINVAL;
 #endif
-
-		mrq->cmd->data = mrq->data;
 		mrq->data->error = 0;
 		mrq->data->mrq = mrq;
 		if (mrq->stop) {
@@ -328,6 +335,27 @@ static int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 			mrq->stop->mrq = mrq;
 		}
 	}
+
+	return 0;
+}
+
+static int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
+{
+	int err;
+
+	mmc_retune_hold(host);
+
+	if (mmc_card_removed(host->card))
+		return -ENOMEDIUM;
+
+	mmc_mrq_pr_debug(host, mrq);
+
+	WARN_ON(!host->claimed);
+
+	err = mmc_mrq_prep(host, mrq);
+	if (err)
+		return err;
+
 	led_trigger_event(host->led, LED_FULL);
 	__mmc_start_request(host, mrq);
 
@@ -485,56 +513,6 @@ static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 	return err;
 }
 
-/*
- * mmc_wait_for_data_req_done() - wait for request completed
- * @host: MMC host to prepare the command.
- * @mrq: MMC request to wait for
- *
- * Blocks MMC context till host controller will ack end of data request
- * execution or new request notification arrives from the block layer.
- * Handles command retries.
- *
- * Returns enum mmc_blk_status after checking errors.
- */
-static enum mmc_blk_status mmc_wait_for_data_req_done(struct mmc_host *host,
-						      struct mmc_request *mrq)
-{
-	struct mmc_command *cmd;
-	struct mmc_context_info *context_info = &host->context_info;
-	enum mmc_blk_status status;
-
-	while (1) {
-		wait_event_interruptible(context_info->wait,
-				(context_info->is_done_rcv ||
-				 context_info->is_new_req));
-
-		if (context_info->is_done_rcv) {
-			context_info->is_done_rcv = false;
-			cmd = mrq->cmd;
-
-			if (!cmd->error || !cmd->retries ||
-			    mmc_card_removed(host->card)) {
-				status = host->areq->err_check(host->card,
-							       host->areq);
-				break; /* return status */
-			} else {
-				mmc_retune_recheck(host);
-				pr_info("%s: req failed (CMD%u): %d, retrying...\n",
-					mmc_hostname(host),
-					cmd->opcode, cmd->error);
-				cmd->retries--;
-				cmd->error = 0;
-				__mmc_start_request(host, mrq);
-				continue; /* wait for done/new event again */
-			}
-		}
-
-		return MMC_BLK_NEW_REQUEST;
-	}
-	mmc_retune_release(host);
-	return status;
-}
-
 void mmc_wait_for_req_done(struct mmc_host *host, struct mmc_request *mrq)
 {
 	struct mmc_command *cmd;
@@ -639,14 +617,44 @@ static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
  */
 static enum mmc_blk_status mmc_finalize_areq(struct mmc_host *host)
 {
+	struct mmc_context_info *context_info = &host->context_info;
 	enum mmc_blk_status status;
 
 	if (!host->areq)
 		return MMC_BLK_SUCCESS;
 
-	status = mmc_wait_for_data_req_done(host, host->areq->mrq);
-	if (status == MMC_BLK_NEW_REQUEST)
-		return status;
+	while (1) {
+		wait_event_interruptible(context_info->wait,
+				(context_info->is_done_rcv ||
+				 context_info->is_new_req));
+
+		if (context_info->is_done_rcv) {
+			struct mmc_command *cmd;
+
+			context_info->is_done_rcv = false;
+			cmd = host->areq->mrq->cmd;
+
+			if (!cmd->error || !cmd->retries ||
+			    mmc_card_removed(host->card)) {
+				status = host->areq->err_check(host->card,
+							       host->areq);
+				break; /* return status */
+			} else {
+				mmc_retune_recheck(host);
+				pr_info("%s: req failed (CMD%u): %d, retrying...\n",
+					mmc_hostname(host),
+					cmd->opcode, cmd->error);
+				cmd->retries--;
+				cmd->error = 0;
+				__mmc_start_request(host, host->areq->mrq);
+				continue; /* wait for done/new event again */
+			}
+		}
+
+		return MMC_BLK_NEW_REQUEST;
+	}
+
+	mmc_retune_release(host);
 
 	/*
 	 * Check BKOPS urgency for each R1 response
@@ -683,7 +691,7 @@ struct mmc_async_req *mmc_start_areq(struct mmc_host *host,
 {
 	enum mmc_blk_status status;
 	int start_err = 0;
-	struct mmc_async_req *data = host->areq;
+	struct mmc_async_req *previous = host->areq;
 
 	/* Prepare a new request */
 	if (areq)
@@ -691,13 +699,12 @@ struct mmc_async_req *mmc_start_areq(struct mmc_host *host,
 
 	/* Finalize previous request */
 	status = mmc_finalize_areq(host);
+	if (ret_stat)
+		*ret_stat = status;
 
 	/* The previous request is still going on... */
-	if (status == MMC_BLK_NEW_REQUEST) {
-		if (ret_stat)
-			*ret_stat = status;
+	if (status == MMC_BLK_NEW_REQUEST)
 		return NULL;
-	}
 
 	/* Fine so far, start the new request! */
 	if (status == MMC_BLK_SUCCESS && areq)
@@ -716,9 +723,7 @@ struct mmc_async_req *mmc_start_areq(struct mmc_host *host,
 	else
 		host->areq = areq;
 
-	if (ret_stat)
-		*ret_stat = status;
-	return data;
+	return previous;
 }
 EXPORT_SYMBOL(mmc_start_areq);
 
@@ -2555,6 +2560,12 @@ unsigned int mmc_calc_max_discard(struct mmc_card *card)
 }
 EXPORT_SYMBOL(mmc_calc_max_discard);
 
+bool mmc_card_is_blockaddr(struct mmc_card *card)
+{
+	return card ? mmc_card_blockaddr(card) : false;
+}
+EXPORT_SYMBOL(mmc_card_is_blockaddr);
+
 int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
 {
 	struct mmc_command cmd = {};
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index b502601df228..2c87dede5841 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -790,6 +790,7 @@ MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size);
 MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult);
 MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors);
 MMC_DEV_ATTR(ocr, "%08x\n", card->ocr);
+MMC_DEV_ATTR(cmdq_en, "%d\n", card->ext_csd.cmdq_en);
 
 static ssize_t mmc_fwrev_show(struct device *dev,
 			      struct device_attribute *attr,
@@ -845,6 +846,7 @@ static struct attribute *mmc_std_attrs[] = {
 	&dev_attr_rel_sectors.attr,
 	&dev_attr_ocr.attr,
 	&dev_attr_dsr.attr,
+	&dev_attr_cmdq_en.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(mmc_std);
@@ -1788,6 +1790,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	}
 
 	/*
+	 * In some cases (e.g. RPMB or mmc_test), the Command Queue must be
+	 * disabled for a time, so a flag is needed to indicate to re-enable the
+	 * Command Queue.
+	 */
+	card->reenable_cmdq = card->ext_csd.cmdq_en;
+
+	/*
 	 * The mandatory minimum values are defined for packed command.
 	 * read: 5, write: 3
 	 */
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index fe80f26d6971..78f75f00efc5 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -305,7 +305,7 @@ mmc_send_cxd_data(struct mmc_card *card, struct mmc_host *host,
 int mmc_send_csd(struct mmc_card *card, u32 *csd)
 {
 	int ret, i;
-	u32 *csd_tmp;
+	__be32 *csd_tmp;
 
 	if (!mmc_host_is_spi(card->host))
 		return mmc_send_cxd_native(card->host, card->rca << 16,
@@ -319,7 +319,7 @@ int mmc_send_csd(struct mmc_card *card, u32 *csd)
 	if (ret)
 		goto err;
 
-	for (i = 0;i < 4;i++)
+	for (i = 0; i < 4; i++)
 		csd[i] = be32_to_cpu(csd_tmp[i]);
 
 err:
@@ -330,7 +330,7 @@ err:
 int mmc_send_cid(struct mmc_host *host, u32 *cid)
 {
 	int ret, i;
-	u32 *cid_tmp;
+	__be32 *cid_tmp;
 
 	if (!mmc_host_is_spi(host)) {
 		if (!host->card)
@@ -347,7 +347,7 @@ int mmc_send_cid(struct mmc_host *host, u32 *cid)
 	if (ret)
 		goto err;
 
-	for (i = 0;i < 4;i++)
+	for (i = 0; i < 4; i++)
 		cid[i] = be32_to_cpu(cid_tmp[i]);
 
 err:
@@ -838,3 +838,31 @@ int mmc_can_ext_csd(struct mmc_card *card)
 {
 	return (card && card->csd.mmca_vsn > CSD_SPEC_VER_3);
 }
+
+static int mmc_cmdq_switch(struct mmc_card *card, bool enable)
+{
+	u8 val = enable ? EXT_CSD_CMDQ_MODE_ENABLED : 0;
+	int err;
+
+	if (!card->ext_csd.cmdq_support)
+		return -EOPNOTSUPP;
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_CMDQ_MODE_EN,
+			 val, card->ext_csd.generic_cmd6_time);
+	if (!err)
+		card->ext_csd.cmdq_en = enable;
+
+	return err;
+}
+
+int mmc_cmdq_enable(struct mmc_card *card)
+{
+	return mmc_cmdq_switch(card, true);
+}
+EXPORT_SYMBOL_GPL(mmc_cmdq_enable);
+
+int mmc_cmdq_disable(struct mmc_card *card)
+{
+	return mmc_cmdq_switch(card, false);
+}
+EXPORT_SYMBOL_GPL(mmc_cmdq_disable);
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 74beea8a9c7e..978bd2e60f8a 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -46,6 +46,8 @@ int mmc_read_bkops_status(struct mmc_card *card);
 void mmc_start_bkops(struct mmc_card *card, bool from_exception);
 int mmc_can_reset(struct mmc_card *card);
 int mmc_flush_cache(struct mmc_card *card);
+int mmc_cmdq_enable(struct mmc_card *card);
+int mmc_cmdq_disable(struct mmc_card *card);
 
 #endif
 
diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
index f99ac3123fd2..fd1b4b8510b9 100644
--- a/drivers/mmc/core/mmc_test.c
+++ b/drivers/mmc/core/mmc_test.c
@@ -26,6 +26,7 @@
 #include "card.h"
 #include "host.h"
 #include "bus.h"
+#include "mmc_ops.h"
 
 #define RESULT_OK		0
 #define RESULT_FAIL		1
@@ -3264,6 +3265,14 @@ static int mmc_test_probe(struct mmc_card *card)
 	if (ret)
 		return ret;
 
+	if (card->ext_csd.cmdq_en) {
+		mmc_claim_host(card->host);
+		ret = mmc_cmdq_disable(card);
+		mmc_release_host(card->host);
+		if (ret)
+			return ret;
+	}
+
 	dev_info(&card->dev, "Card claimed for testing.\n");
 
 	return 0;
@@ -3271,6 +3280,11 @@ static int mmc_test_probe(struct mmc_card *card)
 
 static void mmc_test_remove(struct mmc_card *card)
 {
+	if (card->reenable_cmdq) {
+		mmc_claim_host(card->host);
+		mmc_cmdq_enable(card);
+		mmc_release_host(card->host);
+	}
 	mmc_test_free_result(card);
 	mmc_test_free_dbgfs_file(card);
 }
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 4c54ad34e17a..5c37b6be3e7b 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -40,6 +40,35 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	return BLKPREP_OK;
 }
 
+struct mmc_queue_req *mmc_queue_req_find(struct mmc_queue *mq,
+					 struct request *req)
+{
+	struct mmc_queue_req *mqrq;
+	int i = ffz(mq->qslots);
+
+	if (i >= mq->qdepth)
+		return NULL;
+
+	mqrq = &mq->mqrq[i];
+	WARN_ON(mqrq->req || mq->qcnt >= mq->qdepth ||
+		test_bit(mqrq->task_id, &mq->qslots));
+	mqrq->req = req;
+	mq->qcnt += 1;
+	__set_bit(mqrq->task_id, &mq->qslots);
+
+	return mqrq;
+}
+
+void mmc_queue_req_free(struct mmc_queue *mq,
+			struct mmc_queue_req *mqrq)
+{
+	WARN_ON(!mqrq->req || mq->qcnt < 1 ||
+		!test_bit(mqrq->task_id, &mq->qslots));
+	mqrq->req = NULL;
+	mq->qcnt -= 1;
+	__clear_bit(mqrq->task_id, &mq->qslots);
+}
+
 static int mmc_queue_thread(void *d)
 {
 	struct mmc_queue *mq = d;
@@ -50,7 +79,7 @@ static int mmc_queue_thread(void *d)
 
 	down(&mq->thread_sem);
 	do {
-		struct request *req = NULL;
+		struct request *req;
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -63,38 +92,17 @@ static int mmc_queue_thread(void *d)
 			 * Dispatch queue is empty so set flags for
 			 * mmc_request_fn() to wake us up.
 			 */
-			if (mq->mqrq_prev->req)
+			if (mq->qcnt)
 				cntx->is_waiting_last_req = true;
 			else
 				mq->asleep = true;
 		}
-		mq->mqrq_cur->req = req;
 		spin_unlock_irq(q->queue_lock);
 
-		if (req || mq->mqrq_prev->req) {
-			bool req_is_special = mmc_req_is_special(req);
-
+		if (req || mq->qcnt) {
 			set_current_state(TASK_RUNNING);
 			mmc_blk_issue_rq(mq, req);
 			cond_resched();
-			if (mq->new_request) {
-				mq->new_request = false;
-				continue; /* fetch again */
-			}
-
-			/*
-			 * Current request becomes previous request
-			 * and vice versa.
-			 * In case of special requests, current request
-			 * has been finished. Do not assign it to previous
-			 * request.
-			 */
-			if (req_is_special)
-				mq->mqrq_cur->req = NULL;
-
-			mq->mqrq_prev->brq.mrq.data = NULL;
-			mq->mqrq_prev->req = NULL;
-			swap(mq->mqrq_prev, mq->mqrq_cur);
 		} else {
 			if (kthread_should_stop()) {
 				set_current_state(TASK_RUNNING);
@@ -141,17 +149,13 @@ static void mmc_request_fn(struct request_queue *q)
 		wake_up_process(mq->thread);
 }
 
-static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
+static struct scatterlist *mmc_alloc_sg(int sg_len)
 {
 	struct scatterlist *sg;
 
 	sg = kmalloc_array(sg_len, sizeof(*sg), GFP_KERNEL);
-	if (!sg)
-		*err = -ENOMEM;
-	else {
-		*err = 0;
+	if (sg)
 		sg_init_table(sg, sg_len);
-	}
 
 	return sg;
 }
@@ -175,80 +179,178 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 		queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q);
 }
 
-#ifdef CONFIG_MMC_BLOCK_BOUNCE
-static bool mmc_queue_alloc_bounce_bufs(struct mmc_queue *mq,
-					unsigned int bouncesz)
+static void mmc_queue_req_free_bufs(struct mmc_queue_req *mqrq)
+{
+	kfree(mqrq->bounce_sg);
+	mqrq->bounce_sg = NULL;
+
+	kfree(mqrq->sg);
+	mqrq->sg = NULL;
+
+	kfree(mqrq->bounce_buf);
+	mqrq->bounce_buf = NULL;
+}
+
+static void mmc_queue_reqs_free_bufs(struct mmc_queue_req *mqrq, int qdepth)
 {
 	int i;
 
-	for (i = 0; i < mq->qdepth; i++) {
-		mq->mqrq[i].bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-		if (!mq->mqrq[i].bounce_buf)
-			goto out_err;
-	}
+	for (i = 0; i < qdepth; i++)
+		mmc_queue_req_free_bufs(&mqrq[i]);
+}
 
-	return true;
+static void mmc_queue_free_mqrqs(struct mmc_queue_req *mqrq, int qdepth)
+{
+	mmc_queue_reqs_free_bufs(mqrq, qdepth);
+	kfree(mqrq);
+}
 
-out_err:
-	while (--i >= 0) {
-		kfree(mq->mqrq[i].bounce_buf);
-		mq->mqrq[i].bounce_buf = NULL;
+static struct mmc_queue_req *mmc_queue_alloc_mqrqs(int qdepth)
+{
+	struct mmc_queue_req *mqrq;
+	int i;
+
+	mqrq = kcalloc(qdepth, sizeof(*mqrq), GFP_KERNEL);
+	if (mqrq) {
+		for (i = 0; i < qdepth; i++)
+			mqrq[i].task_id = i;
 	}
-	pr_warn("%s: unable to allocate bounce buffers\n",
-		mmc_card_name(mq->card));
-	return false;
+
+	return mqrq;
 }
 
-static int mmc_queue_alloc_bounce_sgs(struct mmc_queue *mq,
-				      unsigned int bouncesz)
+#ifdef CONFIG_MMC_BLOCK_BOUNCE
+static int mmc_queue_alloc_bounce_bufs(struct mmc_queue_req *mqrq, int qdepth,
+				       unsigned int bouncesz)
 {
-	int i, ret;
+	int i;
 
-	for (i = 0; i < mq->qdepth; i++) {
-		mq->mqrq[i].sg = mmc_alloc_sg(1, &ret);
-		if (ret)
-			return ret;
+	for (i = 0; i < qdepth; i++) {
+		mqrq[i].bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+		if (!mqrq[i].bounce_buf)
+			return -ENOMEM;
 
-		mq->mqrq[i].bounce_sg = mmc_alloc_sg(bouncesz / 512, &ret);
-		if (ret)
-			return ret;
+		mqrq[i].sg = mmc_alloc_sg(1);
+		if (!mqrq[i].sg)
+			return -ENOMEM;
+
+		mqrq[i].bounce_sg = mmc_alloc_sg(bouncesz / 512);
+		if (!mqrq[i].bounce_sg)
+			return -ENOMEM;
 	}
 
 	return 0;
 }
+
+static bool mmc_queue_alloc_bounce(struct mmc_queue_req *mqrq, int qdepth,
+				   unsigned int bouncesz)
+{
+	int ret;
+
+	ret = mmc_queue_alloc_bounce_bufs(mqrq, qdepth, bouncesz);
+	if (ret)
+		mmc_queue_reqs_free_bufs(mqrq, qdepth);
+
+	return !ret;
+}
+
+static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
+{
+	unsigned int bouncesz = MMC_QUEUE_BOUNCESZ;
+
+	if (host->max_segs != 1)
+		return 0;
+
+	if (bouncesz > host->max_req_size)
+		bouncesz = host->max_req_size;
+	if (bouncesz > host->max_seg_size)
+		bouncesz = host->max_seg_size;
+	if (bouncesz > host->max_blk_count * 512)
+		bouncesz = host->max_blk_count * 512;
+
+	if (bouncesz <= 512)
+		return 0;
+
+	return bouncesz;
+}
+#else
+static inline bool mmc_queue_alloc_bounce(struct mmc_queue_req *mqrq,
+					  int qdepth, unsigned int bouncesz)
+{
+	return false;
+}
+
+static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host)
+{
+	return 0;
+}
 #endif
 
-static int mmc_queue_alloc_sgs(struct mmc_queue *mq, int max_segs)
+static int mmc_queue_alloc_sgs(struct mmc_queue_req *mqrq, int qdepth,
+			       int max_segs)
 {
-	int i, ret;
+	int i;
 
-	for (i = 0; i < mq->qdepth; i++) {
-		mq->mqrq[i].sg = mmc_alloc_sg(max_segs, &ret);
-		if (ret)
-			return ret;
+	for (i = 0; i < qdepth; i++) {
+		mqrq[i].sg = mmc_alloc_sg(max_segs);
+		if (!mqrq[i].sg)
+			return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static void mmc_queue_req_free_bufs(struct mmc_queue_req *mqrq)
+void mmc_queue_free_shared_queue(struct mmc_card *card)
 {
-	kfree(mqrq->bounce_sg);
-	mqrq->bounce_sg = NULL;
+	if (card->mqrq) {
+		mmc_queue_free_mqrqs(card->mqrq, card->qdepth);
+		card->mqrq = NULL;
+	}
+}
 
-	kfree(mqrq->sg);
-	mqrq->sg = NULL;
+static int __mmc_queue_alloc_shared_queue(struct mmc_card *card, int qdepth)
+{
+	struct mmc_host *host = card->host;
+	struct mmc_queue_req *mqrq;
+	unsigned int bouncesz;
+	int ret = 0;
 
-	kfree(mqrq->bounce_buf);
-	mqrq->bounce_buf = NULL;
+	if (card->mqrq)
+		return -EINVAL;
+
+	mqrq = mmc_queue_alloc_mqrqs(qdepth);
+	if (!mqrq)
+		return -ENOMEM;
+
+	card->mqrq = mqrq;
+	card->qdepth = qdepth;
+
+	bouncesz = mmc_queue_calc_bouncesz(host);
+
+	if (bouncesz && !mmc_queue_alloc_bounce(mqrq, qdepth, bouncesz)) {
+		bouncesz = 0;
+		pr_warn("%s: unable to allocate bounce buffers\n",
+			mmc_card_name(card));
+	}
+
+	card->bouncesz = bouncesz;
+
+	if (!bouncesz) {
+		ret = mmc_queue_alloc_sgs(mqrq, qdepth, host->max_segs);
+		if (ret)
+			goto out_err;
+	}
+
+	return ret;
+
+out_err:
+	mmc_queue_free_shared_queue(card);
+	return ret;
 }
 
-static void mmc_queue_reqs_free_bufs(struct mmc_queue *mq)
+int mmc_queue_alloc_shared_queue(struct mmc_card *card)
 {
-	int i;
-
-	for (i = 0; i < mq->qdepth; i++)
-		mmc_queue_req_free_bufs(&mq->mqrq[i]);
+	return __mmc_queue_alloc_shared_queue(card, 2);
 }
 
 /**
@@ -265,7 +367,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 {
 	struct mmc_host *host = card->host;
 	u64 limit = BLK_BOUNCE_HIGH;
-	bool bounce = false;
 	int ret = -ENOMEM;
 
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
@@ -276,13 +377,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	if (!mq->queue)
 		return -ENOMEM;
 
-	mq->qdepth = 2;
-	mq->mqrq = kcalloc(mq->qdepth, sizeof(struct mmc_queue_req),
-			   GFP_KERNEL);
-	if (!mq->mqrq)
-		goto blk_cleanup;
-	mq->mqrq_cur = &mq->mqrq[0];
-	mq->mqrq_prev = &mq->mqrq[1];
+	mq->mqrq = card->mqrq;
+	mq->qdepth = card->qdepth;
 	mq->queue->queuedata = mq;
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
@@ -291,44 +387,17 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	if (mmc_can_erase(card))
 		mmc_queue_setup_discard(mq->queue, card);
 
-#ifdef CONFIG_MMC_BLOCK_BOUNCE
-	if (host->max_segs == 1) {
-		unsigned int bouncesz;
-
-		bouncesz = MMC_QUEUE_BOUNCESZ;
-
-		if (bouncesz > host->max_req_size)
-			bouncesz = host->max_req_size;
-		if (bouncesz > host->max_seg_size)
-			bouncesz = host->max_seg_size;
-		if (bouncesz > (host->max_blk_count * 512))
-			bouncesz = host->max_blk_count * 512;
-
-		if (bouncesz > 512 &&
-		    mmc_queue_alloc_bounce_bufs(mq, bouncesz)) {
-			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
-			blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
-			blk_queue_max_segments(mq->queue, bouncesz / 512);
-			blk_queue_max_segment_size(mq->queue, bouncesz);
-
-			ret = mmc_queue_alloc_bounce_sgs(mq, bouncesz);
-			if (ret)
-				goto cleanup_queue;
-			bounce = true;
-		}
-	}
-#endif
-
-	if (!bounce) {
+	if (card->bouncesz) {
+		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
+		blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
+		blk_queue_max_segments(mq->queue, card->bouncesz / 512);
+		blk_queue_max_segment_size(mq->queue, card->bouncesz);
+	} else {
 		blk_queue_bounce_limit(mq->queue, limit);
 		blk_queue_max_hw_sectors(mq->queue,
 			min(host->max_blk_count, host->max_req_size / 512));
 		blk_queue_max_segments(mq->queue, host->max_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
-
-		ret = mmc_queue_alloc_sgs(mq, host->max_segs);
-		if (ret)
-			goto cleanup_queue;
 	}
 
 	sema_init(&mq->thread_sem, 1);
@@ -343,11 +412,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 
 	return 0;
 
- cleanup_queue:
-	mmc_queue_reqs_free_bufs(mq);
-	kfree(mq->mqrq);
+cleanup_queue:
 	mq->mqrq = NULL;
-blk_cleanup:
 	blk_cleanup_queue(mq->queue);
 	return ret;
 }
@@ -369,10 +435,7 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 	blk_start_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	mmc_queue_reqs_free_bufs(mq);
-	kfree(mq->mqrq);
 	mq->mqrq = NULL;
-
 	mq->card = NULL;
 }
 EXPORT_SYMBOL(mmc_cleanup_queue);
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index e298f100101b..871796c3f406 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -34,23 +34,25 @@ struct mmc_queue_req {
 	struct scatterlist	*bounce_sg;
 	unsigned int		bounce_sg_len;
 	struct mmc_async_req	areq;
+	int			task_id;
 };
 
 struct mmc_queue {
 	struct mmc_card		*card;
 	struct task_struct	*thread;
 	struct semaphore	thread_sem;
-	bool			new_request;
 	bool			suspended;
 	bool			asleep;
 	struct mmc_blk_data	*blkdata;
 	struct request_queue	*queue;
 	struct mmc_queue_req	*mqrq;
-	struct mmc_queue_req	*mqrq_cur;
-	struct mmc_queue_req	*mqrq_prev;
 	int			qdepth;
+	int			qcnt;
+	unsigned long		qslots;
 };
 
+extern int mmc_queue_alloc_shared_queue(struct mmc_card *card);
+extern void mmc_queue_free_shared_queue(struct mmc_card *card);
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
 			  const char *);
 extern void mmc_cleanup_queue(struct mmc_queue *);
@@ -64,4 +66,8 @@ extern void mmc_queue_bounce_post(struct mmc_queue_req *);
 
 extern int mmc_access_rpmb(struct mmc_queue *);
 
+extern struct mmc_queue_req *mmc_queue_req_find(struct mmc_queue *,
+						struct request *);
+extern void mmc_queue_req_free(struct mmc_queue *, struct mmc_queue_req *);
+
 #endif
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 89531b48ae84..d109634fbfce 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -225,7 +225,7 @@ static int mmc_decode_scr(struct mmc_card *card)
 static int mmc_read_ssr(struct mmc_card *card)
 {
 	unsigned int au, es, et, eo;
-	u32 *raw_ssr;
+	__be32 *raw_ssr;
 	int i;
 
 	if (!(card->csd.cmdclass & CCC_APP_SPEC)) {
@@ -853,7 +853,7 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
 		/*
 		 * Fetch SCR from card.
 		 */
-		err = mmc_app_send_scr(card, card->raw_scr);
+		err = mmc_app_send_scr(card);
 		if (err)
 			return err;
 
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index 9d5824a37586..47056d8d1bac 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -232,14 +232,14 @@ int mmc_send_relative_addr(struct mmc_host *host, unsigned int *rca)
 	return 0;
 }
 
-int mmc_app_send_scr(struct mmc_card *card, u32 *scr)
+int mmc_app_send_scr(struct mmc_card *card)
 {
 	int err;
 	struct mmc_request mrq = {};
 	struct mmc_command cmd = {};
 	struct mmc_data data = {};
 	struct scatterlist sg;
-	void *data_buf;
+	__be32 *scr;
 
 	/* NOTE: caller guarantees scr is heap-allocated */
 
@@ -250,8 +250,8 @@ int mmc_app_send_scr(struct mmc_card *card, u32 *scr)
 	/* dma onto stack is unsafe/nonportable, but callers to this
 	 * routine normally provide temporary on-stack buffers ...
 	 */
-	data_buf = kmalloc(sizeof(card->raw_scr), GFP_KERNEL);
-	if (data_buf == NULL)
+	scr = kmalloc(sizeof(card->raw_scr), GFP_KERNEL);
+	if (!scr)
 		return -ENOMEM;
 
 	mrq.cmd = &cmd;
@@ -267,23 +267,22 @@ int mmc_app_send_scr(struct mmc_card *card, u32 *scr)
 	data.sg = &sg;
 	data.sg_len = 1;
 
-	sg_init_one(&sg, data_buf, 8);
+	sg_init_one(&sg, scr, 8);
 
 	mmc_set_data_timeout(&data, card);
 
 	mmc_wait_for_req(card->host, &mrq);
 
-	memcpy(scr, data_buf, sizeof(card->raw_scr));
-	kfree(data_buf);
+	card->raw_scr[0] = be32_to_cpu(scr[0]);
+	card->raw_scr[1] = be32_to_cpu(scr[1]);
+
+	kfree(scr);
 
 	if (cmd.error)
 		return cmd.error;
 	if (data.error)
 		return data.error;
 
-	scr[0] = be32_to_cpu(scr[0]);
-	scr[1] = be32_to_cpu(scr[1]);
-
 	return 0;
 }
 
diff --git a/drivers/mmc/core/sd_ops.h b/drivers/mmc/core/sd_ops.h
index 784f8e6b6baa..0e6c3d51e66d 100644
--- a/drivers/mmc/core/sd_ops.h
+++ b/drivers/mmc/core/sd_ops.h
@@ -22,7 +22,7 @@ int mmc_app_set_bus_width(struct mmc_card *card, int width);
 int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr);
 int mmc_send_if_cond(struct mmc_host *host, u32 ocr);
 int mmc_send_relative_addr(struct mmc_host *host, unsigned int *rca);
-int mmc_app_send_scr(struct mmc_card *card, u32 *scr);
+int mmc_app_send_scr(struct mmc_card *card);
 int mmc_sd_switch(struct mmc_card *card, int mode, int group,
 	u8 value, u8 *resp);
 int mmc_app_sd_status(struct mmc_card *card, void *ssr);
diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index 74195d772f5a..d40744bbafa9 100644
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -373,19 +373,16 @@ u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret)
 	u8 val;
 
 	if (!func) {
-		*err_ret = -EINVAL;
+		if (err_ret)
+			*err_ret = -EINVAL;
 		return 0xFF;
 	}
 
-	if (err_ret)
-		*err_ret = 0;
-
 	ret = mmc_io_rw_direct(func->card, 0, func->num, addr, 0, &val);
-	if (ret) {
-		if (err_ret)
-			*err_ret = ret;
+	if (err_ret)
+		*err_ret = ret;
+	if (ret)
 		return 0xFF;
-	}
 
 	return val;
 }
@@ -407,7 +404,8 @@ void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret)
 	int ret;
 
 	if (!func) {
-		*err_ret = -EINVAL;
+		if (err_ret)
+			*err_ret = -EINVAL;
 		return;
 	}
 
@@ -441,7 +439,7 @@ u8 sdio_writeb_readb(struct sdio_func *func, u8 write_byte,
 	if (err_ret)
 		*err_ret = ret;
 	if (ret)
-		val = 0xff;
+		return 0xff;
 
 	return val;
 }
@@ -529,15 +527,11 @@ u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret)
 {
 	int ret;
 
-	if (err_ret)
-		*err_ret = 0;
-
 	ret = sdio_memcpy_fromio(func, func->tmpbuf, addr, 2);
-	if (ret) {
-		if (err_ret)
-			*err_ret = ret;
+	if (err_ret)
+		*err_ret = ret;
+	if (ret)
 		return 0xFFFF;
-	}
 
 	return le16_to_cpup((__le16 *)func->tmpbuf);
 }
@@ -581,15 +575,11 @@ u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret)
 {
 	int ret;
 
-	if (err_ret)
-		*err_ret = 0;
-
 	ret = sdio_memcpy_fromio(func, func->tmpbuf, addr, 4);
-	if (ret) {
-		if (err_ret)
-			*err_ret = ret;
+	if (err_ret)
+		*err_ret = ret;
+	if (ret)
 		return 0xFFFFFFFF;
-	}
 
 	return le32_to_cpup((__le32 *)func->tmpbuf);
 }
@@ -635,19 +625,16 @@ unsigned char sdio_f0_readb(struct sdio_func *func, unsigned int addr,
 	unsigned char val;
 
 	if (!func) {
-		*err_ret = -EINVAL;
+		if (err_ret)
+			*err_ret = -EINVAL;
 		return 0xFF;
 	}
 
-	if (err_ret)
-		*err_ret = 0;
-
 	ret = mmc_io_rw_direct(func->card, 0, 0, addr, 0, &val);
-	if (ret) {
-		if (err_ret)
-			*err_ret = ret;
+	if (err_ret)
+		*err_ret = ret;
+	if (ret)
 		return 0xFF;
-	}
 
 	return val;
 }
@@ -673,7 +660,8 @@ void sdio_f0_writeb(struct sdio_func *func, unsigned char b, unsigned int addr,
 	int ret;
 
 	if (!func) {
-		*err_ret = -EINVAL;
+		if (err_ret)
+			*err_ret = -EINVAL;
 		return;
 	}
 
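The sdio_io.c changes above also make the accessors' error reporting uniform:
when the caller passes a non-NULL err_ret it is now always written (and a NULL
err_ret is tolerated everywhere), with the read accessors returning all-ones
(0xFF, 0xFFFF or 0xFFFFFFFF) on failure. A typical caller after this series,
using the existing sdio_readb() API; my_read_reg() is a hypothetical wrapper:

	/* err is always valid after the call; val is 0xFF when err != 0 */
	static int my_read_reg(struct sdio_func *func, unsigned int addr,
			       u8 *val)
	{
		int err;

		sdio_claim_host(func);
		*val = sdio_readb(func, addr, &err);
		sdio_release_host(func);

		return err;
	}
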
diff --git a/drivers/mmc/core/sdio_ops.c b/drivers/mmc/core/sdio_ops.c
index 3c0d3ab4324c..abaaba38514f 100644
--- a/drivers/mmc/core/sdio_ops.c
+++ b/drivers/mmc/core/sdio_ops.c
@@ -152,7 +152,7 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn,
 	data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ;
 
 	left_size = data.blksz * data.blocks;
-	nents = (left_size - 1) / seg_size + 1;
+	nents = DIV_ROUND_UP(left_size, seg_size);
 	if (nents > 1) {
 		if (sg_alloc_table(&sgtable, nents, GFP_KERNEL))
 			return -ENOMEM;
@@ -161,10 +161,9 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn,
 		data.sg_len = nents;
 
 		for_each_sg(data.sg, sg_ptr, data.sg_len, i) {
-			sg_set_page(sg_ptr, virt_to_page(buf + (i * seg_size)),
-					min(seg_size, left_size),
-					offset_in_page(buf + (i * seg_size)));
-			left_size = left_size - seg_size;
+			sg_set_buf(sg_ptr, buf + i * seg_size,
+				   min(seg_size, left_size));
+			left_size -= seg_size;
 		}
 	} else {
 		data.sg = &sg;
diff --git a/drivers/mmc/core/sdio_ops.h b/drivers/mmc/core/sdio_ops.h
index bed8a8377fec..ee35cb4d170e 100644
--- a/drivers/mmc/core/sdio_ops.h
+++ b/drivers/mmc/core/sdio_ops.h
@@ -26,9 +26,15 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn,
 int sdio_reset(struct mmc_host *host);
 unsigned int mmc_align_data_size(struct mmc_card *card, unsigned int sz);
 
-static inline bool mmc_is_io_op(u32 opcode)
+static inline bool sdio_is_io_busy(u32 opcode, u32 arg)
 {
-	return opcode == SD_IO_RW_DIRECT || opcode == SD_IO_RW_EXTENDED;
+	u32 addr;
+
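+	/*
+	 * Bits 25:9 of the CMD52/CMD53 argument hold the register
+	 * address; extract it so accesses to SDIO_CCCR_ABORT and
+	 * SDIO_CCCR_SUSPEND can be exempted from busy detection.
+	 */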
+	addr = (arg >> 9) & 0x1FFFF;
+
+	return (opcode == SD_IO_RW_EXTENDED ||
+		(opcode == SD_IO_RW_DIRECT &&
+		!(addr == SDIO_CCCR_ABORT || addr == SDIO_CCCR_SUSPEND)));
 }
 
 #endif
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index f08691a58d7e..2db84dd664d7 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -622,6 +622,27 @@ config SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND
 	help
 	  If you say yes here SD-Cards may work on the EZkit.
 
+config MMC_CAVIUM_OCTEON
+	tristate "Cavium OCTEON SD/MMC Card Interface support"
+	depends on CAVIUM_OCTEON_SOC
+	help
+	  This selects the Cavium OCTEON SD/MMC Card Interface.
+	  If you have an OCTEON board with a Multimedia Card slot,
+	  say Y or M here.
+
+	  If unsure, say N.
+
+config MMC_CAVIUM_THUNDERX
+	tristate "Cavium ThunderX SD/MMC Card Interface support"
+	depends on PCI && 64BIT && (ARM64 || COMPILE_TEST)
+	depends on GPIOLIB
+	depends on OF_ADDRESS
+	help
+	  This selects the Cavium ThunderX SD/MMC Card Interface.
+	  If you have a Cavium ARM64 board with a Multimedia Card slot
+	  or a built-in eMMC chip, say Y or M here. If built as a module,
+	  the module will be called thunderx_mmc.ko.
+
 config MMC_DW
 	tristate "Synopsys DesignWare Memory Card Interface"
 	depends on HAS_DMA
@@ -799,6 +820,20 @@ config MMC_TOSHIBA_PCI
 	depends on PCI
 	help
 
+config MMC_BCM2835
+	tristate "Broadcom BCM2835 SDHOST MMC Controller support"
+	depends on ARCH_BCM2835 || COMPILE_TEST
+	depends on HAS_DMA
+	help
+	  This selects the BCM2835 SDHOST MMC controller. If you have
+	  a BCM2835 platform with SD or MMC devices, say Y or M here.
+
+	  Note that the BCM2835 has two SD controllers: The Arasan
+	  sdhci controller (supported by MMC_SDHCI_IPROC) and a custom
+	  sdhost controller (supported by this driver).
+
+	  If unsure, say N.
+
 config MMC_MTK
 	tristate "MediaTek SD/MMC Card Interface support"
 	depends on HAS_DMA
@@ -828,3 +863,11 @@ config MMC_SDHCI_BRCMSTB
 	  Broadcom STB SoCs.
 
 	  If unsure, say Y.
+
+config MMC_SDHCI_XENON
+	tristate "Marvell Xenon eMMC/SD/SDIO SDHCI driver"
+	depends on MMC_SDHCI_PLTFM
+	help
+	  This selects the Marvell Xenon eMMC/SD/SDIO SDHCI controller.
+	  If you have a controller with this interface, say Y or M here.
+	  If unsure, say N.
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 6d548c4ee2fa..926347c2eeb4 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -42,6 +42,10 @@ obj-$(CONFIG_MMC_SDHI)		+= sh_mobile_sdhi.o
 obj-$(CONFIG_MMC_CB710)		+= cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)	+= via-sdmmc.o
 obj-$(CONFIG_SDH_BFIN)		+= bfin_sdh.o
+octeon-mmc-objs := cavium.o cavium-octeon.o
+obj-$(CONFIG_MMC_CAVIUM_OCTEON) += octeon-mmc.o
+thunderx-mmc-objs := cavium.o cavium-thunderx.o
+obj-$(CONFIG_MMC_CAVIUM_THUNDERX) += thunderx-mmc.o
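+# cavium.o provides the shared core logic; each SoC variant links in
+# its own glue file (cavium-octeon.o or cavium-thunderx.o).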
 obj-$(CONFIG_MMC_DW)		+= dw_mmc.o
 obj-$(CONFIG_MMC_DW_PLTFM)	+= dw_mmc-pltfm.o
 obj-$(CONFIG_MMC_DW_EXYNOS)	+= dw_mmc-exynos.o
@@ -59,6 +63,7 @@ obj-$(CONFIG_MMC_MOXART)	+= moxart-mmc.o
 obj-$(CONFIG_MMC_SUNXI)		+= sunxi-mmc.o
 obj-$(CONFIG_MMC_USDHI6ROL0)	+= usdhi6rol0.o
 obj-$(CONFIG_MMC_TOSHIBA_PCI)	+= toshsd.o
+obj-$(CONFIG_MMC_BCM2835)	+= bcm2835.o
 
 obj-$(CONFIG_MMC_REALTEK_PCI)	+= rtsx_pci_sdmmc.o
 obj-$(CONFIG_MMC_REALTEK_USB)	+= rtsx_usb_sdmmc.o
@@ -83,3 +88,6 @@ obj-$(CONFIG_MMC_SDHCI_BRCMSTB)		+= sdhci-brcmstb.o
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
 endif
+
+obj-$(CONFIG_MMC_SDHCI_XENON)	+= sdhci-xenon-driver.o
+sdhci-xenon-driver-y		+= sdhci-xenon.o sdhci-xenon-phy.o
diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c
index 590a8a4522be..5b3e1c9bb75f 100644
--- a/drivers/mmc/host/android-goldfish.c
+++ b/drivers/mmc/host/android-goldfish.c
@@ -212,10 +212,7 @@ static void goldfish_mmc_xfer_done(struct goldfish_mmc_host *host,
 	if (host->dma_in_use) {
 		enum dma_data_direction dma_data_dir;
 
-		if (data->flags & MMC_DATA_WRITE)
-			dma_data_dir = DMA_TO_DEVICE;
-		else
-			dma_data_dir = DMA_FROM_DEVICE;
+		dma_data_dir = mmc_get_dma_dir(data);
 
 		if (dma_data_dir == DMA_FROM_DEVICE) {
 			/*
@@ -390,10 +387,7 @@ static void goldfish_mmc_prepare_data(struct goldfish_mmc_host *host,
 	 */
 	sg_len = (data->blocks == 1) ? 1 : data->sg_len;
 
-	if (data->flags & MMC_DATA_WRITE)
-		dma_data_dir = DMA_TO_DEVICE;
-	else
-		dma_data_dir = DMA_FROM_DEVICE;
+	dma_data_dir = mmc_get_dma_dir(data);
 
 	host->sg_len = dma_map_sg(mmc_dev(host->mmc), data->sg,
 				  sg_len, dma_data_dir);
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 0ad8ef565b74..388e4a3f13e6 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -954,8 +954,7 @@ static void atmci_pdc_cleanup(struct atmel_mci *host)
 	if (data)
 		dma_unmap_sg(&host->pdev->dev,
 				data->sg, data->sg_len,
-				((data->flags & MMC_DATA_WRITE)
-				 ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+				mmc_get_dma_dir(data));
 }
 
 /*
@@ -993,8 +992,7 @@ static void atmci_dma_cleanup(struct atmel_mci *host)
 	if (data)
 		dma_unmap_sg(host->dma.chan->device->dev,
 				data->sg, data->sg_len,
-				((data->flags & MMC_DATA_WRITE)
-				 ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+				mmc_get_dma_dir(data));
 }
 
 /*
@@ -1095,7 +1093,6 @@ atmci_prepare_data_pdc(struct atmel_mci *host, struct mmc_data *data)
 {
 	u32 iflags, tmp;
 	unsigned int sg_len;
-	enum dma_data_direction dir;
 	int i;
 
 	data->error = -EINPROGRESS;
@@ -1107,13 +1104,10 @@ atmci_prepare_data_pdc(struct atmel_mci *host, struct mmc_data *data)
 	/* Enable pdc mode */
 	atmci_writel(host, ATMCI_MR, host->mode_reg | ATMCI_MR_PDCMODE);
 
-	if (data->flags & MMC_DATA_READ) {
-		dir = DMA_FROM_DEVICE;
+	if (data->flags & MMC_DATA_READ)
 		iflags |= ATMCI_ENDRX | ATMCI_RXBUFF;
-	} else {
-		dir = DMA_TO_DEVICE;
+	else
 		iflags |= ATMCI_ENDTX | ATMCI_TXBUFE | ATMCI_BLKE;
-	}
 
 	/* Set BLKLEN */
 	tmp = atmci_readl(host, ATMCI_MR);
@@ -1123,7 +1117,8 @@ atmci_prepare_data_pdc(struct atmel_mci *host, struct mmc_data *data)
 
 	/* Configure PDC */
 	host->data_size = data->blocks * data->blksz;
-	sg_len = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, dir);
+	sg_len = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len,
+			    mmc_get_dma_dir(data));
 
 	if ((!host->caps.has_rwproof)
 	    && (host->data->flags & MMC_DATA_WRITE)) {
@@ -1135,9 +1130,8 @@ atmci_prepare_data_pdc(struct atmel_mci *host, struct mmc_data *data)
 	}
 
 	if (host->data_size)
-		atmci_pdc_set_both_buf(host,
-			((dir == DMA_FROM_DEVICE) ? XFER_RECEIVE : XFER_TRANSMIT));
-
+		atmci_pdc_set_both_buf(host, data->flags & MMC_DATA_READ ?
+				       XFER_RECEIVE : XFER_TRANSMIT);
 	return iflags;
 }
 
@@ -1148,7 +1142,6 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
 	struct dma_async_tx_descriptor	*desc;
 	struct scatterlist		*sg;
 	unsigned int			i;
-	enum dma_data_direction		direction;
 	enum dma_transfer_direction	slave_dirn;
 	unsigned int			sglen;
 	u32				maxburst;
@@ -1186,12 +1179,10 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
 		return -ENODEV;
 
 	if (data->flags & MMC_DATA_READ) {
-		direction = DMA_FROM_DEVICE;
 		host->dma_conf.direction = slave_dirn = DMA_DEV_TO_MEM;
 		maxburst = atmci_convert_chksize(host,
 						 host->dma_conf.src_maxburst);
 	} else {
-		direction = DMA_TO_DEVICE;
 		host->dma_conf.direction = slave_dirn = DMA_MEM_TO_DEV;
 		maxburst = atmci_convert_chksize(host,
 						 host->dma_conf.dst_maxburst);
@@ -1202,7 +1193,7 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
 			ATMCI_DMAEN);
 
 	sglen = dma_map_sg(chan->device->dev, data->sg,
-			data->sg_len, direction);
+			data->sg_len, mmc_get_dma_dir(data));
 
 	dmaengine_slave_config(chan, &host->dma_conf);
 	desc = dmaengine_prep_slave_sg(chan,
@@ -1217,7 +1208,8 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
 
 	return iflags;
 unmap_exit:
-	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, direction);
+	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len,
+		     mmc_get_dma_dir(data));
 	return -ENOMEM;
 }
 
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
new file mode 100644
index 000000000000..1f343a477b3d
--- /dev/null
+++ b/drivers/mmc/host/bcm2835.c
@@ -0,0 +1,1466 @@
+/*
+ * bcm2835 sdhost driver.
+ *
+ * The 2835 has two SD controllers: The Arasan sdhci controller
+ * (supported by the iproc driver) and a custom sdhost controller
+ * (supported by this driver).
+ *
+ * The sdhci controller supports both sdcard and sdio.  The sdhost
+ * controller supports the sdcard only, but has better performance.
+ * Also note that the rpi3 has sdio wifi, so driving the sdcard with
+ * the sdhost controller frees the sdhci controller for wifi
+ * support.
+ *
+ * The configuration is done by devicetree via pin muxing.  Both
+ * SD controllers are available on the same pins (2 pin groups = pin 22
+ * to 27 + pin 48 to 53), so it's possible to use both SD controllers
+ * at the same time with different pin groups.
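+ *
+ * For example, a board might select this controller for the SD card
+ * slot with a devicetree fragment along these lines (the labels are
+ * illustrative and board-specific):
+ *
+ *   &sdhost {
+ *           pinctrl-names = "default";
+ *           pinctrl-0 = <&sdhost_gpio48>;
+ *           status = "okay";
+ *   };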
+ *
+ * Author:      Phil Elwell <phil@raspberrypi.org>
+ *              Copyright (C) 2015-2016 Raspberry Pi (Trading) Ltd.
+ *
+ * Based on
+ *  mmc-bcm2835.c by Gellert Weisz
+ * which is, in turn, based on
+ *  sdhci-bcm2708.c by Broadcom
+ *  sdhci-bcm2835.c by Stephen Warren and Oleksandr Tymoshenko
+ *  sdhci.c and sdhci-pci.c by Pierre Ossman
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/time.h>
+#include <linux/workqueue.h>
+
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
+
+#define SDCMD  0x00 /* Command to SD card              - 16 R/W */
+#define SDARG  0x04 /* Argument to SD card             - 32 R/W */
+#define SDTOUT 0x08 /* Start value for timeout counter - 32 R/W */
+#define SDCDIV 0x0c /* Start value for clock divider   - 11 R/W */
+#define SDRSP0 0x10 /* SD card response (31:0)         - 32 R   */
+#define SDRSP1 0x14 /* SD card response (63:32)        - 32 R   */
+#define SDRSP2 0x18 /* SD card response (95:64)        - 32 R   */
+#define SDRSP3 0x1c /* SD card response (127:96)       - 32 R   */
+#define SDHSTS 0x20 /* SD host status                  - 11 R/W */
+#define SDVDD  0x30 /* SD card power control           -  1 R/W */
+#define SDEDM  0x34 /* Emergency Debug Mode            - 13 R/W */
+#define SDHCFG 0x38 /* Host configuration              -  2 R/W */
+#define SDHBCT 0x3c /* Host byte count (debug)         - 32 R/W */
+#define SDDATA 0x40 /* Data to/from SD card            - 32 R/W */
+#define SDHBLC 0x50 /* Host block count (SDIO/SDHC)    -  9 R/W */
+
+#define SDCMD_NEW_FLAG			0x8000
+#define SDCMD_FAIL_FLAG			0x4000
+#define SDCMD_BUSYWAIT			0x800
+#define SDCMD_NO_RESPONSE		0x400
+#define SDCMD_LONG_RESPONSE		0x200
+#define SDCMD_WRITE_CMD			0x80
+#define SDCMD_READ_CMD			0x40
+#define SDCMD_CMD_MASK			0x3f
+
+#define SDCDIV_MAX_CDIV			0x7ff
+
+#define SDHSTS_BUSY_IRPT		0x400
+#define SDHSTS_BLOCK_IRPT		0x200
+#define SDHSTS_SDIO_IRPT		0x100
+#define SDHSTS_REW_TIME_OUT		0x80
+#define SDHSTS_CMD_TIME_OUT		0x40
+#define SDHSTS_CRC16_ERROR		0x20
+#define SDHSTS_CRC7_ERROR		0x10
+#define SDHSTS_FIFO_ERROR		0x08
+/* Reserved */
+/* Reserved */
+#define SDHSTS_DATA_FLAG		0x01
+
+#define SDHSTS_TRANSFER_ERROR_MASK	(SDHSTS_CRC7_ERROR | \
+					 SDHSTS_CRC16_ERROR | \
+					 SDHSTS_REW_TIME_OUT | \
+					 SDHSTS_FIFO_ERROR)
+
+#define SDHSTS_ERROR_MASK		(SDHSTS_CMD_TIME_OUT | \
+					 SDHSTS_TRANSFER_ERROR_MASK)
+
+#define SDHCFG_BUSY_IRPT_EN	BIT(10)
+#define SDHCFG_BLOCK_IRPT_EN	BIT(8)
+#define SDHCFG_SDIO_IRPT_EN	BIT(5)
+#define SDHCFG_DATA_IRPT_EN	BIT(4)
+#define SDHCFG_SLOW_CARD	BIT(3)
+#define SDHCFG_WIDE_EXT_BUS	BIT(2)
+#define SDHCFG_WIDE_INT_BUS	BIT(1)
+#define SDHCFG_REL_CMD_LINE	BIT(0)
+
+#define SDVDD_POWER_OFF		0
+#define SDVDD_POWER_ON		1
+
+#define SDEDM_FORCE_DATA_MODE	BIT(19)
+#define SDEDM_CLOCK_PULSE	BIT(20)
+#define SDEDM_BYPASS		BIT(21)
+
+#define SDEDM_WRITE_THRESHOLD_SHIFT	9
+#define SDEDM_READ_THRESHOLD_SHIFT	14
+#define SDEDM_THRESHOLD_MASK		0x1f
+
+#define SDEDM_FSM_MASK		0xf
+#define SDEDM_FSM_IDENTMODE	0x0
+#define SDEDM_FSM_DATAMODE	0x1
+#define SDEDM_FSM_READDATA	0x2
+#define SDEDM_FSM_WRITEDATA	0x3
+#define SDEDM_FSM_READWAIT	0x4
+#define SDEDM_FSM_READCRC	0x5
+#define SDEDM_FSM_WRITECRC	0x6
+#define SDEDM_FSM_WRITEWAIT1	0x7
+#define SDEDM_FSM_POWERDOWN	0x8
+#define SDEDM_FSM_POWERUP	0x9
+#define SDEDM_FSM_WRITESTART1	0xa
+#define SDEDM_FSM_WRITESTART2	0xb
+#define SDEDM_FSM_GENPULSES	0xc
+#define SDEDM_FSM_WRITEWAIT2	0xd
+#define SDEDM_FSM_STARTPOWDOWN	0xf
+
+#define SDDATA_FIFO_WORDS	16
+
+#define FIFO_READ_THRESHOLD	4
+#define FIFO_WRITE_THRESHOLD	4
+#define SDDATA_FIFO_PIO_BURST	8
+
+#define PIO_THRESHOLD	1  /* Maximum block count for PIO (0 = always DMA) */
+
+struct bcm2835_host {
+	spinlock_t		lock;
+	struct mutex		mutex;
+
+	void __iomem		*ioaddr;
+	u32			phys_addr;
+
+	struct mmc_host		*mmc;
+	struct platform_device	*pdev;
+
+	int			clock;		/* Current clock speed */
+	unsigned int		max_clk;	/* Max possible freq */
+	struct work_struct	dma_work;
+	struct delayed_work	timeout_work;	/* Timer for timeouts */
+	struct sg_mapping_iter	sg_miter;	/* SG state for PIO */
+	unsigned int		blocks;		/* remaining PIO blocks */
+	int			irq;		/* Device IRQ */
+
+	u32			ns_per_fifo_word;
+
+	/* cached registers */
+	u32			hcfg;
+	u32			cdiv;
+
+	struct mmc_request	*mrq;		/* Current request */
+	struct mmc_command	*cmd;		/* Current command */
+	struct mmc_data		*data;		/* Current data request */
+	bool			data_complete:1;/* Data finished before cmd */
+	bool			use_busy:1;	/* Wait for busy interrupt */
+	bool			use_sbc:1;	/* Send CMD23 */
+
+	/* for threaded irq handler */
+	bool			irq_block;
+	bool			irq_busy;
+	bool			irq_data;
+
+	/* DMA part */
+	struct dma_chan		*dma_chan_rxtx;
+	struct dma_chan		*dma_chan;
+	struct dma_slave_config dma_cfg_rx;
+	struct dma_slave_config dma_cfg_tx;
+	struct dma_async_tx_descriptor	*dma_desc;
+	u32			dma_dir;
+	u32			drain_words;
+	struct page		*drain_page;
+	u32			drain_offset;
+	bool			use_dma;
+};
+
+static void bcm2835_dumpcmd(struct bcm2835_host *host, struct mmc_command *cmd,
+			    const char *label)
+{
+	struct device *dev = &host->pdev->dev;
+
+	if (!cmd)
+		return;
+
+	dev_dbg(dev, "%c%s op %d arg 0x%x flags 0x%x - resp %08x %08x %08x %08x, err %d\n",
+		(cmd == host->cmd) ? '>' : ' ',
+		label, cmd->opcode, cmd->arg, cmd->flags,
+		cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3],
+		cmd->error);
+}
+
+static void bcm2835_dumpregs(struct bcm2835_host *host)
+{
+	struct mmc_request *mrq = host->mrq;
+	struct device *dev = &host->pdev->dev;
+
+	if (mrq) {
+		bcm2835_dumpcmd(host, mrq->sbc, "sbc");
+		bcm2835_dumpcmd(host, mrq->cmd, "cmd");
+		if (mrq->data) {
+			dev_dbg(dev, "data blocks %x blksz %x - err %d\n",
+				mrq->data->blocks,
+				mrq->data->blksz,
+				mrq->data->error);
+		}
+		bcm2835_dumpcmd(host, mrq->stop, "stop");
+	}
+
+	dev_dbg(dev, "=========== REGISTER DUMP ===========\n");
+	dev_dbg(dev, "SDCMD  0x%08x\n", readl(host->ioaddr + SDCMD));
+	dev_dbg(dev, "SDARG  0x%08x\n", readl(host->ioaddr + SDARG));
+	dev_dbg(dev, "SDTOUT 0x%08x\n", readl(host->ioaddr + SDTOUT));
+	dev_dbg(dev, "SDCDIV 0x%08x\n", readl(host->ioaddr + SDCDIV));
+	dev_dbg(dev, "SDRSP0 0x%08x\n", readl(host->ioaddr + SDRSP0));
+	dev_dbg(dev, "SDRSP1 0x%08x\n", readl(host->ioaddr + SDRSP1));
+	dev_dbg(dev, "SDRSP2 0x%08x\n", readl(host->ioaddr + SDRSP2));
+	dev_dbg(dev, "SDRSP3 0x%08x\n", readl(host->ioaddr + SDRSP3));
+	dev_dbg(dev, "SDHSTS 0x%08x\n", readl(host->ioaddr + SDHSTS));
+	dev_dbg(dev, "SDVDD  0x%08x\n", readl(host->ioaddr + SDVDD));
+	dev_dbg(dev, "SDEDM  0x%08x\n", readl(host->ioaddr + SDEDM));
+	dev_dbg(dev, "SDHCFG 0x%08x\n", readl(host->ioaddr + SDHCFG));
+	dev_dbg(dev, "SDHBCT 0x%08x\n", readl(host->ioaddr + SDHBCT));
+	dev_dbg(dev, "SDHBLC 0x%08x\n", readl(host->ioaddr + SDHBLC));
+	dev_dbg(dev, "===========================================\n");
+}
+
+static void bcm2835_reset_internal(struct bcm2835_host *host)
+{
+	u32 temp;
+
+	writel(SDVDD_POWER_OFF, host->ioaddr + SDVDD);
+	writel(0, host->ioaddr + SDCMD);
+	writel(0, host->ioaddr + SDARG);
+	writel(0xf00000, host->ioaddr + SDTOUT);
+	writel(0, host->ioaddr + SDCDIV);
+	writel(0x7f8, host->ioaddr + SDHSTS); /* Write 1s to clear */
+	writel(0, host->ioaddr + SDHCFG);
+	writel(0, host->ioaddr + SDHBCT);
+	writel(0, host->ioaddr + SDHBLC);
+
+	/* Limit fifo usage due to silicon bug */
+	temp = readl(host->ioaddr + SDEDM);
+	temp &= ~((SDEDM_THRESHOLD_MASK << SDEDM_READ_THRESHOLD_SHIFT) |
+		  (SDEDM_THRESHOLD_MASK << SDEDM_WRITE_THRESHOLD_SHIFT));
+	temp |= (FIFO_READ_THRESHOLD << SDEDM_READ_THRESHOLD_SHIFT) |
+		(FIFO_WRITE_THRESHOLD << SDEDM_WRITE_THRESHOLD_SHIFT);
+	writel(temp, host->ioaddr + SDEDM);
+	msleep(20);
+	writel(SDVDD_POWER_ON, host->ioaddr + SDVDD);
+	msleep(20);
+	host->clock = 0;
+	writel(host->hcfg, host->ioaddr + SDHCFG);
+	writel(host->cdiv, host->ioaddr + SDCDIV);
+}
+
+static void bcm2835_reset(struct mmc_host *mmc)
+{
+	struct bcm2835_host *host = mmc_priv(mmc);
+
+	if (host->dma_chan)
+		dmaengine_terminate_sync(host->dma_chan);
+	bcm2835_reset_internal(host);
+}
+
+static void bcm2835_finish_command(struct bcm2835_host *host);
+
+static void bcm2835_wait_transfer_complete(struct bcm2835_host *host)
+{
+	int timediff;
+	u32 alternate_idle;
+
+	alternate_idle = (host->mrq->data->flags & MMC_DATA_READ) ?
+		SDEDM_FSM_READWAIT : SDEDM_FSM_WRITESTART1;
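+	/*
+	 * A read may park the FSM in READWAIT and a write in
+	 * WRITESTART1; in either case, forcing data mode below brings
+	 * it back to a usable state.
+	 */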
+
+	timediff = 0;
+
+	while (1) {
+		u32 edm, fsm;
+
+		edm = readl(host->ioaddr + SDEDM);
+		fsm = edm & SDEDM_FSM_MASK;
+
+		if ((fsm == SDEDM_FSM_IDENTMODE) ||
+		    (fsm == SDEDM_FSM_DATAMODE))
+			break;
+		if (fsm == alternate_idle) {
+			writel(edm | SDEDM_FORCE_DATA_MODE,
+			       host->ioaddr + SDEDM);
+			break;
+		}
+
+		timediff++;
+		if (timediff == 100000) {
+			dev_err(&host->pdev->dev,
+				"wait_transfer_complete - still waiting after %d retries\n",
+				timediff);
+			bcm2835_dumpregs(host);
+			host->mrq->data->error = -ETIMEDOUT;
+			return;
+		}
+		cpu_relax();
+	}
+}
+
+static void bcm2835_dma_complete(void *param)
+{
+	struct bcm2835_host *host = param;
+
+	schedule_work(&host->dma_work);
+}
+
+static void bcm2835_transfer_block_pio(struct bcm2835_host *host, bool is_read)
+{
+	unsigned long flags;
+	size_t blksize;
+	unsigned long wait_max;
+
+	blksize = host->data->blksz;
+
+	wait_max = jiffies + msecs_to_jiffies(500);
+
+	local_irq_save(flags);
+
+	while (blksize) {
+		int copy_words;
+		u32 hsts = 0;
+		size_t len;
+		u32 *buf;
+
+		if (!sg_miter_next(&host->sg_miter)) {
+			host->data->error = -EINVAL;
+			break;
+		}
+
+		len = min(host->sg_miter.length, blksize);
+		if (len % 4) {
+			host->data->error = -EINVAL;
+			break;
+		}
+
+		blksize -= len;
+		host->sg_miter.consumed = len;
+
+		buf = (u32 *)host->sg_miter.addr;
+
+		copy_words = len / 4;
+
+		while (copy_words) {
+			int burst_words, words;
+			u32 edm;
+
+			burst_words = min(SDDATA_FIFO_PIO_BURST, copy_words);
+			edm = readl(host->ioaddr + SDEDM);
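+			/*
+			 * SDEDM bits 8:4 give the FIFO fill level; for
+			 * a read that is the number of words available,
+			 * for a write the free space is 16 minus it.
+			 */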
+			if (is_read)
+				words = ((edm >> 4) & 0x1f);
+			else
+				words = SDDATA_FIFO_WORDS - ((edm >> 4) & 0x1f);
+
+			if (words < burst_words) {
+				int fsm_state = (edm & SDEDM_FSM_MASK);
+				struct device *dev = &host->pdev->dev;
+
+				if ((is_read &&
+				     (fsm_state != SDEDM_FSM_READDATA &&
+				      fsm_state != SDEDM_FSM_READWAIT &&
+				      fsm_state != SDEDM_FSM_READCRC)) ||
+				    (!is_read &&
+				     (fsm_state != SDEDM_FSM_WRITEDATA &&
+				      fsm_state != SDEDM_FSM_WRITESTART1 &&
+				      fsm_state != SDEDM_FSM_WRITESTART2))) {
+					hsts = readl(host->ioaddr + SDHSTS);
+					dev_err(dev, "fsm %x, hsts %08x\n",
+						fsm_state, hsts);
+					if (hsts & SDHSTS_ERROR_MASK)
+						break;
+				}
+
+				if (time_after(jiffies, wait_max)) {
+					dev_err(dev, "PIO %s timeout - EDM %08x\n",
+						is_read ? "read" : "write",
+						edm);
+					hsts = SDHSTS_REW_TIME_OUT;
+					break;
+				}
+				ndelay((burst_words - words) *
+				       host->ns_per_fifo_word);
+				continue;
+			} else if (words > copy_words) {
+				words = copy_words;
+			}
+
+			copy_words -= words;
+
+			while (words) {
+				if (is_read)
+					*(buf++) = readl(host->ioaddr + SDDATA);
+				else
+					writel(*(buf++), host->ioaddr + SDDATA);
+				words--;
+			}
+		}
+
+		if (hsts & SDHSTS_ERROR_MASK)
+			break;
+	}
+
+	sg_miter_stop(&host->sg_miter);
+
+	local_irq_restore(flags);
+}
+
+static void bcm2835_transfer_pio(struct bcm2835_host *host)
+{
+	struct device *dev = &host->pdev->dev;
+	u32 sdhsts;
+	bool is_read;
+
+	is_read = (host->data->flags & MMC_DATA_READ) != 0;
+	bcm2835_transfer_block_pio(host, is_read);
+
+	sdhsts = readl(host->ioaddr + SDHSTS);
+	if (sdhsts & (SDHSTS_CRC16_ERROR |
+		      SDHSTS_CRC7_ERROR |
+		      SDHSTS_FIFO_ERROR)) {
+		dev_err(dev, "%s transfer error - HSTS %08x\n",
+			is_read ? "read" : "write", sdhsts);
+		host->data->error = -EILSEQ;
+	} else if ((sdhsts & (SDHSTS_CMD_TIME_OUT |
+			      SDHSTS_REW_TIME_OUT))) {
+		dev_err(dev, "%s timeout error - HSTS %08x\n",
+			is_read ? "read" : "write", sdhsts);
+		host->data->error = -ETIMEDOUT;
+	}
+}
+
+static
+void bcm2835_prepare_dma(struct bcm2835_host *host, struct mmc_data *data)
+{
+	int len, dir_data, dir_slave;
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *dma_chan;
+
+	dma_chan = host->dma_chan_rxtx;
+	if (data->flags & MMC_DATA_READ) {
+		dir_data = DMA_FROM_DEVICE;
+		dir_slave = DMA_DEV_TO_MEM;
+	} else {
+		dir_data = DMA_TO_DEVICE;
+		dir_slave = DMA_MEM_TO_DEV;
+	}
+
+	/* The block doesn't manage the FIFO DREQs properly for
+	 * multi-block transfers, so don't attempt to DMA the final
+	 * few words.  Unfortunately this requires the final sg entry
+	 * to be trimmed.  N.B. This code demands that the overspill
+	 * is contained in a single sg entry.
+	 */
+
+	host->drain_words = 0;
+	if ((data->blocks > 1) && (dir_data == DMA_FROM_DEVICE)) {
+		struct scatterlist *sg;
+		u32 len;
+		int i;
+
+		len = min((u32)(FIFO_READ_THRESHOLD - 1) * 4,
+			  (u32)data->blocks * data->blksz);
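+		/*
+		 * With FIFO_READ_THRESHOLD == 4 this is at most 12 bytes
+		 * (three words); they are trimmed from the last sg entry
+		 * and drained by PIO in bcm2835_dma_complete_work().
+		 */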
+
+		for_each_sg(data->sg, sg, data->sg_len, i) {
+			if (sg_is_last(sg)) {
+				WARN_ON(sg->length < len);
+				sg->length -= len;
+				host->drain_page = sg_page(sg);
+				host->drain_offset = sg->offset + sg->length;
+			}
+		}
+		host->drain_words = len / 4;
+	}
+
+	/* The parameters have already been validated, so this will not fail */
+	(void)dmaengine_slave_config(dma_chan,
+				     (dir_data == DMA_FROM_DEVICE) ?
+				     &host->dma_cfg_rx :
+				     &host->dma_cfg_tx);
+
+	len = dma_map_sg(dma_chan->device->dev, data->sg, data->sg_len,
+			 dir_data);
+
+	if (len > 0) {
+		desc = dmaengine_prep_slave_sg(dma_chan, data->sg,
+					       len, dir_slave,
+					       DMA_PREP_INTERRUPT |
+					       DMA_CTRL_ACK);
+	}
+
+	if (desc) {
+		desc->callback = bcm2835_dma_complete;
+		desc->callback_param = host;
+		host->dma_desc = desc;
+		host->dma_chan = dma_chan;
+		host->dma_dir = dir_data;
+	}
+}
+
+static void bcm2835_start_dma(struct bcm2835_host *host)
+{
+	dmaengine_submit(host->dma_desc);
+	dma_async_issue_pending(host->dma_chan);
+}
+
+static void bcm2835_set_transfer_irqs(struct bcm2835_host *host)
+{
+	u32 all_irqs = SDHCFG_DATA_IRPT_EN | SDHCFG_BLOCK_IRPT_EN |
+		SDHCFG_BUSY_IRPT_EN;
+
+	if (host->dma_desc) {
+		host->hcfg = (host->hcfg & ~all_irqs) |
+			SDHCFG_BUSY_IRPT_EN;
+	} else {
+		host->hcfg = (host->hcfg & ~all_irqs) |
+			SDHCFG_DATA_IRPT_EN |
+			SDHCFG_BUSY_IRPT_EN;
+	}
+
+	writel(host->hcfg, host->ioaddr + SDHCFG);
+}
+
+static
+void bcm2835_prepare_data(struct bcm2835_host *host, struct mmc_command *cmd)
+{
+	struct mmc_data *data = cmd->data;
+
+	WARN_ON(host->data);
+
+	host->data = data;
+	if (!data)
+		return;
+
+	host->data_complete = false;
+	host->data->bytes_xfered = 0;
+
+	if (!host->dma_desc) {
+		/* Use PIO */
+		int flags = SG_MITER_ATOMIC;
+
+		if (data->flags & MMC_DATA_READ)
+			flags |= SG_MITER_TO_SG;
+		else
+			flags |= SG_MITER_FROM_SG;
+		sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
+		host->blocks = data->blocks;
+	}
+
+	bcm2835_set_transfer_irqs(host);
+
+	writel(data->blksz, host->ioaddr + SDHBCT);
+	writel(data->blocks, host->ioaddr + SDHBLC);
+}
+
+static u32 bcm2835_read_wait_sdcmd(struct bcm2835_host *host, u32 max_ms)
+{
+	struct device *dev = &host->pdev->dev;
+	u32 value;
+	int ret;
+
+	ret = readl_poll_timeout(host->ioaddr + SDCMD, value,
+				 !(value & SDCMD_NEW_FLAG), 1, 10);
+	if (ret == -ETIMEDOUT)
+		/* if it takes a while, make the poll interval bigger */
+		ret = readl_poll_timeout(host->ioaddr + SDCMD, value,
+					 !(value & SDCMD_NEW_FLAG),
+					 10, max_ms * 1000);
+	if (ret == -ETIMEDOUT)
+		dev_err(dev, "%s: timeout (%d ms)\n", __func__, max_ms);
+
+	return value;
+}
+
+static void bcm2835_finish_request(struct bcm2835_host *host)
+{
+	struct dma_chan *terminate_chan = NULL;
+	struct mmc_request *mrq;
+
+	cancel_delayed_work(&host->timeout_work);
+
+	mrq = host->mrq;
+
+	host->mrq = NULL;
+	host->cmd = NULL;
+	host->data = NULL;
+
+	host->dma_desc = NULL;
+	terminate_chan = host->dma_chan;
+	host->dma_chan = NULL;
+
+	if (terminate_chan) {
+		int err = dmaengine_terminate_all(terminate_chan);
+
+		if (err)
+			dev_err(&host->pdev->dev,
+				"failed to terminate DMA (%d)\n", err);
+	}
+
+	mmc_request_done(host->mmc, mrq);
+}
+
+static
+bool bcm2835_send_command(struct bcm2835_host *host, struct mmc_command *cmd)
+{
+	struct device *dev = &host->pdev->dev;
+	u32 sdcmd, sdhsts;
+	unsigned long timeout;
+
+	WARN_ON(host->cmd);
+
+	sdcmd = bcm2835_read_wait_sdcmd(host, 100);
+	if (sdcmd & SDCMD_NEW_FLAG) {
+		dev_err(dev, "previous command never completed.\n");
+		bcm2835_dumpregs(host);
+		cmd->error = -EILSEQ;
+		bcm2835_finish_request(host);
+		return false;
+	}
+
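+	/*
+	 * busy_timeout is in ms; convert it to jiffies and add a second
+	 * of slack for commands that signal a long busy wait.
+	 */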
+	if (!cmd->data && cmd->busy_timeout > 9000)
+		timeout = DIV_ROUND_UP(cmd->busy_timeout, 1000) * HZ + HZ;
+	else
+		timeout = 10 * HZ;
+	schedule_delayed_work(&host->timeout_work, timeout);
+
+	host->cmd = cmd;
+
+	/* Clear any error flags */
+	sdhsts = readl(host->ioaddr + SDHSTS);
+	if (sdhsts & SDHSTS_ERROR_MASK)
+		writel(sdhsts, host->ioaddr + SDHSTS);
+
+	if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) {
+		dev_err(dev, "unsupported response type!\n");
+		cmd->error = -EINVAL;
+		bcm2835_finish_request(host);
+		return false;
+	}
+
+	bcm2835_prepare_data(host, cmd);
+
+	writel(cmd->arg, host->ioaddr + SDARG);
+
+	sdcmd = cmd->opcode & SDCMD_CMD_MASK;
+
+	host->use_busy = false;
+	if (!(cmd->flags & MMC_RSP_PRESENT)) {
+		sdcmd |= SDCMD_NO_RESPONSE;
+	} else {
+		if (cmd->flags & MMC_RSP_136)
+			sdcmd |= SDCMD_LONG_RESPONSE;
+		if (cmd->flags & MMC_RSP_BUSY) {
+			sdcmd |= SDCMD_BUSYWAIT;
+			host->use_busy = true;
+		}
+	}
+
+	if (cmd->data) {
+		if (cmd->data->flags & MMC_DATA_WRITE)
+			sdcmd |= SDCMD_WRITE_CMD;
+		if (cmd->data->flags & MMC_DATA_READ)
+			sdcmd |= SDCMD_READ_CMD;
+	}
+
+	writel(sdcmd | SDCMD_NEW_FLAG, host->ioaddr + SDCMD);
+
+	return true;
+}
+
+static void bcm2835_transfer_complete(struct bcm2835_host *host)
+{
+	struct mmc_data *data;
+
+	WARN_ON(!host->data_complete);
+
+	data = host->data;
+	host->data = NULL;
+
+	/* Need to send CMD12 if -
+	 * a) open-ended multiblock transfer (no CMD23)
+	 * b) error in multiblock transfer
+	 */
+	if (host->mrq->stop && (data->error || !host->use_sbc)) {
+		if (bcm2835_send_command(host, host->mrq->stop)) {
+			/* No busy, so poll for completion */
+			if (!host->use_busy)
+				bcm2835_finish_command(host);
+		}
+	} else {
+		bcm2835_wait_transfer_complete(host);
+		bcm2835_finish_request(host);
+	}
+}
+
+static void bcm2835_finish_data(struct bcm2835_host *host)
+{
+	struct device *dev = &host->pdev->dev;
+	struct mmc_data *data;
+
+	data = host->data;
+
+	host->hcfg &= ~(SDHCFG_DATA_IRPT_EN | SDHCFG_BLOCK_IRPT_EN);
+	writel(host->hcfg, host->ioaddr + SDHCFG);
+
+	data->bytes_xfered = data->error ? 0 : (data->blksz * data->blocks);
+
+	host->data_complete = true;
+
+	if (host->cmd) {
+		/* Data managed to finish before the
+		 * command completed. Make sure we do
+		 * things in the proper order.
+		 */
+		dev_dbg(dev, "Finished early - HSTS %08x\n",
+			readl(host->ioaddr + SDHSTS));
+	} else {
+		bcm2835_transfer_complete(host);
+	}
+}
+
+static void bcm2835_finish_command(struct bcm2835_host *host)
+{
+	struct device *dev = &host->pdev->dev;
+	struct mmc_command *cmd = host->cmd;
+	u32 sdcmd;
+
+	sdcmd = bcm2835_read_wait_sdcmd(host, 100);
+
+	/* Check for errors */
+	if (sdcmd & SDCMD_NEW_FLAG) {
+		dev_err(dev, "command never completed.\n");
+		bcm2835_dumpregs(host);
+		host->cmd->error = -EIO;
+		bcm2835_finish_request(host);
+		return;
+	} else if (sdcmd & SDCMD_FAIL_FLAG) {
+		u32 sdhsts = readl(host->ioaddr + SDHSTS);
+
+		/* Clear the errors */
+		writel(SDHSTS_ERROR_MASK, host->ioaddr + SDHSTS);
+
+		if (!(sdhsts & SDHSTS_CRC7_ERROR) ||
+		    (host->cmd->opcode != MMC_SEND_OP_COND)) {
+			if (sdhsts & SDHSTS_CMD_TIME_OUT) {
+				host->cmd->error = -ETIMEDOUT;
+			} else {
+				dev_err(dev, "unexpected command %d error\n",
+					host->cmd->opcode);
+				bcm2835_dumpregs(host);
+				host->cmd->error = -EILSEQ;
+			}
+			bcm2835_finish_request(host);
+			return;
+		}
+	}
+
+	if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_136) {
+			int i;
+
+			for (i = 0; i < 4; i++) {
+				cmd->resp[3 - i] =
+					readl(host->ioaddr + SDRSP0 + i * 4);
+			}
+		} else {
+			cmd->resp[0] = readl(host->ioaddr + SDRSP0);
+		}
+	}
+
+	if (cmd == host->mrq->sbc) {
+		/* Finished CMD23, now send actual command. */
+		host->cmd = NULL;
+		if (bcm2835_send_command(host, host->mrq->cmd)) {
+			if (host->data && host->dma_desc)
+				/* DMA transfer starts now, PIO starts
+				 * after irq
+				 */
+				bcm2835_start_dma(host);
+
+			if (!host->use_busy)
+				bcm2835_finish_command(host);
+		}
+	} else if (cmd == host->mrq->stop) {
+		/* Finished CMD12 */
+		bcm2835_finish_request(host);
+	} else {
+		/* Processed actual command. */
+		host->cmd = NULL;
+		if (!host->data)
+			bcm2835_finish_request(host);
+		else if (host->data_complete)
+			bcm2835_transfer_complete(host);
+	}
+}
+
+static void bcm2835_timeout(struct work_struct *work)
+{
+	struct delayed_work *d = to_delayed_work(work);
+	struct bcm2835_host *host =
+		container_of(d, struct bcm2835_host, timeout_work);
+	struct device *dev = &host->pdev->dev;
+
+	mutex_lock(&host->mutex);
+
+	if (host->mrq) {
+		dev_err(dev, "timeout waiting for hardware interrupt.\n");
+		bcm2835_dumpregs(host);
+
+		if (host->data) {
+			host->data->error = -ETIMEDOUT;
+			bcm2835_finish_data(host);
+		} else {
+			if (host->cmd)
+				host->cmd->error = -ETIMEDOUT;
+			else
+				host->mrq->cmd->error = -ETIMEDOUT;
+
+			bcm2835_finish_request(host);
+		}
+	}
+
+	mutex_unlock(&host->mutex);
+}
+
+static bool bcm2835_check_cmd_error(struct bcm2835_host *host, u32 intmask)
+{
+	struct device *dev = &host->pdev->dev;
+
+	if (!(intmask & SDHSTS_ERROR_MASK))
+		return false;
+
+	if (!host->cmd)
+		return true;
+
+	dev_err(dev, "sdhost_busy_irq: intmask %08x\n", intmask);
+	if (intmask & SDHSTS_CRC7_ERROR) {
+		host->cmd->error = -EILSEQ;
+	} else if (intmask & (SDHSTS_CRC16_ERROR |
+			      SDHSTS_FIFO_ERROR)) {
+		if (host->mrq->data)
+			host->mrq->data->error = -EILSEQ;
+		else
+			host->cmd->error = -EILSEQ;
+	} else if (intmask & SDHSTS_REW_TIME_OUT) {
+		if (host->mrq->data)
+			host->mrq->data->error = -ETIMEDOUT;
+		else
+			host->cmd->error = -ETIMEDOUT;
+	} else if (intmask & SDHSTS_CMD_TIME_OUT) {
+		host->cmd->error = -ETIMEDOUT;
+	}
+	bcm2835_dumpregs(host);
+	return true;
+}
+
+static void bcm2835_check_data_error(struct bcm2835_host *host, u32 intmask)
+{
+	if (!host->data)
+		return;
+	if (intmask & (SDHSTS_CRC16_ERROR | SDHSTS_FIFO_ERROR))
+		host->data->error = -EILSEQ;
+	if (intmask & SDHSTS_REW_TIME_OUT)
+		host->data->error = -ETIMEDOUT;
+}
+
+static void bcm2835_busy_irq(struct bcm2835_host *host)
+{
+	if (WARN_ON(!host->cmd)) {
+		bcm2835_dumpregs(host);
+		return;
+	}
+
+	if (WARN_ON(!host->use_busy)) {
+		bcm2835_dumpregs(host);
+		return;
+	}
+	host->use_busy = false;
+
+	bcm2835_finish_command(host);
+}
+
+static void bcm2835_data_irq(struct bcm2835_host *host, u32 intmask)
+{
+	/* There are no dedicated data/space available interrupt
+	 * status bits, so it is necessary to use the single shared
+	 * data/space available FIFO status bits. It is therefore not
+	 * an error to get here when there is no data transfer in
+	 * progress.
+	 */
+	if (!host->data)
+		return;
+
+	bcm2835_check_data_error(host, intmask);
+	if (host->data->error)
+		goto finished;
+
+	if (host->data->flags & MMC_DATA_WRITE) {
+		/* Use the block interrupt for writes after the first block */
+		host->hcfg &= ~(SDHCFG_DATA_IRPT_EN);
+		host->hcfg |= SDHCFG_BLOCK_IRPT_EN;
+		writel(host->hcfg, host->ioaddr + SDHCFG);
+		bcm2835_transfer_pio(host);
+	} else {
+		bcm2835_transfer_pio(host);
+		host->blocks--;
+		if ((host->blocks == 0) || host->data->error)
+			goto finished;
+	}
+	return;
+
+finished:
+	host->hcfg &= ~(SDHCFG_DATA_IRPT_EN | SDHCFG_BLOCK_IRPT_EN);
+	writel(host->hcfg, host->ioaddr + SDHCFG);
+}
+
+static void bcm2835_data_threaded_irq(struct bcm2835_host *host)
+{
+	if (!host->data)
+		return;
+	if ((host->blocks == 0) || host->data->error)
+		bcm2835_finish_data(host);
+}
+
+static void bcm2835_block_irq(struct bcm2835_host *host)
+{
+	if (WARN_ON(!host->data)) {
+		bcm2835_dumpregs(host);
+		return;
+	}
+
+	if (!host->dma_desc) {
+		WARN_ON(!host->blocks);
+		if (host->data->error || (--host->blocks == 0))
+			bcm2835_finish_data(host);
+		else
+			bcm2835_transfer_pio(host);
+	} else if (host->data->flags & MMC_DATA_WRITE) {
+		bcm2835_finish_data(host);
+	}
+}
+
+static irqreturn_t bcm2835_irq(int irq, void *dev_id)
+{
+	irqreturn_t result = IRQ_NONE;
+	struct bcm2835_host *host = dev_id;
+	u32 intmask;
+
+	spin_lock(&host->lock);
+
+	intmask = readl(host->ioaddr + SDHSTS);
+
+	writel(SDHSTS_BUSY_IRPT |
+	       SDHSTS_BLOCK_IRPT |
+	       SDHSTS_SDIO_IRPT |
+	       SDHSTS_DATA_FLAG,
+	       host->ioaddr + SDHSTS);
+
+	if (intmask & SDHSTS_BLOCK_IRPT) {
+		bcm2835_check_data_error(host, intmask);
+		host->irq_block = true;
+		result = IRQ_WAKE_THREAD;
+	}
+
+	if (intmask & SDHSTS_BUSY_IRPT) {
+		if (!bcm2835_check_cmd_error(host, intmask)) {
+			host->irq_busy = true;
+			result = IRQ_WAKE_THREAD;
+		} else {
+			result = IRQ_HANDLED;
+		}
+	}
+
+	/* There is no true data interrupt status bit, so it is
+	 * necessary to qualify the data flag with the interrupt
+	 * enable bit.
+	 */
+	if ((intmask & SDHSTS_DATA_FLAG) &&
+	    (host->hcfg & SDHCFG_DATA_IRPT_EN)) {
+		bcm2835_data_irq(host, intmask);
+		host->irq_data = true;
+		result = IRQ_WAKE_THREAD;
+	}
+
+	spin_unlock(&host->lock);
+
+	return result;
+}
+
+static irqreturn_t bcm2835_threaded_irq(int irq, void *dev_id)
+{
+	struct bcm2835_host *host = dev_id;
+	unsigned long flags;
+	bool block, busy, data;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	block = host->irq_block;
+	busy  = host->irq_busy;
+	data  = host->irq_data;
+	host->irq_block = false;
+	host->irq_busy  = false;
+	host->irq_data  = false;
+
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	mutex_lock(&host->mutex);
+
+	if (block)
+		bcm2835_block_irq(host);
+	if (busy)
+		bcm2835_busy_irq(host);
+	if (data)
+		bcm2835_data_threaded_irq(host);
+
+	mutex_unlock(&host->mutex);
+
+	return IRQ_HANDLED;
+}
+
+static void bcm2835_dma_complete_work(struct work_struct *work)
+{
+	struct bcm2835_host *host =
+		container_of(work, struct bcm2835_host, dma_work);
+	struct mmc_data *data = host->data;
+
+	mutex_lock(&host->mutex);
+
+	if (host->dma_chan) {
+		dma_unmap_sg(host->dma_chan->device->dev,
+			     data->sg, data->sg_len,
+			     host->dma_dir);
+
+		host->dma_chan = NULL;
+	}
+
+	if (host->drain_words) {
+		unsigned long flags;
+		void *page;
+		u32 *buf;
+
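+
+		/*
+		 * Fold whole pages of the offset into drain_page so the
+		 * kmap_atomic() below stays within a single page.
+		 */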
+		if (host->drain_offset & PAGE_MASK) {
+			host->drain_page += host->drain_offset >> PAGE_SHIFT;
+			host->drain_offset &= ~PAGE_MASK;
+		}
+		local_irq_save(flags);
+		page = kmap_atomic(host->drain_page);
+		buf = page + host->drain_offset;
+
+		while (host->drain_words) {
+			u32 edm = readl(host->ioaddr + SDEDM);
+
+			if ((edm >> 4) & 0x1f)
+				*(buf++) = readl(host->ioaddr + SDDATA);
+			host->drain_words--;
+		}
+
+		kunmap_atomic(page);
+		local_irq_restore(flags);
+	}
+
+	bcm2835_finish_data(host);
+
+	mutex_unlock(&host->mutex);
+}
+
+static void bcm2835_set_clock(struct bcm2835_host *host, unsigned int clock)
+{
+	int div;
+
+	/* The SDCDIV register has 11 bits, and holds (div - 2).  But
+	 * in data mode the max is 50MHz without a minimum, and only
+	 * the bottom 3 bits are used. Since the switch over is
+	 * automatic (unless we have marked the card as slow...),
+	 * chosen values have to make sense in both modes.  Ident mode
+	 * must be 100-400KHz, so we can range-check the requested
+	 * clock. CMD15 must be used to return to data mode, so this
+	 * can be monitored.
+	 *
+	 * clock 250MHz -> 0->125MHz, 1->83.3MHz, 2->62.5MHz, 3->50.0MHz
+	 *                 4->41.7MHz, 5->35.7MHz, 6->31.3MHz, 7->27.8MHz
+	 *
+	 *		 623->400KHz/27.8MHz
+	 *		 reset value (507)->491159/50MHz
+	 *
+	 * BUT, the 3-bit clock divisor in data mode is too small if
+	 * the core clock is higher than 250MHz, so instead use the
+	 * SLOW_CARD configuration bit to force the use of the ident
+	 * clock divisor at all times.
+	 */
+
+	if (clock < 100000) {
+		/* Can't stop the clock, but make it as slow as possible
+		 * to show willing
+		 */
+		host->cdiv = SDCDIV_MAX_CDIV;
+		writel(host->cdiv, host->ioaddr + SDCDIV);
+		return;
+	}
+
+	div = host->max_clk / clock;
+	if (div < 2)
+		div = 2;
+	if ((host->max_clk / div) > clock)
+		div++;
+	div -= 2;
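+	/*
+	 * Worked example: with a 250MHz core clock and a requested
+	 * 50MHz card clock, div = 250 / 50 = 5; 250MHz / 5 does not
+	 * exceed the target, so no correction is applied, and the
+	 * register value becomes 5 - 2 = 3, giving an actual clock of
+	 * 250MHz / (3 + 2) = 50MHz.
+	 */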
+
+	if (div > SDCDIV_MAX_CDIV)
+		div = SDCDIV_MAX_CDIV;
+
+	clock = host->max_clk / (div + 2);
+	host->mmc->actual_clock = clock;
+
+	/* Calibrate some delays */
+
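+	/*
+	 * A FIFO word is 32 bits, so it takes 8 clocks to transfer on a
+	 * 4-bit bus and 32 clocks on a 1-bit bus; scale the clock
+	 * period (in ns) accordingly.
+	 */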
+	host->ns_per_fifo_word = (1000000000 / clock) *
+		((host->mmc->caps & MMC_CAP_4_BIT_DATA) ? 8 : 32);
+
+	host->cdiv = div;
+	writel(host->cdiv, host->ioaddr + SDCDIV);
+
+	/* Set the timeout to 500ms */
+	writel(host->mmc->actual_clock / 2, host->ioaddr + SDTOUT);
+}
+
+static void bcm2835_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct bcm2835_host *host = mmc_priv(mmc);
+	struct device *dev = &host->pdev->dev;
+	u32 edm, fsm;
+
+	/* Reset the error statuses in case this is a retry */
+	if (mrq->sbc)
+		mrq->sbc->error = 0;
+	if (mrq->cmd)
+		mrq->cmd->error = 0;
+	if (mrq->data)
+		mrq->data->error = 0;
+	if (mrq->stop)
+		mrq->stop->error = 0;
+
+	if (mrq->data && !is_power_of_2(mrq->data->blksz)) {
+		dev_err(dev, "unsupported block size (%d bytes)\n",
+			mrq->data->blksz);
+		mrq->cmd->error = -EINVAL;
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	if (host->use_dma && mrq->data && (mrq->data->blocks > PIO_THRESHOLD))
+		bcm2835_prepare_dma(host, mrq->data);
+
+	mutex_lock(&host->mutex);
+
+	WARN_ON(host->mrq);
+	host->mrq = mrq;
+
+	edm = readl(host->ioaddr + SDEDM);
+	fsm = edm & SDEDM_FSM_MASK;
+
+	if ((fsm != SDEDM_FSM_IDENTMODE) &&
+	    (fsm != SDEDM_FSM_DATAMODE)) {
+		dev_err(dev, "previous command (%d) not complete (EDM %08x)\n",
+			readl(host->ioaddr + SDCMD) & SDCMD_CMD_MASK,
+			edm);
+		bcm2835_dumpregs(host);
+		mrq->cmd->error = -EILSEQ;
+		bcm2835_finish_request(host);
+		mutex_unlock(&host->mutex);
+		return;
+	}
+
+	host->use_sbc = !!mrq->sbc && host->mrq->data &&
+			(host->mrq->data->flags & MMC_DATA_READ);
+	if (host->use_sbc) {
+		if (bcm2835_send_command(host, mrq->sbc)) {
+			if (!host->use_busy)
+				bcm2835_finish_command(host);
+		}
+	} else if (bcm2835_send_command(host, mrq->cmd)) {
+		if (host->data && host->dma_desc) {
+			/* DMA transfer starts now, PIO starts after irq */
+			bcm2835_start_dma(host);
+		}
+
+		if (!host->use_busy)
+			bcm2835_finish_command(host);
+	}
+
+	mutex_unlock(&host->mutex);
+}
+
+static void bcm2835_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct bcm2835_host *host = mmc_priv(mmc);
+
+	mutex_lock(&host->mutex);
+
+	if (!ios->clock || ios->clock != host->clock) {
+		bcm2835_set_clock(host, ios->clock);
+		host->clock = ios->clock;
+	}
+
+	/* set bus width */
+	host->hcfg &= ~SDHCFG_WIDE_EXT_BUS;
+	if (ios->bus_width == MMC_BUS_WIDTH_4)
+		host->hcfg |= SDHCFG_WIDE_EXT_BUS;
+
+	host->hcfg |= SDHCFG_WIDE_INT_BUS;
+
+	/* Disable clever clock switching, to cope with fast core clocks */
+	host->hcfg |= SDHCFG_SLOW_CARD;
+
+	writel(host->hcfg, host->ioaddr + SDHCFG);
+
+	mutex_unlock(&host->mutex);
+}
+
+static struct mmc_host_ops bcm2835_ops = {
+	.request = bcm2835_request,
+	.set_ios = bcm2835_set_ios,
+	.hw_reset = bcm2835_reset,
+};
+
+static int bcm2835_add_host(struct bcm2835_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+	struct device *dev = &host->pdev->dev;
+	char pio_limit_string[20];
+	int ret;
+
+	mmc->f_max = host->max_clk;
+	mmc->f_min = host->max_clk / SDCDIV_MAX_CDIV;
+
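+	/*
+	 * max_busy_timeout is in ms; ~0 ticks of the card clock divided
+	 * by the clock rate in kHz gives the longest representable wait.
+	 */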
+	mmc->max_busy_timeout = ~0 / (mmc->f_max / 1000);
+
+	dev_dbg(dev, "f_max %d, f_min %d, max_busy_timeout %d\n",
+		mmc->f_max, mmc->f_min, mmc->max_busy_timeout);
+
+	/* host controller capabilities */
+	mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
+		     MMC_CAP_NEEDS_POLL | MMC_CAP_HW_RESET | MMC_CAP_ERASE |
+		     MMC_CAP_CMD23;
+
+	spin_lock_init(&host->lock);
+	mutex_init(&host->mutex);
+
+	if (IS_ERR_OR_NULL(host->dma_chan_rxtx)) {
+		dev_warn(dev, "unable to initialise DMA channel. Falling back to PIO\n");
+		host->use_dma = false;
+	} else {
+		host->use_dma = true;
+
+		host->dma_cfg_tx.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		host->dma_cfg_tx.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		host->dma_cfg_tx.slave_id = 13;		/* DREQ channel */
+		host->dma_cfg_tx.direction = DMA_MEM_TO_DEV;
+		host->dma_cfg_tx.src_addr = 0;
+		host->dma_cfg_tx.dst_addr = host->phys_addr + SDDATA;
+
+		host->dma_cfg_rx.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		host->dma_cfg_rx.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		host->dma_cfg_rx.slave_id = 13;		/* DREQ channel */
+		host->dma_cfg_rx.direction = DMA_DEV_TO_MEM;
+		host->dma_cfg_rx.src_addr = host->phys_addr + SDDATA;
+		host->dma_cfg_rx.dst_addr = 0;
+
+		if (dmaengine_slave_config(host->dma_chan_rxtx,
+					   &host->dma_cfg_tx) != 0 ||
+		    dmaengine_slave_config(host->dma_chan_rxtx,
+					   &host->dma_cfg_rx) != 0)
+			host->use_dma = false;
+	}
+
+	mmc->max_segs = 128;
+	mmc->max_req_size = 524288;
+	mmc->max_seg_size = mmc->max_req_size;
+	mmc->max_blk_size = 1024;
+	mmc->max_blk_count =  65535;
+
+	/* report supported voltage ranges */
+	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+
+	INIT_WORK(&host->dma_work, bcm2835_dma_complete_work);
+	INIT_DELAYED_WORK(&host->timeout_work, bcm2835_timeout);
+
+	/* Set interrupt enables */
+	host->hcfg = SDHCFG_BUSY_IRPT_EN;
+
+	bcm2835_reset_internal(host);
+
+	ret = request_threaded_irq(host->irq, bcm2835_irq,
+				   bcm2835_threaded_irq,
+				   0, mmc_hostname(mmc), host);
+	if (ret) {
+		dev_err(dev, "failed to request IRQ %d: %d\n", host->irq, ret);
+		return ret;
+	}
+
+	ret = mmc_add_host(mmc);
+	if (ret) {
+		free_irq(host->irq, host);
+		return ret;
+	}
+
+	pio_limit_string[0] = '\0';
+	if (host->use_dma && (PIO_THRESHOLD > 0))
+		sprintf(pio_limit_string, " (>%d)", PIO_THRESHOLD);
+	dev_info(dev, "loaded - DMA %s%s\n",
+		 host->use_dma ? "enabled" : "disabled", pio_limit_string);
+
+	return 0;
+}
+
+static int bcm2835_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct clk *clk;
+	struct resource *iomem;
+	struct bcm2835_host *host;
+	struct mmc_host *mmc;
+	const __be32 *regaddr_p;
+	int ret;
+
+	dev_dbg(dev, "%s\n", __func__);
+	mmc = mmc_alloc_host(sizeof(*host), dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	mmc->ops = &bcm2835_ops;
+	host = mmc_priv(mmc);
+	host->mmc = mmc;
+	host->pdev = pdev;
+	spin_lock_init(&host->lock);
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	host->ioaddr = devm_ioremap_resource(dev, iomem);
+	if (IS_ERR(host->ioaddr)) {
+		ret = PTR_ERR(host->ioaddr);
+		goto err;
+	}
+
+	/* Parse OF address directly to get the physical address for
+	 * DMA to our registers.
+	 */
+	regaddr_p = of_get_address(pdev->dev.of_node, 0, NULL, NULL);
+	if (!regaddr_p) {
+		dev_err(dev, "Can't get phys address\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	host->phys_addr = be32_to_cpup(regaddr_p);
+
+	host->dma_chan = NULL;
+	host->dma_desc = NULL;
+
+	host->dma_chan_rxtx = dma_request_slave_channel(dev, "rx-tx");
+
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "could not get clk: %d\n", ret);
+		goto err;
+	}
+
+	host->max_clk = clk_get_rate(clk);
+
+	host->irq = platform_get_irq(pdev, 0);
+	if (host->irq <= 0) {
+		dev_err(dev, "get IRQ failed\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = mmc_of_parse(mmc);
+	if (ret)
+		goto err;
+
+	ret = bcm2835_add_host(host);
+	if (ret)
+		goto err;
+
+	platform_set_drvdata(pdev, host);
+
+	dev_dbg(dev, "%s -> OK\n", __func__);
+
+	return 0;
+
+err:
+	dev_dbg(dev, "%s -> err %d\n", __func__, ret);
+	mmc_free_host(mmc);
+
+	return ret;
+}
+
+static int bcm2835_remove(struct platform_device *pdev)
+{
+	struct bcm2835_host *host = platform_get_drvdata(pdev);
+
+	mmc_remove_host(host->mmc);
+
+	writel(SDVDD_POWER_OFF, host->ioaddr + SDVDD);
+
+	free_irq(host->irq, host);
+
+	cancel_work_sync(&host->dma_work);
+	cancel_delayed_work_sync(&host->timeout_work);
+
+	mmc_free_host(host->mmc);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id bcm2835_match[] = {
+	{ .compatible = "brcm,bcm2835-sdhost" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, bcm2835_match);
+
+static struct platform_driver bcm2835_driver = {
+	.probe      = bcm2835_probe,
+	.remove     = bcm2835_remove,
+	.driver     = {
+		.name		= "sdhost-bcm2835",
+		.of_match_table	= bcm2835_match,
+	},
+};
+module_platform_driver(bcm2835_driver);
+
+MODULE_ALIAS("platform:sdhost-bcm2835");
+MODULE_DESCRIPTION("BCM2835 SDHost driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Phil Elwell");
diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c
new file mode 100644
index 000000000000..772d0900026d
--- /dev/null
+++ b/drivers/mmc/host/cavium-octeon.c
@@ -0,0 +1,351 @@
+/*
+ * Driver for MMC and SSD cards for Cavium OCTEON SOCs.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012-2017 Cavium Inc.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/slot-gpio.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <asm/octeon/octeon.h>
+#include "cavium.h"
+
+#define CVMX_MIO_BOOT_CTL CVMX_ADD_IO_SEG(0x00011800000000D0ull)
+
+/*
+ * The l2c* functions below are used for the EMMC-17978 workaround.
+ *
+ * Due to a bug in the design of the MMC bus hardware, the 2nd to last
+ * cache block of a DMA read must be locked into the L2 Cache.
+ * Otherwise, data corruption may occur.
+ */
+static inline void *phys_to_ptr(u64 address)
+{
+	return (void *)(address | (1ull << 63)); /* XKPHYS */
+}
+
+/*
+ * Lock a single line into L2. The line is zeroed before locking
+ * to make sure no dram accesses are made.
+ */
+static void l2c_lock_line(u64 addr)
+{
+	char *addr_ptr = phys_to_ptr(addr);
+
+	asm volatile (
+		"cache 31, %[line]"	/* Lock the line */
+		::[line] "m" (*addr_ptr));
+}
+
+/* Unlock a single line in the L2 cache. */
+static void l2c_unlock_line(u64 addr)
+{
+	char *addr_ptr = phys_to_ptr(addr);
+
+	asm volatile (
+		"cache 23, %[line]"	/* Unlock the line */
+		::[line] "m" (*addr_ptr));
+}
+
+/* Locks a memory region in the L2 cache. */
+static void l2c_lock_mem_region(u64 start, u64 len)
+{
+	u64 end;
+
+	/* Round start/end to cache line boundaries */
+	end = ALIGN(start + len - 1, CVMX_CACHE_LINE_SIZE);
+	start = ALIGN(start, CVMX_CACHE_LINE_SIZE);
+
+	while (start <= end) {
+		l2c_lock_line(start);
+		start += CVMX_CACHE_LINE_SIZE;
+	}
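+	/* Ensure all lock operations have completed before DMA starts. */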
+	asm volatile("sync");
+}
+
+/* Unlock a memory region in the L2 cache. */
+static void l2c_unlock_mem_region(u64 start, u64 len)
+{
+	u64 end;
+
+	/* Round start/end to cache line boundaries */
+	end = ALIGN(start + len - 1, CVMX_CACHE_LINE_SIZE);
+	start = ALIGN(start, CVMX_CACHE_LINE_SIZE);
+
+	while (start <= end) {
+		l2c_unlock_line(start);
+		start += CVMX_CACHE_LINE_SIZE;
+	}
+}
+
+static void octeon_mmc_acquire_bus(struct cvm_mmc_host *host)
+{
+	if (!host->has_ciu3) {
+		down(&octeon_bootbus_sem);
+		/* For CN70XX, switch the MMC controller onto the bus. */
+		if (OCTEON_IS_MODEL(OCTEON_CN70XX))
+			writeq(0, (void __iomem *)CVMX_MIO_BOOT_CTL);
+	} else {
+		down(&host->mmc_serializer);
+	}
+}
+
+static void octeon_mmc_release_bus(struct cvm_mmc_host *host)
+{
+	if (!host->has_ciu3)
+		up(&octeon_bootbus_sem);
+	else
+		up(&host->mmc_serializer);
+}
+
+static void octeon_mmc_int_enable(struct cvm_mmc_host *host, u64 val)
+{
+	writeq(val, host->base + MIO_EMM_INT(host));
+	if (!host->dma_active || !host->has_ciu3)
+		writeq(val, host->base + MIO_EMM_INT_EN(host));
+}
+
+static void octeon_mmc_set_shared_power(struct cvm_mmc_host *host, int dir)
+{
+	if (dir == 0)
+		if (!atomic_dec_return(&host->shared_power_users))
+			gpiod_set_value_cansleep(host->global_pwr_gpiod, 0);
+	if (dir == 1)
+		if (atomic_inc_return(&host->shared_power_users) == 1)
+			gpiod_set_value_cansleep(host->global_pwr_gpiod, 1);
+}
+
+static void octeon_mmc_dmar_fixup(struct cvm_mmc_host *host,
+				  struct mmc_command *cmd,
+				  struct mmc_data *data,
+				  u64 addr)
+{
+	if (cmd->opcode != MMC_WRITE_MULTIPLE_BLOCK)
+		return;
+	if (data->blksz * data->blocks <= 1024)
+		return;
+
+	host->n_minus_one = addr + (data->blksz * data->blocks) - 1024;
+	l2c_lock_mem_region(host->n_minus_one, 512);
+}
+
+static void octeon_mmc_dmar_fixup_done(struct cvm_mmc_host *host)
+{
+	if (!host->n_minus_one)
+		return;
+	l2c_unlock_mem_region(host->n_minus_one, 512);
+	host->n_minus_one = 0;
+}
+
+static int octeon_mmc_probe(struct platform_device *pdev)
+{
+	struct device_node *cn, *node = pdev->dev.of_node;
+	struct cvm_mmc_host *host;
+	struct resource	*res;
+	void __iomem *base;
+	int mmc_irq[9];
+	int i, ret = 0;
+	u64 val;
+
+	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	spin_lock_init(&host->irq_handler_lock);
+	sema_init(&host->mmc_serializer, 1);
+
+	host->dev = &pdev->dev;
+	host->acquire_bus = octeon_mmc_acquire_bus;
+	host->release_bus = octeon_mmc_release_bus;
+	host->int_enable = octeon_mmc_int_enable;
+	host->set_shared_power = octeon_mmc_set_shared_power;
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) ||
+	    OCTEON_IS_MODEL(OCTEON_CNF7XXX)) {
+		host->dmar_fixup = octeon_mmc_dmar_fixup;
+		host->dmar_fixup_done = octeon_mmc_dmar_fixup_done;
+	}
+
+	host->sys_freq = octeon_get_io_clock_rate();
+
+	if (of_device_is_compatible(node, "cavium,octeon-7890-mmc")) {
+		host->big_dma_addr = true;
+		host->need_irq_handler_lock = true;
+		host->has_ciu3 = true;
+		host->use_sg = true;
+		/*
+		 * First seven are the EMM_INT bits 0..6, then two for
+		 * the EMM_DMA_INT bits
+		 */
+		for (i = 0; i < 9; i++) {
+			mmc_irq[i] = platform_get_irq(pdev, i);
+			if (mmc_irq[i] < 0)
+				return mmc_irq[i];
+
+			/* work around legacy u-boot device trees */
+			irq_set_irq_type(mmc_irq[i], IRQ_TYPE_EDGE_RISING);
+		}
+	} else {
+		host->big_dma_addr = false;
+		host->need_irq_handler_lock = false;
+		host->has_ciu3 = false;
+		/* The first IRQ is EMM, the second is DMA */
+		for (i = 0; i < 2; i++) {
+			mmc_irq[i] = platform_get_irq(pdev, i);
+			if (mmc_irq[i] < 0)
+				return mmc_irq[i];
+		}
+	}
+
+	host->last_slot = -1;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Platform resource[0] is missing\n");
+		return -ENXIO;
+	}
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	host->base = (void __iomem *)base;
+	host->reg_off = 0;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "Platform resource[1] is missing\n");
+		return -EINVAL;
+	}
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+	host->dma_base = (void __iomem *)base;
+	/*
+	 * To keep the register addresses shared we intentionally use
+	 * a negative offset here; the first register used on Octeon
+	 * therefore starts at 0x20 (MIO_EMM_DMA_CFG).
+	 */
+	host->reg_off_dma = -0x20;
+
+	ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	/*
+	 * Clear out any pending interrupts that may be left over from
+	 * the bootloader.
+	 */
+	val = readq(host->base + MIO_EMM_INT(host));
+	writeq(val, host->base + MIO_EMM_INT(host));
+
+	if (host->has_ciu3) {
+		/* Only CMD_DONE, DMA_DONE, CMD_ERR, DMA_ERR */
+		for (i = 1; i <= 4; i++) {
+			ret = devm_request_irq(&pdev->dev, mmc_irq[i],
+					       cvm_mmc_interrupt,
+					       0, cvm_mmc_irq_names[i], host);
+			if (ret < 0) {
+				dev_err(&pdev->dev, "Error: devm_request_irq %d\n",
+					mmc_irq[i]);
+				return ret;
+			}
+		}
+	} else {
+		ret = devm_request_irq(&pdev->dev, mmc_irq[0],
+				       cvm_mmc_interrupt, 0, KBUILD_MODNAME,
+				       host);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Error: devm_request_irq %d\n",
+				mmc_irq[0]);
+			return ret;
+		}
+	}
+
+	host->global_pwr_gpiod = devm_gpiod_get_optional(&pdev->dev,
+							 "power-gpios",
+							 GPIOD_OUT_HIGH);
+	if (IS_ERR(host->global_pwr_gpiod)) {
+		dev_err(&pdev->dev, "Invalid power GPIO\n");
+		return PTR_ERR(host->global_pwr_gpiod);
+	}
+
+	platform_set_drvdata(pdev, host);
+
+	i = 0;
+	for_each_child_of_node(node, cn) {
+		host->slot_pdev[i] =
+			of_platform_device_create(cn, NULL, &pdev->dev);
+		if (!host->slot_pdev[i]) {
+			i++;
+			continue;
+		}
+		ret = cvm_mmc_of_slot_probe(&host->slot_pdev[i]->dev, host);
+		if (ret) {
+			dev_err(&pdev->dev, "Error populating slots\n");
+			octeon_mmc_set_shared_power(host, 0);
+			return ret;
+		}
+		i++;
+	}
+	return 0;
+}
+
+static int octeon_mmc_remove(struct platform_device *pdev)
+{
+	struct cvm_mmc_host *host = platform_get_drvdata(pdev);
+	u64 dma_cfg;
+	int i;
+
+	for (i = 0; i < CAVIUM_MAX_MMC; i++)
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+
+	dma_cfg = readq(host->dma_base + MIO_EMM_DMA_CFG(host));
+	dma_cfg &= ~MIO_EMM_DMA_CFG_EN;
+	writeq(dma_cfg, host->dma_base + MIO_EMM_DMA_CFG(host));
+
+	octeon_mmc_set_shared_power(host, 0);
+	return 0;
+}
+
+static const struct of_device_id octeon_mmc_match[] = {
+	{
+		.compatible = "cavium,octeon-6130-mmc",
+	},
+	{
+		.compatible = "cavium,octeon-7890-mmc",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, octeon_mmc_match);
+
+static struct platform_driver octeon_mmc_driver = {
+	.probe		= octeon_mmc_probe,
+	.remove		= octeon_mmc_remove,
+	.driver		= {
+		.name	= KBUILD_MODNAME,
+		.of_match_table = octeon_mmc_match,
+	},
+};
+
+static int __init octeon_mmc_init(void)
+{
+	return platform_driver_register(&octeon_mmc_driver);
+}
+
+static void __exit octeon_mmc_cleanup(void)
+{
+	platform_driver_unregister(&octeon_mmc_driver);
+}
+
+module_init(octeon_mmc_init);
+module_exit(octeon_mmc_cleanup);
+
+MODULE_AUTHOR("Cavium Inc. <support@cavium.com>");
+MODULE_DESCRIPTION("Low-level driver for Cavium OCTEON MMC/SSD card");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c
new file mode 100644
index 000000000000..fe3d77267cd6
--- /dev/null
+++ b/drivers/mmc/host/cavium-thunderx.c
@@ -0,0 +1,187 @@
+/*
+ * Driver for MMC and SSD cards for Cavium ThunderX SoCs.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium Inc.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/mmc/mmc.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include "cavium.h"
+
+static void thunder_mmc_acquire_bus(struct cvm_mmc_host *host)
+{
+	down(&host->mmc_serializer);
+}
+
+static void thunder_mmc_release_bus(struct cvm_mmc_host *host)
+{
+	up(&host->mmc_serializer);
+}
+
+static void thunder_mmc_int_enable(struct cvm_mmc_host *host, u64 val)
+{
+	writeq(val, host->base + MIO_EMM_INT(host));
+	writeq(val, host->base + MIO_EMM_INT_EN_SET(host));
+}
+
+static int thunder_mmc_register_interrupts(struct cvm_mmc_host *host,
+					   struct pci_dev *pdev)
+{
+	int nvec, ret, i;
+
+	nvec = pci_alloc_irq_vectors(pdev, 1, 9, PCI_IRQ_MSIX);
+	if (nvec < 0)
+		return nvec;
+
+	/* register interrupts */
+	for (i = 0; i < nvec; i++) {
+		ret = devm_request_irq(&pdev->dev, pci_irq_vector(pdev, i),
+				       cvm_mmc_interrupt,
+				       0, cvm_mmc_irq_names[i], host);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int thunder_mmc_probe(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	struct device_node *child_node;
+	struct cvm_mmc_host *host;
+	int ret, i = 0;
+
+	host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
+	if (!host)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, host);
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	ret = pci_request_regions(pdev, KBUILD_MODNAME);
+	if (ret)
+		return ret;
+
+	host->base = pcim_iomap(pdev, 0, pci_resource_len(pdev, 0));
+	if (!host->base)
+		return -EINVAL;
+
+	/* On ThunderX these are identical */
+	host->dma_base = host->base;
+
+	host->reg_off = 0x2000;
+	host->reg_off_dma = 0x160;
+
+	host->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(host->clk))
+		return PTR_ERR(host->clk);
+
+	ret = clk_prepare_enable(host->clk);
+	if (ret)
+		return ret;
+	host->sys_freq = clk_get_rate(host->clk);
+
+	spin_lock_init(&host->irq_handler_lock);
+	sema_init(&host->mmc_serializer, 1);
+
+	host->dev = dev;
+	host->acquire_bus = thunder_mmc_acquire_bus;
+	host->release_bus = thunder_mmc_release_bus;
+	host->int_enable = thunder_mmc_int_enable;
+
+	host->use_sg = true;
+	host->big_dma_addr = true;
+	host->need_irq_handler_lock = true;
+	host->last_slot = -1;
+
+	ret = dma_set_mask(dev, DMA_BIT_MASK(48));
+	if (ret)
+		goto error;
+
+	/*
+	 * Clear out any pending interrupts that may be left over from
+	 * the bootloader. Writing 1 to the bits clears them.
+	 */
+	writeq(127, host->base + MIO_EMM_INT_EN(host));
+	writeq(3, host->base + MIO_EMM_DMA_INT_ENA_W1C(host));
+	/* Clear DMA FIFO */
+	writeq(BIT_ULL(16), host->base + MIO_EMM_DMA_FIFO_CFG(host));
+
+	ret = thunder_mmc_register_interrupts(host, pdev);
+	if (ret)
+		goto error;
+
+	for_each_child_of_node(node, child_node) {
+		/*
+		 * mmc_of_parse and devm* require one device per slot.
+		 * Create a dummy device per slot and set its node pointer
+		 * to the slot's node. The easiest way to get this is by
+		 * using of_platform_device_create.
+		 */
+		if (of_device_is_compatible(child_node, "mmc-slot")) {
+			host->slot_pdev[i] = of_platform_device_create(child_node, NULL,
+								       &pdev->dev);
+			if (!host->slot_pdev[i])
+				continue;
+
+			ret = cvm_mmc_of_slot_probe(&host->slot_pdev[i]->dev, host);
+			if (ret)
+				goto error;
+		}
+		i++;
+	}
+	dev_info(dev, "probed\n");
+	return 0;
+
+error:
+	clk_disable_unprepare(host->clk);
+	return ret;
+}
+
+static void thunder_mmc_remove(struct pci_dev *pdev)
+{
+	struct cvm_mmc_host *host = pci_get_drvdata(pdev);
+	u64 dma_cfg;
+	int i;
+
+	for (i = 0; i < CAVIUM_MAX_MMC; i++)
+		if (host->slot[i])
+			cvm_mmc_of_slot_remove(host->slot[i]);
+
+	dma_cfg = readq(host->dma_base + MIO_EMM_DMA_CFG(host));
+	dma_cfg &= ~MIO_EMM_DMA_CFG_EN;
+	writeq(dma_cfg, host->dma_base + MIO_EMM_DMA_CFG(host));
+
+	clk_disable_unprepare(host->clk);
+}
+
+static const struct pci_device_id thunder_mmc_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa010) },
+	{ 0, }  /* end of table */
+};
+
+static struct pci_driver thunder_mmc_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = thunder_mmc_id_table,
+	.probe = thunder_mmc_probe,
+	.remove = thunder_mmc_remove,
+};
+
+module_pci_driver(thunder_mmc_driver);
+
+MODULE_AUTHOR("Cavium Inc.");
+MODULE_DESCRIPTION("Cavium ThunderX eMMC Driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, thunder_mmc_id_table);
diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c
new file mode 100644
index 000000000000..58b51ba6aabd
--- /dev/null
+++ b/drivers/mmc/host/cavium.c
@@ -0,0 +1,1090 @@
+/*
+ * Shared part of the driver for the MMC/SDHC controller on Cavium
+ * OCTEON and ThunderX SoCs.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012-2017 Cavium Inc.
+ * Authors:
+ *   David Daney <david.daney@cavium.com>
+ *   Peter Swain <pswain@cavium.com>
+ *   Steven J. Hill <steven.hill@cavium.com>
+ *   Jan Glauber <jglauber@cavium.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/slot-gpio.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/scatterlist.h>
+#include <linux/time.h>
+
+#include "cavium.h"
+
+const char *cvm_mmc_irq_names[] = {
+	"MMC Buffer",
+	"MMC Command",
+	"MMC DMA",
+	"MMC Command Error",
+	"MMC DMA Error",
+	"MMC Switch",
+	"MMC Switch Error",
+	"MMC DMA int Fifo",
+	"MMC DMA int",
+};
+
+/*
+ * The Cavium MMC host hardware assumes that all commands have fixed
+ * command and response types.  These are correct if MMC devices are
+ * being used.  However, non-MMC devices like SD use command and
+ * response types that are unexpected by the host hardware.
+ *
+ * The command and response types can be overridden by supplying an
+ * XOR value that is applied to the type.  We calculate the XOR value
+ * from the values in this table and the flags passed from the MMC
+ * core.
+ */
+static struct cvm_mmc_cr_type cvm_mmc_cr_types[] = {
+	{0, 0},		/* CMD0 */
+	{0, 3},		/* CMD1 */
+	{0, 2},		/* CMD2 */
+	{0, 1},		/* CMD3 */
+	{0, 0},		/* CMD4 */
+	{0, 1},		/* CMD5 */
+	{0, 1},		/* CMD6 */
+	{0, 1},		/* CMD7 */
+	{1, 1},		/* CMD8 */
+	{0, 2},		/* CMD9 */
+	{0, 2},		/* CMD10 */
+	{1, 1},		/* CMD11 */
+	{0, 1},		/* CMD12 */
+	{0, 1},		/* CMD13 */
+	{1, 1},		/* CMD14 */
+	{0, 0},		/* CMD15 */
+	{0, 1},		/* CMD16 */
+	{1, 1},		/* CMD17 */
+	{1, 1},		/* CMD18 */
+	{3, 1},		/* CMD19 */
+	{2, 1},		/* CMD20 */
+	{0, 0},		/* CMD21 */
+	{0, 0},		/* CMD22 */
+	{0, 1},		/* CMD23 */
+	{2, 1},		/* CMD24 */
+	{2, 1},		/* CMD25 */
+	{2, 1},		/* CMD26 */
+	{2, 1},		/* CMD27 */
+	{0, 1},		/* CMD28 */
+	{0, 1},		/* CMD29 */
+	{1, 1},		/* CMD30 */
+	{1, 1},		/* CMD31 */
+	{0, 0},		/* CMD32 */
+	{0, 0},		/* CMD33 */
+	{0, 0},		/* CMD34 */
+	{0, 1},		/* CMD35 */
+	{0, 1},		/* CMD36 */
+	{0, 0},		/* CMD37 */
+	{0, 1},		/* CMD38 */
+	{0, 4},		/* CMD39 */
+	{0, 5},		/* CMD40 */
+	{0, 0},		/* CMD41 */
+	{2, 1},		/* CMD42 */
+	{0, 0},		/* CMD43 */
+	{0, 0},		/* CMD44 */
+	{0, 0},		/* CMD45 */
+	{0, 0},		/* CMD46 */
+	{0, 0},		/* CMD47 */
+	{0, 0},		/* CMD48 */
+	{0, 0},		/* CMD49 */
+	{0, 0},		/* CMD50 */
+	{0, 0},		/* CMD51 */
+	{0, 0},		/* CMD52 */
+	{0, 0},		/* CMD53 */
+	{0, 0},		/* CMD54 */
+	{0, 1},		/* CMD55 */
+	{0xff, 0xff},	/* CMD56 */
+	{0, 0},		/* CMD57 */
+	{0, 0},		/* CMD58 */
+	{0, 0},		/* CMD59 */
+	{0, 0},		/* CMD60 */
+	{0, 0},		/* CMD61 */
+	{0, 0},		/* CMD62 */
+	{0, 0}		/* CMD63 */
+};
+
+static struct cvm_mmc_cr_mods cvm_mmc_get_cr_mods(struct mmc_command *cmd)
+{
+	struct cvm_mmc_cr_type *cr;
+	u8 hardware_ctype, hardware_rtype;
+	u8 desired_ctype = 0, desired_rtype = 0;
+	struct cvm_mmc_cr_mods r;
+
+	cr = cvm_mmc_cr_types + (cmd->opcode & 0x3f);
+	hardware_ctype = cr->ctype;
+	hardware_rtype = cr->rtype;
+	if (cmd->opcode == MMC_GEN_CMD)
+		hardware_ctype = (cmd->arg & 1) ? 1 : 2;
+
+	switch (mmc_cmd_type(cmd)) {
+	case MMC_CMD_ADTC:
+		desired_ctype = (cmd->data->flags & MMC_DATA_WRITE) ? 2 : 1;
+		break;
+	case MMC_CMD_AC:
+	case MMC_CMD_BC:
+	case MMC_CMD_BCR:
+		desired_ctype = 0;
+		break;
+	}
+
+	switch (mmc_resp_type(cmd)) {
+	case MMC_RSP_NONE:
+		desired_rtype = 0;
+		break;
+	case MMC_RSP_R1:/* MMC_RSP_R5, MMC_RSP_R6, MMC_RSP_R7 */
+	case MMC_RSP_R1B:
+		desired_rtype = 1;
+		break;
+	case MMC_RSP_R2:
+		desired_rtype = 2;
+		break;
+	case MMC_RSP_R3: /* MMC_RSP_R4 */
+		desired_rtype = 3;
+		break;
+	}
+	r.ctype_xor = desired_ctype ^ hardware_ctype;
+	r.rtype_xor = desired_rtype ^ hardware_rtype;
+	return r;
+}
+
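
As a worked example of the XOR override: the hardware table treats CMD8 as MMC SEND_EXT_CSD, a data read with an R1 response (the {1, 1} entry above), while SD CMD8 is SEND_IF_COND, which carries no data and answers with R7 (handled like R1 by the response switch above). A self-contained sketch of the resulting mods, assuming the core flags SD CMD8 as a no-data command:

/*
 * Sketch only: the XOR values cvm_mmc_get_cr_mods() would produce for
 * SD CMD8. ctype_xor = 1 flips the command type from "data read" to
 * "no data"; rtype_xor = 0 leaves the R1-style response alone.
 */
#include <stdio.h>

int main(void)
{
	unsigned int hw_ctype = 1, hw_rtype = 1;	/* table entry {1, 1} */
	unsigned int sd_ctype = 0, sd_rtype = 1;	/* SEND_IF_COND: no data, R7 */

	printf("ctype_xor=%u rtype_xor=%u\n",
	       sd_ctype ^ hw_ctype, sd_rtype ^ hw_rtype);
	return 0;
}
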
+static void check_switch_errors(struct cvm_mmc_host *host)
+{
+	u64 emm_switch;
+
+	emm_switch = readq(host->base + MIO_EMM_SWITCH(host));
+	if (emm_switch & MIO_EMM_SWITCH_ERR0)
+		dev_err(host->dev, "Switch power class error\n");
+	if (emm_switch & MIO_EMM_SWITCH_ERR1)
+		dev_err(host->dev, "Switch hs timing error\n");
+	if (emm_switch & MIO_EMM_SWITCH_ERR2)
+		dev_err(host->dev, "Switch bus width error\n");
+}
+
+static void clear_bus_id(u64 *reg)
+{
+	u64 bus_id_mask = GENMASK_ULL(61, 60);
+
+	*reg &= ~bus_id_mask;
+}
+
+static void set_bus_id(u64 *reg, int bus_id)
+{
+	clear_bus_id(reg);
+	*reg |= FIELD_PREP(GENMASK_ULL(61, 60), bus_id);
+}
+
+static int get_bus_id(u64 reg)
+{
+	return FIELD_GET(GENMASK_ULL(61, 60), reg);
+}
+
+/*
+ * We never set the switch_exe bit since that would interfere
+ * with the commands sent by the MMC core.
+ */
+static void do_switch(struct cvm_mmc_host *host, u64 emm_switch)
+{
+	int retries = 100;
+	u64 rsp_sts;
+	int bus_id;
+
+	/*
+	 * Mode settings are only taken from slot 0. Work around that
+	 * hardware issue by first switching to slot 0.
+	 */
+	bus_id = get_bus_id(emm_switch);
+	clear_bus_id(&emm_switch);
+	writeq(emm_switch, host->base + MIO_EMM_SWITCH(host));
+
+	set_bus_id(&emm_switch, bus_id);
+	writeq(emm_switch, host->base + MIO_EMM_SWITCH(host));
+
+	/* wait for the switch to finish */
+	do {
+		rsp_sts = readq(host->base + MIO_EMM_RSP_STS(host));
+		if (!(rsp_sts & MIO_EMM_RSP_STS_SWITCH_VAL))
+			break;
+		udelay(10);
+	} while (--retries);
+
+	check_switch_errors(host);
+}
+
+static bool switch_val_changed(struct cvm_mmc_slot *slot, u64 new_val)
+{
+	/* Match BUS_ID, HS_TIMING, BUS_WIDTH, POWER_CLASS, CLK_HI, CLK_LO */
+	u64 match = 0x3001070fffffffffull;
+
+	return (slot->cached_switch & match) != (new_val & match);
+}
+
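
The match constant is nothing more than the OR of the compared switch fields; a hedged user-space sketch rebuilding it from the MIO_EMM_SWITCH_* bit positions defined in cavium.h (GM() stands in for the kernel's GENMASK_ULL()):

/*
 * Sketch only: reconstruct switch_val_changed()'s match constant from
 * the individual field masks.
 */
#include <stdio.h>

#define GM(h, l) ((~0ull >> (63 - (h))) & ~((1ull << (l)) - 1))

int main(void)
{
	unsigned long long match = GM(61, 60)	/* BUS_ID */
				 | GM(48, 48)	/* HS_TIMING */
				 | GM(42, 40)	/* BUS_WIDTH */
				 | GM(35, 32)	/* POWER_CLASS */
				 | GM(31, 16)	/* CLK_HI */
				 | GM(15, 0);	/* CLK_LO */

	printf("%#llx\n", match);	/* prints 0x3001070fffffffff */
	return 0;
}
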
+static void set_wdog(struct cvm_mmc_slot *slot, unsigned int ns)
+{
+	u64 timeout;
+
+	if (!slot->clock)
+		return;
+
+	if (ns)
+		timeout = (slot->clock * ns) / NSEC_PER_SEC;
+	else
+		timeout = (slot->clock * 850ull) / 1000ull;
+	writeq(timeout, slot->host->base + MIO_EMM_WDOG(slot->host));
+}
+
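
The watchdog is programmed in card-clock cycles rather than time units, so at a 52 MHz card clock a 10 ms timeout becomes 520000 cycles, and the ns == 0 fallback corresponds to 850 ms worth of cycles. A sketch of the same arithmetic (the clock value is purely illustrative):

/*
 * Sketch only: set_wdog() converts a timeout to card-clock cycles.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long clock = 52000000ull;		/* 52 MHz, illustrative */
	unsigned long long ns = 10 * 1000 * 1000;	/* 10 ms in ns */

	printf("10 ms   -> %llu cycles\n", clock * ns / 1000000000ull);
	printf("default -> %llu cycles (850 ms)\n", clock * 850ull / 1000ull);
	return 0;
}
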
+static void cvm_mmc_reset_bus(struct cvm_mmc_slot *slot)
+{
+	struct cvm_mmc_host *host = slot->host;
+	u64 emm_switch, wdog;
+
+	emm_switch = readq(slot->host->base + MIO_EMM_SWITCH(host));
+	emm_switch &= ~(MIO_EMM_SWITCH_EXE | MIO_EMM_SWITCH_ERR0 |
+			MIO_EMM_SWITCH_ERR1 | MIO_EMM_SWITCH_ERR2);
+	set_bus_id(&emm_switch, slot->bus_id);
+
+	wdog = readq(slot->host->base + MIO_EMM_WDOG(host));
+	do_switch(slot->host, emm_switch);
+
+	slot->cached_switch = emm_switch;
+
+	msleep(20);
+
+	writeq(wdog, slot->host->base + MIO_EMM_WDOG(host));
+}
+
+/* Switch to another slot if needed */
+static void cvm_mmc_switch_to(struct cvm_mmc_slot *slot)
+{
+	struct cvm_mmc_host *host = slot->host;
+	struct cvm_mmc_slot *old_slot;
+	u64 emm_sample, emm_switch;
+
+	if (slot->bus_id == host->last_slot)
+		return;
+
+	if (host->last_slot >= 0 && host->slot[host->last_slot]) {
+		old_slot = host->slot[host->last_slot];
+		old_slot->cached_switch = readq(host->base + MIO_EMM_SWITCH(host));
+		old_slot->cached_rca = readq(host->base + MIO_EMM_RCA(host));
+	}
+
+	writeq(slot->cached_rca, host->base + MIO_EMM_RCA(host));
+	emm_switch = slot->cached_switch;
+	set_bus_id(&emm_switch, slot->bus_id);
+	do_switch(host, emm_switch);
+
+	emm_sample = FIELD_PREP(MIO_EMM_SAMPLE_CMD_CNT, slot->cmd_cnt) |
+		     FIELD_PREP(MIO_EMM_SAMPLE_DAT_CNT, slot->dat_cnt);
+	writeq(emm_sample, host->base + MIO_EMM_SAMPLE(host));
+
+	host->last_slot = slot->bus_id;
+}
+
+static void do_read(struct cvm_mmc_host *host, struct mmc_request *req,
+		    u64 dbuf)
+{
+	struct sg_mapping_iter *smi = &host->smi;
+	int data_len = req->data->blocks * req->data->blksz;
+	int bytes_xfered, shift = -1;
+	u64 dat = 0;
+
+	/* Auto inc from offset zero */
+	writeq((0x10000 | (dbuf << 6)), host->base + MIO_EMM_BUF_IDX(host));
+
+	for (bytes_xfered = 0; bytes_xfered < data_len;) {
+		if (smi->consumed >= smi->length) {
+			if (!sg_miter_next(smi))
+				break;
+			smi->consumed = 0;
+		}
+
+		if (shift < 0) {
+			dat = readq(host->base + MIO_EMM_BUF_DAT(host));
+			shift = 56;
+		}
+
+		while (smi->consumed < smi->length && shift >= 0) {
+			((u8 *)smi->addr)[smi->consumed] = (dat >> shift) & 0xff;
+			bytes_xfered++;
+			smi->consumed++;
+			shift -= 8;
+		}
+	}
+
+	sg_miter_stop(smi);
+	req->data->bytes_xfered = bytes_xfered;
+	req->data->error = 0;
+}
+
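
do_read() above (and do_write_request() later in this file) treats each 64-bit BUF_DAT word as big-endian: byte 0 of the block sits in bits 63:56, which is why shift starts at 56 and walks down by 8. A tiny standalone sketch of that unpack order:

/*
 * Sketch only: MSB-first unpacking of one BUF_DAT word, mirroring the
 * shift = 56 ... shift -= 8 loop in do_read().
 */
#include <stdio.h>

int main(void)
{
	unsigned long long dat = 0x0001020304050607ull;
	unsigned char buf[8];

	for (int shift = 56, i = 0; shift >= 0; shift -= 8, i++)
		buf[i] = (dat >> shift) & 0xff;

	for (int i = 0; i < 8; i++)
		printf("%02x ", buf[i]);	/* 00 01 02 ... 07 */
	printf("\n");
	return 0;
}
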
+static void do_write(struct mmc_request *req)
+{
+	req->data->bytes_xfered = req->data->blocks * req->data->blksz;
+	req->data->error = 0;
+}
+
+static void set_cmd_response(struct cvm_mmc_host *host, struct mmc_request *req,
+			     u64 rsp_sts)
+{
+	u64 rsp_hi, rsp_lo;
+
+	if (!(rsp_sts & MIO_EMM_RSP_STS_RSP_VAL))
+		return;
+
+	rsp_lo = readq(host->base + MIO_EMM_RSP_LO(host));
+
+	switch (FIELD_GET(MIO_EMM_RSP_STS_RSP_TYPE, rsp_sts)) {
+	case 1:
+	case 3:
+		req->cmd->resp[0] = (rsp_lo >> 8) & 0xffffffff;
+		req->cmd->resp[1] = 0;
+		req->cmd->resp[2] = 0;
+		req->cmd->resp[3] = 0;
+		break;
+	case 2:
+		req->cmd->resp[3] = rsp_lo & 0xffffffff;
+		req->cmd->resp[2] = (rsp_lo >> 32) & 0xffffffff;
+		rsp_hi = readq(host->base + MIO_EMM_RSP_HI(host));
+		req->cmd->resp[1] = rsp_hi & 0xffffffff;
+		req->cmd->resp[0] = (rsp_hi >> 32) & 0xffffffff;
+		break;
+	}
+}
+
+static int get_dma_dir(struct mmc_data *data)
+{
+	return (data->flags & MMC_DATA_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+}
+
+static int finish_dma_single(struct cvm_mmc_host *host, struct mmc_data *data)
+{
+	data->bytes_xfered = data->blocks * data->blksz;
+	data->error = 0;
+	return 1;
+}
+
+static int finish_dma_sg(struct cvm_mmc_host *host, struct mmc_data *data)
+{
+	u64 fifo_cfg;
+	int count;
+
+	/* Check if there are any pending requests left */
+	fifo_cfg = readq(host->dma_base + MIO_EMM_DMA_FIFO_CFG(host));
+	count = FIELD_GET(MIO_EMM_DMA_FIFO_CFG_COUNT, fifo_cfg);
+	if (count)
+		dev_err(host->dev, "%u requests still pending\n", count);
+
+	data->bytes_xfered = data->blocks * data->blksz;
+	data->error = 0;
+
+	/* Clear and disable FIFO */
+	writeq(BIT_ULL(16), host->dma_base + MIO_EMM_DMA_FIFO_CFG(host));
+	dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data));
+	return 1;
+}
+
+static int finish_dma(struct cvm_mmc_host *host, struct mmc_data *data)
+{
+	if (host->use_sg && data->sg_len > 1)
+		return finish_dma_sg(host, data);
+	else
+		return finish_dma_single(host, data);
+}
+
+static int check_status(u64 rsp_sts)
+{
+	if (rsp_sts & MIO_EMM_RSP_STS_RSP_BAD_STS ||
+	    rsp_sts & MIO_EMM_RSP_STS_RSP_CRC_ERR ||
+	    rsp_sts & MIO_EMM_RSP_STS_BLK_CRC_ERR)
+		return -EILSEQ;
+	if (rsp_sts & MIO_EMM_RSP_STS_RSP_TIMEOUT ||
+	    rsp_sts & MIO_EMM_RSP_STS_BLK_TIMEOUT)
+		return -ETIMEDOUT;
+	if (rsp_sts & MIO_EMM_RSP_STS_DBUF_ERR)
+		return -EIO;
+	return 0;
+}
+
+/* Try to clean up failed DMA. */
+static void cleanup_dma(struct cvm_mmc_host *host, u64 rsp_sts)
+{
+	u64 emm_dma;
+
+	emm_dma = readq(host->base + MIO_EMM_DMA(host));
+	emm_dma |= FIELD_PREP(MIO_EMM_DMA_VAL, 1) |
+		   FIELD_PREP(MIO_EMM_DMA_DAT_NULL, 1);
+	set_bus_id(&emm_dma, get_bus_id(rsp_sts));
+	writeq(emm_dma, host->base + MIO_EMM_DMA(host));
+}
+
+irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id)
+{
+	struct cvm_mmc_host *host = dev_id;
+	struct mmc_request *req;
+	unsigned long flags = 0;
+	u64 emm_int, rsp_sts;
+	bool host_done;
+
+	if (host->need_irq_handler_lock)
+		spin_lock_irqsave(&host->irq_handler_lock, flags);
+	else
+		__acquire(&host->irq_handler_lock);
+
+	/* Clear interrupt bits (write 1 clears). */
+	emm_int = readq(host->base + MIO_EMM_INT(host));
+	writeq(emm_int, host->base + MIO_EMM_INT(host));
+
+	if (emm_int & MIO_EMM_INT_SWITCH_ERR)
+		check_switch_errors(host);
+
+	req = host->current_req;
+	if (!req)
+		goto out;
+
+	rsp_sts = readq(host->base + MIO_EMM_RSP_STS(host));
+	/*
+	 * dma_val set means DMA is still in progress. Don't touch
+	 * the request and wait for the interrupt indicating that
+	 * the DMA is finished.
+	 */
+	if ((rsp_sts & MIO_EMM_RSP_STS_DMA_VAL) && host->dma_active)
+		goto out;
+
+	if (!host->dma_active && req->data &&
+	    (emm_int & MIO_EMM_INT_BUF_DONE)) {
+		unsigned int type = (rsp_sts >> 7) & 3;
+
+		if (type == 1)
+			do_read(host, req, rsp_sts & MIO_EMM_RSP_STS_DBUF);
+		else if (type == 2)
+			do_write(req);
+	}
+
+	host_done = emm_int & MIO_EMM_INT_CMD_DONE ||
+		    emm_int & MIO_EMM_INT_DMA_DONE ||
+		    emm_int & MIO_EMM_INT_CMD_ERR  ||
+		    emm_int & MIO_EMM_INT_DMA_ERR;
+
+	if (!(host_done && req->done))
+		goto no_req_done;
+
+	req->cmd->error = check_status(rsp_sts);
+
+	if (host->dma_active && req->data)
+		if (!finish_dma(host, req->data))
+			goto no_req_done;
+
+	set_cmd_response(host, req, rsp_sts);
+	if ((emm_int & MIO_EMM_INT_DMA_ERR) &&
+	    (rsp_sts & MIO_EMM_RSP_STS_DMA_PEND))
+		cleanup_dma(host, rsp_sts);
+
+	host->current_req = NULL;
+	req->done(req);
+
+no_req_done:
+	if (host->dmar_fixup_done)
+		host->dmar_fixup_done(host);
+	if (host_done)
+		host->release_bus(host);
+out:
+	if (host->need_irq_handler_lock)
+		spin_unlock_irqrestore(&host->irq_handler_lock, flags);
+	else
+		__release(&host->irq_handler_lock);
+	return IRQ_RETVAL(emm_int != 0);
+}
+
+/*
+ * Program DMA_CFG and, if needed, DMA_ADR.
+ * Returns 0 on error, the DMA address otherwise.
+ */
+static u64 prepare_dma_single(struct cvm_mmc_host *host, struct mmc_data *data)
+{
+	u64 dma_cfg, addr;
+	int count, rw;
+
+	count = dma_map_sg(host->dev, data->sg, data->sg_len,
+			   get_dma_dir(data));
+	if (!count)
+		return 0;
+
+	rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0;
+	dma_cfg = FIELD_PREP(MIO_EMM_DMA_CFG_EN, 1) |
+		  FIELD_PREP(MIO_EMM_DMA_CFG_RW, rw);
+#ifdef __LITTLE_ENDIAN
+	dma_cfg |= FIELD_PREP(MIO_EMM_DMA_CFG_ENDIAN, 1);
+#endif
+	dma_cfg |= FIELD_PREP(MIO_EMM_DMA_CFG_SIZE,
+			      (sg_dma_len(&data->sg[0]) / 8) - 1);
+
+	addr = sg_dma_address(&data->sg[0]);
+	if (!host->big_dma_addr)
+		dma_cfg |= FIELD_PREP(MIO_EMM_DMA_CFG_ADR, addr);
+	writeq(dma_cfg, host->dma_base + MIO_EMM_DMA_CFG(host));
+
+	pr_debug("[%s] sg_dma_len: %u  total sg_elem: %d\n",
+		 (rw) ? "W" : "R", sg_dma_len(&data->sg[0]), count);
+
+	if (host->big_dma_addr)
+		writeq(addr, host->dma_base + MIO_EMM_DMA_ADR(host));
+	return addr;
+}
+
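
One detail worth calling out: the SIZE field written here (and in prepare_dma_sg below) encodes the segment length in 64-bit words minus one, so a 512-byte block becomes 63. A small sketch of the encode/decode pair, assuming segment lengths are multiples of 8 bytes:

/*
 * Sketch only: MIO_EMM_DMA_CFG_SIZE / DMA_FIFO_CMD_SIZE encoding.
 */
#include <assert.h>

static unsigned int encode_size(unsigned int bytes)
{
	return bytes / 8 - 1;		/* 64-bit words, minus one */
}

static unsigned int decode_size(unsigned int field)
{
	return (field + 1) * 8;
}

int main(void)
{
	assert(encode_size(512) == 63);			/* one 512-byte block */
	assert(decode_size(encode_size(4096)) == 4096);	/* round-trips */
	return 0;
}
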
+/*
+ * Queue the complete sg list into the FIFO.
+ * Returns 0 on error, 1 otherwise.
+ */
+static u64 prepare_dma_sg(struct cvm_mmc_host *host, struct mmc_data *data)
+{
+	struct scatterlist *sg;
+	u64 fifo_cmd, addr;
+	int count, i, rw;
+
+	count = dma_map_sg(host->dev, data->sg, data->sg_len,
+			   get_dma_dir(data));
+	if (!count)
+		return 0;
+	if (count > 16)
+		goto error;
+
+	/* Enable FIFO by removing CLR bit */
+	writeq(0, host->dma_base + MIO_EMM_DMA_FIFO_CFG(host));
+
+	for_each_sg(data->sg, sg, count, i) {
+		/* Program DMA address */
+		addr = sg_dma_address(sg);
+		if (addr & 7)
+			goto error;
+		writeq(addr, host->dma_base + MIO_EMM_DMA_FIFO_ADR(host));
+
+		/*
+		 * If we have scatter-gather support we also have an extra
+		 * register for the DMA addr, so no need to check
+		 * host->big_dma_addr here.
+		 */
+		rw = (data->flags & MMC_DATA_WRITE) ? 1 : 0;
+		fifo_cmd = FIELD_PREP(MIO_EMM_DMA_FIFO_CMD_RW, rw);
+
+		/* enable interrupts on the last element */
+		fifo_cmd |= FIELD_PREP(MIO_EMM_DMA_FIFO_CMD_INTDIS,
+				       (i + 1 == count) ? 0 : 1);
+
+#ifdef __LITTLE_ENDIAN
+		fifo_cmd |= FIELD_PREP(MIO_EMM_DMA_FIFO_CMD_ENDIAN, 1);
+#endif
+		fifo_cmd |= FIELD_PREP(MIO_EMM_DMA_FIFO_CMD_SIZE,
+				       sg_dma_len(sg) / 8 - 1);
+		/*
+		 * The write copies the address and the command to the FIFO
+		 * and increments the FIFO's COUNT field.
+		 */
+		writeq(fifo_cmd, host->dma_base + MIO_EMM_DMA_FIFO_CMD(host));
+		pr_debug("[%s] sg_dma_len: %u  sg_elem: %d/%d\n",
+			 (rw) ? "W" : "R", sg_dma_len(sg), i, count);
+	}
+
+	/*
+	 * Unlike prepare_dma_single, we don't return the address here,
+	 * as it would not make sense for scatter-gather. The DMA fixup
+	 * is only required on models that don't support scatter-gather,
+	 * so that is not a problem.
+	 */
+	return 1;
+
+error:
+	WARN_ON_ONCE(1);
+	dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data));
+	/* Disable FIFO */
+	writeq(BIT_ULL(16), host->dma_base + MIO_EMM_DMA_FIFO_CFG(host));
+	return 0;
+}
+
+static u64 prepare_dma(struct cvm_mmc_host *host, struct mmc_data *data)
+{
+	if (host->use_sg && data->sg_len > 1)
+		return prepare_dma_sg(host, data);
+	else
+		return prepare_dma_single(host, data);
+}
+
+static u64 prepare_ext_dma(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct cvm_mmc_slot *slot = mmc_priv(mmc);
+	u64 emm_dma;
+
+	emm_dma = FIELD_PREP(MIO_EMM_DMA_VAL, 1) |
+		  FIELD_PREP(MIO_EMM_DMA_SECTOR,
+			     mmc_card_is_blockaddr(mmc->card) ? 1 : 0) |
+		  FIELD_PREP(MIO_EMM_DMA_RW,
+			     (mrq->data->flags & MMC_DATA_WRITE) ? 1 : 0) |
+		  FIELD_PREP(MIO_EMM_DMA_BLOCK_CNT, mrq->data->blocks) |
+		  FIELD_PREP(MIO_EMM_DMA_CARD_ADDR, mrq->cmd->arg);
+	set_bus_id(&emm_dma, slot->bus_id);
+
+	if (mmc_card_mmc(mmc->card) || (mmc_card_sd(mmc->card) &&
+	    (mmc->card->scr.cmds & SD_SCR_CMD23_SUPPORT)))
+		emm_dma |= FIELD_PREP(MIO_EMM_DMA_MULTI, 1);
+
+	pr_debug("[%s] blocks: %u  multi: %d\n",
+		(emm_dma & MIO_EMM_DMA_RW) ? "W" : "R",
+		 mrq->data->blocks, (emm_dma & MIO_EMM_DMA_MULTI) ? 1 : 0);
+	return emm_dma;
+}
+
+static void cvm_mmc_dma_request(struct mmc_host *mmc,
+				struct mmc_request *mrq)
+{
+	struct cvm_mmc_slot *slot = mmc_priv(mmc);
+	struct cvm_mmc_host *host = slot->host;
+	struct mmc_data *data;
+	u64 emm_dma, addr;
+
+	if (!mrq->data || !mrq->data->sg || !mrq->data->sg_len ||
+	    !mrq->stop || mrq->stop->opcode != MMC_STOP_TRANSMISSION) {
+		dev_err(&mmc->card->dev,
+			"Error: cmv_mmc_dma_request no data\n");
+		goto error;
+	}
+
+	cvm_mmc_switch_to(slot);
+
+	data = mrq->data;
+	pr_debug("DMA request  blocks: %d  block_size: %d  total_size: %d\n",
+		 data->blocks, data->blksz, data->blocks * data->blksz);
+	if (data->timeout_ns)
+		set_wdog(slot, data->timeout_ns);
+
+	WARN_ON(host->current_req);
+	host->current_req = mrq;
+
+	emm_dma = prepare_ext_dma(mmc, mrq);
+	addr = prepare_dma(host, data);
+	if (!addr) {
+		dev_err(host->dev, "prepare_dma failed\n");
+		goto error;
+	}
+
+	host->dma_active = true;
+	host->int_enable(host, MIO_EMM_INT_CMD_ERR | MIO_EMM_INT_DMA_DONE |
+			 MIO_EMM_INT_DMA_ERR);
+
+	if (host->dmar_fixup)
+		host->dmar_fixup(host, mrq->cmd, data, addr);
+
+	/*
+	 * If we have a valid SD card in the slot, we set the response
+	 * bit mask to check for CRC errors and timeouts only.
+	 * Otherwise, we use the default power reset value.
+	 */
+	if (mmc_card_sd(mmc->card))
+		writeq(0x00b00000ull, host->base + MIO_EMM_STS_MASK(host));
+	else
+		writeq(0xe4390080ull, host->base + MIO_EMM_STS_MASK(host));
+	writeq(emm_dma, host->base + MIO_EMM_DMA(host));
+	return;
+
+error:
+	mrq->cmd->error = -EINVAL;
+	if (mrq->done)
+		mrq->done(mrq);
+	host->release_bus(host);
+}
+
+static void do_read_request(struct cvm_mmc_host *host, struct mmc_request *mrq)
+{
+	sg_miter_start(&host->smi, mrq->data->sg, mrq->data->sg_len,
+		       SG_MITER_ATOMIC | SG_MITER_TO_SG);
+}
+
+static void do_write_request(struct cvm_mmc_host *host, struct mmc_request *mrq)
+{
+	unsigned int data_len = mrq->data->blocks * mrq->data->blksz;
+	struct sg_mapping_iter *smi = &host->smi;
+	unsigned int bytes_xfered;
+	int shift = 56;
+	u64 dat = 0;
+
+	/* Copy data to the xmit buffer before issuing the command. */
+	sg_miter_start(smi, mrq->data->sg, mrq->data->sg_len, SG_MITER_FROM_SG);
+
+	/* Auto inc from offset zero, dbuf zero */
+	writeq(0x10000ull, host->base + MIO_EMM_BUF_IDX(host));
+
+	for (bytes_xfered = 0; bytes_xfered < data_len;) {
+		if (smi->consumed >= smi->length) {
+			if (!sg_miter_next(smi))
+				break;
+			smi->consumed = 0;
+		}
+
+		while (smi->consumed < smi->length && shift >= 0) {
+			dat |= (u64)((u8 *)smi->addr)[smi->consumed] << shift;
+			bytes_xfered++;
+			smi->consumed++;
+			shift -= 8;
+		}
+
+		if (shift < 0) {
+			writeq(dat, host->base + MIO_EMM_BUF_DAT(host));
+			shift = 56;
+			dat = 0;
+		}
+	}
+	sg_miter_stop(smi);
+}
+
+static void cvm_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct cvm_mmc_slot *slot = mmc_priv(mmc);
+	struct cvm_mmc_host *host = slot->host;
+	struct mmc_command *cmd = mrq->cmd;
+	struct cvm_mmc_cr_mods mods;
+	u64 emm_cmd, rsp_sts;
+	int retries = 100;
+
+	/*
+	 * Note about locking:
+	 * All MMC devices share the same bus and controller. Allow only a
+	 * single user of the bootbus/MMC bus at a time. The lock is acquired
+	 * on all entry points from the MMC layer.
+	 *
+	 * For requests the lock is only released after the completion
+	 * interrupt!
+	 */
+	host->acquire_bus(host);
+
+	if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+	    cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
+		return cvm_mmc_dma_request(mmc, mrq);
+
+	cvm_mmc_switch_to(slot);
+
+	mods = cvm_mmc_get_cr_mods(cmd);
+
+	WARN_ON(host->current_req);
+	host->current_req = mrq;
+
+	if (cmd->data) {
+		if (cmd->data->flags & MMC_DATA_READ)
+			do_read_request(host, mrq);
+		else
+			do_write_request(host, mrq);
+
+		if (cmd->data->timeout_ns)
+			set_wdog(slot, cmd->data->timeout_ns);
+	} else
+		set_wdog(slot, 0);
+
+	host->dma_active = false;
+	host->int_enable(host, MIO_EMM_INT_CMD_DONE | MIO_EMM_INT_CMD_ERR);
+
+	emm_cmd = FIELD_PREP(MIO_EMM_CMD_VAL, 1) |
+		  FIELD_PREP(MIO_EMM_CMD_CTYPE_XOR, mods.ctype_xor) |
+		  FIELD_PREP(MIO_EMM_CMD_RTYPE_XOR, mods.rtype_xor) |
+		  FIELD_PREP(MIO_EMM_CMD_IDX, cmd->opcode) |
+		  FIELD_PREP(MIO_EMM_CMD_ARG, cmd->arg);
+	set_bus_id(&emm_cmd, slot->bus_id);
+	if (cmd->data && mmc_cmd_type(cmd) == MMC_CMD_ADTC)
+		emm_cmd |= FIELD_PREP(MIO_EMM_CMD_OFFSET,
+				64 - ((cmd->data->blocks * cmd->data->blksz) / 8));
+
+	writeq(0, host->base + MIO_EMM_STS_MASK(host));
+
+retry:
+	rsp_sts = readq(host->base + MIO_EMM_RSP_STS(host));
+	if (rsp_sts & MIO_EMM_RSP_STS_DMA_VAL ||
+	    rsp_sts & MIO_EMM_RSP_STS_CMD_VAL ||
+	    rsp_sts & MIO_EMM_RSP_STS_SWITCH_VAL ||
+	    rsp_sts & MIO_EMM_RSP_STS_DMA_PEND) {
+		udelay(10);
+		if (--retries)
+			goto retry;
+	}
+	if (!retries)
+		dev_err(host->dev, "Bad status: %llx before command write\n", rsp_sts);
+	writeq(emm_cmd, host->base + MIO_EMM_CMD(host));
+}
+
+static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct cvm_mmc_slot *slot = mmc_priv(mmc);
+	struct cvm_mmc_host *host = slot->host;
+	int clk_period = 0, power_class = 10, bus_width = 0;
+	u64 clock, emm_switch;
+
+	host->acquire_bus(host);
+	cvm_mmc_switch_to(slot);
+
+	/* Set the power state */
+	switch (ios->power_mode) {
+	case MMC_POWER_ON:
+		break;
+
+	case MMC_POWER_OFF:
+		cvm_mmc_reset_bus(slot);
+		if (host->global_pwr_gpiod)
+			host->set_shared_power(host, 0);
+		else
+			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
+		break;
+
+	case MMC_POWER_UP:
+		if (host->global_pwr_gpiod)
+			host->set_shared_power(host, 1);
+		else
+			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
+		break;
+	}
+
+	/* Convert bus width to HW definition */
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_8:
+		bus_width = 2;
+		break;
+	case MMC_BUS_WIDTH_4:
+		bus_width = 1;
+		break;
+	case MMC_BUS_WIDTH_1:
+		bus_width = 0;
+		break;
+	}
+
+	/* DDR is available for 4/8 bit bus width */
+	if (ios->bus_width && ios->timing == MMC_TIMING_MMC_DDR52)
+		bus_width |= 4;
+
+	/* Change the clock frequency. */
+	clock = ios->clock;
+	if (clock > 52000000)
+		clock = 52000000;
+	slot->clock = clock;
+
+	if (clock)
+		clk_period = (host->sys_freq + clock - 1) / (2 * clock);
+
+	emm_switch = FIELD_PREP(MIO_EMM_SWITCH_HS_TIMING,
+				(ios->timing == MMC_TIMING_MMC_HS)) |
+		     FIELD_PREP(MIO_EMM_SWITCH_BUS_WIDTH, bus_width) |
+		     FIELD_PREP(MIO_EMM_SWITCH_POWER_CLASS, power_class) |
+		     FIELD_PREP(MIO_EMM_SWITCH_CLK_HI, clk_period) |
+		     FIELD_PREP(MIO_EMM_SWITCH_CLK_LO, clk_period);
+	set_bus_id(&emm_switch, slot->bus_id);
+
+	if (!switch_val_changed(slot, emm_switch))
+		goto out;
+
+	set_wdog(slot, 0);
+	do_switch(host, emm_switch);
+	slot->cached_switch = emm_switch;
+out:
+	host->release_bus(host);
+}
+
+static const struct mmc_host_ops cvm_mmc_ops = {
+	.request        = cvm_mmc_request,
+	.set_ios        = cvm_mmc_set_ios,
+	.get_ro		= mmc_gpio_get_ro,
+	.get_cd		= mmc_gpio_get_cd,
+};
+
+static void cvm_mmc_set_clock(struct cvm_mmc_slot *slot, unsigned int clock)
+{
+	struct mmc_host *mmc = slot->mmc;
+
+	clock = min(clock, mmc->f_max);
+	clock = max(clock, mmc->f_min);
+	slot->clock = clock;
+}
+
+static int cvm_mmc_init_lowlevel(struct cvm_mmc_slot *slot)
+{
+	struct cvm_mmc_host *host = slot->host;
+	u64 emm_switch;
+
+	/* Enable this bus slot. */
+	host->emm_cfg |= (1ull << slot->bus_id);
+	writeq(host->emm_cfg, slot->host->base + MIO_EMM_CFG(host));
+	udelay(10);
+
+	/* Program initial clock speed and power. */
+	cvm_mmc_set_clock(slot, slot->mmc->f_min);
+	emm_switch = FIELD_PREP(MIO_EMM_SWITCH_POWER_CLASS, 10);
+	emm_switch |= FIELD_PREP(MIO_EMM_SWITCH_CLK_HI,
+				 (host->sys_freq / slot->clock) / 2);
+	emm_switch |= FIELD_PREP(MIO_EMM_SWITCH_CLK_LO,
+				 (host->sys_freq / slot->clock) / 2);
+
+	/* Make the changes take effect on this bus slot. */
+	set_bus_id(&emm_switch, slot->bus_id);
+	do_switch(host, emm_switch);
+
+	slot->cached_switch = emm_switch;
+
+	/*
+	 * Set watchdog timeout value and default reset value
+	 * for the mask register. Finally, set the CARD_RCA
+	 * bit so that we can get the card address relative
+	 * to the CMD register for CMD7 transactions.
+	 */
+	set_wdog(slot, 0);
+	writeq(0xe4390080ull, host->base + MIO_EMM_STS_MASK(host));
+	writeq(1, host->base + MIO_EMM_RCA(host));
+	return 0;
+}
+
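
CLK_HI and CLK_LO count sys_freq cycles per clock half-period, so the initial 400 kHz identification clock on a hypothetical 800 MHz reference programs (800000000 / 400000) / 2 = 1000 into each field; cvm_mmc_set_ios() above uses the rounding-up variant of the same formula. A sketch:

/*
 * Sketch only: half-period dividers as programmed by
 * cvm_mmc_init_lowlevel(). The 800 MHz sys_freq is an assumption.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long sys_freq = 800000000ull;	/* hypothetical */
	unsigned long long f_min = 400000ull;		/* mmc->f_min */

	printf("CLK_HI = CLK_LO = %llu\n", (sys_freq / f_min) / 2);
	return 0;
}
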
+static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot)
+{
+	u32 id, cmd_skew = 0, dat_skew = 0, bus_width = 0;
+	struct device_node *node = dev->of_node;
+	struct mmc_host *mmc = slot->mmc;
+	u64 clock_period;
+	int ret;
+
+	ret = of_property_read_u32(node, "reg", &id);
+	if (ret) {
+		dev_err(dev, "Missing or invalid reg property on %s\n",
+			of_node_full_name(node));
+		return ret;
+	}
+
+	if (id >= CAVIUM_MAX_MMC || slot->host->slot[id]) {
+		dev_err(dev, "Invalid reg property on %s\n",
+			of_node_full_name(node));
+		return -EINVAL;
+	}
+
+	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
+	if (IS_ERR(mmc->supply.vmmc)) {
+		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		/*
+		 * Legacy Octeon firmware has no regulator entry, fall back
+		 * to a hard-coded voltage to get a sane OCR.
+		 */
+		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+	} else {
+		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
+		if (ret > 0)
+			mmc->ocr_avail = ret;
+	}
+
+	/* Common MMC bindings */
+	ret = mmc_of_parse(mmc);
+	if (ret)
+		return ret;
+
+	/* Set bus width */
+	if (!(mmc->caps & (MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA))) {
+		of_property_read_u32(node, "cavium,bus-max-width", &bus_width);
+		if (bus_width == 8)
+			mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA;
+		else if (bus_width == 4)
+			mmc->caps |= MMC_CAP_4_BIT_DATA;
+	}
+
+	/* Set maximum and minimum frequency */
+	if (!mmc->f_max)
+		of_property_read_u32(node, "spi-max-frequency", &mmc->f_max);
+	if (!mmc->f_max || mmc->f_max > 52000000)
+		mmc->f_max = 52000000;
+	mmc->f_min = 400000;
+
+	/* Sampling register settings, period in picoseconds */
+	clock_period = 1000000000000ull / slot->host->sys_freq;
+	of_property_read_u32(node, "cavium,cmd-clk-skew", &cmd_skew);
+	of_property_read_u32(node, "cavium,dat-clk-skew", &dat_skew);
+	slot->cmd_cnt = (cmd_skew + clock_period / 2) / clock_period;
+	slot->dat_cnt = (dat_skew + clock_period / 2) / clock_period;
+
+	return id;
+}
+
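
The skew properties are given in picoseconds and rounded to the nearest whole clock period. For a hypothetical 700 MHz sys_freq the period truncates to 1428 ps, so a cavium,cmd-clk-skew of 3000 ps yields two sample cycles. A sketch of that rounding:

/*
 * Sketch only: skew (ps) -> sample delay (clock cycles) with
 * round-to-nearest, as in cvm_mmc_of_parse(). 700 MHz is hypothetical.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long period = 1000000000000ull / 700000000ull;
	unsigned int skew = 3000;	/* ps, from cavium,cmd-clk-skew */

	printf("period=%llu ps, cmd_cnt=%llu\n",
	       period, (skew + period / 2) / period);
	return 0;
}
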
+int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host)
+{
+	struct cvm_mmc_slot *slot;
+	struct mmc_host *mmc;
+	int ret, id;
+
+	mmc = mmc_alloc_host(sizeof(struct cvm_mmc_slot), dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	slot = mmc_priv(mmc);
+	slot->mmc = mmc;
+	slot->host = host;
+
+	ret = cvm_mmc_of_parse(dev, slot);
+	if (ret < 0)
+		goto error;
+	id = ret;
+
+	/* Set up host parameters */
+	mmc->ops = &cvm_mmc_ops;
+
+	/*
+	 * We only have a 3.3V supply, so we cannot support any
+	 * of the UHS modes. We do support the high-speed DDR
+	 * modes up to 52 MHz.
+	 */
+	mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
+		     MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD |
+		     MMC_CAP_3_3V_DDR;
+
+	if (host->use_sg)
+		mmc->max_segs = 16;
+	else
+		mmc->max_segs = 1;
+
+	/* DMA size field can address up to 8 MB */
+	mmc->max_seg_size = 8 * 1024 * 1024;
+	mmc->max_req_size = mmc->max_seg_size;
+	/* External DMA is in 512 byte blocks */
+	mmc->max_blk_size = 512;
+	/* DMA block count field is 15 bits */
+	mmc->max_blk_count = 32767;
+
+	slot->clock = mmc->f_min;
+	slot->bus_id = id;
+	slot->cached_rca = 1;
+
+	host->acquire_bus(host);
+	host->slot[id] = slot;
+	cvm_mmc_switch_to(slot);
+	cvm_mmc_init_lowlevel(slot);
+	host->release_bus(host);
+
+	ret = mmc_add_host(mmc);
+	if (ret) {
+		dev_err(dev, "mmc_add_host() returned %d\n", ret);
+		slot->host->slot[id] = NULL;
+		goto error;
+	}
+	return 0;
+
+error:
+	mmc_free_host(slot->mmc);
+	return ret;
+}
+
+int cvm_mmc_of_slot_remove(struct cvm_mmc_slot *slot)
+{
+	mmc_remove_host(slot->mmc);
+	slot->host->slot[slot->bus_id] = NULL;
+	mmc_free_host(slot->mmc);
+	return 0;
+}
diff --git a/drivers/mmc/host/cavium.h b/drivers/mmc/host/cavium.h
new file mode 100644
index 000000000000..f3eea5eaa678
--- /dev/null
+++ b/drivers/mmc/host/cavium.h
@@ -0,0 +1,215 @@
+/*
+ * Driver for MMC and SSD cards for Cavium OCTEON and ThunderX SoCs.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012-2017 Cavium Inc.
+ */
+
+#ifndef _CAVIUM_MMC_H_
+#define _CAVIUM_MMC_H_
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/io.h>
+#include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/scatterlist.h>
+#include <linux/semaphore.h>
+
+#define CAVIUM_MAX_MMC		4
+
+/* DMA register addresses */
+#define MIO_EMM_DMA_FIFO_CFG(x)	(0x00 + x->reg_off_dma)
+#define MIO_EMM_DMA_FIFO_ADR(x)	(0x10 + x->reg_off_dma)
+#define MIO_EMM_DMA_FIFO_CMD(x)	(0x18 + x->reg_off_dma)
+#define MIO_EMM_DMA_CFG(x)	(0x20 + x->reg_off_dma)
+#define MIO_EMM_DMA_ADR(x)	(0x28 + x->reg_off_dma)
+#define MIO_EMM_DMA_INT(x)	(0x30 + x->reg_off_dma)
+#define MIO_EMM_DMA_INT_W1S(x)	(0x38 + x->reg_off_dma)
+#define MIO_EMM_DMA_INT_ENA_W1S(x) (0x40 + x->reg_off_dma)
+#define MIO_EMM_DMA_INT_ENA_W1C(x) (0x48 + x->reg_off_dma)
+
+/* register addresses */
+#define MIO_EMM_CFG(x)		(0x00 + x->reg_off)
+#define MIO_EMM_SWITCH(x)	(0x48 + x->reg_off)
+#define MIO_EMM_DMA(x)		(0x50 + x->reg_off)
+#define MIO_EMM_CMD(x)		(0x58 + x->reg_off)
+#define MIO_EMM_RSP_STS(x)	(0x60 + x->reg_off)
+#define MIO_EMM_RSP_LO(x)	(0x68 + x->reg_off)
+#define MIO_EMM_RSP_HI(x)	(0x70 + x->reg_off)
+#define MIO_EMM_INT(x)		(0x78 + x->reg_off)
+#define MIO_EMM_INT_EN(x)	(0x80 + x->reg_off)
+#define MIO_EMM_WDOG(x)		(0x88 + x->reg_off)
+#define MIO_EMM_SAMPLE(x)	(0x90 + x->reg_off)
+#define MIO_EMM_STS_MASK(x)	(0x98 + x->reg_off)
+#define MIO_EMM_RCA(x)		(0xa0 + x->reg_off)
+#define MIO_EMM_INT_EN_SET(x)	(0xb0 + x->reg_off)
+#define MIO_EMM_INT_EN_CLR(x)	(0xb8 + x->reg_off)
+#define MIO_EMM_BUF_IDX(x)	(0xe0 + x->reg_off)
+#define MIO_EMM_BUF_DAT(x)	(0xe8 + x->reg_off)
+
+struct cvm_mmc_host {
+	struct device *dev;
+	void __iomem *base;
+	void __iomem *dma_base;
+	int reg_off;
+	int reg_off_dma;
+	u64 emm_cfg;
+	u64 n_minus_one;	/* OCTEON II workaround location */
+	int last_slot;
+	struct clk *clk;
+	int sys_freq;
+
+	struct mmc_request *current_req;
+	struct sg_mapping_iter smi;
+	bool dma_active;
+	bool use_sg;
+
+	bool has_ciu3;
+	bool big_dma_addr;
+	bool need_irq_handler_lock;
+	spinlock_t irq_handler_lock;
+	struct semaphore mmc_serializer;
+
+	struct gpio_desc *global_pwr_gpiod;
+	atomic_t shared_power_users;
+
+	struct cvm_mmc_slot *slot[CAVIUM_MAX_MMC];
+	struct platform_device *slot_pdev[CAVIUM_MAX_MMC];
+
+	void (*set_shared_power)(struct cvm_mmc_host *, int);
+	void (*acquire_bus)(struct cvm_mmc_host *);
+	void (*release_bus)(struct cvm_mmc_host *);
+	void (*int_enable)(struct cvm_mmc_host *, u64);
+	/* required on some MIPS models */
+	void (*dmar_fixup)(struct cvm_mmc_host *, struct mmc_command *,
+			   struct mmc_data *, u64);
+	void (*dmar_fixup_done)(struct cvm_mmc_host *);
+};
+
+struct cvm_mmc_slot {
+	struct mmc_host *mmc;		/* slot-level mmc_core object */
+	struct cvm_mmc_host *host;	/* common hw for all slots */
+
+	u64 clock;
+
+	u64 cached_switch;
+	u64 cached_rca;
+
+	unsigned int cmd_cnt;		/* sample delay */
+	unsigned int dat_cnt;		/* sample delay */
+
+	int bus_id;
+};
+
+struct cvm_mmc_cr_type {
+	u8 ctype;
+	u8 rtype;
+};
+
+struct cvm_mmc_cr_mods {
+	u8 ctype_xor;
+	u8 rtype_xor;
+};
+
+/* Bitfield definitions */
+#define MIO_EMM_DMA_FIFO_CFG_CLR	BIT_ULL(16)
+#define MIO_EMM_DMA_FIFO_CFG_INT_LVL	GENMASK_ULL(12, 8)
+#define MIO_EMM_DMA_FIFO_CFG_COUNT	GENMASK_ULL(4, 0)
+
+#define MIO_EMM_DMA_FIFO_CMD_RW		BIT_ULL(62)
+#define MIO_EMM_DMA_FIFO_CMD_INTDIS	BIT_ULL(60)
+#define MIO_EMM_DMA_FIFO_CMD_SWAP32	BIT_ULL(59)
+#define MIO_EMM_DMA_FIFO_CMD_SWAP16	BIT_ULL(58)
+#define MIO_EMM_DMA_FIFO_CMD_SWAP8	BIT_ULL(57)
+#define MIO_EMM_DMA_FIFO_CMD_ENDIAN	BIT_ULL(56)
+#define MIO_EMM_DMA_FIFO_CMD_SIZE	GENMASK_ULL(55, 36)
+
+#define MIO_EMM_CMD_SKIP_BUSY		BIT_ULL(62)
+#define MIO_EMM_CMD_BUS_ID		GENMASK_ULL(61, 60)
+#define MIO_EMM_CMD_VAL			BIT_ULL(59)
+#define MIO_EMM_CMD_DBUF		BIT_ULL(55)
+#define MIO_EMM_CMD_OFFSET		GENMASK_ULL(54, 49)
+#define MIO_EMM_CMD_CTYPE_XOR		GENMASK_ULL(42, 41)
+#define MIO_EMM_CMD_RTYPE_XOR		GENMASK_ULL(40, 38)
+#define MIO_EMM_CMD_IDX			GENMASK_ULL(37, 32)
+#define MIO_EMM_CMD_ARG			GENMASK_ULL(31, 0)
+
+#define MIO_EMM_DMA_SKIP_BUSY		BIT_ULL(62)
+#define MIO_EMM_DMA_BUS_ID		GENMASK_ULL(61, 60)
+#define MIO_EMM_DMA_VAL			BIT_ULL(59)
+#define MIO_EMM_DMA_SECTOR		BIT_ULL(58)
+#define MIO_EMM_DMA_DAT_NULL		BIT_ULL(57)
+#define MIO_EMM_DMA_THRES		GENMASK_ULL(56, 51)
+#define MIO_EMM_DMA_REL_WR		BIT_ULL(50)
+#define MIO_EMM_DMA_RW			BIT_ULL(49)
+#define MIO_EMM_DMA_MULTI		BIT_ULL(48)
+#define MIO_EMM_DMA_BLOCK_CNT		GENMASK_ULL(47, 32)
+#define MIO_EMM_DMA_CARD_ADDR		GENMASK_ULL(31, 0)
+
+#define MIO_EMM_DMA_CFG_EN		BIT_ULL(63)
+#define MIO_EMM_DMA_CFG_RW		BIT_ULL(62)
+#define MIO_EMM_DMA_CFG_CLR		BIT_ULL(61)
+#define MIO_EMM_DMA_CFG_SWAP32		BIT_ULL(59)
+#define MIO_EMM_DMA_CFG_SWAP16		BIT_ULL(58)
+#define MIO_EMM_DMA_CFG_SWAP8		BIT_ULL(57)
+#define MIO_EMM_DMA_CFG_ENDIAN		BIT_ULL(56)
+#define MIO_EMM_DMA_CFG_SIZE		GENMASK_ULL(55, 36)
+#define MIO_EMM_DMA_CFG_ADR		GENMASK_ULL(35, 0)
+
+#define MIO_EMM_INT_SWITCH_ERR		BIT_ULL(6)
+#define MIO_EMM_INT_SWITCH_DONE		BIT_ULL(5)
+#define MIO_EMM_INT_DMA_ERR		BIT_ULL(4)
+#define MIO_EMM_INT_CMD_ERR		BIT_ULL(3)
+#define MIO_EMM_INT_DMA_DONE		BIT_ULL(2)
+#define MIO_EMM_INT_CMD_DONE		BIT_ULL(1)
+#define MIO_EMM_INT_BUF_DONE		BIT_ULL(0)
+
+#define MIO_EMM_RSP_STS_BUS_ID		GENMASK_ULL(61, 60)
+#define MIO_EMM_RSP_STS_CMD_VAL		BIT_ULL(59)
+#define MIO_EMM_RSP_STS_SWITCH_VAL	BIT_ULL(58)
+#define MIO_EMM_RSP_STS_DMA_VAL		BIT_ULL(57)
+#define MIO_EMM_RSP_STS_DMA_PEND	BIT_ULL(56)
+#define MIO_EMM_RSP_STS_DBUF_ERR	BIT_ULL(28)
+#define MIO_EMM_RSP_STS_DBUF		BIT_ULL(23)
+#define MIO_EMM_RSP_STS_BLK_TIMEOUT	BIT_ULL(22)
+#define MIO_EMM_RSP_STS_BLK_CRC_ERR	BIT_ULL(21)
+#define MIO_EMM_RSP_STS_RSP_BUSYBIT	BIT_ULL(20)
+#define MIO_EMM_RSP_STS_STP_TIMEOUT	BIT_ULL(19)
+#define MIO_EMM_RSP_STS_STP_CRC_ERR	BIT_ULL(18)
+#define MIO_EMM_RSP_STS_STP_BAD_STS	BIT_ULL(17)
+#define MIO_EMM_RSP_STS_STP_VAL		BIT_ULL(16)
+#define MIO_EMM_RSP_STS_RSP_TIMEOUT	BIT_ULL(15)
+#define MIO_EMM_RSP_STS_RSP_CRC_ERR	BIT_ULL(14)
+#define MIO_EMM_RSP_STS_RSP_BAD_STS	BIT_ULL(13)
+#define MIO_EMM_RSP_STS_RSP_VAL		BIT_ULL(12)
+#define MIO_EMM_RSP_STS_RSP_TYPE	GENMASK_ULL(11, 9)
+#define MIO_EMM_RSP_STS_CMD_TYPE	GENMASK_ULL(8, 7)
+#define MIO_EMM_RSP_STS_CMD_IDX		GENMASK_ULL(6, 1)
+#define MIO_EMM_RSP_STS_CMD_DONE	BIT_ULL(0)
+
+#define MIO_EMM_SAMPLE_CMD_CNT		GENMASK_ULL(25, 16)
+#define MIO_EMM_SAMPLE_DAT_CNT		GENMASK_ULL(9, 0)
+
+#define MIO_EMM_SWITCH_BUS_ID		GENMASK_ULL(61, 60)
+#define MIO_EMM_SWITCH_EXE		BIT_ULL(59)
+#define MIO_EMM_SWITCH_ERR0		BIT_ULL(58)
+#define MIO_EMM_SWITCH_ERR1		BIT_ULL(57)
+#define MIO_EMM_SWITCH_ERR2		BIT_ULL(56)
+#define MIO_EMM_SWITCH_HS_TIMING	BIT_ULL(48)
+#define MIO_EMM_SWITCH_BUS_WIDTH	GENMASK_ULL(42, 40)
+#define MIO_EMM_SWITCH_POWER_CLASS	GENMASK_ULL(35, 32)
+#define MIO_EMM_SWITCH_CLK_HI		GENMASK_ULL(31, 16)
+#define MIO_EMM_SWITCH_CLK_LO		GENMASK_ULL(15, 0)
+
+/* Prototypes */
+irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id);
+int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host);
+int cvm_mmc_of_slot_remove(struct cvm_mmc_slot *slot);
+extern const char *cvm_mmc_irq_names[];
+
+#endif
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 1e2600da105f..621ce47e0e4a 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -478,18 +478,14 @@ static int mmc_davinci_start_dma_transfer(struct mmc_davinci_host *host,
 	int ret = 0;
 
 	host->sg_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-				((data->flags & MMC_DATA_WRITE)
-				? DMA_TO_DEVICE
-				: DMA_FROM_DEVICE));
+				  mmc_get_dma_dir(data));
 
 	/* no individual DMA segment should need a partial FIFO */
 	for (i = 0; i < host->sg_len; i++) {
 		if (sg_dma_len(data->sg + i) & mask) {
 			dma_unmap_sg(mmc_dev(host->mmc),
-					data->sg, data->sg_len,
-					(data->flags & MMC_DATA_WRITE)
-					? DMA_TO_DEVICE
-					: DMA_FROM_DEVICE);
+				     data->sg, data->sg_len,
+				     mmc_get_dma_dir(data));
 			return -1;
 		}
 	}
@@ -802,9 +798,7 @@ mmc_davinci_xfer_done(struct mmc_davinci_host *host, struct mmc_data *data)
 		davinci_abort_dma(host);
 
 		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     (data->flags & MMC_DATA_WRITE)
-			     ? DMA_TO_DEVICE
-			     : DMA_FROM_DEVICE);
+			     mmc_get_dma_dir(data));
 		host->do_dma = false;
 	}
 	host->data_dir = DAVINCI_MMC_DATADIR_NONE;
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 8718432751c5..e45129f48174 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -19,6 +19,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -65,6 +66,8 @@
 
 struct idmac_desc_64addr {
 	u32		des0;	/* Control Descriptor */
+#define IDMAC_OWN_CLR64(x) \
+	!((x) & cpu_to_le32(IDMAC_DES0_OWN))
 
 	u32		des1;	/* Reserved */
 
@@ -104,11 +107,6 @@ struct idmac_desc {
 /* Each descriptor can transfer up to 4KB of data in chained mode */
 #define DW_MCI_DESC_DATA_LENGTH	0x1000
 
-static bool dw_mci_reset(struct dw_mci *host);
-static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset);
-static int dw_mci_card_busy(struct mmc_host *mmc);
-static int dw_mci_get_cd(struct mmc_host *mmc);
-
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
 {
@@ -232,7 +230,66 @@ err:
 }
 #endif /* defined(CONFIG_DEBUG_FS) */
 
-static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg);
+static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset)
+{
+	u32 ctrl;
+
+	ctrl = mci_readl(host, CTRL);
+	ctrl |= reset;
+	mci_writel(host, CTRL, ctrl);
+
+	/* wait till resets clear */
+	if (readl_poll_timeout_atomic(host->regs + SDMMC_CTRL, ctrl,
+				      !(ctrl & reset),
+				      1, 500 * USEC_PER_MSEC)) {
+		dev_err(host->dev,
+			"Timeout resetting block (ctrl reset %#x)\n",
+			ctrl & reset);
+		return false;
+	}
+
+	return true;
+}
+
+static void dw_mci_wait_while_busy(struct dw_mci *host, u32 cmd_flags)
+{
+	u32 status;
+
+	/*
+	 * Databook says that before issuing a new data transfer command
+	 * we need to check to see if the card is busy.  Data transfer commands
+	 * all have SDMMC_CMD_PRV_DAT_WAIT set, so we'll key off that.
+	 *
+	 * ...also allow sending for SDMMC_CMD_VOLT_SWITCH where busy is
+	 * expected.
+	 */
+	if ((cmd_flags & SDMMC_CMD_PRV_DAT_WAIT) &&
+	    !(cmd_flags & SDMMC_CMD_VOLT_SWITCH)) {
+		if (readl_poll_timeout_atomic(host->regs + SDMMC_STATUS,
+					      status,
+					      !(status & SDMMC_STATUS_BUSY),
+					      10, 500 * USEC_PER_MSEC))
+			dev_err(host->dev, "Busy; trying anyway\n");
+	}
+}
+
+static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg)
+{
+	struct dw_mci *host = slot->host;
+	unsigned int cmd_status = 0;
+
+	mci_writel(host, CMDARG, arg);
+	wmb(); /* drain writebuffer */
+	dw_mci_wait_while_busy(host, cmd);
+	mci_writel(host, CMD, SDMMC_CMD_START | cmd);
+
+	if (readl_poll_timeout_atomic(host->regs + SDMMC_CMD, cmd_status,
+				      !(cmd_status & SDMMC_CMD_START),
+				      1, 500 * USEC_PER_MSEC))
+		dev_err(&slot->mmc->class_dev,
+			"Timeout sending command (cmd %#x arg %#x status %#x)\n",
+			cmd, arg, cmd_status);
+}
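
All of the conversions in this file follow one pattern: an open-coded jiffies/udelay poll loop becomes readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) from <linux/iopoll.h>, which returns 0 once cond holds and -ETIMEDOUT otherwise. A rough user-space sketch of those semantics (not the kernel implementation), using a toy register:

/*
 * Sketch only: approximate readl_poll_timeout_atomic() semantics.
 */
#include <stdio.h>

/* Toy register whose "reset" bits clear after a few reads. */
static unsigned int nreads;
static unsigned int readl_sim(void)
{
	return ++nreads < 4 ? 0x3 : 0;
}

/* Poll until !(val & mask), give up after timeout_us. */
static int poll_clear(unsigned int mask, unsigned int delay_us,
		      unsigned int timeout_us)
{
	unsigned int val, waited = 0;

	for (;;) {
		val = readl_sim();
		if (!(val & mask))
			return 0;		/* condition met */
		if (waited >= timeout_us)
			return -110;		/* -ETIMEDOUT */
		waited += delay_us;		/* udelay() in the kernel */
	}
}

int main(void)
{
	printf("ret=%d after %u reads\n",
	       poll_clear(0x3, 1, 500 * 1000), nreads);
	return 0;
}
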
 
 static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 {
@@ -341,31 +398,6 @@ static u32 dw_mci_prep_stop_abort(struct dw_mci *host, struct mmc_command *cmd)
 	return cmdr;
 }
 
-static void dw_mci_wait_while_busy(struct dw_mci *host, u32 cmd_flags)
-{
-	unsigned long timeout = jiffies + msecs_to_jiffies(500);
-
-	/*
-	 * Databook says that before issuing a new data transfer command
-	 * we need to check to see if the card is busy.  Data transfer commands
-	 * all have SDMMC_CMD_PRV_DAT_WAIT set, so we'll key off that.
-	 *
-	 * ...also allow sending for SDMMC_CMD_VOLT_SWITCH where busy is
-	 * expected.
-	 */
-	if ((cmd_flags & SDMMC_CMD_PRV_DAT_WAIT) &&
-	    !(cmd_flags & SDMMC_CMD_VOLT_SWITCH)) {
-		while (mci_readl(host, STATUS) & SDMMC_STATUS_BUSY) {
-			if (time_after(jiffies, timeout)) {
-				/* Command will fail; we'll pass error then */
-				dev_err(host->dev, "Busy; trying anyway\n");
-				break;
-			}
-			udelay(10);
-		}
-	}
-}
-
 static void dw_mci_start_command(struct dw_mci *host,
 				 struct mmc_command *cmd, u32 cmd_flags)
 {
@@ -400,14 +432,6 @@ static void dw_mci_stop_dma(struct dw_mci *host)
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
 
-static int dw_mci_get_dma_dir(struct mmc_data *data)
-{
-	if (data->flags & MMC_DATA_WRITE)
-		return DMA_TO_DEVICE;
-	else
-		return DMA_FROM_DEVICE;
-}
-
 static void dw_mci_dma_cleanup(struct dw_mci *host)
 {
 	struct mmc_data *data = host->data;
@@ -416,7 +440,7 @@ static void dw_mci_dma_cleanup(struct dw_mci *host)
 		dma_unmap_sg(host->dev,
 			     data->sg,
 			     data->sg_len,
-			     dw_mci_get_dma_dir(data));
+			     mmc_get_dma_dir(data));
 		data->host_cookie = COOKIE_UNMAPPED;
 	}
 }
@@ -555,7 +579,7 @@ static inline int dw_mci_prepare_desc64(struct dw_mci *host,
 {
 	unsigned int desc_len;
 	struct idmac_desc_64addr *desc_first, *desc_last, *desc;
-	unsigned long timeout;
+	u32 val;
 	int i;
 
 	desc_first = desc_last = desc = host->sg_cpu;
@@ -577,12 +601,10 @@ static inline int dw_mci_prepare_desc64(struct dw_mci *host,
 			 * isn't still owned by IDMAC as IDMAC's write
 			 * ops and CPU's read ops are asynchronous.
 			 */
-			timeout = jiffies + msecs_to_jiffies(100);
-			while (readl(&desc->des0) & IDMAC_DES0_OWN) {
-				if (time_after(jiffies, timeout))
-					goto err_own_bit;
-				udelay(10);
-			}
+			if (readl_poll_timeout_atomic(&desc->des0, val,
+						!(val & IDMAC_DES0_OWN),
+						10, 100 * USEC_PER_MSEC))
+				goto err_own_bit;
 
 			/*
 			 * Set the OWN bit and disable interrupts
@@ -629,7 +651,7 @@ static inline int dw_mci_prepare_desc32(struct dw_mci *host,
 {
 	unsigned int desc_len;
 	struct idmac_desc *desc_first, *desc_last, *desc;
-	unsigned long timeout;
+	u32 val;
 	int i;
 
 	desc_first = desc_last = desc = host->sg_cpu;
@@ -651,13 +673,11 @@ static inline int dw_mci_prepare_desc32(struct dw_mci *host,
 			 * isn't still owned by IDMAC as IDMAC's write
 			 * ops and CPU's read ops are asynchronous.
 			 */
-			timeout = jiffies + msecs_to_jiffies(100);
-			while (readl(&desc->des0) &
-			       cpu_to_le32(IDMAC_DES0_OWN)) {
-				if (time_after(jiffies, timeout))
-					goto err_own_bit;
-				udelay(10);
-			}
+			if (readl_poll_timeout_atomic(&desc->des0, val,
+						      IDMAC_OWN_CLR64(val),
+						      10,
+						      100 * USEC_PER_MSEC))
+				goto err_own_bit;
 
 			/*
 			 * Set the OWN bit and disable interrupts
@@ -876,7 +896,7 @@ static int dw_mci_pre_dma_transfer(struct dw_mci *host,
 	sg_len = dma_map_sg(host->dev,
 			    data->sg,
 			    data->sg_len,
-			    dw_mci_get_dma_dir(data));
+			    mmc_get_dma_dir(data));
 	if (sg_len == 0)
 		return -EINVAL;
 
@@ -916,10 +936,51 @@ static void dw_mci_post_req(struct mmc_host *mmc,
 		dma_unmap_sg(slot->host->dev,
 			     data->sg,
 			     data->sg_len,
-			     dw_mci_get_dma_dir(data));
+			     mmc_get_dma_dir(data));
 	data->host_cookie = COOKIE_UNMAPPED;
 }
 
+static int dw_mci_get_cd(struct mmc_host *mmc)
+{
+	int present;
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
+	int gpio_cd = mmc_gpio_get_cd(mmc);
+
+	/* Use platform get_cd function, else try onboard card detect */
+	if (((mmc->caps & MMC_CAP_NEEDS_POLL)
+				|| !mmc_card_is_removable(mmc))) {
+		present = 1;
+
+		if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
+			if (mmc->caps & MMC_CAP_NEEDS_POLL) {
+				dev_info(&mmc->class_dev,
+					"card is polling.\n");
+			} else {
+				dev_info(&mmc->class_dev,
+					"card is non-removable.\n");
+			}
+			set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+		}
+
+		return present;
+	} else if (gpio_cd >= 0)
+		present = gpio_cd;
+	else
+		present = (mci_readl(slot->host, CDETECT) & (1 << slot->id))
+			== 0 ? 1 : 0;
+
+	spin_lock_bh(&host->lock);
+	if (present && !test_and_set_bit(DW_MMC_CARD_PRESENT, &slot->flags))
+		dev_dbg(&mmc->class_dev, "card is present\n");
+	else if (!present &&
+			!test_and_clear_bit(DW_MMC_CARD_PRESENT, &slot->flags))
+		dev_dbg(&mmc->class_dev, "card is not present\n");
+	spin_unlock_bh(&host->lock);
+
+	return present;
+}
+
 static void dw_mci_adjust_fifoth(struct dw_mci *host, struct mmc_data *data)
 {
 	unsigned int blksz = data->blksz;
@@ -1133,27 +1194,6 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 	}
 }
 
-static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg)
-{
-	struct dw_mci *host = slot->host;
-	unsigned long timeout = jiffies + msecs_to_jiffies(500);
-	unsigned int cmd_status = 0;
-
-	mci_writel(host, CMDARG, arg);
-	wmb(); /* drain writebuffer */
-	dw_mci_wait_while_busy(host, cmd);
-	mci_writel(host, CMD, SDMMC_CMD_START | cmd);
-
-	while (time_before(jiffies, timeout)) {
-		cmd_status = mci_readl(host, CMD);
-		if (!(cmd_status & SDMMC_CMD_START))
-			return;
-	}
-	dev_err(&slot->mmc->class_dev,
-		"Timeout sending command (cmd %#x arg %#x status %#x)\n",
-		cmd, arg, cmd_status);
-}
-
 static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit)
 {
 	struct dw_mci *host = slot->host;
@@ -1534,47 +1574,6 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 	return read_only;
 }
 
-static int dw_mci_get_cd(struct mmc_host *mmc)
-{
-	int present;
-	struct dw_mci_slot *slot = mmc_priv(mmc);
-	struct dw_mci *host = slot->host;
-	int gpio_cd = mmc_gpio_get_cd(mmc);
-
-	/* Use platform get_cd function, else try onboard card detect */
-	if (((mmc->caps & MMC_CAP_NEEDS_POLL)
-				|| !mmc_card_is_removable(mmc))) {
-		present = 1;
-
-		if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
-			if (mmc->caps & MMC_CAP_NEEDS_POLL) {
-				dev_info(&mmc->class_dev,
-					"card is polling.\n");
-			} else {
-				dev_info(&mmc->class_dev,
-					"card is non-removable.\n");
-			}
-			set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
-		}
-
-		return present;
-	} else if (gpio_cd >= 0)
-		present = gpio_cd;
-	else
-		present = (mci_readl(slot->host, CDETECT) & (1 << slot->id))
-			== 0 ? 1 : 0;
-
-	spin_lock_bh(&host->lock);
-	if (present && !test_and_set_bit(DW_MMC_CARD_PRESENT, &slot->flags))
-		dev_dbg(&mmc->class_dev, "card is present\n");
-	else if (!present &&
-			!test_and_clear_bit(DW_MMC_CARD_PRESENT, &slot->flags))
-		dev_dbg(&mmc->class_dev, "card is not present\n");
-	spin_unlock_bh(&host->lock);
-
-	return present;
-}
-
 static void dw_mci_hw_reset(struct mmc_host *mmc)
 {
 	struct dw_mci_slot *slot = mmc_priv(mmc);
@@ -1688,6 +1687,73 @@ static int dw_mci_prepare_hs400_tuning(struct mmc_host *mmc,
 	return 0;
 }
 
+static bool dw_mci_reset(struct dw_mci *host)
+{
+	u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET;
+	bool ret = false;
+	u32 status = 0;
+
+	/*
+	 * Resetting generates a block interrupt, hence setting
+	 * the scatter-gather pointer to NULL.
+	 */
+	if (host->sg) {
+		sg_miter_stop(&host->sg_miter);
+		host->sg = NULL;
+	}
+
+	if (host->use_dma)
+		flags |= SDMMC_CTRL_DMA_RESET;
+
+	if (dw_mci_ctrl_reset(host, flags)) {
+		/*
+		 * In all cases we clear the RAWINTS
+		 * register to clear any interrupts.
+		 */
+		mci_writel(host, RINTSTS, 0xFFFFFFFF);
+
+		if (!host->use_dma) {
+			ret = true;
+			goto ciu_out;
+		}
+
+		/* Wait for dma_req to be cleared */
+		if (readl_poll_timeout_atomic(host->regs + SDMMC_STATUS,
+					      status,
+					      !(status & SDMMC_STATUS_DMA_REQ),
+					      1, 500 * USEC_PER_MSEC)) {
+			dev_err(host->dev,
+				"%s: Timeout waiting for dma_req to be cleared\n",
+				__func__);
+			goto ciu_out;
+		}
+
+		/* when using DMA next we reset the fifo again */
+		if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET))
+			goto ciu_out;
+	} else {
+		/* if the controller reset bit did clear, then set clock regs */
+		if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) {
+			dev_err(host->dev,
+				"%s: fifo/dma reset bits didn't clear but ciu was reset, doing clock update\n",
+				__func__);
+			goto ciu_out;
+		}
+	}
+
+	if (host->use_dma == TRANS_MODE_IDMAC)
+		/* It is also recommended that we reset and reprogram idmac */
+		dw_mci_idmac_reset(host);
+
+	ret = true;
+
+ciu_out:
+	/* After a CTRL reset we need to have CIU set clock registers  */
+	mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0);
+
+	return ret;
+}
+
 static const struct mmc_host_ops dw_mci_ops = {
 	.request		= dw_mci_request,
 	.pre_req		= dw_mci_pre_req,
@@ -2830,99 +2896,6 @@ no_dma:
 	host->use_dma = TRANS_MODE_PIO;
 }
 
-static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset)
-{
-	unsigned long timeout = jiffies + msecs_to_jiffies(500);
-	u32 ctrl;
-
-	ctrl = mci_readl(host, CTRL);
-	ctrl |= reset;
-	mci_writel(host, CTRL, ctrl);
-
-	/* wait till resets clear */
-	do {
-		ctrl = mci_readl(host, CTRL);
-		if (!(ctrl & reset))
-			return true;
-	} while (time_before(jiffies, timeout));
-
-	dev_err(host->dev,
-		"Timeout resetting block (ctrl reset %#x)\n",
-		ctrl & reset);
-
-	return false;
-}
-
-static bool dw_mci_reset(struct dw_mci *host)
-{
-	u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET;
-	bool ret = false;
-
-	/*
-	 * Reseting generates a block interrupt, hence setting
-	 * the scatter-gather pointer to NULL.
-	 */
-	if (host->sg) {
-		sg_miter_stop(&host->sg_miter);
-		host->sg = NULL;
-	}
-
-	if (host->use_dma)
-		flags |= SDMMC_CTRL_DMA_RESET;
-
-	if (dw_mci_ctrl_reset(host, flags)) {
-		/*
-		 * In all cases we clear the RAWINTS register to clear any
-		 * interrupts.
-		 */
-		mci_writel(host, RINTSTS, 0xFFFFFFFF);
-
-		/* if using dma we wait for dma_req to clear */
-		if (host->use_dma) {
-			unsigned long timeout = jiffies + msecs_to_jiffies(500);
-			u32 status;
-
-			do {
-				status = mci_readl(host, STATUS);
-				if (!(status & SDMMC_STATUS_DMA_REQ))
-					break;
-				cpu_relax();
-			} while (time_before(jiffies, timeout));
-
-			if (status & SDMMC_STATUS_DMA_REQ) {
-				dev_err(host->dev,
-					"%s: Timeout waiting for dma_req to clear during reset\n",
-					__func__);
-				goto ciu_out;
-			}
-
-			/* when using DMA next we reset the fifo again */
-			if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET))
-				goto ciu_out;
-		}
-	} else {
-		/* if the controller reset bit did clear, then set clock regs */
-		if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) {
-			dev_err(host->dev,
-				"%s: fifo/dma reset bits didn't clear but ciu was reset, doing clock update\n",
-				__func__);
-			goto ciu_out;
-		}
-	}
-
-	if (host->use_dma == TRANS_MODE_IDMAC)
-		/* It is also recommended that we reset and reprogram idmac */
-		dw_mci_idmac_reset(host);
-
-	ret = true;
-
-ciu_out:
-	/* After a CTRL reset we need to have CIU set clock registers  */
-	mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0);
-
-	return ret;
-}
-
 static void dw_mci_cmd11_timer(unsigned long arg)
 {
 	struct dw_mci *host = (struct dw_mci *)arg;
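
The dw_mmc change above swaps a hand-rolled jiffies/cpu_relax() polling loop for readl_poll_timeout_atomic() from <linux/iopoll.h>, which returns 0 on success and -ETIMEDOUT on expiry. A minimal sketch of the resulting pattern, matching the new wait in dw_mci_reset(); wait_dma_req_clear() is a hypothetical wrapper, not a function in the driver:

#include <linux/iopoll.h>

static int wait_dma_req_clear(struct dw_mci *host)
{
	u32 status;

	/* re-read SDMMC_STATUS every 1 us, give up after 500 ms */
	return readl_poll_timeout_atomic(host->regs + SDMMC_STATUS, status,
					 !(status & SDMMC_STATUS_DMA_REQ),
					 1, 500 * USEC_PER_MSEC);
}
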
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 819ad32964fc..57e254aac48d 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -200,11 +200,6 @@ free_master_write:
 	return -ENODEV;
 }
 
-static inline int jz4740_mmc_get_dma_dir(struct mmc_data *data)
-{
-	return (data->flags & MMC_DATA_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-}
-
 static inline struct dma_chan *jz4740_mmc_get_dma_chan(struct jz4740_mmc_host *host,
 						       struct mmc_data *data)
 {
@@ -215,7 +210,7 @@ static void jz4740_mmc_dma_unmap(struct jz4740_mmc_host *host,
 				 struct mmc_data *data)
 {
 	struct dma_chan *chan = jz4740_mmc_get_dma_chan(host, data);
-	enum dma_data_direction dir = jz4740_mmc_get_dma_dir(data);
+	enum dma_data_direction dir = mmc_get_dma_dir(data);
 
 	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
 }
@@ -227,7 +222,7 @@ static int jz4740_mmc_prepare_dma_data(struct jz4740_mmc_host *host,
 				       struct dma_chan *chan)
 {
 	struct jz4740_mmc_host_next *next_data = &host->next_data;
-	enum dma_data_direction dir = jz4740_mmc_get_dma_dir(data);
+	enum dma_data_direction dir = mmc_get_dma_dir(data);
 	int sg_len;
 
 	if (!next && data->host_cookie &&
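
The jz4740 hunks above, like the mmc_spi, mmci, moxart, mtk-sd, mvsdio, omap_hsmmc and s3cmci hunks below, drop a per-driver copy of the same direction helper. Judging from the removed copies, the new core helper mmc_get_dma_dir() is equivalent to this sketch:

static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
{
	/* reads come from the device, writes go to it */
	return data->flags & MMC_DATA_READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
}
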
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 5a959783304b..1842ed341af1 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -36,23 +36,21 @@
 #include <linux/clk-provider.h>
 #include <linux/regulator/consumer.h>
 #include <linux/interrupt.h>
+#include <linux/bitfield.h>
 
 #define DRIVER_NAME "meson-gx-mmc"
 
 #define SD_EMMC_CLOCK 0x0
-#define   CLK_DIV_SHIFT 0
-#define   CLK_DIV_WIDTH 6
-#define   CLK_DIV_MASK 0x3f
+#define   CLK_DIV_MASK GENMASK(5, 0)
 #define   CLK_DIV_MAX 63
-#define   CLK_SRC_SHIFT 6
-#define   CLK_SRC_WIDTH 2
-#define   CLK_SRC_MASK 0x3
+#define   CLK_SRC_MASK GENMASK(7, 6)
 #define   CLK_SRC_XTAL 0   /* external crystal */
 #define   CLK_SRC_XTAL_RATE 24000000
 #define   CLK_SRC_PLL 1    /* FCLK_DIV2 */
 #define   CLK_SRC_PLL_RATE 1000000000
-#define   CLK_PHASE_SHIFT 8
-#define   CLK_PHASE_MASK 0x3
+#define   CLK_CORE_PHASE_MASK GENMASK(9, 8)
+#define   CLK_TX_PHASE_MASK GENMASK(11, 10)
+#define   CLK_RX_PHASE_MASK GENMASK(13, 12)
 #define   CLK_PHASE_0 0
 #define   CLK_PHASE_90 1
 #define   CLK_PHASE_180 2
@@ -65,22 +63,17 @@
 #define SD_EMMC_START 0x40
 #define   START_DESC_INIT BIT(0)
 #define   START_DESC_BUSY BIT(1)
-#define   START_DESC_ADDR_SHIFT 2
-#define   START_DESC_ADDR_MASK (~0x3)
+#define   START_DESC_ADDR_MASK GENMASK(31, 2)
 
 #define SD_EMMC_CFG 0x44
-#define   CFG_BUS_WIDTH_SHIFT 0
-#define   CFG_BUS_WIDTH_MASK 0x3
+#define   CFG_BUS_WIDTH_MASK GENMASK(1, 0)
 #define   CFG_BUS_WIDTH_1 0x0
 #define   CFG_BUS_WIDTH_4 0x1
 #define   CFG_BUS_WIDTH_8 0x2
 #define   CFG_DDR BIT(2)
-#define   CFG_BLK_LEN_SHIFT 4
-#define   CFG_BLK_LEN_MASK 0xf
-#define   CFG_RESP_TIMEOUT_SHIFT 8
-#define   CFG_RESP_TIMEOUT_MASK 0xf
-#define   CFG_RC_CC_SHIFT 12
-#define   CFG_RC_CC_MASK 0xf
+#define   CFG_BLK_LEN_MASK GENMASK(7, 4)
+#define   CFG_RESP_TIMEOUT_MASK GENMASK(11, 8)
+#define   CFG_RC_CC_MASK GENMASK(15, 12)
 #define   CFG_STOP_CLOCK BIT(22)
 #define   CFG_CLK_ALWAYS_ON BIT(18)
 #define   CFG_CHK_DS BIT(20)
@@ -90,9 +83,8 @@
 #define   STATUS_BUSY BIT(31)
 
 #define SD_EMMC_IRQ_EN 0x4c
-#define   IRQ_EN_MASK 0x3fff
-#define   IRQ_RXD_ERR_SHIFT 0
-#define   IRQ_RXD_ERR_MASK 0xff
+#define   IRQ_EN_MASK GENMASK(13, 0)
+#define   IRQ_RXD_ERR_MASK GENMASK(7, 0)
 #define   IRQ_TXD_ERR BIT(8)
 #define   IRQ_DESC_ERR BIT(9)
 #define   IRQ_RESP_ERR BIT(10)
@@ -116,23 +108,39 @@
 
 #define SD_EMMC_CFG_BLK_SIZE 512 /* internal buffer max: 512 bytes */
 #define SD_EMMC_CFG_RESP_TIMEOUT 256 /* in clock cycles */
+#define SD_EMMC_CMD_TIMEOUT 1024 /* in ms */
+#define SD_EMMC_CMD_TIMEOUT_DATA 4096 /* in ms */
 #define SD_EMMC_CFG_CMD_GAP 16 /* in clock cycles */
+#define SD_EMMC_DESC_BUF_LEN PAGE_SIZE
+
+#define SD_EMMC_PRE_REQ_DONE BIT(0)
+#define SD_EMMC_DESC_CHAIN_MODE BIT(1)
+
 #define MUX_CLK_NUM_PARENTS 2
 
+struct meson_tuning_params {
+	u8 core_phase;
+	u8 tx_phase;
+	u8 rx_phase;
+};
+
+struct sd_emmc_desc {
+	u32 cmd_cfg;
+	u32 cmd_arg;
+	u32 cmd_data;
+	u32 cmd_resp;
+};
+
 struct meson_host {
 	struct	device		*dev;
 	struct	mmc_host	*mmc;
-	struct	mmc_request	*mrq;
 	struct	mmc_command	*cmd;
 
 	spinlock_t lock;
 	void __iomem *regs;
-	int irq;
-	u32 ocr_mask;
 	struct clk *core_clk;
 	struct clk_mux mux;
 	struct clk *mux_clk;
-	struct clk *mux_parent[MUX_CLK_NUM_PARENTS];
 	unsigned long current_clock;
 
 	struct clk_divider cfg_div;
@@ -141,23 +149,18 @@ struct meson_host {
 	unsigned int bounce_buf_size;
 	void *bounce_buf;
 	dma_addr_t bounce_dma_addr;
+	struct sd_emmc_desc *descs;
+	dma_addr_t descs_dma_addr;
 
+	struct meson_tuning_params tp;
 	bool vqmmc_enabled;
 };
 
-struct sd_emmc_desc {
-	u32 cmd_cfg;
-	u32 cmd_arg;
-	u32 cmd_data;
-	u32 cmd_resp;
-};
-#define CMD_CFG_LENGTH_SHIFT 0
-#define CMD_CFG_LENGTH_MASK 0x1ff
+#define CMD_CFG_LENGTH_MASK GENMASK(8, 0)
 #define CMD_CFG_BLOCK_MODE BIT(9)
 #define CMD_CFG_R1B BIT(10)
 #define CMD_CFG_END_OF_CHAIN BIT(11)
-#define CMD_CFG_TIMEOUT_SHIFT 12
-#define CMD_CFG_TIMEOUT_MASK 0xf
+#define CMD_CFG_TIMEOUT_MASK GENMASK(15, 12)
 #define CMD_CFG_NO_RESP BIT(16)
 #define CMD_CFG_NO_CMD BIT(17)
 #define CMD_CFG_DATA_IO BIT(18)
@@ -166,17 +169,99 @@ struct sd_emmc_desc {
 #define CMD_CFG_RESP_128 BIT(21)
 #define CMD_CFG_RESP_NUM BIT(22)
 #define CMD_CFG_DATA_NUM BIT(23)
-#define CMD_CFG_CMD_INDEX_SHIFT 24
-#define CMD_CFG_CMD_INDEX_MASK 0x3f
+#define CMD_CFG_CMD_INDEX_MASK GENMASK(29, 24)
 #define CMD_CFG_ERROR BIT(30)
 #define CMD_CFG_OWNER BIT(31)
 
-#define CMD_DATA_MASK (~0x3)
+#define CMD_DATA_MASK GENMASK(31, 2)
 #define CMD_DATA_BIG_ENDIAN BIT(1)
 #define CMD_DATA_SRAM BIT(0)
-#define CMD_RESP_MASK (~0x1)
+#define CMD_RESP_MASK GENMASK(31, 1)
 #define CMD_RESP_SRAM BIT(0)
 
+static unsigned int meson_mmc_get_timeout_msecs(struct mmc_data *data)
+{
+	unsigned int timeout = data->timeout_ns / NSEC_PER_MSEC;
+
+	if (!timeout)
+		return SD_EMMC_CMD_TIMEOUT_DATA;
+
+	timeout = roundup_pow_of_two(timeout);
+
+	return min(timeout, 32768U); /* max. 2^15 ms */
+}
+
+static struct mmc_command *meson_mmc_get_next_command(struct mmc_command *cmd)
+{
+	if (cmd->opcode == MMC_SET_BLOCK_COUNT && !cmd->error)
+		return cmd->mrq->cmd;
+	else if (mmc_op_multi(cmd->opcode) &&
+		 (!cmd->mrq->sbc || cmd->error || cmd->data->error))
+		return cmd->mrq->stop;
+	else
+		return NULL;
+}
+
+static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
+					struct mmc_request *mrq)
+{
+	struct mmc_data *data = mrq->data;
+	struct scatterlist *sg;
+	int i;
+	bool use_desc_chain_mode = true;
+
+	for_each_sg(data->sg, sg, data->sg_len, i)
+		/* check for 8 byte alignment */
+		if (sg->offset & 7) {
+			WARN_ONCE(1, "unaligned scatterlist buffer\n");
+			use_desc_chain_mode = false;
+			break;
+		}
+
+	if (use_desc_chain_mode)
+		data->host_cookie |= SD_EMMC_DESC_CHAIN_MODE;
+}
+
+static inline bool meson_mmc_desc_chain_mode(const struct mmc_data *data)
+{
+	return data->host_cookie & SD_EMMC_DESC_CHAIN_MODE;
+}
+
+static inline bool meson_mmc_bounce_buf_read(const struct mmc_data *data)
+{
+	return data && data->flags & MMC_DATA_READ &&
+	       !meson_mmc_desc_chain_mode(data);
+}
+
+static void meson_mmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct mmc_data *data = mrq->data;
+
+	if (!data)
+		return;
+
+	meson_mmc_get_transfer_mode(mmc, mrq);
+	data->host_cookie |= SD_EMMC_PRE_REQ_DONE;
+
+	if (!meson_mmc_desc_chain_mode(data))
+		return;
+
+	data->sg_count = dma_map_sg(mmc_dev(mmc), data->sg, data->sg_len,
+				    mmc_get_dma_dir(data));
+	if (!data->sg_count)
+		dev_err(mmc_dev(mmc), "dma_map_sg failed\n");
+}
+
+static void meson_mmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
+			       int err)
+{
+	struct mmc_data *data = mrq->data;
+
+	if (data && meson_mmc_desc_chain_mode(data) && data->sg_count)
+		dma_unmap_sg(mmc_dev(mmc), data->sg, data->sg_len,
+			     mmc_get_dma_dir(data));
+}
+
 static int meson_mmc_clk_set(struct meson_host *host, unsigned long clk_rate)
 {
 	struct mmc_host *mmc = host->mmc;
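
The meson-gx register definitions above move from SHIFT/MASK #define pairs to single GENMASK() masks, so FIELD_PREP()/FIELD_GET() from the newly included <linux/bitfield.h> can derive the shift from the mask at compile time. A minimal sketch of a read-modify-write in this style; set_blk_len() is a hypothetical illustration, not a driver function:

#include <linux/bitfield.h>
#include <linux/io.h>
#include <linux/log2.h>

#define CFG_BLK_LEN_MASK GENMASK(7, 4)	/* as defined above */

static void set_blk_len(void __iomem *cfg_reg, unsigned int blksz)
{
	u32 cfg = readl(cfg_reg);

	cfg &= ~CFG_BLK_LEN_MASK;			   /* clear bits 7:4 */
	cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, ilog2(blksz)); /* no manual shift */
	writel(cfg, cfg_reg);
}
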
@@ -244,26 +329,23 @@ static int meson_mmc_clk_init(struct meson_host *host)
 	char clk_name[32];
 	int i, ret = 0;
 	const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
-	unsigned int mux_parent_count = 0;
 	const char *clk_div_parents[1];
 	u32 clk_reg, cfg;
 
 	/* get the mux parents */
 	for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
+		struct clk *clk;
 		char name[16];
 
 		snprintf(name, sizeof(name), "clkin%d", i);
-		host->mux_parent[i] = devm_clk_get(host->dev, name);
-		if (IS_ERR(host->mux_parent[i])) {
-			ret = PTR_ERR(host->mux_parent[i]);
-			if (PTR_ERR(host->mux_parent[i]) != -EPROBE_DEFER)
+		clk = devm_clk_get(host->dev, name);
+		if (IS_ERR(clk)) {
+			if (clk != ERR_PTR(-EPROBE_DEFER))
 				dev_err(host->dev, "Missing clock %s\n", name);
-			host->mux_parent[i] = NULL;
-			return ret;
+			return PTR_ERR(clk);
 		}
 
-		mux_parent_names[i] = __clk_get_name(host->mux_parent[i]);
-		mux_parent_count++;
+		mux_parent_names[i] = __clk_get_name(clk);
 	}
 
 	/* create the mux */
@@ -272,10 +354,9 @@ static int meson_mmc_clk_init(struct meson_host *host)
 	init.ops = &clk_mux_ops;
 	init.flags = 0;
 	init.parent_names = mux_parent_names;
-	init.num_parents = mux_parent_count;
-
+	init.num_parents = MUX_CLK_NUM_PARENTS;
 	host->mux.reg = host->regs + SD_EMMC_CLOCK;
-	host->mux.shift = CLK_SRC_SHIFT;
+	host->mux.shift = __bf_shf(CLK_SRC_MASK);
 	host->mux.mask = CLK_SRC_MASK;
 	host->mux.flags = 0;
 	host->mux.table = NULL;
@@ -287,7 +368,7 @@ static int meson_mmc_clk_init(struct meson_host *host)
 
 	/* create the divider */
 	snprintf(clk_name, sizeof(clk_name), "%s#div", dev_name(host->dev));
-	init.name = devm_kstrdup(host->dev, clk_name, GFP_KERNEL);
+	init.name = clk_name;
 	init.ops = &clk_divider_ops;
 	init.flags = CLK_SET_RATE_PARENT;
 	clk_div_parents[0] = __clk_get_name(host->mux_clk);
@@ -295,8 +376,8 @@ static int meson_mmc_clk_init(struct meson_host *host)
 	init.num_parents = ARRAY_SIZE(clk_div_parents);
 
 	host->cfg_div.reg = host->regs + SD_EMMC_CLOCK;
-	host->cfg_div.shift = CLK_DIV_SHIFT;
-	host->cfg_div.width = CLK_DIV_WIDTH;
+	host->cfg_div.shift = __bf_shf(CLK_DIV_MASK);
+	host->cfg_div.width = __builtin_popcountl(CLK_DIV_MASK);
 	host->cfg_div.hw.init = &init;
 	host->cfg_div.flags = CLK_DIVIDER_ONE_BASED |
 		CLK_DIVIDER_ROUND_CLOSEST | CLK_DIVIDER_ALLOW_ZERO;
@@ -307,9 +388,11 @@ static int meson_mmc_clk_init(struct meson_host *host)
 
 	/* init SD_EMMC_CLOCK to sane defaults w/min clock rate */
 	clk_reg = 0;
-	clk_reg |= CLK_PHASE_180 << CLK_PHASE_SHIFT;
-	clk_reg |= CLK_SRC_XTAL << CLK_SRC_SHIFT;
-	clk_reg |= CLK_DIV_MAX << CLK_DIV_SHIFT;
+	clk_reg |= FIELD_PREP(CLK_CORE_PHASE_MASK, host->tp.core_phase);
+	clk_reg |= FIELD_PREP(CLK_TX_PHASE_MASK, host->tp.tx_phase);
+	clk_reg |= FIELD_PREP(CLK_RX_PHASE_MASK, host->tp.rx_phase);
+	clk_reg |= FIELD_PREP(CLK_SRC_MASK, CLK_SRC_XTAL);
+	clk_reg |= FIELD_PREP(CLK_DIV_MASK, CLK_DIV_MAX);
 	clk_reg &= ~CLK_ALWAYS_ON;
 	writel(clk_reg, host->regs + SD_EMMC_CLOCK);
 
@@ -327,12 +410,37 @@ static int meson_mmc_clk_init(struct meson_host *host)
 	host->mmc->f_min = clk_round_rate(host->cfg_div_clk, 400000);
 
 	ret = meson_mmc_clk_set(host, host->mmc->f_min);
-	if (!ret)
+	if (ret)
 		clk_disable_unprepare(host->cfg_div_clk);
 
 	return ret;
 }
 
+static void meson_mmc_set_tuning_params(struct mmc_host *mmc)
+{
+	struct meson_host *host = mmc_priv(mmc);
+	u32 regval;
+
+	/* stop clock */
+	regval = readl(host->regs + SD_EMMC_CFG);
+	regval |= CFG_STOP_CLOCK;
+	writel(regval, host->regs + SD_EMMC_CFG);
+
+	regval = readl(host->regs + SD_EMMC_CLOCK);
+	regval &= ~CLK_CORE_PHASE_MASK;
+	regval |= FIELD_PREP(CLK_CORE_PHASE_MASK, host->tp.core_phase);
+	regval &= ~CLK_TX_PHASE_MASK;
+	regval |= FIELD_PREP(CLK_TX_PHASE_MASK, host->tp.tx_phase);
+	regval &= ~CLK_RX_PHASE_MASK;
+	regval |= FIELD_PREP(CLK_RX_PHASE_MASK, host->tp.rx_phase);
+	writel(regval, host->regs + SD_EMMC_CLOCK);
+
+	/* start clock */
+	regval = readl(host->regs + SD_EMMC_CFG);
+	regval &= ~CFG_STOP_CLOCK;
+	writel(regval, host->regs + SD_EMMC_CFG);
+}
+
 static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct meson_host *host = mmc_priv(mmc);
@@ -397,17 +505,8 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	val = readl(host->regs + SD_EMMC_CFG);
 	orig = val;
 
-	val &= ~(CFG_BUS_WIDTH_MASK << CFG_BUS_WIDTH_SHIFT);
-	val |= bus_width << CFG_BUS_WIDTH_SHIFT;
-
-	val &= ~(CFG_BLK_LEN_MASK << CFG_BLK_LEN_SHIFT);
-	val |= ilog2(SD_EMMC_CFG_BLK_SIZE) << CFG_BLK_LEN_SHIFT;
-
-	val &= ~(CFG_RESP_TIMEOUT_MASK << CFG_RESP_TIMEOUT_SHIFT);
-	val |= ilog2(SD_EMMC_CFG_RESP_TIMEOUT) << CFG_RESP_TIMEOUT_SHIFT;
-
-	val &= ~(CFG_RC_CC_MASK << CFG_RC_CC_SHIFT);
-	val |= ilog2(SD_EMMC_CFG_CMD_GAP) << CFG_RC_CC_SHIFT;
+	val &= ~CFG_BUS_WIDTH_MASK;
+	val |= FIELD_PREP(CFG_BUS_WIDTH_MASK, bus_width);
 
 	val &= ~CFG_DDR;
 	if (ios->timing == MMC_TIMING_UHS_DDR50 ||
@@ -419,149 +518,189 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (ios->timing == MMC_TIMING_MMC_HS400)
 		val |= CFG_CHK_DS;
 
-	writel(val, host->regs + SD_EMMC_CFG);
-
-	if (val != orig)
+	if (val != orig) {
+		writel(val, host->regs + SD_EMMC_CFG);
 		dev_dbg(host->dev, "%s: SD_EMMC_CFG: 0x%08x -> 0x%08x\n",
 			__func__, orig, val);
+	}
 }
 
-static int meson_mmc_request_done(struct mmc_host *mmc, struct mmc_request *mrq)
+static void meson_mmc_request_done(struct mmc_host *mmc,
+				   struct mmc_request *mrq)
 {
 	struct meson_host *host = mmc_priv(mmc);
 
-	WARN_ON(host->mrq != mrq);
-
-	host->mrq = NULL;
 	host->cmd = NULL;
 	mmc_request_done(host->mmc, mrq);
-
-	return 0;
 }
 
-static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
+static void meson_mmc_set_blksz(struct mmc_host *mmc, unsigned int blksz)
 {
 	struct meson_host *host = mmc_priv(mmc);
-	struct sd_emmc_desc *desc, desc_tmp;
-	u32 cfg;
-	u8 blk_len, cmd_cfg_timeout;
-	unsigned int xfer_bytes = 0;
+	u32 cfg, blksz_old;
 
-	/* Setup descriptors */
-	dma_rmb();
-	desc = &desc_tmp;
-	memset(desc, 0, sizeof(struct sd_emmc_desc));
+	cfg = readl(host->regs + SD_EMMC_CFG);
+	blksz_old = FIELD_GET(CFG_BLK_LEN_MASK, cfg);
+
+	if (!is_power_of_2(blksz))
+		dev_err(host->dev, "blksz %u is not a power of 2\n", blksz);
+
+	blksz = ilog2(blksz);
+
+	/* check if block-size matches, if not update */
+	if (blksz == blksz_old)
+		return;
 
-	desc->cmd_cfg |= (cmd->opcode & CMD_CFG_CMD_INDEX_MASK)	<<
-		CMD_CFG_CMD_INDEX_SHIFT;
-	desc->cmd_cfg |= CMD_CFG_OWNER;  /* owned by CPU */
-	desc->cmd_arg = cmd->arg;
+	dev_dbg(host->dev, "%s: update blk_len %d -> %d\n", __func__,
+		blksz_old, blksz);
 
-	/* Response */
+	cfg &= ~CFG_BLK_LEN_MASK;
+	cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, blksz);
+	writel(cfg, host->regs + SD_EMMC_CFG);
+}
+
+static void meson_mmc_set_response_bits(struct mmc_command *cmd, u32 *cmd_cfg)
+{
 	if (cmd->flags & MMC_RSP_PRESENT) {
-		desc->cmd_cfg &= ~CMD_CFG_NO_RESP;
 		if (cmd->flags & MMC_RSP_136)
-			desc->cmd_cfg |= CMD_CFG_RESP_128;
-		desc->cmd_cfg |= CMD_CFG_RESP_NUM;
-		desc->cmd_resp = 0;
+			*cmd_cfg |= CMD_CFG_RESP_128;
+		*cmd_cfg |= CMD_CFG_RESP_NUM;
 
 		if (!(cmd->flags & MMC_RSP_CRC))
-			desc->cmd_cfg |= CMD_CFG_RESP_NOCRC;
+			*cmd_cfg |= CMD_CFG_RESP_NOCRC;
 
 		if (cmd->flags & MMC_RSP_BUSY)
-			desc->cmd_cfg |= CMD_CFG_R1B;
+			*cmd_cfg |= CMD_CFG_R1B;
 	} else {
-		desc->cmd_cfg |= CMD_CFG_NO_RESP;
+		*cmd_cfg |= CMD_CFG_NO_RESP;
 	}
+}
+
+static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg)
+{
+	struct meson_host *host = mmc_priv(mmc);
+	struct sd_emmc_desc *desc = host->descs;
+	struct mmc_data *data = host->cmd->data;
+	struct scatterlist *sg;
+	u32 start;
+	int i;
+
+	if (data->flags & MMC_DATA_WRITE)
+		cmd_cfg |= CMD_CFG_DATA_WR;
+
+	if (data->blocks > 1) {
+		cmd_cfg |= CMD_CFG_BLOCK_MODE;
+		meson_mmc_set_blksz(mmc, data->blksz);
+	}
+
+	for_each_sg(data->sg, sg, data->sg_count, i) {
+		unsigned int len = sg_dma_len(sg);
+
+		if (data->blocks > 1)
+			len /= data->blksz;
+
+		desc[i].cmd_cfg = cmd_cfg;
+		desc[i].cmd_cfg |= FIELD_PREP(CMD_CFG_LENGTH_MASK, len);
+		if (i > 0)
+			desc[i].cmd_cfg |= CMD_CFG_NO_CMD;
+		desc[i].cmd_arg = host->cmd->arg;
+		desc[i].cmd_resp = 0;
+		desc[i].cmd_data = sg_dma_address(sg);
+	}
+	desc[data->sg_count - 1].cmd_cfg |= CMD_CFG_END_OF_CHAIN;
+
+	dma_wmb(); /* ensure descriptor is written before kicked */
+	start = host->descs_dma_addr | START_DESC_BUSY;
+	writel(start, host->regs + SD_EMMC_START);
+}
+
+static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
+{
+	struct meson_host *host = mmc_priv(mmc);
+	struct mmc_data *data = cmd->data;
+	u32 cmd_cfg = 0, cmd_data = 0;
+	unsigned int xfer_bytes = 0;
+
+	/* Setup descriptors */
+	dma_rmb();
+
+	host->cmd = cmd;
+
+	cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode);
+	cmd_cfg |= CMD_CFG_OWNER;  /* owned by CPU */
+
+	meson_mmc_set_response_bits(cmd, &cmd_cfg);
 
 	/* data? */
-	if (cmd->data) {
-		desc->cmd_cfg |= CMD_CFG_DATA_IO;
-		if (cmd->data->blocks > 1) {
-			desc->cmd_cfg |= CMD_CFG_BLOCK_MODE;
-			desc->cmd_cfg |=
-				(cmd->data->blocks & CMD_CFG_LENGTH_MASK) <<
-				CMD_CFG_LENGTH_SHIFT;
-
-			/* check if block-size matches, if not update */
-			cfg = readl(host->regs + SD_EMMC_CFG);
-			blk_len = cfg & (CFG_BLK_LEN_MASK << CFG_BLK_LEN_SHIFT);
-			blk_len >>= CFG_BLK_LEN_SHIFT;
-			if (blk_len != ilog2(cmd->data->blksz)) {
-				dev_dbg(host->dev, "%s: update blk_len %d -> %d\n",
-					__func__, blk_len,
-					ilog2(cmd->data->blksz));
-				blk_len = ilog2(cmd->data->blksz);
-				cfg &= ~(CFG_BLK_LEN_MASK << CFG_BLK_LEN_SHIFT);
-				cfg |= blk_len << CFG_BLK_LEN_SHIFT;
-				writel(cfg, host->regs + SD_EMMC_CFG);
-			}
+	if (data) {
+		data->bytes_xfered = 0;
+		cmd_cfg |= CMD_CFG_DATA_IO;
+		cmd_cfg |= FIELD_PREP(CMD_CFG_TIMEOUT_MASK,
+				      ilog2(meson_mmc_get_timeout_msecs(data)));
+
+		if (meson_mmc_desc_chain_mode(data)) {
+			meson_mmc_desc_chain_transfer(mmc, cmd_cfg);
+			return;
+		}
+
+		if (data->blocks > 1) {
+			cmd_cfg |= CMD_CFG_BLOCK_MODE;
+			cmd_cfg |= FIELD_PREP(CMD_CFG_LENGTH_MASK,
+					      data->blocks);
+			meson_mmc_set_blksz(mmc, data->blksz);
 		} else {
-			desc->cmd_cfg &= ~CMD_CFG_BLOCK_MODE;
-			desc->cmd_cfg |=
-				(cmd->data->blksz & CMD_CFG_LENGTH_MASK) <<
-				CMD_CFG_LENGTH_SHIFT;
+			cmd_cfg |= FIELD_PREP(CMD_CFG_LENGTH_MASK, data->blksz);
 		}
 
-		cmd->data->bytes_xfered = 0;
-		xfer_bytes = cmd->data->blksz * cmd->data->blocks;
-		if (cmd->data->flags & MMC_DATA_WRITE) {
-			desc->cmd_cfg |= CMD_CFG_DATA_WR;
+		xfer_bytes = data->blksz * data->blocks;
+		if (data->flags & MMC_DATA_WRITE) {
+			cmd_cfg |= CMD_CFG_DATA_WR;
 			WARN_ON(xfer_bytes > host->bounce_buf_size);
-			sg_copy_to_buffer(cmd->data->sg, cmd->data->sg_len,
+			sg_copy_to_buffer(data->sg, data->sg_len,
 					  host->bounce_buf, xfer_bytes);
-			cmd->data->bytes_xfered = xfer_bytes;
 			dma_wmb();
-		} else {
-			desc->cmd_cfg &= ~CMD_CFG_DATA_WR;
-		}
-
-		if (xfer_bytes > 0) {
-			desc->cmd_cfg &= ~CMD_CFG_DATA_NUM;
-			desc->cmd_data = host->bounce_dma_addr & CMD_DATA_MASK;
-		} else {
-			/* write data to data_addr */
-			desc->cmd_cfg |= CMD_CFG_DATA_NUM;
-			desc->cmd_data = 0;
 		}
 
-		cmd_cfg_timeout = 12;
+		cmd_data = host->bounce_dma_addr & CMD_DATA_MASK;
 	} else {
-		desc->cmd_cfg &= ~CMD_CFG_DATA_IO;
-		cmd_cfg_timeout = 10;
+		cmd_cfg |= FIELD_PREP(CMD_CFG_TIMEOUT_MASK,
+				      ilog2(SD_EMMC_CMD_TIMEOUT));
 	}
-	desc->cmd_cfg |= (cmd_cfg_timeout & CMD_CFG_TIMEOUT_MASK) <<
-		CMD_CFG_TIMEOUT_SHIFT;
-
-	host->cmd = cmd;
 
 	/* Last descriptor */
-	desc->cmd_cfg |= CMD_CFG_END_OF_CHAIN;
-	writel(desc->cmd_cfg, host->regs + SD_EMMC_CMD_CFG);
-	writel(desc->cmd_data, host->regs + SD_EMMC_CMD_DAT);
-	writel(desc->cmd_resp, host->regs + SD_EMMC_CMD_RSP);
+	cmd_cfg |= CMD_CFG_END_OF_CHAIN;
+	writel(cmd_cfg, host->regs + SD_EMMC_CMD_CFG);
+	writel(cmd_data, host->regs + SD_EMMC_CMD_DAT);
+	writel(0, host->regs + SD_EMMC_CMD_RSP);
 	wmb(); /* ensure descriptor is written before kicked */
-	writel(desc->cmd_arg, host->regs + SD_EMMC_CMD_ARG);
+	writel(cmd->arg, host->regs + SD_EMMC_CMD_ARG);
 }
 
 static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct meson_host *host = mmc_priv(mmc);
+	bool needs_pre_post_req = mrq->data &&
+			!(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE);
 
-	WARN_ON(host->mrq != NULL);
+	if (needs_pre_post_req) {
+		meson_mmc_get_transfer_mode(mmc, mrq);
+		if (!meson_mmc_desc_chain_mode(mrq->data))
+			needs_pre_post_req = false;
+	}
+
+	if (needs_pre_post_req)
+		meson_mmc_pre_req(mmc, mrq);
 
 	/* Stop execution */
 	writel(0, host->regs + SD_EMMC_START);
 
-	host->mrq = mrq;
+	meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd);
 
-	if (mrq->sbc)
-		meson_mmc_start_cmd(mmc, mrq->sbc);
-	else
-		meson_mmc_start_cmd(mmc, mrq->cmd);
+	if (needs_pre_post_req)
+		meson_mmc_post_req(mmc, mrq, 0);
 }
 
-static int meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
+static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
 {
 	struct meson_host *host = mmc_priv(mmc);
 
@@ -573,15 +712,13 @@ static int meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd)
 	} else if (cmd->flags & MMC_RSP_PRESENT) {
 		cmd->resp[0] = readl(host->regs + SD_EMMC_CMD_RSP);
 	}
-
-	return 0;
 }
 
 static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 {
 	struct meson_host *host = dev_id;
-	struct mmc_request *mrq;
 	struct mmc_command *cmd;
+	struct mmc_data *data;
 	u32 irq_en, status, raw_status;
 	irqreturn_t ret = IRQ_HANDLED;
 
@@ -590,14 +727,11 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 
 	cmd = host->cmd;
 
-	mrq = host->mrq;
-
-	if (WARN_ON(!mrq))
-		return IRQ_NONE;
-
 	if (WARN_ON(!cmd))
 		return IRQ_NONE;
 
+	data = cmd->data;
+
 	spin_lock(&host->lock);
 	irq_en = readl(host->regs + SD_EMMC_IRQ_EN);
 	raw_status = readl(host->regs + SD_EMMC_STATUS);
@@ -610,6 +744,8 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 		goto out;
 	}
 
+	meson_mmc_read_resp(host->mmc, cmd);
+
 	cmd->error = 0;
 	if (status & IRQ_RXD_ERR_MASK) {
 		dev_dbg(host->dev, "Unhandled IRQ: RXD error\n");
@@ -636,12 +772,16 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
 	if (status & IRQ_SDIO)
 		dev_dbg(host->dev, "Unhandled IRQ: SDIO.\n");
 
-	if (status & (IRQ_END_OF_CHAIN | IRQ_RESP_STATUS))
-		ret = IRQ_WAKE_THREAD;
-	else  {
+	if (status & (IRQ_END_OF_CHAIN | IRQ_RESP_STATUS)) {
+		if (data && !cmd->error)
+			data->bytes_xfered = data->blksz * data->blocks;
+		if (meson_mmc_bounce_buf_read(data) ||
+		    meson_mmc_get_next_command(cmd))
+			ret = IRQ_WAKE_THREAD;
+	} else {
 		dev_warn(host->dev, "Unknown IRQ! status=0x%04x: MMC CMD%u arg=0x%08x flags=0x%08x stop=%d\n",
 			 status, cmd->opcode, cmd->arg,
-			 cmd->flags, mrq->stop ? 1 : 0);
+			 cmd->flags, cmd->mrq->stop ? 1 : 0);
 		if (cmd->data) {
 			struct mmc_data *data = cmd->data;
 
@@ -656,10 +796,8 @@ out:
 	/* ack all (enabled) interrupts */
 	writel(status, host->regs + SD_EMMC_STATUS);
 
-	if (ret == IRQ_HANDLED) {
-		meson_mmc_read_resp(host->mmc, cmd);
+	if (ret == IRQ_HANDLED)
 		meson_mmc_request_done(host->mmc, cmd->mrq);
-	}
 
 	spin_unlock(&host->lock);
 	return ret;
@@ -668,35 +806,53 @@ out:
 static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
 {
 	struct meson_host *host = dev_id;
-	struct mmc_request *mrq = host->mrq;
-	struct mmc_command *cmd = host->cmd;
+	struct mmc_command *next_cmd, *cmd = host->cmd;
 	struct mmc_data *data;
 	unsigned int xfer_bytes;
 
-	if (WARN_ON(!mrq))
-		return IRQ_NONE;
-
 	if (WARN_ON(!cmd))
 		return IRQ_NONE;
 
 	data = cmd->data;
-	if (data && data->flags & MMC_DATA_READ) {
+	if (meson_mmc_bounce_buf_read(data)) {
 		xfer_bytes = data->blksz * data->blocks;
 		WARN_ON(xfer_bytes > host->bounce_buf_size);
 		sg_copy_from_buffer(data->sg, data->sg_len,
 				    host->bounce_buf, xfer_bytes);
-		data->bytes_xfered = xfer_bytes;
 	}
 
-	meson_mmc_read_resp(host->mmc, cmd);
-	if (!data || !data->stop || mrq->sbc)
-		meson_mmc_request_done(host->mmc, mrq);
+	next_cmd = meson_mmc_get_next_command(cmd);
+	if (next_cmd)
+		meson_mmc_start_cmd(host->mmc, next_cmd);
 	else
-		meson_mmc_start_cmd(host->mmc, data->stop);
+		meson_mmc_request_done(host->mmc, cmd->mrq);
 
 	return IRQ_HANDLED;
 }
 
+static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+	struct meson_host *host = mmc_priv(mmc);
+	struct meson_tuning_params tp_old = host->tp;
+	int ret = -EINVAL, i, cmd_error;
+
+	dev_info(mmc_dev(mmc), "(re)tuning...\n");
+
+	for (i = CLK_PHASE_0; i <= CLK_PHASE_270; i++) {
+		host->tp.rx_phase = i;
+		/* exclude the active parameter set if retuning */
+		if (!memcmp(&tp_old, &host->tp, sizeof(tp_old)) &&
+		    mmc->doing_retune)
+			continue;
+		meson_mmc_set_tuning_params(mmc);
+		ret = mmc_send_tuning(mmc, opcode, &cmd_error);
+		if (!ret)
+			break;
+	}
+
+	return ret;
+}
+
 /*
  * NOTE: we only need this until the GPIO/pinctrl driver can handle
  * interrupts.  For now, the MMC core will use this for polling.
@@ -711,10 +867,25 @@ static int meson_mmc_get_cd(struct mmc_host *mmc)
 	return status;
 }
 
+static void meson_mmc_cfg_init(struct meson_host *host)
+{
+	u32 cfg = 0;
+
+	cfg |= FIELD_PREP(CFG_RESP_TIMEOUT_MASK,
+			  ilog2(SD_EMMC_CFG_RESP_TIMEOUT));
+	cfg |= FIELD_PREP(CFG_RC_CC_MASK, ilog2(SD_EMMC_CFG_CMD_GAP));
+	cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, ilog2(SD_EMMC_CFG_BLK_SIZE));
+
+	writel(cfg, host->regs + SD_EMMC_CFG);
+}
+
 static const struct mmc_host_ops meson_mmc_ops = {
 	.request	= meson_mmc_request,
 	.set_ios	= meson_mmc_set_ios,
 	.get_cd         = meson_mmc_get_cd,
+	.pre_req	= meson_mmc_pre_req,
+	.post_req	= meson_mmc_post_req,
+	.execute_tuning = meson_mmc_execute_tuning,
 };
 
 static int meson_mmc_probe(struct platform_device *pdev)
@@ -722,7 +893,7 @@ static int meson_mmc_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct meson_host *host;
 	struct mmc_host *mmc;
-	int ret;
+	int ret, irq;
 
 	mmc = mmc_alloc_host(sizeof(struct meson_host), &pdev->dev);
 	if (!mmc)
@@ -754,8 +925,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
 		goto free_host;
 	}
 
-	host->irq = platform_get_irq(pdev, 0);
-	if (host->irq == 0) {
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
 		dev_err(&pdev->dev, "failed to get interrupt resource.\n");
 		ret = -EINVAL;
 		goto free_host;
@@ -771,9 +942,13 @@ static int meson_mmc_probe(struct platform_device *pdev)
 	if (ret)
 		goto free_host;
 
+	host->tp.core_phase = CLK_PHASE_180;
+	host->tp.tx_phase = CLK_PHASE_0;
+	host->tp.rx_phase = CLK_PHASE_0;
+
 	ret = meson_mmc_clk_init(host);
 	if (ret)
-		goto free_host;
+		goto err_core_clk;
 
 	/* Stop execution */
 	writel(0, host->regs + SD_EMMC_START);
@@ -783,14 +958,20 @@ static int meson_mmc_probe(struct platform_device *pdev)
 	writel(IRQ_EN_MASK, host->regs + SD_EMMC_STATUS);
 	writel(IRQ_EN_MASK, host->regs + SD_EMMC_IRQ_EN);
 
-	ret = devm_request_threaded_irq(&pdev->dev, host->irq,
-					meson_mmc_irq, meson_mmc_irq_thread,
-					IRQF_SHARED, DRIVER_NAME, host);
+	/* set config to sane default */
+	meson_mmc_cfg_init(host);
+
+	ret = devm_request_threaded_irq(&pdev->dev, irq, meson_mmc_irq,
+					meson_mmc_irq_thread, IRQF_SHARED,
+					NULL, host);
 	if (ret)
-		goto free_host;
+		goto err_div_clk;
 
+	mmc->caps |= MMC_CAP_CMD23;
 	mmc->max_blk_count = CMD_CFG_LENGTH_MASK;
 	mmc->max_req_size = mmc->max_blk_count * mmc->max_blk_size;
+	mmc->max_segs = SD_EMMC_DESC_BUF_LEN / sizeof(struct sd_emmc_desc);
+	mmc->max_seg_size = mmc->max_req_size;
 
 	/* data bounce buffer */
 	host->bounce_buf_size = mmc->max_req_size;
@@ -800,7 +981,15 @@ static int meson_mmc_probe(struct platform_device *pdev)
 	if (host->bounce_buf == NULL) {
 		dev_err(host->dev, "Unable to map allocate DMA bounce buffer.\n");
 		ret = -ENOMEM;
-		goto free_host;
+		goto err_div_clk;
+	}
+
+	host->descs = dma_alloc_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
+		      &host->descs_dma_addr, GFP_KERNEL);
+	if (!host->descs) {
+		dev_err(host->dev, "Allocating descriptor DMA buffer failed\n");
+		ret = -ENOMEM;
+		goto err_bounce_buf;
 	}
 
 	mmc->ops = &meson_mmc_ops;
@@ -808,9 +997,14 @@ static int meson_mmc_probe(struct platform_device *pdev)
 
 	return 0;
 
-free_host:
+err_bounce_buf:
+	dma_free_coherent(host->dev, host->bounce_buf_size,
+			  host->bounce_buf, host->bounce_dma_addr);
+err_div_clk:
 	clk_disable_unprepare(host->cfg_div_clk);
+err_core_clk:
 	clk_disable_unprepare(host->core_clk);
+free_host:
 	mmc_free_host(mmc);
 	return ret;
 }
@@ -819,9 +1013,13 @@ static int meson_mmc_remove(struct platform_device *pdev)
 {
 	struct meson_host *host = dev_get_drvdata(&pdev->dev);
 
+	mmc_remove_host(host->mmc);
+
 	/* disable interrupts */
 	writel(0, host->regs + SD_EMMC_IRQ_EN);
 
+	dma_free_coherent(host->dev, SD_EMMC_DESC_BUF_LEN,
+			  host->descs, host->descs_dma_addr);
 	dma_free_coherent(host->dev, host->bounce_buf_size,
 			  host->bounce_buf, host->bounce_dma_addr);
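
The meson-gx data-timeout handling above stores log2(milliseconds) in the 4-bit CMD_CFG_TIMEOUT field, which is why meson_mmc_get_timeout_msecs() rounds up to a power of two before meson_mmc_start_cmd() applies ilog2(). encode_data_timeout() below is a hypothetical condensation of the two functions, with a worked example in the comments:

static u32 encode_data_timeout(struct mmc_data *data)
{
	unsigned int ms = data->timeout_ns / NSEC_PER_MSEC;

	if (!ms)
		ms = SD_EMMC_CMD_TIMEOUT_DATA;	/* 4096 ms default */

	ms = roundup_pow_of_two(ms);	/* e.g. 100 ms -> 128 ms */
	ms = min(ms, 32768U);		/* largest encodable value: 2^15 ms */

	/* 128 ms is stored as ilog2(128) == 7 in the 4-bit field */
	return FIELD_PREP(CMD_CFG_TIMEOUT_MASK, ilog2(ms));
}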
 
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index e77d79c8cd9f..476e53d30128 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -888,10 +888,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
 	u32			clock_rate;
 	unsigned long		timeout;
 
-	if (data->flags & MMC_DATA_READ)
-		direction = DMA_FROM_DEVICE;
-	else
-		direction = DMA_TO_DEVICE;
+	direction = mmc_get_dma_dir(data);
 	mmc_spi_setup_data_message(host, multiple, direction);
 	t = &host->t;
 
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 0c6420bb2f00..d1ca2f489054 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -516,17 +516,14 @@ static void mmci_dma_data_error(struct mmci_host *host)
 static void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data)
 {
 	struct dma_chan *chan;
-	enum dma_data_direction dir;
 
-	if (data->flags & MMC_DATA_READ) {
-		dir = DMA_FROM_DEVICE;
+	if (data->flags & MMC_DATA_READ)
 		chan = host->dma_rx_channel;
-	} else {
-		dir = DMA_TO_DEVICE;
+	else
 		chan = host->dma_tx_channel;
-	}
 
-	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
+	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len,
+		     mmc_get_dma_dir(data));
 }
 
 static void mmci_dma_finalize(struct mmci_host *host, struct mmc_data *data)
@@ -589,17 +586,14 @@ static int __mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 	struct dma_chan *chan;
 	struct dma_device *device;
 	struct dma_async_tx_descriptor *desc;
-	enum dma_data_direction buffer_dirn;
 	int nr_sg;
 	unsigned long flags = DMA_CTRL_ACK;
 
 	if (data->flags & MMC_DATA_READ) {
 		conf.direction = DMA_DEV_TO_MEM;
-		buffer_dirn = DMA_FROM_DEVICE;
 		chan = host->dma_rx_channel;
 	} else {
 		conf.direction = DMA_MEM_TO_DEV;
-		buffer_dirn = DMA_TO_DEVICE;
 		chan = host->dma_tx_channel;
 	}
 
@@ -612,7 +606,8 @@ static int __mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 		return -EINVAL;
 
 	device = chan->device;
-	nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len, buffer_dirn);
+	nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len,
+			   mmc_get_dma_dir(data));
 	if (nr_sg == 0)
 		return -EINVAL;
 
@@ -631,7 +626,8 @@ static int __mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
 	return 0;
 
  unmap_exit:
-	dma_unmap_sg(device->dev, data->sg, data->sg_len, buffer_dirn);
+	dma_unmap_sg(device->dev, data->sg, data->sg_len,
+		     mmc_get_dma_dir(data));
 	return -ENOMEM;
 }
 
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index bbad309679cf..d4dc55ac7dea 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -256,7 +256,7 @@ static void moxart_dma_complete(void *param)
 
 static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
 {
-	u32 len, dir_data, dir_slave;
+	u32 len, dir_slave;
 	long dma_time;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *dma_chan;
@@ -266,16 +266,14 @@ static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
 
 	if (data->flags & MMC_DATA_WRITE) {
 		dma_chan = host->dma_chan_tx;
-		dir_data = DMA_TO_DEVICE;
 		dir_slave = DMA_MEM_TO_DEV;
 	} else {
 		dma_chan = host->dma_chan_rx;
-		dir_data = DMA_FROM_DEVICE;
 		dir_slave = DMA_DEV_TO_MEM;
 	}
 
 	len = dma_map_sg(dma_chan->device->dev, data->sg,
-			 data->sg_len, dir_data);
+			 data->sg_len, mmc_get_dma_dir(data));
 
 	if (len > 0) {
 		desc = dmaengine_prep_slave_sg(dma_chan, data->sg,
@@ -301,7 +299,7 @@ static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
 
 	dma_unmap_sg(dma_chan->device->dev,
 		     data->sg, data->sg_len,
-		     dir_data);
+		     mmc_get_dma_dir(data));
 }
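
Note that moxart (like mmci above) keeps its dmaengine direction and drops only the mapping direction: the DMA mapping API and the dmaengine slave API take two distinct enums that must not be mixed. A hedged sketch of the split; map_and_prep() is hypothetical:

static int map_and_prep(struct dma_chan *chan, struct mmc_data *data)
{
	struct dma_async_tx_descriptor *desc;
	int sg_len;

	/* the mapping API takes enum dma_data_direction */
	sg_len = dma_map_sg(chan->device->dev, data->sg, data->sg_len,
			    mmc_get_dma_dir(data));
	if (!sg_len)
		return -EINVAL;

	/* the dmaengine slave API takes enum dma_transfer_direction */
	desc = dmaengine_prep_slave_sg(chan, data->sg, sg_len,
				       (data->flags & MMC_DATA_WRITE) ?
				       DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	return desc ? 0 : -ENOMEM;
}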
 
 
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index b235d8da0602..5c1e178fc5f9 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -76,6 +76,7 @@
 #define MSDC_PATCH_BIT1  0xb4
 #define MSDC_PAD_TUNE    0xec
 #define PAD_DS_TUNE      0x188
+#define PAD_CMD_TUNE     0x18c
 #define EMMC50_CFG0      0x208
 
 /*--------------------------------------------------------------------------*/
@@ -211,13 +212,18 @@
 #define MSDC_PATCH_BIT_SPCPUSH    (0x1 << 29)	/* RW */
 #define MSDC_PATCH_BIT_DECRCTMO   (0x1 << 30)	/* RW */
 
+#define MSDC_PAD_TUNE_DATWRDLY	  (0x1f <<  0)	/* RW */
 #define MSDC_PAD_TUNE_DATRRDLY	  (0x1f <<  8)	/* RW */
 #define MSDC_PAD_TUNE_CMDRDLY	  (0x1f << 16)  /* RW */
+#define MSDC_PAD_TUNE_CMDRRDLY	  (0x1f << 22)	/* RW */
+#define MSDC_PAD_TUNE_CLKTDLY	  (0x1f << 27)  /* RW */
 
 #define PAD_DS_TUNE_DLY1	  (0x1f << 2)   /* RW */
 #define PAD_DS_TUNE_DLY2	  (0x1f << 7)   /* RW */
 #define PAD_DS_TUNE_DLY3	  (0x1f << 12)  /* RW */
 
+#define PAD_CMD_TUNE_RX_DLY3	  (0x1f << 1)  /* RW */
+
 #define EMMC50_CFG_PADCMD_LATCHCK (0x1 << 0)   /* RW */
 #define EMMC50_CFG_CRCSTS_EDGE    (0x1 << 3)   /* RW */
 #define EMMC50_CFG_CFCSTS_SEL     (0x1 << 4)   /* RW */
@@ -285,12 +291,14 @@ struct msdc_save_para {
 	u32 patch_bit0;
 	u32 patch_bit1;
 	u32 pad_ds_tune;
+	u32 pad_cmd_tune;
 	u32 emmc50_cfg0;
 };
 
 struct msdc_tune_para {
 	u32 iocon;
 	u32 pad_tune;
+	u32 pad_cmd_tune;
 };
 
 struct msdc_delay_phase {
@@ -332,6 +340,10 @@ struct msdc_host {
 	unsigned char timing;
 	bool vqmmc_enabled;
 	u32 hs400_ds_delay;
+	u32 hs200_cmd_int_delay; /* cmd internal delay for HS200/SDR104 */
+	u32 hs400_cmd_int_delay; /* cmd internal delay for HS400 */
+	bool hs400_cmd_resp_sel_rising;
+				 /* cmd response sample selection for HS400 */
 	bool hs400_mode;	/* current eMMC will run at hs400 mode */
 	struct msdc_save_para save_para; /* used when gate HCLK */
 	struct msdc_tune_para def_tune_para; /* default tune setting */
@@ -462,11 +474,9 @@ static void msdc_prepare_data(struct msdc_host *host, struct mmc_request *mrq)
 	struct mmc_data *data = mrq->data;
 
 	if (!(data->host_cookie & MSDC_PREPARE_FLAG)) {
-		bool read = (data->flags & MMC_DATA_READ) != 0;
-
 		data->host_cookie |= MSDC_PREPARE_FLAG;
 		data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len,
-					   read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+					    mmc_get_dma_dir(data));
 	}
 }
 
@@ -478,10 +488,8 @@ static void msdc_unprepare_data(struct msdc_host *host, struct mmc_request *mrq)
 		return;
 
 	if (data->host_cookie & MSDC_PREPARE_FLAG) {
-		bool read = (data->flags & MMC_DATA_READ) != 0;
-
 		dma_unmap_sg(host->dev, data->sg, data->sg_len,
-			     read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+			     mmc_get_dma_dir(data));
 		data->host_cookie &= ~MSDC_PREPARE_FLAG;
 	}
 }
@@ -601,8 +609,14 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
 	} else {
 		writel(host->saved_tune_para.iocon, host->base + MSDC_IOCON);
 		writel(host->saved_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
+		writel(host->saved_tune_para.pad_cmd_tune,
+		       host->base + PAD_CMD_TUNE);
 	}
 
+	if (timing == MMC_TIMING_MMC_HS400)
+		sdr_set_field(host->base + PAD_CMD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY,
+			      host->hs400_cmd_int_delay);
 	dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->sclk, timing);
 }
 
@@ -1303,7 +1317,7 @@ static struct msdc_delay_phase get_best_delay(struct msdc_host *host, u32 delay)
 			len_final = len;
 		}
 		start += len ? len : 1;
-		if (len >= 8 && start_final < 4)
+		if (len >= 12 && start_final < 4)
 			break;
 	}
 
@@ -1326,36 +1340,67 @@ static int msdc_tune_response(struct mmc_host *mmc, u32 opcode)
 	struct msdc_host *host = mmc_priv(mmc);
 	u32 rise_delay = 0, fall_delay = 0;
 	struct msdc_delay_phase final_rise_delay, final_fall_delay = { 0,};
+	struct msdc_delay_phase internal_delay_phase;
 	u8 final_delay, final_maxlen;
+	u32 internal_delay = 0;
 	int cmd_err;
-	int i;
+	int i, j;
+
+	if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+	    mmc->ios.timing == MMC_TIMING_UHS_SDR104)
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY,
+			      host->hs200_cmd_int_delay);
 
 	sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
 	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
 		sdr_set_field(host->base + MSDC_PAD_TUNE,
 			      MSDC_PAD_TUNE_CMDRDLY, i);
-		mmc_send_tuning(mmc, opcode, &cmd_err);
-		if (!cmd_err)
-			rise_delay |= (1 << i);
+		/*
+		 * With identical parameters the test may pass on one run and
+		 * fail on the next. To make sure a delay setting is stable,
+		 * we test each set of parameters 3 times.
+		 */
+		for (j = 0; j < 3; j++) {
+			mmc_send_tuning(mmc, opcode, &cmd_err);
+			if (!cmd_err) {
+				rise_delay |= (1 << i);
+			} else {
+				rise_delay &= ~(1 << i);
+				break;
+			}
+		}
 	}
 	final_rise_delay = get_best_delay(host, rise_delay);
 	/* if rising edge has enough margin, then do not scan falling edge */
-	if (final_rise_delay.maxlen >= 10 ||
-	    (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
+	if (final_rise_delay.maxlen >= 12 && final_rise_delay.start < 4)
 		goto skip_fall;
 
 	sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
 	for (i = 0; i < PAD_DELAY_MAX; i++) {
 		sdr_set_field(host->base + MSDC_PAD_TUNE,
 			      MSDC_PAD_TUNE_CMDRDLY, i);
-		mmc_send_tuning(mmc, opcode, &cmd_err);
-		if (!cmd_err)
-			fall_delay |= (1 << i);
+		/*
+		 * With identical parameters the test may pass on one run and
+		 * fail on the next. To make sure a delay setting is stable,
+		 * we test each set of parameters 3 times.
+		 */
+		for (j = 0; j < 3; j++) {
+			mmc_send_tuning(mmc, opcode, &cmd_err);
+			if (!cmd_err) {
+				fall_delay |= (1 << i);
+			} else {
+				fall_delay &= ~(1 << i);
+				break;
+			}
+		}
 	}
 	final_fall_delay = get_best_delay(host, fall_delay);
 
 skip_fall:
 	final_maxlen = max(final_rise_delay.maxlen, final_fall_delay.maxlen);
+	if (final_fall_delay.maxlen >= 12 && final_fall_delay.start < 4)
+		final_maxlen = final_fall_delay.maxlen;
 	if (final_maxlen == final_rise_delay.maxlen) {
 		sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
 		sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
@@ -1367,7 +1412,71 @@ skip_fall:
 			      final_fall_delay.final_phase);
 		final_delay = final_fall_delay.final_phase;
 	}
+	if (host->hs200_cmd_int_delay)
+		goto skip_internal;
+
+	for (i = 0; i < PAD_DELAY_MAX; i++) {
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY, i);
+		mmc_send_tuning(mmc, opcode, &cmd_err);
+		if (!cmd_err)
+			internal_delay |= (1 << i);
+	}
+	dev_dbg(host->dev, "Final internal delay: 0x%x\n", internal_delay);
+	internal_delay_phase = get_best_delay(host, internal_delay);
+	sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRRDLY,
+		      internal_delay_phase.final_phase);
+skip_internal:
+	dev_dbg(host->dev, "Final cmd pad delay: %x\n", final_delay);
+	return final_delay == 0xff ? -EIO : 0;
+}
+
+static int hs400_tune_response(struct mmc_host *mmc, u32 opcode)
+{
+	struct msdc_host *host = mmc_priv(mmc);
+	u32 cmd_delay = 0;
+	struct msdc_delay_phase final_cmd_delay = { 0,};
+	u8 final_delay;
+	int cmd_err;
+	int i, j;
+
+	/* select EMMC50 PAD CMD tune */
+	sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0));
+
+	if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+	    mmc->ios.timing == MMC_TIMING_UHS_SDR104)
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY,
+			      host->hs200_cmd_int_delay);
+
+	if (host->hs400_cmd_resp_sel_rising)
+		sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
+	else
+		sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
+	for (i = 0; i < PAD_DELAY_MAX; i++) {
+		sdr_set_field(host->base + PAD_CMD_TUNE,
+			      PAD_CMD_TUNE_RX_DLY3, i);
+		/*
+		 * With identical parameters the test may pass on one run and
+		 * fail on the next. To make sure a delay setting is stable,
+		 * we test each set of parameters 3 times.
+		 */
+		for (j = 0; j < 3; j++) {
+			mmc_send_tuning(mmc, opcode, &cmd_err);
+			if (!cmd_err) {
+				cmd_delay |= (1 << i);
+			} else {
+				cmd_delay &= ~(1 << i);
+				break;
+			}
+		}
+	}
+	final_cmd_delay = get_best_delay(host, cmd_delay);
+	sdr_set_field(host->base + PAD_CMD_TUNE, PAD_CMD_TUNE_RX_DLY3,
+		      final_cmd_delay.final_phase);
+	final_delay = final_cmd_delay.final_phase;
 
+	dev_dbg(host->dev, "Final cmd pad delay: %x\n", final_delay);
 	return final_delay == 0xff ? -EIO : 0;
 }
 
@@ -1390,7 +1499,7 @@ static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
 	}
 	final_rise_delay = get_best_delay(host, rise_delay);
 	/* if rising edge has enough margin, then do not scan falling edge */
-	if (final_rise_delay.maxlen >= 10 ||
+	if (final_rise_delay.maxlen >= 12 ||
 	    (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
 		goto skip_fall;
 
@@ -1423,6 +1532,7 @@ skip_fall:
 		final_delay = final_fall_delay.final_phase;
 	}
 
+	dev_dbg(host->dev, "Final data pad delay: %x\n", final_delay);
 	return final_delay == 0xff ? -EIO : 0;
 }
 
@@ -1431,7 +1541,10 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	struct msdc_host *host = mmc_priv(mmc);
 	int ret;
 
-	ret = msdc_tune_response(mmc, opcode);
+	if (host->hs400_mode)
+		ret = hs400_tune_response(mmc, opcode);
+	else
+		ret = msdc_tune_response(mmc, opcode);
 	if (ret == -EIO) {
 		dev_err(host->dev, "Tune response fail!\n");
 		return ret;
@@ -1444,6 +1557,7 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 
 	host->saved_tune_para.iocon = readl(host->base + MSDC_IOCON);
 	host->saved_tune_para.pad_tune = readl(host->base + MSDC_PAD_TUNE);
+	host->saved_tune_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
 	return ret;
 }
 
@@ -1478,6 +1592,25 @@ static struct mmc_host_ops mt_msdc_ops = {
 	.hw_reset = msdc_hw_reset,
 };
 
+static void msdc_of_property_parse(struct platform_device *pdev,
+				   struct msdc_host *host)
+{
+	of_property_read_u32(pdev->dev.of_node, "hs400-ds-delay",
+			     &host->hs400_ds_delay);
+
+	of_property_read_u32(pdev->dev.of_node, "mediatek,hs200-cmd-int-delay",
+			     &host->hs200_cmd_int_delay);
+
+	of_property_read_u32(pdev->dev.of_node, "mediatek,hs400-cmd-int-delay",
+			     &host->hs400_cmd_int_delay);
+
+	if (of_property_read_bool(pdev->dev.of_node,
+				  "mediatek,hs400-cmd-resp-sel-rising"))
+		host->hs400_cmd_resp_sel_rising = true;
+	else
+		host->hs400_cmd_resp_sel_rising = false;
+}
+
 static int msdc_drv_probe(struct platform_device *pdev)
 {
 	struct mmc_host *mmc;
@@ -1549,10 +1682,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
 		goto host_free;
 	}
 
-	if (!of_property_read_u32(pdev->dev.of_node, "hs400-ds-delay",
-				  &host->hs400_ds_delay))
-		dev_dbg(&pdev->dev, "hs400-ds-delay: %x\n",
-			host->hs400_ds_delay);
+	msdc_of_property_parse(pdev, host);
 
 	host->dev = &pdev->dev;
 	host->mmc = mmc;
@@ -1664,6 +1794,7 @@ static void msdc_save_reg(struct msdc_host *host)
 	host->save_para.patch_bit0 = readl(host->base + MSDC_PATCH_BIT);
 	host->save_para.patch_bit1 = readl(host->base + MSDC_PATCH_BIT1);
 	host->save_para.pad_ds_tune = readl(host->base + PAD_DS_TUNE);
+	host->save_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
 	host->save_para.emmc50_cfg0 = readl(host->base + EMMC50_CFG0);
 }
 
@@ -1676,6 +1807,7 @@ static void msdc_restore_reg(struct msdc_host *host)
 	writel(host->save_para.patch_bit0, host->base + MSDC_PATCH_BIT);
 	writel(host->save_para.patch_bit1, host->base + MSDC_PATCH_BIT1);
 	writel(host->save_para.pad_ds_tune, host->base + PAD_DS_TUNE);
+	writel(host->save_para.pad_cmd_tune, host->base + PAD_CMD_TUNE);
 	writel(host->save_para.emmc50_cfg0, host->base + EMMC50_CFG0);
 }
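
The mtk-sd tuning loops above build a bitmap with one bit per pad delay, keeping a bit set only if all three attempts at that delay pass; get_best_delay() then centres on the longest passing window, and the raised thresholds (len >= 12 instead of >= 8/10) demand a wider window before accepting a result early. A hedged sketch of the window search, without the driver's wrap-around and early-exit heuristics:

#include <linux/bitops.h>

struct delay_window { u8 start; u8 len; };

/* hypothetical helper: longest run of consecutive set bits in @bitmap */
static struct delay_window longest_window(u32 bitmap, unsigned int nbits)
{
	struct delay_window best = { 0, 0 };
	unsigned int i, run = 0, run_start = 0;

	for (i = 0; i < nbits; i++) {
		if (bitmap & BIT(i)) {
			if (!run)
				run_start = i;
			if (++run > best.len) {
				best.start = run_start;
				best.len = run;
			}
		} else {
			run = 0;
		}
	}

	/* the final phase would then be best.start + best.len / 2 */
	return best;
}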
 
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 42296e55b9de..58d74b8d6c79 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -125,10 +125,10 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
 		return 1;
 	} else {
 		dma_addr_t phys_addr;
-		int dma_dir = (data->flags & MMC_DATA_READ) ?
-			DMA_FROM_DEVICE : DMA_TO_DEVICE;
-		host->sg_frags = dma_map_sg(mmc_dev(host->mmc), data->sg,
-					    data->sg_len, dma_dir);
+
+		host->sg_frags = dma_map_sg(mmc_dev(host->mmc),
+					    data->sg, data->sg_len,
+					    mmc_get_dma_dir(data));
 		phys_addr = sg_dma_address(data->sg);
 		mvsd_write(MVSD_SYS_ADDR_LOW, (u32)phys_addr & 0xffff);
 		mvsd_write(MVSD_SYS_ADDR_HI,  (u32)phys_addr >> 16);
@@ -294,8 +294,7 @@ static u32 mvsd_finish_data(struct mvsd_host *host, struct mmc_data *data,
 		host->pio_size = 0;
 	} else {
 		dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_frags,
-			     (data->flags & MMC_DATA_READ) ?
-				DMA_FROM_DEVICE : DMA_TO_DEVICE);
+			     mmc_get_dma_dir(data));
 	}
 
 	if (err_status & MVSD_ERR_DATA_TIMEOUT)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index a58bd653ed8b..8c39dccacf39 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -935,15 +935,6 @@ omap_hsmmc_start_command(struct omap_hsmmc_host *host, struct mmc_command *cmd,
 	OMAP_HSMMC_WRITE(host->base, CMD, cmdreg);
 }
 
-static int
-omap_hsmmc_get_dma_dir(struct omap_hsmmc_host *host, struct mmc_data *data)
-{
-	if (data->flags & MMC_DATA_WRITE)
-		return DMA_TO_DEVICE;
-	else
-		return DMA_FROM_DEVICE;
-}
-
 static struct dma_chan *omap_hsmmc_get_dma_chan(struct omap_hsmmc_host *host,
 	struct mmc_data *data)
 {
@@ -1055,7 +1046,7 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno)
 		dmaengine_terminate_all(chan);
 		dma_unmap_sg(chan->device->dev,
 			host->data->sg, host->data->sg_len,
-			omap_hsmmc_get_dma_dir(host, host->data));
+			mmc_get_dma_dir(host->data));
 
 		host->data->host_cookie = 0;
 	}
@@ -1350,7 +1341,7 @@ static void omap_hsmmc_dma_callback(void *param)
 	if (!data->host_cookie)
 		dma_unmap_sg(chan->device->dev,
 			     data->sg, data->sg_len,
-			     omap_hsmmc_get_dma_dir(host, data));
+			     mmc_get_dma_dir(data));
 
 	req_in_progress = host->req_in_progress;
 	host->dma_ch = -1;
@@ -1383,7 +1374,7 @@ static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host,
 	/* Check if next job is already prepared */
 	if (next || data->host_cookie != host->next_data.cookie) {
 		dma_len = dma_map_sg(chan->device->dev, data->sg, data->sg_len,
-				     omap_hsmmc_get_dma_dir(host, data));
+				     mmc_get_dma_dir(data));
 
 	} else {
 		dma_len = host->next_data.dma_len;
@@ -1569,7 +1560,7 @@ static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 		struct dma_chan *c = omap_hsmmc_get_dma_chan(host, data);
 
 		dma_unmap_sg(c->device->dev, data->sg, data->sg_len,
-			     omap_hsmmc_get_dma_dir(host, data));
+			     mmc_get_dma_dir(data));
 		data->host_cookie = 0;
 	}
 }
@@ -1770,8 +1761,8 @@ static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
 	 */
 	if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) {
 		struct pinctrl *p = devm_pinctrl_get(host->dev);
-		if (!p) {
-			ret = -ENODEV;
+		if (IS_ERR(p)) {
+			ret = PTR_ERR(p);
 			goto err_free_irq;
 		}
 		if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT))) {
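
The last omap_hsmmc hunk fixes a classic error-handling bug: devm_pinctrl_get() reports failure through an ERR_PTR()-encoded pointer and never returns NULL, so the old !p test could not fire. The general pattern for such APIs:

struct pinctrl *p = devm_pinctrl_get(dev);

if (IS_ERR(p))			/* e.g. ERR_PTR(-EPROBE_DEFER) */
	return PTR_ERR(p);	/* propagate the encoded errno */
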
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 7a173f8c455b..8896bf533dc7 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -24,6 +24,10 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/mmc/slot-gpio.h>
 
 #include <plat/gpio-cfg.h>
 #include <mach/dma.h>
@@ -807,21 +811,6 @@ irq_out:
 
 }
 
-/*
- * ISR for the CardDetect Pin
-*/
-
-static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id)
-{
-	struct s3cmci_host *host = (struct s3cmci_host *)dev_id;
-
-	dbg(host, dbg_irq, "card detect\n");
-
-	mmc_detect_change(host->mmc, msecs_to_jiffies(500));
-
-	return IRQ_HANDLED;
-}
-
 static void s3cmci_dma_done_callback(void *arg)
 {
 	struct s3cmci_host *host = arg;
@@ -1104,7 +1093,7 @@ static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
 		conf.direction = DMA_MEM_TO_DEV;
 
 	dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		   mmc_get_dma_dir(data));
 
 	dmaengine_slave_config(host->dma, &conf);
 	desc = dmaengine_prep_slave_sg(host->dma, data->sg, data->sg_len,
@@ -1121,7 +1110,7 @@ static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
 
 unmap_exit:
 	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		     mmc_get_dma_dir(data));
 	return -ENOMEM;
 }
 
@@ -1177,19 +1166,6 @@ static void s3cmci_send_request(struct mmc_host *mmc)
 	s3cmci_enable_irq(host, true);
 }
 
-static int s3cmci_card_present(struct mmc_host *mmc)
-{
-	struct s3cmci_host *host = mmc_priv(mmc);
-	struct s3c24xx_mci_pdata *pdata = host->pdata;
-	int ret;
-
-	if (pdata->no_detect)
-		return -ENOSYS;
-
-	ret = gpio_get_value(pdata->gpio_detect) ? 0 : 1;
-	return ret ^ pdata->detect_invert;
-}
-
 static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 {
 	struct s3cmci_host *host = mmc_priv(mmc);
@@ -1198,7 +1174,7 @@ static void s3cmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	host->cmd_is_stop = 0;
 	host->mrq = mrq;
 
-	if (s3cmci_card_present(mmc) == 0) {
+	if (mmc_gpio_get_cd(mmc) == 0) {
 		dbg(host, dbg_err, "%s: no medium present\n", __func__);
 		host->mrq->cmd->error = -ENOMEDIUM;
 		mmc_request_done(mmc, mrq);
@@ -1242,8 +1218,9 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	case MMC_POWER_ON:
 	case MMC_POWER_UP:
 		/* Configure GPE5...GPE10 pins in SD mode */
-		s3c_gpio_cfgall_range(S3C2410_GPE(5), 6, S3C_GPIO_SFN(2),
-				      S3C_GPIO_PULL_NONE);
+		if (!host->pdev->dev.of_node)
+			s3c_gpio_cfgall_range(S3C2410_GPE(5), 6, S3C_GPIO_SFN(2),
+					      S3C_GPIO_PULL_NONE);
 
 		if (host->pdata->set_power)
 			host->pdata->set_power(ios->power_mode, ios->vdd);
@@ -1255,7 +1232,8 @@ static void s3cmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	case MMC_POWER_OFF:
 	default:
-		gpio_direction_output(S3C2410_GPE(5), 0);
+		if (!host->pdev->dev.of_node)
+			gpio_direction_output(S3C2410_GPE(5), 0);
 
 		if (host->is2440)
 			mci_con |= S3C2440_SDICON_SDRESET;
@@ -1295,21 +1273,6 @@ static void s3cmci_reset(struct s3cmci_host *host)
 	writel(con, host->base + S3C2410_SDICON);
 }
 
-static int s3cmci_get_ro(struct mmc_host *mmc)
-{
-	struct s3cmci_host *host = mmc_priv(mmc);
-	struct s3c24xx_mci_pdata *pdata = host->pdata;
-	int ret;
-
-	if (pdata->no_wprotect)
-		return 0;
-
-	ret = gpio_get_value(pdata->gpio_wprotect) ? 1 : 0;
-	ret ^= pdata->wprotect_invert;
-
-	return ret;
-}
-
 static void s3cmci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct s3cmci_host *host = mmc_priv(mmc);
@@ -1353,8 +1316,8 @@ static void s3cmci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 static struct mmc_host_ops s3cmci_ops = {
 	.request	= s3cmci_request,
 	.set_ios	= s3cmci_set_ios,
-	.get_ro		= s3cmci_get_ro,
-	.get_cd		= s3cmci_card_present,
+	.get_ro		= mmc_gpio_get_ro,
+	.get_cd		= mmc_gpio_get_cd,
 	.enable_sdio_irq = s3cmci_enable_sdio_irq,
 };
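
With card-detect and write-protect handed to the slot-gpio core, s3cmci no longer reads the GPIOs or XORs in the inversion flags itself: .get_cd/.get_ro point at the generic helpers and line polarity becomes a capability bit. A condensed sketch of the probe-time contract, mirroring s3cmci_probe_pdata() below (error handling elided):

#include <linux/mmc/slot-gpio.h>

/* register the lines with the core once at probe time */
if (gpio_is_valid(pdata->gpio_detect))
	mmc_gpio_request_cd(mmc, pdata->gpio_detect, 0);  /* 0: no debounce */
if (gpio_is_valid(pdata->gpio_wprotect))
	mmc_gpio_request_ro(mmc, pdata->gpio_wprotect);

/* inverted lines become capabilities instead of ad hoc XORs */
if (pdata->detect_invert)
	mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
if (pdata->wprotect_invert)
	mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;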
 
@@ -1545,21 +1508,14 @@ static inline void s3cmci_debugfs_remove(struct s3cmci_host *host) { }
 
 #endif /* CONFIG_DEBUG_FS */
 
-static int s3cmci_probe(struct platform_device *pdev)
+static int s3cmci_probe_pdata(struct s3cmci_host *host)
 {
-	struct s3cmci_host *host;
-	struct mmc_host	*mmc;
-	int ret;
-	int is2440;
-	int i;
+	struct platform_device *pdev = host->pdev;
+	struct mmc_host *mmc = host->mmc;
+	struct s3c24xx_mci_pdata *pdata;
+	int i, ret;
 
-	is2440 = platform_get_device_id(pdev)->driver_data;
-
-	mmc = mmc_alloc_host(sizeof(struct s3cmci_host), &pdev->dev);
-	if (!mmc) {
-		ret = -ENOMEM;
-		goto probe_out;
-	}
+	host->is2440 = platform_get_device_id(pdev)->driver_data;
 
 	for (i = S3C2410_GPE(5); i <= S3C2410_GPE(10); i++) {
 		ret = gpio_request(i, dev_name(&pdev->dev));
@@ -1569,25 +1525,101 @@ static int s3cmci_probe(struct platform_device *pdev)
 			for (i--; i >= S3C2410_GPE(5); i--)
 				gpio_free(i);
 
-			goto probe_free_host;
+			return ret;
+		}
+	}
+
+	if (!pdev->dev.platform_data)
+		pdev->dev.platform_data = &s3cmci_def_pdata;
+
+	pdata = pdev->dev.platform_data;
+
+	if (pdata->no_wprotect)
+		mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT;
+
+	if (pdata->no_detect)
+		mmc->caps |= MMC_CAP_NEEDS_POLL;
+
+	if (pdata->wprotect_invert)
+		mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
+
+	if (pdata->detect_invert)
+		mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
+
+	if (gpio_is_valid(pdata->gpio_detect)) {
+		ret = mmc_gpio_request_cd(mmc, pdata->gpio_detect, 0);
+		if (ret) {
+			dev_err(&pdev->dev, "error requesting GPIO for CD %d\n",
+				ret);
+			return ret;
 		}
 	}
 
+	if (gpio_is_valid(pdata->gpio_wprotect)) {
+		ret = mmc_gpio_request_ro(mmc, pdata->gpio_wprotect);
+		if (ret) {
+			dev_err(&pdev->dev, "error requesting GPIO for WP %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int s3cmci_probe_dt(struct s3cmci_host *host)
+{
+	struct platform_device *pdev = host->pdev;
+	struct s3c24xx_mci_pdata *pdata;
+	struct mmc_host *mmc = host->mmc;
+	int ret;
+
+	host->is2440 = (int) of_device_get_match_data(&pdev->dev);
+
+	ret = mmc_of_parse(mmc);
+	if (ret)
+		return ret;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdev->dev.platform_data = pdata;
+
+	return 0;
+}
+
+static int s3cmci_probe(struct platform_device *pdev)
+{
+	struct s3cmci_host *host;
+	struct mmc_host	*mmc;
+	int ret;
+	int i;
+
+	mmc = mmc_alloc_host(sizeof(struct s3cmci_host), &pdev->dev);
+	if (!mmc) {
+		ret = -ENOMEM;
+		goto probe_out;
+	}
+
 	host = mmc_priv(mmc);
 	host->mmc 	= mmc;
 	host->pdev	= pdev;
-	host->is2440	= is2440;
+
+	if (pdev->dev.of_node)
+		ret = s3cmci_probe_dt(host);
+	else
+		ret = s3cmci_probe_pdata(host);
+
+	if (ret)
+		goto probe_free_host;
 
 	host->pdata = pdev->dev.platform_data;
-	if (!host->pdata) {
-		pdev->dev.platform_data = &s3cmci_def_pdata;
-		host->pdata = &s3cmci_def_pdata;
-	}
 
 	spin_lock_init(&host->complete_lock);
 	tasklet_init(&host->pio_tasklet, pio_tasklet, (unsigned long) host);
 
-	if (is2440) {
+	if (host->is2440) {
 		host->sdiimsk	= S3C2440_SDIIMSK;
 		host->sdidata	= S3C2440_SDIDATA;
 		host->clk_div	= 1;
@@ -1645,43 +1677,6 @@ static int s3cmci_probe(struct platform_device *pdev)
 	disable_irq(host->irq);
 	host->irq_state = false;
 
-	if (!host->pdata->no_detect) {
-		ret = gpio_request(host->pdata->gpio_detect, "s3cmci detect");
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get detect gpio\n");
-			goto probe_free_irq;
-		}
-
-		host->irq_cd = gpio_to_irq(host->pdata->gpio_detect);
-
-		if (host->irq_cd >= 0) {
-			if (request_irq(host->irq_cd, s3cmci_irq_cd,
-					IRQF_TRIGGER_RISING |
-					IRQF_TRIGGER_FALLING,
-					DRIVER_NAME, host)) {
-				dev_err(&pdev->dev,
-					"can't get card detect irq.\n");
-				ret = -ENOENT;
-				goto probe_free_gpio_cd;
-			}
-		} else {
-			dev_warn(&pdev->dev,
-				 "host detect has no irq available\n");
-			gpio_direction_input(host->pdata->gpio_detect);
-		}
-	} else
-		host->irq_cd = -1;
-
-	if (!host->pdata->no_wprotect) {
-		ret = gpio_request(host->pdata->gpio_wprotect, "s3cmci wp");
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get writeprotect\n");
-			goto probe_free_irq_cd;
-		}
-
-		gpio_direction_input(host->pdata->gpio_wprotect);
-	}
-
 	/* Depending on the dma state, get a DMA channel to use. */
 
 	if (s3cmci_host_usedma(host)) {
@@ -1689,7 +1684,7 @@ static int s3cmci_probe(struct platform_device *pdev)
 		ret = PTR_ERR_OR_ZERO(host->dma);
 		if (ret) {
 			dev_err(&pdev->dev, "cannot get DMA channel.\n");
-			goto probe_free_gpio_wp;
+			goto probe_free_irq;
 		}
 	}
 
@@ -1768,18 +1763,6 @@ static int s3cmci_probe(struct platform_device *pdev)
 	if (s3cmci_host_usedma(host))
 		dma_release_channel(host->dma);
 
- probe_free_gpio_wp:
-	if (!host->pdata->no_wprotect)
-		gpio_free(host->pdata->gpio_wprotect);
-
- probe_free_gpio_cd:
-	if (!host->pdata->no_detect)
-		gpio_free(host->pdata->gpio_detect);
-
- probe_free_irq_cd:
-	if (host->irq_cd >= 0)
-		free_irq(host->irq_cd, host);
-
  probe_free_irq:
 	free_irq(host->irq, host);
 
@@ -1790,8 +1773,9 @@ static int s3cmci_probe(struct platform_device *pdev)
 	release_mem_region(host->mem->start, resource_size(host->mem));
 
  probe_free_gpio:
-	for (i = S3C2410_GPE(5); i <= S3C2410_GPE(10); i++)
-		gpio_free(i);
+	if (!pdev->dev.of_node)
+		for (i = S3C2410_GPE(5); i <= S3C2410_GPE(10); i++)
+			gpio_free(i);
 
  probe_free_host:
 	mmc_free_host(mmc);
@@ -1818,7 +1802,6 @@ static int s3cmci_remove(struct platform_device *pdev)
 {
 	struct mmc_host		*mmc  = platform_get_drvdata(pdev);
 	struct s3cmci_host	*host = mmc_priv(mmc);
-	struct s3c24xx_mci_pdata *pd = host->pdata;
 	int i;
 
 	s3cmci_shutdown(pdev);
@@ -1832,15 +1815,9 @@ static int s3cmci_remove(struct platform_device *pdev)
 
 	free_irq(host->irq, host);
 
-	if (!pd->no_wprotect)
-		gpio_free(pd->gpio_wprotect);
-
-	if (!pd->no_detect)
-		gpio_free(pd->gpio_detect);
-
-	for (i = S3C2410_GPE(5); i <= S3C2410_GPE(10); i++)
-		gpio_free(i);
-
+	if (!pdev->dev.of_node)
+		for (i = S3C2410_GPE(5); i <= S3C2410_GPE(10); i++)
+			gpio_free(i);
 
 	iounmap(host->base);
 	release_mem_region(host->mem->start, resource_size(host->mem));
@@ -1849,6 +1826,23 @@ static int s3cmci_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id s3cmci_dt_match[] = {
+	{
+		.compatible = "samsung,s3c2410-sdi",
+		.data = (void *)0,
+	},
+	{
+		.compatible = "samsung,s3c2412-sdi",
+		.data = (void *)1,
+	},
+	{
+		.compatible = "samsung,s3c2440-sdi",
+		.data = (void *)1,
+	},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, s3cmci_dt_match);
+
 static const struct platform_device_id s3cmci_driver_ids[] = {
 	{
 		.name	= "s3c2410-sdi",
@@ -1868,6 +1862,7 @@ MODULE_DEVICE_TABLE(platform, s3cmci_driver_ids);
 static struct platform_driver s3cmci_driver = {
 	.driver	= {
 		.name	= "s3c-sdi",
+		.of_match_table = s3cmci_dt_match,
 	},
 	.id_table	= s3cmci_driver_ids,
 	.probe		= s3cmci_probe,
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 9dcb7048e3b1..c6a9a1bfaa22 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -263,10 +263,8 @@ static int sdhci_acpi_sd_probe_slot(struct platform_device *pdev,
 
 	/* Platform specific code during sd probe slot goes here */
 
-	if (hid && !strcmp(hid, "80865ACA")) {
+	if (hid && !strcmp(hid, "80865ACA"))
 		host->mmc_host_ops.get_cd = bxt_get_cd;
-		host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
-	}
 
 	return 0;
 }
@@ -302,7 +300,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sd = {
 	.quirks  = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.quirks2 = SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON |
 		   SDHCI_QUIRK2_STOP_WITH_TC,
-	.caps    = MMC_CAP_WAIT_WHILE_BUSY,
+	.caps    = MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_AGGRESSIVE_PM,
 	.probe_slot	= sdhci_acpi_sd_probe_slot,
 };
 
@@ -524,8 +522,12 @@ static int sdhci_acpi_remove(struct platform_device *pdev)
 static int sdhci_acpi_suspend(struct device *dev)
 {
 	struct sdhci_acpi_host *c = dev_get_drvdata(dev);
+	struct sdhci_host *host = c->host;
+
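+	/*
+	 * Hosts in re-tuning mode 3 re-tune automatically; for the others,
+	 * flag that a re-tune is needed after resume.
+	 */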
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
 
-	return sdhci_suspend_host(c->host);
+	return sdhci_suspend_host(host);
 }
 
 static int sdhci_acpi_resume(struct device *dev)
@@ -544,8 +546,12 @@ static int sdhci_acpi_resume(struct device *dev)
 static int sdhci_acpi_runtime_suspend(struct device *dev)
 {
 	struct sdhci_acpi_host *c = dev_get_drvdata(dev);
+	struct sdhci_host *host = c->host;
+
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
 
-	return sdhci_runtime_suspend_host(c->host);
+	return sdhci_runtime_suspend_host(host);
 }
 
 static int sdhci_acpi_runtime_resume(struct device *dev)
diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c
index 159f6f64c68e..242c5dc7a81e 100644
--- a/drivers/mmc/host/sdhci-brcmstb.c
+++ b/drivers/mmc/host/sdhci-brcmstb.c
@@ -29,6 +29,9 @@ static int sdhci_brcmstb_suspend(struct device *dev)
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	int res;
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	res = sdhci_suspend_host(host);
 	if (res)
 		return res;
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 316cfec3f005..19d5698244b5 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
+#include <linux/of.h>
 
 #include "sdhci-pltfm.h"
 
@@ -40,6 +41,7 @@
 #define   SDHCI_CDNS_HRS06_MODE_MMC_DDR		0x3
 #define   SDHCI_CDNS_HRS06_MODE_MMC_HS200	0x4
 #define   SDHCI_CDNS_HRS06_MODE_MMC_HS400	0x5
+#define   SDHCI_CDNS_HRS06_MODE_MMC_HS400ES	0x6
 
 /* SRS - Slot Register Set (SDHCI-compatible) */
 #define SDHCI_CDNS_SRS_BASE		0x200
@@ -54,6 +56,9 @@
 #define SDHCI_CDNS_PHY_DLY_EMMC_LEGACY	0x06
 #define SDHCI_CDNS_PHY_DLY_EMMC_SDR	0x07
 #define SDHCI_CDNS_PHY_DLY_EMMC_DDR	0x08
+#define SDHCI_CDNS_PHY_DLY_SDCLK	0x0b
+#define SDHCI_CDNS_PHY_DLY_HSMMC	0x0c
+#define SDHCI_CDNS_PHY_DLY_STROBE	0x0d
 
 /*
  * The tuned val register is 6 bit-wide, but not the whole of the range is
@@ -64,13 +69,34 @@
 
 struct sdhci_cdns_priv {
 	void __iomem *hrs_addr;
+	bool enhanced_strobe;
 };
 
-static void sdhci_cdns_write_phy_reg(struct sdhci_cdns_priv *priv,
-				     u8 addr, u8 data)
+struct sdhci_cdns_phy_cfg {
+	const char *property;
+	u8 addr;
+};
+
+static const struct sdhci_cdns_phy_cfg sdhci_cdns_phy_cfgs[] = {
+	{ "cdns,phy-input-delay-sd-highspeed", SDHCI_CDNS_PHY_DLY_SD_HS, },
+	{ "cdns,phy-input-delay-legacy", SDHCI_CDNS_PHY_DLY_SD_DEFAULT, },
+	{ "cdns,phy-input-delay-sd-uhs-sdr12", SDHCI_CDNS_PHY_DLY_UHS_SDR12, },
+	{ "cdns,phy-input-delay-sd-uhs-sdr25", SDHCI_CDNS_PHY_DLY_UHS_SDR25, },
+	{ "cdns,phy-input-delay-sd-uhs-sdr50", SDHCI_CDNS_PHY_DLY_UHS_SDR50, },
+	{ "cdns,phy-input-delay-sd-uhs-ddr50", SDHCI_CDNS_PHY_DLY_UHS_DDR50, },
+	{ "cdns,phy-input-delay-mmc-highspeed", SDHCI_CDNS_PHY_DLY_EMMC_SDR, },
+	{ "cdns,phy-input-delay-mmc-ddr", SDHCI_CDNS_PHY_DLY_EMMC_DDR, },
+	{ "cdns,phy-dll-delay-sdclk", SDHCI_CDNS_PHY_DLY_SDCLK, },
+	{ "cdns,phy-dll-delay-sdclk-hsmmc", SDHCI_CDNS_PHY_DLY_HSMMC, },
+	{ "cdns,phy-dll-delay-strobe", SDHCI_CDNS_PHY_DLY_STROBE, },
+};
+
+static int sdhci_cdns_write_phy_reg(struct sdhci_cdns_priv *priv,
+				    u8 addr, u8 data)
 {
 	void __iomem *reg = priv->hrs_addr + SDHCI_CDNS_HRS04;
 	u32 tmp;
+	int ret;
 
 	tmp = (data << SDHCI_CDNS_HRS04_WDATA_SHIFT) |
 	      (addr << SDHCI_CDNS_HRS04_ADDR_SHIFT);
@@ -79,17 +105,36 @@ static void sdhci_cdns_write_phy_reg(struct sdhci_cdns_priv *priv,
 	tmp |= SDHCI_CDNS_HRS04_WR;
 	writel(tmp, reg);
 
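+	/* Poll (without sleeping) for up to 10 us for the write to be acked */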
+	ret = readl_poll_timeout(reg, tmp, tmp & SDHCI_CDNS_HRS04_ACK, 0, 10);
+	if (ret)
+		return ret;
+
 	tmp &= ~SDHCI_CDNS_HRS04_WR;
 	writel(tmp, reg);
+
+	return 0;
 }
 
-static void sdhci_cdns_phy_init(struct sdhci_cdns_priv *priv)
+static int sdhci_cdns_phy_init(struct device_node *np,
+			       struct sdhci_cdns_priv *priv)
 {
-	sdhci_cdns_write_phy_reg(priv, SDHCI_CDNS_PHY_DLY_SD_HS, 4);
-	sdhci_cdns_write_phy_reg(priv, SDHCI_CDNS_PHY_DLY_SD_DEFAULT, 4);
-	sdhci_cdns_write_phy_reg(priv, SDHCI_CDNS_PHY_DLY_EMMC_LEGACY, 9);
-	sdhci_cdns_write_phy_reg(priv, SDHCI_CDNS_PHY_DLY_EMMC_SDR, 2);
-	sdhci_cdns_write_phy_reg(priv, SDHCI_CDNS_PHY_DLY_EMMC_DDR, 3);
+	u32 val;
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(sdhci_cdns_phy_cfgs); i++) {
+		ret = of_property_read_u32(np, sdhci_cdns_phy_cfgs[i].property,
+					   &val);
+		if (ret)
+			continue;
+
+		ret = sdhci_cdns_write_phy_reg(priv,
+					       sdhci_cdns_phy_cfgs[i].addr,
+					       val);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static inline void *sdhci_cdns_priv(struct sdhci_host *host)
@@ -103,16 +148,35 @@ static unsigned int sdhci_cdns_get_timeout_clock(struct sdhci_host *host)
 {
 	/*
 	 * Cadence's spec says the Timeout Clock Frequency is the same as the
-	 * Base Clock Frequency.  Divide it by 1000 to return a value in kHz.
+	 * Base Clock Frequency.
 	 */
-	return host->max_clk / 1000;
+	return host->max_clk;
+}
+
+static void sdhci_cdns_set_emmc_mode(struct sdhci_cdns_priv *priv, u32 mode)
+{
+	u32 tmp;
+
+	/* The speed mode for eMMC is selected by HRS06 register */
+	tmp = readl(priv->hrs_addr + SDHCI_CDNS_HRS06);
+	tmp &= ~SDHCI_CDNS_HRS06_MODE_MASK;
+	tmp |= mode;
+	writel(tmp, priv->hrs_addr + SDHCI_CDNS_HRS06);
+}
+
+static u32 sdhci_cdns_get_emmc_mode(struct sdhci_cdns_priv *priv)
+{
+	u32 tmp;
+
+	tmp = readl(priv->hrs_addr + SDHCI_CDNS_HRS06);
+	return tmp & SDHCI_CDNS_HRS06_MODE_MASK;
 }
 
 static void sdhci_cdns_set_uhs_signaling(struct sdhci_host *host,
 					 unsigned int timing)
 {
 	struct sdhci_cdns_priv *priv = sdhci_cdns_priv(host);
-	u32 mode, tmp;
+	u32 mode;
 
 	switch (timing) {
 	case MMC_TIMING_MMC_HS:
@@ -125,18 +189,17 @@ static void sdhci_cdns_set_uhs_signaling(struct sdhci_host *host,
 		mode = SDHCI_CDNS_HRS06_MODE_MMC_HS200;
 		break;
 	case MMC_TIMING_MMC_HS400:
-		mode = SDHCI_CDNS_HRS06_MODE_MMC_HS400;
+		if (priv->enhanced_strobe)
+			mode = SDHCI_CDNS_HRS06_MODE_MMC_HS400ES;
+		else
+			mode = SDHCI_CDNS_HRS06_MODE_MMC_HS400;
 		break;
 	default:
 		mode = SDHCI_CDNS_HRS06_MODE_SD;
 		break;
 	}
 
-	/* The speed mode for eMMC is selected by HRS06 register */
-	tmp = readl(priv->hrs_addr + SDHCI_CDNS_HRS06);
-	tmp &= ~SDHCI_CDNS_HRS06_MODE_MASK;
-	tmp |= mode;
-	writel(tmp, priv->hrs_addr + SDHCI_CDNS_HRS06);
+	sdhci_cdns_set_emmc_mode(priv, mode);
 
 	/* For SD, fall back to the default handler */
 	if (mode == SDHCI_CDNS_HRS06_MODE_SD)
@@ -213,6 +276,26 @@ static int sdhci_cdns_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	return sdhci_cdns_set_tune_val(host, end_of_streak - max_streak / 2);
 }
 
+static void sdhci_cdns_hs400_enhanced_strobe(struct mmc_host *mmc,
+					     struct mmc_ios *ios)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_cdns_priv *priv = sdhci_cdns_priv(host);
+	u32 mode;
+
+	priv->enhanced_strobe = ios->enhanced_strobe;
+
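+	/* If the link is already in an HS400 mode, apply the change at once */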
+	mode = sdhci_cdns_get_emmc_mode(priv);
+
+	if (mode == SDHCI_CDNS_HRS06_MODE_MMC_HS400 && ios->enhanced_strobe)
+		sdhci_cdns_set_emmc_mode(priv,
+					 SDHCI_CDNS_HRS06_MODE_MMC_HS400ES);
+
+	if (mode == SDHCI_CDNS_HRS06_MODE_MMC_HS400ES && !ios->enhanced_strobe)
+		sdhci_cdns_set_emmc_mode(priv,
+					 SDHCI_CDNS_HRS06_MODE_MMC_HS400);
+}
+
 static int sdhci_cdns_probe(struct platform_device *pdev)
 {
 	struct sdhci_host *host;
@@ -220,8 +303,9 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
 	struct sdhci_cdns_priv *priv;
 	struct clk *clk;
 	int ret;
+	struct device *dev = &pdev->dev;
 
-	clk = devm_clk_get(&pdev->dev, NULL);
+	clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
@@ -240,14 +324,21 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
 
 	priv = sdhci_cdns_priv(host);
 	priv->hrs_addr = host->ioaddr;
+	priv->enhanced_strobe = false;
 	host->ioaddr += SDHCI_CDNS_SRS_BASE;
 	host->mmc_host_ops.execute_tuning = sdhci_cdns_execute_tuning;
+	host->mmc_host_ops.hs400_enhanced_strobe =
+				sdhci_cdns_hs400_enhanced_strobe;
+
+	sdhci_get_of_property(pdev);
 
 	ret = mmc_of_parse(host->mmc);
 	if (ret)
 		goto free;
 
-	sdhci_cdns_phy_init(priv);
+	ret = sdhci_cdns_phy_init(dev->of_node, priv);
+	if (ret)
+		goto free;
 
 	ret = sdhci_add_host(host);
 	if (ret)
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 445fc47dc3e7..23d8b8a73ae9 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -889,6 +889,28 @@ static void esdhc_set_strobe_dll(struct sdhci_host *host)
 	}
 }
 
+static void esdhc_reset_tuning(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
+	u32 ctrl;
+
+	/* Reset the tuning circuit */
+	if (esdhc_is_usdhc(imx_data)) {
+		if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) {
+			ctrl = readl(host->ioaddr + ESDHC_MIX_CTRL);
+			ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL;
+			ctrl &= ~ESDHC_MIX_CTRL_FBCLK_SEL;
+			writel(ctrl, host->ioaddr + ESDHC_MIX_CTRL);
+			writel(0, host->ioaddr + ESDHC_TUNE_CTRL_STATUS);
+		} else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) {
+			ctrl = readl(host->ioaddr + SDHCI_ACMD12_ERR);
+			ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL;
+			writel(ctrl, host->ioaddr + SDHCI_ACMD12_ERR);
+		}
+	}
+}
+
 static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
 {
 	u32 m;
@@ -932,6 +954,10 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
 		host->ops->set_clock(host, host->clock);
 		esdhc_set_strobe_dll(host);
 		break;
+	case MMC_TIMING_LEGACY:
+	default:
+		esdhc_reset_tuning(host);
+		break;
 	}
 
 	esdhc_change_pinstate(host, timing);
@@ -1323,6 +1349,9 @@ static int sdhci_esdhc_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	return sdhci_suspend_host(host);
 }
 
@@ -1347,6 +1376,9 @@ static int sdhci_esdhc_runtime_suspend(struct device *dev)
 
 	ret = sdhci_runtime_suspend_host(host);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	if (!sdhci_sdio_irq_enabled(host)) {
 		clk_disable_unprepare(imx_data->clk_per);
 		clk_disable_unprepare(imx_data->clk_ipg);
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index ece8b37e51dd..c4bbd7485987 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -37,6 +37,7 @@
 
 /* Protocol Control Register */
 #define ESDHC_PROCTL			0x28
+#define ESDHC_VOLT_SEL			0x00000400
 #define ESDHC_CTRL_4BITBUS		(0x1 << 1)
 #define ESDHC_CTRL_8BITBUS		(0x2 << 1)
 #define ESDHC_CTRL_BUSWIDTH_MASK	(0x3 << 1)
@@ -52,8 +53,14 @@
 #define ESDHC_CLOCK_HCKEN		0x00000002
 #define ESDHC_CLOCK_IPGEN		0x00000001
 
+/* Tuning Block Control Register */
+#define ESDHC_TBCTL			0x120
+#define ESDHC_TB_EN			0x00000004
+
 /* Control Register for DMA transfer */
 #define ESDHC_DMA_SYSCTL		0x40c
+#define ESDHC_PERIPHERAL_CLK_SEL	0x00080000
+#define ESDHC_FLUSH_ASYNC_FIFO		0x00040000
 #define ESDHC_DMA_SNOOP			0x00000040
 
 #endif /* _DRIVERS_MMC_SDHCI_ESDHC_H */
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 10cdc84d5113..9d601dc0d646 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -991,12 +991,8 @@ static void sdhci_msm_set_uhs_signaling(struct sdhci_host *host,
 		mmc_hostname(host->mmc), host->clock, uhs, ctrl_2);
 	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
 
-	spin_unlock_irq(&host->lock);
-
 	if (mmc->ios.timing == MMC_TIMING_MMC_HS400)
 		sdhci_msm_hs400(host, &mmc->ios);
-
-	spin_lock_irq(&host->lock);
 }
 
 static void sdhci_msm_voltage_switch(struct sdhci_host *host)
@@ -1089,13 +1085,9 @@ static void sdhci_msm_set_clock(struct sdhci_host *host, unsigned int clock)
 		goto out;
 	}
 
-	spin_unlock_irq(&host->lock);
-
 	sdhci_msm_hc_select_mode(host);
 
 	msm_set_clock_rate_for_bus_mode(host, clock);
-
-	spin_lock_irq(&host->lock);
 out:
 	__sdhci_msm_set_clock(host, clock);
 }
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 1cfd7f900339..ea6b36c88ae7 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -157,21 +157,6 @@ static int sdhci_arasan_syscon_write(struct sdhci_host *host,
 	return ret;
 }
 
-static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
-{
-	unsigned long freq;
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-
-	/* SDHCI timeout clock is in kHz */
-	freq = DIV_ROUND_UP(clk_get_rate(pltfm_host->clk), 1000);
-
-	/* or in MHz */
-	if (host->caps & SDHCI_TIMEOUT_CLK_UNIT)
-		freq = DIV_ROUND_UP(freq, 1000);
-
-	return freq;
-}
-
 static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -194,9 +179,7 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
 			 * through low speeds without power cycling.
 			 */
 			sdhci_set_clock(host, host->max_clk);
-			spin_unlock_irq(&host->lock);
 			phy_power_on(sdhci_arasan->phy);
-			spin_lock_irq(&host->lock);
 			sdhci_arasan->is_phy_on = true;
 
 			/*
@@ -215,18 +198,14 @@ static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
 	}
 
 	if (ctrl_phy && sdhci_arasan->is_phy_on) {
-		spin_unlock_irq(&host->lock);
 		phy_power_off(sdhci_arasan->phy);
-		spin_lock_irq(&host->lock);
 		sdhci_arasan->is_phy_on = false;
 	}
 
 	sdhci_set_clock(host, clock);
 
 	if (ctrl_phy) {
-		spin_unlock_irq(&host->lock);
 		phy_power_on(sdhci_arasan->phy);
-		spin_lock_irq(&host->lock);
 		sdhci_arasan->is_phy_on = true;
 	}
 }
@@ -286,7 +265,7 @@ static int sdhci_arasan_voltage_switch(struct mmc_host *mmc,
 static struct sdhci_ops sdhci_arasan_ops = {
 	.set_clock = sdhci_arasan_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
-	.get_timeout_clock = sdhci_arasan_get_timeout_clock,
+	.get_timeout_clock = sdhci_pltfm_clk_get_max_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_arasan_reset,
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -315,6 +294,9 @@ static int sdhci_arasan_suspend(struct device *dev)
 	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	ret = sdhci_suspend_host(host);
 	if (ret)
 		return ret;
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index d5430ed02a67..7611fd679f1a 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -98,9 +98,7 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
 	if (!IS_ERR(host->mmc->supply.vmmc)) {
 		struct mmc_host *mmc = host->mmc;
 
-		spin_unlock_irq(&host->lock);
 		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
-		spin_lock_irq(&host->lock);
 	}
 	sdhci_set_power_noreg(host, mode, vdd);
 }
@@ -140,6 +138,9 @@ static int sdhci_at91_runtime_suspend(struct device *dev)
 
 	ret = sdhci_runtime_suspend_host(host);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	clk_disable_unprepare(priv->gck);
 	clk_disable_unprepare(priv->hclock);
 	clk_disable_unprepare(priv->mainck);
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index d3aa67142839..44b016baa585 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -16,9 +16,12 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/sys_soc.h>
+#include <linux/clk.h>
+#include <linux/ktime.h>
 #include <linux/mmc/host.h>
 #include "sdhci-pltfm.h"
 #include "sdhci-esdhc.h"
@@ -30,6 +33,7 @@ struct sdhci_esdhc {
 	u8 vendor_ver;
 	u8 spec_ver;
 	bool quirk_incorrect_hostver;
+	unsigned int peripheral_clock;
 };
 
 /**
@@ -414,15 +418,25 @@ static int esdhc_of_enable_dma(struct sdhci_host *host)
 static unsigned int esdhc_of_get_max_clock(struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
 
-	return pltfm_host->clock;
+	if (esdhc->peripheral_clock)
+		return esdhc->peripheral_clock;
+	else
+		return pltfm_host->clock;
 }
 
 static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
+	unsigned int clock;
 
-	return pltfm_host->clock / 256 / 16;
+	if (esdhc->peripheral_clock)
+		clock = esdhc->peripheral_clock;
+	else
+		clock = pltfm_host->clock;
+	return clock / 256 / 16;
 }
 
 static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
@@ -431,7 +445,7 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
 	struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
 	int pre_div = 1;
 	int div = 1;
-	u32 timeout;
+	ktime_t timeout;
 	u32 temp;
 
 	host->mmc->actual_clock = 0;
@@ -443,6 +457,20 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
 	if (esdhc->vendor_ver < VENDOR_V_23)
 		pre_div = 2;
 
+	/*
+	 * Limit SD clock to 167MHz for ls1046a according to its datasheet
+	 */
+	if (clock > 167000000 &&
+	    of_find_compatible_node(NULL, NULL, "fsl,ls1046a-esdhc"))
+		clock = 167000000;
+
+	/*
+	 * Limit SD clock to 125MHz for ls1012a according to its datasheet
+	 */
+	if (clock > 125000000 &&
+	    of_find_compatible_node(NULL, NULL, "fsl,ls1012a-esdhc"))
+		clock = 125000000;
+
 	/* Workaround to reduce the clock frequency for p1010 esdhc */
 	if (of_find_compatible_node(NULL, NULL, "fsl,p1010-esdhc")) {
 		if (clock > 20000000)
@@ -475,15 +503,14 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
 	sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
 
 	/* Wait max 20 ms */
-	timeout = 20;
+	timeout = ktime_add_ms(ktime_get(), 20);
 	while (!(sdhci_readl(host, ESDHC_PRSSTAT) & ESDHC_CLOCK_STABLE)) {
-		if (timeout == 0) {
+		if (ktime_after(ktime_get(), timeout)) {
 			pr_err("%s: Internal clock never stabilised.\n",
 				mmc_hostname(host->mmc));
 			return;
 		}
-		timeout--;
-		mdelay(1);
+		udelay(10);
 	}
 
 	temp |= ESDHC_CLOCK_SDCLKEN;
@@ -512,6 +539,33 @@ static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width)
 	sdhci_writel(host, ctrl, ESDHC_PROCTL);
 }
 
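+/* Gate or ungate SDCLK, then poll up to 20 ms for the clock to stabilise */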
+static void esdhc_clock_enable(struct sdhci_host *host, bool enable)
+{
+	u32 val;
+	ktime_t timeout;
+
+	val = sdhci_readl(host, ESDHC_SYSTEM_CONTROL);
+
+	if (enable)
+		val |= ESDHC_CLOCK_SDCLKEN;
+	else
+		val &= ~ESDHC_CLOCK_SDCLKEN;
+
+	sdhci_writel(host, val, ESDHC_SYSTEM_CONTROL);
+
+	/* Wait max 20 ms */
+	timeout = ktime_add_ms(ktime_get(), 20);
+	val = ESDHC_CLOCK_STABLE;
+	while (!(sdhci_readl(host, ESDHC_PRSSTAT) & val)) {
+		if (ktime_after(ktime_get(), timeout)) {
+			pr_err("%s: Internal clock never stabilised.\n",
+				mmc_hostname(host->mmc));
+			break;
+		}
+		udelay(10);
+	}
+}
+
 static void esdhc_reset(struct sdhci_host *host, u8 mask)
 {
 	sdhci_reset(host, mask);
@@ -520,6 +574,95 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask)
 	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
+/*
+ * The SCFG, Supplemental Configuration Unit, provides SoC-specific
+ * configuration and status registers for the device. On some platforms
+ * the SCFG has an SDHC IO VSEL control register, which is used to
+ * support SDHC IO voltage switching.
+ */
+static const struct of_device_id scfg_device_ids[] = {
+	{ .compatible = "fsl,t1040-scfg", },
+	{ .compatible = "fsl,ls1012a-scfg", },
+	{ .compatible = "fsl,ls1046a-scfg", },
+	{}
+};
+
+/* SDHC IO VSEL control register definition */
+#define SCFG_SDHCIOVSELCR	0x408
+#define SDHCIOVSELCR_TGLEN	0x80000000
+#define SDHCIOVSELCR_VSELVAL	0x60000000
+#define SDHCIOVSELCR_SDHC_VS	0x00000001
+
+static int esdhc_signal_voltage_switch(struct mmc_host *mmc,
+				       struct mmc_ios *ios)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct device_node *scfg_node;
+	void __iomem *scfg_base = NULL;
+	u32 sdhciovselcr;
+	u32 val;
+
+	/*
+	 * Signal Voltage Switching is only applicable for Host Controllers
+	 * v3.00 and above.
+	 */
+	if (host->version < SDHCI_SPEC_300)
+		return 0;
+
+	val = sdhci_readl(host, ESDHC_PROCTL);
+
+	switch (ios->signal_voltage) {
+	case MMC_SIGNAL_VOLTAGE_330:
+		val &= ~ESDHC_VOLT_SEL;
+		sdhci_writel(host, val, ESDHC_PROCTL);
+		return 0;
+	case MMC_SIGNAL_VOLTAGE_180:
+		scfg_node = of_find_matching_node(NULL, scfg_device_ids);
+		if (scfg_node)
+			scfg_base = of_iomap(scfg_node, 0);
+		if (scfg_base) {
+			sdhciovselcr = SDHCIOVSELCR_TGLEN |
+				       SDHCIOVSELCR_VSELVAL;
+			iowrite32be(sdhciovselcr,
+				scfg_base + SCFG_SDHCIOVSELCR);
+
+			val |= ESDHC_VOLT_SEL;
+			sdhci_writel(host, val, ESDHC_PROCTL);
+			mdelay(5);
+
+			sdhciovselcr = SDHCIOVSELCR_TGLEN |
+				       SDHCIOVSELCR_SDHC_VS;
+			iowrite32be(sdhciovselcr,
+				scfg_base + SCFG_SDHCIOVSELCR);
+			iounmap(scfg_base);
+		} else {
+			val |= ESDHC_VOLT_SEL;
+			sdhci_writel(host, val, ESDHC_PROCTL);
+		}
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+static int esdhc_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	u32 val;
+
+	/* Use tuning block for tuning procedure */
+	esdhc_clock_enable(host, false);
+	val = sdhci_readl(host, ESDHC_DMA_SYSCTL);
+	val |= ESDHC_FLUSH_ASYNC_FIFO;
+	sdhci_writel(host, val, ESDHC_DMA_SYSCTL);
+
+	val = sdhci_readl(host, ESDHC_TBCTL);
+	val |= ESDHC_TB_EN;
+	sdhci_writel(host, val, ESDHC_TBCTL);
+	esdhc_clock_enable(host, true);
+
+	return sdhci_execute_tuning(mmc, opcode);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static u32 esdhc_proctl;
 static int esdhc_of_suspend(struct device *dev)
@@ -528,6 +671,9 @@ static int esdhc_of_suspend(struct device *dev)
 
 	esdhc_proctl = sdhci_readl(host, SDHCI_HOST_CONTROL);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	return sdhci_suspend_host(host);
 }
 
@@ -610,6 +756,9 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host;
 	struct sdhci_esdhc *esdhc;
+	struct device_node *np;
+	struct clk *clk;
+	u32 val;
 	u16 host_ver;
 
 	pltfm_host = sdhci_priv(host);
@@ -623,6 +772,32 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
 		esdhc->quirk_incorrect_hostver = true;
 	else
 		esdhc->quirk_incorrect_hostver = false;
+
+	np = pdev->dev.of_node;
+	clk = of_clk_get(np, 0);
+	if (!IS_ERR(clk)) {
+		/*
+		 * esdhc->peripheral_clock is set to the eSDHC base clock
+		 * when the peripheral clock is used.
+		 * For ls1046a, the rate reported by the common clk API is
+		 * the peripheral clock, while the eSDHC base clock is half
+		 * the peripheral clock.
+		 */
+		if (of_device_is_compatible(np, "fsl,ls1046a-esdhc"))
+			esdhc->peripheral_clock = clk_get_rate(clk) / 2;
+		else
+			esdhc->peripheral_clock = clk_get_rate(clk);
+
+		clk_put(clk);
+	}
+
+	if (esdhc->peripheral_clock) {
+		esdhc_clock_enable(host, false);
+		val = sdhci_readl(host, ESDHC_DMA_SYSCTL);
+		val |= ESDHC_PERIPHERAL_CLK_SEL;
+		sdhci_writel(host, val, ESDHC_DMA_SYSCTL);
+		esdhc_clock_enable(host, true);
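+	/* The OF match data encodes the is2440 flag in the pointer value */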
+	}
 }
 
 static int sdhci_esdhc_probe(struct platform_device *pdev)
@@ -645,6 +820,11 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
+	host->mmc_host_ops.start_signal_voltage_switch =
+		esdhc_signal_voltage_switch;
+	host->mmc_host_ops.execute_tuning = esdhc_execute_tuning;
+	host->tuning_delay = 1;
+
 	esdhc_init(pdev, host);
 
 	sdhci_get_of_property(pdev);
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 86560d590786..92fc3f7c538d 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -12,6 +12,7 @@
  *     - JMicron (hardware and technical support)
  */
 
+#include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
@@ -36,10 +37,138 @@
 static int sdhci_pci_enable_dma(struct sdhci_host *host);
 static void sdhci_pci_set_bus_width(struct sdhci_host *host, int width);
 static void sdhci_pci_hw_reset(struct sdhci_host *host);
-static int sdhci_pci_select_drive_strength(struct sdhci_host *host,
-					   struct mmc_card *card,
-					   unsigned int max_dtr, int host_drv,
-					   int card_drv, int *drv_type);
+
+#ifdef CONFIG_PM_SLEEP
+static int __sdhci_pci_suspend_host(struct sdhci_pci_chip *chip)
+{
+	int i, ret;
+
+	for (i = 0; i < chip->num_slots; i++) {
+		struct sdhci_pci_slot *slot = chip->slots[i];
+		struct sdhci_host *host;
+
+		if (!slot)
+			continue;
+
+		host = slot->host;
+
+		if (chip->pm_retune && host->tuning_mode != SDHCI_TUNING_MODE_3)
+			mmc_retune_needed(host->mmc);
+
+		ret = sdhci_suspend_host(host);
+		if (ret)
+			goto err_pci_suspend;
+
+		if (host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)
+			sdhci_enable_irq_wakeups(host);
+	}
+
+	return 0;
+
+err_pci_suspend:
+	while (--i >= 0)
+		sdhci_resume_host(chip->slots[i]->host);
+	return ret;
+}
+
+static int sdhci_pci_init_wakeup(struct sdhci_pci_chip *chip)
+{
+	mmc_pm_flag_t pm_flags = 0;
+	int i;
+
+	for (i = 0; i < chip->num_slots; i++) {
+		struct sdhci_pci_slot *slot = chip->slots[i];
+
+		if (slot)
+			pm_flags |= slot->host->mmc->pm_flags;
+	}
+
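+	/* Arm wakeup only if a slot keeps power and wakes on SDIO IRQ */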
+	return device_init_wakeup(&chip->pdev->dev,
+				  (pm_flags & MMC_PM_KEEP_POWER) &&
+				  (pm_flags & MMC_PM_WAKE_SDIO_IRQ));
+}
+
+static int sdhci_pci_suspend_host(struct sdhci_pci_chip *chip)
+{
+	int ret;
+
+	ret = __sdhci_pci_suspend_host(chip);
+	if (ret)
+		return ret;
+
+	sdhci_pci_init_wakeup(chip);
+
+	return 0;
+}
+
+int sdhci_pci_resume_host(struct sdhci_pci_chip *chip)
+{
+	struct sdhci_pci_slot *slot;
+	int i, ret;
+
+	for (i = 0; i < chip->num_slots; i++) {
+		slot = chip->slots[i];
+		if (!slot)
+			continue;
+
+		ret = sdhci_resume_host(slot->host);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int sdhci_pci_runtime_suspend_host(struct sdhci_pci_chip *chip)
+{
+	struct sdhci_pci_slot *slot;
+	struct sdhci_host *host;
+	int i, ret;
+
+	for (i = 0; i < chip->num_slots; i++) {
+		slot = chip->slots[i];
+		if (!slot)
+			continue;
+
+		host = slot->host;
+
+		ret = sdhci_runtime_suspend_host(host);
+		if (ret)
+			goto err_pci_runtime_suspend;
+
+		if (chip->rpm_retune &&
+		    host->tuning_mode != SDHCI_TUNING_MODE_3)
+			mmc_retune_needed(host->mmc);
+	}
+
+	return 0;
+
+err_pci_runtime_suspend:
+	while (--i >= 0)
+		sdhci_runtime_resume_host(chip->slots[i]->host);
+	return ret;
+}
+
+static int sdhci_pci_runtime_resume_host(struct sdhci_pci_chip *chip)
+{
+	struct sdhci_pci_slot *slot;
+	int i, ret;
+
+	for (i = 0; i < chip->num_slots; i++) {
+		slot = chip->slots[i];
+		if (!slot)
+			continue;
+
+		ret = sdhci_runtime_resume_host(slot->host);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#endif
 
 /*****************************************************************************\
  *                                                                           *
@@ -71,14 +200,16 @@ static int ricoh_mmc_probe_slot(struct sdhci_pci_slot *slot)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int ricoh_mmc_resume(struct sdhci_pci_chip *chip)
 {
 	/* Apply a delay to allow controller to settle */
 	/* Otherwise it becomes confused if card state changed
 		during suspend */
 	msleep(500);
-	return 0;
+	return sdhci_pci_resume_host(chip);
 }
+#endif
 
 static const struct sdhci_pci_fixes sdhci_ricoh = {
 	.probe		= ricoh_probe,
@@ -89,7 +220,9 @@ static const struct sdhci_pci_fixes sdhci_ricoh = {
 
 static const struct sdhci_pci_fixes sdhci_ricoh_mmc = {
 	.probe_slot	= ricoh_mmc_probe_slot,
+#ifdef CONFIG_PM_SLEEP
 	.resume		= ricoh_mmc_resume,
+#endif
 	.quirks		= SDHCI_QUIRK_32BIT_DMA_ADDR |
 			  SDHCI_QUIRK_CLOCK_BEFORE_RESET |
 			  SDHCI_QUIRK_NO_CARD_NO_RESET |
@@ -259,6 +392,81 @@ static const struct sdhci_pci_fixes sdhci_intel_pch_sdio = {
 	.probe_slot	= pch_hc_probe_slot,
 };
 
+enum {
+	INTEL_DSM_FNS		=  0,
+	INTEL_DSM_DRV_STRENGTH	=  9,
+	INTEL_DSM_D3_RETUNE	= 10,
+};
+
+struct intel_host {
+	u32	dsm_fns;
+	int	drv_strength;
+	bool	d3_retune;
+};
+
+static const u8 intel_dsm_uuid[] = {
+	0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46,
+	0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61,
+};
+
+static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
+		       unsigned int fn, u32 *result)
+{
+	union acpi_object *obj;
+	int err = 0;
+	size_t len;
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL);
+	if (!obj)
+		return -EOPNOTSUPP;
+
+	if (obj->type != ACPI_TYPE_BUFFER || obj->buffer.length < 1) {
+		err = -EINVAL;
+		goto out;
+	}
+
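+	/* The returned buffer may be shorter than 4 bytes; zero-fill the rest */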
+	len = min_t(size_t, obj->buffer.length, 4);
+
+	*result = 0;
+	memcpy(result, obj->buffer.pointer, len);
+out:
+	ACPI_FREE(obj);
+
+	return err;
+}
+
+static int intel_dsm(struct intel_host *intel_host, struct device *dev,
+		     unsigned int fn, u32 *result)
+{
+	if (fn > 31 || !(intel_host->dsm_fns & (1 << fn)))
+		return -EOPNOTSUPP;
+
+	return __intel_dsm(intel_host, dev, fn, result);
+}
+
+static void intel_dsm_init(struct intel_host *intel_host, struct device *dev,
+			   struct mmc_host *mmc)
+{
+	int err;
+	u32 val;
+
+	err = __intel_dsm(intel_host, dev, INTEL_DSM_FNS, &intel_host->dsm_fns);
+	if (err) {
+		pr_debug("%s: DSM not supported, error %d\n",
+			 mmc_hostname(mmc), err);
+		return;
+	}
+
+	pr_debug("%s: DSM function mask %#x\n",
+		 mmc_hostname(mmc), intel_host->dsm_fns);
+
+	err = intel_dsm(intel_host, dev, INTEL_DSM_DRV_STRENGTH, &val);
+	intel_host->drv_strength = err ? 0 : val;
+
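+	/* If the D3 re-tune query fails, err on the side of re-tuning */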
+	err = intel_dsm(intel_host, dev, INTEL_DSM_D3_RETUNE, &val);
+	intel_host->d3_retune = err ? true : !!val;
+}
+
 static void sdhci_pci_int_hw_reset(struct sdhci_host *host)
 {
 	u8 reg;
@@ -274,67 +482,15 @@ static void sdhci_pci_int_hw_reset(struct sdhci_host *host)
 	usleep_range(300, 1000);
 }
 
-static int spt_select_drive_strength(struct sdhci_host *host,
-				     struct mmc_card *card,
-				     unsigned int max_dtr,
-				     int host_drv, int card_drv, int *drv_type)
-{
-	int drive_strength;
-
-	if (sdhci_pci_spt_drive_strength > 0)
-		drive_strength = sdhci_pci_spt_drive_strength & 0xf;
-	else
-		drive_strength = 0; /* Default 50-ohm */
-
-	if ((mmc_driver_type_mask(drive_strength) & card_drv) == 0)
-		drive_strength = 0; /* Default 50-ohm */
-
-	return drive_strength;
-}
-
-/* Try to read the drive strength from the card */
-static void spt_read_drive_strength(struct sdhci_host *host)
+static int intel_select_drive_strength(struct mmc_card *card,
+				       unsigned int max_dtr, int host_drv,
+				       int card_drv, int *drv_type)
 {
-	u32 val, i, t;
-	u16 m;
-
-	if (sdhci_pci_spt_drive_strength)
-		return;
-
-	sdhci_pci_spt_drive_strength = -1;
-
-	m = sdhci_readw(host, SDHCI_HOST_CONTROL2) & 0x7;
-	if (m != 3 && m != 5)
-		return;
-	val = sdhci_readl(host, SDHCI_PRESENT_STATE);
-	if (val & 0x3)
-		return;
-	sdhci_writel(host, 0x007f0023, SDHCI_INT_ENABLE);
-	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
-	sdhci_writew(host, 0x10, SDHCI_TRANSFER_MODE);
-	sdhci_writeb(host, 0xe, SDHCI_TIMEOUT_CONTROL);
-	sdhci_writew(host, 512, SDHCI_BLOCK_SIZE);
-	sdhci_writew(host, 1, SDHCI_BLOCK_COUNT);
-	sdhci_writel(host, 0, SDHCI_ARGUMENT);
-	sdhci_writew(host, 0x83b, SDHCI_COMMAND);
-	for (i = 0; i < 1000; i++) {
-		val = sdhci_readl(host, SDHCI_INT_STATUS);
-		if (val & 0xffff8000)
-			return;
-		if (val & 0x20)
-			break;
-		udelay(1);
-	}
-	val = sdhci_readl(host, SDHCI_PRESENT_STATE);
-	if (!(val & 0x800))
-		return;
-	for (i = 0; i < 47; i++)
-		val = sdhci_readl(host, SDHCI_BUFFER);
-	t = val & 0xf00;
-	if (t != 0x200 && t != 0x300)
-		return;
+	struct sdhci_host *host = mmc_priv(card->host);
+	struct sdhci_pci_slot *slot = sdhci_priv(host);
+	struct intel_host *intel_host = sdhci_pci_priv(slot);
 
-	sdhci_pci_spt_drive_strength = 0x10 | ((val >> 12) & 0xf);
+	return intel_host->drv_strength;
 }
 
 static int bxt_get_cd(struct mmc_host *mmc)
@@ -359,8 +515,57 @@ out:
 	return ret;
 }
 
+#define SDHCI_INTEL_PWR_TIMEOUT_CNT	20
+#define SDHCI_INTEL_PWR_TIMEOUT_UDELAY	100
+
+static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
+				  unsigned short vdd)
+{
+	int cntr;
+	u8 reg;
+
+	sdhci_set_power(host, mode, vdd);
+
+	if (mode == MMC_POWER_OFF)
+		return;
+
+	/*
+	 * Bus power might not enable after D3 -> D0 transition due to the
+	 * present state not yet having propagated. Retry for up to 2ms.
+	 */
+	for (cntr = 0; cntr < SDHCI_INTEL_PWR_TIMEOUT_CNT; cntr++) {
+		reg = sdhci_readb(host, SDHCI_POWER_CONTROL);
+		if (reg & SDHCI_POWER_ON)
+			break;
+		udelay(SDHCI_INTEL_PWR_TIMEOUT_UDELAY);
+		reg |= SDHCI_POWER_ON;
+		sdhci_writeb(host, reg, SDHCI_POWER_CONTROL);
+	}
+}
+
+static const struct sdhci_ops sdhci_intel_byt_ops = {
+	.set_clock		= sdhci_set_clock,
+	.set_power		= sdhci_intel_set_power,
+	.enable_dma		= sdhci_pci_enable_dma,
+	.set_bus_width		= sdhci_pci_set_bus_width,
+	.reset			= sdhci_reset,
+	.set_uhs_signaling	= sdhci_set_uhs_signaling,
+	.hw_reset		= sdhci_pci_hw_reset,
+};
+
+static void byt_read_dsm(struct sdhci_pci_slot *slot)
+{
+	struct intel_host *intel_host = sdhci_pci_priv(slot);
+	struct device *dev = &slot->chip->pdev->dev;
+	struct mmc_host *mmc = slot->host->mmc;
+
+	intel_dsm_init(intel_host, dev, mmc);
+	slot->chip->rpm_retune = intel_host->d3_retune;
+}
+
 static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 {
+	byt_read_dsm(slot);
 	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
 				 MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR |
 				 MMC_CAP_CMD_DURING_TFR |
@@ -369,10 +574,8 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 	slot->hw_reset = sdhci_pci_int_hw_reset;
 	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BSW_EMMC)
 		slot->host->timeout_clk = 1000; /* 1000 kHz i.e. 1 MHz */
-	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_SPT_EMMC) {
-		spt_read_drive_strength(slot->host);
-		slot->select_drive_strength = spt_select_drive_strength;
-	}
+	slot->host->mmc_host_ops.select_drive_strength =
+						intel_select_drive_strength;
 	return 0;
 }
 
@@ -405,6 +608,8 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 {
 	int err;
 
+	byt_read_dsm(slot);
+
 	err = ni_set_max_freq(slot);
 	if (err)
 		return err;
@@ -416,6 +621,7 @@ static int ni_byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 
 static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 {
+	byt_read_dsm(slot);
 	slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD | MMC_CAP_NONREMOVABLE |
 				 MMC_CAP_WAIT_WHILE_BUSY;
 	return 0;
@@ -423,63 +629,20 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 
 static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
 {
-	slot->host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
+	byt_read_dsm(slot);
+	slot->host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY |
+				 MMC_CAP_AGGRESSIVE_PM;
 	slot->cd_idx = 0;
 	slot->cd_override_level = true;
 	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD ||
 	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXTM_SD ||
 	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD ||
-	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_SD) {
+	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_SD)
 		slot->host->mmc_host_ops.get_cd = bxt_get_cd;
-		slot->host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
-	}
 
 	return 0;
 }
 
-#define SDHCI_INTEL_PWR_TIMEOUT_CNT	20
-#define SDHCI_INTEL_PWR_TIMEOUT_UDELAY	100
-
-static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
-				  unsigned short vdd)
-{
-	int cntr;
-	u8 reg;
-
-	sdhci_set_power(host, mode, vdd);
-
-	if (mode == MMC_POWER_OFF)
-		return;
-
-	spin_unlock_irq(&host->lock);
-
-	/*
-	 * Bus power might not enable after D3 -> D0 transition due to the
-	 * present state not yet having propagated. Retry for up to 2ms.
-	 */
-	for (cntr = 0; cntr < SDHCI_INTEL_PWR_TIMEOUT_CNT; cntr++) {
-		reg = sdhci_readb(host, SDHCI_POWER_CONTROL);
-		if (reg & SDHCI_POWER_ON)
-			break;
-		udelay(SDHCI_INTEL_PWR_TIMEOUT_UDELAY);
-		reg |= SDHCI_POWER_ON;
-		sdhci_writeb(host, reg, SDHCI_POWER_CONTROL);
-	}
-
-	spin_lock_irq(&host->lock);
-}
-
-static const struct sdhci_ops sdhci_intel_byt_ops = {
-	.set_clock		= sdhci_set_clock,
-	.set_power		= sdhci_intel_set_power,
-	.enable_dma		= sdhci_pci_enable_dma,
-	.set_bus_width		= sdhci_pci_set_bus_width,
-	.reset			= sdhci_reset,
-	.set_uhs_signaling	= sdhci_set_uhs_signaling,
-	.hw_reset		= sdhci_pci_hw_reset,
-	.select_drive_strength	= sdhci_pci_select_drive_strength,
-};
-
 static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
 	.allow_runtime_pm = true,
 	.probe_slot	= byt_emmc_probe_slot,
@@ -488,6 +651,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
 			  SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 |
 			  SDHCI_QUIRK2_STOP_WITH_TC,
 	.ops		= &sdhci_intel_byt_ops,
+	.priv_size	= sizeof(struct intel_host),
 };
 
 static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = {
@@ -497,6 +661,7 @@ static const struct sdhci_pci_fixes sdhci_ni_byt_sdio = {
 	.allow_runtime_pm = true,
 	.probe_slot	= ni_byt_sdio_probe_slot,
 	.ops		= &sdhci_intel_byt_ops,
+	.priv_size	= sizeof(struct intel_host),
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
@@ -506,6 +671,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
 	.allow_runtime_pm = true,
 	.probe_slot	= byt_sdio_probe_slot,
 	.ops		= &sdhci_intel_byt_ops,
+	.priv_size	= sizeof(struct intel_host),
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
@@ -517,6 +683,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
 	.own_cd_for_runtime_pm = true,
 	.probe_slot	= byt_sd_probe_slot,
 	.ops		= &sdhci_intel_byt_ops,
+	.priv_size	= sizeof(struct intel_host),
 };
 
 /* Define Host controllers for Intel Merrifield platform */
@@ -719,9 +886,14 @@ static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead)
 		jmicron_enable_mmc(slot->host, 0);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int jmicron_suspend(struct sdhci_pci_chip *chip)
 {
-	int i;
+	int i, ret;
+
+	ret = __sdhci_pci_suspend_host(chip);
+	if (ret)
+		return ret;
 
 	if (chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB38X_MMC ||
 	    chip->pdev->device == PCI_DEVICE_ID_JMICRON_JMB388_ESD) {
@@ -729,6 +901,8 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip)
 			jmicron_enable_mmc(chip->slots[i]->host, 0);
 	}
 
+	sdhci_pci_init_wakeup(chip);
+
 	return 0;
 }
 
@@ -748,15 +922,18 @@ static int jmicron_resume(struct sdhci_pci_chip *chip)
 		return ret;
 	}
 
-	return 0;
+	return sdhci_pci_resume_host(chip);
 }
+#endif
 
 static const struct sdhci_pci_fixes sdhci_o2 = {
 	.probe = sdhci_pci_o2_probe,
 	.quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.quirks2 = SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD,
 	.probe_slot = sdhci_pci_o2_probe_slot,
+#ifdef CONFIG_PM_SLEEP
 	.resume = sdhci_pci_o2_resume,
+#endif
 };
 
 static const struct sdhci_pci_fixes sdhci_jmicron = {
@@ -765,8 +942,10 @@ static const struct sdhci_pci_fixes sdhci_jmicron = {
 	.probe_slot	= jmicron_probe_slot,
 	.remove_slot	= jmicron_remove_slot,
 
+#ifdef CONFIG_PM_SLEEP
 	.suspend	= jmicron_suspend,
 	.resume		= jmicron_resume,
+#endif
 };
 
 /* SysKonnect CardBus2SDIO extra registers */
@@ -1617,20 +1796,6 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host)
 		slot->hw_reset(host);
 }
 
-static int sdhci_pci_select_drive_strength(struct sdhci_host *host,
-					   struct mmc_card *card,
-					   unsigned int max_dtr, int host_drv,
-					   int card_drv, int *drv_type)
-{
-	struct sdhci_pci_slot *slot = sdhci_priv(host);
-
-	if (!slot->select_drive_strength)
-		return 0;
-
-	return slot->select_drive_strength(host, card, max_dtr, host_drv,
-					   card_drv, drv_type);
-}
-
 static const struct sdhci_ops sdhci_pci_ops = {
 	.set_clock	= sdhci_set_clock,
 	.enable_dma	= sdhci_pci_enable_dma,
@@ -1638,7 +1803,6 @@ static const struct sdhci_ops sdhci_pci_ops = {
 	.reset		= sdhci_reset,
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
 	.hw_reset		= sdhci_pci_hw_reset,
-	.select_drive_strength	= sdhci_pci_select_drive_strength,
 };
 
 /*****************************************************************************\
@@ -1651,83 +1815,29 @@ static const struct sdhci_ops sdhci_pci_ops = {
 static int sdhci_pci_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct sdhci_pci_chip *chip;
-	struct sdhci_pci_slot *slot;
-	mmc_pm_flag_t slot_pm_flags;
-	mmc_pm_flag_t pm_flags = 0;
-	int i, ret;
+	struct sdhci_pci_chip *chip = pci_get_drvdata(pdev);
 
-	chip = pci_get_drvdata(pdev);
 	if (!chip)
 		return 0;
 
-	for (i = 0; i < chip->num_slots; i++) {
-		slot = chip->slots[i];
-		if (!slot)
-			continue;
-
-		ret = sdhci_suspend_host(slot->host);
-
-		if (ret)
-			goto err_pci_suspend;
-
-		slot_pm_flags = slot->host->mmc->pm_flags;
-		if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ)
-			sdhci_enable_irq_wakeups(slot->host);
-
-		pm_flags |= slot_pm_flags;
-	}
-
-	if (chip->fixes && chip->fixes->suspend) {
-		ret = chip->fixes->suspend(chip);
-		if (ret)
-			goto err_pci_suspend;
-	}
-
-	if (pm_flags & MMC_PM_KEEP_POWER) {
-		if (pm_flags & MMC_PM_WAKE_SDIO_IRQ)
-			device_init_wakeup(dev, true);
-		else
-			device_init_wakeup(dev, false);
-	} else
-		device_init_wakeup(dev, false);
-
-	return 0;
+	if (chip->fixes && chip->fixes->suspend)
+		return chip->fixes->suspend(chip);
 
-err_pci_suspend:
-	while (--i >= 0)
-		sdhci_resume_host(chip->slots[i]->host);
-	return ret;
+	return sdhci_pci_suspend_host(chip);
 }
 
 static int sdhci_pci_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct sdhci_pci_chip *chip;
-	struct sdhci_pci_slot *slot;
-	int i, ret;
+	struct sdhci_pci_chip *chip = pci_get_drvdata(pdev);
 
-	chip = pci_get_drvdata(pdev);
 	if (!chip)
 		return 0;
 
-	if (chip->fixes && chip->fixes->resume) {
-		ret = chip->fixes->resume(chip);
-		if (ret)
-			return ret;
-	}
-
-	for (i = 0; i < chip->num_slots; i++) {
-		slot = chip->slots[i];
-		if (!slot)
-			continue;
-
-		ret = sdhci_resume_host(slot->host);
-		if (ret)
-			return ret;
-	}
+	if (chip->fixes && chip->fixes->resume)
+		return chip->fixes->resume(chip);
 
-	return 0;
+	return sdhci_pci_resume_host(chip);
 }
 #endif
 
@@ -1735,67 +1845,29 @@ static int sdhci_pci_resume(struct device *dev)
 static int sdhci_pci_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct sdhci_pci_chip *chip;
-	struct sdhci_pci_slot *slot;
-	int i, ret;
+	struct sdhci_pci_chip *chip = pci_get_drvdata(pdev);
 
-	chip = pci_get_drvdata(pdev);
 	if (!chip)
 		return 0;
 
-	for (i = 0; i < chip->num_slots; i++) {
-		slot = chip->slots[i];
-		if (!slot)
-			continue;
-
-		ret = sdhci_runtime_suspend_host(slot->host);
-
-		if (ret)
-			goto err_pci_runtime_suspend;
-	}
+	if (chip->fixes && chip->fixes->runtime_suspend)
+		return chip->fixes->runtime_suspend(chip);
 
-	if (chip->fixes && chip->fixes->suspend) {
-		ret = chip->fixes->suspend(chip);
-		if (ret)
-			goto err_pci_runtime_suspend;
-	}
-
-	return 0;
-
-err_pci_runtime_suspend:
-	while (--i >= 0)
-		sdhci_runtime_resume_host(chip->slots[i]->host);
-	return ret;
+	return sdhci_pci_runtime_suspend_host(chip);
 }
 
 static int sdhci_pci_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct sdhci_pci_chip *chip;
-	struct sdhci_pci_slot *slot;
-	int i, ret;
+	struct sdhci_pci_chip *chip = pci_get_drvdata(pdev);
 
-	chip = pci_get_drvdata(pdev);
 	if (!chip)
 		return 0;
 
-	if (chip->fixes && chip->fixes->resume) {
-		ret = chip->fixes->resume(chip);
-		if (ret)
-			return ret;
-	}
+	if (chip->fixes && chip->fixes->runtime_resume)
+		return chip->fixes->runtime_resume(chip);
 
-	for (i = 0; i < chip->num_slots; i++) {
-		slot = chip->slots[i];
-		if (!slot)
-			continue;
-
-		ret = sdhci_runtime_resume_host(slot->host);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+	return sdhci_pci_runtime_resume_host(chip);
 }
 #endif
 
@@ -1818,6 +1890,7 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
 	struct sdhci_pci_slot *slot;
 	struct sdhci_host *host;
 	int ret, bar = first_bar + slotno;
+	size_t priv_size = chip->fixes ? chip->fixes->priv_size : 0;
 
 	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar);
@@ -1839,7 +1912,7 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
 		return ERR_PTR(-ENODEV);
 	}
 
-	host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pci_slot));
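+	/* Reserve the fixes' private area directly after the slot struct */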
+	host = sdhci_alloc_host(&pdev->dev, sizeof(*slot) + priv_size);
 	if (IS_ERR(host)) {
 		dev_err(&pdev->dev, "cannot allocate host\n");
 		return ERR_CAST(host);
@@ -1919,7 +1992,10 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
 		}
 	}
 
-	ret = sdhci_add_host(host);
+	if (chip->fixes && chip->fixes->add_host)
+		ret = chip->fixes->add_host(slot);
+	else
+		ret = sdhci_add_host(host);
 	if (ret)
 		goto remove;
 
@@ -2042,6 +2118,8 @@ static int sdhci_pci_probe(struct pci_dev *pdev,
 		chip->allow_runtime_pm = chip->fixes->allow_runtime_pm;
 	}
 	chip->num_slots = slots;
+	chip->pm_retune = true;
+	chip->rpm_retune = true;
 
 	pci_set_drvdata(pdev, chip);
 
diff --git a/drivers/mmc/host/sdhci-pci-data.c b/drivers/mmc/host/sdhci-pci-data.c
index 56fddc622a54..a611217769f5 100644
--- a/drivers/mmc/host/sdhci-pci-data.c
+++ b/drivers/mmc/host/sdhci-pci-data.c
@@ -3,6 +3,3 @@
 
 struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno);
 EXPORT_SYMBOL_GPL(sdhci_pci_get_data);
-
-int sdhci_pci_spt_drive_strength;
-EXPORT_SYMBOL_GPL(sdhci_pci_spt_drive_strength);
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index d48f03104b5b..14273ca00641 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -384,8 +384,10 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 int sdhci_pci_o2_resume(struct sdhci_pci_chip *chip)
 {
 	sdhci_pci_o2_probe(chip);
-	return 0;
+	return sdhci_pci_resume_host(chip);
 }
+#endif
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 36f743464fcc..37766d20a600 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -64,12 +64,20 @@ struct sdhci_pci_fixes {
 	int			(*probe) (struct sdhci_pci_chip *);
 
 	int			(*probe_slot) (struct sdhci_pci_slot *);
+	int			(*add_host) (struct sdhci_pci_slot *);
 	void			(*remove_slot) (struct sdhci_pci_slot *, int);
 
+#ifdef CONFIG_PM_SLEEP
 	int			(*suspend) (struct sdhci_pci_chip *);
 	int			(*resume) (struct sdhci_pci_chip *);
+#endif
+#ifdef CONFIG_PM
+	int			(*runtime_suspend) (struct sdhci_pci_chip *);
+	int			(*runtime_resume) (struct sdhci_pci_chip *);
+#endif
 
 	const struct sdhci_ops	*ops;
+	size_t			priv_size;
 };
 
 struct sdhci_pci_slot {
@@ -85,10 +93,7 @@ struct sdhci_pci_slot {
 	bool			cd_override_level;
 
 	void (*hw_reset)(struct sdhci_host *host);
-	int (*select_drive_strength)(struct sdhci_host *host,
-				     struct mmc_card *card,
-				     unsigned int max_dtr, int host_drv,
-				     int card_drv, int *drv_type);
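+	/* Per-slot private data; its size is sdhci_pci_fixes.priv_size */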
+	unsigned long		private[0] ____cacheline_aligned;
 };
 
 struct sdhci_pci_chip {
@@ -97,10 +102,21 @@ struct sdhci_pci_chip {
 	unsigned int		quirks;
 	unsigned int		quirks2;
 	bool			allow_runtime_pm;
+	bool			pm_retune;
+	bool			rpm_retune;
 	const struct sdhci_pci_fixes *fixes;
 
 	int			num_slots;	/* Slots on controller */
 	struct sdhci_pci_slot	*slots[MAX_SLOTS]; /* Pointers to host slots */
 };
 
+static inline void *sdhci_pci_priv(struct sdhci_pci_slot *slot)
+{
+	return (void *)slot->private;
+}
+
+#ifdef CONFIG_PM_SLEEP
+int sdhci_pci_resume_host(struct sdhci_pci_chip *chip);
+#endif
+
 #endif /* __SDHCI_PCI_H */
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index ad49bfaf5bf8..e090d8c42ddb 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -213,6 +213,9 @@ static int sdhci_pltfm_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	return sdhci_suspend_host(host);
 }
 
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 347eae2d7b6a..995083ce1c46 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -185,7 +185,11 @@ static int sdhci_pxav2_probe(struct platform_device *pdev)
 		goto err_clk_get;
 	}
 	pltfm_host->clk = clk;
-	clk_prepare_enable(clk);
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable io clock\n");
+		goto err_clk_enable;
+	}
 
 	host->quirks = SDHCI_QUIRK_BROKEN_ADMA
 		| SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
@@ -222,12 +226,11 @@ static int sdhci_pxav2_probe(struct platform_device *pdev)
 		goto err_add_host;
 	}
 
-	platform_set_drvdata(pdev, host);
-
 	return 0;
 
 err_add_host:
 	clk_disable_unprepare(clk);
+err_clk_enable:
 	clk_put(clk);
 err_clk_get:
 	sdhci_pltfm_free(pdev);
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index d0f5c05fbc19..f953f35c2624 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -323,11 +323,8 @@ static void pxav3_set_power(struct sdhci_host *host, unsigned char mode,
 	if (host->pwr == 0)
 		vdd = 0;
 
-	if (!IS_ERR(mmc->supply.vmmc)) {
-		spin_unlock_irq(&host->lock);
+	if (!IS_ERR(mmc->supply.vmmc))
 		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
-		spin_lock_irq(&host->lock);
-	}
 }
 
 static const struct sdhci_ops pxav3_sdhci_ops = {
@@ -480,8 +477,6 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
 		goto err_add_host;
 	}
 
-	platform_set_drvdata(pdev, host);
-
 	if (host->mmc->pm_caps & MMC_PM_WAKE_SDIO_IRQ)
 		device_init_wakeup(&pdev->dev, 1);
 
@@ -529,6 +524,8 @@ static int sdhci_pxav3_suspend(struct device *dev)
 	struct sdhci_host *host = dev_get_drvdata(dev);
 
 	pm_runtime_get_sync(dev);
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
 	ret = sdhci_suspend_host(host);
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
@@ -562,6 +559,9 @@ static int sdhci_pxav3_runtime_suspend(struct device *dev)
 	if (ret)
 		return ret;
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	clk_disable_unprepare(pxa->clk_io);
 	if (!IS_ERR(pxa->clk_core))
 		clk_disable_unprepare(pxa->clk_core);
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 3e5c83d435ae..7c065a70f92b 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -190,9 +190,7 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost,
 	 * speed possible with selected clock source and skip the division.
 	 */
 	if (ourhost->no_divider) {
-		spin_unlock_irq(&ourhost->host->lock);
 		rate = clk_round_rate(clksrc, wanted);
-		spin_lock_irq(&ourhost->host->lock);
 		return wanted - rate;
 	}
 
@@ -389,9 +387,7 @@ static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock)
 	clk &= ~SDHCI_CLOCK_CARD_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
-	spin_unlock_irq(&host->lock);
 	ret = clk_set_rate(ourhost->clk_bus[ourhost->cur_clk], clock);
-	spin_lock_irq(&host->lock);
 	if (ret != 0) {
 		dev_err(dev, "%s: failed to set clock rate %uHz\n",
 			mmc_hostname(host->mmc), clock);
@@ -743,6 +739,9 @@ static int sdhci_s3c_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	return sdhci_suspend_host(host);
 }
 
@@ -764,6 +763,9 @@ static int sdhci_s3c_runtime_suspend(struct device *dev)
 
 	ret = sdhci_runtime_suspend_host(host);
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	if (ourhost->cur_clk >= 0)
 		clk_disable_unprepare(ourhost->clk_bus[ourhost->cur_clk]);
 	clk_disable_unprepare(busclk);
diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index 5d068639dd3f..c251c6c0a112 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c
@@ -237,6 +237,9 @@ static int sdhci_sirf_suspend(struct device *dev)
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	int ret;
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	ret = sdhci_suspend_host(host);
 	if (ret)
 		return ret;
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 255a896769b8..8c0f88428556 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -165,6 +165,9 @@ static int sdhci_suspend(struct device *dev)
 	struct spear_sdhci *sdhci = sdhci_priv(host);
 	int ret;
 
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
+
 	ret = sdhci_suspend_host(host);
 	if (!ret)
 		clk_disable(sdhci->clk);
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index ed92ce729dde..68c36c9fa231 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -418,8 +418,6 @@ static int sdhci_st_probe(struct platform_device *pdev)
 		goto err_out;
 	}
 
-	platform_set_drvdata(pdev, host);
-
 	host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
 
 	dev_info(&pdev->dev, "SDHCI ST Initialised: Host Version: 0x%x Vendor Version 0x%x\n",
@@ -465,8 +463,12 @@ static int sdhci_st_suspend(struct device *dev)
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
-	int ret = sdhci_suspend_host(host);
+	int ret;
+
+	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+		mmc_retune_needed(host->mmc);
 
+	ret = sdhci_suspend_host(host);
 	if (ret)
 		goto out;
 
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 20b6ff5b4af1..7f93079c7a3a 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/reset.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
@@ -65,6 +66,8 @@ struct sdhci_tegra {
 	struct gpio_desc *power_gpio;
 	bool ddr_signaling;
 	bool pad_calib_required;
+
+	struct reset_control *rst;
 };
 
 static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
@@ -431,7 +434,23 @@ static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
 	.pdata = &sdhci_tegra210_pdata,
 };
 
+static const struct sdhci_pltfm_data sdhci_tegra186_pdata = {
+	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
+		  SDHCI_QUIRK_NO_HISPD_BIT |
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+	.ops  = &tegra114_sdhci_ops,
+};
+
+static const struct sdhci_tegra_soc_data soc_data_tegra186 = {
+	.pdata = &sdhci_tegra186_pdata,
+};
+
 static const struct of_device_id sdhci_tegra_dt_match[] = {
+	{ .compatible = "nvidia,tegra186-sdhci", .data = &soc_data_tegra186 },
 	{ .compatible = "nvidia,tegra210-sdhci", .data = &soc_data_tegra210 },
 	{ .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra124 },
 	{ .compatible = "nvidia,tegra114-sdhci", .data = &soc_data_tegra114 },
@@ -489,6 +508,25 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 	clk_prepare_enable(clk);
 	pltfm_host->clk = clk;
 
+	tegra_host->rst = devm_reset_control_get(&pdev->dev, "sdhci");
+	if (IS_ERR(tegra_host->rst)) {
+		rc = PTR_ERR(tegra_host->rst);
+		dev_err(&pdev->dev, "failed to get reset control: %d\n", rc);
+		goto err_rst_get;
+	}
+
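+	/* Pulse the module reset before registering the host */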
+	rc = reset_control_assert(tegra_host->rst);
+	if (rc)
+		goto err_rst_get;
+
+	usleep_range(2000, 4000);
+
+	rc = reset_control_deassert(tegra_host->rst);
+	if (rc)
+		goto err_rst_get;
+
+	usleep_range(2000, 4000);
+
 	rc = sdhci_add_host(host);
 	if (rc)
 		goto err_add_host;
@@ -496,6 +534,8 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 	return 0;
 
 err_add_host:
+	reset_control_assert(tegra_host->rst);
+err_rst_get:
 	clk_disable_unprepare(pltfm_host->clk);
 err_clk_get:
 err_power_req:
@@ -504,6 +544,23 @@ err_parse_dt:
 	return rc;
 }
 
+static int sdhci_tegra_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
+
+	sdhci_remove_host(host, 0);
+
+	reset_control_assert(tegra_host->rst);
+	usleep_range(2000, 4000);
+	clk_disable_unprepare(pltfm_host->clk);
+
+	sdhci_pltfm_free(pdev);
+
+	return 0;
+}
+
 static struct platform_driver sdhci_tegra_driver = {
 	.driver		= {
 		.name	= "sdhci-tegra",
@@ -511,7 +568,7 @@ static struct platform_driver sdhci_tegra_driver = {
 		.pm	= &sdhci_pltfm_pmops,
 	},
 	.probe		= sdhci_tegra_probe,
-	.remove		= sdhci_pltfm_unregister,
+	.remove		= sdhci_tegra_remove,
 };
 
 module_platform_driver(sdhci_tegra_driver);
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
new file mode 100644
index 000000000000..6356781f1cca
--- /dev/null
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -0,0 +1,837 @@
+/*
+ * PHY support for Xenon SDHC
+ *
+ * Copyright (C) 2016 Marvell, All Rights Reserved.
+ *
+ * Author:	Hu Ziji <huziji@marvell.com>
+ * Date:	2016-8-24
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ */
+
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/ktime.h>
+#include <linux/of_address.h>
+
+#include "sdhci-pltfm.h"
+#include "sdhci-xenon.h"
+
+/* Register base for eMMC PHY 5.0 Version */
+#define XENON_EMMC_5_0_PHY_REG_BASE		0x0160
+/* Register base for eMMC PHY 5.1 Version */
+#define XENON_EMMC_PHY_REG_BASE			0x0170
+
+#define XENON_EMMC_PHY_TIMING_ADJUST		XENON_EMMC_PHY_REG_BASE
+#define XENON_EMMC_5_0_PHY_TIMING_ADJUST	XENON_EMMC_5_0_PHY_REG_BASE
+#define XENON_TIMING_ADJUST_SLOW_MODE		BIT(29)
+#define XENON_TIMING_ADJUST_SDIO_MODE		BIT(28)
+#define XENON_SAMPL_INV_QSP_PHASE_SELECT	BIT(18)
+#define XENON_SAMPL_INV_QSP_PHASE_SELECT_SHIFT	18
+#define XENON_PHY_INITIALIZAION			BIT(31)
+#define XENON_WAIT_CYCLE_BEFORE_USING_MASK	0xF
+#define XENON_WAIT_CYCLE_BEFORE_USING_SHIFT	12
+#define XENON_FC_SYNC_EN_DURATION_MASK		0xF
+#define XENON_FC_SYNC_EN_DURATION_SHIFT		8
+#define XENON_FC_SYNC_RST_EN_DURATION_MASK	0xF
+#define XENON_FC_SYNC_RST_EN_DURATION_SHIFT	4
+#define XENON_FC_SYNC_RST_DURATION_MASK		0xF
+#define XENON_FC_SYNC_RST_DURATION_SHIFT	0
+
+#define XENON_EMMC_PHY_FUNC_CONTROL		(XENON_EMMC_PHY_REG_BASE + 0x4)
+#define XENON_EMMC_5_0_PHY_FUNC_CONTROL		\
+	(XENON_EMMC_5_0_PHY_REG_BASE + 0x4)
+#define XENON_ASYNC_DDRMODE_MASK		BIT(23)
+#define XENON_ASYNC_DDRMODE_SHIFT		23
+#define XENON_CMD_DDR_MODE			BIT(16)
+#define XENON_DQ_DDR_MODE_SHIFT			8
+#define XENON_DQ_DDR_MODE_MASK			0xFF
+#define XENON_DQ_ASYNC_MODE			BIT(4)
+
+#define XENON_EMMC_PHY_PAD_CONTROL		(XENON_EMMC_PHY_REG_BASE + 0x8)
+#define XENON_EMMC_5_0_PHY_PAD_CONTROL		\
+	(XENON_EMMC_5_0_PHY_REG_BASE + 0x8)
+#define XENON_REC_EN_SHIFT			24
+#define XENON_REC_EN_MASK			0xF
+#define XENON_FC_DQ_RECEN			BIT(24)
+#define XENON_FC_CMD_RECEN			BIT(25)
+#define XENON_FC_QSP_RECEN			BIT(26)
+#define XENON_FC_QSN_RECEN			BIT(27)
+#define XENON_OEN_QSN				BIT(28)
+#define XENON_AUTO_RECEN_CTRL			BIT(30)
+#define XENON_FC_ALL_CMOS_RECEIVER		0xF000
+
+#define XENON_EMMC5_FC_QSP_PD			BIT(18)
+#define XENON_EMMC5_FC_QSP_PU			BIT(22)
+#define XENON_EMMC5_FC_CMD_PD			BIT(17)
+#define XENON_EMMC5_FC_CMD_PU			BIT(21)
+#define XENON_EMMC5_FC_DQ_PD			BIT(16)
+#define XENON_EMMC5_FC_DQ_PU			BIT(20)
+
+#define XENON_EMMC_PHY_PAD_CONTROL1		(XENON_EMMC_PHY_REG_BASE + 0xC)
+#define XENON_EMMC5_1_FC_QSP_PD			BIT(9)
+#define XENON_EMMC5_1_FC_QSP_PU			BIT(25)
+#define XENON_EMMC5_1_FC_CMD_PD			BIT(8)
+#define XENON_EMMC5_1_FC_CMD_PU			BIT(24)
+#define XENON_EMMC5_1_FC_DQ_PD			0xFF
+#define XENON_EMMC5_1_FC_DQ_PU			(0xFF << 16)
+
+#define XENON_EMMC_PHY_PAD_CONTROL2		(XENON_EMMC_PHY_REG_BASE + 0x10)
+#define XENON_EMMC_5_0_PHY_PAD_CONTROL2		\
+	(XENON_EMMC_5_0_PHY_REG_BASE + 0xC)
+#define XENON_ZNR_MASK				0x1F
+#define XENON_ZNR_SHIFT				8
+#define XENON_ZPR_MASK				0x1F
+/*
+ * Preferred ZNR and ZPR values vary between boards.
+ * The specific ZNR and ZPR values should be defined here
+ * according to the board's actual timing.
+ */
+#define XENON_ZNR_DEF_VALUE			0xF
+#define XENON_ZPR_DEF_VALUE			0xF
+
+#define XENON_EMMC_PHY_DLL_CONTROL		(XENON_EMMC_PHY_REG_BASE + 0x14)
+#define XENON_EMMC_5_0_PHY_DLL_CONTROL		\
+	(XENON_EMMC_5_0_PHY_REG_BASE + 0x10)
+#define XENON_DLL_ENABLE			BIT(31)
+#define XENON_DLL_UPDATE_STROBE_5_0		BIT(30)
+#define XENON_DLL_REFCLK_SEL			BIT(30)
+#define XENON_DLL_UPDATE			BIT(23)
+#define XENON_DLL_PHSEL1_SHIFT			24
+#define XENON_DLL_PHSEL0_SHIFT			16
+#define XENON_DLL_PHASE_MASK			0x3F
+#define XENON_DLL_PHASE_90_DEGREE		0x1F
+#define XENON_DLL_FAST_LOCK			BIT(5)
+#define XENON_DLL_GAIN2X			BIT(3)
+#define XENON_DLL_BYPASS_EN			BIT(0)
+
+#define XENON_EMMC_5_0_PHY_LOGIC_TIMING_ADJUST	\
+	(XENON_EMMC_5_0_PHY_REG_BASE + 0x14)
+#define XENON_EMMC_5_0_PHY_LOGIC_TIMING_VALUE	0x5A54
+#define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST	(XENON_EMMC_PHY_REG_BASE + 0x18)
+#define XENON_LOGIC_TIMING_VALUE		0x00AA8977
+
+/*
+ * Offsets of PHY registers and some special register values
+ * for eMMC PHY 5.0 and eMMC PHY 5.1.
+ */
+struct xenon_emmc_phy_regs {
+	/* Offset of Timing Adjust register */
+	u16 timing_adj;
+	/* Offset of Func Control register */
+	u16 func_ctrl;
+	/* Offset of Pad Control register */
+	u16 pad_ctrl;
+	/* Offset of Pad Control register 2 */
+	u16 pad_ctrl2;
+	/* Offset of DLL Control register */
+	u16 dll_ctrl;
+	/* Offset of Logic Timing Adjust register */
+	u16 logic_timing_adj;
+	/* DLL Update Enable bit */
+	u32 dll_update;
+	/* value in Logic Timing Adjustment register */
+	u32 logic_timing_val;
+};
+
+static const char * const phy_types[] = {
+	"emmc 5.0 phy",
+	"emmc 5.1 phy"
+};
+
+enum xenon_phy_type_enum {
+	EMMC_5_0_PHY,
+	EMMC_5_1_PHY,
+	NR_PHY_TYPES
+};
+
+enum soc_pad_ctrl_type {
+	SOC_PAD_SD,
+	SOC_PAD_FIXED_1_8V,
+};
+
+struct soc_pad_ctrl {
+	/* Register address of SoC PHY PAD ctrl */
+	void __iomem	*reg;
+	/* SoC PHY PAD ctrl type */
+	enum soc_pad_ctrl_type pad_type;
+	/* SoC specific operation to set SoC PHY PAD */
+	void (*set_soc_pad)(struct sdhci_host *host,
+			    unsigned char signal_voltage);
+};
+
+static struct xenon_emmc_phy_regs xenon_emmc_5_0_phy_regs = {
+	.timing_adj	= XENON_EMMC_5_0_PHY_TIMING_ADJUST,
+	.func_ctrl	= XENON_EMMC_5_0_PHY_FUNC_CONTROL,
+	.pad_ctrl	= XENON_EMMC_5_0_PHY_PAD_CONTROL,
+	.pad_ctrl2	= XENON_EMMC_5_0_PHY_PAD_CONTROL2,
+	.dll_ctrl	= XENON_EMMC_5_0_PHY_DLL_CONTROL,
+	.logic_timing_adj = XENON_EMMC_5_0_PHY_LOGIC_TIMING_ADJUST,
+	.dll_update	= XENON_DLL_UPDATE_STROBE_5_0,
+	.logic_timing_val = XENON_EMMC_5_0_PHY_LOGIC_TIMING_VALUE,
+};
+
+static struct xenon_emmc_phy_regs xenon_emmc_5_1_phy_regs = {
+	.timing_adj	= XENON_EMMC_PHY_TIMING_ADJUST,
+	.func_ctrl	= XENON_EMMC_PHY_FUNC_CONTROL,
+	.pad_ctrl	= XENON_EMMC_PHY_PAD_CONTROL,
+	.pad_ctrl2	= XENON_EMMC_PHY_PAD_CONTROL2,
+	.dll_ctrl	= XENON_EMMC_PHY_DLL_CONTROL,
+	.logic_timing_adj = XENON_EMMC_PHY_LOGIC_TIMING_ADJUST,
+	.dll_update	= XENON_DLL_UPDATE,
+	.logic_timing_val = XENON_LOGIC_TIMING_VALUE,
+};
+
+/*
+ * eMMC PHY configuration and operations
+ */
+struct xenon_emmc_phy_params {
+	bool	slow_mode;
+
+	u8	znr;
+	u8	zpr;
+
+	/* Number of consecutive sampling points in a valid sampling window */
+	u8	nr_tun_times;
+	/* Divider for calculating Tuning Step */
+	u8	tun_step_divider;
+
+	struct soc_pad_ctrl pad_ctrl;
+};
+
+static int xenon_alloc_emmc_phy(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_params *params;
+
+	params = devm_kzalloc(mmc_dev(host->mmc), sizeof(*params), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	priv->phy_params = params;
+	if (priv->phy_type == EMMC_5_0_PHY)
+		priv->emmc_phy_regs = &xenon_emmc_5_0_phy_regs;
+	else
+		priv->emmc_phy_regs = &xenon_emmc_5_1_phy_regs;
+
+	return 0;
+}
+
+/*
+ * eMMC 5.0/5.1 PHY init/re-init.
+ * eMMC PHY init should be executed after:
+ * 1. SDCLK frequency changes.
+ * 2. SDCLK is stopped and re-enabled.
+ * 3. The config in emmc_phy_regs->timing_adj or emmc_phy_regs->func_ctrl
+ *    is changed.
+ */
+static int xenon_emmc_phy_init(struct sdhci_host *host)
+{
+	u32 reg;
+	u32 wait, clock;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+
+	reg = sdhci_readl(host, phy_regs->timing_adj);
+	reg |= XENON_PHY_INITIALIZAION;
+	sdhci_writel(host, reg, phy_regs->timing_adj);
+
+	/* Add duration of FC_SYNC_RST */
+	wait = ((reg >> XENON_FC_SYNC_RST_DURATION_SHIFT) &
+			XENON_FC_SYNC_RST_DURATION_MASK);
+	/* Add interval between FC_SYNC_EN and FC_SYNC_RST */
+	wait += ((reg >> XENON_FC_SYNC_RST_EN_DURATION_SHIFT) &
+			XENON_FC_SYNC_RST_EN_DURATION_MASK);
+	/* Add duration of asserting FC_SYNC_EN */
+	wait += ((reg >> XENON_FC_SYNC_EN_DURATION_SHIFT) &
+			XENON_FC_SYNC_EN_DURATION_MASK);
+	/* Add duration of waiting for PHY */
+	wait += ((reg >> XENON_WAIT_CYCLE_BEFORE_USING_SHIFT) &
+			XENON_WAIT_CYCLE_BEFORE_USING_MASK);
+	/* 4 additional bus clock and 4 AXI bus clock are required */
+	wait += 8;
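+	/*
+	 * wait is in bus-clock cycles here; since 2^20 is roughly 10^6,
+	 * (cycles << 20) / Hz approximates the wait time in microseconds.
+	 */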
+	wait <<= 20;
+
+	clock = host->clock;
+	if (!clock)
+		/* Use the lowest possible bus frequency */
+		clock = XENON_LOWEST_SDCLK_FREQ;
+	/* get the wait time */
+	wait /= clock;
+	wait++;
+	/* Wait for the host eMMC PHY init to complete */
+	udelay(wait);
+
+	reg = sdhci_readl(host, phy_regs->timing_adj);
+	reg &= XENON_PHY_INITIALIZAION;
+	if (reg) {
+		dev_err(mmc_dev(host->mmc), "eMMC PHY init did not complete within %d us\n",
+			wait);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+#define ARMADA_3700_SOC_PAD_1_8V	0x1
+#define ARMADA_3700_SOC_PAD_3_3V	0x0
+
+static void armada_3700_soc_pad_voltage_set(struct sdhci_host *host,
+					    unsigned char signal_voltage)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_params *params = priv->phy_params;
+
+	if (params->pad_ctrl.pad_type == SOC_PAD_FIXED_1_8V) {
+		writel(ARMADA_3700_SOC_PAD_1_8V, params->pad_ctrl.reg);
+	} else if (params->pad_ctrl.pad_type == SOC_PAD_SD) {
+		if (signal_voltage == MMC_SIGNAL_VOLTAGE_180)
+			writel(ARMADA_3700_SOC_PAD_1_8V, params->pad_ctrl.reg);
+		else if (signal_voltage == MMC_SIGNAL_VOLTAGE_330)
+			writel(ARMADA_3700_SOC_PAD_3_3V, params->pad_ctrl.reg);
+	}
+}
+
+/*
+ * Set the SoC PHY voltage PAD control register
+ * according to the operating voltage on the PAD.
+ * The detailed operation depends on the SoC implementation.
+ */
+static void xenon_emmc_phy_set_soc_pad(struct sdhci_host *host,
+				       unsigned char signal_voltage)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_params *params = priv->phy_params;
+
+	if (!params->pad_ctrl.reg)
+		return;
+
+	if (params->pad_ctrl.set_soc_pad)
+		params->pad_ctrl.set_soc_pad(host, signal_voltage);
+}
+
+/*
+ * Enable the eMMC PHY HW DLL.
+ * The DLL should be enabled and stable before HS200/SDR104 tuning,
+ * and before setting the HS400 data strobe.
+ */
+static int xenon_emmc_phy_enable_dll(struct sdhci_host *host)
+{
+	u32 reg;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+	ktime_t timeout;
+
+	if (WARN_ON(host->clock <= MMC_HIGH_52_MAX_DTR))
+		return -EINVAL;
+
+	reg = sdhci_readl(host, phy_regs->dll_ctrl);
+	if (reg & XENON_DLL_ENABLE)
+		return 0;
+
+	/* Enable DLL */
+	reg = sdhci_readl(host, phy_regs->dll_ctrl);
+	reg |= (XENON_DLL_ENABLE | XENON_DLL_FAST_LOCK);
+
+	/*
+	 * Set the phase to 90 degrees, the most common value.
+	 * Another value may be set if necessary.
+	 * The granularity is 1 degree.
+	 */
+	reg &= ~((XENON_DLL_PHASE_MASK << XENON_DLL_PHSEL0_SHIFT) |
+		 (XENON_DLL_PHASE_MASK << XENON_DLL_PHSEL1_SHIFT));
+	reg |= ((XENON_DLL_PHASE_90_DEGREE << XENON_DLL_PHSEL0_SHIFT) |
+		(XENON_DLL_PHASE_90_DEGREE << XENON_DLL_PHSEL1_SHIFT));
+
+	reg &= ~XENON_DLL_BYPASS_EN;
+	reg |= phy_regs->dll_update;
+	if (priv->phy_type == EMMC_5_1_PHY)
+		reg &= ~XENON_DLL_REFCLK_SEL;
+	sdhci_writel(host, reg, phy_regs->dll_ctrl);
+
+	/* Wait max 32 ms */
+	timeout = ktime_add_ms(ktime_get(), 32);
+	while (!(sdhci_readw(host, XENON_SLOT_EXT_PRESENT_STATE) &
+		XENON_DLL_LOCK_STATE)) {
+		if (ktime_after(ktime_get(), timeout)) {
+			dev_err(mmc_dev(host->mmc), "Wait for DLL Lock time-out\n");
+			return -ETIMEDOUT;
+		}
+		udelay(100);
+	}
+	return 0;
+}
+
+/*
+ * Configure the eMMC PHY to prepare for tuning.
+ * Enable the HW DLL and set the TUNING_STEP.
+ */
+static int xenon_emmc_phy_config_tuning(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_params *params = priv->phy_params;
+	u32 reg, tuning_step;
+	int ret;
+
+	if (host->clock <= MMC_HIGH_52_MAX_DTR)
+		return -EINVAL;
+
+	ret = xenon_emmc_phy_enable_dll(host);
+	if (ret)
+		return ret;
+
+	/* Achieve TUNING_STEP with HW DLL help */
+	reg = sdhci_readl(host, XENON_SLOT_DLL_CUR_DLY_VAL);
+	tuning_step = reg / params->tun_step_divider;
+	if (unlikely(tuning_step > XENON_TUNING_STEP_MASK)) {
+		dev_warn(mmc_dev(host->mmc),
+			 "HS200 TUNING_STEP %d is larger than MAX value\n",
+			 tuning_step);
+		tuning_step = XENON_TUNING_STEP_MASK;
+	}
+
+	/* Set TUNING_STEP for later tuning */
+	reg = sdhci_readl(host, XENON_SLOT_OP_STATUS_CTRL);
+	reg &= ~(XENON_TUN_CONSECUTIVE_TIMES_MASK <<
+		 XENON_TUN_CONSECUTIVE_TIMES_SHIFT);
+	reg |= (params->nr_tun_times << XENON_TUN_CONSECUTIVE_TIMES_SHIFT);
+	reg &= ~(XENON_TUNING_STEP_MASK << XENON_TUNING_STEP_SHIFT);
+	reg |= (tuning_step << XENON_TUNING_STEP_SHIFT);
+	sdhci_writel(host, reg, XENON_SLOT_OP_STATUS_CTRL);
+
+	return 0;
+}
+
+static void xenon_emmc_phy_disable_data_strobe(struct sdhci_host *host)
+{
+	u32 reg;
+
+	/* Disable SDHC Data Strobe */
+	reg = sdhci_readl(host, XENON_SLOT_EMMC_CTRL);
+	reg &= ~XENON_ENABLE_DATA_STROBE;
+	sdhci_writel(host, reg, XENON_SLOT_EMMC_CTRL);
+}
+
+/* Set HS400 Data Strobe */
+static void xenon_emmc_phy_strobe_delay_adj(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u32 reg;
+
+	if (WARN_ON(host->timing != MMC_TIMING_MMC_HS400))
+		return;
+
+	if (host->clock <= MMC_HIGH_52_MAX_DTR)
+		return;
+
+	dev_dbg(mmc_dev(host->mmc), "starting HS400 strobe delay adjustment\n");
+
+	xenon_emmc_phy_enable_dll(host);
+
+	/* Enable SDHC Data Strobe */
+	reg = sdhci_readl(host, XENON_SLOT_EMMC_CTRL);
+	reg |= XENON_ENABLE_DATA_STROBE;
+	sdhci_writel(host, reg, XENON_SLOT_EMMC_CTRL);
+
+	/* Set Data Strobe Pull down */
+	if (priv->phy_type == EMMC_5_0_PHY) {
+		reg = sdhci_readl(host, XENON_EMMC_5_0_PHY_PAD_CONTROL);
+		reg |= XENON_EMMC5_FC_QSP_PD;
+		reg &= ~XENON_EMMC5_FC_QSP_PU;
+		sdhci_writel(host, reg, XENON_EMMC_5_0_PHY_PAD_CONTROL);
+	} else {
+		reg = sdhci_readl(host, XENON_EMMC_PHY_PAD_CONTROL1);
+		reg |= XENON_EMMC5_1_FC_QSP_PD;
+		reg &= ~XENON_EMMC5_1_FC_QSP_PU;
+		sdhci_writel(host, reg, XENON_EMMC_PHY_PAD_CONTROL1);
+	}
+}
+
+/*
+ * If eMMC PHY Slow Mode is required in a lower-speed SDR mode
+ * (SDCLK < 55 MHz), enable Slow Mode to bypass the eMMC PHY.
+ * The slower SDIO SDR modes also require Slow Mode.
+ *
+ * Return true if Slow Mode is enabled, false otherwise.
+ */
+static bool xenon_emmc_phy_slow_mode(struct sdhci_host *host,
+				     unsigned char timing)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_params *params = priv->phy_params;
+	struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+	u32 reg;
+	int ret;
+
+	if (host->clock > MMC_HIGH_52_MAX_DTR)
+		return false;
+
+	reg = sdhci_readl(host, phy_regs->timing_adj);
+	/*
+	 * In the slower SDR modes, enable Slow Mode for SDIO cards
+	 * or when the Slow Mode flag is set.
+	 */
+	switch (timing) {
+	case MMC_TIMING_LEGACY:
+		/*
+		 * If Slow Mode is required, enable Slow Mode by default
+		 * in early init phase to avoid any potential issue.
+		 */
+		if (params->slow_mode) {
+			reg |= XENON_TIMING_ADJUST_SLOW_MODE;
+			ret = true;
+		} else {
+			reg &= ~XENON_TIMING_ADJUST_SLOW_MODE;
+			ret = false;
+		}
+		break;
+	case MMC_TIMING_UHS_SDR25:
+	case MMC_TIMING_UHS_SDR12:
+	case MMC_TIMING_SD_HS:
+	case MMC_TIMING_MMC_HS:
+		if ((priv->init_card_type == MMC_TYPE_SDIO) ||
+		    params->slow_mode) {
+			reg |= XENON_TIMING_ADJUST_SLOW_MODE;
+			ret = true;
+			break;
+		}
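+		/* fall through */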
+	default:
+		reg &= ~XENON_TIMING_ADJUST_SLOW_MODE;
+		ret = false;
+	}
+
+	sdhci_writel(host, reg, phy_regs->timing_adj);
+	return ret;
+}
+
+/*
+ * Set up the eMMC 5.0/5.1 PHY.
+ * Specific configuration depends on the current speed mode in use.
+ */
+static void xenon_emmc_phy_set(struct sdhci_host *host,
+			       unsigned char timing)
+{
+	u32 reg;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	struct xenon_emmc_phy_params *params = priv->phy_params;
+	struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs;
+
+	dev_dbg(mmc_dev(host->mmc), "eMMC PHY setting starts\n");
+
+	/* Setup pad, set bit[28] and bits[26:24] */
+	reg = sdhci_readl(host, phy_regs->pad_ctrl);
+	reg |= (XENON_FC_DQ_RECEN | XENON_FC_CMD_RECEN |
+		XENON_FC_QSP_RECEN | XENON_OEN_QSN);
+	/* All FC_XX_RECEN receivers should be set to CMOS type */
+	reg |= XENON_FC_ALL_CMOS_RECEIVER;
+	sdhci_writel(host, reg, phy_regs->pad_ctrl);
+
+	/* Set CMD and DQ Pull Up */
+	if (priv->phy_type == EMMC_5_0_PHY) {
+		reg = sdhci_readl(host, XENON_EMMC_5_0_PHY_PAD_CONTROL);
+		reg |= (XENON_EMMC5_FC_CMD_PU | XENON_EMMC5_FC_DQ_PU);
+		reg &= ~(XENON_EMMC5_FC_CMD_PD | XENON_EMMC5_FC_DQ_PD);
+		sdhci_writel(host, reg, XENON_EMMC_5_0_PHY_PAD_CONTROL);
+	} else {
+		reg = sdhci_readl(host, XENON_EMMC_PHY_PAD_CONTROL1);
+		reg |= (XENON_EMMC5_1_FC_CMD_PU | XENON_EMMC5_1_FC_DQ_PU);
+		reg &= ~(XENON_EMMC5_1_FC_CMD_PD | XENON_EMMC5_1_FC_DQ_PD);
+		sdhci_writel(host, reg, XENON_EMMC_PHY_PAD_CONTROL1);
+	}
+
+	if (timing == MMC_TIMING_LEGACY) {
+		xenon_emmc_phy_slow_mode(host, timing);
+		goto phy_init;
+	}
+
+	/*
+	 * If SDIO card, set SDIO Mode
+	 * Otherwise, clear SDIO Mode
+	 */
+	reg = sdhci_readl(host, phy_regs->timing_adj);
+	if (priv->init_card_type == MMC_TYPE_SDIO)
+		reg |= XENON_TIMING_ADJUST_SDIO_MODE;
+	else
+		reg &= ~XENON_TIMING_ADJUST_SDIO_MODE;
+	sdhci_writel(host, reg, phy_regs->timing_adj);
+
+	if (xenon_emmc_phy_slow_mode(host, timing))
+		goto phy_init;
+
+	/*
+	 * Set the preferred ZNR and ZPR values.
+	 * They vary between boards; board-specific values come from DT,
+	 * with defaults defined at the top of this file.
+	 */
+	reg = sdhci_readl(host, phy_regs->pad_ctrl2);
+	reg &= ~((XENON_ZNR_MASK << XENON_ZNR_SHIFT) | XENON_ZPR_MASK);
+	reg |= ((params->znr << XENON_ZNR_SHIFT) | params->zpr);
+	sdhci_writel(host, reg, phy_regs->pad_ctrl2);
+
+	/*
+	 * The SD clock should be disabled when setting the
+	 * EMMC_PHY_FUNC_CONTROL register.
+	 */
+	reg = sdhci_readl(host, SDHCI_CLOCK_CONTROL);
+	reg &= ~SDHCI_CLOCK_CARD_EN;
+	sdhci_writew(host, reg, SDHCI_CLOCK_CONTROL);
+
+	reg = sdhci_readl(host, phy_regs->func_ctrl);
+	switch (timing) {
+	case MMC_TIMING_MMC_HS400:
+		reg |= (XENON_DQ_DDR_MODE_MASK << XENON_DQ_DDR_MODE_SHIFT) |
+		       XENON_CMD_DDR_MODE;
+		reg &= ~XENON_DQ_ASYNC_MODE;
+		break;
+	case MMC_TIMING_UHS_DDR50:
+	case MMC_TIMING_MMC_DDR52:
+		reg |= (XENON_DQ_DDR_MODE_MASK << XENON_DQ_DDR_MODE_SHIFT) |
+		       XENON_CMD_DDR_MODE | XENON_DQ_ASYNC_MODE;
+		break;
+	default:
+		reg &= ~((XENON_DQ_DDR_MODE_MASK << XENON_DQ_DDR_MODE_SHIFT) |
+			 XENON_CMD_DDR_MODE);
+		reg |= XENON_DQ_ASYNC_MODE;
+	}
+	sdhci_writel(host, reg, phy_regs->func_ctrl);
+
+	/* Enable bus clock */
+	reg = sdhci_readl(host, SDHCI_CLOCK_CONTROL);
+	reg |= SDHCI_CLOCK_CARD_EN;
+	sdhci_writew(host, reg, SDHCI_CLOCK_CONTROL);
+
+	if (timing == MMC_TIMING_MMC_HS400)
+		/* Value recommended by the hardware team for HS400 */
+		sdhci_writel(host, phy_regs->logic_timing_val,
+			     phy_regs->logic_timing_adj);
+	else
+		xenon_emmc_phy_disable_data_strobe(host);
+
+phy_init:
+	xenon_emmc_phy_init(host);
+
+	dev_dbg(mmc_dev(host->mmc), "eMMC PHY setting completes\n");
+}
+
+static int get_dt_pad_ctrl_data(struct sdhci_host *host,
+				struct device_node *np,
+				struct xenon_emmc_phy_params *params)
+{
+	int ret = 0;
+	const char *name;
+	struct resource iomem;
+
+	if (of_device_is_compatible(np, "marvell,armada-3700-sdhci"))
+		params->pad_ctrl.set_soc_pad = armada_3700_soc_pad_voltage_set;
+	else
+		return 0;
+
+	if (of_address_to_resource(np, 1, &iomem)) {
+		dev_err(mmc_dev(host->mmc), "Unable to find SoC PAD ctrl register address for %s\n",
+			np->name);
+		return -EINVAL;
+	}
+
+	params->pad_ctrl.reg = devm_ioremap_resource(mmc_dev(host->mmc),
+						     &iomem);
+	if (IS_ERR(params->pad_ctrl.reg))
+		return PTR_ERR(params->pad_ctrl.reg);
+
+	ret = of_property_read_string(np, "marvell,pad-type", &name);
+	if (ret) {
+		dev_err(mmc_dev(host->mmc), "Unable to determine SoC PHY PAD ctrl type\n");
+		return ret;
+	}
+	if (!strcmp(name, "sd")) {
+		params->pad_ctrl.pad_type = SOC_PAD_SD;
+	} else if (!strcmp(name, "fixed-1-8v")) {
+		params->pad_ctrl.pad_type = SOC_PAD_FIXED_1_8V;
+	} else {
+		dev_err(mmc_dev(host->mmc), "Unsupported SoC PHY PAD ctrl type %s\n",
+			name);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
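+/*
+ * Example PHY properties in DT (a sketch; the values shown are the
+ * driver defaults and are board-specific in practice):
+ *
+ *	marvell,xenon-phy-slow-mode;
+ *	marvell,xenon-phy-znr = <0xf>;
+ *	marvell,xenon-phy-zpr = <0xf>;
+ *	marvell,xenon-phy-nr-success-tun = <4>;
+ *	marvell,xenon-phy-tun-step-divider = <64>;
+ */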
+static int xenon_emmc_phy_parse_param_dt(struct sdhci_host *host,
+					 struct device_node *np,
+					 struct xenon_emmc_phy_params *params)
+{
+	u32 value;
+
+	params->slow_mode = false;
+	if (of_property_read_bool(np, "marvell,xenon-phy-slow-mode"))
+		params->slow_mode = true;
+
+	params->znr = XENON_ZNR_DEF_VALUE;
+	if (!of_property_read_u32(np, "marvell,xenon-phy-znr", &value))
+		params->znr = value & XENON_ZNR_MASK;
+
+	params->zpr = XENON_ZPR_DEF_VALUE;
+	if (!of_property_read_u32(np, "marvell,xenon-phy-zpr", &value))
+		params->zpr = value & XENON_ZPR_MASK;
+
+	params->nr_tun_times = XENON_TUN_CONSECUTIVE_TIMES;
+	if (!of_property_read_u32(np, "marvell,xenon-phy-nr-success-tun",
+				  &value))
+		params->nr_tun_times = value & XENON_TUN_CONSECUTIVE_TIMES_MASK;
+
+	params->tun_step_divider = XENON_TUNING_STEP_DIVIDER;
+	if (!of_property_read_u32(np, "marvell,xenon-phy-tun-step-divider",
+				  &value))
+		params->tun_step_divider = value & 0xFF;
+
+	return get_dt_pad_ctrl_data(host, np, params);
+}
+
+/* Set SoC PHY Voltage PAD */
+void xenon_soc_pad_ctrl(struct sdhci_host *host,
+			unsigned char signal_voltage)
+{
+	xenon_emmc_phy_set_soc_pad(host, signal_voltage);
+}
+
+/*
+ * Setting PHY when card is working in High Speed Mode.
+ * HS400 set data strobe line.
+ * HS200/SDR104 set tuning config to prepare for tuning.
+ */
+static int xenon_hs_delay_adj(struct sdhci_host *host)
+{
+	int ret = 0;
+
+	if (WARN_ON(host->clock <= XENON_DEFAULT_SDCLK_FREQ))
+		return -EINVAL;
+
+	switch (host->timing) {
+	case MMC_TIMING_MMC_HS400:
+		xenon_emmc_phy_strobe_delay_adj(host);
+		return 0;
+	case MMC_TIMING_MMC_HS200:
+	case MMC_TIMING_UHS_SDR104:
+		return xenon_emmc_phy_config_tuning(host);
+	case MMC_TIMING_MMC_DDR52:
+	case MMC_TIMING_UHS_DDR50:
+		/*
+		 * DDR mode requires the driver to scan the Sampling Fixed
+		 * Delay Line to find a perfect sampling point.
+		 * Such a scan is hard to implement in the host driver,
+		 * since initiating commands from the host driver is unsafe.
+		 * Thus, for now, keep the PHY Sampling Fixed Delay at its
+		 * default value in DDR mode.
+		 *
+		 * If any timing issue occurs in DDR mode on Marvell products,
+		 * please contact the maintainer for internal Marvell support.
+		 */
+		dev_warn_once(mmc_dev(host->mmc), "Timing issue might occur in DDR mode\n");
+		return 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Adjust the PHY setting.
+ * The PHY setting should be adjusted when the SDCLK frequency, bus width
+ * or speed mode changes.
+ * Additional config is required when the card works in a High Speed mode,
+ * after leaving Legacy mode.
+ */
+int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	int ret = 0;
+
+	if (!host->clock) {
+		priv->clock = 0;
+		return 0;
+	}
+
+	/*
+	 * If the timing, frequency or bus width has changed, re-apply
+	 * the eMMC PHY setting based on the current configuration
+	 * and adjust the Xenon SDHC delay.
+	 */
+	if ((host->clock == priv->clock) &&
+	    (ios->bus_width == priv->bus_width) &&
+	    (ios->timing == priv->timing))
+		return 0;
+
+	xenon_emmc_phy_set(host, ios->timing);
+
+	/* Update the record */
+	priv->bus_width = ios->bus_width;
+
+	priv->timing = ios->timing;
+	priv->clock = host->clock;
+
+	/* Legacy mode is a special case */
+	if (ios->timing == MMC_TIMING_LEGACY)
+		return 0;
+
+	if (host->clock > XENON_DEFAULT_SDCLK_FREQ)
+		ret = xenon_hs_delay_adj(host);
+	return ret;
+}
+
+void xenon_clean_phy(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+
+	kfree(priv->phy_params);
+}
+
+static int xenon_add_phy(struct device_node *np, struct sdhci_host *host,
+			 const char *phy_name)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	int i, ret;
+
+	for (i = 0; i < NR_PHY_TYPES; i++) {
+		if (!strcmp(phy_name, phy_types[i])) {
+			priv->phy_type = i;
+			break;
+		}
+	}
+	if (i == NR_PHY_TYPES) {
+		dev_err(mmc_dev(host->mmc),
+			"Unable to determine PHY name %s. Using default eMMC 5.1 PHY\n",
+			phy_name);
+		priv->phy_type = EMMC_5_1_PHY;
+	}
+
+	ret = xenon_alloc_emmc_phy(host);
+	if (ret)
+		return ret;
+
+	ret = xenon_emmc_phy_parse_param_dt(host, np, priv->phy_params);
+	if (ret)
+		xenon_clean_phy(host);
+
+	return ret;
+}
+
+int xenon_phy_parse_dt(struct device_node *np, struct sdhci_host *host)
+{
+	const char *phy_type = NULL;
+
+	if (!of_property_read_string(np, "marvell,xenon-phy-type", &phy_type))
+		return xenon_add_phy(np, host, phy_type);
+
+	return xenon_add_phy(np, host, "emmc 5.1 phy");
+}
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
new file mode 100644
index 000000000000..67246655315b
--- /dev/null
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -0,0 +1,548 @@
+/*
+ * Driver for Marvell Xenon SDHC as a platform device
+ *
+ * Copyright (C) 2016 Marvell, All Rights Reserved.
+ *
+ * Author:	Hu Ziji <huziji@marvell.com>
+ * Date:	2016-8-24
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * Inspired by Jisheng Zhang <jszhang@marvell.com>
+ * Special thanks to Video BG4 project team.
+ */
+
+#include <linux/delay.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include "sdhci-pltfm.h"
+#include "sdhci-xenon.h"
+
+static int xenon_enable_internal_clk(struct sdhci_host *host)
+{
+	u32 reg;
+	ktime_t timeout;
+
+	reg = sdhci_readl(host, SDHCI_CLOCK_CONTROL);
+	reg |= SDHCI_CLOCK_INT_EN;
+	sdhci_writel(host, reg, SDHCI_CLOCK_CONTROL);
+	/* Wait max 20 ms */
+	timeout = ktime_add_ms(ktime_get(), 20);
+	while (!((reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
+			& SDHCI_CLOCK_INT_STABLE)) {
+		if (ktime_after(ktime_get(), timeout)) {
+			dev_err(mmc_dev(host->mmc), "Internal clock never stabilised.\n");
+			return -ETIMEDOUT;
+		}
+		usleep_range(900, 1100);
+	}
+
+	return 0;
+}
+
+/* Set SDCLK-off-while-idle */
+static void xenon_set_sdclk_off_idle(struct sdhci_host *host,
+				     unsigned char sdhc_id, bool enable)
+{
+	u32 reg;
+	u32 mask;
+
+	reg = sdhci_readl(host, XENON_SYS_OP_CTRL);
+	/* Get the bit shift based on the SDHC index */
+	mask = (0x1 << (XENON_SDCLK_IDLEOFF_ENABLE_SHIFT + sdhc_id));
+	if (enable)
+		reg |= mask;
+	else
+		reg &= ~mask;
+
+	sdhci_writel(host, reg, XENON_SYS_OP_CTRL);
+}
+
+/* Enable/Disable the Auto Clock Gating function */
+static void xenon_set_acg(struct sdhci_host *host, bool enable)
+{
+	u32 reg;
+
+	reg = sdhci_readl(host, XENON_SYS_OP_CTRL);
+	if (enable)
+		reg &= ~XENON_AUTO_CLKGATE_DISABLE_MASK;
+	else
+		reg |= XENON_AUTO_CLKGATE_DISABLE_MASK;
+	sdhci_writel(host, reg, XENON_SYS_OP_CTRL);
+}
+
+/* Enable this SDHC */
+static void xenon_enable_sdhc(struct sdhci_host *host,
+			      unsigned char sdhc_id)
+{
+	u32 reg;
+
+	reg = sdhci_readl(host, XENON_SYS_OP_CTRL);
+	reg |= (BIT(sdhc_id) << XENON_SLOT_ENABLE_SHIFT);
+	sdhci_writel(host, reg, XENON_SYS_OP_CTRL);
+
+	host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
+	/*
+	 * Force-clear BUS_TEST to skip
+	 * bus_test_pre and bus_test_post.
+	 */
+	host->mmc->caps &= ~MMC_CAP_BUS_WIDTH_TEST;
+}
+
+/* Disable this SDHC */
+static void xenon_disable_sdhc(struct sdhci_host *host,
+			       unsigned char sdhc_id)
+{
+	u32 reg;
+
+	reg = sdhci_readl(host, XENON_SYS_OP_CTRL);
+	reg &= ~(BIT(sdhc_id) << XENON_SLOT_ENABLE_SHIFT);
+	sdhci_writel(host, reg, XENON_SYS_OP_CTRL);
+}
+
+/* Enable Parallel Transfer Mode */
+static void xenon_enable_sdhc_parallel_tran(struct sdhci_host *host,
+					    unsigned char sdhc_id)
+{
+	u32 reg;
+
+	reg = sdhci_readl(host, XENON_SYS_EXT_OP_CTRL);
+	reg |= BIT(sdhc_id);
+	sdhci_writel(host, reg, XENON_SYS_EXT_OP_CTRL);
+}
+
+/* Mask command conflict error */
+static void xenon_mask_cmd_conflict_err(struct sdhci_host *host)
+{
+	u32  reg;
+
+	reg = sdhci_readl(host, XENON_SYS_EXT_OP_CTRL);
+	reg |= XENON_MASK_CMD_CONFLICT_ERR;
+	sdhci_writel(host, reg, XENON_SYS_EXT_OP_CTRL);
+}
+
+static void xenon_retune_setup(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u32 reg;
+
+	/* Disable the Re-Tuning Request functionality */
+	reg = sdhci_readl(host, XENON_SLOT_RETUNING_REQ_CTRL);
+	reg &= ~XENON_RETUNING_COMPATIBLE;
+	sdhci_writel(host, reg, XENON_SLOT_RETUNING_REQ_CTRL);
+
+	/* Disable the Re-tuning Interrupt */
+	reg = sdhci_readl(host, SDHCI_SIGNAL_ENABLE);
+	reg &= ~SDHCI_INT_RETUNE;
+	sdhci_writel(host, reg, SDHCI_SIGNAL_ENABLE);
+	reg = sdhci_readl(host, SDHCI_INT_ENABLE);
+	reg &= ~SDHCI_INT_RETUNE;
+	sdhci_writel(host, reg, SDHCI_INT_ENABLE);
+
+	/* Force to use Tuning Mode 1 */
+	host->tuning_mode = SDHCI_TUNING_MODE_1;
+	/* Set the re-tuning period to 2^(tun-count - 1) */
+	host->tuning_count = 1 << (priv->tuning_count - 1);
+}
+
+/*
+ * Operations inside struct sdhci_ops
+ */
+/* Recover the Register Setting cleared during SOFTWARE_RESET_ALL */
+static void xenon_reset_exit(struct sdhci_host *host,
+			     unsigned char sdhc_id, u8 mask)
+{
+	/* Only SOFTWARE RESET ALL will clear the register setting */
+	if (!(mask & SDHCI_RESET_ALL))
+		return;
+
+	/* Disable tuning request and auto-retuning again */
+	xenon_retune_setup(host);
+
+	xenon_set_acg(host, true);
+
+	xenon_set_sdclk_off_idle(host, sdhc_id, false);
+
+	xenon_mask_cmd_conflict_err(host);
+}
+
+static void xenon_reset(struct sdhci_host *host, u8 mask)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+
+	sdhci_reset(host, mask);
+	xenon_reset_exit(host, priv->sdhc_id, mask);
+}
+
+/*
+ * Xenon defines different values for HS200 and HS400
+ * in Host_Control_2
+ */
+static void xenon_set_uhs_signaling(struct sdhci_host *host,
+				    unsigned int timing)
+{
+	u16 ctrl_2;
+
+	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+	/* Select Bus Speed Mode for host */
+	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+	if (timing == MMC_TIMING_MMC_HS200)
+		ctrl_2 |= XENON_CTRL_HS200;
+	else if (timing == MMC_TIMING_UHS_SDR104)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
+	else if (timing == MMC_TIMING_UHS_SDR12)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+	else if (timing == MMC_TIMING_UHS_SDR25)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+	else if (timing == MMC_TIMING_UHS_SDR50)
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
+	else if ((timing == MMC_TIMING_UHS_DDR50) ||
+		 (timing == MMC_TIMING_MMC_DDR52))
+		ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
+	else if (timing == MMC_TIMING_MMC_HS400)
+		ctrl_2 |= XENON_CTRL_HS400;
+	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+
+static const struct sdhci_ops sdhci_xenon_ops = {
+	.set_clock		= sdhci_set_clock,
+	.set_bus_width		= sdhci_set_bus_width,
+	.reset			= xenon_reset,
+	.set_uhs_signaling	= xenon_set_uhs_signaling,
+	.get_max_clock		= sdhci_pltfm_clk_get_max_clock,
+};
+
+static const struct sdhci_pltfm_data sdhci_xenon_pdata = {
+	.ops = &sdhci_xenon_ops,
+	.quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+		  SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
+		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+};
+
+/*
+ * Xenon Specific Operations in mmc_host_ops
+ */
+static void xenon_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u32 reg;
+
+	/*
+	 * HS400/HS200/eMMC HS have no Preset Value register.
+	 * However, sdhci_set_ios() would still read the HS400/HS200
+	 * Preset register, so disable Preset Values for HS400/HS200.
+	 * eMMC HS with preset_enabled set will trigger a bug in
+	 * get_preset_value().
+	 */
+	if ((ios->timing == MMC_TIMING_MMC_HS400) ||
+	    (ios->timing == MMC_TIMING_MMC_HS200) ||
+	    (ios->timing == MMC_TIMING_MMC_HS)) {
+		host->preset_enabled = false;
+		host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
+		host->flags &= ~SDHCI_PV_ENABLED;
+
+		reg = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+		reg &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+		sdhci_writew(host, reg, SDHCI_HOST_CONTROL2);
+	} else {
+		host->quirks2 &= ~SDHCI_QUIRK2_PRESET_VALUE_BROKEN;
+	}
+
+	sdhci_set_ios(mmc, ios);
+	xenon_phy_adj(host, ios);
+
+	if (host->clock > XENON_DEFAULT_SDCLK_FREQ)
+		xenon_set_sdclk_off_idle(host, priv->sdhc_id, true);
+}
+
+static int xenon_start_signal_voltage_switch(struct mmc_host *mmc,
+					     struct mmc_ios *ios)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	/*
+	 * Before SD/SDIO switches the signal voltage, the SD bus clock
+	 * must be disabled. However, sdhci_set_clock() will also disable
+	 * the internal clock during mmc_set_signal_voltage().
+	 * If the internal clock is disabled, the 3.3V/1.8V bit cannot be
+	 * updated, so manually enable the internal clock here.
+	 *
+	 * After the switch completes, there is no need to disable the
+	 * internal clock; keeping it active still obeys the SD spec.
+	 */
+	xenon_enable_internal_clk(host);
+
+	xenon_soc_pad_ctrl(host, ios->signal_voltage);
+
+	/*
+	 * If vqmmc is fixed on the platform, the vqmmc regulator is
+	 * unavailable, and the SDHCI_CTRL_VDD_180 bit might not work.
+	 * Skip the standard voltage switch to avoid any issue.
+	 */
+	if (PTR_ERR(mmc->supply.vqmmc) == -ENODEV)
+		return 0;
+
+	return sdhci_start_signal_voltage_switch(mmc, ios);
+}
+
+/*
+ * Update card type.
+ * priv->init_card_type will be used in PHY timing adjustment.
+ */
+static void xenon_init_card(struct mmc_host *mmc, struct mmc_card *card)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+
+	/* Update the card type */
+	priv->init_card_type = card->type;
+}
+
+static int xenon_execute_tuning(struct mmc_host *mmc, u32 opcode)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
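+	/* Skip tuning in DDR50; see the DDR note in xenon_hs_delay_adj() */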
+	if (host->timing == MMC_TIMING_UHS_DDR50)
+		return 0;
+
+	/*
+	 * For now, force the Xenon driver back to Tuning Mode 1 only,
+	 * even though Xenon might claim to support mode 2 or mode 3.
+	 * Mode 2/mode 3 need more testing on more platforms first.
+	 */
+	if (host->tuning_mode != SDHCI_TUNING_MODE_1)
+		xenon_retune_setup(host);
+
+	return sdhci_execute_tuning(mmc, opcode);
+}
+
+static void xenon_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u32 reg;
+	u8 sdhc_id = priv->sdhc_id;
+
+	sdhci_enable_sdio_irq(mmc, enable);
+
+	if (enable) {
+		/*
+		 * Set SDIO Card Inserted indication
+		 * to enable detecting SDIO async irq.
+		 */
+		reg = sdhci_readl(host, XENON_SYS_CFG_INFO);
+		reg |= (1 << (sdhc_id + XENON_SLOT_TYPE_SDIO_SHIFT));
+		sdhci_writel(host, reg, XENON_SYS_CFG_INFO);
+	} else {
+		/* Clear SDIO Card Inserted indication */
+		reg = sdhci_readl(host, XENON_SYS_CFG_INFO);
+		reg &= ~(1 << (sdhc_id + XENON_SLOT_TYPE_SDIO_SHIFT));
+		sdhci_writel(host, reg, XENON_SYS_CFG_INFO);
+	}
+}
+
+static void xenon_replace_mmc_host_ops(struct sdhci_host *host)
+{
+	host->mmc_host_ops.set_ios = xenon_set_ios;
+	host->mmc_host_ops.start_signal_voltage_switch =
+			xenon_start_signal_voltage_switch;
+	host->mmc_host_ops.init_card = xenon_init_card;
+	host->mmc_host_ops.execute_tuning = xenon_execute_tuning;
+	host->mmc_host_ops.enable_sdio_irq = xenon_enable_sdio_irq;
+}
+
+/*
+ * Parse Xenon specific DT properties:
+ * sdhc-id: the index of the current SDHC.
+ *	    Refer to the XENON_SYS_CFG_INFO register.
+ * tun-count: the re-tuning interval.
+ */
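+/*
+ * Example DT node (a sketch; the unit address and property values are
+ * board-specific assumptions):
+ *
+ *	sdhci@aa0000 {
+ *		compatible = "marvell,armada-ap806-sdhci";
+ *		marvell,xenon-sdhc-id = <0>;
+ *		marvell,xenon-tun-count = <9>;
+ *	};
+ */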
+static int xenon_probe_dt(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct mmc_host *mmc = host->mmc;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u32 sdhc_id, nr_sdhc;
+	u32 tuning_count;
+
+	/* Disable HS200 on Armada AP806 */
+	if (of_device_is_compatible(np, "marvell,armada-ap806-sdhci"))
+		host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200;
+
+	sdhc_id = 0x0;
+	if (!of_property_read_u32(np, "marvell,xenon-sdhc-id", &sdhc_id)) {
+		nr_sdhc = sdhci_readl(host, XENON_SYS_CFG_INFO);
+		nr_sdhc &= XENON_NR_SUPPORTED_SLOT_MASK;
+		if (unlikely(sdhc_id > nr_sdhc)) {
+			dev_err(mmc_dev(mmc), "SDHC Index %d exceeds Number of SDHCs %d\n",
+				sdhc_id, nr_sdhc);
+			return -EINVAL;
+		}
+	}
+	priv->sdhc_id = sdhc_id;
+
+	tuning_count = XENON_DEF_TUNING_COUNT;
+	if (!of_property_read_u32(np, "marvell,xenon-tun-count",
+				  &tuning_count)) {
+		if (unlikely(tuning_count >= XENON_TMR_RETUN_NO_PRESENT)) {
+			dev_err(mmc_dev(mmc), "Invalid re-tuning count. Using default value %d\n",
+				XENON_DEF_TUNING_COUNT);
+			tuning_count = XENON_DEF_TUNING_COUNT;
+		}
+	}
+	priv->tuning_count = tuning_count;
+
+	return xenon_phy_parse_dt(np, host);
+}
+
+static int xenon_sdhc_prepare(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u8 sdhc_id = priv->sdhc_id;
+
+	/* Enable SDHC */
+	xenon_enable_sdhc(host, sdhc_id);
+
+	/* Enable ACG */
+	xenon_set_acg(host, true);
+
+	/* Enable Parallel Transfer Mode */
+	xenon_enable_sdhc_parallel_tran(host, sdhc_id);
+
+	/* Disable SDCLK-Off-While-Idle before card init */
+	xenon_set_sdclk_off_idle(host, sdhc_id, false);
+
+	xenon_mask_cmd_conflict_err(host);
+
+	return 0;
+}
+
+static void xenon_sdhc_unprepare(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
+	u8 sdhc_id = priv->sdhc_id;
+
+	/* Disable SDHC */
+	xenon_disable_sdhc(host, sdhc_id);
+}
+
+static int xenon_probe(struct platform_device *pdev)
+{
+	struct sdhci_pltfm_host *pltfm_host;
+	struct sdhci_host *host;
+	struct xenon_priv *priv;
+	int err;
+
+	host = sdhci_pltfm_init(pdev, &sdhci_xenon_pdata,
+				sizeof(struct xenon_priv));
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	pltfm_host = sdhci_priv(host);
+	priv = sdhci_pltfm_priv(pltfm_host);
+
+	/*
+	 * Link in the Xenon-specific mmc_host_ops functions
+	 * to replace the standard sdhci ones.
+	 */
+	xenon_replace_mmc_host_ops(host);
+
+	pltfm_host->clk = devm_clk_get(&pdev->dev, "core");
+	if (IS_ERR(pltfm_host->clk)) {
+		err = PTR_ERR(pltfm_host->clk);
+		dev_err(&pdev->dev, "Failed to setup input clk: %d\n", err);
+		goto free_pltfm;
+	}
+	err = clk_prepare_enable(pltfm_host->clk);
+	if (err)
+		goto free_pltfm;
+
+	err = mmc_of_parse(host->mmc);
+	if (err)
+		goto err_clk;
+
+	sdhci_get_of_property(pdev);
+
+	xenon_set_acg(host, false);
+
+	/* Xenon specific dt parse */
+	err = xenon_probe_dt(pdev);
+	if (err)
+		goto err_clk;
+
+	err = xenon_sdhc_prepare(host);
+	if (err)
+		goto clean_phy_param;
+
+	err = sdhci_add_host(host);
+	if (err)
+		goto remove_sdhc;
+
+	return 0;
+
+remove_sdhc:
+	xenon_sdhc_unprepare(host);
+clean_phy_param:
+	xenon_clean_phy(host);
+err_clk:
+	clk_disable_unprepare(pltfm_host->clk);
+free_pltfm:
+	sdhci_pltfm_free(pdev);
+	return err;
+}
+
+static int xenon_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+	xenon_clean_phy(host);
+
+	sdhci_remove_host(host, 0);
+
+	xenon_sdhc_unprepare(host);
+
+	clk_disable_unprepare(pltfm_host->clk);
+
+	sdhci_pltfm_free(pdev);
+
+	return 0;
+}
+
+static const struct of_device_id sdhci_xenon_dt_ids[] = {
+	{ .compatible = "marvell,armada-ap806-sdhci",},
+	{ .compatible = "marvell,armada-cp110-sdhci",},
+	{ .compatible = "marvell,armada-3700-sdhci",},
+	{}
+};
+MODULE_DEVICE_TABLE(of, sdhci_xenon_dt_ids);
+
+static struct platform_driver sdhci_xenon_driver = {
+	.driver	= {
+		.name	= "xenon-sdhci",
+		.of_match_table = sdhci_xenon_dt_ids,
+		.pm = &sdhci_pltfm_pmops,
+	},
+	.probe	= xenon_probe,
+	.remove	= xenon_remove,
+};
+
+module_platform_driver(sdhci_xenon_driver);
+
+MODULE_DESCRIPTION("SDHCI platform driver for Marvell Xenon SDHC");
+MODULE_AUTHOR("Hu Ziji <huziji@marvell.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h
new file mode 100644
index 000000000000..6e6523ea01ce
--- /dev/null
+++ b/drivers/mmc/host/sdhci-xenon.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016 Marvell, All Rights Reserved.
+ *
+ * Author:	Hu Ziji <huziji@marvell.com>
+ * Date:	2016-8-24
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ */
+#ifndef SDHCI_XENON_H_
+#define SDHCI_XENON_H_
+
+/* Register Offset of Xenon SDHC self-defined register */
+#define XENON_SYS_CFG_INFO			0x0104
+#define XENON_SLOT_TYPE_SDIO_SHIFT		24
+#define XENON_NR_SUPPORTED_SLOT_MASK		0x7
+
+#define XENON_SYS_OP_CTRL			0x0108
+#define XENON_AUTO_CLKGATE_DISABLE_MASK		BIT(20)
+#define XENON_SDCLK_IDLEOFF_ENABLE_SHIFT	8
+#define XENON_SLOT_ENABLE_SHIFT			0
+
+#define XENON_SYS_EXT_OP_CTRL			0x010C
+#define XENON_MASK_CMD_CONFLICT_ERR		BIT(8)
+
+#define XENON_SLOT_OP_STATUS_CTRL		0x0128
+#define XENON_TUN_CONSECUTIVE_TIMES_SHIFT	16
+#define XENON_TUN_CONSECUTIVE_TIMES_MASK	0x7
+#define XENON_TUN_CONSECUTIVE_TIMES		0x4
+#define XENON_TUNING_STEP_SHIFT			12
+#define XENON_TUNING_STEP_MASK			0xF
+#define XENON_TUNING_STEP_DIVIDER		BIT(6)
+
+#define XENON_SLOT_EMMC_CTRL			0x0130
+#define XENON_ENABLE_DATA_STROBE		BIT(24)
+
+#define XENON_SLOT_RETUNING_REQ_CTRL		0x0144
+/* retuning compatible */
+#define XENON_RETUNING_COMPATIBLE		0x1
+
+#define XENON_SLOT_EXT_PRESENT_STATE		0x014C
+#define XENON_DLL_LOCK_STATE			0x1
+
+#define XENON_SLOT_DLL_CUR_DLY_VAL		0x0150
+
+/* Tuning Parameter */
+#define XENON_TMR_RETUN_NO_PRESENT		0xF
+#define XENON_DEF_TUNING_COUNT			0x9
+
+#define XENON_DEFAULT_SDCLK_FREQ		400000
+#define XENON_LOWEST_SDCLK_FREQ			100000
+
+/* Xenon specific Mode Select value */
+#define XENON_CTRL_HS200			0x5
+#define XENON_CTRL_HS400			0x6
+
+struct xenon_priv {
+	unsigned char	tuning_count;
+	/* Index of the SDHC */
+	u8		sdhc_id;
+
+	/*
+	 * eMMC/SD/SDIO require different register settings.
+	 * The Xenon driver has to recognize the card type while
+	 * mmc_host->card is not yet available.
+	 * This field records the card type during init.
+	 * It is updated in xenon_init_card().
+	 *
+	 * It is only valid during initialization after it is updated.
+	 * Do not access this variable in normal transfers after
+	 * initialization completes.
+	 */
+	unsigned int	init_card_type;
+
+	/*
+	 * The bus_width, timing, and clock fields below record
+	 * the current ios setting of the Xenon SDHC.
+	 * The driver will adjust the PHY setting if any ios change
+	 * affects the PHY timing.
+	 */
+	unsigned char	bus_width;
+	unsigned char	timing;
+	unsigned int	clock;
+
+	int		phy_type;
+	/*
+	 * Contains board-specific PHY parameters
+	 * passed from device tree.
+	 */
+	void		*phy_params;
+	struct xenon_emmc_phy_regs *emmc_phy_regs;
+};
+
+int xenon_phy_adj(struct sdhci_host *host, struct mmc_ios *ios);
+void xenon_clean_phy(struct sdhci_host *host);
+int xenon_phy_parse_dt(struct device_node *np,
+		       struct sdhci_host *host);
+void xenon_soc_pad_ctrl(struct sdhci_host *host,
+			unsigned char signal_voltage);
+#endif
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 63bc33a54d0d..ecd0d4350e8a 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/ktime.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -37,7 +38,10 @@
 #define DRIVER_NAME "sdhci"
 
 #define DBG(f, x...) \
-	pr_debug(DRIVER_NAME " [%s()]: " f, __func__,## x)
+	pr_debug("%s: " DRIVER_NAME ": " f, mmc_hostname(host->mmc), ## x)
+
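+/* Unlike DBG(), SDHCI_DUMP() uses pr_err() so dumps are always printed */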
+#define SDHCI_DUMP(f, x...) \
+	pr_err("%s: " DRIVER_NAME ": " f, mmc_hostname(host->mmc), ## x)
 
 #define MAX_TUNING_LOOP 40
 
@@ -48,61 +52,68 @@ static void sdhci_finish_data(struct sdhci_host *);
 
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
 
-static void sdhci_dumpregs(struct sdhci_host *host)
-{
-	pr_err(DRIVER_NAME ": =========== REGISTER DUMP (%s)===========\n",
-	       mmc_hostname(host->mmc));
-
-	pr_err(DRIVER_NAME ": Sys addr: 0x%08x | Version:  0x%08x\n",
-	       sdhci_readl(host, SDHCI_DMA_ADDRESS),
-	       sdhci_readw(host, SDHCI_HOST_VERSION));
-	pr_err(DRIVER_NAME ": Blk size: 0x%08x | Blk cnt:  0x%08x\n",
-	       sdhci_readw(host, SDHCI_BLOCK_SIZE),
-	       sdhci_readw(host, SDHCI_BLOCK_COUNT));
-	pr_err(DRIVER_NAME ": Argument: 0x%08x | Trn mode: 0x%08x\n",
-	       sdhci_readl(host, SDHCI_ARGUMENT),
-	       sdhci_readw(host, SDHCI_TRANSFER_MODE));
-	pr_err(DRIVER_NAME ": Present:  0x%08x | Host ctl: 0x%08x\n",
-	       sdhci_readl(host, SDHCI_PRESENT_STATE),
-	       sdhci_readb(host, SDHCI_HOST_CONTROL));
-	pr_err(DRIVER_NAME ": Power:    0x%08x | Blk gap:  0x%08x\n",
-	       sdhci_readb(host, SDHCI_POWER_CONTROL),
-	       sdhci_readb(host, SDHCI_BLOCK_GAP_CONTROL));
-	pr_err(DRIVER_NAME ": Wake-up:  0x%08x | Clock:    0x%08x\n",
-	       sdhci_readb(host, SDHCI_WAKE_UP_CONTROL),
-	       sdhci_readw(host, SDHCI_CLOCK_CONTROL));
-	pr_err(DRIVER_NAME ": Timeout:  0x%08x | Int stat: 0x%08x\n",
-	       sdhci_readb(host, SDHCI_TIMEOUT_CONTROL),
-	       sdhci_readl(host, SDHCI_INT_STATUS));
-	pr_err(DRIVER_NAME ": Int enab: 0x%08x | Sig enab: 0x%08x\n",
-	       sdhci_readl(host, SDHCI_INT_ENABLE),
-	       sdhci_readl(host, SDHCI_SIGNAL_ENABLE));
-	pr_err(DRIVER_NAME ": AC12 err: 0x%08x | Slot int: 0x%08x\n",
-	       sdhci_readw(host, SDHCI_ACMD12_ERR),
-	       sdhci_readw(host, SDHCI_SLOT_INT_STATUS));
-	pr_err(DRIVER_NAME ": Caps:     0x%08x | Caps_1:   0x%08x\n",
-	       sdhci_readl(host, SDHCI_CAPABILITIES),
-	       sdhci_readl(host, SDHCI_CAPABILITIES_1));
-	pr_err(DRIVER_NAME ": Cmd:      0x%08x | Max curr: 0x%08x\n",
-	       sdhci_readw(host, SDHCI_COMMAND),
-	       sdhci_readl(host, SDHCI_MAX_CURRENT));
-	pr_err(DRIVER_NAME ": Host ctl2: 0x%08x\n",
-	       sdhci_readw(host, SDHCI_HOST_CONTROL2));
+void sdhci_dumpregs(struct sdhci_host *host)
+{
+	SDHCI_DUMP("============ SDHCI REGISTER DUMP ===========\n");
+
+	SDHCI_DUMP("Sys addr:  0x%08x | Version:  0x%08x\n",
+		   sdhci_readl(host, SDHCI_DMA_ADDRESS),
+		   sdhci_readw(host, SDHCI_HOST_VERSION));
+	SDHCI_DUMP("Blk size:  0x%08x | Blk cnt:  0x%08x\n",
+		   sdhci_readw(host, SDHCI_BLOCK_SIZE),
+		   sdhci_readw(host, SDHCI_BLOCK_COUNT));
+	SDHCI_DUMP("Argument:  0x%08x | Trn mode: 0x%08x\n",
+		   sdhci_readl(host, SDHCI_ARGUMENT),
+		   sdhci_readw(host, SDHCI_TRANSFER_MODE));
+	SDHCI_DUMP("Present:   0x%08x | Host ctl: 0x%08x\n",
+		   sdhci_readl(host, SDHCI_PRESENT_STATE),
+		   sdhci_readb(host, SDHCI_HOST_CONTROL));
+	SDHCI_DUMP("Power:     0x%08x | Blk gap:  0x%08x\n",
+		   sdhci_readb(host, SDHCI_POWER_CONTROL),
+		   sdhci_readb(host, SDHCI_BLOCK_GAP_CONTROL));
+	SDHCI_DUMP("Wake-up:   0x%08x | Clock:    0x%08x\n",
+		   sdhci_readb(host, SDHCI_WAKE_UP_CONTROL),
+		   sdhci_readw(host, SDHCI_CLOCK_CONTROL));
+	SDHCI_DUMP("Timeout:   0x%08x | Int stat: 0x%08x\n",
+		   sdhci_readb(host, SDHCI_TIMEOUT_CONTROL),
+		   sdhci_readl(host, SDHCI_INT_STATUS));
+	SDHCI_DUMP("Int enab:  0x%08x | Sig enab: 0x%08x\n",
+		   sdhci_readl(host, SDHCI_INT_ENABLE),
+		   sdhci_readl(host, SDHCI_SIGNAL_ENABLE));
+	SDHCI_DUMP("AC12 err:  0x%08x | Slot int: 0x%08x\n",
+		   sdhci_readw(host, SDHCI_ACMD12_ERR),
+		   sdhci_readw(host, SDHCI_SLOT_INT_STATUS));
+	SDHCI_DUMP("Caps:      0x%08x | Caps_1:   0x%08x\n",
+		   sdhci_readl(host, SDHCI_CAPABILITIES),
+		   sdhci_readl(host, SDHCI_CAPABILITIES_1));
+	SDHCI_DUMP("Cmd:       0x%08x | Max curr: 0x%08x\n",
+		   sdhci_readw(host, SDHCI_COMMAND),
+		   sdhci_readl(host, SDHCI_MAX_CURRENT));
+	SDHCI_DUMP("Resp[0]:   0x%08x | Resp[1]:  0x%08x\n",
+		   sdhci_readl(host, SDHCI_RESPONSE),
+		   sdhci_readl(host, SDHCI_RESPONSE + 4));
+	SDHCI_DUMP("Resp[2]:   0x%08x | Resp[3]:  0x%08x\n",
+		   sdhci_readl(host, SDHCI_RESPONSE + 8),
+		   sdhci_readl(host, SDHCI_RESPONSE + 12));
+	SDHCI_DUMP("Host ctl2: 0x%08x\n",
+		   sdhci_readw(host, SDHCI_HOST_CONTROL2));
 
 	if (host->flags & SDHCI_USE_ADMA) {
-		if (host->flags & SDHCI_USE_64_BIT_DMA)
-			pr_err(DRIVER_NAME ": ADMA Err: 0x%08x | ADMA Ptr: 0x%08x%08x\n",
-			       readl(host->ioaddr + SDHCI_ADMA_ERROR),
-			       readl(host->ioaddr + SDHCI_ADMA_ADDRESS_HI),
-			       readl(host->ioaddr + SDHCI_ADMA_ADDRESS));
-		else
-			pr_err(DRIVER_NAME ": ADMA Err: 0x%08x | ADMA Ptr: 0x%08x\n",
-			       readl(host->ioaddr + SDHCI_ADMA_ERROR),
-			       readl(host->ioaddr + SDHCI_ADMA_ADDRESS));
+		if (host->flags & SDHCI_USE_64_BIT_DMA) {
+			SDHCI_DUMP("ADMA Err:  0x%08x | ADMA Ptr: 0x%08x%08x\n",
+				   sdhci_readl(host, SDHCI_ADMA_ERROR),
+				   sdhci_readl(host, SDHCI_ADMA_ADDRESS_HI),
+				   sdhci_readl(host, SDHCI_ADMA_ADDRESS));
+		} else {
+			SDHCI_DUMP("ADMA Err:  0x%08x | ADMA Ptr: 0x%08x\n",
+				   sdhci_readl(host, SDHCI_ADMA_ERROR),
+				   sdhci_readl(host, SDHCI_ADMA_ADDRESS));
+		}
 	}
 
-	pr_err(DRIVER_NAME ": ===========================================\n");
+	SDHCI_DUMP("============================================\n");
 }
+EXPORT_SYMBOL_GPL(sdhci_dumpregs);
 
 /*****************************************************************************\
  *                                                                           *
@@ -165,7 +176,7 @@ static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
 
 void sdhci_reset(struct sdhci_host *host, u8 mask)
 {
-	unsigned long timeout;
+	ktime_t timeout;
 
 	sdhci_writeb(host, mask, SDHCI_SOFTWARE_RESET);
 
@@ -177,18 +188,17 @@ void sdhci_reset(struct sdhci_host *host, u8 mask)
 	}
 
 	/* Wait max 100 ms */
-	timeout = 100;
+	timeout = ktime_add_ms(ktime_get(), 100);
 
 	/* hw clears the bit when it's done */
 	while (sdhci_readb(host, SDHCI_SOFTWARE_RESET) & mask) {
-		if (timeout == 0) {
+		if (ktime_after(ktime_get(), timeout)) {
 			pr_err("%s: Reset 0x%x never completed.\n",
 				mmc_hostname(host->mmc), (int)mask);
 			sdhci_dumpregs(host);
 			return;
 		}
-		timeout--;
-		mdelay(1);
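+		/* Poll every 10 us; the ktime deadline above bounds the total wait */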
+		udelay(10);
 	}
 }
 EXPORT_SYMBOL_GPL(sdhci_reset);
@@ -215,15 +225,8 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
 	}
 }
 
-static void sdhci_init(struct sdhci_host *host, int soft)
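+/* Program the default interrupt mask; also used when leaving CQE mode */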
+static void sdhci_set_default_irqs(struct sdhci_host *host)
 {
-	struct mmc_host *mmc = host->mmc;
-
-	if (soft)
-		sdhci_do_reset(host, SDHCI_RESET_CMD|SDHCI_RESET_DATA);
-	else
-		sdhci_do_reset(host, SDHCI_RESET_ALL);
-
 	host->ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
 		    SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT |
 		    SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC |
@@ -236,6 +239,20 @@ static void sdhci_init(struct sdhci_host *host, int soft)
 
 	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
 	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+}
+
+static void sdhci_init(struct sdhci_host *host, int soft)
+{
+	struct mmc_host *mmc = host->mmc;
+
+	if (soft)
+		sdhci_do_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
+	else
+		sdhci_do_reset(host, SDHCI_RESET_ALL);
+
+	sdhci_set_default_irqs(host);
+
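+	/* CQE is off until a CQE driver calls sdhci_cqe_enable() */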
+	host->cqe_on = false;
 
 	if (soft) {
 		/* force clock reconfiguration */
@@ -485,8 +502,7 @@ static int sdhci_pre_dma_transfer(struct sdhci_host *host,
 		return data->sg_count;
 
 	sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-				data->flags & MMC_DATA_WRITE ?
-				DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			      mmc_get_dma_dir(data));
 
 	if (sg_count == 0)
 		return -ENOSPC;
@@ -715,8 +731,8 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 	}
 
 	if (count >= 0xF) {
-		DBG("%s: Too large timeout 0x%x requested for CMD%d!\n",
-		    mmc_hostname(host->mmc), count, cmd->opcode);
+		DBG("Too large timeout 0x%x requested for CMD%d!\n",
+		    count, cmd->opcode);
 		count = 0xE;
 	}
 
@@ -1346,25 +1362,22 @@ EXPORT_SYMBOL_GPL(sdhci_calc_clk);
 
 void sdhci_enable_clk(struct sdhci_host *host, u16 clk)
 {
-	unsigned long timeout;
+	ktime_t timeout;
 
 	clk |= SDHCI_CLOCK_INT_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
 	/* Wait max 20 ms */
-	timeout = 20;
+	timeout = ktime_add_ms(ktime_get(), 20);
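+	/* Busy-wait in 10 us steps; the 1 ms sleep used before forced the lock to be dropped */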
 	while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
 		& SDHCI_CLOCK_INT_STABLE)) {
-		if (timeout == 0) {
+		if (ktime_after(ktime_get(), timeout)) {
 			pr_err("%s: Internal clock never stabilised.\n",
 			       mmc_hostname(host->mmc));
 			sdhci_dumpregs(host);
 			return;
 		}
-		timeout--;
-		spin_unlock_irq(&host->lock);
-		usleep_range(900, 1100);
-		spin_lock_irq(&host->lock);
+		udelay(10);
 	}
 
 	clk |= SDHCI_CLOCK_CARD_EN;
@@ -1393,9 +1406,7 @@ static void sdhci_set_power_reg(struct sdhci_host *host, unsigned char mode,
 {
 	struct mmc_host *mmc = host->mmc;
 
-	spin_unlock_irq(&host->lock);
 	mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
-	spin_lock_irq(&host->lock);
 
 	if (mode != MMC_POWER_OFF)
 		sdhci_writeb(host, SDHCI_POWER_ON, SDHCI_POWER_CONTROL);
@@ -1572,19 +1583,15 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
 }
 EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
 
-static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
-	unsigned long flags;
 	u8 ctrl;
 
 	if (ios->power_mode == MMC_POWER_UNDEFINED)
 		return;
 
-	spin_lock_irqsave(&host->lock, flags);
-
 	if (host->flags & SDHCI_DEVICE_DEAD) {
-		spin_unlock_irqrestore(&host->lock, flags);
 		if (!IS_ERR(mmc->supply.vmmc) &&
 		    ios->power_mode == MMC_POWER_OFF)
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
@@ -1730,8 +1737,8 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		sdhci_do_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
 
 	mmiowb();
-	spin_unlock_irqrestore(&host->lock, flags);
 }
+EXPORT_SYMBOL_GPL(sdhci_set_ios);
 
 static int sdhci_get_cd(struct mmc_host *mmc)
 {
@@ -1825,7 +1832,7 @@ static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
 	}
 }
 
-static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 	unsigned long flags;
@@ -1845,9 +1852,10 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 	if (!enable)
 		pm_runtime_put_noidle(host->mmc->parent);
 }
+EXPORT_SYMBOL_GPL(sdhci_enable_sdio_irq);
 
-static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
-					     struct mmc_ios *ios)
+int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+				      struct mmc_ios *ios)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 	u16 ctrl;
@@ -1939,6 +1947,7 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
 		return 0;
 	}
 }
+EXPORT_SYMBOL_GPL(sdhci_start_signal_voltage_switch);
 
 static int sdhci_card_busy(struct mmc_host *mmc)
 {
@@ -2003,8 +2012,7 @@ static void sdhci_reset_tuning(struct sdhci_host *host)
 	sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 }
 
-static void sdhci_abort_tuning(struct sdhci_host *host, u32 opcode,
-			       unsigned long flags)
+static void sdhci_abort_tuning(struct sdhci_host *host, u32 opcode)
 {
 	sdhci_reset_tuning(host);
 
@@ -2013,9 +2021,7 @@ static void sdhci_abort_tuning(struct sdhci_host *host, u32 opcode,
 
 	sdhci_end_tuning(host);
 
-	spin_unlock_irqrestore(&host->lock, flags);
 	mmc_abort_tuning(host->mmc, opcode);
-	spin_lock_irqsave(&host->lock, flags);
 }
 
 /*
@@ -2025,12 +2031,14 @@ static void sdhci_abort_tuning(struct sdhci_host *host, u32 opcode,
  * interrupt setup is different to other commands and there is no timeout
  * interrupt so special handling is needed.
  */
-static void sdhci_send_tuning(struct sdhci_host *host, u32 opcode,
-			      unsigned long flags)
+static void sdhci_send_tuning(struct sdhci_host *host, u32 opcode)
 {
 	struct mmc_host *mmc = host->mmc;
 	struct mmc_command cmd = {};
 	struct mmc_request mrq = {};
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
 
 	cmd.opcode = opcode;
 	cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
@@ -2064,17 +2072,16 @@ static void sdhci_send_tuning(struct sdhci_host *host, u32 opcode,
 
 	host->tuning_done = 0;
 
+	mmiowb();
 	spin_unlock_irqrestore(&host->lock, flags);
 
 	/* Wait for Buffer Read Ready interrupt */
 	wait_event_timeout(host->buf_ready_int, (host->tuning_done == 1),
 			   msecs_to_jiffies(50));
 
-	spin_lock_irqsave(&host->lock, flags);
 }
 
-static void __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode,
-				   unsigned long flags)
+static void __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
 {
 	int i;
 
@@ -2085,12 +2092,12 @@ static void __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode,
 	for (i = 0; i < MAX_TUNING_LOOP; i++) {
 		u16 ctrl;
 
-		sdhci_send_tuning(host, opcode, flags);
+		sdhci_send_tuning(host, opcode);
 
 		if (!host->tuning_done) {
 			pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
 				mmc_hostname(host->mmc));
-			sdhci_abort_tuning(host, opcode, flags);
+			sdhci_abort_tuning(host, opcode);
 			return;
 		}
 
@@ -2101,9 +2108,9 @@ static void __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode,
 			break;
 		}
 
-		/* eMMC spec does not require a delay between tuning cycles */
-		if (opcode == MMC_SEND_TUNING_BLOCK)
-			mdelay(1);
+		/* Spec does not require a delay between tuning cycles */
+		if (host->tuning_delay > 0)
+			mdelay(host->tuning_delay);
 	}
 
 	pr_info("%s: Tuning failed, falling back to fixed sampling clock\n",
@@ -2115,12 +2122,9 @@ int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 	int err = 0;
-	unsigned long flags;
 	unsigned int tuning_count = 0;
 	bool hs400_tuning;
 
-	spin_lock_irqsave(&host->lock, flags);
-
 	hs400_tuning = host->flags & SDHCI_HS400_TUNING;
 
 	if (host->tuning_mode == SDHCI_TUNING_MODE_1)
@@ -2137,7 +2141,7 @@ int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	/* HS400 tuning is done in HS200 mode */
 	case MMC_TIMING_MMC_HS400:
 		err = -EINVAL;
-		goto out_unlock;
+		goto out;
 
 	case MMC_TIMING_MMC_HS200:
 		/*
@@ -2158,44 +2162,31 @@ int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 		/* FALLTHROUGH */
 
 	default:
-		goto out_unlock;
+		goto out;
 	}
 
 	if (host->ops->platform_execute_tuning) {
-		spin_unlock_irqrestore(&host->lock, flags);
 		err = host->ops->platform_execute_tuning(host, opcode);
-		spin_lock_irqsave(&host->lock, flags);
-		goto out_unlock;
+		goto out;
 	}
 
 	host->mmc->retune_period = tuning_count;
 
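+	/*
+	 * A negative tuning_delay selects the default: 1 ms between
+	 * MMC_SEND_TUNING_BLOCK (CMD19) cycles, no delay otherwise.
+	 */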
+	if (host->tuning_delay < 0)
+		host->tuning_delay = opcode == MMC_SEND_TUNING_BLOCK;
+
 	sdhci_start_tuning(host);
 
-	__sdhci_execute_tuning(host, opcode, flags);
+	__sdhci_execute_tuning(host, opcode);
 
 	sdhci_end_tuning(host);
-out_unlock:
+out:
 	host->flags &= ~SDHCI_HS400_TUNING;
-	spin_unlock_irqrestore(&host->lock, flags);
 
 	return err;
 }
 EXPORT_SYMBOL_GPL(sdhci_execute_tuning);
 
-static int sdhci_select_drive_strength(struct mmc_card *card,
-				       unsigned int max_dtr, int host_drv,
-				       int card_drv, int *drv_type)
-{
-	struct sdhci_host *host = mmc_priv(card->host);
-
-	if (!host->ops->select_drive_strength)
-		return 0;
-
-	return host->ops->select_drive_strength(host, card, max_dtr, host_drv,
-						card_drv, drv_type);
-}
-
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable)
 {
 	/* Host Controller v3.00 defines preset value registers */
@@ -2233,8 +2224,7 @@ static void sdhci_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 
 	if (data->host_cookie != COOKIE_UNMAPPED)
 		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     data->flags & MMC_DATA_WRITE ?
-			       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			     mmc_get_dma_dir(data));
 
 	data->host_cookie = COOKIE_UNMAPPED;
 }
@@ -2309,7 +2299,6 @@ static const struct mmc_host_ops sdhci_ops = {
 	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
 	.prepare_hs400_tuning		= sdhci_prepare_hs400_tuning,
 	.execute_tuning			= sdhci_execute_tuning,
-	.select_drive_strength		= sdhci_select_drive_strength,
 	.card_event			= sdhci_card_event,
 	.card_busy	= sdhci_card_busy,
 };
@@ -2351,8 +2340,7 @@ static bool sdhci_request_done(struct sdhci_host *host)
 
 		if (data && data->host_cookie == COOKIE_MAPPED) {
 			dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-				     (data->flags & MMC_DATA_READ) ?
-				     DMA_FROM_DEVICE : DMA_TO_DEVICE);
+				     mmc_get_dma_dir(data));
 			data->host_cookie = COOKIE_UNMAPPED;
 		}
 	}
@@ -2517,7 +2505,6 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
 #ifdef CONFIG_MMC_DEBUG
 static void sdhci_adma_show_error(struct sdhci_host *host)
 {
-	const char *name = mmc_hostname(host->mmc);
 	void *desc = host->adma_table;
 
 	sdhci_dumpregs(host);
@@ -2526,14 +2513,14 @@ static void sdhci_adma_show_error(struct sdhci_host *host)
 		struct sdhci_adma2_64_desc *dma_desc = desc;
 
 		if (host->flags & SDHCI_USE_64_BIT_DMA)
-			DBG("%s: %p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
-			    name, desc, le32_to_cpu(dma_desc->addr_hi),
+			DBG("%p: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
+			    desc, le32_to_cpu(dma_desc->addr_hi),
 			    le32_to_cpu(dma_desc->addr_lo),
 			    le16_to_cpu(dma_desc->len),
 			    le16_to_cpu(dma_desc->cmd));
 		else
-			DBG("%s: %p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
-			    name, desc, le32_to_cpu(dma_desc->addr_lo),
+			DBG("%p: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
+			    desc, le32_to_cpu(dma_desc->addr_lo),
 			    le16_to_cpu(dma_desc->len),
 			    le16_to_cpu(dma_desc->cmd));
 
@@ -2649,10 +2636,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 				~(SDHCI_DEFAULT_BOUNDARY_SIZE - 1)) +
 				SDHCI_DEFAULT_BOUNDARY_SIZE;
 			host->data->bytes_xfered = dmanow - dmastart;
-			DBG("%s: DMA base 0x%08x, transferred 0x%06x bytes,"
-				" next 0x%08x\n",
-				mmc_hostname(host->mmc), dmastart,
-				host->data->bytes_xfered, dmanow);
+			DBG("DMA base 0x%08x, transferred 0x%06x bytes, next 0x%08x\n",
+			    dmastart, host->data->bytes_xfered, dmanow);
 			sdhci_writel(host, dmanow, SDHCI_DMA_ADDRESS);
 		}
 
@@ -2692,14 +2677,19 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 	}
 
 	do {
+		DBG("IRQ status 0x%08x\n", intmask);
+
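+		/*
+		 * Let the host driver (e.g. a CQE glue layer) filter the
+		 * status first; returning 0 means it handled everything.
+		 */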
+		if (host->ops->irq) {
+			intmask = host->ops->irq(host, intmask);
+			if (!intmask)
+				goto cont;
+		}
+
 		/* Clear selected interrupts. */
 		mask = intmask & (SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
 				  SDHCI_INT_BUS_POWER);
 		sdhci_writel(host, mask, SDHCI_INT_STATUS);
 
-		DBG("*** %s got interrupt: 0x%08x\n",
-			mmc_hostname(host->mmc), intmask);
-
 		if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
 			u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
 				      SDHCI_CARD_PRESENT;
@@ -2759,7 +2749,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			unexpected |= intmask;
 			sdhci_writel(host, intmask, SDHCI_INT_STATUS);
 		}
-
+cont:
 		if (result == IRQ_NONE)
 			result = IRQ_HANDLED;
 
@@ -2858,8 +2848,6 @@ int sdhci_suspend_host(struct sdhci_host *host)
 	sdhci_disable_card_detection(host);
 
 	mmc_retune_timer_stop(host->mmc);
-	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
-		mmc_retune_needed(host->mmc);
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
 		host->ier = 0;
@@ -2920,8 +2908,6 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
 	unsigned long flags;
 
 	mmc_retune_timer_stop(host->mmc);
-	if (host->tuning_mode != SDHCI_TUNING_MODE_3)
-		mmc_retune_needed(host->mmc);
 
 	spin_lock_irqsave(&host->lock, flags);
 	host->ier &= SDHCI_INT_CARD_INT;
@@ -2992,6 +2978,119 @@ EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host);
 
 /*****************************************************************************\
  *                                                                           *
+ * Command Queue Engine (CQE) helpers                                        *
+ *                                                                           *
+\*****************************************************************************/
+
+void sdhci_cqe_enable(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	unsigned long flags;
+	u8 ctrl;
+
+	spin_lock_irqsave(&host->lock, flags);
+
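+	/* CQE does DMA itself, so put the controller in the matching ADMA mode */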
+	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+	ctrl &= ~SDHCI_CTRL_DMA_MASK;
+	if (host->flags & SDHCI_USE_64_BIT_DMA)
+		ctrl |= SDHCI_CTRL_ADMA64;
+	else
+		ctrl |= SDHCI_CTRL_ADMA32;
+	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+
+	sdhci_writew(host, SDHCI_MAKE_BLKSZ(SDHCI_DEFAULT_BOUNDARY_ARG, 512),
+		     SDHCI_BLOCK_SIZE);
+
+	/* Set maximum timeout */
+	sdhci_writeb(host, 0xE, SDHCI_TIMEOUT_CONTROL);
+
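+	/* Cache the CQE IRQ mask; sdhci_cqe_disable() restores the defaults */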
+	host->ier = host->cqe_ier;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+
+	host->cqe_on = true;
+
+	pr_debug("%s: sdhci: CQE on, IRQ mask %#x, IRQ status %#x\n",
+		 mmc_hostname(mmc), host->ier,
+		 sdhci_readl(host, SDHCI_INT_STATUS));
+
+	mmiowb();
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+EXPORT_SYMBOL_GPL(sdhci_cqe_enable);
+
+void sdhci_cqe_disable(struct mmc_host *mmc, bool recovery)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	sdhci_set_default_irqs(host);
+
+	host->cqe_on = false;
+
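+	/* On error recovery, reset the CMD and DATA circuits to clear stuck state */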
+	if (recovery) {
+		sdhci_do_reset(host, SDHCI_RESET_CMD);
+		sdhci_do_reset(host, SDHCI_RESET_DATA);
+	}
+
+	pr_debug("%s: sdhci: CQE off, IRQ mask %#x, IRQ status %#x\n",
+		 mmc_hostname(mmc), host->ier,
+		 sdhci_readl(host, SDHCI_INT_STATUS));
+
+	mmiowb();
+	spin_unlock_irqrestore(&host->lock, flags);
+}
+EXPORT_SYMBOL_GPL(sdhci_cqe_disable);
+
+bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
+		   int *data_error)
+{
+	u32 mask;
+
+	if (!host->cqe_on)
+		return false;
+
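+	/* Map command-line error bits to an errno for the CQE layer */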
+	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
+		*cmd_error = -EILSEQ;
+	else if (intmask & SDHCI_INT_TIMEOUT)
+		*cmd_error = -ETIMEDOUT;
+	else
+		*cmd_error = 0;
+
+	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
+		*data_error = -EILSEQ;
+	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
+		*data_error = -ETIMEDOUT;
+	else if (intmask & SDHCI_INT_ADMA_ERROR)
+		*data_error = -EIO;
+	else
+		*data_error = 0;
+
+	/* Clear selected interrupts. */
+	mask = intmask & host->cqe_ier;
+	sdhci_writel(host, mask, SDHCI_INT_STATUS);
+
+	if (intmask & SDHCI_INT_BUS_POWER)
+		pr_err("%s: Card is consuming too much power!\n",
+		       mmc_hostname(host->mmc));
+
+	intmask &= ~(host->cqe_ier | SDHCI_INT_ERROR);
+	if (intmask) {
+		sdhci_writel(host, intmask, SDHCI_INT_STATUS);
+		pr_err("%s: CQE: Unexpected interrupt 0x%08x.\n",
+		       mmc_hostname(host->mmc), intmask);
+		sdhci_dumpregs(host);
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(sdhci_cqe_irq);
+
+/*****************************************************************************\
+ *                                                                           *
  * Device allocation/registration                                            *
  *                                                                           *
 \*****************************************************************************/
@@ -3015,6 +3114,11 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
 
 	host->flags = SDHCI_SIGNALING_330;
 
+	host->cqe_ier     = SDHCI_CQE_INT_MASK;
+	host->cqe_err_ier = SDHCI_CQE_INT_ERR_MASK;
+
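+	/* Negative means "use the default delay", see sdhci_execute_tuning() */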
+	host->tuning_delay = -1;
+
 	return host;
 }
 
@@ -3297,20 +3401,22 @@ int sdhci_setup_host(struct sdhci_host *host)
 	if (!(host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
 		host->timeout_clk = (host->caps & SDHCI_TIMEOUT_CLK_MASK) >>
 					SDHCI_TIMEOUT_CLK_SHIFT;
+
+		if (host->caps & SDHCI_TIMEOUT_CLK_UNIT)
+			host->timeout_clk *= 1000;
+
 		if (host->timeout_clk == 0) {
-			if (host->ops->get_timeout_clock) {
-				host->timeout_clk =
-					host->ops->get_timeout_clock(host);
-			} else {
+			if (!host->ops->get_timeout_clock) {
 				pr_err("%s: Hardware doesn't specify timeout clock frequency.\n",
 					mmc_hostname(mmc));
 				ret = -ENODEV;
 				goto undma;
 			}
-		}
 
-		if (host->caps & SDHCI_TIMEOUT_CLK_UNIT)
-			host->timeout_clk *= 1000;
+			host->timeout_clk =
+				DIV_ROUND_UP(host->ops->get_timeout_clock(host),
+					     1000);
+		}
 
 		if (override_timeout_clk)
 			host->timeout_clk = override_timeout_clk;
@@ -3332,9 +3438,9 @@ int sdhci_setup_host(struct sdhci_host *host)
 	     !(host->flags & SDHCI_USE_SDMA)) &&
 	     !(host->quirks2 & SDHCI_QUIRK2_ACMD23_BROKEN)) {
 		host->flags |= SDHCI_AUTO_CMD23;
-		DBG("%s: Auto-CMD23 available\n", mmc_hostname(mmc));
+		DBG("Auto-CMD23 available\n");
 	} else {
-		DBG("%s: Auto-CMD23 unavailable\n", mmc_hostname(mmc));
+		DBG("Auto-CMD23 unavailable\n");
 	}
 
 	/*
@@ -3598,6 +3704,22 @@ undma:
 }
 EXPORT_SYMBOL_GPL(sdhci_setup_host);
 
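+/*
+ * Undo the allocations made by sdhci_setup_host(): release vqmmc and free
+ * the ADMA descriptor/alignment buffers.
+ */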
+void sdhci_cleanup_host(struct sdhci_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+
+	if (!IS_ERR(mmc->supply.vqmmc))
+		regulator_disable(mmc->supply.vqmmc);
+
+	if (host->align_buffer)
+		dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
+				  host->adma_table_sz, host->align_buffer,
+				  host->align_addr);
+	host->adma_table = NULL;
+	host->align_buffer = NULL;
+}
+EXPORT_SYMBOL_GPL(sdhci_cleanup_host);
+
 int __sdhci_add_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc = host->mmc;
@@ -3662,16 +3784,6 @@ unirq:
 untasklet:
 	tasklet_kill(&host->finish_tasklet);
 
-	if (!IS_ERR(mmc->supply.vqmmc))
-		regulator_disable(mmc->supply.vqmmc);
-
-	if (host->align_buffer)
-		dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
-				  host->adma_table_sz, host->align_buffer,
-				  host->align_addr);
-	host->adma_table = NULL;
-	host->align_buffer = NULL;
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(__sdhci_add_host);
@@ -3684,7 +3796,16 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (ret)
 		return ret;
 
-	return __sdhci_add_host(host);
+	ret = __sdhci_add_host(host);
+	if (ret)
+		goto cleanup;
+
+	return 0;
+
+cleanup:
+	sdhci_cleanup_host(host);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(sdhci_add_host);
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index edf3adfbc213..0469fa191493 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -134,6 +134,7 @@
 #define  SDHCI_INT_CARD_REMOVE	0x00000080
 #define  SDHCI_INT_CARD_INT	0x00000100
 #define  SDHCI_INT_RETUNE	0x00001000
+#define  SDHCI_INT_CQE		0x00004000
 #define  SDHCI_INT_ERROR	0x00008000
 #define  SDHCI_INT_TIMEOUT	0x00010000
 #define  SDHCI_INT_CRC		0x00020000
@@ -158,6 +159,13 @@
 		SDHCI_INT_BLK_GAP)
 #define SDHCI_INT_ALL_MASK	((unsigned int)-1)
 
+#define SDHCI_CQE_INT_ERR_MASK ( \
+	SDHCI_INT_ADMA_ERROR | SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT | \
+	SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX | \
+	SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)
+
+#define SDHCI_CQE_INT_MASK (SDHCI_CQE_INT_ERR_MASK | SDHCI_INT_CQE)
+
 #define SDHCI_ACMD12_ERR	0x3C
 
 #define SDHCI_HOST_CONTROL2		0x3E
@@ -518,6 +526,10 @@ struct sdhci_host {
 	/* cached registers */
 	u32			ier;
 
+	bool			cqe_on;		/* CQE is operating */
+	u32			cqe_ier;	/* CQE interrupt mask */
+	u32			cqe_err_ier;	/* CQE error interrupt mask */
+
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 
@@ -526,6 +538,8 @@ struct sdhci_host {
 #define SDHCI_TUNING_MODE_1	0
 #define SDHCI_TUNING_MODE_2	1
 #define SDHCI_TUNING_MODE_3	2
+	/* Delay (ms) between tuning commands */
+	int			tuning_delay;
 
 	unsigned long private[0] ____cacheline_aligned;
 };
@@ -544,9 +558,12 @@ struct sdhci_ops {
 	void	(*set_power)(struct sdhci_host *host, unsigned char mode,
 			     unsigned short vdd);
 
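+	/*
+	 * Called first from the interrupt handler; the return value is the
+	 * set of status bits the core should still process.
+	 */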
+	u32		(*irq)(struct sdhci_host *host, u32 intmask);
+
 	int		(*enable_dma)(struct sdhci_host *host);
 	unsigned int	(*get_max_clock)(struct sdhci_host *host);
 	unsigned int	(*get_min_clock)(struct sdhci_host *host);
+	/* get_timeout_clock should return the clock rate in Hz */
 	unsigned int	(*get_timeout_clock)(struct sdhci_host *host);
 	unsigned int	(*get_max_timeout_count)(struct sdhci_host *host);
 	void		(*set_timeout)(struct sdhci_host *host,
@@ -562,10 +579,6 @@ struct sdhci_ops {
 	void    (*adma_workaround)(struct sdhci_host *host, u32 intmask);
 	void    (*card_event)(struct sdhci_host *host);
 	void	(*voltage_switch)(struct sdhci_host *host);
-	int	(*select_drive_strength)(struct sdhci_host *host,
-					 struct mmc_card *card,
-					 unsigned int max_dtr, int host_drv,
-					 int card_drv, int *drv_type);
 };
 
 #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
@@ -652,24 +665,23 @@ static inline u8 sdhci_readb(struct sdhci_host *host, int reg)
 
 #endif /* CONFIG_MMC_SDHCI_IO_ACCESSORS */
 
-extern struct sdhci_host *sdhci_alloc_host(struct device *dev,
-	size_t priv_size);
-extern void sdhci_free_host(struct sdhci_host *host);
+struct sdhci_host *sdhci_alloc_host(struct device *dev, size_t priv_size);
+void sdhci_free_host(struct sdhci_host *host);
 
 static inline void *sdhci_priv(struct sdhci_host *host)
 {
 	return host->private;
 }
 
-extern void sdhci_card_detect(struct sdhci_host *host);
-extern void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps,
-			      u32 *caps1);
-extern int sdhci_setup_host(struct sdhci_host *host);
-extern int __sdhci_add_host(struct sdhci_host *host);
-extern int sdhci_add_host(struct sdhci_host *host);
-extern void sdhci_remove_host(struct sdhci_host *host, int dead);
-extern void sdhci_send_command(struct sdhci_host *host,
-				struct mmc_command *cmd);
+void sdhci_card_detect(struct sdhci_host *host);
+void __sdhci_read_caps(struct sdhci_host *host, u16 *ver, u32 *caps,
+		       u32 *caps1);
+int sdhci_setup_host(struct sdhci_host *host);
+void sdhci_cleanup_host(struct sdhci_host *host);
+int __sdhci_add_host(struct sdhci_host *host);
+int sdhci_add_host(struct sdhci_host *host);
+void sdhci_remove_host(struct sdhci_host *host, int dead);
+void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd);
 
 static inline void sdhci_read_caps(struct sdhci_host *host)
 {
@@ -693,13 +705,24 @@ void sdhci_set_bus_width(struct sdhci_host *host, int width);
 void sdhci_reset(struct sdhci_host *host, u8 mask);
 void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing);
 int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
+void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios);
+int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+				      struct mmc_ios *ios);
+void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable);
 
 #ifdef CONFIG_PM
-extern int sdhci_suspend_host(struct sdhci_host *host);
-extern int sdhci_resume_host(struct sdhci_host *host);
-extern void sdhci_enable_irq_wakeups(struct sdhci_host *host);
-extern int sdhci_runtime_suspend_host(struct sdhci_host *host);
-extern int sdhci_runtime_resume_host(struct sdhci_host *host);
+int sdhci_suspend_host(struct sdhci_host *host);
+int sdhci_resume_host(struct sdhci_host *host);
+void sdhci_enable_irq_wakeups(struct sdhci_host *host);
+int sdhci_runtime_suspend_host(struct sdhci_host *host);
+int sdhci_runtime_resume_host(struct sdhci_host *host);
 #endif
 
+void sdhci_cqe_enable(struct mmc_host *mmc);
+void sdhci_cqe_disable(struct mmc_host *mmc, bool recovery);
+bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
+		   int *data_error);
+
+void sdhci_dumpregs(struct sdhci_host *host);
+
 #endif /* __SDHCI_HW_H */
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index 6ffcd2838272..d6fa2214aaae 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -385,14 +385,6 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host,
 	wmb();
 }
 
-static enum dma_data_direction sunxi_mmc_get_dma_dir(struct mmc_data *data)
-{
-	if (data->flags & MMC_DATA_WRITE)
-		return DMA_TO_DEVICE;
-	else
-		return DMA_FROM_DEVICE;
-}
-
 static int sunxi_mmc_map_dma(struct sunxi_mmc_host *host,
 			     struct mmc_data *data)
 {
@@ -400,7 +392,7 @@ static int sunxi_mmc_map_dma(struct sunxi_mmc_host *host,
 	struct scatterlist *sg;
 
 	dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     sunxi_mmc_get_dma_dir(data));
+			     mmc_get_dma_dir(data));
 	if (dma_len == 0) {
 		dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
 		return -ENOMEM;
@@ -489,7 +481,7 @@ static void sunxi_mmc_dump_errinfo(struct sunxi_mmc_host *host)
 				      cmd->opcode == SD_IO_RW_DIRECT))
 		return;
 
-	dev_err(mmc_dev(host->mmc),
+	dev_dbg(mmc_dev(host->mmc),
 		"smc %d err, cmd %d,%s%s%s%s%s%s%s%s%s%s !!\n",
 		host->mmc->index, cmd->opcode,
 		data ? (data->flags & MMC_DATA_WRITE ? " WR" : " RD") : "",
@@ -551,7 +543,7 @@ static irqreturn_t sunxi_mmc_finalize_request(struct sunxi_mmc_host *host)
 		rval |= SDXC_FIFO_RESET;
 		mmc_writel(host, REG_GCTRL, rval);
 		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-				     sunxi_mmc_get_dma_dir(data));
+			     mmc_get_dma_dir(data));
 	}
 
 	mmc_writel(host, REG_RINTR, 0xffff);
@@ -1022,7 +1014,7 @@ static void sunxi_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 		if (data)
 			dma_unmap_sg(mmc_dev(mmc), data->sg, data->sg_len,
-				     sunxi_mmc_get_dma_dir(data));
+				     mmc_get_dma_dir(data));
 
 		dev_err(mmc_dev(mmc), "request already pending\n");
 		mrq->cmd->error = -EBUSY;
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 2b349d48fb9a..d0edb5730d3f 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -50,7 +50,11 @@
 #define CTL_CLK_AND_WAIT_CTL 0x138
 #define CTL_RESET_SDIO 0x1e0
 
-/* Definitions for values the CTRL_STATUS register can take. */
+/* Definitions for values the CTL_STOP_INTERNAL_ACTION register can take */
+#define TMIO_STOP_STP		BIT(0)
+#define TMIO_STOP_SEC		BIT(8)
+
+/* Definitions for values the CTL_STATUS register can take */
 #define TMIO_STAT_CMDRESPEND    BIT(0)
 #define TMIO_STAT_DATAEND       BIT(2)
 #define TMIO_STAT_CARD_REMOVE   BIT(3)
@@ -61,7 +65,7 @@
 #define TMIO_STAT_CARD_INSERT_A BIT(9)
 #define TMIO_STAT_SIGSTATE_A    BIT(10)
 
-/* These belong technically to CTRL_STATUS2, but the driver merges them */
+/* These technically belong to CTL_STATUS2, but the driver merges them */
 #define TMIO_STAT_CMD_IDX_ERR   BIT(16)
 #define TMIO_STAT_CRCFAIL       BIT(17)
 #define TMIO_STAT_STOPBIT_ERR   BIT(18)
@@ -85,7 +89,7 @@
 
 #define TMIO_BBS		512		/* Boot block size */
 
-/* Definitions for values the CTRL_SDIO_STATUS register can take. */
+/* Definitions for values the CTL_SDIO_STATUS register can take */
 #define TMIO_SDIO_STAT_IOIRQ	0x0001
 #define TMIO_SDIO_STAT_EXPUB52	0x4000
 #define TMIO_SDIO_STAT_EXWT	0x8000
@@ -137,7 +141,7 @@ struct tmio_mmc_host {
 	bool			force_pio;
 	struct dma_chan		*chan_rx;
 	struct dma_chan		*chan_tx;
-	struct tasklet_struct	dma_complete;
+	struct completion	dma_dataend;
 	struct tasklet_struct	dma_issue;
 	struct scatterlist	bounce_sg;
 	u8			*bounce_buf;
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index fa8a936a3d9b..e2093db2b7ff 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -43,6 +43,34 @@ void tmio_mmc_abort_dma(struct tmio_mmc_host *host)
 	tmio_mmc_enable_dma(host, true);
 }
 
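+/*
+ * DMA completion callback: unmap the data, then wait for the card's
+ * data-end interrupt (signalled through dma_dataend) before finishing the
+ * request. A completion replaces the old dma_complete tasklet so the two
+ * events are ordered instead of racing.
+ */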
+static void tmio_mmc_dma_callback(void *arg)
+{
+	struct tmio_mmc_host *host = arg;
+
+	spin_lock_irq(&host->lock);
+
+	if (!host->data)
+		goto out;
+
+	if (host->data->flags & MMC_DATA_READ)
+		dma_unmap_sg(host->chan_rx->device->dev,
+			     host->sg_ptr, host->sg_len,
+			     DMA_FROM_DEVICE);
+	else
+		dma_unmap_sg(host->chan_tx->device->dev,
+			     host->sg_ptr, host->sg_len,
+			     DMA_TO_DEVICE);
+
+	spin_unlock_irq(&host->lock);
+
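+	/* Sleep outside the lock; tmio_mmc_data_irq() fires the completion */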
+	wait_for_completion(&host->dma_dataend);
+
+	spin_lock_irq(&host->lock);
+	tmio_mmc_do_data_irq(host);
+out:
+	spin_unlock_irq(&host->lock);
+}
+
 static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 {
 	struct scatterlist *sg = host->sg_ptr, *sg_tmp;
@@ -88,6 +116,10 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 			DMA_DEV_TO_MEM, DMA_CTRL_ACK);
 
 	if (desc) {
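+		/* Re-arm so a data-end left over from an earlier transfer is not consumed */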
+		reinit_completion(&host->dma_dataend);
+		desc->callback = tmio_mmc_dma_callback;
+		desc->callback_param = host;
+
 		cookie = dmaengine_submit(desc);
 		if (cookie < 0) {
 			desc = NULL;
@@ -162,6 +194,10 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 			DMA_MEM_TO_DEV, DMA_CTRL_ACK);
 
 	if (desc) {
+		reinit_completion(&host->dma_dataend);
+		desc->callback = tmio_mmc_dma_callback;
+		desc->callback_param = host;
+
 		cookie = dmaengine_submit(desc);
 		if (cookie < 0) {
 			desc = NULL;
@@ -221,29 +257,6 @@ static void tmio_mmc_issue_tasklet_fn(unsigned long priv)
 		dma_async_issue_pending(chan);
 }
 
-static void tmio_mmc_tasklet_fn(unsigned long arg)
-{
-	struct tmio_mmc_host *host = (struct tmio_mmc_host *)arg;
-
-	spin_lock_irq(&host->lock);
-
-	if (!host->data)
-		goto out;
-
-	if (host->data->flags & MMC_DATA_READ)
-		dma_unmap_sg(host->chan_rx->device->dev,
-			     host->sg_ptr, host->sg_len,
-			     DMA_FROM_DEVICE);
-	else
-		dma_unmap_sg(host->chan_tx->device->dev,
-			     host->sg_ptr, host->sg_len,
-			     DMA_TO_DEVICE);
-
-	tmio_mmc_do_data_irq(host);
-out:
-	spin_unlock_irq(&host->lock);
-}
-
 void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata)
 {
 	/* We can only either use DMA for both Tx and Rx or not use it at all */
@@ -306,7 +319,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
 		if (!host->bounce_buf)
 			goto ebouncebuf;
 
-		tasklet_init(&host->dma_complete, tmio_mmc_tasklet_fn, (unsigned long)host);
+		init_completion(&host->dma_dataend);
 		tasklet_init(&host->dma_issue, tmio_mmc_issue_tasklet_fn, (unsigned long)host);
 	}
 
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 6b789a739d4d..a2d92f10501b 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -340,7 +340,7 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command
 
 	/* CMD12 is handled by hardware */
 	if (cmd->opcode == MMC_STOP_TRANSMISSION && !cmd->arg) {
-		sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x001);
+		sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, TMIO_STOP_STP);
 		return 0;
 	}
 
@@ -367,7 +367,7 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command
 	if (data) {
 		c |= DATA_PRESENT;
 		if (data->blocks > 1) {
-			sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x100);
+			sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, TMIO_STOP_SEC);
 			c |= TRANSFER_MULTI;
 
 			/*
@@ -553,10 +553,14 @@ void tmio_mmc_do_data_irq(struct tmio_mmc_host *host)
 	}
 
 	if (stop) {
-		if (stop->opcode == MMC_STOP_TRANSMISSION && !stop->arg)
-			sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x000);
-		else
-			BUG();
+		if (stop->opcode != MMC_STOP_TRANSMISSION || stop->arg)
+			dev_err(&host->pdev->dev, "unsupported stop: CMD%u,0x%x. We did CMD12,0\n",
+				stop->opcode, stop->arg);
+
+		/* fill in response from auto CMD12 */
+		stop->resp[0] = sd_ctrl_read16_and_16_as_32(host, CTL_RESPONSE);
+
+		sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0);
 	}
 
 	schedule_work(&host->done);
@@ -596,11 +600,11 @@ static void tmio_mmc_data_irq(struct tmio_mmc_host *host, unsigned int stat)
 
 		if (done) {
 			tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND);
-			tasklet_schedule(&host->dma_complete);
+			complete(&host->dma_dataend);
 		}
 	} else if (host->chan_rx && (data->flags & MMC_DATA_READ) && !host->force_pio) {
 		tmio_mmc_disable_mmc_irqs(host, TMIO_STAT_DATAEND);
-		tasklet_schedule(&host->dma_complete);
+		complete(&host->dma_dataend);
 	} else {
 		tmio_mmc_do_data_irq(host);
 		tmio_mmc_disable_mmc_irqs(host, TMIO_MASK_READOP | TMIO_MASK_WRITEOP);
@@ -811,16 +815,14 @@ static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 	int i, ret = 0;
 
-	if (!host->tap_num) {
-		if (!host->init_tuning || !host->select_tuning)
-			/* Tuning is not supported */
-			goto out;
+	if (!host->init_tuning || !host->select_tuning)
+		/* Tuning is not supported */
+		goto out;
 
-		host->tap_num = host->init_tuning(host);
-		if (!host->tap_num)
-			/* Tuning is not supported */
-			goto out;
-	}
+	host->tap_num = host->init_tuning(host);
+	if (!host->tap_num)
+		/* Tuning is not supported */
+		goto out;
 
 	if (host->tap_num * 2 >= sizeof(host->taps) * BITS_PER_BYTE) {
 		dev_warn_once(&host->pdev->dev,