Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe: "This is the main pull request for block IO related changes for the 4.16 kernel. Nothing major in this pull request, but a good amount of improvements and fixes all over the map. This contains: - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and Paolo. - Support for SMR zones for deadline and mq-deadline from Damien and Christoph. - Set of fixes for bcache by way of Michael Lyle, including fixes from himself, Kent, Rui, Tang, and Coly. - Series from Matias for lightnvm with fixes from Hans Holmberg, Javier, and Matias. Mostly centered around pblk, and removing rrpc 1.2 in preparation for supporting 2.0. - A couple of NVMe pull requests from Christoph. Nothing major in here, just fixes and cleanups, and support for command tracing from Johannes. - Support for blk-throttle for tracking reads and writes separately. From Joseph Qi. A few cleanups/fixes also for blk-throttle from Weiping. - Series from Mike Snitzer that enables dm to register its queue more logically, something that's always been problematic on dm since it's a stacked device. - Series from Ming cleaning up some of the bio accessor use, in preparation for supporting multipage bvecs. - Various fixes from Ming closing up holes around queue mapping and quiescing. - BSD partition fix from Richard Narron, fixing a problem where we can't mount newer (10/11) FreeBSD partitions. - Series from Tejun reworking blk-mq timeout handling. The previous scheme relied on atomic bits, but it had races where we would think a request had timed out if it got reused at the wrong time. - null_blk now supports faking timeouts, to enable us to better exercise and test that functionality separately. From me. - Kill the separate atomic poll bit in the request struct. After this, we don't use the atomic bits on blk-mq anymore at all. From me. - sgl_alloc/free helpers from Bart. - Heavily contended tag case scalability improvement from me. 
- Various little fixes and cleanups from Arnd, Bart, Corentin, Douglas, Eryu, Goldwyn, and myself" * 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits) block: remove smart1,2.h nvme: add tracepoint for nvme_complete_rq nvme: add tracepoint for nvme_setup_cmd nvme-pci: introduce RECONNECTING state to mark initializing procedure nvme-rdma: remove redundant boolean for inline_data nvme: don't free uuid pointer before printing it nvme-pci: Suspend queues after deleting them bsg: use pr_debug instead of hand crafted macros blk-mq-debugfs: don't allow write on attributes with seq_operations set nvme-pci: Fix queue double allocations block: Set BIO_TRACE_COMPLETION on new bio during split blk-throttle: use queue_is_rq_based block: Remove kblockd_schedule_delayed_work{,_on}() blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly() lib/scatterlist: Fix chaining support in sgl_alloc_order() blk-throttle: track read and write request individually block: add bdev_read_only() checks to common helpers block: fail op_is_write() requests to read-only partitions blk-throttle: export io_serviced_recursive, io_service_bytes_recursive ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-01-29 11:51:49 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-01-29 11:51:49 -0800
commit: 0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
tree: cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /drivers/block/null_blk.c
parent: 9697e9da84299d0d715d515dd2cc48f1eceb277d (diff)
parent: 796baeeef85a40b3495a907fb7425086e7010102 (diff)
download: linux-0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0.tar.gz
1 files changed, 69 insertions, 221 deletions
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index ad0477ae820f..6655893a3a7a 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -12,9 +12,9 @@
 #include <linux/slab.h>
 #include <linux/blk-mq.h>
 #include <linux/hrtimer.h>
-#include <linux/lightnvm.h>
 #include <linux/configfs.h>
 #include <linux/badblocks.h>
+#include <linux/fault-inject.h>
 
 #define SECTOR_SHIFT		9
 #define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
@@ -27,6 +27,10 @@
 #define TICKS_PER_SEC		50ULL
 #define TIMER_INTERVAL		(NSEC_PER_SEC / TICKS_PER_SEC)
 
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(null_timeout_attr);
+#endif
+
 static inline u64 mb_per_tick(int mbps)
 {
 	return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
@@ -107,7 +111,6 @@ struct nullb_device {
 	unsigned int hw_queue_depth; /* queue depth */
 	unsigned int index; /* index of the disk, only valid with a disk */
 	unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
-	bool use_lightnvm; /* register as a LightNVM device */
 	bool blocking; /* blocking blk-mq device */
 	bool use_per_node_hctx; /* use per-node allocation for hardware context */
 	bool power; /* power on/off the device */
@@ -121,7 +124,6 @@ struct nullb {
 	unsigned int index;
 	struct request_queue *q;
 	struct gendisk *disk;
-	struct nvm_dev *ndev;
 	struct blk_mq_tag_set *tag_set;
 	struct blk_mq_tag_set __tag_set;
 	unsigned int queue_depth;
@@ -139,7 +141,6 @@ static LIST_HEAD(nullb_list);
 static struct mutex lock;
 static int null_major;
 static DEFINE_IDA(nullb_indexes);
-static struct kmem_cache *ppa_cache;
 static struct blk_mq_tag_set tag_set;
 
 enum {
@@ -166,6 +167,11 @@ static int g_home_node = NUMA_NO_NODE;
 module_param_named(home_node, g_home_node, int, S_IRUGO);
 MODULE_PARM_DESC(home_node, "Home node for the device");
 
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+static char g_timeout_str[80];
+module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
+#endif
+
 static int g_queue_mode = NULL_Q_MQ;
 
 static int null_param_store_val(const char *str, int *val, int min, int max)
@@ -208,10 +214,6 @@ static int nr_devices = 1;
 module_param(nr_devices, int, S_IRUGO);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
-static bool g_use_lightnvm;
-module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO);
-MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
-
 static bool g_blocking;
 module_param_named(blocking, g_blocking, bool, S_IRUGO);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
@@ -345,7 +347,6 @@ NULLB_DEVICE_ATTR(blocksize, uint);
 NULLB_DEVICE_ATTR(irqmode, uint);
 NULLB_DEVICE_ATTR(hw_queue_depth, uint);
 NULLB_DEVICE_ATTR(index, uint);
-NULLB_DEVICE_ATTR(use_lightnvm, bool);
 NULLB_DEVICE_ATTR(blocking, bool);
 NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
 NULLB_DEVICE_ATTR(memory_backed, bool);
@@ -455,7 +456,6 @@ static struct configfs_attribute *nullb_device_attrs[] = {
 	&nullb_device_attr_irqmode,
 	&nullb_device_attr_hw_queue_depth,
 	&nullb_device_attr_index,
-	&nullb_device_attr_use_lightnvm,
 	&nullb_device_attr_blocking,
 	&nullb_device_attr_use_per_node_hctx,
 	&nullb_device_attr_power,
@@ -573,7 +573,6 @@ static struct nullb_device *null_alloc_dev(void)
 	dev->blocksize = g_bs;
 	dev->irqmode = g_irqmode;
 	dev->hw_queue_depth = g_hw_queue_depth;
-	dev->use_lightnvm = g_use_lightnvm;
 	dev->blocking = g_blocking;
 	dev->use_per_node_hctx = g_use_per_node_hctx;
 	return dev;
@@ -1352,6 +1351,12 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
 	return BLK_QC_T_NONE;
 }
 
+static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
+{
+	pr_info("null: rq %p timed out\n", rq);
+	return BLK_EH_HANDLED;
+}
+
 static int null_rq_prep_fn(struct request_queue *q, struct request *req)
 {
 	struct nullb *nullb = q->queuedata;
@@ -1369,6 +1374,16 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req)
 	return BLKPREP_DEFER;
 }
 
+static bool should_timeout_request(struct request *rq)
+{
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+	if (g_timeout_str[0])
+		return should_fail(&null_timeout_attr, 1);
+#endif
+
+	return false;
+}
+
 static void null_request_fn(struct request_queue *q)
 {
 	struct request *rq;
@@ -1376,12 +1391,20 @@ static void null_request_fn(struct request_queue *q)
 	while ((rq = blk_fetch_request(q)) != NULL) {
 		struct nullb_cmd *cmd = rq->special;
 
-		spin_unlock_irq(q->queue_lock);
-		null_handle_cmd(cmd);
-		spin_lock_irq(q->queue_lock);
+		if (!should_timeout_request(rq)) {
+			spin_unlock_irq(q->queue_lock);
+			null_handle_cmd(cmd);
+			spin_lock_irq(q->queue_lock);
+		}
 	}
 }
 
+static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
+{
+	pr_info("null: rq %p timed out\n", rq);
+	return BLK_EH_HANDLED;
+}
+
 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
@@ -1399,12 +1422,16 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	blk_mq_start_request(bd->rq);
 
-	return null_handle_cmd(cmd);
+	if (!should_timeout_request(bd->rq))
+		return null_handle_cmd(cmd);
+
+	return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
 	.complete	= null_softirq_done_fn,
+	.timeout	= null_timeout_rq,
 };
 
 static void cleanup_queue(struct nullb_queue *nq)
@@ -1423,170 +1450,6 @@ static void cleanup_queues(struct nullb *nullb)
 	kfree(nullb->queues);
 }
 
-#ifdef CONFIG_NVM
-
-static void null_lnvm_end_io(struct request *rq, blk_status_t status)
-{
-	struct nvm_rq *rqd = rq->end_io_data;
-
-	/* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
-	rqd->error = status ? -EIO : 0;
-	nvm_end_io(rqd);
-
-	blk_put_request(rq);
-}
-
-static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-	struct request_queue *q = dev->q;
-	struct request *rq;
-	struct bio *bio = rqd->bio;
-
-	rq = blk_mq_alloc_request(q,
-		op_is_write(bio_op(bio)) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
-	if (IS_ERR(rq))
-		return -ENOMEM;
-
-	blk_init_request_from_bio(rq, bio);
-
-	rq->end_io_data = rqd;
-
-	blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);
-
-	return 0;
-}
-
-static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
-{
-	struct nullb *nullb = dev->q->queuedata;
-	sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
-	sector_t blksize;
-	struct nvm_id_group *grp;
-
-	id->ver_id = 0x1;
-	id->vmnt = 0;
-	id->cap = 0x2;
-	id->dom = 0x1;
-
-	id->ppaf.blk_offset = 0;
-	id->ppaf.blk_len = 16;
-	id->ppaf.pg_offset = 16;
-	id->ppaf.pg_len = 16;
-	id->ppaf.sect_offset = 32;
-	id->ppaf.sect_len = 8;
-	id->ppaf.pln_offset = 40;
-	id->ppaf.pln_len = 8;
-	id->ppaf.lun_offset = 48;
-	id->ppaf.lun_len = 8;
-	id->ppaf.ch_offset = 56;
-	id->ppaf.ch_len = 8;
-
-	sector_div(size, nullb->dev->blocksize); /* convert size to pages */
-	size >>= 8; /* concert size to pgs pr blk */
-	grp = &id->grp;
-	grp->mtype = 0;
-	grp->fmtype = 0;
-	grp->num_ch = 1;
-	grp->num_pg = 256;
-	blksize = size;
-	size >>= 16;
-	grp->num_lun = size + 1;
-	sector_div(blksize, grp->num_lun);
-	grp->num_blk = blksize;
-	grp->num_pln = 1;
-
-	grp->fpg_sz = nullb->dev->blocksize;
-	grp->csecs = nullb->dev->blocksize;
-	grp->trdt = 25000;
-	grp->trdm = 25000;
-	grp->tprt = 500000;
-	grp->tprm = 500000;
-	grp->tbet = 1500000;
-	grp->tbem = 1500000;
-	grp->mpos = 0x010101; /* single plane rwe */
-	grp->cpar = nullb->dev->hw_queue_depth;
-
-	return 0;
-}
-
-static void *null_lnvm_create_dma_pool(struct nvm_dev *dev, char *name)
-{
-	mempool_t *virtmem_pool;
-
-	virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
-	if (!virtmem_pool) {
-		pr_err("null_blk: Unable to create virtual memory pool\n");
-		return NULL;
-	}
-
-	return virtmem_pool;
-}
-
-static void null_lnvm_destroy_dma_pool(void *pool)
-{
-	mempool_destroy(pool);
-}
-
-static void *null_lnvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
-				gfp_t mem_flags, dma_addr_t *dma_handler)
-{
-	return mempool_alloc(pool, mem_flags);
-}
-
-static void null_lnvm_dev_dma_free(void *pool, void *entry,
-							dma_addr_t dma_handler)
-{
-	mempool_free(entry, pool);
-}
-
-static struct nvm_dev_ops null_lnvm_dev_ops = {
-	.identity		= null_lnvm_id,
-	.submit_io		= null_lnvm_submit_io,
-
-	.create_dma_pool	= null_lnvm_create_dma_pool,
-	.destroy_dma_pool	= null_lnvm_destroy_dma_pool,
-	.dev_dma_alloc		= null_lnvm_dev_dma_alloc,
-	.dev_dma_free		= null_lnvm_dev_dma_free,
-
-	/* Simulate nvme protocol restriction */
-	.max_phys_sect		= 64,
-};
-
-static int null_nvm_register(struct nullb *nullb)
-{
-	struct nvm_dev *dev;
-	int rv;
-
-	dev = nvm_alloc_dev(0);
-	if (!dev)
-		return -ENOMEM;
-
-	dev->q = nullb->q;
-	memcpy(dev->name, nullb->disk_name, DISK_NAME_LEN);
-	dev->ops = &null_lnvm_dev_ops;
-
-	rv = nvm_register(dev);
-	if (rv) {
-		kfree(dev);
-		return rv;
-	}
-	nullb->ndev = dev;
-	return 0;
-}
-
-static void null_nvm_unregister(struct nullb *nullb)
-{
-	nvm_unregister(nullb->ndev);
-}
-#else
-static int null_nvm_register(struct nullb *nullb)
-{
-	pr_err("null_blk: CONFIG_NVM needs to be enabled for LightNVM\n");
-	return -EINVAL;
-}
-static void null_nvm_unregister(struct nullb *nullb) {}
-#endif /* CONFIG_NVM */
-
 static void null_del_dev(struct nullb *nullb)
 {
 	struct nullb_device *dev = nullb->dev;
@@ -1595,10 +1458,7 @@ static void null_del_dev(struct nullb *nullb)
 
 	list_del_init(&nullb->list);
 
-	if (dev->use_lightnvm)
-		null_nvm_unregister(nullb);
-	else
-		del_gendisk(nullb->disk);
+	del_gendisk(nullb->disk);
 
 	if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
 		hrtimer_cancel(&nullb->bw_timer);
@@ -1610,8 +1470,7 @@ static void null_del_dev(struct nullb *nullb)
 	if (dev->queue_mode == NULL_Q_MQ &&
 	    nullb->tag_set == &nullb->__tag_set)
 		blk_mq_free_tag_set(nullb->tag_set);
-	if (!dev->use_lightnvm)
-		put_disk(nullb->disk);
+	put_disk(nullb->disk);
 	cleanup_queues(nullb);
 	if (null_cache_active(nullb))
 		null_free_device_storage(nullb->dev, true);
@@ -1775,11 +1634,6 @@ static void null_validate_conf(struct nullb_device *dev)
 {
 	dev->blocksize = round_down(dev->blocksize, 512);
 	dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
-	if (dev->use_lightnvm && dev->blocksize != 4096)
-		dev->blocksize = 4096;
-
-	if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ)
-		dev->queue_mode = NULL_Q_MQ;
 
 	if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
 		if (dev->submit_queues != nr_online_nodes)
@@ -1805,6 +1659,20 @@ static void null_validate_conf(struct nullb_device *dev)
 		dev->mbps = 0;
 }
 
+static bool null_setup_fault(void)
+{
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+	if (!g_timeout_str[0])
+		return true;
+
+	if (!setup_fault_attr(&null_timeout_attr, g_timeout_str))
+		return false;
+
+	null_timeout_attr.verbose = 0;
+#endif
+	return true;
+}
+
 static int null_add_dev(struct nullb_device *dev)
 {
 	struct nullb *nullb;
@@ -1838,6 +1706,10 @@ static int null_add_dev(struct nullb_device *dev)
 		if (rv)
 			goto out_cleanup_queues;
 
+		if (!null_setup_fault())
+			goto out_cleanup_queues;
+
+		nullb->tag_set->timeout = 5 * HZ;
 		nullb->q = blk_mq_init_queue(nullb->tag_set);
 		if (IS_ERR(nullb->q)) {
 			rv = -ENOMEM;
@@ -1861,8 +1733,14 @@ static int null_add_dev(struct nullb_device *dev)
 			rv = -ENOMEM;
 			goto out_cleanup_queues;
 		}
+
+		if (!null_setup_fault())
+			goto out_cleanup_blk_queue;
+
 		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
 		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
+		blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn);
+		nullb->q->rq_timeout = 5 * HZ;
 		rv = init_driver_queues(nullb);
 		if (rv)
 			goto out_cleanup_blk_queue;
@@ -1895,11 +1773,7 @@ static int null_add_dev(struct nullb_device *dev)
 
 	sprintf(nullb->disk_name, "nullb%d", nullb->index);
 
-	if (dev->use_lightnvm)
-		rv = null_nvm_register(nullb);
-	else
-		rv = null_gendisk_register(nullb);
-
+	rv = null_gendisk_register(nullb);
 	if (rv)
 		goto out_cleanup_blk_queue;
 
@@ -1938,18 +1812,6 @@ static int __init null_init(void)
 		g_bs = PAGE_SIZE;
 	}
 
-	if (g_use_lightnvm && g_bs != 4096) {
-		pr_warn("null_blk: LightNVM only supports 4k block size\n");
-		pr_warn("null_blk: defaults block size to 4k\n");
-		g_bs = 4096;
-	}
-
-	if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) {
-		pr_warn("null_blk: LightNVM only supported for blk-mq\n");
-		pr_warn("null_blk: defaults queue mode to blk-mq\n");
-		g_queue_mode = NULL_Q_MQ;
-	}
-
 	if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
 		if (g_submit_queues != nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.\n",
@@ -1982,16 +1844,6 @@ static int __init null_init(void)
 		goto err_conf;
 	}
 
-	if (g_use_lightnvm) {
-		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
-								0, 0, NULL);
-		if (!ppa_cache) {
-			pr_err("null_blk: unable to create ppa cache\n");
-			ret = -ENOMEM;
-			goto err_ppa;
-		}
-	}
-
 	for (i = 0; i < nr_devices; i++) {
 		dev = null_alloc_dev();
 		if (!dev) {
@@ -2015,8 +1867,6 @@ err_dev:
 		null_del_dev(nullb);
 		null_free_dev(dev);
 	}
-	kmem_cache_destroy(ppa_cache);
-err_ppa:
 	unregister_blkdev(null_major, "nullb");
 err_conf:
 	configfs_unregister_subsystem(&nullb_subsys);
@@ -2047,8 +1897,6 @@ static void __exit null_exit(void)
 
 	if (g_queue_mode == NULL_Q_MQ && shared_tags)
 		blk_mq_free_tag_set(&tag_set);
-
-	kmem_cache_destroy(ppa_cache);
 }
 
 module_init(null_init);
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-29 11:51:49 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-29 11:51:49 -0800
commit	0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
tree	cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /drivers/block/null_blk.c
parent	9697e9da84299d0d715d515dd2cc48f1eceb277d (diff)
parent	796baeeef85a40b3495a907fb7425086e7010102 (diff)
download	linux-0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0.tar.gz