From 1209cb7fa4afcd7f652a70e1d2028686ec28a7a5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 27 Feb 2018 16:32:13 -0800 Subject: blk-mq-debugfs: Reorder queue show and store methods Make sure that the queue show and store methods are contiguous and also that these appear in alphabetical order. Signed-off-by: Bart Van Assche Cc: Omar Sandoval Cc: Damien Le Moal Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 130 ++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 21cbc1f071c6..9547569aa619 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -24,6 +24,64 @@ #include "blk-mq-debugfs.h" #include "blk-mq-tag.h" +static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) +{ + if (stat->nr_samples) { + seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu", + stat->nr_samples, stat->mean, stat->min, stat->max); + } else { + seq_puts(m, "samples=0"); + } +} + +static int queue_poll_stat_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + int bucket; + + for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) { + seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket)); + print_stat(m, &q->poll_stat[2*bucket]); + seq_puts(m, "\n"); + + seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket)); + print_stat(m, &q->poll_stat[2*bucket+1]); + seq_puts(m, "\n"); + } + return 0; +} + +static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos) + __acquires(&q->requeue_lock) +{ + struct request_queue *q = m->private; + + spin_lock_irq(&q->requeue_lock); + return seq_list_start(&q->requeue_list, *pos); +} + +static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct request_queue *q = m->private; + + return seq_list_next(v, &q->requeue_list, pos); +} + +static void queue_requeue_list_stop(struct seq_file *m, void *v) + __releases(&q->requeue_lock) +{ + struct request_queue *q = m->private; + + spin_unlock_irq(&q->requeue_lock); +} + +static const struct seq_operations queue_requeue_list_seq_ops = { + .start = queue_requeue_list_start, + .next = queue_requeue_list_next, + .stop = queue_requeue_list_stop, + .show = blk_mq_debugfs_rq_show, +}; + static int blk_flags_show(struct seq_file *m, const unsigned long flags, const char *const *flag_name, int flag_name_count) { @@ -125,16 +183,6 @@ inval: return count; } -static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) -{ - if (stat->nr_samples) { - seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu", - stat->nr_samples, stat->mean, stat->min, stat->max); - } else { - seq_puts(m, "samples=0"); - } -} - static int queue_write_hint_show(void *data, struct seq_file *m) { struct request_queue *q = data; @@ -158,22 +206,13 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, return count; } -static int queue_poll_stat_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - int bucket; - - for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) { - seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket)); - print_stat(m, &q->poll_stat[2*bucket]); - seq_puts(m, "\n"); - - seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket)); - print_stat(m, &q->poll_stat[2*bucket+1]); - seq_puts(m, "\n"); - } - return 0; -} +static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { + { "poll_stat", 0400, 
queue_poll_stat_show }, + { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, + { "state", 0600, queue_state_show, queue_state_write }, + { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, + { }, +}; #define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name static const char *const hctx_state_name[] = { @@ -327,37 +366,6 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) } EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); -static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos) - __acquires(&q->requeue_lock) -{ - struct request_queue *q = m->private; - - spin_lock_irq(&q->requeue_lock); - return seq_list_start(&q->requeue_list, *pos); -} - -static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct request_queue *q = m->private; - - return seq_list_next(v, &q->requeue_list, pos); -} - -static void queue_requeue_list_stop(struct seq_file *m, void *v) - __releases(&q->requeue_lock) -{ - struct request_queue *q = m->private; - - spin_unlock_irq(&q->requeue_lock); -} - -static const struct seq_operations queue_requeue_list_seq_ops = { - .start = queue_requeue_list_start, - .next = queue_requeue_list_next, - .stop = queue_requeue_list_stop, - .show = blk_mq_debugfs_rq_show, -}; - static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) { @@ -747,14 +755,6 @@ static const struct file_operations blk_mq_debugfs_fops = { .release = blk_mq_debugfs_release, }; -static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { - {"poll_stat", 0400, queue_poll_stat_show}, - {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops}, - {"state", 0600, queue_state_show, queue_state_write}, - {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store}, - {}, -}; - static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { {"state", 0400, hctx_state_show}, {"flags", 0400, hctx_flags_show}, -- cgit 1.4.1 From 18bc42308699522b57fd599401c03ad561f422ef Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 27 Feb 2018 16:32:14 -0800 Subject: blk-mq-debugfs: Show zone locking information When debugging the ZBC code in the mq-deadline scheduler it is very important to know which zones are locked and which zones are not locked. Hence this patch that exports the zone locking information through debugfs. 
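For context, a minimal sketch of the zone write locking that the new attribute exposes, assuming the usual bitmap convention: a set bit in q->seq_zones_wlock means the corresponding sequential zone is write-locked, and the debugfs file simply lists the indexes of the set bits. The helper names below are invented for illustration; only the bitmap field and the bit operations are real kernel interfaces.

#include <linux/bitops.h>
#include <linux/blkdev.h>

/* Hedged sketch, not part of this patch: driving a zone write lock bitmap. */
static inline bool example_trylock_zone(struct request_queue *q,
					unsigned int zno)
{
	/* Returns true if the zone lock was acquired, false if already held. */
	return !test_and_set_bit(zno, q->seq_zones_wlock);
}

static inline void example_unlock_zone(struct request_queue *q,
				       unsigned int zno)
{
	clear_bit(zno, q->seq_zones_wlock);
}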
Cc: Omar Sandoval Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9547569aa619..bd21d5b9f65f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -206,11 +206,27 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, return count; } +static int queue_zone_wlock_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + unsigned int i; + + if (!q->seq_zones_wlock) + return 0; + + for (i = 0; i < blk_queue_nr_zones(q); i++) + if (test_bit(i, q->seq_zones_wlock)) + seq_printf(m, "%u\n", i); + + return 0; +} + static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, { "state", 0600, queue_state_show, queue_state_write }, { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, + { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, { }, }; -- cgit 1.4.1 From e9a99a638800af25c7ed006c96fd1dabb99254b7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 27 Feb 2018 16:56:42 -0800 Subject: block: clear ctx pending bit under ctx lock When we insert a request, we set the software queue pending bit while holding the software queue lock. However, we clear it outside of the lock, so it's possible that a concurrent insert could reset the bit after we clear it but before we empty the request list. Afterwards, the bit would still be set but the software queue wouldn't have any requests in it, leading us to do a spurious run in the future. This is mostly a benign/theoretical issue, but it makes the following change easier to justify. Signed-off-by: Omar Sandoval Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 16e83e6df404..9594a0e9f65b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -986,9 +986,9 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) struct blk_mq_hw_ctx *hctx = flush_data->hctx; struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; - sbitmap_clear_bit(sb, bitnr); spin_lock(&ctx->lock); list_splice_tail_init(&ctx->rq_list, flush_data->list); + sbitmap_clear_bit(sb, bitnr); spin_unlock(&ctx->lock); return true; } -- cgit 1.4.1 From 5ee0524ba137fe928a88b440d014e3c8451fb32c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:31 -0800 Subject: block: Add 'lock' as third argument to blk_alloc_queue_node() This patch does not change any functionality. 
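To illustrate the new calling convention (a hedged sketch, not taken from this patch): a blk-mq or bio-based caller passes NULL and keeps the queue's internal lock, while a legacy driver that serializes its .request_fn() with its own spinlock hands that lock in directly; the follow-up patch below makes blk_alloc_queue_node() actually install it as q->queue_lock. "mydev" is a hypothetical driver structure used only for this example.

	struct request_queue *q;

	/* blk-mq / bio-based path: no driver lock, keep &q->__queue_lock. */
	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
	if (!q)
		return -ENOMEM;

	/* Legacy .request_fn() path: pass the driver's own lock instead. */
	spin_lock_init(&mydev->lock);
	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), &mydev->lock);
	if (!q)
		return -ENOMEM;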
Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- block/blk-core.c | 7 ++++--- block/blk-mq.c | 2 +- drivers/block/null_blk.c | 3 ++- drivers/ide/ide-probe.c | 2 +- drivers/lightnvm/core.c | 2 +- drivers/md/dm.c | 2 +- drivers/nvdimm/pmem.c | 2 +- drivers/nvme/host/multipath.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 3 ++- 10 files changed, 15 insertions(+), 12 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 2d1a7bbe0634..e873a24bf82d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -810,7 +810,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl) struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); + return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); } EXPORT_SYMBOL(blk_alloc_queue); @@ -888,7 +888,8 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, + spinlock_t *lock) { struct request_queue *q; @@ -1030,7 +1031,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, node_id); + q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL); if (!q) return NULL; diff --git a/block/blk-mq.c b/block/blk-mq.c index 9594a0e9f65b..75336848f7a7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2556,7 +2556,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct request_queue *uninit_q, *q; - uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); + uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL); if (!uninit_q) return ERR_PTR(-ENOMEM); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d12d7a8325ad..6dc7e7cfca4a 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1760,7 +1760,8 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, + NULL); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index caa20eb5f26b..d6b8c7e1545d 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -766,7 +766,7 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. 
*/ - q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif)); + q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL); if (!q) return 1; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index dcc9e621e651..5f1988df1593 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); if (!tqueue) { ret = -ENOMEM; goto err_disk; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 68136806d365..7586d249266c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1841,7 +1841,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); + md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); if (!md->queue) goto bad; md->queue->queuedata = md; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 10041ac4032c..cfb15ac50925 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -344,7 +344,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); if (!q) return -ENOMEM; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b7e5c6db4d92..88440562a197 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -162,7 +162,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) goto out; q->queuedata = head; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a86df9ca7d1c..71d1135f94d0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2223,7 +2223,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) struct Scsi_Host *shost = sdev->host; struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) return NULL; q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ed63f3b69c12..667a9b0053d9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1321,7 +1321,8 @@ extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t, int); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, + spinlock_t *lock); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit 1.4.1 From 498f6650aec864e331cae7575fec5f07781d0bf3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:32 -0800 Subject: block: Fix a race between the cgroup code and request queue initialization Initialize the request queue lock earlier such that the following race can no longer occur: blk_init_queue_node() blkcg_print_blkgs() blk_alloc_queue_node (1) q->queue_lock = &q->__queue_lock (2) blkcg_init_queue(q) (3) spin_lock_irq(blkg->q->queue_lock) (4) q->queue_lock = lock (5) 
spin_unlock_irq(blkg->q->queue_lock) (6) (1) allocate an uninitialized queue; (2) initialize queue_lock to its default internal lock; (3) initialize blkcg part of request queue, which will create blkg and then insert it to blkg_list; (4) traverse blkg_list and find the created blkg, and then take its queue lock, here it is the default *internal lock*; (5) *race window*, now queue_lock is overridden with *driver specified lock*; (6) now unlock *driver specified lock*, not the locked *internal lock*, unlock balance breaks. The changes in this patch are as follows: - Move the .queue_lock initialization from blk_init_queue_node() into blk_alloc_queue_node(). - Only override the .queue_lock pointer for legacy queues because it is not useful for blk-mq queues to override this pointer. - For all all block drivers that initialize .queue_lock explicitly, change the blk_alloc_queue() call in the driver into a blk_alloc_queue_node() call and remove the explicit .queue_lock initialization. Additionally, initialize the spin lock that will be used as queue lock earlier if necessary. Reported-by: Joseph Qi Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Christoph Hellwig Cc: Philipp Reisner Cc: Ulf Hansson Cc: Kees Cook Signed-off-by: Jens Axboe --- block/blk-core.c | 24 ++++++++++++++++-------- drivers/block/drbd/drbd_main.c | 3 +-- drivers/block/umem.c | 7 +++---- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index e873a24bf82d..41c74b37be85 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -888,6 +888,19 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } +/** + * blk_alloc_queue_node - allocate a request queue + * @gfp_mask: memory allocation flags + * @node_id: NUMA node to allocate memory from + * @lock: For legacy queues, pointer to a spinlock that will be used to e.g. + * serialize calls to the legacy .request_fn() callback. Ignored for + * blk-mq request queues. + * + * Note: pass the queue lock as the third argument to this function instead of + * setting the queue lock pointer explicitly to avoid triggering a sporadic + * crash in the blkcg code. This function namely calls blkcg_init_queue() and + * the queue lock pointer must be set before blkcg_init_queue() is called. + */ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, spinlock_t *lock) { @@ -940,11 +953,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - /* - * By default initialize queue_lock to internal lock and driver can - * override it later if need be. - */ - q->queue_lock = &q->__queue_lock; + if (!q->mq_ops) + q->queue_lock = lock ? 
: &q->__queue_lock; /* * A queue starts its life with bypass turned on to avoid @@ -1031,13 +1041,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock); if (!q) return NULL; q->request_fn = rfn; - if (lock) - q->queue_lock = lock; if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); return NULL; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0a0394aa1b9c..185f1ef00a7c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); if (!q) goto out_no_q; device->rq_queue = q; @@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8077123678ad..5c7fb8cc4149 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->Active = -1; /* no page is active */ card->bio = NULL; card->biotail = &card->bio; + spin_lock_init(&card->lock); - card->queue = blk_alloc_queue(GFP_KERNEL); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, + &card->lock); if (!card->queue) goto failed_alloc; blk_queue_make_request(card->queue, mm_make_request); - card->queue->queue_lock = &card->lock; card->queue->queuedata = card; tasklet_init(&card->tasklet, process_page, (unsigned long)card); @@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &card->dev->dev, "Window size %d bytes, IRQ %d\n", data, dev->irq); - spin_lock_init(&card->lock); - pci_set_drvdata(dev, card); if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ -- cgit 1.4.1 From a063057d7c731cffa7d10740e8ebc2970df8dbb3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Feb 2018 10:15:33 -0800 Subject: block: Fix a race between request queue removal and the block cgroup controller Avoid that the following race can occur: blk_cleanup_queue() blkcg_print_blkgs() spin_lock_irq(lock) (1) spin_lock_irq(blkg->q->queue_lock) (2,5) q->queue_lock = &q->__queue_lock (3) spin_unlock_irq(lock) (4) spin_unlock_irq(blkg->q->queue_lock) (6) (1) take driver lock; (2) busy loop for driver lock; (3) override driver lock with internal lock; (4) unlock driver lock; (5) can take driver lock now; (6) but unlock internal lock. This change is safe because only the SCSI core and the NVME core keep a reference on a request queue after having called blk_cleanup_queue(). Neither driver accesses any of the removed data structures between its blk_cleanup_queue() and blk_put_queue() calls. 
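As a hedged illustration of the ordering this change relies on (the driver structure and function are hypothetical; the block layer calls are real): a driver that took an extra reference with blk_get_queue() shuts the queue down first and only drops that reference afterwards, without touching the elevator, blkcg or bdi state in between, since blk_cleanup_queue() has already torn those down.

/* Hypothetical removal path, for illustration only. */
static void example_remove_device(struct example_dev *edev)
{
	/* edev->queue was pinned earlier with blk_get_queue(). */
	blk_cleanup_queue(edev->queue);	/* exits elevator and blkcg, drops bdi ref */

	/* ... release other driver resources; no further queue use ... */

	blk_put_queue(edev->queue);	/* drop the extra reference */
	edev->queue = NULL;
}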
Reported-by: Joseph Qi Signed-off-by: Bart Van Assche Reviewed-by: Joseph Qi Cc: Jan Kara Signed-off-by: Jens Axboe --- block/blk-core.c | 31 +++++++++++++++++++++++++++++++ block/blk-sysfs.c | 7 ------- 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 41c74b37be85..6febc69a58aa 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -719,6 +719,37 @@ void blk_cleanup_queue(struct request_queue *q) del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); blk_sync_queue(q); + /* + * I/O scheduler exit is only safe after the sysfs scheduler attribute + * has been removed. + */ + WARN_ON_ONCE(q->kobj.state_in_sysfs); + + /* + * Since the I/O scheduler exit code may access cgroup information, + * perform I/O scheduler exit before disassociating from the block + * cgroup controller. + */ + if (q->elevator) { + ioc_clear_queue(q); + elevator_exit(q, q->elevator); + q->elevator = NULL; + } + + /* + * Remove all references to @q from the block cgroup controller before + * restoring @q->queue_lock to avoid that restoring this pointer causes + * e.g. blkcg_print_blkgs() to crash. + */ + blkcg_exit_queue(q); + + /* + * Since the cgroup code may dereference the @q->backing_dev_info + * pointer, only decrease its reference count after having removed the + * association with the block cgroup controller. + */ + bdi_put(q->backing_dev_info); + if (q->mq_ops) blk_mq_free_queue(q); percpu_ref_exit(&q->q_usage_counter); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cbea895a5547..fd71a00c9462 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -798,13 +798,6 @@ static void __blk_release_queue(struct work_struct *work) if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) blk_stat_remove_callback(q, q->poll_cb); blk_stat_free_callback(q->poll_cb); - bdi_put(q->backing_dev_info); - blkcg_exit_queue(q); - - if (q->elevator) { - ioc_clear_queue(q); - elevator_exit(q, q->elevator); - } blk_free_queue_stats(q->stats); -- cgit 1.4.1 From d15e1175a9a9af967ae01245f55196acb5d5ff0f Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Thu, 1 Mar 2018 14:26:37 +0100 Subject: block: sed-opal: fix response string extraction Tokens are prefixed by a variable length of bytes. If a bytestring is not stored in an tiny or short atom, we have to skip more than one byte in order to have the actual bytes not prefixed by the bytes describing the actual length of the string. 
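As a usage illustration (hedged; modelled on the existing callers in this file and assuming the opal_dev "parsed" field they use), a caller only ever sees the payload: with this fix the helper skips the 1-byte header of tiny and short atoms, the 2-byte header of medium atoms and the 4-byte header of long atoms before handing back the string.

	const char *pin;
	size_t pin_len;

	pin_len = response_get_string(&dev->parsed, 4, &pin);
	if (!pin_len)
		return -EINVAL;	/* missing token, wrong type or bad atom width */
	/* pin points past the atom header; pin_len no longer includes it. */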
Acked-by: Jonathan Derrick Signed-off-by: Jonas Rabenstein Signed-off-by: Jens Axboe --- block/sed-opal.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/sed-opal.c b/block/sed-opal.c index 9ed51d0c6b1d..36842bfa572e 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -871,6 +871,9 @@ static int response_parse(const u8 *buf, size_t length, static size_t response_get_string(const struct parsed_resp *resp, int n, const char **store) { + u8 skip; + const struct opal_resp_tok *token; + *store = NULL; if (!resp) { pr_debug("Response is NULL\n"); @@ -883,13 +886,30 @@ static size_t response_get_string(const struct parsed_resp *resp, int n, return 0; } - if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) { + token = &resp->toks[n]; + if (token->type != OPAL_DTA_TOKENID_BYTESTRING) { pr_debug("Token is not a byte string!\n"); return 0; } - *store = resp->toks[n].pos + 1; - return resp->toks[n].len - 1; + switch (token->width) { + case OPAL_WIDTH_TINY: + case OPAL_WIDTH_SHORT: + skip = 1; + break; + case OPAL_WIDTH_MEDIUM: + skip = 2; + break; + case OPAL_WIDTH_LONG: + skip = 4; + break; + default: + pr_debug("Token has invalid width!\n"); + return 0; + } + + *store = token->pos + skip; + return token->len - skip; } static u64 response_get_u64(const struct parsed_resp *resp, int n) -- cgit 1.4.1 From f78bac2c8e69144781e271d9771bae8dbb4e7098 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:03 -0800 Subject: block: Use the queue_flag_*() functions instead of open-coding these Except for changing the atomic queue flag manipulations that are protected by the queue lock into non-atomic manipulations, this patch does not change any functionality. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 2 +- block/blk-settings.c | 4 ++-- block/blk-stat.c | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 6febc69a58aa..241b73088617 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -994,7 +994,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, * registered by blk_register_queue(). 
*/ q->bypass_depth = 1; - __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); init_waitqueue_head(&q->mq_freeze_wq); diff --git a/block/blk-mq.c b/block/blk-mq.c index 75336848f7a7..e70cc7d48f58 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2678,7 +2678,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; if (!(set->flags & BLK_MQ_F_SG_MERGE)) - q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE; + queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); q->sg_reserved_size = INT_MAX; diff --git a/block/blk-settings.c b/block/blk-settings.c index 48ebe6be07b7..7f719da0eadd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -861,9 +861,9 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { spin_lock_irq(q->queue_lock); if (queueable) - clear_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q); else - set_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); diff --git a/block/blk-stat.c b/block/blk-stat.c index 28003bf9941c..b664aa6df725 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -152,7 +152,7 @@ void blk_stat_add_callback(struct request_queue *q, spin_lock(&q->stats->lock); list_add_tail_rcu(&cb->list, &q->stats->callbacks); - set_bit(QUEUE_FLAG_STATS, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } EXPORT_SYMBOL_GPL(blk_stat_add_callback); @@ -163,7 +163,7 @@ void blk_stat_remove_callback(struct request_queue *q, spin_lock(&q->stats->lock); list_del_rcu(&cb->list); if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting) - clear_bit(QUEUE_FLAG_STATS, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); del_timer_sync(&cb->timer); @@ -191,7 +191,7 @@ void blk_stat_enable_accounting(struct request_queue *q) { spin_lock(&q->stats->lock); q->stats->enable_accounting = true; - set_bit(QUEUE_FLAG_STATS, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } -- cgit 1.4.1 From 8814ce8a0f680599a837af18aefdec774e5c7b97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:04 -0800 Subject: block: Introduce blk_queue_flag_{set,clear,test_and_{set,clear}}() Introduce functions that modify the queue flags and that protect these modifications with the request queue lock. Except for moving one wake_up_all() call from inside to outside a critical section, this patch does not change any functionality. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 91 +++++++++++++++++++++++++++++++++++++++++--------- block/blk-mq.c | 12 ++----- block/blk-settings.c | 6 ++-- block/blk-sysfs.c | 22 ++++-------- block/blk-timeout.c | 6 ++-- include/linux/blkdev.h | 5 +++ 6 files changed, 93 insertions(+), 49 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 241b73088617..74c6283f4509 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep; */ static struct workqueue_struct *kblockd_workqueue; +/** + * blk_queue_flag_set - atomically set a queue flag + * @flag: flag to be set + * @q: request queue + */ +void blk_queue_flag_set(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + queue_flag_set(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL(blk_queue_flag_set); + +/** + * blk_queue_flag_clear - atomically clear a queue flag + * @flag: flag to be cleared + * @q: request queue + */ +void blk_queue_flag_clear(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + queue_flag_clear(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); +} +EXPORT_SYMBOL(blk_queue_flag_clear); + +/** + * blk_queue_flag_test_and_set - atomically test and set a queue flag + * @flag: flag to be set + * @q: request queue + * + * Returns the previous value of @flag - 0 if the flag was not set and 1 if + * the flag was already set. + */ +bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + bool res; + + spin_lock_irqsave(q->queue_lock, flags); + res = queue_flag_test_and_set(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); + + return res; +} +EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set); + +/** + * blk_queue_flag_test_and_clear - atomically test and clear a queue flag + * @flag: flag to be cleared + * @q: request queue + * + * Returns the previous value of @flag - 0 if the flag was not set and 1 if + * the flag was set. 
+ */ +bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q) +{ + unsigned long flags; + bool res; + + spin_lock_irqsave(q->queue_lock, flags); + res = queue_flag_test_and_clear(flag, q); + spin_unlock_irqrestore(q->queue_lock, flags); + + return res; +} +EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear); + static void blk_clear_congested(struct request_list *rl, int sync) { #ifdef CONFIG_CGROUP_WRITEBACK @@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue); */ int blk_set_preempt_only(struct request_queue *q) { - unsigned long flags; - int res; - - spin_lock_irqsave(q->queue_lock, flags); - res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); - spin_unlock_irqrestore(q->queue_lock, flags); - - return res; + return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q); } EXPORT_SYMBOL_GPL(blk_set_preempt_only); void blk_clear_preempt_only(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); + blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); wake_up_all(&q->mq_freeze_wq); - spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL_GPL(blk_clear_preempt_only); @@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); void blk_set_queue_dying(struct request_queue *q) { - spin_lock_irq(q->queue_lock); - queue_flag_set(QUEUE_FLAG_DYING, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_set(QUEUE_FLAG_DYING, q); /* * When queue DYING flag is set, we need to block new req diff --git a/block/blk-mq.c b/block/blk-mq.c index e70cc7d48f58..a86899022683 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); */ void blk_mq_quiesce_queue_nowait(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_set(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q); } EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); @@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); */ void blk_mq_unquiesce_queue(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - queue_flag_clear(QUEUE_FLAG_QUIESCED, q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q); /* dispatch requests which are inserted during quiescing */ blk_mq_run_hw_queues(q, true); diff --git a/block/blk-settings.c b/block/blk-settings.c index 7f719da0eadd..d1de71124656 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -859,12 +859,10 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment); void blk_queue_flush_queueable(struct request_queue *q, bool queueable) { - spin_lock_irq(q->queue_lock); if (queueable) - queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q); + blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q); else - queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q); } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index fd71a00c9462..d00d1b0ec109 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -276,12 +276,10 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \ if (neg) \ val = !val; \ \ - spin_lock_irq(q->queue_lock); \ if (val) \ - queue_flag_set(QUEUE_FLAG_##flag, q); \ + blk_queue_flag_set(QUEUE_FLAG_##flag, q); \ else \ - queue_flag_clear(QUEUE_FLAG_##flag, q); \ - spin_unlock_irq(q->queue_lock); \ + 
blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \ return ret; \ } @@ -414,12 +412,10 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, if (ret < 0) return ret; - spin_lock_irq(q->queue_lock); if (poll_on) - queue_flag_set(QUEUE_FLAG_POLL, q); + blk_queue_flag_set(QUEUE_FLAG_POLL, q); else - queue_flag_clear(QUEUE_FLAG_POLL, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_POLL, q); return ret; } @@ -487,12 +483,10 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page, if (set == -1) return -EINVAL; - spin_lock_irq(q->queue_lock); if (set) - queue_flag_set(QUEUE_FLAG_WC, q); + blk_queue_flag_set(QUEUE_FLAG_WC, q); else - queue_flag_clear(QUEUE_FLAG_WC, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_WC, q); return count; } @@ -946,9 +940,7 @@ void blk_unregister_queue(struct gendisk *disk) */ mutex_lock(&q->sysfs_lock); - spin_lock_irq(q->queue_lock); - queue_flag_clear(QUEUE_FLAG_REGISTERED, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q); /* * Remove the sysfs attributes before unregistering the queue data diff --git a/block/blk-timeout.c b/block/blk-timeout.c index a05e3676d24a..34a55250f08a 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -57,12 +57,10 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, char *p = (char *) buf; val = simple_strtoul(p, &p, 10); - spin_lock_irq(q->queue_lock); if (val) - queue_flag_set(QUEUE_FLAG_FAIL_IO, q); + blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q); else - queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); - spin_unlock_irq(q->queue_lock); + blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); } return count; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c351aaec3ca7..f84b3c7887b1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -707,6 +707,11 @@ struct request_queue { (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_POLL)) +void blk_queue_flag_set(unsigned int flag, struct request_queue *q); +void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); +bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); +bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); + /* * @q->queue_lock is set while a queue is being initialized. Since we know * that no other threads access the queue object before @q->queue_lock has -- cgit 1.4.1 From 7dfdbc7367f6f789715cab2cb484b78ab45e9f3e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:05 -0800 Subject: block: Protect queue flag changes with the queue lock Since the queue flags may be changed concurrently from multiple contexts after a queue becomes visible in sysfs, make these changes safe by protecting these with the queue lock. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Martin K. 
Petersen Reviewed-by: Johannes Thumshirn Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-stat.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index a86899022683..f5c7dbcb954f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2997,7 +2997,7 @@ EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); static bool blk_poll_stats_enable(struct request_queue *q) { if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) || - test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags)) + blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q)) return true; blk_stat_add_callback(q, q->poll_cb); return false; diff --git a/block/blk-stat.c b/block/blk-stat.c index b664aa6df725..bd365a95fcf8 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -152,7 +152,7 @@ void blk_stat_add_callback(struct request_queue *q, spin_lock(&q->stats->lock); list_add_tail_rcu(&cb->list, &q->stats->callbacks); - queue_flag_set(QUEUE_FLAG_STATS, q); + blk_queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } EXPORT_SYMBOL_GPL(blk_stat_add_callback); @@ -163,7 +163,7 @@ void blk_stat_remove_callback(struct request_queue *q, spin_lock(&q->stats->lock); list_del_rcu(&cb->list); if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting) - queue_flag_clear(QUEUE_FLAG_STATS, q); + blk_queue_flag_clear(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); del_timer_sync(&cb->timer); @@ -191,7 +191,7 @@ void blk_stat_enable_accounting(struct request_queue *q) { spin_lock(&q->stats->lock); q->stats->enable_accounting = true; - queue_flag_set(QUEUE_FLAG_STATS, q); + blk_queue_flag_set(QUEUE_FLAG_STATS, q); spin_unlock(&q->stats->lock); } -- cgit 1.4.1 From 8b904b5b6b58b9a29dcf3f82d936d9e7fd69fda6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:10 -0800 Subject: block: Use blk_queue_flag_*() in drivers instead of queue_flag_*() This patch has been generated as follows: for verb in set_unlocked clear_unlocked set clear; do replace-in-files queue_flag_${verb} blk_queue_flag_${verb%_unlocked} \ $(git grep -lw queue_flag_${verb} drivers block/bsg*) done Except for protecting all queue flag changes with the queue lock this patch does not change any functionality. Cc: Mike Snitzer Cc: Shaohua Li Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Signed-off-by: Bart Van Assche Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Acked-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/bsg-lib.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 4 ++-- drivers/block/loop.c | 10 +++++----- drivers/block/nbd.c | 8 ++++---- drivers/block/null_blk.c | 6 +++--- drivers/block/rbd.c | 4 ++-- drivers/block/rsxx/dev.c | 6 +++--- drivers/block/skd_main.c | 4 ++-- drivers/block/xen-blkfront.c | 10 +++++----- drivers/block/zram/zram_drv.c | 6 +++--- drivers/ide/ide-disk.c | 4 ++-- drivers/ide/ide-probe.c | 2 +- drivers/lightnvm/pblk-init.c | 2 +- drivers/md/dm-table.c | 16 ++++++++-------- drivers/md/md-linear.c | 4 ++-- drivers/md/md.c | 4 ++-- drivers/md/raid0.c | 4 ++-- drivers/md/raid1.c | 6 +++--- drivers/md/raid10.c | 6 +++--- drivers/md/raid5.c | 4 ++-- drivers/mmc/core/queue.c | 8 ++++---- drivers/mtd/mtd_blkdevs.c | 6 +++--- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt.c | 2 +- drivers/nvdimm/pmem.c | 4 ++-- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/multipath.c | 2 +- drivers/s390/block/dasd.c | 4 ++-- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/scm_blk.c | 4 ++-- drivers/s390/block/xpram.c | 4 ++-- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- drivers/scsi/scsi_debug.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_transport_sas.c | 4 ++-- drivers/scsi/sd.c | 8 ++++---- 38 files changed, 89 insertions(+), 89 deletions(-) (limited to 'block') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 1474153f73e3..b4fe1a48f111 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -275,8 +275,8 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, q->queuedata = dev; q->bsg_job_fn = job_fn; - queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + blk_queue_flag_set(QUEUE_FLAG_BIDI, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a12f77e6891e..b4f02768ba47 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1212,10 +1212,10 @@ static void decide_on_discard_support(struct drbd_device *device, * topology on all peers. 
*/ blk_queue_discard_granularity(q, 512); q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); } else { - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); blk_queue_discard_granularity(q, 0); q->limits.max_discard_sectors = 0; q->limits.max_write_zeroes_sectors = 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 9d29aa6413e5..7952ed5c607b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -214,10 +214,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { - queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags |= LO_FLAGS_DIRECT_IO; } else { - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } blk_mq_unfreeze_queue(lo->lo_queue); @@ -817,7 +817,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, 0); blk_queue_max_write_zeroes_sectors(q, 0); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); return; } @@ -826,7 +826,7 @@ static void loop_config_discard(struct loop_device *lo) blk_queue_max_discard_sectors(q, UINT_MAX >> 9); blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static void loop_unprepare_queue(struct loop_device *lo) @@ -1808,7 +1808,7 @@ static int loop_add(struct loop_device **l, int i) * page. For directio mode, merge does help to dispatch bigger request * to underlayer disk. We will enable merge once directio is enabled. 
*/ - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); err = -ENOMEM; disk = lo->lo_disk = alloc_disk(1 << part_shift); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 86258b00a1d4..afbc202ca6fd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -964,7 +964,7 @@ static void nbd_parse_flags(struct nbd_device *nbd) else set_disk_ro(nbd->disk, false); if (config->flags & NBD_FLAG_SEND_TRIM) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (config->flags & NBD_FLAG_SEND_FLUSH) { if (config->flags & NBD_FLAG_SEND_FUA) blk_queue_write_cache(nbd->disk->queue, true, true); @@ -1040,7 +1040,7 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1488,8 +1488,8 @@ static int nbd_dev_add(int index) /* * Tell the block layer that we are not a rotational device */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; blk_queue_max_discard_sectors(disk->queue, UINT_MAX); blk_queue_max_segment_size(disk->queue, UINT_MAX); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index d6be7a6d8ca6..0517613afccb 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1525,7 +1525,7 @@ static void null_config_discard(struct nullb *nullb) nullb->q->limits.discard_granularity = nullb->dev->blocksize; nullb->q->limits.discard_alignment = nullb->dev->blocksize; blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); } static int null_open(struct block_device *bdev, fmode_t mode) @@ -1810,8 +1810,8 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q->queuedata = nullb; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); mutex_lock(&lock); nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8e40da093766..0016170cde0a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4370,7 +4370,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) goto out_tag_set; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ /* set io sizes to object size */ @@ -4383,7 +4383,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_io_opt(q, segment_size); /* enable the discard support */ - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.discard_granularity = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index e397d3ee7308..dddb3f2490b6 100644 --- a/drivers/block/rsxx/dev.c +++ 
b/drivers/block/rsxx/dev.c @@ -287,10 +287,10 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue); if (rsxx_discard_supported(card)) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue); blk_queue_max_discard_sectors(card->queue, RSXX_HW_BLK_SIZE >> 9); card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index e41935ab41ef..bc7aea6d7b7c 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2858,8 +2858,8 @@ static int skd_cons_disk(struct skd_device *skdev) /* set optimal I/O size to 8KB */ blk_queue_io_opt(q, 8192); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); blk_queue_rq_timeout(q, 8 * HZ); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index e126e4cac2ca..3fcdc0d8eed3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -931,15 +931,15 @@ static void blkif_set_queue_limits(struct blkfront_info *info) unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); + blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); + blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware. 
*/ @@ -1610,8 +1610,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - queue_flag_clear(QUEUE_FLAG_SECERASE, rq); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq); + blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 85110e7931e5..71b449613cfa 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1530,8 +1530,8 @@ static int zram_add(void) /* Actual capacity set using syfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned @@ -1544,7 +1544,7 @@ static int zram_add(void) blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); /* * zram_bio_discard() will clear all logical blocks if logical block diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 188d1b03715d..9c47f975567f 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -687,8 +687,8 @@ static void ide_disk_setup(ide_drive_t *drive) queue_max_sectors(q) / 2); if (ata_id_is_ssd(id)) { - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } /* calculate drive capacity, and select LBA if possible */ diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index d6b8c7e1545d..2019e66eada7 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -773,7 +773,7 @@ static int ide_init_queue(ide_drive_t *drive) q->request_fn = do_ide_request; q->initialize_rq_fn = ide_initialize_rq; q->cmd_size = sizeof(struct ide_request); - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); return 1; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 93d671ca518e..5b46924ac66c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1067,7 +1067,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, tqueue->limits.discard_granularity = geo->sec_per_chk * geo->sec_size; tqueue->limits.discard_alignment = 0; blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue); pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n", tdisk->disk_name, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5fe7ec356c33..54c39ad4ef01 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1861,7 +1861,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits = *limits; if (!dm_table_supports_discards(t)) { - 
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); /* Must also clear discard limits... */ q->limits.max_discard_sectors = 0; q->limits.max_hw_discard_sectors = 0; @@ -1869,7 +1869,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits.discard_alignment = 0; q->limits.discard_misaligned = 0; } else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; @@ -1879,15 +1879,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, blk_queue_write_cache(q, wc, fua); if (dm_table_supports_dax(t)) - queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); + blk_queue_flag_set(QUEUE_FLAG_DAX, q); if (dm_table_supports_dax_write_cache(t)) dax_write_cache(t->md->dax_dev, true); /* Ensure that all underlying devices are non-rotational. */ if (dm_table_all_devices_attribute(t, device_is_nonrot)) - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); else - queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); if (!dm_table_supports_write_same(t)) q->limits.max_write_same_sectors = 0; @@ -1895,9 +1895,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits.max_write_zeroes_sectors = 0; if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) - queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q); else - queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q); dm_table_verify_integrity(t); @@ -1908,7 +1908,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * have it set. */ if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 773fc70dced7..4964323d936b 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) } if (!discard_supported) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); /* * Here we calculate the device offsets. 
diff --git a/drivers/md/md.c b/drivers/md/md.c index eba7fa2f0abb..de2b26fba5d8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5608,9 +5608,9 @@ int md_run(struct mddev *mddev) if (mddev->degraded) nonrot = false; if (nonrot) - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue); mddev->queue->backing_dev_info->congested_data = mddev; mddev->queue->backing_dev_info->congested_fn = md_congested; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 5ecba9eef441..584c10347267 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev) discard_supported = true; } if (!discard_supported) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); } /* calculate array device size */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b2eae332e1a2..f1635eb9e95a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) } } if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); return err; } @@ -3099,10 +3099,10 @@ static int raid1_run(struct mddev *mddev) if (mddev->queue) { if (discard_supported) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 99c9207899a7..e9c409c5f344 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) break; } if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); return err; @@ -3844,10 +3844,10 @@ static int raid10_run(struct mddev *mddev) if (mddev->queue) { if (discard_supported) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); } /* need to check that every block has at least one working mirror */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 50d01144b805..14714b23a2fa 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7444,10 +7444,10 @@ static int raid5_run(struct mddev *mddev) if (devices_handle_discard_safely && mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && mddev->queue->limits.discard_granularity >= stripe) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 421fab7250ac..56e9a803db21 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ 
-185,14 +185,14 @@ static void mmc_queue_setup_discard(struct request_queue *q, if (!max_discard) return; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); blk_queue_max_discard_sectors(q, max_discard); q->limits.discard_granularity = card->pref_erase << 9; /* granularity must not be greater than max. discard */ if (card->pref_erase > max_discard) q->limits.discard_granularity = 0; if (mmc_can_secure_erase_trim(card)) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q); + blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); } /** @@ -356,8 +356,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, mq->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); if (mmc_can_erase(card)) mmc_queue_setup_discard(mq->queue, card); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 9ec8f033ac5f..16ae4ae8e8f9 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -419,11 +419,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) blk_queue_logical_block_size(new->rq, tr->blksize); blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); + blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); if (tr->discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq); blk_queue_max_discard_sectors(new->rq, UINT_MAX); } diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 345acca576b3..7bde764f939a 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -266,7 +266,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); q->queuedata = nsblk; disk = alloc_disk(0); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 2ef544f10ec8..6f311f88a8e8 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1542,7 +1542,7 @@ static int btt_blk_init(struct btt *btt) blk_queue_make_request(btt->btt_queue, btt_make_request); blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue); btt->btt_queue->queuedata = btt; set_capacity(btt->btt_disk, 0); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index cfb15ac50925..145db2ad712f 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -388,8 +388,8 @@ static int pmem_attach_disk(struct device *dev, blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_DAX, q); q->queuedata = pmem; disk = alloc_disk_node(0, nid); diff --git a/drivers/nvme/host/core.c 
b/drivers/nvme/host/core.c index 817e5e2766da..72e241923e7d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1358,7 +1358,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, blk_queue_max_discard_sectors(queue, UINT_MAX); blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -2949,7 +2949,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->queue = blk_mq_init_queue(ctrl->tagset); if (IS_ERR(ns->queue)) goto out_free_ns; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); ns->queue->queuedata = ns; ns->ctrl = ctrl; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 88440562a197..7283d7149baf 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -168,7 +168,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) q->queuedata = head; blk_queue_make_request(q, nvme_ns_head_make_request); q->poll_fn = nvme_ns_head_poll; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a7c15f0085e2..7be803afcb43 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3210,7 +3210,7 @@ static void dasd_setup_queue(struct dasd_block *block) } else { max = block->base->discipline->max_blocks << block->s2b_shift; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); q->limits.max_dev_sectors = max; blk_queue_logical_block_size(q, logical_block_size); blk_queue_max_hw_sectors(q, max); @@ -3233,7 +3233,7 @@ static void dasd_setup_queue(struct dasd_block *block) blk_queue_max_discard_sectors(q, max_discard_sectors); blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 9cae08b36b80..0a312e450207 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -633,7 +633,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->private_data = dev_info; blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); - queue_flag_set_unlocked(QUEUE_FLAG_DAX, dev_info->dcssblk_queue); + blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue); seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index b4130c7880d8..b1fcb76dd272 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -472,8 +472,8 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) blk_queue_logical_block_size(rq, 1 << 12); blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */ blk_queue_max_segments(rq, nr_max_blk); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq); + blk_queue_flag_set(QUEUE_FLAG_NONROT, rq); + 
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); bdev->gendisk = alloc_disk(SCM_NR_PARTS); if (!bdev->gendisk) { diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 2a6334ca750e..3df5d68d09f0 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -348,8 +348,8 @@ static int __init xpram_setup_blkdev(void) put_disk(xpram_disks[i]); goto out; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xpram_queues[i]); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); + blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); blk_queue_make_request(xpram_queues[i], xpram_make_request); blk_queue_logical_block_size(xpram_queues[i], 4096); } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index a71ee67df084..dc234650014c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1864,7 +1864,7 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev, u32 max_io_size) blk_queue_max_hw_sectors(sdev->request_queue, (max_io_size / 512)); - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue); blk_queue_virt_boundary(sdev->request_queue, mr_nvme_pg_size - 1); } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 073ced07e662..298019cf08a2 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1908,7 +1908,7 @@ megasas_is_prp_possible(struct megasas_instance *instance, * then sending IOs with holes. * * Though driver can request block layer to disable IO merging by calling- - * queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue) but + * blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue) but * user may tune sysfs parameter- nomerges again to 0 or 1. * * If in future IO scheduling is enabled with SCSI BLK MQ, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 74fca184dba9..e3843828e59a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2352,7 +2352,7 @@ scsih_slave_configure(struct scsi_device *sdev) ** merged and can eliminate holes created during merging ** operation. 
**/ - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue); blk_queue_virt_boundary(sdev->request_queue, ioc->page_size - 1); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index a5986dae9020..1cb353f18d08 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -3897,7 +3897,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp) if (sdebug_verbose) pr_info("slave_alloc <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); - queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); + blk_queue_flag_set(QUEUE_FLAG_BIDI, sdp->request_queue); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 71d1135f94d0..538152f3528e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2140,7 +2140,7 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = shost->dma_dev; - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); /* * this limit is imposed by hardware restrictions diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 736a1f4f9676..7c0987616684 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -227,8 +227,8 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) * by default assume old behaviour and bounce for any highmem page */ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); - queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + blk_queue_flag_set(QUEUE_FLAG_BIDI, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bff21e636ddd..98de3207ac5d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -714,7 +714,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) case SD_LBP_FULL: case SD_LBP_DISABLE: blk_queue_max_discard_sectors(q, 0); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); return; case SD_LBP_UNMAP: @@ -747,7 +747,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) } blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) @@ -2952,8 +2952,8 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) rot = get_unaligned_be16(&buffer[4]); if (rot == 1) { - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } if (sdkp->device->type == TYPE_ZBC) { -- cgit 1.4.1 From 8a0ac14b8da9b86cfbe7aace40c8d485ed5c5b97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 7 Mar 2018 17:10:12 -0800 Subject: block: Move the queue_flag_*() functions from a public into a private header file This patch helps to avoid that new code gets introduced in block drivers that manipulates queue flags without holding the queue lock when that lock should be held. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 69 -------------------------------------------------- 2 files changed, 69 insertions(+), 69 deletions(-) (limited to 'block') diff --git a/block/blk.h b/block/blk.h index 46db5dc83dcb..b034fd2460c4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -41,6 +41,75 @@ extern struct kmem_cache *request_cachep; extern struct kobj_type blk_queue_ktype; extern struct ida blk_queue_ida; +/* + * @q->queue_lock is set while a queue is being initialized. Since we know + * that no other threads access the queue object before @q->queue_lock has + * been set, it is safe to manipulate queue flags without holding the + * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and + * blk_init_allocated_queue(). + */ +static inline void queue_lockdep_assert_held(struct request_queue *q) +{ + if (q->queue_lock) + lockdep_assert_held(q->queue_lock); +} + +static inline void queue_flag_set_unlocked(unsigned int flag, + struct request_queue *q) +{ + if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && + kref_read(&q->kobj.kref)) + lockdep_assert_held(q->queue_lock); + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear_unlocked(unsigned int flag, + struct request_queue *q) +{ + if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && + kref_read(&q->kobj.kref)) + lockdep_assert_held(q->queue_lock); + __clear_bit(flag, &q->queue_flags); +} + +static inline int queue_flag_test_and_clear(unsigned int flag, + struct request_queue *q) +{ + queue_lockdep_assert_held(q); + + if (test_bit(flag, &q->queue_flags)) { + __clear_bit(flag, &q->queue_flags); + return 1; + } + + return 0; +} + +static inline int queue_flag_test_and_set(unsigned int flag, + struct request_queue *q) +{ + queue_lockdep_assert_held(q); + + if (!test_bit(flag, &q->queue_flags)) { + __set_bit(flag, &q->queue_flags); + return 0; + } + + return 1; +} + +static inline void queue_flag_set(unsigned int flag, struct request_queue *q) +{ + queue_lockdep_assert_held(q); + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) +{ + queue_lockdep_assert_held(q); + __clear_bit(flag, &q->queue_flags); +} + static inline struct blk_flush_queue *blk_get_flush_queue( struct request_queue *q, struct blk_mq_ctx *ctx) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 888c9b25cb8f..19eaf8d89368 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -712,75 +712,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); -/* - * @q->queue_lock is set while a queue is being initialized. Since we know - * that no other threads access the queue object before @q->queue_lock has - * been set, it is safe to manipulate queue flags without holding the - * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and - * blk_init_allocated_queue(). 
- */ -static inline void queue_lockdep_assert_held(struct request_queue *q) -{ - if (q->queue_lock) - lockdep_assert_held(q->queue_lock); -} - -static inline void queue_flag_set_unlocked(unsigned int flag, - struct request_queue *q) -{ - if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && - kref_read(&q->kobj.kref)) - lockdep_assert_held(q->queue_lock); - __set_bit(flag, &q->queue_flags); -} - -static inline void queue_flag_clear_unlocked(unsigned int flag, - struct request_queue *q) -{ - if (test_bit(QUEUE_FLAG_INIT_DONE, &q->queue_flags) && - kref_read(&q->kobj.kref)) - lockdep_assert_held(q->queue_lock); - __clear_bit(flag, &q->queue_flags); -} - -static inline int queue_flag_test_and_clear(unsigned int flag, - struct request_queue *q) -{ - queue_lockdep_assert_held(q); - - if (test_bit(flag, &q->queue_flags)) { - __clear_bit(flag, &q->queue_flags); - return 1; - } - - return 0; -} - -static inline int queue_flag_test_and_set(unsigned int flag, - struct request_queue *q) -{ - queue_lockdep_assert_held(q); - - if (!test_bit(flag, &q->queue_flags)) { - __set_bit(flag, &q->queue_flags); - return 0; - } - - return 1; -} - -static inline void queue_flag_set(unsigned int flag, struct request_queue *q) -{ - queue_lockdep_assert_held(q); - __set_bit(flag, &q->queue_flags); -} - -static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) -{ - queue_lockdep_assert_held(q); - __clear_bit(flag, &q->queue_flags); -} - #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -- cgit 1.4.1 From 56c4bddb970658b26da0d847a2bfb3225e2f2b13 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 8 Mar 2018 15:28:50 -0800 Subject: block: Suppress kernel-doc warnings triggered by blk-zoned.c Avoid that building with W=1 causes the kernel-doc tool to complain about undocumented function arguments for the blk-zoned.c source file. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-zoned.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index acb7252c7e81..08e84ef2bc05 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -296,7 +296,7 @@ int blkdev_reset_zones(struct block_device *bdev, } EXPORT_SYMBOL_GPL(blkdev_reset_zones); -/** +/* * BLKREPORTZONE ioctl processing. * Called from blkdev_ioctl. */ @@ -355,7 +355,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, return ret; } -/** +/* * BLKRESETZONE ioctl processing. * Called from blkdev_ioctl. */ -- cgit 1.4.1 From 31156ec378c2ed10330c8c06bbf36fb7d7a55506 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2018 17:28:39 +0100 Subject: bsg-lib: introduce a timeout field in struct bsg_job The zfcp driver wants to know the timeout for a bsg job, so add a field to struct bsg_job for it in preparation of not exposing the request to the bsg-lib users. 
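As an illustration of the new field, a driver's bsg job handler could read the timeout directly from the job once it has been populated from the request; this is a minimal hypothetical sketch, and only the job->timeout field itself comes from this patch:

#include <linux/bsg-lib.h>
#include <linux/kernel.h>

/* Hypothetical LLD job handler, illustrative only. */
static int example_bsg_job_fn(struct bsg_job *job)
{
	/* The request timeout is now carried in the job itself, in jiffies. */
	unsigned int timeout_secs = job->timeout / HZ;

	pr_debug("bsg job timeout: %u seconds\n", timeout_secs);
	return 0;
}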
Signed-off-by: Christoph Hellwig Reviewed-by: Benjamin Block Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/bsg-lib.c | 1 + drivers/s390/scsi/zfcp_fc.c | 4 ++-- include/linux/bsg-lib.h | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index b4fe1a48f111..fb509779a090 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -132,6 +132,7 @@ static int bsg_prepare_job(struct device *dev, struct request *req) struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; + job->timeout = req->timeout; job->request = rq->cmd; job->request_len = rq->cmd_len; diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index ca218c82321f..6162cf57a20a 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -961,7 +961,7 @@ static int zfcp_fc_exec_els_job(struct bsg_job *job, d_id = ntoh24(bsg_request->rqst_data.h_els.port_id); els->handler = zfcp_fc_ct_els_job_handler; - return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ); + return zfcp_fsf_send_els(adapter, d_id, els, job->timeout / HZ); } static int zfcp_fc_exec_ct_job(struct bsg_job *job, @@ -980,7 +980,7 @@ static int zfcp_fc_exec_ct_job(struct bsg_job *job, return ret; ct->handler = zfcp_fc_ct_job_handler; - ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->req->timeout / HZ); + ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->timeout / HZ); if (ret) zfcp_fc_wka_port_put(wka_port); diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index b1be0233ce35..402223c95ce1 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -44,6 +44,8 @@ struct bsg_job { struct kref kref; + unsigned int timeout; + /* Transport/driver specific request/reply structs */ void *request; void *reply; -- cgit 1.4.1 From ef6fa64f9b8e1611854077ea9213f2eef2428cd2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2018 17:28:40 +0100 Subject: bsg-lib: remove bsg_job.req Users of the bsg-lib interface should only use the bsg_job data structure and not know about implementation details of it. 
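Where a driver still needs the struct request that backs a bsg_job after this change, the conversion below shows the pattern bsg-lib itself switches to; here is a hedged sketch of the same idea, with a hypothetical helper name:

#include <linux/blk-mq.h>
#include <linux/bsg-lib.h>

/* Illustrative helper, not part of the patch: recover the request backing a
 * bsg_job.  This works because bsg-lib allocates the job as the request's
 * payload (PDU), which is exactly what blk_mq_rq_from_pdu() relies on. */
static struct request *example_job_to_request(struct bsg_job *job)
{
	return blk_mq_rq_from_pdu(job);
}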
Signed-off-by: Christoph Hellwig Reviewed-by: Benjamin Block Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/bsg-lib.c | 14 ++++++-------- include/linux/bsg-lib.h | 1 - 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index fb509779a090..f2c2d54a61b4 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -35,7 +35,7 @@ static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); - struct request *rq = job->req; + struct request *rq = blk_mq_rq_from_pdu(job); put_device(job->dev); /* release reference for the request */ @@ -68,19 +68,18 @@ EXPORT_SYMBOL_GPL(bsg_job_get); void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len) { - struct request *req = job->req; + struct request *req = blk_mq_rq_from_pdu(job); struct request *rsp = req->next_rq; - struct scsi_request *rq = scsi_req(req); int err; - err = scsi_req(job->req)->result = result; + err = job->sreq.result = result; if (err < 0) /* we're only returning the result field in the reply */ - rq->sense_len = sizeof(u32); + job->sreq.sense_len = sizeof(u32); else - rq->sense_len = job->reply_len; + job->sreq.sense_len = job->reply_len; /* we assume all request payload was transferred, residual == 0 */ - rq->resid_len = 0; + job->sreq.resid_len = 0; if (rsp) { WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len); @@ -232,7 +231,6 @@ static void bsg_initialize_rq(struct request *req) sreq->sense = sense; sreq->sense_len = SCSI_SENSE_BUFFERSIZE; - job->req = req; job->reply = sense; job->reply_len = sreq->sense_len; job->dd_data = job + 1; diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 402223c95ce1..08762d297cbd 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -40,7 +40,6 @@ struct bsg_job { struct scsi_request sreq; struct device *dev; - struct request *req; struct kref kref; -- cgit 1.4.1 From 17cb960f29c29ee07bf6848ada3265f4be55972e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Mar 2018 17:28:41 +0100 Subject: bsg: split handling of SCSI CDBs vs transport requests The current BSG design tries to shoe-horn the transport-specific passthrough commands into the overall framework for SCSI passthrough requests. This has a couple of problems: - each passthrough queue has to set the QUEUE_FLAG_SCSI_PASSTHROUGH flag despite not dealing with SCSI commands at all. Because of that these queues could also incorrectly accept SCSI commands from in-kernel users or through the legacy SCSI_IOCTL_SEND_COMMAND ioctl. - the real SCSI bsg queues also incorrectly accept bsg requests of the BSG_SUB_PROTOCOL_SCSI_TRANSPORT type - the bsg transport code is almost unreadable because it tries to reuse different SCSI concepts for its own purpose. This patch instead adds a new bsg_ops structure to handle the two cases differently, and thus solves all of the above problems. Another side effect is that the bsg-lib queues also don't need to embed a struct scsi_request anymore.
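To make the split concrete, a minimal sketch of a bsg_ops user is shown below; the four callbacks mirror the bsg_transport_ops added by this patch, while every example_ identifier is a hypothetical stand-in rather than real kernel code:

#include <linux/bsg.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical protocol handler wired up through the new ops table. */
static int example_check_proto(struct sg_io_v4 *hdr)
{
	/* Accept only the protocol this handler implements. */
	return hdr->protocol == BSG_PROTOCOL_SCSI ? 0 : -EINVAL;
}

static int example_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
			    fmode_t mode)
{
	/* Copy the command described by *hdr into the request here. */
	return 0;
}

static int example_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
{
	/* Fill in the result and residual fields of *hdr here. */
	return 0;
}

static void example_free_rq(struct request *rq)
{
	/* Release whatever example_fill_hdr() allocated. */
}

static const struct bsg_ops example_bsg_ops = {
	.check_proto	= example_check_proto,
	.fill_hdr	= example_fill_hdr,
	.complete_rq	= example_complete_rq,
	.free_rq	= example_free_rq,
};

Such a table is then handed to bsg_register_queue() together with the queue, as bsg_setup_queue() does for bsg_transport_ops in the diff that follows.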
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/bsg-lib.c | 158 +++++++++++++++-------- block/bsg.c | 262 +++++++++++++++++--------------------- drivers/scsi/scsi_lib.c | 4 +- drivers/scsi/scsi_sysfs.c | 3 +- drivers/scsi/scsi_transport_sas.c | 1 - include/linux/bsg-lib.h | 4 +- include/linux/bsg.h | 35 +++-- 7 files changed, 250 insertions(+), 217 deletions(-) (limited to 'block') diff --git a/block/bsg-lib.c b/block/bsg-lib.c index f2c2d54a61b4..fc2e5ff2c4b9 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -27,6 +27,94 @@ #include #include #include +#include + +#define uptr64(val) ((void __user *)(uintptr_t)(val)) + +static int bsg_transport_check_proto(struct sg_io_v4 *hdr) +{ + if (hdr->protocol != BSG_PROTOCOL_SCSI || + hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_TRANSPORT) + return -EINVAL; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + return 0; +} + +static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, + fmode_t mode) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(rq); + + job->request_len = hdr->request_len; + job->request = memdup_user(uptr64(hdr->request), hdr->request_len); + if (IS_ERR(job->request)) + return PTR_ERR(job->request); + return 0; +} + +static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(rq); + int ret = 0; + + /* + * The assignments below don't make much sense, but are kept for + * bug by bug backwards compatibility: + */ + hdr->device_status = job->result & 0xff; + hdr->transport_status = host_byte(job->result); + hdr->driver_status = driver_byte(job->result); + hdr->info = 0; + if (hdr->device_status || hdr->transport_status || hdr->driver_status) + hdr->info |= SG_INFO_CHECK; + hdr->response_len = 0; + + if (job->result < 0) { + /* we're only returning the result field in the reply */ + job->reply_len = sizeof(u32); + ret = job->result; + } + + if (job->reply_len && hdr->response) { + int len = min(hdr->max_response_len, job->reply_len); + + if (copy_to_user(uptr64(hdr->response), job->reply, len)) + ret = -EFAULT; + else + hdr->response_len = len; + } + + /* we assume all request payload was transferred, residual == 0 */ + hdr->dout_resid = 0; + + if (rq->next_rq) { + unsigned int rsp_len = job->reply_payload.payload_len; + + if (WARN_ON(job->reply_payload_rcv_len > rsp_len)) + hdr->din_resid = 0; + else + hdr->din_resid = rsp_len - job->reply_payload_rcv_len; + } else { + hdr->din_resid = 0; + } + + return ret; +} + +static void bsg_transport_free_rq(struct request *rq) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(rq); + + kfree(job->request); +} + +static const struct bsg_ops bsg_transport_ops = { + .check_proto = bsg_transport_check_proto, + .fill_hdr = bsg_transport_fill_hdr, + .complete_rq = bsg_transport_complete_rq, + .free_rq = bsg_transport_free_rq, +}; /** * bsg_teardown_job - routine to teardown a bsg job @@ -68,27 +156,9 @@ EXPORT_SYMBOL_GPL(bsg_job_get); void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len) { - struct request *req = blk_mq_rq_from_pdu(job); - struct request *rsp = req->next_rq; - int err; - - err = job->sreq.result = result; - if (err < 0) - /* we're only returning the result field in the reply */ - job->sreq.sense_len = sizeof(u32); - else - job->sreq.sense_len = job->reply_len; - /* we assume all request payload was transferred, residual == 0 */ - job->sreq.resid_len = 0; - - if (rsp) { - WARN_ON(reply_payload_rcv_len > 
scsi_req(rsp)->resid_len); - - /* set reply (bidi) residual */ - scsi_req(rsp)->resid_len -= - min(reply_payload_rcv_len, scsi_req(rsp)->resid_len); - } - blk_complete_request(req); + job->result = result; + job->reply_payload_rcv_len = reply_payload_rcv_len; + blk_complete_request(blk_mq_rq_from_pdu(job)); } EXPORT_SYMBOL_GPL(bsg_job_done); @@ -113,7 +183,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) if (!buf->sg_list) return -ENOMEM; sg_init_table(buf->sg_list, req->nr_phys_segments); - scsi_req(req)->resid_len = blk_rq_bytes(req); buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); buf->payload_len = blk_rq_bytes(req); return 0; @@ -124,16 +193,13 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_prepare_job(struct device *dev, struct request *req) +static bool bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct scsi_request *rq = scsi_req(req); struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; job->timeout = req->timeout; - job->request = rq->cmd; - job->request_len = rq->cmd_len; if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); @@ -149,12 +215,13 @@ static int bsg_prepare_job(struct device *dev, struct request *req) /* take a reference for the request */ get_device(job->dev); kref_init(&job->kref); - return 0; + return true; failjob_rls_rqst_payload: kfree(job->request_payload.sg_list); failjob_rls_job: - return -ENOMEM; + job->result = -ENOMEM; + return false; } /** @@ -183,9 +250,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_prepare_job(dev, req); - if (ret) { - scsi_req(req)->result = ret; + if (!bsg_prepare_job(dev, req)) { blk_end_request_all(req, BLK_STS_OK); spin_lock_irq(q->queue_lock); continue; @@ -202,46 +267,34 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +/* called right after the request is allocated for the request_queue */ static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) { struct bsg_job *job = blk_mq_rq_to_pdu(req); - struct scsi_request *sreq = &job->sreq; - - /* called right after the request is allocated for the request_queue */ - sreq->sense = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp); - if (!sreq->sense) + job->reply = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp); + if (!job->reply) return -ENOMEM; - return 0; } +/* called right before the request is given to the request_queue user */ static void bsg_initialize_rq(struct request *req) { struct bsg_job *job = blk_mq_rq_to_pdu(req); - struct scsi_request *sreq = &job->sreq; - void *sense = sreq->sense; - - /* called right before the request is given to the request_queue user */ + void *reply = job->reply; memset(job, 0, sizeof(*job)); - - scsi_req_init(sreq); - - sreq->sense = sense; - sreq->sense_len = SCSI_SENSE_BUFFERSIZE; - - job->reply = sense; - job->reply_len = sreq->sense_len; + job->reply = reply; + job->reply_len = SCSI_SENSE_BUFFERSIZE; job->dd_data = job + 1; } static void bsg_exit_rq(struct request_queue *q, struct request *req) { struct bsg_job *job = blk_mq_rq_to_pdu(req); - struct scsi_request *sreq = &job->sreq; - kfree(sreq->sense); + kfree(job->reply); } /** @@ -275,11 +328,10 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, q->queuedata = dev; q->bsg_job_fn = job_fn; blk_queue_flag_set(QUEUE_FLAG_BIDI, q); 
- blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); blk_queue_softirq_done(q, bsg_softirq_done); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); - ret = bsg_register_queue(q, dev, name, release); + ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release); if (ret) { printk(KERN_ERR "%s: bsg interface failed to " "initialize - register queue\n", dev->kobj.name); diff --git a/block/bsg.c b/block/bsg.c index 06dc96e1f670..defa06c11858 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -130,114 +130,120 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index) return &bsg_device_list[index & (BSG_LIST_ARRAY_SIZE - 1)]; } -static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, - struct sg_io_v4 *hdr, struct bsg_device *bd, - fmode_t mode) +#define uptr64(val) ((void __user *)(uintptr_t)(val)) + +static int bsg_scsi_check_proto(struct sg_io_v4 *hdr) +{ + if (hdr->protocol != BSG_PROTOCOL_SCSI || + hdr->subprotocol != BSG_SUB_PROTOCOL_SCSI_CMD) + return -EINVAL; + return 0; +} + +static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr, + fmode_t mode) { - struct scsi_request *req = scsi_req(rq); + struct scsi_request *sreq = scsi_req(rq); - if (hdr->request_len > BLK_MAX_CDB) { - req->cmd = kzalloc(hdr->request_len, GFP_KERNEL); - if (!req->cmd) + sreq->cmd_len = hdr->request_len; + if (sreq->cmd_len > BLK_MAX_CDB) { + sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL); + if (!sreq->cmd) return -ENOMEM; } - if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request, - hdr->request_len)) + if (copy_from_user(sreq->cmd, uptr64(hdr->request), sreq->cmd_len)) return -EFAULT; - - if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { - if (blk_verify_command(req->cmd, mode)) - return -EPERM; - } else if (!capable(CAP_SYS_RAWIO)) + if (blk_verify_command(sreq->cmd, mode)) return -EPERM; - - /* - * fill in request structure - */ - req->cmd_len = hdr->request_len; - - rq->timeout = msecs_to_jiffies(hdr->timeout); - if (!rq->timeout) - rq->timeout = q->sg_timeout; - if (!rq->timeout) - rq->timeout = BLK_DEFAULT_SG_TIMEOUT; - if (rq->timeout < BLK_MIN_SG_TIMEOUT) - rq->timeout = BLK_MIN_SG_TIMEOUT; - return 0; } -/* - * Check if sg_io_v4 from user is allowed and valid - */ -static int -bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op) +static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr) { + struct scsi_request *sreq = scsi_req(rq); int ret = 0; - if (hdr->guard != 'Q') - return -EINVAL; + /* + * fill in all the output members + */ + hdr->device_status = sreq->result & 0xff; + hdr->transport_status = host_byte(sreq->result); + hdr->driver_status = driver_byte(sreq->result); + hdr->info = 0; + if (hdr->device_status || hdr->transport_status || hdr->driver_status) + hdr->info |= SG_INFO_CHECK; + hdr->response_len = 0; - switch (hdr->protocol) { - case BSG_PROTOCOL_SCSI: - switch (hdr->subprotocol) { - case BSG_SUB_PROTOCOL_SCSI_CMD: - case BSG_SUB_PROTOCOL_SCSI_TRANSPORT: - break; - default: - ret = -EINVAL; - } - break; - default: - ret = -EINVAL; + if (sreq->sense_len && hdr->response) { + int len = min_t(unsigned int, hdr->max_response_len, + sreq->sense_len); + + if (copy_to_user(uptr64(hdr->response), sreq->sense, len)) + ret = -EFAULT; + else + hdr->response_len = len; + } + + if (rq->next_rq) { + hdr->dout_resid = sreq->resid_len; + hdr->din_resid = scsi_req(rq->next_rq)->resid_len; + } else if (rq_data_dir(rq) == READ) { + hdr->din_resid = sreq->resid_len; + } else { + hdr->dout_resid = sreq->resid_len; } - *op = 
hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN; return ret; } -/* - * map sg_io_v4 to a request. - */ +static void bsg_scsi_free_rq(struct request *rq) +{ + scsi_req_free_cmd(scsi_req(rq)); +} + +static const struct bsg_ops bsg_scsi_ops = { + .check_proto = bsg_scsi_check_proto, + .fill_hdr = bsg_scsi_fill_hdr, + .complete_rq = bsg_scsi_complete_rq, + .free_rq = bsg_scsi_free_rq, +}; + static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode) +bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode) { - struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; int ret; - unsigned int op, dxfer_len; - void __user *dxferp = NULL; - struct bsg_class_device *bcd = &q->bsg_dev; - /* if the LLD has been removed then the bsg_unregister_queue will - * eventually be called and the class_dev was freed, so we can no - * longer use this request_queue. Return no such address. - */ - if (!bcd->class_dev) + if (!q->bsg_dev.class_dev) return ERR_PTR(-ENXIO); - bsg_dbg(bd, "map hdr %llx/%u %llx/%u\n", - (unsigned long long) hdr->dout_xferp, - hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp, - hdr->din_xfer_len); + if (hdr->guard != 'Q') + return ERR_PTR(-EINVAL); - ret = bsg_validate_sgv4_hdr(hdr, &op); + ret = q->bsg_dev.ops->check_proto(hdr); if (ret) return ERR_PTR(ret); - /* - * map scatter-gather elements separately and string them to request - */ - rq = blk_get_request(q, op, GFP_KERNEL); + rq = blk_get_request(q, hdr->dout_xfer_len ? + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + GFP_KERNEL); if (IS_ERR(rq)) return rq; - ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, mode); + ret = q->bsg_dev.ops->fill_hdr(rq, hdr, mode); if (ret) goto out; - if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) { + rq->timeout = msecs_to_jiffies(hdr->timeout); + if (!rq->timeout) + rq->timeout = q->sg_timeout; + if (!rq->timeout) + rq->timeout = BLK_DEFAULT_SG_TIMEOUT; + if (rq->timeout < BLK_MIN_SG_TIMEOUT) + rq->timeout = BLK_MIN_SG_TIMEOUT; + + if (hdr->dout_xfer_len && hdr->din_xfer_len) { if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { ret = -EOPNOTSUPP; goto out; @@ -246,42 +252,39 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode) next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(next_rq)) { ret = PTR_ERR(next_rq); - next_rq = NULL; goto out; } - rq->next_rq = next_rq; - dxferp = (void __user *)(unsigned long)hdr->din_xferp; - ret = blk_rq_map_user(q, next_rq, NULL, dxferp, + rq->next_rq = next_rq; + ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr->din_xferp), hdr->din_xfer_len, GFP_KERNEL); if (ret) - goto out; + goto out_free_nextrq; } if (hdr->dout_xfer_len) { - dxfer_len = hdr->dout_xfer_len; - dxferp = (void __user *)(unsigned long)hdr->dout_xferp; + ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->dout_xferp), + hdr->dout_xfer_len, GFP_KERNEL); } else if (hdr->din_xfer_len) { - dxfer_len = hdr->din_xfer_len; - dxferp = (void __user *)(unsigned long)hdr->din_xferp; - } else - dxfer_len = 0; - - if (dxfer_len) { - ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len, - GFP_KERNEL); - if (ret) - goto out; + ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp), + hdr->din_xfer_len, GFP_KERNEL); + } else { + ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL); } + if (ret) + goto out_unmap_nextrq; return rq; + +out_unmap_nextrq: + if (rq->next_rq) + blk_rq_unmap_user(rq->next_rq->bio); +out_free_nextrq: + if (rq->next_rq) + blk_put_request(rq->next_rq); out: - 
scsi_req_free_cmd(scsi_req(rq)); + q->bsg_dev.ops->free_rq(rq); blk_put_request(rq); - if (next_rq) { - blk_rq_unmap_user(next_rq->bio); - blk_put_request(next_rq); - } return ERR_PTR(ret); } @@ -383,56 +386,18 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd) static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, struct bio *bio, struct bio *bidi_bio) { - struct scsi_request *req = scsi_req(rq); - int ret = 0; - - pr_debug("rq %p bio %p 0x%x\n", rq, bio, req->result); - /* - * fill in all the output members - */ - hdr->device_status = req->result & 0xff; - hdr->transport_status = host_byte(req->result); - hdr->driver_status = driver_byte(req->result); - hdr->info = 0; - if (hdr->device_status || hdr->transport_status || hdr->driver_status) - hdr->info |= SG_INFO_CHECK; - hdr->response_len = 0; - - if (req->sense_len && hdr->response) { - int len = min_t(unsigned int, hdr->max_response_len, - req->sense_len); + int ret; - ret = copy_to_user((void __user *)(unsigned long)hdr->response, - req->sense, len); - if (!ret) - hdr->response_len = len; - else - ret = -EFAULT; - } + ret = rq->q->bsg_dev.ops->complete_rq(rq, hdr); if (rq->next_rq) { - hdr->dout_resid = req->resid_len; - hdr->din_resid = scsi_req(rq->next_rq)->resid_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); - } else if (rq_data_dir(rq) == READ) - hdr->din_resid = req->resid_len; - else - hdr->dout_resid = req->resid_len; - - /* - * If the request generated a negative error number, return it - * (providing we aren't already returning an error); if it's - * just a protocol response (i.e. non negative), that gets - * processed above. - */ - if (!ret && req->result < 0) - ret = req->result; + } blk_rq_unmap_user(bio); - scsi_req_free_cmd(req); + rq->q->bsg_dev.ops->free_rq(rq); blk_put_request(rq); - return ret; } @@ -614,7 +579,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf, /* * get a request, fill in the blanks, and add to request queue */ - rq = bsg_map_hdr(bd, &bc->hdr, mode); + rq = bsg_map_hdr(bd->queue, &bc->hdr, mode); if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; @@ -742,11 +707,6 @@ static struct bsg_device *bsg_add_device(struct inode *inode, struct bsg_device *bd; unsigned char buf[32]; - if (!blk_queue_scsi_passthrough(rq)) { - WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); - return ERR_PTR(-EINVAL); - } - if (!blk_get_queue(rq)) return ERR_PTR(-ENXIO); @@ -907,7 +867,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(&hdr, uarg, sizeof(hdr))) return -EFAULT; - rq = bsg_map_hdr(bd, &hdr, file->f_mode); + rq = bsg_map_hdr(bd->queue, &hdr, file->f_mode); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -959,7 +919,8 @@ void bsg_unregister_queue(struct request_queue *q) EXPORT_SYMBOL_GPL(bsg_unregister_queue); int bsg_register_queue(struct request_queue *q, struct device *parent, - const char *name, void (*release)(struct device *)) + const char *name, const struct bsg_ops *ops, + void (*release)(struct device *)) { struct bsg_class_device *bcd; dev_t dev; @@ -996,6 +957,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, bcd->queue = q; bcd->parent = get_device(parent); bcd->release = release; + bcd->ops = ops; kref_init(&bcd->ref); dev = MKDEV(bsg_major, bcd->minor); class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname); @@ -1023,7 +985,17 @@ unlock: mutex_unlock(&bsg_mutex); return ret; } -EXPORT_SYMBOL_GPL(bsg_register_queue); + +int 
bsg_scsi_register_queue(struct request_queue *q, struct device *parent) +{ + if (!blk_queue_scsi_passthrough(q)) { + WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); + return -EINVAL; + } + + return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL); +} +EXPORT_SYMBOL_GPL(bsg_scsi_register_queue); static struct cdev bsg_cdev; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 538152f3528e..37c1d63e847e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2140,8 +2140,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = shost->dma_dev; - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - /* * this limit is imposed by hardware restrictions */ @@ -2239,6 +2237,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) } __scsi_init_queue(shost, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); blk_queue_prep_rq(q, scsi_prep_fn); blk_queue_unprep_rq(q, scsi_unprep_fn); blk_queue_softirq_done(q, scsi_softirq_done); @@ -2270,6 +2269,7 @@ struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev) sdev->request_queue->queuedata = sdev; __scsi_init_queue(sdev->host, sdev->request_queue); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue); return sdev->request_queue; } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 91b90f672d23..7142c8be1099 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1292,8 +1292,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) transport_add_device(&sdev->sdev_gendev); sdev->is_visible = 1; - error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL); - + error = bsg_scsi_register_queue(rq, &sdev->sdev_gendev); if (error) /* we're treating error on bsg register as non-fatal, * so pretend nothing went wrong */ diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 7c0987616684..08acbabfae07 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -228,7 +228,6 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) */ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); blk_queue_flag_set(QUEUE_FLAG_BIDI, q); - blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); return 0; } diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 08762d297cbd..28a7ccc55c89 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -38,7 +38,6 @@ struct bsg_buffer { }; struct bsg_job { - struct scsi_request sreq; struct device *dev; struct kref kref; @@ -64,6 +63,9 @@ struct bsg_job { struct bsg_buffer request_payload; struct bsg_buffer reply_payload; + int result; + unsigned int reply_payload_rcv_len; + void *dd_data; /* Used for driver-specific storage */ }; diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 2a202e41a3af..0c7dd9ceb139 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,34 +1,43 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef BSG_H -#define BSG_H +#ifndef _LINUX_BSG_H +#define _LINUX_BSG_H #include +struct request; + +#ifdef CONFIG_BLK_DEV_BSG +struct bsg_ops { + int (*check_proto)(struct sg_io_v4 *hdr); + int (*fill_hdr)(struct request *rq, struct sg_io_v4 *hdr, + fmode_t mode); + int (*complete_rq)(struct request *rq, struct sg_io_v4 *hdr); + void (*free_rq)(struct request *rq); +}; -#if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device { struct device *class_dev; struct device *parent; int minor; struct request_queue *queue; 
struct kref ref; + const struct bsg_ops *ops; void (*release)(struct device *); }; -extern int bsg_register_queue(struct request_queue *q, - struct device *parent, const char *name, - void (*release)(struct device *)); -extern void bsg_unregister_queue(struct request_queue *); +int bsg_register_queue(struct request_queue *q, struct device *parent, + const char *name, const struct bsg_ops *ops, + void (*release)(struct device *)); +int bsg_scsi_register_queue(struct request_queue *q, struct device *parent); +void bsg_unregister_queue(struct request_queue *q); #else -static inline int bsg_register_queue(struct request_queue *q, - struct device *parent, const char *name, - void (*release)(struct device *)) +static inline int bsg_scsi_register_queue(struct request_queue *q, + struct device *parent) { return 0; } static inline void bsg_unregister_queue(struct request_queue *q) { } -#endif - -#endif +#endif /* CONFIG_BLK_DEV_BSG */ +#endif /* _LINUX_BSG_H */ -- cgit 1.4.1 From 5f990d316085aca11b04dc0f63d6df5e508d73c7 Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Wed, 7 Mar 2018 17:55:56 +0100 Subject: block: sed-opal: fix u64 short atom length The length must be given as bytes and not as 4 bit tuples. Reviewed-by: Scott Bauer Signed-off-by: Jonas Rabenstein Signed-off-by: Jens Axboe --- block/sed-opal.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/sed-opal.c b/block/sed-opal.c index 36842bfa572e..38411c5c477f 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -554,15 +554,14 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) size_t len; int msb; - u8 n; if (!(number & ~TINY_ATOM_DATA_MASK)) { add_token_u8(err, cmd, number); return; } - msb = fls(number); - len = DIV_ROUND_UP(msb, 4); + msb = fls64(number); + len = DIV_ROUND_UP(msb, 8); if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) { pr_debug("Error adding u64: end of buffer.\n"); @@ -570,10 +569,8 @@ static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) return; } add_short_atom_header(cmd, false, false, len); - while (len--) { - n = number >> (len * 8); - add_token_u8(err, cmd, n); - } + while (len--) + add_token_u8(err, cmd, number >> (len * 8)); } static void add_token_bytestring(int *err, struct opal_dev *cmd, -- cgit 1.4.1 From 4c6994806f708559c2812b73501406e21ae5dcd0 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 16 Mar 2018 14:51:27 +0800 Subject: blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir() We've triggered a WARNING in blk_throtl_bio() when throttling writeback io, which complains blkg->refcnt is already 0 when calling blkg_get(), and then kernel crashes with invalid page request. After investigating this issue, we've found it is caused by a race between blkcg_bio_issue_check() and cgroup_rmdir(), which is described below: writeback kworker cgroup_rmdir cgroup_destroy_locked kill_css css_killed_ref_fn css_killed_work_fn offline_css blkcg_css_offline blkcg_bio_issue_check rcu_read_lock blkg_lookup spin_trylock(q->queue_lock) blkg_destroy spin_unlock(q->queue_lock) blk_throtl_bio spin_lock_irq(q->queue_lock) ... spin_unlock_irq(q->queue_lock) rcu_read_unlock Since rcu can only prevent blkg from releasing when it is being used, the blkg->refcnt can be decreased to 0 during blkg_destroy() and schedule blkg release. Then trying to blkg_get() in blk_throtl_bio() will complains the WARNING. And then the corresponding blkg_put() will schedule blkg release again, which result in double free. 
This race is introduced by commit ae1188963611 ("blkcg: consolidate blkg creation in blkcg_bio_issue_check()"). Before this commit, it will lookup first and then try to lookup/create again with queue_lock. Since revive this logic is a bit drastic, so fix it by only offlining pd during blkcg_css_offline(), and move the rest destruction (especially blkg_put()) into blkcg_css_free(), which should be the right way as discussed. Fixes: ae1188963611 ("blkcg: consolidate blkg creation in blkcg_bio_issue_check()") Reported-by: Jiufei Xue Signed-off-by: Joseph Qi Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 78 ++++++++++++++++++++++++++++++++++++---------- include/linux/blk-cgroup.h | 1 + 2 files changed, 63 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c2033a232a44..1c16694ae145 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -307,11 +307,28 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, } } +static void blkg_pd_offline(struct blkcg_gq *blkg) +{ + int i; + + lockdep_assert_held(blkg->q->queue_lock); + lockdep_assert_held(&blkg->blkcg->lock); + + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && !blkg->pd[i]->offline && + pol->pd_offline_fn) { + pol->pd_offline_fn(blkg->pd[i]); + blkg->pd[i]->offline = true; + } + } +} + static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; struct blkcg_gq *parent = blkg->parent; - int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -320,13 +337,6 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - - if (blkg->pd[i] && pol->pd_offline_fn) - pol->pd_offline_fn(blkg->pd[i]); - } - if (parent) { blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes); blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios); @@ -369,6 +379,7 @@ static void blkg_destroy_all(struct request_queue *q) struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); + blkg_pd_offline(blkg); blkg_destroy(blkg); spin_unlock(&blkcg->lock); } @@ -995,25 +1006,25 @@ static struct cftype blkcg_legacy_files[] = { * @css: css of interest * * This function is called when @css is about to go away and responsible - * for shooting down all blkgs associated with @css. blkgs should be - * removed while holding both q and blkcg locks. As blkcg lock is nested - * inside q lock, this function performs reverse double lock dancing. + * for offlining all blkgs pd and killing all wbs associated with @css. + * blkgs pd offline should be done while holding both q and blkcg locks. + * As blkcg lock is nested inside q lock, this function performs reverse + * double lock dancing. * * This is the blkcg counterpart of ioc_release_fn(). 
*/ static void blkcg_css_offline(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); + struct blkcg_gq *blkg; spin_lock_irq(&blkcg->lock); - while (!hlist_empty(&blkcg->blkg_list)) { - struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, - struct blkcg_gq, blkcg_node); + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct request_queue *q = blkg->q; if (spin_trylock(q->queue_lock)) { - blkg_destroy(blkg); + blkg_pd_offline(blkg); spin_unlock(q->queue_lock); } else { spin_unlock_irq(&blkcg->lock); @@ -1027,11 +1038,43 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) wb_blkcg_offline(blkcg); } +/** + * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg + * @blkcg: blkcg of interest + * + * This function is called when blkcg css is about to free and responsible for + * destroying all blkgs associated with @blkcg. + * blkgs should be removed while holding both q and blkcg locks. As blkcg lock + * is nested inside q lock, this function performs reverse double lock dancing. + */ +static void blkcg_destroy_all_blkgs(struct blkcg *blkcg) +{ + spin_lock_irq(&blkcg->lock); + while (!hlist_empty(&blkcg->blkg_list)) { + struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, + struct blkcg_gq, + blkcg_node); + struct request_queue *q = blkg->q; + + if (spin_trylock(q->queue_lock)) { + blkg_destroy(blkg); + spin_unlock(q->queue_lock); + } else { + spin_unlock_irq(&blkcg->lock); + cpu_relax(); + spin_lock_irq(&blkcg->lock); + } + } + spin_unlock_irq(&blkcg->lock); +} + static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); int i; + blkcg_destroy_all_blkgs(blkcg); + mutex_lock(&blkcg_pol_mutex); list_del(&blkcg->all_blkcgs_node); @@ -1371,8 +1414,11 @@ void blkcg_deactivate_policy(struct request_queue *q, spin_lock(&blkg->blkcg->lock); if (blkg->pd[pol->plid]) { - if (pol->pd_offline_fn) + if (!blkg->pd[pol->plid]->offline && + pol->pd_offline_fn) { pol->pd_offline_fn(blkg->pd[pol->plid]); + blkg->pd[pol->plid]->offline = true; + } pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 69bea82ebeb1..6c666fd7de3c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -88,6 +88,7 @@ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; + bool offline; }; /* -- cgit 1.4.1 From ec6dcf63c55c8b09b44fe5990082b07baf139c69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 16 Mar 2018 10:31:11 -0700 Subject: blk-mq-debugfs: Show more request state information Since commit 634f9e4631a8 ("blk-mq: remove REQ_ATOM_COMPLETE usages from blk-mq") blk_rq_is_complete() only reports whether or not a request has completed for legacy queues. Hence modify the blk-mq-debugfs code such that it shows the blk-mq request state again. 
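The diff below maps the blk-mq request state enum to a human-readable name; as a generic, hedged sketch of that enum-to-string pattern (illustrative names with an inclusive bounds check, not the actual kernel symbols):

#include <linux/bug.h>
#include <linux/kernel.h>

enum example_state { EXAMPLE_IDLE, EXAMPLE_IN_FLIGHT, EXAMPLE_COMPLETE };

static const char *const example_state_name[] = {
	[EXAMPLE_IDLE]		= "idle",
	[EXAMPLE_IN_FLIGHT]	= "in_flight",
	[EXAMPLE_COMPLETE]	= "complete",
};

/* Return a placeholder instead of indexing out of bounds when an unexpected
 * value is passed in; illustrative only. */
static const char *example_state_to_name(enum example_state state)
{
	if (WARN_ON_ONCE((unsigned int)state >= ARRAY_SIZE(example_state_name)))
		return "(?)";
	return example_state_name[state];
}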
Fixes: 634f9e4631a8 ("blk-mq: remove REQ_ATOM_COMPLETE usages from blk-mq") Signed-off-by: Bart Van Assche Cc: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index bd21d5b9f65f..58b3b79cbe83 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -350,6 +350,20 @@ static const char *const rqf_name[] = { }; #undef RQF_NAME +static const char *const blk_mq_rq_state_name_array[] = { + [MQ_RQ_IDLE] = "idle", + [MQ_RQ_IN_FLIGHT] = "in_flight", + [MQ_RQ_COMPLETE] = "complete", +}; + +static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state) +{ + if (WARN_ON_ONCE((unsigned int)rq_state > + ARRAY_SIZE(blk_mq_rq_state_name_array))) + return "(?)"; + return blk_mq_rq_state_name_array[rq_state]; +} + int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; @@ -366,7 +380,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) seq_puts(m, ", .rq_flags="); blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, ARRAY_SIZE(rqf_name)); - seq_printf(m, ", complete=%d", blk_rq_is_complete(rq)); + seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq))); seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, rq->internal_tag); if (mq_ops->show_rq) -- cgit 1.4.1 From 52c5e62d4c4beecddc6e1b8045ce1d695fca1ba7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Mar 2018 16:56:53 +0100 Subject: block: bio_check_eod() needs to consider partitions bio_check_eod() should check partition size not the whole disk if bio->bi_partno is non-zero. Do this by moving the call to bio_check_eod() into blk_partition_remap(). Based on an earlier patch from Jiufei Xue. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Reported-by: Jiufei Xue Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 93 ++++++++++++++++++++++++-------------------------------- 1 file changed, 40 insertions(+), 53 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 74c6283f4509..5e88c579e896 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2122,7 +2122,7 @@ out_unlock: return BLK_QC_T_NONE; } -static void handle_bad_sector(struct bio *bio) +static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; @@ -2130,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio) printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", bio_devname(bio, b), bio->bi_opf, (unsigned long long)bio_end_sector(bio), - (long long)get_capacity(bio->bi_disk)); + (long long)maxsector); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -2191,68 +2191,59 @@ static noinline int should_fail_bio(struct bio *bio) } ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO); +/* + * Check whether this bio extends beyond the end of the device or partition. + * This may well happen - the kernel calls bread() without checking the size of + * the device, e.g., when mounting a file system. + */ +static inline int bio_check_eod(struct bio *bio, sector_t maxsector) +{ + unsigned int nr_sectors = bio_sectors(bio); + + if (nr_sectors && maxsector && + (nr_sectors > maxsector || + bio->bi_iter.bi_sector > maxsector - nr_sectors)) { + handle_bad_sector(bio, maxsector); + return -EIO; + } + return 0; +} + /* * Remap block n of partition p to block n+start(p) of the disk. 
*/ static inline int blk_partition_remap(struct bio *bio) { struct hd_struct *p; - int ret = 0; + int ret = -EIO; rcu_read_lock(); p = __disk_get_part(bio->bi_disk, bio->bi_partno); - if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) || - bio_check_ro(bio, p))) { - ret = -EIO; + if (unlikely(!p)) + goto out; + if (unlikely(should_fail_request(p, bio->bi_iter.bi_size))) + goto out; + if (unlikely(bio_check_ro(bio, p))) goto out; - } /* * Zone reset does not include bi_size so bio_sectors() is always 0. * Include a test for the reset op code and perform the remap if needed. */ - if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET) - goto out; - - bio->bi_iter.bi_sector += p->start_sect; - bio->bi_partno = 0; - trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), - bio->bi_iter.bi_sector - p->start_sect); - + if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { + if (bio_check_eod(bio, part_nr_sects_read(p))) + goto out; + bio->bi_iter.bi_sector += p->start_sect; + bio->bi_partno = 0; + trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), + bio->bi_iter.bi_sector - p->start_sect); + } + ret = 0; out: rcu_read_unlock(); return ret; } -/* - * Check whether this bio extends beyond the end of the device. - */ -static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) -{ - sector_t maxsector; - - if (!nr_sectors) - return 0; - - /* Test device or partition size, when known. */ - maxsector = get_capacity(bio->bi_disk); - if (maxsector) { - sector_t sector = bio->bi_iter.bi_sector; - - if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { - /* - * This may well happen - the kernel calls bread() - * without checking the size of the device, e.g., when - * mounting a device. - */ - handle_bad_sector(bio); - return 1; - } - } - - return 0; -} - static noinline_for_stack bool generic_make_request_checks(struct bio *bio) { @@ -2263,9 +2254,6 @@ generic_make_request_checks(struct bio *bio) might_sleep(); - if (bio_check_eod(bio, nr_sectors)) - goto end_io; - q = bio->bi_disk->queue; if (unlikely(!q)) { printk(KERN_ERR @@ -2285,17 +2273,16 @@ generic_make_request_checks(struct bio *bio) if (should_fail_bio(bio)) goto end_io; - if (!bio->bi_partno) { - if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + if (bio->bi_partno) { + if (unlikely(blk_partition_remap(bio))) goto end_io; } else { - if (blk_partition_remap(bio)) + if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + goto end_io; + if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) goto end_io; } - if (bio_check_eod(bio, nr_sectors)) - goto end_io; - /* * Filter flush bio's early so that make_request based * drivers without flush support don't have to worry -- cgit 1.4.1 From 818e0fa293ca836eba515615c64680ea916fd7cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 19 Mar 2018 11:46:13 -0700 Subject: block: Change a rcu_read_{lock,unlock}_sched() pair into rcu_read_{lock,unlock}() scsi_device_quiesce() uses synchronize_rcu() to guarantee that the effect of blk_set_preempt_only() will be visible for percpu_ref_tryget() calls that occur after the queue unfreeze by using the approach explained in https://lwn.net/Articles/573497/. The rcu read lock and unlock calls in blk_queue_enter() form a pair with the synchronize_rcu() call in scsi_device_quiesce(). Both scsi_device_quiesce() and blk_queue_enter() must either use regular RCU or RCU-sched. 
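A minimal kernel-style sketch of that pairing rule (illustrative names, not the block-layer code): on preemptible kernels of this era the regular and sched RCU flavors are distinct, so a reader using one flavor gets no guarantee from a grace-period wait of the other.

#include <linux/rcupdate.h>

static bool quiesce_requested;

/* Reader: must use the flavor the updater's grace-period wait targets. */
static bool reader_may_proceed(void)
{
        bool ok;

        rcu_read_lock();                /* pairs with synchronize_rcu() */
        ok = !READ_ONCE(quiesce_requested);
        rcu_read_unlock();
        return ok;
}

/*
 * Updater: after synchronize_rcu() returns, every reader that started
 * before the flag was set has left its read-side critical section.
 */
static void updater_request_quiesce(void)
{
        WRITE_ONCE(quiesce_requested, true);
        synchronize_rcu();
}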
Since neither the RCU-protected code in blk_queue_enter() nor blk_queue_usage_counter_release() sleeps, regular RCU protection is sufficient. Note: scsi_device_quiesce() does not have to be modified since it already uses synchronize_rcu(). Reported-by: Tejun Heo Fixes: 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably") Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Cc: Tejun Heo Cc: Hannes Reinecke Cc: Ming Lei Cc: Christoph Hellwig Cc: Johannes Thumshirn Cc: Oleksandr Natalenko Cc: Martin Steigerwald Cc: stable@vger.kernel.org # v4.15 Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 5e88c579e896..a0f675f84f86 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -917,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) bool success = false; int ret; - rcu_read_lock_sched(); + rcu_read_lock(); if (percpu_ref_tryget_live(&q->q_usage_counter)) { /* * The code that sets the PREEMPT_ONLY flag is @@ -930,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) percpu_ref_put(&q->q_usage_counter); } } - rcu_read_unlock_sched(); + rcu_read_unlock(); if (success) return 0; -- cgit 1.4.1 From bd5c4facf59648581d2f1692dad7b107bf429954 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 21 Mar 2018 12:49:29 -0400 Subject: Fix slab name "biovec-(1<<(21-12))" I'm getting a slab named "biovec-(1<<(21-12))". It is caused by unintended expansion of the macro BIO_MAX_PAGES. This patch renames it to biovec-max. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Jens Axboe --- block/bio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/bio.c b/block/bio.c index e1708db48258..53e0f0a1ed94 100644 --- a/block/bio.c +++ b/block/bio.c @@ -43,9 +43,9 @@ * break badly! cannot be bigger than what you can fit into an * unsigned short */ -#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } +#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n } static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = { - BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), + BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max), }; #undef BV -- cgit 1.4.1 From bc56e2cafa3f80954a278d74bd18349ac3cb8fa5 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 26 Mar 2018 16:06:24 +0200 Subject: block, bfq: lower-bound the estimated peak rate to 1 If a storage device handled by BFQ happens to be slower than 7.5 KB/s for a certain amount of time (in the order of a second), then the estimated peak rate of the device, maintained in BFQ, becomes equal to 0. The reason is the limited precision with which the rate is represented (details on the range of representable values in the comments introduced by this commit). This leads to a division-by-zero error where the estimated peak rate is used as divisor. Such a type of failure has been reported in [1]. This commit addresses this issue by: 1. Lower-bounding the estimated peak rate to 1 2. 
Adding and improving comments on the range of rates representable [1] https://www.spinics.net/lists/kernel/msg2739205.html Signed-off-by: Konstantin Khlebnikov Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 25 ++++++++++++++++++++++++- block/bfq-iosched.h | 2 +- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index aeca22d91101..f0ecd98509d8 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -201,7 +201,20 @@ static struct kmem_cache *bfq_pool; /* Target observation time interval for a peak-rate update (ns) */ #define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC -/* Shift used for peak rate fixed precision calculations. */ +/* + * Shift used for peak-rate fixed precision calculations. + * With + * - the current shift: 16 positions + * - the current type used to store rate: u32 + * - the current unit of measure for rate: [sectors/usec], or, more precisely, + * [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift, + * the range of rates that can be stored is + * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec = + * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec = + * [15, 65G] sectors/sec + * Which, assuming a sector size of 512B, corresponds to a range of + * [7.5K, 33T] B/sec + */ #define BFQ_RATE_SHIFT 16 /* @@ -2637,6 +2650,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) rate /= divisor; /* smoothing constant alpha = 1/divisor */ bfqd->peak_rate += rate; + + /* + * For a very slow device, bfqd->peak_rate can reach 0 (see + * the minimum representable values reported in the comments + * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid + * divisions by zero where bfqd->peak_rate is used as a + * divisor. + */ + bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate); + update_thr_responsiveness_params(bfqd); reset_computation: diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 350c39ae2896..ae2f3dadec44 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -499,7 +499,7 @@ struct bfq_data { u64 delta_from_first; /* * Current estimate of the device peak rate, measured in - * [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by + * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by * BFQ_RATE_SHIFT is performed to increase precision in * fixed-point calculations. */ -- cgit 1.4.1 From f23f5bece686a76598335141a091934f7eb0998c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 27 Mar 2018 09:39:06 -0600 Subject: blk-mq: Allow PCI vector offset for mapping queues The PCI interrupt vectors intended to be associated with a queue may not start at 0; a driver may allocate pre_vectors for special use. This patch adds an offset parameter so blk-mq may find the intended affinity mask and updates all drivers using this API accordingly. Cc: Don Brace Cc: Cc: Signed-off-by: Keith Busch Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-pci.c | 6 ++++-- drivers/nvme/host/pci.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/smartpqi/smartpqi_init.c | 2 +- include/linux/blk-mq-pci.h | 3 ++- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index 76944e3271bf..e233996bb76f 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -21,6 +21,7 @@ * blk_mq_pci_map_queues - provide a default queue mapping for PCI device * @set: tagset to provide the mapping for * @pdev: PCI device associated with @set. 
+ * @offset: Offset to use for the pci irq vector * * This function assumes the PCI device @pdev has at least as many available * interrupt vectors as @set has queues. It will then query the vector @@ -28,13 +29,14 @@ * that maps a queue to the CPUs that have irq affinity for the corresponding * vector. */ -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev) +int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, + int offset) { const struct cpumask *mask; unsigned int queue, cpu; for (queue = 0; queue < set->nr_hw_queues; queue++) { - mask = pci_irq_get_affinity(pdev, queue); + mask = pci_irq_get_affinity(pdev, queue + offset); if (!mask) goto fallback; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cef5ce851a92..e3b9efca0571 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0); } /** diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 12ee6e02d146..2c705f3dd265 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6805,7 +6805,7 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost) if (USER_CTRL_IRQ(vha->hw)) rc = blk_mq_map_queues(&shost->tag_set); else - rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev); + rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0); return rc; } diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index b2880c7709e6..10c94011c8a8 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5348,7 +5348,7 @@ static int pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev); + return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0); } static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 6338551e0fb9..9f4c17f0d2d8 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -5,6 +5,7 @@ struct blk_mq_tag_set; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev); +int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, + int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ -- cgit 1.4.1 From bc6d65e6dc89c3b7ff78e4ad797117c122ffde8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 2 Apr 2018 15:04:58 -0700 Subject: blk-mq: Directly schedule q->timeout_work when aborting a request Request abortion is performed by overriding deadline to now and scheduling timeout handling immediately. For the latter part, the code was using mod_timer(timeout, 0) which can't guarantee that the timer runs afterwards. Let's schedule the underlying work item directly instead. This fixes the hangs during probing reported by Sitsofe but it isn't yet clear to me how the failure can happen reliably if it's just the above described race condition. 
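A hedged sketch of the approach, using the generic timer and workqueue APIs; the structure and names are illustrative, not the block layer's, which runs this work on its dedicated kblockd workqueue.

#include <linux/timer.h>
#include <linux/workqueue.h>

struct demo_queue {
        struct timer_list timeout;              /* periodic timeout timer */
        struct work_struct timeout_work;        /* does the actual scan */
};

static void demo_abort(struct demo_queue *q)
{
        /*
         * Fragile: mod_timer(&q->timeout, jiffies) only helps if the
         * timer machinery ends up delivering a callback for an expiry
         * that is already in the past.
         */

        /* Robust: run the timeout handling work item directly. */
        schedule_work(&q->timeout_work);
}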
Signed-off-by: Tejun Heo Reported-by: Sitsofe Wheeler Reported-by: Meelis Roos Fixes: 358f70da49d7 ("blk-mq: make blk_abort_request() trigger timeout path") Cc: stable@vger.kernel.org # v4.16 Link: http://lkml.kernel.org/r/CALjAwxh-PVYFnYFCJpGOja+m5SzZ8Sa4J7ohxdK=r8NyOF-EMA@mail.gmail.com Link: http://lkml.kernel.org/r/alpine.LRH.2.21.1802261049140.4893@math.ut.ee Signed-off-by: Jens Axboe --- block/blk-timeout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 34a55250f08a..652d4d4d3e97 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -163,7 +163,7 @@ void blk_abort_request(struct request *req) * No need for fancy synchronizations. */ blk_rq_set_deadline(req, jiffies); - mod_timer(&req->q->timeout, 0); + kblockd_schedule_work(&req->q->timeout_work); } else { if (blk_mark_rq_complete(req)) return; -- cgit 1.4.1
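The bio_check_eod() rework earlier in this series depends on writing the range check in an overflow-safe form; a small stand-alone sketch of that comparison (plain C, helper name is illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Returns true when [start, start + len) extends past a device or
 * partition of "max" sectors.  Comparing "start > max - len" (after
 * checking len <= max) avoids computing "start + len", which could wrap.
 */
static bool demo_range_beyond_end(uint64_t start, uint32_t len, uint64_t max)
{
        if (!len || !max)       /* empty I/O or unknown capacity: accept */
                return false;
        return len > max || start > max - len;
}

int main(void)
{
        /* 8-sector I/O on a 100-sector partition. */
        printf("%d\n", demo_range_beyond_end(96, 8, 100));     /* 1: beyond end */
        printf("%d\n", demo_range_beyond_end(90, 8, 100));     /* 0: fits */
        return 0;
}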
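For the blk_mq_pci_map_queues() offset parameter also introduced in this series, a hedged usage sketch of a driver that reserves its first interrupt vector for non-queue work; the driver structure and function names are assumptions for illustration, and the tag set's nr_hw_queues is assumed to already match the number of allocated queue vectors.

#include <linux/blk-mq-pci.h>
#include <linux/interrupt.h>
#include <linux/pci.h>

static int demo_setup_queue_vectors(struct pci_dev *pdev,
                                    struct blk_mq_tag_set *set,
                                    unsigned int nr_io_queues)
{
        struct irq_affinity affd = {
                .pre_vectors = 1,       /* vector 0 reserved for admin work */
        };
        int nvecs;

        nvecs = pci_alloc_irq_vectors_affinity(pdev, 2, nr_io_queues + 1,
                                                PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
                                                &affd);
        if (nvecs < 0)
                return nvecs;

        /* Queue 0 maps to vector 1, queue 1 to vector 2, and so on. */
        return blk_mq_pci_map_queues(set, pdev, 1);
}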