summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
commit694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /lib
parenta351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
downloadlinux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.gz
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:

 - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ
   was initially a fork of CFQ, but subsequently changed to implement
   fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant
   to be used on desktop type single drives, providing good fairness.
   From Paolo.

 - Add Kyber IO scheduler. This is a full multiqueue aware scheduler,
   using a scalable token based algorithm that throttles IO based on
   live completion IO stats, similary to blk-wbt. From Omar.

 - A series from Jan, moving users to separately allocated backing
   devices. This continues the work of separating backing device life
   times, solving various problems with hot removal.

 - A series of updates for lightnvm, mostly from Javier. Includes a
   'pblk' target that exposes an open channel SSD as a physical block
   device.

 - A series of fixes and improvements for nbd from Josef.

 - A series from Omar, removing queue sharing between devices on mostly
   legacy drivers. This helps us clean up other bits, if we know that a
   queue only has a single device backing. This has been overdue for
   more than a decade.

 - Fixes for the blk-stats, and improvements to unify the stats and user
   windows. This both improves blk-wbt, and enables other users to
   register a need to receive IO stats for a device. From Omar.

 - blk-throttle improvements from Shaohua. This provides a scalable
   framework for implementing scalable priotization - particularly for
   blk-mq, but applicable to any type of block device. The interface is
   marked experimental for now.

 - Bucketized IO stats for IO polling from Stephen Bates. This improves
   efficiency of polled workloads in the presence of mixed block size
   IO.

 - A few fixes for opal, from Scott.

 - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics.
   From a variety of folks, mostly Sagi and James Smart.

 - A series from Bart, improving our exposed info and capabilities from
   the blk-mq debugfs support.

 - A series from Christoph, cleaning up how handle WRITE_ZEROES.

 - A series from Christoph, cleaning up the block layer handling of how
   we track errors in a request. On top of being a nice cleanup, it also
   shrinks the size of struct request a bit.

 - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was
   never used by platforms, and the latter has outlived it's usefulness.

 - Various little bug fixes and cleanups from a wide variety of folks.

* 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits)
  block: hide badblocks attribute by default
  blk-mq: unify hctx delay_work and run_work
  block: add kblock_mod_delayed_work_on()
  blk-mq: unify hctx delayed_run_work and run_work
  nbd: fix use after free on module unload
  MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler
  blk-mq-sched: alloate reserved tags out of normal pool
  mtip32xx: use runtime tag to initialize command header
  scsi: Implement blk_mq_ops.show_rq()
  blk-mq: Add blk_mq_ops.show_rq()
  blk-mq: Show operation, cmd_flags and rq_flags names
  blk-mq: Make blk_flags_show() callers append a newline character
  blk-mq: Move the "state" debugfs attribute one level down
  blk-mq: Unregister debugfs attributes earlier
  blk-mq: Only unregister hctxs for which registration succeeded
  blk-mq-debugfs: Rename functions for registering and unregistering the mq directory
  blk-mq: Let blk_mq_debugfs_register() look up the queue name
  blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
  ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset
  ide-pm: always pass 0 error to __blk_end_request_all
  ..
Diffstat (limited to 'lib')
-rw-r--r--lib/kobject.c5
-rw-r--r--lib/sbitmap.c75
2 files changed, 72 insertions, 8 deletions
diff --git a/lib/kobject.c b/lib/kobject.c
index 445dcaeb0f56..763d70a18941 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -601,12 +601,15 @@ struct kobject *kobject_get(struct kobject *kobj)
 }
 EXPORT_SYMBOL(kobject_get);
 
-static struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj)
+struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj)
 {
+	if (!kobj)
+		return NULL;
 	if (!kref_get_unless_zero(&kobj->kref))
 		kobj = NULL;
 	return kobj;
 }
+EXPORT_SYMBOL(kobject_get_unless_zero);
 
 /*
  * kobject_cleanup - free kobject resources.
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 60e800e0b5a0..80aa8d5463fa 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -79,15 +79,15 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
 }
 EXPORT_SYMBOL_GPL(sbitmap_resize);
 
-static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
-			      bool wrap)
+static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
+			      unsigned int hint, bool wrap)
 {
 	unsigned int orig_hint = hint;
 	int nr;
 
 	while (1) {
-		nr = find_next_zero_bit(&word->word, word->depth, hint);
-		if (unlikely(nr >= word->depth)) {
+		nr = find_next_zero_bit(word, depth, hint);
+		if (unlikely(nr >= depth)) {
 			/*
 			 * We started with an offset, and we didn't reset the
 			 * offset to 0 in a failure case, so start from 0 to
@@ -100,11 +100,11 @@ static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
 			return -1;
 		}
 
-		if (!test_and_set_bit(nr, &word->word))
+		if (!test_and_set_bit(nr, word))
 			break;
 
 		hint = nr + 1;
-		if (hint >= word->depth - 1)
+		if (hint >= depth - 1)
 			hint = 0;
 	}
 
@@ -119,7 +119,8 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 	index = SB_NR_TO_INDEX(sb, alloc_hint);
 
 	for (i = 0; i < sb->map_nr; i++) {
-		nr = __sbitmap_get_word(&sb->map[index],
+		nr = __sbitmap_get_word(&sb->map[index].word,
+					sb->map[index].depth,
 					SB_NR_TO_BIT(sb, alloc_hint),
 					!round_robin);
 		if (nr != -1) {
@@ -141,6 +142,37 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 }
 EXPORT_SYMBOL_GPL(sbitmap_get);
 
+int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
+			unsigned long shallow_depth)
+{
+	unsigned int i, index;
+	int nr = -1;
+
+	index = SB_NR_TO_INDEX(sb, alloc_hint);
+
+	for (i = 0; i < sb->map_nr; i++) {
+		nr = __sbitmap_get_word(&sb->map[index].word,
+					min(sb->map[index].depth, shallow_depth),
+					SB_NR_TO_BIT(sb, alloc_hint), true);
+		if (nr != -1) {
+			nr += index << sb->shift;
+			break;
+		}
+
+		/* Jump to next index. */
+		index++;
+		alloc_hint = index << sb->shift;
+
+		if (index >= sb->map_nr) {
+			index = 0;
+			alloc_hint = 0;
+		}
+	}
+
+	return nr;
+}
+EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
+
 bool sbitmap_any_bit_set(const struct sbitmap *sb)
 {
 	unsigned int i;
@@ -342,6 +374,35 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq)
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
 
+int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
+				unsigned int shallow_depth)
+{
+	unsigned int hint, depth;
+	int nr;
+
+	hint = this_cpu_read(*sbq->alloc_hint);
+	depth = READ_ONCE(sbq->sb.depth);
+	if (unlikely(hint >= depth)) {
+		hint = depth ? prandom_u32() % depth : 0;
+		this_cpu_write(*sbq->alloc_hint, hint);
+	}
+	nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
+
+	if (nr == -1) {
+		/* If the map is full, a hint won't do us much good. */
+		this_cpu_write(*sbq->alloc_hint, 0);
+	} else if (nr == hint || unlikely(sbq->round_robin)) {
+		/* Only update the hint if we used it. */
+		hint = nr + 1;
+		if (hint >= depth - 1)
+			hint = 0;
+		this_cpu_write(*sbq->alloc_hint, hint);
+	}
+
+	return nr;
+}
+EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
+
 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
 	int i, wake_index;