summary refs log tree commit diff
path: root/drivers/scsi
diff options
context:
space:
mode:
authorBart Van Assche <bart.vanassche@wdc.com>2017-11-09 10:49:58 -0800
committerJens Axboe <axboe@kernel.dk>2017-11-10 19:53:25 -0700
commit3a0a529971ec4e2d933e9c7798db101dfb6b1aec (patch)
treee79cac20198e657afc109f6f80111b0fb03f9dbc /drivers/scsi
parentc9254f2ddb19387ea9714a57ea48463c20333b92 (diff)
downloadlinux-3a0a529971ec4e2d933e9c7798db101dfb6b1aec.tar.gz
block, scsi: Make SCSI quiesce and resume work reliably
The contexts from which a SCSI device can be quiesced or resumed are:
* Writing into /sys/class/scsi_device/*/device/state.
* SCSI parallel (SPI) domain validation.
* The SCSI device power management methods. See also scsi_bus_pm_ops.

It is essential during suspend and resume that neither the filesystem
state nor the filesystem metadata in RAM changes. This is why while
the hibernation image is being written or restored that SCSI devices
are quiesced. The SCSI core quiesces devices through scsi_device_quiesce()
and scsi_device_resume(). In the SDEV_QUIESCE state execution of
non-preempt requests is deferred. This is realized by returning
BLKPREP_DEFER from inside scsi_prep_state_check() for quiesced SCSI
devices. Avoid that a full queue prevents power management requests
to be submitted by deferring allocation of non-preempt requests for
devices in the quiesced state. This patch has been tested by running
the following commands and by verifying that after each resume the
fio job was still running:

for ((i=0; i<10; i++)); do
  (
    cd /sys/block/md0/md &&
    while true; do
      [ "$(<sync_action)" = "idle" ] && echo check > sync_action
      sleep 1
    done
  ) &
  pids=($!)
  for d in /sys/class/block/sd*[a-z]; do
    bdev=${d#/sys/class/block/}
    hcil=$(readlink "$d/device")
    hcil=${hcil#../../../}
    echo 4 > "$d/queue/nr_requests"
    echo 1 > "/sys/class/scsi_device/$hcil/device/queue_depth"
    fio --name="$bdev" --filename="/dev/$bdev" --buffered=0 --bs=512 \
      --rw=randread --ioengine=libaio --numjobs=4 --iodepth=16       \
      --iodepth_batch=1 --thread --loops=$((2**31)) &
    pids+=($!)
  done
  sleep 1
  echo "$(date) Hibernating ..." >>hibernate-test-log.txt
  systemctl hibernate
  sleep 10
  kill "${pids[@]}"
  echo idle > /sys/block/md0/md/sync_action
  wait
  echo "$(date) Done." >>hibernate-test-log.txt
done

Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
References: "I/O hangs after resuming from suspend-to-ram" (https://marc.info/?l=linux-block&m=150340235201348).
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Tested-by: Martin Steigerwald <martin@lichtvoll.de>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/scsi')
-rw-r--r--drivers/scsi/scsi_lib.c42
1 files changed, 30 insertions, 12 deletions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index eb129dfc2ebe..f907e2f8c1dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2947,21 +2947,37 @@ static void scsi_wait_for_queuecommand(struct scsi_device *sdev)
 int
 scsi_device_quiesce(struct scsi_device *sdev)
 {
+	struct request_queue *q = sdev->request_queue;
 	int err;
 
+	/*
+	 * It is allowed to call scsi_device_quiesce() multiple times from
+	 * the same context but concurrent scsi_device_quiesce() calls are
+	 * not allowed.
+	 */
+	WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
+
+	blk_set_preempt_only(q);
+
+	blk_mq_freeze_queue(q);
+	/*
+	 * Ensure that the effect of blk_set_preempt_only() will be visible
+	 * for percpu_ref_tryget() callers that occur after the queue
+	 * unfreeze even if the queue was already frozen before this function
+	 * was called. See also https://lwn.net/Articles/573497/.
+	 */
+	synchronize_rcu();
+	blk_mq_unfreeze_queue(q);
+
 	mutex_lock(&sdev->state_mutex);
 	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
+	if (err == 0)
+		sdev->quiesced_by = current;
+	else
+		blk_clear_preempt_only(q);
 	mutex_unlock(&sdev->state_mutex);
 
-	if (err)
-		return err;
-
-	scsi_run_queue(sdev->request_queue);
-	while (atomic_read(&sdev->device_busy)) {
-		msleep_interruptible(200);
-		scsi_run_queue(sdev->request_queue);
-	}
-	return 0;
+	return err;
 }
 EXPORT_SYMBOL(scsi_device_quiesce);
 
@@ -2981,9 +2997,11 @@ void scsi_device_resume(struct scsi_device *sdev)
 	 * device deleted during suspend)
 	 */
 	mutex_lock(&sdev->state_mutex);
-	if (sdev->sdev_state == SDEV_QUIESCE &&
-	    scsi_device_set_state(sdev, SDEV_RUNNING) == 0)
-		scsi_run_queue(sdev->request_queue);
+	WARN_ON_ONCE(!sdev->quiesced_by);
+	sdev->quiesced_by = NULL;
+	blk_clear_preempt_only(sdev->request_queue);
+	if (sdev->sdev_state == SDEV_QUIESCE)
+		scsi_device_set_state(sdev, SDEV_RUNNING);
 	mutex_unlock(&sdev->state_mutex);
 }
 EXPORT_SYMBOL(scsi_device_resume);