summary refs log tree commit diff
path: root/fs/block_dev.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-23 11:18:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-23 11:18:01 -0700
commit1f40c49570eb01436786a9b5845c4469a9a1f362 (patch)
treef0a31705d5c0a65604784d9b01841c453055d62f /fs/block_dev.c
parent7639dad93a5564579987abded4ec05e3db13659d (diff)
parent36092ee8ba695fce023b2118ececa6c2a56b1331 (diff)
downloadlinux-1f40c49570eb01436786a9b5845c4469a9a1f362.tar.gz
Merge tag 'libnvdimm-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
 "The bulk of this update was stabilized before the merge window and
  appeared in -next.  The "device dax" implementation was revised this
  week in response to review feedback, and to address failures detected
  by the recently expanded ndctl unit test suite.

  Not included in this pull request are two dax topic branches (dax
  error handling, and dax radix-tree locking).  These topics were
  deferred to get a few more days of -next integration testing, and to
  coordinate a branch baseline with Ted and the ext4 tree.  Vishal and
  Ross will send the error handling and locking topics respectively in
  the next few days.

  This branch has received a positive build result from the kbuild robot
  across 226 configs.

  Summary:

   - Device DAX for persistent memory: Device DAX is the device-centric
     analogue of Filesystem DAX (CONFIG_FS_DAX).  It allows memory
     ranges to be allocated and mapped without need of an intervening
     file system.  Device DAX is strict, precise and predictable.
     Specifically this interface:

      a) Guarantees fault granularity with respect to a given page size
         (pte, pmd, or pud) set at configuration time.

      b) Enforces deterministic behavior by being strict about what
         fault scenarios are supported.

     Persistent memory is the first target, but the mechanism is also
     targeted for exclusive allocations of performance/feature
     differentiated memory ranges.

   - Support for the HPE DSM (device specific method) command formats.
     This enables management of these first generation devices until a
     unified DSM specification materializes.

   - Further ACPI 6.1 compliance with support for the common dimm
     identifier format.

   - Various fixes and cleanups across the subsystem"

* tag 'libnvdimm-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (40 commits)
  libnvdimm, dax: fix deletion
  libnvdimm, dax: fix alignment validation
  libnvdimm, dax: autodetect support
  libnvdimm: release ida resources
  Revert "block: enable dax for raw block devices"
  /dev/dax, core: file operations and dax-mmap
  /dev/dax, pmem: direct access to persistent memory
  libnvdimm: stop requiring a driver ->remove() method
  libnvdimm, dax: record the specified alignment of a dax-device instance
  libnvdimm, dax: reserve space to store labels for device-dax
  libnvdimm, dax: introduce device-dax infrastructure
  nfit: add sysfs dimm 'family' and 'dsm_mask' attributes
  tools/testing/nvdimm: ND_CMD_CALL support
  nfit: disable vendor specific commands
  nfit: export subsystem ids as attributes
  nfit: fix format interface code byte order per ACPI6.1
  nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism
  nfit, libnvdimm: clarify "commands" vs "_DSMs"
  libnvdimm: increase max envelope size for ioctl
  acpi/nfit: Add sysfs "id" for NVDIMM ID
  ...
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--fs/block_dev.c96
1 files changed, 29 insertions, 67 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a063d4d8ac39..1089dbf25925 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,6 +29,7 @@
 #include <linux/log2.h>
 #include <linux/cleancache.h>
 #include <linux/dax.h>
+#include <linux/badblocks.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
+static bool blkdev_dax_capable(struct block_device *bdev)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX))
+		return false;
+
+	/*
+	 * If the partition is not aligned on a page boundary, we can't
+	 * do dax I/O to it.
+	 */
+	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
+			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+		return false;
+
+	/*
+	 * If the device has known bad blocks, force all I/O through the
+	 * driver / page cache.
+	 *
+	 * TODO: support finer grained dax error handling
+	 */
+	if (disk->bb && disk->bb->count)
+		return false;
+
+	return true;
+}
+
 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 
 /*
@@ -1720,79 +1748,13 @@ static const struct address_space_operations def_blk_aops = {
 	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
-#ifdef CONFIG_FS_DAX
-/*
- * In the raw block case we do not need to contend with truncation nor
- * unwritten file extents.  Without those concerns there is no need for
- * additional locking beyond the mmap_sem context that these routines
- * are already executing under.
- *
- * Note, there is no protection if the block device is dynamically
- * resized (partition grow/shrink) during a fault. A stable block device
- * size is already not enforced in the blkdev_direct_IO path.
- *
- * For DAX, it is the responsibility of the block device driver to
- * ensure the whole-disk device size is stable while requests are in
- * flight.
- *
- * Finally, unlike the filemap_page_mkwrite() case there is no
- * filesystem superblock to sync against freezing.  We still include a
- * pfn_mkwrite callback for dax drivers to receive write fault
- * notifications.
- */
-static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
-}
-
-static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
-		struct vm_fault *vmf)
-{
-	return dax_pfn_mkwrite(vma, vmf);
-}
-
-static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, unsigned int flags)
-{
-	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
-}
-
-static const struct vm_operations_struct blkdev_dax_vm_ops = {
-	.fault		= blkdev_dax_fault,
-	.pmd_fault	= blkdev_dax_pmd_fault,
-	.pfn_mkwrite	= blkdev_dax_pfn_mkwrite,
-};
-
-static const struct vm_operations_struct blkdev_default_vm_ops = {
-	.fault		= filemap_fault,
-	.map_pages	= filemap_map_pages,
-};
-
-static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(file);
-
-	file_accessed(file);
-	if (IS_DAX(bd_inode)) {
-		vma->vm_ops = &blkdev_dax_vm_ops;
-		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
-	} else {
-		vma->vm_ops = &blkdev_default_vm_ops;
-	}
-
-	return 0;
-}
-#else
-#define blkdev_mmap generic_file_mmap
-#endif
-
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.mmap		= blkdev_mmap,
+	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT