summary refs log tree commit diff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-26 20:00:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-26 20:00:28 -0700
commit478a1469a7d27fe6b2f85fc801ecdeb8afc836e6 (patch)
tree9b1eb10e1a0567413443281387b09d02b514b5ec /include
parent315227f6da389f3a560f27f7777080857278e1b4 (diff)
parent4d9a2c8746671efbb0c27d3ae28c7474597a7aad (diff)
downloadlinux-478a1469a7d27fe6b2f85fc801ecdeb8afc836e6.tar.gz
Merge tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull DAX locking updates from Ross Zwisler:
 "Filesystem DAX locking for 4.7

   - We use a bit in an exceptional radix tree entry as a lock bit and
     use it similarly to how page lock is used for normal faults.  This
     fixes races between hole instantiation and read faults of the same
     index.

   - Filesystem DAX PMD faults are disabled, and will be re-enabled when
     PMD locking is implemented"

* tag 'dax-locking-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  dax: Remove i_mmap_lock protection
  dax: Use radix tree entry lock to protect cow faults
  dax: New fault locking
  dax: Allow DAX code to replace exceptional entries
  dax: Define DAX lock bit for radix tree exceptional entry
  dax: Make huge page handling depend of CONFIG_BROKEN
  dax: Fix condition for filling of PMD holes
Diffstat (limited to 'include')
-rw-r--r--include/linux/dax.h16
-rw-r--r--include/linux/mm.h7
2 files changed, 22 insertions, 1 deletions
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 7743e51f826c..43d5f0b799c7 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -3,17 +3,25 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
+/* We use lowest available exceptional entry bit for locking */
+#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
+
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
+int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+				   pgoff_t index, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
 struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index);
 int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		unsigned int offset, unsigned int length);
 #else
@@ -22,6 +30,12 @@ static inline struct page *read_dax_sector(struct block_device *bdev,
 {
 	return ERR_PTR(-ENXIO);
 }
+/* Shouldn't ever be called when dax is disabled. */
+static inline void dax_unlock_mapping_entry(struct address_space *mapping,
+					    pgoff_t index)
+{
+	BUG();
+}
 static inline int __dax_zero_page_range(struct block_device *bdev,
 		sector_t sector, unsigned int offset, unsigned int length)
 {
@@ -29,7 +43,7 @@ static inline int __dax_zero_page_range(struct block_device *bdev,
 }
 #endif
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
 				unsigned int flags, get_block_t);
 int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2835d598d258..a00ec816233a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -303,6 +303,12 @@ struct vm_fault {
 					 * is set (which is also implied by
 					 * VM_FAULT_ERROR).
 					 */
+	void *entry;			/* ->fault handler can alternatively
+					 * return locked DAX entry. In that
+					 * case handler should return
+					 * VM_FAULT_DAX_LOCKED and fill in
+					 * entry here.
+					 */
 	/* for ->map_pages() only */
 	pgoff_t max_pgoff;		/* map pages for offset from pgoff till
 					 * max_pgoff inclusive */
@@ -1076,6 +1082,7 @@ static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
+#define VM_FAULT_DAX_LOCKED 0x1000	/* ->fault has locked DAX entry */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */