summary refs log tree commit diff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-24 13:45:51 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-07-01 13:16:25 +0200
commite6bbad75712a97b9b16433563c1358652a33003e (patch)
treef6b5576d36aeb448ba285be34556ad9a3ef96465 /mm
parentc4b31d1b694e101cae7469a20762647185e11721 (diff)
downloadlinux-e6bbad75712a97b9b16433563c1358652a33003e.tar.gz
mm: always expand the stack with the mmap write lock held
commit 8d7071af890768438c14db6172cc8f9f4d04e184 upstream

This finishes the job of always holding the mmap write lock when
extending the user stack vma, and removes the 'write_locked' argument
from the vm helper functions again.

For some cases, we just avoid expanding the stack at all: drivers and
page pinning really shouldn't be extending any stacks.  Let's see if any
strange users really wanted that.

It's worth noting that architectures that weren't converted to the new
lock_mm_and_find_vma() helper function are left using the legacy
"expand_stack()" function, but it has been changed to drop the mmap_lock
and take it for writing while expanding the vma.  This makes it fairly
straightforward to convert the remaining architectures.

As a result of dropping and re-taking the lock, the calling conventions
for this function have also changed, since the old vma may no longer be
valid.  So it will now return the new vma if successful, and NULL - and
the lock dropped - if the area could not be extended.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[6.1: Patch drivers/iommu/io-pgfault.c instead]
Signed-off-by: Samuel Mendoza-Jonas <samjonas@amazon.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/gup.c6
-rw-r--r--mm/memory.c10
-rw-r--r--mm/mmap.c121
-rw-r--r--mm/nommu.c18
4 files changed, 116 insertions, 39 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 028f3b4e8c3f..f4911ddd3070 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1182,7 +1182,7 @@ static long __get_user_pages(struct mm_struct *mm,
 
 		/* first iteration or cross vma bound */
 		if (!vma || start >= vma->vm_end) {
-			vma = find_extend_vma(mm, start);
+			vma = vma_lookup(mm, start);
 			if (!vma && in_gate_area(mm, start)) {
 				ret = get_gate_page(mm, start & PAGE_MASK,
 						gup_flags, &vma,
@@ -1351,8 +1351,8 @@ int fixup_user_fault(struct mm_struct *mm,
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 retry:
-	vma = find_extend_vma(mm, address);
-	if (!vma || address < vma->vm_start)
+	vma = vma_lookup(mm, address);
+	if (!vma)
 		return -EFAULT;
 
 	if (!vma_permits_fault(vma, fault_flags))
diff --git a/mm/memory.c b/mm/memory.c
index 2d7bd0ca2924..77549434d13a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5364,7 +5364,7 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
 			goto fail;
 	}
 
-	if (expand_stack_locked(vma, addr, true))
+	if (expand_stack_locked(vma, addr))
 		goto fail;
 
 success:
@@ -5648,6 +5648,14 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
 	if (mmap_read_lock_killable(mm))
 		return 0;
 
+	/* We might need to expand the stack to access it */
+	vma = vma_lookup(mm, addr);
+	if (!vma) {
+		vma = expand_stack(mm, addr);
+		if (!vma)
+			return 0;
+	}
+
 	/* ignore errors, just check how much was successfully transferred */
 	while (len) {
 		int bytes, ret, offset;
diff --git a/mm/mmap.c b/mm/mmap.c
index 42bf63eaeeae..b8af52db3bbe 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1945,8 +1945,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
  * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
  * vma is the last one with address > vma->vm_end.  Have to extend vma.
  */
-int expand_upwards(struct vm_area_struct *vma, unsigned long address,
-		bool write_locked)
+static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *next;
@@ -1970,8 +1969,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address,
 	if (gap_addr < address || gap_addr > TASK_SIZE)
 		gap_addr = TASK_SIZE;
 
-	if (!write_locked)
-		return -EAGAIN;
 	next = find_vma_intersection(mm, vma->vm_end, gap_addr);
 	if (next && vma_is_accessible(next)) {
 		if (!(next->vm_flags & VM_GROWSUP))
@@ -2039,15 +2036,18 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address,
 
 /*
  * vma is the first one with address < vma->vm_start.  Have to extend vma.
+ * mmap_lock held for writing.
  */
-int expand_downwards(struct vm_area_struct *vma, unsigned long address,
-		bool write_locked)
+int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);
 	struct vm_area_struct *prev;
 	int error = 0;
 
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		return -EFAULT;
+
 	address &= PAGE_MASK;
 	if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
 		return -EPERM;
@@ -2060,8 +2060,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address,
 		    vma_is_accessible(prev) &&
 		    (address - prev->vm_end < stack_guard_gap))
 			return -ENOMEM;
-		if (!write_locked && (prev->vm_end == address))
-			return -EAGAIN;
 	}
 
 	if (mas_preallocate(&mas, vma, GFP_KERNEL))
@@ -2139,14 +2137,12 @@ static int __init cmdline_parse_stack_guard_gap(char *p)
 __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
 
 #ifdef CONFIG_STACK_GROWSUP
-int expand_stack_locked(struct vm_area_struct *vma, unsigned long address,
-		bool write_locked)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
 {
-	return expand_upwards(vma, address, write_locked);
+	return expand_upwards(vma, address);
 }
 
-struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm,
-		unsigned long addr, bool write_locked)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma, *prev;
 
@@ -2156,23 +2152,21 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm,
 		return vma;
 	if (!prev)
 		return NULL;
-	if (expand_stack_locked(prev, addr, write_locked))
+	if (expand_stack_locked(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED)
 		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
 	return prev;
 }
 #else
-int expand_stack_locked(struct vm_area_struct *vma, unsigned long address,
-		bool write_locked)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
 {
 	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
 		return -EINVAL;
-	return expand_downwards(vma, address, write_locked);
+	return expand_downwards(vma, address);
 }
 
-struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm,
-		unsigned long addr, bool write_locked)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma;
 	unsigned long start;
@@ -2184,7 +2178,7 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm,
 	if (vma->vm_start <= addr)
 		return vma;
 	start = vma->vm_start;
-	if (expand_stack_locked(vma, addr, write_locked))
+	if (expand_stack_locked(vma, addr))
 		return NULL;
 	if (vma->vm_flags & VM_LOCKED)
 		populate_vma_page_range(vma, addr, start, NULL);
@@ -2192,12 +2186,91 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm,
 }
 #endif
 
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm,
-		unsigned long addr)
+/*
+ * IA64 has some horrid mapping rules: it can expand both up and down,
+ * but with various special rules.
+ *
+ * We'll get rid of this architecture eventually, so the ugliness is
+ * temporary.
+ */
+#ifdef CONFIG_IA64
+static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr)
+{
+	return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) &&
+		REGION_OFFSET(addr) < RGN_MAP_LIMIT;
+}
+
+/*
+ * IA64 stacks grow down, but there's a special register backing store
+ * that can grow up. Only sequentially, though, so the new address must
+ * match vm_end.
+ */
+static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (!vma_expand_ok(vma, addr))
+		return -EFAULT;
+	if (vma->vm_end != (addr & PAGE_MASK))
+		return -EFAULT;
+	return expand_upwards(vma, addr);
+}
+
+static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (!vma_expand_ok(vma, addr))
+		return -EFAULT;
+	return expand_downwards(vma, addr);
+}
+
+#elif defined(CONFIG_STACK_GROWSUP)
+
+#define vma_expand_up(vma,addr) expand_upwards(vma, addr)
+#define vma_expand_down(vma, addr) (-EFAULT)
+
+#else
+
+#define vma_expand_up(vma,addr) (-EFAULT)
+#define vma_expand_down(vma, addr) expand_downwards(vma, addr)
+
+#endif
+
+/*
+ * expand_stack(): legacy interface for page faulting. Don't use unless
+ * you have to.
+ *
+ * This is called with the mm locked for reading, drops the lock, takes
+ * the lock for writing, tries to look up a vma again, expands it if
+ * necessary, and downgrades the lock to reading again.
+ *
+ * If no vma is found or it can't be expanded, it returns NULL and has
+ * dropped the lock.
+ */
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
 {
-	return find_extend_vma_locked(mm, addr, false);
+	struct vm_area_struct *vma, *prev;
+
+	mmap_read_unlock(mm);
+	if (mmap_write_lock_killable(mm))
+		return NULL;
+
+	vma = find_vma_prev(mm, addr, &prev);
+	if (vma && vma->vm_start <= addr)
+		goto success;
+
+	if (prev && !vma_expand_up(prev, addr)) {
+		vma = prev;
+		goto success;
+	}
+
+	if (vma && !vma_expand_down(vma, addr))
+		goto success;
+
+	mmap_write_unlock(mm);
+	return NULL;
+
+success:
+	mmap_write_downgrade(mm);
+	return vma;
 }
-EXPORT_SYMBOL_GPL(find_extend_vma);
 
 /*
  * Ok - we have the memory areas we should free on a maple tree so release them,
diff --git a/mm/nommu.c b/mm/nommu.c
index 2772d6b332ff..b44ed69c109a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -682,24 +682,20 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 EXPORT_SYMBOL(find_vma);
 
 /*
- * find a VMA
- * - we don't extend stack VMAs under NOMMU conditions
- */
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
-{
-	return find_vma(mm, addr);
-}
-
-/*
  * expand a stack to a given address
  * - not supported under NOMMU conditions
  */
-int expand_stack_locked(struct vm_area_struct *vma, unsigned long address,
-		bool write_locked)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr)
 {
 	return -ENOMEM;
 }
 
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+	mmap_read_unlock(mm);
+	return NULL;
+}
+
 /*
  * look up the first VMA exactly that exactly matches addr
  * - should be called with mm->mmap_lock at least held readlocked