Source-Changes-HG archive


[src/trunk]: src/sys/arch/arm/arm32 Fix a deadlock between the pool and pmap ...



details:   https://anonhg.NetBSD.org/src/rev/f721f61f7b11
branches:  trunk
changeset: 998597:f721f61f7b11
user:      bouyer <bouyer@NetBSD.org>
date:      Tue Apr 23 11:21:21 2019 +0000

description:
Fix a deadlock between the pool and pmap code:
- cpu0 grabs the kernel lock (e.g. from a non-MPSAFE interrupt) and
  calls pool_get().
- cpu1 does a pool_get() on the same pool from MPSAFE code, which needs a
  pool_page_alloc(), which ends up in pmap_extract_coherency().

So cpu0 holds the kernel_lock and wants the pool lock. cpu1 holds the pool
lock and wants the kernel_lock in pmap_extract_coherency().
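
This is the classic ABBA lock-order inversion. A minimal userland
sketch of the same pattern, with pthread mutexes standing in for
kernel_lock and the pool lock (all names here are illustrative, not
kernel code):

#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* "kernel_lock" */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* "pool lock" */

static void *
cpu0(void *arg)
{
        pthread_mutex_lock(&lock_a);    /* non-MPSAFE interrupt */
        sleep(1);                       /* widen the race window */
        pthread_mutex_lock(&lock_b);    /* "pool_get()": blocks forever */
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
}

static void *
cpu1(void *arg)
{
        pthread_mutex_lock(&lock_b);    /* MPSAFE "pool_get()" */
        sleep(1);
        pthread_mutex_lock(&lock_a);    /* "pmap_extract_coherency()": blocks */
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
        return NULL;
}

int
main(void)
{
        pthread_t t0, t1;

        pthread_create(&t0, NULL, cpu0, NULL);
        pthread_create(&t1, NULL, cpu1, NULL);
        pthread_join(t0, NULL);         /* never returns: both threads block */
        pthread_join(t1, NULL);
        return 0;
}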

The pmap code should not rely on kernel_lock. Instead, make
pmap_kernel()->pm_obj_lock an IPL_VM lock and use it as the pmap lock
(thus dropping the pmap_kernel() test in
pmap_{acquire,release}_pmap_lock()). This needs to be an IPL_VM lock
because, unlike user pmaps, the kernel pmap can be locked from
interrupt context.

Add an IPL_NONE lock for pmap_growkernel(). We can't use
pmap_kernel()->pm_obj_lock here because pmap_grow_map() may sleep.
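
For context, this relies on NetBSD's mutex(9) semantics: a kmutex
initialized with an IPL above IPL_NONE is a spin mutex that raises the
SPL while held, so it can be taken from handlers running at or below
that IPL but must never be held across a sleep; an IPL_NONE mutex is
adaptive and may be held across code that sleeps. A sketch (the
function and lock names are illustrative):

#include <sys/mutex.h>

static kmutex_t intr_safe_lock;         /* e.g. pm_obj_lock, pmap_lock */
static kmutex_t sleepable_lock;         /* e.g. kpm_lock */

static void
example_lock_init(void)
{
        /* Spin mutex: raises the SPL to IPL_VM while held, so it can
         * be taken from interrupt handlers at or below IPL_VM, but
         * code holding it must never sleep. */
        mutex_init(&intr_safe_lock, MUTEX_DEFAULT, IPL_VM);

        /* Adaptive mutex: may be held across operations that can
         * sleep (as kpm_lock is across pmap_grow_map()), but must not
         * be taken from interrupt context. */
        mutex_init(&sleepable_lock, MUTEX_DEFAULT, IPL_NONE);
}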

Make pmap_lock (which may be locked with pm_obj_lock held) an IPL_VM
lock in all cases.

Reorder a few things so that pool_get()/pool_put() (which may sleep)
are not called with pm_obj_lock held.
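
The reordering follows the usual pre-allocate/deferred-free shape:
take the pv entry from the pool before acquiring the pmap lock, and
defer any pool_put() until after the lock is dropped. A condensed
sketch of what pmap_enter()/pmap_remove() do after this change (see
the diff below for the real code):

        struct pv_entry *new_pv, *old_pv = NULL;

        /* Allocate before locking: pool_get() takes the pool lock and
         * may end up back in the pmap code. */
        new_pv = pool_get(&pmap_pv_pool, PR_NOWAIT);

        pmap_acquire_pmap_lock(pm);
        /* ... consume new_pv, and/or unlink a stale entry into old_pv ... */
        pmap_release_pmap_lock(pm);

        /* Free after unlocking. */
        if (old_pv != NULL)
                pool_put(&pmap_pv_pool, old_pv);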

Patch initially posted to port-arm@ on April 19; improved patch (per
suggestions from Nick Hudson and Jason Thorpe) posted on April 21.

diffstat:

 sys/arch/arm/arm32/pmap.c |  100 ++++++++++++++++++++++++---------------------
 1 files changed, 53 insertions(+), 47 deletions(-)

diffs (277 lines):

diff -r e7077b6c086d -r f721f61f7b11 sys/arch/arm/arm32/pmap.c
--- a/sys/arch/arm/arm32/pmap.c Tue Apr 23 11:05:14 2019 +0000
+++ b/sys/arch/arm/arm32/pmap.c Tue Apr 23 11:21:21 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.372 2019/04/23 11:05:14 bouyer Exp $        */
+/*     $NetBSD: pmap.c,v 1.373 2019/04/23 11:21:21 bouyer Exp $        */
 
 /*
  * Copyright 2003 Wasabi Systems, Inc.
@@ -221,7 +221,7 @@
 #include <arm/db_machdep.h>
 #endif
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.372 2019/04/23 11:05:14 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.373 2019/04/23 11:21:21 bouyer Exp $");
 
 //#define PMAP_DEBUG
 #ifdef PMAP_DEBUG
@@ -521,6 +521,7 @@
 vaddr_t memhook;                       /* used by mem.c & others */
 kmutex_t memlock __cacheline_aligned;  /* used by mem.c & others */
 kmutex_t pmap_lock __cacheline_aligned;
+kmutex_t kpm_lock __cacheline_aligned;
 extern void *msgbufaddr;
 int pmap_kmpages;
 /*
@@ -543,33 +544,21 @@
 pmap_acquire_pmap_lock(pmap_t pm)
 {
 #if defined(MULTIPROCESSOR) && defined(DDB)
-       if (db_onproc != NULL)
+       if (__predict_false(db_onproc != NULL))
                return;
 #endif
        
-       if (pm == pmap_kernel()) {
-#ifdef MULTIPROCESSOR
-               KERNEL_LOCK(1, NULL);
-#endif
-       } else {
-               mutex_enter(pm->pm_lock);
-       }
+       mutex_enter(pm->pm_lock);
 }
 
 static inline void
 pmap_release_pmap_lock(pmap_t pm)
 {
 #if defined(MULTIPROCESSOR) && defined(DDB)
-       if (db_onproc != NULL)
+       if (__predict_false(db_onproc != NULL))
                return;
 #endif
-       if (pm == pmap_kernel()) {
-#ifdef MULTIPROCESSOR
-               KERNEL_UNLOCK_ONE(NULL);
-#endif
-       } else {
-               mutex_exit(pm->pm_lock);
-       }
+       mutex_exit(pm->pm_lock);
 }
 
 static inline void
@@ -3070,6 +3059,10 @@
 #else
        const bool vector_page_p = (va == vector_page);
 #endif
+       struct pmap_page *pp = pmap_pv_tracked(pa);
+       struct pv_entry *new_pv = NULL;
+       struct pv_entry *old_pv = NULL;
+       int error = 0;
 
        UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
 
@@ -3085,6 +3078,12 @@
         * test for a managed page by checking pg != NULL.
         */
        pg = pmap_initialized ? PHYS_TO_VM_PAGE(pa) : NULL;
+       /*
+        * if we may need a new pv entry allocate if now, as we can't do it
+        * with the kernel_pmap locked
+        */
+       if (pg || pp)
+               new_pv = pool_get(&pmap_pv_pool, PR_NOWAIT);
 
        nflags = 0;
        if (prot & VM_PROT_WRITE)
@@ -3108,7 +3107,8 @@
        if (l2b == NULL) {
                if (flags & PMAP_CANFAIL) {
                        pmap_release_pmap_lock(pm);
-                       return (ENOMEM);
+                       error = ENOMEM;
+                       goto free_pv;
                }
                panic("pmap_enter: failed to allocate L2 bucket");
        }
@@ -3131,8 +3131,6 @@
        } else
                opg = NULL;
 
-       struct pmap_page *pp = pmap_pv_tracked(pa);
-
        if (pg || pp) {
                KASSERT((pg != NULL) != (pp != NULL));
                struct vm_page_md *md = (pg != NULL) ? VM_PAGE_TO_MD(pg) :
@@ -3241,9 +3239,10 @@
                                }
 #endif
                        } else {
-                               pmap_release_page_lock(md);
-                               pv = pool_get(&pmap_pv_pool, PR_NOWAIT);
+                               pv = new_pv;
+                               new_pv = NULL;
                                if (pv == NULL) {
+                                       pmap_release_page_lock(md);
                                        pmap_release_pmap_lock(pm);
                                        if ((flags & PMAP_CANFAIL) == 0)
                                                panic("pmap_enter: "
@@ -3254,7 +3253,6 @@
                                            0, 0, 0, 0);
                                        return (ENOMEM);
                                }
-                               pmap_acquire_page_lock(md);
                        }
 
                        pmap_enter_pv(md, pa, pv, pm, va, nflags);
@@ -3291,9 +3289,9 @@
                        paddr_t opa = VM_PAGE_TO_PHYS(opg);
 
                        pmap_acquire_page_lock(omd);
-                       struct pv_entry *pv = pmap_remove_pv(omd, opa, pm, va);
+                       old_pv = pmap_remove_pv(omd, opa, pm, va);
                        pmap_vac_me_harder(omd, opa, pm, 0);
-                       oflags = pv->pv_flags;
+                       oflags = old_pv->pv_flags;
                        pmap_release_page_lock(omd);
 
 #ifdef PMAP_CACHE_VIVT
@@ -3301,7 +3299,6 @@
                                pmap_cache_wbinv_page(pm, va, true, oflags);
                        }
 #endif
-                       pool_put(&pmap_pv_pool, pv);
                }
        }
 
@@ -3403,7 +3400,13 @@
 
        pmap_release_pmap_lock(pm);
 
-       return (0);
+
+       if (old_pv)
+               pool_put(&pmap_pv_pool, old_pv);
+free_pv:
+       if (new_pv)
+               pool_put(&pmap_pv_pool, new_pv);
+       return (error);
 }
 
 /*
@@ -3431,10 +3434,13 @@
 void
 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
 {
+       SLIST_HEAD(,pv_entry) opv_list;
+       struct pv_entry *pv, *npv;
        UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
        UVMHIST_LOG(maphist, " (pm=%#jx, sva=%#jx, eva=%#jx)",
            (uintptr_t)pm, sva, eva, 0);
 
+       SLIST_INIT(&opv_list);
        /*
         * we lock in the pmap => pv_head direction
         */
@@ -3493,7 +3499,6 @@
                         */
                        if (pg != NULL) {
                                struct vm_page_md *md = VM_PAGE_TO_MD(pg);
-                               struct pv_entry *pv;
 
                                pmap_acquire_page_lock(md);
                                pv = pmap_remove_pv(md, pa, pm, sva);
@@ -3503,7 +3508,8 @@
                                        if (pm->pm_remove_all == false) {
                                                flags = pv->pv_flags;
                                        }
-                                       pool_put(&pmap_pv_pool, pv);
+                                       SLIST_INSERT_HEAD(&opv_list,
+                                           pv, pv_link);
                                }
                        }
                        mappings += PAGE_SIZE / L2_S_SIZE;
@@ -3605,6 +3611,9 @@
        }
 
        pmap_release_pmap_lock(pm);
+       SLIST_FOREACH_SAFE(pv, &opv_list, pv_link, npv) {
+               pool_put(&pmap_pv_pool, pv);
+       }
 }
 
 #if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED)
@@ -3828,15 +3837,15 @@
        UVMHIST_LOG(maphist, " (va=%#jx, len=%#jx)", va, len, 0, 0);
 
        const vaddr_t eva = va + len;
-
-       pmap_acquire_pmap_lock(pmap_kernel());
+       pmap_t kpm = pmap_kernel();
+
+       pmap_acquire_pmap_lock(kpm);
 
        while (va < eva) {
                vaddr_t next_bucket = L2_NEXT_BUCKET_VA(va);
                if (next_bucket > eva)
                        next_bucket = eva;
 
-               pmap_t kpm = pmap_kernel();
                struct l2_bucket * const l2b = pmap_get_l2_bucket(kpm, va);
                KDASSERT(l2b != NULL);
 
@@ -3892,7 +3901,7 @@
                total_mappings += mappings;
 #endif
        }
-       pmap_release_pmap_lock(pmap_kernel());
+       pmap_release_pmap_lock(kpm);
        cpu_cpwait();
        UVMHIST_LOG(maphist, "  <--- done (%ju mappings removed)",
            total_mappings, 0, 0, 0);
@@ -5917,8 +5926,8 @@
         * whoops!   we need to add kernel PTPs
         */
 
-       s = splhigh();  /* to be safe */
-       mutex_enter(kpm->pm_lock);
+       s = splvm();    /* to be safe */
+       mutex_enter(&kpm_lock);
 
        /* Map 1MB at a time */
        size_t l1slot = l1pte_index(pmap_curmaxkvaddr);
@@ -5962,7 +5971,7 @@
        cpu_cpwait();
 #endif
 
-       mutex_exit(kpm->pm_lock);
+       mutex_exit(&kpm_lock);
        splx(s);
 
 out:
@@ -6160,16 +6169,13 @@
 #endif
 
        VPRINTF("locks ");
-#if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED)
-       if (arm_cache_prefer_mask != 0) {
-               mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_VM);
-       } else {
-#endif
-               mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
-#if defined(PMAP_CACHE_VIPT) && !defined(ARM_MMU_EXTENDED)
-       }
-#endif
-       mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_NONE);
+       /*
+        * pmap_kenter_pa() and pmap_kremove() may be called from interrupt
+        * context, so their locks have to be at IPL_VM
+        */
+       mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_VM);
+       mutex_init(&kpm_lock, MUTEX_DEFAULT, IPL_NONE);
+       mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_VM);
        uvm_obj_init(&pm->pm_obj, NULL, false, 1);
        uvm_obj_setlock(&pm->pm_obj, &pm->pm_obj_lock);
 


