Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch uvm_pagerealloc() can now block because of radixtre...



details:   https://anonhg.NetBSD.org/src/rev/48dbd5094db7
branches:  trunk
changeset: 967614:48dbd5094db7
user:      ad <ad%NetBSD.org@localhost>
date:      Sun Dec 15 19:24:11 2019 +0000

description:
uvm_pagerealloc() can now block because of radixtree manipulation, so defer
freeing PTPs until pmap_unmap_ptes(), where we still have the pmap locked
but can finally tolerate context switches again.

To be revisited soon: pmap_map_ptes() seems broken WRT other pmap load.

Reported-by: syzbot+689fb7dab41abff8e75a%syzkaller.appspotmail.com@localhost
Reported-by: syzbot+3e7bbf37d37d451b25d7%syzkaller.appspotmail.com@localhost
Reported-by: syzbot+689fb7dab41abff8e75a%syzkaller.appspotmail.com@localhost
Reported-by: syzbot+689fb7dab41abff8e75a%syzkaller.appspotmail.com@localhost
Reported-by: syzbot+3e7bbf37d37d451b25d7%syzkaller.appspotmail.com@localhost

diffstat:

 sys/arch/x86/include/pmap.h |    4 +-
 sys/arch/x86/x86/pmap.c     |  147 ++++++++++++++++++++++++++-----------------
 sys/arch/xen/x86/xen_pmap.c |    8 +-
 3 files changed, 93 insertions(+), 66 deletions(-)

diffs (truncated from 443 to 300 lines):

diff -r d538e2849eb8 -r 48dbd5094db7 sys/arch/x86/include/pmap.h
--- a/sys/arch/x86/include/pmap.h       Sun Dec 15 17:17:16 2019 +0000
+++ b/sys/arch/x86/include/pmap.h       Sun Dec 15 19:24:11 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.h,v 1.106 2019/12/08 20:42:48 ad Exp $    */
+/*     $NetBSD: pmap.h,v 1.107 2019/12/15 19:24:11 ad Exp $    */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -374,7 +374,7 @@
 
 void           pmap_map_ptes(struct pmap *, struct pmap **, pd_entry_t **,
                    pd_entry_t * const **);
-void           pmap_unmap_ptes(struct pmap *, struct pmap *);
+void           pmap_unmap_ptes(struct pmap *, struct pmap *, struct vm_page *);
 
 bool           pmap_pdes_valid(vaddr_t, pd_entry_t * const *, pd_entry_t *,
                    int *lastlvl);
diff -r d538e2849eb8 -r 48dbd5094db7 sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Sun Dec 15 17:17:16 2019 +0000
+++ b/sys/arch/x86/x86/pmap.c   Sun Dec 15 19:24:11 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.343 2019/12/08 20:42:48 ad Exp $    */
+/*     $NetBSD: pmap.c,v 1.344 2019/12/15 19:24:11 ad Exp $    */
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.343 2019/12/08 20:42:48 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.344 2019/12/15 19:24:11 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -460,9 +460,9 @@
 static int pmap_get_ptp(struct pmap *, vaddr_t,
     pd_entry_t * const *, int, struct vm_page **);
 static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
-static void pmap_freepage(struct pmap *, struct vm_page *, int);
+static void pmap_freepages(struct pmap *, struct vm_page *);
 static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
-    pt_entry_t *, pd_entry_t * const *);
+    pt_entry_t *, pd_entry_t * const *, struct vm_page **);
 static bool pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
     vaddr_t, struct pv_entry **);
 static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t,
@@ -476,7 +476,7 @@
  * p m a p   h e l p e r   f u n c t i o n s
  */
 
-static inline void
+static void
 pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
 {
 
@@ -490,7 +490,7 @@
        }
 }
 
-static inline void
+static void
 pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
 {
        int resid_diff = ((npte & PTE_P) ? 1 : 0) - ((opte & PTE_P) ? 1 : 0);
@@ -657,7 +657,7 @@
                kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
                cpu_load_pmap(pmap, curpmap);
        }
-       pmap->pm_ncsw = l->l_ncsw;
+       pmap->pm_ncsw = lwp_pctr();
        *pmap2 = curpmap;
        *ptepp = PTE_BASE;
 
@@ -674,7 +674,8 @@
  * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
  */
 void
-pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
+pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2,
+    struct vm_page *ptp_tofree)
 {
        struct cpu_info *ci;
        struct pmap *mypmap;
@@ -683,6 +684,7 @@
 
        /* The kernel's pmap is always accessible. */
        if (pmap == pmap_kernel()) {
+               KASSERT(ptp_tofree == NULL);
                return;
        }
 
@@ -697,9 +699,13 @@
         * We cannot tolerate context switches while mapped in.
         * If it is our own pmap all we have to do is unlock.
         */
-       KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
+       KASSERT(pmap->pm_ncsw == lwp_pctr());
        mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map);
        if (pmap == mypmap) {
+               /* Now safe to free PTPs, with the pmap still locked. */
+               if (ptp_tofree != NULL) {
+                       pmap_freepages(pmap, ptp_tofree);
+               }
                mutex_exit(&pmap->pm_lock);
                return;
        }
@@ -716,6 +722,10 @@
                 * This can happen when undoing after pmap_get_ptp blocked.
                 */ 
        }
+       /* Now safe to free PTPs, with the pmap still locked. */
+       if (ptp_tofree != NULL) {
+               pmap_freepages(pmap, ptp_tofree);
+       }
        mutex_exit(&pmap->pm_lock);
        if (pmap == pmap2) {
                return;
@@ -946,7 +956,7 @@
  *    checking the valid bit before doing TLB flushing
  * => must be followed by call to pmap_update() before reuse of page
  */
-static inline void
+static void
 pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly)
 {
        pt_entry_t *pte, opte;
@@ -1975,7 +1985,7 @@
  * p t p   f u n c t i o n s
  */
 
-static inline struct vm_page *
+static struct vm_page *
 pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level)
 {
        int lidx = level - 1;
@@ -1993,32 +2003,35 @@
        return pg;
 }
 
-static inline void
-pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
+static void
+pmap_freepages(struct pmap *pmap, struct vm_page *ptp_tofree)
 {
+       struct vm_page *ptp;
        lwp_t *l;
        int lidx;
-       struct uvm_object *obj;
-
-       KASSERT(ptp->wire_count == 1);
-
-       lidx = level - 1;
-
-       obj = &pmap->pm_obj[lidx];
-       pmap_stats_update(pmap, -1, 0);
-       if (pmap->pm_ptphint[lidx] == ptp)
-               pmap->pm_ptphint[lidx] = TAILQ_FIRST(&obj->memq);
-       ptp->wire_count = 0;
-       uvm_pagerealloc(ptp, NULL, 0);
-       l = curlwp;
-       KASSERT((l->l_pflag & LP_INTR) == 0);
-       VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
-       l->l_md.md_gc_ptp = ptp;
+
+       while ((ptp = ptp_tofree) != NULL) {
+               KASSERT(ptp->wire_count == 1);
+               for (lidx = 0; lidx < __arraycount(pmap->pm_obj); lidx++) {
+                       if (pmap->pm_ptphint[lidx] == ptp) {
+                               pmap->pm_ptphint[lidx] = NULL;
+                       }
+               }
+               pmap_stats_update(pmap, -1, 0);
+               ptp->wire_count = 0;
+               uvm_pagerealloc(ptp, NULL, 0);
+               l = curlwp;
+               KASSERT((l->l_pflag & LP_INTR) == 0);
+               ptp_tofree = VM_PAGE_TO_PP(ptp)->pp_link;
+               VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
+               l->l_md.md_gc_ptp = ptp;
+       }
 }
 
 static void
 pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
-             pt_entry_t *ptes, pd_entry_t * const *pdes)
+             pt_entry_t *ptes, pd_entry_t * const *pdes,
+             struct vm_page **ptp_tofree)
 {
        unsigned long index;
        int level;
@@ -2057,7 +2070,8 @@
                pmap_tlb_shootnow();
 #endif
 
-               pmap_freepage(pmap, ptp, level);
+               VM_PAGE_TO_PP(ptp)->pp_link = *ptp_tofree;
+               *ptp_tofree = ptp;
                if (level < PTP_LEVELS - 1) {
                        ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
                        ptp->wire_count--;
@@ -2090,7 +2104,6 @@
        paddr_t pa;
        struct uvm_object *obj;
        voff_t off;
-       lwp_t *l;
        uint64_t ncsw;
 
        KASSERT(pmap != pmap_kernel());
@@ -2110,14 +2123,13 @@
 
                pt[i].pg = uvm_pagelookup(obj, off);
                if (pt[i].pg == NULL) {
-                       l = curlwp;
-                       ncsw = l->l_ncsw;
+                       ncsw = lwp_pctr();
                        pt[i].pg = uvm_pagealloc(obj, off, NULL, aflags);
                        pt[i].new = true;
-                       if (__predict_false(ncsw != l->l_ncsw)) {
+                       if (__predict_false(ncsw != lwp_pctr())) {
                                /* uvm_pagealloc can block. */
                                /* XXX silence assertion in pmap_unmap_ptes */
-                               pmap->pm_ncsw = l->l_ncsw;
+                               pmap->pm_ncsw = lwp_pctr();
                                error = EAGAIN;
                                goto fail;
                        }
@@ -2464,11 +2476,10 @@
 
        for (i = 0; i < PTP_LEVELS - 1; i++) {
                KASSERT(pmap->pm_obj[i].uo_npages == 0);
-               KASSERT(TAILQ_EMPTY(&pmap->pm_obj[i].memq));
        }
 }
 
-static inline void
+static void
 pmap_check_inuse(struct pmap *pmap)
 {
 #ifdef DIAGNOSTIC
@@ -3136,7 +3147,7 @@
                }
        }
        if (__predict_false(hard)) {
-               pmap_unmap_ptes(pmap, pmap2);
+               pmap_unmap_ptes(pmap, pmap2, NULL);
        }
        kpreempt_enable();
        if (pap != NULL) {
@@ -3550,6 +3561,7 @@
        pd_entry_t pde;
        pd_entry_t * const *pdes;
        struct pv_entry *pv_tofree = NULL;
+       struct vm_page *ptp_tofree = NULL;
        bool result;
        paddr_t ptppa;
        vaddr_t blkendva, va = sva;
@@ -3594,8 +3606,10 @@
                         * being used, free it!
                         */
 
-                       if (result && ptp && ptp->wire_count <= 1)
-                               pmap_free_ptp(pmap, ptp, va, ptes, pdes);
+                       if (result && ptp && ptp->wire_count <= 1) {
+                               pmap_free_ptp(pmap, ptp, va, ptes, pdes,
+                                   &ptp_tofree);
+                       }
                }
        } else for (/* null */ ; va < eva ; va = blkendva) {
                /* determine range of block */
@@ -3626,12 +3640,22 @@
                pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[pl1_i(va)], va,
                    blkendva, &pv_tofree);
 
-               /* If PTP is no longer being used, free it. */
-               if (ptp && ptp->wire_count <= 1) {
-                       pmap_free_ptp(pmap, ptp, va, ptes, pdes);
+               /*
+                * If PTP is no longer being used, free it.  We need to unmap
+                * and re-map to do this, then continue on at the next VA,
+                * because we can't tolerate blocking with the PTEs mapped in.
+                */
+               if (ptp == NULL || ptp->wire_count > 1) {
+                       continue;
                }
-       }
-       pmap_unmap_ptes(pmap, pmap2);           /* unlock pmap */
+               pmap_free_ptp(pmap, ptp, va, ptes, pdes, &ptp_tofree);
+               if (ptp_tofree != NULL) {
+                       pmap_unmap_ptes(pmap, pmap2, ptp_tofree);
+                       ptp_tofree = NULL;
+                       pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
+               }
+       }
+       pmap_unmap_ptes(pmap, pmap2, ptp_tofree);       /* unlock pmap */
        kpreempt_enable();
 
        /* Now we free unused PVs */
@@ -3742,10 +3766,11 @@



Home | Main Index | Thread Index | Old Index