Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/x86 x86 pmap:



details:   https://anonhg.NetBSD.org/src/rev/5ccfaa00c4ce
branches:  trunk
changeset: 1006346:5ccfaa00c4ce
user:      ad <ad%NetBSD.org@localhost>
date:      Sun Jan 12 13:01:11 2020 +0000

description:
x86 pmap:

- It turns out that every page the pmap frees is necessarily zeroed.  Tell
  the VM system about this and use the pmap as a source of pre-zeroed pages.

- Redo deferred freeing of PTPs more elegantly, including the integration with
  pmap_remove_all().  This fixes problems with nvmm, and possibly also a crash
  discovered during fuzzing.

Reported-by: syzbot+a97186518c84f1d85c0c%syzkaller.appspotmail.com@localhost

diffstat:

 sys/arch/x86/include/pmap.h    |    7 +-
 sys/arch/x86/include/pmap_pv.h |    5 +-
 sys/arch/x86/x86/pmap.c        |  276 ++++++++++++++++++++--------------------
 sys/arch/x86/x86/vm_machdep.c  |   13 +-
 sys/arch/x86/x86/x86_tlb.c     |   12 +-
 5 files changed, 145 insertions(+), 168 deletions(-)

diffs (truncated from 671 to 300 lines):

diff -r fc6b5de412ec -r 5ccfaa00c4ce sys/arch/x86/include/pmap.h
--- a/sys/arch/x86/include/pmap.h       Sun Jan 12 12:55:03 2020 +0000
+++ b/sys/arch/x86/include/pmap.h       Sun Jan 12 13:01:11 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.h,v 1.108 2020/01/04 22:49:20 ad Exp $    */
+/*     $NetBSD: pmap.h,v 1.109 2020/01/12 13:01:11 ad Exp $    */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -262,7 +262,7 @@
 #if !defined(__x86_64__)
        vaddr_t pm_hiexec;              /* highest executable mapping */
 #endif /* !defined(__x86_64__) */
-       int pm_flags;                   /* see below */
+       struct lwp *pm_remove_all;      /* who's emptying the pmap */
 
        union descriptor *pm_ldt;       /* user-set LDT */
        size_t pm_ldt_len;              /* size of LDT in bytes */
@@ -273,7 +273,7 @@
        kcpuset_t *pm_xen_ptp_cpus;     /* mask of CPUs which have this pmap's
                                         ptp mapped */
        uint64_t pm_ncsw;               /* for assertions */
-       struct vm_page *pm_gc_ptp;      /* pages from pmap g/c */
+       LIST_HEAD(,vm_page) pm_gc_ptp;  /* PTPs queued for free */
 
        /* Used by NVMM. */
        int (*pm_enter)(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int);
@@ -580,7 +580,6 @@
 int    pmap_enter_ma(struct pmap *, vaddr_t, paddr_t, paddr_t,
            vm_prot_t, u_int, int);
 bool   pmap_extract_ma(pmap_t, vaddr_t, paddr_t *);
-void   pmap_free_ptps(struct vm_page *);
 
 paddr_t pmap_get_physpage(void);
 
diff -r fc6b5de412ec -r 5ccfaa00c4ce sys/arch/x86/include/pmap_pv.h
--- a/sys/arch/x86/include/pmap_pv.h    Sun Jan 12 12:55:03 2020 +0000
+++ b/sys/arch/x86/include/pmap_pv.h    Sun Jan 12 13:01:11 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap_pv.h,v 1.9 2020/01/04 22:49:20 ad Exp $   */
+/*     $NetBSD: pmap_pv.h,v 1.10 2020/01/12 13:01:11 ad Exp $  */
 
 /*-
  * Copyright (c)2008 YAMAMOTO Takashi,
@@ -69,7 +69,7 @@
                struct pv_pte u_pte;
 
                /* PTPs */
-               struct vm_page *u_link;
+               LIST_ENTRY(vm_page) u_link;
        } pp_u;
        LIST_HEAD(, pv_entry) pp_pvlist;
 #define        pp_pte  pp_u.u_pte
@@ -83,6 +83,7 @@
 
 /* pp_flags */
 #define        PP_EMBEDDED     1
+#define        PP_FREEING      2
 
 #define        PMAP_PAGE_INIT(pp)      LIST_INIT(&(pp)->pp_pvlist)
 
diff -r fc6b5de412ec -r 5ccfaa00c4ce sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Sun Jan 12 12:55:03 2020 +0000
+++ b/sys/arch/x86/x86/pmap.c   Sun Jan 12 13:01:11 2020 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: pmap.c,v 1.354 2020/01/07 21:18:24 ad Exp $    */
+/*     $NetBSD: pmap.c,v 1.355 2020/01/12 13:01:11 ad Exp $    */
 
 /*
- * Copyright (c) 2008, 2010, 2016, 2017, 2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.354 2020/01/07 21:18:24 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.355 2020/01/12 13:01:11 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -419,9 +419,9 @@
 static void pmap_install_ptp(struct pmap *, struct pmap_ptparray *, vaddr_t,
     pd_entry_t * const *);
 static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
-static void pmap_freepages(struct pmap *, struct vm_page *);
+static void pmap_freepage(struct pmap *, struct vm_page *, int);
 static void pmap_free_ptp(struct pmap *, struct vm_page *, vaddr_t,
-    pt_entry_t *, pd_entry_t * const *, struct vm_page **);
+    pt_entry_t *, pd_entry_t * const *);
 static bool pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
     vaddr_t, struct pv_entry **);
 static void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, vaddr_t,
@@ -435,6 +435,7 @@
 
 static void pmap_load1(struct lwp *, struct pmap *, struct pmap *);
 static void pmap_reactivate(struct pmap *);
+static void pmap_dropref(struct pmap *);
 
 /*
  * p m a p   h e l p e r   f u n c t i o n s
@@ -667,7 +668,7 @@
 
        /* Toss reference to other pmap taken earlier. */
        if (pmap2 != NULL) {
-               pmap_destroy(pmap2);
+               pmap_dropref(pmap2);
        }
 }
 
@@ -1953,40 +1954,51 @@
                return (pmap->pm_ptphint[lidx]);
        }
        pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level));
-
-       KASSERT(pg == NULL || pg->wire_count >= 1);
+       if (pg != NULL) {
+               if (__predict_false(pg->wire_count == 0)) {
+                       /* This page is queued to be freed - ignore. */
+                       KASSERT((VM_PAGE_TO_PP(pg)->pp_flags &
+                           PP_FREEING) != 0);
+                       pg = NULL;
+               } else {
+                       KASSERT((VM_PAGE_TO_PP(pg)->pp_flags &
+                           PP_FREEING) == 0);
+               }
+       }
        return pg;
 }
 
-static void
-pmap_freepages(struct pmap *pmap, struct vm_page *ptp_tofree)
+static inline void
+pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level)
 {
-       struct vm_page *ptp;
-       lwp_t *l;
+       struct pmap_page *pp;
        int lidx;
 
-       while ((ptp = ptp_tofree) != NULL) {
-               KASSERT(ptp->wire_count == 1);
-               for (lidx = 0; lidx < __arraycount(pmap->pm_obj); lidx++) {
-                       if (pmap->pm_ptphint[lidx] == ptp) {
-                               pmap->pm_ptphint[lidx] = NULL;
-                       }
-               }
-               pmap_stats_update(pmap, -1, 0);
-               ptp->wire_count = 0;
-               uvm_pagerealloc(ptp, NULL, 0);
-               l = curlwp;
-               KASSERT((l->l_pflag & LP_INTR) == 0);
-               ptp_tofree = VM_PAGE_TO_PP(ptp)->pp_link;
-               VM_PAGE_TO_PP(ptp)->pp_link = l->l_md.md_gc_ptp;
-               l->l_md.md_gc_ptp = ptp;
-       }
+       KASSERT(ptp->wire_count == 1);
+
+       lidx = level - 1;
+       pmap_stats_update(pmap, -1, 0);
+       if (pmap->pm_ptphint[lidx] == ptp)
+               pmap->pm_ptphint[lidx] = NULL;
+       ptp->wire_count = 0;
+
+       /*
+        * Enqueue the PTP to be freed by pmap_update().  We can't remove
+        * the page from the uvm_object, as that can take further locks
+        * (intolerable right now because the PTEs are likely mapped in). 
+        * Instead mark the PTP as free and if we bump into it again, we'll
+        * either ignore or reuse (depending on what's tolerable at the
+        * time).
+        */
+       pp = VM_PAGE_TO_PP(ptp);
+       KASSERT((pp->pp_flags & PP_FREEING) == 0);
+       pp->pp_flags |= PP_FREEING;
+       LIST_INSERT_HEAD(&pmap->pm_gc_ptp, ptp, mdpage.mp_pp.pp_link);
 }
 
 static void
 pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va,
-             pt_entry_t *ptes, pd_entry_t * const *pdes,
-             struct vm_page **ptp_tofree)
+             pt_entry_t *ptes, pd_entry_t * const *pdes)
 {
        unsigned long index;
        int level;
@@ -2025,8 +2037,7 @@
                pmap_tlb_shootnow();
 #endif
 
-               VM_PAGE_TO_PP(ptp)->pp_link = *ptp_tofree;
-               *ptp_tofree = ptp;
+               pmap_freepage(pmap, ptp, level);
                if (level < PTP_LEVELS - 1) {
                        ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1);
                        ptp->wire_count--;
@@ -2071,6 +2082,15 @@
                if (pt->pg[i] == NULL) {
                        pt->pg[i] = uvm_pagealloc(obj, off, NULL, aflags);
                        pt->alloced[i] = true;
+               } else if (pt->pg[i]->wire_count == 0) {
+                       /* This page was queued to be freed; dequeue it. */
+                       KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
+                           PP_FREEING) != 0);
+                       VM_PAGE_TO_PP(pt->pg[i])->pp_flags &= ~PP_FREEING;
+                       LIST_REMOVE(pt->pg[i], mdpage.mp_pp.pp_link);
+               } else {                
+                       KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
+                           PP_FREEING) == 0);
                }
                if (pt->pg[i] == NULL) {
                        pmap_unget_ptp(pmap, pt);
@@ -2175,6 +2195,11 @@
                if (!pt->alloced[i]) {
                        continue;
                }
+               KASSERT((VM_PAGE_TO_PP(pt->pg[i])->pp_flags &
+                   PP_FREEING) == 0);
+               KASSERT(pt->pg[i]->wire_count == 0);
+               /* pmap zeros all pages before freeing. */
+               pt->pg[i]->flags |= PG_ZERO; 
                uvm_pagefree(pt->pg[i]);
                pt->pg[i] = NULL;
                pmap->pm_ptphint[0] = NULL;
@@ -2366,6 +2391,8 @@
 #ifdef XENPV
        kcpuset_create(&pmap->pm_xen_ptp_cpus, true);
 #endif
+       LIST_INIT(&pmap->pm_gc_ptp);
+       pmap->pm_remove_all = NULL;
 
        /* allocate and init PDP */
        pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK);
@@ -2436,8 +2463,6 @@
 #if !defined(__x86_64__)
        pmap->pm_hiexec = 0;
 #endif
-       pmap->pm_flags = 0;
-       pmap->pm_gc_ptp = NULL;
 
        /* Used by NVMM. */
        pmap->pm_enter = NULL;
@@ -2459,23 +2484,6 @@
 }
 
 /*
- * pmap_free_ptps: put a list of ptps back to the freelist.
- */
-void
-pmap_free_ptps(struct vm_page *empty_ptps)
-{
-       struct vm_page *ptp;
-       struct pmap_page *pp;
-
-       while ((ptp = empty_ptps) != NULL) {
-               pp = VM_PAGE_TO_PP(ptp);
-               empty_ptps = pp->pp_link;
-               LIST_INIT(&pp->pp_pvlist);
-               uvm_pagefree(ptp);
-       }
-}
-
-/*
  * pmap_check_ptps: verify that none of the pmap's page table objects
  * have any pages allocated to them.
  */
@@ -2503,7 +2511,7 @@
                for (int i = 0; i < PDIR_SLOT_USERLIM; i++) {
                        if (pmap->pm_pdir[i] != 0 &&
                            ci->ci_kpm_pdir[i] == pmap->pm_pdir[i]) {
-                               printf("pmap_destroy(%p) pmap_kernel %p "
+                               printf("pmap_dropref(%p) pmap_kernel %p "
                                    "curcpu %d cpu %d ci_pmap %p "
                                    "ci->ci_kpm_pdir[%d]=%" PRIx64
                                    " pmap->pm_pdir[%d]=%" PRIx64 "\n",
@@ -2520,33 +2528,30 @@
 }
 
 /*
- * pmap_destroy: drop reference count on pmap.   free pmap if
- * reference count goes to zero.
- *
- * => we can be called from pmap_unmap_ptes() with a different, unrelated
- *    pmap's lock held.  be careful!
+ * pmap_destroy:  pmap is being destroyed by UVM.
  */
 void
 pmap_destroy(struct pmap *pmap)
 {
-       lwp_t *l;
+
+       /* Undo pmap_remove_all(), then drop the reference. */
+       pmap_update(pmap);
+       pmap_dropref(pmap);
+}
+
+/*
+ * pmap_dropref:  drop reference count on pmap.  free pmap if reference



Home | Main Index | Thread Index | Old Index