Source-Changes-HG archive

[src/trunk]: src/sys/arch/x86 PR kern/55071 (Panic shortly after running X11 ...



details:   https://anonhg.NetBSD.org/src/rev/0073b25fe5b6
branches:  trunk
changeset: 1008200:0073b25fe5b6
user:      ad <ad%NetBSD.org@localhost>
date:      Sat Mar 14 18:24:10 2020 +0000

description:
PR kern/55071 (Panic shortly after running X11 due to kernel diagnostic assertion "mutex_owned(&pp->pp_lock)")

- Fix a locking bug in pmap_pp_clear_attrs(), and in pmap_pp_remove() do the
  TLB shootdown while still holding the target pmap's lock.

Also:

- Finish PV list locking for x86 & update comments around same.

- Keep track of the min/max index of PTEs inserted into each PTP, and use
  that to clip ranges of VAs passed to pmap_remove_ptes() (see the sketch
  after this list).

- Based on the above, implement a pmap_remove_all() for x86 that clears out
  the pmap in a single pass.  Makes exit() / fork() much cheaper.

diffstat:

 sys/arch/x86/include/pmap.h    |    6 +-
 sys/arch/x86/include/pmap_pv.h |   21 +-
 sys/arch/x86/x86/pmap.c        |  680 ++++++++++++++++++++++++++++------------
 3 files changed, 489 insertions(+), 218 deletions(-)

diffs (truncated from 1217 to 300 lines):

diff -r ec42ae4d8511 -r 0073b25fe5b6 sys/arch/x86/include/pmap.h
--- a/sys/arch/x86/include/pmap.h       Sat Mar 14 18:08:38 2020 +0000
+++ b/sys/arch/x86/include/pmap.h       Sat Mar 14 18:24:10 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.h,v 1.112 2020/03/14 14:05:44 ad Exp $    */
+/*     $NetBSD: pmap.h,v 1.113 2020/03/14 18:24:10 ad Exp $    */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -248,6 +248,8 @@
  * (the other object locks are only used when uvm_pagealloc is called)
  */
 
+struct pv_page;
+
 struct pmap {
        struct uvm_object pm_obj[PTP_LEVELS-1];/* objects for lvl >= 1) */
        LIST_ENTRY(pmap) pm_list;       /* list of all pmaps */
@@ -256,11 +258,11 @@
        struct vm_page *pm_ptphint[PTP_LEVELS-1];
                                        /* pointer to a PTP in our pmap */
        struct pmap_statistics pm_stats;  /* pmap stats */
+       struct pv_entry *pm_pve;        /* spare pv_entry */
 
 #if !defined(__x86_64__)
        vaddr_t pm_hiexec;              /* highest executable mapping */
 #endif /* !defined(__x86_64__) */
-       struct lwp *pm_remove_all;      /* who's emptying the pmap */
 
        union descriptor *pm_ldt;       /* user-set LDT */
        size_t pm_ldt_len;              /* size of LDT in bytes */
diff -r ec42ae4d8511 -r 0073b25fe5b6 sys/arch/x86/include/pmap_pv.h
--- a/sys/arch/x86/include/pmap_pv.h    Sat Mar 14 18:08:38 2020 +0000
+++ b/sys/arch/x86/include/pmap_pv.h    Sat Mar 14 18:24:10 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap_pv.h,v 1.13 2020/03/10 22:38:41 ad Exp $  */
+/*     $NetBSD: pmap_pv.h,v 1.14 2020/03/14 18:24:10 ad Exp $  */
 
 /*-
  * Copyright (c)2008 YAMAMOTO Takashi,
@@ -34,6 +34,7 @@
 #include <sys/rbtree.h>
 
 struct vm_page;
+struct pmap_page;
 
 /*
  * structures to track P->V mapping
@@ -51,14 +52,14 @@
 };
 
 /*
- * pv_entry: plug pv_pte into lists.
+ * pv_entry: plug pv_pte into lists.  32 bytes on i386, 64 on amd64.
  */
 
 struct pv_entry {
        struct pv_pte pve_pte;          /* should be the first member */
        LIST_ENTRY(pv_entry) pve_list;  /* on pmap_page::pp_pvlist */
        rb_node_t pve_rb;               /* red-black tree node */
-       uintptr_t pve_padding;          /* unused */
+       struct pmap_page *pve_pp;       /* backpointer to mapped page */
 };
 #define        pve_next        pve_list.le_next
 
@@ -71,16 +72,14 @@
                /* PTPs */
                rb_tree_t rb;
 
-               /* PTPs */
+               /* PTPs, when being freed */
                LIST_ENTRY(vm_page) link;
 
-               /* Non-PTPs */
+               /* Non-PTPs (i.e. normal pages) */
                struct {
-                       /* PP_EMBEDDED */
                        struct pv_pte pte;
-
                        LIST_HEAD(, pv_entry) pvlist;
-                       uint8_t flags;
+                       uint8_t embedded;
                        uint8_t attrs;
                } s;
        } pp_u;
@@ -89,7 +88,7 @@
 #define        pp_link         pp_u.link
 #define        pp_pte          pp_u.s.pte
 #define pp_pvlist      pp_u.s.pvlist
-#define        pp_pflags       pp_u.s.flags
+#define        pp_embedded     pp_u.s.embedded
 #define        pp_attrs        pp_u.s.attrs
 };
 
@@ -97,10 +96,6 @@
 #define PP_ATTRS_A     0x02    /* Accessed */
 #define PP_ATTRS_W     0x04    /* Writable */
 
-/* pp_flags */
-#define        PP_EMBEDDED     1
-#define        PP_FREEING      2
-
 #define        PMAP_PAGE_INIT(pp) \
 do { \
        LIST_INIT(&(pp)->pp_pvlist); \
diff -r ec42ae4d8511 -r 0073b25fe5b6 sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Sat Mar 14 18:08:38 2020 +0000
+++ b/sys/arch/x86/x86/pmap.c   Sat Mar 14 18:24:10 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.365 2020/03/14 14:05:44 ad Exp $    */
+/*     $NetBSD: pmap.c,v 1.366 2020/03/14 18:24:10 ad Exp $    */
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.365 2020/03/14 14:05:44 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.366 2020/03/14 18:24:10 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -224,23 +224,39 @@
 /*
  * Locking
  *
- * We have the following locks that we must contend with, listed in the
- * order that they must be acquired:
+ * We have the following locks that we must deal with, listed in the order
+ * that they are acquired:
+ *
+ * pg->uobject->vmobjlock, pg->uanon->an_lock
  *
- * - pg->uobject->vmobjlock, pg->uanon->an_lock
- *   These per-object locks are taken by the VM system before calling into
- *   the pmap module.  Holding them prevents concurrent operations on the
- *   given page or set of pages.
+ *     For managed pages, these per-object locks are taken by the VM system
+ *     before calling into the pmap module - either a read or write hold. 
+ *     The lock hold prevents pages from changing identity while the pmap is
+ *     operating on them.  For example, the same lock is held across a call
+ *     to pmap_remove() and the following call to pmap_update(), so that a
+ *     page does not gain a new identity while its TLB visibility is stale.
+ *
+ * pmap->pm_lock
  *
- * - pmap->pm_lock (per pmap)
- *   This lock protects the fields in the pmap structure including the
- *   non-kernel PDEs in the PDP, the PTEs, and the PVE radix tree.  For
- *   modifying kernel PTEs it is not required as kernel PDEs are never
- *   freed, and the kernel is expected to be self consistent.
+ *     This lock protects the fields in the pmap structure including the
+ *     non-kernel PDEs in the PDP, the PTEs, and PTPs and connected data
+ *     structures.  For modifying unmanaged kernel PTEs it is not needed as
+ *     kernel PDEs are never freed, and the kernel is expected to be self
+ *     consistent (and the lock can't be taken for unmanaged kernel PTEs,
+ *     because they can be modified from interrupt context).
+ *
+ * pmaps_lock
  *
- * - pmaps_lock
- *   This lock protects the list of active pmaps (headed by "pmaps"). We
- *   lock it when adding or removing pmaps from this list.
+ *     This lock protects the list of active pmaps (headed by "pmaps"). 
+ *     It's acquired when adding or removing pmaps or adjusting kernel PDEs.
+ *
+ * pp_lock
+ *
+ *     This per-page lock protects PV entry lists and the embedded PV entry
+ *     in each vm_page, allowing for concurrent operation on pages by
+ *     different pmaps.  This is a spin mutex at IPL_VM, because at the
+ *     points it is taken context switching is usually not tolerable, and
+ *     spin mutexes must block out interrupts that could take kernel_lock.
  */
 
 /* uvm_object is abused here to index pmap_pages; make assertions happy. */
@@ -530,7 +546,7 @@
 {
 
        KASSERT(mutex_owned(&pp->pp_lock));
-       if ((pp->pp_pflags & PP_EMBEDDED) != 0) {
+       if (pp->pp_embedded) {
                return &pp->pp_pte;
        }
        return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
@@ -543,7 +559,7 @@
        KASSERT(mutex_owned(&pp->pp_lock));
        KASSERT(pvpte != NULL);
        if (pvpte == &pp->pp_pte) {
-               KASSERT((pp->pp_pflags & PP_EMBEDDED) != 0);
+               KASSERT(pp->pp_embedded);
                return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
        }
        return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
@@ -605,6 +621,82 @@
 }
 
 /*
+ * pmap_ptp_init: initialize new page table page
+ */
+static inline void
+pmap_ptp_init(struct vm_page *ptp)
+{
+       uint16_t *minidx, *maxidx;
+
+       minidx = (uint16_t *)&ptp->uanon;
+       maxidx = (uint16_t *)&ptp->uanon + 1;
+
+       *minidx = UINT16_MAX;
+       *maxidx = 0;
+       rb_tree_init(&VM_PAGE_TO_PP(ptp)->pp_rb, &pmap_rbtree_ops);
+}
+
+/*
+ * pmap_ptp_fini: finalize a page table page
+ */
+static inline void
+pmap_ptp_fini(struct vm_page *ptp)
+{
+
+       KASSERT(RB_TREE_MIN(&VM_PAGE_TO_PP(ptp)->pp_rb) == NULL);
+       ptp->uanon = NULL;
+}
+
+/*
+ * pmap_ptp_range_set: abuse ptp->uanon to record min/max idx of PTE
+ */
+static inline void
+pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va)
+{
+       uint16_t *minidx, *maxidx;
+       u_int idx;
+
+       idx = pl1_pi(va);
+       KASSERT(idx < UINT16_MAX);
+       minidx = (uint16_t *)&ptp->uanon;
+       maxidx = (uint16_t *)&ptp->uanon + 1;
+
+       if (idx < *minidx) {
+               *minidx = idx;
+       }
+       if (idx > *maxidx) {
+               *maxidx = idx;
+       }
+}
+
+/*
+ * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove
+ */
+static inline void
+pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, vaddr_t *endva,
+    pt_entry_t **pte)
+{
+       vaddr_t sclip, eclip;
+
+       if (ptp == NULL) {
+               return;
+       }
+
+       sclip = (*startva & L2_FRAME) +
+           x86_ptob(*(uint16_t *)&ptp->uanon);
+       eclip = (*startva & L2_FRAME) +
+           x86_ptob(*((uint16_t *)&ptp->uanon + 1) + 1);
+
+       KASSERT(sclip < eclip);
+
+       sclip = (*startva < sclip ? sclip : *startva);
+       eclip = (*endva > eclip ? eclip : *endva);
+       *pte += (sclip - *startva) / PAGE_SIZE;
+       *startva = sclip;
+       *endva = eclip;
+}
+
+/*
  * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
  *
  * there are several pmaps involved.  some or all of them might be same.
@@ -656,7 +748,9 @@
                 * often the case during exit(), when we have switched
                 * to the kernel pmap in order to destroy a user pmap.
                 */
-               pmap_reactivate(pmap);
+               if (__predict_false(ci->ci_tlbstate != TLBSTATE_VALID)) {
+                       pmap_reactivate(pmap);
+               }
                *pmap2 = NULL;
        } else {
                /*
@@ -1771,7 +1865,7 @@
         * The kernel doesn't keep track of PTPs, so there's nowhere handy
         * to hang a tree of pv_entry records.  Dynamically allocated
         * pv_entry lists are not heavily used in the kernel's pmap (the
-        * usual case is PP_EMBEDDED), so cop out and use a single RB tree
+        * usual case is embedded), so cop out and use a single RB tree
         * to cover them.
         */
        rb_tree_init(&pmap_kernel_rb, &pmap_rbtree_ops);
@@ -1857,28 +1951,6 @@
  * p v _ e n t r y   f u n c t i o n s


