Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/x86 Hallelujah, the bug has been found. Resurrect ...



details:   https://anonhg.NetBSD.org/src/rev/87732dba0a12
branches:  trunk
changeset: 849885:87732dba0a12
user:      ad <ad%NetBSD.org@localhost>
date:      Tue Mar 17 22:29:19 2020 +0000

description:
Hallelujah, the bug has been found.  Resurrect prior changes, to be fixed
with following commit.

diffstat:

 sys/arch/x86/include/pmap.h    |    6 +-
 sys/arch/x86/include/pmap_pv.h |   19 +-
 sys/arch/x86/x86/pmap.c        |  941 ++++++++++++++++++++++++++++------------
 3 files changed, 673 insertions(+), 293 deletions(-)

diffs (truncated from 1619 to 300 lines):

diff -r e2ef5c2563ea -r 87732dba0a12 sys/arch/x86/include/pmap.h
--- a/sys/arch/x86/include/pmap.h       Tue Mar 17 22:20:48 2020 +0000
+++ b/sys/arch/x86/include/pmap.h       Tue Mar 17 22:29:19 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.h,v 1.114 2020/03/17 21:02:56 ad Exp $    */
+/*     $NetBSD: pmap.h,v 1.115 2020/03/17 22:29:19 ad Exp $    */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -248,6 +248,8 @@
  * (the other object locks are only used when uvm_pagealloc is called)
  */
 
+struct pv_page;
+
 struct pmap {
        struct uvm_object pm_obj[PTP_LEVELS-1];/* objects for lvl >= 1) */
        LIST_ENTRY(pmap) pm_list;       /* list of all pmaps */
@@ -256,11 +258,11 @@
        struct vm_page *pm_ptphint[PTP_LEVELS-1];
                                        /* pointer to a PTP in our pmap */
        struct pmap_statistics pm_stats;  /* pmap stats */
+       struct pv_entry *pm_pve;        /* spare pv_entry */
 
 #if !defined(__x86_64__)
        vaddr_t pm_hiexec;              /* highest executable mapping */
 #endif /* !defined(__x86_64__) */
-       struct lwp *pm_remove_all;      /* who's emptying the pmap */
 
        union descriptor *pm_ldt;       /* user-set LDT */
        size_t pm_ldt_len;              /* size of LDT in bytes */
diff -r e2ef5c2563ea -r 87732dba0a12 sys/arch/x86/include/pmap_pv.h
--- a/sys/arch/x86/include/pmap_pv.h    Tue Mar 17 22:20:48 2020 +0000
+++ b/sys/arch/x86/include/pmap_pv.h    Tue Mar 17 22:29:19 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap_pv.h,v 1.16 2020/03/17 21:02:56 ad Exp $  */
+/*     $NetBSD: pmap_pv.h,v 1.17 2020/03/17 22:29:19 ad Exp $  */
 
 /*-
  * Copyright (c)2008 YAMAMOTO Takashi,
@@ -34,6 +34,7 @@
 #include <sys/rbtree.h>
 
 struct vm_page;
+struct pmap_page;
 
 /*
  * structures to track P->V mapping
@@ -51,14 +52,14 @@
 };
 
 /*
- * pv_entry: plug pv_pte into lists.
+ * pv_entry: plug pv_pte into lists.  32 bytes on i386, 64 on amd64.
  */
 
 struct pv_entry {
        struct pv_pte pve_pte;          /* should be the first member */
        LIST_ENTRY(pv_entry) pve_list;  /* on pmap_page::pp_pvlist */
        rb_node_t pve_rb;               /* red-black tree node */
-       uintptr_t pve_padding;          /* unused */
+       struct pmap_page *pve_pp;       /* backpointer to mapped page */
 };
 #define        pve_next        pve_list.le_next
 
@@ -71,16 +72,13 @@
                /* PTPs */
                rb_tree_t rb;
 
-               /* PTPs */
+               /* PTPs, when being freed */
                LIST_ENTRY(vm_page) link;
 
-               /* Non-PTPs */
+               /* Non-PTPs (i.e. normal pages) */
                struct {
-                       /* PP_EMBEDDED */
                        struct pv_pte pte;
-
                        LIST_HEAD(, pv_entry) pvlist;
-                       uint8_t flags;
                        uint8_t attrs;
                } s;
        } pp_u;
@@ -89,7 +87,6 @@
 #define        pp_link         pp_u.link
 #define        pp_pte          pp_u.s.pte
 #define pp_pvlist      pp_u.s.pvlist
-#define        pp_pflags       pp_u.s.flags
 #define        pp_attrs        pp_u.s.attrs
 };
 
@@ -97,10 +94,6 @@
 #define PP_ATTRS_A     0x02    /* Accessed */
 #define PP_ATTRS_W     0x04    /* Writable */
 
-/* pp_flags */
-#define        PP_EMBEDDED     1
-#define        PP_FREEING      2
-
 #define        PMAP_PAGE_INIT(pp) \
 do { \
        LIST_INIT(&(pp)->pp_pvlist); \
diff -r e2ef5c2563ea -r 87732dba0a12 sys/arch/x86/x86/pmap.c
--- a/sys/arch/x86/x86/pmap.c   Tue Mar 17 22:20:48 2020 +0000
+++ b/sys/arch/x86/x86/pmap.c   Tue Mar 17 22:29:19 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.373 2020/03/17 21:02:56 ad Exp $    */
+/*     $NetBSD: pmap.c,v 1.374 2020/03/17 22:29:19 ad Exp $    */
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.373 2020/03/17 21:02:56 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.374 2020/03/17 22:29:19 ad Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -139,6 +139,8 @@
 #include "opt_svs.h"
 #include "opt_kaslr.h"
 
+#define        __MUTEX_PRIVATE /* for assertions */
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
@@ -224,23 +226,39 @@
 /*
  * Locking
  *
- * We have the following locks that we must contend with, listed in the
- * order that they must be acquired:
+ * We have the following locks that we must deal with, listed in the order
+ * that they are acquired:
+ *
+ * pg->uobject->vmobjlock, pg->uanon->an_lock
  *
- * - pg->uobject->vmobjlock, pg->uanon->an_lock
- *   These per-object locks are taken by the VM system before calling into
- *   the pmap module.  Holding them prevents concurrent operations on the
- *   given page or set of pages.
+ *     For managed pages, these per-object locks are taken by the VM system
+ *     before calling into the pmap module - either a read or write hold. 
+ *     The lock hold prevents pages from changing identity while the pmap is
+ *     operating on them.  For example, the same lock is held across a call
+ *     to pmap_remove() and the following call to pmap_update(), so that a
+ *     page does not gain a new identity while its TLB visibility is stale.
+ *
+ * pmap->pm_lock
  *
- * - pmap->pm_lock (per pmap)
- *   This lock protects the fields in the pmap structure including the
- *   non-kernel PDEs in the PDP, the PTEs, and the PVE radix tree.  For
- *   modifying kernel PTEs it is not required as kernel PDEs are never
- *   freed, and the kernel is expected to be self consistent.
+ *     This lock protects the fields in the pmap structure including the
+ *     non-kernel PDEs in the PDP, the PTEs, and PTPs and connected data
+ *     structures.  For modifying unmanaged kernel PTEs it is not needed as
+ *     kernel PDEs are never freed, and the kernel is expected to be self
+ *     consistent (and the lock can't be taken for unmanaged kernel PTEs,
+ *     because they can be modified from interrupt context).
+ *
+ * pmaps_lock
  *
- * - pmaps_lock
- *   This lock protects the list of active pmaps (headed by "pmaps"). We
- *   lock it when adding or removing pmaps from this list.
+ *     This lock protects the list of active pmaps (headed by "pmaps"). 
+ *     It's acquired when adding or removing pmaps or adjusting kernel PDEs.
+ *
+ * pp_lock
+ *
+ *     This per-page lock protects PV entry lists and the embedded PV entry
+ *     in each vm_page, allowing for concurrent operation on pages by
+ *     different pmaps.  This is a spin mutex at IPL_VM, because at the
+ *     points it is taken context switching is usually not tolerable, and
+ *     spin mutexes must block out interrupts that could take kernel_lock.
  */
 
 /* uvm_object is abused here to index pmap_pages; make assertions happy. */
@@ -317,6 +335,8 @@
 #endif
 
 #define        VM_PAGE_TO_PP(pg)       (&(pg)->mdpage.mp_pp)
+#define        PMAP_CHECK_PP(pp) \
+    KASSERTMSG((pp)->pp_lock.mtx_ipl._ipl == IPL_VM, "bad pmap_page %p", pp)
 
 /*
  * Other data structures
@@ -523,6 +543,17 @@
 }
 
 /*
+ * Return true if the pmap page has an embedded PV entry.
+ */
+static inline bool
+pv_pte_embedded(struct pmap_page *pp)
+{
+
+       KASSERT(mutex_owned(&pp->pp_lock));
+       return (bool)((vaddr_t)pp->pp_pte.pte_ptp | pp->pp_pte.pte_va);
+}
+
+/*
  * pv_pte_first, pv_pte_next: PV list iterator.
  */
 static struct pv_pte *
@@ -530,7 +561,7 @@
 {
 
        KASSERT(mutex_owned(&pp->pp_lock));
-       if ((pp->pp_pflags & PP_EMBEDDED) != 0) {
+       if (pv_pte_embedded(pp)) {
                return &pp->pp_pte;
        }
        return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
@@ -543,7 +574,6 @@
        KASSERT(mutex_owned(&pp->pp_lock));
        KASSERT(pvpte != NULL);
        if (pvpte == &pp->pp_pte) {
-               KASSERT((pp->pp_pflags & PP_EMBEDDED) != 0);
                return pve_to_pvpte(LIST_FIRST(&pp->pp_pvlist));
        }
        return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
@@ -605,6 +635,61 @@
 }
 
 /*
+ * pmap_ptp_init: initialize new page table page
+ */
+static inline void
+pmap_ptp_init(struct vm_page *ptp)
+{
+
+       ptp->uanon = (struct vm_anon *)(vaddr_t)~0L;
+       rb_tree_init(&VM_PAGE_TO_PP(ptp)->pp_rb, &pmap_rbtree_ops);
+       PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
+}
+
+/*
+ * pmap_ptp_fini: finalize a page table page
+ */
+static inline void
+pmap_ptp_fini(struct vm_page *ptp)
+{
+
+       KASSERT(RB_TREE_MIN(&VM_PAGE_TO_PP(ptp)->pp_rb) == NULL);
+       PMAP_CHECK_PP(VM_PAGE_TO_PP(ptp));
+       ptp->uanon = NULL;
+}
+
+/*
+ * pmap_ptp_range_set: abuse ptp->uanon to record minimum VA of PTE
+ */
+static inline void
+pmap_ptp_range_set(struct vm_page *ptp, vaddr_t va)
+{
+       vaddr_t *min = (vaddr_t *)&ptp->uanon;
+
+       if (va < *min) {
+               *min = va;
+       }
+}
+
+/*
+ * pmap_ptp_range_clip: abuse ptp->uanon to clip range of PTEs to remove
+ */
+static inline void
+pmap_ptp_range_clip(struct vm_page *ptp, vaddr_t *startva, pt_entry_t **pte)
+{
+       vaddr_t sclip;
+
+       if (ptp == NULL) {
+               return;
+       }
+
+       sclip = (vaddr_t)ptp->uanon;
+       sclip = (*startva < sclip ? sclip : *startva);
+       *pte += (sclip - *startva) / PAGE_SIZE;
+       *startva = sclip;
+}
+
+/*
  * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
  *
  * there are several pmaps involved.  some or all of them might be same.
@@ -656,7 +741,9 @@
                 * often the case during exit(), when we have switched
                 * to the kernel pmap in order to destroy a user pmap.
                 */
-               pmap_reactivate(pmap);
+               if (__predict_false(ci->ci_tlbstate != TLBSTATE_VALID)) {
+                       pmap_reactivate(pmap);
+               }
                *pmap2 = NULL;
        } else {



Home | Main Index | Thread Index | Old Index