Source-Changes-HG archive


[src/trunk]: src/sys/arch Enable caching on kernel and user page tables. Thi...



details:   https://anonhg.NetBSD.org/src/rev/99d2059721e8
branches:  trunk
changeset: 535633:99d2059721e8
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Sat Aug 24 02:16:30 2002 +0000

description:
Enable caching on kernel and user page tables.  This saves having
to do uncached memory access during VM operations (which can be
quite expensive on some CPUs).

We currently write-back PTEs as soon as they're modified; there is
some room for optimization (to write them back in larger chunks).
For PTEs in the APTE space (i.e. PTEs for pmaps that describe another
process's address space), PTEs must also be evicted completely from
the cache (PTEs in the PTE space will be evicted during a context
switch).
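
The policy this change adopts is captured by the PTE_SYNC_CURRENT and
PTE_FLUSH_ALT macros added to sys/arch/arm/arm32/pmap.c below.  As a
rough illustration only, the standalone sketch that follows models the
decision (write back a modified PTE when the owning pmap is the current
one, otherwise evict the cache line entirely) using hypothetical
stand-ins for the real ARM cache-maintenance primitives and
pmap_is_curpmap(); it is not the kernel code itself.

	/*
	 * Standalone sketch of the PTE cache-maintenance policy described
	 * above.  The pmap structure and the cache operations here are
	 * hypothetical stand-ins, not the real NetBSD kernel interfaces.
	 */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t pt_entry_t;

	struct pmap {
		bool is_current;	/* stand-in for pmap_is_curpmap() */
	};

	/* Stand-in for a D-cache write-back of the line holding the PTE. */
	static void
	pte_writeback_line(pt_entry_t *pte)
	{
		printf("write back PTE at %p\n", (void *)pte);
	}

	/* Stand-in for a D-cache write-back-and-invalidate (eviction). */
	static void
	pte_evict_line(pt_entry_t *pte)
	{
		printf("evict PTE at %p\n", (void *)pte);
	}

	/*
	 * Modified-PTE policy: if the pmap is the current one, the PTE is
	 * reachable through the PTE space, so a write-back suffices.
	 * Otherwise the PTE was touched through the APTE space and must be
	 * evicted completely so no stale cache line survives once that
	 * alternate mapping goes away.
	 */
	static void
	pte_sync_current(struct pmap *pm, pt_entry_t *pte)
	{
		if (pm->is_current)
			pte_writeback_line(pte);
		else
			pte_evict_line(pte);
	}

	int
	main(void)
	{
		struct pmap self  = { .is_current = true };
		struct pmap other = { .is_current = false };
		pt_entry_t pte = 0;

		pte_sync_current(&self, &pte);	/* current pmap: write back */
		pte_sync_current(&other, &pte);	/* other pmap: evict */
		return 0;
	}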

diffstat:

 sys/arch/acorn32/acorn32/rpc_machdep.c           |   12 +-
 sys/arch/arm/arm32/pmap.c                        |  140 +++++++++++++++++++---
 sys/arch/arm/include/arm32/pmap.h                |   12 +-
 sys/arch/cats/cats/cats_machdep.c                |    8 +-
 sys/arch/evbarm/integrator/integrator_machdep.c  |    8 +-
 sys/arch/evbarm/iq80310/iq80310_machdep.c        |    8 +-
 sys/arch/evbarm/iq80321/iq80321_machdep.c        |    8 +-
 sys/arch/evbarm/ixm1200/ixm1200_machdep.c        |    8 +-
 sys/arch/hpcarm/hpcarm/hpc_machdep.c             |   10 +-
 sys/arch/netwinder/netwinder/netwinder_machdep.c |    8 +-
 sys/arch/shark/ofw/ofw.c                         |   12 +-
 11 files changed, 163 insertions(+), 71 deletions(-)

diffs (truncated from 799 to 300 lines):

diff -r f780d23e51cb -r 99d2059721e8 sys/arch/acorn32/acorn32/rpc_machdep.c
--- a/sys/arch/acorn32/acorn32/rpc_machdep.c    Sat Aug 24 01:05:14 2002 +0000
+++ b/sys/arch/acorn32/acorn32/rpc_machdep.c    Sat Aug 24 02:16:30 2002 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: rpc_machdep.c,v 1.40 2002/08/22 01:13:53 thorpej Exp $ */
+/*     $NetBSD: rpc_machdep.c,v 1.41 2002/08/24 02:16:30 thorpej Exp $ */
 
 /*
  * Copyright (c) 2000-2001 Reinoud Zandijk.
@@ -55,7 +55,7 @@
 
 #include <sys/param.h>
 
-__KERNEL_RCSID(0, "$NetBSD: rpc_machdep.c,v 1.40 2002/08/22 01:13:53 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rpc_machdep.c,v 1.41 2002/08/24 02:16:30 thorpej Exp $");
 
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -777,24 +777,24 @@
        pmap_map_entry(l1pagetable,
            PTE_BASE + (KERNEL_BASE >> (PGSHIFT-2)),
            kernel_pt_table[KERNEL_PT_KERNEL].pv_pa, VM_PROT_READ|VM_PROT_WRITE,
-           PTE_NOCACHE);
+           PTE_CACHE);
        pmap_map_entry(l1pagetable,
            PTE_BASE + (PTE_BASE >> (PGSHIFT-2)),
            kernel_ptpt.pv_pa, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE);
        pmap_map_entry(l1pagetable,
            PTE_BASE + (VMEM_VBASE >> (PGSHIFT-2)),
            kernel_pt_table[KERNEL_PT_VMEM].pv_pa, VM_PROT_READ|VM_PROT_WRITE,
-           PTE_NOCACHE);
+           PTE_CACHE);
        pmap_map_entry(l1pagetable,
            PTE_BASE+ (0x00000000 >> (PGSHIFT-2)),
            kernel_pt_table[KERNEL_PT_SYS].pv_pa, VM_PROT_READ|VM_PROT_WRITE,
-           PTE_NOCACHE);
+           PTE_CACHE);
        for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; ++loop) {
                pmap_map_entry(l1pagetable,
                    PTE_BASE + ((KERNEL_VM_BASE +
                    (loop * 0x00400000)) >> (PGSHIFT-2)),
                    kernel_pt_table[KERNEL_PT_VMDATA + loop].pv_pa,
-                   VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE);
+                   VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
        }
 
        /* Map the vector page. */
diff -r f780d23e51cb -r 99d2059721e8 sys/arch/arm/arm32/pmap.c
--- a/sys/arch/arm/arm32/pmap.c Sat Aug 24 01:05:14 2002 +0000
+++ b/sys/arch/arm/arm32/pmap.c Sat Aug 24 02:16:30 2002 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pmap.c,v 1.112 2002/08/22 01:13:55 thorpej Exp $       */
+/*     $NetBSD: pmap.c,v 1.113 2002/08/24 02:16:31 thorpej Exp $       */
 
 /*
  * Copyright (c) 2002 Wasabi Systems, Inc.
@@ -143,7 +143,7 @@
 #include <machine/param.h>
 #include <arm/arm32/katelib.h>
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.112 2002/08/22 01:13:55 thorpej Exp $");        
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.113 2002/08/24 02:16:31 thorpej Exp $");        
 #ifdef PMAP_DEBUG
 #define        PDEBUG(_lev_,_stat_) \
        if (pmap_debug_level >= (_lev_)) \
@@ -340,7 +340,7 @@
 
 struct l1pt *pmap_alloc_l1pt __P((void));
 static __inline void pmap_map_in_l1 __P((struct pmap *pmap, vaddr_t va,
-     vaddr_t l2pa, boolean_t));
+     vaddr_t l2pa, int));
 
 static pt_entry_t *pmap_map_ptes __P((struct pmap *));
 static void pmap_unmap_ptes __P((struct pmap *));
@@ -421,6 +421,33 @@
 }
 
 /*
+ * PTE_SYNC_CURRENT:
+ *
+ *     Make sure the pte is flushed to RAM.  If the pmap is
+ *     not the current pmap, then also evict the pte from
+ *     any cache lines.
+ */
+#define        PTE_SYNC_CURRENT(pmap, pte)                                     \
+do {                                                                   \
+       if (pmap_is_curpmap(pmap))                                      \
+               PTE_SYNC(pte);                                          \
+       else                                                            \
+               PTE_FLUSH(pte);                                         \
+} while (/*CONSTCOND*/0)
+
+/*
+ * PTE_FLUSH_ALT:
+ *
+ *     Make sure the pte is not in any cache lines.  We expect
+ *     this to be used only when a pte has not been modified.
+ */
+#define        PTE_FLUSH_ALT(pmap, pte)                                        \
+do {                                                                   \
+       if (pmap_is_curpmap(pmap) == 0)                                 \
+               PTE_FLUSH(pte);                                         \
+} while (/*CONSTCOND*/0)
+
+/*
  * p v _ e n t r y   f u n c t i o n s
  */
 
@@ -903,8 +930,11 @@
  * the given pmap to cover a chunk of virtual address space starting from the
  * address specified.
  */
+#define        PMAP_PTP_SELFREF        0x01
+#define        PMAP_PTP_CACHEABLE      0x02
+
 static __inline void
-pmap_map_in_l1(struct pmap *pmap, vaddr_t va, paddr_t l2pa, boolean_t selfref)
+pmap_map_in_l1(struct pmap *pmap, vaddr_t va, paddr_t l2pa, int flags)
 {
        vaddr_t ptva;
 
@@ -919,9 +949,12 @@
        cpu_dcache_wb_range((vaddr_t) &pmap->pm_pdir[ptva + 0], 16);
 
        /* Map the page table into the page table area. */
-       if (selfref)
+       if (flags & PMAP_PTP_SELFREF) {
                *((pt_entry_t *)(pmap->pm_vptpt + ptva)) = L2_S_PROTO | l2pa |
-                   L2_S_PROT(PTE_KERNEL, VM_PROT_READ|VM_PROT_WRITE);
+                   L2_S_PROT(PTE_KERNEL, VM_PROT_READ|VM_PROT_WRITE) |
+                   ((flags & PMAP_PTP_CACHEABLE) ? pte_l2_s_cache_mode : 0);
+               PTE_SYNC_CURRENT(pmap, (pt_entry_t *)(pmap->pm_vptpt + ptva));
+       }
 }
 
 #if 0
@@ -942,6 +975,7 @@
 
        /* Unmap the page table from the page table area. */
        *((pt_entry_t *)(pmap->pm_vptpt + ptva)) = 0;
+       PTE_SYNC_CURRENT(pmap, (pt_entry_t *)(pmap->pm_vptpt + ptva));
 }
 #endif
 
@@ -1446,7 +1480,7 @@
            (L1_TABLE_SIZE - KERNEL_PD_SIZE), KERNEL_PD_SIZE);
 
        /* Wire in this page table */
-       pmap_map_in_l1(pmap, PTE_BASE, pmap->pm_pptpt, TRUE);
+       pmap_map_in_l1(pmap, PTE_BASE, pmap->pm_pptpt, PMAP_PTP_SELFREF);
 
        pt->pt_flags &= ~PTFLAG_CLEAN;  /* L1 is dirty now */
 
@@ -1600,6 +1634,9 @@
        simple_lock(&pmap->pm_obj.vmobjlock);
        while ((page = TAILQ_FIRST(&pmap->pm_obj.memq)) != NULL) {
                KASSERT((page->flags & PG_BUSY) == 0);
+               /* XXXJRT Clean this up. */
+               cpu_dcache_inv_range(trunc_page((vaddr_t)vtopte(page->offset)),
+                   PAGE_SIZE);
                page->wire_count = 0;
                uvm_pagefree(page);
        }
@@ -1796,6 +1833,7 @@
         */
        *cdst_pte = L2_S_PROTO | phys |
            L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
+       PTE_SYNC(cdst_pte);
        cpu_tlb_flushD_SE(cdstp);
        cpu_cpwait();
        bzero_page(cdstp);
@@ -1823,6 +1861,7 @@
        *cdst_pte = L2_S_PROTO | phys |
            L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
            L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);       /* mini-data */
+       PTE_SYNC(cdst_pte);
        cpu_tlb_flushD_SE(cdstp);
        cpu_cpwait();
        bzero_page(cdstp);
@@ -1857,6 +1896,7 @@
         */
        *cdst_pte = L2_S_PROTO | phys |
            L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
+       PTE_SYNC(cdst_pte);
        cpu_tlb_flushD_SE(cdstp);
        cpu_cpwait();
 
@@ -1921,8 +1961,10 @@
         */
        *csrc_pte = L2_S_PROTO | src |
            L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode;
+       PTE_SYNC(csrc_pte);
        *cdst_pte = L2_S_PROTO | dst |
            L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
+       PTE_SYNC(cdst_pte);
        cpu_tlb_flushD_SE(csrcp);
        cpu_tlb_flushD_SE(cdstp);
        cpu_cpwait();
@@ -1964,9 +2006,11 @@
        *csrc_pte = L2_S_PROTO | src |
            L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
            L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);       /* mini-data */
+       PTE_SYNC(csrc_pte);
        *cdst_pte = L2_S_PROTO | dst |
            L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
            L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);       /* mini-data */
+       PTE_SYNC(cdst_pte);
        cpu_tlb_flushD_SE(csrcp);
        cpu_tlb_flushD_SE(cdstp);
        cpu_cpwait();
@@ -2232,6 +2276,8 @@
                            || kpmap == npv->pv_pmap) && 
                            (npv->pv_flags & PVF_NC) == 0) {
                                ptes[arm_btop(npv->pv_va)] &= ~L2_S_CACHE_MASK;
+                               PTE_SYNC_CURRENT(pmap,
+                                   &ptes[arm_btop(npv->pv_va)]);
                                npv->pv_flags |= PVF_NC;
                                /*
                                 * If this page needs flushing from the
@@ -2265,6 +2311,8 @@
                            (npv->pv_flags & PVF_NC)) {
                                ptes[arm_btop(npv->pv_va)] |=
                                    pte_l2_s_cache_mode;
+                               PTE_SYNC_CURRENT(pmap,
+                                   &ptes[arm_btop(npv->pv_va)]);
                                npv->pv_flags &= ~PVF_NC;
                        }
                }
@@ -2331,12 +2379,13 @@
        /* Now loop along */
        while (sva < eva) {
                /* Check if we can move to the next PDE (l1 chunk) */
-               if (!(sva & L2_ADDR_BITS))
+               if ((sva & L2_ADDR_BITS) == 0) {
                        if (!pmap_pde_page(pmap_pde(pmap, sva))) {
                                sva += L1_S_SIZE;
                                pte += arm_btop(L1_S_SIZE);
                                continue;
                        }
+               }
 
                /* We've found a valid PTE, so this page of PTEs has to go. */
                if (pmap_pte_v(pte)) {
@@ -2374,11 +2423,22 @@
                                 * Roll back the previous PTE list,
                                 * and zero out the current PTE.
                                 */
-                               for (cnt = 0; cnt < PMAP_REMOVE_CLEAN_LIST_SIZE; cnt++) {
+                               for (cnt = 0;
+                                    cnt < PMAP_REMOVE_CLEAN_LIST_SIZE;
+                                    cnt++) {
                                        *cleanlist[cnt].pte = 0;
-                                       pmap_pte_delref(pmap, cleanlist[cnt].va);
+                                       if (pmap_active)
+                                               PTE_SYNC(cleanlist[cnt].pte);
+                                       else
+                                               PTE_FLUSH(cleanlist[cnt].pte);
+                                       pmap_pte_delref(pmap,
+                                           cleanlist[cnt].va);
                                }
                                *pte = 0;
+                               if (pmap_active)
+                                       PTE_SYNC(pte);
+                               else
+                                       PTE_FLUSH(pte);
                                pmap_pte_delref(pmap, sva);
                                cleanlist_idx++;
                        } else {
@@ -2388,6 +2448,10 @@
                                 * and we won't need to do it again
                                 */
                                *pte = 0;
+                               if (pmap_active)
+                                       PTE_SYNC(pte);
+                               else
+                                       PTE_FLUSH(pte);
                                pmap_pte_delref(pmap, sva);
                        }
 
@@ -2404,7 +2468,8 @@
                                pmap_vac_me_harder(pmap, pg, ptes, FALSE);
                                simple_unlock(&pg->mdpage.pvh_slock);
                        }
-               }
+               } else if (pmap_active == 0)
+                       PTE_FLUSH(pte);
                sva += NBPG;
                pte++;
        }
@@ -2422,8 +2487,11 @@
                                    NBPG);
                                *cleanlist[cnt].pte = 0;
                                cpu_tlb_flushID_SE(cleanlist[cnt].va);
-                       } else
+                               PTE_SYNC(cleanlist[cnt].pte);
+                       } else {
                                *cleanlist[cnt].pte = 0;
+                               PTE_FLUSH(cleanlist[cnt].pte);
+                       }
                        pmap_pte_delref(pmap, cleanlist[cnt].va);


