Source-Changes-HG archive

[src/trunk]: src/sys/arch/x86 x86 TLB shootdown IPI changes:



details:   https://anonhg.NetBSD.org/src/rev/20f9fdf0c2e5
branches:  trunk
changeset: 461292:20f9fdf0c2e5
user:      ad <ad%NetBSD.org@localhost>
date:      Thu Nov 21 21:48:33 2019 +0000

description:
x86 TLB shootdown IPI changes:

- Shave some time off processing.
- Reduce cacheline/bus traffic on systems with many CPUs.
- Reduce time spent at IPL_VM.
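
The structural change behind these items is easiest to see in the
comment added to x86_tlb.c below: shootdowns are accumulated in a small
per-CPU packet, and for remote CPUs the packet is copied into a padded
mailbox on the initiator's kernel stack, after which the initiator stops
writing it; responders only read the packet and report completion
through a pending counter and a done flag.  What follows is a minimal
user-space sketch of that single-writer mailbox handshake, using C11
atomics and pthreads.  The field names tp_va, tp_count and tp_done
follow the diff; the thread harness, the simulated invalidation and the
memory-order choices are assumptions made for illustration, not NetBSD
code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NRESPONDERS 3

/* Shootdown packet, modeled on the new pmap_tlb_packet_t. */
typedef struct {
	uintptr_t	tp_va[6];
	uint8_t		tp_count;
	_Atomic uint8_t	tp_done;	/* set by the last responder */
} packet_t;

/*
 * Shared mailbox: a pointer to the packet on the initiator's stack,
 * plus the number of responders that still have to process it.
 */
static packet_t *_Atomic	mailbox;
static atomic_uint		pendcount;

static void *
responder(void *arg)
{
	packet_t *tp;

	/* Spin until the initiator publishes a packet. */
	while ((tp = atomic_load_explicit(&mailbox,
	    memory_order_acquire)) == NULL) {
		/* nothing */
	}

	/* "Invalidate": responders only read, so the line stays shared. */
	for (uint8_t i = 0; i < tp->tp_count; i++) {
		(void)tp->tp_va[i];
	}

	/* The last responder out signals completion to the initiator. */
	if (atomic_fetch_sub(&pendcount, 1) == 1) {
		atomic_store_explicit(&tp->tp_done, 1,
		    memory_order_release);
	}
	return NULL;
}

int
main(void)
{
	pthread_t t[NRESPONDERS];
	/* Like ts_tp below, the packet lives on the initiator's stack. */
	packet_t ts = { .tp_va = { 0x1000, 0x2000 }, .tp_count = 2 };

	atomic_store(&pendcount, NRESPONDERS);
	for (int i = 0; i < NRESPONDERS; i++) {
		pthread_create(&t[i], NULL, responder, NULL);
	}

	/* Publish the packet; the initiator never writes it again. */
	atomic_store_explicit(&mailbox, &ts, memory_order_release);

	/* Wait for the last responder to set tp_done. */
	while (atomic_load_explicit(&ts.tp_done,
	    memory_order_acquire) == 0) {
		/* spin */
	}
	printf("all %d responders done\n", NRESPONDERS);

	for (int i = 0; i < NRESPONDERS; i++) {
		pthread_join(t[i], NULL);
	}
	return 0;
}

Because the initiator is the only writer until tp_done is set, the
packet's cache line can sit in the shared state in every responder's
cache, which is where the "reduce cacheline/bus traffic" item above
comes from.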

diffstat:

 sys/arch/x86/include/cpu.h |    6 +-
 sys/arch/x86/x86/x86_tlb.c |  312 +++++++++++++++++++++++++-------------------
 2 files changed, 184 insertions(+), 134 deletions(-)

diffs (truncated from 531 to 300 lines):

diff -r 385f4f0eb8e2 -r 20f9fdf0c2e5 sys/arch/x86/include/cpu.h
--- a/sys/arch/x86/include/cpu.h        Thu Nov 21 21:45:34 2019 +0000
+++ b/sys/arch/x86/include/cpu.h        Thu Nov 21 21:48:33 2019 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: cpu.h,v 1.111 2019/11/21 19:57:24 ad Exp $     */
+/*     $NetBSD: cpu.h,v 1.112 2019/11/21 21:48:33 ad Exp $     */
 
 /*
  * Copyright (c) 1990 The Regents of the University of California.
@@ -76,6 +76,7 @@
 
 struct intrsource;
 struct pmap;
+struct kcpuset;
 
 #ifdef __x86_64__
 #define        i386tss x86_64_tss
@@ -135,7 +136,8 @@
        int ci_curldt;          /* current LDT descriptor */
        int ci_nintrhand;       /* number of H/W interrupt handlers */
        uint64_t ci_scratch;
-       uintptr_t ci_pmap_data[128 / sizeof(uintptr_t)];
+       uintptr_t ci_pmap_data[64 / sizeof(uintptr_t)];
+       struct kcpuset *ci_tlb_cpuset;
 
 #ifndef XENPV
        struct intrsource *ci_isources[MAX_INTR_SOURCES];
diff -r 385f4f0eb8e2 -r 20f9fdf0c2e5 sys/arch/x86/x86/x86_tlb.c
--- a/sys/arch/x86/x86/x86_tlb.c        Thu Nov 21 21:45:34 2019 +0000
+++ b/sys/arch/x86/x86/x86_tlb.c        Thu Nov 21 21:48:33 2019 +0000
@@ -1,7 +1,7 @@
-/*     $NetBSD: x86_tlb.c,v 1.8 2019/05/27 17:32:36 maxv Exp $ */
+/*     $NetBSD: x86_tlb.c,v 1.9 2019/11/21 21:48:33 ad Exp $   */
 
 /*-
- * Copyright (c) 2008-2012 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008-2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -40,7 +40,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.8 2019/05/27 17:32:36 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_tlb.c,v 1.9 2019/11/21 21:48:33 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -59,22 +59,33 @@
 #include <x86/i82489var.h>
 
 /*
- * TLB shootdown structures.
+ * TLB shootdown packet.  Each CPU has a copy of this packet, where we build
+ * sets of TLB shootdowns.  If shootdowns need to occur on remote CPUs, the
+ * packet is copied into a shared mailbox kept on the initiator's kernel
+ * stack.  Once the copy is made, no further updates to the mailbox are made
+ * until the request is completed.  This keeps the cache line in the shared
+ * state, and bus traffic to a minimum.
+ *
+ * On i386 the packet is 28 bytes in size.  On amd64 it's 52 bytes.
  */
-
 typedef struct {
-#ifdef _LP64
-       uintptr_t               tp_va[14];      /* whole struct: 128 bytes */
-#else
-       uintptr_t               tp_va[13];      /* whole struct: 64 bytes */
-#endif
-       uint16_t                tp_count;
-       uint16_t                tp_pte;
-       int                     tp_userpmap;
-       kcpuset_t *             tp_cpumask;
+       uintptr_t               tp_va[6];
+       uint8_t                 tp_count;
+       uint8_t                 tp_userpmap;
+       uint8_t                 tp_global;
+       uint8_t                 tp_done;
 } pmap_tlb_packet_t;
 
 /*
+ * Padded packet stored on the initiator's stack.
+ */
+typedef struct {
+       uint8_t                 ts_pad1[COHERENCY_UNIT];
+       pmap_tlb_packet_t       ts_tp;
+       uint8_t                 ts_pad2[COHERENCY_UNIT];
+} pmap_tlb_stackbuf_t;
+
+/*
  * No more than N separate invlpg.
  *
  * Statistically, a value of six is big enough to cover the requested number
@@ -82,14 +93,14 @@
  * reach the limit, and increasing it can actually reduce the performance due
  * to the high cost of invlpg.
  */
-#define        TP_MAXVA                6
+#define        TP_MAXVA                6       /* for individual mappings */
+#define        TP_ALLVA                255     /* special: shoot all mappings */
 
 /*
  * TLB shootdown state.
  */
-static pmap_tlb_packet_t       pmap_tlb_packet         __cacheline_aligned;
+static volatile pmap_tlb_packet_t * volatile pmap_tlb_packet __cacheline_aligned;
 static volatile u_int          pmap_tlb_pendcount      __cacheline_aligned;
-static volatile u_int          pmap_tlb_gen            __cacheline_aligned;
 static struct evcnt            pmap_tlb_evcnt          __cacheline_aligned;
 
 /*
@@ -123,9 +134,7 @@
 pmap_tlb_init(void)
 {
 
-       memset(&pmap_tlb_packet, 0, sizeof(pmap_tlb_packet_t));
-       pmap_tlb_pendcount = 0;
-       pmap_tlb_gen = 0;
+       KASSERT(__arraycount(pmap_tlb_packet->tp_va) >= TP_MAXVA);
 
        evcnt_attach_dynamic(&pmap_tlb_evcnt, EVCNT_TYPE_INTR,
            NULL, "TLB", "shootdown");
@@ -158,7 +167,7 @@
        pmap_tlb_packet_t *tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;
 
        memset(tp, 0, sizeof(pmap_tlb_packet_t));
-       kcpuset_create(&tp->tp_cpumask, true);
+       kcpuset_create(&ci->ci_tlb_cpuset, true);
 }
 
 static inline void
@@ -193,13 +202,13 @@
 }
 
 static inline void
-pmap_tlb_invalidate(const pmap_tlb_packet_t *tp)
+pmap_tlb_invalidate(volatile pmap_tlb_packet_t *tp)
 {
-       int i;
+       int i = tp->tp_count;
 
        /* Find out what we need to invalidate. */
-       if (tp->tp_count == (uint16_t)-1) {
-               if (tp->tp_pte & PTE_G) {
+       if (i == TP_ALLVA) {
+               if (tp->tp_global) {
                        /* Invalidating all TLB entries. */
                        tlbflushg();
                } else {
@@ -208,9 +217,10 @@
                }
        } else {
                /* Invalidating a single page or a range of pages. */
-               for (i = tp->tp_count - 1; i >= 0; i--) {
-                       pmap_update_pg(tp->tp_va[i]);
-               }
+               KASSERT(i != 0);
+               do {
+                       pmap_update_pg(tp->tp_va[--i]);
+               } while (i > 0);
        }
 }
 
@@ -221,6 +231,8 @@
 pmap_tlb_shootdown(struct pmap *pm, vaddr_t va, pt_entry_t pte, tlbwhy_t why)
 {
        pmap_tlb_packet_t *tp;
+       struct cpu_info *ci;
+       uint8_t count;
        int s;
 
 #ifndef XENPV
@@ -248,63 +260,65 @@
         * Add the shootdown operation to our pending set.
         */
        s = splvm();
-       tp = (pmap_tlb_packet_t *)curcpu()->ci_pmap_data;
+       ci = curcpu();
+       tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;
 
        /* Whole address flush will be needed if PTE_G is set. */
        CTASSERT(PTE_G == (uint16_t)PTE_G);
-       tp->tp_pte |= (uint16_t)pte;
+       tp->tp_global |= ((pte & PTE_G) != 0);
+       count = tp->tp_count;
 
-       if (tp->tp_count == (uint16_t)-1) {
-               /*
-                * Already flushing everything.
-                */
-       } else if (tp->tp_count < TP_MAXVA && va != (vaddr_t)-1LL) {
+       if (count < TP_MAXVA && va != (vaddr_t)-1LL) {
                /* Flush a single page. */
-               tp->tp_va[tp->tp_count++] = va;
-               KASSERT(tp->tp_count > 0);
+               tp->tp_va[count] = va;
+               tp->tp_count = count + 1;
        } else {
-               /* Flush everything. */
-               tp->tp_count = (uint16_t)-1;
+               /* Flush everything - may already be set. */
+               tp->tp_count = TP_ALLVA;
        }
 
        if (pm != pmap_kernel()) {
-               kcpuset_merge(tp->tp_cpumask, pm->pm_cpus);
+               kcpuset_merge(ci->ci_tlb_cpuset, pm->pm_cpus);
                if (va >= VM_MAXUSER_ADDRESS) {
-                       kcpuset_merge(tp->tp_cpumask, pm->pm_kernel_cpus);
+                       kcpuset_merge(ci->ci_tlb_cpuset, pm->pm_kernel_cpus);
                }
                tp->tp_userpmap = 1;
        } else {
-               kcpuset_copy(tp->tp_cpumask, kcpuset_running);
+               kcpuset_copy(ci->ci_tlb_cpuset, kcpuset_running);
        }
        pmap_tlbstat_count(pm, va, why);
        splx(s);
 }
 
-#ifdef MULTIPROCESSOR
 #ifdef XENPV
 
 static inline void
-pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
+pmap_tlb_processpacket(volatile pmap_tlb_packet_t *tp, kcpuset_t *target)
 {
+#ifdef MULTIPROCESSOR
+       int i = tp->tp_count;
 
-       if (tp->tp_count != (uint16_t)-1) {
+       if (i != TP_ALLVA) {
                /* Invalidating a single page or a range of pages. */
-               for (int i = tp->tp_count - 1; i >= 0; i--) {
-                       xen_mcast_invlpg(tp->tp_va[i], target);
-               }
+               KASSERT(i != 0);
+               do {
+                       xen_mcast_invlpg(tp->tp_va[--i], target);
+               } while (i > 0);
        } else {
                xen_mcast_tlbflush(target);
        }
 
        /* Remote CPUs have been synchronously flushed. */
        pmap_tlb_pendcount = 0;
+#endif /* MULTIPROCESSOR */
 }
 
 #else
 
 static inline void
-pmap_tlb_processpacket(pmap_tlb_packet_t *tp, kcpuset_t *target)
+pmap_tlb_processpacket(volatile pmap_tlb_packet_t *tp, kcpuset_t *target)
 {
+#ifdef MULTIPROCESSOR
        int err = 0;
 
        if (!kcpuset_match(target, kcpuset_attached)) {
@@ -327,10 +341,10 @@
                    LAPIC_DLMODE_FIXED);
        }
        KASSERT(err == 0);
+#endif /* MULTIPROCESSOR */
 }
 
 #endif /* XENPV */
-#endif /* MULTIPROCESSOR */
 
 /*
  * pmap_tlb_shootnow: process pending TLB shootdowns queued on current CPU.
@@ -340,142 +354,176 @@
 void
 pmap_tlb_shootnow(void)
 {
-       pmap_tlb_packet_t *tp;
+       volatile pmap_tlb_packet_t *tp;
+       volatile pmap_tlb_stackbuf_t ts;
        struct cpu_info *ci;
        kcpuset_t *target;
-       u_int local, gen, rcpucount;
+       u_int local, rcpucount;
        cpuid_t cid;
        int s;
 
        KASSERT(kpreempt_disabled());
 
+       /* Pre-check first. */
        ci = curcpu();
        tp = (pmap_tlb_packet_t *)ci->ci_pmap_data;
-
-       /* Pre-check first. */
        if (tp->tp_count == 0) {
                return;
        }
 
+       /* An interrupt may have flushed our updates, so check again. */
        s = splvm();
        if (tp->tp_count == 0) {
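
The diff is truncated before the responder side of the handshake
appears.  For orientation only, here is a hypothetical sketch of what
the IPI handler has to do, built from identifiers visible above
(pmap_tlb_packet, pmap_tlb_invalidate, pmap_tlb_pendcount, tp_done);
the function name and the exact ordering are assumptions, not the
committed code:

/*
 * Hypothetical responder sketch (kernel fragment, not the real
 * handler): flush per the published packet, then check out.
 */
static void
pmap_tlb_intr_sketch(void)
{
	volatile pmap_tlb_packet_t *tp = pmap_tlb_packet;

	/* Invalidate whatever the packet asks for. */
	pmap_tlb_invalidate(tp);

	/*
	 * The last CPU to finish sets tp_done, which releases the
	 * initiator's stack buffer; nobody may touch the packet
	 * after that.
	 */
	if (atomic_dec_uint_nv(&pmap_tlb_pendcount) == 0) {
		tp->tp_done = 1;
	}
}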


