Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/alpha/alpha Move the optimized pmap_zero_page() fro...



details:   https://anonhg.NetBSD.org/src/rev/f5fe95230766
branches:  trunk
changeset: 984521:f5fe95230766
user:      thorpej <thorpej%NetBSD.org@localhost>
date:      Sat Jul 10 20:22:37 2021 +0000

description:
Move the optimized pmap_zero_page() from pmap.c to a new pmap_subr.s,
and optimize it a teeny bit more.  Provide an optimized (for 21164,
anyway) pmap_copy_page() as well.

diffstat:

 sys/arch/alpha/alpha/pmap.c      |   86 +-------------------
 sys/arch/alpha/alpha/pmap_subr.s |  165 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+), 81 deletions(-)

diffs (276 lines):

diff -r d3cbbebc26c6 -r f5fe95230766 sys/arch/alpha/alpha/pmap.c
--- a/sys/arch/alpha/alpha/pmap.c       Sat Jul 10 19:30:19 2021 +0000
+++ b/sys/arch/alpha/alpha/pmap.c       Sat Jul 10 20:22:37 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.296 2021/07/05 15:12:00 thorpej Exp $ */
+/* $NetBSD: pmap.c,v 1.297 2021/07/10 20:22:37 thorpej Exp $ */
 
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020
@@ -135,7 +135,7 @@
 
 #include <sys/cdefs.h>                 /* RCS ID & Copyright macro defns */
 
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.296 2021/07/05 15:12:00 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.297 2021/07/10 20:22:37 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -2758,85 +2758,9 @@
        pmap_destroy(pmap);
 }
 
-/*
- * pmap_zero_page:             [ INTERFACE ]
- *
- *     Zero the specified (machine independent) page by mapping the page
- *     into virtual memory and clear its contents, one machine dependent
- *     page at a time.
- *
- *     Note: no locking is necessary in this function.
- */
-void
-pmap_zero_page(paddr_t phys)
-{
-       u_long *p0, *p1, *pend;
-
-#ifdef DEBUG
-       if (pmapdebug & PDB_FOLLOW)
-               printf("pmap_zero_page(%lx)\n", phys);
-#endif
-
-       p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
-       p1 = NULL;
-       pend = (u_long *)((u_long)p0 + PAGE_SIZE);
-
-       /*
-        * Unroll the loop a bit, doing 16 quadwords per iteration.
-        * Do only 8 back-to-back stores, and alternate registers.
-        */
-       do {
-               __asm volatile(
-               "# BEGIN loop body\n"
-               "       addq    %2, (8 * 8), %1         \n"
-               "       stq     $31, (0 * 8)(%0)        \n"
-               "       stq     $31, (1 * 8)(%0)        \n"
-               "       stq     $31, (2 * 8)(%0)        \n"
-               "       stq     $31, (3 * 8)(%0)        \n"
-               "       stq     $31, (4 * 8)(%0)        \n"
-               "       stq     $31, (5 * 8)(%0)        \n"
-               "       stq     $31, (6 * 8)(%0)        \n"
-               "       stq     $31, (7 * 8)(%0)        \n"
-               "                                       \n"
-               "       addq    %3, (8 * 8), %0         \n"
-               "       stq     $31, (0 * 8)(%1)        \n"
-               "       stq     $31, (1 * 8)(%1)        \n"
-               "       stq     $31, (2 * 8)(%1)        \n"
-               "       stq     $31, (3 * 8)(%1)        \n"
-               "       stq     $31, (4 * 8)(%1)        \n"
-               "       stq     $31, (5 * 8)(%1)        \n"
-               "       stq     $31, (6 * 8)(%1)        \n"
-               "       stq     $31, (7 * 8)(%1)        \n"
-               "       # END loop body"
-               : "=r" (p0), "=r" (p1)
-               : "0" (p0), "1" (p1)
-               : "memory");
-       } while (p0 < pend);
-}
-
-/*
- * pmap_copy_page:             [ INTERFACE ]
- *
- *     Copy the specified (machine independent) page by mapping the page
- *     into virtual memory and using memcpy to copy the page, one machine
- *     dependent page at a time.
- *
- *     Note: no locking is necessary in this function.
- */
-void
-pmap_copy_page(paddr_t src, paddr_t dst)
-{
-       const void *s;
-       void *d;
-
-#ifdef DEBUG
-       if (pmapdebug & PDB_FOLLOW)
-               printf("pmap_copy_page(%lx, %lx)\n", src, dst);
-#endif
-       s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
-       d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
-       memcpy(d, s, PAGE_SIZE);
-}
+/* pmap_zero_page() is in pmap_subr.s */
+
+/* pmap_copy_page() is in pmap_subr.s */
 
 /*
  * pmap_pageidlezero:          [ INTERFACE ]
diff -r d3cbbebc26c6 -r f5fe95230766 sys/arch/alpha/alpha/pmap_subr.s
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/alpha/alpha/pmap_subr.s  Sat Jul 10 20:22:37 2021 +0000
@@ -0,0 +1,165 @@
+/* $NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2021 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+__KERNEL_RCSID(7, "$NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $")
+
+/*
+ * Optimized pmap subroutines.
+ */
+
+       .text
+inc7:  .stabs  __FILE__,132,0,0,inc7;  .loc    1 __LINE__
+
+/*
+ * pmap_zero_page:             [ INTERFACE ]
+ *
+ *     Zero the specified (machine independent) page by addressing it
+ *     through K0SEG and storing zero quadwords across all ALPHA_PGBYTES
+ *     bytes.  On entry, a0 holds the physical address of the page.
+ *
+ *     Note: no locking is necessary in this function.
+ */
+       .p2align 4
+LEAF(pmap_zero_page, 1)
+       /* No global references - skip LDGP() */
+
+       /*
+        * Code here is arranged to keep branch targets on 16-byte
+        * boundaries, minimize result latencies in the loop, unroll
+        * the loop to at least 20 insns, and to dual-issue when
+        * feasible.
+        *
+        * In the setup, we use nop and unop to minimize pipeline stalls
+        * on dependent instruction pairs.
+        */
+
+       /* ---- */
+       lda     t0, -1
+       nop
+       sll     t0, 42, t0              /* t0 = ALPHA_K0SEG_BASE */
+       /*
+        * Loop counter:
+        * ALPHA_PGBYTES / 8 bytes per store / 16 stores per iteration
+        */
+       lda     v0, ((ALPHA_PGBYTES / 8) / 16)
+       /* ---- */
+       or      a0, t0, a0              /* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
+       nop
+       addq    a0, (8*8), a2           /* a2 = a0 + 8-quads */
+       unop
+       /* ---- */
+1:     stq     zero, (0*8)(a0)         /* 0 */
+       stq     zero, (1*8)(a0)         /* 1 */
+       stq     zero, (2*8)(a0)         /* 2 */
+       stq     zero, (3*8)(a0)         /* 3 */
+       /* ---- */
+       stq     zero, (4*8)(a0)         /* 4 */
+       stq     zero, (5*8)(a0)         /* 5 */
+       stq     zero, (6*8)(a0)         /* 6 */
+       stq     zero, (7*8)(a0)         /* 7 */
+       /* ---- */
+       addq    a2, (8*8), a0           /* a0 = a2 + 8-quads (next pass) */
+       stq     zero, (0*8)(a2)         /* 8 */
+       stq     zero, (1*8)(a2)         /* 9 */
+       stq     zero, (2*8)(a2)         /* 10 */
+       /* ---- */
+       subq    v0, 1, v0               /* count-- */
+       stq     zero, (3*8)(a2)         /* 11 */
+       stq     zero, (4*8)(a2)         /* 12 */
+       stq     zero, (5*8)(a2)         /* 13 */
+       /* ---- */
+       stq     zero, (6*8)(a2)         /* 14 */
+       stq     zero, (7*8)(a2)         /* 15 */
+       addq    a0, (8*8), a2           /* a2 = a0 + 8-quads (next pass) */
+       bne     v0, 1b                  /* loop around if count != 0 */
+       /* ---- */
+
+       RET
+       END(pmap_zero_page)
+
+/*
+ * pmap_copy_page:             [ INTERFACE ]
+ *
+ *     Copy the specified (machine independent) page by addressing both
+ *     pages through K0SEG and copying quadwords from the source (a0)
+ *     to the destination (a1) across all ALPHA_PGBYTES bytes.
+ *
+ *     Note: no locking is necessary in this function.
+ */
+       .p2align 4
+LEAF(pmap_copy_page, 2)
+       /* No global references - skip LDGP() */
+
+       /* See pmap_zero_page() for the instruction scheduling rationale. */
+
+       /* ---- */
+       lda     t0, -1
+       nop
+       sll     t0, 42, t0              /* t0 = ALPHA_K0SEG_BASE */
+       /*
+        * Loop counter:
+        * ALPHA_PGBYTES / 8 bytes per quad / 8 quads copied per iteration
+        */
+       lda     v0, ((ALPHA_PGBYTES / 8) / 8)
+       /* ---- */
+       or      a0, t0, a0              /* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
+       unop
+       or      a1, t0, a1              /* a1 = ALPHA_PHYS_TO_K0SEG(a1) */
+       unop
+       /* ---- */
+1:     ldq     t0, (0*8)(a0)           /* load 0; K0SEG base in t0 is dead */
+       ldq     t1, (1*8)(a0)           /* load 1 */
+       ldq     t2, (2*8)(a0)           /* load 2 */
+       ldq     t3, (3*8)(a0)           /* load 3 */
+       /* ---- */
+       ldq     t4, (4*8)(a0)           /* load 4 */
+       ldq     t5, (5*8)(a0)           /* load 5 */
+       ldq     t6, (6*8)(a0)           /* load 6 */
+       ldq     t7, (7*8)(a0)           /* load 7 */
+       /* ---- */
+       addq    a0, (8*8), a0           /* a0 = a0 + 8-quads */
+       stq     t0, (0*8)(a1)           /* store 0 */
+       stq     t1, (1*8)(a1)           /* store 1 */
+       stq     t2, (2*8)(a1)           /* store 2 */
+       /* ---- */
+       subq    v0, 1, v0               /* count-- */
+       stq     t3, (3*8)(a1)           /* store 3 */
+       stq     t4, (4*8)(a1)           /* store 4 */
+       stq     t5, (5*8)(a1)           /* store 5 */
+       /* ---- */
+       stq     t6, (6*8)(a1)           /* store 6 */
+       stq     t7, (7*8)(a1)           /* store 7 */
+       addq    a1, (8*8), a1           /* a1 = a1 + 8-quads */
+       bne     v0, 1b                  /* loop around if count != 0 */
+       /* ---- */
+
+       RET
+       END(pmap_copy_page)



Home | Main Index | Thread Index | Old Index