Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/alpha/alpha Move the optimized pmap_zero_page() fro...
details: https://anonhg.NetBSD.org/src/rev/e8feb4519228
branches: trunk
changeset: 1022257:e8feb4519228
user: thorpej <thorpej%NetBSD.org@localhost>
date: Sat Jul 10 20:22:37 2021 +0000
description:
Move the optimized pmap_zero_page() from pmap.c to a new pmap_subr.s,
and optimize it a little bit more. Provide an optimized (for 21164,
anyway) pmap_copy_page() as well.
diffstat:
sys/arch/alpha/alpha/pmap.c | 86 +-------------------
sys/arch/alpha/alpha/pmap_subr.s | 165 +++++++++++++++++++++++++++++++++++++++
2 files changed, 170 insertions(+), 81 deletions(-)
diffs (276 lines):
diff -r 8f00cb9d0011 -r e8feb4519228 sys/arch/alpha/alpha/pmap.c
--- a/sys/arch/alpha/alpha/pmap.c Sat Jul 10 19:30:19 2021 +0000
+++ b/sys/arch/alpha/alpha/pmap.c Sat Jul 10 20:22:37 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: pmap.c,v 1.296 2021/07/05 15:12:00 thorpej Exp $ */
+/* $NetBSD: pmap.c,v 1.297 2021/07/10 20:22:37 thorpej Exp $ */
/*-
* Copyright (c) 1998, 1999, 2000, 2001, 2007, 2008, 2020
@@ -135,7 +135,7 @@
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.296 2021/07/05 15:12:00 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.297 2021/07/10 20:22:37 thorpej Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -2758,85 +2758,9 @@
pmap_destroy(pmap);
}
-/*
- * pmap_zero_page: [ INTERFACE ]
- *
- * Zero the specified (machine independent) page by mapping the page
- * into virtual memory and clear its contents, one machine dependent
- * page at a time.
- *
- * Note: no locking is necessary in this function.
- */
-void
-pmap_zero_page(paddr_t phys)
-{
- u_long *p0, *p1, *pend;
-
-#ifdef DEBUG
- if (pmapdebug & PDB_FOLLOW)
- printf("pmap_zero_page(%lx)\n", phys);
-#endif
-
- p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
- p1 = NULL;
- pend = (u_long *)((u_long)p0 + PAGE_SIZE);
-
- /*
- * Unroll the loop a bit, doing 16 quadwords per iteration.
- * Do only 8 back-to-back stores, and alternate registers.
- */
- do {
- __asm volatile(
- "# BEGIN loop body\n"
- " addq %2, (8 * 8), %1 \n"
- " stq $31, (0 * 8)(%0) \n"
- " stq $31, (1 * 8)(%0) \n"
- " stq $31, (2 * 8)(%0) \n"
- " stq $31, (3 * 8)(%0) \n"
- " stq $31, (4 * 8)(%0) \n"
- " stq $31, (5 * 8)(%0) \n"
- " stq $31, (6 * 8)(%0) \n"
- " stq $31, (7 * 8)(%0) \n"
- " \n"
- " addq %3, (8 * 8), %0 \n"
- " stq $31, (0 * 8)(%1) \n"
- " stq $31, (1 * 8)(%1) \n"
- " stq $31, (2 * 8)(%1) \n"
- " stq $31, (3 * 8)(%1) \n"
- " stq $31, (4 * 8)(%1) \n"
- " stq $31, (5 * 8)(%1) \n"
- " stq $31, (6 * 8)(%1) \n"
- " stq $31, (7 * 8)(%1) \n"
- " # END loop body"
- : "=r" (p0), "=r" (p1)
- : "0" (p0), "1" (p1)
- : "memory");
- } while (p0 < pend);
-}
-
-/*
- * pmap_copy_page: [ INTERFACE ]
- *
- * Copy the specified (machine independent) page by mapping the page
- * into virtual memory and using memcpy to copy the page, one machine
- * dependent page at a time.
- *
- * Note: no locking is necessary in this function.
- */
-void
-pmap_copy_page(paddr_t src, paddr_t dst)
-{
- const void *s;
- void *d;
-
-#ifdef DEBUG
- if (pmapdebug & PDB_FOLLOW)
- printf("pmap_copy_page(%lx, %lx)\n", src, dst);
-#endif
- s = (const void *)ALPHA_PHYS_TO_K0SEG(src);
- d = (void *)ALPHA_PHYS_TO_K0SEG(dst);
- memcpy(d, s, PAGE_SIZE);
-}
+/* pmap_zero_page() is in pmap_subr.s */
+
+/* pmap_copy_page() is in pmap_subr.s */
/*
* pmap_pageidlezero: [ INTERFACE ]
diff -r 8f00cb9d0011 -r e8feb4519228 sys/arch/alpha/alpha/pmap_subr.s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/arch/alpha/alpha/pmap_subr.s Sat Jul 10 20:22:37 2021 +0000
@@ -0,0 +1,165 @@
+/* $NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2021 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+__KERNEL_RCSID(7, "$NetBSD: pmap_subr.s,v 1.1 2021/07/10 20:22:37 thorpej Exp $")
+
+/*
+ * Optimized pmap subroutines.
+ */
+
+ .text
+inc7: .stabs __FILE__,132,0,0,inc7; .loc 1 __LINE__
+
+/*
+ * pmap_zero_page: [ INTERFACE ]
+ *
+ * Zero the specified (machine independent) page. The physical address
+ * passed in a0 is converted to its K0SEG virtual address and
+ * ALPHA_PGBYTES bytes are cleared, 16 quadwords per loop iteration.
+ *
+ * Note: no locking is necessary in this function.
+ */
+ .p2align 4
+LEAF(pmap_zero_page, 1)
+ /* No global references - skip LDGP() */
+
+ /*
+ * Code here is arranged to keep branch targets on 16-byte
+ * boundaries, minimize result latencies in the loop, unroll
+ * the loop to at least 20 insns, and to dual-issue when
+ * feasible.
+ *
+ * In the setup, we use nop and unop to minimize pipeline stalls
+ * on dependent instruction pairs.
+ */
+
+ /* ---- */
+ lda t0, -1 /* t0 = -1 (all ones) */
+ nop
+ sll t0, 42, t0 /* t0 = -1 << 42 = ALPHA_K0SEG_BASE */
+ /*
+ * Loop counter:
+ * ALPHA_PGBYTES / 8 bytes per store / 16 stores per iteration
+ */
+ lda v0, ((ALPHA_PGBYTES / 8) / 16)
+ /* ---- */
+ or a0, t0, a0 /* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
+ nop
+ addq a0, (8*8), a2 /* a2 = a0 + 8-quads */
+ unop
+ /* ---- */
+1: stq zero, (0*8)(a0) /* 0 */
+ stq zero, (1*8)(a0) /* 1 */
+ stq zero, (2*8)(a0) /* 2 */
+ stq zero, (3*8)(a0) /* 3 */
+ /* ---- */
+ stq zero, (4*8)(a0) /* 4 */
+ stq zero, (5*8)(a0) /* 5 */
+ stq zero, (6*8)(a0) /* 6 */
+ stq zero, (7*8)(a0) /* 7 */
+ /* ---- */
+ addq a2, (8*8), a0 /* a0 = a2 + 8-quads (next iteration's first half) */
+ stq zero, (0*8)(a2) /* 8 */
+ stq zero, (1*8)(a2) /* 9 */
+ stq zero, (2*8)(a2) /* 10 */
+ /* ---- */
+ subq v0, 1, v0 /* count-- */
+ stq zero, (3*8)(a2) /* 11 */
+ stq zero, (4*8)(a2) /* 12 */
+ stq zero, (5*8)(a2) /* 13 */
+ /* ---- */
+ stq zero, (6*8)(a2) /* 14 */
+ stq zero, (7*8)(a2) /* 15 */
+ addq a0, (8*8), a2 /* a2 = a0 + 8-quads (next iteration's second half) */
+ bne v0, 1b /* loop around if count != 0 */
+ /* ---- */
+
+ RET
+ END(pmap_zero_page)
+
+/*
+ * pmap_copy_page: [ INTERFACE ]
+ *
+ * Copy the specified (machine independent) page. a0 holds the source
+ * physical address and a1 the destination; both are converted to K0SEG
+ * addresses and ALPHA_PGBYTES bytes are copied, 8 quadwords per pass.
+ *
+ * Note: no locking is necessary in this function.
+ */
+ .p2align 4
+LEAF(pmap_copy_page, 2)
+ /* No global references - skip LDGP() */
+
+ /* Code layout follows the rules described in pmap_zero_page. */
+
+ /* ---- */
+ lda t0, -1 /* t0 = -1 (all ones) */
+ nop
+ sll t0, 42, t0 /* t0 = -1 << 42 = ALPHA_K0SEG_BASE */
+ /*
+ * Loop counter:
+ * ALPHA_PGBYTES / 8 bytes per store / 8 stores per iteration
+ */
+ lda v0, ((ALPHA_PGBYTES / 8) / 8)
+ /* ---- */
+ or a0, t0, a0 /* a0 = ALPHA_PHYS_TO_K0SEG(a0) */
+ unop
+ or a1, t0, a1 /* a1 = ALPHA_PHYS_TO_K0SEG(a1) */
+ unop
+ /* ---- */
+1: ldq t0, (0*8)(a0) /* load 0 (t0 reused; K0SEG base no longer needed) */
+ ldq t1, (1*8)(a0) /* load 1 */
+ ldq t2, (2*8)(a0) /* load 2 */
+ ldq t3, (3*8)(a0) /* load 3 */
+ /* ---- */
+ ldq t4, (4*8)(a0) /* load 4 */
+ ldq t5, (5*8)(a0) /* load 5 */
+ ldq t6, (6*8)(a0) /* load 6 */
+ ldq t7, (7*8)(a0) /* load 7 */
+ /* ---- */
+ addq a0, (8*8), a0 /* a0 = a0 + 8-quads */
+ stq t0, (0*8)(a1) /* store 0 */
+ stq t1, (1*8)(a1) /* store 1 */
+ stq t2, (2*8)(a1) /* store 2 */
+ /* ---- */
+ subq v0, 1, v0 /* count-- */
+ stq t3, (3*8)(a1) /* store 3 */
+ stq t4, (4*8)(a1) /* store 4 */
+ stq t5, (5*8)(a1) /* store 5 */
+ /* ---- */
+ stq t6, (6*8)(a1) /* store 6 */
+ stq t7, (7*8)(a1) /* store 7 */
+ addq a1, (8*8), a1 /* a1 = a1 + 8-quads */
+ bne v0, 1b /* loop around if count != 0 */
+ /* ---- */
+
+ RET
+ END(pmap_copy_page)
Home |
Main Index |
Thread Index |
Old Index