Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/lib/libc/arch/i386/string A faster implementation.
details: https://anonhg.NetBSD.org/src/rev/740088340336
branches: trunk
changeset: 573635:740088340336
user: dsl <dsl%NetBSD.org@localhost>
date: Thu Feb 03 22:05:01 2005 +0000
description:
A faster implementation.
'rep stos' is slow to set up on modern processors, so don't use it to
align the transfer.
Also note that 8-byte alignment is faster on Intel processors.
diffstat:
lib/libc/arch/i386/string/bzero.S | 48 +---------------
lib/libc/arch/i386/string/memset.S | 111 +++++++++++++++++++++++++++---------
2 files changed, 85 insertions(+), 74 deletions(-)
diffs (197 lines):
diff -r 9bcb0a4ffb3f -r 740088340336 lib/libc/arch/i386/string/bzero.S
--- a/lib/libc/arch/i386/string/bzero.S Thu Feb 03 21:54:49 2005 +0000
+++ b/lib/libc/arch/i386/string/bzero.S Thu Feb 03 22:05:01 2005 +0000
@@ -1,46 +1,4 @@
-/*
- * Written by J.T. Conklin <jtc%NetBSD.org@localhost>.
- * Public domain.
- */
-
-#include <machine/asm.h>
-
-#if defined(LIBC_SCCS)
- RCSID("$NetBSD: bzero.S,v 1.9 2003/07/26 19:24:33 salo Exp $")
-#endif
-
-ENTRY(bzero)
- pushl %edi
- movl 8(%esp),%edi
- movl 12(%esp),%edx
-
- cld /* set fill direction forward */
- xorl %eax,%eax /* set fill data to 0 */
+/* $NetBSD: bzero.S,v 1.10 2005/02/03 22:05:01 dsl Exp $ */
- /*
- * if the string is too short, it's really not worth the overhead
- * of aligning to word boundries, etc. So we jump to a plain
- * unaligned set.
- */
- cmpl $16,%edx
- jb L1
-
- movl %edi,%ecx /* compute misalignment */
- negl %ecx
- andl $3,%ecx
- subl %ecx,%edx
- rep /* zero until word aligned */
- stosb
-
- movl %edx,%ecx /* zero by words */
- shrl $2,%ecx
- andl $3,%edx
- rep
- stosl
-
-L1: movl %edx,%ecx /* zero remainder by bytes */
- rep
- stosb
-
- popl %edi
- ret
+#define BZERO
+#include "memset.S"
diff -r 9bcb0a4ffb3f -r 740088340336 lib/libc/arch/i386/string/memset.S
--- a/lib/libc/arch/i386/string/memset.S Thu Feb 03 21:54:49 2005 +0000
+++ b/lib/libc/arch/i386/string/memset.S Thu Feb 03 22:05:01 2005 +0000
@@ -1,21 +1,59 @@
-/*
- * Written by J.T. Conklin <jtc%NetBSD.org@localhost>.
- * Public domain.
+/* $NetBSD: memset.S,v 1.10 2005/02/03 22:05:01 dsl Exp $ */
+
+/*-
+ * Copyright (c) 2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by David Laight.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
- RCSID("$NetBSD: memset.S,v 1.9 2003/07/26 19:24:34 salo Exp $")
+ RCSID("$NetBSD: memset.S,v 1.10 2005/02/03 22:05:01 dsl Exp $")
#endif
+#ifdef BZERO
+ENTRY(bzero)
+#else
ENTRY(memset)
+#endif
+#ifdef BZERO
+ movl 8(%esp),%ecx
+ xor %eax,%eax
+#else
+ movl 12(%esp),%ecx
+ movzbl 8(%esp),%eax /* unsigned char, zero extend */
+#endif
+ cmpl $0x0f,%ecx /* avoid mispredicted branch... */
+
pushl %edi
- pushl %ebx
- movl 12(%esp),%edi
- movzbl 16(%esp),%eax /* unsigned char, zero extend */
- movl 20(%esp),%ecx
- pushl %edi /* push address of buffer */
+ movl 8(%esp),%edi
cld /* set fill direction forward */
@@ -23,36 +61,51 @@
* if the string is too short, it's really not worth the overhead
* of aligning to word boundries, etc. So we jump to a plain
* unaligned set.
+ *
+ * NB aligning the transfer is actually pointless on my athlon 700,
+ * It does make a difference to a PII though.
+ *
+ * The PII, PIII and PIV all seem to have a massive performance
+ * drop when the initial target address is an odd multiple of 4.
*/
- cmpl $0x0f,%ecx
- jle L1
+ jbe by_bytes
+#ifndef BZERO
movb %al,%ah /* copy char to all bytes in word */
movl %eax,%edx
sall $16,%eax
orl %edx,%eax
+#endif
- movl %edi,%edx /* compute misalignment */
- negl %edx
- andl $3,%edx
- movl %ecx,%ebx
- subl %edx,%ebx
+ movl %edi,%edx /* detect misalignment */
+ neg %edx
+ andl $7,%edx
+ jnz align
+aligned:
+ movl %eax,-4(%edi,%ecx) /* zap last 4 bytes */
+ shrl $2,%ecx /* zero by words */
+ rep
+ stosl
+done:
+#ifndef BZERO
+ movl 8(%esp),%eax /* return address of buffer */
+#endif
+ pop %edi
+ ret
- movl %edx,%ecx /* set until word aligned */
+align:
+ movl %eax,(%edi) /* zap first 8 bytes */
+ movl %eax,4(%edi)
+ subl %edx,%ecx /* remove from main count */
+ add %edx,%edi
+ jmp aligned
+
+by_bytes:
rep
stosb
- movl %ebx,%ecx
- shrl $2,%ecx /* set by words */
- rep
- stosl
-
- movl %ebx,%ecx /* set remainder by bytes */
- andl $3,%ecx
-L1: rep
- stosb
-
- popl %eax /* pop address of buffer */
- popl %ebx
+#ifndef BZERO
+ movl 8(%esp),%eax /* return address of buffer */
+#endif
popl %edi
ret
Home |
Main Index |
Thread Index |
Old Index