Subject: port-m68k/3641: Polishing zeropage/m68k.
To: None <gnats-bugs@gnats.netbsd.org>
From: Hiroshi HORIMOTO <horimoto@cs-aoi.cs.sist.ac.jp>
List: netbsd-bugs
Date: 05/18/1997 08:07:26
>Number: 3641
>Category: port-m68k
>Synopsis: Optimizing `zeropage'.
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: gnats-admin (GNATS administrator)
>State: open
>Class: change-request
>Submitter-Id: net
>Arrival-Date: Sat May 17 16:20:01 1997
>Last-Modified:
>Originator: Hiroshi HORIMOTO
>Organization:
Shizuoka Institute of Science and Technology, JAPAN.
>Release: NetBSD/x68k 1.2D (May 3, 1997)
>Environment:
Machine: X68030 with MC68030RC40, MC68882FN33
Target: all m68k-based machines' kernel (src/sys/arch/m68k/m68k/copypage.s)
System: NetBSD silpheed.faf.mil 1.2D NetBSD 1.2D (SILPHEED) #7: Thu May 8 07:34:53 JST 1997 root@silpheed.faf.mil:/usr/src/sys/arch/x68k/compile/SILPHEED x68k
>Description:
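Speed up `zeropage' in src/sys/arch/m68k/m68k/copypage.s by using the
movem.l instruction to store many registers with a single instruction.
When NBPG >= 1024, d1-d7/a1-a6 are cleared and written with predecrement
from the end of the page, zeroing 512 bytes per loop iteration.
The patch also loads the loop counters in copypage040, copypage and the
fallback zeropage loop with moveq (or movw where the count does not fit)
instead of movl, and spells dbf as its synonym dbra.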
>How-To-Repeat:
>Fix:
The patch follows. Please apply it and examine the result.
--- ./copypage.s.org Sun May 18 07:48:33 1997
+++ ./copypage.s Sun Mar 23 07:07:14 1997
@@ -56,11 +56,15 @@
ENTRY(copypage040)
movl sp@(4),a0 | source address
movl sp@(8),a1 | destiniation address
- movl #NBPG/32-1,d0 | number of 32 byte chunks - 1
+#if NBPG <= 4096
+ movq #NBPG/32-1,d0 | number of 32 byte chunks - 1
+#else
+ movw #NBPG/32-1,d0
+#endif
Lm16loop:
.long 0xf6209000 | move16 a0@+,a1@+
.long 0xf6209000 | move16 a0@+,a1@+
- dbf d0,Lm16loop
+ dbra d0,Lm16loop
rts
#endif /* M68040 || M68060 */
@@ -72,7 +76,11 @@
ENTRY(copypage)
movl sp@(4),a0 | source address
movl sp@(8),a1 | destiniation address
- movl #NBPG/32-1,d0 | number of 32 byte chunks - 1
+#if NBPG <= 4096
+ movq #NBPG/32-1,d0 | number of 32 byte chunks - 1
+#else
+ movw #NBPG/32-1,d0
+#endif
Lmlloop:
movl a0@+,a1@+
movl a0@+,a1@+
@@ -82,7 +90,7 @@
movl a0@+,a1@+
movl a0@+,a1@+
movl a0@+,a1@+
- dbf d0,Lmlloop
+ dbra d0,Lmlloop
rts
/*
@@ -91,8 +99,48 @@
* Optimized version of bzero for a single page-aligned NBPG byte zero.
*/
ENTRY(zeropage)
+#if NBPG >= 1024
+ movml #0x3f3e,sp@- | push d2-d7/a2-a6
+ movl sp@(48),a0 | dest address
+#if NBPG > 65536
+ movw #NBPG/512-1,d0 | number of 512 byte chunks - 1
+#else
+ movq #NBPG/512-1,d0
+#endif
+ movq #0,d1
+ movq #0,d2
+ movq #0,d3
+ movq #0,d4
+ movq #0,d5
+ movq #0,d6
+ movq #0,d7
+ movl d7,a1
+ movl d7,a2
+ movl d7,a3
+ movl d7,a4
+ movl d7,a5
+ movl d7,a6
+#if NBPG < 32768
+ lea a0@(NBPG),a0
+#else
+ addl #NBPG,a0
+#endif
+Lzzloop:
+ movml #0x7f7e,a0@- | 52 bytes (d1-d7/a1-a6)
+ movml #0x7f7e,a0@- | 104 bytes
+ movml #0x7f7e,a0@- | 156 bytes
+ movml #0x7f7e,a0@- | 208 bytes
+ movml #0x7f7e,a0@- | 260 bytes
+ movml #0x7f7e,a0@- | 312 bytes
+ movml #0x7f7e,a0@- | 364 bytes
+ movml #0x7f7e,a0@- | 416 bytes
+ movml #0x7f7e,a0@- | 468 bytes
+ movml #0x7f78,a0@- | 512 bytes (d1-d7/a1-a4)
+ dbra d0,Lzzloop
+ movml sp@+,#0x7cfc
+#else
movl sp@(4),a0 | dest address
- movl #NBPG/32-1,d0 | number of 32 byte chunks - 1
+ movq #NBPG/32-1,d0 | number of 32 byte chunks - 1
movq #0,d1
Lzloop:
movl d1,a0@+
@@ -103,5 +151,6 @@
movl d1,a0@+
movl d1,a0@+
movl d1,a0@+
- dbf d0,Lzloop
+ dbra d0,Lzloop
+#endif
rts
>Audit-Trail:
>Unformatted: