Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/dev/rasops Replace manually unrolled loops with memcpy/m...
details: https://anonhg.NetBSD.org/src/rev/aceb94369a59
branches: trunk
changeset: 457985:aceb94369a59
user: rin <rin%NetBSD.org@localhost>
date: Fri Jul 26 05:24:04 2019 +0000
description:
Replace manually unrolled loops with memcpy/memmove or simple loops.
Modern compilers are smart enough; there are no measurable changes in
performance even on MC68040 with optimization level -Os.
Also, convert a byte-wise fill loop into memset.
diffstat:
sys/dev/rasops/rasops.c | 162 +++++++++--------------------------------------
1 files changed, 33 insertions(+), 129 deletions(-)
diffs (267 lines):
diff -r 6588cc2b4da1 -r aceb94369a59 sys/dev/rasops/rasops.c
--- a/sys/dev/rasops/rasops.c Fri Jul 26 05:15:47 2019 +0000
+++ b/sys/dev/rasops/rasops.c Fri Jul 26 05:24:04 2019 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: rasops.c,v 1.89 2019/07/26 05:15:47 rin Exp $ */
+/* $NetBSD: rasops.c,v 1.90 2019/07/26 05:24:04 rin Exp $ */
/*-
* Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.89 2019/07/26 05:15:47 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.90 2019/07/26 05:24:04 rin Exp $");
#include "opt_rasops.h"
#include "rasops_glue.h"
@@ -617,12 +617,11 @@
static void
rasops_copyrows(void *cookie, int src, int dst, int num)
{
- uint32_t *sp, *dp, *hp, *srp, *drp, *hrp;
- struct rasops_info *ri;
- int n8, n1, cnt, delta;
+ struct rasops_info *ri = (struct rasops_info *)cookie;
+ uint8_t *sp, *dp, *hp;
+ int n;
- ri = (struct rasops_info *)cookie;
- hp = hrp = NULL;
+ hp = NULL; /* XXX GCC */
#ifdef RASOPS_CLIPPING
if (dst == src)
@@ -649,72 +648,21 @@
#endif
num *= ri->ri_font->fontheight;
- n8 = ri->ri_emustride >> 5;
- n1 = (ri->ri_emustride >> 2) & 7;
+ n = ri->ri_emustride;
- if (dst < src) {
- srp = (uint32_t *)(ri->ri_bits + src * ri->ri_yscale);
- drp = (uint32_t *)(ri->ri_bits + dst * ri->ri_yscale);
- if (ri->ri_hwbits)
- hrp = (uint32_t *)(ri->ri_hwbits + dst *
- ri->ri_yscale);
- delta = ri->ri_stride;
- } else {
- src = ri->ri_font->fontheight * src + num - 1;
- dst = ri->ri_font->fontheight * dst + num - 1;
- srp = (uint32_t *)(ri->ri_bits + src * ri->ri_stride);
- drp = (uint32_t *)(ri->ri_bits + dst * ri->ri_stride);
- if (ri->ri_hwbits)
- hrp = (uint32_t *)(ri->ri_hwbits + dst *
- ri->ri_stride);
-
- delta = -ri->ri_stride;
- }
+ sp = ri->ri_bits + src * ri->ri_yscale;
+ dp = ri->ri_bits + dst * ri->ri_yscale;
+ if (ri->ri_hwbits)
+ hp = ri->ri_hwbits + dst * ri->ri_yscale;
while (num--) {
- dp = drp;
- sp = srp;
- if (ri->ri_hwbits)
- hp = hrp;
-
- DELTA(drp, delta, uint32_t *);
- DELTA(srp, delta, uint32_t *);
- if (ri->ri_hwbits)
- DELTA(hrp, delta, uint32_t *);
-
- for (cnt = n8; cnt; cnt--) {
- dp[0] = sp[0];
- dp[1] = sp[1];
- dp[2] = sp[2];
- dp[3] = sp[3];
- dp[4] = sp[4];
- dp[5] = sp[5];
- dp[6] = sp[6];
- dp[7] = sp[7];
- dp += 8;
- sp += 8;
+ memmove(dp, sp, n);
+ dp += n;
+ if (ri->ri_hwbits) {
+ memcpy(hp, sp, n);
+ hp += n;
}
- if (ri->ri_hwbits) {
- sp -= (8 * n8);
- for (cnt = n8; cnt; cnt--) {
- hp[0] = sp[0];
- hp[1] = sp[1];
- hp[2] = sp[2];
- hp[3] = sp[3];
- hp[4] = sp[4];
- hp[5] = sp[5];
- hp[6] = sp[6];
- hp[7] = sp[7];
- hp += 8;
- sp += 8;
- }
- }
-
- for (cnt = n1; cnt; cnt--) {
- *dp++ = *sp++;
- if (ri->ri_hwbits)
- *hp++ = *(sp - 1);
- }
+ sp += n;
}
}
@@ -916,13 +864,11 @@
void
rasops_eraserows(void *cookie, int row, int num, long attr)
{
- struct rasops_info *ri;
- int np, nw, cnt, delta;
+ struct rasops_info *ri = (struct rasops_info *)cookie;
uint32_t *dp, *hp, clr;
- int i;
+ int n, cnt, delta;
- ri = (struct rasops_info *)cookie;
- hp = NULL;
+ hp = NULL; /* XXX GCC */
#ifdef RASOPS_CLIPPING
if (row < 0) {
@@ -946,16 +892,14 @@
* the RI_FULLCLEAR flag is set, clear the entire display.
*/
if (num == ri->ri_rows && (ri->ri_flg & RI_FULLCLEAR) != 0) {
- np = ri->ri_stride >> 5;
- nw = (ri->ri_stride >> 2) & 7;
+ n = ri->ri_stride >> 2;
num = ri->ri_height;
dp = (uint32_t *)ri->ri_origbits;
if (ri->ri_hwbits)
hp = (uint32_t *)ri->ri_hworigbits;
delta = 0;
} else {
- np = ri->ri_emustride >> 5;
- nw = (ri->ri_emustride >> 2) & 7;
+ n = ri->ri_emustride >> 2;
num *= ri->ri_font->fontheight;
dp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale);
if (ri->ri_hwbits)
@@ -965,26 +909,11 @@
}
while (num--) {
- for (cnt = np; cnt; cnt--) {
- for (i = 0; i < 8; i++) {
- dp[i] = clr;
- if (ri->ri_hwbits)
- hp[i] = clr;
- }
- dp += 8;
+ for (cnt = n; cnt; cnt--) {
+ *dp++ = clr;
if (ri->ri_hwbits)
- hp += 8;
+ *hp++ = clr;
}
-
- for (cnt = nw; cnt; cnt--) {
- *(uint32_t *)dp = clr;
- DELTA(dp, 4, uint32_t *);
- if (ri->ri_hwbits) {
- *(uint32_t *)hp = clr;
- DELTA(hp, 4, uint32_t *);
- }
- }
-
DELTA(dp, delta, uint32_t *);
if (ri->ri_hwbits)
DELTA(hp, delta, uint32_t *);
@@ -1112,13 +1041,11 @@
void
rasops_erasecols(void *cookie, int row, int col, int num, long attr)
{
- int n8, height, cnt, slop1, slop2, clr;
- struct rasops_info *ri;
+ struct rasops_info *ri = (struct rasops_info *)cookie;
+ int height, cnt, slop1, slop2, clr;
uint32_t *rp, *dp, *hrp, *hp;
- int i;
- ri = (struct rasops_info *)cookie;
- hrp = hp = NULL;
+ hrp = hp = NULL; /* XXX GCC */
#ifdef RASOPS_CLIPPING
if ((unsigned)row >= (unsigned)ri->ri_rows)
@@ -1136,7 +1063,7 @@
return;
#endif
- num = num * ri->ri_xscale;
+ num *= ri->ri_xscale;
rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
if (ri->ri_hwbits)
hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
@@ -1190,32 +1117,21 @@
}
} else {
while (height--) {
- dp = rp;
+ memset(rp, clr, num);
DELTA(rp, ri->ri_stride, uint32_t *);
if (ri->ri_hwbits) {
- hp = hrp;
+ memset(hrp, clr, num);
DELTA(hrp, ri->ri_stride, uint32_t *);
}
-
- for (cnt = num; cnt; cnt--) {
- *(uint8_t *)dp = clr;
- DELTA(dp, 1, uint32_t *);
- if (ri->ri_hwbits) {
- *(uint8_t *)hp = clr;
- DELTA(hp, 1, uint32_t *);
- }
- }
}
}
return;
}
- slop1 = (4 - ((long)rp & 3)) & 3;
+ slop1 = (4 - ((uintptr_t)rp & 3)) & 3;
slop2 = (num - slop1) & 3;
- num -= slop1 + slop2;
- n8 = num >> 5;
- num = (num >> 2) & 7;
+ num = (num - slop1 /* - slop2 */) >> 2;
while (height--) {
dp = rp;
@@ -1244,18 +1160,6 @@
}
}
- /* Write 32 bytes per loop */
- for (cnt = n8; cnt; cnt--) {
- for (i = 0; i < 8; i++) {
- dp[i] = clr;
- if (ri->ri_hwbits)
- hp[i] = clr;
- }
- dp += 8;
- if (ri->ri_hwbits)
- hp += 8;
- }
-
/* Write 4 bytes per loop */
for (cnt = num; cnt; cnt--) {
*dp++ = clr;
Home |
Main Index |
Thread Index |
Old Index