tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: recent rasops commits vs. macppc



Hi,

Could you try the attached patch? It replaces memcpy with a 32-bit-wise
copy, at least for 8-bpp framebuffers with 8-, 12-, or 16-pixel-wide fonts.

Thanks,
rin

On 2019/08/03 10:05, Michael wrote:
Hello,

On Sat, 3 Aug 2019 08:58:02 +0900
Rin Okuyama <rokuyama.rk%gmail.com@localhost> wrote:

Hi Michael,

I'm so sorry for the breakage. I'll investigate it.
Font width is 8?

Doesn't seem to matter, the G5 used Gallant 12x22, the others used an 8
pixels wide font. Colour depth is always 8 though.

PS
I decided to buy my own macppc machine. Could anyone
recommend a model that is

- serial console capable
- as new and as small as possible

G4 and later models don't really have serial ports anymore, unfortunately,
although many can be fitted with a serial port instead of a modem as a
3rd party option.

have fun
Michael

Index: sys/dev/rasops/rasops.c
===================================================================
RCS file: /cvsroot/src/sys/dev/rasops/rasops.c,v
retrieving revision 1.108
diff -p -u -r1.108 rasops.c
--- sys/dev/rasops/rasops.c	2 Aug 2019 23:24:37 -0000	1.108
+++ sys/dev/rasops/rasops.c	3 Aug 2019 14:40:45 -0000
@@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1
 
 #define	_RASOPS_PRIVATE
 #include <dev/rasops/rasops.h>
+#include <dev/rasops/rasops_masks.h>	/* XXX for MBE */
 
 #ifndef _KERNEL
 #include <errno.h>
@@ -69,6 +70,23 @@ struct rasops_matchdata {
 	int ident;
 };	
 
+const uint32_t rasops_lmask32[4 + 1] = {	/* masks for the leading partial word; visible users index with (4 - slop1), i.e. 1..3 */
+	MBE(0x00000000), MBE(0x00ffffff), MBE(0x0000ffff), MBE(0x000000ff),	/* MBE comes from rasops_masks.h; presumably maps these big-endian patterns to host byte order - confirm */
+	MBE(0x00000000),	/* index 4 is zero, like index 0 */
+};
+
+const uint32_t rasops_rmask32[4 + 1] = {	/* masks for the trailing partial word; visible users index with slop2 and only when slop2 != 0 */
+	MBE(0x00000000), MBE(0xff000000), MBE(0xffff0000), MBE(0xffffff00),	/* entry i has i leading 0xff bytes in the literal; MBE presumably fixes host byte order - confirm */
+	MBE(0xffffffff),	/* index 4: all four bytes */
+};
+
+const uint32_t rasops_pmask32[4][4] = {	/* visible users read only [byte offset & 3][1], a one-byte mask at that offset */
+      { MBE(0xffffffff), MBE(0xff000000), MBE(0xffff0000), MBE(0xffffffff), },	/* NOTE(review): last entry is 0xffffffff; the pattern of the other rows suggests 0xffffff00 - confirm */
+      { MBE(0x00000000), MBE(0x00ff0000), MBE(0x00ffff00), MBE(0x00ffffff), },	/* row 1: masks starting at the second byte of the literal */
+      { MBE(0x00000000), MBE(0x0000ff00), MBE(0x0000ffff), MBE(0x00000000), },	/* row 2: masks starting at the third byte of the literal */
+      { MBE(0x00000000), MBE(0x000000ff), MBE(0x00000000), MBE(0x00000000), },	/* row 3: only the one-byte mask is non-zero */
+};
+
 /* ANSI colormap (R,G,B). Upper 8 are high-intensity */
 const uint8_t rasops_cmap[256 * 3] = {
 	0x00, 0x00, 0x00, /* black */
@@ -429,7 +447,8 @@ rasops_reconfig(struct rasops_info *ri, 
 
 	/* Clear the entire display */
 	if ((ri->ri_flg & RI_CLEAR) != 0)
-		memset(ri->ri_bits, 0, ri->ri_stride * ri->ri_height);
+		rasops_memset32((uint32_t *)ri->ri_bits, 0,
+		    ri->ri_stride * ri->ri_height);
 
 	/* Now centre our window if needs be */
 	if ((ri->ri_flg & RI_CENTER) != 0) {
@@ -690,7 +709,7 @@ static void
 rasops_copyrows(void *cookie, int src, int dst, int num)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint8_t *sp, *dp, *hp;
+	uint32_t *sp, *dp, *hp;
 	int n, stride;
 
 	hp = NULL;	/* XXX GCC */
@@ -723,19 +742,19 @@ rasops_copyrows(void *cookie, int src, i
 	n = ri->ri_emustride;
 	stride = ri->ri_stride;
 
-	sp = ri->ri_bits + src * ri->ri_yscale;
-	dp = ri->ri_bits + dst * ri->ri_yscale;
+	sp = (uint32_t *)(ri->ri_bits + src * ri->ri_yscale);
+	dp = (uint32_t *)(ri->ri_bits + dst * ri->ri_yscale);
 	if (ri->ri_hwbits)
-		hp = ri->ri_hwbits + dst * ri->ri_yscale;
+		hp = (uint32_t *)(ri->ri_hwbits + dst * ri->ri_yscale);
 
 	while (num--) {
-		memcpy(dp, sp, n);
-		dp += stride;
+		rasops_memcpy32(dp, sp, n);
+		DELTA(dp, stride, uint32_t *);
 		if (ri->ri_hwbits) {
-			memcpy(hp, sp, n);
-			hp += stride;
+			rasops_memcpy32(hp, sp, n);
+			DELTA(hp, stride, uint32_t *);
 		}
-		sp += stride;
+		DELTA(sp, stride, uint32_t *);
 	}
 }
 
@@ -792,9 +811,9 @@ rasops_copycols(void *cookie, int row, i
 		hp = ri->ri_hwbits + row + dst * ri->ri_xscale;
 
 	while (height--) {
-		memmove(dp, sp, num);
+		memmove(dp, sp, num);		/* XXXRO not 32-bit operation */
 		if (ri->ri_hwbits) {
-			memcpy(hp, dp, num);
+			memcpy(hp, dp, num);	/* XXXRO */
 			hp += ri->ri_stride;
 		}
 		dp += ri->ri_stride;
@@ -980,9 +999,8 @@ void
 rasops_eraserows(void *cookie, int row, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint32_t *buf = (uint32_t *)ri->ri_buf;
 	uint32_t *rp, *hp, clr;
-	int stride, cnt;
+	int n, stride;
 
 	hp = NULL;	/* XXX GCC */
 
@@ -1008,29 +1026,28 @@ rasops_eraserows(void *cookie, int row, 
 	 * the RI_FULLCLEAR flag is set, clear the entire display.
 	 */
 	if (num == ri->ri_rows && (ri->ri_flg & RI_FULLCLEAR) != 0) {
-		stride = ri->ri_stride;
+		n = ri->ri_stride;
 		num = ri->ri_height;
 		rp = (uint32_t *)ri->ri_origbits;
 		if (ri->ri_hwbits)
 			hp = (uint32_t *)ri->ri_hworigbits;
 	} else {
-		stride = ri->ri_emustride;
+		n = ri->ri_emustride;
 		num *= ri->ri_font->fontheight;
 		rp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale);
 		if (ri->ri_hwbits)
 			hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale);
 	}
 
-	for (cnt = 0; cnt < stride >> 2; cnt++)
-		buf[cnt] = clr;
+	stride = ri->ri_stride;
 
 	while (num--) {
-		memcpy(rp, buf, stride);
+		rasops_memset32(rp, clr, n);
+		DELTA(rp, stride, uint32_t *);
 		if (ri->ri_hwbits) {
-			memcpy(hp, buf, stride);
-			DELTA(hp, ri->ri_stride, uint32_t *);
+			rasops_memset32(hp, clr, n);
+			DELTA(hp, stride, uint32_t *);
 		}
-		DELTA(rp, ri->ri_stride, uint32_t *);
 	}
 }
 
@@ -1042,9 +1059,8 @@ static void
 rasops_do_cursor(struct rasops_info *ri)
 {
 	int full, height, cnt, slop1, slop2, row, col;
-	uint32_t tmp32, msk1, msk2;
-	uint8_t tmp8;
-	uint8_t *dp, *rp, *hp;
+	uint32_t mask, mask1, mask2, tmp32;
+	uint32_t *dp, *rp, *hp;
 
 	hp = NULL;	/* XXX GCC */
 
@@ -1069,9 +1085,11 @@ rasops_do_cursor(struct rasops_info *ri)
 		col = ri->ri_ccol;
 	}
 
-	rp = ri->ri_bits + row * ri->ri_yscale + col * ri->ri_xscale;
+	col *= ri->ri_xscale;
+	rp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale + (col & ~3));
 	if (ri->ri_hwbits)
-		hp = ri->ri_hwbits + row * ri->ri_yscale + col * ri->ri_xscale;
+		hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale +
+		    (col & ~3));
 	height = ri->ri_font->fontheight;
 
 	/*
@@ -1081,17 +1099,18 @@ rasops_do_cursor(struct rasops_info *ri)
 	 * fontwidth = 8 and bpp = 1. So we take care of it.
 	 */
 	if (ri->ri_xscale == 1) {
-		while (height--) {
-			tmp8 = ~*rp;
-
-			*rp = tmp8;
-			rp += ri->ri_stride;
+		mask = rasops_pmask32[col & 3][1];
 
+		while (height--) {
+			tmp32 = *rp ^ mask;
+			*rp = tmp32;
+			DELTA(rp, ri->ri_stride, uint32_t *);
 			if (ri->ri_hwbits) {
-				*hp = tmp8;
-				hp += ri->ri_stride;
+				*hp = tmp32;
+				DELTA(hp, ri->ri_stride, uint32_t *);
 			}
 		}
+
 		return;
 	}
 
@@ -1101,42 +1120,35 @@ rasops_do_cursor(struct rasops_info *ri)
 	 * Note that siop1 <= ri_xscale even for ri_xscale = 2,
 	 * since rp % 3 = 0 or 2 (ri_stride % 4 = 0).
 	 */
-	slop1 = (4 - ((uintptr_t)rp & 3)) & 3;
+	slop1 = (4 - (col & 3)) & 3;
 	slop2 = (ri->ri_xscale - slop1) & 3;
 	full = (ri->ri_xscale - slop1 /* - slop2 */) >> 2;
 
-	rp = (uint8_t *)((uintptr_t)rp & ~3);
-	hp = (uint8_t *)((uintptr_t)hp & ~3);
-
-	msk1 = !slop1 ? 0 : be32toh(0xffffffffU >> (32 - (8 * slop1)));
-	msk2 = !slop2 ? 0 : be32toh(0xffffffffU << (32 - (8 * slop2)));
+	mask1 = rasops_lmask32[4 - slop1];
+	mask2 = rasops_rmask32[slop2];
 
 	while (height--) {
 		dp = rp;
 
 		if (slop1) {
-			tmp32 = *(uint32_t *)dp ^ msk1;
-			*(uint32_t *)dp = tmp32;
-			dp += 4;
+			*dp = *dp ^ mask1;
+			dp++;
 		}
 
 		for (cnt = full; cnt; cnt--) {
-			tmp32 = ~*(uint32_t *)dp;
-			*(uint32_t *)dp = tmp32;
-			dp += 4;
+			*dp = ~*dp;
+			dp++;
 		}
 
-		if (slop2) {
-			tmp32 = *(uint32_t *)dp ^ msk2;
-			*(uint32_t *)dp = tmp32;
-		}
+		if (slop2)
+			*dp = *dp ^ mask2;
 
 		if (ri->ri_hwbits) {
-			memcpy(hp, rp, ((slop1 != 0) + full +
+			rasops_memcpy32(hp, rp, ((slop1 != 0) + full +
 			    (slop2 != 0)) << 2);
-			hp += ri->ri_stride;
+			DELTA(hp, ri->ri_stride, uint32_t *);
 		}
-		rp += ri->ri_stride;
+		DELTA(rp, ri->ri_stride, uint32_t *);
 	}
 }
 
@@ -1147,9 +1159,8 @@ void
 rasops_erasecols(void *cookie, int row, int col, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint32_t *buf = ri->ri_buf;
-	int height, cnt, clr;
-	uint32_t *dp, *rp, *hp;
+	uint32_t height, clr, *hp;
+	uint8_t *rp;
 
 	hp = NULL;	/* XXX GCC */
 
@@ -1170,33 +1181,33 @@ rasops_erasecols(void *cookie, int row, 
 #endif
 
 	num *= ri->ri_xscale;
-	rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
+	col *= ri->ri_xscale;
+	rp = ri->ri_bits + row * ri->ri_yscale + col;
 	if (ri->ri_hwbits)
-		hp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
-		    col*ri->ri_xscale);
+		hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale +
+		    (col & ~3));
 	height = ri->ri_font->fontheight;
 	clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
 
-	dp = buf;
-
-	/* Write 4 bytes per loop */
-	for (cnt = num >> 2; cnt; cnt--)
-		*dp++ = clr;
-
-	/* Write unaligned trailing slop */
-	for (cnt = num & 3; cnt; cnt--) {
-		*(uint8_t *)dp = clr;
-		DELTA(dp, 1, uint32_t *);
-	}
-
-	while (height--) {
-		memcpy(rp, buf, num);
-		DELTA(rp, ri->ri_stride, uint32_t *);
-		if (ri->ri_hwbits) {
-			memcpy(hp, buf, num);
-			DELTA(hp, ri->ri_stride, uint32_t *);
+	if (ri->ri_xscale & 3)
+		while (height--) {
+			int changed = rasops_memset32ua(rp, clr, num);
+			if (ri->ri_hwbits) {
+				rasops_memcpy32(hp,
+				    (uint32_t *)((uintptr_t)rp & ~3), changed);
+				DELTA(hp, ri->ri_stride, uint32_t *);
+			}
+			rp += ri->ri_stride;
+		}
+	else
+		while (height--) {
+			rasops_memset32((uint32_t *)rp, clr, num);
+			rp += ri->ri_stride;
+			if (ri->ri_hwbits) {
+				rasops_memset32(hp, clr, num);
+				DELTA(hp, ri->ri_stride, uint32_t *);
+			}
 		}
-	}
 }
 
 #if NRASOPS_ROTATION > 0
Index: sys/dev/rasops/rasops.h
===================================================================
RCS file: /cvsroot/src/sys/dev/rasops/rasops.h,v
retrieving revision 1.43
diff -p -u -r1.43 rasops.h
--- sys/dev/rasops/rasops.h	3 Aug 2019 06:29:52 -0000	1.43
+++ sys/dev/rasops/rasops.h	3 Aug 2019 14:40:45 -0000
@@ -193,12 +193,70 @@ void	rasops32_init(struct rasops_info *)
 
 void	rasops_allocstamp(struct rasops_info *, size_t);
 
+extern const uint32_t rasops_lmask32[4 + 1];
+extern const uint32_t rasops_rmask32[4 + 1];
+extern const uint32_t rasops_pmask32[4][4];
+
 #define	DELTA(p, d, cast) ((p) = (cast)((uint8_t *)(p) + (d)))
 
 #define	FONT_GLYPH(uc, font, ri)					\
 	((uint8_t *)(font)->data + ((uc) - ((font)->firstchar)) *	\
 	    (ri)->ri_fontscale)
 
+static __inline void	/* word-wise copy: assigns (bytes >> 2) uint32_t values from src to dst */
+rasops_memcpy32(uint32_t * restrict dst, const uint32_t * restrict src,	/* restrict: the two regions must not overlap */
+    size_t bytes)	/* length in bytes; a remainder of (bytes & 3) bytes is not copied */
+{
+	size_t cnt;
+
+	for (cnt = bytes >> 2; cnt; cnt--)	/* one uint32_t assignment per iteration */
+		*dst++ = *src++;
+}
+
+static __inline void	/* word-wise fill: stores val into (bytes >> 2) consecutive uint32_t slots starting at p */
+rasops_memset32(uint32_t *p, uint32_t val, size_t bytes)	/* bytes is a byte count; a remainder of (bytes & 3) bytes is left untouched */
+{
+	size_t cnt;
+
+	for (cnt = bytes >> 2; cnt; cnt--)	/* one uint32_t store per iteration */
+		*p++ = val;
+}
+
+static __inline int	/* fill `bytes` bytes at a possibly unaligned p from val using only aligned 32-bit accesses; returns the byte size of the words touched */
+rasops_memset32ua(void *p, uint32_t val, size_t bytes)	/* val is masked per byte position; presumably it carries the same pattern in every byte - confirm */
+{
+	int slop1, slop2, full, cnt;
+	uint32_t mask, mask1, mask2, *dp;
+
+	dp = (uint32_t *)((uintptr_t)p & ~3);	/* round p down to a 4-byte boundary */
+
+	if (bytes == 1) {	/* single byte: one masked read-modify-write of the containing word */
+		mask = rasops_pmask32[(uintptr_t)p & 3][1];	/* row = byte offset of p inside its word */
+		*dp = (*dp & ~mask) | (val & mask);	/* keep bytes outside the mask, take val inside it */
+		return 4;	/* exactly one word was touched */
+	}
+
+	slop1 = (4 - ((uintptr_t)p & 3)) & 3;	/* bytes from p up to the next 4-byte boundary (0 if p is aligned) */
+	slop2 = (bytes - slop1) & 3;	/* bytes left over after the last whole word */
+	full = (bytes - slop1 /* - slop2 */) >> 2;	/* whole words; NOTE(review): bytes < slop1 would wrap in size_t, presumably callers never pass that - confirm */
+
+	mask1 = rasops_lmask32[4 - slop1];	/* selects the bytes of the first word at and after p */
+	mask2 = rasops_rmask32[slop2];	/* selects the first slop2 bytes of the last word */
+
+	if (slop1) {	/* leading partial word: merge val under mask1 */
+		*dp = (*dp & ~mask1) | (val & mask1);
+		dp++;
+	}
+
+	for (cnt = full; cnt; cnt--)	/* aligned middle: plain word stores */
+		*dp++ = val;
+
+	if (slop2)	/* trailing partial word: merge val under mask2 */
+		*dp = (*dp & ~mask2) | (val & mask2);
+
+	return ((slop1 != 0) + full + (slop2 != 0)) << 2;	/* number of words written, times 4 */
+}
+
 static __inline uint32_t
 be32uatoh(uint8_t *p)
 {


Home | Main Index | Thread Index | Old Index