Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/luna68k/dev lunafb: Improve drawing performance usi...
details: https://anonhg.NetBSD.org/src/rev/b3abf6db90ba
branches: trunk
changeset: 370717:b3abf6db90ba
user: isaki <isaki%NetBSD.org@localhost>
date: Sun Sep 25 11:28:40 2022 +0000
description:
lunafb: Improve drawing performance using VRAM ROP features.
- Drawing a character on 4bpp normally requires four writes (one per plane),
but by actively using the VRAM ROP, this can be reduced to a single write.
The same goes for copyrows. If the whole row consists of only two colors
(one foreground and one background), it can be copied by reading once and
writing once, regardless of the number of planes. Only if the row consists
of more than two colors, it will be copied plane by plane.
- On 8bpp board, it acts as 4bpp (16 colors).
- On 4bpp board on the real LUNA-I(68030/20MHz), monochrome scroll is about
4 times faster even without asm. Using asm improves it by additional 5%
(asm is enabled by default).
- According to tsutsui@-san's report, even color scroll is about 2 times faster
on his 8bpp board on the real LUNA-II(68040).
This was first developed by Y.Sugahara back in late 2019, and was modified
a lot by me in 2022.
http://mail-index.netbsd.org/port-luna68k/2022/09/23/msg000072.html
diffstat:
sys/arch/luna68k/dev/lunafb.c | 22 +-
sys/arch/luna68k/dev/omrasops.c | 1738 ++++++++++++++++++++++++-----------
sys/arch/luna68k/dev/omrasopsvar.h | 31 +-
3 files changed, 1217 insertions(+), 574 deletions(-)
diffs (truncated from 2079 to 300 lines):
diff -r 97bcba7eaf44 -r b3abf6db90ba sys/arch/luna68k/dev/lunafb.c
--- a/sys/arch/luna68k/dev/lunafb.c Sun Sep 25 11:22:36 2022 +0000
+++ b/sys/arch/luna68k/dev/lunafb.c Sun Sep 25 11:28:40 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: lunafb.c,v 1.46 2022/07/14 20:13:21 tsutsui Exp $ */
+/* $NetBSD: lunafb.c,v 1.47 2022/09/25 11:28:40 isaki Exp $ */
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
-__KERNEL_RCSID(0, "$NetBSD: lunafb.c,v 1.46 2022/07/14 20:13:21 tsutsui Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lunafb.c,v 1.47 2022/09/25 11:28:40 isaki Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -76,6 +76,8 @@
#define OMFB_RFCNT BMAP_RFCNT /* video h-origin/v-origin */
#define OMFB_RAMDAC BMAP_PALLET2 /* Bt454/Bt458 RAMDAC */
+#define OMFB_FB_WADDR (BMAP_BMP + 8) /* common bitmap plane */
+#define OMFB_FB_RADDR (BMAP_BMAP0 + 8)/* bitmap plane #0 */
#define OMFB_SIZE (BMAP_FN0 - BMAP_BMP + PAGE_SIZE)
@@ -173,6 +175,8 @@
extern int hwplanemask; /* hardware planemask; retrieved at boot */
+int hwplanecount; /* for omrasops */
+
static int omfb_console;
int omfb_cnattach(void);
@@ -456,7 +460,7 @@
static void
omfb_getdevconfig(paddr_t paddr, struct om_hwdevconfig *dc)
{
- int bpp, i;
+ int i;
struct rasops_info *ri;
union {
struct { short h, v; } p;
@@ -465,21 +469,21 @@
switch (hwplanemask) {
case 0xff:
- bpp = 8; /* XXX check monochrome bit in DIPSW */
+ hwplanecount = 8; /* XXX check monochrome bit in DIPSW */
break;
default:
case 0x0f:
- bpp = 4; /* XXX check monochrome bit in DIPSW */
+ hwplanecount = 4; /* XXX check monochrome bit in DIPSW */
break;
case 1:
- bpp = 1;
+ hwplanecount = 1;
break;
}
dc->dc_wid = 1280;
dc->dc_ht = 1024;
- dc->dc_depth = bpp;
+ dc->dc_depth = hwplanecount;
dc->dc_rowbytes = 2048 / 8;
- dc->dc_cmsize = (bpp == 1) ? 0 : 1 << bpp;
+ dc->dc_cmsize = (hwplanecount == 1) ? 0 : 1 << hwplanecount;
dc->dc_videobase = paddr;
omfb_resetcmap(dc);
@@ -509,7 +513,7 @@
ri->ri_flg |= RI_NO_AUTO;
ri->ri_hw = dc;
- if (bpp == 4 || bpp == 8)
+ if (hwplanecount == 4 || hwplanecount == 8)
omrasops4_init(ri, 34, 80);
else
omrasops1_init(ri, 34, 80);
diff -r 97bcba7eaf44 -r b3abf6db90ba sys/arch/luna68k/dev/omrasops.c
--- a/sys/arch/luna68k/dev/omrasops.c Sun Sep 25 11:22:36 2022 +0000
+++ b/sys/arch/luna68k/dev/omrasops.c Sun Sep 25 11:28:40 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: omrasops.c,v 1.22 2022/09/25 11:22:36 isaki Exp $ */
+/* $NetBSD: omrasops.c,v 1.23 2022/09/25 11:28:40 isaki Exp $ */
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
@@ -31,7 +31,7 @@
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
-__KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.22 2022/09/25 11:22:36 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: omrasops.c,v 1.23 2022/09/25 11:28:40 isaki Exp $");
/*
* Designed speficically for 'm68k bitorder';
@@ -41,6 +41,15 @@
* - first column is at 32bit aligned address,
* - font glyphs are stored in 32bit padded.
*/
+/*
+ * BMSEL affects both of
+ * 1) which plane a write to the common bitmap plane is reflected on and
+ * 2) which plane's ROP a write to the common ROP is reflected on.
+ *
+ * The common ROP is not a ROP applied to write to the common bitmap plane.
+ * It's equivalent to set ROPs of the plane selected in the plane mask one
+ * by one.
+ */
#include <sys/param.h>
#include <sys/systm.h>
@@ -52,32 +61,84 @@
#include <arch/luna68k/dev/omrasopsvar.h>
+#ifdef luna68k
+#define USE_M68K_ASM 1
+#endif
+
+/* To provide optimization conditions to compilers */
+#if defined(__GNUC__)
+#define ASSUME(cond) if (!(cond)) __unreachable()
+#elif defined(__clang__) && __has_builtin(__builtin_assume)
+#define ASSUME(cond) __builtin_assume(cond)
+#else
+#define ASSUME(cond) (void)(cond)
+#endif
+
+/* XXX it should be redesigned, including making the attributes support 8bpp */
+typedef struct {
+ union {
+ int32_t all;
+ struct {
+ int8_t ismulti; /* is multi color used */
+ uint8_t fg;
+ uint8_t bg;
+ uint8_t reserved;
+ };
+ };
+} rowattr_t;
+
/* wscons emulator operations */
-static void om1_cursor(void *, int, int, int);
-static void om4_cursor(void *, int, int, int);
-static int om_mapchar(void *, int, unsigned int *);
-static void om1_putchar(void *, int, int, u_int, long);
-static void om4_putchar(void *, int, int, u_int, long);
+static void om_cursor(void *, int, int, int);
+static int om_mapchar(void *, int, u_int *);
+static void om_putchar(void *, int, int, u_int, long);
static void om1_copycols(void *, int, int, int, int);
static void om4_copycols(void *, int, int, int, int);
static void om1_copyrows(void *, int, int, int num);
static void om4_copyrows(void *, int, int, int num);
-static void om1_erasecols(void *, int, int, int, long);
-static void om4_erasecols(void *, int, int, int, long);
-static void om1_eraserows(void *, int, int, long);
-static void om4_eraserows(void *, int, int, long);
-static int om1_allocattr(void *, int, int, int, long *);
-static int om4_allocattr(void *, int, int, int, long *);
-static void om4_unpack_attr(long, int *, int *, int *);
+static void om_erasecols(void *, int, int, int, long);
+static void om_eraserows(void *, int, int, long);
+static int om_allocattr(void *, int, int, int, long *);
+
+static void om_fill(int, int, uint8_t *, int, int, uint32_t, int, int);
+static void om_fill_color(int, uint8_t *, int, int, int, int);
+static void om_rascopy_single(uint8_t *, uint8_t *, int16_t, int16_t,
+ uint8_t[]);
+static void om4_rascopy_multi(uint8_t *, uint8_t *, int16_t, int16_t);
+static void om_unpack_attr(long, uint8_t *, uint8_t *, int *);
static int omrasops_init(struct rasops_info *, int, int);
+/*
+ * XXX should be fixed...
+ * This number of elements is derived from howmany(1024, fontheight = 24).
+ * But it is currently initialized with row = 34, so it is used only up to 34.
+ */
+static rowattr_t rowattr[43];
+
#define ALL1BITS (~0U)
#define ALL0BITS (0U)
#define BLITWIDTH (32)
#define ALIGNMASK (0x1f)
#define BYTESDONE (4)
+#if 0 /* XXX not used yet */
+/*
+ * internal attributes. see om_allocattr().
+ */
+#define OMFB_ATTR_MULTICOLOR (1U << 31)
+#define OMFB_ATTR_UNDERLINE (1U << 17)
+#define OMFB_ATTR_BOLD (1U << 16)
+#endif
+
+/*
+ * XXX deprecated.
+ * This way cannot be extended to 8bpp, so don't use it in new code.
+ */
+#define P0(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 1))
+#define P1(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 2))
+#define P2(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 3))
+#define P3(addr) ((uint32_t *)((uint8_t *)(addr) + OMFB_PLANEOFFS * 4))
+
/*
* macros to handle unaligned bit copy ops.
* See src/sys/dev/rasops/rasops_masks.h for MI version.
@@ -103,405 +164,934 @@
#define PUTBITS(src, x, w, pdst) FASTPUTBITS(src, x, w, pdst)
/*
- * Blit a character at the specified co-ordinates.
+ * Clear lower w bits from x.
+ * x must be filled with 1 at least lower w bits.
*/
-static void
-om1_putchar(void *cookie, int row, int startcol, u_int uc, long attr)
-{
- struct rasops_info *ri = cookie;
- uint8_t *p;
- int scanspan, startx, height, width, align, y;
- uint32_t lmask, rmask, glyph, inverse;
- int i;
- uint8_t *fb;
+#if USE_M68K_ASM
+#define CLEAR_LOWER_BITS(x, w) \
+ asm volatile( \
+ " bclr %[width],%[data] ;\n" \
+ " addq.l #1,%[data] ;\n" \
+ : [data] "+&d" (x) \
+ : [width] "d" (w) \
+ : \
+ )
+#else
+#define CLEAR_LOWER_BITS(x, w) x = ((x) & ~(1U << (w))) + 1
+#endif
- scanspan = ri->ri_stride;
- y = ri->ri_font->fontheight * row;
- startx = ri->ri_font->fontwidth * startcol;
- height = ri->ri_font->fontheight;
- fb = (uint8_t *)ri->ri_font->data +
- (uc - ri->ri_font->firstchar) * ri->ri_fontscale;
- inverse = ((attr & 0x00000001) != 0) ? ALL1BITS : ALL0BITS;
+/* Set planemask for the common plane and the common ROP */
+static inline void
+om_set_planemask(int planemask)
+{
+
+ *(volatile uint32_t *)OMFB_PLANEMASK = planemask;
+}
- p = (uint8_t *)ri->ri_bits + y * scanspan + ((startx / 32) * 4);
- align = startx & ALIGNMASK;
- width = ri->ri_font->fontwidth + align;
- lmask = ALL1BITS >> align;
- rmask = ALL1BITS << (-width & ALIGNMASK);
- if (width <= BLITWIDTH) {
- lmask &= rmask;
- /* set lmask as ROP mask value, with THROUGH mode */
- ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = lmask;
+/* Get a ROP address */
+static inline volatile uint32_t *
+om_rop_addr(int plane, int rop)
+{
- while (height > 0) {
- glyph = 0;
- for (i = ri->ri_font->stride; i != 0; i--)
- glyph = (glyph << 8) | *fb++;
- glyph <<= (4 - ri->ri_font->stride) * NBBY;
- glyph = (glyph >> align) ^ inverse;
+ return (volatile uint32_t *)
+ (OMFB_ROP_P0 + OMFB_PLANEOFFS * plane + rop * 4);
+}
+
+/* Set ROP and ROP's mask for individual plane */
+static inline void
+om_set_rop(int plane, int rop, uint32_t mask)
+{
- *W(p) = glyph;
+ *om_rop_addr(plane, rop) = mask;
+}
+
+/* Set ROP and ROP's mask for current setplanemask-ed plane(s) */
+static inline void
+om_set_rop_curplane(int rop, uint32_t mask)
+{
- p += scanspan;
- height--;
- }
- /* reset mask value */
- ((volatile uint32_t *)OMFB_ROPFUNC)[ROP_THROUGH] = ALL1BITS;
Home |
Main Index |
Thread Index |
Old Index