Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[xsrc/trunk]: xsrc/external/mit/xf86-video-suncg14/dist/src start optimizing ...
details: https://anonhg.NetBSD.org/xsrc/rev/29f9bbacd49d
branches: trunk
changeset: 10823:29f9bbacd49d
user: macallan <macallan%NetBSD.org@localhost>
date: Fri Dec 03 19:43:22 2021 +0000
description:
start optimizing Copy8() operations
- only go right to left if srcY == dstY in the same pixmap
- special case copy where src and dst X have the same alignment
- special case the above where we don't need to read dst
scrolling should now be about as fast as SX can go
diffstat:
external/mit/xf86-video-suncg14/dist/src/cg14_accel.c | 132 ++++++++++++++++-
1 files changed, 118 insertions(+), 14 deletions(-)
diffs (169 lines):
diff -r 315bc5b49edc -r 29f9bbacd49d external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
--- a/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c Fri Dec 03 16:54:26 2021 +0000
+++ b/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c Fri Dec 03 19:43:22 2021 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_accel.c,v 1.18 2021/12/03 16:54:26 macallan Exp $ */
+/* $NetBSD: cg14_accel.c,v 1.19 2021/12/03 19:43:22 macallan Exp $ */
/*
* Copyright (c) 2013 Michael Lorenz
* All rights reserved.
@@ -98,8 +98,8 @@
Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
ENTER;
- DPRINTF(X_ERROR, "bits per pixel: %d\n",
- pSrcPixmap->drawable.bitsPerPixel);
+ xf86Msg(X_ERROR, "bits per pixel: %d rop %x\n",
+ pSrcPixmap->drawable.bitsPerPixel, alu);
if (planemask != p->last_mask) {
CG14Wait(p);
@@ -305,6 +305,106 @@
exaMarkSync(pDstPixmap->drawable.pScreen);
}
+/*
+ * Copy a w x h block of 8-bit pixels, left to right, storing the source
+ * bytes unmodified (no ROP).  Meant for the case where source and
+ * destination start addresses have the same alignment within a 4-byte word.
+ * srcpitch / dstpitch are added to the row start addresses after each row.
+ */
+static void
+CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+{
+	int src, dst, lead, left, nwords, nbytes;
+
+	ENTER;
+
+	/* bytes in front of the first 4-byte boundary, capped at the width */
+	lead = srcstart & 3;
+	if (lead != 0)
+		lead = 4 - lead;
+	lead = min(lead, w);
+
+	for (; h > 0; h--) {
+		src = srcstart;
+		dst = dststart;
+		left = w;
+
+		/* leading partial word, moved byte-wise */
+		if (lead > 0) {
+			write_sx_io(p, src & ~7, SX_LDB(8, lead - 1, src & 7));
+			write_sx_io(p, dst & ~7, SX_STB(8, lead - 1, dst & 7));
+			src += lead;
+			dst += lead;
+			left -= lead;
+		}
+
+		/* whole words, at most 32 per load/store pair */
+		while (left > 3) {
+			nwords = min(32, left >> 2);
+			nbytes = nwords << 2;
+			write_sx_io(p, src & ~7, SX_LD(8, nwords - 1, src & 7));
+			write_sx_io(p, dst & ~7, SX_ST(8, nwords - 1, dst & 7));
+			src += nbytes;
+			dst += nbytes;
+			left -= nbytes;
+		}
+
+		/* trailing bytes; skipped once nothing is left in this row */
+		if (left > 0) {
+			write_sx_io(p, src & ~7, SX_LDB(8, left - 1, src & 7));
+			write_sx_io(p, dst & ~7, SX_STB(8, left - 1, dst & 7));
+		}
+
+		srcstart += srcpitch;
+		dststart += dstpitch;
+	}
+}
+
+/*
+ * Copy a w x h block of 8-bit pixels, left to right, combining source and
+ * destination with the currently programmed ROP.  Meant for the case where
+ * source and destination start addresses have the same alignment within a
+ * 4-byte word.
+ *
+ * Register use per chunk: source data is loaded starting at register 8,
+ * destination data starting at register 40, and the ROP result is written
+ * starting at register 72 and stored back to the destination.
+ * srcpitch / dstpitch are added to the row start addresses after each row.
+ */
+static void
+CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+{
+	int saddr, daddr, pre, cnt, wrds;
+
+	ENTER;
+
+	/* bytes in front of the first 4-byte boundary, capped at the width */
+	pre = srcstart & 3;
+	if (pre != 0) pre = 4 - pre;
+	pre = min(pre, w);
+
+	while (h > 0) {
+		saddr = srcstart;
+		daddr = dststart;
+		cnt = w;
+		/* leading partial word, handled byte-wise */
+		if (pre > 0) {
+			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7));
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1));
+			write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7));
+			saddr += pre;
+			daddr += pre;
+			cnt -= pre;
+			if (cnt == 0) goto next;
+		}
+		/* whole words, at most 32 per pass */
+		while (cnt > 3) {
+			wrds = min(32, cnt >> 2);
+			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7));
+			/*
+			 * The ROP is issued on at most 16 registers at a time
+			 * (presumably an SX instruction limit - TODO confirm).
+			 * Decide on the word count, not the byte count, so the
+			 * second instruction's count (wrds - 17) can never go
+			 * negative, and advance all three register bases by 16
+			 * for the upper half.
+			 */
+			if (wrds > 16) {
+				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15));
+				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(24, 56, 88, wrds - 17));
+			} else
+				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1));
+			write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7));
+			saddr += wrds << 2;
+			daddr += wrds << 2;
+			cnt -= wrds << 2;
+		}
+		/* trailing bytes */
+		if (cnt > 0) {
+			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7));
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1));
+			write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7));
+		}
+next:
+		srcstart += srcpitch;
+		dststart += dstpitch;
+		h--;
+	}
+}
+
static void
CG14Copy8(PixmapPtr pDstPixmap,
int srcX, int srcY, int dstX, int dstY, int w, int h)
@@ -327,19 +427,13 @@
srcstart = srcX + (srcpitch * srcY) + srcoff;
dststart = dstX + (dstpitch * dstY) + dstoff;
- /*
- * we always copy up to 32 pixels at a time so direction doesn't
- * matter if w<=32
- */
- if (w > 32) {
- if (p->xdir < 0) {
- srcstart += (w - 32);
- dststart += (w - 32);
- xinc = -32;
- } else
- xinc = 32;
+ if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) {
+ srcstart += (w - 32);
+ dststart += (w - 32);
+ xinc = -32;
} else
xinc = 32;
+
if (p->ydir < 0) {
srcstart += (h - 1) * srcpitch;
dststart += (h - 1) * dstpitch;
@@ -349,6 +443,16 @@
srcinc = srcpitch;
dstinc = dstpitch;
}
+ if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) {
+ switch (p->last_rop) {
+ case 0xcc:
+ CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+ break;
+ default:
+ CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+ }
+ return;
+ }
if (p->last_rop == 0xcc) {
/* plain old copy */
if ( xinc > 0) {
Home |
Main Index |
Thread Index |
Old Index