Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch/arm/arm Make this work. Various fixes and some further optimizations.



details:   https://anonhg.NetBSD.org/src/rev/517b9616110b
branches:  trunk
changeset: 783421:517b9616110b
user:      matt <matt%NetBSD.org@localhost>
date:      Thu Dec 20 07:18:33 2012 +0000

description:
Make this work.  Various fixes and some further optimizations.

diffstat:

 sys/arch/arm/arm/cpu_in_cksum_buffer.S |  85 ++++++++++++++++++++++++++--------
 1 files changed, 65 insertions(+), 20 deletions(-)

diffs (155 lines):

diff -r 6aece91e6994 -r 517b9616110b sys/arch/arm/arm/cpu_in_cksum_buffer.S
--- a/sys/arch/arm/arm/cpu_in_cksum_buffer.S    Thu Dec 20 07:16:00 2012 +0000
+++ b/sys/arch/arm/arm/cpu_in_cksum_buffer.S    Thu Dec 20 07:18:33 2012 +0000
@@ -29,7 +29,7 @@
 
 #include <machine/asm.h>
 
-RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.1 2012/12/19 15:05:16 matt Exp $")
+RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.2 2012/12/20 07:18:33 matt Exp $")
 
 /*
  * Special note:
@@ -66,7 +66,7 @@
 #ifndef __OPTIMIZE_SIZE__
        rsb     r3, r3, #64             /* subtract from 64 */
 #ifdef _ARM_ARCH_DWORD_OK
-       add     r3, r3, r1, lsr #1      /* multiply by 1.5 */
+       add     r3, r3, r3, lsr #1      /* multiply by 1.5 */
        add     pc, pc, r3              /* and jump! */
 #else
        add     pc, pc, r3, lsl #1      /* multiply by 2 and jump! */
@@ -97,6 +97,7 @@
        LOAD_DWORD_INTO_R4(r0)          /* 1 dword left */
 .Ladd_one_dword:
        adcs    ip, ip, r4
+.Ladd_one_word:
        adcs    ip, ip, r5
        teq     r2, r0                  /* nothing left? */
        beq     .Lfold                  /*   yep, proceed to hold */
@@ -107,13 +108,14 @@
        bne     4b                      /*   yep, do 64 at time */
 #endif
        bics    r3, r1, #7              /* at least 8 bytes left? */
-       bge     3b                      /*   yep, do them */
+       bne     3b                      /*   yep, do them */
 
 .Lfinal_dword:
-       tst     r1, #4                  /* more than one word more left? */
+       sub     r3, r1, #1              /* 0-3 = 1 word, 4-7 = 2 words */
+       tst     r3, #4                  /* more than one word more left? */
        moveq   r4, #0                  /*   no, just use zero */
-       ldrne   r4, [r0], #4            /*   yes, load first word */
-       ldr     r5, [r0]                /* load last word */
+       ldreq   r5, [r0]                /*   no, load last word */
+       ldmneia r0, {r4-r5}             /*   yes, load last dword */
 .Lfinal_dword_noload:
        rsb     r1, r1, #4              /* find out many bytes to discard */
 #ifdef __ARMEL__
@@ -127,7 +129,9 @@
        tst     r1, #1                  /* discard odd? */
        bicne   r5, r5, #0x000000ff     /*   yes, discard odd byte */
 #endif
-       adds    ip, ip, r4              /* add 1st to accumulator */
+.Lfinal_add_one_dword:
+       adcs    ip, ip, r4              /* add 1st to accumulator */
+.Lfinal_add_one_word:
        adcs    ip, ip, r5              /* add 2nd to accumulator */
 
        /*
@@ -143,33 +147,58 @@
 #include "cpu_in_cksum_fold.S"
 
 .Ldword_misaligned:
+       tst     r0, #3                  /* are at least word aligned? */
+       bne     .Lword_misaligned       /*   no, do it the hard way */
+       ldr     r5, [r0], #4            /* load word here in case of partial */
+       sub     r1, r1, #4              /* subtract length of one word */
+       teq     r1, #0                  /* what is length? */
+       beq     .Lfinal_add_one_word    /*   = 0? just do the final add */
+       addgt   r2, r1, r0              /*   > 0? point r2 just past end */
+       bgt     .Ladd_one_word          /*   > 0? accumulate it and loop */
+       mov     r4, #0                  /*   < 0? zero this */
+       b       .Lfinal_dword_noload    /*   < 0? handle final partial dword */
+
+.Lword_misaligned:
+       tst     r0, #4                  /* do we load 1 or 2 words? */
        bic     r0, r0, #3              /* force word alignment */
-       add     r1, r1, r2              /* add misalignment to length */
-       tst     r2, #4                  /* first  */
-       ldr     r4, [r0], #4            /* load first word */
+       add     r1, r1, r2              /* add initial offset to length */
+       sub     r1, r1, #8              /* subtract length of one dword */
+       ldmeqia r0!, {r4-r5}            /* load first dword */
+       ldrne   r4, [r0], #4            /* load first word */
        movne   r5, #0                  /* no second word */
-       ldreq   r5, [r0], #4            /* load second word */
        /*
         * We are now dword aligned.
         */
 #ifdef __ARMEL__
        tst     r2, #2                  /* discard at least 2? */
        movne   r4, r4, lsr #16         /* yes, discard lower halfword */
-       tst     r2, #1                  /* discard odd? */
-       bicne   r4, r4, #0x0000ff00     /* yes, discard odd byte */
+       tst     r2, #1                  /* start odd? */
+       bicne   r4, r4, #0x000000ff     /* yes, discard even byte */
 #else
        tst     r2, #2                  /* discard at least 2? */
        movne   r4, r4, lsl #16         /* yes, discard upper halfword */
-       tst     r2, #1                  /* discard odd? */
-       bicne   r4, r4, #0x00ff0000     /* yes, discard odd byte */
+       tst     r2, #1                  /* start odd? */
+       bicne   r4, r4, #0xff000000     /* yes, discard even byte */
 #endif
        /*
+        * Since we started on an odd boundary, set up our stack frame so we
+        * fixup the return value to be byteswapped.
+        */
+       ldrne   r3, [sp, #4]            /* pop r5 */
+       strne   r3, [sp, #-4]!          /* push it again */ 
+       ldrne   r3, [sp, #4]            /* pop r4 */
+       strne   r3, [sp, #-4]!          /* push it again */
+       strne   lr, [sp, #8]            /* save our return address */
+       adrne   lr, .Lmisaligned_fixup  /* use new to fixup the return value */
+       /*
         * See if we have a least a full dword to process.  If we do, jump
         * into the main loop as if we just load a single dword.
         */
-       bics    r3, r1, #7              /* at least one dword? */
-       addne   r2, r1, r0              /*   yes, point r2 just past end */
-       bne     .Ladd_one_dword         /*   yes, accumulate it and loop */
+       teq     r1, #0                  /* what is length? */
+       beq     .Lfinal_add_one_word    /*   = 0? just do the final add */
+       addgt   r2, r1, r0              /*   > 0? point r2 just past end */
+       bgt     .Ladd_one_dword         /*   > 0? accumulate it and loop */
+
        /*
         * Not a full dword so do the final dword processing to find out
         * bytes to discard.  If we only loaded one word, move it to 2nd
@@ -177,7 +206,23 @@
         * clear the 1st word.
         */
        tst     r2, #4                  /* one or two words? */
-       movne   r5, r4                  /*   one, move 1st word to 2nd word */
-       movne   r4, #0                  /*        and clear 1st word */
+       moveq   r5, r4                  /*   one, move 1st word to 2nd word */
+       moveq   r4, #0                  /*        and clear 1st word */
        b       .Lfinal_dword_noload    /* handle final dword */
+
+       /*
+        * If we had an odd address, we have byte swap the return value.
+        * instead of testing everywhere, we inserted a fake callframe and
+        * set LR to return to do the fixup and return to the caller.
+        */
+.Lmisaligned_fixup:
+       ldr     lr, [sp], #8            /* fetch saved LR */
+#ifdef _ARM_ARCH_6
+       rev16   r0, r0                  /* byte swap */
+#else
+       mov     r0, r0, r0, ror #8      /* move 0:7 to 24:31 and 8:15 to 0:7 */
+       orr     r0, r0, r0, lsl #16     /* move 0:7 to 16:23 */
+       mov     r0, r0, r0, lsr #16     /* clear 16:31 to 0:15 */
+#endif
+       RET
 END(cpu_in_cksum_buffer)



Home | Main Index | Thread Index | Old Index