Subject: bcopy oops
To: None <port-alpha@NetBSD.ORG>
From: Trevor Blackwell <tlb@eecs.harvard.edu>
List: port-alpha
Date: 08/13/1995 20:00:40
I forgot to make bcopy return 0. This was causing copyin and friends
to return garbage values, indicating errors.
Here's the fixed bcopy, and also a fast bzero, as diffs from Chris's
last release.
*** locore.s Sun Aug 13 17:39:52 1995
--- locore.s-orig Fri Mar 24 12:11:54 1995
***************
*** 1039,1270 ****
*
* int bcopy(char *from, char *to, u_int len);
*/
- #if 1
LEAF(bcopy,3)
-
- /* Check for negative length */
- ble a2,bcopy_done
-
- /* Check for overlap */
- subq a1,a0,t5
- cmpult t5,a2,t5
- bne t5,bcopy_overlap
-
- /* a3 = end address */
- addq a0,a2,a3
-
- /* Get the first word */
- ldq_u t2,0(a0)
-
- /* Do they have the same alignment? */
- xor a0,a1,t0
- and t0,7,t0
- and a1,7,t1
- bne t0,bcopy_different_alignment
-
- /* src & dst have same alignment */
- beq t1,bcopy_all_aligned
-
- ldq_u t3,0(a1)
- addq a2,t1,a2
- mskqh t2,a0,t2
- mskql t3,a0,t3
- or t2,t3,t2
-
- /* Dst is 8-byte aligned */
-
- /* If less than 8 bytes,skip loop */
- bcopy_all_aligned:
- subq a2,1,t0
- and a2,7,a2
- bic t0,7,t0
- beq t0,bcopy_samealign_lp_end
-
- bcopy_samealign_lp:
- stq_u t2,0(a1)
- addq a1,8,a1
- ldq_u t2,8(a0)
- subq t0,8,t0
- addq a0,8,a0
- bne t0,bcopy_samealign_lp
-
- /* If we're done,exit */
- bcopy_samealign_lp_end:
- bne a2,bcopy_small_left
- stq_u t2,0(a1)
- mov zero, v0 /* return 0. */
- RET
-
- bcopy_small_left:
- mskql t2,a2,t4
- ldq_u t3,0(a1)
- mskqh t3,a2,t3
- or t4,t3,t4
- stq_u t4,0(a1)
- mov zero, v0 /* return 0. */
- RET
-
- /* this is the fun part */
- bcopy_different_alignment:
- addq a0,a2,a3
- cmpule a2,8,t0
- bne t0,bcopy_da_finish
-
- beq t1,bcopy_da_noentry
-
- /* Do the initial partial word */
- subq zero,a1,t0
- and t0,7,t0
- ldq_u t3,7(a0)
- extql t2,a0,t2
- extqh t3,a0,t3
- or t2,t3,t5
- insql t5,a1,t5
- ldq_u t6,0(a1)
- mskql t6,a1,t6
- or t5,t6,t5
- stq_u t5,0(a1)
- addq a0,t0,a0
- addq a1,t0,a1
- subq a2,t0,a2
- ldq_u t2,0(a0)
-
- bcopy_da_noentry:
- subq a2,1,t0
- bic t0,7,t0
- and a2,7,a2
- beq t0,bcopy_da_finish2
-
- bcopy_da_lp:
- ldq_u t3,7(a0)
- addq a0,8,a0
- extql t2,a0,t4
- extqh t3,a0,t5
- subq t0,8,t0
- or t4,t5,t5
- stq t5,0(a1)
- addq a1,8,a1
- beq t0,bcopy_da_finish1
- ldq_u t2,7(a0)
- addq a0,8,a0
- extql t3,a0,t4
- extqh t2,a0,t5
- subq t0,8,t0
- or t4,t5,t5
- stq t5,0(a1)
- addq a1,8,a1
- bne t0,bcopy_da_lp
-
- /* Do the last new word */
- bcopy_da_finish2:
- mov t2,t3
-
- /* Do the last partial word */
- bcopy_da_finish1:
- ldq_u t2,-1(a3)
- extql t3,a0,t3
- extqh t2,a0,t2
- or t2,t3,t2
- br zero,bcopy_samealign_lp_end
-
- /* Do the last word in the next source word */
- bcopy_da_finish:
- ldq_u t3,-1(a3)
- extql t2,a0,t2
- extqh t3,a0,t3
- or t2,t3,t2
- insqh t2,a1,t3
- insql t2,a1,t2
- lda t4,-1(zero)
- mskql t4,a2,t5
- cmovne t5,t5,t4
- insqh t4,a1,t5
- insql t4,a1,t4
- addq a1,a2,a4
- ldq_u t6,0(a1)
- ldq_u t7,-1(a4)
- bic t6,t4,t6
- bic t7,t5,t7
- and t2,t4,t2
- and t3,t5,t3
- or t2,t6,t2
- or t3,t7,t3
- stq_u t3,-1(a4)
- stq_u t2,0(a1)
- mov zero, v0 /* return 0. */
- RET
-
- /* Basically equivalent to previous case, only backwards.
- Not quite as highly optimized */
- bcopy_overlap:
- addq a0,a2,a3
- addq a1,a2,a4
-
- /* less than 8 bytes - don't worry about overlap */
- cmpule a2,8,t0
- bne t0,bcopy_ov_short
-
- /* Possibly do a partial first word */
- and a4,7,t4
- beq t4,bcopy_ov_nostart2
- subq a3,t4,a3
- subq a4,t4,a4
- ldq_u t1,0(a3)
- subq a2,t4,a2
- ldq_u t2,7(a3)
- ldq t3,0(a4)
- extql t1,a3,t1
- extqh t2,a3,t2
- or t1,t2,t1
- mskqh t3,t4,t3
- mskql t1,t4,t1
- or t1,t3,t1
- stq t1,0(a4)
-
- bcopy_ov_nostart2:
- bic a2,7,t4
- and a2,7,a2
- beq t4,bcopy_ov_lp_end
-
- /* This could be more pipelined, but it doesn't seem worth it */
- bcopy_ov_lp:
- ldq_u t0,-8(a3)
- subq a4,8,a4
- ldq_u t1,-1(a3)
- subq a3,8,a3
- extql t0,a3,t0
- extqh t1,a3,t1
- subq t4,8,t4
- or t0,t1,t0
- stq t0,0(a4)
- bne t4,bcopy_ov_lp
-
- bcopy_ov_lp_end:
- beq a2,bcopy_done
-
- ldq_u t0,0(a0)
- ldq_u t1,7(a0)
- ldq_u t2,0(a1)
- extql t0,a0,t0
- extqh t1,a0,t1
- or t0,t1,t0
- insql t0,a1,t0
- mskql t2,a1,t2
- or t2,t0,t2
- stq_u t2,0(a1)
-
- bcopy_done:
- mov zero, v0 /* return 0. */
- RET
-
- bcopy_ov_short:
- ldq_u t2,0(a0)
- br zero,bcopy_da_finish
-
- END(bcopy)
-
- #else
- LEAF(bcopy, 3)
SETGP(pv)
mov a2, t0 /* t0 = i = len */
beq a2, 2f /* if (len == 0), bail out */
--- 1039,1045 ----
***************
*** 1287,1370 ****
mov zero, v0 /* return 0. */
RET
END(bcopy)
- #endif
-
- #if 1
- LEAF(bzero,2)
- ble a1,bzero_done
- bic a1,63,t3 /* t3 is # bytes to do 64 bytes at a time */
-
- /* If nothing in first word, ignore it */
- subq zero,a0,t0
- and t0,7,t0 /* t0 = (0-size)%8 */
- beq t0,bzero_nostart1
-
- cmpult a1,t0,t1 /* if size > size%8 goto noshort */
- beq t1,bzero_noshort
-
- /* The whole thing is less than a word. Mask off 1..7 bytes, and finish */
- ldq_u t2,0(a0)
- lda t0,-1(zero) /* t0=-1 */
- mskql t0,a1,t0 /* Get ff in bytes (a0%8)..((a0+a1-1)%8) */
- insql t0,a0,t0
- bic t2,t0,t2 /* zero those bytes in word */
- stq_u t2,0(a0)
- mov zero, v0 /* return 0. */
- RET
-
- bzero_noshort:
-
- /* Handle the first partial word */
- ldq_u t2,0(a0)
- subq a1,t0,a1
- mskql t2,a0,t2 /* zero bytes (a0%8)..7 in word */
- stq_u t2,0(a0)
-
- addq a0,t0,a0 /* round a0 up to next word */
- bic a1,63,t3 /* recalc t3 (# bytes to do 64 bytes at a time)*/
- bzero_nostart1:
-
- /* Loop, zeroing 64 bytes at a time */
- beq t3,bzero_lp_done
- bzero_lp:
- stq zero,0(a0)
- stq zero,8(a0)
- stq zero,16(a0)
- stq zero,24(a0)
- subq t3,64,t3
- stq zero,32(a0)
- stq zero,40(a0)
- stq zero,48(a0)
- stq zero,56(a0)
- addq a0,64,a0
- bne t3,bzero_lp
-
- bzero_lp_done:
- /* Handle the last 0..7 words */
- /* We mask off the low bits, so we don't need an extra compare
- instruction for the loop (just a bne. heh-heh) */
- and a1,0x38,t4
- beq t4,bzero_finish_lp_done
- bzero_finish_lp:
- stq zero,0(a0)
- subq t4,8,t4
- addq a0,8,a0
- bne t4,bzero_finish_lp
-
- /* Do the last partial word */
- bzero_finish_lp_done:
- and a1,7,t5 /* 0..7 bytes left */
- beq t5,bzero_done /* mskqh won't change t0 if t5==0, but I
- don't want to touch, say, a new VM page */
- ldq t0,0(a0)
- mskqh t0,t5,t0
- stq t0,0(a0)
- bzero_done:
- mov zero, v0 /* return 0. */
- RET
-
- END(bzero)
- #endif
NESTED(copyin, 3, 16, ra, 0, 0)
SETGP(pv)
--- 1062,1067 ----
--
Trevor Blackwell tlb@eecs.harvard.edu (617) 495-8912