Source-Changes-D archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: CVS commit: src/common/lib/libc/arch/x86_64/string
Sorry for answering out of thread, but the message is long gone.
I just remembered very old x86 code of mine.
More than fifteen years ago it was still faster to go for 32-bit
wide testing when more than 20 bytes had to be compared, and to
redo a short byte loop to work around the fact that the carry flag
reflects the wrong byte order for the mismatching slot.
Maybe of interest.
|Andrew Doran <ad%netbsd.org@localhost> writes:
|> Hi,
|>
|> Change backed out. Sorry about the disruption.
/// MEMCMP - sir (*)(const void *_ba, const void *_bb, uir _bytes)
//
// Compares _bytes bytes at _ba with those at _bb (i386, AT&T syntax,
// arguments taken from the stack).
//
// In:    _ba, _bb, _bytes at the stack offsets 12, 16 and 20 (as seen after
//        the two register pushes below, adjusted through __PICSO())
// Out:   %eax = 0 if all bytes are equal (or _bytes is 0),
//        -1 if the first differing byte of _ba is below that of _bb
//        (unsigned byte compare), 1 otherwise
// Saved: %esi, %edi (pushed on entry, popped on exit)
// Used:  %eax, %ecx, %edx, flags
//
// Strategy: up to 20 bytes are compared with a plain byte loop.  Longer
// buffers are first advanced bytewise until %esi is 4-byte aligned, then
// compared 32 bits at a time.  Because of the little endian byte order the
// carry flag of a 32-bit compare does not give the ordering of the first
// differing byte, so on a mismatch the pointers are moved back by one slot
// and that slot is compared again bytewise.
#undef FUN
#undef FUN_STR
#define FUN __XXXXXX_mem_Compare
#define FUN_STR "sir XXXXXXX::Mem::Utils::Compare(const void*,const void*,uir)"
ASSERT_FUNVARS_STR()
NYD_FUNVARS_STR()
.global G(FUN)
.type G(FUN), @function
G(FUN):
    pushl   %edi
    pushl   %esi
.if __ALL
    GET_GOT()
    NYDIN()
.endif
.if SF_DEBUG
    // NOTE(review): this block uses PICSO() while the argument loads below
    // use __PICSO() - confirm that both macros exist and agree.
    movl    PICSO(12)(%esp), %eax       // _ba
    testl   %eax, %eax
    jnz     1f
    ASSERT_CRASH("_ba != NIL")
1:
    movl    PICSO(16)(%esp), %eax       // _bb
    testl   %eax, %eax
    jnz     2f
    ASSERT_CRASH("_bb != NIL")
2:
.endif
    // load args (_ba, _bb, _bytes)
    movl    __PICSO(12)(%esp), %esi
    movl    __PICSO(16)(%esp), %edi
    movl    __PICSO(20)(%esp), %ecx
    cld                                 // string ops walk forward
    xorl    %edx, %edx                  // default return: equal
    // A zero count must yield "equal".  REPZ CMPSB with %ecx == 0 executes
    // nothing and leaves the flags untouched, so the byte loop below cannot
    // be entered with a zero count: stale flags would be taken as a result.
    testl   %ecx, %ecx
    jz      9f
    cmpl    $20, %ecx                   // short enough for the byte loop?
    jbe     7f                          // unsigned: _bytes is a uir
1:  // advance bytewise until %esi is on a 4-byte boundary
    // (%ecx > 20 on entry, at most 3 rounds, so the count stays positive)
    testl   $3, %esi
    jz      2f
    cmpsb
    jne     8f                          // CF holds the ordering
    decl    %ecx
    jmp     1b
2:  // 32-bit wide loop; due to the little endian byte order this only
    // tells equal from unequal, but it is much faster than the byte loop
    movl    %ecx, %eax                  // save byte count
    shrl    $2, %ecx                    // number of 32-bit slots
    repz cmpsl
    jne     3f
    movl    %eax, %ecx                  // restore byte count ...
    andl    $3, %ecx                    // ... of which at most 3 remain
    jz      9f
    jmp     7f                          // compare the tail bytewise
3:  // Found an unequal slot, but CF is based on the "wrong" byte order.
    // Step both pointers back onto that slot and redo it bytewise.  %ecx
    // holds the count of slots left, not bytes; adding 4 only ensures the
    // byte loop may cover the whole slot, inside which it must stop.
    movl    $4, %eax
    subl    %eax, %esi
    subl    %eax, %edi
    addl    %eax, %ecx
7:  // byte loop; %ecx is nonzero on every path that gets here
    repz cmpsb
    je      9f
8:  // have a difference: turn CF into the result (%edx is still 0)
    sbbl    %edx, %edx                  // 0 - 0 - CF: 0 or -1
    orb     $1, %dl                     // 0 becomes 1, -1 stays -1
9:  // finalize
    movl    %edx, %eax                  // result is returned in %eax
.if __ALL
    NYDOUT()
    UNGET_GOT()
.endif
    popl    %esi
    popl    %edi
    ret
.size G(FUN), .-G(FUN)
.align 16
// /__XXXXXX_mem_Compare
--steffen
|
|Der Kragenbaer, The moon bear,
|der holt sich munter he cheerfully and one by one
|einen nach dem anderen runter wa.ks himself off
|(By Robert Gernhardt)
Home |
Main Index |
Thread Index |
Old Index