Source-Changes-D archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: audio2
At Wed, 8 May 2019 18:14:28 +0000,
maya%netbsd.org@localhost wrote:
> Is there some scenario where this made a difference?
> it sounds like an optimization that a compiler should already do
>
> 5234:#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
> 5235- *d++ += ((aint2_t)*s++) * track->volume >> 8;
> 5236-#else
> 5237- *d++ += ((aint2_t)*s++) * track->volume / 256;
> 5238-#endif
It's signed integer so these two output different code.
Here is the fragment of amd64 asm of "(x * vol >> 8)"
96a5: 0f af d1 imul %ecx,%edx
96a8: c1 fa 08 sar $0x8,%edx
96ab: 66 89 14 07 mov %dx,(%rdi,%rax,1)
Here is "(x * vol / 256)"
96a5: 0f af c1 imul %ecx,%eax
96a8: 85 c0 test %eax,%eax
96aa: 79 05 jns 96b1 <audio_track_chvol+0x82>
96ac: 05 ff 00 00 00 add $0xff,%eax
96b1: c1 f8 08 sar $0x8,%eax
96b4: 66 89 04 17 mov %ax,(%rdi,%rdx,1)
I evaluated these (a year ago). The former is 1.9 times faster on
my old amd64 than the later, 1.3 times faster on m68k, at that point.
# Although, above code is from current but evaluation is a year ago.
Let's consider about (-1 / 2). The mathematical answer is -0.5 so
it's 0 for integers. And (-1 >> 1) is -1. These two answers are
different but this value is audio wave that human hear. I took
performance.
However, I understand that my code looks strange.
How about this?
Index: sys/dev/audio/audio.c
===================================================================
RCS file: /cvsroot/src/sys/dev/audio/audio.c,v
retrieving revision 1.8
diff -u -r1.8 audio.c
--- sys/dev/audio/audio.c 21 May 2019 12:52:57 -0000 1.8
+++ sys/dev/audio/audio.c 23 May 2019 03:31:36 -0000
@@ -465,6 +465,29 @@
#define SPECIFIED(x) ((x) != ~0)
#define SPECIFIED_CH(x) ((x) != (u_char)~0)
+/*
+ * AUDIO_ASR() does Arithmetic Shift Right operation.
+ * This macro should be used for audio wave data only.
+ *
+ * Division by power of two is replaced with shift operation in the most
+ * compiler, but even then rounding-to-zero occurs on negative value.
+ * What we handle here is the audio wave data that human hear, so we can
+ * ignore the rounding difference. Therefore we want to use faster
+ * arithmetic shift right operation. But the right shift operator ('>>')
+ * for negative integer is "implementation defined" behavior in C (note
+ * that it's not "undefined" behavior). So if implementation defines '>>'
+ * as ASR, we use it.
+ *
+ * Using ASR is 1.9 times faster than division on my amd64, and 1.3 times
+ * faster on my m68k. -- isaki 201801.
+ */
+#if defined(__GNUC__)
+/* gcc defines '>>' as ASR. */
+#define AUDIO_ASR(value, shift) ((value) >> (shift))
+#else
+#define AUDIO_ASR(value, shift) ((value) / (1 << (shift)))
+#endif
+
/* Device timeout in msec */
#define AUDIO_TIMEOUT (3000)
@@ -3304,11 +3327,7 @@
for (ch = 0; ch < channels; ch++) {
aint2_t val;
val = *s++;
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
- val = val * ch_volume[ch] >> 8;
-#else
- val = val * ch_volume[ch] / 256;
-#endif
+ val = AUDIO_ASR(val * ch_volume[ch], 8);
*d++ = (aint_t)val;
}
}
@@ -3330,11 +3349,7 @@
d = arg->dst;
for (i = 0; i < arg->count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
- *d++ = (s[0] >> 1) + (s[1] >> 1);
-#else
- *d++ = (s[0] / 2) + (s[1] / 2);
-#endif
+ *d++ = AUDIO_ASR(s[0], 1) + AUDIO_ASR(s[1], 1);
s += arg->srcfmt->channels;
}
}
@@ -5131,11 +5146,7 @@
if (vol != 256) {
m = mixer->mixsample;
for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
- *m = *m * vol >> 8;
-#else
- *m = *m * vol / 256;
-#endif
+ *m = AUDIO_ASR(*m * vol, 8);
m++;
}
}
@@ -5223,11 +5234,9 @@
#if defined(AUDIO_SUPPORT_TRACK_VOLUME)
if (track->volume != 256) {
for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
- *d++ = ((aint2_t)*s++) * track->volume >> 8;
-#else
- *d++ = ((aint2_t)*s++) * track->volume / 256;
-#endif
+ aint2_t v;
+ v = *s++;
+ *d++ = AUDIO_ASR(v * track->volume, 8)
}
} else
#endif
@@ -5241,11 +5250,9 @@
#if defined(AUDIO_SUPPORT_TRACK_VOLUME)
if (track->volume != 256) {
for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
- *d++ += ((aint2_t)*s++) * track->volume >> 8;
-#else
- *d++ += ((aint2_t)*s++) * track->volume / 256;
-#endif
+ aint2_t v;
+ v = *s++;
+ *d++ += AUDIO_ASR(v * track->volume, 8);
}
} else
#endif
Index: sys/dev/audio/audiodef.h
===================================================================
RCS file: /cvsroot/src/sys/dev/audio/audiodef.h,v
retrieving revision 1.2
diff -u -r1.2 audiodef.h
--- sys/dev/audio/audiodef.h 8 May 2019 13:40:17 -0000 1.2
+++ sys/dev/audio/audiodef.h 23 May 2019 03:31:36 -0000
@@ -63,12 +63,6 @@
*/
/* #define AUDIO_SUPPORT_TRACK_VOLUME */
-/*
- * Whether use C language's "implementation defined" behavior (note that
- * it's not "undefined" behavior). It improves performance well.
- */
-#define AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR
-
/* conversion stage */
typedef struct {
audio_ring_t srcbuf;
---
Tetsuya Isaki <isaki%pastel-flower.jp@localhost / isaki%NetBSD.org@localhost>
Home |
Main Index |
Thread Index |
Old Index