pkgsrc-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: pkg/48680
The following reply was made to PR pkg/48680; it has been noted by GNATS.
From: Onno van der Linden <o.vd.linden%quicknet.nl@localhost>
To: gnats-bugs%netbsd.org@localhost
Cc:
Subject: Re: pkg/48680
Date: Mon, 16 Jun 2014 19:35:12 +0200
Two problems in the current ffmpeg and ffmpeg2 packages
for NetBSD/i386:
1) compile error in ac3dsp_init.c
2) alignment issues with gcc 4.8
Fixed (1) by the diff from
http://svnweb.freebsd.org/ports/head/multimedia/ffmpeg2/files/patch-libavcodec-x86-ac3dsp_init.c?view=markup&pathrev=329611
(1) can also be fixed by changing libavutil/x86/asm.h as done in
http://svnweb.freebsd.org/ports/head/multimedia/ffmpeg/files/patch-libavutil-x86-asm.h?revision=332397&view=markup
with __clang__ replaced by ARCH_X86_32
As noted in http://ffmpeg.org/pipermail/ffmpeg-cvslog/2012-December/058557.html
a better fix would be to change the inline asm to yasm. An attempt
is available from https://patches.libav.org/patch/30666/ but that would
change the package too much.
Fixed (2) by slightly changing the ffmpeg and ffmpeg2 Makefiles
for i386 so that gcc 4.[01].* builds with asm disabled and all other
gcc 4 versions get the alignment options.
Onno
--- libavcodec/x86/ac3dsp_init.c.orig 2013-08-11 01:23:26.000000000 +0200
+++ libavcodec/x86/ac3dsp_init.c 2013-10-06 17:43:36.000000000 +0200
@@ -70,11 +70,11 @@ void ff_ac3_extract_exponents_ssse3(uint
"shufps $0, %%xmm6, %%xmm6 \n" \
"shufps $0, %%xmm7, %%xmm7 \n" \
"1: \n" \
- "movaps (%0, %2), %%xmm0 \n" \
- "movaps (%0, %3), %%xmm1 \n" \
- "movaps (%0, %4), %%xmm2 \n" \
- "movaps (%0, %5), %%xmm3 \n" \
- "movaps (%0, %6), %%xmm4 \n" \
+ "movups (%0, %2), %%xmm0 \n" \
+ "movups (%0, %3), %%xmm1 \n" \
+ "movups (%0, %4), %%xmm2 \n" \
+ "movups (%0, %5), %%xmm3 \n" \
+ "movups (%0, %6), %%xmm4 \n" \
"mulps %%xmm5, %%xmm0 \n" \
"mulps %%xmm6, %%xmm1 \n" \
"mulps %%xmm5, %%xmm2 \n" \
@@ -85,8 +85,8 @@ void ff_ac3_extract_exponents_ssse3(uint
"addps %%xmm3, %%xmm0 \n" \
"addps %%xmm4, %%xmm2 \n" \
mono("addps %%xmm2, %%xmm0 \n") \
- "movaps %%xmm0, (%0, %2) \n" \
- stereo("movaps %%xmm2, (%0, %3) \n") \
+ "movups %%xmm0, (%0, %2) \n" \
+ stereo("movups %%xmm2, (%0, %3) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i) \
@@ -106,24 +106,26 @@ void ff_ac3_extract_exponents_ssse3(uint
"mov %5, %2 \n" \
"1: \n" \
"mov -%c7(%6, %2, %c8), %3 \n" \
- "movaps (%3, %0), %%xmm0 \n" \
+ "movups (%3, %0), %%xmm0 \n" \
stereo("movaps %%xmm0, %%xmm1 \n") \
"mulps %%xmm4, %%xmm0 \n" \
stereo("mulps %%xmm5, %%xmm1 \n") \
"2: \n" \
"mov (%6, %2, %c8), %1 \n" \
- "movaps (%1, %0), %%xmm2 \n" \
+ "movups (%1, %0), %%xmm2 \n" \
stereo("movaps %%xmm2, %%xmm3 \n") \
- "mulps (%4, %2, 8), %%xmm2 \n" \
- stereo("mulps 16(%4, %2, 8), %%xmm3 \n") \
+ "movups (%4, %2, 8), %%xmm4 \n" \
+ "mulps %%xmm4, %%xmm2 \n" \
+ stereo("movups 16(%4, %2, 8), %%xmm4 \n") \
+ stereo("mulps %%xmm4, %%xmm3 \n") \
"addps %%xmm2, %%xmm0 \n" \
stereo("addps %%xmm3, %%xmm1 \n") \
"add $4, %2 \n" \
"jl 2b \n" \
"mov %5, %2 \n" \
stereo("mov (%6, %2, %c8), %1 \n") \
- "movaps %%xmm0, (%3, %0) \n" \
- stereo("movaps %%xmm1, (%1, %0) \n") \
+ "movups %%xmm0, (%3, %0) \n" \
+ stereo("movups %%xmm1, (%1, %0) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i), "=&r"(j), "=&r"(k), "=&r"(m) \
@@ -152,7 +154,7 @@ static void ac3_downmix_sse(float **samp
matrix_cmp[3][0] == matrix_cmp[4][0]) {
MIX5(IF1, IF0);
} else {
- DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
+ float matrix_simd[AC3_MAX_CHANNELS][2][4];
float *samp[AC3_MAX_CHANNELS];
for (j = 0; j < in_ch; j++)
@@ -166,8 +168,8 @@ static void ac3_downmix_sse(float **samp
"movss 4(%2, %0), %%xmm5 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"shufps $0, %%xmm5, %%xmm5 \n"
- "movaps %%xmm4, (%1, %0, 4) \n"
- "movaps %%xmm5, 16(%1, %0, 4) \n"
+ "movups %%xmm4, (%1, %0, 4) \n"
+ "movups %%xmm5, 16(%1, %0, 4) \n"
"jg 1b \n"
: "+&r"(j)
: "r"(matrix_simd), "r"(matrix)
--- ffmpeg/Makefile.orig 2014-06-16 19:10:35.000000000 +0200
+++ ffmpeg/Makefile 2014-06-16 19:05:39.000000000 +0200
@@ -41,15 +41,14 @@
.include "../../mk/compiler.mk"
# disable asm on i386 for non-gcc and gcc < 4.2
-# no alignment options needed for gcc >= 4.6 on i386
.if ${MACHINE_ARCH} == "i386"
. if !empty(MACHINE_PLATFORM:MDarwin-*-i386) \
|| !empty(MACHINE_PLATFORM:MSunOS-*-i386) \
|| !empty(CC_VERSION:Mgcc-[123]*) \
- || !empty(CC_VERSION:Mgcc-4.[01]*) \
+ || !empty(CC_VERSION:Mgcc-4.[01].*) \
|| empty(CC_VERSION:Mgcc*)
CONFIGURE_ARGS+= --disable-asm
-. elif !empty(CC_VERSION:Mgcc-4.[23456]*)
+. elif !empty(CC_VERSION:Mgcc-4.*)
CFLAGS+= -mstackrealign -mpreferred-stack-boundary=4
. endif
.endif
#### End of ffmpeg fixes, start of ffmpeg2 fixes
--- ffmpeg2/Makefile.orig 2014-06-16 19:10:45.000000000 +0200
+++ ffmpeg2/Makefile 2014-06-16 19:06:17.000000000 +0200
@@ -37,15 +37,14 @@
.include "../../mk/compiler.mk"
# disable asm on i386 for non-gcc and gcc < 4.2
-# no alignment options needed for gcc >= 4.6 on i386
.if ${MACHINE_ARCH} == "i386"
. if !empty(MACHINE_PLATFORM:MDarwin-*-i386) \
|| !empty(MACHINE_PLATFORM:MSunOS-*-i386) \
|| !empty(CC_VERSION:Mgcc-[123]*) \
- || !empty(CC_VERSION:Mgcc-4.[01]*) \
+ || !empty(CC_VERSION:Mgcc-4.[01].*) \
|| empty(CC_VERSION:Mgcc*)
CONFIGURE_ARGS+= --disable-asm
-. elif !empty(CC_VERSION:Mgcc-4.[23456]*)
+. elif !empty(CC_VERSION:Mgcc-4.*)
CFLAGS+= -mstackrealign -mpreferred-stack-boundary=4
. endif
.endif
--- libavcodec/x86/ac3dsp_init.c.orig 2014-06-02 23:18:54.000000000 +0200
+++ libavcodec/x86/ac3dsp_init.c 2014-06-14 21:27:55.000000000 +0200
@@ -84,11 +84,11 @@
"shufps $0, %%xmm6, %%xmm6 \n" \
"shufps $0, %%xmm7, %%xmm7 \n" \
"1: \n" \
- "movaps (%0, %2), %%xmm0 \n" \
- "movaps (%0, %3), %%xmm1 \n" \
- "movaps (%0, %4), %%xmm2 \n" \
- "movaps (%0, %5), %%xmm3 \n" \
- "movaps (%0, %6), %%xmm4 \n" \
+ "movups (%0, %2), %%xmm0 \n" \
+ "movups (%0, %3), %%xmm1 \n" \
+ "movups (%0, %4), %%xmm2 \n" \
+ "movups (%0, %5), %%xmm3 \n" \
+ "movups (%0, %6), %%xmm4 \n" \
"mulps %%xmm5, %%xmm0 \n" \
"mulps %%xmm6, %%xmm1 \n" \
"mulps %%xmm5, %%xmm2 \n" \
@@ -99,8 +99,8 @@
"addps %%xmm3, %%xmm0 \n" \
"addps %%xmm4, %%xmm2 \n" \
mono("addps %%xmm2, %%xmm0 \n") \
- "movaps %%xmm0, (%0, %2) \n" \
- stereo("movaps %%xmm2, (%0, %3) \n") \
+ "movups %%xmm0, (%0, %2) \n" \
+ stereo("movups %%xmm2, (%0, %3) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i) \
@@ -120,24 +120,26 @@
"mov %5, %2 \n" \
"1: \n" \
"mov -%c7(%6, %2, %c8), %3 \n" \
- "movaps (%3, %0), %%xmm0 \n" \
+ "movups (%3, %0), %%xmm0 \n" \
stereo("movaps %%xmm0, %%xmm1 \n") \
"mulps %%xmm4, %%xmm0 \n" \
stereo("mulps %%xmm5, %%xmm1 \n") \
"2: \n" \
"mov (%6, %2, %c8), %1 \n" \
- "movaps (%1, %0), %%xmm2 \n" \
+ "movups (%1, %0), %%xmm2 \n" \
stereo("movaps %%xmm2, %%xmm3 \n") \
- "mulps (%4, %2, 8), %%xmm2 \n" \
- stereo("mulps 16(%4, %2, 8), %%xmm3 \n") \
+ "movups (%4, %2, 8), %%xmm4 \n" \
+ "mulps %%xmm4, %%xmm2 \n" \
+ stereo("movups 16(%4, %2, 8), %%xmm4 \n") \
+ stereo("mulps %%xmm4, %%xmm3 \n") \
"addps %%xmm2, %%xmm0 \n" \
stereo("addps %%xmm3, %%xmm1 \n") \
"add $4, %2 \n" \
"jl 2b \n" \
"mov %5, %2 \n" \
stereo("mov (%6, %2, %c8), %1 \n") \
- "movaps %%xmm0, (%3, %0) \n" \
- stereo("movaps %%xmm1, (%1, %0) \n") \
+ "movups %%xmm0, (%3, %0) \n" \
+ stereo("movups %%xmm1, (%1, %0) \n") \
"add $16, %0 \n" \
"jl 1b \n" \
: "+&r"(i), "=&r"(j), "=&r"(k), "=&r"(m) \
@@ -166,7 +168,7 @@
matrix_cmp[3][0] == matrix_cmp[4][0]) {
MIX5(IF1, IF0);
} else {
- DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
+ float matrix_simd[AC3_MAX_CHANNELS][2][4];
float *samp[AC3_MAX_CHANNELS];
for (j = 0; j < in_ch; j++)
@@ -180,8 +182,8 @@
"movss 4(%2, %0), %%xmm5 \n"
"shufps $0, %%xmm4, %%xmm4 \n"
"shufps $0, %%xmm5, %%xmm5 \n"
- "movaps %%xmm4, (%1, %0, 4) \n"
- "movaps %%xmm5, 16(%1, %0, 4) \n"
+ "movups %%xmm4, (%1, %0, 4) \n"
+ "movups %%xmm5, 16(%1, %0, 4) \n"
"jg 1b \n"
: "+&r"(j)
: "r"(matrix_simd), "r"(matrix)
Home |
Main Index |
Thread Index |
Old Index