Subject: Re: port-i386/21665: bus_dmamap_sync needs a memory barrier
To: None <port-i386@netbsd.org, port-amd64@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: port-i386
Date: 01/08/2004 19:54:43
hi,
i'll commit the following diff if no one objects.
(not tested on amd64.)
YAMAMOTO Takashi
Index: arch/i386/include/cpufunc.h
===================================================================
--- arch/i386/include/cpufunc.h (revision 490)
+++ arch/i386/include/cpufunc.h (revision 492)
@@ -54,6 +54,16 @@ x86_pause(void)
__asm __volatile("pause");
}
+static __inline void
+x86_lfence(void)
+{
+
+ /*
+ * i386 stand-in for a load fence: a LOCK-prefixed add of zero to
+ * the word at the top of the stack. Adding zero leaves the stored
+ * value as it was, so the instruction is presumably here only for
+ * the ordering effect of the locked access (see the XXX below).
+ * The "memory" clobber keeps the compiler from moving memory
+ * accesses across this statement.
+ *
+ * XXX it's better to use real lfence insn if available.
+ */
+ __asm __volatile("lock; addl $0, 0(%%esp)" : : : "memory");
+}
+
#ifdef _KERNEL
extern unsigned int cpu_feature;
Index: arch/amd64/include/cpufunc.h
===================================================================
--- arch/amd64/include/cpufunc.h (revision 490)
+++ arch/amd64/include/cpufunc.h (revision 492)
@@ -54,6 +54,18 @@ x86_pause(void)
/* nothing */
}
+static __inline void
+x86_lfence(void)
+{
+
+ /*
+ * amd64 load fence: emits a single lfence instruction.
+ *
+ * XXX lfence is issued unconditionally; there is no fallback
+ * here if the instruction isn't available...
+ *
+ * memory clobber to avoid compiler reordering.
+ */
+ __asm __volatile("lfence" : : : "memory");
+}
+
#ifdef _KERNEL
extern int cpu_feature;
Index: arch/x86/include/bus.h
===================================================================
--- arch/x86/include/bus.h (revision 490)
+++ arch/x86/include/bus.h (revision 492)
@@ -72,6 +72,7 @@
#define _X86_BUS_H_
#include <machine/pio.h>
+#include <machine/cpufunc.h> /* for x86_lfence */
#ifdef BUS_SPACE_DEBUG
#include <sys/systm.h> /* for printf() prototype */
@@ -1109,6 +1110,9 @@ struct x86_bus_dma_tag {
int, off_t, int, int);
};
+static __inline void bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t,
+ bus_addr_t, bus_size_t, int) __attribute__((__unused__)); /* presumably silences unused-function warnings in files that include this header without calling it -- confirm */
+
#define bus_dmamap_create(t, s, n, m, b, f, p) \
(*(t)->_dmamap_create)((t), (s), (n), (m), (b), (f), (p))
#define bus_dmamap_destroy(t, p) \
@@ -1123,9 +1127,15 @@ struct x86_bus_dma_tag {
(*(t)->_dmamap_load_raw)((t), (m), (sg), (n), (s), (f))
#define bus_dmamap_unload(t, p) \
(*(t)->_dmamap_unload)((t), (p))
-#define bus_dmamap_sync(t, p, o, l, ops) \
- (void)((t)->_dmamap_sync ? \
- (*(t)->_dmamap_sync)((t), (p), (o), (l), (ops)) : (void)0)
+static __inline void
+bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t p, bus_addr_t o, bus_size_t l,
+ int ops)
+{
+ if (ops & BUS_DMASYNC_POSTREAD) /* POSTREAD requested: fence first, before the tag's hook runs */
+ x86_lfence();
+ if (t->_dmamap_sync) /* the tag's sync hook is optional; a NULL pointer means no further work */
+ (*t->_dmamap_sync)(t, p, o, l, ops);
+}
#define bus_dmamem_alloc(t, s, a, b, sg, n, r, f) \
(*(t)->_dmamem_alloc)((t), (s), (a), (b), (sg), (n), (r), (f))