Source-Changes-HG archive
[src/trunk]: src/sys/dev/pci virtio(4): Membar and bus_dmamap_sync audit.
details: https://anonhg.NetBSD.org/src/rev/26a86ab03943
branches: trunk
changeset: 368894:26a86ab03943
user: riastradh <riastradh%NetBSD.org@localhost>
date: Fri Aug 12 10:49:57 2022 +0000
description:
virtio(4): Membar and bus_dmamap_sync audit.
- Don't use membar_* for DMA.
- Sync the header and payload of the rings separately, and only as needed.
If we bounce, this avoids a large memcpy when we only care about the
header (see the condensed sketch after this list).
- Sync uring with PREREAD before triggering anything that will return
data in it.
=> Move uring PREREAD in virtio_enqueue_commit to _before_ updating
vq->vq_avail->idx, which is the point at which the DMA read is
triggered in the `device' (host).
=> Omit needless membar_consumer in virtio_enqueue_commit -- may not
work with DMA memory, and even if it does, redundant with
bus_dmamap_sync uring PREREAD here.
=> XXX Does the device/host ever return unsolicited entries in the
queue, or only solicited ones? If only solicited ones, the
PREREAD in virtio_init_vq is redundant.
- Sync uring with POSTREAD before we read from it. This way the DMA
read into our buffer has finished before we read from the buffer.
=> Add missing uring POSTREAD in virtio_vq_is_enqueued, between read of
vq->vq_used_idx and return to caller, so that the caller can
safely use virtio_dequeue.
=> Add missing uring POSTREADs in virtio_start_vq_intr:
. between entry from caller and the read of vq->vq_used_idx
. between the read of vq->vq_used_idx and return to caller,
so that the caller can safely use virtio_dequeue, just like
virtio_vq_is_enqueued
=> Move uring POSTREADs in virtio_enqueue_commit to _before_ reading
vq->vq_used->flags or *vq->vq_avail_event, not after.
- After we write to aring, sync it with PREWRITE. This way we finish
writing to our buffer before the DMA write from it.
=> Omit needless PREWRITE in virtio_init_vq -- we do the appropriate
PREWRITE in virtio_enqueue_commit now.
=> Convert membar_producer to bus_dmamap_sync PREWRITE in
virtio_enqueue_commit.
=> Omit incorrect aring POSTWRITE in virtio_enqueue_commit -- no need
because the DMA write may not have completed yet at this point,
and we already do a POSTWRITE in virtio_vq_is_enqueued.
=> Omit needless membar_producer in virtio_postpone_intr -- may not
work with DMA memory, and even if it does, redundant with
bus_dmamap_sync PREWRITE here.
- After xfers to aring have completed, sync it with POSTWRITE.
=> Add missing aring POSTWRITE in virtio_free_vq, in case there are
paths from virtio_enqueue_commit to here that don't go through
virtio_vq_is_enqueued. (If there are no such paths, then maybe we
should KASSERT(vq->vq_queued == 0) in virtio_free_vq.)
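For reference, a condensed sketch of the sync discipline this change
establishes, pulled from the diff below and simplified: locking,
indirect descriptors, and the RING_EVENT_IDX variants are left out,
and the vq_sync_* helpers are the new ones introduced by this commit.

Producer/notify side (virtio_enqueue_commit, notifynow case):

        /* Get ready for the device to DMA results back into the uring. */
        vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);

        /* Publish the avail ring payload before the index that covers it. */
        vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
        vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
        vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);

        /* Then read the device's notify-suppression flag and kick. */
        vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
        flags = virtio_rw16(sc, vq->vq_used->flags);
        if (!(flags & VRING_USED_F_NO_NOTIFY))
                sc->sc_ops->kick(sc, vq->vq_index);

Consumer side (virtio_vq_is_enqueued):

        /* Sync only the 4-byte used-ring header to test for new entries, */
        vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
        if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
                return 0;
        /* and sync the payload only when there is something to dequeue. */
        vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
        return 1;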
diffstat:
sys/dev/pci/virtio.c | 168 ++++++++++++++++++++++++++++++++++++++------------
1 files changed, 126 insertions(+), 42 deletions(-)
diffs (truncated from 302 to 300 lines):
diff -r b8ad6f3a032a -r 26a86ab03943 sys/dev/pci/virtio.c
--- a/sys/dev/pci/virtio.c Fri Aug 12 10:49:47 2022 +0000
+++ b/sys/dev/pci/virtio.c Fri Aug 12 10:49:57 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: virtio.c,v 1.56 2022/08/09 12:42:05 riastradh Exp $ */
+/* $NetBSD: virtio.c,v 1.57 2022/08/12 10:49:57 riastradh Exp $ */
/*
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.56 2022/08/09 12:42:05 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.57 2022/08/12 10:49:57 riastradh Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -422,47 +422,111 @@
static inline void
vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
+
/* availoffset == sizeof(vring_desc)*vq_num */
bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
- ops);
+ ops);
+}
+
+static inline void
+vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ uint16_t hdrlen = offsetof(struct vring_avail, ring);
+ size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
+ size_t usedlen = 0;
+
+ if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
+ usedlen = sizeof(uint16_t);
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
+}
+
+static inline void
+vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ uint16_t hdrlen = offsetof(struct vring_avail, ring);
+
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_availoffset, hdrlen, ops);
+}
+
+static inline void
+vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ uint16_t hdrlen = offsetof(struct vring_avail, ring);
+ size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
+
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_availoffset + hdrlen, payloadlen, ops);
}
static inline void
-vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
uint16_t hdrlen = offsetof(struct vring_avail, ring);
- if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
- hdrlen += sizeof(uint16_t);
+ size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
+ size_t usedlen = sizeof(uint16_t);
+ if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
+ return;
bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
- vq->vq_availoffset,
- hdrlen + sc->sc_nvqs * sizeof(uint16_t),
- ops);
+ vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
+}
+
+static inline void
+vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ uint16_t hdrlen = offsetof(struct vring_used, ring);
+ size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
+ size_t availlen = 0;
+
+ if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
+ availlen = sizeof(uint16_t);
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
}
static inline void
-vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ uint16_t hdrlen = offsetof(struct vring_used, ring);
+
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_usedoffset, hdrlen, ops);
+}
+
+static inline void
+vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
{
uint16_t hdrlen = offsetof(struct vring_used, ring);
- if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
- hdrlen += sizeof(uint16_t);
+ size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
- vq->vq_usedoffset,
- hdrlen + sc->sc_nvqs * sizeof(struct vring_used_elem),
- ops);
+ vq->vq_usedoffset + hdrlen, payloadlen, ops);
+}
+
+static inline void
+vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+ uint16_t hdrlen = offsetof(struct vring_used, ring);
+ size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
+ size_t availlen = sizeof(uint16_t);
+
+ if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
+ return;
+ bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+ vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
}
static inline void
vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
- int ops)
+ int ops)
{
- int offset = vq->vq_indirectoffset
- + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
+ int offset = vq->vq_indirectoffset +
+ sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
- offset, sizeof(struct vring_desc) * vq->vq_maxnsegs,
- ops);
+ offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
}
bool
@@ -471,12 +535,14 @@
if (vq->vq_queued) {
vq->vq_queued = 0;
- vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
+ vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
}
- vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
- membar_consumer();
- return (vq->vq_used_idx != virtio_rw16(sc, vq->vq_used->idx)) ? 1 : 0;
+ vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
+ if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
+ return 0;
+ vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
+ return 1;
}
/*
@@ -530,9 +596,7 @@
/* set the new event index: avail_ring->used_event = idx */
*vq->vq_used_event = virtio_rw16(sc, idx);
- membar_producer();
-
- vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
+ vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
vq->vq_queued++;
nused = (uint16_t)
@@ -578,6 +642,7 @@
void
virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
+
if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
/*
* No way to disable the interrupt completely with
@@ -586,16 +651,19 @@
* the past to not trigger a spurios interrupt.
*/
*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
+ vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
} else {
- vq->vq_avail->flags |= virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+ vq->vq_avail->flags |=
+ virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+ vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
}
- vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
vq->vq_queued++;
}
int
virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
{
+
if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
/*
* If event index feature is negotiated, enabling interrupts
@@ -603,13 +671,19 @@
* used_event field
*/
*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
+ vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
} else {
- vq->vq_avail->flags &= ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+ vq->vq_avail->flags &=
+ ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+ vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
}
- vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
vq->vq_queued++;
- return vq->vq_used_idx != virtio_rw16(sc, vq->vq_used->idx);
+ vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
+ if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
+ return 0;
+ vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
+ return 1;
}
/*
@@ -655,8 +729,7 @@
mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
}
- vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
- vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
+ vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
vq->vq_queued++;
}
@@ -812,6 +885,8 @@
/* tell device that there's no virtqueue any longer */
sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
+ vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
+
kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
@@ -1050,34 +1125,43 @@
vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
mutex_enter(&vq->vq_aring_lock);
vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
- virtio_rw16(sc, slot);
+ virtio_rw16(sc, slot);
notify:
if (notifynow) {
uint16_t o, n, t;
uint16_t flags;
+
o = virtio_rw16(sc, vq->vq_avail->idx);
n = vq->vq_avail_idx;
- /* publish avail idx */
- membar_producer();
+ /*
+ * Prepare for `device->CPU' (host->guest) transfer
+ * into the buffer. This must happen before we commit
+ * the vq->vq_avail->idx update to ensure we're not
+ * still using the buffer in case program-prior loads
+ * or stores in it get delayed past the store to
+ * vq->vq_avail->idx.
+ */
+ vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
+
+ /* ensure payload is published, then avail idx */
+ vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
- vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+ vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
vq->vq_queued++;
- membar_consumer();
- vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
+ vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
if ((uint16_t) (n - t) < (uint16_t) (n - o))
sc->sc_ops->kick(sc, vq->vq_index);
} else {
+ vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
flags = virtio_rw16(sc, vq->vq_used->flags);
if (!(flags & VRING_USED_F_NO_NOTIFY))
sc->sc_ops->kick(sc, vq->vq_index);
}
- vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
- vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
}
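As an aside on the `If we bounce' line in the description: with a
bus_dma(9) back end that has to bounce, the bus_dmamap_sync call is
where the copy happens, and it covers only the offset/length window
passed in. A deliberately simplified, hypothetical sketch of a
bouncing POSTREAD (not NetBSD's actual MD implementation; the struct
and function names here are made up for illustration):

        #include <stddef.h>
        #include <string.h>

        struct bounce_map {
                char *bm_origbuf;       /* driver's real buffer */
                char *bm_bouncebuf;     /* DMA-reachable bounce buffer */
        };

        static void
        bounce_sync_postread(struct bounce_map *bm, size_t off, size_t len)
        {
                /*
                 * The device DMA'd into the bounce buffer; copy back
                 * only the [off, off + len) window the caller synced.
                 */
                memcpy(bm->bm_origbuf + off, bm->bm_bouncebuf + off, len);
        }

So syncing just the used-ring header, as vq_sync_uring_header does,
costs a 4-byte copy rather than a copy of the whole ring.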