Source-Changes-HG archive


[src/trunk]: src/sys/dev/pci virtio(4): Membar and bus_dmamap_sync audit.



details:   https://anonhg.NetBSD.org/src/rev/26a86ab03943
branches:  trunk
changeset: 368894:26a86ab03943
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Fri Aug 12 10:49:57 2022 +0000

description:
virtio(4): Membar and bus_dmamap_sync audit.

- Don't use membar_* for DMA.
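
  membar_producer(9) and friends order stores only as seen by other
  CPUs; they say nothing about visibility to a DMA device and do
  nothing for a bounce buffer.  A minimal sketch of the substitution,
  with a hypothetical dmamap `map' and region offsets standing in for
  the real ring layout:

      /* Wrong for DMA: orders the stores for other CPUs only. */
      ring->slot = x;
      membar_producer();
      ring->idx = i;

      /* Right: sync -- flush caches and copy out any bounce
       * buffer -- before the device may read each region. */
      ring->slot = x;
      bus_dmamap_sync(sc->sc_dmat, map, slotoff, slotlen,
          BUS_DMASYNC_PREWRITE);
      ring->idx = i;
      bus_dmamap_sync(sc->sc_dmat, map, idxoff, idxlen,
          BUS_DMASYNC_PREWRITE);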

- Sync only the header and payload of the rings separately as needed.
  If we bounce, this avoids a large memcpy when we only care about the
  header.
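
  This splits the old vq_sync_aring/vq_sync_uring into per-region
  helpers that hand a narrower (offset, length) pair to
  bus_dmamap_sync(9).  A sketch of the header-only case, mirroring
  vq_sync_aring_header in the diff below:

      uint16_t hdrlen = offsetof(struct vring_avail, ring);

      /* flags and idx only: a bouncing bus_dma backend copies
       * just these few bytes instead of the whole ring. */
      bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
          vq->vq_availoffset, hdrlen, ops);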

- Sync uring with PREREAD before triggering anything that will return
  data in it.

  => Move uring PREREAD in virtio_enqueue_commit to _before_ updating
     vq->vq_avail->idx, which is the point at which the DMA read is
     triggered in the `device' (host).

  => Omit needless membar_consumer in virtio_enqueue_commit -- may not
     work with DMA memory, and even if it does, redundant with
     bus_dmamap_sync uring PREREAD here.

  => XXX Does the device/host ever return unsolicited entries in the
     queue, or only solicited ones?  If only solicited ones, the
     PREREAD in virtio_init_vq is redundant.
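
  In sketch form, the ordering virtio_enqueue_commit now enforces
  (names as in the diff below):

      /* Prepare for the device's DMA write into the used ring... */
      vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);

      /* ...before publishing the avail index, which is what lets
       * the device start returning entries. */
      vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);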

- Sync uring with POSTREAD before we read from it.  This way the DMA
  read into our buffer has finished before we read from the buffer.

  => Add missing uring POSTREAD in virtio_vq_is_enqueued, between read of
     vq->vq_used_idx and return to caller, so that the caller can
     safely use virtio_dequeue.

  => Add missing uring POSTREADs in virtio_start_vq_intr:
     . between entry from caller and the read of vq->vq_used_idx
     . between the read of vq->vq_used_idx and return to caller,
       so that the caller can safely use virtio_dequeue, just like
       virtio_vq_is_enqueued

  => Move uring POSTREADs in virtio_enqueue_commit to _before_ reading
     vq->vq_used->flags or *vq->vq_avail_event, not after.
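
  The common shape of these fixes, as it lands in virtio_vq_is_enqueued
  in the diff below -- complete the device's DMA write before each of
  our reads:

      vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
      if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
              return 0;
      /* Entries are pending; make their payload visible before
       * the caller goes on to virtio_dequeue. */
      vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
      return 1;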

- After we write to aring, sync it with PREWRITE.  This way we finish
  writing to our buffer before the DMA write from it.

  => Omit needless PREWRITE in virtio_init_vq -- we do the appropriate
     PREWRITE in virtio_enqueue_commit now.

  => Convert membar_producer to bus_dmamap_sync PREWRITE in
     virtio_enqueue_commit.

  => Omit incorrect aring POSTWRITE in virtio_enqueue_commit -- no need
     because the DMA write may not have completed yet at this point,
     and we already do a POSTWRITE in virtio_vq_is_enqueued.

  => Omit needless membar_producer in virtio_postpone_intr -- may not
     work with DMA memory, and even if it does, redundant with
     bus_dmamap_sync PREWRITE here.
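
  Put together, the publish side of virtio_enqueue_commit now pairs
  each store with a PREWRITE, payload before header (abridged from the
  diff below):

      /* Publish the ring slot first... */
      vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
          virtio_rw16(sc, slot);
      vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);

      /* ...then the index, so the device can never observe an
       * idx that covers an unflushed slot. */
      vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
      vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);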

- After xfers to aring have completed, sync it with POSTWRITE.

  => Add missing aring POSTWRITE in virtio_free_vq, in case there are
     paths from virtio_enqueue_commit to here that don't go through
     virtio_vq_is_enqueued.  (If there are no such paths, then maybe we
     should KASSERT(vq->vq_queued == 0) in virtio_free_vq.)
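
  A sketch of the resulting teardown order in virtio_free_vq (abridged
  from the diff below):

      /* tell device that there's no virtqueue any longer */
      sc->sc_ops->setup_queue(sc, vq->vq_index, 0);

      /* All DMA reads from the avail ring are over: pair off any
       * outstanding PREWRITEs before the memory is freed. */
      vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);

      bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
      bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);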

diffstat:

 sys/dev/pci/virtio.c |  168 ++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 126 insertions(+), 42 deletions(-)

diffs (truncated from 302 to 300 lines):

diff -r b8ad6f3a032a -r 26a86ab03943 sys/dev/pci/virtio.c
--- a/sys/dev/pci/virtio.c      Fri Aug 12 10:49:47 2022 +0000
+++ b/sys/dev/pci/virtio.c      Fri Aug 12 10:49:57 2022 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: virtio.c,v 1.56 2022/08/09 12:42:05 riastradh Exp $    */
+/*     $NetBSD: virtio.c,v 1.57 2022/08/12 10:49:57 riastradh Exp $    */
 
 /*
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.56 2022/08/09 12:42:05 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.57 2022/08/12 10:49:57 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -422,47 +422,111 @@
 static inline void
 vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
 {
+
        /* availoffset == sizeof(vring_desc)*vq_num */
        bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
-                       ops);
+           ops);
+}
+
+static inline void
+vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+       uint16_t hdrlen = offsetof(struct vring_avail, ring);
+       size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
+       size_t usedlen = 0;
+
+       if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
+               usedlen = sizeof(uint16_t);
+       bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+           vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
+}
+
+static inline void
+vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+       uint16_t hdrlen = offsetof(struct vring_avail, ring);
+
+       bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+           vq->vq_availoffset, hdrlen, ops);
+}
+
+static inline void
+vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+       uint16_t hdrlen = offsetof(struct vring_avail, ring);
+       size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
+
+       bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+           vq->vq_availoffset + hdrlen, payloadlen, ops);
 }
 
 static inline void
-vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
 {
        uint16_t hdrlen = offsetof(struct vring_avail, ring);
-       if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
-               hdrlen += sizeof(uint16_t);
+       size_t payloadlen = sc->sc_nvqs * sizeof(uint16_t);
+       size_t usedlen = sizeof(uint16_t);
 
+       if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
+               return;
        bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
-                       vq->vq_availoffset,
-                       hdrlen + sc->sc_nvqs * sizeof(uint16_t),
-                       ops);
+           vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
+}
+
+static inline void
+vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+       uint16_t hdrlen = offsetof(struct vring_used, ring);
+       size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
+       size_t availlen = 0;
+
+       if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
+               availlen = sizeof(uint16_t);
+       bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+           vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
 }
 
 static inline void
-vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+       uint16_t hdrlen = offsetof(struct vring_used, ring);
+
+       bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+           vq->vq_usedoffset, hdrlen, ops);
+}
+
+static inline void
+vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
 {
        uint16_t hdrlen = offsetof(struct vring_used, ring);
-       if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
-               hdrlen += sizeof(uint16_t);
+       size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
 
        bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
-                       vq->vq_usedoffset,
-                       hdrlen + sc->sc_nvqs * sizeof(struct vring_used_elem),
-                       ops);
+           vq->vq_usedoffset + hdrlen, payloadlen, ops);
+}
+
+static inline void
+vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
+{
+       uint16_t hdrlen = offsetof(struct vring_used, ring);
+       size_t payloadlen = sc->sc_nvqs * sizeof(struct vring_used_elem);
+       size_t availlen = sizeof(uint16_t);
+
+       if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
+               return;
+       bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
+           vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
 }
 
 static inline void
 vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
-                    int ops)
+    int ops)
 {
-       int offset = vq->vq_indirectoffset
-                     + sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
+       int offset = vq->vq_indirectoffset +
+           sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
 
        bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
-                       offset, sizeof(struct vring_desc) * vq->vq_maxnsegs,
-                       ops);
+           offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
 }
 
 bool
@@ -471,12 +535,14 @@
 
        if (vq->vq_queued) {
                vq->vq_queued = 0;
-               vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
+               vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
        }
-       vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
-       membar_consumer();
 
-       return (vq->vq_used_idx != virtio_rw16(sc, vq->vq_used->idx)) ? 1 : 0;
+       vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
+       if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
+               return 0;
+       vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
+       return 1;
 }
 
 /*
@@ -530,9 +596,7 @@
 
        /* set the new event index: avail_ring->used_event = idx */
        *vq->vq_used_event = virtio_rw16(sc, idx);
-       membar_producer();
-
-       vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
+       vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
        vq->vq_queued++;
 
        nused = (uint16_t)
@@ -578,6 +642,7 @@
 void
 virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
 {
+
        if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
                /*
                 * No way to disable the interrupt completely with
@@ -586,16 +651,19 @@
                 * the past to not trigger a spurios interrupt.
                 */
                *vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
+               vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
        } else {
-               vq->vq_avail->flags |= virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+               vq->vq_avail->flags |=
+                   virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+               vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
        }
-       vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
        vq->vq_queued++;
 }
 
 int
 virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
 {
+
        if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
                /*
                 * If event index feature is negotiated, enabling interrupts
@@ -603,13 +671,19 @@
                 * used_event field
                 */
                *vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
+               vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
        } else {
-               vq->vq_avail->flags &= ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+               vq->vq_avail->flags &=
+                   ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
+               vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
        }
-       vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
        vq->vq_queued++;
 
-       return vq->vq_used_idx != virtio_rw16(sc, vq->vq_used->idx);
+       vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
+       if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
+               return 0;
+       vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
+       return 1;
 }
 
 /*
@@ -655,8 +729,7 @@
                mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
                mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
        }
-       vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
-       vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
+       vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
        vq->vq_queued++;
 }
 
@@ -812,6 +885,8 @@
        /* tell device that there's no virtqueue any longer */
        sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
 
+       vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
+
        kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
        bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
        bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
@@ -1050,34 +1125,43 @@
                vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
        mutex_enter(&vq->vq_aring_lock);
        vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
-               virtio_rw16(sc, slot);
+           virtio_rw16(sc, slot);
 
 notify:
        if (notifynow) {
                uint16_t o, n, t;
                uint16_t flags;
+
                o = virtio_rw16(sc, vq->vq_avail->idx);
                n = vq->vq_avail_idx;
 
-               /* publish avail idx */
-               membar_producer();
+               /*
+                * Prepare for `device->CPU' (host->guest) transfer
+                * into the buffer.  This must happen before we commit
+                * the vq->vq_avail->idx update to ensure we're not
+                * still using the buffer in case program-prior loads
+                * or stores in it get delayed past the store to
+                * vq->vq_avail->idx.
+                */
+               vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
+
+               /* ensure payload is published, then avail idx */
+               vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
                vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
-               vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
+               vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
                vq->vq_queued++;
 
-               membar_consumer();
-               vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
                if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
+                       vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
                        t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
                        if ((uint16_t) (n - t) < (uint16_t) (n - o))
                                sc->sc_ops->kick(sc, vq->vq_index);
                } else {
+                       vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
                        flags = virtio_rw16(sc, vq->vq_used->flags);
                        if (!(flags & VRING_USED_F_NO_NOTIFY))
                                sc->sc_ops->kick(sc, vq->vq_index);
                }
-               vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
-               vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
        }


