Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/xen/xen remove I/O defragmentation logic, pass requ...
details: https://anonhg.NetBSD.org/src/rev/dbec2394e285
branches: trunk
changeset: 1009328:dbec2394e285
user: jdolecek <jdolecek%NetBSD.org@localhost>
date: Mon Apr 20 16:12:28 2020 +0000
description:
remove I/O defragmentation logic, pass requests straight to the
underlying block device without trying to coalesce them
it seems rarely useful, and it makes the handling logic unnecessarily complex -
ultimately it's the DomU operating system responsibility to issue optimal I/O
might also help with the ZFS problem reported on port-xen, and will surely
simplify eventual indirect segment support
diffstat:
sys/arch/xen/xen/xbdback_xenbus.c | 518 +++++--------------------------------
1 files changed, 76 insertions(+), 442 deletions(-)
diffs (truncated from 823 to 300 lines):
diff -r a92a27ad7042 -r dbec2394e285 sys/arch/xen/xen/xbdback_xenbus.c
--- a/sys/arch/xen/xen/xbdback_xenbus.c Mon Apr 20 14:11:04 2020 +0000
+++ b/sys/arch/xen/xen/xbdback_xenbus.c Mon Apr 20 16:12:28 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.83 2020/04/20 16:12:28 jdolecek Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.83 2020/04/20 16:12:28 jdolecek Exp $");
#include <sys/atomic.h>
#include <sys/buf.h>
@@ -74,9 +74,7 @@
/* Need to alloc one extra page to account for possible mapping offset */
#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE)
-struct xbdback_request;
struct xbdback_io;
-struct xbdback_fragment;
struct xbdback_instance;
/*
@@ -98,8 +96,8 @@
* callback), the return value of a continuation can be set to NULL. In that
* case, the thread will go back to sleeping and wait for the proper
* condition before it starts processing requests again from where it left.
- * Continuation state is "stored" in the xbdback instance (xbdi_cont and
- * xbdi_cont_aux), and should only be manipulated by the instance thread.
+ * Continuation state is "stored" in the xbdback instance (xbdi_cont),
+ * and should only be manipulated by the instance thread.
*
* As xbdback(4) has to handle different sort of asynchronous events (Xen
* event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
@@ -109,36 +107,23 @@
* Here's how the call graph is supposed to be for a single I/O:
*
* xbdback_co_main()
- * |
- * | --> xbdback_co_cache_doflush() or NULL
+ * | --> xbdback_co_cache_flush()
+ * | | |
+ * | | -> xbdback_co_cache_doflush() or NULL
+ * | | |
+ * | | -> xbdback_co_do_io()
+ * xbdback_co_main_loop()-|
+ * | |-> xbdback_co_main_done2() or NULL
* | |
- * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <-
- * | | |
- * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()-
- * xbdback_co_main_loop()-|
- * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()-
- * | | |
- * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <-
- * | |
- * | --> xbdback_co_main() or NULL
+ * | --> xbdback_co_main_incr() -> xbdback_co_main_loop()
* |
* xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
* |
- * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() ---
- * | | |
- * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <--
- * | | | |
- * | | | |----------> xbdback_co_io_gotio()
- * | | | |
- * | | xbdback_co_main_incr() |
- * | | | |
- * | | xbdback_co_main_loop() |
- * | | |
- * | xbdback_co_io_gotio2() <-----------|
- * | | |
- * | | |----------> xbdback_co_io_gotfrag()
- * | | |
- * -- xbdback_co_io_gotfrag2() <---------|
+ * xbdback_co_io_gotio() -> xbdback_map_shm()
+ * | |
+ * | xbdback_co_main_incr() -> xbdback_co_main_loop()
+ * |
+ * xbdback_co_do_io()
* |
* xbdback_co_main_incr() -> xbdback_co_main_loop()
*/
@@ -192,17 +177,12 @@
* continuation-ness (see above).
*/
RING_IDX xbdi_req_prod; /* limit on request indices */
- xbdback_cont_t xbdi_cont, xbdi_cont_aux;
+ xbdback_cont_t xbdi_cont;
/* _request state: track requests fetched from ring */
struct xbdback_request *xbdi_req; /* if NULL, ignore following */
blkif_request_t xbdi_xen_req;
- int xbdi_segno;
/* _io state: I/O associated to this instance */
- struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
- daddr_t xbdi_next_sector;
- uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
- uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */
- grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
+ struct xbdback_io *xbdi_io;
/* other state */
int xbdi_same_page; /* are we merging two segments on the same page? */
uint xbdi_pendingreqs; /* number of I/O in fly */
@@ -223,20 +203,6 @@
static kmutex_t xbdback_lock;
/*
- * For each request from a guest, a xbdback_request is allocated from
- * a pool. This will describe the request until completion. The
- * request may require multiple IO operations to perform, so the
- * per-IO information is not stored here.
- */
-struct xbdback_request {
- struct xbdback_instance *rq_xbdi; /* our xbd instance */
- uint64_t rq_id;
- int rq_iocount; /* reference count; or, number of outstanding I/O's */
- int rq_ioerrs;
- uint8_t rq_operation;
-};
-
-/*
* For each I/O operation associated with one of those requests, an
* xbdback_io is allocated from a pool. It may correspond to multiple
* Xen disk requests, or parts of them, if several arrive at once that
@@ -246,56 +212,35 @@
/* The instance pointer is duplicated for convenience. */
struct xbdback_instance *xio_xbdi; /* our xbd instance */
uint8_t xio_operation;
+ uint64_t xio_id;
union {
struct {
struct buf xio_buf; /* our I/O */
- /* xbd requests involved */
- SLIST_HEAD(, xbdback_fragment) xio_rq;
/* the virtual address to map the request at */
vaddr_t xio_vaddr;
struct xbdback_va *xio_xv;
+ vaddr_t xio_start_offset; /* I/O start offset */
/* grants to map */
grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
/* grants release */
grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
uint16_t xio_nrma; /* number of guest pages */
- uint16_t xio_mapped; /* == 1: grants are mapped */
} xio_rw;
- uint64_t xio_flush_id;
} u;
};
#define xio_buf u.xio_rw.xio_buf
-#define xio_rq u.xio_rw.xio_rq
#define xio_vaddr u.xio_rw.xio_vaddr
+#define xio_start_offset u.xio_rw.xio_start_offset
#define xio_xv u.xio_rw.xio_xv
#define xio_gref u.xio_rw.xio_gref
#define xio_gh u.xio_rw.xio_gh
#define xio_nrma u.xio_rw.xio_nrma
-#define xio_mapped u.xio_rw.xio_mapped
-
-#define xio_flush_id u.xio_flush_id
-
-/*
- * Rather than having the xbdback_io keep an array of the
- * xbdback_requests involved, since the actual number will probably be
- * small but might be as large as BLKIF_RING_SIZE, use a list. This
- * would be threaded through xbdback_request, but one of them might be
- * part of multiple I/O's, alas.
- */
-struct xbdback_fragment {
- struct xbdback_request *car;
- SLIST_ENTRY(xbdback_fragment) cdr;
-};
/*
* Pools to manage the chain of block requests and I/Os fragments
* submitted by frontend.
*/
-/* XXXSMP */
-static struct xbdback_pool {
- struct pool_cache pc;
- struct timeval last_warning;
-} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool;
+static struct pool_cache xbdback_io_pool;
/* Interval between reports of I/O errors from frontend */
static const struct timeval xbdback_err_intvl = { 1, 0 };
@@ -320,22 +265,14 @@
static void *xbdback_co_main(struct xbdback_instance *, void *);
static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
-static void *xbdback_co_main_done(struct xbdback_instance *, void *);
static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
-static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
static void *xbdback_co_io(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
-static void *xbdback_co_io_loop(struct xbdback_instance *, void *);
static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *);
-static void *xbdback_co_map_io(struct xbdback_instance *, void *);
static void *xbdback_co_do_io(struct xbdback_instance *, void *);
static void xbdback_io_error(struct xbdback_io *, int);
@@ -345,9 +282,9 @@
static void *xbdback_map_shm(struct xbdback_io *);
static void xbdback_unmap_shm(struct xbdback_io *);
-static void *xbdback_pool_get(struct xbdback_pool *,
+static void *xbdback_pool_get(struct pool_cache *,
struct xbdback_instance *);
-static void xbdback_pool_put(struct xbdback_pool *, void *);
+static void xbdback_pool_put(struct pool_cache *, void *);
static void xbdback_thread(void *);
static void xbdback_wakeup_thread(struct xbdback_instance *);
static void xbdback_trampoline(struct xbdback_instance *, void *);
@@ -369,21 +306,12 @@
SLIST_INIT(&xbdback_instances);
mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
- pool_cache_bootstrap(&xbdback_request_pool.pc,
- sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL,
- IPL_SOFTBIO, NULL, NULL, NULL);
- pool_cache_bootstrap(&xbdback_io_pool.pc,
+ pool_cache_bootstrap(&xbdback_io_pool,
sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
IPL_SOFTBIO, NULL, NULL, NULL);
- pool_cache_bootstrap(&xbdback_fragment_pool.pc,
- sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL,
- IPL_SOFTBIO, NULL, NULL, NULL);
/* we allocate enough to handle a whole ring at once */
- pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE);
- pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE);
- pool_prime(&xbdback_fragment_pool.pc.pc_pool,
- BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE);
+ pool_prime(&xbdback_io_pool.pc_pool, BLKIF_RING_SIZE);
xenbus_backend_register(&xbd_backend_driver);
}
@@ -1074,7 +1002,8 @@
break;
}
} else {
- xbdi->xbdi_cont = xbdback_co_main_done;
+ KASSERT(xbdi->xbdi_io == NULL);
+ xbdi->xbdi_cont = xbdback_co_main_done2;
}
return xbdi;
}
@@ -1112,25 +1041,6 @@
}
/*
- * Ring processing is over. If there are any I/O still present for this
- * instance, handle them first.
- */
-static void *
-xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
-{
- (void)obj;
- if (xbdi->xbdi_io != NULL) {
- KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
- xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
- xbdi->xbdi_cont = xbdback_co_map_io;
- xbdi->xbdi_cont_aux = xbdback_co_main_done2;
- } else {
- xbdi->xbdi_cont = xbdback_co_main_done2;
- }
- return xbdi;
-}
-
-/*
* Check for requests in the instance's ring. In case there are, start again
* from the beginning. If not, stall.
*/
@@ -1139,6 +1049,7 @@
{
int work_to_do;
+ KASSERT(xbdi->xbdi_io == NULL);
RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
if (work_to_do)
xbdi->xbdi_cont = xbdback_co_main;
@@ -1152,29 +1063,8 @@
* Frontend requested a cache flush operation.
*/
static void *
-xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
+xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused)
Home |
Main Index |
Thread Index |
Old Index