Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/xen/xen remove I/O defragmentation logic, pass requ...
details: https://anonhg.NetBSD.org/src/rev/dbec2394e285
branches: trunk
changeset: 1009328:dbec2394e285
user: jdolecek <jdolecek%NetBSD.org@localhost>
date: Mon Apr 20 16:12:28 2020 +0000
description:
remove I/O defragmentation logic, pass requests straight to the
underlying block device without trying to coalesce them
it seems rarely useful, and it makes the handling logic unnecessarily complex -
ultimately it's the DomU operating system responsibility to issue optimal I/O
might also help with the ZFS problem reported on port-xen, and will surely
simplify eventual indirect segment support
diffstat:
sys/arch/xen/xen/xbdback_xenbus.c | 518 +++++--------------------------------
1 files changed, 76 insertions(+), 442 deletions(-)
diffs (truncated from 823 to 300 lines):
diff -r a92a27ad7042 -r dbec2394e285 sys/arch/xen/xen/xbdback_xenbus.c
--- a/sys/arch/xen/xen/xbdback_xenbus.c Mon Apr 20 14:11:04 2020 +0000
+++ b/sys/arch/xen/xen/xbdback_xenbus.c Mon Apr 20 16:12:28 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.83 2020/04/20 16:12:28 jdolecek Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.83 2020/04/20 16:12:28 jdolecek Exp $");
#include <sys/atomic.h>
#include <sys/buf.h>
@@ -74,9 +74,7 @@
/* Need to alloc one extra page to account for possible mapping offset */
#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE)
-struct xbdback_request;
struct xbdback_io;
-struct xbdback_fragment;
struct xbdback_instance;
/*
@@ -98,8 +96,8 @@
* callback), the return value of a continuation can be set to NULL. In that
* case, the thread will go back to sleeping and wait for the proper
* condition before it starts processing requests again from where it left.
- * Continuation state is "stored" in the xbdback instance (xbdi_cont and
- * xbdi_cont_aux), and should only be manipulated by the instance thread.
+ * Continuation state is "stored" in the xbdback instance (xbdi_cont),
+ * and should only be manipulated by the instance thread.
*
* As xbdback(4) has to handle different sort of asynchronous events (Xen
* event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
@@ -109,36 +107,23 @@
* Here's how the call graph is supposed to be for a single I/O:
*
* xbdback_co_main()
- * |
- * | --> xbdback_co_cache_doflush() or NULL
+ * | --> xbdback_co_cache_flush()
+ * | | |
+ * | | -> xbdback_co_cache_doflush() or NULL
+ * | | |
+ * | | -> xbdback_co_do_io()
+ * xbdback_co_main_loop()-|
+ * | |-> xbdback_co_main_done2() or NULL
* | |
- * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <-
- * | | |
- * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()-
- * xbdback_co_main_loop()-|
- * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()-
- * | | |
- * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <-
- * | |
- * | --> xbdback_co_main() or NULL
+ * | --> xbdback_co_main_incr() -> xbdback_co_main_loop()
* |
* xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
* |
- * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() ---
- * | | |
- * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <--
- * | | | |
- * | | | |----------> xbdback_co_io_gotio()
- * | | | |
- * | | xbdback_co_main_incr() |
- * | | | |
- * | | xbdback_co_main_loop() |
- * | | |
- * | xbdback_co_io_gotio2() <-----------|
- * | | |
- * | | |----------> xbdback_co_io_gotfrag()
- * | | |
- * -- xbdback_co_io_gotfrag2() <---------|
+ * xbdback_co_io_gotio() -> xbdback_map_shm()
+ * | |
+ * | xbdback_co_main_incr() -> xbdback_co_main_loop()
+ * |
+ * xbdback_co_do_io()
* |
* xbdback_co_main_incr() -> xbdback_co_main_loop()
*/
@@ -192,17 +177,12 @@
* continuation-ness (see above).
*/
RING_IDX xbdi_req_prod; /* limit on request indices */
- xbdback_cont_t xbdi_cont, xbdi_cont_aux;
+ xbdback_cont_t xbdi_cont;
/* _request state: track requests fetched from ring */
struct xbdback_request *xbdi_req; /* if NULL, ignore following */
blkif_request_t xbdi_xen_req;
- int xbdi_segno;
/* _io state: I/O associated to this instance */
- struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
- daddr_t xbdi_next_sector;
- uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
- uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */
- grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
+ struct xbdback_io *xbdi_io;
/* other state */
int xbdi_same_page; /* are we merging two segments on the same page? */
uint xbdi_pendingreqs; /* number of I/O in fly */
@@ -223,20 +203,6 @@
static kmutex_t xbdback_lock;
/*
- * For each request from a guest, a xbdback_request is allocated from
- * a pool. This will describe the request until completion. The
- * request may require multiple IO operations to perform, so the
- * per-IO information is not stored here.
- */
-struct xbdback_request {
- struct xbdback_instance *rq_xbdi; /* our xbd instance */
- uint64_t rq_id;
- int rq_iocount; /* reference count; or, number of outstanding I/O's */
- int rq_ioerrs;
- uint8_t rq_operation;
-};
-
-/*
* For each I/O operation associated with one of those requests, an
* xbdback_io is allocated from a pool. It may correspond to multiple
* Xen disk requests, or parts of them, if several arrive at once that
@@ -246,56 +212,35 @@
/* The instance pointer is duplicated for convenience. */
struct xbdback_instance *xio_xbdi; /* our xbd instance */
uint8_t xio_operation;
+ uint64_t xio_id;
union {
struct {
struct buf xio_buf; /* our I/O */
- /* xbd requests involved */
- SLIST_HEAD(, xbdback_fragment) xio_rq;
/* the virtual address to map the request at */
vaddr_t xio_vaddr;
struct xbdback_va *xio_xv;
+ vaddr_t xio_start_offset; /* I/O start offset */
/* grants to map */
grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
/* grants release */
grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
uint16_t xio_nrma; /* number of guest pages */
- uint16_t xio_mapped; /* == 1: grants are mapped */
} xio_rw;
- uint64_t xio_flush_id;
} u;
};
#define xio_buf u.xio_rw.xio_buf
-#define xio_rq u.xio_rw.xio_rq
#define xio_vaddr u.xio_rw.xio_vaddr
+#define xio_start_offset u.xio_rw.xio_start_offset
#define xio_xv u.xio_rw.xio_xv
#define xio_gref u.xio_rw.xio_gref
#define xio_gh u.xio_rw.xio_gh
#define xio_nrma u.xio_rw.xio_nrma
-#define xio_mapped u.xio_rw.xio_mapped
-
-#define xio_flush_id u.xio_flush_id
-
-/*
- * Rather than having the xbdback_io keep an array of the
- * xbdback_requests involved, since the actual number will probably be
- * small but might be as large as BLKIF_RING_SIZE, use a list. This
- * would be threaded through xbdback_request, but one of them might be
- * part of multiple I/O's, alas.
- */
-struct xbdback_fragment {
- struct xbdback_request *car;
- SLIST_ENTRY(xbdback_fragment) cdr;
-};
/*
* Pools to manage the chain of block requests and I/Os fragments
* submitted by frontend.
*/
-/* XXXSMP */
-static struct xbdback_pool {
- struct pool_cache pc;
- struct timeval last_warning;
-} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool;
+static struct pool_cache xbdback_io_pool;
/* Interval between reports of I/O errors from frontend */
static const struct timeval xbdback_err_intvl = { 1, 0 };
@@ -320,22 +265,14 @@
static void *xbdback_co_main(struct xbdback_instance *, void *);
static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
-static void *xbdback_co_main_done(struct xbdback_instance *, void *);
static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
-static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
static void *xbdback_co_io(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
-static void *xbdback_co_io_loop(struct xbdback_instance *, void *);
static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *);
-static void *xbdback_co_map_io(struct xbdback_instance *, void *);
static void *xbdback_co_do_io(struct xbdback_instance *, void *);
static void xbdback_io_error(struct xbdback_io *, int);
@@ -345,9 +282,9 @@
static void *xbdback_map_shm(struct xbdback_io *);
static void xbdback_unmap_shm(struct xbdback_io *);
-static void *xbdback_pool_get(struct xbdback_pool *,
+static void *xbdback_pool_get(struct pool_cache *,
struct xbdback_instance *);
-static void xbdback_pool_put(struct xbdback_pool *, void *);
+static void xbdback_pool_put(struct pool_cache *, void *);
static void xbdback_thread(void *);
static void xbdback_wakeup_thread(struct xbdback_instance *);
static void xbdback_trampoline(struct xbdback_instance *, void *);
@@ -369,21 +306,12 @@
SLIST_INIT(&xbdback_instances);
mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
- pool_cache_bootstrap(&xbdback_request_pool.pc,
- sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL,
- IPL_SOFTBIO, NULL, NULL, NULL);
- pool_cache_bootstrap(&xbdback_io_pool.pc,
+ pool_cache_bootstrap(&xbdback_io_pool,
sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
IPL_SOFTBIO, NULL, NULL, NULL);
- pool_cache_bootstrap(&xbdback_fragment_pool.pc,
- sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL,
- IPL_SOFTBIO, NULL, NULL, NULL);
/* we allocate enough to handle a whole ring at once */
- pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE);
- pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE);
- pool_prime(&xbdback_fragment_pool.pc.pc_pool,
- BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE);
+ pool_prime(&xbdback_io_pool.pc_pool, BLKIF_RING_SIZE);
xenbus_backend_register(&xbd_backend_driver);
}
@@ -1074,7 +1002,8 @@
break;
}
} else {
- xbdi->xbdi_cont = xbdback_co_main_done;
+ KASSERT(xbdi->xbdi_io == NULL);
+ xbdi->xbdi_cont = xbdback_co_main_done2;
}
return xbdi;
}
@@ -1112,25 +1041,6 @@
}
/*
- * Ring processing is over. If there are any I/O still present for this
- * instance, handle them first.
- */
-static void *
-xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
-{
- (void)obj;
- if (xbdi->xbdi_io != NULL) {
- KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
- xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
- xbdi->xbdi_cont = xbdback_co_map_io;
- xbdi->xbdi_cont_aux = xbdback_co_main_done2;
- } else {
- xbdi->xbdi_cont = xbdback_co_main_done2;
- }
- return xbdi;
-}
-
-/*
* Check for requests in the instance's ring. In case there are, start again
* from the beginning. If not, stall.
*/
@@ -1139,6 +1049,7 @@
{
int work_to_do;
+ KASSERT(xbdi->xbdi_io == NULL);
RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
if (work_to_do)
xbdi->xbdi_cont = xbdback_co_main;
@@ -1152,29 +1063,8 @@
* Frontend requested a cache flush operation.
*/
static void *
-xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
+xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused)
Home |
Main Index |
Thread Index |
Old Index