Source-Changes-HG archive
[src/trunk]: src/sys/arch/xen/xen Deep rework of the xbdback(4) driver; it no...
details: https://anonhg.NetBSD.org/src/rev/20aacd83a963
branches: trunk
changeset: 771517:20aacd83a963
user: jym <jym%NetBSD.org@localhost>
date: Thu Nov 24 01:47:18 2011 +0000
description:
Deep rework of the xbdback(4) driver; it now uses a thread per instance
instead of running continuations directly from shm callbacks or interrupt
handlers. The whole CPS design remains, but it is adapted to cope with
a thread model.
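As a rough illustration only (simplified, assumed names, not the committed
code), the per-instance thread can be thought of as a loop that sleeps on a
condvar until an event handler flags work, then runs the continuation
trampoline until the chain either finishes or blocks:

/*
 * Simplified sketch of a per-instance worker thread driving a
 * continuation-passing state machine.  The lock and condvar are assumed
 * to be initialized elsewhere (mutex_init()/cv_init()), and the thread
 * to be started with kthread_create(..., KTHREAD_MPSAFE, ...).
 */
#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/kthread.h>
#include <sys/mutex.h>

struct example_instance {
	kmutex_t	 lock;
	kcondvar_t	 cv;
	bool		 run;		/* work pending */
	bool		 dying;		/* instance is being destroyed */
	/* current continuation; only the instance thread touches these */
	void		*(*cont)(struct example_instance *, void *);
	void		*cont_obj;
};

static void
example_thread(void *arg)
{
	struct example_instance *xi = arg;

	mutex_enter(&xi->lock);
	for (;;) {
		while (!xi->run && !xi->dying)
			cv_wait(&xi->cv, &xi->lock);	/* wait for work */
		if (xi->dying)
			break;
		xi->run = false;
		mutex_exit(&xi->lock);

		/* Trampoline: chain continuations until done or blocked. */
		while (xi->cont != NULL) {
			void *obj = xi->cont(xi, xi->cont_obj);
			if (obj == NULL)
				break;		/* blocked; resume here later */
			xi->cont_obj = obj;	/* input for the next step */
		}

		mutex_enter(&xi->lock);
	}
	mutex_exit(&xi->lock);
	kthread_exit(0);
}

/* Called from an event channel handler or soft interrupt. */
static void
example_wakeup(struct example_instance *xi)
{
	mutex_enter(&xi->lock);
	xi->run = true;
	cv_signal(&xi->cv);
	mutex_exit(&xi->lock);
}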
This patch allows scheduling away I/O requests of domains that behave
abnormally, or even destroying them if needed (without thrashing
dom0 with lots of error messages at IPL_BIO).
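The "without thrashing dom0" part amounts to throttling error reports from a
misbehaving frontend; the diff below adds xbdi_errps, xbdi_lasterr_time and
xbdback_err_intvl for that purpose. A minimal sketch of the idea, with
assumed names and the standard ratecheck(9) helper rather than the driver's
actual reporting code:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>

/* report at most one message per second, whatever the error rate */
static const struct timeval example_err_intvl = { 1, 0 };

struct example_errstate {
	struct timeval	lasterr_time;	/* last time a message was printed */
	int		errps;		/* errors seen since that message */
};

static void
example_io_error(struct example_errstate *es, const char *msg)
{
	es->errps++;
	if (ratecheck(&es->lasterr_time, &example_err_intvl)) {
		printf("xbdback: %s (%d errors since last report)\n",
		    msg, es->errps);
		es->errps = 0;
	}
}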
I took this opportunity to make the driver MPSAFE, so multiple instances
can run concurrently. Moved from home-grown pool(9) queues to
pool_cache(9), and reworked the callback mechanism so that it delegates
I/O processing to the thread instead of handling it itself through the
continuation trampoline.
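For reference, a minimal sketch of the pool_cache(9) pattern this moves to,
with illustrative names and pool_cache_init() instead of the embedded
pool_cache_bootstrap() form visible in the diff below:

#include <sys/param.h>
#include <sys/intr.h>
#include <sys/pool.h>

struct example_request {
	int	op;
	/* ... per-request state ... */
};

static pool_cache_t example_request_cache;

static void
example_attach(void)
{
	/* IPL_SOFTBIO: objects may be freed from a bio soft interrupt */
	example_request_cache = pool_cache_init(sizeof(struct example_request),
	    0, 0, 0, "exreq", NULL, IPL_SOFTBIO, NULL, NULL, NULL);
}

static struct example_request *
example_request_get(void)
{
	/*
	 * PR_NOWAIT: fail instead of sleeping, so a continuation can
	 * return NULL and let the instance thread retry later.
	 */
	return pool_cache_get(example_request_cache, PR_NOWAIT);
}

static void
example_request_put(struct example_request *req)
{
	pool_cache_put(example_request_cache, req);
}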
This one fixes the potential DoS many have seen in a dom0 when trying to
suspend a NetBSD domU with a corrupted I/O ring.
Benchmarks (build.sh release runs and bonnie++) do not show any
performance regression; the "new" driver is on par with the "old" one.
ok bouyer@.
diffstat:
sys/arch/xen/xen/xbdback_xenbus.c | 603 ++++++++++++++++++++++---------------
1 files changed, 350 insertions(+), 253 deletions(-)
diffs (truncated from 1185 to 300 lines):
diff -r de92f57388a7 -r 20aacd83a963 sys/arch/xen/xen/xbdback_xenbus.c
--- a/sys/arch/xen/xen/xbdback_xenbus.c Thu Nov 24 01:46:40 2011 +0000
+++ b/sys/arch/xen/xen/xbdback_xenbus.c Thu Nov 24 01:47:18 2011 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.51 2011/11/14 21:34:50 christos Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.52 2011/11/24 01:47:18 jym Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -26,23 +26,27 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.51 2011/11/14 21:34:50 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.52 2011/11/24 01:47:18 jym Exp $");
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/queue.h>
-#include <sys/kernel.h>
#include <sys/atomic.h>
+#include <sys/buf.h>
+#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/fcntl.h>
+#include <sys/kauth.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
#include <sys/vnode.h>
-#include <sys/kauth.h>
-#include <sys/workqueue.h>
-#include <sys/buf.h>
#include <xen/xen.h>
#include <xen/xen_shm.h>
@@ -75,39 +79,54 @@
struct xbdback_fragment;
struct xbdback_instance;
-/* state of a xbdback instance */
-typedef enum {CONNECTED, DISCONNECTING, DISCONNECTED} xbdback_state_t;
+/*
+ * status of a xbdback instance:
+ * WAITING: xbdback instance is connected, waiting for requests
+ * RUN: xbdi thread must be woken up, I/Os have to be processed
+ * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
+ * DISCONNECTED: no I/Os, no ring, the thread should terminate.
+ */
+typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
/*
- * Since there are a variety of conditions that can block our I/O
- * processing, which isn't allowed to suspend its thread's execution,
- * such things will be done in a sort of continuation-passing style.
+ * Each xbdback instance is managed by a single thread that handles all
+ * the I/O processing. As there are a variety of conditions that can block,
+ * everything will be done in a sort of continuation-passing style.
+ *
+ * When the execution has to block to delay processing, for example to
+ * allow system to recover because of memory shortage (via shared memory
+ * callback), the return value of a continuation can be set to NULL. In that
+ * case, the thread will go back to sleeping and wait for the proper
+ * condition before it starts processing requests again from where it left.
+ * Continuation state is "stored" in the xbdback instance (xbdi_cont and
+ * xbdi_cont_aux), and should only be manipulated by the instance thread.
+ *
+ * As xbdback(4) has to handle different sort of asynchronous events (Xen
+ * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
+ * mutex is used to protect specific elements of the xbdback instance from
+ * concurrent access: thread status and ring access (when pushing responses).
*
- * Return value is NULL to indicate that execution has blocked; if
- * it's finished, set xbdi->xbdi_cont (see below) to NULL and the return
- * doesn't matter. Otherwise it's passed as the second parameter to
- * the new value of xbdi->xbdi_cont.
+ * Here's how the call graph is supposed to be for a single I/O:
*
- * Here's how the call graph is supposed to be for a single I/O:
* xbdback_co_main()
* |
- * | --> xbdback_co_cache_doflush() or NULL
- * | |
- * | -- xbdback_co_cache_flush2() <- xbdback_co_flush_done() <--
- * | | |
- * | |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
+ * | --> xbdback_co_cache_doflush() or NULL
+ * | |
+ * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <-
+ * | | |
+ * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()-
* xbdback_co_main_loop()-|
- * | |-> xbdback_co_main_done() -> xbdback_co_flush() --
- * | | |
- * | -- xbdback_co_main_done2() <- xbdback_co_flush_done() <--
- * | |
- * | --> xbdback_co_main() or NULL
+ * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()-
+ * | | |
+ * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <-
+ * | |
+ * | --> xbdback_co_main() or NULL
* |
* xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
* |
- * xbdback_co_io_gotreq()--+---------> xbdback_co_flush() --
- * | | |
- * -> xbdback_co_io_loop()----| <- xbdback_co_flush_done() <--
+ * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() ---
+ * | | |
+ * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <--
* | | | |
* | | | |----------> xbdback_co_io_gotio()
* | | | |
@@ -131,21 +150,24 @@
XBDIP_64
};
-
/* we keep the xbdback instances in a linked list */
struct xbdback_instance {
SLIST_ENTRY(xbdback_instance) next;
struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
struct xenbus_watch xbdi_watch; /* to watch our store */
- domid_t xbdi_domid; /* attached to this domain */
+ domid_t xbdi_domid; /* attached to this domain */
uint32_t xbdi_handle; /* domain-specific handle */
- xbdback_state_t xbdi_status;
+ char xbdi_name[16]; /* name of this instance */
+ /* mutex that protects concurrent access to the xbdback instance */
+ kmutex_t xbdi_lock;
+ kcondvar_t xbdi_cv; /* wait channel for thread work */
+ xbdback_state_t xbdi_status; /* thread's status */
/* backing device parameters */
dev_t xbdi_dev;
const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
struct vnode *xbdi_vp;
uint64_t xbdi_size;
- int xbdi_ro; /* is device read-only ? */
+ bool xbdi_ro; /* is device read-only ? */
/* parameters for the communication */
unsigned int xbdi_evtchn;
/* private parameters for communication */
@@ -176,6 +198,11 @@
/* other state */
int xbdi_same_page; /* are we merging two segments on the same page? */
uint xbdi_pendingreqs; /* number of I/O in fly */
+ int xbdi_errps; /* errors per second */
+ struct timeval xbdi_lasterr_time; /* error time tracking */
+#ifdef DEBUG
+ struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */
+#endif
};
/* Manipulation of the above reference count. */
#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt)
@@ -208,7 +235,6 @@
* can be coalesced.
*/
struct xbdback_io {
- struct work xio_work;
/* The instance pointer is duplicated for convenience. */
struct xbdback_instance *xio_xbdi; /* our xbd instance */
uint8_t xio_operation;
@@ -252,18 +278,21 @@
};
/*
- * Wrap our pools with a chain of xbdback_instances whose I/O
- * processing has blocked for want of memory from that pool.
+ * Pools to manage the chain of block requests and I/Os fragments
+ * submitted by frontend.
*/
struct xbdback_pool {
- struct pool p;
- SIMPLEQ_HEAD(xbdback_iqueue, xbdback_instance) q;
+ struct pool_cache pc;
struct timeval last_warning;
} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool;
+
+SIMPLEQ_HEAD(xbdback_iqueue, xbdback_instance);
static struct xbdback_iqueue xbdback_shmq;
static int xbdback_shmcb; /* have we already registered a callback? */
-struct timeval xbdback_poolsleep_intvl = { 5, 0 };
+/* Interval between reports of I/O errors from frontend */
+struct timeval xbdback_err_intvl = { 1, 0 };
+
#ifdef DEBUG
struct timeval xbdback_fragio_intvl = { 60, 0 };
#endif
@@ -274,6 +303,9 @@
static void xbdback_backend_changed(struct xenbus_watch *,
const char **, unsigned int);
static int xbdback_evthandler(void *);
+
+static int xbdback_connect(struct xbdback_instance *);
+static int xbdback_disconnect(struct xbdback_instance *);
static void xbdback_finish_disconnect(struct xbdback_instance *);
static struct xbdback_instance *xbdif_lookup(domid_t, uint32_t);
@@ -287,7 +319,6 @@
static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
-static void *xbdback_co_cache_doflush_wait(struct xbdback_instance *, void *);
static void *xbdback_co_io(struct xbdback_instance *, void *);
static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
@@ -297,12 +328,13 @@
static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *);
static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *);
-static void *xbdback_co_flush(struct xbdback_instance *, void *);
-static void *xbdback_co_flush_done(struct xbdback_instance *, void *);
+static void *xbdback_co_map_io(struct xbdback_instance *, void *);
+static void *xbdback_co_do_io(struct xbdback_instance *, void *);
+
+static void *xbdback_co_wait_shm_callback(struct xbdback_instance *, void *);
static int xbdback_shm_callback(void *);
static void xbdback_io_error(struct xbdback_io *, int);
-static void xbdback_do_io(struct work *, void *);
static void xbdback_iodone(struct buf *);
static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
@@ -312,6 +344,8 @@
static void *xbdback_pool_get(struct xbdback_pool *,
struct xbdback_instance *);
static void xbdback_pool_put(struct xbdback_pool *, void *);
+static void xbdback_thread(void *);
+static void xbdback_wakeup_thread(struct xbdback_instance *);
static void xbdback_trampoline(struct xbdback_instance *, void *);
static struct xenbus_backend_driver xbd_backend_driver = {
@@ -319,8 +353,6 @@
.xbakd_type = "vbd"
};
-struct workqueue *xbdback_workqueue;
-
void
xbdbackattach(int n)
{
@@ -333,27 +365,26 @@
SLIST_INIT(&xbdback_instances);
SIMPLEQ_INIT(&xbdback_shmq);
xbdback_shmcb = 0;
- pool_init(&xbdback_request_pool.p, sizeof(struct xbdback_request),
- 0, 0, 0, "xbbrp", NULL, IPL_BIO);
- SIMPLEQ_INIT(&xbdback_request_pool.q);
- pool_init(&xbdback_io_pool.p, sizeof(struct xbdback_io),
- 0, 0, 0, "xbbip", NULL, IPL_BIO);
- SIMPLEQ_INIT(&xbdback_io_pool.q);
- pool_init(&xbdback_fragment_pool.p, sizeof(struct xbdback_fragment),
- 0, 0, 0, "xbbfp", NULL, IPL_BIO);
- SIMPLEQ_INIT(&xbdback_fragment_pool.q);
+
+ pool_cache_bootstrap(&xbdback_request_pool.pc,
+ sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL,
+ IPL_SOFTBIO, NULL, NULL, NULL);
+ pool_cache_bootstrap(&xbdback_io_pool.pc,
+ sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
+ IPL_SOFTBIO, NULL, NULL, NULL);
+ pool_cache_bootstrap(&xbdback_fragment_pool.pc,
+ sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL,
+ IPL_SOFTBIO, NULL, NULL, NULL);
+
/* we allocate enough to handle a whole ring at once */
- if (pool_prime(&xbdback_request_pool.p, BLKIF_RING_SIZE) != 0)
+ if (pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE) != 0)
printf("xbdback: failed to prime request pool\n");
- if (pool_prime(&xbdback_io_pool.p, BLKIF_RING_SIZE) != 0)
+ if (pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE) != 0)
printf("xbdback: failed to prime io pool\n");
- if (pool_prime(&xbdback_fragment_pool.p,
+ if (pool_prime(&xbdback_fragment_pool.pc.pc_pool,
BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) != 0)
printf("xbdback: failed to prime fragment pool\n");
- if (workqueue_create(&xbdback_workqueue, "xbdbackd",
- xbdback_do_io, NULL, PRI_BIO, IPL_BIO, 0))
- printf("xbdback: failed to init workqueue\n");
xenbus_backend_register(&xbd_backend_driver);
}
@@ -396,23 +427,28 @@
if (xbdif_lookup(domid, handle) != NULL) {
return EEXIST;
}