Source-Changes-HG archive
[src/trunk]: src/sys/arch/xen/xen xbdback: implement and publish "feature-flu...
details: https://anonhg.NetBSD.org/src/rev/67289c8a7549
branches: trunk
changeset: 747603:67289c8a7549
user: bouyer <bouyer%NetBSD.org@localhost>
date: Wed Sep 23 17:48:55 2009 +0000
description:
xbdback: implement and publish "feature-flush-cache".
xbd: if feature-flush-cache is present, use it for DIOCCACHESYNC.
If not present, make DIOCCACHESYNC return EOPNOTSUPP and warn on
first call.
Should improve WAPBL reliability of Xen guests on a NetBSD dom0.
Unfortunately, not all Linux guests seem to support this feature, and using
feature-write-barrier would require a B_BARRIER flag in the buffer.
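
For context, DIOCCACHESYNC is the generic disk cache-flush ioctl from <sys/dkio.h>,
so with this change a domU can force the backend to flush its write cache the same
way it would on bare metal. A minimal userland sketch, assuming the disk is xbd0 and
using its raw partition (the device name is only an example):

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/dkio.h>

    #include <err.h>
    #include <fcntl.h>
    #include <unistd.h>

    int
    main(void)
    {
            int fd, force = 0;      /* DIOCCACHESYNC takes an int "force" flag */

            fd = open("/dev/rxbd0d", O_RDWR);       /* example device node */
            if (fd == -1)
                    err(1, "open");
            /* With this change, EOPNOTSUPP means the backend lacks feature-flush-cache. */
            if (ioctl(fd, DIOCCACHESYNC, &force) == -1)
                    err(1, "DIOCCACHESYNC");
            close(fd);
            return 0;
    }
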
diffstat:
sys/arch/xen/xen/xbd_xenbus.c | 108 +++++++++++++++++++++++--
sys/arch/xen/xen/xbdback_xenbus.c | 159 +++++++++++++++++++++++++++++++++++--
2 files changed, 247 insertions(+), 20 deletions(-)
diffs (truncated from 461 to 300 lines):
diff -r d7ec97666d10 -r 67289c8a7549 sys/arch/xen/xen/xbd_xenbus.c
--- a/sys/arch/xen/xen/xbd_xenbus.c Wed Sep 23 11:17:58 2009 +0000
+++ b/sys/arch/xen/xen/xbd_xenbus.c Wed Sep 23 17:48:55 2009 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xbd_xenbus.c,v 1.42 2009/09/21 21:59:30 bouyer Exp $ */
+/* $NetBSD: xbd_xenbus.c,v 1.43 2009/09/23 17:48:55 bouyer Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.42 2009/09/21 21:59:30 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.43 2009/09/23 17:48:55 bouyer Exp $");
#include "opt_xen.h"
#include "rnd.h"
@@ -84,11 +84,24 @@
struct xbd_req {
SLIST_ENTRY(xbd_req) req_next;
uint16_t req_id; /* ID passed to backend */
- grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
- int req_nr_segments; /* number of segments in this request */
- struct buf *req_bp; /* buffer associated with this request */
- void *req_data; /* pointer to the data buffer */
+ union {
+ struct {
+ grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int req_nr_segments; /* number of segments in this request */
+ struct buf *req_bp; /* buffer associated with this request */
+ void *req_data; /* pointer to the data buffer */
+ } req_rw;
+ struct {
+ int s_error;
+ volatile int s_done;
+ } req_sync;
+ } u;
};
+#define req_gntref u.req_rw.req_gntref
+#define req_nr_segments u.req_rw.req_nr_segments
+#define req_bp u.req_rw.req_bp
+#define req_data u.req_rw.req_data
+#define req_sync u.req_sync
struct xbd_xenbus_softc {
device_t sc_dev;
@@ -104,6 +117,7 @@
struct xbd_req sc_reqs[XBD_RING_SIZE];
SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
+ bool sc_xbdreq_wait; /* special waiting on xbd_req */
int sc_backend_status; /* our status with backend */
#define BLKIF_STATE_DISCONNECTED 0
@@ -119,6 +133,7 @@
uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
u_long sc_info; /* VDISK_* */
u_long sc_handle; /* from backend */
+ int sc_cache_flush; /* backend supports BLKIF_OP_FLUSH_DISKCACHE */
#if NRND > 0
rndsource_element_t sc_rnd_source;
#endif
@@ -518,6 +533,7 @@
{
int err;
unsigned long long sectors;
+ u_long cache_flush;
err = xenbus_read_ul(NULL,
sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
@@ -541,6 +557,14 @@
if (err)
panic("%s: can't read number from %s/sector-size\n",
device_xname(sc->sc_dev), sc->sc_xbusd->xbusd_otherend);
+ err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
+ "feature-flush-cache", &cache_flush, 10);
+ if (err)
+ cache_flush = 0;
+ if (cache_flush > 0)
+ sc->sc_cache_flush = 1;
+ else
+ sc->sc_cache_flush = 0;
xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
}
@@ -564,9 +588,16 @@
for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
+ DPRINTF(("xbd_handler(%p): b_bcount = %ld\n",
+ xbdreq->req_bp, (long)bp->b_bcount));
bp = xbdreq->req_bp;
- DPRINTF(("xbd_handler(%p): b_bcount = %ld\n",
- bp, (long)bp->b_bcount));
+ if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) {
+ xbdreq->req_sync.s_error = rep->status;
+ xbdreq->req_sync.s_done = 1;
+ wakeup(xbdreq);
+ /* caller will free the req */
+ continue;
+ }
for (seg = xbdreq->req_nr_segments - 1; seg >= 0; seg--) {
if (__predict_false(
xengnt_status(xbdreq->req_gntref[seg]))) {
@@ -608,13 +639,15 @@
biodone(bp);
SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
}
+done:
xen_rmb();
sc->sc_ring.rsp_cons = i;
RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
if (more_to_do)
goto again;
-done:
dk_iodone(sc->sc_di, &sc->sc_dksc);
+ if (sc->sc_xbdreq_wait)
+ wakeup(&sc->sc_xbdreq_wait);
return 1;
}
@@ -717,6 +750,10 @@
struct dk_softc *dksc;
int error;
struct disk *dk;
+ int s;
+ struct xbd_req *xbdreq;
+ blkif_request_t *req;
+ int notify;
DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
dev, cmd, data, flag, l));
@@ -731,6 +768,57 @@
case DIOCSSTRATEGY:
error = EOPNOTSUPP;
break;
+ case DIOCCACHESYNC:
+ if (sc->sc_cache_flush <= 0) {
+ if (sc->sc_cache_flush == 0) {
+ aprint_error_dev(sc->sc_dev,
+ "WARNING: cache flush not supported "
+ "by backend\n");
+ sc->sc_cache_flush = -1;
+ }
+ return EOPNOTSUPP;
+ }
+
+ s = splbio();
+
+ while (RING_FULL(&sc->sc_ring)) {
+ sc->sc_xbdreq_wait = 1;
+ tsleep(&sc->sc_xbdreq_wait, PRIBIO, "xbdreq", 0);
+ }
+ sc->sc_xbdreq_wait = 0;
+
+ xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
+ if (__predict_false(xbdreq == NULL)) {
+ DPRINTF(("xbdioctl: no req\n"));
+ error = ENOMEM;
+ } else {
+ SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+ req = RING_GET_REQUEST(&sc->sc_ring,
+ sc->sc_ring.req_prod_pvt);
+ req->id = xbdreq->req_id;
+ req->operation = BLKIF_OP_FLUSH_DISKCACHE;
+ req->handle = sc->sc_handle;
+ xbdreq->req_sync.s_done = 0;
+ sc->sc_ring.req_prod_pvt++;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring,
+ notify);
+ if (notify)
+ hypervisor_notify_via_evtchn(sc->sc_evtchn);
+ /* request sent, no wait for completion */
+ while (xbdreq->req_sync.s_done == 0) {
+ tsleep(xbdreq, PRIBIO, "xbdsync", 0);
+ }
+ if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP)
+ error = EOPNOTSUPP;
+ else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY)
+ error = 0;
+ else
+ error = EIO;
+ SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
+ req_next);
+ }
+ splx(s);
+ break;
default:
error = dk_ioctl(sc->sc_di, dksc, dev, cmd, data, flag, l);
break;
@@ -788,7 +876,7 @@
}
- if (RING_FULL(&sc->sc_ring)) {
+ if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
DPRINTF(("xbdstart: ring_full\n"));
ret = -1;
goto out;
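
The backend side of the change follows. The hunk in which xbdback actually writes
the new xenstore node lies beyond the 300-line truncation; advertising a feature
normally comes down to a single xenbus_printf() next to the other properties the
backend already exports, roughly as sketched below. The helper name and the
xbdi_xbusd member access are assumptions for illustration, not the truncated hunk
itself:

    /*
     * Sketch only: publish "feature-flush-cache" under the backend's
     * xenstore directory so that the frontend's xenbus_read_ul() of the
     * same node (see the xbd_xenbus.c hunk above) finds it.
     */
    static void
    xbdback_advertise_flush(struct xbdback_instance *xbdi) /* hypothetical helper */
    {
            int err;

            err = xenbus_printf(NULL, xbdi->xbdi_xbusd->xbusd_path,
                "feature-flush-cache", "%d", 1);
            if (err)
                    printf("xbdback: writing feature-flush-cache failed: %d\n",
                        err);
    }
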
diff -r d7ec97666d10 -r 67289c8a7549 sys/arch/xen/xen/xbdback_xenbus.c
--- a/sys/arch/xen/xen/xbdback_xenbus.c Wed Sep 23 11:17:58 2009 +0000
+++ b/sys/arch/xen/xen/xbdback_xenbus.c Wed Sep 23 17:48:55 2009 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.24 2009/01/21 09:55:53 cegger Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.25 2009/09/23 17:48:55 bouyer Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.24 2009/01/21 09:55:53 cegger Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.25 2009/09/23 17:48:55 bouyer Exp $");
#include <sys/types.h>
#include <sys/param.h>
@@ -91,6 +91,31 @@
* it's finished, set xbdi->xbdi_cont (see below) to NULL and the return
* doesn't matter. Otherwise it's passed as the second parameter to
* the new value of xbdi->xbdi_cont.
+ * Here's how the call graph is supposed to be for a single I/O:
+ * xbdback_co_main()
+ * | |-> xbdback_co_cache_doflush() -> stall
+ * | xbdback_co_cache_flush2() <- xbdback_co_flush_done() <-
+ * | | |
+ * | |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
+ * xbdback_co_main_loop() -> xbdback_co_main_done() -> xbdback_co_flush()
+ * | | |
+ * | xbdback_co_main_done2() <- xbdback_co_flush_done()
+ * | |
+ * | xbdback_co_main() or NULL
+ * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
+ * |
+ * xbdback_co_io_gotreq() -> xbdback_co_flush() -> xbdback_co_flush()
+ * | | |
+ * xbdback_co_io_loop() --- <---------------- xbdback_co_flush_done()
+ * | |
+ * xbdback_co_io_gotio() |
+ * | |
+ * xbdback_co_io_gotio2()<-
+ * | |--------> xbdback_co_io_gotfrag
+ * | |
+ * xbdback_co_io_gotfrag2() <----------|
+ * | |--> xbdback_co_io_loop()
+ * xbdback_co_main_incr()
*/
typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
@@ -144,6 +169,7 @@
grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
/* other state */
int xbdi_same_page; /* are we merging two segments on the same page? */
+ uint xbdi_pendingreqs; /* number of I/O in fly */
};
/* Manipulation of the above reference count. */
/* XXXjld%panix.com@localhost: not MP-safe, and move the i386 asm elsewhere. */
@@ -180,16 +206,35 @@
*/
struct xbdback_io {
struct work xio_work;
- struct buf xio_buf; /* our I/O */
/* The instance pointer is duplicated for convenience. */
struct xbdback_instance *xio_xbdi; /* our xbd instance */
- SLIST_HEAD(, xbdback_fragment) xio_rq; /* xbd requests involved */
- vaddr_t xio_vaddr; /* the virtual address to map the request at */
- grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST]; /* grants to map */
- grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];/* grants release */
- uint16_t xio_nrma; /* number of guest pages */
- uint16_t xio_mapped;
+ uint8_t xio_operation;
+ union {
+ struct {
+ struct buf xio_buf; /* our I/O */
+ /* xbd requests involved */
+ SLIST_HEAD(, xbdback_fragment) xio_rq;
+ /* the virtual address to map the request at */
+ vaddr_t xio_vaddr;
+ /* grants to map */
+ grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
+ /* grants release */
+ grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
+ uint16_t xio_nrma; /* number of guest pages */
+ uint16_t xio_mapped;
+ } xio_rw;
+ uint64_t xio_flush_id;
+ } u;
};
+#define xio_buf u.xio_rw.xio_buf
+#define xio_rq u.xio_rw.xio_rq
+#define xio_vaddr u.xio_rw.xio_vaddr
+#define xio_gref u.xio_rw.xio_gref
+#define xio_gh u.xio_rw.xio_gh
+#define xio_nrma u.xio_rw.xio_nrma
+#define xio_mapped u.xio_rw.xio_mapped
+
+#define xio_flush_id u.xio_flush_id
/*
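
The diff is cut off here at the 300-line limit. For readers following the
call-graph comment above: every xbdback_co_* routine has the xbdback_cont_t
signature, and the graph is driven by a small trampoline that keeps invoking the
current continuation until one of them clears xbdi_cont. A rough sketch of that
dispatch idiom, with an illustrative function name rather than the actual
dispatcher from the file:

    /*
     * Illustrative only: the continuation-dispatch idiom described by the
     * call-graph comment.  Each step either installs the next step in
     * xbdi_cont and returns the object to hand to it, or clears xbdi_cont
     * when the request is finished.  In this sketch a step that has to
     * stall returns NULL and the loop is simply re-entered later with the
     * object it was waiting for.
     */
    static void
    xbdback_dispatch(struct xbdback_instance *xbdi, void *obj)  /* hypothetical name */
    {
            xbdback_cont_t cont;

            while (obj != NULL && xbdi->xbdi_cont != NULL) {
                    cont = xbdi->xbdi_cont;
                    obj = (*cont)(xbdi, obj);
            }
    }
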