Source-Changes-HG archive
[src/trunk]: src/sys/uvm change code to take advantage of direct...
details: https://anonhg.NetBSD.org/src/rev/79bf7a6f6375
branches: trunk
changeset: 319170:79bf7a6f6375
user: jdolecek <jdolecek%NetBSD.org@localhost>
date: Sat May 19 15:13:26 2018 +0000
description:
change code to take advantage of the direct map when available, avoiding the need
to map pages into the kernel
this improves performance of UBC-based (read(2)/write(2)) I/O, especially
for cached block I/O - sequential read on my NVMe goes from 1.7 GB/s to 1.9 GB/s
for non-cached reads, and from 2.2 GB/s to 5.6 GB/s for cached reads
the new code is conditional and off by default for now, so that it can be tested further;
it can be turned on by setting the ubc_direct variable to true
part of fix for PR kern/53124
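
For orientation, here is a hedged sketch of the two ubc_uiomove() paths after this
change, using only names that appear in the diff below (ubc_alloc_direct,
uvm_direct_process, ubc_uiomove_process). The EAGAIN retry, loan breaking,
UBC_FAULTBUSY overwrite and UBC_PARTIALOK handling are all left out, so treat this
as an illustration of the idea rather than the committed code:

	/*
	 * Simplified sketch only; assumes the declarations from
	 * sys/uvm/uvm_bio.c (ubc_winsize, ubc_direct, ...).
	 */
	static int
	ubc_uiomove_sketch(struct uvm_object *uobj, struct uio *uio,
	    vsize_t todo, int advice, int flags)
	{
		voff_t off = uio->uio_offset;
		int error = 0;

	#ifdef UBC_USE_PMAP_DIRECT
		if (ubc_direct) {
			/* New path: busy the pages, copy via the direct map. */
			struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT];
			int npages;

			while (todo > 0) {
				vsize_t bytelen = todo;

				error = ubc_alloc_direct(uobj, off, &bytelen,
				    advice, flags, pgs, &npages);
				if (error != 0)
					break;	/* could not get the pages */

				/* uiomove() on the pages' direct-map addresses */
				error = uvm_direct_process(pgs, npages, off,
				    bytelen, ubc_uiomove_process, uio);

				mutex_enter(uobj->vmobjlock);
				uvm_page_unbusy(pgs, npages);
				mutex_exit(uobj->vmobjlock);

				off += bytelen;
				todo -= bytelen;
				if (error != 0)
					break;
			}
			return error;
		}
	#endif
		/* Existing path: copy through a temporary kernel window. */
		while (todo > 0) {
			vsize_t bytelen = todo;
			void *win = ubc_alloc(uobj, off, &bytelen, advice,
			    flags);

			error = uiomove(win, bytelen, uio);
			ubc_release(win, flags);

			off += bytelen;
			todo -= bytelen;
			if (error != 0)
				break;
		}
		return error;
	}

The point of the ubc_direct path is that the pages are never mapped into the UBC
kernel window at all; the copy runs against each page's PMAP_DIRECT address inside
uvm_direct_process().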
diffstat:
sys/uvm/uvm_bio.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 223 insertions(+), 18 deletions(-)
diffs (truncated from 340 to 300 lines):
diff -r 6f5e4dcfe2b8 -r 79bf7a6f6375 sys/uvm/uvm_bio.c
--- a/sys/uvm/uvm_bio.c Sat May 19 15:03:26 2018 +0000
+++ b/sys/uvm/uvm_bio.c Sat May 19 15:13:26 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_bio.c,v 1.94 2018/04/20 18:58:10 jdolecek Exp $ */
+/* $NetBSD: uvm_bio.c,v 1.95 2018/05/19 15:13:26 jdolecek Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers.
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.94 2018/04/20 18:58:10 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.95 2018/05/19 15:13:26 jdolecek Exp $");
#include "opt_uvmhist.h"
#include "opt_ubc.h"
@@ -48,9 +48,9 @@
#include <uvm/uvm.h>
-/*
- * global data structures
- */
+#ifdef PMAP_DIRECT
+# define UBC_USE_PMAP_DIRECT
+#endif
/*
* local functions
@@ -59,6 +59,13 @@
static int ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **,
int, int, vm_prot_t, int);
static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t);
+#ifdef UBC_USE_PMAP_DIRECT
+static int __noinline ubc_uiomove_direct(struct uvm_object *, struct uio *, vsize_t,
+ int, int);
+static void __noinline ubc_zerorange_direct(struct uvm_object *, off_t, size_t, int);
+
+bool ubc_direct = false; /* XXX */
+#endif
/*
* local data structues
@@ -149,15 +156,12 @@
void
ubc_init(void)
{
- struct ubc_map *umap;
- vaddr_t va;
- int i;
-
/*
* Make sure ubc_winshift is sane.
*/
if (ubc_winshift < PAGE_SHIFT)
ubc_winshift = PAGE_SHIFT;
+ ubc_winsize = 1 << ubc_winshift;
/*
* init ubc_object.
@@ -174,10 +178,7 @@
if (ubc_object.umap == NULL)
panic("ubc_init: failed to allocate ubc_map");
- if (ubc_winshift < PAGE_SHIFT) {
- ubc_winshift = PAGE_SHIFT;
- }
- va = (vaddr_t)1L;
+ vaddr_t va = (vaddr_t)1L;
#ifdef PMAP_PREFER
PMAP_PREFER(0, &va, 0, 0); /* kernel is never topdown */
ubc_nqueues = va >> ubc_winshift;
@@ -185,13 +186,13 @@
ubc_nqueues = 1;
}
#endif
- ubc_winsize = 1 << ubc_winshift;
ubc_object.inactive = kmem_alloc(UBC_NQUEUES *
sizeof(struct ubc_inactive_head), KM_SLEEP);
- for (i = 0; i < UBC_NQUEUES; i++) {
+ for (int i = 0; i < UBC_NQUEUES; i++) {
TAILQ_INIT(&ubc_object.inactive[i]);
}
- for (i = 0; i < ubc_nwins; i++) {
+ for (int i = 0; i < ubc_nwins; i++) {
+ struct ubc_map *umap;
umap = &ubc_object.umap[i];
TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
umap, inactive);
@@ -199,7 +200,7 @@
ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, true,
&ubc_object.hashmask);
- for (i = 0; i <= ubc_object.hashmask; i++) {
+ for (int i = 0; i <= ubc_object.hashmask; i++) {
LIST_INIT(&ubc_object.hash[i]);
}
@@ -562,6 +563,7 @@
(uintptr_t)umap, umap->refcount, (uintptr_t)va, flags);
if (flags & UBC_FAULTBUSY) {
+ // XXX add offset from slot_offset?
int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct vm_page *pgs[npages];
int gpflags =
@@ -732,6 +734,12 @@
KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));
+#ifdef UBC_USE_PMAP_DIRECT
+ if (ubc_direct) {
+ return ubc_uiomove_direct(uobj, uio, todo, advice, flags);
+ }
+#endif
+
off = uio->uio_offset;
error = 0;
while (todo > 0) {
@@ -769,13 +777,20 @@
void
ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags)
{
- void *win;
+
+#ifdef UBC_USE_PMAP_DIRECT
+ if (ubc_direct) {
+ ubc_zerorange_direct(uobj, off, len, flags);
+ return;
+ }
+#endif
/*
* XXXUBC invent kzero() and use it
*/
while (len) {
+ void *win;
vsize_t bytelen = len;
win = ubc_alloc(uobj, off, &bytelen, UVM_ADV_NORMAL, UBC_WRITE);
@@ -787,6 +802,196 @@
}
}
+#ifdef UBC_USE_PMAP_DIRECT
+/* Copy data using direct map */
+
+/*
+ * ubc_alloc_direct: allocate a file mapping window using direct map
+ */
+static int __noinline
+ubc_alloc_direct(struct uvm_object *uobj, voff_t offset, vsize_t *lenp,
+ int advice, int flags, struct vm_page **pgs, int *npages)
+{
+ voff_t pgoff;
+ int error;
+ int gpflags = flags | PGO_NOTIMESTAMP | PGO_SYNCIO | PGO_ALLPAGES;
+ int access_type = VM_PROT_READ;
+
+ if (flags & UBC_WRITE) {
+ if (flags & UBC_FAULTBUSY)
+ gpflags |= PGO_OVERWRITE;
+#if 0
+ KASSERT(!UVM_OBJ_NEEDS_WRITEFAULT(uobj));
+#endif
+
+ gpflags |= PGO_PASTEOF;
+ access_type |= VM_PROT_WRITE;
+ }
+
+ pgoff = (offset & PAGE_MASK);
+ *lenp = MIN(*lenp, ubc_winsize - pgoff);
+
+again:
+ *npages = (*lenp + pgoff + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ KASSERT((*npages * PAGE_SIZE) <= ubc_winsize);
+ KASSERT(*lenp + pgoff <= ubc_winsize);
+ memset(pgs, 0, *npages * sizeof(pgs[0]));
+
+ mutex_enter(uobj->vmobjlock);
+ error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs,
+ npages, 0, access_type, advice, gpflags);
+ UVMHIST_LOG(ubchist, "alloc_direct getpages %jd", error, 0, 0, 0);
+ if (error) {
+ if (error == EAGAIN) {
+ kpause("ubc_alloc_directg", false, hz >> 2, NULL);
+ goto again;
+ }
+ return error;
+ }
+
+ mutex_enter(uobj->vmobjlock);
+ for (int i = 0; i < *npages; i++) {
+ struct vm_page *pg = pgs[i];
+
+ KASSERT(pg != NULL);
+ KASSERT(pg != PGO_DONTCARE);
+ KASSERT((pg->flags & PG_FAKE) == 0 || (gpflags & PGO_OVERWRITE));
+ KASSERT(pg->uobject->vmobjlock == uobj->vmobjlock);
+
+ /* Avoid breaking loan if possible, only do it on write */
+ if ((flags & UBC_WRITE) && pg->loan_count != 0) {
+ pg = uvm_loanbreak(pg);
+ if (pg == NULL) {
+ uvm_page_unbusy(pgs, *npages);
+ mutex_exit(uobj->vmobjlock);
+ uvm_wait("ubc_alloc_directl");
+ goto again;
+ }
+ pgs[i] = pg;
+ }
+
+ /* Page must be writable by now */
+ KASSERT((pg->flags & PG_RDONLY) == 0 || (flags & UBC_WRITE) == 0);
+
+ mutex_enter(&uvm_pageqlock);
+ uvm_pageactivate(pg);
+ mutex_exit(&uvm_pageqlock);
+
+ /* Page will be changed, no longer clean */
+ /* XXX do this AFTER the write? */
+ if (flags & UBC_WRITE)
+ pg->flags &= ~(PG_FAKE|PG_CLEAN);
+ }
+ mutex_exit(uobj->vmobjlock);
+
+ return 0;
+}
+
+static int
+ubc_uiomove_process(void *win, size_t len, void *arg)
+{
+ struct uio *uio = (struct uio *)arg;
+
+ return uiomove(win, len, uio);
+}
+
+static int
+ubc_zerorange_process(void *win, size_t len, void *arg)
+{
+ memset(win, 0, len);
+ return 0;
+}
+
+static int __noinline
+ubc_uiomove_direct(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice,
+ int flags)
+{
+ const bool overwrite = (flags & UBC_FAULTBUSY) != 0;
+ voff_t off;
+ int error, npages;
+ struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT];
+
+ KASSERT(todo <= uio->uio_resid);
+ KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) ||
+ ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ));
+
+ off = uio->uio_offset;
+ error = 0;
+ while (todo > 0) {
+ vsize_t bytelen = todo;
+
+ error = ubc_alloc_direct(uobj, off, &bytelen, advice, flags,
+ pgs, &npages);
+ if (error != 0) {
+ /* can't do anything, failed to get the pages */
+ break;
+ }
+
+ if (error == 0) {
+ error = uvm_direct_process(pgs, npages, off, bytelen,
+ ubc_uiomove_process, uio);
+ }
+ if (error != 0 && overwrite) {
+ /*
+ * if we haven't initialized the pages yet,
+ * do it now. it's safe to use memset here
+ * because we just mapped the pages above.
+ */
+ printf("%s: error=%d\n", __func__, error);
+ (void) uvm_direct_process(pgs, npages, off, bytelen,
+ ubc_zerorange_process, NULL);
+ }
+
+ mutex_enter(uobj->vmobjlock);
+ uvm_page_unbusy(pgs, npages);
+ mutex_exit(uobj->vmobjlock);
+
+ off += bytelen;
+ todo -= bytelen;
+
+ if (error != 0 && ISSET(flags, UBC_PARTIALOK)) {
+ break;
+ }
+ }
+
+ return error;
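
Note on testing: ubc_direct is a plain kernel bool added by this change (see the
hunk above), with no sysctl or kernel option wired up yet, so to exercise the new
path one would presumably flip its initializer locally, roughly:

	/* local test change in uvm_bio.c, not part of this commit */
	bool ubc_direct = true;

or write the variable from a running kernel with the debugger.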