Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys convert the (still disabled) 'direct write' for pipes to...
details: https://anonhg.NetBSD.org/src/rev/1547e0edd6ef
branches: trunk
changeset: 362440:1547e0edd6ef
user: jdolecek <jdolecek%NetBSD.org@localhost>
date: Sun Jun 10 17:54:51 2018 +0000
description:
convert the (still disabled) 'direct write' for pipes to use the
experimental PMAP_DIRECT if available; the direct code paths now survive
longer than the pmap_enter() variant, but still trigger a panic during
a build.sh tools run; remove some obsolete sysctls;
add some XXXs to mark places which need attention to make this more stable
Note: the loan case is now actually significantly slower than the
non-loan case on MP systems, due to synchronous IPIs triggered when
uvm_loan() marks the page read-only; this is being discussed
in the email thread
https://mail-index.netbsd.org/tech-kern/2018/05/21/msg023441.html
(this is basically the same issue that led to loaning being disabled
for sosend())
diffstat:
sys/kern/sys_pipe.c | 180 ++++++++++++++++++++++-----------------------------
sys/sys/pipe.h | 15 ++--
2 files changed, 84 insertions(+), 111 deletions(-)
diffs (truncated from 374 to 300 lines):
diff -r b4c363728414 -r 1547e0edd6ef sys/kern/sys_pipe.c
--- a/sys/kern/sys_pipe.c Sun Jun 10 17:52:20 2018 +0000
+++ b/sys/kern/sys_pipe.c Sun Jun 10 17:54:51 2018 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: sys_pipe.c,v 1.145 2018/05/19 11:39:37 jdolecek Exp $ */
+/* $NetBSD: sys_pipe.c,v 1.146 2018/06/10 17:54:51 jdolecek Exp $ */
/*-
* Copyright (c) 2003, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.145 2018/05/19 11:39:37 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.146 2018/06/10 17:54:51 jdolecek Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -102,6 +102,12 @@
#ifndef PIPE_NODIRECT
#include <uvm/uvm.h>
+
+#if !defined(PMAP_DIRECT)
+# define PIPE_NODIRECT /* Direct map interface not available */
+#endif
+
+bool pipe_direct = true;
#endif
static int pipe_read(file_t *, off_t *, struct uio *, kauth_cred_t, int);
@@ -136,20 +142,6 @@
#define MAXPIPESIZE (2 * PIPE_SIZE / 3)
/*
- * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
- * is there so that on large systems, we don't exhaust it.
- */
-#define MAXPIPEKVA (8 * 1024 * 1024)
-static u_int maxpipekva = MAXPIPEKVA;
-
-/*
- * Limit for direct transfers, we cannot, of course limit
- * the amount of kva for pipes in general though.
- */
-#define LIMITPIPEKVA (16 * 1024 * 1024)
-static u_int limitpipekva = LIMITPIPEKVA;
-
-/*
* Limit the number of "big" pipes
*/
#define LIMITBIGPIPES 32
@@ -177,6 +169,7 @@
#ifndef PIPE_NODIRECT
static int pipe_loan_alloc(struct pipe *, int);
static void pipe_loan_free(struct pipe *);
+static int pipe_direct_process_read(void *, size_t, void *);
#endif /* PIPE_NODIRECT */
static pool_cache_t pipe_wr_cache;
@@ -446,6 +439,16 @@
fownsignal(sigp->pipe_pgid, SIGIO, code, band, selp);
}
+#ifndef PIPE_NODIRECT
+static int
+pipe_direct_process_read(void *va, size_t len, void *arg)
+{
+ struct uio *uio = (struct uio *)arg;
+
+ return uiomove(va, len, uio);
+}
+#endif
+
static int
pipe_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
int flags)
@@ -507,30 +510,39 @@
#ifndef PIPE_NODIRECT
if ((rpipe->pipe_state & PIPE_DIRECTR) != 0) {
struct pipemapping * const rmap = &rpipe->pipe_map;
+ voff_t pgoff;
+ u_int pgst, npages;
+
/*
* Direct copy, bypassing a kernel buffer.
*/
- void *va;
-
KASSERT(rpipe->pipe_state & PIPE_DIRECTW);
- size = rmap->cnt;
- if (size > uio->uio_resid)
- size = uio->uio_resid;
+ size = MIN(rmap->cnt, uio->uio_resid);
+
+ if (size > 0) {
+ KASSERT(size > 0);
+ mutex_exit(lock);
- va = (char *)rmap->kva + rmap->pos;
- mutex_exit(lock);
- error = uiomove(va, size, uio);
- mutex_enter(lock);
- if (error)
- break;
- nread += size;
- rmap->pos += size;
- rmap->cnt -= size;
+ pgst = rmap->pos >> PAGE_SHIFT;
+ pgoff = rmap->pos & PAGE_MASK;
+ npages = (size + pgoff + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ KASSERTMSG(npages > 0 && (pgst + npages) <= rmap->npages, "npages %u pgst %u rmap->npages %u", npages, pgst, rmap->npages);
+
+ error = uvm_direct_process(&rmap->pgs[pgst], npages,
+ pgoff, size, pipe_direct_process_read, uio);
+ mutex_enter(lock);
+
+ nread += size;
+ rmap->pos += size;
+ rmap->cnt -= size;
+ }
+
if (rmap->cnt == 0) {
rpipe->pipe_state &= ~PIPE_DIRECTR;
cv_broadcast(&rpipe->pipe_wcv);
}
+
continue;
}
#endif
@@ -630,18 +642,20 @@
pipe_loan_alloc(struct pipe *wpipe, int npages)
{
struct pipemapping * const wmap = &wpipe->pipe_map;
- const vsize_t len = ptoa(npages);
+
+ KASSERT(wmap->npages == 0);
- atomic_add_int(&amountpipekva, len);
- wmap->kva = uvm_km_alloc(kernel_map, len, 0,
- UVM_KMF_COLORMATCH | UVM_KMF_VAONLY | UVM_KMF_WAITVA);
- if (wmap->kva == 0) {
- atomic_add_int(&amountpipekva, -len);
- return (ENOMEM);
+ if (npages > wmap->maxpages) {
+ pipe_loan_free(wpipe);
+
+ wmap->pgs = kmem_alloc(npages * sizeof(struct vm_page *), KM_NOSLEEP);
+ if (wmap->pgs == NULL)
+ return ENOMEM;
+ wmap->maxpages = npages;
}
wmap->npages = npages;
- wmap->pgs = kmem_alloc(npages * sizeof(struct vm_page *), KM_SLEEP);
+
return (0);
}
@@ -652,18 +666,16 @@
pipe_loan_free(struct pipe *wpipe)
{
struct pipemapping * const wmap = &wpipe->pipe_map;
- const vsize_t len = ptoa(wmap->npages);
- uvm_km_free(kernel_map, wmap->kva, len, UVM_KMF_VAONLY);
- wmap->kva = 0;
- atomic_add_int(&amountpipekva, -len);
- kmem_free(wmap->pgs, wmap->npages * sizeof(struct vm_page *));
- wmap->pgs = NULL;
-#if 0
+ if (wmap->maxpages > 0) {
+ kmem_free(wmap->pgs, wmap->maxpages * sizeof(struct vm_page *));
+ wmap->pgs = NULL;
+ wmap->maxpages = 0;
+ }
+
wmap->npages = 0;
wmap->pos = 0;
wmap->cnt = 0;
-#endif
}
/*
@@ -681,20 +693,18 @@
{
struct pipemapping * const wmap = &wpipe->pipe_map;
kmutex_t * const lock = wpipe->pipe_lock;
- struct vm_page **pgs;
vaddr_t bbase, base, bend;
vsize_t blen, bcnt;
int error, npages;
voff_t bpos;
- u_int starting_color;
- KASSERT(mutex_owned(wpipe->pipe_lock));
+ KASSERT(mutex_owned(lock));
KASSERT(wmap->cnt == 0);
mutex_exit(lock);
/*
- * Handle first PIPE_CHUNK_SIZE bytes of buffer. Deal with buffers
+ * Handle first PIPE_DIRECT_CHUNK bytes of buffer. Deal with buffers
* not aligned to PAGE_SIZE.
*/
bbase = (vaddr_t)uio->uio_iov->iov_base;
@@ -711,43 +721,28 @@
bcnt = uio->uio_iov->iov_len;
}
npages = atop(blen);
- starting_color = atop(base) & uvmexp.colormask;
+
+ KASSERT((wpipe->pipe_state & (PIPE_DIRECTW | PIPE_DIRECTR)) == 0);
+ KASSERT(wmap->npages == 0);
- /*
- * Free the old kva if we need more pages than we have
- * allocated.
- */
- if (wmap->kva != 0 && starting_color + npages > wmap->npages)
- pipe_loan_free(wpipe);
-
- /* Allocate new kva. */
- if (wmap->kva == 0) {
- error = pipe_loan_alloc(wpipe, starting_color + npages);
- if (error) {
- mutex_enter(lock);
- return (error);
- }
+ /* Make sure page array is big enough */
+ error = pipe_loan_alloc(wpipe, npages);
+ if (error) {
+ mutex_enter(lock);
+ return (error);
}
/* Loan the write buffer memory from writer process */
- pgs = wmap->pgs + starting_color;
error = uvm_loan(&uio->uio_vmspace->vm_map, base, blen,
- pgs, UVM_LOAN_TOPAGE);
+ wmap->pgs, UVM_LOAN_TOPAGE);
if (error) {
pipe_loan_free(wpipe);
mutex_enter(lock);
return (ENOMEM); /* so that caller fallback to ordinary write */
}
- /* Enter the loaned pages to kva */
- vaddr_t kva = wpipe->pipe_map.kva;
- for (int j = 0; j < npages; j++, kva += PAGE_SIZE) {
- pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ, 0);
- }
- pmap_update(pmap_kernel());
-
/* Now we can put the pipe in direct write mode */
- wmap->pos = bpos + ptoa(starting_color);
+ wmap->pos = bpos;
wmap->cnt = bcnt;
/*
@@ -783,17 +778,13 @@
/* Acquire the pipe lock and cleanup */
(void)pipelock(wpipe, false);
- mutex_exit(lock);
- if (pgs != NULL) {
- pmap_kremove(wpipe->pipe_map.kva, blen);
- pmap_update(pmap_kernel());
- uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
- }
- if (error || amountpipekva > maxpipekva)
- pipe_loan_free(wpipe);
+ mutex_exit(lock);
+ /* XXX what happens if the writer process exits without waiting for reader?
+ * XXX FreeBSD does a clone in this case */
+ uvm_unloan(wmap->pgs, npages, UVM_LOAN_TOPAGE);
+ mutex_enter(lock);
- mutex_enter(lock);
if (error) {
pipeselwakeup(wpipe, wpipe, POLL_ERR);
@@ -914,7 +905,7 @@
*/
if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
(fp->f_flag & FNONBLOCK) == 0 &&
- (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
+ pipe_direct) {
error = pipe_direct_write(fp, wpipe, uio);
/*
@@ -1271,12 +1262,8 @@
pipe->pipe_buffer.buffer = NULL;
}
#ifndef PIPE_NODIRECT
- if (pipe->pipe_map.kva != 0) {
+ if (pipe->pipe_map.npages > 0)
Home |
Main Index |
Thread Index |
Old Index