Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys Add some experimental page-loaning for writes on sockets...
details: https://anonhg.NetBSD.org/src/rev/3a80fa4d495b
branches: trunk
changeset: 526358:3a80fa4d495b
user: thorpej <thorpej%NetBSD.org@localhost>
date: Thu May 02 17:55:48 2002 +0000
description:
Add some experimental page-loaning for writes on sockets. It is disabled
by default, and can be enabled by adding the SOSEND_LOAN option to your
kernel config. The SOSEND_COUNTERS option can be used to provide some
instrumentation.
Use of this option, combined with an application that does large enough
writes, gets us zero-copy on the TCP and UDP transmit path.
diffstat:
sys/conf/files | 5 +-
sys/kern/uipc_socket.c | 285 +++++++++++++++++++++++++++++++++++++++++++++++-
sys/sys/socketvar.h | 3 +-
3 files changed, 284 insertions(+), 9 deletions(-)
diffs (truncated from 417 to 300 lines):
diff -r d352786a8cbc -r 3a80fa4d495b sys/conf/files
--- a/sys/conf/files Thu May 02 17:44:32 2002 +0000
+++ b/sys/conf/files Thu May 02 17:55:48 2002 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files,v 1.523 2002/04/26 02:05:09 ad Exp $
+# $NetBSD: files,v 1.524 2002/05/02 17:55:48 thorpej Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -14,6 +14,9 @@
defflag UCONSOLE
defflag opt_pipe.h PIPE_SOCKETPAIR PIPE_NODIRECT
+defflag opt_sock_counters.h SOSEND_COUNTERS
+defflag SOSEND_LOAN
+
defflag MULTIPROCESSOR
defflag opt_config.h INCLUDE_CONFIG_FILE INCLUDE_JUST_CONFIG
diff -r d352786a8cbc -r 3a80fa4d495b sys/kern/uipc_socket.c
--- a/sys/kern/uipc_socket.c Thu May 02 17:44:32 2002 +0000
+++ b/sys/kern/uipc_socket.c Thu May 02 17:55:48 2002 +0000
@@ -1,4 +1,40 @@
-/* $NetBSD: uipc_socket.c,v 1.63 2002/04/06 08:04:17 matt Exp $ */
+/* $NetBSD: uipc_socket.c,v 1.64 2002/05/02 17:55:51 thorpej Exp $ */
+
+/*-
+ * Copyright (c) 2002 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
/*
* Copyright (c) 1982, 1986, 1988, 1990, 1993
@@ -36,7 +72,10 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.63 2002/04/06 08:04:17 matt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.64 2002/05/02 17:55:51 thorpej Exp $");
+
+#include "opt_sock_counters.h"
+#include "opt_sosend_loan.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -53,19 +92,219 @@
#include <sys/resourcevar.h>
#include <sys/pool.h>
+#include <uvm/uvm.h>
+
struct pool socket_pool;
extern int somaxconn; /* patchable (XXX sysctl) */
int somaxconn = SOMAXCONN;
+#ifdef SOSEND_COUNTERS
+#include <sys/device.h>
+/*
+ * Optional instrumentation, compiled in only when SOSEND_COUNTERS is
+ * defined.  The four counters below all carry the "sosend" string and
+ * are attached by soinit().  In the part of this diff that is visible,
+ * only sosend_kvalimit is incremented (by sosend_loan(), just before it
+ * sleeps waiting for kernel VA).
+ */
+struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "sosend", "loan big");
+struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "sosend", "copy big");
+struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "sosend", "copy small");
+struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+ NULL, "sosend", "kva limit");
+/* Increment an event counter; expands to nothing without SOSEND_COUNTERS. */
+#define SOSEND_COUNTER_INCR(ev) (ev)->ev_count++
+
+#else
+
+#define SOSEND_COUNTER_INCR(ev) /* nothing */
+
+#endif /* SOSEND_COUNTERS */
+
void
soinit(void)
{
pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
"sockpl", NULL);
+ /* With SOSEND_COUNTERS defined, also attach the four sosend event counters. */
+#ifdef SOSEND_COUNTERS
+ evcnt_attach_static(&sosend_loan_big);
+ evcnt_attach_static(&sosend_copy_big);
+ evcnt_attach_static(&sosend_copy_small);
+ evcnt_attach_static(&sosend_kvalimit);
+#endif /* SOSEND_COUNTERS */
}
+#ifdef SOSEND_LOAN
+/*
+ * Global list, linked through m_next, of mbufs whose loaned external
+ * buffers still have to be released; sodopendfree() drains it.
+ */
+struct mbuf *so_pendfree;
+/*
+ * Kernel virtual address accounting for loaned pages, in bytes:
+ * somaxkva is the ceiling sosend_loan() checks against, socurkva is the
+ * amount currently reserved, and sokvawaiters counts sleepers on
+ * &socurkva waiting for some of it to be given back.
+ */
+int somaxkva = 16 * 1024 * 1024;
+int socurkva;
+int sokvawaiters;
+/*
+ * SOCK_LOAN_CHUNK caps how many bytes sosend_loan() loans in one call.
+ * SOCK_LOAN_THRESH is not referenced in the visible part of this diff;
+ * presumably the minimum write size worth loaning -- confirm in sosend().
+ */
+#define SOCK_LOAN_THRESH 4096
+#define SOCK_LOAN_CHUNK 65536
+/*
+ * sodoloanfree:
+ *
+ * Tear down one loaned buffer.  Looks up the pages behind the kernel
+ * mapping that covers [buf, buf + size), removes that mapping, passes
+ * the pages to uvm_unloan(), frees the kernel VA range and subtracts
+ * its length from socurkva.  If sokvawaiters is non-zero, sleepers on
+ * &socurkva are woken.  Panics if a page in the range is not mapped
+ * in the kernel pmap.
+ */
+static void
+sodoloanfree(caddr_t buf, u_int size)
+{
+ struct vm_page **pgs;
+ vaddr_t va, sva, eva;
+ vsize_t len;
+ paddr_t pa;
+ int i, npgs;
+ /* Widen the buffer to whole pages; len is that span in bytes. */
+ eva = round_page((vaddr_t) buf + size);
+ sva = trunc_page((vaddr_t) buf);
+ len = eva - sva;
+ npgs = len >> PAGE_SHIFT;
+ /* Scratch array on the stack: one vm_page pointer per page. */
+ pgs = alloca(npgs * sizeof(*pgs));
+ /* Collect the pages while the kernel mapping still exists. */
+ for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) {
+ if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
+ panic("sodoloanfree: va 0x%lx not mapped", va);
+ pgs[i] = PHYS_TO_VM_PAGE(pa);
+ }
+ /* Unmap first, then give back the pages and the VA range. */
+ pmap_kremove(sva, len);
+ pmap_update(pmap_kernel());
+ uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE);
+ uvm_km_free(kernel_map, sva, len);
+ socurkva -= len;
+ if (sokvawaiters)
+ wakeup(&socurkva);
+}
+
+/*
+ * sodopendfree:
+ *
+ *	Release every loaned buffer parked on the global so_pendfree list,
+ *	then every one parked on so->so_pendfree.  Returns the sum of the
+ *	ext_size of the buffers released, which is 0 when both lists were
+ *	empty.
+ */
+static size_t
+sodopendfree(struct socket *so)
+{
+	struct mbuf *pend;
+	size_t freed = 0;
+	int s;
+
+	s = splvm();
+
+	/* Global list first. */
+	while ((pend = so_pendfree) != NULL) {
+		so_pendfree = pend->m_next;
+		/* Entry is unlinked; drop the spl for the teardown itself. */
+		splx(s);
+
+		freed += pend->m_ext.ext_size;
+		sodoloanfree(pend->m_ext.ext_buf, pend->m_ext.ext_size);
+		s = splvm();
+		pool_cache_put(&mbpool_cache, pend);
+	}
+
+	/* Then this socket's own list, handled the same way. */
+	while ((pend = so->so_pendfree) != NULL) {
+		so->so_pendfree = pend->m_next;
+		splx(s);
+
+		freed += pend->m_ext.ext_size;
+		sodoloanfree(pend->m_ext.ext_buf, pend->m_ext.ext_size);
+		s = splvm();
+		pool_cache_put(&mbpool_cache, pend);
+	}
+
+	splx(s);
+	return (freed);
+}
+
+/*
+ * soloanfree:
+ *
+ *	External-storage free routine that sosend_loan() registers through
+ *	MEXTADD().  With no mbuf (m == NULL) the buffer is torn down at
+ *	once by sodoloanfree().  Otherwise the mbuf is only queued, to be
+ *	released by a later sodopendfree(), and sleepers on &socurkva are
+ *	woken if any are registered.
+ *
+ *	arg is the socket that made the loan.  A NULL arg is treated as
+ *	"no socket to queue on": the mbuf then goes on the global
+ *	so_pendfree list, which sodopendfree() also drains, instead of
+ *	dereferencing a NULL socket pointer.
+ */
+static void
+soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg)
+{
+	struct socket *so = arg;
+	int s;
+
+	if (m == NULL) {
+		sodoloanfree(buf, size);
+		return;
+	}
+
+	/* Both pending lists are only manipulated at splvm. */
+	s = splvm();
+	if (so != NULL) {
+		m->m_next = so->so_pendfree;
+		so->so_pendfree = m;
+	} else {
+		m->m_next = so_pendfree;
+		so_pendfree = m;
+	}
+	splx(s);
+	if (sokvawaiters)
+		wakeup(&socurkva);
+}
+/*
+ * sosend_loan:
+ *
+ * Try to attach up to `space' bytes from the current iovec of a
+ * user-space uio to mbuf m as external storage, by loaning the user's
+ * pages with uvm_loan() and mapping them read-only into kernel VA.
+ * Returns the number of bytes attached and consumed from the uio, or
+ * 0 if nothing was loaned (uio not UIO_USERSPACE, no kernel VA
+ * obtained, or uvm_loan() failed).  May sleep while the loan would
+ * push socurkva past somaxkva.
+ */
+static long
+sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space)
+{
+ struct iovec *iov = uio->uio_iov;
+ vaddr_t sva, eva;
+ vsize_t len;
+ struct vm_page **pgs;
+ vaddr_t lva, va;
+ int npgs, s, i, error;
+ /* Only user-space buffers are loaned. */
+ if (uio->uio_segflg != UIO_USERSPACE)
+ return (0);
+ /* Clamp to the current iovec and to one loan chunk. */
+ if (iov->iov_len < (size_t) space)
+ space = iov->iov_len;
+ if (space > SOCK_LOAN_CHUNK)
+ space = SOCK_LOAN_CHUNK;
+ /* Page-align the user range; len and npgs describe the span to loan. */
+ eva = round_page((vaddr_t) iov->iov_base + space);
+ sva = trunc_page((vaddr_t) iov->iov_base);
+ len = eva - sva;
+ npgs = len >> PAGE_SHIFT;
+ /* Stay under somaxkva: reclaim pending frees, otherwise sleep for a wakeup. */
+ while (socurkva + len > somaxkva) {
+ if (sodopendfree(so))
+ continue;
+ SOSEND_COUNTER_INCR(&sosend_kvalimit);
+ s = splvm();
+ sokvawaiters++;
+ (void) tsleep(&socurkva, PVM, "sokva", 0);
+ sokvawaiters--;
+ splx(s);
+ }
+ /* Reserve kernel VA for the mapping and account for it. */
+ lva = uvm_km_valloc_wait(kernel_map, len);
+ if (lva == 0)
+ return (0);
+ socurkva += len;
+
+ pgs = alloca(npgs * sizeof(*pgs));
+ /* Loan the user pages; on failure undo the VA reservation. */
+ error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len,
+ pgs, UVM_LOAN_TOPAGE);
+ if (error) {
+ uvm_km_free(kernel_map, lva, len);
+ socurkva -= len;
+ return (0);
+ }
+ /* Map the loaned pages read-only at the reserved kernel VA. */
+ for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE)
+ pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ);
+ pmap_update(pmap_kernel());
+ /* Carry over the user buffer's offset within its first page. */
+ lva += (vaddr_t) iov->iov_base & PAGE_MASK;
+ /* Attach the mapping to m, registering soloanfree() with so as its argument. */
+ MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so);
+ /* Advance the uio past the bytes just attached. */
+ uio->uio_resid -= space;
+ /* uio_offset not updated, not set/used for write(2) */
+ uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space;
+ uio->uio_iov->iov_len -= space;
+ if (uio->uio_iov->iov_len == 0) {
+ uio->uio_iov++;
+ uio->uio_iovcnt--;
+ }
+
+ return (space);
+}
+
+#endif /* SOSEND_LOAN */
+
/*
* Socket operation routines.
* These routines are called by the routines in
@@ -151,6 +390,9 @@
void
sofree(struct socket *so)
{
Home |
Main Index |
Thread Index |
Old Index