Source-Changes-HG archive
[src/trunk]: src/sys/net/npf This patch ditches the ptree(3) library, becau...
details: https://anonhg.NetBSD.org/src/rev/95ab67291ca6
branches: trunk
changeset: 349372:95ab67291ca6
user: christos <christos%NetBSD.org@localhost>
date: Fri Dec 09 02:40:38 2016 +0000
description:
This patch ditches the ptree(3) library, because it is broken (you
can get missing entries!). Instead, as a temporary solution, we switch
to a simple linear scan of the hash tables for the longest-prefix-match
(lpm.c, lpm.h) algorithm. In fact, with only a few unique prefixes in the
set, this simple algorithm is pretty fast on modern hardware anyway!
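
For context, the new lpm.c keeps one small hash table per prefix length
(see the prefix[] array in struct lpm in the diff below) and scans the
prefix lengths in use to find the longest match. As a rough illustration
of the longest-prefix-match-by-linear-scan idea only (not the code in this
change, simplified to IPv4 and to a flat array instead of per-length hash
tables; all names and addresses are made up), something like this works:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <arpa/inet.h>

/* One IPv4 prefix: network address (host byte order), length and a value. */
struct pfx {
	uint32_t net;
	unsigned len;
	const char *val;
};

/* Bitmask covering the first 'len' bits of an IPv4 address. */
static uint32_t
pfx_mask(unsigned len)
{
	return len ? (uint32_t)(~0U << (32 - len)) : 0;
}

/* Linear scan: keep the matching entry with the greatest prefix length. */
static const struct pfx *
lpm_linear(const struct pfx *tbl, size_t n, uint32_t addr)
{
	const struct pfx *best = NULL;

	for (size_t i = 0; i < n; i++) {
		if ((addr & pfx_mask(tbl[i].len)) == tbl[i].net &&
		    (best == NULL || tbl[i].len > best->len))
			best = &tbl[i];
	}
	return best;
}

int
main(void)
{
	const struct pfx tbl[] = {
		{ ntohl(inet_addr("10.0.0.0")),  8, "rule A" },
		{ ntohl(inet_addr("10.1.0.0")), 16, "rule B" },
		{ ntohl(inet_addr("10.1.2.0")), 24, "rule C" },
	};
	const uint32_t addr = ntohl(inet_addr("10.1.2.3"));
	const struct pfx *p = lpm_linear(tbl, 3, addr);

	printf("%s\n", p != NULL ? p->val : "no match");	/* "rule C" */
	return 0;
}

With only a handful of distinct prefixes such a scan stays cache-friendly,
which is why the simple approach is fast enough as a stopgap.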
diffstat:
sys/net/npf/files.npf | 6 +-
sys/net/npf/lpm.c | 401 +++++++++++++++++++++++++++++++++++++++
sys/net/npf/lpm.h | 46 ++++
sys/net/npf/npf_impl.h | 5 +-
sys/net/npf/npf_tableset.c | 125 +++++------
sys/net/npf/npf_tableset_ptree.c | 183 -----------------
6 files changed, 506 insertions(+), 260 deletions(-)
diffs (truncated from 1002 to 300 lines):
diff -r 17a1916c6af8 -r 95ab67291ca6 sys/net/npf/files.npf
--- a/sys/net/npf/files.npf Fri Dec 09 02:38:14 2016 +0000
+++ b/sys/net/npf/files.npf Fri Dec 09 02:40:38 2016 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.17 2014/07/19 18:24:16 rmind Exp $
+# $NetBSD: files.npf,v 1.18 2016/12/09 02:40:38 christos Exp $
#
# Public Domain.
#
@@ -19,7 +19,6 @@
file net/npf/npf_ruleset.c npf
file net/npf/npf_rproc.c npf
file net/npf/npf_tableset.c npf
-file net/npf/npf_tableset_ptree.c npf
file net/npf/npf_if.c npf
file net/npf/npf_inet.c npf
file net/npf/npf_conn.c npf
@@ -31,6 +30,9 @@
file net/npf/npf_sendpkt.c npf
file net/npf/npf_worker.c npf
+# LPM
+file net/npf/lpm.c npf
+
# Built-in extensions.
file net/npf/npf_ext_log.c npf
file net/npf/npf_ext_normalize.c npf
diff -r 17a1916c6af8 -r 95ab67291ca6 sys/net/npf/lpm.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/net/npf/lpm.c Fri Dec 09 02:40:38 2016 +0000
@@ -0,0 +1,401 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * TODO: Simple linear scan for now (works well enough with a few prefixes).
+ * TBD on a better algorithm.
+ */
+
+#if defined(_KERNEL)
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: lpm.c,v 1.1 2016/12/09 02:40:38 christos Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
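+
+/* Userland build: map the kernel memory allocator onto malloc(3). */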
+#define kmem_alloc(a, b) malloc(a)
+#define kmem_free(a, b) free(a)
+#define kmem_zalloc(a, b) calloc(a, 1)
+#endif
+
+#include "lpm.h"
+
+#define LPM_MAX_PREFIX (128)
+#define LPM_MAX_WORDS (LPM_MAX_PREFIX >> 5)
+#define LPM_TO_WORDS(x) ((x) >> 2)
+#define LPM_HASH_STEP (8)
+
+#ifdef DEBUG
+#define ASSERT assert
+#else
+#define ASSERT
+#endif
+
+typedef struct lpm_ent {
+ struct lpm_ent *next;
+ void * val;
+ unsigned len;
+ uint8_t key[];
+} lpm_ent_t;
+
+typedef struct {
+ uint32_t hashsize;
+ uint32_t nitems;
+ lpm_ent_t **bucket;
+} lpm_hmap_t;
+
+struct lpm {
+ uint32_t bitmask[LPM_MAX_WORDS];
+ void * defval;
+ lpm_hmap_t prefix[LPM_MAX_PREFIX + 1];
+};
+
+lpm_t *
+lpm_create(void)
+{
+ return kmem_zalloc(sizeof(lpm_t), KM_SLEEP);
+}
+
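+/*
+ * lpm_clear: free all entries in each per-prefix-length hash table,
+ * invoking the optional destructor on every entry, and reset the
+ * structure to its empty state.
+ */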
+void
+lpm_clear(lpm_t *lpm, lpm_dtor_t dtor, void *arg)
+{
+ for (unsigned n = 0; n <= LPM_MAX_PREFIX; n++) {
+ lpm_hmap_t *hmap = &lpm->prefix[n];
+
+ if (!hmap->hashsize) {
+ KASSERT(!hmap->bucket);
+ continue;
+ }
+ for (unsigned i = 0; i < hmap->hashsize; i++) {
+ lpm_ent_t *entry = hmap->bucket[i];
+
+ while (entry) {
+ lpm_ent_t *next = entry->next;
+
+ if (dtor) {
+ dtor(arg, entry->key,
+ entry->len, entry->val);
+ }
+ kmem_free(entry,
+ offsetof(lpm_ent_t, key[entry->len]));
+ entry = next;
+ }
+ }
+ kmem_free(hmap->bucket, hmap->hashsize);
+ hmap->bucket = NULL;
+ hmap->hashsize = 0;
+ hmap->nitems = 0;
+ }
+ memset(lpm->bitmask, 0, sizeof(lpm->bitmask));
+ lpm->defval = NULL;
+}
+
+void
+lpm_destroy(lpm_t *lpm)
+{
+ lpm_clear(lpm, NULL, NULL);
+ kmem_free(lpm, sizeof(*lpm));
+}
+
+/*
+ * fnv1a_hash: Fowler-Noll-Vo hash function (FNV-1a variant).
+ */
+static uint32_t
+fnv1a_hash(const void *buf, size_t len)
+{
+ uint32_t hash = 2166136261UL;
+ const uint8_t *p = buf;
+
+ while (len--) {
+ hash ^= *p++;
+ hash *= 16777619U;
+ }
+ return hash;
+}
+
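+/*
+ * hashmap_rehash: resize the bucket array to the smallest power of two
+ * not below the given size and re-hash the existing entries into it.
+ */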
+static bool
+hashmap_rehash(lpm_hmap_t *hmap, uint32_t size)
+{
+ lpm_ent_t **bucket;
+ uint32_t hashsize;
+
+ for (hashsize = 1; hashsize < size; hashsize <<= 1) {
+ continue;
+ }
+ bucket = kmem_zalloc(hashsize * sizeof(*bucket), KM_SLEEP);
+ if (bucket == NULL)
+ return false;
+ for (unsigned n = 0; n < hmap->hashsize; n++) {
+ lpm_ent_t *list = hmap->bucket[n];
+
+ while (list) {
+ lpm_ent_t *entry = list;
+ uint32_t hash = fnv1a_hash(entry->key, entry->len);
+ const size_t i = hash & (hashsize - 1);
+
+ list = entry->next;
+ entry->next = bucket[i];
+ bucket[i] = entry;
+ }
+ }
+ if (hmap->bucket)
+ kmem_free(hmap->bucket, hmap->hashsize);
+ hmap->bucket = bucket;
+ hmap->hashsize = hashsize;
+ return true;
+}
+
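+/*
+ * hashmap_insert: find the entry for the given key, growing the hash
+ * table and allocating a new entry if necessary; returns NULL only on
+ * allocation failure.
+ */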
+static lpm_ent_t *
+hashmap_insert(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+ const uint32_t target = hmap->nitems + LPM_HASH_STEP;
+ const size_t entlen = offsetof(lpm_ent_t, key[len]);
+ uint32_t hash, i;
+ lpm_ent_t *entry;
+
+ if (hmap->hashsize < target && !hashmap_rehash(hmap, target)) {
+ return NULL;
+ }
+
+ hash = fnv1a_hash(key, len);
+ i = hash & (hmap->hashsize - 1);
+ entry = hmap->bucket[i];
+ while (entry) {
+ if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+ return entry;
+ }
+ entry = entry->next;
+ }
+
+ if ((entry = kmem_alloc(entlen, KM_SLEEP)) == NULL)
+ return NULL;
+
+ memcpy(entry->key, key, len);
+ entry->next = hmap->bucket[i];
+ entry->len = len;
+
+ hmap->bucket[i] = entry;
+ hmap->nitems++;
+ return entry;
+}
+
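+/*
+ * hashmap_lookup: return the entry matching the given key, or NULL
+ * if there is no such entry.
+ */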
+static lpm_ent_t *
+hashmap_lookup(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+ const uint32_t hash = fnv1a_hash(key, len);
+ const uint32_t i = hash & (hmap->hashsize - 1);
+ lpm_ent_t *entry = hmap->bucket[i];
+
+ while (entry) {
+ if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+ return entry;
+ }
+ entry = entry->next;
+ }
+ return NULL;
+}
+
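+/*
+ * hashmap_remove: unlink and free the entry matching the given key;
+ * returns 0 on success or -1 if the key was not found.
+ */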
+static int
+hashmap_remove(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+ const uint32_t hash = fnv1a_hash(key, len);
+ const uint32_t i = hash & (hmap->hashsize - 1);
+ lpm_ent_t *prev = NULL, *entry = hmap->bucket[i];
+
+ while (entry) {
+ if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+ if (prev) {
+ prev->next = entry->next;
+ } else {
+ hmap->bucket[i] = entry->next;
+ }
+ kmem_free(entry, offsetof(lpm_ent_t, key[entry->len]));
+ return 0;
+ }
+ prev = entry;
+ entry = entry->next;
+ }
+ return -1;
+}
+
+/*
+ * compute_prefix: given the address and prefix length, compute and
+ * return the address prefix.
+ */
+static inline void
+compute_prefix(const unsigned nwords, const uint32_t *addr,
+ unsigned preflen, uint32_t *prefix)
+{
+ uint32_t addr2[4];
+
+ if ((uintptr_t)addr & 3) {
+ /* Unaligned address: just copy for now. */
+ memcpy(addr2, addr, nwords * 4);