[src/trunk]: src/usr.bin/nbperf Optimize nbperf

To: source-changes-hg%NetBSD.org@localhost
Subject: [src/trunk]: src/usr.bin/nbperf Optimize nbperf
From: joerg <joerg%NetBSD.org@localhost>
Date: Thu, 07 Jan 2021 18:21:15 +0000
details:   https://anonhg.NetBSD.org/src/rev/6fc5d51b2765
branches:  trunk
changeset: 949271:6fc5d51b2765
user:      joerg <joerg%NetBSD.org@localhost>
date:      Thu Jan 07 16:03:08 2021 +0000

description:
Optimize nbperf

- add fudge mode which gives a slightly slower hash function, but works
  almost always in the first iteration by avoiding degenerate edges
- avoid keeping incidence lists around reducing the memory foot print by
  30%
- split edge processing from hashing as in the non-fudge case it is a
  reasonable costly part that often gets thrown away
- merge graph2 and graph3 routines now that they are mostly the same

diffstat:

 tests/usr.bin/nbperf/t_nbperf.sh |  130 +++++++++++++++++-
 usr.bin/nbperf/graph2.c          |  264 ++++++++++++++++++++++----------------
 usr.bin/nbperf/graph2.h          |   59 ++++++--
 usr.bin/nbperf/graph3.c          |  252 +------------------------------------
 usr.bin/nbperf/graph3.h          |   62 ---------
 usr.bin/nbperf/nbperf-bdz.c      |   57 +++++---
 usr.bin/nbperf/nbperf-chm.c      |  161 +++++++++++++----------
 usr.bin/nbperf/nbperf-chm3.c     |    4 +-
 usr.bin/nbperf/nbperf.1          |    6 +-
 usr.bin/nbperf/nbperf.c          |   25 ++-
 usr.bin/nbperf/nbperf.h          |    5 +-
 11 files changed, 469 insertions(+), 556 deletions(-)

diffs (truncated from 1446 to 300 lines):

diff -r 7ebd79385460 -r 6fc5d51b2765 tests/usr.bin/nbperf/t_nbperf.sh
--- a/tests/usr.bin/nbperf/t_nbperf.sh  Thu Jan 07 14:41:50 2021 +0000
+++ b/tests/usr.bin/nbperf/t_nbperf.sh  Thu Jan 07 16:03:08 2021 +0000
@@ -1,4 +1,4 @@
-# $NetBSD: t_nbperf.sh,v 1.3 2014/04/30 21:04:21 joerg Exp $
+# $NetBSD: t_nbperf.sh,v 1.4 2021/01/07 16:03:08 joerg Exp $
 #
 # Copyright (c) 2012 The NetBSD Foundation, Inc.
 # All rights reserved.
@@ -27,7 +27,7 @@
 
 cleanup()
 {
-       rm -f reference.txt hash.c hash.map testprog
+       rm -f reference.txt input.txt hash.c hash.map testprog
 }
 
 atf_test_case chm
@@ -38,7 +38,7 @@
        atf_set "require.progs" "cc"
 }
 chm_body()
-{ 
+{
        for n in 4 32 128 1024 65536; do
                seq 0 $(($n - 1)) > reference.txt
                atf_check -o file:reference.txt \
@@ -54,6 +54,32 @@
        cleanup
 }
 
+atf_test_case chm_fudged
+chm_fudged_head()
+{
+       atf_set "descr" "Checks chm algorithm with fudged hash"
+       atf_set "require.progs" "cc"
+}
+chm_fudged_body()
+{
+       seq 0 9 > reference.txt
+       seq 1 10 > input.txt
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:1 fgrep -q '^=' hash.c
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm -f -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:0 fgrep -q '^=' hash.c
+}
+chm_fudged_clean()
+{
+       cleanup
+}
+
 atf_test_case chm3
 chm3_head()
 {
@@ -78,26 +104,102 @@
        cleanup
 }
 
-atf_test_case bdz
-bdz_head()
+atf_test_case chm3_fudged
+chm3_fudged_head()
+{
+       atf_set "descr" "Checks chm3 algorithm with fudged hash"
+       atf_set "require.progs" "cc"
+}
+chm3_fudged_body()
 {
-       atf_set "descr" "Checks bdz algorithm"
+       seq 0 9 > reference.txt
+       seq 1 10 > input.txt
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm3 -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:1 fgrep -q '^=' hash.c
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm3 -f -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:0 fgrep -q '^= (' hash.c
+       atf_check -s exit:0 fgrep -q '^= 2' hash.c
+}
+chm3_fudged_clean()
+{
+       cleanup
+}
+
+atf_test_case bpz
+bpz_head()
+{
+       atf_set "descr" "Checks bpz algorithm"
        atf_set "require.files" "/usr/share/dict/web2"
        atf_set "require.progs" "cc"
 }
-bdz_body()
-{ 
+bpz_body()
+{
        for n in 4 32 128 1024 65536 131072; do
                seq 0 $(($n - 1)) > reference.txt
                atf_check -o file:reference.txt \
-                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bdz "sort -n" \
+                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bpz "sort -n" \
                    $n $(atf_get_srcdir)/hash_driver.c
                atf_check -o file:hash.map \
-                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bdz cat \
+                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bpz cat \
                    $n $(atf_get_srcdir)/hash_driver.c
        done
 }
-bdz_clean()
+bpz_clean()
+{
+       cleanup
+}
+
+atf_test_case bpz_fudged
+bpz_fudged_head()
+{
+       atf_set "descr" "Checks bpz algorithm with fudged hash"
+       atf_set "require.progs" "cc"
+}
+bpz_fudged_body()
+{
+       seq 0 9 > reference.txt
+       seq 1 10 > input.txt
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "bpz -p" "sort -n" \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:1 fgrep -q '^=' hash.c
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "bpz -f -p" "sort -n" \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:0 fgrep -q '^= (' hash.c
+       atf_check -s exit:0 fgrep -q '^= 2' hash.c
+}
+bpz_fudged_clean()
+{
+       cleanup
+}
+
+atf_test_case handle_dup
+handle_dup_head()
+{
+       atf_set "descr" "Checks different algorithms deal with duplicates"
+       atf_set "require.progs" "cc"
+}
+handle_dup_body()
+{
+       seq 0 9 > reference.txt
+       echo 0 >> reference.txt
+       atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+               nbperf -a chm < reference.txt
+       atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+               nbperf -a chm3 < reference.txt
+       atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+               nbperf -a bpz < reference.txt
+}
+handle_dup_clean()
 {
        cleanup
 }
@@ -105,6 +207,10 @@
 atf_init_test_cases()
 {
        atf_add_test_case chm
+       atf_add_test_case chm_fudged
        atf_add_test_case chm3
-       atf_add_test_case bdz
+       atf_add_test_case chm3_fudged
+       atf_add_test_case bpz
+       atf_add_test_case bpz_fudged
+       atf_add_test_case handle_dup
 }
diff -r 7ebd79385460 -r 6fc5d51b2765 usr.bin/nbperf/graph2.c
--- a/usr.bin/nbperf/graph2.c   Thu Jan 07 14:41:50 2021 +0000
+++ b/usr.bin/nbperf/graph2.c   Thu Jan 07 16:03:08 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: graph2.c,v 1.4 2011/10/21 23:47:11 joerg Exp $ */
+/*     $NetBSD: graph2.c,v 1.5 2021/01/07 16:03:08 joerg Exp $ */
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -36,7 +36,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: graph2.c,v 1.4 2011/10/21 23:47:11 joerg Exp $");
+__RCSID("$NetBSD: graph2.c,v 1.5 2021/01/07 16:03:08 joerg Exp $");
 
 #include <err.h>
 #include <inttypes.h>
@@ -47,16 +47,14 @@
 #include "nbperf.h"
 #include "graph2.h"
 
-static const uint32_t unused = 0xffffffffU;
-
 void
-graph2_setup(struct graph2 *graph, uint32_t v, uint32_t e)
+SIZED2(_setup)(struct SIZED(graph) *graph, uint32_t v, uint32_t e)
 {
        graph->v = v;
        graph->e = e;
 
-       graph->verts = calloc(sizeof(struct vertex2), v);
-       graph->edges = calloc(sizeof(struct edge2), e);
+       graph->verts = calloc(sizeof(*graph->verts), v);
+       graph->edges = calloc(sizeof(*graph->edges), e);
        graph->output_order = calloc(sizeof(uint32_t), e);
 
        if (graph->verts == NULL || graph->edges == NULL ||
@@ -65,7 +63,7 @@
 }
 
 void
-graph2_free(struct graph2 *graph)
+SIZED2(_free)(struct SIZED(graph) *graph)
 {
        free(graph->verts);
        free(graph->edges);
@@ -76,136 +74,180 @@
        graph->output_order = NULL;
 }
 
-static int
-graph2_check_duplicates(struct nbperf *nbperf, struct graph2 *graph)
+static struct nbperf *sorting_nbperf;
+static struct SIZED(graph) *sorting_graph;
+static int sorting_found;
+
+static int sorting_cmp(const void *a_, const void *b_)
 {
-       struct vertex2 *v;
-       struct edge2 *e, *e2;
-       uint32_t i, j;
+       const uint32_t *a = a_, *b = b_;
+       int i;
+       const struct SIZED(edge) *ea = &sorting_graph->edges[*a],
+           *eb = &sorting_graph->edges[*b];
+       for (i = 0; i < GRAPH_SIZE; ++i) {
+               if (ea->vertices[i] < eb->vertices[i])
+                       return -1;
+               if (ea->vertices[i] > eb->vertices[i])
+                       return 1;
+       }
+       if (sorting_nbperf->keylens[*a] < sorting_nbperf->keylens[*b])
+               return -1;
+       if (sorting_nbperf->keylens[*a] > sorting_nbperf->keylens[*b])
+               return 1;
+       i = memcmp(sorting_nbperf->keys[*a], sorting_nbperf->keys[*b],
+           sorting_nbperf->keylens[*a]);
+       if (i == 0)
+               sorting_found = 1;
+       return i;
+}
+
+static int
+SIZED2(_check_duplicates)(struct nbperf *nbperf, struct SIZED(graph) *graph)
+{
+       size_t i;
+       uint32_t *key_index = calloc(sizeof(*key_index), graph->e);
+
+       if (key_index == NULL)
+               err(1, "malloc failed");
+       for (i = 0; i < graph->e; ++i)
+               key_index[i] = i;
 
-       for (i = 0; i < graph->e; ++i) {
-               e = &graph->edges[i];
-               v = &graph->verts[e->left];
-               j = v->l_edge;
-               e2 = &graph->edges[j];
-               for (;;) {
-                       if (i < j && e->right == e2->right &&
-                           nbperf->keylens[i] == nbperf->keylens[j] &&
-                           memcmp(nbperf->keys[i], nbperf->keys[j],
-                           nbperf->keylens[i]) == 0) {
-                               nbperf->has_duplicates = 1;
-                               return -1;
-                       }
-                       if (e2->l_next == unused)
-                               break;
-                       j = e2->l_next;
-                       e2 = &graph->edges[j];
-               }               
+       sorting_nbperf = nbperf;
+       sorting_graph = graph;
+       sorting_found = 0;
+       qsort(key_index, graph->e, sizeof(*key_index), sorting_cmp);
+       if (sorting_found)
Prev by Date: [src/trunk]: src/lib/libc/cdb Optimize CPU and memory use of cdbw(3)
Next by Date: [src/trunk]: src/sys/arch/mips/mips Fix build for non-_MIPS_NEED_BUS_DMA_BOUN...
Previous by Thread: [src/trunk]: src/lib/libc/cdb Optimize CPU and memory use of cdbw(3)
Next by Thread: [src/trunk]: src/sys/arch/mips/mips Fix build for non-_MIPS_NEED_BUS_DMA_BOUN...
Indexes:
Home | Main Index | Thread Index | Old Index