Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src-draft/trunk]: src/usr.bin/nbperf Optimize nbperf



details:   https://anonhg.NetBSD.org/src-all/rev/88478e8cd931
branches:  trunk
changeset: 949256:88478e8cd931
user:      Joerg Sonnenberger <joerg%bec.de@localhost>
date:      Mon Jan 04 03:00:46 2021 +0100

description:
Optimize nbperf

- add fudge mode which gives a slightly slower hash function, but works
  almost always in the first iteration by avoiding degenerate edges
- avoid keeping incidence lists around, reducing the memory footprint by
  30%
- split edge processing from hashing as in the non-fudge case it is a
  reasonably costly part that often gets thrown away
- merge graph2 and graph3 routines now that they are mostly the same

diffstat:

 tests/usr.bin/nbperf/t_nbperf.sh |  128 +++++++++++++++++-
 usr.bin/nbperf/graph2.c          |  260 ++++++++++++++++++++++----------------
 usr.bin/nbperf/graph2.h          |   57 ++++++--
 usr.bin/nbperf/graph3.c          |  252 +-------------------------------------
 usr.bin/nbperf/graph3.h          |   62 ---------
 usr.bin/nbperf/nbperf-bdz.c      |   53 ++++---
 usr.bin/nbperf/nbperf-chm.c      |  157 +++++++++++++----------
 usr.bin/nbperf/nbperf-chm3.c     |    2 +-
 usr.bin/nbperf/nbperf.1          |    4 +-
 usr.bin/nbperf/nbperf.c          |   21 ++-
 usr.bin/nbperf/nbperf.h          |    3 +-
 11 files changed, 453 insertions(+), 546 deletions(-)

diffs (truncated from 1368 to 300 lines):

diff -r f44eda8687d6 -r 88478e8cd931 tests/usr.bin/nbperf/t_nbperf.sh
--- a/tests/usr.bin/nbperf/t_nbperf.sh  Tue Nov 17 14:11:31 2020 +0100
+++ b/tests/usr.bin/nbperf/t_nbperf.sh  Mon Jan 04 03:00:46 2021 +0100
@@ -27,7 +27,7 @@
 
 cleanup()
 {
-       rm -f reference.txt hash.c hash.map testprog
+       rm -f reference.txt input.txt hash.c hash.map testprog
 }
 
 atf_test_case chm
@@ -38,7 +38,7 @@
        atf_set "require.progs" "cc"
 }
 chm_body()
-{ 
+{
        for n in 4 32 128 1024 65536; do
                seq 0 $(($n - 1)) > reference.txt
                atf_check -o file:reference.txt \
@@ -54,6 +54,32 @@
        cleanup
 }
 
+atf_test_case chm_fudged
+chm_fudged_head()
+{
+       atf_set "descr" "Checks chm algorithm with fudged hash"
+       atf_set "require.progs" "cc"
+}
+chm_fudged_body()
+{
+       seq 0 9 > reference.txt
+       seq 1 10 > input.txt
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:1 fgrep -q '^=' hash.c
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm -f -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:0 fgrep -q '^=' hash.c
+}
+chm_fudged_clean()
+{
+       cleanup
+}
+
 atf_test_case chm3
 chm3_head()
 {
@@ -78,26 +104,102 @@
        cleanup
 }
 
-atf_test_case bdz
-bdz_head()
+atf_test_case chm3_fudged
+chm3_fudged_head()
+{
+       atf_set "descr" "Checks chm3 algorithm with fudged hash"
+       atf_set "require.progs" "cc"
+}
+chm3_fudged_body()
 {
-       atf_set "descr" "Checks bdz algorithm"
+       seq 0 9 > reference.txt
+       seq 1 10 > input.txt
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm3 -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:1 fgrep -q '^=' hash.c
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "chm3 -f -p" cat \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:0 fgrep -q '^= (' hash.c
+       atf_check -s exit:0 fgrep -q '^= 2' hash.c
+}
+chm3_fudged_clean()
+{
+       cleanup
+}
+
+atf_test_case bpz
+bpz_head()
+{
+       atf_set "descr" "Checks bpz algorithm"
        atf_set "require.files" "/usr/share/dict/web2"
        atf_set "require.progs" "cc"
 }
-bdz_body()
-{ 
+bpz_body()
+{
        for n in 4 32 128 1024 65536 131072; do
                seq 0 $(($n - 1)) > reference.txt
                atf_check -o file:reference.txt \
-                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bdz "sort -n" \
+                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bpz "sort -n" \
                    $n $(atf_get_srcdir)/hash_driver.c
                atf_check -o file:hash.map \
-                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bdz cat \
+                   $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bpz cat \
                    $n $(atf_get_srcdir)/hash_driver.c
        done
 }
-bdz_clean()
+bpz_clean()
+{
+       cleanup
+}
+
+atf_test_case bpz_fudged
+bpz_fudged_head()
+{
+       atf_set "descr" "Checks bpz algorithm with fudged hash"
+       atf_set "require.progs" "cc"
+}
+bpz_fudged_body()
+{
+       seq 0 9 > reference.txt
+       seq 1 10 > input.txt
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "bpz -p" "sort -n" \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:1 fgrep -q '^=' hash.c
+
+       atf_check -o file:reference.txt \
+           $(atf_get_srcdir)/h_nbperf input.txt "bpz -f -p" "sort -n" \
+           10 $(atf_get_srcdir)/hash_driver.c
+       atf_check -s exit:0 fgrep -q '^= (' hash.c
+       atf_check -s exit:0 fgrep -q '^= 2' hash.c
+}
+bpz_fudged_clean()
+{
+       cleanup
+}
+
+atf_test_case handle_dup
+handle_dup_head()
+{
+       atf_set "descr" "Checks different algorithms deal with duplicates"
+       atf_set "require.progs" "cc"
+}
+handle_dup_body()
+{
+       seq 0 9 > reference.txt
+       echo 0 >> reference.txt
+       atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+               nbperf -a chm < reference.txt
+       atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+               nbperf -a chm3 < reference.txt
+       atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+               nbperf -a bpz < reference.txt
+}
+handle_dup_clean()
 {
        cleanup
 }
@@ -105,6 +207,10 @@
 atf_init_test_cases()
 {
        atf_add_test_case chm
+       atf_add_test_case chm_fudged
        atf_add_test_case chm3
-       atf_add_test_case bdz
+       atf_add_test_case chm3_fudged
+       atf_add_test_case bpz
+       atf_add_test_case bpz_fudged
+       atf_add_test_case handle_dup
 }
diff -r f44eda8687d6 -r 88478e8cd931 usr.bin/nbperf/graph2.c
--- a/usr.bin/nbperf/graph2.c   Tue Nov 17 14:11:31 2020 +0100
+++ b/usr.bin/nbperf/graph2.c   Mon Jan 04 03:00:46 2021 +0100
@@ -47,16 +47,14 @@
 #include "nbperf.h"
 #include "graph2.h"
 
-static const uint32_t unused = 0xffffffffU;
-
 void
-graph2_setup(struct graph2 *graph, uint32_t v, uint32_t e)
+SIZED2(_setup)(struct SIZED(graph) *graph, uint32_t v, uint32_t e)
 {
        graph->v = v;
        graph->e = e;
 
-       graph->verts = calloc(sizeof(struct vertex2), v);
-       graph->edges = calloc(sizeof(struct edge2), e);
+       graph->verts = calloc(sizeof(*graph->verts), v);
+       graph->edges = calloc(sizeof(*graph->edges), e);
        graph->output_order = calloc(sizeof(uint32_t), e);
 
        if (graph->verts == NULL || graph->edges == NULL ||
@@ -65,7 +63,7 @@
 }
 
 void
-graph2_free(struct graph2 *graph)
+SIZED2(_free)(struct SIZED(graph) *graph)
 {
        free(graph->verts);
        free(graph->edges);
@@ -76,136 +74,180 @@
        graph->output_order = NULL;
 }
 
-static int
-graph2_check_duplicates(struct nbperf *nbperf, struct graph2 *graph)
+static struct nbperf *sorting_nbperf;
+static struct SIZED(graph) *sorting_graph;
+static int sorting_found;
+
+static int sorting_cmp(const void *a_, const void *b_)
 {
-       struct vertex2 *v;
-       struct edge2 *e, *e2;
-       uint32_t i, j;
+       const uint32_t *a = a_, *b = b_;
+       int i;
+       const struct SIZED(edge) *ea = &sorting_graph->edges[*a],
+           *eb = &sorting_graph->edges[*b];
+       for (i = 0; i < GRAPH_SIZE; ++i) {
+               if (ea->vertices[i] < eb->vertices[i])
+                       return -1;
+               if (ea->vertices[i] > eb->vertices[i])
+                       return 1;
+       }
+       if (sorting_nbperf->keylens[*a] < sorting_nbperf->keylens[*b])
+               return -1;
+       if (sorting_nbperf->keylens[*a] > sorting_nbperf->keylens[*b])
+               return 1;
+       i = memcmp(sorting_nbperf->keys[*a], sorting_nbperf->keys[*b],
+           sorting_nbperf->keylens[*a]);
+       if (i == 0)
+               sorting_found = 1;
+       return i;
+}
+
+static int
+SIZED2(_check_duplicates)(struct nbperf *nbperf, struct SIZED(graph) *graph)
+{
+       size_t i;
+       uint32_t *key_index = calloc(sizeof(*key_index), graph->e);
+
+       if (key_index == NULL)
+               err(1, "malloc failed");
+       for (i = 0; i < graph->e; ++i)
+               key_index[i] = i;
 
-       for (i = 0; i < graph->e; ++i) {
-               e = &graph->edges[i];
-               v = &graph->verts[e->left];
-               j = v->l_edge;
-               e2 = &graph->edges[j];
-               for (;;) {
-                       if (i < j && e->right == e2->right &&
-                           nbperf->keylens[i] == nbperf->keylens[j] &&
-                           memcmp(nbperf->keys[i], nbperf->keys[j],
-                           nbperf->keylens[i]) == 0) {
-                               nbperf->has_duplicates = 1;
-                               return -1;
-                       }
-                       if (e2->l_next == unused)
-                               break;
-                       j = e2->l_next;
-                       e2 = &graph->edges[j];
-               }               
+       sorting_nbperf = nbperf;
+       sorting_graph = graph;
+       sorting_found = 0;
+       qsort(key_index, graph->e, sizeof(*key_index), sorting_cmp);
+       if (sorting_found)
+               goto found_dups;
+       /*
+        * Any duplicate must have been found as part of the qsort,
+        * as it can't sort consecutive elements in the output without
+        * comparing them at least once.
+        */
+
+       free(key_index);
+       return 0;
+ found_dups:
+       nbperf->has_duplicates = 1;
+       return -1;
+}
+
+static inline void
+SIZED2(_add_edge)(struct SIZED(graph) *graph, uint32_t edge)
+{
+       struct SIZED(edge) *e = graph->edges + edge;
+       struct SIZED(vertex) *v;
+       size_t i;
+



Home | Main Index | Thread Index | Old Index