Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src-draft/trunk]: src/usr.bin/nbperf Optimize nbperf
details: https://anonhg.NetBSD.org/src-all/rev/88478e8cd931
branches: trunk
changeset: 949256:88478e8cd931
user: Joerg Sonnenberger <joerg%bec.de@localhost>
date: Mon Jan 04 03:00:46 2021 +0100
description:
Optimize nbperf
- add fudge mode which gives a slightly slower hash function, but works
almost always in the first iteration by avoiding degenerate edges
- avoid keeping incidence lists around, reducing the memory footprint by
30%
- split edge processing from hashing, as in the non-fudge case it is a
reasonably costly part that often gets thrown away
- merge graph2 and graph3 routines now that they are mostly the same
diffstat:
tests/usr.bin/nbperf/t_nbperf.sh | 128 +++++++++++++++++-
usr.bin/nbperf/graph2.c | 260 ++++++++++++++++++++++----------------
usr.bin/nbperf/graph2.h | 57 ++++++--
usr.bin/nbperf/graph3.c | 252 +-------------------------------------
usr.bin/nbperf/graph3.h | 62 ---------
usr.bin/nbperf/nbperf-bdz.c | 53 ++++---
usr.bin/nbperf/nbperf-chm.c | 157 +++++++++++++----------
usr.bin/nbperf/nbperf-chm3.c | 2 +-
usr.bin/nbperf/nbperf.1 | 4 +-
usr.bin/nbperf/nbperf.c | 21 ++-
usr.bin/nbperf/nbperf.h | 3 +-
11 files changed, 453 insertions(+), 546 deletions(-)
diffs (truncated from 1368 to 300 lines):
diff -r f44eda8687d6 -r 88478e8cd931 tests/usr.bin/nbperf/t_nbperf.sh
--- a/tests/usr.bin/nbperf/t_nbperf.sh Tue Nov 17 14:11:31 2020 +0100
+++ b/tests/usr.bin/nbperf/t_nbperf.sh Mon Jan 04 03:00:46 2021 +0100
@@ -27,7 +27,7 @@
cleanup()
{
- rm -f reference.txt hash.c hash.map testprog
+ rm -f reference.txt input.txt hash.c hash.map testprog
}
atf_test_case chm
@@ -38,7 +38,7 @@
atf_set "require.progs" "cc"
}
chm_body()
-{
+{
for n in 4 32 128 1024 65536; do
seq 0 $(($n - 1)) > reference.txt
atf_check -o file:reference.txt \
@@ -54,6 +54,32 @@
cleanup
}
+atf_test_case chm_fudged
+chm_fudged_head()
+{
+ atf_set "descr" "Checks chm algorithm with fudged hash"
+ atf_set "require.progs" "cc"
+}
+chm_fudged_body()
+{
+ seq 0 9 > reference.txt
+ seq 1 10 > input.txt
+
+ atf_check -o file:reference.txt \
+ $(atf_get_srcdir)/h_nbperf input.txt "chm -p" cat \
+ 10 $(atf_get_srcdir)/hash_driver.c
+ atf_check -s exit:1 fgrep -q '^=' hash.c
+
+ atf_check -o file:reference.txt \
+ $(atf_get_srcdir)/h_nbperf input.txt "chm -f -p" cat \
+ 10 $(atf_get_srcdir)/hash_driver.c
+ atf_check -s exit:0 fgrep -q '^=' hash.c
+}
+chm_fudged_clean()
+{
+ cleanup
+}
+
atf_test_case chm3
chm3_head()
{
@@ -78,26 +104,102 @@
cleanup
}
-atf_test_case bdz
-bdz_head()
+atf_test_case chm3_fudged
+chm3_fudged_head()
+{
+ atf_set "descr" "Checks chm3 algorithm with fudged hash"
+ atf_set "require.progs" "cc"
+}
+chm3_fudged_body()
{
- atf_set "descr" "Checks bdz algorithm"
+ seq 0 9 > reference.txt
+ seq 1 10 > input.txt
+
+ atf_check -o file:reference.txt \
+ $(atf_get_srcdir)/h_nbperf input.txt "chm3 -p" cat \
+ 10 $(atf_get_srcdir)/hash_driver.c
+ atf_check -s exit:1 fgrep -q '^=' hash.c
+
+ atf_check -o file:reference.txt \
+ $(atf_get_srcdir)/h_nbperf input.txt "chm3 -f -p" cat \
+ 10 $(atf_get_srcdir)/hash_driver.c
+ atf_check -s exit:0 fgrep -q '^= (' hash.c
+ atf_check -s exit:0 fgrep -q '^= 2' hash.c
+}
+chm3_fudged_clean()
+{
+ cleanup
+}
+
+atf_test_case bpz
+bpz_head()
+{
+ atf_set "descr" "Checks bpz algorithm"
atf_set "require.files" "/usr/share/dict/web2"
atf_set "require.progs" "cc"
}
-bdz_body()
-{
+bpz_body()
+{
for n in 4 32 128 1024 65536 131072; do
seq 0 $(($n - 1)) > reference.txt
atf_check -o file:reference.txt \
- $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bdz "sort -n" \
+ $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bpz "sort -n" \
$n $(atf_get_srcdir)/hash_driver.c
atf_check -o file:hash.map \
- $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bdz cat \
+ $(atf_get_srcdir)/h_nbperf /usr/share/dict/web2 bpz cat \
$n $(atf_get_srcdir)/hash_driver.c
done
}
-bdz_clean()
+bpz_clean()
+{
+ cleanup
+}
+
+atf_test_case bpz_fudged
+bpz_fudged_head()
+{
+ atf_set "descr" "Checks bpz algorithm with fudged hash"
+ atf_set "require.progs" "cc"
+}
+bpz_fudged_body()
+{
+ seq 0 9 > reference.txt
+ seq 1 10 > input.txt
+
+ atf_check -o file:reference.txt \
+ $(atf_get_srcdir)/h_nbperf input.txt "bpz -p" "sort -n" \
+ 10 $(atf_get_srcdir)/hash_driver.c
+ atf_check -s exit:1 fgrep -q '^=' hash.c
+
+ atf_check -o file:reference.txt \
+ $(atf_get_srcdir)/h_nbperf input.txt "bpz -f -p" "sort -n" \
+ 10 $(atf_get_srcdir)/hash_driver.c
+ atf_check -s exit:0 fgrep -q '^= (' hash.c
+ atf_check -s exit:0 fgrep -q '^= 2' hash.c
+}
+bpz_fudged_clean()
+{
+ cleanup
+}
+
+atf_test_case handle_dup
+handle_dup_head()
+{
+ atf_set "descr" "Checks different algorithms deal with duplicates"
+ atf_set "require.progs" "cc"
+}
+handle_dup_body()
+{
+ seq 0 9 > reference.txt
+ echo 0 >> reference.txt
+ atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+ nbperf -a chm < reference.txt
+ atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+ nbperf -a chm3 < reference.txt
+ atf_check -s exit:1 -e match:"nbperf: Duplicate keys detected" \
+ nbperf -a bpz < reference.txt
+}
+handle_dup_clean()
{
cleanup
}
@@ -105,6 +207,10 @@
atf_init_test_cases()
{
atf_add_test_case chm
+ atf_add_test_case chm_fudged
atf_add_test_case chm3
- atf_add_test_case bdz
+ atf_add_test_case chm3_fudged
+ atf_add_test_case bpz
+ atf_add_test_case bpz_fudged
+ atf_add_test_case handle_dup
}
diff -r f44eda8687d6 -r 88478e8cd931 usr.bin/nbperf/graph2.c
--- a/usr.bin/nbperf/graph2.c Tue Nov 17 14:11:31 2020 +0100
+++ b/usr.bin/nbperf/graph2.c Mon Jan 04 03:00:46 2021 +0100
@@ -47,16 +47,14 @@
#include "nbperf.h"
#include "graph2.h"
-static const uint32_t unused = 0xffffffffU;
-
void
-graph2_setup(struct graph2 *graph, uint32_t v, uint32_t e)
+SIZED2(_setup)(struct SIZED(graph) *graph, uint32_t v, uint32_t e)
{
graph->v = v;
graph->e = e;
- graph->verts = calloc(sizeof(struct vertex2), v);
- graph->edges = calloc(sizeof(struct edge2), e);
+ graph->verts = calloc(sizeof(*graph->verts), v);
+ graph->edges = calloc(sizeof(*graph->edges), e);
graph->output_order = calloc(sizeof(uint32_t), e);
if (graph->verts == NULL || graph->edges == NULL ||
@@ -65,7 +63,7 @@
}
void
-graph2_free(struct graph2 *graph)
+SIZED2(_free)(struct SIZED(graph) *graph)
{
free(graph->verts);
free(graph->edges);
@@ -76,136 +74,180 @@
graph->output_order = NULL;
}
-static int
-graph2_check_duplicates(struct nbperf *nbperf, struct graph2 *graph)
+static struct nbperf *sorting_nbperf;
+static struct SIZED(graph) *sorting_graph;
+static int sorting_found;
+
+static int sorting_cmp(const void *a_, const void *b_)
{
- struct vertex2 *v;
- struct edge2 *e, *e2;
- uint32_t i, j;
+ const uint32_t *a = a_, *b = b_;
+ int i;
+ const struct SIZED(edge) *ea = &sorting_graph->edges[*a],
+ *eb = &sorting_graph->edges[*b];
+ for (i = 0; i < GRAPH_SIZE; ++i) {
+ if (ea->vertices[i] < eb->vertices[i])
+ return -1;
+ if (ea->vertices[i] > eb->vertices[i])
+ return 1;
+ }
+ if (sorting_nbperf->keylens[*a] < sorting_nbperf->keylens[*b])
+ return -1;
+ if (sorting_nbperf->keylens[*a] > sorting_nbperf->keylens[*b])
+ return 1;
+ i = memcmp(sorting_nbperf->keys[*a], sorting_nbperf->keys[*b],
+ sorting_nbperf->keylens[*a]);
+ if (i == 0)
+ sorting_found = 1;
+ return i;
+}
+
+static int
+SIZED2(_check_duplicates)(struct nbperf *nbperf, struct SIZED(graph) *graph)
+{
+ size_t i;
+ uint32_t *key_index = calloc(sizeof(*key_index), graph->e);
+
+ if (key_index == NULL)
+ err(1, "malloc failed");
+ for (i = 0; i < graph->e; ++i)
+ key_index[i] = i;
- for (i = 0; i < graph->e; ++i) {
- e = &graph->edges[i];
- v = &graph->verts[e->left];
- j = v->l_edge;
- e2 = &graph->edges[j];
- for (;;) {
- if (i < j && e->right == e2->right &&
- nbperf->keylens[i] == nbperf->keylens[j] &&
- memcmp(nbperf->keys[i], nbperf->keys[j],
- nbperf->keylens[i]) == 0) {
- nbperf->has_duplicates = 1;
- return -1;
- }
- if (e2->l_next == unused)
- break;
- j = e2->l_next;
- e2 = &graph->edges[j];
- }
+ sorting_nbperf = nbperf;
+ sorting_graph = graph;
+ sorting_found = 0;
+ qsort(key_index, graph->e, sizeof(*key_index), sorting_cmp);
+ if (sorting_found)
+ goto found_dups;
+ /*
+ * Any duplicate must have been found as part of the qsort,
+ * as it can't sort consecutive elements in the output without
+ * comparing them at least once.
+ */
+
+ free(key_index);
+ return 0;
+ found_dups:
+ nbperf->has_duplicates = 1;
+ return -1;
+}
+
+static inline void
+SIZED2(_add_edge)(struct SIZED(graph) *graph, uint32_t edge)
+{
+ struct SIZED(edge) *e = graph->edges + edge;
+ struct SIZED(vertex) *v;
+ size_t i;
+
Home |
Main Index |
Thread Index |
Old Index