Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/usr.bin/make make: avoid allocating memory for simple variab...



details:   https://anonhg.NetBSD.org/src/rev/484702fda676
branches:  trunk
changeset: 378364:484702fda676
user:      rillig <rillig%NetBSD.org@localhost>
date:      Sun Apr 11 12:46:54 2021 +0000

description:
make: avoid allocating memory for simple variable names

The main change is in ParseVarname, where a Buffer is replaced with the
newly introduced LazyBuf.  LazyBuf is inspired by
https://golang.org/src/path/path.go.

In CanonicalVarname, the pre-comparison of the first letter of the
variable name is no longer necessary.  GCC 9 optimizes a fixed-length
memcmp so well that the code can finally be written to target human
readers, leaving the optimization to the compiler.

diffstat:

 usr.bin/make/hash.c |   60 +++++++++++++--
 usr.bin/make/hash.h |    6 +-
 usr.bin/make/var.c  |  192 +++++++++++++++++++++++----------------------------
 3 files changed, 142 insertions(+), 116 deletions(-)

diffs (truncated from 527 to 300 lines):

diff -r 7d142f548688 -r 484702fda676 usr.bin/make/hash.c
--- a/usr.bin/make/hash.c       Sun Apr 11 12:06:53 2021 +0000
+++ b/usr.bin/make/hash.c       Sun Apr 11 12:46:54 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: hash.c,v 1.63 2021/04/03 14:39:02 rillig Exp $ */
+/*     $NetBSD: hash.c,v 1.64 2021/04/11 12:46:54 rillig Exp $ */
 
 /*
  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -74,7 +74,7 @@
 #include "make.h"
 
 /*     "@(#)hash.c     8.1 (Berkeley) 6/6/93"  */
-MAKE_RCSID("$NetBSD: hash.c,v 1.63 2021/04/03 14:39:02 rillig Exp $");
+MAKE_RCSID("$NetBSD: hash.c,v 1.64 2021/04/11 12:46:54 rillig Exp $");
 
 /*
  * The ratio of # entries to # buckets at which we rebuild the table to
@@ -84,7 +84,7 @@ MAKE_RCSID("$NetBSD: hash.c,v 1.63 2021/
 
 /* This hash function matches Gosling's Emacs and java.lang.String. */
 static unsigned int
-hash(const char *key, size_t *out_keylen)
+Hash_String(const char *key, size_t *out_keylen)
 {
        unsigned int h;
        const char *p;
@@ -98,10 +98,17 @@ hash(const char *key, size_t *out_keylen
        return h;
 }
 
+/* This hash function matches Gosling's Emacs and java.lang.String. */
 unsigned int
-Hash_Hash(const char *key)
+Hash_Substring(Substring key)
 {
-       return hash(key, NULL);
+       unsigned int h;
+       const char *p;
+
+       h = 0;
+       for (p = key.start; p != key.end; p++)
+               h = 31 * h + (unsigned char)*p;
+       return h;
 }
 
 static HashEntry *
@@ -126,6 +133,41 @@ HashTable_Find(HashTable *t, unsigned in
        return e;
 }
 
+static bool
+HashEntry_KeyEquals(const HashEntry *he, Substring key)
+{
+       const char *heKey, *p;
+
+       heKey = he->key;
+       for (p = key.start; p != key.end; p++, heKey++)
+               if (*p != *heKey || *heKey == '\0')
+                       return false;
+       return *heKey == '\0';
+}
+
+static HashEntry *
+HashTable_FindEntryBySubstring(HashTable *t, Substring key, unsigned int h)
+{
+       HashEntry *e;
+       unsigned int chainlen = 0;
+
+#ifdef DEBUG_HASH_LOOKUP
+       DEBUG4(HASH, "%s: %p h=%08x key=%.*s\n", __func__, t, h,
+           (int)Substring_Length(key), key.start);
+#endif
+
+       for (e = t->buckets[h & t->bucketsMask]; e != NULL; e = e->next) {
+               chainlen++;
+               if (e->key_hash == h && HashEntry_KeyEquals(e, key))
+                       break;
+       }
+
+       if (chainlen > t->maxchain)
+               t->maxchain = chainlen;
+
+       return e;
+}
+
 /* Set up the hash table. */
 void
 HashTable_Init(HashTable *t)
@@ -171,7 +213,7 @@ HashTable_Done(HashTable *t)
 HashEntry *
 HashTable_FindEntry(HashTable *t, const char *key)
 {
-       unsigned int h = hash(key, NULL);
+       unsigned int h = Hash_String(key, NULL);
        return HashTable_Find(t, h, key);
 }
 
@@ -188,9 +230,9 @@ HashTable_FindValue(HashTable *t, const 
  * or return NULL.
  */
 void *
-HashTable_FindValueHash(HashTable *t, const char *key, unsigned int h)
+HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned int h)
 {
-       HashEntry *he = HashTable_Find(t, h, key);
+       HashEntry *he = HashTable_FindEntryBySubstring(t, key, h);
        return he != NULL ? he->value : NULL;
 }
 
@@ -239,7 +281,7 @@ HashEntry *
 HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew)
 {
        size_t keylen;
-       unsigned int h = hash(key, &keylen);
+       unsigned int h = Hash_String(key, &keylen);
        HashEntry *he = HashTable_Find(t, h, key);
 
        if (he != NULL) {
diff -r 7d142f548688 -r 484702fda676 usr.bin/make/hash.h
--- a/usr.bin/make/hash.h       Sun Apr 11 12:06:53 2021 +0000
+++ b/usr.bin/make/hash.h       Sun Apr 11 12:46:54 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: hash.h,v 1.39 2021/04/03 11:08:40 rillig Exp $ */
+/*     $NetBSD: hash.h,v 1.40 2021/04/11 12:46:54 rillig Exp $ */
 
 /*
  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -124,8 +124,8 @@ void HashTable_Init(HashTable *);
 void HashTable_Done(HashTable *);
 HashEntry *HashTable_FindEntry(HashTable *, const char *);
 void *HashTable_FindValue(HashTable *, const char *);
-unsigned int Hash_Hash(const char *);
-void *HashTable_FindValueHash(HashTable *, const char *, unsigned int);
+unsigned int Hash_Substring(Substring);
+void *HashTable_FindValueBySubstringHash(HashTable *, Substring, unsigned int);
 HashEntry *HashTable_CreateEntry(HashTable *, const char *, bool *);
 HashEntry *HashTable_Set(HashTable *, const char *, void *);
 void HashTable_DeleteEntry(HashTable *, HashEntry *);
diff -r 7d142f548688 -r 484702fda676 usr.bin/make/var.c
--- a/usr.bin/make/var.c        Sun Apr 11 12:06:53 2021 +0000
+++ b/usr.bin/make/var.c        Sun Apr 11 12:46:54 2021 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: var.c,v 1.915 2021/04/10 22:40:34 rillig Exp $ */
+/*     $NetBSD: var.c,v 1.916 2021/04/11 12:46:54 rillig Exp $ */
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -140,7 +140,7 @@
 #include "metachar.h"
 
 /*     "@(#)var.c      8.3 (Berkeley) 3/19/94" */
-MAKE_RCSID("$NetBSD: var.c,v 1.915 2021/04/10 22:40:34 rillig Exp $");
+MAKE_RCSID("$NetBSD: var.c,v 1.916 2021/04/11 12:46:54 rillig Exp $");
 
 /*
  * Variables are defined using one of the VAR=value assignments.  Their
@@ -345,45 +345,30 @@ VarNew(FStr name, const char *value, boo
        return var;
 }
 
-static const char *
-CanonicalVarname(const char *name)
+static Substring
+CanonicalVarname(Substring name)
 {
-       if (*name == '.' && ch_isupper(name[1])) {
-               switch (name[1]) {
-               case 'A':
-                       if (strcmp(name, ".ALLSRC") == 0)
-                               name = ALLSRC;
-                       if (strcmp(name, ".ARCHIVE") == 0)
-                               name = ARCHIVE;
-                       break;
-               case 'I':
-                       if (strcmp(name, ".IMPSRC") == 0)
-                               name = IMPSRC;
-                       break;
-               case 'M':
-                       if (strcmp(name, ".MEMBER") == 0)
-                               name = MEMBER;
-                       break;
-               case 'O':
-                       if (strcmp(name, ".OODATE") == 0)
-                               name = OODATE;
-                       break;
-               case 'P':
-                       if (strcmp(name, ".PREFIX") == 0)
-                               name = PREFIX;
-                       break;
-               case 'S':
-                       if (strcmp(name, ".SHELL") == 0) {
-                               if (shellPath == NULL)
-                                       Shell_Init();
-                       }
-                       break;
-               case 'T':
-                       if (strcmp(name, ".TARGET") == 0)
-                               name = TARGET;
-                       break;
-               }
-       }
+
+       if (!(Substring_Length(name) > 0 && name.start[0] == '.'))
+               return name;
+
+       if (Substring_Equals(name, ".ALLSRC"))
+               return Substring_InitStr(ALLSRC);
+       if (Substring_Equals(name, ".ARCHIVE"))
+               return Substring_InitStr(ARCHIVE);
+       if (Substring_Equals(name, ".IMPSRC"))
+               return Substring_InitStr(IMPSRC);
+       if (Substring_Equals(name, ".MEMBER"))
+               return Substring_InitStr(MEMBER);
+       if (Substring_Equals(name, ".OODATE"))
+               return Substring_InitStr(OODATE);
+       if (Substring_Equals(name, ".PREFIX"))
+               return Substring_InitStr(PREFIX);
+       if (Substring_Equals(name, ".TARGET"))
+               return Substring_InitStr(TARGET);
+
+       if (Substring_Equals(name, ".SHELL") && shellPath == NULL)
+               Shell_Init();
 
        /* GNU make has an additional alias $^ == ${.ALLSRC}. */
 
@@ -391,9 +376,9 @@ CanonicalVarname(const char *name)
 }
 
 static Var *
-GNode_FindVar(GNode *scope, const char *varname, unsigned int hash)
+GNode_FindVar(GNode *scope, Substring varname, unsigned int hash)
 {
-       return HashTable_FindValueHash(&scope->vars, varname, hash);
+       return HashTable_FindValueBySubstringHash(&scope->vars, varname, hash);
 }
 
 /*
@@ -410,14 +395,14 @@ GNode_FindVar(GNode *scope, const char *
  *     VarFreeEnv after use.
  */
 static Var *
-VarFind(const char *name, GNode *scope, bool elsewhere)
+VarFindSubstring(Substring name, GNode *scope, bool elsewhere)
 {
        Var *var;
        unsigned int nameHash;
 
        /* Replace '.TARGET' with '@', likewise for other local variables. */
        name = CanonicalVarname(name);
-       nameHash = Hash_Hash(name);
+       nameHash = Hash_Substring(name);
 
        var = GNode_FindVar(scope, name, nameHash);
        if (!elsewhere)
@@ -435,17 +420,19 @@ VarFind(const char *name, GNode *scope, 
        }
 
        if (var == NULL) {
-               char *env;
+               FStr envName;
+               const char *envValue;
 
                /*
                 * TODO: try setting an environment variable with the empty
                 *  name, which should be technically possible, just to see
                 *  how make reacts.  All .for loops should be broken then.
                 */
-               if ((env = getenv(name)) != NULL) {
-                       char *varname = bmake_strdup(name);
-                       return VarNew(FStr_InitOwn(varname), env, true, false);
-               }
+               envName = Substring_Str(name);
+               envValue = getenv(envName.str);
+               if (envValue != NULL)
+                       return VarNew(envName, envValue, true, false);
+               FStr_Done(&envName);
 
                if (opts.checkEnvFirst && scope != SCOPE_GLOBAL) {
                        var = GNode_FindVar(SCOPE_GLOBAL, name, nameHash);
@@ -461,6 +448,13 @@ VarFind(const char *name, GNode *scope, 
        return var;
 }
 
+/* TODO: Replace these calls with VarFindSubstring, as far as possible. */
+static Var *
+VarFind(const char *name, GNode *scope, bool elsewhere)
+{
+       return VarFindSubstring(Substring_InitStr(name), scope, elsewhere);
+}
+
 /* If the variable is an environment variable, free it, including its value. */
 static void
 VarFreeEnv(Var *v)
@@ -4006,12 +4000,17 @@ cleanup:
 }
 
 /*
- * Only four of the local variables are treated specially as they are the
- * only four that will be set when dynamic sources are expanded.



Home | Main Index | Thread Index | Old Index