Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/libexec/ld.elf_so Implement DT_GNU_HASH



details:   https://anonhg.NetBSD.org/src/rev/1b874dff0653
branches:  trunk
changeset: 1007753:1b874dff0653
user:      kamil <kamil%NetBSD.org@localhost>
date:      Sat Feb 29 04:24:33 2020 +0000

description:
Implement DT_GNU_HASH

DT_GNU_HASH serves the same purpose as DT_HASH, however it is a distinct
and faster apprach implemented and designed in the GNU toolchain in 2006.

DT_GNU_HASH is preferred whenever available.

Original GNU benchmarks claim 50% faster dynamic linking time.
https://www.sourceware.org/ml/binutils/2006-06/msg00418.html

Code based on FreeBSD and OpenBSD, both were based on DragonFlyBSD.

diffstat:

 libexec/ld.elf_so/headers.c |  107 ++++++++++++++++++++++++++++++++++++++-----
 libexec/ld.elf_so/reloc.c   |    9 +--
 libexec/ld.elf_so/rtld.h    |   22 ++++++++-
 libexec/ld.elf_so/symbol.c  |   89 +++++++++++++++++++++++++++++++++--
 4 files changed, 200 insertions(+), 27 deletions(-)

diffs (truncated from 352 to 300 lines):

diff -r 5151725bccf7 -r 1b874dff0653 libexec/ld.elf_so/headers.c
--- a/libexec/ld.elf_so/headers.c       Sat Feb 29 04:23:05 2020 +0000
+++ b/libexec/ld.elf_so/headers.c       Sat Feb 29 04:24:33 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $       */
+/*     $NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $        */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -40,7 +40,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: headers.c,v 1.65 2018/12/30 11:55:15 martin Exp $");
+__RCSID("$NetBSD: headers.c,v 1.66 2020/02/29 04:24:33 kamil Exp $");
 #endif /* not lint */
 
 #include <err.h>
@@ -166,26 +166,87 @@
 
                case DT_HASH:
                        {
+                               uint32_t nbuckets, nchains;
+                               const Elf_Symindx *hashtab = (const Elf_Symindx *)
+                                   (obj->relocbase + dynp->d_un.d_ptr);
+
+                               if (hashtab[0] > UINT32_MAX)
+                                       nbuckets = UINT32_MAX;
+                               else
+                                       nbuckets = hashtab[0];
+                               obj->nbuckets = nbuckets;
+                               obj->nchains = (nchains = hashtab[1]);
+                               obj->buckets = hashtab + 2;
+                               obj->chains = obj->buckets + obj->nbuckets;
+
+                               /* Validity check */
+                               if (!obj->buckets || !nbuckets || !nchains)
+                                       continue;
+
+                               obj->sysv_hash = true;
+
+                               /*
+                                * Should really be in _rtld_relocate_objects,
+                                * but _rtld_symlook_obj might be used before.
+                                */
+                               fast_divide32_prepare(obj->nbuckets,
+                                   &obj->nbuckets_m,
+                                   &obj->nbuckets_s1,
+                                   &obj->nbuckets_s2);
+                       }
+                       break;
+
+               case DT_GNU_HASH:
+                       {
+                               uint32_t nmaskwords;
+                               uint32_t nbuckets, symndx;
+                               int bloom_size32;
+                               bool nmw_power2;
                                const Elf_Symindx *hashtab = (const Elf_Symindx *)
                                    (obj->relocbase + dynp->d_un.d_ptr);
 
                                if (hashtab[0] > UINT32_MAX)
-                                       obj->nbuckets = UINT32_MAX;
+                                       nbuckets = UINT32_MAX;
                                else
-                                       obj->nbuckets = hashtab[0];
-                               obj->nchains = hashtab[1];
-                               obj->buckets = hashtab + 2;
-                               obj->chains = obj->buckets + obj->nbuckets;
+                                       nbuckets = hashtab[0];
+                               obj->nbuckets_gnu = nbuckets;
+
+                               nmaskwords = hashtab[2];
+                               bloom_size32 = nmaskwords * (ELFSIZE / 32);
+
+                               obj->buckets_gnu = hashtab + 4 + bloom_size32;
+
+                               nmw_power2 = powerof2(nmaskwords);
+
+                               /* Validity check */
+                               if (!nmw_power2 || !nbuckets || !obj->buckets_gnu)
+                                       continue;
+
+                               obj->gnu_hash = true;
+
+                               obj->mask_bm_gnu = nmaskwords - 1;
+                               obj->symndx_gnu = (symndx = hashtab[1]);
+                               obj->shift2_gnu = hashtab[3];
+                               obj->bloom_gnu = (const Elf_Addr *)(hashtab + 4);
+                               obj->chains_gnu = obj->buckets_gnu + nbuckets - symndx;
+
                                /*
                                 * Should really be in _rtld_relocate_objects,
                                 * but _rtld_symlook_obj might be used before.
                                 */
-                               if (obj->nbuckets) {
-                                       fast_divide32_prepare(obj->nbuckets,
-                                           &obj->nbuckets_m,
-                                           &obj->nbuckets_s1,
-                                           &obj->nbuckets_s2);
-                               }
+                               fast_divide32_prepare(nbuckets,
+                                   &obj->nbuckets_m_gnu,
+                                   &obj->nbuckets_s1_gnu,
+                                   &obj->nbuckets_s2_gnu);
+
+                               dbg(("found GNU Hash: buckets=%p "
+                                    "nbuckets=%lu chains=%p nchains=%u "
+                                    "bloom=%p mask_bm=%u shift2=%u "
+                                    "symndx=%u",
+                                   obj->buckets_gnu, obj->nbuckets_gnu,
+                                   obj->chains_gnu, obj->nchains_gnu,
+                                   obj->bloom_gnu, obj->mask_bm_gnu,
+                                   obj->shift2_gnu, obj->symndx_gnu));
                        }
                        break;
 
@@ -352,6 +413,26 @@
                        obj->relalim = obj->pltrela;
        }
 
+       /* If the ELF Hash is present, "nchains" is the same in both hashes. */
+       if (!obj->sysv_hash && obj->gnu_hash) {
+               uint_fast32_t i, nbucket, symndx;
+
+               /* Otherwise, count the entries from the GNU Hash chain. */
+               nbucket = obj->nbuckets_gnu;
+               symndx = obj->symndx_gnu;
+
+               for (i = 0; i < nbucket; i++) {
+                       Elf_Word bkt = obj->buckets_gnu[i];
+                       if (bkt == 0)
+                               continue;
+                       const uint32_t *hashval = &obj->chains_gnu[bkt];
+                       do {
+                               symndx++;
+                       } while ((*hashval++ & 1U) == 0);
+               }
+               obj->nchains_gnu = (uint32_t)symndx;
+       }
+
 #ifdef RTLD_LOADER
        if (init != 0)
                obj->init = (Elf_Addr) obj->relocbase + init;
diff -r 5151725bccf7 -r 1b874dff0653 libexec/ld.elf_so/reloc.c
--- a/libexec/ld.elf_so/reloc.c Sat Feb 29 04:23:05 2020 +0000
+++ b/libexec/ld.elf_so/reloc.c Sat Feb 29 04:24:33 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $         */
+/*     $NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $         */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -39,7 +39,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: reloc.c,v 1.115 2020/02/29 04:23:05 kamil Exp $");
+__RCSID("$NetBSD: reloc.c,v 1.116 2020/02/29 04:24:33 kamil Exp $");
 #endif /* not lint */
 
 #include <err.h>
@@ -174,9 +174,8 @@
        int ok = 1;
 
        for (obj = first; obj != NULL; obj = obj->next) {
-               if (obj->nbuckets == 0 || obj->nchains == 0 ||
-                   obj->buckets == NULL || obj->symtab == NULL ||
-                   obj->strtab == NULL) {
+               if ((!obj->sysv_hash && !obj->gnu_hash) ||
+                   obj->symtab == NULL || obj->strtab == NULL) {
                        _rtld_error("%s: Shared object has no run-time"
                            " symbol table", obj->path);
                        return -1;
diff -r 5151725bccf7 -r 1b874dff0653 libexec/ld.elf_so/rtld.h
--- a/libexec/ld.elf_so/rtld.h  Sat Feb 29 04:23:05 2020 +0000
+++ b/libexec/ld.elf_so/rtld.h  Sat Feb 29 04:24:33 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: rtld.h,v 1.137 2020/02/29 04:23:05 kamil Exp $  */
+/*     $NetBSD: rtld.h,v 1.138 2020/02/29 04:24:33 kamil Exp $  */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -181,6 +181,7 @@
        Elf_Word        gotsym;         /* First dynamic symbol in GOT */
 #endif
 
+       /* SysV Hash fields */
        const Elf_Symindx *buckets;     /* Hash table buckets array */
        unsigned long   unused1;        /* Used to be nbuckets */
        const Elf_Symindx *chains;      /* Hash table chain array */
@@ -221,7 +222,9 @@
                        tls_done:1,     /* True if static TLS offset
                                         * has been allocated */
 #endif
-                       ref_nodel:1;    /* Refcount increased to prevent dlclose */
+                       ref_nodel:1,    /* Refcount increased to prevent dlclose */
+                       sysv_hash:1,    /* SysV Hash available */
+                       gnu_hash:1;     /* GNU Hash available */
 
        struct link_map linkmap;        /* for GDB */
 
@@ -234,10 +237,25 @@
 
        void            *ehdr;
 
+       /* SysV Hash fields */
        uint32_t        nbuckets;       /* Number of buckets */
        uint32_t        nbuckets_m;     /* Precomputed for fast remainder */
        uint8_t         nbuckets_s1;
        uint8_t         nbuckets_s2;
+
+       /* GNU Hash fields */
+       const uint32_t *buckets_gnu;    /* Hash table buckets array */
+       uint32_t        nbuckets_gnu;   /* Number of GNU hash buckets */
+       uint32_t        nbuckets_m_gnu; /* Precomputed for fast remainder */
+       uint8_t         nbuckets_s1_gnu;
+       uint8_t         nbuckets_s2_gnu;
+       const uint32_t *chains_gnu;     /* Hash table chain array */
+#define nchains_gnu    nchains         /* Number of symbols, shared with SysV Hash */
+       const Elf_Addr *bloom_gnu;
+       uint32_t        symndx_gnu;     /* First accessible symbol on dynsym table */
+       uint32_t        mask_bm_gnu;    /* Bloom filter words - 1 (bitmask) */
+       uint32_t        shift2_gnu;     /* Bloom filter shift count */
+
        size_t          pathlen;        /* Pathname length */
        SIMPLEQ_HEAD(, Struct_Name_Entry) names; /* List of names for this
                                                  * object we know about. */
diff -r 5151725bccf7 -r 1b874dff0653 libexec/ld.elf_so/symbol.c
--- a/libexec/ld.elf_so/symbol.c        Sat Feb 29 04:23:05 2020 +0000
+++ b/libexec/ld.elf_so/symbol.c        Sat Feb 29 04:24:33 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $         */
+/*     $NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $         */
 
 /*
  * Copyright 1996 John D. Polstra.
@@ -40,7 +40,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: symbol.c,v 1.71 2020/02/29 04:23:05 kamil Exp $");
+__RCSID("$NetBSD: symbol.c,v 1.72 2020/02/29 04:24:33 kamil Exp $");
 #endif /* not lint */
 
 #include <err.h>
@@ -327,18 +327,17 @@
  * the given name.  Returns a pointer to the symbol, or NULL if no
  * definition was found.
  *
- * The symbol's hash value is passed in for efficiency reasons; that
- * eliminates many recomputations of the hash value.
+ * SysV Hash version.
  */
-const Elf_Sym *
-_rtld_symlook_obj(const char *name, Elf_Hash *hash,
+static const Elf_Sym *
+_rtld_symlook_obj_sysv(const char *name, unsigned long hash,
     const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
 {
        unsigned long symnum;
        const Elf_Sym *vsymp = NULL;
        int vcount = 0;
 
-       for (symnum = obj->buckets[fast_remainder32(hash->sysv, obj->nbuckets,
+       for (symnum = obj->buckets[fast_remainder32(hash, obj->nbuckets,
             obj->nbuckets_m, obj->nbuckets_s1, obj->nbuckets_s2)];
             symnum != ELF_SYM_UNDEFINED;
             symnum = obj->chains[symnum]) {
@@ -355,6 +354,82 @@
 }
 
 /*
+ * Search the symbol table of a single shared object for a symbol of
+ * the given name.  Returns a pointer to the symbol, or NULL if no
+ * definition was found.
+ *
+ * GNU Hash version.
+ */
+static const Elf_Sym *
+_rtld_symlook_obj_gnu(const char *name, unsigned long hash,
+    const Obj_Entry *obj, u_int flags, const Ver_Entry *ventry)
+{
+       unsigned long symnum;
+       const Elf_Sym *vsymp = NULL;
+       const Elf32_Word *hashval;
+       Elf_Addr bloom_word;
+       Elf32_Word bucket;
+       int vcount = 0;
+       unsigned int h1, h2;
+
+       /* Pick right bitmask word from Bloom filter array */
+       bloom_word = obj->bloom_gnu[(hash / ELFSIZE) & obj->mask_bm_gnu];
+
+       /* Calculate modulus word size of gnu hash and its derivative */
+       h1 = hash & (ELFSIZE - 1);
+       h2 = ((hash >> obj->shift2_gnu) & (ELFSIZE - 1));
+
+       /* Filter out the "definitely not in set" queries */
+       if (((bloom_word >> h1) & (bloom_word >> h2) & 1) == 0)



Home | Main Index | Thread Index | Old Index