tech-userlevel archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[PATCH] ld.elf_so - Cache objects checked



Hi List

I've been researching why Evolution from GNOME takes over 5 minutes to load on my quad core amd64 beast. It boils down to dlsym looking for a symbol that does not exist directly and as such examining every needed library. However, the current implementation does not remember what libraries it as already checked. Normally this isn't a problem, but with the way Evolution is built the search chain is massive.

As such, I've ported FreeBSD's DoneList cache from their runtime linker to ours. The main implementation difference is we xmalloc + xfree vs their alloca + no free needed. They've been using this for a long time, so the idea + code is pretty solid.

However, I've added a second cache for weak symbols to _rtld_symlook_needed which is where the real performance gain is made for my use case. Whilst this doesn't break my system and I cannot see a flaw in it's design maybe someone can comment?

With this patch, Evolution (without the patches to and a glib I added to pkgsrc a few days ago) loads in under 2 seconds (5 seconds with initial disk thrashing).

Thanks

Roy
Index: load.c
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/load.c,v
retrieving revision 1.36
diff -u -p -r1.36 load.c
--- load.c      19 May 2009 20:44:52 -0000      1.36
+++ load.c      24 Feb 2010 12:13:05 -0000
@@ -155,6 +155,7 @@ _rtld_load_object(const char *filepath, 
 
                *_rtld_objtail = obj;
                _rtld_objtail = &obj->next;
+               _rtld_objcount++;
 #ifdef RTLD_LOADER
                _rtld_linkmap_add(obj); /* for GDB */
 #endif
Index: rtld.c
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/rtld.c,v
retrieving revision 1.128
diff -u -p -r1.128 rtld.c
--- rtld.c      10 Jan 2010 06:37:32 -0000      1.128
+++ rtld.c      24 Feb 2010 12:13:05 -0000
@@ -83,6 +83,7 @@ struct r_debug  _rtld_debug;  /* for GDB;
 bool            _rtld_trust;   /* False for setuid and setgid programs */
 Obj_Entry      *_rtld_objlist; /* Head of linked list of shared objects */
 Obj_Entry     **_rtld_objtail; /* Link field of last object in list */
+int            _rtld_objcount; /* Number of shared objects */
 Obj_Entry      *_rtld_objmain; /* The main program shared object */
 Obj_Entry       _rtld_objself; /* The dynamic linker shared object */
 const char     _rtld_path[] = _PATH_RTLD;
@@ -120,7 +121,7 @@ static void _rtld_initlist_visit(Objlist
 static void _rtld_initlist_tsort(Objlist *, int);
 static Obj_Entry *_rtld_dlcheck(void *);
 static void _rtld_init_dag(Obj_Entry *);
-static void _rtld_init_dag1(Obj_Entry *, Obj_Entry *);
+static void _rtld_init_dag1(Obj_Entry *, Obj_Entry *, DoneList *);
 static void _rtld_objlist_remove(Objlist *, Obj_Entry *);
 static void _rtld_objlist_clear(Objlist *);
 static void _rtld_unload_object(Obj_Entry *, bool);
@@ -271,6 +272,7 @@ _rtld_init(caddr_t mapbase, caddr_t relo
        /* Make the object list empty again. */
        _rtld_objlist = NULL;
        _rtld_objtail = &_rtld_objlist;
+       _rtld_objcount = 0;
 
        _rtld_debug.r_brk = _rtld_debug_state;
        _rtld_debug.r_state = RT_CONSISTENT;
@@ -643,15 +645,21 @@ _rtld_initlist_tsort(Objlist* list, int 
 static void
 _rtld_init_dag(Obj_Entry *root)
 {
+       DoneList donelist;
 
-       _rtld_init_dag1(root, root);
+       donelist_init(&donelist);
+       _rtld_init_dag1(root, root, &donelist);
+       donelist_clear(&donelist);
 }
 
 static void
-_rtld_init_dag1(Obj_Entry *root, Obj_Entry *obj)
+_rtld_init_dag1(Obj_Entry *root, Obj_Entry *obj, DoneList *dlp)
 {
        const Needed_Entry *needed;
 
+       if (donelist_check(dlp, obj))
+               return;
+
        if (!obj->mainref) {
                if (_rtld_objlist_find(&obj->dldags, root))
                        return;
@@ -662,7 +670,7 @@ _rtld_init_dag1(Obj_Entry *root, Obj_Ent
        }
        for (needed = obj->needed; needed != NULL; needed = needed->next)
                if (needed->obj != NULL)
-                       _rtld_init_dag1(root, needed->obj);
+                       _rtld_init_dag1(root, needed->obj, dlp);
 }
 
 /*
@@ -705,6 +713,7 @@ _rtld_unload_object(Obj_Entry *root, boo
                                _rtld_objlist_remove(&_rtld_list_global, obj);
                                _rtld_linkmap_delete(obj);
                                *linkp = obj->next;
+                               _rtld_objcount--;
                                _rtld_obj_free(obj);
                        } else
                                linkp = &obj->next;
@@ -810,11 +819,16 @@ _rtld_objmain_sym(const char *name)
        unsigned long hash;
        const Elf_Sym *def;
        const Obj_Entry *obj;
+       DoneList donelist;
 
        hash = _rtld_elf_hash(name);
        obj = _rtld_objmain;
+       donelist_init(&donelist);
 
-       def = _rtld_symlook_list(name, hash, &_rtld_list_main, &obj, false);
+       def = _rtld_symlook_list(name, hash, &_rtld_list_main, &obj, false,
+           &donelist);
+
+       donelist_clear(&donelist);
 
        if (def != NULL)
                return obj->relocbase + def->st_value;
@@ -838,6 +852,7 @@ dlsym(void *handle, const char *name)
        const Elf_Sym *def;
        const Obj_Entry *defobj;
        void *retaddr;
+       DoneList donelist; 
        
        hash = _rtld_elf_hash(name);
        def = NULL;
@@ -892,20 +907,28 @@ dlsym(void *handle, const char *name)
                if ((obj = _rtld_dlcheck(handle)) == NULL)
                        return NULL;
                
+               donelist_init(&donelist);
+
                if (obj->mainprog) {
                        /* Search main program and all libraries loaded by it */
                        def = _rtld_symlook_list(name, hash, &_rtld_list_main,
-                           &defobj, false);
+                           &defobj, false, &donelist);
                } else {
                        Needed_Entry fake;
+                       DoneList wdonelist;;
 
                        /* Search the object and all the libraries loaded by 
it. */
                        fake.next = NULL;
                        fake.obj = __UNCONST(obj);
                        fake.name = 0;
+
+                       donelist_init(&wdonelist);
                        def = _rtld_symlook_needed(name, hash, &fake, &defobj,
-                           false);
+                           false, &donelist, &wdonelist);
+                       donelist_clear(&wdonelist);
                }
+
+               donelist_clear(&donelist);
                break;
        }
        
Index: rtld.h
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/rtld.h,v
retrieving revision 1.88
diff -u -p -r1.88 rtld.h
--- rtld.h      17 Jan 2010 08:04:20 -0000      1.88
+++ rtld.h      24 Feb 2010 12:13:06 -0000
@@ -59,6 +59,17 @@ extern size_t _rtld_pagesz;
 #define NEW(type)      ((type *) xmalloc(sizeof(type)))
 #define CNEW(type)     ((type *) xcalloc(sizeof(type)))
 
+/*
+ * Fill in a DoneList with an allocation large enough to hold all of
+ * the currently-loaded objects.
+ */
+#define donelist_init(dlp)                                             \
+    ((dlp)->objs = xmalloc(_rtld_objcount * sizeof((dlp)->objs[0])),   \
+    (dlp)->num_alloc = _rtld_objcount,                                 \
+    (dlp)->num_used = 0)
+#define donelist_clear(dlp)            xfree((dlp)->objs)
+#define donelist_check(dlp, obj)       _rtld_donelist_check(dlp, obj)
+
 #endif /* _RTLD_SOURCE */
 
 /*
@@ -198,12 +209,20 @@ typedef struct Struct_Obj_Entry {
        void            *ehdr;
 } Obj_Entry;
 
+typedef struct Struct_DoneList {
+       const Obj_Entry **objs;         /* Array of object pointers */
+       unsigned int num_alloc;         /* Allocated size of the array */
+       unsigned int num_used;          /* Number of array slots used */
+} DoneList;
+
+
 #if defined(_RTLD_SOURCE)
 
 extern struct r_debug _rtld_debug;
 extern Search_Path *_rtld_default_paths;
 extern Obj_Entry *_rtld_objlist;
 extern Obj_Entry **_rtld_objtail;
+extern int _rtld_objcount;
 extern Obj_Entry *_rtld_objmain;
 extern Obj_Entry _rtld_objself;
 extern Search_Path *_rtld_paths;
@@ -235,6 +254,7 @@ void _rtld_linkmap_delete(Obj_Entry *);
 void _rtld_objlist_push_head(Objlist *, Obj_Entry *);
 void _rtld_objlist_push_tail(Objlist *, Obj_Entry *);
 Objlist_Entry *_rtld_objlist_find(Objlist *, const Obj_Entry *);
+bool _rtld_donelist_check(DoneList *, const Obj_Entry *);
 
 /* expand.c */
 size_t _rtld_expand_path(char *, size_t, const char *, const char *,\
@@ -276,11 +296,12 @@ const Elf_Sym *_rtld_find_plt_symdef(uns
     const Obj_Entry **, bool);
 
 const Elf_Sym *_rtld_symlook_list(const char *, unsigned long,
-    const Objlist *, const Obj_Entry **, bool);
+    const Objlist *, const Obj_Entry **, bool, DoneList *);
 const Elf_Sym *_rtld_symlook_default(const char *, unsigned long,
     const Obj_Entry *, const Obj_Entry **, bool);
 const Elf_Sym *_rtld_symlook_needed(const char *, unsigned long,
-    const Needed_Entry *, const Obj_Entry **, bool);
+    const Needed_Entry *, const Obj_Entry **, bool,
+    DoneList *, DoneList *);
 #ifdef COMBRELOC
 void _rtld_combreloc_reset(const Obj_Entry *);
 #endif
Index: symbol.c
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/symbol.c,v
retrieving revision 1.50
diff -u -p -r1.50 symbol.c
--- symbol.c    13 Jan 2010 20:17:21 -0000      1.50
+++ symbol.c    24 Feb 2010 12:13:06 -0000
@@ -60,6 +60,29 @@ __RCSID("$NetBSD: symbol.c,v 1.50 2010/0
 
 typedef void (*fptr_t)(void);
 
+/*
+ * If the given object is already in the donelist, return true.  Otherwise
+ * add the object to the list and return false.
+ */
+bool
+_rtld_donelist_check(DoneList *dlp, const Obj_Entry *obj)
+{
+       unsigned int i;
+
+       for (i = 0;  i < dlp->num_used;  i++)
+               if (dlp->objs[i] == obj)
+                       return true;
+       /*
+        * Our donelist allocation should always be sufficient.  But if
+        * our threads locking isn't working properly, more shared objects
+        * could have been loaded since we allocated the list.  That should
+        * never happen, but we'll handle it properly just in case it does.
+        */
+       if (dlp->num_used < dlp->num_alloc)
+               dlp->objs[dlp->num_used++] = obj;
+       return false;
+}
+
 static bool
 _rtld_is_exported(const Elf_Sym *def)
 {
@@ -108,7 +131,7 @@ _rtld_elf_hash(const char *name)
 
 const Elf_Sym *
 _rtld_symlook_list(const char *name, unsigned long hash, const Objlist 
*objlist,
-    const Obj_Entry **defobj_out, bool in_plt)
+    const Obj_Entry **defobj_out, bool in_plt, DoneList *dlp)
 {
        const Elf_Sym *symp;
        const Elf_Sym *def;
@@ -118,6 +141,8 @@ _rtld_symlook_list(const char *name, uns
        def = NULL;
        defobj = NULL;
        SIMPLEQ_FOREACH(elm, objlist, link) {
+               if (donelist_check(dlp, elm->obj))
+                       continue;
                rdbg(("search object %p (%s) for %s", elm->obj, elm->obj->path,
                    name));
                if ((symp = _rtld_symlook_obj(name, hash, elm->obj, in_plt))
@@ -143,7 +168,8 @@ _rtld_symlook_list(const char *name, uns
  */
 const Elf_Sym *
 _rtld_symlook_needed(const char *name, unsigned long hash,
-    const Needed_Entry *needed, const Obj_Entry **defobj_out, bool inplt)
+    const Needed_Entry *needed, const Obj_Entry **defobj_out, bool inplt,
+    DoneList *ndone, DoneList *wdone)
 {
        const Elf_Sym *def, *def_w;
        const Needed_Entry *n;
@@ -152,8 +178,11 @@ _rtld_symlook_needed(const char *name, u
        def = def_w = NULL;
        defobj = NULL;
        for (n = needed; n != NULL; n = n->next) {
-               if ((obj = n->obj) == NULL ||
-                    (def = _rtld_symlook_obj(name, hash, obj, inplt)) == NULL)
+               if ((obj = n->obj) == NULL)
+                       continue;
+               if (donelist_check(ndone, obj))
+                       continue;
+               if ((def = _rtld_symlook_obj(name, hash, obj, inplt)) == NULL)
                        continue;
                defobj = obj;
                if (ELF_ST_BIND(def->st_info) != STB_WEAK) {
@@ -169,8 +198,10 @@ _rtld_symlook_needed(const char *name, u
        for (n = needed; n != NULL; n = n->next) {
                if ((obj = n->obj) == NULL)
                        continue;
+               if (donelist_check(wdone, obj))
+                       continue;
                def_w = _rtld_symlook_needed(name, hash, obj->needed, &defobj1,
-                   inplt);
+                   inplt, ndone, wdone);
                if (def_w == NULL)
                        continue;
                if (def == NULL || ELF_ST_BIND(def_w->st_info) != STB_WEAK) {
@@ -381,9 +412,12 @@ _rtld_symlook_default(const char *name, 
        const Objlist_Entry *elm;
        def = NULL;
        defobj = NULL;
+       DoneList donelist;
+
+       donelist_init(&donelist);
 
        /* Look first in the referencing object if linked symbolically. */
-       if (refobj->symbolic) {
+       if (refobj->symbolic && !donelist_check(&donelist, refobj)) {
                rdbg(("search referencing object for %s", name));
                symp = _rtld_symlook_obj(name, hash, refobj, in_plt);
                if (symp != NULL) {
@@ -396,7 +430,7 @@ _rtld_symlook_default(const char *name, 
        if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
                rdbg(("search _rtld_list_main for %s", name));
                symp = _rtld_symlook_list(name, hash, &_rtld_list_main, &obj,
-                   in_plt);
+                   in_plt, &donelist);
                if (symp != NULL &&
                    (def == NULL || ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
                        def = symp;
@@ -408,7 +442,7 @@ _rtld_symlook_default(const char *name, 
        if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
                rdbg(("search _rtld_list_global for %s", name));
                symp = _rtld_symlook_list(name, hash, &_rtld_list_global,
-                   &obj, in_plt);
+                   &obj, in_plt, &donelist);
                if (symp != NULL &&
                    (def == NULL || ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
                        def = symp;
@@ -423,7 +457,7 @@ _rtld_symlook_default(const char *name, 
                rdbg(("search DAG with root %p (%s) for %s", elm->obj,
                    elm->obj->path, name));
                symp = _rtld_symlook_list(name, hash, &elm->obj->dagmembers,
-                   &obj, in_plt);
+                   &obj, in_plt, &donelist);
                if (symp != NULL &&
                    (def == NULL || ELF_ST_BIND(symp->st_info) != STB_WEAK)) {
                        def = symp;
@@ -447,6 +481,8 @@ _rtld_symlook_default(const char *name, 
                }
        }
 
+       donelist_clear(&donelist);
+
        if (def != NULL)
                *defobj_out = defobj;
        return def;


Home | Main Index | Thread Index | Old Index