NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: port-mips/59064 (jemalloc switch to 5.3 broke userland)



The following reply was made to PR port-mips/59064; it has been noted by GNATS.

From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: Rin Okuyama <rokuyama.rk%gmail.com@localhost>
Cc: Martin Husemann <martin%duskware.de@localhost>, gnats-bugs%netbsd.org@localhost,
	port-mips-maintainer%netbsd.org@localhost, gnats-admin%netbsd.org@localhost,
	netbsd-bugs%netbsd.org@localhost, martin%NetBSD.org@localhost, simonb%NetBSD.org@localhost,
	joerg%NetBSD.org@localhost
Subject: Re: port-mips/59064 (jemalloc switch to 5.3 broke userland)
Date: Mon, 14 Apr 2025 03:39:37 +0000

 This is a multi-part message in MIME format.
 --=_17Wymd7lPaKOmlAiVMiz9N/EY5eA0fm1
 Content-Transfer-Encoding: quoted-printable
 
 [+cc simon for mips, joerg for tls]
 
 I tugged on this thread some more, via detour through t_rtld_r_debug,
 and fixed core dumps using the attached patch series, so now I can
 read the core dump from a program that crashes in jemalloc!
 
 Core was generated by `find'.
 Program terminated with signal SIGSEGV, Segmentation fault.
 #0  0x7850be50 in malloc_default ()
    from /export/erlite3/var/tmp/20250413/lib/libc.so.12.222
 (gdb) bt
 #0  0x7850be50 in malloc_default ()
    from /export/erlite3/var/tmp/20250413/lib/libc.so.12.222
 #1  0x00018698 in main ()
 (gdb) x/i $pc
 =3D> 0x7850be50 <malloc_default+72>:      lbu     v1,600(a2)
 (gdb) print $a2
 $1 =3D -256094680
 (gdb) set output-radix 0x10
 Output radix now set to decimal 16, hex 10, octal 20.
 (gdb) print $a2
 $2 =3D 0xfffffffff0bc4e28
 (gdb) disas malloc_default
 Dump of assembler code for function malloc_default:
    0x7850be08 <+0>:     addiu   sp,sp,-144
    0x7850be0c <+4>:     sd      gp,120(sp)
    0x7850be10 <+8>:     lui     gp,0x16
    0x7850be14 <+12>:    addu    gp,gp,t9
    0x7850be18 <+16>:    addiu   gp,gp,-31640
    0x7850be1c <+20>:    lw      v0,-23368(gp)
    0x7850be20 <+24>:    .word   0x7c03e83b
    0x7850be24 <+28>:    sd      s1,64(sp)
    0x7850be28 <+32>:    sd      s0,56(sp)
    0x7850be2c <+36>:    addu    a2,v0,v1
    0x7850be30 <+40>:    sd      ra,136(sp)
    0x7850be34 <+44>:    sd      s8,128(sp)
    0x7850be38 <+48>:    sd      s7,112(sp)
    0x7850be3c <+52>:    sd      s6,104(sp)
    0x7850be40 <+56>:    sd      s5,96(sp)
    0x7850be44 <+60>:    sd      s4,88(sp)
    0x7850be48 <+64>:    sd      s3,80(sp)
    0x7850be4c <+68>:    sd      s2,72(sp)
 =3D> 0x7850be50 <+72>:    lbu     v1,600(a2)
    0x7850be54 <+76>:    move    s1,a0
    0x7850be58 <+80>:    bnez    v1,0x7850c020 <malloc_default+536>
    0x7850be5c <+84>:    move    s0,a2
 --Type <RET> for more, q to quit, c to continue without paging--q
 Quit
 (gdb) print $v0
 $3 =3D 0x7850be20
 (gdb) print $v1
 $4 =3D 0x786b9008
 (gdb) print $v0 + $v1
 $5 =3D 0xf0bc4e28
 (gdb) print $a2
 $6 =3D 0xfffffffff0bc4e28
 
 Now this is interesting: v0 and v1 are very close, as if maybe they
 already both include a base address when one of them is supposed to be
 an offset relative to it.  (I'm also puzzled by why a2 got
 sign-extended -- I thought `addu' would _not_ sign-extend -- but I
 don't speak mips natively so I dunno.)
 
 Here are the relevant instructions, excerpted from others which don't
 touch other registers:
 
    0x7850be10 <+8>:     lui     gp,0x16
    0x7850be14 <+12>:    addu    gp,gp,t9
    0x7850be18 <+16>:    addiu   gp,gp,-31640
    0x7850be1c <+20>:    lw      v0,-23368(gp)
    0x7850be20 <+24>:    .word   0x7c03e83b	/* rdhwr v1,$29 */
 ...
    0x7850be2c <+36>:    addu    a2,v0,v1
 ...
 =3D> 0x7850be50 <+72>:    lbu     v1,600(a2)
 
 The original jemalloc.pico, with relocations shown by objdump -dlr,
 was this:
 
     8a60:       3c1c0000        lui     gp,0x0
                         8a60: R_MIPS_GPREL16    malloc_default
                         8a60: R_MIPS_SUB        *ABS*
                         8a60: R_MIPS_HI16       *ABS*
     8a64:       0399e021        addu    gp,gp,t9
     8a68:       279c0000        addiu   gp,gp,0
                         8a68: R_MIPS_GPREL16    malloc_default
                         8a68: R_MIPS_SUB        *ABS*
                         8a68: R_MIPS_LO16       *ABS*
 tsd_fetch_impl():
 /home/riastradh/netbsd/current/src/external/bsd/jemalloc/lib/../include/jem=
 alloc/internal/tsd.h:270
     8a6c:       8f820000        lw      v0,0(gp)
                         8a6c: R_MIPS_TLS_GOTTPREL       je_tsd_tls
     8a70:       7c03e83b        0x7c03e83b	/* rdhwr v1,$29 */
 malloc_default():
 /home/riastradh/netbsd/current/src/external/bsd/jemalloc/lib/../dist/src/je=
 malloc.c:2727
 ...
 tsd_fetch_impl():
 /home/riastradh/netbsd/current/src/external/bsd/jemalloc/lib/../include/jem=
 alloc/internal/tsd.h:270
     8a7c:       00433021        addu    a2,v0,v1
 malloc_default():
 /home/riastradh/netbsd/current/src/external/bsd/jemalloc/lib/../dist/src/je=
 malloc.c:2727
 ...
 tsd_fetch_impl():
 /home/riastradh/netbsd/current/src/external/bsd/jemalloc/lib/../include/jem=
 alloc/internal/tsd.h:422
     8aa0:       90c30258        lbu     v1,600(a2)
 
 Next up, after I have slept or someone else tugs on the thread
 further, will be to figure out whether these values for v0 and v1 are
 sensible or indicate something went wrong earlier on -- and perhaps
 figure out where it went wrong.
 
 --=_17Wymd7lPaKOmlAiVMiz9N/EY5eA0fm1
 Content-Type: text/plain; charset="ISO-8859-1"; name="pr59296-trtldrdebugmips"
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: attachment; filename="pr59296-trtldrdebugmips.patch"
 
 # HG changeset patch
 # User Taylor R Campbell <riastradh%NetBSD.org@localhost>
 # Date 1744596185 0
 #      Mon Apr 14 02:03:05 2025 +0000
 # Branch trunk
 # Node ID 6595e473ec58e7d23a5a1c351c2fec8136cd9fd5
 # Parent  ac7c24d0e55da140e83ba403e888ecdd1b7c0323
 # EXP-Topic riastradh-pr59296-trtldrdebugmips
 t_rtld_r_debug: Mark this xfail on mips.
 
 PR port-mips/59296: t_rtld_r_debug test is failing
 
 diff -r ac7c24d0e55d -r 6595e473ec58 tests/libexec/ld.elf_so/t_rtld_r_debug=
 .c
 --- a/tests/libexec/ld.elf_so/t_rtld_r_debug.c	Sun Apr 13 17:23:06 2025 +00=
 00
 +++ b/tests/libexec/ld.elf_so/t_rtld_r_debug.c	Mon Apr 14 02:03:05 2025 +00=
 00
 @@ -116,6 +116,10 @@ check_r_debug_return_link_map(const char
 =20
  	loader =3D NULL;
  	debug =3D get_rtld_r_debug();
 +#ifdef __mips__
 +	atf_tc_expect_fail("PR port-mips/59296:"
 +	    " t_rtld_r_debug test is failing");
 +#endif
  	ATF_REQUIRE(debug !=3D NULL);
  	ATF_CHECK_EQ_MSG(debug->r_version, R_DEBUG_VERSION,
  	    "debug->r_version=3D%d R_DEBUG_VERSION=3D%d",
 # HG changeset patch
 # User Taylor R Campbell <riastradh%NetBSD.org@localhost>
 # Date 1744596814 0
 #      Mon Apr 14 02:13:34 2025 +0000
 # Branch trunk
 # Node ID e39733b6bc5cbaa630adffe32ca218f870045cd6
 # Parent  6595e473ec58e7d23a5a1c351c2fec8136cd9fd5
 # EXP-Topic riastradh-pr59296-trtldrdebugmips
 ld.elf_so: Teach this to handle MIPS PIE rtld debug data.
 
 Adapt t_rtld_r_debug to handle the two MIPS cases too.
 
 XXX t_rtld_r_debug should be tested both as PIE and non-PIE to
 exercise both cases.
 
 Context:
 
 The value of a DT_DEBUG .dynamic entry is initialized at load-time,
 by ld.elf_so, to a pointer to a data structure set up by ld.elf_so
 describing the shared objects loaded by the executable, so debuggers
 can find them from, e.g., core dumps.  None of this is really
 documented anywhere that I can find.  Best reference is this post on
 the gdb mailing list from a quarter century ago saying there's no
 real documentation:
 
 https://web.archive.org/web/20250414021320/https://sourceware.org/pipermail=
 /gdb/2000-April/004509.html
 
 However, on MIPS, the .dynamic section is mapped read-only, so
 ld.elf_so can't properly modify it (I imagine technically it could
 with some mprotect shenanigans but that's not how it's done on MIPS).
 Instead, the linker reserves a location in read/write memory and uses
 a DT_MIPS_RLD_MAP entry with a pointer to that location.
 
 However, in position-independent executables, the .dynamic entry
 can't have an absolute pointer to that location because it's not
 known up front.  Instead, the linker uses a DT_MIPS_RLD_MAP_REL
 entry with the relative offset to that location from the Elf_Dyn
 entry itself.
 
 I would add a reference for this but it's basically a matter of UTSL
 plus some oblique mentions on the web and mailing list discussions:
 
 https://web.archive.org/web/20250414024823/https://cygwin.com/legacy-ml/bin=
 utils/2016-04/msg00244.html
 https://web.archive.org/web/20250403151803/https://maskray.me/blog/2023-09-=
 04-toolchain-notes-on-mips
 https://web.archive.org/web/20211024050833/https://reviews.llvm.org/D12794?=
 id=3D34533
 https://web.archive.org/web/20250407052145/https://wiki.debian.org/MIPSPort
 https://web.archive.org/web/20250414024924/https://reviews.freebsd.org/D178=
 67?id=3D50122
 
 PR port-mips/59296: t_rtld_r_debug test is failing
 
 diff -r 6595e473ec58 -r e39733b6bc5c libexec/ld.elf_so/headers.c
 --- a/libexec/ld.elf_so/headers.c	Mon Apr 14 02:03:05 2025 +0000
 +++ b/libexec/ld.elf_so/headers.c	Mon Apr 14 02:13:34 2025 +0000
 @@ -333,8 +333,10 @@ void
  #endif
 =20
  		/*
 -		 * Don't process DT_DEBUG on MIPS as the dynamic section
 -		 * is mapped read-only. DT_MIPS_RLD_MAP is used instead.
 +		 * Don't process DT_DEBUG on MIPS as the dynamic
 +		 * section is mapped read-only.  DT_MIPS_RLD_MAP or
 +		 * DT_MIPS_RLD_MAP_REL is used instead.
 +		 *
  		 * XXX: n32/n64 may use DT_DEBUG, not sure yet.
  		 */
  #ifndef __mips__
 @@ -358,10 +360,38 @@ void
  			obj->gotsym =3D dynp->d_un.d_val;
  			break;
 =20
 +		/*
 +		 * The .dynamic section is read-only, so the loader
 +		 * can't write to it; instead, the linker reserves
 +		 * space in a read/write .rld_map section for the
 +		 * loader to write to, and leaves a pointer to that space
 +		 * in a DT_MIPS_RLD_MAP entry.
 +		 *
 +		 * Except pointers like that don't work for
 +		 * position-independent executables, which use
 +		 * DT_MIPS_RLD_MAP_REL instead.
 +		 */
  		case DT_MIPS_RLD_MAP:
  #ifdef RTLD_LOADER
 -			*((Elf_Addr *)(dynp->d_un.d_ptr)) =3D (Elf_Addr)
 -			    &_rtld_debug;
 +			*((Elf_Addr *)dynp->d_un.d_ptr) =3D
 +			    (Elf_Addr)&_rtld_debug;
 +#endif
 +			break;
 +
 +		/*
 +		 * The .dynamic section is read-only, so the loader
 +		 * can't write to it; instead, the linker reserves
 +		 * space in a read/write .rld_map section for the
 +		 * loader to write to, which might be mapped anywhere in
 +		 * virtual address space for position-independent
 +		 * executables, so the linker leaves its offset
 +		 * relative to the .dynamic entry itself in the dynamic
 +		 * entry.
 +		 */
 +		case DT_MIPS_RLD_MAP_REL:
 +#ifdef RTLD_LOADER
 +			*(Elf_Addr *)((Elf_Addr)dynp + dynp->d_un.d_val) =3D
 +			    (Elf_Addr)&_rtld_debug;
  #endif
  			break;
  #endif
 diff -r 6595e473ec58 -r e39733b6bc5c sys/arch/mips/include/elf_machdep.h
 --- a/sys/arch/mips/include/elf_machdep.h	Mon Apr 14 02:03:05 2025 +0000
 +++ b/sys/arch/mips/include/elf_machdep.h	Mon Apr 14 02:13:34 2025 +0000
 @@ -152,6 +152,7 @@
  #define	DT_MIPS_RLD_MAP		0x70000016	/* address of loader map */
  #define	DT_MIPS_PLTGOT		0x70000032
  #define	DT_MIPS_RWPLT		0x70000034
 +#define	DT_MIPS_RLD_MAP_REL	0x70000035
 =20
  /*
   * ELF Flags
 diff -r 6595e473ec58 -r e39733b6bc5c tests/libexec/ld.elf_so/t_rtld_r_debug=
 .c
 --- a/tests/libexec/ld.elf_so/t_rtld_r_debug.c	Mon Apr 14 02:03:05 2025 +00=
 00
 +++ b/tests/libexec/ld.elf_so/t_rtld_r_debug.c	Mon Apr 14 02:13:34 2025 +00=
 00
 @@ -87,19 +87,30 @@ get_dynamic_section(void)
  	return (Elf_Dyn *)((uint8_t *)dynphdr->p_vaddr + relocbase);
  }
 =20
 -static struct r_debug *
 +static const struct r_debug *
  get_rtld_r_debug(void)
  {
 -	struct r_debug *debug =3D NULL;
 +	const struct r_debug *debug =3D NULL;
  	Elf_Dyn *dynp;
 =20
  	for (dynp =3D get_dynamic_section(); dynp->d_tag !=3D DT_NULL; dynp++) {
  		printf("dynp %p: tag=3D%ld val=3D0x%lx\n", dynp,
  		    (long)dynp->d_tag, (long)dynp->d_un.d_val);
 +#ifdef __mips__
 +		if (dynp->d_tag =3D=3D DT_MIPS_RLD_MAP) {
 +			debug =3D (const void *)*(Elf_Addr *)dynp->d_un.d_ptr;
 +			break;
 +		}
 +		if (dynp->d_tag =3D=3D DT_MIPS_RLD_MAP_REL) {
 +			debug =3D (const void *)*(Elf_Addr *)((Elf_Addr)dynp +
 +			    dynp->d_un.d_val);
 +		}
 +#else
  		if (dynp->d_tag =3D=3D DT_DEBUG) {
  			debug =3D (void *)dynp->d_un.d_val;
  			break;
  		}
 +#endif
  	}
  	ATF_REQUIRE(debug !=3D NULL);
 =20
 @@ -107,19 +118,15 @@ get_rtld_r_debug(void)
  }
 =20
  static void
 -check_r_debug_return_link_map(const char *name, struct link_map **rmap)
 +check_r_debug_return_link_map(const char *name, const struct link_map **rm=
 ap)
  {
 -	struct r_debug *debug;
 -	struct link_map *map;
 +	const struct r_debug *debug;
 +	const struct link_map *map;
  	void *loader;
  	bool found;
 =20
  	loader =3D NULL;
  	debug =3D get_rtld_r_debug();
 -#ifdef __mips__
 -	atf_tc_expect_fail("PR port-mips/59296:"
 -	    " t_rtld_r_debug test is failing");
 -#endif
  	ATF_REQUIRE(debug !=3D NULL);
  	ATF_CHECK_EQ_MSG(debug->r_version, R_DEBUG_VERSION,
  	    "debug->r_version=3D%d R_DEBUG_VERSION=3D%d",
 @@ -166,7 +173,7 @@ ATF_TC_HEAD(dlopen, tc)
  ATF_TC_BODY(dlopen, tc)
  {
  	void *handle;
 -	struct link_map *map, *r_map;
 +	const struct link_map *r_map, *map;
 =20
  	handle =3D dlopen("libutil.so", RTLD_LAZY);
  	ATF_REQUIRE_MSG(handle, "dlopen: %s", dlerror());
 
 --=_17Wymd7lPaKOmlAiVMiz9N/EY5eA0fm1--
 


Home | Main Index | Thread Index | Old Index