[3/3] crt: add dcrt1, with support for locating the dynamic loader at runtime

Submitted by Rodger Combs on April 27, 2019, 1:13 a.m.

Details

Message ID 1556327609-27385-3-git-send-email-rodger.combs@gmail.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Rodger Combs April 27, 2019, 1:13 a.m.
---
 Makefile                |  10 +-
 crt/dcrt1.c             | 362 ++++++++++++++++++++++++++++++++++++++++++++++++
 ldso/dlstart.c          |  57 +++++---
 ldso/dynlink.c          |  13 ++
 ldso/map_library.h      |  59 ++++----
 src/internal/dynlink.h  |   2 +-
 tools/ld.musl-clang.in  |  17 ++-
 tools/musl-clang.in     |   8 ++
 tools/musl-gcc.specs.sh |   4 +-
 9 files changed, 479 insertions(+), 53 deletions(-)
 create mode 100644 crt/dcrt1.c

Patch hide | download patch | download mbox

diff --git a/Makefile b/Makefile
index b46f8ca..163bf03 100644
--- a/Makefile
+++ b/Makefile
@@ -105,13 +105,15 @@  obj/src/internal/version.h: $(wildcard $(srcdir)/VERSION $(srcdir)/.git)
 
 obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version.h
 
-obj/crt/rcrt1.o obj/ldso/dlstart.lo obj/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h
+obj/crt/rcrt1.o obj/crt/dcrt1.o obj/ldso/dlstart.lo obj/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h
 
-obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
+obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/crt/dcrt1.o obj/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
 
-obj/crt/rcrt1.o: $(srcdir)/ldso/dlstart.c
+obj/crt/rcrt1.o obj/crt/dcrt1.o: $(srcdir)/ldso/dlstart.c
 
-obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC
+obj/crt/Scrt1.o obj/crt/rcrt1.o obj/crt/dcrt1.o: CFLAGS_ALL += -fPIC
+
+obj/crt/dcrt1.o: CFLAGS_ALL += -DLDSO_PATHNAME=\"$(LDSO_PATHNAME)\"
 
 OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%))
 $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3
diff --git a/crt/dcrt1.c b/crt/dcrt1.c
new file mode 100644
index 0000000..47c6dc2
--- /dev/null
+++ b/crt/dcrt1.c
@@ -0,0 +1,362 @@ 
+#define SYSCALL_NO_TLS
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <features.h>
+#include <libgen.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <unistd.h>
+#include "atomic.h"
+#include "dynlink.h"
+#include "syscall.h"
+
+extern weak hidden const size_t _DYNAMIC[];
+
+int main();
+weak void _init();
+weak void _fini();
+weak _Noreturn int __libc_start_main(int (*)(), int, char **,
+	void (*)(), void(*)(), void(*)());
+
+#define DLSTART_PROLOGUE __libc_start_main(main, argc, argv, _init, _fini, 0);
+
+#define START "_start"
+#define _dlstart_c _start_c
+#include "../ldso/dlstart.c"
+
+struct dso {
+	unsigned char *base;
+	struct fdpic_loadmap *loadmap;
+	size_t *dynv;
+	Phdr *phdr;
+	int phnum;
+	size_t phentsize;
+	unsigned char *map;
+	size_t map_len;
+};
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE SYSCALL_MMAP2_UNIT
+#endif
+
+#ifdef SYS_mmap2
+#define mmap(start, len, prot, flags, fd, off) (void*)__syscall(SYS_mmap2, start, len, prot, flags, fd, off/SYSCALL_MMAP2_UNIT)
+#else
+#define mmap(start, len, prot, flags, fd, off) (void*)__syscall(SYS_mmap, start, len, prot, flags, fd, off)
+#endif
+
+#define munmap(ptr, len) __syscall(SYS_munmap, ptr, len)
+
+static inline int crt_mprotect(void *addr, size_t len, int prot)
+{
+	size_t start, end;
+	start = (size_t)addr & -PAGE_SIZE;
+	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
+	return __syscall(SYS_mprotect, start, end-start, prot);
+}
+#define mprotect crt_mprotect
+
+#define read(fd, buf, size) __syscall(SYS_read, fd, buf, size)
+#define pread(fd, buf, size, ofs) __syscall(SYS_pread, fd, buf, size, __SYSCALL_LL_PRW(ofs))
+
+static inline off_t crt_lseek(int fd, off_t offset, int whence)
+{
+#ifdef SYS__llseek
+	off_t result;
+	return __syscall(SYS__llseek, fd, offset>>32, offset, &result, whence) ? -1 : result;
+#else
+	return __syscall(SYS_lseek, fd, offset, whence);
+#endif
+}
+#define lseek crt_lseek
+
+static inline void *map_library_allocz(size_t *size)
+{
+	void *ret;
+	*size = (*size + SYSCALL_MMAP2_UNIT - 1) & -SYSCALL_MMAP2_UNIT;
+	ret = mmap(0, *size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (ret == MAP_FAILED)
+		return NULL;
+	return ret;
+}
+
+static inline void map_library_free(void *ptr, size_t size)
+{
+	munmap(ptr, size);
+}
+
+#define map_library_failed(val) ((unsigned long)val > -4096UL)
+#define map_library_error(ret) (-(long)ret)
+
+#ifdef SYS_readlink
+#define readlink(path, buf, bufsize) __syscall(SYS_readlink, path, buf, bufsize)
+#else
+#define readlink(path, buf, bufsize) __syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize)
+#endif
+
+#ifdef SYS_access
+#define access(filename, amode) __syscall(SYS_access, filename, amode)
+#else
+#define access(filename, amode) __syscall(SYS_faccessat, AT_FDCWD, filename, amode, 0)
+#endif
+
+static void *crt_memcpy(void *restrict dest, const void *restrict src, size_t n)
+{
+	unsigned char *d = dest;
+	const unsigned char *s = src;
+	for (; n; n--) *d++ = *s++;
+	return dest;
+}
+
+static void *crt_memset(void *dest, int c, size_t n)
+{
+	unsigned char *s = dest;
+	for (; n; n--, s++) *s = c;
+	return dest;
+}
+#define memset crt_memset
+
+static size_t crt_strlen(const char *s)
+{
+	const char *a = s;
+	for (; *s; s++);
+	return s-a;
+}
+
+static char *crt_strchrnul(const char *s, int c)
+{
+	c = (unsigned char)c;
+	if (!c) return (char *)s + crt_strlen(s);
+	for (; *s && *(unsigned char *)s != c; s++);
+	return (char *)s;
+}
+
+static int crt_strncmp(const char *_l, const char *_r, size_t n)
+{
+	const unsigned char *l=(void *)_l, *r=(void *)_r;
+	if (!n--) return 0;
+	for (; *l && *r && n && *l == *r ; l++, r++, n--);
+	return *l - *r;
+}
+
+static char *crt_getenv(const char *name, char **environ)
+{
+	size_t l = crt_strchrnul(name, '=') - name;
+	if (l && !name[l] && environ)
+		for (char **e = environ; *e; e++)
+			if (!crt_strncmp(name, *e, l) && l[*e] == '=')
+				return *e + l+1;
+	return 0;
+}
+
+#define LD_CRT
+#include "../ldso/map_library.h"
+
+static void decode_vec(size_t *v, size_t *a, size_t cnt)
+{
+	size_t i;
+	for (i=0; i<cnt; i++) a[i] = 0;
+	for (; v[0]; v+=2) if (v[0]-1<cnt-1) {
+		a[0] |= 1UL<<v[0];
+		a[v[0]] = v[1];
+	}
+}
+
+static void get_rpaths(const char **rpath, const char **runpath, const size_t *dyn)
+{
+	/* DT_STRTAB is pre-relocated for us by dlstart */
+	const char *strings = (void*)dyn[DT_STRTAB];
+
+	if (dyn[0] & (1 << DT_RPATH))
+		*rpath = strings + dyn[DT_RPATH];
+	if (dyn[0] & (1 << DT_RUNPATH))
+		*runpath = strings + dyn[DT_RUNPATH];
+}
+
+static size_t find_linker(char *outbuf, size_t bufsize, const char *this_path, size_t thisl, const size_t *dyn, char **environ, int secure)
+{
+	const char *paths[3] = {0}; // rpath, envpath, runpath
+	size_t i;
+	int fd;
+
+	if (thisl)
+		thisl--;
+	while (thisl > 1 && this_path[thisl] == '/')
+		thisl--;
+	while (thisl > 0 && this_path[thisl] != '/')
+		thisl--;
+
+	if (!secure) {
+		const char *envpath = crt_getenv("LD_LOADER_PATH", environ);
+		if (envpath) {
+			size_t envlen = crt_strlen(envpath);
+			if (envlen < bufsize) {
+				crt_memcpy(outbuf, envpath, envlen + 1);
+				return envlen + 1;
+			}
+		}
+	}
+
+	get_rpaths(&paths[0], &paths[2], dyn);
+
+	paths[1] = secure ? NULL : crt_getenv("LD_LIBRARY_PATH", environ);
+
+	for (i = 0; i < 3; i++) {
+		int relative = 0;
+		const char *p = paths[i];
+		char *o = outbuf;
+		if (!p)
+			continue;
+		for (;;) {
+			if (!crt_strncmp(p, "$ORIGIN", 7) ||
+					!crt_strncmp(p, "${ORIGIN}", 9)) {
+				relative = 1;
+				if (o + thisl + 1 < outbuf + bufsize) {
+					crt_memcpy(o, this_path, thisl);
+					o += thisl;
+				} else {
+					o = outbuf + bufsize - 1;
+				}
+				p += (p[1] == '{' ? 9 : 7);
+			} else if (*p == ':' || !*p) {
+#define LDSO_FILENAME "ld-musl-" LDSO_ARCH ".so.1"
+				relative |= outbuf[0] != '/';
+				if ((!secure || !relative) && o + sizeof(LDSO_FILENAME) + 1 < outbuf + bufsize) {
+					*o++ = '/';
+					crt_memcpy(o, LDSO_FILENAME, sizeof(LDSO_FILENAME));
+					if (!access(outbuf, R_OK | X_OK))
+						return (o + sizeof(LDSO_FILENAME)) - outbuf;
+				}
+				if (!*p)
+					break;
+				relative = 0;
+				o = outbuf;
+				p++;
+			} else {
+				if (o < outbuf + bufsize)
+					*o++ = *p;
+				p++;
+			}
+		}
+	}
+
+	// Didn't find a usable loader anywhere, so try the hardcoded path :shrug:
+	crt_memcpy(outbuf, LDSO_PATHNAME, sizeof(LDSO_PATHNAME));
+	return sizeof(LDSO_PATHNAME);
+}
+
+static void final_start_c(long *p)
+{
+	int argc = p[0];
+	char **argv = (void *)(p+1);
+	__libc_start_main(main, argc, argv, _init, _fini, 0);
+}
+
+hidden _Noreturn void __dls2(unsigned char *base, size_t *p, size_t *dyn)
+{
+	int argc = p[0];
+	char **argv = (void *)(p+1);
+	int fd;
+	int secure;
+	int prot = PROT_READ;
+	struct dso loader;
+	Ehdr *loader_hdr;
+	Phdr *new_hdr;
+	void *entry;
+	char this_path[2*NAME_MAX+2] = {0};
+	size_t thisl;
+	char linker_path[2*NAME_MAX+2] = {0};
+	size_t linker_len;
+	size_t i;
+	size_t aux[AUX_CNT];
+	size_t *auxv;
+	char **environ = argv + argc + 1;
+
+	/* Find aux vector just past environ[] and use it to initialize
+	* global data that may be needed before we can make syscalls. */
+	for (i = argc + 1; argv[i]; i++);
+	auxv = (void *)(argv + i + 1);
+	decode_vec(auxv, aux, AUX_CNT);
+	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
+		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
+
+	thisl = readlink("/proc/self/exe", this_path, sizeof this_path);
+	linker_len = find_linker(linker_path, sizeof linker_path, this_path, thisl, dyn, environ, secure);
+
+	fd = __sys_open2(, linker_path, O_RDONLY);
+	if (fd < 0)
+		goto error;
+
+	loader_hdr = map_library(fd, &loader);
+	if (!loader_hdr)
+		goto error;
+
+	__syscall(SYS_close, fd);
+
+	// Copy the program headers into an anonymous mapping
+	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (map_library_failed(new_hdr))
+		goto error;
+
+	// Point it back at the original kernel-provided base
+	new_hdr->p_type = PT_PHDR;
+	new_hdr->p_vaddr = (size_t)new_hdr - (size_t)base;
+
+	((Phdr*)((char*)new_hdr + aux[AT_PHENT]))->p_type = PT_INTERP;
+	((Phdr*)((char*)new_hdr + aux[AT_PHENT]))->p_vaddr = new_hdr->p_vaddr + aux[AT_PHENT] * (aux[AT_PHNUM] + 2);
+
+	crt_memcpy((char*)new_hdr + aux[AT_PHENT] * (aux[AT_PHNUM] + 2), linker_path, linker_len);
+
+	for (i = 0; i < aux[AT_PHNUM]; i++) {
+		Phdr *hdr = (void*)((char*)aux[AT_PHDR] + aux[AT_PHENT] * i);
+		Phdr *dst = (void*)((char*)new_hdr + aux[AT_PHENT] * (i + 2));
+		if (hdr->p_type == PT_PHDR || hdr->p_type == PT_INTERP) {
+			// Can't have a duplicate
+			dst->p_type = PT_NULL;
+		} else {
+			crt_memcpy(dst, hdr, aux[AT_PHENT]);
+		}
+	}
+
+	if (mprotect(new_hdr, aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len, PROT_READ))
+		goto error;
+
+	for (i=0; auxv[i]; i+=2) {
+		if (auxv[i] == AT_BASE)
+			auxv[i + 1] = (size_t)loader_hdr;
+		if (auxv[i] == AT_PHDR)
+			auxv[i + 1] = (size_t)new_hdr;
+		if (auxv[i] == AT_PHNUM)
+			auxv[i + 1] += 2;
+	}
+
+	entry = laddr(&loader, loader_hdr->e_entry);
+
+#ifndef LD_FDPIC
+	/* Undo the relocations performed by dlstart */
+
+	if (NEED_MIPS_GOT_RELOCS) {
+		const size_t *dynv = _DYNAMIC;
+		size_t local_cnt = 0;
+		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
+		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
+			local_cnt = dynv[i+1];
+		for (i=0; i<local_cnt; i++) got[i] -= (size_t)base;
+	}
+
+	size_t *rel = (void *)((size_t)base+dyn[DT_REL]);
+	size_t rel_size = dyn[DT_RELSZ];
+	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
+		if (!IS_RELATIVE(rel[1], 0)) continue;
+		size_t *rel_addr = (void *)((size_t)base + rel[0]);
+		*rel_addr -= (size_t)base;
+	}
+#endif
+
+	CRTJMP(entry, argv - 1);
+
+error:
+	for(;;) a_crash();
+}
diff --git a/ldso/dlstart.c b/ldso/dlstart.c
index 20d50f2..7ff79d6 100644
--- a/ldso/dlstart.c
+++ b/ldso/dlstart.c
@@ -21,7 +21,7 @@ 
 hidden void _dlstart_c(size_t *sp, size_t *dynv)
 {
 	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
-	size_t *rel, rel_size, base;
+	size_t *rel, rel_size, base, loader_phdr;
 
 	int argc = *sp;
 	char **argv = (void *)(sp+1);
@@ -41,6 +41,13 @@  hidden void _dlstart_c(size_t *sp, size_t *dynv)
 		 * space and moving the extra fdpic arguments to the stack
 		 * vector where they are easily accessible from C. */
 		segs = ((struct fdpic_loadmap *)(sp[-1] ? sp[-1] : sp[-2]))->segs;
+		if (aux[AT_BASE]) {
+			Ehdr *eh = (void*)aux[AT_BASE];
+			for (i = 0; eh->e_phoff - segs[i].p_vaddr >= segs[i].p_memsz; i++);
+			loader_phdr = (eh->e_phoff - segs[i].p_vaddr + segs[i].addr);
+		} else {
+			loader_phdr = aux[AT_PHDR];
+		}
 	} else {
 		/* If dynv is null, the entry point was started from loader
 		 * that is not fdpic-aware. We can assume normal fixed-
@@ -55,6 +62,7 @@  hidden void _dlstart_c(size_t *sp, size_t *dynv)
 		segs[0].p_memsz = -1;
 		Ehdr *eh = (void *)base;
 		Phdr *ph = (void *)(base + eh->e_phoff);
+		loader_phdr = (size_t)ph;
 		size_t phnum = eh->e_phnum;
 		size_t phent = eh->e_phentsize;
 		while (phnum-- && ph->p_type != PT_DYNAMIC)
@@ -69,13 +77,38 @@  hidden void _dlstart_c(size_t *sp, size_t *dynv)
 
 #if DL_FDPIC
 	for (i=0; i<DYN_CNT; i++) {
-		if (i==DT_RELASZ || i==DT_RELSZ) continue;
+		if (i==DT_RELASZ || i==DT_RELSZ || i==DT_RPATH || i==DT_RUNPATH) continue;
 		if (!dyn[i]) continue;
 		for (j=0; dyn[i]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
 		dyn[i] += segs[j].addr - segs[j].p_vaddr;
 	}
 	base = 0;
+#else
+	/* If the dynamic linker is invoked as a command, its load
+	 * address is not available in the aux vector. Instead, compute
+	 * the load address as the difference between &_DYNAMIC and the
+	 * virtual address in the PT_DYNAMIC program header. */
+	base = aux[AT_BASE];
+	if (!base) {
+		size_t phnum = aux[AT_PHNUM];
+		size_t phentsize = aux[AT_PHENT];
+		Phdr *ph = (void *)aux[AT_PHDR];
+		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
+			if (ph->p_type == PT_DYNAMIC) {
+				base = (size_t)dynv - ph->p_vaddr;
+				break;
+			}
+		}
+	}
+	loader_phdr = base + ((Ehdr*)base)->e_phoff;
+#endif
 
+#ifdef DLSTART_PROLOGUE
+	if (aux[AT_PHDR] != loader_phdr)
+		DLSTART_PROLOGUE
+#endif
+
+#if DL_FDPIC
 	const Sym *syms = (void *)dyn[DT_SYMTAB];
 
 	rel = (void *)dyn[DT_RELA];
@@ -97,22 +130,8 @@  hidden void _dlstart_c(size_t *sp, size_t *dynv)
 		}
 	}
 #else
-	/* If the dynamic linker is invoked as a command, its load
-	 * address is not available in the aux vector. Instead, compute
-	 * the load address as the difference between &_DYNAMIC and the
-	 * virtual address in the PT_DYNAMIC program header. */
-	base = aux[AT_BASE];
-	if (!base) {
-		size_t phnum = aux[AT_PHNUM];
-		size_t phentsize = aux[AT_PHENT];
-		Phdr *ph = (void *)aux[AT_PHDR];
-		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
-			if (ph->p_type == PT_DYNAMIC) {
-				base = (size_t)dynv - ph->p_vaddr;
-				break;
-			}
-		}
-	}
+	/* For convenience in dcrt1, we relocate STRTAB here (as with FDPIC) */
+	dyn[DT_STRTAB] += base;
 
 	/* MIPS uses an ugly packed form for GOT relocations. Since we
 	 * can't make function calls yet and the code is tiny anyway,
@@ -144,5 +163,5 @@  hidden void _dlstart_c(size_t *sp, size_t *dynv)
 
 	stage2_func dls2;
 	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
-	dls2((void *)base, sp);
+	dls2((void *)base, sp, dyn);
 }
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index d761837..d4b2331 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -153,6 +153,19 @@  extern hidden void (*const __init_array_end)(void), (*const __fini_array_end)(vo
 weak_alias(__init_array_start, __init_array_end);
 weak_alias(__fini_array_start, __fini_array_end);
 
+static inline void *map_library_allocz(size_t *size)
+{
+	return calloc(1, *size);
+}
+
+static inline void map_library_free(void *ptr, size_t size)
+{
+	return free(ptr);
+}
+
+#define map_library_failed(ptr) (ptr == MAP_FAILED)
+#define map_library_error(val) (errno)
+
 #include "map_library.h"
 
 static int dl_strcmp(const char *l, const char *r)
diff --git a/ldso/map_library.h b/ldso/map_library.h
index 3609a0f..27d2a7e 100644
--- a/ldso/map_library.h
+++ b/ldso/map_library.h
@@ -1,11 +1,3 @@ 
-#include <errno.h>
-#include <features.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include "dynlink.h"
-#include "pthread_impl.h"
-
 /* Compute load address for a virtual address in a given dso. */
 #if DL_FDPIC
 static inline void *laddr(const struct dso *p, size_t v)
@@ -44,7 +36,7 @@  static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t of
 	char *q;
 	if (!no_map_fixed) {
 		q = mmap(p, n, prot, flags|MAP_FIXED, fd, off);
-		if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL)
+		if (!DL_NOMMU_SUPPORT || !map_library_failed(q) || map_library_error(q) != EINVAL)
 			return q;
 		no_map_fixed = 1;
 	}
@@ -57,7 +49,7 @@  static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t of
 	if (lseek(fd, off, SEEK_SET) < 0) return MAP_FAILED;
 	for (q=p; n; q+=r, off+=r, n-=r) {
 		r = read(fd, q, n);
-		if (r < 0 && errno != EINTR) return MAP_FAILED;
+		if (r < 0 && map_library_error(r) != EINTR) return MAP_FAILED;
 		if (!r) {
 			memset(q, 0, n);
 			break;
@@ -66,6 +58,11 @@  static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t of
 	return p;
 }
 
+static inline int map_library_error_check(int ret, int expected)
+{
+	return ret && map_library_error(ret) != expected;
+}
+
 static inline void unmap_library(struct dso *dso)
 {
 	if (dso->loadmap) {
@@ -76,7 +73,7 @@  static inline void unmap_library(struct dso *dso)
 			munmap((void *)dso->loadmap->segs[i].addr,
 				dso->loadmap->segs[i].p_memsz);
 		}
-		free(dso->loadmap);
+		map_library_free(dso->loadmap, dso->map_len);
 	} else if (dso->map && dso->map_len) {
 		munmap(dso->map, dso->map_len);
 	}
@@ -86,8 +83,8 @@  static inline void *map_library(int fd, struct dso *dso)
 {
 	Ehdr buf[(896+sizeof(Ehdr))/sizeof(Ehdr)];
 	void *allocated_buf=0;
+	size_t allocated_buf_size;
 	size_t phsize;
-	size_t ph_allocated_size;
 	size_t addr_min=SIZE_MAX, addr_max=0, map_len;
 	size_t this_min, this_max;
 	size_t nsegs = 0;
@@ -98,7 +95,9 @@  static inline void *map_library(int fd, struct dso *dso)
 	unsigned char *map=MAP_FAILED, *base;
 	size_t dyn=0;
 	size_t i;
+#ifndef LD_CRT
 	size_t tls_image=0;
+#endif
 
 	ssize_t l = read(fd, buf, sizeof buf);
 	eh = buf;
@@ -107,7 +106,8 @@  static inline void *map_library(int fd, struct dso *dso)
 		goto noexec;
 	phsize = eh->e_phentsize * eh->e_phnum;
 	if (phsize > sizeof buf - sizeof *eh) {
-		allocated_buf = malloc(phsize);
+		allocated_buf_size = phsize;
+		allocated_buf = map_library_allocz(&allocated_buf_size);
 		if (!allocated_buf) return 0;
 		l = pread(fd, allocated_buf, phsize, eh->e_phoff);
 		if (l < 0) goto error;
@@ -124,6 +124,7 @@  static inline void *map_library(int fd, struct dso *dso)
 	for (i=eh->e_phnum; i; i--, ph=(void *)((char *)ph+eh->e_phentsize)) {
 		if (ph->p_type == PT_DYNAMIC) {
 			dyn = ph->p_vaddr;
+#ifndef LD_CRT
 		} else if (ph->p_type == PT_TLS) {
 			tls_image = ph->p_vaddr;
 			dso->tls.align = ph->p_align;
@@ -138,6 +139,7 @@  static inline void *map_library(int fd, struct dso *dso)
 					ph->p_memsz < DEFAULT_STACK_MAX ?
 					ph->p_memsz : DEFAULT_STACK_MAX;
 			}
+#endif
 		}
 		if (ph->p_type != PT_LOAD) continue;
 		nsegs++;
@@ -154,8 +156,8 @@  static inline void *map_library(int fd, struct dso *dso)
 	}
 	if (!dyn) goto noexec;
 	if (DL_FDPIC && !(eh->e_flags & FDPIC_CONSTDISP_FLAG)) {
-		dso->loadmap = calloc(1, sizeof *dso->loadmap
-			+ nsegs * sizeof *dso->loadmap->segs);
+		dso->map_len = sizeof *dso->loadmap + nsegs * sizeof *dso->loadmap->segs;
+		dso->loadmap = map_library_allocz(&dso->map_len);
 		if (!dso->loadmap) goto error;
 		dso->loadmap->nsegs = nsegs;
 		for (ph=ph0, i=0; i<nsegs; ph=(void *)((char *)ph+eh->e_phentsize)) {
@@ -166,7 +168,7 @@  static inline void *map_library(int fd, struct dso *dso)
 			map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1),
 				prot, MAP_PRIVATE,
 				fd, ph->p_offset & -PAGE_SIZE);
-			if (map == MAP_FAILED) {
+			if (map_library_failed(map)) {
 				unmap_library(dso);
 				goto error;
 			}
@@ -181,10 +183,10 @@  static inline void *map_library(int fd, struct dso *dso)
 				size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE;
 				size_t pgend = brk + ph->p_memsz - ph->p_filesz
 					+ PAGE_SIZE-1 & -PAGE_SIZE;
-				if (pgend > pgbrk && mmap_fixed(map+pgbrk,
+				if (pgend > pgbrk && !map_library_failed(mmap_fixed(map+pgbrk,
 					pgend-pgbrk, prot,
 					MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
-					-1, off_start) == MAP_FAILED)
+					-1, off_start)))
 					goto error;
 				memset(map + brk, 0, pgbrk-brk);
 			}
@@ -207,13 +209,15 @@  static inline void *map_library(int fd, struct dso *dso)
 			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
 		: mmap((void *)addr_min, map_len, prot,
 			MAP_PRIVATE, fd, off_start);
-	if (map==MAP_FAILED) goto error;
+	if (map_library_failed(map)) goto error;
 	dso->map = map;
 	dso->map_len = map_len;
 	/* If the loaded file is not relocatable and the requested address is
 	 * not available, then the load operation must fail. */
 	if (eh->e_type != ET_DYN && addr_min && map!=(void *)addr_min) {
+#ifndef LD_CRT
 		errno = EBUSY;
+#endif
 		goto error;
 	}
 	base = map - addr_min;
@@ -238,33 +242,36 @@  static inline void *map_library(int fd, struct dso *dso)
 			((ph->p_flags&PF_X) ? PROT_EXEC : 0));
 		/* Reuse the existing mapping for the lowest-address LOAD */
 		if ((ph->p_vaddr & -PAGE_SIZE) != addr_min || DL_NOMMU_SUPPORT)
-			if (mmap_fixed(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start) == MAP_FAILED)
+			if (map_library_failed(mmap_fixed(base+this_min, this_max-this_min, prot, MAP_PRIVATE|MAP_FIXED, fd, off_start)))
 				goto error;
 		if (ph->p_memsz > ph->p_filesz && (ph->p_flags&PF_W)) {
 			size_t brk = (size_t)base+ph->p_vaddr+ph->p_filesz;
 			size_t pgbrk = brk+PAGE_SIZE-1 & -PAGE_SIZE;
 			memset((void *)brk, 0, pgbrk-brk & PAGE_SIZE-1);
-			if (pgbrk-(size_t)base < this_max && mmap_fixed((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
+			if (pgbrk-(size_t)base < this_max && map_library_failed(mmap_fixed((void *)pgbrk, (size_t)base+this_max-pgbrk, prot, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0)))
 				goto error;
 		}
 	}
 	for (i=0; ((size_t *)(base+dyn))[i]; i+=2)
 		if (((size_t *)(base+dyn))[i]==DT_TEXTREL) {
-			if (mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC)
-			    && errno != ENOSYS)
+			if (map_library_error_check(mprotect(map, map_len, PROT_READ|PROT_WRITE|PROT_EXEC), ENOSYS))
 				goto error;
 			break;
 		}
 done_mapping:
 	dso->base = base;
 	dso->dynv = laddr(dso, dyn);
+#ifndef LD_CRT
 	if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
-	free(allocated_buf);
+#endif
+	map_library_free(allocated_buf, allocated_buf_size);
 	return map;
 noexec:
+#ifndef LD_CRT
 	errno = ENOEXEC;
+#endif
 error:
-	if (map!=MAP_FAILED) unmap_library(dso);
-	free(allocated_buf);
+	if (!map_library_failed(map)) unmap_library(dso);
+	map_library_free(allocated_buf, allocated_buf_size);
 	return 0;
 }
diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
index cbe0a6f..7a9560d 100644
--- a/src/internal/dynlink.h
+++ b/src/internal/dynlink.h
@@ -94,7 +94,7 @@  struct fdpic_dummy_loadmap {
 #define AUX_CNT 32
 #define DYN_CNT 32
 
-typedef void (*stage2_func)(unsigned char *, size_t *);
+typedef _Noreturn void (*stage2_func)(unsigned char *, size_t *, size_t *);
 typedef _Noreturn void (*stage3_func)(size_t *);
 
 hidden void *__dlsym(void *restrict, const char *restrict, void *restrict);
diff --git a/tools/ld.musl-clang.in b/tools/ld.musl-clang.in
index 93763d6..02d9893 100644
--- a/tools/ld.musl-clang.in
+++ b/tools/ld.musl-clang.in
@@ -7,6 +7,18 @@  shared=
 userlinkdir=
 userlink=
 
+Scrt="$libc_lib/Scrt1.o"
+dynamic_linker_args="-dynamic-linker \"$ldso\""
+
+for x ; do
+    case "$x" in
+        -l-dni)
+            dynamic_linker_args="-no-dynamic-linker"
+            Scrt="$libc_lib/dcrt1.o"
+            ;;
+    esac
+done
+
 for x ; do
     test "$cleared" || set -- ; cleared=1
 
@@ -42,10 +54,13 @@  for x ; do
             ;;
         -sysroot=*|--sysroot=*)
             ;;
+        $libc_lib/Scrt1.o)
+            set -- "$@" $Scrt
+            ;;
         *)
             set -- "$@" "$x"
             ;;
     esac
 done
 
-exec $($cc -print-prog-name=ld) -nostdlib "$@" -lc -dynamic-linker "$ldso"
+exec $($cc -print-prog-name=ld) -nostdlib "$@" -lc "$dynamic_linker_args"
diff --git a/tools/musl-clang.in b/tools/musl-clang.in
index 623de6f..49cba1b 100644
--- a/tools/musl-clang.in
+++ b/tools/musl-clang.in
@@ -5,14 +5,21 @@  libc_inc="@INCDIR@"
 libc_lib="@LIBDIR@"
 thisdir="`cd "$(dirname "$0")"; pwd`"
 
+cleared=
+
 # prevent clang from running the linker (and erroring) on no input.
 sflags=
 eflags=
+dniflags=
 for x ; do
+    test "$cleared" || set -- ; cleared=1
+
     case "$x" in
+        --dni) dniflags=-l-dni; continue ;;
         -l*) input=1 ;;
         *) input= ;;
     esac
+    set -- "$@" "$x"
     if test "$input" ; then
         sflags="-l-user-start"
         eflags="-l-user-end"
@@ -29,6 +36,7 @@  exec $cc \
     -isystem "$libc_inc" \
     -L-user-start \
     $sflags \
+    $dniflags \
     "$@" \
     $eflags \
     -L"$libc_lib" \
diff --git a/tools/musl-gcc.specs.sh b/tools/musl-gcc.specs.sh
index 3049257..86374f1 100644
--- a/tools/musl-gcc.specs.sh
+++ b/tools/musl-gcc.specs.sh
@@ -17,13 +17,13 @@  cat <<EOF
 libgcc.a%s %:if-exists(libgcc_eh.a%s)
 
 *startfile:
-%{!shared: $libdir/Scrt1.o} $libdir/crti.o crtbeginS.o%s
+%{!shared: %{-dni:$libdir/dcrt1.o;:$libdir/Scrt1.o}} $libdir/crti.o crtbeginS.o%s
 
 *endfile:
 crtendS.o%s $libdir/crtn.o
 
 *link:
--dynamic-linker $ldso -nostdlib %{shared:-shared} %{static:-static} %{rdynamic:-export-dynamic}
+%{-dni:-no-dynamic-linker;:--dynamic-linker $ldso} -nostdlib %{shared:-shared} %{static:-static} %{rdynamic:-export-dynamic}
 
 *esp_link:
 

Comments

Szabolcs Nagy April 27, 2019, 8:55 a.m.
* Rodger Combs <rodger.combs@gmail.com> [2019-04-26 20:13:29 -0500]:
> ---

i think you need a lot more explanation about possible
use-cases, failure modes, toolchain requirements etc.
Rich Felker April 27, 2019, 4:19 p.m.
On Sat, Apr 27, 2019 at 10:55:40AM +0200, Szabolcs Nagy wrote:
> * Rodger Combs <rodger.combs@gmail.com> [2019-04-26 20:13:29 -0500]:
> > ---
> 
> i think you need a lot more explanation about possible
> use-cases, failure modes, toolchain requirements etc.

I can elaborate a bit. This is long-desired functionality, to be able
to produce dynamic-linked binaries which run without the presence of a
"program interpreter" (dynamic linker) at a fixed absolute pathname
and without wrapper scripts or similar. The idea is that the entry
point works like static pie, performing sufficient self-relocation to
begin execution without a dynamic linker, but then maps the dynamic
linker into memory according to some search procedure that allows it
to be found relative to the main executable's location. More on this
later. The use case is distribution of dynamic musl-linked binaries
that can easily be run on non-musl systems.

Toolchain requirements are essentially nothing new, but the toolchain
must be invoked in a different way (alternate crt1 file and passing
--no-dynamic-linker to ld). The patches as submitted put this into the
musl-gcc and musl-clang wrapper scripts, which is a decent way of
testing, but these wrappers are not really intended for serious use,
so going forward there should be some discussion of what might be an
acceptable way to upstream this to gcc or at least include it in mcm
in a way that's maintainable and doesn't conflict with upstream.

Failure modes include at least:

- Cannot be used with suid/sgid, at all. Needs to hard fail if invoked
  that way; otherwise it invites dangerous misuse. Currently this is
  not handled right.

- Does not work if /proc is not mounted or /proc/self/exe is not
  available. (It might be better to work from AT_EXECFN but that has
  different semantics.)

- Processes and honors LD_LIBRARY_PATH and (new) LD_LOADER_PATH. These
  could be wrong especially in the main intended usage case, a
  non-musl system. But that would affect ldso itself already. It's not
  a hard no at this point (needs discussion) but I'm skeptical of any
  use of environment from crt1.

- Depends on ability to rewrite aux vector and program headers in some
  possibly fragile ways (seems mostly ok from what I remember; need to
  review again where we'll have a record we can look back at).

There may be others I find when reviewing the patches in more detail,
which I'm starting now.

Rich
Rich Felker April 27, 2019, 5:19 p.m.
On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
> diff --git a/crt/dcrt1.c b/crt/dcrt1.c
> new file mode 100644
> index 0000000..47c6dc2
> --- /dev/null
> +++ b/crt/dcrt1.c
> @@ -0,0 +1,362 @@
> +#define SYSCALL_NO_TLS
> +
> +#include <elf.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <features.h>
> +#include <libgen.h>
> +#include <sys/mman.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include "atomic.h"
> +#include "dynlink.h"
> +#include "syscall.h"
> +
> +extern weak hidden const size_t _DYNAMIC[];
> +
> +int main();
> +weak void _init();
> +weak void _fini();
> +weak _Noreturn int __libc_start_main(int (*)(), int, char **,
> +	void (*)(), void(*)(), void(*)());
> +
> +#define DLSTART_PROLOGUE __libc_start_main(main, argc, argv, _init, _fini, 0);
> +
> +#define START "_start"
> +#define _dlstart_c _start_c
> +#include "../ldso/dlstart.c"
> +
> +struct dso {
> +	unsigned char *base;
> +	struct fdpic_loadmap *loadmap;
> +	size_t *dynv;
> +	Phdr *phdr;
> +	int phnum;
> +	size_t phentsize;
> +	unsigned char *map;
> +	size_t map_len;
> +};

There is no need for making a dummy strict dso or exposing struct dso
to this file. The code to be taken/shared from map_library just needs
to be refactored so that the part that writes things into struct dso
remains in dynlink.c. Or, since we're punting on fdpic support for
this for now, a simplified version of map_library without fdpic
support (it gets quite simple then) could perhaps just be put here for
now, with the intent of refactoring to unify later.

> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE SYSCALL_MMAP2_UNIT
> +#endif
> [...]
> +static inline int crt_mprotect(void *addr, size_t len, int prot)
> +{
> +	size_t start, end;
> +	start = (size_t)addr & -PAGE_SIZE;
> +	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
> +	return __syscall(SYS_mprotect, start, end-start, prot);
> +}
> +#define mprotect crt_mprotect

This is wrong. mprotect does not take units of SYSCALL_MMAP2_UNIT. You
need to get the variable page size from auxv.

> +static char *crt_getenv(const char *name, char **environ)
> +{
> +	size_t l = crt_strchrnul(name, '=') - name;
> +	if (l && !name[l] && environ)
> +		for (char **e = environ; *e; e++)
> +			if (!crt_strncmp(name, *e, l) && l[*e] == '=')
> +				return *e + l+1;
> +	return 0;
> +}

I really question the use of environment here at all, both from a
standpoint of simplicity/least-surprise, and from a standpoint of
intended usage case. If you're on a non-musl system, the environment
is likely to contain settings that wouldn't be appropriate to musl
anyway, meaing you probably need to remove them for this to work at
all, and then if you really need to force a path other than the
builtin relative one for a particular musl-linked binary, you might as
well just invoke ldso as a command, making the override local to that
invocation rather than inherited across exec of other programs.

If you have compelling reasons you want to search the environment,
let's discuss it, but if it's just gratuitous "maybe it will be
useful", let's apply YAGNI.

> +static void get_rpaths(const char **rpath, const char **runpath, const size_t *dyn)
> +{
> +	/* DT_STRTAB is pre-relocated for us by dlstart */
> +	const char *strings = (void*)dyn[DT_STRTAB];
> +
> +	if (dyn[0] & (1 << DT_RPATH))
> +		*rpath = strings + dyn[DT_RPATH];
> +	if (dyn[0] & (1 << DT_RUNPATH))
> +		*runpath = strings + dyn[DT_RUNPATH];
> +}

musl does not honor the legacy difference in search order between
rpath and runpath (see dynlink.c), but if wwe don't search via
environment here it doesn't matter anyway.

> +static size_t find_linker(char *outbuf, size_t bufsize, const char *this_path, size_t thisl, const size_t *dyn, char **environ, int secure)
> +{
> +	const char *paths[3] = {0}; // rpath, envpath, runpath
> +	size_t i;
> +	int fd;
> +
> +	if (thisl)
> +		thisl--;
> +	while (thisl > 1 && this_path[thisl] == '/')
> +		thisl--;
> +	while (thisl > 0 && this_path[thisl] != '/')
> +		thisl--;
> +
> +	if (!secure) {
> +		const char *envpath = crt_getenv("LD_LOADER_PATH", environ);
> +		if (envpath) {
> +			size_t envlen = crt_strlen(envpath);
> +			if (envlen < bufsize) {
> +				crt_memcpy(outbuf, envpath, envlen + 1);
> +				return envlen + 1;
> +			}
> +		}
> +	}
> +
> +	get_rpaths(&paths[0], &paths[2], dyn);
> +
> +	paths[1] = secure ? NULL : crt_getenv("LD_LIBRARY_PATH", environ);
> +
> +	for (i = 0; i < 3; i++) {
> +		int relative = 0;
> +		const char *p = paths[i];
> +		char *o = outbuf;
> +		if (!p)
> +			continue;
> +		for (;;) {
> +			if (!crt_strncmp(p, "$ORIGIN", 7) ||
> +					!crt_strncmp(p, "${ORIGIN}", 9)) {
> +				relative = 1;
> +				if (o + thisl + 1 < outbuf + bufsize) {
> +					crt_memcpy(o, this_path, thisl);
> +					o += thisl;
> +				} else {
> +					o = outbuf + bufsize - 1;
> +				}
> +				p += (p[1] == '{' ? 9 : 7);
> +			} else if (*p == ':' || !*p) {
> +#define LDSO_FILENAME "ld-musl-" LDSO_ARCH ".so.1"
> +				relative |= outbuf[0] != '/';
> +				if ((!secure || !relative) && o + sizeof(LDSO_FILENAME) + 1 < outbuf + bufsize) {
> +					*o++ = '/';
> +					crt_memcpy(o, LDSO_FILENAME, sizeof(LDSO_FILENAME));
> +					if (!access(outbuf, R_OK | X_OK))
> +						return (o + sizeof(LDSO_FILENAME)) - outbuf;
> +				}
> +				if (!*p)
> +					break;
> +				relative = 0;
> +				o = outbuf;
> +				p++;
> +			} else {
> +				if (o < outbuf + bufsize)
> +					*o++ = *p;
> +				p++;
> +			}
> +		}
> +	}
> +
> +	// Didn't find a usable loader anywhere, so try the hardcoded path :shrug:
> +	crt_memcpy(outbuf, LDSO_PATHNAME, sizeof(LDSO_PATHNAME));
> +	return sizeof(LDSO_PATHNAME);
> +}

Including fallbacks that alter behavior is probably a bad idea
(failing preferable). If the changes in your other patch to dynlink.c
are needed to make it work with dcrt1, then it won't necessarily even
work to load a (possibly older) system-wide musl ldso; however if
that's the case I Think we should fix it.

> +static void final_start_c(long *p)
> +{
> +	int argc = p[0];
> +	char **argv = (void *)(p+1);
> +	__libc_start_main(main, argc, argv, _init, _fini, 0);
> +}
> +
> +hidden _Noreturn void __dls2(unsigned char *base, size_t *p, size_t *dyn)
> +{
> +	int argc = p[0];
> +	char **argv = (void *)(p+1);
> +	int fd;
> +	int secure;
> +	int prot = PROT_READ;
> +	struct dso loader;
> +	Ehdr *loader_hdr;
> +	Phdr *new_hdr;
> +	void *entry;
> +	char this_path[2*NAME_MAX+2] = {0};
> +	size_t thisl;
> +	char linker_path[2*NAME_MAX+2] = {0};

2*NAME_MAX is a fairly short arbitrary limit. Why not PATH_MAX, or a
VLA that adapts up to PATH_MAX (to avoid expanding stack when longer
is not needed)?

> +	size_t linker_len;
> +	size_t i;
> +	size_t aux[AUX_CNT];
> +	size_t *auxv;
> +	char **environ = argv + argc + 1;
> +
> +	/* Find aux vector just past environ[] and use it to initialize
> +	* global data that may be needed before we can make syscalls. */
> +	for (i = argc + 1; argv[i]; i++);
> +	auxv = (void *)(argv + i + 1);
> +	decode_vec(auxv, aux, AUX_CNT);
> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);

At this point we can just abort if secure != 0. There is unbounded
attack surface trying to load a (possibly relative) ldso with elevated
privileges.

> +	thisl = readlink("/proc/self/exe", this_path, sizeof this_path);

We might consider whether use of AT_EXECFN (possibly with additional
resolution of symlinks etc) would be better. It would avoid dependency
on /proc, making it possible to run dcrt1 binaries at early boot or
inside chroots, but does have different semantics that might be less
(or maybe more?) desirable.

> +	// Copy the program headers into an anonymous mapping
> +	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +	if (map_library_failed(new_hdr))
> +		goto error;

Can you remind us why patched program headers are needed? I think it
was absence of PT_PHDR or something...

> +	// Point it back at the original kernel-provided base
> +	new_hdr->p_type = PT_PHDR;
> +	new_hdr->p_vaddr = (size_t)new_hdr - (size_t)base;
> +
> +	((Phdr*)((char*)new_hdr + aux[AT_PHENT]))->p_type = PT_INTERP;
> +	((Phdr*)((char*)new_hdr + aux[AT_PHENT]))->p_vaddr = new_hdr->p_vaddr + aux[AT_PHENT] * (aux[AT_PHNUM] + 2);

But here you're also adding a PT_INTERP. What's the motivation for that?

> +	crt_memcpy((char*)new_hdr + aux[AT_PHENT] * (aux[AT_PHNUM] + 2), linker_path, linker_len);
> +
> +	for (i = 0; i < aux[AT_PHNUM]; i++) {
> +		Phdr *hdr = (void*)((char*)aux[AT_PHDR] + aux[AT_PHENT] * i);
> +		Phdr *dst = (void*)((char*)new_hdr + aux[AT_PHENT] * (i + 2));
> +		if (hdr->p_type == PT_PHDR || hdr->p_type == PT_INTERP) {

Isn't it impossible to have a PT_INTERP already here?

> +			// Can't have a duplicate
> +			dst->p_type = PT_NULL;

Is adding PT_NULL headers in the middle valid, or do they get
interpreted as end of headers?

> +		} else {
> +			crt_memcpy(dst, hdr, aux[AT_PHENT]);
> +		}
> +	}
> +
> +	if (mprotect(new_hdr, aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len, PROT_READ))
> +		goto error;
> +
> +	for (i=0; auxv[i]; i+=2) {
> +		if (auxv[i] == AT_BASE)
> +			auxv[i + 1] = (size_t)loader_hdr;
> +		if (auxv[i] == AT_PHDR)
> +			auxv[i + 1] = (size_t)new_hdr;
> +		if (auxv[i] == AT_PHNUM)
> +			auxv[i + 1] += 2;
> +	}
> +
> +	entry = laddr(&loader, loader_hdr->e_entry);
> +
> +#ifndef LD_FDPIC
> +	/* Undo the relocations performed by dlstart */
> +
> +	if (NEED_MIPS_GOT_RELOCS) {
> +		const size_t *dynv = _DYNAMIC;
> +		size_t local_cnt = 0;
> +		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
> +		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
> +			local_cnt = dynv[i+1];
> +		for (i=0; i<local_cnt; i++) got[i] -= (size_t)base;
> +	}
> +
> +	size_t *rel = (void *)((size_t)base+dyn[DT_REL]);
> +	size_t rel_size = dyn[DT_RELSZ];
> +	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
> +		if (!IS_RELATIVE(rel[1], 0)) continue;
> +		size_t *rel_addr = (void *)((size_t)base + rel[0]);
> +		*rel_addr -= (size_t)base;
> +	}
> +#endif
> +
> +	CRTJMP(entry, argv - 1);
> +
> +error:
> +	for(;;) a_crash();
> +}
> diff --git a/ldso/dlstart.c b/ldso/dlstart.c
> index 20d50f2..7ff79d6 100644
> --- a/ldso/dlstart.c
> +++ b/ldso/dlstart.c
> @@ -21,7 +21,7 @@
>  hidden void _dlstart_c(size_t *sp, size_t *dynv)
>  {
>  	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
> -	size_t *rel, rel_size, base;
> +	size_t *rel, rel_size, base, loader_phdr;
>  
>  	int argc = *sp;
>  	char **argv = (void *)(sp+1);
> @@ -41,6 +41,13 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>  		 * space and moving the extra fdpic arguments to the stack
>  		 * vector where they are easily accessible from C. */
>  		segs = ((struct fdpic_loadmap *)(sp[-1] ? sp[-1] : sp[-2]))->segs;
> +		if (aux[AT_BASE]) {
> +			Ehdr *eh = (void*)aux[AT_BASE];
> +			for (i = 0; eh->e_phoff - segs[i].p_vaddr >= segs[i].p_memsz; i++);
> +			loader_phdr = (eh->e_phoff - segs[i].p_vaddr + segs[i].addr);
> +		} else {
> +			loader_phdr = aux[AT_PHDR];
> +		}
>  	} else {
>  		/* If dynv is null, the entry point was started from loader
>  		 * that is not fdpic-aware. We can assume normal fixed-
> @@ -55,6 +62,7 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>  		segs[0].p_memsz = -1;
>  		Ehdr *eh = (void *)base;
>  		Phdr *ph = (void *)(base + eh->e_phoff);
> +		loader_phdr = (size_t)ph;
>  		size_t phnum = eh->e_phnum;
>  		size_t phent = eh->e_phentsize;
>  		while (phnum-- && ph->p_type != PT_DYNAMIC)
> @@ -69,13 +77,38 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>  
>  #if DL_FDPIC
>  	for (i=0; i<DYN_CNT; i++) {
> -		if (i==DT_RELASZ || i==DT_RELSZ) continue;
> +		if (i==DT_RELASZ || i==DT_RELSZ || i==DT_RPATH || i==DT_RUNPATH) continue;
>  		if (!dyn[i]) continue;
>  		for (j=0; dyn[i]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
>  		dyn[i] += segs[j].addr - segs[j].p_vaddr;
>  	}
>  	base = 0;
> +#else
> +	/* If the dynamic linker is invoked as a command, its load
> +	 * address is not available in the aux vector. Instead, compute
> +	 * the load address as the difference between &_DYNAMIC and the
> +	 * virtual address in the PT_DYNAMIC program header. */
> +	base = aux[AT_BASE];
> +	if (!base) {
> +		size_t phnum = aux[AT_PHNUM];
> +		size_t phentsize = aux[AT_PHENT];
> +		Phdr *ph = (void *)aux[AT_PHDR];
> +		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
> +			if (ph->p_type == PT_DYNAMIC) {
> +				base = (size_t)dynv - ph->p_vaddr;
> +				break;
> +			}
> +		}
> +	}
> +	loader_phdr = base + ((Ehdr*)base)->e_phoff;
> +#endif
>  
> +#ifdef DLSTART_PROLOGUE
> +	if (aux[AT_PHDR] != loader_phdr)
> +		DLSTART_PROLOGUE
> +#endif

As mentioned before, this does not work. It puts a symbol reference
into a function that is required to be pure PIC that can run prior to
relocations. Having the reference in a branch that's not taken does
not help.

Instead it should be something like (pseudocode):

	if (relocs_already_done) goto done:
	// ...
done:
	stage2_func dls2;
	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
	dls2((void *)base, sp);
	
This does not require any DLSTART_PROLOGUE macro that's specific to
dcrt1; the exact same thing works in rcrt1 (static pie) and will also
make it safe to invoke static pie programs via ldso (seems pointless,
but useful when you don't realize the binary is static).

The dcrt1 code in dls2 (which can vary between ldso, rcrt1, and dcrt1)
then has to do its own test to tell which code path it needs to take,
but that's fine. General principle here: repeating small amounts of
work is better than increasing dependency between stages/layers. Same
applies to passing dyn into dls2 -- doing so prevents the call from
being a tail call, and keeps _dlstart_c's stack frame around despite
being done with it.

> +	/* For convenience in dcrt1, we relocate STRTAB here (as with FDPIC) */
> +	dyn[DT_STRTAB] += base;

Just do it where you need it.

>  	/* MIPS uses an ugly packed form for GOT relocations. Since we
>  	 * can't make function calls yet and the code is tiny anyway,
> @@ -144,5 +163,5 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>  
>  	stage2_func dls2;
>  	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
> -	dls2((void *)base, sp);
> +	dls2((void *)base, sp, dyn);

See above.
Rodger Combs April 27, 2019, 10:51 p.m.
> On Apr 27, 2019, at 12:19, Rich Felker <dalias@libc.org> wrote:
> 
> On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
>> diff --git a/crt/dcrt1.c b/crt/dcrt1.c
>> new file mode 100644
>> index 0000000..47c6dc2
>> --- /dev/null
>> +++ b/crt/dcrt1.c
>> @@ -0,0 +1,362 @@
>> +#define SYSCALL_NO_TLS
>> +
>> +#include <elf.h>
>> +#include <errno.h>
>> +#include <fcntl.h>
>> +#include <features.h>
>> +#include <libgen.h>
>> +#include <sys/mman.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include "atomic.h"
>> +#include "dynlink.h"
>> +#include "syscall.h"
>> +
>> +extern weak hidden const size_t _DYNAMIC[];
>> +
>> +int main();
>> +weak void _init();
>> +weak void _fini();
>> +weak _Noreturn int __libc_start_main(int (*)(), int, char **,
>> +	void (*)(), void(*)(), void(*)());
>> +
>> +#define DLSTART_PROLOGUE __libc_start_main(main, argc, argv, _init, _fini, 0);
>> +
>> +#define START "_start"
>> +#define _dlstart_c _start_c
>> +#include "../ldso/dlstart.c"
>> +
>> +struct dso {
>> +	unsigned char *base;
>> +	struct fdpic_loadmap *loadmap;
>> +	size_t *dynv;
>> +	Phdr *phdr;
>> +	int phnum;
>> +	size_t phentsize;
>> +	unsigned char *map;
>> +	size_t map_len;
>> +};
> 
> There is no need for making a dummy strict dso or exposing struct dso
> to this file. The code to be taken/shared from map_library just needs
> to be refactored so that the part that writes things into struct dso
> remains in dynlink.c. Or, since we're punting on fdpic support for
> this for now, a simplified version of map_library without fdpic
> support (it gets quite simple then) could perhaps just be put here for
> now, with the intent of refactoring to unify later.

There are enough relevant values here that it seems to warrant using a struct (rather than a bunch of individual pointer args), and fairly little of map_library had to be disabled (basically just the stuff dealing with TLS and GNU relro/stack stuff) to avoid accesses to other members. I suppose those portions could be factored out fairly easily, and then this minimal struct could be made a member of the dso struct? That just seemed like it'd require a fair bit more change to the existing dynlink code than this.

> 
>> +#ifndef PAGE_SIZE
>> +#define PAGE_SIZE SYSCALL_MMAP2_UNIT
>> +#endif
>> [...]
>> +static inline int crt_mprotect(void *addr, size_t len, int prot)
>> +{
>> +	size_t start, end;
>> +	start = (size_t)addr & -PAGE_SIZE;
>> +	end = (size_t)((char *)addr + len + PAGE_SIZE-1) & -PAGE_SIZE;
>> +	return __syscall(SYS_mprotect, start, end-start, prot);
>> +}
>> +#define mprotect crt_mprotect
> 
> This is wrong. mprotect does not take units of SYSCALL_MMAP2_UNIT. You
> need to get the variable page size from auxv.

Ah, I was confused because the actual mprotect implementation uses PAGE_SIZE, which is defined in the public headers as a fixed value (though looking closer, I'm not sure if it's always the same as SYSCALL_MMAP2_UNIT anyway), but in the internal headers it's an access to the __libc struct. Sure.

> 
>> +static char *crt_getenv(const char *name, char **environ)
>> +{
>> +	size_t l = crt_strchrnul(name, '=') - name;
>> +	if (l && !name[l] && environ)
>> +		for (char **e = environ; *e; e++)
>> +			if (!crt_strncmp(name, *e, l) && l[*e] == '=')
>> +				return *e + l+1;
>> +	return 0;
>> +}
> 
> I really question the use of environment here at all, both from a
> standpoint of simplicity/least-surprise, and from a standpoint of
> intended usage case. If you're on a non-musl system, the environment
> is likely to contain settings that wouldn't be appropriate to musl
> anyway, meaing you probably need to remove them for this to work at
> all, and then if you really need to force a path other than the
> builtin relative one for a particular musl-linked binary, you might as
> well just invoke ldso as a command, making the override local to that
> invocation rather than inherited across exec of other programs.

My goal is to make locating the dynamic loader work the same way as locating any other library you load, including the ability to override that path via LD_LIBRARY_PATH for testing, interception, etc. Having a dedicated var for it means you can set it and have a program and all of its child processes use an alternate loader when testing something, or as a way to adjust the loader path after build (if patchelf or similar tools don't work well for you).

> 
> If you have compelling reasons you want to search the environment,
> let's discuss it, but if it's just gratuitous "maybe it will be
> useful", let's apply YAGNI.
> 
>> +static void get_rpaths(const char **rpath, const char **runpath, const size_t *dyn)
>> +{
>> +	/* DT_STRTAB is pre-relocated for us by dlstart */
>> +	const char *strings = (void*)dyn[DT_STRTAB];
>> +
>> +	if (dyn[0] & (1 << DT_RPATH))
>> +		*rpath = strings + dyn[DT_RPATH];
>> +	if (dyn[0] & (1 << DT_RUNPATH))
>> +		*runpath = strings + dyn[DT_RUNPATH];
>> +}
> 
> musl does not honor the legacy difference in search order between
> rpath and runpath (see dynlink.c), but if wwe don't search via
> environment here it doesn't matter anyway.

It was easy enough to implement, so I did ¯\_(ツ)_/¯ but if we explicitly don't want to implement the old behavior I can remove it.

> 
>> +static size_t find_linker(char *outbuf, size_t bufsize, const char *this_path, size_t thisl, const size_t *dyn, char **environ, int secure)
>> +{
>> +	const char *paths[3] = {0}; // rpath, envpath, runpath
>> +	size_t i;
>> +	int fd;
>> +
>> +	if (thisl)
>> +		thisl--;
>> +	while (thisl > 1 && this_path[thisl] == '/')
>> +		thisl--;
>> +	while (thisl > 0 && this_path[thisl] != '/')
>> +		thisl--;
>> +
>> +	if (!secure) {
>> +		const char *envpath = crt_getenv("LD_LOADER_PATH", environ);
>> +		if (envpath) {
>> +			size_t envlen = crt_strlen(envpath);
>> +			if (envlen < bufsize) {
>> +				crt_memcpy(outbuf, envpath, envlen + 1);
>> +				return envlen + 1;
>> +			}
>> +		}
>> +	}
>> +
>> +	get_rpaths(&paths[0], &paths[2], dyn);
>> +
>> +	paths[1] = secure ? NULL : crt_getenv("LD_LIBRARY_PATH", environ);
>> +
>> +	for (i = 0; i < 3; i++) {
>> +		int relative = 0;
>> +		const char *p = paths[i];
>> +		char *o = outbuf;
>> +		if (!p)
>> +			continue;
>> +		for (;;) {
>> +			if (!crt_strncmp(p, "$ORIGIN", 7) ||
>> +					!crt_strncmp(p, "${ORIGIN}", 9)) {
>> +				relative = 1;
>> +				if (o + thisl + 1 < outbuf + bufsize) {
>> +					crt_memcpy(o, this_path, thisl);
>> +					o += thisl;
>> +				} else {
>> +					o = outbuf + bufsize - 1;
>> +				}
>> +				p += (p[1] == '{' ? 9 : 7);
>> +			} else if (*p == ':' || !*p) {
>> +#define LDSO_FILENAME "ld-musl-" LDSO_ARCH ".so.1"
>> +				relative |= outbuf[0] != '/';
>> +				if ((!secure || !relative) && o + sizeof(LDSO_FILENAME) + 1 < outbuf + bufsize) {
>> +					*o++ = '/';
>> +					crt_memcpy(o, LDSO_FILENAME, sizeof(LDSO_FILENAME));
>> +					if (!access(outbuf, R_OK | X_OK))
>> +						return (o + sizeof(LDSO_FILENAME)) - outbuf;
>> +				}
>> +				if (!*p)
>> +					break;
>> +				relative = 0;
>> +				o = outbuf;
>> +				p++;
>> +			} else {
>> +				if (o < outbuf + bufsize)
>> +					*o++ = *p;
>> +				p++;
>> +			}
>> +		}
>> +	}
>> +
>> +	// Didn't find a usable loader anywhere, so try the hardcoded path :shrug:
>> +	crt_memcpy(outbuf, LDSO_PATHNAME, sizeof(LDSO_PATHNAME));
>> +	return sizeof(LDSO_PATHNAME);
>> +}
> 
> Including fallbacks that alter behavior is probably a bad idea
> (failing preferable). If the changes in your other patch to dynlink.c
> are needed to make it work with dcrt1, then it won't necessarily even
> work to load a (possibly older) system-wide musl ldso; however if
> that's the case I Think we should fix it.

This behavior is analogous to dynlink.c's behavior when it searches rpath for a file and can't find it there. It uses "/etc/ld-musl-" LDSO_ARCH ".path" and falls back on "/lib:/usr/local/lib:/usr/lib" if that's unavailable and I'm just using a single hardcoded path, but beyond that I don't see this as significantly conceptually different. It also allows executables built with this to work on systems with musl installed even without having the vendored copy on-hand, which makes it easier to deploy a single binary that can use a vendored copy if provided, but can be packaged without it for systems with musl.

The changes aren't needed for dcrt1 when the app is run directly; they're needed when running a dcrt app (or other static PIE app) via ldso on the command line.

> 
>> +static void final_start_c(long *p)
>> +{
>> +	int argc = p[0];
>> +	char **argv = (void *)(p+1);
>> +	__libc_start_main(main, argc, argv, _init, _fini, 0);
>> +}
>> +
>> +hidden _Noreturn void __dls2(unsigned char *base, size_t *p, size_t *dyn)
>> +{
>> +	int argc = p[0];
>> +	char **argv = (void *)(p+1);
>> +	int fd;
>> +	int secure;
>> +	int prot = PROT_READ;
>> +	struct dso loader;
>> +	Ehdr *loader_hdr;
>> +	Phdr *new_hdr;
>> +	void *entry;
>> +	char this_path[2*NAME_MAX+2] = {0};
>> +	size_t thisl;
>> +	char linker_path[2*NAME_MAX+2] = {0};
> 
> 2*NAME_MAX is a fairly short arbitrary limit. Why not PATH_MAX, or a
> VLA that adapts up to PATH_MAX (to avoid expanding stack when longer
> is not needed)?

I got this limit from load_library; if we think it should be larger, we should change it in both places.
I don't think there's much use in doing VLAs here (at least, not if they're going to have fixed maximums that are reasonably low), since we're going to CRTJMP out of this anyway.

> 
>> +	size_t linker_len;
>> +	size_t i;
>> +	size_t aux[AUX_CNT];
>> +	size_t *auxv;
>> +	char **environ = argv + argc + 1;
>> +
>> +	/* Find aux vector just past environ[] and use it to initialize
>> +	* global data that may be needed before we can make syscalls. */
>> +	for (i = argc + 1; argv[i]; i++);
>> +	auxv = (void *)(argv + i + 1);
>> +	decode_vec(auxv, aux, AUX_CNT);
>> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
>> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
> 
> At this point we can just abort if secure != 0. There is unbounded
> attack surface trying to load a (possibly relative) ldso with elevated
> privileges.

No more so than dynlink.c normally has when loading other SOs. Like there, I don't follow $ORIGIN in secure mode, and additionally here I don't handle relative-to-cwd paths in secure mode. I don't see a problem with allowing a load from an absolute rpath, or from the hardcoded path, using this mechanism, though.
Basically, I'm intending for this to be a feature that you could just turn on in your linker flags for everything you build, and get the functionality in the cases where you want it, at no significant cost in those where you don't.

> 
>> +	thisl = readlink("/proc/self/exe", this_path, sizeof this_path);
> 
> We might consider whether use of AT_EXECFN (possibly with additional
> resolution of symlinks etc) would be better. It would avoid dependency
> on /proc, making it possible to run dcrt1 binaries at early boot or
> inside chroots, but does have different semantics that might be less
> (or maybe more?) desirable.
> 

Hmmmm, so it's possible to get relative paths that way (e.g. ./test-prog), but I suppose that might be fine? But I don't know how useful this would really be, since dynlink.c depends on /proc for rpath anyway.

>> +	// Copy the program headers into an anonymous mapping
>> +	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>> +	if (map_library_failed(new_hdr))
>> +		goto error;
> 
> Can you remind us why patched program headers are needed? I think it
> was absence of PT_PHDR or something...

Yeah, the linker doesn't add PT_PHDR when we tell it not to set a dynamic loader, and dynlink needs it.

> 
>> +	// Point it back at the original kernel-provided base
>> +	new_hdr->p_type = PT_PHDR;
>> +	new_hdr->p_vaddr = (size_t)new_hdr - (size_t)base;
>> +
>> +	((Phdr*)((char*)new_hdr + aux[AT_PHENT]))->p_type = PT_INTERP;
>> +	((Phdr*)((char*)new_hdr + aux[AT_PHENT]))->p_vaddr = new_hdr->p_vaddr + aux[AT_PHENT] * (aux[AT_PHNUM] + 2);
> 
> But here you're also adding a PT_INTERP. What's the motivation for that?

It's pretty trivial to provide since we're already making a duplicate for PHDR, and it lets the loader know its own path, which might be useful for debug stuff at some point. Generally I'm trying to make this act as much like a kernel-loaded ldso as possible.

> 
>> +	crt_memcpy((char*)new_hdr + aux[AT_PHENT] * (aux[AT_PHNUM] + 2), linker_path, linker_len);
>> +
>> +	for (i = 0; i < aux[AT_PHNUM]; i++) {
>> +		Phdr *hdr = (void*)((char*)aux[AT_PHDR] + aux[AT_PHENT] * i);
>> +		Phdr *dst = (void*)((char*)new_hdr + aux[AT_PHENT] * (i + 2));
>> +		if (hdr->p_type == PT_PHDR || hdr->p_type == PT_INTERP) {
> 
> Isn't it impossible to have a PT_INTERP already here?

Normally, yeah, I'm just covering my bases so we behave predictably even if you do something weird.

> 
>> +			// Can't have a duplicate
>> +			dst->p_type = PT_NULL;
> 
> Is adding PT_NULL headers in the middle valid, or do they get
> interpreted as end of headers?

It's valid; they're just skipped over.

> 
>> +		} else {
>> +			crt_memcpy(dst, hdr, aux[AT_PHENT]);
>> +		}
>> +	}
>> +
>> +	if (mprotect(new_hdr, aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len, PROT_READ))
>> +		goto error;
>> +
>> +	for (i=0; auxv[i]; i+=2) {
>> +		if (auxv[i] == AT_BASE)
>> +			auxv[i + 1] = (size_t)loader_hdr;
>> +		if (auxv[i] == AT_PHDR)
>> +			auxv[i + 1] = (size_t)new_hdr;
>> +		if (auxv[i] == AT_PHNUM)
>> +			auxv[i + 1] += 2;
>> +	}
>> +
>> +	entry = laddr(&loader, loader_hdr->e_entry);
>> +
>> +#ifndef LD_FDPIC
>> +	/* Undo the relocations performed by dlstart */
>> +
>> +	if (NEED_MIPS_GOT_RELOCS) {
>> +		const size_t *dynv = _DYNAMIC;
>> +		size_t local_cnt = 0;
>> +		size_t *got = (void *)(base + dyn[DT_PLTGOT]);
>> +		for (i=0; dynv[i]; i+=2) if (dynv[i]==DT_MIPS_LOCAL_GOTNO)
>> +			local_cnt = dynv[i+1];
>> +		for (i=0; i<local_cnt; i++) got[i] -= (size_t)base;
>> +	}
>> +
>> +	size_t *rel = (void *)((size_t)base+dyn[DT_REL]);
>> +	size_t rel_size = dyn[DT_RELSZ];
>> +	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
>> +		if (!IS_RELATIVE(rel[1], 0)) continue;
>> +		size_t *rel_addr = (void *)((size_t)base + rel[0]);
>> +		*rel_addr -= (size_t)base;
>> +	}
>> +#endif
>> +
>> +	CRTJMP(entry, argv - 1);
>> +
>> +error:
>> +	for(;;) a_crash();
>> +}
>> diff --git a/ldso/dlstart.c b/ldso/dlstart.c
>> index 20d50f2..7ff79d6 100644
>> --- a/ldso/dlstart.c
>> +++ b/ldso/dlstart.c
>> @@ -21,7 +21,7 @@
>> hidden void _dlstart_c(size_t *sp, size_t *dynv)
>> {
>> 	size_t i, aux[AUX_CNT], dyn[DYN_CNT];
>> -	size_t *rel, rel_size, base;
>> +	size_t *rel, rel_size, base, loader_phdr;
>> 
>> 	int argc = *sp;
>> 	char **argv = (void *)(sp+1);
>> @@ -41,6 +41,13 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>> 		 * space and moving the extra fdpic arguments to the stack
>> 		 * vector where they are easily accessible from C. */
>> 		segs = ((struct fdpic_loadmap *)(sp[-1] ? sp[-1] : sp[-2]))->segs;
>> +		if (aux[AT_BASE]) {
>> +			Ehdr *eh = (void*)aux[AT_BASE];
>> +			for (i = 0; eh->e_phoff - segs[i].p_vaddr >= segs[i].p_memsz; i++);
>> +			loader_phdr = (eh->e_phoff - segs[i].p_vaddr + segs[i].addr);
>> +		} else {
>> +			loader_phdr = aux[AT_PHDR];
>> +		}
>> 	} else {
>> 		/* If dynv is null, the entry point was started from loader
>> 		 * that is not fdpic-aware. We can assume normal fixed-
>> @@ -55,6 +62,7 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>> 		segs[0].p_memsz = -1;
>> 		Ehdr *eh = (void *)base;
>> 		Phdr *ph = (void *)(base + eh->e_phoff);
>> +		loader_phdr = (size_t)ph;
>> 		size_t phnum = eh->e_phnum;
>> 		size_t phent = eh->e_phentsize;
>> 		while (phnum-- && ph->p_type != PT_DYNAMIC)
>> @@ -69,13 +77,38 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>> 
>> #if DL_FDPIC
>> 	for (i=0; i<DYN_CNT; i++) {
>> -		if (i==DT_RELASZ || i==DT_RELSZ) continue;
>> +		if (i==DT_RELASZ || i==DT_RELSZ || i==DT_RPATH || i==DT_RUNPATH) continue;
>> 		if (!dyn[i]) continue;
>> 		for (j=0; dyn[i]-segs[j].p_vaddr >= segs[j].p_memsz; j++);
>> 		dyn[i] += segs[j].addr - segs[j].p_vaddr;
>> 	}
>> 	base = 0;
>> +#else
>> +	/* If the dynamic linker is invoked as a command, its load
>> +	 * address is not available in the aux vector. Instead, compute
>> +	 * the load address as the difference between &_DYNAMIC and the
>> +	 * virtual address in the PT_DYNAMIC program header. */
>> +	base = aux[AT_BASE];
>> +	if (!base) {
>> +		size_t phnum = aux[AT_PHNUM];
>> +		size_t phentsize = aux[AT_PHENT];
>> +		Phdr *ph = (void *)aux[AT_PHDR];
>> +		for (i=phnum; i--; ph = (void *)((char *)ph + phentsize)) {
>> +			if (ph->p_type == PT_DYNAMIC) {
>> +				base = (size_t)dynv - ph->p_vaddr;
>> +				break;
>> +			}
>> +		}
>> +	}
>> +	loader_phdr = base + ((Ehdr*)base)->e_phoff;
>> +#endif
>> 
>> +#ifdef DLSTART_PROLOGUE
>> +	if (aux[AT_PHDR] != loader_phdr)
>> +		DLSTART_PROLOGUE
>> +#endif
> 
> As mentioned before, this does not work. It puts a symbol reference
> into a function that is required to be pure PIC that can run prior to
> relocations. Having the reference in a branch that's not taken does
> not help.

Ah, alright.

> 
> Instead it should be something like (pseudocode):
> 
> 	if (relocs_already_done) goto done:
> 	// ...
> done:
> 	stage2_func dls2;
> 	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
> 	dls2((void *)base, sp);
> 	
> This does not require any DLSTART_PROLOGUE macro that's specific to
> dcrt1; the exact same thing works in rcrt1 (static pie) and will also
> make it safe to invoke static pie programs via ldso (seems pointless,
> but useful when you don't realize the binary is static).

Alright, simple enough; just added a static int to __dls2 that I set to 1 before CRTJMP-ing to ldso, and had this same aux[AT_PHDR] check in dlstart goto past the relocs. It still needs an ifdef, since otherwise it'd trigger in the loader itself.

> 
> The dcrt1 code in dls2 (which can vary between ldso, rcrt1, and dcrt1)
> then has to do its own test to tell which code path it needs to take,
> but that's fine. General principle here: repeating small amounts of
> work is better than increasing dependency between stages/layers. Same
> applies to passing dyn into dls2 -- doing so prevents the call from
> being a tail call, and keeps _dlstart_c's stack frame around despite
> being done with it.

Re-parsing dynv is simple enough, but I need to get at it somehow (this function gets it from _start). I suppose I could read the _DYNAMIC symbol in C if you prefer?

The stack frame sticking around doesn't really matter, since we're going to CRTJMP out of this anyway.

> 
>> +	/* For convenience in dcrt1, we relocate STRTAB here (as with FDPIC) */
>> +	dyn[DT_STRTAB] += base;
> 
> Just do it where you need it.

It's simple enough in the non-FDPIC case; I just did this here for consistency with FDPIC, where this is significantly more complex. I can drop it if we want to handle FDPIC differently long-term.

> 
>> 	/* MIPS uses an ugly packed form for GOT relocations. Since we
>> 	 * can't make function calls yet and the code is tiny anyway,
>> @@ -144,5 +163,5 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
>> 
>> 	stage2_func dls2;
>> 	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
>> -	dls2((void *)base, sp);
>> +	dls2((void *)base, sp, dyn);
> 
> See above.


I'll hold off on re-sending this series until we've decided what to do about the questions above; once that's done, I'll apply whatever we decide on, and also write up some more useful commit messages.

Most of your comments from your other mail are addressed (or at least discussed) above; the one about rewriting auxv/phdrs is not:
- Both linux and freebsd's linux emu layer always insert entries for all the auxv entries we care about (their values just might be zero), so we don't need to worry about them potentially being missing (unless there's some other implementation we care about that I need to check; maybe Microsoft's?). The aux vector is on the stack, so we don't need to worry about it being read-only or something like that. glibc actually already rewrites values in there (when invoked directly), so there's precedent for doing so.
- We make an anonymous mapping in which to rewrite the program headers, rather than doing it in-place. The only particularly odd bit is that we take the difference between the effectively-random anonymous mapping address and the actual kernel-mapped base address, which may be negative (or very large). In practice, this works fine with musl's loader as-is (also happens to work with glibc's), and both do an unsigned subtract with no bounds check, so I think we're fine here as well.
Szabolcs Nagy April 27, 2019, 11:55 p.m.
* Rodger Combs <rodger.combs@gmail.com> [2019-04-27 17:51:17 -0500]:
> On Apr 27, 2019, at 12:19, Rich Felker <dalias@libc.org> wrote:
> > On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
> >> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
> >> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
> > 
> > At this point we can just abort if secure != 0. There is unbounded
> > attack surface trying to load a (possibly relative) ldso with elevated
> > privileges.
> 
> No more so than dynlink.c normally has when loading other SOs. Like there, I don't follow $ORIGIN in secure mode, and additionally here I don't handle relative-to-cwd paths in secure mode. I don't see a problem with allowing a load from an absolute rpath, or from the hardcoded path, using this mechanism, though.
> Basically, I'm intending for this to be a feature that you could just turn on in your linker flags for everything you build, and get the functionality in the cases where you want it, at no significant cost in those where you don't.

i think the code should be written such that it is obvious
that user input cannot affect runtime behaviour in secure
mode in any way (in particular the loaded code).

> >> +	// Copy the program headers into an anonymous mapping
> >> +	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> >> +	if (map_library_failed(new_hdr))
> >> +		goto error;
> > 
> > Can you remind us why patched program headers are needed? I think it
> > was absence of PT_PHDR or something...
> 
> Yeah, the linker doesn't add PT_PHDR when we tell it not to set a dynamic loader, and dynlink needs it.

there should be a strong reason to add fake program headers.
why is PT_PHDR required?
who uses PT_INTERP?
Rodger Combs April 28, 2019, 12:16 a.m.
> On Apr 27, 2019, at 18:55, Szabolcs Nagy <nsz@port70.net> wrote:
> 
> * Rodger Combs <rodger.combs@gmail.com> [2019-04-27 17:51:17 -0500]:
>> On Apr 27, 2019, at 12:19, Rich Felker <dalias@libc.org> wrote:
>>> On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
>>>> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
>>>> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
>>> 
>>> At this point we can just abort if secure != 0. There is unbounded
>>> attack surface trying to load a (possibly relative) ldso with elevated
>>> privileges.
>> 
>> No more so than dynlink.c normally has when loading other SOs. Like there, I don't follow $ORIGIN in secure mode, and additionally here I don't handle relative-to-cwd paths in secure mode. I don't see a problem with allowing a load from an absolute rpath, or from the hardcoded path, using this mechanism, though.
>> Basically, I'm intending for this to be a feature that you could just turn on in your linker flags for everything you build, and get the functionality in the cases where you want it, at no significant cost in those where you don't.
> 
> i think the code should be written such that it is obvious
> that user input cannot affect runtime behaviour in secure
> mode in any way (in particular the loaded code).

This is the case (CWD, the executable path, and env vars are all ignored in secure mode); if there's something you'd like changed to make that more clear, please elaborate.

> 
>>>> +	// Copy the program headers into an anonymous mapping
>>>> +	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>>>> +	if (map_library_failed(new_hdr))
>>>> +		goto error;
>>> 
>>> Can you remind us why patched program headers are needed? I think it
>>> was absence of PT_PHDR or something...
>> 
>> Yeah, the linker doesn't add PT_PHDR when we tell it not to set a dynamic loader, and dynlink needs it.
> 
> there should be a strong reason to add fake program headers.
> why is PT_PHDR required?
> who uses PT_INTERP?

PT_PHDR is needed for the dynamic loader to find the executable's base address.
PT_INTERP isn't currently used by musl, but it is in glibc (to find its own path, so it knows where it's loaded from for future dlopen()s and such, and potentially for debugging?), and it seems reasonable that the linker might care about it in the future, so I'm including it for potential forwards-compatibility (and also glibc compatibility), since we already need to create an entry for PHDR anyway, so it's trivial to do this as well.
Szabolcs Nagy April 28, 2019, 11:07 a.m.
* Rodger Combs <rodger.combs@gmail.com> [2019-04-27 19:16:30 -0500]:
> > On Apr 27, 2019, at 18:55, Szabolcs Nagy <nsz@port70.net> wrote:
> > * Rodger Combs <rodger.combs@gmail.com> [2019-04-27 17:51:17 -0500]:
> >> On Apr 27, 2019, at 12:19, Rich Felker <dalias@libc.org> wrote:
> >>> On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
> >>>> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
> >>>> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
> >>> 
> >>> At this point we can just abort if secure != 0. There is unbounded
> >>> attack surface trying to load a (possibly relative) ldso with elevated
> >>> privileges.
> >> 
> >> No more so than dynlink.c normally has when loading other SOs. Like there, I don't follow $ORIGIN in secure mode, and additionally here I don't handle relative-to-cwd paths in secure mode. I don't see a problem with allowing a load from an absolute rpath, or from the hardcoded path, using this mechanism, though.
> >> Basically, I'm intending for this to be a feature that you could just turn on in your linker flags for everything you build, and get the functionality in the cases where you want it, at no significant cost in those where you don't.
> > 
> > i think the code should be written such that it is obvious
> > that user input cannot affect runtime behaviour in secure
> > mode in any way (in particular the loaded code).
> 
> This is the case (CWD, the executable path, and env vars are all ignored in secure mode); if there's something you'd like changed to make that more clear, please elaborate.

the current code does not *obviously* have the right
security properties (it does not even document the
properties it guarantees).

if an auditor has to read complex code like find_linker
to verify important security properties then it is not
obviously secure.

the original musl ldso code is already fairly complicated
and you created a dcrt1 that has more state and branching
around user input.

i suggest refactoring at least find_linker (e.g. into a
secure and a non-secure version, but there might be ways
with less code duplication) and documenting assumptions
about the secure paths (e.g. not user writable).

> >>>> +	// Copy the program headers into an anonymous mapping
> >>>> +	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> >>>> +	if (map_library_failed(new_hdr))
> >>>> +		goto error;
> >>> 
> >>> Can you remind us why patched program headers are needed? I think it
> >>> was absence of PT_PHDR or something...
> >> 
> >> Yeah, the linker doesn't add PT_PHDR when we tell it not to set a dynamic loader, and dynlink needs it.
> > 
> > there should be a strong reason to add fake program headers.
> > why is PT_PHDR required?
> > who uses PT_INTERP?
> 
> PT_PHDR is needed for the dynamic loader to find the executable's base address.
> PT_INTERP isn't currently used by musl, but it is in glibc (to find its own path, so it knows where it's loaded from for future dlopen()s and such, and potentially for debugging?), and it seems reasonable that the linker might care about it in the future, so I'm including it for potential forwards-compatibility (and also glibc compatibility), since we already need to create an entry for PHDR anyway, so it's trivial to do this as well.

i think base address can be found without PT_PHDR if
there is a dynamic section or with a new api between
the ldso and loader of the ldso (i'd only try the
fake phdr mapping if other options are explored and
turn out to be worse).
Rich Felker April 28, 2019, 4:12 p.m.
On Sat, Apr 27, 2019 at 05:51:17PM -0500, Rodger Combs wrote:
> > On Apr 27, 2019, at 12:19, Rich Felker <dalias@libc.org> wrote:
> > 
> > On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
> >> diff --git a/crt/dcrt1.c b/crt/dcrt1.c
> >> new file mode 100644
> >> index 0000000..47c6dc2
> >> --- /dev/null
> >> +++ b/crt/dcrt1.c
> >> @@ -0,0 +1,362 @@
> >> +#define SYSCALL_NO_TLS
> >> +
> >> +#include <elf.h>
> >> +#include <errno.h>
> >> +#include <fcntl.h>
> >> +#include <features.h>
> >> +#include <libgen.h>
> >> +#include <sys/mman.h>
> >> +#include <string.h>
> >> +#include <unistd.h>
> >> +#include "atomic.h"
> >> +#include "dynlink.h"
> >> +#include "syscall.h"
> >> +
> >> +extern weak hidden const size_t _DYNAMIC[];
> >> +
> >> +int main();
> >> +weak void _init();
> >> +weak void _fini();
> >> +weak _Noreturn int __libc_start_main(int (*)(), int, char **,
> >> +	void (*)(), void(*)(), void(*)());
> >> +
> >> +#define DLSTART_PROLOGUE __libc_start_main(main, argc, argv, _init, _fini, 0);
> >> +
> >> +#define START "_start"
> >> +#define _dlstart_c _start_c
> >> +#include "../ldso/dlstart.c"
> >> +
> >> +struct dso {
> >> +	unsigned char *base;
> >> +	struct fdpic_loadmap *loadmap;
> >> +	size_t *dynv;
> >> +	Phdr *phdr;
> >> +	int phnum;
> >> +	size_t phentsize;
> >> +	unsigned char *map;
> >> +	size_t map_len;
> >> +};
> > 
> > There is no need for making a dummy strict dso or exposing struct dso
> > to this file. The code to be taken/shared from map_library just needs
> > to be refactored so that the part that writes things into struct dso
> > remains in dynlink.c. Or, since we're punting on fdpic support for
> > this for now, a simplified version of map_library without fdpic
> > support (it gets quite simple then) could perhaps just be put here for
> > now, with the intent of refactoring to unify later.
> 
> There are enough relevant values here that it seems to warrant using
> a struct (rather than a bunch of individual pointer args), and
> fairly little of map_library had to be disabled (basically just the
> stuff dealing with TLS and GNU relro/stack stuff) to avoid accesses
> to other members. I suppose those portions could be factored out
> fairly easily, and then this minimal struct could be made a member
> of the dso struct? That just seemed like it'd require a fair bit
> more change to the existing dynlink code than this.

I think you're not understanding what I mean by "refactor". The
majority of accesses to *dso in map_library are to do things that have
nothing to do with mapping the ELF file into memory, but instead are
just work that was put into map_library because the dynamic linker
needed something later and there was already a loop iterating over
where it could be found in map_library.

The exceptions I see are dso->loadmap (for fdpic, which is needed to
record where the individual segments were loaded), and the program
header stuff (needed to use the mapped library). That leaves the
outputs as essentially: phdr,phnum,phent,map,map_len. (base is also an
output, but can be derived from phdrs+map. For FDPIC, it's essentially
an array of map,map_len.)

The factorization I think might make sense is renaming the existing
map_library to map_elf, and reducing it to just mapping the file
according to the load segments and providing the above output.
map_library, in dynlink.c, could then call it and additionally fill in
the fields of struct dso, so that the existing callers in dynlink.c
work as-is.

I'm not sure what the ideal way to do FDPIC loadmaps here is. It's
probably something like returning a loadmap rather than a map address
on FDPIC targets.

> >> +static char *crt_getenv(const char *name, char **environ)
> >> +{
> >> +	size_t l = crt_strchrnul(name, '=') - name;
> >> +	if (l && !name[l] && environ)
> >> +		for (char **e = environ; *e; e++)
> >> +			if (!crt_strncmp(name, *e, l) && l[*e] == '=')
> >> +				return *e + l+1;
> >> +	return 0;
> >> +}
> > 
> > I really question the use of environment here at all, both from a
> > standpoint of simplicity/least-surprise, and from a standpoint of
> > intended usage case. If you're on a non-musl system, the environment
> > is likely to contain settings that wouldn't be appropriate to musl
> > anyway, meaing you probably need to remove them for this to work at
> > all, and then if you really need to force a path other than the
> > builtin relative one for a particular musl-linked binary, you might as
> > well just invoke ldso as a command, making the override local to that
> > invocation rather than inherited across exec of other programs.
> 
> My goal is to make locating the dynamic loader work the same way as
> locating any other library you load, including the ability to
> override that path via LD_LIBRARY_PATH for testing, interception,
> etc. Having a dedicated var for it means you can set it and have a
> program and all of its child processes use an alternate loader when
> testing something, or as a way to adjust the loader path after build
> (if patchelf or similar tools don't work well for you).
> 
> > 
> > If you have compelling reasons you want to search the environment,
> > let's discuss it, but if it's just gratuitous "maybe it will be
> > useful", let's apply YAGNI.
> > 
> >> +static void get_rpaths(const char **rpath, const char **runpath, const size_t *dyn)
> >> +{
> >> +	/* DT_STRTAB is pre-relocated for us by dlstart */
> >> +	const char *strings = (void*)dyn[DT_STRTAB];
> >> +
> >> +	if (dyn[0] & (1 << DT_RPATH))
> >> +		*rpath = strings + dyn[DT_RPATH];
> >> +	if (dyn[0] & (1 << DT_RUNPATH))
> >> +		*runpath = strings + dyn[DT_RUNPATH];
> >> +}
> > 
> > musl does not honor the legacy difference in search order between
> > rpath and runpath (see dynlink.c), but if wwe don't search via
> > environment here it doesn't matter anyway.
> 
> It was easy enough to implement, so I did ¯\_(ツ)_/¯ but if we
> explicitly don't want to implement the old behavior I can remove it.

Indeed, it's explicitly not wanted. The old behavior was only left
with glibc for backwards-compatibility with existing stuff using it,
but with musl we didn't have that.

> >> +static size_t find_linker(char *outbuf, size_t bufsize, const char *this_path, size_t thisl, const size_t *dyn, char **environ, int secure)
> >> +{
> >> +	const char *paths[3] = {0}; // rpath, envpath, runpath
> >> +	size_t i;
> >> +	int fd;
> >> +
> >> +	if (thisl)
> >> +		thisl--;
> >> +	while (thisl > 1 && this_path[thisl] == '/')
> >> +		thisl--;
> >> +	while (thisl > 0 && this_path[thisl] != '/')
> >> +		thisl--;
> >> +
> >> +	if (!secure) {
> >> +		const char *envpath = crt_getenv("LD_LOADER_PATH", environ);
> >> +		if (envpath) {
> >> +			size_t envlen = crt_strlen(envpath);
> >> +			if (envlen < bufsize) {
> >> +				crt_memcpy(outbuf, envpath, envlen + 1);
> >> +				return envlen + 1;
> >> +			}
> >> +		}
> >> +	}
> >> +
> >> +	get_rpaths(&paths[0], &paths[2], dyn);
> >> +
> >> +	paths[1] = secure ? NULL : crt_getenv("LD_LIBRARY_PATH", environ);
> >> +
> >> +	for (i = 0; i < 3; i++) {
> >> +		int relative = 0;
> >> +		const char *p = paths[i];
> >> +		char *o = outbuf;
> >> +		if (!p)
> >> +			continue;
> >> +		for (;;) {
> >> +			if (!crt_strncmp(p, "$ORIGIN", 7) ||
> >> +					!crt_strncmp(p, "${ORIGIN}", 9)) {
> >> +				relative = 1;
> >> +				if (o + thisl + 1 < outbuf + bufsize) {
> >> +					crt_memcpy(o, this_path, thisl);
> >> +					o += thisl;
> >> +				} else {
> >> +					o = outbuf + bufsize - 1;
> >> +				}
> >> +				p += (p[1] == '{' ? 9 : 7);
> >> +			} else if (*p == ':' || !*p) {
> >> +#define LDSO_FILENAME "ld-musl-" LDSO_ARCH ".so.1"
> >> +				relative |= outbuf[0] != '/';
> >> +				if ((!secure || !relative) && o + sizeof(LDSO_FILENAME) + 1 < outbuf + bufsize) {
> >> +					*o++ = '/';
> >> +					crt_memcpy(o, LDSO_FILENAME, sizeof(LDSO_FILENAME));
> >> +					if (!access(outbuf, R_OK | X_OK))
> >> +						return (o + sizeof(LDSO_FILENAME)) - outbuf;
> >> +				}
> >> +				if (!*p)
> >> +					break;
> >> +				relative = 0;
> >> +				o = outbuf;
> >> +				p++;
> >> +			} else {
> >> +				if (o < outbuf + bufsize)
> >> +					*o++ = *p;
> >> +				p++;
> >> +			}
> >> +		}
> >> +	}
> >> +
> >> +	// Didn't find a usable loader anywhere, so try the hardcoded path :shrug:
> >> +	crt_memcpy(outbuf, LDSO_PATHNAME, sizeof(LDSO_PATHNAME));
> >> +	return sizeof(LDSO_PATHNAME);
> >> +}
> > 
> > Including fallbacks that alter behavior is probably a bad idea
> > (failing preferable). If the changes in your other patch to dynlink.c
> > are needed to make it work with dcrt1, then it won't necessarily even
> > work to load a (possibly older) system-wide musl ldso; however if
> > that's the case I Think we should fix it.
> 
> This behavior is analogous to dynlink.c's behavior when it searches
> rpath for a file and can't find it there. It uses "/etc/ld-musl-"
> LDSO_ARCH ".path" and falls back on "/lib:/usr/local/lib:/usr/lib"
> if that's unavailable and I'm just using a single hardcoded path,
> but beyond that I don't see this as significantly conceptually
> different. It also allows executables built with this to work on
> systems with musl installed even without having the vendored copy
> on-hand, which makes it easier to deploy a single binary that can
> use a vendored copy if provided, but can be packaged without it for
> systems with musl.
> 
> The changes aren't needed for dcrt1 when the app is run directly;
> they're needed when running a dcrt app (or other static PIE app) via
> ldso on the command line.
> 
> > 
> >> +static void final_start_c(long *p)
> >> +{
> >> +	int argc = p[0];
> >> +	char **argv = (void *)(p+1);
> >> +	__libc_start_main(main, argc, argv, _init, _fini, 0);
> >> +}
> >> +
> >> +hidden _Noreturn void __dls2(unsigned char *base, size_t *p, size_t *dyn)
> >> +{
> >> +	int argc = p[0];
> >> +	char **argv = (void *)(p+1);
> >> +	int fd;
> >> +	int secure;
> >> +	int prot = PROT_READ;
> >> +	struct dso loader;
> >> +	Ehdr *loader_hdr;
> >> +	Phdr *new_hdr;
> >> +	void *entry;
> >> +	char this_path[2*NAME_MAX+2] = {0};
> >> +	size_t thisl;
> >> +	char linker_path[2*NAME_MAX+2] = {0};
> > 
> > 2*NAME_MAX is a fairly short arbitrary limit. Why not PATH_MAX, or a
> > VLA that adapts up to PATH_MAX (to avoid expanding stack when longer
> > is not needed)?
> 
> I got this limit from load_library; if we think it should be larger,
> we should change it in both places.

What I think you're missing here and in several other places where
you're citing something as analogous to what ldso does is that they're
not on equal standing. Anything that goes into crt1 is logic that's
permanently baked into every application linked with it. On the other
hand, anything in ldso can be changed in the future *with the changes
automatically picked up by any dynamic-linked application*, including
not only upstream changes but site-local changes if needed.

This also applies to things like search based on environment. Any
*policy* logic in ldso is something that can be changed if needed. Any
policy logic in crt1 is baked-in policy that can't be changed.

> I don't think there's much use in doing VLAs here (at least, not if
> they're going to have fixed maximums that are reasonably low), since
> we're going to CRTJMP out of this anyway.

The idea is just avoiding dirtying 2+ extra pages of memory at startup
for no reason. If the program never uses significant amounts of stack,
that's possibly an 8k increase in footprint for the entire process
lifetime.

> 
> > 
> >> +	size_t linker_len;
> >> +	size_t i;
> >> +	size_t aux[AUX_CNT];
> >> +	size_t *auxv;
> >> +	char **environ = argv + argc + 1;
> >> +
> >> +	/* Find aux vector just past environ[] and use it to initialize
> >> +	* global data that may be needed before we can make syscalls. */
> >> +	for (i = argc + 1; argv[i]; i++);
> >> +	auxv = (void *)(argv + i + 1);
> >> +	decode_vec(auxv, aux, AUX_CNT);
> >> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
> >> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
> > 
> > At this point we can just abort if secure != 0. There is unbounded
> > attack surface trying to load a (possibly relative) ldso with elevated
> > privileges.
> 
> No more so than dynlink.c normally has when loading other SOs. Like
> there, I don't follow $ORIGIN in secure mode, and additionally here
> I don't handle relative-to-cwd paths in secure mode. I don't see a
> problem with allowing a load from an absolute rpath, or from the
> hardcoded path, using this mechanism, though.

In that case, you're not getting any advantages over the kernel doing
the loading of the interpreter, and possibly disadvantages.

> Basically, I'm intending for this to be a feature that you could
> just turn on in your linker flags for everything you build, and get
> the functionality in the cases where you want it, at no significant
> cost in those where you don't.

But there *are* significant costs, and some of the things you want to
do make these costs much higher, and thereby significantly more
inappropriate to do globally (e.g. for a musl-based distro). These
costs are in the areas of:

- Size. Probably not relevant except to the real size purists.
- Complexity.
- Baked-in policy.
- Baked-in bug (and for suid, attack) surface.

I'm not sure yet where the right balance to be had is.

> >> +	thisl = readlink("/proc/self/exe", this_path, sizeof this_path);
> > 
> > We might consider whether use of AT_EXECFN (possibly with additional
> > resolution of symlinks etc) would be better. It would avoid dependency
> > on /proc, making it possible to run dcrt1 binaries at early boot or
> > inside chroots, but does have different semantics that might be less
> > (or maybe more?) desirable.
> > 
> 
> Hmmmm, so it's possible to get relative paths that way (e.g.
> ../test-prog), but I suppose that might be fine? But I don't know
> how useful this would really be, since dynlink.c depends on /proc
> for rpath anyway.

Yes, relative paths should be fine, but unless you resolve the final
component (filename), you end up getting a location relative to the
symlink invoked, rather than relative to the actual binary file. This
essentially makes it impossible to make executable symlinks to DNI
binaries (you'd have to ensure that you also make a symlink to the
ldso at the right relative location). So I think of AT_EXECFN were
used, we'd need to do symlink resolution on the final component
(repeating until a non-symlink was found). Note that this is also
racy, but the race is not really a problem as long as you're not doing
it for suid.

> > Instead it should be something like (pseudocode):
> > 
> > 	if (relocs_already_done) goto done:
> > 	// ...
> > done:
> > 	stage2_func dls2;
> > 	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
> > 	dls2((void *)base, sp);
> > 	
> > This does not require any DLSTART_PROLOGUE macro that's specific to
> > dcrt1; the exact same thing works in rcrt1 (static pie) and will also
> > make it safe to invoke static pie programs via ldso (seems pointless,
> > but useful when you don't realize the binary is static).
> 
> Alright, simple enough; just added a static int to __dls2 that I set
> to 1 before CRTJMP-ing to ldso,

That sounds like the perfect method. Nothing fancy or hackish.

> and had this same aux[AT_PHDR] check
> in dlstart goto past the relocs. It still needs an ifdef, since
> otherwise it'd trigger in the loader itself.

I still wonder if there's a better way to do this. I'll let you know
if I think of other ideas.

> > The dcrt1 code in dls2 (which can vary between ldso, rcrt1, and dcrt1)
> > then has to do its own test to tell which code path it needs to take,
> > but that's fine. General principle here: repeating small amounts of
> > work is better than increasing dependency between stages/layers. Same
> > applies to passing dyn into dls2 -- doing so prevents the call from
> > being a tail call, and keeps _dlstart_c's stack frame around despite
> > being done with it.
> 
> Re-parsing dynv is simple enough, but I need to get at it somehow
> (this function gets it from _start). I suppose I could read the
> _DYNAMIC symbol in C if you prefer?

Yes, that's simpler.

> The stack frame sticking around doesn't really matter, since we're
> going to CRTJMP out of this anyway.

But it happens for ldso's instanct of dlstart_c too, meaning you're
more likely to allocate an extra page during dynamic linking.

> >> +	/* For convenience in dcrt1, we relocate STRTAB here (as with FDPIC) */
> >> +	dyn[DT_STRTAB] += base;
> > 
> > Just do it where you need it.
> 
> It's simple enough in the non-FDPIC case; I just did this here for
> consistency with FDPIC, where this is significantly more complex. I
> can drop it if we want to handle FDPIC differently long-term.

I wouldn't really call it consistency -- in the FDPIC code path, it's
relocating the pointers in the dyn[] table because two of them (symbol
table address and GOT address) are needed for performing the FDPIC
relocations.

> >> 	/* MIPS uses an ugly packed form for GOT relocations. Since we
> >> 	 * can't make function calls yet and the code is tiny anyway,
> >> @@ -144,5 +163,5 @@ hidden void _dlstart_c(size_t *sp, size_t *dynv)
> >> 
> >> 	stage2_func dls2;
> >> 	GETFUNCSYM(&dls2, __dls2, base+dyn[DT_PLTGOT]);
> >> -	dls2((void *)base, sp);
> >> +	dls2((void *)base, sp, dyn);
> > 
> > See above.
> 
> I'll hold off on re-sending this series until we've decided what to
> do about the questions above; once that's done, I'll apply whatever
> we decide on, and also write up some more useful commit messages.

Sounds good.

> Most of your comments from your other mail are addressed (or at
> least discussed) above; the one about rewriting auxv/phdrs is not:
> 
> - Both linux and freebsd's linux emu layer always insert entries for
> all the auxv entries we care about (their values just might be
> zero), so we don't need to worry about them potentially being
> missing (unless there's some other implementation we care about that
> I need to check; maybe Microsoft's?). The aux vector is on the
> stack, so we don't need to worry about it being read-only or
> something like that.

Yes, my only concern was whether it's sized such that it can be done
in-place. Would be nice to confirm that it works for MS's loader too.

> glibc actually already rewrites values in there
> (when invoked directly), so there's precedent for doing so.

Very good to know. Are the changes you proposed in dynlink.c to
rewrite auxv in alignment with what glibc does?

> - We make an anonymous mapping in which to rewrite the program
> headers, rather than doing it in-place. The only particularly odd
> bit is that we take the difference between the effectively-random
> anonymous mapping address and the actual kernel-mapped base address,
> which may be negative (or very large). In practice, this works fine
> with musl's loader as-is (also happens to work with glibc's), and
> both do an unsigned subtract with no bounds check, so I think we're
> fine here as well.

Yes, I think that's fine. BTW if we had a reasonable bound on size of
program headers, we could just put the array in drct1.o's .bss and
only mmap if that bound were exceeded. It'd save PAGE_SIZE-epsilon
bytes of memory and save a syscall.

Rich
Rich Felker April 28, 2019, 4:28 p.m.
On Sun, Apr 28, 2019 at 01:07:14PM +0200, Szabolcs Nagy wrote:
> * Rodger Combs <rodger.combs@gmail.com> [2019-04-27 19:16:30 -0500]:
> > > On Apr 27, 2019, at 18:55, Szabolcs Nagy <nsz@port70.net> wrote:
> > > * Rodger Combs <rodger.combs@gmail.com> [2019-04-27 17:51:17 -0500]:
> > >> On Apr 27, 2019, at 12:19, Rich Felker <dalias@libc.org> wrote:
> > >>> On Fri, Apr 26, 2019 at 08:13:29PM -0500, Rodger Combs wrote:
> > >>>> +	secure = ((aux[0] & 0x7800) != 0x7800 || aux[AT_UID] != aux[AT_EUID]
> > >>>> +		|| aux[AT_GID] != aux[AT_EGID] || aux[AT_SECURE]);
> > >>> 
> > >>> At this point we can just abort if secure != 0. There is unbounded
> > >>> attack surface trying to load a (possibly relative) ldso with elevated
> > >>> privileges.
> > >> 
> > >> No more so than dynlink.c normally has when loading other SOs. Like there, I don't follow $ORIGIN in secure mode, and additionally here I don't handle relative-to-cwd paths in secure mode. I don't see a problem with allowing a load from an absolute rpath, or from the hardcoded path, using this mechanism, though.
> > >> Basically, I'm intending for this to be a feature that you could just turn on in your linker flags for everything you build, and get the functionality in the cases where you want it, at no significant cost in those where you don't.
> > > 
> > > i think the code should be written such that it is obvious
> > > that user input cannot affect runtime behaviour in secure
> > > mode in any way (in particular the loaded code).
> > 
> > This is the case (CWD, the executable path, and env vars are all ignored in secure mode); if there's something you'd like changed to make that more clear, please elaborate.
> 
> the current code does not *obviously* have the right
> security properties (it does not even document the
> properties it guarantees).
> 
> if an auditor has to read complex code like find_linker
> to verify important security properties then it is not
> obviously secure.
> 
> the original musl ldso code is already fairly complicated
> and you created a dcrt1 that has more state and branching
> around user input.

Right, esp. re: user input. Also note that a bug here in dcrt1 is
*unfixable* in deployed binaries (without binary-level hacking)
because it's linked into every one of them. This is contrary to the
expectation of dynamic linking that bugs in library code are isolated
to the corresponding libraries and can be fixed by upgrading the
affected library only.

> i suggest refactoring at least find_linker (e.g. into a
> secure and a non-secure version, but there might be ways
> with less code duplication) and documenting assumptions
> about the secure paths (e.g. not user writable).

I really think we should just refrain from trying to support suid with
this at all. The number of binaries that "should" be suid can be
counted on one hand (one could even argue that it's a nice round
number), and having suid fail when linked DNI is probably a "feature"
in that it would catch stuff trying to install suid binaries
unexpectedly and make you fix it.

> 
> > >>>> +	// Copy the program headers into an anonymous mapping
> > >>>> +	new_hdr = mmap(0, (aux[AT_PHENT] * (aux[AT_PHNUM] + 2) + linker_len + PAGE_SIZE - 1) & -PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> > >>>> +	if (map_library_failed(new_hdr))
> > >>>> +		goto error;
> > >>> 
> > >>> Can you remind us why patched program headers are needed? I think it
> > >>> was absence of PT_PHDR or something...
> > >> 
> > >> Yeah, the linker doesn't add PT_PHDR when we tell it not to set a dynamic loader, and dynlink needs it.
> > > 
> > > there should be a strong reason to add fake program headers.
> > > why is PT_PHDR required?
> > > who uses PT_INTERP?
> > 
> > PT_PHDR is needed for the dynamic loader to find the executable's base address.
> > PT_INTERP isn't currently used by musl, but it is in glibc (to find its own path, so it knows where it's loaded from for future dlopen()s and such, and potentially for debugging?), and it seems reasonable that the linker might care about it in the future, so I'm including it for potential forwards-compatibility (and also glibc compatibility), since we already need to create an entry for PHDR anyway, so it's trivial to do this as well.
> 
> i think base address can be found without PT_PHDR if
> there is a dynamic section

I don't think so. When the dynamic linker entry point is reached, the
*only* references is has to the main program it's supposed to execute
are AT_PHDR and AT_ENTRY. In order to find the load base of the main
program, it needs to be able to compare the mapped and ELF-vaddr
addresses for a single point in the program. Without knowing the load
base, you can't even find the main program's _DYNAMIC. The only single
point I know that you can do the comparison on is AT_PHDR vs PT_PHDR.

> or with a new api between
> the ldso and loader of the ldso (i'd only try the
> fake phdr mapping if other options are explored and
> turn out to be worse).

A new API between them is invention of a new interface boundary, which
is a very bad thing in itself, plus it precludes running DNI
executables with any existing/older ldso; they'd only work with new
ones that speak the new interface.

Ideally ld could/should be taught that "wasting" 9 words for an
"unnecessary" phdr is not a problem and that it should always emit
PT_PHDR. If this is something that might eventually get fixed in ld,
the rewriting could be made conditional on whether PT_PHDR was
initially absent; if we go that path, omitting PT_INTERP might be
desirable so that the behavior is unchanged when rewriting is not
necessary.

Rich