Status report and MT fork

Submitted by Rich Felker on Oct. 26, 2020, 12:59 a.m.

Details

Message ID 20201026005912.GJ534@brightrain.aerifal.cx
State New
Series "Status report and MT fork"
Headers show

Commit Message

Rich Felker Oct. 26, 2020, 12:59 a.m.
On Sun, Oct 25, 2020 at 08:50:29PM -0400, Rich Felker wrote:
> I just pushed a series of changes (up through 0b87551bdf) I've had
> queued for a while now, some of which had minor issues that I think
> have all been resolved now. They cover a range of bugs found in the
> process of reviewing the possibility of making fork provide a
> consistent execution environment for the child of a multithreaded
> parent, and a couple unrelated fixes.
> 
> Based on distro experience with musl 1.2.1, I'm working on getting the
> improved fork into 1.2.2. Despite the fact that 1.2.1 did not break
> anything that wasn't already broken (apps invoking UB in MT-forked
> children), prior to it most of the active breakage was hit with very
> low probability, so there were a lot of packages people *thought* were
> working, that weren't, and feedback from distros seems to be that
> getting everything working as reliably as before (even if it was
> imperfect and dangerous before) is not tractable in any reasonable
> time frame. And in particular, I'm concerned about language runtimes
> like Ruby that seem to have a contract with applications they host to
> support MT-forked children. Fixing these is not a matter of fixing a
> finite set of bugs but fixing a contract, which is likely not
> tractable.
> 
> Assuming it goes through, the change here will be far more complete
> than glibc's handling of MT-forked children, where most things other
> than malloc don't actually work, but fail sufficiently infrequently
> that they seem to work. While there are a lot of things I dislike
> about this path, one major thing I do like is that it really makes
> internal use of threads by library code (including third party libs)
> transparent to the application, rather than "transparent, until you
> use fork".
> 
> Will follow up with draft patch for testing.

Patch attached. It should suffice for testing and review of whether
there are any locks/state I overlooked. It could possibly be made less
ugly too.

Note that this does not strictly conform to past and current POSIX
that specify fork as AS-safe. POSIX-future has removed fork from the
AS-safe list, and seems to have considered its original inclusion
erroneous due to pthread_atfork and existing implementation practice.
The patch as written takes care to skip all locking in single-threaded
parents, so it does not break AS-safety property in single-threaded
programs that may have made use of it. -Dfork=_Fork can also be used
to get an AS-safe fork, but it's not equivalent to old semantics;
_Fork does not run atfork handlers. It's also possible to static-link
an alternate fork implementation that provides its own pthread_atfork
and calls _Fork, if really needed for a particular application.

Feedback from distro folks would be very helpful -- does this fix all
the packages that 1.2.1 "broke"?

Rich

Patch hide | download patch | download mbox

diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index af983692..32e88508 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -21,6 +21,7 @@ 
 #include <semaphore.h>
 #include <sys/membarrier.h>
 #include "pthread_impl.h"
+#include "fork_impl.h"
 #include "libc.h"
 #include "dynlink.h"
 
@@ -1404,6 +1405,17 @@  void __libc_exit_fini()
 	}
 }
 
+void __ldso_atfork(int who)
+{
+	if (who<0) {
+		pthread_rwlock_wrlock(&lock);
+		pthread_mutex_lock(&init_fini_lock);
+	} else {
+		pthread_mutex_unlock(&init_fini_lock);
+		pthread_rwlock_unlock(&lock);
+	}
+}
+
 static struct dso **queue_ctors(struct dso *dso)
 {
 	size_t cnt, qpos, spos, i;
@@ -1462,6 +1474,12 @@  static struct dso **queue_ctors(struct dso *dso)
 	}
 	queue[qpos] = 0;
 	for (i=0; i<qpos; i++) queue[i]->mark = 0;
+	for (i=0; i<qpos; i++) if (queue[i]->ctor_visitor->tid < 0) {
+		error("State of %s is inconsistent due to multithreaded fork\n",
+			queue[i]->name);
+		free(queue);
+		if (runtime) longjmp(*rtld_fail, 1);
+	}
 
 	return queue;
 }
diff --git a/src/exit/at_quick_exit.c b/src/exit/at_quick_exit.c
index d3ce6522..e4b5d78d 100644
--- a/src/exit/at_quick_exit.c
+++ b/src/exit/at_quick_exit.c
@@ -1,12 +1,14 @@ 
 #include <stdlib.h>
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 #define COUNT 32
 
 static void (*funcs[COUNT])(void);
 static int count;
 static volatile int lock[1];
+volatile int *const __at_quick_exit_lockptr = lock;
 
 void __funcs_on_quick_exit()
 {
diff --git a/src/exit/atexit.c b/src/exit/atexit.c
index 160d277a..2804a1d7 100644
--- a/src/exit/atexit.c
+++ b/src/exit/atexit.c
@@ -2,6 +2,7 @@ 
 #include <stdint.h>
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 /* Ensure that at least 32 atexit handlers can be registered without malloc */
 #define COUNT 32
@@ -15,6 +16,7 @@  static struct fl
 
 static int slot;
 static volatile int lock[1];
+volatile int *const __atexit_lockptr = lock;
 
 void __funcs_on_exit()
 {
diff --git a/src/internal/fork_impl.h b/src/internal/fork_impl.h
new file mode 100644
index 00000000..0e116aae
--- /dev/null
+++ b/src/internal/fork_impl.h
@@ -0,0 +1,18 @@ 
+#include <features.h>
+
+extern hidden volatile int *const __at_quick_exit_lockptr;
+extern hidden volatile int *const __atexit_lockptr;
+extern hidden volatile int *const __dlerror_lockptr;
+extern hidden volatile int *const __gettext_lockptr;
+extern hidden volatile int *const __random_lockptr;
+extern hidden volatile int *const __sem_open_lockptr;
+extern hidden volatile int *const __stdio_ofl_lockptr;
+extern hidden volatile int *const __syslog_lockptr;
+extern hidden volatile int *const __timezone_lockptr;
+
+extern hidden volatile int *const __bump_lockptr;
+
+extern hidden volatile int *const __vmlock_lockptr;
+
+hidden void __malloc_atfork(int);
+hidden void __ldso_atfork(int);
diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c
index 3fcc7779..009beecb 100644
--- a/src/ldso/dlerror.c
+++ b/src/ldso/dlerror.c
@@ -4,6 +4,7 @@ 
 #include "pthread_impl.h"
 #include "dynlink.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 char *dlerror()
 {
@@ -19,6 +20,7 @@  char *dlerror()
 
 static volatile int freebuf_queue_lock[1];
 static void **freebuf_queue;
+volatile int *const __dlerror_lockptr = freebuf_queue_lock;
 
 void __dl_thread_cleanup(void)
 {
diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c
index 4c304393..805fe83b 100644
--- a/src/locale/dcngettext.c
+++ b/src/locale/dcngettext.c
@@ -10,6 +10,7 @@ 
 #include "atomic.h"
 #include "pleval.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 struct binding {
 	struct binding *next;
@@ -34,9 +35,11 @@  static char *gettextdir(const char *domainname, size_t *dirlen)
 	return 0;
 }
 
+static volatile int lock[1];
+volatile int *const __gettext_lockptr = lock;
+
 char *bindtextdomain(const char *domainname, const char *dirname)
 {
-	static volatile int lock[1];
 	struct binding *p, *q;
 
 	if (!domainname) return 0;
diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c
index f8931ba5..f6736ec4 100644
--- a/src/malloc/lite_malloc.c
+++ b/src/malloc/lite_malloc.c
@@ -6,6 +6,7 @@ 
 #include "libc.h"
 #include "lock.h"
 #include "syscall.h"
+#include "fork_impl.h"
 
 #define ALIGN 16
 
@@ -31,10 +32,12 @@  static int traverses_stack_p(uintptr_t old, uintptr_t new)
 	return 0;
 }
 
+static volatile int lock[1];
+volatile int *const __bump_lockptr = lock;
+
 static void *__simple_malloc(size_t n)
 {
 	static uintptr_t brk, cur, end;
-	static volatile int lock[1];
 	static unsigned mmap_step;
 	size_t align=1;
 	void *p;
diff --git a/src/malloc/mallocng/glue.h b/src/malloc/mallocng/glue.h
index 16acd1ea..980ce396 100644
--- a/src/malloc/mallocng/glue.h
+++ b/src/malloc/mallocng/glue.h
@@ -56,7 +56,8 @@  __attribute__((__visibility__("hidden")))
 extern int __malloc_lock[1];
 
 #define LOCK_OBJ_DEF \
-int __malloc_lock[1];
+int __malloc_lock[1]; \
+void __malloc_atfork(int who) { malloc_atfork(who); }
 
 static inline void rdlock()
 {
@@ -73,5 +74,16 @@  static inline void unlock()
 static inline void upgradelock()
 {
 }
+static inline void resetlock()
+{
+	__malloc_lock[0] = 0;
+}
+
+static inline void malloc_atfork(int who)
+{
+	if (who<0) rdlock();
+	else if (who>0) resetlock();
+	else unlock();
+}
 
 #endif
diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c
index c0997ad8..8ac68a24 100644
--- a/src/malloc/oldmalloc/malloc.c
+++ b/src/malloc/oldmalloc/malloc.c
@@ -9,6 +9,7 @@ 
 #include "atomic.h"
 #include "pthread_impl.h"
 #include "malloc_impl.h"
+#include "fork_impl.h"
 
 #if defined(__GNUC__) && defined(__PIC__)
 #define inline inline __attribute__((always_inline))
@@ -527,3 +528,21 @@  void __malloc_donate(char *start, char *end)
 	c->csize = n->psize = C_INUSE | (end-start);
 	__bin_chunk(c);
 }
+
+void __malloc_atfork(int who)
+{
+	if (who<0) {
+		lock(mal.split_merge_lock);
+		for (int i=0; i<64; i++)
+			lock(mal.bins[i].lock);
+	} else if (!who) {
+		for (int i=0; i<64; i++)
+			unlock(mal.bins[i].lock);
+		unlock(mal.split_merge_lock);
+	} else {
+		for (int i=0; i<64; i++)
+			mal.bins[i].lock[0] = mal.bins[i].lock[1] = 0;
+		mal.split_merge_lock[1] = 0;
+		mal.split_merge_lock[0] = 0;
+	}
+}
diff --git a/src/misc/syslog.c b/src/misc/syslog.c
index 13d4b0a6..7dc0c1be 100644
--- a/src/misc/syslog.c
+++ b/src/misc/syslog.c
@@ -10,6 +10,7 @@ 
 #include <errno.h>
 #include <fcntl.h>
 #include "lock.h"
+#include "fork_impl.h"
 
 static volatile int lock[1];
 static char log_ident[32];
@@ -17,6 +18,7 @@  static int log_opt;
 static int log_facility = LOG_USER;
 static int log_mask = 0xff;
 static int log_fd = -1;
+volatile int *const __syslog_lockptr = lock;
 
 int setlogmask(int maskpri)
 {
diff --git a/src/prng/random.c b/src/prng/random.c
index 633a17f6..d3780fa7 100644
--- a/src/prng/random.c
+++ b/src/prng/random.c
@@ -1,6 +1,7 @@ 
 #include <stdlib.h>
 #include <stdint.h>
 #include "lock.h"
+#include "fork_impl.h"
 
 /*
 this code uses the same lagged fibonacci generator as the
@@ -23,6 +24,7 @@  static int i = 3;
 static int j = 0;
 static uint32_t *x = init+1;
 static volatile int lock[1];
+volatile int *const __random_lockptr = lock;
 
 static uint32_t lcg31(uint32_t x) {
 	return (1103515245*x + 12345) & 0x7fffffff;
diff --git a/src/process/fork.c b/src/process/fork.c
index a12da01a..ecf7f376 100644
--- a/src/process/fork.c
+++ b/src/process/fork.c
@@ -1,13 +1,81 @@ 
 #include <unistd.h>
 #include "libc.h"
+#include "lock.h"
+#include "pthread_impl.h"
+#include "fork_impl.h"
+
+static volatile int *const dummy_lockptr = 0;
+
+weak_alias(dummy_lockptr, __at_quick_exit_lockptr);
+weak_alias(dummy_lockptr, __atexit_lockptr);
+weak_alias(dummy_lockptr, __dlerror_lockptr);
+weak_alias(dummy_lockptr, __gettext_lockptr);
+weak_alias(dummy_lockptr, __random_lockptr);
+weak_alias(dummy_lockptr, __sem_open_lockptr);
+weak_alias(dummy_lockptr, __stdio_ofl_lockptr);
+weak_alias(dummy_lockptr, __syslog_lockptr);
+weak_alias(dummy_lockptr, __timezone_lockptr);
+weak_alias(dummy_lockptr, __bump_lockptr);
+
+weak_alias(dummy_lockptr, __vmlock_lockptr);
+
+static volatile int *const *const atfork_locks[] = {
+	&__at_quick_exit_lockptr,
+	&__atexit_lockptr,
+	&__dlerror_lockptr,
+	&__gettext_lockptr,
+	&__random_lockptr,
+	&__sem_open_lockptr,
+	&__stdio_ofl_lockptr,
+	&__syslog_lockptr,
+	&__timezone_lockptr,
+	&__bump_lockptr,
+};
 
 static void dummy(int x) { }
 weak_alias(dummy, __fork_handler);
+weak_alias(dummy, __malloc_atfork);
+weak_alias(dummy, __ldso_atfork);
+
+static void dummy_0(void) { }
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
 
 pid_t fork(void)
 {
+	sigset_t set;
 	__fork_handler(-1);
+	__block_app_sigs(&set);
+	int need_locks = libc.need_locks > 0;
+	if (need_locks) {
+		__ldso_atfork(-1);
+		__inhibit_ptc();
+		for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++)
+			if (atfork_locks[i]) LOCK(*atfork_locks[i]);
+		__malloc_atfork(-1);
+		__tl_lock();
+	}
+	pthread_t self=__pthread_self(), next=self->next;
 	pid_t ret = _Fork();
+	if (need_locks) {
+		if (!ret) {
+			for (pthread_t td=next; td!=self; td=td->next)
+				td->tid = -1;
+			if (__vmlock_lockptr) {
+				__vmlock_lockptr[0] = 0;
+				__vmlock_lockptr[1] = 0;
+			}
+		}
+		__tl_unlock();
+		__malloc_atfork(!ret);
+		for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++)
+			if (atfork_locks[i])
+				if (ret) UNLOCK(*atfork_locks[i]);
+				else **atfork_locks[i] = 0;
+		__release_ptc();
+		__ldso_atfork(!ret);
+	}
+	__restore_sigs(&set);
 	__fork_handler(!ret);
 	return ret;
 }
diff --git a/src/stdio/ofl.c b/src/stdio/ofl.c
index f2d3215a..aad3d171 100644
--- a/src/stdio/ofl.c
+++ b/src/stdio/ofl.c
@@ -1,8 +1,10 @@ 
 #include "stdio_impl.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 static FILE *ofl_head;
 static volatile int ofl_lock[1];
+volatile int *const __stdio_ofl_lockptr = ofl_lock;
 
 FILE **__ofl_lock()
 {
diff --git a/src/thread/sem_open.c b/src/thread/sem_open.c
index de8555c5..f198056e 100644
--- a/src/thread/sem_open.c
+++ b/src/thread/sem_open.c
@@ -12,6 +12,7 @@ 
 #include <stdlib.h>
 #include <pthread.h>
 #include "lock.h"
+#include "fork_impl.h"
 
 static struct {
 	ino_t ino;
@@ -19,6 +20,7 @@  static struct {
 	int refcnt;
 } *semtab;
 static volatile int lock[1];
+volatile int *const __sem_open_lockptr = lock;
 
 #define FLAGS (O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK)
 
diff --git a/src/thread/vmlock.c b/src/thread/vmlock.c
index 75f3cb76..fa0a8e3c 100644
--- a/src/thread/vmlock.c
+++ b/src/thread/vmlock.c
@@ -1,6 +1,8 @@ 
 #include "pthread_impl.h"
+#include "fork_impl.h"
 
 static volatile int vmlock[2];
+volatile int *const __vmlock_lockptr = vmlock;
 
 void __vm_wait()
 {
diff --git a/src/time/__tz.c b/src/time/__tz.c
index 49a7371e..72c0d58b 100644
--- a/src/time/__tz.c
+++ b/src/time/__tz.c
@@ -6,6 +6,7 @@ 
 #include <sys/mman.h>
 #include "libc.h"
 #include "lock.h"
+#include "fork_impl.h"
 
 long  __timezone = 0;
 int   __daylight = 0;
@@ -30,6 +31,7 @@  static char *old_tz = old_tz_buf;
 static size_t old_tz_size = sizeof old_tz_buf;
 
 static volatile int lock[1];
+volatile int *const __timezone_lockptr = lock;
 
 static int getint(const char **p)
 {

Comments

Rich Felker Oct. 26, 2020, 3:29 a.m.
On Sun, Oct 25, 2020 at 08:59:20PM -0400, Rich Felker wrote:
> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index af983692..32e88508 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -21,6 +21,7 @@
>  #include <semaphore.h>
>  #include <sys/membarrier.h>
>  #include "pthread_impl.h"
> +#include "fork_impl.h"
>  #include "libc.h"
>  #include "dynlink.h"
>  
> @@ -1404,6 +1405,17 @@ void __libc_exit_fini()
>  	}
>  }
>  
> +void __ldso_atfork(int who)
> +{
> +	if (who<0) {
> +		pthread_rwlock_wrlock(&lock);
> +		pthread_mutex_lock(&init_fini_lock);
> +	} else {
> +		pthread_mutex_unlock(&init_fini_lock);
> +		pthread_rwlock_unlock(&lock);
> +	}
> +}
> +
>  static struct dso **queue_ctors(struct dso *dso)
>  {
>  	size_t cnt, qpos, spos, i;
> @@ -1462,6 +1474,12 @@ static struct dso **queue_ctors(struct dso *dso)
>  	}
>  	queue[qpos] = 0;
>  	for (i=0; i<qpos; i++) queue[i]->mark = 0;
> +	for (i=0; i<qpos; i++) if (queue[i]->ctor_visitor->tid < 0) {
                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Invalid access as-is, should be queue[i]->ctor_visitor && ...

> +		error("State of %s is inconsistent due to multithreaded fork\n",
> +			queue[i]->name);
> +		free(queue);
> +		if (runtime) longjmp(*rtld_fail, 1);
> +	}
>  
>  	return queue;
>  }

Rich
Érico Nogueira Oct. 26, 2020, 6:44 p.m.
On Sun Oct 25, 2020 at 8:29 PM -03, Rich Felker wrote:
> > +	for (i=0; i<qpos; i++) if (queue[i]->ctor_visitor->tid < 0) {
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Invalid access as-is, should be queue[i]->ctor_visitor && ...
>
> > +		error("State of %s is inconsistent due to multithreaded fork\n",
> > +			queue[i]->name);
> > +		free(queue);
> > +		if (runtime) longjmp(*rtld_fail, 1);
> > +	}
> >  
> >  	return queue;
> >  }
>
> Rich

As a warning, don't install the resulting libc.so on your system without
the above fix! It segfaulted even with simple applications here.

Re. the patches, I am now able to import an image into gscan2pdf (a Perl
GTK application) - though it required building Perl with a bigger thread
stack size. With musl 1.2.1 it simply hung on a futex syscall.

Cheers,
Érico
Rich Felker Oct. 26, 2020, 7:52 p.m.
On Mon, Oct 26, 2020 at 03:44:51PM -0300, Érico Nogueira wrote:
> On Sun Oct 25, 2020 at 8:29 PM -03, Rich Felker wrote:
> > > +	for (i=0; i<qpos; i++) if (queue[i]->ctor_visitor->tid < 0) {
> > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> >
> > Invalid access as-is, should be queue[i]->ctor_visitor && ...
> >
> > > +		error("State of %s is inconsistent due to multithreaded fork\n",
> > > +			queue[i]->name);
> > > +		free(queue);
> > > +		if (runtime) longjmp(*rtld_fail, 1);
> > > +	}
> > >  
> > >  	return queue;
> > >  }
> 
> As a warning, don't install the resulting libc.so on your system without
> the above fix! It segfaulted even with simple applications here.

Yep, sorry about that. I'd rebased out an older version with
queue[i]->ctor_visitor being the tid itself rather than the pthread_t,
and hadn't retested it since adding the ->tid.

> Re. the patches, I am now able to import an image into gscan2pdf (a Perl
> GTK application) - though it required building Perl with a bigger thread
> stack size. With musl 1.2.1 it simply hung on a futex syscall.

Great to hear! Was it "working" in earlier musl though? The crash (as
discussed on irc) was clearly a stack overflow but it seems odd that
it would be newly introduced, unless it was just really borderline on
fitting before.

Rich
Érico Nogueira Oct. 26, 2020, 8:11 p.m.
On Mon Oct 26, 2020 at 12:52 PM -03, Rich Felker wrote:
> On Mon, Oct 26, 2020 at 03:44:51PM -0300, Érico Nogueira wrote:
> > Re. the patches, I am now able to import an image into gscan2pdf (a Perl
> > GTK application) - though it required building Perl with a bigger thread
> > stack size. With musl 1.2.1 it simply hung on a futex syscall.
>
> Great to hear! Was it "working" in earlier musl though? The crash (as
> discussed on irc) was clearly a stack overflow but it seems odd that
> it would be newly introduced, unless it was just really borderline on
> fitting before.
>
> Rich

I hadn't tested with any musl version but 1.2.1. A quick test with
1.1.24 shows that it didn't hang then, but hit the same segfault if
using perl without a fixed thread stack size.

The only reason the segfault seemed new to me was because the program
had never reached past the hang in the futex syscall :)

I will see if I can find the time to write some programs to torture test
the impl further, assuming that would help.

Cheers,
Érico
Rich Felker Oct. 27, 2020, 9:17 p.m.
On Sun, Oct 25, 2020 at 08:59:20PM -0400, Rich Felker wrote:
> On Sun, Oct 25, 2020 at 08:50:29PM -0400, Rich Felker wrote:
> > I just pushed a series of changes (up through 0b87551bdf) I've had
> > queued for a while now, some of which had minor issues that I think
> > have all been resolved now. They cover a range of bugs found in the
> > process of reviewing the possibility of making fork provide a
> > consistent execution environment for the child of a multithreaded
> > parent, and a couple unrelated fixes.
> > 
> > Based on distro experience with musl 1.2.1, I'm working on getting the
> > improved fork into 1.2.2. Despite the fact that 1.2.1 did not break
> > anything that wasn't already broken (apps invoking UB in MT-forked
> > children), prior to it most of the active breakage was hit with very
> > low probability, so there were a lot of packages people *thought* were
> > working, that weren't, and feedback from distros seems to be that
> > getting everything working as reliably as before (even if it was
> > imperfect and dangerous before) is not tractable in any reasonable
> > time frame. And in particular, I'm concerned about language runtimes
> > like Ruby that seem to have a contract with applications they host to
> > support MT-forked children. Fixing these is not a matter of fixing a
> > finite set of bugs but fixing a contract, which is likely not
> > tractable.
> > 
> > Assuming it goes through, the change here will be far more complete
> > than glibc's handling of MT-forked children, where most things other
> > than malloc don't actually work, but fail sufficiently infrequently
> > that they seem to work. While there are a lot of things I dislike
> > about this path, one major thing I do like is that it really makes
> > internal use of threads by library code (including third party libs)
> > transparent to the application, rather than "transparent, until you
> > use fork".
> > 
> > Will follow up with draft patch for testing.
> 
> Patch attached. It should suffice for testing and review of whether
> there are any locks/state I overlooked. It could possibly be made less
> ugly too.
> 
> Note that this does not strictly conform to past and current POSIX
> that specify fork as AS-safe. POSIX-future has removed fork from the
> AS-safe list, and seems to have considered its original inclusion
> erroneous due to pthread_atfork and existing implementation practice.
> The patch as written takes care to skip all locking in single-threaded
> parents, so it does not break AS-safety property in single-threaded
> programs that may have made use of it. -Dfork=_Fork can also be used
> to get an AS-safe fork, but it's not equivalent to old semantics;
> _Fork does not run atfork handlers. It's also possible to static-link
> an alternate fork implementation that provides its own pthread_atfork
> and calls _Fork, if really needed for a particular application.
> 
> Feedback from distro folks would be very helpful -- does this fix all
> the packages that 1.2.1 "broke"?

Another bug:

> diff --git a/src/process/fork.c b/src/process/fork.c
> index a12da01a..ecf7f376 100644
> --- a/src/process/fork.c
> +++ b/src/process/fork.c
> @@ -1,13 +1,81 @@
>  #include <unistd.h>
>  #include "libc.h"
> +#include "lock.h"
> +#include "pthread_impl.h"
> +#include "fork_impl.h"
> +
> +static volatile int *const dummy_lockptr = 0;
> +
> +weak_alias(dummy_lockptr, __at_quick_exit_lockptr);
> +weak_alias(dummy_lockptr, __atexit_lockptr);
> +weak_alias(dummy_lockptr, __dlerror_lockptr);
> +weak_alias(dummy_lockptr, __gettext_lockptr);
> +weak_alias(dummy_lockptr, __random_lockptr);
> +weak_alias(dummy_lockptr, __sem_open_lockptr);
> +weak_alias(dummy_lockptr, __stdio_ofl_lockptr);
> +weak_alias(dummy_lockptr, __syslog_lockptr);
> +weak_alias(dummy_lockptr, __timezone_lockptr);
> +weak_alias(dummy_lockptr, __bump_lockptr);
> +
> +weak_alias(dummy_lockptr, __vmlock_lockptr);
> +
> +static volatile int *const *const atfork_locks[] = {
> +	&__at_quick_exit_lockptr,
> +	&__atexit_lockptr,
> +	&__dlerror_lockptr,
> +	&__gettext_lockptr,
> +	&__random_lockptr,
> +	&__sem_open_lockptr,
> +	&__stdio_ofl_lockptr,
> +	&__syslog_lockptr,
> +	&__timezone_lockptr,
> +	&__bump_lockptr,
> +};
>  
>  static void dummy(int x) { }
>  weak_alias(dummy, __fork_handler);
> +weak_alias(dummy, __malloc_atfork);
> +weak_alias(dummy, __ldso_atfork);
> +
> +static void dummy_0(void) { }
> +weak_alias(dummy_0, __tl_lock);
> +weak_alias(dummy_0, __tl_unlock);
>  
>  pid_t fork(void)
>  {
> +	sigset_t set;
>  	__fork_handler(-1);
> +	__block_app_sigs(&set);
> +	int need_locks = libc.need_locks > 0;
> +	if (need_locks) {
> +		__ldso_atfork(-1);
> +		__inhibit_ptc();
> +		for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++)
> +			if (atfork_locks[i]) LOCK(*atfork_locks[i]);
                            ^^^^^^^^^^^^^^^

Always non-null because it's missing a level of indirection; causes
static linked program to crash. Should be if (*atfork_locks[i]).

> +		__malloc_atfork(-1);
> +		__tl_lock();
> +	}
> +	pthread_t self=__pthread_self(), next=self->next;
>  	pid_t ret = _Fork();
> +	if (need_locks) {
> +		if (!ret) {
> +			for (pthread_t td=next; td!=self; td=td->next)
> +				td->tid = -1;
> +			if (__vmlock_lockptr) {
> +				__vmlock_lockptr[0] = 0;
> +				__vmlock_lockptr[1] = 0;
> +			}
> +		}
> +		__tl_unlock();
> +		__malloc_atfork(!ret);
> +		for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++)
> +			if (atfork_locks[i])
                            ^^^^^^^^^^^^^^^

And same here.

> +				if (ret) UNLOCK(*atfork_locks[i]);
> +				else **atfork_locks[i] = 0;
> +		__release_ptc();
> +		__ldso_atfork(!ret);
> +	}
> +	__restore_sigs(&set);
>  	__fork_handler(!ret);
>  	return ret;
>  }