Planned robust mutex internals changes

Submitted by Rich Felker on Sept. 5, 2018, 3:09 a.m.

Details

Message ID 20180905030932.GG1878@brightrain.aerifal.cx
State New
Series "Planned robust mutex internals changes"
Headers show

Commit Message

Rich Felker Sept. 5, 2018, 3:09 a.m.
On Fri, Aug 31, 2018 at 12:45:12PM -0400, Rich Felker wrote:
> Recent debugging with Adelie Linux & POSIX conformance tests gave me a
> bit of a scare about the safety of the current robust mutex
> implementation. It turned out the be completely unrelated (kernel
> problem) but it did remind me that there's some mildly sketchy stuff
> going on right now.
> 
> In order to implement ENOTRECOVERABLE, the current implementation
> changes a bit of the mutex type field to indicate that it's recovered
> after EOWNERDEAD and will go into ENOTRECOVERABLE state if
> pthread_mutex_consistent is not called before unlocking. While it's
> only the thread that holds the lock that needs access to this
> information (except possibly for the same of pthread_mutex_consistent
> choosing between EINVAL and EPERM for erroneous calls), the change to
> the type field is formally a data race with all other threads that
> perform any operation on the mutex. No individual bits race, and no
> write races are possible, so things are "okay" in some sense, but it's
> still not good.
> 
> What I plan to do is move this state to the mutex owner/lock field,
> which should be the only field of the object (aside from waiter count)
> that's mutable.
> 
> Currently, the lock field looks like this:
> 
> bits 0-29:  owner; 0 if unlocked or if owned died; -1 if unrecoverable
> bit 30:     owner died flag, also set if unrecoverable; otherwise 0
> bit 31:     new waiters flag
> 
> Ignoring bit 31, this means the possible values are 0 (unlocked), a
> tid for the owner, 0x40000000 after owner died, and 0x7fffffff when
> unrecoverable.
> 
> What I'd like too do is use 0x40000000|tid as the value for when the
> lock is held but pthread_mutex_consistent hasn't yet been called. Note
> that at the kernel level, bit 29 (0x20000000) is reserved not to be
> used as part of a tid, so this does not overlap with the special value
> 0x7fffffff. And there's still some room for extensibility, since we
> could use bit 31 along with values that don't indicate a mutex state
> that can be waited on.
> 
> With these changes, no fields of mutex object will be mutable except
> for the lock state (owner, futex) and the waiter count.
> 
> Anyway I'm posting this now as notes/reminder and for comments in case
> others see a problem with the proposal or ideas for improvement. I'll
> probably hold off on doing any of this until after release.

And here's the proposed patch.

Rich

Patch hide | download patch | download mbox

diff --git a/src/thread/pthread_mutex_consistent.c b/src/thread/pthread_mutex_consistent.c
index 96b83b5..27c74e5 100644
--- a/src/thread/pthread_mutex_consistent.c
+++ b/src/thread/pthread_mutex_consistent.c
@@ -1,10 +1,14 @@ 
 #include "pthread_impl.h"
+#include "atomic.h"
 
 int pthread_mutex_consistent(pthread_mutex_t *m)
 {
-	if (!(m->_m_type & 8)) return EINVAL;
-	if ((m->_m_lock & 0x7fffffff) != __pthread_self()->tid)
+	int old = m->_m_lock;
+	int own = old & 0x3fffffff;
+	if (!(m->_m_type & 4) || !own || !(old & 0x40000000))
+		return EINVAL;
+	if (own != __pthread_self()->tid)
 		return EPERM;
-	m->_m_type &= ~8U;
+	a_and(&m->_m_lock, ~0x40000000);
 	return 0;
 }
diff --git a/src/thread/pthread_mutex_timedlock.c b/src/thread/pthread_mutex_timedlock.c
index d2bd196..02c2ff7 100644
--- a/src/thread/pthread_mutex_timedlock.c
+++ b/src/thread/pthread_mutex_timedlock.c
@@ -18,10 +18,12 @@  int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec
 	while (spins-- && m->_m_lock && !m->_m_waiters) a_spin();
 
 	while ((r=__pthread_mutex_trylock(m)) == EBUSY) {
-		if (!(r=m->_m_lock) || ((r&0x40000000) && (type&4)))
+		r = m->_m_lock;
+		int own = r & 0x3fffffff;
+		if (!own && (!r || (type&4)))
 			continue;
 		if ((type&3) == PTHREAD_MUTEX_ERRORCHECK
-		 && (r&0x7fffffff) == __pthread_self()->tid)
+		    && own == __pthread_self()->tid)
 			return EDEADLK;
 
 		a_inc(&m->_m_waiters);
diff --git a/src/thread/pthread_mutex_trylock.c b/src/thread/pthread_mutex_trylock.c
index 783ca0c..3fe5912 100644
--- a/src/thread/pthread_mutex_trylock.c
+++ b/src/thread/pthread_mutex_trylock.c
@@ -8,14 +8,14 @@  int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 	int tid = self->tid;
 
 	old = m->_m_lock;
-	own = old & 0x7fffffff;
+	own = old & 0x3fffffff;
 	if (own == tid && (type&3) == PTHREAD_MUTEX_RECURSIVE) {
 		if ((unsigned)m->_m_count >= INT_MAX) return EAGAIN;
 		m->_m_count++;
 		return 0;
 	}
-	if (own == 0x7fffffff) return ENOTRECOVERABLE;
-	if (own && (!(own & 0x40000000) || !(type & 4))) return EBUSY;
+	if (own == 0x3fffffff) return ENOTRECOVERABLE;
+	if (own || (old && !(type & 4))) return EBUSY;
 
 	if (m->_m_type & 128) {
 		if (!self->robust_list.off) {
@@ -25,6 +25,7 @@  int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 		if (m->_m_waiters) tid |= 0x80000000;
 		self->robust_list.pending = &m->_m_next;
 	}
+	tid |= old & 0x40000000;
 
 	if (a_cas(&m->_m_lock, old, tid) != old) {
 		self->robust_list.pending = 0;
@@ -39,9 +40,8 @@  int __pthread_mutex_trylock_owner(pthread_mutex_t *m)
 	self->robust_list.head = &m->_m_next;
 	self->robust_list.pending = 0;
 
-	if (own) {
+	if (old) {
 		m->_m_count = 0;
-		m->_m_type |= 8;
 		return EOWNERDEAD;
 	}
 
diff --git a/src/thread/pthread_mutex_unlock.c b/src/thread/pthread_mutex_unlock.c
index 7dd00d2..ea9f54d 100644
--- a/src/thread/pthread_mutex_unlock.c
+++ b/src/thread/pthread_mutex_unlock.c
@@ -7,13 +7,18 @@  int __pthread_mutex_unlock(pthread_mutex_t *m)
 	int cont;
 	int type = m->_m_type & 15;
 	int priv = (m->_m_type & 128) ^ 128;
+	int new = 0;
 
 	if (type != PTHREAD_MUTEX_NORMAL) {
 		self = __pthread_self();
-		if ((m->_m_lock&0x7fffffff) != self->tid)
+		int old = m->_m_lock;
+		int own = old & 0x3fffffff;
+		if (own != self->tid)
 			return EPERM;
 		if ((type&3) == PTHREAD_MUTEX_RECURSIVE && m->_m_count)
 			return m->_m_count--, 0;
+		if ((type&4) && (old&0x40000000))
+			new = 0x7fffffff;
 		if (!priv) {
 			self->robust_list.pending = &m->_m_next;
 			__vm_lock();
@@ -24,7 +29,7 @@  int __pthread_mutex_unlock(pthread_mutex_t *m)
 		if (next != &self->robust_list.head) *(volatile void *volatile *)
 			((char *)next - sizeof(void *)) = prev;
 	}
-	cont = a_swap(&m->_m_lock, (type & 8) ? 0x7fffffff : 0);
+	cont = a_swap(&m->_m_lock, new);
 	if (type != PTHREAD_MUTEX_NORMAL && !priv) {
 		self->robust_list.pending = 0;
 		__vm_unlock();