[1/1] userns: Fix/clarify memory ordering

Submitted by Christian Brauner on Nov. 2, 2017, 11:03 a.m.

Details

Message ID 20171102110344.31647-2-christian.brauner@ubuntu.com
State New
Series "userns: Fix/clarify memory ordering"
Headers show

Commit Message

Christian Brauner Nov. 2, 2017, 11:03 a.m.
Nikolay noticed a number of undocumented memory barriers in this code;
the ordering is fairly simple but not explicitly described. Cure that.

Switch over to smp_store_release() / smp_load_acquire() as that is the
natural fit for the pattern and includes the missing but required
WRITE_ONCE()/READ_ONCE()s.

CC: Eric Biederman <ebiederm@xmission.com>
Cc: Linux Containers <containers@lists.linux-foundation.org>
Reported-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 kernel/user_namespace.c | 74 +++++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 27 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 899c31060ff3..2129762a930e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -27,8 +27,47 @@ 
 #include <linux/sort.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
+
+/*
+ * The userns_state_mutex serializes all writes to any given map.
+ *
+ * Any map is only ever written once.
+ *
+ * An id map fits within 1 cache line on most architectures.
+ */
 static DEFINE_MUTEX(userns_state_mutex);
 
+/*
+ * There is a data dependency between reading the count of the extents and the
+ * values of the extents. The desired behavior is to see the values of the
+ * extents that were written before the count of the extents.
+ *
+ * To achieve this, smp_store_release() guarantees the write order and
+ * smp_load_acquire() guarantees that we observe the written data.
+ */
+static inline void map_store_extents(struct uid_gid_map *map,
+				     unsigned int extents)
+{
+	/*
+	 * Ensure the map->extent[] stores happen-before we grow map->nr_extents
+	 * to cover them.
+	 *
+	 * Matches the load-acquire in map_load_extents().
+	 */
+	smp_store_release(&map->nr_extents, extents);
+}
+
+static inline unsigned int map_load_extents(struct uid_gid_map *map)
+{
+	/*
+	 * Ensure the map->nr_extents load happens-before we try and access
+	 * map->extent[], such that we guarantee the data is in fact there.
+	 *
+	 * Matches the store-release in map_store_extents().
+	 */
+	return smp_load_acquire(&map->nr_extents);
+}
+
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
@@ -296,9 +335,9 @@  map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 co
 static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
 {
 	struct uid_gid_extent *extent;
-	unsigned extents = map->nr_extents;
-	smp_rmb();
+	unsigned extents;
 
+	extents = map_load_extents(map);
 	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
 		extent = map_id_range_down_base(extents, map, id, count);
 	else
@@ -359,9 +398,9 @@  map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
 static u32 map_id_up(struct uid_gid_map *map, u32 id)
 {
 	struct uid_gid_extent *extent;
-	unsigned extents = map->nr_extents;
-	smp_rmb();
+	unsigned extents;
 
+	extents = map_load_extents(map);
 	if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
 		extent = map_id_up_base(extents, map, id);
 	else
@@ -647,9 +686,9 @@  static void *m_start(struct seq_file *seq, loff_t *ppos,
 		     struct uid_gid_map *map)
 {
 	loff_t pos = *ppos;
-	unsigned extents = map->nr_extents;
-	smp_rmb();
+	unsigned extents;
 
+	extents = map_load_extents(map);
 	if (pos >= extents)
 		return NULL;
 
@@ -860,25 +899,6 @@  static ssize_t map_write(struct file *file, const char __user *buf,
 	char *kbuf = NULL, *pos, *next_line;
 	ssize_t ret = -EINVAL;
 
-	/*
-	 * The userns_state_mutex serializes all writes to any given map.
-	 *
-	 * Any map is only ever written once.
-	 *
-	 * An id map fits within 1 cache line on most architectures.
-	 *
-	 * On read nothing needs to be done unless you are on an
-	 * architecture with a crazy cache coherency model like alpha.
-	 *
-	 * There is a one time data dependency between reading the
-	 * count of the extents and the values of the extents.  The
-	 * desired behavior is to see the values of the extents that
-	 * were written before the count of the extents.
-	 *
-	 * To achieve this smp_wmb() is used on guarantee the write
-	 * order and smp_rmb() is guaranteed that we don't have crazy
-	 * architectures returning stale data.
-	 */
 	mutex_lock(&userns_state_mutex);
 
 	memset(&new_map, 0, sizeof(struct uid_gid_map));
@@ -1015,8 +1035,8 @@  static ssize_t map_write(struct file *file, const char __user *buf,
 		map->forward = new_map.forward;
 		map->reverse = new_map.reverse;
 	}
-	smp_wmb();
-	map->nr_extents = new_map.nr_extents;
+
+	map_store_extents(map, new_map.nr_extents);
 
 	*ppos = count;
 	ret = count;