[RHEL7,COMMIT] ms/packet: fix bitfield update race

Submitted by Vasily Averin on Dec. 3, 2020, 9:31 a.m.

Details

Message ID 202012030931.0B39VWZk006641@vz7build.vvs.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Vasily Averin Dec. 3, 2020, 9:31 a.m.
The commit is pushed to "branch-rh7-3.10.0-1160.6.1.vz7.171.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.6.1.vz7.171.1
------>
commit 54b1fa63ec25f8831894436420966fe91f90abc0
Author: Willem de Bruijn <willemb@google.com>
Date:   Thu Dec 3 12:31:32 2020 +0300

    ms/packet: fix bitfield update race
    
    Updates to the bitfields in struct packet_sock are not atomic.
    Serialize these read-modify-write cycles.
    
    Move po->running into a separate variable. Its writes are protected by
    po->bind_lock (except for one startup case at packet_create). Also
    replace a textual precondition warning with lockdep annotation.
    
    All others are set only in packet_setsockopt. Serialize these
    updates by holding the socket lock. Analogous to other field updates,
    also hold the lock when testing whether a ring is active (pg_vec).
    
    Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg")
    Reported-by: DaeRyong Jeong <threeearcat@gmail.com>
    Reported-by: Byoungyoung Lee <byoungyoung@purdue.edu>
    Signed-off-by: Willem de Bruijn <willemb@google.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
    
    (cherry-picked from commit a6361f0ca4b25460f2cdf3235ebe8115f622901e)
    https://syzkaller.appspot.com/bug?id=8acde8a70c4bca0d0c39e975409c1008663c93fd
    https://jira.sw.ru/browse/PSBM-123046
    Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 net/packet/af_packet.c | 60 ++++++++++++++++++++++++++++++++++++--------------
 net/packet/internal.h  | 10 ++++-----
 2 files changed, 49 insertions(+), 21 deletions(-)

Patch hide | download patch | download mbox

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 112c856..b71157c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -261,11 +261,11 @@  static void packet_cached_dev_reset(struct packet_sock *po)
 	RCU_INIT_POINTER(po->cached_dev, NULL);
 }
 
-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
  * or from a context in which asynchronous accesses to the packet
  * socket is not possible (packet_create()).
  */
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
 {
 	struct packet_sock *po = pkt_sk(sk);
 
@@ -280,8 +280,13 @@  static void register_prot_hook(struct sock *sk)
 	}
 }
 
-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held.   If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+	lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+	__register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
  * the po->bind_lock and do a synchronize_net to make sure no
  * asynchronous packet processing paths still refer to the elements
  * of po->prot_hook.  If the sync parameter is false, it is the
@@ -291,6 +296,8 @@  static void __unregister_prot_hook(struct sock *sk, bool sync)
 {
 	struct packet_sock *po = pkt_sk(sk);
 
+	lockdep_assert_held_once(&po->bind_lock);
+
 	po->running = 0;
 
 	if (po->fanout)
@@ -2897,7 +2904,7 @@  static int packet_create(struct net *net, struct socket *sock, int protocol,
 
 	if (proto) {
 		po->prot_hook.type = proto;
-		register_prot_hook(sk);
+		__register_prot_hook(sk);
 	}
 
 	mutex_lock(&net->packet.sklist_lock);
@@ -3440,12 +3447,18 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen != sizeof(val))
 			return -EINVAL;
-		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
-		po->tp_loss = !!val;
-		return 0;
+
+		lock_sock(sk);
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+			ret = -EBUSY;
+		} else {
+			po->tp_loss = !!val;
+			ret = 0;
+		}
+		release_sock(sk);
+		return ret;
 	}
 	case PACKET_AUXDATA:
 	{
@@ -3456,7 +3469,9 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
 
+		lock_sock(sk);
 		po->auxdata = !!val;
+		release_sock(sk);
 		return 0;
 	}
 	case PACKET_ORIGDEV:
@@ -3468,7 +3483,9 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
 
+		lock_sock(sk);
 		po->origdev = !!val;
+		release_sock(sk);
 		return 0;
 	}
 	case PACKET_VNET_HDR:
@@ -3477,15 +3494,20 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (sock->type != SOCK_RAW)
 			return -EINVAL;
-		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-			return -EBUSY;
 		if (optlen < sizeof(val))
 			return -EINVAL;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
 
-		po->has_vnet_hdr = !!val;
-		return 0;
+		lock_sock(sk);
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+			ret = -EBUSY;
+		} else {
+			po->has_vnet_hdr = !!val;
+			ret = 0;
+		}
+		release_sock(sk);
+		return ret;
 	}
 	case PACKET_TIMESTAMP:
 	{
@@ -3516,11 +3538,17 @@  packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen != sizeof(val))
 			return -EINVAL;
-		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
-			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
-		po->tp_tx_has_off = !!val;
+
+		lock_sock(sk);
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+			ret = -EBUSY;
+		} else {
+			po->tp_tx_has_off = !!val;
+			ret = 0;
+		}
+		release_sock(sk);
 		return 0;
 	}
 	default:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 0ecefd4..9408dbf 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -98,10 +98,12 @@  struct packet_sock {
 	int			copy_thresh;
 	spinlock_t		bind_lock;
 	struct mutex		pg_vec_lock;
-	unsigned int		running:1,	/* prot_hook is attached*/
-				auxdata:1,
+	unsigned int		running;	/* bind_lock must be held */
+	unsigned int		auxdata:1,	/* writer must hold sock lock */
 				origdev:1,
-				has_vnet_hdr:1;
+				has_vnet_hdr:1,
+				tp_loss:1,
+				tp_tx_has_off:1;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
 	struct packet_mclist	*mclist;
@@ -109,8 +111,6 @@  struct packet_sock {
 	enum tpacket_versions	tp_version;
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
-	unsigned int		tp_loss:1;
-	unsigned int		tp_tx_has_off:1;
 	unsigned int		tp_tstamp;
 	struct net_device __rcu	*cached_dev;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;