[Devel,3/6] proc connector: proc listeners helpers added

Submitted by Stanislav Kinsburskiy on Aug. 15, 2017, 12:42 p.m.

Details

Message ID 20170815124204.22445.36052.stgit@localhost.localdomain
State New
Series "proc connector: containerize on per-net basis"
Headers show

Commit Message

Stanislav Kinsburskiy Aug. 15, 2017, 12:42 p.m.
These are precursor helpers, which will hide all the containerization magic.

Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
---
 drivers/connector/cn_proc.c |   57 +++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index dcd993e..5fc1105 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -66,6 +66,28 @@  static inline void get_seq(__u32 *ts, int *cpu)
 	preempt_enable();
 }
 
+static int get_listeners(struct net *net)
+{
+	return atomic_read(&proc_event_num_listeners);
+}
+
+static void inc_listeners(struct net *net)
+{
+	atomic_inc(&proc_event_num_listeners);
+}
+
+static void dec_listeners(struct net *net)
+{
+	atomic_dec(&proc_event_num_listeners);
+}
+
+static struct net *task_net(struct task_struct *task)
+{
+	if (task->nsproxy && task->nsproxy->net_ns)
+		return task->nsproxy->net_ns;
+	return &init_net;
+}
+
 void proc_fork_connector(struct task_struct *task)
 {
 	struct cn_msg *msg;
@@ -73,8 +95,9 @@  void proc_fork_connector(struct task_struct *task)
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 	struct timespec ts;
 	struct task_struct *parent;
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -106,8 +129,9 @@  void proc_exec_connector(struct task_struct *task)
 	struct proc_event *ev;
 	struct timespec ts;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -134,8 +158,9 @@  void proc_id_connector(struct task_struct *task, int which_id)
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 	struct timespec ts;
 	const struct cred *cred;
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -174,8 +199,9 @@  void proc_sid_connector(struct task_struct *task)
 	struct proc_event *ev;
 	struct timespec ts;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -201,8 +227,9 @@  void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	struct proc_event *ev;
 	struct timespec ts;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -236,8 +263,9 @@  void proc_comm_connector(struct task_struct *task)
 	struct proc_event *ev;
 	struct timespec ts;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -264,8 +292,9 @@  void proc_coredump_connector(struct task_struct *task)
 	struct proc_event *ev;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 	struct timespec ts;
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -291,8 +320,9 @@  void proc_exit_connector(struct task_struct *task)
 	struct proc_event *ev;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 	struct timespec ts;
+	struct net *net = task_net(task);
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -322,14 +352,14 @@  void proc_exit_connector(struct task_struct *task)
  * values because it's not being returned via syscall return
  * mechanisms.
  */
-static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
+static void cn_proc_ack(struct net *net, int err, int rcvd_seq, int rcvd_ack)
 {
 	struct cn_msg *msg;
 	struct proc_event *ev;
 	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
 	struct timespec ts;
 
-	if (atomic_read(&proc_event_num_listeners) < 1)
+	if (get_listeners(net) < 1)
 		return;
 
 	msg = buffer_to_cn_msg(buffer);
@@ -357,6 +387,7 @@  static void cn_proc_mcast_ctl(struct cn_msg *msg,
 {
 	enum proc_cn_mcast_op *mc_op = NULL;
 	int err = 0;
+	struct net *net = nsp->sk->sk_net;
 
 	if (msg->len != sizeof(*mc_op))
 		return;
@@ -379,10 +410,10 @@  static void cn_proc_mcast_ctl(struct cn_msg *msg,
 	mc_op = (enum proc_cn_mcast_op *)msg->data;
 	switch (*mc_op) {
 	case PROC_CN_MCAST_LISTEN:
-		atomic_inc(&proc_event_num_listeners);
+		inc_listeners(net);
 		break;
 	case PROC_CN_MCAST_IGNORE:
-		atomic_dec(&proc_event_num_listeners);
+		dec_listeners(net);
 		break;
 	default:
 		err = EINVAL;
@@ -390,7 +421,7 @@  static void cn_proc_mcast_ctl(struct cn_msg *msg,
 	}
 
 out:
-	cn_proc_ack(err, msg->seq, msg->ack);
+	cn_proc_ack(net, err, msg->seq, msg->ack);
 }
 
 int __net_init cn_proc_init_net(struct net *net)

Comments

Andrey Ryabinin Aug. 15, 2017, 1:52 p.m.
On 08/15/2017 03:42 PM, Stanislav Kinsburskiy wrote:
> These are precursor helpers, which will hide all the containerization magic.
> 
> Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
> ---
>  drivers/connector/cn_proc.c |   57 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 44 insertions(+), 13 deletions(-)
> 

>  void proc_fork_connector(struct task_struct *task)
>  {
>  	struct cn_msg *msg;
> @@ -73,8 +95,9 @@ void proc_fork_connector(struct task_struct *task)
>  	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
>  	struct timespec ts;
>  	struct task_struct *parent;
> +	struct net *net = task_net(task);
>  
> -	if (atomic_read(&proc_event_num_listeners) < 1)
> +	if (get_listeners(net) < 1)

And now the listener on host won't receive anything, because the forked task has different net namespace.
This should remain the global counter.

Also you need to craft and send multiple messages for every listener with pid of tasks in the *listener's* pid namespace.
And same for all other connectors.
Stanislav Kinsburskiy Aug. 15, 2017, 1:58 p.m.
15.08.2017 16:52, Andrey Ryabinin пишет:
> On 08/15/2017 03:42 PM, Stanislav Kinsburskiy wrote:
>> These are precursor helpers, which will hide all the containerization magic.
>>
>> Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
>> ---
>>  drivers/connector/cn_proc.c |   57 +++++++++++++++++++++++++++++++++----------
>>  1 file changed, 44 insertions(+), 13 deletions(-)
>>
> 
>>  void proc_fork_connector(struct task_struct *task)
>>  {
>>  	struct cn_msg *msg;
>> @@ -73,8 +95,9 @@ void proc_fork_connector(struct task_struct *task)
>>  	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
>>  	struct timespec ts;
>>  	struct task_struct *parent;
>> +	struct net *net = task_net(task);
>>  
>> -	if (atomic_read(&proc_event_num_listeners) < 1)
>> +	if (get_listeners(net) < 1)
> 
> And now the listener on host won't receive anything, because the forked task has different net namespace.

Makes sense. Looks like get_exec_env()->ve_netns has to be used instead.
The structure is created for containers init net anyways.

> This should remain the global counter.
> 

Why?

> Also you need to craft and send multiple messages for every listener with pid of tasks in the *listener's* pid namespace.
> And same for all other connectors.
> 

Didn't get it.
Andrey Ryabinin Aug. 15, 2017, 2:13 p.m.
On 08/15/2017 04:58 PM, Stanislav Kinsburskiy wrote:
> 
> 
> 15.08.2017 16:52, Andrey Ryabinin пишет:
>> On 08/15/2017 03:42 PM, Stanislav Kinsburskiy wrote:
>>> These are precursor helpers, which will hide all the containerization magic.
>>>
>>> Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
>>> ---
>>>  drivers/connector/cn_proc.c |   57 +++++++++++++++++++++++++++++++++----------
>>>  1 file changed, 44 insertions(+), 13 deletions(-)
>>>
>>
>>>  void proc_fork_connector(struct task_struct *task)
>>>  {
>>>  	struct cn_msg *msg;
>>> @@ -73,8 +95,9 @@ void proc_fork_connector(struct task_struct *task)
>>>  	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
>>>  	struct timespec ts;
>>>  	struct task_struct *parent;
>>> +	struct net *net = task_net(task);
>>>  
>>> -	if (atomic_read(&proc_event_num_listeners) < 1)
>>> +	if (get_listeners(net) < 1)
>>
>> And now the listener on host won't receive anything, because the forked task has different net namespace.
> 
> Makes sense. Looks like get_exec_env()->ve_netns has to be used instead.
> The structure is created for containers init net anyways.
> 
>> This should remain the global counter.
>>
> 
> Why?
> 
>> Also you need to craft and send multiple messages for every listener with pid of tasks in the *listener's* pid namespace.
>> And same for all other connectors.
>>
> 
> Didn't get it.
> 

E.g. we have a listener on the host, i.e. with all init namespaces. And we also have a listener in some container.
Let's say we have task "A" in the container, which has global pid 10 and pid 3 in the container.
So if "A" forks, the host listener must receive a message that task 10 forked, and the container's listener must receive a message that task 3 forked.
Stanislav Kinsburskiy Aug. 15, 2017, 2:20 p.m.
15.08.2017 17:13, Andrey Ryabinin пишет:
> 
> 
> On 08/15/2017 04:58 PM, Stanislav Kinsburskiy wrote:
>>
>>
>> 15.08.2017 16:52, Andrey Ryabinin пишет:
>>> On 08/15/2017 03:42 PM, Stanislav Kinsburskiy wrote:
>>>> These are precursor helpers, which will hide all the containerization magic.
>>>>
>>>> Signed-off-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
>>>> ---
>>>>  drivers/connector/cn_proc.c |   57 +++++++++++++++++++++++++++++++++----------
>>>>  1 file changed, 44 insertions(+), 13 deletions(-)
>>>>
>>>
>>>>  void proc_fork_connector(struct task_struct *task)
>>>>  {
>>>>  	struct cn_msg *msg;
>>>> @@ -73,8 +95,9 @@ void proc_fork_connector(struct task_struct *task)
>>>>  	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
>>>>  	struct timespec ts;
>>>>  	struct task_struct *parent;
>>>> +	struct net *net = task_net(task);
>>>>  
>>>> -	if (atomic_read(&proc_event_num_listeners) < 1)
>>>> +	if (get_listeners(net) < 1)
>>>
>>> And now the listener on host won't receive anything, because the forked task has different net namespace.
>>
>> Makes sense. Looks like get_exec_env()->ve_netns has to be used instead.
>> The structure is created for containers init net anyways.
>>
>>> This should remain the global counter.
>>>
>>
>> Why?
>>
>>> Also you need to craft and send multiple messages for every listener with pid of tasks in the *listener's* pid namespace.
>>> And same for all other connectors.
>>>
>>
>> Didn't get it.
>>
> 
> E.g. we have listener on host, i.e. with all init namespaces. And we also have listener in some container.
> Let's say we have task "A" in the container which have global pid 10 and pid 3 in container.
> So if "A" forks, the host listener must receive message that task 10 forked, and the containers listener must receive message that task 3 forked.
> 

Hmm. This also makes sense. But painful.
Sending one message for both ve#0 and CT initial pid namespaces should be enough, what do you think?
Andrey Ryabinin Aug. 15, 2017, 2:40 p.m.
On 08/15/2017 05:20 PM, Stanislav Kinsburskiy wrote:

>>>
>>
>> E.g. we have listener on host, i.e. with all init namespaces. And we also have listener in some container.
>> Let's say we have task "A" in the container which have global pid 10 and pid 3 in container.
>> So if "A" forks, the host listener must receive message that task 10 forked, and the containers listener must receive message that task 3 forked.
>>
> 
> Hmm. This also makes sense. But painful.
> Sending one message for both ve#0 and CT initial pid namespaces should be enough, what do you think?
> 

Well I'd prefer to make this right instead. Also it seems doable without ve craft.

I think we need to keep some list of listeners. In proc_fork_connector(), iterate through this list
and call task_pid_nr_ns(task, listener->pid_ns) for every listener. If it returns nothing, then we should
skip that listener, and if task_pid_nr_ns() returns something, craft and send the message to listener->net_ns.

Obviously, something similar has to be for other connectors.