[RHEL7,COMMIT] ms/fork: Have new threads join on-going signal group stops

Submitted by Vasily Averin on Dec. 30, 2020, 10:27 a.m.

Details

Message ID 202012301027.0BUARBgk029252@vz7build.vvs.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Vasily Averin Dec. 30, 2020, 10:27 a.m.
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.7
------>
commit 056f2c6a7c8ee0430d60e6026bcaeeda5cfa38b4
Author: Eric W. Biederman <ebiederm@xmission.com>
Date:   Wed Dec 30 13:27:11 2020 +0300

    ms/fork: Have new threads join on-going signal group stops
    
    There are only two signals that are delivered to every member of a
    signal group: SIGSTOP and SIGKILL.  Signal delivery requires every
    signal appear to be delivered either before or after a clone syscall.
    SIGKILL terminates the clone so does not need to be considered.  Which
    leaves only SIGSTOP that needs to be considered when creating new
    threads.
    
    Today in the event of a group stop TIF_SIGPENDING will get set and the
    fork will restart ensuring the fork syscall participates in the group
    stop.
    
    A fork (especially of a process with a lot of memory) is one of the
    most expensive system calls, so we really only want to restart a fork
    when necessary.
    
    It is easy to check to see if a SIGSTOP is ongoing and have the new
    thread join it immediately after the clone completes, making it appear
    that the clone completed just before the SIGSTOP.
    
    The calculate_sigpending function will see the bits set in jobctl and
    set TIF_SIGPENDING to ensure the new task takes the slow path to userspace.
    
    V2: The call to task_join_group_stop was moved before the new task is
        added to the thread group list.  This should not matter as
        sighand->siglock is held over both the addition of the threads,
        the call to task_join_group_stop and do_signal_stop.  But the change
        is trivial and it is one less thing to worry about when reading
        the code.
    
    Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
    (cherry-picked from commit 924de3b8c9410c404c6eda7abffd282b97b3ff7f)
    VvS: minor context changes, added include linux/sched/signal.h to kernel/fork.c
    https://jira.sw.ru/browse/PSBM-123088
    Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 include/linux/sched/signal.h |  2 ++
 kernel/fork.c                | 28 ++++++++++++++++------------
 kernel/signal.c              | 14 ++++++++++++++
 3 files changed, 32 insertions(+), 12 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index da69cd0..5e88abf2 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -2,5 +2,7 @@ 
 #define _LINUX_SCHED_SIGNAL_H
 
 #include <linux/sched.h>
+void task_join_group_stop(struct task_struct *task);
+
 
 #endif /* _LINUX_SCHED_SIGNAL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index b6a5279..7ed142f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -79,6 +79,7 @@ 
 #include <linux/aio.h>
 #include <linux/hmm.h>
 #include <linux/compiler.h>
+#include <linux/sched/signal.h>
 #ifndef __GENKSYMS__
 #include <linux/user_namespace.h>
 #endif
@@ -1703,18 +1704,20 @@  static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	copy_seccomp(p);
 
-	/*
-	 * Process group and session signals need to be delivered to just the
-	 * parent before the fork or both the parent and the child after the
-	 * fork. Restart if a signal comes in before we add the new process to
-	 * it's process group.
-	 * A fatal signal pending means that current will exit, so the new
-	 * thread can't slip out of an OOM kill (or normal SIGKILL).
-	*/
-	recalc_sigpending();
-	if (signal_pending(current)) {
-		retval = -ERESTARTNOINTR;
-		goto bad_fork_cancel_cgroup;
+	if (!(clone_flags & CLONE_THREAD)) {
+		/*
+		 * Process group and session signals need to be delivered to just the
+		 * parent before the fork or both the parent and the child after the
+		 * fork. Restart if a signal comes in before we add the new process to
+		 * it's process group.
+		 * A fatal signal pending means that current will exit, so the new
+		 * thread can't slip out of an OOM kill (or normal SIGKILL).
+		 */
+		recalc_sigpending();
+		if (signal_pending(current)) {
+			retval = -ERESTARTNOINTR;
+			goto bad_fork_cancel_cgroup;
+		}
 	}
 	if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
 		retval = -ENOMEM;
@@ -1752,6 +1755,7 @@  static struct task_struct *copy_process(unsigned long clone_flags,
 			current->signal->nr_threads++;
 			atomic_inc(&current->signal->live);
 			atomic_inc(&current->signal->sigcnt);
+			task_join_group_stop(p);
 			list_add_tail_rcu(&p->thread_group,
 					  &p->group_leader->thread_group);
 			list_add_tail_rcu(&p->thread_node,
diff --git a/kernel/signal.c b/kernel/signal.c
index d37b108..85b4b89 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -374,6 +374,20 @@  static bool task_participate_group_stop(struct task_struct *task)
 	return false;
 }
 
+void task_join_group_stop(struct task_struct *task)
+{
+	/* Have the new thread join an on-going signal group stop */
+	unsigned long jobctl = current->jobctl;
+	if (jobctl & JOBCTL_STOP_PENDING) {
+		struct signal_struct *sig = current->signal;
+		unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
+		unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
+		if (task_set_jobctl_pending(task, signr | gstop)) {
+			sig->group_stop_count++;
+		}
+	}
+}
+
 /*
  * allocate a new signal queue record
  * - this may be called without locks if and only if t == current, otherwise an