Message ID | 152300892570.9811.4443853396244200017.stgit@localhost.localdomain |
---|---|
State | New |
Series | "fasync: Fix deadlock between task-context and interrupt-context kill_fasync()" |
Headers | show |
diff --git a/fs/fcntl.c b/fs/fcntl.c index e611047fb904..a25130bb3cd0 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -615,9 +615,9 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) if (fa->fa_file != filp) continue; - spin_lock_irq(&fa->fa_lock); + write_lock_irq(&fa->fa_lock); fa->fa_file = NULL; - spin_unlock_irq(&fa->fa_lock); + write_unlock_irq(&fa->fa_lock); *fp = fa->fa_next; call_rcu(&fa->fa_rcu, fasync_free_rcu); @@ -662,13 +662,13 @@ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasy if (fa->fa_file != filp) continue; - spin_lock_irq(&fa->fa_lock); + write_lock_irq(&fa->fa_lock); fa->fa_fd = fd; - spin_unlock_irq(&fa->fa_lock); + write_unlock_irq(&fa->fa_lock); goto out; } - spin_lock_init(&new->fa_lock); + rwlock_init(&new->fa_lock); new->magic = FASYNC_MAGIC; new->fa_file = filp; new->fa_fd = fd; @@ -731,14 +731,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) { while (fa) { struct fown_struct *fown; - unsigned long flags; if (fa->magic != FASYNC_MAGIC) { printk(KERN_ERR "kill_fasync: bad magic number in " "fasync_struct!\n"); return; } - spin_lock_irqsave(&fa->fa_lock, flags); + read_lock(&fa->fa_lock); if (fa->fa_file) { fown = &fa->fa_file->f_owner; /* Don't send SIGURG to processes which have not set a @@ -747,7 +746,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } - spin_unlock_irqrestore(&fa->fa_lock, flags); + read_unlock(&fa->fa_lock); fa = rcu_dereference(fa->fa_next); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index a1dc3521f979..1c5144180012 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1541,7 +1541,7 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) } struct fasync_struct { - spinlock_t fa_lock; + rwlock_t fa_lock; int magic; int fa_fd; struct fasync_struct *fa_next; /* singly linked list */
On 04/06/2018 01:02 PM, Kirill Tkhai wrote: > [This also sent to ms] > > I've observed the following deadlock: > > [task 1] [task 2] [task 3] > kill_fasync() mm_update_next_owner() copy_process() > spin_lock_irqsave(&fa->fa_lock) read_lock(&tasklist_lock) write_lock_irq(&tasklist_lock) > send_sigio() <IRQ> ... > read_lock(&fown->lock) kill_fasync() ... > read_lock(&tasklist_lock) spin_lock_irqsave(&fa->fa_lock) ... > > Task 1 can't acquire read locked tasklist_lock, since there is > already task 3 expressed its wish to take the lock exclusive. > Task 2 holds the read locked lock, but it can't take the spin lock. > > Also, there is possible another deadlock (which I haven't observed): > > [task 1] [task 2] > f_getown() kill_fasync() > read_lock(&f_own->lock) spin_lock_irqsave(&fa->fa_lock,) > <IRQ> send_sigio() write_lock_irq(&f_own->lock) > kill_fasync() read_lock(&fown->lock) > spin_lock_irqsave(&fa->fa_lock,) > > Actually, we do not need exclusive fa->fa_lock in kill_fasync_rcu(), > as it guarantees fa->fa_file->f_owner integrity only. It may seem, > that it used to give a task a small possibility to receive two sequential > signals, if there are two parallel kill_fasync() callers, and task > handles the first signal fastly, but the behaviour won't become > different, since there is exclusive sighand lock in do_send_sig_info(). > > The patch converts fa_lock into rwlock_t, and this fixes two above > deadlocks, as rwlock is allowed to be taken from interrupt handler > by qrwlock design. > > https://jira.sw.ru/browse/PSBM-83102 > > Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> > Reviewed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
[This also sent to ms] I've observed the following deadlock: [task 1] [task 2] [task 3] kill_fasync() mm_update_next_owner() copy_process() spin_lock_irqsave(&fa->fa_lock) read_lock(&tasklist_lock) write_lock_irq(&tasklist_lock) send_sigio() <IRQ> ... read_lock(&fown->lock) kill_fasync() ... read_lock(&tasklist_lock) spin_lock_irqsave(&fa->fa_lock) ... Task 1 can't acquire read locked tasklist_lock, since there is already task 3 expressed its wish to take the lock exclusive. Task 2 holds the read locked lock, but it can't take the spin lock. Also, there is possible another deadlock (which I haven't observed): [task 1] [task 2] f_getown() kill_fasync() read_lock(&f_own->lock) spin_lock_irqsave(&fa->fa_lock,) <IRQ> send_sigio() write_lock_irq(&f_own->lock) kill_fasync() read_lock(&fown->lock) spin_lock_irqsave(&fa->fa_lock,) Actually, we do not need exclusive fa->fa_lock in kill_fasync_rcu(), as it guarantees fa->fa_file->f_owner integrity only. It may seem, that it used to give a task a small possibility to receive two sequential signals, if there are two parallel kill_fasync() callers, and task handles the first signal fastly, but the behaviour won't become different, since there is exclusive sighand lock in do_send_sig_info(). The patch converts fa_lock into rwlock_t, and this fixes two above deadlocks, as rwlock is allowed to be taken from interrupt handler by qrwlock design. https://jira.sw.ru/browse/PSBM-83102 Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> I used the following program for testing: #include <unistd.h> #include <stdlib.h> #include <signal.h> #include <fcntl.h> #include <errno.h> #include <stdio.h> #ifndef F_SETSIG #define F_SETSIG 10 #endif void handler(int sig) { } main() { unsigned int flags; int fd; system("echo 8 > /proc/sys/kernel/random/read_wakeup_threshold"); system("while :; do ls -R / > /dev/random 2>&1 ; echo 3 > /proc/sys/vm/drop_caches; done &"); if (signal(SIGINT, handler) < 0) { perror("Signal"); exit(1); } fd = open("/dev/random", O_RDWR); if (fd < 0) { perror("Can't open"); exit(1); } flags = FASYNC | fcntl(fd, F_GETFL); if (fcntl(fd, F_SETFL, flags) < 0) { perror("Setfl"); exit(1); } if (fcntl(fd, F_SETOWN, getpid())) { perror("Setown"); exit(1); } if (fcntl(fd, F_SETSIG, SIGINT)) { perror("Setsig"); exit(1); } while (1) sleep(100); } --- fs/fcntl.c | 15 +++++++-------- include/linux/fs.h | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-)