[PATCHv5] locks: Filter /proc/locks output on proc pid ns

Submitted by Nikolay Borisov on Aug. 5, 2016, 7:30 a.m.

Details

Message ID 1470382204-21480-1-git-send-email-kernel@kyup.com
State New
Series "locks: Show only file_locks created in the same pidns as current process"
Headers show

Commit Message

Nikolay Borisov Aug. 5, 2016, 7:30 a.m.
On busy container servers reading /proc/locks shows all the locks
created by all clients. This can cause large latency spikes. In my
case I observed lsof taking up to 5-10 seconds while processing around
50k locks. Fix this by limiting the locks shown only to those created
in the same pidns as the one the proc fs was mounted in. When reading
/proc/locks from the init_pid_ns proc instance then perform no
filtering

Signed-off-by: Nikolay Borisov <kernel@kyup.com>
Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/locks.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f6fc13..484b7e106076 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2574,9 +2574,19 @@  static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 	struct inode *inode = NULL;
 	unsigned int fl_pid;
 
-	if (fl->fl_nspid)
-		fl_pid = pid_vnr(fl->fl_nspid);
-	else
+	if (fl->fl_nspid) {
+		struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+
+		/* Don't let fl_pid change depending on who is reading the file */
+		fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
+
+		/* If there isn't a fl_pid don't display who is waiting on the lock
+		 * if we are called from locks_show, or if we are called from
+		 * __show_fd_info - skip lock entirely
+		 */
+		if (fl_pid == 0)
+			return;
+	} else
 		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
@@ -2648,9 +2658,13 @@  static int locks_show(struct seq_file *f, void *v)
 {
 	struct locks_iterator *iter = f->private;
 	struct file_lock *fl, *bfl;
+	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
 
 	fl = hlist_entry(v, struct file_lock, fl_link);
 
+	if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
+		return 0;
+
 	lock_get_status(f, fl, iter->li_pos, "");
 
 	list_for_each_entry(bfl, &fl->fl_block, fl_block)

Comments

Jeff Layton Aug. 5, 2016, 10:47 a.m.
On Fri, 2016-08-05 at 10:30 +0300, Nikolay Borisov wrote:
> On busy container servers reading /proc/locks shows all the locks
> created by all clients. This can cause large latency spikes. In my
> case I observed lsof taking up to 5-10 seconds while processing around
> 50k locks. Fix this by limiting the locks shown only to those created
> in the same pidns as the one the proc fs was mounted in. When reading
> /proc/locks from the init_pid_ns proc instance then perform no
> filtering
> 
> > Signed-off-by: Nikolay Borisov <kernel@kyup.com>
> > Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  fs/locks.c | 20 +++++++++++++++++---
>  1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index ee1b15f6fc13..484b7e106076 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -2574,9 +2574,19 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
> >  	struct inode *inode = NULL;
> >  	unsigned int fl_pid;
>  
> > -	if (fl->fl_nspid)
> > -		fl_pid = pid_vnr(fl->fl_nspid);
> > -	else
> > +	if (fl->fl_nspid) {
> > +		struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
> +
> > +		/* Don't let fl_pid change depending on who is reading the file */
> > +		fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
> +
> > +		/* If there isn't a fl_pid don't display who is waiting on the lock
> > +		 * if we are called from locks_show, or if we are called from
> > +		 * __show_fd_info - skip lock entirely
> > +		 */
> > +		if (fl_pid == 0)
> > +			return;
> > +	} else
> >  		fl_pid = fl->fl_pid;
>  
> >  	if (fl->fl_file != NULL)
> @@ -2648,9 +2658,13 @@ static int locks_show(struct seq_file *f, void *v)
>  {
> >  	struct locks_iterator *iter = f->private;
> >  	struct file_lock *fl, *bfl;
> > +	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
>  
> >  	fl = hlist_entry(v, struct file_lock, fl_link);
>  
> > +	if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
> > +		return 0;
> +
> >  	lock_get_status(f, fl, iter->li_pos, "");
>  
> >  	list_for_each_entry(bfl, &fl->fl_block, fl_block)

Looks good to me. I'll go ahead and merge this into my locks branch for
v4.9 and get it into -next.

Thanks!
J. Bruce Fields Aug. 5, 2016, 2:58 p.m.
On Fri, Aug 05, 2016 at 06:47:16AM -0400, Jeff Layton wrote:
> On Fri, 2016-08-05 at 10:30 +0300, Nikolay Borisov wrote:
> > On busy container servers reading /proc/locks shows all the locks
> > created by all clients. This can cause large latency spikes. In my
> > case I observed lsof taking up to 5-10 seconds while processing around
> > 50k locks. Fix this by limiting the locks shown only to those created
> > in the same pidns as the one the proc fs was mounted in. When reading
> > /proc/locks from the init_pid_ns proc instance then perform no
> > filtering
> > 
> > > Signed-off-by: Nikolay Borisov <kernel@kyup.com>
> > > Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> > ---
> >  fs/locks.c | 20 +++++++++++++++++---
> >  1 file changed, 17 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/locks.c b/fs/locks.c
> > index ee1b15f6fc13..484b7e106076 100644
> > --- a/fs/locks.c
> > +++ b/fs/locks.c
> > @@ -2574,9 +2574,19 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
> > >  	struct inode *inode = NULL;
> > >  	unsigned int fl_pid;
> >  
> > > -	if (fl->fl_nspid)
> > > -		fl_pid = pid_vnr(fl->fl_nspid);
> > > -	else
> > > +	if (fl->fl_nspid) {
> > > +		struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
> > +
> > > +		/* Don't let fl_pid change depending on who is reading the file */
> > > +		fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
> > +
> > > +		/* If there isn't a fl_pid don't display who is waiting on the lock
> > > +		 * if we are called from locks_show, or if we are called from
> > > +		 * __show_fd_info - skip lock entirely
> > > +		 */
> > > +		if (fl_pid == 0)
> > > +			return;
> > > +	} else
> > >  		fl_pid = fl->fl_pid;
> >  
> > >  	if (fl->fl_file != NULL)
> > @@ -2648,9 +2658,13 @@ static int locks_show(struct seq_file *f, void *v)
> >  {
> > >  	struct locks_iterator *iter = f->private;
> > >  	struct file_lock *fl, *bfl;
> > > +	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
> >  
> > >  	fl = hlist_entry(v, struct file_lock, fl_link);
> >  
> > > +	if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
> > > +		return 0;
> > +
> > >  	lock_get_status(f, fl, iter->li_pos, "");
> >  
> > >  	list_for_each_entry(bfl, &fl->fl_block, fl_block)
> 
> Looks good to me. I'll go ahead and merge this into my locks branch for
> v4.9 and get it into -next.

Makes sense to me.  Thanks also to Eric for the help.

--b.
Andrey Vagin June 12, 2018, 12:21 a.m.
On Fri, Aug 05, 2016 at 10:30:04AM +0300, Nikolay Borisov wrote:
> On busy container servers reading /proc/locks shows all the locks
> created by all clients. This can cause large latency spikes. In my
> case I observed lsof taking up to 5-10 seconds while processing around
> 50k locks. Fix this by limiting the locks shown only to those created
> in the same pidns as the one the proc fs was mounted in. When reading
> /proc/locks from the init_pid_ns proc instance then perform no
> filtering
> 
> Signed-off-by: Nikolay Borisov <kernel@kyup.com>
> Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
>  fs/locks.c | 20 +++++++++++++++++---
>  1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index ee1b15f6fc13..484b7e106076 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -2574,9 +2574,19 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
>  	struct inode *inode = NULL;
>  	unsigned int fl_pid;
>  
> -	if (fl->fl_nspid)
> -		fl_pid = pid_vnr(fl->fl_nspid);
> -	else
> +	if (fl->fl_nspid) {
> +		struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
> +
> +		/* Don't let fl_pid change depending on who is reading the file */
> +		fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
> +
> +		/* If there isn't a fl_pid don't display who is waiting on the lock
> +		 * if we are called from locks_show, or if we are called from
> +		 * __show_fd_info - skip lock entirely
> +		 */
> +		if (fl_pid == 0)
> +			return;

I understand the problem with /proc/locks, but I don't understand why
you decided to not show locks in fdinfo. A lock is a property of a
file descriptor. If a file descriptor has a lock it doesn't matter
what process created it. This process can be dead to a moment when we read
fdinfo...

BTW: these changes breaks CRIU, which reads fdinfo to dump locks.

> +	} else
>  		fl_pid = fl->fl_pid;
>  
>  	if (fl->fl_file != NULL)
> @@ -2648,9 +2658,13 @@ static int locks_show(struct seq_file *f, void *v)
>  {
>  	struct locks_iterator *iter = f->private;
>  	struct file_lock *fl, *bfl;
> +	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
>  
>  	fl = hlist_entry(v, struct file_lock, fl_link);
>  
> +	if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
> +		return 0;
> +
>  	lock_get_status(f, fl, iter->li_pos, "");
>  
>  	list_for_each_entry(bfl, &fl->fl_block, fl_block)