[Devel,RH7,v2,3/4] ms/devpts: clean up interface to pty drivers

Submitted by Pavel Tikhomirov on Aug. 7, 2017, 11:35 a.m.

Details

Message ID 20170807113508.17904-3-ptikhomirov@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Pavel Tikhomirov Aug. 7, 2017, 11:35 a.m.
From: Linus Torvalds <torvalds@linux-foundation.org>

cherry-pick ms commit 67245ff332064c01b760afa7a384ccda024bfd24

Now pts_sb_from_inode is called for non devptsfs inodes only on
open once, next we use superblock from driver_data on tty.

We had a warning if open /dev/ptmx in one ve(ve0) and move task
to other ve(CT) and then close the file, as pts_sb_from_inode
gave us diffenent superblock and different ida in different ve:

    Jun 29 03:33:54 qvm106 kernel: WARNING: at lib/idr.c:1076 ida_remove+0xb7/0xf0()
    Jun 29 03:33:54 qvm106 kernel: ida_remove called for id=1 which is not allocated.
    Jun 29 03:33:54 qvm106 kernel: Modules linked in: nf_conntrack_netlink raw_diag xt_mark udp_diag netlink_diag af_packet_diag unix_diag binfmt_misc xt_CHECKSUM tun tcp_diag inet_diag ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat iptable_mangle iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter fuse kvm_intel kvm irqbypass ppdev sg i2c_piix4 virtio_balloon parport_pc pcspkr parport ip_vs nf_conntrack libcrc32c br_netfilter veth overlay ip6_vzprivnet ip6_vznetstat pio_nfs ip_vznetstat ip_vzprivnet vziolimit vzevent vzlist vzstat vznetstat nfsd vznetdev vzmon auth_rpcgss
    Jun 29 03:33:54 qvm106 kernel: vzdev nfs_acl lockd pio_kaio grace pio_direct pfmt_raw pfmt_ploop1 ploop bridge ip_tables ext4 mbcache jbd2 sd_mod crc_t10dif crct10dif_generic sr_mod crct10dif_common cdrom ata_generic pata_acpi 8021q garp stp llc mrp virtio_net virtio_scsi virtio_console bochs_drm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm scsi_transport_iscsi crc32c_intel drm ata_piix serio_raw virtio_pci libata virtio_ring virtio i2c_core floppy sunrpc dm_mirror dm_region_hash dm_log dm_mod
    Jun 29 03:33:54 qvm106 kernel: CPU: 0 PID: 1035868 Comm: ss ve: 0 Not tainted 3.10.0-514.16.1.vz7.32.12 #1 32.12
    Jun 29 03:33:54 qvm106 kernel: Hardware name: Virtuozzo KVM, BIOS 1.9.1-5.3.2.vz7.6 04/01/2014
    Jun 29 03:33:54 qvm106 kernel: 0000000000000434 00000000b4d4e8e9 ffff8801c12a3ce8 ffffffff81681d9a
    Jun 29 03:33:54 qvm106 kernel: ffff8801c12a3d20 ffffffff81085e70 ffff880235dc8300 ffff880235dc8300
    Jun 29 03:33:54 qvm106 kernel: 0000000000000001 ffff880039b6b560 ffff8801cfa70c00 ffff8801c12a3d88
    Jun 29 03:33:54 qvm106 kernel: Call Trace:
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff81681d9a>] dump_stack+0x19/0x1b
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff81085e70>] warn_slowpath_common+0x70/0xb0
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff81085f0c>] warn_slowpath_fmt+0x5c/0x80
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff81314347>] ida_remove+0xb7/0xf0
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff8129f322>] devpts_kill_index+0x52/0x70
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff813ed7ee>] pty_unix98_shutdown+0x2e/0x50
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff813e2137>] release_tty+0x37/0x140
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff813e356d>] tty_release+0x36d/0x530
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff8122d786>] ? d_free+0x56/0x80
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff8122dd00>] ? __dentry_kill+0x120/0x180
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff812182c2>] __fput+0xf2/0x280
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff8121845e>] ____fput+0xe/0x10
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff810aec34>] task_work_run+0xc4/0xe0
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff8102aa42>] do_notify_resume+0x92/0xb0
    Jun 29 03:33:54 qvm106 kernel: [<ffffffff816927bd>] int_signal+0x12/0x17
    Jun 29 03:33:54 qvm106 kernel: ---[ end trace 02774061ed55f9a7 ]---

v2: remove fsi check in devpts_pty_new (we call devpts_pty_new only from
ptmx_open on already non-null fsi)

https://jira.sw.ru/browse/PSBM-67757

This gets rid of the horrible notion of having that

    struct inode *ptmx_inode

be the linchpin of the interface between the pty code and devpts.

By de-emphasizing the ptmx inode, a lot of things actually get cleaner,
and we will have a much saner way forward.  In particular, this will
allow us to associate with any particular devpts instance at open-time,
and not be artificially tied to one particular ptmx inode.

The patch itself is actually fairly straightforward, and apart from some
locking and return path cleanups it's pretty mechanical:

 - the interfaces that devpts exposes all take "struct pts_fs_info *"
   instead of "struct inode *ptmx_inode" now.

   NOTE! The "struct pts_fs_info" thing is a completely opaque structure
   as far as the pty driver is concerned: it's still declared entirely
   internally to devpts. So the pty code can't actually access it in any
   way, just pass it as a "cookie" to the devpts code.

 - the "look up the pts fs info" is now a single clear operation, that
   also does the reference count increment on the pts superblock.

   So "devpts_add/del_ref()" is gone, and replaced by a "lookup and get
   ref" operation (devpts_get_ref(inode)), along with a "put ref" op
   (devpts_put_ref()).

 - the pty master "tty->driver_data" field now contains the pts_fs_info,
   not the ptmx inode.

 - because we don't care about the ptmx inode any more as some kind of
   base index, the ref counting can now drop the inode games - it just
   gets the ref on the superblock.

 - the pts_fs_info now has a back-pointer to the super_block. That's so
   that we can easily look up the information we actually need. Although
   quite often, the pts fs info was actually all we wanted, and not having
   to look it up based on some magical inode makes things more
   straightforward.

In particular, now that "devpts_get_ref(inode)" operation should really
be the *only* place we need to look up what devpts instance we're
associated with, and we do it exactly once, at ptmx_open() time.

The other side of this is that one ptmx node could now be associated
with multiple different devpts instances - you could have a single
/dev/ptmx node, and then have multiple mount namespaces with their own
instances of devpts mounted on /dev/pts/.  And that's all perfectly sane
in a model where we just look up the pts instance at open time.

This will eventually allow us to get rid of our odd single-vs-multiple
pts instance model, but this patch in itself changes no semantics, only
an internal binding model.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Jann Horn <jann@thejh.net>
Cc: Greg KH <greg@kroah.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Florian Weimer <fw@deneb.enyo.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 drivers/tty/pty.c         | 63 ++++++++++++++++++++++-------------------------
 fs/devpts/inode.c         | 52 +++++++++++++++++---------------------
 include/linux/devpts_fs.h | 34 ++++++++-----------------
 3 files changed, 63 insertions(+), 86 deletions(-)

Patch hide | download patch | download mbox

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 7ae7e1aef0cb..ab448d0fdf68 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -625,14 +625,14 @@  static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty)
 /* this is called once with whichever end is closed last */
 static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 {
-	struct inode *ptmx_inode;
+	struct pts_fs_info *fsi;
 
 	if (tty->driver->subtype == PTY_TYPE_MASTER)
-		ptmx_inode = tty->driver_data;
+		fsi = tty->driver_data;
 	else
-		ptmx_inode = tty->link->driver_data;
-	devpts_kill_index(ptmx_inode, tty->index);
-	devpts_del_ref(ptmx_inode);
+		fsi = tty->link->driver_data;
+	devpts_kill_index(fsi, tty->index);
+	devpts_put_ref(fsi);
 }
 
 static const struct tty_operations ptm_unix98_ops = {
@@ -681,6 +681,7 @@  static const struct tty_operations pty_unix98_ops = {
 
 static int ptmx_open(struct inode *inode, struct file *filp)
 {
+	struct pts_fs_info *fsi;
 	struct tty_struct *tty;
 	struct inode *slave_inode;
 	int retval;
@@ -695,47 +696,41 @@  static int ptmx_open(struct inode *inode, struct file *filp)
 	if (retval)
 		return retval;
 
+	fsi = devpts_get_ref(inode, filp);
+	retval = -ENODEV;
+	if (!fsi)
+		goto out_free_file;
+
 	/* find a device that is not in use. */
 	mutex_lock(&devpts_mutex);
-	index = devpts_new_index(inode);
-	if (index < 0) {
-		retval = index;
-		mutex_unlock(&devpts_mutex);
-		goto err_file;
-	}
-
+	index = devpts_new_index(fsi);
 	mutex_unlock(&devpts_mutex);
 
-	mutex_lock(&tty_mutex);
-	tty = tty_init_dev(ptm_driver, index);
+	retval = index;
+	if (index < 0)
+		goto out_put_ref;
 
-	if (IS_ERR(tty)) {
-		retval = PTR_ERR(tty);
-		goto out;
-	}
 
+	mutex_lock(&tty_mutex);
+	tty = tty_init_dev(ptm_driver, index);
 	/* The tty returned here is locked so we can safely
 	   drop the mutex */
 	mutex_unlock(&tty_mutex);
 
-	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
-	tty->driver_data = inode;
+	retval = PTR_ERR(tty);
+	if (IS_ERR(tty))
+		goto out;
 
 	/*
-	 * In the case where all references to ptmx inode are dropped and we
-	 * still have /dev/tty opened pointing to the master/slave pair (ptmx
-	 * is closed/released before /dev/tty), we must make sure that the inode
-	 * is still valid when we call the final pty_unix98_shutdown, thus we
-	 * hold an additional reference to the ptmx inode. For the same /dev/tty
-	 * last close case, we also need to make sure the super_block isn't
-	 * destroyed (devpts instance unmounted), before /dev/tty is closed and
-	 * on its release devpts_kill_index is called.
+	 * From here on out, the tty is "live", and the index and
+	 * fsi will be killed/put by the tty_release()
 	 */
-	devpts_add_ref(inode);
+	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+	tty->driver_data = fsi;
 
 	tty_add_file(tty, filp);
 
-	slave_inode = devpts_pty_new(inode,
+	slave_inode = devpts_pty_new(fsi,
 			MKDEV(UNIX98_PTY_SLAVE_MAJOR, index), index,
 			tty->link);
 	if (IS_ERR(slave_inode)) {
@@ -752,12 +747,14 @@  static int ptmx_open(struct inode *inode, struct file *filp)
 	return 0;
 err_release:
 	tty_unlock(tty);
+	// This will also put-ref the fsi
 	tty_release(inode, filp);
 	return retval;
 out:
-	mutex_unlock(&tty_mutex);
-	devpts_kill_index(inode, index);
-err_file:
+	devpts_kill_index(fsi, index);
+out_put_ref:
+	devpts_put_ref(fsi);
+out_free_file:
 	tty_free_file(filp);
 	return retval;
 }
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1940f6de8866..7afce4ebc1b1 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -126,6 +126,7 @@  static const match_table_t tokens = {
 struct pts_fs_info {
 	struct ida allocated_ptys;
 	struct pts_mount_opts mount_opts;
+	struct super_block *sb;
 	struct dentry *ptmx_dentry;
 };
 
@@ -351,7 +352,7 @@  static const struct super_operations devpts_sops = {
 	.show_options	= devpts_show_options,
 };
 
-static void *new_pts_fs_info(void)
+static void *new_pts_fs_info(struct super_block *sb)
 {
 	struct pts_fs_info *fsi;
 
@@ -362,6 +363,7 @@  static void *new_pts_fs_info(void)
 	ida_init(&fsi->allocated_ptys);
 	fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
 	fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+	fsi->sb = sb;
 
 	return fsi;
 }
@@ -377,7 +379,7 @@  devpts_fill_super(struct super_block *s, void *data, int silent)
 	s->s_op = &devpts_sops;
 	s->s_time_gran = 1;
 
-	s->s_fs_info = new_pts_fs_info();
+	s->s_fs_info = new_pts_fs_info(s);
 	if (!s->s_fs_info)
 		goto fail;
 
@@ -525,19 +527,14 @@  static struct file_system_type devpts_fs_type = {
  * to the System V naming convention
  */
 
-int devpts_new_index(struct inode *ptmx_inode)
+int devpts_new_index(struct pts_fs_info *fsi)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-	struct pts_fs_info *fsi;
 	int index;
 	int ida_ret;
 
-	/* devpts not mounted yet */
-	if (!sb)
+	if (!fsi)
 		return -ENODEV;
 
-	fsi = DEVPTS_SB(sb);
-
 retry:
 	if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
 		return -ENOMEM;
@@ -567,11 +564,8 @@  int devpts_new_index(struct inode *ptmx_inode)
 	return index;
 }
 
-void devpts_kill_index(struct inode *ptmx_inode, int idx)
+void devpts_kill_index(struct pts_fs_info *fsi, int idx)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-	struct pts_fs_info *fsi = DEVPTS_SB(sb);
-
 	mutex_lock(&allocated_ptys_lock);
 	ida_remove(&fsi->allocated_ptys, idx);
 	pty_count--;
@@ -581,21 +575,25 @@  void devpts_kill_index(struct inode *ptmx_inode, int idx)
 /*
  * pty code needs to hold extra references in case of last /dev/tty close
  */
-
-void devpts_add_ref(struct inode *ptmx_inode)
+struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct super_block *sb;
+	struct pts_fs_info *fsi;
+
+	sb = pts_sb_from_inode(ptmx_inode);
+	if (!sb)
+		return NULL;
+	fsi = DEVPTS_SB(sb);
+	if (!fsi)
+		return NULL;
 
 	atomic_inc(&sb->s_active);
-	ihold(ptmx_inode);
+	return fsi;
 }
 
-void devpts_del_ref(struct inode *ptmx_inode)
+void devpts_put_ref(struct pts_fs_info *fsi)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-
-	iput(ptmx_inode);
-	deactivate_super(sb);
+	deactivate_super(fsi->sb);
 }
 
 /**
@@ -607,22 +605,18 @@  void devpts_del_ref(struct inode *ptmx_inode)
  *
  * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
  */
-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+struct inode *devpts_pty_new(struct pts_fs_info *fsi, dev_t device, int index,
 		void *priv)
 {
 	struct dentry *dentry;
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct super_block *sb;
 	struct inode *inode;
 	struct dentry *root;
-	struct pts_fs_info *fsi;
 	struct pts_mount_opts *opts;
 	char s[12];
 
-	if (!sb)
-		return ERR_PTR(-ENODEV);
-
+	sb = fsi->sb;
 	root = sb->s_root;
-	fsi = DEVPTS_SB(sb);
 	opts = &fsi->mount_opts;
 
 	inode = new_inode(sb);
diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
index e0ee0b3000b2..358a4db72a27 100644
--- a/include/linux/devpts_fs.h
+++ b/include/linux/devpts_fs.h
@@ -15,38 +15,24 @@ 
 
 #include <linux/errno.h>
 
+struct pts_fs_info;
+
 #ifdef CONFIG_UNIX98_PTYS
 
-int devpts_new_index(struct inode *ptmx_inode);
-void devpts_kill_index(struct inode *ptmx_inode, int idx);
-void devpts_add_ref(struct inode *ptmx_inode);
-void devpts_del_ref(struct inode *ptmx_inode);
+/* Look up a pts fs info and get a ref to it */
+struct pts_fs_info *devpts_get_ref(struct inode *, struct file *);
+void devpts_put_ref(struct pts_fs_info *);
+
+int devpts_new_index(struct pts_fs_info *);
+void devpts_kill_index(struct pts_fs_info *, int);
+
 /* mknod in devpts */
-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
-		void *priv);
+struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
 /* get private structure */
 void *devpts_get_priv(struct inode *pts_inode);
 /* unlink */
 void devpts_pty_kill(struct inode *inode);
 
-#else
-
-/* Dummy stubs in the no-pty case */
-static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
-static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
-static inline void devpts_add_ref(struct inode *ptmx_inode) { }
-static inline void devpts_del_ref(struct inode *ptmx_inode) { }
-static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
-		dev_t device, int index, void *priv)
-{
-	return ERR_PTR(-EINVAL);
-}
-static inline void *devpts_get_priv(struct inode *pts_inode)
-{
-	return NULL;
-}
-static inline void devpts_pty_kill(struct inode *inode) { }
-
 #endif