@@ -16,9 +16,6 @@ struct kmapset;
#ifdef CONFIG_VE
-int kernfs_test_ve(struct kernfs_super_info *sb_info,
- struct kernfs_super_info *info);
-
void kernfs_get_ve_perms(struct kernfs_node *kn);
void kernfs_put_ve_perms(struct kernfs_node *kn);
@@ -31,12 +28,6 @@ bool kernfs_d_visible(struct kernfs_node *kn, struct kernfs_super_info *info);
#else // CONFIG_VE
-static inline int kernfs_test_ve(struct kernfs_super_info *sb_info,
- struct kernfs_super_info *info)
-{
- return 0;
-}
-
void kernfs_get_ve_perms(struct kernfs_node *kn) { }
void kernfs_put_ve_perms(struct kernfs_node *kn) { }
@@ -262,9 +262,6 @@ static int kernfs_test_super(struct super_block *sb, void *data)
struct kernfs_super_info *sb_info = kernfs_info(sb);
struct kernfs_super_info *info = data;
- if (!kernfs_test_ve(sb_info, info))
- return 0;
-
return sb_info->root == info->root && sb_info->ns == info->ns;
}
@@ -49,12 +49,6 @@ int kernfs_ve_allowed(struct kernfs_node *kn)
return !kn->ve_perms_map || ve_is_super(get_exec_env());
}
-int kernfs_test_ve(struct kernfs_super_info *sb_info,
- struct kernfs_super_info *info)
-{
- return sb_info->ve == info->ve;
-}
-
static struct kmapset_key *kernfs_info_perms_key(struct kernfs_super_info *info)
{
return (void *)get_exec_env() + info->ve_perms_off;
cgroups now also use kernfs subsystem. kernfs_test_ve() check breaks it when we try to mount cgroup from ve. cgroup1_mount() tries to reuse the existing superblock by calling kernfs_pin_sb to pin it during mount operation cgroup1_mount(): pinned_sb = kernfs_pin_sb(root->kf_root, NULL); ... Then it calls cgroup_do_mount() which fails to reuse the existing superblock because of failed kernfs_test_ve() check, so instead it creates a new one: dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, CGROUP_SUPER_MAGIC, ns); So now we have two superblocks instead of one which may lead to the double percpu_ref_kill_and_confirm() call. E.g. : umount /sys/fs/cgroup/rdma vzctl exec CTID umount /sys/fs/cgroup/rdma ------------[ cut here ]------------ percpu_ref_kill_and_confirm called more than once on css_release! WARNING: CPU: 2 PID: 2664 at lib/percpu-refcount.c:336 percpu_ref_kill_and_confirm+0x7c/0xa0 CPU: 2 PID: 2664 Comm: kworker/2:5 ve: / Tainted: G --------- -t - 4.18.0-80.1.2.vz8.3.12 #1 3.12 RIP: 0010:percpu_ref_kill_and_confirm+0x7c/0xa0 Call Trace: cgroup_kill_sb+0x64/0x90 deactivate_locked_super+0x34/0x70 cleanup_mnt+0x3b/0x70 delayed_mntput+0x26/0x40 process_one_work+0x1a7/0x360 worker_thread+0x30/0x390 kthread+0x112/0x130 ret_from_fork+0x35/0x40 ---[ end trace 0cb674141e1a86e3 ]--- CT: d3cd4dca-ce04-475d-b5d5-c9ef1f24642a: stopped Remove the kernfs_test_ve() check to fix this. As for sysfs which is also uses kernfs, I don't think it gonna cause something bad. Sysfs uses net namespaces (info->ns && sb_info->ns) which should be enough for us. We enter CT's net ns before mounting sysfs in CT. Also note that commands like "vzctl set 101 --netif_add eth0 --save" calls ip util (which mount sysfs) from ve0 and CT's net namespace. So with the removal of kernfs_test_ve() check kernfs_test_super() now return success instead of fail as before. Which is totally fine IMO, as we reuse the existing CT's sysfs superblock instead of creating a new one. https://jira.sw.ru/browse/PSBM-104902 Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> --- fs/kernfs/kernfs-ve.h | 9 --------- fs/kernfs/mount.c | 3 --- fs/kernfs/ve.c | 6 ------ 3 files changed, 18 deletions(-)