From patchwork Thu Jul 16 12:37:31 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [RH7] netfilter: drop/reaquire nfnl_lock on request_module() in nft_log module From: Konstantin Khorenko X-Patchwork-Id: 13162 Message-Id: <20200716123731.9451-1-khorenko@virtuozzo.com> To: Vasily Averin , Andrey Ryabinin Cc: OpenVZ devel list Date: Thu, 16 Jul 2020 15:37:31 +0300 There could be a deadlock: 1. "nft" process calls request_module() with nfnl_lock(NFNL_SUBSYS_NFTABLES) taken and waits for its completion: [UN] PID: 7391 TASK: ffff939e154bc000 COMMAND: "nft" wait_for_completion_killable __call_usermodehelper_exec __request_module nf_logger_find_get nft_log_init // ops->init() nf_tables_newexpr nf_tables_newrule // called with nfnl_lock taken nfnetlink_rcv_batch // takes nfnl_lock nfnetlink_rcv netlink_unicast netlink_sendmsg 2. request_module() tries to load the module, but waits on "net_mutex": [UN] PID: 8913 TASK: ffff939e140ec000 COMMAND: "modprobe" __mutex_lock_slowpath mutex_lock // mutex_lock(&net_mutex); (owner = 0xffff939e315f2000 == PID: 158 COMMAND: "kworker/u64:1") register_pernet_subsys init_module do_one_initcall load_module sys_init_module 3. "kworker" holds net_mutex, but itself waits for nfnl_lock: [UN] PID: 158 TASK: ffff939e315f2000 COMMAND: "kworker/u64:1" __mutex_lock_slowpath mutex_lock // nfnl_lock(NFNL_SUBSYS_NFTABLES) (owner = 0xffff939e154bc000 PID: 7391 COMMAND: "nft") nfnl_lock nft_unregister_afinfo nf_tables_inet_exit_net ops_exit_list cleanup_net process_one_work Reproducer: =========== [console 1]: export i=0; while true; do nft add table filter; \ nft add chain filter input; \ nft add rule filter input tcp dport 23 ct state new \ log prefix \"Connection to port 23:\" accept; \ nft flush ruleset; \ rmmod nft_log; rmmod nf_log_ipv4; rmmod nf_log_common; \ i=$(($i+1)); echo $i; \ done [console 2]: modprobe nf_tables; \ export j=0; while true; do \ ip net add n1; \ ip net del n1; \ j=$(($j + 1)); echo $j; \ done In mainstream this sutiation is impossible, it's fixed as a side effect by: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard transactions"), but backporting it is quite heavy, so let's just drop/reacquire nfnl lock around request_module() like it's done in similar places. https://jira.sw.ru/browse/PSBM-105534 Signed-off-by: Konstantin Khorenko --- include/net/netfilter/nf_log.h | 2 +- net/netfilter/nf_log.c | 8 ++++--- net/netfilter/nft_log.c | 49 +++++++++++++++++++++++++++++++++++++++--- net/netfilter/xt_LOG.c | 2 +- net/netfilter/xt_TRACE.c | 2 +- 5 files changed, 54 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 4b1b4c44e537b..2686caac8080e 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -64,7 +64,7 @@ int nf_log_bind_pf(struct net *net, u_int8_t pf, const struct nf_logger *logger); void nf_log_unbind_pf(struct net *net, u_int8_t pf); -int nf_logger_find_get(int pf, enum nf_log_type type); +int nf_logger_find_get_nolock(int pf, enum nf_log_type type); void nf_logger_put(int pf, enum nf_log_type type); void nf_logger_request_module(int pf, enum nf_log_type type); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ff07ee54bbdd3..dd34fd081a89d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -19,7 +19,9 @@ int sysctl_nf_log_all_netns __read_mostly; EXPORT_SYMBOL(sysctl_nf_log_all_netns); -static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; +struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; +EXPORT_SYMBOL(loggers); + static DEFINE_MUTEX(nf_log_mutex); #define nft_log_dereference(logger) \ @@ -164,7 +166,7 @@ void nf_logger_request_module(int pf, enum nf_log_type type) } EXPORT_SYMBOL_GPL(nf_logger_request_module); -int nf_logger_find_get(int pf, enum nf_log_type type) +int nf_logger_find_get_nolock(int pf, enum nf_log_type type) { struct nf_logger *logger; int ret = -ENOENT; @@ -184,7 +186,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type) rcu_read_unlock(); return ret; } -EXPORT_SYMBOL_GPL(nf_logger_find_get); +EXPORT_SYMBOL_GPL(nf_logger_find_get_nolock); void nf_logger_put(int pf, enum nf_log_type type) { diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index d21fe91d50fc0..71e35feb1fe27 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,48 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; +extern struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; + +/* + * In "nft_log" module we need to drop nfnl lock while performing + * request_module(), calls to nf_logger_find_get() in other + * modules are done without nfnl_lock taken. + * + * nf_logger_find_get + * nft_log_init + * nf_tables_newexpr + * nf_tables_newrule // nc->call_batch + * // called with nfnl_lock taken + * + * nfnetlink_rcv_batch // takes nfnl_lock(NFNL_SUBSYS_NFTABLES) + * nfnetlink_rcv + * netlink_unicast + * netlink_sendmsg + */ +static int nf_logger_find_get_lock(int pf, enum nf_log_type type) +{ + struct nf_logger *logger; + int ret = 0; + + logger = loggers[pf][type]; + if (logger == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nf-logger-%u-%u", pf, type); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + ret = -EAGAIN; + } + + rcu_read_lock(); + logger = rcu_dereference(loggers[pf][type]); + + if ((logger == NULL) || + ((ret == 0) && !try_module_get(logger->me))) { + ret = -ENOENT; + } + rcu_read_unlock(); + return ret; +} + static int nft_log_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -99,11 +142,11 @@ static int nft_log_init(const struct nft_ctx *ctx, } if (ctx->afi->family == NFPROTO_INET) { - ret = nf_logger_find_get(NFPROTO_IPV4, li->type); + ret = nf_logger_find_get_lock(NFPROTO_IPV4, li->type); if (ret < 0) return ret; - ret = nf_logger_find_get(NFPROTO_IPV6, li->type); + ret = nf_logger_find_get_lock(NFPROTO_IPV6, li->type); if (ret < 0) { nf_logger_put(NFPROTO_IPV4, li->type); return ret; @@ -111,7 +154,7 @@ static int nft_log_init(const struct nft_ctx *ctx, return 0; } - return nf_logger_find_get(ctx->afi->family, li->type); + return nf_logger_find_get_lock(ctx->afi->family, li->type); } static void nft_log_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index eb31d73e31ebf..89059468e0f80 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -61,7 +61,7 @@ static int log_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); + return nf_logger_find_get_nolock(par->family, NF_LOG_TYPE_LOG); } static void log_tg_destroy(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 858d189a13031..d0e77eea6b829 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -13,7 +13,7 @@ MODULE_ALIAS("ip6t_TRACE"); static int trace_tg_check(const struct xt_tgchk_param *par) { - return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); + return nf_logger_find_get_nolock(par->family, NF_LOG_TYPE_LOG); } static void trace_tg_destroy(const struct xt_tgdtor_param *par)