[RH7] netfilter: drop/reaquire nfnl_lock on request_module() in nft_log module

Submitted by Konstantin Khorenko on July 16, 2020, 12:37 p.m.

Details

Message ID 20200716123731.9451-1-khorenko@virtuozzo.com
State New
Series "netfilter: drop/reaquire nfnl_lock on request_module() in nft_log module"
Headers show

Commit Message

Konstantin Khorenko July 16, 2020, 12:37 p.m.
There could be a deadlock:

1. "nft" process calls request_module() with
nfnl_lock(NFNL_SUBSYS_NFTABLES) taken and waits for its completion:

[UN]  PID: 7391   TASK: ffff939e154bc000  COMMAND: "nft"
wait_for_completion_killable
 __call_usermodehelper_exec
  __request_module
   nf_logger_find_get
    nft_log_init		// ops->init()
     nf_tables_newexpr
      nf_tables_newrule		// called with nfnl_lock taken
       nfnetlink_rcv_batch	// takes nfnl_lock
        nfnetlink_rcv
         netlink_unicast
          netlink_sendmsg

2. request_module() tries to load the module, but waits on "net_mutex":

[UN]  PID: 8913   TASK: ffff939e140ec000  COMMAND: "modprobe"
__mutex_lock_slowpath
 mutex_lock			// mutex_lock(&net_mutex);
 (owner = 0xffff939e315f2000 == PID: 158  COMMAND: "kworker/u64:1")
   register_pernet_subsys
    init_module
     do_one_initcall
      load_module
       sys_init_module

3. "kworker" holds net_mutex, but itself waits for nfnl_lock:

[UN]  PID: 158    TASK: ffff939e315f2000  COMMAND: "kworker/u64:1"
__mutex_lock_slowpath
 mutex_lock			// nfnl_lock(NFNL_SUBSYS_NFTABLES)
 (owner = 0xffff939e154bc000  PID: 7391  COMMAND: "nft")
  nfnl_lock
   nft_unregister_afinfo
    nf_tables_inet_exit_net
     ops_exit_list
      cleanup_net
       process_one_work

Reproducer:

Patch hide | download patch | download mbox

===========
[console 1]: export i=0; while true; do
	nft add table filter; \
	nft add chain filter input; \
	nft add rule filter input tcp dport 23 ct state new \
		log prefix \"Connection to port 23:\" accept; \
	nft flush ruleset; \
	rmmod nft_log; rmmod nf_log_ipv4; rmmod nf_log_common; \
	i=$(($i+1)); echo $i; \
done

[console 2]: modprobe nf_tables; \
	export j=0; while true; do \
		ip net add n1; \
		ip net del n1; \
		j=$(($j + 1)); echo $j; \
	done

In mainstream this sutiation is impossible, it's fixed as a side effect
by: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard
transactions"), but backporting it is quite heavy,

so let's just drop/reacquire nfnl lock around request_module() like it's
done in similar places.

https://jira.sw.ru/browse/PSBM-105534

Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 include/net/netfilter/nf_log.h |  2 +-
 net/netfilter/nf_log.c         |  8 ++++---
 net/netfilter/nft_log.c        | 49 +++++++++++++++++++++++++++++++++++++++---
 net/netfilter/xt_LOG.c         |  2 +-
 net/netfilter/xt_TRACE.c       |  2 +-
 5 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 4b1b4c44e537b..2686caac8080e 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -64,7 +64,7 @@  int nf_log_bind_pf(struct net *net, u_int8_t pf,
 		   const struct nf_logger *logger);
 void nf_log_unbind_pf(struct net *net, u_int8_t pf);
 
-int nf_logger_find_get(int pf, enum nf_log_type type);
+int nf_logger_find_get_nolock(int pf, enum nf_log_type type);
 void nf_logger_put(int pf, enum nf_log_type type);
 void nf_logger_request_module(int pf, enum nf_log_type type);
 
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index ff07ee54bbdd3..dd34fd081a89d 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -19,7 +19,9 @@ 
 int sysctl_nf_log_all_netns __read_mostly;
 EXPORT_SYMBOL(sysctl_nf_log_all_netns);
 
-static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+EXPORT_SYMBOL(loggers);
+
 static DEFINE_MUTEX(nf_log_mutex);
 
 #define nft_log_dereference(logger) \
@@ -164,7 +166,7 @@  void nf_logger_request_module(int pf, enum nf_log_type type)
 }
 EXPORT_SYMBOL_GPL(nf_logger_request_module);
 
-int nf_logger_find_get(int pf, enum nf_log_type type)
+int nf_logger_find_get_nolock(int pf, enum nf_log_type type)
 {
 	struct nf_logger *logger;
 	int ret = -ENOENT;
@@ -184,7 +186,7 @@  int nf_logger_find_get(int pf, enum nf_log_type type)
 	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_logger_find_get);
+EXPORT_SYMBOL_GPL(nf_logger_find_get_nolock);
 
 void nf_logger_put(int pf, enum nf_log_type type)
 {
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index d21fe91d50fc0..71e35feb1fe27 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -15,6 +15,7 @@ 
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nfnetlink.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_log.h>
 #include <linux/netdevice.h>
@@ -46,6 +47,48 @@  static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
 	[NFTA_LOG_FLAGS]	= { .type = NLA_U32 },
 };
 
+extern struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+
+/*
+ * In "nft_log" module we need to drop nfnl lock while performing
+ * request_module(), calls to nf_logger_find_get() in other
+ * modules are done without nfnl_lock taken.
+ *
+ * nf_logger_find_get
+ *  nft_log_init
+ *   nf_tables_newexpr
+ *    nf_tables_newrule		// nc->call_batch
+ *				// called with nfnl_lock taken
+ *
+ *     nfnetlink_rcv_batch	// takes nfnl_lock(NFNL_SUBSYS_NFTABLES)
+ *      nfnetlink_rcv
+ *       netlink_unicast
+ *        netlink_sendmsg
+ */
+static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
+{
+	struct nf_logger *logger;
+	int ret = 0;
+
+	logger = loggers[pf][type];
+	if (logger == NULL) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nf-logger-%u-%u", pf, type);
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		ret = -EAGAIN;
+	}
+
+	rcu_read_lock();
+	logger = rcu_dereference(loggers[pf][type]);
+
+	if ((logger == NULL) ||
+	    ((ret == 0) && !try_module_get(logger->me))) {
+		ret = -ENOENT;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
 static int nft_log_init(const struct nft_ctx *ctx,
 			const struct nft_expr *expr,
 			const struct nlattr * const tb[])
@@ -99,11 +142,11 @@  static int nft_log_init(const struct nft_ctx *ctx,
 	}
 
 	if (ctx->afi->family == NFPROTO_INET) {
-		ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
+		ret = nf_logger_find_get_lock(NFPROTO_IPV4, li->type);
 		if (ret < 0)
 			return ret;
 
-		ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
+		ret = nf_logger_find_get_lock(NFPROTO_IPV6, li->type);
 		if (ret < 0) {
 			nf_logger_put(NFPROTO_IPV4, li->type);
 			return ret;
@@ -111,7 +154,7 @@  static int nft_log_init(const struct nft_ctx *ctx,
 		return 0;
 	}
 
-	return nf_logger_find_get(ctx->afi->family, li->type);
+	return nf_logger_find_get_lock(ctx->afi->family, li->type);
 }
 
 static void nft_log_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index eb31d73e31ebf..89059468e0f80 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -61,7 +61,7 @@  static int log_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 	}
 
-	return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+	return nf_logger_find_get_nolock(par->family, NF_LOG_TYPE_LOG);
 }
 
 static void log_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 858d189a13031..d0e77eea6b829 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -13,7 +13,7 @@  MODULE_ALIAS("ip6t_TRACE");
 
 static int trace_tg_check(const struct xt_tgchk_param *par)
 {
-	return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+	return nf_logger_find_get_nolock(par->family, NF_LOG_TYPE_LOG);
 }
 
 static void trace_tg_destroy(const struct xt_tgdtor_param *par)

Comments

Andrey Ryabinin July 16, 2020, 1:55 p.m.
On 7/16/20 3:37 PM, Konstantin Khorenko wrote:

>  
> +extern struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
> +
> +/*
> + * In "nft_log" module we need to drop nfnl lock while performing
> + * request_module(), calls to nf_logger_find_get() in other
> + * modules are done without nfnl_lock taken.
> + *
> + * nf_logger_find_get
> + *  nft_log_init
> + *   nf_tables_newexpr
> + *    nf_tables_newrule		// nc->call_batch
> + *				// called with nfnl_lock taken
> + *
> + *     nfnetlink_rcv_batch	// takes nfnl_lock(NFNL_SUBSYS_NFTABLES)
> + *      nfnetlink_rcv
> + *       netlink_unicast
> + *        netlink_sendmsg
> + */
> +static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
> +{
> +	struct nf_logger *logger;
> +	int ret = 0;
> +
> +	logger = loggers[pf][type];
> +	if (logger == NULL) {
> +		nfnl_unlock(NFNL_SUBSYS_NFTABLES);

nfnetlink_rcv_batch()  takes nfnl_lock(subsys_id) lock
How you can be sure that subsys_id is always NFNL_SUBSYS_NFTABLES ?


> +		request_module("nf-logger-%u-%u", pf, type);
> +		nfnl_lock(NFNL_SUBSYS_NFTABLES);
> +		ret = -EAGAIN;
> +	}
Konstantin Khorenko July 16, 2020, 4:47 p.m.
On 07/16/2020 04:55 PM, Andrey Ryabinin wrote:
> On 7/16/20 3:37 PM, Konstantin Khorenko wrote:
>>
>> +extern struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
>> +
>> +/*
>> + * In "nft_log" module we need to drop nfnl lock while performing
>> + * request_module(), calls to nf_logger_find_get() in other
>> + * modules are done without nfnl_lock taken.
>> + *
>> + * nf_logger_find_get
>> + *  nft_log_init
>> + *   nf_tables_newexpr
>> + *    nf_tables_newrule		// nc->call_batch
>> + *				// called with nfnl_lock taken
>> + *
>> + *     nfnetlink_rcv_batch	// takes nfnl_lock(NFNL_SUBSYS_NFTABLES)
>> + *      nfnetlink_rcv
>> + *       netlink_unicast
>> + *        netlink_sendmsg
>> + */
>> +static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
>> +{
>> +	struct nf_logger *logger;
>> +	int ret = 0;
>> +
>> +	logger = loggers[pf][type];
>> +	if (logger == NULL) {
>> +		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
>
> nfnetlink_rcv_batch()  takes nfnl_lock(subsys_id) lock
> How you can be sure that subsys_id is always NFNL_SUBSYS_NFTABLES ?

1) nf_logger_find_get_lock()	is called in only 1 place - in nft_log_init()
2) nft_log_init() == nft_log_ops.init() == struct nft_expr_ops::init()
    struct nft_expr_ops::init()	is called only in nf_tables_newexpr()
    => nft_log_init()		is called only in nf_tables_newexpr()

3) nf_tables_newexpr()		is called only in nf_tables_newrule() and in nft_expr_init()

a) nft_expr_init() is called only in nft_dynset_init()
    nft_dynset_init() == nft_dynset_ops.init() == struct nft_expr_ops::init()

    struct nft_expr_ops::init()	is called only in nf_tables_newexpr()
    => nft_dynset_init()		is called only in nf_tables_newexpr()
    => goto 3)

b) nf_tables_newrule() is the nf_tables_cb[NFT_MSG_NEWRULE].call_batch()
    struct nfnl_callback nf_tables_cb[NFT_MSG_MAX];

    struct nfnl_callback::call_batch() is called only in nfnetlink_rcv_batch() (nc->call_batch())
    =>
    b.1) nf_tables_newrule()	is called only in nfnetlink_rcv_batch()
    b.2) nfnetlink_rcv_batch()	acquires nfnl_lock(NFNL_SUBSYS_NFTABLES ???)
===========
                 /* Work around old nft using host byte order */
                 if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
                         res_id = NFNL_SUBSYS_NFTABLES;
                 else
                         res_id = ntohs(nfgenmsg->res_id);
                 nfnetlink_rcv_batch(skb, nlh, res_id);
===========

Well, looks like i was not patient enough,
i've proven that nfnl_lock() is always taken by above
(i mean there is no path nf_logger_find_get() is called without nfnl_lock() is taken),
but you are right, it can be not nfnl_lock(NFNL_SUBSYS_NFTABLES) but for some other subsys_id.

Thank you for catching this, i will rework the patch.

>> +		request_module("nf-logger-%u-%u", pf, type);
>> +		nfnl_lock(NFNL_SUBSYS_NFTABLES);
>> +		ret = -EAGAIN;
>> +	}
> .
>
Konstantin Khorenko July 16, 2020, 5:21 p.m.
On 07/16/2020 07:47 PM, Konstantin Khorenko wrote:
> On 07/16/2020 04:55 PM, Andrey Ryabinin wrote:
>> On 7/16/20 3:37 PM, Konstantin Khorenko wrote:
>>>
>>> +extern struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
>>> +
>>> +/*
>>> + * In "nft_log" module we need to drop nfnl lock while performing
>>> + * request_module(), calls to nf_logger_find_get() in other
>>> + * modules are done without nfnl_lock taken.
>>> + *
>>> + * nf_logger_find_get
>>> + *  nft_log_init
>>> + *   nf_tables_newexpr
>>> + *    nf_tables_newrule		// nc->call_batch
>>> + *				// called with nfnl_lock taken
>>> + *
>>> + *     nfnetlink_rcv_batch	// takes nfnl_lock(NFNL_SUBSYS_NFTABLES)
>>> + *      nfnetlink_rcv
>>> + *       netlink_unicast
>>> + *        netlink_sendmsg
>>> + */
>>> +static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
>>> +{
>>> +	struct nf_logger *logger;
>>> +	int ret = 0;
>>> +
>>> +	logger = loggers[pf][type];
>>> +	if (logger == NULL) {
>>> +		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
>>
>> nfnetlink_rcv_batch()  takes nfnl_lock(subsys_id) lock
>> How you can be sure that subsys_id is always NFNL_SUBSYS_NFTABLES ?
>
> 1) nf_logger_find_get_lock()	is called in only 1 place - in nft_log_init()
> 2) nft_log_init() == nft_log_ops.init() == struct nft_expr_ops::init()
>     struct nft_expr_ops::init()	is called only in nf_tables_newexpr()
>     => nft_log_init()		is called only in nf_tables_newexpr()
>
> 3) nf_tables_newexpr()		is called only in nf_tables_newrule() and in nft_expr_init()
>
> a) nft_expr_init() is called only in nft_dynset_init()
>     nft_dynset_init() == nft_dynset_ops.init() == struct nft_expr_ops::init()
>
>     struct nft_expr_ops::init()	is called only in nf_tables_newexpr()
>     => nft_dynset_init()		is called only in nf_tables_newexpr()
>     => goto 3)
>
> b) nf_tables_newrule() is the nf_tables_cb[NFT_MSG_NEWRULE].call_batch()
>     struct nfnl_callback nf_tables_cb[NFT_MSG_MAX];
>
>     struct nfnl_callback::call_batch() is called only in nfnetlink_rcv_batch() (nc->call_batch())
>     =>
>     b.1) nf_tables_newrule()	is called only in nfnetlink_rcv_batch()
>     b.2) nfnetlink_rcv_batch()	acquires nfnl_lock(NFNL_SUBSYS_NFTABLES ???)
> ===========
>                  /* Work around old nft using host byte order */
>                  if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
>                          res_id = NFNL_SUBSYS_NFTABLES;
>                  else
>                          res_id = ntohs(nfgenmsg->res_id);
>                  nfnetlink_rcv_batch(skb, nlh, res_id);
> ===========
>
> Well, looks like i was not patient enough,
> i've proven that nfnl_lock() is always taken by above
> (i mean there is no path nf_logger_find_get() is called without nfnl_lock() is taken),
> but you are right, it can be not nfnl_lock(NFNL_SUBSYS_NFTABLES) but for some other subsys_id.
>
> Thank you for catching this, i will rework the patch.

Nope, i've recalled my checks!

static const struct nfnetlink_subsystem nf_tables_subsys = {
         .name           = "nf_tables",
         .subsys_id      = NFNL_SUBSYS_NFTABLES,
         .cb             = nf_tables_cb,
...

static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
         [NFT_MSG_NEWRULE] = {
                 .call_batch     = nf_tables_newrule,
         },
...

nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
{
         u_int8_t cb_id = NFNL_MSG_TYPE(type);

         return &ss->cb[cb_id];
}

nfnetlink_rcv_batch(subsys_id)
{
...
         nfnl_lock(subsys_id);
         ss = rcu_dereference_protected(table[subsys_id].subsys, ...
...
                 /* We only accept a batch with messages for the same
                  * subsystem.
                  */
                 if (NFNL_SUBSYS_ID(type) != subsys_id) {
                         err = -EINVAL;
                         goto ack;
                 }

                 nc = nfnetlink_find_client(type, ss);
...
			nc->call_batch()



If nc->call_batch() == nf_tables_newrule(), it means
nc == nf_tables_cb[NFT_MSG_NEWRULE]

nc == &ss->cb[cb_id]
=> ss == nf_tables_subsys

and we know that nf_tables_subsys.subsys_id = NFNL_SUBSYS_NFTABLES


static const struct nfnetlink_subsystem nf_tables_subsys = {
         .name           = "nf_tables",
         .subsys_id      = NFNL_SUBSYS_NFTABLES,
         .cb             = nf_tables_cb,
...


Proven?

>>> +		request_module("nf-logger-%u-%u", pf, type);
>>> +		nfnl_lock(NFNL_SUBSYS_NFTABLES);
>>> +		ret = -EAGAIN;
>>> +	}
>> .
>>
Andrey Ryabinin July 17, 2020, 12:39 p.m.
On 7/16/20 3:37 PM, Konstantin Khorenko wrote:
> +static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
> +{
> +	struct nf_logger *logger;
> +	int ret = 0;
> +
> +	logger = loggers[pf][type];
> +	if (logger == NULL) {
> +		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
> +		request_module("nf-logger-%u-%u", pf, type);
> +		nfnl_lock(NFNL_SUBSYS_NFTABLES);
> +		ret = -EAGAIN;

nfnetlink_rcv_batch() has special error path for EAGAIN:
			/* The lock was released to autoload some module, we
			 * have to abort and start from scratch using the
			 * original skb.
			 */
			if (err == -EAGAIN) {
				status |= NFNL_BATCH_REPLAY;
				goto done;
			}


Yet, nf_logger_find_get_lock() don't return immediately with EAGAIN, but continues and potentially replaces EAGAIN with ENOENT.
Why?

> +	}
> +
> +	rcu_read_lock();
> +	logger = rcu_dereference(loggers[pf][type]);
> +
> +	if ((logger == NULL) ||
> +	    ((ret == 0) && !try_module_get(logger->me))) {
> +		ret = -ENOENT;
> +	}
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
Andrey Ryabinin July 17, 2020, 12:41 p.m.
On 7/16/20 8:21 PM, Konstantin Khorenko wrote:

> If nc->call_batch() == nf_tables_newrule(), it means
> nc == nf_tables_cb[NFT_MSG_NEWRULE]
> 
> nc == &ss->cb[cb_id]
> => ss == nf_tables_subsys
> 
> and we know that nf_tables_subsys.subsys_id = NFNL_SUBSYS_NFTABLES
> 
> 
> static const struct nfnetlink_subsystem nf_tables_subsys = {
>         .name           = "nf_tables",
>         .subsys_id      = NFNL_SUBSYS_NFTABLES,
>         .cb             = nf_tables_cb,
> ...
> 
> 
> Proven?
> 

Seems ok.
FTR I was thinking about 'request_module_nowait(); return -EAGAIN;' as an alternative.
Konstantin Khorenko July 17, 2020, 1:19 p.m.
On 07/17/2020 03:39 PM, Andrey Ryabinin wrote:
>
>
> On 7/16/20 3:37 PM, Konstantin Khorenko wrote:
>> +static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
>> +{
>> +	struct nf_logger *logger;
>> +	int ret = 0;
>> +
>> +	logger = loggers[pf][type];
>> +	if (logger == NULL) {
>> +		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
>> +		request_module("nf-logger-%u-%u", pf, type);
>> +		nfnl_lock(NFNL_SUBSYS_NFTABLES);
>> +		ret = -EAGAIN;
>
> nfnetlink_rcv_batch() has special error path for EAGAIN:
> 			/* The lock was released to autoload some module, we
> 			 * have to abort and start from scratch using the
> 			 * original skb.
> 			 */
> 			if (err == -EAGAIN) {
> 				status |= NFNL_BATCH_REPLAY;
> 				goto done;
> 			}
>
>
> Yet, nf_logger_find_get_lock() don't return immediately with EAGAIN, but continues and potentially replaces EAGAIN with ENOENT.
> Why?

Well, this is the original logic, i did not change it.

The idea is:
* if loggers[pf][type] is not registered, try to load the appropriate module
   // the module load could fail, we keep it in mind

* next check loggers[pf][type] once again - and if it's empty again after our attempt to load the module
   (and thus to register desired loggers[pf][type]),
   then we don't need to return -EAGAIN, because
   - next module load attempt could also fail to register loggers[pf][type] and we get in a loop
   - we just don't want to try loading module more than once

=> in case loggers[pf][type] is still empty after single module load attempt =>
    something is deadly wrong, return -ENOENT, no need to probe it once more

- another case - try_module_get() fails - we also don't want to retry in this case

>> +	}
>> +
>> +	rcu_read_lock();
>> +	logger = rcu_dereference(loggers[pf][type]);
>> +
>> +	if ((logger == NULL) ||
>> +	    ((ret == 0) && !try_module_get(logger->me))) {
>> +		ret = -ENOENT;
>> +	}
>> +	rcu_read_unlock();
>> +	return ret;
>> +}
>> +
> .
>
Konstantin Khorenko July 17, 2020, 1:21 p.m.
On 07/17/2020 03:41 PM, Andrey Ryabinin wrote:
>
>
> On 7/16/20 8:21 PM, Konstantin Khorenko wrote:
>
>> If nc->call_batch() == nf_tables_newrule(), it means
>> nc == nf_tables_cb[NFT_MSG_NEWRULE]
>>
>> nc == &ss->cb[cb_id]
>> => ss == nf_tables_subsys
>>
>> and we know that nf_tables_subsys.subsys_id = NFNL_SUBSYS_NFTABLES
>>
>>
>> static const struct nfnetlink_subsystem nf_tables_subsys = {
>>         .name           = "nf_tables",
>>         .subsys_id      = NFNL_SUBSYS_NFTABLES,
>>         .cb             = nf_tables_cb,
>> ...
>>
>>
>> Proven?
>>
>
> Seems ok.
> FTR I was thinking about 'request_module_nowait(); return -EAGAIN;' as an alternative.

Ok, but how do we know when we can try checking again - and expect the loggers[pf][type] has been already registered?
i mean, it's possible the handle -EAGAIN in netlink code and return again into nf_logger_find_get_lock()
before modprobe loads the module we requested and thus loggers[pf][type] will be again empty.