[rh7,10/11] net: ipv4: listify ip_rcv_finish

Submitted by Andrey Ryabinin on Sept. 10, 2018, 3:39 p.m.

Details

Message ID 20180910153940.22827-11-aryabinin@virtuozzo.com
State New
Series "Backport of "Handle multiple received packets at each stage""
Headers show

Commit Message

Andrey Ryabinin Sept. 10, 2018, 3:39 p.m.
From: Edward Cree <ecree@solarflare.com>

ip_rcv_finish_core(), if it does not drop, sets skb->dst by either early
 demux or route lookup.  The last step, calling dst_input(skb), is left to
 the caller; in the listified case, we split to form sublists with a common
 dst, but then ip_sublist_rcv_finish() just calls dst_input(skb) in a loop.
The next step in listification would thus be to add a list_input() method
 to struct dst_entry.

Early demux is an indirect call based on iph->protocol; this is another
 opportunity for listification which is not taken here (it would require
 slicing up ip_rcv_finish_core() to allow splitting on protocol changes).

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

https://jira.sw.ru/browse/PSBM-88420
(cherry picked from commit 5fa12739a53d0780265ed9d44d9ec9ba5f9ad00a)
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 net/ipv4/ip_input.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 47 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 0f50a08e5464..306181b2decc 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,7 +313,7 @@  static inline bool ip_rcv_options(struct sk_buff *skb)
 int sysctl_ip_early_demux __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_ip_early_demux);
 
-static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish_core(struct sock *sk, struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
@@ -366,7 +366,7 @@  static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
 		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
 				skb->len);
 
-	return dst_input(skb);
+	return NET_RX_SUCCESS;
 
 drop:
 	kfree_skb(skb);
@@ -378,6 +378,15 @@  static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
 	goto drop;
 }
 
+static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
+{
+	int ret = ip_rcv_finish_core(sk, skb);
+
+	if (ret != NET_RX_DROP)
+		ret = dst_input(skb);
+	return ret;
+}
+
 /*
  * 	Main IP Receive routine.
  */
@@ -486,15 +495,47 @@  int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		       ip_rcv_finish);
 }
 
-static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
-			   struct net *net)
+static void ip_sublist_rcv_finish(struct list_head *head)
 {
 	struct sk_buff *skb, *next;
 
+	list_for_each_entry_safe(skb, next, head, list)
+		dst_input(skb);
+}
+
+static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+			       struct list_head *head)
+{
+	struct dst_entry *curr_dst = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct dst_entry *dst;
+
+		if (ip_rcv_finish_core(sk, skb) == NET_RX_DROP)
+			continue;
+
+		dst = skb_dst(skb);
+		if (curr_dst != dst) {
+			/* dispatch old sublist */
+			list_cut_before(&sublist, head, &skb->list);
+			if (!list_empty(&sublist))
+				ip_sublist_rcv_finish(&sublist);
+			/* start new sublist */
+			curr_dst = dst;
+		}
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv_finish(head);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+			   struct net *net)
+{
 	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
 		     head, dev, NULL, ip_rcv_finish);
-	list_for_each_entry_safe(skb, next, head, list)
-		ip_rcv_finish(NULL, skb);
+	ip_list_rcv_finish(net, NULL, head);
 }
 
 /* Receive a list of IP packets */