[PATCH] Use BPF to distribute packets to different worker threads.

Liu, Qiao qiao.liu at intel.com
Thu Sep 24 00:58:45 UTC 2020


Changes since the previous version of the patch: remove the printf.
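
For reviewers who want to try the mechanism outside of nginx, here is a
minimal standalone sketch (illustration only, not part of the patch) of the
same classic BPF program and the SO_ATTACH_REUSEPORT_CBPF attach sequence.
The filter returns skb->queue_mapping % n, and the kernel uses the returned
value as the index of the socket in the SO_REUSEPORT group that should
receive the packet. The port number (8080) and the worker count (4) below
are arbitrary placeholders.

/* reuseport_cbpf_demo.c: attach a "queue_mapping % n" cBPF filter to a
 * SO_REUSEPORT listening socket.  Requires Linux 4.5+ kernel headers.
 * Build: cc -o reuseport_cbpf_demo reuseport_cbpf_demo.c
 */

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/filter.h>

int main(void)
{
    int                 fd, one = 1;
    uint16_t            n = 4;          /* number of sockets/workers */
    struct sockaddr_in  sin;

    struct sock_filter  code[] = {
        /* A = skb->queue_mapping */
        { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_QUEUE },
        /* A = A % n */
        { BPF_ALU | BPF_MOD, 0, 0, n },
        /* return A: index of the target socket in the reuseport group */
        { BPF_RET | BPF_A, 0, 0, 0 },
    };
    struct sock_fprog   prog = { .len = 3, .filter = code };

    fd = socket(AF_INET, SOCK_STREAM, 0);
    setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_ANY);
    sin.sin_port = htons(8080);         /* arbitrary test port */

    /* attach after bind()+listen() so the reuseport group already exists;
     * nginx likewise attaches after its listening sockets are set up */
    if (bind(fd, (struct sockaddr *) &sin, sizeof(sin)) == -1
        || listen(fd, 128) == -1)
    {
        perror("bind/listen");
        return 1;
    }

    if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF,
                   &prog, sizeof(prog)) == -1)
    {
        perror("SO_ATTACH_REUSEPORT_CBPF");
        return 1;
    }

    printf("cBPF filter attached to fd %d\n", fd);

    close(fd);
    return 0;
}

The patch's attach_bpf() loads the same three-instruction program, except
that n comes from ccf->worker_processes instead of being hard-coded.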

# HG changeset patch
# User Liu Qiao <qiao.liu at intel.com>
# Date 1599735293 14400
#      Thu Sep 10 06:54:53 2020 -0400
# Node ID c2eabe9168d0cbefc030807a0808568d86c93e4f
# Parent  da5e3f5b16733167142b599b6af3ce9469a07d52
Use BPF to distribute packets to different worker threads.
Use a classic Berkeley Packet Filter program to read a packet's
queue_mapping number, and use that number to distribute the packet to
a particular worker thread; this improves CPU utilization and HTTP
latency.
Author: Samudrala, Sridhar <sridhar.samudrala at intel.com>

diff -r da5e3f5b1673 -r c2eabe9168d0 auto/os/linux
--- a/auto/os/linux	Wed Sep 02 23:13:36 2020 +0300
+++ b/auto/os/linux	Thu Sep 10 06:54:53 2020 -0400
@@ -32,6 +32,10 @@
     have=NGX_HAVE_POSIX_FADVISE . auto/nohave
 fi
 
+if [ $version -lt 263680 ]; then
+    have=NGX_HAVE_REUSEPORT_CBPF . auto/nohave
+fi
+
 # epoll, EPOLLET version
 
 ngx_feature="epoll"
diff -r da5e3f5b1673 -r c2eabe9168d0 auto/unix
--- a/auto/unix	Wed Sep 02 23:13:36 2020 +0300
+++ b/auto/unix	Thu Sep 10 06:54:53 2020 -0400
@@ -331,6 +331,17 @@
 ngx_feature_test="setsockopt(0, SOL_SOCKET, SO_REUSEPORT, NULL, 0)"
 . auto/feature
 
+ngx_feature="SO_REUSEPORT_CBPF"
+ngx_feature_name="NGX_HAVE_REUSEPORT_CBPF"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <linux/filter.h>
+                  #include <error.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="setsockopt(0, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, NULL, 0)"
+. auto/feature
+
 
 ngx_feature="SO_ACCEPTFILTER"
 ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
diff -r da5e3f5b1673 -r c2eabe9168d0 src/core/ngx_connection.c
--- a/src/core/ngx_connection.c	Wed Sep 02 23:13:36 2020 +0300
+++ b/src/core/ngx_connection.c	Thu Sep 10 06:54:53 2020 -0400
@@ -8,7 +8,10 @@
 #include <ngx_config.h>
 #include <ngx_core.h>
 #include <ngx_event.h>
-
+#if (NGX_HAVE_REUSEPORT_CBPF)
+#include <linux/filter.h>
+#include <error.h>
+#endif
 
 ngx_os_io_t  ngx_io;
 
@@ -708,6 +711,35 @@
     return NGX_OK;
 }
 
+#if (NGX_HAVE_REUSEPORT)
+#if (NGX_HAVE_REUSEPORT_CBPF)
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+static ngx_int_t attach_bpf(int fd, uint16_t n)
+{
+    struct sock_filter  code[] = {
+        /* A = skb->queue_mapping */
+        { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_QUEUE },
+        /* A = A % n */
+        { BPF_ALU | BPF_MOD, 0, 0, n },
+        /* return A */
+        { BPF_RET | BPF_A, 0, 0, 0 },
+    };
+    struct sock_fprog  p = {
+        .len = ARRAY_SIZE(code),
+        .filter = code,
+    };
+
+    if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p))) {
+        return NGX_ERROR;
+    }
+    return NGX_OK;
+}
+#endif
+#endif
+
 
 void
 ngx_configure_listening_sockets(ngx_cycle_t *cycle)
@@ -719,6 +751,11 @@
 #if (NGX_HAVE_DEFERRED_ACCEPT && defined SO_ACCEPTFILTER)
     struct accept_filter_arg   af;
 #endif
+#if (NGX_HAVE_REUSEPORT)
+#if (NGX_HAVE_REUSEPORT_CBPF)
+    ngx_core_conf_t           *ccf;
+#endif
+#endif
 
     ls = cycle->listening.elts;
     for (i = 0; i < cycle->listening.nelts; i++) {
@@ -1011,6 +1048,31 @@
         }
 
 #endif
+#if (NGX_HAVE_REUSEPORT)
+#if (NGX_HAVE_REUSEPORT_CBPF)
+
+        if (ls[i].reuseport) {
+            ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx,
+                                                   ngx_core_module);
+            if (ccf) {
+
+                if (attach_bpf(ls[i].fd, ccf->worker_processes) == NGX_OK) {
+                    ngx_log_error(NGX_LOG_INFO, cycle->log, 0,
+                                  "bpf prog attached to fd:%d", ls[i].fd);
+
+                } else {
+                    ngx_log_error(NGX_LOG_ERR, cycle->log, ngx_socket_errno,
+                                  "failed to set SO_ATTACH_REUSEPORT_CBPF");
+                }
+
+            } else {
+                ngx_log_error(NGX_LOG_ERR, cycle->log, 0,
+                              "can not get config, attach bpf failed");
+            }
+        }
+
+#endif
+#endif
     }
 
     return;
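
Note for testers: the new code only runs for listening sockets that have the
reuseport flag set, so a configuration along the following lines is assumed
(a minimal sketch, values arbitrary):

worker_processes  4;

events {
}

http {
    server {
        listen       80 reuseport;
        server_name  localhost;

        location / {
            root   html;
        }
    }
}

With the reuseport parameter, nginx creates an individual listening socket
for each worker process, and the intent of the filter is that traffic
arriving on NIC RX queue q is delivered to worker q % worker_processes
(q % 4 in this sketch).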

-----Original Message-----
From: Liu, Qiao 
Sent: Tuesday, September 15, 2020 10:09 AM
To: nginx-devel at nginx.org
Subject: RE: [PATCH] Use BPF to distribute packets to different worker threads.

Below is a comparison of five test runs (112 threads, 10000 connections, 1M-object HTTP requests). It looks like P99 latency improves significantly, and Max latency is also reduced.



                       AVG        Stdev        Max       P99
BPF        test 1      1.32s      447.09ms     5.48s     2.82s
           test 2      1.39s      513.8ms      9.42s     3.1s
           test 3      1.4s       341.38ms     5.63s     2.55s
           test 4      1.41s      407.45ms     6.96s     2.77s
           test 5      1.29s      644.81ms     9.45s     3.74s
           Average     1.362s     470.906ms    7.388s    2.996s

NonBPF     test 1      1.48s      916.88ms     9.44s     5.08s
           test 2      1.43s      658.48ms     9.54s     3.92s
           test 3      1.41s      650.38ms     8.63s     3.59s
           test 4      1.29s      1010ms       10s       5.21s
           test 5      1.31s      875.01ms     9.53s     4.39s
           Average     1.384s     822.15ms     9.428s    4.438s


Thanks
LQ
-----Original Message-----
From: nginx-devel <nginx-devel-bounces at nginx.org> On Behalf Of Liu, Qiao
Sent: Monday, September 14, 2020 9:18 AM
To: nginx-devel at nginx.org
Subject: RE: [PATCH] Use BPF to distribute packets to different worker threads.

Hi, Maxim Dounin:
Thanks for your reply. This server was selected at random; we only ran the BPF and non-BPF tests on it. I think the latency depends on the server configuration rather than on the BPF patch. Also, the NIC in this server is a Mellanox card, not ADQ-capable hardware. We will do more testing.

Thanks
LQ

-----Original Message-----
From: nginx-devel <nginx-devel-bounces at nginx.org> On Behalf Of Maxim Dounin
Sent: Monday, September 14, 2020 7:40 AM
To: nginx-devel at nginx.org
Subject: Re: [PATCH] Use BPF to distribute packets to different worker threads.

Hello!

On Fri, Sep 11, 2020 at 05:41:47AM +0000, Liu, Qiao wrote:

> Hi, Vladimir Homutov:
> Below is our WRK test result output with BPF enabled:
> 
>   112 threads and 10000 connections
>   Thread Stats   Avg      Stdev     Max   +/- Stdev
>     Latency   608.23ms  820.71ms  10.00s    87.48%
>     Connect    16.52ms   54.53ms   1.99s    94.73%
>     Delay     153.13ms  182.17ms   2.00s    90.74%
>     Req/Sec   244.79    142.32     1.99k    68.40%
>   Latency Distribution
>   50.00%  293.50ms
>   75.00%  778.33ms
>   90.00%    1.61s
>   99.00%    3.71s
>   99.90%    7.03s
>   99.99%    8.94s
>   Connect Distribution
>   50.00%    1.93ms
>   75.00%    2.85ms
>   90.00%   55.76ms
>   99.00%  229.19ms
>   99.90%  656.79ms
>   99.99%    1.43s
>   Delay Distribution
>   50.00%  110.96ms
>   75.00%  193.67ms
>   90.00%  321.77ms
>   99.00%  959.27ms
>   99.90%    1.57s
>   99.99%    1.91s
> Compared with no BPF but with reuseport still enabled, as below:
> 
> 112 threads and 10000 connections
>   Thread Stats   Avg      Stdev     Max   +/- Stdev
>     Latency   680.50ms  943.69ms  10.00s    87.18%
>     Connect    58.44ms  238.08ms   2.00s    94.58%
>     Delay     158.84ms  256.28ms   2.00s    90.92%
>     Req/Sec   244.51    151.00     1.41k    69.67%
>   Latency Distribution
>   50.00%  317.61ms
>   75.00%  913.52ms
>   90.00%    1.90s
>   99.00%    4.30s
>   99.90%    6.52s
>   99.99%    8.80s
>   Connect Distribution
>   50.00%    1.88ms
>   75.00%    2.21ms
>   90.00%   55.94ms
>   99.00%    1.45s
>   99.90%    1.95s
>   99.99%    2.00s
>   Delay Distribution
>   50.00%   73.01ms
>   75.00%  190.40ms
>   90.00%  387.01ms
>   99.00%    1.34s
>   99.90%    1.86s
>   99.99%    1.99s
> 
> 
> From the above results, there is almost a 20% latency reduction:
> the P99 latency with BPF is 3.71s, while without BPF it is 4.3s.

Thank you for the results.

Given that latency stdev is way higher than the average latency, I don't think the "20% latency reduction" observed is statistically significant.  Please try running several tests and use ministat(1) to check the results.

Also, the latency values look very high, and request rate very low.  What's on the server side?

--
Maxim Dounin
http://mdounin.ru/
_______________________________________________
nginx-devel mailing list
nginx-devel at nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel

