[PATCH] Use BPF to distribute packet to different work thread.

Liu, Qiao qiao.liu at intel.com
Fri Sep 11 05:41:47 UTC 2020


Hi, Vladimir Homutov:
Below is our WRK test result output with BPF enabled:

  112 threads and 10000 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   608.23ms  820.71ms  10.00s    87.48%
    Connect    16.52ms   54.53ms   1.99s    94.73%
    Delay     153.13ms  182.17ms   2.00s    90.74%
    Req/Sec   244.79    142.32     1.99k    68.40%
  Latency Distribution
  50.00%  293.50ms
  75.00%  778.33ms
  90.00%    1.61s
  99.00%    3.71s
  99.90%    7.03s
  99.99%    8.94s
  Connect Distribution
  50.00%    1.93ms
  75.00%    2.85ms
  90.00%   55.76ms
  99.00%  229.19ms
  99.90%  656.79ms
  99.99%    1.43s
  Delay Distribution
  50.00%  110.96ms
  75.00%  193.67ms
  90.00%  321.77ms
  99.00%  959.27ms
  99.90%    1.57s
  99.99%    1.91s
Compared with reuseport enabled but without BPF, as below:

112 threads and 10000 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   680.50ms  943.69ms  10.00s    87.18%
    Connect    58.44ms  238.08ms   2.00s    94.58%
    Delay     158.84ms  256.28ms   2.00s    90.92%
    Req/Sec   244.51    151.00     1.41k    69.67%
  Latency Distribution
  50.00%  317.61ms
  75.00%  913.52ms
  90.00%    1.90s
  99.00%    4.30s
  99.90%    6.52s
  99.99%    8.80s
  Connect Distribution
  50.00%    1.88ms
  75.00%    2.21ms
  90.00%   55.94ms
  99.00%    1.45s
  99.90%    1.95s
  99.99%    2.00s
  Delay Distribution
  50.00%   73.01ms
  75.00%  190.40ms
  90.00%  387.01ms
  99.00%    1.34s
  99.90%    1.86s
  99.99%    1.99s


From the above results, there is almost a 20 percent latency reduction. The P99 latency with BPF is 3.71s, but without BPF it is 4.3s.

For question 2 (why not bind queues to cores): in our view, binding to threads is different from binding to cores. Binding to threads is user-space work to distribute the load, while binding to cores happens at the NIC hardware level.
We do have ADQ for the NIC, which handles much of the bind-to-cores work.
Thanks
LQ
-----Original Message-----
From: nginx-devel <nginx-devel-bounces at nginx.org> On Behalf Of Vladimir Homutov
Sent: Thursday, September 10, 2020 6:12 PM
To: nginx-devel at nginx.org
Subject: Re: [PATCH] Use BPF to distribute packet to different work thread.

On Thu, Sep 10, 2020 at 05:22:12AM +0000, Liu, Qiao wrote:
> # HG changeset patch
> # User Liu Qiao <qiao.liu at intel.com>
> # Date 1599735293 14400
> #      Thu Sep 10 06:54:53 2020 -0400
> # Node ID f79d524a2cc0093c53490f947564e42371cf944f
> # Parent  da5e3f5b16733167142b599b6af3ce9469a07d52
> Use BPF to distribute packet to different work thread.
> Use Berkeley Packet Filter to get packet queue_mapping number, and use 
> this queue_mapping number to distribute the packet to different work 
> thread, this will improve CPU utilization and http latency.
> Author: Samudrala, Sridhar <sridhar.samudrala at intel.com>

Do you have any real measurements with proper testing methodology?

Also, why not bind NIC queues to cores system-wide?

>
> diff -r da5e3f5b1673 -r f79d524a2cc0 auto/os/linux
> --- a/auto/os/linux           Wed Sep 02 23:13:36 2020 +0300
> +++ b/auto/os/linux        Thu Sep 10 06:54:53 2020 -0400
> @@ -32,6 +32,10 @@
>      have=NGX_HAVE_POSIX_FADVISE . auto/nohave fi
> +if [ $version -lt 263680 ]; then
> +    have=NGX_HAVE_REUSEPORT_CBPF . auto/nohave fi
> +
> # epoll, EPOLLET version
>  ngx_feature="epoll"
> diff -r da5e3f5b1673 -r f79d524a2cc0 auto/unix
> --- a/auto/unix   Wed Sep 02 23:13:36 2020 +0300
> +++ b/auto/unix               Thu Sep 10 06:54:53 2020 -0400
> @@ -331,6 +331,17 @@
> ngx_feature_test="setsockopt(0, SOL_SOCKET, SO_REUSEPORT, NULL, 0)"
> . auto/feature
> +ngx_feature="SO_REUSEPORT_CBPF"
> +ngx_feature_name="NGX_HAVE_REUSEPORT_CBPF"
> +ngx_feature_run=no
> +ngx_feature_incs="#include <sys/socket.h>
> +                  #include <linux/filter.h>
> +                  #include <error.h>"
> +ngx_feature_path=
> +ngx_feature_libs=
> +ngx_feature_test="setsockopt(0, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, NULL, 0)"
> +. auto/feature
> +
>  ngx_feature="SO_ACCEPTFILTER"
> ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
> diff -r da5e3f5b1673 -r f79d524a2cc0 src/core/ngx_connection.c
> --- a/src/core/ngx_connection.c Wed Sep 02 23:13:36 2020 +0300
> +++ b/src/core/ngx_connection.c              Thu Sep 10 06:54:53 2020 -0400
> @@ -8,7 +8,10 @@
> #include <ngx_config.h>
> #include <ngx_core.h>
> #include <ngx_event.h>
> -
> +#if (NGX_HAVE_REUSEPORT_CBPF)
> +#include <linux/filter.h>
> +#include <error.h>
> +#endif
>  ngx_os_io_t  ngx_io;
> @@ -708,6 +711,35 @@
>      return NGX_OK;
> }
> +#if(NGX_HAVE_REUSEPORT)
> +#if(NGX_HAVE_REUSEPORT_CBPF)
> +#ifndef ARRAY_SIZE
> +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif
> +
> +static void attach_bpf(int fd, uint16_t n) {
> +             struct sock_filter code[] = {
> +                            /* A = skb->queue_mapping */
> +                            { BPF_LD  | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_QUEUE },
> +                            /* A = A % n */
> +                            { BPF_ALU | BPF_MOD, 0, 0, n },
> +                            /* return A */
> +                            { BPF_RET | BPF_A, 0, 0, 0 },
> +             };
> +             struct sock_fprog p = {
> +                            .len = ARRAY_SIZE(code),
> +                            .filter = code,
> +             };
> +
> +             if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
> +                            printf("failed to set SO_ATTACH_REUSEPORT_CBPF");
> +             else
> +                            printf("bpf prog attached to fd:%d\n", 
> +fd); } #endif #endif
> +
>  void
> ngx_configure_listening_sockets(ngx_cycle_t *cycle) @@ -719,6 +751,11 
> @@ #if (NGX_HAVE_DEFERRED_ACCEPT && defined SO_ACCEPTFILTER)
>      struct accept_filter_arg   af;
> #endif
> +#if (NGX_HAVE_REUSEPORT)
> +#if (NGX_HAVE_REUSEPORT_CBPF)
> +        ngx_core_conf_t* ccf ;
> +#endif
> +#endif
>      ls = cycle->listening.elts;
>      for (i = 0; i < cycle->listening.nelts; i++) { @@ -1011,6 
> +1048,16 @@
>          }
>  #endif
> +#if (NGX_HAVE_REUSEPORT)
> +#if (NGX_HAVE_REUSEPORT_CBPF)
> +    if(ls[i].reuseport)
> +    {
> +        ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx,ngx_core_module);
> +        if(ccf)
> +            attach_bpf(ls[i].fd, ccf->worker_processes);
> +    }
> +#endif
> +#endif
>      }
>      return;
>

> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> http://mailman.nginx.org/mailman/listinfo/nginx-devel

_______________________________________________
nginx-devel mailing list
nginx-devel at nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel


More information about the nginx-devel mailing list