[PATCH 3 of 3] QUIC: path MTU discovery

Maxim Dounin mdounin at mdounin.ru
Mon May 1 20:39:59 UTC 2023


Hello!

On Mon, May 01, 2023 at 08:58:55PM +0400, Sergey Kandaurov wrote:

[...]

> It makes sense to further limit link MTU to system constraints.
> 
> BSD is known to have a system limitation for a maximum outgoing UDP
> datagram size set for some reason.  In all known distributions which
> derive from 4.3BSD-Reno, it defaults to 9216.
> For that, we can query the limit using sysctl.
> 
> Limited, this reduces the number of MTU probes.
> In particular on lo0, before the change:
> 
> - 16356
> + 8778
> - 12567
> - 10672
> - 9725
> - 9251
> + 9014
> 
> Legend: '-' is rejected with EMSGSIZE, '+' sent to network
> After the change, it is the only successful probe of maxdgram.
> 
> On real networks there will be different results,
> but the general pattern can be traced.
> 
> Aside from that, I noticed that we have split FreeBSD
> and Darwin tests/sources but didn't accommodate for that.
> 
> Below to address this.  The first one or two patches of three
> could go directly to the default branch if approved,
> the last one set upper bound for maxdgram where appropriate.
> 
> # HG changeset patch
> # User Sergey Kandaurov <pluknet at nginx.com>
> # Date 1682957319 -14400
> #      Mon May 01 20:08:39 2023 +0400
> # Branch quic
> # Node ID b4d35da933cc3df9a098feb6d06957b19e228c39
> # Parent  cc5d2e648dd4359d77c28120e6e02f9b5842024e
> Fixed Darwin support for the "net.inet.tcp.sendspace" sysctl.
> 
> After Darwin support was split in separate files in 345a014436d4 (0.7.7),
> sysctl variables received a separate prefix, but common sources were not
> updated.  In particular, the "net.inet.tcp.sendspace" sysctl value wasn't
> checked as a limit for the send_lowat directive and friends.
> 
> The change unifies a prefix for the "sendspace" variable in both FreeBSD
> and Darwin.  Other extern variables aren't touched: their usage is either
> limited to os-specific source files or they aren't used at all.
> 
> diff --git a/src/http/modules/ngx_http_fastcgi_module.c b/src/http/modules/ngx_http_fastcgi_module.c
> --- a/src/http/modules/ngx_http_fastcgi_module.c
> +++ b/src/http/modules/ngx_http_fastcgi_module.c
> @@ -3905,14 +3905,14 @@ ngx_http_fastcgi_cache_key(ngx_conf_t *c
>  static char *
>  ngx_http_fastcgi_lowat_check(ngx_conf_t *cf, void *post, void *data)
>  {
> -#if (NGX_FREEBSD)
> +#if (NGX_FREEBSD || NGX_DARWIN)
>      ssize_t *np = data;
>  
> -    if ((u_long) *np >= ngx_freebsd_net_inet_tcp_sendspace) {
> +    if ((u_long) *np >= ngx_net_inet_tcp_sendspace) {
>          ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
>                             "\"fastcgi_send_lowat\" must be less than %d "
>                             "(sysctl net.inet.tcp.sendspace)",
> -                           ngx_freebsd_net_inet_tcp_sendspace);
> +                           ngx_net_inet_tcp_sendspace);
>  
>          return NGX_CONF_ERROR;
>      }
> diff --git a/src/http/modules/ngx_http_proxy_module.c b/src/http/modules/ngx_http_proxy_module.c
> --- a/src/http/modules/ngx_http_proxy_module.c
> +++ b/src/http/modules/ngx_http_proxy_module.c
> @@ -4890,14 +4890,14 @@ ngx_http_proxy_ssl_password_file(ngx_con
>  static char *
>  ngx_http_proxy_lowat_check(ngx_conf_t *cf, void *post, void *data)
>  {
> -#if (NGX_FREEBSD)
> +#if (NGX_FREEBSD || NGX_DARWIN)
>      ssize_t *np = data;
>  
> -    if ((u_long) *np >= ngx_freebsd_net_inet_tcp_sendspace) {
> +    if ((u_long) *np >= ngx_net_inet_tcp_sendspace) {
>          ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
>                             "\"proxy_send_lowat\" must be less than %d "
>                             "(sysctl net.inet.tcp.sendspace)",
> -                           ngx_freebsd_net_inet_tcp_sendspace);
> +                           ngx_net_inet_tcp_sendspace);
>  
>          return NGX_CONF_ERROR;
>      }
> diff --git a/src/http/ngx_http_core_module.c b/src/http/ngx_http_core_module.c
> --- a/src/http/ngx_http_core_module.c
> +++ b/src/http/ngx_http_core_module.c
> @@ -5288,14 +5288,14 @@ ngx_http_disable_symlinks(ngx_conf_t *cf
>  static char *
>  ngx_http_core_lowat_check(ngx_conf_t *cf, void *post, void *data)
>  {
> -#if (NGX_FREEBSD)
> +#if (NGX_FREEBSD || NGX_DARWIN)
>      ssize_t *np = data;
>  
> -    if ((u_long) *np >= ngx_freebsd_net_inet_tcp_sendspace) {
> +    if ((u_long) *np >= ngx_net_inet_tcp_sendspace) {
>          ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
>                             "\"send_lowat\" must be less than %d "
>                             "(sysctl net.inet.tcp.sendspace)",
> -                           ngx_freebsd_net_inet_tcp_sendspace);
> +                           ngx_net_inet_tcp_sendspace);
>  
>          return NGX_CONF_ERROR;
>      }
> diff --git a/src/os/unix/ngx_darwin.h b/src/os/unix/ngx_darwin.h
> --- a/src/os/unix/ngx_darwin.h
> +++ b/src/os/unix/ngx_darwin.h
> @@ -15,7 +15,7 @@ ngx_chain_t *ngx_darwin_sendfile_chain(n
>  
>  extern int       ngx_darwin_kern_osreldate;
>  extern int       ngx_darwin_hw_ncpu;
> -extern u_long    ngx_darwin_net_inet_tcp_sendspace;
> +extern u_long    ngx_net_inet_tcp_sendspace;
>  
>  extern ngx_uint_t  ngx_debug_malloc;
>  
> diff --git a/src/os/unix/ngx_darwin_init.c b/src/os/unix/ngx_darwin_init.c
> --- a/src/os/unix/ngx_darwin_init.c
> +++ b/src/os/unix/ngx_darwin_init.c
> @@ -13,7 +13,7 @@ char    ngx_darwin_kern_ostype[16];
>  char    ngx_darwin_kern_osrelease[128];
>  int     ngx_darwin_hw_ncpu;
>  int     ngx_darwin_kern_ipc_somaxconn;
> -u_long  ngx_darwin_net_inet_tcp_sendspace;
> +u_long  ngx_net_inet_tcp_sendspace;
>  
>  ngx_uint_t  ngx_debug_malloc;
>  
> @@ -49,8 +49,8 @@ sysctl_t sysctls[] = {
>        sizeof(ngx_darwin_hw_ncpu), 0 },
>  
>      { "net.inet.tcp.sendspace",
> -      &ngx_darwin_net_inet_tcp_sendspace,
> -      sizeof(ngx_darwin_net_inet_tcp_sendspace), 0 },
> +      &ngx_net_inet_tcp_sendspace,
> +      sizeof(ngx_net_inet_tcp_sendspace), 0 },
>  
>      { "kern.ipc.somaxconn",
>        &ngx_darwin_kern_ipc_somaxconn,
> diff --git a/src/os/unix/ngx_freebsd.h b/src/os/unix/ngx_freebsd.h
> --- a/src/os/unix/ngx_freebsd.h
> +++ b/src/os/unix/ngx_freebsd.h
> @@ -15,7 +15,7 @@ ngx_chain_t *ngx_freebsd_sendfile_chain(
>  
>  extern int         ngx_freebsd_kern_osreldate;
>  extern int         ngx_freebsd_hw_ncpu;
> -extern u_long      ngx_freebsd_net_inet_tcp_sendspace;
> +extern u_long      ngx_net_inet_tcp_sendspace;
>  
>  extern ngx_uint_t  ngx_freebsd_sendfile_nbytes_bug;
>  extern ngx_uint_t  ngx_freebsd_use_tcp_nopush;
> diff --git a/src/os/unix/ngx_freebsd_init.c b/src/os/unix/ngx_freebsd_init.c
> --- a/src/os/unix/ngx_freebsd_init.c
> +++ b/src/os/unix/ngx_freebsd_init.c
> @@ -15,7 +15,7 @@ char    ngx_freebsd_kern_osrelease[128];
>  int     ngx_freebsd_kern_osreldate;
>  int     ngx_freebsd_hw_ncpu;
>  int     ngx_freebsd_kern_ipc_somaxconn;
> -u_long  ngx_freebsd_net_inet_tcp_sendspace;
> +u_long  ngx_net_inet_tcp_sendspace;
>  
>  /* FreeBSD 4.9 */
>  int     ngx_freebsd_machdep_hlt_logical_cpus;
> @@ -62,8 +62,8 @@ sysctl_t sysctls[] = {
>        sizeof(ngx_freebsd_machdep_hlt_logical_cpus), 0 },
>  
>      { "net.inet.tcp.sendspace",
> -      &ngx_freebsd_net_inet_tcp_sendspace,
> -      sizeof(ngx_freebsd_net_inet_tcp_sendspace), 0 },
> +      &ngx_net_inet_tcp_sendspace,
> +      sizeof(ngx_net_inet_tcp_sendspace), 0 },
>  
>      { "kern.ipc.somaxconn",
>        &ngx_freebsd_kern_ipc_somaxconn,

I don't think it worth the effort, since Darwin / macOS isn't used 
as a server OS, and optimizing behaviour for it isn't really 
important.

But if at all, it should be kept with appropriate 
ngx_freebsd_/ngx_darwin_ prefixes, and either tested separately, 
or applied to a generic ngx_net_inet_tcp_sendspace variable kept 
in ngx_os.h, similarly to how ngx_ncpu and 
ngx_tcp_nodelay_and_tcp_nopush are handled.

> # HG changeset patch
> # User Sergey Kandaurov <pluknet at nginx.com>
> # Date 1682957408 -14400
> #      Mon May 01 20:10:08 2023 +0400
> # Branch quic
> # Node ID 976e2e40f0f58a5bbcd89b76e24909be0c8d337d
> # Parent  b4d35da933cc3df9a098feb6d06957b19e228c39
> Introduced the "net.inet.udp.maxdgram" sysctl variable.
> 
> diff --git a/src/os/unix/ngx_darwin.h b/src/os/unix/ngx_darwin.h
> --- a/src/os/unix/ngx_darwin.h
> +++ b/src/os/unix/ngx_darwin.h
> @@ -16,6 +16,7 @@ ngx_chain_t *ngx_darwin_sendfile_chain(n
>  extern int       ngx_darwin_kern_osreldate;
>  extern int       ngx_darwin_hw_ncpu;
>  extern u_long    ngx_net_inet_tcp_sendspace;
> +extern u_long    ngx_net_inet_udp_maxdgram;
>  
>  extern ngx_uint_t  ngx_debug_malloc;
>  
> diff --git a/src/os/unix/ngx_darwin_init.c b/src/os/unix/ngx_darwin_init.c
> --- a/src/os/unix/ngx_darwin_init.c
> +++ b/src/os/unix/ngx_darwin_init.c
> @@ -14,6 +14,7 @@ char    ngx_darwin_kern_osrelease[128];
>  int     ngx_darwin_hw_ncpu;
>  int     ngx_darwin_kern_ipc_somaxconn;
>  u_long  ngx_net_inet_tcp_sendspace;
> +u_long  ngx_net_inet_udp_maxdgram;
>  
>  ngx_uint_t  ngx_debug_malloc;
>  
> @@ -52,6 +53,10 @@ sysctl_t sysctls[] = {
>        &ngx_net_inet_tcp_sendspace,
>        sizeof(ngx_net_inet_tcp_sendspace), 0 },
>  
> +    { "net.inet.udp.maxdgram",
> +      &ngx_net_inet_udp_maxdgram,
> +      sizeof(ngx_net_inet_udp_maxdgram), 0 },
> +
>      { "kern.ipc.somaxconn",
>        &ngx_darwin_kern_ipc_somaxconn,
>        sizeof(ngx_darwin_kern_ipc_somaxconn), 0 },
> diff --git a/src/os/unix/ngx_freebsd.h b/src/os/unix/ngx_freebsd.h
> --- a/src/os/unix/ngx_freebsd.h
> +++ b/src/os/unix/ngx_freebsd.h
> @@ -16,6 +16,7 @@ ngx_chain_t *ngx_freebsd_sendfile_chain(
>  extern int         ngx_freebsd_kern_osreldate;
>  extern int         ngx_freebsd_hw_ncpu;
>  extern u_long      ngx_net_inet_tcp_sendspace;
> +extern u_long      ngx_net_inet_udp_maxdgram;
>  
>  extern ngx_uint_t  ngx_freebsd_sendfile_nbytes_bug;
>  extern ngx_uint_t  ngx_freebsd_use_tcp_nopush;
> diff --git a/src/os/unix/ngx_freebsd_init.c b/src/os/unix/ngx_freebsd_init.c
> --- a/src/os/unix/ngx_freebsd_init.c
> +++ b/src/os/unix/ngx_freebsd_init.c
> @@ -16,6 +16,7 @@ int     ngx_freebsd_kern_osreldate;
>  int     ngx_freebsd_hw_ncpu;
>  int     ngx_freebsd_kern_ipc_somaxconn;
>  u_long  ngx_net_inet_tcp_sendspace;
> +u_long  ngx_net_inet_udp_maxdgram;
>  
>  /* FreeBSD 4.9 */
>  int     ngx_freebsd_machdep_hlt_logical_cpus;
> @@ -65,6 +66,10 @@ sysctl_t sysctls[] = {
>        &ngx_net_inet_tcp_sendspace,
>        sizeof(ngx_net_inet_tcp_sendspace), 0 },
>  
> +    { "net.inet.udp.maxdgram",
> +      &ngx_net_inet_udp_maxdgram,
> +      sizeof(ngx_net_inet_udp_maxdgram), 0 },
> +
>      { "kern.ipc.somaxconn",
>        &ngx_freebsd_kern_ipc_somaxconn,
>        sizeof(ngx_freebsd_kern_ipc_somaxconn), 0 },

Same here (though this night be slightly more meaningful).

> # HG changeset patch
> # User Sergey Kandaurov <pluknet at nginx.com>
> # Date 1682959469 -14400
> #      Mon May 01 20:44:29 2023 +0400
> # Branch quic
> # Node ID 4f32cf8fe9241fe002701d3bf2dd38ea358c0b5d
> # Parent  976e2e40f0f58a5bbcd89b76e24909be0c8d337d
> QUIC: limited link MTU upper bound to maxdgram.
> 
> diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
> --- a/src/event/quic/ngx_event_quic.c
> +++ b/src/event/quic/ngx_event_quic.c
> @@ -434,6 +434,10 @@ ngx_quic_get_local_mtu(ngx_connection_t 
>  #endif
>          }
>  
> +#if (NGX_FREEBSD || NGX_DARWIN)
> +        mtu = ngx_min(mtu, ngx_net_inet_udp_maxdgram);
> +#endif
> +
>          ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
>                         "quic local mtu:%uz", mtu);
>  

Note that with a generic ngx_os.h variable this won't require 
conditional compilation.

Also, it might be a good idea to reduce the variable on EMSGSIZE 
errors: this will make handling close to optimal even on OSes 
where we don't use net.inet.udp.maxdgram sysctl.

[...]

-- 
Maxim Dounin
http://mdounin.ru/


More information about the nginx-devel mailing list