[PATCH 3 of 3] QUIC: path MTU discovery
Maxim Dounin
mdounin at mdounin.ru
Mon May 1 20:39:59 UTC 2023
Hello!
On Mon, May 01, 2023 at 08:58:55PM +0400, Sergey Kandaurov wrote:
[...]
> It makes sense to further limit link MTU to system constraints.
>
> BSD is known to have a system limitation for a maximum outgoing UDP
> datagram size set for some reason. In all known distributions which
> derive from 4.3BSD-Reno, it defaults to 9216.
> For that, we can query the limit using sysctl.
>
> Limited, this reduces the number of MTU probes.
> In particular on lo0, before the change:
>
> - 16356
> + 8778
> - 12567
> - 10672
> - 9725
> - 9251
> + 9014
>
> Legend: '-' is rejected with EMSGSIZE, '+' sent to network
> After the change, it is the only successful probe of maxdgram.
>
> On real networks there will be different results,
> but the general pattern can be traced.
>
> Aside from that, I noticed that we have split FreeBSD
> and Darwin tests/sources but didn't accommodate for that.
>
> Below to address this. The first one or two patches of three
> could go directly to the default branch if approved,
> the last one set upper bound for maxdgram where appropriate.
>
> # HG changeset patch
> # User Sergey Kandaurov <pluknet at nginx.com>
> # Date 1682957319 -14400
> # Mon May 01 20:08:39 2023 +0400
> # Branch quic
> # Node ID b4d35da933cc3df9a098feb6d06957b19e228c39
> # Parent cc5d2e648dd4359d77c28120e6e02f9b5842024e
> Fixed Darwin support for the "net.inet.tcp.sendspace" sysctl.
>
> After Darwin support was split in separate files in 345a014436d4 (0.7.7),
> sysctl variables received a separate prefix, but common sources were not
> updated. In particular, the "net.inet.tcp.sendspace" sysctl value wasn't
> checked as a limit for the send_lowat directive and friends.
>
> The change unifies a prefix for the "sendspace" variable in both FreeBSD
> and Darwin. Other extern variables aren't touched: their usage is either
> limited to os-specific source files or they aren't used at all.
>
> diff --git a/src/http/modules/ngx_http_fastcgi_module.c b/src/http/modules/ngx_http_fastcgi_module.c
> --- a/src/http/modules/ngx_http_fastcgi_module.c
> +++ b/src/http/modules/ngx_http_fastcgi_module.c
> @@ -3905,14 +3905,14 @@ ngx_http_fastcgi_cache_key(ngx_conf_t *c
> static char *
> ngx_http_fastcgi_lowat_check(ngx_conf_t *cf, void *post, void *data)
> {
> -#if (NGX_FREEBSD)
> +#if (NGX_FREEBSD || NGX_DARWIN)
> ssize_t *np = data;
>
> - if ((u_long) *np >= ngx_freebsd_net_inet_tcp_sendspace) {
> + if ((u_long) *np >= ngx_net_inet_tcp_sendspace) {
> ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
> "\"fastcgi_send_lowat\" must be less than %d "
> "(sysctl net.inet.tcp.sendspace)",
> - ngx_freebsd_net_inet_tcp_sendspace);
> + ngx_net_inet_tcp_sendspace);
>
> return NGX_CONF_ERROR;
> }
> diff --git a/src/http/modules/ngx_http_proxy_module.c b/src/http/modules/ngx_http_proxy_module.c
> --- a/src/http/modules/ngx_http_proxy_module.c
> +++ b/src/http/modules/ngx_http_proxy_module.c
> @@ -4890,14 +4890,14 @@ ngx_http_proxy_ssl_password_file(ngx_con
> static char *
> ngx_http_proxy_lowat_check(ngx_conf_t *cf, void *post, void *data)
> {
> -#if (NGX_FREEBSD)
> +#if (NGX_FREEBSD || NGX_DARWIN)
> ssize_t *np = data;
>
> - if ((u_long) *np >= ngx_freebsd_net_inet_tcp_sendspace) {
> + if ((u_long) *np >= ngx_net_inet_tcp_sendspace) {
> ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
> "\"proxy_send_lowat\" must be less than %d "
> "(sysctl net.inet.tcp.sendspace)",
> - ngx_freebsd_net_inet_tcp_sendspace);
> + ngx_net_inet_tcp_sendspace);
>
> return NGX_CONF_ERROR;
> }
> diff --git a/src/http/ngx_http_core_module.c b/src/http/ngx_http_core_module.c
> --- a/src/http/ngx_http_core_module.c
> +++ b/src/http/ngx_http_core_module.c
> @@ -5288,14 +5288,14 @@ ngx_http_disable_symlinks(ngx_conf_t *cf
> static char *
> ngx_http_core_lowat_check(ngx_conf_t *cf, void *post, void *data)
> {
> -#if (NGX_FREEBSD)
> +#if (NGX_FREEBSD || NGX_DARWIN)
> ssize_t *np = data;
>
> - if ((u_long) *np >= ngx_freebsd_net_inet_tcp_sendspace) {
> + if ((u_long) *np >= ngx_net_inet_tcp_sendspace) {
> ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
> "\"send_lowat\" must be less than %d "
> "(sysctl net.inet.tcp.sendspace)",
> - ngx_freebsd_net_inet_tcp_sendspace);
> + ngx_net_inet_tcp_sendspace);
>
> return NGX_CONF_ERROR;
> }
> diff --git a/src/os/unix/ngx_darwin.h b/src/os/unix/ngx_darwin.h
> --- a/src/os/unix/ngx_darwin.h
> +++ b/src/os/unix/ngx_darwin.h
> @@ -15,7 +15,7 @@ ngx_chain_t *ngx_darwin_sendfile_chain(n
>
> extern int ngx_darwin_kern_osreldate;
> extern int ngx_darwin_hw_ncpu;
> -extern u_long ngx_darwin_net_inet_tcp_sendspace;
> +extern u_long ngx_net_inet_tcp_sendspace;
>
> extern ngx_uint_t ngx_debug_malloc;
>
> diff --git a/src/os/unix/ngx_darwin_init.c b/src/os/unix/ngx_darwin_init.c
> --- a/src/os/unix/ngx_darwin_init.c
> +++ b/src/os/unix/ngx_darwin_init.c
> @@ -13,7 +13,7 @@ char ngx_darwin_kern_ostype[16];
> char ngx_darwin_kern_osrelease[128];
> int ngx_darwin_hw_ncpu;
> int ngx_darwin_kern_ipc_somaxconn;
> -u_long ngx_darwin_net_inet_tcp_sendspace;
> +u_long ngx_net_inet_tcp_sendspace;
>
> ngx_uint_t ngx_debug_malloc;
>
> @@ -49,8 +49,8 @@ sysctl_t sysctls[] = {
> sizeof(ngx_darwin_hw_ncpu), 0 },
>
> { "net.inet.tcp.sendspace",
> - &ngx_darwin_net_inet_tcp_sendspace,
> - sizeof(ngx_darwin_net_inet_tcp_sendspace), 0 },
> + &ngx_net_inet_tcp_sendspace,
> + sizeof(ngx_net_inet_tcp_sendspace), 0 },
>
> { "kern.ipc.somaxconn",
> &ngx_darwin_kern_ipc_somaxconn,
> diff --git a/src/os/unix/ngx_freebsd.h b/src/os/unix/ngx_freebsd.h
> --- a/src/os/unix/ngx_freebsd.h
> +++ b/src/os/unix/ngx_freebsd.h
> @@ -15,7 +15,7 @@ ngx_chain_t *ngx_freebsd_sendfile_chain(
>
> extern int ngx_freebsd_kern_osreldate;
> extern int ngx_freebsd_hw_ncpu;
> -extern u_long ngx_freebsd_net_inet_tcp_sendspace;
> +extern u_long ngx_net_inet_tcp_sendspace;
>
> extern ngx_uint_t ngx_freebsd_sendfile_nbytes_bug;
> extern ngx_uint_t ngx_freebsd_use_tcp_nopush;
> diff --git a/src/os/unix/ngx_freebsd_init.c b/src/os/unix/ngx_freebsd_init.c
> --- a/src/os/unix/ngx_freebsd_init.c
> +++ b/src/os/unix/ngx_freebsd_init.c
> @@ -15,7 +15,7 @@ char ngx_freebsd_kern_osrelease[128];
> int ngx_freebsd_kern_osreldate;
> int ngx_freebsd_hw_ncpu;
> int ngx_freebsd_kern_ipc_somaxconn;
> -u_long ngx_freebsd_net_inet_tcp_sendspace;
> +u_long ngx_net_inet_tcp_sendspace;
>
> /* FreeBSD 4.9 */
> int ngx_freebsd_machdep_hlt_logical_cpus;
> @@ -62,8 +62,8 @@ sysctl_t sysctls[] = {
> sizeof(ngx_freebsd_machdep_hlt_logical_cpus), 0 },
>
> { "net.inet.tcp.sendspace",
> - &ngx_freebsd_net_inet_tcp_sendspace,
> - sizeof(ngx_freebsd_net_inet_tcp_sendspace), 0 },
> + &ngx_net_inet_tcp_sendspace,
> + sizeof(ngx_net_inet_tcp_sendspace), 0 },
>
> { "kern.ipc.somaxconn",
> &ngx_freebsd_kern_ipc_somaxconn,
I don't think it worth the effort, since Darwin / macOS isn't used
as a server OS, and optimizing behaviour for it isn't really
important.
But if at all, it should be kept with appropriate
ngx_freebsd_/ngx_darwin_ prefixes, and either tested separately,
or applied to a generic ngx_net_inet_tcp_sendspace variable kept
in ngx_os.h, similarly to how ngx_ncpu and
ngx_tcp_nodelay_and_tcp_nopush are handled.
> # HG changeset patch
> # User Sergey Kandaurov <pluknet at nginx.com>
> # Date 1682957408 -14400
> # Mon May 01 20:10:08 2023 +0400
> # Branch quic
> # Node ID 976e2e40f0f58a5bbcd89b76e24909be0c8d337d
> # Parent b4d35da933cc3df9a098feb6d06957b19e228c39
> Introduced the "net.inet.udp.maxdgram" sysctl variable.
>
> diff --git a/src/os/unix/ngx_darwin.h b/src/os/unix/ngx_darwin.h
> --- a/src/os/unix/ngx_darwin.h
> +++ b/src/os/unix/ngx_darwin.h
> @@ -16,6 +16,7 @@ ngx_chain_t *ngx_darwin_sendfile_chain(n
> extern int ngx_darwin_kern_osreldate;
> extern int ngx_darwin_hw_ncpu;
> extern u_long ngx_net_inet_tcp_sendspace;
> +extern u_long ngx_net_inet_udp_maxdgram;
>
> extern ngx_uint_t ngx_debug_malloc;
>
> diff --git a/src/os/unix/ngx_darwin_init.c b/src/os/unix/ngx_darwin_init.c
> --- a/src/os/unix/ngx_darwin_init.c
> +++ b/src/os/unix/ngx_darwin_init.c
> @@ -14,6 +14,7 @@ char ngx_darwin_kern_osrelease[128];
> int ngx_darwin_hw_ncpu;
> int ngx_darwin_kern_ipc_somaxconn;
> u_long ngx_net_inet_tcp_sendspace;
> +u_long ngx_net_inet_udp_maxdgram;
>
> ngx_uint_t ngx_debug_malloc;
>
> @@ -52,6 +53,10 @@ sysctl_t sysctls[] = {
> &ngx_net_inet_tcp_sendspace,
> sizeof(ngx_net_inet_tcp_sendspace), 0 },
>
> + { "net.inet.udp.maxdgram",
> + &ngx_net_inet_udp_maxdgram,
> + sizeof(ngx_net_inet_udp_maxdgram), 0 },
> +
> { "kern.ipc.somaxconn",
> &ngx_darwin_kern_ipc_somaxconn,
> sizeof(ngx_darwin_kern_ipc_somaxconn), 0 },
> diff --git a/src/os/unix/ngx_freebsd.h b/src/os/unix/ngx_freebsd.h
> --- a/src/os/unix/ngx_freebsd.h
> +++ b/src/os/unix/ngx_freebsd.h
> @@ -16,6 +16,7 @@ ngx_chain_t *ngx_freebsd_sendfile_chain(
> extern int ngx_freebsd_kern_osreldate;
> extern int ngx_freebsd_hw_ncpu;
> extern u_long ngx_net_inet_tcp_sendspace;
> +extern u_long ngx_net_inet_udp_maxdgram;
>
> extern ngx_uint_t ngx_freebsd_sendfile_nbytes_bug;
> extern ngx_uint_t ngx_freebsd_use_tcp_nopush;
> diff --git a/src/os/unix/ngx_freebsd_init.c b/src/os/unix/ngx_freebsd_init.c
> --- a/src/os/unix/ngx_freebsd_init.c
> +++ b/src/os/unix/ngx_freebsd_init.c
> @@ -16,6 +16,7 @@ int ngx_freebsd_kern_osreldate;
> int ngx_freebsd_hw_ncpu;
> int ngx_freebsd_kern_ipc_somaxconn;
> u_long ngx_net_inet_tcp_sendspace;
> +u_long ngx_net_inet_udp_maxdgram;
>
> /* FreeBSD 4.9 */
> int ngx_freebsd_machdep_hlt_logical_cpus;
> @@ -65,6 +66,10 @@ sysctl_t sysctls[] = {
> &ngx_net_inet_tcp_sendspace,
> sizeof(ngx_net_inet_tcp_sendspace), 0 },
>
> + { "net.inet.udp.maxdgram",
> + &ngx_net_inet_udp_maxdgram,
> + sizeof(ngx_net_inet_udp_maxdgram), 0 },
> +
> { "kern.ipc.somaxconn",
> &ngx_freebsd_kern_ipc_somaxconn,
> sizeof(ngx_freebsd_kern_ipc_somaxconn), 0 },
Same here (though this night be slightly more meaningful).
> # HG changeset patch
> # User Sergey Kandaurov <pluknet at nginx.com>
> # Date 1682959469 -14400
> # Mon May 01 20:44:29 2023 +0400
> # Branch quic
> # Node ID 4f32cf8fe9241fe002701d3bf2dd38ea358c0b5d
> # Parent 976e2e40f0f58a5bbcd89b76e24909be0c8d337d
> QUIC: limited link MTU upper bound to maxdgram.
>
> diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
> --- a/src/event/quic/ngx_event_quic.c
> +++ b/src/event/quic/ngx_event_quic.c
> @@ -434,6 +434,10 @@ ngx_quic_get_local_mtu(ngx_connection_t
> #endif
> }
>
> +#if (NGX_FREEBSD || NGX_DARWIN)
> + mtu = ngx_min(mtu, ngx_net_inet_udp_maxdgram);
> +#endif
> +
> ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
> "quic local mtu:%uz", mtu);
>
Note that with a generic ngx_os.h variable this won't require
conditional compilation.
Also, it might be a good idea to reduce the variable on EMSGSIZE
errors: this will make handling close to optimal even on OSes
where we don't use net.inet.udp.maxdgram sysctl.
[...]
--
Maxim Dounin
http://mdounin.ru/
More information about the nginx-devel
mailing list