[PATCH 3 of 3] QUIC: path MTU discovery
Roman Arutyunyan
arut at nginx.com
Mon May 8 12:15:49 UTC 2023
Hi,
On Mon, May 01, 2023 at 08:58:55PM +0400, Sergey Kandaurov wrote:
>
> > On 28 Mar 2023, at 18:51, Roman Arutyunyan <arut at nginx.com> wrote:
> >
> > # HG changeset patch
> > # User Roman Arutyunyan <arut at nginx.com>
> > # Date 1679993500 -14400
> > # Tue Mar 28 12:51:40 2023 +0400
> > # Branch quic
> > # Node ID 13d43a278510f131101c7b19d87455a0171ebe2f
> > # Parent c686c97f4abd6e1ca9a2cc2324d5a24f3d035c58
> > QUIC: path MTU discovery.
> >
> > MTU selection starts by probing the maximum allowed MTU first. After that,
> > binary search is used to find the path MTU.
> >
> > Maximum allowed MTU is calculated as the minimum of max_udp_payload for client
> > and server, and local interface MTU.
> >
> > diff --git a/auto/unix b/auto/unix
> > --- a/auto/unix
> > +++ b/auto/unix
> > @@ -448,6 +448,54 @@ ngx_feature_test="setsockopt(0, IPPROTO_
> > . auto/feature
> >
> >
> > +# IP packet fragmentation flags
> > +
> > +ngx_feature="IP_DONTFRAG"
> > +ngx_feature_name="NGX_HAVE_IP_DONTFRAG"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > + #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IP, IP_DONTFRAG, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > +ngx_feature="IPV6_DONTFRAG"
> > +ngx_feature_name="NGX_HAVE_IPV6_DONTFRAG"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > + #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IPV6, IPV6_DONTFRAG, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > +# Linux MTU flags
> > +
> > +ngx_feature="IP_PMTUDISC_DO"
> > +ngx_feature_name="NGX_HAVE_IP_PMTUDISC_DO"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > + #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IP, IP_PMTUDISC_DO, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > +ngx_feature="IPV6_PMTUDISC_DO"
> > +ngx_feature_name="NGX_HAVE_IPV6_PMTUDISC_DO"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > + #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IPV6, IPV6_PMTUDISC_DO, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > ngx_feature="TCP_DEFER_ACCEPT"
> > ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
> > ngx_feature_run=no
> > @@ -920,6 +968,19 @@ ngx_feature_test="int i = FIONREAD; prin
> > . auto/feature
> >
> >
> > +ngx_feature="ioctl(SIOCGIFMTU)"
> > +ngx_feature_name="NGX_HAVE_SIOCGIFMTU"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/ioctl.h>
> > + #include <stdio.h>
> > + #include <net/if.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="int i = SIOCGIFMTU; struct ifreq ifr;
> > + ifr.ifr_name[0] = 'e'; printf(\"%d\", i)"
> > +. auto/feature
> > +
> > +
> > ngx_feature="struct tm.tm_gmtoff"
> > ngx_feature_name="NGX_HAVE_GMTOFF"
> > ngx_feature_run=no
> > @@ -1002,3 +1063,17 @@ ngx_feature_test='struct addrinfo *res;
> > if (getaddrinfo("localhost", NULL, NULL, &res) != 0) return 1;
> > freeaddrinfo(res)'
> > . auto/feature
> > +
> > +
> > +ngx_feature="getifaddrs()"
> > +ngx_feature_name="NGX_HAVE_GETIFADDRS"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/types.h>
> > + #include <sys/socket.h>
> > + #include <ifaddrs.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test='struct ifaddrs *ifaddr;
> > + if (getifaddrs(&ifaddr) != 0) return 1;
> > + freeifaddrs(ifaddr)'
> > +. auto/feature
> > diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
> > --- a/src/core/ngx_connection.c
> > +++ b/src/core/ngx_connection.c
> > @@ -1010,6 +1010,74 @@ ngx_configure_listening_sockets(ngx_cycl
> > }
> >
> > #endif
> > +
> > +#if (NGX_HAVE_IP_PMTUDISC_DO)
> > +
> > + if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
> > + value = 1;
> > +
> > + if (setsockopt(ls[i].fd, IPPROTO_IP, IP_PMTUDISC_DO,
> > + (const void *) &value, sizeof(int))
> > + == -1)
> > + {
> > + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > + "setsockopt(IP_PMTUDISC_DO) "
> > + "for %V failed, ignored",
> > + &ls[i].addr_text);
> > + }
> > + }
> > +
> > +#elif (NGX_HAVE_IP_DONTFRAG)
> > +
> > + if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
> > + value = 1;
> > +
> > + if (setsockopt(ls[i].fd, IPPROTO_IP, IP_DONTFRAG,
> > + (const void *) &value, sizeof(int))
> > + == -1)
> > + {
> > + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > + "setsockopt(IP_DONTFRAG) "
> > + "for %V failed, ignored",
> > + &ls[i].addr_text);
> > + }
> > + }
> > +
> > +#endif
> > +
> > +#if (NGX_HAVE_INET6 && NGX_HAVE_IPV6_PMTUDISC_DO)
> > +
> > + if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
> > + value = 1;
> > +
> > + if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_PMTUDISC_DO,
> > + (const void *) &value, sizeof(int))
> > + == -1)
> > + {
> > + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > + "setsockopt(IPV6_PMTUDISC_DO) "
> > + "for %V failed, ignored",
> > + &ls[i].addr_text);
> > + }
> > + }
> > +
> > +#elif (NGX_HAVE_INET6 && NGX_HAVE_IPV6_DONTFRAG)
> > +
> > + if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
> > + value = 1;
> > +
> > + if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_DONTFRAG,
> > + (const void *) &value, sizeof(int))
> > + == -1)
> > + {
> > + ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > + "setsockopt(IPV6_DONTFRAG) "
> > + "for %V failed, ignored",
> > + &ls[i].addr_text);
> > + }
> > + }
> > +
> > +#endif
> > }
> >
> > return;
> > @@ -1507,6 +1575,10 @@ ngx_connection_error(ngx_connection_t *c
> > }
> > #endif
> >
> > + if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
> > + return 0;
> > + }
> > +
> > if (err == 0
> > || err == NGX_ECONNRESET
> > #if (NGX_WIN32)
> > @@ -1524,6 +1596,7 @@ ngx_connection_error(ngx_connection_t *c
> > {
> > switch (c->log_error) {
> >
> > + case NGX_ERROR_IGNORE_EMSGSIZE:
> > case NGX_ERROR_IGNORE_EINVAL:
> > case NGX_ERROR_IGNORE_ECONNRESET:
> > case NGX_ERROR_INFO:
> > diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
> > --- a/src/core/ngx_connection.h
> > +++ b/src/core/ngx_connection.h
> > @@ -97,7 +97,8 @@ typedef enum {
> > NGX_ERROR_ERR,
> > NGX_ERROR_INFO,
> > NGX_ERROR_IGNORE_ECONNRESET,
> > - NGX_ERROR_IGNORE_EINVAL
> > + NGX_ERROR_IGNORE_EINVAL,
> > + NGX_ERROR_IGNORE_EMSGSIZE
> > } ngx_connection_log_error_e;
> >
> >
>
> I'd move the dontfrag part to a separate change for clarity.
> It can be seen as a foundation for the subsequent PLPMTUD work,
> but it is not strictly part of it.
> (Further, PLPMTUD is an optional feature, while dontfrag
> is a MUST per RFC 9000, Section 14.)
You're right. Attached is a separate patch for this.
[..]
--
Roman Arutyunyan
-------------- next part --------------
# HG changeset patch
# User Roman Arutyunyan <arut at nginx.com>
# Date 1683375807 -14400
# Sat May 06 16:23:27 2023 +0400
# Branch quic
# Node ID afebde21cb32b9326219af075acc7dc415587d71
# Parent 9ae24a9ba7637646ea201a2014ae8294d4db2a82
QUIC: disabled datagram fragmentation.
As per RFC 9000, Section 14:
UDP datagrams MUST NOT be fragmented at the IP layer.
diff --git a/auto/unix b/auto/unix
--- a/auto/unix
+++ b/auto/unix
@@ -448,6 +448,54 @@ ngx_feature_test="setsockopt(0, IPPROTO_
. auto/feature
+# IP packet fragmentation
+
+ngx_feature="IP_MTU_DISCOVER"
+ngx_feature_name="NGX_HAVE_IP_MTU_DISCOVER"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+ #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IP_PMTUDISC_DO;
+ setsockopt(0, IPPROTO_IP, IP_MTU_DISCOVER, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_MTU_DISCOVER"
+ngx_feature_name="NGX_HAVE_IPV6_MTU_DISCOVER"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+ #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IPV6_PMTUDISC_DO;
+ setsockopt(0, IPPROTO_IPV6, IPV6_MTU_DISCOVER, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IP_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IP_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+ #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="setsockopt(0, IPPROTO_IP, IP_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IPV6_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+ #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="setsockopt(0, IPPROTO_IPV6, IPV6_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
ngx_feature="TCP_DEFER_ACCEPT"
ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
ngx_feature_run=no
diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
--- a/src/core/ngx_connection.c
+++ b/src/core/ngx_connection.c
@@ -1010,6 +1010,78 @@ ngx_configure_listening_sockets(ngx_cycl
}
#endif
+
+#if (NGX_HAVE_IP_MTU_DISCOVER)
+
+ if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+ value = IP_PMTUDISC_DO;
+
+ if (setsockopt(ls[i].fd, IPPROTO_IP, IP_MTU_DISCOVER,
+ (const void *) &value, sizeof(int))
+ == -1)
+ {
+ ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+ "setsockopt(IP_MTU_DISCOVER) "
+ "for %V failed, ignored",
+ &ls[i].addr_text);
+ }
+ }
+
+#elif (NGX_HAVE_IP_DONTFRAG)
+
+ if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+ value = 1;
+
+ if (setsockopt(ls[i].fd, IPPROTO_IP, IP_DONTFRAG,
+ (const void *) &value, sizeof(int))
+ == -1)
+ {
+ ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+ "setsockopt(IP_DONTFRAG) "
+ "for %V failed, ignored",
+ &ls[i].addr_text);
+ }
+ }
+
+#endif
+
+#if (NGX_HAVE_INET6)
+
+#if (NGX_HAVE_IPV6_MTU_DISCOVER)
+
+ if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+ value = IPV6_PMTUDISC_DO;
+
+ if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
+ (const void *) &value, sizeof(int))
+ == -1)
+ {
+ ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+ "setsockopt(IPV6_MTU_DISCOVER) "
+ "for %V failed, ignored",
+ &ls[i].addr_text);
+ }
+ }
+
+#elif (NGX_HAVE_IPV6_DONTFRAG)
+
+ if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+ value = 1;
+
+ if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_DONTFRAG,
+ (const void *) &value, sizeof(int))
+ == -1)
+ {
+ ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+ "setsockopt(IPV6_DONTFRAG) "
+ "for %V failed, ignored",
+ &ls[i].addr_text);
+ }
+ }
+
+#endif
+
+#endif
}
return;
More information about the nginx-devel
mailing list