[PATCH 3 of 3] QUIC: path MTU discovery

Roman Arutyunyan arut at nginx.com
Mon May 8 12:15:49 UTC 2023


Hi,

On Mon, May 01, 2023 at 08:58:55PM +0400, Sergey Kandaurov wrote:
> 
> > On 28 Mar 2023, at 18:51, Roman Arutyunyan <arut at nginx.com> wrote:
> > 
> > # HG changeset patch
> > # User Roman Arutyunyan <arut at nginx.com>
> > # Date 1679993500 -14400
> > #      Tue Mar 28 12:51:40 2023 +0400
> > # Branch quic
> > # Node ID 13d43a278510f131101c7b19d87455a0171ebe2f
> > # Parent  c686c97f4abd6e1ca9a2cc2324d5a24f3d035c58
> > QUIC: path MTU discovery.
> > 
> > MTU selection starts by probing the maximum allowed MTU first.  After that,
> > binary search is used to find the path MTU.
> > 
> > Maximum allowed MTU is calculated as the minimum of max_udp_payload for client
> > and server, and local interface MTU.
> > 
> > diff --git a/auto/unix b/auto/unix
> > --- a/auto/unix
> > +++ b/auto/unix
> > @@ -448,6 +448,54 @@ ngx_feature_test="setsockopt(0, IPPROTO_
> > . auto/feature
> > 
> > 
> > +# IP packet fragmentation flags
> > +
> > +ngx_feature="IP_DONTFRAG"
> > +ngx_feature_name="NGX_HAVE_IP_DONTFRAG"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > +                  #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IP, IP_DONTFRAG, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > +ngx_feature="IPV6_DONTFRAG"
> > +ngx_feature_name="NGX_HAVE_IPV6_DONTFRAG"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > +                  #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IPV6, IPV6_DONTFRAG, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > +# Linux MTU flags
> > +
> > +ngx_feature="IP_PMTUDISC_DO"
> > +ngx_feature_name="NGX_HAVE_IP_PMTUDISC_DO"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > +                  #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IP, IP_PMTUDISC_DO, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > +ngx_feature="IPV6_PMTUDISC_DO"
> > +ngx_feature_name="NGX_HAVE_IPV6_PMTUDISC_DO"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/socket.h>
> > +                  #include <netinet/in.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="getsockopt(0, IPPROTO_IPV6, IPV6_PMTUDISC_DO, NULL, 0)"
> > +. auto/feature
> > +
> > +
> > ngx_feature="TCP_DEFER_ACCEPT"
> > ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
> > ngx_feature_run=no
> > @@ -920,6 +968,19 @@ ngx_feature_test="int i = FIONREAD; prin
> > . auto/feature
> > 
> > 
> > +ngx_feature="ioctl(SIOCGIFMTU)"
> > +ngx_feature_name="NGX_HAVE_SIOCGIFMTU"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/ioctl.h>
> > +                  #include <stdio.h>
> > +                  #include <net/if.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test="int i = SIOCGIFMTU; struct ifreq ifr;
> > +                  ifr.ifr_name[0] = 'e'; printf(\"%d\", i)"
> > +. auto/feature
> > +
> > +
> > ngx_feature="struct tm.tm_gmtoff"
> > ngx_feature_name="NGX_HAVE_GMTOFF"
> > ngx_feature_run=no
> > @@ -1002,3 +1063,17 @@ ngx_feature_test='struct addrinfo *res;
> >                   if (getaddrinfo("localhost", NULL, NULL, &res) != 0) return 1;
> >                   freeaddrinfo(res)'
> > . auto/feature
> > +
> > +
> > +ngx_feature="getifaddrs()"
> > +ngx_feature_name="NGX_HAVE_GETIFADDRS"
> > +ngx_feature_run=no
> > +ngx_feature_incs="#include <sys/types.h>
> > +                  #include <sys/socket.h>
> > +                  #include <ifaddrs.h>"
> > +ngx_feature_path=
> > +ngx_feature_libs=
> > +ngx_feature_test='struct ifaddrs *ifaddr;
> > +                  if (getifaddrs(&ifaddr) != 0) return 1;
> > +                  freeifaddrs(ifaddr)'
> > +. auto/feature
> > diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
> > --- a/src/core/ngx_connection.c
> > +++ b/src/core/ngx_connection.c
> > @@ -1010,6 +1010,74 @@ ngx_configure_listening_sockets(ngx_cycl
> >         }
> > 
> > #endif
> > +
> > +#if (NGX_HAVE_IP_PMTUDISC_DO)
> > +
> > +        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
> > +            value = 1;
> > +
> > +            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_PMTUDISC_DO,
> > +                           (const void *) &value, sizeof(int))
> > +                == -1)
> > +            {
> > +                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > +                              "setsockopt(IP_PMTUDISC_DO) "
> > +                              "for %V failed, ignored",
> > +                              &ls[i].addr_text);
> > +            }
> > +        }
> > +
> > +#elif (NGX_HAVE_IP_DONTFRAG)
> > +
> > +        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
> > +            value = 1;
> > +
> > +            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_DONTFRAG,
> > +                           (const void *) &value, sizeof(int))
> > +                == -1)
> > +            {
> > +                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > +                              "setsockopt(IP_DONTFRAG) "
> > +                              "for %V failed, ignored",
> > +                              &ls[i].addr_text);
> > +            }
> > +        }
> > +
> > +#endif
> > +
> > +#if (NGX_HAVE_INET6 && NGX_HAVE_IPV6_PMTUDISC_DO)
> > +
> > +        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
> > +            value = 1;
> > +
> > +            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_PMTUDISC_DO,
> > +                           (const void *) &value, sizeof(int))
> > +                == -1)
> > +            {
> > +                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > +                              "setsockopt(IPV6_PMTUDISC_DO) "
> > +                              "for %V failed, ignored",
> > +                              &ls[i].addr_text);
> > +            }
> > +        }
> > +
> > +#elif (NGX_HAVE_INET6 && NGX_HAVE_IPV6_DONTFRAG)
> > +
> > +        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
> > +            value = 1;
> > +
> > +            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_DONTFRAG,
> > +                           (const void *) &value, sizeof(int))
> > +                == -1)
> > +            {
> > +                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
> > +                              "setsockopt(IPV6_DONTFRAG) "
> > +                              "for %V failed, ignored",
> > +                              &ls[i].addr_text);
> > +            }
> > +        }
> > +
> > +#endif
> >     }
> > 
> >     return;
> > @@ -1507,6 +1575,10 @@ ngx_connection_error(ngx_connection_t *c
> >     }
> > #endif
> > 
> > +    if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
> > +        return 0;
> > +    }
> > +
> >     if (err == 0
> >         || err == NGX_ECONNRESET
> > #if (NGX_WIN32)
> > @@ -1524,6 +1596,7 @@ ngx_connection_error(ngx_connection_t *c
> >     {
> >         switch (c->log_error) {
> > 
> > +        case NGX_ERROR_IGNORE_EMSGSIZE:
> >         case NGX_ERROR_IGNORE_EINVAL:
> >         case NGX_ERROR_IGNORE_ECONNRESET:
> >         case NGX_ERROR_INFO:
> > diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
> > --- a/src/core/ngx_connection.h
> > +++ b/src/core/ngx_connection.h
> > @@ -97,7 +97,8 @@ typedef enum {
> >     NGX_ERROR_ERR,
> >     NGX_ERROR_INFO,
> >     NGX_ERROR_IGNORE_ECONNRESET,
> > -    NGX_ERROR_IGNORE_EINVAL
> > +    NGX_ERROR_IGNORE_EINVAL,
> > +    NGX_ERROR_IGNORE_EMSGSIZE
> > } ngx_connection_log_error_e;
> > 
> > 
> 
> I'd move the dontfrag part to a separate change for clarity.
> It can be seen as a foundation for succeeding PLPMTUD work
> not strictly related to it.
> (Further, PLPMTUD is an optional feature, while dontfrag
> is a MUST per RFC 9000, section 14.)

You're right.  Attached is a separate patch for this.

[..]

--
Roman Arutyunyan
-------------- next part --------------
# HG changeset patch
# User Roman Arutyunyan <arut at nginx.com>
# Date 1683375807 -14400
#      Sat May 06 16:23:27 2023 +0400
# Branch quic
# Node ID afebde21cb32b9326219af075acc7dc415587d71
# Parent  9ae24a9ba7637646ea201a2014ae8294d4db2a82
QUIC: disabled datagram fragmentation.

As per RFC 9000, Section 14:

  UDP datagrams MUST NOT be fragmented at the IP layer.

diff --git a/auto/unix b/auto/unix
--- a/auto/unix
+++ b/auto/unix
@@ -448,6 +448,54 @@ ngx_feature_test="setsockopt(0, IPPROTO_
 . auto/feature
 
 
+# IP packet fragmentation
+
+ngx_feature="IP_MTU_DISCOVER"
+ngx_feature_name="NGX_HAVE_IP_MTU_DISCOVER"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IP_PMTUDISC_DO;
+                  setsockopt(0, IPPROTO_IP, IP_MTU_DISCOVER, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_MTU_DISCOVER"
+ngx_feature_name="NGX_HAVE_IPV6_MTU_DISCOVER"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IPV6_PMTUDISC_DO;
+                  setsockopt(0, IPPROTO_IPV6, IPV6_MTU_DISCOVER, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IP_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IP_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="setsockopt(0, IPPROTO_IP, IP_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IPV6_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="setsockopt(0, IPPROTO_IPV6, IPV6_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
 ngx_feature="TCP_DEFER_ACCEPT"
 ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
 ngx_feature_run=no
diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
--- a/src/core/ngx_connection.c
+++ b/src/core/ngx_connection.c
@@ -1010,6 +1010,78 @@ ngx_configure_listening_sockets(ngx_cycl
         }
 
 #endif
+
+#if (NGX_HAVE_IP_MTU_DISCOVER)
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+            value = IP_PMTUDISC_DO;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_MTU_DISCOVER,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IP_MTU_DISCOVER) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#elif (NGX_HAVE_IP_DONTFRAG)
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+            value = 1;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_DONTFRAG,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IP_DONTFRAG) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#endif
+
+#if (NGX_HAVE_INET6)
+
+#if (NGX_HAVE_IPV6_MTU_DISCOVER)
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+            value = IPV6_PMTUDISC_DO;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IPV6_MTU_DISCOVER) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#elif (NGX_HAVE_IPV6_DONTFRAG)
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+            value = 1;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_DONTFRAG,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IPV6_DONTFRAG) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#endif
+
+#endif
     }
 
     return;


More information about the nginx-devel mailing list