[PATCH 3 of 3] QUIC: path MTU discovery

Roman Arutyunyan arut at nginx.com
Tue Mar 28 14:51:40 UTC 2023


# HG changeset patch
# User Roman Arutyunyan <arut at nginx.com>
# Date 1679993500 -14400
#      Tue Mar 28 12:51:40 2023 +0400
# Branch quic
# Node ID 13d43a278510f131101c7b19d87455a0171ebe2f
# Parent  c686c97f4abd6e1ca9a2cc2324d5a24f3d035c58
QUIC: path MTU discovery.

MTU selection starts by probing the maximum allowed MTU first.  After that,
binary search is used to find the path MTU.

The maximum allowed MTU is the minimum of the client's and the server's
max_udp_payload_size transport parameters and the local interface MTU.

diff --git a/auto/unix b/auto/unix
--- a/auto/unix
+++ b/auto/unix
@@ -448,6 +448,54 @@ ngx_feature_test="setsockopt(0, IPPROTO_
 . auto/feature
 
 
+# IP packet fragmentation flags
+
+ngx_feature="IP_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IP_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="getsockopt(0, IPPROTO_IP, IP_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IPV6_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="getsockopt(0, IPPROTO_IPV6, IPV6_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
+# Linux MTU flags: *_PMTUDISC_DO are values of the *_MTU_DISCOVER options
+
+ngx_feature="IP_MTU_DISCOVER"
+ngx_feature_name="NGX_HAVE_IP_PMTUDISC_DO"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IP_MTU_DISCOVER; (void) IP_PMTUDISC_DO"
+. auto/feature
+
+
+ngx_feature="IPV6_MTU_DISCOVER"
+ngx_feature_name="NGX_HAVE_IPV6_PMTUDISC_DO"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IPV6_MTU_DISCOVER; (void) IPV6_PMTUDISC_DO"
+. auto/feature
+
+
 ngx_feature="TCP_DEFER_ACCEPT"
 ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
 ngx_feature_run=no
@@ -920,6 +968,19 @@ ngx_feature_test="int i = FIONREAD; prin
 . auto/feature
 
 
+ngx_feature="ioctl(SIOCGIFMTU)"
+ngx_feature_name="NGX_HAVE_SIOCGIFMTU"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/ioctl.h>
+                  #include <stdio.h>
+                  #include <net/if.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="int i = SIOCGIFMTU; struct ifreq ifr;
+                  ifr.ifr_name[0] = 'e'; printf(\"%d\", i)"
+. auto/feature
+
+
 ngx_feature="struct tm.tm_gmtoff"
 ngx_feature_name="NGX_HAVE_GMTOFF"
 ngx_feature_run=no
@@ -1002,3 +1063,17 @@ ngx_feature_test='struct addrinfo *res;
                   if (getaddrinfo("localhost", NULL, NULL, &res) != 0) return 1;
                   freeaddrinfo(res)'
 . auto/feature
+
+
+ngx_feature="getifaddrs()"
+ngx_feature_name="NGX_HAVE_GETIFADDRS"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/types.h>
+                  #include <sys/socket.h>
+                  #include <ifaddrs.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test='struct ifaddrs *ifaddr;
+                  if (getifaddrs(&ifaddr) != 0) return 1;
+                  freeifaddrs(ifaddr)'
+. auto/feature
diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
--- a/src/core/ngx_connection.c
+++ b/src/core/ngx_connection.c
@@ -1010,6 +1010,74 @@ ngx_configure_listening_sockets(ngx_cycl
         }
 
 #endif
+
+#if (NGX_HAVE_IP_PMTUDISC_DO)
+        /* IP_PMTUDISC_DO is a value of the IP_MTU_DISCOVER socket option */
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+            value = IP_PMTUDISC_DO;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_MTU_DISCOVER,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IP_MTU_DISCOVER) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#elif (NGX_HAVE_IP_DONTFRAG)
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+            value = 1;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_DONTFRAG,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IP_DONTFRAG) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#endif
+
+#if (NGX_HAVE_INET6 && NGX_HAVE_IPV6_PMTUDISC_DO)
+        /* IPV6_PMTUDISC_DO is a value of the IPV6_MTU_DISCOVER option */
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+            value = IPV6_PMTUDISC_DO;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IPV6_MTU_DISCOVER) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#elif (NGX_HAVE_INET6 && NGX_HAVE_IPV6_DONTFRAG)
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+            value = 1;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_DONTFRAG,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IPV6_DONTFRAG) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#endif
     }
 
     return;
@@ -1507,6 +1575,10 @@ ngx_connection_error(ngx_connection_t *c
     }
 #endif
 
+    if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
+        return 0;
+    }
+
     if (err == 0
         || err == NGX_ECONNRESET
 #if (NGX_WIN32)
@@ -1524,6 +1596,7 @@ ngx_connection_error(ngx_connection_t *c
     {
         switch (c->log_error) {
 
+        case NGX_ERROR_IGNORE_EMSGSIZE:
         case NGX_ERROR_IGNORE_EINVAL:
         case NGX_ERROR_IGNORE_ECONNRESET:
         case NGX_ERROR_INFO:
diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
--- a/src/core/ngx_connection.h
+++ b/src/core/ngx_connection.h
@@ -97,7 +97,8 @@ typedef enum {
     NGX_ERROR_ERR,
     NGX_ERROR_INFO,
     NGX_ERROR_IGNORE_ECONNRESET,
-    NGX_ERROR_IGNORE_EINVAL
+    NGX_ERROR_IGNORE_EINVAL,
+    NGX_ERROR_IGNORE_EMSGSIZE
 } ngx_connection_log_error_e;
 
 
diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
--- a/src/event/quic/ngx_event_quic.c
+++ b/src/event/quic/ngx_event_quic.c
@@ -10,8 +10,17 @@
 #include <ngx_event_quic_connection.h>
 
 
+#define NGX_QUIC_UDP4_MAX_PACKET   65535
+#define NGX_QUIC_UDP4_HEADER_SIZE  28
+
+#define NGX_QUIC_UDP6_MAX_PAYLOAD  65535
+#define NGX_QUIC_UDP6_HEADER_SIZE  48
+
+
 static ngx_quic_connection_t *ngx_quic_new_connection(ngx_connection_t *c,
     ngx_quic_conf_t *conf, ngx_quic_header_t *pkt);
+static ssize_t ngx_quic_get_local_mtu(ngx_connection_t *c,
+    struct sockaddr *sockaddr);
 static ngx_int_t ngx_quic_handle_stateless_reset(ngx_connection_t *c,
     ngx_quic_header_t *pkt);
 static void ngx_quic_input_handler(ngx_event_t *rev);
@@ -149,11 +158,6 @@ ngx_quic_apply_transport_params(ngx_conn
         ngx_log_error(NGX_LOG_INFO, c->log, 0,
                       "quic maximum packet size is invalid");
         return NGX_ERROR;
-
-    } else if (ctp->max_udp_payload_size > ngx_quic_max_udp_payload(c)) {
-        ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
-        ngx_log_debug0(NGX_LOG_DEBUG_EVENT, c->log, 0,
-                       "quic client maximum packet size truncated");
     }
 
     if (ctp->active_connection_id_limit < 2) {
@@ -228,6 +232,7 @@ static ngx_quic_connection_t *
 ngx_quic_new_connection(ngx_connection_t *c, ngx_quic_conf_t *conf,
     ngx_quic_header_t *pkt)
 {
+    ssize_t                 mtu;
     ngx_uint_t              i;
     ngx_quic_tp_t          *ctp;
     ngx_quic_connection_t  *qc;
@@ -297,7 +302,7 @@ ngx_quic_new_connection(ngx_connection_t
     ctp = &qc->ctp;
 
     /* defaults to be used before actual client parameters are received */
-    ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
+    ctp->max_udp_payload_size = NGX_QUIC_MAX_UDP_PAYLOAD_SIZE;
     ctp->ack_delay_exponent = NGX_QUIC_DEFAULT_ACK_DELAY_EXPONENT;
     ctp->max_ack_delay = NGX_QUIC_DEFAULT_MAX_ACK_DELAY;
     ctp->active_connection_id_limit = 2;
@@ -317,6 +322,18 @@ ngx_quic_new_connection(ngx_connection_t
     qc->congestion.ssthresh = (size_t) -1;
     qc->congestion.recovery_start = ngx_current_msec;
 
+    qc->max_mtu = ngx_min(qc->tp.max_udp_payload_size,
+                          qc->ctp.max_udp_payload_size);
+
+    mtu = ngx_quic_get_local_mtu(c, c->local_sockaddr);
+    if (mtu == NGX_ERROR) {
+        return NULL;
+    }
+
+    if (mtu > 0 && (size_t) mtu < qc->max_mtu) {
+        qc->max_mtu = mtu;
+    }
+
     if (pkt->validated && pkt->retried) {
         qc->tp.retry_scid.len = pkt->dcid.len;
         qc->tp.retry_scid.data = ngx_pstrdup(c->pool, &pkt->dcid);
@@ -347,6 +364,90 @@ ngx_quic_new_connection(ngx_connection_t
 }
 
 
+/*
+ * Returns the maximum UDP payload allowed by the MTU of the local interface
+ * owning "sockaddr", NGX_DECLINED if unknown, or NGX_ERROR on syscall failure.
+ */
+static ssize_t
+ngx_quic_get_local_mtu(ngx_connection_t *c, struct sockaddr *sockaddr)
+{
+#if (NGX_HAVE_GETIFADDRS && NGX_HAVE_SIOCGIFMTU)
+
+    ssize_t          mtu;
+    struct ifreq     ifr;
+    struct ifaddrs  *ifaddrs, *ifa;
+
+    if (sockaddr->sa_family != AF_INET
+#if (NGX_HAVE_INET6)
+        && sockaddr->sa_family != AF_INET6
+#endif
+       )
+    {
+        return NGX_DECLINED;
+    }
+
+    if (getifaddrs(&ifaddrs) == -1) {
+        ngx_log_error(NGX_LOG_INFO, c->log, ngx_errno, "getifaddrs() failed");
+        return NGX_ERROR;
+    }
+
+    for (ifa = ifaddrs; ifa; ifa = ifa->ifa_next) {
+        if (ifa->ifa_addr == NULL
+            || ngx_cmp_sockaddr(sockaddr, 0, ifa->ifa_addr, 0, 0) != NGX_OK)
+        {
+            continue;
+        }
+
+        /* ifr_name is a fixed-size array: bound the copy and terminate it */
+        ngx_memzero(&ifr, sizeof(struct ifreq));
+        ngx_cpystrn((u_char *) ifr.ifr_name, (u_char *) ifa->ifa_name,
+                    sizeof(ifr.ifr_name));
+
+        freeifaddrs(ifaddrs);
+
+        if (ioctl(c->fd, SIOCGIFMTU, &ifr) == -1) {
+            ngx_log_error(NGX_LOG_INFO, c->log, ngx_errno,
+                          "ioctl(SIOCGIFMTU) failed");
+            return NGX_ERROR;
+        }
+
+        /* kept signed so that a non-positive ifr_mtu is declined below */
+        mtu = ifr.ifr_mtu;
+
+        if (sockaddr->sa_family == AF_INET) {
+            mtu = ngx_min(mtu, NGX_QUIC_UDP4_MAX_PACKET);
+
+            if (mtu <= NGX_QUIC_UDP4_HEADER_SIZE) {
+                return NGX_DECLINED;
+            }
+
+            mtu -= NGX_QUIC_UDP4_HEADER_SIZE;
+
+#if (NGX_HAVE_INET6)
+        } else { /* sockaddr->sa_family == AF_INET6 */
+            if (mtu <= NGX_QUIC_UDP6_HEADER_SIZE) {
+                return NGX_DECLINED;
+            }
+
+            mtu -= NGX_QUIC_UDP6_HEADER_SIZE;
+            mtu = ngx_min(mtu, NGX_QUIC_UDP6_MAX_PAYLOAD);
+#endif
+        }
+
+        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "quic local mtu:%z", mtu);
+
+        return mtu;
+    }
+
+    freeifaddrs(ifaddrs);
+
+#endif
+
+    return NGX_DECLINED;
+}
+
+
 static ngx_int_t
 ngx_quic_handle_stateless_reset(ngx_connection_t *c, ngx_quic_header_t *pkt)
 {
diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
--- a/src/event/quic/ngx_event_quic_ack.c
+++ b/src/event/quic/ngx_event_quic_ack.c
@@ -229,6 +229,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
 
     qc = ngx_quic_get_connection(c);
 
+    if (ctx->level == ssl_encryption_application) {
+        if (ngx_quic_handle_path_mtu_ack(c, qc->path, min, max) != NGX_OK) {
+            return NGX_ERROR;
+        }
+    }
+
     st->max_pn = NGX_TIMER_INFINITE;
     found = 0;
 
diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
--- a/src/event/quic/ngx_event_quic_connection.h
+++ b/src/event/quic/ngx_event_quic_connection.h
@@ -89,14 +89,21 @@ struct ngx_quic_path_s {
     ngx_sockaddr_t                    sa;
     socklen_t                         socklen;
     ngx_quic_client_id_t             *cid;
-    ngx_msec_t                        expires;
-    ngx_uint_t                        tries;
+    ngx_msec_t                        valid_expires;
+    ngx_msec_t                        mtu_expires;
+    ngx_uint_t                        valid_tries;
+    ngx_uint_t                        mtu_tries;
+    ngx_uint_t                        mtu_steps;
     ngx_uint_t                        tag;
+    size_t                            mtu;
+    size_t                            mtud;
+    size_t                            max_mtu;
     off_t                             sent;
     off_t                             received;
     u_char                            challenge1[8];
     u_char                            challenge2[8];
     uint64_t                          seqnum;
+    uint64_t                          mtu_pnum[NGX_QUIC_PATH_RETRIES];
     ngx_str_t                         addr_text;
     u_char                            text[NGX_SOCKADDR_STRLEN];
     unsigned                          validated:1;
@@ -206,6 +213,8 @@ struct ngx_quic_connection_s {
     uint64_t                          server_seqnum;
     uint64_t                          path_seqnum;
 
+    size_t                            max_mtu;
+
     ngx_quic_tp_t                     tp;
     ngx_quic_tp_t                     ctp;
 
diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
--- a/src/event/quic/ngx_event_quic_migration.c
+++ b/src/event/quic/ngx_event_quic_migration.c
@@ -10,6 +10,10 @@
 #include <ngx_event_quic_connection.h>
 
 
+#define NGX_QUIC_MAX_MTU_STEPS   7
+#define NGX_QUIC_MTU_PRECISION   4
+
+
 static void ngx_quic_set_connection_path(ngx_connection_t *c,
     ngx_quic_path_t *path);
 static ngx_int_t ngx_quic_validate_path(ngx_connection_t *c,
@@ -17,7 +21,13 @@ static ngx_int_t ngx_quic_validate_path(
 static ngx_msec_t ngx_quic_path_pto(ngx_connection_t *c);
 static ngx_int_t ngx_quic_send_path_challenge(ngx_connection_t *c,
     ngx_quic_path_t *path);
+static ngx_int_t ngx_quic_expire_path_mtu(ngx_connection_t *c,
+    ngx_quic_path_t *path, ngx_msec_int_t *next);
+static ngx_int_t ngx_quic_expire_path(ngx_connection_t *c,
+    ngx_quic_path_t *path, ngx_msec_int_t *next);
 static ngx_quic_path_t *ngx_quic_get_path(ngx_connection_t *c, ngx_uint_t tag);
+static ngx_int_t ngx_quic_send_path_mtu_probe(ngx_connection_t *c,
+    ngx_quic_path_t *path);
 
 
 ngx_int_t
@@ -170,6 +180,10 @@ valid:
     path->validating = 0;
     path->limited = 0;
 
+    if (ngx_quic_discover_path_mtu(c, path) != NGX_OK) {
+        return NGX_ERROR;
+    }
+
     return NGX_OK;
 }
 
@@ -208,6 +222,8 @@ ngx_quic_new_path(ngx_connection_t *c,
 
     path->limited = 1;
 
+    path->mtu = NGX_QUIC_MIN_INITIAL_SIZE;
+
     path->seqnum = qc->path_seqnum++;
 
     path->sockaddr = &path->sa.sockaddr;
@@ -505,14 +521,14 @@ ngx_quic_validate_path(ngx_connection_t 
         return NGX_ERROR;
     }
 
+    path->valid_tries = 0;
+
     if (ngx_quic_send_path_challenge(c, path) != NGX_OK) {
         return NGX_ERROR;
     }
 
     pto = ngx_quic_path_pto(c);
-
-    path->expires = ngx_current_msec + pto;
-    path->tries = NGX_QUIC_PATH_RETRIES;
+    path->valid_expires = ngx_current_msec + pto;
 
     if (!qc->path_validation.timer_set) {
         ngx_add_timer(&qc->path_validation, pto);
@@ -556,7 +572,7 @@ ngx_quic_send_path_challenge(ngx_connect
 
     ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
                    "quic path seq:%uL send path_challenge tries:%ui",
-                   path->seqnum, path->tries);
+                   path->seqnum, path->valid_tries);
 
     ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
 
@@ -592,20 +608,16 @@ ngx_quic_send_path_challenge(ngx_connect
 void
 ngx_quic_path_validation_handler(ngx_event_t *ev)
 {
-    ngx_msec_t              now;
     ngx_queue_t            *q;
-    ngx_msec_int_t          left, next, pto;
-    ngx_quic_path_t        *path, *bkp;
+    ngx_msec_int_t          next;
+    ngx_quic_path_t        *path;
     ngx_connection_t       *c;
     ngx_quic_connection_t  *qc;
 
     c = ev->data;
     qc = ngx_quic_get_connection(c);
 
-    pto = ngx_quic_path_pto(c);
-
     next = -1;
-    now = ngx_current_msec;
 
     q = ngx_queue_head(&qc->paths);
 
@@ -614,76 +626,12 @@ ngx_quic_path_validation_handler(ngx_eve
         path = ngx_queue_data(q, ngx_quic_path_t, queue);
         q = ngx_queue_next(q);
 
-        if (!path->validating) {
-            continue;
-        }
-
-        left = path->expires - now;
-
-        if (left > 0) {
-
-            if (next == -1 || left < next) {
-                next = left;
-            }
-
-            continue;
-        }
-
-        if (--path->tries) {
-            path->expires = ngx_current_msec + pto;
-
-            if (next == -1 || pto < next) {
-                next = pto;
-            }
-
-            /* retransmit */
-            (void) ngx_quic_send_path_challenge(c, path);
-
-            continue;
+        if (ngx_quic_expire_path_mtu(c, path, &next) != NGX_OK) {
+            ngx_quic_close_connection(c, NGX_ERROR);
+            return;
         }
 
-        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
-                       "quic path seq:%uL validation failed", path->seqnum);
-
-        /* found expired path */
-
-        path->validated = 0;
-        path->validating = 0;
-        path->limited = 1;
-
-
-        /* RFC 9000, 9.3.2.  On-Path Address Spoofing
-         *
-         * To protect the connection from failing due to such a spurious
-         * migration, an endpoint MUST revert to using the last validated
-         * peer address when validation of a new peer address fails.
-         */
-
-        if (qc->path == path) {
-            /* active path validation failed */
-
-            bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
-
-            if (bkp == NULL) {
-                qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
-                qc->error_reason = "no viable path";
-                ngx_quic_close_connection(c, NGX_ERROR);
-                return;
-            }
-
-            qc->path = bkp;
-            qc->path->tag = NGX_QUIC_PATH_ACTIVE;
-
-            ngx_quic_set_connection_path(c, qc->path);
-
-            ngx_log_error(NGX_LOG_INFO, c->log, 0,
-                          "quic path seq:%uL addr:%V is restored from backup",
-                          qc->path->seqnum, &qc->path->addr_text);
-
-            ngx_quic_path_dbg(c, "is active", qc->path);
-        }
-
-        if (ngx_quic_free_path(c, path) != NGX_OK) {
+        if (ngx_quic_expire_path(c, path, &next) != NGX_OK) {
             ngx_quic_close_connection(c, NGX_ERROR);
             return;
         }
@@ -693,3 +641,290 @@ ngx_quic_path_validation_handler(ngx_eve
         ngx_add_timer(&qc->path_validation, next);
     }
 }
+
+
+/*
+ * Expires the MTU probe of "path": resends it while tries remain, otherwise
+ * takes the probed size as the new upper bound; updates the "next" deadline.
+ */
+static ngx_int_t
+ngx_quic_expire_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path,
+    ngx_msec_int_t *next)
+{
+    ngx_int_t       status;
+    ngx_msec_int_t  remaining, timeout;
+
+    if (path->mtud == 0) {
+        /* no probe size is pending on this path */
+        return NGX_OK;
+    }
+
+    remaining = path->mtu_expires - ngx_current_msec;
+
+    if (remaining > 0) {
+        if (*next == -1 || remaining < *next) {
+            *next = remaining;
+        }
+
+        return NGX_OK;
+    }
+
+    if (++path->mtu_tries < NGX_QUIC_PATH_RETRIES) {
+        timeout = ngx_quic_path_pto(c);
+        path->mtu_expires = ngx_current_msec + timeout;
+
+        if (*next == -1 || timeout < *next) {
+            *next = timeout;
+        }
+
+        status = ngx_quic_send_path_mtu_probe(c, path);
+        if (status != NGX_DECLINED) {
+            return status;
+        }
+    }
+
+    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                   "quic path seq:%uL mtu probe failed", path->seqnum);
+
+    /* tries exhausted or probe declined: the size is the new upper bound */
+    path->max_mtu = path->mtud;
+    path->mtud = 0;
+    return ngx_quic_discover_path_mtu(c, path);
+}
+
+
+/*
+ * Handles expiration of the validation timer of "path": resends the path
+ * challenge while tries remain; otherwise the path is marked as failed,
+ * the backup path is restored if the failed one was active, and the path is
+ * freed.  The nearest pending deadline is merged into "next".
+ */
+static ngx_int_t
+ngx_quic_expire_path(ngx_connection_t *c, ngx_quic_path_t *path,
+    ngx_msec_int_t *next)
+{
+    ngx_msec_int_t          remaining, timeout;
+    ngx_quic_path_t        *backup;
+    ngx_quic_connection_t  *qc;
+
+    if (!path->validating) {
+        return NGX_OK;
+    }
+
+    qc = ngx_quic_get_connection(c);
+
+    remaining = path->valid_expires - ngx_current_msec;
+
+    if (remaining > 0) {
+        if (*next == -1 || remaining < *next) {
+            *next = remaining;
+        }
+
+        return NGX_OK;
+    }
+
+    if (++path->valid_tries < NGX_QUIC_PATH_RETRIES) {
+        timeout = ngx_quic_path_pto(c);
+        path->valid_expires = ngx_current_msec + timeout;
+
+        if (*next == -1 || timeout < *next) {
+            *next = timeout;
+        }
+
+        /* retransmit; the result of sending is intentionally ignored */
+        (void) ngx_quic_send_path_challenge(c, path);
+
+        return NGX_OK;
+    }
+
+    /* all tries are used up: the path has failed validation */
+
+    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                   "quic path seq:%uL validation failed", path->seqnum);
+
+    path->validated = 0;
+    path->validating = 0;
+    path->limited = 1;
+
+    /*
+     * RFC 9000, 9.3.2.  On-Path Address Spoofing
+     *
+     * To protect the connection from failing due to such a spurious
+     * migration, an endpoint MUST revert to using the last validated
+     * peer address when validation of a new peer address fails.
+     */
+
+    if (qc->path == path) {
+        /* the active path failed: switch to the backup path, if any */
+        backup = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
+
+        if (backup == NULL) {
+            qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
+            qc->error_reason = "no viable path";
+            return NGX_ERROR;
+        }
+
+        qc->path = backup;
+        qc->path->tag = NGX_QUIC_PATH_ACTIVE;
+
+        ngx_quic_set_connection_path(c, qc->path);
+
+        ngx_log_error(NGX_LOG_INFO, c->log, 0,
+                      "quic path seq:%uL addr:%V is restored from backup",
+                      qc->path->seqnum, &qc->path->addr_text);
+
+        ngx_quic_path_dbg(c, "is active", qc->path);
+    }
+
+    if (ngx_quic_free_path(c, path) != NGX_OK) {
+        return NGX_ERROR;
+    }
+
+    return NGX_OK;
+}
+
+
+/*
+ * Starts or continues MTU discovery on "path": probes the maximum allowed
+ * size first, then bisects between path->mtu and path->max_mtu.
+ */
+ngx_int_t
+ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+    ngx_int_t               rc;
+    ngx_uint_t              i;
+    ngx_msec_t              pto;
+    ngx_quic_connection_t  *qc;
+
+    qc = ngx_quic_get_connection(c);
+
+again:
+
+    if (path->mtu_steps == 0) {
+        /* qc->max_mtu predates the client parameters: apply their limit too */
+        path->max_mtu = ngx_min(qc->max_mtu, qc->ctp.max_udp_payload_size);
+        path->mtud = path->max_mtu;
+    } else if (path->mtu_steps >= NGX_QUIC_MAX_MTU_STEPS
+               || path->max_mtu <= path->mtu + NGX_QUIC_MTU_PRECISION)
+    {
+        /* written as a sum: max_mtu - mtu would wrap if max_mtu < mtu */
+        return NGX_OK;
+    } else {
+        path->mtud = (path->mtu + path->max_mtu) / 2;
+    }
+
+    path->mtu_steps++;
+
+    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                   "quic initiated mtu discovery of path seq:%uL",
+                   path->seqnum);
+
+    for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
+        path->mtu_pnum[i] = NGX_QUIC_UNSET_PN;
+    }
+
+    path->mtu_tries = 0;
+    rc = ngx_quic_send_path_mtu_probe(c, path);
+
+    if (rc == NGX_DECLINED) {
+        path->max_mtu = path->mtud;
+        path->mtud = 0;
+        goto again;
+    }
+
+    if (rc == NGX_ERROR) {
+        path->mtud = 0;
+        return NGX_ERROR;
+    }
+
+    pto = ngx_quic_path_pto(c);
+    path->mtu_expires = ngx_current_msec + pto;
+    if (!qc->path_validation.timer_set) {
+        ngx_add_timer(&qc->path_validation, pto);
+    }
+
+    return NGX_OK;
+}
+
+
+/*
+ * Sends a PING frame as an MTU probe of size path->mtud.  A send failure with
+ * c->write->error set yields NGX_DECLINED (flag cleared), else NGX_ERROR.
+ */
+static ngx_int_t
+ngx_quic_send_path_mtu_probe(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+    ngx_int_t               rc;
+    ngx_uint_t              saved_log_error;
+    ngx_quic_frame_t        probe;
+    ngx_quic_send_ctx_t    *ctx;
+    ngx_quic_connection_t  *qc;
+
+    qc = ngx_quic_get_connection(c);
+    ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+    ngx_memzero(&probe, sizeof(ngx_quic_frame_t));
+    probe.level = ssl_encryption_application;
+    probe.type = NGX_QUIC_FT_PING;
+
+    /* remember the packet number so that its ACK can be matched later */
+    path->mtu_pnum[path->mtu_tries] = ctx->pnum;
+
+    ngx_log_debug4(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                   "quic path seq:%uL send mtu probe "
+                   "size:%uz pnum:%uL tries:%ui",
+                   path->seqnum, path->mtud, ctx->pnum, path->mtu_tries);
+
+    /* switch the connection to the EMSGSIZE-ignoring log mode while sending */
+    saved_log_error = c->log_error;
+    c->log_error = NGX_ERROR_IGNORE_EMSGSIZE;
+    rc = ngx_quic_frame_sendto(c, &probe, path->mtud, path);
+    c->log_error = saved_log_error;
+
+    if (rc != NGX_ERROR) {
+        return NGX_OK;
+    }
+
+    if (!c->write->error) {
+        return NGX_ERROR;
+    }
+    c->write->error = 0;
+    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                   "quic rejected mtu probe of path seq:%uL", path->seqnum);
+    return NGX_DECLINED;
+}
+
+
+/*
+ * Confirms path->mtud as the path MTU and continues discovery if the
+ * acknowledged range [min, max] contains the number of an MTU probe packet.
+ */
+ngx_int_t
+ngx_quic_handle_path_mtu_ack(ngx_connection_t *c, ngx_quic_path_t *path,
+    uint64_t min, uint64_t max)
+{
+    ngx_uint_t  n;
+
+    if (path->mtud == 0) {
+        return NGX_OK;
+    }
+
+    for (n = 0; n < NGX_QUIC_PATH_RETRIES; n++) {
+        if (path->mtu_pnum[n] == NGX_QUIC_UNSET_PN) {
+            /* unused slot: stop scanning */
+            break;
+        }
+
+        if (path->mtu_pnum[n] >= min && path->mtu_pnum[n] <= max) {
+            path->mtu = path->mtud;
+            path->mtud = 0;
+
+            ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                           "quic path seq:%uL mtu ack size:%uz",
+                           path->seqnum, path->mtu);
+
+            return ngx_quic_discover_path_mtu(c, path);
+        }
+    }
+
+    return NGX_OK;
+}
diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
--- a/src/event/quic/ngx_event_quic_migration.h
+++ b/src/event/quic/ngx_event_quic_migration.h
@@ -39,4 +39,9 @@ ngx_int_t ngx_quic_handle_migration(ngx_
 
 void ngx_quic_path_validation_handler(ngx_event_t *ev);
 
+ngx_int_t ngx_quic_discover_path_mtu(ngx_connection_t *c,
+    ngx_quic_path_t *path);
+ngx_int_t ngx_quic_handle_path_mtu_ack(ngx_connection_t *c,
+    ngx_quic_path_t *path, uint64_t min, uint64_t max);
+
 #endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */
diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
--- a/src/event/quic/ngx_event_quic_output.c
+++ b/src/event/quic/ngx_event_quic_output.c
@@ -10,9 +10,6 @@
 #include <ngx_event_quic_connection.h>
 
 
-#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT   1252
-#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6  1232
-
 #define NGX_QUIC_MAX_UDP_SEGMENT_BUF  65487 /* 65K - IPv6 header */
 #define NGX_QUIC_MAX_SEGMENTS            64 /* UDP_MAX_SEGMENTS */
 
@@ -61,21 +58,6 @@ static size_t ngx_quic_path_limit(ngx_co
     size_t size);
 
 
-size_t
-ngx_quic_max_udp_payload(ngx_connection_t *c)
-{
-    /* TODO: path MTU discovery */
-
-#if (NGX_HAVE_INET6)
-    if (c->sockaddr->sa_family == AF_INET6) {
-        return NGX_QUIC_MAX_UDP_PAYLOAD_OUT6;
-    }
-#endif
-
-    return NGX_QUIC_MAX_UDP_PAYLOAD_OUT;
-}
-
-
 ngx_int_t
 ngx_quic_output(ngx_connection_t *c)
 {
@@ -142,10 +124,7 @@ ngx_quic_create_datagrams(ngx_connection
 
         p = dst;
 
-        len = ngx_min(qc->ctp.max_udp_payload_size,
-                      NGX_QUIC_MAX_UDP_PAYLOAD_SIZE);
-
-        len = ngx_quic_path_limit(c, path, len);
+        len = ngx_quic_path_limit(c, path, path->mtu);
 
         pad = ngx_quic_get_padding_level(c);
 
@@ -271,17 +250,19 @@ ngx_quic_allow_segmentation(ngx_connecti
 {
     size_t                  bytes, len;
     ngx_queue_t            *q;
+    ngx_quic_path_t        *path;
     ngx_quic_frame_t       *f;
     ngx_quic_send_ctx_t    *ctx;
     ngx_quic_connection_t  *qc;
 
     qc = ngx_quic_get_connection(c);
+    path = qc->path;
 
     if (!qc->conf->gso_enabled) {
         return 0;
     }
 
-    if (qc->path->limited) {
+    if (path->limited) {
         /* don't even try to be faster on non-validated paths */
         return 0;
     }
@@ -299,9 +280,7 @@ ngx_quic_allow_segmentation(ngx_connecti
     ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
 
     bytes = 0;
-
-    len = ngx_min(qc->ctp.max_udp_payload_size,
-                  NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+    len = path->mtu;
 
     for (q = ngx_queue_head(&ctx->frames);
          q != ngx_queue_sentinel(&ctx->frames);
@@ -345,8 +324,7 @@ ngx_quic_create_segments(ngx_connection_
         return NGX_ERROR;
     }
 
-    segsize = ngx_min(qc->ctp.max_udp_payload_size,
-                      NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+    segsize = ngx_min(path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
     p = dst;
     end = dst + sizeof(dst);
 
diff --git a/src/event/quic/ngx_event_quic_output.h b/src/event/quic/ngx_event_quic_output.h
--- a/src/event/quic/ngx_event_quic_output.h
+++ b/src/event/quic/ngx_event_quic_output.h
@@ -12,8 +12,6 @@
 #include <ngx_core.h>
 
 
-size_t ngx_quic_max_udp_payload(ngx_connection_t *c);
-
 ngx_int_t ngx_quic_output(ngx_connection_t *c);
 
 ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
diff --git a/src/event/quic/ngx_event_quic_ssl.c b/src/event/quic/ngx_event_quic_ssl.c
--- a/src/event/quic/ngx_event_quic_ssl.c
+++ b/src/event/quic/ngx_event_quic_ssl.c
@@ -499,6 +499,10 @@ ngx_quic_crypto_input(ngx_connection_t *
         return NGX_ERROR;
     }
 
+    if (ngx_quic_discover_path_mtu(c, qc->path) != NGX_OK) {
+        return NGX_ERROR;
+    }
+
     if (ngx_quic_init_streams(c) != NGX_OK) {
         return NGX_ERROR;
     }
diff --git a/src/os/unix/ngx_darwin_config.h b/src/os/unix/ngx_darwin_config.h
--- a/src/os/unix/ngx_darwin_config.h
+++ b/src/os/unix/ngx_darwin_config.h
@@ -47,6 +47,8 @@
 #include <arpa/inet.h>
 #include <netdb.h>
 #include <sys/un.h>
+#include <net/if.h>
+#include <ifaddrs.h>
 
 #include <sys/sysctl.h>
 #include <xlocale.h>
diff --git a/src/os/unix/ngx_errno.h b/src/os/unix/ngx_errno.h
--- a/src/os/unix/ngx_errno.h
+++ b/src/os/unix/ngx_errno.h
@@ -54,6 +54,7 @@ typedef int               ngx_err_t;
 #define NGX_ENOMOREFILES  0
 #define NGX_ELOOP         ELOOP
 #define NGX_EBADF         EBADF
+#define NGX_EMSGSIZE      EMSGSIZE
 
 #if (NGX_HAVE_OPENAT)
 #define NGX_EMLINK        EMLINK
diff --git a/src/os/unix/ngx_freebsd_config.h b/src/os/unix/ngx_freebsd_config.h
--- a/src/os/unix/ngx_freebsd_config.h
+++ b/src/os/unix/ngx_freebsd_config.h
@@ -48,6 +48,9 @@
 #include <libutil.h>            /* setproctitle() before 4.1 */
 #include <osreldate.h>
 #include <sys/sysctl.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <ifaddrs.h>
 
 #include <dlfcn.h>
 
diff --git a/src/os/unix/ngx_linux_config.h b/src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h
+++ b/src/os/unix/ngx_linux_config.h
@@ -54,6 +54,8 @@
 #include <sys/ioctl.h>
 #include <crypt.h>
 #include <sys/utsname.h>        /* uname() */
+#include <net/if.h>
+#include <ifaddrs.h>
 
 #include <dlfcn.h>
 
diff --git a/src/os/unix/ngx_posix_config.h b/src/os/unix/ngx_posix_config.h
--- a/src/os/unix/ngx_posix_config.h
+++ b/src/os/unix/ngx_posix_config.h
@@ -140,6 +140,17 @@ typedef struct aiocb  ngx_aiocb_t;
 #endif
 
 
+#if (NGX_HAVE_SIOCGIFMTU)
+#include <sys/ioctl.h>
+#include <net/if.h>
+#endif
+
+
+#if (NGX_HAVE_GETIFADDRS)
+#include <ifaddrs.h>
+#endif
+
+
 #define NGX_LISTEN_BACKLOG  511
 
 #define ngx_debug_init()
diff --git a/src/os/unix/ngx_solaris_config.h b/src/os/unix/ngx_solaris_config.h
--- a/src/os/unix/ngx_solaris_config.h
+++ b/src/os/unix/ngx_solaris_config.h
@@ -88,6 +88,17 @@
 #endif
 
 
+#if (NGX_HAVE_SIOCGIFMTU)
+#include <sys/ioctl.h>
+#include <net/if.h>
+#endif
+
+
+#if (NGX_HAVE_GETIFADDRS)
+#include <ifaddrs.h>
+#endif
+
+
 #define NGX_LISTEN_BACKLOG           511
 
 


More information about the nginx-devel mailing list