[PATCH 3 of 3] QUIC: path MTU discovery
Roman Arutyunyan
arut at nginx.com
Tue Mar 28 14:51:40 UTC 2023
# HG changeset patch
# User Roman Arutyunyan <arut at nginx.com>
# Date 1679993500 -14400
# Tue Mar 28 12:51:40 2023 +0400
# Branch quic
# Node ID 13d43a278510f131101c7b19d87455a0171ebe2f
# Parent c686c97f4abd6e1ca9a2cc2324d5a24f3d035c58
QUIC: path MTU discovery.
MTU selection starts by probing the maximum allowed MTU. After that,
binary search is used to find the path MTU.
The maximum allowed MTU is calculated as the minimum of the client's and the
server's max_udp_payload_size transport parameters and the local interface MTU.
diff --git a/auto/unix b/auto/unix
--- a/auto/unix
+++ b/auto/unix
@@ -448,6 +448,54 @@ ngx_feature_test="setsockopt(0, IPPROTO_
. auto/feature
+# IP packet fragmentation flags
+
+ngx_feature="IP_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IP_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+ #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="getsockopt(0, IPPROTO_IP, IP_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_DONTFRAG"
+ngx_feature_name="NGX_HAVE_IPV6_DONTFRAG"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+ #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="getsockopt(0, IPPROTO_IPV6, IPV6_DONTFRAG, NULL, 0)"
+. auto/feature
+
+
+# Linux MTU flags: IP_PMTUDISC_DO and IPV6_PMTUDISC_DO are values of the
+# IP_MTU_DISCOVER and IPV6_MTU_DISCOVER socket options, so both the value
+# and the option name must be available
+
+ngx_feature="IP_PMTUDISC_DO"
+ngx_feature_name="NGX_HAVE_IP_PMTUDISC_DO"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IP_PMTUDISC_DO;
+                  getsockopt(0, IPPROTO_IP, IP_MTU_DISCOVER, NULL, 0)"
+. auto/feature
+
+
+ngx_feature="IPV6_PMTUDISC_DO"
+ngx_feature_name="NGX_HAVE_IPV6_PMTUDISC_DO"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/socket.h>
+                  #include <netinet/in.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="(void) IPV6_PMTUDISC_DO;
+                  getsockopt(0, IPPROTO_IPV6, IPV6_MTU_DISCOVER, NULL, 0)"
+. auto/feature
+
+
ngx_feature="TCP_DEFER_ACCEPT"
ngx_feature_name="NGX_HAVE_DEFERRED_ACCEPT"
ngx_feature_run=no
@@ -920,6 +968,19 @@ ngx_feature_test="int i = FIONREAD; prin
. auto/feature
+ngx_feature="ioctl(SIOCGIFMTU)"
+ngx_feature_name="NGX_HAVE_SIOCGIFMTU"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/ioctl.h>
+ #include <stdio.h>
+ #include <net/if.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test="int i = SIOCGIFMTU; struct ifreq ifr;
+ ifr.ifr_name[0] = 'e'; printf(\"%d\", i)"
+. auto/feature
+
+
ngx_feature="struct tm.tm_gmtoff"
ngx_feature_name="NGX_HAVE_GMTOFF"
ngx_feature_run=no
@@ -1002,3 +1063,17 @@ ngx_feature_test='struct addrinfo *res;
if (getaddrinfo("localhost", NULL, NULL, &res) != 0) return 1;
freeaddrinfo(res)'
. auto/feature
+
+
+ngx_feature="getifaddrs()"
+ngx_feature_name="NGX_HAVE_GETIFADDRS"
+ngx_feature_run=no
+ngx_feature_incs="#include <sys/types.h>
+ #include <sys/socket.h>
+ #include <ifaddrs.h>"
+ngx_feature_path=
+ngx_feature_libs=
+ngx_feature_test='struct ifaddrs *ifaddr;
+ if (getifaddrs(&ifaddr) != 0) return 1;
+ freeifaddrs(ifaddr)'
+. auto/feature
diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
--- a/src/core/ngx_connection.c
+++ b/src/core/ngx_connection.c
@@ -1010,6 +1010,74 @@ ngx_configure_listening_sockets(ngx_cycl
}
#endif
+
+#if (NGX_HAVE_IP_PMTUDISC_DO)
+
+        /*
+         * Linux: IP_PMTUDISC_DO is a value of the IP_MTU_DISCOVER socket
+         * option, not an option name.  Passing it as the option name
+         * would modify an unrelated option with the same number.
+         */
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+            value = IP_PMTUDISC_DO;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_MTU_DISCOVER,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IP_MTU_DISCOVER) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#elif (NGX_HAVE_IP_DONTFRAG)
+
+        /* IP_DONTFRAG is an on/off option: a non-zero value enables it */
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET) {
+            value = 1;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IP, IP_DONTFRAG,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IP_DONTFRAG) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#endif
+
+#if (NGX_HAVE_INET6 && NGX_HAVE_IPV6_PMTUDISC_DO)
+
+        /*
+         * Linux: IPV6_PMTUDISC_DO is a value of the IPV6_MTU_DISCOVER
+         * socket option, not an option name
+         */
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+            value = IPV6_PMTUDISC_DO;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IPV6_MTU_DISCOVER) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#elif (NGX_HAVE_INET6 && NGX_HAVE_IPV6_DONTFRAG)
+
+        /* IPV6_DONTFRAG is an on/off option: a non-zero value enables it */
+
+        if (ls[i].quic && ls[i].sockaddr->sa_family == AF_INET6) {
+            value = 1;
+
+            if (setsockopt(ls[i].fd, IPPROTO_IPV6, IPV6_DONTFRAG,
+                           (const void *) &value, sizeof(int))
+                == -1)
+            {
+                ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_socket_errno,
+                              "setsockopt(IPV6_DONTFRAG) "
+                              "for %V failed, ignored",
+                              &ls[i].addr_text);
+            }
+        }
+
+#endif
}
return;
@@ -1507,6 +1575,10 @@ ngx_connection_error(ngx_connection_t *c
}
#endif
+ if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
+ return 0;
+ }
+
if (err == 0
|| err == NGX_ECONNRESET
#if (NGX_WIN32)
@@ -1524,6 +1596,7 @@ ngx_connection_error(ngx_connection_t *c
{
switch (c->log_error) {
+ case NGX_ERROR_IGNORE_EMSGSIZE:
case NGX_ERROR_IGNORE_EINVAL:
case NGX_ERROR_IGNORE_ECONNRESET:
case NGX_ERROR_INFO:
diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
--- a/src/core/ngx_connection.h
+++ b/src/core/ngx_connection.h
@@ -97,7 +97,8 @@ typedef enum {
NGX_ERROR_ERR,
NGX_ERROR_INFO,
NGX_ERROR_IGNORE_ECONNRESET,
- NGX_ERROR_IGNORE_EINVAL
+ NGX_ERROR_IGNORE_EINVAL,
+ NGX_ERROR_IGNORE_EMSGSIZE
} ngx_connection_log_error_e;
diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
--- a/src/event/quic/ngx_event_quic.c
+++ b/src/event/quic/ngx_event_quic.c
@@ -10,8 +10,17 @@
#include <ngx_event_quic_connection.h>
+#define NGX_QUIC_UDP4_MAX_PACKET 65535
+#define NGX_QUIC_UDP4_HEADER_SIZE 28
+
+#define NGX_QUIC_UDP6_MAX_PAYLOAD 65535
+#define NGX_QUIC_UDP6_HEADER_SIZE 48
+
+
static ngx_quic_connection_t *ngx_quic_new_connection(ngx_connection_t *c,
ngx_quic_conf_t *conf, ngx_quic_header_t *pkt);
+static ssize_t ngx_quic_get_local_mtu(ngx_connection_t *c,
+ struct sockaddr *sockaddr);
static ngx_int_t ngx_quic_handle_stateless_reset(ngx_connection_t *c,
ngx_quic_header_t *pkt);
static void ngx_quic_input_handler(ngx_event_t *rev);
@@ -149,11 +158,6 @@ ngx_quic_apply_transport_params(ngx_conn
ngx_log_error(NGX_LOG_INFO, c->log, 0,
"quic maximum packet size is invalid");
return NGX_ERROR;
-
- } else if (ctp->max_udp_payload_size > ngx_quic_max_udp_payload(c)) {
- ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
- ngx_log_debug0(NGX_LOG_DEBUG_EVENT, c->log, 0,
- "quic client maximum packet size truncated");
}
if (ctp->active_connection_id_limit < 2) {
@@ -228,6 +232,7 @@ static ngx_quic_connection_t *
ngx_quic_new_connection(ngx_connection_t *c, ngx_quic_conf_t *conf,
ngx_quic_header_t *pkt)
{
+ ssize_t mtu;
ngx_uint_t i;
ngx_quic_tp_t *ctp;
ngx_quic_connection_t *qc;
@@ -297,7 +302,7 @@ ngx_quic_new_connection(ngx_connection_t
ctp = &qc->ctp;
/* defaults to be used before actual client parameters are received */
- ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
+ ctp->max_udp_payload_size = NGX_QUIC_MAX_UDP_PAYLOAD_SIZE;
ctp->ack_delay_exponent = NGX_QUIC_DEFAULT_ACK_DELAY_EXPONENT;
ctp->max_ack_delay = NGX_QUIC_DEFAULT_MAX_ACK_DELAY;
ctp->active_connection_id_limit = 2;
@@ -317,6 +322,18 @@ ngx_quic_new_connection(ngx_connection_t
qc->congestion.ssthresh = (size_t) -1;
qc->congestion.recovery_start = ngx_current_msec;
+ qc->max_mtu = ngx_min(qc->tp.max_udp_payload_size,
+ qc->ctp.max_udp_payload_size);
+
+ mtu = ngx_quic_get_local_mtu(c, c->local_sockaddr);
+ if (mtu == NGX_ERROR) {
+ return NULL;
+ }
+
+ if (mtu > 0 && (size_t) mtu < qc->max_mtu) {
+ qc->max_mtu = mtu;
+ }
+
if (pkt->validated && pkt->retried) {
qc->tp.retry_scid.len = pkt->dcid.len;
qc->tp.retry_scid.data = ngx_pstrdup(c->pool, &pkt->dcid);
@@ -347,6 +364,90 @@ ngx_quic_new_connection(ngx_connection_t
}
+/*
+ * Determines the largest UDP payload that fits into the MTU of the local
+ * interface which owns the given address.
+ *
+ * Returns the payload size (interface MTU minus the per-family header
+ * overhead) on success, NGX_DECLINED if the size cannot be determined
+ * (no getifaddrs()/SIOCGIFMTU support, unsupported address family, no
+ * interface with this address, MTU not larger than the header overhead),
+ * and NGX_ERROR if getifaddrs() or ioctl(SIOCGIFMTU) fails.
+ */
+
+static ssize_t
+ngx_quic_get_local_mtu(ngx_connection_t *c, struct sockaddr *sockaddr)
+{
+#if (NGX_HAVE_GETIFADDRS && NGX_HAVE_SIOCGIFMTU)
+
+    size_t           mtu;
+    struct ifreq     ifr;
+    struct ifaddrs  *ifaddrs, *ifa;
+
+    if (sockaddr->sa_family != AF_INET
+#if (NGX_HAVE_INET6)
+        && sockaddr->sa_family != AF_INET6
+#endif
+       )
+    {
+        return NGX_DECLINED;
+    }
+
+    if (getifaddrs(&ifaddrs) == -1) {
+        ngx_log_error(NGX_LOG_INFO, c->log, ngx_errno,
+                      "getifaddrs() failed");
+        return NGX_ERROR;
+    }
+
+    for (ifa = ifaddrs; ifa; ifa = ifa->ifa_next) {
+        if (ifa->ifa_addr == NULL) {
+            continue;
+        }
+
+        if (ngx_cmp_sockaddr(sockaddr, 0, ifa->ifa_addr, 0, 0) != NGX_OK) {
+            continue;
+        }
+
+        ngx_memzero(&ifr, sizeof(struct ifreq));
+
+        /* bounded copy: never writes past ifr_name, always terminated */
+        (void) ngx_cpystrn((u_char *) ifr.ifr_name,
+                           (u_char *) ifa->ifa_name, sizeof(ifr.ifr_name));
+
+        /* the list is not referenced below and can be released here */
+        freeifaddrs(ifaddrs);
+
+        if (ioctl(c->fd, SIOCGIFMTU, &ifr)) {
+            ngx_log_error(NGX_LOG_INFO, c->log, ngx_socket_errno,
+                          "ioctl(SIOCGIFMTU) failed");
+            return NGX_ERROR;
+        }
+
+        mtu = ifr.ifr_mtu;
+
+        if (sockaddr->sa_family == AF_INET) {
+
+            /* for IPv4 the limit applies to the packet including headers */
+
+            if (mtu > NGX_QUIC_UDP4_MAX_PACKET) {
+                mtu = NGX_QUIC_UDP4_MAX_PACKET;
+            }
+
+            if (mtu <= NGX_QUIC_UDP4_HEADER_SIZE) {
+                return NGX_DECLINED;
+            }
+
+            mtu -= NGX_QUIC_UDP4_HEADER_SIZE;
+
+#if (NGX_HAVE_INET6)
+        } else { /* sockaddr->sa_family == AF_INET6 */
+
+            /* for IPv6 the limit applies to the payload without headers */
+
+            if (mtu <= NGX_QUIC_UDP6_HEADER_SIZE) {
+                return NGX_DECLINED;
+            }
+
+            mtu -= NGX_QUIC_UDP6_HEADER_SIZE;
+
+            if (mtu > NGX_QUIC_UDP6_MAX_PAYLOAD) {
+                mtu = NGX_QUIC_UDP6_MAX_PAYLOAD;
+            }
+#endif
+        }
+
+        ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                       "quic local mtu:%uz", mtu);
+
+        return mtu;
+    }
+
+    freeifaddrs(ifaddrs);
+
+#endif
+
+    return NGX_DECLINED;
+}
+
+
static ngx_int_t
ngx_quic_handle_stateless_reset(ngx_connection_t *c, ngx_quic_header_t *pkt)
{
diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
--- a/src/event/quic/ngx_event_quic_ack.c
+++ b/src/event/quic/ngx_event_quic_ack.c
@@ -229,6 +229,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
qc = ngx_quic_get_connection(c);
+ if (ctx->level == ssl_encryption_application) {
+ if (ngx_quic_handle_path_mtu_ack(c, qc->path, min, max) != NGX_OK) {
+ return NGX_ERROR;
+ }
+ }
+
st->max_pn = NGX_TIMER_INFINITE;
found = 0;
diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
--- a/src/event/quic/ngx_event_quic_connection.h
+++ b/src/event/quic/ngx_event_quic_connection.h
@@ -89,14 +89,21 @@ struct ngx_quic_path_s {
ngx_sockaddr_t sa;
socklen_t socklen;
ngx_quic_client_id_t *cid;
- ngx_msec_t expires;
- ngx_uint_t tries;
+ ngx_msec_t valid_expires;
+ ngx_msec_t mtu_expires;
+ ngx_uint_t valid_tries;
+ ngx_uint_t mtu_tries;
+ ngx_uint_t mtu_steps;
ngx_uint_t tag;
+ size_t mtu;
+ size_t mtud;
+ size_t max_mtu;
off_t sent;
off_t received;
u_char challenge1[8];
u_char challenge2[8];
uint64_t seqnum;
+ uint64_t mtu_pnum[NGX_QUIC_PATH_RETRIES];
ngx_str_t addr_text;
u_char text[NGX_SOCKADDR_STRLEN];
unsigned validated:1;
@@ -206,6 +213,8 @@ struct ngx_quic_connection_s {
uint64_t server_seqnum;
uint64_t path_seqnum;
+ size_t max_mtu;
+
ngx_quic_tp_t tp;
ngx_quic_tp_t ctp;
diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
--- a/src/event/quic/ngx_event_quic_migration.c
+++ b/src/event/quic/ngx_event_quic_migration.c
@@ -10,6 +10,10 @@
#include <ngx_event_quic_connection.h>
+#define NGX_QUIC_MAX_MTU_STEPS 7
+#define NGX_QUIC_MTU_PRECISION 4
+
+
static void ngx_quic_set_connection_path(ngx_connection_t *c,
ngx_quic_path_t *path);
static ngx_int_t ngx_quic_validate_path(ngx_connection_t *c,
@@ -17,7 +21,13 @@ static ngx_int_t ngx_quic_validate_path(
static ngx_msec_t ngx_quic_path_pto(ngx_connection_t *c);
static ngx_int_t ngx_quic_send_path_challenge(ngx_connection_t *c,
ngx_quic_path_t *path);
+static ngx_int_t ngx_quic_expire_path_mtu(ngx_connection_t *c,
+ ngx_quic_path_t *path, ngx_msec_int_t *next);
+static ngx_int_t ngx_quic_expire_path(ngx_connection_t *c,
+ ngx_quic_path_t *path, ngx_msec_int_t *next);
static ngx_quic_path_t *ngx_quic_get_path(ngx_connection_t *c, ngx_uint_t tag);
+static ngx_int_t ngx_quic_send_path_mtu_probe(ngx_connection_t *c,
+ ngx_quic_path_t *path);
ngx_int_t
@@ -170,6 +180,10 @@ valid:
path->validating = 0;
path->limited = 0;
+ if (ngx_quic_discover_path_mtu(c, path) != NGX_OK) {
+ return NGX_ERROR;
+ }
+
return NGX_OK;
}
@@ -208,6 +222,8 @@ ngx_quic_new_path(ngx_connection_t *c,
path->limited = 1;
+ path->mtu = NGX_QUIC_MIN_INITIAL_SIZE;
+
path->seqnum = qc->path_seqnum++;
path->sockaddr = &path->sa.sockaddr;
@@ -505,14 +521,14 @@ ngx_quic_validate_path(ngx_connection_t
return NGX_ERROR;
}
+ path->valid_tries = 0;
+
if (ngx_quic_send_path_challenge(c, path) != NGX_OK) {
return NGX_ERROR;
}
pto = ngx_quic_path_pto(c);
-
- path->expires = ngx_current_msec + pto;
- path->tries = NGX_QUIC_PATH_RETRIES;
+ path->valid_expires = ngx_current_msec + pto;
if (!qc->path_validation.timer_set) {
ngx_add_timer(&qc->path_validation, pto);
@@ -556,7 +572,7 @@ ngx_quic_send_path_challenge(ngx_connect
ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
"quic path seq:%uL send path_challenge tries:%ui",
- path->seqnum, path->tries);
+ path->seqnum, path->valid_tries);
ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
@@ -592,20 +608,16 @@ ngx_quic_send_path_challenge(ngx_connect
void
ngx_quic_path_validation_handler(ngx_event_t *ev)
{
- ngx_msec_t now;
ngx_queue_t *q;
- ngx_msec_int_t left, next, pto;
- ngx_quic_path_t *path, *bkp;
+ ngx_msec_int_t next;
+ ngx_quic_path_t *path;
ngx_connection_t *c;
ngx_quic_connection_t *qc;
c = ev->data;
qc = ngx_quic_get_connection(c);
- pto = ngx_quic_path_pto(c);
-
next = -1;
- now = ngx_current_msec;
q = ngx_queue_head(&qc->paths);
@@ -614,76 +626,12 @@ ngx_quic_path_validation_handler(ngx_eve
path = ngx_queue_data(q, ngx_quic_path_t, queue);
q = ngx_queue_next(q);
- if (!path->validating) {
- continue;
- }
-
- left = path->expires - now;
-
- if (left > 0) {
-
- if (next == -1 || left < next) {
- next = left;
- }
-
- continue;
- }
-
- if (--path->tries) {
- path->expires = ngx_current_msec + pto;
-
- if (next == -1 || pto < next) {
- next = pto;
- }
-
- /* retransmit */
- (void) ngx_quic_send_path_challenge(c, path);
-
- continue;
+ if (ngx_quic_expire_path_mtu(c, path, &next) != NGX_OK) {
+ ngx_quic_close_connection(c, NGX_ERROR);
+ return;
}
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
- "quic path seq:%uL validation failed", path->seqnum);
-
- /* found expired path */
-
- path->validated = 0;
- path->validating = 0;
- path->limited = 1;
-
-
- /* RFC 9000, 9.3.2. On-Path Address Spoofing
- *
- * To protect the connection from failing due to such a spurious
- * migration, an endpoint MUST revert to using the last validated
- * peer address when validation of a new peer address fails.
- */
-
- if (qc->path == path) {
- /* active path validation failed */
-
- bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
-
- if (bkp == NULL) {
- qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
- qc->error_reason = "no viable path";
- ngx_quic_close_connection(c, NGX_ERROR);
- return;
- }
-
- qc->path = bkp;
- qc->path->tag = NGX_QUIC_PATH_ACTIVE;
-
- ngx_quic_set_connection_path(c, qc->path);
-
- ngx_log_error(NGX_LOG_INFO, c->log, 0,
- "quic path seq:%uL addr:%V is restored from backup",
- qc->path->seqnum, &qc->path->addr_text);
-
- ngx_quic_path_dbg(c, "is active", qc->path);
- }
-
- if (ngx_quic_free_path(c, path) != NGX_OK) {
+ if (ngx_quic_expire_path(c, path, &next) != NGX_OK) {
ngx_quic_close_connection(c, NGX_ERROR);
return;
}
@@ -693,3 +641,290 @@ ngx_quic_path_validation_handler(ngx_eve
ngx_add_timer(&qc->path_validation, next);
}
}
+
+
+/*
+ * Handles expiration of an outstanding MTU probe on a path.
+ *
+ * Does nothing if no probe is in progress (path->mtud is zero).  If the
+ * probe has not expired yet, the remaining time is merged into *next
+ * (the smallest value wins, -1 means "not set").  An expired probe is
+ * resent until NGX_QUIC_PATH_RETRIES attempts have been made; after that,
+ * or if sending is declined, the probed size becomes the new upper bound
+ * and discovery continues with the next candidate size.
+ *
+ * Returns the result of resending the probe, or the result of
+ * ngx_quic_discover_path_mtu() when the probe is considered failed.
+ */
+
+static ngx_int_t
+ngx_quic_expire_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path,
+ ngx_msec_int_t *next)
+{
+ ngx_int_t rc;
+ ngx_msec_t now;
+ ngx_msec_int_t left, pto;
+
+ /* no MTU probe is in progress on this path */
+ if (!path->mtud) {
+ return NGX_OK;
+ }
+
+ now = ngx_current_msec;
+
+ left = path->mtu_expires - now;
+
+ if (left > 0) {
+
+ /* not expired yet: report the remaining time to the caller */
+ if (*next == -1 || left < *next) {
+ *next = left;
+ }
+
+ return NGX_OK;
+ }
+
+ if (++path->mtu_tries < NGX_QUIC_PATH_RETRIES) {
+ pto = ngx_quic_path_pto(c);
+
+ path->mtu_expires = ngx_current_msec + pto;
+
+ if (*next == -1 || pto < *next) {
+ *next = pto;
+ }
+
+ /* resend; NGX_DECLINED falls through and is treated as a failure */
+ rc = ngx_quic_send_path_mtu_probe(c, path);
+ if (rc != NGX_DECLINED) {
+ return rc;
+ }
+ }
+
+ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL mtu probe failed", path->seqnum);
+
+ /* the probed size did not get through: use it as the new upper bound */
+ path->max_mtu = path->mtud;
+ path->mtud = 0;
+
+ return ngx_quic_discover_path_mtu(c, path);
+}
+
+
+/*
+ * Handles expiration of path validation for a single path.
+ *
+ * Does nothing if the path is not being validated.  If validation has not
+ * expired yet, the remaining time is merged into *next (the smallest value
+ * wins, -1 means "not set").  An expired challenge is resent until
+ * NGX_QUIC_PATH_RETRIES attempts have been made.  After that the path is
+ * marked as not validated and freed; if it was the active path, the backup
+ * path is made active instead.
+ *
+ * Returns NGX_OK, or NGX_ERROR if the active path failed and there is no
+ * backup path (qc->error is set to NGX_QUIC_ERR_NO_VIABLE_PATH), or if
+ * ngx_quic_free_path() fails.
+ */
+
+static ngx_int_t
+ngx_quic_expire_path(ngx_connection_t *c, ngx_quic_path_t *path,
+ ngx_msec_int_t *next)
+{
+ ngx_msec_t now;
+ ngx_msec_int_t left, pto;
+ ngx_quic_path_t *bkp;
+ ngx_quic_connection_t *qc;
+
+ if (!path->validating) {
+ return NGX_OK;
+ }
+
+ qc = ngx_quic_get_connection(c);
+
+ now = ngx_current_msec;
+
+ left = path->valid_expires - now;
+
+ if (left > 0) {
+
+ /* not expired yet: report the remaining time to the caller */
+ if (*next == -1 || left < *next) {
+ *next = left;
+ }
+
+ return NGX_OK;
+ }
+
+ if (++path->valid_tries < NGX_QUIC_PATH_RETRIES) {
+ pto = ngx_quic_path_pto(c);
+
+ path->valid_expires = ngx_current_msec + pto;
+
+ if (*next == -1 || pto < *next) {
+ *next = pto;
+ }
+
+ /* retransmit */
+ /* the send result is deliberately ignored here */
+ (void) ngx_quic_send_path_challenge(c, path);
+
+ return NGX_OK;
+ }
+
+ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL validation failed", path->seqnum);
+
+ /* found expired path */
+
+ path->validated = 0;
+ path->validating = 0;
+ path->limited = 1;
+
+
+ /* RFC 9000, 9.3.2. On-Path Address Spoofing
+ *
+ * To protect the connection from failing due to such a spurious
+ * migration, an endpoint MUST revert to using the last validated
+ * peer address when validation of a new peer address fails.
+ */
+
+ if (qc->path == path) {
+ /* active path validation failed */
+
+ bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
+
+ if (bkp == NULL) {
+ /* the caller is expected to close the connection on NGX_ERROR */
+ qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
+ qc->error_reason = "no viable path";
+ return NGX_ERROR;
+ }
+
+ qc->path = bkp;
+ qc->path->tag = NGX_QUIC_PATH_ACTIVE;
+
+ ngx_quic_set_connection_path(c, qc->path);
+
+ ngx_log_error(NGX_LOG_INFO, c->log, 0,
+ "quic path seq:%uL addr:%V is restored from backup",
+ qc->path->seqnum, &qc->path->addr_text);
+
+ ngx_quic_path_dbg(c, "is active", qc->path);
+ }
+
+ if (ngx_quic_free_path(c, path) != NGX_OK) {
+ return NGX_ERROR;
+ }
+
+ return NGX_OK;
+}
+
+
+/*
+ * Starts or continues MTU discovery on a path.
+ *
+ * The first step probes the maximum allowed size.  Each following step
+ * probes the midpoint between the largest confirmed size (path->mtu) and
+ * the current upper bound (path->max_mtu).  Discovery stops after
+ * NGX_QUIC_MAX_MTU_STEPS steps or when the two bounds are within
+ * NGX_QUIC_MTU_PRECISION bytes of each other.
+ *
+ * Returns NGX_OK if a probe was sent or discovery is finished, NGX_ERROR
+ * if sending a probe failed.
+ */
+
+ngx_int_t
+ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+    ngx_int_t               rc;
+    ngx_uint_t              i;
+    ngx_msec_t              pto;
+    ngx_quic_connection_t  *qc;
+
+    qc = ngx_quic_get_connection(c);
+
+again:
+
+    if (path->mtu_steps == 0) {
+
+        /*
+         * qc->max_mtu is computed when the connection is created, which
+         * may be before the client transport parameters are received, so
+         * the client's max_udp_payload_size is applied here as well to
+         * never probe with datagrams the peer is not willing to accept
+         */
+
+        path->max_mtu = ngx_min(qc->max_mtu, qc->ctp.max_udp_payload_size);
+        path->mtud = path->max_mtu;
+
+    } else if (path->mtu_steps >= NGX_QUIC_MAX_MTU_STEPS
+               || path->max_mtu <= path->mtu
+               || path->max_mtu - path->mtu <= NGX_QUIC_MTU_PRECISION)
+    {
+        /*
+         * done; the explicit max_mtu <= mtu test keeps the unsigned
+         * subtraction from wrapping around when the upper bound is
+         * below the confirmed size
+         */
+
+        return NGX_OK;
+
+    } else {
+        path->mtud = (path->mtu + path->max_mtu) / 2;
+    }
+
+    path->mtu_steps++;
+
+    ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                   "quic initiated mtu discovery of path seq:%uL",
+                   path->seqnum);
+
+    /* forget packet numbers of probes sent for the previous size */
+
+    for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
+        path->mtu_pnum[i] = NGX_QUIC_UNSET_PN;
+    }
+
+    path->mtu_tries = 0;
+
+    rc = ngx_quic_send_path_mtu_probe(c, path);
+
+    if (rc == NGX_DECLINED) {
+        /* this size cannot be sent: lower the upper bound and retry */
+        path->max_mtu = path->mtud;
+        path->mtud = 0;
+        goto again;
+    }
+
+    if (rc == NGX_ERROR) {
+        path->mtud = 0;
+        return NGX_ERROR;
+    }
+
+    /* rc == NGX_OK */
+
+    pto = ngx_quic_path_pto(c);
+    path->mtu_expires = ngx_current_msec + pto;
+
+    if (!qc->path_validation.timer_set) {
+        ngx_add_timer(&qc->path_validation, pto);
+    }
+
+    return NGX_OK;
+}
+
+
+/*
+ * Sends a single MTU probe on a path: a PING frame at the application
+ * encryption level, passed to ngx_quic_frame_sendto() together with
+ * path->mtud (the size being probed).
+ *
+ * Returns NGX_OK if the probe was sent, NGX_DECLINED if sending failed
+ * with the write event error flag set (the flag is cleared), and
+ * NGX_ERROR on any other failure.
+ */
+
+static ngx_int_t
+ngx_quic_send_path_mtu_probe(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+ ngx_int_t rc;
+ ngx_uint_t log_error;
+ ngx_quic_frame_t frame;
+ ngx_quic_send_ctx_t *ctx;
+ ngx_quic_connection_t *qc;
+
+ ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
+
+ frame.level = ssl_encryption_application;
+ frame.type = NGX_QUIC_FT_PING;
+
+ qc = ngx_quic_get_connection(c);
+ ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+ /* remembered before sending so that an ACK can be matched to the probe */
+ path->mtu_pnum[path->mtu_tries] = ctx->pnum;
+
+ ngx_log_debug4(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL send mtu probe "
+ "size:%uz pnum:%uL tries:%ui",
+ path->seqnum, path->mtud, ctx->pnum, path->mtu_tries);
+
+ /* EMSGSIZE is an expected outcome of a probe: suppress it while sending */
+ log_error = c->log_error;
+ c->log_error = NGX_ERROR_IGNORE_EMSGSIZE;
+
+ rc = ngx_quic_frame_sendto(c, &frame, path->mtud, path);
+ c->log_error = log_error;
+
+ if (rc == NGX_ERROR) {
+ if (c->write->error) {
+ /* reset the flag so that the connection remains usable */
+ c->write->error = 0;
+
+ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic rejected mtu probe of path seq:%uL",
+ path->seqnum);
+
+ return NGX_DECLINED;
+ }
+
+ return NGX_ERROR;
+ }
+
+ return NGX_OK;
+}
+
+
+/*
+ * Checks whether the acknowledged packet number range [min, max] covers
+ * one of the outstanding MTU probes of the path.  If so, the probed size
+ * becomes the confirmed path MTU and discovery moves on to the next step.
+ *
+ * Returns NGX_OK if no probe is outstanding or none was acknowledged,
+ * otherwise the result of ngx_quic_discover_path_mtu().
+ */
+
+ngx_int_t
+ngx_quic_handle_path_mtu_ack(ngx_connection_t *c, ngx_quic_path_t *path,
+    uint64_t min, uint64_t max)
+{
+    ngx_uint_t  n;
+
+    if (path->mtud == 0) {
+        /* no probe in progress */
+        return NGX_OK;
+    }
+
+    /* recorded probes are stored contiguously, the first unset slot ends */
+
+    for (n = 0;
+         n < NGX_QUIC_PATH_RETRIES
+         && path->mtu_pnum[n] != NGX_QUIC_UNSET_PN;
+         n++)
+    {
+        if (path->mtu_pnum[n] >= min && path->mtu_pnum[n] <= max) {
+
+            path->mtu = path->mtud;
+            path->mtud = 0;
+
+            ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+                           "quic path seq:%uL mtu ack size:%uz",
+                           path->seqnum, path->mtu);
+
+            return ngx_quic_discover_path_mtu(c, path);
+        }
+    }
+
+    return NGX_OK;
+}
diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
--- a/src/event/quic/ngx_event_quic_migration.h
+++ b/src/event/quic/ngx_event_quic_migration.h
@@ -39,4 +39,9 @@ ngx_int_t ngx_quic_handle_migration(ngx_
void ngx_quic_path_validation_handler(ngx_event_t *ev);
+ngx_int_t ngx_quic_discover_path_mtu(ngx_connection_t *c,
+ ngx_quic_path_t *path);
+ngx_int_t ngx_quic_handle_path_mtu_ack(ngx_connection_t *c,
+ ngx_quic_path_t *path, uint64_t min, uint64_t max);
+
#endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */
diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
--- a/src/event/quic/ngx_event_quic_output.c
+++ b/src/event/quic/ngx_event_quic_output.c
@@ -10,9 +10,6 @@
#include <ngx_event_quic_connection.h>
-#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT 1252
-#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6 1232
-
#define NGX_QUIC_MAX_UDP_SEGMENT_BUF 65487 /* 65K - IPv6 header */
#define NGX_QUIC_MAX_SEGMENTS 64 /* UDP_MAX_SEGMENTS */
@@ -61,21 +58,6 @@ static size_t ngx_quic_path_limit(ngx_co
size_t size);
-size_t
-ngx_quic_max_udp_payload(ngx_connection_t *c)
-{
- /* TODO: path MTU discovery */
-
-#if (NGX_HAVE_INET6)
- if (c->sockaddr->sa_family == AF_INET6) {
- return NGX_QUIC_MAX_UDP_PAYLOAD_OUT6;
- }
-#endif
-
- return NGX_QUIC_MAX_UDP_PAYLOAD_OUT;
-}
-
-
ngx_int_t
ngx_quic_output(ngx_connection_t *c)
{
@@ -142,10 +124,7 @@ ngx_quic_create_datagrams(ngx_connection
p = dst;
- len = ngx_min(qc->ctp.max_udp_payload_size,
- NGX_QUIC_MAX_UDP_PAYLOAD_SIZE);
-
- len = ngx_quic_path_limit(c, path, len);
+ len = ngx_quic_path_limit(c, path, path->mtu);
pad = ngx_quic_get_padding_level(c);
@@ -271,17 +250,19 @@ ngx_quic_allow_segmentation(ngx_connecti
{
size_t bytes, len;
ngx_queue_t *q;
+ ngx_quic_path_t *path;
ngx_quic_frame_t *f;
ngx_quic_send_ctx_t *ctx;
ngx_quic_connection_t *qc;
qc = ngx_quic_get_connection(c);
+ path = qc->path;
if (!qc->conf->gso_enabled) {
return 0;
}
- if (qc->path->limited) {
+ if (path->limited) {
/* don't even try to be faster on non-validated paths */
return 0;
}
@@ -299,9 +280,7 @@ ngx_quic_allow_segmentation(ngx_connecti
ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
bytes = 0;
-
- len = ngx_min(qc->ctp.max_udp_payload_size,
- NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+ len = path->mtu;
for (q = ngx_queue_head(&ctx->frames);
q != ngx_queue_sentinel(&ctx->frames);
@@ -345,8 +324,7 @@ ngx_quic_create_segments(ngx_connection_
return NGX_ERROR;
}
- segsize = ngx_min(qc->ctp.max_udp_payload_size,
- NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+ segsize = ngx_min(path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
p = dst;
end = dst + sizeof(dst);
diff --git a/src/event/quic/ngx_event_quic_output.h b/src/event/quic/ngx_event_quic_output.h
--- a/src/event/quic/ngx_event_quic_output.h
+++ b/src/event/quic/ngx_event_quic_output.h
@@ -12,8 +12,6 @@
#include <ngx_core.h>
-size_t ngx_quic_max_udp_payload(ngx_connection_t *c);
-
ngx_int_t ngx_quic_output(ngx_connection_t *c);
ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
diff --git a/src/event/quic/ngx_event_quic_ssl.c b/src/event/quic/ngx_event_quic_ssl.c
--- a/src/event/quic/ngx_event_quic_ssl.c
+++ b/src/event/quic/ngx_event_quic_ssl.c
@@ -499,6 +499,10 @@ ngx_quic_crypto_input(ngx_connection_t *
return NGX_ERROR;
}
+ if (ngx_quic_discover_path_mtu(c, qc->path) != NGX_OK) {
+ return NGX_ERROR;
+ }
+
if (ngx_quic_init_streams(c) != NGX_OK) {
return NGX_ERROR;
}
diff --git a/src/os/unix/ngx_darwin_config.h b/src/os/unix/ngx_darwin_config.h
--- a/src/os/unix/ngx_darwin_config.h
+++ b/src/os/unix/ngx_darwin_config.h
@@ -47,6 +47,8 @@
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/un.h>
+#include <net/if.h>
+#include <ifaddrs.h>
#include <sys/sysctl.h>
#include <xlocale.h>
diff --git a/src/os/unix/ngx_errno.h b/src/os/unix/ngx_errno.h
--- a/src/os/unix/ngx_errno.h
+++ b/src/os/unix/ngx_errno.h
@@ -54,6 +54,7 @@ typedef int ngx_err_t;
#define NGX_ENOMOREFILES 0
#define NGX_ELOOP ELOOP
#define NGX_EBADF EBADF
+#define NGX_EMSGSIZE EMSGSIZE
#if (NGX_HAVE_OPENAT)
#define NGX_EMLINK EMLINK
diff --git a/src/os/unix/ngx_freebsd_config.h b/src/os/unix/ngx_freebsd_config.h
--- a/src/os/unix/ngx_freebsd_config.h
+++ b/src/os/unix/ngx_freebsd_config.h
@@ -48,6 +48,9 @@
#include <libutil.h> /* setproctitle() before 4.1 */
#include <osreldate.h>
#include <sys/sysctl.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <ifaddrs.h>
#include <dlfcn.h>
diff --git a/src/os/unix/ngx_linux_config.h b/src/os/unix/ngx_linux_config.h
--- a/src/os/unix/ngx_linux_config.h
+++ b/src/os/unix/ngx_linux_config.h
@@ -54,6 +54,8 @@
#include <sys/ioctl.h>
#include <crypt.h>
#include <sys/utsname.h> /* uname() */
+#include <net/if.h>
+#include <ifaddrs.h>
#include <dlfcn.h>
diff --git a/src/os/unix/ngx_posix_config.h b/src/os/unix/ngx_posix_config.h
--- a/src/os/unix/ngx_posix_config.h
+++ b/src/os/unix/ngx_posix_config.h
@@ -140,6 +140,17 @@ typedef struct aiocb ngx_aiocb_t;
#endif
+#if (NGX_HAVE_SIOCGIFMTU)
+#include <sys/ioctl.h>
+#include <net/if.h>
+#endif
+
+
+#if (NGX_HAVE_GETIFADDRS)
+#include <ifaddrs.h>
+#endif
+
+
#define NGX_LISTEN_BACKLOG 511
#define ngx_debug_init()
diff --git a/src/os/unix/ngx_solaris_config.h b/src/os/unix/ngx_solaris_config.h
--- a/src/os/unix/ngx_solaris_config.h
+++ b/src/os/unix/ngx_solaris_config.h
@@ -88,6 +88,17 @@
#endif
+#if (NGX_HAVE_SIOCGIFMTU)
+#include <sys/ioctl.h>
+#include <net/if.h>
+#endif
+
+
+#if (NGX_HAVE_GETIFADDRS)
+#include <ifaddrs.h>
+#endif
+
+
#define NGX_LISTEN_BACKLOG 511
More information about the nginx-devel
mailing list