[PATCH 4 of 4] QUIC: path MTU discovery
Sergey Kandaurov
pluknet at nginx.com
Tue Aug 8 09:49:46 UTC 2023
> On 31 Jul 2023, at 21:34, Roman Arutyunyan <arut at nginx.com> wrote:
>
> Hi,
>
> On Fri, Jul 28, 2023 at 07:51:22PM +0400, Sergey Kandaurov wrote:
>>
>>> On 6 Jul 2023, at 17:57, Roman Arutyunyan <arut at nginx.com> wrote:
>>>
>>> # HG changeset patch
>>> # User Roman Arutyunyan <arut at nginx.com>
>>> # Date 1688481216 -14400
>>> # Tue Jul 04 18:33:36 2023 +0400
>>> # Node ID 1a49fd5d2013b6c8d50263499e6ced440927e949
>>> # Parent a1ea543d009311765144351b2557c00c8e6445bf
>>> QUIC: path MTU discovery.
>>>
>>> MTU selection starts by stepping up until the first failure. Then binary
>>> search is used to find the path MTU.
>>>
>>> diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
>>> --- a/src/core/ngx_connection.c
>>> +++ b/src/core/ngx_connection.c
>>> @@ -1583,6 +1583,10 @@ ngx_connection_error(ngx_connection_t *c
>>> }
>>> #endif
>>>
>>> + if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
>>> + return 0;
>>> + }
>>> +
>>> if (err == 0
>>> || err == NGX_ECONNRESET
>>> #if (NGX_WIN32)
>>> @@ -1600,6 +1604,7 @@ ngx_connection_error(ngx_connection_t *c
>>> {
>>> switch (c->log_error) {
>>>
>>> + case NGX_ERROR_IGNORE_EMSGSIZE:
>>> case NGX_ERROR_IGNORE_EINVAL:
>>> case NGX_ERROR_IGNORE_ECONNRESET:
>>> case NGX_ERROR_INFO:
>>> diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
>>> --- a/src/core/ngx_connection.h
>>> +++ b/src/core/ngx_connection.h
>>> @@ -97,7 +97,8 @@ typedef enum {
>>> NGX_ERROR_ERR,
>>> NGX_ERROR_INFO,
>>> NGX_ERROR_IGNORE_ECONNRESET,
>>> - NGX_ERROR_IGNORE_EINVAL
>>> + NGX_ERROR_IGNORE_EINVAL,
>>> + NGX_ERROR_IGNORE_EMSGSIZE
>>> } ngx_connection_log_error_e;
>>>
>>>
>>> diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
>>> --- a/src/event/quic/ngx_event_quic.c
>>> +++ b/src/event/quic/ngx_event_quic.c
>>> @@ -149,11 +149,6 @@ ngx_quic_apply_transport_params(ngx_conn
>>> ngx_log_error(NGX_LOG_INFO, c->log, 0,
>>> "quic maximum packet size is invalid");
>>> return NGX_ERROR;
>>> -
>>> - } else if (ctp->max_udp_payload_size > ngx_quic_max_udp_payload(c)) {
>>> - ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
>>> - ngx_log_debug0(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> - "quic client maximum packet size truncated");
>>> }
>>>
>>> if (ctp->active_connection_id_limit < 2) {
>>> @@ -286,7 +281,7 @@ ngx_quic_new_connection(ngx_connection_t
>>>
>>> qc->path_validation.log = c->log;
>>> qc->path_validation.data = c;
>>> - qc->path_validation.handler = ngx_quic_path_validation_handler;
>>> + qc->path_validation.handler = ngx_quic_path_handler;
>>>
>>> qc->conf = conf;
>>>
>>> @@ -297,7 +292,7 @@ ngx_quic_new_connection(ngx_connection_t
>>> ctp = &qc->ctp;
>>>
>>> /* defaults to be used before actual client parameters are received */
>>> - ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
>>> + ctp->max_udp_payload_size = NGX_QUIC_MAX_UDP_PAYLOAD_SIZE;
>>> ctp->ack_delay_exponent = NGX_QUIC_DEFAULT_ACK_DELAY_EXPONENT;
>>> ctp->max_ack_delay = NGX_QUIC_DEFAULT_MAX_ACK_DELAY;
>>> ctp->active_connection_id_limit = 2;
>>> diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
>>> --- a/src/event/quic/ngx_event_quic_ack.c
>>> +++ b/src/event/quic/ngx_event_quic_ack.c
>>> @@ -229,6 +229,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
>>>
>>> qc = ngx_quic_get_connection(c);
>>>
>>> + if (ctx->level == ssl_encryption_application) {
>>> + if (ngx_quic_handle_path_mtu_ack(c, qc->path, min, max) != NGX_OK) {
>>> + return NGX_ERROR;
>>> + }
>>> + }
>>> +
>>> st->max_pn = NGX_TIMER_INFINITE;
>>> found = 0;
>>>
>>> diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
>>> --- a/src/event/quic/ngx_event_quic_connection.h
>>> +++ b/src/event/quic/ngx_event_quic_connection.h
>>> @@ -66,6 +66,14 @@ typedef struct ngx_quic_keys_s ng
>>> #define ngx_quic_get_socket(c) ((ngx_quic_socket_t *)((c)->udp))
>>>
>>>
>>> +typedef enum {
>>> + NGX_QUIC_PATH_IDLE = 0,
>>> + NGX_QUIC_PATH_VALIDATING,
>>> + NGX_QUIC_PATH_WAITING,
>>> + NGX_QUIC_PATH_DISCOVERING_MTU
>>
>> Nitpicking on too long name.
>> What about NGX_QUIC_PATH_MTUD or something.
>
> OK, renamed.
>
>>> +} ngx_quic_path_state_e;
>>> +
>>> +
>>> struct ngx_quic_client_id_s {
>>> ngx_queue_t queue;
>>> uint64_t seqnum;
>>> @@ -89,18 +97,22 @@ struct ngx_quic_path_s {
>>> ngx_sockaddr_t sa;
>>> socklen_t socklen;
>>> ngx_quic_client_id_t *cid;
>>> + ngx_quic_path_state_e state;
>>> ngx_msec_t expires;
>>> ngx_uint_t tries;
>>> ngx_uint_t tag;
>>> + size_t mtu;
>>> + size_t mtud;
>>> + size_t max_mtu;
>>> off_t sent;
>>> off_t received;
>>> u_char challenge1[8];
>>> u_char challenge2[8];
>>> uint64_t seqnum;
>>> + uint64_t mtu_pnum[NGX_QUIC_PATH_RETRIES];
>>> ngx_str_t addr_text;
>>> u_char text[NGX_SOCKADDR_STRLEN];
>>> - unsigned validated:1;
>>> - unsigned validating:1;
>>> + ngx_uint_t validated; /* unsigned validated:1; */
>>> };
>>>
>>>
>>> diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
>>> --- a/src/event/quic/ngx_event_quic_migration.c
>>> +++ b/src/event/quic/ngx_event_quic_migration.c
>>> @@ -10,6 +10,11 @@
>>> #include <ngx_event_quic_connection.h>
>>>
>>>
>>> +#define NGX_QUIC_PATH_MTU_DELAY 1000
>>
>> The value looks too much,
>> it would delay each subsequent probe (which is not retry).
>
> Probably the right value should be determined in real-life scenarios.
I'm quite skeptical about delayed probes, it has little to no benefit
for short connections with bulky transfers, as implemented, especially
with short MTU steps. OTOH, immediate probes can lead to congestion
window starvation resulting in request processing slow down.
For the record,
https://www.ietf.org/proceedings/62/pmtud.html provides a discussion
about PMTUD, in particular, on search strategy and when to probe.
For the latter, it seems to articulate an opposite view, i.e. to send
probes ASAP. Also, it raises a question how short lived connections
could benefit from PMTUD, e.g. by caching path MTU, which seems to
correspond to a shared PLPMTU state in RFC 8899.
> Let's decrease the value by a factor of 10 for now.
For a simple implementation, 100ms is a good enough trade-off, IMHO.
>
>>> +#define NGX_QUIC_PATH_MTU_STEP 256
>>> +#define NGX_QUIC_PATH_MTU_PRECISION 16
>>> +
>>> +
>>> static void ngx_quic_set_connection_path(ngx_connection_t *c,
>>> ngx_quic_path_t *path);
>>> static ngx_int_t ngx_quic_validate_path(ngx_connection_t *c,
>>> @@ -17,7 +22,15 @@ static ngx_int_t ngx_quic_validate_path(
>>> static ngx_int_t ngx_quic_send_path_challenge(ngx_connection_t *c,
>>> ngx_quic_path_t *path);
>>> static void ngx_quic_set_path_timer(ngx_connection_t *c);
>>> +static ngx_int_t ngx_quic_expire_path_validation(ngx_connection_t *c,
>>> + ngx_quic_path_t *path);
>>> +static ngx_int_t ngx_quic_expire_path_mtu_delay(ngx_connection_t *c,
>>> + ngx_quic_path_t *path);
>>> +static ngx_int_t ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c,
>>> + ngx_quic_path_t *path);
>>> static ngx_quic_path_t *ngx_quic_get_path(ngx_connection_t *c, ngx_uint_t tag);
>>> +static ngx_int_t ngx_quic_send_path_mtu_probe(ngx_connection_t *c,
>>> + ngx_quic_path_t *path);
>>>
>>>
>>> ngx_int_t
>>> @@ -97,7 +110,7 @@ ngx_quic_handle_path_response_frame(ngx_
>>> {
>>> path = ngx_queue_data(q, ngx_quic_path_t, queue);
>>>
>>> - if (!path->validating) {
>>> + if (path->state != NGX_QUIC_PATH_VALIDATING) {
>>> continue;
>>> }
>>>
>>> @@ -136,6 +149,9 @@ valid:
>>> {
>>> /* address did not change */
>>> rst = 0;
>>> +
>>> + path->mtu = prev->mtu;
>>> + path->max_mtu = prev->max_mtu;
>>> }
>>> }
>>>
>>> @@ -167,9 +183,8 @@ valid:
>>> ngx_quic_path_dbg(c, "is validated", path);
>>>
>>> path->validated = 1;
>>> - path->validating = 0;
>>>
>>> - ngx_quic_set_path_timer(c);
>>> + ngx_quic_discover_path_mtu(c, path);
>>>
>>> return NGX_OK;
>>> }
>>> @@ -217,6 +232,8 @@ ngx_quic_new_path(ngx_connection_t *c,
>>> path->addr_text.len = ngx_sock_ntop(sockaddr, socklen, path->text,
>>> NGX_SOCKADDR_STRLEN, 1);
>>>
>>> + path->mtu = NGX_QUIC_MIN_INITIAL_SIZE;
>>> +
>>> ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> "quic path seq:%uL created addr:%V",
>>> path->seqnum, &path->addr_text);
>>> @@ -464,7 +481,7 @@ ngx_quic_handle_migration(ngx_connection
>>>
>>> ngx_quic_set_connection_path(c, next);
>>>
>>> - if (!next->validated && !next->validating) {
>>> + if (!next->validated && next->state != NGX_QUIC_PATH_VALIDATING) {
>>> if (ngx_quic_validate_path(c, next) != NGX_OK) {
>>> return NGX_ERROR;
>>> }
>>> @@ -492,7 +509,6 @@ ngx_quic_validate_path(ngx_connection_t
>>> ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> "quic initiated validation of path seq:%uL", path->seqnum);
>>>
>>> - path->validating = 1;
>>> path->tries = 0;
>>>
>>> if (RAND_bytes(path->challenge1, 8) != 1) {
>>> @@ -511,6 +527,7 @@ ngx_quic_validate_path(ngx_connection_t
>>> pto = ngx_max(ngx_quic_pto(c, ctx), 1000);
>>>
>>> path->expires = ngx_current_msec + pto;
>>> + path->state = NGX_QUIC_PATH_VALIDATING;
>>>
>>> ngx_quic_set_path_timer(c);
>>>
>>> @@ -558,6 +575,42 @@ ngx_quic_send_path_challenge(ngx_connect
>>> }
>>>
>>>
>>> +void
>>> +ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path)
>>> +{
>>> + ngx_quic_connection_t *qc;
>>> +
>>> + qc = ngx_quic_get_connection(c);
>>> +
>>> + if (path->max_mtu) {
>>> + if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
>>> + path->state = NGX_QUIC_PATH_IDLE;
>>> + ngx_quic_set_path_timer(c);
>>> + return;
>>> + }
>>> +
>>> + path->mtud = (path->mtu + path->max_mtu) / 2;
>>> +
>>> + } else {
>>> + path->mtud = path->mtu + NGX_QUIC_PATH_MTU_STEP;
>>
>> I like this approach, compared to multiplying probe steps.
>> This better fits to the pattern to keep around a well-known
>> initial value, which is typically 1500 bytes.
>> Another one could be to let user specify its own initial
>> value for cases when he knows well its network environment.
>>
>>> +
>>> + if (path->mtud >= qc->ctp.max_udp_payload_size) {
>>> + path->mtud = qc->ctp.max_udp_payload_size;
>>> + path->max_mtu = qc->ctp.max_udp_payload_size;
>>> + }
>>> + }
>>> +
>>> + path->state = NGX_QUIC_PATH_WAITING;
>>> + path->expires = ngx_current_msec + NGX_QUIC_PATH_MTU_DELAY;
>>> +
>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> + "quic path seq:%uL schedule mtu:%uz",
>>> + path->seqnum, path->mtud);
>>> +
>>> + ngx_quic_set_path_timer(c);
>>> +}
>>> +
>>> +
>>> static void
>>> ngx_quic_set_path_timer(ngx_connection_t *c)
>>> {
>>> @@ -578,7 +631,7 @@ ngx_quic_set_path_timer(ngx_connection_t
>>> {
>>> path = ngx_queue_data(q, ngx_quic_path_t, queue);
>>>
>>> - if (!path->validating) {
>>> + if (path->state == NGX_QUIC_PATH_IDLE) {
>>> continue;
>>> }
>>>
>>> @@ -600,22 +653,18 @@ ngx_quic_set_path_timer(ngx_connection_t
>>>
>>>
>>> void
>>> -ngx_quic_path_validation_handler(ngx_event_t *ev)
>>> +ngx_quic_path_handler(ngx_event_t *ev)
>>> {
>>> ngx_msec_t now;
>>> ngx_queue_t *q;
>>> - ngx_msec_int_t left, next, pto;
>>> - ngx_quic_path_t *path, *bkp;
>>> + ngx_msec_int_t left;
>>> + ngx_quic_path_t *path;
>>> ngx_connection_t *c;
>>> - ngx_quic_send_ctx_t *ctx;
>>> ngx_quic_connection_t *qc;
>>>
>>> c = ev->data;
>>> qc = ngx_quic_get_connection(c);
>>>
>>> - ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
>>> -
>>> - next = -1;
>>> now = ngx_current_msec;
>>>
>>> q = ngx_queue_head(&qc->paths);
>>> @@ -625,83 +674,274 @@ ngx_quic_path_validation_handler(ngx_eve
>>> path = ngx_queue_data(q, ngx_quic_path_t, queue);
>>> q = ngx_queue_next(q);
>>>
>>> - if (!path->validating) {
>>> + if (path->state == NGX_QUIC_PATH_IDLE) {
>>> continue;
>>> }
>>>
>>> left = path->expires - now;
>>>
>>> if (left > 0) {
>>> -
>>> - if (next == -1 || left < next) {
>>> - next = left;
>>> - }
>>> -
>>> - continue;
>>> - }
>>> -
>>> - if (++path->tries < NGX_QUIC_PATH_RETRIES) {
>>> - pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
>>> -
>>> - path->expires = ngx_current_msec + pto;
>>> -
>>> - if (next == -1 || pto < next) {
>>> - next = pto;
>>> - }
>>> -
>>> - /* retransmit */
>>> - (void) ngx_quic_send_path_challenge(c, path);
>>> -
>>> continue;
>>> }
>>>
>>> - ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
>>> - "quic path seq:%uL validation failed", path->seqnum);
>>> + switch (path->state) {
>>> + case NGX_QUIC_PATH_VALIDATING:
>>> + if (ngx_quic_expire_path_validation(c, path) != NGX_OK) {
>>> + goto failed;
>>> + }
>>> +
>>> + break;
>>> +
>>> + case NGX_QUIC_PATH_WAITING:
>>> + if (ngx_quic_expire_path_mtu_delay(c, path) != NGX_OK) {
>>> + goto failed;
>>> + }
>>> +
>>> + break;
>>> +
>>> + case NGX_QUIC_PATH_DISCOVERING_MTU:
>>> + if (ngx_quic_expire_path_mtu_discovery(c, path) != NGX_OK) {
>>> + goto failed;
>>> + }
>>> +
>>> + break;
>>> +
>>
>> indentation in several places here
>
> Fixed, thanks.
>
>>> + default:
>>> + break;
>>> + }
>>> + }
>>> +
>>> + ngx_quic_set_path_timer(c);
>>> +
>>> + return;
>>>
>>> - /* found expired path */
>>> +failed:
>>> +
>>> + ngx_quic_close_connection(c, NGX_ERROR);
>>> +}
>>> +
>>> +
>>> +static ngx_int_t
>>> +ngx_quic_expire_path_validation(ngx_connection_t *c, ngx_quic_path_t *path)
>>> +{
>>> + ngx_msec_int_t pto;
>>> + ngx_quic_path_t *bkp;
>>> + ngx_quic_send_ctx_t *ctx;
>>> + ngx_quic_connection_t *qc;
>>>
>>> - path->validated = 0;
>>> - path->validating = 0;
>>> + qc = ngx_quic_get_connection(c);
>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
>>> +
>>> + if (++path->tries < NGX_QUIC_PATH_RETRIES) {
>>> + pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
>>> + path->expires = ngx_current_msec + pto;
>>> +
>>> + (void) ngx_quic_send_path_challenge(c, path);
>>> +
>>> + return NGX_OK;
>>> + }
>>> +
>>> + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> + "quic path seq:%uL validation failed", path->seqnum);
>>> +
>>> + /* found expired path */
>>> +
>>> + path->validated = 0;
>>>
>>>
>>> - /* RFC 9000, 9.3.2. On-Path Address Spoofing
>>> - *
>>> - * To protect the connection from failing due to such a spurious
>>> - * migration, an endpoint MUST revert to using the last validated
>>> - * peer address when validation of a new peer address fails.
>>> - */
>>> -
>>> - if (qc->path == path) {
>>> - /* active path validation failed */
>>> -
>>> - bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
>>> + /* RFC 9000, 9.3.2. On-Path Address Spoofing
>>> + *
>>> + * To protect the connection from failing due to such a spurious
>>> + * migration, an endpoint MUST revert to using the last validated
>>> + * peer address when validation of a new peer address fails.
>>> + */
>>>
>>> - if (bkp == NULL) {
>>> - qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
>>> - qc->error_reason = "no viable path";
>>> - ngx_quic_close_connection(c, NGX_ERROR);
>>> - return;
>>> - }
>>> + if (qc->path == path) {
>>> + /* active path validation failed */
>>> +
>>> + bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
>>>
>>> - qc->path = bkp;
>>> - qc->path->tag = NGX_QUIC_PATH_ACTIVE;
>>> -
>>> - ngx_quic_set_connection_path(c, qc->path);
>>> -
>>> - ngx_log_error(NGX_LOG_INFO, c->log, 0,
>>> - "quic path seq:%uL addr:%V is restored from backup",
>>> - qc->path->seqnum, &qc->path->addr_text);
>>> -
>>> - ngx_quic_path_dbg(c, "is active", qc->path);
>>> + if (bkp == NULL) {
>>> + qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
>>> + qc->error_reason = "no viable path";
>>> + return NGX_ERROR;
>>> }
>>>
>>> - if (ngx_quic_free_path(c, path) != NGX_OK) {
>>> - ngx_quic_close_connection(c, NGX_ERROR);
>>> - return;
>>> + qc->path = bkp;
>>> + qc->path->tag = NGX_QUIC_PATH_ACTIVE;
>>> +
>>> + ngx_quic_set_connection_path(c, qc->path);
>>> +
>>> + ngx_log_error(NGX_LOG_INFO, c->log, 0,
>>> + "quic path seq:%uL addr:%V is restored from backup",
>>> + qc->path->seqnum, &qc->path->addr_text);
>>> +
>>> + ngx_quic_path_dbg(c, "is active", qc->path);
>>> + }
>>> +
>>> + return ngx_quic_free_path(c, path);
>>> +}
>>> +
>>> +
>>> +static ngx_int_t
>>> +ngx_quic_expire_path_mtu_delay(ngx_connection_t *c, ngx_quic_path_t *path)
>>> +{
>>> + ngx_int_t rc;
>>> + ngx_uint_t i;
>>> + ngx_msec_t pto;
>>> + ngx_quic_send_ctx_t *ctx;
>>> + ngx_quic_connection_t *qc;
>>> +
>>> + qc = ngx_quic_get_connection(c);
>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
>>> +
>>> + path->tries = 0;
>>> +
>>> + for ( ;; ) {
>>> +
>>> + for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
>>> + path->mtu_pnum[i] = NGX_QUIC_UNSET_PN;
>>> + }
>>> +
>>> + rc = ngx_quic_send_path_mtu_probe(c, path);
>>> +
>>> + if (rc == NGX_ERROR) {
>>> + return NGX_ERROR;
>>> + }
>>> +
>>> + if (rc == NGX_OK) {
>>> + pto = ngx_quic_pto(c, ctx);
>>> + path->expires = ngx_current_msec + pto;
>>> + path->state = NGX_QUIC_PATH_DISCOVERING_MTU;
>>> + return NGX_OK;
>>> + }
>>> +
>>> + /* rc == NGX_DECLINED */
>>> +
>>> + path->max_mtu = path->mtud;
>>> +
>>> + if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
>>> + path->state = NGX_QUIC_PATH_IDLE;
>>> + return NGX_OK;
>>> + }
>>> +
>>> + path->mtud = (path->mtu + path->max_mtu) / 2;
>>> + }
>>> +}
>>> +
>>> +
>>> +static ngx_int_t
>>> +ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c, ngx_quic_path_t *path)
>>> +{
>>> + ngx_int_t rc;
>>> + ngx_msec_int_t pto;
>>> + ngx_quic_send_ctx_t *ctx;
>>> + ngx_quic_connection_t *qc;
>>> +
>>> + qc = ngx_quic_get_connection(c);
>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
>>> +
>>> + if (++path->tries < NGX_QUIC_PATH_RETRIES) {
>>> + pto = ngx_quic_pto(c, ctx) << path->tries;
>>> + path->expires = ngx_current_msec + pto;
>>> +
>>
>> Setting path->expires there seems useless.
>> It don't seem to be used in ngx_quic_send_path_mtu_probe(),
>> and ngx_quic_discover_path_mtu() used to reset path->expires.
>
> If ngx_quic_send_path_mtu_probe() below returns NGX_OK, the path should stay
> in the same state, but expiration time should be advanced. To make it
> more obvious, I changed this code.
Updated code makes it clear, thanks.
>
>>> + rc = ngx_quic_send_path_mtu_probe(c, path);
>>> + if (rc != NGX_DECLINED) {
>>> + return rc;
>>> }
>>> }
>>>
>>> - if (next != -1) {
>>> - ngx_add_timer(&qc->path_validation, next);
>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> + "quic path seq:%uL expired mtu:%uz",
>>> + path->seqnum, path->mtud);
>>> +
>>> + path->max_mtu = path->mtud;
>>> +
>>> + ngx_quic_discover_path_mtu(c, path);
>>> +
>>> + return NGX_OK;
>>> +}
>>> +
>>> +
>>> +static ngx_int_t
>>> +ngx_quic_send_path_mtu_probe(ngx_connection_t *c, ngx_quic_path_t *path)
>>> +{
>>> + ngx_int_t rc;
>>> + ngx_uint_t log_error;
>>> + ngx_quic_frame_t frame;
>>> + ngx_quic_send_ctx_t *ctx;
>>> + ngx_quic_connection_t *qc;
>>> +
>>> + ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
>>> +
>>> + frame.level = ssl_encryption_application;
>>> + frame.type = NGX_QUIC_FT_PING;
>>> +
>>> + qc = ngx_quic_get_connection(c);
>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
>>> + path->mtu_pnum[path->tries] = ctx->pnum;
>>> +
>>> + ngx_log_debug4(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> + "quic path seq:%uL send probe "
>>> + "mtu:%uz pnum:%uL tries:%ui",
>>> + path->seqnum, path->mtud, ctx->pnum, path->tries);
>>> +
>>> + log_error = c->log_error;
>>> + c->log_error = NGX_ERROR_IGNORE_EMSGSIZE;
>>> +
>>> + rc = ngx_quic_frame_sendto(c, &frame, path->mtud, path);
>>> + c->log_error = log_error;
>>> +
>>> + if (rc == NGX_ERROR) {
>>> + if (c->write->error) {
>>> + c->write->error = 0;
>>> +
>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> + "quic path seq:%uL rejected mtu:%uz",
>>> + path->seqnum, path->mtud);
>>> +
>>> + return NGX_DECLINED;
>>
>> The error handling looks awkward to decline on write error.
>> ngx_sendmsg() sets write error on fatal sendmsg errors,
>> and keeps it unset NGX_EAGAIN and NGX_EINTR, which
>> are typically non-fatal transient errors.
>> I understand this is done mostly to ignore EMSGSIZE,
>> maybe it makes sense to ignore the rest sendmsg() errors.
>
> The problem is, the error doesn't make it to this point. All be have here is
> NGX_ERROR and c->write->error. I think, for PMTUD purposes we can safely
> ignore the errors. It's hinghly likely, the error will repeat while doing
> connection I/O and we'll shortly close the connection.
Ok, I re-read ngx_sendmsg() to clarify the following:
- NGX_EINTR is handled internally and never returned up to here;
- NGX_EAGAIN (NGX_AGAIN) seems to be returned, at least on Linux,
when send buffer is low for the MTU probe, converted to NGX_OK;
- FTR, looking at sosend_dgram() on FreeBSD, sendmsg seems to
return EMSGSIZE there, instead, on low socket buffer space;
- other sendmsg errors are returned as NGX_ERROR, and
c->write->error is set; this is converted to NGX_DECLINED;
- non-sendmsg NGX_ERRORs appeared in ngx_quic_frame_sendto()
are passed as is without conversion.
So, the error handling as implemented to decline on any sendmsg
"fatal" errors actually makes sense, if we'd like to avoid rewriting
ngx_sendmsg() to further distinguish sendmsg errors, notably EMSGSIZE.
The downside is that connection may ignore actually fatal errors,
though the error will likely repeat on sending non-probing frames.
NGX_AGAIN conversion to NGX_OK is questionable, because this
makes a probe considered successfully sent while it is not.
I wonder though what we can do about that other than to simply
ignore, similar to other ngx_quic_frame_sendto() callers,
because next time on a probe retry sendmsg can return success.
>
>>> + }
>>> +
>>> + return NGX_ERROR;
>>> }
>>> +
>>> + return NGX_OK;
>>> }
>>> +
>>> +
>>> +ngx_int_t
>>> +ngx_quic_handle_path_mtu_ack(ngx_connection_t *c, ngx_quic_path_t *path,
>>> + uint64_t min, uint64_t max)
>>> +{
>>> + uint64_t pnum;
>>> + ngx_uint_t i;
>>> +
>>> + if (path->state != NGX_QUIC_PATH_DISCOVERING_MTU) {
>>> + return NGX_OK;
>>> + }
>>> +
>>> + for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
>>> + pnum = path->mtu_pnum[i];
>>> +
>>> + if (pnum == NGX_QUIC_UNSET_PN) {
>>> + break;
>>> + }
>>> +
>>> + if (pnum < min || pnum > max) {
>>> + continue;
>>> + }
>>> +
>>> + path->mtu = path->mtud;
>>> +
>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
>>> + "quic path seq:%uL ack mtu:%uz",
>>> + path->seqnum, path->mtu);
>>> +
>>> + ngx_quic_discover_path_mtu(c, path);
>>> +
>>> + break;
>>
>> What if two probes appeared ACKed in one range.
>> Though that seems to be safe to break there, as two
>> probes barely have different probe sizes, rather retries.
>
> No matter how many probes are acked, the next step is still the same.
> So there's no reason to find all of them.
>
Ok, let's leave it as is.
>>> + }
>>> +
>>> + return NGX_OK;
>>> +}
>>> diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
>>> --- a/src/event/quic/ngx_event_quic_migration.h
>>> +++ b/src/event/quic/ngx_event_quic_migration.h
>>> @@ -18,10 +18,10 @@
>>> #define NGX_QUIC_PATH_BACKUP 2
>>>
>>> #define ngx_quic_path_dbg(c, msg, path) \
>>> - ngx_log_debug6(NGX_LOG_DEBUG_EVENT, c->log, 0, \
>>> - "quic path seq:%uL %s sent:%O recvd:%O state:%s%s", \
>>> + ngx_log_debug7(NGX_LOG_DEBUG_EVENT, c->log, 0, \
>>> + "quic path seq:%uL %s tx:%O rx:%O valid:%d st:%d mtu:%uz", \
>>> path->seqnum, msg, path->sent, path->received, \
>>> - path->validated ? "V": "N", path->validating ? "R": "");
>>> + path->validated, path->state, path->mtu);
>>>
>>> ngx_int_t ngx_quic_handle_path_challenge_frame(ngx_connection_t *c,
>>> ngx_quic_header_t *pkt, ngx_quic_path_challenge_frame_t *f);
>>> @@ -36,6 +36,10 @@ ngx_int_t ngx_quic_set_path(ngx_connecti
>>> ngx_int_t ngx_quic_handle_migration(ngx_connection_t *c,
>>> ngx_quic_header_t *pkt);
>>>
>>> -void ngx_quic_path_validation_handler(ngx_event_t *ev);
>>> +void ngx_quic_path_handler(ngx_event_t *ev);
>>> +
>>> +void ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path);
>>> +ngx_int_t ngx_quic_handle_path_mtu_ack(ngx_connection_t *c,
>>> + ngx_quic_path_t *path, uint64_t min, uint64_t max);
>>
>> Nitpicking:
>> maybe just ngx_quic_handle_path_mtu() ?
>> This would match ngx_quic_discover_path_mtu() naming.
>
> OK, renamed.
>
>>>
>>> #endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */
>>> diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
>>> --- a/src/event/quic/ngx_event_quic_output.c
>>> +++ b/src/event/quic/ngx_event_quic_output.c
>>> @@ -10,9 +10,6 @@
>>> #include <ngx_event_quic_connection.h>
>>>
>>>
>>> -#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT 1252
>>> -#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6 1232
>>> -
>>> #define NGX_QUIC_MAX_UDP_SEGMENT_BUF 65487 /* 65K - IPv6 header */
>>> #define NGX_QUIC_MAX_SEGMENTS 64 /* UDP_MAX_SEGMENTS */
>>>
>>> @@ -61,21 +58,6 @@ static size_t ngx_quic_path_limit(ngx_co
>>> size_t size);
>>>
>>>
>>> -size_t
>>> -ngx_quic_max_udp_payload(ngx_connection_t *c)
>>> -{
>>> - /* TODO: path MTU discovery */
>>> -
>>> -#if (NGX_HAVE_INET6)
>>> - if (c->sockaddr->sa_family == AF_INET6) {
>>> - return NGX_QUIC_MAX_UDP_PAYLOAD_OUT6;
>>> - }
>>> -#endif
>>> -
>>> - return NGX_QUIC_MAX_UDP_PAYLOAD_OUT;
>>> -}
>>> -
>>> -
>>> ngx_int_t
>>> ngx_quic_output(ngx_connection_t *c)
>>> {
>>> @@ -142,10 +124,7 @@ ngx_quic_create_datagrams(ngx_connection
>>>
>>> p = dst;
>>>
>>> - len = ngx_min(qc->ctp.max_udp_payload_size,
>>> - NGX_QUIC_MAX_UDP_PAYLOAD_SIZE);
>>> -
>>> - len = ngx_quic_path_limit(c, path, len);
>>> + len = ngx_quic_path_limit(c, path, path->mtu);
>>>
>>> pad = ngx_quic_get_padding_level(c);
>>>
>>> @@ -299,9 +278,7 @@ ngx_quic_allow_segmentation(ngx_connecti
>>> ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
>>>
>>> bytes = 0;
>>> -
>>> - len = ngx_min(qc->ctp.max_udp_payload_size,
>>> - NGX_QUIC_MAX_UDP_SEGMENT_BUF);
>>> + len = ngx_min(qc->path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
>>>
>>> for (q = ngx_queue_head(&ctx->frames);
>>> q != ngx_queue_sentinel(&ctx->frames);
>>> @@ -345,8 +322,7 @@ ngx_quic_create_segments(ngx_connection_
>>> return NGX_ERROR;
>>> }
>>>
>>> - segsize = ngx_min(qc->ctp.max_udp_payload_size,
>>> - NGX_QUIC_MAX_UDP_SEGMENT_BUF);
>>> + segsize = ngx_min(path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
>>> p = dst;
>>> end = dst + sizeof(dst);
>>>
>>> diff --git a/src/event/quic/ngx_event_quic_output.h b/src/event/quic/ngx_event_quic_output.h
>>> --- a/src/event/quic/ngx_event_quic_output.h
>>> +++ b/src/event/quic/ngx_event_quic_output.h
>>> @@ -12,8 +12,6 @@
>>> #include <ngx_core.h>
>>>
>>>
>>> -size_t ngx_quic_max_udp_payload(ngx_connection_t *c);
>>> -
>>> ngx_int_t ngx_quic_output(ngx_connection_t *c);
>>>
>>> ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
>>> diff --git a/src/event/quic/ngx_event_quic_ssl.c b/src/event/quic/ngx_event_quic_ssl.c
>>> --- a/src/event/quic/ngx_event_quic_ssl.c
>>> +++ b/src/event/quic/ngx_event_quic_ssl.c
>>> @@ -494,6 +494,8 @@ ngx_quic_crypto_input(ngx_connection_t *
>>> */
>>> ngx_quic_discard_ctx(c, ssl_encryption_handshake);
>>>
>>> + ngx_quic_discover_path_mtu(c, qc->path);
>>> +
>>> /* start accepting clients on negotiated number of server ids */
>>> if (ngx_quic_create_sockets(c) != NGX_OK) {
>>> return NGX_ERROR;
>>> diff --git a/src/os/unix/ngx_errno.h b/src/os/unix/ngx_errno.h
>>> --- a/src/os/unix/ngx_errno.h
>>> +++ b/src/os/unix/ngx_errno.h
>>> @@ -54,6 +54,7 @@ typedef int ngx_err_t;
>>> #define NGX_ENOMOREFILES 0
>>> #define NGX_ELOOP ELOOP
>>> #define NGX_EBADF EBADF
>>> +#define NGX_EMSGSIZE EMSGSIZE
>>>
>>> #if (NGX_HAVE_OPENAT)
>>> #define NGX_EMLINK EMLINK
This will break on win32, an NGX_EMSGSIZE definition needs to be
provided there, which is WSAEMSGSIZE.
>
> The diff is attached.
>
The diff looks good.
--
Sergey Kandaurov
More information about the nginx-devel
mailing list