[patch] quic PTO counter fixes

Sergey Kandaurov pluknet at nginx.com
Wed Oct 25 23:08:55 UTC 2023


On Wed, Oct 11, 2023 at 04:58:47PM +0300, Vladimir Homutov via nginx-devel wrote:
> Hello,
> 
> a couple of patches in the quic code:
> 
> first patch improves a bit debugging, and the second patch contains
> fixes for PTO counter calculation - see commit log for details.
> 
> This helps with some clients in interop handshakeloss/handshakecorruption
> testcases
> 
> 

> # HG changeset patch
> # User Vladimir Khomutov <vl at wbsrv.ru>
> # Date 1697031939 -10800
> #      Wed Oct 11 16:45:39 2023 +0300
> # Node ID 1f188102fbd944df797e8710f70cccee76164add
> # Parent  cdda286c0f1b4b10f30d4eb6a63fefb9b8708ecc
> QUIC: improved packet and frames debug tracing.
> 
> Currently, packets generated by ngx_quic_frame_sendto() and
> ngx_quic_send_early_cc() are not logged, thus making it hard
> to read logs due to gaps appearing in packet numbers sequence.
> For such special packets, a frame type being sent is also output.
> 
> At frames level, it is handy to see immediately packet number
> in which they arrived or being sent.
> 
> diff --git a/src/event/quic/ngx_event_quic_frames.c b/src/event/quic/ngx_event_quic_frames.c
> --- a/src/event/quic/ngx_event_quic_frames.c
> +++ b/src/event/quic/ngx_event_quic_frames.c
> @@ -886,8 +886,8 @@ ngx_quic_log_frame(ngx_log_t *log, ngx_q
>          break;
>      }
>  
> -    ngx_log_debug4(NGX_LOG_DEBUG_EVENT, log, 0, "quic frame %s %s %*s",
> -                   tx ? "tx" : "rx", ngx_quic_level_name(f->level),
> +    ngx_log_debug5(NGX_LOG_DEBUG_EVENT, log, 0, "quic frame %s %s:%ui %*s",
> +                   tx ? "tx" : "rx", ngx_quic_level_name(f->level), f->pnum,
>                     p - buf, buf);
>  }
>  
> diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
> --- a/src/event/quic/ngx_event_quic_output.c
> +++ b/src/event/quic/ngx_event_quic_output.c
> @@ -563,8 +563,6 @@ ngx_quic_output_packet(ngx_connection_t 
>              pkt.need_ack = 1;
>          }
>  
> -        ngx_quic_log_frame(c->log, f, 1);
> -
>          flen = ngx_quic_create_frame(p, f);
>          if (flen == -1) {
>              return NGX_ERROR;
> @@ -578,6 +576,8 @@ ngx_quic_output_packet(ngx_connection_t 
>          f->last = now;
>          f->plen = 0;
>  
> +        ngx_quic_log_frame(c->log, f, 1);
> +
>          nframes++;

I'd rather move setting frame fields before calling
ngx_quic_log_frame()/ngx_quic_create_frame()
to preserve consistency with other places, i.e.:
- set fields
- log frame
- create frame

To look as follows:

:        f->pnum = ctx->pnum;
:        f->first = now;
:        f->last = now;
:        f->plen = 0;
:
:        ngx_quic_log_frame(c->log, f, 1);
:
:        flen = ngx_quic_create_frame(p, f);
:

>      }
>  
> @@ -925,6 +925,13 @@ ngx_quic_send_early_cc(ngx_connection_t 
>  
>      res.data = dst;
>  
> +    ngx_log_debug7(NGX_LOG_DEBUG_EVENT, c->log, 0,
> +                   "quic packet tx %s bytes:%ui need_ack:%d"
> +                   " number:%L encoded nl:%d trunc:0x%xD frame:%ui]",

typo: closing square bracket

Not sure we need logging for a (particular) frame in packet logging,
not to say that it looks like a layering violation.
Anyway, it is logged nearby, for example:

 quic frame tx init:0 CONNECTION_CLOSE err:11 invalid address validation token ft:0
 quic packet tx init bytes:36 need_ack:0 number:0 encoded nl:1 trunc:0x0

So I'd remove this part.

> +                   ngx_quic_level_name(pkt.level), pkt.payload.len,
> +                   pkt.need_ack, pkt.number, pkt.num_len, pkt.trunc,
> +                   frame.type);
> +
>      if (ngx_quic_encrypt(&pkt, &res) != NGX_OK) {
>          return NGX_ERROR;
>      }
> @@ -1179,6 +1186,10 @@ ngx_quic_frame_sendto(ngx_connection_t *
>      pad = 4 - pkt.num_len;
>      min_payload = ngx_max(min_payload, pad);
>  
> +#if (NGX_DEBUG)
> +    frame->pnum = pkt.number;
> +#endif
> +
>      len = ngx_quic_create_frame(NULL, frame);
>      if (len > NGX_QUIC_MAX_UDP_PAYLOAD_SIZE) {
>          return NGX_ERROR;
> @@ -1201,6 +1212,13 @@ ngx_quic_frame_sendto(ngx_connection_t *
>  
>      res.data = dst;
>  
> +    ngx_log_debug7(NGX_LOG_DEBUG_EVENT, c->log, 0,
> +                   "quic packet tx %s bytes:%ui need_ack:%d"
> +                   " number:%L encoded nl:%d trunc:0x%xD frame:%ui",

Same here.

> +                   ngx_quic_level_name(pkt.level), pkt.payload.len,
> +                   pkt.need_ack, pkt.number, pkt.num_len, pkt.trunc,
> +                   frame->type);
> +

BTW, it would make sense to get a new macro / inline function
for packet tx logging, similar to ngx_quic_log_frame(),
since we will have three places with identical ngx_log_debug7().

>      if (ngx_quic_encrypt(&pkt, &res) != NGX_OK) {
>          return NGX_ERROR;
>      }
> diff --git a/src/event/quic/ngx_event_quic_transport.c b/src/event/quic/ngx_event_quic_transport.c
> --- a/src/event/quic/ngx_event_quic_transport.c
> +++ b/src/event/quic/ngx_event_quic_transport.c
> @@ -1135,6 +1135,9 @@ ngx_quic_parse_frame(ngx_quic_header_t *
>      }
>  
>      f->level = pkt->level;
> +#if (NGX_DEBUG)
> +    f->pnum = pkt->pn;
> +#endif
>  
>      return p - start;
>  

> # HG changeset patch
> # User Vladimir Khomutov <vl at wbsrv.ru>
> # Date 1697031803 -10800
> #      Wed Oct 11 16:43:23 2023 +0300
> # Node ID 9ba2840e88f62343b3bd794e43900781dab43686
> # Parent  1f188102fbd944df797e8710f70cccee76164add
> QUIC: fixed handling of PTO counter.
> 
> The RFC 9002 clearly says in "6.2. Probe Timeout":
>     ...
>     As with loss detection, the PTO is per packet number space.
>     That is, a PTO value is computed per packet number space.
> 
> Despite that, current code is using per-connection PTO counter.
> For example, this may lead to situation when packet loss at handshake
> level will affect PTO calculation for initial packets, preventing
> send of new probes.

Although PTO value is per packet number space, PTO backoff is not,
see "6.2.1 Computing PTO":

: When ack-eliciting packets in multiple packet number spaces are in flight, the
: timer MUST be set to the earlier value of the Initial and Handshake packet
: number spaces.

But:

: When a PTO timer expires, the PTO backoff MUST be increased <..>

: This exponential reduction in the sender's rate is important because consecutive
: PTOs might be caused by loss of packets or acknowledgments due to severe
: congestion.  Even when there are ack-eliciting packets in flight in multiple
: packet number spaces, the exponential increase in PTO occurs across all spaces
: to prevent excess load on the network.  For example, a timeout in the Initial
: packet number space doubles the length of the timeout in the Handshake packet
: number space.

Even if that would be proven otherwise, I don't think the description
provides detailed explanation.  It describes a pretty specific use case,
when both Initial and Handshake packet number spaces have in-flight packets
with different PTO timeout (i.e. different f->last).  Typically they are
sent coalesced (e.g. CRYPTO frames for ServerHello and (at least)
EncryptedExtensions TLS messages).
In interop tests, though, it might be different: such packets may be
sent separately, with Handshake packet thus having a later PTO timeout.
If such, PTO timer will first fire for the Initial packet, then for Handshake,
which will result in PTO backoff accumulated for each packet:

 t1: <- Initial (lost)
 t2: <- Handshake (lost)
t1': pto(t1) timeout
     <- Initial (pto_count=1)
t2': pto(t2) timeout 
     <- Handshake (pto_count=2)
t1'': pto(t1') timeout
     <- Initial (pto_count=3)

So, I would supplement the description with the phrase that that's
fair typically with uncoalesced packets seen in interop tests, and
that the same is true vice verse with packet loss at initial packet
number space affecting PTO backoff in handshake packet number space.

But see above about PTO backoff increase across all spaces.

> 
> Additionally, one case of successful ACK receiving was missing:
> PING frames are not stored in the ctx->sent queue, thus PTO was not
> reset when corresponding packets were acknowledged.

See below.

> 
> diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
> --- a/src/event/quic/ngx_event_quic.c
> +++ b/src/event/quic/ngx_event_quic.c
> @@ -1088,8 +1088,6 @@ ngx_quic_discard_ctx(ngx_connection_t *c
>  
>      ngx_quic_keys_discard(qc->keys, level);
>  
> -    qc->pto_count = 0;
> -
>      ctx = ngx_quic_get_send_ctx(qc, level);
>  
>      ngx_quic_free_buffer(c, &ctx->crypto);
> @@ -1120,6 +1118,7 @@ ngx_quic_discard_ctx(ngx_connection_t *c
>      }
>  
>      ctx->send_ack = 0;
> +    ctx->pto_count = 0;
>  
>      ngx_quic_set_lost_timer(c);
>  }
> diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
> --- a/src/event/quic/ngx_event_quic_ack.c
> +++ b/src/event/quic/ngx_event_quic_ack.c
> @@ -286,8 +286,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
>      if (!found) {
>  
>          if (max < ctx->pnum) {
> -            /* duplicate ACK or ACK for non-ack-eliciting frame */
> -            return NGX_OK;
> +            /*
> +             * - ACK for frames not in sent queue (i.e. PING)
> +             * - duplicate ACK
> +             * - ACK for non-ack-eliciting frame
> +             */
> +            goto done;
>          }
>  
>          ngx_log_error(NGX_LOG_INFO, c->log, 0,
> @@ -300,11 +304,13 @@ ngx_quic_handle_ack_frame_range(ngx_conn
>          return NGX_ERROR;
>      }
>  
> +done:
> +
>      if (!qc->push.timer_set) {
>          ngx_post_event(&qc->push, &ngx_posted_events);
>      }
>  
> -    qc->pto_count = 0;
> +    ctx->pto_count = 0;

This part of the change to reset pto_count
for duplicate ACK or ACK for non-ack-eliciting frame
contradicts the OnAckReceived example in RFC 9002,
although I didn't found a format text in the RFC itself:

OnAckReceived(ack, pn_space):
  ...
  // DetectAndRemoveAckedPackets finds packets that are newly
  // acknowledged and removes them from sent_packets.
  newly_acked_packets =
      DetectAndRemoveAckedPackets(ack, pn_space)
  // Nothing to do if there are no newly acked packets.
  if (newly_acked_packets.empty()):
    return

  // Update the RTT if the largest acknowledged is newly acked
  // and at least one ack-eliciting was newly acked.
  ...

  // Reset pto_count ...

>From which it follows that pto_count is reset
(and RTT updated) for newly ack'ed packets only.

I think the better fix would be to properly track in-flight PING frames.
Moreover, the current behaviour of not tracking PING frames in ctx->sent
prevents from a properly calculated PTO timeout: each time it is calculated
against the original packet (with increasingly receding time to the past)
that triggered the first PTO timeout, which doesn't result in exponentially
increased PTO period as expected, but rather some bogus value.

>  
>      return NGX_OK;
>  }
> @@ -744,7 +750,7 @@ ngx_quic_set_lost_timer(ngx_connection_t
>  
>          q = ngx_queue_last(&ctx->sent);
>          f = ngx_queue_data(q, ngx_quic_frame_t, queue);
> -        w = (ngx_msec_int_t) (f->last + (ngx_quic_pto(c, ctx) << qc->pto_count)
> +        w = (ngx_msec_int_t) (f->last + (ngx_quic_pto(c, ctx) << ctx->pto_count)
>                                - now);
>  
>          if (w < 0) {
> @@ -855,7 +861,7 @@ ngx_quic_pto_handler(ngx_event_t *ev)
>              continue;
>          }
>  
> -        if ((ngx_msec_int_t) (f->last + (ngx_quic_pto(c, ctx) << qc->pto_count)
> +        if ((ngx_msec_int_t) (f->last + (ngx_quic_pto(c, ctx) << ctx->pto_count)
>                                - now) > 0)
>          {
>              continue;
> @@ -863,7 +869,7 @@ ngx_quic_pto_handler(ngx_event_t *ev)
>  
>          ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
>                         "quic pto %s pto_count:%ui",
> -                       ngx_quic_level_name(ctx->level), qc->pto_count);
> +                       ngx_quic_level_name(ctx->level), ctx->pto_count);
>  
>          ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
>  
> @@ -876,10 +882,10 @@ ngx_quic_pto_handler(ngx_event_t *ev)
>              ngx_quic_close_connection(c, NGX_ERROR);
>              return;
>          }
> +
> +        ctx->pto_count++;
>      }
>  
> -    qc->pto_count++;
> -
>      ngx_quic_set_lost_timer(c);
>  
>      ngx_quic_connstate_dbg(c);
> diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
> --- a/src/event/quic/ngx_event_quic_connection.h
> +++ b/src/event/quic/ngx_event_quic_connection.h
> @@ -195,6 +195,8 @@ struct ngx_quic_send_ctx_s {
>      ngx_uint_t                        nranges;
>      ngx_quic_ack_range_t              ranges[NGX_QUIC_MAX_RANGES];
>      ngx_uint_t                        send_ack;
> +
> +    ngx_uint_t                        pto_count;
>  };
>  
>  
> @@ -240,8 +242,6 @@ struct ngx_quic_connection_s {
>      ngx_msec_t                        min_rtt;
>      ngx_msec_t                        rttvar;
>  
> -    ngx_uint_t                        pto_count;
> -
>      ngx_queue_t                       free_frames;
>      ngx_buf_t                        *free_bufs;
>      ngx_buf_t                        *free_shadow_bufs;


More information about the nginx-devel mailing list