Hi,
On Thu, Aug 10, 2023 at 06:18:02PM +0400, Sergey Kandaurov wrote:
>
> > On 10 Aug 2023, at 14:21, Roman Arutyunyan <arut@nginx.com> wrote:
> >
> > On Tue, Aug 08, 2023 at 01:49:46PM +0400, Sergey Kandaurov wrote:
> > Hi,
> >
> >>
> >>> On 31 Jul 2023, at 21:34, Roman Arutyunyan <arut@nginx.com> wrote:
> >>>
> >>> Hi,
> >>>
> >>> On Fri, Jul 28, 2023 at 07:51:22PM +0400, Sergey Kandaurov wrote:
> >>>>
> >>>>> On 6 Jul 2023, at 17:57, Roman Arutyunyan <arut@nginx.com> wrote:
> >>>>>
> >>>>> # HG changeset patch
> >>>>> # User Roman Arutyunyan <arut@nginx.com>
> >>>>> # Date 1688481216 -14400
> >>>>> # Tue Jul 04 18:33:36 2023 +0400
> >>>>> # Node ID 1a49fd5d2013b6c8d50263499e6ced440927e949
> >>>>> # Parent a1ea543d009311765144351b2557c00c8e6445bf
> >>>>> QUIC: path MTU discovery.
> >>>>>
> >>>>> MTU selection starts by stepping up until the first failure. Then binary
> >>>>> search is used to find the path MTU.
> >>>>>
> >>>>> diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
> >>>>> --- a/src/core/ngx_connection.c
> >>>>> +++ b/src/core/ngx_connection.c
> >>>>> @@ -1583,6 +1583,10 @@ ngx_connection_error(ngx_connection_t *c
> >>>>> }
> >>>>> #endif
> >>>>>
> >>>>> + if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
> >>>>> + return 0;
> >>>>> + }
> >>>>> +
> >>>>> if (err == 0
> >>>>> || err == NGX_ECONNRESET
> >>>>> #if (NGX_WIN32)
> >>>>> @@ -1600,6 +1604,7 @@ ngx_connection_error(ngx_connection_t *c
> >>>>> {
> >>>>> switch (c->log_error) {
> >>>>>
> >>>>> + case NGX_ERROR_IGNORE_EMSGSIZE:
> >>>>> case NGX_ERROR_IGNORE_EINVAL:
> >>>>> case NGX_ERROR_IGNORE_ECONNRESET:
> >>>>> case NGX_ERROR_INFO:
> >>>>> diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
> >>>>> --- a/src/core/ngx_connection.h
> >>>>> +++ b/src/core/ngx_connection.h
> >>>>> @@ -97,7 +97,8 @@ typedef enum {
> >>>>> NGX_ERROR_ERR,
> >>>>> NGX_ERROR_INFO,
> >>>>> NGX_ERROR_IGNORE_ECONNRESET,
> >>>>> - NGX_ERROR_IGNORE_EINVAL
> >>>>> + NGX_ERROR_IGNORE_EINVAL,
> >>>>> + NGX_ERROR_IGNORE_EMSGSIZE
> >>>>> } ngx_connection_log_error_e;
> >>>>>
> >>>>>
> >>>>> diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
> >>>>> --- a/src/event/quic/ngx_event_quic.c
> >>>>> +++ b/src/event/quic/ngx_event_quic.c
> >>>>> @@ -149,11 +149,6 @@ ngx_quic_apply_transport_params(ngx_conn
> >>>>> ngx_log_error(NGX_LOG_INFO, c->log, 0,
> >>>>> "quic maximum packet size is invalid");
> >>>>> return NGX_ERROR;
> >>>>> -
> >>>>> - } else if (ctp->max_udp_payload_size > ngx_quic_max_udp_payload(c)) {
> >>>>> - ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
> >>>>> - ngx_log_debug0(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> - "quic client maximum packet size truncated");
> >>>>> }
> >>>>>
> >>>>> if (ctp->active_connection_id_limit < 2) {
> >>>>> @@ -286,7 +281,7 @@ ngx_quic_new_connection(ngx_connection_t
> >>>>>
> >>>>> qc->path_validation.log = c->log;
> >>>>> qc->path_validation.data = c;
> >>>>> - qc->path_validation.handler = ngx_quic_path_validation_handler;
> >>>>> + qc->path_validation.handler = ngx_quic_path_handler;
> >>>>>
> >>>>> qc->conf = conf;
> >>>>>
> >>>>> @@ -297,7 +292,7 @@ ngx_quic_new_connection(ngx_connection_t
> >>>>> ctp = &qc->ctp;
> >>>>>
> >>>>> /* defaults to be used before actual client parameters are received */
> >>>>> - ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
> >>>>> + ctp->max_udp_payload_size = NGX_QUIC_MAX_UDP_PAYLOAD_SIZE;
> >>>>> ctp->ack_delay_exponent = NGX_QUIC_DEFAULT_ACK_DELAY_EXPONENT;
> >>>>> ctp->max_ack_delay = NGX_QUIC_DEFAULT_MAX_ACK_DELAY;
> >>>>> ctp->active_connection_id_limit = 2;
> >>>>> diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
> >>>>> --- a/src/event/quic/ngx_event_quic_ack.c
> >>>>> +++ b/src/event/quic/ngx_event_quic_ack.c
> >>>>> @@ -229,6 +229,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
> >>>>>
> >>>>> qc = ngx_quic_get_connection(c);
> >>>>>
> >>>>> + if (ctx->level == ssl_encryption_application) {
> >>>>> + if (ngx_quic_handle_path_mtu_ack(c, qc->path, min, max) != NGX_OK) {
> >>>>> + return NGX_ERROR;
> >>>>> + }
> >>>>> + }
> >>>>> +
> >>>>> st->max_pn = NGX_TIMER_INFINITE;
> >>>>> found = 0;
> >>>>>
> >>>>> diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
> >>>>> --- a/src/event/quic/ngx_event_quic_connection.h
> >>>>> +++ b/src/event/quic/ngx_event_quic_connection.h
> >>>>> @@ -66,6 +66,14 @@ typedef struct ngx_quic_keys_s ng
> >>>>> #define ngx_quic_get_socket(c) ((ngx_quic_socket_t *)((c)->udp))
> >>>>>
> >>>>>
> >>>>> +typedef enum {
> >>>>> + NGX_QUIC_PATH_IDLE = 0,
> >>>>> + NGX_QUIC_PATH_VALIDATING,
> >>>>> + NGX_QUIC_PATH_WAITING,
> >>>>> + NGX_QUIC_PATH_DISCOVERING_MTU
> >>>>
> >>>> Nitpicking on too long name.
> >>>> What about NGX_QUIC_PATH_MTUD or something.
> >>>
> >>> OK, renamed.
> >>>
> >>>>> +} ngx_quic_path_state_e;
> >>>>> +
> >>>>> +
> >>>>> struct ngx_quic_client_id_s {
> >>>>> ngx_queue_t queue;
> >>>>> uint64_t seqnum;
> >>>>> @@ -89,18 +97,22 @@ struct ngx_quic_path_s {
> >>>>> ngx_sockaddr_t sa;
> >>>>> socklen_t socklen;
> >>>>> ngx_quic_client_id_t *cid;
> >>>>> + ngx_quic_path_state_e state;
> >>>>> ngx_msec_t expires;
> >>>>> ngx_uint_t tries;
> >>>>> ngx_uint_t tag;
> >>>>> + size_t mtu;
> >>>>> + size_t mtud;
> >>>>> + size_t max_mtu;
> >>>>> off_t sent;
> >>>>> off_t received;
> >>>>> u_char challenge1[8];
> >>>>> u_char challenge2[8];
> >>>>> uint64_t seqnum;
> >>>>> + uint64_t mtu_pnum[NGX_QUIC_PATH_RETRIES];
> >>>>> ngx_str_t addr_text;
> >>>>> u_char text[NGX_SOCKADDR_STRLEN];
> >>>>> - unsigned validated:1;
> >>>>> - unsigned validating:1;
> >>>>> + ngx_uint_t validated; /* unsigned validated:1; */
> >>>>> };
> >>>>>
> >>>>>
> >>>>> diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
> >>>>> --- a/src/event/quic/ngx_event_quic_migration.c
> >>>>> +++ b/src/event/quic/ngx_event_quic_migration.c
> >>>>> @@ -10,6 +10,11 @@
> >>>>> #include <ngx_event_quic_connection.h>
> >>>>>
> >>>>>
> >>>>> +#define NGX_QUIC_PATH_MTU_DELAY 1000
> >>>>
> >>>> The value looks too much,
> >>>> it would delay each subsequent probe (which is not retry).
> >>>
> >>> Probably the right value should be determined in real-life scenarios.
> >>
> >> I'm quite skeptical about delayed probes, it has little to no benefit
> >> for short connections with bulky transfers, as implemented, especially
> >> with short MTU steps. OTOH, immediate probes can lead to congestion
> >> window starvation resulting in request processing slow down.
> >>
> >> For the record,
> >> https://www.ietf.org/proceedings/62/pmtud.html provides a discussion
> >> about PMTUD, in particular, on search strategy and when to probe.
> >> For the latter, it seems to articulate an opposite view, i.e. to send
> >> probes ASAP. Also, it raises a question how short lived connections
> >> could benefit from PMTUD, e.g. by caching path MTU, which seems to
> >> correspond to a shared PLPMTU state in RFC 8899.
> >>
> >>> Let's decrease the value by a factor of 10 for now.
> >>
> >> For a simple implementation, 100ms is a good enough trade-off, IMHO.
> >>
> >>>
> >>>>> +#define NGX_QUIC_PATH_MTU_STEP 256
> >>>>> +#define NGX_QUIC_PATH_MTU_PRECISION 16
> >>>>> +
> >>>>> +
> >>>>> static void ngx_quic_set_connection_path(ngx_connection_t *c,
> >>>>> ngx_quic_path_t *path);
> >>>>> static ngx_int_t ngx_quic_validate_path(ngx_connection_t *c,
> >>>>> @@ -17,7 +22,15 @@ static ngx_int_t ngx_quic_validate_path(
> >>>>> static ngx_int_t ngx_quic_send_path_challenge(ngx_connection_t *c,
> >>>>> ngx_quic_path_t *path);
> >>>>> static void ngx_quic_set_path_timer(ngx_connection_t *c);
> >>>>> +static ngx_int_t ngx_quic_expire_path_validation(ngx_connection_t *c,
> >>>>> + ngx_quic_path_t *path);
> >>>>> +static ngx_int_t ngx_quic_expire_path_mtu_delay(ngx_connection_t *c,
> >>>>> + ngx_quic_path_t *path);
> >>>>> +static ngx_int_t ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c,
> >>>>> + ngx_quic_path_t *path);
> >>>>> static ngx_quic_path_t *ngx_quic_get_path(ngx_connection_t *c, ngx_uint_t tag);
> >>>>> +static ngx_int_t ngx_quic_send_path_mtu_probe(ngx_connection_t *c,
> >>>>> + ngx_quic_path_t *path);
> >>>>>
> >>>>>
> >>>>> ngx_int_t
> >>>>> @@ -97,7 +110,7 @@ ngx_quic_handle_path_response_frame(ngx_
> >>>>> {
> >>>>> path = ngx_queue_data(q, ngx_quic_path_t, queue);
> >>>>>
> >>>>> - if (!path->validating) {
> >>>>> + if (path->state != NGX_QUIC_PATH_VALIDATING) {
> >>>>> continue;
> >>>>> }
> >>>>>
> >>>>> @@ -136,6 +149,9 @@ valid:
> >>>>> {
> >>>>> /* address did not change */
> >>>>> rst = 0;
> >>>>> +
> >>>>> + path->mtu = prev->mtu;
> >>>>> + path->max_mtu = prev->max_mtu;
> >>>>> }
> >>>>> }
> >>>>>
> >>>>> @@ -167,9 +183,8 @@ valid:
> >>>>> ngx_quic_path_dbg(c, "is validated", path);
> >>>>>
> >>>>> path->validated = 1;
> >>>>> - path->validating = 0;
> >>>>>
> >>>>> - ngx_quic_set_path_timer(c);
> >>>>> + ngx_quic_discover_path_mtu(c, path);
> >>>>>
> >>>>> return NGX_OK;
> >>>>> }
> >>>>> @@ -217,6 +232,8 @@ ngx_quic_new_path(ngx_connection_t *c,
> >>>>> path->addr_text.len = ngx_sock_ntop(sockaddr, socklen, path->text,
> >>>>> NGX_SOCKADDR_STRLEN, 1);
> >>>>>
> >>>>> + path->mtu = NGX_QUIC_MIN_INITIAL_SIZE;
> >>>>> +
> >>>>> ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> "quic path seq:%uL created addr:%V",
> >>>>> path->seqnum, &path->addr_text);
> >>>>> @@ -464,7 +481,7 @@ ngx_quic_handle_migration(ngx_connection
> >>>>>
> >>>>> ngx_quic_set_connection_path(c, next);
> >>>>>
> >>>>> - if (!next->validated && !next->validating) {
> >>>>> + if (!next->validated && next->state != NGX_QUIC_PATH_VALIDATING) {
> >>>>> if (ngx_quic_validate_path(c, next) != NGX_OK) {
> >>>>> return NGX_ERROR;
> >>>>> }
> >>>>> @@ -492,7 +509,6 @@ ngx_quic_validate_path(ngx_connection_t
> >>>>> ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> "quic initiated validation of path seq:%uL", path->seqnum);
> >>>>>
> >>>>> - path->validating = 1;
> >>>>> path->tries = 0;
> >>>>>
> >>>>> if (RAND_bytes(path->challenge1, 8) != 1) {
> >>>>> @@ -511,6 +527,7 @@ ngx_quic_validate_path(ngx_connection_t
> >>>>> pto = ngx_max(ngx_quic_pto(c, ctx), 1000);
> >>>>>
> >>>>> path->expires = ngx_current_msec + pto;
> >>>>> + path->state = NGX_QUIC_PATH_VALIDATING;
> >>>>>
> >>>>> ngx_quic_set_path_timer(c);
> >>>>>
> >>>>> @@ -558,6 +575,42 @@ ngx_quic_send_path_challenge(ngx_connect
> >>>>> }
> >>>>>
> >>>>>
> >>>>> +void
> >>>>> +ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path)
> >>>>> +{
> >>>>> + ngx_quic_connection_t *qc;
> >>>>> +
> >>>>> + qc = ngx_quic_get_connection(c);
> >>>>> +
> >>>>> + if (path->max_mtu) {
> >>>>> + if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
> >>>>> + path->state = NGX_QUIC_PATH_IDLE;
> >>>>> + ngx_quic_set_path_timer(c);
> >>>>> + return;
> >>>>> + }
> >>>>> +
> >>>>> + path->mtud = (path->mtu + path->max_mtu) / 2;
> >>>>> +
> >>>>> + } else {
> >>>>> + path->mtud = path->mtu + NGX_QUIC_PATH_MTU_STEP;
> >>>>
> >>>> I like this approach, compared to multiplying probe steps.
> >>>> This better fits to the pattern to keep around a well-known
> >>>> initial value, which is typically 1500 bytes.
> >>>> Another one could be to let user specify its own initial
> >>>> value for cases when he knows well its network environment.
> >>>>
> >>>>> +
> >>>>> + if (path->mtud >= qc->ctp.max_udp_payload_size) {
> >>>>> + path->mtud = qc->ctp.max_udp_payload_size;
> >>>>> + path->max_mtu = qc->ctp.max_udp_payload_size;
> >>>>> + }
> >>>>> + }
> >>>>> +
> >>>>> + path->state = NGX_QUIC_PATH_WAITING;
> >>>>> + path->expires = ngx_current_msec + NGX_QUIC_PATH_MTU_DELAY;
> >>>>> +
> >>>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> + "quic path seq:%uL schedule mtu:%uz",
> >>>>> + path->seqnum, path->mtud);
> >>>>> +
> >>>>> + ngx_quic_set_path_timer(c);
> >>>>> +}
> >>>>> +
> >>>>> +
> >>>>> static void
> >>>>> ngx_quic_set_path_timer(ngx_connection_t *c)
> >>>>> {
> >>>>> @@ -578,7 +631,7 @@ ngx_quic_set_path_timer(ngx_connection_t
> >>>>> {
> >>>>> path = ngx_queue_data(q, ngx_quic_path_t, queue);
> >>>>>
> >>>>> - if (!path->validating) {
> >>>>> + if (path->state == NGX_QUIC_PATH_IDLE) {
> >>>>> continue;
> >>>>> }
> >>>>>
> >>>>> @@ -600,22 +653,18 @@ ngx_quic_set_path_timer(ngx_connection_t
> >>>>>
> >>>>>
> >>>>> void
> >>>>> -ngx_quic_path_validation_handler(ngx_event_t *ev)
> >>>>> +ngx_quic_path_handler(ngx_event_t *ev)
> >>>>> {
> >>>>> ngx_msec_t now;
> >>>>> ngx_queue_t *q;
> >>>>> - ngx_msec_int_t left, next, pto;
> >>>>> - ngx_quic_path_t *path, *bkp;
> >>>>> + ngx_msec_int_t left;
> >>>>> + ngx_quic_path_t *path;
> >>>>> ngx_connection_t *c;
> >>>>> - ngx_quic_send_ctx_t *ctx;
> >>>>> ngx_quic_connection_t *qc;
> >>>>>
> >>>>> c = ev->data;
> >>>>> qc = ngx_quic_get_connection(c);
> >>>>>
> >>>>> - ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> >>>>> -
> >>>>> - next = -1;
> >>>>> now = ngx_current_msec;
> >>>>>
> >>>>> q = ngx_queue_head(&qc->paths);
> >>>>> @@ -625,83 +674,274 @@ ngx_quic_path_validation_handler(ngx_eve
> >>>>> path = ngx_queue_data(q, ngx_quic_path_t, queue);
> >>>>> q = ngx_queue_next(q);
> >>>>>
> >>>>> - if (!path->validating) {
> >>>>> + if (path->state == NGX_QUIC_PATH_IDLE) {
> >>>>> continue;
> >>>>> }
> >>>>>
> >>>>> left = path->expires - now;
> >>>>>
> >>>>> if (left > 0) {
> >>>>> -
> >>>>> - if (next == -1 || left < next) {
> >>>>> - next = left;
> >>>>> - }
> >>>>> -
> >>>>> - continue;
> >>>>> - }
> >>>>> -
> >>>>> - if (++path->tries < NGX_QUIC_PATH_RETRIES) {
> >>>>> - pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
> >>>>> -
> >>>>> - path->expires = ngx_current_msec + pto;
> >>>>> -
> >>>>> - if (next == -1 || pto < next) {
> >>>>> - next = pto;
> >>>>> - }
> >>>>> -
> >>>>> - /* retransmit */
> >>>>> - (void) ngx_quic_send_path_challenge(c, path);
> >>>>> -
> >>>>> continue;
> >>>>> }
> >>>>>
> >>>>> - ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
> >>>>> - "quic path seq:%uL validation failed", path->seqnum);
> >>>>> + switch (path->state) {
> >>>>> + case NGX_QUIC_PATH_VALIDATING:
> >>>>> + if (ngx_quic_expire_path_validation(c, path) != NGX_OK) {
> >>>>> + goto failed;
> >>>>> + }
> >>>>> +
> >>>>> + break;
> >>>>> +
> >>>>> + case NGX_QUIC_PATH_WAITING:
> >>>>> + if (ngx_quic_expire_path_mtu_delay(c, path) != NGX_OK) {
> >>>>> + goto failed;
> >>>>> + }
> >>>>> +
> >>>>> + break;
> >>>>> +
> >>>>> + case NGX_QUIC_PATH_DISCOVERING_MTU:
> >>>>> + if (ngx_quic_expire_path_mtu_discovery(c, path) != NGX_OK) {
> >>>>> + goto failed;
> >>>>> + }
> >>>>> +
> >>>>> + break;
> >>>>> +
> >>>>
> >>>> indentation in several places here
> >>>
> >>> Fixed, thanks.
> >>>
> >>>>> + default:
> >>>>> + break;
> >>>>> + }
> >>>>> + }
> >>>>> +
> >>>>> + ngx_quic_set_path_timer(c);
> >>>>> +
> >>>>> + return;
> >>>>>
> >>>>> - /* found expired path */
> >>>>> +failed:
> >>>>> +
> >>>>> + ngx_quic_close_connection(c, NGX_ERROR);
> >>>>> +}
> >>>>> +
> >>>>> +
> >>>>> +static ngx_int_t
> >>>>> +ngx_quic_expire_path_validation(ngx_connection_t *c, ngx_quic_path_t *path)
> >>>>> +{
> >>>>> + ngx_msec_int_t pto;
> >>>>> + ngx_quic_path_t *bkp;
> >>>>> + ngx_quic_send_ctx_t *ctx;
> >>>>> + ngx_quic_connection_t *qc;
> >>>>>
> >>>>> - path->validated = 0;
> >>>>> - path->validating = 0;
> >>>>> + qc = ngx_quic_get_connection(c);
> >>>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> >>>>> +
> >>>>> + if (++path->tries < NGX_QUIC_PATH_RETRIES) {
> >>>>> + pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
> >>>>> + path->expires = ngx_current_msec + pto;
> >>>>> +
> >>>>> + (void) ngx_quic_send_path_challenge(c, path);
> >>>>> +
> >>>>> + return NGX_OK;
> >>>>> + }
> >>>>> +
> >>>>> + ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> + "quic path seq:%uL validation failed", path->seqnum);
> >>>>> +
> >>>>> + /* found expired path */
> >>>>> +
> >>>>> + path->validated = 0;
> >>>>>
> >>>>>
> >>>>> - /* RFC 9000, 9.3.2. On-Path Address Spoofing
> >>>>> - *
> >>>>> - * To protect the connection from failing due to such a spurious
> >>>>> - * migration, an endpoint MUST revert to using the last validated
> >>>>> - * peer address when validation of a new peer address fails.
> >>>>> - */
> >>>>> -
> >>>>> - if (qc->path == path) {
> >>>>> - /* active path validation failed */
> >>>>> -
> >>>>> - bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
> >>>>> + /* RFC 9000, 9.3.2. On-Path Address Spoofing
> >>>>> + *
> >>>>> + * To protect the connection from failing due to such a spurious
> >>>>> + * migration, an endpoint MUST revert to using the last validated
> >>>>> + * peer address when validation of a new peer address fails.
> >>>>> + */
> >>>>>
> >>>>> - if (bkp == NULL) {
> >>>>> - qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
> >>>>> - qc->error_reason = "no viable path";
> >>>>> - ngx_quic_close_connection(c, NGX_ERROR);
> >>>>> - return;
> >>>>> - }
> >>>>> + if (qc->path == path) {
> >>>>> + /* active path validation failed */
> >>>>> +
> >>>>> + bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
> >>>>>
> >>>>> - qc->path = bkp;
> >>>>> - qc->path->tag = NGX_QUIC_PATH_ACTIVE;
> >>>>> -
> >>>>> - ngx_quic_set_connection_path(c, qc->path);
> >>>>> -
> >>>>> - ngx_log_error(NGX_LOG_INFO, c->log, 0,
> >>>>> - "quic path seq:%uL addr:%V is restored from backup",
> >>>>> - qc->path->seqnum, &qc->path->addr_text);
> >>>>> -
> >>>>> - ngx_quic_path_dbg(c, "is active", qc->path);
> >>>>> + if (bkp == NULL) {
> >>>>> + qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
> >>>>> + qc->error_reason = "no viable path";
> >>>>> + return NGX_ERROR;
> >>>>> }
> >>>>>
> >>>>> - if (ngx_quic_free_path(c, path) != NGX_OK) {
> >>>>> - ngx_quic_close_connection(c, NGX_ERROR);
> >>>>> - return;
> >>>>> + qc->path = bkp;
> >>>>> + qc->path->tag = NGX_QUIC_PATH_ACTIVE;
> >>>>> +
> >>>>> + ngx_quic_set_connection_path(c, qc->path);
> >>>>> +
> >>>>> + ngx_log_error(NGX_LOG_INFO, c->log, 0,
> >>>>> + "quic path seq:%uL addr:%V is restored from backup",
> >>>>> + qc->path->seqnum, &qc->path->addr_text);
> >>>>> +
> >>>>> + ngx_quic_path_dbg(c, "is active", qc->path);
> >>>>> + }
> >>>>> +
> >>>>> + return ngx_quic_free_path(c, path);
> >>>>> +}
> >>>>> +
> >>>>> +
> >>>>> +static ngx_int_t
> >>>>> +ngx_quic_expire_path_mtu_delay(ngx_connection_t *c, ngx_quic_path_t *path)
> >>>>> +{
> >>>>> + ngx_int_t rc;
> >>>>> + ngx_uint_t i;
> >>>>> + ngx_msec_t pto;
> >>>>> + ngx_quic_send_ctx_t *ctx;
> >>>>> + ngx_quic_connection_t *qc;
> >>>>> +
> >>>>> + qc = ngx_quic_get_connection(c);
> >>>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> >>>>> +
> >>>>> + path->tries = 0;
> >>>>> +
> >>>>> + for ( ;; ) {
> >>>>> +
> >>>>> + for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
> >>>>> + path->mtu_pnum[i] = NGX_QUIC_UNSET_PN;
> >>>>> + }
> >>>>> +
> >>>>> + rc = ngx_quic_send_path_mtu_probe(c, path);
> >>>>> +
> >>>>> + if (rc == NGX_ERROR) {
> >>>>> + return NGX_ERROR;
> >>>>> + }
> >>>>> +
> >>>>> + if (rc == NGX_OK) {
> >>>>> + pto = ngx_quic_pto(c, ctx);
> >>>>> + path->expires = ngx_current_msec + pto;
> >>>>> + path->state = NGX_QUIC_PATH_DISCOVERING_MTU;
> >>>>> + return NGX_OK;
> >>>>> + }
> >>>>> +
> >>>>> + /* rc == NGX_DECLINED */
> >>>>> +
> >>>>> + path->max_mtu = path->mtud;
> >>>>> +
> >>>>> + if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
> >>>>> + path->state = NGX_QUIC_PATH_IDLE;
> >>>>> + return NGX_OK;
> >>>>> + }
> >>>>> +
> >>>>> + path->mtud = (path->mtu + path->max_mtu) / 2;
> >>>>> + }
> >>>>> +}
> >>>>> +
> >>>>> +
> >>>>> +static ngx_int_t
> >>>>> +ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c, ngx_quic_path_t *path)
> >>>>> +{
> >>>>> + ngx_int_t rc;
> >>>>> + ngx_msec_int_t pto;
> >>>>> + ngx_quic_send_ctx_t *ctx;
> >>>>> + ngx_quic_connection_t *qc;
> >>>>> +
> >>>>> + qc = ngx_quic_get_connection(c);
> >>>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> >>>>> +
> >>>>> + if (++path->tries < NGX_QUIC_PATH_RETRIES) {
> >>>>> + pto = ngx_quic_pto(c, ctx) << path->tries;
> >>>>> + path->expires = ngx_current_msec + pto;
> >>>>> +
> >>>>
> >>>> Setting path->expires there seems useless.
> >>>> It don't seem to be used in ngx_quic_send_path_mtu_probe(),
> >>>> and ngx_quic_discover_path_mtu() used to reset path->expires.
> >>>
> >>> If ngx_quic_send_path_mtu_probe() below returns NGX_OK, the path should stay
> >>> in the same state, but expiration time should be advanced. To make it
> >>> more obvious, I changed this code.
> >>
> >> Updated code makes it clear, thanks.
> >>
> >>>
> >>>>> + rc = ngx_quic_send_path_mtu_probe(c, path);
> >>>>> + if (rc != NGX_DECLINED) {
> >>>>> + return rc;
> >>>>> }
> >>>>> }
> >>>>>
> >>>>> - if (next != -1) {
> >>>>> - ngx_add_timer(&qc->path_validation, next);
> >>>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> + "quic path seq:%uL expired mtu:%uz",
> >>>>> + path->seqnum, path->mtud);
> >>>>> +
> >>>>> + path->max_mtu = path->mtud;
> >>>>> +
> >>>>> + ngx_quic_discover_path_mtu(c, path);
> >>>>> +
> >>>>> + return NGX_OK;
> >>>>> +}
> >>>>> +
> >>>>> +
> >>>>> +static ngx_int_t
> >>>>> +ngx_quic_send_path_mtu_probe(ngx_connection_t *c, ngx_quic_path_t *path)
> >>>>> +{
> >>>>> + ngx_int_t rc;
> >>>>> + ngx_uint_t log_error;
> >>>>> + ngx_quic_frame_t frame;
> >>>>> + ngx_quic_send_ctx_t *ctx;
> >>>>> + ngx_quic_connection_t *qc;
> >>>>> +
> >>>>> + ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
> >>>>> +
> >>>>> + frame.level = ssl_encryption_application;
> >>>>> + frame.type = NGX_QUIC_FT_PING;
> >>>>> +
> >>>>> + qc = ngx_quic_get_connection(c);
> >>>>> + ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> >>>>> + path->mtu_pnum[path->tries] = ctx->pnum;
> >>>>> +
> >>>>> + ngx_log_debug4(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> + "quic path seq:%uL send probe "
> >>>>> + "mtu:%uz pnum:%uL tries:%ui",
> >>>>> + path->seqnum, path->mtud, ctx->pnum, path->tries);
> >>>>> +
> >>>>> + log_error = c->log_error;
> >>>>> + c->log_error = NGX_ERROR_IGNORE_EMSGSIZE;
> >>>>> +
> >>>>> + rc = ngx_quic_frame_sendto(c, &frame, path->mtud, path);
> >>>>> + c->log_error = log_error;
> >>>>> +
> >>>>> + if (rc == NGX_ERROR) {
> >>>>> + if (c->write->error) {
> >>>>> + c->write->error = 0;
> >>>>> +
> >>>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> + "quic path seq:%uL rejected mtu:%uz",
> >>>>> + path->seqnum, path->mtud);
> >>>>> +
> >>>>> + return NGX_DECLINED;
> >>>>
> >>>> The error handling looks awkward to decline on write error.
> >>>> ngx_sendmsg() sets write error on fatal sendmsg errors,
> >>>> and keeps it unset NGX_EAGAIN and NGX_EINTR, which
> >>>> are typically non-fatal transient errors.
> >>>> I understand this is done mostly to ignore EMSGSIZE,
> >>>> maybe it makes sense to ignore the rest sendmsg() errors.
> >>>
> >>> The problem is, the error doesn't make it to this point. All be have here is
> >>> NGX_ERROR and c->write->error. I think, for PMTUD purposes we can safely
> >>> ignore the errors. It's hinghly likely, the error will repeat while doing
> >>> connection I/O and we'll shortly close the connection.
> >>
> >> Ok, I re-read ngx_sendmsg() to clarify the following:
> >> - NGX_EINTR is handled internally and never returned up to here;
> >> - NGX_EAGAIN (NGX_AGAIN) seems to be returned, at least on Linux,
> >> when send buffer is low for the MTU probe, converted to NGX_OK;
> >> - FTR, looking at sosend_dgram() on FreeBSD, sendmsg seems to
> >> return EMSGSIZE there, instead, on low socket buffer space;
> >> - other sendmsg errors are returned as NGX_ERROR, and
> >> c->write->error is set; this is converted to NGX_DECLINED;
> >> - non-sendmsg NGX_ERRORs appeared in ngx_quic_frame_sendto()
> >> are passed as is without conversion.
> >>
> >> So, the error handling as implemented to decline on any sendmsg
> >> "fatal" errors actually makes sense, if we'd like to avoid rewriting
> >> ngx_sendmsg() to further distinguish sendmsg errors, notably EMSGSIZE.
> >> The downside is that connection may ignore actually fatal errors,
> >> though the error will likely repeat on sending non-probing frames.
> >>
> >> NGX_AGAIN conversion to NGX_OK is questionable, because this
> >> makes a probe considered successfully sent while it is not.
> >> I wonder though what we can do about that other than to simply
> >> ignore, similar to other ngx_quic_frame_sendto() callers,
> >> because next time on a probe retry sendmsg can return success.
> >
> > Ignoring certain send error for probes is not a big deal since timeout
> > expiration will lead to the same result eventually.
> >
> >>>>> + }
> >>>>> +
> >>>>> + return NGX_ERROR;
> >>>>> }
> >>>>> +
> >>>>> + return NGX_OK;
> >>>>> }
> >>>>> +
> >>>>> +
> >>>>> +ngx_int_t
> >>>>> +ngx_quic_handle_path_mtu_ack(ngx_connection_t *c, ngx_quic_path_t *path,
> >>>>> + uint64_t min, uint64_t max)
> >>>>> +{
> >>>>> + uint64_t pnum;
> >>>>> + ngx_uint_t i;
> >>>>> +
> >>>>> + if (path->state != NGX_QUIC_PATH_DISCOVERING_MTU) {
> >>>>> + return NGX_OK;
> >>>>> + }
> >>>>> +
> >>>>> + for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
> >>>>> + pnum = path->mtu_pnum[i];
> >>>>> +
> >>>>> + if (pnum == NGX_QUIC_UNSET_PN) {
> >>>>> + break;
> >>>>> + }
> >>>>> +
> >>>>> + if (pnum < min || pnum > max) {
> >>>>> + continue;
> >>>>> + }
> >>>>> +
> >>>>> + path->mtu = path->mtud;
> >>>>> +
> >>>>> + ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
> >>>>> + "quic path seq:%uL ack mtu:%uz",
> >>>>> + path->seqnum, path->mtu);
> >>>>> +
> >>>>> + ngx_quic_discover_path_mtu(c, path);
> >>>>> +
> >>>>> + break;
> >>>>
> >>>> What if two probes appeared ACKed in one range.
> >>>> Though that seems to be safe to break there, as two
> >>>> probes barely have different probe sizes, rather retries.
> >>>
> >>> No matter how many probes are acked, the next step is still the same.
> >>> So there's no reason to find all of them.
> >>>
> >>
> >> Ok, let's leave it as is.
> >>
> >>>>> + }
> >>>>> +
> >>>>> + return NGX_OK;
> >>>>> +}
> >>>>> diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
> >>>>> --- a/src/event/quic/ngx_event_quic_migration.h
> >>>>> +++ b/src/event/quic/ngx_event_quic_migration.h
> >>>>> @@ -18,10 +18,10 @@
> >>>>> #define NGX_QUIC_PATH_BACKUP 2
> >>>>>
> >>>>> #define ngx_quic_path_dbg(c, msg, path) \
> >>>>> - ngx_log_debug6(NGX_LOG_DEBUG_EVENT, c->log, 0, \
> >>>>> - "quic path seq:%uL %s sent:%O recvd:%O state:%s%s", \
> >>>>> + ngx_log_debug7(NGX_LOG_DEBUG_EVENT, c->log, 0, \
> >>>>> + "quic path seq:%uL %s tx:%O rx:%O valid:%d st:%d mtu:%uz", \
> >>>>> path->seqnum, msg, path->sent, path->received, \
> >>>>> - path->validated ? "V": "N", path->validating ? "R": "");
> >>>>> + path->validated, path->state, path->mtu);
> >>>>>
> >>>>> ngx_int_t ngx_quic_handle_path_challenge_frame(ngx_connection_t *c,
> >>>>> ngx_quic_header_t *pkt, ngx_quic_path_challenge_frame_t *f);
> >>>>> @@ -36,6 +36,10 @@ ngx_int_t ngx_quic_set_path(ngx_connecti
> >>>>> ngx_int_t ngx_quic_handle_migration(ngx_connection_t *c,
> >>>>> ngx_quic_header_t *pkt);
> >>>>>
> >>>>> -void ngx_quic_path_validation_handler(ngx_event_t *ev);
> >>>>> +void ngx_quic_path_handler(ngx_event_t *ev);
> >>>>> +
> >>>>> +void ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path);
> >>>>> +ngx_int_t ngx_quic_handle_path_mtu_ack(ngx_connection_t *c,
> >>>>> + ngx_quic_path_t *path, uint64_t min, uint64_t max);
> >>>>
> >>>> Nitpicking:
> >>>> maybe just ngx_quic_handle_path_mtu() ?
> >>>> This would match ngx_quic_discover_path_mtu() naming.
> >>>
> >>> OK, renamed.
> >>>
> >>>>>
> >>>>> #endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */
> >>>>> diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
> >>>>> --- a/src/event/quic/ngx_event_quic_output.c
> >>>>> +++ b/src/event/quic/ngx_event_quic_output.c
> >>>>> @@ -10,9 +10,6 @@
> >>>>> #include <ngx_event_quic_connection.h>
> >>>>>
> >>>>>
> >>>>> -#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT 1252
> >>>>> -#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6 1232
> >>>>> -
> >>>>> #define NGX_QUIC_MAX_UDP_SEGMENT_BUF 65487 /* 65K - IPv6 header */
> >>>>> #define NGX_QUIC_MAX_SEGMENTS 64 /* UDP_MAX_SEGMENTS */
> >>>>>
> >>>>> @@ -61,21 +58,6 @@ static size_t ngx_quic_path_limit(ngx_co
> >>>>> size_t size);
> >>>>>
> >>>>>
> >>>>> -size_t
> >>>>> -ngx_quic_max_udp_payload(ngx_connection_t *c)
> >>>>> -{
> >>>>> - /* TODO: path MTU discovery */
> >>>>> -
> >>>>> -#if (NGX_HAVE_INET6)
> >>>>> - if (c->sockaddr->sa_family == AF_INET6) {
> >>>>> - return NGX_QUIC_MAX_UDP_PAYLOAD_OUT6;
> >>>>> - }
> >>>>> -#endif
> >>>>> -
> >>>>> - return NGX_QUIC_MAX_UDP_PAYLOAD_OUT;
> >>>>> -}
> >>>>> -
> >>>>> -
> >>>>> ngx_int_t
> >>>>> ngx_quic_output(ngx_connection_t *c)
> >>>>> {
> >>>>> @@ -142,10 +124,7 @@ ngx_quic_create_datagrams(ngx_connection
> >>>>>
> >>>>> p = dst;
> >>>>>
> >>>>> - len = ngx_min(qc->ctp.max_udp_payload_size,
> >>>>> - NGX_QUIC_MAX_UDP_PAYLOAD_SIZE);
> >>>>> -
> >>>>> - len = ngx_quic_path_limit(c, path, len);
> >>>>> + len = ngx_quic_path_limit(c, path, path->mtu);
> >>>>>
> >>>>> pad = ngx_quic_get_padding_level(c);
> >>>>>
> >>>>> @@ -299,9 +278,7 @@ ngx_quic_allow_segmentation(ngx_connecti
> >>>>> ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
> >>>>>
> >>>>> bytes = 0;
> >>>>> -
> >>>>> - len = ngx_min(qc->ctp.max_udp_payload_size,
> >>>>> - NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> >>>>> + len = ngx_min(qc->path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> >>>>>
> >>>>> for (q = ngx_queue_head(&ctx->frames);
> >>>>> q != ngx_queue_sentinel(&ctx->frames);
> >>>>> @@ -345,8 +322,7 @@ ngx_quic_create_segments(ngx_connection_
> >>>>> return NGX_ERROR;
> >>>>> }
> >>>>>
> >>>>> - segsize = ngx_min(qc->ctp.max_udp_payload_size,
> >>>>> - NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> >>>>> + segsize = ngx_min(path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
> >>>>> p = dst;
> >>>>> end = dst + sizeof(dst);
> >>>>>
> >>>>> diff --git a/src/event/quic/ngx_event_quic_output.h b/src/event/quic/ngx_event_quic_output.h
> >>>>> --- a/src/event/quic/ngx_event_quic_output.h
> >>>>> +++ b/src/event/quic/ngx_event_quic_output.h
> >>>>> @@ -12,8 +12,6 @@
> >>>>> #include <ngx_core.h>
> >>>>>
> >>>>>
> >>>>> -size_t ngx_quic_max_udp_payload(ngx_connection_t *c);
> >>>>> -
> >>>>> ngx_int_t ngx_quic_output(ngx_connection_t *c);
> >>>>>
> >>>>> ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
> >>>>> diff --git a/src/event/quic/ngx_event_quic_ssl.c b/src/event/quic/ngx_event_quic_ssl.c
> >>>>> --- a/src/event/quic/ngx_event_quic_ssl.c
> >>>>> +++ b/src/event/quic/ngx_event_quic_ssl.c
> >>>>> @@ -494,6 +494,8 @@ ngx_quic_crypto_input(ngx_connection_t *
> >>>>> */
> >>>>> ngx_quic_discard_ctx(c, ssl_encryption_handshake);
> >>>>>
> >>>>> + ngx_quic_discover_path_mtu(c, qc->path);
> >>>>> +
> >>>>> /* start accepting clients on negotiated number of server ids */
> >>>>> if (ngx_quic_create_sockets(c) != NGX_OK) {
> >>>>> return NGX_ERROR;
> >>>>> diff --git a/src/os/unix/ngx_errno.h b/src/os/unix/ngx_errno.h
> >>>>> --- a/src/os/unix/ngx_errno.h
> >>>>> +++ b/src/os/unix/ngx_errno.h
> >>>>> @@ -54,6 +54,7 @@ typedef int ngx_err_t;
> >>>>> #define NGX_ENOMOREFILES 0
> >>>>> #define NGX_ELOOP ELOOP
> >>>>> #define NGX_EBADF EBADF
> >>>>> +#define NGX_EMSGSIZE EMSGSIZE
> >>>>>
> >>>>> #if (NGX_HAVE_OPENAT)
> >>>>> #define NGX_EMLINK EMLINK
> >>
> >> This will break on win32, an NGX_EMSGSIZE definition needs to be
> >> provided there, which is WSAEMSGSIZE.
> >
> > Thanks, fixed.
> >
> >>>
> >>> The diff is attached.
> >>>
> >>
> >> The diff looks good.
> >
> > Also, with step-based mtu step updates, QUIC flood detection is triggered with
> > the h3_keepalive.t on interfaces with large mtu. I think, doubling mtu
> > is a safer choice.
>
> While optimizing for theoretical maximum is not something to accent,
> it is certainly expected to work well on various links. Below are some
> numbers to consider, taken on various PMTU and interval/search strategy.
>
> = 20 streams, 1mb response each, pmtu=65536 (sampled 3 times)
>
> pn/ack probes last total bytes
>
> == 1000ms + 256
> 1 21 6.320 79.472
> 2 18 5.808 65.376
> 3 17 5.552 59.568
> 4 15 2.400 33.984
>
> == 100ms + 256
> 1 263 65.500 8.962.971
> 2 256 65.500 8.708.555
> 3 268 65.500 9.188.363
>
> == 100ms * 2
> 1 17-24 65.513 833.607-1.286.679
> 2 23-24 65.513 1.190.607-1.255.710
> 3 20-23 65.513 996.252-1.198.820
> 4 18 65.513 872.007
>
> It is clearly seen from the above:
> - "1000ms + 256" (baseline) does not converge after 20 MB
> - "100ms + 256" converges but hits flood detection limit,
> which is 8 MB of probes if no stream traffic in the connection.
> - "100ms * 2" results in ~ 1 MB of total probe traffic
>
> This explains how h3_keepalive.t is failed.
>
>
> Below "100ms * 2" is also seen with better avg payload size.
> Compared with pto and ack-only packets cleared from raw data,
> seen increased with "100ms * 2" if client ack's with frequency
> less than on every second packet, which messed up avg stats.
>
> = 20 streams, 1mb response each, pmtu=65536 (sampled 3 times)
>
> pn/ack pn stat
>
> == 1000ms + 256
> 1 N=8245 min=3 max=6033 m=2437 avg=2552 std=1750
> 2 N=8481 min=3 max=5733 m=2296 avg=2486 std=1453
> 3 N=8703 min=3 max=5517 m=2189 avg=2424 std=1361
> 4 N=11237 min=3 max=2349 m=2349 avg=1879 std=770
>
> == 100ms + 256
> 1 N=3757 min=3 max=18573 m=3494 avg=5440 std=5226
> 2 N=3696 min=3 max=17037 m=4493 avg=5686 std=4475
> 3 N=3745 min=3 max=16525 m=4493 avg=5657 std=4308
>
> == 100ms * 2
> 1 N=1956 min=3 max=65465 m=1165 avg=10757 std=23394
> 2 N=1150 min=3 max=65465 m=2320 avg=18264 std=26491
> 3 N=1411 min=3 max=65465 m=1165 avg=14879 std=25476
> 4 N=1691 min=3 max=65465 m=548 avg=12419 std=23854
> == after clearing pto and ack-only
> 1* N=1910 min=9 max=65465 m=1165 avg=11014 std=23612
> 2* N=1095 min=10 max=65465 m=2365 avg=19181 std=26823
> 3* N=1117 min=12 max=65465 m=2193 avg=18798 std=27320
> 4* N=1145 min=9 max=65465 m=2365 avg=18333 std=27049
>
> As seen from the above, "100ms * 2" represents worse median,
> even with pto/ack-only cleared, which may be a sign of poor
> packing to maximum packet size, resulting in short packets.
>
>
> On a typical PMTU of up to 1500, the difference is leveled
> (captured with pn/ack=2)
>
> = 20 streams, 1mb response each, pmtu=1500 (sampled 3 times)
>
> x 1000_256
> + 100_256
> * 100_x2
> N Min Max Median Avg Stddev
> x 16443 3 1437 1437 1283.7392 414.27811
> + 16377 3 1437 1437 1288.8831 412.92124
> No difference proven at 95.0% confidence
> * 16398 3 1436 1436 1287.2667 413.79561
> No difference proven at 95.0% confidence
>
> pn/ack probes last total bytes
> == 1000ms + 256
> 2 6 1472 9232
> == 100ms + 256
> 2 6 1472 9232
> == 100ms * 2
> 2 8 1471 12889
>
> >
> > Also, while looking in migration test logs, I thought it might be a good idea to
> > only handle validation/mtu events for the currently active path.
> >
>
> Can you elaborate?
I had another patch applied while testing pmtud and had the migration test
failed. I think, we can in future simplify path event handling by only handin
the active path. But we have to be careful.
> > Including 3 patch updates:
> >
> > - the last one + win32 fix + format fix in ngx_quic_path_dbg()
> > - x2 mtu
Merged these two updates. Final patch attached.
> > - single-path event handling
Let's postpone this one for now.
> > --
> > Roman Arutyunyan
> > <quic-pmtud-fix.txt><quic-pmtud-scale-x2.txt><quic-pmtud-fix-2.txt>_______________________________________________
> > nginx-devel mailing list
> > nginx-devel@nginx.org
> > https://mailman.nginx.org/mailman/listinfo/nginx-devel
>
> --
> Sergey Kandaurov
> _______________________________________________
> nginx-devel mailing list
> nginx-devel@nginx.org
> https://mailman.nginx.org/mailman/listinfo/nginx-devel
--
Roman Arutyunyan
# HG changeset patch
# User Roman Arutyunyan <arut@nginx.com>
# Date 1691990487 -14400
# Mon Aug 14 09:21:27 2023 +0400
# Node ID 58afcd72446ff33811e773f1cabb7866a92a09a0
# Parent f3412ec3b6d146f091acee2c8cf44e39ed0fbe26
QUIC: path MTU discovery.
MTU selection starts by doubling the initial MTU until the first failure.
Then binary search is used to find the path MTU.
diff --git a/src/core/ngx_connection.c b/src/core/ngx_connection.c
--- a/src/core/ngx_connection.c
+++ b/src/core/ngx_connection.c
@@ -1583,6 +1583,10 @@ ngx_connection_error(ngx_connection_t *c
}
#endif
+ if (err == NGX_EMSGSIZE && c->log_error == NGX_ERROR_IGNORE_EMSGSIZE) {
+ return 0;
+ }
+
if (err == 0
|| err == NGX_ECONNRESET
#if (NGX_WIN32)
@@ -1600,6 +1604,7 @@ ngx_connection_error(ngx_connection_t *c
{
switch (c->log_error) {
+ case NGX_ERROR_IGNORE_EMSGSIZE:
case NGX_ERROR_IGNORE_EINVAL:
case NGX_ERROR_IGNORE_ECONNRESET:
case NGX_ERROR_INFO:
diff --git a/src/core/ngx_connection.h b/src/core/ngx_connection.h
--- a/src/core/ngx_connection.h
+++ b/src/core/ngx_connection.h
@@ -97,7 +97,8 @@ typedef enum {
NGX_ERROR_ERR,
NGX_ERROR_INFO,
NGX_ERROR_IGNORE_ECONNRESET,
- NGX_ERROR_IGNORE_EINVAL
+ NGX_ERROR_IGNORE_EINVAL,
+ NGX_ERROR_IGNORE_EMSGSIZE
} ngx_connection_log_error_e;
diff --git a/src/event/quic/ngx_event_quic.c b/src/event/quic/ngx_event_quic.c
--- a/src/event/quic/ngx_event_quic.c
+++ b/src/event/quic/ngx_event_quic.c
@@ -149,11 +149,6 @@ ngx_quic_apply_transport_params(ngx_conn
ngx_log_error(NGX_LOG_INFO, c->log, 0,
"quic maximum packet size is invalid");
return NGX_ERROR;
-
- } else if (ctp->max_udp_payload_size > ngx_quic_max_udp_payload(c)) {
- ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
- ngx_log_debug0(NGX_LOG_DEBUG_EVENT, c->log, 0,
- "quic client maximum packet size truncated");
}
if (ctp->active_connection_id_limit < 2) {
@@ -286,7 +281,7 @@ ngx_quic_new_connection(ngx_connection_t
qc->path_validation.log = c->log;
qc->path_validation.data = c;
- qc->path_validation.handler = ngx_quic_path_validation_handler;
+ qc->path_validation.handler = ngx_quic_path_handler;
qc->conf = conf;
@@ -297,7 +292,7 @@ ngx_quic_new_connection(ngx_connection_t
ctp = &qc->ctp;
/* defaults to be used before actual client parameters are received */
- ctp->max_udp_payload_size = ngx_quic_max_udp_payload(c);
+ ctp->max_udp_payload_size = NGX_QUIC_MAX_UDP_PAYLOAD_SIZE;
ctp->ack_delay_exponent = NGX_QUIC_DEFAULT_ACK_DELAY_EXPONENT;
ctp->max_ack_delay = NGX_QUIC_DEFAULT_MAX_ACK_DELAY;
ctp->active_connection_id_limit = 2;
diff --git a/src/event/quic/ngx_event_quic_ack.c b/src/event/quic/ngx_event_quic_ack.c
--- a/src/event/quic/ngx_event_quic_ack.c
+++ b/src/event/quic/ngx_event_quic_ack.c
@@ -229,6 +229,12 @@ ngx_quic_handle_ack_frame_range(ngx_conn
qc = ngx_quic_get_connection(c);
+ if (ctx->level == ssl_encryption_application) {
+ if (ngx_quic_handle_path_mtu(c, qc->path, min, max) != NGX_OK) {
+ return NGX_ERROR;
+ }
+ }
+
st->max_pn = NGX_TIMER_INFINITE;
found = 0;
diff --git a/src/event/quic/ngx_event_quic_connection.h b/src/event/quic/ngx_event_quic_connection.h
--- a/src/event/quic/ngx_event_quic_connection.h
+++ b/src/event/quic/ngx_event_quic_connection.h
@@ -66,6 +66,14 @@ typedef struct ngx_quic_keys_s ng
#define ngx_quic_get_socket(c) ((ngx_quic_socket_t *)((c)->udp))
+typedef enum {
+ NGX_QUIC_PATH_IDLE = 0,
+ NGX_QUIC_PATH_VALIDATING,
+ NGX_QUIC_PATH_WAITING,
+ NGX_QUIC_PATH_MTUD
+} ngx_quic_path_state_e;
+
+
struct ngx_quic_client_id_s {
ngx_queue_t queue;
uint64_t seqnum;
@@ -89,18 +97,22 @@ struct ngx_quic_path_s {
ngx_sockaddr_t sa;
socklen_t socklen;
ngx_quic_client_id_t *cid;
+ ngx_quic_path_state_e state;
ngx_msec_t expires;
ngx_uint_t tries;
ngx_uint_t tag;
+ size_t mtu;
+ size_t mtud;
+ size_t max_mtu;
off_t sent;
off_t received;
u_char challenge1[8];
u_char challenge2[8];
uint64_t seqnum;
+ uint64_t mtu_pnum[NGX_QUIC_PATH_RETRIES];
ngx_str_t addr_text;
u_char text[NGX_SOCKADDR_STRLEN];
- unsigned validated:1;
- unsigned validating:1;
+ ngx_uint_t validated; /* unsigned validated:1; */
};
diff --git a/src/event/quic/ngx_event_quic_migration.c b/src/event/quic/ngx_event_quic_migration.c
--- a/src/event/quic/ngx_event_quic_migration.c
+++ b/src/event/quic/ngx_event_quic_migration.c
@@ -10,6 +10,10 @@
#include <ngx_event_quic_connection.h>
+#define NGX_QUIC_PATH_MTU_DELAY 100
+#define NGX_QUIC_PATH_MTU_PRECISION 16
+
+
static void ngx_quic_set_connection_path(ngx_connection_t *c,
ngx_quic_path_t *path);
static ngx_int_t ngx_quic_validate_path(ngx_connection_t *c,
@@ -17,7 +21,15 @@ static ngx_int_t ngx_quic_validate_path(
static ngx_int_t ngx_quic_send_path_challenge(ngx_connection_t *c,
ngx_quic_path_t *path);
static void ngx_quic_set_path_timer(ngx_connection_t *c);
+static ngx_int_t ngx_quic_expire_path_validation(ngx_connection_t *c,
+ ngx_quic_path_t *path);
+static ngx_int_t ngx_quic_expire_path_mtu_delay(ngx_connection_t *c,
+ ngx_quic_path_t *path);
+static ngx_int_t ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c,
+ ngx_quic_path_t *path);
static ngx_quic_path_t *ngx_quic_get_path(ngx_connection_t *c, ngx_uint_t tag);
+static ngx_int_t ngx_quic_send_path_mtu_probe(ngx_connection_t *c,
+ ngx_quic_path_t *path);
ngx_int_t
@@ -97,7 +109,7 @@ ngx_quic_handle_path_response_frame(ngx_
{
path = ngx_queue_data(q, ngx_quic_path_t, queue);
- if (!path->validating) {
+ if (path->state != NGX_QUIC_PATH_VALIDATING) {
continue;
}
@@ -136,6 +148,9 @@ valid:
{
/* address did not change */
rst = 0;
+
+ path->mtu = prev->mtu;
+ path->max_mtu = prev->max_mtu;
}
}
@@ -167,9 +182,8 @@ valid:
ngx_quic_path_dbg(c, "is validated", path);
path->validated = 1;
- path->validating = 0;
- ngx_quic_set_path_timer(c);
+ ngx_quic_discover_path_mtu(c, path);
return NGX_OK;
}
@@ -217,6 +231,8 @@ ngx_quic_new_path(ngx_connection_t *c,
path->addr_text.len = ngx_sock_ntop(sockaddr, socklen, path->text,
NGX_SOCKADDR_STRLEN, 1);
+ path->mtu = NGX_QUIC_MIN_INITIAL_SIZE;
+
ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
"quic path seq:%uL created addr:%V",
path->seqnum, &path->addr_text);
@@ -464,7 +480,7 @@ ngx_quic_handle_migration(ngx_connection
ngx_quic_set_connection_path(c, next);
- if (!next->validated && !next->validating) {
+ if (!next->validated && next->state != NGX_QUIC_PATH_VALIDATING) {
if (ngx_quic_validate_path(c, next) != NGX_OK) {
return NGX_ERROR;
}
@@ -492,7 +508,6 @@ ngx_quic_validate_path(ngx_connection_t
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
"quic initiated validation of path seq:%uL", path->seqnum);
- path->validating = 1;
path->tries = 0;
if (RAND_bytes(path->challenge1, 8) != 1) {
@@ -511,6 +526,7 @@ ngx_quic_validate_path(ngx_connection_t
pto = ngx_max(ngx_quic_pto(c, ctx), 1000);
path->expires = ngx_current_msec + pto;
+ path->state = NGX_QUIC_PATH_VALIDATING;
ngx_quic_set_path_timer(c);
@@ -558,6 +574,42 @@ ngx_quic_send_path_challenge(ngx_connect
}
+void
+ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+ ngx_quic_connection_t *qc;
+
+ qc = ngx_quic_get_connection(c);
+
+ if (path->max_mtu) {
+ if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
+ path->state = NGX_QUIC_PATH_IDLE;
+ ngx_quic_set_path_timer(c);
+ return;
+ }
+
+ path->mtud = (path->mtu + path->max_mtu) / 2;
+
+ } else {
+ path->mtud = path->mtu * 2;
+
+ if (path->mtud >= qc->ctp.max_udp_payload_size) {
+ path->mtud = qc->ctp.max_udp_payload_size;
+ path->max_mtu = qc->ctp.max_udp_payload_size;
+ }
+ }
+
+ path->state = NGX_QUIC_PATH_WAITING;
+ path->expires = ngx_current_msec + NGX_QUIC_PATH_MTU_DELAY;
+
+ ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL schedule mtu:%uz",
+ path->seqnum, path->mtud);
+
+ ngx_quic_set_path_timer(c);
+}
+
+
static void
ngx_quic_set_path_timer(ngx_connection_t *c)
{
@@ -578,7 +630,7 @@ ngx_quic_set_path_timer(ngx_connection_t
{
path = ngx_queue_data(q, ngx_quic_path_t, queue);
- if (!path->validating) {
+ if (path->state == NGX_QUIC_PATH_IDLE) {
continue;
}
@@ -600,22 +652,18 @@ ngx_quic_set_path_timer(ngx_connection_t
void
-ngx_quic_path_validation_handler(ngx_event_t *ev)
+ngx_quic_path_handler(ngx_event_t *ev)
{
ngx_msec_t now;
ngx_queue_t *q;
- ngx_msec_int_t left, next, pto;
- ngx_quic_path_t *path, *bkp;
+ ngx_msec_int_t left;
+ ngx_quic_path_t *path;
ngx_connection_t *c;
- ngx_quic_send_ctx_t *ctx;
ngx_quic_connection_t *qc;
c = ev->data;
qc = ngx_quic_get_connection(c);
- ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
-
- next = -1;
now = ngx_current_msec;
q = ngx_queue_head(&qc->paths);
@@ -625,83 +673,280 @@ ngx_quic_path_validation_handler(ngx_eve
path = ngx_queue_data(q, ngx_quic_path_t, queue);
q = ngx_queue_next(q);
- if (!path->validating) {
+ if (path->state == NGX_QUIC_PATH_IDLE) {
continue;
}
left = path->expires - now;
if (left > 0) {
-
- if (next == -1 || left < next) {
- next = left;
- }
-
- continue;
- }
-
- if (++path->tries < NGX_QUIC_PATH_RETRIES) {
- pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
-
- path->expires = ngx_current_msec + pto;
-
- if (next == -1 || pto < next) {
- next = pto;
- }
-
- /* retransmit */
- (void) ngx_quic_send_path_challenge(c, path);
-
continue;
}
- ngx_log_debug1(NGX_LOG_DEBUG_EVENT, ev->log, 0,
- "quic path seq:%uL validation failed", path->seqnum);
-
- /* found expired path */
-
- path->validated = 0;
- path->validating = 0;
-
-
- /* RFC 9000, 9.3.2. On-Path Address Spoofing
- *
- * To protect the connection from failing due to such a spurious
- * migration, an endpoint MUST revert to using the last validated
- * peer address when validation of a new peer address fails.
- */
-
- if (qc->path == path) {
- /* active path validation failed */
-
- bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
-
- if (bkp == NULL) {
- qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
- qc->error_reason = "no viable path";
- ngx_quic_close_connection(c, NGX_ERROR);
- return;
+ switch (path->state) {
+ case NGX_QUIC_PATH_VALIDATING:
+ if (ngx_quic_expire_path_validation(c, path) != NGX_OK) {
+ goto failed;
}
- qc->path = bkp;
- qc->path->tag = NGX_QUIC_PATH_ACTIVE;
+ break;
- ngx_quic_set_connection_path(c, qc->path);
+ case NGX_QUIC_PATH_WAITING:
+ if (ngx_quic_expire_path_mtu_delay(c, path) != NGX_OK) {
+ goto failed;
+ }
+
+ break;
- ngx_log_error(NGX_LOG_INFO, c->log, 0,
- "quic path seq:%uL addr:%V is restored from backup",
- qc->path->seqnum, &qc->path->addr_text);
+ case NGX_QUIC_PATH_MTUD:
+ if (ngx_quic_expire_path_mtu_discovery(c, path) != NGX_OK) {
+ goto failed;
+ }
- ngx_quic_path_dbg(c, "is active", qc->path);
- }
+ break;
- if (ngx_quic_free_path(c, path) != NGX_OK) {
- ngx_quic_close_connection(c, NGX_ERROR);
- return;
+ default:
+ break;
}
}
- if (next != -1) {
- ngx_add_timer(&qc->path_validation, next);
+ ngx_quic_set_path_timer(c);
+
+ return;
+
+failed:
+
+ ngx_quic_close_connection(c, NGX_ERROR);
+}
+
+
+static ngx_int_t
+ngx_quic_expire_path_validation(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+ ngx_msec_int_t pto;
+ ngx_quic_path_t *bkp;
+ ngx_quic_send_ctx_t *ctx;
+ ngx_quic_connection_t *qc;
+
+ qc = ngx_quic_get_connection(c);
+ ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+
+ if (++path->tries < NGX_QUIC_PATH_RETRIES) {
+ pto = ngx_max(ngx_quic_pto(c, ctx), 1000) << path->tries;
+ path->expires = ngx_current_msec + pto;
+
+ (void) ngx_quic_send_path_challenge(c, path);
+
+ return NGX_OK;
+ }
+
+ ngx_log_debug1(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL validation failed", path->seqnum);
+
+ /* found expired path */
+
+ path->validated = 0;
+
+
+ /* RFC 9000, 9.3.2. On-Path Address Spoofing
+ *
+ * To protect the connection from failing due to such a spurious
+ * migration, an endpoint MUST revert to using the last validated
+ * peer address when validation of a new peer address fails.
+ */
+
+ if (qc->path == path) {
+ /* active path validation failed */
+
+ bkp = ngx_quic_get_path(c, NGX_QUIC_PATH_BACKUP);
+
+ if (bkp == NULL) {
+ qc->error = NGX_QUIC_ERR_NO_VIABLE_PATH;
+ qc->error_reason = "no viable path";
+ return NGX_ERROR;
+ }
+
+ qc->path = bkp;
+ qc->path->tag = NGX_QUIC_PATH_ACTIVE;
+
+ ngx_quic_set_connection_path(c, qc->path);
+
+ ngx_log_error(NGX_LOG_INFO, c->log, 0,
+ "quic path seq:%uL addr:%V is restored from backup",
+ qc->path->seqnum, &qc->path->addr_text);
+
+ ngx_quic_path_dbg(c, "is active", qc->path);
+ }
+
+ return ngx_quic_free_path(c, path);
+}
+
+
+static ngx_int_t
+ngx_quic_expire_path_mtu_delay(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+ ngx_int_t rc;
+ ngx_uint_t i;
+ ngx_msec_t pto;
+ ngx_quic_send_ctx_t *ctx;
+ ngx_quic_connection_t *qc;
+
+ qc = ngx_quic_get_connection(c);
+ ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+
+ path->tries = 0;
+
+ for ( ;; ) {
+
+ for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
+ path->mtu_pnum[i] = NGX_QUIC_UNSET_PN;
+ }
+
+ rc = ngx_quic_send_path_mtu_probe(c, path);
+
+ if (rc == NGX_ERROR) {
+ return NGX_ERROR;
+ }
+
+ if (rc == NGX_OK) {
+ pto = ngx_quic_pto(c, ctx);
+ path->expires = ngx_current_msec + pto;
+ path->state = NGX_QUIC_PATH_MTUD;
+ return NGX_OK;
+ }
+
+ /* rc == NGX_DECLINED */
+
+ path->max_mtu = path->mtud;
+
+ if (path->max_mtu - path->mtu <= NGX_QUIC_PATH_MTU_PRECISION) {
+ path->state = NGX_QUIC_PATH_IDLE;
+ return NGX_OK;
+ }
+
+ path->mtud = (path->mtu + path->max_mtu) / 2;
}
}
+
+
+static ngx_int_t
+ngx_quic_expire_path_mtu_discovery(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+ ngx_int_t rc;
+ ngx_msec_int_t pto;
+ ngx_quic_send_ctx_t *ctx;
+ ngx_quic_connection_t *qc;
+
+ qc = ngx_quic_get_connection(c);
+ ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+
+ if (++path->tries < NGX_QUIC_PATH_RETRIES) {
+ rc = ngx_quic_send_path_mtu_probe(c, path);
+
+ if (rc == NGX_ERROR) {
+ return NGX_ERROR;
+ }
+
+ if (rc == NGX_OK) {
+ pto = ngx_quic_pto(c, ctx) << path->tries;
+ path->expires = ngx_current_msec + pto;
+ return NGX_OK;
+ }
+
+ /* rc == NGX_DECLINED */
+ }
+
+ ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL expired mtu:%uz",
+ path->seqnum, path->mtud);
+
+ path->max_mtu = path->mtud;
+
+ ngx_quic_discover_path_mtu(c, path);
+
+ return NGX_OK;
+}
+
+
+static ngx_int_t
+ngx_quic_send_path_mtu_probe(ngx_connection_t *c, ngx_quic_path_t *path)
+{
+ ngx_int_t rc;
+ ngx_uint_t log_error;
+ ngx_quic_frame_t frame;
+ ngx_quic_send_ctx_t *ctx;
+ ngx_quic_connection_t *qc;
+
+ ngx_memzero(&frame, sizeof(ngx_quic_frame_t));
+
+ frame.level = ssl_encryption_application;
+ frame.type = NGX_QUIC_FT_PING;
+
+ qc = ngx_quic_get_connection(c);
+ ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
+ path->mtu_pnum[path->tries] = ctx->pnum;
+
+ ngx_log_debug4(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL send probe "
+ "mtu:%uz pnum:%uL tries:%ui",
+ path->seqnum, path->mtud, ctx->pnum, path->tries);
+
+ log_error = c->log_error;
+ c->log_error = NGX_ERROR_IGNORE_EMSGSIZE;
+
+ rc = ngx_quic_frame_sendto(c, &frame, path->mtud, path);
+ c->log_error = log_error;
+
+ if (rc == NGX_ERROR) {
+ if (c->write->error) {
+ c->write->error = 0;
+
+ ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL rejected mtu:%uz",
+ path->seqnum, path->mtud);
+
+ return NGX_DECLINED;
+ }
+
+ return NGX_ERROR;
+ }
+
+ return NGX_OK;
+}
+
+
+ngx_int_t
+ngx_quic_handle_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path,
+ uint64_t min, uint64_t max)
+{
+ uint64_t pnum;
+ ngx_uint_t i;
+
+ if (path->state != NGX_QUIC_PATH_MTUD) {
+ return NGX_OK;
+ }
+
+ for (i = 0; i < NGX_QUIC_PATH_RETRIES; i++) {
+ pnum = path->mtu_pnum[i];
+
+ if (pnum == NGX_QUIC_UNSET_PN) {
+ break;
+ }
+
+ if (pnum < min || pnum > max) {
+ continue;
+ }
+
+ path->mtu = path->mtud;
+
+ ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
+ "quic path seq:%uL ack mtu:%uz",
+ path->seqnum, path->mtu);
+
+ ngx_quic_discover_path_mtu(c, path);
+
+ break;
+ }
+
+ return NGX_OK;
+}
diff --git a/src/event/quic/ngx_event_quic_migration.h b/src/event/quic/ngx_event_quic_migration.h
--- a/src/event/quic/ngx_event_quic_migration.h
+++ b/src/event/quic/ngx_event_quic_migration.h
@@ -18,10 +18,10 @@
#define NGX_QUIC_PATH_BACKUP 2
#define ngx_quic_path_dbg(c, msg, path) \
- ngx_log_debug6(NGX_LOG_DEBUG_EVENT, c->log, 0, \
- "quic path seq:%uL %s sent:%O recvd:%O state:%s%s", \
+ ngx_log_debug7(NGX_LOG_DEBUG_EVENT, c->log, 0, \
+ "quic path seq:%uL %s tx:%O rx:%O valid:%ui st:%d mtu:%uz",\
path->seqnum, msg, path->sent, path->received, \
- path->validated ? "V": "N", path->validating ? "R": "");
+ path->validated, path->state, path->mtu);
ngx_int_t ngx_quic_handle_path_challenge_frame(ngx_connection_t *c,
ngx_quic_header_t *pkt, ngx_quic_path_challenge_frame_t *f);
@@ -36,6 +36,10 @@ ngx_int_t ngx_quic_set_path(ngx_connecti
ngx_int_t ngx_quic_handle_migration(ngx_connection_t *c,
ngx_quic_header_t *pkt);
-void ngx_quic_path_validation_handler(ngx_event_t *ev);
+void ngx_quic_path_handler(ngx_event_t *ev);
+
+void ngx_quic_discover_path_mtu(ngx_connection_t *c, ngx_quic_path_t *path);
+ngx_int_t ngx_quic_handle_path_mtu(ngx_connection_t *c,
+ ngx_quic_path_t *path, uint64_t min, uint64_t max);
#endif /* _NGX_EVENT_QUIC_MIGRATION_H_INCLUDED_ */
diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
--- a/src/event/quic/ngx_event_quic_output.c
+++ b/src/event/quic/ngx_event_quic_output.c
@@ -10,9 +10,6 @@
#include <ngx_event_quic_connection.h>
-#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT 1252
-#define NGX_QUIC_MAX_UDP_PAYLOAD_OUT6 1232
-
#define NGX_QUIC_MAX_UDP_SEGMENT_BUF 65487 /* 65K - IPv6 header */
#define NGX_QUIC_MAX_SEGMENTS 64 /* UDP_MAX_SEGMENTS */
@@ -61,21 +58,6 @@ static size_t ngx_quic_path_limit(ngx_co
size_t size);
-size_t
-ngx_quic_max_udp_payload(ngx_connection_t *c)
-{
- /* TODO: path MTU discovery */
-
-#if (NGX_HAVE_INET6)
- if (c->sockaddr->sa_family == AF_INET6) {
- return NGX_QUIC_MAX_UDP_PAYLOAD_OUT6;
- }
-#endif
-
- return NGX_QUIC_MAX_UDP_PAYLOAD_OUT;
-}
-
-
ngx_int_t
ngx_quic_output(ngx_connection_t *c)
{
@@ -142,10 +124,7 @@ ngx_quic_create_datagrams(ngx_connection
p = dst;
- len = ngx_min(qc->ctp.max_udp_payload_size,
- NGX_QUIC_MAX_UDP_PAYLOAD_SIZE);
-
- len = ngx_quic_path_limit(c, path, len);
+ len = ngx_quic_path_limit(c, path, path->mtu);
pad = ngx_quic_get_padding_level(c);
@@ -299,9 +278,7 @@ ngx_quic_allow_segmentation(ngx_connecti
ctx = ngx_quic_get_send_ctx(qc, ssl_encryption_application);
bytes = 0;
-
- len = ngx_min(qc->ctp.max_udp_payload_size,
- NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+ len = ngx_min(qc->path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
for (q = ngx_queue_head(&ctx->frames);
q != ngx_queue_sentinel(&ctx->frames);
@@ -345,8 +322,7 @@ ngx_quic_create_segments(ngx_connection_
return NGX_ERROR;
}
- segsize = ngx_min(qc->ctp.max_udp_payload_size,
- NGX_QUIC_MAX_UDP_SEGMENT_BUF);
+ segsize = ngx_min(path->mtu, NGX_QUIC_MAX_UDP_SEGMENT_BUF);
p = dst;
end = dst + sizeof(dst);
diff --git a/src/event/quic/ngx_event_quic_output.h b/src/event/quic/ngx_event_quic_output.h
--- a/src/event/quic/ngx_event_quic_output.h
+++ b/src/event/quic/ngx_event_quic_output.h
@@ -12,8 +12,6 @@
#include <ngx_core.h>
-size_t ngx_quic_max_udp_payload(ngx_connection_t *c);
-
ngx_int_t ngx_quic_output(ngx_connection_t *c);
ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
diff --git a/src/event/quic/ngx_event_quic_ssl.c b/src/event/quic/ngx_event_quic_ssl.c
--- a/src/event/quic/ngx_event_quic_ssl.c
+++ b/src/event/quic/ngx_event_quic_ssl.c
@@ -494,6 +494,8 @@ ngx_quic_crypto_input(ngx_connection_t *
*/
ngx_quic_discard_ctx(c, ssl_encryption_handshake);
+ ngx_quic_discover_path_mtu(c, qc->path);
+
/* start accepting clients on negotiated number of server ids */
if (ngx_quic_create_sockets(c) != NGX_OK) {
return NGX_ERROR;
diff --git a/src/os/unix/ngx_errno.h b/src/os/unix/ngx_errno.h
--- a/src/os/unix/ngx_errno.h
+++ b/src/os/unix/ngx_errno.h
@@ -54,6 +54,7 @@ typedef int ngx_err_t;
#define NGX_ENOMOREFILES 0
#define NGX_ELOOP ELOOP
#define NGX_EBADF EBADF
+#define NGX_EMSGSIZE EMSGSIZE
#if (NGX_HAVE_OPENAT)
#define NGX_EMLINK EMLINK
diff --git a/src/os/win32/ngx_errno.h b/src/os/win32/ngx_errno.h
--- a/src/os/win32/ngx_errno.h
+++ b/src/os/win32/ngx_errno.h
@@ -57,6 +57,7 @@ typedef DWORD ngx_e
#define NGX_EILSEQ ERROR_NO_UNICODE_TRANSLATION
#define NGX_ELOOP 0
#define NGX_EBADF WSAEBADF
+#define NGX_EMSGSIZE WSAEMSGSIZE
#define NGX_EALREADY WSAEALREADY
#define NGX_EINVAL WSAEINVAL
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
https://mailman.nginx.org/mailman/listinfo/nginx-devel