From 61d56d8896fb083444ac6a57d2654977fd47bd1d Mon Sep 17 00:00:00 2001 From: Aman Khinvasara Date: Fri, 13 Jun 2025 21:29:43 +0000 Subject: [PATCH 1/2] Reapply "quic: redesign service queues" This reverts commit 543e6f0fb72451eb6294f249c135322e05b8a50d. --- src/waltz/quic/Local.mk | 3 + src/waltz/quic/fd_quic.c | 322 ++++++------------- src/waltz/quic/fd_quic.h | 5 +- src/waltz/quic/fd_quic_conn.c | 13 + src/waltz/quic/fd_quic_conn.h | 18 +- src/waltz/quic/fd_quic_private.h | 35 +- src/waltz/quic/tests/Local.mk | 3 + src/waltz/quic/tests/fuzz_quic_wire.c | 44 ++- src/waltz/quic/tests/test_quic_concurrency.c | 2 +- src/waltz/quic/tests/test_quic_conformance.c | 21 +- src/waltz/quic/tests/test_quic_hs.c | 37 ++- src/waltz/quic/tests/test_quic_keep_alive.c | 15 +- 12 files changed, 208 insertions(+), 310 deletions(-) diff --git a/src/waltz/quic/Local.mk b/src/waltz/quic/Local.mk index b2d81b7510..7784e0c288 100644 --- a/src/waltz/quic/Local.mk +++ b/src/waltz/quic/Local.mk @@ -19,6 +19,9 @@ $(call add-objs,fd_quic_pkt_meta,fd_quic) $(call add-hdrs,fd_quic_retry.h) $(call add-objs,fd_quic_retry,fd_quic) +$(call add-hdrs,fd_quic_svc_q.h) +$(call add-objs,fd_quic_svc_q,fd_quic) + $(call add-hdrs,fd_quic_proto.h fd_quic_proto_structs.h fd_quic_types.h) $(call add-hdrs,fd_quic_stream_pool.h) diff --git a/src/waltz/quic/fd_quic.c b/src/waltz/quic/fd_quic.c index 3f9ecba5f4..72eab0ea9b 100644 --- a/src/waltz/quic/fd_quic.c +++ b/src/waltz/quic/fd_quic.c @@ -9,6 +9,7 @@ #include "fd_quic_proto.h" #include "fd_quic_proto.c" #include "fd_quic_retry.h" +#include "fd_quic_svc_q.h" #define FD_TEMPL_FRAME_CTX fd_quic_frame_ctx_t #include "templ/fd_quic_frame_handler_decl.h" @@ -148,6 +149,13 @@ fd_quic_footprint_ext( fd_quic_limits_t const * limits, if( FD_UNLIKELY( !log_footprint ) ) { FD_LOG_WARNING(( "invalid fd_quic_log_buf_footprint for depth %lu", log_depth )); return 0UL; } offs += log_footprint; + /* allocate space for service timers */ + offs = fd_ulong_align_up( offs, fd_quic_svc_timers_align() ); + layout->svc_timers_off = offs; + ulong svc_timers_footprint = fd_quic_svc_timers_footprint( limits->conn_cnt ); + if( FD_UNLIKELY( !svc_timers_footprint ) ) { FD_LOG_WARNING(( "invalid fd_quic_svc_timers_footprint" )); return 0UL; } + offs += svc_timers_footprint; + return offs; } @@ -479,15 +487,14 @@ fd_quic_init( fd_quic_t * quic ) { /* used for indexing */ conn->conn_idx = (uint)j; + conn->free_conn_next = UINT_MAX; - conn->svc_type = UINT_MAX; - conn->svc_next = conn->svc_prev = UINT_MAX; /* start with minimum supported max datagram */ /* peers may allow more */ conn->tx_max_datagram_sz = FD_QUIC_INITIAL_PAYLOAD_SZ_MAX; /* add to free list */ - *fd_ptr_if( last!=NULL, &last->svc_next, &state->free_conn_list ) = (uint)j; + *fd_ptr_if( last!=NULL, &last->free_conn_next, &state->free_conn_list ) = (uint)j; last = conn; } @@ -502,14 +509,8 @@ fd_quic_init( fd_quic_t * quic ) { } /* State: Initialize service queue */ - - for( uint j=0U; jsvc_queue[j].head = UINT_MAX; - state->svc_queue[j].tail = UINT_MAX; - } - state->svc_delay[ FD_QUIC_SVC_INSTANT ] = 0UL; - state->svc_delay[ FD_QUIC_SVC_ACK_TX ] = quic->config.ack_delay; - state->svc_delay[ FD_QUIC_SVC_WAIT ] = (quic->config.idle_timeout)>>(quic->config.keep_alive); + ulong svc_base = (ulong)quic + layout.svc_timers_off; + state->svc_timers = fd_quic_svc_timers_init( (void *)svc_base, limits->conn_cnt ); /* Check TX AIO */ @@ -668,147 +669,67 @@ fd_quic_reconstruct_pkt_num( ulong pktnum_comp, return candidate_pn; } -static void -fd_quic_svc_unqueue( fd_quic_state_t * state, - fd_quic_conn_t * conn ) { - - fd_quic_svc_queue_t * queue = &state->svc_queue[ conn->svc_type ]; - uint prev_idx = conn->svc_prev; - uint next_idx = conn->svc_next; - fd_quic_conn_t * prev_ele = fd_quic_conn_at_idx( state, prev_idx ); - fd_quic_conn_t * next_ele = fd_quic_conn_at_idx( state, next_idx ); - - *fd_ptr_if( next_idx!=UINT_MAX, &next_ele->svc_prev, &queue->head ) = prev_idx; - *fd_ptr_if( prev_idx!=UINT_MAX, &prev_ele->svc_next, &queue->tail ) = next_idx; - +/* fd_quic_svc_prep_schedule sets conn->svc_meta.next_timeout to + min of current and provided expiry time. */ +static inline void +fd_quic_svc_prep_schedule( fd_quic_conn_t * conn, + ulong expiry ) { + conn->svc_meta.next_timeout = fd_ulong_min( conn->svc_meta.next_timeout, expiry ); } -void -fd_quic_svc_schedule( fd_quic_state_t * state, - fd_quic_conn_t * conn, - uint svc_type ) { - if( FD_UNLIKELY( svc_type >= FD_QUIC_SVC_CNT ) ) { - FD_LOG_ERR(( "fd_quic_svc_schedule called with invalid svc_type (%u)", svc_type )); - } - if( FD_UNLIKELY( conn->state == FD_QUIC_CONN_STATE_INVALID ) ) { - FD_LOG_ERR(( "fd_quic_svc_schedule called with invalid conn" )); - } - - int is_queued = conn->svc_type < FD_QUIC_SVC_CNT; - long cur_delay = (long)conn->svc_time - (long)state->now; - long tgt_delay = (long)state->svc_delay[ svc_type ]; - - /* Don't reschedule if already scheduled sooner */ - if( is_queued && cur_delay<=tgt_delay ) return; - - /* Remove entry from current queue */ - if( is_queued ) { - fd_quic_svc_unqueue( state, conn ); - is_queued = 0; - } - - /* Add into new queue */ - fd_quic_svc_queue_t * queue = &state->svc_queue[ svc_type ]; - uint old_tail_idx = queue->tail; - fd_quic_conn_t * old_tail_ele = fd_quic_conn_at_idx( state, old_tail_idx ); - conn->svc_type = svc_type; - conn->svc_time = state->now + (ulong)tgt_delay; - conn->svc_prev = UINT_MAX; - conn->svc_next = old_tail_idx; - *fd_ptr_if( old_tail_idx!=UINT_MAX, &old_tail_ele->svc_prev, &queue->head ) = (uint)conn->conn_idx; - queue->tail = (uint)conn->conn_idx; - +/* fd_quic_svc_prep_schedule_now sets conn->svc_meta.next_timeout to + current time. For when state is not already available */ +static inline void +fd_quic_svc_prep_schedule_now( fd_quic_conn_t * conn ) { + fd_quic_svc_prep_schedule( conn, fd_quic_get_state(conn->quic)->now ); } -/* fd_quic_svc_queue_validate checks the following: - - dlist prev and next chains are in agreement - - all nodes belong to the same list - - no cycles in list - - no excessive delays (assumes no monotonically increasing timestamp) */ - -static void -fd_quic_svc_queue_validate( fd_quic_t * quic, - uint svc_type ) { - FD_TEST( svc_type < FD_QUIC_SVC_CNT ); - fd_quic_state_t * state = fd_quic_get_state( quic ); - ulong now = state->now; - - ulong cnt = 0UL; - uint prev = UINT_MAX; - uint node = state->svc_queue[ svc_type ].tail; - while( node!=UINT_MAX ) { - FD_TEST( node <= quic->limits.conn_cnt ); - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, node ); - FD_TEST( conn->state != FD_QUIC_CONN_STATE_INVALID ); - FD_TEST( conn->svc_type == svc_type ); - FD_TEST( conn->svc_time <= now + state->svc_delay[ svc_type ] ); - FD_TEST( conn->svc_prev == prev ); - conn->visited = 1U; - - prev = node; - node = conn->svc_next; - cnt++; - FD_TEST( cnt <= quic->limits.conn_cnt ); - - } - FD_TEST( prev == state->svc_queue[ svc_type ].head ); +/* Scheduling helper. Retrieves timers from conn to call schedule */ +static inline void +fd_quic_svc_schedule1( fd_quic_conn_t * conn ) { + fd_quic_svc_schedule( fd_quic_get_state(conn->quic)->svc_timers, conn ); } /* validates the free conn list doesn't cycle, point nowhere, leak, or point to live conn */ static void fd_quic_conn_free_validate( fd_quic_t * quic ) { fd_quic_state_t * state = fd_quic_get_state( quic ); + + /* initialize visited */ + fd_quic_conn_validate_init( quic ); + ulong cnt = 0UL; uint node = state->free_conn_list; while( node!=UINT_MAX ) { FD_TEST( node <= quic->limits.conn_cnt ); fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, node ); FD_TEST( conn->state == FD_QUIC_CONN_STATE_INVALID ); - FD_TEST( conn->svc_prev == UINT_MAX ); - FD_TEST( conn->svc_type == UINT_MAX ); conn->visited = 1U; - node = conn->svc_next; + node = conn->free_conn_next; cnt++; FD_TEST( cnt <= quic->limits.conn_cnt ); } -} -void -fd_quic_svc_validate( fd_quic_t * quic ) { - fd_quic_state_t * state = fd_quic_get_state( quic ); for( ulong j=0UL; j < quic->limits.conn_cnt; j++ ) { fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, j ); FD_TEST( conn->conn_idx==j ); - conn->visited = 0U; if( conn->state == FD_QUIC_CONN_STATE_INVALID ) { - FD_TEST( conn->svc_type==UINT_MAX ); - FD_TEST( conn->svc_prev==UINT_MAX ); - continue; + FD_TEST( conn->visited ); + } else { + FD_TEST( !conn->visited ); } } +} - fd_quic_svc_queue_validate( quic, FD_QUIC_SVC_INSTANT ); - fd_quic_svc_queue_validate( quic, FD_QUIC_SVC_ACK_TX ); - fd_quic_svc_queue_validate( quic, FD_QUIC_SVC_WAIT ); +void +fd_quic_state_validate( fd_quic_t * quic ) { + fd_quic_state_t * state = fd_quic_get_state( quic ); - for( ulong j=0UL; j < quic->limits.conn_cnt; j++ ) { - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, j ); - FD_TEST( conn->conn_idx==j ); - if( conn->state == FD_QUIC_CONN_STATE_INVALID ) { - FD_TEST( conn->svc_type==UINT_MAX ); - FD_TEST( conn->svc_prev==UINT_MAX ); - FD_TEST( !conn->visited ); - continue; - } - FD_TEST( conn->visited ); /* if assertion fails, the conn was leaked */ - } + /* init visited for svc_timers_validate to use */ + fd_quic_conn_validate_init( quic ); + FD_TEST( fd_quic_svc_timers_validate( state->svc_timers, quic ) ); fd_quic_conn_free_validate( quic ); - for( ulong j=0UL; j < quic->limits.conn_cnt; j++ ) { - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, j ); - FD_TEST( conn->conn_idx==j ); - FD_TEST( conn->visited ); - } } /* Helpers for generating fd_quic_log entries */ @@ -851,7 +772,8 @@ fd_quic_conn_error1( fd_quic_conn_t * conn, conn->reason = reason; /* set connection to be serviced ASAP */ - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); + fd_quic_svc_schedule1( conn ); } static void @@ -1281,7 +1203,8 @@ fd_quic_stream_send( fd_quic_stream_t * stream, } /* schedule send */ - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); + fd_quic_svc_schedule1( conn ); return FD_QUIC_SUCCESS; } @@ -1313,7 +1236,8 @@ fd_quic_conn_set_rx_max_data( fd_quic_conn_t * conn, ulong rx_max_data ) { conn->srx->rx_max_data = rx_max_data; conn->flags |= FD_QUIC_CONN_FLAGS_MAX_DATA; conn->upd_pkt_number = FD_QUIC_PKT_NUM_PENDING; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); + fd_quic_svc_schedule1( conn ); } } @@ -1847,7 +1771,7 @@ fd_quic_handle_v1_initial( fd_quic_t * quic, conn->exp_pkt_number[0] = fd_ulong_max( conn->exp_pkt_number[0], pkt_number+1UL ); /* insert into service queue */ - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule( conn, state->now ); /* return number of bytes consumed */ return tot_sz; @@ -2059,7 +1983,7 @@ fd_quic_handle_v1_retry( /* send the INITIAL */ conn->upd_pkt_number = FD_QUIC_PKT_NUM_PENDING; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); return cur_sz; } @@ -2095,14 +2019,12 @@ fd_quic_lazy_ack_pkt( fd_quic_t * quic, ( !!(pkt->ack_flag & ACK_FLAG_RQD) ) & ( ( pkt->enc_level == fd_quic_enc_level_initial_id ) | ( pkt->enc_level == fd_quic_enc_level_handshake_id ) ); - uint svc_type; if( ack_sz_threshold_hit | force_instant_ack ) { conn->unacked_sz = 0UL; - svc_type = FD_QUIC_SVC_INSTANT; + fd_quic_svc_prep_schedule( conn, state->now ); } else { - svc_type = FD_QUIC_SVC_ACK_TX; + fd_quic_svc_prep_schedule( conn, state->now + quic->config.ack_delay ); } - fd_quic_svc_schedule( state, conn, svc_type ); return res; } @@ -2361,6 +2283,8 @@ fd_quic_process_quic_packet_v1( fd_quic_t * quic, int ack_type = fd_quic_lazy_ack_pkt( quic, conn, pkt ); quic->metrics.ack_tx[ ack_type ]++; + fd_quic_svc_schedule( state->svc_timers, conn ); + if( pkt->rtt_ack_time ) { fd_quic_sample_rtt( conn, (long)pkt->rtt_ack_time, (long)pkt->rtt_ack_delay ); } @@ -2938,14 +2862,10 @@ fd_quic_svc_poll( fd_quic_t * quic, if( FD_UNLIKELY( conn->state == FD_QUIC_CONN_STATE_INVALID ) ) { /* connection shouldn't have been scheduled, and is now removed, so just continue */ - FD_LOG_ERR(( "Invalid conn in schedule (svc_type=%u)", conn->svc_type )); + FD_LOG_ERR(( "Invalid conn in schedule %lu", conn->our_conn_id )); return 1; } - //FD_DEBUG( FD_LOG_DEBUG(( "svc_poll conn=%p svc_type=%u", (void *)conn, conn->svc_type )); ) - conn->svc_type = UINT_MAX; - conn->svc_time = LONG_MAX; - if( FD_UNLIKELY( now >= conn->last_activity + ( conn->idle_timeout_ticks / 2 ) ) ) { if( FD_UNLIKELY( now >= conn->last_activity + conn->idle_timeout_ticks ) ) { if( FD_LIKELY( conn->state != FD_QUIC_CONN_STATE_DEAD ) ) { @@ -2988,68 +2908,31 @@ fd_quic_svc_poll( fd_quic_t * quic, fd_quic_conn_free( quic, conn ); break; default: - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_WAIT ); + /* prep idle timeout or keep alive at idle timeout/2 */ + fd_quic_svc_prep_schedule( conn, state->now + (conn->idle_timeout_ticks>>(quic->config.keep_alive)) ); + fd_quic_svc_schedule( state->svc_timers, conn ); break; } return 1; } -static int -fd_quic_svc_poll_head( fd_quic_t * quic, - uint svc_type, - ulong now ) { - fd_quic_state_t * state = fd_quic_get_state( quic ); - - /* Peek head of queue */ - fd_quic_svc_queue_t * queue = &state->svc_queue[ svc_type ]; - if( queue->head==UINT_MAX ) return 0; - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, queue->head ); - if( conn->svc_time > now ) return 0; - - /* Remove head of queue */ - uint prev_idx = conn->svc_prev; - fd_quic_conn_t * prev_ele = fd_quic_conn_at_idx( state, prev_idx ); - *fd_ptr_if( prev_idx!=UINT_MAX, &prev_ele->svc_next, &queue->tail ) = UINT_MAX; - queue->head = prev_idx; - - return fd_quic_svc_poll( quic, conn, now ); -} - -static int -fd_quic_svc_poll_tail( fd_quic_t * quic, - uint svc_type, - ulong now ) { - fd_quic_state_t * state = fd_quic_get_state( quic ); - - /* Peek tail of queue */ - fd_quic_svc_queue_t * queue = &state->svc_queue[ svc_type ]; - if( queue->tail==UINT_MAX ) return 0; - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, queue->tail ); - if( conn->svc_time > now ) return 0; - - /* Remove tail of queue */ - uint next_idx = conn->svc_next; - fd_quic_conn_t * next_ele = fd_quic_conn_at_idx( state, next_idx ); - *fd_ptr_if( next_idx!=UINT_MAX, &next_ele->svc_prev, &queue->head ) = UINT_MAX; - queue->tail = next_idx; - - return fd_quic_svc_poll( quic, conn, now ); -} - int fd_quic_service( fd_quic_t * quic ) { fd_quic_state_t * state = fd_quic_get_state( quic ); - ulong now = fd_quic_now( quic ); + ulong now = fd_quic_now( quic ); state->now = now; long now_ticks = fd_tickcount(); - int cnt = 0; - cnt += fd_quic_svc_poll_tail( quic, FD_QUIC_SVC_INSTANT, now ); - cnt += fd_quic_svc_poll_head( quic, FD_QUIC_SVC_ACK_TX, now ); - cnt += fd_quic_svc_poll_head( quic, FD_QUIC_SVC_WAIT, now ); + fd_quic_svc_timers_t * timers = state->svc_timers; + fd_quic_svc_event_t next = fd_quic_svc_timers_next( timers, now, 1 /* pop */); + if( FD_UNLIKELY( next.conn == NULL ) ) { + return 0; + } + + int cnt = fd_quic_svc_poll( quic, next.conn, now ); long delta_ticks = fd_tickcount() - now_ticks; @@ -3659,7 +3542,7 @@ fd_quic_conn_tx( fd_quic_t * quic, if( conn->tx_ptr != conn->tx_buf_conn ) { fd_quic_tx_buffered( quic, conn ); - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule( conn, state->now ); return; } @@ -3837,7 +3720,7 @@ fd_quic_conn_tx( fd_quic_t * quic, /* reschedule, since some data was unable to be sent */ /* TODO might want to add a backoff here */ - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule( conn, state->now ); break; } @@ -3927,8 +3810,8 @@ fd_quic_conn_tx( fd_quic_t * quic, FD_LOG_WARNING(( "fd_quic_crypto_encrypt failed" )); /* this situation is unlikely to improve, so kill the connection */ - conn->state = FD_QUIC_CONN_STATE_DEAD; - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_INSTANT ); + fd_quic_set_conn_state( conn, FD_QUIC_CONN_STATE_DEAD ); + fd_quic_svc_prep_schedule_now( conn ); quic->metrics.conn_aborted_cnt++; break; } @@ -3939,8 +3822,6 @@ fd_quic_conn_tx( fd_quic_t * quic, /* we have committed the packet into the buffer, so inc pkt_number */ conn->pkt_number[pn_space]++; - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_WAIT ); - if( enc_level == fd_quic_enc_level_appdata_id ) { /* short header must be last in datagram so send in packet immediately */ @@ -3974,6 +3855,7 @@ fd_quic_conn_tx( fd_quic_t * quic, void fd_quic_conn_service( fd_quic_t * quic, fd_quic_conn_t * conn, ulong now ) { (void)now; + conn->svc_meta.next_timeout = ULONG_MAX; /* Send new rtt measurement probe? */ if( FD_UNLIKELY(now > conn->last_ack + (ulong)conn->rtt_period_ticks) ) { @@ -4042,7 +3924,6 @@ fd_quic_conn_service( fd_quic_t * quic, fd_quic_conn_t * conn, ulong now ) { /* schedule another fd_quic_conn_service to free the conn */ fd_quic_set_conn_state( conn, FD_QUIC_CONN_STATE_DEAD ); /* TODO need draining state wait for 3 * TPO */ quic->metrics.conn_closed_cnt++; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); break; @@ -4053,7 +3934,6 @@ fd_quic_conn_service( fd_quic_t * quic, fd_quic_conn_t * conn, ulong now ) { /* schedule another fd_quic_conn_service to free the conn */ fd_quic_set_conn_state( conn, FD_QUIC_CONN_STATE_DEAD ); quic->metrics.conn_aborted_cnt++; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); break; @@ -4154,15 +4034,10 @@ fd_quic_conn_free( fd_quic_t * quic, } conn->tls_hs = NULL; - /* remove connection from service queue */ - if( FD_LIKELY( conn->svc_type != UINT_MAX ) ) { - fd_quic_svc_unqueue( state, conn ); - } + fd_quic_svc_cancel( state->svc_timers, conn ); /* put connection back in free list */ - conn->svc_type = UINT_MAX; - conn->svc_prev = UINT_MAX; - conn->svc_next = state->free_conn_list; + conn->free_conn_next = state->free_conn_list; state->free_conn_list = conn->conn_idx; fd_quic_set_conn_state( conn, FD_QUIC_CONN_STATE_INVALID ); @@ -4259,12 +4134,13 @@ fd_quic_connect( fd_quic_t * quic, fd_quic_gen_initial_secret_and_keys( conn, &peer_conn_id, /* is_server */ 0 ); - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_INSTANT ); - /* set "called_conn_new" to indicate we should call conn_final upon teardown */ conn->called_conn_new = 1; + fd_quic_svc_prep_schedule( conn, state->now ); + fd_quic_svc_schedule( state->svc_timers, conn ); + /* everything initialized */ return conn; @@ -4321,16 +4197,14 @@ fd_quic_conn_create( fd_quic_t * quic, insert_entry->conn = conn; /* remove from free list */ - state->free_conn_list = conn->svc_next; - conn->svc_next = UINT_MAX; + state->free_conn_list = conn->free_conn_next; + conn->free_conn_next = UINT_MAX; /* initialize connection members */ conn->quic = quic; conn->server = !!server; conn->established = 0; conn->called_conn_new = 0; - conn->svc_type = UINT_MAX; - conn->svc_time = LONG_MAX; conn->our_conn_id = our_conn_id; conn->host = (fd_quic_net_endpoint_t){ .ip_addr = self_ip_addr, /* may be 0, if outgoing */ @@ -4448,8 +4322,11 @@ fd_quic_conn_create( fd_quic_t * quic, quic->metrics.conn_active_cnt++; quic->metrics.conn_created_cnt++; - /* immediately schedule it */ - fd_quic_svc_schedule( state, conn, FD_QUIC_SVC_WAIT ); + fd_quic_svc_timers_init_conn( conn ); + + /* prep idle timeout or keep alive at idle timeout/2 */ + ulong delay = quic->config.idle_timeout>>(quic->config.keep_alive); + fd_quic_svc_prep_schedule( conn, state->now+delay ); /* return connection */ return conn; @@ -4457,22 +4334,10 @@ fd_quic_conn_create( fd_quic_t * quic, ulong fd_quic_get_next_wakeup( fd_quic_t * quic ) { - /* FIXME not optimized for performance */ - fd_quic_state_t * state = fd_quic_get_state( quic ); - if( state->svc_queue[ FD_QUIC_SVC_INSTANT ].tail != UINT_MAX ) return 0UL; - - long ack_wakeup = LONG_MAX; - long wait_wakeup = LONG_MAX; - if( state->svc_queue[ FD_QUIC_SVC_ACK_TX ].head != UINT_MAX ) { - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, state->svc_queue[ FD_QUIC_SVC_ACK_TX ].head ); - ack_wakeup = (long)conn->svc_time; - } - if( state->svc_queue[ FD_QUIC_SVC_WAIT ].head != UINT_MAX ) { - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, state->svc_queue[ FD_QUIC_SVC_WAIT ].head ); - wait_wakeup = (long)conn->svc_time; - } - - return (ulong)fd_long_max( fd_long_min( ack_wakeup, wait_wakeup ), 0L ); + fd_quic_state_t * state = fd_quic_get_state( quic ); + ulong now = state->now; + fd_quic_svc_event_t next = fd_quic_svc_timers_next( state->svc_timers, now, 0 ); + return next.timeout; } /* frame handling function default definitions */ @@ -4591,7 +4456,7 @@ fd_quic_pkt_meta_retry( fd_quic_t * quic, } if( exit ) { - if( expiry != ~0ul ) fd_quic_svc_schedule1( conn, FD_QUIC_SVC_WAIT ); + fd_quic_svc_prep_schedule( conn, expiry ); return; }; @@ -4698,7 +4563,7 @@ fd_quic_pkt_meta_retry( fd_quic_t * quic, } /* reschedule to ensure the data gets processed */ - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); /* free pkt_meta */ fd_quic_pkt_meta_remove_range( &tracker->sent_pkt_metas[enc_level], @@ -4927,7 +4792,7 @@ fd_quic_reclaim_pkt_meta( fd_quic_conn_t * conn, stream->stream_flags |= FD_QUIC_STREAM_FLAGS_UNSENT; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); } else { /* if no data to send, check whether fin bits are set */ if( ( stream->state & fin_state_mask ) == fin_state_mask ) { @@ -5480,7 +5345,7 @@ fd_quic_handle_conn_close_frame( fd_quic_conn_t * conn ) { } conn->upd_pkt_number = FD_QUIC_PKT_NUM_PENDING; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); } static ulong @@ -5574,7 +5439,7 @@ fd_quic_handle_handshake_done_frame( } /* Instantly acknowledge the first HANDSHAKE_DONE frame */ - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); /* RFC 9001 4.9.2. Discarding Handshake Keys > An endpoint MUST discard its Handshake keys when the @@ -5631,7 +5496,8 @@ fd_quic_conn_close( fd_quic_conn_t * conn, } /* set connection to be serviced ASAP */ - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + fd_quic_svc_prep_schedule_now( conn ); + fd_quic_svc_schedule1( conn ); } void diff --git a/src/waltz/quic/fd_quic.h b/src/waltz/quic/fd_quic.h index b062fd55e1..a1b781e686 100644 --- a/src/waltz/quic/fd_quic.h +++ b/src/waltz/quic/fd_quic.h @@ -135,6 +135,7 @@ struct fd_quic_layout { int lg_slot_cnt; /* see conn_map_new */ ulong hs_pool_off; /* offset of the handshake pool */ ulong stream_pool_off; /* offset of the stream pool */ + ulong svc_timers_off; /* offset of the service timers */ ulong pkt_meta_pool_off; /* offset of the pkt_meta pool */ }; @@ -588,13 +589,13 @@ fd_quic_get_next_wakeup( fd_quic_t * quic ); FD_QUIC_API int fd_quic_service( fd_quic_t * quic ); -/* fd_quic_svc_validate checks for violations of service queue and free +/* fd_quic_state_validate checks for violations of service queue and free list invariants, such as cycles in linked lists. Prints to warning/ error log and exits the process if checks fail. Intended for use in tests. */ void -fd_quic_svc_validate( fd_quic_t * quic ); +fd_quic_state_validate( fd_quic_t * quic ); /* Stream Send API ****************************************************/ diff --git a/src/waltz/quic/fd_quic_conn.c b/src/waltz/quic/fd_quic_conn.c index 4b434d20bb..1cf46d050d 100644 --- a/src/waltz/quic/fd_quic_conn.c +++ b/src/waltz/quic/fd_quic_conn.c @@ -126,6 +126,9 @@ fd_quic_conn_new( void * mem, state->pkt_meta_pool ); + /* Initialize service timers */ + fd_quic_svc_timers_init_conn( conn ); + return conn; } @@ -162,3 +165,13 @@ fd_quic_conn_reason_name( uint reason ) { return name ? name : "N/A"; } + +void +fd_quic_conn_validate_init( fd_quic_t * quic ) { + fd_quic_state_t * state = fd_quic_get_state( quic ); + ulong conn_cnt = quic->limits.conn_cnt; + for( ulong j=0UL; jvisited = 0U; + } +} diff --git a/src/waltz/quic/fd_quic_conn.h b/src/waltz/quic/fd_quic_conn.h index 3878592fb7..da7cacbe92 100644 --- a/src/waltz/quic/fd_quic_conn.h +++ b/src/waltz/quic/fd_quic_conn.h @@ -7,6 +7,7 @@ #include "fd_quic_conn_id.h" #include "crypto/fd_quic_crypto_suites.h" #include "fd_quic_pkt_meta.h" +#include "fd_quic_svc_q.h" #include "../fd_rtt_est.h" #define FD_QUIC_CONN_STATE_INVALID 0 /* dead object / freed */ @@ -87,14 +88,10 @@ struct fd_quic_conn { uint key_phase : 1; uint key_update : 1; - /* Service queue dlist membership. All active conns (state not INVALID) - are in a service queue, FD_QUIC_SVC_TYPE_WAIT by default. - Free conns (svc_type==UINT_MAX) are members of a singly linked list - (only src_next set) */ - uint svc_type; /* FD_QUIC_SVC_{...} or UINT_MAX */ - uint svc_prev; - uint svc_next; - ulong svc_time; /* service may be delayed until this timestamp */ + /* metadata used by service queue */ + fd_quic_svc_timers_conn_meta_t svc_meta; + /* maintains list of free connections */ + uint free_conn_next; ulong our_conn_id; @@ -274,6 +271,11 @@ fd_quic_conn_set_context( fd_quic_conn_t * conn, void * context ); void * fd_quic_conn_get_context( fd_quic_conn_t * conn ); + +/* set all conns to not visited, used for validation */ +void +fd_quic_conn_validate_init( fd_quic_t* quic); + FD_PROTOTYPES_END #endif /* HEADER_fd_src_waltz_quic_fd_quic_conn_h */ diff --git a/src/waltz/quic/fd_quic_private.h b/src/waltz/quic/fd_quic_private.h index de7ff75a9c..c45d1adf06 100644 --- a/src/waltz/quic/fd_quic_private.h +++ b/src/waltz/quic/fd_quic_private.h @@ -10,6 +10,7 @@ #include "tls/fd_quic_tls.h" #include "fd_quic_stream_pool.h" #include "fd_quic_pretty_print.h" +#include "fd_quic_svc_q.h" #include #include "../../util/log/fd_dtrace.h" @@ -41,22 +42,7 @@ #define FD_QUIC_MAGIC (0xdadf8cfa01cc5460UL) -/* FD_QUIC_SVC_{...} specify connection timer types. */ -#define FD_QUIC_SVC_INSTANT (0U) /* as soon as possible */ -#define FD_QUIC_SVC_ACK_TX (1U) /* within local max_ack_delay (ACK TX coalesce) */ -#define FD_QUIC_SVC_WAIT (2U) /* within min(idle_timeout, peer max_ack_delay) */ -#define FD_QUIC_SVC_CNT (3U) /* number of FD_QUIC_SVC_{...} levels */ - -/* fd_quic_svc_queue_t is a simple doubly linked list. */ - -struct fd_quic_svc_queue { - /* FIXME track count */ // uint cnt; - uint head; - uint tail; -}; - -typedef struct fd_quic_svc_queue fd_quic_svc_queue_t; /* fd_quic_state_t is the internal state of an fd_quic_t. Valid for @@ -96,8 +82,6 @@ struct __attribute__((aligned(16UL))) fd_quic_state_private { fd_quic_stream_pool_t * stream_pool; /* stream pool, nullable */ fd_quic_pkt_meta_t * pkt_meta_pool; fd_rng_t _rng[1]; /* random number generator */ - fd_quic_svc_queue_t svc_queue[ FD_QUIC_SVC_CNT ]; /* dlists */ - ulong svc_delay[ FD_QUIC_SVC_CNT ]; /* target service delay */ /* need to be able to access connections by index */ ulong conn_base; /* address of array of all connections */ @@ -118,6 +102,9 @@ struct __attribute__((aligned(16UL))) fd_quic_state_private { /* Scratch space for packet protection */ uchar crypt_scratch[FD_QUIC_MTU]; + + /* the timer structs, large private fields / data follow */ + fd_quic_svc_timers_t * svc_timers; }; /* FD_QUIC_STATE_OFF is the offset of fd_quic_state_t within fd_quic_t. */ @@ -194,20 +181,6 @@ fd_quic_conn_service( fd_quic_t * quic, fd_quic_conn_t * conn, ulong now ); -/* fd_quic_svc_schedule installs a connection timer. svc_type is in - [0,FD_QUIC_SVC_CNT) and specifies the timer delay. Lower timers - override higher ones. */ - -void -fd_quic_svc_schedule( fd_quic_state_t * state, - fd_quic_conn_t * conn, - uint svc_type ); - -static inline void -fd_quic_svc_schedule1( fd_quic_conn_t * conn, - uint svc_type ) { - fd_quic_svc_schedule( fd_quic_get_state( conn->quic ), conn, svc_type ); -} /* Memory management **************************************************/ diff --git a/src/waltz/quic/tests/Local.mk b/src/waltz/quic/tests/Local.mk index 195f946bcc..7e3cd520d9 100644 --- a/src/waltz/quic/tests/Local.mk +++ b/src/waltz/quic/tests/Local.mk @@ -15,6 +15,7 @@ $(call make-unit-test,test_quic_layout, test_quic_layout, fd_util) $(call make-unit-test,test_quic_conformance,test_quic_conformance,$(QUIC_TEST_LIBS) fd_util) $(call make-unit-test,test_quic_ack_tx, test_quic_ack_tx, $(QUIC_TEST_LIBS)) $(call make-unit-test,test_quic_concurrency,test_quic_concurrency,$(QUIC_TEST_LIBS)) +$(call make-unit-test,test_quic_svc_q,test_quic_svc_q,$(QUIC_TEST_LIBS)) $(call make-unit-test,test_quic_pkt_meta,test_quic_pkt_meta,$(QUIC_TEST_LIBS)) $(call make-unit-test,test_quic_keep_alive,test_quic_keep_alive,$(QUIC_TEST_LIBS)) $(call run-unit-test,test_quic_proto) @@ -26,8 +27,10 @@ $(call run-unit-test,test_quic_layout) $(call run-unit-test,test_quic_conformance) $(call run-unit-test,test_quic_ack_tx) $(call run-unit-test,test_quic_concurrency) +$(call run-unit-test,test_quic_svc_q) $(call run-unit-test,test_quic_pkt_meta) $(call run-unit-test,test_quic_keep_alive) + # fd_quic_tls unit tests $(call make-unit-test,test_quic_tls_hs,test_quic_tls_hs,$(QUIC_TEST_LIBS)) $(call run-unit-test,test_quic_tls_hs) diff --git a/src/waltz/quic/tests/fuzz_quic_wire.c b/src/waltz/quic/tests/fuzz_quic_wire.c index f0868c3a47..fdb7cc9574 100644 --- a/src/waltz/quic/tests/fuzz_quic_wire.c +++ b/src/waltz/quic/tests/fuzz_quic_wire.c @@ -21,6 +21,7 @@ #include "../fd_quic_proto.h" #include "../fd_quic_proto.c" #include "../fd_quic_private.h" +#include "../fd_quic_svc_q.h" #include #include /* putenv, atexit */ @@ -133,6 +134,7 @@ LLVMFuzzerTestOneInput( uchar const * data, fd_tls_test_sign_ctx( test_signer, rng ); fd_quic_config_test_signer( quic, test_signer ); + g_clock = 0UL; quic->cb.now = test_clock; quic->config.retry = enable_retry; @@ -159,7 +161,12 @@ LLVMFuzzerTestOneInput( uchar const * data, 0U, 0U, 1 /* we are the server */ ); assert( conn ); - assert( conn->svc_type == FD_QUIC_SVC_WAIT ); + fd_quic_svc_schedule( state->svc_timers, conn ); + { + fd_quic_svc_event_t* event = fd_quic_svc_get_event( state->svc_timers, conn ); + assert( event ); + assert( event->timeout > g_clock ); + } conn->tx_max_data = 512UL; conn->tx_initial_max_stream_data_uni = 64UL; @@ -173,7 +180,8 @@ LLVMFuzzerTestOneInput( uchar const * data, conn->keys_avail = 0xff; } - g_clock = 1000UL; + g_clock = 1000UL; + ulong og_clock = g_clock; /* Calls fuzz entrypoint */ send_udp_packet( quic, data, size ); @@ -182,35 +190,38 @@ LLVMFuzzerTestOneInput( uchar const * data, schedule in response to a single packet. */ long svc_quota = fd_long_max( (long)size, 1000L ); - while( state->svc_queue[ FD_QUIC_SVC_INSTANT ].tail!=UINT_MAX ) { + while( g_clock == fd_quic_svc_timers_next(state->svc_timers, ULONG_MAX, 0).timeout ) { fd_quic_service( quic ); assert( --svc_quota > 0 ); } - assert( conn->svc_type != FD_QUIC_SVC_INSTANT ); + const ulong event_idx = conn->svc_meta.idx; + assert( event_idx == FD_QUIC_SVC_IDX_INVAL || state->svc_timers[ event_idx ].timeout > g_clock ); - /* Generate ACKs */ - while( state->svc_queue[ FD_QUIC_SVC_ACK_TX ].head != UINT_MAX ) { - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, state->svc_queue[ FD_QUIC_SVC_ACK_TX ].head ); - g_clock = conn->svc_time; + /* Generate ACKs, if any left */ + fd_quic_svc_event_t next = fd_quic_svc_timers_next( state->svc_timers, ULONG_MAX, 0 ); + while( next.conn && next.timeout <= og_clock + quic->config.ack_threshold ) { + g_clock = next.timeout; fd_quic_service( quic ); assert( --svc_quota > 0 ); + next = fd_quic_svc_timers_next( state->svc_timers, ULONG_MAX, 0 ); } - assert( conn->svc_type != FD_QUIC_SVC_INSTANT && - conn->svc_type != FD_QUIC_SVC_ACK_TX ); + assert( next.timeout > og_clock+quic->config.ack_threshold ); /* Simulate conn timeout */ - while( state->svc_queue[ FD_QUIC_SVC_WAIT ].head != UINT_MAX ) { - ulong idle_timeout_ts = conn->last_activity + quic->config.idle_timeout + 1UL; - fd_quic_conn_t * conn = fd_quic_conn_at_idx( state, state->svc_queue[ FD_QUIC_SVC_WAIT ].head ); + while( next.conn ) { + ulong idle_timeout_ts = next.conn->last_activity + quic->config.idle_timeout + 1UL; /* Idle timeouts should not be scheduled significantly late */ - assert( conn->svc_time < idle_timeout_ts + (ulong)2e9 ); + assert( next.timeout < idle_timeout_ts + (ulong)2e9 ); - g_clock = conn->svc_time; + g_clock = next.timeout; fd_quic_service( quic ); assert( --svc_quota > 0 ); + next = fd_quic_svc_timers_next( state->svc_timers, ULONG_MAX, 0 ); } - assert( conn->svc_type == UINT_MAX ); + + /* connection should be dead */ + assert( conn->svc_meta.idx == FD_QUIC_SVC_IDX_INVAL ); assert( conn->state == FD_QUIC_CONN_STATE_DEAD || conn->state == FD_QUIC_CONN_STATE_INVALID ); /* freed stream resources */ @@ -219,7 +230,6 @@ LLVMFuzzerTestOneInput( uchar const * data, assert( conn->send_streams->sentinel ); assert( !conn->tls_hs ); - fd_quic_delete( fd_quic_leave( fd_quic_fini( quic ) ) ); fd_aio_delete( fd_aio_leave( aio ) ); fd_rng_delete( fd_rng_leave( rng ) ); diff --git a/src/waltz/quic/tests/test_quic_concurrency.c b/src/waltz/quic/tests/test_quic_concurrency.c index 355277c156..b2d4eb8078 100644 --- a/src/waltz/quic/tests/test_quic_concurrency.c +++ b/src/waltz/quic/tests/test_quic_concurrency.c @@ -153,7 +153,7 @@ main( int argc, FD_TEST( quic->metrics.net_tx_pkt_cnt <= frame_cnt ); } - fd_quic_svc_validate( quic ); + fd_quic_state_validate( quic ); fd_wksp_free_laddr( conn_list ); fd_wksp_free_laddr( fd_quic_sandbox_delete( sandbox ) ); diff --git a/src/waltz/quic/tests/test_quic_conformance.c b/src/waltz/quic/tests/test_quic_conformance.c index e573855f48..c34eb9dbcd 100644 --- a/src/waltz/quic/tests/test_quic_conformance.c +++ b/src/waltz/quic/tests/test_quic_conformance.c @@ -120,13 +120,12 @@ test_quic_ping_frame( fd_quic_sandbox_t * sandbox, fd_quic_sandbox_init( sandbox, FD_QUIC_ROLE_SERVER ); fd_quic_conn_t * conn = fd_quic_sandbox_new_conn_established( sandbox, rng ); conn->ack_gen->is_elicited = 0; - FD_TEST( conn->svc_type == FD_QUIC_SVC_WAIT ); + FD_TEST( conn->svc_meta.idx == FD_QUIC_SVC_IDX_INVAL ); uchar buf[1] = {0x01}; fd_quic_sandbox_send_lone_frame( sandbox, conn, buf, sizeof(buf) ); FD_TEST( conn->state == FD_QUIC_CONN_STATE_ACTIVE ); FD_TEST( conn->ack_gen->is_elicited == 1 ); - FD_TEST( conn->svc_type == FD_QUIC_SVC_ACK_TX ); } /* Test an ALPN failure when acting as a server */ @@ -569,13 +568,13 @@ test_quic_conn_free( fd_quic_sandbox_t * sandbox, for( ulong j=0UL; jour_conn_id == 0UL ); } - fd_quic_svc_validate( quic ); + fd_quic_state_validate( quic ); /* Create a bunch of conns */ for( ulong j=0UL; jour_conn_id == cid ); /* CID is kept */ FD_TEST( fd_quic_conn_query1( state->conn_map, cid, sentinel )->conn==conn ); } - fd_quic_svc_validate( quic ); + fd_quic_state_validate( quic ); /* Now, allocate new conns. CIDs should be replaced one by one. */ for( ulong i=0UL; iour_conn_id; FD_TEST( fd_quic_conn_query1( state->conn_map, new_cid, sentinel )->conn==conn ); } - fd_quic_svc_validate( quic ); + fd_quic_state_validate( quic ); /* Finally, validate that packet handlers count freed conns as "keys not available" in metrics. (Logically, for the receiver @@ -638,6 +637,7 @@ test_quic_pktmeta_pktnum_skip( fd_quic_sandbox_t * sandbox, fd_quic_sandbox_init( sandbox, FD_QUIC_ROLE_SERVER ); fd_quic_t * quic = sandbox->quic; + fd_quic_state_t * state = fd_quic_get_state( quic ); fd_quic_conn_t * conn = fd_quic_sandbox_new_conn_established( sandbox, rng ); fd_quic_metrics_t * metrics = &conn->quic->metrics; fd_quic_pkt_meta_tracker_t * tracker = &conn->pkt_meta_tracker; @@ -650,7 +650,8 @@ test_quic_pktmeta_pktnum_skip( fd_quic_sandbox_t * sandbox, conn->flags = ( conn->flags & ~FD_QUIC_CONN_FLAGS_PING_SENT ) | FD_QUIC_CONN_FLAGS_PING; conn->upd_pkt_number = FD_QUIC_PKT_NUM_PENDING; sandbox->wallclock += (ulong)10e6; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + conn->svc_meta.next_timeout = sandbox->wallclock; + fd_quic_svc_schedule( state->svc_timers, conn ); fd_quic_service( quic ); FD_TEST( conn->pkt_number[2] == next_pkt_number + 1UL ); next_pkt_number++; @@ -680,7 +681,8 @@ test_quic_pktmeta_pktnum_skip( fd_quic_sandbox_t * sandbox, conn->flags = ( conn->flags & ~FD_QUIC_CONN_FLAGS_PING_SENT ) | FD_QUIC_CONN_FLAGS_PING; conn->upd_pkt_number = FD_QUIC_PKT_NUM_PENDING; sandbox->wallclock += (ulong)10e6; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + conn->svc_meta.next_timeout = sandbox->wallclock; + fd_quic_svc_schedule( state->svc_timers, conn ); fd_quic_service( quic ); FD_TEST( conn->pkt_number[2] == next_pkt_number ); FD_TEST( *metrics_alloc_fail_cnt > alloc_fail_cnt ); @@ -719,7 +721,8 @@ test_quic_pktmeta_pktnum_skip( fd_quic_sandbox_t * sandbox, conn->flags = ( conn->flags & ~FD_QUIC_CONN_FLAGS_PING_SENT ) | FD_QUIC_CONN_FLAGS_PING; conn->upd_pkt_number = FD_QUIC_PKT_NUM_PENDING; sandbox->wallclock += (ulong)10e6; - fd_quic_svc_schedule1( conn, FD_QUIC_SVC_INSTANT ); + conn->svc_meta.next_timeout = sandbox->wallclock; + fd_quic_svc_schedule( state->svc_timers, conn ); fd_quic_service( quic ); FD_TEST( conn->pkt_number[2] == next_pkt_number + 1UL ); next_pkt_number++; diff --git a/src/waltz/quic/tests/test_quic_hs.c b/src/waltz/quic/tests/test_quic_hs.c index 5f216203ef..017cef4b14 100644 --- a/src/waltz/quic/tests/test_quic_hs.c +++ b/src/waltz/quic/tests/test_quic_hs.c @@ -142,8 +142,8 @@ main( int argc, char ** argv ) { FD_LOG_NOTICE(( "Initializing QUICs" )); FD_TEST( fd_quic_init( server_quic ) ); FD_TEST( fd_quic_init( client_quic ) ); - fd_quic_svc_validate( server_quic ); - fd_quic_svc_validate( client_quic ); + fd_quic_state_validate( server_quic ); + fd_quic_state_validate( client_quic ); FD_LOG_NOTICE(( "Creating connection" )); fd_quic_conn_t * client_conn = fd_quic_connect( client_quic, 0U, 0, 0U, 0 ); @@ -201,10 +201,35 @@ main( int argc, char ** argv ) { FD_LOG_INFO(( "fd_quic_stream_send returned %d", rc )); } + /* testing keep_alive */ + ulong const idle_timeout = fd_ulong_min( + client_quic->config.idle_timeout, + server_quic->config.idle_timeout + ); + ulong const timestep = idle_timeout>>3; + + for( int keep_alive=1; keep_alive>=0; --keep_alive ) { + client_quic->config.keep_alive = keep_alive; + for( int i=0; i<10; ++i ) { + now+=timestep; + fd_quic_service( client_quic ); + fd_quic_service( server_quic ); + } + if( keep_alive ) { + FD_TEST( client_conn->state == FD_QUIC_CONN_STATE_ACTIVE ); + } else { + FD_TEST( client_conn->state == FD_QUIC_CONN_STATE_DEAD || + client_conn->state == FD_QUIC_CONN_STATE_INVALID ); + } + } + + FD_LOG_NOTICE(( "Validated idle_timeout and keep_alive" )); + + FD_LOG_NOTICE(( "Closing connections" )); - fd_quic_svc_validate( server_quic ); - fd_quic_svc_validate( client_quic ); + fd_quic_state_validate( server_quic ); + fd_quic_state_validate( client_quic ); fd_quic_conn_close( client_conn, 0 ); fd_quic_conn_close( server_conn, 0 ); @@ -216,8 +241,8 @@ main( int argc, char ** argv ) { fd_quic_service( server_quic ); } - fd_quic_svc_validate( server_quic ); - fd_quic_svc_validate( client_quic ); + fd_quic_state_validate( server_quic ); + fd_quic_state_validate( client_quic ); validate_quic_hs_tls_cache( client_quic ); validate_quic_hs_tls_cache( server_quic ); diff --git a/src/waltz/quic/tests/test_quic_keep_alive.c b/src/waltz/quic/tests/test_quic_keep_alive.c index 8bd5b4bbeb..00aed2b031 100644 --- a/src/waltz/quic/tests/test_quic_keep_alive.c +++ b/src/waltz/quic/tests/test_quic_keep_alive.c @@ -35,8 +35,8 @@ test_quic_keep_alive( fd_quic_t * client_quic, fd_quic_t * server_quic, int keep FD_TEST( fd_quic_init( server_quic ) ); FD_TEST( fd_quic_init( client_quic ) ); - fd_quic_svc_validate( server_quic ); - fd_quic_svc_validate( client_quic ); + fd_quic_state_validate( server_quic ); + fd_quic_state_validate( client_quic ); fd_quic_conn_t * client_conn = fd_quic_connect( client_quic, 0U, 0, 0U, 0 ); FD_TEST( client_conn ); @@ -53,12 +53,10 @@ test_quic_keep_alive( fd_quic_t * client_quic, fd_quic_t * server_quic, int keep } } - /* FIXME: when svc_queue fixed, make sure these are different - and use idle_timeout = their min */ - FD_TEST( client_quic->config.idle_timeout == server_quic->config.idle_timeout ); - ulong const idle_timeout = client_quic->config.idle_timeout; + ulong const idle_timeout = client_conn->idle_timeout_ticks; ulong const timestep = idle_timeout>>3; + FD_TEST( idle_timeout == fd_ulong_min( client_quic->config.idle_timeout, server_quic->config.idle_timeout )); for( int let_die=0; let_die<2; ++let_die ) { for( int i=0; i<10; ++i ) { @@ -128,6 +126,9 @@ main( int argc, char ** argv ) { server_quic->config.initial_rx_max_stream_data = 1<<16; client_quic->config.initial_rx_max_stream_data = 1<<16; + server_quic->config.idle_timeout = 1e7; + client_quic->config.idle_timeout = 1e9; + fd_quic_virtual_pair_t vp; fd_quic_virtual_pair_init( &vp, server_quic, client_quic ); @@ -145,5 +146,3 @@ main( int argc, char ** argv ) { fd_halt(); return 0; } - - From 55400fd83de26cb4902daa9ccc8f6afe0bea3374 Mon Sep 17 00:00:00 2001 From: Aman Khinvasara Date: Fri, 13 Jun 2025 21:35:26 +0000 Subject: [PATCH 2/2] quic: fix scheduling misses, svc_schedule validates conn reset next_timeout in svc_schedule fix retx timeout ticks/ns issue prep schedule in gen_frames CRIT if svc_timer_cnt != active_conn_cnt schedule idle_timeout from last_activity, not now test_quic_conformance: update for service heap fix test_quic_keep_alive --- src/waltz/quic/fd_quic.c | 39 ++++++++++++++------ src/waltz/quic/fd_quic_svc_q.c | 21 +++++++++-- src/waltz/quic/fd_quic_svc_q.h | 9 ++++- src/waltz/quic/tests/fd_quic_sandbox.c | 3 ++ src/waltz/quic/tests/test_quic_conformance.c | 2 +- src/waltz/quic/tests/test_quic_keep_alive.c | 3 ++ src/waltz/quic/tests/test_quic_svc_q.c | 2 + 7 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/waltz/quic/fd_quic.c b/src/waltz/quic/fd_quic.c index 72eab0ea9b..d226785001 100644 --- a/src/waltz/quic/fd_quic.c +++ b/src/waltz/quic/fd_quic.c @@ -690,6 +690,16 @@ fd_quic_svc_schedule1( fd_quic_conn_t * conn ) { fd_quic_svc_schedule( fd_quic_get_state(conn->quic)->svc_timers, conn ); } +/* Validation Helper */ +static inline void +svc_cnt_eq_active_conn( fd_quic_svc_timers_t * timers, fd_quic_t * quic ) { + ulong const event_cnt = fd_quic_svc_cnt_events( timers ); + ulong const conn_cnt = quic->metrics.conn_active_cnt; + if( FD_UNLIKELY( event_cnt != conn_cnt ) ) { + FD_LOG_CRIT(( "only %lu out of %lu connections are in timer", event_cnt, conn_cnt )); + } +} + /* validates the free conn list doesn't cycle, point nowhere, leak, or point to live conn */ static void fd_quic_conn_free_validate( fd_quic_t * quic ) { @@ -2252,6 +2262,8 @@ fd_quic_process_quic_packet_v1( fd_quic_t * quic, break; } + fd_quic_svc_schedule( state->svc_timers, conn ); + if( FD_UNLIKELY( rc == FD_QUIC_PARSE_FAIL ) ) { FD_DEBUG( FD_LOG_DEBUG(( "Rejected packet (type=%d)", long_packet_type )); ) return FD_QUIC_PARSE_FAIL; @@ -2268,6 +2280,9 @@ fd_quic_process_quic_packet_v1( fd_quic_t * quic, ulong dst_conn_id = fd_ulong_load_8( cur_ptr+1 ); conn = fd_quic_conn_query( state->conn_map, dst_conn_id ); rc = fd_quic_handle_v1_one_rtt( quic, conn, pkt, cur_ptr, cur_sz ); + + fd_quic_svc_schedule( state->svc_timers, conn ); + if( FD_UNLIKELY( rc == FD_QUIC_PARSE_FAIL ) ) { return FD_QUIC_PARSE_FAIL; } @@ -2283,6 +2298,7 @@ fd_quic_process_quic_packet_v1( fd_quic_t * quic, int ack_type = fd_quic_lazy_ack_pkt( quic, conn, pkt ); quic->metrics.ack_tx[ ack_type ]++; + /* fd_quic_lazy_ack_pkt may have prepped schedule */ fd_quic_svc_schedule( state->svc_timers, conn ); if( pkt->rtt_ack_time ) { @@ -2462,6 +2478,7 @@ fd_quic_process_packet_impl( fd_quic_t * quic, } rc = fd_quic_process_quic_packet_v1( quic, &pkt, cur_ptr, cur_sz ); + svc_cnt_eq_active_conn( state->svc_timers, quic ); /* 0UL means no progress, so fail */ if( FD_UNLIKELY( ( rc == FD_QUIC_PARSE_FAIL ) | @@ -2491,6 +2508,7 @@ fd_quic_process_packet_impl( fd_quic_t * quic, /* short header packet only one_rtt packets currently have short headers */ fd_quic_process_quic_packet_v1( quic, &pkt, cur_ptr, cur_sz ); + svc_cnt_eq_active_conn( state->svc_timers, quic ); } void @@ -2875,7 +2893,7 @@ fd_quic_svc_poll( fd_quic_t * quic, max_idle_timeout value advertised by both endpoints." */ FD_DEBUG( FD_LOG_WARNING(("%s conn %p conn_idx: %u closing due to idle timeout (%g ms)", conn->server?"SERVER":"CLIENT", - (void *)conn, conn->conn_idx, (double)fd_quic_ticks_to_us(conn->idle_timeout_ticks) / 1e3 )); ) + (void *)conn, conn->conn_idx, (double)fd_quic_ticks_to_us( quic, conn->idle_timeout_ticks ) / 1e3 )); ) fd_quic_set_conn_state( conn, FD_QUIC_CONN_STATE_DEAD ); quic->metrics.conn_timeout_cnt++; @@ -2909,7 +2927,7 @@ fd_quic_svc_poll( fd_quic_t * quic, break; default: /* prep idle timeout or keep alive at idle timeout/2 */ - fd_quic_svc_prep_schedule( conn, state->now + (conn->idle_timeout_ticks>>(quic->config.keep_alive)) ); + fd_quic_svc_prep_schedule( conn, conn->last_activity + (conn->idle_timeout_ticks>>(quic->config.keep_alive)) ); fd_quic_svc_schedule( state->svc_timers, conn ); break; } @@ -2927,6 +2945,7 @@ fd_quic_service( fd_quic_t * quic ) { long now_ticks = fd_tickcount(); fd_quic_svc_timers_t * timers = state->svc_timers; + svc_cnt_eq_active_conn( timers, quic ); fd_quic_svc_event_t next = fd_quic_svc_timers_next( timers, now, 1 /* pop */); if( FD_UNLIKELY( next.conn == NULL ) ) { return 0; @@ -3513,6 +3532,8 @@ fd_quic_gen_frames( fd_quic_conn_t * conn, } } + fd_quic_svc_prep_schedule( conn, pkt_meta_tmpl->expiry ); + return payload_ptr; } @@ -3576,7 +3597,8 @@ fd_quic_conn_tx( fd_quic_t * quic, ulong now = fd_quic_get_state( quic )->now; /* initialize expiry and tx_time */ - fd_quic_pkt_meta_t pkt_meta_tmpl[1] = {{.expiry = now+500000000UL, .tx_time = now}}; + ulong const retx_timeout = fd_quic_us_to_ticks( quic, 500000UL ); /* 500 ms */ + fd_quic_pkt_meta_t pkt_meta_tmpl[1] = {{.expiry = now+retx_timeout, .tx_time = now}}; // pkt_meta_tmpl->expiry = fd_quic_calc_expiry( conn, now ); //ulong margin = (ulong)(conn->rtt->smoothed_rtt) + (ulong)(3 * conn->rtt->var_rtt); //if( margin < pkt_meta->expiry ) { @@ -3855,7 +3877,6 @@ fd_quic_conn_tx( fd_quic_t * quic, void fd_quic_conn_service( fd_quic_t * quic, fd_quic_conn_t * conn, ulong now ) { (void)now; - conn->svc_meta.next_timeout = ULONG_MAX; /* Send new rtt measurement probe? */ if( FD_UNLIKELY(now > conn->last_ack + (ulong)conn->rtt_period_ticks) ) { @@ -3947,6 +3968,7 @@ fd_quic_conn_service( fd_quic_t * quic, fd_quic_conn_t * conn, ulong now ) { case FD_QUIC_CONN_STATE_INVALID: /* fall thru */ default: + FD_LOG_CRIT(( "invalid conn state %u", conn->state )); return; } @@ -3972,12 +3994,6 @@ fd_quic_conn_free( fd_quic_t * quic, fd_quic_state_t * state = fd_quic_get_state( quic ); - /* no need to remove this connection from the events queue - free is called from two places: - fini - service will never be called again. All events are destroyed - service - removes event before calling free. Event only allowed to be - enqueued once */ - /* remove all stream ids from map, and free stream */ /* remove used streams */ @@ -4034,12 +4050,12 @@ fd_quic_conn_free( fd_quic_t * quic, } conn->tls_hs = NULL; + /* remove from service queue */ fd_quic_svc_cancel( state->svc_timers, conn ); /* put connection back in free list */ conn->free_conn_next = state->free_conn_list; state->free_conn_list = conn->conn_idx; - fd_quic_set_conn_state( conn, FD_QUIC_CONN_STATE_INVALID ); quic->metrics.conn_active_cnt--; @@ -4066,7 +4082,6 @@ fd_quic_connect( fd_quic_t * quic, } } - fd_rng_t * rng = state->_rng; /* create conn ids for us and them diff --git a/src/waltz/quic/fd_quic_svc_q.c b/src/waltz/quic/fd_quic_svc_q.c index 3679bd6284..e4b70ce1f9 100644 --- a/src/waltz/quic/fd_quic_svc_q.c +++ b/src/waltz/quic/fd_quic_svc_q.c @@ -19,10 +19,7 @@ typedef fd_quic_svc_event_t fd_quic_svc_queue_prq_t; ulong fd_quic_svc_timers_footprint( ulong max_conn ) { - ulong offset = 0UL; - offset = fd_ulong_align_up( offset, fd_quic_svc_queue_prq_align() ); - offset += fd_quic_svc_queue_prq_footprint( max_conn ); - return offset; + return fd_quic_svc_queue_prq_footprint( max_conn ); } ulong @@ -78,9 +75,19 @@ fd_quic_svc_cancel( fd_quic_svc_timers_t * timers, void fd_quic_svc_schedule( fd_quic_svc_timers_t * timers, fd_quic_conn_t * conn ) { + + /* if conn null or invalid, do not schedule */ + if( FD_UNLIKELY( !conn || conn->state == FD_QUIC_CONN_STATE_INVALID ) ) { + /* cleaner/safer to check in here for now. If function call overhead + becomes a constraint, move check to caller */ + return; + } + ulong idx = conn->svc_meta.idx; ulong expiry = conn->svc_meta.next_timeout; + conn->svc_meta.next_timeout = ULONG_MAX; /* reset next_timeout */ + if( FD_UNLIKELY( idx != FD_QUIC_SVC_IDX_INVAL ) ) { /* find current expiry */ fd_quic_svc_event_t * event = timers + idx; @@ -95,6 +102,7 @@ fd_quic_svc_schedule( fd_quic_svc_timers_t * timers, conn->svc_meta.idx = FD_QUIC_SVC_IDX_INVAL; } } + /* TODO: potential perf improvement combining remove and insert(?) */ /* insert new element */ fd_quic_svc_event_t e = { @@ -161,3 +169,8 @@ fd_quic_svc_get_event( fd_quic_svc_timers_t * timers, if( idx == FD_QUIC_SVC_IDX_INVAL ) return NULL; return timers + idx; } + +ulong +fd_quic_svc_cnt_events( fd_quic_svc_timers_t * timers ) { + return fd_quic_svc_queue_prq_cnt( timers ); +} diff --git a/src/waltz/quic/fd_quic_svc_q.h b/src/waltz/quic/fd_quic_svc_q.h index da18177702..099c5abd37 100644 --- a/src/waltz/quic/fd_quic_svc_q.h +++ b/src/waltz/quic/fd_quic_svc_q.h @@ -56,7 +56,8 @@ fd_quic_svc_timers_init_conn( fd_quic_conn_t * conn ); /* fd_quic_svc_schedule schedules a connection timer. Uses conn->svc_meta.next_timeout as the expiry time. - If already scheduled, keeps the earlier time. */ + If already scheduled, keeps the earlier time. Resets + next_timeout to ULONG_MAX. */ void fd_quic_svc_schedule( fd_quic_svc_timers_t * timers, fd_quic_conn_t * conn ); @@ -90,4 +91,10 @@ fd_quic_svc_event_t* fd_quic_svc_get_event( fd_quic_svc_timers_t * timers, fd_quic_conn_t * conn ); + +/* fd_quic_svc_cnt_events returns the number of conns with active timers + Primarily used for testing/validation. */ +ulong +fd_quic_svc_cnt_events( fd_quic_svc_timers_t * timers ); + #endif /* HEADER_fd_src_waltz_quic_fd_quic_svc_q_h */ diff --git a/src/waltz/quic/tests/fd_quic_sandbox.c b/src/waltz/quic/tests/fd_quic_sandbox.c index d499678df6..b74b178c9f 100644 --- a/src/waltz/quic/tests/fd_quic_sandbox.c +++ b/src/waltz/quic/tests/fd_quic_sandbox.c @@ -315,6 +315,9 @@ fd_quic_sandbox_new_conn_established( fd_quic_sandbox_t * sandbox, conn->srx->rx_max_data_ackd = 0UL; conn->tx_initial_max_stream_data_uni = 0UL; + fd_quic_state_t * state = fd_quic_get_state( quic ); + fd_quic_svc_schedule( state->svc_timers, conn ); + /* TODO set a realistic packet number */ return conn; diff --git a/src/waltz/quic/tests/test_quic_conformance.c b/src/waltz/quic/tests/test_quic_conformance.c index c34eb9dbcd..99a5894ad8 100644 --- a/src/waltz/quic/tests/test_quic_conformance.c +++ b/src/waltz/quic/tests/test_quic_conformance.c @@ -120,7 +120,7 @@ test_quic_ping_frame( fd_quic_sandbox_t * sandbox, fd_quic_sandbox_init( sandbox, FD_QUIC_ROLE_SERVER ); fd_quic_conn_t * conn = fd_quic_sandbox_new_conn_established( sandbox, rng ); conn->ack_gen->is_elicited = 0; - FD_TEST( conn->svc_meta.idx == FD_QUIC_SVC_IDX_INVAL ); + FD_TEST( conn->svc_meta.idx != FD_QUIC_SVC_IDX_INVAL ); uchar buf[1] = {0x01}; fd_quic_sandbox_send_lone_frame( sandbox, conn, buf, sizeof(buf) ); diff --git a/src/waltz/quic/tests/test_quic_keep_alive.c b/src/waltz/quic/tests/test_quic_keep_alive.c index 00aed2b031..e769eb4f3a 100644 --- a/src/waltz/quic/tests/test_quic_keep_alive.c +++ b/src/waltz/quic/tests/test_quic_keep_alive.c @@ -129,6 +129,9 @@ main( int argc, char ** argv ) { server_quic->config.idle_timeout = 1e7; client_quic->config.idle_timeout = 1e9; + server_quic->config.ack_delay = 1e6; + client_quic->config.ack_delay = 1e6; + fd_quic_virtual_pair_t vp; fd_quic_virtual_pair_init( &vp, server_quic, client_quic ); diff --git a/src/waltz/quic/tests/test_quic_svc_q.c b/src/waltz/quic/tests/test_quic_svc_q.c index f810e5f483..08d5f070c8 100644 --- a/src/waltz/quic/tests/test_quic_svc_q.c +++ b/src/waltz/quic/tests/test_quic_svc_q.c @@ -105,6 +105,7 @@ test_multiple_connections( fd_quic_svc_timers_t * timers, fd_quic_conn_t * conns[conn_cnt]; /* array of conn ptrs */ for( uint i=0; istate = FD_QUIC_CONN_STATE_ACTIVE; } ulong now = 1000UL; @@ -192,6 +193,7 @@ main( int argc, char ** argv ) { { fd_quic_conn_t * conn = (fd_quic_conn_t *)create_mock_conns( &limits , 1); + conn->state = FD_QUIC_CONN_STATE_ACTIVE; test_svc_schedule( timers, conn ); test_svc_cancel( timers, conn ); free( conn );