Skip to content

Commit 860f0ae

Browse files
authored
Add a timer to retry the connection to any random broker after the sparse connect interval has passed. (#5039)
Remove the override that scheduled a connection when there was no existing one, as it caused too-frequent connection retries when the cluster isn't reachable. Remove the scheduled connections count, as the `sparse_connect_random` interval is again effective in all cases.
1 parent 8bf0a67 commit 860f0ae

File tree

3 files changed

+27
-40
lines changed

3 files changed

+27
-40
lines changed

src/rdkafka.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2375,7 +2375,6 @@ rd_kafka_t *rd_kafka_new(rd_kafka_type_t type,
23752375
rd_atomic32_init(&rk->rk_logical_broker_cnt, 0);
23762376
rd_atomic32_init(&rk->rk_broker_up_cnt, 0);
23772377
rd_atomic32_init(&rk->rk_broker_down_cnt, 0);
2378-
rd_atomic32_init(&rk->rk_scheduled_connections_cnt, 0);
23792378

23802379
rk->rk_rep = rd_kafka_q_new(rk);
23812380
rk->rk_ops = rd_kafka_q_new(rk);

src/rdkafka_broker.c

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3485,7 +3485,6 @@ rd_kafka_broker_op_serve(rd_kafka_broker_t *rkb, rd_kafka_op_t *rko) {
34853485
break;
34863486

34873487
case RD_KAFKA_OP_CONNECT:
3488-
rd_atomic32_sub(&rkb->rkb_rk->rk_scheduled_connections_cnt, 1);
34893488
/* Sparse connections: connection requested, transition
34903489
* to TRY_CONNECT state to trigger new connection. */
34913490
if (rkb->rkb_state == RD_KAFKA_BROKER_STATE_INIT) {
@@ -5671,14 +5670,17 @@ static int rd_kafka_broker_filter_never_connected(rd_kafka_broker_t *rkb,
56715670
return rd_atomic32_get(&rkb->rkb_c.connects);
56725671
}
56735672

5674-
/**
5675-
* @brief Filter out brokers that aren't connecting.
5676-
*/
5677-
static int rd_kafka_broker_filter_not_connecting(rd_kafka_broker_t *rkb,
5678-
void *opaque) {
5679-
return rkb->rkb_state <= RD_KAFKA_BROKER_STATE_DOWN;
5680-
}
5673+
/**
 * @brief Timer callback that retries rd_kafka_connect_any().
 *
 * Started as a one-shot timer by rd_kafka_connect_any() when the
 * `sparse_connect_random` interval is still suppressing new connections.
 *
 * @param rkts Timers instance; the client handle is read from `rkts_rk`.
 * @param arg  The `reason` string that was handed to the timer, passed on
 *             unchanged to rd_kafka_connect_any().
 *
 * NOTE(review): only the `reason` pointer is stored in the timer, not a
 *               copy of the string, so it must still be valid when the
 *               timer fires - confirm that all callers pass strings with
 *               static lifetime.
 */
static void rd_kafka_connect_any_timer_cb(rd_kafka_timers_t *rkts, void *arg) {
5674+
const char *reason = (const char *)arg;
5675+
rd_kafka_t *rk = rkts->rkts_rk;
5676+
/* Don't trigger new connections once the client is terminating. */
if (rd_kafka_terminating(rk))
5677+
return;
56815678

5679+
/* Acquire the read lock for `rd_kafka_connect_any` */
5680+
rd_kafka_rdlock(rk);
5681+
rd_kafka_connect_any(rk, reason);
5682+
rd_kafka_rdunlock(rk);
5683+
}
56825684

56835685
/**
56845686
* @brief Sparse connections:
@@ -5693,8 +5695,6 @@ static int rd_kafka_broker_filter_not_connecting(rd_kafka_broker_t *rkb,
56935695
void rd_kafka_connect_any(rd_kafka_t *rk, const char *reason) {
56945696
rd_kafka_broker_t *rkb;
56955697
rd_ts_t suppr;
5696-
rd_bool_t any_connecting = rd_true;
5697-
int scheduled_connections;
56985698

56995699
/* Don't count connections to logical brokers since they serve
57005700
* a specific purpose (group coordinator) and their connections
@@ -5707,33 +5707,22 @@ void rd_kafka_connect_any(rd_kafka_t *rk, const char *reason) {
57075707
return;
57085708

57095709
mtx_lock(&rk->rk_suppress.sparse_connect_lock);
5710-
rkb = rd_kafka_broker_random(
5711-
rk, -1 /*any state*/, rd_kafka_broker_filter_not_connecting, NULL);
5712-
if (rkb)
5713-
rd_kafka_broker_destroy(
5714-
rkb); /* refcnt from ..broker_random() */
5715-
else
5716-
any_connecting = rd_false;
57175710

5718-
scheduled_connections =
5719-
rd_atomic32_get(&rk->rk_scheduled_connections_cnt);
5720-
5721-
if (!any_connecting && scheduled_connections == 0)
5722-
/* Skip interval */
5723-
rd_interval_reset(&rk->rk_suppress.sparse_connect_random);
57245711
suppr = rd_interval(&rk->rk_suppress.sparse_connect_random,
57255712
rk->rk_conf.sparse_connect_intvl * 1000, 0);
5713+
mtx_unlock(&rk->rk_suppress.sparse_connect_lock);
57265714

57275715
if (suppr <= 0) {
57285716
rd_kafka_dbg(rk, BROKER | RD_KAFKA_DBG_GENERIC, "CONNECT",
57295717
"Not selecting any broker for cluster connection: "
5730-
"still suppressed for %" PRId64
5731-
"ms, "
5732-
"any broker connecting: %s, "
5733-
"scheduled connections %d: %s",
5734-
-suppr / 1000, RD_STR_ToF(any_connecting),
5735-
scheduled_connections, reason);
5736-
goto done;
5718+
"still suppressed for %" PRId64 "ms: %s",
5719+
-suppr / 1000, reason);
5720+
/* Retry after interval + 1ms has passed */
5721+
rd_kafka_timer_start_oneshot(
5722+
&rk->rk_timers, &rk->rk_suppress.sparse_connect_random_tmr,
5723+
rd_false /* don't restart */, 1000LL - suppr,
5724+
rd_kafka_connect_any_timer_cb, (void *)reason);
5725+
return;
57375726
}
57385727

57395728
/* First pass: only match brokers never connected to,
@@ -5755,7 +5744,7 @@ void rd_kafka_connect_any(rd_kafka_t *rk, const char *reason) {
57555744
rd_kafka_dbg(rk, BROKER | RD_KAFKA_DBG_GENERIC, "CONNECT",
57565745
"Cluster connection already in progress: %s",
57575746
reason);
5758-
goto done;
5747+
return;
57595748
}
57605749

57615750
rd_rkb_dbg(rkb, BROKER | RD_KAFKA_DBG_GENERIC, "CONNECT",
@@ -5766,8 +5755,6 @@ void rd_kafka_connect_any(rd_kafka_t *rk, const char *reason) {
57665755
rd_kafka_broker_schedule_connection(rkb);
57675756

57685757
rd_kafka_broker_destroy(rkb); /* refcnt from ..broker_random() */
5769-
done:
5770-
mtx_unlock(&rk->rk_suppress.sparse_connect_lock);
57715758
}
57725759

57735760

@@ -5957,11 +5944,9 @@ void rd_kafka_broker_active_toppar_del(rd_kafka_broker_t *rkb,
59575944
*/
59585945
void rd_kafka_broker_schedule_connection(rd_kafka_broker_t *rkb) {
59595946
rd_kafka_op_t *rko;
5960-
rd_atomic32_add(&rkb->rkb_rk->rk_scheduled_connections_cnt, 1);
59615947
rko = rd_kafka_op_new(RD_KAFKA_OP_CONNECT);
59625948
rd_kafka_op_set_prio(rko, RD_KAFKA_PRIO_FLASH);
5963-
if (!rd_kafka_q_enq(rkb->rkb_ops, rko))
5964-
rd_atomic32_sub(&rkb->rkb_rk->rk_scheduled_connections_cnt, 1);
5949+
rd_kafka_q_enq(rkb->rkb_ops, rko);
59655950
}
59665951

59675952

src/rdkafka_int.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,6 @@ struct rd_kafka_s {
299299
* that have had at least one connection attempt
300300
* and are configured or learned. */
301301
rd_atomic32_t rk_broker_down_cnt;
302-
/* Number of sparse connections requested
303-
* but still not executed. */
304-
rd_atomic32_t rk_scheduled_connections_cnt;
305302

306303
/**< Additional bootstrap servers list.
307304
* contains all brokers added through rd_kafka_brokers_add().
@@ -673,6 +670,12 @@ struct rd_kafka_s {
673670
* Use 10 < reconnect.backoff.jitter.ms / 2 < 1000.
674671
*/
675672
rd_interval_t sparse_connect_random;
673+
674+
/** Sparse connection timer: fires after remaining time of
675+
* `sparse_connect_random` interval + 1ms.
676+
*/
677+
rd_kafka_timer_t sparse_connect_random_tmr;
678+
676679
/**< Lock for sparse_connect_random */
677680
mtx_t sparse_connect_lock;
678681

0 commit comments

Comments
 (0)