Skip to content

Commit 5b534f4

Browse files
committed
fix: (potential) metadata cache invalidation race
Under certain conditions it MAY happen that metadata cache invalidation races, e.g.:

| Thread 1     | Thread 2     |
| ------------ | ------------ |
| get metadata |              |
|              | get metadata |
| invalidate   |              |
|              | get metadata |
|              | invalidate   |

Here the 2nd invalidation is unnecessary. It's not a correctness issue, but we throw away cached data that is perfectly fine. To work around this, we now track a "generation" for data that we get from the metadata cache and check this during invalidation requests. Ref #62. (not closing, need to do the same for the broker connection cache)
1 parent 6519cd0 commit 5b534f4

File tree

5 files changed

+138
-44
lines changed

5 files changed

+138
-44
lines changed

src/client/controller.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ impl ControllerClient {
9999
/// Retrieve the broker ID of the controller
100100
async fn get_controller_id(&self) -> Result<i32> {
101101
// Request an uncached, fresh copy of the metadata.
102-
let metadata = self
102+
let (metadata, _gen) = self
103103
.brokers
104104
.request_metadata(MetadataLookupMode::ArbitraryBroker, Some(vec![]))
105105
.await?;

src/client/metadata_cache.rs

Lines changed: 70 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,29 @@
1+
use std::ops::Deref;
2+
13
use parking_lot::Mutex;
24
use tracing::{debug, info};
35

46
use crate::protocol::messages::MetadataResponse;
57

8+
/// Cache generation for [`MetadataCache`].
9+
///
10+
/// This is used to avoid double-invalidating a cache.
11+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
12+
pub struct MetadataCacheGeneration(usize);
13+
614
/// A [`MetadataCache`] provides look-aside caching of [`MetadataResponse`]
715
/// instances.
8-
#[derive(Debug, Default)]
16+
#[derive(Debug)]
917
pub(crate) struct MetadataCache {
10-
cache: Mutex<Option<MetadataResponse>>,
18+
cache: Mutex<(Option<MetadataResponse>, MetadataCacheGeneration)>,
19+
}
20+
21+
impl Default for MetadataCache {
22+
fn default() -> Self {
23+
Self {
24+
cache: Mutex::new((None, MetadataCacheGeneration(0))),
25+
}
26+
}
1127
}
1228

1329
impl MetadataCache {
@@ -16,8 +32,16 @@ impl MetadataCache {
1632
/// If `topics` is `Some` the returned metadata contains topics that are
1733
/// filtered to match by name. If a topic name is specified that doesn't
1834
/// exist in the cached metadata, the cache is invalidated.
19-
pub(crate) fn get(&self, topics: &Option<Vec<String>>) -> Option<MetadataResponse> {
20-
let mut m = self.cache.lock().clone()?;
35+
pub(crate) fn get(
36+
&self,
37+
topics: &Option<Vec<String>>,
38+
) -> Option<(MetadataResponse, MetadataCacheGeneration)> {
39+
let (mut m, gen) = match self.cache.lock().deref() {
40+
(Some(m), gen) => (m.clone(), *gen),
41+
(None, _) => {
42+
return None;
43+
}
44+
};
2145

2246
// If the caller requested a subset of topics, filter the cached result
2347
// to ensure only the expected topics are present.
@@ -39,23 +63,37 @@ impl MetadataCache {
3963
// if a caller keeps requesting metadata for a non-existent
4064
// topic.
4165
debug!("cached metadata query for unknown topic");
42-
self.invalidate("get from metadata cache: unknown topic");
66+
self.invalidate("get from metadata cache: unknown topic", gen);
4367
return None;
4468
}
4569
}
4670

4771
debug!(?m, "using cached metadata response");
4872

49-
Some(m)
73+
Some((m, gen))
5074
}
5175

52-
pub(crate) fn invalidate(&self, reason: &'static str) {
53-
*self.cache.lock() = None;
76+
pub(crate) fn invalidate(&self, reason: &'static str, gen: MetadataCacheGeneration) {
77+
let mut guard = self.cache.lock();
78+
if guard.1 != gen {
79+
// stale request
80+
debug!(
81+
reason,
82+
current_gen = guard.1 .0,
83+
request_gen = gen.0,
84+
"stale invalidation request for metadata cache",
85+
);
86+
return;
87+
}
88+
89+
guard.0 = None;
5490
info!(reason, "invalidated metadata cache",);
5591
}
5692

5793
pub(crate) fn update(&self, m: MetadataResponse) {
58-
*self.cache.lock() = Some(m);
94+
let mut guard = self.cache.lock();
95+
guard.0 = Some(m);
96+
guard.1 .0 += 1;
5997
debug!("updated metadata cache");
6098
}
6199
}
@@ -99,7 +137,7 @@ mod tests {
99137
let m = response_with_topics(None);
100138
cache.update(m.clone());
101139

102-
let got = cache.get(&None).expect("should have cached entry");
140+
let (got, _gen) = cache.get(&None).expect("should have cached entry");
103141
assert_eq!(m, got);
104142
}
105143

@@ -109,16 +147,16 @@ mod tests {
109147
cache.update(response_with_topics(Some(&["bananas", "platanos"])));
110148

111149
// Request a subset of the topics
112-
let got = cache
150+
let (got, _gen) = cache
113151
.get(&Some(vec!["bananas".to_string()]))
114152
.expect("should have cached entry");
115153
assert_eq!(response_with_topics(Some(&["bananas"])), got);
116154

117-
let got = cache.get(&Some(vec![])).expect("should have cached entry");
155+
let (got, _gen) = cache.get(&Some(vec![])).expect("should have cached entry");
118156
assert_eq!(response_with_topics(Some(&[])), got);
119157

120158
// A request for "None" actually means "all of them".
121-
let got = cache.get(&None).expect("should have cached entry");
159+
let (got, _gen) = cache.get(&None).expect("should have cached entry");
122160
assert_eq!(response_with_topics(Some(&["bananas", "platanos"])), got);
123161
}
124162

@@ -147,8 +185,26 @@ mod tests {
147185
topics: Default::default(),
148186
});
149187

188+
let (_data, gen1) = cache.get(&None).unwrap();
189+
cache.invalidate("test", gen1);
190+
assert!(cache.get(&None).is_none());
191+
192+
cache.update(MetadataResponse {
193+
throttle_time_ms: Default::default(),
194+
brokers: Default::default(),
195+
cluster_id: Default::default(),
196+
controller_id: Default::default(),
197+
topics: Default::default(),
198+
});
199+
200+
let (_data, gen2) = cache.get(&None).unwrap();
201+
202+
// outdated gen
203+
cache.invalidate("test", gen1);
150204
assert!(cache.get(&None).is_some());
151-
cache.invalidate("test");
205+
206+
// the actual gen
207+
cache.invalidate("test", gen2);
152208
assert!(cache.get(&None).is_none());
153209
}
154210
}

src/client/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ impl Client {
152152
//
153153
// Because this is an unconstrained metadata request (all topics) it
154154
// will update the cached metadata entry.
155-
let response = self
155+
let (response, _gen) = self
156156
.brokers
157157
.request_metadata(MetadataLookupMode::ArbitraryBroker, None)
158158
.await?;

src/client/partition.rs

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use std::{
3131
use tokio::sync::Mutex;
3232
use tracing::{error, info};
3333

34-
use super::error::ServerErrorResponse;
34+
use super::{error::ServerErrorResponse, metadata_cache::MetadataCacheGeneration};
3535

3636
/// How strongly a [`PartitionClient`] is bound to a partition.
3737
///
@@ -99,6 +99,13 @@ pub enum OffsetAt {
9999
Latest,
100100
}
101101

102+
#[derive(Debug, Default)]
103+
struct CurrentBroker {
104+
broker: Option<BrokerConnection>,
105+
gen_leader_from_arbitrary: Option<MetadataCacheGeneration>,
106+
gen_leader_from_self: Option<MetadataCacheGeneration>,
107+
}
108+
102109
/// Many operations must be performed on the leader for a partition
103110
///
104111
/// Additionally a partition is the unit of concurrency within Kafka
@@ -117,7 +124,7 @@ pub struct PartitionClient {
117124
backoff_config: BackoffConfig,
118125

119126
/// Current broker connection if any
120-
current_broker: Mutex<Option<BrokerConnection>>,
127+
current_broker: Mutex<CurrentBroker>,
121128

122129
unknown_topic_handling: UnknownTopicHandling,
123130
}
@@ -140,7 +147,7 @@ impl PartitionClient {
140147
partition,
141148
brokers: Arc::clone(&brokers),
142149
backoff_config: Default::default(),
143-
current_broker: Mutex::new(None),
150+
current_broker: Mutex::new(CurrentBroker::default()),
144151
unknown_topic_handling,
145152
};
146153

@@ -312,8 +319,11 @@ impl PartitionClient {
312319
}
313320

314321
/// Retrieve the broker ID of the partition leader
315-
async fn get_leader(&self, metadata_mode: MetadataLookupMode) -> Result<i32> {
316-
let metadata = self
322+
async fn get_leader(
323+
&self,
324+
metadata_mode: MetadataLookupMode,
325+
) -> Result<(i32, Option<MetadataCacheGeneration>)> {
326+
let (metadata, gen) = self
317327
.brokers
318328
.request_metadata(metadata_mode, Some(vec![self.topic.clone()]))
319329
.await?;
@@ -379,7 +389,7 @@ impl PartitionClient {
379389
leader=partition.leader_id.0,
380390
"Detected leader",
381391
);
382-
Ok(partition.leader_id.0)
392+
Ok((partition.leader_id.0, gen))
383393
}
384394
}
385395

@@ -391,7 +401,7 @@ impl BrokerCache for &PartitionClient {
391401

392402
async fn get(&self) -> Result<Arc<Self::R>> {
393403
let mut current_broker = self.current_broker.lock().await;
394-
if let Some(broker) = &*current_broker {
404+
if let Some(broker) = &current_broker.broker {
395405
return Ok(Arc::clone(broker));
396406
}
397407

@@ -410,22 +420,29 @@ impl BrokerCache for &PartitionClient {
410420
// * A subsequent query is performed against the leader that does not use the cached entry - this validates
411421
// the correctness of the cached entry.
412422
//
413-
let leader = self.get_leader(MetadataLookupMode::CachedArbitrary).await?;
423+
let (leader, gen_leader_from_arbitrary) =
424+
self.get_leader(MetadataLookupMode::CachedArbitrary).await?;
414425
let broker = match self.brokers.connect(leader).await {
415426
Ok(Some(c)) => Ok(c),
416427
Ok(None) => {
417-
self.brokers.invalidate_metadata_cache(
418-
"partition client: broker that is leader is unknown",
419-
);
428+
if let Some(gen) = gen_leader_from_arbitrary {
429+
self.brokers.invalidate_metadata_cache(
430+
"partition client: broker that is leader is unknown",
431+
gen,
432+
);
433+
}
420434
Err(Error::InvalidResponse(format!(
421435
"Partition leader {} not found in metadata response",
422436
leader
423437
)))
424438
}
425439
Err(e) => {
426-
self.brokers.invalidate_metadata_cache(
427-
"partition client: error connecting to partition leader",
428-
);
440+
if let Some(gen) = gen_leader_from_arbitrary {
441+
self.brokers.invalidate_metadata_cache(
442+
"partition client: error connecting to partition leader",
443+
gen,
444+
);
445+
}
429446
Err(e.into())
430447
}
431448
}?;
@@ -441,14 +458,17 @@ impl BrokerCache for &PartitionClient {
441458
//
442459
// Because this check requires the most up-to-date metadata from a specific broker, do not accept a cached
443460
// metadata response.
444-
let leader_self = self
461+
let (leader_self, gen_leader_from_self) = self
445462
.get_leader(MetadataLookupMode::SpecificBroker(Arc::clone(&broker)))
446463
.await?;
447464
if leader != leader_self {
448-
// The cached metadata identified an incorrect leader - it is stale and should be refreshed.
449-
self.brokers.invalidate_metadata_cache(
450-
"partition client: broker that should be leader does treat itself as a leader",
451-
);
465+
if let Some(gen) = gen_leader_from_self {
466+
// The cached metadata identified an incorrect leader - it is stale and should be refreshed.
467+
self.brokers.invalidate_metadata_cache(
468+
"partition client: broker that should be leader does treat itself as a leader",
469+
gen,
470+
);
471+
}
452472

453473
// this might happen if the leader changed after we got the hint from a arbitrary broker and this specific
454474
// metadata call.
@@ -464,7 +484,11 @@ impl BrokerCache for &PartitionClient {
464484
});
465485
}
466486

467-
*current_broker = Some(Arc::clone(&broker));
487+
*current_broker = CurrentBroker {
488+
broker: Some(Arc::clone(&broker)),
489+
gen_leader_from_arbitrary,
490+
gen_leader_from_self,
491+
};
468492

469493
info!(
470494
topic=%self.topic,
@@ -482,8 +506,17 @@ impl BrokerCache for &PartitionClient {
482506
reason,
483507
"Invaliding cached leader",
484508
);
485-
self.brokers.invalidate_metadata_cache(reason);
486-
*self.current_broker.lock().await = None
509+
510+
let mut current_broker = self.current_broker.lock().await;
511+
512+
if let Some(gen) = current_broker.gen_leader_from_arbitrary {
513+
self.brokers.invalidate_metadata_cache(reason, gen);
514+
}
515+
if let Some(gen) = current_broker.gen_leader_from_self {
516+
self.brokers.invalidate_metadata_cache(reason, gen);
517+
}
518+
519+
current_broker.broker = None
487520
}
488521
}
489522

src/connection.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use tokio::{io::BufStream, sync::Mutex};
77
use tracing::{error, info, warn};
88

99
use crate::backoff::ErrorOrThrottle;
10+
use crate::client::metadata_cache::MetadataCacheGeneration;
1011
use crate::connection::topology::{Broker, BrokerTopology};
1112
use crate::connection::transport::Transport;
1213
use crate::messenger::{Messenger, RequestError};
@@ -224,7 +225,7 @@ impl BrokerConnector {
224225
&self,
225226
metadata_mode: MetadataLookupMode,
226227
topics: Option<Vec<String>>,
227-
) -> Result<MetadataResponse> {
228+
) -> Result<(MetadataResponse, Option<MetadataCacheGeneration>)> {
228229
// Return a cached metadata response as an optimisation to prevent
229230
// multiple successive metadata queries for the same topic across
230231
// multiple PartitionClient instances.
@@ -235,8 +236,8 @@ impl BrokerConnector {
235236
// Client initialises this cache at construction time, so unless
236237
// invalidated, there will always be a cached entry available.
237238
if matches!(metadata_mode, MetadataLookupMode::CachedArbitrary) {
238-
if let Some(m) = self.cached_metadata.get(&topics) {
239-
return Ok(m);
239+
if let Some((m, gen)) = self.cached_metadata.get(&topics) {
240+
return Ok((m, Some(gen)));
240241
}
241242
}
242243

@@ -261,11 +262,15 @@ impl BrokerConnector {
261262
// Since the metadata request contains information about the cluster state, use it to update our view.
262263
self.topology.update(&response.brokers);
263264

264-
Ok(response)
265+
Ok((response, None))
265266
}
266267

267-
pub(crate) fn invalidate_metadata_cache(&self, reason: &'static str) {
268-
self.cached_metadata.invalidate(reason)
268+
pub(crate) fn invalidate_metadata_cache(
269+
&self,
270+
reason: &'static str,
271+
gen: MetadataCacheGeneration,
272+
) {
273+
self.cached_metadata.invalidate(reason, gen)
269274
}
270275

271276
/// Returns a new connection to the broker with the provided id

0 commit comments

Comments
 (0)