[receiver/kafka] backoff in case of next consumer error #37009
@@ -9,9 +9,12 @@ import (
 	"fmt"
 	"strconv"
 	"sync"
+	"time"

 	"github.com/IBM/sarama"
+	"github.com/cenkalti/backoff/v4"
 	"go.opentelemetry.io/collector/component"
+	"go.opentelemetry.io/collector/config/configretry"
 	"go.opentelemetry.io/collector/consumer"
 	"go.opentelemetry.io/collector/pdata/plog"
 	"go.opentelemetry.io/collector/pdata/pmetric"
@@ -35,6 +38,8 @@ const (

 var errInvalidInitialOffset = errors.New("invalid initial offset")

+var errMemoryLimiterDataRefused = errors.New("data refused due to high memory usage")
+
 // kafkaTracesConsumer uses sarama to consume and handle messages from kafka.
 type kafkaTracesConsumer struct {
 	config Config
@@ -205,6 +210,7 @@ func (c *kafkaTracesConsumer) Start(_ context.Context, host component.Host) error {
 		messageMarking:   c.messageMarking,
 		headerExtractor:  &nopHeaderExtractor{},
 		telemetryBuilder: c.telemetryBuilder,
+		backOff:          newExponentialBackOff(c.config.ErrorBackOff),
 	}
 	if c.headerExtraction {
 		consumerGroup.headerExtractor = &headerExtractor{
@@ -313,6 +319,7 @@ func (c *kafkaMetricsConsumer) Start(_ context.Context, host component.Host) error {
 		messageMarking:   c.messageMarking,
 		headerExtractor:  &nopHeaderExtractor{},
 		telemetryBuilder: c.telemetryBuilder,
+		backOff:          newExponentialBackOff(c.config.ErrorBackOff),
 	}
 	if c.headerExtraction {
 		metricsConsumerGroup.headerExtractor = &headerExtractor{
@@ -424,6 +431,7 @@ func (c *kafkaLogsConsumer) Start(_ context.Context, host component.Host) error {
 		messageMarking:   c.messageMarking,
 		headerExtractor:  &nopHeaderExtractor{},
 		telemetryBuilder: c.telemetryBuilder,
+		backOff:          newExponentialBackOff(c.config.ErrorBackOff),
 	}
 	if c.headerExtraction {
 		logsConsumerGroup.headerExtractor = &headerExtractor{
@@ -481,6 +489,7 @@ type tracesConsumerGroupHandler struct {
 	autocommitEnabled bool
 	messageMarking    MessageMarking
 	headerExtractor   HeaderExtractor
+	backOff           *backoff.ExponentialBackOff
 }

 type metricsConsumerGroupHandler struct {
@@ -498,6 +507,7 @@ type metricsConsumerGroupHandler struct {
 	autocommitEnabled bool
 	messageMarking    MessageMarking
 	headerExtractor   HeaderExtractor
+	backOff           *backoff.ExponentialBackOff
 }

 type logsConsumerGroupHandler struct {
@@ -515,6 +525,7 @@ type logsConsumerGroupHandler struct {
 	autocommitEnabled bool
 	messageMarking    MessageMarking
 	headerExtractor   HeaderExtractor
+	backOff           *backoff.ExponentialBackOff
 }

 var (
@@ -582,8 +593,22 @@ func (c *tracesConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
 			if c.messageMarking.After && c.messageMarking.OnError {
 				session.MarkMessage(message, "")
 			}
+			if errorRequiresBackoff(err) && c.backOff != nil {
+				backOffDelay := c.backOff.NextBackOff()
+				if backOffDelay == backoff.Stop {
+					return err
+				}
+				select {
+				case <-session.Context().Done():
+					return nil
+				case <-time.After(backOffDelay):
+				}
+			}
 			return err
 		}
+		if c.backOff != nil {
+			c.backOff.Reset()
+		}
 		if c.messageMarking.After {
 			session.MarkMessage(message, "")
 		}

Review discussion on this hunk:

Reviewer: Should messages be marked if you're going to retry them?

Author: Currently, if the next consumer returns an error, the Kafka receiver simply drops the message and returns the error. Initially I intended to keep this behavior and only implement backoff delays without retrying the message. But after going through the code again, I think we can implement the retry without introducing too much complexity. I've updated the code and added some comments to explain; let me know if that makes sense. I've updated the unit test, but I wonder how I could cover this change in an integration or e2e test, in particular the retry logic: is the offset correctly reset, and is the failed message consumed again in the next loop? Do you have any suggestions?

Author: Hi @atoulme, could you please take another look at the changes I added for the retry logic?

Author: @atoulme, could you please check out the changes and my comments? I'd like to move this PR forward.

Author: I've tested the change by running the collector locally with a config along the lines of the sketch below. I also ran a Kafka broker locally to produce test messages. By adding some temporary logging, I can validate that when the memory limiter processor refuses data, the receiver backs off and retries the message as expected. I also added an entry to the changelog.
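The exact config from the thread isn't reproduced here; the following is a minimal sketch of such a setup, assuming the new ErrorBackOff field is exposed as `error_backoff` under the kafka receiver. The key name, broker address, topic, and memory limits are illustrative assumptions, not taken from the PR:

```yaml
receivers:
  kafka:
    brokers: ["localhost:9092"]
    topic: otlp_spans
    error_backoff:            # assumed YAML key for the new ErrorBackOff field
      enabled: true
      initial_interval: 1s
      max_interval: 10s
      max_elapsed_time: 1m

processors:
  memory_limiter:
    check_interval: 1s
    limit_mib: 100            # deliberately low so the limiter refuses data

exporters:
  debug: {}

service:
  pipelines:
    traces:
      receivers: [kafka]
      processors: [memory_limiter]
      exporters: [debug]
```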
@@ -600,6 +625,10 @@ func (c *tracesConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
 	}
 }

+func errorRequiresBackoff(err error) bool {
+	return errors.Is(err, errMemoryLimiterDataRefused)
+}
+
 func (c *metricsConsumerGroupHandler) Setup(session sarama.ConsumerGroupSession) error {
 	c.readyCloser.Do(func() {
 		close(c.ready)
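The backoff-and-retry control flow above is repeated verbatim in the traces, metrics, and logs handlers. As a distilled, self-contained sketch of that per-message logic, with hypothetical helper names (`handleMessage`, `process`) standing in for the real unmarshal-and-forward path, not code from this PR:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/cenkalti/backoff/v4"
)

// Local sentinel mirroring the receiver's errMemoryLimiterDataRefused.
var errDataRefused = errors.New("data refused due to high memory usage")

// handleMessage mirrors the handlers' per-message logic: on a refusal error it
// waits out the next backoff delay and then returns the error, leaving the
// message unmarked so it is redelivered on the next ConsumeClaim invocation;
// on success it resets the backoff schedule.
func handleMessage(ctx context.Context, b *backoff.ExponentialBackOff, process func() error) error {
	if err := process(); err != nil {
		if errors.Is(err, errDataRefused) && b != nil {
			delay := b.NextBackOff()
			if delay == backoff.Stop {
				return err // backoff budget (MaxElapsedTime) exhausted: give up
			}
			select {
			case <-ctx.Done():
				return nil // shutdown or rebalance: exit quietly
			case <-time.After(delay):
			}
		}
		return err // propagate; sarama will re-invoke the handler
	}
	if b != nil {
		b.Reset() // a success restarts the schedule at InitialInterval
	}
	return nil
}

func main() {
	b := backoff.NewExponentialBackOff()
	b.InitialInterval = 50 * time.Millisecond

	failures := 2
	process := func() error {
		if failures > 0 {
			failures--
			return errDataRefused
		}
		return nil
	}

	// Emulate sarama re-invoking the handler with the same unmarked message
	// after each errored return.
	for handleMessage(context.Background(), b, process) != nil {
		fmt.Println("refused; backed off, redelivering")
	}
	fmt.Println("message delivered")
}
```

Note the asymmetry in the exit paths: returning the error makes sarama redeliver the unmarked message, while returning nil on a canceled session context lets a shutdown or rebalance proceed without surfacing a spurious error.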
@@ -659,8 +688,22 @@ func (c *metricsConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
 			if c.messageMarking.After && c.messageMarking.OnError {
 				session.MarkMessage(message, "")
 			}
+			if errorRequiresBackoff(err) && c.backOff != nil {
+				backOffDelay := c.backOff.NextBackOff()
+				if backOffDelay == backoff.Stop {
+					return err
+				}
+				select {
+				case <-session.Context().Done():
+					return nil
+				case <-time.After(backOffDelay):
+				}
+			}
 			return err
 		}
+		if c.backOff != nil {
+			c.backOff.Reset()
+		}
 		if c.messageMarking.After {
 			session.MarkMessage(message, "")
 		}
@@ -735,8 +778,22 @@ func (c *logsConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
 			if c.messageMarking.After && c.messageMarking.OnError {
 				session.MarkMessage(message, "")
 			}
+			if errorRequiresBackoff(err) && c.backOff != nil {
+				backOffDelay := c.backOff.NextBackOff()
+				if backOffDelay == backoff.Stop {
+					return err
+				}
+				select {
+				case <-session.Context().Done():
+					return nil
+				case <-time.After(backOffDelay):
+				}
+			}
 			return err
 		}
+		if c.backOff != nil {
+			c.backOff.Reset()
+		}
 		if c.messageMarking.After {
 			session.MarkMessage(message, "")
 		}
@@ -753,6 +810,20 @@ func (c *logsConsumerGroupHandler) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
 	}
 }

+func newExponentialBackOff(config configretry.BackOffConfig) *backoff.ExponentialBackOff {
+	if !config.Enabled {
+		return nil
+	}
+	backOff := backoff.NewExponentialBackOff()
+	backOff.InitialInterval = config.InitialInterval
+	backOff.RandomizationFactor = config.RandomizationFactor
+	backOff.Multiplier = config.Multiplier
+	backOff.MaxInterval = config.MaxInterval
+	backOff.MaxElapsedTime = config.MaxElapsedTime
+	backOff.Reset()
+	return backOff
+}
+
 func toSaramaInitialOffset(initialOffset string) (int64, error) {
 	switch initialOffset {
 	case offsetEarliest:
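To see concretely what newExponentialBackOff yields, here is a standalone sketch that applies the same field mapping and prints successive delays until backoff.Stop. The interval values are illustrative, not defaults from the PR:

```go
package main

import (
	"fmt"
	"time"

	"github.com/cenkalti/backoff/v4"
	"go.opentelemetry.io/collector/config/configretry"
)

func main() {
	cfg := configretry.BackOffConfig{
		Enabled:             true,
		InitialInterval:     100 * time.Millisecond,
		RandomizationFactor: 0.5,
		Multiplier:          2.0,
		MaxInterval:         1 * time.Second,
		MaxElapsedTime:      3 * time.Second,
	}

	// Same mapping as the PR's constructor.
	b := backoff.NewExponentialBackOff()
	b.InitialInterval = cfg.InitialInterval
	b.RandomizationFactor = cfg.RandomizationFactor
	b.Multiplier = cfg.Multiplier
	b.MaxInterval = cfg.MaxInterval
	b.MaxElapsedTime = cfg.MaxElapsedTime
	b.Reset()

	// Delays grow by Multiplier, jittered by RandomizationFactor, and are
	// capped at MaxInterval; NextBackOff returns backoff.Stop once more than
	// MaxElapsedTime has passed since the last Reset.
	for {
		d := b.NextBackOff()
		if d == backoff.Stop {
			fmt.Println("stop: MaxElapsedTime exhausted")
			return
		}
		fmt.Printf("waiting %v\n", d)
		time.Sleep(d)
	}
}
```

The Reset() in the constructor matters because MaxElapsedTime is measured from the most recent Reset; the handlers likewise call Reset() after each successful delivery so a later failure starts a fresh schedule rather than inheriting an almost-spent budget.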