
Fix MergeSplit issue that ignores the initial message size #12257


Merged · 1 commit · Feb 3, 2025
25 changes: 25 additions & 0 deletions .chloggen/fix-traces-split.yaml
@@ -0,0 +1,25 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: bug_fix
+
+# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
+component: exporterhelper
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Fix MergeSplit issue that ignores the initial message size.
+
+# One or more tracking issues or pull requests related to the change
+issues: [12257]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
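For orientation before the diffs: the fix makes the initial (current) request's items count toward MaxSizeItems together with everything merged in. Below is a minimal sketch of that accounting, mirroring the numbers in the new TestMergeSplitManySmallLogs further down; batchSizes is a hypothetical helper for illustration, not part of the exporterhelper API.

package main

import "fmt"

// batchSizes mirrors the merge-then-split accounting: the initial
// request's item count is included before the total is split into
// MaxSizeItems-sized batches.
func batchSizes(initial int, incoming []int, maxItems int) []int {
	total := initial // counting this is what the fix restores
	for _, n := range incoming {
		total += n
	}
	var sizes []int
	for total > maxItems {
		sizes = append(sizes, maxItems)
		total -= maxItems
	}
	return append(sizes, total)
}

func main() {
	// 1 initial record plus 1000 requests of 10 records against a 10000 limit.
	incoming := make([]int, 1000)
	for i := range incoming {
		incoming[i] = 10
	}
	fmt.Println(batchSizes(1, incoming, 10000)) // [10000 1]: two batches
}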
73 changes: 18 additions & 55 deletions exporter/exporterhelper/logs_batch.go
@@ -14,73 +14,36 @@ import (
 // MergeSplit splits and/or merges the provided logs request and the current request into one or more requests
 // conforming with the MaxSizeConfig.
 func (req *logsRequest) MergeSplit(_ context.Context, cfg exporterbatcher.MaxSizeConfig, r2 Request) ([]Request, error) {
-	var req2 *logsRequest
 	if r2 != nil {
-		var ok bool
-		req2, ok = r2.(*logsRequest)
+		req2, ok := r2.(*logsRequest)
 		if !ok {
 			return nil, errors.New("invalid input type")
 		}
+		req2.mergeTo(req)
 	}
 
+	// If no limit we can simply merge the new request into the current and return.
 	if cfg.MaxSizeItems == 0 {
-		req.setCachedItemsCount(req.ItemsCount() + req2.ItemsCount())
-		req2.setCachedItemsCount(0)
-		req2.ld.ResourceLogs().MoveAndAppendTo(req.ld.ResourceLogs())
 		return []Request{req}, nil
 	}
+	return req.split(cfg)
+}
 
-	var (
-		res          []Request
-		destReq      *logsRequest
-		capacityLeft = cfg.MaxSizeItems
-	)
-	for _, srcReq := range []*logsRequest{req, req2} {
-		if srcReq == nil {
-			continue
-		}
-
-		srcCount := srcReq.ItemsCount()
-		if srcCount <= capacityLeft {
-			if destReq == nil {
-				destReq = srcReq
-			} else {
-				destReq.setCachedItemsCount(destReq.ItemsCount() + srcCount)
-				srcReq.setCachedItemsCount(0)
-				srcReq.ld.ResourceLogs().MoveAndAppendTo(destReq.ld.ResourceLogs())
-			}
-			capacityLeft -= srcCount
-			continue
-		}
-
-		for {
-			extractedLogs := extractLogs(srcReq.ld, capacityLeft)
-			extractedCount := extractedLogs.LogRecordCount()
-			if extractedCount == 0 {
-				break
-			}
-
-			if destReq == nil {
-				destReq = &logsRequest{ld: extractedLogs, pusher: srcReq.pusher, cachedItemsCount: extractedCount}
-			} else {
-				extractedLogs.ResourceLogs().MoveAndAppendTo(destReq.ld.ResourceLogs())
-				destReq.setCachedItemsCount(destReq.ItemsCount() + extractedCount)
-				srcReq.setCachedItemsCount(srcReq.ItemsCount() - extractedCount)
-			}
-
-			// Create new batch once capacity is reached.
-			capacityLeft -= extractedCount
-			if capacityLeft == 0 {
-				res = append(res, destReq)
-				destReq = nil
-				capacityLeft = cfg.MaxSizeItems
-			}
-		}
-	}
+func (req *logsRequest) mergeTo(dst *logsRequest) {
+	dst.setCachedItemsCount(dst.ItemsCount() + req.ItemsCount())
+	req.setCachedItemsCount(0)
+	req.ld.ResourceLogs().MoveAndAppendTo(dst.ld.ResourceLogs())
+}
 
-	if destReq != nil {
-		res = append(res, destReq)
+func (req *logsRequest) split(cfg exporterbatcher.MaxSizeConfig) ([]Request, error) {
+	var res []Request
+	for req.ItemsCount() > cfg.MaxSizeItems {
+		ld := extractLogs(req.ld, cfg.MaxSizeItems)
+		size := ld.LogRecordCount()
+		req.setCachedItemsCount(req.ItemsCount() - size)
+		res = append(res, &logsRequest{ld: ld, pusher: req.pusher, cachedItemsCount: size})
 	}
+	res = append(res, req)
 	return res, nil
 }
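The new mergeTo helper relies on pdata move semantics: MoveAndAppendTo transfers the resource entries and leaves the source empty, which is why mergeTo also zeroes the source's cached item count. A small illustration of that behavior using only the public pdata API, not the unexported request types:

package main

import (
	"fmt"

	"go.opentelemetry.io/collector/pdata/plog"
)

func main() {
	src := plog.NewLogs()
	src.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty()

	dst := plog.NewLogs()
	dst.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty()

	// MoveAndAppendTo moves entries out of src and leaves it empty,
	// the same move mergeTo performs on the source request.
	src.ResourceLogs().MoveAndAppendTo(dst.ResourceLogs())
	fmt.Println(dst.LogRecordCount(), src.LogRecordCount()) // 2 0
}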
17 changes: 16 additions & 1 deletion exporter/exporterhelper/logs_batch_test.go
@@ -153,9 +153,22 @@ func TestExtractLogs(t *testing.T) {
 	}
 }
 
-func BenchmarkSplittingBasedOnItemCountManySmallLogs(b *testing.B) {
-	// All requests merge into a single batch.
+func TestMergeSplitManySmallLogs(t *testing.T) {
+	// The merged records exceed the 10000-item limit, so the result is two batches.
 	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 10000}
+	merged := []Request{newLogsRequest(testdata.GenerateLogs(1), nil)}
+	for j := 0; j < 1000; j++ {
+		lr2 := newLogsRequest(testdata.GenerateLogs(10), nil)
+		res, _ := merged[len(merged)-1].MergeSplit(context.Background(), cfg, lr2)
+		merged = append(merged[0:len(merged)-1], res...)
+	}
+	assert.Len(t, merged, 2)
+}
+
+func BenchmarkSplittingBasedOnItemCountManySmallLogs(b *testing.B) {
+	// All requests merge into a single batch.
+	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 10010}
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		merged := []Request{newLogsRequest(testdata.GenerateLogs(10), nil)}
 		for j := 0; j < 1000; j++ {
@@ -170,6 +183,7 @@ func BenchmarkSplittingBasedOnItemCountManySmallLogs(b *testing.B) {
 func BenchmarkSplittingBasedOnItemCountManyLogsSlightlyAboveLimit(b *testing.B) {
 	// Every incoming request results in a split.
 	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 10000}
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		merged := []Request{newLogsRequest(testdata.GenerateLogs(0), nil)}
 		for j := 0; j < 10; j++ {
@@ -184,6 +198,7 @@ func BenchmarkSplittingBasedOnItemCountManyLogsSlightlyAboveLimit(b *testing.B)
 func BenchmarkSplittingBasedOnItemCountHugeLogs(b *testing.B) {
 	// One request splits into many batches.
 	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 10000}
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		merged := []Request{newLogsRequest(testdata.GenerateLogs(0), nil)}
 		lr2 := newLogsRequest(testdata.GenerateLogs(100000), nil)
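With b.ReportAllocs() in place, these benchmarks report allocation counts by default. Locally, something like go test -run TestMergeSplitManySmallLogs ./exporter/exporterhelper/ exercises the new test, and go test -run '^$' -bench SplittingBasedOnItemCount ./exporter/exporterhelper/ runs the three benchmarks on their own.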
74 changes: 18 additions & 56 deletions exporter/exporterhelper/metrics_batch.go
@@ -14,74 +14,36 @@ import (
 // MergeSplit splits and/or merges the provided metrics request and the current request into one or more requests
 // conforming with the MaxSizeConfig.
 func (req *metricsRequest) MergeSplit(_ context.Context, cfg exporterbatcher.MaxSizeConfig, r2 Request) ([]Request, error) {
-	var req2 *metricsRequest
 	if r2 != nil {
-		var ok bool
-		req2, ok = r2.(*metricsRequest)
+		req2, ok := r2.(*metricsRequest)
 		if !ok {
 			return nil, errors.New("invalid input type")
 		}
+		req2.mergeTo(req)
 	}
 
+	// If no limit we can simply merge the new request into the current and return.
 	if cfg.MaxSizeItems == 0 {
-		req.setCachedItemsCount(req.ItemsCount() + req2.ItemsCount())
-		req2.setCachedItemsCount(0)
-		req2.md.ResourceMetrics().MoveAndAppendTo(req.md.ResourceMetrics())
 		return []Request{req}, nil
 	}
+	return req.split(cfg)
+}
 
-	var (
-		res          []Request
-		destReq      *metricsRequest
-		capacityLeft = cfg.MaxSizeItems
-	)
-	for _, srcReq := range []*metricsRequest{req, req2} {
-		if srcReq == nil {
-			continue
-		}
-
-		srcCount := srcReq.ItemsCount()
-		if srcCount <= capacityLeft {
-			if destReq == nil {
-				destReq = srcReq
-			} else {
-				destReq.setCachedItemsCount(destReq.ItemsCount() + srcCount)
-				srcReq.setCachedItemsCount(0)
-				srcReq.md.ResourceMetrics().MoveAndAppendTo(destReq.md.ResourceMetrics())
-			}
-			capacityLeft -= srcCount
-			continue
-		}
-
-		for {
-			extractedMetrics := extractMetrics(srcReq.md, capacityLeft)
-			extractedCount := extractedMetrics.DataPointCount()
-			if extractedCount == 0 {
-				break
-			}
-
-			if destReq == nil {
-				destReq = &metricsRequest{md: extractedMetrics, pusher: srcReq.pusher, cachedItemsCount: extractedCount}
-			} else {
-				destReq.setCachedItemsCount(destReq.ItemsCount() + extractedCount)
-				srcReq.setCachedItemsCount(srcReq.ItemsCount() - extractedCount)
-				extractedMetrics.ResourceMetrics().MoveAndAppendTo(destReq.md.ResourceMetrics())
-			}
-
-			// Create new batch once capacity is reached.
-			capacityLeft -= extractedCount
-			if capacityLeft == 0 {
-				res = append(res, destReq)
-				destReq = nil
-				capacityLeft = cfg.MaxSizeItems
-			}
-		}
-	}
+func (req *metricsRequest) mergeTo(dst *metricsRequest) {
+	dst.setCachedItemsCount(dst.ItemsCount() + req.ItemsCount())
+	req.setCachedItemsCount(0)
+	req.md.ResourceMetrics().MoveAndAppendTo(dst.md.ResourceMetrics())
+}
 
-	if destReq != nil {
-		res = append(res, destReq)
+func (req *metricsRequest) split(cfg exporterbatcher.MaxSizeConfig) ([]Request, error) {
+	var res []Request
+	for req.ItemsCount() > cfg.MaxSizeItems {
+		md := extractMetrics(req.md, cfg.MaxSizeItems)
+		size := md.DataPointCount()
+		req.setCachedItemsCount(req.ItemsCount() - size)
+		res = append(res, &metricsRequest{md: md, pusher: req.pusher, cachedItemsCount: size})
 	}
+	res = append(res, req)
 	return res, nil
 }
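Note that for metrics the item unit is data points, not metric definitions: the split loop above decrements by DataPointCount(). A brief pdata-only sketch of the distinction:

package main

import (
	"fmt"

	"go.opentelemetry.io/collector/pdata/pmetric"
)

func main() {
	md := pmetric.NewMetrics()
	m := md.ResourceMetrics().AppendEmpty().ScopeMetrics().AppendEmpty().Metrics().AppendEmpty()
	sum := m.SetEmptySum()
	sum.DataPoints().AppendEmpty()
	sum.DataPoints().AppendEmpty()

	// One metric, two data points: MaxSizeItems is enforced against
	// DataPointCount(), which is what split measures.
	fmt.Println(md.DataPointCount()) // 2
}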
17 changes: 16 additions & 1 deletion exporter/exporterhelper/metrics_batch_test.go
@@ -161,9 +161,22 @@ func TestExtractMetricsInvalidMetric(t *testing.T) {
 	assert.Equal(t, 0, md.ResourceMetrics().Len())
 }
 
-func BenchmarkSplittingBasedOnItemCountManySmallMetrics(b *testing.B) {
-	// All requests merge into a single batch.
+func TestMergeSplitManySmallMetrics(t *testing.T) {
+	// The merged data points exceed the 20000-item limit, so the result is two batches.
 	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 20000}
+	merged := []Request{newMetricsRequest(testdata.GenerateMetrics(1), nil)}
+	for j := 0; j < 1000; j++ {
+		lr2 := newMetricsRequest(testdata.GenerateMetrics(10), nil)
+		res, _ := merged[len(merged)-1].MergeSplit(context.Background(), cfg, lr2)
+		merged = append(merged[0:len(merged)-1], res...)
+	}
+	assert.Len(t, merged, 2)
+}
+
+func BenchmarkSplittingBasedOnItemCountManySmallMetrics(b *testing.B) {
+	// All requests merge into a single batch.
+	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 20020}
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		merged := []Request{newMetricsRequest(testdata.GenerateMetrics(10), nil)}
 		for j := 0; j < 1000; j++ {
@@ -178,6 +191,7 @@ func BenchmarkSplittingBasedOnItemCountManySmallMetrics(b *testing.B) {
 func BenchmarkSplittingBasedOnItemCountManyMetricsSlightlyAboveLimit(b *testing.B) {
 	// Every incoming request results in a split.
 	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 20000}
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		merged := []Request{newMetricsRequest(testdata.GenerateMetrics(0), nil)}
 		for j := 0; j < 10; j++ {
@@ -192,6 +206,7 @@ func BenchmarkSplittingBasedOnItemCountManyMetricsSlightlyAboveLimit(b *testing.
 func BenchmarkSplittingBasedOnItemCountHugeMetrics(b *testing.B) {
 	// One request splits into many batches.
 	cfg := exporterbatcher.MaxSizeConfig{MaxSizeItems: 20000}
+	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
 		merged := []Request{newMetricsRequest(testdata.GenerateMetrics(0), nil)}
 		lr2 := newMetricsRequest(testdata.GenerateMetrics(100000), nil)
73 changes: 18 additions & 55 deletions exporter/exporterhelper/traces_batch.go
@@ -14,73 +14,36 @@ import (
 // MergeSplit splits and/or merges the provided traces request and the current request into one or more requests
 // conforming with the MaxSizeConfig.
 func (req *tracesRequest) MergeSplit(_ context.Context, cfg exporterbatcher.MaxSizeConfig, r2 Request) ([]Request, error) {
-	var req2 *tracesRequest
 	if r2 != nil {
-		var ok bool
-		req2, ok = r2.(*tracesRequest)
+		req2, ok := r2.(*tracesRequest)
 		if !ok {
 			return nil, errors.New("invalid input type")
 		}
+		req2.mergeTo(req)
 	}
 
+	// If no limit we can simply merge the new request into the current and return.
 	if cfg.MaxSizeItems == 0 {
-		req.setCachedItemsCount(req.ItemsCount() + req2.ItemsCount())
-		req2.setCachedItemsCount(0)
-		req2.td.ResourceSpans().MoveAndAppendTo(req.td.ResourceSpans())
 		return []Request{req}, nil
 	}
+	return req.split(cfg)
+}
 
-	var (
-		res          []Request
-		destReq      *tracesRequest
-		capacityLeft = cfg.MaxSizeItems
-	)
-	for _, srcReq := range []*tracesRequest{req, req2} {
-		if srcReq == nil {
-			continue
-		}
-
-		srcCount := srcReq.ItemsCount()
-		if srcCount <= capacityLeft {
-			if destReq == nil {
-				destReq = srcReq
-			} else {
-				destReq.setCachedItemsCount(destReq.ItemsCount() + srcCount)
-				srcReq.setCachedItemsCount(0)
-				srcReq.td.ResourceSpans().MoveAndAppendTo(destReq.td.ResourceSpans())
-			}
-			capacityLeft -= srcCount
-			continue
-		}
-
-		for {
-			extractedTraces := extractTraces(srcReq.td, capacityLeft)
-			extractedCount := extractedTraces.SpanCount()
-			if extractedCount == 0 {
-				break
-			}
-
-			if destReq == nil {
-				destReq = &tracesRequest{td: extractedTraces, pusher: srcReq.pusher, cachedItemsCount: extractedCount}
-			} else {
-				destReq.setCachedItemsCount(destReq.ItemsCount() + extractedCount)
-				srcReq.setCachedItemsCount(srcReq.ItemsCount() - extractedCount)
-				extractedTraces.ResourceSpans().MoveAndAppendTo(destReq.td.ResourceSpans())
-			}
-
-			// Create new batch once capacity is reached.
-			capacityLeft -= extractedCount
-			if capacityLeft == 0 {
-				res = append(res, destReq)
-				destReq = nil
-				capacityLeft = cfg.MaxSizeItems
-			}
-		}
-	}
+func (req *tracesRequest) mergeTo(dst *tracesRequest) {
+	dst.setCachedItemsCount(dst.ItemsCount() + req.ItemsCount())
+	req.setCachedItemsCount(0)
+	req.td.ResourceSpans().MoveAndAppendTo(dst.td.ResourceSpans())
+}
 
-	if destReq != nil {
-		res = append(res, destReq)
+func (req *tracesRequest) split(cfg exporterbatcher.MaxSizeConfig) ([]Request, error) {
+	var res []Request
+	for req.ItemsCount() > cfg.MaxSizeItems {
+		td := extractTraces(req.td, cfg.MaxSizeItems)
+		size := td.SpanCount()
+		req.setCachedItemsCount(req.ItemsCount() - size)
+		res = append(res, &tracesRequest{td: td, pusher: req.pusher, cachedItemsCount: size})
 	}
+	res = append(res, req)
 	return res, nil
 }
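The traces variant is identical in shape, with spans as the item unit. For the no-limit fast path (MaxSizeItems == 0), merging reduces to moving the incoming resource spans into the current request; a hedged pdata-only sketch of that outcome:

package main

import (
	"fmt"

	"go.opentelemetry.io/collector/pdata/ptrace"
)

func main() {
	cur := ptrace.NewTraces()
	cur.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty()

	incoming := ptrace.NewTraces()
	incoming.ResourceSpans().AppendEmpty().ScopeSpans().AppendEmpty().Spans().AppendEmpty()

	// With no size limit, MergeSplit returns a single request holding
	// all spans; the incoming payload is left empty after the move.
	incoming.ResourceSpans().MoveAndAppendTo(cur.ResourceSpans())
	fmt.Println(cur.SpanCount(), incoming.SpanCount()) // 2 0
}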