Skip to content

Commit c0deae5

Browse files
authored
[exporterhelper] Fix invalid write index updates in the persistent queue (#8963)
**Description:** Fixes a bug where the in-memory value of the persistent queue's write index was updated even if writing to the storage failed. Normally this has no negative effect other than temporarily inflating the queue size, as the read loop simply skips over the nonexistent record. However, when the storage has no available space, the in-memory and in-storage write indexes can diverge significantly, at which point a collector restart leaves the queue in an inconsistent state. Note that the same issue affects reading from the queue, but in that case the writes are very small, and in practice the storage will almost always have enough space to carry them out. **Link to tracking Issue:** #8115 **Testing:** The `TestPersistentQueue_StorageFull` test previously passed only by accident. Writing would leave one additional item in the put channel, then the first read would fail (as there is not enough space to write the read index and the dispatched items), but subsequent reads would succeed, so the two bugs cancelled each other out. I modified this test to check the number of items in the queue after inserting them, and also to expect one fewer item to be returned.
1 parent b7f49f1 commit c0deae5

File tree

3 files changed

+43
-5
lines changed

3 files changed

+43
-5
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
7+
component: exporterhelper
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Fix invalid write index updates in the persistent queue
11+
12+
# One or more tracking issues or pull requests related to the change
13+
issues: [8115]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# Optional: The change log or logs in which this entry should be included.
21+
# e.g. '[user]' or '[user, api]'
22+
# Include 'user' if the change is relevant to end users.
23+
# Include 'api' if there is a change to a library API.
24+
# Default: '[user]'
25+
change_logs: []

exporter/exporterhelper/internal/persistent_queue.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -215,20 +215,27 @@ func (pq *persistentQueue[T]) putInternal(ctx context.Context, req T) error {
215215
}
216216

217217
itemKey := getItemKey(pq.writeIndex)
218-
pq.writeIndex++
218+
newIndex := pq.writeIndex + 1
219219

220220
reqBuf, err := pq.marshaler(req)
221221
if err != nil {
222222
return err
223223
}
224-
err = pq.client.Batch(ctx,
225-
storage.SetOperation(writeIndexKey, itemIndexToBytes(pq.writeIndex)),
226-
storage.SetOperation(itemKey, reqBuf))
227224

225+
// Carry out a transaction where we both add the item and update the write index
226+
ops := []storage.Operation{
227+
storage.SetOperation(writeIndexKey, itemIndexToBytes(newIndex)),
228+
storage.SetOperation(itemKey, reqBuf),
229+
}
230+
if storageErr := pq.client.Batch(ctx, ops...); storageErr != nil {
231+
return storageErr
232+
}
233+
234+
pq.writeIndex = newIndex
228235
// Inform the loop that there's some data to process
229236
pq.putChan <- struct{}{}
230237

231-
return err
238+
return nil
232239
}
233240

234241
// getNextItem pulls the next available item from the persistent storage along with a callback function that should be

exporter/exporterhelper/internal/persistent_queue_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,9 @@ func TestPersistentQueue_StorageFull(t *testing.T) {
626626
reqCount++
627627
}
628628

629+
// Check that the size is correct
630+
require.Equal(t, reqCount, ps.Size(), "Size must be equal to the number of items inserted")
631+
629632
// Manually set the storage to only have a small amount of free space left
630633
newMaxSize := client.GetSizeInBytes() + freeSpaceInBytes
631634
client.SetMaxSizeInBytes(newMaxSize)
@@ -634,6 +637,9 @@ func TestPersistentQueue_StorageFull(t *testing.T) {
634637
require.Error(t, ps.Offer(context.Background(), req))
635638

636639
// Take out all the items
640+
// Getting the first item fails, as we can't update the state in storage, so we just delete it without returning it
641+
// Subsequent items succeed, as deleting the first item frees enough space for the state update
642+
reqCount--
637643
for i := reqCount; i > 0; i-- {
638644
require.True(t, ps.Consume(func(context.Context, ptrace.Traces) {}))
639645
}

0 commit comments

Comments
 (0)