Skip to content

Commit e495816

Browse files
axwcarsonipandrzej-stencel
authored
[exporter/elasticsearch] remove dedup config (#33776)
**Description:** Make the `dedup` configuration setting a no-op, and always de-duplicate. The setting is still accepted for now, but its value is ignored and a deprecation warning is logged whenever it is set. Elasticsearch does not permit duplicate keys in JSON objects, and this configuration adds more complexity to the code than it is worth. I've simplified the `internal/objmodel` API slightly, unexporting the `Sort` methods, which are now called internally by the unconditional call to `Dedup`. **Link to tracking Issue:** Closes #33773 **Testing:** Ran the unit tests, which cover deduplication. None of the tests in package elasticsearchexporter covered `dedup: false`. **Documentation:** Updated the exporter README to describe `dedup` as deprecated and non-operational. --------- Co-authored-by: Carson Ip <[email protected]> Co-authored-by: Andrzej Stencel <[email protected]>
1 parent fce2cfe commit e495816

File tree

16 files changed

+150
-144
lines changed

16 files changed

+150
-144
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: breaking
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: elasticsearchexporter
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Make "dedup" option no-op, always de-duplicate.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [33773]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
Elasticsearch does not permit duplicate keys in JSON objects,
20+
so there is no value in being able to configure deduplication.
21+
22+
# If your change doesn't affect end users or the exported elements of any package,
23+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
24+
# Optional: The change log or logs in which this entry should be included.
25+
# e.g. '[user]' or '[user, api]'
26+
# Include 'user' if the change is relevant to end users.
27+
# Include 'api' if there is a change to a library API.
28+
# Default: '[user]'
29+
change_logs: [user]

cmd/otelcontribcol/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,7 @@ require (
727727
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
728728
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.954 // indirect
729729
github.com/tg123/go-htpasswd v1.2.2 // indirect
730-
github.com/tidwall/gjson v1.14.2 // indirect
730+
github.com/tidwall/gjson v1.17.1 // indirect
731731
github.com/tidwall/match v1.1.1 // indirect
732732
github.com/tidwall/pretty v1.2.0 // indirect
733733
github.com/tidwall/tinylru v1.1.0 // indirect

cmd/otelcontribcol/go.sum

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exporter/elasticsearchexporter/README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,9 @@ behaviours, which may be configured through the following settings:
129129
- `raw`: Omit the `Attributes.` string prefixed to field names for log and
130130
span attributes as well as omit the `Events.` string prefixed to
131131
field names for span events.
132-
- `dedup` (default=true; DEPRECATED, in future deduplication will always be enabled):
133-
Try to find and remove duplicate fields/attributes from events before publishing
134-
to Elasticsearch. Some structured logging libraries can produce duplicate fields
135-
(for example zap). Elasticsearch will reject documents that have duplicate fields.
132+
- `dedup` (DEPRECATED). This configuration is deprecated and non-operational,
133+
and will be removed in the future. Object keys are always deduplicated to
134+
avoid Elasticsearch rejecting documents.
136135
- `dedot` (default=true; DEPRECATED, in future dedotting will always be enabled
137136
for ECS mode, and never for other modes): When enabled attributes with `.`
138137
will be split into proper json objects.

exporter/elasticsearchexporter/config.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,12 @@ type MappingsSettings struct {
160160
// Mode configures the field mappings.
161161
Mode string `mapstructure:"mode"`
162162

163-
// Try to find and remove duplicate fields
163+
// Dedup is non-operational, and will be removed in the future.
164164
//
165-
// Deprecated: [v0.104.0] deduplication will always be applied in future,
166-
// with no option to disable. Disabling deduplication is not meaningful,
167-
// as Elasticsearch will reject documents with duplicate JSON object keys.
168-
Dedup bool `mapstructure:"dedup"`
165+
// Deprecated: [v0.104.0] deduplication is always enabled, and cannot be
166+
// disabled. Disabling deduplication is not meaningful, as Elasticsearch
167+
// will always reject documents with duplicate JSON object keys.
168+
Dedup *bool `mapstructure:"dedup,omitempty"`
169169

170170
// Deprecated: [v0.104.0] dedotting will always be applied for ECS mode
171171
// in future, and never for other modes. Elasticsearch's "dot_expander"
@@ -322,8 +322,8 @@ func (cfg *Config) MappingMode() MappingMode {
322322
}
323323

324324
func logConfigDeprecationWarnings(cfg *Config, logger *zap.Logger) {
325-
if !cfg.Mapping.Dedup {
326-
logger.Warn("dedup has been deprecated, and will always be enabled in future")
325+
if cfg.Mapping.Dedup != nil {
326+
logger.Warn("dedup is deprecated, and is always enabled")
327327
}
328328
if cfg.Mapping.Dedot && cfg.MappingMode() != MappingECS || !cfg.Mapping.Dedot && cfg.MappingMode() == MappingECS {
329329
logger.Warn("dedot has been deprecated: in the future, dedotting will always be performed in ECS mode only")

exporter/elasticsearchexporter/config_test.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ func TestConfig(t *testing.T) {
100100
},
101101
Mapping: MappingsSettings{
102102
Mode: "none",
103-
Dedup: true,
104103
Dedot: true,
105104
},
106105
LogstashFormat: LogstashFormatSettings{
@@ -162,7 +161,6 @@ func TestConfig(t *testing.T) {
162161
},
163162
Mapping: MappingsSettings{
164163
Mode: "none",
165-
Dedup: true,
166164
Dedot: true,
167165
},
168166
LogstashFormat: LogstashFormatSettings{
@@ -224,7 +222,6 @@ func TestConfig(t *testing.T) {
224222
},
225223
Mapping: MappingsSettings{
226224
Mode: "none",
227-
Dedup: true,
228225
Dedot: true,
229226
},
230227
LogstashFormat: LogstashFormatSettings{

exporter/elasticsearchexporter/exporter.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ func newExporter(
4444
}
4545

4646
model := &encodeModel{
47-
dedup: cfg.Mapping.Dedup,
4847
dedot: cfg.Mapping.Dedot,
4948
mode: cfg.MappingMode(),
5049
}

exporter/elasticsearchexporter/factory.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ func createDefaultConfig() component.Config {
7676
},
7777
Mapping: MappingsSettings{
7878
Mode: "none",
79-
Dedup: true,
8079
Dedot: true,
8180
},
8281
LogstashFormat: LogstashFormatSettings{

exporter/elasticsearchexporter/factory_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,9 @@ func TestFactory_CreateLogsAndTracesExporterWithDeprecatedIndexOption(t *testing
100100
func TestFactory_DedupDeprecated(t *testing.T) {
101101
factory := NewFactory()
102102
cfg := withDefaultConfig(func(cfg *Config) {
103+
dedup := false
103104
cfg.Endpoint = "http://testing.invalid:9200"
104-
cfg.Mapping.Dedup = false
105+
cfg.Mapping.Dedup = &dedup
105106
cfg.Mapping.Dedot = false // avoid dedot warnings
106107
})
107108

@@ -123,9 +124,9 @@ func TestFactory_DedupDeprecated(t *testing.T) {
123124

124125
records := logObserver.AllUntimed()
125126
assert.Len(t, records, 3)
126-
assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[0].Message)
127-
assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[1].Message)
128-
assert.Equal(t, "dedup has been deprecated, and will always be enabled in future", records[2].Message)
127+
assert.Equal(t, "dedup is deprecated, and is always enabled", records[0].Message)
128+
assert.Equal(t, "dedup is deprecated, and is always enabled", records[1].Message)
129+
assert.Equal(t, "dedup is deprecated, and is always enabled", records[2].Message)
129130
}
130131

131132
func TestFactory_DedotDeprecated(t *testing.T) {

exporter/elasticsearchexporter/go.mod

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ require (
1111
github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.104.0
1212
github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.104.0
1313
github.com/stretchr/testify v1.9.0
14+
github.com/tidwall/gjson v1.17.1
1415
go.opentelemetry.io/collector/component v0.104.1-0.20240712081520-6227646b0146
1516
go.opentelemetry.io/collector/config/configauth v0.104.1-0.20240712081520-6227646b0146
1617
go.opentelemetry.io/collector/config/configcompression v1.11.1-0.20240712081520-6227646b0146
@@ -64,6 +65,8 @@ require (
6465
github.com/prometheus/common v0.55.0 // indirect
6566
github.com/prometheus/procfs v0.15.1 // indirect
6667
github.com/rs/cors v1.11.0 // indirect
68+
github.com/tidwall/match v1.1.1 // indirect
69+
github.com/tidwall/pretty v1.2.0 // indirect
6770
go.elastic.co/apm/module/apmzap/v2 v2.6.0 // indirect
6871
go.elastic.co/apm/v2 v2.6.0 // indirect
6972
go.elastic.co/fastjson v1.3.0 // indirect

0 commit comments

Comments
 (0)