Skip to content

Commit a4b60c7

Browse files
[processor/logdedup] feat: add ottl condition to logdedup processor (#35443)
**Description:** Adds OTTL Condition field to Deduplicate Logs Processor **Link to tracking Issue:** Closes #35440 **Testing:** - Tested functionality with BindPlane - Added unit tests for the condition logic **Documentation:** Added documentation to the logdedup processor README about the condition field and an example configuration with a condition. --------- Co-authored-by: Mike Goldsmith <[email protected]>
1 parent 679374a commit a4b60c7

25 files changed

+881
-67
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: logdedupprocessor
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add a `condition` field to the Log DeDuplication Processor.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [35440]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

pkg/pdatatest/plogtest/logs_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,14 @@ func TestCompareLogs(t *testing.T) {
142142
withoutOptions: errors.New(`resource "map[]": scope "collector": log record "map[]": timestamp doesn't match expected: 11651379494838206465, actual: 11651379494838206464`),
143143
withOptions: nil,
144144
},
145+
{
146+
name: "ignore-log-record-attribute-value",
147+
compareOptions: []CompareLogsOption{
148+
IgnoreLogRecordAttributeValue("Key1"),
149+
},
150+
withoutOptions: errors.New(`resource "map[]": scope "": missing expected log record: map[Key1:Val2]; resource "map[]": scope "": unexpected log record: map[Key1:Val1]`),
151+
withOptions: nil,
152+
},
145153
}
146154

147155
for _, tc := range tcs {

pkg/pdatatest/plogtest/options.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,47 @@ func (opt ignoreResourceAttributeValue) applyOnLogs(expected, actual plog.Logs)
4242
opt.maskLogsResourceAttributeValue(actual)
4343
}
4444

45-
func (opt ignoreResourceAttributeValue) maskLogsResourceAttributeValue(metrics plog.Logs) {
46-
rls := metrics.ResourceLogs()
45+
func (opt ignoreResourceAttributeValue) maskLogsResourceAttributeValue(logs plog.Logs) {
46+
rls := logs.ResourceLogs()
4747
for i := 0; i < rls.Len(); i++ {
4848
internal.MaskResourceAttributeValue(rls.At(i).Resource(), opt.attributeName)
4949
}
5050
}
5151

52+
// IgnoreLogRecordAttributeValue is a CompareLogsOption that sets the value of an attribute
53+
// to empty bytes for every log record
54+
func IgnoreLogRecordAttributeValue(attributeName string) CompareLogsOption {
55+
return ignoreLogRecordAttributeValue{
56+
attributeName: attributeName,
57+
}
58+
}
59+
60+
type ignoreLogRecordAttributeValue struct {
61+
attributeName string
62+
}
63+
64+
func (opt ignoreLogRecordAttributeValue) applyOnLogs(expected, actual plog.Logs) {
65+
opt.maskLogRecordAttributeValue(expected)
66+
opt.maskLogRecordAttributeValue(actual)
67+
}
68+
69+
func (opt ignoreLogRecordAttributeValue) maskLogRecordAttributeValue(logs plog.Logs) {
70+
rls := logs.ResourceLogs()
71+
for i := 0; i < logs.ResourceLogs().Len(); i++ {
72+
sls := rls.At(i).ScopeLogs()
73+
for j := 0; j < sls.Len(); j++ {
74+
lrs := sls.At(j).LogRecords()
75+
for k := 0; k < lrs.Len(); k++ {
76+
lr := lrs.At(k)
77+
val, exists := lr.Attributes().Get(opt.attributeName)
78+
if exists {
79+
val.SetEmptyBytes()
80+
}
81+
}
82+
}
83+
}
84+
}
85+
5286
func IgnoreTimestamp() CompareLogsOption {
5387
return compareLogsOptionFunc(func(expected, actual plog.Logs) {
5488
now := pcommon.NewTimestampFromTime(time.Now())
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
resourceLogs:
2+
- resource: {}
3+
scopeLogs:
4+
- logRecords:
5+
- attributes:
6+
- key: Key1
7+
value:
8+
stringValue: Val1
9+
body: {}
10+
spanId: ""
11+
traceId: ""
12+
scope: {}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
resourceLogs:
2+
- resource: {}
3+
scopeLogs:
4+
- logRecords:
5+
- attributes:
6+
- key: Key1
7+
value:
8+
stringValue: Val2
9+
body: {}
10+
spanId: ""
11+
traceId: ""
12+
scope: {}

processor/logdedupprocessor/README.md

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ This processor is used to deduplicate logs by detecting identical logs over a ra
1515

1616
## How It Works
1717
1. The user configures the log deduplication processor in the desired logs pipeline.
18-
2. All logs sent to the processor and aggregated over the configured `interval`. Logs are considered identical if they have the same body, resource attributes, severity, and log attributes.
18+
2. If no `conditions` are configured, all logs are eligible for aggregation. If `conditions` are configured, a log entry is eligible for aggregation when at least one of the `conditions` evaluates to `true` for it. Eligible identical logs are aggregated over the configured `interval`. Logs are considered identical if they have the same body, resource attributes, severity, and log attributes. Logs that do not match any condition in `conditions` are passed onward in the pipeline without aggregation.
1919
3. After the interval, the processor emits a single log with the count of logs that were deduplicated. The emitted log will have the same body, resource attributes, severity, and log attributes as the original log. The emitted log will also have the following new attributes:
2020

2121
- `log_count`: The count of logs that were deduplicated over the interval. The name of the attribute is configurable via the `log_count_attribute` parameter.
@@ -25,13 +25,17 @@ This processor is used to deduplicate logs by detecting identical logs over a ra
2525
**Note**: The `ObservedTimestamp` and `Timestamp` of the emitted log will be the time that the aggregated log was emitted and will not be the same as the `ObservedTimestamp` and `Timestamp` of the original logs.
2626

2727
## Configuration
28-
| Field | Type | Default | Description |
29-
| --- | --- | --- | --- |
30-
| interval | duration | `10s` | The interval at which logs are aggregated. The counter will reset after each interval. |
31-
| log_count_attribute | string | `log_count` | The name of the count attribute of deduplicated logs that will be added to the emitted aggregated log. |
32-
| timezone | string | `UTC` | The timezone of the `first_observed_timestamp` and `last_observed_timestamp` timestamps on the emitted aggregated log. The available locations depend on the local IANA Time Zone database. [This page](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) contains many examples, such as `America/New_York`. |
33-
| exclude_fields | []string | `[]` | Fields to exclude from duplication matching. Fields can be excluded from the log `body` or `attributes`. These fields will not be present in the emitted aggregated log. Nested fields must be `.` delimited. If a field contains a `.` it can be escaped by using a `\` see [example config](#example-config-with-excluded-fields).<br><br>**Note**: The entire `body` cannot be excluded. If the body is a map then fields within it can be excluded. |
28+
| Field | Type | Default | Description |
29+
| --- | --- | --- | --- |
30+
| interval | duration | `10s` | The interval at which logs are aggregated. The counter will reset after each interval. |
31+
| conditions | []string | `[]` | A slice of [OTTL] expressions used to evaluate which log records are deduped. All paths in the [log context] are available to reference. All [converters] are available to use. |
32+
| log_count_attribute | string | `log_count` | The name of the count attribute of deduplicated logs that will be added to the emitted aggregated log. |
33+
| timezone | string | `UTC` | The timezone of the `first_observed_timestamp` and `last_observed_timestamp` timestamps on the emitted aggregated log. The available locations depend on the local IANA Time Zone database. [This page](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) contains many examples, such as `America/New_York`. |
34+
| exclude_fields | []string | `[]` | Fields to exclude from duplication matching. Fields can be excluded from the log `body` or `attributes`. These fields will not be present in the emitted aggregated log. Nested fields must be `.` delimited. If a field contains a `.` it can be escaped by using a `\` see [example config](#example-config-with-excluded-fields).<br><br>**Note**: The entire `body` cannot be excluded. If the body is a map then fields within it can be excluded. |
3435

36+
[OTTL]: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/v0.109.0/pkg/ottl#readme
37+
[converters]: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.109.0/pkg/ottl/ottlfuncs/README.md#converters
38+
[log context]: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/v0.109.0/pkg/ottl/contexts/ottllog/README.md
3539

3640
### Example Config
3741
The following config is an example configuration for the log deduplication processor. It is configured with an aggregation interval of `60 seconds`, a timezone of `America/Los_Angeles`, and a log count attribute of `dedup_count`. It has no fields being excluded.
@@ -82,3 +86,30 @@ service:
8286
processors: [logdedup]
8387
exporters: [googlecloud]
8488
```
89+
90+
91+
### Example Config with Conditions
92+
The following config is an example configuration that deduplicates only those log records where the attribute `ID` equals `1` OR the resource attribute `service.name` equals `my-service`:
93+
94+
```yaml
95+
receivers:
96+
filelog:
97+
include: [./example/*.log]
98+
processors:
99+
logdedup:
100+
conditions:
101+
- attributes["ID"] == 1
102+
- resource.attributes["service.name"] == "my-service"
103+
interval: 60s
104+
log_count_attribute: dedup_count
105+
timezone: 'America/Los_Angeles'
106+
exporters:
107+
googlecloud:
108+
109+
service:
110+
pipelines:
111+
logs:
112+
receivers: [filelog]
113+
processors: [logdedup]
114+
exporters: [googlecloud]
115+
```

processor/logdedupprocessor/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ type Config struct {
4444
Interval time.Duration `mapstructure:"interval"`
4545
Timezone string `mapstructure:"timezone"`
4646
ExcludeFields []string `mapstructure:"exclude_fields"`
47+
Conditions []string `mapstructure:"conditions"`
4748
}
4849

4950
// createDefaultConfig returns the default config for the processor.
@@ -53,6 +54,7 @@ func createDefaultConfig() component.Config {
5354
Interval: defaultInterval,
5455
Timezone: defaultTimezone,
5556
ExcludeFields: []string{},
57+
Conditions: []string{},
5658
}
5759
}
5860

processor/logdedupprocessor/config_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func TestValidateConfig(t *testing.T) {
9090
LogCountAttribute: defaultLogCountAttribute,
9191
Interval: defaultInterval,
9292
Timezone: defaultTimezone,
93+
Conditions: []string{},
9394
ExcludeFields: []string{"body.thing", "attributes.otherthing"},
9495
},
9596
expectedErr: nil,

processor/logdedupprocessor/factory.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
"go.opentelemetry.io/collector/consumer"
1212
"go.opentelemetry.io/collector/processor"
1313

14+
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/filter/filterottl"
15+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
1416
"github.com/open-telemetry/opentelemetry-collector-contrib/processor/logdedupprocessor/internal/metadata"
1517
)
1618

@@ -30,5 +32,25 @@ func createLogsProcessor(_ context.Context, settings processor.Settings, cfg com
3032
return nil, fmt.Errorf("invalid config type: %+v", cfg)
3133
}
3234

33-
return newProcessor(processorCfg, consumer, settings)
35+
processor, err := newProcessor(processorCfg, consumer, settings)
36+
if err != nil {
37+
return nil, fmt.Errorf("error creating processor: %w", err)
38+
}
39+
40+
if len(processorCfg.Conditions) == 0 {
41+
processor.conditions = nil
42+
} else {
43+
conditions, err := filterottl.NewBoolExprForLog(
44+
processorCfg.Conditions,
45+
filterottl.StandardLogFuncs(),
46+
ottl.PropagateError,
47+
settings.TelemetrySettings,
48+
)
49+
if err != nil {
50+
return nil, fmt.Errorf("invalid condition: %w", err)
51+
}
52+
processor.conditions = conditions
53+
}
54+
55+
return processor, nil
3456
}

processor/logdedupprocessor/factory_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,37 @@ func TestCreateLogs(t *testing.T) {
3737
cfg: nil,
3838
expectedErr: "invalid config type",
3939
},
40+
{
41+
name: "valid custom condition",
42+
cfg: &Config{
43+
LogCountAttribute: defaultLogCountAttribute,
44+
Interval: defaultInterval,
45+
Timezone: defaultTimezone,
46+
ExcludeFields: []string{},
47+
Conditions: []string{"false"},
48+
},
49+
},
50+
{
51+
name: "valid multiple conditions",
52+
cfg: &Config{
53+
LogCountAttribute: defaultLogCountAttribute,
54+
Interval: defaultInterval,
55+
Timezone: defaultTimezone,
56+
ExcludeFields: []string{},
57+
Conditions: []string{"false", `(attributes["ID"] == 1)`},
58+
},
59+
},
60+
{
61+
name: "invalid condition",
62+
cfg: &Config{
63+
LogCountAttribute: defaultLogCountAttribute,
64+
Interval: defaultInterval,
65+
Timezone: defaultTimezone,
66+
ExcludeFields: []string{},
67+
Conditions: []string{"x"},
68+
},
69+
expectedErr: "invalid condition",
70+
},
4071
}
4172

4273
for _, tc := range testCases {

0 commit comments

Comments
 (0)