diff --git a/.chloggen/semantic_source_identifier.yaml b/.chloggen/semantic_source_identifier.yaml
new file mode 100644
index 0000000000000..912bf1e4a1b4e
--- /dev/null
+++ b/.chloggen/semantic_source_identifier.yaml
@@ -0,0 +1,28 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: bug_fix
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: pkg/stanza
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Fix default source identifier in recombine operator
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [37210]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: |
+  Its default value is now aligned with the semantic conventions: `attributes["log.file.path"]`
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
diff --git a/pkg/stanza/docs/operators/recombine.md b/pkg/stanza/docs/operators/recombine.md
index 891b1af079fef..dff090d96f28d 100644
--- a/pkg/stanza/docs/operators/recombine.md
+++ b/pkg/stanza/docs/operators/recombine.md
@@ -4,22 +4,22 @@ The `recombine` operator combines consecutive logs into single logs based on sim
 
 ### Configuration Fields
 
-| Field | Default | Description |
-| --- | --- | --- |
-| `id` | `recombine` | A unique identifier for the operator. |
-| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. |
-| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](../types/on_error.md). |
-| `is_first_entry` | | An [expression](../types/expression.md) that returns true if the entry being processed is the first entry in a multiline series. |
-| `is_last_entry` | | An [expression](../types/expression.md) that returns true if the entry being processed is the last entry in a multiline series. |
-| `combine_field` | required | The [field](../types/field.md) from all the entries that will be recombined. |
-| `combine_with` | `"\n"` | The string that is put between the combined entries. This can be an empty string as well. When using special characters like `\n`, be sure to enclose the value in double quotes: `"\n"`. |
-| `max_batch_size` | 1000 | The maximum number of consecutive entries that will be combined into a single entry. |
-| `max_unmatched_batch_size` | 100 | The maximum number of consecutive entries that will be combined into a single entry before the match occurs (with `is_first_entry` or `is_last_entry`), e.g. `max_unmatched_batch_size=0` - all entries combined, `max_unmatched_batch_size=1` - all entries uncombined until the match occurs, `max_unmatched_batch_size=100` - entries combined into 100-entry-packages until the match occurs |
-| `overwrite_with` | `newest` | Whether to use the fields from the `oldest` or the `newest` entry for all the fields that are not combined. |
-| `force_flush_period` | `5s` | Flush timeout after which entries will be flushed aborting the wait for their sub parts to be merged with. |
-| `source_identifier` | `$attributes["file.path"]` | The [field](../types/field.md) to separate one source of logs from others when combining them. |
-| `max_sources` | 1000 | The maximum number of unique sources allowed concurrently to be tracked for combining separately. |
-| `max_log_size` | 0 | The maximum bytes size of the combined field. Once the size exceeds the limit, all received entries of the source will be combined and flushed. "0" of max_log_size means no limit. |
+| Field | Default | Description |
+| --- | --- | --- |
+| `id` | `recombine` | A unique identifier for the operator. |
+| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. |
+| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](../types/on_error.md). |
+| `is_first_entry` | | An [expression](../types/expression.md) that returns true if the entry being processed is the first entry in a multiline series. |
+| `is_last_entry` | | An [expression](../types/expression.md) that returns true if the entry being processed is the last entry in a multiline series. |
+| `combine_field` | required | The [field](../types/field.md) from all the entries that will be recombined. |
+| `combine_with` | `"\n"` | The string that is put between the combined entries. This can be an empty string as well. When using special characters like `\n`, be sure to enclose the value in double quotes: `"\n"`. |
+| `max_batch_size` | 1000 | The maximum number of consecutive entries that will be combined into a single entry. |
+| `max_unmatched_batch_size` | 100 | The maximum number of consecutive entries that will be combined into a single entry before a match occurs (with `is_first_entry` or `is_last_entry`), e.g. `max_unmatched_batch_size=0` combines all entries, `max_unmatched_batch_size=1` leaves all entries uncombined until a match occurs, and `max_unmatched_batch_size=100` combines entries into batches of 100 until a match occurs. |
+| `overwrite_with` | `newest` | Whether to use the fields from the `oldest` or the `newest` entry for all the fields that are not combined. |
+| `force_flush_period` | `5s` | Flush timeout after which entries will be flushed, aborting the wait for their remaining parts to be merged. |
+| `source_identifier` | `attributes["log.file.path"]` | The [field](../types/field.md) to separate one source of logs from others when combining them. |
+| `max_sources` | 1000 | The maximum number of unique sources allowed concurrently to be tracked for combining separately. |
+| `max_log_size` | 0 | The maximum byte size of the combined field. Once the size exceeds this limit, all received entries of the source are combined and flushed. A value of `0` means no limit. |
 
 Exactly one of `is_first_entry` and `is_last_entry` must be specified.
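With this change, a `recombine` operator inside a filelog receiver separates sources by the `log.file.path` attribute out of the box, so `source_identifier` usually no longer needs to be set. A minimal sketch of such a pipeline, assuming a filelog receiver and an illustrative first-line expression (the include path and expression are examples, not taken from this diff):

```yaml
receivers:
  filelog:
    include: [/var/log/app/*.log]
    operators:
      # Group continuation lines (those starting with whitespace, e.g.
      # stack trace frames) back into the entry that started them.
      - type: recombine
        combine_field: body
        is_first_entry: body matches "^[^\\s]"
        # Redundant with the new default, shown only for clarity:
        source_identifier: attributes["log.file.path"]
```

Because the filelog receiver sets `attributes["log.file.path"]` on every entry (see `attrs.LogFilePath` in the Go changes below), the default now matches real data instead of the stale `file.path` attribute, so entries from different files are no longer interleaved into one combined log.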
diff --git a/pkg/stanza/operator/input/file/input_test.go b/pkg/stanza/operator/input/file/input_test.go
index 9319c71f7ba50..16c3384ab2f43 100644
--- a/pkg/stanza/operator/input/file/input_test.go
+++ b/pkg/stanza/operator/input/file/input_test.go
@@ -13,6 +13,7 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/attrs"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil"
 )
 
@@ -62,7 +63,7 @@ func TestAddFileResolvedFields(t *testing.T) {
 	e := waitForOne(t, logReceived)
 	require.Equal(t, filepath.Base(symLinkPath), e.Attributes["log.file.name"])
-	require.Equal(t, symLinkPath, e.Attributes["log.file.path"])
+	require.Equal(t, symLinkPath, e.Attributes[attrs.LogFilePath])
 	require.Equal(t, filepath.Base(resolved), e.Attributes["log.file.name_resolved"])
 	require.Equal(t, resolved, e.Attributes["log.file.path_resolved"])
 	if runtime.GOOS != "windows" {
diff --git a/pkg/stanza/operator/parser/container/config.go b/pkg/stanza/operator/parser/container/config.go
index 77654faa8ce3d..e2263aafb1822 100644
--- a/pkg/stanza/operator/parser/container/config.go
+++ b/pkg/stanza/operator/parser/container/config.go
@@ -12,6 +12,7 @@ import (
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/errors"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/attrs"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/transformer/recombine"
@@ -19,7 +20,7 @@ import (
 
 const (
 	operatorType = "container"
-	recombineSourceIdentifier = "log.file.path"
+	recombineSourceIdentifier = attrs.LogFilePath
 	recombineIsLastEntry = "attributes.logtag == 'F'"
 	removeOriginalTimeFieldFeatureFlag = "filelog.container.removeOriginalTimeField"
 )
diff --git a/pkg/stanza/operator/parser/container/parser.go b/pkg/stanza/operator/parser/container/parser.go
index 96567a73af755..11b1f0f2c49c0 100644
--- a/pkg/stanza/operator/parser/container/parser.go
+++ b/pkg/stanza/operator/parser/container/parser.go
@@ -17,6 +17,7 @@ import (
 	"github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/timeutils"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/attrs"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
 )
 
@@ -30,7 +31,7 @@ const (
 	crioPattern = "^(?P