Skip to content

Commit d0c0e97

Browse files
authored
[pkg/ottl] Add ParseKeyValue function (#31035)
**Description:** Adds a `ParseKeyValue` converter function that parses key-value pairs out of a string into a `pcommon.Map`. It takes a `StringGetter` target argument and two optional arguments: the pair delimiter and the key-value delimiter. This is an adaptation of the Stanza Key Value Parser operator to provide feature parity. Given the input string `"k1=v1 k2=v2 k3=v3"`, the function returns the following map: ``` { "k1": "v1", "k2": "v2", "k3": "v3" } ``` **Link to tracking Issue:** Closes #30998 **Testing:** Added unit tests and an e2e test. **Documentation:** Added function documentation.
1 parent 16db125 commit d0c0e97

File tree

10 files changed

+897
-0
lines changed

10 files changed

+897
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: pkg/ottl
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add `ParseKeyValue` function for parsing key value pairs from a target string
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [30998]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package parseutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package parseutils
5+
6+
import (
7+
"testing"
8+
9+
"go.uber.org/goleak"
10+
)
11+
12+
// TestMain is the test entry point for this package; it hands the test run
// over to goleak.VerifyTestMain instead of calling m.Run directly.
func TestMain(m *testing.M) {
13+
goleak.VerifyTestMain(m)
14+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package parseutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils"
5+
6+
import (
7+
"fmt"
8+
"strings"
9+
10+
"go.uber.org/multierr"
11+
)
12+
13+
// SplitString splits input on delimiter and returns the resulting pieces while respecting quotes.
// Use it in place of `strings.Split` when quotes need to be respected.
//
// Text enclosed in single or double quotes is never split, and the enclosing quote characters are
// stripped from the output; a quote character of the other kind inside a quoted section is kept
// literally. Empty pieces (for example from leading, trailing, or consecutive delimiters) are dropped.
//
// Input bytes are copied verbatim, so multi-byte UTF-8 sequences are preserved.
//
// An error is returned when delimiter is empty or when a quoted section is never closed.
func SplitString(input, delimiter string) ([]string, error) {
	if delimiter == "" {
		// An empty delimiter matches at every position without consuming any input,
		// so the scan below could never advance.
		return nil, fmt.Errorf("delimiter must not be empty")
	}

	var (
		result  []string
		current strings.Builder
		quote   byte // 0 means we are not inside quotes
	)

	for i := 0; i < len(input); i++ {
		c := input[i]
		switch {
		case quote == 0 && strings.HasPrefix(input[i:], delimiter):
			// Flush the pending piece; empty pieces are skipped.
			if current.Len() > 0 {
				result = append(result, current.String())
				current.Reset()
			}
			// Skip the remainder of the delimiter; the loop's i++ consumes its first byte.
			i += len(delimiter) - 1
		case quote == 0 && (c == '"' || c == '\''):
			quote = c // start of quote
		case quote != 0 && c == quote:
			quote = 0 // end of quote
		default:
			// Write the raw byte: converting a byte with string() would re-encode it as a
			// code point and corrupt non-ASCII input.
			current.WriteByte(c)
		}
	}

	if quote != 0 { // check for closed quotes
		return nil, fmt.Errorf("never reached the end of a quoted value")
	}
	if current.Len() > 0 { // avoid adding an empty value because of a trailing delimiter
		result = append(result, current.String())
	}

	return result, nil
}
55+
56+
// ParseKeyValuePairs will split each string in `pairs` on the `delimiter` into a key and value string that get added to a map and returned.
57+
func ParseKeyValuePairs(pairs []string, delimiter string) (map[string]any, error) {
58+
parsed := make(map[string]any)
59+
var err error
60+
for _, p := range pairs {
61+
pair := strings.SplitN(p, delimiter, 2)
62+
if len(pair) != 2 {
63+
err = multierr.Append(err, fmt.Errorf("cannot split %q into 2 items, got %d item(s)", p, len(pair)))
64+
continue
65+
}
66+
67+
key := strings.TrimSpace(pair[0])
68+
value := strings.TrimSpace(pair[1])
69+
70+
parsed[key] = value
71+
}
72+
return parsed, err
73+
}
Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package parseutils
5+
6+
import (
7+
"fmt"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func Test_SplitString(t *testing.T) {
14+
testCases := []struct {
15+
name string
16+
input string
17+
delimiter string
18+
expected []string
19+
expectedErr error
20+
}{
21+
{
22+
name: "simple",
23+
input: "a b c",
24+
delimiter: " ",
25+
expected: []string{
26+
"a",
27+
"b",
28+
"c",
29+
},
30+
},
31+
{
32+
name: "single quotes",
33+
input: "a 'b c d'",
34+
delimiter: " ",
35+
expected: []string{
36+
"a",
37+
"b c d",
38+
},
39+
},
40+
{
41+
name: "double quotes",
42+
input: `a " b c " d`,
43+
delimiter: " ",
44+
expected: []string{
45+
"a",
46+
" b c ",
47+
"d",
48+
},
49+
},
50+
{
51+
name: "multi-char delimiter",
52+
input: "abc!@! def !@! g",
53+
delimiter: "!@!",
54+
expected: []string{
55+
"abc",
56+
" def ",
57+
" g",
58+
},
59+
},
60+
{
61+
name: "leading and trailing delimiters",
62+
input: " name=ottl func=key_value hello=world ",
63+
delimiter: " ",
64+
expected: []string{
65+
"name=ottl",
66+
"func=key_value",
67+
"hello=world",
68+
},
69+
},
70+
{
71+
name: "embedded double quotes in single quoted value",
72+
input: `ab c='this is a "co ol" value'`,
73+
delimiter: " ",
74+
expected: []string{
75+
"ab",
76+
`c=this is a "co ol" value`,
77+
},
78+
},
79+
{
80+
name: "embedded double quotes end single quoted value",
81+
input: `ab c='this is a "co ol"'`,
82+
delimiter: " ",
83+
expected: []string{
84+
"ab",
85+
`c=this is a "co ol"`,
86+
},
87+
},
88+
{
89+
name: "quoted values include whitespace",
90+
input: `name=" ottl " func=" key_ value"`,
91+
delimiter: " ",
92+
expected: []string{
93+
"name= ottl ",
94+
"func= key_ value",
95+
},
96+
},
97+
{
98+
name: "delimiter longer than input",
99+
input: "abc",
100+
delimiter: "aaaa",
101+
expected: []string{
102+
"abc",
103+
},
104+
},
105+
{
106+
name: "delimiter not found",
107+
input: "a b c",
108+
delimiter: "!",
109+
expected: []string{
110+
"a b c",
111+
},
112+
},
113+
{
114+
name: "newlines in input",
115+
input: `a
116+
b
117+
c`,
118+
delimiter: " ",
119+
expected: []string{
120+
"a\nb\nc",
121+
},
122+
},
123+
{
124+
name: "newline delimiter",
125+
input: `a b c
126+
d e f
127+
g
128+
h`,
129+
delimiter: "\n",
130+
expected: []string{
131+
"a b c",
132+
"d e f",
133+
"g ",
134+
"h",
135+
},
136+
},
137+
{
138+
name: "empty input",
139+
input: "",
140+
delimiter: " ",
141+
expected: nil,
142+
},
143+
{
144+
name: "equal input and delimiter",
145+
input: "abc",
146+
delimiter: "abc",
147+
expected: nil,
148+
},
149+
{
150+
name: "unclosed quotes",
151+
input: "a 'b c",
152+
delimiter: " ",
153+
expectedErr: fmt.Errorf("never reached the end of a quoted value"),
154+
},
155+
{
156+
name: "mismatched quotes",
157+
input: `a 'b c' "d '`,
158+
delimiter: " ",
159+
expectedErr: fmt.Errorf("never reached the end of a quoted value"),
160+
},
161+
{
162+
name: "tab delimiters",
163+
input: "a b c",
164+
delimiter: "\t",
165+
expected: []string{
166+
"a",
167+
"b",
168+
"c",
169+
},
170+
},
171+
}
172+
173+
for _, tc := range testCases {
174+
t.Run(tc.name, func(t *testing.T) {
175+
result, err := SplitString(tc.input, tc.delimiter)
176+
177+
if tc.expectedErr == nil {
178+
assert.NoError(t, err)
179+
assert.Equal(t, tc.expected, result)
180+
} else {
181+
assert.EqualError(t, err, tc.expectedErr.Error())
182+
assert.Nil(t, result)
183+
}
184+
})
185+
}
186+
}
187+
188+
func Test_ParseKeyValuePairs(t *testing.T) {
189+
testCases := []struct {
190+
name string
191+
pairs []string
192+
delimiter string
193+
expected map[string]any
194+
expectedErr error
195+
}{
196+
{
197+
name: "multiple delimiters",
198+
pairs: []string{"a==b", "c=d=", "e=f"},
199+
delimiter: "=",
200+
expected: map[string]any{
201+
"a": "=b",
202+
"c": "d=",
203+
"e": "f",
204+
},
205+
},
206+
{
207+
name: "no delimiter found",
208+
pairs: []string{"ab"},
209+
delimiter: "=",
210+
expectedErr: fmt.Errorf("cannot split \"ab\" into 2 items, got 1 item(s)"),
211+
},
212+
{
213+
name: "no delimiter found 2x",
214+
pairs: []string{"ab", "cd"},
215+
delimiter: "=",
216+
expectedErr: fmt.Errorf("cannot split \"ab\" into 2 items, got 1 item(s); cannot split \"cd\" into 2 items, got 1 item(s)"),
217+
},
218+
{
219+
name: "empty pairs",
220+
pairs: []string{},
221+
delimiter: "=",
222+
expected: map[string]any{},
223+
},
224+
{
225+
name: "empty pair string",
226+
pairs: []string{""},
227+
delimiter: "=",
228+
expectedErr: fmt.Errorf("cannot split \"\" into 2 items, got 1 item(s)"),
229+
},
230+
{
231+
name: "empty delimiter",
232+
pairs: []string{"a=b", "c=d"},
233+
delimiter: "",
234+
expected: map[string]any{
235+
"a": "=b",
236+
"c": "=d",
237+
},
238+
},
239+
{
240+
name: "empty pairs & delimiter",
241+
pairs: []string{},
242+
delimiter: "",
243+
expected: map[string]any{},
244+
},
245+
{
246+
name: "early delimiter",
247+
pairs: []string{"=a=b"},
248+
delimiter: "=",
249+
expected: map[string]any{
250+
"": "a=b",
251+
},
252+
},
253+
{
254+
name: "weird spacing",
255+
pairs: []string{" a= b ", " c = d "},
256+
delimiter: "=",
257+
expected: map[string]any{
258+
"a": "b",
259+
"c": "d",
260+
},
261+
},
262+
}
263+
264+
for _, tc := range testCases {
265+
t.Run(tc.name, func(t *testing.T) {
266+
result, err := ParseKeyValuePairs(tc.pairs, tc.delimiter)
267+
268+
if tc.expectedErr == nil {
269+
assert.NoError(t, err)
270+
assert.Equal(t, tc.expected, result)
271+
} else {
272+
assert.EqualError(t, err, tc.expectedErr.Error())
273+
}
274+
})
275+
}
276+
}

0 commit comments

Comments
 (0)