Skip to content

Commit ba0e4ee

Browse files
committed
Remove unnecessary copy while decoding and constructing string
Signed-off-by: Bogdan Drutu <[email protected]>
1 parent 6638b83 commit ba0e4ee

File tree

28 files changed

+189
-194
lines changed

28 files changed

+189
-194
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: textutil
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Remove unnecessary copy while decoding and constructing string
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [37734]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: This PR affects all log receivers, text extension and kafkareceiver.
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [api]

.chloggen/rm-unncessary-copy.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: deprecation
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: pkg/stanza
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Deprecate all functions in stanza/decode
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [37734]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [api]

extension/encoding/textencodingextension/config.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@ func (c *Config) Validate() error {
2020
return err
2121
}
2222
}
23-
encCfg := textutils.NewEncodingConfig()
24-
encCfg.Encoding = c.Encoding
25-
_, err := encCfg.Build()
23+
_, err := textutils.LookupEncoding(c.Encoding)
2624
if err != nil {
2725
return err
2826
}

extension/encoding/textencodingextension/extension.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,12 @@ func (e *textExtension) MarshalLogs(ld plog.Logs) ([]byte, error) {
3232
}
3333

3434
func (e *textExtension) Start(_ context.Context, _ component.Host) error {
35-
encCfg := textutils.NewEncodingConfig()
36-
encCfg.Encoding = e.config.Encoding
37-
enc, err := encCfg.Build()
35+
enc, err := textutils.LookupEncoding(e.config.Encoding)
3836
if err != nil {
3937
return err
4038
}
4139
e.textEncoder = &textLogCodec{
42-
enc: &enc,
40+
decoder: enc.NewDecoder(),
4341
}
4442

4543
return err

extension/encoding/textencodingextension/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ require (
1313
go.opentelemetry.io/collector/extension/extensiontest v0.119.0
1414
go.opentelemetry.io/collector/pdata v1.25.0
1515
go.uber.org/goleak v1.3.0
16+
golang.org/x/text v0.22.0
1617
)
1718

1819
require (
@@ -43,7 +44,6 @@ require (
4344
go.uber.org/zap v1.27.0 // indirect
4445
golang.org/x/net v0.33.0 // indirect
4546
golang.org/x/sys v0.29.0 // indirect
46-
golang.org/x/text v0.22.0 // indirect
4747
google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a // indirect
4848
google.golang.org/grpc v1.70.0 // indirect
4949
google.golang.org/protobuf v1.36.4 // indirect

extension/encoding/textencodingextension/text.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ import (
1111

1212
"go.opentelemetry.io/collector/pdata/pcommon"
1313
"go.opentelemetry.io/collector/pdata/plog"
14+
"golang.org/x/text/encoding"
1415

1516
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/textutils"
1617
)
1718

1819
type textLogCodec struct {
19-
enc *textutils.Encoding
20+
decoder *encoding.Decoder
2021
marshalingSeparator string
2122
unmarshalingSeparator *regexp.Regexp
2223
}
@@ -50,11 +51,11 @@ func (r *textLogCodec) UnmarshalLogs(buf []byte) (plog.Logs, error) {
5051
for s.Scan() {
5152
l := p.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty()
5253
l.SetObservedTimestamp(now)
53-
decoded, err := r.enc.Decode(s.Bytes())
54+
decoded, err := textutils.DecodeAsString(r.decoder, s.Bytes())
5455
if err != nil {
5556
return p, err
5657
}
57-
l.Body().SetStr(string(decoded))
58+
l.Body().SetStr(decoded)
5859
}
5960

6061
return p, nil

extension/encoding/textencodingextension/text_test.go

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,10 @@ import (
1414
)
1515

1616
func TestTextRoundtrip(t *testing.T) {
17-
encCfg := textutils.NewEncodingConfig()
18-
encCfg.Encoding = "utf8"
19-
enc, err := encCfg.Build()
17+
enc, err := textutils.LookupEncoding("utf8")
2018
require.NoError(t, err)
2119
r := regexp.MustCompile(`\r?\n`)
22-
codec := &textLogCodec{enc: &enc, unmarshalingSeparator: r, marshalingSeparator: "\n"}
20+
codec := &textLogCodec{decoder: enc.NewDecoder(), unmarshalingSeparator: r, marshalingSeparator: "\n"}
2321
require.NoError(t, err)
2422
ld, err := codec.UnmarshalLogs([]byte("foo\r\nbar\n"))
2523
require.NoError(t, err)
@@ -30,12 +28,10 @@ func TestTextRoundtrip(t *testing.T) {
3028
}
3129

3230
func TestTextRoundtripMissingNewline(t *testing.T) {
33-
encCfg := textutils.NewEncodingConfig()
34-
encCfg.Encoding = "utf8"
35-
enc, err := encCfg.Build()
31+
enc, err := textutils.LookupEncoding("utf8")
3632
require.NoError(t, err)
3733
r := regexp.MustCompile(`\r?\n`)
38-
codec := &textLogCodec{enc: &enc, unmarshalingSeparator: r, marshalingSeparator: "\n"}
34+
codec := &textLogCodec{decoder: enc.NewDecoder(), unmarshalingSeparator: r, marshalingSeparator: "\n"}
3935
require.NoError(t, err)
4036
ld, err := codec.UnmarshalLogs([]byte("foo\r\nbar"))
4137
require.NoError(t, err)
@@ -46,11 +42,9 @@ func TestTextRoundtripMissingNewline(t *testing.T) {
4642
}
4743

4844
func TestNoSeparator(t *testing.T) {
49-
encCfg := textutils.NewEncodingConfig()
50-
encCfg.Encoding = "utf8"
51-
enc, err := encCfg.Build()
45+
enc, err := textutils.LookupEncoding("utf8")
5246
require.NoError(t, err)
53-
codec := &textLogCodec{enc: &enc}
47+
codec := &textLogCodec{decoder: enc.NewDecoder()}
5448
require.NoError(t, err)
5549
ld, err := codec.UnmarshalLogs([]byte("foo\r\nbar\n"))
5650
require.NoError(t, err)

internal/coreinternal/textutils/encoding.go

Lines changed: 17 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,64 +4,15 @@
44
package textutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/textutils"
55

66
import (
7-
"errors"
87
"fmt"
98
"strings"
9+
"unsafe"
1010

1111
"golang.org/x/text/encoding"
1212
"golang.org/x/text/encoding/ianaindex"
1313
"golang.org/x/text/encoding/unicode"
14-
"golang.org/x/text/transform"
1514
)
1615

17-
// NewEncodingConfig creates a new Encoding config
18-
func NewEncodingConfig() EncodingConfig {
19-
return EncodingConfig{
20-
Encoding: "utf-8",
21-
}
22-
}
23-
24-
// EncodingConfig is the configuration of a Encoding helper
25-
type EncodingConfig struct {
26-
Encoding string `mapstructure:"encoding,omitempty"`
27-
}
28-
29-
// Build will build an Encoding operator.
30-
func (c EncodingConfig) Build() (Encoding, error) {
31-
enc, err := lookupEncoding(c.Encoding)
32-
if err != nil {
33-
return Encoding{}, err
34-
}
35-
36-
return Encoding{
37-
Encoding: enc,
38-
decodeBuffer: make([]byte, 1<<12),
39-
decoder: enc.NewDecoder(),
40-
}, nil
41-
}
42-
43-
type Encoding struct {
44-
Encoding encoding.Encoding
45-
decoder *encoding.Decoder
46-
decodeBuffer []byte
47-
}
48-
49-
// Decode converts the bytes in msgBuf to utf-8 from the configured encoding
50-
func (e *Encoding) Decode(msgBuf []byte) ([]byte, error) {
51-
for {
52-
e.decoder.Reset()
53-
nDst, _, err := e.decoder.Transform(e.decodeBuffer, msgBuf, true)
54-
if err == nil {
55-
return e.decodeBuffer[:nDst], nil
56-
}
57-
if errors.Is(err, transform.ErrShortDst) {
58-
e.decodeBuffer = make([]byte, len(e.decodeBuffer)*2)
59-
continue
60-
}
61-
return nil, fmt.Errorf("transform encoding: %w", err)
62-
}
63-
}
64-
6516
var encodingOverrides = map[string]encoding.Encoding{
6617
"utf-16": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
6718
"utf16": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
@@ -73,8 +24,8 @@ var encodingOverrides = map[string]encoding.Encoding{
7324
"": unicode.UTF8,
7425
}
7526

76-
func lookupEncoding(enc string) (encoding.Encoding, error) {
77-
if e, ok := EncodingOverridesMap.Get(strings.ToLower(enc)); ok {
27+
func LookupEncoding(enc string) (encoding.Encoding, error) {
28+
if e, ok := encodingOverrides[strings.ToLower(enc)]; ok {
7829
return e, nil
7930
}
8031
e, err := ianaindex.IANA.Encoding(enc)
@@ -88,18 +39,25 @@ func lookupEncoding(enc string) (encoding.Encoding, error) {
8839
}
8940

9041
func IsNop(enc string) bool {
91-
e, err := lookupEncoding(enc)
42+
e, err := LookupEncoding(enc)
9243
if err != nil {
9344
return false
9445
}
9546
return e == encoding.Nop
9647
}
9748

98-
var EncodingOverridesMap = encodingOverridesMap{}
99-
100-
type encodingOverridesMap struct{}
49+
// DecodeAsString converts the given encoded bytes using the given decoder. It returns the converted
50+
// bytes or nil, err if any error occurred.
51+
func DecodeAsString(decoder *encoding.Decoder, buf []byte) (string, error) {
52+
dstBuf, err := decoder.Bytes(buf)
53+
if err != nil {
54+
return "", err
55+
}
56+
return UnsafeBytesAsString(dstBuf), nil
57+
}
10158

102-
func (e *encodingOverridesMap) Get(key string) (encoding.Encoding, bool) {
103-
v, ok := encodingOverrides[key]
104-
return v, ok
59+
// UnsafeBytesAsString converts the byte array to string.
60+
// This function must be called iff the input buffer is not going to be re-used after.
61+
func UnsafeBytesAsString(buf []byte) string {
62+
return unsafe.String(unsafe.SliceData(buf), len(buf))
10563
}

internal/coreinternal/textutils/encoding_test.go

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"golang.org/x/text/encoding/korean"
1313
"golang.org/x/text/encoding/simplifiedchinese"
1414
"golang.org/x/text/encoding/unicode"
15+
"golang.org/x/text/transform"
1516
)
1617

1718
func TestUTF8Encoding(t *testing.T) {
@@ -43,11 +44,45 @@ func TestUTF8Encoding(t *testing.T) {
4344
}
4445
for _, test := range tests {
4546
t.Run(test.name, func(t *testing.T) {
46-
encCfg := NewEncodingConfig()
47-
encCfg.Encoding = test.encodingName
48-
enc, err := encCfg.Build()
47+
enc, err := LookupEncoding(test.encodingName)
4948
assert.NoError(t, err)
50-
assert.Equal(t, test.encoding, enc.Encoding)
49+
assert.Equal(t, test.encoding, enc)
50+
})
51+
}
52+
}
53+
54+
func TestDecodeAsString(t *testing.T) {
55+
tests := []struct {
56+
name string
57+
decoder *encoding.Decoder
58+
input []byte
59+
expected string
60+
}{
61+
{
62+
name: "nil",
63+
decoder: &encoding.Decoder{Transformer: transform.Nop},
64+
input: nil,
65+
expected: "",
66+
},
67+
{
68+
name: "empty",
69+
decoder: &encoding.Decoder{Transformer: transform.Nop},
70+
input: []byte{},
71+
expected: "",
72+
},
73+
{
74+
name: "empty",
75+
decoder: &encoding.Decoder{Transformer: transform.Nop},
76+
input: []byte("test"),
77+
expected: "test",
78+
},
79+
}
80+
81+
for _, test := range tests {
82+
t.Run(test.name, func(t *testing.T) {
83+
enc, err := DecodeAsString(test.decoder, test.input)
84+
assert.NoError(t, err)
85+
assert.Equal(t, test.expected, enc)
5186
})
5287
}
5388
}

pkg/ottl/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ require (
2424
go.uber.org/zap v1.27.0
2525
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842
2626
golang.org/x/net v0.34.0
27-
golang.org/x/text v0.22.0
2827
)
2928

3029
require (
@@ -50,6 +49,7 @@ require (
5049
go.opentelemetry.io/otel/sdk/metric v1.34.0 // indirect
5150
go.uber.org/multierr v1.11.0 // indirect
5251
golang.org/x/sys v0.29.0 // indirect
52+
golang.org/x/text v0.22.0 // indirect
5353
google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a // indirect
5454
google.golang.org/grpc v1.70.0 // indirect
5555
google.golang.org/protobuf v1.36.4 // indirect

pkg/ottl/ottlfuncs/func_decode.go

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,8 @@ import (
77
"context"
88
"encoding/base64"
99
"fmt"
10-
"strings"
1110

1211
"go.opentelemetry.io/collector/pdata/pcommon"
13-
"golang.org/x/text/encoding"
14-
"golang.org/x/text/encoding/ianaindex"
1512

1613
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/textutils"
1714
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
@@ -71,7 +68,7 @@ func Decode[K any](target ottl.Getter[K], encoding string) (ottl.ExprFunc[K], er
7168
}
7269
return string(decodedBytes), nil
7370
default:
74-
e, err := getEncoding(encoding)
71+
e, err := textutils.LookupEncoding(encoding)
7572
if err != nil {
7673
return nil, err
7774
}
@@ -85,19 +82,3 @@ func Decode[K any](target ottl.Getter[K], encoding string) (ottl.ExprFunc[K], er
8582
}
8683
}, nil
8784
}
88-
89-
func getEncoding(encoding string) (encoding.Encoding, error) {
90-
if e, ok := textutils.EncodingOverridesMap.Get(strings.ToLower(encoding)); ok {
91-
return e, nil
92-
}
93-
e, err := ianaindex.IANA.Encoding(encoding)
94-
if err != nil {
95-
return nil, fmt.Errorf("could not get encoding for %s: %w", encoding, err)
96-
}
97-
if e == nil {
98-
// for some encodings a nil error and a nil encoding is returned, so we need to double check
99-
// if the encoding is actually set here
100-
return nil, fmt.Errorf("no decoder available for encoding: %s", encoding)
101-
}
102-
return e, nil
103-
}

0 commit comments

Comments
 (0)