Skip to content

Commit 7e0af51

Browse files
santileira, dmathieu, pellared
authored
baggage: Fix invalid percent-encoded octet sequences (#5528)
# Goal Replace the percent encoded octet sequence with the replacement code point (U+FFFD) when it doesn't match the UTF-8 encoding schema. Issue: #5519 Current behavior: ``` package main import ( "fmt" "log" "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) func main() { kv := "k=aa%ffcc" b, err := baggage.Parse(kv) if err != nil { log.Fatal(err) } val := b.Members()[0].Value() fmt.Println(len(val)) # 5 fmt.Println(utf8.ValidString(val)) # false } ``` Expected behavior: ``` package main import ( "fmt" "log" "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) func main() { kv := "k=aa%ffcc" b, err := baggage.Parse(kv) if err != nil { log.Fatal(err) } val := b.Members()[0].Value() fmt.Println(len(val)) # 7 fmt.Println(utf8.ValidString(val)) # true } ``` ## Benchmark - `go test -bench=BenchmarkParse -count 20 > old.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage BenchmarkParse-10 1548118 774.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1547653 786.0 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1544949 770.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1558972 770.2 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1554973 774.7 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1550200 779.6 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1545100 774.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1549634 777.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1552530 769.6 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1536499 855.0 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1552244 770.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1560225 767.4 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1562738 772.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1556679 838.9 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1562500 777.1 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1530901 836.5 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1000000 1372 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1534678 780.3 ns/op 864 B/op 8 allocs/op BenchmarkParse-10 1366180 822.4 ns/op 
864 B/op 8 allocs/op BenchmarkParse-10 1539852 796.8 ns/op 864 B/op 8 allocs/op PASS ok go.opentelemetry.io/otel/baggage 40.839s ``` - `go test -bench=BenchmarkParse -count 20 > new.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage BenchmarkParse-10 1355893 886.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1349192 883.1 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1363053 880.4 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1372404 875.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1359979 880.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1360497 874.7 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1375520 870.2 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1375268 882.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1361998 964.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1373461 961.5 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1378065 872.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1377290 879.0 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1362094 885.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1352175 915.9 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1364914 887.9 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1355782 890.5 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1361848 1245 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1163396 878.8 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1370886 916.6 ns/op 888 B/op 9 allocs/op BenchmarkParse-10 1340149 1175 ns/op 888 B/op 9 allocs/op PASS ok go.opentelemetry.io/otel/baggage 44.347s ``` - `benchstat old.txt new.txt` ``` goos: darwin goarch: arm64 pkg: go.opentelemetry.io/otel/baggage │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Parse-10 777.3n ± 3% 884.4n ± 4% +13.77% (p=0.000 n=20) │ old.txt │ new.txt │ │ B/op │ B/op vs base │ Parse-10 864.0 ± 0% 888.0 ± 0% +2.78% (p=0.000 n=20) │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ Parse-10 8.000 ± 0% 9.000 ± 0% +12.50% (p=0.000 n=20) ``` --------- Co-authored-by: Damien Mathieu <[email protected]> Co-authored-by: Robert Pająk 
<[email protected]>
1 parent 61ff66c commit 7e0af51

File tree

3 files changed

+94
-5
lines changed

3 files changed

+94
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
4949
- Fix stale timestamps reported by the last-value aggregation. (#5517)
5050
- Indicate the `Exporter` in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` must be created by the `New` method. (#5521)
5151
- Improved performance in all `{Bool,Int64,Float64,String}SliceValue` functions of `go.opentelemetry.io/otel/attribute` by reducing the number of allocations. (#5549)
52+
- Replace invalid percent-encoded octet sequences with the Unicode replacement character (U+FFFD) in `go.opentelemetry.io/otel/baggage`. (#5528)
5253

5354
## [1.27.0/0.49.0/0.3.0] 2024-05-21
5455

baggage/baggage.go

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,19 +294,45 @@ func parseMember(member string) (Member, error) {
294294
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key)
295295
}
296296

297-
val := strings.TrimSpace(v)
298-
if !validateValue(val) {
297+
rawVal := strings.TrimSpace(v)
298+
if !validateValue(rawVal) {
299299
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, v)
300300
}
301301

302302
// Decode a percent-encoded value.
303-
value, err := url.PathUnescape(val)
303+
unescapeVal, err := url.PathUnescape(rawVal)
304304
if err != nil {
305305
return newInvalidMember(), fmt.Errorf("%w: %w", errInvalidValue, err)
306306
}
307+
308+
value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)
307309
return Member{key: key, value: value, properties: props, hasData: true}, nil
308310
}
309311

312+
// replaceInvalidUTF8Sequences returns unescapeVal with every byte that is not
// part of a valid UTF-8 sequence replaced by the Unicode replacement character
// '�' (U+FFFD). Each invalid byte is replaced individually, so a run of n
// invalid bytes yields n replacement characters. A string that is already
// valid UTF-8 is returned unchanged and without allocating.
//
// c is a capacity hint for the result buffer; callers pass the length of the
// still percent-encoded value. It only affects allocation, never the result,
// and values that are negative or smaller than len(unescapeVal) are raised to
// len(unescapeVal).
func replaceInvalidUTF8Sequences(c int, unescapeVal string) string {
	if utf8.ValidString(unescapeVal) {
		return unescapeVal
	}
	// W3C baggage spec:
	// https://github.com/w3c/baggage/blob/8c215efbeebd3fa4b1aceb937a747e56444f22f3/baggage/HTTP_HEADER_FORMAT.md?plain=1#L69

	// The output is never shorter than the input, and strings.Builder.Grow
	// panics on a negative count, so clamp the hint from below.
	if c < len(unescapeVal) {
		c = len(unescapeVal)
	}

	var b strings.Builder
	b.Grow(c)
	// Ranging over a string yields utf8.RuneError (U+FFFD) for every invalid
	// byte and advances one byte at a time past it, which is exactly the
	// per-byte replacement required here.
	for _, r := range unescapeVal {
		_, _ = b.WriteRune(r) // strings.Builder writes never fail.
	}

	return b.String()
}
335+
310336
// validate ensures m conforms to the W3C Baggage specification.
311337
// A key must be an ASCII string, returning an error otherwise.
312338
func (m Member) validate() error {
@@ -607,10 +633,12 @@ func parsePropertyInternal(s string) (p Property, ok bool) {
607633
}
608634

609635
// Decode a percent-encoded value.
610-
value, err := url.PathUnescape(s[valueStart:valueEnd])
636+
rawVal := s[valueStart:valueEnd]
637+
unescapeVal, err := url.PathUnescape(rawVal)
611638
if err != nil {
612639
return
613640
}
641+
value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)
614642

615643
ok = true
616644
p.key = s[keyStart:keyEnd]

baggage/baggage_test.go

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"slices"
1010
"strings"
1111
"testing"
12+
"unicode/utf8"
1213

1314
"github.com/stretchr/testify/assert"
1415
"github.com/stretchr/testify/require"
@@ -469,6 +470,18 @@ func TestBaggageParse(t *testing.T) {
469470
in: tooManyMembers,
470471
err: errMemberNumber,
471472
},
473+
{
474+
name: "percent-encoded octet sequences do not match the UTF-8 encoding scheme",
475+
in: "k=aa%ffcc;p=d%fff",
476+
want: baggage.List{
477+
"k": {
478+
Value: "aa�cc",
479+
Properties: []baggage.Property{
480+
{Key: "p", Value: "d�f", HasValue: true},
481+
},
482+
},
483+
},
484+
},
472485
}
473486

474487
for _, tc := range testcases {
@@ -480,6 +493,53 @@ func TestBaggageParse(t *testing.T) {
480493
}
481494
}
482495

496+
func TestBaggageParseValue(t *testing.T) {
497+
testcases := []struct {
498+
name string
499+
in string
500+
valueWant string
501+
valueWantSize int
502+
}{
503+
{
504+
name: "percent encoded octet sequence matches UTF-8 encoding scheme",
505+
in: "k=aa%26cc",
506+
valueWant: "aa&cc",
507+
valueWantSize: 5,
508+
},
509+
{
510+
name: "percent encoded octet sequence doesn't match UTF-8 encoding scheme",
511+
in: "k=aa%ffcc",
512+
valueWant: "aa�cc",
513+
valueWantSize: 7,
514+
},
515+
{
516+
name: "multiple percent encoded octet sequences don't match UTF-8 encoding scheme",
517+
in: "k=aa%ffcc%fedd%fa",
518+
valueWant: "aa�cc�dd�",
519+
valueWantSize: 15,
520+
},
521+
{
522+
name: "raw value",
523+
in: "k=aacc",
524+
valueWant: "aacc",
525+
valueWantSize: 4,
526+
},
527+
}
528+
529+
for _, tc := range testcases {
530+
t.Run(tc.name, func(t *testing.T) {
531+
b, err := Parse(tc.in)
532+
assert.Empty(t, err)
533+
534+
val := b.Members()[0].Value()
535+
536+
assert.EqualValues(t, val, tc.valueWant)
537+
assert.Equal(t, len(val), tc.valueWantSize)
538+
assert.True(t, utf8.ValidString(val))
539+
})
540+
}
541+
}
542+
483543
func TestBaggageString(t *testing.T) {
484544
testcases := []struct {
485545
name string
@@ -979,7 +1039,7 @@ func BenchmarkParse(b *testing.B) {
9791039
b.ReportAllocs()
9801040

9811041
for i := 0; i < b.N; i++ {
982-
benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`)
1042+
benchBaggage, _ = Parse("userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value, invalidUtf8=pr%ffo%ffp%fcValue")
9831043
}
9841044
}
9851045

0 commit comments

Comments
 (0)