Skip to content

Commit 9fd0b09

Browse files
committed
Fix sdk/log record attr value limit
Truncate based on characters not byte length.
1 parent e6fef27 commit 9fd0b09

File tree

2 files changed

+67
-28
lines changed

2 files changed

+67
-28
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
3333
- Fix inconsistent request body closing in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`. (#5954)
3434
- Fix invalid exemplar keys in `go.opentelemetry.io/otel/exporters/prometheus`. (#5995)
3535
- Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/trace`. (#5997)
36+
- Fix attribute value truncation in `go.opentelemetry.io/otel/sdk/log`. (#6032)
3637

3738
<!-- Released section -->
3839
<!-- Don't change this section unless doing release -->

sdk/log/record.go

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
406406
case log.KindString:
407407
s := val.AsString()
408408
if len(s) > r.attributeValueLengthLimit {
409-
val = log.StringValue(truncate(s, r.attributeValueLengthLimit))
409+
val = log.StringValue(truncate(r.attributeValueLengthLimit, s))
410410
}
411411
case log.KindSlice:
412412
sl := val.AsSlice()
@@ -427,40 +427,78 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
427427
return val
428428
}
429429

430-
// truncate returns a copy of str truncated to have a length of at most n
431-
// characters. If the length of str is less than n, str itself is returned.
430+
// truncate returns a truncated version of s such that it contains at most
431+
// the limit number of characters. Truncation is applied by returning the limit
432+
// number of valid characters contained in s.
432433
//
433-
// The truncate of str ensures that no valid UTF-8 code point is split. The
434-
// copy returned will be less than n if a character straddles the length
435-
// limit.
434+
// If limit is negative, it returns the original string.
436435
//
437-
// No truncation is performed if n is less than zero.
438-
func truncate(str string, n int) string {
439-
if n < 0 {
440-
return str
436+
// UTF-8 is supported. When truncating, all invalid characters are dropped
437+
// before applying truncation.
438+
//
439+
// If s already contains no more than the limit number of bytes, it is returned
440+
// unchanged. No invalid characters are removed.
441+
func truncate(limit int, s string) string {
442+
// This prioritizes performance in the following order based on the most
443+
// common expected use-cases.
444+
//
445+
// - Short values less than the default limit (128).
446+
// - Strings with valid encodings that exceed the limit.
447+
// - No limit.
448+
// - Strings with invalid encodings that exceed the limit.
449+
if limit < 0 || len(s) <= limit {
450+
return s
441451
}
442452

443-
// cut returns a copy of the s truncated to not exceed a length of n. If
444-
// invalid UTF-8 is encountered, s is returned with false. Otherwise, the
445-
// truncated copy will be returned with true.
446-
cut := func(s string) (string, bool) {
447-
var i int
448-
for i = 0; i < n; {
449-
r, size := utf8.DecodeRuneInString(s[i:])
450-
if r == utf8.RuneError {
451-
return s, false
453+
// Optimistically, assume all valid UTF-8.
454+
var b strings.Builder
455+
count := 0
456+
for i, c := range s {
457+
if c != utf8.RuneError {
458+
count++
459+
if count > limit {
460+
return s[:i]
452461
}
453-
if i+size > n {
454-
break
455-
}
456-
i += size
462+
continue
463+
}
464+
465+
_, size := utf8.DecodeRuneInString(s[i:])
466+
if size == 1 {
467+
// Invalid encoding.
468+
b.Grow(len(s) - 1)
469+
_, _ = b.WriteString(s[:i])
470+
s = s[i:]
471+
break
457472
}
458-
return s[:i], true
459473
}
460474

461-
cp, ok := cut(str)
462-
if !ok {
463-
cp, _ = cut(strings.ToValidUTF8(str, ""))
475+
// Fast-path, no invalid input.
476+
if b.Cap() == 0 {
477+
return s
464478
}
465-
return cp
479+
480+
// Truncate while validating UTF-8.
481+
for i := 0; i < len(s) && count < limit; {
482+
c := s[i]
483+
if c < utf8.RuneSelf {
484+
// Optimization for single byte runes (common case).
485+
_ = b.WriteByte(c)
486+
i++
487+
count++
488+
continue
489+
}
490+
491+
_, size := utf8.DecodeRuneInString(s[i:])
492+
if size == 1 {
493+
// We checked for all 1-byte runes above, this is a RuneError.
494+
i++
495+
continue
496+
}
497+
498+
_, _ = b.WriteString(s[i : i+size])
499+
i += size
500+
count++
501+
}
502+
503+
return b.String()
466504
}

0 commit comments

Comments
 (0)