@@ -406,7 +406,7 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
406
406
case log .KindString :
407
407
s := val .AsString ()
408
408
if len (s ) > r .attributeValueLengthLimit {
409
- val = log .StringValue (truncate (s , r .attributeValueLengthLimit ))
409
+ val = log .StringValue (truncate (r .attributeValueLengthLimit , s ))
410
410
}
411
411
case log .KindSlice :
412
412
sl := val .AsSlice ()
@@ -427,40 +427,78 @@ func (r *Record) applyValueLimits(val log.Value) log.Value {
427
427
return val
428
428
}
429
429
430
// truncate returns s limited to at most limit characters (Unicode code
// points).
//
// If limit is negative, or s is already at most limit bytes long, s is
// returned unchanged; in that case no validation is performed and any invalid
// UTF-8 contained in s is preserved.
//
// Otherwise the result holds the first limit valid characters of s. Bytes
// that are not part of a valid UTF-8 encoding are dropped and do not count
// towards the limit. A correctly encoded U+FFFD (the replacement character)
// is a valid character: it is kept and counted like any other. A valid
// multi-byte encoding is never split.
func truncate(limit int, s string) string {
	// This prioritizes performance in the following order based on the most
	// common expected use-cases.
	//
	//  - Short values less than the default limit (128).
	//  - Strings with valid encodings that exceed the limit.
	//  - No limit.
	//  - Strings with invalid encodings that exceed the limit.
	if limit < 0 || len(s) <= limit {
		return s
	}

	// Optimistically, assume all valid UTF-8. Scan without allocating until
	// either the limit is exceeded or an invalid byte is found.
	count := 0
	invalid := -1 // Byte offset of the first invalid byte, if any.
	for i, c := range s {
		if c == utf8.RuneError {
			// Ranging over a string yields RuneError both for invalid bytes
			// (width 1) and for a correctly encoded U+FFFD (width 3). Only
			// the former is an encoding error.
			if _, size := utf8.DecodeRuneInString(s[i:]); size == 1 {
				invalid = i
				break
			}
		}

		count++
		if count > limit {
			return s[:i]
		}
	}

	// Fast-path, no invalid input and no more than limit characters.
	if invalid < 0 {
		return s
	}

	// The valid prefix already holds limit characters, nothing to rebuild.
	if count >= limit {
		return s[:invalid]
	}

	// Truncate while validating UTF-8: copy the valid prefix, then append
	// the remaining valid characters until the limit is reached.
	var b strings.Builder
	b.Grow(len(s) - 1) // At least the one invalid byte is dropped.
	_, _ = b.WriteString(s[:invalid])
	for i := invalid + 1; i < len(s) && count < limit; {
		c := s[i]
		if c < utf8.RuneSelf {
			// Optimization for single byte runes (common case).
			_ = b.WriteByte(c)
			i++
			count++
			continue
		}

		_, size := utf8.DecodeRuneInString(s[i:])
		if size == 1 {
			// We checked for all 1-byte runes above, this is a RuneError.
			i++
			continue
		}

		_, _ = b.WriteString(s[i : i+size])
		i += size
		count++
	}

	return b.String()
}
0 commit comments