@@ -556,26 +556,24 @@ utf8_string::size_type utf8_string::get_num_resulting_bytes( size_type start_byt
556
556
bool misformatted;
557
557
bool * check_misformatted = this ->_misformatted ? &misformatted : nullptr ;
558
558
size_type cur_byte = start_byte;
559
+ size_type buffer_len = size + 1 ;
559
560
560
561
// Reduce the byte count by the number of utf8 data bytes
561
- if ( this ->sso_active () ){
562
+ if ( utf8_string::is_small_string (buffer_len) ){ // this->sso_active(), but we have already cached size()
562
563
while ( codepoint_count-- > 0 && cur_byte < size )
563
- cur_byte += get_num_bytes_of_utf8_char ( buffer , cur_byte , this ->_buffer_len , check_misformatted );
564
- return cur_byte - start_byte;
564
+ cur_byte += get_num_bytes_of_utf8_char ( buffer , cur_byte , buffer_len , check_misformatted );
565
565
}
566
-
567
- // Add at least as many bytes as codepoints
568
- cur_byte += codepoint_count;
569
-
570
- if ( size_type indices_len = this ->_indices_len )
566
+ else if ( size_type indices_len = this ->_indices_len )
571
567
{
568
+ // Add at least as many bytes as codepoints
569
+ cur_byte += codepoint_count;
570
+
572
571
size_type index_multibyte_table = 0 ;
573
572
const void * indices = this ->get_indices ();
574
- unsigned char indices_datatype_bytes = get_index_datatype_bytes ( this -> _buffer_len );
573
+ unsigned char indices_datatype_bytes = get_index_datatype_bytes (buffer_len );
575
574
576
575
// Iterate to the start of the relevant part of the multibyte table
577
- while ( index_multibyte_table < indices_len )
578
- {
576
+ while ( index_multibyte_table < indices_len ){
579
577
if ( utf8_string::get_nth_index ( indices , indices_datatype_bytes , index_multibyte_table ) >= start_byte )
580
578
break ;
581
579
index_multibyte_table++;
@@ -592,9 +590,11 @@ utf8_string::size_type utf8_string::get_num_resulting_bytes( size_type start_byt
592
590
index_multibyte_table++;
593
591
594
592
// Add the utf8 data bytes to the number of bytes
595
- cur_byte += get_num_bytes_of_utf8_char ( buffer , multibyte_pos , this -> _buffer_len , check_misformatted ) - 1 ; // Add utf8 data bytes
593
+ cur_byte += get_num_bytes_of_utf8_char ( buffer , multibyte_pos , buffer_len , check_misformatted ) - 1 ; // Add utf8 data bytes
596
594
}
597
595
}
596
+ else
597
+ return codepoint_count;
598
598
599
599
return cur_byte - start_byte;
600
600
}
@@ -693,9 +693,9 @@ void utf8_string::compute_multibyte_table( void* table , bool* misformatted )
693
693
unsigned char indices_datatype_bytes = get_index_datatype_bytes ( buffer_len );
694
694
695
695
// Fill Multibyte Table
696
- for ( size_type index = 0 ; index < buffer_len ; index++ )
696
+ for ( size_type index = 0 ; index < buffer_len ; )
697
697
{
698
- unsigned char cur_num_bytes = get_num_bytes_of_utf8_char ( buffer , index , buffer_len , misformatted ) - 1 ;
698
+ unsigned char cur_num_bytes = get_num_bytes_of_utf8_char ( buffer , index , buffer_len , misformatted );
699
699
if ( cur_num_bytes > 1 )
700
700
utf8_string::set_nth_index ( table , indices_datatype_bytes , multibyte_index++ , index );
701
701
index += cur_num_bytes;
@@ -1137,12 +1137,14 @@ utf8_string::value_type utf8_string::at( size_type requested_index ) const
1137
1137
if ( requested_index >= size () )
1138
1138
return 0 ;
1139
1139
1140
- if ( !this ->sso_active () && !requires_unicode () )
1141
- return (value_type) this ->_buffer [requested_index];
1140
+ const char * buffer = this ->get_buffer ();
1141
+
1142
+ if ( !requires_unicode () )
1143
+ return (value_type) buffer[requested_index];
1142
1144
1143
1145
// Decode internal buffer at position n
1144
1146
value_type codepoint = 0 ;
1145
- decode_utf8 ( this -> get_buffer () + get_num_resulting_bytes ( 0 , requested_index ) , codepoint , this ->_misformatted );
1147
+ decode_utf8 ( buffer + get_num_resulting_bytes ( 0 , requested_index ) , codepoint , this ->_misformatted );
1146
1148
1147
1149
return codepoint;
1148
1150
}
0 commit comments