@@ -139,22 +139,18 @@ namespace tiny_utf8
139
139
140
140
// ! Count leading zeros utility
141
141
#if defined(__GNUC__)
142
- #ifndef TINY_UTF8_HAS_CLZ
143
- #define TINY_UTF8_HAS_CLZ true
144
- #endif
142
+ #define TINY_UTF8_HAS_CLZ true
145
143
// ! Leading-zero count of an `unsigned int`, forwarded to the GCC builtin.
// ! The builtin's result is undefined for value == 0, so callers must not pass 0.
static inline unsigned int clz ( unsigned int value ) noexcept
{
	const int leading_zeros = __builtin_clz ( value );
	return static_cast<unsigned int>( leading_zeros );
}
146
144
// ! Leading-zero count of an `unsigned long int`, forwarded to the GCC builtin.
// ! The builtin's result is undefined for value == 0, so callers must not pass 0.
static inline unsigned int clz ( unsigned long int value ) noexcept
{
	const int leading_zeros = __builtin_clzl ( value );
	return static_cast<unsigned int>( leading_zeros );
}
147
145
// ! Leading-zero count of a `char32_t`.
// ! Selects __builtin_clzl when char32_t has the same size as unsigned long int,
// ! and __builtin_clz otherwise; the sizeof comparison is a compile-time constant.
// ! NOTE(review): GCC documents both builtins as undefined for an input of 0 - callers are expected to rule that out.
static inline unsigned int clz ( char32_t value ) noexcept {
148
146
return sizeof (char32_t ) == sizeof (unsigned long int ) ? (unsigned int )__builtin_clzl ( value ) : (unsigned int )__builtin_clz ( value );
149
147
}
150
148
#elif defined(_MSC_VER)
151
- #ifndef TINY_UTF8_HAS_CLZ
152
- #define TINY_UTF8_HAS_CLZ true
153
- #endif
149
+ #define TINY_UTF8_HAS_CLZ true
154
150
template <typename T>
155
151
static inline unsigned int lzcnt ( T value ) noexcept {
156
152
unsigned long value_log2;
157
- #if !defined( WIN32 ) && !defined( _WIN32 ) && !defined( __WIN32__ )
153
+ #if INTPTR_MAX >= INT64_MAX
158
154
_BitScanReverse64 ( &value_log2 , value );
159
155
#else
160
156
_BitScanReverse ( &value_log2 , value );
@@ -163,11 +159,14 @@ namespace tiny_utf8
163
159
}
164
160
static inline unsigned int clz ( std::uint16_t value ) noexcept { return lzcnt ( value ); }
165
161
static inline unsigned int clz ( std::uint32_t value ) noexcept { return lzcnt ( value ); }
166
- #ifndef WIN32
162
+ #if INTPTR_MAX >= INT64_MAX
167
163
static inline unsigned int clz ( std::uint64_t value ) noexcept { return lzcnt ( value ); }
168
- #endif // WIN32
164
+ #endif
169
165
static inline unsigned int clz ( char32_t value ) noexcept { return lzcnt ( value ); }
166
+ #else
167
+ #define TINY_UTF8_HAS_CLZ false
170
168
#endif
169
+
171
170
172
171
// ! Helper to detect little endian
173
172
class is_little_endian
@@ -883,7 +882,7 @@ namespace tiny_utf8
883
882
* Returns the number of code units (bytes) using the supplied first byte of a utf8 codepoint
884
883
*/
885
884
// 'data_left' is the number of bytes left in the buffer, INCLUDING the first byte itself
886
- #if defined( TINY_UTF8_HAS_CLZ) && TINY_UTF8_HAS_CLZ == true
885
+ #if TINY_UTF8_HAS_CLZ
887
886
static inline width_type get_codepoint_bytes ( data_type first_byte , size_type data_left ) noexcept
888
887
{
889
888
if ( first_byte ){
@@ -906,7 +905,7 @@ namespace tiny_utf8
906
905
*/
907
906
static inline width_type get_codepoint_bytes ( value_type cp ) noexcept
908
907
{
909
- #if defined( TINY_UTF8_HAS_CLZ) && TINY_UTF8_HAS_CLZ == true
908
+ #if TINY_UTF8_HAS_CLZ
910
909
if ( !cp )
911
910
return 1 ;
912
911
static const width_type lut[32 ] = {
@@ -3176,37 +3175,37 @@ namespace tiny_utf8
3176
3175
}
3177
3176
}
3178
3177
3179
- #if !defined( TINY_UTF8_HAS_CLZ) || TINY_UTF8_HAS_CLZ == false
3178
+ #if !TINY_UTF8_HAS_CLZ
3180
3179
// Fallback implementation of get_codepoint_bytes (this definition sits inside the
// `#if !TINY_UTF8_HAS_CLZ` region, i.e. it is used when no clz intrinsic is available).
//
// Determines how many bytes a UTF-8 sequence occupies by matching the bit pattern
// of its first byte, never reporting more bytes than 'data_left' allows.
//
// first_byte: the leading byte of the sequence
// data_left:  number of bytes available in the buffer, counted from 'first_byte'
// returns:    7..2 if the corresponding lead-byte pattern matches and fits into
//             'data_left'; 1 in every other case (including data_left of 0 or 1)
template <typename V, typename D, typename A>
3181
3180
typename basic_string<V, D, A>::width_type basic_string<V, D, A>::get_codepoint_bytes( typename basic_string<V, D, A>::data_type first_byte , typename basic_string<V, D, A>::size_type data_left ) noexcept
3182
3181
{
3183
3182
// Only check the lead-byte patterns whose sequence length still fits into 'data_left':
// the switch enters at the longest admissible length and every case deliberately
// falls through to the next shorter one when its pattern does not match.
3184
3183
switch ( data_left )
3185
3184
{
3186
3185
// data_left >= 7: all patterns are admissible
default :
3187
- if ( ((unsigned char )first_byte & 0xFFu ) == 0xFEu ) // 11111110 seven bytes
3186
+ if ( ( (unsigned char )first_byte & 0xFFu ) == 0xFEu ) // 11111110 -> seven bytes
3188
3187
return 7 ;
3189
3188
case 6 :
3190
- if ( ((unsigned char )first_byte & 0xFEu ) == 0xFCu ) // 1111110X six bytes
3189
+ if ( ( (unsigned char )first_byte & 0xFEu ) == 0xFCu ) // 1111110X -> six bytes
3191
3190
return 6 ;
3192
3191
case 5 :
3193
- if ( ((unsigned char )first_byte & 0xFCu ) == 0xF8u ) // 111110XX five bytes
3192
+ if ( ( (unsigned char )first_byte & 0xFCu ) == 0xF8u ) // 111110XX -> five bytes
3194
3193
return 5 ;
3195
3194
case 4 :
3196
- if ( ((unsigned char )first_byte & 0xF8u ) == 0xF0u ) // 11110XXX four bytes
3195
+ if ( ( (unsigned char )first_byte & 0xF8u ) == 0xF0u ) // 11110XXX -> four bytes
3197
3196
return 4 ;
3198
3197
case 3 :
3199
- if ( ((unsigned char )first_byte & 0xF0u ) == 0xE0u ) // 1110XXXX three bytes
3198
+ if ( ( (unsigned char )first_byte & 0xF0u ) == 0xE0u ) // 1110XXXX -> three bytes
3200
3199
return 3 ;
3201
3200
case 2 :
3202
- if ( ((unsigned char )first_byte & 0xE0u ) == 0xC0u ) // 110XXXXX two bytes
3201
+ if ( ( (unsigned char )first_byte & 0xE0u ) == 0xC0u ) // 110XXXXX -> two bytes
3203
3202
return 2 ;
3204
3203
// No multi-byte pattern matched (or nothing longer fits): treat as a single byte
case 1 :
3205
3204
case 0 :
3206
- return 1 ;
3205
+ return 1 ; // one byte
3207
3206
}
3208
3207
}
3209
- #endif // !defined( TINY_UTF8_HAS_CLZ) || TINY_UTF8_HAS_CLZ == false
3208
+ #endif // !TINY_UTF8_HAS_CLZ
3210
3209
3211
3210
template <typename V, typename D, typename A>
3212
3211
basic_string<V, D, A>& basic_string<V, D, A>::operator =( const basic_string<V, D, A>& str ) noexcept (TINY_UTF8_NOEXCEPT)
0 commit comments