Skip to content

Commit 861604f

Browse files
committed
url: fast path ascii domains, do not run ToASCII
To match browser behavior, fast-path ASCII-only domains and do not run ToASCII on them. Fixes: nodejs#12965 Refs: nodejs#12966 Refs: whatwg/url#309
1 parent e86297c commit 861604f

File tree

4 files changed

+51
-68
lines changed

4 files changed

+51
-68
lines changed

node.gyp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,6 @@
192192
'src/spawn_sync.cc',
193193
'src/string_bytes.cc',
194194
'src/string_search.cc',
195-
'src/string_utils.cc',
196195
'src/stream_base.cc',
197196
'src/stream_wrap.cc',
198197
'src/tcp_wrap.cc',
@@ -629,7 +628,6 @@
629628
'<(OBJ_PATH)<(OBJ_SEPARATOR)util.<(OBJ_SUFFIX)',
630629
'<(OBJ_PATH)<(OBJ_SEPARATOR)string_bytes.<(OBJ_SUFFIX)',
631630
'<(OBJ_PATH)<(OBJ_SEPARATOR)string_search.<(OBJ_SUFFIX)',
632-
'<(OBJ_PATH)<(OBJ_SEPARATOR)string_utils.<(OBJ_SUFFIX)',
633631
'<(OBJ_PATH)<(OBJ_SEPARATOR)stream_base.<(OBJ_SUFFIX)',
634632
'<(OBJ_PATH)<(OBJ_SEPARATOR)node_constants.<(OBJ_SUFFIX)',
635633
'<(OBJ_PATH)<(OBJ_SEPARATOR)node_revert.<(OBJ_SUFFIX)',

src/node_url.cc

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,6 @@ enum url_error_cb_args {
131131
return str.length() >= 2 && name(str[0], str[1]); \
132132
}
133133

134-
CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z'))
135-
136-
CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z'))
137-
138134
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
139135
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
140136

@@ -865,9 +861,7 @@ static url_host_type ParseHost(url_host* host,
865861
if (!stringutils::ContainsNonAscii(buf, strlen(buf))) {
866862
// Lowercase ASCII domains
867863
for (size_t n = 0; n < decoded.size(); n++) {
868-
if (!IsLowerCaseASCII(decoded[n])) {
869-
decoded[n] = ASCIILowercase(decoded[n]);
870-
}
864+
decoded[n] = ASCIILowercase(decoded[n]);
871865
}
872866
} else {
873867
// Then we have to Unicode IDNA toASCII

src/string_utils.cc

Lines changed: 0 additions & 55 deletions
This file was deleted.

src/string_utils.h

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,59 @@
22
#ifndef SRC_STRING_UTILS_H_
33
#define SRC_STRING_UTILS_H_
44

5-
#include "env.h"
6-
#include "env-inl.h"
7-
#include "util.h"
5+
#include <cstddef>
6+
#include <cstdint>
87

98
namespace node {
109
namespace stringutils {
11-
bool ContainsNonAscii(const char* src, size_t len);
10+
// Byte-at-a-time scan of the first `len` bytes of `buf`.
// Returns true as soon as a byte with its high bit (0x80) set is found,
// i.e. a byte outside the 7-bit ASCII range; returns false otherwise
// (including when `len` is 0, since the loop body never runs).
inline static bool contains_non_ascii_slow(const char* buf, size_t len) {
11+
for (size_t i = 0; i < len; ++i) {
12+
// Only the top bit is tested; every 7-bit ASCII value has it clear.
if (buf[i] & 0x80)
13+
return true;
14+
}
15+
return false;
16+
}
17+
18+
// Returns true if any of the `len` bytes starting at `src` has bit 0x80 set,
// false otherwise.
//
// Inputs shorter than 16 bytes are handed to contains_non_ascii_slow().
// Longer inputs are checked in three phases:
//   1. leading bytes up to the next uintptr_t-aligned address (byte loop),
//   2. whole uintptr_t words, each tested against a mask with 0x80 in
//      every byte,
//   3. trailing bytes that do not fill a whole word (byte loop).
inline bool ContainsNonAscii(const char* src, size_t len) {
19+
if (len < 16) {
20+
return contains_non_ascii_slow(src, len);
21+
}
22+
23+
const unsigned bytes_per_word = sizeof(uintptr_t);
24+
// Low bits of an address/length that are non-zero when it is not a
// multiple of the word size.
const unsigned align_mask = bytes_per_word - 1;
25+
// Number of bytes `src` sits past the previous word boundary.
const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
26+
27+
if (unaligned > 0) {
28+
// Bytes needed to reach the next word boundary; scanned one at a time.
const unsigned n = bytes_per_word - unaligned;
29+
if (contains_non_ascii_slow(src, n))
30+
return true;
31+
// Skip past the bytes just checked so `src` is now word-aligned.
src += n;
32+
len -= n;
33+
}
34+
35+
36+
// Pick a mask with 0x80 in each byte of the word: the 8-byte constant when
// _WIN64 or _LP64 is defined, the 4-byte constant otherwise.
#if defined(_WIN64) || defined(_LP64)
37+
const uintptr_t mask = 0x8080808080808080ll;
38+
#else
39+
const uintptr_t mask = 0x80808080l;
40+
#endif
41+
42+
// NOTE(review): the char buffer is read through a `const uintptr_t*` below;
// confirm this type-punned access is acceptable for the compilers and
// aliasing settings this header is built with.
const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
43+
44+
// Test one whole word per iteration; any set high bit means non-ASCII.
for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
45+
if (srcw[i] & mask)
46+
return true;
47+
}
48+
49+
// Bytes left over after the last whole word.
const unsigned remainder = len & align_mask;
50+
if (remainder > 0) {
51+
// Offset of the first leftover byte, relative to the (advanced) `src`.
const size_t offset = len - remainder;
52+
if (contains_non_ascii_slow(src + offset, remainder))
53+
return true;
54+
}
55+
56+
return false;
57+
}
1258
} // namespace stringutils
1359
} // namespace node
1460

0 commit comments

Comments
 (0)