Bug Summary

File: out/../src/node_url.cc
Warning: line 962, column 11
Value stored to 'special_back_slash' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name node_url.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D NODE_ARCH="x64" -D NODE_PLATFORM="linux" -D NODE_WANT_INTERNALS=1 -D V8_DEPRECATION_WARNINGS=1 -D NODE_OPENSSL_SYSTEM_CERT_PATH="" -D NODE_USE_NODE_CODE_CACHE=1 -D HAVE_INSPECTOR=1 -D NODE_ENABLE_LARGE_CODE_PAGES=1 -D __POSIX__ -D NODE_USE_V8_PLATFORM=1 -D NODE_HAVE_I18N_SUPPORT=1 -D HAVE_OPENSSL=1 -D OPENSSL_API_COMPAT=0x10100000L -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_STATIC_IMPLEMENTATION=1 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -D _LARGEFILE_SOURCE -D _FILE_OFFSET_BITS=64 -D _POSIX_C_SOURCE=200112 -D NGHTTP2_STATICLIB -D NDEBUG -D OPENSSL_USE_NODELETE -D L_ENDIAN -D OPENSSL_BUILDING_OPENSSL -D AES_ASM -D BSAES_ASM -D CMLL_ASM -D ECP_NISTZ256_ASM -D GHASH_ASM -D KECCAK1600_ASM -D 
MD5_ASM -D OPENSSL_BN_ASM_GF2m -D OPENSSL_BN_ASM_MONT -D OPENSSL_BN_ASM_MONT5 -D OPENSSL_CPUID_OBJ -D OPENSSL_IA32_SSE2 -D PADLOCK_ASM -D POLY1305_ASM -D SHA1_ASM -D SHA256_ASM -D SHA512_ASM -D VPAES_ASM -D WHIRLPOOL_ASM -D X25519_ASM -D OPENSSL_PIC -D NGTCP2_STATICLIB -D NGHTTP3_STATICLIB -I ../src -I /home/maurizio/node-v18.6.0/out/Release/obj/gen -I /home/maurizio/node-v18.6.0/out/Release/obj/gen/include -I /home/maurizio/node-v18.6.0/out/Release/obj/gen/src -I ../deps/googletest/include -I ../deps/histogram/src -I ../deps/uvwasi/include -I ../deps/v8/include -I ../deps/icu-small/source/i18n -I ../deps/icu-small/source/common -I ../deps/zlib -I ../deps/llhttp/include -I ../deps/cares/include -I ../deps/uv/include -I ../deps/nghttp2/lib/includes -I ../deps/brotli/c/include -I ../deps/openssl/openssl/include -I ../deps/openssl/openssl/crypto/include -I ../deps/openssl/config/archs/linux-x86_64/asm/include -I ../deps/openssl/config/archs/linux-x86_64/asm -I ../deps/ngtcp2 -I ../deps/ngtcp2/ngtcp2/lib/includes -I ../deps/ngtcp2/ngtcp2/crypto/includes -I ../deps/ngtcp2/nghttp3/lib/includes -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-unused-parameter -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../src/node_url.cc
1#include "node_url.h"
2#include "base_object-inl.h"
3#include "node_errors.h"
4#include "node_external_reference.h"
5#include "node_i18n.h"
6#include "util-inl.h"
7
8#include <algorithm>
9#include <cmath>
10#include <cstdio>
11#include <numeric>
12#include <string>
13#include <vector>
14
15namespace node {
16
17using errors::TryCatchScope;
18
19using url::table_data::hex;
20using url::table_data::C0_CONTROL_ENCODE_SET;
21using url::table_data::FRAGMENT_ENCODE_SET;
22using url::table_data::PATH_ENCODE_SET;
23using url::table_data::USERINFO_ENCODE_SET;
24using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
25using url::table_data::QUERY_ENCODE_SET_SPECIAL;
26
27using v8::Array;
28using v8::Context;
29using v8::Function;
30using v8::FunctionCallbackInfo;
31using v8::HandleScope;
32using v8::Int32;
33using v8::Integer;
34using v8::Isolate;
35using v8::Local;
36using v8::MaybeLocal;
37using v8::NewStringType;
38using v8::Null;
39using v8::Object;
40using v8::String;
41using v8::Undefined;
42using v8::Value;
43
44Local<String> Utf8String(Isolate* isolate, const std::string& str) {
45 return String::NewFromUtf8(isolate,
46 str.data(),
47 NewStringType::kNormal,
48 str.length()).ToLocalChecked();
49}
50
51namespace url {
52namespace {
53
54// https://url.spec.whatwg.org/#eof-code-point
// Sentinel standing in for "no more input": ParseIPv4Host() substitutes it for
// the character one past the end of its buffer.
55constexpr char kEOL = -1;
56
// https://url.spec.whatwg.org/#concept-host
//
// Holds the outcome of parsing a URL host. `type_` selects which member of
// `value_` is live: a domain/opaque string, a 32-bit IPv4 address or eight
// 16-bit IPv6 pieces. H_FAILED means nothing is stored; it is both the initial
// state and the state left behind by a rejected input.
class URLHost {
 public:
  ~URLHost();

  // Parsers. Each one is defined out of line and CHECKs that the object is
  // still in the H_FAILED state when called.
  void ParseIPv4Host(const char* input, size_t length);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // The std::string member is never constructed or destroyed implicitly;
  // its lifetime is managed by Reset() and SetString() below.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // True while `value_.domain_or_opaque` is the live union member.
  bool HoldsString() const {
    return type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
  }

  // Destroys the string member if it is live and returns to H_FAILED.
  void Reset() {
    if (HoldsString()) {
      using StringType = std::string;
      value_.domain_or_opaque.~StringType();
    }
    type_ = HostType::H_FAILED;
  }

  // Assigning to the string member with `=` would be brittle: it only works
  // if the storage already holds a string that needs no cleanup, which
  // depends on standard library internals. Instead the old value is torn down
  // and a fresh string is placement-constructed in the union storage.
  // Not forcing strings into a union at all might be the better long-term fix.
  void SetString(HostType type, std::string&& string) {
    Reset();
    type_ = type;
    new (&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetOpaque(std::string&& string) {
    SetString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    SetString(HostType::H_DOMAIN, std::move(string));
  }
};
129
// Releases whatever the host currently stores: Reset() runs the std::string
// destructor by hand when the union holds a domain or opaque host.
130URLHost::~URLHost() {
131 Reset();
132}
133
// X-macro listing the entries of `enum url_cb_args`, in order. XX is applied
// to every name; ARG_COUNT must stay last so that its value equals the number
// of entries before it.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// One enumerator per ARGS entry, numbered consecutively from zero.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};
151
// Generates two overloads of predicate `name`:
//   * name(ch1, ch2)        evaluates `expr`, which may refer to ch1 and ch2;
//   * name(basic_string<T>) applies the first overload to the first two
//     characters and is false for strings shorter than two characters.
// `bits` is the minimum character width, enforced with static_assert.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }
165
166// https://infra.spec.whatwg.org/#ascii-tab-or-newline
167CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))template <typename T> bool IsASCIITabOrNewline(const T ch
) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return ((ch == '\t' || ch == '\n' || ch == '\r'
)); }
168
169// https://infra.spec.whatwg.org/#c0-control
170CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f'))template <typename T> bool IsC0Control(const T ch) { static_assert
(sizeof(ch) >= (8) / 8, "Character must be wider than " "8"
" bits"); return ((ch >= '\0' && ch <= '\x1f')
); }
171
172// https://infra.spec.whatwg.org/#c0-control-or-space
173CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))template <typename T> bool IsC0ControlOrSpace(const T ch
) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return ((ch >= '\0' && ch <= ' '
)); }
174
175// https://infra.spec.whatwg.org/#ascii-digit
176CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))template <typename T> bool IsASCIIDigit(const T ch) { static_assert
(sizeof(ch) >= (8) / 8, "Character must be wider than " "8"
" bits"); return ((ch >= '0' && ch <= '9')); }
177
178// https://infra.spec.whatwg.org/#ascii-hex-digit
179CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||template <typename T> bool IsASCIIHexDigit(const T ch) {
static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return ((IsASCIIDigit(ch) || (ch >= 'A' &&
ch <= 'F') || (ch >= 'a' && ch <= 'f'))); }
180 (ch >= 'A' && ch <= 'F') ||template <typename T> bool IsASCIIHexDigit(const T ch) {
static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return ((IsASCIIDigit(ch) || (ch >= 'A' &&
ch <= 'F') || (ch >= 'a' && ch <= 'f'))); }
181 (ch >= 'a' && ch <= 'f')))template <typename T> bool IsASCIIHexDigit(const T ch) {
static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return ((IsASCIIDigit(ch) || (ch >= 'A' &&
ch <= 'F') || (ch >= 'a' && ch <= 'f'))); }
182
183// https://infra.spec.whatwg.org/#ascii-alpha
184CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||template <typename T> bool IsASCIIAlpha(const T ch) { static_assert
(sizeof(ch) >= (8) / 8, "Character must be wider than " "8"
" bits"); return (((ch >= 'A' && ch <= 'Z') ||
(ch >= 'a' && ch <= 'z'))); }
185 (ch >= 'a' && ch <= 'z')))template <typename T> bool IsASCIIAlpha(const T ch) { static_assert
(sizeof(ch) >= (8) / 8, "Character must be wider than " "8"
" bits"); return (((ch >= 'A' && ch <= 'Z') ||
(ch >= 'a' && ch <= 'z'))); }
186
187// https://infra.spec.whatwg.org/#ascii-alphanumeric
188CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))template <typename T> bool IsASCIIAlphanumeric(const T ch
) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return ((IsASCIIDigit(ch) || IsASCIIAlpha(ch))
); }
189
// https://infra.spec.whatwg.org/#ascii-lowercase
// ASCII letters are returned with bit 0x20 set, which maps 'A'..'Z' onto
// 'a'..'z' and leaves lowercase letters as they are; any other value is
// returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
195
196// https://url.spec.whatwg.org/#forbidden-host-code-point
197CHAR_TEST(8,template <typename T> bool IsForbiddenHostCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (ch == '\0' || ch == '\t' || ch == '\n'
|| ch == '\r' || ch == ' ' || ch == '#' || ch == '/' || ch ==
':' || ch == '?' || ch == '@' || ch == '[' || ch == '<' ||
ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch ==
'|'); }
198 IsForbiddenHostCodePoint,template <typename T> bool IsForbiddenHostCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (ch == '\0' || ch == '\t' || ch == '\n'
|| ch == '\r' || ch == ' ' || ch == '#' || ch == '/' || ch ==
':' || ch == '?' || ch == '@' || ch == '[' || ch == '<' ||
ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch ==
'|'); }
199 ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' ||template <typename T> bool IsForbiddenHostCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (ch == '\0' || ch == '\t' || ch == '\n'
|| ch == '\r' || ch == ' ' || ch == '#' || ch == '/' || ch ==
':' || ch == '?' || ch == '@' || ch == '[' || ch == '<' ||
ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch ==
'|'); }
200 ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' ||template <typename T> bool IsForbiddenHostCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (ch == '\0' || ch == '\t' || ch == '\n'
|| ch == '\r' || ch == ' ' || ch == '#' || ch == '/' || ch ==
':' || ch == '?' || ch == '@' || ch == '[' || ch == '<' ||
ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch ==
'|'); }
201 ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||template <typename T> bool IsForbiddenHostCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (ch == '\0' || ch == '\t' || ch == '\n'
|| ch == '\r' || ch == ' ' || ch == '#' || ch == '/' || ch ==
':' || ch == '?' || ch == '@' || ch == '[' || ch == '<' ||
ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch ==
'|'); }
202 ch == '^' || ch == '|')template <typename T> bool IsForbiddenHostCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (ch == '\0' || ch == '\t' || ch == '\n'
|| ch == '\r' || ch == ' ' || ch == '#' || ch == '/' || ch ==
':' || ch == '?' || ch == '@' || ch == '[' || ch == '<' ||
ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch ==
'|'); }
203
204// https://url.spec.whatwg.org/#forbidden-domain-code-point
205CHAR_TEST(8,template <typename T> bool IsForbiddenDomainCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (IsForbiddenHostCodePoint(ch) || IsC0Control
(ch) || ch == '%' || ch == '\x7f'); }
206 IsForbiddenDomainCodePoint,template <typename T> bool IsForbiddenDomainCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (IsForbiddenHostCodePoint(ch) || IsC0Control
(ch) || ch == '%' || ch == '\x7f'); }
207 IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' ||template <typename T> bool IsForbiddenDomainCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (IsForbiddenHostCodePoint(ch) || IsC0Control
(ch) || ch == '%' || ch == '\x7f'); }
208 ch == '\x7f')template <typename T> bool IsForbiddenDomainCodePoint(const
T ch) { static_assert(sizeof(ch) >= (8) / 8, "Character must be wider than "
"8" " bits"); return (IsForbiddenHostCodePoint(ch) || IsC0Control
(ch) || ch == '%' || ch == '\x7f'); }
209
210// https://url.spec.whatwg.org/#windows-drive-letter
211TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
212 (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
213
214// https://url.spec.whatwg.org/#normalized-windows-drive-letter
215TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
216 (IsASCIIAlpha(ch1) && ch2 == ':'))
217
218#undef TWO_CHAR_STRING_TEST
219
// Returns whether bit `i` is set in the bitmap `a`, where bit 0 is the least
// significant bit of a[0], bit 8 the least significant bit of a[1], etc.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const int mask = 1 << (i % 8);
  return (byte & mask) != 0;
}
223
224// Appends ch to str. If ch position in encode_set is set, the ch will
225// be percent-encoded then appended.
226void AppendOrEscape(std::string* str,
227 const unsigned char ch,
228 const uint8_t encode_set[]) {
229 if (BitAt(encode_set, ch))
230 *str += hex + ch * 4; // "%XX\0" has a length of 4
231 else
232 *str += ch;
233}
234
235unsigned hex2bin(const char ch) {
236 if (ch >= '0' && ch <= '9')
237 return ch - '0';
238 if (ch >= 'A' && ch <= 'F')
239 return 10 + (ch - 'A');
240 if (ch >= 'a' && ch <= 'f')
241 return 10 + (ch - 'a');
242 UNREACHABLE()do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "242", "\"Unreachable code reached\"", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0)
;
243}
244
245std::string PercentDecode(const char* input, size_t len) {
246 std::string dest;
247 if (len == 0)
248 return dest;
249 dest.reserve(len);
250 const char* pointer = input;
251 const char* end = input + len;
252
253 while (pointer < end) {
254 const char ch = pointer[0];
255 size_t remaining = end - pointer - 1;
256 if (ch != '%' || remaining < 2 ||
257 (ch == '%' &&
258 (!IsASCIIHexDigit(pointer[1]) ||
259 !IsASCIIHexDigit(pointer[2])))) {
260 dest += ch;
261 pointer++;
262 continue;
263 } else {
264 unsigned a = hex2bin(pointer[1]);
265 unsigned b = hex2bin(pointer[2]);
266 char c = static_cast<char>(a * 16 + b);
267 dest += c;
268 pointer += 3;
269 }
270 }
271 return dest;
272}
273
// X-macro over the special schemes. Each entry is
// XX(identifier, port, "scheme:"); the port is the value NormalizePort()
// compares against for that scheme (-1 for file:).
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")
281
282bool IsSpecial(const std::string& scheme) {
283#define V(_, __, name) if (scheme == name) return true;
284 SPECIALS(V)V(ftp, 21, "ftp:") V(file, -1, "file:") V(http, 80, "http:") V
(https, 443, "https:") V(ws, 80, "ws:") V(wss, 443, "wss:")
;
285#undef V
286 return false;
287}
288
289Local<String> GetSpecial(Environment* env, const std::string& scheme) {
290#define V(key, _, name) if (scheme == name) \
291 return env->url_special_##key##_string();
292 SPECIALS(V)V(ftp, 21, "ftp:") V(file, -1, "file:") V(http, 80, "http:") V
(https, 443, "https:") V(ws, 80, "ws:") V(wss, 443, "wss:")
293#undef V
294 UNREACHABLE()do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "294", "\"Unreachable code reached\"", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0)
;
295}
296
297int NormalizePort(const std::string& scheme, int p) {
298#define V(_, port, name) if (scheme == name && p == port) return -1;
299 SPECIALS(V)V(ftp, 21, "ftp:") V(file, -1, "file:") V(http, 80, "http:") V
(https, 443, "https:") V(ws, 80, "ws:") V(wss, 443, "wss:")
;
300#undef V
301 return p;
302}
303
304// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
305bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
306 size_t length = end - p;
307 return length >= 2 &&
308 IsWindowsDriveLetter(p[0], p[1]) &&
309 (length == 2 ||
310 p[2] == '/' ||
311 p[2] == '\\' ||
312 p[2] == '?' ||
313 p[2] == '#');
314}
315
#if defined(NODE_HAVE_I18N_SUPPORT)
// Converts `input` with i18n::ToUnicode() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, when the conversion reports an
// error.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool failed =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) < 0;
  if (failed)
    return false;
  output->assign(*converted, converted.length());
  return true;
}

// Converts `input` with i18n::ToASCII() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, when the conversion reports an
// error or produces an empty string.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool failed =
      i18n::ToASCII(&converted, input.c_str(), input.length()) < 0 ||
      converted.length() == 0;
  if (failed)
    return false;
  output->assign(*converted, converted.length());
  return true;
}
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
// Intentional non-ops if ICU is not present: the input is copied through
// verbatim and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
346
347#define NS_IN6ADDRSZ16 16
348
349void URLHost::ParseIPv6Host(const char* input, size_t length) {
350 CHECK_EQ(type_, HostType::H_FAILED)do { if (__builtin_expect(!!(!((type_) == (HostType::H_FAILED
))), 0)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "350", "(type_) == (HostType::H_FAILED)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
351
352 unsigned char buf[sizeof(struct in6_addr)];
353 MaybeStackBuffer<char> ipv6(length + 1);
354 *(*ipv6 + length) = 0;
355 memset(buf, 0, sizeof(buf));
356 memcpy(*ipv6, input, sizeof(const char) * length);
357
358 int ret = uv_inet_pton(AF_INET610, *ipv6, buf);
359
360 if (ret != 0) {
361 return;
362 }
363
364 // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
365 for (int i = 0; i < NS_IN6ADDRSZ16; i += 2) {
366 value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
367 }
368
369 type_ = HostType::H_IPV6;
370}
371
// https://url.spec.whatwg.org/#ipv4-number-parser
//
// Parses the characters in [start, end) as one IPv4 number.
//   * A leading "0x" / "0X" selects hexadecimal.
//   * Any other leading "0" followed by at least one more character selects
//     octal.
//   * Everything else is decimal.
// Returns the parsed value, 0 when nothing is left after the radix prefix,
// or -1 when the range is empty or contains a character that is not a digit
// of the selected radix. Values that do not fit in int64_t saturate at
// INT64_MAX (the result strtoll() would give on overflow).
//
// Only bytes inside [start, end) are read; the range does not need to be
// followed by a NUL or any other terminator.
int64_t ParseIPv4Number(const char* start, const char* end) {
  if (start == end) return -1;

  unsigned radix = 10;
  if (end - start >= 2 && start[0] == '0') {
    if ((start[1] | 0x20) == 'x') {
      start += 2;
      radix = 16;
    } else {
      start += 1;
      radix = 8;
    }
  }

  if (start == end) return 0;

  int64_t value = 0;
  bool overflowed = false;
  for (const char* p = start; p < end; ++p) {
    const char ch = *p;
    unsigned digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      digit = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = 10 + (ch - 'A');
    } else {
      return -1;
    }
    if (digit >= radix)
      return -1;

    // Keep validating the remaining characters after an overflow so that an
    // invalid character still yields -1, but stop accumulating.
    if (overflowed)
      continue;
    if (value > (INT64_MAX - static_cast<int64_t>(digit)) /
                    static_cast<int64_t>(radix)) {
      overflowed = true;
    } else {
      value = value * static_cast<int64_t>(radix) + static_cast<int64_t>(digit);
    }
  }
  return overflowed ? INT64_MAX : value;
}
409
410// https://url.spec.whatwg.org/#ends-in-a-number-checker
411bool EndsInANumber(const std::string& input) {
412 std::vector<std::string> parts = SplitString(input, '.', false);
413
414 if (parts.empty()) return false;
415
416 if (parts.back().empty()) {
417 if (parts.size() == 1) return false;
418 parts.pop_back();
419 }
420
421 const std::string& last = parts.back();
422
423 // If last is non-empty and contains only ASCII digits, then return true
424 if (!last.empty() && std::all_of(last.begin(), last.end(), ::isdigit)) {
425 return true;
426 }
427
428 const char* last_str = last.c_str();
429 int64_t num = ParseIPv4Number(last_str, last_str + last.size());
430 if (num >= 0) return true;
431
432 return false;
433}
434
435void URLHost::ParseIPv4Host(const char* input, size_t length) {
436 CHECK_EQ(type_, HostType::H_FAILED)do { if (__builtin_expect(!!(!((type_) == (HostType::H_FAILED
))), 0)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "436", "(type_) == (HostType::H_FAILED)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
437 const char* pointer = input;
438 const char* mark = input;
439 const char* end = pointer + length;
440 int parts = 0;
441 uint32_t val = 0;
442 uint64_t numbers[4];
443 int tooBigNumbers = 0;
444 if (length == 0)
445 return;
446
447 while (pointer <= end) {
448 const char ch = pointer < end ? pointer[0] : kEOL;
449 int64_t remaining = end - pointer - 1;
450 if (ch == '.' || ch == kEOL) {
451 if (++parts > static_cast<int>(arraysize(numbers)))
452 return;
453 if (pointer == mark)
454 return;
455 int64_t n = ParseIPv4Number(mark, pointer);
456 if (n < 0)
457 return;
458
459 if (n > 255) {
460 tooBigNumbers++;
461 }
462 numbers[parts - 1] = n;
463 mark = pointer + 1;
464 if (ch == '.' && remaining == 0)
465 break;
466 }
467 pointer++;
468 }
469 CHECK_GT(parts, 0)do { if (__builtin_expect(!!(!((parts) > (0))), 0)) { do {
static const node::AssertionInfo args = { "../src/node_url.cc"
":" "469", "(parts) > (0)", __PRETTY_FUNCTION__ }; node::
Assert(args); } while (0); } } while (0)
;
470
471 // If any but the last item in numbers is greater than 255, return failure.
472 // If the last item in numbers is greater than or equal to
473 // 256^(5 - the number of items in numbers), return failure.
474 if (tooBigNumbers > 1 ||
475 (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
476 numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
477 return;
478 }
479
480 type_ = HostType::H_IPV4;
481 val = static_cast<uint32_t>(numbers[parts - 1]);
482 for (int n = 0; n < parts - 1; n++) {
483 double b = 3 - n;
484 val +=
485 static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
486 }
487
488 value_.ipv4 = val;
489}
490
491void URLHost::ParseOpaqueHost(const char* input, size_t length) {
492 CHECK_EQ(type_, HostType::H_FAILED)do { if (__builtin_expect(!!(!((type_) == (HostType::H_FAILED
))), 0)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "492", "(type_) == (HostType::H_FAILED)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
493 std::string output;
494 output.reserve(length);
495 for (size_t i = 0; i < length; i++) {
496 const char ch = input[i];
497 if (IsForbiddenHostCodePoint(ch)) {
498 return;
499 } else {
500 AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
501 }
502 }
503
504 SetOpaque(std::move(output));
505}
506
507void URLHost::ParseHost(const char* input,
508 size_t length,
509 bool is_special,
510 bool unicode) {
511 CHECK_EQ(type_, HostType::H_FAILED)do { if (__builtin_expect(!!(!((type_) == (HostType::H_FAILED
))), 0)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "511", "(type_) == (HostType::H_FAILED)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
512 const char* pointer = input;
513
514 if (length == 0)
515 return;
516
517 if (pointer[0] == '[') {
518 if (pointer[length - 1] != ']')
519 return;
520 return ParseIPv6Host(++pointer, length - 2);
521 }
522
523 if (!is_special)
524 return ParseOpaqueHost(input, length);
525
526 // First, we have to percent decode
527 std::string decoded = PercentDecode(input, length);
528
529 // Then we have to punycode toASCII
530 if (!ToASCII(decoded, &decoded))
531 return;
532
533 // If any of the following characters are still present, we have to fail
534 for (size_t n = 0; n < decoded.size(); n++) {
535 const char ch = decoded[n];
536 if (IsForbiddenDomainCodePoint(ch)) {
537 return;
538 }
539 }
540
541 // If domain ends in a number, then return the result of IPv4 parsing domain
542 if (EndsInANumber(decoded)) {
543 return ParseIPv4Host(decoded.c_str(), decoded.length());
544 }
545
546 // If the unicode flag is set, run the result through punycode ToUnicode
547 if (unicode && !ToUnicode(decoded, &decoded))
548 return;
549
550 // It's not an IPv4 or IPv6 address, it must be a domain
551 SetDomain(std::move(decoded));
552}
553
// Locates the longest run of zero elements in values[0..len) so that an IPv6
// address can be serialized with the "::" compression. Returns a pointer to
// the first element of that run, or nullptr when no run is at least two
// elements long. When several runs share the maximum length, the first one
// wins.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_run = nullptr;
  unsigned best_length = 1;  // A run has to beat this, i.e. be >= 2 long.

  T* run_begin = nullptr;
  unsigned run_length = 0;

  // Closes the run that is currently open (if any) and records it when it is
  // strictly longer than the best one seen so far.
  auto close_run = [&]() {
    if (run_length > best_length) {
      best_length = run_length;
      best_run = run_begin;
    }
    run_length = 0;
    run_begin = nullptr;
  };

  for (size_t i = 0; i < len; i++) {
    T* item = values + i;
    if (*item != 0) {
      close_run();
      continue;
    }
    if (run_begin == nullptr)
      run_begin = item;
    run_length++;
  }
  close_run();  // The input may end in the middle of a run.

  return best_run;
}
584
585std::string URLHost::ToStringMove() {
586 std::string return_value;
587 switch (type_) {
588 case HostType::H_DOMAIN:
589 case HostType::H_OPAQUE:
590 return_value = std::move(value_.domain_or_opaque);
591 break;
592 default:
593 return_value = ToString();
594 break;
595 }
596 Reset();
597 return return_value;
598}
599
600std::string URLHost::ToString() const {
601 std::string dest;
602 switch (type_) {
603 case HostType::H_DOMAIN:
604 case HostType::H_OPAQUE:
605 return value_.domain_or_opaque;
606 case HostType::H_IPV4: {
607 dest.reserve(15);
608 uint32_t value = value_.ipv4;
609 for (int n = 0; n < 4; n++) {
610 dest.insert(0, std::to_string(value % 256));
611 if (n < 3)
612 dest.insert(0, 1, '.');
613 value /= 256;
614 }
615 break;
616 }
617 case HostType::H_IPV6: {
618 dest.reserve(41);
619 dest += '[';
620 const uint16_t* start = &value_.ipv6[0];
621 const uint16_t* compress_pointer =
622 FindLongestZeroSequence(start, 8);
623 bool ignore0 = false;
624 for (int n = 0; n <= 7; n++) {
625 const uint16_t* piece = &value_.ipv6[n];
626 if (ignore0 && *piece == 0)
627 continue;
628 else if (ignore0)
629 ignore0 = false;
630 if (compress_pointer == piece) {
631 dest += n == 0 ? "::" : ":";
632 ignore0 = true;
633 continue;
634 }
635 char buf[5];
636 snprintf(buf, sizeof(buf), "%x", *piece);
637 dest += buf;
638 if (n < 7)
639 dest += ':';
640 }
641 dest += ']';
642 break;
643 }
644 case HostType::H_FAILED:
645 break;
646 }
647 return dest;
648}
649
650bool ParseHost(const std::string& input,
651 std::string* output,
652 bool is_special,
653 bool unicode = false) {
654 if (input.empty()) {
655 output->clear();
656 return true;
657 }
658 URLHost host;
659 host.ParseHost(input.c_str(), input.length(), is_special, unicode);
660 if (host.ParsingFailed())
661 return false;
662 *output = host.ToStringMove();
663 return true;
664}
665
666std::vector<std::string> FromJSStringArray(Environment* env,
667 Local<Array> array) {
668 std::vector<std::string> vec;
669 if (array->Length() > 0)
670 vec.reserve(array->Length());
671 for (size_t n = 0; n < array->Length(); n++) {
672 Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
673 if (val->IsString()) {
674 Utf8Value value(env->isolate(), val.As<String>());
675 vec.emplace_back(*value, value.length());
676 }
677 }
678 return vec;
679}
680
681url_data HarvestBase(Environment* env, Local<Object> base_obj) {
682 url_data base;
683 Local<Context> context = env->context();
684
685 Local<Value> flags =
686 base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
687 if (flags->IsInt32())
688 base.flags = flags->Int32Value(context).FromJust();
689
690 Local<Value> port =
691 base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
692 if (port->IsInt32())
693 base.port = port->Int32Value(context).FromJust();
694
695 Local<Value> scheme =
696 base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
697 base.scheme = Utf8Value(env->isolate(), scheme).out();
698
699 auto GetStr = [&](std::string url_data::*member,
700 int flag,
701 Local<String> name,
702 bool empty_as_present) {
703 Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
704 if (value->IsString()) {
705 Utf8Value utf8value(env->isolate(), value.As<String>());
706 (base.*member).assign(*utf8value, utf8value.length());
707 if (empty_as_present || value.As<String>()->Length() != 0) {
708 base.flags |= flag;
709 }
710 }
711 };
712 GetStr(&url_data::username,
713 URL_FLAGS_HAS_USERNAME,
714 env->username_string(),
715 false);
716 GetStr(&url_data::password,
717 URL_FLAGS_HAS_PASSWORD,
718 env->password_string(),
719 false);
720 GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
721 GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
722 GetStr(&url_data::fragment,
723 URL_FLAGS_HAS_FRAGMENT,
724 env->fragment_string(),
725 true);
726
727 Local<Value>
728 path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
729 if (path->IsArray()) {
730 base.flags |= URL_FLAGS_HAS_PATH;
731 base.path = FromJSStringArray(env, path.As<Array>());
732 }
733 return base;
734}
735
736url_data HarvestContext(Environment* env, Local<Object> context_obj) {
737 url_data context;
738 Local<Value> flags =
739 context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
740 if (flags->IsInt32()) {
741 static constexpr int32_t kCopyFlagsMask =
742 URL_FLAGS_SPECIAL |
743 URL_FLAGS_CANNOT_BE_BASE |
744 URL_FLAGS_HAS_USERNAME |
745 URL_FLAGS_HAS_PASSWORD |
746 URL_FLAGS_HAS_HOST;
747 context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
748 }
749 Local<Value> scheme =
750 context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
751 if (scheme->IsString()) {
752 Utf8Value value(env->isolate(), scheme);
753 context.scheme.assign(*value, value.length());
754 }
755 Local<Value> port =
756 context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
757 if (port->IsInt32())
758 context.port = port.As<Int32>()->Value();
759 if (context.flags & URL_FLAGS_HAS_USERNAME) {
760 Local<Value> username =
761 context_obj->Get(env->context(),
762 env->username_string()).ToLocalChecked();
763 CHECK(username->IsString())do { if (__builtin_expect(!!(!(username->IsString())), 0))
{ do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "763", "username->IsString()", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
;
764 Utf8Value value(env->isolate(), username);
765 context.username.assign(*value, value.length());
766 }
767 if (context.flags & URL_FLAGS_HAS_PASSWORD) {
768 Local<Value> password =
769 context_obj->Get(env->context(),
770 env->password_string()).ToLocalChecked();
771 CHECK(password->IsString())do { if (__builtin_expect(!!(!(password->IsString())), 0))
{ do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "771", "password->IsString()", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
;
772 Utf8Value value(env->isolate(), password);
773 context.password.assign(*value, value.length());
774 }
775 Local<Value> host =
776 context_obj->Get(env->context(),
777 env->host_string()).ToLocalChecked();
778 if (host->IsString()) {
779 Utf8Value value(env->isolate(), host);
780 context.host.assign(*value, value.length());
781 }
782 return context;
783}
784
785// Single dot segment can be ".", "%2e", or "%2E"
786bool IsSingleDotSegment(const std::string& str) {
787 switch (str.size()) {
788 case 1:
789 return str == ".";
790 case 3:
791 return str[0] == '%' &&
792 str[1] == '2' &&
793 ASCIILowercase(str[2]) == 'e';
794 default:
795 return false;
796 }
797}
798
799// Double dot segment can be:
800// "..", ".%2e", ".%2E", "%2e.", "%2E.",
801// "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
802bool IsDoubleDotSegment(const std::string& str) {
803 switch (str.size()) {
804 case 2:
805 return str == "..";
806 case 4:
807 if (str[0] != '.' && str[0] != '%')
808 return false;
809 return ((str[0] == '.' &&
810 str[1] == '%' &&
811 str[2] == '2' &&
812 ASCIILowercase(str[3]) == 'e') ||
813 (str[0] == '%' &&
814 str[1] == '2' &&
815 ASCIILowercase(str[2]) == 'e' &&
816 str[3] == '.'));
817 case 6:
818 return (str[0] == '%' &&
819 str[1] == '2' &&
820 ASCIILowercase(str[2]) == 'e' &&
821 str[3] == '%' &&
822 str[4] == '2' &&
823 ASCIILowercase(str[5]) == 'e');
824 default:
825 return false;
826 }
827}
828
829void ShortenUrlPath(struct url_data* url) {
830 if (url->path.empty()) return;
831 if (url->path.size() == 1 && url->scheme == "file:" &&
832 IsNormalizedWindowsDriveLetter(url->path[0])) return;
833 url->path.pop_back();
834}
835
836} // anonymous namespace
837
838void URL::Parse(const char* input,
839 size_t len,
840 enum url_parse_state state_override,
841 struct url_data* url,
842 bool has_url,
843 const struct url_data* base,
844 bool has_base) {
845 const char* p = input;
846 const char* end = input + len;
847
848 if (!has_url) {
849 for (const char* ptr = p; ptr < end; ptr++) {
850 if (IsC0ControlOrSpace(*ptr))
851 p++;
852 else
853 break;
854 }
855 for (const char* ptr = end - 1; ptr >= p; ptr--) {
856 if (IsC0ControlOrSpace(*ptr))
857 end--;
858 else
859 break;
860 }
861 input = p;
862 len = end - p;
863 }
864
865 // The spec says we should strip out any ASCII tabs or newlines.
866 // In those cases, we create another std::string instance with the filtered
867 // contents, but in the general case we avoid the overhead.
868 std::string whitespace_stripped;
869 for (const char* ptr = p; ptr < end; ptr++) {
870 if (!IsASCIITabOrNewline(*ptr))
871 continue;
872 // Hit tab or newline. Allocate storage, copy what we have until now,
873 // and then iterate and filter all similar characters out.
874 whitespace_stripped.reserve(len - 1);
875 whitespace_stripped.assign(p, ptr - p);
876 // 'ptr + 1' skips the current char, which we know to be tab or newline.
877 for (ptr = ptr + 1; ptr < end; ptr++) {
878 if (!IsASCIITabOrNewline(*ptr))
879 whitespace_stripped += *ptr;
880 }
881
882 // Update variables like they should have looked like if the string
883 // had been stripped of whitespace to begin with.
884 input = whitespace_stripped.c_str();
885 len = whitespace_stripped.size();
886 p = input;
887 end = input + len;
888 break;
889 }
890
891 bool atflag = false; // Set when @ has been seen.
892 bool square_bracket_flag = false; // Set inside of [...]
893 bool password_token_seen_flag = false; // Set after a : after an username.
894
895 std::string buffer;
896
897 // Set the initial parse state.
898 const bool has_state_override = state_override != kUnknownState;
899 enum url_parse_state state = has_state_override ? state_override :
900 kSchemeStart;
901
902 if (state < kSchemeStart || state > kFragment) {
903 url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
904 return;
905 }
906
907 while (p <= end) {
908 const char ch = p < end ? p[0] : kEOL;
909 bool special = (url->flags & URL_FLAGS_SPECIAL);
910 bool cannot_be_base;
911 bool special_back_slash = (special && ch == '\\');
912
913 switch (state) {
914 case kSchemeStart:
915 if (IsASCIIAlpha(ch)) {
916 buffer += ASCIILowercase(ch);
917 state = kScheme;
918 } else if (!has_state_override) {
919 state = kNoScheme;
920 continue;
921 } else {
922 url->flags |= URL_FLAGS_FAILED;
923 return;
924 }
925 break;
926 case kScheme:
927 if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
928 buffer += ASCIILowercase(ch);
929 } else if (ch == ':' || (has_state_override && ch == kEOL)) {
930 if (has_state_override && buffer.size() == 0) {
931 url->flags |= URL_FLAGS_TERMINATED;
932 return;
933 }
934 buffer += ':';
935
936 bool new_is_special = IsSpecial(buffer);
937
938 if (has_state_override) {
939 if ((special != new_is_special) ||
940 ((buffer == "file:") &&
941 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
942 (url->flags & URL_FLAGS_HAS_PASSWORD) ||
943 (url->port != -1))) ||
944 (url->scheme == "file:" && url->host.empty())) {
945 url->flags |= URL_FLAGS_TERMINATED;
946 return;
947 }
948 }
949
950 url->scheme = std::move(buffer);
951 url->port = NormalizePort(url->scheme, url->port);
952 if (new_is_special) {
953 url->flags |= URL_FLAGS_SPECIAL;
954 special = true;
955 } else {
956 url->flags &= ~URL_FLAGS_SPECIAL;
957 special = false;
958 }
959 // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
960 // here always not equals to `\\`. So `special_back_slash` here always
961 // equals to `false`.
962 special_back_slash = false;
Value stored to 'special_back_slash' is never read
963 buffer.clear();
964 if (has_state_override)
965 return;
966 if (url->scheme == "file:") {
967 state = kFile;
968 } else if (special &&
969 has_base &&
970 url->scheme == base->scheme) {
971 state = kSpecialRelativeOrAuthority;
972 } else if (special) {
973 state = kSpecialAuthoritySlashes;
974 } else if (p + 1 < end && p[1] == '/') {
975 state = kPathOrAuthority;
976 p++;
977 } else {
978 url->flags |= URL_FLAGS_CANNOT_BE_BASE;
979 url->flags |= URL_FLAGS_HAS_PATH;
980 url->path.emplace_back("");
981 state = kCannotBeBase;
982 }
983 } else if (!has_state_override) {
984 buffer.clear();
985 state = kNoScheme;
986 p = input;
987 continue;
988 } else {
989 url->flags |= URL_FLAGS_FAILED;
990 return;
991 }
992 break;
993 case kNoScheme:
994 cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
995 if (!has_base || (cannot_be_base && ch != '#')) {
996 url->flags |= URL_FLAGS_FAILED;
997 return;
998 } else if (cannot_be_base && ch == '#') {
999 url->scheme = base->scheme;
1000 if (IsSpecial(url->scheme)) {
1001 url->flags |= URL_FLAGS_SPECIAL;
1002 special = true;
1003 } else {
1004 url->flags &= ~URL_FLAGS_SPECIAL;
1005 special = false;
1006 }
1007 special_back_slash = (special && ch == '\\');
1008 if (base->flags & URL_FLAGS_HAS_PATH) {
1009 url->flags |= URL_FLAGS_HAS_PATH;
1010 url->path = base->path;
1011 }
1012 if (base->flags & URL_FLAGS_HAS_QUERY) {
1013 url->flags |= URL_FLAGS_HAS_QUERY;
1014 url->query = base->query;
1015 }
1016 if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1017 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1018 url->fragment = base->fragment;
1019 }
1020 url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1021 state = kFragment;
1022 } else if (has_base &&
1023 base->scheme != "file:") {
1024 state = kRelative;
1025 continue;
1026 } else {
1027 url->scheme = "file:";
1028 url->flags |= URL_FLAGS_SPECIAL;
1029 special = true;
1030 state = kFile;
1031 special_back_slash = (special && ch == '\\');
1032 continue;
1033 }
1034 break;
1035 case kSpecialRelativeOrAuthority:
1036 if (ch == '/' && p + 1 < end && p[1] == '/') {
1037 state = kSpecialAuthorityIgnoreSlashes;
1038 p++;
1039 } else {
1040 state = kRelative;
1041 continue;
1042 }
1043 break;
1044 case kPathOrAuthority:
1045 if (ch == '/') {
1046 state = kAuthority;
1047 } else {
1048 state = kPath;
1049 continue;
1050 }
1051 break;
1052 case kRelative:
1053 url->scheme = base->scheme;
1054 if (IsSpecial(url->scheme)) {
1055 url->flags |= URL_FLAGS_SPECIAL;
1056 special = true;
1057 } else {
1058 url->flags &= ~URL_FLAGS_SPECIAL;
1059 special = false;
1060 }
1061 special_back_slash = (special && ch == '\\');
1062 switch (ch) {
1063 case kEOL:
1064 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1065 url->flags |= URL_FLAGS_HAS_USERNAME;
1066 url->username = base->username;
1067 }
1068 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1069 url->flags |= URL_FLAGS_HAS_PASSWORD;
1070 url->password = base->password;
1071 }
1072 if (base->flags & URL_FLAGS_HAS_HOST) {
1073 url->flags |= URL_FLAGS_HAS_HOST;
1074 url->host = base->host;
1075 }
1076 if (base->flags & URL_FLAGS_HAS_QUERY) {
1077 url->flags |= URL_FLAGS_HAS_QUERY;
1078 url->query = base->query;
1079 }
1080 if (base->flags & URL_FLAGS_HAS_PATH) {
1081 url->flags |= URL_FLAGS_HAS_PATH;
1082 url->path = base->path;
1083 }
1084 url->port = base->port;
1085 break;
1086 case '/':
1087 state = kRelativeSlash;
1088 break;
1089 case '?':
1090 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1091 url->flags |= URL_FLAGS_HAS_USERNAME;
1092 url->username = base->username;
1093 }
1094 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1095 url->flags |= URL_FLAGS_HAS_PASSWORD;
1096 url->password = base->password;
1097 }
1098 if (base->flags & URL_FLAGS_HAS_HOST) {
1099 url->flags |= URL_FLAGS_HAS_HOST;
1100 url->host = base->host;
1101 }
1102 if (base->flags & URL_FLAGS_HAS_PATH) {
1103 url->flags |= URL_FLAGS_HAS_PATH;
1104 url->path = base->path;
1105 }
1106 url->port = base->port;
1107 state = kQuery;
1108 break;
1109 case '#':
1110 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1111 url->flags |= URL_FLAGS_HAS_USERNAME;
1112 url->username = base->username;
1113 }
1114 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1115 url->flags |= URL_FLAGS_HAS_PASSWORD;
1116 url->password = base->password;
1117 }
1118 if (base->flags & URL_FLAGS_HAS_HOST) {
1119 url->flags |= URL_FLAGS_HAS_HOST;
1120 url->host = base->host;
1121 }
1122 if (base->flags & URL_FLAGS_HAS_QUERY) {
1123 url->flags |= URL_FLAGS_HAS_QUERY;
1124 url->query = base->query;
1125 }
1126 if (base->flags & URL_FLAGS_HAS_PATH) {
1127 url->flags |= URL_FLAGS_HAS_PATH;
1128 url->path = base->path;
1129 }
1130 url->port = base->port;
1131 state = kFragment;
1132 break;
1133 default:
1134 if (special_back_slash) {
1135 state = kRelativeSlash;
1136 } else {
1137 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1138 url->flags |= URL_FLAGS_HAS_USERNAME;
1139 url->username = base->username;
1140 }
1141 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1142 url->flags |= URL_FLAGS_HAS_PASSWORD;
1143 url->password = base->password;
1144 }
1145 if (base->flags & URL_FLAGS_HAS_HOST) {
1146 url->flags |= URL_FLAGS_HAS_HOST;
1147 url->host = base->host;
1148 }
1149 if (base->flags & URL_FLAGS_HAS_PATH) {
1150 url->flags |= URL_FLAGS_HAS_PATH;
1151 url->path = base->path;
1152 ShortenUrlPath(url);
1153 }
1154 url->port = base->port;
1155 state = kPath;
1156 continue;
1157 }
1158 }
1159 break;
1160 case kRelativeSlash:
1161 if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1162 state = kSpecialAuthorityIgnoreSlashes;
1163 } else if (ch == '/') {
1164 state = kAuthority;
1165 } else {
1166 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1167 url->flags |= URL_FLAGS_HAS_USERNAME;
1168 url->username = base->username;
1169 }
1170 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1171 url->flags |= URL_FLAGS_HAS_PASSWORD;
1172 url->password = base->password;
1173 }
1174 if (base->flags & URL_FLAGS_HAS_HOST) {
1175 url->flags |= URL_FLAGS_HAS_HOST;
1176 url->host = base->host;
1177 }
1178 url->port = base->port;
1179 state = kPath;
1180 continue;
1181 }
1182 break;
1183 case kSpecialAuthoritySlashes:
1184 state = kSpecialAuthorityIgnoreSlashes;
1185 if (ch == '/' && p + 1 < end && p[1] == '/') {
1186 p++;
1187 } else {
1188 continue;
1189 }
1190 break;
1191 case kSpecialAuthorityIgnoreSlashes:
1192 if (ch != '/' && ch != '\\') {
1193 state = kAuthority;
1194 continue;
1195 }
1196 break;
1197 case kAuthority:
1198 if (ch == '@') {
1199 if (atflag) {
1200 buffer.reserve(buffer.size() + 3);
1201 buffer.insert(0, "%40");
1202 }
1203 atflag = true;
1204 size_t blen = buffer.size();
1205 if (blen > 0 && buffer[0] != ':') {
1206 url->flags |= URL_FLAGS_HAS_USERNAME;
1207 }
1208 for (size_t n = 0; n < blen; n++) {
1209 const char bch = buffer[n];
1210 if (bch == ':') {
1211 url->flags |= URL_FLAGS_HAS_PASSWORD;
1212 if (!password_token_seen_flag) {
1213 password_token_seen_flag = true;
1214 continue;
1215 }
1216 }
1217 if (password_token_seen_flag) {
1218 AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1219 } else {
1220 AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1221 }
1222 }
1223 buffer.clear();
1224 } else if (ch == kEOL ||
1225 ch == '/' ||
1226 ch == '?' ||
1227 ch == '#' ||
1228 special_back_slash) {
1229 if (atflag && buffer.size() == 0) {
1230 url->flags |= URL_FLAGS_FAILED;
1231 return;
1232 }
1233 p -= buffer.size() + 1;
1234 buffer.clear();
1235 state = kHost;
1236 } else {
1237 buffer += ch;
1238 }
1239 break;
1240 case kHost:
1241 case kHostname:
1242 if (has_state_override && url->scheme == "file:") {
1243 state = kFileHost;
1244 continue;
1245 } else if (ch == ':' && !square_bracket_flag) {
1246 if (buffer.size() == 0) {
1247 url->flags |= URL_FLAGS_FAILED;
1248 return;
1249 }
1250 if (state_override == kHostname) {
1251 return;
1252 }
1253 url->flags |= URL_FLAGS_HAS_HOST;
1254 if (!ParseHost(buffer, &url->host, special)) {
1255 url->flags |= URL_FLAGS_FAILED;
1256 return;
1257 }
1258 buffer.clear();
1259 state = kPort;
1260 } else if (ch == kEOL ||
1261 ch == '/' ||
1262 ch == '?' ||
1263 ch == '#' ||
1264 special_back_slash) {
1265 p--;
1266 if (special && buffer.size() == 0) {
1267 url->flags |= URL_FLAGS_FAILED;
1268 return;
1269 }
1270 if (has_state_override &&
1271 buffer.size() == 0 &&
1272 ((url->username.size() > 0 || url->password.size() > 0) ||
1273 url->port != -1)) {
1274 url->flags |= URL_FLAGS_TERMINATED;
1275 return;
1276 }
1277 url->flags |= URL_FLAGS_HAS_HOST;
1278 if (!ParseHost(buffer, &url->host, special)) {
1279 url->flags |= URL_FLAGS_FAILED;
1280 return;
1281 }
1282 buffer.clear();
1283 state = kPathStart;
1284 if (has_state_override) {
1285 return;
1286 }
1287 } else {
1288 if (ch == '[')
1289 square_bracket_flag = true;
1290 if (ch == ']')
1291 square_bracket_flag = false;
1292 buffer += ch;
1293 }
1294 break;
1295 case kPort:
1296 if (IsASCIIDigit(ch)) {
1297 buffer += ch;
1298 } else if (has_state_override ||
1299 ch == kEOL ||
1300 ch == '/' ||
1301 ch == '?' ||
1302 ch == '#' ||
1303 special_back_slash) {
1304 if (buffer.size() > 0) {
1305 unsigned port = 0;
1306 // the condition port <= 0xffff prevents integer overflow
1307 for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1308 port = port * 10 + buffer[i] - '0';
1309 if (port > 0xffff) {
1310 // TODO(TimothyGu): This hack is currently needed for the host
1311 // setter since it needs access to hostname if it is valid, and
1312 // if the FAILED flag is set the entire response to JS layer
1313 // will be empty.
1314 if (state_override == kHost)
1315 url->port = -1;
1316 else
1317 url->flags |= URL_FLAGS_FAILED;
1318 return;
1319 }
1320 // the port is valid
1321 url->port = NormalizePort(url->scheme, static_cast<int>(port));
1322 if (url->port == -1)
1323 url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1324 buffer.clear();
1325 } else if (has_state_override) {
1326 // TODO(TimothyGu): Similar case as above.
1327 if (state_override == kHost)
1328 url->port = -1;
1329 else
1330 url->flags |= URL_FLAGS_TERMINATED;
1331 return;
1332 }
1333 state = kPathStart;
1334 continue;
1335 } else {
1336 url->flags |= URL_FLAGS_FAILED;
1337 return;
1338 }
1339 break;
1340 case kFile:
1341 url->scheme = "file:";
1342 url->host.clear();
1343 url->flags |= URL_FLAGS_HAS_HOST;
1344 if (ch == '/' || ch == '\\') {
1345 state = kFileSlash;
1346 } else if (has_base && base->scheme == "file:") {
1347 switch (ch) {
1348 case kEOL:
1349 if (base->flags & URL_FLAGS_HAS_HOST) {
1350 url->host = base->host;
1351 }
1352 if (base->flags & URL_FLAGS_HAS_PATH) {
1353 url->flags |= URL_FLAGS_HAS_PATH;
1354 url->path = base->path;
1355 }
1356 if (base->flags & URL_FLAGS_HAS_QUERY) {
1357 url->flags |= URL_FLAGS_HAS_QUERY;
1358 url->query = base->query;
1359 }
1360 break;
1361 case '?':
1362 if (base->flags & URL_FLAGS_HAS_HOST) {
1363 url->host = base->host;
1364 }
1365 if (base->flags & URL_FLAGS_HAS_PATH) {
1366 url->flags |= URL_FLAGS_HAS_PATH;
1367 url->path = base->path;
1368 }
1369 url->flags |= URL_FLAGS_HAS_QUERY;
1370 url->query.clear();
1371 state = kQuery;
1372 break;
1373 case '#':
1374 if (base->flags & URL_FLAGS_HAS_HOST) {
1375 url->host = base->host;
1376 }
1377 if (base->flags & URL_FLAGS_HAS_PATH) {
1378 url->flags |= URL_FLAGS_HAS_PATH;
1379 url->path = base->path;
1380 }
1381 if (base->flags & URL_FLAGS_HAS_QUERY) {
1382 url->flags |= URL_FLAGS_HAS_QUERY;
1383 url->query = base->query;
1384 }
1385 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1386 url->fragment.clear();
1387 state = kFragment;
1388 break;
1389 default:
1390 url->query.clear();
1391 if (base->flags & URL_FLAGS_HAS_HOST) {
1392 url->host = base->host;
1393 }
1394 if (base->flags & URL_FLAGS_HAS_PATH) {
1395 url->flags |= URL_FLAGS_HAS_PATH;
1396 url->path = base->path;
1397 }
1398 if (!StartsWithWindowsDriveLetter(p, end)) {
1399 ShortenUrlPath(url);
1400 } else {
1401 url->path.clear();
1402 }
1403 state = kPath;
1404 continue;
1405 }
1406 } else {
1407 state = kPath;
1408 continue;
1409 }
1410 break;
1411 case kFileSlash:
1412 if (ch == '/' || ch == '\\') {
1413 state = kFileHost;
1414 } else {
1415 if (has_base && base->scheme == "file:") {
1416 url->flags |= URL_FLAGS_HAS_HOST;
1417 url->host = base->host;
1418 if (!StartsWithWindowsDriveLetter(p, end) &&
1419 IsNormalizedWindowsDriveLetter(base->path[0])) {
1420 url->flags |= URL_FLAGS_HAS_PATH;
1421 url->path.push_back(base->path[0]);
1422 }
1423 }
1424 state = kPath;
1425 continue;
1426 }
1427 break;
1428 case kFileHost:
1429 if (ch == kEOL ||
1430 ch == '/' ||
1431 ch == '\\' ||
1432 ch == '?' ||
1433 ch == '#') {
1434 if (!has_state_override &&
1435 buffer.size() == 2 &&
1436 IsWindowsDriveLetter(buffer)) {
1437 state = kPath;
1438 } else if (buffer.size() == 0) {
1439 url->flags |= URL_FLAGS_HAS_HOST;
1440 url->host.clear();
1441 if (has_state_override)
1442 return;
1443 state = kPathStart;
1444 } else {
1445 std::string host;
1446 if (!ParseHost(buffer, &host, special)) {
1447 url->flags |= URL_FLAGS_FAILED;
1448 return;
1449 }
1450 if (host == "localhost")
1451 host.clear();
1452 url->flags |= URL_FLAGS_HAS_HOST;
1453 url->host = host;
1454 if (has_state_override)
1455 return;
1456 buffer.clear();
1457 state = kPathStart;
1458 }
1459 continue;
1460 } else {
1461 buffer += ch;
1462 }
1463 break;
1464 case kPathStart:
1465 if (IsSpecial(url->scheme)) {
1466 state = kPath;
1467 if (ch != '/' && ch != '\\') {
1468 continue;
1469 }
1470 } else if (!has_state_override && ch == '?') {
1471 url->flags |= URL_FLAGS_HAS_QUERY;
1472 url->query.clear();
1473 state = kQuery;
1474 } else if (!has_state_override && ch == '#') {
1475 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1476 url->fragment.clear();
1477 state = kFragment;
1478 } else if (ch != kEOL) {
1479 state = kPath;
1480 if (ch != '/') {
1481 continue;
1482 }
1483 } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1484 url->flags |= URL_FLAGS_HAS_PATH;
1485 url->path.emplace_back("");
1486 }
1487 break;
1488 case kPath:
1489 if (ch == kEOL ||
1490 ch == '/' ||
1491 special_back_slash ||
1492 (!has_state_override && (ch == '?' || ch == '#'))) {
1493 if (IsDoubleDotSegment(buffer)) {
1494 ShortenUrlPath(url);
1495 if (ch != '/' && !special_back_slash) {
1496 url->flags |= URL_FLAGS_HAS_PATH;
1497 url->path.emplace_back("");
1498 }
1499 } else if (IsSingleDotSegment(buffer) &&
1500 ch != '/' && !special_back_slash) {
1501 url->flags |= URL_FLAGS_HAS_PATH;
1502 url->path.emplace_back("");
1503 } else if (!IsSingleDotSegment(buffer)) {
1504 if (url->scheme == "file:" &&
1505 url->path.empty() &&
1506 buffer.size() == 2 &&
1507 IsWindowsDriveLetter(buffer)) {
1508 buffer[1] = ':';
1509 }
1510 url->flags |= URL_FLAGS_HAS_PATH;
1511 url->path.emplace_back(std::move(buffer));
1512 }
1513 buffer.clear();
1514 if (ch == '?') {
1515 url->flags |= URL_FLAGS_HAS_QUERY;
1516 url->query.clear();
1517 state = kQuery;
1518 } else if (ch == '#') {
1519 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1520 url->fragment.clear();
1521 state = kFragment;
1522 }
1523 } else {
1524 AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1525 }
1526 break;
1527 case kCannotBeBase:
1528 switch (ch) {
1529 case '?':
1530 state = kQuery;
1531 break;
1532 case '#':
1533 state = kFragment;
1534 break;
1535 default:
1536 if (url->path.empty())
1537 url->path.emplace_back("");
1538 else if (ch != kEOL)
1539 AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1540 }
1541 break;
1542 case kQuery:
1543 if (ch == kEOL || (!has_state_override && ch == '#')) {
1544 url->flags |= URL_FLAGS_HAS_QUERY;
1545 url->query = std::move(buffer);
1546 buffer.clear();
1547 if (ch == '#')
1548 state = kFragment;
1549 } else {
1550 AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1551 QUERY_ENCODE_SET_NONSPECIAL);
1552 }
1553 break;
1554 case kFragment:
1555 switch (ch) {
1556 case kEOL:
1557 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1558 url->fragment = std::move(buffer);
1559 break;
1560 default:
1561 AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1562 }
1563 break;
1564 default:
1565 url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1566 return;
1567 }
1568
1569 p++;
1570 }
1571} // NOLINT(readability/fn_size)
1572
1573// https://url.spec.whatwg.org/#url-serializing
1574std::string URL::SerializeURL(const url_data& url,
1575 bool exclude = false) {
1576 std::string output;
1577 output.reserve(
1578 10 + // We generally insert < 10 separator characters between URL parts
1579 url.scheme.size() +
1580 url.username.size() +
1581 url.password.size() +
1582 url.host.size() +
1583 url.query.size() +
1584 url.fragment.size() +
1585 url.href.size() +
1586 std::accumulate(
1587 url.path.begin(),
1588 url.path.end(),
1589 0,
1590 [](size_t sum, const auto& str) { return sum + str.size(); }));
1591
1592 output += url.scheme;
1593 if (url.flags & URL_FLAGS_HAS_HOST) {
1594 output += "//";
1595 if (url.flags & URL_FLAGS_HAS_USERNAME ||
1596 url.flags & URL_FLAGS_HAS_PASSWORD) {
1597 if (url.flags & URL_FLAGS_HAS_USERNAME) {
1598 output += url.username;
1599 }
1600 if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1601 output += ":" + url.password;
1602 }
1603 output += "@";
1604 }
1605 output += url.host;
1606 if (url.port != -1) {
1607 output += ":" + std::to_string(url.port);
1608 }
1609 }
1610 if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1611 output += url.path[0];
1612 } else {
1613 if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1614 url.path.size() > 1 &&
1615 url.path[0].empty()) {
1616 output += "/.";
1617 }
1618 for (size_t i = 1; i < url.path.size(); i++) {
1619 output += "/" + url.path[i];
1620 }
1621 }
1622 if (url.flags & URL_FLAGS_HAS_QUERY) {
1623 output += "?" + url.query;
1624 }
1625 if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1626 output += "#" + url.fragment;
1627 }
1628 output.shrink_to_fit();
1629 return output;
1630}
1631
1632namespace {
1633void SetArgs(Environment* env,
1634 Local<Value> argv[ARG_COUNT],
1635 const struct url_data& url) {
1636 Isolate* isolate = env->isolate();
1637 argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1638 argv[ARG_PROTOCOL] =
1639 url.flags & URL_FLAGS_SPECIAL ?
1640 GetSpecial(env, url.scheme) :
1641 OneByteString(isolate, url.scheme.c_str());
1642 if (url.flags & URL_FLAGS_HAS_USERNAME)
1643 argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1644 if (url.flags & URL_FLAGS_HAS_PASSWORD)
1645 argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1646 if (url.flags & URL_FLAGS_HAS_HOST)
1647 argv[ARG_HOST] = Utf8String(isolate, url.host);
1648 if (url.flags & URL_FLAGS_HAS_QUERY)
1649 argv[ARG_QUERY] = Utf8String(isolate, url.query);
1650 if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1651 argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1652 if (url.port > -1)
1653 argv[ARG_PORT] = Integer::New(isolate, url.port);
1654 if (url.flags & URL_FLAGS_HAS_PATH)
1655 argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1656}
1657
1658void Parse(Environment* env,
1659 Local<Value> recv,
1660 const char* input,
1661 size_t len,
1662 enum url_parse_state state_override,
1663 Local<Value> base_obj,
1664 Local<Value> context_obj,
1665 Local<Function> cb,
1666 Local<Value> error_cb) {
1667 Isolate* isolate = env->isolate();
1668 Local<Context> context = env->context();
1669 HandleScope handle_scope(isolate);
1670 Context::Scope context_scope(context);
1671
1672 const bool has_context = context_obj->IsObject();
1673 const bool has_base = base_obj->IsObject();
1674
1675 url_data base;
1676 url_data url;
1677 if (has_context)
1678 url = HarvestContext(env, context_obj.As<Object>());
1679 if (has_base)
1680 base = HarvestBase(env, base_obj.As<Object>());
1681
1682 URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1683 if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1684 ((state_override != kUnknownState) &&
1685 (url.flags & URL_FLAGS_TERMINATED)))
1686 return;
1687
1688 // Define the return value placeholders
1689 const Local<Value> undef = Undefined(isolate);
1690 const Local<Value> null = Null(isolate);
1691 if (!(url.flags & URL_FLAGS_FAILED)) {
1692 Local<Value> argv[] = {
1693 undef,
1694 undef,
1695 undef,
1696 undef,
1697 null, // host defaults to null
1698 null, // port defaults to null
1699 undef,
1700 null, // query defaults to null
1701 null, // fragment defaults to null
1702 };
1703 SetArgs(env, argv, url);
1704 USE(cb->Call(context, recv, arraysize(argv), argv));
1705 } else if (error_cb->IsFunction()) {
1706 Local<Value> flags = Integer::NewFromUnsigned(isolate, url.flags);
1707 USE(error_cb.As<Function>()->Call(context, recv, 1, &flags));
1708 }
1709}
1710
1711void Parse(const FunctionCallbackInfo<Value>& args) {
1712 Environment* env = Environment::GetCurrent(args);
1713 CHECK_GE(args.Length(), 5)do { if (__builtin_expect(!!(!((args.Length()) >= (5))), 0
)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1713", "(args.Length()) >= (5)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
1714 CHECK(args[0]->IsString())do { if (__builtin_expect(!!(!(args[0]->IsString())), 0)) {
do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1714", "args[0]->IsString()", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
; // input
1715 CHECK(args[2]->IsUndefined() || // base contextdo { if (__builtin_expect(!!(!(args[2]->IsUndefined() || args
[2]->IsNull() || args[2]->IsObject())), 0)) { do { static
const node::AssertionInfo args = { "../src/node_url.cc" ":" "1717"
, "args[2]->IsUndefined() || args[2]->IsNull() || args[2]->IsObject()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
1716 args[2]->IsNull() ||do { if (__builtin_expect(!!(!(args[2]->IsUndefined() || args
[2]->IsNull() || args[2]->IsObject())), 0)) { do { static
const node::AssertionInfo args = { "../src/node_url.cc" ":" "1717"
, "args[2]->IsUndefined() || args[2]->IsNull() || args[2]->IsObject()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
1717 args[2]->IsObject())do { if (__builtin_expect(!!(!(args[2]->IsUndefined() || args
[2]->IsNull() || args[2]->IsObject())), 0)) { do { static
const node::AssertionInfo args = { "../src/node_url.cc" ":" "1717"
, "args[2]->IsUndefined() || args[2]->IsNull() || args[2]->IsObject()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
;
1718 CHECK(args[3]->IsUndefined() || // contextdo { if (__builtin_expect(!!(!(args[3]->IsUndefined() || args
[3]->IsNull() || args[3]->IsObject())), 0)) { do { static
const node::AssertionInfo args = { "../src/node_url.cc" ":" "1720"
, "args[3]->IsUndefined() || args[3]->IsNull() || args[3]->IsObject()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
1719 args[3]->IsNull() ||do { if (__builtin_expect(!!(!(args[3]->IsUndefined() || args
[3]->IsNull() || args[3]->IsObject())), 0)) { do { static
const node::AssertionInfo args = { "../src/node_url.cc" ":" "1720"
, "args[3]->IsUndefined() || args[3]->IsNull() || args[3]->IsObject()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
1720 args[3]->IsObject())do { if (__builtin_expect(!!(!(args[3]->IsUndefined() || args
[3]->IsNull() || args[3]->IsObject())), 0)) { do { static
const node::AssertionInfo args = { "../src/node_url.cc" ":" "1720"
, "args[3]->IsUndefined() || args[3]->IsNull() || args[3]->IsObject()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
;
1721 CHECK(args[4]->IsFunction())do { if (__builtin_expect(!!(!(args[4]->IsFunction())), 0)
) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1721", "args[4]->IsFunction()", __PRETTY_FUNCTION__ }
; node::Assert(args); } while (0); } } while (0)
; // complete callback
1722 CHECK(args[5]->IsUndefined() || args[5]->IsFunction())do { if (__builtin_expect(!!(!(args[5]->IsUndefined() || args
[5]->IsFunction())), 0)) { do { static const node::AssertionInfo
args = { "../src/node_url.cc" ":" "1722", "args[5]->IsUndefined() || args[5]->IsFunction()"
, __PRETTY_FUNCTION__ }; node::Assert(args); } while (0); } }
while (0)
; // error callback
1723
1724 Utf8Value input(env->isolate(), args[0]);
1725 enum url_parse_state state_override = kUnknownState;
1726 if (args[1]->IsNumber()) {
1727 state_override = static_cast<enum url_parse_state>(
1728 args[1]->Uint32Value(env->context()).FromJust());
1729 }
1730
1731 Parse(env, args.This(),
1732 *input, input.length(),
1733 state_override,
1734 args[2],
1735 args[3],
1736 args[4].As<Function>(),
1737 args[5]);
1738}
1739
// JS binding (registered as "encodeAuth" in Initialize). Requires at least one
// argument and that args[0] is a string (both enforced by the CHECKs below).
// Returns a new V8 string built by passing every byte of the argument's UTF-8
// form through AppendOrEscape() with USERINFO_ENCODE_SET.
1740void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1741 Environment* env = Environment::GetCurrent(args);
1742 CHECK_GE(args.Length(), 1)do { if (__builtin_expect(!!(!((args.Length()) >= (1))), 0
)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1742", "(args.Length()) >= (1)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
1743 CHECK(args[0]->IsString())do { if (__builtin_expect(!!(!(args[0]->IsString())), 0)) {
do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1743", "args[0]->IsString()", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
;
1744 Utf8Value value(env->isolate(), args[0]);
1745 std::string output;
1746 size_t len = value.length();
// Reserve the input length up front. NOTE(review): the output presumably
// grows beyond this whenever a byte is escaped -- confirm in AppendOrEscape.
1747 output.reserve(len);
// Walk the UTF-8 bytes one at a time; AppendOrEscape appends to `output`.
1748 for (size_t n = 0; n < len; n++) {
1749 const char ch = (*value)[n];
1750 AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1751 }
// The result string is created from output.c_str(); no explicit length is
// passed to String::NewFromUtf8 here.
1752 args.GetReturnValue().Set(
1753 String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1754}
1755
// JS binding (registered as "domainToASCII" in Initialize). Requires at least
// one argument and that args[0] is a string. Parses the argument as a host via
// URLHost::ParseHost and returns the parsed host's string form, or the empty
// string when parsing fails.
1756void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1757 Environment* env = Environment::GetCurrent(args);
1758 CHECK_GE(args.Length(), 1)do { if (__builtin_expect(!!(!((args.Length()) >= (1))), 0
)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1758", "(args.Length()) >= (1)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
1759 CHECK(args[0]->IsString())do { if (__builtin_expect(!!(!(args[0]->IsString())), 0)) {
do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1759", "args[0]->IsString()", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
;
1760 Utf8Value value(env->isolate(), args[0]);
1761
1762 URLHost host;
1763 // Assuming the host is used for a special scheme.
1764 host.ParseHost(*value, value.length(), true);
// A host that failed to parse is reported to JS as "" rather than as an error.
1765 if (host.ParsingFailed()) {
1766 args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1767 return;
1768 }
// Take the string out of the host object and hand it back as a V8 string
// (built from out.c_str(), with no explicit length).
1769 std::string out = host.ToStringMove();
1770 args.GetReturnValue().Set(
1771 String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1772}
1773
// JS binding (registered as "domainToUnicode" in Initialize). Same flow as
// DomainToASCII, except that ParseHost is called with an additional trailing
// `true` argument. NOTE(review): that flag presumably selects Unicode output --
// confirm against the URLHost::ParseHost declaration.
// Returns the parsed host's string form, or the empty string when parsing
// fails.
1774void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1775 Environment* env = Environment::GetCurrent(args);
1776 CHECK_GE(args.Length(), 1)do { if (__builtin_expect(!!(!((args.Length()) >= (1))), 0
)) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1776", "(args.Length()) >= (1)", __PRETTY_FUNCTION__
}; node::Assert(args); } while (0); } } while (0)
;
1777 CHECK(args[0]->IsString())do { if (__builtin_expect(!!(!(args[0]->IsString())), 0)) {
do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1777", "args[0]->IsString()", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
;
1778 Utf8Value value(env->isolate(), args[0]);
1779
1780 URLHost host;
1781 // Assuming the host is used for a special scheme.
1782 host.ParseHost(*value, value.length(), true, true);
// A host that failed to parse is reported to JS as "" rather than as an error.
1783 if (host.ParsingFailed()) {
1784 args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1785 return;
1786 }
// Take the string out of the host object and hand it back as a V8 string
// (built from out.c_str(), with no explicit length).
1787 std::string out = host.ToStringMove();
1788 args.GetReturnValue().Set(
1789 String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1790}
1791
// JS binding (registered as "setURLConstructor" in Initialize). Requires
// exactly one argument, which must be a function, and stores it on the
// Environment via set_url_constructor_function(). URL::ToObject later reads it
// back through env->url_constructor_function().
1792void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1793 Environment* env = Environment::GetCurrent(args);
1794 CHECK_EQ(args.Length(), 1)do { if (__builtin_expect(!!(!((args.Length()) == (1))), 0)) {
do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1794", "(args.Length()) == (1)", __PRETTY_FUNCTION__ };
node::Assert(args); } while (0); } } while (0)
;
1795 CHECK(args[0]->IsFunction())do { if (__builtin_expect(!!(!(args[0]->IsFunction())), 0)
) { do { static const node::AssertionInfo args = { "../src/node_url.cc"
":" "1795", "args[0]->IsFunction()", __PRETTY_FUNCTION__ }
; node::Assert(args); } while (0); } } while (0)
;
1796 env->set_url_constructor_function(args[0].As<Function>());
1797}
1798
// Module initializer for the "url" binding: attaches the native methods and
// the numeric constants to `target`.
//
// target  - object that receives the methods and constants.
// unused  - not referenced in the body.
// context - used only to look up the current Environment.
// priv    - not referenced in the body.
1799void Initialize(Local<Object> target,
1800 Local<Value> unused,
1801 Local<Context> context,
1802 void* priv) {
1803 Environment* env = Environment::GetCurrent(context);
// "parse" and "setURLConstructor" are registered with SetMethod; the three
// remaining bindings are registered with SetMethodNoSideEffect.
1804 env->SetMethod(target, "parse", Parse);
1805 env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1806 env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1807 env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1808 env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1809
// Define one property on `target` per entry of the FLAGS list. The two-argument
// XX ignores the second (numeric) argument and forwards only the name to
// NODE_DEFINE_CONSTANT, whose expansion defines the property with
// v8::ReadOnly | v8::DontDelete attributes.
1810#define XX(name, _) NODE_DEFINE_CONSTANT(target, name)do { v8::Isolate* isolate = target->GetIsolate(); v8::Local
<v8::Context> context = isolate->GetCurrentContext()
; v8::Local<v8::String> constant_name = v8::String::NewFromUtf8
(isolate, "name", v8::NewStringType::kInternalized).ToLocalChecked
(); v8::Local<v8::Number> constant_value = v8::Number::
New(isolate, static_cast<double>(name)); v8::PropertyAttribute
constant_attributes = static_cast<v8::PropertyAttribute>
(v8::ReadOnly | v8::DontDelete); (target)->DefineOwnProperty
(context, constant_name, constant_value, constant_attributes)
.Check(); } while (0)
;
1811 FLAGS(XX)XX(URL_FLAGS_NONE, 0) XX(URL_FLAGS_FAILED, 0x01) XX(URL_FLAGS_CANNOT_BE_BASE
, 0x02) XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) XX(URL_FLAGS_TERMINATED
, 0x08) XX(URL_FLAGS_SPECIAL, 0x10) XX(URL_FLAGS_HAS_USERNAME
, 0x20) XX(URL_FLAGS_HAS_PASSWORD, 0x40) XX(URL_FLAGS_HAS_HOST
, 0x80) XX(URL_FLAGS_HAS_PATH, 0x100) XX(URL_FLAGS_HAS_QUERY,
0x200) XX(URL_FLAGS_HAS_FRAGMENT, 0x400) XX(URL_FLAGS_IS_DEFAULT_SCHEME_PORT
, 0x800)
1812#undef XX
1813
// Same again for the parser states: XX is redefined with a single argument
// because PARSESTATES entries carry only a name.
1814#define XX(name) NODE_DEFINE_CONSTANT(target, name)do { v8::Isolate* isolate = target->GetIsolate(); v8::Local
<v8::Context> context = isolate->GetCurrentContext()
; v8::Local<v8::String> constant_name = v8::String::NewFromUtf8
(isolate, "name", v8::NewStringType::kInternalized).ToLocalChecked
(); v8::Local<v8::Number> constant_value = v8::Number::
New(isolate, static_cast<double>(name)); v8::PropertyAttribute
constant_attributes = static_cast<v8::PropertyAttribute>
(v8::ReadOnly | v8::DontDelete); (target)->DefineOwnProperty
(context, constant_name, constant_value, constant_attributes)
.Check(); } while (0)
;
1815 PARSESTATES(XX)XX(kSchemeStart) XX(kScheme) XX(kNoScheme) XX(kSpecialRelativeOrAuthority
) XX(kPathOrAuthority) XX(kRelative) XX(kRelativeSlash) XX(kSpecialAuthoritySlashes
) XX(kSpecialAuthorityIgnoreSlashes) XX(kAuthority) XX(kHost)
XX(kHostname) XX(kPort) XX(kFile) XX(kFileSlash) XX(kFileHost
) XX(kPathStart) XX(kPath) XX(kCannotBeBase) XX(kQuery) XX(kFragment
)
1816#undef XX
1817}
1818} // namespace
1819
// Registers the five native binding functions with the given
// ExternalReferenceRegistry. The list matches the functions that Initialize
// attaches to the binding target, so a binding added there presumably needs an
// entry here as well -- NOTE(review): confirm the registry's requirements.
1820void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
1821 registry->Register(Parse);
1822 registry->Register(EncodeAuthSet);
1823 registry->Register(DomainToASCII);
1824 registry->Register(DomainToUnicode);
1825 registry->Register(SetURLConstructor);
1826}
1827
// Converts this URL into a file system path string.
//
// Returns the empty string when the conversion is rejected:
//   - the scheme is not "file:";
//   - (non-Windows) the URL has a non-empty host;
//   - a percent-decoded path segment contains a path separator;
//   - (Windows) the host cannot be converted by ToUnicode, or a host-less path
//     does not start with a drive letter followed by ':'.
// Otherwise returns the decoded segments joined with the platform separator;
// on Windows a UNC prefix is added when a host is present, and the leading
// separator is stripped for drive-letter paths.
1828std::string URL::ToFilePath() const {
1829 if (context_.scheme != "file:") {
1830 return "";
1831 }
1832
// Platform setup: the separator used when joining segments, and the predicate
// for characters that must not appear inside a decoded segment.
1833#ifdef _WIN32
1834 const char* slash = "\\";
1835 auto is_slash = [] (char ch) {
1836 return ch == '/' || ch == '\\';
1837 };
1838#else
1839 const char* slash = "/";
1840 auto is_slash = [] (char ch) {
1841 return ch == '/';
1842 };
// Outside Windows a file URL with a non-empty host is not converted.
1843 if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1844 context_.host.length() > 0) {
1845 return "";
1846 }
1847#endif
// Percent-decode each path segment and append it behind a separator. A segment
// that decodes to something containing a separator (e.g. an encoded "/") is
// rejected, since it would change the path structure.
1848 std::string decoded_path;
1849 for (const std::string& part : context_.path) {
1850 std::string decoded = PercentDecode(part.c_str(), part.length());
1851 for (char& ch : decoded) {
1852 if (is_slash(ch)) {
1853 return "";
1854 }
1855 }
1856 decoded_path += slash + decoded;
1857 }
1858
1859#ifdef _WIN32
1860 // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1861
1862 // If hostname is set, then we have a UNC path. Pass the hostname through
1863 // ToUnicode just in case it is an IDN using punycode encoding. We do not
1864 // need to worry about percent encoding because the URL parser will have
1865 // already taken care of that for us. Note that this only causes IDNs with an
1866 // appropriate `xn--` prefix to be decoded.
1867 if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1868 context_.host.length() > 0) {
1869 std::string unicode_host;
1870 if (!ToUnicode(context_.host, &unicode_host)) {
1871 return "";
1872 }
1873 return "\\\\" + unicode_host + decoded_path;
1874 }
1875 // Otherwise, it's a local path that requires a drive letter.
// decoded_path always begins with the separator, so index 1 is the drive
// letter and index 2 the ':'; anything shorter than 3 chars cannot qualify.
1876 if (decoded_path.length() < 3) {
1877 return "";
1878 }
1879 if (decoded_path[2] != ':' ||
1880 !IsASCIIAlpha(decoded_path[1])) {
1881 return "";
1882 }
1883 // Strip out the leading '\'.
1884 return decoded_path.substr(1);
1885#else
1886 return decoded_path;
1887#endif
1888}
1889
// Builds a URL from a file system path. Starts from a URL constructed from
// "file://", then parses the path into that URL's context beginning in the
// kPathStart state.
//
// file_path - the path to convert; every '%' in it is rewritten to "%25"
//             before parsing, and no other character is altered here.
// Returns the resulting URL object (by value).
1890URL URL::FromFilePath(const std::string& file_path) {
1891 URL url("file://");
1892 std::string escaped_file_path;
// Copy the path byte by byte; appending "25" right after a '%' yields "%25".
1893 for (size_t i = 0; i < file_path.length(); ++i) {
1894 escaped_file_path += file_path[i];
1895 if (file_path[i] == '%')
1896 escaped_file_path += "25";
1897 }
// NOTE(review): the meaning of the trailing `true, nullptr, false` arguments
// is not visible here -- confirm against the URL::Parse declaration.
1898 URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart,
1899 &url.context_, true, nullptr, false);
1900 return url;
1901}
1902
1903// This function works by calling out to a JS function that creates and
1904// returns the JS URL object. Be mindful of the JS<->Native boundary
1905// crossing that is required.
//
// env - Environment supplying the isolate, the context and the stored URL
//       constructor function.
// Returns an empty handle when this URL carries URL_FLAGS_FAILED; otherwise
// the result of calling the stored constructor function.
1906MaybeLocal<Value> URL::ToObject(Environment* env) const {
1907 Isolate* isolate = env->isolate();
1908 Local<Context> context = env->context();
1909 Context::Scope context_scope(context);
1910
1911 const Local<Value> undef = Undefined(isolate);
1912 const Local<Value> null = Null(isolate);
1913
// A URL that failed to parse has no JS representation.
1914 if (context_.flags & URL_FLAGS_FAILED)
1915 return Local<Value>();
1916
// Nine argument slots pre-filled with defaults; SetArgs() below fills them in
// from context_. NOTE(review): slot order presumably matches the JS
// constructor callback's parameters -- confirm against SetArgs.
1917 Local<Value> argv[] = {
1918 undef,
1919 undef,
1920 undef,
1921 undef,
1922 null, // host defaults to null
1923 null, // port defaults to null
1924 undef,
1925 null, // query defaults to null
1926 null, // fragment defaults to null
1927 };
1928 SetArgs(env, argv, context_);
1929
1930 MaybeLocal<Value> ret;
1931 {
// The call runs inside a TryCatchScope created in kFatal mode.
// NOTE(review): presumably an exception thrown by the JS function is treated
// as fatal -- confirm against TryCatchScope.
1932 TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1933
1934 // The SetURLConstructor method must have been called already to
1935 // set the constructor function used below. SetURLConstructor is
1936 // called automatically when the internal/url.js module is loaded
1937 // during the internal/bootstrap/node.js processing.
1938 ret = env->url_constructor_function()
1939 ->Call(env->context(), undef, arraysize(argv), argv);
1940 }
1941
1942 return ret;
1943}
1944
1945} // namespace url
1946} // namespace node
1947
// Module registration. As the expansion shows, the first macro defines a static
// node_module record named "url" whose context-register function is
// node::url::Initialize, plus _register_url() which passes that record to
// node_module_register().
1948NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)static node::node_module _module = { 108, NM_F_INTERNAL, nullptr
, "../src/node_url.cc", nullptr, (node::addon_context_register_func
)(node::url::Initialize), "url", nullptr, nullptr}; void _register_url
() { node_module_register(&_module); }
// The second macro defines _register_external_reference_url(), which forwards
// the registry to node::url::RegisterExternalReferences.
1949NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)void _register_external_reference_url( node::ExternalReferenceRegistry
* registry) { node::url::RegisterExternalReferences(registry)
; }