File: | d/url.c |
Warning: | line 808, column 13 Duplicate code detected |
Note: | line 812, column 13 Similar code here |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * The contents of this file are subject to the Mozilla Public License |
3 | * Version 1.1 (the "License"); you may not use this file except in |
4 | * compliance with the License. You may obtain a copy of the License at |
5 | * http://mozilla.org/. |
6 | * |
7 | * Software distributed under the License is distributed on an "AS IS" |
8 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See |
9 | * the License for the specific language governing rights and limitations |
10 | * under the License. |
11 | * |
12 | * The Original Code is AOLserver Code and related documentation |
13 | * distributed by AOL. |
14 | * |
15 | * The Initial Developer of the Original Code is America Online, |
16 | * Inc. Portions created by AOL are Copyright (C) 1999 America Online, |
17 | * Inc. All Rights Reserved. |
18 | * |
19 | * Alternatively, the contents of this file may be used under the terms |
20 | * of the GNU General Public License (the "GPL"), in which case the |
21 | * provisions of GPL are applicable instead of those above. If you wish |
22 | * to allow use of your version of this file only under the terms of the |
23 | * GPL and not to allow others to use your version of this file under the |
24 | * License, indicate your decision by deleting the provisions above and |
25 | * replace them with the notice and other provisions required by the GPL. |
26 | * If you do not delete the provisions above, a recipient may use your |
27 | * version of this file under either the License or the GPL. |
28 | */ |
29 | |
30 | |
31 | /* |
32 | * url.c -- |
33 | * |
34 | * Parse URLs. |
35 | */ |
36 | |
37 | #include "nsd.h" |
38 | |
39 | /* |
40 | * Local typedefs of functions |
41 | */ |
42 | |
43 | /* |
44 | * Local functions defined in this file |
45 | */ |
46 | |
47 | static char* ParseUpTo(char *chars, char ch) |
48 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))); |
49 | |
50 | |
/*
 *----------------------------------------------------------------------
 *
 * Ns_RelativeUrl --
 *
 *      Strip the server location from the front of a URL.  E.g., with
 *      location "http://www.foo.com", the URL "http://www.foo.com/hello"
 *      yields "/hello".  The prefix comparison is delegated to
 *      Ns_Match() (defined elsewhere), which is expected to return the
 *      position in "url" right after the location prefix, or NULL when
 *      the URL does not start with the location.
 *
 * Results:
 *      NULL when either argument is NULL.  Otherwise a pointer into the
 *      passed-in "url": past the location prefix when it matched, else
 *      the start of "url".  In both cases a run of leading slashes is
 *      collapsed, such that the result starts with at most one slash.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

const char *
Ns_RelativeUrl(const char *url, const char *location)
{
    const char *relative = NULL;

    if (url != NULL && location != NULL) {
        const char *matchEnd = Ns_Match(location, url);

        /*
         * Continue after the matched prefix, or keep the full URL on a
         * mismatch.
         */
        relative = (matchEnd != NULL) ? matchEnd : url;

        /*
         * Skip all but the last of a sequence of leading slashes.
         */
        while (relative[0] == '/' && relative[1] == '/') {
            relative++;
        }
    }
    return relative;
}
100 | |
101 | |
/*
 *----------------------------------------------------------------------
 *
 * ParseUserInfo --
 *
 *      Split the optional user-info part off the "authority" part of
 *      a URL
 *
 *          authority = [ userinfo "@" ] host [ ":" port ]
 *
 *      and return the remainder of the string.
 *
 * Results:
 *      String starting with the "host" part.  When no user-info is
 *      present, this is the unmodified input string.
 *
 * Side effects:
 *      When the "authority" contains "userinfo", the separating "@" is
 *      overwritten by a NUL character and the user-info is returned
 *      via the pointer in the second argument.  Otherwise the second
 *      argument is set to NULL.
 *
 *----------------------------------------------------------------------
 */

static char *
ParseUserInfo(char *chars, char **userinfo)
{
    /*
     * RFC 3986 defines
     *
     *   userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
     *   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
     *   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
     *               / "*" / "+" / "," / ";" / "="
     *
     *   ALPHA   = (%41-%5A and %61-%7A)
     *   DIGIT   = (%30-%39),
     *   hyphen (%2D), period (%2E), underscore (%5F), tilde (%7E)
     *   exclam (%21) dollar (%24) amp (%26) singlequote (%27)
     *   lparen (%28) rparen (%29) asterisk (%2A) plus (%2B)
     *   comma (%2C) semicolon (%3B) equals (%3D)
     *
     *   colon (%3a)
     *
     * Percent-encoded sequences are accepted via the percent character
     * (%25) alone; the two following hex digits are not checked
     * separately.
     */
    static const bool userinfo_table[256] = {
        /*          0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
        /* 0x00 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x10 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x20 */  0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
        /* 0x30 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
        /* 0x40 */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        /* 0x50 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
        /* 0x60 */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        /* 0x70 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
        /* 0x80 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x90 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xa0 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xb0 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xc0 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xd0 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xe0 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xf0 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    char *scan, *hostStart;

    NS_NONNULL_ASSERT(chars != NULL);
    NS_NONNULL_ASSERT(userinfo != NULL);

    /*
     * Advance over all characters permitted in the user-info.  The NUL
     * terminator is not part of the table, so the scan always stops.
     */
    scan = chars;
    while (userinfo_table[UCHAR(*scan)] != 0) {
        scan++;
    }

    if (*scan != '\x40') {
        /*
         * The scan did not stop at an at-sign (0x40): no user-info.
         */
        *userinfo = NULL;
        hostStart = chars;
    } else {
        /*
         * Terminate the user-info and continue behind the at-sign.
         */
        *scan = '\0';
        *userinfo = chars;
        hostStart = scan + 1;
    }

    return hostStart;
}
189 | |
190 | |
/*
 *----------------------------------------------------------------------
 *
 * ParseUpTo --
 *
 *      Helper function of Ns_ParseUrl().  Search the string from left
 *      to right for the first occurrence of the specified character.
 *      When found, the character is replaced by a NUL character, such
 *      that the passed-in string ends right before it.
 *
 * Results:
 *      Pointer to the character following the (replaced) delimiter, or
 *      NULL when the delimiter does not occur in the string.
 *
 * Side effects:
 *      The passed-in string is modified when the delimiter is found.
 *
 *----------------------------------------------------------------------
 */

static char *
ParseUpTo(char *chars, char ch)
{
    char *delimiter = strchr(chars, (int)(unsigned char)ch);

    if (delimiter == NULL) {
        return NULL;
    }

    /*
     * Cut the string at the delimiter and hand back the rest.
     */
    *delimiter = '\0';
    return delimiter + 1;
}
220 | |
/*
 *----------------------------------------------------------------------
 *
 * ValidateChars --
 *
 *      Helper function of Ns_ParseUrl().  Scan a string up to its end
 *      based on the provided 256-entry table of valid characters
 *      (indexed by the unsigned character value).
 *
 * Results:
 *      When all characters of the string are valid, the string is
 *      returned unmodified.  When it contains an invalid character,
 *      NULL is returned and the provided message is stored in
 *      *errorMsg.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

static char *
ValidateChars(char *chars, const bool *table, const char *msg, const char** errorMsg)
{
    const char *scan = chars;

    /*
     * Skip over valid characters.  The scan stops at the first invalid
     * one; the table is expected to flag NUL as invalid, such that the
     * scan ends at the string terminator at the latest.
     */
    while (table[(unsigned char)*scan] != 0) {
        scan++;
    }

    if (*scan != '\0') {
        /*
         * Stopped before the end of the string: invalid character.
         */
        *errorMsg = msg;
        return NULL;
    }
    return chars;
}
256 | |
257 | |
258 | /* |
259 | *---------------------------------------------------------------------- |
260 | * |
261 | * Ns_ParseUrl -- |
262 | * |
263 | * Parse a URL into its component parts |
264 | * |
265 | * Results: |
266 | * NS_OK or NS_ERROR |
267 | * |
268 | * Side effects: |
269 | * Pointers to the protocol, host, port, path, and "tail" (last |
270 | * path element) will be set by reference in the passed-in pointers. |
271 | * The passed-in url will be modified. |
272 | * |
273 | *---------------------------------------------------------------------- |
274 | */ |
275 | Ns_ReturnCode |
276 | Ns_ParseUrl(char *url, bool_Bool strict, Ns_URL *urlPtr, const char **errorMsg) |
277 | { |
278 | char *end; |
279 | |
280 | /* |
281 | * RFC 3986 defines |
282 | * |
283 | * foo://example.com:8042/over/there?name=ferret#nose |
284 | * \_/ \______________/\_________/ \_________/ \__/ |
285 | * | | | | | |
286 | * scheme authority path query fragment |
287 | * |
288 | * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) |
289 | * ALPHA = (%41-%5A and %61-%7A) |
290 | * DIGIT = (%30-%39), |
291 | * plus (%2B) hyphen (%2D), period (%2E), |
292 | * |
293 | * underscore (%5F), tilde (%7E) |
294 | */ |
295 | |
296 | static const bool_Bool scheme_table[256] = { |
297 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
298 | /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
299 | /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
300 | /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, |
301 | /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, |
302 | /* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
303 | /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
304 | /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
305 | /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
306 | /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
307 | /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
308 | /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
309 | /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
310 | /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
311 | /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
312 | /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
313 | /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
314 | }; |
315 | |
316 | /* |
317 | * RFC 3986 defines (simplified) |
318 | * |
319 | * path = path-abempty ; begins with "/" or is empty |
320 | * / path-absolute ; begins with "/" but not "//" |
321 | * path-absolute = "/" [ segment-nz *( "/" segment ) ] |
322 | * segment = *pchar |
323 | * segment-nz = 1*pchar |
324 | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
325 | * |
326 | * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
327 | * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
328 | * / "*" / "+" / "," / ";" / "=" |
329 | * |
330 | * ALPHA = (%41-%5A and %61-%7A) |
331 | * DIGIT = (%30-%39), |
332 | * hyphen (%2D), period (%2E), underscore (%5F), tilde (%7E) |
333 | * exclam (%21) dollar (%24) amp (%26) singlequote (%27) |
334 | * lparen (%28) lparen (%29) asterisk (%2A) plus (%2B) |
335 | * comma (%2C) semicolon (%3B) equals (%3D) |
336 | * |
337 | * slash (%2F) colon (%3A) at (%40) |
338 | * |
339 | * Percent-encoded is just checked by the character range, but does not |
340 | * check the two following (number) chars. |
341 | * |
342 | * percent (%25) ... for percent-encoded |
343 | */ |
344 | |
345 | static const bool_Bool path_table[256] = { |
346 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
347 | /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
348 | /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
349 | /* 0x20 */ 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
350 | /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, |
351 | /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
352 | /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
353 | /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
354 | /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, |
355 | /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
356 | /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
357 | /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
358 | /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
359 | /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
360 | /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
361 | /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
362 | /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
363 | }; |
364 | |
365 | /* |
366 | * RFC 3986 defines |
367 | * |
368 | * query = *( pchar / "/" / "?" ) |
369 | * fragment = *( pchar / "/" / "?" ) |
370 | * |
371 | * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
372 | * |
373 | * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
374 | * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
375 | * / "*" / "+" / "," / ";" / "=" |
376 | * |
377 | * ALPHA = (%41-%5A and %61-%7A) |
378 | * DIGIT = (%30-%39), |
379 | * hyphen (%2D), period (%2E), underscore (%5F), tilde (%7E) |
380 | * exclam (%21) dollar (%24) amp (%26) singlequote (%27) |
381 | * lparen (%28) lparen (%29) asterisk (%2A) plus (%2B) |
382 | * comma (%2C) semicolon (%3B) equals (%3D) |
383 | * |
384 | * slash (%2F) colon (%3A) question mark (%3F) at (%40) |
385 | * |
386 | * Percent-encoded is just checked by the character range, but does not |
387 | * check the two following (number) chars. |
388 | * |
389 | * percent (%25) ... for percent-encoded |
390 | */ |
391 | |
392 | static const bool_Bool fragment_table[256] = { |
393 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
394 | /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
395 | /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
396 | /* 0x20 */ 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
397 | /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, |
398 | /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
399 | /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, |
400 | /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
401 | /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, |
402 | /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
403 | /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
404 | /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
405 | /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
406 | /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
407 | /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
408 | /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
409 | /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
410 | }; |
411 | static const bool_Bool alpha_table[256] = { |
412 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
413 | /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
414 | /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
415 | /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
416 | /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
417 | /* 0x40 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
418 | /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
419 | /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
420 | /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, |
421 | /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
422 | /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
423 | /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
424 | /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
425 | /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
426 | /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
427 | /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
428 | /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
429 | }; |
430 | |
431 | NS_NONNULL_ASSERT(urlPtr)((void) (0)); |
432 | |
433 | memset(urlPtr, 0, sizeof(Ns_URL)); |
434 | |
435 | /* |
436 | * Set variable "end" to the end of the protocol |
437 | * http://www.foo.com:8000/baz/blah/spoo.html |
438 | * ^ |
439 | * +--end |
440 | */ |
441 | |
442 | if (alpha_table[UCHAR(*url)((unsigned char)(*url))]) { |
443 | for (end = url+1; scheme_table[UCHAR(*end)((unsigned char)(*end))] != 0; end++) { |
444 | ; |
445 | } |
446 | } else { |
447 | end = url; |
448 | } |
449 | if (end != url && *end == ':') { |
450 | /* |
451 | * There is a protocol specified. Clear out the colon. |
452 | * Set pprotocol to the start of the protocol, and url to |
453 | * the first character after the colon. |
454 | * |
455 | * http\0//www.foo.com:8000/baz/blah/spoo.html |
456 | * ^ ^ ^ |
457 | * | | +-- url |
458 | * | +-- end |
459 | * +-------- protocol |
460 | */ |
461 | |
462 | *end = '\0'; |
463 | urlPtr->protocol = url; |
464 | url = end + 1; |
465 | /*fprintf(stderr, "SCHEME looks ok: %s\n", *pprotocol);*/ |
466 | |
467 | } else if (*end != '/' && *end != '?' && *end != '#' && *end != '\0' ) { |
468 | /* |
469 | * We do not have an explicit relative URL starting with a |
470 | * slash. Accept relative URL based on the heuristic to avoid getting |
471 | * every non-accepted scheme here (the remainding URL must not have a |
472 | * colon before a slash. |
473 | */ |
474 | char *p; |
475 | |
476 | for (p = end; *p != '\0' && *p != '/'; p++) { |
477 | if (*p == ':') { |
478 | /* |
479 | * We have a colon before the slash or end, do not accept |
480 | * this. |
481 | */ |
482 | Ns_Log(Debug, "URI scheme does not look ok: last char 0x%.2x '%s'", |
483 | *end, url); |
484 | *errorMsg = "invalid scheme"; |
485 | return NS_ERROR; |
486 | } |
487 | } |
488 | } |
489 | |
490 | |
491 | if (url[0] == '/' && url[1] == '/') { |
492 | bool_Bool hostParsedOk; |
493 | |
494 | urlPtr->path = (char *)""; |
495 | urlPtr->tail = (char *)""; |
496 | |
497 | /* |
498 | * The URL starts with two slashes, which means an authority part |
499 | * (host) is specified. Advance url past that and set *phost. |
500 | * |
501 | * http\0//www.foo.com:8000/baz/blah/spoo.html |
502 | * ^ ^ ^ |
503 | * | | +-- url, *host |
504 | * | +-- end |
505 | * +-------- protocol |
506 | */ |
507 | url = url + 2; |
508 | |
509 | /* |
510 | * RFC 3986 defines |
511 | * |
512 | * authority = [ userinfo "@" ] host [ ":" port ] |
513 | * |
514 | */ |
515 | url = ParseUserInfo(url, &urlPtr->userinfo); |
516 | urlPtr->host = url; |
517 | |
518 | /* |
519 | * Parse authority part and return the optional string pointing to the |
520 | * port. |
521 | */ |
522 | hostParsedOk = Ns_HttpParseHost2(url, strict, &urlPtr->host, &urlPtr->port, &end); |
523 | if (!hostParsedOk) { |
524 | *errorMsg = "invalid authority"; |
525 | return NS_ERROR; |
526 | } |
527 | |
528 | if (urlPtr->port != NULL((void*)0)) { |
529 | |
530 | /* |
531 | * A port was specified. Set urlPtr->port to the first |
532 | * digit. |
533 | * |
534 | * http\0//www.foo.com\08000/baz/blah/spoo.html |
535 | * ^ ^ ^ ^ |
536 | * | +-- host | +------ url, port |
537 | * +----- protocol +--- end |
538 | */ |
539 | |
540 | url = urlPtr->port; |
541 | urlPtr->port = url; |
542 | } |
543 | } else { |
544 | end = url; |
545 | } |
546 | /* |
547 | * "end" points now either to |
548 | * - the string terminator (NUL) |
549 | * - the slash which starts the path/tail, or to |
550 | * - one of the remaining components (query, or fragment) |
551 | * |
552 | * http\0//www.foo.com\08000\0baz/blah/spoo.html |
553 | * ^ ^ ^ ^ ^ |
554 | * | | | | +-- url |
555 | * | +-- host | +-- end |
556 | * +----- protocol +-- port |
557 | */ |
558 | /*fprintf(stderr, "CHECK FOR PATH <%s>\n", end);*/ |
559 | |
560 | |
561 | if (*end == '\0') { |
562 | /* |
563 | * No path, tail, query, fragment specified: we are done. |
564 | */ |
565 | |
566 | } else if (*end == '#') { |
567 | /* |
568 | * No path, tail, query, just a fragment specified. |
569 | * We could validate. |
570 | */ |
571 | *end = '\0'; |
572 | urlPtr->fragment = end + 1; |
573 | |
574 | } else if (*end == '?') { |
575 | /* |
576 | * No path, tail, just a query and maybe a fragment specified. |
577 | */ |
578 | *end = '\0'; |
579 | urlPtr->query = end + 1; |
580 | urlPtr->fragment = ParseUpTo(urlPtr->query, '#'); |
581 | |
582 | } else { |
583 | if (*end == '/') { |
584 | urlPtr->path = (char *)""; |
585 | urlPtr->tail = (char *)""; |
586 | |
587 | /* |
588 | * We have a path, tail, and maybe a query or fragment specified. |
589 | */ |
590 | *end = '\0'; |
591 | url = end + 1; |
592 | /* |
593 | * Set the path to URL and advance to the last slash. |
594 | * Set ptail to the character after that, or if there is none, |
595 | * it becomes path and path becomes an empty string. |
596 | * |
597 | * http\0//www.foo.com\08000\0baz/blah/spoo.html |
598 | * ^ ^ ^ ^ ^ ^^ |
599 | * | | | | | |+-- tail |
600 | * | | | | | +-- end |
601 | * | | | | +-- path |
602 | * | +-- host | +-- end |
603 | * +----- protocol +-- port |
604 | */ |
605 | |
606 | |
607 | /* |
608 | * Separate the "tail" from the "path", otherwise the string is |
609 | * just "tail". |
610 | */ |
611 | urlPtr->query = ParseUpTo(url, '?'); |
612 | if (urlPtr->query == NULL((void*)0)) { |
613 | urlPtr->fragment = ParseUpTo(url, '#'); |
614 | } |
615 | |
616 | end = strrchr(url, INTCHAR('/')((int)((unsigned char)(('/'))))); |
617 | if (end == NULL((void*)0)) { |
618 | urlPtr->tail = url; |
619 | } else { |
620 | *end = '\0'; |
621 | urlPtr->path = url; |
622 | urlPtr->tail = end + 1; |
623 | } |
624 | |
625 | } else { |
626 | /* |
627 | * The URL starts with no slash, just set the "tail" and let |
628 | * "path" undefined (legacy NaviServer). |
629 | */ |
630 | urlPtr->tail = end; |
631 | } |
632 | |
633 | if (urlPtr->tail != NULL((void*)0)) { |
634 | if (urlPtr->query == NULL((void*)0)) { |
635 | urlPtr->query = ParseUpTo(urlPtr->tail, '?'); |
636 | } |
637 | if (urlPtr->query != NULL((void*)0)) { |
638 | urlPtr->fragment = ParseUpTo(urlPtr->query, '#'); |
639 | } else if (urlPtr->fragment == NULL((void*)0)) { |
640 | urlPtr->fragment = ParseUpTo(urlPtr->tail, '#'); |
641 | } |
642 | } |
643 | if (strict) { |
644 | /* |
645 | * Validate content. |
646 | */ |
647 | if (urlPtr->query != NULL((void*)0)) { |
648 | urlPtr->query = ValidateChars(urlPtr->query, fragment_table, |
649 | "query contains invalid character", errorMsg); |
650 | } |
651 | if (urlPtr->fragment != NULL((void*)0)) { |
652 | urlPtr->fragment = ValidateChars(urlPtr->fragment, fragment_table, |
653 | "fragment contains invalid character", errorMsg); |
654 | } |
655 | if (urlPtr->tail != NULL((void*)0)) { |
656 | urlPtr->tail = ValidateChars(urlPtr->tail, path_table, |
657 | "query contains invalid character", errorMsg); |
658 | } |
659 | if (urlPtr->path != NULL((void*)0)) { |
660 | urlPtr->path = ValidateChars(urlPtr->path, path_table, |
661 | "path contains invalid character", errorMsg); |
662 | } |
663 | } |
664 | } |
665 | |
666 | return NS_OK; |
667 | } |
668 | |
669 | |
670 | /* |
671 | *---------------------------------------------------------------------- |
672 | * |
673 | * Ns_AbsoluteUrl -- |
674 | * |
675 | * Construct a URL based on baseurl but with as many parts of |
676 | * the incomplete url as possible. |
677 | * |
678 | * Results: |
679 | * NS_OK or NS_ERROR. |
680 | * |
681 | * Side effects: |
682 | * None. |
683 | * |
684 | *---------------------------------------------------------------------- |
685 | */ |
686 | |
687 | Ns_ReturnCode |
688 | Ns_AbsoluteUrl(Ns_DStringTcl_DString *dsPtr, const char *url, const char *base) |
689 | { |
690 | Ns_DStringTcl_DString urlDs, baseDs; |
691 | Ns_URL u, bu; |
692 | const char *errorMsg = NULL((void*)0); |
693 | Ns_ReturnCode status; |
694 | |
695 | /* |
696 | * Copy the URL's to allow Ns_ParseUrl to destroy them. |
697 | */ |
698 | |
699 | Ns_DStringInitTcl_DStringInit(&urlDs); |
700 | Ns_DStringInitTcl_DStringInit(&baseDs); |
701 | |
702 | /* |
703 | * The first part does not have to be a valid URL. |
704 | */ |
705 | Ns_DStringAppend(&urlDs, url)Tcl_DStringAppend((&urlDs), (url), -1); |
706 | (void) Ns_ParseUrl(urlDs.string, NS_FALSE0, &u, &errorMsg); |
707 | |
708 | Ns_DStringAppend(&baseDs, base)Tcl_DStringAppend((&baseDs), (base), -1); |
709 | status = Ns_ParseUrl(baseDs.string, NS_FALSE0, &bu, &errorMsg); |
710 | |
711 | if (bu.protocol == NULL((void*)0) || bu.host == NULL((void*)0) || bu.path == NULL((void*)0)) { |
712 | status = NS_ERROR; |
713 | goto done; |
714 | } |
715 | if (u.protocol == NULL((void*)0)) { |
716 | u.protocol = bu.protocol; |
717 | } |
718 | assert(u.protocol != NULL)((void) (0)); |
719 | |
720 | if (u.host == NULL((void*)0)) { |
721 | u.host = bu.host; |
722 | u.port = bu.port; |
723 | } |
724 | assert(u.host != NULL)((void) (0)); |
725 | |
726 | if (u.path == NULL((void*)0)) { |
727 | u.path = bu.path; |
728 | } |
729 | assert(u.path != NULL)((void) (0)); |
730 | |
731 | if (strchr(u.host, INTCHAR(':')((int)((unsigned char)((':'))))) == NULL((void*)0)) { |
732 | /* |
733 | * We have to use IP literal notation to avoid ambiguity of colon |
734 | * (part of address or separator for port). |
735 | */ |
736 | Ns_DStringVarAppend(dsPtr, u.protocol, "://", u.host, (char *)0L); |
737 | } else { |
738 | Ns_DStringVarAppend(dsPtr, u.protocol, "://[", u.host, "]", (char *)0L); |
739 | } |
740 | if (u.port != NULL((void*)0)) { |
741 | Ns_DStringVarAppend(dsPtr, ":", u.port, (char *)0L); |
742 | } |
743 | if (*u.path == '\0') { |
744 | Ns_DStringVarAppend(dsPtr, "/", u.tail, (char *)0L); |
745 | } else { |
746 | Ns_DStringVarAppend(dsPtr, "/", u.path, "/", u.tail, (char *)0L); |
747 | } |
748 | done: |
749 | Ns_DStringFreeTcl_DStringFree(&urlDs); |
750 | Ns_DStringFreeTcl_DStringFree(&baseDs); |
751 | |
752 | return status; |
753 | } |
754 | |
755 | |
756 | |
757 | /* |
758 | *---------------------------------------------------------------------- |
759 | * |
760 | * NsTclParseUrlObjCmd -- |
761 | * |
762 | * Implements "ns_parseurl". Offers the functionality of |
763 | * Ns_ParseUrl on the Tcl layer. |
764 | * |
765 | * Results: |
766 | * Tcl result. |
767 | * |
768 | * Side effects: |
769 | * none |
770 | * |
771 | *---------------------------------------------------------------------- |
772 | */ |
773 | |
774 | int |
775 | NsTclParseUrlObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv) |
776 | { |
777 | int result = TCL_OK0, strict = 0; |
778 | char *urlString; |
779 | Ns_ObjvSpec opts[] = { |
780 | {"-strict", Ns_ObjvBool, &strict, INT2PTR(NS_TRUE)((void *)(intptr_t)(1))}, |
781 | {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)} |
782 | }; |
783 | Ns_ObjvSpec args[] = { |
784 | {"url", Ns_ObjvString, &urlString, NULL((void*)0)}, |
785 | {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)} |
786 | }; |
787 | |
788 | if (Ns_ParseObjv(opts, args, interp, 1, objc, objv) != NS_OK) { |
789 | result = TCL_ERROR1; |
790 | } else { |
791 | char *url; |
792 | Ns_URL u; |
793 | const char *errorMsg = NULL((void*)0); |
794 | |
795 | url = ns_strdup(urlString); |
796 | |
797 | if (Ns_ParseUrl(url, (bool_Bool)strict, &u, &errorMsg) == NS_OK) { |
798 | Tcl_Obj *resultObj = Tcl_NewListObj(0, NULL((void*)0)); |
799 | |
800 | if (u.protocol != NULL((void*)0)) { |
801 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("proto", 5)); |
802 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.protocol, -1)); |
803 | } |
804 | if (u.userinfo != NULL((void*)0)) { |
805 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("userinfo", 8)); |
806 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.userinfo, -1)); |
807 | } |
808 | if (u.host != NULL((void*)0)) { |
Duplicate code detected | |
809 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("host", 4)); |
810 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.host, -1)); |
811 | } |
812 | if (u.port != NULL((void*)0)) { |
Similar code here | |
813 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("port", 4)); |
814 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.port, -1)); |
815 | } |
816 | if (u.path != NULL((void*)0)) { |
817 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("path", 4)); |
818 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.path, -1)); |
819 | } |
820 | if (u.tail != NULL((void*)0)) { |
821 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("tail", 4)); |
822 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.tail, -1)); |
823 | } |
824 | if (u.query != NULL((void*)0)) { |
825 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("query", 5)); |
826 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.query, -1)); |
827 | } |
828 | if (u.fragment != NULL((void*)0)) { |
829 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("fragment", 8)); |
830 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(u.fragment, -1)); |
831 | } |
832 | if (errorMsg != NULL((void*)0)) { |
833 | Ns_TclPrintfResult(interp, "Could not parse URL \"%s\": %s", urlString, errorMsg); |
834 | result = TCL_ERROR1; |
835 | } else { |
836 | Tcl_SetObjResult(interp, resultObj); |
837 | } |
838 | |
839 | } else { |
840 | Ns_TclPrintfResult(interp, "Could not parse URL \"%s\": %s", urlString, errorMsg); |
841 | result = TCL_ERROR1; |
842 | } |
843 | ns_free(url); |
844 | } |
845 | /*Ns_Log(Notice, "===== ns_parseurl '%s' returns result %d", urlString, result);*/ |
846 | return result; |
847 | } |
848 | |
849 | |
850 | /* |
851 | *---------------------------------------------------------------------- |
852 | * |
853 | * NsTclParseHostportObjCmd -- |
854 | * |
855 | * Implements "ns_parsehostport". Offers the functionality of |
856 | * Ns_HttpParseHost2 on the Tcl layer. |
857 | * |
858 | * Results: |
859 | * Tcl result. |
860 | * |
861 | * Side effects: |
862 | * none |
863 | * |
864 | *---------------------------------------------------------------------- |
865 | */ |
866 | |
867 | int |
868 | NsTclParseHostportObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv) |
869 | { |
870 | int result = TCL_OK0, strict = 0; |
871 | char *hostportString; |
872 | Ns_ObjvSpec opts[] = { |
873 | {"-strict", Ns_ObjvBool, &strict, INT2PTR(NS_TRUE)((void *)(intptr_t)(1))}, |
874 | {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)} |
875 | }; |
876 | Ns_ObjvSpec args[] = { |
877 | {"hostport", Ns_ObjvString, &hostportString, NULL((void*)0)}, |
878 | {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)} |
879 | }; |
880 | |
881 | if (Ns_ParseObjv(opts, args, interp, 1, objc, objv) != NS_OK) { |
882 | result = TCL_ERROR1; |
883 | } else { |
884 | char *hostport, *hostStart, *portStart, *end; |
885 | bool_Bool success; |
886 | |
887 | hostport = ns_strdup(hostportString); |
888 | success = Ns_HttpParseHost2(hostport, strict, &hostStart, &portStart, &end); |
889 | if (success && *hostStart != '\0' && portStart != hostport) { |
890 | Tcl_Obj *resultObj = Tcl_NewListObj(0, NULL((void*)0)); |
891 | |
892 | if (hostStart != NULL((void*)0)) { |
893 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("host", 4)); |
894 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(hostStart, -1)); |
895 | } |
896 | if (portStart != NULL((void*)0)) { |
897 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj("port", 4)); |
898 | Tcl_ListObjAppendElement(interp, resultObj, Tcl_NewStringObj(portStart, -1)); |
899 | } |
900 | |
901 | Tcl_SetObjResult(interp, resultObj); |
902 | |
903 | } else { |
904 | Ns_TclPrintfResult(interp, "Could not parse host and port \"%s\"", hostportString); |
905 | result = TCL_ERROR1; |
906 | } |
907 | ns_free(hostport); |
908 | } |
909 | return result; |
910 | } |
911 | |
912 | |
913 | /* |
914 | *---------------------------------------------------------------------- |
915 | * |
916 | * NsTclAbsoluteUrlObjCmd -- |
917 | * |
918 | * Implements "ns_absoluteurl". Offers the functionality of |
919 | * Ns_AbsoluteUrl on the Tcl layer. |
920 | * |
921 | * Results: |
922 | * Tcl result. |
923 | * |
924 | * Side effects: |
925 | * none |
926 | * |
927 | *---------------------------------------------------------------------- |
928 | */ |
929 | int |
930 | NsTclAbsoluteUrlObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv) |
931 | { |
932 | int result = TCL_OK0; |
933 | char *urlString, *baseString; |
934 | Ns_ObjvSpec args[] = { |
935 | {"partialurl", Ns_ObjvString, &urlString, NULL((void*)0)}, |
936 | {"baseurl", Ns_ObjvString, &baseString, NULL((void*)0)}, |
937 | {NULL((void*)0), NULL((void*)0), NULL((void*)0), NULL((void*)0)} |
938 | }; |
939 | |
940 | if (Ns_ParseObjv(NULL((void*)0), args, interp, 1, objc, objv) != NS_OK) { |
941 | result = TCL_ERROR1; |
942 | } else { |
943 | Tcl_DString ds; |
944 | |
945 | Tcl_DStringInit(&ds); |
946 | if (Ns_AbsoluteUrl(&ds, urlString, baseString) == NS_OK) { |
947 | Tcl_DStringResult(interp, &ds); |
948 | } else { |
949 | Ns_TclPrintfResult(interp, "Could not parse base URL into protocol, host and path"); |
950 | Tcl_DStringFree(&ds); |
951 | result = TCL_ERROR1; |
952 | } |
953 | } |
954 | |
955 | return result; |
956 | } |
957 | |
958 | /* |
959 | * Local Variables: |
960 | * mode: c |
961 | * c-basic-offset: 4 |
962 | * fill-column: 78 |
963 | * indent-tabs-mode: nil |
964 | * End: |
965 | */ |