File: | d/encoding.c |
Warning: | line 665, column 26 Using a fixed address is not portable because that address will probably not be valid in all environments or platforms |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * The contents of this file are subject to the Mozilla Public License | |||
3 | * Version 1.1 (the "License"); you may not use this file except in | |||
4 | * compliance with the License. You may obtain a copy of the License at | |||
5 | * http://mozilla.org/. | |||
6 | * | |||
7 | * Software distributed under the License is distributed on an "AS IS" | |||
8 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See | |||
9 | * the License for the specific language governing rights and limitations | |||
10 | * under the License. | |||
11 | * | |||
12 | * The Original Code is AOLserver Code and related documentation | |||
13 | * distributed by AOL. | |||
14 | * | |||
15 | * The Initial Developer of the Original Code is America Online, | |||
16 | * Inc. Portions created by AOL are Copyright (C) 1999 America Online, | |||
17 | * Inc. All Rights Reserved. | |||
18 | * | |||
19 | * Alternatively, the contents of this file may be used under the terms | |||
20 | * of the GNU General Public License (the "GPL"), in which case the | |||
21 | * provisions of GPL are applicable instead of those above. If you wish | |||
22 | * to allow use of your version of this file only under the terms of the | |||
23 | * GPL and not to allow others to use your version of this file under the | |||
24 | * License, indicate your decision by deleting the provisions above and | |||
25 | * replace them with the notice and other provisions required by the GPL. | |||
26 | * If you do not delete the provisions above, a recipient may use your | |||
27 | * version of this file under either the License or the GPL. | |||
28 | */ | |||
29 | ||||
30 | /* | |||
31 | * encoding.c -- | |||
32 | * | |||
33 | * Defines standard default charset to encoding mappings. | |||
34 | */ | |||
35 | ||||
36 | #include "nsd.h" | |||
37 | ||||
38 | /* | |||
39 | * Local functions defined in this file. | |||
40 | */ | |||
41 | ||||
42 | static void AddCharset(const char *charset, const char *name) | |||
43 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))) NS_GNUC_NONNULL(2)__attribute__((__nonnull__(2))); | |||
44 | ||||
45 | static void AddExtension(const char *ext, const char *name) | |||
46 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))) NS_GNUC_NONNULL(2)__attribute__((__nonnull__(2))); | |||
47 | ||||
48 | static Tcl_Encoding LoadEncoding(const char *name) | |||
49 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))); | |||
50 | ||||
51 | static Ns_ServerInitProc ConfigServerEncodings; | |||
52 | ||||
53 | /* | |||
54 | * Local variables defined in this file. | |||
55 | */ | |||
56 | ||||
57 | static Tcl_HashTable extensions; /* Maps file extensions to charsets. */ | |||
58 | static Tcl_HashTable charsets; /* Maps Internet charset names to Tcl encoding names */ | |||
59 | static Tcl_HashTable encnames; /* Maps Tcl encoding names to Internet charset names. */ | |||
60 | static Tcl_HashTable encodings; /* Cache of loaded Tcl encodings */ | |||
61 | ||||
62 | static Ns_Mutex lock; /* Lock around encodings. */ | |||
63 | static Ns_Cond cond; | |||
64 | ||||
65 | Tcl_Encoding NS_utf8Encoding = NULL((void*)0); /* Cached pointer to utf-8 encoding. */ | |||
66 | ||||
67 | #define EncodingLocked((Tcl_Encoding) (-1)) ((Tcl_Encoding) (-1)) | |||
68 | ||||
/*
 * Built-in mapping from file extension to the name of the encoding that
 * is used when such a file is read from disk (mainly ADP). The table is
 * terminated by an entry whose members are both NULL.
 */

static const struct {
    const char *extension;
    const char *name;
} builtinExt[] = {
    { .extension = ".txt",  .name = "ascii" },
    { .extension = ".htm",  .name = "utf-8" },
    { .extension = ".html", .name = "utf-8" },
    { .extension = ".adp",  .name = "utf-8" },
    { .extension = NULL,    .name = NULL    }
};
84 | ||||
/*
 * The following table provides HTTP charset aliases for Tcl encoding
 * names. The table is terminated by an entry whose members are both NULL.
 *
 * The three "...centraleupore" entries are historic misspellings of
 * "...centraleurope". They are kept so that existing configurations
 * continue to resolve, and the correctly spelled aliases are listed right
 * after them.
 */

static const struct {
    const char *charset;
    const char *name;
} builtinChar[] = {
    { "gb18030", "cp936" },
    { "gb_2312-80", "gb2312" },
    { "ibm437", "cp437" },
    { "ibm775", "cp775" },
    { "ibm850", "cp850" },
    { "ibm852", "cp852" },
    { "ibm855", "cp855" },
    { "ibm857", "cp857" },
    { "ibm860", "cp860" },
    { "ibm861", "cp861" },
    { "ibm862", "cp862" },
    { "ibm863", "cp863" },
    { "ibm864", "cp864" },
    { "ibm865", "cp865" },
    { "ibm866", "cp866" },
    { "ibm869", "cp869" },
    { "iso-2022-jp", "iso2022-jp" },
    { "iso-2022-kr", "iso2022-kr" },
    { "iso-8859-1", "iso8859-1" },
    { "iso-8859-10", "iso8859-10" },
    { "iso-8859-13", "iso8859-13" },
    { "iso-8859-14", "iso8859-14" },
    { "iso-8859-15", "iso8859-15" },
    { "iso-8859-16", "iso8859-16" },
    { "iso-8859-2", "iso8859-2" },
    { "iso-8859-3", "iso8859-3" },
    { "iso-8859-4", "iso8859-4" },
    { "iso-8859-5", "iso8859-5" },
    { "iso-8859-6", "iso8859-6" },
    { "iso-8859-7", "iso8859-7" },
    { "iso-8859-8", "iso8859-8" },
    { "iso-8859-9", "iso8859-9" },
    { "jis_x0201", "jis0201" },
    { "jis_x0212-1990", "jis0212" },
    { "korean", "ksc5601" },
    { "ksc_5601", "ksc5601" },
    { "mac", "macRoman" },
    { "mac-centeuro", "macCentEuro" },
    { "mac-centraleupore", "macCentEuro" },
    { "mac-centraleurope", "macCentEuro" },
    { "mac-croatian", "macCroatian" },
    { "mac-cyrillic", "macCyrillic" },
    { "mac-greek", "macGreek" },
    { "mac-iceland", "macIceland" },
    { "mac-japan", "macJapan" },
    { "mac-roman", "macRoman" },
    { "mac-romania", "macRomania" },
    { "mac-thai", "macThai" },
    { "mac-turkish", "macTurkish" },
    { "mac-ukraine", "macUkraine" },
    { "maccenteuro", "macCentEuro" },
    { "maccentraleupore", "macCentEuro" },
    { "maccentraleurope", "macCentEuro" },
    { "maccroatian", "macCroatian" },
    { "maccyrillic", "macCyrillic" },
    { "macgreek", "macGreek" },
    { "maciceland", "macIceland" },
    { "macintosh", "macRoman" },
    { "macjapan", "macJapan" },
    { "macroman", "macRoman" },
    { "macromania", "macRomania" },
    { "macthai", "macThai" },
    { "macturkish", "macTurkish" },
    { "macukraine", "macUkraine" },
    { "shift_jis", "shiftjis" },
    { "us-ascii", "ascii" },
    { "windows-1250", "cp1250" },
    { "windows-1251", "cp1251" },
    { "windows-1252", "cp1252" },
    { "windows-1253", "cp1253" },
    { "windows-1254", "cp1254" },
    { "windows-1255", "cp1255" },
    { "windows-1256", "cp1256" },
    { "windows-1257", "cp1257" },
    { "windows-1258", "cp1258" },
    { "windows-31j", "cp932" },
    { "windows-874", "cp874" },
    { "x-mac", "macRoman" },
    { "x-mac-centeuro", "macCentEuro" },
    { "x-mac-centraleupore", "macCentEuro" },
    { "x-mac-centraleurope", "macCentEuro" },
    { "x-mac-croatian", "macCroatian" },
    { "x-mac-cyrillic", "macCyrillic" },
    { "x-mac-greek", "macGreek" },
    { "x-mac-iceland", "macIceland" },
    { "x-mac-japan", "macJapan" },
    { "x-mac-roman", "macRoman" },
    { "x-mac-romania", "macRomania" },
    { "x-mac-thai", "macThai" },
    { "x-mac-turkish", "macTurkish" },
    { "x-mac-ukraine", "macUkraine" },
    { "x-macintosh", "macRoman" },

    /*
     * The following entries are strictly speaking not needed, since the
     * IANA name is identical with the Tcl charset name. We add these to
     * be able to return full set of supported IANA charsets via
     * [ns_charset].
     *
     * See: https://www.iana.org/assignments/character-sets/character-sets.xml
     */
    { "big5", "big5" },
    { "euc-jp", "euc-jp" },
    { "euc-kr", "euc-kr" },
    { "gb2312", "gb2312" },
    { "koi8-r", "koi8-r" },
    { "koi8-u", "koi8-u" },
    { "tis-620", "tis-620" },
    { "utf-8", "utf-8" },

    { NULL, NULL }
};
202 | ||||
203 | ||||
204 | /* | |||
205 | *---------------------------------------------------------------------- | |||
206 | * | |||
207 | * NsConfigEncodings -- | |||
208 | * | |||
209 | * Configure charset aliases and file extension mappings. | |||
210 | * | |||
211 | * Results: | |||
212 | * None. | |||
213 | * | |||
214 | * Side effects: | |||
215 | * None. | |||
216 | * | |||
217 | *---------------------------------------------------------------------- | |||
218 | */ | |||
219 | ||||
220 | void | |||
221 | NsConfigEncodings(void) | |||
222 | { | |||
223 | const Ns_Set *set; | |||
224 | size_t i; | |||
225 | ||||
226 | Ns_MutexSetName(&lock, "ns:encodings"); | |||
227 | Tcl_InitHashTable(&extensions, TCL_STRING_KEYS(0)); | |||
228 | Tcl_InitHashTable(&charsets, TCL_STRING_KEYS(0)); | |||
229 | Tcl_InitHashTable(&encnames, TCL_STRING_KEYS(0)); | |||
230 | Tcl_InitHashTable(&encodings, TCL_STRING_KEYS(0)); | |||
231 | NS_utf8Encoding = Ns_GetCharsetEncoding("utf-8"); | |||
232 | ||||
233 | /* | |||
234 | * Add default charsets and file mappings. | |||
235 | */ | |||
236 | ||||
237 | for (i = 0u; builtinChar[i].charset != NULL((void*)0); ++i) { | |||
238 | AddCharset(builtinChar[i].charset, builtinChar[i].name); | |||
239 | } | |||
240 | for (i = 0u; builtinExt[i].extension != NULL((void*)0); ++i) { | |||
241 | AddExtension(builtinExt[i].extension, builtinExt[i].name); | |||
242 | } | |||
243 | ||||
244 | /* | |||
245 | * Add configured charsets and file mappings. | |||
246 | */ | |||
247 | ||||
248 | set = Ns_ConfigGetSection("ns/charsets"); | |||
249 | for (i = 0u; set != NULL((void*)0) && i < Ns_SetSize(set)((set)->size); ++i) { | |||
250 | AddCharset(Ns_SetKey(set, i)((set)->fields[(i)].name), Ns_SetValue(set, i)((set)->fields[(i)].value)); | |||
251 | } | |||
252 | set = Ns_ConfigGetSection("ns/encodings"); | |||
253 | for (i = 0u; set != NULL((void*)0) && i < Ns_SetSize(set)((set)->size); ++i) { | |||
254 | AddExtension(Ns_SetKey(set, i)((set)->fields[(i)].name), Ns_SetValue(set, i)((set)->fields[(i)].value)); | |||
255 | } | |||
256 | ||||
257 | NsRegisterServerInit(ConfigServerEncodings); | |||
258 | } | |||
259 | ||||
260 | static Ns_ReturnCode | |||
261 | ConfigServerEncodings(const char *server) | |||
262 | { | |||
263 | NsServer *servPtr = NsGetServer(server); | |||
264 | Ns_ReturnCode result; | |||
265 | ||||
266 | if (unlikely(servPtr == NULL)(__builtin_expect((servPtr == ((void*)0)), 0))) { | |||
267 | Ns_Log(Warning, "Could not set encoding, server '%s' unknown", server); | |||
268 | result = NS_ERROR; | |||
269 | ||||
270 | } else { | |||
271 | const char *path; | |||
272 | ||||
273 | /* | |||
274 | * Configure the encoding used in the request URL. | |||
275 | */ | |||
276 | ||||
277 | path = Ns_ConfigSectionPath(NULL((void*)0), server, NULL((void*)0), (char *)0L); | |||
278 | ||||
279 | servPtr->encoding.urlCharset = | |||
280 | Ns_ConfigString(path, "urlCharset", "utf-8"); | |||
281 | ||||
282 | servPtr->encoding.urlEncoding = | |||
283 | Ns_GetCharsetEncoding(servPtr->encoding.urlCharset); | |||
284 | ||||
285 | if (servPtr->encoding.urlEncoding == NULL((void*)0)) { | |||
286 | Ns_Log(Warning, "no encoding found for charset \"%s\" from config", | |||
287 | servPtr->encoding.urlCharset); | |||
288 | } | |||
289 | servPtr->encoding.formFallbackCharset = | |||
290 | Ns_ConfigString(path, "FormFallbackCharset", NULL((void*)0)); | |||
291 | if (servPtr->encoding.formFallbackCharset != NULL((void*)0) | |||
292 | && *servPtr->encoding.formFallbackCharset == '\0') { | |||
293 | servPtr->encoding.formFallbackCharset = NULL((void*)0); | |||
294 | } | |||
295 | ||||
296 | /* | |||
297 | * Configure the encoding used for Tcl/ADP output. | |||
298 | */ | |||
299 | ||||
300 | servPtr->encoding.outputCharset = | |||
301 | Ns_ConfigString(path, "outputCharset", "utf-8"); | |||
302 | ||||
303 | servPtr->encoding.outputEncoding = | |||
304 | Ns_GetCharsetEncoding(servPtr->encoding.outputCharset); | |||
305 | if (servPtr->encoding.outputEncoding == NULL((void*)0)) { | |||
306 | Ns_Fatal("could not find encoding for default output charset \"%s\"", | |||
307 | servPtr->encoding.outputCharset); | |||
308 | } | |||
309 | ||||
310 | result = NS_OK; | |||
311 | ||||
312 | } | |||
313 | return result; | |||
314 | } | |||
315 | ||||
316 | ||||
317 | /* | |||
318 | *---------------------------------------------------------------------- | |||
319 | * | |||
320 | * Ns_GetFileEncoding -- | |||
321 | * | |||
322 | * Return the Tcl_Encoding that should be used to read a file from disk | |||
323 | * according to its extension. | |||
324 | * | |||
325 | * Note this may not be the same as the encoding for the charset of the | |||
326 | * file's mimetype. | |||
327 | * | |||
328 | * Results: | |||
329 | * Tcl_Encoding or NULL if not found. | |||
330 | * | |||
331 | * Side effects: | |||
332 | * See Ns_GetCharsetEncoding(). | |||
333 | * | |||
334 | *---------------------------------------------------------------------- | |||
335 | */ | |||
336 | ||||
337 | Tcl_Encoding | |||
338 | Ns_GetFileEncoding(const char *file) | |||
339 | { | |||
340 | const char *ext; | |||
341 | Tcl_Encoding encoding = NULL((void*)0); | |||
342 | ||||
343 | NS_NONNULL_ASSERT(file != NULL)((void) (0)); | |||
344 | ||||
345 | ext = strrchr(file, INTCHAR('.')((int)((unsigned char)(('.'))))); | |||
346 | if (ext != NULL((void*)0)) { | |||
347 | const Tcl_HashEntry *hPtr = Tcl_FindHashEntry(&extensions, ext)(*((&extensions)->findProc))(&extensions, (const char *)(ext)); | |||
348 | ||||
349 | if (hPtr != NULL((void*)0)) { | |||
350 | const char *name = Tcl_GetHashValue(hPtr)((hPtr)->clientData); | |||
351 | encoding = Ns_GetCharsetEncoding(name); | |||
352 | } | |||
353 | } | |||
354 | return encoding; | |||
355 | } | |||
356 | ||||
357 | ||||
358 | /* | |||
359 | *---------------------------------------------------------------------- | |||
360 | * | |||
361 | * Ns_GetTypeEncoding -- | |||
362 | * | |||
363 | * Return the Tcl_Encoding for the given Content-type header, | |||
364 | * e.g., "text/html; charset=iso-8859-1" returns Tcl_Encoding | |||
365 | * for iso8859-1. | |||
366 | * | |||
367 | * This function will utilize the ns/parameters/OutputCharset | |||
368 | * config parameter if given a content-type "text/<anything>" with | |||
369 | * no charset. | |||
370 | * | |||
371 | * When no OutputCharset defined, the fall-back behavior is to | |||
372 | * return NULL. | |||
373 | * | |||
374 | * Results: | |||
375 | * Tcl_Encoding or NULL if not found. | |||
376 | * | |||
377 | * Side effects: | |||
378 | * See LoadEncoding(). | |||
379 | * | |||
380 | *---------------------------------------------------------------------- | |||
381 | */ | |||
382 | ||||
383 | Tcl_Encoding | |||
384 | Ns_GetTypeEncoding(const char *mimeType) | |||
385 | { | |||
386 | const char *charset; | |||
387 | size_t len; | |||
388 | ||||
389 | NS_NONNULL_ASSERT(mimeType != NULL)((void) (0)); | |||
390 | ||||
391 | charset = NsFindCharset(mimeType, &len); | |||
392 | return (charset != NULL((void*)0)) ? Ns_GetCharsetEncodingEx(charset, (int)len) : NULL((void*)0); | |||
393 | } | |||
394 | ||||
395 | ||||
396 | /* | |||
397 | *---------------------------------------------------------------------- | |||
398 | * | |||
399 | * Ns_GetCharsetEncoding, Ns_GetCharsetEncodingEx -- | |||
400 | * | |||
401 | * Return the Tcl_Encoding for the given charset, e.g., | |||
402 | * "iso-8859-1" returns Tcl_Encoding for iso8859-1. | |||
403 | * | |||
404 | * Results: | |||
405 | * Tcl_Encoding or NULL if not found. | |||
406 | * | |||
407 | * Side effects: | |||
408 | * See LoadEncoding(). | |||
409 | * | |||
410 | *---------------------------------------------------------------------- | |||
411 | */ | |||
412 | ||||
413 | Tcl_Encoding | |||
414 | Ns_GetCharsetEncoding(const char *charset) | |||
415 | { | |||
416 | NS_NONNULL_ASSERT(charset != NULL)((void) (0)); | |||
417 | ||||
418 | return Ns_GetCharsetEncodingEx(charset, -1); | |||
419 | } | |||
420 | ||||
421 | Tcl_Encoding | |||
422 | Ns_GetCharsetEncodingEx(const char *charset, int len) | |||
423 | { | |||
424 | const Tcl_HashEntry *hPtr; | |||
425 | Tcl_Encoding encoding; | |||
426 | Ns_DStringTcl_DString ds; | |||
427 | ||||
428 | NS_NONNULL_ASSERT(charset != NULL)((void) (0)); | |||
429 | ||||
430 | /* | |||
431 | * Cleanup the charset name and check for an | |||
432 | * alias (e.g., iso-8859-1 = iso8859-1) before | |||
433 | * assuming the charset and Tcl encoding names | |||
434 | * match (e.g., big5). | |||
435 | */ | |||
436 | ||||
437 | Ns_DStringInitTcl_DStringInit(&ds); | |||
438 | Ns_DStringNAppendTcl_DStringAppend(&ds, charset, len); | |||
439 | charset = Ns_StrTrim(Ns_StrToLower(ds.string)); | |||
440 | hPtr = Tcl_FindHashEntry(&charsets, charset)(*((&charsets)->findProc))(&charsets, (const char * )(charset)); | |||
441 | if (hPtr != NULL((void*)0)) { | |||
442 | charset = Tcl_GetHashValue(hPtr)((hPtr)->clientData); | |||
443 | } | |||
444 | encoding = LoadEncoding(charset); | |||
445 | Ns_DStringFreeTcl_DStringFree(&ds); | |||
446 | ||||
447 | return encoding; | |||
448 | } | |||
449 | ||||
450 | Tcl_Encoding | |||
451 | Ns_GetEncoding(const char *name) | |||
452 | { | |||
453 | /* Deprecated, use Ns_GetCharsetEncodingEx(). */ | |||
454 | return LoadEncoding(name); | |||
455 | } | |||
456 | ||||
457 | ||||
458 | /* | |||
459 | *---------------------------------------------------------------------- | |||
460 | * | |||
461 | * Ns_GetEncodingCharset -- | |||
462 | * | |||
463 | * Return the charset name for the given Tcl_Encoding. | |||
464 | * | |||
465 | * Results: | |||
466 | * Charset name, or encoding name if no alias. | |||
467 | * | |||
468 | * Side effects: | |||
469 | * None. | |||
470 | * | |||
471 | *---------------------------------------------------------------------- | |||
472 | */ | |||
473 | ||||
474 | const char * | |||
475 | Ns_GetEncodingCharset(Tcl_Encoding encoding) | |||
476 | { | |||
477 | const char *encname, *charset = NULL((void*)0); | |||
478 | const Tcl_HashEntry *hPtr; | |||
479 | ||||
480 | NS_NONNULL_ASSERT(encoding != NULL)((void) (0)); | |||
481 | ||||
482 | encname = Tcl_GetEncodingName(encoding); | |||
483 | hPtr = Tcl_FindHashEntry(&encnames, encname)(*((&encnames)->findProc))(&encnames, (const char * )(encname)); | |||
484 | if (hPtr != NULL((void*)0)) { | |||
485 | charset = Tcl_GetHashValue(hPtr)((hPtr)->clientData); | |||
486 | } | |||
487 | return (charset != NULL((void*)0)) ? charset : encname; | |||
488 | } | |||
489 | ||||
490 | ||||
491 | /* | |||
492 | *---------------------------------------------------------------------- | |||
493 | * | |||
494 | * NsFindCharset -- | |||
495 | * | |||
496 | * Find start of charset within a mime-type string. | |||
497 | * | |||
498 | * Results: | |||
499 | * Pointer to start of charset or NULL on no charset. | |||
500 | * | |||
501 | * Side effects: | |||
502 | * None. | |||
503 | * | |||
504 | *---------------------------------------------------------------------- | |||
505 | */ | |||
506 | ||||
507 | const char * | |||
508 | NsFindCharset(const char *mimetype, size_t *lenPtr) | |||
509 | { | |||
510 | const char *start, *charset = NULL((void*)0); | |||
511 | ||||
512 | NS_NONNULL_ASSERT(mimetype != NULL)((void) (0)); | |||
513 | NS_NONNULL_ASSERT(lenPtr != NULL)((void) (0)); | |||
514 | ||||
515 | start = Ns_StrCaseFind(mimetype, "charset"); | |||
516 | if (start != NULL((void*)0)) { | |||
517 | start += 7; | |||
518 | start += strspn(start, " "); | |||
519 | if (*start++ == '=') { | |||
520 | const char *end; | |||
521 | ||||
522 | start += strspn(start, " "); | |||
523 | end = start; | |||
524 | while (*end != '\0' && CHARTYPE(space, *end)(((*__ctype_b_loc ())[(int) (((int)((unsigned char)(*end))))] & (unsigned short int) _ISspace)) == 0) { | |||
525 | ++end; | |||
526 | } | |||
527 | *lenPtr = (size_t)(end - start); | |||
528 | charset = start; | |||
529 | } | |||
530 | } | |||
531 | return charset; | |||
532 | } | |||
533 | ||||
534 | ||||
535 | /* | |||
536 | *---------------------------------------------------------------------- | |||
537 | * | |||
538 | * NsTclCharsetsObjCmd -- | |||
539 | * | |||
540 | * Implements "ns_charsets". The command returns the list of charsets for | |||
541 | * which encodings are defined. | |||
542 | * | |||
543 | * Results: | |||
544 | * TCL_OK | |||
545 | * | |||
546 | * Side effects: | |||
547 | * Sets Tcl interpreter result. | |||
548 | * | |||
549 | *---------------------------------------------------------------------- | |||
550 | */ | |||
551 | ||||
552 | int | |||
553 | NsTclCharsetsObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, | |||
554 | int UNUSED(objc)UNUSED_objc __attribute__((__unused__)), Tcl_Obj *const* UNUSED(objv)UNUSED_objv __attribute__((__unused__))) | |||
555 | { | |||
556 | const Tcl_HashEntry *hPtr; | |||
557 | Tcl_HashSearch search; | |||
558 | Tcl_Obj *listObj = Tcl_NewListObj(0, NULL((void*)0)); | |||
559 | ||||
560 | for (hPtr = Tcl_FirstHashEntry(&charsets, &search); | |||
561 | hPtr != NULL((void*)0); | |||
562 | hPtr = Tcl_NextHashEntry(&search) | |||
563 | ) { | |||
564 | const char *key = Tcl_GetHashKey(&charsets, hPtr)((void *) (((&charsets)->keyType == (1) || (&charsets )->keyType == (-1)) ? (hPtr)->key.oneWordValue : (hPtr) ->key.string)); | |||
565 | Tcl_ListObjAppendElement(interp, listObj, Tcl_NewStringObj(key, -1)); | |||
566 | } | |||
567 | Tcl_SetObjResult(interp, listObj); | |||
568 | ||||
569 | return TCL_OK0; | |||
570 | } | |||
571 | ||||
572 | ||||
573 | ||||
574 | /* | |||
575 | *---------------------------------------------------------------------- | |||
576 | * | |||
577 | * NsTclEncodingForCharsetObjCmd -- | |||
578 | * | |||
579 | * Implements "ns_encodingforcharset". The command returns the name of | |||
580 | * the encoding for the specified charset. | |||
581 | * | |||
582 | * Results: | |||
583 | * Tcl result contains an encoding name or "". | |||
584 | * | |||
585 | * Side effects: | |||
586 | * None. | |||
587 | * | |||
588 | *---------------------------------------------------------------------- | |||
589 | */ | |||
590 | ||||
591 | int | |||
592 | NsTclEncodingForCharsetObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv) | |||
593 | { | |||
594 | int result = TCL_OK0; | |||
595 | ||||
596 | if (objc != 2) { | |||
597 | Tcl_WrongNumArgs(interp, 1, objv, "charset"); | |||
598 | result = TCL_ERROR1; | |||
599 | } else { | |||
600 | int encodingNameLen; | |||
601 | const char *encodingName = Tcl_GetStringFromObj(objv[1], &encodingNameLen); | |||
602 | Tcl_Encoding encoding = Ns_GetCharsetEncodingEx(encodingName, encodingNameLen); | |||
603 | ||||
604 | if (encoding != NULL((void*)0)) { | |||
605 | Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_GetEncodingName(encoding), -1)); | |||
606 | } | |||
607 | } | |||
608 | ||||
609 | return result; | |||
610 | } | |||
611 | ||||
612 | ||||
613 | /* | |||
614 | *---------------------------------------------------------------------- | |||
615 | * | |||
616 | * NsEncodingIsUtf8 -- | |||
617 | * | |||
618 | * Is the given encoding the utf-8 encoding? | |||
619 | * | |||
620 | * Results: | |||
621 | * Boolean. | |||
622 | * | |||
623 | * Side effects: | |||
624 | * None. | |||
625 | * | |||
626 | *---------------------------------------------------------------------- | |||
627 | */ | |||
628 | ||||
629 | bool_Bool | |||
630 | NsEncodingIsUtf8(const Tcl_Encoding encoding) | |||
631 | { | |||
632 | return (encoding == NS_utf8Encoding); | |||
633 | } | |||
634 | ||||
635 | ||||
636 | /* | |||
637 | *---------------------------------------------------------------------- | |||
638 | * | |||
639 | * LoadEncoding -- | |||
640 | * | |||
641 | * Return the Tcl_Encoding for the given charset. | |||
642 | * | |||
643 | * Results: | |||
644 | * Tcl_Encoding or NULL if not found. | |||
645 | * | |||
646 | * Side effects: | |||
647 | * Will load encoding from disk on first access. | |||
648 | * May wait for other thread to load encoding from disk. | |||
649 | * | |||
650 | *---------------------------------------------------------------------- | |||
651 | */ | |||
652 | ||||
653 | static Tcl_Encoding | |||
654 | LoadEncoding(const char *name) | |||
655 | { | |||
656 | Tcl_HashEntry *hPtr; | |||
657 | Tcl_Encoding encoding; | |||
658 | int isNew; | |||
659 | ||||
660 | NS_NONNULL_ASSERT(name != NULL)((void) (0)); | |||
661 | ||||
662 | Ns_MutexLock(&lock); | |||
663 | hPtr = Tcl_CreateHashEntry(&encodings, name, &isNew)(*((&encodings)->createProc))(&encodings, (const char *)(name), &isNew); | |||
664 | if (isNew == 0) { | |||
665 | while ((encoding = Tcl_GetHashValue(hPtr)((hPtr)->clientData)) == EncodingLocked((Tcl_Encoding) (-1))) { | |||
| ||||
666 | Ns_CondWait(&cond, &lock); | |||
667 | } | |||
668 | } else { | |||
669 | Tcl_SetHashValue(hPtr, INT2PTR(EncodingLocked))((hPtr)->clientData = (ClientData) (((void *)(intptr_t)((( Tcl_Encoding) (-1)))))); | |||
670 | Ns_MutexUnlock(&lock); | |||
671 | encoding = Tcl_GetEncoding(NULL((void*)0), name); | |||
672 | if (encoding == NULL((void*)0)) { | |||
673 | Ns_Log(Warning, "encoding: could not load: '%s'", name); | |||
674 | } else { | |||
675 | Ns_Log(Debug, "encoding: loaded: %s", name); | |||
676 | } | |||
677 | Ns_MutexLock(&lock); | |||
678 | Tcl_SetHashValue(hPtr, encoding)((hPtr)->clientData = (ClientData) (encoding)); | |||
679 | Ns_CondBroadcast(&cond); | |||
680 | } | |||
681 | Ns_MutexUnlock(&lock); | |||
682 | ||||
683 | return encoding; | |||
684 | } | |||
685 | ||||
686 | ||||
687 | /* | |||
688 | *---------------------------------------------------------------------- | |||
689 | * | |||
690 | * AddCharset, AddExtension -- | |||
691 | * | |||
692 | * Add extensiont to encoding mapping and charset aliases. | |||
693 | * | |||
694 | * Results: | |||
695 | * None. | |||
696 | * | |||
697 | * Side effects: | |||
698 | * None. | |||
699 | * | |||
700 | *---------------------------------------------------------------------- | |||
701 | */ | |||
702 | ||||
703 | static void | |||
704 | AddExtension(const char *ext, const char *name) | |||
705 | { | |||
706 | Tcl_HashEntry *hPtr; | |||
707 | int isNew; | |||
708 | ||||
709 | NS_NONNULL_ASSERT(ext != NULL)((void) (0)); | |||
710 | NS_NONNULL_ASSERT(name != NULL)((void) (0)); | |||
711 | ||||
712 | hPtr = Tcl_CreateHashEntry(&extensions, ext, &isNew)(*((&extensions)->createProc))(&extensions, (const char *)(ext), &isNew); | |||
713 | Tcl_SetHashValue(hPtr, name)((hPtr)->clientData = (ClientData) (name)); | |||
714 | } | |||
715 | ||||
716 | static void | |||
717 | AddCharset(const char *charset, const char *name) | |||
718 | { | |||
719 | Tcl_HashEntry *hPtr; | |||
720 | Ns_DStringTcl_DString ds; | |||
721 | int isNew; | |||
722 | ||||
723 | NS_NONNULL_ASSERT(charset != NULL)((void) (0)); | |||
724 | NS_NONNULL_ASSERT(name != NULL)((void) (0)); | |||
725 | ||||
726 | Ns_DStringInitTcl_DStringInit(&ds); | |||
727 | charset = Ns_StrToLower(Ns_DStringAppend(&ds, charset)Tcl_DStringAppend((&ds), (charset), -1)); | |||
728 | ||||
729 | /* | |||
730 | * Map in the forward direction: charsets to encodings. | |||
731 | */ | |||
732 | ||||
733 | hPtr = Tcl_CreateHashEntry(&charsets, charset, &isNew)(*((&charsets)->createProc))(&charsets, (const char *)(charset), &isNew); | |||
734 | Tcl_SetHashValue(hPtr, name)((hPtr)->clientData = (ClientData) (name)); | |||
735 | ||||
736 | /* | |||
737 | * Map in the reverse direction: encodings to charsets. | |||
738 | * Nb: Ignore duplicate mappings. | |||
739 | */ | |||
740 | ||||
741 | hPtr = Tcl_CreateHashEntry(&encnames, name, &isNew)(*((&encnames)->createProc))(&encnames, (const char *)(name), &isNew); | |||
742 | if (isNew != 0) { | |||
743 | Tcl_SetHashValue(hPtr, ns_strdup(charset))((hPtr)->clientData = (ClientData) (ns_strdup(charset))); | |||
744 | } | |||
745 | ||||
746 | Ns_DStringFreeTcl_DStringFree(&ds); | |||
747 | } | |||
748 | ||||
749 | /* | |||
750 | *---------------------------------------------------------------------- | |||
751 | * | |||
752 | * NsGetFallbackEncoding -- | |||
753 | * | |||
754 | * Obtain a fallback encoding either from a specified argument | |||
755 | * (fallbackCharsetObj) or from the configuration. | |||
756 | * | |||
757 | * The resolving order is | |||
758 | * - use command line parameter, if specified. | |||
759 | * - use per server parameter "formFallbackCharset" if specified; | |||
760 | * - use global server parameter "formFallbackCharset" if specified. | |||
761 | * The last two options are only tried, when "fromConfig" is true. | |||
762 | * | |||
763 | * Results: | |||
764 | * NS_OK or NS_ERROR. In the error case, an error message is left in the | |||
765 | * interp result. | |||
766 | * | |||
767 | * Side effects: | |||
768 | * None. | |||
769 | * | |||
770 | *---------------------------------------------------------------------- | |||
771 | */ | |||
772 | ||||
773 | Ns_ReturnCode | |||
774 | NsGetFallbackEncoding(Tcl_Interp *interp, NsServer *servPtr, | |||
775 | Tcl_Obj *fallbackCharsetObj, bool_Bool fromConfig, | |||
776 | Tcl_Encoding *encodingPtr) | |||
777 | { | |||
778 | const char *fallbackCharsetString = NULL((void*)0); | |||
779 | Ns_ReturnCode result = NS_OK; | |||
780 | ||||
781 | NS_NONNULL_ASSERT(interp != NULL)((void) (0)); | |||
782 | NS_NONNULL_ASSERT(encodingPtr != NULL)((void) (0)); | |||
783 | ||||
784 | if (fallbackCharsetObj != NULL((void*)0)) { | |||
| ||||
785 | fallbackCharsetString = Tcl_GetString(fallbackCharsetObj); | |||
786 | if (*fallbackCharsetString == '\0') { | |||
787 | fallbackCharsetString = NULL((void*)0); | |||
788 | } | |||
789 | } | |||
790 | if (fromConfig) { | |||
791 | if (fallbackCharsetString == NULL((void*)0) && servPtr != NULL((void*)0)) { | |||
792 | fallbackCharsetString = servPtr->encoding.formFallbackCharset; | |||
793 | } | |||
794 | if (fallbackCharsetString == NULL((void*)0) && servPtr != NULL((void*)0)) { | |||
795 | fallbackCharsetString = nsconf.formFallbackCharset; | |||
796 | } | |||
797 | } | |||
798 | Ns_Log(Debug, "NsGetFallbackEncoding fromConfig %p %d, '%s'", | |||
799 | (void*)fallbackCharsetObj, fromConfig, fallbackCharsetString); | |||
800 | if (fallbackCharsetString
| |||
801 | *encodingPtr = Ns_GetCharsetEncoding(fallbackCharsetString); | |||
802 | if (*encodingPtr == NULL((void*)0)) { | |||
803 | Ns_TclPrintfResult(interp, | |||
804 | "invalid fallback encoding: '%s'", | |||
805 | fallbackCharsetString); | |||
806 | result = NS_ERROR; | |||
807 | } | |||
808 | } else { | |||
809 | *encodingPtr = NULL((void*)0); | |||
810 | } | |||
811 | ||||
812 | return result; | |||
813 | } | |||
814 | ||||
815 | /* | |||
816 | * Local Variables: | |||
817 | * mode: c | |||
818 | * c-basic-offset: 4 | |||
819 | * fill-column: 78 | |||
820 | * indent-tabs-mode: nil | |||
821 | * End: | |||
822 | */ |