| File: | d/encoding.c |
| Warning: | line 669, column 9 Using a fixed address is not portable because that address will probably not be valid in all environments or platforms |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | |||
| 2 | * The contents of this file are subject to the Mozilla Public License | |||
| 3 | * Version 1.1 (the "License"); you may not use this file except in | |||
| 4 | * compliance with the License. You may obtain a copy of the License at | |||
| 5 | * http://mozilla.org/. | |||
| 6 | * | |||
| 7 | * Software distributed under the License is distributed on an "AS IS" | |||
| 8 | * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See | |||
| 9 | * the License for the specific language governing rights and limitations | |||
| 10 | * under the License. | |||
| 11 | * | |||
| 12 | * The Original Code is AOLserver Code and related documentation | |||
| 13 | * distributed by AOL. | |||
| 14 | * | |||
| 15 | * The Initial Developer of the Original Code is America Online, | |||
| 16 | * Inc. Portions created by AOL are Copyright (C) 1999 America Online, | |||
| 17 | * Inc. All Rights Reserved. | |||
| 18 | * | |||
| 19 | * Alternatively, the contents of this file may be used under the terms | |||
| 20 | * of the GNU General Public License (the "GPL"), in which case the | |||
| 21 | * provisions of GPL are applicable instead of those above. If you wish | |||
| 22 | * to allow use of your version of this file only under the terms of the | |||
| 23 | * GPL and not to allow others to use your version of this file under the | |||
| 24 | * License, indicate your decision by deleting the provisions above and | |||
| 25 | * replace them with the notice and other provisions required by the GPL. | |||
| 26 | * If you do not delete the provisions above, a recipient may use your | |||
| 27 | * version of this file under either the License or the GPL. | |||
| 28 | */ | |||
| 29 | ||||
| 30 | /* | |||
| 31 | * encoding.c -- | |||
| 32 | * | |||
| 33 | * Defines standard default charset to encoding mappings. | |||
| 34 | */ | |||
| 35 | ||||
| 36 | #include "nsd.h" | |||
| 37 | ||||
| 38 | /* | |||
| 39 | * Local functions defined in this file. | |||
| 40 | */ | |||
| 41 | ||||
| 42 | static void AddCharset(const char *charset, const char *name) | |||
| 43 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))) NS_GNUC_NONNULL(2)__attribute__((__nonnull__(2))); | |||
| 44 | ||||
| 45 | static void AddExtension(const char *ext, const char *name) | |||
| 46 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))) NS_GNUC_NONNULL(2)__attribute__((__nonnull__(2))); | |||
| 47 | ||||
| 48 | static Tcl_Encoding LoadEncoding(const char *name) | |||
| 49 | NS_GNUC_NONNULL(1)__attribute__((__nonnull__(1))); | |||
| 50 | ||||
| 51 | static Ns_ServerInitProc ConfigServerEncodings; | |||
| 52 | ||||
| 53 | /* | |||
| 54 | * Local variables defined in this file. | |||
| 55 | */ | |||
| 56 | ||||
| 57 | static Tcl_HashTable extensions; /* Maps file extensions to charsets. */ | |||
| 58 | static Tcl_HashTable charsets; /* Maps Internet charset names to Tcl encoding names */ | |||
| 59 | static Tcl_HashTable encnames; /* Maps Tcl encoding names to Internet charset names. */ | |||
| 60 | static Tcl_HashTable encodings; /* Cache of loaded Tcl encodings */ | |||
| 61 | ||||
| 62 | static Ns_Mutex lock; /* Lock around encodings. */ | |||
| 63 | static Ns_Cond cond; | |||
| 64 | ||||
| 65 | Tcl_Encoding NS_utf8Encoding = NULL((void*)0); /* Cached pointer to utf-8 encoding. */ | |||
| 66 | ||||
| 67 | #define EncodingLocked((Tcl_Encoding) (-1)) ((Tcl_Encoding) (-1)) | |||
| 68 | ||||
/*
 * Built-in defaults mapping a file extension to the name of the encoding
 * used when reading such a file from disk (mainly ADP).  The table is
 * terminated by an all-NULL entry.
 */

static const struct {
    const char *extension;
    const char *name;
} builtinExt[] = {
    { .extension = ".txt",  .name = "ascii" },
    { .extension = ".htm",  .name = "utf-8" },
    { .extension = ".html", .name = "utf-8" },
    { .extension = ".adp",  .name = "utf-8" },
    { .extension = NULL,    .name = NULL    }
};
| 84 | ||||
/*
 * The following table provides HTTP charset aliases for Tcl encoding names.
 * Each entry maps a (lowercase) charset label to the name of the Tcl
 * encoding implementing it.  The table is terminated by an all-NULL entry.
 *
 * Several encodings have more than one alias.  The reverse mapping
 * (encoding name -> charset) built by AddCharset() keeps the first alias
 * listed for an encoding, so the order of the entries matters.
 */

static const struct {
    const char *charset;
    const char *name;
} builtinChar[] = {
    { "gb18030", "cp936" },
    { "gb_2312-80", "gb2312" },
    { "ibm437", "cp437" },
    { "ibm775", "cp775" },
    { "ibm850", "cp850" },
    { "ibm852", "cp852" },
    { "ibm855", "cp855" },
    { "ibm857", "cp857" },
    { "ibm860", "cp860" },
    { "ibm861", "cp861" },
    { "ibm862", "cp862" },
    { "ibm863", "cp863" },
    { "ibm864", "cp864" },
    { "ibm865", "cp865" },
    { "ibm866", "cp866" },
    { "ibm869", "cp869" },
    { "iso-2022-jp", "iso2022-jp" },
    { "iso-2022-kr", "iso2022-kr" },
    { "iso-8859-1", "iso8859-1" },
    { "iso-8859-10", "iso8859-10" },
    { "iso-8859-13", "iso8859-13" },
    { "iso-8859-14", "iso8859-14" },
    { "iso-8859-15", "iso8859-15" },
    { "iso-8859-16", "iso8859-16" },
    { "iso-8859-2", "iso8859-2" },
    { "iso-8859-3", "iso8859-3" },
    { "iso-8859-4", "iso8859-4" },
    { "iso-8859-5", "iso8859-5" },
    { "iso-8859-6", "iso8859-6" },
    { "iso-8859-7", "iso8859-7" },
    { "iso-8859-8", "iso8859-8" },
    { "iso-8859-9", "iso8859-9" },
    { "jis_x0201", "jis0201" },
    { "jis_x0212-1990", "jis0212" },
    { "korean", "ksc5601" },
    { "ksc_5601", "ksc5601" },
    { "mac", "macRoman" },
    { "mac-centeuro", "macCentEuro" },
    /*
     * "centraleupore" is a historical misspelling of "centraleurope".  The
     * misspelled aliases are kept so that existing configurations continue
     * to work; the correctly spelled aliases are listed right after them.
     */
    { "mac-centraleupore", "macCentEuro" },
    { "mac-centraleurope", "macCentEuro" },
    { "mac-croatian", "macCroatian" },
    { "mac-cyrillic", "macCyrillic" },
    { "mac-greek", "macGreek" },
    { "mac-iceland", "macIceland" },
    { "mac-japan", "macJapan" },
    { "mac-roman", "macRoman" },
    { "mac-romania", "macRomania" },
    { "mac-thai", "macThai" },
    { "mac-turkish", "macTurkish" },
    { "mac-ukraine", "macUkraine" },
    { "maccenteuro", "macCentEuro" },
    { "maccentraleupore", "macCentEuro" },  /* misspelling, kept for compatibility */
    { "maccentraleurope", "macCentEuro" },
    { "maccroatian", "macCroatian" },
    { "maccyrillic", "macCyrillic" },
    { "macgreek", "macGreek" },
    { "maciceland", "macIceland" },
    { "macintosh", "macRoman" },
    { "macjapan", "macJapan" },
    { "macroman", "macRoman" },
    { "macromania", "macRomania" },
    { "macthai", "macThai" },
    { "macturkish", "macTurkish" },
    { "macukraine", "macUkraine" },
    { "shift_jis", "shiftjis" },
    { "us-ascii", "ascii" },
    { "windows-1250", "cp1250" },
    { "windows-1251", "cp1251" },
    { "windows-1252", "cp1252" },
    { "windows-1253", "cp1253" },
    { "windows-1254", "cp1254" },
    { "windows-1255", "cp1255" },
    { "windows-1256", "cp1256" },
    { "windows-1257", "cp1257" },
    { "windows-1258", "cp1258" },
    { "windows-31j", "cp932" },
    { "windows-874", "cp874" },
    { "x-mac", "macRoman" },
    { "x-mac-centeuro", "macCentEuro" },
    { "x-mac-centraleupore", "macCentEuro" },  /* misspelling, kept for compatibility */
    { "x-mac-centraleurope", "macCentEuro" },
    { "x-mac-croatian", "macCroatian" },
    { "x-mac-cyrillic", "macCyrillic" },
    { "x-mac-greek", "macGreek" },
    { "x-mac-iceland", "macIceland" },
    { "x-mac-japan", "macJapan" },
    { "x-mac-roman", "macRoman" },
    { "x-mac-romania", "macRomania" },
    { "x-mac-thai", "macThai" },
    { "x-mac-turkish", "macTurkish" },
    { "x-mac-ukraine", "macUkraine" },
    { "x-macintosh", "macRoman" },

    /*
     * The following entries are strictly speaking not needed, since the
     * IANA name is identical with the Tcl charset name. We add these to
     * be able to return the full set of supported IANA charsets via
     * [ns_charsets].
     *
     * See: https://www.iana.org/assignments/character-sets/character-sets.xml
     */
    { "big5", "big5" },
    { "euc-jp", "euc-jp" },
    { "euc-kr", "euc-kr" },
    { "gb2312", "gb2312" },
    { "koi8-r", "koi8-r" },
    { "koi8-u", "koi8-u" },
    { "tis-620", "tis-620" },
    { "utf-8", "utf-8" },

    { NULL, NULL }
};
| 202 | ||||
| 203 | ||||
| 204 | /* | |||
| 205 | *---------------------------------------------------------------------- | |||
| 206 | * | |||
| 207 | * NsConfigEncodings -- | |||
| 208 | * | |||
| 209 | * Configure charset aliases and file extension mappings. | |||
| 210 | * | |||
| 211 | * Results: | |||
| 212 | * None. | |||
| 213 | * | |||
| 214 | * Side effects: | |||
| 215 | * None. | |||
| 216 | * | |||
| 217 | *---------------------------------------------------------------------- | |||
| 218 | */ | |||
| 219 | ||||
| 220 | void | |||
| 221 | NsConfigEncodings(void) | |||
| 222 | { | |||
| 223 | const Ns_Set *set; | |||
| 224 | size_t i; | |||
| 225 | ||||
| 226 | Ns_MutexSetName(&lock, "ns:encodings"); | |||
| 227 | Tcl_InitHashTable(&extensions, TCL_STRING_KEYS(0)); | |||
| 228 | Tcl_InitHashTable(&charsets, TCL_STRING_KEYS(0)); | |||
| 229 | Tcl_InitHashTable(&encnames, TCL_STRING_KEYS(0)); | |||
| 230 | Tcl_InitHashTable(&encodings, TCL_STRING_KEYS(0)); | |||
| 231 | NS_utf8Encoding = Ns_GetCharsetEncoding("utf-8"); | |||
| 232 | ||||
| 233 | /* | |||
| 234 | * Add default charsets and file mappings. | |||
| 235 | */ | |||
| 236 | ||||
| 237 | for (i = 0u; builtinChar[i].charset != NULL((void*)0); ++i) { | |||
| 238 | AddCharset(builtinChar[i].charset, builtinChar[i].name); | |||
| 239 | } | |||
| 240 | for (i = 0u; builtinExt[i].extension != NULL((void*)0); ++i) { | |||
| 241 | AddExtension(builtinExt[i].extension, builtinExt[i].name); | |||
| 242 | } | |||
| 243 | ||||
| 244 | /* | |||
| 245 | * Add configured charsets and file mappings. | |||
| 246 | */ | |||
| 247 | ||||
| 248 | set = Ns_ConfigGetSection("ns/charsets"); | |||
| 249 | for (i = 0u; set != NULL((void*)0) && i < Ns_SetSize(set)((set)->size); ++i) { | |||
| 250 | AddCharset(Ns_SetKey(set, i)((set)->fields[(i)].name), Ns_SetValue(set, i)((set)->fields[(i)].value)); | |||
| 251 | } | |||
| 252 | set = Ns_ConfigGetSection("ns/encodings"); | |||
| 253 | for (i = 0u; set != NULL((void*)0) && i < Ns_SetSize(set)((set)->size); ++i) { | |||
| 254 | AddExtension(Ns_SetKey(set, i)((set)->fields[(i)].name), Ns_SetValue(set, i)((set)->fields[(i)].value)); | |||
| 255 | } | |||
| 256 | ||||
| 257 | NsRegisterServerInit(ConfigServerEncodings); | |||
| 258 | } | |||
| 259 | ||||
| 260 | static Ns_ReturnCode | |||
| 261 | ConfigServerEncodings(const char *server) | |||
| 262 | { | |||
| 263 | NsServer *servPtr = NsGetServer(server); | |||
| 264 | Ns_ReturnCode result; | |||
| 265 | ||||
| 266 | if (unlikely(servPtr == NULL)(__builtin_expect((servPtr == ((void*)0)), 0))) { | |||
| 267 | Ns_Log(Warning, "Could not set encoding, server '%s' unknown", server); | |||
| 268 | result = NS_ERROR; | |||
| 269 | ||||
| 270 | } else { | |||
| 271 | const char *path; | |||
| 272 | ||||
| 273 | /* | |||
| 274 | * Configure the encoding used in the request URL. | |||
| 275 | */ | |||
| 276 | ||||
| 277 | path = Ns_ConfigSectionPath(NULL((void*)0), server, NULL((void*)0), (char *)0L); | |||
| 278 | ||||
| 279 | servPtr->encoding.urlCharset = | |||
| 280 | Ns_ConfigString(path, "urlCharset", "utf-8"); | |||
| 281 | ||||
| 282 | servPtr->encoding.urlEncoding = | |||
| 283 | Ns_GetCharsetEncoding(servPtr->encoding.urlCharset); | |||
| 284 | ||||
| 285 | if (servPtr->encoding.urlEncoding == NULL((void*)0)) { | |||
| 286 | Ns_Log(Warning, "no encoding found for charset \"%s\" from config", | |||
| 287 | servPtr->encoding.urlCharset); | |||
| 288 | } | |||
| 289 | servPtr->encoding.formFallbackCharset = | |||
| 290 | Ns_ConfigString(path, "FormFallbackCharset", NULL((void*)0)); | |||
| 291 | if (servPtr->encoding.formFallbackCharset != NULL((void*)0) | |||
| 292 | && *servPtr->encoding.formFallbackCharset == '\0') { | |||
| 293 | servPtr->encoding.formFallbackCharset = NULL((void*)0); | |||
| 294 | } | |||
| 295 | ||||
| 296 | /* | |||
| 297 | * Configure the encoding used for Tcl/ADP output. | |||
| 298 | */ | |||
| 299 | ||||
| 300 | servPtr->encoding.outputCharset = | |||
| 301 | Ns_ConfigString(path, "outputCharset", "utf-8"); | |||
| 302 | ||||
| 303 | servPtr->encoding.outputEncoding = | |||
| 304 | Ns_GetCharsetEncoding(servPtr->encoding.outputCharset); | |||
| 305 | if (servPtr->encoding.outputEncoding == NULL((void*)0)) { | |||
| 306 | Ns_Fatal("could not find encoding for default output charset \"%s\"", | |||
| 307 | servPtr->encoding.outputCharset); | |||
| 308 | } | |||
| 309 | ||||
| 310 | result = NS_OK; | |||
| 311 | ||||
| 312 | } | |||
| 313 | return result; | |||
| 314 | } | |||
| 315 | ||||
| 316 | ||||
| 317 | /* | |||
| 318 | *---------------------------------------------------------------------- | |||
| 319 | * | |||
| 320 | * Ns_GetFileEncoding -- | |||
| 321 | * | |||
| 322 | * Return the Tcl_Encoding that should be used to read a file from disk | |||
| 323 | * according to its extension. | |||
| 324 | * | |||
| 325 | * Note this may not be the same as the encoding for the charset of the | |||
| 326 | * file's mimetype. | |||
| 327 | * | |||
| 328 | * Results: | |||
| 329 | * Tcl_Encoding or NULL if not found. | |||
| 330 | * | |||
| 331 | * Side effects: | |||
| 332 | * See Ns_GetCharsetEncoding(). | |||
| 333 | * | |||
| 334 | *---------------------------------------------------------------------- | |||
| 335 | */ | |||
| 336 | ||||
| 337 | Tcl_Encoding | |||
| 338 | Ns_GetFileEncoding(const char *file) | |||
| 339 | { | |||
| 340 | const char *ext; | |||
| 341 | Tcl_Encoding encoding = NULL((void*)0); | |||
| 342 | ||||
| 343 | NS_NONNULL_ASSERT(file != NULL)((void) (0)); | |||
| 344 | ||||
| 345 | ext = strrchr(file, INTCHAR('.')((int)((unsigned char)(('.'))))); | |||
| 346 | if (ext != NULL((void*)0)) { | |||
| 347 | const Tcl_HashEntry *hPtr = Tcl_FindHashEntry(&extensions, ext)(*((&extensions)->findProc))(&extensions, (const char *)(ext)); | |||
| 348 | ||||
| 349 | if (hPtr != NULL((void*)0)) { | |||
| 350 | const char *name = Tcl_GetHashValue(hPtr)((hPtr)->clientData); | |||
| 351 | encoding = Ns_GetCharsetEncoding(name); | |||
| 352 | } | |||
| 353 | } | |||
| 354 | return encoding; | |||
| 355 | } | |||
| 356 | ||||
| 357 | ||||
| 358 | /* | |||
| 359 | *---------------------------------------------------------------------- | |||
| 360 | * | |||
| 361 | * Ns_GetTypeEncoding -- | |||
| 362 | * | |||
| 363 | * Return the Tcl_Encoding for the given Content-type header, | |||
| 364 | * e.g., "text/html; charset=iso-8859-1" returns Tcl_Encoding | |||
| 365 | * for iso8859-1. | |||
| 366 | * | |||
| 367 | * This function will utilize the ns/parameters/OutputCharset | |||
| 368 | * config parameter if given a content-type "text/<anything>" with | |||
| 369 | * no charset. | |||
| 370 | * | |||
| 371 | * When no OutputCharset defined, the fall-back behavior is to | |||
| 372 | * return NULL. | |||
| 373 | * | |||
| 374 | * Results: | |||
| 375 | * Tcl_Encoding or NULL if not found. | |||
| 376 | * | |||
| 377 | * Side effects: | |||
| 378 | * See LoadEncoding(). | |||
| 379 | * | |||
| 380 | *---------------------------------------------------------------------- | |||
| 381 | */ | |||
| 382 | ||||
| 383 | Tcl_Encoding | |||
| 384 | Ns_GetTypeEncoding(const char *mimeType) | |||
| 385 | { | |||
| 386 | const char *charset; | |||
| 387 | size_t len; | |||
| 388 | ||||
| 389 | NS_NONNULL_ASSERT(mimeType != NULL)((void) (0)); | |||
| 390 | ||||
| 391 | charset = NsFindCharset(mimeType, &len); | |||
| 392 | return (charset != NULL((void*)0)) ? Ns_GetCharsetEncodingEx(charset, (int)len) : NULL((void*)0); | |||
| 393 | } | |||
| 394 | ||||
| 395 | ||||
| 396 | /* | |||
| 397 | *---------------------------------------------------------------------- | |||
| 398 | * | |||
| 399 | * Ns_GetCharsetEncoding, Ns_GetCharsetEncodingEx -- | |||
| 400 | * | |||
| 401 | * Return the Tcl_Encoding for the given charset, e.g., | |||
| 402 | * "iso-8859-1" returns Tcl_Encoding for iso8859-1. | |||
| 403 | * | |||
| 404 | * Results: | |||
| 405 | * Tcl_Encoding or NULL if not found. | |||
| 406 | * | |||
| 407 | * Side effects: | |||
| 408 | * See LoadEncoding(). | |||
| 409 | * | |||
| 410 | *---------------------------------------------------------------------- | |||
| 411 | */ | |||
| 412 | ||||
| 413 | Tcl_Encoding | |||
| 414 | Ns_GetCharsetEncoding(const char *charset) | |||
| 415 | { | |||
| 416 | NS_NONNULL_ASSERT(charset != NULL)((void) (0)); | |||
| 417 | ||||
| 418 | return Ns_GetCharsetEncodingEx(charset, -1); | |||
| 419 | } | |||
| 420 | ||||
| 421 | Tcl_Encoding | |||
| 422 | Ns_GetCharsetEncodingEx(const char *charset, int len) | |||
| 423 | { | |||
| 424 | const Tcl_HashEntry *hPtr; | |||
| 425 | Tcl_Encoding encoding; | |||
| 426 | Ns_DStringTcl_DString ds; | |||
| 427 | ||||
| 428 | NS_NONNULL_ASSERT(charset != NULL)((void) (0)); | |||
| 429 | ||||
| 430 | /* | |||
| 431 | * Cleanup the charset name and check for an | |||
| 432 | * alias (e.g., iso-8859-1 = iso8859-1) before | |||
| 433 | * assuming the charset and Tcl encoding names | |||
| 434 | * match (e.g., big5). | |||
| 435 | */ | |||
| 436 | ||||
| 437 | Ns_DStringInitTcl_DStringInit(&ds); | |||
| 438 | Ns_DStringNAppendTcl_DStringAppend(&ds, charset, len); | |||
| 439 | charset = Ns_StrTrim(Ns_StrToLower(ds.string)); | |||
| 440 | hPtr = Tcl_FindHashEntry(&charsets, charset)(*((&charsets)->findProc))(&charsets, (const char * )(charset)); | |||
| 441 | if (hPtr != NULL((void*)0)) { | |||
| 442 | charset = Tcl_GetHashValue(hPtr)((hPtr)->clientData); | |||
| 443 | } | |||
| 444 | encoding = LoadEncoding(charset); | |||
| 445 | Ns_DStringFreeTcl_DStringFree(&ds); | |||
| 446 | ||||
| 447 | return encoding; | |||
| 448 | } | |||
| 449 | ||||
| 450 | Tcl_Encoding | |||
| 451 | Ns_GetEncoding(const char *name) | |||
| 452 | { | |||
| 453 | /* Deprecated, use Ns_GetCharsetEncodingEx(). */ | |||
| 454 | return LoadEncoding(name); | |||
| 455 | } | |||
| 456 | ||||
| 457 | ||||
| 458 | /* | |||
| 459 | *---------------------------------------------------------------------- | |||
| 460 | * | |||
| 461 | * Ns_GetEncodingCharset -- | |||
| 462 | * | |||
| 463 | * Return the charset name for the given Tcl_Encoding. | |||
| 464 | * | |||
| 465 | * Results: | |||
| 466 | * Charset name, or encoding name if no alias. | |||
| 467 | * | |||
| 468 | * Side effects: | |||
| 469 | * None. | |||
| 470 | * | |||
| 471 | *---------------------------------------------------------------------- | |||
| 472 | */ | |||
| 473 | ||||
| 474 | const char * | |||
| 475 | Ns_GetEncodingCharset(Tcl_Encoding encoding) | |||
| 476 | { | |||
| 477 | const char *encname, *charset = NULL((void*)0); | |||
| 478 | const Tcl_HashEntry *hPtr; | |||
| 479 | ||||
| 480 | NS_NONNULL_ASSERT(encoding != NULL)((void) (0)); | |||
| 481 | ||||
| 482 | encname = Tcl_GetEncodingName(encoding); | |||
| 483 | hPtr = Tcl_FindHashEntry(&encnames, encname)(*((&encnames)->findProc))(&encnames, (const char * )(encname)); | |||
| 484 | if (hPtr != NULL((void*)0)) { | |||
| 485 | charset = Tcl_GetHashValue(hPtr)((hPtr)->clientData); | |||
| 486 | } | |||
| 487 | return (charset != NULL((void*)0)) ? charset : encname; | |||
| 488 | } | |||
| 489 | ||||
| 490 | ||||
| 491 | /* | |||
| 492 | *---------------------------------------------------------------------- | |||
| 493 | * | |||
| 494 | * NsFindCharset -- | |||
| 495 | * | |||
| 496 | * Find start of charset within a mime-type string. | |||
| 497 | * | |||
| 498 | * Results: | |||
| 499 | * Pointer to start of charset or NULL on no charset. | |||
| 500 | * | |||
| 501 | * Side effects: | |||
| 502 | * None. | |||
| 503 | * | |||
| 504 | *---------------------------------------------------------------------- | |||
| 505 | */ | |||
| 506 | ||||
| 507 | const char * | |||
| 508 | NsFindCharset(const char *mimetype, size_t *lenPtr) | |||
| 509 | { | |||
| 510 | const char *start, *charset = NULL((void*)0); | |||
| 511 | ||||
| 512 | NS_NONNULL_ASSERT(mimetype != NULL)((void) (0)); | |||
| 513 | NS_NONNULL_ASSERT(lenPtr != NULL)((void) (0)); | |||
| 514 | ||||
| 515 | start = Ns_StrCaseFind(mimetype, "charset"); | |||
| 516 | if (start != NULL((void*)0)) { | |||
| 517 | start += 7; | |||
| 518 | start += strspn(start, " "); | |||
| 519 | if (*start++ == '=') { | |||
| 520 | const char *end; | |||
| 521 | ||||
| 522 | start += strspn(start, " "); | |||
| 523 | end = start; | |||
| 524 | while (*end != '\0' && CHARTYPE(space, *end)(((*__ctype_b_loc ())[(int) (((int)((unsigned char)(*end))))] & (unsigned short int) _ISspace)) == 0) { | |||
| 525 | ++end; | |||
| 526 | } | |||
| 527 | *lenPtr = (size_t)(end - start); | |||
| 528 | charset = start; | |||
| 529 | } | |||
| 530 | } | |||
| 531 | return charset; | |||
| 532 | } | |||
| 533 | ||||
| 534 | ||||
| 535 | /* | |||
| 536 | *---------------------------------------------------------------------- | |||
| 537 | * | |||
| 538 | * NsTclCharsetsObjCmd -- | |||
| 539 | * | |||
| 540 | * Implements "ns_charsets". The command returns the list of charsets for | |||
| 541 | * which encodings are defined. | |||
| 542 | * | |||
| 543 | * Results: | |||
| 544 | * TCL_OK | |||
| 545 | * | |||
| 546 | * Side effects: | |||
| 547 | * Sets Tcl interpreter result. | |||
| 548 | * | |||
| 549 | *---------------------------------------------------------------------- | |||
| 550 | */ | |||
| 551 | ||||
| 552 | int | |||
| 553 | NsTclCharsetsObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, | |||
| 554 | int UNUSED(objc)UNUSED_objc __attribute__((__unused__)), Tcl_Obj *const* UNUSED(objv)UNUSED_objv __attribute__((__unused__))) | |||
| 555 | { | |||
| 556 | const Tcl_HashEntry *hPtr; | |||
| 557 | Tcl_HashSearch search; | |||
| 558 | Tcl_Obj *listObj = Tcl_NewListObj(0, NULL((void*)0)); | |||
| 559 | ||||
| 560 | for (hPtr = Tcl_FirstHashEntry(&charsets, &search); | |||
| 561 | hPtr != NULL((void*)0); | |||
| 562 | hPtr = Tcl_NextHashEntry(&search) | |||
| 563 | ) { | |||
| 564 | const char *key = Tcl_GetHashKey(&charsets, hPtr)((void *) (((&charsets)->keyType == (1) || (&charsets )->keyType == (-1)) ? (hPtr)->key.oneWordValue : (hPtr) ->key.string)); | |||
| 565 | Tcl_ListObjAppendElement(interp, listObj, Tcl_NewStringObj(key, -1)); | |||
| 566 | } | |||
| 567 | Tcl_SetObjResult(interp, listObj); | |||
| 568 | ||||
| 569 | return TCL_OK0; | |||
| 570 | } | |||
| 571 | ||||
| 572 | ||||
| 573 | ||||
| 574 | /* | |||
| 575 | *---------------------------------------------------------------------- | |||
| 576 | * | |||
| 577 | * NsTclEncodingForCharsetObjCmd -- | |||
| 578 | * | |||
| 579 | * Implements "ns_encodingforcharset". The command returns the name of | |||
| 580 | * the encoding for the specified charset. | |||
| 581 | * | |||
| 582 | * Results: | |||
| 583 | * Tcl result contains an encoding name or "". | |||
| 584 | * | |||
| 585 | * Side effects: | |||
| 586 | * None. | |||
| 587 | * | |||
| 588 | *---------------------------------------------------------------------- | |||
| 589 | */ | |||
| 590 | ||||
| 591 | int | |||
| 592 | NsTclEncodingForCharsetObjCmd(ClientData UNUSED(clientData)UNUSED_clientData __attribute__((__unused__)), Tcl_Interp *interp, int objc, Tcl_Obj *const* objv) | |||
| 593 | { | |||
| 594 | int result = TCL_OK0; | |||
| 595 | ||||
| 596 | if (objc != 2) { | |||
| 597 | Tcl_WrongNumArgs(interp, 1, objv, "charset"); | |||
| 598 | result = TCL_ERROR1; | |||
| 599 | } else { | |||
| 600 | int encodingNameLen; | |||
| 601 | const char *encodingName = Tcl_GetStringFromObj(objv[1], &encodingNameLen); | |||
| 602 | Tcl_Encoding encoding = Ns_GetCharsetEncodingEx(encodingName, encodingNameLen); | |||
| 603 | ||||
| 604 | if (encoding != NULL((void*)0)) { | |||
| 605 | Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_GetEncodingName(encoding), -1)); | |||
| 606 | } | |||
| 607 | } | |||
| 608 | ||||
| 609 | return result; | |||
| 610 | } | |||
| 611 | ||||
| 612 | ||||
| 613 | /* | |||
| 614 | *---------------------------------------------------------------------- | |||
| 615 | * | |||
| 616 | * NsEncodingIsUtf8 -- | |||
| 617 | * | |||
| 618 | * Is the given encoding the utf-8 encoding? | |||
| 619 | * | |||
| 620 | * Results: | |||
| 621 | * Boolean. | |||
| 622 | * | |||
| 623 | * Side effects: | |||
| 624 | * None. | |||
| 625 | * | |||
| 626 | *---------------------------------------------------------------------- | |||
| 627 | */ | |||
| 628 | ||||
| 629 | bool_Bool | |||
| 630 | NsEncodingIsUtf8(const Tcl_Encoding encoding) | |||
| 631 | { | |||
| 632 | return (encoding == NS_utf8Encoding); | |||
| 633 | } | |||
| 634 | ||||
| 635 | ||||
| 636 | /* | |||
| 637 | *---------------------------------------------------------------------- | |||
| 638 | * | |||
| 639 | * LoadEncoding -- | |||
| 640 | * | |||
| 641 | * Return the Tcl_Encoding for the given charset. | |||
| 642 | * | |||
| 643 | * Results: | |||
| 644 | * Tcl_Encoding or NULL if not found. | |||
| 645 | * | |||
| 646 | * Side effects: | |||
| 647 | * Will load encoding from disk on first access. | |||
| 648 | * May wait for other thread to load encoding from disk. | |||
| 649 | * | |||
| 650 | *---------------------------------------------------------------------- | |||
| 651 | */ | |||
| 652 | ||||
| 653 | static Tcl_Encoding | |||
| 654 | LoadEncoding(const char *name) | |||
| 655 | { | |||
| 656 | Tcl_HashEntry *hPtr; | |||
| 657 | Tcl_Encoding encoding; | |||
| 658 | int isNew; | |||
| 659 | ||||
| 660 | NS_NONNULL_ASSERT(name != NULL)((void) (0)); | |||
| 661 | ||||
| 662 | Ns_MutexLock(&lock); | |||
| 663 | hPtr = Tcl_CreateHashEntry(&encodings, name, &isNew)(*((&encodings)->createProc))(&encodings, (const char *)(name), &isNew); | |||
| 664 | if (isNew == 0) { | |||
| 665 | while ((encoding = Tcl_GetHashValue(hPtr)((hPtr)->clientData)) == EncodingLocked((Tcl_Encoding) (-1))) { | |||
| 666 | Ns_CondWait(&cond, &lock); | |||
| 667 | } | |||
| 668 | } else { | |||
| 669 | Tcl_SetHashValue(hPtr, INT2PTR(EncodingLocked))((hPtr)->clientData = (ClientData) (((void *)(intptr_t)((( Tcl_Encoding) (-1)))))); | |||
| ||||
| 670 | Ns_MutexUnlock(&lock); | |||
| 671 | encoding = Tcl_GetEncoding(NULL((void*)0), name); | |||
| 672 | if (encoding == NULL((void*)0)) { | |||
| 673 | Ns_Log(Warning, "encoding: could not load: '%s'", name); | |||
| 674 | } else { | |||
| 675 | Ns_Log(Debug, "encoding: loaded: %s", name); | |||
| 676 | } | |||
| 677 | Ns_MutexLock(&lock); | |||
| 678 | Tcl_SetHashValue(hPtr, encoding)((hPtr)->clientData = (ClientData) (encoding)); | |||
| 679 | Ns_CondBroadcast(&cond); | |||
| 680 | } | |||
| 681 | Ns_MutexUnlock(&lock); | |||
| 682 | ||||
| 683 | return encoding; | |||
| 684 | } | |||
| 685 | ||||
| 686 | ||||
| 687 | /* | |||
| 688 | *---------------------------------------------------------------------- | |||
| 689 | * | |||
| 690 | * AddCharset, AddExtension -- | |||
| 691 | * | |||
| 692 | * Add extensiont to encoding mapping and charset aliases. | |||
| 693 | * | |||
| 694 | * Results: | |||
| 695 | * None. | |||
| 696 | * | |||
| 697 | * Side effects: | |||
| 698 | * None. | |||
| 699 | * | |||
| 700 | *---------------------------------------------------------------------- | |||
| 701 | */ | |||
| 702 | ||||
| 703 | static void | |||
| 704 | AddExtension(const char *ext, const char *name) | |||
| 705 | { | |||
| 706 | Tcl_HashEntry *hPtr; | |||
| 707 | int isNew; | |||
| 708 | ||||
| 709 | NS_NONNULL_ASSERT(ext != NULL)((void) (0)); | |||
| 710 | NS_NONNULL_ASSERT(name != NULL)((void) (0)); | |||
| 711 | ||||
| 712 | hPtr = Tcl_CreateHashEntry(&extensions, ext, &isNew)(*((&extensions)->createProc))(&extensions, (const char *)(ext), &isNew); | |||
| 713 | Tcl_SetHashValue(hPtr, name)((hPtr)->clientData = (ClientData) (name)); | |||
| 714 | } | |||
| 715 | ||||
| 716 | static void | |||
| 717 | AddCharset(const char *charset, const char *name) | |||
| 718 | { | |||
| 719 | Tcl_HashEntry *hPtr; | |||
| 720 | Ns_DStringTcl_DString ds; | |||
| 721 | int isNew; | |||
| 722 | ||||
| 723 | NS_NONNULL_ASSERT(charset != NULL)((void) (0)); | |||
| 724 | NS_NONNULL_ASSERT(name != NULL)((void) (0)); | |||
| 725 | ||||
| 726 | Ns_DStringInitTcl_DStringInit(&ds); | |||
| 727 | charset = Ns_StrToLower(Ns_DStringAppend(&ds, charset)Tcl_DStringAppend((&ds), (charset), -1)); | |||
| 728 | ||||
| 729 | /* | |||
| 730 | * Map in the forward direction: charsets to encodings. | |||
| 731 | */ | |||
| 732 | ||||
| 733 | hPtr = Tcl_CreateHashEntry(&charsets, charset, &isNew)(*((&charsets)->createProc))(&charsets, (const char *)(charset), &isNew); | |||
| 734 | Tcl_SetHashValue(hPtr, name)((hPtr)->clientData = (ClientData) (name)); | |||
| 735 | ||||
| 736 | /* | |||
| 737 | * Map in the reverse direction: encodings to charsets. | |||
| 738 | * Nb: Ignore duplicate mappings. | |||
| 739 | */ | |||
| 740 | ||||
| 741 | hPtr = Tcl_CreateHashEntry(&encnames, name, &isNew)(*((&encnames)->createProc))(&encnames, (const char *)(name), &isNew); | |||
| 742 | if (isNew != 0) { | |||
| 743 | Tcl_SetHashValue(hPtr, ns_strdup(charset))((hPtr)->clientData = (ClientData) (ns_strdup(charset))); | |||
| 744 | } | |||
| 745 | ||||
| 746 | Ns_DStringFreeTcl_DStringFree(&ds); | |||
| 747 | } | |||
| 748 | ||||
| 749 | /* | |||
| 750 | *---------------------------------------------------------------------- | |||
| 751 | * | |||
| 752 | * NsGetFallbackEncoding -- | |||
| 753 | * | |||
| 754 | * Obtain a fallback encoding either from a specified argument | |||
| 755 | * (fallbackCharsetObj) or from the configuration. | |||
| 756 | * | |||
| 757 | * The resolving order is | |||
| 758 | * - use command line parameter, if specified. | |||
| 759 | * - use per server parameter "formFallbackCharset" if specified; | |||
| 760 | * - use global server parameter "formFallbackCharset" if specified. | |||
| 761 | * The last two options are only tried, when "fromConfig" is true. | |||
| 762 | * | |||
| 763 | * Results: | |||
| 764 | * NS_OK or NS_ERROR. In the error case, an error message is left in the | |||
| 765 | * interp result. | |||
| 766 | * | |||
| 767 | * Side effects: | |||
| 768 | * None. | |||
| 769 | * | |||
| 770 | *---------------------------------------------------------------------- | |||
| 771 | */ | |||
| 772 | ||||
| 773 | Ns_ReturnCode | |||
| 774 | NsGetFallbackEncoding(Tcl_Interp *interp, NsServer *servPtr, | |||
| 775 | Tcl_Obj *fallbackCharsetObj, bool_Bool fromConfig, | |||
| 776 | Tcl_Encoding *encodingPtr) | |||
| 777 | { | |||
| 778 | const char *fallbackCharsetString = NULL((void*)0); | |||
| 779 | Ns_ReturnCode result = NS_OK; | |||
| 780 | ||||
| 781 | NS_NONNULL_ASSERT(interp != NULL)((void) (0)); | |||
| 782 | NS_NONNULL_ASSERT(encodingPtr != NULL)((void) (0)); | |||
| 783 | ||||
| 784 | if (fallbackCharsetObj != NULL((void*)0)) { | |||
| ||||
| 785 | fallbackCharsetString = Tcl_GetString(fallbackCharsetObj); | |||
| 786 | if (*fallbackCharsetString == '\0') { | |||
| 787 | fallbackCharsetString = NULL((void*)0); | |||
| 788 | } | |||
| 789 | } | |||
| 790 | if (fromConfig) { | |||
| 791 | if (fallbackCharsetString == NULL((void*)0) && servPtr != NULL((void*)0)) { | |||
| 792 | fallbackCharsetString = servPtr->encoding.formFallbackCharset; | |||
| 793 | } | |||
| 794 | if (fallbackCharsetString == NULL((void*)0) && servPtr != NULL((void*)0)) { | |||
| 795 | fallbackCharsetString = nsconf.formFallbackCharset; | |||
| 796 | } | |||
| 797 | } | |||
| 798 | Ns_Log(Debug, "NsGetFallbackEncoding fromConfig %p %d, '%s'", | |||
| 799 | (void*)fallbackCharsetObj, fromConfig, fallbackCharsetString); | |||
| 800 | if (fallbackCharsetString
| |||
| 801 | *encodingPtr = Ns_GetCharsetEncoding(fallbackCharsetString); | |||
| 802 | if (*encodingPtr == NULL((void*)0)) { | |||
| 803 | Ns_TclPrintfResult(interp, | |||
| 804 | "invalid fallback encoding: '%s'", | |||
| 805 | fallbackCharsetString); | |||
| 806 | result = NS_ERROR; | |||
| 807 | } | |||
| 808 | } else { | |||
| 809 | *encodingPtr = NULL((void*)0); | |||
| 810 | } | |||
| 811 | ||||
| 812 | return result; | |||
| 813 | } | |||
| 814 | ||||
| 815 | /* | |||
| 816 | * Local Variables: | |||
| 817 | * mode: c | |||
| 818 | * c-basic-offset: 4 | |||
| 819 | * fill-column: 78 | |||
| 820 | * indent-tabs-mode: nil | |||
| 821 | * End: | |||
| 822 | */ |