File: | out/../deps/icu-small/source/common/ucnvsel.cpp |
Warning: | line 235, column 15 Array access (via field 'encodings') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. | ||||
2 | // License & terms of use: http://www.unicode.org/copyright.html | ||||
3 | /* | ||||
4 | ******************************************************************************* | ||||
5 | * | ||||
6 | * Copyright (C) 2008-2011, International Business Machines | ||||
7 | * Corporation, Google and others. All Rights Reserved. | ||||
8 | * | ||||
9 | ******************************************************************************* | ||||
10 | */ | ||||
11 | // Author : eldawy@google.com (Mohamed Eldawy) | ||||
12 | // ucnvsel.cpp | ||||
13 | // | ||||
14 | // Purpose: To generate a list of encodings capable of handling | ||||
15 | // a given Unicode text | ||||
16 | // | ||||
17 | // Started 09-April-2008 | ||||
18 | |||||
19 | /** | ||||
20 | * \file | ||||
21 | * | ||||
22 | * This is an implementation of an encoding selector. | ||||
23 | * The goal is, given a unicode string, find the encodings | ||||
24 | * this string can be mapped to. To make processing faster | ||||
25 | * a trie is built when you call ucnvsel_open() that | ||||
26 | * stores all encodings a codepoint can map to | ||||
27 | */ | ||||
28 | |||||
29 | #include "unicode/ucnvsel.h" | ||||
30 | |||||
31 | #if !UCONFIG_NO_CONVERSION0 | ||||
32 | |||||
33 | #include <string.h> | ||||
34 | |||||
35 | #include "unicode/uchar.h" | ||||
36 | #include "unicode/uniset.h" | ||||
37 | #include "unicode/ucnv.h" | ||||
38 | #include "unicode/ustring.h" | ||||
39 | #include "unicode/uchriter.h" | ||||
40 | #include "utrie2.h" | ||||
41 | #include "propsvec.h" | ||||
42 | #include "uassert.h" | ||||
43 | #include "ucmndata.h" | ||||
44 | #include "udataswp.h" | ||||
45 | #include "uenumimp.h" | ||||
46 | #include "cmemory.h" | ||||
47 | #include "cstring.h" | ||||
48 | |||||
49 | U_NAMESPACE_USEusing namespace icu_71; | ||||
50 | |||||
51 | struct UConverterSelector { | ||||
52 | UTrie2 *trie; // 16 bit trie containing offsets into pv | ||||
53 | uint32_t* pv; // table of bits! | ||||
54 | int32_t pvCount; | ||||
55 | char** encodings; // which encodings did user ask to use? | ||||
56 | int32_t encodingsCount; | ||||
57 | int32_t encodingStrLength; | ||||
58 | uint8_t* swapped; | ||||
59 | UBool ownPv, ownEncodingStrings; | ||||
60 | }; | ||||
61 | |||||
62 | static void generateSelectorData(UConverterSelector* result, | ||||
63 | UPropsVectors *upvec, | ||||
64 | const USet* excludedCodePoints, | ||||
65 | const UConverterUnicodeSet whichSet, | ||||
66 | UErrorCode* status) { | ||||
67 | if (U_FAILURE(*status)) { | ||||
68 | return; | ||||
69 | } | ||||
70 | |||||
71 | int32_t columns = (result->encodingsCount+31)/32; | ||||
72 | |||||
73 | // set errorValue to all-ones | ||||
74 | for (int32_t col = 0; col < columns; col++) { | ||||
75 | upvec_setValueupvec_setValue_71(upvec, UPVEC_ERROR_VALUE_CP0x110001, UPVEC_ERROR_VALUE_CP0x110001, | ||||
76 | col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status); | ||||
77 | } | ||||
78 | |||||
79 | for (int32_t i = 0; i < result->encodingsCount; ++i) { | ||||
80 | uint32_t mask; | ||||
81 | uint32_t column; | ||||
82 | int32_t item_count; | ||||
83 | int32_t j; | ||||
84 | UConverter* test_converter = ucnv_openucnv_open_71(result->encodings[i], status); | ||||
85 | if (U_FAILURE(*status)) { | ||||
86 | return; | ||||
87 | } | ||||
88 | USet* unicode_point_set; | ||||
89 | unicode_point_set = uset_openuset_open_71(1, 0); // empty set | ||||
90 | |||||
91 | ucnv_getUnicodeSetucnv_getUnicodeSet_71(test_converter, unicode_point_set, | ||||
92 | whichSet, status); | ||||
93 | if (U_FAILURE(*status)) { | ||||
94 | ucnv_closeucnv_close_71(test_converter); | ||||
95 | return; | ||||
96 | } | ||||
97 | |||||
98 | column = i / 32; | ||||
99 | mask = 1 << (i%32); | ||||
100 | // now iterate over intervals on set i! | ||||
101 | item_count = uset_getItemCountuset_getItemCount_71(unicode_point_set); | ||||
102 | |||||
103 | for (j = 0; j < item_count; ++j) { | ||||
104 | UChar32 start_char; | ||||
105 | UChar32 end_char; | ||||
106 | UErrorCode smallStatus = U_ZERO_ERROR; | ||||
107 | uset_getItemuset_getItem_71(unicode_point_set, j, &start_char, &end_char, NULL__null, 0, | ||||
108 | &smallStatus); | ||||
109 | if (U_FAILURE(smallStatus)) { | ||||
110 | // this will be reached for the converters that fill the set with | ||||
111 | // strings. Those should be ignored by our system | ||||
112 | } else { | ||||
113 | upvec_setValueupvec_setValue_71(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask, | ||||
114 | status); | ||||
115 | } | ||||
116 | } | ||||
117 | ucnv_closeucnv_close_71(test_converter); | ||||
118 | uset_closeuset_close_71(unicode_point_set); | ||||
119 | if (U_FAILURE(*status)) { | ||||
120 | return; | ||||
121 | } | ||||
122 | } | ||||
123 | |||||
124 | // handle excluded encodings! Simply set their values to all 1's in the upvec | ||||
125 | if (excludedCodePoints) { | ||||
126 | int32_t item_count = uset_getItemCountuset_getItemCount_71(excludedCodePoints); | ||||
127 | for (int32_t j = 0; j < item_count; ++j) { | ||||
128 | UChar32 start_char; | ||||
129 | UChar32 end_char; | ||||
130 | |||||
131 | uset_getItemuset_getItem_71(excludedCodePoints, j, &start_char, &end_char, NULL__null, 0, | ||||
132 | status); | ||||
133 | for (int32_t col = 0; col < columns; col++) { | ||||
134 | upvec_setValueupvec_setValue_71(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), | ||||
135 | status); | ||||
136 | } | ||||
137 | } | ||||
138 | } | ||||
139 | |||||
140 | // alright. Now, let's put things in the same exact form you'd get when you | ||||
141 | // unserialize things. | ||||
142 | result->trie = upvec_compactToUTrie2WithRowIndexesupvec_compactToUTrie2WithRowIndexes_71(upvec, status); | ||||
143 | result->pv = upvec_cloneArrayupvec_cloneArray_71(upvec, &result->pvCount, NULL__null, status); | ||||
144 | result->pvCount *= columns; // number of uint32_t = rows * columns | ||||
145 | result->ownPv = TRUE1; | ||||
146 | } | ||||
147 | |||||
148 | /* open a selector. If converterListSize is 0, build for all converters. | ||||
149 | If excludedCodePoints is NULL, don't exclude any codepoints */ | ||||
150 | U_CAPIextern "C" UConverterSelector* U_EXPORT2 | ||||
151 | ucnvsel_openucnvsel_open_71(const char* const* converterList, int32_t converterListSize, | ||||
152 | const USet* excludedCodePoints, | ||||
153 | const UConverterUnicodeSet whichSet, UErrorCode* status) { | ||||
154 | // check if already failed | ||||
155 | if (U_FAILURE(*status)) { | ||||
156 | return NULL__null; | ||||
157 | } | ||||
158 | // ensure args make sense! | ||||
159 | if (converterListSize < 0 || (converterList == NULL__null && converterListSize != 0)) { | ||||
| |||||
160 | *status = U_ILLEGAL_ARGUMENT_ERROR; | ||||
161 | return NULL__null; | ||||
162 | } | ||||
163 | |||||
164 | // allocate a new converter | ||||
165 | LocalUConverterSelectorPointer newSelector( | ||||
166 | (UConverterSelector*)uprv_mallocuprv_malloc_71(sizeof(UConverterSelector))); | ||||
167 | if (newSelector.isNull()) { | ||||
168 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
169 | return NULL__null; | ||||
170 | } | ||||
171 | uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector)):: memset(newSelector.getAlias(), 0, sizeof(UConverterSelector )); | ||||
172 | |||||
173 | if (converterListSize == 0) { | ||||
174 | converterList = NULL__null; | ||||
175 | converterListSize = ucnv_countAvailableucnv_countAvailable_71(); | ||||
176 | } | ||||
177 | newSelector->encodings = | ||||
178 | (char**)uprv_mallocuprv_malloc_71(converterListSize * sizeof(char*)); | ||||
179 | if (!newSelector->encodings) { | ||||
180 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
181 | return NULL__null; | ||||
182 | } | ||||
183 | newSelector->encodings[0] = NULL__null; // now we can call ucnvsel_close() | ||||
184 | |||||
185 | // make a backup copy of the list of converters | ||||
186 | int32_t totalSize = 0; | ||||
187 | int32_t i; | ||||
188 | for (i = 0; i < converterListSize; i++) { | ||||
189 | totalSize += | ||||
190 | (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)):: strlen(converterList != __null ? converterList[i] : ucnv_getAvailableName_71 (i)) + 1; | ||||
191 | } | ||||
192 | // 4-align the totalSize to 4-align the size of the serialized form | ||||
193 | int32_t encodingStrPadding = totalSize & 3; | ||||
194 | if (encodingStrPadding != 0) { | ||||
195 | encodingStrPadding = 4 - encodingStrPadding; | ||||
196 | } | ||||
197 | newSelector->encodingStrLength = totalSize += encodingStrPadding; | ||||
198 | char* allStrings = (char*) uprv_mallocuprv_malloc_71(totalSize); | ||||
199 | if (!allStrings) { | ||||
200 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
201 | return NULL__null; | ||||
202 | } | ||||
203 | |||||
204 | for (i = 0; i < converterListSize; i++) { | ||||
205 | newSelector->encodings[i] = allStrings; | ||||
206 | uprv_strcpy(newSelector->encodings[i],:: strcpy(newSelector->encodings[i], converterList != __null ? converterList[i] : ucnv_getAvailableName_71(i)) | ||||
207 | converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)):: strcpy(newSelector->encodings[i], converterList != __null ? converterList[i] : ucnv_getAvailableName_71(i)); | ||||
208 | allStrings += uprv_strlen(newSelector->encodings[i]):: strlen(newSelector->encodings[i]) + 1; | ||||
209 | } | ||||
210 | while (encodingStrPadding > 0) { | ||||
211 | *allStrings++ = 0; | ||||
212 | --encodingStrPadding; | ||||
213 | } | ||||
214 | |||||
215 | newSelector->ownEncodingStrings = TRUE1; | ||||
216 | newSelector->encodingsCount = converterListSize; | ||||
217 | UPropsVectors *upvec = upvec_openupvec_open_71((converterListSize+31)/32, status); | ||||
218 | generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status); | ||||
219 | upvec_closeupvec_close_71(upvec); | ||||
220 | |||||
221 | if (U_FAILURE(*status)) { | ||||
222 | return NULL__null; | ||||
223 | } | ||||
224 | |||||
225 | return newSelector.orphan(); | ||||
226 | } | ||||
227 | |||||
228 | /* close opened selector */ | ||||
229 | U_CAPIextern "C" void U_EXPORT2 | ||||
230 | ucnvsel_closeucnvsel_close_71(UConverterSelector *sel) { | ||||
231 | if (!sel
| ||||
232 | return; | ||||
233 | } | ||||
234 | if (sel->ownEncodingStrings) { | ||||
235 | uprv_freeuprv_free_71(sel->encodings[0]); | ||||
| |||||
236 | } | ||||
237 | uprv_freeuprv_free_71(sel->encodings); | ||||
238 | if (sel->ownPv) { | ||||
239 | uprv_freeuprv_free_71(sel->pv); | ||||
240 | } | ||||
241 | utrie2_closeutrie2_close_71(sel->trie); | ||||
242 | uprv_freeuprv_free_71(sel->swapped); | ||||
243 | uprv_freeuprv_free_71(sel); | ||||
244 | } | ||||
245 | |||||
246 | static const UDataInfo dataInfo = { | ||||
247 | sizeof(UDataInfo), | ||||
248 | 0, | ||||
249 | |||||
250 | U_IS_BIG_ENDIAN(1234 == 4321), | ||||
251 | U_CHARSET_FAMILY0, | ||||
252 | U_SIZEOF_UCHAR2, | ||||
253 | 0, | ||||
254 | |||||
255 | { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */ | ||||
256 | { 1, 0, 0, 0 }, /* formatVersion */ | ||||
257 | { 0, 0, 0, 0 } /* dataVersion */ | ||||
258 | }; | ||||
259 | |||||
// Positions of the entries in the int32_t indexes[] array that follows the
// DataHeader in a serialized selector.
enum {
  UCNVSEL_INDEX_TRIE_SIZE = 0,     // trie size in bytes
  UCNVSEL_INDEX_PV_COUNT = 1,      // number of uint32_t in the bit vectors
  UCNVSEL_INDEX_NAMES_COUNT = 2,   // number of encoding names
  UCNVSEL_INDEX_NAMES_LENGTH = 3,  // number of encoding name bytes including padding
  UCNVSEL_INDEX_SIZE = 15,         // bytes following the DataHeader
  UCNVSEL_INDEX_COUNT = 16
};
268 | |||||
/*
 * Serialized form of a UConverterSelector, formatVersion 1:
 *
 * The serialized form begins with a standard ICU DataHeader with a UDataInfo
 * as the template above.
 * This is followed by:
 *   int32_t indexes[UCNVSEL_INDEX_COUNT];          // see index entry constants above
 *   serialized UTrie2;                             // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
 *   uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]];  // bit vectors
 *   char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]];  // NUL-terminated strings + padding
 */
280 | |||||
281 | /* serialize a selector */ | ||||
282 | U_CAPIextern "C" int32_t U_EXPORT2 | ||||
283 | ucnvsel_serializeucnvsel_serialize_71(const UConverterSelector* sel, | ||||
284 | void* buffer, int32_t bufferCapacity, UErrorCode* status) { | ||||
285 | // check if already failed | ||||
286 | if (U_FAILURE(*status)) { | ||||
287 | return 0; | ||||
288 | } | ||||
289 | // ensure args make sense! | ||||
290 | uint8_t *p = (uint8_t *)buffer; | ||||
291 | if (bufferCapacity < 0 || | ||||
292 | (bufferCapacity > 0 && (p == NULL__null || (U_POINTER_MASK_LSB(p, 3)((uintptr_t)(p) & (3)) != 0))) | ||||
293 | ) { | ||||
294 | *status = U_ILLEGAL_ARGUMENT_ERROR; | ||||
295 | return 0; | ||||
296 | } | ||||
297 | // add up the size of the serialized form | ||||
298 | int32_t serializedTrieSize = utrie2_serializeutrie2_serialize_71(sel->trie, NULL__null, 0, status); | ||||
299 | if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { | ||||
300 | return 0; | ||||
301 | } | ||||
302 | *status = U_ZERO_ERROR; | ||||
303 | |||||
304 | DataHeader header; | ||||
305 | uprv_memset(&header, 0, sizeof(header)):: memset(&header, 0, sizeof(header)); | ||||
306 | header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15); | ||||
307 | header.dataHeader.magic1 = 0xda; | ||||
308 | header.dataHeader.magic2 = 0x27; | ||||
309 | uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(&header .info, &dataInfo, sizeof(dataInfo)); } while (false); | ||||
310 | |||||
311 | int32_t indexes[UCNVSEL_INDEX_COUNT] = { | ||||
312 | serializedTrieSize, | ||||
313 | sel->pvCount, | ||||
314 | sel->encodingsCount, | ||||
315 | sel->encodingStrLength | ||||
316 | }; | ||||
317 | |||||
318 | int32_t totalSize = | ||||
319 | header.dataHeader.headerSize + | ||||
320 | (int32_t)sizeof(indexes) + | ||||
321 | serializedTrieSize + | ||||
322 | sel->pvCount * 4 + | ||||
323 | sel->encodingStrLength; | ||||
324 | indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize; | ||||
325 | if (totalSize > bufferCapacity) { | ||||
326 | *status = U_BUFFER_OVERFLOW_ERROR; | ||||
327 | return totalSize; | ||||
328 | } | ||||
329 | // ok, save! | ||||
330 | int32_t length = header.dataHeader.headerSize; | ||||
331 | uprv_memcpy(p, &header, sizeof(header))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(p, &header , sizeof(header)); } while (false); | ||||
332 | uprv_memset(p + sizeof(header), 0, length - sizeof(header)):: memset(p + sizeof(header), 0, length - sizeof(header)); | ||||
333 | p += length; | ||||
334 | |||||
335 | length = (int32_t)sizeof(indexes); | ||||
336 | uprv_memcpy(p, indexes, length)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(p, indexes , length); } while (false); | ||||
337 | p += length; | ||||
338 | |||||
339 | utrie2_serializeutrie2_serialize_71(sel->trie, p, serializedTrieSize, status); | ||||
340 | p += serializedTrieSize; | ||||
341 | |||||
342 | length = sel->pvCount * 4; | ||||
343 | uprv_memcpy(p, sel->pv, length)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(p, sel-> pv, length); } while (false); | ||||
344 | p += length; | ||||
345 | |||||
346 | uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(p, sel-> encodings[0], sel->encodingStrLength); } while (false); | ||||
347 | p += sel->encodingStrLength; | ||||
348 | |||||
349 | return totalSize; | ||||
350 | } | ||||
351 | |||||
352 | /** | ||||
353 | * swap a selector into the desired Endianness and Asciiness of | ||||
354 | * the system. Just as FYI, selectors are always saved in the format | ||||
355 | * of the system that created them. They are only converted if used | ||||
356 | * on another system. In other words, selectors created on different | ||||
357 | * system can be different even if the params are identical (endianness | ||||
358 | * and Asciiness differences only) | ||||
359 | * | ||||
360 | * @param ds pointer to data swapper containing swapping info | ||||
361 | * @param inData pointer to incoming data | ||||
362 | * @param length length of inData in bytes | ||||
363 | * @param outData pointer to output data. Capacity should | ||||
364 | * be at least equal to capacity of inData | ||||
365 | * @param status an in/out ICU UErrorCode | ||||
366 | * @return 0 on failure, number of bytes swapped on success | ||||
367 | * number of bytes swapped can be smaller than length | ||||
368 | */ | ||||
369 | static int32_t | ||||
370 | ucnvsel_swap(const UDataSwapper *ds, | ||||
371 | const void *inData, int32_t length, | ||||
372 | void *outData, UErrorCode *status) { | ||||
373 | /* udata_swapDataHeader checks the arguments */ | ||||
374 | int32_t headerSize = udata_swapDataHeaderudata_swapDataHeader_71(ds, inData, length, outData, status); | ||||
375 | if(U_FAILURE(*status)) { | ||||
376 | return 0; | ||||
377 | } | ||||
378 | |||||
379 | /* check data format and format version */ | ||||
380 | const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); | ||||
381 | if(!( | ||||
382 | pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ | ||||
383 | pInfo->dataFormat[1] == 0x53 && | ||||
384 | pInfo->dataFormat[2] == 0x65 && | ||||
385 | pInfo->dataFormat[3] == 0x6c | ||||
386 | )) { | ||||
387 | udata_printErrorudata_printError_71(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", | ||||
388 | pInfo->dataFormat[0], pInfo->dataFormat[1], | ||||
389 | pInfo->dataFormat[2], pInfo->dataFormat[3]); | ||||
390 | *status = U_INVALID_FORMAT_ERROR; | ||||
391 | return 0; | ||||
392 | } | ||||
393 | if(pInfo->formatVersion[0] != 1) { | ||||
394 | udata_printErrorudata_printError_71(ds, "ucnvsel_swap(): format version %02x is not supported\n", | ||||
395 | pInfo->formatVersion[0]); | ||||
396 | *status = U_UNSUPPORTED_ERROR; | ||||
397 | return 0; | ||||
398 | } | ||||
399 | |||||
400 | if(length >= 0) { | ||||
401 | length -= headerSize; | ||||
402 | if(length < 16*4) { | ||||
403 | udata_printErrorudata_printError_71(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", | ||||
404 | length); | ||||
405 | *status = U_INDEX_OUTOFBOUNDS_ERROR; | ||||
406 | return 0; | ||||
407 | } | ||||
408 | } | ||||
409 | |||||
410 | const uint8_t *inBytes = (const uint8_t *)inData + headerSize; | ||||
411 | uint8_t *outBytes = (uint8_t *)outData + headerSize; | ||||
412 | |||||
413 | /* read the indexes */ | ||||
414 | const int32_t *inIndexes = (const int32_t *)inBytes; | ||||
415 | int32_t indexes[16]; | ||||
416 | int32_t i; | ||||
417 | for(i = 0; i < 16; ++i) { | ||||
418 | indexes[i] = udata_readInt32udata_readInt32_71(ds, inIndexes[i]); | ||||
419 | } | ||||
420 | |||||
421 | /* get the total length of the data */ | ||||
422 | int32_t size = indexes[UCNVSEL_INDEX_SIZE]; | ||||
423 | if(length >= 0) { | ||||
424 | if(length < size) { | ||||
425 | udata_printErrorudata_printError_71(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", | ||||
426 | length); | ||||
427 | *status = U_INDEX_OUTOFBOUNDS_ERROR; | ||||
428 | return 0; | ||||
429 | } | ||||
430 | |||||
431 | /* copy the data for inaccessible bytes */ | ||||
432 | if(inBytes != outBytes) { | ||||
433 | uprv_memcpy(outBytes, inBytes, size)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(outBytes, inBytes, size); } while (false); | ||||
434 | } | ||||
435 | |||||
436 | int32_t offset = 0, count; | ||||
437 | |||||
438 | /* swap the int32_t indexes[] */ | ||||
439 | count = UCNVSEL_INDEX_COUNT*4; | ||||
440 | ds->swapArray32(ds, inBytes, count, outBytes, status); | ||||
441 | offset += count; | ||||
442 | |||||
443 | /* swap the UTrie2 */ | ||||
444 | count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; | ||||
445 | utrie2_swaputrie2_swap_71(ds, inBytes + offset, count, outBytes + offset, status); | ||||
446 | offset += count; | ||||
447 | |||||
448 | /* swap the uint32_t pv[] */ | ||||
449 | count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; | ||||
450 | ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); | ||||
451 | offset += count; | ||||
452 | |||||
453 | /* swap the encoding names */ | ||||
454 | count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; | ||||
455 | ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); | ||||
456 | offset += count; | ||||
457 | |||||
458 | U_ASSERT(offset == size)(void)0; | ||||
459 | } | ||||
460 | |||||
461 | return headerSize + size; | ||||
462 | } | ||||
463 | |||||
464 | /* unserialize a selector */ | ||||
465 | U_CAPIextern "C" UConverterSelector* U_EXPORT2 | ||||
466 | ucnvsel_openFromSerializeducnvsel_openFromSerialized_71(const void* buffer, int32_t length, UErrorCode* status) { | ||||
467 | // check if already failed | ||||
468 | if (U_FAILURE(*status)) { | ||||
469 | return NULL__null; | ||||
470 | } | ||||
471 | // ensure args make sense! | ||||
472 | const uint8_t *p = (const uint8_t *)buffer; | ||||
473 | if (length <= 0 || | ||||
474 | (length > 0 && (p == NULL__null || (U_POINTER_MASK_LSB(p, 3)((uintptr_t)(p) & (3)) != 0))) | ||||
475 | ) { | ||||
476 | *status = U_ILLEGAL_ARGUMENT_ERROR; | ||||
477 | return NULL__null; | ||||
478 | } | ||||
479 | // header | ||||
480 | if (length < 32) { | ||||
481 | // not even enough space for a minimal header | ||||
482 | *status = U_INDEX_OUTOFBOUNDS_ERROR; | ||||
483 | return NULL__null; | ||||
484 | } | ||||
485 | const DataHeader *pHeader = (const DataHeader *)p; | ||||
486 | if (!( | ||||
487 | pHeader->dataHeader.magic1==0xda && | ||||
488 | pHeader->dataHeader.magic2==0x27 && | ||||
489 | pHeader->info.dataFormat[0] == 0x43 && | ||||
490 | pHeader->info.dataFormat[1] == 0x53 && | ||||
491 | pHeader->info.dataFormat[2] == 0x65 && | ||||
492 | pHeader->info.dataFormat[3] == 0x6c | ||||
493 | )) { | ||||
494 | /* header not valid or dataFormat not recognized */ | ||||
495 | *status = U_INVALID_FORMAT_ERROR; | ||||
496 | return NULL__null; | ||||
497 | } | ||||
498 | if (pHeader->info.formatVersion[0] != 1) { | ||||
499 | *status = U_UNSUPPORTED_ERROR; | ||||
500 | return NULL__null; | ||||
501 | } | ||||
502 | uint8_t* swapped = NULL__null; | ||||
503 | if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN(1234 == 4321) || | ||||
504 | pHeader->info.charsetFamily != U_CHARSET_FAMILY0 | ||||
505 | ) { | ||||
506 | // swap the data | ||||
507 | UDataSwapper *ds = | ||||
508 | udata_openSwapperForInputDataudata_openSwapperForInputData_71(p, length, U_IS_BIG_ENDIAN(1234 == 4321), U_CHARSET_FAMILY0, status); | ||||
509 | int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL__null, status); | ||||
510 | if (U_FAILURE(*status)) { | ||||
511 | udata_closeSwapperudata_closeSwapper_71(ds); | ||||
512 | return NULL__null; | ||||
513 | } | ||||
514 | if (length < totalSize) { | ||||
515 | udata_closeSwapperudata_closeSwapper_71(ds); | ||||
516 | *status = U_INDEX_OUTOFBOUNDS_ERROR; | ||||
517 | return NULL__null; | ||||
518 | } | ||||
519 | swapped = (uint8_t*)uprv_mallocuprv_malloc_71(totalSize); | ||||
520 | if (swapped == NULL__null) { | ||||
521 | udata_closeSwapperudata_closeSwapper_71(ds); | ||||
522 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
523 | return NULL__null; | ||||
524 | } | ||||
525 | ucnvsel_swap(ds, p, length, swapped, status); | ||||
526 | udata_closeSwapperudata_closeSwapper_71(ds); | ||||
527 | if (U_FAILURE(*status)) { | ||||
528 | uprv_freeuprv_free_71(swapped); | ||||
529 | return NULL__null; | ||||
530 | } | ||||
531 | p = swapped; | ||||
532 | pHeader = (const DataHeader *)p; | ||||
533 | } | ||||
534 | if (length < (pHeader->dataHeader.headerSize + 16 * 4)) { | ||||
535 | // not even enough space for the header and the indexes | ||||
536 | uprv_freeuprv_free_71(swapped); | ||||
537 | *status = U_INDEX_OUTOFBOUNDS_ERROR; | ||||
538 | return NULL__null; | ||||
539 | } | ||||
540 | p += pHeader->dataHeader.headerSize; | ||||
541 | length -= pHeader->dataHeader.headerSize; | ||||
542 | // indexes | ||||
543 | const int32_t *indexes = (const int32_t *)p; | ||||
544 | if (length < indexes[UCNVSEL_INDEX_SIZE]) { | ||||
545 | uprv_freeuprv_free_71(swapped); | ||||
546 | *status = U_INDEX_OUTOFBOUNDS_ERROR; | ||||
547 | return NULL__null; | ||||
548 | } | ||||
549 | p += UCNVSEL_INDEX_COUNT * 4; | ||||
550 | // create and populate the selector object | ||||
551 | UConverterSelector* sel = (UConverterSelector*)uprv_mallocuprv_malloc_71(sizeof(UConverterSelector)); | ||||
552 | char **encodings = | ||||
553 | (char **)uprv_mallocuprv_malloc_71( | ||||
554 | indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *)); | ||||
555 | if (sel == NULL__null || encodings == NULL__null) { | ||||
556 | uprv_freeuprv_free_71(swapped); | ||||
557 | uprv_freeuprv_free_71(sel); | ||||
558 | uprv_freeuprv_free_71(encodings); | ||||
559 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
560 | return NULL__null; | ||||
561 | } | ||||
562 | uprv_memset(sel, 0, sizeof(UConverterSelector)):: memset(sel, 0, sizeof(UConverterSelector)); | ||||
563 | sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT]; | ||||
564 | sel->encodings = encodings; | ||||
565 | sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT]; | ||||
566 | sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; | ||||
567 | sel->swapped = swapped; | ||||
568 | // trie | ||||
569 | sel->trie = utrie2_openFromSerializedutrie2_openFromSerialized_71(UTRIE2_16_VALUE_BITS, | ||||
570 | p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL__null, | ||||
571 | status); | ||||
572 | p += indexes[UCNVSEL_INDEX_TRIE_SIZE]; | ||||
573 | if (U_FAILURE(*status)) { | ||||
574 | ucnvsel_closeucnvsel_close_71(sel); | ||||
575 | return NULL__null; | ||||
576 | } | ||||
577 | // bit vectors | ||||
578 | sel->pv = (uint32_t *)p; | ||||
579 | p += sel->pvCount * 4; | ||||
580 | // encoding names | ||||
581 | char* s = (char*)p; | ||||
582 | for (int32_t i = 0; i < sel->encodingsCount; ++i) { | ||||
583 | sel->encodings[i] = s; | ||||
584 | s += uprv_strlen(s):: strlen(s) + 1; | ||||
585 | } | ||||
586 | p += sel->encodingStrLength; | ||||
587 | |||||
588 | return sel; | ||||
589 | } | ||||
590 | |||||
591 | // a bunch of functions for the enumeration thingie! Nothing fancy here. Just | ||||
592 | // iterate over the selected encodings | ||||
593 | struct Enumerator { | ||||
594 | int16_t* index; | ||||
595 | int16_t length; | ||||
596 | int16_t cur; | ||||
597 | const UConverterSelector* sel; | ||||
598 | }; | ||||
599 | |||||
600 | U_CDECL_BEGINextern "C" { | ||||
601 | |||||
602 | static void U_CALLCONV | ||||
603 | ucnvsel_close_selector_iterator(UEnumeration *enumerator) { | ||||
604 | uprv_freeuprv_free_71(((Enumerator*)(enumerator->context))->index); | ||||
605 | uprv_freeuprv_free_71(enumerator->context); | ||||
606 | uprv_freeuprv_free_71(enumerator); | ||||
607 | } | ||||
608 | |||||
609 | |||||
610 | static int32_t U_CALLCONV | ||||
611 | ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) { | ||||
612 | // check if already failed | ||||
613 | if (U_FAILURE(*status)) { | ||||
614 | return 0; | ||||
615 | } | ||||
616 | return ((Enumerator*)(enumerator->context))->length; | ||||
617 | } | ||||
618 | |||||
619 | |||||
620 | static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator, | ||||
621 | int32_t* resultLength, | ||||
622 | UErrorCode* status) { | ||||
623 | // check if already failed | ||||
624 | if (U_FAILURE(*status)) { | ||||
625 | return NULL__null; | ||||
626 | } | ||||
627 | |||||
628 | int16_t cur = ((Enumerator*)(enumerator->context))->cur; | ||||
629 | const UConverterSelector* sel; | ||||
630 | const char* result; | ||||
631 | if (cur >= ((Enumerator*)(enumerator->context))->length) { | ||||
632 | return NULL__null; | ||||
633 | } | ||||
634 | sel = ((Enumerator*)(enumerator->context))->sel; | ||||
635 | result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ]; | ||||
636 | ((Enumerator*)(enumerator->context))->cur++; | ||||
637 | if (resultLength) { | ||||
638 | *resultLength = (int32_t)uprv_strlen(result):: strlen(result); | ||||
639 | } | ||||
640 | return result; | ||||
641 | } | ||||
642 | |||||
643 | static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator, | ||||
644 | UErrorCode* status) { | ||||
645 | // check if already failed | ||||
646 | if (U_FAILURE(*status)) { | ||||
647 | return ; | ||||
648 | } | ||||
649 | ((Enumerator*)(enumerator->context))->cur = 0; | ||||
650 | } | ||||
651 | |||||
652 | U_CDECL_END} | ||||
653 | |||||
654 | |||||
655 | static const UEnumeration defaultEncodings = { | ||||
656 | NULL__null, | ||||
657 | NULL__null, | ||||
658 | ucnvsel_close_selector_iterator, | ||||
659 | ucnvsel_count_encodings, | ||||
660 | uenum_unextDefaultuenum_unextDefault_71, | ||||
661 | ucnvsel_next_encoding, | ||||
662 | ucnvsel_reset_iterator | ||||
663 | }; | ||||
664 | |||||
665 | |||||
666 | // internal fn to intersect two sets of masks | ||||
667 | // returns whether the mask has reduced to all zeros | ||||
668 | static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) { | ||||
669 | int32_t i; | ||||
670 | uint32_t oredDest = 0; | ||||
671 | for (i = 0 ; i < len ; ++i) { | ||||
672 | oredDest |= (dest[i] &= source1[i]); | ||||
673 | } | ||||
674 | return oredDest == 0; | ||||
675 | } | ||||
676 | |||||
// Internal helper: counts the 1 bits in the len words of mask.
// Algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html
// (each step clears the lowest set bit, so the inner loop runs once per 1 bit).
static int16_t countOnes(uint32_t* mask, int32_t len) {
  int32_t bitsSet = 0;
  for (int32_t word = 0; word < len; ++word) {
    uint32_t remaining = mask[word];
    while (remaining != 0) {
      remaining &= remaining - 1;  // clear the least significant bit set
      ++bitsSet;
    }
  }
  return (int16_t)bitsSet;
}
690 | |||||
691 | |||||
692 | /* internal function! */ | ||||
693 | static UEnumeration *selectForMask(const UConverterSelector* sel, | ||||
694 | uint32_t *theMask, UErrorCode *status) { | ||||
695 | LocalMemory<uint32_t> mask(theMask); | ||||
696 | // this is the context we will use. Store a table of indices to which | ||||
697 | // encodings are legit. | ||||
698 | LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_mallocuprv_malloc_71(sizeof(Enumerator)))); | ||||
699 | if (result.isNull()) { | ||||
700 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
701 | return nullptr; | ||||
702 | } | ||||
703 | result->index = nullptr; // this will be allocated later! | ||||
704 | result->length = result->cur = 0; | ||||
705 | result->sel = sel; | ||||
706 | |||||
707 | LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_mallocuprv_malloc_71(sizeof(UEnumeration)))); | ||||
708 | if (en.isNull()) { | ||||
709 | // TODO(markus): Combine Enumerator and UEnumeration into one struct. | ||||
710 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
711 | return nullptr; | ||||
712 | } | ||||
713 | memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration)); | ||||
714 | |||||
715 | int32_t columns = (sel->encodingsCount+31)/32; | ||||
716 | int16_t numOnes = countOnes(mask.getAlias(), columns); | ||||
717 | // now, we know the exact space we need for index | ||||
718 | if (numOnes > 0) { | ||||
719 | result->index = static_cast<int16_t*>(uprv_mallocuprv_malloc_71(numOnes * sizeof(int16_t))); | ||||
720 | if (result->index == nullptr) { | ||||
721 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
722 | return nullptr; | ||||
723 | } | ||||
724 | int32_t i, j; | ||||
725 | int16_t k = 0; | ||||
726 | for (j = 0 ; j < columns; j++) { | ||||
727 | uint32_t v = mask[j]; | ||||
728 | for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) { | ||||
729 | if ((v & 1) != 0) { | ||||
730 | result->index[result->length++] = k; | ||||
731 | } | ||||
732 | v >>= 1; | ||||
733 | } | ||||
734 | } | ||||
735 | } //otherwise, index will remain NULL (and will never be touched by | ||||
736 | //the enumerator code anyway) | ||||
737 | en->context = result.orphan(); | ||||
738 | return en.orphan(); | ||||
739 | } | ||||
740 | |||||
741 | /* check a string against the selector - UTF16 version */ | ||||
742 | U_CAPIextern "C" UEnumeration * U_EXPORT2 | ||||
743 | ucnvsel_selectForStringucnvsel_selectForString_71(const UConverterSelector* sel, | ||||
744 | const UChar *s, int32_t length, UErrorCode *status) { | ||||
745 | // check if already failed | ||||
746 | if (U_FAILURE(*status)) { | ||||
747 | return NULL__null; | ||||
748 | } | ||||
749 | // ensure args make sense! | ||||
750 | if (sel == NULL__null || (s == NULL__null && length != 0)) { | ||||
751 | *status = U_ILLEGAL_ARGUMENT_ERROR; | ||||
752 | return NULL__null; | ||||
753 | } | ||||
754 | |||||
755 | int32_t columns = (sel->encodingsCount+31)/32; | ||||
756 | uint32_t* mask = (uint32_t*) uprv_mallocuprv_malloc_71(columns * 4); | ||||
757 | if (mask == NULL__null) { | ||||
758 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
759 | return NULL__null; | ||||
760 | } | ||||
761 | uprv_memset(mask, ~0, columns *4):: memset(mask, ~0, columns *4); | ||||
762 | |||||
763 | if(s!=NULL__null) { | ||||
764 | const UChar *limit; | ||||
765 | if (length >= 0) { | ||||
766 | limit = s + length; | ||||
767 | } else { | ||||
768 | limit = NULL__null; | ||||
769 | } | ||||
770 | |||||
771 | while (limit == NULL__null ? *s != 0 : s != limit) { | ||||
772 | UChar32 c; | ||||
773 | uint16_t pvIndex; | ||||
774 | UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex)do { { uint16_t __c2; (c)=*(s)++; if(!(((c)&0xfffffc00)== 0xd800)) { (pvIndex)=(sel->trie)->index[(((int32_t)(((sel ->trie)->index)[(0)+((c)>>UTRIE2_SHIFT_2)]) << UTRIE2_INDEX_SHIFT)+ ((c)&UTRIE2_DATA_MASK))]; } else if( (s)==(limit) || !(((__c2=*(s))&0xfffffc00)==0xdc00)) { (pvIndex )=(sel->trie)->index[(((int32_t)(((sel->trie)->index )[(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2)) +((c)>>UTRIE2_SHIFT_2)]) <<UTRIE2_INDEX_SHIFT)+ ( (c)&UTRIE2_DATA_MASK))]; } else { ++(s); (c)=(((UChar32)( (c))<<10UL)+(UChar32)(__c2)-((0xd800<<10UL)+0xdc00 -0x10000)); (pvIndex)=((sel->trie))->index[((c))>=(( sel->trie))->highStart ? ((sel->trie))->highValueIndex : (((int32_t)((((sel->trie))->index)[ (((sel->trie) )->index)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH )+ (((c))>>UTRIE2_SHIFT_1)]+ ((((c))>>UTRIE2_SHIFT_2 )&UTRIE2_INDEX_2_MASK)]) <<UTRIE2_INDEX_SHIFT)+ ((( c))&UTRIE2_DATA_MASK))]; } } } while (false); | ||||
775 | if (intersectMasks(mask, sel->pv+pvIndex, columns)) { | ||||
776 | break; | ||||
777 | } | ||||
778 | } | ||||
779 | } | ||||
780 | return selectForMask(sel, mask, status); | ||||
781 | } | ||||
782 | |||||
783 | /* check a string against the selector - UTF8 version */ | ||||
784 | U_CAPIextern "C" UEnumeration * U_EXPORT2 | ||||
785 | ucnvsel_selectForUTF8ucnvsel_selectForUTF8_71(const UConverterSelector* sel, | ||||
786 | const char *s, int32_t length, UErrorCode *status) { | ||||
787 | // check if already failed | ||||
788 | if (U_FAILURE(*status)) { | ||||
789 | return NULL__null; | ||||
790 | } | ||||
791 | // ensure args make sense! | ||||
792 | if (sel == NULL__null || (s == NULL__null && length != 0)) { | ||||
793 | *status = U_ILLEGAL_ARGUMENT_ERROR; | ||||
794 | return NULL__null; | ||||
795 | } | ||||
796 | |||||
797 | int32_t columns = (sel->encodingsCount+31)/32; | ||||
798 | uint32_t* mask = (uint32_t*) uprv_mallocuprv_malloc_71(columns * 4); | ||||
799 | if (mask == NULL__null) { | ||||
800 | *status = U_MEMORY_ALLOCATION_ERROR; | ||||
801 | return NULL__null; | ||||
802 | } | ||||
803 | uprv_memset(mask, ~0, columns *4):: memset(mask, ~0, columns *4); | ||||
804 | |||||
805 | if (length < 0) { | ||||
806 | length = (int32_t)uprv_strlen(s):: strlen(s); | ||||
807 | } | ||||
808 | |||||
809 | if(s!=NULL__null) { | ||||
810 | const char *limit = s + length; | ||||
811 | |||||
812 | while (s != limit) { | ||||
813 | uint16_t pvIndex; | ||||
814 | UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex)do { uint8_t __lead=(uint8_t)*(s)++; if((((__lead)&0x80)== 0)) { (pvIndex)=(sel->trie)->data16[__lead]; } else { uint8_t __t1, __t2; if( 0xe0<=__lead && __lead<0xf0 && ((s)+1)<(limit) && ("\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [(__lead)&0xf]&(1<<((uint8_t)(__t1=(uint8_t)*(s ))>>5))) && (__t2=(uint8_t)(*((s)+1)-0x80))<= 0x3f ) { (s)+=2; (pvIndex)=(sel->trie)->index[ ((int32_t )((sel->trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2 ))+ ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>> UTRIE2_SHIFT_2)]) <<UTRIE2_INDEX_SHIFT)+ (__t2&UTRIE2_DATA_MASK )]; } else if( __lead<0xe0 && __lead>=0xc2 && (s)<(limit) && (__t1=(uint8_t)(*(s)-0x80))<=0x3f ) { ++(s); (pvIndex)=(sel->trie)->index[ (sel->trie )->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ __t1 ]; } else { int32_t __index=utrie2_internalU8NextIndex_71((sel ->trie), __lead, (const uint8_t *)(s), (const uint8_t *)(limit )); (s)+=__index&7; (pvIndex)=(sel->trie)->index[__index >>3]; } } } while (false); | ||||
815 | if (intersectMasks(mask, sel->pv+pvIndex, columns)) { | ||||
816 | break; | ||||
817 | } | ||||
818 | } | ||||
819 | } | ||||
820 | return selectForMask(sel, mask, status); | ||||
821 | } | ||||
822 | |||||
823 | #endif // !UCONFIG_NO_CONVERSION |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2008-2011, International Business Machines |
7 | * Corporation, Google and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | */ |
11 | /* |
12 | * Author : eldawy@google.com (Mohamed Eldawy) |
13 | * ucnvsel.h |
14 | * |
15 | * Purpose: To generate a list of encodings capable of handling |
16 | * a given Unicode text |
17 | * |
18 | * Started 09-April-2008 |
19 | */ |
20 | |
21 | #ifndef __ICU_UCNV_SEL_H__ |
22 | #define __ICU_UCNV_SEL_H__ |
23 | |
24 | #include "unicode/utypes.h" |
25 | |
26 | #if !UCONFIG_NO_CONVERSION0 |
27 | |
28 | #include "unicode/uset.h" |
29 | #include "unicode/utf16.h" |
30 | #include "unicode/uenum.h" |
31 | #include "unicode/ucnv.h" |
32 | |
33 | #if U_SHOW_CPLUSPLUS_API1 |
34 | #include "unicode/localpointer.h" |
35 | #endif // U_SHOW_CPLUSPLUS_API |
36 | |
37 | /** |
38 | * \file |
39 | * |
40 | * A converter selector is built with a set of encoding/charset names |
41 | * and given an input string returns the set of names of the |
42 | * corresponding converters which can convert the string. |
43 | * |
44 | * A converter selector can be serialized into a buffer and reopened |
45 | * from the serialized form. |
46 | */ |
47 | |
48 | struct UConverterSelector; |
49 | /** |
50 | * @{ |
51 | * Typedef for selector data structure. |
52 | */ |
53 | typedef struct UConverterSelector UConverterSelector; |
54 | /** @} */ |
55 | |
56 | /** |
57 | * Open a selector. |
58 | * If converterListSize is 0, build for all available converters. |
59 | * If excludedCodePoints is NULL, don't exclude any code points. |
60 | * |
61 | * @param converterList a pointer to the names of the encodings to include. |
62 | * Can be NULL if converterListSize==0. |
63 | * The list and the names will be cloned, and the caller |
64 | * retains ownership of the original. |
65 | * @param converterListSize number of encodings in above list. |
66 | * If 0, builds a selector for all available converters. |
67 | * @param excludedCodePoints a set of code points to be excluded from consideration. |
68 | * That is, excluded code points in a string do not change |
69 | * the selection result. (They might be handled by a callback.) |
70 | * Use NULL to exclude nothing. |
71 | * @param whichSet what converter set to use? Use this to determine whether |
72 | * to consider only roundtrip mappings or also fallbacks. |
73 | * @param status an in/out ICU UErrorCode |
74 | * @return the new selector |
75 | * |
76 | * @stable ICU 4.2 |
77 | */ |
78 | U_CAPIextern "C" UConverterSelector* U_EXPORT2 |
79 | ucnvsel_openucnvsel_open_71(const char* const* converterList, int32_t converterListSize, |
80 | const USet* excludedCodePoints, |
81 | const UConverterUnicodeSet whichSet, UErrorCode* status); |
82 | |
83 | /** |
84 | * Closes a selector. |
85 | * If any Enumerations were returned by ucnvsel_selectForString() or ucnvsel_selectForUTF8(), they become invalid. |
86 | * They can be closed before or after calling ucnvsel_close(), |
87 | * but should never be used after the selector is closed. |
88 | * |
89 | * @see ucnvsel_selectForString |
90 | * @see ucnvsel_selectForUTF8 |
91 | * |
92 | * @param sel selector to close |
93 | * |
94 | * @stable ICU 4.2 |
95 | */ |
96 | U_CAPIextern "C" void U_EXPORT2 |
97 | ucnvsel_closeucnvsel_close_71(UConverterSelector *sel); |
98 | |
99 | #if U_SHOW_CPLUSPLUS_API1 |
100 | |
101 | U_NAMESPACE_BEGINnamespace icu_71 { |
102 | |
103 | /** |
104 | * \class LocalUConverterSelectorPointer |
105 | * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close(). |
106 | * For most methods see the LocalPointerBase base class. |
107 | * |
108 | * @see LocalPointerBase |
109 | * @see LocalPointer |
110 | * @stable ICU 4.4 |
111 | */ |
112 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close)class LocalUConverterSelectorPointer : public LocalPointerBase <UConverterSelector> { public: using LocalPointerBase< UConverterSelector>::operator*; using LocalPointerBase< UConverterSelector>::operator->; explicit LocalUConverterSelectorPointer (UConverterSelector *p=__null) : LocalPointerBase<UConverterSelector >(p) {} LocalUConverterSelectorPointer(LocalUConverterSelectorPointer &&src) noexcept : LocalPointerBase<UConverterSelector >(src.ptr) { src.ptr=__null; } explicit LocalUConverterSelectorPointer (std::unique_ptr<UConverterSelector, decltype(&ucnvsel_close_71 )> &&p) : LocalPointerBase<UConverterSelector> (p.release()) {} ~LocalUConverterSelectorPointer() { if (ptr != __null) { ucnvsel_close_71(ptr); } } LocalUConverterSelectorPointer &operator=(LocalUConverterSelectorPointer &&src) noexcept { if (ptr != __null) { ucnvsel_close_71(ptr); } LocalPointerBase <UConverterSelector>::ptr=src.ptr; src.ptr=__null; return *this; } LocalUConverterSelectorPointer &operator=(std:: unique_ptr<UConverterSelector, decltype(&ucnvsel_close_71 )> &&p) { adoptInstead(p.release()); return *this; } void swap(LocalUConverterSelectorPointer &other) noexcept { UConverterSelector *temp=LocalPointerBase<UConverterSelector >::ptr; LocalPointerBase<UConverterSelector>::ptr=other .ptr; other.ptr=temp; } friend inline void swap(LocalUConverterSelectorPointer &p1, LocalUConverterSelectorPointer &p2) noexcept { p1 .swap(p2); } void adoptInstead(UConverterSelector *p) { if (ptr != __null) { ucnvsel_close_71(ptr); } ptr=p; } operator std:: unique_ptr<UConverterSelector, decltype(&ucnvsel_close_71 )> () && { return std::unique_ptr<UConverterSelector , decltype(&ucnvsel_close_71)>(LocalPointerBase<UConverterSelector >::orphan(), ucnvsel_close_71); } }; |
113 | |
114 | U_NAMESPACE_END} |
115 | |
116 | #endif |
117 | |
118 | /** |
119 | * Open a selector from its serialized form. |
120 | * The buffer must remain valid and unchanged for the lifetime of the selector. |
121 | * This is much faster than creating a selector from scratch. |
122 | * Using a serialized form from a different machine (endianness/charset) is supported. |
123 | * |
124 | * @param buffer pointer to the serialized form of a converter selector; |
125 | * must be 32-bit-aligned |
126 | * @param length the capacity of this buffer (can be equal to or larger than |
127 | * the actual data length) |
128 | * @param status an in/out ICU UErrorCode |
129 | * @return the new selector |
130 | * |
131 | * @stable ICU 4.2 |
132 | */ |
133 | U_CAPIextern "C" UConverterSelector* U_EXPORT2 |
134 | ucnvsel_openFromSerializeducnvsel_openFromSerialized_71(const void* buffer, int32_t length, UErrorCode* status); |
135 | |
136 | /** |
137 | * Serialize a selector into a linear buffer. |
138 | * The serialized form is portable to different machines. |
139 | * |
140 | * @param sel selector to consider |
141 | * @param buffer pointer to 32-bit-aligned memory to be filled with the |
142 | * serialized form of this converter selector |
143 | * @param bufferCapacity the capacity of this buffer |
144 | * @param status an in/out ICU UErrorCode |
145 | * @return the required buffer capacity to hold the serialized data (even if the call fails |
146 | * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) |
147 | * |
148 | * @stable ICU 4.2 |
149 | */ |
150 | U_CAPIextern "C" int32_t U_EXPORT2 |
151 | ucnvsel_serializeucnvsel_serialize_71(const UConverterSelector* sel, |
152 | void* buffer, int32_t bufferCapacity, UErrorCode* status); |
153 | |
154 | /** |
155 | * Select converters that can map all characters in a UTF-16 string, |
156 | * ignoring the excluded code points. |
157 | * |
158 | * @param sel a selector |
159 | * @param s UTF-16 string |
160 | * @param length length of the string, or -1 if NUL-terminated |
161 | * @param status an in/out ICU UErrorCode |
162 | * @return an enumeration containing encoding names. |
163 | * The returned encoding names and their order will be the same as |
164 | * supplied when building the selector. |
165 | * |
166 | * @stable ICU 4.2 |
167 | */ |
168 | U_CAPIextern "C" UEnumeration * U_EXPORT2 |
169 | ucnvsel_selectForStringucnvsel_selectForString_71(const UConverterSelector* sel, |
170 | const UChar *s, int32_t length, UErrorCode *status); |
171 | |
172 | /** |
173 | * Select converters that can map all characters in a UTF-8 string, |
174 | * ignoring the excluded code points. |
175 | * |
176 | * @param sel a selector |
177 | * @param s UTF-8 string |
178 | * @param length length of the string, or -1 if NUL-terminated |
179 | * @param status an in/out ICU UErrorCode |
180 | * @return an enumeration containing encoding names. |
181 | * The returned encoding names and their order will be the same as |
182 | * supplied when building the selector. |
183 | * |
184 | * @stable ICU 4.2 |
185 | */ |
186 | U_CAPIextern "C" UEnumeration * U_EXPORT2 |
187 | ucnvsel_selectForUTF8ucnvsel_selectForUTF8_71(const UConverterSelector* sel, |
188 | const char *s, int32_t length, UErrorCode *status); |
189 | |
190 | #endif /* !UCONFIG_NO_CONVERSION */ |
191 | |
192 | #endif /* __ICU_UCNV_SEL_H__ */ |