File: | out/../deps/icu-small/source/tools/toolutil/ucm.cpp |
Warning: | line 796, column 10 Although the value stored to 'bLen' is used in the enclosing expression, the value is never actually read from 'bLen' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2003-2013, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: ucm.cpp |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2003jun20 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * This file reads a .ucm file, stores its mappings and sorts them. |
19 | * It implements handling of Unicode conversion mappings from .ucm files |
20 | * for makeconv, canonucm, rptp2ucm, etc. |
21 | * |
22 | * Unicode code point sequences with a length of more than 1, |
23 | * as well as byte sequences with more than 4 bytes or more than one complete |
24 | * character sequence are handled to support m:n mappings. |
25 | */ |
26 | |
27 | #include "unicode/utypes.h" |
28 | #include "unicode/ustring.h" |
29 | #include "cstring.h" |
30 | #include "cmemory.h" |
31 | #include "filestrm.h" |
32 | #include "uarrsort.h" |
33 | #include "ucnvmbcs.h" |
34 | #include "ucnv_bld.h" |
35 | #include "ucnv_ext.h" |
36 | #include "uparse.h" |
37 | #include "ucm.h" |
38 | #include <stdio.h> |
39 | |
40 | #if !UCONFIG_NO_CONVERSION0 |
41 | |
42 | /* -------------------------------------------------------------------------- */ |
43 | |
44 | static void |
45 | printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) { |
46 | int32_t j; |
47 | |
48 | for(j=0; j<m->uLen; ++j) { |
49 | fprintf(f, "<U%04lX>", (long)codePoints[j]); |
50 | } |
51 | |
52 | fputc(' ', f); |
53 | |
54 | for(j=0; j<m->bLen; ++j) { |
55 | fprintf(f, "\\x%02X", bytes[j]); |
56 | } |
57 | |
58 | if(m->f>=0) { |
59 | fprintf(f, " |%u\n", m->f); |
60 | } else { |
61 | fputs("\n", f); |
62 | } |
63 | } |
64 | |
65 | U_CAPIextern "C" void U_EXPORT2 |
66 | ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { |
67 | printMapping(m, UCM_GET_CODE_POINTS(table, m)(((m)->uLen==1) ? &(m)->u : (table)->codePoints+ (m)->u), UCM_GET_BYTES(table, m)(((m)->bLen<=4) ? (m)->b.bytes : (table)->bytes+( m)->b.idx), f); |
68 | } |
69 | |
70 | U_CAPIextern "C" void U_EXPORT2 |
71 | ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) { |
72 | UCMapping *m; |
73 | int32_t i, length; |
74 | |
75 | m=table->mappings; |
76 | length=table->mappingsLength; |
77 | if(byUnicode) { |
78 | for(i=0; i<length; ++m, ++i) { |
79 | ucm_printMapping(table, m, f); |
80 | } |
81 | } else { |
82 | const int32_t *map=table->reverseMap; |
83 | for(i=0; i<length; ++i) { |
84 | ucm_printMapping(table, m+map[i], f); |
85 | } |
86 | } |
87 | } |
88 | |
89 | /* mapping comparisons ------------------------------------------------------ */ |
90 | |
91 | static int32_t |
92 | compareUnicode(UCMTable *lTable, const UCMapping *l, |
93 | UCMTable *rTable, const UCMapping *r) { |
94 | const UChar32 *lu, *ru; |
95 | int32_t result, i, length; |
96 | |
97 | if(l->uLen==1 && r->uLen==1) { |
98 | /* compare two single code points */ |
99 | return l->u-r->u; |
100 | } |
101 | |
102 | /* get pointers to the code point sequences */ |
103 | lu=UCM_GET_CODE_POINTS(lTable, l)(((l)->uLen==1) ? &(l)->u : (lTable)->codePoints +(l)->u); |
104 | ru=UCM_GET_CODE_POINTS(rTable, r)(((r)->uLen==1) ? &(r)->u : (rTable)->codePoints +(r)->u); |
105 | |
106 | /* get the minimum length */ |
107 | if(l->uLen<=r->uLen) { |
108 | length=l->uLen; |
109 | } else { |
110 | length=r->uLen; |
111 | } |
112 | |
113 | /* compare the code points */ |
114 | for(i=0; i<length; ++i) { |
115 | result=lu[i]-ru[i]; |
116 | if(result!=0) { |
117 | return result; |
118 | } |
119 | } |
120 | |
121 | /* compare the lengths */ |
122 | return l->uLen-r->uLen; |
123 | } |
124 | |
125 | static int32_t |
126 | compareBytes(UCMTable *lTable, const UCMapping *l, |
127 | UCMTable *rTable, const UCMapping *r, |
128 | UBool lexical) { |
129 | const uint8_t *lb, *rb; |
130 | int32_t result, i, length; |
131 | |
132 | /* |
133 | * A lexical comparison is used for sorting in the builder, to allow |
134 | * an efficient search for a byte sequence that could be a prefix |
135 | * of a previously entered byte sequence. |
136 | * |
137 | * Comparing by lengths first is for compatibility with old .ucm tools |
138 | * like canonucm and rptp2ucm. |
139 | */ |
140 | if(lexical) { |
141 | /* get the minimum length and continue */ |
142 | if(l->bLen<=r->bLen) { |
143 | length=l->bLen; |
144 | } else { |
145 | length=r->bLen; |
146 | } |
147 | } else { |
148 | /* compare lengths first */ |
149 | result=l->bLen-r->bLen; |
150 | if(result!=0) { |
151 | return result; |
152 | } else { |
153 | length=l->bLen; |
154 | } |
155 | } |
156 | |
157 | /* get pointers to the byte sequences */ |
158 | lb=UCM_GET_BYTES(lTable, l)(((l)->bLen<=4) ? (l)->b.bytes : (lTable)->bytes+ (l)->b.idx); |
159 | rb=UCM_GET_BYTES(rTable, r)(((r)->bLen<=4) ? (r)->b.bytes : (rTable)->bytes+ (r)->b.idx); |
160 | |
161 | /* compare the bytes */ |
162 | for(i=0; i<length; ++i) { |
163 | result=lb[i]-rb[i]; |
164 | if(result!=0) { |
165 | return result; |
166 | } |
167 | } |
168 | |
169 | /* compare the lengths */ |
170 | return l->bLen-r->bLen; |
171 | } |
172 | |
173 | /* compare UCMappings for sorting */ |
174 | static int32_t |
175 | compareMappings(UCMTable *lTable, const UCMapping *l, |
176 | UCMTable *rTable, const UCMapping *r, |
177 | UBool uFirst) { |
178 | int32_t result; |
179 | |
180 | /* choose which side to compare first */ |
181 | if(uFirst) { |
182 | /* Unicode then bytes */ |
183 | result=compareUnicode(lTable, l, rTable, r); |
184 | if(result==0) { |
185 | result=compareBytes(lTable, l, rTable, r, FALSE0); /* not lexically, like canonucm */ |
186 | } |
187 | } else { |
188 | /* bytes then Unicode */ |
189 | result=compareBytes(lTable, l, rTable, r, TRUE1); /* lexically, for builder */ |
190 | if(result==0) { |
191 | result=compareUnicode(lTable, l, rTable, r); |
192 | } |
193 | } |
194 | |
195 | if(result!=0) { |
196 | return result; |
197 | } |
198 | |
199 | /* compare the flags */ |
200 | return l->f-r->f; |
201 | } |
202 | U_CDECL_BEGINextern "C" { |
203 | /* sorting by Unicode first sorts mappings directly */ |
204 | static int32_t U_CALLCONV |
205 | compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) { |
206 | return compareMappings( |
207 | (UCMTable *)context, (const UCMapping *)left, |
208 | (UCMTable *)context, (const UCMapping *)right, TRUE1); |
209 | } |
210 | |
211 | /* sorting by bytes first sorts the reverseMap; use indirection to mappings */ |
212 | static int32_t U_CALLCONV |
213 | compareMappingsBytesFirst(const void *context, const void *left, const void *right) { |
214 | UCMTable *table=(UCMTable *)context; |
215 | int32_t l=*(const int32_t *)left, r=*(const int32_t *)right; |
216 | return compareMappings( |
217 | table, table->mappings+l, |
218 | table, table->mappings+r, FALSE0); |
219 | } |
220 | U_CDECL_END} |
221 | |
222 | U_CAPIextern "C" void U_EXPORT2 |
223 | ucm_sortTable(UCMTable *t) { |
224 | UErrorCode errorCode; |
225 | int32_t i; |
226 | |
227 | if(t->isSorted) { |
228 | return; |
229 | } |
230 | |
231 | errorCode=U_ZERO_ERROR; |
232 | |
233 | /* 1. sort by Unicode first */ |
234 | uprv_sortArrayuprv_sortArray_71(t->mappings, t->mappingsLength, sizeof(UCMapping), |
235 | compareMappingsUnicodeFirst, t, |
236 | FALSE0, &errorCode); |
237 | |
238 | /* build the reverseMap */ |
239 | if(t->reverseMap==NULL__null) { |
240 | /* |
241 | * allocate mappingsCapacity instead of mappingsLength so that |
242 | * if mappings are added, the reverseMap need not be |
243 | * reallocated each time |
244 | * (see ucm_moveMappings() and ucm_addMapping()) |
245 | */ |
246 | t->reverseMap=(int32_t *)uprv_mallocuprv_malloc_71(t->mappingsCapacity*sizeof(int32_t)); |
247 | if(t->reverseMap==NULL__null) { |
248 | fprintf(stderrstderr, "ucm error: unable to allocate reverseMap\n"); |
249 | exit(U_MEMORY_ALLOCATION_ERROR); |
250 | } |
251 | } |
252 | for(i=0; i<t->mappingsLength; ++i) { |
253 | t->reverseMap[i]=i; |
254 | } |
255 | |
256 | /* 2. sort reverseMap by mappings bytes first */ |
257 | uprv_sortArrayuprv_sortArray_71(t->reverseMap, t->mappingsLength, sizeof(int32_t), |
258 | compareMappingsBytesFirst, t, |
259 | FALSE0, &errorCode); |
260 | |
261 | if(U_FAILURE(errorCode)) { |
262 | fprintf(stderrstderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n", |
263 | u_errorNameu_errorName_71(errorCode)); |
264 | exit(errorCode); |
265 | } |
266 | |
267 | t->isSorted=TRUE1; |
268 | } |
269 | |
270 | /* |
271 | * remove mappings with their move flag set from the base table |
272 | * and move some of them (with UCM_MOVE_TO_EXT) to the extension table |
273 | */ |
274 | U_CAPIextern "C" void U_EXPORT2 |
275 | ucm_moveMappings(UCMTable *base, UCMTable *ext) { |
276 | UCMapping *mb, *mbLimit; |
277 | int8_t flag; |
278 | |
279 | mb=base->mappings; |
280 | mbLimit=mb+base->mappingsLength; |
281 | |
282 | while(mb<mbLimit) { |
283 | flag=mb->moveFlag; |
284 | if(flag!=0) { |
285 | /* reset the move flag */ |
286 | mb->moveFlag=0; |
287 | |
288 | if(ext!=NULL__null && (flag&UCM_MOVE_TO_EXT)) { |
289 | /* add the mapping to the extension table */ |
290 | ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb)(((mb)->uLen==1) ? &(mb)->u : (base)->codePoints +(mb)->u), UCM_GET_BYTES(base, mb)(((mb)->bLen<=4) ? (mb)->b.bytes : (base)->bytes+ (mb)->b.idx)); |
291 | } |
292 | |
293 | /* remove this mapping: move the last base mapping down and overwrite the current one */ |
294 | if(mb<(mbLimit-1)) { |
295 | uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(mb, mbLimit -1, sizeof(UCMapping)); } while (false); |
296 | } |
297 | --mbLimit; |
298 | --base->mappingsLength; |
299 | base->isSorted=FALSE0; |
300 | } else { |
301 | ++mb; |
302 | } |
303 | } |
304 | } |
305 | |
/* result bits of checkBaseExtUnicode() and checkBaseExtBytes(); they are combined with | */
enum {
    /* at least one mapping had a move flag set */
    NEEDS_MOVE=1,
    /* at least one error was reported */
    HAS_ERRORS=2
};
310 | |
311 | static uint8_t |
312 | checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext, |
313 | UBool moveToExt, UBool intersectBase) { |
314 | (void)baseStates; |
315 | |
316 | UCMapping *mb, *me, *mbLimit, *meLimit; |
317 | int32_t cmp; |
318 | uint8_t result; |
319 | |
320 | mb=base->mappings; |
321 | mbLimit=mb+base->mappingsLength; |
322 | |
323 | me=ext->mappings; |
324 | meLimit=me+ext->mappingsLength; |
325 | |
326 | result=0; |
327 | |
328 | for(;;) { |
329 | /* skip irrelevant mappings on both sides */ |
330 | for(;;) { |
331 | if(mb==mbLimit) { |
332 | return result; |
333 | } |
334 | |
335 | if((0<=mb->f && mb->f<=2) || mb->f==4) { |
336 | break; |
337 | } |
338 | |
339 | ++mb; |
340 | } |
341 | |
342 | for(;;) { |
343 | if(me==meLimit) { |
344 | return result; |
345 | } |
346 | |
347 | if((0<=me->f && me->f<=2) || me->f==4) { |
348 | break; |
349 | } |
350 | |
351 | ++me; |
352 | } |
353 | |
354 | /* compare the base and extension mappings */ |
355 | cmp=compareUnicode(base, mb, ext, me); |
356 | if(cmp<0) { |
357 | if(intersectBase && (intersectBase!=2 || mb->bLen>1)) { |
358 | /* |
359 | * mapping in base but not in ext, move it |
360 | * |
361 | * if ext is DBCS, move DBCS mappings here |
362 | * and check SBCS ones for Unicode prefix below |
363 | */ |
364 | mb->moveFlag|=UCM_MOVE_TO_EXT; |
365 | result|=NEEDS_MOVE; |
366 | |
367 | /* does mb map from an input sequence that is a prefix of me's? */ |
368 | } else if( mb->uLen<me->uLen && |
369 | 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen):: memcmp((((mb)->uLen==1) ? &(mb)->u : (base)-> codePoints+(mb)->u), (((me)->uLen==1) ? &(me)->u : (ext)->codePoints+(me)->u),4*mb->uLen) |
370 | ) { |
371 | if(moveToExt) { |
372 | /* mark this mapping to be moved to the extension table */ |
373 | mb->moveFlag|=UCM_MOVE_TO_EXT; |
374 | result|=NEEDS_MOVE; |
375 | } else { |
376 | fprintf(stderrstderr, |
377 | "ucm error: the base table contains a mapping whose input sequence\n" |
378 | " is a prefix of the input sequence of an extension mapping\n"); |
379 | ucm_printMapping(base, mb, stderrstderr); |
380 | ucm_printMapping(ext, me, stderrstderr); |
381 | result|=HAS_ERRORS; |
382 | } |
383 | } |
384 | |
385 | ++mb; |
386 | } else if(cmp==0) { |
387 | /* |
388 | * same output: remove the extension mapping, |
389 | * otherwise treat as an error |
390 | */ |
391 | if( mb->f==me->f && mb->bLen==me->bLen && |
392 | 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen):: memcmp((((mb)->bLen<=4) ? (mb)->b.bytes : (base)-> bytes+(mb)->b.idx), (((me)->bLen<=4) ? (me)->b.bytes : (ext)->bytes+(me)->b.idx),mb->bLen) |
393 | ) { |
394 | me->moveFlag|=UCM_REMOVE_MAPPING; |
395 | result|=NEEDS_MOVE; |
396 | } else if(intersectBase) { |
397 | /* mapping in base but not in ext, move it */ |
398 | mb->moveFlag|=UCM_MOVE_TO_EXT; |
399 | result|=NEEDS_MOVE; |
400 | } else { |
401 | fprintf(stderrstderr, |
402 | "ucm error: the base table contains a mapping whose input sequence\n" |
403 | " is the same as the input sequence of an extension mapping\n" |
404 | " but it maps differently\n"); |
405 | ucm_printMapping(base, mb, stderrstderr); |
406 | ucm_printMapping(ext, me, stderrstderr); |
407 | result|=HAS_ERRORS; |
408 | } |
409 | |
410 | ++mb; |
411 | } else /* cmp>0 */ { |
412 | ++me; |
413 | } |
414 | } |
415 | } |
416 | |
417 | static uint8_t |
418 | checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext, |
419 | UBool moveToExt, UBool intersectBase) { |
420 | UCMapping *mb, *me; |
421 | int32_t *baseMap, *extMap; |
422 | int32_t b, e, bLimit, eLimit, cmp; |
423 | uint8_t result; |
424 | UBool isSISO; |
425 | |
426 | baseMap=base->reverseMap; |
427 | extMap=ext->reverseMap; |
428 | |
429 | b=e=0; |
430 | bLimit=base->mappingsLength; |
431 | eLimit=ext->mappingsLength; |
432 | |
433 | result=0; |
434 | |
435 | isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO); |
436 | |
437 | for(;;) { |
438 | /* skip irrelevant mappings on both sides */ |
439 | for(;; ++b) { |
440 | if(b==bLimit) { |
441 | return result; |
442 | } |
443 | mb=base->mappings+baseMap[b]; |
444 | |
445 | if(intersectBase==2 && mb->bLen==1) { |
446 | /* |
447 | * comparing a base against a DBCS extension: |
448 | * leave SBCS base mappings alone |
449 | */ |
450 | continue; |
451 | } |
452 | |
453 | if(mb->f==0 || mb->f==3) { |
454 | break; |
455 | } |
456 | } |
457 | |
458 | for(;;) { |
459 | if(e==eLimit) { |
460 | return result; |
461 | } |
462 | me=ext->mappings+extMap[e]; |
463 | |
464 | if(me->f==0 || me->f==3) { |
465 | break; |
466 | } |
467 | |
468 | ++e; |
469 | } |
470 | |
471 | /* compare the base and extension mappings */ |
472 | cmp=compareBytes(base, mb, ext, me, TRUE1); |
473 | if(cmp<0) { |
474 | if(intersectBase) { |
475 | /* mapping in base but not in ext, move it */ |
476 | mb->moveFlag|=UCM_MOVE_TO_EXT; |
477 | result|=NEEDS_MOVE; |
478 | |
479 | /* |
480 | * does mb map from an input sequence that is a prefix of me's? |
481 | * for SI/SO tables, a single byte is never a prefix because it |
482 | * occurs in a separate single-byte state |
483 | */ |
484 | } else if( mb->bLen<me->bLen && |
485 | (!isSISO || mb->bLen>1) && |
486 | 0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen):: memcmp((((mb)->bLen<=4) ? (mb)->b.bytes : (base)-> bytes+(mb)->b.idx), (((me)->bLen<=4) ? (me)->b.bytes : (ext)->bytes+(me)->b.idx),mb->bLen) |
487 | ) { |
488 | if(moveToExt) { |
489 | /* mark this mapping to be moved to the extension table */ |
490 | mb->moveFlag|=UCM_MOVE_TO_EXT; |
491 | result|=NEEDS_MOVE; |
492 | } else { |
493 | fprintf(stderrstderr, |
494 | "ucm error: the base table contains a mapping whose input sequence\n" |
495 | " is a prefix of the input sequence of an extension mapping\n"); |
496 | ucm_printMapping(base, mb, stderrstderr); |
497 | ucm_printMapping(ext, me, stderrstderr); |
498 | result|=HAS_ERRORS; |
499 | } |
500 | } |
501 | |
502 | ++b; |
503 | } else if(cmp==0) { |
504 | /* |
505 | * same output: remove the extension mapping, |
506 | * otherwise treat as an error |
507 | */ |
508 | if( mb->f==me->f && mb->uLen==me->uLen && |
509 | 0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen):: memcmp((((mb)->uLen==1) ? &(mb)->u : (base)-> codePoints+(mb)->u), (((me)->uLen==1) ? &(me)->u : (ext)->codePoints+(me)->u),4*mb->uLen) |
510 | ) { |
511 | me->moveFlag|=UCM_REMOVE_MAPPING; |
512 | result|=NEEDS_MOVE; |
513 | } else if(intersectBase) { |
514 | /* mapping in base but not in ext, move it */ |
515 | mb->moveFlag|=UCM_MOVE_TO_EXT; |
516 | result|=NEEDS_MOVE; |
517 | } else { |
518 | fprintf(stderrstderr, |
519 | "ucm error: the base table contains a mapping whose input sequence\n" |
520 | " is the same as the input sequence of an extension mapping\n" |
521 | " but it maps differently\n"); |
522 | ucm_printMapping(base, mb, stderrstderr); |
523 | ucm_printMapping(ext, me, stderrstderr); |
524 | result|=HAS_ERRORS; |
525 | } |
526 | |
527 | ++b; |
528 | } else /* cmp>0 */ { |
529 | ++e; |
530 | } |
531 | } |
532 | } |
533 | |
534 | U_CAPIextern "C" UBool U_EXPORT2 |
535 | ucm_checkValidity(UCMTable *table, UCMStates *baseStates) { |
536 | UCMapping *m, *mLimit; |
537 | int32_t count; |
538 | UBool isOK; |
539 | |
540 | m=table->mappings; |
541 | mLimit=m+table->mappingsLength; |
542 | isOK=TRUE1; |
543 | |
544 | while(m<mLimit) { |
545 | count=ucm_countChars(baseStates, UCM_GET_BYTES(table, m)(((m)->bLen<=4) ? (m)->b.bytes : (table)->bytes+( m)->b.idx), m->bLen); |
546 | if(count<1) { |
547 | ucm_printMapping(table, m, stderrstderr); |
548 | isOK=FALSE0; |
549 | } |
550 | ++m; |
551 | } |
552 | |
553 | return isOK; |
554 | } |
555 | |
556 | U_CAPIextern "C" UBool U_EXPORT2 |
557 | ucm_checkBaseExt(UCMStates *baseStates, |
558 | UCMTable *base, UCMTable *ext, UCMTable *moveTarget, |
559 | UBool intersectBase) { |
560 | uint8_t result; |
561 | |
562 | /* if we have an extension table, we must always use precision flags */ |
563 | if(base->flagsType&UCM_FLAGS_IMPLICIT) { |
564 | fprintf(stderrstderr, "ucm error: the base table contains mappings without precision flags\n"); |
565 | return FALSE0; |
566 | } |
567 | if(ext->flagsType&UCM_FLAGS_IMPLICIT) { |
568 | fprintf(stderrstderr, "ucm error: extension table contains mappings without precision flags\n"); |
569 | return FALSE0; |
570 | } |
571 | |
572 | /* checking requires both tables to be sorted */ |
573 | ucm_sortTable(base); |
574 | ucm_sortTable(ext); |
575 | |
576 | /* check */ |
577 | result= |
578 | checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=NULL__null), intersectBase)| |
579 | checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=NULL__null), intersectBase); |
580 | |
581 | if(result&HAS_ERRORS) { |
582 | return FALSE0; |
583 | } |
584 | |
585 | if(result&NEEDS_MOVE) { |
586 | ucm_moveMappings(ext, NULL__null); |
587 | ucm_moveMappings(base, moveTarget); |
588 | ucm_sortTable(base); |
589 | ucm_sortTable(ext); |
590 | if(moveTarget!=NULL__null) { |
591 | ucm_sortTable(moveTarget); |
592 | } |
593 | } |
594 | |
595 | return TRUE1; |
596 | } |
597 | |
598 | /* merge tables for rptp2ucm ------------------------------------------------ */ |
599 | |
600 | U_CAPIextern "C" void U_EXPORT2 |
601 | ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable, |
602 | const uint8_t *subchar, int32_t subcharLength, |
603 | uint8_t subchar1) { |
604 | UCMapping *fromUMapping, *toUMapping; |
605 | int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp; |
606 | |
607 | ucm_sortTable(fromUTable); |
608 | ucm_sortTable(toUTable); |
609 | |
610 | fromUMapping=fromUTable->mappings; |
611 | toUMapping=toUTable->mappings; |
612 | |
613 | fromUTop=fromUTable->mappingsLength; |
614 | toUTop=toUTable->mappingsLength; |
615 | |
616 | fromUIndex=toUIndex=0; |
617 | |
618 | while(fromUIndex<fromUTop && toUIndex<toUTop) { |
619 | cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, TRUE1); |
620 | if(cmp==0) { |
621 | /* equal: roundtrip, nothing to do (flags are initially 0) */ |
622 | ++fromUMapping; |
623 | ++toUMapping; |
624 | |
625 | ++fromUIndex; |
626 | ++toUIndex; |
627 | } else if(cmp<0) { |
628 | /* |
629 | * the fromU mapping does not have a toU counterpart: |
630 | * fallback Unicode->codepage |
631 | */ |
632 | if( (fromUMapping->bLen==subcharLength && |
633 | 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength):: memcmp((((fromUMapping)->bLen<=4) ? (fromUMapping)-> b.bytes : (fromUTable)->bytes+(fromUMapping)->b.idx), subchar ,subcharLength)) || |
634 | (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) |
635 | ) { |
636 | fromUMapping->f=2; /* SUB mapping */ |
637 | } else { |
638 | fromUMapping->f=1; /* normal fallback */ |
639 | } |
640 | |
641 | ++fromUMapping; |
642 | ++fromUIndex; |
643 | } else { |
644 | /* |
645 | * the toU mapping does not have a fromU counterpart: |
646 | * (reverse) fallback codepage->Unicode, copy it to the fromU table |
647 | */ |
648 | |
649 | /* ignore reverse fallbacks to Unicode SUB */ |
650 | if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { |
651 | toUMapping->f=3; /* reverse fallback */ |
652 | ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping)(((toUMapping)->uLen==1) ? &(toUMapping)->u : (toUTable )->codePoints+(toUMapping)->u), UCM_GET_BYTES(toUTable, toUMapping)(((toUMapping)->bLen<=4) ? (toUMapping)->b.bytes : ( toUTable)->bytes+(toUMapping)->b.idx)); |
653 | |
654 | /* the table may have been reallocated */ |
655 | fromUMapping=fromUTable->mappings+fromUIndex; |
656 | } |
657 | |
658 | ++toUMapping; |
659 | ++toUIndex; |
660 | } |
661 | } |
662 | |
663 | /* either one or both tables are exhausted */ |
664 | while(fromUIndex<fromUTop) { |
665 | /* leftover fromU mappings are fallbacks */ |
666 | if( (fromUMapping->bLen==subcharLength && |
667 | 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength):: memcmp((((fromUMapping)->bLen<=4) ? (fromUMapping)-> b.bytes : (fromUTable)->bytes+(fromUMapping)->b.idx), subchar ,subcharLength)) || |
668 | (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1) |
669 | ) { |
670 | fromUMapping->f=2; /* SUB mapping */ |
671 | } else { |
672 | fromUMapping->f=1; /* normal fallback */ |
673 | } |
674 | |
675 | ++fromUMapping; |
676 | ++fromUIndex; |
677 | } |
678 | |
679 | while(toUIndex<toUTop) { |
680 | /* leftover toU mappings are reverse fallbacks */ |
681 | |
682 | /* ignore reverse fallbacks to Unicode SUB */ |
683 | if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) { |
684 | toUMapping->f=3; /* reverse fallback */ |
685 | ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping)(((toUMapping)->uLen==1) ? &(toUMapping)->u : (toUTable )->codePoints+(toUMapping)->u), UCM_GET_BYTES(toUTable, toUMapping)(((toUMapping)->bLen<=4) ? (toUMapping)->b.bytes : ( toUTable)->bytes+(toUMapping)->b.idx)); |
686 | } |
687 | |
688 | ++toUMapping; |
689 | ++toUIndex; |
690 | } |
691 | |
692 | fromUTable->isSorted=FALSE0; |
693 | } |
694 | |
695 | /* separate extension mappings out of base table for rptp2ucm --------------- */ |
696 | |
697 | U_CAPIextern "C" UBool U_EXPORT2 |
698 | ucm_separateMappings(UCMFile *ucm, UBool isSISO) { |
699 | UCMTable *table; |
700 | UCMapping *m, *mLimit; |
701 | int32_t type; |
702 | UBool needsMove, isOK; |
703 | |
704 | table=ucm->base; |
705 | m=table->mappings; |
706 | mLimit=m+table->mappingsLength; |
707 | |
708 | needsMove=FALSE0; |
709 | isOK=TRUE1; |
710 | |
711 | for(; m<mLimit; ++m) { |
712 | if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { |
713 | fprintf(stderrstderr, "warning: removing illegal mapping from an SI/SO-stateful table\n"); |
714 | ucm_printMapping(table, m, stderrstderr); |
715 | m->moveFlag|=UCM_REMOVE_MAPPING; |
716 | needsMove=TRUE1; |
717 | continue; |
718 | } |
719 | |
720 | type=ucm_mappingType( |
721 | &ucm->states, m, |
722 | UCM_GET_CODE_POINTS(table, m)(((m)->uLen==1) ? &(m)->u : (table)->codePoints+ (m)->u), UCM_GET_BYTES(table, m)(((m)->bLen<=4) ? (m)->b.bytes : (table)->bytes+( m)->b.idx)); |
723 | if(type<0) { |
724 | /* illegal byte sequence */ |
725 | printMapping(m, UCM_GET_CODE_POINTS(table, m)(((m)->uLen==1) ? &(m)->u : (table)->codePoints+ (m)->u), UCM_GET_BYTES(table, m)(((m)->bLen<=4) ? (m)->b.bytes : (table)->bytes+( m)->b.idx), stderrstderr); |
726 | isOK=FALSE0; |
727 | } else if(type>0) { |
728 | m->moveFlag|=UCM_MOVE_TO_EXT; |
729 | needsMove=TRUE1; |
730 | } |
731 | } |
732 | |
733 | if(!isOK) { |
734 | return FALSE0; |
735 | } |
736 | if(needsMove) { |
737 | ucm_moveMappings(ucm->base, ucm->ext); |
738 | return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE0); |
739 | } else { |
740 | ucm_sortTable(ucm->base); |
741 | return TRUE1; |
742 | } |
743 | } |
744 | |
745 | /* ucm parser --------------------------------------------------------------- */ |
746 | |
747 | U_CAPIextern "C" int8_t U_EXPORT2 |
748 | ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES0x1f], const char *line, const char **ps) { |
749 | const char *s=*ps; |
750 | char *end; |
751 | uint8_t byte; |
752 | int8_t bLen; |
753 | |
754 | bLen=0; |
755 | for(;;) { |
756 | /* skip an optional plus sign */ |
757 | if(bLen>0 && *s=='+') { |
758 | ++s; |
759 | } |
760 | if(*s!='\\') { |
761 | break; |
762 | } |
763 | |
764 | if( s[1]!='x' || |
765 | (byte=(uint8_t)uprv_strtoul(s+2, &end, 16):: strtoul(s+2, &end, 16), end)!=s+4 |
766 | ) { |
767 | fprintf(stderrstderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line); |
768 | return -1; |
769 | } |
770 | |
771 | if(bLen==UCNV_EXT_MAX_BYTES0x1f) { |
772 | fprintf(stderrstderr, "ucm error: too many bytes on \"%s\"\n", line); |
773 | return -1; |
774 | } |
775 | bytes[bLen++]=byte; |
776 | s=end; |
777 | } |
778 | |
779 | *ps=s; |
780 | return bLen; |
781 | } |
782 | |
783 | /* parse a mapping line; must not be empty */ |
784 | U_CAPIextern "C" UBool U_EXPORT2 |
785 | ucm_parseMappingLine(UCMapping *m, |
786 | UChar32 codePoints[UCNV_EXT_MAX_UCHARS19], |
787 | uint8_t bytes[UCNV_EXT_MAX_BYTES0x1f], |
788 | const char *line) { |
789 | const char *s; |
790 | char *end; |
791 | UChar32 cp; |
792 | int32_t u16Length; |
793 | int8_t uLen, bLen, f; |
794 | |
795 | s=line; |
796 | uLen=bLen=0; |
Although the value stored to 'bLen' is used in the enclosing expression, the value is never actually read from 'bLen' | |
797 | |
798 | /* parse code points */ |
799 | for(;;) { |
800 | /* skip an optional plus sign */ |
801 | if(uLen>0 && *s=='+') { |
802 | ++s; |
803 | } |
804 | if(*s!='<') { |
805 | break; |
806 | } |
807 | |
808 | if( s[1]!='U' || |
809 | (cp=(UChar32)uprv_strtoul(s+2, &end, 16):: strtoul(s+2, &end, 16), end)==s+2 || |
810 | *end!='>' |
811 | ) { |
812 | fprintf(stderrstderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line); |
813 | return FALSE0; |
814 | } |
815 | if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)(((cp)&0xfffff800)==0xd800)) { |
816 | fprintf(stderrstderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line); |
817 | return FALSE0; |
818 | } |
819 | |
820 | if(uLen==UCNV_EXT_MAX_UCHARS19) { |
821 | fprintf(stderrstderr, "ucm error: too many code points on \"%s\"\n", line); |
822 | return FALSE0; |
823 | } |
824 | codePoints[uLen++]=cp; |
825 | s=end+1; |
826 | } |
827 | |
828 | if(uLen==0) { |
829 | fprintf(stderrstderr, "ucm error: no Unicode code points on \"%s\"\n", line); |
830 | return FALSE0; |
831 | } else if(uLen==1) { |
832 | m->u=codePoints[0]; |
833 | } else { |
834 | UErrorCode errorCode=U_ZERO_ERROR; |
835 | u_strFromUTF32u_strFromUTF32_71(NULL__null, 0, &u16Length, codePoints, uLen, &errorCode); |
836 | if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || |
837 | u16Length>UCNV_EXT_MAX_UCHARS19 |
838 | ) { |
839 | fprintf(stderrstderr, "ucm error: too many UChars on \"%s\"\n", line); |
840 | return FALSE0; |
841 | } |
842 | } |
843 | |
844 | s=u_skipWhitespace(s); |
845 | |
846 | /* parse bytes */ |
847 | bLen=ucm_parseBytes(bytes, line, &s); |
848 | |
849 | if(bLen<0) { |
850 | return FALSE0; |
851 | } else if(bLen==0) { |
852 | fprintf(stderrstderr, "ucm error: no bytes on \"%s\"\n", line); |
853 | return FALSE0; |
854 | } else if(bLen<=4) { |
855 | uprv_memcpy(m->b.bytes, bytes, bLen)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(m->b.bytes , bytes, bLen); } while (false); |
856 | } |
857 | |
858 | /* skip everything until the fallback indicator, even the start of a comment */ |
859 | for(;;) { |
860 | if(*s==0) { |
861 | f=-1; /* no fallback indicator */ |
862 | break; |
863 | } else if(*s=='|') { |
864 | f=(int8_t)(s[1]-'0'); |
865 | if((uint8_t)f>4) { |
866 | fprintf(stderrstderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line); |
867 | return FALSE0; |
868 | } |
869 | break; |
870 | } |
871 | ++s; |
872 | } |
873 | |
874 | m->uLen=uLen; |
875 | m->bLen=bLen; |
876 | m->f=f; |
877 | return TRUE1; |
878 | } |
879 | |
880 | /* general APIs ------------------------------------------------------------- */ |
881 | |
882 | U_CAPIextern "C" UCMTable * U_EXPORT2 |
883 | ucm_openTable() { |
884 | UCMTable *table=(UCMTable *)uprv_mallocuprv_malloc_71(sizeof(UCMTable)); |
885 | if(table==NULL__null) { |
886 | fprintf(stderrstderr, "ucm error: unable to allocate a UCMTable\n"); |
887 | exit(U_MEMORY_ALLOCATION_ERROR); |
888 | } |
889 | |
890 | memset(table, 0, sizeof(UCMTable)); |
891 | return table; |
892 | } |
893 | |
894 | U_CAPIextern "C" void U_EXPORT2 |
895 | ucm_closeTable(UCMTable *table) { |
896 | if(table!=NULL__null) { |
897 | uprv_freeuprv_free_71(table->mappings); |
898 | uprv_freeuprv_free_71(table->codePoints); |
899 | uprv_freeuprv_free_71(table->bytes); |
900 | uprv_freeuprv_free_71(table->reverseMap); |
901 | uprv_freeuprv_free_71(table); |
902 | } |
903 | } |
904 | |
905 | U_CAPIextern "C" void U_EXPORT2 |
906 | ucm_resetTable(UCMTable *table) { |
907 | if(table!=NULL__null) { |
908 | table->mappingsLength=0; |
909 | table->flagsType=0; |
910 | table->unicodeMask=0; |
911 | table->bytesLength=table->codePointsLength=0; |
912 | table->isSorted=FALSE0; |
913 | } |
914 | } |
915 | |
916 | U_CAPIextern "C" void U_EXPORT2 |
917 | ucm_addMapping(UCMTable *table, |
918 | UCMapping *m, |
919 | UChar32 codePoints[UCNV_EXT_MAX_UCHARS19], |
920 | uint8_t bytes[UCNV_EXT_MAX_BYTES0x1f]) { |
921 | UCMapping *tm; |
922 | UChar32 c; |
923 | int32_t idx; |
924 | |
925 | if(table->mappingsLength>=table->mappingsCapacity) { |
926 | /* make the mappings array larger */ |
927 | if(table->mappingsCapacity==0) { |
928 | table->mappingsCapacity=1000; |
929 | } else { |
930 | table->mappingsCapacity*=10; |
931 | } |
932 | table->mappings=(UCMapping *)uprv_reallocuprv_realloc_71(table->mappings, |
933 | table->mappingsCapacity*sizeof(UCMapping)); |
934 | if(table->mappings==NULL__null) { |
935 | fprintf(stderrstderr, "ucm error: unable to allocate %d UCMappings\n", |
936 | (int)table->mappingsCapacity); |
937 | exit(U_MEMORY_ALLOCATION_ERROR); |
938 | } |
939 | |
940 | if(table->reverseMap!=NULL__null) { |
941 | /* the reverseMap must be reallocated in a new sort */ |
942 | uprv_freeuprv_free_71(table->reverseMap); |
943 | table->reverseMap=NULL__null; |
944 | } |
945 | } |
946 | |
947 | if(m->uLen>1 && table->codePointsCapacity==0) { |
948 | table->codePointsCapacity=10000; |
949 | table->codePoints=(UChar32 *)uprv_mallocuprv_malloc_71(table->codePointsCapacity*4); |
950 | if(table->codePoints==NULL__null) { |
951 | fprintf(stderrstderr, "ucm error: unable to allocate %d UChar32s\n", |
952 | (int)table->codePointsCapacity); |
953 | exit(U_MEMORY_ALLOCATION_ERROR); |
954 | } |
955 | } |
956 | |
957 | if(m->bLen>4 && table->bytesCapacity==0) { |
958 | table->bytesCapacity=10000; |
959 | table->bytes=(uint8_t *)uprv_mallocuprv_malloc_71(table->bytesCapacity); |
960 | if(table->bytes==NULL__null) { |
961 | fprintf(stderrstderr, "ucm error: unable to allocate %d bytes\n", |
962 | (int)table->bytesCapacity); |
963 | exit(U_MEMORY_ALLOCATION_ERROR); |
964 | } |
965 | } |
966 | |
967 | if(m->uLen>1) { |
968 | idx=table->codePointsLength; |
969 | table->codePointsLength+=m->uLen; |
970 | if(table->codePointsLength>table->codePointsCapacity) { |
971 | fprintf(stderrstderr, "ucm error: too many code points in multiple-code point mappings\n"); |
972 | exit(U_MEMORY_ALLOCATION_ERROR); |
973 | } |
974 | |
975 | uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(table-> codePoints+idx, codePoints, (size_t)m->uLen*4); } while (false ); |
976 | m->u=idx; |
977 | } |
978 | |
979 | if(m->bLen>4) { |
980 | idx=table->bytesLength; |
981 | table->bytesLength+=m->bLen; |
982 | if(table->bytesLength>table->bytesCapacity) { |
983 | fprintf(stderrstderr, "ucm error: too many bytes in mappings with >4 charset bytes\n"); |
984 | exit(U_MEMORY_ALLOCATION_ERROR); |
985 | } |
986 | |
987 | uprv_memcpy(table->bytes+idx, bytes, m->bLen)do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(table-> bytes+idx, bytes, m->bLen); } while (false); |
988 | m->b.idx=idx; |
989 | } |
990 | |
991 | /* set unicodeMask */ |
992 | for(idx=0; idx<m->uLen; ++idx) { |
993 | c=codePoints[idx]; |
994 | if(c>=0x10000) { |
995 | table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY1; /* there are supplementary code points */ |
996 | } else if(U_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) { |
997 | table->unicodeMask|=UCNV_HAS_SURROGATES2; /* there are surrogate code points */ |
998 | } |
999 | } |
1000 | |
1001 | /* set flagsType */ |
1002 | if(m->f<0) { |
1003 | table->flagsType|=UCM_FLAGS_IMPLICIT; |
1004 | } else { |
1005 | table->flagsType|=UCM_FLAGS_EXPLICIT; |
1006 | } |
1007 | |
1008 | tm=table->mappings+table->mappingsLength++; |
1009 | uprv_memcpy(tm, m, sizeof(UCMapping))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(tm, m, sizeof (UCMapping)); } while (false); |
1010 | |
1011 | table->isSorted=FALSE0; |
1012 | } |
1013 | |
1014 | U_CAPIextern "C" UCMFile * U_EXPORT2 |
1015 | ucm_open() { |
1016 | UCMFile *ucm=(UCMFile *)uprv_mallocuprv_malloc_71(sizeof(UCMFile)); |
1017 | if(ucm==NULL__null) { |
1018 | fprintf(stderrstderr, "ucm error: unable to allocate a UCMFile\n"); |
1019 | exit(U_MEMORY_ALLOCATION_ERROR); |
1020 | } |
1021 | |
1022 | memset(ucm, 0, sizeof(UCMFile)); |
1023 | |
1024 | ucm->base=ucm_openTable(); |
1025 | ucm->ext=ucm_openTable(); |
1026 | |
1027 | ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT; |
1028 | ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER; |
1029 | ucm->states.outputType=-1; |
1030 | ucm->states.minCharLength=ucm->states.maxCharLength=1; |
1031 | |
1032 | return ucm; |
1033 | } |
1034 | |
1035 | U_CAPIextern "C" void U_EXPORT2 |
1036 | ucm_close(UCMFile *ucm) { |
1037 | if(ucm!=NULL__null) { |
1038 | ucm_closeTable(ucm->base); |
1039 | ucm_closeTable(ucm->ext); |
1040 | uprv_freeuprv_free_71(ucm); |
1041 | } |
1042 | } |
1043 | |
1044 | U_CAPIextern "C" int32_t U_EXPORT2 |
1045 | ucm_mappingType(UCMStates *baseStates, |
1046 | UCMapping *m, |
1047 | UChar32 codePoints[UCNV_EXT_MAX_UCHARS19], |
1048 | uint8_t bytes[UCNV_EXT_MAX_BYTES0x1f]) { |
1049 | (void)codePoints; |
1050 | /* check validity of the bytes and count the characters in them */ |
1051 | int32_t count=ucm_countChars(baseStates, bytes, m->bLen); |
1052 | if(count<1) { |
1053 | /* illegal byte sequence */ |
1054 | return -1; |
1055 | } |
1056 | |
1057 | /* |
1058 | * Suitable for an ICU conversion base table means: |
1059 | * - a 1:1 mapping (1 Unicode code point : 1 byte sequence) |
1060 | * - precision flag 0..3 |
1061 | * - SBCS: any 1:1 mapping |
1062 | * (the table stores additional bits to distinguish mapping types) |
1063 | * - MBCS: not a |2 SUB mapping for <subchar1> |
1064 | * - MBCS: not a |1 fallback to 0x00 |
1065 | * - MBCS: not a multi-byte mapping with leading 0x00 bytes |
1066 | * |
1067 | * Further restrictions for fromUnicode tables |
1068 | * are enforced in makeconv (MBCSOkForBaseFromUnicode()). |
1069 | * |
1070 | * All of the MBCS fromUnicode specific tests could be removed from here, |
1071 | * but the ones above are for unusual mappings, and removing the tests |
1072 | * from here would change canonucm output which seems gratuitous. |
1073 | * (Markus Scherer 2006-nov-28) |
1074 | * |
1075 | * Exception: All implicit mappings (f<0) that need to be moved |
1076 | * because of fromUnicode restrictions _must_ be moved here because |
1077 | * makeconv uses a hack for moving mappings only for the fromUnicode table |
1078 | * that only works with non-negative values of f. |
1079 | */ |
1080 | if( m->uLen==1 && count==1 && m->f<=3 && |
1081 | (baseStates->maxCharLength==1 || |
1082 | !((m->f==2 && m->bLen==1) || |
1083 | (m->f==1 && bytes[0]==0) || |
1084 | (m->f<=1 && m->bLen>1 && bytes[0]==0))) |
1085 | ) { |
1086 | return 0; /* suitable for a base table */ |
1087 | } else { |
1088 | return 1; /* needs to go into an extension table */ |
1089 | } |
1090 | } |
1091 | |
1092 | U_CAPIextern "C" UBool U_EXPORT2 |
1093 | ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates, |
1094 | UCMapping *m, |
1095 | UChar32 codePoints[UCNV_EXT_MAX_UCHARS19], |
1096 | uint8_t bytes[UCNV_EXT_MAX_BYTES0x1f]) { |
1097 | int32_t type; |
1098 | |
1099 | if(m->f==2 && m->uLen>1) { |
1100 | fprintf(stderrstderr, "ucm error: illegal <subchar1> |2 mapping from multiple code points\n"); |
1101 | printMapping(m, codePoints, bytes, stderrstderr); |
1102 | return FALSE0; |
1103 | } |
1104 | |
1105 | if(baseStates!=NULL__null) { |
1106 | /* check validity of the bytes and count the characters in them */ |
1107 | type=ucm_mappingType(baseStates, m, codePoints, bytes); |
1108 | if(type<0) { |
1109 | /* illegal byte sequence */ |
1110 | printMapping(m, codePoints, bytes, stderrstderr); |
1111 | return FALSE0; |
1112 | } |
1113 | } else { |
1114 | /* not used - adding a mapping for an extension-only table before its base table is read */ |
1115 | type=1; |
1116 | } |
1117 | |
1118 | /* |
1119 | * Add the mapping to the base table if this is requested and suitable. |
1120 | * Otherwise, add it to the extension table. |
1121 | */ |
1122 | if(forBase && type==0) { |
1123 | ucm_addMapping(ucm->base, m, codePoints, bytes); |
1124 | } else { |
1125 | ucm_addMapping(ucm->ext, m, codePoints, bytes); |
1126 | } |
1127 | |
1128 | return TRUE1; |
1129 | } |
1130 | |
1131 | U_CAPIextern "C" UBool U_EXPORT2 |
1132 | ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) { |
1133 | UCMapping m={ 0, {0}, 0, 0, 0, 0 }; |
1134 | UChar32 codePoints[UCNV_EXT_MAX_UCHARS19]; |
1135 | uint8_t bytes[UCNV_EXT_MAX_BYTES0x1f]; |
1136 | |
1137 | const char *s; |
1138 | |
1139 | /* ignore empty and comment lines */ |
1140 | if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') { |
1141 | return TRUE1; |
1142 | } |
1143 | |
1144 | return |
1145 | ucm_parseMappingLine(&m, codePoints, bytes, line) && |
1146 | ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes); |
1147 | } |
1148 | |
1149 | U_CAPIextern "C" void U_EXPORT2 |
1150 | ucm_readTable(UCMFile *ucm, FileStream* convFile, |
1151 | UBool forBase, UCMStates *baseStates, |
1152 | UErrorCode *pErrorCode) { |
1153 | char line[500]; |
1154 | char *end; |
1155 | UBool isOK; |
1156 | |
1157 | if(U_FAILURE(*pErrorCode)) { |
1158 | return; |
1159 | } |
1160 | |
1161 | isOK=TRUE1; |
1162 | |
1163 | for(;;) { |
1164 | /* read the next line */ |
1165 | if(!T_FileStream_readLine(convFile, line, sizeof(line))) { |
1166 | fprintf(stderrstderr, "incomplete charmap section\n"); |
1167 | isOK=FALSE0; |
1168 | break; |
1169 | } |
1170 | |
1171 | /* remove CR LF */ |
1172 | end=uprv_strchr(line, 0):: strchr(line, 0); |
1173 | while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { |
1174 | --end; |
1175 | } |
1176 | *end=0; |
1177 | |
1178 | /* ignore empty and comment lines */ |
1179 | if(line[0]==0 || line[0]=='#') { |
1180 | continue; |
1181 | } |
1182 | |
1183 | /* stop at the end of the mapping table */ |
1184 | if(0==uprv_strcmp(line, "END CHARMAP"):: strcmp(line, "END CHARMAP")) { |
1185 | break; |
1186 | } |
1187 | |
1188 | isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); |
1189 | } |
1190 | |
1191 | if(!isOK) { |
1192 | *pErrorCode=U_INVALID_TABLE_FORMAT; |
1193 | } |
1194 | } |
1195 | #endif |