File: | out/../deps/icu-small/source/tools/toolutil/uparse.cpp |
Warning: | line 161, column 17 Value stored to 'i' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2000-2012, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: uparse.c |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2000apr18 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * This file provides a parser for files that are delimited by one single |
19 | * character like ';' or TAB. Example: the Unicode Character Properties files |
20 | * like UnicodeData.txt are semicolon-delimited. |
21 | */ |
22 | |
23 | #include "unicode/utypes.h" |
24 | #include "unicode/uchar.h" |
25 | #include "unicode/ustring.h" |
26 | #include "unicode/utf16.h" |
27 | #include "cstring.h" |
28 | #include "filestrm.h" |
29 | #include "uparse.h" |
30 | #include "ustr_imp.h" |
31 | |
32 | #include <stdio.h> |
33 | |
34 | U_CAPIextern "C" const char * U_EXPORT2 |
35 | u_skipWhitespace(const char *s) { |
36 | while(U_IS_INV_WHITESPACE(*s)((*s)==' ' || (*s)=='\t' || (*s)=='\r' || (*s)=='\n')) { |
37 | ++s; |
38 | } |
39 | return s; |
40 | } |
41 | |
42 | U_CAPIextern "C" char * U_EXPORT2 |
43 | u_rtrim(char *s) { |
44 | char *end=uprv_strchr(s, 0):: strchr(s, 0); |
45 | while(s<end && U_IS_INV_WHITESPACE(*(end-1))((*(end-1))==' ' || (*(end-1))=='\t' || (*(end-1))=='\r' || ( *(end-1))=='\n')) { |
46 | *--end = 0; |
47 | } |
48 | return end; |
49 | } |
50 | |
51 | /* |
52 | * If the string starts with # @missing: then return the pointer to the |
53 | * following non-whitespace character. |
54 | * Otherwise return the original pointer. |
55 | * Unicode 5.0 adds such lines in some data files to document |
56 | * default property values. |
57 | * Poor man's regex for variable amounts of white space. |
58 | */ |
59 | static const char * |
60 | getMissingLimit(const char *s) { |
61 | const char *s0=s; |
62 | if( |
63 | *(s=u_skipWhitespace(s))=='#' && |
64 | *(s=u_skipWhitespace(s+1))=='@' && |
65 | 0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) && |
66 | *(s=u_skipWhitespace(s+7))==':' |
67 | ) { |
68 | return u_skipWhitespace(s+1); |
69 | } else { |
70 | return s0; |
71 | } |
72 | } |
73 | |
74 | U_CAPIextern "C" void U_EXPORT2 |
75 | u_parseDelimitedFile(const char *filename, char delimiter, |
76 | char *fields[][2], int32_t fieldCount, |
77 | UParseLineFn *lineFn, void *context, |
78 | UErrorCode *pErrorCode) { |
79 | FileStream *file; |
80 | char line[10000]; |
81 | char *start, *limit; |
82 | int32_t i, length; |
83 | |
84 | if(U_FAILURE(*pErrorCode)) { |
85 | return; |
86 | } |
87 | |
88 | if(fields==NULL__null || lineFn==NULL__null || fieldCount<=0) { |
89 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
90 | return; |
91 | } |
92 | |
93 | if(filename==NULL__null || *filename==0 || (*filename=='-' && filename[1]==0)) { |
94 | filename=NULL__null; |
95 | file=T_FileStream_stdin(); |
96 | } else { |
97 | file=T_FileStream_open(filename, "r"); |
98 | } |
99 | if(file==NULL__null) { |
100 | *pErrorCode=U_FILE_ACCESS_ERROR; |
101 | return; |
102 | } |
103 | |
104 | while(T_FileStream_readLine(file, line, sizeof(line))!=NULL__null) { |
105 | /* remove trailing newline characters */ |
106 | length=(int32_t)(u_rtrim(line)-line); |
107 | |
108 | /* |
109 | * detect a line with # @missing: |
110 | * start parsing after that, or else from the beginning of the line |
111 | * set the default warning for @missing lines |
112 | */ |
113 | start=(char *)getMissingLimit(line); |
114 | if(start==line) { |
115 | *pErrorCode=U_ZERO_ERROR; |
116 | } else { |
117 | *pErrorCode=U_USING_DEFAULT_WARNING; |
118 | } |
119 | |
120 | /* skip this line if it is empty or a comment */ |
121 | if(*start==0 || *start=='#') { |
122 | continue; |
123 | } |
124 | |
125 | /* remove in-line comments */ |
126 | limit=uprv_strchr(start, '#'):: strchr(start, '#'); |
127 | if(limit!=NULL__null) { |
128 | /* get white space before the pound sign */ |
129 | while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))((*(limit-1))==' ' || (*(limit-1))=='\t' || (*(limit-1))=='\r' || (*(limit-1))=='\n')) { |
130 | --limit; |
131 | } |
132 | |
133 | /* truncate the line */ |
134 | *limit=0; |
135 | } |
136 | |
137 | /* skip lines with only whitespace */ |
138 | if(u_skipWhitespace(start)[0]==0) { |
139 | continue; |
140 | } |
141 | |
142 | /* for each field, call the corresponding field function */ |
143 | for(i=0; i<fieldCount; ++i) { |
144 | /* set the limit pointer of this field */ |
145 | limit=start; |
146 | while(*limit!=delimiter && *limit!=0) { |
147 | ++limit; |
148 | } |
149 | |
150 | /* set the field start and limit in the fields array */ |
151 | fields[i][0]=start; |
152 | fields[i][1]=limit; |
153 | |
154 | /* set start to the beginning of the next field, if any */ |
155 | start=limit; |
156 | if(*start!=0) { |
157 | ++start; |
158 | } else if(i+1<fieldCount) { |
159 | *pErrorCode=U_PARSE_ERROR; |
160 | limit=line+length; |
161 | i=fieldCount; |
Value stored to 'i' is never read | |
162 | break; |
163 | } |
164 | } |
165 | |
166 | /* too few fields? */ |
167 | if(U_FAILURE(*pErrorCode)) { |
168 | break; |
169 | } |
170 | |
171 | /* call the field function */ |
172 | lineFn(context, fields, fieldCount, pErrorCode); |
173 | if(U_FAILURE(*pErrorCode)) { |
174 | break; |
175 | } |
176 | } |
177 | |
178 | if(filename!=NULL__null) { |
179 | T_FileStream_close(file); |
180 | } |
181 | } |
182 | |
183 | /* |
184 | * parse a list of code points |
185 | * store them as a UTF-32 string in dest[destCapacity] |
186 | * return the number of code points |
187 | */ |
188 | U_CAPIextern "C" int32_t U_EXPORT2 |
189 | u_parseCodePoints(const char *s, |
190 | uint32_t *dest, int32_t destCapacity, |
191 | UErrorCode *pErrorCode) { |
192 | char *end; |
193 | uint32_t value; |
194 | int32_t count; |
195 | |
196 | if(U_FAILURE(*pErrorCode)) { |
197 | return 0; |
198 | } |
199 | if(s==NULL__null || destCapacity<0 || (destCapacity>0 && dest==NULL__null)) { |
200 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
201 | return 0; |
202 | } |
203 | |
204 | count=0; |
205 | for(;;) { |
206 | s=u_skipWhitespace(s); |
207 | if(*s==';' || *s==0) { |
208 | return count; |
209 | } |
210 | |
211 | /* read one code point */ |
212 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
213 | if(end<=s || (!U_IS_INV_WHITESPACE(*end)((*end)==' ' || (*end)=='\t' || (*end)=='\r' || (*end)=='\n') && *end!=';' && *end!=0) || value>=0x110000) { |
214 | *pErrorCode=U_PARSE_ERROR; |
215 | return 0; |
216 | } |
217 | |
218 | /* append it to the destination array */ |
219 | if(count<destCapacity) { |
220 | dest[count++]=value; |
221 | } else { |
222 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
223 | } |
224 | |
225 | /* go to the following characters */ |
226 | s=end; |
227 | } |
228 | } |
229 | |
230 | /* |
231 | * parse a list of code points |
232 | * store them as a string in dest[destCapacity] |
233 | * set the first code point in *pFirst |
234 | * @return The length of the string in numbers of UChars. |
235 | */ |
236 | U_CAPIextern "C" int32_t U_EXPORT2 |
237 | u_parseString(const char *s, |
238 | UChar *dest, int32_t destCapacity, |
239 | uint32_t *pFirst, |
240 | UErrorCode *pErrorCode) { |
241 | char *end; |
242 | uint32_t value; |
243 | int32_t destLength; |
244 | |
245 | if(U_FAILURE(*pErrorCode)) { |
246 | return 0; |
247 | } |
248 | if(s==NULL__null || destCapacity<0 || (destCapacity>0 && dest==NULL__null)) { |
249 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
250 | return 0; |
251 | } |
252 | |
253 | if(pFirst!=NULL__null) { |
254 | *pFirst=0xffffffff; |
255 | } |
256 | |
257 | destLength=0; |
258 | for(;;) { |
259 | s=u_skipWhitespace(s); |
260 | if(*s==';' || *s==0) { |
261 | if(destLength<destCapacity) { |
262 | dest[destLength]=0; |
263 | } else if(destLength==destCapacity) { |
264 | *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; |
265 | } else { |
266 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
267 | } |
268 | return destLength; |
269 | } |
270 | |
271 | /* read one code point */ |
272 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
273 | if(end<=s || (!U_IS_INV_WHITESPACE(*end)((*end)==' ' || (*end)=='\t' || (*end)=='\r' || (*end)=='\n') && *end!=';' && *end!=0) || value>=0x110000) { |
274 | *pErrorCode=U_PARSE_ERROR; |
275 | return 0; |
276 | } |
277 | |
278 | /* store the first code point */ |
279 | if(pFirst!=NULL__null) { |
280 | *pFirst=value; |
281 | pFirst=NULL__null; |
282 | } |
283 | |
284 | /* append it to the destination array */ |
285 | if((destLength+U16_LENGTH(value)((uint32_t)(value)<=0xffff ? 1 : 2))<=destCapacity) { |
286 | U16_APPEND_UNSAFE(dest, destLength, value)do { if((uint32_t)(value)<=0xffff) { (dest)[(destLength)++ ]=(uint16_t)(value); } else { (dest)[(destLength)++]=(uint16_t )(((value)>>10)+0xd7c0); (dest)[(destLength)++]=(uint16_t )(((value)&0x3ff)|0xdc00); } } while (false); |
287 | } else { |
288 | destLength+=U16_LENGTH(value)((uint32_t)(value)<=0xffff ? 1 : 2); |
289 | } |
290 | |
291 | /* go to the following characters */ |
292 | s=end; |
293 | } |
294 | } |
295 | |
296 | /* read a range like start or start..end */ |
297 | U_CAPIextern "C" int32_t U_EXPORT2 |
298 | u_parseCodePointRangeAnyTerminator(const char *s, |
299 | uint32_t *pStart, uint32_t *pEnd, |
300 | const char **terminator, |
301 | UErrorCode *pErrorCode) { |
302 | char *end; |
303 | uint32_t value; |
304 | |
305 | if(U_FAILURE(*pErrorCode)) { |
306 | return 0; |
307 | } |
308 | if(s==NULL__null || pStart==NULL__null || pEnd==NULL__null) { |
309 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
310 | return 0; |
311 | } |
312 | |
313 | /* read the start code point */ |
314 | s=u_skipWhitespace(s); |
315 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
316 | if(end<=s || value>=0x110000) { |
317 | *pErrorCode=U_PARSE_ERROR; |
318 | return 0; |
319 | } |
320 | *pStart=*pEnd=value; |
321 | |
322 | /* is there a "..end"? */ |
323 | s=u_skipWhitespace(end); |
324 | if(*s!='.' || s[1]!='.') { |
325 | *terminator=end; |
326 | return 1; |
327 | } |
328 | s=u_skipWhitespace(s+2); |
329 | |
330 | /* read the end code point */ |
331 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
332 | if(end<=s || value>=0x110000) { |
333 | *pErrorCode=U_PARSE_ERROR; |
334 | return 0; |
335 | } |
336 | *pEnd=value; |
337 | |
338 | /* is this a valid range? */ |
339 | if(value<*pStart) { |
340 | *pErrorCode=U_PARSE_ERROR; |
341 | return 0; |
342 | } |
343 | |
344 | *terminator=end; |
345 | return value-*pStart+1; |
346 | } |
347 | |
348 | U_CAPIextern "C" int32_t U_EXPORT2 |
349 | u_parseCodePointRange(const char *s, |
350 | uint32_t *pStart, uint32_t *pEnd, |
351 | UErrorCode *pErrorCode) { |
352 | const char *terminator; |
353 | int32_t rangeLength= |
354 | u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); |
355 | if(U_SUCCESS(*pErrorCode)) { |
356 | terminator=u_skipWhitespace(terminator); |
357 | if(*terminator!=';' && *terminator!=0) { |
358 | *pErrorCode=U_PARSE_ERROR; |
359 | return 0; |
360 | } |
361 | } |
362 | return rangeLength; |
363 | } |
364 | |
365 | U_CAPIextern "C" int32_t U_EXPORT2 |
366 | u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { |
367 | const char *read = source; |
368 | int32_t i = 0; |
369 | unsigned int value = 0; |
370 | if(sLen == -1) { |
371 | sLen = (int32_t)strlen(source); |
372 | } |
373 | |
374 | while(read < source+sLen) { |
375 | sscanf(read, "%2x", &value); |
376 | if(i < destCapacity) { |
377 | dest[i] = (char)value; |
378 | } |
379 | i++; |
380 | read += 2; |
381 | } |
382 | return u_terminateCharsu_terminateChars_71(dest, destCapacity, i, status); |
383 | } |