| File: | out/../deps/icu-small/source/tools/toolutil/uparse.cpp |
| Warning: | line 160, column 17 Value stored to 'limit' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * |
| 6 | * Copyright (C) 2000-2012, International Business Machines |
| 7 | * Corporation and others. All Rights Reserved. |
| 8 | * |
| 9 | ******************************************************************************* |
| 10 | * file name: uparse.c |
| 11 | * encoding: UTF-8 |
| 12 | * tab size: 8 (not used) |
| 13 | * indentation:4 |
| 14 | * |
| 15 | * created on: 2000apr18 |
| 16 | * created by: Markus W. Scherer |
| 17 | * |
| 18 | * This file provides a parser for files that are delimited by one single |
| 19 | * character like ';' or TAB. Example: the Unicode Character Properties files |
| 20 | * like UnicodeData.txt are semicolon-delimited. |
| 21 | */ |
| 22 | |
| 23 | #include "unicode/utypes.h" |
| 24 | #include "unicode/uchar.h" |
| 25 | #include "unicode/ustring.h" |
| 26 | #include "unicode/utf16.h" |
| 27 | #include "cstring.h" |
| 28 | #include "filestrm.h" |
| 29 | #include "uparse.h" |
| 30 | #include "ustr_imp.h" |
| 31 | |
| 32 | #include <stdio.h> |
| 33 | |
| 34 | U_CAPIextern "C" const char * U_EXPORT2 |
| 35 | u_skipWhitespace(const char *s) { |
| 36 | while(U_IS_INV_WHITESPACE(*s)((*s)==' ' || (*s)=='\t' || (*s)=='\r' || (*s)=='\n')) { |
| 37 | ++s; |
| 38 | } |
| 39 | return s; |
| 40 | } |
| 41 | |
| 42 | U_CAPIextern "C" char * U_EXPORT2 |
| 43 | u_rtrim(char *s) { |
| 44 | char *end=uprv_strchr(s, 0):: strchr(s, 0); |
| 45 | while(s<end && U_IS_INV_WHITESPACE(*(end-1))((*(end-1))==' ' || (*(end-1))=='\t' || (*(end-1))=='\r' || ( *(end-1))=='\n')) { |
| 46 | *--end = 0; |
| 47 | } |
| 48 | return end; |
| 49 | } |
| 50 | |
| 51 | /* |
| 52 | * If the string starts with # @missing: then return the pointer to the |
| 53 | * following non-whitespace character. |
| 54 | * Otherwise return the original pointer. |
| 55 | * Unicode 5.0 adds such lines in some data files to document |
| 56 | * default property values. |
| 57 | * Poor man's regex for variable amounts of white space. |
| 58 | */ |
| 59 | static const char * |
| 60 | getMissingLimit(const char *s) { |
| 61 | const char *s0=s; |
| 62 | if( |
| 63 | *(s=u_skipWhitespace(s))=='#' && |
| 64 | *(s=u_skipWhitespace(s+1))=='@' && |
| 65 | 0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) && |
| 66 | *(s=u_skipWhitespace(s+7))==':' |
| 67 | ) { |
| 68 | return u_skipWhitespace(s+1); |
| 69 | } else { |
| 70 | return s0; |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | U_CAPIextern "C" void U_EXPORT2 |
| 75 | u_parseDelimitedFile(const char *filename, char delimiter, |
| 76 | char *fields[][2], int32_t fieldCount, |
| 77 | UParseLineFn *lineFn, void *context, |
| 78 | UErrorCode *pErrorCode) { |
| 79 | FileStream *file; |
| 80 | char line[10000]; |
| 81 | char *start, *limit; |
| 82 | int32_t i, length; |
| 83 | |
| 84 | if(U_FAILURE(*pErrorCode)) { |
| 85 | return; |
| 86 | } |
| 87 | |
| 88 | if(fields==NULL__null || lineFn==NULL__null || fieldCount<=0) { |
| 89 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 90 | return; |
| 91 | } |
| 92 | |
| 93 | if(filename==NULL__null || *filename==0 || (*filename=='-' && filename[1]==0)) { |
| 94 | filename=NULL__null; |
| 95 | file=T_FileStream_stdin(); |
| 96 | } else { |
| 97 | file=T_FileStream_open(filename, "r"); |
| 98 | } |
| 99 | if(file==NULL__null) { |
| 100 | *pErrorCode=U_FILE_ACCESS_ERROR; |
| 101 | return; |
| 102 | } |
| 103 | |
| 104 | while(T_FileStream_readLine(file, line, sizeof(line))!=NULL__null) { |
| 105 | /* remove trailing newline characters */ |
| 106 | length=(int32_t)(u_rtrim(line)-line); |
| 107 | |
| 108 | /* |
| 109 | * detect a line with # @missing: |
| 110 | * start parsing after that, or else from the beginning of the line |
| 111 | * set the default warning for @missing lines |
| 112 | */ |
| 113 | start=(char *)getMissingLimit(line); |
| 114 | if(start==line) { |
| 115 | *pErrorCode=U_ZERO_ERROR; |
| 116 | } else { |
| 117 | *pErrorCode=U_USING_DEFAULT_WARNING; |
| 118 | } |
| 119 | |
| 120 | /* skip this line if it is empty or a comment */ |
| 121 | if(*start==0 || *start=='#') { |
| 122 | continue; |
| 123 | } |
| 124 | |
| 125 | /* remove in-line comments */ |
| 126 | limit=uprv_strchr(start, '#'):: strchr(start, '#'); |
| 127 | if(limit!=NULL__null) { |
| 128 | /* get white space before the pound sign */ |
| 129 | while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))((*(limit-1))==' ' || (*(limit-1))=='\t' || (*(limit-1))=='\r' || (*(limit-1))=='\n')) { |
| 130 | --limit; |
| 131 | } |
| 132 | |
| 133 | /* truncate the line */ |
| 134 | *limit=0; |
| 135 | } |
| 136 | |
| 137 | /* skip lines with only whitespace */ |
| 138 | if(u_skipWhitespace(start)[0]==0) { |
| 139 | continue; |
| 140 | } |
| 141 | |
| 142 | /* for each field, call the corresponding field function */ |
| 143 | for(i=0; i<fieldCount; ++i) { |
| 144 | /* set the limit pointer of this field */ |
| 145 | limit=start; |
| 146 | while(*limit!=delimiter && *limit!=0) { |
| 147 | ++limit; |
| 148 | } |
| 149 | |
| 150 | /* set the field start and limit in the fields array */ |
| 151 | fields[i][0]=start; |
| 152 | fields[i][1]=limit; |
| 153 | |
| 154 | /* set start to the beginning of the next field, if any */ |
| 155 | start=limit; |
| 156 | if(*start!=0) { |
| 157 | ++start; |
| 158 | } else if(i+1<fieldCount) { |
| 159 | *pErrorCode=U_PARSE_ERROR; |
| 160 | limit=line+length; |
Value stored to 'limit' is never read | |
| 161 | i=fieldCount; |
| 162 | break; |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | /* too few fields? */ |
| 167 | if(U_FAILURE(*pErrorCode)) { |
| 168 | break; |
| 169 | } |
| 170 | |
| 171 | /* call the field function */ |
| 172 | lineFn(context, fields, fieldCount, pErrorCode); |
| 173 | if(U_FAILURE(*pErrorCode)) { |
| 174 | break; |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | if(filename!=NULL__null) { |
| 179 | T_FileStream_close(file); |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | /* |
| 184 | * parse a list of code points |
| 185 | * store them as a UTF-32 string in dest[destCapacity] |
| 186 | * return the number of code points |
| 187 | */ |
| 188 | U_CAPIextern "C" int32_t U_EXPORT2 |
| 189 | u_parseCodePoints(const char *s, |
| 190 | uint32_t *dest, int32_t destCapacity, |
| 191 | UErrorCode *pErrorCode) { |
| 192 | char *end; |
| 193 | uint32_t value; |
| 194 | int32_t count; |
| 195 | |
| 196 | if(U_FAILURE(*pErrorCode)) { |
| 197 | return 0; |
| 198 | } |
| 199 | if(s==NULL__null || destCapacity<0 || (destCapacity>0 && dest==NULL__null)) { |
| 200 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 201 | return 0; |
| 202 | } |
| 203 | |
| 204 | count=0; |
| 205 | for(;;) { |
| 206 | s=u_skipWhitespace(s); |
| 207 | if(*s==';' || *s==0) { |
| 208 | return count; |
| 209 | } |
| 210 | |
| 211 | /* read one code point */ |
| 212 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
| 213 | if(end<=s || (!U_IS_INV_WHITESPACE(*end)((*end)==' ' || (*end)=='\t' || (*end)=='\r' || (*end)=='\n') && *end!=';' && *end!=0) || value>=0x110000) { |
| 214 | *pErrorCode=U_PARSE_ERROR; |
| 215 | return 0; |
| 216 | } |
| 217 | |
| 218 | /* append it to the destination array */ |
| 219 | if(count<destCapacity) { |
| 220 | dest[count++]=value; |
| 221 | } else { |
| 222 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| 223 | } |
| 224 | |
| 225 | /* go to the following characters */ |
| 226 | s=end; |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | /* |
| 231 | * parse a list of code points |
| 232 | * store them as a string in dest[destCapacity] |
| 233 | * set the first code point in *pFirst |
| 234 | * @return The length of the string in numbers of UChars. |
| 235 | */ |
| 236 | U_CAPIextern "C" int32_t U_EXPORT2 |
| 237 | u_parseString(const char *s, |
| 238 | UChar *dest, int32_t destCapacity, |
| 239 | uint32_t *pFirst, |
| 240 | UErrorCode *pErrorCode) { |
| 241 | char *end; |
| 242 | uint32_t value; |
| 243 | int32_t destLength; |
| 244 | |
| 245 | if(U_FAILURE(*pErrorCode)) { |
| 246 | return 0; |
| 247 | } |
| 248 | if(s==NULL__null || destCapacity<0 || (destCapacity>0 && dest==NULL__null)) { |
| 249 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 250 | return 0; |
| 251 | } |
| 252 | |
| 253 | if(pFirst!=NULL__null) { |
| 254 | *pFirst=0xffffffff; |
| 255 | } |
| 256 | |
| 257 | destLength=0; |
| 258 | for(;;) { |
| 259 | s=u_skipWhitespace(s); |
| 260 | if(*s==';' || *s==0) { |
| 261 | if(destLength<destCapacity) { |
| 262 | dest[destLength]=0; |
| 263 | } else if(destLength==destCapacity) { |
| 264 | *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; |
| 265 | } else { |
| 266 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; |
| 267 | } |
| 268 | return destLength; |
| 269 | } |
| 270 | |
| 271 | /* read one code point */ |
| 272 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
| 273 | if(end<=s || (!U_IS_INV_WHITESPACE(*end)((*end)==' ' || (*end)=='\t' || (*end)=='\r' || (*end)=='\n') && *end!=';' && *end!=0) || value>=0x110000) { |
| 274 | *pErrorCode=U_PARSE_ERROR; |
| 275 | return 0; |
| 276 | } |
| 277 | |
| 278 | /* store the first code point */ |
| 279 | if(pFirst!=NULL__null) { |
| 280 | *pFirst=value; |
| 281 | pFirst=NULL__null; |
| 282 | } |
| 283 | |
| 284 | /* append it to the destination array */ |
| 285 | if((destLength+U16_LENGTH(value)((uint32_t)(value)<=0xffff ? 1 : 2))<=destCapacity) { |
| 286 | U16_APPEND_UNSAFE(dest, destLength, value)do { if((uint32_t)(value)<=0xffff) { (dest)[(destLength)++ ]=(uint16_t)(value); } else { (dest)[(destLength)++]=(uint16_t )(((value)>>10)+0xd7c0); (dest)[(destLength)++]=(uint16_t )(((value)&0x3ff)|0xdc00); } } while (false); |
| 287 | } else { |
| 288 | destLength+=U16_LENGTH(value)((uint32_t)(value)<=0xffff ? 1 : 2); |
| 289 | } |
| 290 | |
| 291 | /* go to the following characters */ |
| 292 | s=end; |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | /* read a range like start or start..end */ |
| 297 | U_CAPIextern "C" int32_t U_EXPORT2 |
| 298 | u_parseCodePointRangeAnyTerminator(const char *s, |
| 299 | uint32_t *pStart, uint32_t *pEnd, |
| 300 | const char **terminator, |
| 301 | UErrorCode *pErrorCode) { |
| 302 | char *end; |
| 303 | uint32_t value; |
| 304 | |
| 305 | if(U_FAILURE(*pErrorCode)) { |
| 306 | return 0; |
| 307 | } |
| 308 | if(s==NULL__null || pStart==NULL__null || pEnd==NULL__null) { |
| 309 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 310 | return 0; |
| 311 | } |
| 312 | |
| 313 | /* read the start code point */ |
| 314 | s=u_skipWhitespace(s); |
| 315 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
| 316 | if(end<=s || value>=0x110000) { |
| 317 | *pErrorCode=U_PARSE_ERROR; |
| 318 | return 0; |
| 319 | } |
| 320 | *pStart=*pEnd=value; |
| 321 | |
| 322 | /* is there a "..end"? */ |
| 323 | s=u_skipWhitespace(end); |
| 324 | if(*s!='.' || s[1]!='.') { |
| 325 | *terminator=end; |
| 326 | return 1; |
| 327 | } |
| 328 | s=u_skipWhitespace(s+2); |
| 329 | |
| 330 | /* read the end code point */ |
| 331 | value=(uint32_t)uprv_strtoul(s, &end, 16):: strtoul(s, &end, 16); |
| 332 | if(end<=s || value>=0x110000) { |
| 333 | *pErrorCode=U_PARSE_ERROR; |
| 334 | return 0; |
| 335 | } |
| 336 | *pEnd=value; |
| 337 | |
| 338 | /* is this a valid range? */ |
| 339 | if(value<*pStart) { |
| 340 | *pErrorCode=U_PARSE_ERROR; |
| 341 | return 0; |
| 342 | } |
| 343 | |
| 344 | *terminator=end; |
| 345 | return value-*pStart+1; |
| 346 | } |
| 347 | |
| 348 | U_CAPIextern "C" int32_t U_EXPORT2 |
| 349 | u_parseCodePointRange(const char *s, |
| 350 | uint32_t *pStart, uint32_t *pEnd, |
| 351 | UErrorCode *pErrorCode) { |
| 352 | const char *terminator; |
| 353 | int32_t rangeLength= |
| 354 | u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); |
| 355 | if(U_SUCCESS(*pErrorCode)) { |
| 356 | terminator=u_skipWhitespace(terminator); |
| 357 | if(*terminator!=';' && *terminator!=0) { |
| 358 | *pErrorCode=U_PARSE_ERROR; |
| 359 | return 0; |
| 360 | } |
| 361 | } |
| 362 | return rangeLength; |
| 363 | } |
| 364 | |
| 365 | U_CAPIextern "C" int32_t U_EXPORT2 |
| 366 | u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { |
| 367 | const char *read = source; |
| 368 | int32_t i = 0; |
| 369 | unsigned int value = 0; |
| 370 | if(sLen == -1) { |
| 371 | sLen = (int32_t)strlen(source); |
| 372 | } |
| 373 | |
| 374 | while(read < source+sLen) { |
| 375 | sscanf(read, "%2x", &value); |
| 376 | if(i < destCapacity) { |
| 377 | dest[i] = (char)value; |
| 378 | } |
| 379 | i++; |
| 380 | read += 2; |
| 381 | } |
| 382 | return u_terminateCharsu_terminateChars_71(dest, destCapacity, i, status); |
| 383 | } |