File: | out/../deps/icu-small/source/tools/toolutil/xmlparser.cpp |
Warning: | line 724, column 13 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. | |||
2 | // License & terms of use: http://www.unicode.org/copyright.html | |||
3 | /* | |||
4 | ******************************************************************************* | |||
5 | * | |||
6 | * Copyright (C) 2004-2010, International Business Machines | |||
7 | * Corporation and others. All Rights Reserved. | |||
8 | * | |||
9 | ******************************************************************************* | |||
10 | * file name: xmlparser.cpp | |||
11 | * encoding: UTF-8 | |||
12 | * tab size: 8 (not used) | |||
13 | * indentation:4 | |||
14 | * | |||
15 | * created on: 2004jul21 | |||
16 | * created by: Andy Heninger | |||
17 | */ | |||
18 | ||||
19 | #include <stdio.h> | |||
20 | #include "unicode/uchar.h" | |||
21 | #include "unicode/ucnv.h" | |||
22 | #include "unicode/regex.h" | |||
23 | #include "filestrm.h" | |||
24 | #include "xmlparser.h" | |||
25 | ||||
26 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS0 && !UCONFIG_NO_CONVERSION0 | |||
27 | ||||
// Code units of the ASCII characters that the parser refers to by name.
enum {
    x_QUOT = 0x22,  // quotation mark
    x_AMP  = 0x26,  // ampersand
    x_APOS = 0x27,  // apostrophe
    x_LT   = 0x3c,  // less-than sign
    x_GT   = 0x3e,  // greater-than sign
    x_l    = 0x6c   // lowercase letter l
};
37 | ||||
// Regular-expression fragments shared by the matchers built in the
// UXMLParser constructor. Each macro is a string literal (or a sequence of
// adjacent literals) so that it can be concatenated with neighbouring literals.

// XML production [3]: one white-space character (space, tab, CR or LF).
#define XML_SPACES "[ \\u0009\\u000d\\u000a]"

// XML #4
#define XML_NAMESTARTCHAR "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" \
                    "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" \
                    "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" \
                    "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]"

// XML #5
#define XML_NAMECHAR "[" XML_NAMESTARTCHAR "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]"

// XML #6
#define XML_NAME XML_NAMESTARTCHAR "(?:" XML_NAMECHAR ")*"
51 | ||||
52 | U_NAMESPACE_BEGINnamespace icu_71 { | |||
53 | ||||
54 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLParser)UClassID UXMLParser::getStaticClassID() { static char classID = 0; return (UClassID)&classID; } UClassID UXMLParser::getDynamicClassID () const { return UXMLParser::getStaticClassID(); } | |||
55 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLElement)UClassID UXMLElement::getStaticClassID() { static char classID = 0; return (UClassID)&classID; } UClassID UXMLElement:: getDynamicClassID() const { return UXMLElement::getStaticClassID (); } | |||
56 | ||||
57 | // | |||
58 | // UXMLParser constructor. Mostly just initializes the ICU regexes that are | |||
59 | // used for parsing. | |||
60 | // | |||
61 | UXMLParser::UXMLParser(UErrorCode &status) : | |||
62 | // XML Declaration. XML Production #23. | |||
63 | // example: "<?xml version=1.0 encoding="utf-16" ?> | |||
64 | // This is a sloppy implementation - just look for the leading <?xml and the closing ?> | |||
65 | // allow for a possible leading BOM. | |||
66 | mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
67 | ||||
68 | // XML Comment production #15 | |||
69 | // example: "<!-- whatever --> | |||
70 | // note, does not detect an illegal "--" within comments | |||
71 | mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
72 | ||||
73 | // XML Spaces | |||
74 | // production [3] | |||
75 | mXMLSP(UnicodeString(XML_SPACES"[ \\u0009\\u000d\\u000a]" "+", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
76 | ||||
77 | // XML Doctype decl production #28 | |||
78 | // example "<!DOCTYPE foo SYSTEM "somewhere" > | |||
79 | // or "<!DOCTYPE foo [internal dtd]> | |||
80 | // TODO: we don't actually parse the DOCTYPE or internal subsets. | |||
81 | // Some internal dtd subsets could confuse this simple-minded | |||
82 | // attempt at skipping over them, specifically, occurrences | |||
83 | // of closing square brackets. These could appear in comments, | |||
84 | // or in parameter entity declarations, for example. | |||
85 | mXMLDoctype(UnicodeString( | |||
86 | "(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INVicu::UnicodeString::kInvariant | |||
87 | ), 0, status), | |||
88 | ||||
89 | // XML PI production #16 | |||
90 | // example "<?target stuff?> | |||
91 | mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
92 | ||||
93 | // XML Element Start Productions #40, #41 | |||
94 | // example <foo att1='abc' att2="d e f" > | |||
95 | // capture #1: the tag name | |||
96 | // | |||
97 | mXMLElemStart (UnicodeString("(?s)<(" XML_NAME"[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "(?:" "[" "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" ")*" ")" // match "<tag_name" | |||
98 | "(?:" | |||
99 | XML_SPACES"[ \\u0009\\u000d\\u000a]" "+" XML_NAME"[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "(?:" "[" "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" ")*" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*=" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*" // match "ATTR_NAME = " | |||
100 | "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"' | |||
101 | ")*" // * for zero or more attributes. | |||
102 | XML_SPACES"[ \\u0009\\u000d\\u000a]" "*?>", -1, US_INVicu::UnicodeString::kInvariant), 0, status), // match " >" | |||
103 | ||||
104 | // XML Element End production #42 | |||
105 | // example </foo> | |||
106 | mXMLElemEnd (UnicodeString("</(" XML_NAME"[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "(?:" "[" "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" ")*" ")" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*>", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
107 | ||||
108 | // XML Element Empty production #44 | |||
109 | // example <foo att1="abc" att2="d e f" /> | |||
110 | mXMLElemEmpty (UnicodeString("(?s)<(" XML_NAME"[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "(?:" "[" "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" ")*" ")" // match "<tag_name" | |||
111 | "(?:" | |||
112 | XML_SPACES"[ \\u0009\\u000d\\u000a]" "+" XML_NAME"[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "(?:" "[" "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" ")*" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*=" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*" // match "ATTR_NAME = " | |||
113 | "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"' | |||
114 | ")*" // * for zero or more attributes. | |||
115 | XML_SPACES"[ \\u0009\\u000d\\u000a]" "*?/>", -1, US_INVicu::UnicodeString::kInvariant), 0, status), // match " />" | |||
116 | ||||
117 | ||||
118 | // XMLCharData. Everything but '<'. Note that & will be dealt with later. | |||
119 | mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
120 | ||||
121 | // Attribute name = "value". XML Productions 10, 40/41 | |||
122 | // Capture group 1 is name, | |||
123 | // 2 is the attribute value, including the quotes. | |||
124 | // | |||
125 | // Note that attributes are scanned twice. The first time is with | |||
126 | // the regex for an entire element start. There, the attributes | |||
127 | // are checked syntactically, but not separated out one by one. | |||
128 | // Here, we match a single attribute, and make its name and | |||
129 | // attribute value available to the parser code. | |||
130 | mAttrValue(UnicodeString(XML_SPACES"[ \\u0009\\u000d\\u000a]" "+(" XML_NAME"[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "(?:" "[" "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]" "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]" ")*" ")" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*=" XML_SPACES"[ \\u0009\\u000d\\u000a]" "*" | |||
131 | "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
132 | ||||
133 | ||||
134 | mAttrNormalizer(UnicodeString(XML_SPACES"[ \\u0009\\u000d\\u000a]", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
135 | ||||
136 | // Match any of the new-line sequences in content. | |||
137 | // All are changed to \u000a. | |||
138 | mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INVicu::UnicodeString::kInvariant), 0, status), | |||
139 | ||||
140 | // & char references | |||
141 | // We will figure out what we've got based on which capture group has content. | |||
142 | // The last one is a catchall for unrecognized entity references.. | |||
143 | // 1 2 3 4 5 6 7 8 | |||
144 | mAmps(UnicodeString("&(?:(amp;)|(lt;)|(gt;)|(apos;)|(quot;)|#x([0-9A-Fa-f]{1,8});|#([0-9]{1,8});|(.))"), | |||
145 | 0, status), | |||
146 | ||||
147 | fNames(status), | |||
148 | fElementStack(status), | |||
149 | fOneLF((UChar)0x0a) // Plain new-line string, used in new line normalization. | |||
150 | { | |||
151 | } | |||
152 | ||||
153 | UXMLParser * | |||
154 | UXMLParser::createParser(UErrorCode &errorCode) { | |||
155 | if (U_FAILURE(errorCode)) { | |||
156 | return NULL__null; | |||
157 | } else { | |||
158 | return new UXMLParser(errorCode); | |||
159 | } | |||
160 | } | |||
161 | ||||
162 | UXMLParser::~UXMLParser() {} | |||
163 | ||||
164 | UXMLElement * | |||
165 | UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) { | |||
166 | char bytes[4096], charsetBuffer[100]; | |||
167 | FileStream *f; | |||
168 | const char *charset, *pb; | |||
169 | UnicodeString src; | |||
170 | UConverter *cnv; | |||
171 | UChar *buffer, *pu; | |||
172 | int32_t fileLength, bytesLength, length, capacity; | |||
173 | UBool flush; | |||
174 | ||||
175 | if(U_FAILURE(errorCode)) { | |||
176 | return NULL__null; | |||
177 | } | |||
178 | ||||
179 | f=T_FileStream_open(filename, "rb"); | |||
180 | if(f==NULL__null) { | |||
181 | errorCode=U_FILE_ACCESS_ERROR; | |||
182 | return NULL__null; | |||
183 | } | |||
184 | ||||
185 | bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes)); | |||
186 | if(bytesLength<(int32_t)sizeof(bytes)) { | |||
187 | // we have already read the entire file | |||
188 | fileLength=bytesLength; | |||
189 | } else { | |||
190 | // get the file length | |||
191 | fileLength=T_FileStream_size(f); | |||
192 | } | |||
193 | ||||
194 | /* | |||
195 | * get the charset: | |||
196 | * 1. Unicode signature | |||
197 | * 2. treat as ISO-8859-1 and read XML encoding="charser" | |||
198 | * 3. default to UTF-8 | |||
199 | */ | |||
200 | charset=ucnv_detectUnicodeSignatureucnv_detectUnicodeSignature_71(bytes, bytesLength, NULL__null, &errorCode); | |||
201 | if(U_SUCCESS(errorCode) && charset!=NULL__null) { | |||
202 | // open converter according to Unicode signature | |||
203 | cnv=ucnv_openucnv_open_71(charset, &errorCode); | |||
204 | } else { | |||
205 | // read as Latin-1 and parse the XML declaration and encoding | |||
206 | cnv=ucnv_openucnv_open_71("ISO-8859-1", &errorCode); | |||
207 | if(U_FAILURE(errorCode)) { | |||
208 | // unexpected error opening Latin-1 converter | |||
209 | goto exit; | |||
210 | } | |||
211 | ||||
212 | buffer=toUCharPtr(src.getBuffer(bytesLength)); | |||
213 | if(buffer==NULL__null) { | |||
214 | // unexpected failure to reserve some string capacity | |||
215 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |||
216 | goto exit; | |||
217 | } | |||
218 | pb=bytes; | |||
219 | pu=buffer; | |||
220 | ucnv_toUnicodeucnv_toUnicode_71( | |||
221 | cnv, | |||
222 | &pu, buffer+src.getCapacity(), | |||
223 | &pb, bytes+bytesLength, | |||
224 | NULL__null, TRUE1, &errorCode); | |||
225 | src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0); | |||
226 | ucnv_closeucnv_close_71(cnv); | |||
227 | cnv=NULL__null; | |||
228 | if(U_FAILURE(errorCode)) { | |||
229 | // unexpected error in conversion from Latin-1 | |||
230 | src.remove(); | |||
231 | goto exit; | |||
232 | } | |||
233 | ||||
234 | // parse XML declaration | |||
235 | if(mXMLDecl.reset(src).lookingAt(0, errorCode)) { | |||
236 | int32_t declEnd=mXMLDecl.end(errorCode); | |||
237 | // go beyond <?xml | |||
238 | int32_t pos=src.indexOf((UChar)x_l)+1; | |||
239 | ||||
240 | mAttrValue.reset(src); | |||
241 | while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) { // loop runs once per attribute on this element. | |||
242 | UnicodeString attName = mAttrValue.group(1, errorCode); | |||
243 | UnicodeString attValue = mAttrValue.group(2, errorCode); | |||
244 | ||||
245 | // Trim the quotes from the att value. These are left over from the original regex | |||
246 | // that parsed the attribute, which couldn't conveniently strip them. | |||
247 | attValue.remove(0,1); // one char from the beginning | |||
248 | attValue.truncate(attValue.length()-1); // and one from the end. | |||
249 | ||||
250 | if(attName==UNICODE_STRING("encoding", 8)icu::UnicodeString(true, u"encoding", 8)) { | |||
251 | length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer)); | |||
252 | charset=charsetBuffer; | |||
253 | break; | |||
254 | } | |||
255 | pos = mAttrValue.end(2, errorCode); | |||
256 | } | |||
257 | ||||
258 | if(charset==NULL__null) { | |||
259 | // default to UTF-8 | |||
260 | charset="UTF-8"; | |||
261 | } | |||
262 | cnv=ucnv_openucnv_open_71(charset, &errorCode); | |||
263 | } | |||
264 | } | |||
265 | ||||
266 | if(U_FAILURE(errorCode)) { | |||
267 | // unable to open the converter | |||
268 | goto exit; | |||
269 | } | |||
270 | ||||
271 | // convert the file contents | |||
272 | capacity=fileLength; // estimated capacity | |||
273 | src.getBuffer(capacity); | |||
274 | src.releaseBuffer(0); // zero length | |||
275 | flush=FALSE0; | |||
276 | for(;;) { | |||
277 | // convert contents of bytes[bytesLength] | |||
278 | pb=bytes; | |||
279 | for(;;) { | |||
280 | length=src.length(); | |||
281 | buffer=toUCharPtr(src.getBuffer(capacity)); | |||
282 | if(buffer==NULL__null) { | |||
283 | // unexpected failure to reserve some string capacity | |||
284 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |||
285 | goto exit; | |||
286 | } | |||
287 | ||||
288 | pu=buffer+length; | |||
289 | ucnv_toUnicodeucnv_toUnicode_71( | |||
290 | cnv, &pu, buffer+src.getCapacity(), | |||
291 | &pb, bytes+bytesLength, | |||
292 | NULL__null, FALSE0, &errorCode); | |||
293 | src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0); | |||
294 | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | |||
295 | errorCode=U_ZERO_ERROR; | |||
296 | capacity=(3*src.getCapacity())/2; // increase capacity by 50% | |||
297 | } else { | |||
298 | break; | |||
299 | } | |||
300 | } | |||
301 | ||||
302 | if(U_FAILURE(errorCode)) { | |||
303 | break; // conversion error | |||
304 | } | |||
305 | ||||
306 | if(flush) { | |||
307 | break; // completely converted the file | |||
308 | } | |||
309 | ||||
310 | // read next block | |||
311 | bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes)); | |||
312 | if(bytesLength==0) { | |||
313 | // reached end of file, convert once more to flush the converter | |||
314 | flush=TRUE1; | |||
315 | } | |||
316 | } | |||
317 | ||||
318 | exit: | |||
319 | ucnv_closeucnv_close_71(cnv); | |||
320 | T_FileStream_close(f); | |||
321 | ||||
322 | if(U_SUCCESS(errorCode)) { | |||
323 | return parse(src, errorCode); | |||
324 | } else { | |||
325 | return NULL__null; | |||
326 | } | |||
327 | } | |||
328 | ||||
329 | UXMLElement * | |||
330 | UXMLParser::parse(const UnicodeString &src, UErrorCode &status) { | |||
331 | if(U_FAILURE(status)) { | |||
332 | return NULL__null; | |||
333 | } | |||
334 | ||||
335 | UXMLElement *root = NULL__null; | |||
336 | fPos = 0; // TODO use just a local pos variable and pass it into functions | |||
337 | // where necessary? | |||
338 | ||||
339 | // set all matchers to work on the input string | |||
340 | mXMLDecl.reset(src); | |||
341 | mXMLComment.reset(src); | |||
342 | mXMLSP.reset(src); | |||
343 | mXMLDoctype.reset(src); | |||
344 | mXMLPI.reset(src); | |||
345 | mXMLElemStart.reset(src); | |||
346 | mXMLElemEnd.reset(src); | |||
347 | mXMLElemEmpty.reset(src); | |||
348 | mXMLCharData.reset(src); | |||
349 | mAttrValue.reset(src); | |||
350 | mAttrNormalizer.reset(src); | |||
351 | mNewLineNormalizer.reset(src); | |||
352 | mAmps.reset(src); | |||
353 | ||||
354 | // Consume the XML Declaration, if present. | |||
355 | if (mXMLDecl.lookingAt(fPos, status)) { | |||
356 | fPos = mXMLDecl.end(status); | |||
357 | } | |||
358 | ||||
359 | // Consume "misc" [XML production 27] appearing before DocType | |||
360 | parseMisc(status); | |||
361 | ||||
362 | // Consume a DocType declaration, if present. | |||
363 | if (mXMLDoctype.lookingAt(fPos, status)) { | |||
364 | fPos = mXMLDoctype.end(status); | |||
365 | } | |||
366 | ||||
367 | // Consume additional "misc" [XML production 27] appearing after the DocType | |||
368 | parseMisc(status); | |||
369 | ||||
370 | // Get the root element | |||
371 | if (mXMLElemEmpty.lookingAt(fPos, status)) { | |||
372 | // Root is an empty element (no nested elements or content) | |||
373 | root = createElement(mXMLElemEmpty, status); | |||
374 | fPos = mXMLElemEmpty.end(status); | |||
375 | } else { | |||
376 | if (mXMLElemStart.lookingAt(fPos, status) == FALSE0) { | |||
377 | error("Root Element expected", status); | |||
378 | goto errorExit; | |||
379 | } | |||
380 | root = createElement(mXMLElemStart, status); | |||
381 | UXMLElement *el = root; | |||
382 | ||||
383 | // | |||
384 | // This is the loop that consumes the root element of the document, | |||
385 | // including all nested content. Nested elements are handled by | |||
386 | // explicit pushes/pops of the element stack; there is no recursion | |||
387 | // in the control flow of this code. | |||
388 | // "el" always refers to the current element, the one to which content | |||
389 | // is being added. It is above the top of the element stack. | |||
390 | for (;;) { | |||
391 | // Nested Element Start | |||
392 | if (mXMLElemStart.lookingAt(fPos, status)) { | |||
393 | UXMLElement *t = createElement(mXMLElemStart, status); | |||
394 | el->fChildren.addElement(t, status); | |||
395 | t->fParent = el; | |||
396 | fElementStack.push(el, status); | |||
397 | el = t; | |||
398 | continue; | |||
399 | } | |||
400 | ||||
401 | // Text Content. String is concatenated onto the current node's content, | |||
402 | // but only if it contains something other than spaces. | |||
403 | UnicodeString s = scanContent(status); | |||
404 | if (s.length() > 0) { | |||
405 | mXMLSP.reset(s); | |||
406 | if (mXMLSP.matches(status) == FALSE0) { | |||
407 | // This chunk of text contains something other than just | |||
408 | // white space. Make a child node for it. | |||
409 | replaceCharRefs(s, status); | |||
410 | el->fChildren.addElement(s.clone(), status); | |||
411 | } | |||
412 | mXMLSP.reset(src); // The matchers need to stay set to the main input string. | |||
413 | continue; | |||
414 | } | |||
415 | ||||
416 | // Comments. Discard. | |||
417 | if (mXMLComment.lookingAt(fPos, status)) { | |||
418 | fPos = mXMLComment.end(status); | |||
419 | continue; | |||
420 | } | |||
421 | ||||
422 | // PIs. Discard. | |||
423 | if (mXMLPI.lookingAt(fPos, status)) { | |||
424 | fPos = mXMLPI.end(status); | |||
425 | continue; | |||
426 | } | |||
427 | ||||
428 | // Element End | |||
429 | if (mXMLElemEnd.lookingAt(fPos, status)) { | |||
430 | fPos = mXMLElemEnd.end(0, status); | |||
431 | const UnicodeString name = mXMLElemEnd.group(1, status); | |||
432 | if (name != *el->fName) { | |||
433 | error("Element start / end tag mismatch", status); | |||
434 | goto errorExit; | |||
435 | } | |||
436 | if (fElementStack.empty()) { | |||
437 | // Close of the root element. We're done with the doc. | |||
438 | el = NULL__null; | |||
439 | break; | |||
440 | } | |||
441 | el = (UXMLElement *)fElementStack.pop(); | |||
442 | continue; | |||
443 | } | |||
444 | ||||
445 | // Empty Element. Stored as a child of the current element, but not stacked. | |||
446 | if (mXMLElemEmpty.lookingAt(fPos, status)) { | |||
447 | UXMLElement *t = createElement(mXMLElemEmpty, status); | |||
448 | el->fChildren.addElement(t, status); | |||
449 | continue; | |||
450 | } | |||
451 | ||||
452 | // Hit something within the document that doesn't match anything. | |||
453 | // It's an error. | |||
454 | error("Unrecognized markup", status); | |||
455 | break; | |||
456 | } | |||
457 | ||||
458 | if (el != NULL__null || !fElementStack.empty()) { | |||
459 | // We bailed out early, for some reason. | |||
460 | error("Root element not closed.", status); | |||
461 | goto errorExit; | |||
462 | } | |||
463 | } | |||
464 | ||||
465 | // Root Element parse is complete. | |||
466 | // Consume the annoying xml "Misc" that can appear at the end of the doc. | |||
467 | parseMisc(status); | |||
468 | ||||
469 | // We should have reached the end of the input | |||
470 | if (fPos != src.length()) { | |||
471 | error("Extra content at the end of the document", status); | |||
472 | goto errorExit; | |||
473 | } | |||
474 | ||||
475 | // Success! | |||
476 | return root; | |||
477 | ||||
478 | errorExit: | |||
479 | delete root; | |||
480 | return NULL__null; | |||
481 | } | |||
482 | ||||
483 | // | |||
484 | // createElement | |||
485 | // We've just matched an element start tag. Create and fill in a UXMLElement object | |||
486 | // for it. | |||
487 | // | |||
488 | UXMLElement * | |||
489 | UXMLParser::createElement(RegexMatcher &mEl, UErrorCode &status) { | |||
490 | // First capture group is the element's name. | |||
491 | UXMLElement *el = new UXMLElement(this, intern(mEl.group(1, status), status), status); | |||
492 | ||||
493 | // Scan for attributes. | |||
494 | int32_t pos = mEl.end(1, status); // The position after the end of the tag name | |||
495 | ||||
496 | while (mAttrValue.lookingAt(pos, status)) { // loop runs once per attribute on this element. | |||
497 | UnicodeString attName = mAttrValue.group(1, status); | |||
498 | UnicodeString attValue = mAttrValue.group(2, status); | |||
499 | ||||
500 | // Trim the quotes from the att value. These are left over from the original regex | |||
501 | // that parsed the attribute, which couldn't conveniently strip them. | |||
502 | attValue.remove(0,1); // one char from the beginning | |||
503 | attValue.truncate(attValue.length()-1); // and one from the end. | |||
504 | ||||
505 | // XML Attribute value normalization. | |||
506 | // This is one of the really screwy parts of the XML spec. | |||
507 | // See http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize | |||
508 | // Note that non-validating parsers must treat all entities as type CDATA | |||
509 | // which simplifies things some. | |||
510 | ||||
511 | // Att normalization step 1: normalize any newlines in the attribute value | |||
512 | mNewLineNormalizer.reset(attValue); | |||
513 | attValue = mNewLineNormalizer.replaceAll(fOneLF, status); | |||
514 | ||||
515 | // Next change all xml white space chars to plain \u0020 spaces. | |||
516 | mAttrNormalizer.reset(attValue); | |||
517 | UnicodeString oneSpace((UChar)0x0020); | |||
518 | attValue = mAttrNormalizer.replaceAll(oneSpace, status); | |||
519 | ||||
520 | // Replace character entities. | |||
521 | replaceCharRefs(attValue, status); | |||
522 | ||||
523 | // Save the attribute name and value in our document structure. | |||
524 | el->fAttNames.addElement((void *)intern(attName, status), status); | |||
525 | el->fAttValues.addElement(attValue.clone(), status); | |||
526 | pos = mAttrValue.end(2, status); | |||
527 | } | |||
528 | fPos = mEl.end(0, status); | |||
529 | return el; | |||
530 | } | |||
531 | ||||
532 | // | |||
533 | // parseMisc | |||
534 | // Consume XML "Misc" [production #27] | |||
535 | // which is any combination of space, PI and comments | |||
536 | // Need to watch end-of-input because xml MISC stuff is allowed after | |||
537 | // the document element, so we WILL scan off the end in this function | |||
538 | // | |||
539 | void | |||
540 | UXMLParser::parseMisc(UErrorCode &status) { | |||
541 | for (;;) { | |||
542 | if (fPos >= mXMLPI.input().length()) { | |||
543 | break; | |||
544 | } | |||
545 | if (mXMLPI.lookingAt(fPos, status)) { | |||
546 | fPos = mXMLPI.end(status); | |||
547 | continue; | |||
548 | } | |||
549 | if (mXMLSP.lookingAt(fPos, status)) { | |||
550 | fPos = mXMLSP.end(status); | |||
551 | continue; | |||
552 | } | |||
553 | if (mXMLComment.lookingAt(fPos, status)) { | |||
554 | fPos = mXMLComment.end(status); | |||
555 | continue; | |||
556 | } | |||
557 | break; | |||
558 | } | |||
559 | } | |||
560 | ||||
561 | // | |||
562 | // Scan for document content. | |||
563 | // | |||
564 | UnicodeString | |||
565 | UXMLParser::scanContent(UErrorCode &status) { | |||
566 | UnicodeString result; | |||
567 | if (mXMLCharData.lookingAt(fPos, status)) { | |||
568 | result = mXMLCharData.group((int32_t)0, status); | |||
569 | // Normalize the new-lines. (Before char ref substitution) | |||
570 | mNewLineNormalizer.reset(result); | |||
571 | result = mNewLineNormalizer.replaceAll(fOneLF, status); | |||
572 | ||||
573 | // TODO: handle CDATA | |||
574 | fPos = mXMLCharData.end(0, status); | |||
575 | } | |||
576 | ||||
577 | return result; | |||
578 | } | |||
579 | ||||
580 | // | |||
581 | // replaceCharRefs | |||
582 | // | |||
583 | // replace the char entities &lt; &amp; &#123; &#x12ab; etc. in a string | |||
584 | // with the corresponding actual character. | |||
585 | // | |||
586 | void | |||
587 | UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) { | |||
588 | UnicodeString result; | |||
589 | UnicodeString replacement; | |||
590 | int i; | |||
591 | ||||
592 | mAmps.reset(s); | |||
593 | // See the initialization for the regex matcher mAmps. | |||
594 | // Which entity we've matched is determined by which capture group has content, | |||
595 | // which is flagged by start() of that group not being -1. | |||
596 | while (mAmps.find()) { | |||
597 | if (mAmps.start(1, status) != -1) { | |||
598 | replacement.setTo((UChar)x_AMP); | |||
599 | } else if (mAmps.start(2, status) != -1) { | |||
600 | replacement.setTo((UChar)x_LT); | |||
601 | } else if (mAmps.start(3, status) != -1) { | |||
602 | replacement.setTo((UChar)x_GT); | |||
603 | } else if (mAmps.start(4, status) != -1) { | |||
604 | replacement.setTo((UChar)x_APOS); | |||
605 | } else if (mAmps.start(5, status) != -1) { | |||
606 | replacement.setTo((UChar)x_QUOT); | |||
607 | } else if (mAmps.start(6, status) != -1) { | |||
608 | UnicodeString hexString = mAmps.group(6, status); | |||
609 | UChar32 val = 0; | |||
610 | for (i=0; i<hexString.length(); i++) { | |||
611 | val = (val << 4) + u_digitu_digit_71(hexString.charAt(i), 16); | |||
612 | } | |||
613 | // TODO: some verification that the character is valid | |||
614 | replacement.setTo(val); | |||
615 | } else if (mAmps.start(7, status) != -1) { | |||
616 | UnicodeString decimalString = mAmps.group(7, status); | |||
617 | UChar32 val = 0; | |||
618 | for (i=0; i<decimalString.length(); i++) { | |||
619 | val = val*10 + u_digitu_digit_71(decimalString.charAt(i), 10); | |||
620 | } | |||
621 | // TODO: some verification that the character is valid | |||
622 | replacement.setTo(val); | |||
623 | } else { | |||
624 | // An unrecognized &entity; Leave it alone. | |||
625 | // TODO: check that it really looks like an entity, and is not some | |||
626 | // random & in the text. | |||
627 | replacement = mAmps.group((int32_t)0, status); | |||
628 | } | |||
629 | mAmps.appendReplacement(result, replacement, status); | |||
630 | } | |||
631 | mAmps.appendTail(result); | |||
632 | s = result; | |||
633 | } | |||
634 | ||||
635 | void | |||
636 | UXMLParser::error(const char *message, UErrorCode &status) { | |||
637 | // TODO: something better here... | |||
638 | const UnicodeString &src=mXMLDecl.input(); | |||
639 | int line = 0; | |||
640 | int ci = 0; | |||
641 | while (ci < fPos && ci>=0) { | |||
642 | ci = src.indexOf((UChar)0x0a, ci+1); | |||
643 | line++; | |||
644 | } | |||
645 | fprintf(stderrstderr, "Error: %s at line %d\n", message, line); | |||
646 | if (U_SUCCESS(status)) { | |||
647 | status = U_PARSE_ERROR; | |||
648 | } | |||
649 | } | |||
650 | ||||
651 | // intern strings like in Java | |||
652 | ||||
653 | const UnicodeString * | |||
654 | UXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) { | |||
655 | const UHashElement *he=fNames.find(s); | |||
656 | if(he!=NULL__null) { | |||
657 | // already a known name, return its hashed key pointer | |||
658 | return (const UnicodeString *)he->key.pointer; | |||
659 | } else { | |||
660 | // add this new name and return its hashed key pointer | |||
661 | fNames.puti(s, 1, errorCode); | |||
662 | he=fNames.find(s); | |||
663 | return (const UnicodeString *)he->key.pointer; | |||
664 | } | |||
665 | } | |||
666 | ||||
667 | const UnicodeString * | |||
668 | UXMLParser::findName(const UnicodeString &s) const { | |||
669 | const UHashElement *he=fNames.find(s); | |||
670 | if(he!=NULL__null) { | |||
671 | // a known name, return its hashed key pointer | |||
672 | return (const UnicodeString *)he->key.pointer; | |||
673 | } else { | |||
674 | // unknown name | |||
675 | return NULL__null; | |||
676 | } | |||
677 | } | |||
678 | ||||
679 | // UXMLElement ------------------------------------------------------------- *** | |||
680 | ||||
681 | UXMLElement::UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode) : | |||
682 | fParser(parser), | |||
683 | fName(name), | |||
684 | fAttNames(errorCode), | |||
685 | fAttValues(errorCode), | |||
686 | fChildren(errorCode), | |||
687 | fParent(NULL__null) | |||
688 | { | |||
689 | } | |||
690 | ||||
691 | UXMLElement::~UXMLElement() { | |||
692 | int i; | |||
693 | // attribute names are owned by the UXMLParser, don't delete them here | |||
694 | for (i=fAttValues.size()-1; i>=0; i--) { | |||
695 | delete (UObject *)fAttValues.elementAt(i); | |||
696 | } | |||
697 | for (i=fChildren.size()-1; i>=0; i--) { | |||
698 | delete (UObject *)fChildren.elementAt(i); | |||
699 | } | |||
700 | } | |||
701 | ||||
702 | const UnicodeString & | |||
703 | UXMLElement::getTagName() const { | |||
704 | return *fName; | |||
705 | } | |||
706 | ||||
707 | UnicodeString | |||
708 | UXMLElement::getText(UBool recurse) const { | |||
709 | UnicodeString text; | |||
710 | appendText(text, recurse); | |||
| ||||
711 | return text; | |||
712 | } | |||
713 | ||||
714 | void | |||
715 | UXMLElement::appendText(UnicodeString &text, UBool recurse) const { | |||
716 | const UObject *node; | |||
717 | int32_t i, count=fChildren.size(); | |||
718 | for(i=0; i<count; ++i) { | |||
719 | node=(const UObject *)fChildren.elementAt(i); | |||
720 | const UnicodeString *s=dynamic_cast<const UnicodeString *>(node); | |||
721 | if(s!=NULL__null) { | |||
722 | text.append(*s); | |||
723 | } else if(recurse) /* must be a UXMLElement */ { | |||
724 | ((const UXMLElement *)node)->appendText(text, recurse); | |||
| ||||
725 | } | |||
726 | } | |||
727 | } | |||
728 | ||||
729 | int32_t | |||
730 | UXMLElement::countAttributes() const { | |||
731 | return fAttNames.size(); | |||
732 | } | |||
733 | ||||
734 | const UnicodeString * | |||
735 | UXMLElement::getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const { | |||
736 | if(0<=i && i<fAttNames.size()) { | |||
737 | name.setTo(*(const UnicodeString *)fAttNames.elementAt(i)); | |||
738 | value.setTo(*(const UnicodeString *)fAttValues.elementAt(i)); | |||
739 | return &value; // or return (UnicodeString *)fAttValues.elementAt(i); | |||
740 | } else { | |||
741 | return NULL__null; | |||
742 | } | |||
743 | } | |||
744 | ||||
745 | const UnicodeString * | |||
746 | UXMLElement::getAttribute(const UnicodeString &name) const { | |||
747 | // search for the attribute name by comparing the interned pointer, | |||
748 | // not the string contents | |||
749 | const UnicodeString *p=fParser->findName(name); | |||
750 | if(p==NULL__null) { | |||
751 | return NULL__null; // no such attribute seen by the parser at all | |||
752 | } | |||
753 | ||||
754 | int32_t i, count=fAttNames.size(); | |||
755 | for(i=0; i<count; ++i) { | |||
756 | if(p==(const UnicodeString *)fAttNames.elementAt(i)) { | |||
757 | return (const UnicodeString *)fAttValues.elementAt(i); | |||
758 | } | |||
759 | } | |||
760 | return NULL__null; | |||
761 | } | |||
762 | ||||
763 | int32_t | |||
764 | UXMLElement::countChildren() const { | |||
765 | return fChildren.size(); | |||
766 | } | |||
767 | ||||
768 | const UObject * | |||
769 | UXMLElement::getChild(int32_t i, UXMLNodeType &type) const { | |||
770 | if(0<=i && i<fChildren.size()) { | |||
771 | const UObject *node=(const UObject *)fChildren.elementAt(i); | |||
772 | if(dynamic_cast<const UXMLElement *>(node)!=NULL__null) { | |||
773 | type=UXML_NODE_TYPE_ELEMENT; | |||
774 | } else { | |||
775 | type=UXML_NODE_TYPE_STRING; | |||
776 | } | |||
777 | return node; | |||
778 | } else { | |||
779 | return NULL__null; | |||
780 | } | |||
781 | } | |||
782 | ||||
783 | const UXMLElement * | |||
784 | UXMLElement::nextChildElement(int32_t &i) const { | |||
785 | if(i<0) { | |||
786 | return NULL__null; | |||
787 | } | |||
788 | ||||
789 | const UObject *node; | |||
790 | int32_t count=fChildren.size(); | |||
791 | while(i<count) { | |||
792 | node=(const UObject *)fChildren.elementAt(i++); | |||
793 | const UXMLElement *elem=dynamic_cast<const UXMLElement *>(node); | |||
794 | if(elem!=NULL__null) { | |||
795 | return elem; | |||
796 | } | |||
797 | } | |||
798 | return NULL__null; | |||
799 | } | |||
800 | ||||
801 | const UXMLElement * | |||
802 | UXMLElement::getChildElement(const UnicodeString &name) const { | |||
803 | // search for the element name by comparing the interned pointer, | |||
804 | // not the string contents | |||
805 | const UnicodeString *p=fParser->findName(name); | |||
806 | if(p==NULL__null) { | |||
807 | return NULL__null; // no such element seen by the parser at all | |||
808 | } | |||
809 | ||||
810 | const UObject *node; | |||
811 | int32_t i, count=fChildren.size(); | |||
812 | for(i=0; i<count; ++i) { | |||
813 | node=(const UObject *)fChildren.elementAt(i); | |||
814 | const UXMLElement *elem=dynamic_cast<const UXMLElement *>(node); | |||
815 | if(elem!=NULL__null) { | |||
816 | if(p==elem->fName) { | |||
817 | return elem; | |||
818 | } | |||
819 | } | |||
820 | } | |||
821 | return NULL__null; | |||
822 | } | |||
823 | ||||
824 | U_NAMESPACE_END} | |||
825 | ||||
826 | #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ | |||
827 |