File: | out/../deps/icu-small/source/i18n/alphaindex.cpp |
Warning: | line 888, column 26 Forming reference to null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. | |||
2 | // License & terms of use: http://www.unicode.org/copyright.html | |||
3 | /* | |||
4 | ******************************************************************************* | |||
5 | * Copyright (C) 2009-2014, International Business Machines Corporation and | |||
6 | * others. All Rights Reserved. | |||
7 | ******************************************************************************* | |||
8 | */ | |||
9 | ||||
10 | #include "unicode/utypes.h" | |||
11 | ||||
12 | #if !UCONFIG_NO_COLLATION0 | |||
13 | ||||
14 | #include "unicode/alphaindex.h" | |||
15 | #include "unicode/coll.h" | |||
16 | #include "unicode/localpointer.h" | |||
17 | #include "unicode/normalizer2.h" | |||
18 | #include "unicode/tblcoll.h" | |||
19 | #include "unicode/uchar.h" | |||
20 | #include "unicode/ulocdata.h" | |||
21 | #include "unicode/uniset.h" | |||
22 | #include "unicode/uobject.h" | |||
23 | #include "unicode/usetiter.h" | |||
24 | #include "unicode/utf16.h" | |||
25 | ||||
26 | #include "cmemory.h" | |||
27 | #include "cstring.h" | |||
28 | #include "uassert.h" | |||
29 | #include "uvector.h" | |||
30 | #include "uvectr64.h" | |||
31 | ||||
32 | //#include <string> | |||
33 | //#include <iostream> | |||
34 | ||||
35 | U_NAMESPACE_BEGINnamespace icu_71 { | |||
36 | ||||
37 | namespace { | |||
38 | ||||
39 | /** | |||
40 | * Prefix string for Chinese index buckets. | |||
41 | * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes | |||
42 | */ | |||
43 | const UChar BASE[1] = { 0xFDD0 }; | |||
44 | const int32_t BASE_LENGTH = 1; | |||
45 | ||||
46 | UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, | |||
47 | const UnicodeString &one, const UnicodeString &other); | |||
48 | ||||
49 | } // namespace | |||
50 | ||||
51 | static int32_t U_CALLCONV | |||
52 | collatorComparator(const void *context, const void *left, const void *right); | |||
53 | ||||
54 | static int32_t U_CALLCONV | |||
55 | recordCompareFn(const void *context, const void *left, const void *right); | |||
56 | ||||
57 | // UVector<Record *> support function, delete a Record. | |||
58 | static void U_CALLCONV | |||
59 | alphaIndex_deleteRecord(void *obj) { | |||
60 | delete static_cast<AlphabeticIndex::Record *>(obj); | |||
61 | } | |||
62 | ||||
63 | namespace { | |||
64 | ||||
65 | UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned, | |||
66 | UErrorCode &errorCode) { | |||
67 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
68 | if (owned.isValid()) { | |||
69 | return owned.orphan(); | |||
70 | } | |||
71 | UnicodeString *p = new UnicodeString(s); | |||
72 | if (p == NULL__null) { | |||
73 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
74 | } | |||
75 | return p; | |||
76 | } | |||
77 | ||||
78 | inline UnicodeString *getString(const UVector &list, int32_t i) { | |||
79 | return static_cast<UnicodeString *>(list[i]); | |||
80 | } | |||
81 | ||||
82 | inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) { | |||
83 | return static_cast<AlphabeticIndex::Bucket *>(list[i]); | |||
84 | } | |||
85 | ||||
86 | inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) { | |||
87 | return static_cast<AlphabeticIndex::Record *>(list[i]); | |||
88 | } | |||
89 | ||||
90 | /** | |||
91 | * Like Java Collections.binarySearch(List, String, Comparator). | |||
92 | * | |||
93 | * @return the index>=0 where the item was found, | |||
94 | * or the index<0 for inserting the string at ~index in sorted order | |||
95 | */ | |||
96 | int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) { | |||
97 | if (list.size() == 0) { return ~0; } | |||
98 | int32_t start = 0; | |||
99 | int32_t limit = list.size(); | |||
100 | for (;;) { | |||
101 | int32_t i = (start + limit) / 2; | |||
102 | const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i)); | |||
103 | UErrorCode errorCode = U_ZERO_ERROR; | |||
104 | UCollationResult cmp = coll.compare(s, *si, errorCode); | |||
105 | if (cmp == UCOL_EQUAL) { | |||
106 | return i; | |||
107 | } else if (cmp < 0) { | |||
108 | if (i == start) { | |||
109 | return ~start; // insert s before *si | |||
110 | } | |||
111 | limit = i; | |||
112 | } else { | |||
113 | if (i == start) { | |||
114 | return ~(start + 1); // insert s after *si | |||
115 | } | |||
116 | start = i; | |||
117 | } | |||
118 | } | |||
119 | } | |||
120 | ||||
121 | } // namespace | |||
122 | ||||
123 | // The BucketList is not in the anonymous namespace because only Clang | |||
124 | // seems to support its use in other classes from there. | |||
125 | // However, we also don't need U_I18N_API because it is not used from outside the i18n library. | |||
126 | class BucketList : public UObject { | |||
127 | public: | |||
128 | BucketList(UVector *bucketList, UVector *publicBucketList) | |||
129 | : bucketList_(bucketList), immutableVisibleList_(publicBucketList) { | |||
130 | int32_t displayIndex = 0; | |||
131 | for (int32_t i = 0; i < publicBucketList->size(); ++i) { | |||
132 | getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++; | |||
133 | } | |||
134 | } | |||
135 | ||||
136 | // The virtual destructor must not be inline. | |||
137 | // See ticket #8454 for details. | |||
138 | virtual ~BucketList(); | |||
139 | ||||
140 | int32_t getBucketCount() const { | |||
141 | return immutableVisibleList_->size(); | |||
142 | } | |||
143 | ||||
144 | int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly, | |||
145 | UErrorCode &errorCode) { | |||
146 | // binary search | |||
147 | int32_t start = 0; | |||
148 | int32_t limit = bucketList_->size(); | |||
149 | while ((start + 1) < limit) { | |||
150 | int32_t i = (start + limit) / 2; | |||
151 | const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i); | |||
152 | UCollationResult nameVsBucket = | |||
153 | collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode); | |||
154 | if (nameVsBucket < 0) { | |||
155 | limit = i; | |||
156 | } else { | |||
157 | start = i; | |||
158 | } | |||
159 | } | |||
160 | const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start); | |||
161 | if (bucket->displayBucket_ != NULL__null) { | |||
162 | bucket = bucket->displayBucket_; | |||
163 | } | |||
164 | return bucket->displayIndex_; | |||
165 | } | |||
166 | ||||
167 | /** All of the buckets, visible and invisible. */ | |||
168 | UVector *bucketList_; | |||
169 | /** Just the visible buckets. */ | |||
170 | UVector *immutableVisibleList_; | |||
171 | }; | |||
172 | ||||
173 | BucketList::~BucketList() { | |||
174 | delete bucketList_; | |||
175 | if (immutableVisibleList_ != bucketList_) { | |||
176 | delete immutableVisibleList_; | |||
177 | } | |||
178 | } | |||
179 | ||||
180 | AlphabeticIndex::ImmutableIndex::~ImmutableIndex() { | |||
181 | delete buckets_; | |||
182 | delete collatorPrimaryOnly_; | |||
183 | } | |||
184 | ||||
185 | int32_t | |||
186 | AlphabeticIndex::ImmutableIndex::getBucketCount() const { | |||
187 | return buckets_->getBucketCount(); | |||
188 | } | |||
189 | ||||
190 | int32_t | |||
191 | AlphabeticIndex::ImmutableIndex::getBucketIndex( | |||
192 | const UnicodeString &name, UErrorCode &errorCode) const { | |||
193 | return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode); | |||
194 | } | |||
195 | ||||
196 | const AlphabeticIndex::Bucket * | |||
197 | AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const { | |||
198 | if (0 <= index && index < buckets_->getBucketCount()) { | |||
199 | return icu::getBucket(*buckets_->immutableVisibleList_, index); | |||
200 | } else { | |||
201 | return NULL__null; | |||
202 | } | |||
203 | } | |||
204 | ||||
205 | AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) | |||
206 | : inputList_(NULL__null), | |||
207 | labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL__null), | |||
208 | maxLabelCount_(99), | |||
209 | initialLabels_(NULL__null), firstCharsInScripts_(NULL__null), | |||
210 | collator_(NULL__null), collatorPrimaryOnly_(NULL__null), | |||
211 | buckets_(NULL__null) { | |||
212 | init(&locale, status); | |||
213 | } | |||
214 | ||||
215 | ||||
216 | AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status) | |||
217 | : inputList_(NULL__null), | |||
218 | labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL__null), | |||
219 | maxLabelCount_(99), | |||
220 | initialLabels_(NULL__null), firstCharsInScripts_(NULL__null), | |||
221 | collator_(collator), collatorPrimaryOnly_(NULL__null), | |||
222 | buckets_(NULL__null) { | |||
223 | init(NULL__null, status); | |||
| ||||
224 | } | |||
225 | ||||
226 | ||||
227 | ||||
228 | AlphabeticIndex::~AlphabeticIndex() { | |||
229 | delete collator_; | |||
230 | delete collatorPrimaryOnly_; | |||
231 | delete firstCharsInScripts_; | |||
232 | delete buckets_; | |||
233 | delete inputList_; | |||
234 | delete initialLabels_; | |||
235 | } | |||
236 | ||||
237 | ||||
238 | AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) { | |||
239 | if (U_FAILURE(status)) { | |||
240 | return *this; | |||
241 | } | |||
242 | initialLabels_->addAll(additions); | |||
243 | clearBuckets(); | |||
244 | return *this; | |||
245 | } | |||
246 | ||||
247 | ||||
248 | AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) { | |||
249 | addIndexExemplars(locale, status); | |||
250 | clearBuckets(); | |||
251 | return *this; | |||
252 | } | |||
253 | ||||
254 | ||||
255 | AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) { | |||
256 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
257 | // In C++, the ImmutableIndex must own its copy of the BucketList, | |||
258 | // even if it contains no records, for proper memory management. | |||
259 | // We could clone the buckets_ if they are not NULL, | |||
260 | // but that would be worth it only if this method is called multiple times, | |||
261 | // or called after using the old-style bucket iterator API. | |||
262 | LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode)); | |||
263 | LocalPointer<RuleBasedCollator> coll(collatorPrimaryOnly_->clone()); | |||
264 | if (immutableBucketList.isNull() || coll.isNull()) { | |||
265 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
266 | return NULL__null; | |||
267 | } | |||
268 | ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias()); | |||
269 | if (immIndex == NULL__null) { | |||
270 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
271 | return NULL__null; | |||
272 | } | |||
273 | // The ImmutableIndex adopted its parameter objects. | |||
274 | immutableBucketList.orphan(); | |||
275 | coll.orphan(); | |||
276 | return immIndex; | |||
277 | } | |||
278 | ||||
279 | int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) { | |||
280 | initBuckets(status); | |||
281 | if (U_FAILURE(status)) { | |||
282 | return 0; | |||
283 | } | |||
284 | return buckets_->getBucketCount(); | |||
285 | } | |||
286 | ||||
287 | ||||
288 | int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) { | |||
289 | if (U_FAILURE(status) || inputList_ == NULL__null) { | |||
290 | return 0; | |||
291 | } | |||
292 | return inputList_->size(); | |||
293 | } | |||
294 | ||||
295 | void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { | |||
296 | U_ASSERT(indexCharacters.hasDeleter())(void)0; | |||
297 | const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); | |||
298 | if (U_FAILURE(errorCode)) { return; } | |||
299 | ||||
300 | const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); | |||
301 | const UnicodeString &overflowBoundary = | |||
302 | *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); | |||
303 | ||||
304 | // We make a sorted array of elements. | |||
305 | // Some of the input may be redundant. | |||
306 | // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". | |||
307 | // We filter out those cases. | |||
308 | UnicodeSetIterator iter(*initialLabels_); | |||
309 | while (U_SUCCESS(errorCode) && iter.next()) { | |||
310 | const UnicodeString *item = &iter.getString(); | |||
311 | LocalPointer<UnicodeString> ownedItem; | |||
312 | UBool checkDistinct; | |||
313 | int32_t itemLength = item->length(); | |||
314 | if (!item->hasMoreChar32Than(0, itemLength, 1)) { | |||
315 | checkDistinct = FALSE0; | |||
316 | } else if(item->charAt(itemLength - 1) == 0x2a && // '*' | |||
317 | item->charAt(itemLength - 2) != 0x2a) { | |||
318 | // Use a label if it is marked with one trailing star, | |||
319 | // even if the label string sorts the same when all contractions are suppressed. | |||
320 | ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); | |||
321 | item = ownedItem.getAlias(); | |||
322 | if (item == NULL__null) { | |||
323 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
324 | return; | |||
325 | } | |||
326 | checkDistinct = FALSE0; | |||
327 | } else { | |||
328 | checkDistinct = TRUE1; | |||
329 | } | |||
330 | if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { | |||
331 | // Ignore a primary-ignorable or non-alphabetic index character. | |||
332 | } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { | |||
333 | // Ignore an index character that will land in the overflow bucket. | |||
334 | } else if (checkDistinct && | |||
335 | collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { | |||
336 | // Ignore a multi-code point index character that does not sort distinctly | |||
337 | // from the sequence of its separate characters. | |||
338 | } else { | |||
339 | int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); | |||
340 | if (insertionPoint < 0) { | |||
341 | indexCharacters.insertElementAt( | |||
342 | ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); | |||
343 | } else { | |||
344 | const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); | |||
345 | if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { | |||
346 | indexCharacters.setElementAt( | |||
347 | ownedString(*item, ownedItem, errorCode), insertionPoint); | |||
348 | } | |||
349 | } | |||
350 | } | |||
351 | } | |||
352 | if (U_FAILURE(errorCode)) { return; } | |||
353 | ||||
354 | // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element | |||
355 | ||||
356 | int32_t size = indexCharacters.size() - 1; | |||
357 | if (size > maxLabelCount_) { | |||
358 | int32_t count = 0; | |||
359 | int32_t old = -1; | |||
360 | for (int32_t i = 0; i < indexCharacters.size();) { | |||
361 | ++count; | |||
362 | int32_t bump = count * maxLabelCount_ / size; | |||
363 | if (bump == old) { | |||
364 | indexCharacters.removeElementAt(i); | |||
365 | } else { | |||
366 | old = bump; | |||
367 | ++i; | |||
368 | } | |||
369 | } | |||
370 | } | |||
371 | } | |||
372 | ||||
373 | namespace { | |||
374 | ||||
375 | const UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) { | |||
376 | if (!current.startsWith(BASE, BASE_LENGTH)) { | |||
377 | return current; | |||
378 | } | |||
379 | UChar rest = current.charAt(BASE_LENGTH); | |||
380 | if (0x2800 < rest && rest <= 0x28FF) { // stroke count | |||
381 | int32_t count = rest-0x2800; | |||
382 | temp.setTo((UChar)(0x30 + count % 10)); | |||
383 | if (count >= 10) { | |||
384 | count /= 10; | |||
385 | temp.insert(0, (UChar)(0x30 + count % 10)); | |||
386 | if (count >= 10) { | |||
387 | count /= 10; | |||
388 | temp.insert(0, (UChar)(0x30 + count)); | |||
389 | } | |||
390 | } | |||
391 | return temp.append((UChar)0x5283); | |||
392 | } | |||
393 | return temp.setTo(current, BASE_LENGTH); | |||
394 | } | |||
395 | ||||
396 | UBool hasMultiplePrimaryWeights( | |||
397 | const RuleBasedCollator &coll, uint32_t variableTop, | |||
398 | const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) { | |||
399 | ces.removeAllElements(); | |||
400 | coll.internalGetCEs(s, ces, errorCode); | |||
401 | if (U_FAILURE(errorCode)) { return FALSE0; } | |||
402 | UBool seenPrimary = FALSE0; | |||
403 | for (int32_t i = 0; i < ces.size(); ++i) { | |||
404 | int64_t ce = ces.elementAti(i); | |||
405 | uint32_t p = (uint32_t)(ce >> 32); | |||
406 | if (p > variableTop) { | |||
407 | // not primary ignorable | |||
408 | if (seenPrimary) { | |||
409 | return TRUE1; | |||
410 | } | |||
411 | seenPrimary = TRUE1; | |||
412 | } | |||
413 | } | |||
414 | return FALSE0; | |||
415 | } | |||
416 | ||||
417 | } // namespace | |||
418 | ||||
419 | BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { | |||
420 | // Initialize indexCharacters. | |||
421 | UVector indexCharacters(errorCode); | |||
422 | indexCharacters.setDeleter(uprv_deleteUObjectuprv_deleteUObject_71); | |||
423 | initLabels(indexCharacters, errorCode); | |||
424 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
425 | ||||
426 | // Variables for hasMultiplePrimaryWeights(). | |||
427 | UVector64 ces(errorCode); | |||
428 | uint32_t variableTop; | |||
429 | if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) { | |||
430 | variableTop = collatorPrimaryOnly_->getVariableTop(errorCode); | |||
431 | } else { | |||
432 | variableTop = 0; | |||
433 | } | |||
434 | UBool hasInvisibleBuckets = FALSE0; | |||
435 | ||||
436 | // Helper arrays for Chinese Pinyin collation. | |||
437 | Bucket *asciiBuckets[26] = { | |||
438 | NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, | |||
439 | NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null | |||
440 | }; | |||
441 | Bucket *pinyinBuckets[26] = { | |||
442 | NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, | |||
443 | NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null, NULL__null | |||
444 | }; | |||
445 | UBool hasPinyin = FALSE0; | |||
446 | ||||
447 | LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode); | |||
448 | if (U_FAILURE(errorCode)) { | |||
449 | return NULL__null; | |||
450 | } | |||
451 | bucketList->setDeleter(uprv_deleteUObjectuprv_deleteUObject_71); | |||
452 | ||||
453 | // underflow bucket | |||
454 | LocalPointer<Bucket> bucket(new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW), errorCode); | |||
455 | if (U_FAILURE(errorCode)) { | |||
456 | return NULL__null; | |||
457 | } | |||
458 | bucketList->adoptElement(bucket.orphan(), errorCode); | |||
459 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
460 | ||||
461 | UnicodeString temp; | |||
462 | ||||
463 | // fix up the list, adding underflow, additions, overflow | |||
464 | // Insert inflow labels as needed. | |||
465 | int32_t scriptIndex = -1; | |||
466 | const UnicodeString *scriptUpperBoundary = &emptyString_; | |||
467 | for (int32_t i = 0; i < indexCharacters.size(); ++i) { | |||
468 | UnicodeString ¤t = *getString(indexCharacters, i); | |||
469 | if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) { | |||
470 | // We crossed the script boundary into a new script. | |||
471 | const UnicodeString &inflowBoundary = *scriptUpperBoundary; | |||
472 | UBool skippedScript = FALSE0; | |||
473 | for (;;) { | |||
474 | scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex); | |||
475 | if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) { | |||
476 | break; | |||
477 | } | |||
478 | skippedScript = TRUE1; | |||
479 | } | |||
480 | if (skippedScript && bucketList->size() > 1) { | |||
481 | // We are skipping one or more scripts, | |||
482 | // and we are not just getting out of the underflow label. | |||
483 | bucket.adoptInsteadAndCheckErrorCode( | |||
484 | new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW), errorCode); | |||
485 | bucketList->adoptElement(bucket.orphan(), errorCode); | |||
486 | if (U_FAILURE(errorCode)) { return nullptr; } | |||
487 | } | |||
488 | } | |||
489 | // Add a bucket with the current label. | |||
490 | bucket.adoptInsteadAndCheckErrorCode( | |||
491 | new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL), errorCode); | |||
492 | bucketList->adoptElement(bucket.orphan(), errorCode); | |||
493 | if (U_FAILURE(errorCode)) { return nullptr; } | |||
494 | // Remember ASCII and Pinyin buckets for Pinyin redirects. | |||
495 | UChar c; | |||
496 | if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z | |||
497 | asciiBuckets[c - 0x41] = (Bucket *)bucketList->lastElement(); | |||
498 | } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) && | |||
499 | 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) { | |||
500 | pinyinBuckets[c - 0x41] = (Bucket *)bucketList->lastElement(); | |||
501 | hasPinyin = TRUE1; | |||
502 | } | |||
503 | // Check for multiple primary weights. | |||
504 | if (!current.startsWith(BASE, BASE_LENGTH) && | |||
505 | hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current, | |||
506 | ces, errorCode) && | |||
507 | current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) { | |||
508 | // "AE-ligature" or "Sch" etc. | |||
509 | for (int32_t j = bucketList->size() - 2;; --j) { | |||
510 | Bucket *singleBucket = getBucket(*bucketList, j); | |||
511 | if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) { | |||
512 | // There is no single-character bucket since the last | |||
513 | // underflow or inflow label. | |||
514 | break; | |||
515 | } | |||
516 | if (singleBucket->displayBucket_ == NULL__null && | |||
517 | !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, | |||
518 | singleBucket->lowerBoundary_, | |||
519 | ces, errorCode)) { | |||
520 | // Add an invisible bucket that redirects strings greater than the expansion | |||
521 | // to the previous single-character bucket. | |||
522 | // For example, after ... Q R S Sch we add Sch\uFFFF->S | |||
523 | // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S. | |||
524 | bucket.adoptInsteadAndCheckErrorCode(new Bucket(emptyString_, | |||
525 | UnicodeString(current).append((UChar)0xFFFF), | |||
526 | U_ALPHAINDEX_NORMAL), | |||
527 | errorCode); | |||
528 | if (U_FAILURE(errorCode)) { | |||
529 | return NULL__null; | |||
530 | } | |||
531 | bucket->displayBucket_ = singleBucket; | |||
532 | bucketList->adoptElement(bucket.orphan(), errorCode); | |||
533 | if (U_FAILURE(errorCode)) { return nullptr; } | |||
534 | hasInvisibleBuckets = TRUE1; | |||
535 | break; | |||
536 | } | |||
537 | } | |||
538 | } | |||
539 | } | |||
540 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
541 | if (bucketList->size() == 1) { | |||
542 | // No real labels, show only the underflow label. | |||
543 | BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); | |||
544 | if (bl == NULL__null) { | |||
545 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
546 | return NULL__null; | |||
547 | } | |||
548 | bucketList.orphan(); | |||
549 | return bl; | |||
550 | } | |||
551 | // overflow bucket | |||
552 | bucket.adoptInsteadAndCheckErrorCode( | |||
553 | new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW), errorCode); | |||
554 | bucketList->adoptElement(bucket.orphan(), errorCode); // final | |||
555 | if (U_FAILURE(errorCode)) { return nullptr; } | |||
556 | ||||
557 | if (hasPinyin) { | |||
558 | // Redirect Pinyin buckets. | |||
559 | Bucket *asciiBucket = NULL__null; | |||
560 | for (int32_t i = 0; i < 26; ++i) { | |||
561 | if (asciiBuckets[i] != NULL__null) { | |||
562 | asciiBucket = asciiBuckets[i]; | |||
563 | } | |||
564 | if (pinyinBuckets[i] != NULL__null && asciiBucket != NULL__null) { | |||
565 | pinyinBuckets[i]->displayBucket_ = asciiBucket; | |||
566 | hasInvisibleBuckets = TRUE1; | |||
567 | } | |||
568 | } | |||
569 | } | |||
570 | ||||
571 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
572 | if (!hasInvisibleBuckets) { | |||
573 | BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); | |||
574 | if (bl == NULL__null) { | |||
575 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
576 | return NULL__null; | |||
577 | } | |||
578 | bucketList.orphan(); | |||
579 | return bl; | |||
580 | } | |||
581 | // Merge inflow buckets that are visually adjacent. | |||
582 | // Iterate backwards: Merge inflow into overflow rather than the other way around. | |||
583 | int32_t i = bucketList->size() - 1; | |||
584 | Bucket *nextBucket = getBucket(*bucketList, i); | |||
585 | while (--i > 0) { | |||
586 | Bucket *bucket = getBucket(*bucketList, i); | |||
587 | if (bucket->displayBucket_ != NULL__null) { | |||
588 | continue; // skip invisible buckets | |||
589 | } | |||
590 | if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) { | |||
591 | if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) { | |||
592 | bucket->displayBucket_ = nextBucket; | |||
593 | continue; | |||
594 | } | |||
595 | } | |||
596 | nextBucket = bucket; | |||
597 | } | |||
598 | ||||
599 | LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode); | |||
600 | if (U_FAILURE(errorCode)) { | |||
601 | return NULL__null; | |||
602 | } | |||
603 | // Do not call publicBucketList->setDeleter(): | |||
604 | // This vector shares its objects with the bucketList. | |||
605 | for (int32_t j = 0; j < bucketList->size(); ++j) { | |||
606 | Bucket *bucket = getBucket(*bucketList, j); | |||
607 | if (bucket->displayBucket_ == NULL__null) { | |||
608 | publicBucketList->addElement(bucket, errorCode); | |||
609 | } | |||
610 | } | |||
611 | if (U_FAILURE(errorCode)) { return NULL__null; } | |||
612 | BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias()); | |||
613 | if (bl == NULL__null) { | |||
614 | errorCode = U_MEMORY_ALLOCATION_ERROR; | |||
615 | return NULL__null; | |||
616 | } | |||
617 | bucketList.orphan(); | |||
618 | publicBucketList.orphan(); | |||
619 | return bl; | |||
620 | } | |||
621 | ||||
622 | /** | |||
623 | * Creates an index, and buckets and sorts the list of records into the index. | |||
624 | */ | |||
625 | void AlphabeticIndex::initBuckets(UErrorCode &errorCode) { | |||
626 | if (U_FAILURE(errorCode) || buckets_ != NULL__null) { | |||
627 | return; | |||
628 | } | |||
629 | buckets_ = createBucketList(errorCode); | |||
630 | if (U_FAILURE(errorCode) || inputList_ == NULL__null || inputList_->isEmpty()) { | |||
631 | return; | |||
632 | } | |||
633 | ||||
634 | // Sort the records by name. | |||
635 | // Stable sort preserves input order of collation duplicates. | |||
636 | inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode); | |||
637 | ||||
638 | // Now, we traverse all of the input, which is now sorted. | |||
639 | // If the item doesn't go in the current bucket, we find the next bucket that contains it. | |||
640 | // This makes the process order n*log(n), since we just sort the list and then do a linear process. | |||
641 | // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so | |||
642 | // we need to improve it for that case. | |||
643 | ||||
644 | Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0); | |||
645 | int32_t bucketIndex = 1; | |||
646 | Bucket *nextBucket; | |||
647 | const UnicodeString *upperBoundary; | |||
648 | if (bucketIndex < buckets_->bucketList_->size()) { | |||
649 | nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); | |||
650 | upperBoundary = &nextBucket->lowerBoundary_; | |||
651 | } else { | |||
652 | nextBucket = NULL__null; | |||
653 | upperBoundary = NULL__null; | |||
654 | } | |||
655 | for (int32_t i = 0; i < inputList_->size(); ++i) { | |||
656 | Record *r = getRecord(*inputList_, i); | |||
657 | // if the current bucket isn't the right one, find the one that is | |||
658 | // We have a special flag for the last bucket so that we don't look any further | |||
659 | while (upperBoundary != NULL__null && | |||
660 | collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) { | |||
661 | currentBucket = nextBucket; | |||
662 | // now reset the boundary that we compare against | |||
663 | if (bucketIndex < buckets_->bucketList_->size()) { | |||
664 | nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); | |||
665 | upperBoundary = &nextBucket->lowerBoundary_; | |||
666 | } else { | |||
667 | upperBoundary = NULL__null; | |||
668 | } | |||
669 | } | |||
670 | // now put the record into the bucket. | |||
671 | Bucket *bucket = currentBucket; | |||
672 | if (bucket->displayBucket_ != NULL__null) { | |||
673 | bucket = bucket->displayBucket_; | |||
674 | } | |||
675 | if (bucket->records_ == NULL__null) { | |||
676 | LocalPointer<UVector> records(new UVector(errorCode), errorCode); | |||
677 | if (U_FAILURE(errorCode)) { | |||
678 | return; | |||
679 | } | |||
680 | bucket->records_ = records.orphan(); | |||
681 | } | |||
682 | bucket->records_->addElement(r, errorCode); | |||
683 | } | |||
684 | } | |||
685 | ||||
686 | void AlphabeticIndex::clearBuckets() { | |||
687 | if (buckets_ != NULL__null) { | |||
688 | delete buckets_; | |||
689 | buckets_ = NULL__null; | |||
690 | internalResetBucketIterator(); | |||
691 | } | |||
692 | } | |||
693 | ||||
694 | void AlphabeticIndex::internalResetBucketIterator() { | |||
695 | labelsIterIndex_ = -1; | |||
696 | currentBucket_ = NULL__null; | |||
697 | } | |||
698 | ||||
699 | ||||
700 | void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { | |||
701 | LocalULocaleDataPointer uld(ulocdata_openulocdata_open_71(locale.getName(), &status)); | |||
702 | if (U_FAILURE(status)) { | |||
703 | return; | |||
704 | } | |||
705 | ||||
706 | UnicodeSet exemplars; | |||
707 | ulocdata_getExemplarSetulocdata_getExemplarSet_71(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); | |||
708 | if (U_SUCCESS(status)) { | |||
709 | initialLabels_->addAll(exemplars); | |||
710 | return; | |||
711 | } | |||
712 | status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR | |||
713 | ||||
714 | // The locale data did not include explicit Index characters. | |||
715 | // Synthesize a set of them from the locale's standard exemplar characters. | |||
716 | ulocdata_getExemplarSetulocdata_getExemplarSet_71(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); | |||
717 | if (U_FAILURE(status)) { | |||
718 | return; | |||
719 | } | |||
720 | ||||
721 | // question: should we add auxiliary exemplars? | |||
722 | if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.isEmpty()) { | |||
723 | exemplars.add(0x61, 0x7A); | |||
724 | } | |||
725 | if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables | |||
726 | // cut down to small list | |||
727 | exemplars.remove(0xAC00, 0xD7A3). | |||
728 | add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). | |||
729 | add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). | |||
730 | add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). | |||
731 | add(0xD30C).add(0xD558); | |||
732 | } | |||
733 | if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block | |||
734 | // cut down to small list | |||
735 | // make use of the fact that Ethiopic is allocated in 8's, where | |||
736 | // the base is 0 mod 8. | |||
737 | UnicodeSet ethiopic(UnicodeString(u"[ሀለሐመሠረሰሸቀቈቐቘበቨተቸኀኈነኘአከኰኸዀወዐዘዠየደዸጀገጐጘጠጨጰጸፀፈፐፘ]"), status); | |||
738 | ethiopic.retainAll(exemplars); | |||
739 | exemplars.remove(u'ሀ', 0x137F).addAll(ethiopic); | |||
740 | } | |||
741 | ||||
742 | // Upper-case any that aren't already so. | |||
743 | // (We only do this for synthesized index characters.) | |||
744 | UnicodeSetIterator it(exemplars); | |||
745 | UnicodeString upperC; | |||
746 | while (it.next()) { | |||
747 | const UnicodeString &exemplarC = it.getString(); | |||
748 | upperC = exemplarC; | |||
749 | upperC.toUpper(locale); | |||
750 | initialLabels_->add(upperC); | |||
751 | } | |||
752 | } | |||
753 | ||||
754 | UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) { | |||
755 | UnicodeSet contractions; | |||
756 | collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode); | |||
757 | if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE0; } | |||
758 | initialLabels_->addAll(contractions); | |||
759 | UnicodeSetIterator iter(contractions); | |||
760 | while (iter.next()) { | |||
761 | const UnicodeString &s = iter.getString(); | |||
762 | U_ASSERT (s.startsWith(BASE, BASE_LENGTH))(void)0; | |||
763 | UChar c = s.charAt(s.length() - 1); | |||
764 | if (0x41 <= c && c <= 0x5A) { // A-Z | |||
765 | // There are Pinyin labels, add ASCII A-Z labels as well. | |||
766 | initialLabels_->add(0x41, 0x5A); // A-Z | |||
767 | break; | |||
768 | } | |||
769 | } | |||
770 | return TRUE1; | |||
771 | } | |||
772 | ||||
773 | ||||
774 | /* | |||
775 | * Return the string with interspersed CGJs. Input must have more than 2 codepoints. | |||
776 | */ | |||
777 | static const UChar CGJ = 0x034F; | |||
778 | UnicodeString AlphabeticIndex::separated(const UnicodeString &item) { | |||
779 | UnicodeString result; | |||
780 | if (item.length() == 0) { | |||
781 | return result; | |||
782 | } | |||
783 | int32_t i = 0; | |||
784 | for (;;) { | |||
785 | UChar32 cp = item.char32At(i); | |||
786 | result.append(cp); | |||
787 | i = item.moveIndex32(i, 1); | |||
788 | if (i >= item.length()) { | |||
789 | break; | |||
790 | } | |||
791 | result.append(CGJ); | |||
792 | } | |||
793 | return result; | |||
794 | } | |||
795 | ||||
796 | ||||
797 | bool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { | |||
798 | return false; | |||
799 | } | |||
800 | ||||
801 | ||||
802 | bool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { | |||
803 | return false; | |||
804 | } | |||
805 | ||||
806 | ||||
807 | const RuleBasedCollator &AlphabeticIndex::getCollator() const { | |||
808 | return *collator_; | |||
809 | } | |||
810 | ||||
811 | ||||
812 | const UnicodeString &AlphabeticIndex::getInflowLabel() const { | |||
813 | return inflowLabel_; | |||
814 | } | |||
815 | ||||
816 | const UnicodeString &AlphabeticIndex::getOverflowLabel() const { | |||
817 | return overflowLabel_; | |||
818 | } | |||
819 | ||||
820 | ||||
821 | const UnicodeString &AlphabeticIndex::getUnderflowLabel() const { | |||
822 | return underflowLabel_; | |||
823 | } | |||
824 | ||||
825 | ||||
826 | AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { | |||
827 | inflowLabel_ = label; | |||
828 | clearBuckets(); | |||
829 | return *this; | |||
830 | } | |||
831 | ||||
832 | ||||
833 | AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { | |||
834 | overflowLabel_ = label; | |||
835 | clearBuckets(); | |||
836 | return *this; | |||
837 | } | |||
838 | ||||
839 | ||||
840 | AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { | |||
841 | underflowLabel_ = label; | |||
842 | clearBuckets(); | |||
843 | return *this; | |||
844 | } | |||
845 | ||||
846 | ||||
847 | int32_t AlphabeticIndex::getMaxLabelCount() const { | |||
848 | return maxLabelCount_; | |||
849 | } | |||
850 | ||||
851 | ||||
852 | AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) { | |||
853 | if (U_FAILURE(status)) { | |||
854 | return *this; | |||
855 | } | |||
856 | if (maxLabelCount <= 0) { | |||
857 | status = U_ILLEGAL_ARGUMENT_ERROR; | |||
858 | return *this; | |||
859 | } | |||
860 | maxLabelCount_ = maxLabelCount; | |||
861 | clearBuckets(); | |||
862 | return *this; | |||
863 | } | |||
864 | ||||
865 | ||||
866 | // | |||
867 | // init() - Common code for constructors. | |||
868 | // | |||
869 | ||||
870 | void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) { | |||
871 | if (U_FAILURE(status)) { return; } | |||
872 | if (locale
| |||
873 | status = U_ILLEGAL_ARGUMENT_ERROR; | |||
874 | return; | |||
875 | } | |||
876 | ||||
877 | initialLabels_ = new UnicodeSet(); | |||
878 | if (initialLabels_ == NULL__null) { | |||
879 | status = U_MEMORY_ALLOCATION_ERROR; | |||
880 | return; | |||
881 | } | |||
882 | ||||
883 | inflowLabel_.setTo((UChar)0x2026); // Ellipsis | |||
884 | overflowLabel_ = inflowLabel_; | |||
885 | underflowLabel_ = inflowLabel_; | |||
886 | ||||
887 | if (collator_ == NULL__null) { | |||
888 | Collator *coll = Collator::createInstance(*locale, status); | |||
| ||||
889 | if (U_FAILURE(status)) { | |||
890 | delete coll; | |||
891 | return; | |||
892 | } | |||
893 | if (coll == NULL__null) { | |||
894 | status = U_MEMORY_ALLOCATION_ERROR; | |||
895 | return; | |||
896 | } | |||
897 | collator_ = dynamic_cast<RuleBasedCollator *>(coll); | |||
898 | if (collator_ == NULL__null) { | |||
899 | delete coll; | |||
900 | status = U_UNSUPPORTED_ERROR; | |||
901 | return; | |||
902 | } | |||
903 | } | |||
904 | collatorPrimaryOnly_ = collator_->clone(); | |||
905 | if (collatorPrimaryOnly_ == NULL__null) { | |||
906 | status = U_MEMORY_ALLOCATION_ERROR; | |||
907 | return; | |||
908 | } | |||
909 | collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status); | |||
910 | firstCharsInScripts_ = firstStringsInScript(status); | |||
911 | if (U_FAILURE(status)) { return; } | |||
912 | firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status); | |||
913 | // Guard against a degenerate collator where | |||
914 | // some script boundary strings are primary ignorable. | |||
915 | for (;;) { | |||
916 | if (U_FAILURE(status)) { return; } | |||
917 | if (firstCharsInScripts_->isEmpty()) { | |||
918 | // AlphabeticIndex requires some non-ignorable script boundary strings. | |||
919 | status = U_ILLEGAL_ARGUMENT_ERROR; | |||
920 | return; | |||
921 | } | |||
922 | if (collatorPrimaryOnly_->compare( | |||
923 | *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)), | |||
924 | emptyString_, status) == UCOL_EQUAL) { | |||
925 | firstCharsInScripts_->removeElementAt(0); | |||
926 | } else { | |||
927 | break; | |||
928 | } | |||
929 | } | |||
930 | ||||
931 | // Chinese index characters, which are specific to each of the several Chinese tailorings, | |||
932 | // take precedence over the single locale data exemplar set per language. | |||
933 | if (!addChineseIndexCharacters(status) && locale != NULL__null) { | |||
934 | addIndexExemplars(*locale, status); | |||
935 | } | |||
936 | } | |||
937 | ||||
938 | ||||
939 | // | |||
940 | // Comparison function for UVector<UnicodeString *> sorting with a collator. | |||
941 | // | |||
942 | static int32_t U_CALLCONV | |||
943 | collatorComparator(const void *context, const void *left, const void *right) { | |||
944 | const UElement *leftElement = static_cast<const UElement *>(left); | |||
945 | const UElement *rightElement = static_cast<const UElement *>(right); | |||
946 | const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer); | |||
947 | const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer); | |||
948 | ||||
949 | if (leftString == rightString) { | |||
950 | // Catches case where both are NULL | |||
951 | return 0; | |||
952 | } | |||
953 | if (leftString == NULL__null) { | |||
954 | return 1; | |||
955 | } | |||
956 | if (rightString == NULL__null) { | |||
957 | return -1; | |||
958 | } | |||
959 | const Collator *col = static_cast<const Collator *>(context); | |||
960 | UErrorCode errorCode = U_ZERO_ERROR; | |||
961 | return col->compare(*leftString, *rightString, errorCode); | |||
962 | } | |||
963 | ||||
964 | // | |||
965 | // Comparison function for UVector<Record *> sorting with a collator. | |||
966 | // | |||
967 | static int32_t U_CALLCONV | |||
968 | recordCompareFn(const void *context, const void *left, const void *right) { | |||
969 | const UElement *leftElement = static_cast<const UElement *>(left); | |||
970 | const UElement *rightElement = static_cast<const UElement *>(right); | |||
971 | const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer); | |||
972 | const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer); | |||
973 | const Collator *col = static_cast<const Collator *>(context); | |||
974 | UErrorCode errorCode = U_ZERO_ERROR; | |||
975 | return col->compare(leftRec->name_, rightRec->name_, errorCode); | |||
976 | } | |||
977 | ||||
978 | UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { | |||
979 | if (U_FAILURE(status)) { | |||
980 | return NULL__null; | |||
981 | } | |||
982 | LocalPointer<UVector> dest(new UVector(status), status); | |||
983 | if (U_FAILURE(status)) { | |||
984 | return NULL__null; | |||
985 | } | |||
986 | dest->setDeleter(uprv_deleteUObjectuprv_deleteUObject_71); | |||
987 | // Fetch the script-first-primary contractions which are defined in the root collator. | |||
988 | // They all start with U+FDD1. | |||
989 | UnicodeSet set; | |||
990 | collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status); | |||
991 | if (U_FAILURE(status)) { | |||
992 | return NULL__null; | |||
993 | } | |||
994 | if (set.isEmpty()) { | |||
995 | status = U_UNSUPPORTED_ERROR; | |||
996 | return NULL__null; | |||
997 | } | |||
998 | UnicodeSetIterator iter(set); | |||
999 | while (iter.next()) { | |||
1000 | const UnicodeString &boundary = iter.getString(); | |||
1001 | uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1))((uint32_t)1<<(u_charType_71(boundary.char32At(1)))); | |||
1002 | if ((gcMask & (U_GC_L_MASK(((uint32_t)1<<(U_UPPERCASE_LETTER))|((uint32_t)1<< (U_LOWERCASE_LETTER))|((uint32_t)1<<(U_TITLECASE_LETTER ))|((uint32_t)1<<(U_MODIFIER_LETTER))|((uint32_t)1<< (U_OTHER_LETTER))) | U_GC_CN_MASK((uint32_t)1<<(U_GENERAL_OTHER_TYPES)))) == 0) { | |||
1003 | // Ignore boundaries for the special reordering groups. | |||
1004 | // Take only those for "real scripts" (where the sample character is a Letter, | |||
1005 | // and the one for unassigned implicit weights (Cn). | |||
1006 | continue; | |||
1007 | } | |||
1008 | LocalPointer<UnicodeString> s(new UnicodeString(boundary), status); | |||
1009 | dest->adoptElement(s.orphan(), status); | |||
1010 | if (U_FAILURE(status)) { | |||
1011 | return nullptr; | |||
1012 | } | |||
1013 | } | |||
1014 | return dest.orphan(); | |||
1015 | } | |||
1016 | ||||
1017 | ||||
1018 | namespace { | |||
1019 | ||||
1020 | /** | |||
1021 | * Returns true if one index character string is "better" than the other. | |||
1022 | * Shorter NFKD is better, and otherwise NFKD-binary-less-than is | |||
1023 | * better, and otherwise binary-less-than is better. | |||
1024 | */ | |||
1025 | UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, | |||
1026 | const UnicodeString &one, const UnicodeString &other) { | |||
1027 | // This is called with primary-equal strings, but never with one.equals(other). | |||
1028 | UErrorCode status = U_ZERO_ERROR; | |||
1029 | UnicodeString n1 = nfkdNormalizer.normalize(one, status); | |||
1030 | UnicodeString n2 = nfkdNormalizer.normalize(other, status); | |||
1031 | if (U_FAILURE(status)) { return FALSE0; } | |||
1032 | int32_t result = n1.countChar32() - n2.countChar32(); | |||
1033 | if (result != 0) { | |||
1034 | return result < 0; | |||
1035 | } | |||
1036 | result = n1.compareCodePointOrder(n2); | |||
1037 | if (result != 0) { | |||
1038 | return result < 0; | |||
1039 | } | |||
1040 | return one.compareCodePointOrder(other) < 0; | |||
1041 | } | |||
1042 | ||||
1043 | } // namespace | |||
1044 | ||||
1045 | // | |||
1046 | // Constructor & Destructor for AlphabeticIndex::Record | |||
1047 | // | |||
1048 | // Records are internal only, instances are not directly surfaced in the public API. | |||
1049 | // This class is mostly struct-like, with all public fields. | |||
1050 | ||||
1051 | AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data) | |||
1052 | : name_(name), data_(data) {} | |||
1053 | ||||
1054 | AlphabeticIndex::Record::~Record() { | |||
1055 | } | |||
1056 | ||||
1057 | ||||
1058 | AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) { | |||
1059 | if (U_FAILURE(status)) { | |||
1060 | return *this; | |||
1061 | } | |||
1062 | if (inputList_ == NULL__null) { | |||
1063 | LocalPointer<UVector> inputList(new UVector(status), status); | |||
1064 | if (U_FAILURE(status)) { | |||
1065 | return *this; | |||
1066 | } | |||
1067 | inputList_ = inputList.orphan(); | |||
1068 | inputList_->setDeleter(alphaIndex_deleteRecord); | |||
1069 | } | |||
1070 | LocalPointer<Record> r(new Record(name, data), status); | |||
1071 | inputList_->adoptElement(r.orphan(), status); | |||
1072 | if (U_FAILURE(status)) { | |||
1073 | return *this; | |||
1074 | } | |||
1075 | clearBuckets(); | |||
1076 | //std::string ss; | |||
1077 | //std::string ss2; | |||
1078 | //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << | |||
1079 | // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl; | |||
1080 | return *this; | |||
1081 | } | |||
1082 | ||||
1083 | ||||
1084 | AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) { | |||
1085 | if (U_SUCCESS(status) && inputList_ != NULL__null && !inputList_->isEmpty()) { | |||
1086 | inputList_->removeAllElements(); | |||
1087 | clearBuckets(); | |||
1088 | } | |||
1089 | return *this; | |||
1090 | } | |||
1091 | ||||
1092 | int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) { | |||
1093 | initBuckets(status); | |||
1094 | if (U_FAILURE(status)) { | |||
1095 | return 0; | |||
1096 | } | |||
1097 | return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status); | |||
1098 | } | |||
1099 | ||||
1100 | ||||
1101 | int32_t AlphabeticIndex::getBucketIndex() const { | |||
1102 | return labelsIterIndex_; | |||
1103 | } | |||
1104 | ||||
1105 | ||||
1106 | UBool AlphabeticIndex::nextBucket(UErrorCode &status) { | |||
1107 | if (U_FAILURE(status)) { | |||
1108 | return FALSE0; | |||
1109 | } | |||
1110 | if (buckets_ == NULL__null && currentBucket_ != NULL__null) { | |||
1111 | status = U_ENUM_OUT_OF_SYNC_ERROR; | |||
1112 | return FALSE0; | |||
1113 | } | |||
1114 | initBuckets(status); | |||
1115 | if (U_FAILURE(status)) { | |||
1116 | return FALSE0; | |||
1117 | } | |||
1118 | ++labelsIterIndex_; | |||
1119 | if (labelsIterIndex_ >= buckets_->getBucketCount()) { | |||
1120 | labelsIterIndex_ = buckets_->getBucketCount(); | |||
1121 | return FALSE0; | |||
1122 | } | |||
1123 | currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_); | |||
1124 | resetRecordIterator(); | |||
1125 | return TRUE1; | |||
1126 | } | |||
1127 | ||||
1128 | const UnicodeString &AlphabeticIndex::getBucketLabel() const { | |||
1129 | if (currentBucket_ != NULL__null) { | |||
1130 | return currentBucket_->label_; | |||
1131 | } else { | |||
1132 | return emptyString_; | |||
1133 | } | |||
1134 | } | |||
1135 | ||||
1136 | ||||
1137 | UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const { | |||
1138 | if (currentBucket_ != NULL__null) { | |||
1139 | return currentBucket_->labelType_; | |||
1140 | } else { | |||
1141 | return U_ALPHAINDEX_NORMAL; | |||
1142 | } | |||
1143 | } | |||
1144 | ||||
1145 | ||||
1146 | int32_t AlphabeticIndex::getBucketRecordCount() const { | |||
1147 | if (currentBucket_ != NULL__null && currentBucket_->records_ != NULL__null) { | |||
1148 | return currentBucket_->records_->size(); | |||
1149 | } else { | |||
1150 | return 0; | |||
1151 | } | |||
1152 | } | |||
1153 | ||||
1154 | ||||
1155 | AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) { | |||
1156 | if (U_FAILURE(status)) { | |||
1157 | return *this; | |||
1158 | } | |||
1159 | internalResetBucketIterator(); | |||
1160 | return *this; | |||
1161 | } | |||
1162 | ||||
1163 | ||||
1164 | UBool AlphabeticIndex::nextRecord(UErrorCode &status) { | |||
1165 | if (U_FAILURE(status)) { | |||
1166 | return FALSE0; | |||
1167 | } | |||
1168 | if (currentBucket_ == NULL__null) { | |||
1169 | // We are trying to iterate over the items in a bucket, but there is no | |||
1170 | // current bucket from the enumeration of buckets. | |||
1171 | status = U_INVALID_STATE_ERROR; | |||
1172 | return FALSE0; | |||
1173 | } | |||
1174 | if (buckets_ == NULL__null) { | |||
1175 | status = U_ENUM_OUT_OF_SYNC_ERROR; | |||
1176 | return FALSE0; | |||
1177 | } | |||
1178 | if (currentBucket_->records_ == NULL__null) { | |||
1179 | return FALSE0; | |||
1180 | } | |||
1181 | ++itemsIterIndex_; | |||
1182 | if (itemsIterIndex_ >= currentBucket_->records_->size()) { | |||
1183 | itemsIterIndex_ = currentBucket_->records_->size(); | |||
1184 | return FALSE0; | |||
1185 | } | |||
1186 | return TRUE1; | |||
1187 | } | |||
1188 | ||||
1189 | ||||
1190 | const UnicodeString &AlphabeticIndex::getRecordName() const { | |||
1191 | const UnicodeString *retStr = &emptyString_; | |||
1192 | if (currentBucket_ != NULL__null && currentBucket_->records_ != NULL__null && | |||
1193 | itemsIterIndex_ >= 0 && | |||
1194 | itemsIterIndex_ < currentBucket_->records_->size()) { | |||
1195 | Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); | |||
1196 | retStr = &item->name_; | |||
1197 | } | |||
1198 | return *retStr; | |||
1199 | } | |||
1200 | ||||
1201 | const void *AlphabeticIndex::getRecordData() const { | |||
1202 | const void *retPtr = NULL__null; | |||
1203 | if (currentBucket_ != NULL__null && currentBucket_->records_ != NULL__null && | |||
1204 | itemsIterIndex_ >= 0 && | |||
1205 | itemsIterIndex_ < currentBucket_->records_->size()) { | |||
1206 | Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); | |||
1207 | retPtr = item->data_; | |||
1208 | } | |||
1209 | return retPtr; | |||
1210 | } | |||
1211 | ||||
1212 | ||||
1213 | AlphabeticIndex & AlphabeticIndex::resetRecordIterator() { | |||
1214 | itemsIterIndex_ = -1; | |||
1215 | return *this; | |||
1216 | } | |||
1217 | ||||
1218 | ||||
1219 | ||||
1220 | AlphabeticIndex::Bucket::Bucket(const UnicodeString &label, | |||
1221 | const UnicodeString &lowerBoundary, | |||
1222 | UAlphabeticIndexLabelType type) | |||
1223 | : label_(label), lowerBoundary_(lowerBoundary), labelType_(type), | |||
1224 | displayBucket_(NULL__null), displayIndex_(-1), | |||
1225 | records_(NULL__null) { | |||
1226 | } | |||
1227 | ||||
1228 | ||||
1229 | AlphabeticIndex::Bucket::~Bucket() { | |||
1230 | delete records_; | |||
1231 | } | |||
1232 | ||||
1233 | U_NAMESPACE_END} | |||
1234 | ||||
1235 | #endif // !UCONFIG_NO_COLLATION |