File: | out/../deps/icu-small/source/i18n/collationweights.cpp |
Warning: | line 85, column 41 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2016 and later: Unicode, Inc. and others. | |||
2 | // License & terms of use: http://www.unicode.org/copyright.html | |||
3 | /* | |||
4 | ******************************************************************************* | |||
5 | * | |||
6 | * Copyright (C) 1999-2015, International Business Machines | |||
7 | * Corporation and others. All Rights Reserved. | |||
8 | * | |||
9 | ******************************************************************************* | |||
10 | * file name: collationweights.cpp | |||
11 | * encoding: UTF-8 | |||
12 | * tab size: 8 (not used) | |||
13 | * indentation:4 | |||
14 | * | |||
15 | * created on: 2001mar08 as ucol_wgt.cpp | |||
16 | * created by: Markus W. Scherer | |||
17 | * | |||
18 | * This file contains code for allocating n collation element weights | |||
19 | * between two exclusive limits. | |||
20 | * It is used only internally by the collation tailoring builder. | |||
21 | */ | |||
22 | ||||
23 | #include "unicode/utypes.h" | |||
24 | ||||
25 | #if !UCONFIG_NO_COLLATION0 | |||
26 | ||||
27 | #include "cmemory.h" | |||
28 | #include "collation.h" | |||
29 | #include "collationweights.h" | |||
30 | #include "uarrsort.h" | |||
31 | #include "uassert.h" | |||
32 | ||||
33 | #ifdef UCOL_DEBUG | |||
34 | # include <stdio.h> | |||
35 | #endif | |||
36 | ||||
37 | U_NAMESPACE_BEGINnamespace icu_71 { | |||
38 | ||||
39 | /* collation element weight allocation -------------------------------------- */ | |||
40 | ||||
41 | /* helper functions for CE weights */ | |||
42 | ||||
43 | static inline uint32_t | |||
44 | getWeightTrail(uint32_t weight, int32_t length) { | |||
45 | return (uint32_t)(weight>>(8*(4-length)))&0xff; | |||
46 | } | |||
47 | ||||
48 | static inline uint32_t | |||
49 | setWeightTrail(uint32_t weight, int32_t length, uint32_t trail) { | |||
50 | length=8*(4-length); | |||
51 | return (uint32_t)((weight&(0xffffff00<<length))|(trail<<length)); | |||
52 | } | |||
53 | ||||
54 | static inline uint32_t | |||
55 | getWeightByte(uint32_t weight, int32_t idx) { | |||
56 | return getWeightTrail(weight, idx); /* same calculation */ | |||
57 | } | |||
58 | ||||
59 | static inline uint32_t | |||
60 | setWeightByte(uint32_t weight, int32_t idx, uint32_t byte) { | |||
61 | uint32_t mask; /* 0xffffffff except a 00 "hole" for the index-th byte */ | |||
62 | ||||
63 | idx*=8; | |||
64 | if(idx<32) { | |||
65 | mask=((uint32_t)0xffffffff)>>idx; | |||
66 | } else { | |||
67 | // Do not use uint32_t>>32 because on some platforms that does not shift at all | |||
68 | // while we need it to become 0. | |||
69 | // PowerPC: 0xffffffff>>32 = 0 (wanted) | |||
70 | // x86: 0xffffffff>>32 = 0xffffffff (not wanted) | |||
71 | // | |||
72 | // ANSI C99 6.5.7 Bitwise shift operators: | |||
73 | // "If the value of the right operand is negative | |||
74 | // or is greater than or equal to the width of the promoted left operand, | |||
75 | // the behavior is undefined." | |||
76 | mask=0; | |||
77 | } | |||
78 | idx=32-idx; | |||
79 | mask|=0xffffff00<<idx; | |||
80 | return (uint32_t)((weight&mask)|(byte<<idx)); | |||
81 | } | |||
82 | ||||
83 | static inline uint32_t | |||
84 | truncateWeight(uint32_t weight, int32_t length) { | |||
85 | return (uint32_t)(weight&(0xffffffff<<(8*(4-length)))); | |||
| ||||
86 | } | |||
87 | ||||
88 | static inline uint32_t | |||
89 | incWeightTrail(uint32_t weight, int32_t length) { | |||
90 | return (uint32_t)(weight+(1UL<<(8*(4-length)))); | |||
91 | } | |||
92 | ||||
93 | static inline uint32_t | |||
94 | decWeightTrail(uint32_t weight, int32_t length) { | |||
95 | return (uint32_t)(weight-(1UL<<(8*(4-length)))); | |||
96 | } | |||
97 | ||||
98 | CollationWeights::CollationWeights() | |||
99 | : middleLength(0), rangeIndex(0), rangeCount(0) { | |||
100 | for(int32_t i = 0; i < 5; ++i) { | |||
101 | minBytes[i] = maxBytes[i] = 0; | |||
102 | } | |||
103 | } | |||
104 | ||||
105 | void | |||
106 | CollationWeights::initForPrimary(UBool compressible) { | |||
107 | middleLength=1; | |||
108 | minBytes[1] = Collation::MERGE_SEPARATOR_BYTE + 1; | |||
109 | maxBytes[1] = Collation::TRAIL_WEIGHT_BYTE; | |||
110 | if(compressible) { | |||
111 | minBytes[2] = Collation::PRIMARY_COMPRESSION_LOW_BYTE + 1; | |||
112 | maxBytes[2] = Collation::PRIMARY_COMPRESSION_HIGH_BYTE - 1; | |||
113 | } else { | |||
114 | minBytes[2] = 2; | |||
115 | maxBytes[2] = 0xff; | |||
116 | } | |||
117 | minBytes[3] = 2; | |||
118 | maxBytes[3] = 0xff; | |||
119 | minBytes[4] = 2; | |||
120 | maxBytes[4] = 0xff; | |||
121 | } | |||
122 | ||||
123 | void | |||
124 | CollationWeights::initForSecondary() { | |||
125 | // We use only the lower 16 bits for secondary weights. | |||
126 | middleLength=3; | |||
127 | minBytes[1] = 0; | |||
128 | maxBytes[1] = 0; | |||
129 | minBytes[2] = 0; | |||
130 | maxBytes[2] = 0; | |||
131 | minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; | |||
132 | maxBytes[3] = 0xff; | |||
133 | minBytes[4] = 2; | |||
134 | maxBytes[4] = 0xff; | |||
135 | } | |||
136 | ||||
137 | void | |||
138 | CollationWeights::initForTertiary() { | |||
139 | // We use only the lower 16 bits for tertiary weights. | |||
140 | middleLength=3; | |||
141 | minBytes[1] = 0; | |||
142 | maxBytes[1] = 0; | |||
143 | minBytes[2] = 0; | |||
144 | maxBytes[2] = 0; | |||
145 | // We use only 6 bits per byte. | |||
146 | // The other bits are used for case & quaternary weights. | |||
147 | minBytes[3] = Collation::LEVEL_SEPARATOR_BYTE + 1; | |||
148 | maxBytes[3] = 0x3f; | |||
149 | minBytes[4] = 2; | |||
150 | maxBytes[4] = 0x3f; | |||
151 | } | |||
152 | ||||
153 | uint32_t | |||
154 | CollationWeights::incWeight(uint32_t weight, int32_t length) const { | |||
155 | for(;;) { | |||
156 | uint32_t byte=getWeightByte(weight, length); | |||
157 | if(byte<maxBytes[length]) { | |||
158 | return setWeightByte(weight, length, byte+1); | |||
159 | } else { | |||
160 | // Roll over, set this byte to the minimum and increment the previous one. | |||
161 | weight=setWeightByte(weight, length, minBytes[length]); | |||
162 | --length; | |||
163 | U_ASSERT(length > 0)(void)0; | |||
164 | } | |||
165 | } | |||
166 | } | |||
167 | ||||
168 | uint32_t | |||
169 | CollationWeights::incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const { | |||
170 | for(;;) { | |||
171 | offset += getWeightByte(weight, length); | |||
172 | if((uint32_t)offset <= maxBytes[length]) { | |||
173 | return setWeightByte(weight, length, offset); | |||
174 | } else { | |||
175 | // Split the offset between this byte and the previous one. | |||
176 | offset -= minBytes[length]; | |||
177 | weight = setWeightByte(weight, length, minBytes[length] + offset % countBytes(length)); | |||
178 | offset /= countBytes(length); | |||
179 | --length; | |||
180 | U_ASSERT(length > 0)(void)0; | |||
181 | } | |||
182 | } | |||
183 | } | |||
184 | ||||
185 | void | |||
186 | CollationWeights::lengthenRange(WeightRange &range) const { | |||
187 | int32_t length=range.length+1; | |||
188 | range.start=setWeightTrail(range.start, length, minBytes[length]); | |||
189 | range.end=setWeightTrail(range.end, length, maxBytes[length]); | |||
190 | range.count*=countBytes(length); | |||
191 | range.length=length; | |||
192 | } | |||
193 | ||||
194 | /* for uprv_sortArray: sort ranges in weight order */ | |||
195 | static int32_t U_CALLCONV | |||
196 | compareRanges(const void * /*context*/, const void *left, const void *right) { | |||
197 | uint32_t l, r; | |||
198 | ||||
199 | l=((const CollationWeights::WeightRange *)left)->start; | |||
200 | r=((const CollationWeights::WeightRange *)right)->start; | |||
201 | if(l<r) { | |||
202 | return -1; | |||
203 | } else if(l>r) { | |||
204 | return 1; | |||
205 | } else { | |||
206 | return 0; | |||
207 | } | |||
208 | } | |||
209 | ||||
210 | UBool | |||
211 | CollationWeights::getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit) { | |||
212 | U_ASSERT(lowerLimit != 0)(void)0; | |||
213 | U_ASSERT(upperLimit != 0)(void)0; | |||
214 | ||||
215 | /* get the lengths of the limits */ | |||
216 | int32_t lowerLength=lengthOfWeight(lowerLimit); | |||
217 | int32_t upperLength=lengthOfWeight(upperLimit); | |||
218 | ||||
219 | #ifdef UCOL_DEBUG | |||
220 | printf("length of lower limit 0x%08lx is %ld\n", lowerLimit, lowerLength); | |||
221 | printf("length of upper limit 0x%08lx is %ld\n", upperLimit, upperLength); | |||
222 | #endif | |||
223 | U_ASSERT(lowerLength>=middleLength)(void)0; | |||
224 | // Permit upperLength<middleLength: The upper limit for secondaries is 0x10000. | |||
225 | ||||
226 | if(lowerLimit>=upperLimit) { | |||
227 | #ifdef UCOL_DEBUG | |||
228 | printf("error: no space between lower & upper limits\n"); | |||
229 | #endif | |||
230 | return FALSE0; | |||
231 | } | |||
232 | ||||
233 | /* check that neither is a prefix of the other */ | |||
234 | if(lowerLength
| |||
235 | if(lowerLimit==truncateWeight(upperLimit, lowerLength)) { | |||
236 | #ifdef UCOL_DEBUG | |||
237 | printf("error: lower limit 0x%08lx is a prefix of upper limit 0x%08lx\n", lowerLimit, upperLimit); | |||
238 | #endif | |||
239 | return FALSE0; | |||
240 | } | |||
241 | } | |||
242 | /* if the upper limit is a prefix of the lower limit then the earlier test lowerLimit>=upperLimit has caught it */ | |||
243 | ||||
244 | WeightRange lower[5], middle, upper[5]; /* [0] and [1] are not used - this simplifies indexing */ | |||
245 | uprv_memset(lower, 0, sizeof(lower)):: memset(lower, 0, sizeof(lower)); | |||
246 | uprv_memset(&middle, 0, sizeof(middle)):: memset(&middle, 0, sizeof(middle)); | |||
247 | uprv_memset(upper, 0, sizeof(upper)):: memset(upper, 0, sizeof(upper)); | |||
248 | ||||
249 | /* | |||
250 | * With the limit lengths of 1..4, there are up to 7 ranges for allocation: | |||
251 | * range minimum length | |||
252 | * lower[4] 4 | |||
253 | * lower[3] 3 | |||
254 | * lower[2] 2 | |||
255 | * middle 1 | |||
256 | * upper[2] 2 | |||
257 | * upper[3] 3 | |||
258 | * upper[4] 4 | |||
259 | * | |||
260 | * We are now going to calculate up to 7 ranges. | |||
261 | * Some of them will typically overlap, so we will then have to merge and eliminate ranges. | |||
262 | */ | |||
263 | uint32_t weight=lowerLimit; | |||
264 | for(int32_t length=lowerLength; length>middleLength; --length) { | |||
265 | uint32_t trail=getWeightTrail(weight, length); | |||
266 | if(trail<maxBytes[length]) { | |||
267 | lower[length].start=incWeightTrail(weight, length); | |||
268 | lower[length].end=setWeightTrail(weight, length, maxBytes[length]); | |||
269 | lower[length].length=length; | |||
270 | lower[length].count=maxBytes[length]-trail; | |||
271 | } | |||
272 | weight=truncateWeight(weight, length-1); | |||
273 | } | |||
274 | if(weight<0xff000000) { | |||
275 | middle.start=incWeightTrail(weight, middleLength); | |||
276 | } else { | |||
277 | // Prevent overflow for primary lead byte FF | |||
278 | // which would yield a middle range starting at 0. | |||
279 | middle.start=0xffffffff; // no middle range | |||
280 | } | |||
281 | ||||
282 | weight=upperLimit; | |||
283 | for(int32_t length=upperLength; length>middleLength; --length) { | |||
284 | uint32_t trail=getWeightTrail(weight, length); | |||
285 | if(trail>minBytes[length]) { | |||
286 | upper[length].start=setWeightTrail(weight, length, minBytes[length]); | |||
287 | upper[length].end=decWeightTrail(weight, length); | |||
288 | upper[length].length=length; | |||
289 | upper[length].count=trail-minBytes[length]; | |||
290 | } | |||
291 | weight=truncateWeight(weight, length-1); | |||
292 | } | |||
293 | middle.end=decWeightTrail(weight, middleLength); | |||
294 | ||||
295 | /* set the middle range */ | |||
296 | middle.length=middleLength; | |||
297 | if(middle.end>=middle.start) { | |||
298 | middle.count=(int32_t)((middle.end-middle.start)>>(8*(4-middleLength)))+1; | |||
299 | } else { | |||
300 | /* no middle range, eliminate overlaps */ | |||
301 | for(int32_t length=4; length>middleLength; --length) { | |||
302 | if(lower[length].count>0 && upper[length].count>0) { | |||
303 | // Note: The lowerEnd and upperStart weights are versions of | |||
304 | // lowerLimit and upperLimit (which are lowerLimit<upperLimit), | |||
305 | // truncated (still less-or-equal) | |||
306 | // and then with their last bytes changed to the | |||
307 | // maxByte (for lowerEnd) or minByte (for upperStart). | |||
308 | const uint32_t lowerEnd=lower[length].end; | |||
309 | const uint32_t upperStart=upper[length].start; | |||
310 | UBool merged=FALSE0; | |||
311 | ||||
312 | if(lowerEnd>upperStart) { | |||
313 | // These two lower and upper ranges collide. | |||
314 | // Since lowerLimit<upperLimit and lowerEnd and upperStart | |||
315 | // are versions with only their last bytes modified | |||
316 | // (and following ones removed/reset to 0), | |||
317 | // lowerEnd>upperStart is only possible | |||
318 | // if the leading bytes are equal | |||
319 | // and lastByte(lowerEnd)>lastByte(upperStart). | |||
320 | U_ASSERT(truncateWeight(lowerEnd, length-1)==(void)0 | |||
321 | truncateWeight(upperStart, length-1))(void)0; | |||
322 | // Intersect these two ranges. | |||
323 | lower[length].end=upper[length].end; | |||
324 | lower[length].count= | |||
325 | (int32_t)getWeightTrail(lower[length].end, length)- | |||
326 | (int32_t)getWeightTrail(lower[length].start, length)+1; | |||
327 | // count might be <=0 in which case there is no room, | |||
328 | // and the range-collecting code below will ignore this range. | |||
329 | merged=TRUE1; | |||
330 | } else if(lowerEnd==upperStart) { | |||
331 | // Not possible, unless minByte==maxByte which is not allowed. | |||
332 | U_ASSERT(minBytes[length]<maxBytes[length])(void)0; | |||
333 | } else /* lowerEnd<upperStart */ { | |||
334 | if(incWeight(lowerEnd, length)==upperStart) { | |||
335 | // Merge adjacent ranges. | |||
336 | lower[length].end=upper[length].end; | |||
337 | lower[length].count+=upper[length].count; // might be >countBytes | |||
338 | merged=TRUE1; | |||
339 | } | |||
340 | } | |||
341 | if(merged) { | |||
342 | // Remove all shorter ranges. | |||
343 | // There was no room available for them between the ranges we just merged. | |||
344 | upper[length].count=0; | |||
345 | while(--length>middleLength) { | |||
346 | lower[length].count=upper[length].count=0; | |||
347 | } | |||
348 | break; | |||
349 | } | |||
350 | } | |||
351 | } | |||
352 | } | |||
353 | ||||
354 | #ifdef UCOL_DEBUG | |||
355 | /* print ranges */ | |||
356 | for(int32_t length=4; length>=2; --length) { | |||
357 | if(lower[length].count>0) { | |||
358 | printf("lower[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, lower[length].start, lower[length].end, lower[length].count); | |||
359 | } | |||
360 | } | |||
361 | if(middle.count>0) { | |||
362 | printf("middle .start=0x%08lx .end=0x%08lx .count=%ld\n", middle.start, middle.end, middle.count); | |||
363 | } | |||
364 | for(int32_t length=2; length<=4; ++length) { | |||
365 | if(upper[length].count>0) { | |||
366 | printf("upper[%ld] .start=0x%08lx .end=0x%08lx .count=%ld\n", length, upper[length].start, upper[length].end, upper[length].count); | |||
367 | } | |||
368 | } | |||
369 | #endif | |||
370 | ||||
371 | /* copy the ranges, shortest first, into the result array */ | |||
372 | rangeCount=0; | |||
373 | if(middle.count>0) { | |||
374 | uprv_memcpy(ranges, &middle, sizeof(WeightRange))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(ranges, & middle, sizeof(WeightRange)); } while (false); | |||
375 | rangeCount=1; | |||
376 | } | |||
377 | for(int32_t length=middleLength+1; length<=4; ++length) { | |||
378 | /* copy upper first so that later the middle range is more likely the first one to use */ | |||
379 | if(upper[length].count>0) { | |||
380 | uprv_memcpy(ranges+rangeCount, upper+length, sizeof(WeightRange))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(ranges+rangeCount , upper+length, sizeof(WeightRange)); } while (false); | |||
381 | ++rangeCount; | |||
382 | } | |||
383 | if(lower[length].count>0) { | |||
384 | uprv_memcpy(ranges+rangeCount, lower+length, sizeof(WeightRange))do { clang diagnostic push
clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(ranges+rangeCount , lower+length, sizeof(WeightRange)); } while (false); | |||
385 | ++rangeCount; | |||
386 | } | |||
387 | } | |||
388 | return rangeCount>0; | |||
389 | } | |||
390 | ||||
391 | UBool | |||
392 | CollationWeights::allocWeightsInShortRanges(int32_t n, int32_t minLength) { | |||
393 | // See if the first few minLength and minLength+1 ranges have enough weights. | |||
394 | for(int32_t i = 0; i < rangeCount && ranges[i].length <= (minLength + 1); ++i) { | |||
395 | if(n <= ranges[i].count) { | |||
396 | // Use the first few minLength and minLength+1 ranges. | |||
397 | if(ranges[i].length > minLength) { | |||
398 | // Reduce the number of weights from the last minLength+1 range | |||
399 | // which might sort before some minLength ranges, | |||
400 | // so that we use all weights in the minLength ranges. | |||
401 | ranges[i].count = n; | |||
402 | } | |||
403 | rangeCount = i + 1; | |||
404 | #ifdef UCOL_DEBUG | |||
405 | printf("take first %ld ranges\n", rangeCount); | |||
406 | #endif | |||
407 | ||||
408 | if(rangeCount>1) { | |||
409 | /* sort the ranges by weight values */ | |||
410 | UErrorCode errorCode=U_ZERO_ERROR; | |||
411 | uprv_sortArrayuprv_sortArray_71(ranges, rangeCount, sizeof(WeightRange), | |||
412 | compareRanges, NULL__null, FALSE0, &errorCode); | |||
413 | /* ignore error code: we know that the internal sort function will not fail here */ | |||
414 | } | |||
415 | return TRUE1; | |||
416 | } | |||
417 | n -= ranges[i].count; // still >0 | |||
418 | } | |||
419 | return FALSE0; | |||
420 | } | |||
421 | ||||
422 | UBool | |||
423 | CollationWeights::allocWeightsInMinLengthRanges(int32_t n, int32_t minLength) { | |||
424 | // See if the minLength ranges have enough weights | |||
425 | // when we split one and lengthen the following ones. | |||
426 | int32_t count = 0; | |||
427 | int32_t minLengthRangeCount; | |||
428 | for(minLengthRangeCount = 0; | |||
429 | minLengthRangeCount < rangeCount && | |||
430 | ranges[minLengthRangeCount].length == minLength; | |||
431 | ++minLengthRangeCount) { | |||
432 | count += ranges[minLengthRangeCount].count; | |||
433 | } | |||
434 | ||||
435 | int32_t nextCountBytes = countBytes(minLength + 1); | |||
436 | if(n > count * nextCountBytes) { return FALSE0; } | |||
437 | ||||
438 | // Use the minLength ranges. Merge them, and then split again as necessary. | |||
439 | uint32_t start = ranges[0].start; | |||
440 | uint32_t end = ranges[0].end; | |||
441 | for(int32_t i = 1; i < minLengthRangeCount; ++i) { | |||
442 | if(ranges[i].start < start) { start = ranges[i].start; } | |||
443 | if(ranges[i].end > end) { end = ranges[i].end; } | |||
444 | } | |||
445 | ||||
446 | // Calculate how to split the range between minLength (count1) and minLength+1 (count2). | |||
447 | // Goal: | |||
448 | // count1 + count2 * nextCountBytes = n | |||
449 | // count1 + count2 = count | |||
450 | // These turn into | |||
451 | // (count - count2) + count2 * nextCountBytes = n | |||
452 | // and then into the following count1 & count2 computations. | |||
453 | int32_t count2 = (n - count) / (nextCountBytes - 1); // number of weights to be lengthened | |||
454 | int32_t count1 = count - count2; // number of minLength weights | |||
455 | if(count2 == 0 || (count1 + count2 * nextCountBytes) < n) { | |||
456 | // round up | |||
457 | ++count2; | |||
458 | --count1; | |||
459 | U_ASSERT((count1 + count2 * nextCountBytes) >= n)(void)0; | |||
460 | } | |||
461 | ||||
462 | ranges[0].start = start; | |||
463 | ||||
464 | if(count1 == 0) { | |||
465 | // Make one long range. | |||
466 | ranges[0].end = end; | |||
467 | ranges[0].count = count; | |||
468 | lengthenRange(ranges[0]); | |||
469 | rangeCount = 1; | |||
470 | } else { | |||
471 | // Split the range, lengthen the second part. | |||
472 | #ifdef UCOL_DEBUG | |||
473 | printf("split the range number %ld (out of %ld minLength ranges) by %ld:%ld\n", | |||
474 | splitRange, rangeCount, count1, count2); | |||
475 | #endif | |||
476 | ||||
477 | // Next start = start + count1. First end = 1 before that. | |||
478 | ranges[0].end = incWeightByOffset(start, minLength, count1 - 1); | |||
479 | ranges[0].count = count1; | |||
480 | ||||
481 | ranges[1].start = incWeight(ranges[0].end, minLength); | |||
482 | ranges[1].end = end; | |||
483 | ranges[1].length = minLength; // +1 when lengthened | |||
484 | ranges[1].count = count2; // *countBytes when lengthened | |||
485 | lengthenRange(ranges[1]); | |||
486 | rangeCount = 2; | |||
487 | } | |||
488 | return TRUE1; | |||
489 | } | |||
490 | ||||
491 | /* | |||
492 | * call getWeightRanges and then determine heuristically | |||
493 | * which ranges to use for a given number of weights between (excluding) | |||
494 | * two limits | |||
495 | */ | |||
496 | UBool | |||
497 | CollationWeights::allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n) { | |||
498 | #ifdef UCOL_DEBUG | |||
499 | puts(""); | |||
500 | #endif | |||
501 | ||||
502 | if(!getWeightRanges(lowerLimit, upperLimit)) { | |||
| ||||
503 | #ifdef UCOL_DEBUG | |||
504 | printf("error: unable to get Weight ranges\n"); | |||
505 | #endif | |||
506 | return FALSE0; | |||
507 | } | |||
508 | ||||
509 | /* try until we find suitably large ranges */ | |||
510 | for(;;) { | |||
511 | /* get the smallest number of bytes in a range */ | |||
512 | int32_t minLength=ranges[0].length; | |||
513 | ||||
514 | if(allocWeightsInShortRanges(n, minLength)) { break; } | |||
515 | ||||
516 | if(minLength == 4) { | |||
517 | #ifdef UCOL_DEBUG | |||
518 | printf("error: the maximum number of %ld weights is insufficient for n=%ld\n", | |||
519 | minLengthCount, n); | |||
520 | #endif | |||
521 | return FALSE0; | |||
522 | } | |||
523 | ||||
524 | if(allocWeightsInMinLengthRanges(n, minLength)) { break; } | |||
525 | ||||
526 | /* no good match, lengthen all minLength ranges and iterate */ | |||
527 | #ifdef UCOL_DEBUG | |||
528 | printf("lengthen the short ranges from %ld bytes to %ld and iterate\n", minLength, minLength+1); | |||
529 | #endif | |||
530 | for(int32_t i=0; i<rangeCount && ranges[i].length==minLength; ++i) { | |||
531 | lengthenRange(ranges[i]); | |||
532 | } | |||
533 | } | |||
534 | ||||
535 | #ifdef UCOL_DEBUG | |||
536 | puts("final ranges:"); | |||
537 | for(int32_t i=0; i<rangeCount; ++i) { | |||
538 | printf("ranges[%ld] .start=0x%08lx .end=0x%08lx .length=%ld .count=%ld\n", | |||
539 | i, ranges[i].start, ranges[i].end, ranges[i].length, ranges[i].count); | |||
540 | } | |||
541 | #endif | |||
542 | ||||
543 | rangeIndex = 0; | |||
544 | return TRUE1; | |||
545 | } | |||
546 | ||||
547 | uint32_t | |||
548 | CollationWeights::nextWeight() { | |||
549 | if(rangeIndex >= rangeCount) { | |||
550 | return 0xffffffff; | |||
551 | } else { | |||
552 | /* get the next weight */ | |||
553 | WeightRange &range = ranges[rangeIndex]; | |||
554 | uint32_t weight = range.start; | |||
555 | if(--range.count == 0) { | |||
556 | /* this range is finished */ | |||
557 | ++rangeIndex; | |||
558 | } else { | |||
559 | /* increment the weight for the next value */ | |||
560 | range.start = incWeight(weight, range.length); | |||
561 | U_ASSERT(range.start <= range.end)(void)0; | |||
562 | } | |||
563 | ||||
564 | return weight; | |||
565 | } | |||
566 | } | |||
567 | ||||
568 | U_NAMESPACE_END} | |||
569 | ||||
570 | #endif /* #if !UCONFIG_NO_COLLATION */ |