File: | out/../deps/icu-small/source/i18n/number_longnames.cpp |
Warning: | line 852, column 17 Value stored to 'endSlice' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | // © 2017 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include "unicode/utypes.h" |
5 | |
6 | #if !UCONFIG_NO_FORMATTING0 |
7 | |
8 | #include <cstdlib> |
9 | |
10 | #include "unicode/simpleformatter.h" |
11 | #include "unicode/ures.h" |
12 | #include "ureslocs.h" |
13 | #include "charstr.h" |
14 | #include "uresimp.h" |
15 | #include "measunit_impl.h" |
16 | #include "number_longnames.h" |
17 | #include "number_microprops.h" |
18 | #include <algorithm> |
19 | #include "cstring.h" |
20 | #include "util.h" |
21 | |
22 | using namespace icu; |
23 | using namespace icu::number; |
24 | using namespace icu::number::impl; |
25 | |
26 | namespace { |
27 | |
28 | /** |
29 | * Display Name (this format has no placeholder). |
30 | * |
31 | * Used as an index into the LongNameHandler::simpleFormats array. Units |
32 | * resources cover the normal set of PluralRules keys, as well as `dnam` and |
33 | * `per` forms. |
34 | */ |
35 | constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; |
36 | /** |
37 | * "per" form (e.g. "{0} per day" is day's "per" form). |
38 | * |
39 | * Used as an index into the LongNameHandler::simpleFormats array. Units |
40 | * resources cover the normal set of PluralRules keys, as well as `dnam` and |
41 | * `per` forms. |
42 | */ |
43 | constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; |
44 | /** |
45 | * Gender of the word, in languages with grammatical gender. |
46 | */ |
47 | constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; |
48 | // Number of keys in the array populated by PluralTableSink. |
49 | constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3; |
50 | |
51 | // TODO(icu-units#28): load this list from resources, after creating a "&set" |
52 | // function for use in ldml2icu rules. |
53 | const int32_t GENDER_COUNT = 7; |
54 | const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate", |
55 | "masculine", "neuter", "personal"}; |
56 | |
57 | // Converts a UnicodeString to a const char*, either pointing to a string in |
58 | // gGenders, or pointing to an empty string if an appropriate string was not |
59 | // found. |
60 | const char *getGenderString(UnicodeString uGender, UErrorCode status) { |
61 | if (uGender.length() == 0) { |
62 | return ""; |
63 | } |
64 | CharString gender; |
65 | gender.appendInvariantChars(uGender, status); |
66 | if (U_FAILURE(status)) { |
67 | return ""; |
68 | } |
69 | int32_t first = 0; |
70 | int32_t last = GENDER_COUNT; |
71 | while (first < last) { |
72 | int32_t mid = (first + last) / 2; |
73 | int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]):: strcmp(gender.data(), gGenders[mid]); |
74 | if (cmp == 0) { |
75 | return gGenders[mid]; |
76 | } else if (cmp > 0) { |
77 | first = mid + 1; |
78 | } else if (cmp < 0) { |
79 | last = mid; |
80 | } |
81 | } |
82 | // We don't return an error in case our gGenders list is incomplete in |
83 | // production. |
84 | // |
85 | // TODO(icu-units#28): a unit test checking all locales' genders are covered |
86 | // by gGenders? Else load a complete list of genders found in |
87 | // grammaticalFeatures in an initOnce. |
88 | return ""; |
89 | } |
90 | |
91 | // Returns the array index that corresponds to the given pluralKeyword. |
92 | static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { |
93 | // pluralKeyword can also be "dnam", "per", or "gender" |
94 | switch (*pluralKeyword) { |
95 | case 'd': |
96 | if (uprv_strcmp(pluralKeyword + 1, "nam"):: strcmp(pluralKeyword + 1, "nam") == 0) { |
97 | return DNAM_INDEX; |
98 | } |
99 | break; |
100 | case 'g': |
101 | if (uprv_strcmp(pluralKeyword + 1, "ender"):: strcmp(pluralKeyword + 1, "ender") == 0) { |
102 | return GENDER_INDEX; |
103 | } |
104 | break; |
105 | case 'p': |
106 | if (uprv_strcmp(pluralKeyword + 1, "er"):: strcmp(pluralKeyword + 1, "er") == 0) { |
107 | return PER_INDEX; |
108 | } |
109 | break; |
110 | default: |
111 | break; |
112 | } |
113 | StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); |
114 | return plural; |
115 | } |
116 | |
117 | // Selects a string out of the `strings` array which corresponds to the |
118 | // specified plural form, with fallback to the OTHER form. |
119 | // |
120 | // The `strings` array must have ARRAY_LENGTH items: one corresponding to each |
121 | // of the plural forms, plus a display name ("dnam") and a "per" form. |
122 | static UnicodeString getWithPlural( |
123 | const UnicodeString* strings, |
124 | StandardPlural::Form plural, |
125 | UErrorCode& status) { |
126 | UnicodeString result = strings[plural]; |
127 | if (result.isBogus()) { |
128 | result = strings[StandardPlural::Form::OTHER]; |
129 | } |
130 | if (result.isBogus()) { |
131 | // There should always be data in the "other" plural variant. |
132 | status = U_INTERNAL_PROGRAM_ERROR; |
133 | } |
134 | return result; |
135 | } |
136 | |
137 | enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END }; |
138 | |
139 | /** |
140 | * Returns three outputs extracted from pattern. |
141 | * |
142 | * @param coreUnit is extracted as per Extract(...) in the spec: |
143 | * https://unicode.org/reports/tr35/tr35-general.html#compound-units |
144 | * @param PlaceholderPosition indicates where in the string the placeholder was |
145 | * found. |
146 | * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar |
147 | * contains the space character (if any) that separated the placeholder from |
148 | * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one |
149 | * space character is considered. |
150 | */ |
151 | void extractCorePattern(const UnicodeString &pattern, |
152 | UnicodeString &coreUnit, |
153 | PlaceholderPosition &placeholderPosition, |
154 | UChar &joinerChar) { |
155 | joinerChar = 0; |
156 | int32_t len = pattern.length(); |
157 | if (pattern.startsWith(u"{0}", 3)) { |
158 | placeholderPosition = PH_BEGINNING; |
159 | if (u_isJavaSpaceCharu_isJavaSpaceChar_71(pattern[3])) { |
160 | joinerChar = pattern[3]; |
161 | coreUnit.setTo(pattern, 4, len - 4); |
162 | } else { |
163 | coreUnit.setTo(pattern, 3, len - 3); |
164 | } |
165 | } else if (pattern.endsWith(u"{0}", 3)) { |
166 | placeholderPosition = PH_END; |
167 | if (u_isJavaSpaceCharu_isJavaSpaceChar_71(pattern[len - 4])) { |
168 | coreUnit.setTo(pattern, 0, len - 4); |
169 | joinerChar = pattern[len - 4]; |
170 | } else { |
171 | coreUnit.setTo(pattern, 0, len - 3); |
172 | } |
173 | } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) { |
174 | placeholderPosition = PH_NONE; |
175 | coreUnit = pattern; |
176 | } else { |
177 | placeholderPosition = PH_MIDDLE; |
178 | coreUnit = pattern; |
179 | } |
180 | } |
181 | |
182 | ////////////////////////// |
183 | /// BEGIN DATA LOADING /// |
184 | ////////////////////////// |
185 | |
186 | // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty |
187 | // string both in case of unknown gender and in case of unknown unit. |
188 | UnicodeString |
189 | getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) { |
190 | LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status)); |
191 | if (U_FAILURE(status)) { return {}; } |
192 | |
193 | // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... |
194 | // TODO(ICU-20400): Get duration-*-person data properly with aliases. |
195 | StringPiece subtypeForResource; |
196 | int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()):: strlen(builtinUnit.getSubtype())); |
197 | if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person"):: strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person" ) == 0) { |
198 | subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7}; |
199 | } else { |
200 | subtypeForResource = builtinUnit.getSubtype(); |
201 | } |
202 | |
203 | CharString key; |
204 | key.append("units/", status); |
205 | key.append(builtinUnit.getType(), status); |
206 | key.append("/", status); |
207 | key.append(subtypeForResource, status); |
208 | key.append("/gender", status); |
209 | |
210 | UErrorCode localStatus = status; |
211 | int32_t resultLen = 0; |
212 | const UChar *result = |
213 | ures_getStringByKeyWithFallbackures_getStringByKeyWithFallback_71(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus); |
214 | if (U_SUCCESS(localStatus)) { |
215 | status = localStatus; |
216 | return UnicodeString(true, result, resultLen); |
217 | } else { |
218 | // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to |
219 | // check whether the parent "$unitRes" exists? Then we could return |
220 | // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not |
221 | // being a builtin). |
222 | return {}; |
223 | } |
224 | } |
225 | |
226 | // Loads data from a resource tree with paths matching |
227 | // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases |
228 | // and genders. |
229 | // |
230 | // An InflectedPluralSink is configured to load data for a specific gender and |
231 | // case. It loads all plural forms, because selection between plural forms is |
232 | // dependent upon the value being formatted. |
233 | // |
234 | // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at |
235 | // units/compound/power2: German has case, French has differences for gender, |
236 | // but no case. |
237 | // |
238 | // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the |
239 | // tree structures are different. After homogenizing the structures, we may be |
240 | // able to unify the two classes. |
241 | // |
242 | // TODO: Spec violation: expects presence of "count" - does not fallback to an |
243 | // absent "count"! If this fallback were added, getCompoundValue could be |
244 | // superseded? |
245 | class InflectedPluralSink : public ResourceSink { |
246 | public: |
247 | // Accepts `char*` rather than StringPiece because |
248 | // ResourceTable::findValue(...) requires a null-terminated `char*`. |
249 | // |
250 | // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds |
251 | // checking is performed. |
252 | explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray) |
253 | : gender(gender), caseVariant(caseVariant), outArray(outArray) { |
254 | // Initialize the array to bogus strings. |
255 | for (int32_t i = 0; i < ARRAY_LENGTH; i++) { |
256 | outArray[i].setToBogus(); |
257 | } |
258 | } |
259 | |
260 | // See ResourceSink::put(). |
261 | void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDEoverride { |
262 | int32_t pluralIndex = getIndex(key, status); |
263 | if (U_FAILURE(status)) { return; } |
264 | if (!outArray[pluralIndex].isBogus()) { |
265 | // We already have a pattern |
266 | return; |
267 | } |
268 | ResourceTable genderTable = value.getTable(status); |
269 | ResourceTable caseTable; // This instance has to outlive `value` |
270 | if (loadForPluralForm(genderTable, caseTable, value, status)) { |
271 | outArray[pluralIndex] = value.getUnicodeString(status); |
272 | } |
273 | } |
274 | |
275 | private: |
276 | // Tries to load data for the configured gender from `genderTable`. Returns |
277 | // true if found, returning the data in `value`. The returned data will be |
278 | // for the configured gender if found, falling back to "neuter" and |
279 | // no-gender if not. The caseTable parameter holds the intermediate |
280 | // ResourceTable for the sake of lifetime management. |
281 | bool loadForPluralForm(const ResourceTable &genderTable, |
282 | ResourceTable &caseTable, |
283 | ResourceValue &value, |
284 | UErrorCode &status) { |
285 | if (uprv_strcmp(gender, ""):: strcmp(gender, "") != 0) { |
286 | if (loadForGender(genderTable, gender, caseTable, value, status)) { |
287 | return true; |
288 | } |
289 | if (uprv_strcmp(gender, "neuter"):: strcmp(gender, "neuter") != 0 && |
290 | loadForGender(genderTable, "neuter", caseTable, value, status)) { |
291 | return true; |
292 | } |
293 | } |
294 | if (loadForGender(genderTable, "_", caseTable, value, status)) { |
295 | return true; |
296 | } |
297 | return false; |
298 | } |
299 | |
300 | // Tries to load data for the given gender from `genderTable`. Returns true |
301 | // if found, returning the data in `value`. The returned data will be for |
302 | // the configured case if found, falling back to "nominative" and no-case if |
303 | // not. |
304 | bool loadForGender(const ResourceTable &genderTable, |
305 | const char *genderVal, |
306 | ResourceTable &caseTable, |
307 | ResourceValue &value, |
308 | UErrorCode &status) { |
309 | if (!genderTable.findValue(genderVal, value)) { |
310 | return false; |
311 | } |
312 | caseTable = value.getTable(status); |
313 | if (uprv_strcmp(caseVariant, ""):: strcmp(caseVariant, "") != 0) { |
314 | if (loadForCase(caseTable, caseVariant, value)) { |
315 | return true; |
316 | } |
317 | if (uprv_strcmp(caseVariant, "nominative"):: strcmp(caseVariant, "nominative") != 0 && |
318 | loadForCase(caseTable, "nominative", value)) { |
319 | return true; |
320 | } |
321 | } |
322 | if (loadForCase(caseTable, "_", value)) { |
323 | return true; |
324 | } |
325 | return false; |
326 | } |
327 | |
328 | // Tries to load data for the given case from `caseTable`. Returns true if |
329 | // found, returning the data in `value`. |
330 | bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) { |
331 | if (!caseTable.findValue(caseValue, value)) { |
332 | return false; |
333 | } |
334 | return true; |
335 | } |
336 | |
337 | const char *gender; |
338 | const char *caseVariant; |
339 | UnicodeString *outArray; |
340 | }; |
341 | |
342 | // Fetches localised formatting patterns for the given subKey. See documentation |
343 | // for InflectedPluralSink for details. |
344 | // |
345 | // Data is loaded for the appropriate unit width, with missing data filled in |
346 | // from unitsShort. |
347 | void getInflectedMeasureData(StringPiece subKey, |
348 | const Locale &locale, |
349 | const UNumberUnitWidth &width, |
350 | const char *gender, |
351 | const char *caseVariant, |
352 | UnicodeString *outArray, |
353 | UErrorCode &status) { |
354 | InflectedPluralSink sink(gender, caseVariant, outArray); |
355 | LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status)); |
356 | if (U_FAILURE(status)) { return; } |
357 | |
358 | CharString key; |
359 | key.append("units", status); |
360 | if (width == UNUM_UNIT_WIDTH_NARROW) { |
361 | key.append("Narrow", status); |
362 | } else if (width == UNUM_UNIT_WIDTH_SHORT) { |
363 | key.append("Short", status); |
364 | } |
365 | key.append("/", status); |
366 | key.append(subKey, status); |
367 | |
368 | UErrorCode localStatus = status; |
369 | ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), key.data(), sink, localStatus); |
370 | if (width == UNUM_UNIT_WIDTH_SHORT) { |
371 | status = localStatus; |
372 | return; |
373 | } |
374 | } |
375 | |
376 | class PluralTableSink : public ResourceSink { |
377 | public: |
378 | // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds |
379 | // checking is performed. |
380 | explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { |
381 | // Initialize the array to bogus strings. |
382 | for (int32_t i = 0; i < ARRAY_LENGTH; i++) { |
383 | outArray[i].setToBogus(); |
384 | } |
385 | } |
386 | |
387 | void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDEoverride { |
388 | if (uprv_strcmp(key, "case"):: strcmp(key, "case") == 0) { |
389 | return; |
390 | } |
391 | int32_t index = getIndex(key, status); |
392 | if (U_FAILURE(status)) { return; } |
393 | if (!outArray[index].isBogus()) { |
394 | return; |
395 | } |
396 | outArray[index] = value.getUnicodeString(status); |
397 | if (U_FAILURE(status)) { return; } |
398 | } |
399 | |
400 | private: |
401 | UnicodeString *outArray; |
402 | }; |
403 | |
404 | /** |
405 | * Populates outArray with `locale`-specific values for `unit` through use of |
406 | * PluralTableSink. Only the set of basic units are supported! |
407 | * |
408 | * Reading from resources *unitsNarrow* and *unitsShort* (for width |
409 | * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width |
410 | * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". |
411 | * |
412 | * @param unit must be a built-in unit, i.e. must have a type and subtype, |
413 | * listed in gTypes and gSubTypes in measunit.cpp. |
414 | * @param unitDisplayCase the empty string and "nominative" are treated the |
415 | * same. For other cases, strings for the requested case are used if found. |
416 | * (For any missing case-specific data, we fall back to nominative.) |
417 | * @param outArray must be of fixed length ARRAY_LENGTH. |
418 | */ |
419 | void getMeasureData(const Locale &locale, |
420 | const MeasureUnit &unit, |
421 | const UNumberUnitWidth &width, |
422 | const char *unitDisplayCase, |
423 | UnicodeString *outArray, |
424 | UErrorCode &status) { |
425 | PluralTableSink sink(outArray); |
426 | LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status)); |
427 | if (U_FAILURE(status)) { return; } |
428 | |
429 | CharString subKey; |
430 | subKey.append("/", status); |
431 | subKey.append(unit.getType(), status); |
432 | subKey.append("/", status); |
433 | |
434 | // Check if unitSubType is an alias or not. |
435 | LocalUResourceBundlePointer aliasBundle(ures_openures_open_71(U_ICUDATA_ALIAS"ICUDATA", "metadata", &status)); |
436 | |
437 | UErrorCode aliasStatus = status; |
438 | StackUResourceBundle aliasFillIn; |
439 | CharString aliasKey; |
440 | aliasKey.append("alias/unit/", aliasStatus); |
441 | aliasKey.append(unit.getSubtype(), aliasStatus); |
442 | aliasKey.append("/replacement", aliasStatus); |
443 | ures_getByKeyWithFallbackures_getByKeyWithFallback_71(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(), |
444 | &aliasStatus); |
445 | CharString unitSubType; |
446 | if (!U_FAILURE(aliasStatus)) { |
447 | // This means the subType is an alias. Then, replace unitSubType with the replacement. |
448 | auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status); |
449 | unitSubType.appendInvariantChars(replacement, status); |
450 | } else { |
451 | unitSubType.append(unit.getSubtype(), status); |
452 | } |
453 | |
454 | // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... |
455 | // TODO(ICU-20400): Get duration-*-person data properly with aliases. |
456 | int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data()):: strlen(unitSubType.data())); |
457 | if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person"):: strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) { |
458 | subKey.append({unitSubType.data(), subtypeLen - 7}, status); |
459 | } else { |
460 | subKey.append({unitSubType.data(), subtypeLen}, status); |
461 | } |
462 | |
463 | if (width != UNUM_UNIT_WIDTH_FULL_NAME) { |
464 | UErrorCode localStatus = status; |
465 | CharString genderKey; |
466 | genderKey.append("units", localStatus); |
467 | genderKey.append(subKey, localStatus); |
468 | genderKey.append("/gender", localStatus); |
469 | StackUResourceBundle fillIn; |
470 | ures_getByKeyWithFallbackures_getByKeyWithFallback_71(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(), |
471 | &localStatus); |
472 | outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus); |
473 | } |
474 | |
475 | CharString key; |
476 | key.append("units", status); |
477 | if (width == UNUM_UNIT_WIDTH_NARROW) { |
478 | key.append("Narrow", status); |
479 | } else if (width == UNUM_UNIT_WIDTH_SHORT) { |
480 | key.append("Short", status); |
481 | } |
482 | key.append(subKey, status); |
483 | |
484 | // Grab desired case first, if available. Then grab no-case data to fill in |
485 | // the gaps. |
486 | if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) { |
487 | CharString caseKey; |
488 | caseKey.append(key, status); |
489 | caseKey.append("/case/", status); |
490 | caseKey.append(unitDisplayCase, status); |
491 | |
492 | UErrorCode localStatus = U_ZERO_ERROR; |
493 | // TODO(icu-units#138): our fallback logic is not spec-compliant: |
494 | // lateral fallback should happen before locale fallback. Switch to |
495 | // getInflectedMeasureData after homogenizing data format? Find a unit |
496 | // test case that demonstrates the incorrect fallback logic (via |
497 | // regional variant of an inflected language?) |
498 | ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), caseKey.data(), sink, localStatus); |
499 | } |
500 | |
501 | // TODO(icu-units#138): our fallback logic is not spec-compliant: we |
502 | // check the given case, then go straight to the no-case data. The spec |
503 | // states we should first look for case="nominative". As part of #138, |
504 | // either get the spec changed, or add unit tests that warn us if |
505 | // case="nominative" data differs from no-case data? |
506 | UErrorCode localStatus = U_ZERO_ERROR; |
507 | ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), key.data(), sink, localStatus); |
508 | if (width == UNUM_UNIT_WIDTH_SHORT) { |
509 | if (U_FAILURE(localStatus)) { |
510 | status = localStatus; |
511 | } |
512 | return; |
513 | } |
514 | } |
515 | |
516 | // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. |
517 | void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray, |
518 | UErrorCode &status) { |
519 | // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. |
520 | // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C? |
521 | PluralTableSink sink(outArray); |
522 | LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_CURR"icudt" "71" "l" "-" "curr", locale.getName(), &status)); |
523 | if (U_FAILURE(status)) { return; } |
524 | ures_getAllChildrenWithFallbackures_getAllChildrenWithFallback_71(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); |
525 | if (U_FAILURE(status)) { return; } |
526 | for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { |
527 | UnicodeString &pattern = outArray[i]; |
528 | if (pattern.isBogus()) { |
529 | continue; |
530 | } |
531 | int32_t longNameLen = 0; |
532 | const char16_t *longName = ucurr_getPluralNameucurr_getPluralName_71( |
533 | currency.getISOCurrency(), |
534 | locale.getName(), |
535 | nullptr /* isChoiceFormat */, |
536 | StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)), |
537 | &longNameLen, |
538 | &status); |
539 | // Example pattern from data: "{0} {1}" |
540 | // Example output after find-and-replace: "{0} US dollars" |
541 | pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen)); |
542 | } |
543 | } |
544 | |
545 | UnicodeString getCompoundValue(StringPiece compoundKey, |
546 | const Locale &locale, |
547 | const UNumberUnitWidth &width, |
548 | UErrorCode &status) { |
549 | LocalUResourceBundlePointer unitsBundle(ures_openures_open_71(U_ICUDATA_UNIT"icudt" "71" "l" "-" "unit", locale.getName(), &status)); |
550 | if (U_FAILURE(status)) { return {}; } |
551 | CharString key; |
552 | key.append("units", status); |
553 | if (width == UNUM_UNIT_WIDTH_NARROW) { |
554 | key.append("Narrow", status); |
555 | } else if (width == UNUM_UNIT_WIDTH_SHORT) { |
556 | key.append("Short", status); |
557 | } |
558 | key.append("/compound/", status); |
559 | key.append(compoundKey, status); |
560 | |
561 | UErrorCode localStatus = status; |
562 | int32_t len = 0; |
563 | const UChar *ptr = |
564 | ures_getStringByKeyWithFallbackures_getStringByKeyWithFallback_71(unitsBundle.getAlias(), key.data(), &len, &localStatus); |
565 | if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) { |
566 | // Fall back to short, which contains more compound data |
567 | key.clear(); |
568 | key.append("unitsShort/compound/", status); |
569 | key.append(compoundKey, status); |
570 | ptr = ures_getStringByKeyWithFallbackures_getStringByKeyWithFallback_71(unitsBundle.getAlias(), key.data(), &len, &status); |
571 | } else { |
572 | status = localStatus; |
573 | } |
574 | if (U_FAILURE(status)) { |
575 | return {}; |
576 | } |
577 | return UnicodeString(ptr, len); |
578 | } |
579 | |
580 | /** |
581 | * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. |
582 | * |
583 | * Consider a deriveComponent rule that looks like this: |
584 | * |
585 | * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/> |
586 | * |
587 | * Instantiating an instance as follows: |
588 | * |
589 | * DerivedComponents d(loc, "case", "per"); |
590 | * |
591 | * Applying the rule in the XML element above, `d.value0("foo")` will be "foo", |
592 | * and `d.value1("foo")` will be "nominative". |
593 | * |
594 | * The values returned by value0(...) and value1(...) are valid only while the |
595 | * instance exists. In case of any kind of failure, value0(...) and value1(...) |
596 | * will return "". |
597 | */ |
598 | class DerivedComponents { |
599 | public: |
600 | /** |
601 | * Constructor. |
602 | * |
603 | * The feature and structure parameters must be null-terminated. The string |
604 | * referenced by compoundValue must exist for longer than the |
605 | * DerivedComponents instance. |
606 | */ |
607 | DerivedComponents(const Locale &locale, const char *feature, const char *structure) { |
608 | StackUResourceBundle derivationsBundle, stackBundle; |
609 | ures_openDirectFillInures_openDirectFillIn_71(derivationsBundle.getAlias(), NULL__null, "grammaticalFeatures", &status); |
610 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), |
611 | &status); |
612 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), |
613 | &status); |
614 | if (U_FAILURE(status)) { |
615 | return; |
616 | } |
617 | UErrorCode localStatus = U_ZERO_ERROR; |
618 | // TODO(icu-units#28): use standard normal locale resolution algorithms |
619 | // rather than just grabbing language: |
620 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), |
621 | &localStatus); |
622 | // TODO(icu-units#28): |
623 | // - code currently assumes if the locale exists, the rules are there - |
624 | // instead of falling back to root when the requested rule is missing. |
625 | // - investigate ures.h functions, see if one that uses res_findResource() |
626 | // might be better (or use res_findResource directly), or maybe help |
627 | // improve ures documentation to guide function selection? |
628 | if (localStatus == U_MISSING_RESOURCE_ERROR) { |
629 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); |
630 | } else { |
631 | status = localStatus; |
632 | } |
633 | ures_getByKeyures_getByKey_71(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status); |
634 | ures_getByKeyures_getByKey_71(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); |
635 | ures_getByKeyures_getByKey_71(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status); |
636 | UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status); |
637 | UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status); |
638 | if (U_SUCCESS(status)) { |
639 | if (val0.compare(UnicodeString(u"compound")) == 0) { |
640 | compound0_ = true; |
641 | } else { |
642 | compound0_ = false; |
643 | value0_.appendInvariantChars(val0, status); |
644 | } |
645 | if (val1.compare(UnicodeString(u"compound")) == 0) { |
646 | compound1_ = true; |
647 | } else { |
648 | compound1_ = false; |
649 | value1_.appendInvariantChars(val1, status); |
650 | } |
651 | } |
652 | } |
653 | |
654 | // Returns a StringPiece that is only valid as long as the instance exists. |
655 | StringPiece value0(const StringPiece compoundValue) const { |
656 | return compound0_ ? compoundValue : value0_.toStringPiece(); |
657 | } |
658 | |
659 | // Returns a StringPiece that is only valid as long as the instance exists. |
660 | StringPiece value1(const StringPiece compoundValue) const { |
661 | return compound1_ ? compoundValue : value1_.toStringPiece(); |
662 | } |
663 | |
664 | // Returns a char* that is only valid as long as the instance exists. |
665 | const char *value0(const char *compoundValue) const { |
666 | return compound0_ ? compoundValue : value0_.data(); |
667 | } |
668 | |
669 | // Returns a char* that is only valid as long as the instance exists. |
670 | const char *value1(const char *compoundValue) const { |
671 | return compound1_ ? compoundValue : value1_.data(); |
672 | } |
673 | |
674 | private: |
675 | UErrorCode status = U_ZERO_ERROR; |
676 | |
677 | // Holds strings referred to by value0 and value1; |
678 | bool compound0_ = false, compound1_ = false; |
679 | CharString value0_, value1_; |
680 | }; |
681 | |
682 | // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding |
683 | // testsuite support for testing with synthetic data? |
684 | /** |
685 | * Loads and returns the value in rules that look like these: |
686 | * |
687 | * <deriveCompound feature="gender" structure="per" value="0"/> |
688 | * <deriveCompound feature="gender" structure="times" value="1"/> |
689 | * |
690 | * Currently a fake example, but spec compliant: |
691 | * <deriveCompound feature="gender" structure="power" value="feminine"/> |
692 | * |
693 | * NOTE: If U_FAILURE(status), returns an empty string. |
694 | */ |
695 | UnicodeString |
696 | getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) { |
697 | StackUResourceBundle derivationsBundle, stackBundle; |
698 | ures_openDirectFillInures_openDirectFillIn_71(derivationsBundle.getAlias(), NULL__null, "grammaticalFeatures", &status); |
699 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), |
700 | &status); |
701 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status); |
702 | // TODO: use standard normal locale resolution algorithms rather than just grabbing language: |
703 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status); |
704 | // TODO: |
705 | // - code currently assumes if the locale exists, the rules are there - |
706 | // instead of falling back to root when the requested rule is missing. |
707 | // - investigate ures.h functions, see if one that uses res_findResource() |
708 | // might be better (or use res_findResource directly), or maybe help |
709 | // improve ures documentation to guide function selection? |
710 | if (status == U_MISSING_RESOURCE_ERROR) { |
711 | status = U_ZERO_ERROR; |
712 | ures_getByKeyures_getByKey_71(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); |
713 | } |
714 | ures_getByKeyures_getByKey_71(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status); |
715 | ures_getByKeyures_getByKey_71(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); |
716 | UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status); |
717 | if (U_FAILURE(status)) { |
718 | return {}; |
719 | } |
720 | U_ASSERT(!uVal.isBogus())(void)0; |
721 | return uVal; |
722 | } |
723 | |
724 | // Returns the gender string for structures following these rules: |
725 | // |
726 | // <deriveCompound feature="gender" structure="per" value="0"/> |
727 | // <deriveCompound feature="gender" structure="times" value="1"/> |
728 | // |
729 | // Fake example: |
730 | // <deriveCompound feature="gender" structure="power" value="feminine"/> |
731 | // |
732 | // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that |
733 | // correspond to value="0" and value="1". |
734 | // |
735 | // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g. |
736 | // "prefix" doesn't). |
737 | UnicodeString getDerivedGender(Locale locale, |
738 | const char *structure, |
739 | UnicodeString *data0, |
740 | UnicodeString *data1, |
741 | UErrorCode &status) { |
742 | UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status); |
743 | if (val.length() == 1) { |
744 | switch (val[0]) { |
745 | case u'0': |
746 | return data0[GENDER_INDEX]; |
747 | case u'1': |
748 | if (data1 == nullptr) { |
749 | return {}; |
750 | } |
751 | return data1[GENDER_INDEX]; |
752 | } |
753 | } |
754 | return val; |
755 | } |
756 | |
757 | //////////////////////// |
758 | /// END DATA LOADING /// |
759 | //////////////////////// |
760 | |
761 | // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace |
762 | const UChar *trimSpaceChars(const UChar *s, int32_t &length) { |
763 | if (length <= 0 || (!u_isJavaSpaceCharu_isJavaSpaceChar_71(s[0]) && !u_isJavaSpaceCharu_isJavaSpaceChar_71(s[length - 1]))) { |
764 | return s; |
765 | } |
766 | int32_t start = 0; |
767 | int32_t limit = length; |
768 | while (start < limit && u_isJavaSpaceCharu_isJavaSpaceChar_71(s[start])) { |
769 | ++start; |
770 | } |
771 | if (start < limit) { |
772 | // There is non-white space at start; we will not move limit below that, |
773 | // so we need not test start<limit in the loop. |
774 | while (u_isJavaSpaceCharu_isJavaSpaceChar_71(s[limit - 1])) { |
775 | --limit; |
776 | } |
777 | } |
778 | length = limit - start; |
779 | return s + start; |
780 | } |
781 | |
782 | /** |
783 | * Calculates the gender of an arbitrary unit: this is the *second* |
784 | * implementation of an algorithm to do this: |
785 | * |
786 | * Gender is also calculated in "processPatternTimes": that code path is "bottom |
787 | * up", loading the gender for every component of a compound unit (at the same |
788 | * time as loading the Long Names formatting patterns), even if the gender is |
789 | * unneeded, then combining the single units' genders into the compound unit's |
790 | * gender, according to the rules. This algorithm does a lazier "top-down" |
791 | * evaluation, starting with the compound unit, calculating which single unit's |
792 | * gender is needed by breaking it down according to the rules, and then loading |
793 | * only the gender of the one single unit who's gender is needed. |
794 | * |
795 | * For future refactorings: |
796 | * 1. we could drop processPatternTimes' gender calculation and just call this |
797 | * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very |
798 | * same table as the formatting patterns, so loading it then may be |
799 | * efficient. For other unit widths however, it needs to be explicitly looked |
800 | * up anyway. |
801 | * 2. alternatively, if CLDR is providing all the genders we need such that we |
802 | * don't need to calculate them in ICU anymore, we could drop this function |
803 | * and keep only processPatternTimes' calculation. (And optimise it a bit?) |
804 | * |
805 | * @param locale The desired locale. |
806 | * @param unit The measure unit to calculate the gender for. |
807 | * @return The gender string for the unit, or an empty string if unknown or |
808 | * ungendered. |
809 | */ |
810 | UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) { |
811 | MeasureUnitImpl impl; |
812 | const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status); |
813 | int32_t singleUnitIndex = 0; |
814 | if (mui.complexity == UMEASURE_UNIT_COMPOUND) { |
815 | int32_t startSlice = 0; |
816 | // inclusive |
817 | int32_t endSlice = mui.singleUnits.length()-1; |
818 | U_ASSERT(endSlice > 0)(void)0; // Else it would not be COMPOUND |
819 | if (mui.singleUnits[endSlice]->dimensionality < 0) { |
820 | // We have a -per- construct |
821 | UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status); |
822 | if (perRule.length() != 1) { |
823 | // Fixed gender for -per- units |
824 | return perRule; |
825 | } |
826 | if (perRule[0] == u'1') { |
827 | // Find the start of the denominator. We already know there is one. |
828 | while (mui.singleUnits[startSlice]->dimensionality >= 0) { |
829 | startSlice++; |
830 | } |
831 | } else { |
832 | // Find the end of the numerator |
833 | while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) { |
834 | endSlice--; |
835 | } |
836 | if (endSlice < 0) { |
837 | // We have only a denominator, e.g. "per-second". |
838 | // TODO(icu-units#28): find out what gender to use in the |
839 | // absence of a first value - mentioned in CLDR-14253. |
840 | return {}; |
841 | } |
842 | } |
843 | } |
844 | if (endSlice > startSlice) { |
845 | // We have a -times- construct |
846 | UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status); |
847 | if (timesRule.length() != 1) { |
848 | // Fixed gender for -times- units |
849 | return timesRule; |
850 | } |
851 | if (timesRule[0] == u'0') { |
852 | endSlice = startSlice; |
Value stored to 'endSlice' is never read | |
853 | } else { |
854 | // We assume timesRule[0] == u'1' |
855 | startSlice = endSlice; |
856 | } |
857 | } |
858 | U_ASSERT(startSlice == endSlice)(void)0; |
859 | singleUnitIndex = startSlice; |
860 | } else if (mui.complexity == UMEASURE_UNIT_MIXED) { |
861 | status = U_INTERNAL_PROGRAM_ERROR; |
862 | return {}; |
863 | } else { |
864 | U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE)(void)0; |
865 | U_ASSERT(mui.singleUnits.length() == 1)(void)0; |
866 | } |
867 | |
868 | // Now we know which singleUnit's gender we want |
869 | const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex]; |
870 | // Check for any power-prefix gender override: |
871 | if (std::abs(singleUnit->dimensionality) != 1) { |
872 | UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status); |
873 | if (powerRule.length() != 1) { |
874 | // Fixed gender for -powN- units |
875 | return powerRule; |
876 | } |
877 | // powerRule[0] == u'0'; u'1' not currently in spec. |
878 | } |
879 | // Check for any SI and binary prefix gender override: |
880 | if (std::abs(singleUnit->dimensionality) != 1) { |
881 | UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status); |
882 | if (prefixRule.length() != 1) { |
883 | // Fixed gender for -powN- units |
884 | return prefixRule; |
885 | } |
886 | // prefixRule[0] == u'0'; u'1' not currently in spec. |
887 | } |
888 | // Now we've boiled it down to the gender of one simple unit identifier: |
889 | return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status), |
890 | status); |
891 | } |
892 | |
893 | void maybeCalculateGender(const Locale &locale, |
894 | const MeasureUnit &unitRef, |
895 | UnicodeString *outArray, |
896 | UErrorCode &status) { |
897 | if (outArray[GENDER_INDEX].isBogus()) { |
898 | UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status); |
899 | if (meterGender.isEmpty()) { |
900 | // No gender for meter: assume ungendered language |
901 | return; |
902 | } |
903 | // We have a gendered language, but are lacking gender for unitRef. |
904 | outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status); |
905 | } |
906 | } |
907 | |
908 | } // namespace |
909 | |
910 | void LongNameHandler::forMeasureUnit(const Locale &loc, |
911 | const MeasureUnit &unitRef, |
912 | const UNumberUnitWidth &width, |
913 | const char *unitDisplayCase, |
914 | const PluralRules *rules, |
915 | const MicroPropsGenerator *parent, |
916 | LongNameHandler *fillIn, |
917 | UErrorCode &status) { |
918 | // From https://unicode.org/reports/tr35/tr35-general.html#compound-units - |
919 | // Points 1 and 2 are mostly handled by MeasureUnit: |
920 | // |
921 | // 1. If the unitId is empty or invalid, fail |
922 | // 2. Put the unitId into normalized order |
923 | U_ASSERT(fillIn != nullptr)(void)0; |
924 | |
925 | if (uprv_strcmp(unitRef.getType(), ""):: strcmp(unitRef.getType(), "") != 0) { |
926 | // Handling built-in units: |
927 | // |
928 | // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant) |
929 | // - If result is not empty, return it |
930 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
931 | getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status); |
932 | maybeCalculateGender(loc, unitRef, simpleFormats, status); |
933 | if (U_FAILURE(status)) { |
934 | return; |
935 | } |
936 | fillIn->rules = rules; |
937 | fillIn->parent = parent; |
938 | fillIn->simpleFormatsToModifiers(simpleFormats, |
939 | {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); |
940 | if (!simpleFormats[GENDER_INDEX].isBogus()) { |
941 | fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status); |
942 | } |
943 | return; |
944 | |
945 | // TODO(icu-units#145): figure out why this causes a failure in |
946 | // format/MeasureFormatTest/TestIndividualPluralFallback and other |
947 | // tests, when it should have been an alternative for the lines above: |
948 | |
949 | // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); |
950 | // fillIn->rules = rules; |
951 | // fillIn->parent = parent; |
952 | // return; |
953 | } else { |
954 | // Check if it is a MeasureUnit this constructor handles: this |
955 | // constructor does not handle mixed units |
956 | U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED)(void)0; |
957 | forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); |
958 | fillIn->rules = rules; |
959 | fillIn->parent = parent; |
960 | return; |
961 | } |
962 | } |
963 | |
964 | void LongNameHandler::forArbitraryUnit(const Locale &loc, |
965 | const MeasureUnit &unitRef, |
966 | const UNumberUnitWidth &width, |
967 | const char *unitDisplayCase, |
968 | LongNameHandler *fillIn, |
969 | UErrorCode &status) { |
970 | if (U_FAILURE(status)) { |
971 | return; |
972 | } |
973 | if (fillIn == nullptr) { |
974 | status = U_INTERNAL_PROGRAM_ERROR; |
975 | return; |
976 | } |
977 | |
978 | // Numbered list items are from the algorithms at |
979 | // https://unicode.org/reports/tr35/tr35-general.html#compound-units: |
980 | // |
981 | // 4. Divide the unitId into numerator (the part before the "-per-") and |
982 | // denominator (the part after the "-per-). If both are empty, fail |
983 | MeasureUnitImpl unit; |
984 | MeasureUnitImpl perUnit; |
985 | { |
986 | MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status); |
987 | if (U_FAILURE(status)) { |
988 | return; |
989 | } |
990 | for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) { |
991 | SingleUnitImpl *subUnit = fullUnit.singleUnits[i]; |
992 | if (subUnit->dimensionality > 0) { |
993 | unit.appendSingleUnit(*subUnit, status); |
994 | } else { |
995 | subUnit->dimensionality *= -1; |
996 | perUnit.appendSingleUnit(*subUnit, status); |
997 | } |
998 | } |
999 | } |
1000 | |
1001 | // TODO(icu-units#28): check placeholder logic, see if it needs to be |
1002 | // present here instead of only in processPatternTimes: |
1003 | // |
1004 | // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty |
1005 | |
1006 | DerivedComponents derivedPerCases(loc, "case", "per"); |
1007 | |
1008 | // 6. numeratorUnitString |
1009 | UnicodeString numeratorUnitData[ARRAY_LENGTH]; |
1010 | processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase), |
1011 | numeratorUnitData, status); |
1012 | |
1013 | // 7. denominatorUnitString |
1014 | UnicodeString denominatorUnitData[ARRAY_LENGTH]; |
1015 | processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase), |
1016 | denominatorUnitData, status); |
1017 | |
1018 | // TODO(icu-units#139): |
1019 | // - implement DerivedComponents for "plural/times" and "plural/power": |
1020 | // French has different rules, we'll be producing the wrong results |
1021 | // currently. (Prove via tests!) |
1022 | // - implement DerivedComponents for "plural/per", "plural/prefix", |
1023 | // "case/times", "case/power", and "case/prefix" - although they're |
1024 | // currently hardcoded. Languages with different rules are surely on the |
1025 | // way. |
1026 | // |
1027 | // Currently we only use "case/per", "plural/times", "case/times", and |
1028 | // "case/power". |
1029 | // |
1030 | // This may have impact on multiSimpleFormatsToModifiers(...) below too? |
1031 | // These rules are currently (ICU 69) all the same and hard-coded below. |
1032 | UnicodeString perUnitPattern; |
1033 | if (!denominatorUnitData[PER_INDEX].isBogus()) { |
1034 | // If we have no denominator, we obtain the empty string: |
1035 | perUnitPattern = denominatorUnitData[PER_INDEX]; |
1036 | } else { |
1037 | // 8. Set perPattern to be getValue([per], locale, length) |
1038 | UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status); |
1039 | // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit. |
1040 | SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status); |
1041 | if (U_FAILURE(status)) { |
1042 | return; |
1043 | } |
1044 | // Plural and placeholder handling for 7. denominatorUnitString: |
1045 | // TODO(icu-units#139): hardcoded: |
1046 | // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/> |
1047 | UnicodeString denominatorFormat = |
1048 | getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status); |
1049 | // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. |
1050 | SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status); |
1051 | if (U_FAILURE(status)) { |
1052 | return; |
1053 | } |
1054 | UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments(); |
1055 | int32_t trimmedLen = denominatorPattern.length(); |
1056 | const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen); |
1057 | UnicodeString denominatorString(false, trimmed, trimmedLen); |
1058 | // 9. If the denominatorString is empty, set result to |
1059 | // [numeratorString], otherwise set result to format(perPattern, |
1060 | // numeratorString, denominatorString) |
1061 | // |
1062 | // TODO(icu-units#28): Why does UnicodeString need to be explicit in the |
1063 | // following line? |
1064 | perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status); |
1065 | if (U_FAILURE(status)) { |
1066 | return; |
1067 | } |
1068 | } |
1069 | if (perUnitPattern.length() == 0) { |
1070 | fillIn->simpleFormatsToModifiers(numeratorUnitData, |
1071 | {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); |
1072 | } else { |
1073 | fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern, |
1074 | {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); |
1075 | } |
1076 | |
1077 | // Gender |
1078 | // |
1079 | // TODO(icu-units#28): find out what gender to use in the absence of a first |
1080 | // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253. |
1081 | // |
1082 | // gender/per deriveCompound rules don't say: |
1083 | // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) --> |
1084 | fillIn->gender = getGenderString( |
1085 | getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status); |
1086 | } |
1087 | |
1088 | void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, |
1089 | Locale loc, |
1090 | const UNumberUnitWidth &width, |
1091 | const char *caseVariant, |
1092 | UnicodeString *outArray, |
1093 | UErrorCode &status) { |
1094 | if (U_FAILURE(status)) { |
1095 | return; |
1096 | } |
1097 | if (productUnit.complexity == UMEASURE_UNIT_MIXED) { |
1098 | // These are handled by MixedUnitLongNameHandler |
1099 | status = U_UNSUPPORTED_ERROR; |
1100 | return; |
1101 | } |
1102 | |
1103 | #if U_DEBUG0 |
1104 | for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { |
1105 | U_ASSERT(outArray[pluralIndex].length() == 0)(void)0; |
1106 | U_ASSERT(!outArray[pluralIndex].isBogus())(void)0; |
1107 | } |
1108 | #endif |
1109 | |
1110 | if (productUnit.identifier.isEmpty()) { |
1111 | // TODO(icu-units#28): consider when serialize should be called. |
1112 | // identifier might also be empty for MeasureUnit(). |
1113 | productUnit.serialize(status); |
1114 | } |
1115 | if (U_FAILURE(status)) { |
1116 | return; |
1117 | } |
1118 | if (productUnit.identifier.length() == 0) { |
1119 | // MeasureUnit(): no units: return empty strings. |
1120 | return; |
1121 | } |
1122 | |
1123 | MeasureUnit builtinUnit; |
1124 | if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) { |
1125 | // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it |
1126 | // breaks them all down. Do we want to drop this? |
1127 | // - findBySubType isn't super efficient, if we skip it and go to basic |
1128 | // singles, we don't have to construct MeasureUnit's anymore. |
1129 | // - Check all the existing unit tests that fail without this: is it due |
1130 | // to incorrect fallback via getMeasureData? |
1131 | // - Do those unit tests cover this code path representatively? |
1132 | if (builtinUnit != MeasureUnit()) { |
1133 | getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status); |
1134 | maybeCalculateGender(loc, builtinUnit, outArray, status); |
1135 | } |
1136 | return; |
1137 | } |
1138 | |
1139 | // 2. Set timesPattern to be getValue(times, locale, length) |
1140 | UnicodeString timesPattern = getCompoundValue("times", loc, width, status); |
1141 | SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status); |
1142 | if (U_FAILURE(status)) { |
1143 | return; |
1144 | } |
1145 | |
1146 | PlaceholderPosition globalPlaceholder[ARRAY_LENGTH]; |
1147 | UChar globalJoinerChar = 0; |
1148 | // Numbered list items are from the algorithms at |
1149 | // https://unicode.org/reports/tr35/tr35-general.html#compound-units: |
1150 | // |
1151 | // pattern(...) point 5: |
1152 | // - Set both globalPlaceholder and globalPlaceholderPosition to be empty |
1153 | // |
1154 | // 3. Set result to be empty |
1155 | for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { |
1156 | // Initial state: empty string pattern, via all falling back to OTHER: |
1157 | if (pluralIndex == StandardPlural::Form::OTHER) { |
1158 | outArray[pluralIndex].remove(); |
1159 | } else { |
1160 | outArray[pluralIndex].setToBogus(); |
1161 | } |
1162 | globalPlaceholder[pluralIndex] = PH_EMPTY; |
1163 | } |
1164 | |
1165 | // Empty string represents "compound" (propagate the plural form). |
1166 | const char *pluralCategory = ""; |
1167 | DerivedComponents derivedTimesPlurals(loc, "plural", "times"); |
1168 | DerivedComponents derivedTimesCases(loc, "case", "times"); |
1169 | DerivedComponents derivedPowerCases(loc, "case", "power"); |
1170 | |
1171 | // 4. For each single_unit in product_unit |
1172 | for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length(); |
1173 | singleUnitIndex++) { |
1174 | SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex]; |
1175 | const char *singlePluralCategory; |
1176 | const char *singleCaseVariant; |
1177 | // TODO(icu-units#28): ensure we have unit tests that change/fail if we |
1178 | // assign incorrect case variants here: |
1179 | if (singleUnitIndex < productUnit.singleUnits.length() - 1) { |
1180 | // 4.1. If hasMultiple |
1181 | singlePluralCategory = derivedTimesPlurals.value0(pluralCategory); |
1182 | singleCaseVariant = derivedTimesCases.value0(caseVariant); |
1183 | pluralCategory = derivedTimesPlurals.value1(pluralCategory); |
1184 | caseVariant = derivedTimesCases.value1(caseVariant); |
1185 | } else { |
1186 | singlePluralCategory = derivedTimesPlurals.value1(pluralCategory); |
1187 | singleCaseVariant = derivedTimesCases.value1(caseVariant); |
1188 | } |
1189 | |
1190 | // 4.2. Get the gender of that single_unit |
1191 | MeasureUnit simpleUnit; |
1192 | if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) { |
1193 | // Ideally all simple units should be known, but they're not: |
1194 | // 100-kilometer is internally treated as a simple unit, but it is |
1195 | // not a built-in unit and does not have formatting data in CLDR 39. |
1196 | // |
1197 | // TODO(icu-units#28): test (desirable) invariants in unit tests. |
1198 | status = U_UNSUPPORTED_ERROR; |
1199 | return; |
1200 | } |
1201 | const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status); |
1202 | |
1203 | // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-' |
1204 | U_ASSERT(singleUnit->dimensionality > 0)(void)0; |
1205 | int32_t dimensionality = singleUnit->dimensionality; |
1206 | UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH]; |
1207 | if (dimensionality != 1) { |
1208 | // 4.3.1. set dimensionalityPrefixPattern to be |
1209 | // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender), |
1210 | // such as "{0} kwadratowym" |
1211 | CharString dimensionalityKey("compound/power", status); |
1212 | dimensionalityKey.appendNumber(dimensionality, status); |
1213 | getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender, |
1214 | singleCaseVariant, dimensionalityPrefixPatterns, status); |
1215 | if (U_FAILURE(status)) { |
1216 | // At the time of writing, only pow2 and pow3 are supported. |
1217 | // Attempting to format other powers results in a |
1218 | // U_RESOURCE_TYPE_MISMATCH. We convert the error if we |
1219 | // understand it: |
1220 | if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) { |
1221 | status = U_UNSUPPORTED_ERROR; |
1222 | } |
1223 | return; |
1224 | } |
1225 | |
1226 | // TODO(icu-units#139): |
1227 | // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory) |
1228 | |
1229 | // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant) |
1230 | singleCaseVariant = derivedPowerCases.value0(singleCaseVariant); |
1231 | // 4.3.4. remove the dimensionality_prefix from singleUnit |
1232 | singleUnit->dimensionality = 1; |
1233 | } |
1234 | |
1235 | // 4.4. if singleUnit starts with an si_prefix, such as 'centi' |
1236 | UMeasurePrefix prefix = singleUnit->unitPrefix; |
1237 | UnicodeString prefixPattern; |
1238 | if (prefix != UMEASURE_PREFIX_ONE) { |
1239 | // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale, |
1240 | // length), such as "centy{0}" |
1241 | CharString prefixKey; |
1242 | // prefixKey looks like "1024p3" or "10p-2": |
1243 | prefixKey.appendNumber(umeas_getPrefixBaseumeas_getPrefixBase_71(prefix), status); |
1244 | prefixKey.append('p', status); |
1245 | prefixKey.appendNumber(umeas_getPrefixPowerumeas_getPrefixPower_71(prefix), status); |
1246 | // Contains a pattern like "centy{0}". |
1247 | prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status); |
1248 | |
1249 | // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory) |
1250 | // |
1251 | // TODO(icu-units#139): that refers to these rules: |
1252 | // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/> |
1253 | // though I'm not sure what other value they might end up having. |
1254 | // |
1255 | // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant) |
1256 | // |
1257 | // TODO(icu-units#139): that refers to: |
1258 | // <deriveComponent feature="case" structure="prefix" value0="nominative" |
1259 | // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply |
1260 | // propagates. |
1261 | |
1262 | // 4.4.4. remove the si_prefix from singleUnit |
1263 | singleUnit->unitPrefix = UMEASURE_PREFIX_ONE; |
1264 | } |
1265 | |
1266 | // 4.5. Set corePattern to be the getValue(singleUnit, locale, length, |
1267 | // singlePluralCategory, singleCaseVariant), such as "{0} metrem" |
1268 | UnicodeString singleUnitArray[ARRAY_LENGTH]; |
1269 | // At this point we are left with a Simple Unit: |
1270 | U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==(void)0 |
1271 | 0)(void)0; |
1272 | getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray, |
1273 | status); |
1274 | if (U_FAILURE(status)) { |
1275 | // Shouldn't happen if we have data for all single units |
1276 | return; |
1277 | } |
1278 | |
1279 | // Calculate output gender |
1280 | if (!singleUnitArray[GENDER_INDEX].isBogus()) { |
1281 | U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty())(void)0; |
1282 | UnicodeString uVal; |
1283 | |
1284 | if (prefix != UMEASURE_PREFIX_ONE) { |
1285 | singleUnitArray[GENDER_INDEX] = |
1286 | getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status); |
1287 | } |
1288 | |
1289 | if (dimensionality != 1) { |
1290 | singleUnitArray[GENDER_INDEX] = |
1291 | getDerivedGender(loc, "power", singleUnitArray, nullptr, status); |
1292 | } |
1293 | |
1294 | UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status); |
1295 | if (timesGenderRule.length() == 1) { |
1296 | switch (timesGenderRule[0]) { |
1297 | case u'0': |
1298 | if (singleUnitIndex == 0) { |
1299 | U_ASSERT(outArray[GENDER_INDEX].isBogus())(void)0; |
1300 | outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; |
1301 | } |
1302 | break; |
1303 | case u'1': |
1304 | if (singleUnitIndex == productUnit.singleUnits.length() - 1) { |
1305 | U_ASSERT(outArray[GENDER_INDEX].isBogus())(void)0; |
1306 | outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; |
1307 | } |
1308 | } |
1309 | } else { |
1310 | if (outArray[GENDER_INDEX].isBogus()) { |
1311 | outArray[GENDER_INDEX] = timesGenderRule; |
1312 | } |
1313 | } |
1314 | } |
1315 | |
1316 | // Calculate resulting patterns for each plural form |
1317 | for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { |
1318 | StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex); |
1319 | |
1320 | // singleUnitArray[pluralIndex] looks something like "{0} Meter" |
1321 | if (outArray[pluralIndex].isBogus()) { |
1322 | if (singleUnitArray[pluralIndex].isBogus()) { |
1323 | // Let the usual plural fallback mechanism take care of this |
1324 | // plural form |
1325 | continue; |
1326 | } else { |
1327 | // Since our singleUnit can have a plural form that outArray |
1328 | // doesn't yet have (relying on fallback to OTHER), we start |
1329 | // by grabbing it with the normal plural fallback mechanism |
1330 | outArray[pluralIndex] = getWithPlural(outArray, plural, status); |
1331 | if (U_FAILURE(status)) { |
1332 | return; |
1333 | } |
1334 | } |
1335 | } |
1336 | |
1337 | if (uprv_strcmp(singlePluralCategory, ""):: strcmp(singlePluralCategory, "") != 0) { |
1338 | plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status)); |
1339 | } |
1340 | |
1341 | // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern. |
1342 | UnicodeString coreUnit; |
1343 | PlaceholderPosition placeholderPosition; |
1344 | UChar joinerChar; |
1345 | extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit, |
1346 | placeholderPosition, joinerChar); |
1347 | |
1348 | // 4.7 If the position is middle, then fail |
1349 | if (placeholderPosition == PH_MIDDLE) { |
1350 | status = U_UNSUPPORTED_ERROR; |
1351 | return; |
1352 | } |
1353 | |
1354 | // 4.8. If globalPlaceholder is empty |
1355 | if (globalPlaceholder[pluralIndex] == PH_EMPTY) { |
1356 | globalPlaceholder[pluralIndex] = placeholderPosition; |
1357 | globalJoinerChar = joinerChar; |
1358 | } else { |
1359 | // Expect all units involved to have the same placeholder position |
1360 | U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition)(void)0; |
1361 | // TODO(icu-units#28): Do we want to add a unit test that checks |
1362 | // for consistent joiner chars? Probably not, given how |
1363 | // inconsistent they are. File a CLDR ticket with examples? |
1364 | } |
1365 | // Now coreUnit would be just "Meter" |
1366 | |
1367 | // 4.9. If siPrefixPattern is not empty |
1368 | if (prefix != UMEASURE_PREFIX_ONE) { |
1369 | SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status); |
1370 | if (U_FAILURE(status)) { |
1371 | return; |
1372 | } |
1373 | |
1374 | // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern, |
1375 | // coreUnit) |
1376 | UnicodeString tmp; |
1377 | // combineLowercasing(locale, length, prefixPattern, coreUnit) |
1378 | // |
1379 | // TODO(icu-units#28): run this only if prefixPattern does not |
1380 | // contain space characters - do languages "as", "bn", "hi", |
1381 | // "kk", etc have concepts of upper and lower case?: |
1382 | if (width == UNUM_UNIT_WIDTH_FULL_NAME) { |
1383 | coreUnit.toLower(loc); |
1384 | } |
1385 | prefixCompiled.format(coreUnit, tmp, status); |
1386 | if (U_FAILURE(status)) { |
1387 | return; |
1388 | } |
1389 | coreUnit = tmp; |
1390 | } |
1391 | |
1392 | // 4.10. If dimensionalityPrefixPattern is not empty |
1393 | if (dimensionality != 1) { |
1394 | SimpleFormatter dimensionalityCompiled( |
1395 | getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status); |
1396 | if (U_FAILURE(status)) { |
1397 | return; |
1398 | } |
1399 | |
1400 | // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length, |
1401 | // dimensionalityPrefixPattern, coreUnit) |
1402 | UnicodeString tmp; |
1403 | // combineLowercasing(locale, length, prefixPattern, coreUnit) |
1404 | // |
1405 | // TODO(icu-units#28): run this only if prefixPattern does not |
1406 | // contain space characters - do languages "as", "bn", "hi", |
1407 | // "kk", etc have concepts of upper and lower case?: |
1408 | if (width == UNUM_UNIT_WIDTH_FULL_NAME) { |
1409 | coreUnit.toLower(loc); |
1410 | } |
1411 | dimensionalityCompiled.format(coreUnit, tmp, status); |
1412 | if (U_FAILURE(status)) { |
1413 | return; |
1414 | } |
1415 | coreUnit = tmp; |
1416 | } |
1417 | |
1418 | if (outArray[pluralIndex].length() == 0) { |
1419 | // 4.11. If the result is empty, set result to be coreUnit |
1420 | outArray[pluralIndex] = coreUnit; |
1421 | } else { |
1422 | // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit) |
1423 | UnicodeString tmp; |
1424 | timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status); |
1425 | outArray[pluralIndex] = tmp; |
1426 | } |
1427 | } |
1428 | } |
1429 | for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { |
1430 | if (globalPlaceholder[pluralIndex] == PH_BEGINNING) { |
1431 | UnicodeString tmp; |
1432 | tmp.append(u"{0}", 3); |
1433 | if (globalJoinerChar != 0) { |
1434 | tmp.append(globalJoinerChar); |
1435 | } |
1436 | tmp.append(outArray[pluralIndex]); |
1437 | outArray[pluralIndex] = tmp; |
1438 | } else if (globalPlaceholder[pluralIndex] == PH_END) { |
1439 | if (globalJoinerChar != 0) { |
1440 | outArray[pluralIndex].append(globalJoinerChar); |
1441 | } |
1442 | outArray[pluralIndex].append(u"{0}", 3); |
1443 | } |
1444 | } |
1445 | } |
1446 | |
1447 | UnicodeString LongNameHandler::getUnitDisplayName( |
1448 | const Locale& loc, |
1449 | const MeasureUnit& unit, |
1450 | UNumberUnitWidth width, |
1451 | UErrorCode& status) { |
1452 | if (U_FAILURE(status)) { |
1453 | return ICU_Utility::makeBogusString(); |
1454 | } |
1455 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
1456 | getMeasureData(loc, unit, width, "", simpleFormats, status); |
1457 | return simpleFormats[DNAM_INDEX]; |
1458 | } |
1459 | |
1460 | UnicodeString LongNameHandler::getUnitPattern( |
1461 | const Locale& loc, |
1462 | const MeasureUnit& unit, |
1463 | UNumberUnitWidth width, |
1464 | StandardPlural::Form pluralForm, |
1465 | UErrorCode& status) { |
1466 | if (U_FAILURE(status)) { |
1467 | return ICU_Utility::makeBogusString(); |
1468 | } |
1469 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
1470 | getMeasureData(loc, unit, width, "", simpleFormats, status); |
1471 | // The above already handles fallback from other widths to short |
1472 | if (U_FAILURE(status)) { |
1473 | return ICU_Utility::makeBogusString(); |
1474 | } |
1475 | // Now handle fallback from other plural forms to OTHER |
1476 | return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]: |
1477 | simpleFormats[StandardPlural::Form::OTHER]; |
1478 | } |
1479 | |
1480 | LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, |
1481 | const PluralRules *rules, |
1482 | const MicroPropsGenerator *parent, |
1483 | UErrorCode &status) { |
1484 | auto* result = new LongNameHandler(rules, parent); |
1485 | if (result == nullptr) { |
1486 | status = U_MEMORY_ALLOCATION_ERROR; |
1487 | return nullptr; |
1488 | } |
1489 | UnicodeString simpleFormats[ARRAY_LENGTH]; |
1490 | getCurrencyLongNameData(loc, currency, simpleFormats, status); |
1491 | if (U_FAILURE(status)) { return nullptr; } |
1492 | result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); |
1493 | // TODO(icu-units#28): currency gender? |
1494 | return result; |
1495 | } |
1496 | |
1497 | void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, |
1498 | UErrorCode &status) { |
1499 | for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { |
1500 | StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); |
1501 | UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status); |
1502 | if (U_FAILURE(status)) { return; } |
1503 | SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); |
1504 | if (U_FAILURE(status)) { return; } |
1505 | fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural}); |
1506 | } |
1507 | } |
1508 | |
1509 | void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, |
1510 | Field field, UErrorCode &status) { |
1511 | SimpleFormatter trailCompiled(trailFormat, 1, 1, status); |
1512 | if (U_FAILURE(status)) { return; } |
1513 | for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { |
1514 | StandardPlural::Form plural = static_cast<StandardPlural::Form>(i); |
1515 | UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); |
1516 | if (U_FAILURE(status)) { return; } |
1517 | UnicodeString compoundFormat; |
1518 | if (leadFormat.length() == 0) { |
1519 | compoundFormat = trailFormat; |
1520 | } else { |
1521 | trailCompiled.format(leadFormat, compoundFormat, status); |
1522 | if (U_FAILURE(status)) { return; } |
1523 | } |
1524 | SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); |
1525 | if (U_FAILURE(status)) { return; } |
1526 | fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); |
1527 | } |
1528 | } |
1529 | |
1530 | void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, |
1531 | UErrorCode &status) const { |
1532 | if (parent != NULL__null) { |
1533 | parent->processQuantity(quantity, micros, status); |
1534 | } |
1535 | StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); |
1536 | micros.modOuter = &fModifiers[pluralForm]; |
1537 | micros.gender = gender; |
1538 | } |
1539 | |
1540 | const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { |
1541 | return &fModifiers[plural]; |
1542 | } |
1543 | |
1544 | void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, |
1545 | const MeasureUnit &mixedUnit, |
1546 | const UNumberUnitWidth &width, |
1547 | const char *unitDisplayCase, |
1548 | const PluralRules *rules, |
1549 | const MicroPropsGenerator *parent, |
1550 | MixedUnitLongNameHandler *fillIn, |
1551 | UErrorCode &status) { |
1552 | U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED)(void)0; |
1553 | U_ASSERT(fillIn != nullptr)(void)0; |
1554 | if (U_FAILURE(status)) { |
1555 | return; |
1556 | } |
1557 | |
1558 | MeasureUnitImpl temp; |
1559 | const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); |
1560 | // Defensive, for production code: |
1561 | if (impl.complexity != UMEASURE_UNIT_MIXED) { |
1562 | // Should be using the normal LongNameHandler |
1563 | status = U_UNSUPPORTED_ERROR; |
1564 | return; |
1565 | } |
1566 | |
1567 | fillIn->fMixedUnitCount = impl.singleUnits.length(); |
1568 | fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]); |
1569 | for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) { |
1570 | // Grab data for each of the components. |
1571 | UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH]; |
1572 | // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this |
1573 | // propagation of unitDisplayCase is correct: |
1574 | getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData, |
1575 | status); |
1576 | // TODO(ICU-21494): if we add support for gender for mixed units, we may |
1577 | // need maybeCalculateGender() here. |
1578 | } |
1579 | |
1580 | // TODO(icu-units#120): Make sure ICU doesn't output zero-valued |
1581 | // high-magnitude fields |
1582 | // * for mixed units count N, produce N listFormatters, one for each subset |
1583 | // that might be formatted. |
1584 | UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT; |
1585 | if (width == UNUM_UNIT_WIDTH_NARROW) { |
1586 | listWidth = ULISTFMT_WIDTH_NARROW; |
1587 | } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) { |
1588 | // This might be the same as SHORT in most languages: |
1589 | listWidth = ULISTFMT_WIDTH_WIDE; |
1590 | } |
1591 | fillIn->fListFormatter.adoptInsteadAndCheckErrorCode( |
1592 | ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status); |
1593 | // TODO(ICU-21494): grab gender of each unit, calculate the gender |
1594 | // associated with this list formatter, save it for later. |
1595 | fillIn->rules = rules; |
1596 | fillIn->parent = parent; |
1597 | |
1598 | // We need a localised NumberFormatter for the numbers of the bigger units |
1599 | // (providing Arabic numerals, for example). |
1600 | fillIn->fNumberFormatter = NumberFormatter::withLocale(loc); |
1601 | } |
1602 | |
1603 | void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, |
1604 | UErrorCode &status) const { |
1605 | U_ASSERT(fMixedUnitCount > 1)(void)0; |
1606 | if (parent != nullptr) { |
1607 | parent->processQuantity(quantity, micros, status); |
1608 | } |
1609 | micros.modOuter = getMixedUnitModifier(quantity, micros, status); |
1610 | } |
1611 | |
1612 | const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity, |
1613 | MicroProps µs, |
1614 | UErrorCode &status) const { |
1615 | if (micros.mixedMeasuresCount == 0) { |
1616 | U_ASSERT(micros.mixedMeasuresCount > 0)(void)0; // Mixed unit: we must have more than one unit value |
1617 | status = U_UNSUPPORTED_ERROR; |
1618 | return µs.helpers.emptyWeakModifier; |
1619 | } |
1620 | |
1621 | // Algorithm: |
1622 | // |
1623 | // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should |
1624 | // find "3 yard" and "1 foot" in micros.mixedMeasures. |
1625 | // |
1626 | // Obtain long-names with plural forms corresponding to measure values: |
1627 | // * {0} yards, {0} foot, {0} inches |
1628 | // |
1629 | // Format the integer values appropriately and modify with the format |
1630 | // strings: |
1631 | // - 3 yards, 1 foot |
1632 | // |
1633 | // Use ListFormatter to combine, with one placeholder: |
1634 | // - 3 yards, 1 foot and {0} inches |
1635 | // |
1636 | // Return a SimpleModifier for this pattern, letting the rest of the |
1637 | // pipeline take care of the remaining inches. |
1638 | |
1639 | LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status); |
1640 | if (U_FAILURE(status)) { |
1641 | return µs.helpers.emptyWeakModifier; |
1642 | } |
1643 | |
1644 | StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER; |
1645 | for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) { |
1646 | DecimalQuantity fdec; |
1647 | |
1648 | // If numbers are negative, only the first number needs to have its |
1649 | // negative sign formatted. |
1650 | int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i]; |
1651 | |
1652 | if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity` |
1653 | // If quantity is not the first value and quantity is negative |
1654 | if (micros.indexOfQuantity > 0 && quantity.isNegative()) { |
1655 | quantity.negate(); |
1656 | } |
1657 | |
1658 | StandardPlural::Form quantityPlural = |
1659 | utils::getPluralSafe(micros.rounder, rules, quantity, status); |
1660 | UnicodeString quantityFormatWithPlural = |
1661 | getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status); |
1662 | SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status); |
1663 | quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status); |
1664 | } else { |
1665 | fdec.setToLong(number); |
1666 | StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec); |
1667 | UnicodeString simpleFormat = |
1668 | getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status); |
1669 | SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); |
1670 | UnicodeString num; |
1671 | auto appendable = UnicodeStringAppendable(num); |
1672 | |
1673 | fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status); |
1674 | compiledFormatter.format(num, outputMeasuresList[i], status); |
1675 | } |
1676 | } |
1677 | |
1678 | // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we |
1679 | // can set micros.gender to the gender associated with the list formatter in |
1680 | // use below (once we have correct support for that). And then document this |
1681 | // appropriately? "getMixedUnitModifier" doesn't sound like it would do |
1682 | // something like this. |
1683 | |
1684 | // Combine list into a "premixed" pattern |
1685 | UnicodeString premixedFormatPattern; |
1686 | fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern, |
1687 | status); |
1688 | SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status); |
1689 | if (U_FAILURE(status)) { |
1690 | return µs.helpers.emptyWeakModifier; |
1691 | } |
1692 | |
1693 | micros.helpers.mixedUnitModifier = |
1694 | SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural}); |
1695 | return µs.helpers.mixedUnitModifier; |
1696 | } |
1697 | |
1698 | const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, |
1699 | StandardPlural::Form /*plural*/) const { |
1700 | // TODO(icu-units#28): investigate this method when investigating where |
1701 | // ModifierStore::getModifier() gets used. To be sure it remains |
1702 | // unreachable: |
1703 | UPRV_UNREACHABLE_EXITabort(); |
1704 | return nullptr; |
1705 | } |
1706 | |
1707 | LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, |
1708 | const MaybeStackVector<MeasureUnit> &units, |
1709 | const UNumberUnitWidth &width, |
1710 | const char *unitDisplayCase, |
1711 | const PluralRules *rules, |
1712 | const MicroPropsGenerator *parent, |
1713 | UErrorCode &status) { |
1714 | LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status); |
1715 | if (U_FAILURE(status)) { |
1716 | return nullptr; |
1717 | } |
1718 | U_ASSERT(units.length() > 0)(void)0; |
1719 | if (result->fHandlers.resize(units.length()) == nullptr) { |
1720 | status = U_MEMORY_ALLOCATION_ERROR; |
1721 | return nullptr; |
1722 | } |
1723 | result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]); |
1724 | for (int32_t i = 0, length = units.length(); i < length; i++) { |
1725 | const MeasureUnit &unit = *units[i]; |
1726 | result->fMeasureUnits[i] = unit; |
1727 | if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) { |
1728 | MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status); |
1729 | MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL__null, |
1730 | mlnh, status); |
1731 | result->fHandlers[i] = mlnh; |
1732 | } else { |
1733 | LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status); |
1734 | LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL__null, lnh, status); |
1735 | result->fHandlers[i] = lnh; |
1736 | } |
1737 | if (U_FAILURE(status)) { |
1738 | return nullptr; |
1739 | } |
1740 | } |
1741 | return result.orphan(); |
1742 | } |
1743 | |
1744 | void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs, |
1745 | UErrorCode &status) const { |
1746 | // We call parent->processQuantity() from the Multiplexer, instead of |
1747 | // letting LongNameHandler handle it: we don't know which LongNameHandler to |
1748 | // call until we've called the parent! |
1749 | fParent->processQuantity(quantity, micros, status); |
1750 | |
1751 | // Call the correct LongNameHandler based on outputUnit |
1752 | for (int i = 0; i < fHandlers.getCapacity(); i++) { |
1753 | if (fMeasureUnits[i] == micros.outputUnit) { |
1754 | fHandlers[i]->processQuantity(quantity, micros, status); |
1755 | return; |
1756 | } |
1757 | } |
1758 | if (U_FAILURE(status)) { |
1759 | return; |
1760 | } |
1761 | // We shouldn't receive any outputUnit for which we haven't already got a |
1762 | // LongNameHandler: |
1763 | status = U_INTERNAL_PROGRAM_ERROR; |
1764 | } |
1765 | |
1766 | #endif /* #if !UCONFIG_NO_FORMATTING */ |