Bug Summary

File:out/../deps/icu-small/source/common/locid.cpp
Warning:line 2390, column 5
Returning null reference

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name locid.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/locid.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8*
9* File locid.cpp
10*
11* Created by: Richard Gillam
12*
13* Modification History:
14*
15* Date Name Description
16* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17* methods to get and set it.
18* 04/02/97 aliu Made operator!= inline; fixed return value
19* of getName().
20* 04/15/97 aliu Cleanup for AIX/Win32.
21* 04/24/97 aliu Numerous changes per code review.
22* 08/18/98 stephen Changed getDisplayName()
23* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24* Added getISOCountries(), getISOLanguages(),
25* getLanguagesForCountry()
26* 03/16/99 bertrand rehaul.
27* 07/21/99 stephen Added U_CFUNC setDefault
28* 11/09/99 weiv Added const char * getName() const;
29* 04/12/00 srl removing unicodestring api's and cached hash code
30* 08/10/01 grhoten Change the static Locales to accessor functions
31******************************************************************************
32*/
33
34#include <utility>
35
36#include "unicode/bytestream.h"
37#include "unicode/locid.h"
38#include "unicode/localebuilder.h"
39#include "unicode/strenum.h"
40#include "unicode/stringpiece.h"
41#include "unicode/uloc.h"
42#include "unicode/ures.h"
43
44#include "bytesinkutil.h"
45#include "charstr.h"
46#include "charstrmap.h"
47#include "cmemory.h"
48#include "cstring.h"
49#include "mutex.h"
50#include "putilimp.h"
51#include "uassert.h"
52#include "ucln_cmn.h"
53#include "uhash.h"
54#include "ulocimp.h"
55#include "umutex.h"
56#include "uniquecharstr.h"
57#include "ustr_imp.h"
58#include "uvector.h"
59
60U_CDECL_BEGINextern "C" {
61static UBool U_CALLCONV locale_cleanup(void);
62U_CDECL_END}
63
64U_NAMESPACE_BEGINnamespace icu_71 {
65
66static Locale *gLocaleCache = NULL__null;
67static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER{{ 0 }, U_ZERO_ERROR};
68
69// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
70static UMutex gDefaultLocaleMutex;
71static UHashtable *gDefaultLocalesHashT = NULL__null;
72static Locale *gDefaultLocale = NULL__null;
73
74/**
75 * \def ULOC_STRING_LIMIT
76 * strings beyond this value crash in CharString
77 */
78#define ULOC_STRING_LIMIT357913941 357913941
79
80U_NAMESPACE_END}
81
// Slot indices into the gLocaleCache array. locale_init() assigns one
// predefined Locale per enumerator, and eMAX_LOCALES (the last enumerator)
// is used as the array length when the cache is allocated.
82typedef enum ELocalePos {
83 eENGLISH,
84 eFRENCH,
85 eGERMAN,
86 eITALIAN,
87 eJAPANESE,
88 eKOREAN,
89 eCHINESE,
90
91 eFRANCE,
92 eGERMANY,
93 eITALY,
94 eJAPAN,
95 eKOREA,
96 eCHINA, /* Alias for PRC */
97 eTAIWAN,
98 eUK,
99 eUS,
100 eCANADA,
101 eCANADA_FRENCH,
102 eROOT,
103
104
105 //eDEFAULT,
106 eMAX_LOCALES
107} ELocalePos;
108
109U_CDECL_BEGINextern "C" {
110//
111// Deleter function for Locales owned by the default Locale hash table.
112//
// Value deleter registered on gDefaultLocalesHashT (see uhash_setValueDeleter
// in locale_set_default_internal). Receives the stored value as an untyped
// pointer and destroys it as an icu::Locale.
113static void U_CALLCONV
114deleteLocale(void *obj) {
115 delete (icu::Locale *) obj;
116}
117
// Cleanup callback registered with ucln_common_registerCleanup for
// UCLN_COMMON_LOCALE. Releases the predefined-locale cache and the table of
// default locales, and resets the related globals. Always returns TRUE.
118static UBool U_CALLCONV locale_cleanup(void)
119{
120 U_NAMESPACE_USEusing namespace icu_71;
121
 // Free the array allocated by locale_init() and re-arm its init-once guard.
122 delete [] gLocaleCache;
123 gLocaleCache = NULL__null;
124 gLocaleCacheInitOnce.reset();
125
126 if (gDefaultLocalesHashT) {
127 uhash_closeuhash_close_71(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
128 gDefaultLocalesHashT = NULL__null;
129 }
 // gDefaultLocale pointed at a Locale stored in the hash table above, so it is
 // only cleared here, not deleted separately.
130 gDefaultLocale = NULL__null;
131 return TRUE1;
132}
133
134
// Allocates gLocaleCache with eMAX_LOCALES entries and fills each slot with
// the predefined Locale matching its ELocalePos index.
// On allocation failure, sets status to U_MEMORY_ALLOCATION_ERROR and returns
// without registering the cleanup callback.
135static void U_CALLCONV locale_init(UErrorCode &status) {
136 U_NAMESPACE_USEusing namespace icu_71;
137
138 U_ASSERT(gLocaleCache == NULL)(void)0;
139 gLocaleCache = new Locale[(int)eMAX_LOCALES];
140 if (gLocaleCache == NULL__null) {
141 status = U_MEMORY_ALLOCATION_ERROR;
142 return;
143 }
 // Register cleanup before populating, so the array is released at library cleanup.
144 ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_LOCALE, locale_cleanup);
145 gLocaleCache[eROOT] = Locale("");
146 gLocaleCache[eENGLISH] = Locale("en");
147 gLocaleCache[eFRENCH] = Locale("fr");
148 gLocaleCache[eGERMAN] = Locale("de");
149 gLocaleCache[eITALIAN] = Locale("it");
150 gLocaleCache[eJAPANESE] = Locale("ja");
151 gLocaleCache[eKOREAN] = Locale("ko");
152 gLocaleCache[eCHINESE] = Locale("zh");
153 gLocaleCache[eFRANCE] = Locale("fr", "FR");
154 gLocaleCache[eGERMANY] = Locale("de", "DE");
155 gLocaleCache[eITALY] = Locale("it", "IT");
156 gLocaleCache[eJAPAN] = Locale("ja", "JP");
157 gLocaleCache[eKOREA] = Locale("ko", "KR");
158 gLocaleCache[eCHINA] = Locale("zh", "CN");
159 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
160 gLocaleCache[eUK] = Locale("en", "GB");
161 gLocaleCache[eUS] = Locale("en", "US");
162 gLocaleCache[eCANADA] = Locale("en", "CA");
163 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
164}
165
166U_CDECL_END}
167
168U_NAMESPACE_BEGINnamespace icu_71 {
169
// Sets the process-wide default locale to the one named by `id` and returns
// the resulting gDefaultLocale pointer.
//
// id     - locale ID; NULL means "use the host default from
//          uprv_getDefaultLocaleID()", which is then canonicalized.
// status - in/out error code. On any failure the previous gDefaultLocale is
//          returned unchanged (it may be NULL if no default was set before).
//
// Locale objects are cached in gDefaultLocalesHashT keyed by name, so a
// locale that was the default before is reused rather than re-created.
170Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
171 // Synchronize this entire function.
172 Mutex lock(&gDefaultLocaleMutex);
173
174 UBool canonicalize = FALSE0;
175
176 // If given a NULL string for the locale id, grab the default
177 // name from the system.
178 // (Different from most other locale APIs, where a null name means use
179 // the current ICU default locale.)
180 if (id == NULL__null) {
181 id = uprv_getDefaultLocaleIDuprv_getDefaultLocaleID_71(); // This function not thread safe? TODO: verify.
182 canonicalize = TRUE1; // always canonicalize host ID
183 }
184
 // Normalize the ID into localeNameBuf; the sink lives in its own scope so it
 // is destroyed before the buffer is read.
185 CharString localeNameBuf;
186 {
187 CharStringByteSink sink(&localeNameBuf);
188 if (canonicalize) {
189 ulocimp_canonicalizeulocimp_canonicalize_71(id, sink, &status);
190 } else {
191 ulocimp_getNameulocimp_getName_71(id, sink, &status);
192 }
193 }
194
195 if (U_FAILURE(status)) {
196 return gDefaultLocale;
197 }
198
 // Lazily create the name -> Locale table the first time a default is set.
199 if (gDefaultLocalesHashT == NULL__null) {
200 gDefaultLocalesHashT = uhash_openuhash_open_71(uhash_hashCharsuhash_hashChars_71, uhash_compareCharsuhash_compareChars_71, NULL__null, &status);
201 if (U_FAILURE(status)) {
202 return gDefaultLocale;
203 }
204 uhash_setValueDeleteruhash_setValueDeleter_71(gDefaultLocalesHashT, deleteLocale);
205 ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_LOCALE, locale_cleanup);
206 }
207
208 Locale *newDefault = (Locale *)uhash_getuhash_get_71(gDefaultLocalesHashT, localeNameBuf.data());
209 if (newDefault == NULL__null) {
 // Not cached yet: build a new Locale and store it under its own name, so
 // the hash key stays valid for as long as the value does.
210 newDefault = new Locale(Locale::eBOGUS);
211 if (newDefault == NULL__null) {
212 status = U_MEMORY_ALLOCATION_ERROR;
213 return gDefaultLocale;
214 }
215 newDefault->init(localeNameBuf.data(), FALSE0);
216 uhash_putuhash_put_71(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
 // NOTE(review): newDefault is not deleted here on failure — presumably
 // uhash_put releases it through the value deleter; confirm.
217 if (U_FAILURE(status)) {
218 return gDefaultLocale;
219 }
220 }
221 gDefaultLocale = newDefault;
222 return gDefaultLocale;
223}
224
225U_NAMESPACE_END}
226
227/* sfb 07/21/99 */
// C-linkage wrapper around locale_set_default_internal(). It uses a local
// UErrorCode that is never inspected, so failures are not reported to the caller.
228U_CFUNCextern "C" void
229locale_set_defaultlocale_set_default_71(const char *id)
230{
231 U_NAMESPACE_USEusing namespace icu_71;
232 UErrorCode status = U_ZERO_ERROR;
233 locale_set_default_internal(id, status);
234}
235/* end */
236
// C-linkage accessor: returns the name string of Locale::getDefault().
237U_CFUNCextern "C" const char *
238locale_get_defaultlocale_get_default_71(void)
239{
240 U_NAMESPACE_USEusing namespace icu_71;
241 return Locale::getDefault().getName();
242}
243
244
245U_NAMESPACE_BEGINnamespace icu_71 {
246
247UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)UClassID Locale::getStaticClassID() { static char classID = 0
; return (UClassID)&classID; } UClassID Locale::getDynamicClassID
() const { return Locale::getStaticClassID(); }
248
249/*Character separating the posix id fields*/
250// '_'
251// In the platform codepage.
252#define SEP_CHAR'_' '_'
253#define NULL_CHAR'\0' '\0'
254
// Destructor. Frees baseName and fullName only when they point to separately
// allocated storage.
255Locale::~Locale()
256{
 // baseName may alias fullName or the inline fullNameBuffer; free it only
 // when it is neither, to avoid a double free or freeing the member array.
257 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
258 uprv_freeuprv_free_71(baseName);
259 }
260 baseName = NULL__null;
261 /*if fullName is on the heap, we free it*/
262 if (fullName != fullNameBuffer)
263 {
264 uprv_freeuprv_free_71(fullName);
265 fullName = NULL__null;
266 }
267}
268
// Default constructor: points fullName at the inline buffer and calls
// init(NULL, FALSE). The comment on the ELocaleType constructor in this file
// says this constructor tries to get the default locale.
269Locale::Locale()
270 : UObject(), fullName(fullNameBuffer), baseName(NULL__null)
271{
272 init(NULL__null, FALSE0);
273}
274
275/*
276 * Internal constructor to allow construction of a locale object with
277 * NO side effects. (Default constructor tries to get
278 * the default locale.)
279 */
// The ELocaleType argument is unnamed and unused; it only selects this
// overload. The object is put into the bogus state via setToBogus().
280Locale::Locale(Locale::ELocaleType)
281 : UObject(), fullName(fullNameBuffer), baseName(NULL__null)
282{
283 setToBogus();
284}
285
286
// Builds a Locale from separate language / country / variant / keyword
// strings. The parts are joined into one locale ID string in `togo`, which is
// then parsed by init().
//
// newLanguage - language part, may be NULL.
// newCountry  - country part, may be NULL.
// newVariant  - variant part, may be NULL; leading and trailing '_' are ignored.
// newKeywords - appended after '@' if it contains '=', otherwise appended
//               after '_' separators; may be NULL.
//
// If language, country and variant are all NULL, this behaves like the
// default constructor (newKeywords is not consulted on that path).
// If any part is longer than ULOC_STRING_LIMIT, or an append fails, the
// Locale is set to bogus.
287Locale::Locale( const char * newLanguage,
288 const char * newCountry,
289 const char * newVariant,
290 const char * newKeywords)
291 : UObject(), fullName(fullNameBuffer), baseName(NULL__null)
292{
293 if( (newLanguage==NULL__null) && (newCountry == NULL__null) && (newVariant == NULL__null) )
294 {
295 init(NULL__null, FALSE0); /* shortcut */
296 }
297 else
298 {
299 UErrorCode status = U_ZERO_ERROR;
300 int32_t lsize = 0;
301 int32_t csize = 0;
302 int32_t vsize = 0;
303 int32_t ksize = 0;
304
305 // Check the sizes of the input strings.
306
307 // Language
308 if ( newLanguage != NULL__null )
309 {
310 lsize = (int32_t)uprv_strlen(newLanguage):: strlen(newLanguage);
311 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
312 setToBogus();
313 return;
314 }
315 }
316
317 CharString togo(newLanguage, lsize, status); // start with newLanguage
318
319 // _Country
320 if ( newCountry != NULL__null )
321 {
322 csize = (int32_t)uprv_strlen(newCountry):: strlen(newCountry);
323 if ( csize < 0 || csize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
324 setToBogus();
325 return;
326 }
327 }
328
329 // _Variant
330 if ( newVariant != NULL__null )
331 {
332 // remove leading _'s
333 while(newVariant[0] == SEP_CHAR'_')
334 {
335 newVariant++;
336 }
337
338 // remove trailing _'s
339 vsize = (int32_t)uprv_strlen(newVariant):: strlen(newVariant);
340 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT357913941 ) { // int32 wrap
341 setToBogus();
342 return;
343 }
 // The loop stops at vsize == 1, so a variant is never trimmed to empty here.
344 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR'_') )
345 {
346 vsize--;
347 }
348 }
349
350 if ( newKeywords != NULL__null)
351 {
352 ksize = (int32_t)uprv_strlen(newKeywords):: strlen(newKeywords);
353 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT357913941 ) {
354 setToBogus();
355 return;
356 }
357 }
358
359 // We've checked the input sizes, now build up the full locale string..
360
361 // newLanguage is already copied
362
363 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
364 { // ^
365 togo.append(SEP_CHAR'_', status);
366 }
367
368 if ( csize != 0 )
369 {
370 togo.append(newCountry, status);
371 }
372
373 if ( vsize != 0)
374 {
375 togo.append(SEP_CHAR'_', status)
376 .append(newVariant, vsize, status);
377 }
378
379 if ( ksize != 0)
380 {
381 if (uprv_strchr(newKeywords, '='):: strchr(newKeywords, '=')) {
382 togo.append('@', status); /* keyword parsing */
383 }
384 else {
385 togo.append('_', status); /* Variant parsing with a script */
386 if ( vsize == 0) {
387 togo.append('_', status); /* No country found */
388 }
389 }
390 togo.append(newKeywords, status);
391 }
392
 // All appends above share one status, so a single check covers them all.
393 if (U_FAILURE(status)) {
394 // Something went wrong with appending, etc.
395 setToBogus();
396 return;
397 }
398 // Parse it, because for example 'language' might really be a complete
399 // string.
400 init(togo.data(), FALSE0);
401 }
402}
403
// Copy constructor: starts from an empty state (fullName on the inline
// buffer, baseName NULL) and delegates to the copy assignment operator.
404Locale::Locale(const Locale &other)
405 : UObject(other), fullName(fullNameBuffer), baseName(NULL__null)
406{
407 *this = other;
408}
409
// Move constructor: both name pointers start on the inline buffer, so the
// move assignment operator it delegates to has nothing to free.
410Locale::Locale(Locale&& other) U_NOEXCEPTnoexcept
411 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
412 *this = std::move(other);
413}
414
// Copy assignment. Resets this object with setToBogus(), then copies the name
// strings and subtag fields from `other`.
// If a uprv_strdup allocation fails, the function returns early and leaves
// the object as it was after setToBogus() plus any fields already copied.
415Locale& Locale::operator=(const Locale& other) {
416 if (this == &other) {
417 return *this;
418 }
419
420 setToBogus();
421
 // Copy fullName: inline buffer contents, a null pointer, or a heap duplicate.
422 if (other.fullName == other.fullNameBuffer) {
423 uprv_strcpy(fullNameBuffer, other.fullNameBuffer):: strcpy(fullNameBuffer, other.fullNameBuffer);
424 } else if (other.fullName == nullptr) {
425 fullName = nullptr;
426 } else {
427 fullName = uprv_strdupuprv_strdup_71(other.fullName);
428 if (fullName == nullptr) return *this;
429 }
430
 // Preserve the aliasing relationship: if other's baseName aliases its
 // fullName, ours aliases our fullName; otherwise duplicate it.
431 if (other.baseName == other.fullName) {
432 baseName = fullName;
433 } else if (other.baseName != nullptr) {
434 baseName = uprv_strdupuprv_strdup_71(other.baseName);
435 if (baseName == nullptr) return *this;
436 }
437
438 uprv_strcpy(language, other.language):: strcpy(language, other.language);
439 uprv_strcpy(script, other.script):: strcpy(script, other.script);
440 uprv_strcpy(country, other.country):: strcpy(country, other.country);
441
442 variantBegin = other.variantBegin;
443 fIsBogus = other.fIsBogus;
444
445 return *this;
446}
447
// Move assignment. Frees this object's heap-allocated names, takes over
// other's heap pointers (or copies its inline buffer), copies the subtag
// fields, and leaves `other` pointing at its own inline buffer so its
// destructor frees nothing.
// NOTE(review): there is no `this == &other` guard — confirm callers never
// self-move.
448Locale& Locale::operator=(Locale&& other) U_NOEXCEPTnoexcept {
449 if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_freeuprv_free_71(baseName);
450 if (fullName != fullNameBuffer) uprv_freeuprv_free_71(fullName);
451
 // The inline buffer cannot be stolen, so its contents are copied when either
 // of other's pointers refers to it.
452 if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
453 uprv_strcpy(fullNameBuffer, other.fullNameBuffer):: strcpy(fullNameBuffer, other.fullNameBuffer);
454 }
455 if (other.fullName == other.fullNameBuffer) {
456 fullName = fullNameBuffer;
457 } else {
458 fullName = other.fullName;
459 }
460
 // Map other's baseName onto the matching storage in this object.
461 if (other.baseName == other.fullNameBuffer) {
462 baseName = fullNameBuffer;
463 } else if (other.baseName == other.fullName) {
464 baseName = fullName;
465 } else {
466 baseName = other.baseName;
467 }
468
469 uprv_strcpy(language, other.language):: strcpy(language, other.language);
470 uprv_strcpy(script, other.script):: strcpy(script, other.script);
471 uprv_strcpy(country, other.country):: strcpy(country, other.country);
472
473 variantBegin = other.variantBegin;
474 fIsBogus = other.fIsBogus;
475
 // Detach other from any heap storage that is now owned by this object.
476 other.baseName = other.fullName = other.fullNameBuffer;
477
478 return *this;
479}
480
// Returns a new heap-allocated copy of this Locale made with the copy
// constructor.
481Locale *
482Locale::clone() const {
483 return new Locale(*this);
484}
485
// Two Locales compare equal when their fullName strings are identical.
// NOTE(review): strcmp is called without a null check — assumes fullName is
// non-null on both sides; confirm, since copy assignment can set it to nullptr.
486bool
487Locale::operator==( const Locale& other) const
488{
489 return (uprv_strcmp(other.fullName, fullName):: strcmp(other.fullName, fullName) == 0);
490}
491
492namespace {
493
494UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER{{ 0 }, U_ZERO_ERROR};
495UHashtable *gKnownCanonicalized = nullptr;
496
// Locale IDs that loadKnownCanonicalized() inserts as keys into the
// gKnownCanonicalized hash table.
497static const char* const KNOWN_CANONICALIZED[] = {
498 "c",
499 // Commonly used locales that are known to be already canonicalized
500 "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
501 "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
502 "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
503 "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
504 "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
505 "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
506 "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
507 "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
508 "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
509 "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
510 "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
511 "nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
512 "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
513 "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
514 "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
515 "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
516 "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
517 "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
518 "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
519};
520
// Cleanup callback for UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED: re-arms the
// init-once guard and closes the hash table if one exists. Always returns TRUE.
// NOTE(review): gKnownCanonicalized is not reset to nullptr after
// uhash_close — confirm nothing reads it between cleanup and the next
// successful load.
521static UBool U_CALLCONV cleanupKnownCanonicalized() {
522 gKnownCanonicalizedInitOnce.reset();
523 if (gKnownCanonicalized) { uhash_closeuhash_close_71(gKnownCanonicalized); }
524 return TRUE1;
525}
526
// Builds the gKnownCanonicalized table: every KNOWN_CANONICALIZED entry
// becomes a key with integer value 1. The table is built in a local owning
// pointer and published to the global only if every insertion succeeded; on
// failure the function returns with `status` set and the global untouched.
527static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
528 ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
529 cleanupKnownCanonicalized);
530 LocalUHashtablePointer newKnownCanonicalizedMap(
531 uhash_openuhash_open_71(uhash_hashCharsuhash_hashChars_71, uhash_compareCharsuhash_compareChars_71, nullptr, &status));
 // The loop stops at the first failure because U_SUCCESS(status) is part of
 // the loop condition.
532 for (int32_t i = 0;
533 U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED)(int32_t)(sizeof(KNOWN_CANONICALIZED)/sizeof((KNOWN_CANONICALIZED
)[0]))
;
534 i++) {
535 uhash_putiuhash_puti_71(newKnownCanonicalizedMap.getAlias(),
536 (void*)KNOWN_CANONICALIZED[i],
537 1, &status);
538 }
539 if (U_FAILURE(status)) {
540 return;
541 }
542
 // Transfer ownership from the local pointer to the global.
543 gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
544}
545
546class AliasData;
547
548/**
549 * A Builder class to build the alias data.
550 */
// Reads the alias tables from a resource bundle. Each read*Alias member
// forwards to readAlias() with per-table validation callbacks.
551class AliasDataBuilder {
552public:
553 AliasDataBuilder() {
554 }
555
556 // Build the AliasData from resource.
 // Returns nullptr if the resources could not be opened (status is set).
557 AliasData* build(UErrorCode &status);
558
559private:
 // Shared implementation for all read*Alias members.
 // Stores the number of entries in `alias` into `length`, allocates
 // `types` and `replacementIndexes` with that many items, and fills
 // them while adding each replacement string to `strings`.
 // checkType / checkReplacement are called once per entry.
560 void readAlias(UResourceBundle* alias,
561 UniqueCharStrings* strings,
562 LocalMemory<const char*>& types,
563 LocalMemory<int32_t>& replacementIndexes,
564 int32_t &length,
565 void (*checkType)(const char* type),
566 void (*checkReplacement)(const UnicodeString& replacement),
567 UErrorCode &status);
568
569 // Read the languageAlias data from alias to
570 // strings+types+replacementIndexes
571 // The number of record will be stored into length.
572 // Allocate length items for types, to store the type field.
573 // Allocate length items for replacementIndexes,
574 // to store the index in the strings for the replacement language.
575 void readLanguageAlias(UResourceBundle* alias,
576 UniqueCharStrings* strings,
577 LocalMemory<const char*>& types,
578 LocalMemory<int32_t>& replacementIndexes,
579 int32_t &length,
580 UErrorCode &status);
581
582 // Read the scriptAlias data from alias to
583 // strings+types+replacementIndexes
584 // Allocate length items for types, to store the type field.
585 // Allocate length items for replacementIndexes,
586 // to store the index in the strings for the replacement script.
587 void readScriptAlias(UResourceBundle* alias,
588 UniqueCharStrings* strings,
589 LocalMemory<const char*>& types,
590 LocalMemory<int32_t>& replacementIndexes,
591 int32_t &length, UErrorCode &status);
592
593 // Read the territoryAlias data from alias to
594 // strings+types+replacementIndexes
595 // Allocate length items for types, to store the type field.
596 // Allocate length items for replacementIndexes,
597 // to store the index in the strings for the replacement regions.
598 void readTerritoryAlias(UResourceBundle* alias,
599 UniqueCharStrings* strings,
600 LocalMemory<const char*>& types,
601 LocalMemory<int32_t>& replacementIndexes,
602 int32_t &length, UErrorCode &status);
603
604 // Read the variantAlias data from alias to
605 // strings+types+replacementIndexes
606 // Allocate length items for types, to store the type field.
607 // Allocate length items for replacementIndexes,
608 // to store the index in the strings for the replacement variant.
609 void readVariantAlias(UResourceBundle* alias,
610 UniqueCharStrings* strings,
611 LocalMemory<const char*>& types,
612 LocalMemory<int32_t>& replacementIndexes,
613 int32_t &length, UErrorCode &status);
614
615 // Read the subdivisionAlias data from alias to
616 // strings+types+replacementIndexes
617 // Allocate length items for types, to store the type field.
618 // Allocate length items for replacementIndexes,
619 // to store the index in the strings for the replacement regions.
620 void readSubdivisionAlias(UResourceBundle* alias,
621 UniqueCharStrings* strings,
622 LocalMemory<const char*>& types,
623 LocalMemory<int32_t>& replacementIndexes,
624 int32_t &length, UErrorCode &status);
625};
626
627/**
628 * A class to hold the Alias Data.
629 */
// Holds the five alias maps (language, script, territory, variant,
// subdivision) plus the CharString that the instance owns and deletes in its
// destructor. Instances are created by AliasDataBuilder (a friend) and
// reached through singleton().
630class AliasData : public UMemory {
631public:
 // Returns the lazily loaded shared instance.
 // Returns nullptr without attempting a load if `status` already holds a
 // failure. NOTE(review): gSingleton is returned without checking status
 // after the load, so callers presumably need to handle nullptr — confirm.
632 static const AliasData* singleton(UErrorCode& status) {
633 if (U_FAILURE(status)) {
634 // Do not get into loadData if the status already has error.
635 return nullptr;
636 }
637 umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
638 return gSingleton;
639 }
640
 // Read-only accessors for the individual alias maps.
641 const CharStringMap& languageMap() const { return language; }
642 const CharStringMap& scriptMap() const { return script; }
643 const CharStringMap& territoryMap() const { return territory; }
644 const CharStringMap& variantMap() const { return variant; }
645 const CharStringMap& subdivisionMap() const { return subdivision; }
646
647 static void U_CALLCONV loadData(UErrorCode &status);
648 static UBool U_CALLCONV cleanup();
649
650 static UInitOnce gInitOnce;
651
652private:
 // Takes the maps by value and moves them into the members; takes
 // ownership of `strings` (deleted in the destructor).
653 AliasData(CharStringMap languageMap,
654 CharStringMap scriptMap,
655 CharStringMap territoryMap,
656 CharStringMap variantMap,
657 CharStringMap subdivisionMap,
658 CharString* strings)
659 : language(std::move(languageMap)),
660 script(std::move(scriptMap)),
661 territory(std::move(territoryMap)),
662 variant(std::move(variantMap)),
663 subdivision(std::move(subdivisionMap)),
664 strings(strings) {
665 }
666
667 ~AliasData() {
668 delete strings;
669 }
670
671 static const AliasData* gSingleton;
672
673 CharStringMap language;
674 CharStringMap script;
675 CharStringMap territory;
676 CharStringMap variant;
677 CharStringMap subdivision;
678 CharString* strings;
679
680 friend class AliasDataBuilder;
681};
682
683
684const AliasData* AliasData::gSingleton = nullptr;
685UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER{{ 0 }, U_ZERO_ERROR};
686
// Cleanup callback registered by loadData(): re-arms the init-once guard and
// deletes the singleton. Always returns TRUE.
// NOTE(review): gSingleton is not reset to nullptr after the delete —
// confirm nothing reads it before the next load assigns it again.
687UBool U_CALLCONV
688AliasData::cleanup()
689{
690 gInitOnce.reset();
691 delete gSingleton;
692 return TRUE1;
693}
694
// Reads every entry of the `alias` resource into parallel arrays.
//
// alias              - resource whose children are the alias records.
// strings            - receives each record's "replacement" string.
// types              - reallocated to `length` items; types[i] is the key of
//                      record i.
// replacementIndexes - reallocated to `length` items; entry i is the value
//                      returned by strings->add() for record i.
// length             - out: ures_getSize(alias).
// checkType / checkReplacement - callbacks invoked once per record.
// status             - in/out; the function returns immediately if it is
//                      already a failure, and sets U_MEMORY_ALLOCATION_ERROR
//                      if either array cannot be allocated.
695void
696AliasDataBuilder::readAlias(
697 UResourceBundle* alias,
698 UniqueCharStrings* strings,
699 LocalMemory<const char*>& types,
700 LocalMemory<int32_t>& replacementIndexes,
701 int32_t &length,
702 void (*checkType)(const char* type),
703 void (*checkReplacement)(const UnicodeString& replacement),
704 UErrorCode &status) {
705 if (U_FAILURE(status)) {
706 return;
707 }
708 length = ures_getSizeures_getSize_71(alias);
709 const char** rawTypes = types.allocateInsteadAndCopy(length);
710 if (rawTypes == nullptr) {
711 status = U_MEMORY_ALLOCATION_ERROR;
712 return;
713 }
714 int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
715 if (rawIndexes == nullptr) {
716 status = U_MEMORY_ALLOCATION_ERROR;
717 return;
718 }
719 int i = 0;
 // NOTE(review): the loop is bounded only by ures_hasNext, not by `length`,
 // and does not check `status` between iterations — presumably hasNext yields
 // exactly ures_getSize entries; confirm.
720 while (ures_hasNextures_hasNext_71(alias)) {
721 LocalUResourceBundlePointer res(
722 ures_getNextResourceures_getNextResource_71(alias, nullptr, &status));
723 const char* aliasFrom = ures_getKeyures_getKey_71(res.getAlias());
724 UnicodeString aliasTo =
725 ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
726
727 checkType(aliasFrom);
728 checkReplacement(aliasTo);
729
730 rawTypes[i] = aliasFrom;
731 rawIndexes[i] = strings->add(aliasTo, status);
732 i++;
733 }
734}
735
736/**
737 * Read the languageAlias data from alias to strings+types+replacementIndexes.
738 * Allocate length items for types, to store the type field. Allocate length
739 * items for replacementIndexes, to store the index in the strings for the
740 * replacement language.
741 */
// Forwards to readAlias(). The type callback asserts the expected shape of a
// language alias key only in the U_DEBUG branch; otherwise both callbacks are
// no-ops.
742void
743AliasDataBuilder::readLanguageAlias(
744 UResourceBundle* alias,
745 UniqueCharStrings* strings,
746 LocalMemory<const char*>& types,
747 LocalMemory<int32_t>& replacementIndexes,
748 int32_t &length,
749 UErrorCode &status)
750{
751 return readAlias(
752 alias, strings, types, replacementIndexes, length,
753#if U_DEBUG0
754 [](const char* type) {
755 // Assert the aliasFrom only contains the following possibilities
756 // language_REGION_variant
757 // language_REGION
758 // language_variant
759 // language
760 // und_variant
761 Locale test(type);
762 // Assert no script in aliasFrom
763 U_ASSERT(test.getScript()[0] == '\0')(void)0;
764 // Assert when language is und, no REGION in aliasFrom.
765 U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0')(void)0;
766 },
767#else
768 [](const char*) {},
769#endif
770 [](const UnicodeString&) {}, status);
771}
772
773/**
774 * Read the scriptAlias data from alias to strings+types+replacementIndexes.
775 * Allocate length items for types, to store the type field. Allocate length
776 * items for replacementIndexes, to store the index in the strings for the
777 * replacement script.
778 */
// Forwards to readAlias(). In the U_DEBUG branch the callbacks assert that
// both the alias key and its replacement are 4 characters long; otherwise
// they are no-ops.
779void
780AliasDataBuilder::readScriptAlias(
781 UResourceBundle* alias,
782 UniqueCharStrings* strings,
783 LocalMemory<const char*>& types,
784 LocalMemory<int32_t>& replacementIndexes,
785 int32_t &length,
786 UErrorCode &status)
787{
788 return readAlias(
789 alias, strings, types, replacementIndexes, length,
790#if U_DEBUG0
791 [](const char* type) {
792 U_ASSERT(uprv_strlen(type) == 4)(void)0;
793 },
794 [](const UnicodeString& replacement) {
795 U_ASSERT(replacement.length() == 4)(void)0;
796 },
797#else
798 [](const char*) {},
799 [](const UnicodeString&) { },
800#endif
801 status);
802}
803
804/**
805 * Read the territoryAlias data from alias to strings+types+replacementIndexes.
806 * Allocate length items for types, to store the type field. Allocate length
807 * items for replacementIndexes, to store the index in the strings for the
808 * replacement regions.
809 */
// Forwards to readAlias(). In the U_DEBUG branch the type callback asserts
// that the alias key is 2 or 3 characters long; the replacement callback is
// always a no-op.
810void
811AliasDataBuilder::readTerritoryAlias(
812 UResourceBundle* alias,
813 UniqueCharStrings* strings,
814 LocalMemory<const char*>& types,
815 LocalMemory<int32_t>& replacementIndexes,
816 int32_t &length,
817 UErrorCode &status)
818{
819 return readAlias(
820 alias, strings, types, replacementIndexes, length,
821#if U_DEBUG0
822 [](const char* type) {
823 U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3)(void)0;
824 },
825#else
826 [](const char*) {},
827#endif
828 [](const UnicodeString&) { },
829 status);
830}
831
832/**
833 * Read the variantAlias data from alias to strings+types+replacementIndexes.
834 * Allocate length items for types, to store the type field. Allocate length
835 * items for replacementIndexes, to store the index in the strings for the
836 * replacement variant.
837 */
// Forwards to readAlias(). In the U_DEBUG branch the callbacks assert that
// the alias key and its replacement are 4 to 8 characters long, and that a
// 4-character value starts with a digit; otherwise they are no-ops.
838void
839AliasDataBuilder::readVariantAlias(
840 UResourceBundle* alias,
841 UniqueCharStrings* strings,
842 LocalMemory<const char*>& types,
843 LocalMemory<int32_t>& replacementIndexes,
844 int32_t &length,
845 UErrorCode &status)
846{
847 return readAlias(
848 alias, strings, types, replacementIndexes, length,
849#if U_DEBUG0
850 [](const char* type) {
851 U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8)(void)0;
852 U_ASSERT(uprv_strlen(type) != 4 ||(void)0
853 (type[0] >= '0' && type[0] <= '9'))(void)0;
854 },
855 [](const UnicodeString& replacement) {
856 U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8)(void)0;
857 U_ASSERT(replacement.length() != 4 ||(void)0
858 (replacement.charAt(0) >= u'0' &&(void)0
859 replacement.charAt(0) <= u'9'))(void)0;
860 },
861#else
862 [](const char*) {},
863 [](const UnicodeString&) { },
864#endif
865 status);
866}
867
868/**
869 * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
870 * Allocate length items for types, to store the type field. Allocate length
871 * items for replacementIndexes, to store the index in the strings for the
872 * replacement regions.
873 */
// Forwards to readAlias(). In the U_DEBUG branch the type callback asserts
// that the alias key is 3 to 8 characters long; the replacement callback is
// always a no-op.
874void
875AliasDataBuilder::readSubdivisionAlias(
876 UResourceBundle* alias,
877 UniqueCharStrings* strings,
878 LocalMemory<const char*>& types,
879 LocalMemory<int32_t>& replacementIndexes,
880 int32_t &length,
881 UErrorCode &status)
882{
883 return readAlias(
884 alias, strings, types, replacementIndexes, length,
885#if U_DEBUG0
886 [](const char* type) {
887 U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8)(void)0;
888 },
889#else
890 [](const char*) {},
891#endif
892 [](const UnicodeString&) { },
893 status);
894}
895
896/**
897 * Initializes the alias data from the ICU resource bundles. The alias data
898 * contains alias of language, country, script and variants.
899 *
900 * If the alias data has already loaded, then this method simply returns without
901 * doing anything meaningful.
902 */
// Init-once callback passed to umtx_initOnce by singleton(). Registers
// cleanup() for UCLN_COMMON_LOCALE_ALIAS, then stores the result of
// AliasDataBuilder::build() in gSingleton. The result is nullptr when build()
// fails, in which case `status` carries the error.
903void U_CALLCONV
904AliasData::loadData(UErrorCode &status)
905{
906#ifdef LOCALE_CANONICALIZATION_DEBUG
907 UDate start = uprv_getRawUTCtimeuprv_getRawUTCtime_71();
908#endif // LOCALE_CANONICALIZATION_DEBUG
909 ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_LOCALE_ALIAS, cleanup);
910 AliasDataBuilder builder;
911 gSingleton = builder.build(status);
912#ifdef LOCALE_CANONICALIZATION_DEBUG
913 UDate end = uprv_getRawUTCtimeuprv_getRawUTCtime_71();
914 printf("AliasData::loadData took total %f ms\n", end - start);
915#endif // LOCALE_CANONICALIZATION_DEBUG
916}
917
918/**
919 * Build the alias data from resources.
920 */
921AliasData*
922AliasDataBuilder::build(UErrorCode &status) {
923 LocalUResourceBundlePointer metadata(
924 ures_openDirectures_openDirect_71(nullptr, "metadata", &status));
925 LocalUResourceBundlePointer metadataAlias(
926 ures_getByKeyures_getByKey_71(metadata.getAlias(), "alias", nullptr, &status));
927 LocalUResourceBundlePointer languageAlias(
928 ures_getByKeyures_getByKey_71(metadataAlias.getAlias(), "language", nullptr, &status));
929 LocalUResourceBundlePointer scriptAlias(
930 ures_getByKeyures_getByKey_71(metadataAlias.getAlias(), "script", nullptr, &status));
931 LocalUResourceBundlePointer territoryAlias(
932 ures_getByKeyures_getByKey_71(metadataAlias.getAlias(), "territory", nullptr, &status));
933 LocalUResourceBundlePointer variantAlias(
934 ures_getByKeyures_getByKey_71(metadataAlias.getAlias(), "variant", nullptr, &status));
935 LocalUResourceBundlePointer subdivisionAlias(
936 ures_getByKeyures_getByKey_71(metadataAlias.getAlias(), "subdivision", nullptr, &status));
937
938 if (U_FAILURE(status)) {
939 return nullptr;
940 }
941 int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
942 variantLength = 0, subdivisionLength = 0;
943
944 // Read the languageAlias into languageTypes, languageReplacementIndexes
945 // and strings
946 UniqueCharStrings strings(status);
947 LocalMemory<const char*> languageTypes;
948 LocalMemory<int32_t> languageReplacementIndexes;
949 readLanguageAlias(languageAlias.getAlias(),
950 &strings,
951 languageTypes,
952 languageReplacementIndexes,
953 languagesLength,
954 status);
955
956 // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
957 // and strings
958 LocalMemory<const char*> scriptTypes;
959 LocalMemory<int32_t> scriptReplacementIndexes;
960 readScriptAlias(scriptAlias.getAlias(),
961 &strings,
962 scriptTypes,
963 scriptReplacementIndexes,
964 scriptLength,
965 status);
966
967 // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
968 // and strings
969 LocalMemory<const char*> territoryTypes;
970 LocalMemory<int32_t> territoryReplacementIndexes;
971 readTerritoryAlias(territoryAlias.getAlias(),
972 &strings,
973 territoryTypes,
974 territoryReplacementIndexes,
975 territoryLength, status);
976
977 // Read the variantAlias into variantTypes, variantReplacementIndexes
978 // and strings
979 LocalMemory<const char*> variantTypes;
980 LocalMemory<int32_t> variantReplacementIndexes;
981 readVariantAlias(variantAlias.getAlias(),
982 &strings,
983 variantTypes,
984 variantReplacementIndexes,
985 variantLength, status);
986
987 // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
988 // and strings
989 LocalMemory<const char*> subdivisionTypes;
990 LocalMemory<int32_t> subdivisionReplacementIndexes;
991 readSubdivisionAlias(subdivisionAlias.getAlias(),
992 &strings,
993 subdivisionTypes,
994 subdivisionReplacementIndexes,
995 subdivisionLength, status);
996
997 if (U_FAILURE(status)) {
998 return nullptr;
999 }
1000
1001 // We can only use strings after freeze it.
1002 strings.freeze();
1003
1004 // Build the languageMap from languageTypes & languageReplacementIndexes
1005 CharStringMap languageMap(490, status);
1006 for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
1007 languageMap.put(languageTypes[i],
1008 strings.get(languageReplacementIndexes[i]),
1009 status);
1010 }
1011
1012 // Build the scriptMap from scriptTypes & scriptReplacementIndexes
1013 CharStringMap scriptMap(1, status);
1014 for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
1015 scriptMap.put(scriptTypes[i],
1016 strings.get(scriptReplacementIndexes[i]),
1017 status);
1018 }
1019
1020 // Build the territoryMap from territoryTypes & territoryReplacementIndexes
1021 CharStringMap territoryMap(650, status);
1022 for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
1023 territoryMap.put(territoryTypes[i],
1024 strings.get(territoryReplacementIndexes[i]),
1025 status);
1026 }
1027
1028 // Build the variantMap from variantTypes & variantReplacementIndexes.
1029 CharStringMap variantMap(2, status);
1030 for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
1031 variantMap.put(variantTypes[i],
1032 strings.get(variantReplacementIndexes[i]),
1033 status);
1034 }
1035
1036 // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
1037 CharStringMap subdivisionMap(2, status);
1038 for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
1039 subdivisionMap.put(subdivisionTypes[i],
1040 strings.get(subdivisionReplacementIndexes[i]),
1041 status);
1042 }
1043
1044 if (U_FAILURE(status)) {
1045 return nullptr;
1046 }
1047
1048 // copy hashtables
1049 auto *data = new AliasData(
1050 std::move(languageMap),
1051 std::move(scriptMap),
1052 std::move(territoryMap),
1053 std::move(variantMap),
1054 std::move(subdivisionMap),
1055 strings.orphanCharStrings());
1056
1057 if (data == nullptr) {
1058 status = U_MEMORY_ALLOCATION_ERROR;
1059 }
1060 return data;
1061}
1062
1063/**
1064 * A class that find the replacement values of locale fields by using AliasData.
1065 */
1066class AliasReplacer {
1067public:
1068 AliasReplacer(UErrorCode status) :
1069 language(nullptr), script(nullptr), region(nullptr),
1070 extensions(nullptr), variants(status),
1071 data(nullptr) {
1072 }
1073 ~AliasReplacer() {
1074 }
1075
1076 // Check the fields inside locale, if need to replace fields,
1077 // place the the replaced locale ID in out and return true.
1078 // Otherwise return false for no replacement or error.
1079 bool replace(
1080 const Locale& locale, CharString& out, UErrorCode& status);
1081
1082private:
1083 const char* language;
1084 const char* script;
1085 const char* region;
1086 const char* extensions;
1087 UVector variants;
1088
1089 const AliasData* data;
1090
1091 inline bool notEmpty(const char* str) {
1092 return str && str[0] != NULL_CHAR'\0';
1093 }
1094
1095 /**
1096 * If replacement is neither null nor empty and input is either null or empty,
1097 * return replacement.
1098 * If replacement is neither null nor empty but input is not empty, return input.
1099 * If replacement is either null or empty and type is either null or empty,
1100 * return input.
1101 * Otherwise return null.
1102 * replacement input type return
1103 * AAA nullptr * AAA
1104 * AAA BBB * BBB
1105 * nullptr || "" CCC nullptr CCC
1106 * nullptr || "" * DDD nullptr
1107 */
1108 inline const char* deleteOrReplace(
1109 const char* input, const char* type, const char* replacement) {
1110 return notEmpty(replacement) ?
1111 ((input == nullptr) ? replacement : input) :
1112 ((type == nullptr) ? input : nullptr);
1113 }
1114
1115 inline bool same(const char* a, const char* b) {
1116 if (a == nullptr && b == nullptr) {
1117 return true;
1118 }
1119 if ((a == nullptr && b != nullptr) ||
1120 (a != nullptr && b == nullptr)) {
1121 return false;
1122 }
1123 return uprv_strcmp(a, b):: strcmp(a, b) == 0;
1124 }
1125
1126 // Gather fields and generate locale ID into out.
1127 CharString& outputToString(CharString& out, UErrorCode status);
1128
1129 // Generate the lookup key.
1130 CharString& generateKey(const char* language, const char* region,
1131 const char* variant, CharString& out,
1132 UErrorCode status);
1133
1134 void parseLanguageReplacement(const char* replacement,
1135 const char*& replaceLanguage,
1136 const char*& replaceScript,
1137 const char*& replaceRegion,
1138 const char*& replaceVariant,
1139 const char*& replaceExtensions,
1140 UVector& toBeFreed,
1141 UErrorCode& status);
1142
1143 // Replace by using languageAlias.
1144 bool replaceLanguage(bool checkLanguage, bool checkRegion,
1145 bool checkVariants, UVector& toBeFreed,
1146 UErrorCode& status);
1147
1148 // Replace by using territoryAlias.
1149 bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
1150
1151 // Replace by using scriptAlias.
1152 bool replaceScript(UErrorCode& status);
1153
1154 // Replace by using variantAlias.
1155 bool replaceVariant(UErrorCode& status);
1156
1157 // Replace by using subdivisionAlias.
1158 bool replaceSubdivision(StringPiece subdivision,
1159 CharString& output, UErrorCode& status);
1160
1161 // Replace transformed extensions.
1162 bool replaceTransformedExtensions(
1163 CharString& transformedExtensions, CharString& output, UErrorCode& status);
1164};
1165
1166CharString&
1167AliasReplacer::generateKey(
1168 const char* language, const char* region, const char* variant,
1169 CharString& out, UErrorCode status)
1170{
1171 out.append(language, status);
1172 if (notEmpty(region)) {
1173 out.append(SEP_CHAR'_', status)
1174 .append(region, status);
1175 }
1176 if (notEmpty(variant)) {
1177 out.append(SEP_CHAR'_', status)
1178 .append(variant, status);
1179 }
1180 return out;
1181}
1182
1183void
1184AliasReplacer::parseLanguageReplacement(
1185 const char* replacement,
1186 const char*& replacedLanguage,
1187 const char*& replacedScript,
1188 const char*& replacedRegion,
1189 const char*& replacedVariant,
1190 const char*& replacedExtensions,
1191 UVector& toBeFreed,
1192 UErrorCode& status)
1193{
1194 if (U_FAILURE(status)) {
1195 return;
1196 }
1197 replacedScript = replacedRegion = replacedVariant
1198 = replacedExtensions = nullptr;
1199 if (uprv_strchr(replacement, '_'):: strchr(replacement, '_') == nullptr) {
1200 replacedLanguage = replacement;
1201 // reach the end, just return it.
1202 return;
1203 }
1204 // We have multiple field so we have to allocate and parse
1205 CharString* str = new CharString(
1206 replacement, (int32_t)uprv_strlen(replacement):: strlen(replacement), status);
1207 LocalPointer<CharString> lpStr(str, status);
1208 toBeFreed.adoptElement(lpStr.orphan(), status);
1209 if (U_FAILURE(status)) {
1210 return;
1211 }
1212 char* data = str->data();
1213 replacedLanguage = (const char*) data;
1214 char* endOfField = uprv_strchr(data, '_'):: strchr(data, '_');
1215 *endOfField = '\0'; // null terminiate it.
1216 endOfField++;
1217 const char* start = endOfField;
1218 endOfField = (char*) uprv_strchr(start, '_'):: strchr(start, '_');
1219 size_t len = 0;
1220 if (endOfField == nullptr) {
1221 len = uprv_strlen(start):: strlen(start);
1222 } else {
1223 len = endOfField - start;
1224 *endOfField = '\0'; // null terminiate it.
1225 }
1226 if (len == 4 && uprv_isASCIILetteruprv_isASCIILetter_71(*start)) {
1227 // Got a script
1228 replacedScript = start;
1229 if (endOfField == nullptr) {
1230 return;
1231 }
1232 start = endOfField++;
1233 endOfField = (char*)uprv_strchr(start, '_'):: strchr(start, '_');
1234 if (endOfField == nullptr) {
1235 len = uprv_strlen(start):: strlen(start);
1236 } else {
1237 len = endOfField - start;
1238 *endOfField = '\0'; // null terminiate it.
1239 }
1240 }
1241 if (len >= 2 && len <= 3) {
1242 // Got a region
1243 replacedRegion = start;
1244 if (endOfField == nullptr) {
1245 return;
1246 }
1247 start = endOfField++;
1248 endOfField = (char*)uprv_strchr(start, '_'):: strchr(start, '_');
1249 if (endOfField == nullptr) {
1250 len = uprv_strlen(start):: strlen(start);
1251 } else {
1252 len = endOfField - start;
1253 *endOfField = '\0'; // null terminiate it.
1254 }
1255 }
1256 if (len >= 4) {
1257 // Got a variant
1258 replacedVariant = start;
1259 if (endOfField == nullptr) {
1260 return;
1261 }
1262 start = endOfField++;
1263 }
1264 replacedExtensions = start;
1265}
1266
1267bool
1268AliasReplacer::replaceLanguage(
1269 bool checkLanguage, bool checkRegion,
1270 bool checkVariants, UVector& toBeFreed, UErrorCode& status)
1271{
1272 if (U_FAILURE(status)) {
1273 return false;
1274 }
1275 if ( (checkRegion && region == nullptr) ||
1276 (checkVariants && variants.size() == 0)) {
1277 // Nothing to search.
1278 return false;
1279 }
1280 int32_t variant_size = checkVariants ? variants.size() : 1;
1281 // Since we may have more than one variant, we need to loop through them.
1282 const char* searchLanguage = checkLanguage ? language : "und";
1283 const char* searchRegion = checkRegion ? region : nullptr;
1284 const char* searchVariant = nullptr;
1285 for (int32_t variant_index = 0;
1286 variant_index < variant_size;
1287 variant_index++) {
1288 if (checkVariants) {
1289 U_ASSERT(variant_index < variant_size)(void)0;
1290 searchVariant = (const char*)(variants.elementAt(variant_index));
1291 }
1292
1293 if (searchVariant != nullptr && uprv_strlen(searchVariant):: strlen(searchVariant) < 4) {
1294 // Do not consider ill-formed variant subtag.
1295 searchVariant = nullptr;
1296 }
1297 CharString typeKey;
1298 generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
1299 status);
1300 if (U_FAILURE(status)) {
1301 return false;
1302 }
1303 const char *replacement = data->languageMap().get(typeKey.data());
1304 if (replacement == nullptr) {
1305 // Found no replacement data.
1306 continue;
1307 }
1308
1309 const char* replacedLanguage = nullptr;
1310 const char* replacedScript = nullptr;
1311 const char* replacedRegion = nullptr;
1312 const char* replacedVariant = nullptr;
1313 const char* replacedExtensions = nullptr;
1314 parseLanguageReplacement(replacement,
1315 replacedLanguage,
1316 replacedScript,
1317 replacedRegion,
1318 replacedVariant,
1319 replacedExtensions,
1320 toBeFreed,
1321 status);
1322 replacedLanguage =
1323 (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und"):: strcmp(replacedLanguage, "und") == 0) ?
1324 language : replacedLanguage;
1325 replacedScript = deleteOrReplace(script, nullptr, replacedScript);
1326 replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
1327 replacedVariant = deleteOrReplace(
1328 searchVariant, searchVariant, replacedVariant);
1329
1330 if ( same(language, replacedLanguage) &&
1331 same(script, replacedScript) &&
1332 same(region, replacedRegion) &&
1333 same(searchVariant, replacedVariant) &&
1334 replacedExtensions == nullptr) {
1335 // Replacement produce no changes.
1336 continue;
1337 }
1338
1339 language = replacedLanguage;
1340 region = replacedRegion;
1341 script = replacedScript;
1342 if (searchVariant != nullptr) {
1343 if (notEmpty(replacedVariant)) {
1344 variants.setElementAt((void*)replacedVariant, variant_index);
1345 } else {
1346 variants.removeElementAt(variant_index);
1347 }
1348 }
1349 if (replacedExtensions != nullptr) {
1350 // DO NOTHING
1351 // UTS35 does not specify what should we do if we have extensions in the
1352 // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
1353 // extensions in them languageAlias:
1354 // i_default => en_x_i_default
1355 // i_enochian => und_x_i_enochian
1356 // i_mingo => see_x_i_mingo
1357 // zh_min => nan_x_zh_min
1358 // But all of them are already changed by code inside ultag_parse() before
1359 // hitting this code.
1360 }
1361
1362 // Something changed by language alias data.
1363 return true;
1364 }
1365 // Nothing changed by language alias data.
1366 return false;
1367}
1368
1369bool
1370AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
1371{
1372 if (U_FAILURE(status)) {
1373 return false;
1374 }
1375 if (region == nullptr) {
1376 // No region to search.
1377 return false;
1378 }
1379 const char *replacement = data->territoryMap().get(region);
1380 if (replacement == nullptr) {
1381 // Found no replacement data for this region.
1382 return false;
1383 }
1384 const char* replacedRegion = replacement;
1385 const char* firstSpace = uprv_strchr(replacement, ' '):: strchr(replacement, ' ');
1386 if (firstSpace != nullptr) {
1387 // If there are are more than one region in the replacement.
1388 // We need to check which one match based on the language.
1389 // Cannot use nullptr for language because that will construct
1390 // the default locale, in that case, use "und" to get the correct
1391 // locale.
1392 Locale l = LocaleBuilder()
1393 .setLanguage(language == nullptr ? "und" : language)
1394 .setScript(script)
1395 .build(status);
1396 l.addLikelySubtags(status);
1397 const char* likelyRegion = l.getCountry();
1398 LocalPointer<CharString> item;
1399 if (likelyRegion != nullptr && uprv_strlen(likelyRegion):: strlen(likelyRegion) > 0) {
1400 size_t len = uprv_strlen(likelyRegion):: strlen(likelyRegion);
1401 const char* foundInReplacement = uprv_strstr(replacement,:: strstr(replacement, likelyRegion)
1402 likelyRegion):: strstr(replacement, likelyRegion);
1403 if (foundInReplacement != nullptr) {
1404 // Assuming the case there are no three letter region code in
1405 // the replacement of territoryAlias
1406 U_ASSERT(foundInReplacement == replacement ||(void)0
1407 *(foundInReplacement-1) == ' ')(void)0;
1408 U_ASSERT(foundInReplacement[len] == ' ' ||(void)0
1409 foundInReplacement[len] == '\0')(void)0;
1410 item.adoptInsteadAndCheckErrorCode(
1411 new CharString(foundInReplacement, (int32_t)len, status), status);
1412 }
1413 }
1414 if (item.isNull() && U_SUCCESS(status)) {
1415 item.adoptInsteadAndCheckErrorCode(
1416 new CharString(replacement,
1417 (int32_t)(firstSpace - replacement), status), status);
1418 }
1419 if (U_FAILURE(status)) { return false; }
1420 replacedRegion = item->data();
1421 toBeFreed.adoptElement(item.orphan(), status);
1422 if (U_FAILURE(status)) { return false; }
1423 }
1424 U_ASSERT(!same(region, replacedRegion))(void)0;
1425 region = replacedRegion;
1426 // The region is changed by data in territory alias.
1427 return true;
1428}
1429
1430bool
1431AliasReplacer::replaceScript(UErrorCode& status)
1432{
1433 if (U_FAILURE(status)) {
1434 return false;
1435 }
1436 if (script == nullptr) {
1437 // No script to search.
1438 return false;
1439 }
1440 const char *replacement = data->scriptMap().get(script);
1441 if (replacement == nullptr) {
1442 // Found no replacement data for this script.
1443 return false;
1444 }
1445 U_ASSERT(!same(script, replacement))(void)0;
1446 script = replacement;
1447 // The script is changed by data in script alias.
1448 return true;
1449}
1450
1451bool
1452AliasReplacer::replaceVariant(UErrorCode& status)
1453{
1454 if (U_FAILURE(status)) {
1455 return false;
1456 }
1457 // Since we may have more than one variant, we need to loop through them.
1458 for (int32_t i = 0; i < variants.size(); i++) {
1459 const char *variant = (const char*)(variants.elementAt(i));
1460 const char *replacement = data->variantMap().get(variant);
1461 if (replacement == nullptr) {
1462 // Found no replacement data for this variant.
1463 continue;
1464 }
1465 U_ASSERT((uprv_strlen(replacement) >= 5 &&(void)0
1466 uprv_strlen(replacement) <= 8) ||(void)0
1467 (uprv_strlen(replacement) == 4 &&(void)0
1468 replacement[0] >= '0' &&(void)0
1469 replacement[0] <= '9'))(void)0;
1470 if (!same(variant, replacement)) {
1471 variants.setElementAt((void*)replacement, i);
1472 // Special hack to handle hepburn-heploc => alalc97
1473 if (uprv_strcmp(variant, "heploc"):: strcmp(variant, "heploc") == 0) {
1474 for (int32_t j = 0; j < variants.size(); j++) {
1475 if (uprv_strcmp((const char*)(variants.elementAt(j)),:: strcmp((const char*)(variants.elementAt(j)), "hepburn")
1476 "hepburn"):: strcmp((const char*)(variants.elementAt(j)), "hepburn") == 0) {
1477 variants.removeElementAt(j);
1478 }
1479 }
1480 }
1481 return true;
1482 }
1483 }
1484 return false;
1485}
1486
1487bool
1488AliasReplacer::replaceSubdivision(
1489 StringPiece subdivision, CharString& output, UErrorCode& status)
1490{
1491 if (U_FAILURE(status)) {
1492 return false;
1493 }
1494 const char *replacement = data->subdivisionMap().get(subdivision.data());
1495 if (replacement != nullptr) {
1496 const char* firstSpace = uprv_strchr(replacement, ' '):: strchr(replacement, ' ');
1497 // Found replacement data for this subdivision.
1498 size_t len = (firstSpace != nullptr) ?
1499 (firstSpace - replacement) : uprv_strlen(replacement):: strlen(replacement);
1500 if (2 <= len && len <= 8) {
1501 output.append(replacement, (int32_t)len, status);
1502 if (2 == len) {
1503 // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
1504 output.append("zzzz", 4, status);
1505 }
1506 }
1507 return true;
1508 }
1509 return false;
1510}
1511
1512bool
1513AliasReplacer::replaceTransformedExtensions(
1514 CharString& transformedExtensions, CharString& output, UErrorCode& status)
1515{
1516 // The content of the transformedExtensions will be modified in this
1517 // function to NULL-terminating (tkey-tvalue) pairs.
1518 if (U_FAILURE(status)) {
1519 return false;
1520 }
1521 int32_t len = transformedExtensions.length();
1522 const char* str = transformedExtensions.data();
1523 const char* tkey = ultag_getTKeyStartultag_getTKeyStart_71(str);
1524 int32_t tlangLen = (tkey == str) ? 0 :
1525 ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
1526 CharStringByteSink sink(&output);
1527 if (tlangLen > 0) {
1528 Locale tlang = LocaleBuilder()
1529 .setLanguageTag(StringPiece(str, tlangLen))
1530 .build(status);
1531 tlang.canonicalize(status);
1532 tlang.toLanguageTag(sink, status);
1533 if (U_FAILURE(status)) {
1534 return false;
1535 }
1536 T_CString_toLowerCaseT_CString_toLowerCase_71(output.data());
1537 }
1538 if (tkey != nullptr) {
1539 // We need to sort the tfields by tkey
1540 UVector tfields(status);
1541 if (U_FAILURE(status)) {
1542 return false;
1543 }
1544 do {
1545 const char* tvalue = uprv_strchr(tkey, '-'):: strchr(tkey, '-');
1546 if (tvalue == nullptr) {
1547 status = U_ILLEGAL_ARGUMENT_ERROR;
1548 return false;
1549 }
1550 const char* nextTKey = ultag_getTKeyStartultag_getTKeyStart_71(tvalue);
1551 if (nextTKey != nullptr) {
1552 *((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue
1553 }
1554 tfields.insertElementAt((void*)tkey, tfields.size(), status);
1555 if (U_FAILURE(status)) {
1556 return false;
1557 }
1558 tkey = nextTKey;
1559 } while (tkey != nullptr);
1560 tfields.sort([](UElement e1, UElement e2) -> int32_t {
1561 return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);
1562 }, status);
1563 for (int32_t i = 0; i < tfields.size(); i++) {
1564 if (output.length() > 0) {
1565 output.append('-', status);
1566 }
1567 const char* tfield = (const char*) tfields.elementAt(i);
1568 const char* tvalue = uprv_strchr(tfield, '-'):: strchr(tfield, '-');
1569 if (tvalue == nullptr) {
1570 status = U_ILLEGAL_ARGUMENT_ERROR;
1571 return false;
1572 }
1573 // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
1574 *((char*)tvalue++) = '\0'; // NULL terminate tkey
1575 output.append(tfield, status).append('-', status);
1576 const char* bcpTValue = ulocimp_toBcpTypeulocimp_toBcpType_71(tfield, tvalue, nullptr, nullptr);
1577 output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
1578 }
1579 }
1580 if (U_FAILURE(status)) {
1581 return false;
1582 }
1583 return true;
1584}
1585
1586CharString&
1587AliasReplacer::outputToString(
1588 CharString& out, UErrorCode status)
1589{
1590 out.append(language, status);
1591 if (notEmpty(script)) {
1592 out.append(SEP_CHAR'_', status)
1593 .append(script, status);
1594 }
1595 if (notEmpty(region)) {
1596 out.append(SEP_CHAR'_', status)
1597 .append(region, status);
1598 }
1599 if (variants.size() > 0) {
1600 if (!notEmpty(script) && !notEmpty(region)) {
1601 out.append(SEP_CHAR'_', status);
1602 }
1603 variants.sort([](UElement e1, UElement e2) -> int32_t {
1604 return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);
1605 }, status);
1606 int32_t variantsStart = out.length();
1607 for (int32_t i = 0; i < variants.size(); i++) {
1608 out.append(SEP_CHAR'_', status)
1609 .append((const char*)(variants.elementAt(i)),
1610 status);
1611 }
1612 T_CString_toUpperCaseT_CString_toUpperCase_71(out.data() + variantsStart);
1613 }
1614 if (notEmpty(extensions)) {
1615 CharString tmp("und_", status);
1616 tmp.append(extensions, status);
1617 Locale tmpLocale(tmp.data());
1618 // only support x extension inside CLDR for now.
1619 U_ASSERT(extensions[0] == 'x')(void)0;
1620 out.append(tmpLocale.getName() + 1, status);
1621 }
1622 return out;
1623}
1624
1625bool
1626AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
1627{
1628 data = AliasData::singleton(status);
1629 if (U_FAILURE(status)) {
1630 return false;
1631 }
1632 U_ASSERT(data != nullptr)(void)0;
1633 out.clear();
1634 language = locale.getLanguage();
1635 if (!notEmpty(language)) {
1636 language = nullptr;
1637 }
1638 script = locale.getScript();
1639 if (!notEmpty(script)) {
1640 script = nullptr;
1641 }
1642 region = locale.getCountry();
1643 if (!notEmpty(region)) {
1644 region = nullptr;
1645 }
1646 const char* variantsStr = locale.getVariant();
1647 CharString variantsBuff(variantsStr, -1, status);
1648 if (!variantsBuff.isEmpty()) {
1649 if (U_FAILURE(status)) { return false; }
1650 char* start = variantsBuff.data();
1651 T_CString_toLowerCaseT_CString_toLowerCase_71(start);
1652 char* end;
1653 while ((end = uprv_strchr(start, SEP_CHAR):: strchr(start, '_')) != nullptr &&
1654 U_SUCCESS(status)) {
1655 *end = NULL_CHAR'\0'; // null terminate inside variantsBuff
1656 variants.addElement(start, status);
1657 start = end + 1;
1658 }
1659 variants.addElement(start, status);
1660 }
1661 if (U_FAILURE(status)) { return false; }
1662
1663 // Sort the variants
1664 variants.sort([](UElement e1, UElement e2) -> int32_t {
1665 return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer):: strcmp((const char*)e1.pointer, (const char*)e2.pointer);
1666 }, status);
1667
1668 // A changed count to assert when loop too many times.
1669 int changed = 0;
1670 // A UVector to to hold CharString allocated by the replace* method
1671 // and freed when out of scope from his function.
1672 UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
1673 nullptr, 10, status);
1674 while (U_SUCCESS(status)) {
1675 // Something wrong with the data cause looping here more than 10 times
1676 // already.
1677 U_ASSERT(changed < 5)(void)0;
1678 // From observation of key in data/misc/metadata.txt
1679 // we know currently we only need to search in the following combination
1680 // of fields for type in languageAlias:
1681 // * lang_region_variant
1682 // * lang_region
1683 // * lang_variant
1684 // * lang
1685 // * und_variant
1686 // This assumption is ensured by the U_ASSERT in readLanguageAlias
1687 //
1688 // lang REGION variant
1689 if ( replaceLanguage(true, true, true, stringsToBeFreed, status) ||
1690 replaceLanguage(true, true, false, stringsToBeFreed, status) ||
1691 replaceLanguage(true, false, true, stringsToBeFreed, status) ||
1692 replaceLanguage(true, false, false, stringsToBeFreed, status) ||
1693 replaceLanguage(false,false, true, stringsToBeFreed, status) ||
1694 replaceTerritory(stringsToBeFreed, status) ||
1695 replaceScript(status) ||
1696 replaceVariant(status)) {
1697 // Some values in data is changed, try to match from the beginning
1698 // again.
1699 changed++;
1700 continue;
1701 }
1702 // Nothing changed. Break out.
1703 break;
1704 } // while(1)
1705
1706 if (U_FAILURE(status)) { return false; }
1707 // Nothing changed and we know the order of the variants are not change
1708 // because we have no variant or only one.
1709 const char* extensionsStr = locale_getKeywordsStartlocale_getKeywordsStart_71(locale.getName());
1710 if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
1711 return false;
1712 }
1713 outputToString(out, status);
1714 if (U_FAILURE(status)) {
1715 return false;
1716 }
1717 if (extensionsStr != nullptr) {
1718 changed = 0;
1719 Locale temp(locale);
1720 LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
1721 if (U_SUCCESS(status) && !iter.isNull()) {
1722 const char* key;
1723 while ((key = iter->next(nullptr, status)) != nullptr) {
1724 if (uprv_strcmp("sd", key):: strcmp("sd", key) == 0 || uprv_strcmp("rg", key):: strcmp("rg", key) == 0 ||
1725 uprv_strcmp("t", key):: strcmp("t", key) == 0) {
1726 CharString value;
1727 CharStringByteSink valueSink(&value);
1728 locale.getKeywordValue(key, valueSink, status);
1729 if (U_FAILURE(status)) {
1730 status = U_ZERO_ERROR;
1731 continue;
1732 }
1733 CharString replacement;
1734 if (uprv_strlen(key):: strlen(key) == 2) {
1735 if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
1736 changed++;
1737 temp.setKeywordValue(key, replacement.data(), status);
1738 }
1739 } else {
1740 U_ASSERT(uprv_strcmp(key, "t") == 0)(void)0;
1741 if (replaceTransformedExtensions(value, replacement, status)) {
1742 changed++;
1743 temp.setKeywordValue(key, replacement.data(), status);
1744 }
1745 }
1746 if (U_FAILURE(status)) {
1747 return false;
1748 }
1749 }
1750 }
1751 }
1752 if (changed != 0) {
1753 extensionsStr = locale_getKeywordsStartlocale_getKeywordsStart_71(temp.getName());
1754 }
1755 out.append(extensionsStr, status);
1756 }
1757 if (U_FAILURE(status)) {
1758 return false;
1759 }
1760 // If the tag is not changed, return.
1761 if (uprv_strcmp(out.data(), locale.getName()):: strcmp(out.data(), locale.getName()) == 0) {
1762 out.clear();
1763 return false;
1764 }
1765 return true;
1766}
1767
1768// Return true if the locale is changed during canonicalization.
1769// The replaced value then will be put into out.
1770bool
1771canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
1772{
1773 AliasReplacer replacer(status);
1774 return replacer.replace(locale, out, status);
1775}
1776
1777// Function to optimize for known cases without so we can skip the loading
1778// of resources in the startup time until we really need it.
1779bool
1780isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
1781{
1782 if ( uprv_strcmp(locale, "c"):: strcmp(locale, "c") == 0 ||
1783 uprv_strcmp(locale, "en"):: strcmp(locale, "en") == 0 ||
1784 uprv_strcmp(locale, "en_US"):: strcmp(locale, "en_US") == 0) {
1785 return true;
1786 }
1787
1788 // common well-known Canonicalized.
1789 umtx_initOnce(gKnownCanonicalizedInitOnce,
1790 &loadKnownCanonicalized, status);
1791 if (U_FAILURE(status)) {
1792 return false;
1793 }
1794 U_ASSERT(gKnownCanonicalized != nullptr)(void)0;
1795 return uhash_getiuhash_geti_71(gKnownCanonicalized, locale) != 0;
1796}
1797
1798} // namespace
1799
1800// Function for testing.
1801U_CAPIextern "C" const char* const*
1802ulocimp_getKnownCanonicalizedLocaleForTestulocimp_getKnownCanonicalizedLocaleForTest_71(int32_t* length)
1803{
1804 *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED)(int32_t)(sizeof(KNOWN_CANONICALIZED)/sizeof((KNOWN_CANONICALIZED
)[0]))
;
1805 return KNOWN_CANONICALIZED;
1806}
1807
1808// Function for testing.
1809U_CAPIextern "C" bool
1810ulocimp_isCanonicalizedLocaleForTestulocimp_isCanonicalizedLocaleForTest_71(const char* localeName)
1811{
1812 Locale l(localeName);
1813 UErrorCode status = U_ZERO_ERROR;
1814 CharString temp;
1815 return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
1816}
1817
1818/*This function initializes a Locale from a C locale ID*/
1819Locale& Locale::init(const char* localeID, UBool canonicalize)
1820{
1821 fIsBogus = FALSE0;
1822 /* Free our current storage */
1823 if ((baseName != fullName) && (baseName != fullNameBuffer)) {
1824 uprv_freeuprv_free_71(baseName);
1825 }
1826 baseName = NULL__null;
1827 if(fullName != fullNameBuffer) {
1828 uprv_freeuprv_free_71(fullName);
1829 fullName = fullNameBuffer;
1830 }
1831
1832 // not a loop:
1833 // just an easy way to have a common error-exit
1834 // without goto and without another function
1835 do {
1836 char *separator;
1837 char *field[5] = {0};
1838 int32_t fieldLen[5] = {0};
1839 int32_t fieldIdx;
1840 int32_t variantField;
1841 int32_t length;
1842 UErrorCode err;
1843
1844 if(localeID == NULL__null) {
1845 // not an error, just set the default locale
1846 return *this = getDefault();
1847 }
1848
1849 /* preset all fields to empty */
1850 language[0] = script[0] = country[0] = 0;
1851
1852 // "canonicalize" the locale ID to ICU/Java format
1853 err = U_ZERO_ERROR;
1854 length = canonicalize ?
1855 uloc_canonicalizeuloc_canonicalize_71(localeID, fullName, sizeof(fullNameBuffer), &err) :
1856 uloc_getNameuloc_getName_71(localeID, fullName, sizeof(fullNameBuffer), &err);
1857
1858 if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
1859 U_ASSERT(baseName == nullptr)(void)0;
1860 /*Go to heap for the fullName if necessary*/
1861 fullName = (char *)uprv_mallocuprv_malloc_71(sizeof(char)*(length + 1));
1862 if(fullName == 0) {
1863 fullName = fullNameBuffer;
1864 break; // error: out of memory
1865 }
1866 err = U_ZERO_ERROR;
1867 length = canonicalize ?
1868 uloc_canonicalizeuloc_canonicalize_71(localeID, fullName, length+1, &err) :
1869 uloc_getNameuloc_getName_71(localeID, fullName, length+1, &err);
1870 }
1871 if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
1872 /* should never occur */
1873 break;
1874 }
1875
1876 variantBegin = length;
1877
1878 /* after uloc_getName/canonicalize() we know that only '_' are separators */
1879 /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
1880 separator = field[0] = fullName;
1881 fieldIdx = 1;
1882 char* at = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
1883 while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR):: strchr(field[fieldIdx-1], '_')) != 0 &&
1884 fieldIdx < UPRV_LENGTHOF(field)(int32_t)(sizeof(field)/sizeof((field)[0]))-1 &&
1885 (at == nullptr || separator < at)) {
1886 field[fieldIdx] = separator + 1;
1887 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
1888 fieldIdx++;
1889 }
1890 // variant may contain @foo or .foo POSIX cruft; remove it
1891 separator = uprv_strchr(field[fieldIdx-1], '@'):: strchr(field[fieldIdx-1], '@');
1892 char* sep2 = uprv_strchr(field[fieldIdx-1], '.'):: strchr(field[fieldIdx-1], '.');
1893 if (separator!=NULL__null || sep2!=NULL__null) {
1894 if (separator==NULL__null || (sep2!=NULL__null && separator > sep2)) {
1895 separator = sep2;
1896 }
1897 fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
1898 } else {
1899 fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
1900 }
1901
1902 if (fieldLen[0] >= (int32_t)(sizeof(language)))
1903 {
1904 break; // error: the language field is too long
1905 }
1906
1907 variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
1908 if (fieldLen[0] > 0) {
1909 /* We have a language */
1910 uprv_memcpy(language, fullName, fieldLen[0])do { clang diagnostic push clang diagnostic ignored "-Waddress"
(void)0; (void)0; clang diagnostic pop :: memcpy(language,
fullName, fieldLen[0]); } while (false)
;
1911 language[fieldLen[0]] = 0;
1912 }
1913 if (fieldLen[1] == 4 && uprv_isASCIILetteruprv_isASCIILetter_71(field[1][0]) &&
1914 uprv_isASCIILetteruprv_isASCIILetter_71(field[1][1]) && uprv_isASCIILetteruprv_isASCIILetter_71(field[1][2]) &&
1915 uprv_isASCIILetteruprv_isASCIILetter_71(field[1][3])) {
1916 /* We have at least a script */
1917 uprv_memcpy(script, field[1], fieldLen[1])do { clang diagnostic push clang diagnostic ignored "-Waddress"
(void)0; (void)0; clang diagnostic pop :: memcpy(script, field
[1], fieldLen[1]); } while (false)
;
1918 script[fieldLen[1]] = 0;
1919 variantField++;
1920 }
1921
1922 if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
1923 /* We have a country */
1924 uprv_memcpy(country, field[variantField], fieldLen[variantField])do { clang diagnostic push clang diagnostic ignored "-Waddress"
(void)0; (void)0; clang diagnostic pop :: memcpy(country, field
[variantField], fieldLen[variantField]); } while (false)
;
1925 country[fieldLen[variantField]] = 0;
1926 variantField++;
1927 } else if (fieldLen[variantField] == 0) {
1928 variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
1929 }
1930
1931 if (fieldLen[variantField] > 0) {
1932 /* We have a variant */
1933 variantBegin = (int32_t)(field[variantField] - fullName);
1934 }
1935
1936 err = U_ZERO_ERROR;
1937 initBaseName(err);
1938 if (U_FAILURE(err)) {
1939 break;
1940 }
1941
1942 if (canonicalize) {
1943 if (!isKnownCanonicalizedLocale(fullName, err)) {
1944 CharString replaced;
1945 // Not sure it is already canonicalized
1946 if (canonicalizeLocale(*this, replaced, err)) {
1947 U_ASSERT(U_SUCCESS(err))(void)0;
1948 // If need replacement, call init again.
1949 init(replaced.data(), false);
1950 }
1951 if (U_FAILURE(err)) {
1952 break;
1953 }
1954 }
1955 } // if (canonicalize) {
1956
1957 // successful end of init()
1958 return *this;
1959 } while(0); /*loop doesn't iterate*/
1960
1961 // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
1962 setToBogus();
1963
1964 return *this;
1965}
1966
1967/*
1968 * Set up the base name.
1969 * If there are no key words, it's exactly the full name.
1970 * If key words exist, it's the full name truncated at the '@' character.
1971 * Need to set up both at init() and after setting a keyword.
1972 */
1973void
1974Locale::initBaseName(UErrorCode &status) {
1975 if (U_FAILURE(status)) {
1976 return;
1977 }
1978 U_ASSERT(baseName==NULL || baseName==fullName)(void)0;
1979 const char *atPtr = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
1980 const char *eqPtr = uprv_strchr(fullName, '='):: strchr(fullName, '=');
1981 if (atPtr && eqPtr && atPtr < eqPtr) {
1982 // Key words exist.
1983 int32_t baseNameLength = (int32_t)(atPtr - fullName);
1984 baseName = (char *)uprv_mallocuprv_malloc_71(baseNameLength + 1);
1985 if (baseName == NULL__null) {
1986 status = U_MEMORY_ALLOCATION_ERROR;
1987 return;
1988 }
1989 uprv_strncpy(baseName, fullName, baseNameLength):: strncpy(baseName, fullName, baseNameLength);
1990 baseName[baseNameLength] = 0;
1991
1992 // The original computation of variantBegin leaves it equal to the length
1993 // of fullName if there is no variant. It should instead be
1994 // the length of the baseName.
1995 if (variantBegin > baseNameLength) {
1996 variantBegin = baseNameLength;
1997 }
1998 } else {
1999 baseName = fullName;
2000 }
2001}
2002
2003
2004int32_t
2005Locale::hashCode() const
2006{
2007 return ustr_hashCharsNustr_hashCharsN_71(fullName, static_cast<int32_t>(uprv_strlen(fullName):: strlen(fullName)));
2008}
2009
2010void
2011Locale::setToBogus() {
2012 /* Free our current storage */
2013 if((baseName != fullName) && (baseName != fullNameBuffer)) {
2014 uprv_freeuprv_free_71(baseName);
2015 }
2016 baseName = NULL__null;
2017 if(fullName != fullNameBuffer) {
2018 uprv_freeuprv_free_71(fullName);
2019 fullName = fullNameBuffer;
2020 }
2021 *fullNameBuffer = 0;
2022 *language = 0;
2023 *script = 0;
2024 *country = 0;
2025 fIsBogus = TRUE1;
2026 variantBegin = 0;
2027}
2028
2029const Locale& U_EXPORT2
2030Locale::getDefault()
2031{
2032 {
2033 Mutex lock(&gDefaultLocaleMutex);
2034 if (gDefaultLocale != NULL__null) {
2035 return *gDefaultLocale;
2036 }
2037 }
2038 UErrorCode status = U_ZERO_ERROR;
2039 return *locale_set_default_internal(NULL__null, status);
2040}
2041
2042
2043
2044void U_EXPORT2
2045Locale::setDefault( const Locale& newLocale,
2046 UErrorCode& status)
2047{
2048 if (U_FAILURE(status)) {
2049 return;
2050 }
2051
2052 /* Set the default from the full name string of the supplied locale.
2053 * This is a convenient way to access the default locale caching mechanisms.
2054 */
2055 const char *localeID = newLocale.getName();
2056 locale_set_default_internal(localeID, status);
2057}
2058
2059void
2060Locale::addLikelySubtags(UErrorCode& status) {
2061 if (U_FAILURE(status)) {
2062 return;
2063 }
2064
2065 CharString maximizedLocaleID;
2066 {
2067 CharStringByteSink sink(&maximizedLocaleID);
2068 ulocimp_addLikelySubtagsulocimp_addLikelySubtags_71(fullName, sink, &status);
2069 }
2070
2071 if (U_FAILURE(status)) {
2072 return;
2073 }
2074
2075 init(maximizedLocaleID.data(), /*canonicalize=*/FALSE0);
2076 if (isBogus()) {
2077 status = U_ILLEGAL_ARGUMENT_ERROR;
2078 }
2079}
2080
2081void
2082Locale::minimizeSubtags(UErrorCode& status) {
2083 if (U_FAILURE(status)) {
2084 return;
2085 }
2086
2087 CharString minimizedLocaleID;
2088 {
2089 CharStringByteSink sink(&minimizedLocaleID);
2090 ulocimp_minimizeSubtagsulocimp_minimizeSubtags_71(fullName, sink, &status);
2091 }
2092
2093 if (U_FAILURE(status)) {
2094 return;
2095 }
2096
2097 init(minimizedLocaleID.data(), /*canonicalize=*/FALSE0);
2098 if (isBogus()) {
2099 status = U_ILLEGAL_ARGUMENT_ERROR;
2100 }
2101}
2102
2103void
2104Locale::canonicalize(UErrorCode& status) {
2105 if (U_FAILURE(status)) {
2106 return;
2107 }
2108 if (isBogus()) {
2109 status = U_ILLEGAL_ARGUMENT_ERROR;
2110 return;
2111 }
2112 CharString uncanonicalized(fullName, status);
2113 if (U_FAILURE(status)) {
2114 return;
2115 }
2116 init(uncanonicalized.data(), /*canonicalize=*/TRUE1);
2117 if (isBogus()) {
2118 status = U_ILLEGAL_ARGUMENT_ERROR;
2119 }
2120}
2121
2122Locale U_EXPORT2
2123Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
2124{
2125 Locale result(Locale::eBOGUS);
2126
2127 if (U_FAILURE(status)) {
2128 return result;
2129 }
2130
2131 // If a BCP 47 language tag is passed as the language parameter to the
2132 // normal Locale constructor, it will actually fall back to invoking
2133 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
2134 // the string actually is BCP 47. This works well for things like strings
2135 // using BCP 47 extensions, but it does not at all work for things like
2136 // legacy language tags (marked as “Type: grandfathered” in BCP 47,
2137 // e.g., "en-GB-oed") which are possible to also
2138 // interpret as ICU locale IDs and because of that won't trigger the BCP 47
2139 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
2140 // and then Locale::init(), instead of just calling the normal constructor.
2141
2142 CharString localeID;
2143 int32_t parsedLength;
2144 {
2145 CharStringByteSink sink(&localeID);
2146 ulocimp_forLanguageTagulocimp_forLanguageTag_71(
2147 tag.data(),
2148 tag.length(),
2149 sink,
2150 &parsedLength,
2151 &status);
2152 }
2153
2154 if (U_FAILURE(status)) {
2155 return result;
2156 }
2157
2158 if (parsedLength != tag.size()) {
2159 status = U_ILLEGAL_ARGUMENT_ERROR;
2160 return result;
2161 }
2162
2163 result.init(localeID.data(), /*canonicalize=*/FALSE0);
2164 if (result.isBogus()) {
2165 status = U_ILLEGAL_ARGUMENT_ERROR;
2166 }
2167 return result;
2168}
2169
2170void
2171Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
2172{
2173 if (U_FAILURE(status)) {
2174 return;
2175 }
2176
2177 if (fIsBogus) {
2178 status = U_ILLEGAL_ARGUMENT_ERROR;
2179 return;
2180 }
2181
2182 ulocimp_toLanguageTagulocimp_toLanguageTag_71(fullName, sink, /*strict=*/FALSE0, &status);
2183}
2184
2185Locale U_EXPORT2
2186Locale::createFromName (const char *name)
2187{
2188 if (name) {
2189 Locale l("");
2190 l.init(name, FALSE0);
2191 return l;
2192 }
2193 else {
2194 return getDefault();
2195 }
2196}
2197
2198Locale U_EXPORT2
2199Locale::createCanonical(const char* name) {
2200 Locale loc("");
2201 loc.init(name, TRUE1);
2202 return loc;
2203}
2204
2205const char *
2206Locale::getISO3Language() const
2207{
2208 return uloc_getISO3Languageuloc_getISO3Language_71(fullName);
2209}
2210
2211
2212const char *
2213Locale::getISO3Country() const
2214{
2215 return uloc_getISO3Countryuloc_getISO3Country_71(fullName);
2216}
2217
2218/**
2219 * Return the LCID value as specified in the "LocaleID" resource for this
2220 * locale. The LocaleID must be expressed as a hexadecimal number, from
2221 * one to four digits. If the LocaleID resource is not present, or is
2222 * in an incorrect format, 0 is returned. The LocaleID is for use in
2223 * Windows (it is an LCID), but is available on all platforms.
2224 */
2225uint32_t
2226Locale::getLCID() const
2227{
2228 return uloc_getLCIDuloc_getLCID_71(fullName);
2229}
2230
2231const char* const* U_EXPORT2 Locale::getISOCountries()
2232{
2233 return uloc_getISOCountriesuloc_getISOCountries_71();
2234}
2235
2236const char* const* U_EXPORT2 Locale::getISOLanguages()
2237{
2238 return uloc_getISOLanguagesuloc_getISOLanguages_71();
2239}
2240
2241// Set the locale's data based on a posix id.
2242void Locale::setFromPOSIXID(const char *posixID)
2243{
2244 init(posixID, TRUE1);
2245}
2246
2247const Locale & U_EXPORT2
2248Locale::getRoot(void)
2249{
2250 return getLocale(eROOT);
2251}
2252
2253const Locale & U_EXPORT2
2254Locale::getEnglish(void)
2255{
2256 return getLocale(eENGLISH);
2257}
2258
2259const Locale & U_EXPORT2
2260Locale::getFrench(void)
2261{
2262 return getLocale(eFRENCH);
2263}
2264
2265const Locale & U_EXPORT2
2266Locale::getGerman(void)
2267{
2268 return getLocale(eGERMAN);
2269}
2270
2271const Locale & U_EXPORT2
2272Locale::getItalian(void)
2273{
2274 return getLocale(eITALIAN);
2275}
2276
2277const Locale & U_EXPORT2
2278Locale::getJapanese(void)
2279{
2280 return getLocale(eJAPANESE);
2281}
2282
2283const Locale & U_EXPORT2
2284Locale::getKorean(void)
2285{
2286 return getLocale(eKOREAN);
2287}
2288
2289const Locale & U_EXPORT2
2290Locale::getChinese(void)
2291{
2292 return getLocale(eCHINESE);
2293}
2294
2295const Locale & U_EXPORT2
2296Locale::getSimplifiedChinese(void)
2297{
2298 return getLocale(eCHINA);
2299}
2300
2301const Locale & U_EXPORT2
2302Locale::getTraditionalChinese(void)
2303{
2304 return getLocale(eTAIWAN);
2305}
2306
2307
2308const Locale & U_EXPORT2
2309Locale::getFrance(void)
2310{
2311 return getLocale(eFRANCE);
2312}
2313
2314const Locale & U_EXPORT2
2315Locale::getGermany(void)
2316{
2317 return getLocale(eGERMANY);
2318}
2319
2320const Locale & U_EXPORT2
2321Locale::getItaly(void)
2322{
2323 return getLocale(eITALY);
2324}
2325
2326const Locale & U_EXPORT2
2327Locale::getJapan(void)
2328{
2329 return getLocale(eJAPAN);
2330}
2331
2332const Locale & U_EXPORT2
2333Locale::getKorea(void)
2334{
2335 return getLocale(eKOREA);
2336}
2337
2338const Locale & U_EXPORT2
2339Locale::getChina(void)
2340{
2341 return getLocale(eCHINA);
2342}
2343
2344const Locale & U_EXPORT2
2345Locale::getPRC(void)
2346{
2347 return getLocale(eCHINA);
2348}
2349
2350const Locale & U_EXPORT2
2351Locale::getTaiwan(void)
2352{
2353 return getLocale(eTAIWAN);
2354}
2355
2356const Locale & U_EXPORT2
2357Locale::getUK(void)
2358{
2359 return getLocale(eUK);
2360}
2361
2362const Locale & U_EXPORT2
2363Locale::getUS(void)
2364{
2365 return getLocale(eUS);
2366}
2367
2368const Locale & U_EXPORT2
2369Locale::getCanada(void)
2370{
2371 return getLocale(eCANADA);
2372}
2373
2374const Locale & U_EXPORT2
2375Locale::getCanadaFrench(void)
2376{
2377 return getLocale(eCANADA_FRENCH);
1
Calling 'Locale::getLocale'
2378}
2379
2380const Locale &
2381Locale::getLocale(int locid)
2382{
2383 Locale *localeCache = getLocaleCache();
2
'localeCache' initialized here
2384 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0))(void)0;
2385 if (localeCache == NULL__null) {
3
Assuming 'localeCache' is equal to NULL
4
Taking true branch
2386 // Failure allocating the locale cache.
2387 // The best we can do is return a NULL reference.
2388 locid = 0;
2389 }
2390 return localeCache[locid]; /*operating on NULL*/
5
Returning null reference
2391}
2392
2393/*
2394This function is defined this way in order to get around static
2395initialization and static destruction.
2396 */
2397Locale *
2398Locale::getLocaleCache(void)
2399{
2400 UErrorCode status = U_ZERO_ERROR;
2401 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
2402 return gLocaleCache;
2403}
2404
2405class KeywordEnumeration : public StringEnumeration {
2406private:
2407 char *keywords;
2408 char *current;
2409 int32_t length;
2410 UnicodeString currUSKey;
2411 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
2412
2413public:
2414 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
2415 virtual UClassID getDynamicClassID(void) const override { return getStaticClassID(); }
2416public:
2417 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
2418 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
2419 if(U_SUCCESS(status) && keywordLen != 0) {
2420 if(keys == NULL__null || keywordLen < 0) {
2421 status = U_ILLEGAL_ARGUMENT_ERROR;
2422 } else {
2423 keywords = (char *)uprv_mallocuprv_malloc_71(keywordLen+1);
2424 if (keywords == NULL__null) {
2425 status = U_MEMORY_ALLOCATION_ERROR;
2426 }
2427 else {
2428 uprv_memcpy(keywords, keys, keywordLen)do { clang diagnostic push clang diagnostic ignored "-Waddress"
(void)0; (void)0; clang diagnostic pop :: memcpy(keywords,
keys, keywordLen); } while (false)
;
2429 keywords[keywordLen] = 0;
2430 current = keywords + currentIndex;
2431 length = keywordLen;
2432 }
2433 }
2434 }
2435 }
2436
2437 virtual ~KeywordEnumeration();
2438
2439 virtual StringEnumeration * clone() const override
2440 {
2441 UErrorCode status = U_ZERO_ERROR;
2442 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
2443 }
2444
2445 virtual int32_t count(UErrorCode &/*status*/) const override {
2446 char *kw = keywords;
2447 int32_t result = 0;
2448 while(*kw) {
2449 result++;
2450 kw += uprv_strlen(kw):: strlen(kw)+1;
2451 }
2452 return result;
2453 }
2454
2455 virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2456 const char* result;
2457 int32_t len;
2458 if(U_SUCCESS(status) && *current != 0) {
2459 result = current;
2460 len = (int32_t)uprv_strlen(current):: strlen(current);
2461 current += len+1;
2462 if(resultLength != NULL__null) {
2463 *resultLength = len;
2464 }
2465 } else {
2466 if(resultLength != NULL__null) {
2467 *resultLength = 0;
2468 }
2469 result = NULL__null;
2470 }
2471 return result;
2472 }
2473
2474 virtual const UnicodeString* snext(UErrorCode& status) override {
2475 int32_t resultLength = 0;
2476 const char *s = next(&resultLength, status);
2477 return setChars(s, resultLength, status);
2478 }
2479
2480 virtual void reset(UErrorCode& /*status*/) override {
2481 current = keywords;
2482 }
2483};
2484
2485const char KeywordEnumeration::fgClassID = '\0';
2486
2487KeywordEnumeration::~KeywordEnumeration() {
2488 uprv_freeuprv_free_71(keywords);
2489}
2490
2491// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
2492// the next() method for each keyword before returning it.
2493class UnicodeKeywordEnumeration : public KeywordEnumeration {
2494public:
2495 using KeywordEnumeration::KeywordEnumeration;
2496 virtual ~UnicodeKeywordEnumeration();
2497
2498 virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2499 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
2500 while (U_SUCCESS(status) && legacy_key != nullptr) {
2501 const char* key = uloc_toUnicodeLocaleKeyuloc_toUnicodeLocaleKey_71(legacy_key);
2502 if (key != nullptr) {
2503 if (resultLength != nullptr) {
2504 *resultLength = static_cast<int32_t>(uprv_strlen(key):: strlen(key));
2505 }
2506 return key;
2507 }
2508 // Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
2509 legacy_key = KeywordEnumeration::next(nullptr, status);
2510 }
2511 if (resultLength != nullptr) *resultLength = 0;
2512 return nullptr;
2513 }
2514};
2515
2516// Out-of-line virtual destructor to serve as the "key function".
2517UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
2518
2519StringEnumeration *
2520Locale::createKeywords(UErrorCode &status) const
2521{
2522 StringEnumeration *result = NULL__null;
2523
2524 if (U_FAILURE(status)) {
2525 return result;
2526 }
2527
2528 const char* variantStart = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2529 const char* assignment = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2530 if(variantStart) {
2531 if(assignment > variantStart) {
2532 CharString keywords;
2533 CharStringByteSink sink(&keywords);
2534 ulocimp_getKeywordsulocimp_getKeywords_71(variantStart+1, '@', sink, FALSE0, &status);
2535 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2536 result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2537 if (!result) {
2538 status = U_MEMORY_ALLOCATION_ERROR;
2539 }
2540 }
2541 } else {
2542 status = U_INVALID_FORMAT_ERROR;
2543 }
2544 }
2545 return result;
2546}
2547
2548StringEnumeration *
2549Locale::createUnicodeKeywords(UErrorCode &status) const
2550{
2551 StringEnumeration *result = NULL__null;
2552
2553 if (U_FAILURE(status)) {
2554 return result;
2555 }
2556
2557 const char* variantStart = uprv_strchr(fullName, '@'):: strchr(fullName, '@');
2558 const char* assignment = uprv_strchr(fullName, '='):: strchr(fullName, '=');
2559 if(variantStart) {
2560 if(assignment > variantStart) {
2561 CharString keywords;
2562 CharStringByteSink sink(&keywords);
2563 ulocimp_getKeywordsulocimp_getKeywords_71(variantStart+1, '@', sink, FALSE0, &status);
2564 if (U_SUCCESS(status) && !keywords.isEmpty()) {
2565 result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
2566 if (!result) {
2567 status = U_MEMORY_ALLOCATION_ERROR;
2568 }
2569 }
2570 } else {
2571 status = U_INVALID_FORMAT_ERROR;
2572 }
2573 }
2574 return result;
2575}
2576
2577int32_t
2578Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
2579{
2580 return uloc_getKeywordValueuloc_getKeywordValue_71(fullName, keywordName, buffer, bufLen, &status);
2581}
2582
2583void
2584Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
2585 if (U_FAILURE(status)) {
2586 return;
2587 }
2588
2589 if (fIsBogus) {
2590 status = U_ILLEGAL_ARGUMENT_ERROR;
2591 return;
2592 }
2593
2594 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2595 const CharString keywordName_nul(keywordName, status);
2596 if (U_FAILURE(status)) {
2597 return;
2598 }
2599
2600 ulocimp_getKeywordValueulocimp_getKeywordValue_71(fullName, keywordName_nul.data(), sink, &status);
2601}
2602
2603void
2604Locale::getUnicodeKeywordValue(StringPiece keywordName,
2605 ByteSink& sink,
2606 UErrorCode& status) const {
2607 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2608 const CharString keywordName_nul(keywordName, status);
2609 if (U_FAILURE(status)) {
2610 return;
2611 }
2612
2613 const char* legacy_key = uloc_toLegacyKeyuloc_toLegacyKey_71(keywordName_nul.data());
2614
2615 if (legacy_key == nullptr) {
2616 status = U_ILLEGAL_ARGUMENT_ERROR;
2617 return;
2618 }
2619
2620 CharString legacy_value;
2621 {
2622 CharStringByteSink sink(&legacy_value);
2623 getKeywordValue(legacy_key, sink, status);
2624 }
2625
2626 if (U_FAILURE(status)) {
2627 return;
2628 }
2629
2630 const char* unicode_value = uloc_toUnicodeLocaleTypeuloc_toUnicodeLocaleType_71(
2631 keywordName_nul.data(), legacy_value.data());
2632
2633 if (unicode_value == nullptr) {
2634 status = U_ILLEGAL_ARGUMENT_ERROR;
2635 return;
2636 }
2637
2638 sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value):: strlen(unicode_value)));
2639}
2640
2641void
2642Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
2643{
2644 if (U_FAILURE(status)) {
2645 return;
2646 }
2647 if (status == U_STRING_NOT_TERMINATED_WARNING) {
2648 status = U_ZERO_ERROR;
2649 }
2650 int32_t bufferLength = uprv_maxuprv_max_71((int32_t)(uprv_strlen(fullName):: strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY157);
2651 int32_t newLength = uloc_setKeywordValueuloc_setKeywordValue_71(keywordName, keywordValue, fullName,
2652 bufferLength, &status) + 1;
2653 U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING)(void)0;
2654 /* Handle the case the current buffer is not enough to hold the new id */
2655 if (status == U_BUFFER_OVERFLOW_ERROR) {
2656 U_ASSERT(newLength > bufferLength)(void)0;
2657 char* newFullName = (char *)uprv_mallocuprv_malloc_71(newLength);
2658 if (newFullName == nullptr) {
2659 status = U_MEMORY_ALLOCATION_ERROR;
2660 return;
2661 }
2662 uprv_strcpy(newFullName, fullName):: strcpy(newFullName, fullName);
2663 if (fullName != fullNameBuffer) {
2664 // if full Name is already on the heap, need to free it.
2665 uprv_freeuprv_free_71(fullName);
2666 if (baseName == fullName) {
2667 baseName = newFullName; // baseName should not point to freed memory.
2668 }
2669 }
2670 fullName = newFullName;
2671 status = U_ZERO_ERROR;
2672 uloc_setKeywordValueuloc_setKeywordValue_71(keywordName, keywordValue, fullName, newLength, &status);
2673 U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING)(void)0;
2674 } else {
2675 U_ASSERT(newLength <= bufferLength)(void)0;
2676 }
2677 if (U_SUCCESS(status) && baseName == fullName) {
2678 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
2679 initBaseName(status);
2680 }
2681}
2682
2683void
2684Locale::setKeywordValue(StringPiece keywordName,
2685 StringPiece keywordValue,
2686 UErrorCode& status) {
2687 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2688 const CharString keywordName_nul(keywordName, status);
2689 const CharString keywordValue_nul(keywordValue, status);
2690 setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
2691}
2692
2693void
2694Locale::setUnicodeKeywordValue(StringPiece keywordName,
2695 StringPiece keywordValue,
2696 UErrorCode& status) {
2697 // TODO: Remove the need for a const char* to a NUL terminated buffer.
2698 const CharString keywordName_nul(keywordName, status);
2699 const CharString keywordValue_nul(keywordValue, status);
2700
2701 if (U_FAILURE(status)) {
2702 return;
2703 }
2704
2705 const char* legacy_key = uloc_toLegacyKeyuloc_toLegacyKey_71(keywordName_nul.data());
2706
2707 if (legacy_key == nullptr) {
2708 status = U_ILLEGAL_ARGUMENT_ERROR;
2709 return;
2710 }
2711
2712 const char* legacy_value = nullptr;
2713
2714 if (!keywordValue_nul.isEmpty()) {
2715 legacy_value =
2716 uloc_toLegacyTypeuloc_toLegacyType_71(keywordName_nul.data(), keywordValue_nul.data());
2717
2718 if (legacy_value == nullptr) {
2719 status = U_ILLEGAL_ARGUMENT_ERROR;
2720 return;
2721 }
2722 }
2723
2724 setKeywordValue(legacy_key, legacy_value, status);
2725}
2726
2727const char *
2728Locale::getBaseName() const {
2729 return baseName;
2730}
2731
2732Locale::Iterator::~Iterator() = default;
2733
2734//eof
2735U_NAMESPACE_END}