../deps/icu-small/source/common/normalizer2impl.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/normalizer2impl.cpp
Warning:	line 2794, column 9 Value stored to 'offset' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name normalizer2impl.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/normalizer2impl.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2009-2014, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: normalizer2impl.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2009nov22
16	* created by: Markus W. Scherer
17	*/
18
19	// #define UCPTRIE_DEBUG
20
21	#include "unicode/utypes.h"
22
23	#if !UCONFIG_NO_NORMALIZATION0
24
25	#include "unicode/bytestream.h"
26	#include "unicode/edits.h"
27	#include "unicode/normalizer2.h"
28	#include "unicode/stringoptions.h"
29	#include "unicode/ucptrie.h"
30	#include "unicode/udata.h"
31	#include "unicode/umutablecptrie.h"
32	#include "unicode/ustring.h"
33	#include "unicode/utf16.h"
34	#include "unicode/utf8.h"
35	#include "bytesinkutil.h"
36	#include "cmemory.h"
37	#include "mutex.h"
38	#include "normalizer2impl.h"
39	#include "putilimp.h"
40	#include "uassert.h"
41	#include "ucptrie_impl.h"
42	#include "uset_imp.h"
43	#include "uvector.h"
44
45	U_NAMESPACE_BEGINnamespace icu_71 {
46
47	namespace {
48
49	/**
50	* UTF-8 lead byte for minNoMaybeCP.
51	* Can be lower than the actual lead byte for c.
52	* Typically U+0300 for NFC/NFD, U+00A0 for NFKC/NFKD, U+0041 for NFKC_Casefold.
53	*/
54	inline uint8_t leadByteForCP(UChar32 c) {
55	if (c <= 0x7f) {
56	return (uint8_t)c;
57	} else if (c <= 0x7ff) {
58	return (uint8_t)(0xc0+(c>>6));
59	} else {
60	// Should not occur because ccc(U+0300)!=0.
61	return 0xe0;
62	}
63	}
64
65	/**
66	* Returns the code point from one single well-formed UTF-8 byte sequence
67	* between cpStart and cpLimit.
68	*
69	* Trie UTF-8 macros do not assemble whole code points (for efficiency).
70	* When we do need the code point, we call this function.
71	* We should not need it for normalization-inert data (norm16==0).
72	* Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
73	*/
74	UChar32 codePointFromValidUTF8(const uint8_t cpStart, const uint8_t cpLimit) {
75	// Similar to U8_NEXT_UNSAFE(s, i, c).
76	U_ASSERT(cpStart < cpLimit)(void)0;
77	uint8_t c = *cpStart;
78	switch(cpLimit-cpStart) {
79	case 1:
80	return c;
81	case 2:
82	return ((c&0x1f)<<6) \| (cpStart[1]&0x3f);
83	case 3:
84	// no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar)
85	return (UChar)((c<<12) \| ((cpStart[1]&0x3f)<<6) \| (cpStart[2]&0x3f));
86	case 4:
87	return ((c&7)<<18) \| ((cpStart[1]&0x3f)<<12) \| ((cpStart[2]&0x3f)<<6) \| (cpStart[3]&0x3f);
88	default:
89	UPRV_UNREACHABLE_EXITabort(); // Should not occur.
90	}
91	}
92
93	/**
94	* Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF.
95	* Otherwise returns a negative value.
96	*/
97	UChar32 previousHangulOrJamo(const uint8_t start, const uint8_t p) {
98	if ((p - start) >= 3) {
99	p -= 3;
100	uint8_t l = *p;
101	uint8_t t1, t2;
102	if (0xe1 <= l && l <= 0xed &&
103	(t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f &&
104	(t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f &&
105	(l < 0xed \|\| t1 <= 0x1f)) {
106	return ((l & 0xf) << 12) \| (t1 << 6) \| t2;
107	}
108	}
109	return U_SENTINEL(-1);
110	}
111
112	/**
113	* Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point.
114	* Otherwise returns a negative value.
115	*/
116	int32_t getJamoTMinusBase(const uint8_t src, const uint8_t limit) {
117	// Jamo T: E1 86 A8..E1 87 82
118	if ((limit - src) >= 3 && *src == 0xe1) {
119	if (src[1] == 0x86) {
120	uint8_t t = src[2];
121	// The first Jamo T is U+11A8 but JAMO_T_BASE is 11A7.
122	// Offset 0 does not correspond to any conjoining Jamo.
123	if (0xa8 <= t && t <= 0xbf) {
124	return t - 0xa7;
125	}
126	} else if (src[1] == 0x87) {
127	uint8_t t = src[2];
128	if ((int8_t)t <= (int8_t)0x82u) {
129	return t - (0xa7 - 0x40);
130	}
131	}
132	}
133	return -1;
134	}
135
136	void
137	appendCodePointDelta(const uint8_t cpStart, const uint8_t cpLimit, int32_t delta,
138	ByteSink &sink, Edits *edits) {
139	char buffer[U8_MAX_LENGTH4];
140	int32_t length;
141	int32_t cpLength = (int32_t)(cpLimit - cpStart);
142	if (cpLength == 1) {
143	// The builder makes ASCII map to ASCII.
144	buffer[0] = (uint8_t)(*cpStart + delta);
145	length = 1;
146	} else {
147	int32_t trail = *(cpLimit-1) + delta;
148	if (0x80 <= trail && trail <= 0xbf) {
149	// The delta only changes the last trail byte.
150	--cpLimit;
151	length = 0;
152	do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit);
153	buffer[length++] = (uint8_t)trail;
154	} else {
155	// Decode the code point, add the delta, re-encode.
156	UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta;
157	length = 0;
158	U8_APPEND_UNSAFE(buffer, length, c)do { uint32_t __uc=(c); if(__uc<=0x7f) { (buffer)[(length) ++]=(uint8_t)__uc; } else { if(__uc<=0x7ff) { (buffer)[(length )++]=(uint8_t)((__uc>>6)\|0xc0); } else { if(__uc<=0xffff ) { (buffer)[(length)++]=(uint8_t)((__uc>>12)\|0xe0); } else { (buffer)[(length)++]=(uint8_t)((__uc>>18)\|0xf0); (buffer )[(length)++]=(uint8_t)(((__uc>>12)&0x3f)\|0x80); } ( buffer)[(length)++]=(uint8_t)(((__uc>>6)&0x3f)\|0x80 ); } (buffer)[(length)++]=(uint8_t)((__uc&0x3f)\|0x80); } } while (false);
159	}
160	}
161	if (edits != nullptr) {
162	edits->addReplace(cpLength, length);
163	}
164	sink.Append(buffer, length);
165	}
166
167	} // namespace
168
169	// ReorderingBuffer -------------------------------------------------------- ***
170
171	ReorderingBuffer::ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest,
172	UErrorCode &errorCode) :
173	impl(ni), str(dest),
174	start(str.getBuffer(8)), reorderStart(start), limit(start),
175	remainingCapacity(str.getCapacity()), lastCC(0) {
176	if (start == nullptr && U_SUCCESS(errorCode)) {
177	// getBuffer() already did str.setToBogus()
178	errorCode = U_MEMORY_ALLOCATION_ERROR;
179	}
180	}
181
182	UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
183	int32_t length=str.length();
184	start=str.getBuffer(destCapacity);
185	if(start==NULL__null) {
186	// getBuffer() already did str.setToBogus()
187	errorCode=U_MEMORY_ALLOCATION_ERROR;
188	return FALSE0;
189	}
190	limit=start+length;
191	remainingCapacity=str.getCapacity()-length;
192	reorderStart=start;
193	if(start==limit) {
194	lastCC=0;
195	} else {
196	setIterator();
197	lastCC=previousCC();
198	// Set reorderStart after the last code point with cc<=1 if there is one.
199	if(lastCC>1) {
200	while(previousCC()>1) {}
201	}
202	reorderStart=codePointLimit;
203	}
204	return TRUE1;
205	}
206
207	UBool ReorderingBuffer::equals(const UChar otherStart, const UChar otherLimit) const {
208	int32_t length=(int32_t)(limit-start);
209	return
210	length==(int32_t)(otherLimit-otherStart) &&
211	0==u_memcmpu_memcmp_71(start, otherStart, length);
212	}
213
214	UBool ReorderingBuffer::equals(const uint8_t otherStart, const uint8_t otherLimit) const {
215	U_ASSERT((otherLimit - otherStart) <= INT32_MAX)(void)0; // ensured by caller
216	int32_t length = (int32_t)(limit - start);
217	int32_t otherLength = (int32_t)(otherLimit - otherStart);
218	// For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long.
219	if (otherLength < length \|\| (otherLength / 3) > length) {
220	return FALSE0;
221	}
222	// Compare valid strings from between normalization boundaries.
223	// (Invalid sequences are normalization-inert.)
224	for (int32_t i = 0, j = 0;;) {
225	if (i >= length) {
226	return j >= otherLength;
227	} else if (j >= otherLength) {
228	return FALSE0;
229	}
230	// Not at the end of either string yet.
231	UChar32 c, other;
232	U16_NEXT_UNSAFE(start, i, c)do { (c)=(start)[(i)++]; if((((c)&0xfffffc00)==0xd800)) { (c)=(((UChar32)((c))<<10UL)+(UChar32)((start)[(i)++])- ((0xd800<<10UL)+0xdc00-0x10000)); } } while (false);
233	U8_NEXT_UNSAFE(otherStart, j, other)do { (other)=(uint8_t)(otherStart)[(j)++]; if(!(((other)& 0x80)==0)) { if((other)<0xe0) { (other)=(((other)&0x1f )<<6)\|((otherStart)[(j)++]&0x3f); } else if((other) <0xf0) { (other)=(UChar)(((other)<<12)\|(((otherStart )[j]&0x3f)<<6)\|((otherStart)[(j)+1]&0x3f)); (j) +=2; } else { (other)=(((other)&7)<<18)\|(((otherStart )[j]&0x3f)<<12)\|(((otherStart)[(j)+1]&0x3f)<< 6)\|((otherStart)[(j)+2]&0x3f); (j)+=3; } } } while (false );
234	if (c != other) {
235	return FALSE0;
236	}
237	}
238	}
239
240	UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
241	if(remainingCapacity<2 && !resize(2, errorCode)) {
242	return FALSE0;
243	}
244	if(lastCC<=cc \|\| cc==0) {
245	limit[0]=U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
246	limit[1]=U16_TRAIL(c)(UChar)(((c)&0x3ff)\|0xdc00);
247	limit+=2;
248	lastCC=cc;
249	if(cc<=1) {
250	reorderStart=limit;
251	}
252	} else {
253	insert(c, cc);
254	}
255	remainingCapacity-=2;
256	return TRUE1;
257	}
258
259	UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD,
260	uint8_t leadCC, uint8_t trailCC,
261	UErrorCode &errorCode) {
262	if(length==0) {
263	return TRUE1;
264	}
265	if(remainingCapacity<length && !resize(length, errorCode)) {
266	return FALSE0;
267	}
268	remainingCapacity-=length;
269	if(lastCC<=leadCC \|\| leadCC==0) {
270	if(trailCC<=1) {
271	reorderStart=limit+length;
272	} else if(leadCC<=1) {
273	reorderStart=limit+1; // Ok if not a code point boundary.
274	}
275	const UChar *sLimit=s+length;
276	do { limit++=s++; } while(s!=sLimit);
277	lastCC=trailCC;
278	} else {
279	int32_t i=0;
280	UChar32 c;
281	U16_NEXT(s, i, length, c)do { (c)=(s)[(i)++]; if((((c)&0xfffffc00)==0xd800)) { uint16_t __c2; if((i)!=(length) && (((__c2=(s)[(i)])&0xfffffc00 )==0xdc00)) { ++(i); (c)=(((UChar32)((c))<<10UL)+(UChar32 )(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); } } } while ( false);
282	insert(c, leadCC); // insert first code point
283	while(i<length) {
284	U16_NEXT(s, i, length, c)do { (c)=(s)[(i)++]; if((((c)&0xfffffc00)==0xd800)) { uint16_t __c2; if((i)!=(length) && (((__c2=(s)[(i)])&0xfffffc00 )==0xdc00)) { ++(i); (c)=(((UChar32)((c))<<10UL)+(UChar32 )(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); } } } while ( false);
285	if(i<length) {
286	if (isNFD) {
287	leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
288	} else {
289	leadCC = impl.getCC(impl.getNorm16(c));
290	}
291	} else {
292	leadCC=trailCC;
293	}
294	append(c, leadCC, errorCode);
295	}
296	}
297	return TRUE1;
298	}
299
300	UBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
301	int32_t cpLength=U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
302	if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
303	return FALSE0;
304	}
305	remainingCapacity-=cpLength;
306	if(cpLength==1) {
307	*limit++=(UChar)c;
308	} else {
309	limit[0]=U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
310	limit[1]=U16_TRAIL(c)(UChar)(((c)&0x3ff)\|0xdc00);
311	limit+=2;
312	}
313	lastCC=0;
314	reorderStart=limit;
315	return TRUE1;
316	}
317
318	UBool ReorderingBuffer::appendZeroCC(const UChar s, const UChar sLimit, UErrorCode &errorCode) {
319	if(s==sLimit) {
320	return TRUE1;
321	}
322	int32_t length=(int32_t)(sLimit-s);
323	if(remainingCapacity<length && !resize(length, errorCode)) {
324	return FALSE0;
325	}
326	u_memcpyu_memcpy_71(limit, s, length);
327	limit+=length;
328	remainingCapacity-=length;
329	lastCC=0;
330	reorderStart=limit;
331	return TRUE1;
332	}
333
334	void ReorderingBuffer::remove() {
335	reorderStart=limit=start;
336	remainingCapacity=str.getCapacity();
337	lastCC=0;
338	}
339
340	void ReorderingBuffer::removeSuffix(int32_t suffixLength) {
341	if(suffixLength<(limit-start)) {
342	limit-=suffixLength;
343	remainingCapacity+=suffixLength;
344	} else {
345	limit=start;
346	remainingCapacity=str.getCapacity();
347	}
348	lastCC=0;
349	reorderStart=limit;
350	}
351
352	UBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
353	int32_t reorderStartIndex=(int32_t)(reorderStart-start);
354	int32_t length=(int32_t)(limit-start);
355	str.releaseBuffer(length);
356	int32_t newCapacity=length+appendLength;
357	int32_t doubleCapacity=2*str.getCapacity();
358	if(newCapacity<doubleCapacity) {
359	newCapacity=doubleCapacity;
360	}
361	if(newCapacity<256) {
362	newCapacity=256;
363	}
364	start=str.getBuffer(newCapacity);
365	if(start==NULL__null) {
366	// getBuffer() already did str.setToBogus()
367	errorCode=U_MEMORY_ALLOCATION_ERROR;
368	return FALSE0;
369	}
370	reorderStart=start+reorderStartIndex;
371	limit=start+length;
372	remainingCapacity=str.getCapacity()-length;
373	return TRUE1;
374	}
375
376	void ReorderingBuffer::skipPrevious() {
377	codePointLimit=codePointStart;
378	UChar c=*--codePointStart;
379	if(U16_IS_TRAIL(c)(((c)&0xfffffc00)==0xdc00) && start<codePointStart && U16_IS_LEAD((codePointStart-1))((((codePointStart-1))&0xfffffc00)==0xd800)) {
380	--codePointStart;
381	}
382	}
383
384	uint8_t ReorderingBuffer::previousCC() {
385	codePointLimit=codePointStart;
386	if(reorderStart>=codePointStart) {
387	return 0;
388	}
389	UChar32 c=*--codePointStart;
390	UChar c2;
391	if(U16_IS_TRAIL(c)(((c)&0xfffffc00)==0xdc00) && start<codePointStart && U16_IS_LEAD(c2=(codePointStart-1))(((c2=(codePointStart-1))&0xfffffc00)==0xd800)) {
392	--codePointStart;
393	c=U16_GET_SUPPLEMENTARY(c2, c)(((UChar32)(c2)<<10UL)+(UChar32)(c)-((0xd800<<10UL )+0xdc00-0x10000));
394	}
395	return impl.getCCFromYesOrMaybeCP(c);
396	}
397
398	// Inserts c somewhere before the last character.
399	// Requires 0<cc<lastCC which implies reorderStart<limit.
400	void ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
401	for(setIterator(), skipPrevious(); previousCC()>cc;) {}
402	// insert c at codePointLimit, after the character with prevCC<=cc
403	UChar *q=limit;
404	UChar *r=limit+=U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
405	do {
406	--r=--q;
407	} while(codePointLimit!=q);
408	writeCodePoint(q, c);
409	if(cc<=1) {
410	reorderStart=r;
411	}
412	}
413
414	// Normalizer2Impl --------------------------------------------------------- ***
415
416	struct CanonIterData : public UMemory {
417	CanonIterData(UErrorCode &errorCode);
418	~CanonIterData();
419	void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
420	UMutableCPTrie *mutableTrie;
421	UCPTrie *trie;
422	UVector canonStartSets; // contains UnicodeSet *
423	};
424
425	Normalizer2Impl::~Normalizer2Impl() {
426	delete fCanonIterData;
427	}
428
429	void
430	Normalizer2Impl::init(const int32_t inIndexes, const UCPTrie inTrie,
431	const uint16_t inExtraData, const uint8_t inSmallFCD) {
432	minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]);
433	minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
434	minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]);
435
436	minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]);
437	minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
438	minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
439	minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
440	minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
441	minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
442	limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
443	minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
444	U_ASSERT((minMaybeYes & 7) == 0)(void)0; // 8-aligned for noNoDelta bit fields
445	centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
446
447	normTrie=inTrie;
448
449	maybeYesCompositions=inExtraData;
450	extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
451
452	smallFCD=inSmallFCD;
453	}
454
455	U_CDECL_BEGINextern "C" {
456
457	static uint32_t U_CALLCONV
458	segmentStarterMapper(const void * /context/, uint32_t value) {
459	return value&CANON_NOT_SEGMENT_STARTER0x80000000;
460	}
461
462	U_CDECL_END}
463
464	void
465	Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
466	UChar32 start = 0, end;
467	uint32_t norm16;
468	while ((end = ucptrie_getRangeucptrie_getRange_71(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
469	nullptr, nullptr, &norm16)) >= 0) {
470	if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
471	norm16 != Normalizer2Impl::JAMO_VT) {
472	set.add(start, end);
473	} else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
474	uint16_t fcd16 = getFCD16(start);
475	if (fcd16 > 0xff) { set.add(start, end); }
476	}
477	start = end + 1;
478	}
479	}
480
481	void
482	Normalizer2Impl::addPropertyStarts(const USetAdder sa, UErrorCode & /errorCode*/) const {
483	// Add the start code point of each same-value range of the trie.
484	UChar32 start = 0, end;
485	uint32_t value;
486	while ((end = ucptrie_getRangeucptrie_getRange_71(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
487	nullptr, nullptr, &value)) >= 0) {
488	sa->add(sa->set, start);
489	if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
490	(value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
491	// Range of code points with same-norm16-value algorithmic decompositions.
492	// They might have different non-zero FCD16 values.
493	uint16_t prevFCD16 = getFCD16(start);
494	while (++start <= end) {
495	uint16_t fcd16 = getFCD16(start);
496	if (fcd16 != prevFCD16) {
497	sa->add(sa->set, start);
498	prevFCD16 = fcd16;
499	}
500	}
501	}
502	start = end + 1;
503	}
504
505	/* add Hangul LV syllables and LV+1 because of skippables */
506	for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
507	sa->add(sa->set, c);
508	sa->add(sa->set, c+1);
509	}
510	sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
511	}
512
513	void
514	Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
515	// Add the start code point of each same-value range of the canonical iterator data trie.
516	if (!ensureCanonIterData(errorCode)) { return; }
517	// Currently only used for the SEGMENT_STARTER property.
518	UChar32 start = 0, end;
519	uint32_t value;
520	while ((end = ucptrie_getRangeucptrie_getRange_71(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
521	segmentStarterMapper, nullptr, &value)) >= 0) {
522	sa->add(sa->set, start);
523	start = end + 1;
524	}
525	}
526
527	const UChar *
528	Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
529	UChar32 minNeedDataCP,
530	ReorderingBuffer *buffer,
531	UErrorCode &errorCode) const {
532	// Make some effort to support NUL-terminated strings reasonably.
533	// Take the part of the fast quick check loop that does not look up
534	// data and check the first part of the string.
535	// After this prefix, determine the string length to simplify the rest
536	// of the code.
537	const UChar *prevSrc=src;
538	UChar c;
539	while((c=*src++)<minNeedDataCP && c!=0) {}
540	// Back out the last character for full processing.
541	// Copy this prefix.
542	if(--src!=prevSrc) {
543	if(buffer!=NULL__null) {
544	buffer->appendZeroCC(prevSrc, src, errorCode);
545	}
546	}
547	return src;
548	}
549
550	UnicodeString &
551	Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
552	UErrorCode &errorCode) const {
553	if(U_FAILURE(errorCode)) {
554	dest.setToBogus();
555	return dest;
556	}
557	const UChar *sArray=src.getBuffer();
558	if(&dest==&src \|\| sArray==NULL__null) {
559	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
560	dest.setToBogus();
561	return dest;
562	}
563	decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);
564	return dest;
565	}
566
567	void
568	Normalizer2Impl::decompose(const UChar src, const UChar limit,
569	UnicodeString &dest,
570	int32_t destLengthEstimate,
571	UErrorCode &errorCode) const {
572	if(destLengthEstimate<0 && limit!=NULL__null) {
573	destLengthEstimate=(int32_t)(limit-src);
574	}
575	dest.remove();
576	ReorderingBuffer buffer(*this, dest);
577	if(buffer.init(destLengthEstimate, errorCode)) {
578	decompose(src, limit, &buffer, errorCode);
579	}
580	}
581
582	// Dual functionality:
583	// buffer!=NULL: normalize
584	// buffer==NULL: isNormalized/spanQuickCheckYes
585	const UChar *
586	Normalizer2Impl::decompose(const UChar src, const UChar limit,
587	ReorderingBuffer *buffer,
588	UErrorCode &errorCode) const {
589	UChar32 minNoCP=minDecompNoCP;
590	if(limit==NULL__null) {
591	src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
592	if(U_FAILURE(errorCode)) {
593	return src;
594	}
595	limit=u_strchru_strchr_71(src, 0);
596	}
597
598	const UChar *prevSrc;
599	UChar32 c=0;
600	uint16_t norm16=0;
601
602	// only for quick check
603	const UChar *prevBoundary=src;
604	uint8_t prevCC=0;
605
606	for(;;) {
607	// count code units below the minimum or with irrelevant data for the quick check
608	for(prevSrc=src; src!=limit;) {
609	if( (c=*src)<minNoCP \|\|
610	isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c)((normTrie)->data.ptr16[((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK ))]))
611	) {
612	++src;
613	} else if(!U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
614	break;
615	} else {
616	UChar c2;
617	if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])(((c2=src[1])&0xfffffc00)==0xdc00)) {
618	c=U16_GET_SUPPLEMENTARY(c, c2)(((UChar32)(c)<<10UL)+(UChar32)(c2)-((0xd800<<10UL )+0xdc00-0x10000));
619	norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c)((normTrie)->data.ptr16[((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c))]);
620	if(isMostDecompYesAndZeroCC(norm16)) {
621	src+=2;
622	} else {
623	break;
624	}
625	} else {
626	++src; // unpaired lead surrogate: inert
627	}
628	}
629	}
630	// copy these code units all at once
631	if(src!=prevSrc) {
632	if(buffer!=NULL__null) {
633	if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
634	break;
635	}
636	} else {
637	prevCC=0;
638	prevBoundary=src;
639	}
640	}
641	if(src==limit) {
642	break;
643	}
644
645	// Check one above-minimum, relevant code point.
646	src+=U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
647	if(buffer!=NULL__null) {
648	if(!decompose(c, norm16, *buffer, errorCode)) {
649	break;
650	}
651	} else {
652	if(isDecompYes(norm16)) {
653	uint8_t cc=getCCFromYesOrMaybe(norm16);
654	if(prevCC<=cc \|\| cc==0) {
655	prevCC=cc;
656	if(cc<=1) {
657	prevBoundary=src;
658	}
659	continue;
660	}
661	}
662	return prevBoundary; // "no" or cc out of order
663	}
664	}
665	return src;
666	}
667
668	// Decompose a short piece of text which is likely to contain characters that
669	// fail the quick check loop and/or where the quick check loop's overhead
670	// is unlikely to be amortized.
671	// Called by the compose() and makeFCD() implementations.
672	const UChar *
673	Normalizer2Impl::decomposeShort(const UChar src, const UChar limit,
674	UBool stopAtCompBoundary, UBool onlyContiguous,
675	ReorderingBuffer &buffer, UErrorCode &errorCode) const {
676	if (U_FAILURE(errorCode)) {
677	return nullptr;
678	}
679	while(src<limit) {
680	if (stopAtCompBoundary && *src < minCompNoMaybeCP) {
681	return src;
682	}
683	const UChar *prevSrc = src;
684	UChar32 c;
685	uint16_t norm16;
686	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16)do { (c) = (src)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (src ) != (limit) && (((__c2 = (src))&0xfffffc00)==0xdc00 )) { ++(src); (c) = (((UChar32)((c))<<10UL)+(UChar32)(__c2 )-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (norm16) = ((normTrie)->data.ptr16[__index]); } while (false);
687	if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
688	return prevSrc;
689	}
690	if(!decompose(c, norm16, buffer, errorCode)) {
691	return nullptr;
692	}
693	if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
694	return src;
695	}
696	}
697	return src;
698	}
699
700	UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
701	ReorderingBuffer &buffer,
702	UErrorCode &errorCode) const {
703	// get the decomposition and the lead and trail cc's
704	if (norm16 >= limitNoNo) {
705	if (isMaybeOrNonZeroCC(norm16)) {
706	return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
707	}
708	// Maps to an isCompYesAndZeroCC.
709	c=mapAlgorithmic(c, norm16);
710	norm16=getRawNorm16(c);
711	}
712	if (norm16 < minYesNo) {
713	// c does not decompose
714	return buffer.append(c, 0, errorCode);
715	} else if(isHangulLV(norm16) \|\| isHangulLVT(norm16)) {
716	// Hangul syllable: decompose algorithmically
717	UChar jamos[3];
718	return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
719	}
720	// c decomposes, get everything from the variable-length extra data
721	const uint16_t *mapping=getMapping(norm16);
722	uint16_t firstUnit=*mapping;
723	int32_t length=firstUnit&MAPPING_LENGTH_MASK;
724	uint8_t leadCC, trailCC;
725	trailCC=(uint8_t)(firstUnit>>8);
726	if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
727	leadCC=(uint8_t)(*(mapping-1)>>8);
728	} else {
729	leadCC=0;
730	}
731	return buffer.append((const UChar *)mapping+1, length, TRUE1, leadCC, trailCC, errorCode);
732	}
733
734	// Dual functionality:
735	// sink != nullptr: normalize
736	// sink == nullptr: isNormalized/spanQuickCheckYes
737	const uint8_t *
738	Normalizer2Impl::decomposeUTF8(uint32_t options,
739	const uint8_t src, const uint8_t limit,
740	ByteSink sink, Edits edits, UErrorCode &errorCode) const {
741	U_ASSERT(limit != nullptr)(void)0;
742	UnicodeString s16;
743	uint8_t minNoLead = leadByteForCP(minDecompNoCP);
744
745	const uint8_t *prevBoundary = src;
746	// only for quick check
747	uint8_t prevCC = 0;
748
749	for (;;) {
750	// Fast path: Scan over a sequence of characters below the minimum "no" code point,
751	// or with (decompYes && ccc==0) properties.
752	const uint8_t *fastStart = src;
753	const uint8_t *prevSrc;
754	uint16_t norm16 = 0;
755
756	for (;;) {
757	if (src == limit) {
758	if (prevBoundary != limit && sink != nullptr) {
759	ByteSinkUtil::appendUnchanged(prevBoundary, limit,
760	*sink, options, edits, errorCode);
761	}
762	return src;
763	}
764	if (*src < minNoLead) {
765	++src;
766	} else {
767	prevSrc = src;
768	UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16)do { int32_t __lead = (uint8_t)(src)++; if (!(((__lead)& 0x80)==0)) { uint8_t __t1, __t2, __t3; if ((src) != (limit) && (__lead >= 0xe0 ? __lead < 0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [__lead &= 0xf] & (1 << ((__t1 = (src)) >> 5)) && ++(src) != (limit) && (__t2 = (src) - 0x80) <= 0x3f && (__lead = ((int32_t)(normTrie)-> index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) : ( __lead -= 0xf0) <= 4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" [(__t1 = (src)) >> 4] & (1 << __lead) && (__lead = (__lead << 6) \| (__t1 & 0x3f), ++(src) != (limit)) && (__t2 = (src) - 0x80) <= 0x3f && ++(src) != (limit) && (__t3 = (src) - 0x80) <= 0x3f && (__lead = __lead >= (normTrie)->shifted12HighStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallU8Index_71((normTrie), __lead, __t2, __t3 ), 1) : __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && (__lead = (int32_t)(normTrie)->index[__lead & 0x1f] + __t1, 1))) { ++(src); } else { __lead = (normTrie )->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } ( norm16) = ((normTrie)->data.ptr16[__lead]); } while (false );
769	if (!isMostDecompYesAndZeroCC(norm16)) {
770	break;
771	}
772	}
773	}
774	// isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo,
775	// and the current character at [prevSrc..src[ is not a common case with cc=0
776	// (MIN_NORMAL_MAYBE_YES or JAMO_VT).
777	// It could still be a maybeYes with cc=0.
778	if (prevSrc != fastStart) {
779	// The fast path looped over yes/0 characters before the current one.
780	if (sink != nullptr &&
781	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
782	*sink, options, edits, errorCode)) {
783	break;
784	}
785	prevBoundary = prevSrc;
786	prevCC = 0;
787	}
788
789	// Medium-fast path: Quick check.
790	if (isMaybeOrNonZeroCC(norm16)) {
791	// Does not decompose.
792	uint8_t cc = getCCFromYesOrMaybe(norm16);
793	if (prevCC <= cc \|\| cc == 0) {
794	prevCC = cc;
795	if (cc <= 1) {
796	if (sink != nullptr &&
797	!ByteSinkUtil::appendUnchanged(prevBoundary, src,
798	*sink, options, edits, errorCode)) {
799	break;
800	}
801	prevBoundary = src;
802	}
803	continue;
804	}
805	}
806	if (sink == nullptr) {
807	return prevBoundary; // quick check: "no" or cc out of order
808	}
809
810	// Slow path
811	// Decompose up to and including the current character.
812	if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) {
813	if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
814	*sink, options, edits, errorCode)) {
815	break;
816	}
817	prevBoundary = prevSrc;
818	}
819	ReorderingBuffer buffer(*this, s16, errorCode);
820	if (U_FAILURE(errorCode)) {
821	break;
822	}
823	decomposeShort(prevBoundary, src, STOP_AT_LIMIT, FALSE0 /* onlyContiguous */,
824	buffer, errorCode);
825	// Decompose until the next boundary.
826	if (buffer.getLastCC() > 1) {
827	src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, FALSE0 /* onlyContiguous */,
828	buffer, errorCode);
829	}
830	if (U_FAILURE(errorCode)) {
831	break;
832	}
833	if ((src - prevSrc) > INT32_MAX(2147483647)) { // guard before buffer.equals()
834	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
835	break;
836	}
837	// We already know there was a change if the original character decomposed;
838	// otherwise compare.
839	if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
840	if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
841	*sink, options, edits, errorCode)) {
842	break;
843	}
844	} else {
845	if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(),
846	*sink, edits, errorCode)) {
847	break;
848	}
849	}
850	prevBoundary = src;
851	prevCC = 0;
852	}
853	return src;
854	}
855
856	const uint8_t *
857	Normalizer2Impl::decomposeShort(const uint8_t src, const uint8_t limit,
858	StopAt stopAt, UBool onlyContiguous,
859	ReorderingBuffer &buffer, UErrorCode &errorCode) const {
860	if (U_FAILURE(errorCode)) {
861	return nullptr;
862	}
863	while (src < limit) {
864	const uint8_t *prevSrc = src;
865	uint16_t norm16;
866	UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16)do { int32_t __lead = (uint8_t)(src)++; if (!(((__lead)& 0x80)==0)) { uint8_t __t1, __t2, __t3; if ((src) != (limit) && (__lead >= 0xe0 ? __lead < 0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [__lead &= 0xf] & (1 << ((__t1 = (src)) >> 5)) && ++(src) != (limit) && (__t2 = (src) - 0x80) <= 0x3f && (__lead = ((int32_t)(normTrie)-> index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) : ( __lead -= 0xf0) <= 4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" [(__t1 = (src)) >> 4] & (1 << __lead) && (__lead = (__lead << 6) \| (__t1 & 0x3f), ++(src) != (limit)) && (__t2 = (src) - 0x80) <= 0x3f && ++(src) != (limit) && (__t3 = (src) - 0x80) <= 0x3f && (__lead = __lead >= (normTrie)->shifted12HighStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallU8Index_71((normTrie), __lead, __t2, __t3 ), 1) : __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && (__lead = (int32_t)(normTrie)->index[__lead & 0x1f] + __t1, 1))) { ++(src); } else { __lead = (normTrie )->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } ( norm16) = ((normTrie)->data.ptr16[__lead]); } while (false );
867	// Get the decomposition and the lead and trail cc's.
868	UChar32 c = U_SENTINEL(-1);
869	if (norm16 >= limitNoNo) {
870	if (isMaybeOrNonZeroCC(norm16)) {
871	// No comp boundaries around this character.
872	uint8_t cc = getCCFromYesOrMaybe(norm16);
873	if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
874	return prevSrc;
875	}
876	c = codePointFromValidUTF8(prevSrc, src);
877	if (!buffer.append(c, cc, errorCode)) {
878	return nullptr;
879	}
880	if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) {
881	return src;
882	}
883	continue;
884	}
885	// Maps to an isCompYesAndZeroCC.
886	if (stopAt != STOP_AT_LIMIT) {
887	return prevSrc;
888	}
889	c = codePointFromValidUTF8(prevSrc, src);
890	c = mapAlgorithmic(c, norm16);
891	norm16 = getRawNorm16(c);
892	} else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
893	return prevSrc;
894	}
895	// norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
896	// We do not see invalid UTF-8 here because
897	// its norm16==INERT is normalization-inert,
898	// so it gets copied unchanged in the fast path,
899	// and we stop the slow path where invalid UTF-8 begins.
900	// c >= 0 is the result of an algorithmic mapping.
901	U_ASSERT(c >= 0 \|\| norm16 != INERT)(void)0;
902	if (norm16 < minYesNo) {
903	if (c < 0) {
904	c = codePointFromValidUTF8(prevSrc, src);
905	}
906	// does not decompose
907	if (!buffer.append(c, 0, errorCode)) {
908	return nullptr;
909	}
910	} else if (isHangulLV(norm16) \|\| isHangulLVT(norm16)) {
911	// Hangul syllable: decompose algorithmically
912	if (c < 0) {
913	c = codePointFromValidUTF8(prevSrc, src);
914	}
915	char16_t jamos[3];
916	if (!buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode)) {
917	return nullptr;
918	}
919	} else {
920	// The character decomposes, get everything from the variable-length extra data.
921	const uint16_t *mapping = getMapping(norm16);
922	uint16_t firstUnit = *mapping;
923	int32_t length = firstUnit & MAPPING_LENGTH_MASK;
924	uint8_t trailCC = (uint8_t)(firstUnit >> 8);
925	uint8_t leadCC;
926	if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) {
927	leadCC = (uint8_t)(*(mapping-1) >> 8);
928	} else {
929	leadCC = 0;
930	}
931	if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
932	return prevSrc;
933	}
934	if (!buffer.append((const char16_t *)mapping+1, length, TRUE1, leadCC, trailCC, errorCode)) {
935	return nullptr;
936	}
937	}
938	if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) \|\|
939	(stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) {
940	return src;
941	}
942	}
943	return src;
944	}
945
946	const UChar *
947	Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const {
948	uint16_t norm16;
949	if(c<minDecompNoCP \|\| isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
950	// c does not decompose
951	return nullptr;
952	}
953	const UChar *decomp = nullptr;
954	if(isDecompNoAlgorithmic(norm16)) {
955	// Maps to an isCompYesAndZeroCC.
956	c=mapAlgorithmic(c, norm16);
957	decomp=buffer;
958	length=0;
959	U16_APPEND_UNSAFE(buffer, length, c)do { if((uint32_t)(c)<=0xffff) { (buffer)[(length)++]=(uint16_t )(c); } else { (buffer)[(length)++]=(uint16_t)(((c)>>10 )+0xd7c0); (buffer)[(length)++]=(uint16_t)(((c)&0x3ff)\|0xdc00 ); } } while (false);
960	// The mapping might decompose further.
961	norm16 = getRawNorm16(c);
962	}
963	if (norm16 < minYesNo) {
964	return decomp;
965	} else if(isHangulLV(norm16) \|\| isHangulLVT(norm16)) {
966	// Hangul syllable: decompose algorithmically
967	length=Hangul::decompose(c, buffer);
968	return buffer;
969	}
970	// c decomposes, get everything from the variable-length extra data
971	const uint16_t *mapping=getMapping(norm16);
972	length=*mapping&MAPPING_LENGTH_MASK;
973	return (const UChar *)mapping+1;
974	}
975
976	// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
977	// so that a raw mapping fits that consists of one unit ("rm0")
978	// plus all but the first two code units of the normal mapping.
979	// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
980	const UChar *
981	Normalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const {
982	uint16_t norm16;
983	if(c<minDecompNoCP \|\| isDecompYes(norm16=getNorm16(c))) {
984	// c does not decompose
985	return NULL__null;
986	} else if(isHangulLV(norm16) \|\| isHangulLVT(norm16)) {
987	// Hangul syllable: decompose algorithmically
988	Hangul::getRawDecomposition(c, buffer);
989	length=2;
990	return buffer;
991	} else if(isDecompNoAlgorithmic(norm16)) {
992	c=mapAlgorithmic(c, norm16);
993	length=0;
994	U16_APPEND_UNSAFE(buffer, length, c)do { if((uint32_t)(c)<=0xffff) { (buffer)[(length)++]=(uint16_t )(c); } else { (buffer)[(length)++]=(uint16_t)(((c)>>10 )+0xd7c0); (buffer)[(length)++]=(uint16_t)(((c)&0x3ff)\|0xdc00 ); } } while (false);
995	return buffer;
996	}
997	// c decomposes, get everything from the variable-length extra data
998	const uint16_t *mapping=getMapping(norm16);
999	uint16_t firstUnit=*mapping;
1000	int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping
1001	if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
1002	// Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
1003	// Bit 7=MAPPING_HAS_CCC_LCCC_WORD
1004	const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;
1005	uint16_t rm0=*rawMapping;
1006	if(rm0<=MAPPING_LENGTH_MASK) {
1007	length=rm0;
1008	return (const UChar *)rawMapping-rm0;
1009	} else {
1010	// Copy the normal mapping and replace its first two code units with rm0.
1011	buffer[0]=(UChar)rm0;
1012	u_memcpyu_memcpy_71(buffer+1, (const UChar *)mapping+1+2, mLength-2);
1013	length=mLength-1;
1014	return buffer;
1015	}
1016	} else {
1017	length=mLength;
1018	return (const UChar *)mapping+1;
1019	}
1020	}
1021
1022	void Normalizer2Impl::decomposeAndAppend(const UChar src, const UChar limit,
1023	UBool doDecompose,
1024	UnicodeString &safeMiddle,
1025	ReorderingBuffer &buffer,
1026	UErrorCode &errorCode) const {
1027	buffer.copyReorderableSuffixTo(safeMiddle);
1028	if(doDecompose) {
1029	decompose(src, limit, &buffer, errorCode);
1030	return;
1031	}
1032	// Just merge the strings at the boundary.
1033	bool isFirst = true;
1034	uint8_t firstCC = 0, prevCC = 0, cc;
1035	const UChar *p = src;
1036	while (p != limit) {
1037	const UChar *codePointStart = p;
1038	UChar32 c;
1039	uint16_t norm16;
1040	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16)do { (c) = (p)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (p ) != (limit) && (((__c2 = (p))&0xfffffc00)==0xdc00 )) { ++(p); (c) = (((UChar32)((c))<<10UL)+(UChar32)(__c2 )-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (norm16) = ((normTrie)->data.ptr16[__index]); } while (false);
1041	if ((cc = getCC(norm16)) == 0) {
1042	p = codePointStart;
1043	break;
1044	}
1045	if (isFirst) {
1046	firstCC = cc;
1047	isFirst = false;
1048	}
1049	prevCC = cc;
1050	}
1051	if(limit==NULL__null) { // appendZeroCC() needs limit!=NULL
1052	limit=u_strchru_strchr_71(p, 0);
1053	}
1054
1055	if (buffer.append(src, (int32_t)(p - src), FALSE0, firstCC, prevCC, errorCode)) {
1056	buffer.appendZeroCC(p, limit, errorCode);
1057	}
1058	}
1059
1060	UBool Normalizer2Impl::hasDecompBoundaryBefore(UChar32 c) const {
1061	return c < minLcccCP \|\| (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) \|\|
1062	norm16HasDecompBoundaryBefore(getNorm16(c));
1063	}
1064
1065	UBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const {
1066	if (norm16 < minNoNoCompNoMaybeCC) {
1067	return TRUE1;
1068	}
1069	if (norm16 >= limitNoNo) {
1070	return norm16 <= MIN_NORMAL_MAYBE_YES \|\| norm16 == JAMO_VT;
1071	}
1072	// c decomposes, get everything from the variable-length extra data
1073	const uint16_t *mapping=getMapping(norm16);
1074	uint16_t firstUnit=*mapping;
1075	// TRUE if leadCC==0 (hasFCDBoundaryBefore())
1076	return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 \|\| (*(mapping-1)&0xff00)==0;
1077	}
1078
1079	UBool Normalizer2Impl::hasDecompBoundaryAfter(UChar32 c) const {
1080	if (c < minDecompNoCP) {
1081	return TRUE1;
1082	}
1083	if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
1084	return TRUE1;
1085	}
1086	return norm16HasDecompBoundaryAfter(getNorm16(c));
1087	}
1088
1089	UBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const {
1090	if(norm16 <= minYesNo \|\| isHangulLVT(norm16)) {
1091	return TRUE1;
1092	}
1093	if (norm16 >= limitNoNo) {
1094	if (isMaybeOrNonZeroCC(norm16)) {
1095	return norm16 <= MIN_NORMAL_MAYBE_YES \|\| norm16 == JAMO_VT;
1096	}
1097	// Maps to an isCompYesAndZeroCC.
1098	return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
1099	}
1100	// c decomposes, get everything from the variable-length extra data
1101	const uint16_t *mapping=getMapping(norm16);
1102	uint16_t firstUnit=*mapping;
1103	// decomp after-boundary: same as hasFCDBoundaryAfter(),
1104	// fcd16<=1 \|\| trailCC==0
1105	if(firstUnit>0x1ff) {
1106	return FALSE0; // trailCC>1
1107	}
1108	if(firstUnit<=0xff) {
1109	return TRUE1; // trailCC==0
1110	}
1111	// if(trailCC==1) test leadCC==0, same as checking for before-boundary
1112	// TRUE if leadCC==0 (hasFCDBoundaryBefore())
1113	return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 \|\| (*(mapping-1)&0xff00)==0;
1114	}
1115
1116	/*
1117	* Finds the recomposition result for
1118	* a forward-combining "lead" character,
1119	* specified with a pointer to its compositions list,
1120	* and a backward-combining "trail" character.
1121	*
1122	* If the lead and trail characters combine, then this function returns
1123	* the following "compositeAndFwd" value:
1124	* Bits 21..1 composite character
1125	* Bit 0 set if the composite is a forward-combining starter
1126	* otherwise it returns -1.
1127	*
1128	* The compositions list has (trail, compositeAndFwd) pair entries,
1129	* encoded as either pairs or triples of 16-bit units.
1130	* The last entry has the high bit of its first unit set.
1131	*
1132	* The list is sorted by ascending trail characters (there are no duplicates).
1133	* A linear search is used.
1134	*
1135	* See normalizer2impl.h for a more detailed description
1136	* of the compositions list format.
1137	*/
1138	int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
1139	uint16_t key1, firstUnit;
1140	if(trail<COMP_1_TRAIL_LIMIT) {
1141	// trail character is 0..33FF
1142	// result entry may have 2 or 3 units
1143	key1=(uint16_t)(trail<<1);
1144	while(key1>(firstUnit=*list)) {
1145	list+=2+(firstUnit&COMP_1_TRIPLE);
1146	}
1147	if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
1148	if(firstUnit&COMP_1_TRIPLE) {
1149	return ((int32_t)list[1]<<16)\|list[2];
1150	} else {
1151	return list[1];
1152	}
1153	}
1154	} else {
1155	// trail character is 3400..10FFFF
1156	// result entry has 3 units
1157	key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
1158	(((trail>>COMP_1_TRAIL_SHIFT))&
1159	~COMP_1_TRIPLE));
1160	uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
1161	uint16_t secondUnit;
1162	for(;;) {
1163	if(key1>(firstUnit=*list)) {
1164	list+=2+(firstUnit&COMP_1_TRIPLE);
1165	} else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
1166	if(key2>(secondUnit=list[1])) {
1167	if(firstUnit&COMP_1_LAST_TUPLE) {
1168	break;
1169	} else {
1170	list+=3;
1171	}
1172	} else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
1173	return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)\|list[2];
1174	} else {
1175	break;
1176	}
1177	} else {
1178	break;
1179	}
1180	}
1181	}
1182	return -1;
1183	}
1184
1185	/**
1186	* @param list some character's compositions list
1187	* @param set recursively receives the composites from these compositions
1188	*/
1189	void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
1190	uint16_t firstUnit;
1191	int32_t compositeAndFwd;
1192	do {
1193	firstUnit=*list;
1194	if((firstUnit&COMP_1_TRIPLE)==0) {
1195	compositeAndFwd=list[1];
1196	list+=2;
1197	} else {
1198	compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)\|list[2];
1199	list+=3;
1200	}
1201	UChar32 composite=compositeAndFwd>>1;
1202	if((compositeAndFwd&1)!=0) {
1203	addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
1204	}
1205	set.add(composite);
1206	} while((firstUnit&COMP_1_LAST_TUPLE)==0);
1207	}
1208
1209	/*
1210	* Recomposes the buffer text starting at recomposeStartIndex
1211	* (which is in NFD - decomposed and canonically ordered),
1212	* and truncates the buffer contents.
1213	*
1214	* Note that recomposition never lengthens the text:
1215	* Any character consists of either one or two code units;
1216	* a composition may contain at most one more code unit than the original starter,
1217	* while the combining mark that is removed has at least one code unit.
1218	*/
1219	void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
1220	UBool onlyContiguous) const {
1221	UChar *p=buffer.getStart()+recomposeStartIndex;
1222	UChar *limit=buffer.getLimit();
1223	if(p==limit) {
1224	return;
1225	}
1226
1227	UChar starter, pRemove, q, r;
1228	const uint16_t *compositionsList;
1229	UChar32 c, compositeAndFwd;
1230	uint16_t norm16;
1231	uint8_t cc, prevCC;
1232	UBool starterIsSupplementary;
1233
1234	// Some of the following variables are not used until we have a forward-combining starter
1235	// and are only initialized now to avoid compiler warnings.
1236	compositionsList=NULL__null; // used as indicator for whether we have a forward-combining starter
1237	starter=NULL__null;
1238	starterIsSupplementary=FALSE0;
1239	prevCC=0;
1240
1241	for(;;) {
1242	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16)do { (c) = (p)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (p ) != (limit) && (((__c2 = (p))&0xfffffc00)==0xdc00 )) { ++(p); (c) = (((UChar32)((c))<<10UL)+(UChar32)(__c2 )-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (norm16) = ((normTrie)->data.ptr16[__index]); } while (false);
1243	cc=getCCFromYesOrMaybe(norm16);
1244	if( // this character combines backward and
1245	isMaybe(norm16) &&
1246	// we have seen a starter that combines forward and
1247	compositionsList!=NULL__null &&
1248	// the backward-combining character is not blocked
1249	(prevCC<cc \|\| prevCC==0)
1250	) {
1251	if(isJamoVT(norm16)) {
1252	// c is a Jamo V/T, see if we can compose it with the previous character.
1253	if(c<Hangul::JAMO_T_BASE) {
1254	// c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
1255	UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);
1256	if(prev<Hangul::JAMO_L_COUNT) {
1257	pRemove=p-1;
1258	UChar syllable=(UChar)
1259	(Hangul::HANGUL_BASE+
1260	(prevHangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))
1261	Hangul::JAMO_T_COUNT);
1262	UChar t;
1263	if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
1264	++p;
1265	syllable+=t; // The next character was a Jamo T.
1266	}
1267	*starter=syllable;
1268	// remove the Jamo V/T
1269	q=pRemove;
1270	r=p;
1271	while(r<limit) {
1272	q++=r++;
1273	}
1274	limit=q;
1275	p=pRemove;
1276	}
1277	}
1278	/*
1279	* No "else" for Jamo T:
1280	* Since the input is in NFD, there are no Hangul LV syllables that
1281	* a Jamo T could combine with.
1282	* All Jamo Ts are combined above when handling Jamo Vs.
1283	*/
1284	if(p==limit) {
1285	break;
1286	}
1287	compositionsList=NULL__null;
1288	continue;
1289	} else if((compositeAndFwd=combine(compositionsList, c))>=0) {
1290	// The starter and the combining mark (c) do combine.
1291	UChar32 composite=compositeAndFwd>>1;
1292
1293	// Replace the starter with the composite, remove the combining mark.
1294	pRemove=p-U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2); // pRemove & p: start & limit of the combining mark
1295	if(starterIsSupplementary) {
1296	if(U_IS_SUPPLEMENTARY(composite)((uint32_t)((composite)-0x10000)<=0xfffff)) {
1297	// both are supplementary
1298	starter[0]=U16_LEAD(composite)(UChar)(((composite)>>10)+0xd7c0);
1299	starter[1]=U16_TRAIL(composite)(UChar)(((composite)&0x3ff)\|0xdc00);
1300	} else {
1301	*starter=(UChar)composite;
1302	// The composite is shorter than the starter,
1303	// move the intermediate characters forward one.
1304	starterIsSupplementary=FALSE0;
1305	q=starter+1;
1306	r=q+1;
1307	while(r<pRemove) {
1308	q++=r++;
1309	}
1310	--pRemove;
1311	}
1312	} else if(U_IS_SUPPLEMENTARY(composite)((uint32_t)((composite)-0x10000)<=0xfffff)) {
1313	// The composite is longer than the starter,
1314	// move the intermediate characters back one.
1315	starterIsSupplementary=TRUE1;
1316	++starter; // temporarily increment for the loop boundary
1317	q=pRemove;
1318	r=++pRemove;
1319	while(starter<q) {
1320	--r=--q;
1321	}
1322	*starter=U16_TRAIL(composite)(UChar)(((composite)&0x3ff)\|0xdc00);
1323	*--starter=U16_LEAD(composite)(UChar)(((composite)>>10)+0xd7c0); // undo the temporary increment
1324	} else {
1325	// both are on the BMP
1326	*starter=(UChar)composite;
1327	}
1328
1329	/* remove the combining mark by moving the following text over it */
1330	if(pRemove<p) {
1331	q=pRemove;
1332	r=p;
1333	while(r<limit) {
1334	q++=r++;
1335	}
1336	limit=q;
1337	p=pRemove;
1338	}
1339	// Keep prevCC because we removed the combining mark.
1340
1341	if(p==limit) {
1342	break;
1343	}
1344	// Is the composite a starter that combines forward?
1345	if(compositeAndFwd&1) {
1346	compositionsList=
1347	getCompositionsListForComposite(getRawNorm16(composite));
1348	} else {
1349	compositionsList=NULL__null;
1350	}
1351
1352	// We combined; continue with looking for compositions.
1353	continue;
1354	}
1355	}
1356
1357	// no combination this time
1358	prevCC=cc;
1359	if(p==limit) {
1360	break;
1361	}
1362
1363	// If c did not combine, then check if it is a starter.
1364	if(cc==0) {
1365	// Found a new starter.
1366	if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL__null) {
1367	// It may combine with something, prepare for it.
1368	if(U_IS_BMP(c)((uint32_t)(c)<=0xffff)) {
1369	starterIsSupplementary=FALSE0;
1370	starter=p-1;
1371	} else {
1372	starterIsSupplementary=TRUE1;
1373	starter=p-2;
1374	}
1375	}
1376	} else if(onlyContiguous) {
1377	// FCC: no discontiguous compositions; any intervening character blocks.
1378	compositionsList=NULL__null;
1379	}
1380	}
1381	buffer.setReorderingLimit(limit);
1382	}
1383
1384	UChar32
1385	Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
1386	uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16
1387	const uint16_t *list;
1388	if(isInert(norm16)) {
1389	return U_SENTINEL(-1);
1390	} else if(norm16<minYesNoMappingsOnly) {
1391	// a combines forward.
1392	if(isJamoL(norm16)) {
1393	b-=Hangul::JAMO_V_BASE;
1394	if(0<=b && b<Hangul::JAMO_V_COUNT) {
1395	return
1396	(Hangul::HANGUL_BASE+
1397	((a-Hangul::JAMO_L_BASE)Hangul::JAMO_V_COUNT+b)
1398	Hangul::JAMO_T_COUNT);
1399	} else {
1400	return U_SENTINEL(-1);
1401	}
1402	} else if(isHangulLV(norm16)) {
1403	b-=Hangul::JAMO_T_BASE;
1404	if(0<b && b<Hangul::JAMO_T_COUNT) { // not b==0!
1405	return a+b;
1406	} else {
1407	return U_SENTINEL(-1);
1408	}
1409	} else {
1410	// 'a' has a compositions list in extraData
1411	list=getMapping(norm16);
1412	if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list
1413	list+= // mapping pointer
1414	1+ // +1 to skip the first unit with the mapping length
1415	(*list&MAPPING_LENGTH_MASK); // + mapping length
1416	}
1417	}
1418	} else if(norm16<minMaybeYes \|\| MIN_NORMAL_MAYBE_YES<=norm16) {
1419	return U_SENTINEL(-1);
1420	} else {
1421	list=getCompositionsListForMaybe(norm16);
1422	}
1423	if(b<0 \|\| 0x10ffff<b) { // combine(list, b) requires a valid code point b
1424	return U_SENTINEL(-1);
1425	}
1426	#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC1
1427	return combine(list, b)>>1;
1428	#else
1429	int32_t compositeAndFwd=combine(list, b);
1430	return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL(-1);
1431	#endif
1432	}
1433
1434	// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
1435	// doCompose: normalize
1436	// !doCompose: isNormalized (buffer must be empty and initialized)
1437	UBool
1438	Normalizer2Impl::compose(const UChar src, const UChar limit,
1439	UBool onlyContiguous,
1440	UBool doCompose,
1441	ReorderingBuffer &buffer,
1442	UErrorCode &errorCode) const {
1443	const UChar *prevBoundary=src;
1444	UChar32 minNoMaybeCP=minCompNoMaybeCP;
1445	if(limit==NULL__null) {
1446	src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
1447	doCompose ? &buffer : NULL__null,
1448	errorCode);
1449	if(U_FAILURE(errorCode)) {
1450	return FALSE0;
1451	}
1452	limit=u_strchru_strchr_71(src, 0);
1453	if (prevBoundary != src) {
1454	if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
1455	prevBoundary = src;
1456	} else {
1457	buffer.removeSuffix(1);
1458	prevBoundary = --src;
1459	}
1460	}
1461	}
1462
1463	for (;;) {
1464	// Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
1465	// or with (compYes && ccc==0) properties.
1466	const UChar *prevSrc;
1467	UChar32 c = 0;
1468	uint16_t norm16 = 0;
1469	for (;;) {
1470	if (src == limit) {
1471	if (prevBoundary != limit && doCompose) {
1472	buffer.appendZeroCC(prevBoundary, limit, errorCode);
1473	}
1474	return TRUE1;
1475	}
1476	if( (c=*src)<minNoMaybeCP \|\|
1477	isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c)((normTrie)->data.ptr16[((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK ))]))
1478	) {
1479	++src;
1480	} else {
1481	prevSrc = src++;
1482	if(!U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
1483	break;
1484	} else {
1485	UChar c2;
1486	if(src!=limit && U16_IS_TRAIL(c2=src)(((c2=src)&0xfffffc00)==0xdc00)) {
1487	++src;
1488	c=U16_GET_SUPPLEMENTARY(c, c2)(((UChar32)(c)<<10UL)+(UChar32)(c2)-((0xd800<<10UL )+0xdc00-0x10000));
1489	norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c)((normTrie)->data.ptr16[((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c))]);
1490	if(!isCompYesAndZeroCC(norm16)) {
1491	break;
1492	}
1493	}
1494	}
1495	}
1496	}
1497	// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
1498	// The current character is either a "noNo" (has a mapping)
1499	// or a "maybeYes" (combines backward)
1500	// or a "yesYes" with ccc!=0.
1501	// It is not a Hangul syllable or Jamo L because those have "yes" properties.
1502
1503	// Medium-fast path: Handle cases that do not require full decomposition and recomposition.
1504	if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
1505	if (!doCompose) {
1506	return FALSE0;
1507	}
1508	// Fast path for mapping a character that is immediately surrounded by boundaries.
1509	// In this case, we need not decompose around the current character.
1510	if (isDecompNoAlgorithmic(norm16)) {
1511	// Maps to a single isCompYesAndZeroCC character
1512	// which also implies hasCompBoundaryBefore.
1513	if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) \|\|
1514	hasCompBoundaryBefore(src, limit)) {
1515	if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
1516	break;
1517	}
1518	if(!buffer.append(mapAlgorithmic(c, norm16), 0, errorCode)) {
1519	break;
1520	}
1521	prevBoundary = src;
1522	continue;
1523	}
1524	} else if (norm16 < minNoNoCompBoundaryBefore) {
1525	// The mapping is comp-normalized which also implies hasCompBoundaryBefore.
1526	if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) \|\|
1527	hasCompBoundaryBefore(src, limit)) {
1528	if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
1529	break;
1530	}
1531	const UChar mapping = reinterpret_cast<const UChar >(getMapping(norm16));
1532	int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
1533	if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) {
1534	break;
1535	}
1536	prevBoundary = src;
1537	continue;
1538	}
1539	} else if (norm16 >= minNoNoEmpty) {
1540	// The current character maps to nothing.
1541	// Simply omit it from the output if there is a boundary before _or_ after it.
1542	// The character itself implies no boundaries.
1543	if (hasCompBoundaryBefore(src, limit) \|\|
1544	hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
1545	if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
1546	break;
1547	}
1548	prevBoundary = src;
1549	continue;
1550	}
1551	}
1552	// Other "noNo" type, or need to examine more text around this character:
1553	// Fall through to the slow path.
1554	} else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
1555	UChar prev=*(prevSrc-1);
1556	if(c<Hangul::JAMO_T_BASE) {
1557	// The current character is a Jamo Vowel,
1558	// compose with previous Jamo L and following Jamo T.
1559	UChar l = (UChar)(prev-Hangul::JAMO_L_BASE);
1560	if(l<Hangul::JAMO_L_COUNT) {
1561	if (!doCompose) {
1562	return FALSE0;
1563	}
1564	int32_t t;
1565	if (src != limit &&
1566	0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) &&
1567	t < Hangul::JAMO_T_COUNT) {
1568	// The next character is a Jamo T.
1569	++src;
1570	} else if (hasCompBoundaryBefore(src, limit)) {
1571	// No Jamo T follows, not even via decomposition.
1572	t = 0;
1573	} else {
1574	t = -1;
1575	}
1576	if (t >= 0) {
1577	UChar32 syllable = Hangul::HANGUL_BASE +
1578	(lHangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE))
1579	Hangul::JAMO_T_COUNT + t;
1580	--prevSrc; // Replace the Jamo L as well.
1581	if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
1582	break;
1583	}
1584	if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) {
1585	break;
1586	}
1587	prevBoundary = src;
1588	continue;
1589	}
1590	// If we see L+V+x where x!=T then we drop to the slow path,
1591	// decompose and recompose.
1592	// This is to deal with NFKC finding normal L and V but a
1593	// compatibility variant of a T.
1594	// We need to either fully compose that combination here
1595	// (which would complicate the code and may not work with strange custom data)
1596	// or use the slow path.
1597	}
1598	} else if (Hangul::isHangulLV(prev)) {
1599	// The current character is a Jamo Trailing consonant,
1600	// compose with previous Hangul LV that does not contain a Jamo T.
1601	if (!doCompose) {
1602	return FALSE0;
1603	}
1604	UChar32 syllable = prev + c - Hangul::JAMO_T_BASE;
1605	--prevSrc; // Replace the Hangul LV as well.
1606	if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
1607	break;
1608	}
1609	if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) {
1610	break;
1611	}
1612	prevBoundary = src;
1613	continue;
1614	}
1615	// No matching context, or may need to decompose surrounding text first:
1616	// Fall through to the slow path.
1617	} else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
1618	// One or more combining marks that do not combine-back:
1619	// Check for canonical order, copy unchanged if ok and
1620	// if followed by a character with a boundary-before.
1621	uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
1622	if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
1623	// Fails FCD test, need to decompose and contiguously recompose.
1624	if (!doCompose) {
1625	return FALSE0;
1626	}
1627	} else {
1628	// If !onlyContiguous (not FCC), then we ignore the tccc of
1629	// the previous character which passed the quick check "yes && ccc==0" test.
1630	const UChar *nextSrc;
1631	uint16_t n16;
1632	for (;;) {
1633	if (src == limit) {
1634	if (doCompose) {
1635	buffer.appendZeroCC(prevBoundary, limit, errorCode);
1636	}
1637	return TRUE1;
1638	}
1639	uint8_t prevCC = cc;
1640	nextSrc = src;
1641	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16)do { (c) = (nextSrc)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (nextSrc ) != (limit) && (((__c2 = (nextSrc))&0xfffffc00) ==0xdc00)) { ++(nextSrc); (c) = (((UChar32)((c))<<10UL) +(UChar32)(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (n16) = ((normTrie) ->data.ptr16[__index]); } while (false);
1642	if (n16 >= MIN_YES_YES_WITH_CC) {
1643	cc = getCCFromNormalYesOrMaybe(n16);
1644	if (prevCC > cc) {
1645	if (!doCompose) {
1646	return FALSE0;
1647	}
1648	break;
1649	}
1650	} else {
1651	break;
1652	}
1653	src = nextSrc;
1654	}
1655	// src is after the last in-order combining mark.
1656	// If there is a boundary here, then we continue with no change.
1657	if (norm16HasCompBoundaryBefore(n16)) {
1658	if (isCompYesAndZeroCC(n16)) {
1659	src = nextSrc;
1660	}
1661	continue;
1662	}
1663	// Use the slow path. There is no boundary in [prevSrc, src[.
1664	}
1665	}
1666
1667	// Slow path: Find the nearest boundaries around the current character,
1668	// decompose and recompose.
1669	if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
1670	const UChar *p = prevSrc;
1671	UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16)do { (c) = --(p); int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)!=0) && (p ) != (prevBoundary) && (((__c2 = ((p) - 1))&0xfffffc00 )==0xd800)) { --(p); (c) = (((UChar32)(__c2)<<10UL)+(UChar32 )((c))-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (norm16) = ((normTrie )->data.ptr16[__index]); } while (false);
1672	if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
1673	prevSrc = p;
1674	}
1675	}
1676	if (doCompose && prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
1677	break;
1678	}
1679	int32_t recomposeStartIndex=buffer.length();
1680	// We know there is not a boundary here.
1681	decomposeShort(prevSrc, src, FALSE0 /* !stopAtCompBoundary */, onlyContiguous,
1682	buffer, errorCode);
1683	// Decompose until the next boundary.
1684	src = decomposeShort(src, limit, TRUE1 /* stopAtCompBoundary */, onlyContiguous,
1685	buffer, errorCode);
1686	if (U_FAILURE(errorCode)) {
1687	break;
1688	}
1689	if ((src - prevSrc) > INT32_MAX(2147483647)) { // guard before buffer.equals()
1690	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1691	return TRUE1;
1692	}
1693	recompose(buffer, recomposeStartIndex, onlyContiguous);
1694	if(!doCompose) {
1695	if(!buffer.equals(prevSrc, src)) {
1696	return FALSE0;
1697	}
1698	buffer.remove();
1699	}
1700	prevBoundary=src;
1701	}
1702	return TRUE1;
1703	}
1704
1705	// Very similar to compose(): Make the same changes in both places if relevant.
1706	// pQCResult==NULL: spanQuickCheckYes
1707	// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
1708	const UChar *
1709	Normalizer2Impl::composeQuickCheck(const UChar src, const UChar limit,
1710	UBool onlyContiguous,
1711	UNormalizationCheckResult *pQCResult) const {
1712	const UChar *prevBoundary=src;
1713	UChar32 minNoMaybeCP=minCompNoMaybeCP;
1714	if(limit==NULL__null) {
1715	UErrorCode errorCode=U_ZERO_ERROR;
1716	src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL__null, errorCode);
1717	limit=u_strchru_strchr_71(src, 0);
1718	if (prevBoundary != src) {
1719	if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
1720	prevBoundary = src;
1721	} else {
1722	prevBoundary = --src;
1723	}
1724	}
1725	}
1726
1727	for(;;) {
1728	// Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
1729	// or with (compYes && ccc==0) properties.
1730	const UChar *prevSrc;
1731	UChar32 c = 0;
1732	uint16_t norm16 = 0;
1733	for (;;) {
1734	if(src==limit) {
1735	return src;
1736	}
1737	if( (c=*src)<minNoMaybeCP \|\|
1738	isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c)((normTrie)->data.ptr16[((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK ))]))
1739	) {
1740	++src;
1741	} else {
1742	prevSrc = src++;
1743	if(!U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
1744	break;
1745	} else {
1746	UChar c2;
1747	if(src!=limit && U16_IS_TRAIL(c2=src)(((c2=src)&0xfffffc00)==0xdc00)) {
1748	++src;
1749	c=U16_GET_SUPPLEMENTARY(c, c2)(((UChar32)(c)<<10UL)+(UChar32)(c2)-((0xd800<<10UL )+0xdc00-0x10000));
1750	norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c)((normTrie)->data.ptr16[((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c))]);
1751	if(!isCompYesAndZeroCC(norm16)) {
1752	break;
1753	}
1754	}
1755	}
1756	}
1757	}
1758	// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
1759	// The current character is either a "noNo" (has a mapping)
1760	// or a "maybeYes" (combines backward)
1761	// or a "yesYes" with ccc!=0.
1762	// It is not a Hangul syllable or Jamo L because those have "yes" properties.
1763
1764	uint16_t prevNorm16 = INERT;
1765	if (prevBoundary != prevSrc) {
1766	if (norm16HasCompBoundaryBefore(norm16)) {
1767	prevBoundary = prevSrc;
1768	} else {
1769	const UChar *p = prevSrc;
1770	uint16_t n16;
1771	UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16)do { (c) = --(p); int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)!=0) && (p ) != (prevBoundary) && (((__c2 = ((p) - 1))&0xfffffc00 )==0xd800)) { --(p); (c) = (((UChar32)(__c2)<<10UL)+(UChar32 )((c))-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (n16) = ((normTrie) ->data.ptr16[__index]); } while (false);
1772	if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
1773	prevBoundary = prevSrc;
1774	} else {
1775	prevBoundary = p;
1776	prevNorm16 = n16;
1777	}
1778	}
1779	}
1780
1781	if(isMaybeOrNonZeroCC(norm16)) {
1782	uint8_t cc=getCCFromYesOrMaybe(norm16);
1783	if (onlyContiguous /* FCC */ && cc != 0 &&
1784	getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
1785	// The [prevBoundary..prevSrc[ character
1786	// passed the quick check "yes && ccc==0" test
1787	// but is out of canonical order with the current combining mark.
1788	} else {
1789	// If !onlyContiguous (not FCC), then we ignore the tccc of
1790	// the previous character which passed the quick check "yes && ccc==0" test.
1791	const UChar *nextSrc;
1792	for (;;) {
1793	if (norm16 < MIN_YES_YES_WITH_CC) {
1794	if (pQCResult != nullptr) {
1795	*pQCResult = UNORM_MAYBE;
1796	} else {
1797	return prevBoundary;
1798	}
1799	}
1800	if (src == limit) {
1801	return src;
1802	}
1803	uint8_t prevCC = cc;
1804	nextSrc = src;
1805	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16)do { (c) = (nextSrc)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (nextSrc ) != (limit) && (((__c2 = (nextSrc))&0xfffffc00) ==0xdc00)) { ++(nextSrc); (c) = (((UChar32)((c))<<10UL) +(UChar32)(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (norm16) = ((normTrie )->data.ptr16[__index]); } while (false);
1806	if (isMaybeOrNonZeroCC(norm16)) {
1807	cc = getCCFromYesOrMaybe(norm16);
1808	if (!(prevCC <= cc \|\| cc == 0)) {
1809	break;
1810	}
1811	} else {
1812	break;
1813	}
1814	src = nextSrc;
1815	}
1816	// src is after the last in-order combining mark.
1817	if (isCompYesAndZeroCC(norm16)) {
1818	prevBoundary = src;
1819	src = nextSrc;
1820	continue;
1821	}
1822	}
1823	}
1824	if(pQCResult!=NULL__null) {
1825	*pQCResult=UNORM_NO;
1826	}
1827	return prevBoundary;
1828	}
1829	}
1830
1831	void Normalizer2Impl::composeAndAppend(const UChar src, const UChar limit,
1832	UBool doCompose,
1833	UBool onlyContiguous,
1834	UnicodeString &safeMiddle,
1835	ReorderingBuffer &buffer,
1836	UErrorCode &errorCode) const {
1837	if(!buffer.isEmpty()) {
1838	const UChar *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous);
1839	if(src!=firstStarterInSrc) {
1840	const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
1841	buffer.getLimit(), onlyContiguous);
1842	int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
1843	UnicodeString middle(lastStarterInDest, destSuffixLength);
1844	buffer.removeSuffix(destSuffixLength);
1845	safeMiddle=middle;
1846	middle.append(src, (int32_t)(firstStarterInSrc-src));
1847	const UChar *middleStart=middle.getBuffer();
1848	compose(middleStart, middleStart+middle.length(), onlyContiguous,
1849	TRUE1, buffer, errorCode);
1850	if(U_FAILURE(errorCode)) {
1851	return;
1852	}
1853	src=firstStarterInSrc;
1854	}
1855	}
1856	if(doCompose) {
1857	compose(src, limit, onlyContiguous, TRUE1, buffer, errorCode);
1858	} else {
1859	if(limit==NULL__null) { // appendZeroCC() needs limit!=NULL
1860	limit=u_strchru_strchr_71(src, 0);
1861	}
1862	buffer.appendZeroCC(src, limit, errorCode);
1863	}
1864	}
1865
1866	UBool
1867	Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
1868	const uint8_t src, const uint8_t limit,
1869	ByteSink sink, Edits edits, UErrorCode &errorCode) const {
1870	U_ASSERT(limit != nullptr)(void)0;
1871	UnicodeString s16;
1872	uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP);
1873	const uint8_t *prevBoundary = src;
1874
1875	for (;;) {
1876	// Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
1877	// or with (compYes && ccc==0) properties.
1878	const uint8_t *prevSrc;
1879	uint16_t norm16 = 0;
1880	for (;;) {
1881	if (src == limit) {
1882	if (prevBoundary != limit && sink != nullptr) {
1883	ByteSinkUtil::appendUnchanged(prevBoundary, limit,
1884	*sink, options, edits, errorCode);
1885	}
1886	return TRUE1;
1887	}
1888	if (*src < minNoMaybeLead) {
1889	++src;
1890	} else {
1891	prevSrc = src;
1892	UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16)do { int32_t __lead = (uint8_t)(src)++; if (!(((__lead)& 0x80)==0)) { uint8_t __t1, __t2, __t3; if ((src) != (limit) && (__lead >= 0xe0 ? __lead < 0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [__lead &= 0xf] & (1 << ((__t1 = (src)) >> 5)) && ++(src) != (limit) && (__t2 = (src) - 0x80) <= 0x3f && (__lead = ((int32_t)(normTrie)-> index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) : ( __lead -= 0xf0) <= 4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" [(__t1 = (src)) >> 4] & (1 << __lead) && (__lead = (__lead << 6) \| (__t1 & 0x3f), ++(src) != (limit)) && (__t2 = (src) - 0x80) <= 0x3f && ++(src) != (limit) && (__t3 = (src) - 0x80) <= 0x3f && (__lead = __lead >= (normTrie)->shifted12HighStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallU8Index_71((normTrie), __lead, __t2, __t3 ), 1) : __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && (__lead = (int32_t)(normTrie)->index[__lead & 0x1f] + __t1, 1))) { ++(src); } else { __lead = (normTrie )->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } ( norm16) = ((normTrie)->data.ptr16[__lead]); } while (false );
1893	if (!isCompYesAndZeroCC(norm16)) {
1894	break;
1895	}
1896	}
1897	}
1898	// isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
1899	// The current character is either a "noNo" (has a mapping)
1900	// or a "maybeYes" (combines backward)
1901	// or a "yesYes" with ccc!=0.
1902	// It is not a Hangul syllable or Jamo L because those have "yes" properties.
1903
1904	// Medium-fast path: Handle cases that do not require full decomposition and recomposition.
1905	if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
1906	if (sink == nullptr) {
1907	return FALSE0;
1908	}
1909	// Fast path for mapping a character that is immediately surrounded by boundaries.
1910	// In this case, we need not decompose around the current character.
1911	if (isDecompNoAlgorithmic(norm16)) {
1912	// Maps to a single isCompYesAndZeroCC character
1913	// which also implies hasCompBoundaryBefore.
1914	if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) \|\|
1915	hasCompBoundaryBefore(src, limit)) {
1916	if (prevBoundary != prevSrc &&
1917	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
1918	*sink, options, edits, errorCode)) {
1919	break;
1920	}
1921	appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits);
1922	prevBoundary = src;
1923	continue;
1924	}
1925	} else if (norm16 < minNoNoCompBoundaryBefore) {
1926	// The mapping is comp-normalized which also implies hasCompBoundaryBefore.
1927	if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) \|\|
1928	hasCompBoundaryBefore(src, limit)) {
1929	if (prevBoundary != prevSrc &&
1930	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
1931	*sink, options, edits, errorCode)) {
1932	break;
1933	}
1934	const uint16_t *mapping = getMapping(norm16);
1935	int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
1936	if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length,
1937	*sink, edits, errorCode)) {
1938	break;
1939	}
1940	prevBoundary = src;
1941	continue;
1942	}
1943	} else if (norm16 >= minNoNoEmpty) {
1944	// The current character maps to nothing.
1945	// Simply omit it from the output if there is a boundary before _or_ after it.
1946	// The character itself implies no boundaries.
1947	if (hasCompBoundaryBefore(src, limit) \|\|
1948	hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
1949	if (prevBoundary != prevSrc &&
1950	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
1951	*sink, options, edits, errorCode)) {
1952	break;
1953	}
1954	if (edits != nullptr) {
1955	edits->addReplace((int32_t)(src - prevSrc), 0);
1956	}
1957	prevBoundary = src;
1958	continue;
1959	}
1960	}
1961	// Other "noNo" type, or need to examine more text around this character:
1962	// Fall through to the slow path.
1963	} else if (isJamoVT(norm16)) {
1964	// Jamo L: E1 84 80..92
1965	// Jamo V: E1 85 A1..B5
1966	// Jamo T: E1 86 A8..E1 87 82
1967	U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1)(void)0;
1968	UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc);
1969	if (prevSrc[1] == 0x85) {
1970	// The current character is a Jamo Vowel,
1971	// compose with previous Jamo L and following Jamo T.
1972	UChar32 l = prev - Hangul::JAMO_L_BASE;
1973	if ((uint32_t)l < Hangul::JAMO_L_COUNT) {
1974	if (sink == nullptr) {
1975	return FALSE0;
1976	}
1977	int32_t t = getJamoTMinusBase(src, limit);
1978	if (t >= 0) {
1979	// The next character is a Jamo T.
1980	src += 3;
1981	} else if (hasCompBoundaryBefore(src, limit)) {
1982	// No Jamo T follows, not even via decomposition.
1983	t = 0;
1984	}
1985	if (t >= 0) {
1986	UChar32 syllable = Hangul::HANGUL_BASE +
1987	(lHangul::JAMO_V_COUNT + (prevSrc[2]-0xa1))
1988	Hangul::JAMO_T_COUNT + t;
1989	prevSrc -= 3; // Replace the Jamo L as well.
1990	if (prevBoundary != prevSrc &&
1991	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
1992	*sink, options, edits, errorCode)) {
1993	break;
1994	}
1995	ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
1996	prevBoundary = src;
1997	continue;
1998	}
1999	// If we see L+V+x where x!=T then we drop to the slow path,
2000	// decompose and recompose.
2001	// This is to deal with NFKC finding normal L and V but a
2002	// compatibility variant of a T.
2003	// We need to either fully compose that combination here
2004	// (which would complicate the code and may not work with strange custom data)
2005	// or use the slow path.
2006	}
2007	} else if (Hangul::isHangulLV(prev)) {
2008	// The current character is a Jamo Trailing consonant,
2009	// compose with previous Hangul LV that does not contain a Jamo T.
2010	if (sink == nullptr) {
2011	return FALSE0;
2012	}
2013	UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src);
2014	prevSrc -= 3; // Replace the Hangul LV as well.
2015	if (prevBoundary != prevSrc &&
2016	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
2017	*sink, options, edits, errorCode)) {
2018	break;
2019	}
2020	ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
2021	prevBoundary = src;
2022	continue;
2023	}
2024	// No matching context, or may need to decompose surrounding text first:
2025	// Fall through to the slow path.
2026	} else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
2027	// One or more combining marks that do not combine-back:
2028	// Check for canonical order, copy unchanged if ok and
2029	// if followed by a character with a boundary-before.
2030	uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
2031	if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
2032	// Fails FCD test, need to decompose and contiguously recompose.
2033	if (sink == nullptr) {
2034	return FALSE0;
2035	}
2036	} else {
2037	// If !onlyContiguous (not FCC), then we ignore the tccc of
2038	// the previous character which passed the quick check "yes && ccc==0" test.
2039	const uint8_t *nextSrc;
2040	uint16_t n16;
2041	for (;;) {
2042	if (src == limit) {
2043	if (sink != nullptr) {
2044	ByteSinkUtil::appendUnchanged(prevBoundary, limit,
2045	*sink, options, edits, errorCode);
2046	}
2047	return TRUE1;
2048	}
2049	uint8_t prevCC = cc;
2050	nextSrc = src;
2051	UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16)do { int32_t __lead = (uint8_t)(nextSrc)++; if (!(((__lead)& 0x80)==0)) { uint8_t __t1, __t2, __t3; if ((nextSrc) != (limit ) && (__lead >= 0xe0 ? __lead < 0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [__lead &= 0xf] & (1 << ((__t1 = (nextSrc)) >> 5)) && ++(nextSrc) != (limit) && (__t2 = (nextSrc ) - 0x80) <= 0x3f && (__lead = ((int32_t)(normTrie )->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) : (__lead -= 0xf0) <= 4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" [(__t1 = (nextSrc)) >> 4] & (1 << __lead) && (__lead = (__lead << 6) \| (__t1 & 0x3f), ++(nextSrc ) != (limit)) && (__t2 = (nextSrc) - 0x80) <= 0x3f && ++(nextSrc) != (limit) && (__t3 = (nextSrc ) - 0x80) <= 0x3f && (__lead = __lead >= (normTrie )->shifted12HighStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallU8Index_71((normTrie), __lead, __t2, __t3 ), 1) : __lead >= 0xc2 && (__t1 = *(nextSrc) - 0x80 ) <= 0x3f && (__lead = (int32_t)(normTrie)->index [__lead & 0x1f] + __t1, 1))) { ++(nextSrc); } else { __lead = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (n16) = ((normTrie)->data.ptr16[__lead]); } while (false );
2052	if (n16 >= MIN_YES_YES_WITH_CC) {
2053	cc = getCCFromNormalYesOrMaybe(n16);
2054	if (prevCC > cc) {
2055	if (sink == nullptr) {
2056	return FALSE0;
2057	}
2058	break;
2059	}
2060	} else {
2061	break;
2062	}
2063	src = nextSrc;
2064	}
2065	// src is after the last in-order combining mark.
2066	// If there is a boundary here, then we continue with no change.
2067	if (norm16HasCompBoundaryBefore(n16)) {
2068	if (isCompYesAndZeroCC(n16)) {
2069	src = nextSrc;
2070	}
2071	continue;
2072	}
2073	// Use the slow path. There is no boundary in [prevSrc, src[.
2074	}
2075	}
2076
2077	// Slow path: Find the nearest boundaries around the current character,
2078	// decompose and recompose.
2079	if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
2080	const uint8_t *p = prevSrc;
2081	UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16)do { int32_t __index = (uint8_t)--(p); if (!(((__index)& 0x80)==0)) { __index = ucptrie_internalU8PrevIndex_71((normTrie ), __index, (const uint8_t )(prevBoundary), (const uint8_t * )(p)); (p) -= __index & 7; __index >>= 3; } (norm16 ) = ((normTrie)->data.ptr16[__index]); } while (false);
2082	if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
2083	prevSrc = p;
2084	}
2085	}
2086	ReorderingBuffer buffer(*this, s16, errorCode);
2087	if (U_FAILURE(errorCode)) {
2088	break;
2089	}
2090	// We know there is not a boundary here.
2091	decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous,
2092	buffer, errorCode);
2093	// Decompose until the next boundary.
2094	src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,
2095	buffer, errorCode);
2096	if (U_FAILURE(errorCode)) {
2097	break;
2098	}
2099	if ((src - prevSrc) > INT32_MAX(2147483647)) { // guard before buffer.equals()
2100	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
2101	return TRUE1;
2102	}
2103	recompose(buffer, 0, onlyContiguous);
2104	if (!buffer.equals(prevSrc, src)) {
2105	if (sink == nullptr) {
2106	return FALSE0;
2107	}
2108	if (prevBoundary != prevSrc &&
2109	!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
2110	*sink, options, edits, errorCode)) {
2111	break;
2112	}
2113	if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(),
2114	*sink, edits, errorCode)) {
2115	break;
2116	}
2117	prevBoundary = src;
2118	}
2119	}
2120	return TRUE1;
2121	}
2122
2123	UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar src, const UChar limit) const {
2124	if (src == limit \|\| *src < minCompNoMaybeCP) {
2125	return TRUE1;
2126	}
2127	UChar32 c;
2128	uint16_t norm16;
2129	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16)do { (c) = (src)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (src ) != (limit) && (((__c2 = (src))&0xfffffc00)==0xdc00 )) { ++(src); (c) = (((UChar32)((c))<<10UL)+(UChar32)(__c2 )-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (norm16) = ((normTrie)->data.ptr16[__index]); } while (false);
2130	return norm16HasCompBoundaryBefore(norm16);
2131	}
2132
2133	UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t src, const uint8_t limit) const {
2134	if (src == limit) {
2135	return TRUE1;
2136	}
2137	uint16_t norm16;
2138	UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16)do { int32_t __lead = (uint8_t)(src)++; if (!(((__lead)& 0x80)==0)) { uint8_t __t1, __t2, __t3; if ((src) != (limit) && (__lead >= 0xe0 ? __lead < 0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" [__lead &= 0xf] & (1 << ((__t1 = (src)) >> 5)) && ++(src) != (limit) && (__t2 = (src) - 0x80) <= 0x3f && (__lead = ((int32_t)(normTrie)-> index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) : ( __lead -= 0xf0) <= 4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" [(__t1 = (src)) >> 4] & (1 << __lead) && (__lead = (__lead << 6) \| (__t1 & 0x3f), ++(src) != (limit)) && (__t2 = (src) - 0x80) <= 0x3f && ++(src) != (limit) && (__t3 = (src) - 0x80) <= 0x3f && (__lead = __lead >= (normTrie)->shifted12HighStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallU8Index_71((normTrie), __lead, __t2, __t3 ), 1) : __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && (__lead = (int32_t)(normTrie)->index[__lead & 0x1f] + __t1, 1))) { ++(src); } else { __lead = (normTrie )->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } ( norm16) = ((normTrie)->data.ptr16[__lead]); } while (false );
2139	return norm16HasCompBoundaryBefore(norm16);
2140	}
2141
2142	UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar start, const UChar p,
2143	UBool onlyContiguous) const {
2144	if (start == p) {
2145	return TRUE1;
2146	}
2147	UChar32 c;
2148	uint16_t norm16;
2149	UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16)do { (c) = --(p); int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)!=0) && (p ) != (start) && (((__c2 = ((p) - 1))&0xfffffc00) ==0xd800)) { --(p); (c) = (((UChar32)(__c2)<<10UL)+(UChar32 )((c))-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (norm16) = ((normTrie )->data.ptr16[__index]); } while (false);
2150	return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
2151	}
2152
2153	UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t start, const uint8_t p,
2154	UBool onlyContiguous) const {
2155	if (start == p) {
2156	return TRUE1;
2157	}
2158	uint16_t norm16;
2159	UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16)do { int32_t __index = (uint8_t)--(p); if (!(((__index)& 0x80)==0)) { __index = ucptrie_internalU8PrevIndex_71((normTrie ), __index, (const uint8_t )(start), (const uint8_t *)(p)); ( p) -= __index & 7; __index >>= 3; } (norm16) = ((normTrie )->data.ptr16[__index]); } while (false);
2160	return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
2161	}
2162
2163	const UChar Normalizer2Impl::findPreviousCompBoundary(const UChar start, const UChar *p,
2164	UBool onlyContiguous) const {
2165	while (p != start) {
2166	const UChar *codePointLimit = p;
2167	UChar32 c;
2168	uint16_t norm16;
2169	UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16)do { (c) = --(p); int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)!=0) && (p ) != (start) && (((__c2 = ((p) - 1))&0xfffffc00) ==0xd800)) { --(p); (c) = (((UChar32)(__c2)<<10UL)+(UChar32 )((c))-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (norm16) = ((normTrie )->data.ptr16[__index]); } while (false);
2170	if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
2171	return codePointLimit;
2172	}
2173	if (hasCompBoundaryBefore(c, norm16)) {
2174	return p;
2175	}
2176	}
2177	return p;
2178	}
2179
2180	const UChar Normalizer2Impl::findNextCompBoundary(const UChar p, const UChar *limit,
2181	UBool onlyContiguous) const {
2182	while (p != limit) {
2183	const UChar *codePointStart = p;
2184	UChar32 c;
2185	uint16_t norm16;
2186	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16)do { (c) = (p)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (p ) != (limit) && (((__c2 = (p))&0xfffffc00)==0xdc00 )) { ++(p); (c) = (((UChar32)((c))<<10UL)+(UChar32)(__c2 )-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (norm16) = ((normTrie)->data.ptr16[__index]); } while (false);
2187	if (hasCompBoundaryBefore(c, norm16)) {
2188	return codePointStart;
2189	}
2190	if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
2191	return p;
2192	}
2193	}
2194	return p;
2195	}
2196
2197	uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar start, const UChar p) const {
2198	if (start == p) {
2199	return 0;
2200	}
2201	int32_t i = (int32_t)(p - start);
2202	UChar32 c;
2203	U16_PREV(start, 0, i, c)do { (c)=(start)[--(i)]; if((((c)&0xfffffc00)==0xdc00)) { uint16_t __c2; if((i)>(0) && (((__c2=(start)[(i)- 1])&0xfffffc00)==0xd800)) { --(i); (c)=(((UChar32)(__c2)<< 10UL)+(UChar32)((c))-((0xd800<<10UL)+0xdc00-0x10000)); } } } while (false);
2204	return (uint8_t)getFCD16(c);
2205	}
2206
2207	uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t start, const uint8_t p) const {
2208	if (start == p) {
2209	return 0;
2210	}
2211	int32_t i = (int32_t)(p - start);
2212	UChar32 c;
2213	U8_PREV(start, 0, i, c)do { (c)=(uint8_t)(start)[--(i)]; if(!(((c)&0x80)==0)) { ( c)=utf8_prevCharSafeBody_71((const uint8_t *)start, 0, &( i), c, -1); } } while (false);
2214	return (uint8_t)getFCD16(c);
2215	}
2216
2217	// Note: normalizer2impl.cpp r30982 (2011-nov-27)
2218	// still had getFCDTrie() which built and cached an FCD trie.
2219	// That provided faster access to FCD data than getFCD16FromNormData()
2220	// but required synchronization and consumed some 10kB of heap memory
2221	// in any process that uses FCD (e.g., via collation).
2222	// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
2223	// at least for ASCII & CJK.
2224
2225	// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
2226	// function on Windows ARM64. As a work-around, we disable optimizations for this function.
2227	// This work-around could/should be removed once the following versions of Visual Studio are no
2228	// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
2229	#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
2230	#pragma optimize( "", off )
2231	#endif
2232	// Gets the FCD value from the regular normalization data.
2233	uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
2234	uint16_t norm16=getNorm16(c);
2235	if (norm16 >= limitNoNo) {
2236	if(norm16>=MIN_NORMAL_MAYBE_YES) {
2237	// combining mark
2238	norm16=getCCFromNormalYesOrMaybe(norm16);
2239	return norm16\|(norm16<<8);
2240	} else if(norm16>=minMaybeYes) {
2241	return 0;
2242	} else { // isDecompNoAlgorithmic(norm16)
2243	uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK;
2244	if (deltaTrailCC <= DELTA_TCCC_1) {
2245	return deltaTrailCC >> OFFSET_SHIFT;
2246	}
2247	// Maps to an isCompYesAndZeroCC.
2248	c=mapAlgorithmic(c, norm16);
2249	norm16=getRawNorm16(c);
2250	}
2251	}
2252	if(norm16<=minYesNo \|\| isHangulLVT(norm16)) {
2253	// no decomposition or Hangul syllable, all zeros
2254	return 0;
2255	}
2256	// c decomposes, get everything from the variable-length extra data
2257	const uint16_t *mapping=getMapping(norm16);
2258	uint16_t firstUnit=*mapping;
2259	norm16=firstUnit>>8; // tccc
2260	if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
2261	norm16\|=*(mapping-1)&0xff00; // lccc
2262	}
2263	return norm16;
2264	}
2265	#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
2266	#pragma optimize( "", on )
2267	#endif
2268
2269	// Dual functionality:
2270	// buffer!=NULL: normalize
2271	// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
2272	const UChar *
2273	Normalizer2Impl::makeFCD(const UChar src, const UChar limit,
2274	ReorderingBuffer *buffer,
2275	UErrorCode &errorCode) const {
2276	// Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
2277	// Similar to the prevBoundary in the compose() implementation.
2278	const UChar *prevBoundary=src;
2279	int32_t prevFCD16=0;
2280	if(limit==NULL__null) {
2281	src=copyLowPrefixFromNulTerminated(src, minLcccCP, buffer, errorCode);
2282	if(U_FAILURE(errorCode)) {
2283	return src;
2284	}
2285	if(prevBoundary<src) {
2286	prevBoundary=src;
2287	// We know that the previous character's lccc==0.
2288	// Fetching the fcd16 value was deferred for this below-U+0300 code point.
2289	prevFCD16=getFCD16(*(src-1));
2290	if(prevFCD16>1) {
2291	--prevBoundary;
2292	}
2293	}
2294	limit=u_strchru_strchr_71(src, 0);
2295	}
2296
2297	// Note: In this function we use buffer->appendZeroCC() because we track
2298	// the lead and trail combining classes here, rather than leaving it to
2299	// the ReorderingBuffer.
2300	// The exception is the call to decomposeShort() which uses the buffer
2301	// in the normal way.
2302
2303	const UChar *prevSrc;
2304	UChar32 c=0;
2305	uint16_t fcd16=0;
2306
2307	for(;;) {
2308	// count code units with lccc==0
2309	for(prevSrc=src; src!=limit;) {
2310	if((c=*src)<minLcccCP) {
2311	prevFCD16=~c;
2312	++src;
2313	} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
2314	prevFCD16=0;
2315	++src;
2316	} else {
2317	if(U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
2318	UChar c2;
2319	if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])(((c2=src[1])&0xfffffc00)==0xdc00)) {
2320	c=U16_GET_SUPPLEMENTARY(c, c2)(((UChar32)(c)<<10UL)+(UChar32)(c2)-((0xd800<<10UL )+0xdc00-0x10000));
2321	}
2322	}
2323	if((fcd16=getFCD16FromNormData(c))<=0xff) {
2324	prevFCD16=fcd16;
2325	src+=U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
2326	} else {
2327	break;
2328	}
2329	}
2330	}
2331	// copy these code units all at once
2332	if(src!=prevSrc) {
2333	if(buffer!=NULL__null && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
2334	break;
2335	}
2336	if(src==limit) {
2337	break;
2338	}
2339	prevBoundary=src;
2340	// We know that the previous character's lccc==0.
2341	if(prevFCD16<0) {
2342	// Fetching the fcd16 value was deferred for this below-minLcccCP code point.
2343	UChar32 prev=~prevFCD16;
2344	if(prev<minDecompNoCP) {
2345	prevFCD16=0;
2346	} else {
2347	prevFCD16=getFCD16FromNormData(prev);
2348	if(prevFCD16>1) {
2349	--prevBoundary;
2350	}
2351	}
2352	} else {
2353	const UChar *p=src-1;
2354	if(U16_IS_TRAIL(p)(((p)&0xfffffc00)==0xdc00) && prevSrc<p && U16_IS_LEAD((p-1))((((p-1))&0xfffffc00)==0xd800)) {
2355	--p;
2356	// Need to fetch the previous character's FCD value because
2357	// prevFCD16 was just for the trail surrogate code point.
2358	prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1])(((UChar32)(p[0])<<10UL)+(UChar32)(p[1])-((0xd800<< 10UL)+0xdc00-0x10000)));
2359	// Still known to have lccc==0 because its lead surrogate unit had lccc==0.
2360	}
2361	if(prevFCD16>1) {
2362	prevBoundary=p;
2363	}
2364	}
2365	// The start of the current character (c).
2366	prevSrc=src;
2367	} else if(src==limit) {
2368	break;
2369	}
2370
2371	src+=U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
2372	// The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
2373	// Check for proper order, and decompose locally if necessary.
2374	if((prevFCD16&0xff)<=(fcd16>>8)) {
2375	// proper order: prev tccc <= current lccc
2376	if((fcd16&0xff)<=1) {
2377	prevBoundary=src;
2378	}
2379	if(buffer!=NULL__null && !buffer->appendZeroCC(c, errorCode)) {
2380	break;
2381	}
2382	prevFCD16=fcd16;
2383	continue;
2384	} else if(buffer==NULL__null) {
2385	return prevBoundary; // quick check "no"
2386	} else {
2387	/*
2388	* Back out the part of the source that we copied or appended
2389	* already but is now going to be decomposed.
2390	* prevSrc is set to after what was copied/appended.
2391	*/
2392	buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
2393	/*
2394	* Find the part of the source that needs to be decomposed,
2395	* up to the next safe boundary.
2396	*/
2397	src=findNextFCDBoundary(src, limit);
2398	/*
2399	* The source text does not fulfill the conditions for FCD.
2400	* Decompose and reorder a limited piece of the text.
2401	*/
2402	decomposeShort(prevBoundary, src, FALSE0, FALSE0, *buffer, errorCode);
2403	if (U_FAILURE(errorCode)) {
2404	break;
2405	}
2406	prevBoundary=src;
2407	prevFCD16=0;
2408	}
2409	}
2410	return src;
2411	}
2412
2413	void Normalizer2Impl::makeFCDAndAppend(const UChar src, const UChar limit,
2414	UBool doMakeFCD,
2415	UnicodeString &safeMiddle,
2416	ReorderingBuffer &buffer,
2417	UErrorCode &errorCode) const {
2418	if(!buffer.isEmpty()) {
2419	const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
2420	if(src!=firstBoundaryInSrc) {
2421	const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
2422	buffer.getLimit());
2423	int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
2424	UnicodeString middle(lastBoundaryInDest, destSuffixLength);
2425	buffer.removeSuffix(destSuffixLength);
2426	safeMiddle=middle;
2427	middle.append(src, (int32_t)(firstBoundaryInSrc-src));
2428	const UChar *middleStart=middle.getBuffer();
2429	makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
2430	if(U_FAILURE(errorCode)) {
2431	return;
2432	}
2433	src=firstBoundaryInSrc;
2434	}
2435	}
2436	if(doMakeFCD) {
2437	makeFCD(src, limit, &buffer, errorCode);
2438	} else {
2439	if(limit==NULL__null) { // appendZeroCC() needs limit!=NULL
2440	limit=u_strchru_strchr_71(src, 0);
2441	}
2442	buffer.appendZeroCC(src, limit, errorCode);
2443	}
2444	}
2445
2446	const UChar Normalizer2Impl::findPreviousFCDBoundary(const UChar start, const UChar *p) const {
2447	while(start<p) {
2448	const UChar *codePointLimit = p;
2449	UChar32 c;
2450	uint16_t norm16;
2451	UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16)do { (c) = --(p); int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)!=0) && (p ) != (start) && (((__c2 = ((p) - 1))&0xfffffc00) ==0xd800)) { --(p); (c) = (((UChar32)(__c2)<<10UL)+(UChar32 )((c))-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; } } (norm16) = ((normTrie )->data.ptr16[__index]); } while (false);
2452	if (c < minDecompNoCP \|\| norm16HasDecompBoundaryAfter(norm16)) {
2453	return codePointLimit;
2454	}
2455	if (norm16HasDecompBoundaryBefore(norm16)) {
2456	return p;
2457	}
2458	}
2459	return p;
2460	}
2461
2462	const UChar Normalizer2Impl::findNextFCDBoundary(const UChar p, const UChar *limit) const {
2463	while(p<limit) {
2464	const UChar *codePointStart=p;
2465	UChar32 c;
2466	uint16_t norm16;
2467	UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16)do { (c) = (p)++; int32_t __index; if (!(((c)&0xfffff800 )==0xd800)) { __index = ((int32_t)(normTrie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)); } else { uint16_t __c2; if ((((c)&0x400)==0) && (p ) != (limit) && (((__c2 = (p))&0xfffffc00)==0xdc00 )) { ++(p); (c) = (((UChar32)((c))<<10UL)+(UChar32)(__c2 )-((0xd800<<10UL)+0xdc00-0x10000)); __index = ((c) >= (normTrie)->highStart ? (normTrie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71(normTrie, c)); } else { __index = (normTrie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET ; } } (norm16) = ((normTrie)->data.ptr16[__index]); } while (false);
2468	if (c < minLcccCP \|\| norm16HasDecompBoundaryBefore(norm16)) {
2469	return codePointStart;
2470	}
2471	if (norm16HasDecompBoundaryAfter(norm16)) {
2472	return p;
2473	}
2474	}
2475	return p;
2476	}
2477
2478	// CanonicalIterator data -------------------------------------------------- ***
2479
2480	CanonIterData::CanonIterData(UErrorCode &errorCode) :
2481	mutableTrie(umutablecptrie_openumutablecptrie_open_71(0, 0, &errorCode)), trie(nullptr),
2482	canonStartSets(uprv_deleteUObjectuprv_deleteUObject_71, NULL__null, errorCode) {}
2483
2484	CanonIterData::~CanonIterData() {
2485	umutablecptrie_closeumutablecptrie_close_71(mutableTrie);
2486	ucptrie_closeucptrie_close_71(trie);
2487	}
2488
2489	void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
2490	uint32_t canonValue = umutablecptrie_getumutablecptrie_get_71(mutableTrie, decompLead);
2491	if((canonValue&(CANON_HAS_SET0x200000\|CANON_VALUE_MASK0x1fffff))==0 && origin!=0) {
2492	// origin is the first character whose decomposition starts with
2493	// the character for which we are setting the value.
2494	umutablecptrie_setumutablecptrie_set_71(mutableTrie, decompLead, canonValue\|origin, &errorCode);
2495	} else {
2496	// origin is not the first character, or it is U+0000.
2497	UnicodeSet *set;
2498	if((canonValue&CANON_HAS_SET0x200000)==0) {
2499	LocalPointer<UnicodeSet> lpSet(new UnicodeSet, errorCode);
2500	set=lpSet.getAlias();
2501	if(U_FAILURE(errorCode)) {
2502	return;
2503	}
2504	UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK0x1fffff);
2505	canonValue=(canonValue&~CANON_VALUE_MASK0x1fffff)\|CANON_HAS_SET0x200000\|(uint32_t)canonStartSets.size();
2506	umutablecptrie_setumutablecptrie_set_71(mutableTrie, decompLead, canonValue, &errorCode);
2507	canonStartSets.adoptElement(lpSet.orphan(), errorCode);
2508	if (U_FAILURE(errorCode)) {
2509	return;
2510	}
2511	if(firstOrigin!=0) {
2512	set->add(firstOrigin);
2513	}
2514	} else {
2515	set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK0x1fffff)];
2516	}
2517	set->add(origin);
2518	}
2519	}
2520
2521	// C++ class for friend access to private Normalizer2Impl members.
2522	class InitCanonIterData {
2523	public:
2524	static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode);
2525	};
2526
2527	U_CDECL_BEGINextern "C" {
2528
2529	// UInitOnce instantiation function for CanonIterData
2530	static void U_CALLCONV
2531	initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
2532	InitCanonIterData::doInit(impl, errorCode);
2533	}
2534
2535	U_CDECL_END}
2536
2537	void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
2538	U_ASSERT(impl->fCanonIterData == NULL)(void)0;
2539	impl->fCanonIterData = new CanonIterData(errorCode);
2540	if (impl->fCanonIterData == NULL__null) {
2541	errorCode=U_MEMORY_ALLOCATION_ERROR;
2542	}
2543	if (U_SUCCESS(errorCode)) {
2544	UChar32 start = 0, end;
2545	uint32_t value;
2546	while ((end = ucptrie_getRangeucptrie_getRange_71(impl->normTrie, start,
2547	UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
2548	nullptr, nullptr, &value)) >= 0) {
2549	// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
2550	if (value != Normalizer2Impl::INERT) {
2551	impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
2552	}
2553	start = end + 1;
2554	}
2555	#ifdef UCPTRIE_DEBUG
2556	umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
2557	#endif
2558	impl->fCanonIterData->trie = umutablecptrie_buildImmutableumutablecptrie_buildImmutable_71(
2559	impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
2560	umutablecptrie_closeumutablecptrie_close_71(impl->fCanonIterData->mutableTrie);
2561	impl->fCanonIterData->mutableTrie = nullptr;
2562	}
2563	if (U_FAILURE(errorCode)) {
2564	delete impl->fCanonIterData;
2565	impl->fCanonIterData = NULL__null;
2566	}
2567	}
2568
2569	void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
2570	CanonIterData &newData,
2571	UErrorCode &errorCode) const {
2572	if(isInert(norm16) \|\| (minYesNo<=norm16 && norm16<minNoNo)) {
2573	// Inert, or 2-way mapping (including Hangul syllable).
2574	// We do not write a canonStartSet for any yesNo character.
2575	// Composites from 2-way mappings are added at runtime from the
2576	// starter's compositions list, and the other characters in
2577	// 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
2578	// "maybe" characters.
2579	return;
2580	}
2581	for(UChar32 c=start; c<=end; ++c) {
2582	uint32_t oldValue = umutablecptrie_getumutablecptrie_get_71(newData.mutableTrie, c);
2583	uint32_t newValue=oldValue;
2584	if(isMaybeOrNonZeroCC(norm16)) {
2585	// not a segment starter if it occurs in a decomposition or has cc!=0
2586	newValue\|=CANON_NOT_SEGMENT_STARTER0x80000000;
2587	if(norm16<MIN_NORMAL_MAYBE_YES) {
2588	newValue\|=CANON_HAS_COMPOSITIONS0x40000000;
2589	}
2590	} else if(norm16<minYesNo) {
2591	newValue\|=CANON_HAS_COMPOSITIONS0x40000000;
2592	} else {
2593	// c has a one-way decomposition
2594	UChar32 c2=c;
2595	// Do not modify the whole-range norm16 value.
2596	uint16_t norm16_2=norm16;
2597	if (isDecompNoAlgorithmic(norm16_2)) {
2598	// Maps to an isCompYesAndZeroCC.
2599	c2 = mapAlgorithmic(c2, norm16_2);
2600	norm16_2 = getRawNorm16(c2);
2601	// No compatibility mappings for the CanonicalIterator.
2602	U_ASSERT(!(isHangulLV(norm16_2) \|\| isHangulLVT(norm16_2)))(void)0;
2603	}
2604	if (norm16_2 > minYesNo) {
2605	// c decomposes, get everything from the variable-length extra data
2606	const uint16_t *mapping=getMapping(norm16_2);
2607	uint16_t firstUnit=*mapping;
2608	int32_t length=firstUnit&MAPPING_LENGTH_MASK;
2609	if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
2610	if(c==c2 && (*(mapping-1)&0xff)!=0) {
2611	newValue\|=CANON_NOT_SEGMENT_STARTER0x80000000; // original c has cc!=0
2612	}
2613	}
2614	// Skip empty mappings (no characters in the decomposition).
2615	if(length!=0) {
2616	++mapping; // skip over the firstUnit
2617	// add c to first code point's start set
2618	int32_t i=0;
2619	U16_NEXT_UNSAFE(mapping, i, c2)do { (c2)=(mapping)[(i)++]; if((((c2)&0xfffffc00)==0xd800 )) { (c2)=(((UChar32)((c2))<<10UL)+(UChar32)((mapping)[ (i)++])-((0xd800<<10UL)+0xdc00-0x10000)); } } while (false );
2620	newData.addToStartSet(c, c2, errorCode);
2621	// Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
2622	// one-way mapping. A 2-way mapping is possible here after
2623	// intermediate algorithmic mapping.
2624	if(norm16_2>=minNoNo) {
2625	while(i<length) {
2626	U16_NEXT_UNSAFE(mapping, i, c2)do { (c2)=(mapping)[(i)++]; if((((c2)&0xfffffc00)==0xd800 )) { (c2)=(((UChar32)((c2))<<10UL)+(UChar32)((mapping)[ (i)++])-((0xd800<<10UL)+0xdc00-0x10000)); } } while (false );
2627	uint32_t c2Value = umutablecptrie_getumutablecptrie_get_71(newData.mutableTrie, c2);
2628	if((c2Value&CANON_NOT_SEGMENT_STARTER0x80000000)==0) {
2629	umutablecptrie_setumutablecptrie_set_71(newData.mutableTrie, c2,
2630	c2Value\|CANON_NOT_SEGMENT_STARTER0x80000000, &errorCode);
2631	}
2632	}
2633	}
2634	}
2635	} else {
2636	// c decomposed to c2 algorithmically; c has cc==0
2637	newData.addToStartSet(c, c2, errorCode);
2638	}
2639	}
2640	if(newValue!=oldValue) {
2641	umutablecptrie_setumutablecptrie_set_71(newData.mutableTrie, c, newValue, &errorCode);
2642	}
2643	}
2644	}
2645
2646	UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
2647	// Logically const: Synchronized instantiation.
2648	Normalizer2Impl me=const_cast<Normalizer2Impl >(this);
2649	umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);
2650	return U_SUCCESS(errorCode);
2651	}
2652
2653	int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
2654	return (int32_t)ucptrie_getucptrie_get_71(fCanonIterData->trie, c);
2655	}
2656
2657	const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
2658	return (const UnicodeSet )fCanonIterData->canonStartSets[n];
2659	}
2660
2661	UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
2662	return getCanonValue(c)>=0;
2663	}
2664
2665	UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
2666	int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER0x80000000;
2667	if(canonValue==0) {
2668	return FALSE0;
2669	}
2670	set.clear();
2671	int32_t value=canonValue&CANON_VALUE_MASK0x1fffff;
2672	if((canonValue&CANON_HAS_SET0x200000)!=0) {
2673	set.addAll(getCanonStartSet(value));
2674	} else if(value!=0) {
2675	set.add(value);
2676	}
2677	if((canonValue&CANON_HAS_COMPOSITIONS0x40000000)!=0) {
2678	uint16_t norm16=getRawNorm16(c);
2679	if(norm16==JAMO_L) {
2680	UChar32 syllable=
2681	(UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
2682	set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
2683	} else {
2684	addComposites(getCompositionsList(norm16), set);
2685	}
2686	}
2687	return TRUE1;
2688	}
2689
2690	U_NAMESPACE_END}
2691
2692	// Normalizer2 data swapping ----------------------------------------------- ***
2693
2694	U_NAMESPACE_USEusing namespace icu_71;
2695
2696	U_CAPIextern "C" int32_t U_EXPORT2
2697	unorm2_swapunorm2_swap_71(const UDataSwapper *ds,
2698	const void inData, int32_t length, void outData,
2699	UErrorCode *pErrorCode) {
2700	const UDataInfo *pInfo;
2701	int32_t headerSize;
2702
2703	const uint8_t *inBytes;
2704	uint8_t *outBytes;
2705
2706	const int32_t *inIndexes;
2707	int32_t indexes[Normalizer2Impl::IX_TOTAL_SIZE+1];
2708
2709	int32_t i, offset, nextOffset, size;
2710
2711	/* udata_swapDataHeader checks the arguments */
2712	headerSize=udata_swapDataHeaderudata_swapDataHeader_71(ds, inData, length, outData, pErrorCode);
2713	if(pErrorCode==NULL__null \|\| U_FAILURE(*pErrorCode)) {
2714	return 0;
2715	}
2716
2717	/* check data format and format version */
2718	pInfo=(const UDataInfo )((const char )inData+4);
2719	uint8_t formatVersion0=pInfo->formatVersion[0];
2720	if(!(
2721	pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
2722	pInfo->dataFormat[1]==0x72 &&
2723	pInfo->dataFormat[2]==0x6d &&
2724	pInfo->dataFormat[3]==0x32 &&
2725	(1<=formatVersion0 && formatVersion0<=4)
2726	)) {
2727	udata_printErrorudata_printError_71(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
2728	pInfo->dataFormat[0], pInfo->dataFormat[1],
2729	pInfo->dataFormat[2], pInfo->dataFormat[3],
2730	pInfo->formatVersion[0]);
2731	*pErrorCode=U_UNSUPPORTED_ERROR;
2732	return 0;
2733	}
2734
2735	inBytes=(const uint8_t *)inData+headerSize;
2736	outBytes=(uint8_t *)outData+headerSize;
2737
2738	inIndexes=(const int32_t *)inBytes;
2739	int32_t minIndexesLength;
2740	if(formatVersion0==1) {
2741	minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
2742	} else if(formatVersion0==2) {
2743	minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
2744	} else {
2745	minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1;
2746	}
2747
2748	if(length>=0) {
2749	length-=headerSize;
2750	if(length<minIndexesLength*4) {
2751	udata_printErrorudata_printError_71(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
2752	length);
2753	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2754	return 0;
2755	}
2756	}
2757
2758	/* read the first few indexes */
2759	for(i=0; i<UPRV_LENGTHOF(indexes)(int32_t)(sizeof(indexes)/sizeof((indexes)[0])); ++i) {
2760	indexes[i]=udata_readInt32udata_readInt32_71(ds, inIndexes[i]);
2761	}
2762
2763	/* get the total length of the data */
2764	size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
2765
2766	if(length>=0) {
2767	if(length<size) {
2768	udata_printErrorudata_printError_71(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
2769	length);
2770	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2771	return 0;
2772	}
2773
2774	/* copy the data for inaccessible bytes */
2775	if(inBytes!=outBytes) {
2776	uprv_memcpy(outBytes, inBytes, size)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(outBytes, inBytes, size); } while (false);
2777	}
2778
2779	offset=0;
2780
2781	/* swap the int32_t indexes[] */
2782	nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
2783	ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
2784	offset=nextOffset;
2785
2786	/* swap the trie */
2787	nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
2788	utrie_swapAnyVersionutrie_swapAnyVersion_71(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
2789	offset=nextOffset;
2790
2791	/* swap the uint16_t extraData[] */
2792	nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
2793	ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
2794	offset=nextOffset;
	Value stored to 'offset' is never read
2795
2796	/* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
2797	nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
2798	offset=nextOffset;
2799
2800	U_ASSERT(offset==size)(void)0;
2801	}
2802
2803	return headerSize+size;
2804	}
2805
2806	#endif // !UCONFIG_NO_NORMALIZATION