../deps/icu-small/source/i18n/usearch.cpp

Bug Summary

File:	out/../deps/icu-small/source/i18n/usearch.cpp
Warning:	line 2337, column 22 Although the value stored to 'maxLimit' is used in the enclosing expression, the value is never actually read from 'maxLimit'

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name usearch.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_I18N_IMPLEMENTATION=1 -D U_IO_IMPLEMENTATION=1 -D U_TOOLUTIL_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -I ../deps/icu-small/source/i18n -I ../deps/icu-small/source/tools/toolutil -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/i18n/usearch.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 2001-2015 IBM and others. All rights reserved.
6	**********************************************************************
7	* Date Name Description
8	* 07/02/2001 synwee Creation.
9	**********************************************************************
10	*/
11
12	#include "unicode/utypes.h"
13
14	#if !UCONFIG_NO_COLLATION0 && !UCONFIG_NO_BREAK_ITERATION0
15
16	#include "unicode/usearch.h"
17	#include "unicode/ustring.h"
18	#include "unicode/uchar.h"
19	#include "unicode/utf16.h"
20	#include "normalizer2impl.h"
21	#include "usrchimp.h"
22	#include "cmemory.h"
23	#include "ucln_in.h"
24	#include "uassert.h"
25	#include "ustr_imp.h"
26
27	U_NAMESPACE_USEusing namespace icu_71;
28
29	// internal definition ---------------------------------------------------
30
31	#define LAST_BYTE_MASK_0xFF 0xFF
32	#define SECOND_LAST_BYTE_SHIFT_8 8
33	#define SUPPLEMENTARY_MIN_VALUE_0x10000 0x10000
34
35	static const Normalizer2Impl *g_nfcImpl = nullptr;
36
37	// internal methods -------------------------------------------------
38
39	/**
40	* Fast collation element iterator setOffset.
41	* This function does not check for bounds.
42	* @param coleiter collation element iterator
43	* @param offset to set
44	*/
45	static
46	inline void setColEIterOffset(UCollationElements *elems,
47	int32_t offset,
48	UErrorCode &status)
49	{
50	// Note: Not "fast" any more after the 2013 collation rewrite.
51	// We do not want to expose more internals than necessary.
52	ucol_setOffsetucol_setOffset_71(elems, offset, &status);
53	}
54
55	/**
56	* Getting the mask for collation strength
57	* @param strength collation strength
58	* @return collation element mask
59	*/
60	static
61	inline uint32_t getMask(UCollationStrength strength)
62	{
63	switch (strength)
64	{
65	case UCOL_PRIMARY:
66	return UCOL_PRIMARYORDERMASK0xffff0000;
67	case UCOL_SECONDARY:
68	return UCOL_SECONDARYORDERMASK0x0000ff00 \| UCOL_PRIMARYORDERMASK0xffff0000;
69	default:
70	return UCOL_TERTIARYORDERMASK0x000000ff \| UCOL_SECONDARYORDERMASK0x0000ff00 \|
71	UCOL_PRIMARYORDERMASK0xffff0000;
72	}
73	}
74
75	U_CDECL_BEGINextern "C" {
76	static UBool U_CALLCONV
77	usearch_cleanup(void) {
78	g_nfcImpl = nullptr;
79	return TRUE1;
80	}
81	U_CDECL_END}
82
83	/**
84	* Initializing the fcd tables.
85	* Internal method, status assumed to be a success.
86	* @param status output error if any, caller to check status before calling
87	* method, status assumed to be success when passed in.
88	*/
89	static
90	inline void initializeFCD(UErrorCode *status)
91	{
92	if (g_nfcImpl == nullptr) {
93	g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
94	ucln_i18n_registerCleanupucln_i18n_registerCleanup_71(UCLN_I18N_USEARCH, usearch_cleanup);
95	}
96	}
97
98	/**
99	* Gets the fcd value for a character at the argument index.
100	* This method takes into accounts of the supplementary characters.
101	* @param str UTF16 string where character for fcd retrieval resides
102	* @param offset position of the character whose fcd is to be retrieved, to be
103	* overwritten with the next character position, taking
104	* surrogate characters into consideration.
105	* @param strlength length of the argument string
106	* @return fcd value
107	*/
108	static
109	uint16_t getFCD(const UChar str, int32_t offset,
110	int32_t strlength)
111	{
112	const UChar temp = str + offset;
113	uint16_t result = g_nfcImpl->nextFCD16(temp, str + strlength);
114	*offset = (int32_t)(temp - str);
115	return result;
116	}
117
118	/**
119	* Getting the modified collation elements taking into account the collation
120	* attributes
121	* @param strsrch string search data
122	* @param sourcece
123	* @return the modified collation element
124	*/
125	static
126	inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
127	{
128	// note for tertiary we can't use the collator->tertiaryMask, that
129	// is a preprocessed mask that takes into account case options. since
130	// we are only concerned with exact matches, we don't need that.
131	sourcece &= strsrch->ceMask;
132
133	if (strsrch->toShift) {
134	// alternate handling here, since only the 16 most significant digits
135	// is only used, we can safely do a compare without masking
136	// if the ce is a variable, we mask and get only the primary values
137	// no shifting to quartenary is required since all primary values
138	// less than variabletop will need to be masked off anyway.
139	if (strsrch->variableTop > sourcece) {
140	if (strsrch->strength >= UCOL_QUATERNARY) {
141	sourcece &= UCOL_PRIMARYORDERMASK0xffff0000;
142	}
143	else {
144	sourcece = UCOL_IGNORABLE0;
145	}
146	}
147	} else if (strsrch->strength >= UCOL_QUATERNARY && sourcece == UCOL_IGNORABLE0) {
148	sourcece = 0xFFFF;
149	}
150
151	return sourcece;
152	}
153
154	/**
155	* Allocate a memory and returns nullptr if it failed.
156	* Internal method, status assumed to be a success.
157	* @param size to allocate
158	* @param status output error if any, caller to check status before calling
159	* method, status assumed to be success when passed in.
160	* @return newly allocated array, nullptr otherwise
161	*/
162	static
163	inline void * allocateMemory(uint32_t size, UErrorCode *status)
164	{
165	uint32_t result = (uint32_t )uprv_mallocuprv_malloc_71(size);
166	if (result == nullptr) {
167	*status = U_MEMORY_ALLOCATION_ERROR;
168	}
169	return result;
170	}
171
172	/**
173	* Adds a uint32_t value to a destination array.
174	* Creates a new array if we run out of space. The caller will have to
175	* manually deallocate the newly allocated array.
176	* Internal method, status assumed to be success, caller has to check status
177	* before calling this method. destination not to be nullptr and has at least
178	* size destinationlength.
179	* @param destination target array
180	* @param offset destination offset to add value
181	* @param destinationlength target array size, return value for the new size
182	* @param value to be added
183	* @param increments incremental size expected
184	* @param status output error if any, caller to check status before calling
185	* method, status assumed to be success when passed in.
186	* @return new destination array, destination if there was no new allocation
187	*/
188	static
189	inline int32_t * addTouint32_tArray(int32_t *destination,
190	uint32_t offset,
191	uint32_t *destinationlength,
192	uint32_t value,
193	uint32_t increments,
194	UErrorCode *status)
195	{
196	uint32_t newlength = *destinationlength;
197	if (offset + 1 == newlength) {
198	newlength += increments;
199	int32_t temp = (int32_t )allocateMemory(
200	sizeof(int32_t) * newlength, status);
201	if (U_FAILURE(*status)) {
202	return nullptr;
203	}
204	uprv_memcpy(temp, destination, sizeof(int32_t) * (size_t)offset)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(temp, destination , sizeof(int32_t) * (size_t)offset); } while (false);
205	*destinationlength = newlength;
206	destination = temp;
207	}
208	destination[offset] = value;
209	return destination;
210	}
211
212	/**
213	* Adds a uint64_t value to a destination array.
214	* Creates a new array if we run out of space. The caller will have to
215	* manually deallocate the newly allocated array.
216	* Internal method, status assumed to be success, caller has to check status
217	* before calling this method. destination not to be nullptr and has at least
218	* size destinationlength.
219	* @param destination target array
220	* @param offset destination offset to add value
221	* @param destinationlength target array size, return value for the new size
222	* @param value to be added
223	* @param increments incremental size expected
224	* @param status output error if any, caller to check status before calling
225	* method, status assumed to be success when passed in.
226	* @return new destination array, destination if there was no new allocation
227	*/
228	static
229	inline int64_t * addTouint64_tArray(int64_t *destination,
230	uint32_t offset,
231	uint32_t *destinationlength,
232	uint64_t value,
233	uint32_t increments,
234	UErrorCode *status)
235	{
236	uint32_t newlength = *destinationlength;
237	if (offset + 1 == newlength) {
238	newlength += increments;
239	int64_t temp = (int64_t )allocateMemory(
240	sizeof(int64_t) * newlength, status);
241
242	if (U_FAILURE(*status)) {
243	return nullptr;
244	}
245
246	uprv_memcpy(temp, destination, sizeof(int64_t) * (size_t)offset)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(temp, destination , sizeof(int64_t) * (size_t)offset); } while (false);
247	*destinationlength = newlength;
248	destination = temp;
249	}
250
251	destination[offset] = value;
252
253	return destination;
254	}
255
256	/**
257	* Initializing the ce table for a pattern.
258	* Stores non-ignorable collation keys.
259	* Table size will be estimated by the size of the pattern text. Table
260	* expansion will be perform as we go along. Adding 1 to ensure that the table
261	* size definitely increases.
262	* Internal method, status assumed to be a success.
263	* @param strsrch string search data
264	* @param status output error if any, caller to check status before calling
265	* method, status assumed to be success when passed in.
266	*/
267	static
268	inline void initializePatternCETable(UStringSearch strsrch, UErrorCode status)
269	{
270	UPattern *pattern = &(strsrch->pattern);
271	uint32_t cetablesize = INITIAL_ARRAY_SIZE_256;
272	int32_t *cetable = pattern->cesBuffer;
273	uint32_t patternlength = pattern->textLength;
274	UCollationElements *coleiter = strsrch->utilIter;
275
276	if (coleiter == nullptr) {
277	coleiter = ucol_openElementsucol_openElements_71(strsrch->collator, pattern->text,
278	patternlength, status);
279	// status will be checked in ucol_next(..) later and if it is an
280	// error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
281	// returned.
282	strsrch->utilIter = coleiter;
283	}
284	else {
285	ucol_setTextucol_setText_71(coleiter, pattern->text, pattern->textLength, status);
286	}
287	if(U_FAILURE(*status)) {
288	return;
289	}
290
291	if (pattern->ces != cetable && pattern->ces) {
292	uprv_freeuprv_free_71(pattern->ces);
293	}
294
295	uint32_t offset = 0;
296	int32_t ce;
297
298	while ((ce = ucol_nextucol_next_71(coleiter, status)) != UCOL_NULLORDER((int32_t)0xFFFFFFFF) &&
299	U_SUCCESS(*status)) {
300	uint32_t newce = getCE(strsrch, ce);
301	if (newce) {
302	int32_t *temp = addTouint32_tArray(cetable, offset, &cetablesize,
303	newce,
304	patternlength - ucol_getOffsetucol_getOffset_71(coleiter) + 1,
305	status);
306	if (U_FAILURE(*status)) {
307	return;
308	}
309	offset ++;
310	if (cetable != temp && cetable != pattern->cesBuffer) {
311	uprv_freeuprv_free_71(cetable);
312	}
313	cetable = temp;
314	}
315	}
316
317	cetable[offset] = 0;
318	pattern->ces = cetable;
319	pattern->cesLength = offset;
320	}
321
322	/**
323	* Initializing the pce table for a pattern.
324	* Stores non-ignorable collation keys.
325	* Table size will be estimated by the size of the pattern text. Table
326	* expansion will be perform as we go along. Adding 1 to ensure that the table
327	* size definitely increases.
328	* Internal method, status assumed to be a success.
329	* @param strsrch string search data
330	* @param status output error if any, caller to check status before calling
331	* method, status assumed to be success when passed in.
332	*/
333	static
334	inline void initializePatternPCETable(UStringSearch *strsrch,
335	UErrorCode *status)
336	{
337	UPattern *pattern = &(strsrch->pattern);
338	uint32_t pcetablesize = INITIAL_ARRAY_SIZE_256;
339	int64_t *pcetable = pattern->pcesBuffer;
340	uint32_t patternlength = pattern->textLength;
341	UCollationElements *coleiter = strsrch->utilIter;
342
343	if (coleiter == nullptr) {
344	coleiter = ucol_openElementsucol_openElements_71(strsrch->collator, pattern->text,
345	patternlength, status);
346	// status will be checked in nextProcessed(..) later and if it is an error
347	// then UCOL_PROCESSED_NULLORDER is returned by nextProcessed(..), so 0 will be
348	// returned.
349	strsrch->utilIter = coleiter;
350	} else {
351	ucol_setTextucol_setText_71(coleiter, pattern->text, pattern->textLength, status);
352	}
353	if(U_FAILURE(*status)) {
354	return;
355	}
356
357	if (pattern->pces != pcetable && pattern->pces != nullptr) {
358	uprv_freeuprv_free_71(pattern->pces);
359	}
360
361	uint32_t offset = 0;
362	int64_t pce;
363
364	icu::UCollationPCE iter(coleiter);
365
366	// ** Should processed CEs be signed or unsigned?
367	// ** (the rest of the code in this file seems to play fast-and-loose with
368	// ** whether a CE is signed or unsigned. For example, look at routine above this one.)
369	while ((pce = iter.nextProcessed(nullptr, nullptr, status)) != UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L))) &&
370	U_SUCCESS(*status)) {
371	int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
372	pce,
373	patternlength - ucol_getOffsetucol_getOffset_71(coleiter) + 1,
374	status);
375
376	if (U_FAILURE(*status)) {
377	return;
378	}
379
380	offset += 1;
381
382	if (pcetable != temp && pcetable != pattern->pcesBuffer) {
383	uprv_freeuprv_free_71(pcetable);
384	}
385
386	pcetable = temp;
387	}
388
389	pcetable[offset] = 0;
390	pattern->pces = pcetable;
391	pattern->pcesLength = offset;
392	}
393
394	/**
395	* Initializes the pattern struct.
396	* @param strsrch UStringSearch data storage
397	* @param status output error if any, caller to check status before calling
398	* method, status assumed to be success when passed in.
399	*/
400	static
401	inline void initializePattern(UStringSearch strsrch, UErrorCode status)
402	{
403	if (U_FAILURE(*status)) { return; }
404
405	UPattern *pattern = &(strsrch->pattern);
406	const UChar *patterntext = pattern->text;
407	int32_t length = pattern->textLength;
408	int32_t index = 0;
409
410	// Since the strength is primary, accents are ignored in the pattern.
411	if (strsrch->strength == UCOL_PRIMARY) {
412	pattern->hasPrefixAccents = 0;
413	pattern->hasSuffixAccents = 0;
414	} else {
415	pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
416	SECOND_LAST_BYTE_SHIFT_8;
417	index = length;
418	U16_BACK_1(patterntext, 0, index)do { if(((((patterntext)[--(index)])&0xfffffc00)==0xdc00) && (index)>(0) && ((((patterntext)[(index )-1])&0xfffffc00)==0xd800)) { --(index); } } while (false );
419	pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
420	LAST_BYTE_MASK_0xFF;
421	}
422
423	// HACK
424	if (strsrch->pattern.pces != nullptr) {
425	if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
426	uprv_freeuprv_free_71(strsrch->pattern.pces);
427	}
428
429	strsrch->pattern.pces = nullptr;
430	}
431
432	initializePatternCETable(strsrch, status);
433	}
434
435	/**
436	* Initializes the pattern struct and builds the pattern collation element table.
437	* @param strsrch UStringSearch data storage
438	* @param status for output errors if it occurs, status is assumed to be a
439	* success when it is passed in.
440	*/
441	static
442	inline void initialize(UStringSearch strsrch, UErrorCode status)
443	{
444	initializePattern(strsrch, status);
445	}
446
447	#if !UCONFIG_NO_BREAK_ITERATION0
448	// If the caller provided a character breakiterator we'll return that,
449	// otherwise we lazily create the internal break iterator.
450	static UBreakIterator* getBreakIterator(UStringSearch *strsrch, UErrorCode &status)
451	{
452	if (U_FAILURE(status)) {
453	return nullptr;
454	}
455
456	if (strsrch->search->breakIter != nullptr) {
457	return strsrch->search->breakIter;
458	}
459
460	if (strsrch->search->internalBreakIter != nullptr) {
461	return strsrch->search->internalBreakIter;
462	}
463
464	// Need to create the internal break iterator.
465	strsrch->search->internalBreakIter = ubrk_openubrk_open_71(UBRK_CHARACTER,
466	ucol_getLocaleByTypeucol_getLocaleByType_71(strsrch->collator, ULOC_VALID_LOCALE, &status),
467	strsrch->search->text, strsrch->search->textLength, &status);
468
469	return strsrch->search->internalBreakIter;
470	}
471	#endif
472
473	/**
474	* Sets the match result to "not found", regardless of the incoming error status.
475	* If an error occurs while setting the result, it is reported back.
476	*
477	* @param strsrch string search data
478	* @param status for output errors, if they occur.
479	*/
480	static
481	inline void setMatchNotFound(UStringSearch *strsrch, UErrorCode &status)
482	{
483	UErrorCode localStatus = U_ZERO_ERROR;
484
485	strsrch->search->matchedIndex = USEARCH_DONE-1;
486	strsrch->search->matchedLength = 0;
487	if (strsrch->search->isForwardSearching) {
488	setColEIterOffset(strsrch->textIter, strsrch->search->textLength, localStatus);
489	}
490	else {
491	setColEIterOffset(strsrch->textIter, 0, localStatus);
492	}
493
494	// If an error occurred while setting the result to not found (ex: OOM),
495	// then we want to report that error back to the caller.
496	if (U_SUCCESS(status) && U_FAILURE(localStatus)) {
497	status = localStatus;
498	}
499	}
500
501	/**
502	* Checks if the offset runs out of the text string
503	* @param offset
504	* @param textlength of the text string
505	* @return TRUE if offset is out of bounds, FALSE otherwise
506	*/
507	static
508	inline UBool isOutOfBounds(int32_t textlength, int32_t offset)
509	{
510	return offset < 0 \|\| offset > textlength;
511	}
512
513	/**
514	* Checks for identical match
515	* @param strsrch string search data
516	* @param start offset of possible match
517	* @param end offset of possible match
518	* @return TRUE if identical match is found
519	*/
520	static
521	inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, int32_t end)
522	{
523	if (strsrch->strength != UCOL_IDENTICAL) {
524	return TRUE1;
525	}
526
527	// Note: We could use Normalizer::compare() or similar, but for short strings
528	// which may not be in FCD it might be faster to just NFD them.
529	UErrorCode status = U_ZERO_ERROR;
530	UnicodeString t2, p2;
531	strsrch->nfd->normalize(
532	UnicodeString(FALSE0, strsrch->search->text + start, end - start), t2, status);
533	strsrch->nfd->normalize(
534	UnicodeString(FALSE0, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
535	// return FALSE if NFD failed
536	return U_SUCCESS(status) && t2 == p2;
537	}
538
539	// constructors and destructor -------------------------------------------
540
541	U_CAPIextern "C" UStringSearch * U_EXPORT2 usearch_openusearch_open_71(const UChar *pattern,
542	int32_t patternlength,
543	const UChar *text,
544	int32_t textlength,
545	const char *locale,
546	UBreakIterator *breakiter,
547	UErrorCode *status)
548	{
549	if (U_FAILURE(*status)) {
550	return nullptr;
551	}
552	#if UCONFIG_NO_BREAK_ITERATION0
553	if (breakiter != nullptr) {
554	*status = U_UNSUPPORTED_ERROR;
555	return nullptr;
556	}
557	#endif
558	if (locale) {
559	// ucol_open internally checks for status
560	UCollator *collator = ucol_openucol_open_71(locale, status);
561	// pattern, text checks are done in usearch_openFromCollator
562	UStringSearch *result = usearch_openFromCollatorusearch_openFromCollator_71(pattern,
563	patternlength, text, textlength,
564	collator, breakiter, status);
565
566	if (result == nullptr \|\| U_FAILURE(*status)) {
567	if (collator) {
568	ucol_closeucol_close_71(collator);
569	}
570	return nullptr;
571	}
572	else {
573	result->ownCollator = TRUE1;
574	}
575	return result;
576	}
577	*status = U_ILLEGAL_ARGUMENT_ERROR;
578	return nullptr;
579	}
580
581	U_CAPIextern "C" UStringSearch * U_EXPORT2 usearch_openFromCollatorusearch_openFromCollator_71(
582	const UChar *pattern,
583	int32_t patternlength,
584	const UChar *text,
585	int32_t textlength,
586	const UCollator *collator,
587	UBreakIterator *breakiter,
588	UErrorCode *status)
589	{
590	if (U_FAILURE(*status)) {
591	return nullptr;
592	}
593	#if UCONFIG_NO_BREAK_ITERATION0
594	if (breakiter != nullptr) {
595	*status = U_UNSUPPORTED_ERROR;
596	return nullptr;
597	}
598	#endif
599	if (pattern == nullptr \|\| text == nullptr \|\| collator == nullptr) {
600	*status = U_ILLEGAL_ARGUMENT_ERROR;
601	return nullptr;
602	}
603
604	// string search does not really work when numeric collation is turned on
605	if(ucol_getAttributeucol_getAttribute_71(collator, UCOL_NUMERIC_COLLATION, status) == UCOL_ON) {
606	*status = U_UNSUPPORTED_ERROR;
607	return nullptr;
608	}
609
610	if (U_SUCCESS(*status)) {
611	initializeFCD(status);
612	if (U_FAILURE(*status)) {
613	return nullptr;
614	}
615
616	UStringSearch *result;
617	if (textlength == -1) {
618	textlength = u_strlenu_strlen_71(text);
619	}
620	if (patternlength == -1) {
621	patternlength = u_strlenu_strlen_71(pattern);
622	}
623	if (textlength <= 0 \|\| patternlength <= 0) {
624	*status = U_ILLEGAL_ARGUMENT_ERROR;
625	return nullptr;
626	}
627
628	result = (UStringSearch *)uprv_mallocuprv_malloc_71(sizeof(UStringSearch));
629	if (result == nullptr) {
630	*status = U_MEMORY_ALLOCATION_ERROR;
631	return nullptr;
632	}
633
634	result->collator = collator;
635	result->strength = ucol_getStrengthucol_getStrength_71(collator);
636	result->ceMask = getMask(result->strength);
637	result->toShift =
638	ucol_getAttributeucol_getAttribute_71(collator, UCOL_ALTERNATE_HANDLING, status) ==
639	UCOL_SHIFTED;
640	result->variableTop = ucol_getVariableTopucol_getVariableTop_71(collator, status);
641
642	result->nfd = Normalizer2::getNFDInstance(*status);
643
644	if (U_FAILURE(*status)) {
645	uprv_freeuprv_free_71(result);
646	return nullptr;
647	}
648
649	result->search = (USearch *)uprv_mallocuprv_malloc_71(sizeof(USearch));
650	if (result->search == nullptr) {
651	*status = U_MEMORY_ALLOCATION_ERROR;
652	uprv_freeuprv_free_71(result);
653	return nullptr;
654	}
655
656	result->search->text = text;
657	result->search->textLength = textlength;
658
659	result->pattern.text = pattern;
660	result->pattern.textLength = patternlength;
661	result->pattern.ces = nullptr;
662	result->pattern.pces = nullptr;
663
664	result->search->breakIter = breakiter;
665	#if !UCONFIG_NO_BREAK_ITERATION0
666	result->search->internalBreakIter = nullptr; // Lazily created.
667	if (breakiter) {
668	ubrk_setTextubrk_setText_71(breakiter, text, textlength, status);
669	}
670	#endif
671
672	result->ownCollator = FALSE0;
673	result->search->matchedLength = 0;
674	result->search->matchedIndex = USEARCH_DONE-1;
675	result->utilIter = nullptr;
676	result->textIter = ucol_openElementsucol_openElements_71(collator, text,
677	textlength, status);
678	result->textProcessedIter = nullptr;
679	if (U_FAILURE(*status)) {
680	usearch_closeusearch_close_71(result);
681	return nullptr;
682	}
683
684	result->search->isOverlap = FALSE0;
685	result->search->isCanonicalMatch = FALSE0;
686	result->search->elementComparisonType = 0;
687	result->search->isForwardSearching = TRUE1;
688	result->search->reset = TRUE1;
689
690	initialize(result, status);
691
692	if (U_FAILURE(*status)) {
693	usearch_closeusearch_close_71(result);
694	return nullptr;
695	}
696
697	return result;
698	}
699	return nullptr;
700	}
701
702	U_CAPIextern "C" void U_EXPORT2 usearch_closeusearch_close_71(UStringSearch *strsrch)
703	{
704	if (strsrch) {
705	if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
706	strsrch->pattern.ces) {
707	uprv_freeuprv_free_71(strsrch->pattern.ces);
708	}
709
710	if (strsrch->pattern.pces != nullptr &&
711	strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
712	uprv_freeuprv_free_71(strsrch->pattern.pces);
713	}
714
715	delete strsrch->textProcessedIter;
716	ucol_closeElementsucol_closeElements_71(strsrch->textIter);
717	ucol_closeElementsucol_closeElements_71(strsrch->utilIter);
718
719	if (strsrch->ownCollator && strsrch->collator) {
720	ucol_closeucol_close_71((UCollator *)strsrch->collator);
721	}
722
723	#if !UCONFIG_NO_BREAK_ITERATION0
724	if (strsrch->search->internalBreakIter != nullptr) {
725	ubrk_closeubrk_close_71(strsrch->search->internalBreakIter);
726	}
727	#endif
728
729	uprv_freeuprv_free_71(strsrch->search);
730	uprv_freeuprv_free_71(strsrch);
731	}
732	}
733
734	namespace {
735
736	UBool initTextProcessedIter(UStringSearch strsrch, UErrorCode status) {
737	if (U_FAILURE(*status)) { return FALSE0; }
738	if (strsrch->textProcessedIter == nullptr) {
739	strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
740	if (strsrch->textProcessedIter == nullptr) {
741	*status = U_MEMORY_ALLOCATION_ERROR;
742	return FALSE0;
743	}
744	} else {
745	strsrch->textProcessedIter->init(strsrch->textIter);
746	}
747	return TRUE1;
748	}
749
750	}
751
752	// set and get methods --------------------------------------------------
753
754	U_CAPIextern "C" void U_EXPORT2 usearch_setOffsetusearch_setOffset_71(UStringSearch *strsrch,
755	int32_t position,
756	UErrorCode *status)
757	{
758	if (U_SUCCESS(*status) && strsrch) {
759	if (isOutOfBounds(strsrch->search->textLength, position)) {
760	*status = U_INDEX_OUTOFBOUNDS_ERROR;
761	}
762	else {
763	setColEIterOffset(strsrch->textIter, position, *status);
764	}
765	strsrch->search->matchedIndex = USEARCH_DONE-1;
766	strsrch->search->matchedLength = 0;
767	strsrch->search->reset = FALSE0;
768	}
769	}
770
771	U_CAPIextern "C" int32_t U_EXPORT2 usearch_getOffsetusearch_getOffset_71(const UStringSearch *strsrch)
772	{
773	if (strsrch) {
774	int32_t result = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
775	if (isOutOfBounds(strsrch->search->textLength, result)) {
776	return USEARCH_DONE-1;
777	}
778	return result;
779	}
780	return USEARCH_DONE-1;
781	}
782
783	U_CAPIextern "C" void U_EXPORT2 usearch_setAttributeusearch_setAttribute_71(UStringSearch *strsrch,
784	USearchAttribute attribute,
785	USearchAttributeValue value,
786	UErrorCode *status)
787	{
788	if (U_SUCCESS(*status) && strsrch) {
789	switch (attribute)
790	{
791	case USEARCH_OVERLAP :
792	strsrch->search->isOverlap = (value == USEARCH_ON ? TRUE1 : FALSE0);
793	break;
794	case USEARCH_CANONICAL_MATCH :
795	strsrch->search->isCanonicalMatch = (value == USEARCH_ON ? TRUE1 :
796	FALSE0);
797	break;
798	case USEARCH_ELEMENT_COMPARISON :
799	if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD \|\| value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
800	strsrch->search->elementComparisonType = (int16_t)value;
801	} else {
802	strsrch->search->elementComparisonType = 0;
803	}
804	break;
805	case USEARCH_ATTRIBUTE_COUNT :
806	default:
807	*status = U_ILLEGAL_ARGUMENT_ERROR;
808	}
809	}
810	if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
811	*status = U_ILLEGAL_ARGUMENT_ERROR;
812	}
813	}
814
815	U_CAPIextern "C" USearchAttributeValue U_EXPORT2 usearch_getAttributeusearch_getAttribute_71(
816	const UStringSearch *strsrch,
817	USearchAttribute attribute)
818	{
819	if (strsrch) {
820	switch (attribute) {
821	case USEARCH_OVERLAP :
822	return (strsrch->search->isOverlap == TRUE1 ? USEARCH_ON :
823	USEARCH_OFF);
824	case USEARCH_CANONICAL_MATCH :
825	return (strsrch->search->isCanonicalMatch == TRUE1 ? USEARCH_ON :
826	USEARCH_OFF);
827	case USEARCH_ELEMENT_COMPARISON :
828	{
829	int16_t value = strsrch->search->elementComparisonType;
830	if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD \|\| value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
831	return (USearchAttributeValue)value;
832	} else {
833	return USEARCH_STANDARD_ELEMENT_COMPARISON;
834	}
835	}
836	case USEARCH_ATTRIBUTE_COUNT :
837	return USEARCH_DEFAULT;
838	}
839	}
840	return USEARCH_DEFAULT;
841	}
842
843	U_CAPIextern "C" int32_t U_EXPORT2 usearch_getMatchedStartusearch_getMatchedStart_71(
844	const UStringSearch *strsrch)
845	{
846	if (strsrch == nullptr) {
847	return USEARCH_DONE-1;
848	}
849	return strsrch->search->matchedIndex;
850	}
851
852
853	U_CAPIextern "C" int32_t U_EXPORT2 usearch_getMatchedTextusearch_getMatchedText_71(const UStringSearch *strsrch,
854	UChar *result,
855	int32_t resultCapacity,
856	UErrorCode *status)
857	{
858	if (U_FAILURE(*status)) {
859	return USEARCH_DONE-1;
860	}
861	if (strsrch == nullptr \|\| resultCapacity < 0 \|\| (resultCapacity > 0 &&
862	result == nullptr)) {
863	*status = U_ILLEGAL_ARGUMENT_ERROR;
864	return USEARCH_DONE-1;
865	}
866
867	int32_t copylength = strsrch->search->matchedLength;
868	int32_t copyindex = strsrch->search->matchedIndex;
869	if (copyindex == USEARCH_DONE-1) {
870	u_terminateUCharsu_terminateUChars_71(result, resultCapacity, 0, status);
871	return USEARCH_DONE-1;
872	}
873
874	if (resultCapacity < copylength) {
875	copylength = resultCapacity;
876	}
877	if (copylength > 0) {
878	uprv_memcpy(result, strsrch->search->text + copyindex,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(result, strsrch ->search->text + copyindex, copylength * sizeof(UChar)) ; } while (false)
879	copylength * sizeof(UChar))do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(result, strsrch ->search->text + copyindex, copylength * sizeof(UChar)) ; } while (false);
880	}
881	return u_terminateUCharsu_terminateUChars_71(result, resultCapacity,
882	strsrch->search->matchedLength, status);
883	}
884
885	U_CAPIextern "C" int32_t U_EXPORT2 usearch_getMatchedLengthusearch_getMatchedLength_71(
886	const UStringSearch *strsrch)
887	{
888	if (strsrch) {
889	return strsrch->search->matchedLength;
890	}
891	return USEARCH_DONE-1;
892	}
893
894	#if !UCONFIG_NO_BREAK_ITERATION0
895
896	U_CAPIextern "C" void U_EXPORT2 usearch_setBreakIteratorusearch_setBreakIterator_71(UStringSearch *strsrch,
897	UBreakIterator *breakiter,
898	UErrorCode *status)
899	{
900	if (U_SUCCESS(*status) && strsrch) {
901	strsrch->search->breakIter = breakiter;
902	if (breakiter) {
903	ubrk_setTextubrk_setText_71(breakiter, strsrch->search->text,
904	strsrch->search->textLength, status);
905	}
906	}
907	}
908
909	U_CAPIextern "C" const UBreakIterator* U_EXPORT2
910	usearch_getBreakIteratorusearch_getBreakIterator_71(const UStringSearch *strsrch)
911	{
912	if (strsrch) {
913	return strsrch->search->breakIter;
914	}
915	return nullptr;
916	}
917
918	#endif
919
920	U_CAPIextern "C" void U_EXPORT2 usearch_setTextusearch_setText_71( UStringSearch *strsrch,
921	const UChar *text,
922	int32_t textlength,
923	UErrorCode *status)
924	{
925	if (U_SUCCESS(*status)) {
926	if (strsrch == nullptr \|\| text == nullptr \|\| textlength < -1 \|\|
927	textlength == 0) {
928	*status = U_ILLEGAL_ARGUMENT_ERROR;
929	}
930	else {
931	if (textlength == -1) {
932	textlength = u_strlenu_strlen_71(text);
933	}
934	strsrch->search->text = text;
935	strsrch->search->textLength = textlength;
936	ucol_setTextucol_setText_71(strsrch->textIter, text, textlength, status);
937	strsrch->search->matchedIndex = USEARCH_DONE-1;
938	strsrch->search->matchedLength = 0;
939	strsrch->search->reset = TRUE1;
940	#if !UCONFIG_NO_BREAK_ITERATION0
941	if (strsrch->search->breakIter != nullptr) {
942	ubrk_setTextubrk_setText_71(strsrch->search->breakIter, text,
943	textlength, status);
944	}
945	if (strsrch->search->internalBreakIter != nullptr) {
946	ubrk_setTextubrk_setText_71(strsrch->search->internalBreakIter, text, textlength, status);
947	}
948	#endif
949	}
950	}
951	}
952
953	U_CAPIextern "C" const UChar * U_EXPORT2 usearch_getTextusearch_getText_71(const UStringSearch *strsrch,
954	int32_t *length)
955	{
956	if (strsrch) {
957	*length = strsrch->search->textLength;
958	return strsrch->search->text;
959	}
960	return nullptr;
961	}
962
963	U_CAPIextern "C" void U_EXPORT2 usearch_setCollatorusearch_setCollator_71( UStringSearch *strsrch,
964	const UCollator *collator,
965	UErrorCode *status)
966	{
967	if (U_SUCCESS(*status)) {
968	if (collator == nullptr) {
969	*status = U_ILLEGAL_ARGUMENT_ERROR;
970	return;
971	}
972
973	if (strsrch) {
974	delete strsrch->textProcessedIter;
975	strsrch->textProcessedIter = nullptr;
976	ucol_closeElementsucol_closeElements_71(strsrch->textIter);
977	ucol_closeElementsucol_closeElements_71(strsrch->utilIter);
978	strsrch->textIter = strsrch->utilIter = nullptr;
979	if (strsrch->ownCollator && (strsrch->collator != collator)) {
980	ucol_closeucol_close_71((UCollator *)strsrch->collator);
981	strsrch->ownCollator = FALSE0;
982	}
983	strsrch->collator = collator;
984	strsrch->strength = ucol_getStrengthucol_getStrength_71(collator);
985	strsrch->ceMask = getMask(strsrch->strength);
986	#if !UCONFIG_NO_BREAK_ITERATION0
987	if (strsrch->search->internalBreakIter != nullptr) {
988	ubrk_closeubrk_close_71(strsrch->search->internalBreakIter);
989	strsrch->search->internalBreakIter = nullptr; // Lazily created.
990	}
991	#endif
992	// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
993	strsrch->toShift =
994	ucol_getAttributeucol_getAttribute_71(collator, UCOL_ALTERNATE_HANDLING, status) ==
995	UCOL_SHIFTED;
996	// if status is a failure, ucol_getVariableTop returns 0
997	strsrch->variableTop = ucol_getVariableTopucol_getVariableTop_71(collator, status);
998	strsrch->textIter = ucol_openElementsucol_openElements_71(collator,
999	strsrch->search->text,
1000	strsrch->search->textLength,
1001	status);
1002	strsrch->utilIter = ucol_openElementsucol_openElements_71(
1003	collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
1004	// initialize() _after_ setting the iterators for the new collator.
1005	initialize(strsrch, status);
1006	}
1007
1008	// **** are these calls needed?
1009	// **** we call uprv_init_pce in initializePatternPCETable
1010	// **** and the CEIBuffer constructor...
1011	#if 0
1012	uprv_init_pce(strsrch->textIter);
1013	uprv_init_pce(strsrch->utilIter);
1014	#endif
1015	}
1016	}
1017
1018	U_CAPIextern "C" UCollator * U_EXPORT2 usearch_getCollatorusearch_getCollator_71(const UStringSearch *strsrch)
1019	{
1020	if (strsrch) {
1021	return (UCollator *)strsrch->collator;
1022	}
1023	return nullptr;
1024	}
1025
1026	U_CAPIextern "C" void U_EXPORT2 usearch_setPatternusearch_setPattern_71( UStringSearch *strsrch,
1027	const UChar *pattern,
1028	int32_t patternlength,
1029	UErrorCode *status)
1030	{
1031	if (U_SUCCESS(*status)) {
1032	if (strsrch == nullptr \|\| pattern == nullptr) {
1033	*status = U_ILLEGAL_ARGUMENT_ERROR;
1034	}
1035	else {
1036	if (patternlength == -1) {
1037	patternlength = u_strlenu_strlen_71(pattern);
1038	}
1039	if (patternlength == 0) {
1040	*status = U_ILLEGAL_ARGUMENT_ERROR;
1041	return;
1042	}
1043	strsrch->pattern.text = pattern;
1044	strsrch->pattern.textLength = patternlength;
1045	initialize(strsrch, status);
1046	}
1047	}
1048	}
1049
1050	U_CAPIextern "C" const UChar* U_EXPORT2
1051	usearch_getPatternusearch_getPattern_71(const UStringSearch *strsrch,
1052	int32_t *length)
1053	{
1054	if (strsrch) {
1055	*length = strsrch->pattern.textLength;
1056	return strsrch->pattern.text;
1057	}
1058	return nullptr;
1059	}
1060
1061	// miscellaneous methods --------------------------------------------------
1062
1063	U_CAPIextern "C" int32_t U_EXPORT2 usearch_firstusearch_first_71(UStringSearch *strsrch,
1064	UErrorCode *status)
1065	{
1066	if (strsrch && U_SUCCESS(*status)) {
1067	strsrch->search->isForwardSearching = TRUE1;
1068	usearch_setOffsetusearch_setOffset_71(strsrch, 0, status);
1069	if (U_SUCCESS(*status)) {
1070	return usearch_nextusearch_next_71(strsrch, status);
1071	}
1072	}
1073	return USEARCH_DONE-1;
1074	}
1075
1076	U_CAPIextern "C" int32_t U_EXPORT2 usearch_followingusearch_following_71(UStringSearch *strsrch,
1077	int32_t position,
1078	UErrorCode *status)
1079	{
1080	if (strsrch && U_SUCCESS(*status)) {
1081	strsrch->search->isForwardSearching = TRUE1;
1082	// position checked in usearch_setOffset
1083	usearch_setOffsetusearch_setOffset_71(strsrch, position, status);
1084	if (U_SUCCESS(*status)) {
1085	return usearch_nextusearch_next_71(strsrch, status);
1086	}
1087	}
1088	return USEARCH_DONE-1;
1089	}
1090
1091	U_CAPIextern "C" int32_t U_EXPORT2 usearch_lastusearch_last_71(UStringSearch *strsrch,
1092	UErrorCode *status)
1093	{
1094	if (strsrch && U_SUCCESS(*status)) {
1095	strsrch->search->isForwardSearching = FALSE0;
1096	usearch_setOffsetusearch_setOffset_71(strsrch, strsrch->search->textLength, status);
1097	if (U_SUCCESS(*status)) {
1098	return usearch_previoususearch_previous_71(strsrch, status);
1099	}
1100	}
1101	return USEARCH_DONE-1;
1102	}
1103
1104	U_CAPIextern "C" int32_t U_EXPORT2 usearch_precedingusearch_preceding_71(UStringSearch *strsrch,
1105	int32_t position,
1106	UErrorCode *status)
1107	{
1108	if (strsrch && U_SUCCESS(*status)) {
1109	strsrch->search->isForwardSearching = FALSE0;
1110	// position checked in usearch_setOffset
1111	usearch_setOffsetusearch_setOffset_71(strsrch, position, status);
1112	if (U_SUCCESS(*status)) {
1113	return usearch_previoususearch_previous_71(strsrch, status);
1114	}
1115	}
1116	return USEARCH_DONE-1;
1117	}
1118
1119	/**
1120	* If a direction switch is required, we'll count the number of ces till the
1121	* beginning of the collation element iterator and iterate forwards that
1122	* number of times. This is so that we get to the correct point within the
1123	* string to continue the search in. Imagine when we are in the middle of the
1124	* normalization buffer when the change in direction is request. arrrgghh....
1125	* After searching the offset within the collation element iterator will be
1126	* shifted to the start of the match. If a match is not found, the offset would
1127	* have been set to the end of the text string in the collation element
1128	* iterator.
1129	* Okay, here's my take on normalization buffer. The only time when there can
1130	* be 2 matches within the same normalization is when the pattern is consists
1131	* of all accents. But since the offset returned is from the text string, we
1132	* should not confuse the caller by returning the second match within the
1133	* same normalization buffer. If we do, the 2 results will have the same match
1134	* offsets, and that'll be confusing. I'll return the next match that doesn't
1135	* fall within the same normalization buffer. Note this does not affect the
1136	* results of matches spanning the text and the normalization buffer.
1137	* The position to start searching is taken from the collation element
1138	* iterator. Callers of this API would have to set the offset in the collation
1139	* element iterator before using this method.
1140	*/
1141	U_CAPIextern "C" int32_t U_EXPORT2 usearch_nextusearch_next_71(UStringSearch *strsrch,
1142	UErrorCode *status)
1143	{
1144	if (U_SUCCESS(*status) && strsrch) {
1145	// note offset is either equivalent to the start of the previous match
1146	// or is set by the user
1147	int32_t offset = usearch_getOffsetusearch_getOffset_71(strsrch);
1148	USearch *search = strsrch->search;
1149	search->reset = FALSE0;
1150	int32_t textlength = search->textLength;
1151	if (search->isForwardSearching) {
1152	if (offset == textlength \|\|
1153	(! search->isOverlap &&
1154	(search->matchedIndex != USEARCH_DONE-1 &&
1155	offset + search->matchedLength > textlength))) {
1156	// not enough characters to match
1157	setMatchNotFound(strsrch, *status);
1158	return USEARCH_DONE-1;
1159	}
1160	}
1161	else {
1162	// switching direction.
1163	// if matchedIndex == USEARCH_DONE, it means that either a
1164	// setOffset has been called or that previous ran off the text
1165	// string. the iterator would have been set to offset 0 if a
1166	// match is not found.
1167	search->isForwardSearching = TRUE1;
1168	if (search->matchedIndex != USEARCH_DONE-1) {
1169	// there's no need to set the collation element iterator
1170	// the next call to next will set the offset.
1171	return search->matchedIndex;
1172	}
1173	}
1174
1175	if (U_SUCCESS(*status)) {
1176	if (strsrch->pattern.cesLength == 0) {
1177	if (search->matchedIndex == USEARCH_DONE-1) {
1178	search->matchedIndex = offset;
1179	}
1180	else { // moves by codepoints
1181	U16_FWD_1(search->text, search->matchedIndex, textlength)do { if(((((search->text)[(search->matchedIndex)++])& 0xfffffc00)==0xd800) && (search->matchedIndex)!=(textlength ) && ((((search->text)[search->matchedIndex])& 0xfffffc00)==0xdc00)) { ++(search->matchedIndex); } } while (false);
1182	}
1183
1184	search->matchedLength = 0;
1185	setColEIterOffset(strsrch->textIter, search->matchedIndex, *status);
1186	// status checked below
1187	if (search->matchedIndex == textlength) {
1188	search->matchedIndex = USEARCH_DONE-1;
1189	}
1190	}
1191	else {
1192	if (search->matchedLength > 0) {
1193	// if matchlength is 0 we are at the start of the iteration
1194	if (search->isOverlap) {
1195	ucol_setOffsetucol_setOffset_71(strsrch->textIter, offset + 1, status);
1196	}
1197	else {
1198	ucol_setOffsetucol_setOffset_71(strsrch->textIter,
1199	offset + search->matchedLength, status);
1200	}
1201	}
1202	else {
1203	// for boundary check purposes. this will ensure that the
1204	// next match will not precede the current offset
1205	// note search->matchedIndex will always be set to something
1206	// in the code
1207	search->matchedIndex = offset - 1;
1208	}
1209
1210	if (search->isCanonicalMatch) {
1211	// can't use exact here since extra accents are allowed.
1212	usearch_handleNextCanonicalusearch_handleNextCanonical_71(strsrch, status);
1213	}
1214	else {
1215	usearch_handleNextExactusearch_handleNextExact_71(strsrch, status);
1216	}
1217	}
1218
1219	if (U_FAILURE(*status)) {
1220	return USEARCH_DONE-1;
1221	}
1222
1223	if (search->matchedIndex == USEARCH_DONE-1) {
1224	ucol_setOffsetucol_setOffset_71(strsrch->textIter, search->textLength, status);
1225	} else {
1226	ucol_setOffsetucol_setOffset_71(strsrch->textIter, search->matchedIndex, status);
1227	}
1228
1229	return search->matchedIndex;
1230	}
1231	}
1232	return USEARCH_DONE-1;
1233	}
1234
1235	U_CAPIextern "C" int32_t U_EXPORT2 usearch_previoususearch_previous_71(UStringSearch *strsrch,
1236	UErrorCode *status)
1237	{
1238	if (U_SUCCESS(*status) && strsrch) {
1239	int32_t offset;
1240	USearch *search = strsrch->search;
1241	if (search->reset) {
1242	offset = search->textLength;
1243	search->isForwardSearching = FALSE0;
1244	search->reset = FALSE0;
1245	setColEIterOffset(strsrch->textIter, offset, *status);
1246	}
1247	else {
1248	offset = usearch_getOffsetusearch_getOffset_71(strsrch);
1249	}
1250
1251	int32_t matchedindex = search->matchedIndex;
1252	if (search->isForwardSearching == TRUE1) {
1253	// switching direction.
1254	// if matchedIndex == USEARCH_DONE, it means that either a
1255	// setOffset has been called or that next ran off the text
1256	// string. the iterator would have been set to offset textLength if
1257	// a match is not found.
1258	search->isForwardSearching = FALSE0;
1259	if (matchedindex != USEARCH_DONE-1) {
1260	return matchedindex;
1261	}
1262	}
1263	else {
1264
1265	// Could check pattern length, but the
1266	// linear search will do the right thing
1267	if (offset == 0 \|\| matchedindex == 0) {
1268	setMatchNotFound(strsrch, *status);
1269	return USEARCH_DONE-1;
1270	}
1271	}
1272
1273	if (U_SUCCESS(*status)) {
1274	if (strsrch->pattern.cesLength == 0) {
1275	search->matchedIndex =
1276	(matchedindex == USEARCH_DONE-1 ? offset : matchedindex);
1277	if (search->matchedIndex == 0) {
1278	setMatchNotFound(strsrch, *status);
1279	// status checked below
1280	}
1281	else { // move by codepoints
1282	U16_BACK_1(search->text, 0, search->matchedIndex)do { if(((((search->text)[--(search->matchedIndex)])& 0xfffffc00)==0xdc00) && (search->matchedIndex)> (0) && ((((search->text)[(search->matchedIndex) -1])&0xfffffc00)==0xd800)) { --(search->matchedIndex); } } while (false);
1283	setColEIterOffset(strsrch->textIter, search->matchedIndex, *status);
1284	// status checked below
1285	search->matchedLength = 0;
1286	}
1287	}
1288	else {
1289	if (strsrch->search->isCanonicalMatch) {
1290	// can't use exact here since extra accents are allowed.
1291	usearch_handlePreviousCanonicalusearch_handlePreviousCanonical_71(strsrch, status);
1292	// status checked below
1293	}
1294	else {
1295	usearch_handlePreviousExactusearch_handlePreviousExact_71(strsrch, status);
1296	// status checked below
1297	}
1298	}
1299
1300	if (U_FAILURE(*status)) {
1301	return USEARCH_DONE-1;
1302	}
1303
1304	return search->matchedIndex;
1305	}
1306	}
1307	return USEARCH_DONE-1;
1308	}
1309
1310
1311
1312	U_CAPIextern "C" void U_EXPORT2 usearch_resetusearch_reset_71(UStringSearch *strsrch)
1313	{
1314	/*
1315	reset is setting the attributes that are already in
1316	string search, hence all attributes in the collator should
1317	be retrieved without any problems
1318	*/
1319	if (strsrch) {
1320	UErrorCode status = U_ZERO_ERROR;
1321	UBool sameCollAttribute = TRUE1;
1322	uint32_t ceMask;
1323	UBool shift;
1324	uint32_t varTop;
1325
1326	// ** hack to deal w/ how processed CEs encode quaternary **
1327	UCollationStrength newStrength = ucol_getStrengthucol_getStrength_71(strsrch->collator);
1328	if ((strsrch->strength < UCOL_QUATERNARY && newStrength >= UCOL_QUATERNARY) \|\|
1329	(strsrch->strength >= UCOL_QUATERNARY && newStrength < UCOL_QUATERNARY)) {
1330	sameCollAttribute = FALSE0;
1331	}
1332
1333	strsrch->strength = ucol_getStrengthucol_getStrength_71(strsrch->collator);
1334	ceMask = getMask(strsrch->strength);
1335	if (strsrch->ceMask != ceMask) {
1336	strsrch->ceMask = ceMask;
1337	sameCollAttribute = FALSE0;
1338	}
1339
1340	// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
1341	shift = ucol_getAttributeucol_getAttribute_71(strsrch->collator, UCOL_ALTERNATE_HANDLING,
1342	&status) == UCOL_SHIFTED;
1343	if (strsrch->toShift != shift) {
1344	strsrch->toShift = shift;
1345	sameCollAttribute = FALSE0;
1346	}
1347
1348	// if status is a failure, ucol_getVariableTop returns 0
1349	varTop = ucol_getVariableTopucol_getVariableTop_71(strsrch->collator, &status);
1350	if (strsrch->variableTop != varTop) {
1351	strsrch->variableTop = varTop;
1352	sameCollAttribute = FALSE0;
1353	}
1354	if (!sameCollAttribute) {
1355	initialize(strsrch, &status);
1356	}
1357	ucol_setTextucol_setText_71(strsrch->textIter, strsrch->search->text,
1358	strsrch->search->textLength,
1359	&status);
1360	strsrch->search->matchedLength = 0;
1361	strsrch->search->matchedIndex = USEARCH_DONE-1;
1362	strsrch->search->isOverlap = FALSE0;
1363	strsrch->search->isCanonicalMatch = FALSE0;
1364	strsrch->search->elementComparisonType = 0;
1365	strsrch->search->isForwardSearching = TRUE1;
1366	strsrch->search->reset = TRUE1;
1367	}
1368	}
1369
1370	//
1371	// CEI Collation Element + source text index.
1372	// These structs are kept in the circular buffer.
1373	//
1374	struct CEI {
1375	int64_t ce;
1376	int32_t lowIndex;
1377	int32_t highIndex;
1378	};
1379
1380	U_NAMESPACE_BEGINnamespace icu_71 {
1381
1382	namespace {
1383	//
1384	// CEIBuffer A circular buffer of CEs-with-index from the text being searched.
1385	//
1386	#define DEFAULT_CEBUFFER_SIZE96 96
1387	#define CEBUFFER_EXTRA32 32
1388	// Some typical max values to make buffer size more reasonable for asymmetric search.
1389	// #8694 is for a better long-term solution to allocation of this buffer.
1390	#define MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L8 8
1391	#define MAX_TARGET_IGNORABLES_PER_PAT_OTHER3 3
1392	#define MIGHT_BE_JAMO_L(c)((c >= 0x1100 && c <= 0x115E) \|\| (c >= 0x3131 && c <= 0x314E) \|\| (c >= 0x3165 && c <= 0x3186)) ((c >= 0x1100 && c <= 0x115E) \|\| (c >= 0x3131 && c <= 0x314E) \|\| (c >= 0x3165 && c <= 0x3186))
1393	struct CEIBuffer {
1394	CEI defBuf[DEFAULT_CEBUFFER_SIZE96];
1395	CEI *buf;
1396	int32_t bufSize;
1397	int32_t firstIx;
1398	int32_t limitIx;
1399	UCollationElements *ceIter;
1400	UStringSearch *strSearch;
1401
1402
1403
1404	CEIBuffer(UStringSearch ss, UErrorCode status);
1405	~CEIBuffer();
1406	const CEI *get(int32_t index);
1407	const CEI *getPrevious(int32_t index);
1408	};
1409
1410
1411	CEIBuffer::CEIBuffer(UStringSearch ss, UErrorCode status) {
1412	buf = defBuf;
1413	strSearch = ss;
1414	bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA32;
1415	if (ss->search->elementComparisonType != 0) {
1416	const UChar * patText = ss->pattern.text;
1417	if (patText) {
1418	const UChar * patTextLimit = patText + ss->pattern.textLength;
1419	while ( patText < patTextLimit ) {
1420	UChar c = *patText++;
1421	if (MIGHT_BE_JAMO_L(c)((c >= 0x1100 && c <= 0x115E) \|\| (c >= 0x3131 && c <= 0x314E) \|\| (c >= 0x3165 && c <= 0x3186))) {
1422	bufSize += MAX_TARGET_IGNORABLES_PER_PAT_JAMO_L8;
1423	} else {
1424	// No check for surrogates, we might allocate slightly more buffer than necessary.
1425	bufSize += MAX_TARGET_IGNORABLES_PER_PAT_OTHER3;
1426	}
1427	}
1428	}
1429	}
1430	ceIter = ss->textIter;
1431	firstIx = 0;
1432	limitIx = 0;
1433
1434	if (!initTextProcessedIter(ss, status)) { return; }
1435
1436	if (bufSize>DEFAULT_CEBUFFER_SIZE96) {
1437	buf = (CEI )uprv_mallocuprv_malloc_71(bufSize sizeof(CEI));
1438	if (buf == nullptr) {
1439	*status = U_MEMORY_ALLOCATION_ERROR;
1440	}
1441	}
1442	}
1443
1444	// TODO: add a reset or init function so that allocated
1445	// buffers can be retained & reused.
1446
1447	CEIBuffer::~CEIBuffer() {
1448	if (buf != defBuf) {
1449	uprv_freeuprv_free_71(buf);
1450	}
1451	}
1452
1453
1454	// Get the CE with the specified index.
1455	// Index must be in the range
1456	// n-history_size < index < n+1
1457	// where n is the largest index to have been fetched by some previous call to this function.
1458	// The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
1459	//
1460	const CEI *CEIBuffer::get(int32_t index) {
1461	int i = index % bufSize;
1462
1463	if (index>=firstIx && index<limitIx) {
1464	// The request was for an entry already in our buffer.
1465	// Just return it.
1466	return &buf[i];
1467	}
1468
1469	// Caller is requesting a new, never accessed before, CE.
1470	// Verify that it is the next one in sequence, which is all
1471	// that is allowed.
1472	if (index != limitIx) {
1473	UPRV_UNREACHABLE_ASSERT(void)0;
1474	// TODO: In ICU 64 the above was changed from U_ASSERT to UPRV_UNREACHABLE,
1475	// which unconditionally called abort(). However, there were cases in which it
1476	// was being hit, so it was changed back to U_ASSERT per ICU-20680. In ICU 70,
1477	// we now use the new UPRV_UNREACHABLE_ASSERT to better indicate the situation.
1478	// ICU-20792 tracks the follow-up work/further investigation on this.
1479	return nullptr;
1480	}
1481
1482	// Manage the circular CE buffer indexing
1483	limitIx++;
1484
1485	if (limitIx - firstIx >= bufSize) {
1486	// The buffer is full, knock out the lowest-indexed entry.
1487	firstIx++;
1488	}
1489
1490	UErrorCode status = U_ZERO_ERROR;
1491
1492	buf[i].ce = strSearch->textProcessedIter->nextProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
1493
1494	return &buf[i];
1495	}
1496
1497	// Get the CE with the specified index.
1498	// Index must be in the range
1499	// n-history_size < index < n+1
1500	// where n is the largest index to have been fetched by some previous call to this function.
1501	// The CE value will be UCOL__PROCESSED_NULLORDER at end of input.
1502	//
1503	const CEI *CEIBuffer::getPrevious(int32_t index) {
1504	int i = index % bufSize;
1505
1506	if (index>=firstIx && index<limitIx) {
1507	// The request was for an entry already in our buffer.
1508	// Just return it.
1509	return &buf[i];
1510	}
1511
1512	// Caller is requesting a new, never accessed before, CE.
1513	// Verify that it is the next one in sequence, which is all
1514	// that is allowed.
1515	if (index != limitIx) {
1516	UPRV_UNREACHABLE_ASSERT(void)0;
1517	// TODO: In ICU 64 the above was changed from U_ASSERT to UPRV_UNREACHABLE,
1518	// which unconditionally called abort(). However, there were cases in which it
1519	// was being hit, so it was changed back to U_ASSERT per ICU-20680. In ICU 70,
1520	// we now use the new UPRV_UNREACHABLE_ASSERT to better indicate the situation.
1521	// ICU-20792 tracks the follow-up work/further investigation on this.
1522	return nullptr;
1523	}
1524
1525	// Manage the circular CE buffer indexing
1526	limitIx++;
1527
1528	if (limitIx - firstIx >= bufSize) {
1529	// The buffer is full, knock out the lowest-indexed entry.
1530	firstIx++;
1531	}
1532
1533	UErrorCode status = U_ZERO_ERROR;
1534
1535	buf[i].ce = strSearch->textProcessedIter->previousProcessed(&buf[i].lowIndex, &buf[i].highIndex, &status);
1536
1537	return &buf[i];
1538	}
1539
1540	}
1541
1542	U_NAMESPACE_END}
1543
1544
1545	// #define USEARCH_DEBUG
1546
1547	#ifdef USEARCH_DEBUG
1548	#include <stdio.h>
1549	#include <stdlib.h>
1550	#endif
1551
1552	/*
1553	* Find the next break boundary after startIndex. If the UStringSearch object
1554	* has an external break iterator, use that. Otherwise use the internal character
1555	* break iterator.
1556	*/
1557	static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex, UErrorCode &status) {
1558	if (U_FAILURE(status)) {
1559	return startIndex;
1560	}
1561	#if 0
1562	const UChar *text = strsrch->search->text;
1563	int32_t textLen = strsrch->search->textLength;
1564
1565	U_ASSERT(startIndex>=0)(void)0;
1566	U_ASSERT(startIndex<=textLen)(void)0;
1567
1568	if (startIndex >= textLen) {
1569	return startIndex;
1570	}
1571
1572	UChar32 c;
1573	int32_t i = startIndex;
1574	U16_NEXT(text, i, textLen, c)do { (c)=(text)[(i)++]; if((((c)&0xfffffc00)==0xd800)) { uint16_t __c2; if((i)!=(textLen) && (((__c2=(text)[(i)])& 0xfffffc00)==0xdc00)) { ++(i); (c)=(((UChar32)((c))<<10UL )+(UChar32)(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); } } } while (false);
1575
1576	// If we are on a control character, stop without looking for combining marks.
1577	// Control characters do not combine.
1578	int32_t gcProperty = u_getIntPropertyValueu_getIntPropertyValue_71(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
1579	if (gcProperty==U_GCB_CONTROL \|\| gcProperty==U_GCB_LF \|\| gcProperty==U_GCB_CR) {
1580	return i;
1581	}
1582
1583	// The initial character was not a control, and can thus accept trailing
1584	// combining characters. Advance over however many of them there are.
1585	int32_t indexOfLastCharChecked;
1586	for (;;) {
1587	indexOfLastCharChecked = i;
1588	if (i>=textLen) {
1589	break;
1590	}
1591	U16_NEXT(text, i, textLen, c)do { (c)=(text)[(i)++]; if((((c)&0xfffffc00)==0xd800)) { uint16_t __c2; if((i)!=(textLen) && (((__c2=(text)[(i)])& 0xfffffc00)==0xdc00)) { ++(i); (c)=(((UChar32)((c))<<10UL )+(UChar32)(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); } } } while (false);
1592	gcProperty = u_getIntPropertyValueu_getIntPropertyValue_71(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
1593	if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
1594	break;
1595	}
1596	}
1597	return indexOfLastCharChecked;
1598	#elif !UCONFIG_NO_BREAK_ITERATION0
1599	UBreakIterator *breakiterator = getBreakIterator(strsrch, status);
1600	if (U_FAILURE(status)) {
1601	return startIndex;
1602	}
1603
1604	return ubrk_followingubrk_following_71(breakiterator, startIndex);
1605	#else
1606	// ** or should we use the original code? **
1607	return startIndex;
1608	#endif
1609
1610	}
1611
1612	/*
1613	* Returns TRUE if index is on a break boundary. If the UStringSearch
1614	* has an external break iterator, test using that, otherwise test
1615	* using the internal character break iterator.
1616	*/
1617	static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index, UErrorCode &status) {
1618	if (U_FAILURE(status)) {
1619	return TRUE1;
1620	}
1621	#if 0
1622	const UChar *text = strsrch->search->text;
1623	int32_t textLen = strsrch->search->textLength;
1624
1625	U_ASSERT(index>=0)(void)0;
1626	U_ASSERT(index<=textLen)(void)0;
1627
1628	if (index>=textLen \|\| index<=0) {
1629	return TRUE1;
1630	}
1631
1632	// If the character at the current index is not a GRAPHEME_EXTEND
1633	// then we can not be within a combining sequence.
1634	UChar32 c;
1635	U16_GET(text, 0, index, textLen, c)do { (c)=(text)[index]; if((((c)&0xfffff800)==0xd800)) { uint16_t __c2; if((((c)&0x400)==0)) { if((index)+1!=(textLen) && (((__c2=(text)[(index)+1])&0xfffffc00)==0xdc00)) { (c)=( ((UChar32)((c))<<10UL)+(UChar32)(__c2)-((0xd800<< 10UL)+0xdc00-0x10000)); } } else { if((index)>(0) && (((__c2=(text)[(index)-1])&0xfffffc00)==0xd800)) { (c)=( ((UChar32)(__c2)<<10UL)+(UChar32)((c))-((0xd800<< 10UL)+0xdc00-0x10000)); } } } } while (false);
1636	int32_t gcProperty = u_getIntPropertyValueu_getIntPropertyValue_71(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
1637	if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
1638	return TRUE1;
1639	}
1640
1641	// We are at a combining mark. If the preceding character is anything
1642	// except a CONTROL, CR or LF, we are in a combining sequence.
1643	U16_PREV(text, 0, index, c)do { (c)=(text)[--(index)]; if((((c)&0xfffffc00)==0xdc00) ) { uint16_t __c2; if((index)>(0) && (((__c2=(text )[(index)-1])&0xfffffc00)==0xd800)) { --(index); (c)=(((UChar32 )(__c2)<<10UL)+(UChar32)((c))-((0xd800<<10UL)+0xdc00 -0x10000)); } } } while (false);
1644	gcProperty = u_getIntPropertyValueu_getIntPropertyValue_71(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
1645	UBool combining = !(gcProperty==U_GCB_CONTROL \|\| gcProperty==U_GCB_LF \|\| gcProperty==U_GCB_CR);
1646	return !combining;
1647	#elif !UCONFIG_NO_BREAK_ITERATION0
1648	UBreakIterator *breakiterator = getBreakIterator(strsrch, status);
1649	if (U_FAILURE(status)) {
1650	return TRUE1;
1651	}
1652
1653	return ubrk_isBoundaryubrk_isBoundary_71(breakiterator, index);
1654	#else
1655	// ** or use the original code? **
1656	return TRUE1;
1657	#endif
1658	}
1659
1660	#if 0
1661	static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end, UErrorCode &status)
1662	{
1663	if (U_FAILURE(status)) {
1664	return TRUE1;
1665	}
1666
1667	#if !UCONFIG_NO_BREAK_ITERATION0
1668	UBreakIterator *breakiterator = getBreakIterator(strsrch, status);
1669	if (U_SUCCESS(status)) {
1670	int32_t startindex = ubrk_firstubrk_first_71(breakiterator);
1671	int32_t endindex = ubrk_lastubrk_last_71(breakiterator);
1672
1673	// out-of-range indexes are never boundary positions
1674	if (start < startindex \|\| start > endindex \|\|
1675	end < startindex \|\| end > endindex) {
1676	return FALSE0;
1677	}
1678
1679	return ubrk_isBoundaryubrk_isBoundary_71(breakiterator, start) &&
1680	ubrk_isBoundaryubrk_isBoundary_71(breakiterator, end);
1681	}
1682	#endif
1683
1684	return TRUE1;
1685	}
1686	#endif
1687
1688	typedef enum {
1689	U_CE_MATCH = -1,
1690	U_CE_NO_MATCH = 0,
1691	U_CE_SKIP_TARG,
1692	U_CE_SKIP_PATN
1693	} UCompareCEsResult;
1694	#define U_CE_LEVEL2_BASE0x00000005 0x00000005
1695	#define U_CE_LEVEL3_BASE0x00050000 0x00050000
1696
1697	static UCompareCEsResult compareCE64s(int64_t targCE, int64_t patCE, int16_t compareType) {
1698	if (targCE == patCE) {
1699	return U_CE_MATCH;
1700	}
1701	if (compareType == 0) {
1702	return U_CE_NO_MATCH;
1703	}
1704
1705	int64_t targCEshifted = targCE >> 32;
1706	int64_t patCEshifted = patCE >> 32;
1707	int64_t mask;
1708
1709	mask = 0xFFFF0000;
1710	int32_t targLev1 = (int32_t)(targCEshifted & mask);
1711	int32_t patLev1 = (int32_t)(patCEshifted & mask);
1712	if ( targLev1 != patLev1 ) {
1713	if ( targLev1 == 0 ) {
1714	return U_CE_SKIP_TARG;
1715	}
1716	if ( patLev1 == 0 && compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD ) {
1717	return U_CE_SKIP_PATN;
1718	}
1719	return U_CE_NO_MATCH;
1720	}
1721
1722	mask = 0x0000FFFF;
1723	int32_t targLev2 = (int32_t)(targCEshifted & mask);
1724	int32_t patLev2 = (int32_t)(patCEshifted & mask);
1725	if ( targLev2 != patLev2 ) {
1726	if ( targLev2 == 0 ) {
1727	return U_CE_SKIP_TARG;
1728	}
1729	if ( patLev2 == 0 && compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD ) {
1730	return U_CE_SKIP_PATN;
1731	}
1732	return (patLev2 == U_CE_LEVEL2_BASE0x00000005 \|\| (compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD && targLev2 == U_CE_LEVEL2_BASE0x00000005) )?
1733	U_CE_MATCH: U_CE_NO_MATCH;
1734	}
1735
1736	mask = 0xFFFF0000;
1737	int32_t targLev3 = (int32_t)(targCE & mask);
1738	int32_t patLev3 = (int32_t)(patCE & mask);
1739	if ( targLev3 != patLev3 ) {
1740	return (patLev3 == U_CE_LEVEL3_BASE0x00050000 \|\| (compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD && targLev3 == U_CE_LEVEL3_BASE0x00050000) )?
1741	U_CE_MATCH: U_CE_NO_MATCH;
1742	}
1743
1744	return U_CE_MATCH;
1745	}
1746
1747	namespace {
1748
1749	UChar32 codePointAt(const USearch &search, int32_t index) {
1750	if (index < search.textLength) {
1751	UChar32 c;
1752	U16_NEXT(search.text, index, search.textLength, c)do { (c)=(search.text)[(index)++]; if((((c)&0xfffffc00)== 0xd800)) { uint16_t __c2; if((index)!=(search.textLength) && (((__c2=(search.text)[(index)])&0xfffffc00)==0xdc00)) { ++ (index); (c)=(((UChar32)((c))<<10UL)+(UChar32)(__c2)-(( 0xd800<<10UL)+0xdc00-0x10000)); } } } while (false);
1753	return c;
1754	}
1755	return U_SENTINEL(-1);
1756	}
1757
1758	UChar32 codePointBefore(const USearch &search, int32_t index) {
1759	if (0 < index) {
1760	UChar32 c;
1761	U16_PREV(search.text, 0, index, c)do { (c)=(search.text)[--(index)]; if((((c)&0xfffffc00)== 0xdc00)) { uint16_t __c2; if((index)>(0) && (((__c2 =(search.text)[(index)-1])&0xfffffc00)==0xd800)) { --(index ); (c)=(((UChar32)(__c2)<<10UL)+(UChar32)((c))-((0xd800 <<10UL)+0xdc00-0x10000)); } } } while (false);
1762	return c;
1763	}
1764	return U_SENTINEL(-1);
1765	}
1766
1767	} // namespace
1768
1769	U_CAPIextern "C" UBool U_EXPORT2 usearch_searchusearch_search_71(UStringSearch *strsrch,
1770	int32_t startIdx,
1771	int32_t *matchStart,
1772	int32_t *matchLimit,
1773	UErrorCode *status)
1774	{
1775	if (U_FAILURE(*status)) {
1776	return FALSE0;
1777	}
1778
1779	// TODO: reject search patterns beginning with a combining char.
1780
1781	#ifdef USEARCH_DEBUG
1782	if (getenv("USEARCH_DEBUG") != nullptr) {
1783	printf("Pattern CEs\n");
1784	for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
1785	printf(" %8x", strsrch->pattern.ces[ii]);
1786	}
1787	printf("\n");
1788	}
1789
1790	#endif
1791	// Input parameter sanity check.
1792	// TODO: should input indices clip to the text length
1793	// in the same way that UText does.
1794	if(strsrch->pattern.cesLength == 0 \|\|
1795	startIdx < 0 \|\|
1796	startIdx > strsrch->search->textLength \|\|
1797	strsrch->pattern.ces == nullptr) {
1798	*status = U_ILLEGAL_ARGUMENT_ERROR;
1799	return FALSE0;
1800	}
1801
1802	if (strsrch->pattern.pces == nullptr) {
1803	initializePatternPCETable(strsrch, status);
1804	}
1805
1806	ucol_setOffsetucol_setOffset_71(strsrch->textIter, startIdx, status);
1807	CEIBuffer ceb(strsrch, status);
1808
1809	// An out-of-memory (OOM) failure can occur in the initializePatternPCETable function
1810	// or CEIBuffer constructor above, so we need to check the status.
1811	if (U_FAILURE(*status)) {
1812	return FALSE0;
1813	}
1814
1815	int32_t targetIx = 0;
1816	const CEI *targetCEI = nullptr;
1817	int32_t patIx;
1818	UBool found;
1819
1820	int32_t mStart = -1;
1821	int32_t mLimit = -1;
1822	int32_t minLimit;
1823	int32_t maxLimit;
1824
1825
1826
1827	// Outer loop moves over match starting positions in the
1828	// target CE space.
1829	// Here we see the target as a sequence of collation elements, resulting from the following:
1830	// 1. Target characters were decomposed, and (if appropriate) other compressions and expansions are applied
1831	// (for example, digraphs such as IJ may be broken into two characters).
1832	// 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next
1833	// 16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary. Any of these
1834	// fields that are for strengths below that of the collator are set to 0. If this makes the int64_t
1835	// CE weight 0 (as for a combining diacritic with secondary weight when the collator strength is primary),
1836	// then the CE is deleted, so the following code sees only CEs that are relevant.
1837	// For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text.
1838	// If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text
1839	// characters, or the CE marks the limit of the target text (in which case the CE weight is UCOL_PROCESSED_NULLORDER).
1840	//
1841	for(targetIx=0; ; targetIx++)
1842	{
1843	found = TRUE1;
1844	// Inner loop checks for a match beginning at each
1845	// position from the outer loop.
1846	int32_t targetIxOffset = 0;
1847	int64_t patCE = 0;
1848	// For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer
1849	// (compared to the last CE fetched for the previous targetIx value) as we need to go
1850	// for this targetIx value, so if it is non-nullptr then other ceb.get calls should be OK.
1851	const CEI *firstCEI = ceb.get(targetIx);
1852	if (firstCEI == nullptr) {
1853	*status = U_INTERNAL_PROGRAM_ERROR;
1854	found = FALSE0;
1855	break;
1856	}
1857
1858	for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
1859	patCE = strsrch->pattern.pces[patIx];
1860	targetCEI = ceb.get(targetIx+patIx+targetIxOffset);
1861	// Compare CE from target string with CE from the pattern.
1862	// Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
1863	// which will fail the compare, below.
1864	UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
1865	if ( ceMatch == U_CE_NO_MATCH ) {
1866	found = FALSE0;
1867	break;
1868	} else if ( ceMatch > U_CE_NO_MATCH ) {
1869	if ( ceMatch == U_CE_SKIP_TARG ) {
1870	// redo with same patCE, next targCE
1871	patIx--;
1872	targetIxOffset++;
1873	} else { // ceMatch == U_CE_SKIP_PATN
1874	// redo with same targCE, next patCE
1875	targetIxOffset--;
1876	}
1877	}
1878	}
1879	targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
1880
1881	if (!found && ((targetCEI == nullptr) \|\| (targetCEI->ce != UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L)))))) {
1882	// No match at this targetIx. Try again at the next.
1883	continue;
1884	}
1885
1886	if (!found) {
1887	// No match at all, we have run off the end of the target text.
1888	break;
1889	}
1890
1891
1892	// We have found a match in CE space.
1893	// Now determine the bounds in string index space.
1894	// There still is a chance of match failure if the CE range not correspond to
1895	// an acceptable character range.
1896	//
1897	const CEI *lastCEI = ceb.get(targetIx + targetIxOffset - 1);
1898
1899	mStart = firstCEI->lowIndex;
1900	minLimit = lastCEI->lowIndex;
1901
1902	// Look at the CE following the match. If it is UCOL_NULLORDER the match
1903	// extended to the end of input, and the match is good.
1904
1905	// Look at the high and low indices of the CE following the match. If
1906	// they are the same it means one of two things:
1907	// 1. The match extended to the last CE from the target text, which is OK, or
1908	// 2. The last CE that was part of the match is in an expansion that extends
1909	// to the first CE after the match. In this case, we reject the match.
1910	const CEI *nextCEI = 0;
1911	if (strsrch->search->elementComparisonType == 0) {
1912	nextCEI = ceb.get(targetIx + targetIxOffset);
1913	maxLimit = nextCEI->lowIndex;
1914	if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L)))) {
1915	found = FALSE0;
1916	}
1917	} else {
1918	for ( ; ; ++targetIxOffset ) {
1919	nextCEI = ceb.get(targetIx + targetIxOffset);
1920	maxLimit = nextCEI->lowIndex;
1921	// If we are at the end of the target too, match succeeds
1922	if ( nextCEI->ce == UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L))) ) {
1923	break;
1924	}
1925	// As long as the next CE has primary weight of 0,
1926	// it is part of the last target element matched by the pattern;
1927	// make sure it can be part of a match with the last patCE
1928	if ( (((nextCEI->ce) >> 32) & 0xFFFF0000UL) == 0 ) {
1929	UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
1930	if ( ceMatch == U_CE_NO_MATCH \|\| ceMatch == U_CE_SKIP_PATN ) {
1931	found = FALSE0;
1932	break;
1933	}
1934	// If lowIndex == highIndex, this target CE is part of an expansion of the last matched
1935	// target element, but it has non-zero primary weight => match fails
1936	} else if ( nextCEI->lowIndex == nextCEI->highIndex ) {
1937	found = false;
1938	break;
1939	// Else the target CE is not part of an expansion of the last matched element, match succeeds
1940	} else {
1941	break;
1942	}
1943	}
1944	}
1945
1946
1947	// Check for the start of the match being within a combining sequence.
1948	// This can happen if the pattern itself begins with a combining char, and
1949	// the match found combining marks in the target text that were attached
1950	// to something else.
1951	// This type of match should be rejected for not completely consuming a
1952	// combining sequence.
1953	if (!isBreakBoundary(strsrch, mStart, *status)) {
1954	found = FALSE0;
1955	}
1956	if (U_FAILURE(*status)) {
1957	break;
1958	}
1959
1960	// Check for the start of the match being within an Collation Element Expansion,
1961	// meaning that the first char of the match is only partially matched.
1962	// With expansions, the first CE will report the index of the source
1963	// character, and all subsequent (expansions) CEs will report the source index of the
1964	// _following_ character.
1965	int32_t secondIx = firstCEI->highIndex;
1966	if (mStart == secondIx) {
1967	found = FALSE0;
1968	}
1969
1970	// Allow matches to end in the middle of a grapheme cluster if the following
1971	// conditions are met; this is needed to make prefix search work properly in
1972	// Indic, see #11750
1973	// * the default breakIter is being used
1974	// * the next collation element after this combining sequence
1975	// - has non-zero primary weight
1976	// - corresponds to a separate character following the one at end of the current match
1977	// (the second of these conditions, and perhaps both, may be redundant given the
1978	// subsequent check for normalization boundary; however they are likely much faster
1979	// tests in any case)
1980	// * the match limit is a normalization boundary
1981	UBool allowMidclusterMatch = FALSE0;
1982	if (strsrch->search->text != nullptr && strsrch->search->textLength > maxLimit) {
1983	allowMidclusterMatch =
1984	strsrch->search->breakIter == nullptr &&
1985	nextCEI != nullptr && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
1986	maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLimit &&
1987	(strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) \|\|
1988	strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
1989	}
1990	// If those conditions are met, then:
1991	// * do NOT advance the candidate match limit (mLimit) to a break boundary; however
1992	// the match limit may be backed off to a previous break boundary. This handles
1993	// cases in which mLimit includes target characters that are ignorable with current
1994	// settings (such as space) and which extend beyond the pattern match.
1995	// * do NOT require that end of the combining sequence not extend beyond the match in CE space
1996	// * do NOT require that match limit be on a breakIter boundary
1997
1998	// Advance the match end position to the first acceptable match boundary.
1999	// This advances the index over any combining characters.
2000	mLimit = maxLimit;
2001	if (minLimit < maxLimit) {
2002	// When the last CE's low index is same with its high index, the CE is likely
2003	// a part of expansion. In this case, the index is located just after the
2004	// character corresponding to the CEs compared above. If the index is right
2005	// at the break boundary, move the position to the next boundary will result
2006	// incorrect match length when there are ignorable characters exist between
2007	// the position and the next character produces CE(s). See ticket#8482.
2008	if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit, *status)) {
2009	mLimit = minLimit;
2010	} else {
2011	int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status);
2012	// Note that we can have nba < maxLimit && nba >= minLImit, in which
2013	// case we want to set mLimit to nba regardless of allowMidclusterMatch
2014	// (i.e. we back off mLimit to the previous breakIterator boundary).
2015	if (nba >= lastCEI->highIndex && (!allowMidclusterMatch \|\| nba < maxLimit)) {
2016	mLimit = nba;
2017	}
2018	}
2019	}
2020
2021	if (U_FAILURE(*status)) {
2022	break;
2023	}
2024
2025	#ifdef USEARCH_DEBUG
2026	if (getenv("USEARCH_DEBUG") != nullptr) {
2027	printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
2028	}
2029	#endif
2030
2031	if (!allowMidclusterMatch) {
2032	// If advancing to the end of a combining sequence in character indexing space
2033	// advanced us beyond the end of the match in CE space, reject this match.
2034	if (mLimit > maxLimit) {
2035	found = FALSE0;
2036	}
2037
2038	if (!isBreakBoundary(strsrch, mLimit, *status)) {
2039	found = FALSE0;
2040	}
2041	if (U_FAILURE(*status)) {
2042	break;
2043	}
2044	}
2045
2046	if (! checkIdentical(strsrch, mStart, mLimit)) {
2047	found = FALSE0;
2048	}
2049
2050	if (found) {
2051	break;
2052	}
2053	}
2054
2055	#ifdef USEARCH_DEBUG
2056	if (getenv("USEARCH_DEBUG") != nullptr) {
2057	printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
2058	int32_t lastToPrint = ceb.limitIx+2;
2059	for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
2060	printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
2061	}
2062	printf("\n%s\n", found? "match found" : "no match");
2063	}
2064	#endif
2065
2066	// All Done. Store back the match bounds to the caller.
2067	//
2068
2069	if (U_FAILURE(*status)) {
2070	found = FALSE0; // No match if a failure occured.
2071	}
2072
2073	if (found==FALSE0) {
2074	mLimit = -1;
2075	mStart = -1;
2076	}
2077
2078	if (matchStart != nullptr) {
2079	*matchStart= mStart;
2080	}
2081
2082	if (matchLimit != nullptr) {
2083	*matchLimit = mLimit;
2084	}
2085
2086	return found;
2087	}
2088
2089	U_CAPIextern "C" UBool U_EXPORT2 usearch_searchBackwardsusearch_searchBackwards_71(UStringSearch *strsrch,
2090	int32_t startIdx,
2091	int32_t *matchStart,
2092	int32_t *matchLimit,
2093	UErrorCode *status)
2094	{
2095	if (U_FAILURE(*status)) {
2096	return FALSE0;
2097	}
2098
2099	// TODO: reject search patterns beginning with a combining char.
2100
2101	#ifdef USEARCH_DEBUG
2102	if (getenv("USEARCH_DEBUG") != nullptr) {
2103	printf("Pattern CEs\n");
2104	for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
2105	printf(" %8x", strsrch->pattern.ces[ii]);
2106	}
2107	printf("\n");
2108	}
2109
2110	#endif
2111	// Input parameter sanity check.
2112	// TODO: should input indices clip to the text length
2113	// in the same way that UText does.
2114	if(strsrch->pattern.cesLength == 0 \|\|
2115	startIdx < 0 \|\|
2116	startIdx > strsrch->search->textLength \|\|
2117	strsrch->pattern.ces == nullptr) {
2118	*status = U_ILLEGAL_ARGUMENT_ERROR;
2119	return FALSE0;
2120	}
2121
2122	if (strsrch->pattern.pces == nullptr) {
2123	initializePatternPCETable(strsrch, status);
2124	}
2125
2126	CEIBuffer ceb(strsrch, status);
2127	int32_t targetIx = 0;
2128
2129	/*
2130	* Pre-load the buffer with the CE's for the grapheme
2131	* after our starting position so that we're sure that
2132	* we can look at the CE following the match when we
2133	* check the match boundaries.
2134	*
2135	* This will also pre-fetch the first CE that we'll
2136	* consider for the match.
2137	*/
2138	if (startIdx < strsrch->search->textLength) {
2139	UBreakIterator breakiterator = getBreakIterator(strsrch, status);
2140	if (U_FAILURE(*status)) {
2141	return FALSE0;
2142	}
2143	int32_t next = ubrk_followingubrk_following_71(breakiterator, startIdx);
2144
2145	ucol_setOffsetucol_setOffset_71(strsrch->textIter, next, status);
2146
2147	for (targetIx = 0; ; targetIx += 1) {
2148	if (ceb.getPrevious(targetIx)->lowIndex < startIdx) {
2149	break;
2150	}
2151	}
2152	} else {
2153	ucol_setOffsetucol_setOffset_71(strsrch->textIter, startIdx, status);
2154	}
2155
2156	// An out-of-memory (OOM) failure can occur above, so we need to check the status.
2157	if (U_FAILURE(*status)) {
2158	return FALSE0;
2159	}
2160
2161	const CEI *targetCEI = nullptr;
2162	int32_t patIx;
2163	UBool found;
2164
2165	int32_t limitIx = targetIx;
2166	int32_t mStart = -1;
2167	int32_t mLimit = -1;
2168	int32_t minLimit;
2169	int32_t maxLimit;
2170
2171
2172
2173	// Outer loop moves over match starting positions in the
2174	// target CE space.
2175	// Here, targetIx values increase toward the beginning of the base text (i.e. we get the text CEs in reverse order).
2176	// But patIx is 0 at the beginning of the pattern and increases toward the end.
2177	// So this loop performs a comparison starting with the end of pattern, and prcessd toward the beginning of the pattern
2178	// and the beginning of the base text.
2179	for(targetIx = limitIx; ; targetIx += 1)
2180	{
2181	found = TRUE1;
2182	// For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer
2183	// (compared to the last CE fetched for the previous targetIx value) as we need to go
2184	// for this targetIx value, so if it is non-nullptr then other ceb.getPrevious calls should be OK.
2185	const CEI *lastCEI = ceb.getPrevious(targetIx);
2186	if (lastCEI == nullptr) {
2187	*status = U_INTERNAL_PROGRAM_ERROR;
2188	found = FALSE0;
2189	break;
2190	}
2191	// Inner loop checks for a match beginning at each
2192	// position from the outer loop.
2193	int32_t targetIxOffset = 0;
2194	for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
2195	int64_t patCE = strsrch->pattern.pces[patIx];
2196
2197	targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
2198	// Compare CE from target string with CE from the pattern.
2199	// Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
2200	// which will fail the compare, below.
2201	UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
2202	if ( ceMatch == U_CE_NO_MATCH ) {
2203	found = FALSE0;
2204	break;
2205	} else if ( ceMatch > U_CE_NO_MATCH ) {
2206	if ( ceMatch == U_CE_SKIP_TARG ) {
2207	// redo with same patCE, next targCE
2208	patIx++;
2209	targetIxOffset++;
2210	} else { // ceMatch == U_CE_SKIP_PATN
2211	// redo with same targCE, next patCE
2212	targetIxOffset--;
2213	}
2214	}
2215	}
2216
2217	if (!found && ((targetCEI == nullptr) \|\| (targetCEI->ce != UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L)))))) {
2218	// No match at this targetIx. Try again at the next.
2219	continue;
2220	}
2221
2222	if (!found) {
2223	// No match at all, we have run off the end of the target text.
2224	break;
2225	}
2226
2227
2228	// We have found a match in CE space.
2229	// Now determine the bounds in string index space.
2230	// There still is a chance of match failure if the CE range not correspond to
2231	// an acceptable character range.
2232	//
2233	const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
2234	mStart = firstCEI->lowIndex;
2235
2236	// Check for the start of the match being within a combining sequence.
2237	// This can happen if the pattern itself begins with a combining char, and
2238	// the match found combining marks in the target text that were attached
2239	// to something else.
2240	// This type of match should be rejected for not completely consuming a
2241	// combining sequence.
2242	if (!isBreakBoundary(strsrch, mStart, *status)) {
2243	found = FALSE0;
2244	}
2245	if (U_FAILURE(*status)) {
2246	break;
2247	}
2248
2249	// Look at the high index of the first CE in the match. If it's the same as the
2250	// low index, the first CE in the match is in the middle of an expansion.
2251	if (mStart == firstCEI->highIndex) {
2252	found = FALSE0;
2253	}
2254
2255
2256	minLimit = lastCEI->lowIndex;
2257
2258	if (targetIx > 0) {
2259	// Look at the CE following the match. If it is UCOL_NULLORDER the match
2260	// extended to the end of input, and the match is good.
2261
2262	// Look at the high and low indices of the CE following the match. If
2263	// they are the same it means one of two things:
2264	// 1. The match extended to the last CE from the target text, which is OK, or
2265	// 2. The last CE that was part of the match is in an expansion that extends
2266	// to the first CE after the match. In this case, we reject the match.
2267	const CEI *nextCEI = ceb.getPrevious(targetIx - 1);
2268
2269	if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L)))) {
2270	found = FALSE0;
2271	}
2272
2273	mLimit = maxLimit = nextCEI->lowIndex;
2274
2275	// Allow matches to end in the middle of a grapheme cluster if the following
2276	// conditions are met; this is needed to make prefix search work properly in
2277	// Indic, see #11750
2278	// * the default breakIter is being used
2279	// * the next collation element after this combining sequence
2280	// - has non-zero primary weight
2281	// - corresponds to a separate character following the one at end of the current match
2282	// (the second of these conditions, and perhaps both, may be redundant given the
2283	// subsequent check for normalization boundary; however they are likely much faster
2284	// tests in any case)
2285	// * the match limit is a normalization boundary
2286	UBool allowMidclusterMatch = FALSE0;
2287	if (strsrch->search->text != nullptr && strsrch->search->textLength > maxLimit) {
2288	allowMidclusterMatch =
2289	strsrch->search->breakIter == nullptr &&
2290	nextCEI != nullptr && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 &&
2291	maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLimit &&
2292	(strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) \|\|
2293	strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
2294	}
2295	// If those conditions are met, then:
2296	// * do NOT advance the candidate match limit (mLimit) to a break boundary; however
2297	// the match limit may be backed off to a previous break boundary. This handles
2298	// cases in which mLimit includes target characters that are ignorable with current
2299	// settings (such as space) and which extend beyond the pattern match.
2300	// * do NOT require that end of the combining sequence not extend beyond the match in CE space
2301	// * do NOT require that match limit be on a breakIter boundary
2302
2303	// Advance the match end position to the first acceptable match boundary.
2304	// This advances the index over any combining characters.
2305	if (minLimit < maxLimit) {
2306	int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status);
2307	// Note that we can have nba < maxLimit && nba >= minLImit, in which
2308	// case we want to set mLimit to nba regardless of allowMidclusterMatch
2309	// (i.e. we back off mLimit to the previous breakIterator boundary).
2310	if (nba >= lastCEI->highIndex && (!allowMidclusterMatch \|\| nba < maxLimit)) {
2311	mLimit = nba;
2312	}
2313	}
2314
2315	if (!allowMidclusterMatch) {
2316	// If advancing to the end of a combining sequence in character indexing space
2317	// advanced us beyond the end of the match in CE space, reject this match.
2318	if (mLimit > maxLimit) {
2319	found = FALSE0;
2320	}
2321
2322	// Make sure the end of the match is on a break boundary
2323	if (!isBreakBoundary(strsrch, mLimit, *status)) {
2324	found = FALSE0;
2325	}
2326	if (U_FAILURE(*status)) {
2327	break;
2328	}
2329	}
2330
2331	} else {
2332	// No non-ignorable CEs after this point.
2333	// The maximum position is detected by boundary after
2334	// the last non-ignorable CE. Combining sequence
2335	// across the start index will be truncated.
2336	int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status);
2337	mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
	Although the value stored to 'maxLimit' is used in the enclosing expression, the value is never actually read from 'maxLimit'
2338	}
2339
2340	#ifdef USEARCH_DEBUG
2341	if (getenv("USEARCH_DEBUG") != nullptr) {
2342	printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
2343	}
2344	#endif
2345
2346
2347	if (! checkIdentical(strsrch, mStart, mLimit)) {
2348	found = FALSE0;
2349	}
2350
2351	if (found) {
2352	break;
2353	}
2354	}
2355
2356	#ifdef USEARCH_DEBUG
2357	if (getenv("USEARCH_DEBUG") != nullptr) {
2358	printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
2359	int32_t lastToPrint = ceb.limitIx+2;
2360	for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
2361	printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
2362	}
2363	printf("\n%s\n", found? "match found" : "no match");
2364	}
2365	#endif
2366
2367	// All Done. Store back the match bounds to the caller.
2368	//
2369
2370	if (U_FAILURE(*status)) {
2371	found = FALSE0; // No match if a failure occured.
2372	}
2373
2374	if (found==FALSE0) {
2375	mLimit = -1;
2376	mStart = -1;
2377	}
2378
2379	if (matchStart != nullptr) {
2380	*matchStart= mStart;
2381	}
2382
2383	if (matchLimit != nullptr) {
2384	*matchLimit = mLimit;
2385	}
2386
2387	return found;
2388	}
2389
2390	// internal use methods declared in usrchimp.h -----------------------------
2391
2392	UBool usearch_handleNextExactusearch_handleNextExact_71(UStringSearch strsrch, UErrorCode status)
2393	{
2394	if (U_FAILURE(*status)) {
2395	setMatchNotFound(strsrch, *status);
2396	return FALSE0;
2397	}
2398
2399	int32_t textOffset = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
2400	int32_t start = -1;
2401	int32_t end = -1;
2402
2403	if (usearch_searchusearch_search_71(strsrch, textOffset, &start, &end, status)) {
2404	strsrch->search->matchedIndex = start;
2405	strsrch->search->matchedLength = end - start;
2406	return TRUE1;
2407	} else {
2408	setMatchNotFound(strsrch, *status);
2409	return FALSE0;
2410	}
2411	}
2412
2413	UBool usearch_handleNextCanonicalusearch_handleNextCanonical_71(UStringSearch strsrch, UErrorCode status)
2414	{
2415	if (U_FAILURE(*status)) {
2416	setMatchNotFound(strsrch, *status);
2417	return FALSE0;
2418	}
2419
2420	int32_t textOffset = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
2421	int32_t start = -1;
2422	int32_t end = -1;
2423
2424	if (usearch_searchusearch_search_71(strsrch, textOffset, &start, &end, status)) {
2425	strsrch->search->matchedIndex = start;
2426	strsrch->search->matchedLength = end - start;
2427	return TRUE1;
2428	} else {
2429	setMatchNotFound(strsrch, *status);
2430	return FALSE0;
2431	}
2432	}
2433
2434	UBool usearch_handlePreviousExactusearch_handlePreviousExact_71(UStringSearch strsrch, UErrorCode status)
2435	{
2436	if (U_FAILURE(*status)) {
2437	setMatchNotFound(strsrch, *status);
2438	return FALSE0;
2439	}
2440
2441	int32_t textOffset;
2442
2443	if (strsrch->search->isOverlap) {
2444	if (strsrch->search->matchedIndex != USEARCH_DONE-1) {
2445	textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
2446	} else {
2447	// move the start position at the end of possible match
2448	initializePatternPCETable(strsrch, status);
2449	if (!initTextProcessedIter(strsrch, status)) {
2450	setMatchNotFound(strsrch, *status);
2451	return FALSE0;
2452	}
2453	for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
2454	int64_t pce = strsrch->textProcessedIter->nextProcessed(nullptr, nullptr, status);
2455	if (pce == UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L)))) {
2456	// at the end of the text
2457	break;
2458	}
2459	}
2460	if (U_FAILURE(*status)) {
2461	setMatchNotFound(strsrch, *status);
2462	return FALSE0;
2463	}
2464	textOffset = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
2465	}
2466	} else {
2467	textOffset = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
2468	}
2469
2470	int32_t start = -1;
2471	int32_t end = -1;
2472
2473	if (usearch_searchBackwardsusearch_searchBackwards_71(strsrch, textOffset, &start, &end, status)) {
2474	strsrch->search->matchedIndex = start;
2475	strsrch->search->matchedLength = end - start;
2476	return TRUE1;
2477	} else {
2478	setMatchNotFound(strsrch, *status);
2479	return FALSE0;
2480	}
2481	}
2482
2483	UBool usearch_handlePreviousCanonicalusearch_handlePreviousCanonical_71(UStringSearch *strsrch,
2484	UErrorCode *status)
2485	{
2486	if (U_FAILURE(*status)) {
2487	setMatchNotFound(strsrch, *status);
2488	return FALSE0;
2489	}
2490
2491	int32_t textOffset;
2492
2493	if (strsrch->search->isOverlap) {
2494	if (strsrch->search->matchedIndex != USEARCH_DONE-1) {
2495	textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
2496	} else {
2497	// move the start position at the end of possible match
2498	initializePatternPCETable(strsrch, status);
2499	if (!initTextProcessedIter(strsrch, status)) {
2500	setMatchNotFound(strsrch, *status);
2501	return FALSE0;
2502	}
2503	for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
2504	int64_t pce = strsrch->textProcessedIter->nextProcessed(nullptr, nullptr, status);
2505	if (pce == UCOL_PROCESSED_NULLORDER((int64_t)((int64_t)(9223372036854775807L)))) {
2506	// at the end of the text
2507	break;
2508	}
2509	}
2510	if (U_FAILURE(*status)) {
2511	setMatchNotFound(strsrch, *status);
2512	return FALSE0;
2513	}
2514	textOffset = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
2515	}
2516	} else {
2517	textOffset = ucol_getOffsetucol_getOffset_71(strsrch->textIter);
2518	}
2519
2520	int32_t start = -1;
2521	int32_t end = -1;
2522
2523	if (usearch_searchBackwardsusearch_searchBackwards_71(strsrch, textOffset, &start, &end, status)) {
2524	strsrch->search->matchedIndex = start;
2525	strsrch->search->matchedLength = end - start;
2526	return TRUE1;
2527	} else {
2528	setMatchNotFound(strsrch, *status);
2529	return FALSE0;
2530	}
2531	}
2532
2533	#endif /* #if !UCONFIG_NO_COLLATION */