../deps/icu-small/source/common/rbbi.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/rbbi.cpp
Warning:	line 1270, column 13 Value stored to 'status' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name rbbi.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/rbbi.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	***************************************************************************
5	* Copyright (C) 1999-2016 International Business Machines Corporation
6	* and others. All rights reserved.
7	***************************************************************************
8	*/
9	//
10	// file: rbbi.cpp Contains the implementation of the rule based break iterator
11	// runtime engine and the API implementation for
12	// class RuleBasedBreakIterator
13	//
14
15	#include "utypeinfo.h" // for 'typeid' to work
16
17	#include "unicode/utypes.h"
18
19	#if !UCONFIG_NO_BREAK_ITERATION0
20
21	#include <cinttypes>
22
23	#include "unicode/rbbi.h"
24	#include "unicode/schriter.h"
25	#include "unicode/uchriter.h"
26	#include "unicode/uclean.h"
27	#include "unicode/udata.h"
28
29	#include "brkeng.h"
30	#include "ucln_cmn.h"
31	#include "cmemory.h"
32	#include "cstring.h"
33	#include "localsvc.h"
34	#include "rbbidata.h"
35	#include "rbbi_cache.h"
36	#include "rbbirb.h"
37	#include "uassert.h"
38	#include "umutex.h"
39	#include "uvectr32.h"
40
41	#ifdef RBBI_DEBUG
42	static UBool gTrace = FALSE0;
43	#endif
44
45	U_NAMESPACE_BEGINnamespace icu_71 {
46
47	// The state number of the starting state
48	constexpr int32_t START_STATE = 1;
49
50	// The state-transition value indicating "stop"
51	constexpr int32_t STOP_STATE = 0;
52
53
54	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)UClassID RuleBasedBreakIterator::getStaticClassID() { static char classID = 0; return (UClassID)&classID; } UClassID RuleBasedBreakIterator ::getDynamicClassID() const { return RuleBasedBreakIterator:: getStaticClassID(); }
55
56
57	//=======================================================================
58	// constructors
59	//=======================================================================
60
61	/**
62	* Constructs a RuleBasedBreakIterator that uses the already-created
63	* tables object that is passed in as a parameter.
64	*/
65	RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
66	: fSCharIter(UnicodeString())
67	{
68	init(status);
69	fData = new RBBIDataWrapper(data, status); // status checked in constructor
70	if (U_FAILURE(status)) {return;}
71	if(fData == nullptr) {
72	status = U_MEMORY_ALLOCATION_ERROR;
73	return;
74	}
75	if (fData->fForwardTable->fLookAheadResultsSize > 0) {
76	fLookAheadMatches = static_cast<int32_t *>(
77	uprv_mallocuprv_malloc_71(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t)));
78	if (fLookAheadMatches == nullptr) {
79	status = U_MEMORY_ALLOCATION_ERROR;
80	return;
81	}
82	}
83	}
84
85	//-------------------------------------------------------------------------------
86	//
87	// Constructor from a UDataMemory handle to precompiled break rules
88	// stored in an ICU data file. This constructor is private API,
89	// only for internal use.
90	//
91	//-------------------------------------------------------------------------------
92	RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UBool isPhraseBreaking,
93	UErrorCode &status) : RuleBasedBreakIterator(udm, status)
94	{
95	fIsPhraseBreaking = isPhraseBreaking;
96	}
97
98	//
99	// Construct from precompiled binary rules (tables). This constructor is public API,
100	// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
101	//
102	RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
103	uint32_t ruleLength,
104	UErrorCode &status)
105	: fSCharIter(UnicodeString())
106	{
107	init(status);
108	if (U_FAILURE(status)) {
109	return;
110	}
111	if (compiledRules == NULL__null || ruleLength < sizeof(RBBIDataHeader)) {
112	status = U_ILLEGAL_ARGUMENT_ERROR;
113	return;
114	}
115	const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules;
116	if (data->fLength > ruleLength) {
117	status = U_ILLEGAL_ARGUMENT_ERROR;
118	return;
119	}
120	fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status);
121	if (U_FAILURE(status)) {return;}
122	if(fData == nullptr) {
123	status = U_MEMORY_ALLOCATION_ERROR;
124	return;
125	}
126	if (fData->fForwardTable->fLookAheadResultsSize > 0) {
127	fLookAheadMatches = static_cast<int32_t *>(
128	uprv_mallocuprv_malloc_71(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t)));
129	if (fLookAheadMatches == nullptr) {
130	status = U_MEMORY_ALLOCATION_ERROR;
131	return;
132	}
133	}
134	}
135
136
137	//-------------------------------------------------------------------------------
138	//
139	// Constructor from a UDataMemory handle to precompiled break rules
140	// stored in an ICU data file.
141	//
142	//-------------------------------------------------------------------------------
143	RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
144	: fSCharIter(UnicodeString())
145	{
146	init(status);
147	fData = new RBBIDataWrapper(udm, status); // status checked in constructor
148	if (U_FAILURE(status)) {return;}
149	if(fData == nullptr) {
150	status = U_MEMORY_ALLOCATION_ERROR;
151	return;
152	}
153	if (fData->fForwardTable->fLookAheadResultsSize > 0) {
154	fLookAheadMatches = static_cast<int32_t *>(
155	uprv_mallocuprv_malloc_71(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t)));
156	if (fLookAheadMatches == nullptr) {
157	status = U_MEMORY_ALLOCATION_ERROR;
158	return;
159	}
160	}
161	}
162
163
164
165	//-------------------------------------------------------------------------------
166	//
167	// Constructor from a set of rules supplied as a string.
168	//
169	//-------------------------------------------------------------------------------
170	RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules,
171	UParseError &parseError,
172	UErrorCode &status)
173	: fSCharIter(UnicodeString())
174	{
175	init(status);
176	if (U_FAILURE(status)) {return;}
177	RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)
178	RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
179	// Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that
180	// creates and returns a complete RBBI. From here, in a constructor, we
181	// can't just return the object created by the builder factory, hence
182	// the assignment of the factory created object to "this".
183	if (U_SUCCESS(status)) {
184	*this = *bi;
185	delete bi;
186	}
187	}
188
189
190	//-------------------------------------------------------------------------------
191	//
192	// Default Constructor. Create an empty shell that can be set up later.
193	// Used when creating a RuleBasedBreakIterator from a set
194	// of rules.
195	//-------------------------------------------------------------------------------
196	RuleBasedBreakIterator::RuleBasedBreakIterator()
197	: fSCharIter(UnicodeString())
198	{
199	UErrorCode status = U_ZERO_ERROR;
200	init(status);
201	}
202
203
204	//-------------------------------------------------------------------------------
205	//
206	// Copy constructor. Will produce a break iterator with the same behavior,
207	// and which iterates over the same text, as the one passed in.
208	//
209	//-------------------------------------------------------------------------------
210	RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
211	: BreakIterator(other),
212	fSCharIter(UnicodeString())
213	{
214	UErrorCode status = U_ZERO_ERROR;
215	this->init(status);
216	*this = other;
217	}
218
219
220	/**
221	* Destructor
222	*/
223	RuleBasedBreakIterator::~RuleBasedBreakIterator() {
224	if (fCharIter != &fSCharIter) {
225	// fCharIter was adopted from the outside.
226	delete fCharIter;
227	}
228	fCharIter = nullptr;
229
230	utext_closeutext_close_71(&fText);
231
232	if (fData != nullptr) {
233	fData->removeReference();
234	fData = nullptr;
235	}
236	delete fBreakCache;
237	fBreakCache = nullptr;
238
239	delete fDictionaryCache;
240	fDictionaryCache = nullptr;
241
242	delete fLanguageBreakEngines;
243	fLanguageBreakEngines = nullptr;
244
245	delete fUnhandledBreakEngine;
246	fUnhandledBreakEngine = nullptr;
247
248	uprv_freeuprv_free_71(fLookAheadMatches);
249	fLookAheadMatches = nullptr;
250	}
251
252	/**
253	* Assignment operator. Sets this iterator to have the same behavior,
254	* and iterate over the same text, as the one passed in.
255	* TODO: needs better handling of memory allocation errors.
256	*/
257	RuleBasedBreakIterator&
258	RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
259	if (this == &that) {
260	return *this;
261	}
262	BreakIterator::operator=(that);
263
264	if (fLanguageBreakEngines != NULL__null) {
265	delete fLanguageBreakEngines;
266	fLanguageBreakEngines = NULL__null; // Just rebuild for now
267	}
268	// TODO: clone fLanguageBreakEngines from "that"
269	UErrorCode status = U_ZERO_ERROR;
270	utext_cloneutext_clone_71(&fText, &that.fText, FALSE0, TRUE1, &status);
271
272	if (fCharIter != &fSCharIter) {
273	delete fCharIter;
274	}
275	fCharIter = &fSCharIter;
276
277	if (that.fCharIter != NULL__null && that.fCharIter != &that.fSCharIter) {
278	// This is a little bit tricky - it will initially appear that
279	// this->fCharIter is adopted, even if that->fCharIter was
280	// not adopted. That's ok.
281	fCharIter = that.fCharIter->clone();
282	}
283	fSCharIter = that.fSCharIter;
284	if (fCharIter == NULL__null) {
285	fCharIter = &fSCharIter;
286	}
287
288	if (fData != NULL__null) {
289	fData->removeReference();
290	fData = NULL__null;
291	}
292	if (that.fData != NULL__null) {
293	fData = that.fData->addReference();
294	}
295
296	uprv_freeuprv_free_71(fLookAheadMatches);
297	fLookAheadMatches = nullptr;
298	if (fData && fData->fForwardTable->fLookAheadResultsSize > 0) {
299	fLookAheadMatches = static_cast<int32_t *>(
300	uprv_mallocuprv_malloc_71(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t)));
301	}
302
303
304	fPosition = that.fPosition;
305	fRuleStatusIndex = that.fRuleStatusIndex;
306	fDone = that.fDone;
307
308	// TODO: both the dictionary and the main cache need to be copied.
309	// Current position could be within a dictionary range. Trying to continue
310	// the iteration without the caches present would go to the rules, with
311	// the assumption that the current position is on a rule boundary.
312	fBreakCache->reset(fPosition, fRuleStatusIndex);
313	fDictionaryCache->reset();
314
315	return *this;
316	}
317
318
319
320	//-----------------------------------------------------------------------------
321	//
322	// init() Shared initialization routine. Used by all the constructors.
323	// Initializes all fields, leaving the object in a consistent state.
324	//
325	//-----------------------------------------------------------------------------
326	void RuleBasedBreakIterator::init(UErrorCode &status) {
327	fCharIter = nullptr;
328	fData = nullptr;
329	fPosition = 0;
330	fRuleStatusIndex = 0;
331	fDone = false;
332	fDictionaryCharCount = 0;
333	fLanguageBreakEngines = nullptr;
334	fUnhandledBreakEngine = nullptr;
335	fBreakCache = nullptr;
336	fDictionaryCache = nullptr;
337	fLookAheadMatches = nullptr;
338	fIsPhraseBreaking = false;
339
340	// Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
341	// fText = UTEXT_INITIALIZER;
342	static const UText initializedUText = UTEXT_INITIALIZER{ UTEXT_MAGIC, 0, 0, sizeof(UText), 0, 0, 0, 0, 0, 0, __null, __null, __null, __null, __null, __null, __null, __null, 0, 0 , 0, 0, 0, 0 };
343	uprv_memcpy(&fText, &initializedUText, sizeof(UText))do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(&fText , &initializedUText, sizeof(UText)); } while (false);
344
345	if (U_FAILURE(status)) {
346	return;
347	}
348
349	utext_openUCharsutext_openUChars_71(&fText, NULL__null, 0, &status);
350	fDictionaryCache = new DictionaryCache(this, status);
351	fBreakCache = new BreakCache(this, status);
352	if (U_SUCCESS(status) && (fDictionaryCache == NULL__null || fBreakCache == NULL__null)) {
353	status = U_MEMORY_ALLOCATION_ERROR;
354	}
355
356	#ifdef RBBI_DEBUG
357	static UBool debugInitDone = FALSE0;
358	if (debugInitDone == FALSE0) {
359	char *debugEnv = getenv("U_RBBIDEBUG");
360	if (debugEnv && uprv_strstr(debugEnv, "trace"):: strstr(debugEnv, "trace")) {
361	gTrace = TRUE1;
362	}
363	debugInitDone = TRUE1;
364	}
365	#endif
366	}
367
368
369
370	//-----------------------------------------------------------------------------
371	//
372	// clone - Returns a newly-constructed RuleBasedBreakIterator with the same
373	// behavior, and iterating over the same text, as this one.
374	// Virtual function: does the right thing with subclasses.
375	//
376	//-----------------------------------------------------------------------------
377	RuleBasedBreakIterator*
378	RuleBasedBreakIterator::clone() const {
379	return new RuleBasedBreakIterator(*this);
380	}
381
382	/**
383	* Equality operator. Returns true if both BreakIterators are of the
384	* same class, have the same behavior, and iterate over the same text.
385	*/
386	bool
387	RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
388	if (typeid(*this) != typeid(that)) {
389	return false;
390	}
391	if (this == &that) {
392	return true;
393	}
394
395	// The base class BreakIterator carries no state that participates in equality,
396	// and does not implement an equality function that would otherwise be
397	// checked at this point.
398
399	const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
400
401	if (!utext_equalsutext_equals_71(&fText, &that2.fText)) {
402	// The two break iterators are operating on different text,
403	// or have a different iteration position.
404	// Note that fText's position is always the same as the break iterator's position.
405	return false;
406	}
407
408	if (!(fPosition == that2.fPosition &&
409	fRuleStatusIndex == that2.fRuleStatusIndex &&
410	fDone == that2.fDone)) {
411	return false;
412	}
413
414	if (that2.fData == fData ||
415	(fData != NULL__null && that2.fData != NULL__null && *that2.fData == *fData)) {
416	// The two break iterators are using the same rules.
417	return true;
418	}
419	return false;
420	}
421
422	/**
423	* Compute a hash code for this BreakIterator
424	* @return A hash code
425	*/
426	int32_t
427	RuleBasedBreakIterator::hashCode(void) const {
428	int32_t hash = 0;
429	if (fData != NULL__null) {
430	hash = fData->hashCode();
431	}
432	return hash;
433	}
434
435
436	void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
437	if (U_FAILURE(status)) {
438	return;
439	}
440	fBreakCache->reset();
441	fDictionaryCache->reset();
442	utext_cloneutext_clone_71(&fText, ut, FALSE0, TRUE1, &status);
443
444	// Set up a dummy CharacterIterator to be returned if anyone
445	// calls getText(). With input from UText, there is no reasonable
446	// way to return a characterIterator over the actual input text.
447	// Return one over an empty string instead - this is the closest
448	// we can come to signaling a failure.
449	// (GetText() is obsolete, this failure is sort of OK)
450	fSCharIter.setText(UnicodeString());
451
452	if (fCharIter != &fSCharIter) {
453	// existing fCharIter was adopted from the outside. Delete it now.
454	delete fCharIter;
455	}
456	fCharIter = &fSCharIter;
457
458	this->first();
459	}
460
461
462	UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
463	UText *result = utext_cloneutext_clone_71(fillIn, &fText, FALSE0, TRUE1, &status);
464	return result;
465	}
466
467
468	//=======================================================================
469	// BreakIterator overrides
470	//=======================================================================
471
472	/**
473	* Return a CharacterIterator over the text being analyzed.
474	*/
475	CharacterIterator&
476	RuleBasedBreakIterator::getText() const {
477	return *fCharIter;
478	}
479
480	/**
481	* Set the iterator to analyze a new piece of text. This function resets
482	* the current iteration position to the beginning of the text.
483	* @param newText An iterator over the text to analyze.
484	*/
485	void
486	RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
487	// If we are holding a CharacterIterator adopted from a
488	// previous call to this function, delete it now.
489	if (fCharIter != &fSCharIter) {
490	delete fCharIter;
491	}
492
493	fCharIter = newText;
494	UErrorCode status = U_ZERO_ERROR;
495	fBreakCache->reset();
496	fDictionaryCache->reset();
497	if (newText==NULL__null || newText->startIndex() != 0) {
498	// startIndex !=0 wants to be an error, but there's no way to report it.
499	// Make the iterator text be an empty string.
500	utext_openUCharsutext_openUChars_71(&fText, NULL__null, 0, &status);
501	} else {
502	utext_openCharacterIteratorutext_openCharacterIterator_71(&fText, newText, &status);
503	}
504	this->first();
505	}
506
507	/**
508	* Set the iterator to analyze a new piece of text. This function resets
509	* the current iteration position to the beginning of the text.
510	* @param newText The text to analyze.
511	*/
512	void
513	RuleBasedBreakIterator::setText(const UnicodeString& newText) {
514	UErrorCode status = U_ZERO_ERROR;
515	fBreakCache->reset();
516	fDictionaryCache->reset();
517	utext_openConstUnicodeStringutext_openConstUnicodeString_71(&fText, &newText, &status);
518
519	// Set up a character iterator on the string.
520	// Needed in case someone calls getText().
521	// Can not, unfortunately, do this lazily on the (probably never)
522	// call to getText(), because getText is const.
523	fSCharIter.setText(newText);
524
525	if (fCharIter != &fSCharIter) {
526	// old fCharIter was adopted from the outside. Delete it.
527	delete fCharIter;
528	}
529	fCharIter = &fSCharIter;
530
531	this->first();
532	}
533
534
535	/**
536	* Provide a new UText for the input text. Must reference text with contents identical
537	* to the original.
538	* Intended for use with text data originating in Java (garbage collected) environments
539	* where the data may be moved in memory at arbitrary times.
540	*/
541	RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) {
542	if (U_FAILURE(status)) {
543	return *this;
544	}
545	if (input == NULL__null) {
546	status = U_ILLEGAL_ARGUMENT_ERROR;
547	return *this;
548	}
549	int64_t pos = utext_getNativeIndexutext_getNativeIndex_71(&fText);
550	// Shallow read-only clone of the new UText into the existing input UText
551	utext_cloneutext_clone_71(&fText, input, FALSE0, TRUE1, &status);
552	if (U_FAILURE(status)) {
553	return *this;
554	}
555	utext_setNativeIndexutext_setNativeIndex_71(&fText, pos);
556	if (utext_getNativeIndexutext_getNativeIndex_71(&fText) != pos) {
557	// Sanity check. The new input utext is supposed to have the exact same
558	// contents as the old. If we can't set to the same position, it doesn't.
559	// The contents underlying the old utext might be invalid at this point,
560	// so it's not safe to check directly.
561	status = U_ILLEGAL_ARGUMENT_ERROR;
562	}
563	return *this;
564	}
565
566
567	/**
568	* Sets the current iteration position to the beginning of the text, position zero.
569	* @return The new iterator position, which is zero.
570	*/
571	int32_t RuleBasedBreakIterator::first(void) {
572	UErrorCode status = U_ZERO_ERROR;
573	if (!fBreakCache->seek(0)) {
574	fBreakCache->populateNear(0, status);
575	}
576	fBreakCache->current();
577	U_ASSERT(fPosition == 0)(void)0;
578	return 0;
579	}
580
581	/**
582	* Sets the current iteration position to the end of the text.
583	* @return The text's past-the-end offset.
584	*/
585	int32_t RuleBasedBreakIterator::last(void) {
586	int32_t endPos = (int32_t)utext_nativeLengthutext_nativeLength_71(&fText);
587	UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position.
588	(void)endShouldBeBoundary;
589	U_ASSERT(endShouldBeBoundary)(void)0;
590	U_ASSERT(fPosition == endPos)(void)0;
591	return endPos;
592	}
593
594	/**
595	* Advances the iterator either forward or backward the specified number of steps.
596	* Negative values move backward, and positive values move forward. This is
597	* equivalent to repeatedly calling next() or previous().
598	* @param n The number of steps to move. The sign indicates the direction
599	* (negative is backwards, and positive is forwards).
600	* @return The character offset of the boundary position n boundaries away from
601	* the current one.
602	*/
603	int32_t RuleBasedBreakIterator::next(int32_t n) {
604	int32_t result = 0;
605	if (n > 0) {
606	for (; n > 0 && result != UBRK_DONE((int32_t) -1); --n) {
607	result = next();
608	}
609	} else if (n < 0) {
610	for (; n < 0 && result != UBRK_DONE((int32_t) -1); ++n) {
611	result = previous();
612	}
613	} else {
614	result = current();
615	}
616	return result;
617	}
618
619	/**
620	* Advances the iterator to the next boundary position.
621	* @return The position of the first boundary after this one.
622	*/
623	int32_t RuleBasedBreakIterator::next(void) {
624	fBreakCache->next();
625	return fDone ? UBRK_DONE((int32_t) -1) : fPosition;
626	}
627
628	/**
629	* Move the iterator backwards, to the boundary preceding the current one.
630	*
631	* Starts from the current position within fText.
632	* Starting position need not be on a boundary.
633	*
634	* @return The position of the boundary position immediately preceding the starting position.
635	*/
636	int32_t RuleBasedBreakIterator::previous(void) {
637	UErrorCode status = U_ZERO_ERROR;
638	fBreakCache->previous(status);
639	return fDone ? UBRK_DONE((int32_t) -1) : fPosition;
640	}
641
642	/**
643	* Sets the iterator to refer to the first boundary position following
644	* the specified position.
645	* @param startPos The position from which to begin searching for a break position.
646	* @return The position of the first break after the specified position.
647	*/
648	int32_t RuleBasedBreakIterator::following(int32_t startPos) {
649	// if the supplied position is before the beginning, return the
650	// text's starting offset
651	if (startPos < 0) {
652	return first();
653	}
654
655	// Move requested offset to a code point start. It might be on a trail surrogate,
656	// or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text.
657	utext_setNativeIndexutext_setNativeIndex_71(&fText, startPos);
658	startPos = (int32_t)utext_getNativeIndexutext_getNativeIndex_71(&fText);
659
660	UErrorCode status = U_ZERO_ERROR;
661	fBreakCache->following(startPos, status);
662	return fDone ? UBRK_DONE((int32_t) -1) : fPosition;
663	}
664
665	/**
666	* Sets the iterator to refer to the last boundary position before the
667	* specified position.
668	* @param offset The position to begin searching for a break from.
669	* @return The position of the last boundary before the starting position.
670	*/
671	int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
672	if (offset > utext_nativeLengthutext_nativeLength_71(&fText)) {
673	return last();
674	}
675
676	// Move requested offset to a code point start. It might be on a trail surrogate,
677	// or on a trail byte if the input is UTF-8.
678
679	utext_setNativeIndexutext_setNativeIndex_71(&fText, offset);
680	int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndexutext_getNativeIndex_71(&fText));
681
682	UErrorCode status = U_ZERO_ERROR;
683	fBreakCache->preceding(adjustedOffset, status);
684	return fDone ? UBRK_DONE((int32_t) -1) : fPosition;
685	}
686
687	/**
688	* Returns true if the specified position is a boundary position. As a side
689	* effect, leaves the iterator pointing to the first boundary position at
690	* or after "offset".
691	*
692	* @param offset the offset to check.
693	* @return True if "offset" is a boundary position.
694	*/
695	UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
696	// out-of-range indexes are never boundary positions
697	if (offset < 0) {
698	first(); // For side effects on current position, tag values.
699	return FALSE0;
700	}
701
702	// Adjust offset to be on a code point boundary and not beyond the end of the text.
703	// Note that isBoundary() is always false for offsets that are not on code point boundaries.
704	// But we still need the side effect of leaving iteration at the following boundary.
705
706	utext_setNativeIndexutext_setNativeIndex_71(&fText, offset);
707	int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndexutext_getNativeIndex_71(&fText));
708
709	bool result = false;
710	UErrorCode status = U_ZERO_ERROR;
711	if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) {
712	result = (fBreakCache->current() == offset);
713	}
714
715	if (result && adjustedOffset < offset && utext_char32Atutext_char32At_71(&fText, offset) == U_SENTINEL(-1)) {
716	// Original offset is beyond the end of the text. Return FALSE, it's not a boundary,
717	// but the iteration position remains set to the end of the text, which is a boundary.
718	return FALSE0;
719	}
720	if (!result) {
721	// Not on a boundary. isBoundary() must leave iterator on the following boundary.
722	// Cache->seek(), above, left us on the preceding boundary, so advance one.
723	next();
724	}
725	return result;
726	}
727
728
729	/**
730	* Returns the current iteration position.
731	* @return The current iteration position.
732	*/
733	int32_t RuleBasedBreakIterator::current(void) const {
734	return fPosition;
735	}
736
737
738	//=======================================================================
739	// implementation
740	//=======================================================================
741
742	//
743	// RBBIRunMode - the state machine runs an extra iteration at the beginning and end
744	// of user text. A variable with this enum type keeps track of where we
745	// are. The state machine only fetches user input while in the RUN mode.
746	//
747	enum RBBIRunMode {
748	RBBI_START, // state machine processing is before first char of input
749	RBBI_RUN, // state machine processing is in the user text
750	RBBI_END // state machine processing is after end of user text.
751	};
752
753
754	// Wrapper functions to select the appropriate handleNext() or handleSafePrevious()
755	// instantiation, based on whether an 8 or 16 bit table is required.
756	//
757	// These Trie access functions will be inlined within the handleNext()/Previous() instantiations.
758	static inline uint16_t TrieFunc8(const UCPTrie *trie, UChar32 c) {
759	return UCPTRIE_FAST_GET(trie, UCPTRIE_8, c)((trie)->data.ptr8[((uint32_t)(c) <= (uint32_t)(0xffff) ? ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT ] + ((c) & UCPTRIE_FAST_DATA_MASK)) : (uint32_t)(c) <= 0x10ffff ? ((c) >= (trie)->highStart ? (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (trie, c)) : (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET )]);
760	}
761
762	static inline uint16_t TrieFunc16(const UCPTrie *trie, UChar32 c) {
763	return UCPTRIE_FAST_GET(trie, UCPTRIE_16, c)((trie)->data.ptr16[((uint32_t)(c) <= (uint32_t)(0xffff ) ? ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT ] + ((c) & UCPTRIE_FAST_DATA_MASK)) : (uint32_t)(c) <= 0x10ffff ? ((c) >= (trie)->highStart ? (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : ucptrie_internalSmallIndex_71 (trie, c)) : (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET )]);
764	}
765
766	int32_t RuleBasedBreakIterator::handleNext() {
767	const RBBIStateTable *statetable = fData->fForwardTable;
768	bool use8BitsTrie = ucptrie_getValueWidthucptrie_getValueWidth_71(fData->fTrie) == UCPTRIE_VALUE_BITS_8;
769	if (statetable->fFlags & RBBI_8BITS_ROWS) {
770	if (use8BitsTrie) {
771	return handleNext<RBBIStateTableRow8, TrieFunc8>();
772	} else {
773	return handleNext<RBBIStateTableRow8, TrieFunc16>();
774	}
775	} else {
776	if (use8BitsTrie) {
777	return handleNext<RBBIStateTableRow16, TrieFunc8>();
778	} else {
779	return handleNext<RBBIStateTableRow16, TrieFunc16>();
780	}
781	}
782	}
783
784	int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
785	const RBBIStateTable *statetable = fData->fReverseTable;
786	bool use8BitsTrie = ucptrie_getValueWidthucptrie_getValueWidth_71(fData->fTrie) == UCPTRIE_VALUE_BITS_8;
787	if (statetable->fFlags & RBBI_8BITS_ROWS) {
788	if (use8BitsTrie) {
789	return handleSafePrevious<RBBIStateTableRow8, TrieFunc8>(fromPosition);
790	} else {
791	return handleSafePrevious<RBBIStateTableRow8, TrieFunc16>(fromPosition);
792	}
793	} else {
794	if (use8BitsTrie) {
795	return handleSafePrevious<RBBIStateTableRow16, TrieFunc8>(fromPosition);
796	} else {
797	return handleSafePrevious<RBBIStateTableRow16, TrieFunc16>(fromPosition);
798	}
799	}
800	}
801
802
803	//-----------------------------------------------------------------------------------
804	//
805	// handleNext()
806	// Run the state machine to find a boundary
807	//
808	//-----------------------------------------------------------------------------------
809	template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc>
810	int32_t RuleBasedBreakIterator::handleNext() {
	// Template worker behind the non-template handleNext().
	//
	// RowType is the row layout used to read fNextState, fAccepting, fLookAhead
	// and fTagsIdx from the forward state table; trieFunc(fData->fTrie, c) maps a
	// code point to its character category (the column index into fNextState).
	//
	// Starting at fPosition, the function runs the forward table and returns the
	// position it stores into fPosition. If the very first character read is
	// U_SENTINEL it sets fDone, leaves fPosition unchanged and returns UBRK_DONE.
	//
	// Members written: fRuleStatusIndex, fDictionaryCharCount, fLookAheadMatches,
	// fPosition, fDone, and the iteration position of fText.
811	int32_t state;
812	uint16_t category = 0;
813	RBBIRunMode mode;
814
815	RowType *row;
816	UChar32 c;
817	int32_t result = 0;
818	int32_t initialPosition = 0;
819	const RBBIStateTable *statetable = fData->fForwardTable;
820	const char *tableData = statetable->fTableData;
821	uint32_t tableRowLen = statetable->fRowLen;
822	uint32_t dictStart = statetable->fDictCategoriesStart;
823	#ifdef RBBI_DEBUG
824	if (gTrace) {
825	RBBIDebugPuts("Handle Next pos char state category");
826	}
827	#endif
828
829	// handleNext always sets the break tag value.
830	// Set the default for it.
831	fRuleStatusIndex = 0;
832
833	fDictionaryCharCount = 0;
834
835	// if we're already at the end of the text, return DONE.
836	initialPosition = fPosition;
837	UTEXT_SETNATIVEINDEX(&fText, initialPosition)do { int64_t __offset = (initialPosition) - (&fText)-> chunkNativeStart; if (__offset>=0 && __offset<( int64_t)(&fText)->nativeIndexingLimit && (& fText)->chunkContents[__offset]<0xdc00) { (&fText)-> chunkOffset=(int32_t)__offset; } else { utext_setNativeIndex_71 ((&fText), (initialPosition)); } } while (false);
838	result = initialPosition;
839	c = UTEXT_NEXT32(&fText)((&fText)->chunkOffset < (&fText)->chunkLength && ((&fText)->chunkContents)[(&fText)-> chunkOffset]<0xd800 ? ((&fText)->chunkContents)[((& fText)->chunkOffset)++] : utext_next32_71(&fText));
840	if (c==U_SENTINEL(-1)) {
841	fDone = TRUE1;
842	return UBRK_DONE((int32_t) -1);
843	}
844
845	// Set the initial state for the state machine
846	state = START_STATE;
847	row = (RowType *)
848	//(statetable->fTableData + (statetable->fRowLen * state));
849	(tableData + tableRowLen * state);
850
851
852	mode = RBBI_RUN;
	// When the table requests it, the first loop iteration is run in RBBI_START
	// mode with the preset category 2 instead of a category looked up from the
	// text; the character already read into c is then processed by the
	// following iteration.
853	if (statetable->fFlags & RBBI_BOF_REQUIRED) {
854	category = 2;
855	mode = RBBI_START;
856	}
857
858
859	// loop until we reach the end of the text or transition to state 0
860	//
861	for (;;) {
862	if (c == U_SENTINEL(-1)) {
863	// Reached end of input string.
864	if (mode == RBBI_END) {
865	// We have already run the loop one last time with the
866	// character set to the pseudo {eof} value. Now it is time
867	// to unconditionally bail out.
868	break;
869	}
870	// Run the loop one last time with the fake end-of-input character category.
871	mode = RBBI_END;
872	category = 1;
873	}
874
875	//
876	// Get the char category. An incoming category of 1 or 2 means that
877	// we are preset for doing the beginning or end of input, and
878	// that we shouldn't get a category from an actual text input character.
879	//
880	if (mode == RBBI_RUN) {
881	// look up the current character's character category, which tells us
882	// which column in the state table to look at.
883	category = trieFunc(fData->fTrie, c);
	// The comparison result (0 or 1) is added directly: this counts the
	// characters whose category is at or above dictStart.
884	fDictionaryCharCount += (category >= dictStart);
885	}
886
887	#ifdef RBBI_DEBUG
888	if (gTrace) {
889	RBBIDebugPrintf(" %4" PRId64"l" "d" " ", utext_getNativeIndexutext_getNativeIndex_71(&fText));
890	if (0x20<=c && c<0x7f) {
891	RBBIDebugPrintf("\"%c\" ", c);
892	} else {
893	RBBIDebugPrintf("%5x ", c);
894	}
895	RBBIDebugPrintf("%3d %3d\n", state, category);
896	}
897	#endif
898
899	// State Transition - move machine to its next state
900	//
901
902	// fNextState is a variable-length array.
903	U_ASSERT(category<fData->fHeader->fCatCount)(void)0;
904	state = row->fNextState[category]; /Not accessing beyond memory/
905	row = (RowType *)
906	// (statetable->fTableData + (statetable->fRowLen * state));
907	(tableData + tableRowLen * state);
908
909
910	uint16_t accepting = row->fAccepting;
911	if (accepting == ACCEPTING_UNCONDITIONAL) {
912	// Match found, common case.
	// In RBBI_START mode the position is not recorded; only the rule
	// status index is updated for that iteration.
913	if (mode != RBBI_START) {
914	result = (int32_t)UTEXT_GETNATIVEINDEX(&fText)((&fText)->chunkOffset <= (&fText)->nativeIndexingLimit ? (&fText)->chunkNativeStart+(&fText)->chunkOffset : (&fText)->pFuncs->mapOffsetToNative(&fText));
915	}
916	fRuleStatusIndex = row->fTagsIdx; // Remember the break status (tag) values.
917	} else if (accepting > ACCEPTING_UNCONDITIONAL) {
918	// Lookahead match is completed.
	// The accepting value indexes fLookAheadMatches; a non-negative entry
	// is the position saved earlier at the rule's '/' and is returned
	// immediately as the boundary.
919	U_ASSERT(accepting < fData->fForwardTable->fLookAheadResultsSize)(void)0;
920	int32_t lookaheadResult = fLookAheadMatches[accepting];
921	if (lookaheadResult >= 0) {
922	fRuleStatusIndex = row->fTagsIdx;
923	fPosition = lookaheadResult;
924	return lookaheadResult;
925	}
926	}
927
928	// If we are at the position of the '/' in a look-ahead (hard break) rule;
929	// record the current position, to be returned later, if the full rule matches.
930	// TODO: Move this check before the previous check of fAccepting.
931	// This would enable hard-break rules with no following context.
932	// But there are line break test failures when trying this. Investigate.
933	// Issue ICU-20837
934	uint16_t rule = row->fLookAhead;
935	U_ASSERT(rule == 0 \|\| rule > ACCEPTING_UNCONDITIONAL)(void)0;
936	U_ASSERT(rule == 0 \|\| rule < fData->fForwardTable->fLookAheadResultsSize)(void)0;
937	if (rule > ACCEPTING_UNCONDITIONAL) {
938	int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText)((&fText)->chunkOffset <= (&fText)->nativeIndexingLimit ? (&fText)->chunkNativeStart+(&fText)->chunkOffset : (&fText)->pFuncs->mapOffsetToNative(&fText));
939	fLookAheadMatches[rule] = pos;
940	}
941
942	if (state == STOP_STATE) {
943	// This is the normal exit from the lookup state machine.
944	// We have advanced through the string until it is certain that no
945	// longer match is possible, no matter what characters follow.
946	break;
947	}
948
949	// Advance to the next character.
950	// If this is a beginning-of-input loop iteration, don't advance
951	// the input position. The next iteration will be processing the
952	// first real input character.
953	if (mode == RBBI_RUN) {
954	c = UTEXT_NEXT32(&fText)((&fText)->chunkOffset < (&fText)->chunkLength && ((&fText)->chunkContents)[(&fText)-> chunkOffset]<0xd800 ? ((&fText)->chunkContents)[((& fText)->chunkOffset)++] : utext_next32_71(&fText));
955	} else {
956	if (mode == RBBI_START) {
957	mode = RBBI_RUN;
958	}
959	}
960	}
961
962	// The state machine is done. Check whether it found a match...
963
964	// If the iterator failed to advance in the match engine, force it ahead by one.
965	// (This really indicates a defect in the break rules. They should always match
966	// at least one character.)
967	if (result == initialPosition) {
968	utext_setNativeIndexutext_setNativeIndex_71(&fText, initialPosition);
969	utext_next32utext_next32_71(&fText);
970	result = (int32_t)utext_getNativeIndexutext_getNativeIndex_71(&fText);
971	fRuleStatusIndex = 0;
972	}
973
974	// Leave the iterator at our result position.
975	fPosition = result;
976	#ifdef RBBI_DEBUG
977	if (gTrace) {
978	RBBIDebugPrintf("result = %d\n\n", result);
979	}
980	#endif
981	return result;
982	}
983
984
985	//-----------------------------------------------------------------------------------
986	//
987	// handleSafePrevious()
988	//
989	// Iterate backwards using the safe reverse rules.
990	// The logic of this function is similar to handleNext(), but simpler
991	// because the safe table does not require as many options.
992	//
993	//-----------------------------------------------------------------------------------
994	template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc>
995	int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
996
997	int32_t state;
998	uint16_t category = 0;
999	RowType *row;
1000	UChar32 c;
1001	int32_t result = 0;
1002
1003	const RBBIStateTable *stateTable = fData->fReverseTable;
1004	UTEXT_SETNATIVEINDEX(&fText, fromPosition)do { int64_t __offset = (fromPosition) - (&fText)->chunkNativeStart ; if (__offset>=0 && __offset<(int64_t)(&fText )->nativeIndexingLimit && (&fText)->chunkContents [__offset]<0xdc00) { (&fText)->chunkOffset=(int32_t )__offset; } else { utext_setNativeIndex_71((&fText), (fromPosition )); } } while (false);
1005	#ifdef RBBI_DEBUG
1006	if (gTrace) {
1007	RBBIDebugPuts("Handle Previous pos char state category");
1008	}
1009	#endif
1010
1011	// if we're already at the start of the text, return DONE.
1012	if (fData == NULL__null \|\| UTEXT_GETNATIVEINDEX(&fText)((&fText)->chunkOffset <= (&fText)->nativeIndexingLimit ? (&fText)->chunkNativeStart+(&fText)->chunkOffset : (&fText)->pFuncs->mapOffsetToNative(&fText))==0) {
1013	return BreakIterator::DONE;
1014	}
1015
1016	// Set the initial state for the state machine
1017	c = UTEXT_PREVIOUS32(&fText)((&fText)->chunkOffset > 0 && (&fText)-> chunkContents[(&fText)->chunkOffset-1] < 0xd800 ? ( &fText)->chunkContents[--((&fText)->chunkOffset )] : utext_previous32_71(&fText));
1018	state = START_STATE;
1019	row = (RowType *)
1020	(stateTable->fTableData + (stateTable->fRowLen * state));
1021
1022	// loop until we reach the start of the text or transition to state 0
1023	//
1024	for (; c != U_SENTINEL(-1); c = UTEXT_PREVIOUS32(&fText)((&fText)->chunkOffset > 0 && (&fText)-> chunkContents[(&fText)->chunkOffset-1] < 0xd800 ? ( &fText)->chunkContents[--((&fText)->chunkOffset )] : utext_previous32_71(&fText))) {
1025
1026	// look up the current character's character category, which tells us
1027	// which column in the state table to look at.
1028	//
1029	// Off the dictionary flag bit. For reverse iteration it is not used.
1030	category = trieFunc(fData->fTrie, c);
1031
1032	#ifdef RBBI_DEBUG
1033	if (gTrace) {
1034	RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndexutext_getNativeIndex_71(&fText));
1035	if (0x20<=c && c<0x7f) {
1036	RBBIDebugPrintf("\"%c\" ", c);
1037	} else {
1038	RBBIDebugPrintf("%5x ", c);
1039	}
1040	RBBIDebugPrintf("%3d %3d\n", state, category);
1041	}
1042	#endif
1043
1044	// State Transition - move machine to its next state
1045	//
1046	// fNextState is a variable-length array.
1047	U_ASSERT(category<fData->fHeader->fCatCount)(void)0;
1048	state = row->fNextState[category]; /Not accessing beyond memory/
1049	row = (RowType *)
1050	(stateTable->fTableData + (stateTable->fRowLen * state));
1051
1052	if (state == STOP_STATE) {
1053	// This is the normal exit from the lookup state machine.
1054	// Transition to state zero means we have found a safe point.
1055	break;
1056	}
1057	}
1058
1059	// The state machine is done. Check whether it found a match...
1060	result = (int32_t)UTEXT_GETNATIVEINDEX(&fText)((&fText)->chunkOffset <= (&fText)->nativeIndexingLimit ? (&fText)->chunkNativeStart+(&fText)->chunkOffset : (&fText)->pFuncs->mapOffsetToNative(&fText));
1061	#ifdef RBBI_DEBUG
1062	if (gTrace) {
1063	RBBIDebugPrintf("result = %d\n\n", result);
1064	}
1065	#endif
1066	return result;
1067	}
1068
1069
1070	//-------------------------------------------------------------------------------
1071	//
1072	// getRuleStatus() Return the break rule tag associated with the current
1073	// iterator position. If the iterator arrived at its current
1074	// position by iterating forwards, the value will have been
1075	// cached by the handleNext() function.
1076	//
1077	//-------------------------------------------------------------------------------
1078
1079	int32_t RuleBasedBreakIterator::getRuleStatus() const {
1080
1081	// fLastRuleStatusIndex indexes to the start of the appropriate status record
1082	// (the number of status values.)
1083	// This function returns the last (largest) of the array of status values.
1084	int32_t idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex];
1085	int32_t tagVal = fData->fRuleStatusTable[idx];
1086
1087	return tagVal;
1088	}
1089
1090
1091	int32_t RuleBasedBreakIterator::getRuleStatusVec(
1092	int32_t *fillInVec, int32_t capacity, UErrorCode &status) {
1093	if (U_FAILURE(status)) {
1094	return 0;
1095	}
1096
1097	int32_t numVals = fData->fRuleStatusTable[fRuleStatusIndex];
1098	int32_t numValsToCopy = numVals;
1099	if (numVals > capacity) {
1100	status = U_BUFFER_OVERFLOW_ERROR;
1101	numValsToCopy = capacity;
1102	}
1103	int i;
1104	for (i=0; i<numValsToCopy; i++) {
1105	fillInVec[i] = fData->fRuleStatusTable[fRuleStatusIndex + i + 1];
1106	}
1107	return numVals;
1108	}
1109
1110
1111
1112	//-------------------------------------------------------------------------------
1113	//
1114	// getBinaryRules Access to the compiled form of the rules,
1115	// for use by build system tools that save the data
1116	// for standard iterator types.
1117	//
1118	//-------------------------------------------------------------------------------
1119	const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
1120	const uint8_t *retPtr = NULL__null;
1121	length = 0;
1122
1123	if (fData != NULL__null) {
1124	retPtr = (const uint8_t *)fData->fHeader;
1125	length = fData->fHeader->fLength;
1126	}
1127	return retPtr;
1128	}
1129
1130
1131	RuleBasedBreakIterator *RuleBasedBreakIterator::createBufferClone(
1132	void * /stackBuffer/, int32_t &bufferSize, UErrorCode &status) {
1133	if (U_FAILURE(status)){
1134	return NULL__null;
1135	}
1136
1137	if (bufferSize == 0) {
1138	bufferSize = 1; // preflighting for deprecated functionality
1139	return NULL__null;
1140	}
1141
1142	BreakIterator *clonedBI = clone();
1143	if (clonedBI == NULL__null) {
1144	status = U_MEMORY_ALLOCATION_ERROR;
1145	} else {
1146	status = U_SAFECLONE_ALLOCATED_WARNING;
1147	}
1148	return (RuleBasedBreakIterator *)clonedBI;
1149	}
1150
1151	U_NAMESPACE_END}
1152
1153
1154	static icu::UStack *gLanguageBreakFactories = nullptr;  // created in initLanguageFactories(), deleted in rbbi_cleanup()
1155	static const icu::UnicodeString *gEmptyString = nullptr;  // created in rbbiInit(); returned by getRules() when fData is NULL
1156	static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER{{ 0 }, U_ZERO_ERROR};  // passed to umtx_initOnce() with initLanguageFactories
1157	static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER{{ 0 }, U_ZERO_ERROR};  // passed to umtx_initOnce() with rbbiInit
1158
1159	/**
1160	* Release all static memory held by breakiterator.
1161	*/
1162	U_CDECL_BEGINextern "C" {
1163	UBool U_CALLCONV rbbi_cleanuprbbi_cleanup_71(void) {
1164	delete gLanguageBreakFactories;
1165	gLanguageBreakFactories = nullptr;
1166	delete gEmptyString;
1167	gEmptyString = nullptr;
1168	gLanguageBreakFactoriesInitOnce.reset();
1169	gRBBIInitOnce.reset();
1170	return TRUE1;
1171	}
1172	U_CDECL_END}
1173
1174	U_CDECL_BEGINextern "C" {
1175	static void U_CALLCONV _deleteFactory(void *obj) {
1176	delete (icu::LanguageBreakFactory *) obj;
1177	}
1178	U_CDECL_END}
1179	U_NAMESPACE_BEGINnamespace icu_71 {
1180
1181	static void U_CALLCONV rbbiInit() {
1182	gEmptyString = new UnicodeString();
1183	ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_RBBI, rbbi_cleanuprbbi_cleanup_71);
1184	}
1185
1186	static void U_CALLCONV initLanguageFactories() {
1187	UErrorCode status = U_ZERO_ERROR;
1188	U_ASSERT(gLanguageBreakFactories == NULL)(void)0;
1189	gLanguageBreakFactories = new UStack(_deleteFactory, NULL__null, status);
1190	if (gLanguageBreakFactories != NULL__null && U_SUCCESS(status)) {
1191	ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
1192	gLanguageBreakFactories->push(builtIn, status);
1193	#ifdef U_LOCAL_SERVICE_HOOK
1194	LanguageBreakFactory extra = (LanguageBreakFactory )uprv_svc_hook("languageBreakFactory", &status);
1195	if (extra != NULL__null) {
1196	gLanguageBreakFactories->push(extra, status);
1197	}
1198	#endif
1199	}
1200	ucln_common_registerCleanupucln_common_registerCleanup_71(UCLN_COMMON_RBBI, rbbi_cleanuprbbi_cleanup_71);
1201	}
1202
1203
1204	static const LanguageBreakEngine*
1205	getLanguageBreakEngineFromFactory(UChar32 c)
1206	{
1207	umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
1208	if (gLanguageBreakFactories == NULL__null) {
1209	return NULL__null;
1210	}
1211
1212	int32_t i = gLanguageBreakFactories->size();
1213	const LanguageBreakEngine *lbe = NULL__null;
1214	while (--i >= 0) {
1215	LanguageBreakFactory factory = (LanguageBreakFactory )(gLanguageBreakFactories->elementAt(i));
1216	lbe = factory->getEngineFor(c);
1217	if (lbe != NULL__null) {
1218	break;
1219	}
1220	}
1221	return lbe;
1222	}
1223
1224
1225	//-------------------------------------------------------------------------------
1226	//
1227	// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for
1228	// the character c.
1229	//
1230	//-------------------------------------------------------------------------------
1231	const LanguageBreakEngine *
1232	RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
1233	const LanguageBreakEngine *lbe = NULL__null;
1234	UErrorCode status = U_ZERO_ERROR;
1235
1236	if (fLanguageBreakEngines == NULL__null) {
1237	fLanguageBreakEngines = new UStack(status);
1238	if (fLanguageBreakEngines == NULL__null \|\| U_FAILURE(status)) {
1239	delete fLanguageBreakEngines;
1240	fLanguageBreakEngines = 0;
1241	return NULL__null;
1242	}
1243	}
1244
1245	int32_t i = fLanguageBreakEngines->size();
1246	while (--i >= 0) {
1247	lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
1248	if (lbe->handles(c)) {
1249	return lbe;
1250	}
1251	}
1252
1253	// No existing dictionary took the character. See if a factory wants to
1254	// give us a new LanguageBreakEngine for this character.
1255	lbe = getLanguageBreakEngineFromFactory(c);
1256
1257	// If we got one, use it and push it on our stack.
1258	if (lbe != NULL__null) {
1259	fLanguageBreakEngines->push((void *)lbe, status);
1260	// Even if we can't remember it, we can keep looking it up, so
1261	// return it even if the push fails.
1262	return lbe;
1263	}
1264
1265	// No engine is forthcoming for this character. Add it to the
1266	// reject set. Create the reject break engine if needed.
1267	if (fUnhandledBreakEngine == NULL__null) {
1268	fUnhandledBreakEngine = new UnhandledEngine(status);
1269	if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL__null) {
1270	status = U_MEMORY_ALLOCATION_ERROR;
	Value stored to 'status' is never read
1271	return nullptr;
1272	}
1273	// Put it last so that scripts for which we have an engine get tried
1274	// first.
1275	fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
1276	// If we can't insert it, or creation failed, get rid of it
1277	U_ASSERT(!fLanguageBreakEngines->hasDeleter())(void)0;
1278	if (U_FAILURE(status)) {
1279	delete fUnhandledBreakEngine;
1280	fUnhandledBreakEngine = 0;
1281	return NULL__null;
1282	}
1283	}
1284
1285	// Tell the reject engine about the character; at its discretion, it may
1286	// add more than just the one character.
1287	fUnhandledBreakEngine->handleCharacter(c);
1288
1289	return fUnhandledBreakEngine;
1290	}
1291
1292	void RuleBasedBreakIterator::dumpCache() {
1293	fBreakCache->dumpCache();
1294	}
1295
1296	void RuleBasedBreakIterator::dumpTables() {
1297	fData->printData();
1298	}
1299
1300	/**
1301	* Returns the description used to create this iterator
1302	*/
1303
1304	const UnicodeString&
1305	RuleBasedBreakIterator::getRules() const {
1306	if (fData != NULL__null) {
1307	return fData->getRuleSourceString();
1308	} else {
1309	umtx_initOnce(gRBBIInitOnce, &rbbiInit);
1310	return *gEmptyString;
1311	}
1312	}
1313
1314	U_NAMESPACE_END}
1315
1316	#endif /* #if !UCONFIG_NO_BREAK_ITERATION */