../deps/icu-small/source/common/uniset.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/uniset.cpp
Warning:	line 1745, column 15 Assigned value is garbage or undefined
Annotated Source Code

Press '?' to see keyboard shortcuts
Show analyzer invocation
clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name uniset.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/uniset.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 1999-2015, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   10/20/99    alan        Creation.
10**********************************************************************
11*/

13#include "unicode/utypes.h"
14#include "unicode/parsepos.h"
15#include "unicode/symtable.h"
16#include "unicode/uniset.h"
17#include "unicode/ustring.h"
18#include "unicode/utf8.h"
19#include "unicode/utf16.h"
20#include "ruleiter.h"
21#include "cmemory.h"
22#include "cstring.h"
23#include "patternprops.h"
24#include "uelement.h"
25#include "util.h"
26#include "uvector.h"
27#include "charstr.h"
28#include "ustrfmt.h"
29#include "uassert.h"
30#include "bmpset.h"
31#include "unisetspan.h"

// HIGH_VALUE > all valid values. 0x110000 for code points.
// It terminates every inversion list in this file.
#define UNICODESET_HIGH 0x0110000

// LOW <= all valid values. Zero for code points.
#define UNICODESET_LOW 0x000000

/** Max list [0, 1, 2, ..., max code point, HIGH] */
constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1;

42U_NAMESPACE_BEGINnamespace icu_71 {

44SymbolTable::~SymbolTable() {}

46UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet)UClassID UnicodeSet::getStaticClassID() { static char classID
 = 0; return (UClassID)&classID; } UClassID UnicodeSet::getDynamicClassID
() const { return UnicodeSet::getStaticClassID(); }

48/**
* Modify the given UChar32 variable so that it is in range, by
* pinning values < UNICODESET_LOW to UNICODESET_LOW, and
* pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1.
* It modifies its argument in-place and also returns it.
*/
54static inline UChar32 pinCodePoint(UChar32& c) {
  if (c < UNICODESET_LOW0x000000) {
      c = UNICODESET_LOW0x000000;
  } else if (c > (UNICODESET_HIGH0x0110000-1)) {
      c = (UNICODESET_HIGH0x0110000-1);
  }
  return c;
61}

63//----------------------------------------------------------------
64// Debugging
65//----------------------------------------------------------------

67// DO NOT DELETE THIS CODE.  This code is used to debug memory leaks.
68// To enable the debugging, define the symbol DEBUG_MEM in the line
69// below.  This will result in text being sent to stdout that looks
70// like this:
71//   DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
72//   DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
73// Each line lists a construction (ct) or destruction (dt) event, the
74// object address, the number of outstanding objects after the event,
75// and the pattern of the object in question.

77// #define DEBUG_MEM

#ifdef DEBUG_MEM
#include <stdio.h>
// Number of UnicodeSet objects currently alive according to the ct/dt events.
static int32_t _dbgCount = 0;

// Logs a construction (ct) event: object address, live-object count after the
// event, and up to 39 characters of the set's pattern.
static inline void _dbgct(UnicodeSet* set) {
    UnicodeString str;
    set->toPattern(str, true);
    char buf[40];
    str.extract(0, 39, buf, "");
    // %p is the only portable conversion for a pointer; passing a pointer to
    // %X is undefined behavior and truncates on 64-bit targets.
    printf("DEBUG UnicodeSet: ct %p; %d %s\n",
           static_cast<void*>(set), static_cast<int>(++_dbgCount), buf);
}

// Logs a destruction (dt) event in the same format as _dbgct().
static inline void _dbgdt(UnicodeSet* set) {
    UnicodeString str;
    set->toPattern(str, true);
    char buf[40];
    str.extract(0, 39, buf, "");
    printf("DEBUG UnicodeSet: dt %p; %d %s\n",
           static_cast<void*>(set), static_cast<int>(--_dbgCount), buf);
}

#else

// Debugging disabled: the hooks compile away to nothing.
#define _dbgct(set)
#define _dbgdt(set)

#endif

106//----------------------------------------------------------------
107// UnicodeString in UVector support
108//----------------------------------------------------------------

110static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) {
  dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer);
112}

114static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
  const UnicodeString &a = *(const UnicodeString*)t1.pointer;
  const UnicodeString &b = *(const UnicodeString*)t2.pointer;
  return a.compare(b);
118}

120UBool UnicodeSet::hasStrings() const {
  return strings != nullptr && !strings->isEmpty();
122}

124int32_t UnicodeSet::stringsSize() const {
  return strings == nullptr ? 0 : strings->size();
126}

128UBool UnicodeSet::stringsContains(const UnicodeString &s) const {
  return strings != nullptr && strings->contains((void*) &s);
130}

132//----------------------------------------------------------------
133// Constructors &c
134//----------------------------------------------------------------

136/**
* Constructs an empty set.
*/
139UnicodeSet::UnicodeSet() {
  list[0] = UNICODESET_HIGH0x0110000;
  _dbgct(this);
142}

144/**
* Constructs a set containing the given range. If <code>end >
* start</code> then an empty set is created.
*
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
*/
151UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) {
  list[0] = UNICODESET_HIGH0x0110000;
  add(start, end);
  _dbgct(this);
155}

157/**
* Constructs a set that is identical to the given UnicodeSet.
*/
160UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) {
  *this = o;
  _dbgct(this);
163}

165// Copy-construct as thawed.
166UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) {
  if (ensureCapacity(o.len)) {
      // *this = o except for bmpSet and stringSpan
      len = o.len;
      uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(list, o.list
, (size_t)len*sizeof(UChar32)); } while (false);
      if (o.hasStrings()) {
          UErrorCode status = U_ZERO_ERROR;
          if (!allocateStrings(status) ||
                  (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
              setToBogus();
              return;
          }
      }
      if (o.pat) {
          setPattern(o.pat, o.patLen);
      }
      _dbgct(this);
  }
184}

186/**
* Destructs the set.
*/
189UnicodeSet::~UnicodeSet() {
  _dbgdt(this); // first!
  if (list != stackList) {
      uprv_freeuprv_free_71(list);
  }
  delete bmpSet;
  if (buffer != stackList) {
      uprv_freeuprv_free_71(buffer);
  }
  delete strings;
  delete stringSpan;
  releasePattern();
201}

203/**
* Assigns this object to be a copy of another.
*/
206UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
  return copyFrom(o, FALSE0);
208}

210UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) {
  if (this == &o) {
      return *this;
  }
  if (isFrozen()) {
      return *this;
  }
  if (o.isBogus()) {
      setToBogus();
      return *this;
  }
  if (!ensureCapacity(o.len)) {
      // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens.
      return *this;
  }
  len = o.len;
  uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(list, o.list
, (size_t)len*sizeof(UChar32)); } while (false);
  if (o.bmpSet != nullptr && !asThawed) {
      bmpSet = new BMPSet(*o.bmpSet, list, len);
      if (bmpSet == NULL__null) { // Check for memory allocation error.
          setToBogus();
          return *this;
      }
  }
  if (o.hasStrings()) {
      UErrorCode status = U_ZERO_ERROR;
      if ((strings == nullptr && !allocateStrings(status)) ||
              (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) {
          setToBogus();
          return *this;
      }
  } else if (hasStrings()) {
      strings->removeAllElements();
  }
  if (o.stringSpan != nullptr && !asThawed) {
      stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
      if (stringSpan == NULL__null) { // Check for memory allocation error.
          setToBogus();
          return *this;
      }
  }
  releasePattern();
  if (o.pat) {
      setPattern(o.pat, o.patLen);
  }
  return *this;
256}

258/**
* Returns a copy of this object.  All UnicodeMatcher objects have
* to support cloning in order to allow classes using
* UnicodeMatchers, such as Transliterator, to implement cloning.
*/
263UnicodeSet* UnicodeSet::clone() const {
  return new UnicodeSet(*this);
265}

267UnicodeSet *UnicodeSet::cloneAsThawed() const {
  return new UnicodeSet(*this, TRUE1);
269}

271/**
* Compares the specified object with this set for equality.  Returns
* <tt>true</tt> if the two sets
* have the same size, and every member of the specified set is
* contained in this set (or equivalently, every member of this set is
* contained in the specified set).
*
* @param o set to be compared for equality with this set.
* @return <tt>true</tt> if the specified set is equal to this set.
*/
281bool UnicodeSet::operator==(const UnicodeSet& o) const {
  if (len != o.len) return false;
  for (int32_t i = 0; i < len; ++i) {
      if (list[i] != o.list[i]) return false;
  }
  if (hasStrings() != o.hasStrings()) { return false; }
  if (hasStrings() && *strings != *o.strings) return false;
  return true;
289}

291/**
* Returns the hash code value for this set.
*
* @return the hash code value for this set.
* @see Object#hashCode()
*/
297int32_t UnicodeSet::hashCode(void) const {
  uint32_t result = static_cast<uint32_t>(len);
  for (int32_t i = 0; i < len; ++i) {
      result *= 1000003u;
      result += list[i];
  }
  return static_cast<int32_t>(result);
304}

306//----------------------------------------------------------------
307// Public API
308//----------------------------------------------------------------

310/**
* Returns the number of elements in this set (its cardinality),
* Note than the elements of a set may include both individual
* codepoints and strings.
*
* @return the number of elements in this set (its cardinality).
*/
317int32_t UnicodeSet::size(void) const {
  int32_t n = 0;
  int32_t count = getRangeCount();
  for (int32_t i = 0; i < count; ++i) {
      n += getRangeEnd(i) - getRangeStart(i) + 1;
  }
  return n + stringsSize();
324}

326/**
* Returns <tt>true</tt> if this set contains no elements.
*
* @return <tt>true</tt> if this set contains no elements.
*/
331UBool UnicodeSet::isEmpty(void) const {
  return len == 1 && !hasStrings();
333}

335/**
* Returns true if this set contains the given character.
* @param c character to be checked for containment
* @return true if the test condition is met
*/
340UBool UnicodeSet::contains(UChar32 c) const {
  // Set i to the index of the start item greater than ch
  // We know we will terminate without length test!
  // LATER: for large sets, add binary search
  //int32_t i = -1;
  //for (;;) {
  //    if (c < list[++i]) break;
  //}
  if (bmpSet != NULL__null) {
      return bmpSet->contains(c);
  }
  if (stringSpan != NULL__null) {
      return stringSpan->contains(c);
  }
  if (c >= UNICODESET_HIGH0x0110000) { // Don't need to check LOW bound
      return FALSE0;
  }
  int32_t i = findCodePoint(c);
  return (UBool)(i & 1); // return true if odd
359}

361/**
* Returns the smallest value i such that c < list[i].  Caller
* must ensure that c is a legal value or this method will enter
* an infinite loop.  This method performs a binary search.
* @param c a character in the range MIN_VALUE..MAX_VALUE
* inclusive
* @return the smallest integer i in the range 0..len-1,
* inclusive, such that c < list[i]
*/
370int32_t UnicodeSet::findCodePoint(UChar32 c) const {
  /* Examples:
                                     findCodePoint(c)
     set              list[]         c=0 1 3 4 7 8
     ===              ==============   ===========
     []               [110000]         0 0 0 0 0 0
     [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
     [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
     [:Any:]          [0, 110000]      1 1 1 1 1 1
   */

  // Return the smallest i such that c < list[i].  Assume
  // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
  if (c < list[0])
      return 0;
  // High runner test.  c is often after the last range, so an
  // initial check for this condition pays off.
  int32_t lo = 0;
  int32_t hi = len - 1;
  if (lo >= hi || c >= list[hi-1])
      return hi;
  // invariant: c >= list[lo]
  // invariant: c < list[hi]
  for (;;) {
      int32_t i = (lo + hi) >> 1;
      if (i == lo) {
          break; // Found!
      } else if (c < list[i]) {
          hi = i;
      } else {
          lo = i;
      }
  }
  return hi;
404}

406/**
* Returns true if this set contains every character
* of the given range.
* @param start first character, inclusive, of the range
* @param end last character, inclusive, of the range
* @return true if the test condition is met
*/
413UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
  //int32_t i = -1;
  //for (;;) {
  //    if (start < list[++i]) break;
  //}
  int32_t i = findCodePoint(start);
  return ((i & 1) != 0 && end < list[i]);
420}

422/**
* Returns <tt>true</tt> if this set contains the given
* multicharacter string.
* @param s string to be checked for containment
* @return <tt>true</tt> if this set contains the specified string
*/
428UBool UnicodeSet::contains(const UnicodeString& s) const {
  int32_t cp = getSingleCP(s);
  if (cp < 0) {
      return stringsContains(s);
  } else {
      return contains((UChar32) cp);
  }
435}

437/**
* Returns true if this set contains all the characters and strings
* of the given set.
* @param c set to be checked for containment
* @return true if the test condition is met
*/
443UBool UnicodeSet::containsAll(const UnicodeSet& c) const {
  // The specified set is a subset if all of its pairs are contained in
  // this set.  It's possible to code this more efficiently in terms of
  // direct manipulation of the inversion lists if the need arises.
  int32_t n = c.getRangeCount();
  for (int i=0; i<n; ++i) {
      if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) {
          return FALSE0;
      }
  }
  return !c.hasStrings() || (strings != nullptr && strings->containsAll(*c.strings));
454}

456/**
* Returns true if this set contains all the characters
* of the given string.
* @param s string containing characters to be checked for containment
* @return true if the test condition is met
*/
462UBool UnicodeSet::containsAll(const UnicodeString& s) const {
  return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) ==
                 s.length());
465}

467/**
* Returns true if this set contains none of the characters
* of the given range.
* @param start first character, inclusive, of the range
* @param end last character, inclusive, of the range
* @return true if the test condition is met
*/
474UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const {
  //int32_t i = -1;
  //for (;;) {
  //    if (start < list[++i]) break;
  //}
  int32_t i = findCodePoint(start);
  return ((i & 1) == 0 && end < list[i]);
481}

483/**
* Returns true if this set contains none of the characters and strings
* of the given set.
* @param c set to be checked for containment
* @return true if the test condition is met
*/
489UBool UnicodeSet::containsNone(const UnicodeSet& c) const {
  // The specified set is a subset if all of its pairs are contained in
  // this set.  It's possible to code this more efficiently in terms of
  // direct manipulation of the inversion lists if the need arises.
  int32_t n = c.getRangeCount();
  for (int32_t i=0; i<n; ++i) {
      if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
          return FALSE0;
      }
  }
  return strings == nullptr || !c.hasStrings() || strings->containsNone(*c.strings);
500}

502/**
* Returns true if this set contains none of the characters
* of the given string.
* @param s string containing characters to be checked for containment
* @return true if the test condition is met
*/
508UBool UnicodeSet::containsNone(const UnicodeString& s) const {
  return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) ==
                 s.length());
511}

513/**
* Returns <tt>true</tt> if this set contains any character whose low byte
* is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
* indexing.
*/
518UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
  /* The index value v, in the range [0,255], is contained in this set if
   * it is contained in any pair of this set.  Pairs either have the high
   * bytes equal, or unequal.  If the high bytes are equal, then we have
   * aaxx..aayy, where aa is the high byte.  Then v is contained if xx <=
   * v <= yy.  If the high bytes are unequal we have aaxx..bbyy, bb>aa.
   * Then v is contained if xx <= v || v <= yy.  (This is identical to the
   * time zone month containment logic.)
   */
  int32_t i;
  int32_t rangeCount=getRangeCount();
  for (i=0; i<rangeCount; ++i) {
      UChar32 low = getRangeStart(i);
      UChar32 high = getRangeEnd(i);
      if ((low & ~0xFF) == (high & ~0xFF)) {
          if ((low & 0xFF) <= v && v <= (high & 0xFF)) {
              return TRUE1;
          }
      } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) {
          return TRUE1;
      }
  }
  if (hasStrings()) {
      for (i=0; i<strings->size(); ++i) {
          const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
          if (s.isEmpty()) {
              continue;  // skip the empty string
          }
          UChar32 c = s.char32At(0);
          if ((c & 0xFF) == v) {
              return TRUE1;
          }
      }
  }
  return FALSE0;
553}

555/**
* Implementation of UnicodeMatcher::matches().  Always matches the
* longest possible multichar string.
*/
559UMatchDegree UnicodeSet::matches(const Replaceable& text,
                               int32_t& offset,
                               int32_t limit,
                               UBool incremental) {
  if (offset == limit) {
      if (contains(U_ETHER((char16_t)0xFFFF))) {
          return incremental ? U_PARTIAL_MATCH : U_MATCH;
      } else {
          return U_MISMATCH;
      }
  } else {
      if (hasStrings()) { // try strings first

          // might separate forward and backward loops later
          // for now they are combined

          // TODO Improve efficiency of this, at least in the forward
          // direction, if not in both.  In the forward direction we
          // can assume the strings are sorted.

          int32_t i;
          UBool forward = offset < limit;

          // firstChar is the leftmost char to match in the
          // forward direction or the rightmost char to match in
          // the reverse direction.
          UChar firstChar = text.charAt(offset);

          // If there are multiple strings that can match we
          // return the longest match.
          int32_t highWaterLength = 0;

          for (i=0; i<strings->size(); ++i) {
              const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
              if (trial.isEmpty()) {
                  continue;  // skip the empty string
              }

              UChar c = trial.charAt(forward ? 0 : trial.length() - 1);

              // Strings are sorted, so we can optimize in the
              // forward direction.
              if (forward && c > firstChar) break;
              if (c != firstChar) continue;

              int32_t matchLen = matchRest(text, offset, limit, trial);

              if (incremental) {
                  int32_t maxLen = forward ? limit-offset : offset-limit;
                  if (matchLen == maxLen) {
                      // We have successfully matched but only up to limit.
                      return U_PARTIAL_MATCH;
                  }
              }

              if (matchLen == trial.length()) {
                  // We have successfully matched the whole string.
                  if (matchLen > highWaterLength) {
                      highWaterLength = matchLen;
                  }
                  // In the forward direction we know strings
                  // are sorted so we can bail early.
                  if (forward && matchLen < highWaterLength) {
                      break;
                  }
                  continue;
              }
          }

          // We've checked all strings without a partial match.
          // If we have full matches, return the longest one.
          if (highWaterLength != 0) {
              offset += forward ? highWaterLength : -highWaterLength;
              return U_MATCH;
          }
      }
      return UnicodeFilter::matches(text, offset, limit, incremental);
  }
637}

639/**
* Returns the longest match for s in text at the given position.
* If limit > start then match forward from start+1 to limit
* matching all characters except s.charAt(0).  If limit < start,
* go backward starting from start-1 matching all characters
* except s.charAt(s.length()-1).  This method assumes that the
* first character, text.charAt(start), matches s, so it does not
* check it.
* @param text the text to match
* @param start the first character to match.  In the forward
* direction, text.charAt(start) is matched against s.charAt(0).
* In the reverse direction, it is matched against
* s.charAt(s.length()-1).
* @param limit the limit offset for matching, either last+1 in
* the forward direction, or last-1 in the reverse direction,
* where last is the index of the last character to match.
* @return If part of s matches up to the limit, return |limit -
* start|.  If all of s matches before reaching the limit, return
* s.length().  If there is a mismatch between s and text, return
* 0
*/
660int32_t UnicodeSet::matchRest(const Replaceable& text,
                            int32_t start, int32_t limit,
                            const UnicodeString& s) {
  int32_t i;
  int32_t maxLen;
  int32_t slen = s.length();
  if (start < limit) {
      maxLen = limit - start;
      if (maxLen > slen) maxLen = slen;
      for (i = 1; i < maxLen; ++i) {
          if (text.charAt(start + i) != s.charAt(i)) return 0;
      }
  } else {
      maxLen = start - limit;
      if (maxLen > slen) maxLen = slen;
      --slen; // <=> slen = s.length() - 1;
      for (i = 1; i < maxLen; ++i) {
          if (text.charAt(start - i) != s.charAt(slen - i)) return 0;
      }
  }
  return maxLen;
681}

683/**
* Implement of UnicodeMatcher
*/
686void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
  toUnionTo.addAll(*this);
688}

690/**
* Returns the index of the given character within this set, where
* the set is ordered by ascending code point.  If the character
* is not in this set, return -1.  The inverse of this method is
* <code>charAt()</code>.
* @return an index from 0..size()-1, or -1
*/
697int32_t UnicodeSet::indexOf(UChar32 c) const {
  if (c < MIN_VALUE || c > MAX_VALUE) {
      return -1;
  }
  int32_t i = 0;
  int32_t n = 0;
  for (;;) {
      UChar32 start = list[i++];
      if (c < start) {
          return -1;
      }
      UChar32 limit = list[i++];
      if (c < limit) {
          return n + c - start;
      }
      n += limit - start;
  }
714}

716/**
* Returns the character at the given index within this set, where
* the set is ordered by ascending code point.  If the index is
* out of range, return (UChar32)-1.  The inverse of this method is
* <code>indexOf()</code>.
* @param index an index from 0..size()-1
* @return the character at the given index, or (UChar32)-1.
*/
724UChar32 UnicodeSet::charAt(int32_t index) const {
  if (index >= 0) {
      // len2 is the largest even integer <= len, that is, it is len
      // for even values and len-1 for odd values.  With odd values
      // the last entry is UNICODESET_HIGH.
      int32_t len2 = len & ~1;
      for (int32_t i=0; i < len2;) {
          UChar32 start = list[i++];
          int32_t count = list[i++] - start;
          if (index < count) {
              return (UChar32)(start + index);
          }
          index -= count;
      }
  }
  return (UChar32)-1;
740}

742/**
* Make this object represent the range <code>start - end</code>.
* If <code>end > start</code> then this object is set to an
* an empty range.
*
* @param start first character in the set, inclusive
* @rparam end last character in the set, inclusive
*/
750UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
  clear();
  complement(start, end);
  return *this;
754}

756/**
* Adds the specified range to this set if it is not already
* present.  If this set already contains the specified range,
* the call leaves this set unchanged.  If <code>end > start</code>
* then an empty range is added, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be added
* to this set.
* @param end last character, inclusive, of range to be added
* to this set.
*/
767UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
  if (pinCodePoint(start) < pinCodePoint(end)) {
      UChar32 limit = end + 1;
      // Fast path for adding a new range after the last one.
      // Odd list length: [..., lastStart, lastLimit, HIGH]
      if ((len & 1) != 0) {
          // If the list is empty, set lastLimit low enough to not be adjacent to 0.
          UChar32 lastLimit = len == 1 ? -2 : list[len - 2];
          if (lastLimit <= start && !isFrozen() && !isBogus()) {
              if (lastLimit == start) {
                  // Extend the last range.
                  list[len - 2] = limit;
                  if (limit == UNICODESET_HIGH0x0110000) {
                      --len;
                  }
              } else {
                  list[len - 1] = start;
                  if (limit < UNICODESET_HIGH0x0110000) {
                      if (ensureCapacity(len + 2)) {
                          list[len++] = limit;
                          list[len++] = UNICODESET_HIGH0x0110000;
                      }
                  } else {  // limit == UNICODESET_HIGH
                      if (ensureCapacity(len + 1)) {
                          list[len++] = UNICODESET_HIGH0x0110000;
                      }
                  }
              }
              releasePattern();
              return *this;
          }
      }
      // This is slow. Could be much faster using findCodePoint(start)
      // and modifying the list, dealing with adjacent & overlapping ranges.
      UChar32 range[3] = { start, limit, UNICODESET_HIGH0x0110000 };
      add(range, 2, 0);
  } else if (start == end) {
      add(start);
  }
  return *this;
807}

809// #define DEBUG_US_ADD

#ifdef DEBUG_US_ADD
#include <stdio.h>
// Debug helper: prints c as a character when it is at most 0xFF,
// otherwise in U+XXXX form.
void dump(UChar32 c) {
    if (c > 0xFF) {
        printf("U+%04X", c);
        return;
    }
    printf("%c", (char)c);
}
// Debug helper: prints the first len entries of list as a
// comma-separated sequence in square brackets.
void dump(const UChar32* list, int32_t len) {
    printf("[");
    for (int32_t k = 0; k < len; ++k) {
        if (k != 0) {
            printf(", ");
        }
        dump(list[k]);
    }
    printf("]");
}
#endif

830/**
* Adds the specified character to this set if it is not already
* present.  If this set already contains the specified character,
* the call leaves this set unchanged.
*/
835UnicodeSet& UnicodeSet::add(UChar32 c) {
  // find smallest i such that c < list[i]
  // if odd, then it is IN the set
  // if even, then it is OUT of the set
  int32_t i = findCodePoint(pinCodePoint(c));

  // already in set?
  if ((i & 1) != 0  || isFrozen() || isBogus()) return *this;

  // HIGH is 0x110000
  // assert(list[len-1] == HIGH);

  // empty = [HIGH]
  // [start_0, limit_0, start_1, limit_1, HIGH]

  // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
  //                             ^
  //                             list[i]

  // i == 0 means c is before the first range

856#ifdef DEBUG_US_ADD
  printf("Add of ");
  dump(c);
  printf(" found at %d", i);
  printf(": ");
  dump(list, len);
  printf(" => ");
863#endif

  if (c == list[i]-1) {
      // c is before start of next range
      list[i] = c;
      // if we touched the HIGH mark, then add a new one
      if (c == (UNICODESET_HIGH0x0110000 - 1)) {
          if (!ensureCapacity(len+1)) {
              // ensureCapacity will mark the object as Bogus if OOM failure happens.
              return *this;
          }
          list[len++] = UNICODESET_HIGH0x0110000;
      }
      if (i > 0 && c == list[i-1]) {
          // collapse adjacent ranges

          // [..., start_k-1, c, c, limit_k, ..., HIGH]
          //                     ^
          //                     list[i]

          //for (int32_t k=i-1; k<len-2; ++k) {
          //    list[k] = list[k+2];
          //}
          UChar32* dst = list + i - 1;
          UChar32* src = dst + 2;
          UChar32* srclimit = list + len;
          while (src < srclimit) *(dst++) = *(src++);

          len -= 2;
      }
  }

  else if (i > 0 && c == list[i-1]) {
      // c is after end of prior range
      list[i-1]++;
      // no need to check for collapse here
  }

  else {
      // At this point we know the new char is not adjacent to
      // any existing ranges, and it is not 10FFFF.


      // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
      //                             ^
      //                             list[i]

      // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
      //                             ^
      //                             list[i]

      if (!ensureCapacity(len+2)) {
          // ensureCapacity will mark the object as Bogus if OOM failure happens.
          return *this;
      }

      UChar32 *p = list + i;
      uprv_memmove(p + 2, p, (len - i) * sizeof(*p))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memmove(p + 2, p
, (len - i) * sizeof(*p)); } while (false);
      list[i] = c;
      list[i+1] = c+1;
      len += 2;
  }

926#ifdef DEBUG_US_ADD
  dump(list, len);
  printf("\n");

  for (i=1; i<len; ++i) {
      if (list[i] <= list[i-1]) {
          // Corrupt array!
          printf("ERROR: list has been corrupted\n");
          exit(1);
      }
  }
937#endif

  releasePattern();
  return *this;
941}

943/**
* Adds the specified multicharacter to this set if it is not already
* present.  If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus "ch" => {"ch"}
*
* @param s the source string
* @return the modified set, for chaining
*/
952UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
  if (isFrozen() || isBogus()) return *this;
  int32_t cp = getSingleCP(s);
  if (cp < 0) {
      if (!stringsContains(s)) {
          _add(s);
          releasePattern();
      }
  } else {
      add((UChar32)cp);
  }
  return *this;
964}

966/**
* Adds the given string, in order, to 'strings'.  The given string
* must have been checked by the caller to not already be in 'strings'.
*/
970void UnicodeSet::_add(const UnicodeString& s) {
  if (isFrozen() || isBogus()) {
      return;
  }
  UErrorCode ec = U_ZERO_ERROR;
  if (strings == nullptr && !allocateStrings(ec)) {
      setToBogus();
      return;
  }
  UnicodeString* t = new UnicodeString(s);
  if (t == NULL__null) { // Check for memory allocation error.
      setToBogus();
      return;
  }
  strings->sortedInsert(t, compareUnicodeString, ec);
  if (U_FAILURE(ec)) {
      setToBogus();
  }
988}

990/**
* @return a code point IF the string consists of a single one.
* otherwise returns -1.
* @param string to test
*/
995int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
  int32_t sLength = s.length();
  if (sLength == 1) return s.charAt(0);
  if (sLength == 2) {
      UChar32 cp = s.char32At(0);
      if (cp > 0xFFFF) { // is surrogate pair
          return cp;
      }
  }
  return -1;
1005}

1007/**
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* @param the source string
* @return the modified set, for chaining
*/
1013UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
  UChar32 cp;
  for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)((uint32_t)(cp)<=0xffff ? 1 : 2)) {
      cp = s.char32At(i);
      add(cp);
  }
  return *this;
1020}

1022/**
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* @param the source string
* @return the modified set, for chaining
*/
1028UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) {
  UnicodeSet set;
  set.addAll(s);
  retainAll(set);
  return *this;
1033}

1035/**
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* @param the source string
* @return the modified set, for chaining
*/
1041UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) {
  UnicodeSet set;
  set.addAll(s);
  complementAll(set);
  return *this;
1046}

1048/**
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* @param the source string
* @return the modified set, for chaining
*/
1054UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
  UnicodeSet set;
  set.addAll(s);
  removeAll(set);
  return *this;
1059}

1061UnicodeSet& UnicodeSet::removeAllStrings() {
  if (!isFrozen() && hasStrings()) {
      strings->removeAllElements();
      releasePattern();
  }
  return *this;
1067}


1070/**
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
* @param the source string
* @return a newly created set containing the given string
*/
1076UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) {
  UnicodeSet *set = new UnicodeSet();
  if (set != NULL__null) { // Check for memory allocation error.
      set->add(s);
  }
  return set;
1082}


1085/**
* Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
* @param the source string
* @return a newly created set containing the given characters
*/
1090UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) {
  UnicodeSet *set = new UnicodeSet();
  if (set != NULL__null) { // Check for memory allocation error.
      set->addAll(s);
  }
  return set;
1096}

1098/**
* Retain only the elements in this set that are contained in the
* specified range.  If <code>end > start</code> then an empty range is
* retained, leaving the set empty.
*
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
*/
1108UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) {
  if (pinCodePoint(start) <= pinCodePoint(end)) {
      UChar32 range[3] = { start, end+1, UNICODESET_HIGH0x0110000 };
      retain(range, 2, 0);
  } else {
      clear();
  }
  return *this;
1116}

1118UnicodeSet& UnicodeSet::retain(UChar32 c) {
  return retain(c, c);
1120}

1122UnicodeSet& UnicodeSet::retain(const UnicodeString &s) {
  if (isFrozen() || isBogus()) { return *this; }
  UChar32 cp = getSingleCP(s);
  if (cp < 0) {
      bool isIn = stringsContains(s);
      // Check for getRangeCount() first to avoid somewhat-expensive size()
      // when there are single code points.
      if (isIn && getRangeCount() == 0 && size() == 1) {
          return *this;
      }
      clear();
      if (isIn) {
          _add(s);
      }
  } else {
      retain(cp, cp);
  }
  return *this;
1140}

1142/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
* returns.  If <code>end > start</code> then an empty range is
* removed, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be removed
* from this set.
* @param end last character, inclusive, of range to be removed
* from this set.
*/
1153UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) {
  if (pinCodePoint(start) <= pinCodePoint(end)) {
      UChar32 range[3] = { start, end+1, UNICODESET_HIGH0x0110000 };
      retain(range, 2, 2);
  }
  return *this;
1159}

1161/**
* Removes the specified character from this set if it is present.
* The set will not contain the specified range once the call
* returns.
*/
1166UnicodeSet& UnicodeSet::remove(UChar32 c) {
  return remove(c, c);
1168}

1170/**
* Removes the specified string from this set if it is present.
* The set will not contain the specified character once the call
* returns.
* @param the source string
* @return the modified set, for chaining
*/
1177UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
  if (isFrozen() || isBogus()) return *this;
  int32_t cp = getSingleCP(s);
  if (cp < 0) {
      if (strings != nullptr && strings->removeElement((void*) &s)) {
          releasePattern();
      }
  } else {
      remove((UChar32)cp, (UChar32)cp);
  }
  return *this;
1188}

1190/**
* Complements the specified range in this set.  Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set.  If <code>end > start</code>
* then an empty range is xor'ed, leaving the set unchanged.
*
* @param start first character, inclusive, of range to be removed
* from this set.
* @param end last character, inclusive, of range to be removed
* from this set.
*/
1201UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
  if (isFrozen() || isBogus()) {
5
←
Taking false branch→
      return *this;
  }
  if (pinCodePoint(start) <= pinCodePoint(end)) {
6
←
Taking true branch→
      UChar32 range[3] = { start, end+1, UNICODESET_HIGH0x0110000 };
      exclusiveOr(range, 2, 0);
7
←
Calling 'UnicodeSet::exclusiveOr'→
  }
  releasePattern();
  return *this;
1211}

1213UnicodeSet& UnicodeSet::complement(UChar32 c) {
  return complement(c, c);
1215}

1217/**
* This is equivalent to
* <code>complement(MIN_VALUE, MAX_VALUE)</code>.
*/
1221UnicodeSet& UnicodeSet::complement(void) {
  if (isFrozen() || isBogus()) {
      return *this;
  }
  if (list[0] == UNICODESET_LOW0x000000) {
      uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memmove(list, list
 + 1, (size_t)(len-1)*sizeof(UChar32)); } while (false);
      --len;
  } else {
      if (!ensureCapacity(len+1)) {
          return *this;
      }
      uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memmove(list + 1
, list, (size_t)len*sizeof(UChar32)); } while (false);
      list[0] = UNICODESET_LOW0x000000;
      ++len;
  }
  releasePattern();
  return *this;
1238}

1240/**
* Complement the specified string in this set.
* The set will not contain the specified string once the call
* returns.
*
* @param s the string to complement
* @return this object, for chaining
*/
1248UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
  if (isFrozen() || isBogus()) return *this;
1
Assuming the condition is false→
2
←
Taking false branch→
  int32_t cp = getSingleCP(s);
  if (cp2.1
'cp' is >= 0
 < 0) {
3
←
Taking false branch→
      if (stringsContains(s)) {
          strings->removeElement((void*) &s);
      } else {
          _add(s);
      }
      releasePattern();
  } else {
      complement((UChar32)cp, (UChar32)cp);
4
←
Calling 'UnicodeSet::complement'→
  }
  return *this;
1262}

1264/**
* Adds all of the elements in the specified set to this set if
* they're not already present.  This operation effectively
* modifies this set so that its value is the <i>union</i> of the two
* sets.  The behavior of this operation is unspecified if the specified
* collection is modified while the operation is in progress.
*
* @param c set whose elements are to be added to this set.
* @see #add(char, char)
*/
1274UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
  if ( c.len>0 && c.list!=NULL__null ) {
      add(c.list, c.len, 0);
  }

  // Add strings in order
  if ( c.strings!=NULL__null ) {
      for (int32_t i=0; i<c.strings->size(); ++i) {
          const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
          if (!stringsContains(*s)) {
              _add(*s);
          }
      }
  }
  return *this;
1289}

1291/**
* Retains only the elements in this set that are contained in the
* specified set.  In other words, removes from this set all of
* its elements that are not contained in the specified set.  This
* operation effectively modifies this set so that its value is
* the <i>intersection</i> of the two sets.
*
* @param c set that defines which elements this set will retain.
*/
1300UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
  if (isFrozen() || isBogus()) {
      return *this;
  }
  retain(c.list, c.len, 0);
  if (hasStrings()) {
      if (!c.hasStrings()) {
          strings->removeAllElements();
      } else {
          strings->retainAll(*c.strings);
      }
  }
  return *this;
1313}

1315/**
* Removes from this set all of its elements that are contained in the
* specified set.  This operation effectively modifies this
* set so that its value is the <i>asymmetric set difference</i> of
* the two sets.
*
* @param c set that defines which elements will be removed from
*          this set.
*/
1324UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
  if (isFrozen() || isBogus()) {
      return *this;
  }
  retain(c.list, c.len, 2);
  if (hasStrings() && c.hasStrings()) {
      strings->removeAll(*c.strings);
  }
  return *this;
1333}

1335/**
* Complements in this set all elements contained in the specified
* set.  Any character in the other set will be removed if it is
* in this set, or will be added if it is not in this set.
*
* @param c set that defines which elements will be xor'ed from
*          this set.
*/
1343UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
  if (isFrozen() || isBogus()) {
      return *this;
  }
  exclusiveOr(c.list, c.len, 0);

  if (c.strings != nullptr) {
      for (int32_t i=0; i<c.strings->size(); ++i) {
          void* e = c.strings->elementAt(i);
          if (strings == nullptr || !strings->removeElement(e)) {
              _add(*(const UnicodeString*)e);
          }
      }
  }
  return *this;
1358}

1360/**
* Removes all of the elements from this set.  This set will be
* empty after this call returns.
*/
1364UnicodeSet& UnicodeSet::clear(void) {
  if (isFrozen()) {
      return *this;
  }
  list[0] = UNICODESET_HIGH0x0110000;
  len = 1;
  releasePattern();
  if (strings != NULL__null) {
      strings->removeAllElements();
  }
  // Remove bogus
  fFlags = 0;
  return *this;
1377}

1379/**
* Iteration method that returns the number of ranges contained in
* this set.
* @see #getRangeStart
* @see #getRangeEnd
*/
1385int32_t UnicodeSet::getRangeCount() const {
  return len/2;
1387}

1389/**
* Iteration method that returns the first character in the
* specified range of this set.
* @see #getRangeCount
* @see #getRangeEnd
*/
1395UChar32 UnicodeSet::getRangeStart(int32_t index) const {
  return list[index*2];
1397}

1399/**
* Iteration method that returns the last character in the
* specified range of this set.
* @see #getRangeStart
* @see #getRangeEnd
*/
1405UChar32 UnicodeSet::getRangeEnd(int32_t index) const {
  return list[index*2 + 1] - 1;
1407}

1409const UnicodeString* UnicodeSet::getString(int32_t index) const {
  return (const UnicodeString*) strings->elementAt(index);
1411}

1413/**
* Reallocate this objects internal structures to take up the least
* possible space, without changing this object's value.
*/
1417UnicodeSet& UnicodeSet::compact() {
  if (isFrozen() || isBogus()) {
      return *this;
  }
  // Delete buffer first to defragment memory less.
  if (buffer != stackList) {
      uprv_freeuprv_free_71(buffer);
      buffer = NULL__null;
      bufferCapacity = 0;
  }
  if (list == stackList) {
      // pass
  } else if (len <= INITIAL_CAPACITY) {
      uprv_memcpy(stackList, list, len * sizeof(UChar32))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(stackList
, list, len * sizeof(UChar32)); } while (false);
      uprv_freeuprv_free_71(list);
      list = stackList;
      capacity = INITIAL_CAPACITY;
  } else if ((len + 7) < capacity) {
      // If we have more than a little unused capacity, shrink it to len.
      UChar32* temp = (UChar32*) uprv_reallocuprv_realloc_71(list, sizeof(UChar32) * len);
      if (temp) {
          list = temp;
          capacity = len;
      }
      // else what the heck happened?! We allocated less memory!
      // Oh well. We'll keep our original array.
  }
  if (strings != nullptr && strings->isEmpty()) {
      delete strings;
      strings = nullptr;
  }
  return *this;
1449}

1451#ifdef DEBUG_SERIALIZE
1452#include <stdio.h>
1453#endif

1455/**
* Deserialize constructor.
*/
1458UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization,
                     UErrorCode &ec) {

if(U_FAILURE(ec)) {
  setToBogus();
  return;
}

if( (serialization != kSerialized)
    || (data==NULL__null)
    || (dataLen < 1)) {
  ec = U_ILLEGAL_ARGUMENT_ERROR;
  setToBogus();
  return;
}

// bmp?
int32_t headerSize = ((data[0]&0x8000)) ?2:1;
int32_t bmpLength = (headerSize==1)?data[0]:data[1];

int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength;
1479#ifdef DEBUG_SERIALIZE
printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]);
1481#endif
if(!ensureCapacity(newLength + 1)) {  // +1 for HIGH
  return;
}
// copy bmp
int32_t i;
for(i = 0; i< bmpLength;i++) {
  list[i] = data[i+headerSize];
1489#ifdef DEBUG_SERIALIZE
  printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]);
1491#endif
}
// copy smp
for(i=bmpLength;i<newLength;i++) {
  list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) +
            ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]);
1497#ifdef DEBUG_SERIALIZE
  printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]);
1499#endif
}
U_ASSERT(i == newLength)(void)0;
if (i == 0 || list[i - 1] != UNICODESET_HIGH0x0110000) {
  list[i++] = UNICODESET_HIGH0x0110000;
}
len = i;
1506}


1509int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
  int32_t bmpLength, length, destLength;

  if (U_FAILURE(ec)) {
      return 0;
  }

  if (destCapacity<0 || (destCapacity>0 && dest==NULL__null)) {
      ec=U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
  }

  /* count necessary 16-bit units */
  length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
  // assert(length>=0);
  if (length==0) {
      /* empty set */
      if (destCapacity>0) {
          *dest=0;
      } else {
          ec=U_BUFFER_OVERFLOW_ERROR;
      }
      return 1;
  }
  /* now length>0 */

  if (this->list[length-1]<=0xffff) {
      /* all BMP */
      bmpLength=length;
  } else if (this->list[0]>=0x10000) {
      /* all supplementary */
      bmpLength=0;
      length*=2;
  } else {
      /* some BMP, some supplementary */
      for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
      length=bmpLength+2*(length-bmpLength);
  }
1547#ifdef DEBUG_SERIALIZE
  printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len);
1549#endif
  /* length: number of 16-bit array units */
  if (length>0x7fff) {
      /* there are only 15 bits for the length in the first serialized word */
      ec=U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
  }

  /*
   * total serialized length:
   * number of 16-bit array units (length) +
   * 1 length unit (always) +
   * 1 bmpLength unit (if there are supplementary values)
   */
  destLength=length+((length>bmpLength)?2:1);
  if (destLength<=destCapacity) {
      const UChar32 *p;
      int32_t i;

1568#ifdef DEBUG_SERIALIZE
      printf("writeHdr\n");
1570#endif
      *dest=(uint16_t)length;
      if (length>bmpLength) {
          *dest|=0x8000;
          *++dest=(uint16_t)bmpLength;
      }
      ++dest;

      /* write the BMP part of the array */
      p=this->list;
      for (i=0; i<bmpLength; ++i) {
1581#ifdef DEBUG_SERIALIZE
        printf("writebmp: %x\n", (int)*p);
1583#endif
          *dest++=(uint16_t)*p++;
      }

      /* write the supplementary part of the array */
      for (; i<length; i+=2) {
1589#ifdef DEBUG_SERIALIZE
        printf("write32: %x\n", (int)*p);
1591#endif
          *dest++=(uint16_t)(*p>>16);
          *dest++=(uint16_t)*p++;
      }
  } else {
      ec=U_BUFFER_OVERFLOW_ERROR;
  }
  return destLength;
1599}

1601//----------------------------------------------------------------
1602// Implementation: Utility methods
1603//----------------------------------------------------------------

1605/**
* Allocate our strings vector and return TRUE if successful.
*/
1608UBool UnicodeSet::allocateStrings(UErrorCode &status) {
  if (U_FAILURE(status)) {
      return FALSE0;
  }
  strings = new UVector(uprv_deleteUObjectuprv_deleteUObject_71,
                        uhash_compareUnicodeStringuhash_compareUnicodeString_71, 1, status);
  if (strings == NULL__null) { // Check for memory allocation error.
      status = U_MEMORY_ALLOCATION_ERROR;
      return FALSE0;
  }
  if (U_FAILURE(status)) {
      delete strings;
      strings = NULL__null;
      return FALSE0;
  } 
  return TRUE1;
1624}

1626int32_t UnicodeSet::nextCapacity(int32_t minCapacity) {
  // Grow exponentially to reduce the frequency of allocations.
  if (minCapacity < INITIAL_CAPACITY) {
      return minCapacity + INITIAL_CAPACITY;
  } else if (minCapacity <= 2500) {
      return 5 * minCapacity;
  } else {
      int32_t newCapacity = 2 * minCapacity;
      if (newCapacity > MAX_LENGTH) {
          newCapacity = MAX_LENGTH;
      }
      return newCapacity;
  }
1639}

1641bool UnicodeSet::ensureCapacity(int32_t newLen) {
  if (newLen > MAX_LENGTH) {
      newLen = MAX_LENGTH;
  }
  if (newLen <= capacity) {
      return true;
  }
  int32_t newCapacity = nextCapacity(newLen);
  UChar32* temp = (UChar32*) uprv_mallocuprv_malloc_71(newCapacity * sizeof(UChar32));
  if (temp == NULL__null) {
      setToBogus(); // set the object to bogus state if an OOM failure occurred.
      return false;
  }
  // Copy only the actual contents.
  uprv_memcpy(temp, list, len * sizeof(UChar32))do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(temp, list
, len * sizeof(UChar32)); } while (false);
  if (list != stackList) {
      uprv_freeuprv_free_71(list);
  }
  list = temp;
  capacity = newCapacity;
  return true;
1662}

1664bool UnicodeSet::ensureBufferCapacity(int32_t newLen) {
  if (newLen > MAX_LENGTH) {
      newLen = MAX_LENGTH;
  }
  if (newLen <= bufferCapacity) {
      return true;
  }
  int32_t newCapacity = nextCapacity(newLen);
  UChar32* temp = (UChar32*) uprv_mallocuprv_malloc_71(newCapacity * sizeof(UChar32));
  if (temp == NULL__null) {
      setToBogus();
      return false;
  }
  // The buffer has no contents to be copied.
  // It is always filled from scratch after this call.
  if (buffer != stackList) {
      uprv_freeuprv_free_71(buffer);
  }
  buffer = temp;
  bufferCapacity = newCapacity;
  return true;
1685}

1687/**
* Swap list and buffer.
*/
1690void UnicodeSet::swapBuffers(void) {
  // swap list and buffer
  UChar32* temp = list;
  list = buffer;
  buffer = temp;

  int32_t c = capacity;
  capacity = bufferCapacity;
  bufferCapacity = c;
1699}

1701void UnicodeSet::setToBogus() {
  clear(); // Remove everything in the set.
  fFlags = kIsBogus;
1704}

1706//----------------------------------------------------------------
1707// Implementation: Fundamental operators
1708//----------------------------------------------------------------

1710static inline UChar32 max(UChar32 a, UChar32 b) {
  return (a > b) ? a : b;
1712}

1714// polarity = 0, 3 is normal: x xor y
1715// polarity = 1, 2: x xor ~y == x === y

1717void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) {
  if (isFrozen() || isBogus()) {
8
←
Taking false branch→
      return;
  }
  if (!ensureBufferCapacity(len + otherLen)) {
9
←
Taking false branch→
      return;
  }

  int32_t i = 0, j = 0, k = 0;
  UChar32 a = list[i++];
  UChar32 b;
  if (polarity9.1
'polarity' is not equal to 1
 == 1 || polarity9.2
'polarity' is not equal to 2
 == 2) {
10
←
Taking false branch→
      b = UNICODESET_LOW0x000000;
      if (other[j] == UNICODESET_LOW0x000000) { // skip base if already LOW
          ++j;
          b = other[j];
      }
  } else {
      b = other[j++];
  }
  // simplest of all the routines
  // sort the values, discarding identicals!
  for (;;) {
11
←
Loop condition is true.  Entering loop body→
16
←
Loop condition is true.  Entering loop body→
21
←
Loop condition is true.  Entering loop body→
      if (a < b) {
12
←
Assuming 'a' is >= 'b'→
13
←
Taking false branch→
17
←
Assuming 'a' is >= 'b'→
18
←
Taking false branch→
22
←
Assuming 'a' is >= 'b'→
23
←
Taking false branch→
          buffer[k++] = a;
          a = list[i++];
      } else if (b < a) {
14
←
Assuming 'b' is < 'a'→
15
←
Taking true branch→
19
←
Assuming 'b' is < 'a'→
20
←
Taking true branch→
24
←
Assuming 'b' is < 'a'→
25
←
Taking true branch→
          buffer[k++] = b;
          b = other[j++];
26
←
Assigned value is garbage or undefined
      } else if (a != UNICODESET_HIGH0x0110000) { // at this point, a == b
          // discard both values!
          a = list[i++];
          b = other[j++];
      } else { // DONE!
          buffer[k++] = UNICODESET_HIGH0x0110000;
          len = k;
          break;
      }
  }
  swapBuffers();
  releasePattern();
1758}

1760// polarity = 0 is normal: x union y
1761// polarity = 2: x union ~y
1762// polarity = 1: ~x union y
1763// polarity = 3: ~x union ~y

1765void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
  if (isFrozen() || isBogus() || other==NULL__null) {
      return;
  }
  if (!ensureBufferCapacity(len + otherLen)) {
      return;
  }

  int32_t i = 0, j = 0, k = 0;
  UChar32 a = list[i++];
  UChar32 b = other[j++];
  // change from xor is that we have to check overlapping pairs
  // polarity bit 1 means a is second, bit 2 means b is.
  for (;;) {
      switch (polarity) {
        case 0: // both first; take lower if unequal
          if (a < b) { // take a
              // Back up over overlapping ranges in buffer[]
              if (k > 0 && a <= buffer[k-1]) {
                  // Pick latter end value in buffer[] vs. list[]
                  a = max(list[i], buffer[--k]);
              } else {
                  // No overlap
                  buffer[k++] = a;
                  a = list[i];
              }
              i++; // Common if/else code factored out
              polarity ^= 1;
          } else if (b < a) { // take b
              if (k > 0 && b <= buffer[k-1]) {
                  b = max(other[j], buffer[--k]);
              } else {
                  buffer[k++] = b;
                  b = other[j];
              }
              j++;
              polarity ^= 2;
          } else { // a == b, take a, drop b
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              // This is symmetrical; it doesn't matter if
              // we backtrack with a or b. - liu
              if (k > 0 && a <= buffer[k-1]) {
                  a = max(list[i], buffer[--k]);
              } else {
                  // No overlap
                  buffer[k++] = a;
                  a = list[i];
              }
              i++;
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
        case 3: // both second; take higher if unequal, and drop other
          if (b <= a) { // take a
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              buffer[k++] = a;
          } else { // take b
              if (b == UNICODESET_HIGH0x0110000) goto loop_end;
              buffer[k++] = b;
          }
          a = list[i++];
          polarity ^= 1;   // factored common code
          b = other[j++];
          polarity ^= 2;
          break;
        case 1: // a second, b first; if b < a, overlap
          if (a < b) { // no overlap, take a
              buffer[k++] = a; a = list[i++]; polarity ^= 1;
          } else if (b < a) { // OVERLAP, drop b
              b = other[j++];
              polarity ^= 2;
          } else { // a == b, drop both!
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              a = list[i++];
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
        case 2: // a first, b second; if a < b, overlap
          if (b < a) { // no overlap, take b
              buffer[k++] = b;
              b = other[j++];
              polarity ^= 2;
          } else  if (a < b) { // OVERLAP, drop a
              a = list[i++];
              polarity ^= 1;
          } else { // a == b, drop both!
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              a = list[i++];
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
      }
  }
loop_end:
  buffer[k++] = UNICODESET_HIGH0x0110000;    // terminate
  len = k;
  swapBuffers();
  releasePattern();
1869}

1871// polarity = 0 is normal: x intersect y
1872// polarity = 2: x intersect ~y == set-minus
1873// polarity = 1: ~x intersect y
1874// polarity = 3: ~x intersect ~y

1876void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) {
  if (isFrozen() || isBogus()) {
      return;
  }
  if (!ensureBufferCapacity(len + otherLen)) {
      return;
  }

  int32_t i = 0, j = 0, k = 0;
  UChar32 a = list[i++];
  UChar32 b = other[j++];
  // change from xor is that we have to check overlapping pairs
  // polarity bit 1 means a is second, bit 2 means b is.
  for (;;) {
      switch (polarity) {
        case 0: // both first; drop the smaller
          if (a < b) { // drop a
              a = list[i++];
              polarity ^= 1;
          } else if (b < a) { // drop b
              b = other[j++];
              polarity ^= 2;
          } else { // a == b, take one, drop other
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              buffer[k++] = a;
              a = list[i++];
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
        case 3: // both second; take lower if unequal
          if (a < b) { // take a
              buffer[k++] = a;
              a = list[i++];
              polarity ^= 1;
          } else if (b < a) { // take b
              buffer[k++] = b;
              b = other[j++];
              polarity ^= 2;
          } else { // a == b, take one, drop other
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              buffer[k++] = a;
              a = list[i++];
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
        case 1: // a second, b first;
          if (a < b) { // NO OVERLAP, drop a
              a = list[i++];
              polarity ^= 1;
          } else if (b < a) { // OVERLAP, take b
              buffer[k++] = b;
              b = other[j++];
              polarity ^= 2;
          } else { // a == b, drop both!
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              a = list[i++];
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
        case 2: // a first, b second; if a < b, overlap
          if (b < a) { // no overlap, drop b
              b = other[j++];
              polarity ^= 2;
          } else  if (a < b) { // OVERLAP, take a
              buffer[k++] = a;
              a = list[i++];
              polarity ^= 1;
          } else { // a == b, drop both!
              if (a == UNICODESET_HIGH0x0110000) goto loop_end;
              a = list[i++];
              polarity ^= 1;
              b = other[j++];
              polarity ^= 2;
          }
          break;
      }
  }
loop_end:
  buffer[k++] = UNICODESET_HIGH0x0110000;    // terminate
  len = k;
  swapBuffers();
  releasePattern();
1964}

1966/**
* Append the <code>toPattern()</code> representation of a
* string to the given <code>StringBuffer</code>.
*/
1970void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable) {
  UChar32 cp;
  for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)((uint32_t)(cp)<=0xffff ? 1 : 2)) {
      _appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
  }
1975}

1977/**
* Append the <code>toPattern()</code> representation of a
* character to the given <code>StringBuffer</code>.
*/
1981void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable) {
  if (escapeUnprintable ? ICU_Utility::isUnprintable(c) : ICU_Utility::shouldAlwaysBeEscaped(c)) {
      // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
      // unprintable
      ICU_Utility::escape(buf, c);
      return;
  }
  // Okay to let ':' pass through
  switch (c) {
  case u'[':
  case u']':
  case u'-':
  case u'^':
  case u'&':
  case u'\\':
  case u'{':
  case u'}':
  case u':':
  case SymbolTable::SYMBOL_REF:
      buf.append(u'\\');
      break;
  default:
      // Escape whitespace
      if (PatternProps::isWhiteSpace(c)) {
          buf.append(u'\\');
      }
      break;
  }
  buf.append(c);
2010}

2012void UnicodeSet::_appendToPat(UnicodeString &result, UChar32 start, UChar32 end,
                            UBool escapeUnprintable) {
  _appendToPat(result, start, escapeUnprintable);
  if (start != end) {
      if ((start+1) != end ||
              // Avoid writing what looks like a lead+trail surrogate pair.
              start == 0xdbff) {
          result.append(u'-');
      }
      _appendToPat(result, end, escapeUnprintable);
  }
2023}

2025/**
* Append a string representation of this set to result.  This will be
* a cleaned version of the string passed to applyPattern(), if there
* is one.  Otherwise it will be generated.
*/
2030UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
                                    UBool escapeUnprintable) const
2032{
  if (pat != NULL__null) {
      int32_t i;
      int32_t backslashCount = 0;
      for (i=0; i<patLen; ) {
          UChar32 c;
          U16_NEXT(pat, i, patLen, c)do { (c)=(pat)[(i)++]; if((((c)&0xfffffc00)==0xd800)) { uint16_t
 __c2; if((i)!=(patLen) && (((__c2=(pat)[(i)])&0xfffffc00
)==0xdc00)) { ++(i); (c)=(((UChar32)((c))<<10UL)+(UChar32
)(__c2)-((0xd800<<10UL)+0xdc00-0x10000)); } } } while (
false);
          if (escapeUnprintable ?
                  ICU_Utility::isUnprintable(c) : ICU_Utility::shouldAlwaysBeEscaped(c)) {
              // If the unprintable character is preceded by an odd
              // number of backslashes, then it has been escaped.
              // Before unescaping it, we delete the final
              // backslash.
              if ((backslashCount % 2) == 1) {
                  result.truncate(result.length() - 1);
              }
              ICU_Utility::escape(result, c);
              backslashCount = 0;
          } else {
              result.append(c);
              if (c == u'\\') {
                  ++backslashCount;
              } else {
                  backslashCount = 0;
              }
          }
      }
      return result;
  }

  return _generatePattern(result, escapeUnprintable);
2063}

2065/**
* Returns a string representation of this set.  If the result of
* calling this function is passed to a UnicodeSet constructor, it
* will produce another set that is equal to this one.
*/
2070UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
                                   UBool escapeUnprintable) const
2072{
  result.truncate(0);
  return _toPattern(result, escapeUnprintable);
2075}

2077/**
* Generate and append a string representation of this set to result.
* This does not use this.pat, the cleaned up copy of the string
* passed to applyPattern().
*/
2082UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
                                          UBool escapeUnprintable) const
2084{
  result.append(u'[');

  int32_t i = 0;
  int32_t limit = len & ~1;  // = 2 * getRangeCount()

  // If the set contains at least 2 intervals and includes both
  // MIN_VALUE and MAX_VALUE, then the inverse representation will
  // be more economical.
  //     if (getRangeCount() >= 2 &&
  //             getRangeStart(0) == MIN_VALUE &&
  //             getRangeEnd(last) == MAX_VALUE)
  // Invariant: list[len-1] == HIGH == MAX_VALUE + 1
  // If limit == len then len is even and the last range ends with MAX_VALUE.
  //
  // *But* do not write the inverse (complement) if there are strings.
  // Since ICU 70, the '^' performs a code point complement which removes all strings.
  if (len >= 4 && list[0] == 0 && limit == len && !hasStrings()) {
      // Emit the inverse
      result.append(u'^');
      // Offsetting the inversion list index by one lets us
      // iterate over the ranges of the set complement.
      i = 1;
      --limit;
  }

  // Emit the ranges as pairs.
  while (i < limit) {
      UChar32 start = list[i];  // getRangeStart()
      UChar32 end = list[i + 1] - 1;  // getRangeEnd() = range limit minus one
      if (!(0xd800 <= end && end <= 0xdbff)) {
          _appendToPat(result, start, end, escapeUnprintable);
          i += 2;
      } else {
          // The range ends with a lead surrogate.
          // Avoid writing what looks like a lead+trail surrogate pair.
          // 1. Postpone ranges that start with a lead surrogate code point.
          int32_t firstLead = i;
          while ((i += 2) < limit && list[i] <= 0xdbff) {}
          int32_t firstAfterLead = i;
          // 2. Write following ranges that start with a trail surrogate code point.
          while (i < limit && (start = list[i]) <= 0xdfff) {
              _appendToPat(result, start, list[i + 1] - 1, escapeUnprintable);
              i += 2;
          }
          // 3. Now write the postponed ranges.
          for (int j = firstLead; j < firstAfterLead; j += 2) {
              _appendToPat(result, list[j], list[j + 1] - 1, escapeUnprintable);
          }
      }
  }

  if (strings != nullptr) {
      for (int32_t i = 0; i<strings->size(); ++i) {
          result.append(u'{');
          _appendToPat(result,
                       *(const UnicodeString*) strings->elementAt(i),
                       escapeUnprintable);
          result.append(u'}');
      }
  }
  return result.append(u']');
2146}

2148/**
2149* Release existing cached pattern
2150*/
2151void UnicodeSet::releasePattern() {
  if (pat) {
      uprv_freeuprv_free_71(pat);
      pat = NULL__null;
      patLen = 0;
  }
2157}

2159/**
2160* Set the new pattern to cache.
2161*/
2162void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
  releasePattern();
  pat = (UChar *)uprv_mallocuprv_malloc_71((newPatLen + 1) * sizeof(UChar));
  if (pat) {
      patLen = newPatLen;
      u_memcpyu_memcpy_71(pat, newPat, patLen);
      pat[patLen] = 0;
  }
  // else we don't care if malloc failed. This was just a nice cache.
  // We can regenerate an equivalent pattern later when requested.
2172}

2174UnicodeSet *UnicodeSet::freeze() {
  if(!isFrozen() && !isBogus()) {
      compact();

      // Optimize contains() and span() and similar functions.
      if (hasStrings()) {
          stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
          if (stringSpan == nullptr) {
              setToBogus();
              return this;
          } else if (!stringSpan->needsStringSpanUTF16()) {
              // All strings are irrelevant for span() etc. because
              // all of each string's code points are contained in this set.
              // Do not check needsStringSpanUTF8() because UTF-8 has at most as
              // many relevant strings as UTF-16.
              // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
              delete stringSpan;
              stringSpan = NULL__null;
          }
      }
      if (stringSpan == NULL__null) {
          // No span-relevant strings: Optimize for code point spans.
          bmpSet=new BMPSet(list, len);
          if (bmpSet == NULL__null) { // Check for memory allocation error.
              setToBogus();
          }
      }
  }
  return this;
2203}

2205int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
  if(length>0 && bmpSet!=NULL__null) {
      return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
  }
  if(length<0) {
      length=u_strlenu_strlen_71(s);
  }
  if(length==0) {
      return 0;
  }
  if(stringSpan!=NULL__null) {
      return stringSpan->span(s, length, spanCondition);
  } else if(hasStrings()) {
      uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
                          UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
                          UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
      UnicodeSetStringSpan strSpan(*this, *strings, which);
      if(strSpan.needsStringSpanUTF16()) {
          return strSpan.span(s, length, spanCondition);
      }
  }

  if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
      spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
  }

  UChar32 c;
  int32_t start=0, prev=0;
  do {
      U16_NEXT(s, start, length, c)do { (c)=(s)[(start)++]; if((((c)&0xfffffc00)==0xd800)) {
 uint16_t __c2; if((start)!=(length) && (((__c2=(s)[(
start)])&0xfffffc00)==0xdc00)) { ++(start); (c)=(((UChar32
)((c))<<10UL)+(UChar32)(__c2)-((0xd800<<10UL)+0xdc00
-0x10000)); } } } while (false);
      if(spanCondition!=contains(c)) {
          break;
      }
  } while((prev=start)<length);
  return prev;
2240}

2242int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
  if(length>0 && bmpSet!=NULL__null) {
      return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
  }
  if(length<0) {
      length=u_strlenu_strlen_71(s);
  }
  if(length==0) {
      return 0;
  }
  if(stringSpan!=NULL__null) {
      return stringSpan->spanBack(s, length, spanCondition);
  } else if(hasStrings()) {
      uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
                          UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
                          UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
      UnicodeSetStringSpan strSpan(*this, *strings, which);
      if(strSpan.needsStringSpanUTF16()) {
          return strSpan.spanBack(s, length, spanCondition);
      }
  }

  if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
      spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
  }

  UChar32 c;
  int32_t prev=length;
  do {
      U16_PREV(s, 0, length, c)do { (c)=(s)[--(length)]; if((((c)&0xfffffc00)==0xdc00)) {
 uint16_t __c2; if((length)>(0) && (((__c2=(s)[(length
)-1])&0xfffffc00)==0xd800)) { --(length); (c)=(((UChar32)
(__c2)<<10UL)+(UChar32)((c))-((0xd800<<10UL)+0xdc00
-0x10000)); } } } while (false);
      if(spanCondition!=contains(c)) {
          break;
      }
  } while((prev=length)>0);
  return prev;
2277}

2279int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
  if(length>0 && bmpSet!=NULL__null) {
      const uint8_t *s0=(const uint8_t *)s;
      return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
  }
  if(length<0) {
      length=(int32_t)uprv_strlen(s):: strlen(s);
  }
  if(length==0) {
      return 0;
  }
  if(stringSpan!=NULL__null) {
      return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
  } else if(hasStrings()) {
      uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
                          UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
                          UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
      UnicodeSetStringSpan strSpan(*this, *strings, which);
      if(strSpan.needsStringSpanUTF8()) {
          return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
      }
  }

  if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
      spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
  }

  UChar32 c;
  int32_t start=0, prev=0;
  do {
      U8_NEXT_OR_FFFD(s, start, length, c)do { (c)=(uint8_t)(s)[(start)++]; if(!(((c)&0x80)==0)) { uint8_t
 __t = 0; if((start)!=(length) && ((c)>=0xe0 ? ((c
)<0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
[(c)&=0xf]&(1<<((__t=(s)[start])>>5)) &&
 (__t&=0x3f, 1) : ((c)-=0xf0)<=4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
[(__t=(s)[start])>>4]&(1<<(c)) && ((c
)=((c)<<6)|(__t&0x3f), ++(start)!=(length)) &&
 (__t=(s)[start]-0x80)<=0x3f) && ((c)=((c)<<
6)|__t, ++(start)!=(length)) : (c)>=0xc2 && ((c)&=
0x1f, 1)) && (__t=(s)[start]-0x80)<=0x3f &&
 ((c)=((c)<<6)|__t, ++(start), 1)) { } else { (c)=(0xfffd
); } } } while (false);
      if(spanCondition!=contains(c)) {
          break;
      }
  } while((prev=start)<length);
  return prev;
2315}

2317int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
  if(length>0 && bmpSet!=NULL__null) {
      const uint8_t *s0=(const uint8_t *)s;
      return bmpSet->spanBackUTF8(s0, length, spanCondition);
  }
  if(length<0) {
      length=(int32_t)uprv_strlen(s):: strlen(s);
  }
  if(length==0) {
      return 0;
  }
  if(stringSpan!=NULL__null) {
      return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
  } else if(hasStrings()) {
      uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
                          UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
                          UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
      UnicodeSetStringSpan strSpan(*this, *strings, which);
      if(strSpan.needsStringSpanUTF8()) {
          return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
      }
  }

  if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
      spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
  }

  UChar32 c;
  int32_t prev=length;
  do {
      U8_PREV_OR_FFFD(s, 0, length, c)do { (c)=(uint8_t)(s)[--(length)]; if(!(((c)&0x80)==0)) {
 (c)=utf8_prevCharSafeBody_71((const uint8_t *)s, 0, &(length
), c, -3); } } while (false);
      if(spanCondition!=contains(c)) {
          break;
      }
  } while((prev=length)>0);
  return prev;
2353}

2355U_NAMESPACE_END}