../deps/icu-small/source/common/utext.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/utext.cpp
Warning:	line 2573, column 24 Array access (from variable 'dest') results in a null pointer dereference
Annotated Source Code

Press '?' to see keyboard shortcuts
Show analyzer invocation
clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name utext.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/utext.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 2005-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  utext.cpp
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2005apr12
16*   created by: Markus W. Scherer
17*/

19#include <cstddef>

21#include "unicode/utypes.h"
22#include "unicode/ustring.h"
23#include "unicode/unistr.h"
24#include "unicode/chariter.h"
25#include "unicode/utext.h"
26#include "unicode/utf.h"
27#include "unicode/utf8.h"
28#include "unicode/utf16.h"
29#include "ustr_imp.h"
30#include "cmemory.h"
31#include "cstring.h"
32#include "uassert.h"
33#include "putilimp.h"

35U_NAMESPACE_USEusing namespace icu_71;

// Produce an int32_t mask in which only bit number `bitIndex` is set.
#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex))


40static UBool
41utext_access(UText *ut, int64_t index, UBool forward) {
  return ut->pFuncs->access(ut, index, forward);
43}



47U_CAPIextern "C" UBool U_EXPORT2
48utext_moveIndex32utext_moveIndex32_71(UText *ut, int32_t delta) {
  UChar32  c;
  if (delta > 0) {
      do {
          if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE1)) {
              return FALSE0;
          }
          c = ut->chunkContents[ut->chunkOffset];
          if (U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
              c = utext_next32utext_next32_71(ut);
              if (c == U_SENTINEL(-1)) {
                  return FALSE0;
              }
          } else {
              ut->chunkOffset++;
          }
      } while(--delta>0);

  } else if (delta<0) {
      do {
          if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE0)) {
              return FALSE0;
          }
          c = ut->chunkContents[ut->chunkOffset-1];
          if (U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
              c = utext_previous32utext_previous32_71(ut);
              if (c == U_SENTINEL(-1)) {
                  return FALSE0;
              }
          } else {
              ut->chunkOffset--;
          }
      } while(++delta<0);
  }

  return TRUE1;
84}


87U_CAPIextern "C" int64_t U_EXPORT2
88utext_nativeLengthutext_nativeLength_71(UText *ut) {
  return ut->pFuncs->nativeLength(ut);
90}


93U_CAPIextern "C" UBool U_EXPORT2
94utext_isLengthExpensiveutext_isLengthExpensive_71(const UText *ut) {
  UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE))) != 0;
  return r;
97}


100U_CAPIextern "C" int64_t U_EXPORT2
101utext_getNativeIndexutext_getNativeIndex_71(const UText *ut) {
  if(ut->chunkOffset <= ut->nativeIndexingLimit) {
      return ut->chunkNativeStart+ut->chunkOffset;
  } else {
      return ut->pFuncs->mapOffsetToNative(ut);
  }
107}


110U_CAPIextern "C" void U_EXPORT2
111utext_setNativeIndexutext_setNativeIndex_71(UText *ut, int64_t index) {
  if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
      // The desired position is outside of the current chunk.
      // Access the new position.  Assume a forward iteration from here,
      // which will also be optimimum for a single random access.
      // Reverse iterations may suffer slightly.
      ut->pFuncs->access(ut, index, TRUE1);
  } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) {
      // utf-16 indexing.
      ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);
  } else {
       ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
  }
  // The convention is that the index must always be on a code point boundary.
  // Adjust the index position if it is in the middle of a surrogate pair.
  if (ut->chunkOffset<ut->chunkLength) {
      UChar c= ut->chunkContents[ut->chunkOffset];
      if (U16_IS_TRAIL(c)(((c)&0xfffffc00)==0xdc00)) {
          if (ut->chunkOffset==0) {
              ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE0);
          }
          if (ut->chunkOffset>0) {
              UChar lead = ut->chunkContents[ut->chunkOffset-1];
              if (U16_IS_LEAD(lead)(((lead)&0xfffffc00)==0xd800)) {
                  ut->chunkOffset--;
              }
          }
      }
  }
140}



144U_CAPIextern "C" int64_t U_EXPORT2
145utext_getPreviousNativeIndexutext_getPreviousNativeIndex_71(UText *ut) {
  //
  //  Fast-path the common case.
  //     Common means current position is not at the beginning of a chunk
  //     and the preceding character is not supplementary.
  //
  int32_t i = ut->chunkOffset - 1;
  int64_t result;
  if (i >= 0) {
      UChar c = ut->chunkContents[i];
      if (U16_IS_TRAIL(c)(((c)&0xfffffc00)==0xdc00) == FALSE0) {
          if (i <= ut->nativeIndexingLimit) {
              result = ut->chunkNativeStart + i;
          } else {
              ut->chunkOffset = i;
              result = ut->pFuncs->mapOffsetToNative(ut);
              ut->chunkOffset++;
          }
          return result;
      }
  }

  // If at the start of text, simply return 0.
  if (ut->chunkOffset==0 && ut->chunkNativeStart==0) {
      return 0;
  }

  // Harder, less common cases.  We are at a chunk boundary, or on a surrogate.
  //    Keep it simple, use other functions to handle the edges.
  //
  utext_previous32utext_previous32_71(ut);
  result = UTEXT_GETNATIVEINDEX(ut)((ut)->chunkOffset <= (ut)->nativeIndexingLimit? (ut
)->chunkNativeStart+(ut)->chunkOffset : (ut)->pFuncs
->mapOffsetToNative(ut));
  utext_next32utext_next32_71(ut);
  return result;
179}


182//
183//  utext_current32.  Get the UChar32 at the current position.
184//                    UText iteration position is always on a code point boundary,
185//                    never on the trail half of a surrogate pair.
186//
187U_CAPIextern "C" UChar32 U_EXPORT2
188utext_current32utext_current32_71(UText *ut) {
  UChar32  c;
  if (ut->chunkOffset==ut->chunkLength) {
      // Current position is just off the end of the chunk.
      if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE1) == FALSE0) {
          // Off the end of the text.
          return U_SENTINEL(-1);
      }
  }

  c = ut->chunkContents[ut->chunkOffset];
  if (U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800) == FALSE0) {
      // Normal, non-supplementary case.
      return c;
  }

  //
  //  Possible supplementary char.
  //
  UChar32   trail = 0;
  UChar32   supplementaryC = c;
  if ((ut->chunkOffset+1) < ut->chunkLength) {
      // The trail surrogate is in the same chunk.
      trail = ut->chunkContents[ut->chunkOffset+1];
  } else {
      //  The trail surrogate is in a different chunk.
      //     Because we must maintain the iteration position, we need to switch forward
      //     into the new chunk, get the trail surrogate, then revert the chunk back to the
      //     original one.
      //     An edge case to be careful of:  the entire text may end with an unpaired
      //        leading surrogate.  The attempt to access the trail will fail, but
      //        the original position before the unpaired lead still needs to be restored.
      int64_t  nativePosition = ut->chunkNativeLimit;
      int32_t  originalOffset = ut->chunkOffset;
      if (ut->pFuncs->access(ut, nativePosition, TRUE1)) {
          trail = ut->chunkContents[ut->chunkOffset];
      }
      UBool r = ut->pFuncs->access(ut, nativePosition, FALSE0);  // reverse iteration flag loads preceding chunk
      U_ASSERT(r==TRUE)(void)0;
      ut->chunkOffset = originalOffset;
      if(!r) {
          return U_SENTINEL(-1);
      }
  }

  if (U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
      supplementaryC = U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000));
  }
  return supplementaryC;

238}


241U_CAPIextern "C" UChar32 U_EXPORT2
242utext_char32Atutext_char32At_71(UText *ut, int64_t nativeIndex) {
  UChar32 c = U_SENTINEL(-1);

  // Fast path the common case.
  if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) {
      ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
      c = ut->chunkContents[ut->chunkOffset];
      if (U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800) == FALSE0) {
          return c;
      }
  }


  utext_setNativeIndexutext_setNativeIndex_71(ut, nativeIndex);
  if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) {
      c = ut->chunkContents[ut->chunkOffset];
      if (U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
          // For surrogates, let current32() deal with the complications
          //    of supplementaries that may span chunk boundaries.
          c = utext_current32utext_current32_71(ut);
      }
  }
  return c;
265}


268U_CAPIextern "C" UChar32 U_EXPORT2
269utext_next32utext_next32_71(UText *ut) {
  UChar32       c;

  if (ut->chunkOffset >= ut->chunkLength) {
      if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE1) == FALSE0) {
          return U_SENTINEL(-1);
      }
  }

  c = ut->chunkContents[ut->chunkOffset++];
  if (U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800) == FALSE0) {
      // Normal case, not supplementary.
      //   (A trail surrogate seen here is just returned as is, as a surrogate value.
      //    It cannot be part of a pair.)
      return c;
  }

  if (ut->chunkOffset >= ut->chunkLength) {
      if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE1) == FALSE0) {
          // c is an unpaired lead surrogate at the end of the text.
          // return it as it is.
          return c;
      }
  }
  UChar32 trail = ut->chunkContents[ut->chunkOffset];
  if (U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00) == FALSE0) {
      // c was an unpaired lead surrogate, not at the end of the text.
      // return it as it is (unpaired).  Iteration position is on the
      // following character, possibly in the next chunk, where the
      //  trail surrogate would have been if it had existed.
      return c;
  }

  UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000));
  ut->chunkOffset++;   // move iteration position over the trail surrogate.
  return supplementary;
  }


308U_CAPIextern "C" UChar32 U_EXPORT2
309utext_previous32utext_previous32_71(UText *ut) {
  UChar32       c;

  if (ut->chunkOffset <= 0) {
      if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE0) == FALSE0) {
          return U_SENTINEL(-1);
      }
  }
  ut->chunkOffset--;
  c = ut->chunkContents[ut->chunkOffset];
  if (U16_IS_TRAIL(c)(((c)&0xfffffc00)==0xdc00) == FALSE0) {
      // Normal case, not supplementary.
      //   (A lead surrogate seen here is just returned as is, as a surrogate value.
      //    It cannot be part of a pair.)
      return c;
  }

  if (ut->chunkOffset <= 0) {
      if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE0) == FALSE0) {
          // c is an unpaired trail surrogate at the start of the text.
          // return it as it is.
          return c;
      }
  }

  UChar32 lead = ut->chunkContents[ut->chunkOffset-1];
  if (U16_IS_LEAD(lead)(((lead)&0xfffffc00)==0xd800) == FALSE0) {
      // c was an unpaired trail surrogate, not at the end of the text.
      // return it as it is (unpaired).  Iteration position is at c
      return c;
  }

  UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c)(((UChar32)(lead)<<10UL)+(UChar32)(c)-((0xd800<<10UL
)+0xdc00-0x10000));
  ut->chunkOffset--;   // move iteration position over the lead surrogate.
  return supplementary;
344}



348U_CAPIextern "C" UChar32 U_EXPORT2
349utext_next32Fromutext_next32From_71(UText *ut, int64_t index) {
  UChar32       c      = U_SENTINEL(-1);

  if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
      // Desired position is outside of the current chunk.
      if(!ut->pFuncs->access(ut, index, TRUE1)) {
          // no chunk available here
          return U_SENTINEL(-1);
      }
  } else if (index - ut->chunkNativeStart  <= (int64_t)ut->nativeIndexingLimit) {
      // Desired position is in chunk, with direct 1:1 native to UTF16 indexing
      ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
  } else {
      // Desired position is in chunk, with non-UTF16 indexing.
      ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
  }

  c = ut->chunkContents[ut->chunkOffset++];
  if (U16_IS_SURROGATE(c)(((c)&0xfffff800)==0xd800)) {
      // Surrogates.  Many edge cases.  Use other functions that already
      //              deal with the problems.
      utext_setNativeIndexutext_setNativeIndex_71(ut, index);
      c = utext_next32utext_next32_71(ut);
  }
  return c;
374}


377U_CAPIextern "C" UChar32 U_EXPORT2
378utext_previous32Fromutext_previous32From_71(UText *ut, int64_t index) {
  //
  //  Return the character preceding the specified index.
  //  Leave the iteration position at the start of the character that was returned.
  //
  UChar32     cPrev;    // The character preceding cCurr, which is what we will return.

  // Address the chunk containing the position preceding the incoming index
  // A tricky edge case:
  //   We try to test the requested native index against the chunkNativeStart to determine
  //    whether the character preceding the one at the index is in the current chunk.
  //    BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the
  //    requested index is on something other than the first position of the first char.
  //
  if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) {
      // Requested native index is outside of the current chunk.
      if(!ut->pFuncs->access(ut, index, FALSE0)) {
          // no chunk available here
          return U_SENTINEL(-1);
      }
  } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
      // Direct UTF-16 indexing.
      ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
  } else {
      ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
      if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE0)) {
          // no chunk available here
          return U_SENTINEL(-1);
      }
  }

  //
  // Simple case with no surrogates.
  //
  ut->chunkOffset--;
  cPrev = ut->chunkContents[ut->chunkOffset];

  if (U16_IS_SURROGATE(cPrev)(((cPrev)&0xfffff800)==0xd800)) {
      // Possible supplementary.  Many edge cases.
      // Let other functions do the heavy lifting.
      utext_setNativeIndexutext_setNativeIndex_71(ut, index);
      cPrev = utext_previous32utext_previous32_71(ut);
  }
  return cPrev;
422}


425U_CAPIextern "C" int32_t U_EXPORT2
426utext_extractutext_extract_71(UText *ut,
           int64_t start, int64_t limit,
           UChar *dest, int32_t destCapacity,
           UErrorCode *status) {
               return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);
           }



435U_CAPIextern "C" UBool U_EXPORT2
436utext_equalsutext_equals_71(const UText *a, const UText *b) {
  if (a==NULL__null || b==NULL__null ||
      a->magic != UTEXT_MAGIC ||
      b->magic != UTEXT_MAGIC) {
          // Null or invalid arguments don't compare equal to anything.
          return FALSE0;
  }

  if (a->pFuncs != b->pFuncs) {
      // Different types of text providers.
      return FALSE0;
  }

  if (a->context != b->context) {
      // Different sources (different strings)
      return FALSE0;
  }
  if (utext_getNativeIndexutext_getNativeIndex_71(a) != utext_getNativeIndexutext_getNativeIndex_71(b)) {
      // Different current position in the string.
      return FALSE0;
  }

  return TRUE1;
459}

461U_CAPIextern "C" UBool U_EXPORT2
462utext_isWritableutext_isWritable_71(const UText *ut)
463{
  UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE))) != 0;
  return b;
466}


469U_CAPIextern "C" void U_EXPORT2
470utext_freezeutext_freeze_71(UText *ut) {
  // Zero out the WRITABLE flag.
  ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE)));
473}


476U_CAPIextern "C" UBool U_EXPORT2
477utext_hasMetaDatautext_hasMetaData_71(const UText *ut)
478{
  UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)((int32_t)1<<(UTEXT_PROVIDER_HAS_META_DATA))) != 0;
  return b;
481}



485U_CAPIextern "C" int32_t U_EXPORT2
486utext_replaceutext_replace_71(UText *ut,
           int64_t nativeStart, int64_t nativeLimit,
           const UChar *replacementText, int32_t replacementLength,
           UErrorCode *status)
490{
  if (U_FAILURE(*status)) {
      return 0;
  }
  if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE))) == 0) {
      *status = U_NO_WRITE_PERMISSION;
      return 0;
  }
  int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status);
  return i;
500}

502U_CAPIextern "C" void U_EXPORT2
503utext_copyutext_copy_71(UText *ut,
        int64_t nativeStart, int64_t nativeLimit,
        int64_t destIndex,
        UBool move,
        UErrorCode *status)
508{
  if (U_FAILURE(*status)) {
      return;
  }
  if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE))) == 0) {
      *status = U_NO_WRITE_PERMISSION;
      return;
  }
  ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
517}



521U_CAPIextern "C" UText * U_EXPORT2
522utext_cloneutext_clone_71(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
  if (U_FAILURE(*status)) {
      return dest;
  }
  UText *result = src->pFuncs->clone(dest, src, deep, status);
  if (U_FAILURE(*status)) {
      return result;
  }
  if (result == NULL__null) {
      *status = U_MEMORY_ALLOCATION_ERROR;
      return result;
  }
  if (readOnly) {
      utext_freezeutext_freeze_71(result);
  }
  return result;
538}



542//------------------------------------------------------------------------------
543//
544//   UText common functions implementation
545//
546//------------------------------------------------------------------------------

//
//  Bit values kept in UText.flags
//
enum {
    // Set:   ICU allocated this UText struct on the heap.
    // Clear: the caller provided the storage for the UText.
    UTEXT_HEAP_ALLOCATED       = 0x1,

    // Set:   ICU allocated the extra storage as a separate heap block.
    // Clear: there is no separate allocation; either no extra storage was
    //        requested, or it is appended to the main UText storage.
    UTEXT_EXTRA_HEAP_ALLOCATED = 0x2,

    // Set:   this UText is currently open.
    // Clear: this UText is not open.
    UTEXT_OPEN                 = 0x4
};


566//
567//  Extended form of a UText.  The purpose is to aid in computing the total size required
568//    when a provider asks for a UText to be allocated with extra storage.

570struct ExtendedUText {
  UText               ut;
  std::max_align_t    extension;
573};

575static const UText emptyText = UTEXT_INITIALIZER{ UTEXT_MAGIC, 0, 0, sizeof(UText), 0, 0, 0, 0, 0, 0, __null,
 __null, __null, __null, __null, __null, __null, __null, 0, 0
, 0, 0, 0, 0 };

577U_CAPIextern "C" UText * U_EXPORT2
578utext_setuputext_setup_71(UText *ut, int32_t extraSpace, UErrorCode *status) {
  if (U_FAILURE(*status)) {
      return ut;
  }

  if (ut == NULL__null) {
      // We need to heap-allocate storage for the new UText
      int32_t spaceRequired = sizeof(UText);
      if (extraSpace > 0) {
          spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(std::max_align_t);
      }
      ut = (UText *)uprv_mallocuprv_malloc_71(spaceRequired);
      if (ut == NULL__null) {
          *status = U_MEMORY_ALLOCATION_ERROR;
          return NULL__null;
      } else {
          *ut = emptyText;
          ut->flags |= UTEXT_HEAP_ALLOCATED;
          if (spaceRequired>0) {
              ut->extraSize = extraSpace;
              ut->pExtra    = &((ExtendedUText *)ut)->extension;
          }
      }
  } else {
      // We have been supplied with an already existing UText.
      // Verify that it really appears to be a UText.
      if (ut->magic != UTEXT_MAGIC) {
          *status = U_ILLEGAL_ARGUMENT_ERROR;
          return ut;
      }
      // If the ut is already open and there's a provider supplied close
      //   function, call it.
      if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL__null)  {
          ut->pFuncs->close(ut);
      }
      ut->flags &= ~UTEXT_OPEN;

      // If extra space was requested by our caller, check whether
      //   sufficient already exists, and allocate new if needed.
      if (extraSpace > ut->extraSize) {
          // Need more space.  If there is existing separately allocated space,
          //   delete it first, then allocate new space.
          if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
              uprv_freeuprv_free_71(ut->pExtra);
              ut->extraSize = 0;
          }
          ut->pExtra = uprv_mallocuprv_malloc_71(extraSpace);
          if (ut->pExtra == NULL__null) {
              *status = U_MEMORY_ALLOCATION_ERROR;
          } else {
              ut->extraSize = extraSpace;
              ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED;
          }
      }
  }
  if (U_SUCCESS(*status)) {
      ut->flags |= UTEXT_OPEN;

      // Initialize all remaining fields of the UText.
      //
      ut->context             = NULL__null;
      ut->chunkContents       = NULL__null;
      ut->p                   = NULL__null;
      ut->q                   = NULL__null;
      ut->r                   = NULL__null;
      ut->a                   = 0;
      ut->b                   = 0;
      ut->c                   = 0;
      ut->chunkOffset         = 0;
      ut->chunkLength         = 0;
      ut->chunkNativeStart    = 0;
      ut->chunkNativeLimit    = 0;
      ut->nativeIndexingLimit = 0;
      ut->providerProperties  = 0;
      ut->privA               = 0;
      ut->privB               = 0;
      ut->privC               = 0;
      ut->privP               = NULL__null;
      if (ut->pExtra!=NULL__null && ut->extraSize>0)
          uprv_memset(ut->pExtra, 0, ut->extraSize):: memset(ut->pExtra, 0, ut->extraSize);

  }
  return ut;
661}


664U_CAPIextern "C" UText * U_EXPORT2
665utext_closeutext_close_71(UText *ut) {
  if (ut==NULL__null ||
      ut->magic != UTEXT_MAGIC ||
      (ut->flags & UTEXT_OPEN) == 0)
  {
      // The supplied ut is not an open UText.
      // Do nothing.
      return ut;
  }

  // If the provider gave us a close function, call it now.
  // This will clean up anything allocated specifically by the provider.
  if (ut->pFuncs->close != NULL__null) {
      ut->pFuncs->close(ut);
  }
  ut->flags &= ~UTEXT_OPEN;

  // If we (the framework) allocated the UText or subsidiary storage,
  //   delete it.
  if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
      uprv_freeuprv_free_71(ut->pExtra);
      ut->pExtra = NULL__null;
      ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
      ut->extraSize = 0;
  }

  // Zero out function table of the closed UText.  This is a defensive move,
  //   intended to cause applications that inadvertently use a closed
  //   utext to crash with null pointer errors.
  ut->pFuncs        = NULL__null;

  if (ut->flags & UTEXT_HEAP_ALLOCATED) {
      // This UText was allocated by UText setup.  We need to free it.
      // Clear magic, so we can detect if the user messes up and immediately
      //  tries to reopen another UText using the deleted storage.
      ut->magic = 0;
      uprv_freeuprv_free_71(ut);
      ut = NULL__null;
  }
  return ut;
705}




710//
711// invalidateChunk   Reset a chunk to have no contents, so that the next call
712//                   to access will cause new data to load.
713//                   This is needed when copy/move/replace operate directly on the
714//                   backing text, potentially putting it out of sync with the
715//                   contents in the chunk.
716//
717static void
718invalidateChunk(UText *ut) {
  ut->chunkLength = 0;
  ut->chunkNativeLimit = 0;
  ut->chunkNativeStart = 0;
  ut->chunkOffset = 0;
  ut->nativeIndexingLimit = 0;
724}

//
// pinIndex        Pin a native index parameter into range.
//                 The 64 bit index is modified in place: negative values
//                 become 0, otherwise values above limit become limit.
//                 The pinned value, truncated to 32 bits, is returned as a
//                 convenience for providers that don't need 64 bits.
static int32_t
pinIndex(int64_t &index, int64_t limit) {
    // The negative check takes priority over the limit check.
    index = (index < 0) ? 0 : ((index > limit) ? limit : index);
    return (int32_t)index;
}


742U_CDECL_BEGINextern "C" {

744//
745// Pointer relocation function,
746//   a utility used by shallow clone.
747//   Adjust a pointer that refers to something within one UText (the source)
748//   to refer to the same relative offset within a another UText (the target)
749//
750static void adjustPointer(UText *dest, const void **destPtr, const UText *src) {
  // convert all pointers to (char *) so that byte address arithmetic will work.
  char  *dptr = (char *)*destPtr;
  char  *dUText = (char *)dest;
  char  *sUText = (char *)src;

  if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) {
      // target ptr was to something within the src UText's pExtra storage.
      //   relocate it into the target UText's pExtra region.
      *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra);
  } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) {
      // target ptr was pointing to somewhere within the source UText itself.
      //   Move it to the same offset within the target UText.
      *destPtr = dUText + (dptr-sUText);
  }
765}


768//
769//  Clone.  This is a generic copy-the-utext-by-value clone function that can be
770//          used as-is with some utext types, and as a helper by other clones.
771//
772static UText * U_CALLCONV
773shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
  if (U_FAILURE(*status)) {
      return NULL__null;
  }
  int32_t  srcExtraSize = src->extraSize;

  //
  // Use the generic text_setup to allocate storage if required.
  //
  dest = utext_setuputext_setup_71(dest, srcExtraSize, status);
  if (U_FAILURE(*status)) {
      return dest;
  }

  //
  //  flags (how the UText was allocated) and the pointer to the
  //   extra storage must retain the values in the cloned utext that
  //   were set up by utext_setup.  Save them separately before
  //   copying the whole struct.
  //
  void *destExtra = dest->pExtra;
  int32_t flags   = dest->flags;


  //
  //  Copy the whole UText struct by value.
  //  Any "Extra" storage is copied also.
  //
  int sizeToCopy = src->sizeOfStruct;
  if (sizeToCopy > dest->sizeOfStruct) {
      sizeToCopy = dest->sizeOfStruct;
  }
  uprv_memcpy(dest, src, sizeToCopy)do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(dest, src
, sizeToCopy); } while (false);
  dest->pExtra = destExtra;
  dest->flags  = flags;
  if (srcExtraSize > 0) {
      uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize)do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(dest->
pExtra, src->pExtra, srcExtraSize); } while (false);
  }

  //
  // Relocate any pointers in the target that refer to the UText itself
  //   to point to the cloned copy rather than the original source.
  //
  adjustPointer(dest, &dest->context, src);
  adjustPointer(dest, &dest->p, src);
  adjustPointer(dest, &dest->q, src);
  adjustPointer(dest, &dest->r, src);
  adjustPointer(dest, (const void **)&dest->chunkContents, src);

  // The newly shallow-cloned UText does _not_ own the underlying storage for the text.
  // (The source for the clone may or may not have owned the text.)

  dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT));

  return dest;
828}


831U_CDECL_END}



835//------------------------------------------------------------------------------
836//
837//     UText implementation for UTF-8 char * strings (read-only)
838//     Limitation:  string length must be <= 0x7fffffff.
839//                  (length must fit in an int32_t variable)
840//
841//         Use of UText data members:
842//              context    pointer to UTF-8 string
843//              utext.b    is the input string length (bytes).
844//              utext.c    Length scanned so far in string
845//                           (for optimizing finding length of zero terminated strings.)
846//              utext.p    pointer to the current buffer
847//              utext.q    pointer to the other buffer.
848//
849//------------------------------------------------------------------------------

// Chunk size.
//     Has to stay below 85 (256/3) because of the byte mapping from UChar
//     indexes to native indexes.  The worst case is three native bytes per
//     UChar.  (Supplementaries take 4 native bytes for two UChars.)
//     The longest illegal byte sequence that is treated as a single error
//     (and converted to U+FFFD) is three bytes long (a truncated four-byte
//     sequence).
//
enum { UTF8_TEXT_CHUNK_SIZE = 32 };

860//
861// UTF8Buf  Two of these structs will be set up in the UText's extra allocated space.
862//          Each contains the UChar chunk buffer, the to and from native maps, and
863//          header info.
864//
865//     because backwards iteration fills the buffers starting at the end and
866//     working towards the front, the filled part of the buffers may not begin
867//     at the start of the available storage for the buffers.
868//
869//     Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for
870//     the last character added being a supplementary, and thus requiring a surrogate
871//     pair.  Doing this is simpler than checking for the edge case.
872//

// One chunk buffer for the UTF-8 provider: a run of UTF-16 code units plus the
// tables that translate between UTF-16 offsets in buf and native (byte) indexes.
874struct UTF8Buf {
  int32_t   bufNativeStart;                        // Native index of first char in UChar buf
  int32_t   bufNativeLimit;                        // Native index following last char in buf.
  int32_t   bufStartIdx;                           // First filled position in buf.
  int32_t   bufLimitIdx;                           // Limit of filled range in buf.
  int32_t   bufNILimit;                            // Limit of native indexing part of buf
                                                   //   (offset, relative to bufStartIdx, of the
                                                   //   first non-ASCII char seen while filling).
  int32_t   toUCharsMapStart;                      // Native index corresponding to
                                                   //   mapToUChars[0].
                                                   //   Set to bufNativeStart when filling forwards.
                                                   //   Set to computed value when filling backwards.

  UChar     buf[UTF8_TEXT_CHUNK_SIZE+4];           // The UChar buffer.  Requires one extra position beyond
                                                   //   the chunk size, to allow for surrogate at the end.
                                                   //   Length must be identical to mapToNative array, below,
                                                   //   because of the way indexing works when the array is
                                                   //   filled backwards during a reverse iteration.  Thus,
                                                   //   the additional extra size.
  uint8_t   mapToNative[UTF8_TEXT_CHUNK_SIZE+4];   // map UChar index in buf to
                                                   //  native offset from bufNativeStart.
                                                   //  Requires two extra slots,
                                                   //    one for a supplementary starting in the last normal position,
                                                   //    and one for an entry for the buffer limit position.
  uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from toUCharsMapStart to
                                                   //   corresponding index in buf.
  int32_t   align;                                 // NOTE(review): never read in the visible code;
                                                   //   presumably padding for alignment -- confirm.
899};

901U_CDECL_BEGINextern "C" {

903//
904//   utf8TextLength
905//
906//        Get the length of the string.  If we don't already know it,
907//              we'll need to scan for the trailing  nul.
908//
909static int64_t U_CALLCONV
910utf8TextLength(UText *ut) {
  if (ut->b < 0) {
      // Zero terminated string, and we haven't scanned to the end yet.
      // Scan it now.
      const char *r = (const char *)ut->context + ut->c;
      while (*r != 0) {
          r++;
      }
      if ((r - (const char *)ut->context) < 0x7fffffff) {
          ut->b = (int32_t)(r - (const char *)ut->context);
      } else {
          // Actual string was bigger (more than 2 gig) than we
          //   can handle.  Clip it to 2 GB.
          ut->b = 0x7fffffff;
      }
      ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
  }
  return ut->b;
928}






//
//  utf8TextAccess
//
//     Position the UText chunk on the requested native (UTF-8 byte) index,
//     converting a new chunk of UTF-8 into one of the two UTF8Buf buffers
//     (ut->p current, ut->q alternate) when neither already covers it, and
//     set ut->chunkOffset to the corresponding UTF-16 offset.
//
//     ut        the UText being accessed.
//     index     requested native index.  Negative values are treated as 0,
//               values above 0x7fffffff as 0x7fffffff, and the result is pinned
//               to the string length once that length is known.
//     forward   non-zero when the caller intends to iterate forwards from
//               index, zero for backwards iteration.
//
//     Returns non-zero when the chunk holds text at the index in the requested
//     direction; zero when the index is at the end of the string (forward),
//     at the start of the string (backward), or when a backward request lands
//     on offset 0 of the current chunk.
//
935static UBool U_CALLCONV
936utf8TextAccess(UText *ut, int64_t index, UBool forward) {
  //
  //  Apologies to those who are allergic to goto statements.
  //    Consider each goto to a labelled block to be the equivalent of
  //         call the named block as if it were a function();
  //         return;
  //
  const uint8_t *s8=(const uint8_t *)ut->context;
  UTF8Buf *u8b = NULL__null;
  int32_t  length = ut->b;         // Length of original utf-8
  int32_t  ix= (int32_t)index;     // Requested index, trimmed to 32 bits.
  int32_t  mapIndex = 0;
  if (index<0) {
      ix=0;
  } else if (index > 0x7fffffff) {
      // Strings with 64 bit lengths not supported by this UTF-8 provider.
      ix = 0x7fffffff;
  }

  // Pin requested index to the string length.
  //   (length is negative while the end of a zero terminated string is unknown.)
  if (ix>length) {
      if (length>=0) {
          ix=length;
      } else if (ix>=ut->c) {
          // Zero terminated string, and requested index is beyond
          //   the region that has already been scanned.
          //   Scan up to either the end of the string or to the
          //   requested position, whichever comes first.
          while (ut->c<ix && s8[ut->c]!=0) {
              ut->c++;
          }
          //  TODO:  support for null terminated string length > 32 bits.
          if (s8[ut->c] == 0) {
              // We just found the actual length of the string.
              //  Trim the requested index back to that.
              ix     = ut->c;
              ut->b  = ut->c;
              length = ut->c;
              ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
          }
      }
  }

  //
  // Dispatch to the appropriate action for a forward iteration request.
  //
  if (forward) {
      if (ix==ut->chunkNativeLimit) {
          // Check for normal sequential iteration cases first.
          if (ix==length) {
              // Just reached end of string
              // Don't swap buffers, but do set the
              //   current buffer position.
              ut->chunkOffset = ut->chunkLength;
              return FALSE0;
          } else {
              // End of current buffer.
              //   check whether other buffer already has what we need.
              UTF8Buf *altB = (UTF8Buf *)ut->q;
              if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {
                  goto swapBuffers;
              }
          }
      }

      // A random access.  Desired index could be in either or neither buf.
      // For optimizing the order of testing, first check for the index
      //    being in the other buffer.  This will be the case for uses that
      //    move back and forth over a fairly limited range
      {
          u8b = (UTF8Buf *)ut->q;   // the alternate buffer
          if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {
              // Requested index is in the other buffer.
              goto swapBuffers;
          }
          if (ix == length) {
              // Requested index is end-of-string.
              //   (this is the case of randomly seeking to the end.
              //    The case of iterating off the end is handled earlier.)
              if (ix == ut->chunkNativeLimit) {
                  // Current buffer extends up to the end of the string.
                  //   Leave it as the current buffer.
                  ut->chunkOffset = ut->chunkLength;
                  return FALSE0;
              }
              if (ix == u8b->bufNativeLimit) {
                  // Alternate buffer extends to the end of string.
                  //   Swap it in as the current buffer.
                  goto swapBuffersAndFail;
              }

              // Neither existing buffer extends to the end of the string.
              goto makeStubBuffer;
          }

          if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) {
              // Requested index is in neither buffer.
              goto fillForward;
          }

          // Requested index is in this buffer.
          u8b = (UTF8Buf *)ut->p;   // the current buffer
          mapIndex = ix - u8b->toUCharsMapStart;
          U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars))(void)0;
          ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
          return TRUE1;

      }
  }


  //
  // Dispatch to the appropriate action for a
  //   Backwards Direction iteration request.
  //
  //   Note that the range tests below are shifted by one relative to the
  //   forward case (start exclusive, limit inclusive): a backward access
  //   needs the character that precedes the index.
  //
  if (ix==ut->chunkNativeStart) {
      // Check for normal sequential iteration cases first.
      if (ix==0) {
          // Just reached the start of string
          // Don't swap buffers, but do set the
          //   current buffer position.
          ut->chunkOffset = 0;
          return FALSE0;
      } else {
          // Start of current buffer.
          //   check whether other buffer already has what we need.
          UTF8Buf *altB = (UTF8Buf *)ut->q;
          if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {
              goto swapBuffers;
          }
      }
  }

  // A random access.  Desired index could be in either or neither buf.
  // For optimizing the order of testing,
  //    Most likely case:  in the other buffer.
  //    Second most likely: in neither buffer.
  //    Unlikely, but must work:  in the current buffer.
  u8b = (UTF8Buf *)ut->q;   // the alternate buffer
  if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {
      // Requested index is in the other buffer.
      goto swapBuffers;
  }
  // Requested index is start-of-string.
  //   (this is the case of randomly seeking to the start.
  //    The case of iterating off the start is handled earlier.)
  if (ix==0) {
      if (u8b->bufNativeStart==0) {
          // Alternate buffer contains the data for the start string.
          // Make it be the current buffer.
          goto swapBuffersAndFail;
      } else {
          // Request for data before the start of string,
          //   neither buffer is usable.
          //   set up a zero-length buffer.
          goto makeStubBuffer;
      }
  }

  if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) {
      // Requested index is in neither buffer.
      goto fillReverse;
  }

  // Requested index is in this buffer.
  //   Set the utf16 buffer index.
  u8b = (UTF8Buf *)ut->p;
  mapIndex = ix - u8b->toUCharsMapStart;
  ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
  if (ut->chunkOffset==0) {
      // This occurs when the first character in the text is
      //   a multi-byte UTF-8 char, and the requested index is to
      //   one of the trailing bytes.  Because there is no preceding
      //   character, this access fails.  We can't pick up on the
      //   situation sooner because the requested index is not zero.
      return FALSE0;
  } else {
      return TRUE1;
  }



1118swapBuffers:
  //  The alternate buffer (ut->q) has the string data that was requested.
  //  Swap the primary and alternate buffers, and set the
  //   chunk index into the new primary buffer.
  {
      u8b   = (UTF8Buf *)ut->q;
      ut->q = ut->p;
      ut->p = u8b;
      ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];
      ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
      ut->chunkNativeStart    = u8b->bufNativeStart;
      ut->chunkNativeLimit    = u8b->bufNativeLimit;
      ut->nativeIndexingLimit = u8b->bufNILimit;

      // Index into the (now current) chunk
      // Use the map to set the chunk index.  It's more trouble than it's worth
      //    to check whether native indexing can be used.
      U_ASSERT(ix>=u8b->bufNativeStart)(void)0;
      U_ASSERT(ix<=u8b->bufNativeLimit)(void)0;
      mapIndex = ix - u8b->toUCharsMapStart;
      U_ASSERT(mapIndex>=0)(void)0;
      U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars))(void)0;
      ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;

      return TRUE1;
  }


swapBuffersAndFail:
  // We got a request for either the start or end of the string,
  //  with iteration continuing in the out-of-bounds direction.
  // The alternate buffer already contains the data up to the
  //  start/end.
  // Swap the buffers, then return failure, indicating that we couldn't
  //  make things correct for continuing the iteration in the requested
  //  direction.  The position & buffer are correct should the
  //  user decide to iterate in the opposite direction.
  u8b   = (UTF8Buf *)ut->q;
  ut->q = ut->p;
  ut->p = u8b;
  ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];
  ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
  ut->chunkNativeStart    = u8b->bufNativeStart;
  ut->chunkNativeLimit    = u8b->bufNativeLimit;
  ut->nativeIndexingLimit = u8b->bufNILimit;

  // Index into the (now current) chunk
  //  For this function  (swapBuffersAndFail), the requested index
  //    will always be at either the start or end of the chunk.
  if (ix==u8b->bufNativeLimit) {
      ut->chunkOffset = ut->chunkLength;
  } else  {
      ut->chunkOffset = 0;
      U_ASSERT(ix == u8b->bufNativeStart)(void)0;
  }
  return FALSE0;

1175makeStubBuffer:
  //   The user has done a seek/access past the start or end
  //   of the string.  Rather than loading data that is likely
  //   to never be used, just set up a zero-length buffer at
  //   the position.
  //   The stub is built in the alternate buffer; swapBuffersAndFail
  //   then makes it the current one.
  u8b = (UTF8Buf *)ut->q;
  u8b->bufNativeStart   = ix;
  u8b->bufNativeLimit   = ix;
  u8b->bufStartIdx      = 0;
  u8b->bufLimitIdx      = 0;
  u8b->bufNILimit       = 0;
  u8b->toUCharsMapStart = ix;
  u8b->mapToNative[0]   = 0;
  u8b->mapToUChars[0]   = 0;
  goto swapBuffersAndFail;



1193fillForward:
  {
      // Move the incoming index to a code point boundary.
      U8_SET_CP_START(s8, 0, ix)do { if(((int8_t)((s8)[(ix)])<-0x40)) { (ix)=utf8_back1SafeBody_71
(s8, 0, (ix)); } } while (false);

      // Swap the UText buffers.
      //  We want to fill what was previously the alternate buffer,
      //  and make what was the current buffer be the new alternate.
      UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
      ut->q = ut->p;
      ut->p = u8b_swap;

      // An unknown length (zero terminated input) is treated as "no limit";
      //   the terminating zero byte ends the fill instead.
      int32_t strLen = ut->b;
      UBool   nulTerminated = FALSE0;
      if (strLen < 0) {
          strLen = 0x7fffffff;
          nulTerminated = TRUE1;
      }

      UChar   *buf = u8b_swap->buf;
      uint8_t *mapToNative  = u8b_swap->mapToNative;
      uint8_t *mapToUChars  = u8b_swap->mapToUChars;
      int32_t  destIx       = 0;
      int32_t  srcIx        = ix;
      UBool    seenNonAscii = FALSE0;
      UChar32  c = 0;

      // Fill the chunk buffer and mapping arrays.
      while (destIx<UTF8_TEXT_CHUNK_SIZE) {
          c = s8[srcIx];
          if (c>0 && c<0x80) {
              // Special case ASCII range for speed.
              //   zero is excluded to simplify bounds checking.
              buf[destIx] = (UChar)c;
              mapToNative[destIx]    = (uint8_t)(srcIx - ix);
              mapToUChars[srcIx-ix]  = (uint8_t)destIx;
              srcIx++;
              destIx++;
          } else {
              // General case, handle everything.
              if (seenNonAscii == FALSE0) {
                  seenNonAscii = TRUE1;
                  u8b_swap->bufNILimit = destIx;
              }

              int32_t  cIx      = srcIx;
              int32_t  dIx      = destIx;
              int32_t  dIxSaved = destIx;
              U8_NEXT_OR_FFFD(s8, srcIx, strLen, c)do { (c)=(uint8_t)(s8)[(srcIx)++]; if(!(((c)&0x80)==0)) {
 uint8_t __t = 0; if((srcIx)!=(strLen) && ((c)>=0xe0
 ? ((c)<0xf0 ? "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
[(c)&=0xf]&(1<<((__t=(s8)[srcIx])>>5)) &&
 (__t&=0x3f, 1) : ((c)-=0xf0)<=4 && "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
[(__t=(s8)[srcIx])>>4]&(1<<(c)) && ((
c)=((c)<<6)|(__t&0x3f), ++(srcIx)!=(strLen)) &&
 (__t=(s8)[srcIx]-0x80)<=0x3f) && ((c)=((c)<<
6)|__t, ++(srcIx)!=(strLen)) : (c)>=0xc2 && ((c)&=
0x1f, 1)) && (__t=(s8)[srcIx]-0x80)<=0x3f &&
 ((c)=((c)<<6)|__t, ++(srcIx), 1)) { } else { (c)=(0xfffd
); } } } while (false);
              if (c==0 && nulTerminated) {
                  // Hit the terminating zero: back up so srcIx indexes it,
                  //   and stop filling.
                  srcIx--;
                  break;
              }

              U16_APPEND_UNSAFE(buf, destIx, c)do { if((uint32_t)(c)<=0xffff) { (buf)[(destIx)++]=(uint16_t
)(c); } else { (buf)[(destIx)++]=(uint16_t)(((c)>>10)+0xd7c0
); (buf)[(destIx)++]=(uint16_t)(((c)&0x3ff)|0xdc00); } } while
 (false);
              // Every UChar produced for this character maps back to its first byte.
              do {
                  mapToNative[dIx++] = (uint8_t)(cIx - ix);
              } while (dIx < destIx);

              // Every byte of this character maps to its first UChar.
              do {
                  mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
              } while (cIx < srcIx);
          }
          if (srcIx>=strLen) {
              break;
          }

      }

      //  store Native <--> Chunk Map entries for the end of the buffer.
      //    There is no actual character here, but the index position is valid.
      mapToNative[destIx]     = (uint8_t)(srcIx - ix);
      mapToUChars[srcIx - ix] = (uint8_t)destIx;

      //  fill in Buffer descriptor
      u8b_swap->bufNativeStart     = ix;
      u8b_swap->bufNativeLimit     = srcIx;
      u8b_swap->bufStartIdx        = 0;
      u8b_swap->bufLimitIdx        = destIx;
      if (seenNonAscii == FALSE0) {
          u8b_swap->bufNILimit     = destIx;
      }
      u8b_swap->toUCharsMapStart   = u8b_swap->bufNativeStart;

      // Set UText chunk to refer to this buffer.
      ut->chunkContents       = buf;
      ut->chunkOffset         = 0;
      ut->chunkLength         = u8b_swap->bufLimitIdx;
      ut->chunkNativeStart    = u8b_swap->bufNativeStart;
      ut->chunkNativeLimit    = u8b_swap->bufNativeLimit;
      ut->nativeIndexingLimit = u8b_swap->bufNILimit;

      // For zero terminated strings, keep track of the maximum point
      //   scanned so far.
      if (nulTerminated && srcIx>ut->c) {
          ut->c = srcIx;
          if (c==0) {
              // We scanned to the end.
              //   Remember the actual length.
              ut->b = srcIx;
              ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
          }
      }
      return TRUE1;
  }


1300fillReverse:
  {
      // Move the incoming index to a code point boundary.
      // Can only do this if the incoming index is somewhere in the interior of the string.
      //   If index is at the end, there is no character there to look at.
      if (ix != ut->b) {
          // Note: this function will only move the index back if it is on a trail byte
          //       and there is a preceding lead byte and the sequence from the lead
          //       through this trail could be part of a valid UTF-8 sequence
          //       Otherwise the index remains unchanged.
          U8_SET_CP_START(s8, 0, ix)do { if(((int8_t)((s8)[(ix)])<-0x40)) { (ix)=utf8_back1SafeBody_71
(s8, 0, (ix)); } } while (false);
      }

      // Swap the UText buffers.
      //  We want to fill what was previously the alternate buffer,
      //  and make what was the current buffer be the new alternate.
      UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
      ut->q = ut->p;
      ut->p = u8b_swap;

      UChar   *buf = u8b_swap->buf;
      uint8_t *mapToNative = u8b_swap->mapToNative;
      uint8_t *mapToUChars = u8b_swap->mapToUChars;
      int32_t  toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
      // Note that toUCharsMapStart can be negative. Happens when the remaining
      // text from current position to the beginning is less than the buffer size.
      // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
      int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
                                                  //   at end of buffer to leave room
                                                  //   for a surrogate pair at the
                                                  //   buffer start.
      int32_t  srcIx  = ix;
      int32_t  bufNILimit = destIx;
      UChar32   c;

      // Map to/from Native Indexes, fill in for the position at the end of
      //   the buffer.
      //
      mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
      mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;

      // Fill the chunk buffer
      // Work backwards, filling from the end of the buffer towards the front.
      //   Stops when fewer than three UChar slots remain at the front of buf,
      //   when the start of the string is reached, or when few mapToUChars
      //   slots remain.
      //   NOTE(review): the "> 5" margin presumably keeps room in mapToUChars
      //   for one more multi-byte character -- confirm.
      //
      while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) {
          srcIx--;
          destIx--;

          // Get last byte of the UTF-8 character
          c = s8[srcIx];
          if (c<0x80) {
              // Special case ASCII range for speed.
              buf[destIx] = (UChar)c;
              U_ASSERT(toUCharsMapStart <= srcIx)(void)0;
              mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
              mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
          } else {
              // General case, handle everything non-ASCII.

              int32_t  sIx      = srcIx;  // ix of last byte of multi-byte u8 char

              // Get the full character from the UTF8 string.
              //   use code derived from the macros in utf8.h
              //   Leaves srcIx pointing at the first byte of the UTF-8 char.
              //
              c=utf8_prevCharSafeBodyutf8_prevCharSafeBody_71(s8, 0, &srcIx, c, -3);
              // leaves srcIx at first byte of the multi-byte char.

              // Store the character in UTF-16 buffer.
              if (c<0x10000) {
                  buf[destIx] = (UChar)c;
                  mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
              } else {
                  buf[destIx]         = U16_TRAIL(c)(UChar)(((c)&0x3ff)|0xdc00);
                  mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
                  buf[--destIx]       = U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
                  mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
              }

              // Fill in the map from native indexes to UChars buf index.
              do {
                  mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
              } while (sIx >= srcIx);
              U_ASSERT(toUCharsMapStart <= (srcIx+1))(void)0;

              // Set native indexing limit to be the current position.
              //   We are processing a non-ascii, non-native-indexing char now;
              //     the limit will be here if the rest of the chars to be
              //     added to this buffer are ascii.
              bufNILimit = destIx;
          }
      }
      u8b_swap->bufNativeStart     = srcIx;
      u8b_swap->bufNativeLimit     = ix;
      u8b_swap->bufStartIdx        = destIx;
      u8b_swap->bufLimitIdx        = UTF8_TEXT_CHUNK_SIZE+2;
      u8b_swap->bufNILimit         = bufNILimit - u8b_swap->bufStartIdx;
      u8b_swap->toUCharsMapStart   = toUCharsMapStart;

      // Set UText chunk to refer to this buffer, positioned at its end
      //   since the caller will iterate backwards from here.
      ut->chunkContents       = &buf[u8b_swap->bufStartIdx];
      ut->chunkLength         = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;
      ut->chunkOffset         = ut->chunkLength;
      ut->chunkNativeStart    = u8b_swap->bufNativeStart;
      ut->chunkNativeLimit    = u8b_swap->bufNativeLimit;
      ut->nativeIndexingLimit = u8b_swap->bufNILimit;
      return TRUE1;
  }

1408}



1412//
1413//  This is a slightly modified copy of u_strFromUTF8,
1414//     Inserts a Replacement Char rather than failing on invalid UTF-8
1415//     Removes unnecessary features.
1416//
1417static UChar*
1418utext_strFromUTF8(UChar *dest,
            int32_t destCapacity,
            int32_t *pDestLength,
            const char* src,
            int32_t srcLength,        // required.  NUL terminated not supported.
            UErrorCode *pErrorCode
            )
1425{

  UChar *pDest = dest;
  UChar *pDestLimit = (dest!=NULL__null)?(dest+destCapacity):NULL__null;
  UChar32 ch=0;
  int32_t index = 0;
  int32_t reqLength = 0;
  uint8_t* pSrc = (uint8_t*) src;


  while((index < srcLength)&&(pDest<pDestLimit)){
      ch = pSrc[index++];
      if(ch <=0x7f){
          *pDest++=(UChar)ch;
      }else{
          ch=utf8_nextCharSafeBodyutf8_nextCharSafeBody_71(pSrc, &index, srcLength, ch, -3);
          if(U_IS_BMP(ch)((uint32_t)(ch)<=0xffff)){
              *(pDest++)=(UChar)ch;
          }else{
              *(pDest++)=U16_LEAD(ch)(UChar)(((ch)>>10)+0xd7c0);
              if(pDest<pDestLimit){
                  *(pDest++)=U16_TRAIL(ch)(UChar)(((ch)&0x3ff)|0xdc00);
              }else{
                  reqLength++;
                  break;
              }
          }
      }
  }
  /* donot fill the dest buffer just count the UChars needed */
  while(index < srcLength){
      ch = pSrc[index++];
      if(ch <= 0x7f){
          reqLength++;
      }else{
          ch=utf8_nextCharSafeBodyutf8_nextCharSafeBody_71(pSrc, &index, srcLength, ch, -3);
          reqLength+=U16_LENGTH(ch)((uint32_t)(ch)<=0xffff ? 1 : 2);
      }
  }

  reqLength+=(int32_t)(pDest - dest);

  if(pDestLength){
      *pDestLength = reqLength;
  }

  /* Terminate the buffer */
  u_terminateUCharsu_terminateUChars_71(dest,destCapacity,reqLength,pErrorCode);

  return dest;
1475}



1479static int32_t U_CALLCONV
1480utf8TextExtract(UText *ut,
              int64_t start, int64_t limit,
              UChar *dest, int32_t destCapacity,
              UErrorCode *pErrorCode) {
  if(U_FAILURE(*pErrorCode)) {
      return 0;
  }
  if(destCapacity<0 || (dest==NULL__null && destCapacity>0)) {
      *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
  }
  int32_t  length  = ut->b;
  int32_t  start32 = pinIndex(start, length);
  int32_t  limit32 = pinIndex(limit, length);

  if(start32>limit32) {
      *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
  }


  // adjust the incoming indexes to land on code point boundaries if needed.
  //    adjust by no more than three, because that is the largest number of trail bytes
  //    in a well formed UTF8 character.
  const uint8_t *buf = (const uint8_t *)ut->context;
  int i;
  if (start32 < ut->chunkNativeLimit) {
      for (i=0; i<3; i++) {
          if (U8_IS_SINGLE(buf[start32])(((buf[start32])&0x80)==0) || U8_IS_LEAD(buf[start32])((uint8_t)((buf[start32])-0xc2)<=0x32) || start32==0) {
              break;
          }
          start32--;
      }
  }

  if (limit32 < ut->chunkNativeLimit) {
      for (i=0; i<3; i++) {
          if (U8_IS_SINGLE(buf[limit32])(((buf[limit32])&0x80)==0) || U8_IS_LEAD(buf[limit32])((uint8_t)((buf[limit32])-0xc2)<=0x32) || limit32==0) {
              break;
          }
          limit32--;
      }
  }

  // Do the actual extract.
  int32_t destLength=0;
  utext_strFromUTF8(dest, destCapacity, &destLength,
                  (const char *)ut->context+start32, limit32-start32,
                  pErrorCode);
  utf8TextAccess(ut, limit32, TRUE1);
  return destLength;
1531}

1533//
1534// utf8TextMapOffsetToNative
1535//
1536// Map a chunk (UTF-16) offset to a native index.
1537static int64_t U_CALLCONV
1538utf8TextMapOffsetToNative(const UText *ut) {
  //
  UTF8Buf *u8b = (UTF8Buf *)ut->p;
  U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength)(void)0;
  int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart;
  U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit)(void)0;
  return nativeOffset;
1545}

1547//
1548// Map a native index to the corresponding chunk offset
1549//
1550static int32_t U_CALLCONV
1551utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
  U_ASSERT(index64 <= 0x7fffffff)(void)0;
  int32_t index = (int32_t)index64;
  UTF8Buf *u8b = (UTF8Buf *)ut->p;
  U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit)(void)0;
  U_ASSERT(index<=ut->chunkNativeLimit)(void)0;
  int32_t mapIndex = index - u8b->toUCharsMapStart;
  U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars))(void)0;
  int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
  U_ASSERT(offset>=0 && offset<=ut->chunkLength)(void)0;
  return offset;
1562}

1564static UText * U_CALLCONV
1565utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
1566{
  // First do a generic shallow clone.  Does everything needed for the UText struct itself.
  dest = shallowTextClone(dest, src, status);

  // For deep clones, make a copy of the string.
  //  The copied storage is owned by the newly created clone.
  //
  // TODO:  There is an issue with using utext_nativeLength().
  //        That function is non-const in cases where the input was NUL terminated
  //          and the length has not yet been determined.
  //        This function (clone()) is const.
  //        There potentially a thread safety issue lurking here.
  //
  if (deep && U_SUCCESS(*status)) {
      int32_t  len = (int32_t)utext_nativeLengthutext_nativeLength_71((UText *)src);
      char *copyStr = (char *)uprv_mallocuprv_malloc_71(len+1);
      if (copyStr == NULL__null) {
          *status = U_MEMORY_ALLOCATION_ERROR;
      } else {
          uprv_memcpy(copyStr, src->context, len+1)do { clang diagnostic push
 clang diagnostic ignored "-Waddress"

 (void)0; (void)0; clang diagnostic pop
 :: memcpy(copyStr, src
->context, len+1); } while (false);
          dest->context = copyStr;
          dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT));
      }
  }
  return dest;
1591}


1594static void U_CALLCONV
1595utf8TextClose(UText *ut) {
  // Most of the work of close is done by the generic UText framework close.
  // All that needs to be done here is to delete the UTF8 string if the UText
  //  owns it.  This occurs if the UText was created by cloning.
  if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT))) {
      char *s = (char *)ut->context;
      uprv_freeuprv_free_71(s);
      ut->context = NULL__null;
  }
1604}

1606U_CDECL_END}


// Function dispatch table for the UTF-8 provider; utext_openUTF8() installs it
// as ut->pFuncs.  The replace and copy slots are NULL because this provider is
// read-only.
1609static const struct UTextFuncs utf8Funcs =
1610{
  sizeof(UTextFuncs),
  0, 0, 0,             // Reserved alignment padding
  utf8TextClone,
  utf8TextLength,
  utf8TextAccess,
  utf8TextExtract,
  NULL__null,                /* replace*/
  NULL__null,                /* copy   */
  utf8TextMapOffsetToNative,
  utf8TextMapIndexToUTF16,
  utf8TextClose,
  NULL__null,                // spare 1
  NULL__null,                // spare 2
  NULL__null                 // spare 3
1625};


// Empty, NUL-terminated string that utext_openUTF8() substitutes when it is
// called with a NULL pointer and a length of zero.
1628static const char gEmptyString[] = {0};

1630U_CAPIextern "C" UText * U_EXPORT2
1631utext_openUTF8utext_openUTF8_71(UText *ut, const char *s, int64_t length, UErrorCode *status) {
  if(U_FAILURE(*status)) {
      return NULL__null;
  }
  if(s==NULL__null && length==0) {
      s = gEmptyString;
  }

  if(s==NULL__null || length<-1 || length>INT32_MAX(2147483647)) {
      *status=U_ILLEGAL_ARGUMENT_ERROR;
      return NULL__null;
  }

  ut = utext_setuputext_setup_71(ut, sizeof(UTF8Buf) * 2, status);
  if (U_FAILURE(*status)) {
      return ut;
  }

  ut->pFuncs  = &utf8Funcs;
  ut->context = s;
  ut->b       = (int32_t)length;
  ut->c       = (int32_t)length;
  if (ut->c < 0) {
      ut->c = 0;
      ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
  }
  ut->p = ut->pExtra;
  ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);
  return ut;

1661}








1670//------------------------------------------------------------------------------
1671//
1672//     UText implementation wrapper for Replaceable (read/write)
1673//
1674//         Use of UText data members:
1675//            context    pointer to Replaceable.
1676//            p          pointer to Replaceable if it is owned by the UText.
1677//
1678//------------------------------------------------------------------------------



1682// minimum chunk size for this implementation: 3
1683// to allow for possible trimming for code point boundaries
1684enum { REP_TEXT_CHUNK_SIZE=10 };

1686struct ReplExtra {
  /*
   * Chunk UChars.
   * +1 to simplify filling with surrogate pair at the end.
   */
  UChar s[REP_TEXT_CHUNK_SIZE+1];
1692};


1695U_CDECL_BEGINextern "C" {

1697static UText * U_CALLCONV
1698repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
  // First do a generic shallow clone.  Does everything needed for the UText struct itself.
  dest = shallowTextClone(dest, src, status);

  // For deep clones, make a copy of the Replaceable.
  //  The copied Replaceable storage is owned by the newly created UText clone.
  //  A non-NULL pointer in UText.p is the signal to the close() function to delete
  //    it.
  //
  if (deep && U_SUCCESS(*status)) {
      const Replaceable *replSrc = (const Replaceable *)src->context;
      dest->context = replSrc->clone();
      dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT));

      // with deep clone, the copy is writable, even when the source is not.
      dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE));
  }
  return dest;
1716}


1719static void U_CALLCONV
1720repTextClose(UText *ut) {
  // Most of the work of close is done by the generic UText framework close.
  // All that needs to be done here is delete the Replaceable if the UText
  //  owns it.  This occurs if the UText was created by cloning.
  if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT))) {
      Replaceable *rep = (Replaceable *)ut->context;
      delete rep;
      ut->context = NULL__null;
  }
1729}


1732static int64_t U_CALLCONV
1733repTextLength(UText *ut) {
  const Replaceable *replSrc = (const Replaceable *)ut->context;
  int32_t  len = replSrc->length();
  return len;
1737}


1740static UBool U_CALLCONV
1741repTextAccess(UText *ut, int64_t index, UBool forward) {
  const Replaceable *rep=(const Replaceable *)ut->context;
  int32_t length=rep->length();   // Full length of the input text (bigger than a chunk)

  // clip the requested index to the limits of the text.
  int32_t index32 = pinIndex(index, length);
  U_ASSERT(index<=INT32_MAX)(void)0;


  /*
   * Compute start/limit boundaries around index, for a segment of text
   * to be extracted.
   * To allow for the possibility that our user gave an index to the trailing
   * half of a surrogate pair, we must request one extra preceding UChar when
   * going in the forward direction.  This will ensure that the buffer has the
   * entire code point at the specified index.
   */
  if(forward) {

      if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) {
          // Buffer already contains the requested position.
          ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
          return TRUE1;
      }
      if (index32>=length && ut->chunkNativeLimit==length) {
          // Request for end of string, and buffer already extends up to it.
          // Can't get the data, but don't change the buffer.
          ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;
          return FALSE0;
      }

      ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;
      // Going forward, so we want to have the buffer with stuff at and beyond
      //   the requested index.  The -1 gets us one code point before the
      //   requested index also, to handle the case of the index being on
      //   a trail surrogate of a surrogate pair.
      if(ut->chunkNativeLimit > length) {
          ut->chunkNativeLimit = length;
      }
      // unless buffer ran off end, start is index-1.
      ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;
      if(ut->chunkNativeStart < 0) {
          ut->chunkNativeStart = 0;
      }
  } else {
      // Reverse iteration.  Fill buffer with data preceding the requested index.
      if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) {
          // Requested position already in buffer.
          ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;
          return TRUE1;
      }
      if (index32==0 && ut->chunkNativeStart==0) {
          // Request for start, buffer already begins at start.
          //  No data, but keep the buffer as is.
          ut->chunkOffset = 0;
          return FALSE0;
      }

      // Figure out the bounds of the chunk to extract for reverse iteration.
      // Need to worry about chunk not splitting surrogate pairs, and while still
      // containing the data we need.
      // Fix by requesting a chunk that includes an extra UChar at the end.
      // If this turns out to be a lead surrogate, we can lop it off and still have
      //   the data we wanted.
      ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;
      if (ut->chunkNativeStart < 0) {
          ut->chunkNativeStart = 0;
      }

      ut->chunkNativeLimit = index32 + 1;
      if (ut->chunkNativeLimit > length) {
          ut->chunkNativeLimit = length;
      }
  }

  // Extract the new chunk of text from the Replaceable source.
  ReplExtra *ex = (ReplExtra *)ut->pExtra;
  // UnicodeString with its buffer a writable alias to the chunk buffer
  UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
  rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer);

  ut->chunkContents  = ex->s;
  ut->chunkLength    = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);
  ut->chunkOffset    = (int32_t)(index32 - ut->chunkNativeStart);

  // Surrogate pairs from the input text must not span chunk boundaries.
  // If end of chunk could be the start of a surrogate, trim it off.
  if (ut->chunkNativeLimit < length &&
      U16_IS_LEAD(ex->s[ut->chunkLength-1])(((ex->s[ut->chunkLength-1])&0xfffffc00)==0xd800)) {
          ut->chunkLength--;
          ut->chunkNativeLimit--;
          if (ut->chunkOffset > ut->chunkLength) {
              ut->chunkOffset = ut->chunkLength;
          }
      }

  // if the first UChar in the chunk could be the trailing half of a surrogate pair,
  // trim it off.
  if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])(((ex->s[0])&0xfffffc00)==0xdc00)) {
      ++(ut->chunkContents);
      ++(ut->chunkNativeStart);
      --(ut->chunkLength);
      --(ut->chunkOffset);
  }

  // adjust the index/chunkOffset to a code point boundary
  U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset)do { if(((((ut->chunkContents)[ut->chunkOffset])&0xfffffc00
)==0xdc00) && (ut->chunkOffset)>(0) && (
(((ut->chunkContents)[(ut->chunkOffset)-1])&0xfffffc00
)==0xd800)) { --(ut->chunkOffset); } } while (false);

  // Use fast indexing for get/setNativeIndex()
  ut->nativeIndexingLimit = ut->chunkLength;

  return TRUE1;
1853}



1857static int32_t U_CALLCONV
1858repTextExtract(UText *ut,
             int64_t start, int64_t limit,
             UChar *dest, int32_t destCapacity,
             UErrorCode *status) {
  const Replaceable *rep=(const Replaceable *)ut->context;
  int32_t  length=rep->length();

  if(U_FAILURE(*status)) {
      return 0;
  }
  if(destCapacity<0 || (dest==NULL__null && destCapacity>0)) {
      *status=U_ILLEGAL_ARGUMENT_ERROR;
  }
  if(start>limit) {
      *status=U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
  }

  int32_t  start32 = pinIndex(start, length);
  int32_t  limit32 = pinIndex(limit, length);

  // adjust start, limit if they point to trail half of surrogates
  if (start32<length && U16_IS_TRAIL(rep->charAt(start32))(((rep->charAt(start32))&0xfffffc00)==0xdc00) &&
      U_IS_SUPPLEMENTARY(rep->char32At(start32))((uint32_t)((rep->char32At(start32))-0x10000)<=0xfffff)){
          start32--;
  }
  if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32))(((rep->charAt(limit32))&0xfffffc00)==0xdc00) &&
      U_IS_SUPPLEMENTARY(rep->char32At(limit32))((uint32_t)((rep->char32At(limit32))-0x10000)<=0xfffff)){
          limit32--;
  }

  length=limit32-start32;
  if(length>destCapacity) {
      limit32 = start32 + destCapacity;
  }
  UnicodeString buffer(dest, 0, destCapacity); // writable alias
  rep->extractBetween(start32, limit32, buffer);
  repTextAccess(ut, limit32, TRUE1);

  return u_terminateUCharsu_terminateUChars_71(dest, destCapacity, length, status);
1898}

1900static int32_t U_CALLCONV
1901repTextReplace(UText *ut,
             int64_t start, int64_t limit,
             const UChar *src, int32_t length,
             UErrorCode *status) {
  Replaceable *rep=(Replaceable *)ut->context;
  int32_t oldLength;

  if(U_FAILURE(*status)) {
      return 0;
  }
  if(src==NULL__null && length!=0) {
      *status=U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
  }
  oldLength=rep->length(); // will subtract from new length
  if(start>limit ) {
      *status=U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
  }

  int32_t start32 = pinIndex(start, oldLength);
  int32_t limit32 = pinIndex(limit, oldLength);

  // Snap start & limit to code point boundaries.
  if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32))(((rep->charAt(start32))&0xfffffc00)==0xdc00) &&
      start32>0 && U16_IS_LEAD(rep->charAt(start32-1))(((rep->charAt(start32-1))&0xfffffc00)==0xd800))
  {
          start32--;
  }
  if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1))(((rep->charAt(limit32-1))&0xfffffc00)==0xd800) &&
      U16_IS_TRAIL(rep->charAt(limit32))(((rep->charAt(limit32))&0xfffffc00)==0xdc00))
  {
          limit32++;
  }

  // Do the actual replace operation using methods of the Replaceable class
  UnicodeString replStr((UBool)(length<0), src, length); // read-only alias
  rep->handleReplaceBetween(start32, limit32, replStr);
  int32_t newLength = rep->length();
  int32_t lengthDelta = newLength - oldLength;

  // Is the UText chunk buffer OK?
  if (ut->chunkNativeLimit > start32) {
      // this replace operation may have impacted the current chunk.
      // invalidate it, which will force a reload on the next access.
      invalidateChunk(ut);
  }

  // set the iteration position to the end of the newly inserted replacement text.
  int32_t newIndexPos = limit32 + lengthDelta;
  repTextAccess(ut, newIndexPos, TRUE1);

  return lengthDelta;
1954}


1957static void U_CALLCONV
1958repTextCopy(UText *ut,
              int64_t start, int64_t limit,
              int64_t destIndex,
              UBool move,
              UErrorCode *status)
1963{
  Replaceable *rep=(Replaceable *)ut->context;
  int32_t length=rep->length();

  if(U_FAILURE(*status)) {
      return;
  }
  if (start>limit || (start<destIndex && destIndex<limit))
  {
      *status=U_INDEX_OUTOFBOUNDS_ERROR;
      return;
  }

  int32_t start32     = pinIndex(start, length);
  int32_t limit32     = pinIndex(limit, length);
  int32_t destIndex32 = pinIndex(destIndex, length);

  // TODO:  snap input parameters to code point boundaries.

  if(move) {
      // move: copy to destIndex, then replace original with nothing
      int32_t segLength=limit32-start32;
      rep->copy(start32, limit32, destIndex32);
      if(destIndex32<start32) {
          start32+=segLength;
          limit32+=segLength;
      }
      rep->handleReplaceBetween(start32, limit32, UnicodeString());
  } else {
      // copy
      rep->copy(start32, limit32, destIndex32);
  }

  // If the change to the text touched the region in the chunk buffer,
  //  invalidate the buffer.
  int32_t firstAffectedIndex = destIndex32;
  if (move && start32<firstAffectedIndex) {
      firstAffectedIndex = start32;
  }
  if (firstAffectedIndex < ut->chunkNativeLimit) {
      // changes may have affected range covered by the chunk
      invalidateChunk(ut);
  }

  // Put iteration position at the newly inserted (moved) block,
  int32_t  nativeIterIndex = destIndex32 + limit32 - start32;
  if (move && destIndex32>start32) {
      // moved a block of text towards the end of the string.
      nativeIterIndex = destIndex32;
  }

  // Set position, reload chunk if needed.
  repTextAccess(ut, nativeIterIndex, TRUE1);
2016}

2018static const struct UTextFuncs repFuncs =
2019{
  sizeof(UTextFuncs),
  0, 0, 0,           // Reserved alignment padding
  repTextClone,
  repTextLength,
  repTextAccess,
  repTextExtract,
  repTextReplace,
  repTextCopy,
  NULL__null,              // MapOffsetToNative,
  NULL__null,              // MapIndexToUTF16,
  repTextClose,
  NULL__null,              // spare 1
  NULL__null,              // spare 2
  NULL__null               // spare 3
2034};


2037U_CAPIextern "C" UText * U_EXPORT2
2038utext_openReplaceableutext_openReplaceable_71(UText *ut, Replaceable *rep, UErrorCode *status)
2039{
  if(U_FAILURE(*status)) {
      return NULL__null;
  }
  if(rep==NULL__null) {
      *status=U_ILLEGAL_ARGUMENT_ERROR;
      return NULL__null;
  }
  ut = utext_setuputext_setup_71(ut, sizeof(ReplExtra), status);
  if(U_FAILURE(*status)) {
      return ut;
  }

  ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE));
  if(rep->hasMetaData()) {
      ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)((int32_t)1<<(UTEXT_PROVIDER_HAS_META_DATA));
  }

  ut->pFuncs  = &repFuncs;
  ut->context =  rep;
  return ut;
2060}

2062U_CDECL_END}








//------------------------------------------------------------------------------
//
//     UText implementation for UnicodeString (read/write)  and
//                    for const UnicodeString (read only)
//             (same implementation, only the flags are different)
//
//         Use of UText data members:
//            context    pointer to UnicodeString.  When the
//                       UTEXT_PROVIDER_OWNS_TEXT property is set (deep clone),
//                       the UText owns the string and it is deleted on close().
//
//------------------------------------------------------------------------------

2084U_CDECL_BEGINextern "C" {


2087static UText * U_CALLCONV
2088unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
  // First do a generic shallow clone.  Does everything needed for the UText struct itself.
  dest = shallowTextClone(dest, src, status);

  // For deep clones, make a copy of the UnicodeSring.
  //  The copied UnicodeString storage is owned by the newly created UText clone.
  //  A non-NULL pointer in UText.p is the signal to the close() function to delete
  //    the UText.
  //
  if (deep && U_SUCCESS(*status)) {
      const UnicodeString *srcString = (const UnicodeString *)src->context;
      dest->context = new UnicodeString(*srcString);
      dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT));

      // with deep clone, the copy is writable, even when the source is not.
      dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE));
  }
  return dest;
2106}

2108static void U_CALLCONV
2109unistrTextClose(UText *ut) {
  // Most of the work of close is done by the generic UText framework close.
  // All that needs to be done here is delete the UnicodeString if the UText
  //  owns it.  This occurs if the UText was created by cloning.
  if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT))) {
      UnicodeString *str = (UnicodeString *)ut->context;
      delete str;
      ut->context = NULL__null;
  }
2118}


2121static int64_t U_CALLCONV
2122unistrTextLength(UText *t) {
  return ((const UnicodeString *)t->context)->length();
2124}


2127static UBool U_CALLCONV
2128unistrTextAccess(UText *ut, int64_t index, UBool  forward) {
  int32_t length  = ut->chunkLength;
  ut->chunkOffset = pinIndex(index, length);

  // Check whether request is at the start or end
  UBool retVal = (forward && index<length) || (!forward && index>0);
  return retVal;
2135}



2139static int32_t U_CALLCONV
2140unistrTextExtract(UText *t,
                int64_t start, int64_t limit,
                UChar *dest, int32_t destCapacity,
                UErrorCode *pErrorCode) {
  const UnicodeString *us=(const UnicodeString *)t->context;
  int32_t length=us->length();

  if(U_FAILURE(*pErrorCode)) {
      return 0;
  }
  if(destCapacity<0 || (dest==NULL__null && destCapacity>0)) {
      *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  }
  if(start<0 || start>limit) {
      *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
  }

  int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;
  int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;

  length=limit32-start32;
  if (destCapacity>0 && dest!=NULL__null) {
      int32_t trimmedLength = length;
      if(trimmedLength>destCapacity) {
          trimmedLength=destCapacity;
      }
      us->extract(start32, trimmedLength, dest);
      t->chunkOffset = start32+trimmedLength;
  } else {
      t->chunkOffset = start32;
  }
  u_terminateUCharsu_terminateUChars_71(dest, destCapacity, length, pErrorCode);
  return length;
2174}

2176static int32_t U_CALLCONV
2177unistrTextReplace(UText *ut,
                int64_t start, int64_t limit,
                const UChar *src, int32_t length,
                UErrorCode *pErrorCode) {
  UnicodeString *us=(UnicodeString *)ut->context;
  int32_t oldLength;

  if(U_FAILURE(*pErrorCode)) {
      return 0;
  }
  if(src==NULL__null && length!=0) {
      *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  }
  if(start>limit) {
      *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
      return 0;
  }
  oldLength=us->length();
  int32_t start32 = pinIndex(start, oldLength);
  int32_t limit32 = pinIndex(limit, oldLength);
  if (start32 < oldLength) {
      start32 = us->getChar32Start(start32);
  }
  if (limit32 < oldLength) {
      limit32 = us->getChar32Start(limit32);
  }

  // replace
  us->replace(start32, limit32-start32, src, length);
  int32_t newLength = us->length();

  // Update the chunk description.
  ut->chunkContents    = us->getBuffer();
  ut->chunkLength      = newLength;
  ut->chunkNativeLimit = newLength;
  ut->nativeIndexingLimit = newLength;

  // Set iteration position to the point just following the newly inserted text.
  int32_t lengthDelta = newLength - oldLength;
  ut->chunkOffset = limit32 + lengthDelta;

  return lengthDelta;
2219}

2221static void U_CALLCONV
2222unistrTextCopy(UText *ut,
             int64_t start, int64_t limit,
             int64_t destIndex,
             UBool move,
             UErrorCode *pErrorCode) {
  UnicodeString *us=(UnicodeString *)ut->context;
  int32_t length=us->length();

  if(U_FAILURE(*pErrorCode)) {
      return;
  }
  int32_t start32 = pinIndex(start, length);
  int32_t limit32 = pinIndex(limit, length);
  int32_t destIndex32 = pinIndex(destIndex, length);

  if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) {
      *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
      return;
  }

  if(move) {
      // move: copy to destIndex, then remove original
      int32_t segLength=limit32-start32;
      us->copy(start32, limit32, destIndex32);
      if(destIndex32<start32) {
          start32+=segLength;
      }
      us->remove(start32, segLength);
  } else {
      // copy
      us->copy(start32, limit32, destIndex32);
  }

  // update chunk description, set iteration position.
  ut->chunkContents = us->getBuffer();
  if (move==FALSE0) {
      // copy operation, string length grows
      ut->chunkLength += limit32-start32;
      ut->chunkNativeLimit = ut->chunkLength;
      ut->nativeIndexingLimit = ut->chunkLength;
  }

  // Iteration position to end of the newly inserted text.
  ut->chunkOffset = destIndex32+limit32-start32;
  if (move && destIndex32>start32) {
      ut->chunkOffset = destIndex32;
  }

2270}

2272static const struct UTextFuncs unistrFuncs =
2273{
  sizeof(UTextFuncs),
  0, 0, 0,             // Reserved alignment padding
  unistrTextClone,
  unistrTextLength,
  unistrTextAccess,
  unistrTextExtract,
  unistrTextReplace,
  unistrTextCopy,
  NULL__null,                // MapOffsetToNative,
  NULL__null,                // MapIndexToUTF16,
  unistrTextClose,
  NULL__null,                // spare 1
  NULL__null,                // spare 2
  NULL__null                 // spare 3
2288};



2292U_CDECL_END}


2295U_CAPIextern "C" UText * U_EXPORT2
2296utext_openUnicodeStringutext_openUnicodeString_71(UText *ut, UnicodeString *s, UErrorCode *status) {
  ut = utext_openConstUnicodeStringutext_openConstUnicodeString_71(ut, s, status);
  if (U_SUCCESS(*status)) {
      ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE)((int32_t)1<<(UTEXT_PROVIDER_WRITABLE));
  }
  return ut;
2302}



2306U_CAPIextern "C" UText * U_EXPORT2
2307utext_openConstUnicodeStringutext_openConstUnicodeString_71(UText *ut, const UnicodeString *s, UErrorCode *status) {
  if (U_SUCCESS(*status) && s->isBogus()) {
      // The UnicodeString is bogus, but we still need to detach the UText
      //   from whatever it was hooked to before, if anything.
      utext_openUCharsutext_openUChars_71(ut, NULL__null, 0, status);
      *status = U_ILLEGAL_ARGUMENT_ERROR;
      return ut;
  }
  ut = utext_setuputext_setup_71(ut, 0, status);
  //    note:  use the standard (writable) function table for UnicodeString.
  //           The flag settings disable writing, so having the functions in
  //           the table is harmless.
  if (U_SUCCESS(*status)) {
      ut->pFuncs              = &unistrFuncs;
      ut->context             = s;
      ut->providerProperties  = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)((int32_t)1<<(UTEXT_PROVIDER_STABLE_CHUNKS));
      ut->chunkContents       = s->getBuffer();
      ut->chunkLength         = s->length();
      ut->chunkNativeStart    = 0;
      ut->chunkNativeLimit    = ut->chunkLength;
      ut->nativeIndexingLimit = ut->chunkLength;
  }
  return ut;
2330}

//------------------------------------------------------------------------------
//
//     UText implementation for const UChar * strings
//
//         Use of UText data members:
//            context    pointer to the UChar string
//            a          length.  -1 if not yet known.
//
//         TODO:  support 64 bit lengths.
//
//------------------------------------------------------------------------------

2344U_CDECL_BEGINextern "C" {


2347static UText * U_CALLCONV
2348ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) {
  // First do a generic shallow clone.
  dest = shallowTextClone(dest, src, status);

  // For deep clones, make a copy of the string.
  //  The copied storage is owned by the newly created clone.
  //  A non-NULL pointer in UText.p is the signal to the close() function to delete
  //    it.
  //
  if (deep && U_SUCCESS(*status)) {
      U_ASSERT(utext_nativeLength(dest) < INT32_MAX)(void)0;
      int32_t  len = (int32_t)utext_nativeLengthutext_nativeLength_71(dest);

      // The cloned string IS going to be NUL terminated, whether or not the original was.
      const UChar *srcStr = (const UChar *)src->context;
      UChar *copyStr = (UChar *)uprv_mallocuprv_malloc_71((len+1) * sizeof(UChar));
      if (copyStr == NULL__null) {
          *status = U_MEMORY_ALLOCATION_ERROR;
      } else {
          int64_t i;
          for (i=0; i<len; i++) {
              copyStr[i] = srcStr[i];
          }
          copyStr[len] = 0;
          dest->context = copyStr;
          dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT));
      }
  }
  return dest;
2377}


2380static void U_CALLCONV
2381ucstrTextClose(UText *ut) {
  // Most of the work of close is done by the generic UText framework close.
  // All that needs to be done here is delete the string if the UText
  //  owns it.  This occurs if the UText was created by cloning.
  if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)((int32_t)1<<(UTEXT_PROVIDER_OWNS_TEXT))) {
      UChar *s = (UChar *)ut->context;
      uprv_freeuprv_free_71(s);
      ut->context = NULL__null;
  }
2390}



2394static int64_t U_CALLCONV
2395ucstrTextLength(UText *ut) {
  if (ut->a < 0) {
      // null terminated, we don't yet know the length. Scan for it.
      //    Access is not convenient for doing this
      //    because the current iteration position can't be changed.
      const UChar  *str = (const UChar *)ut->context;
      for (;;) {
          if (str[ut->chunkNativeLimit] == 0) {
              break;
          }
          ut->chunkNativeLimit++;
      }
      ut->a = ut->chunkNativeLimit;
      ut->chunkLength = (int32_t)ut->chunkNativeLimit;
      ut->nativeIndexingLimit = ut->chunkLength;
      ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
  }
  return ut->a;
2413}


2416static UBool U_CALLCONV
2417ucstrTextAccess(UText *ut, int64_t index, UBool  forward) {
  const UChar *str   = (const UChar *)ut->context;

  // pin the requested index to the bounds of the string,
  //  and set current iteration position.
  if (index<0) {
      index = 0;
  } else if (index < ut->chunkNativeLimit) {
      // The request data is within the chunk as it is known so far.
      // Put index on a code point boundary.
      U16_SET_CP_START(str, 0, index)do { if(((((str)[index])&0xfffffc00)==0xdc00) && (
index)>(0) && ((((str)[(index)-1])&0xfffffc00)
==0xd800)) { --(index); } } while (false);
  } else if (ut->a >= 0) {
      // We know the length of this string, and the user is requesting something
      // at or beyond the length.  Pin the requested index to the length.
      index = ut->a;
  } else {
      // Null terminated string, length not yet known, and the requested index
      //  is beyond where we have scanned so far.
      //  Scan to 32 UChars beyond the requested index.  The strategy here is
      //  to avoid fully scanning a long string when the caller only wants to
      //  see a few characters at its beginning.
      int32_t scanLimit = (int32_t)index + 32;
      if ((index + 32)>INT32_MAX(2147483647) || (index + 32)<0 ) {   // note: int64 expression
          scanLimit = INT32_MAX(2147483647);
      }

      int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;
      for (; chunkLimit<scanLimit; chunkLimit++) {
          if (str[chunkLimit] == 0) {
              // We found the end of the string.  Remember it, pin the requested index to it,
              //  and bail out of here.
              ut->a = chunkLimit;
              ut->chunkLength = chunkLimit;
              ut->nativeIndexingLimit = chunkLimit;
              if (index >= chunkLimit) {
                  index = chunkLimit;
              } else {
                  U16_SET_CP_START(str, 0, index)do { if(((((str)[index])&0xfffffc00)==0xdc00) && (
index)>(0) && ((((str)[(index)-1])&0xfffffc00)
==0xd800)) { --(index); } } while (false);
              }

              ut->chunkNativeLimit = chunkLimit;
              ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
              goto breakout;
          }
      }
      // We scanned through the next batch of UChars without finding the end.
      U16_SET_CP_START(str, 0, index)do { if(((((str)[index])&0xfffffc00)==0xdc00) && (
index)>(0) && ((((str)[(index)-1])&0xfffffc00)
==0xd800)) { --(index); } } while (false);
      if (chunkLimit == INT32_MAX(2147483647)) {
          // Scanned to the limit of a 32 bit length.
          // Forceably trim the overlength string back so length fits in int32
          //  TODO:  add support for 64 bit strings.
          ut->a = chunkLimit;
          ut->chunkLength = chunkLimit;
          ut->nativeIndexingLimit = chunkLimit;
          if (index > chunkLimit) {
              index = chunkLimit;
          }
          ut->chunkNativeLimit = chunkLimit;
          ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
      } else {
          // The endpoint of a chunk must not be left in the middle of a surrogate pair.
          // If the current end is on a lead surrogate, back the end up by one.
          // It doesn't matter if the end char happens to be an unpaired surrogate,
          //    and it's simpler not to worry about it.
          if (U16_IS_LEAD(str[chunkLimit-1])(((str[chunkLimit-1])&0xfffffc00)==0xd800)) {
              --chunkLimit;
          }
          // Null-terminated chunk with end still unknown.
          // Update the chunk length to reflect what has been scanned thus far.
          // That the full length is still unknown is (still) flagged by
          //    ut->a being < 0.
          ut->chunkNativeLimit = chunkLimit;
          ut->nativeIndexingLimit = chunkLimit;
          ut->chunkLength = chunkLimit;
      }

  }
2494breakout:
  U_ASSERT(index<=INT32_MAX)(void)0;
  ut->chunkOffset = (int32_t)index;

  // Check whether request is at the start or end
  UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0);
  return retVal;
2501}



2505static int32_t U_CALLCONV
2506ucstrTextExtract(UText *ut,
                int64_t start, int64_t limit,
                UChar *dest, int32_t destCapacity,
                UErrorCode *pErrorCode)
2510{
  if(U_FAILURE(*pErrorCode)) {
      return 0;
  }
  if(destCapacity<0 || (dest==NULL__null && destCapacity>0) || start>limit) {
1
Assuming 'destCapacity' is >= 0→
2
←
Assuming 'dest' is equal to NULL→
3
←
Assuming 'destCapacity' is <= 0→
4
←
Assuming 'start' is <= 'limit'→
5
←
Taking false branch→
      *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
  }

  //const UChar *s=(const UChar *)ut->context;
  int32_t si, di;

  int32_t start32;
  int32_t limit32;

  // Access the start.  Does two things we need:
  //   Pins 'start' to the length of the string, if it came in out-of-bounds.
  //   Snaps 'start' to the beginning of a code point.
  ucstrTextAccess(ut, start, TRUE1);
  const UChar *s=ut->chunkContents;
  start32 = ut->chunkOffset;

  int32_t strLength=(int32_t)ut->a;
  if (strLength >= 0) {
6
←
Assuming 'strLength' is >= 0→
7
←
Taking true branch→
      limit32 = pinIndex(limit, strLength);
  } else {
      limit32 = pinIndex(limit, INT32_MAX(2147483647));
  }
  di = 0;
  for (si=start32; si<limit32; si++) {
8
←
Assuming 'si' is < 'limit32'→
      if (strLength8.1
'strLength' is >= 0
<0 && s[si]==0) {
          // Just hit the end of a null-terminated string.
          ut->a = si;               // set string length for this UText
          ut->chunkNativeLimit    = si;
          ut->chunkLength         = si;
          ut->nativeIndexingLimit = si;
          strLength               = si;
          limit32                 = si;
          break;
      }
      U_ASSERT(di>=0)(void)0; /* to ensure di never exceeds INT32_MAX, which must not happen logically */
      if (di8.2
'di' is >= 'destCapacity'
<destCapacity) {
9
←
Taking false branch→
          // only store if there is space.
          dest[di] = s[si];
      } else {
          if (strLength9.1
'strLength' is >= 0
>=0) {
10
←
Taking true branch→
              // We have filled the destination buffer, and the string length is known.
              //  Cut the loop short.  There is no need to scan string termination.
              di = limit32 - start32;
              si = limit32;
              break;
          }
      }
      di++;
  }

  // If the limit index points to a lead surrogate of a pair,
  //   add the corresponding trail surrogate to the destination.
  if (si>0 && U16_IS_LEAD(s[si-1])(((s[si-1])&0xfffffc00)==0xd800) &&
11
←
Assuming 'si' is > 0→
12
←
Assuming the condition is true→
15
←
Taking true branch→
          ((si<strLength || strLength<0)  && U16_IS_TRAIL(s[si])(((s[si])&0xfffffc00)==0xdc00)))
13
←
Assuming 'si' is < 'strLength'→
14
←
Assuming the condition is true→
  {
      if (di<destCapacity) {
16
←
Assuming 'di' is < 'destCapacity'→
17
←
Taking true branch→
          // store only if there is space in the output buffer.
          dest[di++] = s[si];
18
←
Array access (from variable 'dest') results in a null pointer dereference
      }
      si++;
  }

  // Put iteration position at the point just following the extracted text
  if (si <= ut->chunkNativeLimit) {
      ut->chunkOffset = si;
  } else {
      ucstrTextAccess(ut, si, TRUE1);
  }

  // Add a terminating NUL if space in the buffer permits,
  // and set the error status as required.
  u_terminateUCharsu_terminateUChars_71(dest, destCapacity, di, pErrorCode);
  return di;
2589}

2591static const struct UTextFuncs ucstrFuncs =
2592{
  sizeof(UTextFuncs),
  0, 0, 0,           // Reserved alignment padding
  ucstrTextClone,
  ucstrTextLength,
  ucstrTextAccess,
  ucstrTextExtract,
  NULL__null,              // Replace
  NULL__null,              // Copy
  NULL__null,              // MapOffsetToNative,
  NULL__null,              // MapIndexToUTF16,
  ucstrTextClose,
  NULL__null,              // spare 1
  NULL__null,              // spare 2
  NULL__null,              // spare 3
2607};

2609U_CDECL_END}

2611static const UChar gEmptyUString[] = {0};

2613U_CAPIextern "C" UText * U_EXPORT2
2614utext_openUCharsutext_openUChars_71(UText *ut, const UChar *s, int64_t length, UErrorCode *status) {
  if (U_FAILURE(*status)) {
      return NULL__null;
  }
  if(s==NULL__null && length==0) {
      s = gEmptyUString;
  }
  if (s==NULL__null || length < -1 || length>INT32_MAX(2147483647)) {
      *status = U_ILLEGAL_ARGUMENT_ERROR;
      return NULL__null;
  }
  ut = utext_setuputext_setup_71(ut, 0, status);
  if (U_SUCCESS(*status)) {
      ut->pFuncs               = &ucstrFuncs;
      ut->context              = s;
      ut->providerProperties   = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)((int32_t)1<<(UTEXT_PROVIDER_STABLE_CHUNKS));
      if (length==-1) {
          ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)((int32_t)1<<(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE));
      }
      ut->a                    = length;
      ut->chunkContents        = s;
      ut->chunkNativeStart     = 0;
      ut->chunkNativeLimit     = length>=0? length : 0;
      ut->chunkLength          = (int32_t)ut->chunkNativeLimit;
      ut->chunkOffset          = 0;
      ut->nativeIndexingLimit  = ut->chunkLength;
  }
  return ut;
2642}


//------------------------------------------------------------------------------
//
//     UText implementation for text from ICU CharacterIterators
//
//         Use of UText data members:
//            context    pointer to the CharacterIterator
//            a          length of the full text.
//            p          pointer to buffer 1
//            b          start index of local buffer 1 contents
//            q          pointer to buffer 2
//            c          start index of local buffer 2 contents
//            r          pointer to the character iterator if the UText owns it.
//                       Null otherwise.
//
//------------------------------------------------------------------------------
// Size, in UChars, of each of the two local buffers that a
// CharacterIterator-backed UText keeps in its extra storage.
#define CIBufSize 16

2662U_CDECL_BEGINextern "C" {
2663static void U_CALLCONV
2664charIterTextClose(UText *ut) {
  // Most of the work of close is done by the generic UText framework close.
  // All that needs to be done here is delete the CharacterIterator if the UText
  //  owns it.  This occurs if the UText was created by cloning.
  CharacterIterator *ci = (CharacterIterator *)ut->r;
  delete ci;
  ut->r = NULL__null;
2671}

2673static int64_t U_CALLCONV
2674charIterTextLength(UText *ut) {
  return (int32_t)ut->a;
2676}

2678static UBool U_CALLCONV
2679charIterTextAccess(UText *ut, int64_t index, UBool  forward) {
  CharacterIterator *ci   = (CharacterIterator *)ut->context;

  int32_t clippedIndex = (int32_t)index;
  if (clippedIndex<0) {
      clippedIndex=0;
  } else if (clippedIndex>=ut->a) {
      clippedIndex=(int32_t)ut->a;
  }
  int32_t neededIndex = clippedIndex;
  if (!forward && neededIndex>0) {
      // reverse iteration, want the position just before what was asked for.
      neededIndex--;
  } else if (forward && neededIndex==ut->a && neededIndex>0) {
      // Forward iteration, don't ask for something past the end of the text.
      neededIndex--;
  }

  // Find the native index of the start of the buffer containing what we want.
  neededIndex -= neededIndex % CIBufSize16;

  UChar *buf = NULL__null;
  UBool  needChunkSetup = TRUE1;
  int    i;
  if (ut->chunkNativeStart == neededIndex) {
      // The buffer we want is already the current chunk.
      needChunkSetup = FALSE0;
  } else if (ut->b == neededIndex) {
      // The first buffer (buffer p) has what we need.
      buf = (UChar *)ut->p;
  } else if (ut->c == neededIndex) {
      // The second buffer (buffer q) has what we need.
      buf = (UChar *)ut->q;
  } else {
      // Neither buffer already has what we need.
      // Load new data from the character iterator.
      // Use the buf that is not the current buffer.
      buf = (UChar *)ut->p;
      if (ut->p == ut->chunkContents) {
          buf = (UChar *)ut->q;
      }
      ci->setIndex(neededIndex);
      for (i=0; i<CIBufSize16; i++) {
          buf[i] = ci->nextPostInc();
          if (i+neededIndex > ut->a) {
              break;
          }
      }
  }

  // We have a buffer with the data we need.
  // Set it up as the current chunk, if it wasn't already.
  if (needChunkSetup) {
      ut->chunkContents = buf;
      ut->chunkLength   = CIBufSize16;
      ut->chunkNativeStart = neededIndex;
      ut->chunkNativeLimit = neededIndex + CIBufSize16;
      if (ut->chunkNativeLimit > ut->a) {
          ut->chunkNativeLimit = ut->a;
          ut->chunkLength  = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
      }
      ut->nativeIndexingLimit = ut->chunkLength;
      U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize)(void)0;
  }
  ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
  UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);
  return success;
2746}

2748static UText * U_CALLCONV
2749charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) {
  if (U_FAILURE(*status)) {
      return NULL__null;
  }

  if (deep) {
      // There is no CharacterIterator API for cloning the underlying text storage.
      *status = U_UNSUPPORTED_ERROR;
      return NULL__null;
  } else {
      CharacterIterator *srcCI =(CharacterIterator *)src->context;
      srcCI = srcCI->clone();
      dest = utext_openCharacterIteratorutext_openCharacterIterator_71(dest, srcCI, status);
      if (U_FAILURE(*status)) {
          return dest;
      }
      // cast off const on getNativeIndex.
      //   For CharacterIterator based UTexts, this is safe, the operation is const.
      int64_t  ix = utext_getNativeIndexutext_getNativeIndex_71((UText *)src);
      utext_setNativeIndexutext_setNativeIndex_71(dest, ix);
      dest->r = srcCI;    // flags that this UText owns the CharacterIterator
  }
  return dest;
2772}

2774static int32_t U_CALLCONV
2775charIterTextExtract(UText *ut,
                int64_t start, int64_t limit,
                UChar *dest, int32_t destCapacity,
                UErrorCode *status)
2779{
  if(U_FAILURE(*status)) {
      return 0;
  }
  if(destCapacity<0 || (dest==NULL__null && destCapacity>0) || start>limit) {
      *status=U_ILLEGAL_ARGUMENT_ERROR;
      return 0;
  }
  int32_t  length  = (int32_t)ut->a;
  int32_t  start32 = pinIndex(start, length);
  int32_t  limit32 = pinIndex(limit, length);
  int32_t  desti   = 0;
  int32_t  srci;
  int32_t  copyLimit;

  CharacterIterator *ci = (CharacterIterator *)ut->context;
  ci->setIndex32(start32);   // Moves ix to lead of surrogate pair, if needed.
  srci = ci->getIndex();
  copyLimit = srci;
  while (srci<limit32) {
      UChar32 c = ci->next32PostInc();
      int32_t  len = U16_LENGTH(c)((uint32_t)(c)<=0xffff ? 1 : 2);
      U_ASSERT(desti+len>0)(void)0; /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */
      if (desti+len <= destCapacity) {
          U16_APPEND_UNSAFE(dest, desti, c)do { if((uint32_t)(c)<=0xffff) { (dest)[(desti)++]=(uint16_t
)(c); } else { (dest)[(desti)++]=(uint16_t)(((c)>>10)+0xd7c0
); (dest)[(desti)++]=(uint16_t)(((c)&0x3ff)|0xdc00); } } while
 (false);
          copyLimit = srci+len;
      } else {
          desti += len;
          *status = U_BUFFER_OVERFLOW_ERROR;
      }
      srci += len;
  }

  charIterTextAccess(ut, copyLimit, TRUE1);

  u_terminateUCharsu_terminateUChars_71(dest, destCapacity, desti, status);
  return desti;
2816}

2818static const struct UTextFuncs charIterFuncs =
2819{
  sizeof(UTextFuncs),
  0, 0, 0,             // Reserved alignment padding
  charIterTextClone,
  charIterTextLength,
  charIterTextAccess,
  charIterTextExtract,
  NULL__null,                // Replace
  NULL__null,                // Copy
  NULL__null,                // MapOffsetToNative,
  NULL__null,                // MapIndexToUTF16,
  charIterTextClose,
  NULL__null,                // spare 1
  NULL__null,                // spare 2
  NULL__null                 // spare 3
2834};
2835U_CDECL_END}


2838U_CAPIextern "C" UText * U_EXPORT2
2839utext_openCharacterIteratorutext_openCharacterIterator_71(UText *ut, CharacterIterator *ci, UErrorCode *status) {
  if (U_FAILURE(*status)) {
      return NULL__null;
  }

  if (ci->startIndex() > 0) {
      // No support for CharacterIterators that do not start indexing from zero.
      *status = U_UNSUPPORTED_ERROR;
      return NULL__null;
  }

  // Extra space in UText for 2 buffers of CIBufSize UChars each.
  int32_t  extraSpace = 2 * CIBufSize16 * sizeof(UChar);
  ut = utext_setuputext_setup_71(ut, extraSpace, status);
  if (U_SUCCESS(*status)) {
      ut->pFuncs                = &charIterFuncs;
      ut->context              = ci;
      ut->providerProperties   = 0;
      ut->a                    = ci->endIndex();        // Length of text
      ut->p                    = ut->pExtra;            // First buffer
      ut->b                    = -1;                    // Native index of first buffer contents
      ut->q                    = (UChar*)ut->pExtra+CIBufSize16;  // Second buffer
      ut->c                    = -1;                    // Native index of second buffer contents

      // Initialize current chunk contents to be empty.
      //   First access will fault something in.
      //   Note:  The initial nativeStart and chunkOffset must sum to zero
      //          so that getNativeIndex() will correctly compute to zero
      //          if no call to Access() has ever been made.  They can't be both
      //          zero without Access() thinking that the chunk is valid.
      ut->chunkContents        = (UChar *)ut->p;
      ut->chunkNativeStart     = -1;
      ut->chunkOffset          = 1;
      ut->chunkNativeLimit     = 0;
      ut->chunkLength          = 0;
      ut->nativeIndexingLimit  = ut->chunkOffset;  // enables native indexing
  }
  return ut;
2877}