../deps/icu-small/source/common/ucnvbocu.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/ucnvbocu.cpp
Warning:	line 919, column 17 The result of the left shift is undefined because the left operand is negative
Annotated Source Code

Press '?' to see keyboard shortcuts
Show analyzer invocation
clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ucnvbocu.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/ucnvbocu.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6*   Copyright (C) 2002-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9******************************************************************************
10*   file name:  ucnvbocu.cpp
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2002mar27
16*   created by: Markus W. Scherer
17*
18*   This is an implementation of the Binary Ordered Compression for Unicode,
19*   in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
20*/

22#include "unicode/utypes.h"

24#if !UCONFIG_NO_CONVERSION0 && !UCONFIG_ONLY_HTML_CONVERSION0

26#include "unicode/ucnv.h"
27#include "unicode/ucnv_cb.h"
28#include "unicode/utf16.h"
29#include "putilimp.h"
30#include "ucnv_bld.h"
31#include "ucnv_cnv.h"
32#include "uassert.h"

34/* BOCU-1 constants and macros ---------------------------------------------- */

36/*
* BOCU-1 encodes the code points of a Unicode string as
* a sequence of byte-encoded differences (slope detection),
* preserving lexical order.
*
* Optimize the difference-taking for runs of Unicode text within
* small scripts:
*
* Most small scripts are allocated within aligned 128-blocks of Unicode
* code points. Lexical order is preserved if the "previous code point" state
* is always moved into the middle of such a block.
*
* Additionally, "prev" is moved from anywhere in the Unihan and Hangul
* areas into the middle of those areas.
*
* C0 control codes and space are encoded with their US-ASCII bytes.
* "prev" is reset for C0 controls but not for space.
*/

55/* initial value for "prev": middle of the ASCII range */
56#define BOCU1_ASCII_PREV0x40        0x40

58/* bounding byte values for differences */
59#define BOCU1_MIN0x21               0x21
60#define BOCU1_MIDDLE0x90            0x90
61#define BOCU1_MAX_LEAD0xfe          0xfe
62#define BOCU1_MAX_TRAIL0xff         0xff
63#define BOCU1_RESET0xff             0xff

65/* number of lead bytes */
66#define BOCU1_COUNT(0xfe -0x21 +1)             (BOCU1_MAX_LEAD0xfe-BOCU1_MIN0x21+1)

68/* adjust trail byte counts for the use of some C0 control byte values */
69#define BOCU1_TRAIL_CONTROLS_COUNT20  20
70#define BOCU1_TRAIL_BYTE_OFFSET(0x21 -20)     (BOCU1_MIN0x21-BOCU1_TRAIL_CONTROLS_COUNT20)

72/* number of trail bytes */
73#define BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)       ((BOCU1_MAX_TRAIL0xff-BOCU1_MIN0x21+1)+BOCU1_TRAIL_CONTROLS_COUNT20)

75/*
* number of positive and negative single-byte codes
* (counting 0==BOCU1_MIDDLE among the positive ones)
*/
79#define BOCU1_SINGLE64            64

81/* number of lead bytes for positive and negative 2/3/4-byte sequences */
82#define BOCU1_LEAD_243            43
83#define BOCU1_LEAD_33            3
84#define BOCU1_LEAD_41            1

86/* The difference value range for single-byters. */
87#define BOCU1_REACH_POS_1(64 -1)   (BOCU1_SINGLE64-1)
88#define BOCU1_REACH_NEG_1(-64)   (-BOCU1_SINGLE64)

90/* The difference value range for double-byters. */
91#define BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))   (BOCU1_REACH_POS_1(64 -1)+BOCU1_LEAD_243*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20))
92#define BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20))   (BOCU1_REACH_NEG_1(-64)-BOCU1_LEAD_243*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20))

94/* The difference value range for 3-byters. */
95#define BOCU1_REACH_POS_3(((64 -1)+43*((0xff -0x21 +1)+20))+3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20))   \
  (BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))+BOCU1_LEAD_33*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20))

98#define BOCU1_REACH_NEG_3(((-64)-43*((0xff -0x21 +1)+20))-3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20))   (BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20))-BOCU1_LEAD_33*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20))

100/* The lead byte start values. */
101#define BOCU1_START_POS_2(0x90 +(64 -1)+1)   (BOCU1_MIDDLE0x90+BOCU1_REACH_POS_1(64 -1)+1)
102#define BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)   (BOCU1_START_POS_2(0x90 +(64 -1)+1)+BOCU1_LEAD_243)
103#define BOCU1_START_POS_4(((0x90 +(64 -1)+1)+43)+3)   (BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)+BOCU1_LEAD_33)
   /* ==BOCU1_MAX_LEAD */

106#define BOCU1_START_NEG_2(0x90 +(-64))   (BOCU1_MIDDLE0x90+BOCU1_REACH_NEG_1(-64))
107#define BOCU1_START_NEG_3((0x90 +(-64))-43)   (BOCU1_START_NEG_2(0x90 +(-64))-BOCU1_LEAD_243)
108#define BOCU1_START_NEG_4(((0x90 +(-64))-43)-3)   (BOCU1_START_NEG_3((0x90 +(-64))-43)-BOCU1_LEAD_33)
   /* ==BOCU1_MIN+1 */

111/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
112#define BOCU1_LENGTH_FROM_LEAD(lead)(((0x90 +(-64))<=(lead) && (lead)<(0x90 +(64 -1
)+1)) ? 1 : (((0x90 +(-64))-43)<=(lead) && (lead)<
((0x90 +(64 -1)+1)+43)) ? 2 : ((((0x90 +(-64))-43)-3)<=(lead
) && (lead)<(((0x90 +(64 -1)+1)+43)+3)) ? 3 : 4) \
  ((BOCU1_START_NEG_2(0x90 +(-64))<=(lead) && (lead)<BOCU1_START_POS_2(0x90 +(64 -1)+1)) ? 1 : \
   (BOCU1_START_NEG_3((0x90 +(-64))-43)<=(lead) && (lead)<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)) ? 2 : \
   (BOCU1_START_NEG_4(((0x90 +(-64))-43)-3)<=(lead) && (lead)<BOCU1_START_POS_4(((0x90 +(64 -1)+1)+43)+3)) ? 3 : 4)

117/* The length of a byte sequence, according to its packed form. */
118#define BOCU1_LENGTH_FROM_PACKED(packed)((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4) \
  ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)

121/*
* 12 commonly used C0 control codes (and space) are only used to encode
* themselves directly,
* which makes BOCU-1 MIME-usable and reasonably safe for
* ASCII-oriented software.
*
* These controls are
*  0   NUL
*
*  7   BEL
*  8   BS
*
*  9   TAB
*  a   LF
*  b   VT
*  c   FF
*  d   CR
*
*  e   SO
*  f   SI
*
* 1a   SUB
* 1b   ESC
*
* The other 20 C0 controls are also encoded directly (to preserve order)
* but are also used as trail bytes in difference encoding
* (for better compression).
*/
149#define BOCU1_TRAIL_TO_BYTE(t)((t)>=20 ? (t)+(0x21 -20) : bocu1TrailToByte[t]) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT20 ? (t)+BOCU1_TRAIL_BYTE_OFFSET(0x21 -20) : bocu1TrailToByte[t])

151/*
* Byte value map for control codes,
* from external byte values 0x00..0x20
* to trail byte values 0..19 (0..0x13) as used in the difference calculation.
* External byte values that are illegal as trail bytes are mapped to -1.
*/
/*
 * Lookup table indexed by an external byte value 0x00..0x20.
 * Each entry is the trail value 0..0x13 used in the difference calculation,
 * or -1 when that byte is not a legal trail byte.
 *
 * NOTE(review): this text looks like an analyzer-annotated listing; the leading
 * decimal numbers (157, 158, ...) appear to be listing line numbers fused onto
 * the code, and BOCU1_MIN0x21 is #defined as 0x21 earlier in this listing,
 * which gives the table 0x21 entries -- confirm against the pristine source.
 */
157static const int8_t
158bocu1ByteToTrail[BOCU1_MIN0x21]={
159/*  0     1     2     3     4     5     6     7    */
  /* bytes 0x00 and 0x07 are illegal; 0x01..0x06 map to trail values 0..5 */
  -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,

162/*  8     9     a     b     c     d     e     f    */
  /* none of the bytes 0x08..0x0f is a legal trail byte */
  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,

165/*  10    11    12    13    14    15    16    17   */
  /* bytes 0x10..0x17 map to trail values 6..0x0d */
  0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,

168/*  18    19    1a    1b    1c    1d    1e    1f   */
  /* 0x1a and 0x1b are illegal; the remaining bytes map to 0x0e..0x13 */
  0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,

171/*  20   */
  /* space (0x20) is never a trail byte */
  -1
173};

175/*
* Byte value map for control codes,
* from trail byte values 0..19 (0..0x13) as used in the difference calculation
* to external byte values 0x00..0x20.
*/
/*
 * Lookup table indexed by a trail value 0..0x13, giving the external byte
 * value (a C0 control code) that represents it.
 * The BOCU1_TRAIL_TO_BYTE(t) macro in this listing consults this table only
 * for t below 20; larger trail values are offset arithmetically instead.
 *
 * NOTE(review): the leading decimal numbers (180, 181, ...) appear to be
 * listing line numbers fused onto the code, and BOCU1_TRAIL_CONTROLS_COUNT20
 * is #defined as 20 earlier in this listing -- confirm against the pristine
 * source.
 */
180static const int8_t
181bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT20]={
182/*  0     1     2     3     4     5     6     7    */
  /* trail values 0..5 use bytes 0x01..0x06; 6 and 7 continue at 0x10 */
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,

185/*  8     9     a     b     c     d     e     f    */
  /* trail values 8..0x0f use bytes 0x12..0x19 */
  0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,

188/*  10    11    12    13   */
  /* bytes 0x1a and 0x1b are skipped; trail values 0x10..0x13 use 0x1c..0x1f */
  0x1c, 0x1d, 0x1e, 0x1f
190};

192/**
* Integer division and modulo with negative numerators
* yield negative modulo results and quotients that are one more than
* what we need here.
* This macro adjusts the results so that the modulo value m is always >=0.
*
* For positive n, the if() condition is always FALSE.
*
* @param n Number to be split into quotient and rest.
*          Will be modified to contain the quotient.
* @param d Divisor.
* @param m Output variable for the rest (modulo result).
*/
205#define NEGDIVMOD(n, d, m)do { (m)=(n)%(d); (n)/=(d); if((m)<0) { --(n); (m)+=(d); }
 } while (false) UPRV_BLOCK_MACRO_BEGINdo { \
  (m)=(n)%(d); \
  (n)/=(d); \
  if((m)<0) { \
      --(n); \
      (m)+=(d); \
  } \
212} UPRV_BLOCK_MACRO_ENDwhile (false)

214/* Faster versions of packDiff() for single-byte-encoded diff values. */

216/** Is a diff value encodable in a single byte? */
217#define DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1)) (BOCU1_REACH_NEG_1(-64)<=(diff) && (diff)<=BOCU1_REACH_POS_1(64 -1))

219/** Encode a diff value in a single byte. */
220#define PACK_SINGLE_DIFF(diff)(0x90 +(diff)) (BOCU1_MIDDLE0x90+(diff))

222/** Is a diff value encodable in two bytes? */
223#define DIFF_IS_DOUBLE(diff)(((-64)-43*((0xff -0x21 +1)+20))<=(diff) && (diff)
<=((64 -1)+43*((0xff -0x21 +1)+20))) (BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20))<=(diff) && (diff)<=BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20)))

225/* BOCU-1 implementation functions ------------------------------------------ */

227#define BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40) (((c)&~0x7f)+BOCU1_ASCII_PREV0x40)

229/**
* Compute the next "previous" value for differencing
* from the current code point.
*
* @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
* @return "previous code point" state value
*/
/*
 * Slow-path helper behind the BOCU1_PREV(c) macro, which only calls it for
 * 0x3040<=c<=0xd7a3. That is why the outer range checks below are commented out.
 *
 * NOTE(review): the leading decimal numbers (236, 237, 252) appear to be
 * listing line numbers fused onto the code, and each macro name is followed by
 * its printed expansion -- confirm against the pristine source.
 */
236static inline int32_t
237bocu1Prev(int32_t c) {
  /* compute new prev */
  /* lower bound 0x3040 is guaranteed by the calling macro, so only the upper bound is tested */
  if(/* 0x3040<=c && */ c<=0x309f) {
      /* Hiragana is not 128-aligned */
      return 0x3070;
  } else if(0x4e00<=c && c<=0x9fa5) {
      /* CJK Unihan */
      /*
       * BOCU1_REACH_NEG_2 is negative, so this is 0x4e00 plus the two-byte
       * negative reach: the result is the state from which 0x4e00 is the
       * most distant code point still reachable with a two-byte negative
       * difference.
       */
      return 0x4e00-BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20));
  } else if(0xac00<=c /* && c<=0xd7a3 */) {
      /* Korean Hangul */
      /* integer midpoint of 0xac00..0xd7a3; the upper bound is guaranteed by the calling macro */
      return (0xd7a3+0xac00)/2;
  } else {
      /* mostly small scripts */
      /* middle (offset 0x40) of the 128-aligned block that contains c */
      return BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
  }
252}

254/** Fast version of bocu1Prev() for most scripts. */
255#define BOCU1_PREV(c)((c)<0x3040 || (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev
(c)) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40) : bocu1Prev(c))

257/*
* The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
* The UConverter fields are used as follows:
*
* fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
*
* toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
* mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
*/

267/* BOCU-1-from-Unicode conversion functions --------------------------------- */

269/**
* Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
* and return a packed integer with them.
*
* The encoding favors small absolute differences with short encodings
* to compress runs of same-script characters.
*
* Optimized version with unrolled loops and fewer floating-point operations
* than the standard packDiff().
*
* @param diff difference value -0x10ffff..0x10ffff
* @return
*      0x010000zz for 1-byte sequence zz
*      0x0200yyzz for 2-byte sequence yy zz
*      0x03xxyyzz for 3-byte sequence xx yy zz
*      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
*/
/*
 * Packs the 2..4 BOCU-1 bytes for diff into one int32_t, lead byte in the
 * most significant used position and the last trail byte in bits 0..7.
 * For 2- and 3-byte results the top byte holds the byte count; for 4-byte
 * results the top byte is the lead byte itself.
 *
 * NOTE(review): the leading decimal numbers (286, 287, 293, 298, 388) appear
 * to be listing line numbers fused onto the code, and each macro use is
 * followed by its printed expansion -- confirm against the pristine source.
 */
286static int32_t
287packDiff(int32_t diff) {
  int32_t result, m;

  U_ASSERT(!DIFF_IS_SINGLE(diff))(void)0; /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
  if(diff>=BOCU1_REACH_NEG_1(-64)) {
      /* mostly positive differences, and single-byte negative ones */
293#if 0   /* single-byte case handled in macros, see below */
      if(diff<=BOCU1_REACH_POS_1(64 -1)) {
          /* single byte */
          return 0x01000000|(BOCU1_MIDDLE0x90+diff);
      } else
298#endif
      if(diff<=BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))) {
          /* two bytes */
          /* rebase so that the smallest two-byte difference becomes 0 */
          diff-=BOCU1_REACH_POS_1(64 -1)+1;
          /* byte count 2 goes into the top byte of the packed result */
          result=0x02000000;

          /* the remainder modulo the trail count becomes the trail byte (bits 0..7) */
          m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);

          /* the quotient selects the lead byte, stored in bits 8..15 */
          result|=(BOCU1_START_POS_2(0x90 +(64 -1)+1)+diff)<<8;
      } else if(diff<=BOCU1_REACH_POS_3(((64 -1)+43*((0xff -0x21 +1)+20))+3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20))) {
          /* three bytes */
          /* rebase so that the smallest three-byte difference becomes 0 */
          diff-=BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))+1;
          result=0x03000000;

          /* last trail byte (bits 0..7) */
          m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);

          /* first trail byte (bits 8..15) */
          m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;

          /* lead byte (bits 16..23) */
          result|=(BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)+diff)<<16;
      } else {
          /* four bytes */
          /* rebase so that the smallest four-byte difference becomes 0 */
          diff-=BOCU1_REACH_POS_3(((64 -1)+43*((0xff -0x21 +1)+20))+3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20))+1;

          /* no byte-count marker here: result starts from the last trail byte */
          m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          result=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);

          m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;

          /*
           * We know that / and % would deliver quotient 0 and rest=diff.
           * Avoid division and modulo for performance.
           */
          result|=BOCU1_TRAIL_TO_BYTE(diff)((diff)>=20 ? (diff)+(0x21 -20) : bocu1TrailToByte[diff])<<16;

          /*
           * The single positive four-byte lead byte occupies the top byte.
           * The operand is cast to uint32_t before the shift, presumably so
           * that the value is not shifted into the sign bit of a signed int.
           */
          result|=((uint32_t)BOCU1_START_POS_4(((0x90 +(64 -1)+1)+43)+3))<<24;
      }
  } else {
      /* two- to four-byte negative differences */
      if(diff>=BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20))) {
          /* two bytes */
          /* rebase so that the difference just below the single-byte range becomes -1 */
          diff-=BOCU1_REACH_NEG_1(-64);
          result=0x02000000;

          /* NEGDIVMOD leaves m>=0 and diff as the correspondingly decremented (negative) quotient */
          NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);

          /* the negative quotient moves the lead byte below BOCU1_START_NEG_2 */
          result|=(BOCU1_START_NEG_2(0x90 +(-64))+diff)<<8;
      } else if(diff>=BOCU1_REACH_NEG_3(((-64)-43*((0xff -0x21 +1)+20))-3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20))) {
          /* three bytes */
          diff-=BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20));
          result=0x03000000;

          /* last trail byte (bits 0..7) */
          NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);

          /* first trail byte (bits 8..15) */
          NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;

          /* lead byte (bits 16..23) */
          result|=(BOCU1_START_NEG_3((0x90 +(-64))-43)+diff)<<16;
      } else {
          /* four bytes */
          diff-=BOCU1_REACH_NEG_3(((-64)-43*((0xff -0x21 +1)+20))-3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20));

          /* no byte-count marker here: result starts from the last trail byte */
          NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
          result=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);

          NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;

          /*
           * We know that NEGDIVMOD would deliver
           * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
           * Avoid division and modulo for performance.
           */
          m=diff+BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
          result|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<16;

          /* the negative four-byte lead byte is the constant BOCU1_MIN (0x21), placed in the top byte */
          result|=BOCU1_MIN0x21<<24;
      }
  }
  return result;
388}


/*
 * Converts UTF-16 code units from pArgs->source into BOCU-1 bytes written to
 * pArgs->target, and writes one source index into pArgs->offsets for every
 * output byte.
 *
 * State carried across calls in the converter:
 *   cnv->fromUChar32        a lead surrogate whose trail has not arrived yet (0 if none)
 *   cnv->fromUnicodeStatus  the encoder's "prev" value (0 is treated as BOCU1_ASCII_PREV)
 *
 * When the target fills up, *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR; bytes
 * of a partially written sequence that did not fit go to cnv->charErrorBuffer.
 * On return, pArgs->source, pArgs->target and pArgs->offsets are advanced.
 *
 * NOTE(review): the leading decimal numbers (391, 392, 428, 485, 643) appear to
 * be listing line numbers fused onto the code, and each macro use is followed
 * by its printed expansion -- confirm against the pristine source.
 */
391static void U_CALLCONV
392_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                           UErrorCode *pErrorCode) {
  UConverter *cnv;
  const UChar *source, *sourceLimit;
  uint8_t *target;
  int32_t targetCapacity;
  int32_t *offsets;

  int32_t prev, c, diff;

  int32_t sourceIndex, nextSourceIndex;

  /* set up the local pointers */
  cnv=pArgs->converter;
  source=pArgs->source;
  sourceLimit=pArgs->sourceLimit;
  target=(uint8_t *)pArgs->target;
  targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  offsets=pArgs->offsets;

  /* get the converter state from UConverter */
  c=cnv->fromUChar32;
  prev=(int32_t)cnv->fromUnicodeStatus;
  if(prev==0) {
      prev=BOCU1_ASCII_PREV0x40;
  }

  /* sourceIndex=-1 if the current character began in the previous buffer */
  sourceIndex= c==0 ? 0 : -1;
  nextSourceIndex=0;

  /* conversion loop */
  /*
   * A pending lead surrogate resumes directly at the trail-surrogate check
   * inside the regular loop.
   * NOTE(review): when c!=0 but targetCapacity==0 this jump is skipped; c then
   * stays positive and the write-back at the end stores 0 in fromUChar32, which
   * looks like it drops the pending lead surrogate -- confirm callers never
   * enter with a pending surrogate and no target space.
   */
  if(c!=0 && targetCapacity>0) {
      goto getTrail;
  }

428fastSingle:
  /* fast loop for single-byte differences */
  /* use only one loop counter variable, targetCapacity, not also source */
  /* diff is borrowed here as a scratch variable for the remaining source length */
  diff=(int32_t)(sourceLimit-source);
  if(targetCapacity>diff) {
      targetCapacity=diff;
  }
  /* stays in this loop only while each code unit is below 0x3000 and encodes as exactly one byte */
  while(targetCapacity>0 && (c=*source)<0x3000) {
      if(c<=0x20) {
          /* C0 controls and space are written as themselves; only the controls reset prev */
          if(c!=0x20) {
              prev=BOCU1_ASCII_PREV0x40;
          }
          *target++=(uint8_t)c;
          *offsets++=nextSourceIndex++;
          ++source;
          --targetCapacity;
      } else {
          diff=c-prev;
          if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
              prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
              *target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
              *offsets++=nextSourceIndex++;
              ++source;
              --targetCapacity;
          } else {
              /* needs more than one byte: leave the code unit unconsumed for the regular loop */
              break;
          }
      }
  }
  /* restore real values */
  targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
  sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */

  /* regular loop for all cases */
  while(source<sourceLimit) {
      if(targetCapacity>0) {
          c=*source++;
          ++nextSourceIndex;

          if(c<=0x20) {
              /*
               * ISO C0 control & space:
               * Encode directly for MIME compatibility,
               * and reset state except for space, to not disrupt compression.
               */
              if(c!=0x20) {
                  prev=BOCU1_ASCII_PREV0x40;
              }
              *target++=(uint8_t)c;
              *offsets++=sourceIndex;
              --targetCapacity;

              sourceIndex=nextSourceIndex;
              continue;
          }

          if(U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
485getTrail:
              /* also entered via goto at function start when a lead surrogate was saved by a previous call */
              if(source<sourceLimit) {
                  /* test the following code unit */
                  UChar trail=*source;
                  if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
                      ++source;
                      ++nextSourceIndex;
                      c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000));
                  }
                  /* otherwise c stays the unpaired lead surrogate and is encoded as it is below */
              } else {
                  /* no more input */
                  c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
                  break;
              }
          }

          /*
           * all other Unicode code points c==U+0021..U+10ffff
           * are encoded with the difference c-prev
           *
           * a new prev is computed from c,
           * placed in the middle of a 0x80-block (for most small scripts) or
           * in the middle of the Unihan and Hangul blocks
           * to statistically minimize the following difference
           */
          diff=c-prev;
          prev=BOCU1_PREV(c)((c)<0x3040 || (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev
(c));
          if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
              *target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
              *offsets++=sourceIndex;
              --targetCapacity;
              sourceIndex=nextSourceIndex;
              /* back to the tight loop, whose loop condition only accepts code units below 0x3000 */
              if(c<0x3000) {
                  goto fastSingle;
              }
          } else if(DIFF_IS_DOUBLE(diff)(((-64)-43*((0xff -0x21 +1)+20))<=(diff) && (diff)
<=((64 -1)+43*((0xff -0x21 +1)+20))) && 2<=targetCapacity) {
              /* optimize 2-byte case */
              /* taken only when both bytes fit; otherwise the general path below handles the overflow split */
              int32_t m;

              if(diff>=0) {
                  diff-=BOCU1_REACH_POS_1(64 -1)+1;
                  m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
                  diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
                  diff+=BOCU1_START_POS_2(0x90 +(64 -1)+1);
              } else {
                  diff-=BOCU1_REACH_NEG_1(-64);
                  NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
                  diff+=BOCU1_START_NEG_2(0x90 +(-64));
              }
              /* diff now holds the lead byte and m the trail value */
              *target++=(uint8_t)diff;
              *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
              /* both output bytes report the same source index */
              *offsets++=sourceIndex;
              *offsets++=sourceIndex;
              targetCapacity-=2;
              sourceIndex=nextSourceIndex;
          } else {
              int32_t length; /* will be 2..4 */

              /* diff is reused from here on to hold the packed output bytes */
              diff=packDiff(diff);
              length=BOCU1_LENGTH_FROM_PACKED(diff)((uint32_t)(diff)<0x04000000 ? (diff)>>24 : 4);

              /* write the output character bytes from diff and length */
              /* from the first if in the loop we know that targetCapacity>0 */
              if(length<=targetCapacity) {
                  switch(length) {
                      /* each branch falls through to the next one */
                  case 4:
                      *target++=(uint8_t)(diff>>24);
                      *offsets++=sourceIndex;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 3:
                      *target++=(uint8_t)(diff>>16);
                      *offsets++=sourceIndex;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 2:
                      *target++=(uint8_t)(diff>>8);
                      *offsets++=sourceIndex;
                  /* case 1: handled above */
                      /* the final byte is written inside case 2 because length is never 1 here */
                      *target++=(uint8_t)diff;
                      *offsets++=sourceIndex;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  default:
                      /* will never occur */
                      break;
                  }
                  targetCapacity-=length;
                  sourceIndex=nextSourceIndex;
              } else {
                  uint8_t *charErrorBuffer;

                  /*
                   * We actually do this backwards here:
                   * In order to save an intermediate variable, we output
                   * first to the overflow buffer what does not fit into the
                   * regular target.
                   */
                  /* we know that 1<=targetCapacity<length<=4 */
                  /* length becomes the number of bytes that do NOT fit (1..3) */
                  length-=targetCapacity;
                  charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
                  switch(length) {
                      /* each branch falls through to the next one */
                  case 3:
                      *charErrorBuffer++=(uint8_t)(diff>>16);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 2:
                      *charErrorBuffer++=(uint8_t)(diff>>8);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 1:
                      *charErrorBuffer=(uint8_t)diff;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  default:
                      /* will never occur */
                      break;
                  }
                  cnv->charErrorBufferLength=(int8_t)length;

                  /* now output what fits into the regular target */
                  /* drop the bytes just saved so that the remaining ones sit in the low-order positions */
                  diff>>=8*length; /* length was reduced by targetCapacity */
                  switch(targetCapacity) {
                      /* each branch falls through to the next one */
                  case 3:
                      *target++=(uint8_t)(diff>>16);
                      *offsets++=sourceIndex;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 2:
                      *target++=(uint8_t)(diff>>8);
                      *offsets++=sourceIndex;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 1:
                      *target++=(uint8_t)diff;
                      *offsets++=sourceIndex;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  default:
                      /* will never occur */
                      break;
                  }

                  /* target overflow */
                  targetCapacity=0;
                  *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                  break;
              }
          }
      } else {
          /* target is full */
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          break;
      }
  }

  /* set the converter state back into UConverter */
  /* c is negative only when a lead surrogate was left waiting for its trail (see getTrail) */
  cnv->fromUChar32= c<0 ? -c : 0;
  cnv->fromUnicodeStatus=(uint32_t)prev;

  /* write back the updated pointers */
  pArgs->source=source;
  pArgs->target=(char *)target;
  pArgs->offsets=offsets;
643}

645/*
* Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
* If a change is made in the original function, then either
* change this function the same way or
* re-copy the original function and remove the variables
* offsets, sourceIndex, and nextSourceIndex.
*/
652static void U_CALLCONV
653_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
                UErrorCode *pErrorCode) {
  UConverter *cnv;
  const UChar *source, *sourceLimit;
  uint8_t *target;
  int32_t targetCapacity;

  int32_t prev, c, diff;

  /* set up the local pointers */
  cnv=pArgs->converter;
  source=pArgs->source;
  sourceLimit=pArgs->sourceLimit;
  target=(uint8_t *)pArgs->target;
  targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);

  /* get the converter state from UConverter */
  c=cnv->fromUChar32;
  prev=(int32_t)cnv->fromUnicodeStatus;
  if(prev==0) {
      prev=BOCU1_ASCII_PREV0x40;
  }

  /* conversion loop */
  if(c!=0 && targetCapacity>0) {
      goto getTrail;
  }

681fastSingle:
  /* fast loop for single-byte differences */
  /* use only one loop counter variable, targetCapacity, not also source */
  diff=(int32_t)(sourceLimit-source);
  if(targetCapacity>diff) {
      targetCapacity=diff;
  }
  while(targetCapacity>0 && (c=*source)<0x3000) {
      if(c<=0x20) {
          if(c!=0x20) {
              prev=BOCU1_ASCII_PREV0x40;
          }
          *target++=(uint8_t)c;
      } else {
          diff=c-prev;
          if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
              prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
              *target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
          } else {
              break;
          }
      }
      ++source;
      --targetCapacity;
  }
  /* restore real values */
  targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);

  /* regular loop for all cases */
  while(source<sourceLimit) {
      if(targetCapacity>0) {
          c=*source++;

          if(c<=0x20) {
              /*
               * ISO C0 control & space:
               * Encode directly for MIME compatibility,
               * and reset state except for space, to not disrupt compression.
               */
              if(c!=0x20) {
                  prev=BOCU1_ASCII_PREV0x40;
              }
              *target++=(uint8_t)c;
              --targetCapacity;
              continue;
          }

          if(U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
729getTrail:
              if(source<sourceLimit) {
                  /* test the following code unit */
                  UChar trail=*source;
                  if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
                      ++source;
                      c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<<
10UL)+0xdc00-0x10000));
                  }
              } else {
                  /* no more input */
                  c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
                  break;
              }
          }

          /*
           * all other Unicode code points c==U+0021..U+10ffff
           * are encoded with the difference c-prev
           *
           * a new prev is computed from c,
           * placed in the middle of a 0x80-block (for most small scripts) or
           * in the middle of the Unihan and Hangul blocks
           * to statistically minimize the following difference
           */
          diff=c-prev;
          prev=BOCU1_PREV(c)((c)<0x3040 || (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev
(c));
          if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
              *target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
              --targetCapacity;
              if(c<0x3000) {
                  goto fastSingle;
              }
          } else if(DIFF_IS_DOUBLE(diff)(((-64)-43*((0xff -0x21 +1)+20))<=(diff) && (diff)
<=((64 -1)+43*((0xff -0x21 +1)+20))) && 2<=targetCapacity) {
              /* optimize 2-byte case */
              int32_t m;

              if(diff>=0) {
                  diff-=BOCU1_REACH_POS_1(64 -1)+1;
                  m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
                  diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
                  diff+=BOCU1_START_POS_2(0x90 +(64 -1)+1);
              } else {
                  diff-=BOCU1_REACH_NEG_1(-64);
                  NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21
 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20
)); } } while (false);
                  diff+=BOCU1_START_NEG_2(0x90 +(-64));
              }
              *target++=(uint8_t)diff;
              *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
              targetCapacity-=2;
          } else {
              int32_t length; /* will be 2..4 */

              diff=packDiff(diff);
              length=BOCU1_LENGTH_FROM_PACKED(diff)((uint32_t)(diff)<0x04000000 ? (diff)>>24 : 4);

              /* write the output character bytes from diff and length */
              /* from the first if in the loop we know that targetCapacity>0 */
              if(length<=targetCapacity) {
                  switch(length) {
                      /* each branch falls through to the next one */
                  case 4:
                      *target++=(uint8_t)(diff>>24);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 3:
                      *target++=(uint8_t)(diff>>16);
                  /* case 2: handled above */
                      *target++=(uint8_t)(diff>>8);
                  /* case 1: handled above */
                      *target++=(uint8_t)diff;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  default:
                      /* will never occur */
                      break;
                  }
                  targetCapacity-=length;
              } else {
                  uint8_t *charErrorBuffer;

                  /*
                   * We actually do this backwards here:
                   * In order to save an intermediate variable, we output
                   * first to the overflow buffer what does not fit into the
                   * regular target.
                   */
                  /* we know that 1<=targetCapacity<length<=4 */
                  length-=targetCapacity;
                  charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
                  switch(length) {
                      /* each branch falls through to the next one */
                  case 3:
                      *charErrorBuffer++=(uint8_t)(diff>>16);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 2:
                      *charErrorBuffer++=(uint8_t)(diff>>8);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 1:
                      *charErrorBuffer=(uint8_t)diff;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  default:
                      /* will never occur */
                      break;
                  }
                  cnv->charErrorBufferLength=(int8_t)length;

                  /* now output what fits into the regular target */
                  diff>>=8*length; /* length was reduced by targetCapacity */
                  switch(targetCapacity) {
                      /* each branch falls through to the next one */
                  case 3:
                      *target++=(uint8_t)(diff>>16);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 2:
                      *target++=(uint8_t)(diff>>8);
                      U_FALLTHROUGH[[clang::fallthrough]];
                  case 1:
                      *target++=(uint8_t)diff;
                      U_FALLTHROUGH[[clang::fallthrough]];
                  default:
                      /* will never occur */
                      break;
                  }

                  /* target overflow */
                  targetCapacity=0;
                  *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                  break;
              }
          }
      } else {
          /* target is full */
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          break;
      }
  }

  /* set the converter state back into UConverter */
  cnv->fromUChar32= c<0 ? -c : 0;
  cnv->fromUnicodeStatus=(uint32_t)prev;

  /* write back the updated pointers */
  pArgs->source=source;
  pArgs->target=(char *)target;
871}

/* BOCU-1-to-Unicode conversion functions ----------------------------------- */

875/**
* Function for BOCU-1 decoder; handles multi-byte lead bytes.
*
* @param b lead byte;
*          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
* @return (diff<<2)|count
*/
882static inline int32_t
883decodeBocu1LeadByte(int32_t b) {
  int32_t diff, count;

  if(b>=BOCU1_START_NEG_2(0x90 +(-64))) {
17
←
Taking false branch→
      /* positive difference */
      if(b<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)) {
          /* two bytes */
          diff=((int32_t)b-BOCU1_START_POS_2(0x90 +(64 -1)+1))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_1(64 -1)+1;
          count=1;
      } else if(b<BOCU1_START_POS_4(((0x90 +(64 -1)+1)+43)+3)) {
          /* three bytes */
          diff=((int32_t)b-BOCU1_START_POS_3((0x90 +(64 -1)+1)+43))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))+1;
          count=2;
      } else {
          /* four bytes */
          diff=BOCU1_REACH_POS_3(((64 -1)+43*((0xff -0x21 +1)+20))+3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20))+1;
          count=3;
      }
  } else {
      /* negative difference */
      if(b>=BOCU1_START_NEG_3((0x90 +(-64))-43)) {
18
←
Taking false branch→
          /* two bytes */
          diff=((int32_t)b-BOCU1_START_NEG_2(0x90 +(-64)))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_1(-64);
          count=1;
      } else if(b>BOCU1_MIN0x21) {
19
←
Assuming 'b' is <= BOCU1_MIN→
20
←
Taking false branch→
          /* three bytes */
          diff=((int32_t)b-BOCU1_START_NEG_3((0x90 +(-64))-43))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20));
          count=2;
      } else {
          /* four bytes */
          diff=-BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_3(((-64)-43*((0xff -0x21 +1)+20))-3*((0xff -0x21 +1)+20)*((0xff
 -0x21 +1)+20));
21
←
The value -14536567 is assigned to 'diff'→
          count=3;
      }
  }

  /* return the state for decoding the trail byte(s) */
  return (diff<<2)|count;
22
←
The result of the left shift is undefined because the left operand is negative
920}

922/**
* Function for BOCU-1 decoder; handles multi-byte trail bytes.
*
* @param count number of remaining trail bytes including this one
* @param b trail byte
* @return new delta for diff including b - <0 indicates an error
*
* @see decodeBocu1
*/
931static inline int32_t
932decodeBocu1TrailByte(int32_t count, int32_t b) {
  if(b<=0x20) {
      /* skip some C0 controls and make the trail byte range contiguous */
      b=bocu1ByteToTrail[b];
      /* b<0 for an illegal trail byte value will result in return<0 below */
937#if BOCU1_MAX_TRAIL0xff<0xff
  } else if(b>BOCU1_MAX_TRAIL0xff) {
      return -99;
940#endif
  } else {
      b-=BOCU1_TRAIL_BYTE_OFFSET(0x21 -20);
  }

  /* add trail byte into difference and decrement count */
  if(count==1) {
      return b;
  } else if(count==2) {
      return b*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
  } else /* count==3 */ {
      return b*(BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20));
  }
953}

955static void U_CALLCONV
956_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                         UErrorCode *pErrorCode) {
  UConverter *cnv;
  const uint8_t *source, *sourceLimit;
  UChar *target;
  const UChar *targetLimit;
  int32_t *offsets;

  int32_t prev, count, diff, c;

  int8_t byteIndex;
  uint8_t *bytes;

  int32_t sourceIndex, nextSourceIndex;

  /* set up the local pointers */
  cnv=pArgs->converter;
  source=(const uint8_t *)pArgs->source;
  sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  target=pArgs->target;
  targetLimit=pArgs->targetLimit;
  offsets=pArgs->offsets;

  /* get the converter state from UConverter */
  prev=(int32_t)cnv->toUnicodeStatus;
  if(prev==0) {
      prev=BOCU1_ASCII_PREV0x40;
  }
  diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
  count=diff&3;
  diff>>=2;

  byteIndex=cnv->toULength;
  bytes=cnv->toUBytes;

  /* sourceIndex=-1 if the current character began in the previous buffer */
  sourceIndex=byteIndex==0 ? 0 : -1;
  nextSourceIndex=0;

  /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
  if(count>0 && byteIndex>0 && target<targetLimit) {
      goto getTrail;
  }

1000fastSingle:
  /* fast loop for single-byte differences */
  /* use count as the only loop counter variable */
  diff=(int32_t)(sourceLimit-source);
  count=(int32_t)(pArgs->targetLimit-target);
  if(count>diff) {
      count=diff;
  }
  while(count>0) {
      if(BOCU1_START_NEG_2(0x90 +(-64))<=(c=*source) && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
          c=prev+(c-BOCU1_MIDDLE0x90);
          if(c<0x3000) {
              *target++=(UChar)c;
              *offsets++=nextSourceIndex++;
              prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
          } else {
              break;
          }
      } else if(c<=0x20) {
          if(c!=0x20) {
              prev=BOCU1_ASCII_PREV0x40;
          }
          *target++=(UChar)c;
          *offsets++=nextSourceIndex++;
      } else {
          break;
      }
      ++source;
      --count;
  }
  sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */

  /* decode a sequence of single and lead bytes */
  while(source<sourceLimit) {
      if(target>=targetLimit) {
          /* target is full */
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          break;
      }

      ++nextSourceIndex;
      c=*source++;
      if(BOCU1_START_NEG_2(0x90 +(-64))<=c && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
          /* Write a code point directly from a single-byte difference. */
          c=prev+(c-BOCU1_MIDDLE0x90);
          if(c<0x3000) {
              *target++=(UChar)c;
              *offsets++=sourceIndex;
              prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
              sourceIndex=nextSourceIndex;
              goto fastSingle;
          }
      } else if(c<=0x20) {
          /*
           * Direct-encoded C0 control code or space.
           * Reset prev for C0 control codes but not for space.
           */
          if(c!=0x20) {
              prev=BOCU1_ASCII_PREV0x40;
          }
          *target++=(UChar)c;
          *offsets++=sourceIndex;
          sourceIndex=nextSourceIndex;
          continue;
      } else if(BOCU1_START_NEG_3((0x90 +(-64))-43)<=c && c<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43) && source<sourceLimit) {
          /* Optimize two-byte case. */
          if(c>=BOCU1_MIDDLE0x90) {
              diff=((int32_t)c-BOCU1_START_POS_2(0x90 +(64 -1)+1))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_1(64 -1)+1;
          } else {
              diff=((int32_t)c-BOCU1_START_NEG_2(0x90 +(-64)))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_1(-64);
          }

          /* trail byte */
          ++nextSourceIndex;
          c=decodeBocu1TrailByte(1, *source++);
          if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
              bytes[0]=source[-2];
              bytes[1]=source[-1];
              byteIndex=2;
              *pErrorCode=U_ILLEGAL_CHAR_FOUND;
              break;
          }
      } else if(c==BOCU1_RESET0xff) {
          /* only reset the state, no code point */
          prev=BOCU1_ASCII_PREV0x40;
          sourceIndex=nextSourceIndex;
          continue;
      } else {
          /*
           * For multi-byte difference lead bytes, set the decoder state
           * with the partial difference value from the lead byte and
           * with the number of trail bytes.
           */
          bytes[0]=(uint8_t)c;
          byteIndex=1;

          diff=decodeBocu1LeadByte(c);
          count=diff&3;
          diff>>=2;
1099getTrail:
          for(;;) {
              if(source>=sourceLimit) {
                  goto endloop;
              }
              ++nextSourceIndex;
              c=bytes[byteIndex++]=*source++;

              /* trail byte in any position */
              c=decodeBocu1TrailByte(count, c);
              if(c<0) {
                  *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                  goto endloop;
              }

              diff+=c;
              if(--count==0) {
                  /* final trail byte, deliver a code point */
                  byteIndex=0;
                  c=prev+diff;
                  if((uint32_t)c>0x10ffff) {
                      *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                      goto endloop;
                  }
                  break;
              }
          }
      }

      /* calculate the next prev and output c */
      prev=BOCU1_PREV(c)((c)<0x3040 || (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev
(c));
      if(c<=0xffff) {
          *target++=(UChar)c;
          *offsets++=sourceIndex;
      } else {
          /* output surrogate pair */
          *target++=U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
          if(target<targetLimit) {
              *target++=U16_TRAIL(c)(UChar)(((c)&0x3ff)|0xdc00);
              *offsets++=sourceIndex;
              *offsets++=sourceIndex;
          } else {
              /* target overflow */
              *offsets++=sourceIndex;
              cnv->UCharErrorBuffer[0]=U16_TRAIL(c)(UChar)(((c)&0x3ff)|0xdc00);
              cnv->UCharErrorBufferLength=1;
              *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
              break;
          }
      }
      sourceIndex=nextSourceIndex;
  }
1151endloop:

  if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
      /* set the converter state in UConverter to deal with the next character */
      cnv->toUnicodeStatus=BOCU1_ASCII_PREV0x40;
      cnv->mode=0;
  } else {
      /* set the converter state back into UConverter */
      cnv->toUnicodeStatus=(uint32_t)prev;
      cnv->mode=(diff<<2)|count;
  }
  cnv->toULength=byteIndex;

  /* write back the updated pointers */
  pArgs->source=(const char *)source;
  pArgs->target=target;
  pArgs->offsets=offsets;
  return;
1169}

1171/*
* Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
* If a change is made in the original function, then either
* change this function the same way or
* re-copy the original function and remove the variables
* offsets, sourceIndex, and nextSourceIndex.
*/
1178static void U_CALLCONV
1179_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
              UErrorCode *pErrorCode) {
  UConverter *cnv;
  const uint8_t *source, *sourceLimit;
  UChar *target;
  const UChar *targetLimit;

  int32_t prev, count, diff, c;

  int8_t byteIndex;
  uint8_t *bytes;

  /* set up the local pointers */
  cnv=pArgs->converter;
  source=(const uint8_t *)pArgs->source;
  sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  target=pArgs->target;
  targetLimit=pArgs->targetLimit;

  /* get the converter state from UConverter */
  prev=(int32_t)cnv->toUnicodeStatus;
  if(prev==0) {
1
Assuming 'prev' is not equal to 0→
2
←
Taking false branch→
      prev=BOCU1_ASCII_PREV0x40;
  }
  diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
  count=diff&3;
  diff>>=2;

  byteIndex=cnv->toULength;
  bytes=cnv->toUBytes;

  /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
  if(count>0 && byteIndex>0 && target<targetLimit) {
3
←
Assuming 'count' is <= 0→
      goto getTrail;
  }

1215fastSingle:
  /* fast loop for single-byte differences */
  /* use count as the only loop counter variable */
  diff=(int32_t)(sourceLimit-source);
  count=(int32_t)(pArgs->targetLimit-target);
  if(count>diff) {
4
←
Assuming 'count' is <= 'diff'→
5
←
Taking false branch→
      count=diff;
  }
  while(count>0) {
6
←
Assuming 'count' is > 0→
      if(BOCU1_START_NEG_2(0x90 +(-64))<=(c=*source) && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
7
←
Assuming the condition is false→
          c=prev+(c-BOCU1_MIDDLE0x90);
          if(c<0x3000) {
              *target++=(UChar)c;
              prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
          } else {
              break;
          }
      } else if(c<=0x20) {
8
←
Assuming 'c' is > 32→
9
←
Taking false branch→
          if(c!=0x20) {
              prev=BOCU1_ASCII_PREV0x40;
          }
          *target++=(UChar)c;
      } else {
          break;
      }
      ++source;
      --count;
  }

  /* decode a sequence of single and lead bytes */
  while(source<sourceLimit) {
10
←
 Execution continues on line 1245→
11
←
Assuming 'source' is < 'sourceLimit'→
12
←
Loop condition is true.  Entering loop body→
      if(target12.1
'target' is < 'targetLimit'
>=targetLimit) {
13
←
Taking false branch→
          /* target is full */
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          break;
      }

      c=*source++;
      if(BOCU1_START_NEG_2(0x90 +(-64))<=c && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
          /* Write a code point directly from a single-byte difference. */
          c=prev+(c-BOCU1_MIDDLE0x90);
          if(c<0x3000) {
              *target++=(UChar)c;
              prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
              goto fastSingle;
          }
      } else if(c13.1
'c' is > 32
<=0x20) {
          /*
           * Direct-encoded C0 control code or space.
           * Reset prev for C0 control codes but not for space.
           */
          if(c!=0x20) {
              prev=BOCU1_ASCII_PREV0x40;
          }
          *target++=(UChar)c;
          continue;
      } else if(BOCU1_START_NEG_3((0x90 +(-64))-43)<=c && c<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43) && source<sourceLimit) {
14
←
Assuming the condition is false→
          /* Optimize two-byte case. */
          if(c>=BOCU1_MIDDLE0x90) {
              diff=((int32_t)c-BOCU1_START_POS_2(0x90 +(64 -1)+1))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_1(64 -1)+1;
          } else {
              diff=((int32_t)c-BOCU1_START_NEG_2(0x90 +(-64)))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_1(-64);
          }

          /* trail byte */
          c=decodeBocu1TrailByte(1, *source++);
          if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
              bytes[0]=source[-2];
              bytes[1]=source[-1];
              byteIndex=2;
              *pErrorCode=U_ILLEGAL_CHAR_FOUND;
              break;
          }
      } else if(c14.1
'c' is not equal to BOCU1_RESET
==BOCU1_RESET0xff) {
15
←
Taking false branch→
          /* only reset the state, no code point */
          prev=BOCU1_ASCII_PREV0x40;
          continue;
      } else {
          /*
           * For multi-byte difference lead bytes, set the decoder state
           * with the partial difference value from the lead byte and
           * with the number of trail bytes.
           */
          bytes[0]=(uint8_t)c;
          byteIndex=1;

          diff=decodeBocu1LeadByte(c);
16
←
Calling 'decodeBocu1LeadByte'→
          count=diff&3;
          diff>>=2;
1304getTrail:
          for(;;) {
              if(source>=sourceLimit) {
                  goto endloop;
              }
              c=bytes[byteIndex++]=*source++;

              /* trail byte in any position */
              c=decodeBocu1TrailByte(count, c);
              if(c<0) {
                  *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                  goto endloop;
              }

              diff+=c;
              if(--count==0) {
                  /* final trail byte, deliver a code point */
                  byteIndex=0;
                  c=prev+diff;
                  if((uint32_t)c>0x10ffff) {
                      *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                      goto endloop;
                  }
                  break;
              }
          }
      }

      /* calculate the next prev and output c */
      prev=BOCU1_PREV(c)((c)<0x3040 || (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev
(c));
      if(c<=0xffff) {
          *target++=(UChar)c;
      } else {
          /* output surrogate pair */
          *target++=U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
          if(target<targetLimit) {
              *target++=U16_TRAIL(c)(UChar)(((c)&0x3ff)|0xdc00);
          } else {
              /* target overflow */
              cnv->UCharErrorBuffer[0]=U16_TRAIL(c)(UChar)(((c)&0x3ff)|0xdc00);
              cnv->UCharErrorBufferLength=1;
              *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
              break;
          }
      }
  }
1350endloop:

  if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
      /* set the converter state in UConverter to deal with the next character */
      cnv->toUnicodeStatus=BOCU1_ASCII_PREV0x40;
      cnv->mode=0;
  } else {
      /* set the converter state back into UConverter */
      cnv->toUnicodeStatus=(uint32_t)prev;
      cnv->mode=(diff<<2)|count;
  }
  cnv->toULength=byteIndex;

  /* write back the updated pointers */
  pArgs->source=(const char *)source;
  pArgs->target=target;
  return;
1367}

/* miscellaneous ------------------------------------------------------------ */

1371static const UConverterImpl _Bocu1Impl={
  UCNV_BOCU1,

  NULL__null,
  NULL__null,

  NULL__null,
  NULL__null,
  NULL__null,

  _Bocu1ToUnicode,
  _Bocu1ToUnicodeWithOffsets,
  _Bocu1FromUnicode,
  _Bocu1FromUnicodeWithOffsets,
  NULL__null,

  NULL__null,
  NULL__null,
  NULL__null,
  NULL__null,
  ucnv_getCompleteUnicodeSetucnv_getCompleteUnicodeSet_71,

  NULL__null,
  NULL__null
1395};

1397static const UConverterStaticData _Bocu1StaticData={
  sizeof(UConverterStaticData),
  "BOCU-1",
  1214, /* CCSID for BOCU-1 */
  UCNV_IBM, UCNV_BOCU1,
  1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
  { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
  FALSE0, FALSE0,
  0,
  0,
  { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1408};

1410const UConverterSharedData _Bocu1Data_Bocu1Data_71=
      UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, &
_Bocu1StaticData, false, false, &_Bocu1Impl, 0, { 0, 0, 0
, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null
, __null, 0, 0, 0, false, 0, 0, __null, __null, __null, __null
 } };

1413#endif