../deps/icu-small/source/common/ucnvbocu.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/ucnvbocu.cpp
Warning:	line 623, column 21 Value stored to 'targetCapacity' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ucnvbocu.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/ucnvbocu.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	*
6	* Copyright (C) 2002-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	******************************************************************************
10	* file name: ucnvbocu.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2002mar27
16	* created by: Markus W. Scherer
17	*
18	* This is an implementation of the Binary Ordered Compression for Unicode,
19	* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
20	*/
21
22	#include "unicode/utypes.h"
23
24	#if !UCONFIG_NO_CONVERSION0 && !UCONFIG_ONLY_HTML_CONVERSION0
25
26	#include "unicode/ucnv.h"
27	#include "unicode/ucnv_cb.h"
28	#include "unicode/utf16.h"
29	#include "putilimp.h"
30	#include "ucnv_bld.h"
31	#include "ucnv_cnv.h"
32	#include "uassert.h"
33
34	/* BOCU-1 constants and macros ---------------------------------------------- */
35
36	/*
37	* BOCU-1 encodes the code points of a Unicode string as
38	* a sequence of byte-encoded differences (slope detection),
39	* preserving lexical order.
40	*
41	* Optimize the difference-taking for runs of Unicode text within
42	* small scripts:
43	*
44	* Most small scripts are allocated within aligned 128-blocks of Unicode
45	* code points. Lexical order is preserved if the "previous code point" state
46	* is always moved into the middle of such a block.
47	*
48	* Additionally, "prev" is moved from anywhere in the Unihan and Hangul
49	* areas into the middle of those areas.
50	*
51	* C0 control codes and space are encoded with their US-ASCII bytes.
52	* "prev" is reset for C0 controls but not for space.
53	*/
54
55	/* initial value for "prev": middle of the ASCII range */
#define BOCU1_ASCII_PREV        0x40

/* bounding byte values for differences */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe
#define BOCU1_MAX_TRAIL         0xff
#define BOCU1_RESET             0xff

/* number of lead bytes */
#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

/* adjust trail byte counts for the use of some C0 control byte values */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* number of trail bytes */
#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

/*
 * number of positive and negative single-byte codes
 * (counting 0==BOCU1_MIDDLE among the positive ones)
 */
#define BOCU1_SINGLE            64

/* number of lead bytes for positive and negative 2/3/4-byte sequences */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* The difference value range for single-byters. */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)

/* The difference value range for double-byters. */
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

/* The difference value range for 3-byters. */
#define BOCU1_REACH_POS_3   \
    (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

#define BOCU1_REACH_NEG_3   \
    (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* The lead byte start values. */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
     /* ==BOCU1_MAX_LEAD */

#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
     /* ==BOCU1_MIN+1 */

/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
     (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
     (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)

/*
 * The length of a byte sequence, according to its packed form:
 * the top byte holds the length 1..3, or a lead byte >0x03 for 4-byte sequences.
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
120
121	/*
122	* 12 commonly used C0 control codes (and space) are only used to encode
123	* themselves directly,
124	* which makes BOCU-1 MIME-usable and reasonably safe for
125	* ASCII-oriented software.
126	*
127	* These controls are
128	* 0 NUL
129	*
130	* 7 BEL
131	* 8 BS
132	*
133	* 9 TAB
134	* a LF
135	* b VT
136	* c FF
137	* d CR
138	*
139	* e SO
140	* f SI
141	*
142	* 1a SUB
143	* 1b ESC
144	*
145	* The other 20 C0 controls are also encoded directly (to preserve order)
146	* but are also used as trail bytes in difference encoding
147	* (for better compression).
148	*/
/*
 * Map a trail value t (as used in the difference calculation) to its external byte:
 * values below BOCU1_TRAIL_CONTROLS_COUNT go through the bocu1TrailToByte[] table,
 * all others are shifted by BOCU1_TRAIL_BYTE_OFFSET.
 */
#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
150
151	/*
152	* Byte value map for control codes,
153	* from external byte values 0x00..0x20
154	* to trail byte values 0..19 (0..0x13) as used in the difference calculation.
155	* External byte values that are illegal as trail bytes are mapped to -1.
156	*/
157	static const int8_t
158	bocu1ByteToTrail[BOCU1_MIN0x21]={
159	/* 0 1 2 3 4 5 6 7 */
160	-1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
161
162	/* 8 9 a b c d e f */
163	-1, -1, -1, -1, -1, -1, -1, -1,
164
165	/* 10 11 12 13 14 15 16 17 */
166	0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
167
168	/* 18 19 1a 1b 1c 1d 1e 1f */
169	0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
170
171	/* 20 */
172	-1
173	};
174
175	/*
176	* Byte value map for control codes,
177	* from trail byte values 0..19 (0..0x13) as used in the difference calculation
178	* to external byte values 0x00..0x20.
179	*/
180	static const int8_t
181	bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT20]={
182	/* 0 1 2 3 4 5 6 7 */
183	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
184
185	/* 8 9 a b c d e f */
186	0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
187
188	/* 10 11 12 13 */
189	0x1c, 0x1d, 0x1e, 0x1f
190	};
191
/**
 * Integer division and modulo with negative numerators
 * yields negative modulo results and quotients that are one more than
 * what we need here.
 * This macro adjusts the results so that the modulo-value m is always >=0.
 *
 * For positive n, the if() condition is always false.
 *
 * @param n Number to be split into quotient and rest.
 *          Will be modified to contain the quotient.
 * @param d Divisor.
 * @param m Output variable for the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \
    (m)=(n)%(d); \
    (n)/=(d); \
    if((m)<0) { \
        --(n); \
        (m)+=(d); \
    } \
} UPRV_BLOCK_MACRO_END
213
214	/* Faster versions of packDiff() for single-byte-encoded diff values. */
215
/** Is a diff value encodable in a single byte? */
#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)

/** Encode a diff value in a single byte. */
#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))

/** Is a diff value encodable in two bytes? */
#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)

/* BOCU-1 implementation functions ------------------------------------------ */

/** "prev" for most scripts: the middle (offset 0x40) of the 128-block that contains c. */
#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
228
229	/**
230	* Compute the next "previous" value for differencing
231	* from the current code point.
232	*
233	* @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
234	* @return "previous code point" state value
235	*/
236	static inline int32_t
237	bocu1Prev(int32_t c) {
238	/* compute new prev */
239	if(/* 0x3040<=c && */ c<=0x309f) {
240	/* Hiragana is not 128-aligned */
241	return 0x3070;
242	} else if(0x4e00<=c && c<=0x9fa5) {
243	/* CJK Unihan */
244	return 0x4e00-BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20));
245	} else if(0xac00<=c /* && c<=0xd7a3 */) {
246	/* Korean Hangul */
247	return (0xd7a3+0xac00)/2;
248	} else {
249	/* mostly small scripts */
250	return BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
251	}
252	}
253
/**
 * Fast version of bocu1Prev() for most scripts:
 * only code points in 0x3040..0xd7a3 need the function call.
 */
#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
256
257	/*
258	* The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
259	* The UConverter fields are used as follows:
260	*
261	* fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
262	*
263	* toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
264	* mode decoder's incomplete (diff<<2)\|count (ignored when toULength==0)
265	*/
266
267	/* BOCU-1-from-Unicode conversion functions --------------------------------- */
268
269	/**
270	* Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
271	* and return a packed integer with them.
272	*
273	* The encoding favors small absolute differences with short encodings
274	* to compress runs of same-script characters.
275	*
276	* Optimized version with unrolled loops and fewer floating-point operations
277	* than the standard packDiff().
278	*
279	* @param diff difference value -0x10ffff..0x10ffff
280	* @return
281	* 0x010000zz for 1-byte sequence zz
282	* 0x0200yyzz for 2-byte sequence yy zz
283	* 0x03xxyyzz for 3-byte sequence xx yy zz
284	* 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
285	*/
286	static int32_t
287	packDiff(int32_t diff) {
288	int32_t result, m;
289
290	U_ASSERT(!DIFF_IS_SINGLE(diff))(void)0; /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
291	if(diff>=BOCU1_REACH_NEG_1(-64)) {
292	/* mostly positive differences, and single-byte negative ones */
293	#if 0 /* single-byte case handled in macros, see below */
294	if(diff<=BOCU1_REACH_POS_1(64 -1)) {
295	/* single byte */
296	return 0x01000000\|(BOCU1_MIDDLE0x90+diff);
297	} else
298	#endif
299	if(diff<=BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))) {
300	/* two bytes */
301	diff-=BOCU1_REACH_POS_1(64 -1)+1;
302	result=0x02000000;
303
304	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
305	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
306	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
307
308	result\|=(BOCU1_START_POS_2(0x90 +(64 -1)+1)+diff)<<8;
309	} else if(diff<=BOCU1_REACH_POS_3(((64 -1)+43((0xff -0x21 +1)+20))+3((0xff -0x21 +1)+20)*((0xff -0x21 +1)+20))) {
310	/* three bytes */
311	diff-=BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))+1;
312	result=0x03000000;
313
314	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
315	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
316	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
317
318	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
319	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
320	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;
321
322	result\|=(BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)+diff)<<16;
323	} else {
324	/* four bytes */
325	diff-=BOCU1_REACH_POS_3(((64 -1)+43((0xff -0x21 +1)+20))+3((0xff -0x21 +1)+20)*((0xff -0x21 +1)+20))+1;
326
327	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
328	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
329	result=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
330
331	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
332	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
333	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;
334
335	/*
336	* We know that / and % would deliver quotient 0 and rest=diff.
337	* Avoid division and modulo for performance.
338	*/
339	result\|=BOCU1_TRAIL_TO_BYTE(diff)((diff)>=20 ? (diff)+(0x21 -20) : bocu1TrailToByte[diff])<<16;
340
341	result\|=((uint32_t)BOCU1_START_POS_4(((0x90 +(64 -1)+1)+43)+3))<<24;
342	}
343	} else {
344	/* two- to four-byte negative differences */
345	if(diff>=BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20))) {
346	/* two bytes */
347	diff-=BOCU1_REACH_NEG_1(-64);
348	result=0x02000000;
349
350	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
351	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
352
353	result\|=(BOCU1_START_NEG_2(0x90 +(-64))+diff)<<8;
354	} else if(diff>=BOCU1_REACH_NEG_3(((-64)-43((0xff -0x21 +1)+20))-3((0xff -0x21 +1)+20)*((0xff -0x21 +1)+20))) {
355	/* three bytes */
356	diff-=BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20));
357	result=0x03000000;
358
359	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
360	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
361
362	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
363	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;
364
365	result\|=(BOCU1_START_NEG_3((0x90 +(-64))-43)+diff)<<16;
366	} else {
367	/* four bytes */
368	diff-=BOCU1_REACH_NEG_3(((-64)-43((0xff -0x21 +1)+20))-3((0xff -0x21 +1)+20)*((0xff -0x21 +1)+20));
369
370	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
371	result=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
372
373	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
374	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<8;
375
376	/*
377	* We know that NEGDIVMOD would deliver
378	* quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
379	* Avoid division and modulo for performance.
380	*/
381	m=diff+BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
382	result\|=BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m])<<16;
383
384	result\|=BOCU1_MIN0x21<<24;
385	}
386	}
387	return result;
388	}
389
390
391	static void U_CALLCONV
392	_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
393	UErrorCode *pErrorCode) {
394	UConverter *cnv;
395	const UChar source, sourceLimit;
396	uint8_t *target;
397	int32_t targetCapacity;
398	int32_t *offsets;
399
400	int32_t prev, c, diff;
401
402	int32_t sourceIndex, nextSourceIndex;
403
404	/* set up the local pointers */
405	cnv=pArgs->converter;
406	source=pArgs->source;
407	sourceLimit=pArgs->sourceLimit;
408	target=(uint8_t *)pArgs->target;
409	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
410	offsets=pArgs->offsets;
411
412	/* get the converter state from UConverter */
413	c=cnv->fromUChar32;
414	prev=(int32_t)cnv->fromUnicodeStatus;
415	if(prev==0) {
416	prev=BOCU1_ASCII_PREV0x40;
417	}
418
419	/* sourceIndex=-1 if the current character began in the previous buffer */
420	sourceIndex= c==0 ? 0 : -1;
421	nextSourceIndex=0;
422
423	/* conversion loop */
424	if(c!=0 && targetCapacity>0) {
425	goto getTrail;
426	}
427
428	fastSingle:
429	/* fast loop for single-byte differences */
430	/* use only one loop counter variable, targetCapacity, not also source */
431	diff=(int32_t)(sourceLimit-source);
432	if(targetCapacity>diff) {
433	targetCapacity=diff;
434	}
435	while(targetCapacity>0 && (c=*source)<0x3000) {
436	if(c<=0x20) {
437	if(c!=0x20) {
438	prev=BOCU1_ASCII_PREV0x40;
439	}
440	*target++=(uint8_t)c;
441	*offsets++=nextSourceIndex++;
442	++source;
443	--targetCapacity;
444	} else {
445	diff=c-prev;
446	if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
447	prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
448	*target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
449	*offsets++=nextSourceIndex++;
450	++source;
451	--targetCapacity;
452	} else {
453	break;
454	}
455	}
456	}
457	/* restore real values */
458	targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
459	sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
460
461	/* regular loop for all cases */
462	while(source<sourceLimit) {
463	if(targetCapacity>0) {
464	c=*source++;
465	++nextSourceIndex;
466
467	if(c<=0x20) {
468	/*
469	* ISO C0 control & space:
470	* Encode directly for MIME compatibility,
471	* and reset state except for space, to not disrupt compression.
472	*/
473	if(c!=0x20) {
474	prev=BOCU1_ASCII_PREV0x40;
475	}
476	*target++=(uint8_t)c;
477	*offsets++=sourceIndex;
478	--targetCapacity;
479
480	sourceIndex=nextSourceIndex;
481	continue;
482	}
483
484	if(U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
485	getTrail:
486	if(source<sourceLimit) {
487	/* test the following code unit */
488	UChar trail=*source;
489	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
490	++source;
491	++nextSourceIndex;
492	c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<< 10UL)+0xdc00-0x10000));
493	}
494	} else {
495	/* no more input */
496	c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
497	break;
498	}
499	}
500
501	/*
502	* all other Unicode code points c==U+0021..U+10ffff
503	* are encoded with the difference c-prev
504	*
505	* a new prev is computed from c,
506	* placed in the middle of a 0x80-block (for most small scripts) or
507	* in the middle of the Unihan and Hangul blocks
508	* to statistically minimize the following difference
509	*/
510	diff=c-prev;
511	prev=BOCU1_PREV(c)((c)<0x3040 \|\| (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev (c));
512	if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
513	*target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
514	*offsets++=sourceIndex;
515	--targetCapacity;
516	sourceIndex=nextSourceIndex;
517	if(c<0x3000) {
518	goto fastSingle;
519	}
520	} else if(DIFF_IS_DOUBLE(diff)(((-64)-43((0xff -0x21 +1)+20))<=(diff) && (diff) <=((64 -1)+43((0xff -0x21 +1)+20))) && 2<=targetCapacity) {
521	/* optimize 2-byte case */
522	int32_t m;
523
524	if(diff>=0) {
525	diff-=BOCU1_REACH_POS_1(64 -1)+1;
526	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
527	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
528	diff+=BOCU1_START_POS_2(0x90 +(64 -1)+1);
529	} else {
530	diff-=BOCU1_REACH_NEG_1(-64);
531	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
532	diff+=BOCU1_START_NEG_2(0x90 +(-64));
533	}
534	*target++=(uint8_t)diff;
535	*target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
536	*offsets++=sourceIndex;
537	*offsets++=sourceIndex;
538	targetCapacity-=2;
539	sourceIndex=nextSourceIndex;
540	} else {
541	int32_t length; /* will be 2..4 */
542
543	diff=packDiff(diff);
544	length=BOCU1_LENGTH_FROM_PACKED(diff)((uint32_t)(diff)<0x04000000 ? (diff)>>24 : 4);
545
546	/* write the output character bytes from diff and length */
547	/* from the first if in the loop we know that targetCapacity>0 */
548	if(length<=targetCapacity) {
549	switch(length) {
550	/* each branch falls through to the next one */
551	case 4:
552	*target++=(uint8_t)(diff>>24);
553	*offsets++=sourceIndex;
554	U_FALLTHROUGH[[clang::fallthrough]];
555	case 3:
556	*target++=(uint8_t)(diff>>16);
557	*offsets++=sourceIndex;
558	U_FALLTHROUGH[[clang::fallthrough]];
559	case 2:
560	*target++=(uint8_t)(diff>>8);
561	*offsets++=sourceIndex;
562	/* case 1: handled above */
563	*target++=(uint8_t)diff;
564	*offsets++=sourceIndex;
565	U_FALLTHROUGH[[clang::fallthrough]];
566	default:
567	/* will never occur */
568	break;
569	}
570	targetCapacity-=length;
571	sourceIndex=nextSourceIndex;
572	} else {
573	uint8_t *charErrorBuffer;
574
575	/*
576	* We actually do this backwards here:
577	* In order to save an intermediate variable, we output
578	* first to the overflow buffer what does not fit into the
579	* regular target.
580	*/
581	/* we know that 1<=targetCapacity<length<=4 */
582	length-=targetCapacity;
583	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
584	switch(length) {
585	/* each branch falls through to the next one */
586	case 3:
587	*charErrorBuffer++=(uint8_t)(diff>>16);
588	U_FALLTHROUGH[[clang::fallthrough]];
589	case 2:
590	*charErrorBuffer++=(uint8_t)(diff>>8);
591	U_FALLTHROUGH[[clang::fallthrough]];
592	case 1:
593	*charErrorBuffer=(uint8_t)diff;
594	U_FALLTHROUGH[[clang::fallthrough]];
595	default:
596	/* will never occur */
597	break;
598	}
599	cnv->charErrorBufferLength=(int8_t)length;
600
601	/* now output what fits into the regular target */
602	diff>>=8length; / length was reduced by targetCapacity */
603	switch(targetCapacity) {
604	/* each branch falls through to the next one */
605	case 3:
606	*target++=(uint8_t)(diff>>16);
607	*offsets++=sourceIndex;
608	U_FALLTHROUGH[[clang::fallthrough]];
609	case 2:
610	*target++=(uint8_t)(diff>>8);
611	*offsets++=sourceIndex;
612	U_FALLTHROUGH[[clang::fallthrough]];
613	case 1:
614	*target++=(uint8_t)diff;
615	*offsets++=sourceIndex;
616	U_FALLTHROUGH[[clang::fallthrough]];
617	default:
618	/* will never occur */
619	break;
620	}
621
622	/* target overflow */
623	targetCapacity=0;
	Value stored to 'targetCapacity' is never read
624	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
625	break;
626	}
627	}
628	} else {
629	/* target is full */
630	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
631	break;
632	}
633	}
634
635	/* set the converter state back into UConverter */
636	cnv->fromUChar32= c<0 ? -c : 0;
637	cnv->fromUnicodeStatus=(uint32_t)prev;
638
639	/* write back the updated pointers */
640	pArgs->source=source;
641	pArgs->target=(char *)target;
642	pArgs->offsets=offsets;
643	}
644
645	/*
646	* Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
647	* If a change is made in the original function, then either
648	* change this function the same way or
649	* re-copy the original function and remove the variables
650	* offsets, sourceIndex, and nextSourceIndex.
651	*/
652	static void U_CALLCONV
653	_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
654	UErrorCode *pErrorCode) {
655	UConverter *cnv;
656	const UChar source, sourceLimit;
657	uint8_t *target;
658	int32_t targetCapacity;
659
660	int32_t prev, c, diff;
661
662	/* set up the local pointers */
663	cnv=pArgs->converter;
664	source=pArgs->source;
665	sourceLimit=pArgs->sourceLimit;
666	target=(uint8_t *)pArgs->target;
667	targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
668
669	/* get the converter state from UConverter */
670	c=cnv->fromUChar32;
671	prev=(int32_t)cnv->fromUnicodeStatus;
672	if(prev==0) {
673	prev=BOCU1_ASCII_PREV0x40;
674	}
675
676	/* conversion loop */
677	if(c!=0 && targetCapacity>0) {
678	goto getTrail;
679	}
680
681	fastSingle:
682	/* fast loop for single-byte differences */
683	/* use only one loop counter variable, targetCapacity, not also source */
684	diff=(int32_t)(sourceLimit-source);
685	if(targetCapacity>diff) {
686	targetCapacity=diff;
687	}
688	while(targetCapacity>0 && (c=*source)<0x3000) {
689	if(c<=0x20) {
690	if(c!=0x20) {
691	prev=BOCU1_ASCII_PREV0x40;
692	}
693	*target++=(uint8_t)c;
694	} else {
695	diff=c-prev;
696	if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
697	prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
698	*target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
699	} else {
700	break;
701	}
702	}
703	++source;
704	--targetCapacity;
705	}
706	/* restore real values */
707	targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
708
709	/* regular loop for all cases */
710	while(source<sourceLimit) {
711	if(targetCapacity>0) {
712	c=*source++;
713
714	if(c<=0x20) {
715	/*
716	* ISO C0 control & space:
717	* Encode directly for MIME compatibility,
718	* and reset state except for space, to not disrupt compression.
719	*/
720	if(c!=0x20) {
721	prev=BOCU1_ASCII_PREV0x40;
722	}
723	*target++=(uint8_t)c;
724	--targetCapacity;
725	continue;
726	}
727
728	if(U16_IS_LEAD(c)(((c)&0xfffffc00)==0xd800)) {
729	getTrail:
730	if(source<sourceLimit) {
731	/* test the following code unit */
732	UChar trail=*source;
733	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
734	++source;
735	c=U16_GET_SUPPLEMENTARY(c, trail)(((UChar32)(c)<<10UL)+(UChar32)(trail)-((0xd800<< 10UL)+0xdc00-0x10000));
736	}
737	} else {
738	/* no more input */
739	c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
740	break;
741	}
742	}
743
744	/*
745	* all other Unicode code points c==U+0021..U+10ffff
746	* are encoded with the difference c-prev
747	*
748	* a new prev is computed from c,
749	* placed in the middle of a 0x80-block (for most small scripts) or
750	* in the middle of the Unihan and Hangul blocks
751	* to statistically minimize the following difference
752	*/
753	diff=c-prev;
754	prev=BOCU1_PREV(c)((c)<0x3040 \|\| (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev (c));
755	if(DIFF_IS_SINGLE(diff)((-64)<=(diff) && (diff)<=(64 -1))) {
756	*target++=(uint8_t)PACK_SINGLE_DIFF(diff)(0x90 +(diff));
757	--targetCapacity;
758	if(c<0x3000) {
759	goto fastSingle;
760	}
761	} else if(DIFF_IS_DOUBLE(diff)(((-64)-43((0xff -0x21 +1)+20))<=(diff) && (diff) <=((64 -1)+43((0xff -0x21 +1)+20))) && 2<=targetCapacity) {
762	/* optimize 2-byte case */
763	int32_t m;
764
765	if(diff>=0) {
766	diff-=BOCU1_REACH_POS_1(64 -1)+1;
767	m=diff%BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
768	diff/=BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
769	diff+=BOCU1_START_POS_2(0x90 +(64 -1)+1);
770	} else {
771	diff-=BOCU1_REACH_NEG_1(-64);
772	NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m)do { (m)=(diff)%(((0xff -0x21 +1)+20)); (diff)/=(((0xff -0x21 +1)+20)); if((m)<0) { --(diff); (m)+=(((0xff -0x21 +1)+20 )); } } while (false);
773	diff+=BOCU1_START_NEG_2(0x90 +(-64));
774	}
775	*target++=(uint8_t)diff;
776	*target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m)((m)>=20 ? (m)+(0x21 -20) : bocu1TrailToByte[m]);
777	targetCapacity-=2;
778	} else {
779	int32_t length; /* will be 2..4 */
780
781	diff=packDiff(diff);
782	length=BOCU1_LENGTH_FROM_PACKED(diff)((uint32_t)(diff)<0x04000000 ? (diff)>>24 : 4);
783
784	/* write the output character bytes from diff and length */
785	/* from the first if in the loop we know that targetCapacity>0 */
786	if(length<=targetCapacity) {
787	switch(length) {
788	/* each branch falls through to the next one */
789	case 4:
790	*target++=(uint8_t)(diff>>24);
791	U_FALLTHROUGH[[clang::fallthrough]];
792	case 3:
793	*target++=(uint8_t)(diff>>16);
794	/* case 2: handled above */
795	*target++=(uint8_t)(diff>>8);
796	/* case 1: handled above */
797	*target++=(uint8_t)diff;
798	U_FALLTHROUGH[[clang::fallthrough]];
799	default:
800	/* will never occur */
801	break;
802	}
803	targetCapacity-=length;
804	} else {
805	uint8_t *charErrorBuffer;
806
807	/*
808	* We actually do this backwards here:
809	* In order to save an intermediate variable, we output
810	* first to the overflow buffer what does not fit into the
811	* regular target.
812	*/
813	/* we know that 1<=targetCapacity<length<=4 */
814	length-=targetCapacity;
815	charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
816	switch(length) {
817	/* each branch falls through to the next one */
818	case 3:
819	*charErrorBuffer++=(uint8_t)(diff>>16);
820	U_FALLTHROUGH[[clang::fallthrough]];
821	case 2:
822	*charErrorBuffer++=(uint8_t)(diff>>8);
823	U_FALLTHROUGH[[clang::fallthrough]];
824	case 1:
825	*charErrorBuffer=(uint8_t)diff;
826	U_FALLTHROUGH[[clang::fallthrough]];
827	default:
828	/* will never occur */
829	break;
830	}
831	cnv->charErrorBufferLength=(int8_t)length;
832
833	/* now output what fits into the regular target */
834	diff>>=8length; / length was reduced by targetCapacity */
835	switch(targetCapacity) {
836	/* each branch falls through to the next one */
837	case 3:
838	*target++=(uint8_t)(diff>>16);
839	U_FALLTHROUGH[[clang::fallthrough]];
840	case 2:
841	*target++=(uint8_t)(diff>>8);
842	U_FALLTHROUGH[[clang::fallthrough]];
843	case 1:
844	*target++=(uint8_t)diff;
845	U_FALLTHROUGH[[clang::fallthrough]];
846	default:
847	/* will never occur */
848	break;
849	}
850
851	/* target overflow */
852	targetCapacity=0;
853	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
854	break;
855	}
856	}
857	} else {
858	/* target is full */
859	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
860	break;
861	}
862	}
863
864	/* set the converter state back into UConverter */
865	cnv->fromUChar32= c<0 ? -c : 0;
866	cnv->fromUnicodeStatus=(uint32_t)prev;
867
868	/* write back the updated pointers */
869	pArgs->source=source;
870	pArgs->target=(char *)target;
871	}
872
873	/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
874
875	/**
876	* Function for BOCU-1 decoder; handles multi-byte lead bytes.
877	*
878	* @param b lead byte;
879	* BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
880	* @return (diff<<2)\|count
881	*/
882	static inline int32_t
883	decodeBocu1LeadByte(int32_t b) {
884	int32_t diff, count;
885
886	if(b>=BOCU1_START_NEG_2(0x90 +(-64))) {
887	/* positive difference */
888	if(b<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43)) {
889	/* two bytes */
890	diff=((int32_t)b-BOCU1_START_POS_2(0x90 +(64 -1)+1))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_1(64 -1)+1;
891	count=1;
892	} else if(b<BOCU1_START_POS_4(((0x90 +(64 -1)+1)+43)+3)) {
893	/* three bytes */
894	diff=((int32_t)b-BOCU1_START_POS_3((0x90 +(64 -1)+1)+43))BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_2((64 -1)+43*((0xff -0x21 +1)+20))+1;
895	count=2;
896	} else {
897	/* four bytes */
898	diff=BOCU1_REACH_POS_3(((64 -1)+43((0xff -0x21 +1)+20))+3((0xff -0x21 +1)+20)*((0xff -0x21 +1)+20))+1;
899	count=3;
900	}
901	} else {
902	/* negative difference */
903	if(b>=BOCU1_START_NEG_3((0x90 +(-64))-43)) {
904	/* two bytes */
905	diff=((int32_t)b-BOCU1_START_NEG_2(0x90 +(-64)))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_1(-64);
906	count=1;
907	} else if(b>BOCU1_MIN0x21) {
908	/* three bytes */
909	diff=((int32_t)b-BOCU1_START_NEG_3((0x90 +(-64))-43))BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_2((-64)-43*((0xff -0x21 +1)+20));
910	count=2;
911	} else {
912	/* four bytes */
913	diff=-BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_3(((-64)-43((0xff -0x21 +1)+20))-3((0xff -0x21 +1)+20)*((0xff -0x21 +1)+20));
914	count=3;
915	}
916	}
917
918	/* return the state for decoding the trail byte(s) */
919	return (diff<<2)\|count;
920	}
921
922	/**
923	* Function for BOCU-1 decoder; handles multi-byte trail bytes.
924	*
925	* @param count number of remaining trail bytes including this one
926	* @param b trail byte
927	* @return new delta for diff including b - <0 indicates an error
928	*
929	* @see decodeBocu1
930	*/
931	static inline int32_t
932	decodeBocu1TrailByte(int32_t count, int32_t b) {
933	if(b<=0x20) {
934	/* skip some C0 controls and make the trail byte range contiguous */
935	b=bocu1ByteToTrail[b];
936	/* b<0 for an illegal trail byte value will result in return<0 below */
937	#if BOCU1_MAX_TRAIL0xff<0xff
938	} else if(b>BOCU1_MAX_TRAIL0xff) {
939	return -99;
940	#endif
941	} else {
942	b-=BOCU1_TRAIL_BYTE_OFFSET(0x21 -20);
943	}
944
945	/* add trail byte into difference and decrement count */
946	if(count==1) {
947	return b;
948	} else if(count==2) {
949	return b*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20);
950	} else /* count==3 */ {
951	return b(BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20));
952	}
953	}
954
955	static void U_CALLCONV
956	_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
957	UErrorCode *pErrorCode) {
958	UConverter *cnv;
959	const uint8_t source, sourceLimit;
960	UChar *target;
961	const UChar *targetLimit;
962	int32_t *offsets;
963
964	int32_t prev, count, diff, c;
965
966	int8_t byteIndex;
967	uint8_t *bytes;
968
969	int32_t sourceIndex, nextSourceIndex;
970
971	/* set up the local pointers */
972	cnv=pArgs->converter;
973	source=(const uint8_t *)pArgs->source;
974	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
975	target=pArgs->target;
976	targetLimit=pArgs->targetLimit;
977	offsets=pArgs->offsets;
978
979	/* get the converter state from UConverter */
980	prev=(int32_t)cnv->toUnicodeStatus;
981	if(prev==0) {
982	prev=BOCU1_ASCII_PREV0x40;
983	}
984	diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
985	count=diff&3;
986	diff>>=2;
987
988	byteIndex=cnv->toULength;
989	bytes=cnv->toUBytes;
990
991	/* sourceIndex=-1 if the current character began in the previous buffer */
992	sourceIndex=byteIndex==0 ? 0 : -1;
993	nextSourceIndex=0;
994
995	/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
996	if(count>0 && byteIndex>0 && target<targetLimit) {
997	goto getTrail;
998	}
999
1000	fastSingle:
1001	/* fast loop for single-byte differences */
1002	/* use count as the only loop counter variable */
1003	diff=(int32_t)(sourceLimit-source);
1004	count=(int32_t)(pArgs->targetLimit-target);
1005	if(count>diff) {
1006	count=diff;
1007	}
1008	while(count>0) {
1009	if(BOCU1_START_NEG_2(0x90 +(-64))<=(c=*source) && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
1010	c=prev+(c-BOCU1_MIDDLE0x90);
1011	if(c<0x3000) {
1012	*target++=(UChar)c;
1013	*offsets++=nextSourceIndex++;
1014	prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
1015	} else {
1016	break;
1017	}
1018	} else if(c<=0x20) {
1019	if(c!=0x20) {
1020	prev=BOCU1_ASCII_PREV0x40;
1021	}
1022	*target++=(UChar)c;
1023	*offsets++=nextSourceIndex++;
1024	} else {
1025	break;
1026	}
1027	++source;
1028	--count;
1029	}
1030	sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
1031
1032	/* decode a sequence of single and lead bytes */
1033	while(source<sourceLimit) {
1034	if(target>=targetLimit) {
1035	/* target is full */
1036	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1037	break;
1038	}
1039
1040	++nextSourceIndex;
1041	c=*source++;
1042	if(BOCU1_START_NEG_2(0x90 +(-64))<=c && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
1043	/* Write a code point directly from a single-byte difference. */
1044	c=prev+(c-BOCU1_MIDDLE0x90);
1045	if(c<0x3000) {
1046	*target++=(UChar)c;
1047	*offsets++=sourceIndex;
1048	prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
1049	sourceIndex=nextSourceIndex;
1050	goto fastSingle;
1051	}
1052	} else if(c<=0x20) {
1053	/*
1054	* Direct-encoded C0 control code or space.
1055	* Reset prev for C0 control codes but not for space.
1056	*/
1057	if(c!=0x20) {
1058	prev=BOCU1_ASCII_PREV0x40;
1059	}
1060	*target++=(UChar)c;
1061	*offsets++=sourceIndex;
1062	sourceIndex=nextSourceIndex;
1063	continue;
1064	} else if(BOCU1_START_NEG_3((0x90 +(-64))-43)<=c && c<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43) && source<sourceLimit) {
1065	/* Optimize two-byte case. */
1066	if(c>=BOCU1_MIDDLE0x90) {
1067	diff=((int32_t)c-BOCU1_START_POS_2(0x90 +(64 -1)+1))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_1(64 -1)+1;
1068	} else {
1069	diff=((int32_t)c-BOCU1_START_NEG_2(0x90 +(-64)))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_1(-64);
1070	}
1071
1072	/* trail byte */
1073	++nextSourceIndex;
1074	c=decodeBocu1TrailByte(1, *source++);
1075	if(c<0 \|\| (uint32_t)(c=prev+diff+c)>0x10ffff) {
1076	bytes[0]=source[-2];
1077	bytes[1]=source[-1];
1078	byteIndex=2;
1079	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
1080	break;
1081	}
1082	} else if(c==BOCU1_RESET0xff) {
1083	/* only reset the state, no code point */
1084	prev=BOCU1_ASCII_PREV0x40;
1085	sourceIndex=nextSourceIndex;
1086	continue;
1087	} else {
1088	/*
1089	* For multi-byte difference lead bytes, set the decoder state
1090	* with the partial difference value from the lead byte and
1091	* with the number of trail bytes.
1092	*/
1093	bytes[0]=(uint8_t)c;
1094	byteIndex=1;
1095
1096	diff=decodeBocu1LeadByte(c);
1097	count=diff&3;
1098	diff>>=2;
1099	getTrail:
1100	for(;;) {
1101	if(source>=sourceLimit) {
1102	goto endloop;
1103	}
1104	++nextSourceIndex;
1105	c=bytes[byteIndex++]=*source++;
1106
1107	/* trail byte in any position */
1108	c=decodeBocu1TrailByte(count, c);
1109	if(c<0) {
1110	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
1111	goto endloop;
1112	}
1113
1114	diff+=c;
1115	if(--count==0) {
1116	/* final trail byte, deliver a code point */
1117	byteIndex=0;
1118	c=prev+diff;
1119	if((uint32_t)c>0x10ffff) {
1120	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
1121	goto endloop;
1122	}
1123	break;
1124	}
1125	}
1126	}
1127
1128	/* calculate the next prev and output c */
1129	prev=BOCU1_PREV(c)((c)<0x3040 \|\| (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev (c));
1130	if(c<=0xffff) {
1131	*target++=(UChar)c;
1132	*offsets++=sourceIndex;
1133	} else {
1134	/* output surrogate pair */
1135	*target++=U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
1136	if(target<targetLimit) {
1137	*target++=U16_TRAIL(c)(UChar)(((c)&0x3ff)\|0xdc00);
1138	*offsets++=sourceIndex;
1139	*offsets++=sourceIndex;
1140	} else {
1141	/* target overflow */
1142	*offsets++=sourceIndex;
1143	cnv->UCharErrorBuffer[0]=U16_TRAIL(c)(UChar)(((c)&0x3ff)\|0xdc00);
1144	cnv->UCharErrorBufferLength=1;
1145	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1146	break;
1147	}
1148	}
1149	sourceIndex=nextSourceIndex;
1150	}
1151	endloop:
1152
1153	if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
1154	/* set the converter state in UConverter to deal with the next character */
1155	cnv->toUnicodeStatus=BOCU1_ASCII_PREV0x40;
1156	cnv->mode=0;
1157	} else {
1158	/* set the converter state back into UConverter */
1159	cnv->toUnicodeStatus=(uint32_t)prev;
1160	cnv->mode=(diff<<2)\|count;
1161	}
1162	cnv->toULength=byteIndex;
1163
1164	/* write back the updated pointers */
1165	pArgs->source=(const char *)source;
1166	pArgs->target=target;
1167	pArgs->offsets=offsets;
1168	return;
1169	}
1170
1171	/*
1172	* Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
1173	* If a change is made in the original function, then either
1174	* change this function the same way or
1175	* re-copy the original function and remove the variables
1176	* offsets, sourceIndex, and nextSourceIndex.
1177	*/
1178	static void U_CALLCONV
1179	_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
1180	UErrorCode *pErrorCode) {
1181	UConverter *cnv;
1182	const uint8_t source, sourceLimit;
1183	UChar *target;
1184	const UChar *targetLimit;
1185
1186	int32_t prev, count, diff, c;
1187
1188	int8_t byteIndex;
1189	uint8_t *bytes;
1190
1191	/* set up the local pointers */
1192	cnv=pArgs->converter;
1193	source=(const uint8_t *)pArgs->source;
1194	sourceLimit=(const uint8_t *)pArgs->sourceLimit;
1195	target=pArgs->target;
1196	targetLimit=pArgs->targetLimit;
1197
1198	/* get the converter state from UConverter */
1199	prev=(int32_t)cnv->toUnicodeStatus;
1200	if(prev==0) {
1201	prev=BOCU1_ASCII_PREV0x40;
1202	}
1203	diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
1204	count=diff&3;
1205	diff>>=2;
1206
1207	byteIndex=cnv->toULength;
1208	bytes=cnv->toUBytes;
1209
1210	/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
1211	if(count>0 && byteIndex>0 && target<targetLimit) {
1212	goto getTrail;
1213	}
1214
1215	fastSingle:
1216	/* fast loop for single-byte differences */
1217	/* use count as the only loop counter variable */
1218	diff=(int32_t)(sourceLimit-source);
1219	count=(int32_t)(pArgs->targetLimit-target);
1220	if(count>diff) {
1221	count=diff;
1222	}
1223	while(count>0) {
1224	if(BOCU1_START_NEG_2(0x90 +(-64))<=(c=*source) && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
1225	c=prev+(c-BOCU1_MIDDLE0x90);
1226	if(c<0x3000) {
1227	*target++=(UChar)c;
1228	prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
1229	} else {
1230	break;
1231	}
1232	} else if(c<=0x20) {
1233	if(c!=0x20) {
1234	prev=BOCU1_ASCII_PREV0x40;
1235	}
1236	*target++=(UChar)c;
1237	} else {
1238	break;
1239	}
1240	++source;
1241	--count;
1242	}
1243
1244	/* decode a sequence of single and lead bytes */
1245	while(source<sourceLimit) {
1246	if(target>=targetLimit) {
1247	/* target is full */
1248	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1249	break;
1250	}
1251
1252	c=*source++;
1253	if(BOCU1_START_NEG_2(0x90 +(-64))<=c && c<BOCU1_START_POS_2(0x90 +(64 -1)+1)) {
1254	/* Write a code point directly from a single-byte difference. */
1255	c=prev+(c-BOCU1_MIDDLE0x90);
1256	if(c<0x3000) {
1257	*target++=(UChar)c;
1258	prev=BOCU1_SIMPLE_PREV(c)(((c)&~0x7f)+0x40);
1259	goto fastSingle;
1260	}
1261	} else if(c<=0x20) {
1262	/*
1263	* Direct-encoded C0 control code or space.
1264	* Reset prev for C0 control codes but not for space.
1265	*/
1266	if(c!=0x20) {
1267	prev=BOCU1_ASCII_PREV0x40;
1268	}
1269	*target++=(UChar)c;
1270	continue;
1271	} else if(BOCU1_START_NEG_3((0x90 +(-64))-43)<=c && c<BOCU1_START_POS_3((0x90 +(64 -1)+1)+43) && source<sourceLimit) {
1272	/* Optimize two-byte case. */
1273	if(c>=BOCU1_MIDDLE0x90) {
1274	diff=((int32_t)c-BOCU1_START_POS_2(0x90 +(64 -1)+1))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_POS_1(64 -1)+1;
1275	} else {
1276	diff=((int32_t)c-BOCU1_START_NEG_2(0x90 +(-64)))*BOCU1_TRAIL_COUNT((0xff -0x21 +1)+20)+BOCU1_REACH_NEG_1(-64);
1277	}
1278
1279	/* trail byte */
1280	c=decodeBocu1TrailByte(1, *source++);
1281	if(c<0 \|\| (uint32_t)(c=prev+diff+c)>0x10ffff) {
1282	bytes[0]=source[-2];
1283	bytes[1]=source[-1];
1284	byteIndex=2;
1285	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
1286	break;
1287	}
1288	} else if(c==BOCU1_RESET0xff) {
1289	/* only reset the state, no code point */
1290	prev=BOCU1_ASCII_PREV0x40;
1291	continue;
1292	} else {
1293	/*
1294	* For multi-byte difference lead bytes, set the decoder state
1295	* with the partial difference value from the lead byte and
1296	* with the number of trail bytes.
1297	*/
1298	bytes[0]=(uint8_t)c;
1299	byteIndex=1;
1300
1301	diff=decodeBocu1LeadByte(c);
1302	count=diff&3;
1303	diff>>=2;
1304	getTrail:
1305	for(;;) {
1306	if(source>=sourceLimit) {
1307	goto endloop;
1308	}
1309	c=bytes[byteIndex++]=*source++;
1310
1311	/* trail byte in any position */
1312	c=decodeBocu1TrailByte(count, c);
1313	if(c<0) {
1314	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
1315	goto endloop;
1316	}
1317
1318	diff+=c;
1319	if(--count==0) {
1320	/* final trail byte, deliver a code point */
1321	byteIndex=0;
1322	c=prev+diff;
1323	if((uint32_t)c>0x10ffff) {
1324	*pErrorCode=U_ILLEGAL_CHAR_FOUND;
1325	goto endloop;
1326	}
1327	break;
1328	}
1329	}
1330	}
1331
1332	/* calculate the next prev and output c */
1333	prev=BOCU1_PREV(c)((c)<0x3040 \|\| (c)>0xd7a3 ? (((c)&~0x7f)+0x40) : bocu1Prev (c));
1334	if(c<=0xffff) {
1335	*target++=(UChar)c;
1336	} else {
1337	/* output surrogate pair */
1338	*target++=U16_LEAD(c)(UChar)(((c)>>10)+0xd7c0);
1339	if(target<targetLimit) {
1340	*target++=U16_TRAIL(c)(UChar)(((c)&0x3ff)\|0xdc00);
1341	} else {
1342	/* target overflow */
1343	cnv->UCharErrorBuffer[0]=U16_TRAIL(c)(UChar)(((c)&0x3ff)\|0xdc00);
1344	cnv->UCharErrorBufferLength=1;
1345	*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1346	break;
1347	}
1348	}
1349	}
1350	endloop:
1351
1352	if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
1353	/* set the converter state in UConverter to deal with the next character */
1354	cnv->toUnicodeStatus=BOCU1_ASCII_PREV0x40;
1355	cnv->mode=0;
1356	} else {
1357	/* set the converter state back into UConverter */
1358	cnv->toUnicodeStatus=(uint32_t)prev;
1359	cnv->mode=(diff<<2)\|count;
1360	}
1361	cnv->toULength=byteIndex;
1362
1363	/* write back the updated pointers */
1364	pArgs->source=(const char *)source;
1365	pArgs->target=target;
1366	return;
1367	}
1368
1369	/* miscellaneous ------------------------------------------------------------ */
1370
1371	static const UConverterImpl _Bocu1Impl={
1372	UCNV_BOCU1,
1373
1374	NULL__null,
1375	NULL__null,
1376
1377	NULL__null,
1378	NULL__null,
1379	NULL__null,
1380
1381	_Bocu1ToUnicode,
1382	_Bocu1ToUnicodeWithOffsets,
1383	_Bocu1FromUnicode,
1384	_Bocu1FromUnicodeWithOffsets,
1385	NULL__null,
1386
1387	NULL__null,
1388	NULL__null,
1389	NULL__null,
1390	NULL__null,
1391	ucnv_getCompleteUnicodeSetucnv_getCompleteUnicodeSet_71,
1392
1393	NULL__null,
1394	NULL__null
1395	};
1396
1397	static const UConverterStaticData _Bocu1StaticData={
1398	sizeof(UConverterStaticData),
1399	"BOCU-1",
1400	1214, /* CCSID for BOCU-1 */
1401	UCNV_IBM, UCNV_BOCU1,
1402	1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
1403	{ 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
1404	FALSE0, FALSE0,
1405	0,
1406	0,
1407	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1408	};
1409
1410	const UConverterSharedData _Bocu1Data_Bocu1Data_71=
1411	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, & _Bocu1StaticData, false, false, &_Bocu1Impl, 0, { 0, 0, 0 , 0, __null, __null, __null, __null, __null, __null, { 0 }, __null , __null, 0, 0, 0, false, 0, 0, __null, __null, __null, __null } };
1412
1413	#endif