../deps/icu-small/source/common/ucnv2022.cpp

Bug Summary

File:	out/../deps/icu-small/source/common/ucnv2022.cpp
Warning:	line 2387, column 5 Value stored to 'oldIsTargetByteDBCS' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ucnv2022.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/ucnv2022.cpp

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 2000-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* file name: ucnv2022.cpp
9	* encoding: UTF-8
10	* tab size: 8 (not used)
11	* indentation:4
12	*
13	* created on: 2000feb03
14	* created by: Markus W. Scherer
15	*
16	* Change history:
17	*
18	* 06/29/2000 helena Major rewrite of the callback APIs.
19	* 08/08/2000 Ram Included support for ISO-2022-JP-2
20	* Changed implementation of toUnicode
21	* function
22	* 08/21/2000 Ram Added support for ISO-2022-KR
23	* 08/29/2000 Ram Separated implementation of EBCDIC to
24	* ucnvebdc.c
25	* 09/20/2000 Ram Added support for ISO-2022-CN
26	* Added implementations for getNextUChar()
27	* for specific 2022 country variants.
28	* 10/31/2000 Ram Implemented offsets logic functions
29	*/
30
31	#include "unicode/utypes.h"
32
33	#if !UCONFIG_NO_CONVERSION0 && !UCONFIG_NO_LEGACY_CONVERSION0
34
35	#include "unicode/ucnv.h"
36	#include "unicode/uset.h"
37	#include "unicode/ucnv_err.h"
38	#include "unicode/ucnv_cb.h"
39	#include "unicode/utf16.h"
40	#include "ucnv_imp.h"
41	#include "ucnv_bld.h"
42	#include "ucnv_cnv.h"
43	#include "ucnvmbcs.h"
44	#include "cstring.h"
45	#include "cmemory.h"
46	#include "uassert.h"
47
48	#ifdef U_ENABLE_GENERIC_ISO_2022
49	/*
50	* I am disabling the generic ISO-2022 converter after proposing to do so on
51	* the icu mailing list two days ago.
52	*
53	* Reasons:
54	* 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
55	* its designation sequences, single shifts with return to the previous state,
56	* switch-with-no-return to UTF-16BE or similar, etc.
57	* This is unlike the language-specific variants like ISO-2022-JP which
58	* require a much smaller repertoire of ISO-2022 features.
59	* These variants continue to be supported.
60	* 2. I believe that no one is really using the generic ISO-2022 converter
61	* but rather always one of the language-specific variants.
62	* Note that ICU's generic ISO-2022 converter has always output one escape
63	* sequence followed by UTF-8 for the whole stream.
64	* 3. Switching between subcharsets is extremely slow, because each time
65	* the previous converter is closed and a new one opened,
66	* without any kind of caching, least-recently-used list, etc.
67	* 4. The code is currently buggy, and given the above it does not seem
68	* reasonable to spend the time on maintenance.
69	* 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
70	* This means, for example, that when ISO-8859-7 is designated, the following
71	* ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
72	* The ICU ISO-2022 converter does not handle this - and has no information
73	* about which subconverter would have to be shifted vs. which is designed
74	* for 7-bit ISO-2022.
75	*
76	* Markus Scherer 2003-dec-03
77	*/
78	#endif
79
80	#if !UCONFIG_ONLY_HTML_CONVERSION0
81	static const char SHIFT_IN_STR[] = "\x0F";
82	// static const char SHIFT_OUT_STR[] = "\x0E";
83	#endif
84
85	#define CR0x0D 0x0D
86	#define LF0x0A 0x0A
87	#define H_TAB0x09 0x09
88	#define V_TAB0x0B 0x0B
89	#define SPACE0x20 0x20
90
91	enum {
92	HWKANA_START=0xff61,
93	HWKANA_END=0xff9f
94	};
95
96	/*
97	* 94-character sets with native byte values A1..FE are encoded in ISO 2022
98	* as bytes 21..7E. (Subtract 0x80.)
99	* 96-character sets with native byte values A0..FF are encoded in ISO 2022
100	* as bytes 20..7F. (Subtract 0x80.)
101	* Do not encode C1 control codes with native bytes 80..9F
102	* as bytes 00..1F (C0 control codes).
103	*/
104	enum {
105	GR94_START=0xa1,
106	GR94_END=0xfe,
107	GR96_START=0xa0,
108	GR96_END=0xff
109	};
110
111	/*
112	* ISO 2022 control codes must not be converted from Unicode
113	* because they would mess up the byte stream.
114	* The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
115	* corresponding to SO, SI, and ESC.
116	*/
117	#define IS_2022_CONTROL(c)(((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000 )!=0) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
118
119	/* for ISO-2022-JP and -CN implementations */
/*
 * Charset/state identifiers shared by the ISO-2022-JP and ISO-2022-CN code.
 * The JP and CN groups deliberately reuse the small values 1..3: each group
 * is only meaningful for its own variant, and those values double as indexes
 * into UConverterDataISO2022.myConverterArray[] (see _ISO2022Open).
 */
120	typedef enum {
121	/* shared values */
122	INVALID_STATE=-1,
123	ASCII = 0,
124
/* single-shift pseudo states; SS3_STATE is 0x11 by implicit increment */
125	SS2_STATE=0x10,
126	SS3_STATE,
127
128	/* JP */
129	ISO8859_1 = 1 ,
130	ISO8859_7 = 2 ,
131	JISX201 = 3,
132	JISX208 = 4,
133	JISX212 = 5,
134	GB2312 =6,
135	KSC5601 =7,
136	HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */
137
138	/* CN */
139	/* the first few enum constants must keep their values because they correspond to myConverterArray[] */
140	GB2312_1=1,
141	ISO_IR_165=2,
142	CNS_11643=3,
143
144	/*
145	* these are used in StateEnum and ISO2022State variables,
146	* but CNS_11643 must be used to index into myConverterArray[]
147	*/
/* CNS_11643_0 is 0x20; CNS_11643_1..CNS_11643_7 follow as 0x21..0x27 */
148	CNS_11643_0=0x20,
149	CNS_11643_1,
150	CNS_11643_2,
151	CNS_11643_3,
152	CNS_11643_4,
153	CNS_11643_5,
154	CNS_11643_6,
155	CNS_11643_7
156	} StateEnum;
157
158	/* is the StateEnum charset value for a DBCS charset? */
159	#if UCONFIG_ONLY_HTML_CONVERSION0
160	#define IS_JP_DBCS(cs)(JISX208<=(cs) && (cs)<=KSC5601) (JISX208==(cs))
161	#else
162	#define IS_JP_DBCS(cs)(JISX208<=(cs) && (cs)<=KSC5601) (JISX208<=(cs) && (cs)<=KSC5601)
163	#endif
164
165	#define CSM(cs)((uint16_t)1<<(cs)) ((uint16_t)1<<(cs))
166
167	/*
168	* Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
169	* to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
170	*
171	* Note: The converter uses some leniency:
172	* - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
173	* all versions, not just JIS7 and JIS8.
174	* - ICU does not distinguish between different versions of JIS X 0208.
175	*/
176	#if UCONFIG_ONLY_HTML_CONVERSION0
177	enum { MAX_JA_VERSION=0 };
178	#else
179	enum { MAX_JA_VERSION=4 };
180	#endif
181	static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
182	CSM(ASCII)((uint16_t)1<<(ASCII))\|CSM(JISX201)((uint16_t)1<<(JISX201))\|CSM(JISX208)((uint16_t)1<<(JISX208))\|CSM(HWKANA_7BIT)((uint16_t)1<<(HWKANA_7BIT)),
183	#if !UCONFIG_ONLY_HTML_CONVERSION0
184	CSM(ASCII)((uint16_t)1<<(ASCII))\|CSM(JISX201)((uint16_t)1<<(JISX201))\|CSM(JISX208)((uint16_t)1<<(JISX208))\|CSM(HWKANA_7BIT)((uint16_t)1<<(HWKANA_7BIT))\|CSM(JISX212)((uint16_t)1<<(JISX212)),
185	CSM(ASCII)((uint16_t)1<<(ASCII))\|CSM(JISX201)((uint16_t)1<<(JISX201))\|CSM(JISX208)((uint16_t)1<<(JISX208))\|CSM(HWKANA_7BIT)((uint16_t)1<<(HWKANA_7BIT))\|CSM(JISX212)((uint16_t)1<<(JISX212))\|CSM(GB2312)((uint16_t)1<<(GB2312))\|CSM(KSC5601)((uint16_t)1<<(KSC5601))\|CSM(ISO8859_1)((uint16_t)1<<(ISO8859_1))\|CSM(ISO8859_7)((uint16_t)1<<(ISO8859_7)),
186	CSM(ASCII)((uint16_t)1<<(ASCII))\|CSM(JISX201)((uint16_t)1<<(JISX201))\|CSM(JISX208)((uint16_t)1<<(JISX208))\|CSM(HWKANA_7BIT)((uint16_t)1<<(HWKANA_7BIT))\|CSM(JISX212)((uint16_t)1<<(JISX212))\|CSM(GB2312)((uint16_t)1<<(GB2312))\|CSM(KSC5601)((uint16_t)1<<(KSC5601))\|CSM(ISO8859_1)((uint16_t)1<<(ISO8859_1))\|CSM(ISO8859_7)((uint16_t)1<<(ISO8859_7)),
187	CSM(ASCII)((uint16_t)1<<(ASCII))\|CSM(JISX201)((uint16_t)1<<(JISX201))\|CSM(JISX208)((uint16_t)1<<(JISX208))\|CSM(HWKANA_7BIT)((uint16_t)1<<(HWKANA_7BIT))\|CSM(JISX212)((uint16_t)1<<(JISX212))\|CSM(GB2312)((uint16_t)1<<(GB2312))\|CSM(KSC5601)((uint16_t)1<<(KSC5601))\|CSM(ISO8859_1)((uint16_t)1<<(ISO8859_1))\|CSM(ISO8859_7)((uint16_t)1<<(ISO8859_7))
188	#endif
189	};
190
/*
 * Type tag stored in UConverterDataISO2022.currentType.
 * Values run 0..5 by implicit increment; _ISO2022Open initialises
 * currentType to ASCII1.
 */
191	typedef enum {
192	ASCII1=0,
193	LATIN1,
194	SBCS,
195	DBCS,
196	MBCS,
197	HWKANA
198	}Cnv2022Type;
199
/*
 * Designation/shift state for one conversion direction.
 * UConverterDataISO2022 embeds two of these (toU2022State, fromU2022State);
 * _ISO2022Reset clears them with memset, so the reset state is
 * cs[0..3] == 0 (ASCII in StateEnum), g == 0 and prevG == 0.
 */
200	typedef struct ISO2022State {
201	int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
202	int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
203	int8_t prevG; /* g before single shift (SS2 or SS3) */
204	} ISO2022State;
205
206	#define UCNV_OPTIONS_VERSION_MASK0xf 0xf
207	#define UCNV_2022_MAX_CONVERTERS10 10
208
/*
 * Per-converter private data, allocated in _ISO2022Open and hung off
 * UConverter.extraInfo; released in _ISO2022Close.
 */
209	typedef struct{
/* shared data of the sub-charsets, indexed by StateEnum values (JP/CN); unloaded one by one in _ISO2022Close */
210	UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS10];
/* sub-converter opened with ucnv_open for the KR variant; closed in _ISO2022Close */
211	UConverter *currentConverter;
212	Cnv2022Type currentType;
/* independent state for each direction; reset separately by _ISO2022Reset */
213	ISO2022State toU2022State, fromU2022State;
/* cleared by _ISO2022Reset on a to-Unicode reset; presumably the running escape-sequence key described above the state tables - confirm against changeState_2022 */
214	uint32_t key;
/* low 4 bits of the open options (pArgs->options & 0xf), validated per locale in _ISO2022Open */
215	uint32_t version;
216	#ifdef U_ENABLE_GENERIC_ISO_2022
217	UBool isFirstBuffer;
218	#endif
219	UBool isEmptySegment;
/* converter name returned by _ISO2022getName; the longest string written in _ISO2022Open is 28 chars + NUL */
220	char name[30];
/* "ja", "ko" or "cn" as written by _ISO2022Open; locale[0] is what _ISO2022Reset inspects */
221	char locale[3];
222	}UConverterDataISO2022;
223
224	/* Protos */
225	/* ISO-2022 ----------------------------------------------------------------- */
226
227	/* Forward declaration */
228	U_CFUNCextern "C" void U_CALLCONV
229	ucnv_fromUnicode_UTF8ucnv_fromUnicode_UTF8_71(UConverterFromUnicodeArgs * args,
230	UErrorCode * err);
231	U_CFUNCextern "C" void U_CALLCONV
232	ucnv_fromUnicode_UTF8_OFFSETS_LOGICucnv_fromUnicode_UTF8_OFFSETS_LOGIC_71(UConverterFromUnicodeArgs * args,
233	UErrorCode * err);
234
235	#define ESC_20220x1B 0x1B /* ESC */
236
237	typedef enum
238	{
239	INVALID_2022 = -1, /* Doesn't correspond to a valid iso 2022 escape sequence */
240	VALID_NON_TERMINAL_2022 = 0, /* so far corresponds to a valid iso 2022 escape sequence */
241	VALID_TERMINAL_2022 = 1, /* corresponds to a valid iso 2022 escape sequence */
242	VALID_MAYBE_TERMINAL_2022 = 2 /* so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence */
243	} UCNV_TableStates_2022;
244
245	/*
246	* The way these state transition arrays work is:
247	* ex : ESC$B is the sequence for JISX208
248	* a) First Iteration: char is ESC
249	* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
250	* int x = normalize_esq_chars_2022[27] which is equal to 1
251	* ii) Search for this value in escSeqStateTable_Key_2022[]
252	* value of x is stored at escSeqStateTable_Key_2022[0]
253	* iii) Save this index as offset
254	* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
255	* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
256	* b) Switch on this state and continue to next char
257	* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
258	* which is normalize_esq_chars_2022[36] == 4
259	* ii) x is currently 1(from above)
260	* x<<=5 -- x is now 32
261	* x+=normalize_esq_chars_2022[36]
262	* now x is 36
263	* iii) Search for this value in escSeqStateTable_Key_2022[]
264	* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
265	* iv) Get state of this sequence from escSeqStateTable_Value_2022[]
266	* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
267	* c) Switch on this state and continue to next char
268	* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
269	* ii) x is currently 36 (from above)
270	* x<<=5 -- x is now 1152
271	* x+=normalize_esq_chars_2022[66]
272	* now x is 1161
273	* iii) Search for this value in escSeqStateTable_Key_2022[]
274	* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
275	* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
276	* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
277	* v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
278	*/
279
280
281	/* Below are the 3 arrays depicting a state transition table */
/*
 * Maps a byte value (the array index, 0..255) to a small code used to build
 * escape-sequence keys (key = (key << 5) + code, per the walkthrough above).
 * Each listing row holds ten entries, so row r covers indexes 10*r..10*r+9.
 * Only 29 bytes have a non-zero code (the codes are 1..29, each used once);
 * every other byte maps to 0.
 */
282	static const int8_t normalize_esq_chars_2022[256] = {
283	/* 0 1 2 3 4 5 6 7 8 9 */
284
285	0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
286	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
/* indexes 20..29: index 27 (ESC) -> 1 */
287	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0
/* indexes 30..39: index 36 ('$') -> 4, 37 ('%') -> 7, 38 ('&') -> 29 */
288	,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0
/* indexes 40..49: '(' ')' '*' '+' '-' '.' '/' */
289	,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0
290	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
/* indexes 60..69: index 64 ('@') -> 5, 65 ('A') -> 8, 66 ('B') -> 9, ... */
291	,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12
/* indexes 70..79: 'F'..'O' */
292	,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28
/* indexes 80..89: index 82 ('R') -> 21 */
293	,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0
/* indexes 90..99: index 90 ('Z') -> 22 */
294	,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
295	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
296	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
297	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
298	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
299	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
300	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
301	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
302	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
303	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
304	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
305	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
306	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
307	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
308	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
309	,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0
/* final partial row: indexes 250..255 */
310	,0 ,0 ,0 ,0 ,0 ,0
311	};
312
313	#ifdef U_ENABLE_GENERIC_ISO_2022
314	/*
315	* When the generic ISO-2022 converter is completely removed, not just disabled
316	* per #ifdef, then the following state table and the associated tables that are
317	* dimensioned with MAX_STATES_2022 should be trimmed.
318	*
319	* Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
320	* the associated escape sequences starting with ESC ( B should be removed.
321	* This includes the ones with key values 1097 and all of the ones above 1000000.
322	*
323	* For the latter, the tables can simply be truncated.
324	* For the former, since the tables must be kept parallel, it is probably best
325	* to simply duplicate an adjacent table cell, parallel in all tables.
326	*
327	* It may make sense to restructure the tables, especially by using small search
328	* tables for the variants instead of indexing them parallel to the table here.
329	*/
330	#endif
331
/*
 * Number of recognised escape-sequence prefixes; dimensions this key table
 * and the tables kept parallel to it (same index = same prefix).
 */
332	#define MAX_STATES_202274 74
/*
 * Keys of all recognised escape-sequence prefixes, built as described in the
 * walkthrough above (shift the running key left by 5, add the normalized
 * code). Entries are listed in strictly ascending order; ten per listing row,
 * so row r covers indexes 10*r..10*r+9 (e.g. 36 is at index 2, 1161 at 24).
 */
333	static const int32_t escSeqStateTable_Key_2022[MAX_STATES_202274] = {
334	/* 0 1 2 3 4 5 6 7 8 9 */
335
336	1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096
337	,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106
338	,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257
339	,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940
340	,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644
341	,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138
342	,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630
343	,35947631 ,35947635 ,35947636 ,35947638
344	};
345
346	#ifdef U_ENABLE_GENERIC_ISO_2022
347
348	static const char* const escSeqStateTable_Result_2022[MAX_STATES_202274] = {
349	/* 0 1 2 3 4 5 6 7 8 9 */
350
351	NULL__null ,NULL__null ,NULL__null ,NULL__null ,NULL__null ,NULL__null ,NULL__null ,NULL__null ,"latin1" ,"latin1"
352	,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1"
353	,"latin1" ,NULL__null ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL__null ,NULL__null ,NULL__null ,NULL__null ,"UTF8"
354	,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL__null ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383"
355	,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165"
356	,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
357	,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL__null ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089"
358	,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"
359	};
360
361	#endif
362
363	static const int8_t escSeqStateTable_Value_2022[MAX_STATES_202274] = {
364	/* 0 1 2 3 4 5 6 7 8 9 */
365	VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
366	,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
367	,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
368	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
369	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
370	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
371	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
372	,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
373	};
374
375	/* Type def for refactoring changeState_2022 code*/
/*
 * Identifies which ISO-2022 variant is being processed.
 * ISO_2022 (generic) exists only when U_ENABLE_GENERIC_ISO_2022 is defined;
 * the KR and CN variants are compiled out when UCONFIG_ONLY_HTML_CONVERSION
 * is non-zero, leaving ISO_2022_JP as the only unconditional member.
 */
376	typedef enum{
377	#ifdef U_ENABLE_GENERIC_ISO_2022
378	ISO_2022=0,
379	#endif
380	ISO_2022_JP=1,
381	#if !UCONFIG_ONLY_HTML_CONVERSION0
382	ISO_2022_KR=2,
383	ISO_2022_CN=3
384	#endif
385	} Variant2022;
386
387	/********* ISO 2022 Converter Protos *********/
388	static void U_CALLCONV
389	_ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode);
390
391	static void U_CALLCONV
392	_ISO2022Close(UConverter *converter);
393
394	static void U_CALLCONV
395	_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
396
397	U_CDECL_BEGINextern "C" {
398	static const char * U_CALLCONV
399	_ISO2022getName(const UConverter* cnv);
400	U_CDECL_END}
401
402	static void U_CALLCONV
403	_ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err);
404
405	U_CDECL_BEGINextern "C" {
406	static UConverter * U_CALLCONV
407	_ISO_2022_SafeClone(const UConverter cnv, void stackBuffer, int32_t pBufferSize, UErrorCode status);
408
409	U_CDECL_END}
410
411	#ifdef U_ENABLE_GENERIC_ISO_2022
412	static void U_CALLCONV
413	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
414	#endif
415
416	namespace {
417
418	/* const UConverterSharedData _ISO2022Data; */
419	extern const UConverterSharedData _ISO2022JPData;
420
421	#if !UCONFIG_ONLY_HTML_CONVERSION0
422	extern const UConverterSharedData _ISO2022KRData;
423	extern const UConverterSharedData _ISO2022CNData;
424	#endif
425
426	} // namespace
427
428	/************* Converter implementations ****************/
429
430	/* The purpose of this function is to get around gcc compiler warnings. */
/*
 * Thin adapter around ucnv_fromUWriteBytes for callers that track the output
 * position with uint8_t instead of char: the current target position is
 * staged in a char-typed local, every argument is forwarded unchanged, and
 * the (advanced) position is stored back through `target`.
 * Errors are reported solely through pErrorCode by the callee.
 * NOTE(review): this listing appears to have lost several `*` characters in
 * this function (declarators, casts and dereferences); check the pointer
 * levels against the real source file before relying on the text below.
 */
431	static inline void
432	fromUWriteUInt8(UConverter *cnv,
433	const char *bytes, int32_t length,
434	uint8_t *target, const char targetLimit,
435	int32_t **offsets,
436	int32_t sourceIndex,
437	UErrorCode *pErrorCode)
438	{
/* char-typed alias of the caller's output position */
439	char targetChars = (char )*target;
440	ucnv_fromUWriteBytesucnv_fromUWriteBytes_71(cnv, bytes, length, &targetChars, targetLimit,
441	offsets, sourceIndex, pErrorCode);
/* hand the updated position back to the caller */
442	target = (uint8_t)targetChars;
443
444	}
445
/*
 * Puts the to-Unicode side of the ISO-2022-KR variant into its initial state.
 * Does nothing unless myConverterData->version == 1; in that case it zeroes
 * toUnicodeStatus, mode and toULength of the sub-converter
 * (myConverterData->currentConverter), which must therefore be non-NULL.
 * The first parameter is unused (its name is commented out).
 */
446	static inline void
447	setInitialStateToUnicodeKR(UConverter* /converter/, UConverterDataISO2022 *myConverterData){
448	if(myConverterData->version == 1) {
449	UConverter *cnv = myConverterData->currentConverter;
450
451	cnv->toUnicodeStatus=0; /* offset */
452	cnv->mode=0; /* state */
453	cnv->toULength=0; /* byteIndex */
454	}
455	}
456
/*
 * Puts the from-Unicode side of the ISO-2022-KR variant into its initial state.
 *  - If converter->charErrorBuffer is empty, stores the 4-byte designator
 *    1B 24 29 43 (ESC $ ) C) there; a non-empty buffer is left untouched.
 *  - For version 1 only, resets the sub-converter (currentConverter):
 *    fromUChar32 = 0 and fromUnicodeStatus = 1.
 */
457	static inline void
458	setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
459	/* in ISO-2022-KR the designator sequence appears only once
460	* in a file so we append it only once
461	*/
462	if( converter->charErrorBufferLength==0){
463
464	converter->charErrorBufferLength = 4;
465	converter->charErrorBuffer[0] = 0x1b;
466	converter->charErrorBuffer[1] = 0x24;
467	converter->charErrorBuffer[2] = 0x29;
468	converter->charErrorBuffer[3] = 0x43;
469	}
470	if(myConverterData->version == 1) {
471	UConverter *cnv = myConverterData->currentConverter;
472
473	cnv->fromUChar32=0;
474	cnv->fromUnicodeStatus=1; /* prevLength */
475	}
476	}
477
/*
 * Open callback for the ISO-2022 converter family.
 *
 * Allocates and zero-fills the UConverterDataISO2022 hung off cnv->extraInfo,
 * then selects a variant from the first characters of pArgs->locale:
 *   "ja"/"jp"         -> ISO-2022-JP, version 0..MAX_JA_VERSION
 *   "ko"/"kr"         -> ISO-2022-KR, version 0..1
 *   "zh"/"cn"         -> ISO-2022-CN, version 0..2
 *   anything else     -> generic ISO-2022 if U_ENABLE_GENERIC_ISO_2022,
 *                        otherwise U_MISSING_RESOURCE_ERROR
 * (the locale must be exactly two letters or be followed by '_').
 * The version is the low 4 bits of pArgs->options.
 *
 * Errors are reported through *errorCode:
 *   U_MEMORY_ALLOCATION_ERROR  - extraInfo could not be allocated
 *   U_MISSING_RESOURCE_ERROR   - unsupported version or locale
 *   plus whatever ucnv_loadSharedData / ucnv_open report.
 * On the common exit path (end of function) a failure, or a request that
 * only tests loadability, is followed by _ISO2022Close(cnv).
 *
 * NOTE(review): the early `return`s for an unsupported version/locale leave
 * cnv->extraInfo allocated; presumably the caller closes the converter when
 * *errorCode signals failure - confirm, in particular for the
 * onlyTestIsLoadable case.
 */
478	static void U_CALLCONV
479	_ISO2022Open(UConverter cnv, UConverterLoadArgs pArgs, UErrorCode *errorCode){
480
/* six spaces + NUL: a missing locale matches none of the branches below */
481	char myLocale[7]={' ',' ',' ',' ',' ',' ', '\0'};
482
483	cnv->extraInfo = uprv_mallocuprv_malloc_71 (sizeof (UConverterDataISO2022));
484	if(cnv->extraInfo != NULL__null) {
485	UConverterNamePieces stackPieces;
486	UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER{ (int32_t)sizeof(UConverterLoadArgs), 0, false, false, 0, 0, __null, __null, __null };
487	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraInfo;
488	uint32_t version;
489
/* sub-charset loads inherit the caller's "test only" flag */
490	stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
491
/* zero-fill: all myConverterArray slots and currentConverter start out NULL */
492	uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022)):: memset(myConverterData, 0, sizeof(UConverterDataISO2022));
493	myConverterData->currentType = ASCII1;
494	cnv->fromUnicodeStatus =FALSE0;
495	if(pArgs->locale){
/* copies at most 6 bytes, so the terminator at myLocale[6] is never overwritten */
496	uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1):: strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1);
497	}
498	version = pArgs->options & UCNV_OPTIONS_VERSION_MASK0xf;
499	myConverterData->version = version;
/* ---- Japanese variant ---- */
500	if(myLocale[0]=='j' && (myLocale[1]=='a'\|\| myLocale[1]=='p') &&
501	(myLocale[2]=='_' \|\| myLocale[2]=='\0'))
502	{
503	/* open the required converters and cache them */
504	if(version>MAX_JA_VERSION) {
505	// ICU 55 fails to open a converter for an unsupported version.
506	// Previously, it fell back to version 0, but that would yield
507	// unexpected behavior.
508	*errorCode = U_MISSING_RESOURCE_ERROR;
509	return;
510	}
/* load only the charsets enabled for this version in jpCharsetMasks[] */
511	if(jpCharsetMasks[version]&CSM(ISO8859_7)((uint16_t)1<<(ISO8859_7))) {
512	myConverterData->myConverterArray[ISO8859_7] =
513	ucnv_loadSharedDataucnv_loadSharedData_71("ISO8859_7", &stackPieces, &stackArgs, errorCode);
514	}
/* the JISX208 slot is filled unconditionally, from the "Shift-JIS" data */
515	myConverterData->myConverterArray[JISX208] =
516	ucnv_loadSharedDataucnv_loadSharedData_71("Shift-JIS", &stackPieces, &stackArgs, errorCode);
517	if(jpCharsetMasks[version]&CSM(JISX212)((uint16_t)1<<(JISX212))) {
518	myConverterData->myConverterArray[JISX212] =
519	ucnv_loadSharedDataucnv_loadSharedData_71("jisx-212", &stackPieces, &stackArgs, errorCode);
520	}
521	if(jpCharsetMasks[version]&CSM(GB2312)((uint16_t)1<<(GB2312))) {
522	myConverterData->myConverterArray[GB2312] =
523	ucnv_loadSharedDataucnv_loadSharedData_71("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
524	}
525	if(jpCharsetMasks[version]&CSM(KSC5601)((uint16_t)1<<(KSC5601))) {
526	myConverterData->myConverterArray[KSC5601] =
527	ucnv_loadSharedDataucnv_loadSharedData_71("ksc_5601", &stackPieces, &stackArgs, errorCode);
528	}
529
530	/* set the function pointers to appropriate functions */
531	cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
532	uprv_strcpy(myConverterData->locale,"ja"):: strcpy(myConverterData->locale, "ja");
533
/* name = fixed prefix + one decimal digit for the version (0..MAX_JA_VERSION) */
534	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version="):: strcpy(myConverterData->name, "ISO_2022,locale=ja,version=" );
535	size_t len = uprv_strlen(myConverterData->name):: strlen(myConverterData->name);
536	myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
537	myConverterData->name[len+1]='\0';
538	}
539	#if !UCONFIG_ONLY_HTML_CONVERSION0
/* ---- Korean variant ---- */
540	else if(myLocale[0]=='k' && (myLocale[1]=='o'\|\| myLocale[1]=='r') &&
541	(myLocale[2]=='_' \|\| myLocale[2]=='\0'))
542	{
543	if(version>1) {
544	// ICU 55 fails to open a converter for an unsupported version.
545	// Previously, it fell back to version 0, but that would yield
546	// unexpected behavior.
547	*errorCode = U_MISSING_RESOURCE_ERROR;
548	return;
549	}
550	const char *cnvName;
551	if(version==1) {
552	cnvName="icu-internal-25546";
553	} else {
554	cnvName="ibm-949";
/* version can only be 0 here (values above 1 were rejected above) */
555	myConverterData->version=version=0;
556	}
557	if(pArgs->onlyTestIsLoadable) {
/* test-only request: probe the sub-converter, release extraInfo directly and stop */
558	ucnv_canCreateConverterucnv_canCreateConverter_71(cnvName, errorCode); /* errorCode carries result */
559	uprv_freeuprv_free_71(cnv->extraInfo);
560	cnv->extraInfo=NULL__null;
561	return;
562	} else {
563	myConverterData->currentConverter=ucnv_openucnv_open_71(cnvName, errorCode);
564	if (U_FAILURE(*errorCode)) {
565	_ISO2022Close(cnv);
566	return;
567	}
568
569	if(version==1) {
570	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1"):: strcpy(myConverterData->name, "ISO_2022,locale=ko,version=1" );
/* version 1 takes over the substitution bytes of the sub-converter */
571	uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(cnv->subChars , myConverterData->currentConverter->subChars, 4); } while (false);
572	cnv->subCharLen = myConverterData->currentConverter->subCharLen;
573	}else{
574	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0"):: strcpy(myConverterData->name, "ISO_2022,locale=ko,version=0" );
575	}
576
577	/* initialize the state variables */
578	setInitialStateToUnicodeKR(cnv, myConverterData);
579	setInitialStateFromUnicodeKR(cnv, myConverterData);
580
581	/* set the function pointers to appropriate functions */
582	cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
583	uprv_strcpy(myConverterData->locale,"ko"):: strcpy(myConverterData->locale, "ko");
584	}
585	}
/* ---- Chinese variant ---- */
586	else if(((myLocale[0]=='z' && myLocale[1]=='h') \|\| (myLocale[0]=='c'&& myLocale[1]=='n'))&&
587	(myLocale[2]=='_' \|\| myLocale[2]=='\0'))
588	{
589	if(version>2) {
590	// ICU 55 fails to open a converter for an unsupported version.
591	// Previously, it fell back to version 0, but that would yield
592	// unexpected behavior.
593	*errorCode = U_MISSING_RESOURCE_ERROR;
594	return;
595	}
596
597	/* open the required converters and cache them */
598	myConverterData->myConverterArray[GB2312_1] =
599	ucnv_loadSharedDataucnv_loadSharedData_71("ibm-5478", &stackPieces, &stackArgs, errorCode);
/* ISO-IR-165 is loaded for version 1 only */
600	if(version==1) {
601	myConverterData->myConverterArray[ISO_IR_165] =
602	ucnv_loadSharedDataucnv_loadSharedData_71("iso-ir-165", &stackPieces, &stackArgs, errorCode);
603	}
604	myConverterData->myConverterArray[CNS_11643] =
605	ucnv_loadSharedDataucnv_loadSharedData_71("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
606
607
608	/* set the function pointers to appropriate functions */
609	cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
/* the stored locale tag is "cn" although the converter name below says locale=zh */
610	uprv_strcpy(myConverterData->locale,"cn"):: strcpy(myConverterData->locale, "cn");
611
612	if (version==0){
613	myConverterData->version = 0;
614	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0"):: strcpy(myConverterData->name, "ISO_2022,locale=zh,version=0" );
615	}else if (version==1){
616	myConverterData->version = 1;
617	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1"):: strcpy(myConverterData->name, "ISO_2022,locale=zh,version=1" );
618	}else {
619	myConverterData->version = 2;
620	(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"):: strcpy(myConverterData->name, "ISO_2022,locale=zh,version=2" );
621	}
622	}
623	#endif // !UCONFIG_ONLY_HTML_CONVERSION
/* ---- no recognised locale ---- */
624	else{
625	#ifdef U_ENABLE_GENERIC_ISO_2022
626	myConverterData->isFirstBuffer = TRUE1;
627
628	/* append the UTF-8 escape sequence */
629	cnv->charErrorBufferLength = 3;
630	cnv->charErrorBuffer[0] = 0x1b;
631	cnv->charErrorBuffer[1] = 0x25;
632	cnv->charErrorBuffer[2] = 0x42;
633
634	cnv->sharedData=(UConverterSharedData*)&_ISO2022Data_ISO2022Data_71;
635	/* initialize the state variables */
636	uprv_strcpy(myConverterData->name,"ISO_2022"):: strcpy(myConverterData->name, "ISO_2022");
637	#else
638	*errorCode = U_MISSING_RESOURCE_ERROR;
639	// Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
640	// data loading error code.
641	return;
642	#endif
643	}
644
/* reads the sharedData that the selected branch installed above */
645	cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
646
/* load errors from the JP/CN branches are only examined here; a test-only request is also torn down here */
647	if(U_FAILURE(*errorCode) \|\| pArgs->onlyTestIsLoadable) {
648	_ISO2022Close(cnv);
649	}
650	} else {
651	*errorCode = U_MEMORY_ALLOCATION_ERROR;
652	}
653	}
654
655
/*
 * Close callback: releases everything _ISO2022Open attached to the converter.
 * When converter->extraInfo is non-NULL it unloads every non-NULL entry of
 * myConverterArray, closes currentConverter, and frees extraInfo itself
 * unless converter->isExtraLocal is set (the pointer is reset to NULL only
 * in the case where it is freed). A NULL extraInfo makes this a no-op.
 *
 * NOTE(review): `array` is derived from myData before the NULL check below.
 * It is not dereferenced on the NULL path, but forming it from a null base
 * pointer is formally questionable; consider computing it inside the check.
 * NOTE(review): currentConverter may be NULL here (JP/CN variants never set
 * it); presumably ucnv_close accepts NULL - confirm.
 */
656	static void U_CALLCONV
657	_ISO2022Close(UConverter *converter) {
658	UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
659	UConverterSharedData **array = myData->myConverterArray;
660	int32_t i;
661
662	if (converter->extraInfo != NULL__null) {
663	/close the array of converter pointers and free the memory/
664	for (i=0; i<UCNV_2022_MAX_CONVERTERS10; i++) {
665	if(array[i]!=NULL__null) {
666	ucnv_unloadSharedDataIfReadyucnv_unloadSharedDataIfReady_71(array[i]);
667	}
668	}
669
670	ucnv_closeucnv_close_71(myData->currentConverter);
671
/* extraInfo embedded in a caller-provided buffer must not be freed here */
672	if(!converter->isExtraLocal){
673	uprv_freeuprv_free_71 (converter->extraInfo);
674	converter->extraInfo = NULL__null;
675	}
676	}
677	}
678
/*
 * Reset callback. `choice` selects the direction(s) to reset:
 *   choice <= UCNV_RESET_TO_UNICODE : clear toU2022State, key, isEmptySegment
 *   choice != UCNV_RESET_TO_UNICODE : clear fromU2022State
 * so a value satisfying both conditions resets both directions.
 * For the Korean variant (locale[0] == 'k') the KR-specific initial state is
 * re-established for the direction(s) being reset.
 * Dereferences converter->extraInfo without a NULL check.
 */
679	static void U_CALLCONV
680	_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
681	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) (converter->extraInfo);
682	if(choice<=UCNV_RESET_TO_UNICODE) {
683	uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)):: memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State ));
684	myConverterData->key = 0;
685	myConverterData->isEmptySegment = FALSE0;
686	}
687	if(choice!=UCNV_RESET_TO_UNICODE) {
688	uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)):: memset(&myConverterData->fromU2022State, 0, sizeof( ISO2022State));
689	}
690	#ifdef U_ENABLE_GENERIC_ISO_2022
/* an empty locale marks the generic converter (_ISO2022Open sets no locale on that path) */
691	if(myConverterData->locale[0] == 0){
692	if(choice<=UCNV_RESET_TO_UNICODE) {
693	myConverterData->isFirstBuffer = TRUE1;
694	myConverterData->key = 0;
695	if (converter->mode == UCNV_SO0x0E){
696	ucnv_closeucnv_close_71 (myConverterData->currentConverter);
697	myConverterData->currentConverter=NULL__null;
698	}
699	converter->mode = UCNV_SI0x0F;
700	}
701	if(choice!=UCNV_RESET_TO_UNICODE) {
/*
 * NOTE(review): the bytes queued below are 1B 28 42 (ESC ( B), whereas
 * _ISO2022Open queues 1B 25 42 (ESC % B) under the same "UTF-8 escape
 * sequence" comment. One of the two is presumably wrong - confirm which.
 */
702	/* re-append UTF-8 escape sequence */
703	converter->charErrorBufferLength = 3;
704	converter->charErrorBuffer[0] = 0x1b;
705	converter->charErrorBuffer[1] = 0x28;
706	converter->charErrorBuffer[2] = 0x42;
707	}
708	}
709	else
710	#endif
711	{
712	/* reset the state variables */
/* 'k' matches the "ko" locale tag stored by _ISO2022Open; JP and CN need nothing beyond the memsets above */
713	if(myConverterData->locale[0] == 'k'){
714	if(choice<=UCNV_RESET_TO_UNICODE) {
715	setInitialStateToUnicodeKR(converter, myConverterData);
716	}
717	if(choice!=UCNV_RESET_TO_UNICODE) {
718	setInitialStateFromUnicodeKR(converter, myConverterData);
719	}
720	}
721	}
722	}
723
724	U_CDECL_BEGINextern "C" {
725
726	static const char * U_CALLCONV
727	_ISO2022getName(const UConverter* cnv){
728	if(cnv->extraInfo){
729	UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
730	return myData->name;
731	}
732	return NULL__null;
733	}
734
735	U_CDECL_END}
736
737
738	/************* to unicode *****************/
739	/****************************************************************************
740	* Recognized escape sequences are
741	* <ESC>(B ASCII
742	* <ESC>.A ISO-8859-1
743	* <ESC>.F ISO-8859-7
744	* <ESC>(J JISX-201
745	* <ESC>(I JISX-201
746	* <ESC>$B JISX-208
747	* <ESC>$@ JISX-208
748	* <ESC>$(D JISX-212
749	* <ESC>$A GB2312
750	* <ESC>$(C KSC5601
751	*/
752	static const int8_t nextStateToUnicodeJP[MAX_STATES_202274]= {
753	/* 0 1 2 3 4 5 6 7 8 9 */
754	INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
755	,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE
756	,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
757	,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE
758	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
759	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
760	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
761	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
762	};
763
764	#if !UCONFIG_ONLY_HTML_CONVERSION0
765	/************* to unicode *****************/
766	static const int8_t nextStateToUnicodeCN[MAX_STATES_202274]= {
767	/* 0 1 2 3 4 5 6 7 8 9 */
768	INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
769	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
770	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
771	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
772	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165
773	,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
774	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
775	,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
776	};
777	#endif
778
779
780	static UCNV_TableStates_2022
781	getKey_2022(char c,int32_t* key,int32_t* offset){
782	int32_t togo;
783	int32_t low = 0;
784	int32_t hi = MAX_STATES_202274;
785	int32_t oldmid=0;
786
787	togo = normalize_esq_chars_2022[(uint8_t)c];
788	if(togo == 0) {
789	/* not a valid character anywhere in an escape sequence */
790	*key = 0;
791	*offset = 0;
792	return INVALID_2022;
793	}
794	togo = (*key << 5) + togo;
795
796	while (hi != low) /*binary search*/{
797
798	int32_t mid = (hi+low) >> 1; /*Finds median*/
799
800	if (mid == oldmid)
801	break;
802
803	if (escSeqStateTable_Key_2022[mid] > togo){
804	hi = mid;
805	}
806	else if (escSeqStateTable_Key_2022[mid] < togo){
807	low = mid;
808	}
809	else /*we found it*/{
810	*key = togo;
811	*offset = mid;
812	return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
813	}
814	oldmid = mid;
815
816	}
817
818	*key = 0;
819	*offset = 0;
820	return INVALID_2022;
821	}
822
823	/*runs through a state machine to determine the escape sequence - codepage correspondence
824	*/
825	static void
826	changeState_2022(UConverter* _this,
827	const char** source,
828	const char* sourceLimit,
829	Variant2022 var,
830	UErrorCode* err){
831	UCNV_TableStates_2022 value;
832	UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
833	uint32_t key = myData2022->key;
834	int32_t offset = 0;
835	int8_t initialToULength = _this->toULength;
836	char c;
837
838	value = VALID_NON_TERMINAL_2022;
839	while (*source < sourceLimit) {
840	c = *(*source)++;
841	_this->toUBytes[_this->toULength++]=(uint8_t)c;
842	value = getKey_2022(c,(int32_t *) &key, &offset);
843
844	switch (value){
845
846	case VALID_NON_TERMINAL_2022 :
847	/* continue with the loop */
848	break;
849
850	case VALID_TERMINAL_2022:
851	key = 0;
852	goto DONE;
853
854	case INVALID_2022:
855	goto DONE;
856
857	case VALID_MAYBE_TERMINAL_2022:
858	#ifdef U_ENABLE_GENERIC_ISO_2022
859	/* ESC ( B is ambiguous only for ISO_2022 itself */
860	if(var == ISO_2022) {
861	/* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
862	_this->toULength = 0;
863
864	/* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
865
866	/* continue with the loop */
867	value = VALID_NON_TERMINAL_2022;
868	break;
869	} else
870	#endif
871	{
872	/* not ISO_2022 itself, finish here */
873	value = VALID_TERMINAL_2022;
874	key = 0;
875	goto DONE;
876	}
877	}
878	}
879
880	DONE:
881	myData2022->key = key;
882
883	if (value == VALID_NON_TERMINAL_2022) {
884	/* indicate that the escape sequence is incomplete: key!=0 */
885	return;
886	} else if (value == INVALID_2022 ) {
887	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
888	} else /* value == VALID_TERMINAL_2022 */ {
889	switch(var){
890	#ifdef U_ENABLE_GENERIC_ISO_2022
891	case ISO_2022:
892	{
893	const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
894	if(chosenConverterName == NULL__null) {
895	/* SS2 or SS3 */
896	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
897	_this->toUCallbackReason = UCNV_UNASSIGNED;
898	return;
899	}
900
901	_this->mode = UCNV_SI0x0F;
902	ucnv_closeucnv_close_71(myData2022->currentConverter);
903	myData2022->currentConverter = myUConverter = ucnv_openucnv_open_71(chosenConverterName, err);
904	if(U_SUCCESS(*err)) {
905	myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOPUCNV_TO_U_CALLBACK_STOP_71;
906	_this->mode = UCNV_SO0x0E;
907	}
908	break;
909	}
910	#endif
911	case ISO_2022_JP:
912	{
913	StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
914	switch(tempState) {
915	case INVALID_STATE:
916	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
917	break;
918	case SS2_STATE:
919	if(myData2022->toU2022State.cs[2]!=0) {
920	if(myData2022->toU2022State.g<2) {
921	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
922	}
923	myData2022->toU2022State.g=2;
924	} else {
925	/* illegal to have SS2 before a matching designator */
926	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
927	}
928	break;
929	/* case SS3_STATE: not used in ISO-2022-JP-x */
930	case ISO8859_1:
931	case ISO8859_7:
932	if((jpCharsetMasks[myData2022->version] & CSM(tempState)((uint16_t)1<<(tempState))) == 0) {
933	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
934	} else {
935	/* G2 charset for SS2 */
936	myData2022->toU2022State.cs[2]=(int8_t)tempState;
937	}
938	break;
939	default:
940	if((jpCharsetMasks[myData2022->version] & CSM(tempState)((uint16_t)1<<(tempState))) == 0) {
941	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
942	} else {
943	/* G0 charset */
944	myData2022->toU2022State.cs[0]=(int8_t)tempState;
945	}
946	break;
947	}
948	}
949	break;
950	#if !UCONFIG_ONLY_HTML_CONVERSION0
951	case ISO_2022_CN:
952	{
953	StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
954	switch(tempState) {
955	case INVALID_STATE:
956	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
957	break;
958	case SS2_STATE:
959	if(myData2022->toU2022State.cs[2]!=0) {
960	if(myData2022->toU2022State.g<2) {
961	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
962	}
963	myData2022->toU2022State.g=2;
964	} else {
965	/* illegal to have SS2 before a matching designator */
966	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
967	}
968	break;
969	case SS3_STATE:
970	if(myData2022->toU2022State.cs[3]!=0) {
971	if(myData2022->toU2022State.g<2) {
972	myData2022->toU2022State.prevG=myData2022->toU2022State.g;
973	}
974	myData2022->toU2022State.g=3;
975	} else {
976	/* illegal to have SS3 before a matching designator */
977	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
978	}
979	break;
980	case ISO_IR_165:
981	if(myData2022->version==0) {
982	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
983	break;
984	}
985	U_FALLTHROUGH[[clang::fallthrough]];
986	case GB2312_1:
987	U_FALLTHROUGH[[clang::fallthrough]];
988	case CNS_11643_1:
989	myData2022->toU2022State.cs[1]=(int8_t)tempState;
990	break;
991	case CNS_11643_2:
992	myData2022->toU2022State.cs[2]=(int8_t)tempState;
993	break;
994	default:
995	/* other CNS 11643 planes */
996	if(myData2022->version==0) {
997	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
998	} else {
999	myData2022->toU2022State.cs[3]=(int8_t)tempState;
1000	}
1001	break;
1002	}
1003	}
1004	break;
1005	case ISO_2022_KR:
1006	if(offset==0x30){
1007	/* nothing to be done, just accept this one escape sequence */
1008	} else {
1009	*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
1010	}
1011	break;
1012	#endif // !UCONFIG_ONLY_HTML_CONVERSION
1013
1014	default:
1015	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
1016	break;
1017	}
1018	}
1019	if(U_SUCCESS(*err)) {
1020	_this->toULength = 0;
1021	} else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
1022	if(_this->toULength>1) {
1023	/*
1024	* Ticket 5691: consistent illegal sequences:
1025	* - We include at least the first byte (ESC) in the illegal sequence.
1026	* - If any of the non-initial bytes could be the start of a character,
1027	* we stop the illegal sequence before the first one of those.
1028	* In escape sequences, all following bytes are "printable", that is,
1029	* unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
1030	* they are valid single/lead bytes.
1031	* For simplicity, we always only report the initial ESC byte as the
1032	* illegal sequence and back out all other bytes we looked at.
1033	*/
1034	/* Back out some bytes. */
1035	int8_t backOutDistance=_this->toULength-1;
1036	int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
1037	if(backOutDistance<=bytesFromThisBuffer) {
1038	/* same as initialToULength<=1 */
1039	*source-=backOutDistance;
1040	} else {
1041	/* Back out bytes from the previous buffer: Need to replay them. */
1042	_this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
1043	/* same as -(initialToULength-1) */
1044	/* preToULength is negative! */
1045	uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(_this-> preToU, _this->toUBytes+1, -_this->preToULength); } while (false);
1046	*source-=bytesFromThisBuffer;
1047	}
1048	_this->toULength=1;
1049	}
1050	} else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
1051	_this->toUCallbackReason = UCNV_UNASSIGNED;
1052	}
1053	}
1054
1055	#if !UCONFIG_ONLY_HTML_CONVERSION0
1056	/*Checks the characters of the buffer against valid 2022 escape sequences
1057	*if the match we return a pointer to the initial start of the sequence otherwise
1058	*we return sourceLimit
1059	*/
1060	/*for 2022 looks ahead in the stream
1061	*to determine the longest possible convertible
1062	*data stream
1063	*/
1064	static inline const char*
1065	getEndOfBuffer_2022(const char** source,
1066	const char* sourceLimit,
1067	UBool /*flush*/){
1068
1069	const char* mySource = *source;
1070
1071	#ifdef U_ENABLE_GENERIC_ISO_2022
1072	if (*source >= sourceLimit)
1073	return sourceLimit;
1074
1075	do{
1076
1077	if (*mySource == ESC_20220x1B){
1078	int8_t i;
1079	int32_t key = 0;
1080	int32_t offset;
1081	UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
1082
1083	/* Kludge: I could not
1084	* figure out the reason for validating an escape sequence
1085	* twice - once here and once in changeState_2022().
1086	* is it possible to have an ESC character in a ISO2022
1087	* byte stream which is valid in a code page? Is it legal?
1088	*/
1089	for (i=0;
1090	(mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
1091	i++) {
1092	value = getKey_2022(*(mySource+i), &key, &offset);
1093	}
1094	if (value > 0 || *mySource==ESC_20220x1B)
1095	return mySource;
1096
1097	if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
1098	return sourceLimit;
1099	}
1100	}while (++mySource < sourceLimit);
1101
1102	return sourceLimit;
1103	#else
1104	while(mySource < sourceLimit && *mySource != ESC_20220x1B) {
1105	++mySource;
1106	}
1107	return mySource;
1108	#endif
1109	}
1110	#endif
1111
1112	/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
1113	* any future change in _MBCSFromUChar32() function should be reflected here.
1114	* @return number of bytes in *value; negative number if fallback; 0 if no mapping
1115	*/
1116	static inline int32_t
1117	MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
1118	UChar32 c,
1119	uint32_t* value,
1120	UBool useFallback,
1121	int outputType)
1122	{
1123	const int32_t *cx;
1124	const uint16_t *table;
1125	uint32_t stage2Entry;
1126	uint32_t myValue;
1127	int32_t length;
1128	const uint8_t *p;
1129	/*
1130	* TODO(markus): Use and require new, faster MBCS conversion table structures.
1131	* Use internal version of ucnv_open() that verifies that the new structures are available,
1132	* else U_INTERNAL_PROGRAM_ERROR.
1133	*/
1134	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
1135	if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
1136	table=sharedData->mbcs.fromUnicodeTable;
1137	stage2Entry=MBCS_STAGE_2_FROM_U(table, c)((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>> 4)&0x3f) ];
1138	/* get the bytes and the length for the output */
1139	if(outputType==MBCS_OUTPUT_2){
1140	myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((uint16_t *)(sharedData->mbcs.fromUnicodeBytes))[16*(uint32_t )(uint16_t)(stage2Entry)+((c)&0xf)];
1141	if(myValue<=0xff) {
1142	length=1;
1143	} else {
1144	length=2;
1145	}
1146	} else /* outputType==MBCS_OUTPUT_3 */ {
1147	p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c)((sharedData->mbcs.fromUnicodeBytes)+(16*(uint32_t)(uint16_t )(stage2Entry)+((c)&0xf))*3);
1148	myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
1149	if(myValue<=0xff) {
1150	length=1;
1151	} else if(myValue<=0xffff) {
1152	length=2;
1153	} else {
1154	length=3;
1155	}
1156	}
1157	/* is this code point assigned, or do we use fallbacks? */
1158	if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
1159	/* assigned */
1160	*value=myValue;
1161	return length;
1162	} else if(FROM_U_USE_FALLBACK(useFallback, c)((useFallback) || ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t )((c)-0xf0000)<0x20000)) && myValue!=0) {
1163	/*
1164	* We allow a 0 byte output if the "assigned" bit is set for this entry.
1165	* There is no way with this data structure for fallback output
1166	* to be a zero byte.
1167	*/
1168	*value=myValue;
1169	return -length;
1170	}
1171	}
1172
1173	cx=sharedData->mbcs.extIndexes;
1174	if(cx!=NULL__null) {
1175	return ucnv_extSimpleMatchFromUucnv_extSimpleMatchFromU_71(cx, c, value, useFallback);
1176	}
1177
1178	/* unassigned */
1179	return 0;
1180	}
1181
1182	/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
1183	* any future change in _MBCSSingleFromUChar32() function should be reflected here.
1184	* @param retval pointer to output byte
1185	* @return 1 roundtrip byte 0 no mapping -1 fallback byte
1186	*/
1187	static inline int32_t
1188	MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
1189	UChar32 c,
1190	uint32_t* retval,
1191	UBool useFallback)
1192	{
1193	const uint16_t *table;
1194	int32_t value;
1195	/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
1196	if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY1)) {
1197	return 0;
1198	}
1199	/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
1200	table=sharedData->mbcs.fromUnicodeTable;
1201	/* get the byte for the output */
1202	value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c)((uint16_t *)sharedData->mbcs.fromUnicodeBytes)[ (table)[ ( table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)& 0xf) ];
1203	/* is this code point assigned, or do we use fallbacks? */
1204	*retval=(uint32_t)(value&0xff);
1205	if(value>=0xf00) {
1206	return 1; /* roundtrip */
1207	} else if(useFallback ? value>=0x800 : value>=0xc00) {
1208	return -1; /* fallback taken */
1209	} else {
1210	return 0; /* no mapping */
1211	}
1212	}
1213
1214	/*
1215	* Check that the result is a 2-byte value with each byte in the range A1..FE
1216	* (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
1217	* to move it to the ISO 2022 range 21..7E.
1218	* Return 0 if out of range.
1219	*/
1220	static inline uint32_t
1221	_2022FromGR94DBCS(uint32_t value) {
1222	if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
1223	(uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
1224	) {
1225	return value - 0x8080; /* shift down to 21..7e byte range */
1226	} else {
1227	return 0; /* not valid for ISO 2022 */
1228	}
1229	}
1230
1231	#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
1232	/*
1233	* This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
1234	* 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
1235	* unchanged.
1236	*/
1237	static inline uint32_t
1238	_2022ToGR94DBCS(uint32_t value) {
1239	uint32_t returnValue = value + 0x8080;
1240	if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
1241	(uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
1242	return returnValue;
1243	} else {
1244	return value;
1245	}
1246	}
1247	#endif
1248
1249	#ifdef U_ENABLE_GENERIC_ISO_2022
1250
1251	/**********************************************************************************
1252	* ISO-2022 Converter
1253	*
1254	*
1255	*/
1256
1257	static void U_CALLCONV
1258	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
1259	UErrorCode* err){
1260	const char* mySourceLimit, *realSourceLimit;
1261	const char* sourceStart;
1262	const UChar* myTargetStart;
1263	UConverter* saveThis;
1264	UConverterDataISO2022* myData;
1265	int8_t length;
1266
1267	saveThis = args->converter;
1268	myData=((UConverterDataISO2022*)(saveThis->extraInfo));
1269
1270	realSourceLimit = args->sourceLimit;
1271	while (args->source < realSourceLimit) {
1272	if(myData->key == 0) { /* are we in the middle of an escape sequence? */
1273	/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
1274	mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
1275
1276	if(args->source < mySourceLimit) {
1277	if(myData->currentConverter==NULL__null) {
1278	myData->currentConverter = ucnv_openucnv_open_71("ASCII",err);
1279	if(U_FAILURE(*err)){
1280	return;
1281	}
1282
1283	myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOPUCNV_TO_U_CALLBACK_STOP_71;
1284	saveThis->mode = UCNV_SO0x0E;
1285	}
1286
1287	/* convert to before the ESC or until the end of the buffer */
1288	myData->isFirstBuffer=FALSE0;
1289	sourceStart = args->source;
1290	myTargetStart = args->target;
1291	args->converter = myData->currentConverter;
1292	ucnv_toUnicodeucnv_toUnicode_71(args->converter,
1293	&args->target,
1294	args->targetLimit,
1295	&args->source,
1296	mySourceLimit,
1297	args->offsets,
1298	(UBool)(args->flush && mySourceLimit == realSourceLimit),
1299	err);
1300	args->converter = saveThis;
1301
1302	if (*err == U_BUFFER_OVERFLOW_ERROR) {
1303	/* move the overflow buffer */
1304	length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
1305	myData->currentConverter->UCharErrorBufferLength = 0;
1306	if(length > 0) {
1307	uprv_memcpy(saveThis->UCharErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveThis-> UCharErrorBuffer, myData->currentConverter->UCharErrorBuffer , length*2); } while (false)
1308	myData->currentConverter->UCharErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveThis-> UCharErrorBuffer, myData->currentConverter->UCharErrorBuffer , length*2); } while (false)
1309	length*U_SIZEOF_UCHAR)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveThis-> UCharErrorBuffer, myData->currentConverter->UCharErrorBuffer , length*2); } while (false);
1310	}
1311	return;
1312	}
1313
1314	/*
1315	* At least one of:
1316	* -Error while converting
1317	* -Done with entire buffer
1318	* -Need to write offsets or update the current offset
1319	* (leave that up to the code in ucnv.c)
1320	*
1321	* or else we just stopped at an ESC byte and continue with changeState_2022()
1322	*/
1323	if (U_FAILURE(*err) ||
1324	(args->source == realSourceLimit) ||
1325	(args->offsets != NULL__null && (args->target != myTargetStart || args->source != sourceStart) ||
1326	(mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
1327	) {
1328	/* copy partial or error input for truncated detection and error handling */
1329	if(U_FAILURE(*err)) {
1330	length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
1331	if(length > 0) {
1332	uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveThis-> invalidCharBuffer, myData->currentConverter->invalidCharBuffer , length); } while (false);
1333	}
1334	} else {
1335	length = saveThis->toULength = myData->currentConverter->toULength;
1336	if(length > 0) {
1337	uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveThis-> toUBytes, myData->currentConverter->toUBytes, length); } while (false);
1338	if(args->source < mySourceLimit) {
1339	*err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
1340	}
1341	}
1342	}
1343	return;
1344	}
1345	}
1346	}
1347
1348	sourceStart = args->source;
1349	changeState_2022(args->converter,
1350	&(args->source),
1351	realSourceLimit,
1352	ISO_2022,
1353	err);
1354	if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL__null)) {
1355	/* let the ucnv.c code update its current offset */
1356	return;
1357	}
1358	}
1359	}
1360
1361	#endif
1362
1363	/*
1364	* To Unicode Callback helper function
1365	*/
1366	static void
1367	toUnicodeCallback(UConverter *cnv,
1368	const uint32_t sourceChar, const uint32_t targetUniChar,
1369	UErrorCode* err){
1370	if(sourceChar>0xff){
1371	cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
1372	cnv->toUBytes[1] = (uint8_t)sourceChar;
1373	cnv->toULength = 2;
1374	}
1375	else{
1376	cnv->toUBytes[0] =(char) sourceChar;
1377	cnv->toULength = 1;
1378	}
1379
1380	if(targetUniChar == (missingCharMarker0xFFFF-1/*0xfffe*/)){
1381	*err = U_INVALID_CHAR_FOUND;
1382	}
1383	else{
1384	*err = U_ILLEGAL_CHAR_FOUND;
1385	}
1386	}
1387
1388	/************************************ISO-2022-JP***********************************************/
1389
1390	/************************************ IMPORTANT ************************************************
1391	* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
1392	* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
1393	* The converter iterates over each Unicode codepoint
1394	* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
1395	* processed one char at a time it would make sense to reduce the extra processing a canned converter
1396	* would do as far as possible.
1397	*
1398	* If the implementation of these macros or structure of sharedData struct change in the future, make
1399	* sure that ISO-2022 is also changed.
1400	***************************************************************************************************
1401	*/
1402
1403	/***************************************************************************************************
1404	* Rules for ISO-2022-jp encoding
1405	* (i) Escape sequences must be fully contained within a line they should not
1406	* span new lines or CRs
1407	* (ii) If the last character on a line is represented by two bytes then an ASCII or
1408	* JIS-Roman character escape sequence should follow before the line terminates
1409	* (iii) If the first character on the line is represented by two bytes then a two
1410	* byte character escape sequence should precede it
1411	* (iv) If no escape sequence is encountered then the characters are ASCII
1412	* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
1413	* and invoked with SS2 (ESC N).
1414	* (vi) If there is any G0 designation in text, there must be a switch to
1415	* ASCII or to JIS X 0201-Roman before a space character (but not
1416	* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
1417	* characters such as tab or CRLF.
1418	* (vi) Supported encodings:
1419	* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
1420	*
1421	* source : RFC-1554
1422	*
1423	* JISX201, JISX208,JISX212 : new .cnv data files created
1424	* KSC5601 : alias to ibm-949 mapping table
1425	* GB2312 : alias to ibm-1386 mapping table
1426	* ISO-8859-1 : Algorithmic implemented as LATIN1 case
1427	* ISO-8859-7 : alias to ibm-9409 mapping table
1428	*/
1429
1430	/* preference order of JP charsets */
1431	static const StateEnum jpCharsetPref[]={
1432	ASCII,
1433	JISX201,
1434	ISO8859_1,
1435	JISX208,
1436	ISO8859_7,
1437	JISX212,
1438	GB2312,
1439	KSC5601,
1440	HWKANA_7BIT
1441	};
1442
1443	/*
1444	* The escape sequences must be in order of the enum constants like JISX201 = 3,
1445	* not in order of jpCharsetPref[]!
1446	*/
1447	static const char escSeqChars[][6] ={
1448	"\x1B\x28\x42", /* <ESC>(B ASCII */
1449	"\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */
1450	"\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */
1451	"\x1B\x28\x4A", /* <ESC>(J JISX-201 */
1452	"\x1B\x24\x42", /* <ESC>$B JISX-208 */
1453	"\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */
1454	"\x1B\x24\x41", /* <ESC>$A GB2312 */
1455	"\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */
1456	"\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */
1457
1458	};
1459	static const int8_t escSeqCharsLen[] ={
1460	3, /* length of <ESC>(B ASCII */
1461	3, /* length of <ESC>.A ISO-8859-1 */
1462	3, /* length of <ESC>.F ISO-8859-7 */
1463	3, /* length of <ESC>(J JISX-201 */
1464	3, /* length of <ESC>$B JISX-208 */
1465	4, /* length of <ESC>$(D JISX-212 */
1466	3, /* length of <ESC>$A GB2312 */
1467	4, /* length of <ESC>$(C KSC5601 */
1468	3 /* length of <ESC>(I HWKANA_7BIT */
1469	};
1470
1471	/*
1472	* The iteration over various code pages works this way:
1473	* i) Get the currentState from myConverterData->currentState
1474	* ii) Check if the character is mapped to a valid character in the currentState
1475	* Yes -> a) set the initIterState to currentState
1476	* b) remain in this state until an invalid character is found
1477	* No -> a) go to the next code page and find the character
1478	* iii) Before changing the state increment the current state check if the current state
1479	* is equal to the intitIteration state
1480	* Yes -> A character that cannot be represented in any of the supported encodings
1481	* break and return a U_INVALID_CHARACTER error
1482	* No -> Continue and find the character in next code page
1483	*
1484	*
1485	* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
1486	*/
1487
1488	/* Map 00..7F to Unicode according to JIS X 0201. */
1489	static inline uint32_t
1490	jisx201ToU(uint32_t value) {
1491	if(value < 0x5c) {
1492	return value;
1493	} else if(value == 0x5c) {
1494	return 0xa5;
1495	} else if(value == 0x7e) {
1496	return 0x203e;
1497	} else /* value <= 0x7f */ {
1498	return value;
1499	}
1500	}
1501
1502	/* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
1503	static inline uint32_t
1504	jisx201FromU(uint32_t value) {
1505	if(value<=0x7f) {
1506	if(value!=0x5c && value!=0x7e) {
1507	return value;
1508	}
1509	} else if(value==0xa5) {
1510	return 0x5c;
1511	} else if(value==0x203e) {
1512	return 0x7e;
1513	}
1514	return 0xfffe;
1515	}
1516
1517	/*
1518	* Take a valid Shift-JIS byte pair, check that it is in the range corresponding
1519	* to JIS X 0208, and convert it to a pair of 21..7E bytes.
1520	* Return 0 if the byte pair is out of range.
1521	*/
1522	static inline uint32_t
1523	_2022FromSJIS(uint32_t value) {
1524	uint8_t trail;
1525
1526	if(value > 0xEFFC) {
1527	return 0; /* beyond JIS X 0208 */
1528	}
1529
1530	trail = (uint8_t)value;
1531
1532	value &= 0xff00; /* lead byte */
1533	if(value <= 0x9f00) {
1534	value -= 0x7000;
1535	} else /* 0xe000 <= value <= 0xef00 */ {
1536	value -= 0xb000;
1537	}
1538	value <<= 1;
1539
1540	if(trail <= 0x9e) {
1541	value -= 0x100;
1542	if(trail <= 0x7e) {
1543	value |= trail - 0x1f;
1544	} else {
1545	value |= trail - 0x20;
1546	}
1547	} else /* trail <= 0xfc */ {
1548	value |= trail - 0x7e;
1549	}
1550	return value;
1551	}
1552
1553	/*
1554	* Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
1555	* If either byte is outside 21..7E make sure that the result is not valid
1556	* for Shift-JIS so that the converter catches it.
1557	* Some invalid byte values already turn into equally invalid Shift-JIS
1558	* byte values and need not be tested explicitly.
1559	*/
1560	static inline void
1561	_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
1562	if(c1&1) {
1563	++c1;
1564	if(c2 <= 0x5f) {
1565	c2 += 0x1f;
1566	} else if(c2 <= 0x7e) {
1567	c2 += 0x20;
1568	} else {
1569	c2 = 0; /* invalid */
1570	}
1571	} else {
1572	if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
1573	c2 += 0x7e;
1574	} else {
1575	c2 = 0; /* invalid */
1576	}
1577	}
1578	c1 >>= 1;
1579	if(c1 <= 0x2f) {
1580	c1 += 0x70;
1581	} else if(c1 <= 0x3f) {
1582	c1 += 0xb0;
1583	} else {
1584	c1 = 0; /* invalid */
1585	}
1586	bytes[0] = (char)c1;
1587	bytes[1] = (char)c2;
1588	}
1589
1590	/*
1591	* JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
1592	* Katakana.
1593	* Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
1594	* because Shift-JIS roundtrips half-width Katakana to single bytes.
1595	* These were the only fallbacks in ICU's jisx-208.ucm file.
1596	*/
1597	static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
1598	0x2123, /* U+FF61 */
1599	0x2156,
1600	0x2157,
1601	0x2122,
1602	0x2126,
1603	0x2572,
1604	0x2521,
1605	0x2523,
1606	0x2525,
1607	0x2527,
1608	0x2529,
1609	0x2563,
1610	0x2565,
1611	0x2567,
1612	0x2543,
1613	0x213C, /* U+FF70 */
1614	0x2522,
1615	0x2524,
1616	0x2526,
1617	0x2528,
1618	0x252A,
1619	0x252B,
1620	0x252D,
1621	0x252F,
1622	0x2531,
1623	0x2533,
1624	0x2535,
1625	0x2537,
1626	0x2539,
1627	0x253B,
1628	0x253D,
1629	0x253F, /* U+FF80 */
1630	0x2541,
1631	0x2544,
1632	0x2546,
1633	0x2548,
1634	0x254A,
1635	0x254B,
1636	0x254C,
1637	0x254D,
1638	0x254E,
1639	0x254F,
1640	0x2552,
1641	0x2555,
1642	0x2558,
1643	0x255B,
1644	0x255E,
1645	0x255F, /* U+FF90 */
1646	0x2560,
1647	0x2561,
1648	0x2562,
1649	0x2564,
1650	0x2566,
1651	0x2568,
1652	0x2569,
1653	0x256A,
1654	0x256B,
1655	0x256C,
1656	0x256D,
1657	0x256F,
1658	0x2573,
1659	0x212B,
1660	0x212C /* U+FF9F */
1661	};
1662
1663	static void U_CALLCONV
1664	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
1665	UConverter *cnv = args->converter;
1666	UConverterDataISO2022 *converterData;
1667	ISO2022State *pFromU2022State;
1668	uint8_t target = (uint8_t ) args->target;
1669	const uint8_t targetLimit = (const uint8_t ) args->targetLimit;
1670	const UChar* source = args->source;
1671	const UChar* sourceLimit = args->sourceLimit;
1672	int32_t* offsets = args->offsets;
1673	UChar32 sourceChar;
1674	char buffer[8];
1675	int32_t len, outLen;
1676	int8_t choices[10];
1677	int32_t choiceCount;
1678	uint32_t targetValue = 0;
1679	UBool useFallback;
1680
1681	int32_t i;
1682	int8_t cs, g;
1683
1684	/* set up the state */
1685	converterData = (UConverterDataISO2022*)cnv->extraInfo;
1686	pFromU2022State = &converterData->fromU2022State;
1687
1688	choiceCount = 0;
1689
1690	/* check if the last codepoint of previous buffer was a lead surrogate*/
1691	if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
1692	goto getTrail;
1693	}
1694
1695	while(source < sourceLimit) {
1696	if(target < targetLimit) {
1697
1698	sourceChar = *(source++);
1699	/check if the char is a First surrogate/
1700	if(U16_IS_SURROGATE(sourceChar)(((sourceChar)&0xfffff800)==0xd800)) {
1701	if(U16_IS_SURROGATE_LEAD(sourceChar)(((sourceChar)&0x400)==0)) {
1702	getTrail:
1703	/look ahead to find the trail surrogate/
1704	if(source < sourceLimit) {
1705	/* test the following code unit */
1706	UChar trail=(UChar) *source;
1707	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
1708	source++;
1709	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail)(((UChar32)(sourceChar)<<10UL)+(UChar32)(trail)-((0xd800 <<10UL)+0xdc00-0x10000));
1710	cnv->fromUChar32=0x00;
1711	/* convert this supplementary code point */
1712	/* exit this condition tree */
1713	} else {
1714	/* this is an unmatched lead code unit (1st surrogate) */
1715	/* callback(illegal) */
1716	*err=U_ILLEGAL_CHAR_FOUND;
1717	cnv->fromUChar32=sourceChar;
1718	break;
1719	}
1720	} else {
1721	/* no more input */
1722	cnv->fromUChar32=sourceChar;
1723	break;
1724	}
1725	} else {
1726	/* this is an unmatched trail code unit (2nd surrogate) */
1727	/* callback(illegal) */
1728	*err=U_ILLEGAL_CHAR_FOUND;
1729	cnv->fromUChar32=sourceChar;
1730	break;
1731	}
1732	}
1733
1734	/* do not convert SO/SI/ESC */
1735	if(IS_2022_CONTROL(sourceChar)(((sourceChar)<0x20) && (((uint32_t)1<<(sourceChar ))&0x0800c000)!=0)) {
1736	/* callback(illegal) */
1737	*err=U_ILLEGAL_CHAR_FOUND;
1738	cnv->fromUChar32=sourceChar;
1739	break;
1740	}
1741
1742	/* do the conversion */
1743
1744	if(choiceCount == 0) {
1745	uint16_t csm;
1746
1747	/*
1748	* The csm variable keeps track of which charsets are allowed
1749	* and not used yet while building the choices[].
1750	*/
1751	csm = jpCharsetMasks[converterData->version];
1752	choiceCount = 0;
1753
1754	/* JIS7/8: try single-byte half-width Katakana before JISX208 */
1755	if(converterData->version == 3 \|\| converterData->version == 4) {
1756	choices[choiceCount++] = (int8_t)HWKANA_7BIT;
1757	}
1758	/* Do not try single-byte half-width Katakana for other versions. */
1759	csm &= ~CSM(HWKANA_7BIT)((uint16_t)1<<(HWKANA_7BIT));
1760
1761	/* try the current G0 charset */
1762	choices[choiceCount++] = cs = pFromU2022State->cs[0];
1763	csm &= ~CSM(cs)((uint16_t)1<<(cs));
1764
1765	/* try the current G2 charset */
1766	if((cs = pFromU2022State->cs[2]) != 0) {
1767	choices[choiceCount++] = cs;
1768	csm &= ~CSM(cs)((uint16_t)1<<(cs));
1769	}
1770
1771	/* try all the other possible charsets */
1772	for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref)(int32_t)(sizeof(jpCharsetPref)/sizeof((jpCharsetPref)[0])); ++i) {
1773	cs = (int8_t)jpCharsetPref[i];
1774	if(CSM(cs)((uint16_t)1<<(cs)) & csm) {
1775	choices[choiceCount++] = cs;
1776	csm &= ~CSM(cs)((uint16_t)1<<(cs));
1777	}
1778	}
1779	}
1780
1781	cs = g = 0;
1782	/*
1783	* len==0: no mapping found yet
1784	* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
1785	* len>0: found a roundtrip result, done
1786	*/
1787	len = 0;
1788	/*
1789	* We will turn off useFallback after finding a fallback,
1790	* but we still get fallbacks from PUA code points as usual.
1791	* Therefore, we will also need to check that we don't overwrite
1792	* an early fallback with a later one.
1793	*/
1794	useFallback = cnv->useFallback;
1795
1796	for(i = 0; i < choiceCount && len <= 0; ++i) {
1797	uint32_t value;
1798	int32_t len2;
1799	int8_t cs0 = choices[i];
1800	switch(cs0) {
1801	case ASCII:
1802	if(sourceChar <= 0x7f) {
1803	targetValue = (uint32_t)sourceChar;
1804	len = 1;
1805	cs = cs0;
1806	g = 0;
1807	}
1808	break;
1809	case ISO8859_1:
1810	if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
1811	targetValue = (uint32_t)sourceChar - 0x80;
1812	len = 1;
1813	cs = cs0;
1814	g = 2;
1815	}
1816	break;
1817	case HWKANA_7BIT:
1818	if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
1819	if(converterData->version==3) {
1820	/* JIS7: use G1 (SO) */
1821	/* Shift U+FF61..U+FF9F to bytes 21..5F. */
1822	targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
1823	len = 1;
1824	pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
1825	g = 1;
1826	} else if(converterData->version==4) {
1827	/* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
1828	/* Shift U+FF61..U+FF9F to bytes A1..DF. */
1829	targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
1830	len = 1;
1831
1832	cs = pFromU2022State->cs[0];
1833	if(IS_JP_DBCS(cs)(JISX208<=(cs) && (cs)<=KSC5601)) {
1834	/* switch from a DBCS charset to JISX201 */
1835	cs = (int8_t)JISX201;
1836	}
1837	/* else stay in the current G0 charset */
1838	g = 0;
1839	}
1840	/* else do not use HWKANA_7BIT with other versions */
1841	}
1842	break;
1843	case JISX201:
1844	/* G0 SBCS */
1845	value = jisx201FromU(sourceChar);
1846	if(value <= 0x7f) {
1847	targetValue = value;
1848	len = 1;
1849	cs = cs0;
1850	g = 0;
1851	useFallback = FALSE0;
1852	}
1853	break;
1854	case JISX208:
1855	/* G0 DBCS from Shift-JIS table */
1856	len2 = MBCS_FROM_UCHAR32_ISO2022(
1857	converterData->myConverterArray[cs0],
1858	sourceChar, &value,
1859	useFallback, MBCS_OUTPUT_2);
1860	if(len2 == 2 \|\| (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1861	value = _2022FromSJIS(value);
1862	if(value != 0) {
1863	targetValue = value;
1864	len = len2;
1865	cs = cs0;
1866	g = 0;
1867	useFallback = FALSE0;
1868	}
1869	} else if(len == 0 && useFallback &&
1870	(uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
1871	targetValue = hwkana_fb[sourceChar - HWKANA_START];
1872	len = -2;
1873	cs = cs0;
1874	g = 0;
1875	useFallback = FALSE0;
1876	}
1877	break;
1878	case ISO8859_7:
1879	/* G0 SBCS forced to 7-bit output */
1880	len2 = MBCS_SINGLE_FROM_UCHAR32(
1881	converterData->myConverterArray[cs0],
1882	sourceChar, &value,
1883	useFallback);
1884	if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
1885	targetValue = value - 0x80;
1886	len = len2;
1887	cs = cs0;
1888	g = 2;
1889	useFallback = FALSE0;
1890	}
1891	break;
1892	default:
1893	/* G0 DBCS */
1894	len2 = MBCS_FROM_UCHAR32_ISO2022(
1895	converterData->myConverterArray[cs0],
1896	sourceChar, &value,
1897	useFallback, MBCS_OUTPUT_2);
1898	if(len2 == 2 \|\| (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
1899	if(cs0 == KSC5601) {
1900	/*
1901	* Check for valid bytes for the encoding scheme.
1902	* This is necessary because the sub-converter (windows-949)
1903	* has a broader encoding scheme than is valid for 2022.
1904	*/
1905	value = _2022FromGR94DBCS(value);
1906	if(value == 0) {
1907	break;
1908	}
1909	}
1910	targetValue = value;
1911	len = len2;
1912	cs = cs0;
1913	g = 0;
1914	useFallback = FALSE0;
1915	}
1916	break;
1917	}
1918	}
1919
1920	if(len != 0) {
1921	if(len < 0) {
1922	len = -len; /* fallback */
1923	}
1924	outLen = 0; /* count output bytes */
1925
1926	/* write SI if necessary (only for JIS7) */
1927	if(pFromU2022State->g == 1 && g == 0) {
1928	buffer[outLen++] = UCNV_SI0x0F;
1929	pFromU2022State->g = 0;
1930	}
1931
1932	/* write the designation sequence if necessary */
1933	if(cs != pFromU2022State->cs[g]) {
1934	int32_t escLen = escSeqCharsLen[cs];
1935	uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(buffer + outLen , escSeqChars[cs], escLen); } while (false);
1936	outLen += escLen;
1937	pFromU2022State->cs[g] = cs;
1938
1939	/* invalidate the choices[] */
1940	choiceCount = 0;
1941	}
1942
1943	/* write the shift sequence if necessary */
1944	if(g != pFromU2022State->g) {
1945	switch(g) {
1946	/* case 0 handled before writing escapes */
1947	case 1:
1948	buffer[outLen++] = UCNV_SO0x0E;
1949	pFromU2022State->g = 1;
1950	break;
1951	default: /* case 2 */
1952	buffer[outLen++] = 0x1b;
1953	buffer[outLen++] = 0x4e;
1954	break;
1955	/* no case 3: no SS3 in ISO-2022-JP-x */
1956	}
1957	}
1958
1959	/* write the output bytes */
1960	if(len == 1) {
1961	buffer[outLen++] = (char)targetValue;
1962	} else /* len == 2 */ {
1963	buffer[outLen++] = (char)(targetValue >> 8);
1964	buffer[outLen++] = (char)targetValue;
1965	}
1966	} else {
1967	/*
1968	* if we cannot find the character after checking all codepages
1969	* then this is an error
1970	*/
1971	*err = U_INVALID_CHAR_FOUND;
1972	cnv->fromUChar32=sourceChar;
1973	break;
1974	}
1975
1976	if(sourceChar == CR0x0D \|\| sourceChar == LF0x0A) {
1977	/* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
1978	pFromU2022State->cs[2] = 0;
1979	choiceCount = 0;
1980	}
1981
1982	/* output outLen>0 bytes in buffer[] */
1983	if(outLen == 1) {
1984	*target++ = buffer[0];
1985	if(offsets) {
1986	offsets++ = (int32_t)(source - args->source - 1); / -1: known to be ASCII */
1987	}
1988	} else if(outLen == 2 && (target + 2) <= targetLimit) {
1989	*target++ = buffer[0];
1990	*target++ = buffer[1];
1991	if(offsets) {
1992	int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)((uint32_t)(sourceChar)<=0xffff ? 1 : 2));
1993	*offsets++ = sourceIndex;
1994	*offsets++ = sourceIndex;
1995	}
1996	} else {
1997	fromUWriteUInt8(
1998	cnv,
1999	buffer, outLen,
2000	&target, (const char *)targetLimit,
2001	&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)((uint32_t)(sourceChar)<=0xffff ? 1 : 2)),
2002	err);
2003	if(U_FAILURE(*err)) {
2004	break;
2005	}
2006	}
2007	} /* end if(myTargetIndex<myTargetLength) */
2008	else{
2009	*err =U_BUFFER_OVERFLOW_ERROR;
2010	break;
2011	}
2012
2013	}/* end while(mySourceIndex<mySourceLength) */
2014
2015	/*
2016	* the end of the input stream and detection of truncated input
2017	* are handled by the framework, but for ISO-2022-JP conversion
2018	* we need to be in ASCII mode at the very end
2019	*
2020	* conditions:
2021	* successful
2022	* in SO mode or not in ASCII mode
2023	* end of input and no truncated input
2024	*/
2025	if( U_SUCCESS(*err) &&
2026	(pFromU2022State->g!=0 \|\| pFromU2022State->cs[0]!=ASCII) &&
2027	args->flush && source>=sourceLimit && cnv->fromUChar32==0
2028	) {
2029	int32_t sourceIndex;
2030
2031	outLen = 0;
2032
2033	if(pFromU2022State->g != 0) {
2034	buffer[outLen++] = UCNV_SI0x0F;
2035	pFromU2022State->g = 0;
2036	}
2037
2038	if(pFromU2022State->cs[0] != ASCII) {
2039	int32_t escLen = escSeqCharsLen[ASCII];
2040	uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(buffer + outLen , escSeqChars[ASCII], escLen); } while (false);
2041	outLen += escLen;
2042	pFromU2022State->cs[0] = (int8_t)ASCII;
2043	}
2044
2045	/* get the source index of the last input character */
2046	/*
2047	* TODO this would be simpler and more reliable if we used a pair
2048	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2049	* so that we could simply use the prevSourceIndex here;
2050	* this code gives an incorrect result for the rare case of an unmatched
2051	* trail surrogate that is alone in the last buffer of the text stream
2052	*/
2053	sourceIndex=(int32_t)(source-args->source);
2054	if(sourceIndex>0) {
2055	--sourceIndex;
2056	if( U16_IS_TRAIL(args->source[sourceIndex])(((args->source[sourceIndex])&0xfffffc00)==0xdc00) &&
2057	(sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1])(((args->source[sourceIndex-1])&0xfffffc00)==0xd800))
2058	) {
2059	--sourceIndex;
2060	}
2061	} else {
2062	sourceIndex=-1;
2063	}
2064
2065	fromUWriteUInt8(
2066	cnv,
2067	buffer, outLen,
2068	&target, (const char *)targetLimit,
2069	&offsets, sourceIndex,
2070	err);
2071	}
2072
2073	/save the state and return /
2074	args->source = source;
2075	args->target = (char*)target;
2076	}
2077
2078	/************* to unicode *****************/
2079
2080	static void U_CALLCONV
2081	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2082	UErrorCode* err){
2083	char tempBuf[2];
2084	const char mySource = (char ) args->source;
2085	UChar *myTarget = args->target;
2086	const char *mySourceLimit = args->sourceLimit;
2087	uint32_t targetUniChar = 0x0000;
2088	uint32_t mySourceChar = 0x0000;
2089	uint32_t tmpSourceChar = 0x0000;
2090	UConverterDataISO2022* myData;
2091	ISO2022State *pToU2022State;
2092	StateEnum cs;
2093
2094	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2095	pToU2022State = &myData->toU2022State;
2096
2097	if(myData->key != 0) {
2098	/* continue with a partial escape sequence */
2099	goto escape;
2100	} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
2101	/* continue with a partial double-byte character */
2102	mySourceChar = args->converter->toUBytes[0];
2103	args->converter->toULength = 0;
2104	cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
2105	targetUniChar = missingCharMarker0xFFFF;
2106	goto getTrailByte;
2107	}
2108
2109	while(mySource < mySourceLimit){
2110
2111	targetUniChar =missingCharMarker0xFFFF;
2112
2113	if(myTarget < args->targetLimit){
2114
2115	mySourceChar= (unsigned char) *mySource++;
2116
2117	switch(mySourceChar) {
2118	case UCNV_SI0x0F:
2119	if(myData->version==3) {
2120	pToU2022State->g=0;
2121	continue;
2122	} else {
2123	/* only JIS7 uses SI/SO, not ISO-2022-JP-x */
2124	myData->isEmptySegment = FALSE0; /* reset this, we have a different error */
2125	break;
2126	}
2127
2128	case UCNV_SO0x0E:
2129	if(myData->version==3) {
2130	/* JIS7: switch to G1 half-width Katakana */
2131	pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
2132	pToU2022State->g=1;
2133	continue;
2134	} else {
2135	/* only JIS7 uses SI/SO, not ISO-2022-JP-x */
2136	myData->isEmptySegment = FALSE0; /* reset this, we have a different error */
2137	break;
2138	}
2139
2140	case ESC_20220x1B:
2141	mySource--;
2142	escape:
2143	{
2144	const char * mySourceBefore = mySource;
2145	int8_t toULengthBefore = args->converter->toULength;
2146
2147	changeState_2022(args->converter,&(mySource),
2148	mySourceLimit, ISO_2022_JP,err);
2149
2150	/* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
2151	if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
2152	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
2153	args->converter->toUCallbackReason = UCNV_IRREGULAR;
2154	args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
2155	}
2156	}
2157
2158	/* invalid or illegal escape sequence */
2159	if(U_FAILURE(*err)){
2160	args->target = myTarget;
2161	args->source = mySource;
2162	myData->isEmptySegment = FALSE0; /* Reset to avoid future spurious errors */
2163	return;
2164	}
2165	/* If we successfully completed an escape sequence, we begin a new segment, empty so far */
2166	if(myData->key==0) {
2167	myData->isEmptySegment = TRUE1;
2168	}
2169	continue;
2170
2171	/* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
2172
2173	case CR0x0D:
2174	case LF0x0A:
2175	/* automatically reset to single-byte mode */
2176	if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
2177	pToU2022State->cs[0] = (int8_t)ASCII;
2178	}
2179	pToU2022State->cs[2] = 0;
2180	pToU2022State->g = 0;
2181	U_FALLTHROUGH[[clang::fallthrough]];
2182	default:
2183	/* convert one or two bytes */
2184	myData->isEmptySegment = FALSE0;
2185	cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
2186	if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
2187	!IS_JP_DBCS(cs)(JISX208<=(cs) && (cs)<=KSC5601)
2188	) {
2189	/* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
2190	targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
2191
2192	/* return from a single-shift state to the previous one */
2193	if(pToU2022State->g >= 2) {
2194	pToU2022State->g=pToU2022State->prevG;
2195	}
2196	} else switch(cs) {
2197	case ASCII:
2198	if(mySourceChar <= 0x7f) {
2199	targetUniChar = mySourceChar;
2200	}
2201	break;
2202	case ISO8859_1:
2203	if(mySourceChar <= 0x7f) {
2204	targetUniChar = mySourceChar + 0x80;
2205	}
2206	/* return from a single-shift state to the previous one */
2207	pToU2022State->g=pToU2022State->prevG;
2208	break;
2209	case ISO8859_7:
2210	if(mySourceChar <= 0x7f) {
2211	/* convert mySourceChar+0x80 to use a normal 8-bit table */
2212	targetUniChar =
2213	_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP((UChar)(uint16_t)((myData->myConverterArray[cs])->mbcs. stateTable[0][(uint8_t)(mySourceChar + 0x80)])
2214	myData->myConverterArray[cs],(UChar)(uint16_t)((myData->myConverterArray[cs])->mbcs. stateTable[0][(uint8_t)(mySourceChar + 0x80)])
2215	mySourceChar + 0x80)(UChar)(uint16_t)((myData->myConverterArray[cs])->mbcs. stateTable[0][(uint8_t)(mySourceChar + 0x80)]);
2216	}
2217	/* return from a single-shift state to the previous one */
2218	pToU2022State->g=pToU2022State->prevG;
2219	break;
2220	case JISX201:
2221	if(mySourceChar <= 0x7f) {
2222	targetUniChar = jisx201ToU(mySourceChar);
2223	}
2224	break;
2225	case HWKANA_7BIT:
2226	if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
2227	/* 7-bit halfwidth Katakana */
2228	targetUniChar = mySourceChar + (HWKANA_START - 0x21);
2229	}
2230	break;
2231	default:
2232	/* G0 DBCS */
2233	if(mySource < mySourceLimit) {
2234	int leadIsOk, trailIsOk;
2235	uint8_t trailByte;
2236	getTrailByte:
2237	trailByte = (uint8_t)*mySource;
2238	/*
2239	* Ticket 5691: consistent illegal sequences:
2240	* - We include at least the first byte in the illegal sequence.
2241	* - If any of the non-initial bytes could be the start of a character,
2242	* we stop the illegal sequence before the first one of those.
2243	*
2244	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
2245	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
2246	* Otherwise we convert or report the pair of bytes.
2247	*/
2248	leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
2249	trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
2250	if (leadIsOk && trailIsOk) {
2251	++mySource;
2252	tmpSourceChar = (mySourceChar << 8) \| trailByte;
2253	if(cs == JISX208) {
2254	_2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
2255	mySourceChar = tmpSourceChar;
2256	} else {
2257	/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
2258	mySourceChar = tmpSourceChar;
2259	if (cs == KSC5601) {
2260	tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
2261	}
2262	tempBuf[0] = (char)(tmpSourceChar >> 8);
2263	tempBuf[1] = (char)(tmpSourceChar);
2264	}
2265	targetUniChar = ucnv_MBCSSimpleGetNextUCharucnv_MBCSSimpleGetNextUChar_71(myData->myConverterArray[cs], tempBuf, 2, FALSE0);
2266	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte)(((trailByte)<0x20) && (((uint32_t)1<<(trailByte ))&0x0800c000)!=0))) {
2267	/* report a pair of illegal bytes if the second byte is not a DBCS starter */
2268	++mySource;
2269	/* add another bit so that the code below writes 2 bytes in case of error */
2270	mySourceChar = 0x10000 \| (mySourceChar << 8) \| trailByte;
2271	}
2272	} else {
2273	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2274	args->converter->toULength = 1;
2275	goto endloop;
2276	}
2277	} /* End of inner switch */
2278	break;
2279	} /* End of outer switch */
2280	if(targetUniChar < (missingCharMarker0xFFFF-1/0xfffe/)){
2281	if(args->offsets){
2282	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2283	}
2284	*(myTarget++)=(UChar)targetUniChar;
2285	}
2286	else if(targetUniChar > missingCharMarker0xFFFF){
2287	/* disassemble the surrogate pair and write to output*/
2288	targetUniChar-=0x0010000;
2289	*myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
2290	if(args->offsets){
2291	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2292	}
2293	++myTarget;
2294	if(myTarget< args->targetLimit){
2295	*myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
2296	if(args->offsets){
2297	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2298	}
2299	++myTarget;
2300	}else{
2301	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
2302	(UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
2303	}
2304
2305	}
2306	else{
2307	/* Call the callback function*/
2308	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2309	break;
2310	}
2311	}
2312	else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */
2313	*err =U_BUFFER_OVERFLOW_ERROR;
2314	break;
2315	}
2316	}
2317	endloop:
2318	args->target = myTarget;
2319	args->source = mySource;
2320	}
2321
2322
2323	#if !UCONFIG_ONLY_HTML_CONVERSION0
2324	/***************************************************************
2325	* Rules for ISO-2022-KR encoding
2326	* i) The KSC5601 designator sequence should appear only once in a file,
2327	* at the beginning of a line before any KSC5601 characters. This usually
2328	* means that it appears by itself on the first line of the file
2329	* ii) There are only 2 shifting sequences SO to shift into double byte mode
2330	* and SI to shift into single byte mode
2331	*/
2332	static void U_CALLCONV
2333	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
2334
2335	UConverter* saveConv = args->converter;
2336	UConverterDataISO2022 myConverterData=(UConverterDataISO2022)saveConv->extraInfo;
2337	args->converter=myConverterData->currentConverter;
2338
2339	myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
2340	ucnv_MBCSFromUnicodeWithOffsetsucnv_MBCSFromUnicodeWithOffsets_71(args,err);
2341	saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
2342
2343	if(*err == U_BUFFER_OVERFLOW_ERROR) {
2344	if(myConverterData->currentConverter->charErrorBufferLength > 0) {
2345	uprv_memcpy(do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveConv-> charErrorBuffer, myConverterData->currentConverter->charErrorBuffer , myConverterData->currentConverter->charErrorBufferLength ); } while (false)
2346	saveConv->charErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveConv-> charErrorBuffer, myConverterData->currentConverter->charErrorBuffer , myConverterData->currentConverter->charErrorBufferLength ); } while (false)
2347	myConverterData->currentConverter->charErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveConv-> charErrorBuffer, myConverterData->currentConverter->charErrorBuffer , myConverterData->currentConverter->charErrorBufferLength ); } while (false)
2348	myConverterData->currentConverter->charErrorBufferLength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(saveConv-> charErrorBuffer, myConverterData->currentConverter->charErrorBuffer , myConverterData->currentConverter->charErrorBufferLength ); } while (false);
2349	}
2350	saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
2351	myConverterData->currentConverter->charErrorBufferLength = 0;
2352	}
2353	args->converter=saveConv;
2354	}
2355
2356	static void U_CALLCONV
2357	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
2358
2359	const UChar *source = args->source;
2360	const UChar *sourceLimit = args->sourceLimit;
2361	unsigned char target = (unsigned char ) args->target;
2362	unsigned char targetLimit = (unsigned char ) args->targetLimit;
2363	int32_t* offsets = args->offsets;
2364	uint32_t targetByteUnit = 0x0000;
2365	UChar32 sourceChar = 0x0000;
2366	UBool isTargetByteDBCS;
2367	UBool oldIsTargetByteDBCS;
2368	UConverterDataISO2022 *converterData;
2369	UConverterSharedData* sharedData;
2370	UBool useFallback;
2371	int32_t length =0;
2372
2373	converterData=(UConverterDataISO2022*)args->converter->extraInfo;
2374	/* if the version is 1 then the user is requesting
2375	* conversion with ibm-25546 pass the arguments to
2376	* MBCS converter and return
2377	*/
2378	if(converterData->version==1){
2379	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
2380	return;
2381	}
2382
2383	/* initialize data */
2384	sharedData = converterData->currentConverter->sharedData;
2385	useFallback = args->converter->useFallback;
2386	isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
2387	oldIsTargetByteDBCS = isTargetByteDBCS;
	Value stored to 'oldIsTargetByteDBCS' is never read
2388
2389	isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;
2390	if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
2391	goto getTrail;
2392	}
2393	while(source < sourceLimit){
2394
2395	targetByteUnit = missingCharMarker0xFFFF;
2396
2397	if(target < (unsigned char*) args->targetLimit){
2398	sourceChar = *source++;
2399
2400	/* do not convert SO/SI/ESC */
2401	if(IS_2022_CONTROL(sourceChar)(((sourceChar)<0x20) && (((uint32_t)1<<(sourceChar ))&0x0800c000)!=0)) {
2402	/* callback(illegal) */
2403	*err=U_ILLEGAL_CHAR_FOUND;
2404	args->converter->fromUChar32=sourceChar;
2405	break;
2406	}
2407
2408	length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
2409	if(length < 0) {
2410	length = -length; /* fallback */
2411	}
2412	/* only DBCS or SBCS characters are expected*/
2413	/* DB characters with high bit set to 1 are expected */
2414	if( length > 2 \|\| length==0 \|\|
2415	(length == 1 && targetByteUnit > 0x7f) \|\|
2416	(length == 2 &&
2417	((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) \|\|
2418	(uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
2419	) {
2420	targetByteUnit=missingCharMarker0xFFFF;
2421	}
2422	if (targetByteUnit != missingCharMarker0xFFFF){
2423
2424	oldIsTargetByteDBCS = isTargetByteDBCS;
2425	isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
2426	/* append the shift sequence */
2427	if (oldIsTargetByteDBCS != isTargetByteDBCS ){
2428
2429	if (isTargetByteDBCS)
2430	*target++ = UCNV_SO0x0E;
2431	else
2432	*target++ = UCNV_SI0x0F;
2433	if(offsets)
2434	*(offsets++) = (int32_t)(source - args->source-1);
2435	}
2436	/* write the targetUniChar to target */
2437	if(targetByteUnit <= 0x00FF){
2438	if( target < targetLimit){
2439	*(target++) = (unsigned char) targetByteUnit;
2440	if(offsets){
2441	*(offsets++) = (int32_t)(source - args->source-1);
2442	}
2443
2444	}else{
2445	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
2446	*err = U_BUFFER_OVERFLOW_ERROR;
2447	}
2448	}else{
2449	if(target < targetLimit){
2450	*(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
2451	if(offsets){
2452	*(offsets++) = (int32_t)(source - args->source-1);
2453	}
2454	if(target < targetLimit){
2455	*(target++) =(unsigned char) (targetByteUnit -0x80);
2456	if(offsets){
2457	*(offsets++) = (int32_t)(source - args->source-1);
2458	}
2459	}else{
2460	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
2461	*err = U_BUFFER_OVERFLOW_ERROR;
2462	}
2463	}else{
2464	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
2465	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
2466	*err = U_BUFFER_OVERFLOW_ERROR;
2467	}
2468	}
2469
2470	}
2471	else{
2472	/* oops.. the code point is unassingned
2473	* set the error and reason
2474	*/
2475
2476	/check if the char is a First surrogate/
2477	if(U16_IS_SURROGATE(sourceChar)(((sourceChar)&0xfffff800)==0xd800)) {
2478	if(U16_IS_SURROGATE_LEAD(sourceChar)(((sourceChar)&0x400)==0)) {
2479	getTrail:
2480	/look ahead to find the trail surrogate/
2481	if(source < sourceLimit) {
2482	/* test the following code unit */
2483	UChar trail=(UChar) *source;
2484	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
2485	source++;
2486	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail)(((UChar32)(sourceChar)<<10UL)+(UChar32)(trail)-((0xd800 <<10UL)+0xdc00-0x10000));
2487	*err = U_INVALID_CHAR_FOUND;
2488	/* convert this surrogate code point */
2489	/* exit this condition tree */
2490	} else {
2491	/* this is an unmatched lead code unit (1st surrogate) */
2492	/* callback(illegal) */
2493	*err=U_ILLEGAL_CHAR_FOUND;
2494	}
2495	} else {
2496	/* no more input */
2497	*err = U_ZERO_ERROR;
2498	}
2499	} else {
2500	/* this is an unmatched trail code unit (2nd surrogate) */
2501	/* callback(illegal) */
2502	*err=U_ILLEGAL_CHAR_FOUND;
2503	}
2504	} else {
2505	/* callback(unassigned) for a BMP code point */
2506	*err = U_INVALID_CHAR_FOUND;
2507	}
2508
2509	args->converter->fromUChar32=sourceChar;
2510	break;
2511	}
2512	} /* end if(myTargetIndex<myTargetLength) */
2513	else{
2514	*err =U_BUFFER_OVERFLOW_ERROR;
2515	break;
2516	}
2517
2518	}/* end while(mySourceIndex<mySourceLength) */
2519
2520	/*
2521	* the end of the input stream and detection of truncated input
2522	* are handled by the framework, but for ISO-2022-KR conversion
2523	* we need to be in ASCII mode at the very end
2524	*
2525	* conditions:
2526	* successful
2527	* not in ASCII mode
2528	* end of input and no truncated input
2529	*/
2530	if( U_SUCCESS(*err) &&
2531	isTargetByteDBCS &&
2532	args->flush && source>=sourceLimit && args->converter->fromUChar32==0
2533	) {
2534	int32_t sourceIndex;
2535
2536	/* we are switching to ASCII */
2537	isTargetByteDBCS=FALSE0;
2538
2539	/* get the source index of the last input character */
2540	/*
2541	* TODO this would be simpler and more reliable if we used a pair
2542	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
2543	* so that we could simply use the prevSourceIndex here;
2544	* this code gives an incorrect result for the rare case of an unmatched
2545	* trail surrogate that is alone in the last buffer of the text stream
2546	*/
2547	sourceIndex=(int32_t)(source-args->source);
2548	if(sourceIndex>0) {
2549	--sourceIndex;
2550	if( U16_IS_TRAIL(args->source[sourceIndex])(((args->source[sourceIndex])&0xfffffc00)==0xdc00) &&
2551	(sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1])(((args->source[sourceIndex-1])&0xfffffc00)==0xd800))
2552	) {
2553	--sourceIndex;
2554	}
2555	} else {
2556	sourceIndex=-1;
2557	}
2558
2559	fromUWriteUInt8(
2560	args->converter,
2561	SHIFT_IN_STR, 1,
2562	&target, (const char *)targetLimit,
2563	&offsets, sourceIndex,
2564	err);
2565	}
2566
2567	/*save the state and return */
2568	args->source = source;
2569	args->target = (char*)target;
2570	args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
2571	}
2572
2573	/********************** To Unicode *************************************/
2574
2575	static void U_CALLCONV
2576	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
2577	UErrorCode* err){
2578	char const* sourceStart;
2579	UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2580
2581	UConverterToUnicodeArgs subArgs;
2582	int32_t minArgsSize;
2583
2584	/* set up the subconverter arguments */
2585	if(args->size<sizeof(UConverterToUnicodeArgs)) {
2586	minArgsSize = args->size;
2587	} else {
2588	minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
2589	}
2590
2591	uprv_memcpy(&subArgs, args, minArgsSize)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(&subArgs , args, minArgsSize); } while (false);
2592	subArgs.size = (uint16_t)minArgsSize;
2593	subArgs.converter = myData->currentConverter;
2594
2595	/* remember the original start of the input for offsets */
2596	sourceStart = args->source;
2597
2598	if(myData->key != 0) {
2599	/* continue with a partial escape sequence */
2600	goto escape;
2601	}
2602
2603	while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
2604	/Find the end of the buffer e.g : Next Escape Seq \| end of Buffer/
2605	subArgs.source = args->source;
2606	subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
2607	if(subArgs.source != subArgs.sourceLimit) {
2608	/*
2609	* get the current partial byte sequence
2610	*
2611	* it needs to be moved between the public and the subconverter
2612	* so that the conversion framework, which only sees the public
2613	* converter, can handle truncated and illegal input etc.
2614	*/
2615	if(args->converter->toULength > 0) {
2616	uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(subArgs.converter ->toUBytes, args->converter->toUBytes, args->converter ->toULength); } while (false);
2617	}
2618	subArgs.converter->toULength = args->converter->toULength;
2619
2620	/*
2621	* Convert up to the end of the input, or to before the next escape character.
2622	* Does not handle conversion extensions because the preToU[] state etc.
2623	* is not copied.
2624	*/
2625	ucnv_MBCSToUnicodeWithOffsetsucnv_MBCSToUnicodeWithOffsets_71(&subArgs, err);
2626
2627	if(args->offsets != NULL__null && sourceStart != args->source) {
2628	/* update offsets to base them on the actual start of the input */
2629	int32_t *offsets = args->offsets;
2630	UChar *target = args->target;
2631	int32_t delta = (int32_t)(args->source - sourceStart);
2632	while(target < subArgs.target) {
2633	if(*offsets >= 0) {
2634	*offsets += delta;
2635	}
2636	++offsets;
2637	++target;
2638	}
2639	}
2640	args->source = subArgs.source;
2641	args->target = subArgs.target;
2642	args->offsets = subArgs.offsets;
2643
2644	/* copy input/error/overflow buffers */
2645	if(subArgs.converter->toULength > 0) {
2646	uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(args-> converter->toUBytes, subArgs.converter->toUBytes, subArgs .converter->toULength); } while (false);
2647	}
2648	args->converter->toULength = subArgs.converter->toULength;
2649
2650	if(*err == U_BUFFER_OVERFLOW_ERROR) {
2651	if(subArgs.converter->UCharErrorBufferLength > 0) {
2652	uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(args-> converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer , subArgs.converter->UCharErrorBufferLength); } while (false )
2653	subArgs.converter->UCharErrorBufferLength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(args-> converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer , subArgs.converter->UCharErrorBufferLength); } while (false );
2654	}
2655	args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
2656	subArgs.converter->UCharErrorBufferLength = 0;
2657	}
2658	}
2659
2660	if (U_FAILURE(*err) \|\| (args->source == args->sourceLimit)) {
2661	return;
2662	}
2663
2664	escape:
2665	changeState_2022(args->converter,
2666	&(args->source),
2667	args->sourceLimit,
2668	ISO_2022_KR,
2669	err);
2670	}
2671	}
2672
2673	static void U_CALLCONV
2674	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
2675	UErrorCode* err){
2676	char tempBuf[2];
2677	const char mySource = ( char ) args->source;
2678	UChar *myTarget = args->target;
2679	const char *mySourceLimit = args->sourceLimit;
2680	UChar32 targetUniChar = 0x0000;
2681	UChar mySourceChar = 0x0000;
2682	UConverterDataISO2022* myData;
2683	UConverterSharedData* sharedData ;
2684	UBool useFallback;
2685
2686	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
2687	if(myData->version==1){
2688	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
2689	return;
2690	}
2691
2692	/* initialize state */
2693	sharedData = myData->currentConverter->sharedData;
2694	useFallback = args->converter->useFallback;
2695
2696	if(myData->key != 0) {
2697	/* continue with a partial escape sequence */
2698	goto escape;
2699	} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
2700	/* continue with a partial double-byte character */
2701	mySourceChar = args->converter->toUBytes[0];
2702	args->converter->toULength = 0;
2703	goto getTrailByte;
2704	}
2705
2706	while(mySource< mySourceLimit){
2707
2708	if(myTarget < args->targetLimit){
2709
2710	mySourceChar= (unsigned char) *mySource++;
2711
2712	if(mySourceChar==UCNV_SI0x0F){
2713	myData->toU2022State.g = 0;
2714	if (myData->isEmptySegment) {
2715	myData->isEmptySegment = FALSE0; /* we are handling it, reset to avoid future spurious errors */
2716	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
2717	args->converter->toUCallbackReason = UCNV_IRREGULAR;
2718	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2719	args->converter->toULength = 1;
2720	args->target = myTarget;
2721	args->source = mySource;
2722	return;
2723	}
2724	/consume the source /
2725	continue;
2726	}else if(mySourceChar==UCNV_SO0x0E){
2727	myData->toU2022State.g = 1;
2728	myData->isEmptySegment = TRUE1; /* Begin a new segment, empty so far */
2729	/consume the source /
2730	continue;
2731	}else if(mySourceChar==ESC_20220x1B){
2732	mySource--;
2733	escape:
2734	myData->isEmptySegment = FALSE0; /* Any invalid ESC sequences will be detected separately, so just reset this */
2735	changeState_2022(args->converter,&(mySource),
2736	mySourceLimit, ISO_2022_KR, err);
2737	if(U_FAILURE(*err)){
2738	args->target = myTarget;
2739	args->source = mySource;
2740	return;
2741	}
2742	continue;
2743	}
2744
2745	myData->isEmptySegment = FALSE0; /* Any invalid char errors will be detected separately, so just reset this */
2746	if(myData->toU2022State.g == 1) {
2747	if(mySource < mySourceLimit) {
2748	int leadIsOk, trailIsOk;
2749	uint8_t trailByte;
2750	getTrailByte:
2751	targetUniChar = missingCharMarker0xFFFF;
2752	trailByte = (uint8_t)*mySource;
2753	/*
2754	* Ticket 5691: consistent illegal sequences:
2755	* - We include at least the first byte in the illegal sequence.
2756	* - If any of the non-initial bytes could be the start of a character,
2757	* we stop the illegal sequence before the first one of those.
2758	*
2759	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
2760	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
2761	* Otherwise we convert or report the pair of bytes.
2762	*/
2763	leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
2764	trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
2765	if (leadIsOk && trailIsOk) {
2766	++mySource;
2767	tempBuf[0] = (char)(mySourceChar + 0x80);
2768	tempBuf[1] = (char)(trailByte + 0x80);
2769	targetUniChar = ucnv_MBCSSimpleGetNextUCharucnv_MBCSSimpleGetNextUChar_71(sharedData, tempBuf, 2, useFallback);
2770	mySourceChar = (mySourceChar << 8) \| trailByte;
2771	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte)(((trailByte)<0x20) && (((uint32_t)1<<(trailByte ))&0x0800c000)!=0))) {
2772	/* report a pair of illegal bytes if the second byte is not a DBCS starter */
2773	++mySource;
2774	/* add another bit so that the code below writes 2 bytes in case of error */
2775	mySourceChar = static_cast<UChar>(0x10000 \| (mySourceChar << 8) \| trailByte);
2776	}
2777	} else {
2778	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2779	args->converter->toULength = 1;
2780	break;
2781	}
2782	}
2783	else if(mySourceChar <= 0x7f) {
2784	targetUniChar = ucnv_MBCSSimpleGetNextUCharucnv_MBCSSimpleGetNextUChar_71(sharedData, mySource - 1, 1, useFallback);
2785	} else {
2786	targetUniChar = 0xffff;
2787	}
2788	if(targetUniChar < 0xfffe){
2789	if(args->offsets) {
2790	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
2791	}
2792	*(myTarget++)=(UChar)targetUniChar;
2793	}
2794	else {
2795	/* Call the callback function*/
2796	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
2797	break;
2798	}
2799	}
2800	else{
2801	*err =U_BUFFER_OVERFLOW_ERROR;
2802	break;
2803	}
2804	}
2805	args->target = myTarget;
2806	args->source = mySource;
2807	}
2808
2809	/************************* END ISO2022-KR *******************************/
2810
2811	/************************* ISO-2022-CN *******************************
2812	*
2813	* Rules for ISO-2022-CN Encoding:
2814	* i) The designator sequence must appear once on a line before any instance
2815	* of character set it designates.
2816	* ii) If two lines contain characters from the same character set, both lines
2817	* must include the designator sequence.
2818	* iii) Once the designator sequence is known, a shifting sequence has to be found
2819	* to invoke the shifting
2820	* iv) All lines start in ASCII and end in ASCII.
2821	* v) Four shifting sequences are employed for this purpose:
2822	*
2823	* Sequence ASCII Eq Charsets
2824	* ---------- ------- ---------
2825	* SI <SI> US-ASCII
2826	* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
2827	* SS2 <ESC>N CNS-11643-1992 Plane 2
2828	* SS3 <ESC>O CNS-11643-1992 Planes 3-7
2829	*
2830	* vi)
2831	* SOdesignator : ESC "$" ")" finalchar_for_SO
2832	* SS2designator : ESC "$" "*" finalchar_for_SS2
2833	* SS3designator : ESC "$" "+" finalchar_for_SS3
2834	*
2835	* ESC $ ) A Indicates the bytes following SO are Chinese
2836	* characters as defined in GB 2312-80, until
2837	* another SOdesignation appears
2838	*
2839	*
2840	* ESC $ ) E Indicates the bytes following SO are as defined
2841	* in ISO-IR-165 (for details, see section 2.1),
2842	* until another SOdesignation appears
2843	*
2844	* ESC $ ) G Indicates the bytes following SO are as defined
2845	* in CNS 11643-plane-1, until another
2846	* SOdesignation appears
2847	*
2848	* ESC $ * H Indicates the two bytes immediately following
2849	* SS2 is a Chinese character as defined in CNS
2850	* 11643-plane-2, until another SS2designation
2851	* appears
2852	* (Meaning <ESC>N must precede every 2 byte
2853	* sequence.)
2854	*
2855	* ESC $ + I Indicates the immediate two bytes following SS3
2856	* is a Chinese character as defined in CNS
2857	* 11643-plane-3, until another SS3designation
2858	* appears
2859	* (Meaning <ESC>O must precede every 2 byte
2860	* sequence.)
2861	*
2862	* ESC $ + J Indicates the immediate two bytes following SS3
2863	* is a Chinese character as defined in CNS
2864	* 11643-plane-4, until another SS3designation
2865	* appears
2866	* (In English: <ESC>O must precede every 2 byte
2867	* sequence.)
2868	*
2869	* ESC $ + K Indicates the immediate two bytes following SS3
2870	* is a Chinese character as defined in CNS
2871	* 11643-plane-5, until another SS3designation
2872	* appears
2873	*
2874	* ESC $ + L Indicates the immediate two bytes following SS3
2875	* is a Chinese character as defined in CNS
2876	* 11643-plane-6, until another SS3designation
2877	* appears
2878	*
2879	* ESC $ + M Indicates the immediate two bytes following SS3
2880	* is a Chinese character as defined in CNS
2881	* 11643-plane-7, until another SS3designation
2882	* appears
2883	*
2884	* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
2885	* has its own designation information before any Chinese characters
2886	* appear
2887	*
2888	*/
2889
2890	/* The following are defined this way to make the strings truly readonly */
2891	static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";
2892	static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";
2893	static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47";
2894	static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48";
2895	static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49";
2896	static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A";
2897	static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B";
2898	static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C";
2899	static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D";
2900
2901	/******************** ISO2022-CN Data ************************/
2902	static const char* const escSeqCharsCN[10] ={
2903	SHIFT_IN_STR, /* 0 ASCII */
2904	GB_2312_80_STR, /* 1 GB2312_1 */
2905	ISO_IR_165_STR, /* 2 ISO_IR_165 */
2906	CNS_11643_1992_Plane_1_STR,
2907	CNS_11643_1992_Plane_2_STR,
2908	CNS_11643_1992_Plane_3_STR,
2909	CNS_11643_1992_Plane_4_STR,
2910	CNS_11643_1992_Plane_5_STR,
2911	CNS_11643_1992_Plane_6_STR,
2912	CNS_11643_1992_Plane_7_STR
2913	};
2914
2915	static void U_CALLCONV
2916	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
2917	UConverter *cnv = args->converter;
2918	UConverterDataISO2022 *converterData;
2919	ISO2022State *pFromU2022State;
2920	uint8_t target = (uint8_t ) args->target;
2921	const uint8_t targetLimit = (const uint8_t ) args->targetLimit;
2922	const UChar* source = args->source;
2923	const UChar* sourceLimit = args->sourceLimit;
2924	int32_t* offsets = args->offsets;
2925	UChar32 sourceChar;
2926	char buffer[8];
2927	int32_t len;
2928	int8_t choices[3];
2929	int32_t choiceCount;
2930	uint32_t targetValue = 0;
2931	UBool useFallback;
2932
2933	/* set up the state */
2934	converterData = (UConverterDataISO2022*)cnv->extraInfo;
2935	pFromU2022State = &converterData->fromU2022State;
2936
2937	choiceCount = 0;
2938
2939	/* check if the last codepoint of previous buffer was a lead surrogate*/
2940	if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
2941	goto getTrail;
2942	}
2943
2944	while( source < sourceLimit){
2945	if(target < targetLimit){
2946
2947	sourceChar = *(source++);
2948	/check if the char is a First surrogate/
2949	if(U16_IS_SURROGATE(sourceChar)(((sourceChar)&0xfffff800)==0xd800)) {
2950	if(U16_IS_SURROGATE_LEAD(sourceChar)(((sourceChar)&0x400)==0)) {
2951	getTrail:
2952	/look ahead to find the trail surrogate/
2953	if(source < sourceLimit) {
2954	/* test the following code unit */
2955	UChar trail=(UChar) *source;
2956	if(U16_IS_TRAIL(trail)(((trail)&0xfffffc00)==0xdc00)) {
2957	source++;
2958	sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail)(((UChar32)(sourceChar)<<10UL)+(UChar32)(trail)-((0xd800 <<10UL)+0xdc00-0x10000));
2959	cnv->fromUChar32=0x00;
2960	/* convert this supplementary code point */
2961	/* exit this condition tree */
2962	} else {
2963	/* this is an unmatched lead code unit (1st surrogate) */
2964	/* callback(illegal) */
2965	*err=U_ILLEGAL_CHAR_FOUND;
2966	cnv->fromUChar32=sourceChar;
2967	break;
2968	}
2969	} else {
2970	/* no more input */
2971	cnv->fromUChar32=sourceChar;
2972	break;
2973	}
2974	} else {
2975	/* this is an unmatched trail code unit (2nd surrogate) */
2976	/* callback(illegal) */
2977	*err=U_ILLEGAL_CHAR_FOUND;
2978	cnv->fromUChar32=sourceChar;
2979	break;
2980	}
2981	}
2982
2983	/* do the conversion */
2984	if(sourceChar <= 0x007f ){
2985	/* do not convert SO/SI/ESC */
2986	if(IS_2022_CONTROL(sourceChar)(((sourceChar)<0x20) && (((uint32_t)1<<(sourceChar ))&0x0800c000)!=0)) {
2987	/* callback(illegal) */
2988	*err=U_ILLEGAL_CHAR_FOUND;
2989	cnv->fromUChar32=sourceChar;
2990	break;
2991	}
2992
2993	/* US-ASCII */
2994	if(pFromU2022State->g == 0) {
2995	buffer[0] = (char)sourceChar;
2996	len = 1;
2997	} else {
2998	buffer[0] = UCNV_SI0x0F;
2999	buffer[1] = (char)sourceChar;
3000	len = 2;
3001	pFromU2022State->g = 0;
3002	choiceCount = 0;
3003	}
3004	if(sourceChar == CR0x0D \|\| sourceChar == LF0x0A) {
3005	/* reset the state at the end of a line */
3006	uprv_memset(pFromU2022State, 0, sizeof(ISO2022State)):: memset(pFromU2022State, 0, sizeof(ISO2022State));
3007	choiceCount = 0;
3008	}
3009	}
3010	else{
3011	/* convert U+0080..U+10ffff */
3012	int32_t i;
3013	int8_t cs, g;
3014
3015	if(choiceCount == 0) {
3016	/* try the current SO/G1 converter first */
3017	choices[0] = pFromU2022State->cs[1];
3018
3019	/* default to GB2312_1 if none is designated yet */
3020	if(choices[0] == 0) {
3021	choices[0] = GB2312_1;
3022	}
3023
3024	if(converterData->version == 0) {
3025	/* ISO-2022-CN */
3026
3027	/* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
3028	if(choices[0] == GB2312_1) {
3029	choices[1] = (int8_t)CNS_11643_1;
3030	} else {
3031	choices[1] = (int8_t)GB2312_1;
3032	}
3033
3034	choiceCount = 2;
3035	} else if (converterData->version == 1) {
3036	/* ISO-2022-CN-EXT */
3037
3038	/* try one of the other converters */
3039	switch(choices[0]) {
3040	case GB2312_1:
3041	choices[1] = (int8_t)CNS_11643_1;
3042	choices[2] = (int8_t)ISO_IR_165;
3043	break;
3044	case ISO_IR_165:
3045	choices[1] = (int8_t)GB2312_1;
3046	choices[2] = (int8_t)CNS_11643_1;
3047	break;
3048	default: /* CNS_11643_x */
3049	choices[1] = (int8_t)GB2312_1;
3050	choices[2] = (int8_t)ISO_IR_165;
3051	break;
3052	}
3053
3054	choiceCount = 3;
3055	} else {
3056	choices[0] = (int8_t)CNS_11643_1;
3057	choices[1] = (int8_t)GB2312_1;
3058	}
3059	}
3060
3061	cs = g = 0;
3062	/*
3063	* len==0: no mapping found yet
3064	* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
3065	* len>0: found a roundtrip result, done
3066	*/
3067	len = 0;
3068	/*
3069	* We will turn off useFallback after finding a fallback,
3070	* but we still get fallbacks from PUA code points as usual.
3071	* Therefore, we will also need to check that we don't overwrite
3072	* an early fallback with a later one.
3073	*/
3074	useFallback = cnv->useFallback;
3075
3076	for(i = 0; i < choiceCount && len <= 0; ++i) {
3077	int8_t cs0 = choices[i];
3078	if(cs0 > 0) {
3079	uint32_t value;
3080	int32_t len2;
3081	if(cs0 >= CNS_11643_0) {
3082	len2 = MBCS_FROM_UCHAR32_ISO2022(
3083	converterData->myConverterArray[CNS_11643],
3084	sourceChar,
3085	&value,
3086	useFallback,
3087	MBCS_OUTPUT_3);
3088	if(len2 == 3 \|\| (len2 == -3 && len == 0)) {
3089	targetValue = value;
3090	cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
3091	if(len2 >= 0) {
3092	len = 2;
3093	} else {
3094	len = -2;
3095	useFallback = FALSE0;
3096	}
3097	if(cs == CNS_11643_1) {
3098	g = 1;
3099	} else if(cs == CNS_11643_2) {
3100	g = 2;
3101	} else /* plane 3..7 */ if(converterData->version == 1) {
3102	g = 3;
3103	} else {
3104	/* ISO-2022-CN (without -EXT) does not support plane 3..7 */
3105	len = 0;
3106	}
3107	}
3108	} else {
3109	/* GB2312_1 or ISO-IR-165 */
3110	U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS)(void)0;
3111	len2 = MBCS_FROM_UCHAR32_ISO2022(
3112	converterData->myConverterArray[cs0],
3113	sourceChar,
3114	&value,
3115	useFallback,
3116	MBCS_OUTPUT_2);
3117	if(len2 == 2 \|\| (len2 == -2 && len == 0)) {
3118	targetValue = value;
3119	len = len2;
3120	cs = cs0;
3121	g = 1;
3122	useFallback = FALSE0;
3123	}
3124	}
3125	}
3126	}
3127
3128	if(len != 0) {
3129	len = 0; /* count output bytes; it must have been abs(len) == 2 */
3130
3131	/* write the designation sequence if necessary */
3132	if(cs != pFromU2022State->cs[g]) {
3133	if(cs < CNS_11643) {
3134	uprv_memcpy(buffer, escSeqCharsCN[cs], 4)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(buffer, escSeqCharsCN [cs], 4); } while (false);
3135	} else {
3136	U_ASSERT(cs >= CNS_11643_1)(void)0;
3137	uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(buffer, escSeqCharsCN [CNS_11643 + (cs - CNS_11643_1)], 4); } while (false);
3138	}
3139	len = 4;
3140	pFromU2022State->cs[g] = cs;
3141	if(g == 1) {
3142	/* changing the SO/G1 charset invalidates the choices[] */
3143	choiceCount = 0;
3144	}
3145	}
3146
3147	/* write the shift sequence if necessary */
3148	if(g != pFromU2022State->g) {
3149	switch(g) {
3150	case 1:
3151	buffer[len++] = UCNV_SO0x0E;
3152
3153	/* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
3154	pFromU2022State->g = 1;
3155	break;
3156	case 2:
3157	buffer[len++] = 0x1b;
3158	buffer[len++] = 0x4e;
3159	break;
3160	default: /* case 3 */
3161	buffer[len++] = 0x1b;
3162	buffer[len++] = 0x4f;
3163	break;
3164	}
3165	}
3166
3167	/* write the two output bytes */
3168	buffer[len++] = (char)(targetValue >> 8);
3169	buffer[len++] = (char)targetValue;
3170	} else {
3171	/* if we cannot find the character after checking all codepages
3172	* then this is an error
3173	*/
3174	*err = U_INVALID_CHAR_FOUND;
3175	cnv->fromUChar32=sourceChar;
3176	break;
3177	}
3178	}
3179
3180	/* output len>0 bytes in buffer[] */
3181	if(len == 1) {
3182	*target++ = buffer[0];
3183	if(offsets) {
3184	offsets++ = (int32_t)(source - args->source - 1); / -1: known to be ASCII */
3185	}
3186	} else if(len == 2 && (target + 2) <= targetLimit) {
3187	*target++ = buffer[0];
3188	*target++ = buffer[1];
3189	if(offsets) {
3190	int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)((uint32_t)(sourceChar)<=0xffff ? 1 : 2));
3191	*offsets++ = sourceIndex;
3192	*offsets++ = sourceIndex;
3193	}
3194	} else {
3195	fromUWriteUInt8(
3196	cnv,
3197	buffer, len,
3198	&target, (const char *)targetLimit,
3199	&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)((uint32_t)(sourceChar)<=0xffff ? 1 : 2)),
3200	err);
3201	if(U_FAILURE(*err)) {
3202	break;
3203	}
3204	}
3205	} /* end if(myTargetIndex<myTargetLength) */
3206	else{
3207	*err =U_BUFFER_OVERFLOW_ERROR;
3208	break;
3209	}
3210
3211	}/* end while(mySourceIndex<mySourceLength) */
3212
3213	/*
3214	* the end of the input stream and detection of truncated input
3215	* are handled by the framework, but for ISO-2022-CN conversion
3216	* we need to be in ASCII mode at the very end
3217	*
3218	* conditions:
3219	* successful
3220	* not in ASCII mode
3221	* end of input and no truncated input
3222	*/
3223	if( U_SUCCESS(*err) &&
3224	pFromU2022State->g!=0 &&
3225	args->flush && source>=sourceLimit && cnv->fromUChar32==0
3226	) {
3227	int32_t sourceIndex;
3228
3229	/* we are switching to ASCII */
3230	pFromU2022State->g=0;
3231
3232	/* get the source index of the last input character */
3233	/*
3234	* TODO this would be simpler and more reliable if we used a pair
3235	* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
3236	* so that we could simply use the prevSourceIndex here;
3237	* this code gives an incorrect result for the rare case of an unmatched
3238	* trail surrogate that is alone in the last buffer of the text stream
3239	*/
3240	sourceIndex=(int32_t)(source-args->source);
3241	if(sourceIndex>0) {
3242	--sourceIndex;
3243	if( U16_IS_TRAIL(args->source[sourceIndex])(((args->source[sourceIndex])&0xfffffc00)==0xdc00) &&
3244	(sourceIndex==0 \|\| U16_IS_LEAD(args->source[sourceIndex-1])(((args->source[sourceIndex-1])&0xfffffc00)==0xd800))
3245	) {
3246	--sourceIndex;
3247	}
3248	} else {
3249	sourceIndex=-1;
3250	}
3251
3252	fromUWriteUInt8(
3253	cnv,
3254	SHIFT_IN_STR, 1,
3255	&target, (const char *)targetLimit,
3256	&offsets, sourceIndex,
3257	err);
3258	}
3259
3260	/save the state and return /
3261	args->source = source;
3262	args->target = (char*)target;
3263	}
3264
3265
3266	static void U_CALLCONV
3267	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
3268	UErrorCode* err){
3269	char tempBuf[3];
3270	const char mySource = (char ) args->source;
3271	UChar *myTarget = args->target;
3272	const char *mySourceLimit = args->sourceLimit;
3273	uint32_t targetUniChar = 0x0000;
3274	uint32_t mySourceChar = 0x0000;
3275	UConverterDataISO2022* myData;
3276	ISO2022State *pToU2022State;
3277
3278	myData=(UConverterDataISO2022*)(args->converter->extraInfo);
3279	pToU2022State = &myData->toU2022State;
3280
3281	if(myData->key != 0) {
3282	/* continue with a partial escape sequence */
3283	goto escape;
3284	} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
3285	/* continue with a partial double-byte character */
3286	mySourceChar = args->converter->toUBytes[0];
3287	args->converter->toULength = 0;
3288	targetUniChar = missingCharMarker0xFFFF;
3289	goto getTrailByte;
3290	}
3291
3292	while(mySource < mySourceLimit){
3293
3294	targetUniChar =missingCharMarker0xFFFF;
3295
3296	if(myTarget < args->targetLimit){
3297
3298	mySourceChar= (unsigned char) *mySource++;
3299
3300	switch(mySourceChar){
3301	case UCNV_SI0x0F:
3302	pToU2022State->g=0;
3303	if (myData->isEmptySegment) {
3304	myData->isEmptySegment = FALSE0; /* we are handling it, reset to avoid future spurious errors */
3305	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
3306	args->converter->toUCallbackReason = UCNV_IRREGULAR;
3307	args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
3308	args->converter->toULength = 1;
3309	args->target = myTarget;
3310	args->source = mySource;
3311	return;
3312	}
3313	continue;
3314
3315	case UCNV_SO0x0E:
3316	if(pToU2022State->cs[1] != 0) {
3317	pToU2022State->g=1;
3318	myData->isEmptySegment = TRUE1; /* Begin a new segment, empty so far */
3319	continue;
3320	} else {
3321	/* illegal to have SO before a matching designator */
3322	myData->isEmptySegment = FALSE0; /* Handling a different error, reset this to avoid future spurious errs */
3323	break;
3324	}
3325
3326	case ESC_20220x1B:
3327	mySource--;
3328	escape:
3329	{
3330	const char * mySourceBefore = mySource;
3331	int8_t toULengthBefore = args->converter->toULength;
3332
3333	changeState_2022(args->converter,&(mySource),
3334	mySourceLimit, ISO_2022_CN,err);
3335
3336	/* After SO there must be at least one character before a designator (designator error handled separately) */
3337	if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
3338	*err = U_ILLEGAL_ESCAPE_SEQUENCE;
3339	args->converter->toUCallbackReason = UCNV_IRREGULAR;
3340	args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
3341	}
3342	}
3343
3344	/* invalid or illegal escape sequence */
3345	if(U_FAILURE(*err)){
3346	args->target = myTarget;
3347	args->source = mySource;
3348	myData->isEmptySegment = FALSE0; /* Reset to avoid future spurious errors */
3349	return;
3350	}
3351	continue;
3352
3353	/* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
3354
3355	case CR0x0D:
3356	case LF0x0A:
3357	uprv_memset(pToU2022State, 0, sizeof(ISO2022State)):: memset(pToU2022State, 0, sizeof(ISO2022State));
3358	U_FALLTHROUGH[[clang::fallthrough]];
3359	default:
3360	/* convert one or two bytes */
3361	myData->isEmptySegment = FALSE0;
3362	if(pToU2022State->g != 0) {
3363	if(mySource < mySourceLimit) {
3364	UConverterSharedData *cnv;
3365	StateEnum tempState;
3366	int32_t tempBufLen;
3367	int leadIsOk, trailIsOk;
3368	uint8_t trailByte;
3369	getTrailByte:
3370	trailByte = (uint8_t)*mySource;
3371	/*
3372	* Ticket 5691: consistent illegal sequences:
3373	* - We include at least the first byte in the illegal sequence.
3374	* - If any of the non-initial bytes could be the start of a character,
3375	* we stop the illegal sequence before the first one of those.
3376	*
3377	* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
3378	* an ESC/SO/SI, we report only the first byte as the illegal sequence.
3379	* Otherwise we convert or report the pair of bytes.
3380	*/
3381	leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
3382	trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
3383	if (leadIsOk && trailIsOk) {
3384	++mySource;
3385	tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
3386	if(tempState >= CNS_11643_0) {
3387	cnv = myData->myConverterArray[CNS_11643];
3388	tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
3389	tempBuf[1] = (char) (mySourceChar);
3390	tempBuf[2] = (char) trailByte;
3391	tempBufLen = 3;
3392
3393	}else{
3394	U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS)(void)0;
3395	cnv = myData->myConverterArray[tempState];
3396	tempBuf[0] = (char) (mySourceChar);
3397	tempBuf[1] = (char) trailByte;
3398	tempBufLen = 2;
3399	}
3400	targetUniChar = ucnv_MBCSSimpleGetNextUCharucnv_MBCSSimpleGetNextUChar_71(cnv, tempBuf, tempBufLen, FALSE0);
3401	mySourceChar = (mySourceChar << 8) \| trailByte;
3402	} else if (!(trailIsOk \|\| IS_2022_CONTROL(trailByte)(((trailByte)<0x20) && (((uint32_t)1<<(trailByte ))&0x0800c000)!=0))) {
3403	/* report a pair of illegal bytes if the second byte is not a DBCS starter */
3404	++mySource;
3405	/* add another bit so that the code below writes 2 bytes in case of error */
3406	mySourceChar = 0x10000 \| (mySourceChar << 8) \| trailByte;
3407	}
3408	if(pToU2022State->g>=2) {
3409	/* return from a single-shift state to the previous one */
3410	pToU2022State->g=pToU2022State->prevG;
3411	}
3412	} else {
3413	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
3414	args->converter->toULength = 1;
3415	goto endloop;
3416	}
3417	}
3418	else{
3419	if(mySourceChar <= 0x7f) {
3420	targetUniChar = (UChar) mySourceChar;
3421	}
3422	}
3423	break;
3424	}
3425	if(targetUniChar < (missingCharMarker0xFFFF-1/0xfffe/)){
3426	if(args->offsets){
3427	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
3428	}
3429	*(myTarget++)=(UChar)targetUniChar;
3430	}
3431	else if(targetUniChar > missingCharMarker0xFFFF){
3432	/* disassemble the surrogate pair and write to output*/
3433	targetUniChar-=0x0010000;
3434	*myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
3435	if(args->offsets){
3436	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
3437	}
3438	++myTarget;
3439	if(myTarget< args->targetLimit){
3440	*myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
3441	if(args->offsets){
3442	args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
3443	}
3444	++myTarget;
3445	}else{
3446	args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
3447	(UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
3448	}
3449
3450	}
3451	else{
3452	/* Call the callback function*/
3453	toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
3454	break;
3455	}
3456	}
3457	else{
3458	*err =U_BUFFER_OVERFLOW_ERROR;
3459	break;
3460	}
3461	}
3462	endloop:
3463	args->target = myTarget;
3464	args->source = mySource;
3465	}
3466	#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3467
3468	static void U_CALLCONV
3469	_ISO_2022_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err) {
3470	UConverter *cnv = args->converter;
3471	UConverterDataISO2022 myConverterData=(UConverterDataISO2022 ) cnv->extraInfo;
3472	ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
3473	char p, subchar;
3474	char buffer[8];
3475	int32_t length;
3476
3477	subchar=(char *)cnv->subChars;
3478	length=cnv->subCharLen; /* assume length==1 for most variants */
3479
3480	p = buffer;
3481	switch(myConverterData->locale[0]){
3482	case 'j':
3483	{
3484	int8_t cs;
3485
3486	if(pFromU2022State->g == 1) {
3487	/* JIS7: switch from G1 to G0 */
3488	pFromU2022State->g = 0;
3489	*p++ = UCNV_SI0x0F;
3490	}
3491
3492	cs = pFromU2022State->cs[0];
3493	if(cs != ASCII && cs != JISX201) {
3494	/* not in ASCII or JIS X 0201: switch to ASCII */
3495	pFromU2022State->cs[0] = (int8_t)ASCII;
3496	*p++ = '\x1b';
3497	*p++ = '\x28';
3498	*p++ = '\x42';
3499	}
3500
3501	*p++ = subchar[0];
3502	break;
3503	}
3504	case 'c':
3505	if(pFromU2022State->g != 0) {
3506	/* not in ASCII mode: switch to ASCII */
3507	pFromU2022State->g = 0;
3508	*p++ = UCNV_SI0x0F;
3509	}
3510	*p++ = subchar[0];
3511	break;
3512	case 'k':
3513	if(myConverterData->version == 0) {
3514	if(length == 1) {
3515	if(args->converter->fromUnicodeStatus) {
3516	/* in DBCS mode: switch to SBCS */
3517	args->converter->fromUnicodeStatus = 0;
3518	*p++ = UCNV_SI0x0F;
3519	}
3520	*p++ = subchar[0];
3521	} else /* length == 2*/ {
3522	if(!args->converter->fromUnicodeStatus) {
3523	/* in SBCS mode: switch to DBCS */
3524	args->converter->fromUnicodeStatus = 1;
3525	*p++ = UCNV_SO0x0E;
3526	}
3527	*p++ = subchar[0];
3528	*p++ = subchar[1];
3529	}
3530	break;
3531	} else {
3532	/* save the subconverter's substitution string */
3533	uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
3534	int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
3535
3536	/* set our substitution string into the subconverter */
3537	myConverterData->currentConverter->subChars = (uint8_t *)subchar;
3538	myConverterData->currentConverter->subCharLen = (int8_t)length;
3539
3540	/* let the subconverter write the subchar, set/retrieve fromUChar32 state */
3541	args->converter = myConverterData->currentConverter;
3542	myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
3543	ucnv_cbFromUWriteSubucnv_cbFromUWriteSub_71(args, 0, err);
3544	cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
3545	args->converter = cnv;
3546
3547	/* restore the subconverter's substitution string */
3548	myConverterData->currentConverter->subChars = currentSubChars;
3549	myConverterData->currentConverter->subCharLen = currentSubCharLen;
3550
3551	if(*err == U_BUFFER_OVERFLOW_ERROR) {
3552	if(myConverterData->currentConverter->charErrorBufferLength > 0) {
3553	uprv_memcpy(do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(cnv->charErrorBuffer , myConverterData->currentConverter->charErrorBuffer, myConverterData ->currentConverter->charErrorBufferLength); } while (false )
3554	cnv->charErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(cnv->charErrorBuffer , myConverterData->currentConverter->charErrorBuffer, myConverterData ->currentConverter->charErrorBufferLength); } while (false )
3555	myConverterData->currentConverter->charErrorBuffer,do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(cnv->charErrorBuffer , myConverterData->currentConverter->charErrorBuffer, myConverterData ->currentConverter->charErrorBufferLength); } while (false )
3556	myConverterData->currentConverter->charErrorBufferLength)do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(cnv->charErrorBuffer , myConverterData->currentConverter->charErrorBuffer, myConverterData ->currentConverter->charErrorBufferLength); } while (false );
3557	}
3558	cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
3559	myConverterData->currentConverter->charErrorBufferLength = 0;
3560	}
3561	return;
3562	}
3563	default:
3564	/* not expected */
3565	break;
3566	}
3567	ucnv_cbFromUWriteBytesucnv_cbFromUWriteBytes_71(args,
3568	buffer, (int32_t)(p - buffer),
3569	offsetIndex, err);
3570	}
3571
3572	/*
3573	* Structure for cloning an ISO 2022 converter into a single memory block.
3574	*/
3575	struct cloneStruct
3576	{
3577	UConverter cnv;
3578	UConverter currentConverter;
3579	UConverterDataISO2022 mydata;
3580	};
3581
3582
3583	U_CDECL_BEGINextern "C" {
3584
3585	static UConverter * U_CALLCONV
3586	_ISO_2022_SafeClone(
3587	const UConverter *cnv,
3588	void *stackBuffer,
3589	int32_t *pBufferSize,
3590	UErrorCode *status)
3591	{
3592	struct cloneStruct * localClone;
3593	UConverterDataISO2022 *cnvData;
3594	int32_t i, size;
3595
3596	if (U_FAILURE(*status)){
3597	return nullptr;
3598	}
3599
3600	if (pBufferSize == 0) { / 'preflighting' request - set needed size into pBufferSize /
3601	*pBufferSize = (int32_t)sizeof(struct cloneStruct);
3602	return NULL__null;
3603	}
3604
3605	cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
3606	localClone = (struct cloneStruct *)stackBuffer;
3607
3608	/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
3609
3610	uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022))do { clang diagnostic push clang diagnostic ignored "-Waddress" (void)0; (void)0; clang diagnostic pop :: memcpy(&localClone ->mydata, cnvData, sizeof(UConverterDataISO2022)); } while (false);
3611	localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
3612	localClone->cnv.isExtraLocal = TRUE1;
3613
3614	/* share the subconverters */
3615
3616	if(cnvData->currentConverter != NULL__null) {
3617	size = (int32_t)sizeof(UConverter);
3618	localClone->mydata.currentConverter =
3619	ucnv_safeCloneucnv_safeClone_71(cnvData->currentConverter,
3620	&localClone->currentConverter,
3621	&size, status);
3622	if(U_FAILURE(*status)) {
3623	return NULL__null;
3624	}
3625	}
3626
3627	for(i=0; i<UCNV_2022_MAX_CONVERTERS10; ++i) {
3628	if(cnvData->myConverterArray[i] != NULL__null) {
3629	ucnv_incrementRefCountucnv_incrementRefCount_71(cnvData->myConverterArray[i]);
3630	}
3631	}
3632
3633	return &localClone->cnv;
3634	}
3635
3636	U_CDECL_END}
3637
3638	static void U_CALLCONV
3639	_ISO_2022_GetUnicodeSet(const UConverter *cnv,
3640	const USetAdder *sa,
3641	UConverterUnicodeSet which,
3642	UErrorCode *pErrorCode)
3643	{
3644	int32_t i;
3645	UConverterDataISO2022* cnvData;
3646
3647	if (U_FAILURE(*pErrorCode)) {
3648	return;
3649	}
3650	#ifdef U_ENABLE_GENERIC_ISO_2022
3651	if (cnv->sharedData == &_ISO2022Data_ISO2022Data_71) {
3652	/* We use UTF-8 in this case */
3653	sa->addRange(sa->set, 0, 0xd7FF);
3654	sa->addRange(sa->set, 0xE000, 0x10FFFF);
3655	return;
3656	}
3657	#endif
3658
3659	cnvData = (UConverterDataISO2022*)cnv->extraInfo;
3660
3661	/* open a set and initialize it with code points that are algorithmically round-tripped */
3662	switch(cnvData->locale[0]){
3663	case 'j':
3664	/* include JIS X 0201 which is hardcoded */
3665	sa->add(sa->set, 0xa5);
3666	sa->add(sa->set, 0x203e);
3667	if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)((uint16_t)1<<(ISO8859_1))) {
3668	/* include Latin-1 for some variants of JP */
3669	sa->addRange(sa->set, 0, 0xff);
3670	} else {
3671	/* include ASCII for JP */
3672	sa->addRange(sa->set, 0, 0x7f);
3673	}
3674	if(cnvData->version==3 \|\| cnvData->version==4 \|\| which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
3675	/*
3676	* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
3677	* because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3678	* use half-width Katakana.
3679	* This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3680	* half-width Katakana via the ESC ( I sequence.
3681	* However, we only emit (fromUnicode) half-width Katakana according to the
3682	* definition of each variant.
3683	*
3684	* When including fallbacks,
3685	* we need to include half-width Katakana Unicode code points for all JP variants because
3686	* JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3687	*/
3688	/* include half-width Katakana for JP */
3689	sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3690	}
3691	break;
3692	#if !UCONFIG_ONLY_HTML_CONVERSION0
3693	case 'c':
3694	case 'z':
3695	/* include ASCII for CN */
3696	sa->addRange(sa->set, 0, 0x7f);
3697	break;
3698	case 'k':
3699	/* there is only one converter for KR, and it is not in the myConverterArray[] */
3700	cnvData->currentConverter->sharedData->impl->getUnicodeSet(
3701	cnvData->currentConverter, sa, which, pErrorCode);
3702	/* the loop over myConverterArray[] will simply not find another converter */
3703	break;
3704	#endif
3705	default:
3706	break;
3707	}
3708
3709	#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
3710	if( (cnvData->locale[0]=='c' \|\| cnvData->locale[0]=='z') &&
3711	cnvData->version==0 && i==CNS_11643
3712	) {
3713	/* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
3714	ucnv_MBCSGetUnicodeSetForBytes(
3715	cnvData->myConverterArray[i],
3716	sa, UCNV_ROUNDTRIP_SET,
3717	0, 0x81, 0x82,
3718	pErrorCode);
3719	}
3720	#endif
3721
3722	for (i=0; i<UCNV_2022_MAX_CONVERTERS10; i++) {
3723	UConverterSetFilter filter;
3724	if(cnvData->myConverterArray[i]!=NULL__null) {
3725	if(cnvData->locale[0]=='j' && i==JISX208) {
3726	/*
3727	* Only add code points that map to Shift-JIS codes
3728	* corresponding to JIS X 0208.
3729	*/
3730	filter=UCNV_SET_FILTER_SJIS;
3731	#if !UCONFIG_ONLY_HTML_CONVERSION0
3732	} else if( (cnvData->locale[0]=='c' \|\| cnvData->locale[0]=='z') &&
3733	cnvData->version==0 && i==CNS_11643) {
3734	/*
3735	* Version-specific for CN:
3736	* CN version 0 does not map CNS planes 3..7 although
3737	* they are all available in the CNS conversion table;
3738	* CN version 1 (-EXT) does map them all.
3739	* The two versions create different Unicode sets.
3740	*/
3741	filter=UCNV_SET_FILTER_2022_CN;
3742	} else if(i==KSC5601) {
3743	/*
3744	* Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
3745	* are broader than GR94.
3746	*/
3747	filter=UCNV_SET_FILTER_GR94DBCS;
3748	#endif
3749	} else {
3750	filter=UCNV_SET_FILTER_NONE;
3751	}
3752	ucnv_MBCSGetFilteredUnicodeSetForUnicodeucnv_MBCSGetFilteredUnicodeSetForUnicode_71(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
3753	}
3754	}
3755
3756	/*
3757	* ISO 2022 converters must not convert SO/SI/ESC despite what
3758	* sub-converters do by themselves.
3759	* Remove these characters from the set.
3760	*/
3761	sa->remove(sa->set, 0x0e);
3762	sa->remove(sa->set, 0x0f);
3763	sa->remove(sa->set, 0x1b);
3764
3765	/* ISO 2022 converters do not convert C1 controls either */
3766	sa->removeRange(sa->set, 0x80, 0x9f);
3767	}
3768
3769	static const UConverterImpl _ISO2022Impl={
3770	UCNV_ISO_2022,
3771
3772	NULL__null,
3773	NULL__null,
3774
3775	_ISO2022Open,
3776	_ISO2022Close,
3777	_ISO2022Reset,
3778
3779	#ifdef U_ENABLE_GENERIC_ISO_2022
3780	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3781	T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
3782	ucnv_fromUnicode_UTF8ucnv_fromUnicode_UTF8_71,
3783	ucnv_fromUnicode_UTF8_OFFSETS_LOGICucnv_fromUnicode_UTF8_OFFSETS_LOGIC_71,
3784	#else
3785	NULL__null,
3786	NULL__null,
3787	NULL__null,
3788	NULL__null,
3789	#endif
3790	NULL__null,
3791
3792	NULL__null,
3793	_ISO2022getName,
3794	_ISO_2022_WriteSub,
3795	_ISO_2022_SafeClone,
3796	_ISO_2022_GetUnicodeSet,
3797
3798	NULL__null,
3799	NULL__null
3800	};
3801	static const UConverterStaticData _ISO2022StaticData={
3802	sizeof(UConverterStaticData),
3803	"ISO_2022",
3804	2022,
3805	UCNV_IBM,
3806	UCNV_ISO_2022,
3807	1,
3808	3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
3809	{ 0x1a, 0, 0, 0 },
3810	1,
3811	FALSE0,
3812	FALSE0,
3813	0,
3814	0,
3815	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3816	};
3817	const UConverterSharedData _ISO2022Data_ISO2022Data_71=
3818	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, & _ISO2022StaticData, false, false, &_ISO2022Impl, 0, { 0, 0 , 0, 0, __null, __null, __null, __null, __null, __null, { 0 } , __null, __null, 0, 0, 0, false, 0, 0, __null, __null, __null , __null } };
3819
3820	/***********JP**************/
3821	static const UConverterImpl _ISO2022JPImpl={
3822	UCNV_ISO_2022,
3823
3824	NULL__null,
3825	NULL__null,
3826
3827	_ISO2022Open,
3828	_ISO2022Close,
3829	_ISO2022Reset,
3830
3831	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3832	UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3833	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3834	UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
3835	NULL__null,
3836
3837	NULL__null,
3838	_ISO2022getName,
3839	_ISO_2022_WriteSub,
3840	_ISO_2022_SafeClone,
3841	_ISO_2022_GetUnicodeSet,
3842
3843	NULL__null,
3844	NULL__null
3845	};
3846	static const UConverterStaticData _ISO2022JPStaticData={
3847	sizeof(UConverterStaticData),
3848	"ISO_2022_JP",
3849	0,
3850	UCNV_IBM,
3851	UCNV_ISO_2022,
3852	1,
3853	6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
3854	{ 0x1a, 0, 0, 0 },
3855	1,
3856	FALSE0,
3857	FALSE0,
3858	0,
3859	0,
3860	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3861	};
3862
3863	namespace {
3864
3865	const UConverterSharedData _ISO2022JPData=
3866	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, & _ISO2022JPStaticData, false, false, &_ISO2022JPImpl, 0, { 0, 0, 0, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null, __null, 0, 0, 0, false, 0, 0, __null, __null, __null , __null } };
3867
3868	} // namespace
3869
3870	#if !UCONFIG_ONLY_HTML_CONVERSION0
3871	/*********** KR *************/
3872	static const UConverterImpl _ISO2022KRImpl={
3873	UCNV_ISO_2022,
3874
3875	NULL__null,
3876	NULL__null,
3877
3878	_ISO2022Open,
3879	_ISO2022Close,
3880	_ISO2022Reset,
3881
3882	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3883	UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3884	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3885	UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
3886	NULL__null,
3887
3888	NULL__null,
3889	_ISO2022getName,
3890	_ISO_2022_WriteSub,
3891	_ISO_2022_SafeClone,
3892	_ISO_2022_GetUnicodeSet,
3893
3894	NULL__null,
3895	NULL__null
3896	};
3897	static const UConverterStaticData _ISO2022KRStaticData={
3898	sizeof(UConverterStaticData),
3899	"ISO_2022_KR",
3900	0,
3901	UCNV_IBM,
3902	UCNV_ISO_2022,
3903	1,
3904	8, /* max 8 bytes per UChar */
3905	{ 0x1a, 0, 0, 0 },
3906	1,
3907	FALSE0,
3908	FALSE0,
3909	0,
3910	0,
3911	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3912	};
3913
3914	namespace {
3915
3916	const UConverterSharedData _ISO2022KRData=
3917	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, & _ISO2022KRStaticData, false, false, &_ISO2022KRImpl, 0, { 0, 0, 0, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null, __null, 0, 0, 0, false, 0, 0, __null, __null, __null , __null } };
3918
3919	} // namespace
3920
3921	/************* CN *************/
3922	static const UConverterImpl _ISO2022CNImpl={
3923
3924	UCNV_ISO_2022,
3925
3926	NULL__null,
3927	NULL__null,
3928
3929	_ISO2022Open,
3930	_ISO2022Close,
3931	_ISO2022Reset,
3932
3933	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3934	UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3935	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3936	UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
3937	NULL__null,
3938
3939	NULL__null,
3940	_ISO2022getName,
3941	_ISO_2022_WriteSub,
3942	_ISO_2022_SafeClone,
3943	_ISO_2022_GetUnicodeSet,
3944
3945	NULL__null,
3946	NULL__null
3947	};
3948	static const UConverterStaticData _ISO2022CNStaticData={
3949	sizeof(UConverterStaticData),
3950	"ISO_2022_CN",
3951	0,
3952	UCNV_IBM,
3953	UCNV_ISO_2022,
3954	1,
3955	8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
3956	{ 0x1a, 0, 0, 0 },
3957	1,
3958	FALSE0,
3959	FALSE0,
3960	0,
3961	0,
3962	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
3963	};
3964
3965	namespace {
3966
3967	const UConverterSharedData _ISO2022CNData=
3968	UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl){ sizeof(UConverterSharedData), ~((uint32_t)0), __null, & _ISO2022CNStaticData, false, false, &_ISO2022CNImpl, 0, { 0, 0, 0, 0, __null, __null, __null, __null, __null, __null, { 0 }, __null, __null, 0, 0, 0, false, 0, 0, __null, __null, __null , __null } };
3969
3970	} // namespace
3971	#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
3972
3973	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */