Bug Summary

File:out/../deps/icu-small/source/common/ushape.cpp
Warning:line 1245, column 14
Although the value stored to 'Nw' is used in the enclosing expression, the value is never actually read from 'Nw'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ushape.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/maurizio/node-v18.6.0/out -resource-dir /usr/local/lib/clang/16.0.0 -D V8_DEPRECATION_WARNINGS -D V8_IMMINENT_DEPRECATION_WARNINGS -D _GLIBCXX_USE_CXX11_ABI=1 -D NODE_OPENSSL_CONF_NAME=nodejs_conf -D NODE_OPENSSL_HAS_QUIC -D __STDC_FORMAT_MACROS -D OPENSSL_NO_PINSHARED -D OPENSSL_THREADS -D U_COMMON_IMPLEMENTATION=1 -D U_ATTRIBUTE_DEPRECATED= -D _CRT_SECURE_NO_DEPRECATE= -D U_STATIC_IMPLEMENTATION=1 -D UCONFIG_NO_SERVICE=1 -D U_ENABLE_DYLOAD=0 -D U_HAVE_STD_STRING=1 -D UCONFIG_NO_BREAK_ITERATION=0 -I ../deps/icu-small/source/common -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/x86_64-redhat-linux -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/backward -internal-isystem /usr/local/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-redhat-linux/8/../../../../x86_64-redhat-linux/include -internal-externc-isystem 
/include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-deprecated-declarations -Wno-strict-aliasing -std=gnu++17 -fdeprecated-macro -fdebug-compilation-dir=/home/maurizio/node-v18.6.0/out -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-08-22-142216-507842-1 -x c++ ../deps/icu-small/source/common/ushape.cpp
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 ******************************************************************************
5 *
6 * Copyright (C) 2000-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: ushape.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2000jun29
16 * created by: Markus W. Scherer
17 *
18 * Arabic letter shaping implemented by Ayman Roshdy
19 */
20
21#include "unicode/utypes.h"
22#include "unicode/uchar.h"
23#include "unicode/ustring.h"
24#include "unicode/ushape.h"
25#include "cmemory.h"
26#include "putilimp.h"
27#include "ustr_imp.h"
28#include "ubidi_props.h"
29#include "uassert.h"
30
31/*
32 * This implementation is designed for 16-bit Unicode strings.
33 * The main assumption is that the Arabic characters and their
34 * presentation forms each fit into a single UChar.
35 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
36 * characters.
37 */
38
39/*
40 * ### TODO in general for letter shaping:
41 * - the letter shaping code is UTF-16-unaware; needs update
42 * + especially invertBuffer()?!
43 * - needs to handle the "Arabic Tail" that is used in some legacy codepages
44 * as a glyph fragment of wide-glyph letters
45 * + IBM Unicode conversion tables map it to U+200B (ZWSP)
46 * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
47 * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT
48 */
49
50/* definitions for Arabic letter shaping ------------------------------------ */
51
/* Flag values. The same numbers (1, 2, 4, 8, 16, 32, 64, 128) are summed in
 * the araLink[] initializers further down. */
52#define IRRELEVANT4 4
53#define LAMTYPE16 16
54#define ALEFTYPE32 32
55#define LINKR1 1
56#define LINKL2 2
57#define APRESENT8 8
58#define SHADDA64 64
59#define CSHADDA128 128
60#define COMBINE(64 +128) (SHADDA64+CSHADDA128)
61
/* Individual code points that the helpers below compare against or write
 * into the text buffer (tail characters, placeholders, space, tatweel...). */
62#define HAMZAFE_CHAR0xfe80 0xfe80
63#define HAMZA06_CHAR0x0621 0x0621
64#define YEH_HAMZA_CHAR0x0626 0x0626
65#define YEH_HAMZAFE_CHAR0xFE89 0xFE89
66#define LAMALEF_SPACE_SUB0xFFFF 0xFFFF
67#define TASHKEEL_SPACE_SUB0xFFFE 0xFFFE
68#define NEW_TAIL_CHAR0xFE73 0xFE73
69#define OLD_TAIL_CHAR0x200B 0x200B
70#define LAM_CHAR0x0644 0x0644
71#define SPACE_CHAR0x0020 0x0020
72#define SHADDA_CHAR0xFE7C 0xFE7C
73#define TATWEEL_CHAR0x0640 0x0640
74#define SHADDA_TATWEEL_CHAR0xFE7D 0xFE7D
75#define SHADDA06_CHAR0x0651 0x0651
76
/* Mode values; handleGeneratedSpaces() initializes its shapingMode local
 * with the first one. */
77#define SHAPE_MODE0 0
78#define DESHAPE_MODE1 1
79
/* Per-call settings; handleGeneratedSpaces() receives this struct by value. */
80struct uShapeVariables {
81 UChar tailChar; /* NOTE(review): not read by any helper visible in this part of the file */
82 uint32_t uShapeLamalefBegin; /* compared with the masked LamAlef option to select the "spaces at begin" pass */
83 uint32_t uShapeLamalefEnd; /* compared with the masked LamAlef option to select the "spaces at end" pass */
84 uint32_t uShapeTashkeelBegin; /* compared with the masked Tashkeel option, "begin" pass */
85 uint32_t uShapeTashkeelEnd; /* compared with the masked Tashkeel option, "end" pass */
86 int spacesRelativeToTextBeginEnd; /* with the LamAlef AUTO option: 1 selects the "begin" pass, 0 the "end" pass */
87};
88
/* Lookup for isSeenTailFamilyChar(): indexed by (ch - 0xFEB1) for
 * U+FEB1..U+FEBE; the stored 0/1 is returned as that function's result. */
89static const uint8_t tailFamilyIsolatedFinal[] = {
90 /* FEB1 */ 1,
91 /* FEB2 */ 1,
92 /* FEB3 */ 0,
93 /* FEB4 */ 0,
94 /* FEB5 */ 1,
95 /* FEB6 */ 1,
96 /* FEB7 */ 0,
97 /* FEB8 */ 0,
98 /* FEB9 */ 1,
99 /* FEBA */ 1,
100 /* FEBB */ 0,
101 /* FEBC */ 0,
102 /* FEBD */ 1,
103 /* FEBE */ 1
104};
105
/* Lookup shared by isTashkeelOnTatweelChar() and isIsolatedTashkeelChar():
 * indexed by (ch - 0xFE70) for U+FE70..U+FE7F. The first function returns
 * the stored value, the second returns 1 minus it. */
106static const uint8_t tashkeelMedial[] = {
107 /* FE70 */ 0,
108 /* FE71 */ 1,
109 /* FE72 */ 0,
110 /* FE73 */ 0,
111 /* FE74 */ 0,
112 /* FE75 */ 0,
113 /* FE76 */ 0,
114 /* FE77 */ 1,
115 /* FE78 */ 0,
116 /* FE79 */ 1,
117 /* FE7A */ 0,
118 /* FE7B */ 1,
119 /* FE7C */ 0,
120 /* FE7D */ 1,
121 /* FE7E */ 0,
122 /* FE7F */ 1
123};
124
/* Two-entry table: U+FEEF and U+FEF0, labelled isolated and final.
 * NOTE(review): the consumer is outside this part of the file; presumably
 * it is indexed by (ch - 0xFE89) for the two Yeh-Hamza forms - confirm. */
125static const UChar yehHamzaToYeh[] =
126{
127/* isolated*/ 0xFEEF,
128/* final */ 0xFEF0
129};
130
/* Eight even values 0x0..0xE.
 * NOTE(review): not referenced by the code visible in this part of the
 * file; meaning to be confirmed against its consumer. */
131static const uint8_t IrrelevantPos[] = {
132 0x0, 0x2, 0x4, 0x6,
133 0x8, 0xA, 0xC, 0xE
134};
135
136
/* Indexed by (ch - 0xFEF5) for the LamAlef forms U+FEF5..U+FEFC: gives the
 * Alef code point that the expandCompositChar* helpers store next to LAM
 * when they split a LamAlef back into two characters. */
137static const UChar convertLamAlef[] =
138{
139/*FEF5*/ 0x0622,
140/*FEF6*/ 0x0622,
141/*FEF7*/ 0x0623,
142/*FEF8*/ 0x0623,
143/*FEF9*/ 0x0625,
144/*FEFA*/ 0x0625,
145/*FEFB*/ 0x0627,
146/*FEFC*/ 0x0627
147};
148
/* Link word per code point, indexed by (ch - 0x0622) in getLink() for
 * U+0622..U+06D3 (178 entries). Each initializer sums small flag values
 * (1, 2, 4, 8, 16, 32, 64, 128) and, for most letters, 256 * n.
 * NOTE(review): n looks like an offset into a presentation-form block -
 * confirm against the shaping code that consumes the high byte. */
149static const UChar araLink[178]=
150{
151 1 + 32 + 256 * 0x11,/*0x0622*/
152 1 + 32 + 256 * 0x13,/*0x0623*/
153 1 + 256 * 0x15,/*0x0624*/
154 1 + 32 + 256 * 0x17,/*0x0625*/
155 1 + 2 + 256 * 0x19,/*0x0626*/
156 1 + 32 + 256 * 0x1D,/*0x0627*/
157 1 + 2 + 256 * 0x1F,/*0x0628*/
158 1 + 256 * 0x23,/*0x0629*/
159 1 + 2 + 256 * 0x25,/*0x062A*/
160 1 + 2 + 256 * 0x29,/*0x062B*/
161 1 + 2 + 256 * 0x2D,/*0x062C*/
162 1 + 2 + 256 * 0x31,/*0x062D*/
163 1 + 2 + 256 * 0x35,/*0x062E*/
164 1 + 256 * 0x39,/*0x062F*/
165 1 + 256 * 0x3B,/*0x0630*/
166 1 + 256 * 0x3D,/*0x0631*/
167 1 + 256 * 0x3F,/*0x0632*/
168 1 + 2 + 256 * 0x41,/*0x0633*/
169 1 + 2 + 256 * 0x45,/*0x0634*/
170 1 + 2 + 256 * 0x49,/*0x0635*/
171 1 + 2 + 256 * 0x4D,/*0x0636*/
172 1 + 2 + 256 * 0x51,/*0x0637*/
173 1 + 2 + 256 * 0x55,/*0x0638*/
174 1 + 2 + 256 * 0x59,/*0x0639*/
175 1 + 2 + 256 * 0x5D,/*0x063A*/
176 0, 0, 0, 0, 0, /*0x063B-0x063F*/
177 1 + 2, /*0x0640*/
178 1 + 2 + 256 * 0x61,/*0x0641*/
179 1 + 2 + 256 * 0x65,/*0x0642*/
180 1 + 2 + 256 * 0x69,/*0x0643*/
181 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
182 1 + 2 + 256 * 0x71,/*0x0645*/
183 1 + 2 + 256 * 0x75,/*0x0646*/
184 1 + 2 + 256 * 0x79,/*0x0647*/
185 1 + 256 * 0x7D,/*0x0648*/
186 1 + 256 * 0x7F,/*0x0649*/
187 1 + 2 + 256 * 0x81,/*0x064A*/
188 4 + 256 * 1, /*0x064B*/
189 4 + 128 + 256 * 1, /*0x064C*/
190 4 + 128 + 256 * 1, /*0x064D*/
191 4 + 128 + 256 * 1, /*0x064E*/
192 4 + 128 + 256 * 1, /*0x064F*/
193 4 + 128 + 256 * 1, /*0x0650*/
194 4 + 64 + 256 * 3, /*0x0651*/
195 4 + 256 * 1, /*0x0652*/
196 4 + 256 * 7, /*0x0653*/
197 4 + 256 * 8, /*0x0654*/
198 4 + 256 * 8, /*0x0655*/
199 4 + 256 * 1, /*0x0656*/
200 0, 0, 0, 0, 0, /*0x0657-0x065B*/
201 1 + 256 * 0x85,/*0x065C*/
202 1 + 256 * 0x87,/*0x065D*/
203 1 + 256 * 0x89,/*0x065E*/
204 1 + 256 * 0x8B,/*0x065F*/
205 0, 0, 0, 0, 0, /*0x0660-0x0664*/
206 0, 0, 0, 0, 0, /*0x0665-0x0669*/
207 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
208 4 + 256 * 6, /*0x0670*/
209 1 + 8 + 256 * 0x00,/*0x0671*/
210 1 + 32, /*0x0672*/
211 1 + 32, /*0x0673*/
212 0, /*0x0674*/
213 1 + 32, /*0x0675*/
214 1, 1, /*0x0676-0x0677*/
215 1 + 2, /*0x0678*/
216 1 + 2 + 8 + 256 * 0x16,/*0x0679*/
217 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/
218 1 + 2 + 8 + 256 * 0x02,/*0x067B*/
219 1+2, 1+2, /*0x067C-0x067D*/
220 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
221 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/
222 1 + 8 + 256 * 0x38,/*0x0688*/
223 1, 1, 1, /*0x0689-0x068B*/
224 1 + 8 + 256 * 0x34,/*0x068C*/
225 1 + 8 + 256 * 0x32,/*0x068D*/
226 1 + 8 + 256 * 0x36,/*0x068E*/
227 1, 1, /*0x068F-0x0690*/
228 1 + 8 + 256 * 0x3C,/*0x0691*/
229 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/
230 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
231 1+2, 1+2, 1+2, 1+2, /*0x06A0-0x06A3*/
232 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06A9*/
233 1+2, 1+2, 1+2, 1+2, /*0x06AA-0x06AD*/
234 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B3*/
235 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B7*/
236 1+2, 1+2, /*0x06B8-0x06B9*/
237 1 + 8 + 256 * 0x4E,/*0x06BA*/
238 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/
239 1+2, 1+2, /*0x06BC-0x06BD*/
240 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/
241 1+2, /*0x06BF*/
242 1 + 8 + 256 * 0x54,/*0x06C0*/
243 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/
244 1, 1, 1, /*0x06C2-0x06C4*/
245 1 + 8 + 256 * 0x90,/*0x06C5*/
246 1 + 8 + 256 * 0x89,/*0x06C6*/
247 1 + 8 + 256 * 0x87,/*0x06C7*/
248 1 + 8 + 256 * 0x8B,/*0x06C8*/
249 1 + 8 + 256 * 0x92,/*0x06C9*/
250 1, /*0x06CA*/
251 1 + 8 + 256 * 0x8E,/*0x06CB*/
252 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/
253 1, /*0x06CD*/
254 1+2, 1+2, /*0x06CE-0x06CF*/
255 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/
256 1+2, /*0x06D1*/
257 1 + 8 + 256 * 0x5E,/*0x06D2*/
258 1 + 8 + 256 * 0x60 /*0x06D3*/
259};
260
/* Link values for U+FB50..U+FC62, indexed by (ch - 0xFB50) in getLink().
 * One row per 16 code points; the row label is the code point without its
 * last hex digit and the header row gives that last digit. */
261static const uint8_t presALink[] = {
262/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
263/*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
264/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
266/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
267/*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
270/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
271/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
272/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
273/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2,
274/*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275/*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276/*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
277/*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278/*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
279/*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4,
280/*FC6*/ 4, 4, 4
281};
282
/* Link values for U+FE70..U+FEFC, indexed by (ch - 0xFE70) in getLink().
 * Same row/column layout as presALink. */
283static const uint8_t presBLink[]=
284{
285/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
286/*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
287/*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0,
288/*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
289/*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
290/*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
291/*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
292/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
293/*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0,
294/*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0
295};
296
/* Code points in the U+06xx range (or 0) laid out in rows labelled FB5..FBF.
 * NOTE(review): the consumer is outside this part of the file; presumably
 * indexed by (ch - 0xFB50) to map a presentation form back to its base
 * letter, with 0 meaning "no mapping" - confirm. */
297static const UChar convertFBto06[] =
298{
299/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
300/*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A,
301/*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679, 0, 0, 0, 0, 0, 0,
302/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0,
303/*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9,
304/*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA,
305/*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2,
306/*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
308/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB,
309/*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0,
310/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
311};
312
/* Code points in the U+06xx range laid out in rows labelled FE7..FEF; the
 * last row stops after 13 entries.
 * NOTE(review): the consumer is outside this part of the file; presumably
 * indexed by (ch - 0xFE70) to map a presentation form back to its base
 * character - confirm. */
313static const UChar convertFEto06[] =
314{
315/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
316/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
317/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
318/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
319/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
320/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
321/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
322/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
323/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
324/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
325};
326
/* 4 x 4 x 4 table of values 0..3.
 * NOTE(review): the consumer is outside this part of the file; presumably
 * the three indices are two-bit link values of neighbouring characters and
 * the result selects a contextual form - confirm before relying on it. */
327static const uint8_t shapeTable[4][4][4]=
328{
329 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
330 { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
331 { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
332 { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
333};
334
335/*
336 * This function shapes European digits to Arabic-Indic digits
337 * in-place, writing over the input characters.
338 * Since we know that we are only looking for BMP code points,
339 * we can safely just work with code units (again, at least UTF-16).
340 */
341static void
342_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
343 UChar digitBase,
344 UBool isLogical, UBool lastStrongWasAL) {
345 int32_t i;
346 UChar c;
347
348 digitBase-=0x30;
349
350 /* the iteration direction depends on the type of input */
351 if(isLogical) {
352 for(i=0; i<length; ++i) {
353 c=s[i];
354 switch(ubidi_getClassubidi_getClass_71(c)) {
355 case U_LEFT_TO_RIGHT: /* L */
356 case U_RIGHT_TO_LEFT: /* R */
357 lastStrongWasAL=FALSE0;
358 break;
359 case U_RIGHT_TO_LEFT_ARABIC: /* AL */
360 lastStrongWasAL=TRUE1;
361 break;
362 case U_EUROPEAN_NUMBER: /* EN */
363 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
364 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
365 }
366 break;
367 default :
368 break;
369 }
370 }
371 } else {
372 for(i=length; i>0; /* pre-decrement in the body */) {
373 c=s[--i];
374 switch(ubidi_getClassubidi_getClass_71(c)) {
375 case U_LEFT_TO_RIGHT: /* L */
376 case U_RIGHT_TO_LEFT: /* R */
377 lastStrongWasAL=FALSE0;
378 break;
379 case U_RIGHT_TO_LEFT_ARABIC: /* AL */
380 lastStrongWasAL=TRUE1;
381 break;
382 case U_EUROPEAN_NUMBER: /* EN */
383 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
384 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
385 }
386 break;
387 default :
388 break;
389 }
390 }
391 }
392}
393
394/*
395 *Name : invertBuffer
396 *Function : This function inverts the buffer, it's used
397 * in case the user specifies the buffer to be
398 * U_SHAPE_TEXT_DIRECTION_LOGICAL
399 */
400static void
401invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) {
402 UChar temp;
403 int32_t i=0,j=0;
404 for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) {
405 temp = buffer[i];
406 buffer[i] = buffer[j];
407 buffer[j] = temp;
408 }
409}
410
411/*
412 *Name : changeLamAlef
413 *Function : Converts the Alef characters into an equivalent
414 * LamAlef location in the 0x06xx Range, this is an
415 * intermediate stage in the operation of the program
416 * later it'll be converted into the 0xFExx LamAlefs
417 * in the shaping function.
418 */
419static inline UChar
420changeLamAlef(UChar ch) {
421 switch(ch) {
422 case 0x0622 :
423 return 0x065C;
424 case 0x0623 :
425 return 0x065D;
426 case 0x0625 :
427 return 0x065E;
428 case 0x0627 :
429 return 0x065F;
430 }
431 return 0;
432}
433
434/*
435 *Name : getLink
436 *Function : Resolves the link between the characters as
437 * Arabic characters have four forms :
438 * Isolated, Initial, Middle and Final Form
439 */
440static UChar
441getLink(UChar ch) {
442 if(ch >= 0x0622 && ch <= 0x06D3) {
443 return(araLink[ch-0x0622]);
444 } else if(ch == 0x200D) {
445 return(3);
446 } else if(ch >= 0x206D && ch <= 0x206F) {
447 return(4);
448 }else if(ch >= 0xFB50 && ch <= 0xFC62) {
449 return(presALink[ch-0xFB50]);
450 } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
451 return(presBLink[ch-0xFE70]);
452 }else {
453 return(0);
454 }
455}
456
457/*
458 *Name : countSpaces
459 *Function : Counts the number of spaces
460 * at each end of the logical buffer
461 */
462static void
463countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) {
464 int32_t i = 0;
465 int32_t countl = 0,countr = 0;
466 while((dest[i] == SPACE_CHAR0x0020) && (countl < size)) {
467 countl++;
468 i++;
469 }
470 if (countl < size) { /* the entire buffer is not all space */
471 while(dest[size-1] == SPACE_CHAR0x0020) {
472 countr++;
473 size--;
474 }
475 }
476 *spacesCountl = countl;
477 *spacesCountr = countr;
478}
479
480/*
481 *Name : isTashkeelChar
482 *Function : Returns 1 for Tashkeel characters in 06 range else return 0
483 */
484static inline int32_t
485isTashkeelChar(UChar ch) {
486 return (int32_t)( ch>=0x064B && ch<= 0x0652 );
487}
488
489/*
490 *Name : isTashkeelCharFE
491 *Function : Returns 1 for Tashkeel characters in FE range else return 0
492 */
493static inline int32_t
494isTashkeelCharFE(UChar ch) {
495 return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F );
496}
497
498/*
499 *Name : isAlefChar
500 *Function : Returns 1 for Alef characters else return 0
501 */
502static inline int32_t
503isAlefChar(UChar ch) {
504 return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) );
505}
506
507/*
508 *Name : isLamAlefChar
509 *Function : Returns 1 for LamAlef characters else return 0
510 */
511static inline int32_t
512isLamAlefChar(UChar ch) {
513 return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) );
514}
515
516/*BIDI
517 *Name : isTailChar
518 *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0
519 */
520
521static inline int32_t
522isTailChar(UChar ch) {
523 if(ch == OLD_TAIL_CHAR0x200B || ch == NEW_TAIL_CHAR0xFE73){
524 return 1;
525 }else{
526 return 0;
527 }
528}
529
530/*BIDI
531 *Name : isSeenTailFamilyChar
532 *Function : returns 1 if the character is a seen family isolated character
533 * in the FE range otherwise returns 0
534 */
535
536static inline int32_t
537isSeenTailFamilyChar(UChar ch) {
538 if(ch >= 0xfeb1 && ch < 0xfebf){
539 return tailFamilyIsolatedFinal [ch - 0xFEB1];
540 }else{
541 return 0;
542 }
543}
544
545 /* Name : isSeenFamilyChar
546 * Function : returns 1 if the character is a seen family character in the Unicode
547 * 06 range otherwise returns 0
548 */
549
550static inline int32_t
551isSeenFamilyChar(UChar ch){
552 if(ch >= 0x633 && ch <= 0x636){
553 return 1;
554 }else {
555 return 0;
556 }
557}
558
559/*Start of BIDI*/
560/*
561 *Name : isAlefMaksouraChar
562 *Function : returns 1 if the character is a Alef Maksoura Final or isolated
563 * otherwise returns 0
564 */
565static inline int32_t
566isAlefMaksouraChar(UChar ch) {
567 return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
568}
569
570/*
571 * Name : isYehHamzaChar
572 * Function : returns 1 if the character is a yehHamza isolated or yehhamza
573 * final is found otherwise returns 0
574 */
575static inline int32_t
576isYehHamzaChar(UChar ch) {
577 if((ch==0xFE89)||(ch==0xFE8A)){
578 return 1;
579 }else{
580 return 0;
581 }
582}
583
584 /*
585 * Name: isTashkeelOnTatweelChar
586 * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
587 * Tashkeel on tatweel (FE range), it returns 1 else if the
588 * Tashkeel with shadda on tatweel (FC range)return 2 otherwise
589 * returns 0
590 */
591static inline int32_t
592isTashkeelOnTatweelChar(UChar ch){
593 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR0xFE73 && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR0xFE7D)
594 {
595 return tashkeelMedial [ch - 0xFE70];
596 }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR0xFE7D)) {
597 return 2;
598 }else{
599 return 0;
600 }
601}
602
603/*
604 * Name: isIsolatedTashkeelChar
605 * Function: Checks if the Tashkeel Character is in the isolated form
606 * (i.e. Unicode FE range) returns 1 else if the Tashkeel
607 * with shadda is in the isolated form (i.e. Unicode FC range)
608 * returns 2 otherwise returns 0
609 */
610static inline int32_t
611isIsolatedTashkeelChar(UChar ch){
612 if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR0xFE73 && ch != 0xFE75){
613 return (1 - tashkeelMedial [ch - 0xFE70]);
614 }else if(ch >= 0xfc5e && ch <= 0xfc63){
615 return 1;
616 }else{
617 return 0;
618 }
619}
620
621
622
623
624/*
625 *Name : calculateSize
626 *Function : This function calculates the destSize to be used in preflighting
627 * when the destSize is equal to 0
628 * It is used also to calculate the new destsize in case the
629 * destination buffer will be resized.
630 */
631
632static int32_t
633calculateSize(const UChar *source, int32_t sourceLength,
634int32_t destSize,uint32_t options) {
635 int32_t i = 0;
636
637 int lamAlefOption = 0;
638 int tashkeelOption = 0;
639
640 destSize = sourceLength;
641
642 if (((options&U_SHAPE_LETTERS_MASK0x18) == U_SHAPE_LETTERS_SHAPE8 ||
643 ((options&U_SHAPE_LETTERS_MASK0x18) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18 )) &&
644 ((options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_RESIZE0 )){
645 lamAlefOption = 1;
646 }
647 if((options&U_SHAPE_LETTERS_MASK0x18) == U_SHAPE_LETTERS_SHAPE8 &&
648 ((options&U_SHAPE_TASHKEEL_MASK0xE0000) == U_SHAPE_TASHKEEL_RESIZE0x80000 ) ){
649 tashkeelOption = 1;
650 }
651
652 if(lamAlefOption || tashkeelOption){
653 if((options&U_SHAPE_TEXT_DIRECTION_MASK4)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR4) {
654 for(i=0;i<sourceLength;i++) {
655 if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR0x0644)) || (isTashkeelCharFE(source[i])) ) {
656 destSize--;
657 }
658 }
659 }else if((options&U_SHAPE_TEXT_DIRECTION_MASK4)==U_SHAPE_TEXT_DIRECTION_LOGICAL0) {
660 for(i=0;i<sourceLength;i++) {
661 if( ( (source[i] == LAM_CHAR0x0644) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) {
662 destSize--;
663 }
664 }
665 }
666 }
667
668 if ((options&U_SHAPE_LETTERS_MASK0x18) == U_SHAPE_LETTERS_UNSHAPE0x10){
669 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_RESIZE0){
670 for(i=0;i<sourceLength;i++) {
671 if(isLamAlefChar(source[i]))
672 destSize++;
673 }
674 }
675 }
676
677 return destSize;
678}
679
680/*
681 *Name : handleTashkeelWithTatweel
682 *Function : Replaces Tashkeel as following:
683 * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
684 * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
685 * it with Shadda on Tatweel.
686 * Case 3: if the Tashkeel is isolated replace it with Space.
687 *
688 */
689static int32_t
690handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength,
691 int32_t /*destSize*/, uint32_t /*options*/,
692 UErrorCode * /*pErrorCode*/) {
693 int i;
694 for(i = 0; i < sourceLength; i++){
695 if((isTashkeelOnTatweelChar(dest[i]) == 1)){
696 dest[i] = TATWEEL_CHAR0x0640;
697 }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){
698 dest[i] = SHADDA_TATWEEL_CHAR0xFE7D;
699 }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR0xFE7C){
700 dest[i] = SPACE_CHAR0x0020;
701 }
702 }
703 return sourceLength;
704}
705
706
707
708/*
709 *Name : handleGeneratedSpaces
710 *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
711 * and Tashkeel to space.
712 * handleGeneratedSpaces function puts these generated spaces
713 * according to the options the user specifies. LamAlef and Tashkeel
714 * spaces can be replaced at begin, at end, at near or decrease the
715 * buffer size.
716 *
717 * There is also Auto option for LamAlef and tashkeel, which will put
718 * the spaces at end of the buffer (or end of text if the user used
719 * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END).
720 *
721 * If the text type was visual_LTR and the option
722 * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
723 * option will place the space at the beginning of the buffer and
724 * BEGIN will place the space at the end of the buffer.
725 */
726
/*
 * Runs up to four passes over dest[0..sourceLength), each selected by the
 * LamAlef / Tashkeel bits in options (and, for two of them, by shapeVars):
 *   1. resize - drop the placeholder characters and shrink the text
 *   2. near   - turn each LamAlef placeholder into a space where it stands
 *   3. begin  - drop the placeholders and put that many spaces at the front
 *   4. end    - drop the placeholders and put spaces at the back
 * Returns the resulting length; returns 0 and sets *pErrorCode to
 * U_MEMORY_ALLOCATION_ERROR when the scratch buffer cannot be allocated.
 */
727static int32_t
728handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
729 int32_t destSize,
730 uint32_t options,
731 UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) {
732
733 int32_t i = 0, j = 0;
734 int32_t count = 0;
735 UChar *tempbuffer=NULL__null;
736
737 int lamAlefOption = 0;
738 int tashkeelOption = 0;
/* shapingMode is never reassigned, so every "shapingMode == 0" test below is always true. */
739 int shapingMode = SHAPE_MODE0;
740
741 if (shapingMode == 0){
742 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_RESIZE0 ){
743 lamAlefOption = 1;
744 }
745 if ( (options&U_SHAPE_TASHKEEL_MASK0xE0000) == U_SHAPE_TASHKEEL_RESIZE0x80000 ){
746 tashkeelOption = 1;
747 }
748 }
749
/* Scratch buffer of sourceLength+1 code units, shared by all passes. */
750 tempbuffer = (UChar *)uprv_mallocuprv_malloc_71((sourceLength+1)*U_SIZEOF_UCHAR2);
751 /* Test for NULL */
752 if(tempbuffer == NULL__null) {
753 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
754 return 0;
755 }
756
757
/* Pass 1 (resize): copy dest into tempbuffer while skipping placeholders. */
758 if (lamAlefOption || tashkeelOption){
759 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR):: memset(tempbuffer, 0, (sourceLength+1)*2);
760
761 i = j = 0; count = 0;
762 while(i < sourceLength) {
763 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB0xFFFF) ||
764 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB0xFFFE) ){
/* j-- here cancels the j++ below, so the write position stays put for a dropped character. */
765 j--;
766 count++;
767 } else {
768 tempbuffer[j] = dest[i];
769 }
770 i++;
771 j++;
772 }
773
/* i == sourceLength here: zero count+1 units from the end of tempbuffer backwards. */
774 while(count >= 0) {
775 tempbuffer[i] = 0x0000;
776 i--;
777 count--;
778 }
779
780 u_memcpyu_memcpy_71(dest, tempbuffer, sourceLength);
/* New length is taken from the first zero code unit in dest. */
781 destSize = u_strlenu_strlen_71(dest);
782 }
783
784 lamAlefOption = 0;
785
786 if (shapingMode == 0){
787 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_NEAR1 ){
788 lamAlefOption = 1;
789 }
790 }
791
/* Pass 2 (near): replace each LamAlef placeholder by a space in place. */
792 if (lamAlefOption){
793 /* Lam+Alef is already shaped into LamAlef + FFFF */
794 i = 0;
795 while(i < sourceLength) {
796 if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB0xFFFF){
797 dest[i] = SPACE_CHAR0x0020;
798 }
799 i++;
800 }
801 destSize = sourceLength;
802 }
803 lamAlefOption = 0;
804 tashkeelOption = 0;
805
806 if (shapingMode == 0) {
807 if ( ((options&U_SHAPE_LAMALEF_MASK0x10003) == shapeVars.uShapeLamalefBegin) ||
808 (((options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_AUTO0x10000 )
809 && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) {
810 lamAlefOption = 1;
811 }
812 if ( (options&U_SHAPE_TASHKEEL_MASK0xE0000) == shapeVars.uShapeTashkeelBegin ) {
813 tashkeelOption = 1;
814 }
815 }
816
/* Pass 3 (begin): copy backwards so the kept text is packed toward the end. */
817 if(lamAlefOption || tashkeelOption){
818 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR):: memset(tempbuffer, 0, (sourceLength+1)*2);
819
/* NOTE(review): the scan starts at index sourceLength, so the first iteration reads
 * dest[sourceLength]; this is only valid if dest holds more than sourceLength units - confirm with callers. */
820 i = j = sourceLength; count = 0;
821
822 while(i >= 0) {
823 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB0xFFFF) ||
824 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB0xFFFE) ){
825 j++;
826 count++;
827 }else {
828 tempbuffer[j] = dest[i];
829 }
830 i--;
831 j--;
832 }
833
/* The first count positions were vacated by the dropped placeholders. */
834 for(i=0 ;i < count; i++){
835 tempbuffer[i] = SPACE_CHAR0x0020;
836 }
837
838 u_memcpyu_memcpy_71(dest, tempbuffer, sourceLength);
839 destSize = sourceLength;
840 }
841
842
843
844 lamAlefOption = 0;
845 tashkeelOption = 0;
846
847 if (shapingMode == 0) {
848 if ( ((options&U_SHAPE_LAMALEF_MASK0x10003) == shapeVars.uShapeLamalefEnd) ||
849 (((options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_AUTO0x10000 )
850 && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) {
851 lamAlefOption = 1;
852 }
853 if ( (options&U_SHAPE_TASHKEEL_MASK0xE0000) == shapeVars.uShapeTashkeelEnd ){
854 tashkeelOption = 1;
855 }
856 }
857
/* Pass 4 (end): same forward compaction as pass 1, but the tail is padded with spaces and the length is kept. */
858 if(lamAlefOption || tashkeelOption){
859 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR):: memset(tempbuffer, 0, (sourceLength+1)*2);
860
861 i = j = 0; count = 0;
862 while(i < sourceLength) {
863 if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB0xFFFF) ||
864 (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB0xFFFE) ){
865 j--;
866 count++;
867 }else {
868 tempbuffer[j] = dest[i];
869 }
870 i++;
871 j++;
872 }
873
/* Writes count+1 spaces starting at index sourceLength; only the first sourceLength units are copied back. */
874 while(count >= 0) {
875 tempbuffer[i] = SPACE_CHAR0x0020;
876 i--;
877 count--;
878 }
879
880 u_memcpyu_memcpy_71(dest, tempbuffer, sourceLength);
881 destSize = sourceLength;
882 }
883
884
/* tempbuffer is always non-NULL here because the allocation failure path returned early. */
885 if(tempbuffer){
886 uprv_freeuprv_free_71(tempbuffer);
887 }
888
889 return destSize;
890}
891
892/*
893 *Name :expandCompositCharAtBegin
894 *Function :Expands the LamAlef character to Lam and Alef consuming the required
895 * space from beginning of the buffer. If the text type was visual_LTR
896 * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
897 * the spaces will be located at end of buffer.
898 * If there are no spaces to expand the LamAlef, an error
899 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
900 */
901
902static int32_t
903expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
904 int32_t i = 0,j = 0;
905 int32_t countl = 0;
906 UChar *tempbuffer=NULL__null;
907
908 tempbuffer = (UChar *)uprv_mallocuprv_malloc_71((sourceLength+1)*U_SIZEOF_UCHAR2);
909
910 /* Test for NULL */
911 if(tempbuffer == NULL__null) {
912 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
913 return 0;
914 }
915
916 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR):: memset(tempbuffer, 0, (sourceLength+1)*2);
917
918 i = 0;
919 while(dest[i] == SPACE_CHAR0x0020) {
920 countl++;
921 i++;
922 }
923
924 i = j = sourceLength-1;
925
926 while(i >= 0 && j >= 0) {
927 if( countl>0 && isLamAlefChar(dest[i])) {
928 tempbuffer[j] = LAM_CHAR0x0644;
929 /* to ensure the array index is within the range */
930 U_ASSERT(dest[i] >= 0xFEF5u(void)0
931 && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef))(void)0;
932 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
933 j--;
934 countl--;
935 }else {
936 if( countl == 0 && isLamAlefChar(dest[i]) ) {
937 *pErrorCode=U_NO_SPACE_AVAILABLE;
938 }
939 tempbuffer[j] = dest[i];
940 }
941 i--;
942 j--;
943 }
944 u_memcpyu_memcpy_71(dest, tempbuffer, sourceLength);
945
946 uprv_freeuprv_free_71(tempbuffer);
947
948 destSize = sourceLength;
949 return destSize;
950}
951
952/*
953 *Name : expandCompositCharAtEnd
954 *Function : Expands the LamAlef character to Lam and Alef consuming the
955 * required space from end of the buffer. If the text type was
956 * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
957 * was used, the spaces will be consumed from begin of buffer. If
958 * there are no spaces to expand the LamAlef, an error
959 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
960 */
961
962static int32_t
963expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
964 int32_t i = 0,j = 0;
965
966 int32_t countr = 0;
967 int32_t inpsize = sourceLength;
968
969 UChar *tempbuffer=NULL__null;
970 tempbuffer = (UChar *)uprv_mallocuprv_malloc_71((sourceLength+1)*U_SIZEOF_UCHAR2);
971
972 /* Test for NULL */
973 if(tempbuffer == NULL__null) {
974 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
975 return 0;
976 }
977
978 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR):: memset(tempbuffer, 0, (sourceLength+1)*2);
979
980 while(dest[inpsize-1] == SPACE_CHAR0x0020) {
981 countr++;
982 inpsize--;
983 }
984
985 i = sourceLength - countr - 1;
986 j = sourceLength - 1;
987
988 while(i >= 0 && j >= 0) {
989 if( countr>0 && isLamAlefChar(dest[i]) ) {
990 tempbuffer[j] = LAM_CHAR0x0644;
991 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
992 j--;
993 countr--;
994 }else {
995 if ((countr == 0) && isLamAlefChar(dest[i]) ) {
996 *pErrorCode=U_NO_SPACE_AVAILABLE;
997 }
998 tempbuffer[j] = dest[i];
999 }
1000 i--;
1001 j--;
1002 }
1003
1004 if(countr > 0) {
1005 u_memmoveu_memmove_71(tempbuffer, tempbuffer+countr, sourceLength);
1006 if(u_strlenu_strlen_71(tempbuffer) < sourceLength) {
1007 for(i=sourceLength-1;i>=sourceLength-countr;i--) {
1008 tempbuffer[i] = SPACE_CHAR0x0020;
1009 }
1010 }
1011 }
1012 u_memcpyu_memcpy_71(dest, tempbuffer, sourceLength);
1013
1014 uprv_freeuprv_free_71(tempbuffer);
1015
1016 destSize = sourceLength;
1017 return destSize;
1018}
1019
1020/*
1021 *Name : expandCompositCharAtNear
1022 *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
1023 * into Yeh + Hamza, SeenFamily character into SeenFamily character
1024 * + Tail, while consuming the space next to the character.
1025 * If there are no spaces next to the character, an error
1026 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
1027 */
1028
1029static int32_t
1030expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
1031 int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
1032 int32_t i = 0;
1033
1034
1035 UChar lamalefChar, yehhamzaChar;
1036
1037 for(i = 0 ;i<=sourceLength-1;i++) {
1038 if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
1039 if ((i>0) && (dest[i-1] == SPACE_CHAR0x0020) ) {
1040 dest[i-1] = shapeVars.tailChar;
1041 }else {
1042 *pErrorCode=U_NO_SPACE_AVAILABLE;
1043 }
1044 }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) {
1045 if ((i>0) && (dest[i-1] == SPACE_CHAR0x0020) ) {
1046 yehhamzaChar = dest[i];
1047 dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR0xFE89];
1048 dest[i-1] = HAMZAFE_CHAR0xfe80;
1049 }else {
1050
1051 *pErrorCode=U_NO_SPACE_AVAILABLE;
1052 }
1053 }else if(lamAlefOption && isLamAlefChar(dest[i+1])) {
1054 if(dest[i] == SPACE_CHAR0x0020){
1055 lamalefChar = dest[i+1];
1056 dest[i+1] = LAM_CHAR0x0644;
1057 dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
1058 }else {
1059 *pErrorCode=U_NO_SPACE_AVAILABLE;
1060 }
1061 }
1062 }
1063 destSize = sourceLength;
1064 return destSize;
1065}
1066 /*
1067 * Name : expandCompositChar
1068 * Function : LamAlef, need special handling, since it expands from one
1069 * character into two characters while shaping or deshaping.
1070 * In order to expand it, near or far spaces according to the
1071 * options user specifies. Also buffer size can be increased.
1072 *
1073 * For SeenFamily characters and YehHamza only the near option is
1074 * supported, while for LamAlef we can take spaces from begin, end,
1075 * near or even increase the buffer size.
1076 * There is also the Auto option for LamAlef only, which will first
1077 * search for a space at end, begin then near, respectively.
1078 * If there are no spaces to expand these characters, an error will be set to
1079 * U_NO_SPACE_AVAILABLE as defined in utypes.h
1080 */
1081
1082static int32_t
1083expandCompositChar(UChar *dest, int32_t sourceLength,
1084 int32_t destSize,uint32_t options,
1085 UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {
1086
1087 int32_t i = 0,j = 0;
1088
1089 UChar *tempbuffer=NULL__null;
1090 int yehHamzaOption = 0;
1091 int seenTailOption = 0;
1092 int lamAlefOption = 0;
1093
1094 if (shapingMode == 1){
1095 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_AUTO0x10000){
1096
1097 if(shapeVars.spacesRelativeToTextBeginEnd == 0) {
1098 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
1099
1100 if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
1101 *pErrorCode = U_ZERO_ERROR;
1102 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
1103 }
1104 }else {
1105 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
1106
1107 if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
1108 *pErrorCode = U_ZERO_ERROR;
1109 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
1110 }
1111 }
1112
1113 if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
1114 *pErrorCode = U_ZERO_ERROR;
1115 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
1116 seenTailOption, 1,shapeVars);
1117 }
1118 }
1119 }
1120
1121 if (shapingMode == 1){
1122 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == shapeVars.uShapeLamalefEnd){
1123 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
1124 }
1125 }
1126
1127 if (shapingMode == 1){
1128 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == shapeVars.uShapeLamalefBegin){
1129 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
1130 }
1131 }
1132
1133 if (shapingMode == 0){
1134 if ((options&U_SHAPE_YEHHAMZA_MASK0x3800000) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR0x1000000){
1135 yehHamzaOption = 1;
1136 }
1137 if ((options&U_SHAPE_SEEN_MASK0x700000) == U_SHAPE_SEEN_TWOCELL_NEAR0x200000){
1138 seenTailOption = 1;
1139 }
1140 }
1141 if (shapingMode == 1) {
1142 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_NEAR1) {
1143 lamAlefOption = 1;
1144 }
1145 }
1146
1147
1148 if (yehHamzaOption || seenTailOption || lamAlefOption){
1149 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
1150 seenTailOption,lamAlefOption,shapeVars);
1151 }
1152
1153
1154 if (shapingMode == 1){
1155 if ( (options&U_SHAPE_LAMALEF_MASK0x10003) == U_SHAPE_LAMALEF_RESIZE0){
1156 destSize = calculateSize(dest,sourceLength,destSize,options);
1157 tempbuffer = (UChar *)uprv_mallocuprv_malloc_71((destSize+1)*U_SIZEOF_UCHAR2);
1158
1159 /* Test for NULL */
1160 if(tempbuffer == NULL__null) {
1161 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1162 return 0;
1163 }
1164
1165 uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR):: memset(tempbuffer, 0, (destSize+1)*2);
1166
1167 i = j = 0;
1168 while(i < destSize && j < destSize) {
1169 if(isLamAlefChar(dest[i]) ) {
1170 tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ];
1171 tempbuffer[j+1] = LAM_CHAR0x0644;
1172 j++;
1173 }else {
1174 tempbuffer[j] = dest[i];
1175 }
1176 i++;
1177 j++;
1178 }
1179
1180 u_memcpyu_memcpy_71(dest, tempbuffer, destSize);
1181 }
1182 }
1183
1184 if(tempbuffer) {
1185 uprv_freeuprv_free_71(tempbuffer);
1186 }
1187 return destSize;
1188}
1189
1190/*
1191 *Name : shapeUnicode
1192 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
1193 * arabic Unicode buffer in FExx Range
1194 */
1195static int32_t
1196shapeUnicode(UChar *dest, int32_t sourceLength,
1197 int32_t destSize,uint32_t options,
1198 UErrorCode *pErrorCode,
1199 int tashkeelFlag, struct uShapeVariables shapeVars) {
1200
1201 int32_t i, iend;
1202 int32_t step;
1203 int32_t lastPos,Nx, Nw;
1204 unsigned int Shape;
1205 int32_t lamalef_found = 0;
1206 int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0;
1207 UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
1208 UChar wLamalef;
1209
1210 /*
1211 * Converts the input buffer from FExx Range into 06xx Range
1212 * to make sure that all characters are in the 06xx range
1213 * even the lamalef is converted to the special region in
1214 * the 06xx range
1215 */
1216 if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK0x8000) == U_SHAPE_PRESERVE_PRESENTATION_NOOP0) {
1217 for (i = 0; i < sourceLength; i++) {
1218 UChar inputChar = dest[i];
1219 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
1220 UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
1221 if (c != 0)
1222 dest[i] = c;
1223 } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
1224 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
1225 } else {
1226 dest[i] = inputChar ;
1227 }
1228 }
1229 }
1230
1231
1232 /* sets the index to the end of the buffer, together with the step point to -1 */
1233 i = sourceLength - 1;
1234 iend = -1;
1235 step = -1;
1236
1237 /*
1238 * This function resolves the link between the characters .
1239 * Arabic characters have four forms :
1240 * Isolated Form, Initial Form, Middle Form and Final Form
1241 */
1242 currLink = getLink(dest[i]);
1243
1244 lastPos = i;
1245 Nx = -2, Nw = 0;
Although the value stored to 'Nw' is used in the enclosing expression, the value is never actually read from 'Nw'
1246
1247 while (i != iend) {
1248 /* If high byte of currLink > 0 then more than one shape */
1249 if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT4) != 0) {
1250 Nw = i + step;
1251 while (Nx < 0) { /* we need to know about next char */
1252 if(Nw == iend) {
1253 nextLink = 0;
1254 Nx = 3000;
1255 } else {
1256 nextLink = getLink(dest[Nw]);
1257 if((nextLink & IRRELEVANT4) == 0) {
1258 Nx = Nw;
1259 } else {
1260 Nw = Nw + step;
1261 }
1262 }
1263 }
1264
1265 if ( ((currLink & ALEFTYPE32) > 0) && ((lastLink & LAMTYPE16) > 0) ) {
1266 lamalef_found = 1;
1267 wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
1268 if ( wLamalef != 0) {
1269 dest[i] = LAMALEF_SPACE_SUB0xFFFF; /* The default case is to drop the Alef and replace */
1270 dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */
1271 i=lastPos; /* unicode private use area, this is done to make */
1272 } /* sure that removeLamAlefSpaces() handles only the */
1273 lastLink = prevLink; /* spaces generated during lamalef generation. */
1274 currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */
1275 } /* in removeLamAlefSpaces() */
1276
1277 if ((i > 0) && (dest[i-1] == SPACE_CHAR0x0020)){
1278 if ( isSeenFamilyChar(dest[i])) {
1279 seenfamFound = 1;
1280 } else if (dest[i] == YEH_HAMZA_CHAR0x0626) {
1281 yehhamzaFound = 1;
1282 }
1283 }
1284 else if(i==0){
1285 if ( isSeenFamilyChar(dest[i])){
1286 seenfamFound = 1;
1287 } else if (dest[i] == YEH_HAMZA_CHAR0x0626) {
1288 yehhamzaFound = 1;
1289 }
1290 }
1291
1292 /*
1293 * get the proper shape according to link ability of neighbors
1294 * and of character; depends on the order of the shapes
1295 * (isolated, initial, middle, final) in the compatibility area
1296 */
1297 Shape = shapeTable[nextLink & (LINKR1 + LINKL2)]
1298 [lastLink & (LINKR1 + LINKL2)]
1299 [currLink & (LINKR1 + LINKL2)];
1300
1301 if ((currLink & (LINKR1+LINKL2)) == 1) {
1302 Shape &= 1;
1303 } else if(isTashkeelChar(dest[i])) {
1304 if( (lastLink & LINKL2) && (nextLink & LINKR1) && (tashkeelFlag == 1) &&
1305 dest[i] != 0x064C && dest[i] != 0x064D )
1306 {
1307 Shape = 1;
1308 if( (nextLink&ALEFTYPE32) == ALEFTYPE32 && (lastLink&LAMTYPE16) == LAMTYPE16 ) {
1309 Shape = 0;
1310 }
1311 } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR0x0651){
1312 Shape = 1;
1313 } else {
1314 Shape = 0;
1315 }
1316 }
1317 if ((dest[i] ^ 0x0600) < 0x100) {
1318 if ( isTashkeelChar(dest[i]) ){
1319 if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR0x0651){
1320 dest[i] = TASHKEEL_SPACE_SUB0xFFFE;
1321 tashkeelFound = 1;
1322 } else {
1323 /* to ensure the array index is within the range */
1324 U_ASSERT(dest[i] >= 0x064Bu(void)0
1325 && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos))(void)0;
1326 dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + static_cast<UChar>(Shape);
1327 }
1328 }else if ((currLink & APRESENT8) > 0) {
1329 dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape);
1330 }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT4) == 0) {
1331 dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape);
1332 }
1333 }
1334 }
1335
1336 /* move one notch forward */
1337 if ((currLink & IRRELEVANT4) == 0) {
1338 prevLink = lastLink;
1339 lastLink = currLink;
1340 lastPos = i;
1341 }
1342
1343 i = i + step;
1344 if (i == Nx) {
1345 currLink = nextLink;
1346 Nx = -2;
1347 } else if(i != iend) {
1348 currLink = getLink(dest[i]);
1349 }
1350 }
1351 destSize = sourceLength;
1352 if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){
1353 destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
1354 }
1355
1356 if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
1357 destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE0,shapeVars);
1358 }
1359 return destSize;
1360}
1361
1362/*
1363 *Name : deShapeUnicode
1364 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
1365 * arabic Unicode buffer in 06xx Range
1366 */
1367static int32_t
1368deShapeUnicode(UChar *dest, int32_t sourceLength,
1369 int32_t destSize,uint32_t options,
1370 UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
1371 int32_t i = 0;
1372 int32_t lamalef_found = 0;
1373 int32_t yehHamzaComposeEnabled = 0;
1374 int32_t seenComposeEnabled = 0;
1375
1376 yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK0x3800000) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR0x1000000) ? 1 : 0;
1377 seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK0x700000) == U_SHAPE_SEEN_TWOCELL_NEAR0x200000)? 1 : 0;
1378
1379 /*
1380 *This for loop changes the buffer from the Unicode FE range to
1381 *the Unicode 06 range
1382 */
1383
1384 for(i = 0; i < sourceLength; i++) {
1385 UChar inputChar = dest[i];
1386 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
1387 UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
1388 if (c != 0)
1389 dest[i] = c;
1390 } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR0x0621) || (inputChar == HAMZAFE_CHAR0xfe80))
1391 && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) {
1392 dest[i] = SPACE_CHAR0x0020;
1393 dest[i+1] = YEH_HAMZA_CHAR0x0626;
1394 } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1))
1395 && (isSeenTailFamilyChar(dest[i+1])) ) {
1396 dest[i] = SPACE_CHAR0x0020;
1397 } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
1398 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ];
1399 } else {
1400 dest[i] = inputChar ;
1401 }
1402
1403 if( isLamAlefChar(dest[i]) )
1404 lamalef_found = 1;
1405 }
1406
1407 destSize = sourceLength;
1408 if (lamalef_found != 0){
1409 destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE1, shapeVars);
1410 }
1411 return destSize;
1412}
1413
1414/*
1415 ****************************************
1416 * u_shapeArabic
1417 ****************************************
1418 */
1419
/*
 * Public entry point: applies the letter shaping and digit shaping selected
 * by options to source and writes the result to dest.
 *
 * source        input text; must not be NULL
 * sourceLength  number of UChars in source, or -1 to have it measured with
 *               u_strlen()
 * dest          output buffer; may be NULL only when destCapacity is 0
 * destCapacity  capacity of dest in UChars
 * options       U_SHAPE_* option bits; reserved or inconsistent combinations
 *               are rejected with U_ILLEGAL_ARGUMENT_ERROR
 * pErrorCode    in/out error code; nothing is done if it is NULL or already
 *               indicates a failure
 *
 * Returns the output length. When the result does not fit, the required
 * length is returned and *pErrorCode is U_BUFFER_OVERFLOW_ERROR. 0 is
 * returned for invalid arguments and for allocation failures.
 */
1420U_CAPIextern "C" int32_t U_EXPORT2
1421u_shapeArabicu_shapeArabic_71(const UChar *source, int32_t sourceLength,
1422 UChar *dest, int32_t destCapacity,
1423 uint32_t options,
1424 UErrorCode *pErrorCode) {
1425
1426 int32_t destLength;
/* Defaults for shapeVars; tailChar and the begin/end swaps are adjusted further down. */
1427 struct uShapeVariables shapeVars = { OLD_TAIL_CHAR0x200B,U_SHAPE_LAMALEF_BEGIN3,U_SHAPE_LAMALEF_END2,U_SHAPE_TASHKEEL_BEGIN0x40000,U_SHAPE_TASHKEEL_END0x60000,0};
1428
1429 /* usual error checking */
1430 if(pErrorCode==NULL__null || U_FAILURE(*pErrorCode)) {
1431 return 0;
1432 }
1433
1434 /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
1435 if( source==NULL__null || sourceLength<-1 || (dest==NULL__null && destCapacity!=0) || destCapacity<0 ||
1436 (((options&U_SHAPE_TASHKEEL_MASK0xE0000) > 0) &&
1437 ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18) ) ||
1438 (((options&U_SHAPE_TASHKEEL_MASK0xE0000) > 0) &&
1439 ((options&U_SHAPE_LETTERS_MASK0x18) == U_SHAPE_LETTERS_UNSHAPE0x10)) ||
1440 (options&U_SHAPE_DIGIT_TYPE_RESERVED0x200)==U_SHAPE_DIGIT_TYPE_RESERVED0x200 ||
1441 (options&U_SHAPE_DIGITS_MASK0xe0)==U_SHAPE_DIGITS_RESERVED0xa0 ||
1442 ((options&U_SHAPE_LAMALEF_MASK0x10003) != U_SHAPE_LAMALEF_RESIZE0 &&
1443 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK0x4000) != 0) ||
1444 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK0x4000) == U_SHAPE_AGGREGATE_TASHKEEL0x4000 &&
1445 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18)
1446 )
1447 {
1448 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1449 return 0;
1450 }
1451 /* Validate lamalef options */
1452 if(((options&U_SHAPE_LAMALEF_MASK0x10003) > 0)&&
1453 !(((options & U_SHAPE_LAMALEF_MASK0x10003)==U_SHAPE_LAMALEF_BEGIN3) ||
1454 ((options & U_SHAPE_LAMALEF_MASK0x10003)==U_SHAPE_LAMALEF_END2 ) ||
1455 ((options & U_SHAPE_LAMALEF_MASK0x10003)==U_SHAPE_LAMALEF_RESIZE0 )||
1456 ((options & U_SHAPE_LAMALEF_MASK0x10003)==U_SHAPE_LAMALEF_AUTO0x10000) ||
1457 ((options & U_SHAPE_LAMALEF_MASK0x10003)==U_SHAPE_LAMALEF_NEAR1)))
1458 {
1459 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1460 return 0;
1461 }
1462 /* Validate Tashkeel options */
1463 if(((options&U_SHAPE_TASHKEEL_MASK0xE0000) > 0)&&
1464 !(((options & U_SHAPE_TASHKEEL_MASK0xE0000)==U_SHAPE_TASHKEEL_BEGIN0x40000) ||
1465 ((options & U_SHAPE_TASHKEEL_MASK0xE0000)==U_SHAPE_TASHKEEL_END0x60000 )
1466 ||((options & U_SHAPE_TASHKEEL_MASK0xE0000)==U_SHAPE_TASHKEEL_RESIZE0x80000 )||
1467 ((options & U_SHAPE_TASHKEEL_MASK0xE0000)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL0xC0000)))
1468 {
1469 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1470 return 0;
1471 }
1472 /* determine the source length */
1473 if(sourceLength==-1) {
1474 sourceLength=u_strlenu_strlen_71(source);
1475 }
/* Empty input: nothing to shape, only terminate the destination. */
1476 if(sourceLength<=0) {
1477 return u_terminateUCharsu_terminateUChars_71(dest, destCapacity, 0, pErrorCode);
1478 }
1479
1480 /* check that source and destination do not overlap */
1481 if( dest!=NULL__null &&
1482 ((source<=dest && dest<source+sourceLength) ||
1483 (dest<=source && source<dest+destCapacity))) {
1484 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1485 return 0;
1486 }
1487
1488 /* Does Options contain the new Seen Tail Unicode code point option */
1489 if ( (options&U_SHAPE_TAIL_TYPE_MASK0x8000000) == U_SHAPE_TAIL_NEW_UNICODE0x8000000){
1490 shapeVars.tailChar = NEW_TAIL_CHAR0xFE73;
1491 }else {
1492 shapeVars.tailChar = OLD_TAIL_CHAR0x200B;
1493 }
1494
1495 if((options&U_SHAPE_LETTERS_MASK0x18)!=U_SHAPE_LETTERS_NOOP0) {
/* Work buffer: this 300-UChar stack array is used unless more room is needed (see below). */
1496 UChar buffer[300];
1497 UChar *tempbuffer, *tempsource = NULL__null;
1498 int32_t outputSize, spacesCountl=0, spacesCountr=0;
1499
/*
 * Optional pre-pass: copy source into a heap buffer (tempsource), folding
 * tashkeel pairs into a single code unit where the link flags allow it.
 * source/sourceLength are redirected to that copy afterwards.
 */
1500 if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK0x4000)>0) {
1501 int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK4) == U_SHAPE_TEXT_DIRECTION_LOGICAL0;
1502 int32_t aggregate_tashkeel =
1503 (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK0x4000+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18)) ==
1504 (U_SHAPE_AGGREGATE_TASHKEEL0x4000+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18);
1505 int step=logical_order?1:-1;
1506 int j=logical_order?-1:2*sourceLength;
1507 int i=logical_order?-1:sourceLength;
1508 int end=logical_order?sourceLength:-1;
1509 int aggregation_possible = 1;
1510 UChar prev = 0;
1511 UChar prevLink, currLink = 0;
1512 int newSourceLength = 0;
1513 tempsource = (UChar *)uprv_mallocuprv_malloc_71(2*sourceLength*U_SIZEOF_UCHAR2);
1514 if(tempsource == NULL__null) {
1515 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1516 return 0;
1517 }
1518
1519 while ((i+=step) != end) {
1520 prevLink = currLink;
1521 currLink = getLink(source[i]);
1522 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE(64 +128)) == COMBINE(64 +128) && aggregation_possible) {
1523 aggregation_possible = 0;
1524 tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
1525 currLink = getLink(tempsource[j]);
1526 } else {
1527 aggregation_possible = 1;
1528 tempsource[j+=step] = source[i];
1529 prev = source[i];
1530 newSourceLength++;
1531 }
1532 }
1533 source = tempsource+(logical_order?0:j);
1534 sourceLength = newSourceLength;
1535 }
1536
1537 /* calculate destination size */
1538 /* TODO: do we ever need to do this pure preflighting? */
1539 if(((options&U_SHAPE_LAMALEF_MASK0x10003)==U_SHAPE_LAMALEF_RESIZE0) ||
1540 ((options&U_SHAPE_TASHKEEL_MASK0xE0000)==U_SHAPE_TASHKEEL_RESIZE0x80000)) {
1541 outputSize=calculateSize(source,sourceLength,destCapacity,options);
1542 } else {
1543 outputSize=sourceLength;
1544 }
1545
/* Result does not fit: report the needed size; tempsource must be released first. */
1546 if(outputSize>destCapacity) {
1547 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1548 if (tempsource != NULL__null) uprv_freeuprv_free_71(tempsource);
1549 return outputSize;
1550 }
1551
1552 /*
1553 * need a temporary buffer of size max(outputSize, sourceLength)
1554 * because at first we copy source->temp
1555 */
1556 if(sourceLength>outputSize) {
1557 outputSize=sourceLength;
1558 }
1559
1560 /* Start of Arabic letter shaping part */
/* Use the stack buffer when it is big enough (outputSize is then raised to its full length); otherwise allocate. */
1561 if(outputSize<=UPRV_LENGTHOF(buffer)(int32_t)(sizeof(buffer)/sizeof((buffer)[0]))) {
1562 outputSize=UPRV_LENGTHOF(buffer)(int32_t)(sizeof(buffer)/sizeof((buffer)[0]));
1563 tempbuffer=buffer;
1564 } else {
1565 tempbuffer = (UChar *)uprv_mallocuprv_malloc_71(outputSize*U_SIZEOF_UCHAR2);
1566
1567 /*Test for NULL*/
1568 if(tempbuffer == NULL__null) {
1569 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1570 if (tempsource != NULL__null) uprv_freeuprv_free_71(tempsource);
1571 return 0;
1572 }
1573 }
/* From here on the text lives in tempbuffer; the aggregated copy is no longer needed. */
1574 u_memcpyu_memcpy_71(tempbuffer, source, sourceLength);
1575 if (tempsource != NULL__null){
1576 uprv_freeuprv_free_71(tempsource);
1577 }
1578
/* Zero the unused tail of the work buffer. */
1579 if(sourceLength<outputSize) {
1580 uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR):: memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength
)*2)
;
1581 }
1582
/* Logical-order text is inverted before shaping and inverted back afterwards. */
1583 if((options&U_SHAPE_TEXT_DIRECTION_MASK4) == U_SHAPE_TEXT_DIRECTION_LOGICAL0) {
1584 countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
1585 invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
1586 }
1587
/* Visual LTR with spaces relative to text begin/end: swap the begin/end option values in shapeVars. */
1588 if((options&U_SHAPE_TEXT_DIRECTION_MASK4) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR4) {
1589 if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK0x4000000) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END0x4000000) {
1590 shapeVars.spacesRelativeToTextBeginEnd = 1;
1591 shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END2;
1592 shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN3;
1593 shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END0x60000;
1594 shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN0x40000;
1595 }
1596 }
1597
1598 switch(options&U_SHAPE_LETTERS_MASK0x18) {
1599 case U_SHAPE_LETTERS_SHAPE8 :
1600 if( (options&U_SHAPE_TASHKEEL_MASK0xE0000)> 0
1601 && ((options&U_SHAPE_TASHKEEL_MASK0xE0000) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL0xC0000)) {
1602 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
1603 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
1604 }else {
1605 /* default Call the shaping function with tashkeel flag == 1 */
1606 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);
1607
1608 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
1609 if( (options&U_SHAPE_TASHKEEL_MASK0xE0000) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL0xC0000){
1610 destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode);
1611 }
1612 }
1613 break;
1614 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED0x18 :
1615 /* Call the shaping function with tashkeel flag == 0 */
1616 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
1617 break;
1618
1619 case U_SHAPE_LETTERS_UNSHAPE0x10 :
1620 /* Call the deshaping function */
1621 destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
1622 break;
1623 default :
1624 /* will never occur because of validity checks above */
1625 destLength = 0;
1626 break;
1627 }
1628
1629 /*
1630 * TODO: (markus 2002aug01)
1631 * For as long as we always preflight the outputSize above
1632 * we should U_ASSERT(outputSize==destLength)
1633 * except for the adjustment above before the tempbuffer allocation
1634 */
1635
1636 if((options&U_SHAPE_TEXT_DIRECTION_MASK4) == U_SHAPE_TEXT_DIRECTION_LOGICAL0) {
1637 countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
1638 invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
1639 }
/* Copy at most destCapacity UChars; an overflow is reported after the work buffer is freed. */
1640 u_memcpyu_memcpy_71(dest, tempbuffer, uprv_minuprv_min_71(destLength, destCapacity));
1641
1642 if(tempbuffer!=buffer) {
1643 uprv_freeuprv_free_71(tempbuffer);
1644 }
1645
1646 if(destLength>destCapacity) {
1647 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1648 return destLength;
1649 }
1650
1651 /* End of Arabic letter shaping part */
1652 } else {
1653 /*
1654 * No letter shaping:
1655 * just make sure the destination is large enough and copy the string.
1656 */
1657 if(destCapacity<sourceLength) {
1658 /* this catches preflighting, too */
1659 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1660 return sourceLength;
1661 }
1662 u_memcpyu_memcpy_71(dest, source, sourceLength);
1663 destLength=sourceLength;
1664 }
1665
1666 /*
1667 * Perform number shaping.
1668 * With UTF-16 or UTF-32, the length of the string is constant.
1669 * The easiest way to do this is to operate on the destination and
1670 * "shape" the digits in-place.
1671 */
1672 if((options&U_SHAPE_DIGITS_MASK0xe0)!=U_SHAPE_DIGITS_NOOP0) {
1673 UChar digitBase;
1674 int32_t i;
1675
1676 /* select the requested digit group */
1677 switch(options&U_SHAPE_DIGIT_TYPE_MASK0x300) {
1678 case U_SHAPE_DIGIT_TYPE_AN0:
1679 digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
1680 break;
1681 case U_SHAPE_DIGIT_TYPE_AN_EXTENDED0x100:
1682 digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
1683 break;
1684 default:
1685 /* will never occur because of validity checks above */
1686 digitBase=0;
1687 break;
1688 }
1689
1690 /* perform the requested operation */
1691 switch(options&U_SHAPE_DIGITS_MASK0xe0) {
1692 case U_SHAPE_DIGITS_EN2AN0x20:
1693 /* add (digitBase-'0') to each European (ASCII) digit code point */
1694 digitBase-=0x30;
1695 for(i=0; i<destLength; ++i) {
/* unsigned subtraction: one comparison covers both ends of the '0'..'9' range */
1696 if(((uint32_t)dest[i]-0x30)<10) {
1697 dest[i]+=digitBase;
1698 }
1699 }
1700 break;
1701 case U_SHAPE_DIGITS_AN2EN0x40:
1702 /* subtract (digitBase-'0') from each Arabic digit code point */
1703 for(i=0; i<destLength; ++i) {
1704 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
1705 dest[i]-=digitBase-0x30;
1706 }
1707 }
1708 break;
1709 case U_SHAPE_DIGITS_ALEN2AN_INIT_LR0x60:
1710 _shapeToArabicDigitsWithContext(dest, destLength,
1711 digitBase,
1712 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK4)==U_SHAPE_TEXT_DIRECTION_LOGICAL0),
1713 FALSE0);
1714 break;
1715 case U_SHAPE_DIGITS_ALEN2AN_INIT_AL0x80:
1716 _shapeToArabicDigitsWithContext(dest, destLength,
1717 digitBase,
1718 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK4)==U_SHAPE_TEXT_DIRECTION_LOGICAL0),
1719 TRUE1);
1720 break;
1721 default:
1722 /* will never occur because of validity checks above */
1723 break;
1724 }
1725 }
1726
1727 return u_terminateUCharsu_terminateUChars_71(dest, destCapacity, destLength, pErrorCode);
1728}