1 | /* ***** BEGIN LICENSE BLOCK *****
|
---|
2 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
---|
3 | *
|
---|
4 | * The contents of this file are subject to the Mozilla Public License Version
|
---|
5 | * 1.1 (the "License"); you may not use this file except in compliance with
|
---|
6 | * the License. You may obtain a copy of the License at
|
---|
7 | * http://www.mozilla.org/MPL/
|
---|
8 | *
|
---|
9 | * Software distributed under the License is distributed on an "AS IS" basis,
|
---|
10 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
---|
11 | * for the specific language governing rights and limitations under the
|
---|
12 | * License.
|
---|
13 | *
|
---|
14 | * The Original Code is Mozilla.
|
---|
15 | *
|
---|
16 | * The Initial Developer of the Original Code is
|
---|
17 | * Netscape Communications Corporation.
|
---|
18 | * Portions created by the Initial Developer are Copyright (C) 2002
|
---|
19 | * the Initial Developer. All Rights Reserved.
|
---|
20 | *
|
---|
21 | * Contributor(s):
|
---|
22 | * Darin Fisher <[email protected]>
|
---|
23 | * Brian Stell <[email protected]>
|
---|
24 | * Frank Tang <[email protected]>
|
---|
25 | * Brendan Eich <[email protected]>
|
---|
26 | * Sergei Dolgov <[email protected]>
|
---|
27 | *
|
---|
28 | * Alternatively, the contents of this file may be used under the terms of
|
---|
29 | * either the GNU General Public License Version 2 or later (the "GPL"), or
|
---|
30 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
---|
31 | * in which case the provisions of the GPL or the LGPL are applicable instead
|
---|
32 | * of those above. If you wish to allow use of your version of this file only
|
---|
33 | * under the terms of either the GPL or the LGPL, and not to allow others to
|
---|
34 | * use your version of this file under the terms of the MPL, indicate your
|
---|
35 | * decision by deleting the provisions above and replace them with the notice
|
---|
36 | * and other provisions required by the GPL or the LGPL. If you do not delete
|
---|
37 | * the provisions above, a recipient may use your version of this file under
|
---|
38 | * the terms of any one of the MPL, the GPL or the LGPL.
|
---|
39 | *
|
---|
40 | * ***** END LICENSE BLOCK ***** */
|
---|
41 |
|
---|
42 | #include "xpcom-private.h"
|
---|
43 |
|
---|
44 | //-----------------------------------------------------------------------------
|
---|
45 | // XP_UNIX
|
---|
46 | //-----------------------------------------------------------------------------
|
---|
47 | #if defined(XP_UNIX)
|
---|
48 |
|
---|
49 | #include <stdlib.h> // mbtowc, wctomb
|
---|
50 | #include <locale.h> // setlocale
|
---|
51 | #include "nscore.h"
|
---|
52 | #include "prlock.h"
|
---|
53 | #include "nsAString.h"
|
---|
54 | #include "nsReadableUtils.h"
|
---|
55 |
|
---|
//
// choose a conversion library.  we used to use mbrtowc/wcrtomb under Linux,
// but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
// or not (see bug 206811 and
// news://news.mozilla.org:119/[email protected]).  we now use
// iconv on all platforms where nl_types.h and langinfo.h (with CODESET
// support) are present along with iconv.
//
|
---|
64 | #if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
|
---|
65 | #define USE_ICONV 1
|
---|
66 | #else
|
---|
67 | #define USE_STDCONV 1
|
---|
68 | #endif
|
---|
69 |
|
---|
70 | static void
|
---|
71 | isolatin1_to_utf16(const char **input, PRUint32 *inputLeft, PRUnichar **output, PRUint32 *outputLeft)
|
---|
72 | {
|
---|
73 | while (*inputLeft && *outputLeft) {
|
---|
74 | **output = (unsigned char) **input;
|
---|
75 | (*input)++;
|
---|
76 | (*inputLeft)--;
|
---|
77 | (*output)++;
|
---|
78 | (*outputLeft)--;
|
---|
79 | }
|
---|
80 | }
|
---|
81 |
|
---|
82 | static void
|
---|
83 | utf16_to_isolatin1(const PRUnichar **input, PRUint32 *inputLeft, char **output, PRUint32 *outputLeft)
|
---|
84 | {
|
---|
85 | while (*inputLeft && *outputLeft) {
|
---|
86 | **output = (unsigned char) **input;
|
---|
87 | (*input)++;
|
---|
88 | (*inputLeft)--;
|
---|
89 | (*output)++;
|
---|
90 | (*outputLeft)--;
|
---|
91 | }
|
---|
92 | }
|
---|
93 |
|
---|
94 | //-----------------------------------------------------------------------------
|
---|
95 | // conversion using iconv
|
---|
96 | //-----------------------------------------------------------------------------
|
---|
97 | #if defined(USE_ICONV)
|
---|
98 | #include <nl_types.h> // CODESET
|
---|
99 | #include <langinfo.h> // nl_langinfo
|
---|
100 | #include <iconv.h> // iconv_open, iconv, iconv_close
|
---|
101 | #include <errno.h>
|
---|
102 |
|
---|
103 | #if defined(HAVE_ICONV_WITH_CONST_INPUT)
|
---|
104 | #define ICONV_INPUT(x) (x)
|
---|
105 | #else
|
---|
106 | #define ICONV_INPUT(x) ((char **)x)
|
---|
107 | #endif
|
---|
108 |
|
---|
109 | // solaris definitely needs this, but we'll enable it by default
|
---|
110 | // just in case... but we know for sure that iconv(3) in glibc
|
---|
111 | // doesn't need this.
|
---|
112 | #if !defined(__GLIBC__)
|
---|
113 | #define ENABLE_UTF8_FALLBACK_SUPPORT
|
---|
114 | #endif
|
---|
115 |
|
---|
116 | #define INVALID_ICONV_T ((iconv_t) -1)
|
---|
117 |
|
---|
118 | static inline size_t
|
---|
119 | xp_iconv(iconv_t converter,
|
---|
120 | const char **input,
|
---|
121 | size_t *inputLeft,
|
---|
122 | char **output,
|
---|
123 | size_t *outputLeft)
|
---|
124 | {
|
---|
125 | size_t res, outputAvail = outputLeft ? *outputLeft : 0;
|
---|
126 | res = iconv(converter, ICONV_INPUT(input), inputLeft, output, outputLeft);
|
---|
127 | if (res == (size_t) -1) {
|
---|
128 | // on some platforms (e.g., linux) iconv will fail with
|
---|
129 | // E2BIG if it cannot convert _all_ of its input. it'll
|
---|
130 | // still adjust all of the in/out params correctly, so we
|
---|
131 | // can ignore this error. the assumption is that we will
|
---|
132 | // be called again to complete the conversion.
|
---|
133 | if ((errno == E2BIG) && (*outputLeft < outputAvail))
|
---|
134 | res = 0;
|
---|
135 | }
|
---|
136 | return res;
|
---|
137 | }
|
---|
138 |
|
---|
139 | static inline void
|
---|
140 | xp_iconv_reset(iconv_t converter)
|
---|
141 | {
|
---|
142 | // NOTE: the man pages on Solaris claim that you can pass NULL
|
---|
143 | // for all parameter to reset the converter, but beware the
|
---|
144 | // evil Solaris crash if you go down this route >:-)
|
---|
145 |
|
---|
146 | const char *zero_char_in_ptr = NULL;
|
---|
147 | char *zero_char_out_ptr = NULL;
|
---|
148 | size_t zero_size_in = 0,
|
---|
149 | zero_size_out = 0;
|
---|
150 |
|
---|
151 | xp_iconv(converter, &zero_char_in_ptr,
|
---|
152 | &zero_size_in,
|
---|
153 | &zero_char_out_ptr,
|
---|
154 | &zero_size_out);
|
---|
155 | }
|
---|
156 |
|
---|
157 | static inline iconv_t
|
---|
158 | xp_iconv_open(const char **to_list, const char **from_list)
|
---|
159 | {
|
---|
160 | iconv_t res;
|
---|
161 | const char **from_name;
|
---|
162 | const char **to_name;
|
---|
163 |
|
---|
164 | // try all possible combinations to locate a converter.
|
---|
165 | to_name = to_list;
|
---|
166 | while (*to_name) {
|
---|
167 | if (**to_name) {
|
---|
168 | from_name = from_list;
|
---|
169 | while (*from_name) {
|
---|
170 | if (**from_name) {
|
---|
171 | res = iconv_open(*to_name, *from_name);
|
---|
172 | if (res != INVALID_ICONV_T)
|
---|
173 | return res;
|
---|
174 | }
|
---|
175 | from_name++;
|
---|
176 | }
|
---|
177 | }
|
---|
178 | to_name++;
|
---|
179 | }
|
---|
180 |
|
---|
181 | return INVALID_ICONV_T;
|
---|
182 | }
|
---|
183 |
|
---|
/*
 * Candidate iconv names for the encoding of PRUnichar[] buffers, in order
 * of preference.
 *
 * PRUnichar[] is NOT a UCS-2 array BUT a UTF-16 string, so UTF-16 has to
 * be used with iconv(3) on platforms where it is supported.  How UTF-16
 * and UCS-2 are interpreted varies across platforms and implementations
 * of iconv(3); on Tru64 it also depends on the environment variable.
 *
 * To avoid the trouble arising from byte-swapping (bug 208809), the
 * explicit-endian names UTF-16LE/BE and UCS-2LE/BE come first, and only
 * then the plain UTF-16 and UCS-2 names and their spelling variants.  We
 * assume that UTF-16 and UCS-2 have the native endianness on systems that
 * lack UTF-16LE/BE and UCS-2LE/BE.  That is not the case for glibc 2.1.x,
 * for which 'UNICODELITTLE' and 'UNICODEBIG' are used.  It is also not
 * true of Tru64 V4 when the environment variable ICONV_BYTEORDER is set to
 * 'big-endian', about which not much can be done other than adding a note
 * in the release notes.  (bug 206811)
 */
static const char *UTF_16_NAMES[] = {
#if !defined(IS_LITTLE_ENDIAN)
    "UTF-16BE",
#if defined(__GLIBC__)
    "UNICODEBIG",
#endif
    "UCS-2BE",
#else
    "UTF-16LE",
#if defined(__GLIBC__)
    "UNICODELITTLE",
#endif
    "UCS-2LE",
#endif
    // names without an explicit byte order
    "UTF-16",
    "UCS-2", "UCS2", "UCS_2",
    "ucs-2", "ucs2", "ucs_2",
    NULL
};
|
---|
221 |
|
---|
222 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
// Spelling variants of the UTF-8 charset name to try with iconv_open,
// NULL-terminated.
static const char *UTF_8_NAMES[] = {
    "UTF-8", "UTF8", "UTF_8",   // upper case
    "utf-8", "utf8", "utf_8",   // lower case
    NULL
};
|
---|
232 | #endif
|
---|
233 |
|
---|
// Spelling variants of the ISO-8859-1 charset name to try with iconv_open,
// NULL-terminated.  Only the canonical spelling is listed when building
// against glibc.
static const char *ISO_8859_1_NAMES[] = {
    "ISO-8859-1",
#ifndef __GLIBC__
    // upper case variants
    "ISO8859-1", "ISO88591", "ISO_8859_1", "ISO8859_1",
    // lower case variants
    "iso-8859-1", "iso8859-1", "iso88591", "iso_8859_1", "iso8859_1",
#endif
    NULL
};
|
---|
249 |
|
---|
250 | class nsNativeCharsetConverter
|
---|
251 | {
|
---|
252 | public:
|
---|
253 | nsNativeCharsetConverter();
|
---|
254 | ~nsNativeCharsetConverter();
|
---|
255 |
|
---|
256 | nsresult NativeToUnicode(const char **input , PRUint32 *inputLeft,
|
---|
257 | PRUnichar **output, PRUint32 *outputLeft);
|
---|
258 | nsresult UnicodeToNative(const PRUnichar **input , PRUint32 *inputLeft,
|
---|
259 | char **output, PRUint32 *outputLeft);
|
---|
260 |
|
---|
261 | static void GlobalInit();
|
---|
262 | static void GlobalShutdown();
|
---|
263 |
|
---|
264 | private:
|
---|
265 | static iconv_t gNativeToUnicode;
|
---|
266 | static iconv_t gUnicodeToNative;
|
---|
267 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
268 | static iconv_t gNativeToUTF8;
|
---|
269 | static iconv_t gUTF8ToNative;
|
---|
270 | static iconv_t gUnicodeToUTF8;
|
---|
271 | static iconv_t gUTF8ToUnicode;
|
---|
272 | #endif
|
---|
273 | static PRLock *gLock;
|
---|
274 | static PRBool gInitialized;
|
---|
275 |
|
---|
276 | static void LazyInit();
|
---|
277 |
|
---|
278 | static void Lock() { if (gLock) PR_Lock(gLock); }
|
---|
279 | static void Unlock() { if (gLock) PR_Unlock(gLock); }
|
---|
280 | };
|
---|
281 |
|
---|
282 | iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
|
---|
283 | iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
|
---|
284 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
285 | iconv_t nsNativeCharsetConverter::gNativeToUTF8 = INVALID_ICONV_T;
|
---|
286 | iconv_t nsNativeCharsetConverter::gUTF8ToNative = INVALID_ICONV_T;
|
---|
287 | iconv_t nsNativeCharsetConverter::gUnicodeToUTF8 = INVALID_ICONV_T;
|
---|
288 | iconv_t nsNativeCharsetConverter::gUTF8ToUnicode = INVALID_ICONV_T;
|
---|
289 | #endif
|
---|
290 | PRLock *nsNativeCharsetConverter::gLock = nsnull;
|
---|
291 | PRBool nsNativeCharsetConverter::gInitialized = PR_FALSE;
|
---|
292 |
|
---|
293 | void
|
---|
294 | nsNativeCharsetConverter::LazyInit()
|
---|
295 | {
|
---|
296 | const char *blank_list[] = { "", NULL };
|
---|
297 | const char **native_charset_list = blank_list;
|
---|
298 | const char *native_charset = nl_langinfo(CODESET);
|
---|
299 | if (native_charset == nsnull) {
|
---|
300 | NS_ERROR("native charset is unknown");
|
---|
301 | // fallback to ISO-8859-1
|
---|
302 | native_charset_list = ISO_8859_1_NAMES;
|
---|
303 | }
|
---|
304 | else
|
---|
305 | native_charset_list[0] = native_charset;
|
---|
306 |
|
---|
307 | gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
|
---|
308 | gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
|
---|
309 |
|
---|
310 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
311 | if (gNativeToUnicode == INVALID_ICONV_T) {
|
---|
312 | gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
|
---|
313 | gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
|
---|
314 | NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
|
---|
315 | NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
|
---|
316 | }
|
---|
317 | if (gUnicodeToNative == INVALID_ICONV_T) {
|
---|
318 | gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
|
---|
319 | gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
|
---|
320 | NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
|
---|
321 | NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
|
---|
322 | }
|
---|
323 | #else
|
---|
324 | NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
|
---|
325 | NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
|
---|
326 | #endif
|
---|
327 |
|
---|
328 | /*
|
---|
329 | * On Solaris 8 (and newer?), the iconv modules converting to UCS-2
|
---|
330 | * prepend a byte order mark unicode character (BOM, u+FEFF) during
|
---|
331 | * the first use of the iconv converter. The same is the case of
|
---|
332 | * glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
|
---|
333 | * However, we use 'UTF-16LE/BE' in both cases, instead so that we
|
---|
334 | * should be safe. But just in case...
|
---|
335 | *
|
---|
336 | * This dummy conversion gets rid of the BOMs and fixes bug 153562.
|
---|
337 | */
|
---|
338 | char dummy_input[1] = { ' ' };
|
---|
339 | char dummy_output[4];
|
---|
340 |
|
---|
341 | if (gNativeToUnicode != INVALID_ICONV_T) {
|
---|
342 | const char *input = dummy_input;
|
---|
343 | size_t input_left = sizeof(dummy_input);
|
---|
344 | char *output = dummy_output;
|
---|
345 | size_t output_left = sizeof(dummy_output);
|
---|
346 |
|
---|
347 | xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
|
---|
348 | }
|
---|
349 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
350 | if (gUTF8ToUnicode != INVALID_ICONV_T) {
|
---|
351 | const char *input = dummy_input;
|
---|
352 | size_t input_left = sizeof(dummy_input);
|
---|
353 | char *output = dummy_output;
|
---|
354 | size_t output_left = sizeof(dummy_output);
|
---|
355 |
|
---|
356 | xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
|
---|
357 | }
|
---|
358 | #endif
|
---|
359 |
|
---|
360 | gInitialized = PR_TRUE;
|
---|
361 | }
|
---|
362 |
|
---|
363 | void
|
---|
364 | nsNativeCharsetConverter::GlobalInit()
|
---|
365 | {
|
---|
366 | gLock = PR_NewLock();
|
---|
367 | NS_ASSERTION(gLock, "lock creation failed");
|
---|
368 | }
|
---|
369 |
|
---|
370 | void
|
---|
371 | nsNativeCharsetConverter::GlobalShutdown()
|
---|
372 | {
|
---|
373 | if (gLock) {
|
---|
374 | PR_DestroyLock(gLock);
|
---|
375 | gLock = nsnull;
|
---|
376 | }
|
---|
377 |
|
---|
378 | if (gNativeToUnicode != INVALID_ICONV_T) {
|
---|
379 | iconv_close(gNativeToUnicode);
|
---|
380 | gNativeToUnicode = INVALID_ICONV_T;
|
---|
381 | }
|
---|
382 |
|
---|
383 | if (gUnicodeToNative != INVALID_ICONV_T) {
|
---|
384 | iconv_close(gUnicodeToNative);
|
---|
385 | gUnicodeToNative = INVALID_ICONV_T;
|
---|
386 | }
|
---|
387 |
|
---|
388 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
389 | if (gNativeToUTF8 != INVALID_ICONV_T) {
|
---|
390 | iconv_close(gNativeToUTF8);
|
---|
391 | gNativeToUTF8 = INVALID_ICONV_T;
|
---|
392 | }
|
---|
393 | if (gUTF8ToNative != INVALID_ICONV_T) {
|
---|
394 | iconv_close(gUTF8ToNative);
|
---|
395 | gUTF8ToNative = INVALID_ICONV_T;
|
---|
396 | }
|
---|
397 | if (gUnicodeToUTF8 != INVALID_ICONV_T) {
|
---|
398 | iconv_close(gUnicodeToUTF8);
|
---|
399 | gUnicodeToUTF8 = INVALID_ICONV_T;
|
---|
400 | }
|
---|
401 | if (gUTF8ToUnicode != INVALID_ICONV_T) {
|
---|
402 | iconv_close(gUTF8ToUnicode);
|
---|
403 | gUTF8ToUnicode = INVALID_ICONV_T;
|
---|
404 | }
|
---|
405 | #endif
|
---|
406 |
|
---|
407 | gInitialized = PR_FALSE;
|
---|
408 | }
|
---|
409 |
|
---|
410 | nsNativeCharsetConverter::nsNativeCharsetConverter()
|
---|
411 | {
|
---|
412 | Lock();
|
---|
413 | if (!gInitialized)
|
---|
414 | LazyInit();
|
---|
415 | }
|
---|
416 |
|
---|
417 | nsNativeCharsetConverter::~nsNativeCharsetConverter()
|
---|
418 | {
|
---|
419 | // reset converters for next time
|
---|
420 | if (gNativeToUnicode != INVALID_ICONV_T)
|
---|
421 | xp_iconv_reset(gNativeToUnicode);
|
---|
422 | if (gUnicodeToNative != INVALID_ICONV_T)
|
---|
423 | xp_iconv_reset(gUnicodeToNative);
|
---|
424 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
425 | if (gNativeToUTF8 != INVALID_ICONV_T)
|
---|
426 | xp_iconv_reset(gNativeToUTF8);
|
---|
427 | if (gUTF8ToNative != INVALID_ICONV_T)
|
---|
428 | xp_iconv_reset(gUTF8ToNative);
|
---|
429 | if (gUnicodeToUTF8 != INVALID_ICONV_T)
|
---|
430 | xp_iconv_reset(gUnicodeToUTF8);
|
---|
431 | if (gUTF8ToUnicode != INVALID_ICONV_T)
|
---|
432 | xp_iconv_reset(gUTF8ToUnicode);
|
---|
433 | #endif
|
---|
434 | Unlock();
|
---|
435 | }
|
---|
436 |
|
---|
437 | nsresult
|
---|
438 | nsNativeCharsetConverter::NativeToUnicode(const char **input,
|
---|
439 | PRUint32 *inputLeft,
|
---|
440 | PRUnichar **output,
|
---|
441 | PRUint32 *outputLeft)
|
---|
442 | {
|
---|
443 | size_t res = 0;
|
---|
444 | size_t inLeft = (size_t) *inputLeft;
|
---|
445 | size_t outLeft = (size_t) *outputLeft * 2;
|
---|
446 |
|
---|
447 | if (gNativeToUnicode != INVALID_ICONV_T) {
|
---|
448 |
|
---|
449 | res = xp_iconv(gNativeToUnicode, input, &inLeft, (char **) output, &outLeft);
|
---|
450 |
|
---|
451 | *inputLeft = inLeft;
|
---|
452 | *outputLeft = outLeft / 2;
|
---|
453 | if (res != (size_t) -1)
|
---|
454 | return NS_OK;
|
---|
455 |
|
---|
456 | NS_WARNING("conversion from native to utf-16 failed");
|
---|
457 |
|
---|
458 | // reset converter
|
---|
459 | xp_iconv_reset(gNativeToUnicode);
|
---|
460 | }
|
---|
461 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
462 | else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
|
---|
463 | (gUTF8ToUnicode != INVALID_ICONV_T)) {
|
---|
464 | // convert first to UTF8, then from UTF8 to UCS2
|
---|
465 | const char *in = *input;
|
---|
466 |
|
---|
467 | char ubuf[1024];
|
---|
468 |
|
---|
469 | // we assume we're always called with enough space in |output|,
|
---|
470 | // so convert many chars at a time...
|
---|
471 | while (inLeft) {
|
---|
472 | char *p = ubuf;
|
---|
473 | size_t n = sizeof(ubuf);
|
---|
474 | res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
|
---|
475 | if (res == (size_t) -1) {
|
---|
476 | NS_ERROR("conversion from native to utf-8 failed");
|
---|
477 | break;
|
---|
478 | }
|
---|
479 | NS_ASSERTION(outLeft > 0, "bad assumption");
|
---|
480 | p = ubuf;
|
---|
481 | n = sizeof(ubuf) - n;
|
---|
482 | res = xp_iconv(gUTF8ToUnicode, (const char **) &p, &n, (char **) output, &outLeft);
|
---|
483 | if (res == (size_t) -1) {
|
---|
484 | NS_ERROR("conversion from utf-8 to utf-16 failed");
|
---|
485 | break;
|
---|
486 | }
|
---|
487 | }
|
---|
488 |
|
---|
489 | (*input) += (*inputLeft - inLeft);
|
---|
490 | *inputLeft = inLeft;
|
---|
491 | *outputLeft = outLeft / 2;
|
---|
492 |
|
---|
493 | if (res != (size_t) -1)
|
---|
494 | return NS_OK;
|
---|
495 |
|
---|
496 | // reset converters
|
---|
497 | xp_iconv_reset(gNativeToUTF8);
|
---|
498 | xp_iconv_reset(gUTF8ToUnicode);
|
---|
499 | }
|
---|
500 | #endif
|
---|
501 |
|
---|
502 | // fallback: zero-pad and hope for the best
|
---|
503 | // XXX This is lame and we have to do better.
|
---|
504 | isolatin1_to_utf16(input, inputLeft, output, outputLeft);
|
---|
505 |
|
---|
506 | return NS_OK;
|
---|
507 | }
|
---|
508 |
|
---|
509 | nsresult
|
---|
510 | nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
|
---|
511 | PRUint32 *inputLeft,
|
---|
512 | char **output,
|
---|
513 | PRUint32 *outputLeft)
|
---|
514 | {
|
---|
515 | size_t res = 0;
|
---|
516 | size_t inLeft = (size_t) *inputLeft * 2;
|
---|
517 | size_t outLeft = (size_t) *outputLeft;
|
---|
518 |
|
---|
519 | if (gUnicodeToNative != INVALID_ICONV_T) {
|
---|
520 | res = xp_iconv(gUnicodeToNative, (const char **) input, &inLeft, output, &outLeft);
|
---|
521 |
|
---|
522 | if (res != (size_t) -1) {
|
---|
523 | *inputLeft = inLeft / 2;
|
---|
524 | *outputLeft = outLeft;
|
---|
525 | return NS_OK;
|
---|
526 | }
|
---|
527 |
|
---|
528 | NS_ERROR("iconv failed");
|
---|
529 |
|
---|
530 | // reset converter
|
---|
531 | xp_iconv_reset(gUnicodeToNative);
|
---|
532 | }
|
---|
533 | #if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
|
---|
534 | else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
|
---|
535 | (gUTF8ToNative != INVALID_ICONV_T)) {
|
---|
536 | const char *in = (const char *) *input;
|
---|
537 |
|
---|
538 | char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
|
---|
539 |
|
---|
540 | // convert one uchar at a time...
|
---|
541 | while (inLeft && outLeft) {
|
---|
542 | char *p = ubuf;
|
---|
543 | size_t n = sizeof(ubuf), one_uchar = sizeof(PRUnichar);
|
---|
544 | res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
|
---|
545 | if (res == (size_t) -1) {
|
---|
546 | NS_ERROR("conversion from utf-16 to utf-8 failed");
|
---|
547 | break;
|
---|
548 | }
|
---|
549 | p = ubuf;
|
---|
550 | n = sizeof(ubuf) - n;
|
---|
551 | res = xp_iconv(gUTF8ToNative, (const char **) &p, &n, output, &outLeft);
|
---|
552 | if (res == (size_t) -1) {
|
---|
553 | if (errno == E2BIG) {
|
---|
554 | // not enough room for last uchar... back up and return.
|
---|
555 | in -= sizeof(PRUnichar);
|
---|
556 | res = 0;
|
---|
557 | }
|
---|
558 | else
|
---|
559 | NS_ERROR("conversion from utf-8 to native failed");
|
---|
560 | break;
|
---|
561 | }
|
---|
562 | inLeft -= sizeof(PRUnichar);
|
---|
563 | }
|
---|
564 |
|
---|
565 | if (res != (size_t) -1) {
|
---|
566 | (*input) += (*inputLeft - inLeft/2);
|
---|
567 | *inputLeft = inLeft/2;
|
---|
568 | *outputLeft = outLeft;
|
---|
569 | return NS_OK;
|
---|
570 | }
|
---|
571 |
|
---|
572 | // reset converters
|
---|
573 | xp_iconv_reset(gUnicodeToUTF8);
|
---|
574 | xp_iconv_reset(gUTF8ToNative);
|
---|
575 | }
|
---|
576 | #endif
|
---|
577 |
|
---|
578 | // fallback: truncate and hope for the best
|
---|
579 | utf16_to_isolatin1(input, inputLeft, output, outputLeft);
|
---|
580 |
|
---|
581 | return NS_OK;
|
---|
582 | }
|
---|
583 |
|
---|
584 | #endif // USE_ICONV
|
---|
585 |
|
---|
586 | //-----------------------------------------------------------------------------
|
---|
587 | // conversion using mb[r]towc/wc[r]tomb
|
---|
588 | //-----------------------------------------------------------------------------
|
---|
589 | #if defined(USE_STDCONV)
|
---|
590 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
|
---|
591 | #include <wchar.h> // mbrtowc, wcrtomb
|
---|
592 | #endif
|
---|
593 |
|
---|
594 | class nsNativeCharsetConverter
|
---|
595 | {
|
---|
596 | public:
|
---|
597 | nsNativeCharsetConverter();
|
---|
598 |
|
---|
599 | nsresult NativeToUnicode(const char **input , PRUint32 *inputLeft,
|
---|
600 | PRUnichar **output, PRUint32 *outputLeft);
|
---|
601 | nsresult UnicodeToNative(const PRUnichar **input , PRUint32 *inputLeft,
|
---|
602 | char **output, PRUint32 *outputLeft);
|
---|
603 |
|
---|
604 | static void GlobalInit();
|
---|
605 | static void GlobalShutdown() { }
|
---|
606 |
|
---|
607 | private:
|
---|
608 | static PRBool gWCharIsUnicode;
|
---|
609 |
|
---|
610 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
|
---|
611 | mbstate_t ps;
|
---|
612 | #endif
|
---|
613 | };
|
---|
614 |
|
---|
615 | PRBool nsNativeCharsetConverter::gWCharIsUnicode = PR_FALSE;
|
---|
616 |
|
---|
617 | nsNativeCharsetConverter::nsNativeCharsetConverter()
|
---|
618 | {
|
---|
619 | #if defined(HAVE_WCRTOMB) || defined(HAVE_MBRTOWC)
|
---|
620 | memset(&ps, 0, sizeof(ps));
|
---|
621 | #endif
|
---|
622 | }
|
---|
623 |
|
---|
624 | void
|
---|
625 | nsNativeCharsetConverter::GlobalInit()
|
---|
626 | {
|
---|
627 | // verify that wchar_t for the current locale is actually unicode.
|
---|
628 | // if it is not, then we should avoid calling mbtowc/wctomb and
|
---|
629 | // just fallback on zero-pad/truncation conversion.
|
---|
630 | //
|
---|
631 | // this test cannot be done at build time because the encoding of
|
---|
632 | // wchar_t may depend on the runtime locale. sad, but true!!
|
---|
633 | //
|
---|
634 | // so, if wchar_t is unicode then converting an ASCII character
|
---|
635 | // to wchar_t should not change its numeric value. we'll just
|
---|
636 | // check what happens with the ASCII 'a' character.
|
---|
637 | //
|
---|
638 | // this test is not perfect... obviously, it could yield false
|
---|
639 | // positives, but then at least ASCII text would be converted
|
---|
640 | // properly (or maybe just the 'a' character) -- oh well :(
|
---|
641 |
|
---|
642 | char a = 'a';
|
---|
643 | unsigned int w = 0;
|
---|
644 |
|
---|
645 | #ifndef L4ENV
|
---|
646 | int res = mbtowc((wchar_t *) &w, &a, 1);
|
---|
647 |
|
---|
648 | gWCharIsUnicode = (res != -1 && w == 'a');
|
---|
649 | #else
|
---|
650 | gWCharIsUnicode = 0;
|
---|
651 | #endif
|
---|
652 |
|
---|
653 | #ifdef DEBUG
|
---|
654 | if (!gWCharIsUnicode)
|
---|
655 | NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
|
---|
656 | #endif
|
---|
657 | }
|
---|
658 |
|
---|
659 | nsresult
|
---|
660 | nsNativeCharsetConverter::NativeToUnicode(const char **input,
|
---|
661 | PRUint32 *inputLeft,
|
---|
662 | PRUnichar **output,
|
---|
663 | PRUint32 *outputLeft)
|
---|
664 | {
|
---|
665 | if (gWCharIsUnicode) {
|
---|
666 | #ifndef L4ENV
|
---|
667 | /* We don't have any wchar support built into uclibc just now */
|
---|
668 | int incr;
|
---|
669 |
|
---|
670 | // cannot use wchar_t here since it may have been redefined (e.g.,
|
---|
671 | // via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP.
|
---|
672 | unsigned int tmp = 0;
|
---|
673 | while (*inputLeft && *outputLeft) {
|
---|
674 | #ifdef HAVE_MBRTOWC
|
---|
675 | incr = (int) mbrtowc((wchar_t *) &tmp, *input, *inputLeft, &ps);
|
---|
676 | #else
|
---|
677 | // XXX is this thread-safe?
|
---|
678 | incr = (int) mbtowc((wchar_t *) &tmp, *input, *inputLeft);
|
---|
679 | #endif
|
---|
680 | if (incr < 0) {
|
---|
681 | NS_WARNING("mbtowc failed: possible charset mismatch");
|
---|
682 | // zero-pad and hope for the best
|
---|
683 | tmp = (unsigned char) **input;
|
---|
684 | incr = 1;
|
---|
685 | }
|
---|
686 | **output = (PRUnichar) tmp;
|
---|
687 | (*input) += incr;
|
---|
688 | (*inputLeft) -= incr;
|
---|
689 | (*output)++;
|
---|
690 | (*outputLeft)--;
|
---|
691 | }
|
---|
692 | #endif /* not defined L4ENV */
|
---|
693 | }
|
---|
694 | else {
|
---|
695 | // wchar_t isn't unicode, so the best we can do is treat the
|
---|
696 | // input as if it is isolatin1 :(
|
---|
697 | isolatin1_to_utf16(input, inputLeft, output, outputLeft);
|
---|
698 | }
|
---|
699 |
|
---|
700 | return NS_OK;
|
---|
701 | }
|
---|
702 |
|
---|
703 | nsresult
|
---|
704 | nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
|
---|
705 | PRUint32 *inputLeft,
|
---|
706 | char **output,
|
---|
707 | PRUint32 *outputLeft)
|
---|
708 | {
|
---|
709 | if (gWCharIsUnicode) {
|
---|
710 | #ifndef L4ENV
|
---|
711 | /* We don't have any wchar support built into uclibc just now */
|
---|
712 | int incr;
|
---|
713 |
|
---|
714 | while (*inputLeft && *outputLeft >= MB_CUR_MAX) {
|
---|
715 | #ifdef HAVE_WCRTOMB
|
---|
716 | incr = (int) wcrtomb(*output, (wchar_t) **input, &ps);
|
---|
717 | #else
|
---|
718 | // XXX is this thread-safe?
|
---|
719 | incr = (int) wctomb(*output, (wchar_t) **input);
|
---|
720 | #endif
|
---|
721 | if (incr < 0) {
|
---|
722 | NS_WARNING("mbtowc failed: possible charset mismatch");
|
---|
723 | **output = (unsigned char) **input; // truncate
|
---|
724 | incr = 1;
|
---|
725 | }
|
---|
726 | // most likely we're dead anyways if this assertion should fire
|
---|
727 | NS_ASSERTION(PRUint32(incr) <= *outputLeft, "wrote beyond end of string");
|
---|
728 | (*output) += incr;
|
---|
729 | (*outputLeft) -= incr;
|
---|
730 | (*input)++;
|
---|
731 | (*inputLeft)--;
|
---|
732 | }
|
---|
733 | #endif /* not defined L4ENV */
|
---|
734 | }
|
---|
735 | else {
|
---|
736 | // wchar_t isn't unicode, so the best we can do is treat the
|
---|
737 | // input as if it is isolatin1 :(
|
---|
738 | utf16_to_isolatin1(input, inputLeft, output, outputLeft);
|
---|
739 | }
|
---|
740 |
|
---|
741 | return NS_OK;
|
---|
742 | }
|
---|
743 |
|
---|
744 | #endif // USE_STDCONV
|
---|
745 |
|
---|
746 | //-----------------------------------------------------------------------------
|
---|
747 | // API implementation
|
---|
748 | //-----------------------------------------------------------------------------
|
---|
749 |
|
---|
750 | NS_COM nsresult
|
---|
751 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
752 | {
|
---|
753 | output.Truncate();
|
---|
754 |
|
---|
755 | PRUint32 inputLen = input.Length();
|
---|
756 |
|
---|
757 | nsACString::const_iterator iter;
|
---|
758 | input.BeginReading(iter);
|
---|
759 |
|
---|
760 | //
|
---|
761 | // OPTIMIZATION: preallocate space for largest possible result; convert
|
---|
762 | // directly into the result buffer to avoid intermediate buffer copy.
|
---|
763 | //
|
---|
764 | // this will generally result in a larger allocation, but that seems
|
---|
765 | // better than an extra buffer copy.
|
---|
766 | //
|
---|
767 | output.SetLength(inputLen);
|
---|
768 | nsAString::iterator out_iter;
|
---|
769 | output.BeginWriting(out_iter);
|
---|
770 |
|
---|
771 | PRUnichar *result = out_iter.get();
|
---|
772 | PRUint32 resultLeft = inputLen;
|
---|
773 |
|
---|
774 | const char *buf = iter.get();
|
---|
775 | PRUint32 bufLeft = inputLen;
|
---|
776 |
|
---|
777 | nsNativeCharsetConverter conv;
|
---|
778 | nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
|
---|
779 | if (NS_SUCCEEDED(rv)) {
|
---|
780 | NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
|
---|
781 | output.SetLength(inputLen - resultLeft);
|
---|
782 | }
|
---|
783 | return rv;
|
---|
784 | }
|
---|
785 |
|
---|
786 | NS_COM nsresult
|
---|
787 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
788 | {
|
---|
789 | output.Truncate();
|
---|
790 |
|
---|
791 | nsAString::const_iterator iter, end;
|
---|
792 | input.BeginReading(iter);
|
---|
793 | input.EndReading(end);
|
---|
794 |
|
---|
795 | // cannot easily avoid intermediate buffer copy.
|
---|
796 | char temp[4096];
|
---|
797 |
|
---|
798 | nsNativeCharsetConverter conv;
|
---|
799 |
|
---|
800 | const PRUnichar *buf = iter.get();
|
---|
801 | PRUint32 bufLeft = Distance(iter, end);
|
---|
802 | while (bufLeft) {
|
---|
803 | char *p = temp;
|
---|
804 | PRUint32 tempLeft = sizeof(temp);
|
---|
805 |
|
---|
806 | nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
|
---|
807 | if (NS_FAILED(rv)) return rv;
|
---|
808 |
|
---|
809 | if (tempLeft < sizeof(temp))
|
---|
810 | output.Append(temp, sizeof(temp) - tempLeft);
|
---|
811 | }
|
---|
812 | return NS_OK;
|
---|
813 | }
|
---|
814 |
|
---|
815 | void
|
---|
816 | NS_StartupNativeCharsetUtils()
|
---|
817 | {
|
---|
818 | //
|
---|
819 | // need to initialize the locale or else charset conversion will fail.
|
---|
820 | // better not delay this in case some other component alters the locale
|
---|
821 | // settings.
|
---|
822 | //
|
---|
823 | // XXX we assume that we are called early enough that we should
|
---|
824 | // always be the first to care about the locale's charset.
|
---|
825 | //
|
---|
826 | setlocale(LC_CTYPE, "");
|
---|
827 |
|
---|
828 | nsNativeCharsetConverter::GlobalInit();
|
---|
829 | }
|
---|
830 |
|
---|
831 | void
|
---|
832 | NS_ShutdownNativeCharsetUtils()
|
---|
833 | {
|
---|
834 | nsNativeCharsetConverter::GlobalShutdown();
|
---|
835 | }
|
---|
836 |
|
---|
837 | //-----------------------------------------------------------------------------
|
---|
838 | // XP_BEOS
|
---|
839 | //-----------------------------------------------------------------------------
|
---|
840 | #elif defined(XP_BEOS)
|
---|
841 |
|
---|
842 | #include "nsAString.h"
|
---|
843 | #include "nsReadableUtils.h"
|
---|
844 | #include "nsString.h"
|
---|
845 |
|
---|
846 | NS_COM nsresult
|
---|
847 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
848 | {
|
---|
849 | CopyUTF8toUTF16(input, output);
|
---|
850 | return NS_OK;
|
---|
851 | }
|
---|
852 |
|
---|
853 | NS_COM nsresult
|
---|
854 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
855 | {
|
---|
856 | CopyUTF16toUTF8(input, output);
|
---|
857 | return NS_OK;
|
---|
858 | }
|
---|
859 |
|
---|
/**
 * XP_BEOS: nothing to set up; present so every platform exposes the same
 * startup entry point.
 */
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
864 |
|
---|
/**
 * XP_BEOS: nothing to tear down; present so every platform exposes the
 * same shutdown entry point.
 */
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
869 |
|
---|
870 | //-----------------------------------------------------------------------------
|
---|
871 | // XP_WIN
|
---|
872 | //-----------------------------------------------------------------------------
|
---|
873 | #elif defined(XP_WIN)
|
---|
874 |
|
---|
875 | #include <windows.h>
|
---|
876 | #include "nsAString.h"
|
---|
877 |
|
---|
878 | NS_COM nsresult
|
---|
879 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
880 | {
|
---|
881 | PRUint32 inputLen = input.Length();
|
---|
882 |
|
---|
883 | nsACString::const_iterator iter;
|
---|
884 | input.BeginReading(iter);
|
---|
885 |
|
---|
886 | const char *buf = iter.get();
|
---|
887 |
|
---|
888 | // determine length of result
|
---|
889 | PRUint32 resultLen = 0;
|
---|
890 | int n = ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, NULL, 0);
|
---|
891 | if (n > 0)
|
---|
892 | resultLen += n;
|
---|
893 |
|
---|
894 | // allocate sufficient space
|
---|
895 | output.SetLength(resultLen);
|
---|
896 | if (resultLen > 0) {
|
---|
897 | nsAString::iterator out_iter;
|
---|
898 | output.BeginWriting(out_iter);
|
---|
899 |
|
---|
900 | PRUnichar *result = out_iter.get();
|
---|
901 |
|
---|
902 | ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, result, resultLen);
|
---|
903 | }
|
---|
904 | return NS_OK;
|
---|
905 | }
|
---|
906 |
|
---|
907 | NS_COM nsresult
|
---|
908 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
909 | {
|
---|
910 | PRUint32 inputLen = input.Length();
|
---|
911 |
|
---|
912 | nsAString::const_iterator iter;
|
---|
913 | input.BeginReading(iter);
|
---|
914 |
|
---|
915 | const PRUnichar *buf = iter.get();
|
---|
916 |
|
---|
917 | // determine length of result
|
---|
918 | PRUint32 resultLen = 0;
|
---|
919 |
|
---|
920 | int n = ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, NULL, 0, NULL, NULL);
|
---|
921 | if (n > 0)
|
---|
922 | resultLen += n;
|
---|
923 |
|
---|
924 | // allocate sufficient space
|
---|
925 | output.SetLength(resultLen);
|
---|
926 | if (resultLen > 0) {
|
---|
927 | nsACString::iterator out_iter;
|
---|
928 | output.BeginWriting(out_iter);
|
---|
929 |
|
---|
930 | // default "defaultChar" is '?', which is an illegal character on windows
|
---|
931 | // file system. That will cause file uncreatable. Change it to '_'
|
---|
932 | const char defaultChar = '_';
|
---|
933 |
|
---|
934 | char *result = out_iter.get();
|
---|
935 |
|
---|
936 | ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen,
|
---|
937 | &defaultChar, NULL);
|
---|
938 | }
|
---|
939 | return NS_OK;
|
---|
940 | }
|
---|
941 |
|
---|
/**
 * XP_WIN: nothing to set up; present so every platform exposes the same
 * startup entry point.
 */
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
946 |
|
---|
/**
 * XP_WIN: nothing to tear down; present so every platform exposes the same
 * shutdown entry point.
 */
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
951 |
|
---|
952 | //-----------------------------------------------------------------------------
|
---|
953 | // XP_OS2
|
---|
954 | //-----------------------------------------------------------------------------
|
---|
955 | #elif defined(XP_OS2)
|
---|
956 |
|
---|
957 | #define INCL_DOS
|
---|
958 | #include <os2.h>
|
---|
959 | #include <uconv.h>
|
---|
960 | #include "nsAString.h"
|
---|
961 | #include <ulserrno.h>
|
---|
962 | #include "nsNativeCharsetUtils.h"
|
---|
963 |
|
---|
964 | static UconvObject UnicodeConverter = NULL;
|
---|
965 |
|
---|
966 | NS_COM nsresult
|
---|
967 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
968 | {
|
---|
969 | PRUint32 inputLen = input.Length();
|
---|
970 |
|
---|
971 | nsACString::const_iterator iter;
|
---|
972 | input.BeginReading(iter);
|
---|
973 | const char *inputStr = iter.get();
|
---|
974 |
|
---|
975 | // determine length of result
|
---|
976 | PRUint32 resultLen = inputLen;
|
---|
977 | output.SetLength(resultLen);
|
---|
978 |
|
---|
979 | nsAString::iterator out_iter;
|
---|
980 | output.BeginWriting(out_iter);
|
---|
981 | UniChar *result = (UniChar*)out_iter.get();
|
---|
982 |
|
---|
983 | size_t cSubs = 0;
|
---|
984 | size_t resultLeft = resultLen;
|
---|
985 |
|
---|
986 | if (!UnicodeConverter)
|
---|
987 | NS_StartupNativeCharsetUtils();
|
---|
988 |
|
---|
989 | int unirc = ::UniUconvToUcs(UnicodeConverter, (void**)&inputStr, &inputLen,
|
---|
990 | &result, &resultLeft, &cSubs);
|
---|
991 |
|
---|
992 | NS_ASSERTION(unirc != UCONV_E2BIG, "Path too big");
|
---|
993 |
|
---|
994 | if (unirc != ULS_SUCCESS) {
|
---|
995 | output.Truncate();
|
---|
996 | return NS_ERROR_FAILURE;
|
---|
997 | }
|
---|
998 |
|
---|
999 | // Need to update string length to reflect how many bytes were actually
|
---|
1000 | // written.
|
---|
1001 | output.Truncate(resultLen - resultLeft);
|
---|
1002 | return NS_OK;
|
---|
1003 | }
|
---|
1004 |
|
---|
1005 | NS_COM nsresult
|
---|
1006 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
1007 | {
|
---|
1008 | size_t inputLen = input.Length();
|
---|
1009 |
|
---|
1010 | nsAString::const_iterator iter;
|
---|
1011 | input.BeginReading(iter);
|
---|
1012 | UniChar* inputStr = (UniChar*) NS_CONST_CAST(PRUnichar*, iter.get());
|
---|
1013 |
|
---|
1014 | // maximum length of unicode string of length x converted to native
|
---|
1015 | // codepage is x*2
|
---|
1016 | size_t resultLen = inputLen * 2;
|
---|
1017 | output.SetLength(resultLen);
|
---|
1018 |
|
---|
1019 | nsACString::iterator out_iter;
|
---|
1020 | output.BeginWriting(out_iter);
|
---|
1021 | char *result = out_iter.get();
|
---|
1022 |
|
---|
1023 | size_t cSubs = 0;
|
---|
1024 | size_t resultLeft = resultLen;
|
---|
1025 |
|
---|
1026 | if (!UnicodeConverter)
|
---|
1027 | NS_StartupNativeCharsetUtils();
|
---|
1028 |
|
---|
1029 | int unirc = ::UniUconvFromUcs(UnicodeConverter, &inputStr, &inputLen,
|
---|
1030 | (void**)&result, &resultLeft, &cSubs);
|
---|
1031 |
|
---|
1032 | NS_ASSERTION(unirc != UCONV_E2BIG, "Path too big");
|
---|
1033 |
|
---|
1034 | if (unirc != ULS_SUCCESS) {
|
---|
1035 | output.Truncate();
|
---|
1036 | return NS_ERROR_FAILURE;
|
---|
1037 | }
|
---|
1038 |
|
---|
1039 | // Need to update string length to reflect how many bytes were actually
|
---|
1040 | // written.
|
---|
1041 | output.Truncate(resultLen - resultLeft);
|
---|
1042 | return NS_OK;
|
---|
1043 | }
|
---|
1044 |
|
---|
1045 | void
|
---|
1046 | NS_StartupNativeCharsetUtils()
|
---|
1047 | {
|
---|
1048 | ULONG ulLength;
|
---|
1049 | ULONG ulCodePage;
|
---|
1050 | DosQueryCp(sizeof(ULONG), &ulCodePage, &ulLength);
|
---|
1051 |
|
---|
1052 | UniChar codepage[20];
|
---|
1053 | int unirc = ::UniMapCpToUcsCp(ulCodePage, codepage, 20);
|
---|
1054 | if (unirc == ULS_SUCCESS) {
|
---|
1055 | unirc = ::UniCreateUconvObject(codepage, &UnicodeConverter);
|
---|
1056 | if (unirc == ULS_SUCCESS) {
|
---|
1057 | uconv_attribute_t attr;
|
---|
1058 | ::UniQueryUconvObject(UnicodeConverter, &attr, sizeof(uconv_attribute_t),
|
---|
1059 | NULL, NULL, NULL);
|
---|
1060 | attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
|
---|
1061 | attr.subchar_len=1;
|
---|
1062 | attr.subchar[0]='_';
|
---|
1063 | ::UniSetUconvObject(UnicodeConverter, &attr);
|
---|
1064 | }
|
---|
1065 | }
|
---|
1066 | }
|
---|
1067 |
|
---|
1068 | void
|
---|
1069 | NS_ShutdownNativeCharsetUtils()
|
---|
1070 | {
|
---|
1071 | ::UniFreeUconvObject(UnicodeConverter);
|
---|
1072 | }
|
---|
1073 |
|
---|
1074 | //-----------------------------------------------------------------------------
|
---|
1075 | // XP_MAC
|
---|
1076 | //-----------------------------------------------------------------------------
|
---|
1077 | #elif defined(XP_MAC)
|
---|
1078 |
|
---|
1079 | #include <UnicodeConverter.h>
|
---|
1080 | #include <TextCommon.h>
|
---|
1081 | #include <Script.h>
|
---|
1082 | #include <MacErrors.h>
|
---|
1083 | #include "nsAString.h"
|
---|
1084 |
|
---|
1085 | class nsFSStringConversionMac {
|
---|
1086 | public:
|
---|
1087 | static nsresult UCSToFS(const nsAString& aIn, nsACString& aOut);
|
---|
1088 | static nsresult FSToUCS(const nsACString& ain, nsAString& aOut);
|
---|
1089 |
|
---|
1090 | static void CleanUp();
|
---|
1091 |
|
---|
1092 | private:
|
---|
1093 | static TextEncoding GetSystemEncoding();
|
---|
1094 | static nsresult PrepareEncoder();
|
---|
1095 | static nsresult PrepareDecoder();
|
---|
1096 |
|
---|
1097 | static UnicodeToTextInfo sEncoderInfo;
|
---|
1098 | static TextToUnicodeInfo sDecoderInfo;
|
---|
1099 | };
|
---|
1100 |
|
---|
1101 | UnicodeToTextInfo nsFSStringConversionMac::sEncoderInfo = nsnull;
|
---|
1102 | TextToUnicodeInfo nsFSStringConversionMac::sDecoderInfo = nsnull;
|
---|
1103 |
|
---|
1104 | nsresult nsFSStringConversionMac::UCSToFS(const nsAString& aIn, nsACString& aOut)
|
---|
1105 | {
|
---|
1106 | nsresult rv = PrepareEncoder();
|
---|
1107 | if (NS_FAILED(rv)) return rv;
|
---|
1108 |
|
---|
1109 | OSStatus err = noErr;
|
---|
1110 | char stackBuffer[512];
|
---|
1111 |
|
---|
1112 | aOut.Truncate();
|
---|
1113 |
|
---|
1114 | // for each chunk of |aIn|...
|
---|
1115 | nsReadingIterator<PRUnichar> iter;
|
---|
1116 | aIn.BeginReading(iter);
|
---|
1117 |
|
---|
1118 | PRUint32 fragmentLength = PRUint32(iter.size_forward());
|
---|
1119 | UInt32 bytesLeft = fragmentLength * sizeof(UniChar);
|
---|
1120 |
|
---|
1121 | do {
|
---|
1122 | UInt32 bytesRead = 0, bytesWritten = 0;
|
---|
1123 | err = ::ConvertFromUnicodeToText(sEncoderInfo,
|
---|
1124 | bytesLeft,
|
---|
1125 | (const UniChar*)iter.get(),
|
---|
1126 | kUnicodeUseFallbacksMask | kUnicodeLooseMappingsMask,
|
---|
1127 | 0, nsnull, nsnull, nsnull,
|
---|
1128 | sizeof(stackBuffer),
|
---|
1129 | &bytesRead,
|
---|
1130 | &bytesWritten,
|
---|
1131 | stackBuffer);
|
---|
1132 | if (err == kTECUsedFallbacksStatus)
|
---|
1133 | err = noErr;
|
---|
1134 | else if (err == kTECOutputBufferFullStatus) {
|
---|
1135 | bytesLeft -= bytesRead;
|
---|
1136 | iter.advance(bytesRead / sizeof(UniChar));
|
---|
1137 | }
|
---|
1138 | aOut.Append(stackBuffer, bytesWritten);
|
---|
1139 | }
|
---|
1140 | while (err == kTECOutputBufferFullStatus);
|
---|
1141 |
|
---|
1142 | return (err == noErr) ? NS_OK : NS_ERROR_FAILURE;
|
---|
1143 | }
|
---|
1144 |
|
---|
1145 | nsresult nsFSStringConversionMac::FSToUCS(const nsACString& aIn, nsAString& aOut)
|
---|
1146 | {
|
---|
1147 | nsresult rv = PrepareDecoder();
|
---|
1148 | if (NS_FAILED(rv)) return rv;
|
---|
1149 |
|
---|
1150 | OSStatus err = noErr;
|
---|
1151 | UniChar stackBuffer[512];
|
---|
1152 |
|
---|
1153 | aOut.Truncate(0);
|
---|
1154 |
|
---|
1155 | // for each chunk of |aIn|...
|
---|
1156 | nsReadingIterator<char> iter;
|
---|
1157 | aIn.BeginReading(iter);
|
---|
1158 |
|
---|
1159 | PRUint32 fragmentLength = PRUint32(iter.size_forward());
|
---|
1160 | UInt32 bytesLeft = fragmentLength;
|
---|
1161 |
|
---|
1162 | do {
|
---|
1163 | UInt32 bytesRead = 0, bytesWritten = 0;
|
---|
1164 | err = ::ConvertFromTextToUnicode(sDecoderInfo,
|
---|
1165 | bytesLeft,
|
---|
1166 | iter.get(),
|
---|
1167 | kUnicodeUseFallbacksMask | kUnicodeLooseMappingsMask,
|
---|
1168 | 0, nsnull, nsnull, nsnull,
|
---|
1169 | sizeof(stackBuffer),
|
---|
1170 | &bytesRead,
|
---|
1171 | &bytesWritten,
|
---|
1172 | stackBuffer);
|
---|
1173 | if (err == kTECUsedFallbacksStatus)
|
---|
1174 | err = noErr;
|
---|
1175 | else if (err == kTECOutputBufferFullStatus) {
|
---|
1176 | bytesLeft -= bytesRead;
|
---|
1177 | iter.advance(bytesRead);
|
---|
1178 | }
|
---|
1179 | aOut.Append((PRUnichar *)stackBuffer, bytesWritten / sizeof(PRUnichar));
|
---|
1180 | }
|
---|
1181 | while (err == kTECOutputBufferFullStatus);
|
---|
1182 |
|
---|
1183 | return (err == noErr) ? NS_OK : NS_ERROR_FAILURE;
|
---|
1184 | }
|
---|
1185 |
|
---|
1186 | void nsFSStringConversionMac::CleanUp()
|
---|
1187 | {
|
---|
1188 | if (sDecoderInfo) {
|
---|
1189 | ::DisposeTextToUnicodeInfo(&sDecoderInfo);
|
---|
1190 | sDecoderInfo = nsnull;
|
---|
1191 | }
|
---|
1192 | if (sEncoderInfo) {
|
---|
1193 | ::DisposeUnicodeToTextInfo(&sEncoderInfo);
|
---|
1194 | sEncoderInfo = nsnull;
|
---|
1195 | }
|
---|
1196 | }
|
---|
1197 |
|
---|
1198 | TextEncoding nsFSStringConversionMac::GetSystemEncoding()
|
---|
1199 | {
|
---|
1200 | OSStatus err;
|
---|
1201 | TextEncoding theEncoding;
|
---|
1202 |
|
---|
1203 | err = ::UpgradeScriptInfoToTextEncoding(smSystemScript, kTextLanguageDontCare,
|
---|
1204 | kTextRegionDontCare, NULL, &theEncoding);
|
---|
1205 |
|
---|
1206 | if (err != noErr)
|
---|
1207 | theEncoding = kTextEncodingMacRoman;
|
---|
1208 |
|
---|
1209 | return theEncoding;
|
---|
1210 | }
|
---|
1211 |
|
---|
1212 | nsresult nsFSStringConversionMac::PrepareEncoder()
|
---|
1213 | {
|
---|
1214 | nsresult rv = NS_OK;
|
---|
1215 | if (!sEncoderInfo) {
|
---|
1216 | OSStatus err;
|
---|
1217 | err = ::CreateUnicodeToTextInfoByEncoding(GetSystemEncoding(), &sEncoderInfo);
|
---|
1218 | if (err)
|
---|
1219 | rv = NS_ERROR_FAILURE;
|
---|
1220 | }
|
---|
1221 | return rv;
|
---|
1222 | }
|
---|
1223 |
|
---|
1224 | nsresult nsFSStringConversionMac::PrepareDecoder()
|
---|
1225 | {
|
---|
1226 | nsresult rv = NS_OK;
|
---|
1227 | if (!sDecoderInfo) {
|
---|
1228 | OSStatus err;
|
---|
1229 | err = ::CreateTextToUnicodeInfoByEncoding(GetSystemEncoding(), &sDecoderInfo);
|
---|
1230 | if (err)
|
---|
1231 | rv = NS_ERROR_FAILURE;
|
---|
1232 | }
|
---|
1233 | return rv;
|
---|
1234 | }
|
---|
1235 |
|
---|
1236 | NS_COM nsresult
|
---|
1237 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
1238 | {
|
---|
1239 | return nsFSStringConversionMac::FSToUCS(input, output);
|
---|
1240 | }
|
---|
1241 |
|
---|
1242 | NS_COM nsresult
|
---|
1243 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
1244 | {
|
---|
1245 | return nsFSStringConversionMac::UCSToFS(input, output);
|
---|
1246 | }
|
---|
1247 |
|
---|
/**
 * XP_MAC: nothing to do at startup; the converter objects are created on
 * first use by nsFSStringConversionMac.
 */
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
1252 |
|
---|
1253 | void
|
---|
1254 | NS_ShutdownNativeCharsetUtils()
|
---|
1255 | {
|
---|
1256 | nsFSStringConversionMac::CleanUp();
|
---|
1257 | }
|
---|
1258 |
|
---|
1259 | //-----------------------------------------------------------------------------
|
---|
1260 | // default : truncate/zeropad
|
---|
1261 | //-----------------------------------------------------------------------------
|
---|
1262 | #else
|
---|
1263 |
|
---|
1264 | #include "nsReadableUtils.h"
|
---|
1265 |
|
---|
1266 | NS_COM nsresult
|
---|
1267 | NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
|
---|
1268 | {
|
---|
1269 | CopyASCIItoUCS2(input, output);
|
---|
1270 | return NS_OK;
|
---|
1271 | }
|
---|
1272 |
|
---|
1273 | NS_COM nsresult
|
---|
1274 | NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
|
---|
1275 | {
|
---|
1276 | CopyUCS2toASCII(input, output);
|
---|
1277 | return NS_OK;
|
---|
1278 | }
|
---|
1279 |
|
---|
/**
 * Default platform: nothing to set up; present so every platform exposes
 * the same startup entry point.
 */
void
NS_StartupNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
1284 |
|
---|
/**
 * Default platform: nothing to tear down; present so every platform
 * exposes the same shutdown entry point.
 */
void
NS_ShutdownNativeCharsetUtils()
{
    // intentionally empty
}
|
---|
1289 |
|
---|
1290 | #endif
|
---|