nsNativeCharsetUtils.cpp@ 60030

最後變更在這個檔案從60030是 46043,由 vboxsync 提交於 12 年前
src/libs/xpcom18a4: remove L4.
屬性 svn:eol-style 設為 `native` 屬性 svn:keywords 設為 `Author Date Id Revision`
檔案大小: 37.3 KB

行
1	/* *** BEGIN LICENSE BLOCK ***
2	* Version: MPL 1.1/GPL 2.0/LGPL 2.1
3	*
4	* The contents of this file are subject to the Mozilla Public License Version
5	* 1.1 (the "License"); you may not use this file except in compliance with
6	* the License. You may obtain a copy of the License at
7	* http://www.mozilla.org/MPL/
8	*
9	* Software distributed under the License is distributed on an "AS IS" basis,
10	* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11	* for the specific language governing rights and limitations under the
12	* License.
13	*
14	* The Original Code is Mozilla.
15	*
16	* The Initial Developer of the Original Code is
17	* Netscape Communications Corporation.
18	* Portions created by the Initial Developer are Copyright (C) 2002
19	* the Initial Developer. All Rights Reserved.
20	*
21	* Contributor(s):
22	* Darin Fisher <[email protected]>
23	* Brian Stell <[email protected]>
24	* Frank Tang <[email protected]>
25	* Brendan Eich <[email protected]>
26	* Sergei Dolgov <[email protected]>
27	*
28	* Alternatively, the contents of this file may be used under the terms of
29	* either the GNU General Public License Version 2 or later (the "GPL"), or
30	* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
31	* in which case the provisions of the GPL or the LGPL are applicable instead
32	* of those above. If you wish to allow use of your version of this file only
33	* under the terms of either the GPL or the LGPL, and not to allow others to
34	* use your version of this file under the terms of the MPL, indicate your
35	* decision by deleting the provisions above and replace them with the notice
36	* and other provisions required by the GPL or the LGPL. If you do not delete
37	* the provisions above, a recipient may use your version of this file under
38	* the terms of any one of the MPL, the GPL or the LGPL.
39	*
40	* *** END LICENSE BLOCK *** */
41
42	#include "xpcom-private.h"
43
44	//-----------------------------------------------------------------------------
45	// XP_UNIX
46	//-----------------------------------------------------------------------------
47	#if defined(XP_UNIX)
48
49	#include <stdlib.h> // mbtowc, wctomb
50	#include <locale.h> // setlocale
51	#include "nscore.h"
52	#include "prlock.h"
53	#include "nsAString.h"
54	#include "nsReadableUtils.h"
55
56	//
57	// choose a conversion library. we used to use mbrtowc/wcrtomb under Linux,
58	// but that doesn't work for non-BMP characters whether we use '-fshort-wchar'
59	// or not (see bug 206811 and
60	// news://news.mozilla.org:119/[email protected]). we now use
61	// iconv for all platforms where nltypes.h and nllanginfo.h are present
62	// along with iconv.
63	//
64	#if defined(HAVE_ICONV) && defined(HAVE_NL_TYPES_H) && defined(HAVE_LANGINFO_CODESET)
65	#define USE_ICONV 1
66	#else
67	#define USE_STDCONV 1
68	#endif
69
70	static void
71	isolatin1_to_utf16(const char *input, PRUint32 inputLeft, PRUnichar *output, PRUint32 outputLeft)
72	{
73	while (inputLeft && outputLeft) {
74	output = (unsigned char) input;
75	(*input)++;
76	(*inputLeft)--;
77	(*output)++;
78	(*outputLeft)--;
79	}
80	}
81
82	static void
83	utf16_to_isolatin1(const PRUnichar *input, PRUint32 inputLeft, char *output, PRUint32 outputLeft)
84	{
85	while (inputLeft && outputLeft) {
86	output = (unsigned char) input;
87	(*input)++;
88	(*inputLeft)--;
89	(*output)++;
90	(*outputLeft)--;
91	}
92	}
93
94	//-----------------------------------------------------------------------------
95	// conversion using iconv
96	//-----------------------------------------------------------------------------
97	#if defined(USE_ICONV)
98	#include <nl_types.h> // CODESET
99	#include <langinfo.h> // nl_langinfo
100	#include <iconv.h> // iconv_open, iconv, iconv_close
101	#include <errno.h>
102
103	#if defined(HAVE_ICONV_WITH_CONST_INPUT)
104	#define ICONV_INPUT(x) (x)
105	#else
106	#define ICONV_INPUT(x) ((char **)x)
107	#endif
108
109	// solaris definitely needs this, but we'll enable it by default
110	// just in case... but we know for sure that iconv(3) in glibc
111	// doesn't need this.
112	#if !defined(__GLIBC__)
113	#define ENABLE_UTF8_FALLBACK_SUPPORT
114	#endif
115
116	#define INVALID_ICONV_T ((iconv_t) -1)
117
118	static inline size_t
119	xp_iconv(iconv_t converter,
120	const char **input,
121	size_t *inputLeft,
122	char **output,
123	size_t *outputLeft)
124	{
125	size_t res, outputAvail = outputLeft ? *outputLeft : 0;
126	res = iconv(converter, ICONV_INPUT(input), inputLeft, output, outputLeft);
127	if (res == (size_t) -1) {
128	// on some platforms (e.g., linux) iconv will fail with
129	// E2BIG if it cannot convert _all_ of its input. it'll
130	// still adjust all of the in/out params correctly, so we
131	// can ignore this error. the assumption is that we will
132	// be called again to complete the conversion.
133	if ((errno == E2BIG) && (*outputLeft < outputAvail))
134	res = 0;
135	}
136	return res;
137	}
138
139	static inline void
140	xp_iconv_reset(iconv_t converter)
141	{
142	// NOTE: the man pages on Solaris claim that you can pass NULL
143	// for all parameter to reset the converter, but beware the
144	// evil Solaris crash if you go down this route >:-)
145
146	const char *zero_char_in_ptr = NULL;
147	char *zero_char_out_ptr = NULL;
148	size_t zero_size_in = 0,
149	zero_size_out = 0;
150
151	xp_iconv(converter, &zero_char_in_ptr,
152	&zero_size_in,
153	&zero_char_out_ptr,
154	&zero_size_out);
155	}
156
157	static inline iconv_t
158	xp_iconv_open(const char to_list, const char from_list)
159	{
160	iconv_t res;
161	const char **from_name;
162	const char **to_name;
163
164	// try all possible combinations to locate a converter.
165	to_name = to_list;
166	while (*to_name) {
167	if (**to_name) {
168	from_name = from_list;
169	while (*from_name) {
170	if (**from_name) {
171	res = iconv_open(to_name, from_name);
172	if (res != INVALID_ICONV_T)
173	return res;
174	}
175	from_name++;
176	}
177	}
178	to_name++;
179	}
180
181	return INVALID_ICONV_T;
182	}
183
184	/*
185	* PRUnichar[] is NOT a UCS-2 array BUT a UTF-16 string. Therefore, we
186	* have to use UTF-16 with iconv(3) on platforms where it's supported.
187	* However, the way UTF-16 and UCS-2 are interpreted varies across platforms
188	* and implementations of iconv(3). On Tru64, it also depends on the environment
189	* variable. To avoid the trouble arising from byte-swapping
190	* (bug 208809), we have to try UTF-16LE/BE and UCS-2LE/BE before falling
191	* back to UTF-16 and UCS-2 and variants. We assume that UTF-16 and UCS-2
192	* on systems without UTF-16LE/BE and UCS-2LE/BE have the native endianness,
193	* which isn't the case of glibc 2.1.x, for which we use 'UNICODELITTLE'
194	* and 'UNICODEBIG'. It's also not true of Tru64 V4 when the environment
195	* variable ICONV_BYTEORDER is set to 'big-endian', about which not much
196	* can be done other than adding a note in the release notes. (bug 206811)
197	*/
198	static const char *UTF_16_NAMES[] = {
199	#if defined(IS_LITTLE_ENDIAN)
200	"UTF-16LE",
201	#if defined(__GLIBC__)
202	"UNICODELITTLE",
203	#endif
204	"UCS-2LE",
205	#else
206	"UTF-16BE",
207	#if defined(__GLIBC__)
208	"UNICODEBIG",
209	#endif
210	"UCS-2BE",
211	#endif
212	"UTF-16",
213	"UCS-2",
214	"UCS2",
215	"UCS_2",
216	"ucs-2",
217	"ucs2",
218	"ucs_2",
219	NULL
220	};
221
222	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
223	static const char *UTF_8_NAMES[] = {
224	"UTF-8",
225	"UTF8",
226	"UTF_8",
227	"utf-8",
228	"utf8",
229	"utf_8",
230	NULL
231	};
232	#endif
233
234	static const char *ISO_8859_1_NAMES[] = {
235	"ISO-8859-1",
236	#if !defined(__GLIBC__)
237	"ISO8859-1",
238	"ISO88591",
239	"ISO_8859_1",
240	"ISO8859_1",
241	"iso-8859-1",
242	"iso8859-1",
243	"iso88591",
244	"iso_8859_1",
245	"iso8859_1",
246	#endif
247	NULL
248	};
249
250	class nsNativeCharsetConverter
251	{
252	public:
253	nsNativeCharsetConverter();
254	~nsNativeCharsetConverter();
255
256	nsresult NativeToUnicode(const char *input , PRUint32 inputLeft,
257	PRUnichar *output, PRUint32 outputLeft);
258	nsresult UnicodeToNative(const PRUnichar *input , PRUint32 inputLeft,
259	char *output, PRUint32 outputLeft);
260
261	static void GlobalInit();
262	static void GlobalShutdown();
263
264	private:
265	static iconv_t gNativeToUnicode;
266	static iconv_t gUnicodeToNative;
267	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
268	static iconv_t gNativeToUTF8;
269	static iconv_t gUTF8ToNative;
270	static iconv_t gUnicodeToUTF8;
271	static iconv_t gUTF8ToUnicode;
272	#endif
273	static PRLock *gLock;
274	static PRBool gInitialized;
275
276	static void LazyInit();
277
278	static void Lock() { if (gLock) PR_Lock(gLock); }
279	static void Unlock() { if (gLock) PR_Unlock(gLock); }
280	};
281
282	iconv_t nsNativeCharsetConverter::gNativeToUnicode = INVALID_ICONV_T;
283	iconv_t nsNativeCharsetConverter::gUnicodeToNative = INVALID_ICONV_T;
284	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
285	iconv_t nsNativeCharsetConverter::gNativeToUTF8 = INVALID_ICONV_T;
286	iconv_t nsNativeCharsetConverter::gUTF8ToNative = INVALID_ICONV_T;
287	iconv_t nsNativeCharsetConverter::gUnicodeToUTF8 = INVALID_ICONV_T;
288	iconv_t nsNativeCharsetConverter::gUTF8ToUnicode = INVALID_ICONV_T;
289	#endif
290	PRLock *nsNativeCharsetConverter::gLock = nsnull;
291	PRBool nsNativeCharsetConverter::gInitialized = PR_FALSE;
292
293	void
294	nsNativeCharsetConverter::LazyInit()
295	{
296	const char *blank_list[] = { "", NULL };
297	const char **native_charset_list = blank_list;
298	const char *native_charset = nl_langinfo(CODESET);
299	if (native_charset == nsnull) {
300	NS_ERROR("native charset is unknown");
301	// fallback to ISO-8859-1
302	native_charset_list = ISO_8859_1_NAMES;
303	}
304	else
305	native_charset_list[0] = native_charset;
306
307	gNativeToUnicode = xp_iconv_open(UTF_16_NAMES, native_charset_list);
308	gUnicodeToNative = xp_iconv_open(native_charset_list, UTF_16_NAMES);
309
310	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
311	if (gNativeToUnicode == INVALID_ICONV_T) {
312	gNativeToUTF8 = xp_iconv_open(UTF_8_NAMES, native_charset_list);
313	gUTF8ToUnicode = xp_iconv_open(UTF_16_NAMES, UTF_8_NAMES);
314	NS_ASSERTION(gNativeToUTF8 != INVALID_ICONV_T, "no native to utf-8 converter");
315	NS_ASSERTION(gUTF8ToUnicode != INVALID_ICONV_T, "no utf-8 to utf-16 converter");
316	}
317	if (gUnicodeToNative == INVALID_ICONV_T) {
318	gUnicodeToUTF8 = xp_iconv_open(UTF_8_NAMES, UTF_16_NAMES);
319	gUTF8ToNative = xp_iconv_open(native_charset_list, UTF_8_NAMES);
320	NS_ASSERTION(gUnicodeToUTF8 != INVALID_ICONV_T, "no utf-16 to utf-8 converter");
321	NS_ASSERTION(gUTF8ToNative != INVALID_ICONV_T, "no utf-8 to native converter");
322	}
323	#else
324	NS_ASSERTION(gNativeToUnicode != INVALID_ICONV_T, "no native to utf-16 converter");
325	NS_ASSERTION(gUnicodeToNative != INVALID_ICONV_T, "no utf-16 to native converter");
326	#endif
327
328	/*
329	* On Solaris 8 (and newer?), the iconv modules converting to UCS-2
330	* prepend a byte order mark unicode character (BOM, u+FEFF) during
331	* the first use of the iconv converter. The same is the case of
332	* glibc 2.2.9x and Tru64 V5 (see bug 208809) when 'UTF-16' is used.
333	* However, we use 'UTF-16LE/BE' in both cases, instead so that we
334	* should be safe. But just in case...
335	*
336	* This dummy conversion gets rid of the BOMs and fixes bug 153562.
337	*/
338	char dummy_input[1] = { ' ' };
339	char dummy_output[4];
340
341	if (gNativeToUnicode != INVALID_ICONV_T) {
342	const char *input = dummy_input;
343	size_t input_left = sizeof(dummy_input);
344	char *output = dummy_output;
345	size_t output_left = sizeof(dummy_output);
346
347	xp_iconv(gNativeToUnicode, &input, &input_left, &output, &output_left);
348	}
349	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
350	if (gUTF8ToUnicode != INVALID_ICONV_T) {
351	const char *input = dummy_input;
352	size_t input_left = sizeof(dummy_input);
353	char *output = dummy_output;
354	size_t output_left = sizeof(dummy_output);
355
356	xp_iconv(gUTF8ToUnicode, &input, &input_left, &output, &output_left);
357	}
358	#endif
359
360	gInitialized = PR_TRUE;
361	}
362
363	void
364	nsNativeCharsetConverter::GlobalInit()
365	{
366	gLock = PR_NewLock();
367	NS_ASSERTION(gLock, "lock creation failed");
368	}
369
370	void
371	nsNativeCharsetConverter::GlobalShutdown()
372	{
373	if (gLock) {
374	PR_DestroyLock(gLock);
375	gLock = nsnull;
376	}
377
378	if (gNativeToUnicode != INVALID_ICONV_T) {
379	iconv_close(gNativeToUnicode);
380	gNativeToUnicode = INVALID_ICONV_T;
381	}
382
383	if (gUnicodeToNative != INVALID_ICONV_T) {
384	iconv_close(gUnicodeToNative);
385	gUnicodeToNative = INVALID_ICONV_T;
386	}
387
388	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
389	if (gNativeToUTF8 != INVALID_ICONV_T) {
390	iconv_close(gNativeToUTF8);
391	gNativeToUTF8 = INVALID_ICONV_T;
392	}
393	if (gUTF8ToNative != INVALID_ICONV_T) {
394	iconv_close(gUTF8ToNative);
395	gUTF8ToNative = INVALID_ICONV_T;
396	}
397	if (gUnicodeToUTF8 != INVALID_ICONV_T) {
398	iconv_close(gUnicodeToUTF8);
399	gUnicodeToUTF8 = INVALID_ICONV_T;
400	}
401	if (gUTF8ToUnicode != INVALID_ICONV_T) {
402	iconv_close(gUTF8ToUnicode);
403	gUTF8ToUnicode = INVALID_ICONV_T;
404	}
405	#endif
406
407	gInitialized = PR_FALSE;
408	}
409
410	nsNativeCharsetConverter::nsNativeCharsetConverter()
411	{
412	Lock();
413	if (!gInitialized)
414	LazyInit();
415	}
416
417	nsNativeCharsetConverter::~nsNativeCharsetConverter()
418	{
419	// reset converters for next time
420	if (gNativeToUnicode != INVALID_ICONV_T)
421	xp_iconv_reset(gNativeToUnicode);
422	if (gUnicodeToNative != INVALID_ICONV_T)
423	xp_iconv_reset(gUnicodeToNative);
424	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
425	if (gNativeToUTF8 != INVALID_ICONV_T)
426	xp_iconv_reset(gNativeToUTF8);
427	if (gUTF8ToNative != INVALID_ICONV_T)
428	xp_iconv_reset(gUTF8ToNative);
429	if (gUnicodeToUTF8 != INVALID_ICONV_T)
430	xp_iconv_reset(gUnicodeToUTF8);
431	if (gUTF8ToUnicode != INVALID_ICONV_T)
432	xp_iconv_reset(gUTF8ToUnicode);
433	#endif
434	Unlock();
435	}
436
437	nsresult
438	nsNativeCharsetConverter::NativeToUnicode(const char **input,
439	PRUint32 *inputLeft,
440	PRUnichar **output,
441	PRUint32 *outputLeft)
442	{
443	size_t res = 0;
444	size_t inLeft = (size_t) *inputLeft;
445	size_t outLeft = (size_t) outputLeft 2;
446
447	if (gNativeToUnicode != INVALID_ICONV_T) {
448
449	res = xp_iconv(gNativeToUnicode, input, &inLeft, (char **) output, &outLeft);
450
451	*inputLeft = inLeft;
452	*outputLeft = outLeft / 2;
453	if (res != (size_t) -1)
454	return NS_OK;
455
456	NS_WARNING("conversion from native to utf-16 failed");
457
458	// reset converter
459	xp_iconv_reset(gNativeToUnicode);
460	}
461	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
462	else if ((gNativeToUTF8 != INVALID_ICONV_T) &&
463	(gUTF8ToUnicode != INVALID_ICONV_T)) {
464	// convert first to UTF8, then from UTF8 to UCS2
465	const char in = input;
466
467	char ubuf[1024];
468
469	// we assume we're always called with enough space in \|output\|,
470	// so convert many chars at a time...
471	while (inLeft) {
472	char *p = ubuf;
473	size_t n = sizeof(ubuf);
474	res = xp_iconv(gNativeToUTF8, &in, &inLeft, &p, &n);
475	if (res == (size_t) -1) {
476	NS_ERROR("conversion from native to utf-8 failed");
477	break;
478	}
479	NS_ASSERTION(outLeft > 0, "bad assumption");
480	p = ubuf;
481	n = sizeof(ubuf) - n;
482	res = xp_iconv(gUTF8ToUnicode, (const char ) &p, &n, (char ) output, &outLeft);
483	if (res == (size_t) -1) {
484	NS_ERROR("conversion from utf-8 to utf-16 failed");
485	break;
486	}
487	}
488
489	(input) += (inputLeft - inLeft);
490	*inputLeft = inLeft;
491	*outputLeft = outLeft / 2;
492
493	if (res != (size_t) -1)
494	return NS_OK;
495
496	// reset converters
497	xp_iconv_reset(gNativeToUTF8);
498	xp_iconv_reset(gUTF8ToUnicode);
499	}
500	#endif
501
502	// fallback: zero-pad and hope for the best
503	// XXX This is lame and we have to do better.
504	isolatin1_to_utf16(input, inputLeft, output, outputLeft);
505
506	return NS_OK;
507	}
508
509	nsresult
510	nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
511	PRUint32 *inputLeft,
512	char **output,
513	PRUint32 *outputLeft)
514	{
515	size_t res = 0;
516	size_t inLeft = (size_t) inputLeft 2;
517	size_t outLeft = (size_t) *outputLeft;
518
519	if (gUnicodeToNative != INVALID_ICONV_T) {
520	res = xp_iconv(gUnicodeToNative, (const char **) input, &inLeft, output, &outLeft);
521
522	if (res != (size_t) -1) {
523	*inputLeft = inLeft / 2;
524	*outputLeft = outLeft;
525	return NS_OK;
526	}
527
528	NS_ERROR("iconv failed");
529
530	// reset converter
531	xp_iconv_reset(gUnicodeToNative);
532	}
533	#if defined(ENABLE_UTF8_FALLBACK_SUPPORT)
534	else if ((gUnicodeToUTF8 != INVALID_ICONV_T) &&
535	(gUTF8ToNative != INVALID_ICONV_T)) {
536	const char in = (const char ) *input;
537
538	char ubuf[6]; // max utf-8 char length (really only needs to be 4 bytes)
539
540	// convert one uchar at a time...
541	while (inLeft && outLeft) {
542	char *p = ubuf;
543	size_t n = sizeof(ubuf), one_uchar = sizeof(PRUnichar);
544	res = xp_iconv(gUnicodeToUTF8, &in, &one_uchar, &p, &n);
545	if (res == (size_t) -1) {
546	NS_ERROR("conversion from utf-16 to utf-8 failed");
547	break;
548	}
549	p = ubuf;
550	n = sizeof(ubuf) - n;
551	res = xp_iconv(gUTF8ToNative, (const char **) &p, &n, output, &outLeft);
552	if (res == (size_t) -1) {
553	if (errno == E2BIG) {
554	// not enough room for last uchar... back up and return.
555	in -= sizeof(PRUnichar);
556	res = 0;
557	}
558	else
559	NS_ERROR("conversion from utf-8 to native failed");
560	break;
561	}
562	inLeft -= sizeof(PRUnichar);
563	}
564
565	if (res != (size_t) -1) {
566	(input) += (inputLeft - inLeft/2);
567	*inputLeft = inLeft/2;
568	*outputLeft = outLeft;
569	return NS_OK;
570	}
571
572	// reset converters
573	xp_iconv_reset(gUnicodeToUTF8);
574	xp_iconv_reset(gUTF8ToNative);
575	}
576	#endif
577
578	// fallback: truncate and hope for the best
579	utf16_to_isolatin1(input, inputLeft, output, outputLeft);
580
581	return NS_OK;
582	}
583
584	#endif // USE_ICONV
585
586	//-----------------------------------------------------------------------------
587	// conversion using mb[r]towc/wc[r]tomb
588	//-----------------------------------------------------------------------------
589	#if defined(USE_STDCONV)
590	#if defined(HAVE_WCRTOMB) \|\| defined(HAVE_MBRTOWC)
591	#include <wchar.h> // mbrtowc, wcrtomb
592	#endif
593
594	class nsNativeCharsetConverter
595	{
596	public:
597	nsNativeCharsetConverter();
598
599	nsresult NativeToUnicode(const char *input , PRUint32 inputLeft,
600	PRUnichar *output, PRUint32 outputLeft);
601	nsresult UnicodeToNative(const PRUnichar *input , PRUint32 inputLeft,
602	char *output, PRUint32 outputLeft);
603
604	static void GlobalInit();
605	static void GlobalShutdown() { }
606
607	private:
608	static PRBool gWCharIsUnicode;
609
610	#if defined(HAVE_WCRTOMB) \|\| defined(HAVE_MBRTOWC)
611	mbstate_t ps;
612	#endif
613	};
614
615	PRBool nsNativeCharsetConverter::gWCharIsUnicode = PR_FALSE;
616
617	nsNativeCharsetConverter::nsNativeCharsetConverter()
618	{
619	#if defined(HAVE_WCRTOMB) \|\| defined(HAVE_MBRTOWC)
620	memset(&ps, 0, sizeof(ps));
621	#endif
622	}
623
624	void
625	nsNativeCharsetConverter::GlobalInit()
626	{
627	// verify that wchar_t for the current locale is actually unicode.
628	// if it is not, then we should avoid calling mbtowc/wctomb and
629	// just fallback on zero-pad/truncation conversion.
630	//
631	// this test cannot be done at build time because the encoding of
632	// wchar_t may depend on the runtime locale. sad, but true!!
633	//
634	// so, if wchar_t is unicode then converting an ASCII character
635	// to wchar_t should not change its numeric value. we'll just
636	// check what happens with the ASCII 'a' character.
637	//
638	// this test is not perfect... obviously, it could yield false
639	// positives, but then at least ASCII text would be converted
640	// properly (or maybe just the 'a' character) -- oh well :(
641
642	char a = 'a';
643	unsigned int w = 0;
644
645	int res = mbtowc((wchar_t *) &w, &a, 1);
646
647	gWCharIsUnicode = (res != -1 && w == 'a');
648
649	#ifdef DEBUG
650	if (!gWCharIsUnicode)
651	NS_WARNING("wchar_t is not unicode (unicode conversion will be lossy)");
652	#endif
653	}
654
655	nsresult
656	nsNativeCharsetConverter::NativeToUnicode(const char **input,
657	PRUint32 *inputLeft,
658	PRUnichar **output,
659	PRUint32 *outputLeft)
660	{
661	if (gWCharIsUnicode) {
662	int incr;
663
664	// cannot use wchar_t here since it may have been redefined (e.g.,
665	// via -fshort-wchar). hopefully, sizeof(tmp) is sufficient XP.
666	unsigned int tmp = 0;
667	while (inputLeft && outputLeft) {
668	#ifdef HAVE_MBRTOWC
669	incr = (int) mbrtowc((wchar_t ) &tmp, input, *inputLeft, &ps);
670	#else
671	// XXX is this thread-safe?
672	incr = (int) mbtowc((wchar_t ) &tmp, input, *inputLeft);
673	#endif
674	if (incr < 0) {
675	NS_WARNING("mbtowc failed: possible charset mismatch");
676	// zero-pad and hope for the best
677	tmp = (unsigned char) **input;
678	incr = 1;
679	}
680	**output = (PRUnichar) tmp;
681	(*input) += incr;
682	(*inputLeft) -= incr;
683	(*output)++;
684	(*outputLeft)--;
685	}
686	}
687	else {
688	// wchar_t isn't unicode, so the best we can do is treat the
689	// input as if it is isolatin1 :(
690	isolatin1_to_utf16(input, inputLeft, output, outputLeft);
691	}
692
693	return NS_OK;
694	}
695
696	nsresult
697	nsNativeCharsetConverter::UnicodeToNative(const PRUnichar **input,
698	PRUint32 *inputLeft,
699	char **output,
700	PRUint32 *outputLeft)
701	{
702	if (gWCharIsUnicode) {
703	int incr;
704
705	while (inputLeft && outputLeft >= MB_CUR_MAX) {
706	#ifdef HAVE_WCRTOMB
707	incr = (int) wcrtomb(output, (wchar_t) *input, &ps);
708	#else
709	// XXX is this thread-safe?
710	incr = (int) wctomb(output, (wchar_t) *input);
711	#endif
712	if (incr < 0) {
713	NS_WARNING("mbtowc failed: possible charset mismatch");
714	output = (unsigned char) input; // truncate
715	incr = 1;
716	}
717	// most likely we're dead anyways if this assertion should fire
718	NS_ASSERTION(PRUint32(incr) <= *outputLeft, "wrote beyond end of string");
719	(*output) += incr;
720	(*outputLeft) -= incr;
721	(*input)++;
722	(*inputLeft)--;
723	}
724	}
725	else {
726	// wchar_t isn't unicode, so the best we can do is treat the
727	// input as if it is isolatin1 :(
728	utf16_to_isolatin1(input, inputLeft, output, outputLeft);
729	}
730
731	return NS_OK;
732	}
733
734	#endif // USE_STDCONV
735
736	//-----------------------------------------------------------------------------
737	// API implementation
738	//-----------------------------------------------------------------------------
739
740	NS_COM nsresult
741	NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
742	{
743	output.Truncate();
744
745	PRUint32 inputLen = input.Length();
746
747	nsACString::const_iterator iter;
748	input.BeginReading(iter);
749
750	//
751	// OPTIMIZATION: preallocate space for largest possible result; convert
752	// directly into the result buffer to avoid intermediate buffer copy.
753	//
754	// this will generally result in a larger allocation, but that seems
755	// better than an extra buffer copy.
756	//
757	output.SetLength(inputLen);
758	nsAString::iterator out_iter;
759	output.BeginWriting(out_iter);
760
761	PRUnichar *result = out_iter.get();
762	PRUint32 resultLeft = inputLen;
763
764	const char *buf = iter.get();
765	PRUint32 bufLeft = inputLen;
766
767	nsNativeCharsetConverter conv;
768	nsresult rv = conv.NativeToUnicode(&buf, &bufLeft, &result, &resultLeft);
769	if (NS_SUCCEEDED(rv)) {
770	NS_ASSERTION(bufLeft == 0, "did not consume entire input buffer");
771	output.SetLength(inputLen - resultLeft);
772	}
773	return rv;
774	}
775
776	NS_COM nsresult
777	NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
778	{
779	output.Truncate();
780
781	nsAString::const_iterator iter, end;
782	input.BeginReading(iter);
783	input.EndReading(end);
784
785	// cannot easily avoid intermediate buffer copy.
786	char temp[4096];
787
788	nsNativeCharsetConverter conv;
789
790	const PRUnichar *buf = iter.get();
791	PRUint32 bufLeft = Distance(iter, end);
792	while (bufLeft) {
793	char *p = temp;
794	PRUint32 tempLeft = sizeof(temp);
795
796	nsresult rv = conv.UnicodeToNative(&buf, &bufLeft, &p, &tempLeft);
797	if (NS_FAILED(rv)) return rv;
798
799	if (tempLeft < sizeof(temp))
800	output.Append(temp, sizeof(temp) - tempLeft);
801	}
802	return NS_OK;
803	}
804
805	void
806	NS_StartupNativeCharsetUtils()
807	{
808	//
809	// need to initialize the locale or else charset conversion will fail.
810	// better not delay this in case some other component alters the locale
811	// settings.
812	//
813	// XXX we assume that we are called early enough that we should
814	// always be the first to care about the locale's charset.
815	//
816	setlocale(LC_CTYPE, "");
817
818	nsNativeCharsetConverter::GlobalInit();
819	}
820
821	void
822	NS_ShutdownNativeCharsetUtils()
823	{
824	nsNativeCharsetConverter::GlobalShutdown();
825	}
826
827	//-----------------------------------------------------------------------------
828	// XP_BEOS
829	//-----------------------------------------------------------------------------
830	#elif defined(XP_BEOS)
831
832	#include "nsAString.h"
833	#include "nsReadableUtils.h"
834	#include "nsString.h"
835
836	NS_COM nsresult
837	NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
838	{
839	CopyUTF8toUTF16(input, output);
840	return NS_OK;
841	}
842
843	NS_COM nsresult
844	NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
845	{
846	CopyUTF16toUTF8(input, output);
847	return NS_OK;
848	}
849
/**
 * BeOS: nothing to set up.
 */
void
NS_StartupNativeCharsetUtils()
{
}
854
/**
 * BeOS: nothing to tear down.
 */
void
NS_ShutdownNativeCharsetUtils()
{
}
859
860	//-----------------------------------------------------------------------------
861	// XP_WIN
862	//-----------------------------------------------------------------------------
863	#elif defined(XP_WIN)
864
865	#include <windows.h>
866	#include "nsAString.h"
867
868	NS_COM nsresult
869	NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
870	{
871	PRUint32 inputLen = input.Length();
872
873	nsACString::const_iterator iter;
874	input.BeginReading(iter);
875
876	const char *buf = iter.get();
877
878	// determine length of result
879	PRUint32 resultLen = 0;
880	int n = ::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, NULL, 0);
881	if (n > 0)
882	resultLen += n;
883
884	// allocate sufficient space
885	output.SetLength(resultLen);
886	if (resultLen > 0) {
887	nsAString::iterator out_iter;
888	output.BeginWriting(out_iter);
889
890	PRUnichar *result = out_iter.get();
891
892	::MultiByteToWideChar(CP_ACP, 0, buf, inputLen, result, resultLen);
893	}
894	return NS_OK;
895	}
896
897	NS_COM nsresult
898	NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
899	{
900	PRUint32 inputLen = input.Length();
901
902	nsAString::const_iterator iter;
903	input.BeginReading(iter);
904
905	const PRUnichar *buf = iter.get();
906
907	// determine length of result
908	PRUint32 resultLen = 0;
909
910	int n = ::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, NULL, 0, NULL, NULL);
911	if (n > 0)
912	resultLen += n;
913
914	// allocate sufficient space
915	output.SetLength(resultLen);
916	if (resultLen > 0) {
917	nsACString::iterator out_iter;
918	output.BeginWriting(out_iter);
919
920	// default "defaultChar" is '?', which is an illegal character on windows
921	// file system. That will cause file uncreatable. Change it to '_'
922	const char defaultChar = '_';
923
924	char *result = out_iter.get();
925
926	::WideCharToMultiByte(CP_ACP, 0, buf, inputLen, result, resultLen,
927	&defaultChar, NULL);
928	}
929	return NS_OK;
930	}
931
/**
 * Windows: nothing to set up.
 */
void
NS_StartupNativeCharsetUtils()
{
}
936
/**
 * Windows: nothing to tear down.
 */
void
NS_ShutdownNativeCharsetUtils()
{
}
941
942	//-----------------------------------------------------------------------------
943	// XP_OS2
944	//-----------------------------------------------------------------------------
945	#elif defined(XP_OS2)
946
947	#define INCL_DOS
948	#include <os2.h>
949	#include <uconv.h>
950	#include "nsAString.h"
951	#include <ulserrno.h>
952	#include "nsNativeCharsetUtils.h"
953
954	static UconvObject UnicodeConverter = NULL;
955
956	NS_COM nsresult
957	NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
958	{
959	PRUint32 inputLen = input.Length();
960
961	nsACString::const_iterator iter;
962	input.BeginReading(iter);
963	const char *inputStr = iter.get();
964
965	// determine length of result
966	PRUint32 resultLen = inputLen;
967	output.SetLength(resultLen);
968
969	nsAString::iterator out_iter;
970	output.BeginWriting(out_iter);
971	UniChar result = (UniChar)out_iter.get();
972
973	size_t cSubs = 0;
974	size_t resultLeft = resultLen;
975
976	if (!UnicodeConverter)
977	NS_StartupNativeCharsetUtils();
978
979	int unirc = ::UniUconvToUcs(UnicodeConverter, (void**)&inputStr, &inputLen,
980	&result, &resultLeft, &cSubs);
981
982	NS_ASSERTION(unirc != UCONV_E2BIG, "Path too big");
983
984	if (unirc != ULS_SUCCESS) {
985	output.Truncate();
986	return NS_ERROR_FAILURE;
987	}
988
989	// Need to update string length to reflect how many bytes were actually
990	// written.
991	output.Truncate(resultLen - resultLeft);
992	return NS_OK;
993	}
994
995	NS_COM nsresult
996	NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
997	{
998	size_t inputLen = input.Length();
999
1000	nsAString::const_iterator iter;
1001	input.BeginReading(iter);
1002	UniChar* inputStr = (UniChar) NS_CONST_CAST(PRUnichar, iter.get());
1003
1004	// maximum length of unicode string of length x converted to native
1005	// codepage is x*2
1006	size_t resultLen = inputLen * 2;
1007	output.SetLength(resultLen);
1008
1009	nsACString::iterator out_iter;
1010	output.BeginWriting(out_iter);
1011	char *result = out_iter.get();
1012
1013	size_t cSubs = 0;
1014	size_t resultLeft = resultLen;
1015
1016	if (!UnicodeConverter)
1017	NS_StartupNativeCharsetUtils();
1018
1019	int unirc = ::UniUconvFromUcs(UnicodeConverter, &inputStr, &inputLen,
1020	(void**)&result, &resultLeft, &cSubs);
1021
1022	NS_ASSERTION(unirc != UCONV_E2BIG, "Path too big");
1023
1024	if (unirc != ULS_SUCCESS) {
1025	output.Truncate();
1026	return NS_ERROR_FAILURE;
1027	}
1028
1029	// Need to update string length to reflect how many bytes were actually
1030	// written.
1031	output.Truncate(resultLen - resultLeft);
1032	return NS_OK;
1033	}
1034
1035	void
1036	NS_StartupNativeCharsetUtils()
1037	{
1038	ULONG ulLength;
1039	ULONG ulCodePage;
1040	DosQueryCp(sizeof(ULONG), &ulCodePage, &ulLength);
1041
1042	UniChar codepage[20];
1043	int unirc = ::UniMapCpToUcsCp(ulCodePage, codepage, 20);
1044	if (unirc == ULS_SUCCESS) {
1045	unirc = ::UniCreateUconvObject(codepage, &UnicodeConverter);
1046	if (unirc == ULS_SUCCESS) {
1047	uconv_attribute_t attr;
1048	::UniQueryUconvObject(UnicodeConverter, &attr, sizeof(uconv_attribute_t),
1049	NULL, NULL, NULL);
1050	attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
1051	attr.subchar_len=1;
1052	attr.subchar[0]='_';
1053	::UniSetUconvObject(UnicodeConverter, &attr);
1054	}
1055	}
1056	}
1057
1058	void
1059	NS_ShutdownNativeCharsetUtils()
1060	{
1061	::UniFreeUconvObject(UnicodeConverter);
1062	}
1063
1064	//-----------------------------------------------------------------------------
1065	// XP_MAC
1066	//-----------------------------------------------------------------------------
1067	#elif defined(XP_MAC)
1068
1069	#include <UnicodeConverter.h>
1070	#include <TextCommon.h>
1071	#include <Script.h>
1072	#include <MacErrors.h>
1073	#include "nsAString.h"
1074
1075	class nsFSStringConversionMac {
1076	public:
1077	static nsresult UCSToFS(const nsAString& aIn, nsACString& aOut);
1078	static nsresult FSToUCS(const nsACString& ain, nsAString& aOut);
1079
1080	static void CleanUp();
1081
1082	private:
1083	static TextEncoding GetSystemEncoding();
1084	static nsresult PrepareEncoder();
1085	static nsresult PrepareDecoder();
1086
1087	static UnicodeToTextInfo sEncoderInfo;
1088	static TextToUnicodeInfo sDecoderInfo;
1089	};
1090
1091	UnicodeToTextInfo nsFSStringConversionMac::sEncoderInfo = nsnull;
1092	TextToUnicodeInfo nsFSStringConversionMac::sDecoderInfo = nsnull;
1093
1094	nsresult nsFSStringConversionMac::UCSToFS(const nsAString& aIn, nsACString& aOut)
1095	{
1096	nsresult rv = PrepareEncoder();
1097	if (NS_FAILED(rv)) return rv;
1098
1099	OSStatus err = noErr;
1100	char stackBuffer[512];
1101
1102	aOut.Truncate();
1103
1104	// for each chunk of \|aIn\|...
1105	nsReadingIterator<PRUnichar> iter;
1106	aIn.BeginReading(iter);
1107
1108	PRUint32 fragmentLength = PRUint32(iter.size_forward());
1109	UInt32 bytesLeft = fragmentLength * sizeof(UniChar);
1110
1111	do {
1112	UInt32 bytesRead = 0, bytesWritten = 0;
1113	err = ::ConvertFromUnicodeToText(sEncoderInfo,
1114	bytesLeft,
1115	(const UniChar*)iter.get(),
1116	kUnicodeUseFallbacksMask \| kUnicodeLooseMappingsMask,
1117	0, nsnull, nsnull, nsnull,
1118	sizeof(stackBuffer),
1119	&bytesRead,
1120	&bytesWritten,
1121	stackBuffer);
1122	if (err == kTECUsedFallbacksStatus)
1123	err = noErr;
1124	else if (err == kTECOutputBufferFullStatus) {
1125	bytesLeft -= bytesRead;
1126	iter.advance(bytesRead / sizeof(UniChar));
1127	}
1128	aOut.Append(stackBuffer, bytesWritten);
1129	}
1130	while (err == kTECOutputBufferFullStatus);
1131
1132	return (err == noErr) ? NS_OK : NS_ERROR_FAILURE;
1133	}
1134
1135	nsresult nsFSStringConversionMac::FSToUCS(const nsACString& aIn, nsAString& aOut)
1136	{
1137	nsresult rv = PrepareDecoder();
1138	if (NS_FAILED(rv)) return rv;
1139
1140	OSStatus err = noErr;
1141	UniChar stackBuffer[512];
1142
1143	aOut.Truncate(0);
1144
1145	// for each chunk of \|aIn\|...
1146	nsReadingIterator<char> iter;
1147	aIn.BeginReading(iter);
1148
1149	PRUint32 fragmentLength = PRUint32(iter.size_forward());
1150	UInt32 bytesLeft = fragmentLength;
1151
1152	do {
1153	UInt32 bytesRead = 0, bytesWritten = 0;
1154	err = ::ConvertFromTextToUnicode(sDecoderInfo,
1155	bytesLeft,
1156	iter.get(),
1157	kUnicodeUseFallbacksMask \| kUnicodeLooseMappingsMask,
1158	0, nsnull, nsnull, nsnull,
1159	sizeof(stackBuffer),
1160	&bytesRead,
1161	&bytesWritten,
1162	stackBuffer);
1163	if (err == kTECUsedFallbacksStatus)
1164	err = noErr;
1165	else if (err == kTECOutputBufferFullStatus) {
1166	bytesLeft -= bytesRead;
1167	iter.advance(bytesRead);
1168	}
1169	aOut.Append((PRUnichar *)stackBuffer, bytesWritten / sizeof(PRUnichar));
1170	}
1171	while (err == kTECOutputBufferFullStatus);
1172
1173	return (err == noErr) ? NS_OK : NS_ERROR_FAILURE;
1174	}
1175
1176	void nsFSStringConversionMac::CleanUp()
1177	{
1178	if (sDecoderInfo) {
1179	::DisposeTextToUnicodeInfo(&sDecoderInfo);
1180	sDecoderInfo = nsnull;
1181	}
1182	if (sEncoderInfo) {
1183	::DisposeUnicodeToTextInfo(&sEncoderInfo);
1184	sEncoderInfo = nsnull;
1185	}
1186	}
1187
1188	TextEncoding nsFSStringConversionMac::GetSystemEncoding()
1189	{
1190	OSStatus err;
1191	TextEncoding theEncoding;
1192
1193	err = ::UpgradeScriptInfoToTextEncoding(smSystemScript, kTextLanguageDontCare,
1194	kTextRegionDontCare, NULL, &theEncoding);
1195
1196	if (err != noErr)
1197	theEncoding = kTextEncodingMacRoman;
1198
1199	return theEncoding;
1200	}
1201
1202	nsresult nsFSStringConversionMac::PrepareEncoder()
1203	{
1204	nsresult rv = NS_OK;
1205	if (!sEncoderInfo) {
1206	OSStatus err;
1207	err = ::CreateUnicodeToTextInfoByEncoding(GetSystemEncoding(), &sEncoderInfo);
1208	if (err)
1209	rv = NS_ERROR_FAILURE;
1210	}
1211	return rv;
1212	}
1213
1214	nsresult nsFSStringConversionMac::PrepareDecoder()
1215	{
1216	nsresult rv = NS_OK;
1217	if (!sDecoderInfo) {
1218	OSStatus err;
1219	err = ::CreateTextToUnicodeInfoByEncoding(GetSystemEncoding(), &sDecoderInfo);
1220	if (err)
1221	rv = NS_ERROR_FAILURE;
1222	}
1223	return rv;
1224	}
1225
1226	NS_COM nsresult
1227	NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
1228	{
1229	return nsFSStringConversionMac::FSToUCS(input, output);
1230	}
1231
1232	NS_COM nsresult
1233	NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
1234	{
1235	return nsFSStringConversionMac::UCSToFS(input, output);
1236	}
1237
// Mac implementation: intentionally empty. The converters are created on
// demand by nsFSStringConversionMac::PrepareEncoder / PrepareDecoder.
void
NS_StartupNativeCharsetUtils()
{
    // Nothing to initialise up front.
}
1242
1243	void
1244	NS_ShutdownNativeCharsetUtils()
1245	{
1246	nsFSStringConversionMac::CleanUp();
1247	}
1248
1249	//-----------------------------------------------------------------------------
1250	// default : truncate/zeropad
1251	//-----------------------------------------------------------------------------
1252	#else
1253
1254	#include "nsReadableUtils.h"
1255
1256	NS_COM nsresult
1257	NS_CopyNativeToUnicode(const nsACString &input, nsAString &output)
1258	{
1259	CopyASCIItoUCS2(input, output);
1260	return NS_OK;
1261	}
1262
1263	NS_COM nsresult
1264	NS_CopyUnicodeToNative(const nsAString &input, nsACString &output)
1265	{
1266	CopyUCS2toASCII(input, output);
1267	return NS_OK;
1268	}
1269
// Fallback implementation: intentionally empty, since the fallback
// conversion functions keep no state.
void
NS_StartupNativeCharsetUtils()
{
    // Nothing to initialise.
}
1274
// Fallback implementation: intentionally empty, since the fallback
// conversion functions keep no state.
void
NS_ShutdownNativeCharsetUtils()
{
    // Nothing to release.
}
1279
1280	#endif

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/xpcom18a4/xpcom/io/nsNativeCharsetUtils.cpp@ 60030

以其他格式下載: