encoding.c@ 103285

最後變更在這個檔案從103285是 95312,由 vboxsync 提交於 2 年前
libs/{curl,libxml2}: OSE export fixes, bugref:8515
屬性 svn:eol-style 設為 `native`
檔案大小: 142.6 KB

行
1	/*
2	* encoding.c : implements the encoding conversion functions needed for XML
3	*
4	* Related specs:
5	* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6	* rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7	* [ISO-10646] UTF-8 and UTF-16 in Annexes
8	* [ISO-8859-1] ISO Latin-1 characters codes.
9	* [UNICODE] The Unicode Consortium, "The Unicode Standard --
10	* Worldwide Character Encoding -- Version 1.0", Addison-
11	* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12	* described in Unicode Technical Report #4.
13	* [US-ASCII] Coded Character Set--7-bit American Standard Code for
14	* Information Interchange, ANSI X3.4-1986.
15	*
16	* See Copyright for the status of this software.
17	*
18	* [email protected]
19	*
20	* Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <[email protected]>
21	*/
22
23	#define IN_LIBXML
24	#include "libxml.h"
25
26	#include <string.h>
27	#include <limits.h>
28
29	#ifdef HAVE_CTYPE_H
30	#include <ctype.h>
31	#endif
32	#ifdef HAVE_STDLIB_H
33	#include <stdlib.h>
34	#endif
35	#ifdef LIBXML_ICONV_ENABLED
36	#ifdef HAVE_ERRNO_H
37	#include <errno.h>
38	#endif
39	#endif
40	#include <libxml/encoding.h>
41	#include <libxml/xmlmemory.h>
42	#ifdef LIBXML_HTML_ENABLED
43	#include <libxml/HTMLparser.h>
44	#endif
45	#include <libxml/globals.h>
46	#include <libxml/xmlerror.h>
47
48	#include "buf.h"
49	#include "enc.h"
50
51	static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52	static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53
/*
 * One entry of the encoding alias table: looking up @alias yields @name.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;   /* encoding name returned for a matching alias */
    const char *alias;  /* key the requested alias is compared against */
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;

/* Alias table, number of entries in use and number of entries allocated. */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;
64
/*
 * With iconv or ICU support compiled in, encoding traces can be enabled by
 * turning the "#if 0" below into "#if 1". Otherwise only the forward
 * declaration for the ISO-8859-x registration routine is needed.
 */
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
#if 0
#define DEBUG_ENCODING  /* Define this to get encoding traces */
#endif
#else
#ifdef LIBXML_ISO8859X_ENABLED
static void xmlRegisterCharEncodingHandlersISO8859x (void);
#endif
#endif
74
75	static int xmlLittleEndian = 1;
76
77	/**
78	* xmlEncodingErrMemory:
79	* @extra: extra information
80	*
81	* Handle an out of memory condition
82	*/
83	static void
84	xmlEncodingErrMemory(const char *extra)
85	{
86	__xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87	}
88
89	/**
90	* xmlErrEncoding:
91	* @error: the error number
92	* @msg: the error message
93	*
94	* n encoding error
95	*/
96	static void LIBXML_ATTR_FORMAT(2,0)
97	xmlEncodingErr(xmlParserErrors error, const char msg, const char val)
98	{
99	__xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100	XML_FROM_I18N, error, XML_ERR_FATAL,
101	NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102	}
103
104	#ifdef LIBXML_ICU_ENABLED
105	static uconv_t*
106	openIcuConverter(const char* name, int toUnicode)
107	{
108	UErrorCode status = U_ZERO_ERROR;
109	uconv_t conv = (uconv_t ) xmlMalloc(sizeof(uconv_t));
110	if (conv == NULL)
111	return NULL;
112
113	conv->pivot_source = conv->pivot_buf;
114	conv->pivot_target = conv->pivot_buf;
115
116	conv->uconv = ucnv_open(name, &status);
117	if (U_FAILURE(status))
118	goto error;
119
120	status = U_ZERO_ERROR;
121	if (toUnicode) {
122	ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123	NULL, NULL, NULL, &status);
124	}
125	else {
126	ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127	NULL, NULL, NULL, &status);
128	}
129	if (U_FAILURE(status))
130	goto error;
131
132	status = U_ZERO_ERROR;
133	conv->utf8 = ucnv_open("UTF-8", &status);
134	if (U_SUCCESS(status))
135	return conv;
136
137	error:
138	if (conv->uconv)
139	ucnv_close(conv->uconv);
140	xmlFree(conv);
141	return NULL;
142	}
143
144	static void
145	closeIcuConverter(uconv_t *conv)
146	{
147	if (conv != NULL) {
148	ucnv_close(conv->uconv);
149	ucnv_close(conv->utf8);
150	xmlFree(conv);
151	}
152	}
153	#endif /* LIBXML_ICU_ENABLED */
154
155	/************************************************************************
156	* *
157	* Conversions To/From UTF8 encoding *
158	* *
159	************************************************************************/
160
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars to @out; ASCII is valid UTF-8 as is.
 * Copying stops once fewer than six free bytes remain in @out.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 was found.
 * In both cases *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int status = 0;
    int n = 0;          /* input and output advance in lock step */

    while ((n < available) && (capacity - n > 5)) {
        unsigned char ch = in[n];

        if (ch >= 0x80) {
            /* not an ASCII character */
            status = -1;
            break;
        }
        out[n] = ch;
        n++;
    }

    *outlen = n;
    *inlen = n;
    return((status < 0) ? status : n);
}
205
206	#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out. Conversion stops without error when @out is full
 * or when @in ends in the middle of a multi-byte sequence.
 *
 * Returns the number of bytes written, -2 if the input is malformed or
 * contains a character that is not ASCII, or -1 if @out, @outlen or @inlen
 * is NULL. A NULL @in is an initialization call: both lengths are set to 0
 * and 0 is returned.
 * Except for the -1 case, *@inlen is set to the number of octets consumed
 * and *@outlen to the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in Ascii */
            ret = -2;
            break;
        }

        /* the sequence continues past this chunk: wait for more input */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /*
                 * Not a continuation byte: reject the input instead of
                 * emitting a partially decoded value.
                 */
                ret = -2;
                break;
            }
            c <<= 6;
            c |= d & 0x3F;
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80) {
            /* no chance for this in Ascii */
            ret = -2;
            break;
        }
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
290	#endif /* LIBXML_OUTPUT_ENABLED */
291
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, all others become a
 * two-byte sequence. Conversion stops at the first character that does
 * not fit into the remaining space of @out.
 *
 * Returns the number of bytes written, or -1 if any argument is NULL.
 * On success *@inlen is set to the number of octets consumed and
 * *@outlen to the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    outend = out + *outlen;
    inend = in + (*inlen);

    while (in < inend) {
        if (*in < 0x80) {
            if (out >= outend)
                break;
            *out++ = *in++;
        } else {
            /* compare remaining space, never form a pointer before @out */
            if (outend - out < 2)
                break;
            *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
            *out++ = ((*in) & 0x3F) | 0x80;
            ++in;
        }
    }
    *outlen = out - outstart;
    *inlen = in - base;
    return(*outlen);
}
340
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@inlenb and *@outlen.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or @inlenb
 * is NULL or the length to copy is negative. A NULL @inb is an
 * initialization call: both lengths are set to 0 and 0 is returned.
 * On success both *@outlen and *@inlenb are set to the number of bytes
 * copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int len;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);
    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }
    /* compare the lengths, not the addresses they are stored at */
    if (*outlen > *inlenb) {
        len = *inlenb;
    } else {
        len = *outlen;
    }
    if (len < 0)
        return(-1);

    /*
     * FIXME: Conversion functions must assure valid UTF-8, so we have
     * to check for UTF-8 validity. Preferably, this converter shouldn't
     * be used at all.
     */
    memcpy(out, inb, len);

    *outlen = len;
    *inlenb = len;
    return(*outlen);
}
387
388
389	#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Conversion stops without error when @out is full
 * or when @in ends in the middle of a multi-byte sequence.
 *
 * Returns the number of bytes written, -2 if the input is malformed or
 * holds a character above 0xFF, or -1 if @out, @outlen or @inlen is NULL.
 * A NULL @in is an initialization call: both lengths are set to 0 and
 * 0 is returned.
 * Except for the -1 case, *@inlen is set to the number of octets consumed
 * and *@outlen to the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src, *srcEnd, *consumed;
    unsigned char *dst;
    const unsigned char *dstEnd;
    int rc = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization call, nothing to convert */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    src = in;
    consumed = in;
    srcEnd = in + (*inlen);
    dst = out;
    dstEnd = out + (*outlen);

    while (src < srcEnd) {
        unsigned int code = *src++;
        int more;

        /* classify the lead byte */
        if (code < 0x80) {
            more = 0;
        } else if (code < 0xC0) {
            /* continuation byte where a lead byte is expected */
            rc = -2;
            goto finish;
        } else if (code < 0xE0) {
            code &= 0x1F;
            more = 1;
        } else if (code < 0xF0) {
            code &= 0x0F;
            more = 2;
        } else if (code < 0xF8) {
            code &= 0x07;
            more = 3;
        } else {
            /* can never be a Latin 1 character */
            rc = -2;
            goto finish;
        }

        /* sequence is cut off by the end of the chunk */
        if (srcEnd - src < more)
            break;

        while (more-- > 0) {
            unsigned int next = *src++;

            if ((next & 0xC0) != 0x80) {
                rc = -2;
                goto finish;
            }
            code = (code << 6) | (next & 0x3F);
        }

        if (code > 0xFF) {
            /* outside of the Latin 1 range */
            rc = -2;
            goto finish;
        }
        if (dst >= dstEnd)
            break;
        *dst++ = code;
        consumed = src;
    }

finish:
    *outlen = dst - out;
    *inlen = consumed - in;
    return((rc != 0) ? rc : *outlen);
}
479	#endif /* LIBXML_OUTPUT_ENABLED */
480
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so @inb needs no
 * particular alignment and the result does not depend on the byte order
 * of the host. An odd trailing byte is left unconsumed, and conversion
 * stops once fewer than six free bytes remain in @out or when the input
 * ends between the two halves of a surrogate pair.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 * followed by a low surrogate.
 * *@inlenb is set to the number of octets consumed and *@outlen to the
 * number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
        }

        /* assertion: c is a single UCS-4 value */
        if (out >= outend)
            break;
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend)
                break;
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
573
574	#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. The output is stored byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. Conversion stops without error when the next
 * character does not fit into @outb, when @in ends in the middle of a
 * sequence, or at a value of 0x110000 or above.
 *
 * Returns the number of bytes written, -2 if the input is malformed
 * (bad lead byte or bad continuation byte), or -1 if @outb, @outlen or
 * @inlen is NULL. A NULL @in is an initialization call: both lengths are
 * set to 0 and 0 is returned.
 * Except for the -1 case, *@inlen is set to the number of octets consumed
 * and *@outlen to the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;
    int ret = 0;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* the sequence continues past this chunk: wait for more input */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* not a continuation byte: do not emit a partial value */
                ret = -2;
                break;
            }
            c <<= 6;
            c |= d & 0x3F;
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        }
        else
            break;
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
683
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. A NULL @in is the initialization call: the
 * UTF-16LE Byte Order Mark (0xFF 0xFE) is written if @outb has room
 * for it. Any other input is handed to UTF8ToUTF16LE().
 *
 * Returns 2 if the Byte Order Mark was written, 0 if there was no room
 * for it, otherwise the result of UTF8ToUTF16LE().
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    int bomFits;

    /* regular conversion request */
    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    bomFits = (*outlen >= 2);
    if (bomFits) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
    }
    *outlen = bomFits ? 2 : 0;
    *inlen = 0;
#ifdef DEBUG_ENCODING
    if (bomFits)
        xmlGenericError(xmlGenericErrorContext,
                        "Added FFFE Byte Order Mark\n");
#endif
    return(bomFits ? 2 : 0);
}
722	#endif /* LIBXML_OUTPUT_ENABLED */
723
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out. The input is read byte by byte, so @inb needs no
 * particular alignment and the result does not depend on the byte order
 * of the host. An odd trailing byte is left unconsumed, and conversion
 * stops once fewer than six free bytes remain in @out or when the input
 * ends between the two halves of a surrogate pair.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 * followed by a low surrogate.
 * *@inlenb is set to the number of octets consumed and *@outlen to the
 * number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* processed = inb;
    unsigned char* outend;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }
    outend = out + *outlen;
    if ((*inlenb % 2) == 1)
        (*inlenb)--;
    inend = in + *inlenb;
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
            else {
                *outlen = out - outstart;
                *inlenb = processed - inb;
                return(-2);
            }
        }

        /* assertion: c is a single UCS-4 value */
        if (out >= outend)
            break;
        if      (c <    0x80) {  *out++=  c;                       bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend)
                break;
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
        processed = in;
    }
    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(*outlen);
}
816
817	#ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. The output is stored byte by byte, so @outb needs
 * no particular alignment and the result does not depend on the byte
 * order of the host. Conversion stops without error when the next
 * character does not fit into @outb, when @in ends in the middle of a
 * sequence, or at a value of 0x110000 or above.
 *
 * Returns the number of bytes written, -2 if the input is malformed
 * (bad lead byte or bad continuation byte), or -1 if @outb, @outlen or
 * @inlen is NULL. A NULL @in is an initialization call: both lengths are
 * set to 0 and 0 is returned.
 * Except for the -1 case, *@inlen is set to the number of octets consumed
 * and *@outlen to the number of octets produced (always in bytes, also
 * on the -2 paths).
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;
    int ret = 0;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit code units can be stored */
    outend = outb + (*outlen / 2) * 2;
    while (in < inend) {
        d = *in++;
        if (d < 0x80) { c = d; trailing = 0; }
        else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0) { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8) { c = d & 0x07; trailing = 3; }
        else {
            /* no chance for this in UTF-16 */
            ret = -2;
            break;
        }

        /* the sequence continues past this chunk: wait for more input */
        if (inend - in < trailing) {
            break;
        }

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            if ((d & 0xC0) != 0x80) {
                /* not a continuation byte: do not emit a partial value */
                ret = -2;
                break;
            }
            c <<= 6;
            c |= d & 0x3F;
        }
        if (ret < 0)
            break;

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        }
        else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        }
        else
            break;
        processed = in;
    }
    *outlen = out - outb;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
923	#endif /* LIBXML_OUTPUT_ENABLED */
924
925	/************************************************************************
926	* *
927	* Generic encoding handling routines *
928	* *
929	************************************************************************/
930
931	/**
932	* xmlDetectCharEncoding:
933	* @in: a pointer to the first bytes of the XML entity, must be at least
934	* 2 bytes long (at least 4 if encoding is UTF4 variant).
935	* @len: pointer to the length of the buffer
936	*
937	* Guess the encoding of the entity using the first bytes of the entity content
938	* according to the non-normative appendix F of the XML-1.0 recommendation.
939	*
940	* Returns one of the XML_CHAR_ENCODING_... values.
941	*/
942	xmlCharEncoding
943	xmlDetectCharEncoding(const unsigned char* in, int len)
944	{
945	if (in == NULL)
946	return(XML_CHAR_ENCODING_NONE);
947	if (len >= 4) {
948	if ((in[0] == 0x00) && (in[1] == 0x00) &&
949	(in[2] == 0x00) && (in[3] == 0x3C))
950	return(XML_CHAR_ENCODING_UCS4BE);
951	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952	(in[2] == 0x00) && (in[3] == 0x00))
953	return(XML_CHAR_ENCODING_UCS4LE);
954	if ((in[0] == 0x00) && (in[1] == 0x00) &&
955	(in[2] == 0x3C) && (in[3] == 0x00))
956	return(XML_CHAR_ENCODING_UCS4_2143);
957	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
958	(in[2] == 0x00) && (in[3] == 0x00))
959	return(XML_CHAR_ENCODING_UCS4_3412);
960	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
961	(in[2] == 0xA7) && (in[3] == 0x94))
962	return(XML_CHAR_ENCODING_EBCDIC);
963	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
964	(in[2] == 0x78) && (in[3] == 0x6D))
965	return(XML_CHAR_ENCODING_UTF8);
966	/*
967	* Although not part of the recommendation, we also
968	* attempt an "auto-recognition" of UTF-16LE and
969	* UTF-16BE encodings.
970	*/
971	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
972	(in[2] == 0x3F) && (in[3] == 0x00))
973	return(XML_CHAR_ENCODING_UTF16LE);
974	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
975	(in[2] == 0x00) && (in[3] == 0x3F))
976	return(XML_CHAR_ENCODING_UTF16BE);
977	}
978	if (len >= 3) {
979	/*
980	* Errata on XML-1.0 June 20 2001
981	* We now allow an UTF8 encoded BOM
982	*/
983	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
984	(in[2] == 0xBF))
985	return(XML_CHAR_ENCODING_UTF8);
986	}
987	/* For UTF-16 we can recognize by the BOM */
988	if (len >= 2) {
989	if ((in[0] == 0xFE) && (in[1] == 0xFF))
990	return(XML_CHAR_ENCODING_UTF16BE);
991	if ((in[0] == 0xFF) && (in[1] == 0xFE))
992	return(XML_CHAR_ENCODING_UTF16LE);
993	}
994	return(XML_CHAR_ENCODING_NONE);
995	}
996
997	/**
998	* xmlCleanupEncodingAliases:
999	*
1000	* Unregisters all aliases
1001	*/
1002	void
1003	xmlCleanupEncodingAliases(void) {
1004	int i;
1005
1006	if (xmlCharEncodingAliases == NULL)
1007	return;
1008
1009	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1010	if (xmlCharEncodingAliases[i].name != NULL)
1011	xmlFree((char *) xmlCharEncodingAliases[i].name);
1012	if (xmlCharEncodingAliases[i].alias != NULL)
1013	xmlFree((char *) xmlCharEncodingAliases[i].alias);
1014	}
1015	xmlCharEncodingAliasesNb = 0;
1016	xmlCharEncodingAliasesMax = 0;
1017	xmlFree(xmlCharEncodingAliases);
1018	xmlCharEncodingAliases = NULL;
1019	}
1020
1021	/**
1022	* xmlGetEncodingAlias:
1023	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1024	*
1025	* Lookup an encoding name for the given alias.
1026	*
1027	* Returns NULL if not found, otherwise the original name
1028	*/
1029	const char *
1030	xmlGetEncodingAlias(const char *alias) {
1031	int i;
1032	char upper[100];
1033
1034	if (alias == NULL)
1035	return(NULL);
1036
1037	if (xmlCharEncodingAliases == NULL)
1038	return(NULL);
1039
1040	for (i = 0;i < 99;i++) {
1041	upper[i] = toupper(alias[i]);
1042	if (upper[i] == 0) break;
1043	}
1044	upper[i] = 0;
1045
1046	/*
1047	* Walk down the list looking for a definition of the alias
1048	*/
1049	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1050	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1051	return(xmlCharEncodingAliases[i].name);
1052	}
1053	}
1054	return(NULL);
1055	}
1056
1057	/**
1058	* xmlAddEncodingAlias:
1059	* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1060	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1061	*
1062	* Registers an alias @alias for an encoding named @name. Existing alias
1063	* will be overwritten.
1064	*
1065	* Returns 0 in case of success, -1 in case of error
1066	*/
1067	int
1068	xmlAddEncodingAlias(const char name, const char alias) {
1069	int i;
1070	char upper[100];
1071
1072	if ((name == NULL) \|\| (alias == NULL))
1073	return(-1);
1074
1075	for (i = 0;i < 99;i++) {
1076	upper[i] = toupper(alias[i]);
1077	if (upper[i] == 0) break;
1078	}
1079	upper[i] = 0;
1080
1081	if (xmlCharEncodingAliases == NULL) {
1082	xmlCharEncodingAliasesNb = 0;
1083	xmlCharEncodingAliasesMax = 20;
1084	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1085	xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1086	if (xmlCharEncodingAliases == NULL)
1087	return(-1);
1088	} else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1089	xmlCharEncodingAliasesMax *= 2;
1090	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1091	xmlRealloc(xmlCharEncodingAliases,
1092	xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1093	}
1094	/*
1095	* Walk down the list looking for a definition of the alias
1096	*/
1097	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1098	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1099	/*
1100	* Replace the definition.
1101	*/
1102	xmlFree((char *) xmlCharEncodingAliases[i].name);
1103	xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1104	return(0);
1105	}
1106	}
1107	/*
1108	* Add the definition
1109	*/
1110	xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1111	xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1112	xmlCharEncodingAliasesNb++;
1113	return(0);
1114	}
1115
1116	/**
1117	* xmlDelEncodingAlias:
1118	* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1119	*
1120	* Unregisters an encoding alias @alias
1121	*
1122	* Returns 0 in case of success, -1 in case of error
1123	*/
1124	int
1125	xmlDelEncodingAlias(const char *alias) {
1126	int i;
1127
1128	if (alias == NULL)
1129	return(-1);
1130
1131	if (xmlCharEncodingAliases == NULL)
1132	return(-1);
1133	/*
1134	* Walk down the list looking for a definition of the alias
1135	*/
1136	for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1137	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1138	xmlFree((char *) xmlCharEncodingAliases[i].name);
1139	xmlFree((char *) xmlCharEncodingAliases[i].alias);
1140	xmlCharEncodingAliasesNb--;
1141	memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1142	sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1143	return(0);
1144	}
1145	}
1146	return(-1);
1147	}
1148
1149	/**
1150	* xmlParseCharEncoding:
1151	* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1152	*
1153	* Compare the string to the encoding schemes already known. Note
1154	* that the comparison is case insensitive accordingly to the section
1155	* [XML] 4.3.3 Character Encoding in Entities.
1156	*
1157	* Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1158	* if not recognized.
1159	*/
1160	xmlCharEncoding
1161	xmlParseCharEncoding(const char* name)
1162	{
1163	const char *alias;
1164	char upper[500];
1165	int i;
1166
1167	if (name == NULL)
1168	return(XML_CHAR_ENCODING_NONE);
1169
1170	/*
1171	* Do the alias resolution
1172	*/
1173	alias = xmlGetEncodingAlias(name);
1174	if (alias != NULL)
1175	name = alias;
1176
1177	for (i = 0;i < 499;i++) {
1178	upper[i] = toupper(name[i]);
1179	if (upper[i] == 0) break;
1180	}
1181	upper[i] = 0;
1182
1183	if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1184	if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1185	if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1186
1187	/*
1188	* NOTE: if we were able to parse this, the endianness of UTF16 is
1189	* already found and in use
1190	*/
1191	if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1192	if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1193
1194	if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195	if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1196	if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1197
1198	/*
1199	* NOTE: if we were able to parse this, the endianness of UCS4 is
1200	* already found and in use
1201	*/
1202	if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203	if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1204	if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1205
1206
1207	if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1208	if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1209	if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1210
1211	if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1212	if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1213	if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1214
1215	if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1216	if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1217	if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1218	if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1219	if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1220	if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1221	if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1222
1223	if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1224	if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1225	if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1226
1227	#ifdef DEBUG_ENCODING
1228	xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1229	#endif
1230	return(XML_CHAR_ENCODING_ERROR);
1231	}
1232
1233	/**
1234	* xmlGetCharEncodingName:
1235	* @enc: the encoding
1236	*
1237	* The "canonical" name for XML encoding.
1238	* C.f. http://www.w3.org/TR/REC-xml#charencoding
1239	* Section 4.3.3 Character Encoding in Entities
1240	*
1241	* Returns the canonical name for the given encoding
1242	*/
1243
1244	const char*
1245	xmlGetCharEncodingName(xmlCharEncoding enc) {
1246	switch (enc) {
1247	case XML_CHAR_ENCODING_ERROR:
1248	return(NULL);
1249	case XML_CHAR_ENCODING_NONE:
1250	return(NULL);
1251	case XML_CHAR_ENCODING_UTF8:
1252	return("UTF-8");
1253	case XML_CHAR_ENCODING_UTF16LE:
1254	return("UTF-16");
1255	case XML_CHAR_ENCODING_UTF16BE:
1256	return("UTF-16");
1257	case XML_CHAR_ENCODING_EBCDIC:
1258	return("EBCDIC");
1259	case XML_CHAR_ENCODING_UCS4LE:
1260	return("ISO-10646-UCS-4");
1261	case XML_CHAR_ENCODING_UCS4BE:
1262	return("ISO-10646-UCS-4");
1263	case XML_CHAR_ENCODING_UCS4_2143:
1264	return("ISO-10646-UCS-4");
1265	case XML_CHAR_ENCODING_UCS4_3412:
1266	return("ISO-10646-UCS-4");
1267	case XML_CHAR_ENCODING_UCS2:
1268	return("ISO-10646-UCS-2");
1269	case XML_CHAR_ENCODING_8859_1:
1270	return("ISO-8859-1");
1271	case XML_CHAR_ENCODING_8859_2:
1272	return("ISO-8859-2");
1273	case XML_CHAR_ENCODING_8859_3:
1274	return("ISO-8859-3");
1275	case XML_CHAR_ENCODING_8859_4:
1276	return("ISO-8859-4");
1277	case XML_CHAR_ENCODING_8859_5:
1278	return("ISO-8859-5");
1279	case XML_CHAR_ENCODING_8859_6:
1280	return("ISO-8859-6");
1281	case XML_CHAR_ENCODING_8859_7:
1282	return("ISO-8859-7");
1283	case XML_CHAR_ENCODING_8859_8:
1284	return("ISO-8859-8");
1285	case XML_CHAR_ENCODING_8859_9:
1286	return("ISO-8859-9");
1287	case XML_CHAR_ENCODING_2022_JP:
1288	return("ISO-2022-JP");
1289	case XML_CHAR_ENCODING_SHIFT_JIS:
1290	return("Shift-JIS");
1291	case XML_CHAR_ENCODING_EUC_JP:
1292	return("EUC-JP");
1293	case XML_CHAR_ENCODING_ASCII:
1294	return(NULL);
1295	}
1296	return(NULL);
1297	}
1298
1299	/************************************************************************
1300	* *
1301	* Char encoding handlers *
1302	* *
1303	************************************************************************/
1304
1305
1306	/* the size should be growable, but it's not a big deal ... */
1307	#define MAX_ENCODING_HANDLERS 50
1308	static xmlCharEncodingHandlerPtr *handlers = NULL;
1309	static int nbCharEncodingHandler = 0;
1310
1311	/*
1312	* The default is UTF-8 for XML, that's also the default used for the
1313	* parser internals, so the default encoding handler is NULL
1314	*/
1315
1316	static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1317
1318	/**
1319	* xmlNewCharEncodingHandler:
1320	* @name: the encoding name, in UTF-8 format (ASCII actually)
1321	* @input: the xmlCharEncodingInputFunc to read that encoding
1322	* @output: the xmlCharEncodingOutputFunc to write that encoding
1323	*
1324	* Create and registers an xmlCharEncodingHandler.
1325	*
1326	* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1327	*/
1328	xmlCharEncodingHandlerPtr
1329	xmlNewCharEncodingHandler(const char *name,
1330	xmlCharEncodingInputFunc input,
1331	xmlCharEncodingOutputFunc output) {
1332	xmlCharEncodingHandlerPtr handler;
1333	const char *alias;
1334	char upper[500];
1335	int i;
1336	char *up = NULL;
1337
1338	/*
1339	* Do the alias resolution
1340	*/
1341	alias = xmlGetEncodingAlias(name);
1342	if (alias != NULL)
1343	name = alias;
1344
1345	/*
1346	* Keep only the uppercase version of the encoding.
1347	*/
1348	if (name == NULL) {
1349	xmlEncodingErr(XML_I18N_NO_NAME,
1350	"xmlNewCharEncodingHandler : no name !\n", NULL);
1351	return(NULL);
1352	}
1353	for (i = 0;i < 499;i++) {
1354	upper[i] = toupper(name[i]);
1355	if (upper[i] == 0) break;
1356	}
1357	upper[i] = 0;
1358	up = xmlMemStrdup(upper);
1359	if (up == NULL) {
1360	xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361	return(NULL);
1362	}
1363
1364	/*
1365	* allocate and fill-up an handler block.
1366	*/
1367	handler = (xmlCharEncodingHandlerPtr)
1368	xmlMalloc(sizeof(xmlCharEncodingHandler));
1369	if (handler == NULL) {
1370	xmlFree(up);
1371	xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1372	return(NULL);
1373	}
1374	memset(handler, 0, sizeof(xmlCharEncodingHandler));
1375	handler->input = input;
1376	handler->output = output;
1377	handler->name = up;
1378
1379	#ifdef LIBXML_ICONV_ENABLED
1380	handler->iconv_in = NULL;
1381	handler->iconv_out = NULL;
1382	#endif
1383	#ifdef LIBXML_ICU_ENABLED
1384	handler->uconv_in = NULL;
1385	handler->uconv_out = NULL;
1386	#endif
1387
1388	/*
1389	* registers and returns the handler.
1390	*/
1391	xmlRegisterCharEncodingHandler(handler);
1392	#ifdef DEBUG_ENCODING
1393	xmlGenericError(xmlGenericErrorContext,
1394	"Registered encoding handler for %s\n", name);
1395	#endif
1396	return(handler);
1397	}
1398
1399	/**
1400	* xmlInitCharEncodingHandlers:
1401	*
1402	* Initialize the char encoding support, it registers the default
1403	* encoding supported.
1404	* NOTE: while public, this function usually doesn't need to be called
1405	* in normal processing.
1406	*/
1407	void
1408	xmlInitCharEncodingHandlers(void) {
1409	unsigned short int tst = 0x1234;
1410	unsigned char ptr = (unsigned char ) &tst;
1411
1412	if (handlers != NULL) return;
1413
1414	handlers = (xmlCharEncodingHandlerPtr *)
1415	xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1416
1417	if (*ptr == 0x12) xmlLittleEndian = 0;
1418	else if (*ptr == 0x34) xmlLittleEndian = 1;
1419	else {
1420	xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1421	"Odd problem at endianness detection\n", NULL);
1422	}
1423
1424	if (handlers == NULL) {
1425	xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1426	return;
1427	}
1428	xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1429	#ifdef LIBXML_OUTPUT_ENABLED
1430	xmlUTF16LEHandler =
1431	xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1432	xmlUTF16BEHandler =
1433	xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1434	xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1435	xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1436	xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1437	xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1438	#ifdef LIBXML_HTML_ENABLED
1439	xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1440	#endif
1441	#else
1442	xmlUTF16LEHandler =
1443	xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1444	xmlUTF16BEHandler =
1445	xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1446	xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1447	xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1448	xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1449	xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1450	#endif /* LIBXML_OUTPUT_ENABLED */
1451	#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1452	#ifdef LIBXML_ISO8859X_ENABLED
1453	xmlRegisterCharEncodingHandlersISO8859x ();
1454	#endif
1455	#endif
1456
1457	}
1458
1459	/**
1460	* xmlCleanupCharEncodingHandlers:
1461	*
1462	* Cleanup the memory allocated for the char encoding support, it
1463	* unregisters all the encoding handlers and the aliases.
1464	*/
1465	void
1466	xmlCleanupCharEncodingHandlers(void) {
1467	xmlCleanupEncodingAliases();
1468
1469	if (handlers == NULL) return;
1470
1471	for (;nbCharEncodingHandler > 0;) {
1472	nbCharEncodingHandler--;
1473	if (handlers[nbCharEncodingHandler] != NULL) {
1474	if (handlers[nbCharEncodingHandler]->name != NULL)
1475	xmlFree(handlers[nbCharEncodingHandler]->name);
1476	xmlFree(handlers[nbCharEncodingHandler]);
1477	}
1478	}
1479	xmlFree(handlers);
1480	handlers = NULL;
1481	nbCharEncodingHandler = 0;
1482	xmlDefaultCharEncodingHandler = NULL;
1483	}
1484
1485	/**
1486	* xmlRegisterCharEncodingHandler:
1487	* @handler: the xmlCharEncodingHandlerPtr handler block
1488	*
1489	* Register the char encoding handler, surprising, isn't it ?
1490	*/
1491	void
1492	xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1493	if (handlers == NULL) xmlInitCharEncodingHandlers();
1494	if ((handler == NULL) \|\| (handlers == NULL)) {
1495	xmlEncodingErr(XML_I18N_NO_HANDLER,
1496	"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1497	goto free_handler;
1498	}
1499
1500	if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1501	xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1502	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1503	"MAX_ENCODING_HANDLERS");
1504	goto free_handler;
1505	}
1506	handlers[nbCharEncodingHandler++] = handler;
1507	return;
1508
1509	free_handler:
1510	if (handler != NULL) {
1511	if (handler->name != NULL) {
1512	xmlFree(handler->name);
1513	}
1514	xmlFree(handler);
1515	}
1516	}
1517
1518	/**
1519	* xmlGetCharEncodingHandler:
1520	* @enc: an xmlCharEncoding value.
1521	*
1522	* Search in the registered set the handler able to read/write that encoding.
1523	*
1524	* Returns the handler or NULL if not found
1525	*/
1526	xmlCharEncodingHandlerPtr
1527	xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1528	xmlCharEncodingHandlerPtr handler;
1529
1530	if (handlers == NULL) xmlInitCharEncodingHandlers();
1531	switch (enc) {
1532	case XML_CHAR_ENCODING_ERROR:
1533	return(NULL);
1534	case XML_CHAR_ENCODING_NONE:
1535	return(NULL);
1536	case XML_CHAR_ENCODING_UTF8:
1537	return(NULL);
1538	case XML_CHAR_ENCODING_UTF16LE:
1539	return(xmlUTF16LEHandler);
1540	case XML_CHAR_ENCODING_UTF16BE:
1541	return(xmlUTF16BEHandler);
1542	case XML_CHAR_ENCODING_EBCDIC:
1543	handler = xmlFindCharEncodingHandler("EBCDIC");
1544	if (handler != NULL) return(handler);
1545	handler = xmlFindCharEncodingHandler("ebcdic");
1546	if (handler != NULL) return(handler);
1547	handler = xmlFindCharEncodingHandler("EBCDIC-US");
1548	if (handler != NULL) return(handler);
1549	handler = xmlFindCharEncodingHandler("IBM-037");
1550	if (handler != NULL) return(handler);
1551	break;
1552	case XML_CHAR_ENCODING_UCS4BE:
1553	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1554	if (handler != NULL) return(handler);
1555	handler = xmlFindCharEncodingHandler("UCS-4");
1556	if (handler != NULL) return(handler);
1557	handler = xmlFindCharEncodingHandler("UCS4");
1558	if (handler != NULL) return(handler);
1559	break;
1560	case XML_CHAR_ENCODING_UCS4LE:
1561	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1562	if (handler != NULL) return(handler);
1563	handler = xmlFindCharEncodingHandler("UCS-4");
1564	if (handler != NULL) return(handler);
1565	handler = xmlFindCharEncodingHandler("UCS4");
1566	if (handler != NULL) return(handler);
1567	break;
1568	case XML_CHAR_ENCODING_UCS4_2143:
1569	break;
1570	case XML_CHAR_ENCODING_UCS4_3412:
1571	break;
1572	case XML_CHAR_ENCODING_UCS2:
1573	handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1574	if (handler != NULL) return(handler);
1575	handler = xmlFindCharEncodingHandler("UCS-2");
1576	if (handler != NULL) return(handler);
1577	handler = xmlFindCharEncodingHandler("UCS2");
1578	if (handler != NULL) return(handler);
1579	break;
1580
1581	/*
1582	* We used to keep ISO Latin encodings native in the
1583	* generated data. This led to so many problems that
1584	* this has been removed. One can still change this
1585	* back by registering no-ops encoders for those
1586	*/
1587	case XML_CHAR_ENCODING_8859_1:
1588	handler = xmlFindCharEncodingHandler("ISO-8859-1");
1589	if (handler != NULL) return(handler);
1590	break;
1591	case XML_CHAR_ENCODING_8859_2:
1592	handler = xmlFindCharEncodingHandler("ISO-8859-2");
1593	if (handler != NULL) return(handler);
1594	break;
1595	case XML_CHAR_ENCODING_8859_3:
1596	handler = xmlFindCharEncodingHandler("ISO-8859-3");
1597	if (handler != NULL) return(handler);
1598	break;
1599	case XML_CHAR_ENCODING_8859_4:
1600	handler = xmlFindCharEncodingHandler("ISO-8859-4");
1601	if (handler != NULL) return(handler);
1602	break;
1603	case XML_CHAR_ENCODING_8859_5:
1604	handler = xmlFindCharEncodingHandler("ISO-8859-5");
1605	if (handler != NULL) return(handler);
1606	break;
1607	case XML_CHAR_ENCODING_8859_6:
1608	handler = xmlFindCharEncodingHandler("ISO-8859-6");
1609	if (handler != NULL) return(handler);
1610	break;
1611	case XML_CHAR_ENCODING_8859_7:
1612	handler = xmlFindCharEncodingHandler("ISO-8859-7");
1613	if (handler != NULL) return(handler);
1614	break;
1615	case XML_CHAR_ENCODING_8859_8:
1616	handler = xmlFindCharEncodingHandler("ISO-8859-8");
1617	if (handler != NULL) return(handler);
1618	break;
1619	case XML_CHAR_ENCODING_8859_9:
1620	handler = xmlFindCharEncodingHandler("ISO-8859-9");
1621	if (handler != NULL) return(handler);
1622	break;
1623
1624
1625	case XML_CHAR_ENCODING_2022_JP:
1626	handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1627	if (handler != NULL) return(handler);
1628	break;
1629	case XML_CHAR_ENCODING_SHIFT_JIS:
1630	handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1631	if (handler != NULL) return(handler);
1632	handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1633	if (handler != NULL) return(handler);
1634	handler = xmlFindCharEncodingHandler("Shift_JIS");
1635	if (handler != NULL) return(handler);
1636	break;
1637	case XML_CHAR_ENCODING_EUC_JP:
1638	handler = xmlFindCharEncodingHandler("EUC-JP");
1639	if (handler != NULL) return(handler);
1640	break;
1641	default:
1642	break;
1643	}
1644
1645	#ifdef DEBUG_ENCODING
1646	xmlGenericError(xmlGenericErrorContext,
1647	"No handler found for encoding %d\n", enc);
1648	#endif
1649	return(NULL);
1650	}
1651
1652	/**
1653	* xmlFindCharEncodingHandler:
1654	* @name: a string describing the char encoding.
1655	*
1656	* Search in the registered set the handler able to read/write that encoding.
1657	*
1658	* Returns the handler or NULL if not found
1659	*/
1660	xmlCharEncodingHandlerPtr
1661	xmlFindCharEncodingHandler(const char *name) {
1662	const char *nalias;
1663	const char *norig;
1664	xmlCharEncoding alias;
1665	#ifdef LIBXML_ICONV_ENABLED
1666	xmlCharEncodingHandlerPtr enc;
1667	iconv_t icv_in, icv_out;
1668	#endif /* LIBXML_ICONV_ENABLED */
1669	#ifdef LIBXML_ICU_ENABLED
1670	xmlCharEncodingHandlerPtr encu;
1671	uconv_t ucv_in, ucv_out;
1672	#endif /* LIBXML_ICU_ENABLED */
1673	char upper[100];
1674	int i;
1675
1676	if (handlers == NULL) xmlInitCharEncodingHandlers();
1677	if (name == NULL) return(xmlDefaultCharEncodingHandler);
1678	if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1679
1680	/*
1681	* Do the alias resolution
1682	*/
1683	norig = name;
1684	nalias = xmlGetEncodingAlias(name);
1685	if (nalias != NULL)
1686	name = nalias;
1687
1688	/*
1689	* Check first for directly registered encoding names
1690	*/
1691	for (i = 0;i < 99;i++) {
1692	upper[i] = toupper(name[i]);
1693	if (upper[i] == 0) break;
1694	}
1695	upper[i] = 0;
1696
1697	if (handlers != NULL) {
1698	for (i = 0;i < nbCharEncodingHandler; i++) {
1699	if (!strcmp(upper, handlers[i]->name)) {
1700	#ifdef DEBUG_ENCODING
1701	xmlGenericError(xmlGenericErrorContext,
1702	"Found registered handler for encoding %s\n", name);
1703	#endif
1704	return(handlers[i]);
1705	}
1706	}
1707	}
1708
1709	#ifdef LIBXML_ICONV_ENABLED
1710	/* check whether iconv can handle this */
1711	icv_in = iconv_open("UTF-8", name);
1712	icv_out = iconv_open(name, "UTF-8");
1713	if (icv_in == (iconv_t) -1) {
1714	icv_in = iconv_open("UTF-8", upper);
1715	}
1716	if (icv_out == (iconv_t) -1) {
1717	icv_out = iconv_open(upper, "UTF-8");
1718	}
1719	if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1720	enc = (xmlCharEncodingHandlerPtr)
1721	xmlMalloc(sizeof(xmlCharEncodingHandler));
1722	if (enc == NULL) {
1723	iconv_close(icv_in);
1724	iconv_close(icv_out);
1725	return(NULL);
1726	}
1727	memset(enc, 0, sizeof(xmlCharEncodingHandler));
1728	enc->name = xmlMemStrdup(name);
1729	enc->input = NULL;
1730	enc->output = NULL;
1731	enc->iconv_in = icv_in;
1732	enc->iconv_out = icv_out;
1733	#ifdef DEBUG_ENCODING
1734	xmlGenericError(xmlGenericErrorContext,
1735	"Found iconv handler for encoding %s\n", name);
1736	#endif
1737	return enc;
1738	} else if ((icv_in != (iconv_t) -1) \|\| icv_out != (iconv_t) -1) {
1739	xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1740	"iconv : problems with filters for '%s'\n", name);
1741	if (icv_in != (iconv_t) -1)
1742	iconv_close(icv_in);
1743	else
1744	iconv_close(icv_out);
1745	}
1746	#endif /* LIBXML_ICONV_ENABLED */
1747	#ifdef LIBXML_ICU_ENABLED
1748	/* check whether icu can handle this */
1749	ucv_in = openIcuConverter(name, 1);
1750	ucv_out = openIcuConverter(name, 0);
1751	if (ucv_in != NULL && ucv_out != NULL) {
1752	encu = (xmlCharEncodingHandlerPtr)
1753	xmlMalloc(sizeof(xmlCharEncodingHandler));
1754	if (encu == NULL) {
1755	closeIcuConverter(ucv_in);
1756	closeIcuConverter(ucv_out);
1757	return(NULL);
1758	}
1759	memset(encu, 0, sizeof(xmlCharEncodingHandler));
1760	encu->name = xmlMemStrdup(name);
1761	encu->input = NULL;
1762	encu->output = NULL;
1763	encu->uconv_in = ucv_in;
1764	encu->uconv_out = ucv_out;
1765	#ifdef DEBUG_ENCODING
1766	xmlGenericError(xmlGenericErrorContext,
1767	"Found ICU converter handler for encoding %s\n", name);
1768	#endif
1769	return encu;
1770	} else if (ucv_in != NULL \|\| ucv_out != NULL) {
1771	closeIcuConverter(ucv_in);
1772	closeIcuConverter(ucv_out);
1773	xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1774	"ICU converter : problems with filters for '%s'\n", name);
1775	}
1776	#endif /* LIBXML_ICU_ENABLED */
1777
1778	#ifdef DEBUG_ENCODING
1779	xmlGenericError(xmlGenericErrorContext,
1780	"No handler found for encoding %s\n", name);
1781	#endif
1782
1783	/*
1784	* Fallback using the canonical names
1785	*/
1786	alias = xmlParseCharEncoding(norig);
1787	if (alias != XML_CHAR_ENCODING_ERROR) {
1788	const char* canon;
1789	canon = xmlGetCharEncodingName(alias);
1790	if ((canon != NULL) && (strcmp(name, canon))) {
1791	return(xmlFindCharEncodingHandler(canon));
1792	}
1793	}
1794
1795	/* If "none of the above", give up */
1796	return(NULL);
1797	}
1798
1799	/************************************************************************
1800	* *
1801	* ICONV based generic conversion functions *
1802	* *
1803	************************************************************************/
1804
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if the last bytes of the input can't form a single output char.
 *
 * On return *@inlen is the number of octets consumed and *@outlen the
 * number of octets produced; when one of the arguments is NULL only
 * *@outlen is set (to 0), provided @outlen itself is not NULL.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv() leaves the remaining counts: turn them into used counts */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
#endif /* LIBXML_ICONV_ENABLED */
1864
1865	/************************************************************************
1866	* *
1867	* ICU based generic conversion functions *
1868	* *
1869	************************************************************************/
1870
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd:  ICU uconverter data structure
 * @toUnicode:  non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 * @flush:  if true, indicates end of input
 *
 * Runs one ucnv_convertEx() conversion step from @in to @out, going
 * through the pivot buffer kept in @cd.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 for any other conversion error.
 *
 * On return *@inlen is the number of octets consumed and *@outlen the
 * number of octets produced; when one of the arguments is NULL only
 * *@outlen is set (to 0), provided @outlen itself is not NULL.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen, int flush) {
    const char *ucv_in = (const char *) in;
    char *ucv_out = (char *) out;
    UErrorCode err = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
                       &cd->pivot_source, &cd->pivot_target,
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
    } else {
        /* UTF-8 => UTF-16 => encoding */
        ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
                       &ucv_in, ucv_in + *inlen, cd->pivot_buf,
                       &cd->pivot_source, &cd->pivot_target,
                       cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
    }
    /* the cursors were advanced by the converter: report the distances */
    *inlen = ucv_in - (const char *) in;
    *outlen = ucv_out - (char *) out;
    if (U_SUCCESS(err)) {
        /* reset pivot buf if this is the last call for input (flush==TRUE) */
        if (flush)
            cd->pivot_source = cd->pivot_target = cd->pivot_buf;
        return 0;
    }
    if (err == U_BUFFER_OVERFLOW_ERROR)
        return -1;
    if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
        return -2;
    return -3;
}
#endif /* LIBXML_ICU_ENABLED */
1932
1933	/************************************************************************
1934	* *
1935	* The real API used by libxml for on-the-fly conversion *
1936	* *
1937	************************************************************************/
1938
1939	/**
1940	* xmlEncInputChunk:
1941	* @handler: encoding handler
1942	* @out: a pointer to an array of bytes to store the result
1943	* @outlen: the length of @out
1944	* @in: a pointer to an array of input bytes
1945	* @inlen: the length of @in
1946	* @flush: flush (ICU-related)
1947	*
1948	* Returns 0 if success, or
1949	* -1 by lack of space, or
1950	* -2 if the transcoding fails (for *in is not valid utf8 string or
1951	* the result of transformation can't fit into the encoding we want), or
1952	* -3 if there the last byte can't form a single output char.
1953	*
1954	* The value of @inlen after return is the number of octets consumed
1955	* as the return value is 0, else unpredictable.
1956	* The value of @outlen after return is the number of octets produced.
1957	*/
1958	static int
1959	xmlEncInputChunk(xmlCharEncodingHandler handler, unsigned char out,
1960	int outlen, const unsigned char in, int *inlen, int flush) {
1961	int ret;
1962	(void)flush;
1963
1964	if (handler->input != NULL) {
1965	ret = handler->input(out, outlen, in, inlen);
1966	if (ret > 0)
1967	ret = 0;
1968	}
1969	#ifdef LIBXML_ICONV_ENABLED
1970	else if (handler->iconv_in != NULL) {
1971	ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1972	}
1973	#endif /* LIBXML_ICONV_ENABLED */
1974	#ifdef LIBXML_ICU_ENABLED
1975	else if (handler->uconv_in != NULL) {
1976	ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1977	flush);
1978	}
1979	#endif /* LIBXML_ICU_ENABLED */
1980	else {
1981	*outlen = 0;
1982	*inlen = 0;
1983	ret = -2;
1984	}
1985
1986	return(ret);
1987	}
1988
1989	/**
1990	* xmlEncOutputChunk:
1991	* @handler: encoding handler
1992	* @out: a pointer to an array of bytes to store the result
1993	* @outlen: the length of @out
1994	* @in: a pointer to an array of input bytes
1995	* @inlen: the length of @in
1996	*
1997	* Returns 0 if success, or
1998	* -1 by lack of space, or
1999	* -2 if the transcoding fails (for *in is not valid utf8 string or
2000	* the result of transformation can't fit into the encoding we want), or
2001	* -3 if there the last byte can't form a single output char.
2002	* -4 if no output function was found.
2003	*
2004	* The value of @inlen after return is the number of octets consumed
2005	* as the return value is 0, else unpredictable.
2006	* The value of @outlen after return is the number of octets produced.
2007	*/
2008	static int
2009	xmlEncOutputChunk(xmlCharEncodingHandler handler, unsigned char out,
2010	int outlen, const unsigned char in, int *inlen) {
2011	int ret;
2012
2013	if (handler->output != NULL) {
2014	ret = handler->output(out, outlen, in, inlen);
2015	if (ret > 0)
2016	ret = 0;
2017	}
2018	#ifdef LIBXML_ICONV_ENABLED
2019	else if (handler->iconv_out != NULL) {
2020	ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2021	}
2022	#endif /* LIBXML_ICONV_ENABLED */
2023	#ifdef LIBXML_ICU_ENABLED
2024	else if (handler->uconv_out != NULL) {
2025	ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2026	1);
2027	}
2028	#endif /* LIBXML_ICU_ENABLED */
2029	else {
2030	*outlen = 0;
2031	*inlen = 0;
2032	ret = -4;
2033	}
2034
2035	return(ret);
2036	}
2037
2038	/**
2039	* xmlCharEncFirstLineInt:
2040	* @handler: char encoding transformation data structure
2041	* @out: an xmlBuffer for the output.
2042	* @in: an xmlBuffer for the input
2043	* @len: number of bytes to convert for the first line, or -1
2044	*
2045	* Front-end for the encoding handler input function, but handle only
2046	* the very first line, i.e. limit itself to 45 chars.
2047	*
2048	* Returns the number of byte written if success, or
2049	* -1 general error
2050	* -2 if the transcoding fails (for *in is not valid utf8 string or
2051	* the result of transformation can't fit into the encoding we want), or
2052	*/
2053	int
2054	xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2055	xmlBufferPtr in, int len) {
2056	int ret;
2057	int written;
2058	int toconv;
2059
2060	if (handler == NULL) return(-1);
2061	if (out == NULL) return(-1);
2062	if (in == NULL) return(-1);
2063
2064	/* calculate space available */
2065	written = out->size - out->use - 1; /* count '\0' */
2066	toconv = in->use;
2067	/*
2068	* echo '<?xml version="1.0" encoding="UCS4"?>' \| wc -c => 38
2069	* 45 chars should be sufficient to reach the end of the encoding
2070	* declaration without going too far inside the document content.
2071	* on UTF-16 this means 90bytes, on UCS4 this means 180
2072	* The actual value depending on guessed encoding is passed as @len
2073	* if provided
2074	*/
2075	if (len >= 0) {
2076	if (toconv > len)
2077	toconv = len;
2078	} else {
2079	if (toconv > 180)
2080	toconv = 180;
2081	}
2082	if (toconv * 2 >= written) {
2083	xmlBufferGrow(out, toconv * 2);
2084	written = out->size - out->use - 1;
2085	}
2086
2087	ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2088	in->content, &toconv, 0);
2089	xmlBufferShrink(in, toconv);
2090	out->use += written;
2091	out->content[out->use] = 0;
2092	if (ret == -1) ret = -3;
2093
2094	#ifdef DEBUG_ENCODING
2095	switch (ret) {
2096	case 0:
2097	xmlGenericError(xmlGenericErrorContext,
2098	"converted %d bytes to %d bytes of input\n",
2099	toconv, written);
2100	break;
2101	case -1:
2102	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2103	toconv, written, in->use);
2104	break;
2105	case -2:
2106	xmlGenericError(xmlGenericErrorContext,
2107	"input conversion failed due to input error\n");
2108	break;
2109	case -3:
2110	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2111	toconv, written, in->use);
2112	break;
2113	default:
2114	xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2115	}
2116	#endif /* DEBUG_ENCODING */
2117	/*
2118	* Ignore when input buffer is not on a boundary
2119	*/
2120	if (ret == -3) ret = 0;
2121	if (ret == -1) ret = 0;
2122	return(written ? written : ret);
2123	}
2124
2125	/**
2126	* xmlCharEncFirstLine:
2127	* @handler: char encoding transformation data structure
2128	* @out: an xmlBuffer for the output.
2129	* @in: an xmlBuffer for the input
2130	*
2131	* Front-end for the encoding handler input function, but handle only
2132	* the very first line, i.e. limit itself to 45 chars.
2133	*
2134	* Returns the number of byte written if success, or
2135	* -1 general error
2136	* -2 if the transcoding fails (for *in is not valid utf8 string or
2137	* the result of transformation can't fit into the encoding we want), or
2138	*/
2139	int
2140	xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2141	xmlBufferPtr in) {
2142	return(xmlCharEncFirstLineInt(handler, out, in, -1));
2143	}
2144
2145	/**
2146	* xmlCharEncFirstLineInput:
2147	* @input: a parser input buffer
2148	* @len: number of bytes to convert for the first line, or -1
2149	*
2150	* Front-end for the encoding handler input function, but handle only
2151	* the very first line. Point is that this is based on autodetection
2152	* of the encoding and once that first line is converted we may find
2153	* out that a different decoder is needed to process the input.
2154	*
2155	* Returns the number of byte written if success, or
2156	* -1 general error
2157	* -2 if the transcoding fails (for *in is not valid utf8 string or
2158	* the result of transformation can't fit into the encoding we want), or
2159	*/
2160	int
2161	xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2162	{
2163	int ret;
2164	size_t written;
2165	size_t toconv;
2166	int c_in;
2167	int c_out;
2168	xmlBufPtr in;
2169	xmlBufPtr out;
2170
2171	if ((input == NULL) \|\| (input->encoder == NULL) \|\|
2172	(input->buffer == NULL) \|\| (input->raw == NULL))
2173	return (-1);
2174	out = input->buffer;
2175	in = input->raw;
2176
2177	toconv = xmlBufUse(in);
2178	if (toconv == 0)
2179	return (0);
2180	written = xmlBufAvail(out) - 1; /* count '\0' */
2181	/*
2182	* echo '<?xml version="1.0" encoding="UCS4"?>' \| wc -c => 38
2183	* 45 chars should be sufficient to reach the end of the encoding
2184	* declaration without going too far inside the document content.
2185	* on UTF-16 this means 90bytes, on UCS4 this means 180
2186	* The actual value depending on guessed encoding is passed as @len
2187	* if provided
2188	*/
2189	if (len >= 0) {
2190	if (toconv > (unsigned int) len)
2191	toconv = len;
2192	} else {
2193	if (toconv > 180)
2194	toconv = 180;
2195	}
2196	if (toconv * 2 >= written) {
2197	xmlBufGrow(out, toconv * 2);
2198	written = xmlBufAvail(out) - 1;
2199	}
2200	if (written > 360)
2201	written = 360;
2202
2203	c_in = toconv;
2204	c_out = written;
2205	ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2206	xmlBufContent(in), &c_in, 0);
2207	xmlBufShrink(in, c_in);
2208	xmlBufAddLen(out, c_out);
2209	if (ret == -1)
2210	ret = -3;
2211
2212	switch (ret) {
2213	case 0:
2214	#ifdef DEBUG_ENCODING
2215	xmlGenericError(xmlGenericErrorContext,
2216	"converted %d bytes to %d bytes of input\n",
2217	c_in, c_out);
2218	#endif
2219	break;
2220	case -1:
2221	#ifdef DEBUG_ENCODING
2222	xmlGenericError(xmlGenericErrorContext,
2223	"converted %d bytes to %d bytes of input, %d left\n",
2224	c_in, c_out, (int)xmlBufUse(in));
2225	#endif
2226	break;
2227	case -3:
2228	#ifdef DEBUG_ENCODING
2229	xmlGenericError(xmlGenericErrorContext,
2230	"converted %d bytes to %d bytes of input, %d left\n",
2231	c_in, c_out, (int)xmlBufUse(in));
2232	#endif
2233	break;
2234	case -2: {
2235	char buf[50];
2236	const xmlChar *content = xmlBufContent(in);
2237
2238	snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2239	content[0], content[1],
2240	content[2], content[3]);
2241	buf[49] = 0;
2242	xmlEncodingErr(XML_I18N_CONV_FAILED,
2243	"input conversion failed due to input error, bytes %s\n",
2244	buf);
2245	}
2246	}
2247	/*
2248	* Ignore when input buffer is not on a boundary
2249	*/
2250	if (ret == -3) ret = 0;
2251	if (ret == -1) ret = 0;
2252	return(c_out ? c_out : ret);
2253	}
2254
2255	/**
2256	* xmlCharEncInput:
2257	* @input: a parser input buffer
2258	* @flush: try to flush all the raw buffer
2259	*
2260	* Generic front-end for the encoding handler on parser input
2261	*
2262	* Returns the number of byte written if success, or
2263	* -1 general error
2264	* -2 if the transcoding fails (for *in is not valid utf8 string or
2265	* the result of transformation can't fit into the encoding we want), or
2266	*/
2267	int
2268	xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2269	{
2270	int ret;
2271	size_t written;
2272	size_t toconv;
2273	int c_in;
2274	int c_out;
2275	xmlBufPtr in;
2276	xmlBufPtr out;
2277
2278	if ((input == NULL) \|\| (input->encoder == NULL) \|\|
2279	(input->buffer == NULL) \|\| (input->raw == NULL))
2280	return (-1);
2281	out = input->buffer;
2282	in = input->raw;
2283
2284	toconv = xmlBufUse(in);
2285	if (toconv == 0)
2286	return (0);
2287	if ((toconv > 64 * 1024) && (flush == 0))
2288	toconv = 64 * 1024;
2289	written = xmlBufAvail(out);
2290	if (written > 0)
2291	written--; /* count '\0' */
2292	if (toconv * 2 >= written) {
2293	xmlBufGrow(out, toconv * 2);
2294	written = xmlBufAvail(out);
2295	if (written > 0)
2296	written--; /* count '\0' */
2297	}
2298	if ((written > 128 * 1024) && (flush == 0))
2299	written = 128 * 1024;
2300
2301	c_in = toconv;
2302	c_out = written;
2303	ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2304	xmlBufContent(in), &c_in, flush);
2305	xmlBufShrink(in, c_in);
2306	xmlBufAddLen(out, c_out);
2307	if (ret == -1)
2308	ret = -3;
2309
2310	switch (ret) {
2311	case 0:
2312	#ifdef DEBUG_ENCODING
2313	xmlGenericError(xmlGenericErrorContext,
2314	"converted %d bytes to %d bytes of input\n",
2315	c_in, c_out);
2316	#endif
2317	break;
2318	case -1:
2319	#ifdef DEBUG_ENCODING
2320	xmlGenericError(xmlGenericErrorContext,
2321	"converted %d bytes to %d bytes of input, %d left\n",
2322	c_in, c_out, (int)xmlBufUse(in));
2323	#endif
2324	break;
2325	case -3:
2326	#ifdef DEBUG_ENCODING
2327	xmlGenericError(xmlGenericErrorContext,
2328	"converted %d bytes to %d bytes of input, %d left\n",
2329	c_in, c_out, (int)xmlBufUse(in));
2330	#endif
2331	break;
2332	case -2: {
2333	char buf[50];
2334	const xmlChar *content = xmlBufContent(in);
2335
2336	snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2337	content[0], content[1],
2338	content[2], content[3]);
2339	buf[49] = 0;
2340	xmlEncodingErr(XML_I18N_CONV_FAILED,
2341	"input conversion failed due to input error, bytes %s\n",
2342	buf);
2343	}
2344	}
2345	/*
2346	* Ignore when input buffer is not on a boundary
2347	*/
2348	if (ret == -3)
2349	ret = 0;
2350	return (c_out? c_out : ret);
2351	}
2352
2353	/**
2354	* xmlCharEncInFunc:
2355	* @handler: char encoding transformation data structure
2356	* @out: an xmlBuffer for the output.
2357	* @in: an xmlBuffer for the input
2358	*
2359	* Generic front-end for the encoding handler input function
2360	*
2361	* Returns the number of byte written if success, or
2362	* -1 general error
2363	* -2 if the transcoding fails (for *in is not valid utf8 string or
2364	* the result of transformation can't fit into the encoding we want), or
2365	*/
2366	int
2367	xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2368	xmlBufferPtr in)
2369	{
2370	int ret;
2371	int written;
2372	int toconv;
2373
2374	if (handler == NULL)
2375	return (-1);
2376	if (out == NULL)
2377	return (-1);
2378	if (in == NULL)
2379	return (-1);
2380
2381	toconv = in->use;
2382	if (toconv == 0)
2383	return (0);
2384	written = out->size - out->use -1; /* count '\0' */
2385	if (toconv * 2 >= written) {
2386	xmlBufferGrow(out, out->size + toconv * 2);
2387	written = out->size - out->use - 1;
2388	}
2389	ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2390	in->content, &toconv, 1);
2391	xmlBufferShrink(in, toconv);
2392	out->use += written;
2393	out->content[out->use] = 0;
2394	if (ret == -1)
2395	ret = -3;
2396
2397	switch (ret) {
2398	case 0:
2399	#ifdef DEBUG_ENCODING
2400	xmlGenericError(xmlGenericErrorContext,
2401	"converted %d bytes to %d bytes of input\n",
2402	toconv, written);
2403	#endif
2404	break;
2405	case -1:
2406	#ifdef DEBUG_ENCODING
2407	xmlGenericError(xmlGenericErrorContext,
2408	"converted %d bytes to %d bytes of input, %d left\n",
2409	toconv, written, in->use);
2410	#endif
2411	break;
2412	case -3:
2413	#ifdef DEBUG_ENCODING
2414	xmlGenericError(xmlGenericErrorContext,
2415	"converted %d bytes to %d bytes of input, %d left\n",
2416	toconv, written, in->use);
2417	#endif
2418	break;
2419	case -2: {
2420	char buf[50];
2421
2422	snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2423	in->content[0], in->content[1],
2424	in->content[2], in->content[3]);
2425	buf[49] = 0;
2426	xmlEncodingErr(XML_I18N_CONV_FAILED,
2427	"input conversion failed due to input error, bytes %s\n",
2428	buf);
2429	}
2430	}
2431	/*
2432	* Ignore when input buffer is not on a boundary
2433	*/
2434	if (ret == -3)
2435	ret = 0;
2436	return (written? written : ret);
2437	}
2438
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 *
 * Data is taken from @output->buffer and the converted bytes are appended
 * to @output->conv. One pass offers at most 64KB of input and accepts at
 * most 256KB of output; the function loops while progress is made.
 *
 * Returns the total number of bytes appended to @output->conv when that
 *     number is non-zero (for an @init call: the bytes emitted by the
 *     initialisation); otherwise
 *      0 if there was nothing to convert,
 *     -1 if @output or one of its members is NULL, or if the encoder has
 *        no output function,
 *     -2 if the transcoding failed and no character reference could be
 *        substituted,
 *     -3 if the encoder could not make progress.
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);
    if (written > 0)
        written--; /* count '\0' */

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv == 0)
        return (writtentot);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        xmlBufGrow(out, toconv * 4);
        /*
         * Re-measure with the same guard as above: if the grow failed on
         * a full buffer, "avail - 1" would wrap around (size_t) and be
         * mistaken for a lot of free space.
         */
        written = xmlBufAvail(out);
        if (written > 0)
            written--;
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    /* Drop the consumed input and publish the produced output. */
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;
    if (ret == -1) {
        if (c_out > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    c_in, c_out);
#endif
            break;
        case -1:
            /* Not reachable: -1 is retried or mapped to -3 above. */
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "output conversion failed by lack of space\n");
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    c_in, c_out, (int) xmlBufUse(in));
#endif
            break;
        case -4:
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = (int) xmlBufUse(in);
            xmlChar *content = xmlBufContent(in);
            int cur, charrefLen;

            /* Decode the character the encoder choked on. */
            cur = xmlGetUTF8Char(content, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    content[0], content[1],
                    content[2], content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufShrink(in, len);
            xmlBufGrow(out, charrefLen * 4);
            /* Guarded for the same reason as the re-measure above. */
            written = xmlBufAvail(out);
            if (written > 0)
                written--;
            c_out = written;
            c_in = charrefLen;
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                    charref, &c_in);

            if ((ret < 0) || (c_in != charrefLen)) {
                char buf[50];

                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         content[0], content[1],
                         content[2], content[3]);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
                    content[0] = ' ';
                break;
            }

            xmlBufAddLen(out, c_out);
            writtentot += c_out;
            goto retry;
        }
    }
    return(writtentot ? writtentot : ret);
}
#endif
2611
2612	/**
2613	* xmlCharEncOutFunc:
2614	* @handler: char encoding transformation data structure
2615	* @out: an xmlBuffer for the output.
2616	* @in: an xmlBuffer for the input
2617	*
2618	* Generic front-end for the encoding handler output function
2619	* a first call with @in == NULL has to be made firs to initiate the
2620	* output in case of non-stateless encoding needing to initiate their
2621	* state or the output (like the BOM in UTF16).
2622	* In case of UTF8 sequence conversion errors for the given encoder,
2623	* the content will be automatically remapped to a CharRef sequence.
2624	*
2625	* Returns the number of byte written if success, or
2626	* -1 general error
2627	* -2 if the transcoding fails (for *in is not valid utf8 string or
2628	* the result of transformation can't fit into the encoding we want), or
2629	*/
2630	int
2631	xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2632	xmlBufferPtr in) {
2633	int ret;
2634	int written;
2635	int writtentot = 0;
2636	int toconv;
2637
2638	if (handler == NULL) return(-1);
2639	if (out == NULL) return(-1);
2640
2641	retry:
2642
2643	written = out->size - out->use;
2644
2645	if (written > 0)
2646	written--; /* Gennady: count '/0' */
2647
2648	/*
2649	* First specific handling of in = NULL, i.e. the initialization call
2650	*/
2651	if (in == NULL) {
2652	toconv = 0;
2653	/* TODO: Check return value. */
2654	xmlEncOutputChunk(handler, &out->content[out->use], &written,
2655	NULL, &toconv);
2656	out->use += written;
2657	out->content[out->use] = 0;
2658	#ifdef DEBUG_ENCODING
2659	xmlGenericError(xmlGenericErrorContext,
2660	"initialized encoder\n");
2661	#endif
2662	return(0);
2663	}
2664
2665	/*
2666	* Conversion itself.
2667	*/
2668	toconv = in->use;
2669	if (toconv == 0)
2670	return(0);
2671	if (toconv * 4 >= written) {
2672	xmlBufferGrow(out, toconv * 4);
2673	written = out->size - out->use - 1;
2674	}
2675	ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2676	in->content, &toconv);
2677	xmlBufferShrink(in, toconv);
2678	out->use += written;
2679	writtentot += written;
2680	out->content[out->use] = 0;
2681	if (ret == -1) {
2682	if (written > 0) {
2683	/* Can be a limitation of iconv or uconv */
2684	goto retry;
2685	}
2686	ret = -3;
2687	}
2688
2689	/*
2690	* Attempt to handle error cases
2691	*/
2692	switch (ret) {
2693	case 0:
2694	#ifdef DEBUG_ENCODING
2695	xmlGenericError(xmlGenericErrorContext,
2696	"converted %d bytes to %d bytes of output\n",
2697	toconv, written);
2698	#endif
2699	break;
2700	case -1:
2701	#ifdef DEBUG_ENCODING
2702	xmlGenericError(xmlGenericErrorContext,
2703	"output conversion failed by lack of space\n");
2704	#endif
2705	break;
2706	case -3:
2707	#ifdef DEBUG_ENCODING
2708	xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2709	toconv, written, in->use);
2710	#endif
2711	break;
2712	case -4:
2713	xmlEncodingErr(XML_I18N_NO_OUTPUT,
2714	"xmlCharEncOutFunc: no output function !\n", NULL);
2715	ret = -1;
2716	break;
2717	case -2: {
2718	xmlChar charref[20];
2719	int len = in->use;
2720	const xmlChar utf = (const xmlChar ) in->content;
2721	int cur, charrefLen;
2722
2723	cur = xmlGetUTF8Char(utf, &len);
2724	if (cur <= 0)
2725	break;
2726
2727	#ifdef DEBUG_ENCODING
2728	xmlGenericError(xmlGenericErrorContext,
2729	"handling output conversion error\n");
2730	xmlGenericError(xmlGenericErrorContext,
2731	"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2732	in->content[0], in->content[1],
2733	in->content[2], in->content[3]);
2734	#endif
2735	/*
2736	* Removes the UTF8 sequence, and replace it by a charref
2737	* and continue the transcoding phase, hoping the error
2738	* did not mangle the encoder state.
2739	*/
2740	charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2741	"&#%d;", cur);
2742	xmlBufferShrink(in, len);
2743	xmlBufferGrow(out, charrefLen * 4);
2744	written = out->size - out->use - 1;
2745	toconv = charrefLen;
2746	ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2747	charref, &toconv);
2748
2749	if ((ret < 0) \|\| (toconv != charrefLen)) {
2750	char buf[50];
2751
2752	snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2753	in->content[0], in->content[1],
2754	in->content[2], in->content[3]);
2755	buf[49] = 0;
2756	xmlEncodingErr(XML_I18N_CONV_FAILED,
2757	"output conversion failed due to conv error, bytes %s\n",
2758	buf);
2759	if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2760	in->content[0] = ' ';
2761	break;
2762	}
2763
2764	out->use += written;
2765	writtentot += written;
2766	out->content[out->use] = 0;
2767	goto retry;
2768	}
2769	}
2770	return(writtentot ? writtentot : ret);
2771	}
2772
2773	/**
2774	* xmlCharEncCloseFunc:
2775	* @handler: char encoding transformation data structure
2776	*
2777	* Generic front-end for encoding handler close function
2778	*
2779	* Returns 0 if success, or -1 in case of error
2780	*/
2781	int
2782	xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2783	int ret = 0;
2784	int tofree = 0;
2785	int i, handler_in_list = 0;
2786
2787	if (handler == NULL) return(-1);
2788	if (handler->name == NULL) return(-1);
2789	if (handlers != NULL) {
2790	for (i = 0;i < nbCharEncodingHandler; i++) {
2791	if (handler == handlers[i]) {
2792	handler_in_list = 1;
2793	break;
2794	}
2795	}
2796	}
2797	#ifdef LIBXML_ICONV_ENABLED
2798	/*
2799	* Iconv handlers can be used only once, free the whole block.
2800	* and the associated icon resources.
2801	*/
2802	if ((handler_in_list == 0) &&
2803	((handler->iconv_out != NULL) \|\| (handler->iconv_in != NULL))) {
2804	tofree = 1;
2805	if (handler->iconv_out != NULL) {
2806	if (iconv_close(handler->iconv_out))
2807	ret = -1;
2808	handler->iconv_out = NULL;
2809	}
2810	if (handler->iconv_in != NULL) {
2811	if (iconv_close(handler->iconv_in))
2812	ret = -1;
2813	handler->iconv_in = NULL;
2814	}
2815	}
2816	#endif /* LIBXML_ICONV_ENABLED */
2817	#ifdef LIBXML_ICU_ENABLED
2818	if ((handler_in_list == 0) &&
2819	((handler->uconv_out != NULL) \|\| (handler->uconv_in != NULL))) {
2820	tofree = 1;
2821	if (handler->uconv_out != NULL) {
2822	closeIcuConverter(handler->uconv_out);
2823	handler->uconv_out = NULL;
2824	}
2825	if (handler->uconv_in != NULL) {
2826	closeIcuConverter(handler->uconv_in);
2827	handler->uconv_in = NULL;
2828	}
2829	}
2830	#endif
2831	if (tofree) {
2832	/* free up only dynamic handlers iconv/uconv */
2833	if (handler->name != NULL)
2834	xmlFree(handler->name);
2835	handler->name = NULL;
2836	xmlFree(handler);
2837	}
2838	#ifdef DEBUG_ENCODING
2839	if (ret)
2840	xmlGenericError(xmlGenericErrorContext,
2841	"failed to close the encoding handler\n");
2842	else
2843	xmlGenericError(xmlGenericErrorContext,
2844	"closed the encoding handler\n");
2845	#endif
2846
2847	return(ret);
2848	}
2849
2850	/**
2851	* xmlByteConsumed:
2852	* @ctxt: an XML parser context
2853	*
2854	* This function provides the current index of the parser relative
2855	* to the start of the current entity. This function is computed in
2856	* bytes from the beginning starting at zero and finishing at the
2857	* size in byte of the file if parsing a file. The function is
2858	* of constant cost if the input is UTF-8 but can be costly if run
2859	* on non-UTF-8 input.
2860	*
2861	* Returns the index in bytes from the beginning of the entity or -1
2862	* in case the index could not be computed.
2863	*/
2864	long
2865	xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2866	xmlParserInputPtr in;
2867
2868	if (ctxt == NULL) return(-1);
2869	in = ctxt->input;
2870	if (in == NULL) return(-1);
2871	if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2872	unsigned int unused = 0;
2873	xmlCharEncodingHandler * handler = in->buf->encoder;
2874	/*
2875	* Encoding conversion, compute the number of unused original
2876	* bytes from the input not consumed and subtract that from
2877	* the raw consumed value, this is not a cheap operation
2878	*/
2879	if (in->end - in->cur > 0) {
2880	unsigned char convbuf[32000];
2881	const unsigned char cur = (const unsigned char )in->cur;
2882	int toconv = in->end - in->cur, written = 32000;
2883
2884	int ret;
2885
2886	do {
2887	toconv = in->end - cur;
2888	written = 32000;
2889	ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2890	cur, &toconv);
2891	if (ret < 0) {
2892	if (written > 0)
2893	ret = -2;
2894	else
2895	return(-1);
2896	}
2897	unused += written;
2898	cur += toconv;
2899	} while (ret == -2);
2900	}
2901	if (in->buf->rawconsumed < unused)
2902	return(-1);
2903	return(in->buf->rawconsumed - unused);
2904	}
2905	return(in->consumed + (in->cur - in->base));
2906	}
2907
2908	#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2909	#ifdef LIBXML_ISO8859X_ENABLED
2910
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every input character yields exactly one output
 * byte; conversion stops early, without error, once @out is full.
 *
 * @xlattable layout as used below: bytes 0..31 map the low 5 bits of a
 * 2-byte lead to a block number, bytes 32..47 map the low 4 bits of a
 * 3-byte lead to a block number, and 64-byte blocks start at offset 48.
 * A looked-up value of 0 means "not in the character set".
 *
 * Returns the number of bytes written to @out if success (0 when @in is
 *     NULL, i.e. the initialisation call),
 *     -2 if the transcoding fails (malformed sequence, or character
 *        outside the target set),
 *     -3 if the input ends in the middle of a multi-byte sequence,
 *     -1 if @out, @outlen, @inlen or @xlattable is NULL.
 * The value of @inlen after return is the number of octets consumed,
 * counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    /* A non-positive capacity is treated as "no room at all". */
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);

    /* One output byte per character, so checking room once is enough. */
    while ((in < inend) && (out < outend)) {
        unsigned char d = *in++;

        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            break;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                break;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                break;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if ((inend - in) < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                break;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                break;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                break;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            break;
        }
        processed = in;
    }

    /* Report progress up to the last completely converted character. */
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
3029
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 code points, indexed by (byte - 0x80); an entry of
 *                0 marks a byte with no mapping
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * has no room left for the next character.
 *
 * Returns the number of bytes written to @out if success, or -1 if an
 *     argument is NULL or an unmapped byte is met.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: runs while at least 3 output bytes are free, the most a
     * single table entry can expand to. Written as a difference so that
     * no pointer before the start of @out is ever formed.
     */
    while ((in < inend) && ((outend - out) > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* Fast path: copy a run of ASCII, limited by the room in @out. */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* Bounds first, then dereference: never read past the input. */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* Up to two bytes of room may remain: enough for ASCII only. */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ = *in++;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
3095
3096
3097	/************************************************************************
3098	* Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
3099	************************************************************************/
3100
/*
 * Code points for the upper half (bytes 0x80..0xFF) of ISO-8859-2, eight
 * entries per row; entry i belongs to byte 0x80 + i. The first 32 entries
 * (0x80..0x9F) map to themselves.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as "undefined" --
 * confirm at the handler registration site.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3119
/*
 * Two-level lookup table for the UTF-8 -> ISO-8859-2 direction, in the
 * layout UTF8ToISO8859x() reads through its @xlattable argument:
 *   bytes  0..31  block number for a 2-byte lead (index: lead & 0x1F)
 *   bytes 32..47  block number for a 3-byte lead (index: lead & 0x0F)
 *   bytes 48..    six 64-byte blocks indexed by (continuation & 0x3F);
 *                 the last level holds the target byte, 0 = no mapping
 * Block 0 (the first 64 bytes after the header) is all zeros, so any
 * lead that selects it ends up unmapped.
 * The initializer supplies exactly 48 + 6 * 64 characters, so the string
 * literal's terminating NUL is not stored.
 * NOTE(review): presumably passed as @xlattable for ISO-8859-2 --
 * confirm at the handler registration site.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
"\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
"\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
"\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
"\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
"\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
"\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
"\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
"\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
"\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
"\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
"\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
"\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
"\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3149
/*
 * Code points for the upper half (bytes 0x80..0xFF) of ISO-8859-3, eight
 * entries per row; entry i belongs to byte 0x80 + i. Several entries are
 * 0x0000 (for example the one for byte 0xA5).
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and rejects 0 entries as undefined
 * code points -- confirm at the handler registration site.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3168
3169	static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3170	"\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3171	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3172	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3173	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3174	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3176	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3177	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3178	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3179	"\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3180	"\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3181	"\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3182	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3183	"\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3184	"\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186	"\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3187	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189	"\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3191	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3195	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3196	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3197	"\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3198	"\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3199	"\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3200	"\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3201	};
3202
3203	static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3204	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3205	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3206	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3207	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3208	0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3209	0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3210	0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3211	0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3212	0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3213	0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3214	0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3215	0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3216	0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3217	0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3218	0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3219	0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3220	};
3221
3222	static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3223	"\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3224	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3225	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3226	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3227	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3231	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3232	"\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3233	"\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3234	"\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3235	"\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3236	"\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3237	"\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3238	"\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3239	"\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3240	"\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3241	"\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3242	"\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3243	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3244	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246	"\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3247	"\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3248	"\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3249	"\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3250	};
3251
3252	static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3253	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3254	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3255	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3256	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3257	0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3258	0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3259	0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3260	0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3261	0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3262	0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3263	0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3264	0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3265	0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3266	0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3267	0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3268	0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3269	};
3270
3271	static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3272	"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273	"\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274	"\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3275	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3276	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3277	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3278	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3279	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3280	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3281	"\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3282	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3283	"\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3284	"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3285	"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3286	"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3287	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3288	"\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3289	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3291	"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3292	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296	"\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299	};
3300
3301	static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3302	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3303	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3304	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3305	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3306	0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3307	0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3308	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3309	0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3310	0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3311	0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3312	0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3313	0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3314	0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3315	0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3316	0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3317	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3318	};
3319
3320	static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3321	"\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322	"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3323	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3328	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3329	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3330	"\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3331	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3332	"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3337	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3338	"\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3339	"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3340	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3341	"\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344	};
3345
3346	static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3347	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3348	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3349	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3350	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3351	0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3352	0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3353	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3354	0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3355	0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3356	0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3357	0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3358	0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3359	0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3360	0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3361	0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3362	0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3363	};
3364
3365	static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3366	"\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3367	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368	"\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3374	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3375	"\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3376	"\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3377	"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3378	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3379	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382	"\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3383	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385	"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3387	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3388	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3389	"\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3390	"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3391	"\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3392	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3393	"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3394	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3395	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3397	};
3398
3399	static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3400	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3401	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3402	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3403	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3404	0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3405	0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3406	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3407	0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3408	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3409	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3410	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3411	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3412	0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3413	0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3414	0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3415	0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3416	};
3417
3418	static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3419	"\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420	"\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3421	"\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3424	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3425	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3426	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3427	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3428	"\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3429	"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3430	"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3431	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3432	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435	"\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3436	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437	"\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3438	"\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3440	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3441	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3443	"\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3444	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3447	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3448	"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3449	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3450	};
3451
3452	static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3453	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3454	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3455	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3456	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3457	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3458	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3459	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3460	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3461	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3462	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3463	0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3464	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3465	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3466	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3467	0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3468	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3469	};
3470
3471	static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3472	"\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3476	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3479	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3480	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3481	"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3482	"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3483	"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3484	"\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3485	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3486	"\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3487	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3489	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3490	"\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3491	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3492	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3493	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495	};
3496
3497	static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3498	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3499	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3500	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3501	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3502	0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3503	0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3504	0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3505	0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3506	0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3507	0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3508	0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3509	0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3510	0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3511	0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3512	0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3513	0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3514	};
3515
3516	static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3517	"\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3518	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3519	"\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3520	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3521	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3522	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3523	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3524	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3525	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3526	"\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3527	"\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3528	"\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3529	"\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3530	"\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3531	"\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3532	"\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3533	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3534	"\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3535	"\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3536	"\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541	"\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3542	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3543	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3544	"\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3545	"\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3546	"\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3547	"\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3548	};
3549
3550	static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3551	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3552	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3553	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3554	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3555	0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3556	0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3557	0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3558	0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3559	0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3560	0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3561	0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3562	0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3563	0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3564	0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3565	0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3566	0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3567	};
3568
3569	static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3570	"\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3571	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3572	"\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3574	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3575	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3576	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3577	"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3578	"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3579	"\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3580	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3581	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3582	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3583	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3584	"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3585	"\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3586	"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3587	"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3588	"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3589	"\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3590	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3591	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3592	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3593	"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3594	"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3595	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3596	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3597	};
3598
3599	static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3600	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3601	0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3602	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3603	0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3604	0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3605	0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3606	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3607	0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3608	0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3609	0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3610	0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3611	0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3612	0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3613	0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3614	0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3615	0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3616	};
3617
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Byte lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_13.
 * Layout as declared: 48 leading bytes followed by 7 blocks of 64 bytes.
 * The adjacent string literals below supply exactly 48 + 7 * 64 bytes, so
 * the array is completely filled and no terminating NUL is stored.
 *
 * NOTE(review): presumably the leading bytes select a 64-byte block from
 * the bits of a UTF-8 lead byte and each block is indexed by the low six
 * bits of a continuation byte, with 0 meaning "not in this charset" --
 * confirm against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3651
/*
 * xmlunicodetable_ISO8859_14:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_14ToUTF8. The first 32 entries are 0x0080..0x009f in order.
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-14 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3670
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Byte lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_14.
 * Layout as declared: 48 leading bytes followed by 10 blocks of 64 bytes.
 * The adjacent string literals below supply exactly 48 + 10 * 64 bytes, so
 * the array is completely filled and no terminating NUL is stored.
 *
 * NOTE(review): presumably the leading bytes select a 64-byte block from
 * the bits of a UTF-8 lead byte and each block is indexed by the low six
 * bits of a continuation byte, with 0 meaning "not in this charset" --
 * confirm against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3716
/*
 * xmlunicodetable_ISO8859_15:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_15ToUTF8. The first 32 entries are 0x0080..0x009f in order.
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-15 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3735
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * Layout as declared: 48 leading bytes followed by 6 blocks of 64 bytes.
 * The adjacent string literals below supply exactly 48 + 6 * 64 bytes, so
 * the array is completely filled and no terminating NUL is stored.
 *
 * NOTE(review): presumably the leading bytes select a 64-byte block from
 * the bits of a UTF-8 lead byte and each block is indexed by the low six
 * bits of a continuation byte, with 0 meaning "not in this charset" --
 * confirm against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3765
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8. The first 32 entries are 0x0080..0x009f in order.
 *
 * NOTE(review): presumably entry i is the Unicode code point for the
 * ISO-8859-16 byte 0x80 + i -- confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3784
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * Layout as declared: 48 leading bytes followed by 9 blocks of 64 bytes.
 * The adjacent string literals below supply exactly 48 + 9 * 64 bytes, so
 * the array is completely filled and no terminating NUL is stored.
 *
 * NOTE(review): presumably the leading bytes select a 64-byte block from
 * the bits of a UTF-8 lead byte and each block is indexed by the low six
 * bits of a continuation byte, with 0 meaning "not in this charset" --
 * confirm against UTF8ToISO8859x.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3826
3827
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 * (ISO-8859-12 is skipped: no wrapper pair is defined for it below)
 */
3831
3832	static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3833	const unsigned char* in, int *inlen) {
3834	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3835	}
3836	static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3837	const unsigned char* in, int *inlen) {
3838	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3839	}
3840
3841	static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3842	const unsigned char* in, int *inlen) {
3843	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3844	}
3845	static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3846	const unsigned char* in, int *inlen) {
3847	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3848	}
3849
3850	static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3851	const unsigned char* in, int *inlen) {
3852	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3853	}
3854	static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3855	const unsigned char* in, int *inlen) {
3856	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3857	}
3858
3859	static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3860	const unsigned char* in, int *inlen) {
3861	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3862	}
3863	static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3864	const unsigned char* in, int *inlen) {
3865	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3866	}
3867
3868	static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3869	const unsigned char* in, int *inlen) {
3870	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3871	}
3872	static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3873	const unsigned char* in, int *inlen) {
3874	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3875	}
3876
3877	static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3878	const unsigned char* in, int *inlen) {
3879	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3880	}
3881	static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3882	const unsigned char* in, int *inlen) {
3883	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3884	}
3885
3886	static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3887	const unsigned char* in, int *inlen) {
3888	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3889	}
3890	static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3891	const unsigned char* in, int *inlen) {
3892	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3893	}
3894
3895	static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3896	const unsigned char* in, int *inlen) {
3897	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3898	}
3899	static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3900	const unsigned char* in, int *inlen) {
3901	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3902	}
3903
3904	static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3905	const unsigned char* in, int *inlen) {
3906	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3907	}
3908	static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3909	const unsigned char* in, int *inlen) {
3910	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3911	}
3912
3913	static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3914	const unsigned char* in, int *inlen) {
3915	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3916	}
3917	static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3918	const unsigned char* in, int *inlen) {
3919	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3920	}
3921
3922	static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3923	const unsigned char* in, int *inlen) {
3924	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3925	}
3926	static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3927	const unsigned char* in, int *inlen) {
3928	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3929	}
3930
3931	static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3932	const unsigned char* in, int *inlen) {
3933	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3934	}
3935	static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3936	const unsigned char* in, int *inlen) {
3937	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3938	}
3939
3940	static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3941	const unsigned char* in, int *inlen) {
3942	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3943	}
3944	static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3945	const unsigned char* in, int *inlen) {
3946	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3947	}
3948
3949	static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3950	const unsigned char* in, int *inlen) {
3951	return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3952	}
3953	static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3954	const unsigned char* in, int *inlen) {
3955	return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3956	}
3957
3958	static void
3959	xmlRegisterCharEncodingHandlersISO8859x (void) {
3960	xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3961	xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3962	xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3963	xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3964	xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3965	xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3966	xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3967	xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3968	xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3969	xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3970	xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3971	xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3972	xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3973	xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3974	}
3975
3976	#endif
3977	#endif
3978
3979	#define bottom_encoding
3980	#include "elfgcchack.h"

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/libxml2-2.9.14/encoding.c@ 103285

以其他格式下載: