VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/encoding.c@ 104201

最後變更 在這個檔案從104201是 104106,由 vboxsync 提交於 10 月 前

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • 屬性 svn:eol-style 設為 native
檔案大小: 130.5 KB
 
1/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * [email protected]
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <[email protected]>
21 */
22
23#define IN_LIBXML
24#include "libxml.h"
25
26#include <string.h>
27#include <limits.h>
28#include <ctype.h>
29#include <stdlib.h>
30
31#ifdef LIBXML_ICONV_ENABLED
32#include <errno.h>
33#endif
34
35#include <libxml/encoding.h>
36#include <libxml/xmlmemory.h>
37#include <libxml/parser.h>
38#ifdef LIBXML_HTML_ENABLED
39#include <libxml/HTMLparser.h>
40#endif
41#include <libxml/xmlerror.h>
42
43#include "private/buf.h"
44#include "private/enc.h"
45#include "private/error.h"
46
47#ifdef LIBXML_ICU_ENABLED
48#include <unicode/ucnv.h>
49/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50#define ICU_PIVOT_BUF_SIZE 1024
51typedef struct _uconv_t uconv_t;
52struct _uconv_t {
53 UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54 UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55 UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
56 UChar *pivot_source;
57 UChar *pivot_target;
58};
59#endif
60
/*
 * One entry of the encoding alias table: @alias is the lookup key and
 * @name the encoding name it resolves to.
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
67
68static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
69static int xmlCharEncodingAliasesNb = 0;
70static int xmlCharEncodingAliasesMax = 0;
71
72static int xmlLittleEndian = 1;
73
74#ifdef LIBXML_ICU_ENABLED
75static uconv_t*
76openIcuConverter(const char* name, int toUnicode)
77{
78 UErrorCode status = U_ZERO_ERROR;
79 uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
80 if (conv == NULL)
81 return NULL;
82
83 conv->pivot_source = conv->pivot_buf;
84 conv->pivot_target = conv->pivot_buf;
85
86 conv->uconv = ucnv_open(name, &status);
87 if (U_FAILURE(status))
88 goto error;
89
90 status = U_ZERO_ERROR;
91 if (toUnicode) {
92 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
93 NULL, NULL, NULL, &status);
94 }
95 else {
96 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
97 NULL, NULL, NULL, &status);
98 }
99 if (U_FAILURE(status))
100 goto error;
101
102 status = U_ZERO_ERROR;
103 conv->utf8 = ucnv_open("UTF-8", &status);
104 if (U_SUCCESS(status))
105 return conv;
106
107error:
108 if (conv->uconv)
109 ucnv_close(conv->uconv);
110 xmlFree(conv);
111 return NULL;
112}
113
114static void
115closeIcuConverter(uconv_t *conv)
116{
117 if (conv != NULL) {
118 ucnv_close(conv->uconv);
119 ucnv_close(conv->utf8);
120 xmlFree(conv);
121 }
122}
123#endif /* LIBXML_ICU_ENABLED */
124
125/************************************************************************
126 * *
127 * Conversions To/From UTF8 encoding *
128 * *
129 ************************************************************************/
130
131/**
132 * asciiToUTF8:
133 * @out: a pointer to an array of bytes to store the result
134 * @outlen: the length of @out
135 * @in: a pointer to an array of ASCII chars
136 * @inlen: the length of @in
137 *
138 * Take a block of ASCII chars in and try to convert it to an UTF-8
139 * block of chars out.
140 *
141 * Returns the number of bytes written or an XML_ENC_ERR code.
142 *
143 * The value of @inlen after return is the number of octets consumed
144 * if the return value is positive, else unpredictable.
145 * The value of @outlen after return is the number of octets produced.
146 */
147static int
148asciiToUTF8(unsigned char* out, int *outlen,
149 const unsigned char* in, int *inlen) {
150 unsigned char* outstart = out;
151 const unsigned char* base = in;
152 const unsigned char* processed = in;
153 unsigned char* outend = out + *outlen;
154 const unsigned char* inend;
155 unsigned int c;
156
157 inend = in + (*inlen);
158 while ((in < inend) && (out - outstart + 5 < *outlen)) {
159 c= *in++;
160
161 if (out >= outend)
162 break;
163 if (c < 0x80) {
164 *out++ = c;
165 } else {
166 *outlen = out - outstart;
167 *inlen = processed - base;
168 return(XML_ENC_ERR_INPUT);
169 }
170
171 processed = (const unsigned char*) in;
172 }
173 *outlen = out - outstart;
174 *inlen = processed - base;
175 return(*outlen);
176}
177
178#ifdef LIBXML_OUTPUT_ENABLED
179/**
180 * UTF8Toascii:
181 * @out: a pointer to an array of bytes to store the result
182 * @outlen: the length of @out
183 * @in: a pointer to an array of UTF-8 chars
184 * @inlen: the length of @in
185 *
186 * Take a block of UTF-8 chars in and try to convert it to an ASCII
187 * block of chars out.
188 *
189 * Returns the number of bytes written or an XML_ENC_ERR code.
190 *
191 * The value of @inlen after return is the number of octets consumed
192 * if the return value is positive, else unpredictable.
193 * The value of @outlen after return is the number of octets produced.
194 */
195static int
196UTF8Toascii(unsigned char* out, int *outlen,
197 const unsigned char* in, int *inlen) {
198 const unsigned char* processed = in;
199 const unsigned char* outend;
200 const unsigned char* outstart = out;
201 const unsigned char* instart = in;
202 const unsigned char* inend;
203 unsigned int c, d;
204 int trailing;
205
206 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
207 return(XML_ENC_ERR_INTERNAL);
208 if (in == NULL) {
209 /*
210 * initialization nothing to do
211 */
212 *outlen = 0;
213 *inlen = 0;
214 return(0);
215 }
216 inend = in + (*inlen);
217 outend = out + (*outlen);
218 while (in < inend) {
219 d = *in++;
220 if (d < 0x80) { c= d; trailing= 0; }
221 else if (d < 0xC0) {
222 /* trailing byte in leading position */
223 *outlen = out - outstart;
224 *inlen = processed - instart;
225 return(XML_ENC_ERR_INPUT);
226 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
227 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
228 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
229 else {
230 /* no chance for this in Ascii */
231 *outlen = out - outstart;
232 *inlen = processed - instart;
233 return(XML_ENC_ERR_INPUT);
234 }
235
236 if (inend - in < trailing) {
237 break;
238 }
239
240 for ( ; trailing; trailing--) {
241 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
242 break;
243 c <<= 6;
244 c |= d & 0x3F;
245 }
246
247 /* assertion: c is a single UTF-4 value */
248 if (c < 0x80) {
249 if (out >= outend)
250 break;
251 *out++ = c;
252 } else {
253 /* no chance for this in Ascii */
254 *outlen = out - outstart;
255 *inlen = processed - instart;
256 return(XML_ENC_ERR_INPUT);
257 }
258 processed = in;
259 }
260 *outlen = out - outstart;
261 *inlen = processed - instart;
262 return(*outlen);
263}
264#endif /* LIBXML_OUTPUT_ENABLED */
265
266/**
267 * isolat1ToUTF8:
268 * @out: a pointer to an array of bytes to store the result
269 * @outlen: the length of @out
270 * @in: a pointer to an array of ISO Latin 1 chars
271 * @inlen: the length of @in
272 *
273 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
274 * block of chars out.
275 *
276 * Returns the number of bytes written or an XML_ENC_ERR code.
277 *
278 * The value of @inlen after return is the number of octets consumed
279 * if the return value is positive, else unpredictable.
280 * The value of @outlen after return is the number of octets produced.
281 */
282int
283isolat1ToUTF8(unsigned char* out, int *outlen,
284 const unsigned char* in, int *inlen) {
285 unsigned char* outstart = out;
286 const unsigned char* base = in;
287 unsigned char* outend;
288 const unsigned char* inend;
289 const unsigned char* instop;
290
291 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
292 return(XML_ENC_ERR_INTERNAL);
293
294 outend = out + *outlen;
295 inend = in + (*inlen);
296 instop = inend;
297
298 while ((in < inend) && (out < outend - 1)) {
299 if (*in >= 0x80) {
300 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
301 *out++ = ((*in) & 0x3F) | 0x80;
302 ++in;
303 }
304 if ((instop - in) > (outend - out)) instop = in + (outend - out);
305 while ((in < instop) && (*in < 0x80)) {
306 *out++ = *in++;
307 }
308 }
309 if ((in < inend) && (out < outend) && (*in < 0x80)) {
310 *out++ = *in++;
311 }
312 *outlen = out - outstart;
313 *inlen = in - base;
314 return(*outlen);
315}
316
317/**
318 * UTF8ToUTF8:
319 * @out: a pointer to an array of bytes to store the result
320 * @outlen: the length of @out
321 * @inb: a pointer to an array of UTF-8 chars
322 * @inlenb: the length of @in in UTF-8 chars
323 *
324 * No op copy operation for UTF8 handling.
325 *
326 * Returns the number of bytes written or an XML_ENC_ERR code.
327 *
328 * The value of *inlen after return is the number of octets consumed
329 * if the return value is positive, else unpredictable.
330 */
331static int
332UTF8ToUTF8(unsigned char* out, int *outlen,
333 const unsigned char* inb, int *inlenb)
334{
335 int len;
336
337 if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
338 return(XML_ENC_ERR_INTERNAL);
339 if (inb == NULL) {
340 /* inb == NULL means output is initialized. */
341 *outlen = 0;
342 *inlenb = 0;
343 return(0);
344 }
345 if (*outlen > *inlenb) {
346 len = *inlenb;
347 } else {
348 len = *outlen;
349 }
350 if (len < 0)
351 return(XML_ENC_ERR_INTERNAL);
352
353 /*
354 * FIXME: Conversion functions must assure valid UTF-8, so we have
355 * to check for UTF-8 validity. Preferably, this converter shouldn't
356 * be used at all.
357 */
358 memcpy(out, inb, len);
359
360 *outlen = len;
361 *inlenb = len;
362 return(*outlen);
363}
364
365
366#ifdef LIBXML_OUTPUT_ENABLED
367/**
368 * UTF8Toisolat1:
369 * @out: a pointer to an array of bytes to store the result
370 * @outlen: the length of @out
371 * @in: a pointer to an array of UTF-8 chars
372 * @inlen: the length of @in
373 *
374 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
375 * block of chars out.
376 *
377 * Returns the number of bytes written or an XML_ENC_ERR code.
378 *
379 * The value of @inlen after return is the number of octets consumed
380 * if the return value is positive, else unpredictable.
381 * The value of @outlen after return is the number of octets produced.
382 */
383int
384UTF8Toisolat1(unsigned char* out, int *outlen,
385 const unsigned char* in, int *inlen) {
386 const unsigned char* processed = in;
387 const unsigned char* outend;
388 const unsigned char* outstart = out;
389 const unsigned char* instart = in;
390 const unsigned char* inend;
391 unsigned int c, d;
392 int trailing;
393
394 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
395 return(XML_ENC_ERR_INTERNAL);
396 if (in == NULL) {
397 /*
398 * initialization nothing to do
399 */
400 *outlen = 0;
401 *inlen = 0;
402 return(0);
403 }
404 inend = in + (*inlen);
405 outend = out + (*outlen);
406 while (in < inend) {
407 d = *in++;
408 if (d < 0x80) { c= d; trailing= 0; }
409 else if (d < 0xC0) {
410 /* trailing byte in leading position */
411 *outlen = out - outstart;
412 *inlen = processed - instart;
413 return(XML_ENC_ERR_INPUT);
414 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
415 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
416 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
417 else {
418 /* no chance for this in IsoLat1 */
419 *outlen = out - outstart;
420 *inlen = processed - instart;
421 return(XML_ENC_ERR_INPUT);
422 }
423
424 if (inend - in < trailing) {
425 break;
426 }
427
428 for ( ; trailing; trailing--) {
429 if (in >= inend)
430 break;
431 if (((d= *in++) & 0xC0) != 0x80) {
432 *outlen = out - outstart;
433 *inlen = processed - instart;
434 return(XML_ENC_ERR_INPUT);
435 }
436 c <<= 6;
437 c |= d & 0x3F;
438 }
439
440 /* assertion: c is a single UTF-4 value */
441 if (c <= 0xFF) {
442 if (out >= outend)
443 break;
444 *out++ = c;
445 } else {
446 /* no chance for this in IsoLat1 */
447 *outlen = out - outstart;
448 *inlen = processed - instart;
449 return(XML_ENC_ERR_INPUT);
450 }
451 processed = in;
452 }
453 *outlen = out - outstart;
454 *inlen = processed - instart;
455 return(*outlen);
456}
457#endif /* LIBXML_OUTPUT_ENABLED */
458
459/**
460 * UTF16LEToUTF8:
461 * @out: a pointer to an array of bytes to store the result
462 * @outlen: the length of @out
463 * @inb: a pointer to an array of UTF-16LE passwd as a byte array
464 * @inlenb: the length of @in in UTF-16LE chars
465 *
466 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
467 * block of chars out. This function assumes the endian property
468 * is the same between the native type of this machine and the
469 * inputed one.
470 *
471 * Returns the number of bytes written or an XML_ENC_ERR code.
472 *
473 * The value of *inlen after return is the number of octets consumed
474 * if the return value is positive, else unpredictable.
475 */
476static int
477UTF16LEToUTF8(unsigned char* out, int *outlen,
478 const unsigned char* inb, int *inlenb)
479{
480 unsigned char* outstart = out;
481 const unsigned char* processed = inb;
482 unsigned char* outend;
483 unsigned short* in = (unsigned short *) (void *) inb;
484 unsigned short* inend;
485 unsigned int c, d, inlen;
486 unsigned char *tmp;
487 int bits;
488
489 if (*outlen == 0) {
490 *inlenb = 0;
491 return(0);
492 }
493 outend = out + *outlen;
494 if ((*inlenb % 2) == 1)
495 (*inlenb)--;
496 inlen = *inlenb / 2;
497 inend = in + inlen;
498 while ((in < inend) && (out - outstart + 5 < *outlen)) {
499 if (xmlLittleEndian) {
500 c= *in++;
501 } else {
502 tmp = (unsigned char *) in;
503 c = *tmp++;
504 c = c | (*tmp << 8);
505 in++;
506 }
507 if ((c & 0xFC00) == 0xD800) { /* surrogates */
508 if (in >= inend) { /* handle split mutli-byte characters */
509 break;
510 }
511 if (xmlLittleEndian) {
512 d = *in++;
513 } else {
514 tmp = (unsigned char *) in;
515 d = *tmp++;
516 d = d | (*tmp << 8);
517 in++;
518 }
519 if ((d & 0xFC00) == 0xDC00) {
520 c &= 0x03FF;
521 c <<= 10;
522 c |= d & 0x03FF;
523 c += 0x10000;
524 }
525 else {
526 *outlen = out - outstart;
527 *inlenb = processed - inb;
528 return(XML_ENC_ERR_INPUT);
529 }
530 }
531
532 /* assertion: c is a single UTF-4 value */
533 if (out >= outend)
534 break;
535 if (c < 0x80) { *out++= c; bits= -6; }
536 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
537 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
538 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
539
540 for ( ; bits >= 0; bits-= 6) {
541 if (out >= outend)
542 break;
543 *out++= ((c >> bits) & 0x3F) | 0x80;
544 }
545 processed = (const unsigned char*) in;
546 }
547 *outlen = out - outstart;
548 *inlenb = processed - inb;
549 return(*outlen);
550}
551
552#ifdef LIBXML_OUTPUT_ENABLED
553/**
554 * UTF8ToUTF16LE:
555 * @outb: a pointer to an array of bytes to store the result
556 * @outlen: the length of @outb
557 * @in: a pointer to an array of UTF-8 chars
558 * @inlen: the length of @in
559 *
560 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
561 * block of chars out.
562 *
563 * Returns the number of bytes written or an XML_ENC_ERR code.
564 */
565static int
566UTF8ToUTF16LE(unsigned char* outb, int *outlen,
567 const unsigned char* in, int *inlen)
568{
569 unsigned short* out = (unsigned short *) (void *) outb;
570 const unsigned char* processed = in;
571 const unsigned char *const instart = in;
572 unsigned short* outstart= out;
573 unsigned short* outend;
574 const unsigned char* inend;
575 unsigned int c, d;
576 int trailing;
577 unsigned char *tmp;
578 unsigned short tmp1, tmp2;
579
580 /* UTF16LE encoding has no BOM */
581 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
582 return(XML_ENC_ERR_INTERNAL);
583 if (in == NULL) {
584 *outlen = 0;
585 *inlen = 0;
586 return(0);
587 }
588 inend= in + *inlen;
589 outend = out + (*outlen / 2);
590 while (in < inend) {
591 d= *in++;
592 if (d < 0x80) { c= d; trailing= 0; }
593 else if (d < 0xC0) {
594 /* trailing byte in leading position */
595 *outlen = (out - outstart) * 2;
596 *inlen = processed - instart;
597 return(XML_ENC_ERR_INPUT);
598 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
599 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
600 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
601 else {
602 /* no chance for this in UTF-16 */
603 *outlen = (out - outstart) * 2;
604 *inlen = processed - instart;
605 return(XML_ENC_ERR_INPUT);
606 }
607
608 if (inend - in < trailing) {
609 break;
610 }
611
612 for ( ; trailing; trailing--) {
613 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
614 break;
615 c <<= 6;
616 c |= d & 0x3F;
617 }
618
619 /* assertion: c is a single UTF-4 value */
620 if (c < 0x10000) {
621 if (out >= outend)
622 break;
623 if (xmlLittleEndian) {
624 *out++ = c;
625 } else {
626 tmp = (unsigned char *) out;
627 *tmp = (unsigned char) c; /* Explicit truncation */
628 *(tmp + 1) = c >> 8 ;
629 out++;
630 }
631 }
632 else if (c < 0x110000) {
633 if (out+1 >= outend)
634 break;
635 c -= 0x10000;
636 if (xmlLittleEndian) {
637 *out++ = 0xD800 | (c >> 10);
638 *out++ = 0xDC00 | (c & 0x03FF);
639 } else {
640 tmp1 = 0xD800 | (c >> 10);
641 tmp = (unsigned char *) out;
642 *tmp = (unsigned char) tmp1; /* Explicit truncation */
643 *(tmp + 1) = tmp1 >> 8;
644 out++;
645
646 tmp2 = 0xDC00 | (c & 0x03FF);
647 tmp = (unsigned char *) out;
648 *tmp = (unsigned char) tmp2; /* Explicit truncation */
649 *(tmp + 1) = tmp2 >> 8;
650 out++;
651 }
652 }
653 else
654 break;
655 processed = in;
656 }
657 *outlen = (out - outstart) * 2;
658 *inlen = processed - instart;
659 return(*outlen);
660}
661
662/**
663 * UTF8ToUTF16:
664 * @outb: a pointer to an array of bytes to store the result
665 * @outlen: the length of @outb
666 * @in: a pointer to an array of UTF-8 chars
667 * @inlen: the length of @in
668 *
669 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
670 * block of chars out.
671 *
672 * Returns the number of bytes written or an XML_ENC_ERR code.
673 */
674static int
675UTF8ToUTF16(unsigned char* outb, int *outlen,
676 const unsigned char* in, int *inlen)
677{
678 if (in == NULL) {
679 /*
680 * initialization, add the Byte Order Mark for UTF-16LE
681 */
682 if (*outlen >= 2) {
683 outb[0] = 0xFF;
684 outb[1] = 0xFE;
685 *outlen = 2;
686 *inlen = 0;
687 return(2);
688 }
689 *outlen = 0;
690 *inlen = 0;
691 return(0);
692 }
693 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
694}
695#endif /* LIBXML_OUTPUT_ENABLED */
696
697/**
698 * UTF16BEToUTF8:
699 * @out: a pointer to an array of bytes to store the result
700 * @outlen: the length of @out
701 * @inb: a pointer to an array of UTF-16 passed as a byte array
702 * @inlenb: the length of @in in UTF-16 chars
703 *
704 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
705 * block of chars out. This function assumes the endian property
706 * is the same between the native type of this machine and the
707 * inputed one.
708 *
709 * Returns the number of bytes written or an XML_ENC_ERR code.
710 *
711 * The value of *inlen after return is the number of octets consumed
712 * if the return value is positive, else unpredictable.
713 */
714static int
715UTF16BEToUTF8(unsigned char* out, int *outlen,
716 const unsigned char* inb, int *inlenb)
717{
718 unsigned char* outstart = out;
719 const unsigned char* processed = inb;
720 unsigned char* outend;
721 unsigned short* in = (unsigned short *) (void *) inb;
722 unsigned short* inend;
723 unsigned int c, d, inlen;
724 unsigned char *tmp;
725 int bits;
726
727 if (*outlen == 0) {
728 *inlenb = 0;
729 return(0);
730 }
731 outend = out + *outlen;
732 if ((*inlenb % 2) == 1)
733 (*inlenb)--;
734 inlen = *inlenb / 2;
735 inend= in + inlen;
736 while ((in < inend) && (out - outstart + 5 < *outlen)) {
737 if (xmlLittleEndian) {
738 tmp = (unsigned char *) in;
739 c = *tmp++;
740 c = (c << 8) | *tmp;
741 in++;
742 } else {
743 c= *in++;
744 }
745 if ((c & 0xFC00) == 0xD800) { /* surrogates */
746 if (in >= inend) { /* handle split mutli-byte characters */
747 break;
748 }
749 if (xmlLittleEndian) {
750 tmp = (unsigned char *) in;
751 d = *tmp++;
752 d = (d << 8) | *tmp;
753 in++;
754 } else {
755 d= *in++;
756 }
757 if ((d & 0xFC00) == 0xDC00) {
758 c &= 0x03FF;
759 c <<= 10;
760 c |= d & 0x03FF;
761 c += 0x10000;
762 }
763 else {
764 *outlen = out - outstart;
765 *inlenb = processed - inb;
766 return(XML_ENC_ERR_INPUT);
767 }
768 }
769
770 /* assertion: c is a single UTF-4 value */
771 if (out >= outend)
772 break;
773 if (c < 0x80) { *out++= c; bits= -6; }
774 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
775 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
776 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
777
778 for ( ; bits >= 0; bits-= 6) {
779 if (out >= outend)
780 break;
781 *out++= ((c >> bits) & 0x3F) | 0x80;
782 }
783 processed = (const unsigned char*) in;
784 }
785 *outlen = out - outstart;
786 *inlenb = processed - inb;
787 return(*outlen);
788}
789
790#ifdef LIBXML_OUTPUT_ENABLED
791/**
792 * UTF8ToUTF16BE:
793 * @outb: a pointer to an array of bytes to store the result
794 * @outlen: the length of @outb
795 * @in: a pointer to an array of UTF-8 chars
796 * @inlen: the length of @in
797 *
798 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
799 * block of chars out.
800 *
801 * Returns the number of bytes written or an XML_ENC_ERR code.
802 */
803static int
804UTF8ToUTF16BE(unsigned char* outb, int *outlen,
805 const unsigned char* in, int *inlen)
806{
807 unsigned short* out = (unsigned short *) (void *) outb;
808 const unsigned char* processed = in;
809 const unsigned char *const instart = in;
810 unsigned short* outstart= out;
811 unsigned short* outend;
812 const unsigned char* inend;
813 unsigned int c, d;
814 int trailing;
815 unsigned char *tmp;
816 unsigned short tmp1, tmp2;
817
818 /* UTF-16BE has no BOM */
819 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
820 return(XML_ENC_ERR_INTERNAL);
821 if (in == NULL) {
822 *outlen = 0;
823 *inlen = 0;
824 return(0);
825 }
826 inend= in + *inlen;
827 outend = out + (*outlen / 2);
828 while (in < inend) {
829 d= *in++;
830 if (d < 0x80) { c= d; trailing= 0; }
831 else if (d < 0xC0) {
832 /* trailing byte in leading position */
833 *outlen = out - outstart;
834 *inlen = processed - instart;
835 return(XML_ENC_ERR_INPUT);
836 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
837 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
838 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
839 else {
840 /* no chance for this in UTF-16 */
841 *outlen = out - outstart;
842 *inlen = processed - instart;
843 return(XML_ENC_ERR_INPUT);
844 }
845
846 if (inend - in < trailing) {
847 break;
848 }
849
850 for ( ; trailing; trailing--) {
851 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
852 c <<= 6;
853 c |= d & 0x3F;
854 }
855
856 /* assertion: c is a single UTF-4 value */
857 if (c < 0x10000) {
858 if (out >= outend) break;
859 if (xmlLittleEndian) {
860 tmp = (unsigned char *) out;
861 *tmp = c >> 8;
862 *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
863 out++;
864 } else {
865 *out++ = c;
866 }
867 }
868 else if (c < 0x110000) {
869 if (out+1 >= outend) break;
870 c -= 0x10000;
871 if (xmlLittleEndian) {
872 tmp1 = 0xD800 | (c >> 10);
873 tmp = (unsigned char *) out;
874 *tmp = tmp1 >> 8;
875 *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
876 out++;
877
878 tmp2 = 0xDC00 | (c & 0x03FF);
879 tmp = (unsigned char *) out;
880 *tmp = tmp2 >> 8;
881 *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
882 out++;
883 } else {
884 *out++ = 0xD800 | (c >> 10);
885 *out++ = 0xDC00 | (c & 0x03FF);
886 }
887 }
888 else
889 break;
890 processed = in;
891 }
892 *outlen = (out - outstart) * 2;
893 *inlen = processed - instart;
894 return(*outlen);
895}
896#endif /* LIBXML_OUTPUT_ENABLED */
897
898/************************************************************************
899 * *
900 * Generic encoding handling routines *
901 * *
902 ************************************************************************/
903
904/**
905 * xmlDetectCharEncoding:
906 * @in: a pointer to the first bytes of the XML entity, must be at least
907 * 2 bytes long (at least 4 if encoding is UTF4 variant).
908 * @len: pointer to the length of the buffer
909 *
910 * Guess the encoding of the entity using the first bytes of the entity content
911 * according to the non-normative appendix F of the XML-1.0 recommendation.
912 *
913 * Returns one of the XML_CHAR_ENCODING_... values.
914 */
915xmlCharEncoding
916xmlDetectCharEncoding(const unsigned char* in, int len)
917{
918 if (in == NULL)
919 return(XML_CHAR_ENCODING_NONE);
920 if (len >= 4) {
921 if ((in[0] == 0x00) && (in[1] == 0x00) &&
922 (in[2] == 0x00) && (in[3] == 0x3C))
923 return(XML_CHAR_ENCODING_UCS4BE);
924 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
925 (in[2] == 0x00) && (in[3] == 0x00))
926 return(XML_CHAR_ENCODING_UCS4LE);
927 if ((in[0] == 0x00) && (in[1] == 0x00) &&
928 (in[2] == 0x3C) && (in[3] == 0x00))
929 return(XML_CHAR_ENCODING_UCS4_2143);
930 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
931 (in[2] == 0x00) && (in[3] == 0x00))
932 return(XML_CHAR_ENCODING_UCS4_3412);
933 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
934 (in[2] == 0xA7) && (in[3] == 0x94))
935 return(XML_CHAR_ENCODING_EBCDIC);
936 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
937 (in[2] == 0x78) && (in[3] == 0x6D))
938 return(XML_CHAR_ENCODING_UTF8);
939 /*
940 * Although not part of the recommendation, we also
941 * attempt an "auto-recognition" of UTF-16LE and
942 * UTF-16BE encodings.
943 */
944 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
945 (in[2] == 0x3F) && (in[3] == 0x00))
946 return(XML_CHAR_ENCODING_UTF16LE);
947 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
948 (in[2] == 0x00) && (in[3] == 0x3F))
949 return(XML_CHAR_ENCODING_UTF16BE);
950 }
951 if (len >= 3) {
952 /*
953 * Errata on XML-1.0 June 20 2001
954 * We now allow an UTF8 encoded BOM
955 */
956 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
957 (in[2] == 0xBF))
958 return(XML_CHAR_ENCODING_UTF8);
959 }
960 /* For UTF-16 we can recognize by the BOM */
961 if (len >= 2) {
962 if ((in[0] == 0xFE) && (in[1] == 0xFF))
963 return(XML_CHAR_ENCODING_UTF16BE);
964 if ((in[0] == 0xFF) && (in[1] == 0xFE))
965 return(XML_CHAR_ENCODING_UTF16LE);
966 }
967 return(XML_CHAR_ENCODING_NONE);
968}
969
970/**
971 * xmlCleanupEncodingAliases:
972 *
973 * Unregisters all aliases
974 */
975void
976xmlCleanupEncodingAliases(void) {
977 int i;
978
979 if (xmlCharEncodingAliases == NULL)
980 return;
981
982 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
983 if (xmlCharEncodingAliases[i].name != NULL)
984 xmlFree((char *) xmlCharEncodingAliases[i].name);
985 if (xmlCharEncodingAliases[i].alias != NULL)
986 xmlFree((char *) xmlCharEncodingAliases[i].alias);
987 }
988 xmlCharEncodingAliasesNb = 0;
989 xmlCharEncodingAliasesMax = 0;
990 xmlFree(xmlCharEncodingAliases);
991 xmlCharEncodingAliases = NULL;
992}
993
994/**
995 * xmlGetEncodingAlias:
996 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
997 *
998 * Lookup an encoding name for the given alias.
999 *
1000 * Returns NULL if not found, otherwise the original name
1001 */
1002const char *
1003xmlGetEncodingAlias(const char *alias) {
1004 int i;
1005 char upper[100];
1006
1007 if (alias == NULL)
1008 return(NULL);
1009
1010 if (xmlCharEncodingAliases == NULL)
1011 return(NULL);
1012
1013 for (i = 0;i < 99;i++) {
1014 upper[i] = (char) toupper((unsigned char) alias[i]);
1015 if (upper[i] == 0) break;
1016 }
1017 upper[i] = 0;
1018
1019 /*
1020 * Walk down the list looking for a definition of the alias
1021 */
1022 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024 return(xmlCharEncodingAliases[i].name);
1025 }
1026 }
1027 return(NULL);
1028}
1029
1030/**
1031 * xmlAddEncodingAlias:
1032 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1033 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1034 *
1035 * Registers an alias @alias for an encoding named @name. Existing alias
1036 * will be overwritten.
1037 *
1038 * Returns 0 in case of success, -1 in case of error
1039 */
1040int
1041xmlAddEncodingAlias(const char *name, const char *alias) {
1042 int i;
1043 char upper[100];
1044 char *nameCopy, *aliasCopy;
1045
1046 if ((name == NULL) || (alias == NULL))
1047 return(-1);
1048
1049 for (i = 0;i < 99;i++) {
1050 upper[i] = (char) toupper((unsigned char) alias[i]);
1051 if (upper[i] == 0) break;
1052 }
1053 upper[i] = 0;
1054
1055 if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1056 xmlCharEncodingAliasPtr tmp;
1057 size_t newSize = xmlCharEncodingAliasesMax ?
1058 xmlCharEncodingAliasesMax * 2 :
1059 20;
1060
1061 tmp = (xmlCharEncodingAliasPtr)
1062 xmlRealloc(xmlCharEncodingAliases,
1063 newSize * sizeof(xmlCharEncodingAlias));
1064 if (tmp == NULL)
1065 return(-1);
1066 xmlCharEncodingAliases = tmp;
1067 xmlCharEncodingAliasesMax = newSize;
1068 }
1069
1070 /*
1071 * Walk down the list looking for a definition of the alias
1072 */
1073 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075 /*
1076 * Replace the definition.
1077 */
1078 nameCopy = xmlMemStrdup(name);
1079 if (nameCopy == NULL)
1080 return(-1);
1081 xmlFree((char *) xmlCharEncodingAliases[i].name);
1082 xmlCharEncodingAliases[i].name = nameCopy;
1083 return(0);
1084 }
1085 }
1086 /*
1087 * Add the definition
1088 */
1089 nameCopy = xmlMemStrdup(name);
1090 if (nameCopy == NULL)
1091 return(-1);
1092 aliasCopy = xmlMemStrdup(upper);
1093 if (aliasCopy == NULL) {
1094 xmlFree(nameCopy);
1095 return(-1);
1096 }
1097 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1098 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1099 xmlCharEncodingAliasesNb++;
1100 return(0);
1101}
1102
1103/**
1104 * xmlDelEncodingAlias:
1105 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1106 *
1107 * Unregisters an encoding alias @alias
1108 *
1109 * Returns 0 in case of success, -1 in case of error
1110 */
1111int
1112xmlDelEncodingAlias(const char *alias) {
1113 int i;
1114
1115 if (alias == NULL)
1116 return(-1);
1117
1118 if (xmlCharEncodingAliases == NULL)
1119 return(-1);
1120 /*
1121 * Walk down the list looking for a definition of the alias
1122 */
1123 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1124 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1125 xmlFree((char *) xmlCharEncodingAliases[i].name);
1126 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1127 xmlCharEncodingAliasesNb--;
1128 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1129 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1130 return(0);
1131 }
1132 }
1133 return(-1);
1134}
1135
1136/**
1137 * xmlParseCharEncoding:
1138 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1139 *
1140 * Compare the string to the encoding schemes already known. Note
1141 * that the comparison is case insensitive accordingly to the section
1142 * [XML] 4.3.3 Character Encoding in Entities.
1143 *
1144 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1145 * if not recognized.
1146 */
1147xmlCharEncoding
1148xmlParseCharEncoding(const char* name)
1149{
1150 const char *alias;
1151 char upper[500];
1152 int i;
1153
1154 if (name == NULL)
1155 return(XML_CHAR_ENCODING_NONE);
1156
1157 /*
1158 * Do the alias resolution
1159 */
1160 alias = xmlGetEncodingAlias(name);
1161 if (alias != NULL)
1162 name = alias;
1163
1164 for (i = 0;i < 499;i++) {
1165 upper[i] = (char) toupper((unsigned char) name[i]);
1166 if (upper[i] == 0) break;
1167 }
1168 upper[i] = 0;
1169
1170 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1171 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1172 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1173
1174 /*
1175 * NOTE: if we were able to parse this, the endianness of UTF16 is
1176 * already found and in use
1177 */
1178 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1179 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1180
1181 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1182 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1183 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1184
1185 /*
1186 * NOTE: if we were able to parse this, the endianness of UCS4 is
1187 * already found and in use
1188 */
1189 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1190 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1191 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1192
1193
1194 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1195 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1196 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1197
1198 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1199 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1200 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1201
1202 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1203 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1204 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1205 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1206 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1207 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1208 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1209
1210 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1211 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1212 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1213
1214 return(XML_CHAR_ENCODING_ERROR);
1215}
1216
1217/**
1218 * xmlGetCharEncodingName:
1219 * @enc: the encoding
1220 *
1221 * The "canonical" name for XML encoding.
1222 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1223 * Section 4.3.3 Character Encoding in Entities
1224 *
1225 * Returns the canonical name for the given encoding
1226 */
1227
1228const char*
1229xmlGetCharEncodingName(xmlCharEncoding enc) {
1230 switch (enc) {
1231 case XML_CHAR_ENCODING_ERROR:
1232 return(NULL);
1233 case XML_CHAR_ENCODING_NONE:
1234 return(NULL);
1235 case XML_CHAR_ENCODING_UTF8:
1236 return("UTF-8");
1237 case XML_CHAR_ENCODING_UTF16LE:
1238 return("UTF-16");
1239 case XML_CHAR_ENCODING_UTF16BE:
1240 return("UTF-16");
1241 case XML_CHAR_ENCODING_EBCDIC:
1242 return("EBCDIC");
1243 case XML_CHAR_ENCODING_UCS4LE:
1244 return("ISO-10646-UCS-4");
1245 case XML_CHAR_ENCODING_UCS4BE:
1246 return("ISO-10646-UCS-4");
1247 case XML_CHAR_ENCODING_UCS4_2143:
1248 return("ISO-10646-UCS-4");
1249 case XML_CHAR_ENCODING_UCS4_3412:
1250 return("ISO-10646-UCS-4");
1251 case XML_CHAR_ENCODING_UCS2:
1252 return("ISO-10646-UCS-2");
1253 case XML_CHAR_ENCODING_8859_1:
1254 return("ISO-8859-1");
1255 case XML_CHAR_ENCODING_8859_2:
1256 return("ISO-8859-2");
1257 case XML_CHAR_ENCODING_8859_3:
1258 return("ISO-8859-3");
1259 case XML_CHAR_ENCODING_8859_4:
1260 return("ISO-8859-4");
1261 case XML_CHAR_ENCODING_8859_5:
1262 return("ISO-8859-5");
1263 case XML_CHAR_ENCODING_8859_6:
1264 return("ISO-8859-6");
1265 case XML_CHAR_ENCODING_8859_7:
1266 return("ISO-8859-7");
1267 case XML_CHAR_ENCODING_8859_8:
1268 return("ISO-8859-8");
1269 case XML_CHAR_ENCODING_8859_9:
1270 return("ISO-8859-9");
1271 case XML_CHAR_ENCODING_2022_JP:
1272 return("ISO-2022-JP");
1273 case XML_CHAR_ENCODING_SHIFT_JIS:
1274 return("Shift-JIS");
1275 case XML_CHAR_ENCODING_EUC_JP:
1276 return("EUC-JP");
1277 case XML_CHAR_ENCODING_ASCII:
1278 return(NULL);
1279 }
1280 return(NULL);
1281}
1282
1283/************************************************************************
1284 * *
1285 * Char encoding handlers *
1286 * *
1287 ************************************************************************/
1288
/*
 * Forward declarations for the ISO-8859-x converters. They are only
 * declared when neither iconv nor ICU support is compiled in and the
 * ISO-8859-x support is enabled.
 */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)

/*
 * DECLARE_ISO_FUNCS(n) declares the pair of static conversion functions
 * for ISO-8859-n: ISO8859_nToUTF8 and UTF8ToISO8859_n. Both use the
 * (out, outlen, in, inlen) converter signature.
 */
#define DECLARE_ISO_FUNCS(n) \
    static int ISO8859_##n##ToUTF8(unsigned char* out, int *outlen, \
                                   const unsigned char* in, int *inlen); \
    static int UTF8ToISO8859_##n(unsigned char* out, int *outlen, \
                                 const unsigned char* in, int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1316
/*
 * EMPTY_ICONV expands to the trailing initializers for the two iconv
 * members of a handler when iconv support is compiled in, and to
 * nothing otherwise.
 */
#ifdef LIBXML_ICONV_ENABLED
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
  #define EMPTY_ICONV
#endif

/*
 * EMPTY_UCONV does the same for the two ICU converter members.
 */
#ifdef LIBXML_ICU_ENABLED
  #define EMPTY_UCONV , NULL, NULL
#else
  #define EMPTY_UCONV
#endif

/*
 * MAKE_HANDLER builds a static handler initializer from a name and the
 * input/output conversion functions, leaving any iconv/ICU members
 * empty. The name literal is cast to (char *).
 */
#define MAKE_HANDLER(name, in, out) \
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1331
/*
 * Table of the built-in encoding handlers.
 *
 * The order of the first entries matters: xmlUTF16LEHandler and
 * xmlUTF16BEHandler are defined as the entries at index 1 and 2.
 * When output support is disabled the handlers carry no output
 * function, and the "HTML" pseudo encoding (output only) is omitted.
 */
static const xmlCharEncodingHandler defaultHandlers[] = {
    MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8)
#ifdef LIBXML_OUTPUT_ENABLED
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
    ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
#ifdef LIBXML_HTML_ENABLED
    ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
#endif
#else
    ,MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
    ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
    ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
    ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
    ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
    ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
#endif /* LIBXML_OUTPUT_ENABLED */

    /* Built-in ISO-8859-x converters, used only without iconv and ICU. */
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)
    ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
    ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
    ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
    ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
    ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
    ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
    ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
    ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
    ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
    ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
    ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
    ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
    ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
    ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
#endif
};
1371
/* Number of entries in the defaultHandlers table. */
#define NUM_DEFAULT_HANDLERS \
    (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))

/*
 * Shortcuts to the built-in UTF-16 handlers. The indexes must stay in
 * sync with the position of "UTF-16LE" and "UTF-16BE" in defaultHandlers.
 */
static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[1];
static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[2];

/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
/* Table of user registered handlers, allocated on first registration. */
static xmlCharEncodingHandlerPtr *handlers = NULL;
/* Number of slots of the handlers table currently in use. */
static int nbCharEncodingHandler = 0;
1382
1383/**
1384 * xmlNewCharEncodingHandler:
1385 * @name: the encoding name, in UTF-8 format (ASCII actually)
1386 * @input: the xmlCharEncodingInputFunc to read that encoding
1387 * @output: the xmlCharEncodingOutputFunc to write that encoding
1388 *
1389 * Create and registers an xmlCharEncodingHandler.
1390 *
1391 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1392 */
1393xmlCharEncodingHandlerPtr
1394xmlNewCharEncodingHandler(const char *name,
1395 xmlCharEncodingInputFunc input,
1396 xmlCharEncodingOutputFunc output) {
1397 xmlCharEncodingHandlerPtr handler;
1398 const char *alias;
1399 char upper[500];
1400 int i;
1401 char *up = NULL;
1402
1403 /*
1404 * Do the alias resolution
1405 */
1406 alias = xmlGetEncodingAlias(name);
1407 if (alias != NULL)
1408 name = alias;
1409
1410 /*
1411 * Keep only the uppercase version of the encoding.
1412 */
1413 if (name == NULL)
1414 return(NULL);
1415 for (i = 0;i < 499;i++) {
1416 upper[i] = (char) toupper((unsigned char) name[i]);
1417 if (upper[i] == 0) break;
1418 }
1419 upper[i] = 0;
1420 up = xmlMemStrdup(upper);
1421 if (up == NULL)
1422 return(NULL);
1423
1424 /*
1425 * allocate and fill-up an handler block.
1426 */
1427 handler = (xmlCharEncodingHandlerPtr)
1428 xmlMalloc(sizeof(xmlCharEncodingHandler));
1429 if (handler == NULL) {
1430 xmlFree(up);
1431 return(NULL);
1432 }
1433 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1434 handler->input = input;
1435 handler->output = output;
1436 handler->name = up;
1437
1438#ifdef LIBXML_ICONV_ENABLED
1439 handler->iconv_in = NULL;
1440 handler->iconv_out = NULL;
1441#endif
1442#ifdef LIBXML_ICU_ENABLED
1443 handler->uconv_in = NULL;
1444 handler->uconv_out = NULL;
1445#endif
1446
1447 /*
1448 * registers and returns the handler.
1449 */
1450 xmlRegisterCharEncodingHandler(handler);
1451 return(handler);
1452}
1453
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * Kept for API compatibility; it only forwards to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void) {
    xmlInitParser();
}
1463
1464/**
1465 * xmlInitEncodingInternal:
1466 *
1467 * Initialize the char encoding support.
1468 */
1469void
1470xmlInitEncodingInternal(void) {
1471 unsigned short int tst = 0x1234;
1472 unsigned char *ptr = (unsigned char *) &tst;
1473
1474 if (*ptr == 0x12) xmlLittleEndian = 0;
1475 else xmlLittleEndian = 1;
1476}
1477
1478/**
1479 * xmlCleanupCharEncodingHandlers:
1480 *
1481 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1482 * to free global state but see the warnings there. xmlCleanupParser
1483 * should be only called once at program exit. In most cases, you don't
1484 * have call cleanup functions at all.
1485 *
1486 * Cleanup the memory allocated for the char encoding support, it
1487 * unregisters all the encoding handlers and the aliases.
1488 */
1489void
1490xmlCleanupCharEncodingHandlers(void) {
1491 xmlCleanupEncodingAliases();
1492
1493 if (handlers == NULL) return;
1494
1495 for (;nbCharEncodingHandler > 0;) {
1496 nbCharEncodingHandler--;
1497 if (handlers[nbCharEncodingHandler] != NULL) {
1498 if (handlers[nbCharEncodingHandler]->name != NULL)
1499 xmlFree(handlers[nbCharEncodingHandler]->name);
1500 xmlFree(handlers[nbCharEncodingHandler]);
1501 }
1502 }
1503 xmlFree(handlers);
1504 handlers = NULL;
1505 nbCharEncodingHandler = 0;
1506}
1507
1508/**
1509 * xmlRegisterCharEncodingHandler:
1510 * @handler: the xmlCharEncodingHandlerPtr handler block
1511 *
1512 * Register the char encoding handler, surprising, isn't it ?
1513 */
1514void
1515xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1516 if (handler == NULL)
1517 return;
1518 if (handlers == NULL) {
1519 handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1520 if (handlers == NULL)
1521 goto free_handler;
1522 }
1523
1524 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1525 goto free_handler;
1526 handlers[nbCharEncodingHandler++] = handler;
1527 return;
1528
1529free_handler:
1530 if (handler != NULL) {
1531 if (handler->name != NULL) {
1532 xmlFree(handler->name);
1533 }
1534 xmlFree(handler);
1535 }
1536}
1537
1538/**
1539 * xmlGetCharEncodingHandler:
1540 * @enc: an xmlCharEncoding value.
1541 *
1542 * Search in the registered set the handler able to read/write that encoding.
1543 *
1544 * Returns the handler or NULL if not found
1545 */
1546xmlCharEncodingHandlerPtr
1547xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1548 xmlCharEncodingHandlerPtr handler;
1549
1550 switch (enc) {
1551 case XML_CHAR_ENCODING_ERROR:
1552 return(NULL);
1553 case XML_CHAR_ENCODING_NONE:
1554 return(NULL);
1555 case XML_CHAR_ENCODING_UTF8:
1556 return(NULL);
1557 case XML_CHAR_ENCODING_UTF16LE:
1558 return((xmlCharEncodingHandlerPtr) xmlUTF16LEHandler);
1559 case XML_CHAR_ENCODING_UTF16BE:
1560 return((xmlCharEncodingHandlerPtr) xmlUTF16BEHandler);
1561 case XML_CHAR_ENCODING_EBCDIC:
1562 handler = xmlFindCharEncodingHandler("EBCDIC");
1563 if (handler != NULL) return(handler);
1564 handler = xmlFindCharEncodingHandler("ebcdic");
1565 if (handler != NULL) return(handler);
1566 handler = xmlFindCharEncodingHandler("EBCDIC-US");
1567 if (handler != NULL) return(handler);
1568 handler = xmlFindCharEncodingHandler("IBM-037");
1569 if (handler != NULL) return(handler);
1570 break;
1571 case XML_CHAR_ENCODING_UCS4BE:
1572 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1573 if (handler != NULL) return(handler);
1574 handler = xmlFindCharEncodingHandler("UCS-4");
1575 if (handler != NULL) return(handler);
1576 handler = xmlFindCharEncodingHandler("UCS4");
1577 if (handler != NULL) return(handler);
1578 break;
1579 case XML_CHAR_ENCODING_UCS4LE:
1580 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1581 if (handler != NULL) return(handler);
1582 handler = xmlFindCharEncodingHandler("UCS-4");
1583 if (handler != NULL) return(handler);
1584 handler = xmlFindCharEncodingHandler("UCS4");
1585 if (handler != NULL) return(handler);
1586 break;
1587 case XML_CHAR_ENCODING_UCS4_2143:
1588 break;
1589 case XML_CHAR_ENCODING_UCS4_3412:
1590 break;
1591 case XML_CHAR_ENCODING_UCS2:
1592 handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1593 if (handler != NULL) return(handler);
1594 handler = xmlFindCharEncodingHandler("UCS-2");
1595 if (handler != NULL) return(handler);
1596 handler = xmlFindCharEncodingHandler("UCS2");
1597 if (handler != NULL) return(handler);
1598 break;
1599
1600 /*
1601 * We used to keep ISO Latin encodings native in the
1602 * generated data. This led to so many problems that
1603 * this has been removed. One can still change this
1604 * back by registering no-ops encoders for those
1605 */
1606 case XML_CHAR_ENCODING_8859_1:
1607 handler = xmlFindCharEncodingHandler("ISO-8859-1");
1608 if (handler != NULL) return(handler);
1609 break;
1610 case XML_CHAR_ENCODING_8859_2:
1611 handler = xmlFindCharEncodingHandler("ISO-8859-2");
1612 if (handler != NULL) return(handler);
1613 break;
1614 case XML_CHAR_ENCODING_8859_3:
1615 handler = xmlFindCharEncodingHandler("ISO-8859-3");
1616 if (handler != NULL) return(handler);
1617 break;
1618 case XML_CHAR_ENCODING_8859_4:
1619 handler = xmlFindCharEncodingHandler("ISO-8859-4");
1620 if (handler != NULL) return(handler);
1621 break;
1622 case XML_CHAR_ENCODING_8859_5:
1623 handler = xmlFindCharEncodingHandler("ISO-8859-5");
1624 if (handler != NULL) return(handler);
1625 break;
1626 case XML_CHAR_ENCODING_8859_6:
1627 handler = xmlFindCharEncodingHandler("ISO-8859-6");
1628 if (handler != NULL) return(handler);
1629 break;
1630 case XML_CHAR_ENCODING_8859_7:
1631 handler = xmlFindCharEncodingHandler("ISO-8859-7");
1632 if (handler != NULL) return(handler);
1633 break;
1634 case XML_CHAR_ENCODING_8859_8:
1635 handler = xmlFindCharEncodingHandler("ISO-8859-8");
1636 if (handler != NULL) return(handler);
1637 break;
1638 case XML_CHAR_ENCODING_8859_9:
1639 handler = xmlFindCharEncodingHandler("ISO-8859-9");
1640 if (handler != NULL) return(handler);
1641 break;
1642
1643
1644 case XML_CHAR_ENCODING_2022_JP:
1645 handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1646 if (handler != NULL) return(handler);
1647 break;
1648 case XML_CHAR_ENCODING_SHIFT_JIS:
1649 handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1650 if (handler != NULL) return(handler);
1651 handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1652 if (handler != NULL) return(handler);
1653 handler = xmlFindCharEncodingHandler("Shift_JIS");
1654 if (handler != NULL) return(handler);
1655 break;
1656 case XML_CHAR_ENCODING_EUC_JP:
1657 handler = xmlFindCharEncodingHandler("EUC-JP");
1658 if (handler != NULL) return(handler);
1659 break;
1660 default:
1661 break;
1662 }
1663
1664 return(NULL);
1665}
1666
/**
 * xmlFindCharEncodingHandler:
 * @name:  a string describing the char encoding.
 *
 * Search in the registered set the handler able to read/write that encoding
 * or create a new one.
 *
 * The lookup proceeds in this order:
 *  1. the alias table is consulted to resolve @name,
 *  2. the upper-cased name is compared with the built-in default handlers,
 *  3. then with the handlers registered at run time,
 *  4. if iconv and/or ICU support is compiled in, a new handler is
 *     allocated when the library can convert in both directions,
 *  5. finally the original name is mapped to its canonical name and the
 *     lookup is retried with that name.
 *
 * Handlers created in step 4 are freshly allocated and are not stored in
 * the table of registered handlers.
 *
 * Returns the handler or NULL if not found
 */
xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char *name) {
    const char *nalias;
    const char *norig;
    xmlCharEncoding alias;
#ifdef LIBXML_ICONV_ENABLED
    xmlCharEncodingHandlerPtr enc;
    iconv_t icv_in, icv_out;
#endif /* LIBXML_ICONV_ENABLED */
#ifdef LIBXML_ICU_ENABLED
    xmlCharEncodingHandlerPtr encu;
    uconv_t *ucv_in, *ucv_out;
#endif /* LIBXML_ICU_ENABLED */
    char upper[100];
    int i;

    if (name == NULL) return(NULL);
    if (name[0] == 0) return(NULL);

    /*
     * Do the alias resolution
     */
    norig = name; /* remember the caller's spelling for the fallback */
    nalias = xmlGetEncodingAlias(name);
    if (nalias != NULL)
        name = nalias;

    /*
     * Check first for directly registered encoding names
     */
    /* Upper-case copy of the name, truncated to 99 characters. */
    for (i = 0;i < 99;i++) {
        upper[i] = (char) toupper((unsigned char) name[i]);
        if (upper[i] == 0) break;
    }
    upper[i] = 0;

    /* Built-in handlers. */
    for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
        if (strcmp(upper, defaultHandlers[i].name) == 0)
            return((xmlCharEncodingHandlerPtr) &defaultHandlers[i]);
    }

    /* Handlers registered at run time. */
    if (handlers != NULL) {
        for (i = 0;i < nbCharEncodingHandler; i++) {
            if (!strcmp(upper, handlers[i]->name)) {
                return(handlers[i]);
            }
        }
    }

#ifdef LIBXML_ICONV_ENABLED
    /* check whether iconv can handle this */
    icv_in = iconv_open("UTF-8", name);
    icv_out = iconv_open(name, "UTF-8");
    /* Retry each failed direction with the upper-cased name. */
    if (icv_in == (iconv_t) -1) {
        icv_in = iconv_open("UTF-8", upper);
    }
    if (icv_out == (iconv_t) -1) {
        icv_out = iconv_open(upper, "UTF-8");
    }
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
        /* Both directions are available: build a new handler. */
        enc = (xmlCharEncodingHandlerPtr)
              xmlMalloc(sizeof(xmlCharEncodingHandler));
        if (enc == NULL) {
            iconv_close(icv_in);
            iconv_close(icv_out);
            return(NULL);
        }
        memset(enc, 0, sizeof(xmlCharEncodingHandler));
        enc->name = xmlMemStrdup(name);
        if (enc->name == NULL) {
            xmlFree(enc);
            iconv_close(icv_in);
            iconv_close(icv_out);
            return(NULL);
        }
        enc->input = NULL;
        enc->output = NULL;
        enc->iconv_in = icv_in;
        enc->iconv_out = icv_out;
        return enc;
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
        /* Only one direction could be opened: close that descriptor. */
        if (icv_in != (iconv_t) -1)
            iconv_close(icv_in);
        else
            iconv_close(icv_out);
    }
#endif /* LIBXML_ICONV_ENABLED */
#ifdef LIBXML_ICU_ENABLED
    /* check whether icu can handle this */
    ucv_in = openIcuConverter(name, 1);
    ucv_out = openIcuConverter(name, 0);
    if (ucv_in != NULL && ucv_out != NULL) {
        /* Both directions are available: build a new handler. */
        encu = (xmlCharEncodingHandlerPtr)
               xmlMalloc(sizeof(xmlCharEncodingHandler));
        if (encu == NULL) {
            closeIcuConverter(ucv_in);
            closeIcuConverter(ucv_out);
            return(NULL);
        }
        memset(encu, 0, sizeof(xmlCharEncodingHandler));
        encu->name = xmlMemStrdup(name);
        if (encu->name == NULL) {
            xmlFree(encu);
            closeIcuConverter(ucv_in);
            closeIcuConverter(ucv_out);
            return(NULL);
        }
        encu->input = NULL;
        encu->output = NULL;
        encu->uconv_in = ucv_in;
        encu->uconv_out = ucv_out;
        return encu;
    } else if (ucv_in != NULL || ucv_out != NULL) {
        /*
         * Only one direction could be opened. Both pointers are passed
         * to closeIcuConverter, one of them being NULL.
         * NOTE(review): presumably closeIcuConverter accepts NULL - confirm.
         */
        closeIcuConverter(ucv_in);
        closeIcuConverter(ucv_out);
    }
#endif /* LIBXML_ICU_ENABLED */

    /*
     * Fallback using the canonical names
     */
    alias = xmlParseCharEncoding(norig);
    if (alias != XML_CHAR_ENCODING_ERROR) {
        const char* canon;
        canon = xmlGetCharEncodingName(alias);
        /* Recurse only if the canonical name differs from the one tried. */
        if ((canon != NULL) && (strcmp(name, canon))) {
            return(xmlFindCharEncodingHandler(canon));
        }
    }

    /* If "none of the above", give up */
    return(NULL);
}
1809
1810/************************************************************************
1811 * *
1812 * ICONV based generic conversion functions *
1813 * *
1814 ************************************************************************/
1815
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:  iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Run one iconv() conversion step and translate its outcome to an
 * XML_ENC_ERR code: EILSEQ maps to XML_ENC_ERR_INPUT, E2BIG to
 * XML_ENC_ERR_SPACE, EINVAL to XML_ENC_ERR_PARTIAL and any other
 * failure to XML_ENC_ERR_INTERNAL.
 *
 * Returns an XML_ENC_ERR code.
 *
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(XML_ENC_ERR_INTERNAL);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    /*
     * Some versions take const, other versions take non-const input.
     */
    rc = iconv(cd, (void *) &src, &srcLeft, &dst, &dstLeft);
    /* iconv leaves the remaining byte counts; turn them into used counts. */
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    if (rc != (size_t) -1)
        return(XML_ENC_ERR_SUCCESS);

    switch (errno) {
        case EILSEQ:
            return(XML_ENC_ERR_INPUT);
        case E2BIG:
            return(XML_ENC_ERR_SPACE);
        case EINVAL:
            return(XML_ENC_ERR_PARTIAL);
        default:
            return(XML_ENC_ERR_INTERNAL);
    }
}
#endif /* LIBXML_ICONV_ENABLED */
1863
1864/************************************************************************
1865 * *
1866 * ICU based generic conversion functions *
1867 * *
1868 ************************************************************************/
1869
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Convert a chunk of data with ICU, going through the UTF-16 pivot
 * buffer stored in @cd. U_BUFFER_OVERFLOW_ERROR is reported as
 * XML_ENC_ERR_SPACE, invalid or illegal characters as XML_ENC_ERR_INPUT
 * and any other ICU failure as XML_ENC_ERR_PARTIAL.
 *
 * Returns an XML_ENC_ERR code.
 *
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    UConverter *target;
    UConverter *source;
    UErrorCode status = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(XML_ENC_ERR_INTERNAL);
    }

    /*
     * Note that the ICU API is stateful. It can always consume a certain
     * amount of input even if the output buffer would overflow. The
     * remaining input must be processed by calling ucnv_convertEx with a
     * possibly empty input buffer.
     *
     * ucnv_convertEx is always called with reset and flush set to 0,
     * so we don't mess up the state. This should never generate
     * U_TRUNCATED_CHAR_FOUND errors.
     *
     * This also means that ICU xmlCharEncodingHandlers should never be
     * reused. It would be a lot nicer if there was a way to emulate the
     * stateless iconv API.
     */
    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        target = cd->utf8;
        source = cd->uconv;
    } else {
        /* UTF-8 => UTF-16 => encoding */
        target = cd->uconv;
        source = cd->utf8;
    }
    ucnv_convertEx(target, source, &dst, dst + *outlen,
                   &src, src + *inlen, cd->pivot_buf,
                   &cd->pivot_source, &cd->pivot_target,
                   cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &status);

    /* The cursors were advanced by ICU; derive the byte counts. */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(status))
        return(XML_ENC_ERR_SUCCESS);
    if (status == U_BUFFER_OVERFLOW_ERROR)
        return(XML_ENC_ERR_SPACE);
    if ((status == U_INVALID_CHAR_FOUND) || (status == U_ILLEGAL_CHAR_FOUND))
        return(XML_ENC_ERR_INPUT);
    return(XML_ENC_ERR_PARTIAL);
}
#endif /* LIBXML_ICU_ENABLED */
1937
1938/************************************************************************
1939 * *
1940 * The real API used by libxml for on-the-fly conversion *
1941 * *
1942 ************************************************************************/
1943
1944/**
1945 * xmlEncConvertError:
1946 * @code: XML_ENC_ERR code
1947 *
1948 * Convert XML_ENC_ERR to libxml2 error codes.
1949 */
1950static int
1951xmlEncConvertError(int code) {
1952 int ret;
1953
1954 switch (code) {
1955 case XML_ENC_ERR_SUCCESS:
1956 ret = XML_ERR_OK;
1957 break;
1958 case XML_ENC_ERR_INPUT:
1959 ret = XML_ERR_INVALID_ENCODING;
1960 break;
1961 case XML_ENC_ERR_MEMORY:
1962 ret = XML_ERR_NO_MEMORY;
1963 break;
1964 default:
1965 ret = XML_ERR_INTERNAL_ERROR;
1966 break;
1967 }
1968
1969 return(ret);
1970}
1971
1972/**
1973 * xmlEncInputChunk:
1974 * @handler: encoding handler
1975 * @out: a pointer to an array of bytes to store the result
1976 * @outlen: the length of @out
1977 * @in: a pointer to an array of input bytes
1978 * @inlen: the length of @in
1979 *
1980 * The value of @inlen after return is the number of octets consumed
1981 * as the return value is 0, else unpredictable.
1982 * The value of @outlen after return is the number of octets produced.
1983 *
1984 * Returns an XML_ENC_ERR code.
1985 */
1986int
1987xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1988 int *outlen, const unsigned char *in, int *inlen) {
1989 int ret;
1990
1991 if (handler->input != NULL) {
1992 int oldinlen = *inlen;
1993
1994 ret = handler->input(out, outlen, in, inlen);
1995 if (ret >= 0) {
1996 /*
1997 * The built-in converters don't signal XML_ENC_ERR_SPACE.
1998 */
1999 if (*inlen < oldinlen) {
2000 if (*outlen > 0)
2001 ret = XML_ENC_ERR_SPACE;
2002 else
2003 ret = XML_ENC_ERR_PARTIAL;
2004 } else {
2005 ret = XML_ENC_ERR_SUCCESS;
2006 }
2007 }
2008 }
2009#ifdef LIBXML_ICONV_ENABLED
2010 else if (handler->iconv_in != NULL) {
2011 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2012 }
2013#endif /* LIBXML_ICONV_ENABLED */
2014#ifdef LIBXML_ICU_ENABLED
2015 else if (handler->uconv_in != NULL) {
2016 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
2017 }
2018#endif /* LIBXML_ICU_ENABLED */
2019 else {
2020 *outlen = 0;
2021 *inlen = 0;
2022 ret = XML_ENC_ERR_INTERNAL;
2023 }
2024
2025 /* Ignore partial errors when reading. */
2026 if (ret == XML_ENC_ERR_PARTIAL)
2027 ret = XML_ENC_ERR_SUCCESS;
2028
2029 return(ret);
2030}
2031
2032/**
2033 * xmlEncOutputChunk:
2034 * @handler: encoding handler
2035 * @out: a pointer to an array of bytes to store the result
2036 * @outlen: the length of @out
2037 * @in: a pointer to an array of input bytes
2038 * @inlen: the length of @in
2039 *
2040 * Returns an XML_ENC_ERR code.
2041 *
2042 * The value of @inlen after return is the number of octets consumed
2043 * as the return value is 0, else unpredictable.
2044 * The value of @outlen after return is the number of octets produced.
2045 */
2046static int
2047xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2048 int *outlen, const unsigned char *in, int *inlen) {
2049 int ret;
2050
2051 if (handler->output != NULL) {
2052 int oldinlen = *inlen;
2053
2054 ret = handler->output(out, outlen, in, inlen);
2055 if (ret >= 0) {
2056 /*
2057 * The built-in converters don't signal XML_ENC_ERR_SPACE.
2058 */
2059 if (*inlen < oldinlen) {
2060 if (*outlen > 0)
2061 ret = XML_ENC_ERR_SPACE;
2062 else
2063 ret = XML_ENC_ERR_PARTIAL;
2064 } else {
2065 ret = XML_ENC_ERR_SUCCESS;
2066 }
2067 }
2068 }
2069#ifdef LIBXML_ICONV_ENABLED
2070 else if (handler->iconv_out != NULL) {
2071 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2072 }
2073#endif /* LIBXML_ICONV_ENABLED */
2074#ifdef LIBXML_ICU_ENABLED
2075 else if (handler->uconv_out != NULL) {
2076 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
2077 }
2078#endif /* LIBXML_ICU_ENABLED */
2079 else {
2080 *outlen = 0;
2081 *inlen = 0;
2082 ret = XML_ENC_ERR_INTERNAL;
2083 }
2084
2085 /* We shouldn't generate partial sequences when writing. */
2086 if (ret == XML_ENC_ERR_PARTIAL)
2087 ret = XML_ENC_ERR_INTERNAL;
2088
2089 return(ret);
2090}
2091
/**
 * xmlCharEncFirstLine:
 * @handler:   char encoding transformation data structure
 * @out:  an xmlBuffer for the output.
 * @in:  an xmlBuffer for the input
 *
 * DEPRECATED: Don't use.
 *
 * This function simply forwards to xmlCharEncInFunc.
 *
 * Returns the number of bytes written or an XML_ENC_ERR code.
 */
int
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                    xmlBufferPtr in) {
    return(xmlCharEncInFunc(handler, out, in));
}
2107
2108/**
2109 * xmlCharEncInput:
2110 * @input: a parser input buffer
2111 *
2112 * Generic front-end for the encoding handler on parser input
2113 *
2114 * Returns the number of bytes written or an XML_ENC_ERR code.
2115 */
2116int
2117xmlCharEncInput(xmlParserInputBufferPtr input)
2118{
2119 int ret;
2120 size_t avail;
2121 size_t toconv;
2122 int c_in;
2123 int c_out;
2124 xmlBufPtr in;
2125 xmlBufPtr out;
2126 const xmlChar *inData;
2127 size_t inTotal = 0;
2128
2129 if ((input == NULL) || (input->encoder == NULL) ||
2130 (input->buffer == NULL) || (input->raw == NULL))
2131 return(XML_ENC_ERR_INTERNAL);
2132 out = input->buffer;
2133 in = input->raw;
2134
2135 toconv = xmlBufUse(in);
2136 if (toconv == 0)
2137 return (0);
2138 inData = xmlBufContent(in);
2139 inTotal = 0;
2140
2141 do {
2142 c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
2143
2144 avail = xmlBufAvail(out);
2145 if (avail > INT_MAX)
2146 avail = INT_MAX;
2147 if (avail < 4096) {
2148 if (xmlBufGrow(out, 4096) < 0) {
2149 input->error = XML_ERR_NO_MEMORY;
2150 return(XML_ENC_ERR_MEMORY);
2151 }
2152 avail = xmlBufAvail(out);
2153 }
2154
2155 c_in = toconv;
2156 c_out = avail;
2157 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2158 inData, &c_in);
2159 inTotal += c_in;
2160 inData += c_in;
2161 toconv -= c_in;
2162 xmlBufAddLen(out, c_out);
2163 } while (ret == XML_ENC_ERR_SPACE);
2164
2165 xmlBufShrink(in, inTotal);
2166
2167 if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
2168 input->rawconsumed = ULONG_MAX;
2169 else
2170 input->rawconsumed += c_in;
2171
2172 if ((c_out == 0) && (ret != 0)) {
2173 if (input->error == 0)
2174 input->error = xmlEncConvertError(ret);
2175 return(ret);
2176 }
2177
2178 return (c_out);
2179}
2180
2181/**
2182 * xmlCharEncInFunc:
2183 * @handler: char encoding transformation data structure
2184 * @out: an xmlBuffer for the output.
2185 * @in: an xmlBuffer for the input
2186 *
2187 * Generic front-end for the encoding handler input function
2188 *
2189 * Returns the number of bytes written or an XML_ENC_ERR code.
2190 */
2191int
2192xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2193 xmlBufferPtr in)
2194{
2195 int ret;
2196 int written;
2197 int toconv;
2198
2199 if (handler == NULL)
2200 return(XML_ENC_ERR_INTERNAL);
2201 if (out == NULL)
2202 return(XML_ENC_ERR_INTERNAL);
2203 if (in == NULL)
2204 return(XML_ENC_ERR_INTERNAL);
2205
2206 toconv = in->use;
2207 if (toconv == 0)
2208 return (0);
2209 written = out->size - out->use -1; /* count '\0' */
2210 if (toconv * 2 >= written) {
2211 xmlBufferGrow(out, out->size + toconv * 2);
2212 written = out->size - out->use - 1;
2213 }
2214 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2215 in->content, &toconv);
2216 xmlBufferShrink(in, toconv);
2217 out->use += written;
2218 out->content[out->use] = 0;
2219
2220 return (written? written : ret);
2221}
2222
2223#ifdef LIBXML_OUTPUT_ENABLED
2224/**
2225 * xmlCharEncOutput:
2226 * @output: a parser output buffer
2227 * @init: is this an initialization call without data
2228 *
2229 * Generic front-end for the encoding handler on parser output
2230 * a first call with @init == 1 has to be made first to initiate the
2231 * output in case of non-stateless encoding needing to initiate their
2232 * state or the output (like the BOM in UTF16).
2233 * In case of UTF8 sequence conversion errors for the given encoder,
2234 * the content will be automatically remapped to a CharRef sequence.
2235 *
2236 * Returns the number of bytes written or an XML_ENC_ERR code.
2237 */
2238int
2239xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2240{
2241 int ret;
2242 size_t written;
2243 int writtentot = 0;
2244 size_t toconv;
2245 int c_in;
2246 int c_out;
2247 xmlBufPtr in;
2248 xmlBufPtr out;
2249
2250 if ((output == NULL) || (output->encoder == NULL) ||
2251 (output->buffer == NULL) || (output->conv == NULL))
2252 return(XML_ENC_ERR_INTERNAL);
2253 out = output->conv;
2254 in = output->buffer;
2255
2256retry:
2257
2258 written = xmlBufAvail(out);
2259
2260 /*
2261 * First specific handling of the initialization call
2262 */
2263 if (init) {
2264 c_in = 0;
2265 c_out = written;
2266 /* TODO: Check return value. */
2267 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2268 NULL, &c_in);
2269 xmlBufAddLen(out, c_out);
2270 return(c_out);
2271 }
2272
2273 /*
2274 * Conversion itself.
2275 */
2276 toconv = xmlBufUse(in);
2277 if (toconv > 64 * 1024)
2278 toconv = 64 * 1024;
2279 if (toconv * 4 >= written) {
2280 xmlBufGrow(out, toconv * 4);
2281 written = xmlBufAvail(out);
2282 }
2283 if (written > 256 * 1024)
2284 written = 256 * 1024;
2285
2286 c_in = toconv;
2287 c_out = written;
2288 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2289 xmlBufContent(in), &c_in);
2290 xmlBufShrink(in, c_in);
2291 xmlBufAddLen(out, c_out);
2292 writtentot += c_out;
2293
2294 if (ret == XML_ENC_ERR_SPACE)
2295 goto retry;
2296
2297 /*
2298 * Attempt to handle error cases
2299 */
2300 if (ret == XML_ENC_ERR_INPUT) {
2301 xmlChar charref[20];
2302 int len = xmlBufUse(in);
2303 xmlChar *content = xmlBufContent(in);
2304 int cur, charrefLen;
2305
2306 cur = xmlGetUTF8Char(content, &len);
2307 if (cur <= 0)
2308 goto error;
2309
2310 /*
2311 * Removes the UTF8 sequence, and replace it by a charref
2312 * and continue the transcoding phase, hoping the error
2313 * did not mangle the encoder state.
2314 */
2315 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2316 "&#%d;", cur);
2317 xmlBufShrink(in, len);
2318 xmlBufGrow(out, charrefLen * 4);
2319 c_out = xmlBufAvail(out);
2320 c_in = charrefLen;
2321 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2322 charref, &c_in);
2323 if ((ret < 0) || (c_in != charrefLen)) {
2324 ret = XML_ENC_ERR_INTERNAL;
2325 goto error;
2326 }
2327
2328 xmlBufAddLen(out, c_out);
2329 writtentot += c_out;
2330 goto retry;
2331 }
2332
2333error:
2334 if ((writtentot <= 0) && (ret != 0)) {
2335 if (output->error == 0)
2336 output->error = xmlEncConvertError(ret);
2337 return(ret);
2338 }
2339
2340 return(writtentot);
2341}
2342#endif
2343
2344/**
2345 * xmlCharEncOutFunc:
2346 * @handler: char encoding transformation data structure
2347 * @out: an xmlBuffer for the output.
2348 * @in: an xmlBuffer for the input
2349 *
2350 * Generic front-end for the encoding handler output function
2351 * a first call with @in == NULL has to be made firs to initiate the
2352 * output in case of non-stateless encoding needing to initiate their
2353 * state or the output (like the BOM in UTF16).
2354 * In case of UTF8 sequence conversion errors for the given encoder,
2355 * the content will be automatically remapped to a CharRef sequence.
2356 *
2357 * Returns the number of bytes written or an XML_ENC_ERR code.
2358 */
2359int
2360xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2361 xmlBufferPtr in) {
2362 int ret;
2363 int written;
2364 int writtentot = 0;
2365 int toconv;
2366
2367 if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2368 if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2369
2370retry:
2371
2372 written = out->size - out->use;
2373
2374 if (written > 0)
2375 written--; /* Gennady: count '/0' */
2376
2377 /*
2378 * First specific handling of in = NULL, i.e. the initialization call
2379 */
2380 if (in == NULL) {
2381 toconv = 0;
2382 /* TODO: Check return value. */
2383 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2384 NULL, &toconv);
2385 out->use += written;
2386 out->content[out->use] = 0;
2387 return(0);
2388 }
2389
2390 /*
2391 * Conversion itself.
2392 */
2393 toconv = in->use;
2394 if (toconv * 4 >= written) {
2395 xmlBufferGrow(out, toconv * 4);
2396 written = out->size - out->use - 1;
2397 }
2398 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2399 in->content, &toconv);
2400 xmlBufferShrink(in, toconv);
2401 out->use += written;
2402 writtentot += written;
2403 out->content[out->use] = 0;
2404
2405 if (ret == XML_ENC_ERR_SPACE)
2406 goto retry;
2407
2408 /*
2409 * Attempt to handle error cases
2410 */
2411 if (ret == XML_ENC_ERR_INPUT) {
2412 xmlChar charref[20];
2413 int len = in->use;
2414 const xmlChar *utf = (const xmlChar *) in->content;
2415 int cur, charrefLen;
2416
2417 cur = xmlGetUTF8Char(utf, &len);
2418 if (cur <= 0)
2419 return(ret);
2420
2421 /*
2422 * Removes the UTF8 sequence, and replace it by a charref
2423 * and continue the transcoding phase, hoping the error
2424 * did not mangle the encoder state.
2425 */
2426 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2427 "&#%d;", cur);
2428 xmlBufferShrink(in, len);
2429 xmlBufferGrow(out, charrefLen * 4);
2430 written = out->size - out->use - 1;
2431 toconv = charrefLen;
2432 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2433 charref, &toconv);
2434 if ((ret < 0) || (toconv != charrefLen))
2435 return(XML_ENC_ERR_INTERNAL);
2436
2437 out->use += written;
2438 writtentot += written;
2439 out->content[out->use] = 0;
2440 goto retry;
2441 }
2442 return(writtentot ? writtentot : ret);
2443}
2444
2445/**
2446 * xmlCharEncCloseFunc:
2447 * @handler: char encoding transformation data structure
2448 *
2449 * Generic front-end for encoding handler close function
2450 *
2451 * Returns 0 if success, or -1 in case of error
2452 */
2453int
2454xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2455 int ret = 0;
2456 int tofree = 0;
2457 int i = 0;
2458
2459 if (handler == NULL) return(-1);
2460
2461 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2462 if (handler == &defaultHandlers[i])
2463 return(0);
2464 }
2465
2466 if (handlers != NULL) {
2467 for (i = 0;i < nbCharEncodingHandler; i++) {
2468 if (handler == handlers[i])
2469 return(0);
2470 }
2471 }
2472#ifdef LIBXML_ICONV_ENABLED
2473 /*
2474 * Iconv handlers can be used only once, free the whole block.
2475 * and the associated icon resources.
2476 */
2477 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2478 tofree = 1;
2479 if (handler->iconv_out != NULL) {
2480 if (iconv_close(handler->iconv_out))
2481 ret = -1;
2482 handler->iconv_out = NULL;
2483 }
2484 if (handler->iconv_in != NULL) {
2485 if (iconv_close(handler->iconv_in))
2486 ret = -1;
2487 handler->iconv_in = NULL;
2488 }
2489 }
2490#endif /* LIBXML_ICONV_ENABLED */
2491#ifdef LIBXML_ICU_ENABLED
2492 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2493 tofree = 1;
2494 if (handler->uconv_out != NULL) {
2495 closeIcuConverter(handler->uconv_out);
2496 handler->uconv_out = NULL;
2497 }
2498 if (handler->uconv_in != NULL) {
2499 closeIcuConverter(handler->uconv_in);
2500 handler->uconv_in = NULL;
2501 }
2502 }
2503#endif
2504 if (tofree) {
2505 /* free up only dynamic handlers iconv/uconv */
2506 if (handler->name != NULL)
2507 xmlFree(handler->name);
2508 handler->name = NULL;
2509 xmlFree(handler);
2510 }
2511
2512 return(ret);
2513}
2514
2515/**
2516 * xmlByteConsumed:
2517 * @ctxt: an XML parser context
2518 *
2519 * This function provides the current index of the parser relative
2520 * to the start of the current entity. This function is computed in
2521 * bytes from the beginning starting at zero and finishing at the
2522 * size in byte of the file if parsing a file. The function is
2523 * of constant cost if the input is UTF-8 but can be costly if run
2524 * on non-UTF-8 input.
2525 *
2526 * Returns the index in bytes from the beginning of the entity or -1
2527 * in case the index could not be computed.
2528 */
2529long
2530xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2531 xmlParserInputPtr in;
2532
2533 if (ctxt == NULL) return(-1);
2534 in = ctxt->input;
2535 if (in == NULL) return(-1);
2536 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2537 unsigned int unused = 0;
2538 xmlCharEncodingHandler * handler = in->buf->encoder;
2539 /*
2540 * Encoding conversion, compute the number of unused original
2541 * bytes from the input not consumed and subtract that from
2542 * the raw consumed value, this is not a cheap operation
2543 */
2544 if (in->end - in->cur > 0) {
2545 unsigned char convbuf[32000];
2546 const unsigned char *cur = (const unsigned char *)in->cur;
2547 int toconv = in->end - in->cur, written = 32000;
2548
2549 int ret;
2550
2551 do {
2552 toconv = in->end - cur;
2553 written = 32000;
2554 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2555 cur, &toconv);
2556 if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2557 return(-1);
2558 unused += written;
2559 cur += toconv;
2560 } while (ret == XML_ENC_ERR_SPACE);
2561 }
2562 if (in->buf->rawconsumed < unused)
2563 return(-1);
2564 return(in->buf->rawconsumed - unused);
2565 }
2566 return(in->consumed + (in->cur - in->base));
2567}
2568
2569#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2570#ifdef LIBXML_ISO8859X_ENABLED
2571
2572/**
2573 * UTF8ToISO8859x:
2574 * @out: a pointer to an array of bytes to store the result
2575 * @outlen: the length of @out
2576 * @in: a pointer to an array of UTF-8 chars
2577 * @inlen: the length of @in
2578 * @xlattable: the 2-level transcoding table
2579 *
2580 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2581 * block of chars out.
2582 *
2583 * Returns the number of bytes written or an XML_ENC_ERR code.
2584 *
2585 * The value of @inlen after return is the number of octets consumed
2586 * as the return value is positive, else unpredictable.
2587 * The value of @outlen after return is the number of octets consumed.
2588 */
2589static int
2590UTF8ToISO8859x(unsigned char* out, int *outlen,
2591 const unsigned char* in, int *inlen,
2592 const unsigned char* const xlattable) {
2593 const unsigned char* outstart = out;
2594 const unsigned char* inend;
2595 const unsigned char* instart = in;
2596 const unsigned char* processed = in;
2597
2598 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2599 (xlattable == NULL))
2600 return(XML_ENC_ERR_INTERNAL);
2601 if (in == NULL) {
2602 /*
2603 * initialization nothing to do
2604 */
2605 *outlen = 0;
2606 *inlen = 0;
2607 return(0);
2608 }
2609 inend = in + (*inlen);
2610 while (in < inend) {
2611 unsigned char d = *in++;
2612 if (d < 0x80) {
2613 *out++ = d;
2614 } else if (d < 0xC0) {
2615 /* trailing byte in leading position */
2616 *outlen = out - outstart;
2617 *inlen = processed - instart;
2618 return(XML_ENC_ERR_INPUT);
2619 } else if (d < 0xE0) {
2620 unsigned char c;
2621 if (!(in < inend)) {
2622 /* trailing byte not in input buffer */
2623 *outlen = out - outstart;
2624 *inlen = processed - instart;
2625 return(XML_ENC_ERR_PARTIAL);
2626 }
2627 c = *in++;
2628 if ((c & 0xC0) != 0x80) {
2629 /* not a trailing byte */
2630 *outlen = out - outstart;
2631 *inlen = processed - instart;
2632 return(XML_ENC_ERR_INPUT);
2633 }
2634 c = c & 0x3F;
2635 d = d & 0x1F;
2636 d = xlattable [48 + c + xlattable [d] * 64];
2637 if (d == 0) {
2638 /* not in character set */
2639 *outlen = out - outstart;
2640 *inlen = processed - instart;
2641 return(XML_ENC_ERR_INPUT);
2642 }
2643 *out++ = d;
2644 } else if (d < 0xF0) {
2645 unsigned char c1;
2646 unsigned char c2;
2647 if (!(in < inend - 1)) {
2648 /* trailing bytes not in input buffer */
2649 *outlen = out - outstart;
2650 *inlen = processed - instart;
2651 return(XML_ENC_ERR_PARTIAL);
2652 }
2653 c1 = *in++;
2654 if ((c1 & 0xC0) != 0x80) {
2655 /* not a trailing byte (c1) */
2656 *outlen = out - outstart;
2657 *inlen = processed - instart;
2658 return(XML_ENC_ERR_INPUT);
2659 }
2660 c2 = *in++;
2661 if ((c2 & 0xC0) != 0x80) {
2662 /* not a trailing byte (c2) */
2663 *outlen = out - outstart;
2664 *inlen = processed - instart;
2665 return(XML_ENC_ERR_INPUT);
2666 }
2667 c1 = c1 & 0x3F;
2668 c2 = c2 & 0x3F;
2669 d = d & 0x0F;
2670 d = xlattable [48 + c2 + xlattable [48 + c1 +
2671 xlattable [32 + d] * 64] * 64];
2672 if (d == 0) {
2673 /* not in character set */
2674 *outlen = out - outstart;
2675 *inlen = processed - instart;
2676 return(XML_ENC_ERR_INPUT);
2677 }
2678 *out++ = d;
2679 } else {
2680 /* cannot transcode >= U+010000 */
2681 *outlen = out - outstart;
2682 *inlen = processed - instart;
2683 return(XML_ENC_ERR_INPUT);
2684 }
2685 processed = in;
2686 }
2687 *outlen = out - outstart;
2688 *inlen = processed - instart;
2689 return(*outlen);
2690}
2691
2692/**
2693 * ISO8859xToUTF8
2694 * @out: a pointer to an array of bytes to store the result
2695 * @outlen: the length of @out
2696 * @in: a pointer to an array of ISO Latin 1 chars
2697 * @inlen: the length of @in
2698 *
2699 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2700 * block of chars out.
2701 *
2702 * Returns the number of bytes written or an XML_ENC_ERR code.
2703 *
2704 * The value of @inlen after return is the number of octets consumed
2705 * The value of @outlen after return is the number of octets produced.
2706 */
2707static int
2708ISO8859xToUTF8(unsigned char* out, int *outlen,
2709 const unsigned char* in, int *inlen,
2710 unsigned short const *unicodetable) {
2711 unsigned char* outstart = out;
2712 unsigned char* outend;
2713 const unsigned char* instart = in;
2714 const unsigned char* inend;
2715 const unsigned char* instop;
2716 unsigned int c;
2717
2718 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2719 (in == NULL) || (unicodetable == NULL))
2720 return(XML_ENC_ERR_INTERNAL);
2721 outend = out + *outlen;
2722 inend = in + *inlen;
2723 instop = inend;
2724
2725 while ((in < inend) && (out < outend - 2)) {
2726 if (*in >= 0x80) {
2727 c = unicodetable [*in - 0x80];
2728 if (c == 0) {
2729 /* undefined code point */
2730 *outlen = out - outstart;
2731 *inlen = in - instart;
2732 return(XML_ENC_ERR_INPUT);
2733 }
2734 if (c < 0x800) {
2735 *out++ = ((c >> 6) & 0x1F) | 0xC0;
2736 *out++ = (c & 0x3F) | 0x80;
2737 } else {
2738 *out++ = ((c >> 12) & 0x0F) | 0xE0;
2739 *out++ = ((c >> 6) & 0x3F) | 0x80;
2740 *out++ = (c & 0x3F) | 0x80;
2741 }
2742 ++in;
2743 }
2744 if (instop - in > outend - out) instop = in + (outend - out);
2745 while ((*in < 0x80) && (in < instop)) {
2746 *out++ = *in++;
2747 }
2748 }
2749 if ((in < inend) && (out < outend) && (*in < 0x80)) {
2750 *out++ = *in++;
2751 }
2752 if ((in < inend) && (out < outend) && (*in < 0x80)) {
2753 *out++ = *in++;
2754 }
2755 *outlen = out - outstart;
2756 *inlen = in - instart;
2757 return (*outlen);
2758}
2759
2760
2761/************************************************************************
2762 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2763 ************************************************************************/
2764
2765static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2766 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2767 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2768 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2769 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2770 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2771 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2772 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2773 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2774 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2775 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2776 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2777 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2778 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2779 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2780 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2781 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2782};
2783
2784static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2785 "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2786 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2787 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2788 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2789 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2790 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2791 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2792 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2793 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2794 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2795 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2796 "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2797 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2798 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2799 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2800 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2801 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2802 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2803 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2804 "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2805 "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2806 "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2807 "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2808 "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2809 "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2810 "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2811 "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2812};
2813
2814static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2815 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2816 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2817 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2818 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2819 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2820 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2821 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2822 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2823 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2824 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2825 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2826 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2827 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2828 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2829 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2830 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2831};
2832
2833static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2834 "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2835 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2836 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2837 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2839 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2840 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2841 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2842 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2843 "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2844 "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2845 "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2846 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2847 "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2848 "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850 "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2851 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853 "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2855 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2856 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2857 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2858 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2859 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2860 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2861 "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2862 "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2863 "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2864 "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2865};
2866
2867static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2868 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2869 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2870 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2871 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2872 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2873 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2874 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2875 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2876 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2877 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2878 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2879 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2880 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2881 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2882 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2883 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2884};
2885
2886static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2887 "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2888 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2892 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2893 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2894 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2895 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2896 "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2897 "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2898 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2899 "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2900 "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2901 "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2902 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2903 "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2904 "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2905 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2906 "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2907 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2908 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2909 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2910 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2911 "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2912 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2913 "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2914};
2915
2916static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2917 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2918 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2919 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2920 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2921 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2922 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2923 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2924 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2925 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2926 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2927 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2928 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2929 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2930 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2931 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2932 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2933};
2934
2935static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2936 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2938 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2944 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2945 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2946 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2947 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2948 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2949 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2950 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2951 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2952 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2953 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2954 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2955 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2956 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2957 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2958 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2959 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2960 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2961 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2962 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2963};
2964
2965static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2966 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2967 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2968 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2969 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2970 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2971 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2972 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2973 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2974 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2975 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2976 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2977 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2978 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2979 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2980 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2981 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2982};
2983
/*
 * UTF-8 to ISO-8859-6 lookup data: 48 header bytes followed by five 64-byte
 * blocks. The string literal supplies exactly 48 + 5 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
2984static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2985 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2987 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2991 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2993 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2994 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2995 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2998 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2999 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3001 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3002 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3003 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3004 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3005 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3006 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3007 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3008};
3009
/*
 * ISO-8859-7 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. A handful of slots hold 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8, with
 * 0x0000 standing for "byte has no mapping" -- confirm against that helper.
 */
3010static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3011 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3012 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3013 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3014 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3015 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3016 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3017 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3018 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3019 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3020 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3021 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3022 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3023 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3024 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3025 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3026 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3027};
3028
/*
 * UTF-8 to ISO-8859-7 lookup data: 48 header bytes followed by seven 64-byte
 * blocks. The string literal supplies exactly 48 + 7 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3029static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3030 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3031 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3035 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3036 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3037 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3038 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3039 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3040 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3041 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3042 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3044 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3047 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3048 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3051 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3052 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3053 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3054 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3055 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3056 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3057 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3058 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3059 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3060 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061};
3062
/*
 * ISO-8859-8 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. A long run in the middle of the table holds
 * 0x0000.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8, with
 * 0x0000 standing for "byte has no mapping" -- confirm against that helper.
 */
3063static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
3064 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3065 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3066 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3067 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3068 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3069 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3070 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3071 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3072 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3073 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3074 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3075 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3076 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3077 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3078 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3079 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3080};
3081
/*
 * UTF-8 to ISO-8859-8 lookup data: 48 header bytes followed by seven 64-byte
 * blocks. The string literal supplies exactly 48 + 7 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3082static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3083 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3084 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3085 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3086 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3087 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3088 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3090 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3091 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3092 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3093 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3094 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3100 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3101 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3102 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3103 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3104 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3105 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3106 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3107 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3108 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3109 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3112 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3113 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114};
3115
/*
 * ISO-8859-9 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. Every slot is non-zero; only six entries
 * (0x011e, 0x0130, 0x015e, 0x011f, 0x0131, 0x015f) lie outside 0x0080..0x00ff.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8 --
 * confirm against that helper.
 */
3116static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3117 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3118 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3119 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3120 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3121 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3122 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3123 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3124 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3125 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3126 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3127 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3128 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3129 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3130 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3131 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3132 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3133};
3134
/*
 * UTF-8 to ISO-8859-9 lookup data: 48 header bytes followed by five 64-byte
 * blocks. The string literal supplies exactly 48 + 5 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3135static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3136 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3142 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3143 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3144 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3145 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3146 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3147 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3148 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3149 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3150 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3153 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159};
3160
/*
 * ISO-8859-10 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. Every slot is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8 --
 * confirm against that helper.
 */
3161static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3162 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3163 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3164 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3165 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3166 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3167 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3168 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3169 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3170 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3171 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3172 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3173 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3174 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3175 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3176 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3177 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3178};
3179
/*
 * UTF-8 to ISO-8859-10 lookup data: 48 header bytes followed by seven
 * 64-byte blocks. The string literal supplies exactly 48 + 7 * 64 bytes, so
 * the array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3180static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3181 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3182 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3185 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3189 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3190 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3191 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3192 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3193 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3194 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3195 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3199 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3200 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3201 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3202 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3203 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3207 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3208 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3209 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3210 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3211 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3212};
3213
/*
 * ISO-8859-11 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. After 0x00a0 the entries run through
 * 0x0e01..0x0e3a and 0x0e3f..0x0e5b, with two groups of four 0x0000 slots.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8, with
 * 0x0000 standing for "byte has no mapping" -- confirm against that helper.
 */
3214static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3215 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3216 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3217 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3218 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3219 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3220 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3221 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3222 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3223 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3224 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3225 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3226 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3227 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3228 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3229 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3230 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3231};
3232
/*
 * UTF-8 to ISO-8859-11 lookup data: 48 header bytes followed by six 64-byte
 * blocks. The string literal supplies exactly 48 + 6 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3233static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3234 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3242 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3243 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3249 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3250 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3251 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3252 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3253 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3258 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3259 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261};
3262
/*
 * ISO-8859-13 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. Every slot is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8 --
 * confirm against that helper.
 */
3263static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3264 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3265 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3266 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3267 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3268 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3269 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3270 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3271 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3272 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3273 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3274 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3275 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3276 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3277 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3278 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3279 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3280};
3281
/*
 * UTF-8 to ISO-8859-13 lookup data: 48 header bytes followed by seven
 * 64-byte blocks. The string literal supplies exactly 48 + 7 * 64 bytes, so
 * the array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3282static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3283 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3289 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3290 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3291 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3292 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3293 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3294 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3300 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3303 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3304 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3305 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3306 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3308 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3309 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3310 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3311 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3312 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3313 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3314};
3315
/*
 * ISO-8859-14 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. Every slot is non-zero; many entries fall in the
 * 0x1exx range.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8 --
 * confirm against that helper.
 */
3316static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3317 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3318 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3319 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3320 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3321 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3322 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3323 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3324 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3325 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3326 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3327 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3328 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3329 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3330 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3331 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3332 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3333};
3334
/*
 * UTF-8 to ISO-8859-14 lookup data: 48 header bytes followed by ten 64-byte
 * blocks. The string literal supplies exactly 48 + 10 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3335static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3336 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3342 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3344 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3345 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3346 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3349 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3351 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3352 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3353 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3356 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3358 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3360 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3367 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3370 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3371 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3373 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3374 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3376 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3377 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3378 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3379};
3380
/*
 * ISO-8859-15 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. Every slot is non-zero; only eight entries
 * (0x20ac, 0x0160, 0x0161, 0x017d, 0x017e, 0x0152, 0x0153, 0x0178) lie
 * outside 0x0080..0x00ff.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8 --
 * confirm against that helper.
 */
3381static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3382 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3383 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3384 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3385 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3386 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3387 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3388 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3389 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3390 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3391 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3392 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3393 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3394 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3395 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3396 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3397 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3398};
3399
/*
 * UTF-8 to ISO-8859-15 lookup data: 48 header bytes followed by six 64-byte
 * blocks. The string literal supplies exactly 48 + 6 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3400static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3401 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3407 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3408 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3409 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3410 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3411 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3412 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3413 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3414 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3415 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3416 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3417 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3418 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3419 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3420 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3421 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3422 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3423 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3424 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3425 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3426 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3427 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3428};
3429
/*
 * ISO-8859-16 to Unicode table: 128 code points, the first 32 of which are
 * 0x0080..0x009f in order. Every slot is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) in ISO8859xToUTF8 --
 * confirm against that helper.
 */
3430static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3431 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3432 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3433 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3434 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3435 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3436 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3437 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3438 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3439 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3440 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3441 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3442 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3443 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3444 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3445 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3446 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3447};
3448
/*
 * UTF-8 to ISO-8859-16 lookup data: 48 header bytes followed by nine 64-byte
 * blocks. The string literal supplies exactly 48 + 9 * 64 bytes, so the
 * array is filled completely and no terminating NUL is stored.
 * NOTE(review): presumably the header picks a block from the UTF-8 lead byte
 * and the block is indexed by the low six bits of a continuation byte, with
 * 0x00 meaning "not representable" -- confirm in UTF8ToISO8859x.
 */
3449static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3450 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3451 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3456 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3457 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3458 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3459 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3460 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3461 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3462 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3463 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3464 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3465 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3466 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3467 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3468 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3469 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3470 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3471 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3472 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3473 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3474 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3475 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3476 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3477 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3478 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3479 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3480 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3481 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3483 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3486 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3487 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3488 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3489};
3490
3491
3492/*
3493 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3494 */
3495
3496static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3497 const unsigned char* in, int *inlen) {
3498 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3499}
3500static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3501 const unsigned char* in, int *inlen) {
3502 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3503}
3504
3505static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3506 const unsigned char* in, int *inlen) {
3507 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3508}
3509static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3510 const unsigned char* in, int *inlen) {
3511 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3512}
3513
3514static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3515 const unsigned char* in, int *inlen) {
3516 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3517}
3518static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3519 const unsigned char* in, int *inlen) {
3520 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3521}
3522
3523static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3524 const unsigned char* in, int *inlen) {
3525 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3526}
3527static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3528 const unsigned char* in, int *inlen) {
3529 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3530}
3531
3532static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3533 const unsigned char* in, int *inlen) {
3534 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3535}
3536static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3537 const unsigned char* in, int *inlen) {
3538 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3539}
3540
3541static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3542 const unsigned char* in, int *inlen) {
3543 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3544}
3545static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3546 const unsigned char* in, int *inlen) {
3547 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3548}
3549
3550static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3551 const unsigned char* in, int *inlen) {
3552 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3553}
3554static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3555 const unsigned char* in, int *inlen) {
3556 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3557}
3558
3559static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3560 const unsigned char* in, int *inlen) {
3561 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3562}
3563static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3564 const unsigned char* in, int *inlen) {
3565 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3566}
3567
3568static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3569 const unsigned char* in, int *inlen) {
3570 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3571}
3572static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3573 const unsigned char* in, int *inlen) {
3574 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3575}
3576
3577static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3578 const unsigned char* in, int *inlen) {
3579 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3580}
3581static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3582 const unsigned char* in, int *inlen) {
3583 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3584}
3585
3586static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3587 const unsigned char* in, int *inlen) {
3588 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3589}
3590static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3591 const unsigned char* in, int *inlen) {
3592 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3593}
3594
3595static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3596 const unsigned char* in, int *inlen) {
3597 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3598}
3599static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3600 const unsigned char* in, int *inlen) {
3601 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3602}
3603
3604static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3605 const unsigned char* in, int *inlen) {
3606 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3607}
3608static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3609 const unsigned char* in, int *inlen) {
3610 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3611}
3612
3613static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3614 const unsigned char* in, int *inlen) {
3615 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3616}
3617static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3618 const unsigned char* in, int *inlen) {
3619 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3620}
3621
3622#endif
3623#endif
3624
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette