VirtualBox

source: vbox/trunk/src/libs/libxml2-2.13.2/encoding.c

最後變更 在這個檔案是 105420,由 vboxsync 提交於 4 月 前

libxml2-2.12.6: Applied and adjusted our libxml2 changes to 2.12.6. bugref:10730

  • 屬性 svn:eol-style 設為 native
檔案大小: 134.0 KB
 
1/*
2 * encoding.c : implements the encoding conversion functions needed for XML
3 *
4 * Related specs:
5 * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6 * rfc2781 UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7 * [ISO-10646] UTF-8 and UTF-16 in Annexes
8 * [ISO-8859-1] ISO Latin-1 characters codes.
9 * [UNICODE] The Unicode Consortium, "The Unicode Standard --
10 * Worldwide Character Encoding -- Version 1.0", Addison-
11 * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
12 * described in Unicode Technical Report #4.
13 * [US-ASCII] Coded Character Set--7-bit American Standard Code for
14 * Information Interchange, ANSI X3.4-1986.
15 *
16 * See Copyright for the status of this software.
17 *
18 * [email protected]
19 *
20 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <[email protected]>
21 */
22
23#define IN_LIBXML
24#include "libxml.h"
25
26#include <string.h>
27#include <limits.h>
28#include <ctype.h>
29#include <stdlib.h>
30
31#ifdef LIBXML_ICONV_ENABLED
32#include <errno.h>
33#endif
34
35#include <libxml/encoding.h>
36#include <libxml/xmlmemory.h>
37#include <libxml/parser.h>
38#ifdef LIBXML_HTML_ENABLED
39#include <libxml/HTMLparser.h>
40#endif
41#include <libxml/xmlerror.h>
42
43#include "private/buf.h"
44#include "private/enc.h"
45#include "private/error.h"
46
47#ifdef LIBXML_ICU_ENABLED
48#include <unicode/ucnv.h>
49/* Size of pivot buffer, same as icu/source/common/ucnv.cpp CHUNK_SIZE */
50#define ICU_PIVOT_BUF_SIZE 1024
51typedef struct _uconv_t uconv_t;
52struct _uconv_t {
53 UConverter *uconv; /* for conversion between an encoding and UTF-16 */
54 UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */
55 UChar pivot_buf[ICU_PIVOT_BUF_SIZE];
56 UChar *pivot_source;
57 UChar *pivot_target;
58};
59#endif
60
/* One alias table entry: @alias is an alternative spelling of @name. */
typedef struct _xmlCharEncodingAlias {
    const char *name;
    const char *alias;
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;

/*
 * Alias table: the entry array, the number of entries in use and the
 * number of entries allocated.
 */
static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
static int xmlCharEncodingAliasesNb = 0;
static int xmlCharEncodingAliasesMax = 0;

/*
 * Non-zero when the UTF-16 converters may treat the host as little-endian.
 * NOTE(review): initialised to 1 here; presumably corrected at library
 * initialisation on big-endian hosts -- that code is not in view.
 */
static int xmlLittleEndian = 1;
73
74/************************************************************************
75 * *
76 * Conversions To/From UTF8 encoding *
77 * *
78 ************************************************************************/
79
80/**
81 * asciiToUTF8:
82 * @out: a pointer to an array of bytes to store the result
83 * @outlen: the length of @out
84 * @in: a pointer to an array of ASCII chars
85 * @inlen: the length of @in
86 *
87 * Take a block of ASCII chars in and try to convert it to an UTF-8
88 * block of chars out.
89 *
90 * Returns the number of bytes written or an XML_ENC_ERR code.
91 *
92 * The value of @inlen after return is the number of octets consumed
93 * if the return value is positive, else unpredictable.
94 * The value of @outlen after return is the number of octets produced.
95 */
96static int
97asciiToUTF8(unsigned char* out, int *outlen,
98 const unsigned char* in, int *inlen) {
99 unsigned char* outstart = out;
100 const unsigned char* base = in;
101 const unsigned char* processed = in;
102 unsigned char* outend = out + *outlen;
103 const unsigned char* inend;
104 unsigned int c;
105
106 inend = in + (*inlen);
107 while ((in < inend) && (out - outstart + 5 < *outlen)) {
108 c= *in++;
109
110 if (out >= outend)
111 break;
112 if (c < 0x80) {
113 *out++ = c;
114 } else {
115 *outlen = out - outstart;
116 *inlen = processed - base;
117 return(XML_ENC_ERR_INPUT);
118 }
119
120 processed = (const unsigned char*) in;
121 }
122 *outlen = out - outstart;
123 *inlen = processed - base;
124 return(*outlen);
125}
126
127#ifdef LIBXML_OUTPUT_ENABLED
128/**
129 * UTF8Toascii:
130 * @out: a pointer to an array of bytes to store the result
131 * @outlen: the length of @out
132 * @in: a pointer to an array of UTF-8 chars
133 * @inlen: the length of @in
134 *
135 * Take a block of UTF-8 chars in and try to convert it to an ASCII
136 * block of chars out.
137 *
138 * Returns the number of bytes written or an XML_ENC_ERR code.
139 *
140 * The value of @inlen after return is the number of octets consumed
141 * if the return value is positive, else unpredictable.
142 * The value of @outlen after return is the number of octets produced.
143 */
144static int
145UTF8Toascii(unsigned char* out, int *outlen,
146 const unsigned char* in, int *inlen) {
147 const unsigned char* processed = in;
148 const unsigned char* outend;
149 const unsigned char* outstart = out;
150 const unsigned char* instart = in;
151 const unsigned char* inend;
152 unsigned int c, d;
153 int trailing;
154
155 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
156 return(XML_ENC_ERR_INTERNAL);
157 if (in == NULL) {
158 /*
159 * initialization nothing to do
160 */
161 *outlen = 0;
162 *inlen = 0;
163 return(0);
164 }
165 inend = in + (*inlen);
166 outend = out + (*outlen);
167 while (in < inend) {
168 d = *in++;
169 if (d < 0x80) { c= d; trailing= 0; }
170 else if (d < 0xC0) {
171 /* trailing byte in leading position */
172 *outlen = out - outstart;
173 *inlen = processed - instart;
174 return(XML_ENC_ERR_INPUT);
175 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
176 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
177 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
178 else {
179 /* no chance for this in Ascii */
180 *outlen = out - outstart;
181 *inlen = processed - instart;
182 return(XML_ENC_ERR_INPUT);
183 }
184
185 if (inend - in < trailing) {
186 break;
187 }
188
189 for ( ; trailing; trailing--) {
190 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
191 break;
192 c <<= 6;
193 c |= d & 0x3F;
194 }
195
196 /* assertion: c is a single UTF-4 value */
197 if (c < 0x80) {
198 if (out >= outend)
199 break;
200 *out++ = c;
201 } else {
202 /* no chance for this in Ascii */
203 *outlen = out - outstart;
204 *inlen = processed - instart;
205 return(XML_ENC_ERR_INPUT);
206 }
207 processed = in;
208 }
209 *outlen = out - outstart;
210 *inlen = processed - instart;
211 return(*outlen);
212}
213#endif /* LIBXML_OUTPUT_ENABLED */
214
215/**
216 * isolat1ToUTF8:
217 * @out: a pointer to an array of bytes to store the result
218 * @outlen: the length of @out
219 * @in: a pointer to an array of ISO Latin 1 chars
220 * @inlen: the length of @in
221 *
222 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
223 * block of chars out.
224 *
225 * Returns the number of bytes written or an XML_ENC_ERR code.
226 *
227 * The value of @inlen after return is the number of octets consumed
228 * if the return value is positive, else unpredictable.
229 * The value of @outlen after return is the number of octets produced.
230 */
231int
232isolat1ToUTF8(unsigned char* out, int *outlen,
233 const unsigned char* in, int *inlen) {
234 unsigned char* outstart = out;
235 const unsigned char* base = in;
236 unsigned char* outend;
237 const unsigned char* inend;
238 const unsigned char* instop;
239
240 if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
241 return(XML_ENC_ERR_INTERNAL);
242
243 outend = out + *outlen;
244 inend = in + (*inlen);
245 instop = inend;
246
247 while ((in < inend) && (out < outend - 1)) {
248 if (*in >= 0x80) {
249 *out++ = (((*in) >> 6) & 0x1F) | 0xC0;
250 *out++ = ((*in) & 0x3F) | 0x80;
251 ++in;
252 }
253 if ((instop - in) > (outend - out)) instop = in + (outend - out);
254 while ((in < instop) && (*in < 0x80)) {
255 *out++ = *in++;
256 }
257 }
258 if ((in < inend) && (out < outend) && (*in < 0x80)) {
259 *out++ = *in++;
260 }
261 *outlen = out - outstart;
262 *inlen = in - base;
263 return(*outlen);
264}
265
266/**
267 * UTF8ToUTF8:
268 * @out: a pointer to an array of bytes to store the result
269 * @outlen: the length of @out
270 * @inb: a pointer to an array of UTF-8 chars
271 * @inlenb: the length of @in in UTF-8 chars
272 *
273 * No op copy operation for UTF8 handling.
274 *
275 * Returns the number of bytes written or an XML_ENC_ERR code.
276 *
277 * The value of *inlen after return is the number of octets consumed
278 * if the return value is positive, else unpredictable.
279 */
280static int
281UTF8ToUTF8(unsigned char* out, int *outlen,
282 const unsigned char* inb, int *inlenb)
283{
284 int len;
285
286 if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
287 return(XML_ENC_ERR_INTERNAL);
288 if (inb == NULL) {
289 /* inb == NULL means output is initialized. */
290 *outlen = 0;
291 *inlenb = 0;
292 return(0);
293 }
294 if (*outlen > *inlenb) {
295 len = *inlenb;
296 } else {
297 len = *outlen;
298 }
299 if (len < 0)
300 return(XML_ENC_ERR_INTERNAL);
301
302 /*
303 * FIXME: Conversion functions must assure valid UTF-8, so we have
304 * to check for UTF-8 validity. Preferably, this converter shouldn't
305 * be used at all.
306 */
307 memcpy(out, inb, len);
308
309 *outlen = len;
310 *inlenb = len;
311 return(*outlen);
312}
313
314
315#ifdef LIBXML_OUTPUT_ENABLED
316/**
317 * UTF8Toisolat1:
318 * @out: a pointer to an array of bytes to store the result
319 * @outlen: the length of @out
320 * @in: a pointer to an array of UTF-8 chars
321 * @inlen: the length of @in
322 *
323 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
324 * block of chars out.
325 *
326 * Returns the number of bytes written or an XML_ENC_ERR code.
327 *
328 * The value of @inlen after return is the number of octets consumed
329 * if the return value is positive, else unpredictable.
330 * The value of @outlen after return is the number of octets produced.
331 */
332int
333UTF8Toisolat1(unsigned char* out, int *outlen,
334 const unsigned char* in, int *inlen) {
335 const unsigned char* processed = in;
336 const unsigned char* outend;
337 const unsigned char* outstart = out;
338 const unsigned char* instart = in;
339 const unsigned char* inend;
340 unsigned int c, d;
341 int trailing;
342
343 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
344 return(XML_ENC_ERR_INTERNAL);
345 if (in == NULL) {
346 /*
347 * initialization nothing to do
348 */
349 *outlen = 0;
350 *inlen = 0;
351 return(0);
352 }
353 inend = in + (*inlen);
354 outend = out + (*outlen);
355 while (in < inend) {
356 d = *in++;
357 if (d < 0x80) { c= d; trailing= 0; }
358 else if (d < 0xC0) {
359 /* trailing byte in leading position */
360 *outlen = out - outstart;
361 *inlen = processed - instart;
362 return(XML_ENC_ERR_INPUT);
363 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
364 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
365 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
366 else {
367 /* no chance for this in IsoLat1 */
368 *outlen = out - outstart;
369 *inlen = processed - instart;
370 return(XML_ENC_ERR_INPUT);
371 }
372
373 if (inend - in < trailing) {
374 break;
375 }
376
377 for ( ; trailing; trailing--) {
378 if (in >= inend)
379 break;
380 if (((d= *in++) & 0xC0) != 0x80) {
381 *outlen = out - outstart;
382 *inlen = processed - instart;
383 return(XML_ENC_ERR_INPUT);
384 }
385 c <<= 6;
386 c |= d & 0x3F;
387 }
388
389 /* assertion: c is a single UTF-4 value */
390 if (c <= 0xFF) {
391 if (out >= outend)
392 break;
393 *out++ = c;
394 } else {
395 /* no chance for this in IsoLat1 */
396 *outlen = out - outstart;
397 *inlen = processed - instart;
398 return(XML_ENC_ERR_INPUT);
399 }
400 processed = in;
401 }
402 *outlen = out - outstart;
403 *inlen = processed - instart;
404 return(*outlen);
405}
406#endif /* LIBXML_OUTPUT_ENABLED */
407
408/**
409 * UTF16LEToUTF8:
410 * @out: a pointer to an array of bytes to store the result
411 * @outlen: the length of @out
412 * @inb: a pointer to an array of UTF-16LE passwd as a byte array
413 * @inlenb: the length of @in in UTF-16LE chars
414 *
415 * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
416 * block of chars out. This function assumes the endian property
417 * is the same between the native type of this machine and the
418 * inputed one.
419 *
420 * Returns the number of bytes written or an XML_ENC_ERR code.
421 *
422 * The value of *inlen after return is the number of octets consumed
423 * if the return value is positive, else unpredictable.
424 */
425static int
426UTF16LEToUTF8(unsigned char* out, int *outlen,
427 const unsigned char* inb, int *inlenb)
428{
429 unsigned char* outstart = out;
430 const unsigned char* processed = inb;
431 unsigned char* outend;
432 unsigned short* in = (unsigned short *) (void *) inb;
433 unsigned short* inend;
434 unsigned int c, d, inlen;
435 unsigned char *tmp;
436 int bits;
437
438 if (*outlen == 0) {
439 *inlenb = 0;
440 return(0);
441 }
442 outend = out + *outlen;
443 if ((*inlenb % 2) == 1)
444 (*inlenb)--;
445 inlen = *inlenb / 2;
446 inend = in + inlen;
447 while ((in < inend) && (out - outstart + 5 < *outlen)) {
448 if (xmlLittleEndian) {
449 c= *in++;
450 } else {
451 tmp = (unsigned char *) in;
452 c = *tmp++;
453 c = c | (*tmp << 8);
454 in++;
455 }
456 if ((c & 0xFC00) == 0xD800) { /* surrogates */
457 if (in >= inend) { /* handle split mutli-byte characters */
458 break;
459 }
460 if (xmlLittleEndian) {
461 d = *in++;
462 } else {
463 tmp = (unsigned char *) in;
464 d = *tmp++;
465 d = d | (*tmp << 8);
466 in++;
467 }
468 if ((d & 0xFC00) == 0xDC00) {
469 c &= 0x03FF;
470 c <<= 10;
471 c |= d & 0x03FF;
472 c += 0x10000;
473 }
474 else {
475 *outlen = out - outstart;
476 *inlenb = processed - inb;
477 return(XML_ENC_ERR_INPUT);
478 }
479 }
480
481 /* assertion: c is a single UTF-4 value */
482 if (out >= outend)
483 break;
484 if (c < 0x80) { *out++= c; bits= -6; }
485 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
486 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
487 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
488
489 for ( ; bits >= 0; bits-= 6) {
490 if (out >= outend)
491 break;
492 *out++= ((c >> bits) & 0x3F) | 0x80;
493 }
494 processed = (const unsigned char*) in;
495 }
496 *outlen = out - outstart;
497 *inlenb = processed - inb;
498 return(*outlen);
499}
500
501#ifdef LIBXML_OUTPUT_ENABLED
502/**
503 * UTF8ToUTF16LE:
504 * @outb: a pointer to an array of bytes to store the result
505 * @outlen: the length of @outb
506 * @in: a pointer to an array of UTF-8 chars
507 * @inlen: the length of @in
508 *
509 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
510 * block of chars out.
511 *
512 * Returns the number of bytes written or an XML_ENC_ERR code.
513 */
514static int
515UTF8ToUTF16LE(unsigned char* outb, int *outlen,
516 const unsigned char* in, int *inlen)
517{
518 unsigned short* out = (unsigned short *) (void *) outb;
519 const unsigned char* processed = in;
520 const unsigned char *const instart = in;
521 unsigned short* outstart= out;
522 unsigned short* outend;
523 const unsigned char* inend;
524 unsigned int c, d;
525 int trailing;
526 unsigned char *tmp;
527 unsigned short tmp1, tmp2;
528
529 /* UTF16LE encoding has no BOM */
530 if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
531 return(XML_ENC_ERR_INTERNAL);
532 if (in == NULL) {
533 *outlen = 0;
534 *inlen = 0;
535 return(0);
536 }
537 inend= in + *inlen;
538 outend = out + (*outlen / 2);
539 while (in < inend) {
540 d= *in++;
541 if (d < 0x80) { c= d; trailing= 0; }
542 else if (d < 0xC0) {
543 /* trailing byte in leading position */
544 *outlen = (out - outstart) * 2;
545 *inlen = processed - instart;
546 return(XML_ENC_ERR_INPUT);
547 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
548 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
549 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
550 else {
551 /* no chance for this in UTF-16 */
552 *outlen = (out - outstart) * 2;
553 *inlen = processed - instart;
554 return(XML_ENC_ERR_INPUT);
555 }
556
557 if (inend - in < trailing) {
558 break;
559 }
560
561 for ( ; trailing; trailing--) {
562 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
563 break;
564 c <<= 6;
565 c |= d & 0x3F;
566 }
567
568 /* assertion: c is a single UTF-4 value */
569 if (c < 0x10000) {
570 if (out >= outend)
571 break;
572 if (xmlLittleEndian) {
573 *out++ = c;
574 } else {
575 tmp = (unsigned char *) out;
576 *tmp = (unsigned char) c; /* Explicit truncation */
577 *(tmp + 1) = c >> 8 ;
578 out++;
579 }
580 }
581 else if (c < 0x110000) {
582 if (out+1 >= outend)
583 break;
584 c -= 0x10000;
585 if (xmlLittleEndian) {
586 *out++ = 0xD800 | (c >> 10);
587 *out++ = 0xDC00 | (c & 0x03FF);
588 } else {
589 tmp1 = 0xD800 | (c >> 10);
590 tmp = (unsigned char *) out;
591 *tmp = (unsigned char) tmp1; /* Explicit truncation */
592 *(tmp + 1) = tmp1 >> 8;
593 out++;
594
595 tmp2 = 0xDC00 | (c & 0x03FF);
596 tmp = (unsigned char *) out;
597 *tmp = (unsigned char) tmp2; /* Explicit truncation */
598 *(tmp + 1) = tmp2 >> 8;
599 out++;
600 }
601 }
602 else
603 break;
604 processed = in;
605 }
606 *outlen = (out - outstart) * 2;
607 *inlen = processed - instart;
608 return(*outlen);
609}
610
611/**
612 * UTF8ToUTF16:
613 * @outb: a pointer to an array of bytes to store the result
614 * @outlen: the length of @outb
615 * @in: a pointer to an array of UTF-8 chars
616 * @inlen: the length of @in
617 *
618 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
619 * block of chars out.
620 *
621 * Returns the number of bytes written or an XML_ENC_ERR code.
622 */
623static int
624UTF8ToUTF16(unsigned char* outb, int *outlen,
625 const unsigned char* in, int *inlen)
626{
627 if (in == NULL) {
628 /*
629 * initialization, add the Byte Order Mark for UTF-16LE
630 */
631 if (*outlen >= 2) {
632 outb[0] = 0xFF;
633 outb[1] = 0xFE;
634 *outlen = 2;
635 *inlen = 0;
636 return(2);
637 }
638 *outlen = 0;
639 *inlen = 0;
640 return(0);
641 }
642 return (UTF8ToUTF16LE(outb, outlen, in, inlen));
643}
644#endif /* LIBXML_OUTPUT_ENABLED */
645
646/**
647 * UTF16BEToUTF8:
648 * @out: a pointer to an array of bytes to store the result
649 * @outlen: the length of @out
650 * @inb: a pointer to an array of UTF-16 passed as a byte array
651 * @inlenb: the length of @in in UTF-16 chars
652 *
653 * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
654 * block of chars out. This function assumes the endian property
655 * is the same between the native type of this machine and the
656 * inputed one.
657 *
658 * Returns the number of bytes written or an XML_ENC_ERR code.
659 *
660 * The value of *inlen after return is the number of octets consumed
661 * if the return value is positive, else unpredictable.
662 */
663static int
664UTF16BEToUTF8(unsigned char* out, int *outlen,
665 const unsigned char* inb, int *inlenb)
666{
667 unsigned char* outstart = out;
668 const unsigned char* processed = inb;
669 unsigned char* outend;
670 unsigned short* in = (unsigned short *) (void *) inb;
671 unsigned short* inend;
672 unsigned int c, d, inlen;
673 unsigned char *tmp;
674 int bits;
675
676 if (*outlen == 0) {
677 *inlenb = 0;
678 return(0);
679 }
680 outend = out + *outlen;
681 if ((*inlenb % 2) == 1)
682 (*inlenb)--;
683 inlen = *inlenb / 2;
684 inend= in + inlen;
685 while ((in < inend) && (out - outstart + 5 < *outlen)) {
686 if (xmlLittleEndian) {
687 tmp = (unsigned char *) in;
688 c = *tmp++;
689 c = (c << 8) | *tmp;
690 in++;
691 } else {
692 c= *in++;
693 }
694 if ((c & 0xFC00) == 0xD800) { /* surrogates */
695 if (in >= inend) { /* handle split mutli-byte characters */
696 break;
697 }
698 if (xmlLittleEndian) {
699 tmp = (unsigned char *) in;
700 d = *tmp++;
701 d = (d << 8) | *tmp;
702 in++;
703 } else {
704 d= *in++;
705 }
706 if ((d & 0xFC00) == 0xDC00) {
707 c &= 0x03FF;
708 c <<= 10;
709 c |= d & 0x03FF;
710 c += 0x10000;
711 }
712 else {
713 *outlen = out - outstart;
714 *inlenb = processed - inb;
715 return(XML_ENC_ERR_INPUT);
716 }
717 }
718
719 /* assertion: c is a single UTF-4 value */
720 if (out >= outend)
721 break;
722 if (c < 0x80) { *out++= c; bits= -6; }
723 else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
724 else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
725 else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
726
727 for ( ; bits >= 0; bits-= 6) {
728 if (out >= outend)
729 break;
730 *out++= ((c >> bits) & 0x3F) | 0x80;
731 }
732 processed = (const unsigned char*) in;
733 }
734 *outlen = out - outstart;
735 *inlenb = processed - inb;
736 return(*outlen);
737}
738
739#ifdef LIBXML_OUTPUT_ENABLED
740/**
741 * UTF8ToUTF16BE:
742 * @outb: a pointer to an array of bytes to store the result
743 * @outlen: the length of @outb
744 * @in: a pointer to an array of UTF-8 chars
745 * @inlen: the length of @in
746 *
747 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
748 * block of chars out.
749 *
750 * Returns the number of bytes written or an XML_ENC_ERR code.
751 */
752static int
753UTF8ToUTF16BE(unsigned char* outb, int *outlen,
754 const unsigned char* in, int *inlen)
755{
756 unsigned short* out = (unsigned short *) (void *) outb;
757 const unsigned char* processed = in;
758 const unsigned char *const instart = in;
759 unsigned short* outstart= out;
760 unsigned short* outend;
761 const unsigned char* inend;
762 unsigned int c, d;
763 int trailing;
764 unsigned char *tmp;
765 unsigned short tmp1, tmp2;
766
767 /* UTF-16BE has no BOM */
768 if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
769 return(XML_ENC_ERR_INTERNAL);
770 if (in == NULL) {
771 *outlen = 0;
772 *inlen = 0;
773 return(0);
774 }
775 inend= in + *inlen;
776 outend = out + (*outlen / 2);
777 while (in < inend) {
778 d= *in++;
779 if (d < 0x80) { c= d; trailing= 0; }
780 else if (d < 0xC0) {
781 /* trailing byte in leading position */
782 *outlen = out - outstart;
783 *inlen = processed - instart;
784 return(XML_ENC_ERR_INPUT);
785 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
786 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
787 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
788 else {
789 /* no chance for this in UTF-16 */
790 *outlen = out - outstart;
791 *inlen = processed - instart;
792 return(XML_ENC_ERR_INPUT);
793 }
794
795 if (inend - in < trailing) {
796 break;
797 }
798
799 for ( ; trailing; trailing--) {
800 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) break;
801 c <<= 6;
802 c |= d & 0x3F;
803 }
804
805 /* assertion: c is a single UTF-4 value */
806 if (c < 0x10000) {
807 if (out >= outend) break;
808 if (xmlLittleEndian) {
809 tmp = (unsigned char *) out;
810 *tmp = c >> 8;
811 *(tmp + 1) = (unsigned char) c; /* Explicit truncation */
812 out++;
813 } else {
814 *out++ = c;
815 }
816 }
817 else if (c < 0x110000) {
818 if (out+1 >= outend) break;
819 c -= 0x10000;
820 if (xmlLittleEndian) {
821 tmp1 = 0xD800 | (c >> 10);
822 tmp = (unsigned char *) out;
823 *tmp = tmp1 >> 8;
824 *(tmp + 1) = (unsigned char) tmp1; /* Explicit truncation */
825 out++;
826
827 tmp2 = 0xDC00 | (c & 0x03FF);
828 tmp = (unsigned char *) out;
829 *tmp = tmp2 >> 8;
830 *(tmp + 1) = (unsigned char) tmp2; /* Explicit truncation */
831 out++;
832 } else {
833 *out++ = 0xD800 | (c >> 10);
834 *out++ = 0xDC00 | (c & 0x03FF);
835 }
836 }
837 else
838 break;
839 processed = in;
840 }
841 *outlen = (out - outstart) * 2;
842 *inlen = processed - instart;
843 return(*outlen);
844}
845#endif /* LIBXML_OUTPUT_ENABLED */
846
847/************************************************************************
848 * *
849 * Generic encoding handling routines *
850 * *
851 ************************************************************************/
852
853/**
854 * xmlDetectCharEncoding:
855 * @in: a pointer to the first bytes of the XML entity, must be at least
856 * 2 bytes long (at least 4 if encoding is UTF4 variant).
857 * @len: pointer to the length of the buffer
858 *
859 * Guess the encoding of the entity using the first bytes of the entity content
860 * according to the non-normative appendix F of the XML-1.0 recommendation.
861 *
862 * Returns one of the XML_CHAR_ENCODING_... values.
863 */
864xmlCharEncoding
865xmlDetectCharEncoding(const unsigned char* in, int len)
866{
867 if (in == NULL)
868 return(XML_CHAR_ENCODING_NONE);
869 if (len >= 4) {
870 if ((in[0] == 0x00) && (in[1] == 0x00) &&
871 (in[2] == 0x00) && (in[3] == 0x3C))
872 return(XML_CHAR_ENCODING_UCS4BE);
873 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
874 (in[2] == 0x00) && (in[3] == 0x00))
875 return(XML_CHAR_ENCODING_UCS4LE);
876 if ((in[0] == 0x00) && (in[1] == 0x00) &&
877 (in[2] == 0x3C) && (in[3] == 0x00))
878 return(XML_CHAR_ENCODING_UCS4_2143);
879 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
880 (in[2] == 0x00) && (in[3] == 0x00))
881 return(XML_CHAR_ENCODING_UCS4_3412);
882 if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
883 (in[2] == 0xA7) && (in[3] == 0x94))
884 return(XML_CHAR_ENCODING_EBCDIC);
885 if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
886 (in[2] == 0x78) && (in[3] == 0x6D))
887 return(XML_CHAR_ENCODING_UTF8);
888 /*
889 * Although not part of the recommendation, we also
890 * attempt an "auto-recognition" of UTF-16LE and
891 * UTF-16BE encodings.
892 */
893 if ((in[0] == 0x3C) && (in[1] == 0x00) &&
894 (in[2] == 0x3F) && (in[3] == 0x00))
895 return(XML_CHAR_ENCODING_UTF16LE);
896 if ((in[0] == 0x00) && (in[1] == 0x3C) &&
897 (in[2] == 0x00) && (in[3] == 0x3F))
898 return(XML_CHAR_ENCODING_UTF16BE);
899 }
900 if (len >= 3) {
901 /*
902 * Errata on XML-1.0 June 20 2001
903 * We now allow an UTF8 encoded BOM
904 */
905 if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
906 (in[2] == 0xBF))
907 return(XML_CHAR_ENCODING_UTF8);
908 }
909 /* For UTF-16 we can recognize by the BOM */
910 if (len >= 2) {
911 if ((in[0] == 0xFE) && (in[1] == 0xFF))
912 return(XML_CHAR_ENCODING_UTF16BE);
913 if ((in[0] == 0xFF) && (in[1] == 0xFE))
914 return(XML_CHAR_ENCODING_UTF16LE);
915 }
916 return(XML_CHAR_ENCODING_NONE);
917}
918
919/**
920 * xmlCleanupEncodingAliases:
921 *
922 * Unregisters all aliases
923 */
924void
925xmlCleanupEncodingAliases(void) {
926 int i;
927
928 if (xmlCharEncodingAliases == NULL)
929 return;
930
931 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
932 if (xmlCharEncodingAliases[i].name != NULL)
933 xmlFree((char *) xmlCharEncodingAliases[i].name);
934 if (xmlCharEncodingAliases[i].alias != NULL)
935 xmlFree((char *) xmlCharEncodingAliases[i].alias);
936 }
937 xmlCharEncodingAliasesNb = 0;
938 xmlCharEncodingAliasesMax = 0;
939 xmlFree(xmlCharEncodingAliases);
940 xmlCharEncodingAliases = NULL;
941}
942
943/**
944 * xmlGetEncodingAlias:
945 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
946 *
947 * Lookup an encoding name for the given alias.
948 *
949 * Returns NULL if not found, otherwise the original name
950 */
951const char *
952xmlGetEncodingAlias(const char *alias) {
953 int i;
954 char upper[100];
955
956 if (alias == NULL)
957 return(NULL);
958
959 if (xmlCharEncodingAliases == NULL)
960 return(NULL);
961
962 for (i = 0;i < 99;i++) {
963 upper[i] = (char) toupper((unsigned char) alias[i]);
964 if (upper[i] == 0) break;
965 }
966 upper[i] = 0;
967
968 /*
969 * Walk down the list looking for a definition of the alias
970 */
971 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
972 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
973 return(xmlCharEncodingAliases[i].name);
974 }
975 }
976 return(NULL);
977}
978
979/**
980 * xmlAddEncodingAlias:
981 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
982 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
983 *
984 * Registers an alias @alias for an encoding named @name. Existing alias
985 * will be overwritten.
986 *
987 * Returns 0 in case of success, -1 in case of error
988 */
989int
990xmlAddEncodingAlias(const char *name, const char *alias) {
991 int i;
992 char upper[100];
993 char *nameCopy, *aliasCopy;
994
995 if ((name == NULL) || (alias == NULL))
996 return(-1);
997
998 for (i = 0;i < 99;i++) {
999 upper[i] = (char) toupper((unsigned char) alias[i]);
1000 if (upper[i] == 0) break;
1001 }
1002 upper[i] = 0;
1003
1004 if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1005 xmlCharEncodingAliasPtr tmp;
1006 size_t newSize = xmlCharEncodingAliasesMax ?
1007 xmlCharEncodingAliasesMax * 2 :
1008 20;
1009
1010 tmp = (xmlCharEncodingAliasPtr)
1011 xmlRealloc(xmlCharEncodingAliases,
1012 newSize * sizeof(xmlCharEncodingAlias));
1013 if (tmp == NULL)
1014 return(-1);
1015 xmlCharEncodingAliases = tmp;
1016 xmlCharEncodingAliasesMax = newSize;
1017 }
1018
1019 /*
1020 * Walk down the list looking for a definition of the alias
1021 */
1022 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1023 if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1024 /*
1025 * Replace the definition.
1026 */
1027 nameCopy = xmlMemStrdup(name);
1028 if (nameCopy == NULL)
1029 return(-1);
1030 xmlFree((char *) xmlCharEncodingAliases[i].name);
1031 xmlCharEncodingAliases[i].name = nameCopy;
1032 return(0);
1033 }
1034 }
1035 /*
1036 * Add the definition
1037 */
1038 nameCopy = xmlMemStrdup(name);
1039 if (nameCopy == NULL)
1040 return(-1);
1041 aliasCopy = xmlMemStrdup(upper);
1042 if (aliasCopy == NULL) {
1043 xmlFree(nameCopy);
1044 return(-1);
1045 }
1046 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = nameCopy;
1047 xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = aliasCopy;
1048 xmlCharEncodingAliasesNb++;
1049 return(0);
1050}
1051
1052/**
1053 * xmlDelEncodingAlias:
1054 * @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
1055 *
1056 * Unregisters an encoding alias @alias
1057 *
1058 * Returns 0 in case of success, -1 in case of error
1059 */
1060int
1061xmlDelEncodingAlias(const char *alias) {
1062 int i;
1063
1064 if (alias == NULL)
1065 return(-1);
1066
1067 if (xmlCharEncodingAliases == NULL)
1068 return(-1);
1069 /*
1070 * Walk down the list looking for a definition of the alias
1071 */
1072 for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1073 if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1074 xmlFree((char *) xmlCharEncodingAliases[i].name);
1075 xmlFree((char *) xmlCharEncodingAliases[i].alias);
1076 xmlCharEncodingAliasesNb--;
1077 memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1078 sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1079 return(0);
1080 }
1081 }
1082 return(-1);
1083}
1084
1085/**
1086 * xmlParseCharEncoding:
1087 * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1088 *
1089 * Compare the string to the encoding schemes already known. Note
1090 * that the comparison is case insensitive accordingly to the section
1091 * [XML] 4.3.3 Character Encoding in Entities.
1092 *
1093 * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1094 * if not recognized.
1095 */
1096xmlCharEncoding
1097xmlParseCharEncoding(const char* name)
1098{
1099 const char *alias;
1100 char upper[500];
1101 int i;
1102
1103 if (name == NULL)
1104 return(XML_CHAR_ENCODING_NONE);
1105
1106 /*
1107 * Do the alias resolution
1108 */
1109 alias = xmlGetEncodingAlias(name);
1110 if (alias != NULL)
1111 name = alias;
1112
1113 for (i = 0;i < 499;i++) {
1114 upper[i] = (char) toupper((unsigned char) name[i]);
1115 if (upper[i] == 0) break;
1116 }
1117 upper[i] = 0;
1118
1119 if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1120 if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1121 if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1122
1123 /*
1124 * NOTE: if we were able to parse this, the endianness of UTF16 is
1125 * already found and in use
1126 */
1127 if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1128 if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1129
1130 if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1131 if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1132 if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1133
1134 /*
1135 * NOTE: if we were able to parse this, the endianness of UCS4 is
1136 * already found and in use
1137 */
1138 if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1139 if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1140 if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1141
1142
1143 if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1144 if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1145 if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1146
1147 if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1148 if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1149 if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1150
1151 if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1152 if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1153 if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1154 if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1155 if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1156 if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1157 if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1158
1159 if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1160 if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1161 if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1162
1163 return(XML_CHAR_ENCODING_ERROR);
1164}
1165
1166/**
1167 * xmlGetCharEncodingName:
1168 * @enc: the encoding
1169 *
1170 * The "canonical" name for XML encoding.
1171 * C.f. http://www.w3.org/TR/REC-xml#charencoding
1172 * Section 4.3.3 Character Encoding in Entities
1173 *
1174 * Returns the canonical name for the given encoding
1175 */
1176
1177const char*
1178xmlGetCharEncodingName(xmlCharEncoding enc) {
1179 switch (enc) {
1180 case XML_CHAR_ENCODING_ERROR:
1181 return(NULL);
1182 case XML_CHAR_ENCODING_NONE:
1183 return(NULL);
1184 case XML_CHAR_ENCODING_UTF8:
1185 return("UTF-8");
1186 case XML_CHAR_ENCODING_UTF16LE:
1187 return("UTF-16");
1188 case XML_CHAR_ENCODING_UTF16BE:
1189 return("UTF-16");
1190 case XML_CHAR_ENCODING_EBCDIC:
1191 return("EBCDIC");
1192 case XML_CHAR_ENCODING_UCS4LE:
1193 return("ISO-10646-UCS-4");
1194 case XML_CHAR_ENCODING_UCS4BE:
1195 return("ISO-10646-UCS-4");
1196 case XML_CHAR_ENCODING_UCS4_2143:
1197 return("ISO-10646-UCS-4");
1198 case XML_CHAR_ENCODING_UCS4_3412:
1199 return("ISO-10646-UCS-4");
1200 case XML_CHAR_ENCODING_UCS2:
1201 return("ISO-10646-UCS-2");
1202 case XML_CHAR_ENCODING_8859_1:
1203 return("ISO-8859-1");
1204 case XML_CHAR_ENCODING_8859_2:
1205 return("ISO-8859-2");
1206 case XML_CHAR_ENCODING_8859_3:
1207 return("ISO-8859-3");
1208 case XML_CHAR_ENCODING_8859_4:
1209 return("ISO-8859-4");
1210 case XML_CHAR_ENCODING_8859_5:
1211 return("ISO-8859-5");
1212 case XML_CHAR_ENCODING_8859_6:
1213 return("ISO-8859-6");
1214 case XML_CHAR_ENCODING_8859_7:
1215 return("ISO-8859-7");
1216 case XML_CHAR_ENCODING_8859_8:
1217 return("ISO-8859-8");
1218 case XML_CHAR_ENCODING_8859_9:
1219 return("ISO-8859-9");
1220 case XML_CHAR_ENCODING_2022_JP:
1221 return("ISO-2022-JP");
1222 case XML_CHAR_ENCODING_SHIFT_JIS:
1223 return("Shift-JIS");
1224 case XML_CHAR_ENCODING_EUC_JP:
1225 return("EUC-JP");
1226 case XML_CHAR_ENCODING_ASCII:
1227 return(NULL);
1228 }
1229 return(NULL);
1230}
1231
1232/************************************************************************
1233 * *
1234 * Char encoding handlers *
1235 * *
1236 ************************************************************************/
1237
#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
    defined(LIBXML_ISO8859X_ENABLED)

/*
 * Forward-declare the converter pair for ISO-8859-<part>: one function
 * converting to UTF-8 and one converting from UTF-8. The declarations
 * are needed by the built-in handler table below.
 */
#define DECLARE_ISO_FUNCS(part) \
    static int ISO8859_##part##ToUTF8(unsigned char* out, int *outlen, \
                                      const unsigned char* in, \
                                      int *inlen); \
    static int UTF8ToISO8859_##part(unsigned char* out, int *outlen, \
                                    const unsigned char* in, \
                                    int *inlen);

/** DOC_DISABLE */
DECLARE_ISO_FUNCS(2)
DECLARE_ISO_FUNCS(3)
DECLARE_ISO_FUNCS(4)
DECLARE_ISO_FUNCS(5)
DECLARE_ISO_FUNCS(6)
DECLARE_ISO_FUNCS(7)
DECLARE_ISO_FUNCS(8)
DECLARE_ISO_FUNCS(9)
DECLARE_ISO_FUNCS(10)
DECLARE_ISO_FUNCS(11)
DECLARE_ISO_FUNCS(13)
DECLARE_ISO_FUNCS(14)
DECLARE_ISO_FUNCS(15)
DECLARE_ISO_FUNCS(16)
/** DOC_ENABLE */

#endif /* LIBXML_ISO8859X_ENABLED */
1265
/*
 * Extra initializers appended by MAKE_HANDLER after the name and the
 * two conversion functions. They expand to nothing when the matching
 * backend is compiled out.
 */
#if defined(LIBXML_ICONV_ENABLED)
  #define EMPTY_ICONV , (iconv_t) 0, (iconv_t) 0
#else
  #define EMPTY_ICONV
#endif

#if defined(LIBXML_ICU_ENABLED)
  #define EMPTY_UCONV , NULL, NULL
#else
  #define EMPTY_UCONV
#endif

/*
 * Brace initializer for a static xmlCharEncodingHandler.
 */
#define MAKE_HANDLER(name, in, out) \
    { (char *) name, in, out EMPTY_ICONV EMPTY_UCONV }
1280
1281static const xmlCharEncodingHandler defaultHandlers[] = {
1282#ifdef LIBXML_OUTPUT_ENABLED
1283 MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE)
1284 ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE)
1285 ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, UTF8ToUTF16)
1286 ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1)
1287 ,MAKE_HANDLER("ASCII", asciiToUTF8, UTF8Toascii)
1288 ,MAKE_HANDLER("US-ASCII", asciiToUTF8, UTF8Toascii)
1289#ifdef LIBXML_HTML_ENABLED
1290 ,MAKE_HANDLER("HTML", NULL, UTF8ToHtml)
1291#endif
1292#else
1293 MAKE_HANDLER("UTF-16LE", UTF16LEToUTF8, NULL)
1294 ,MAKE_HANDLER("UTF-16BE", UTF16BEToUTF8, NULL)
1295 ,MAKE_HANDLER("UTF-16", UTF16LEToUTF8, NULL)
1296 ,MAKE_HANDLER("ISO-8859-1", isolat1ToUTF8, NULL)
1297 ,MAKE_HANDLER("ASCII", asciiToUTF8, NULL)
1298 ,MAKE_HANDLER("US-ASCII", asciiToUTF8, NULL)
1299#endif /* LIBXML_OUTPUT_ENABLED */
1300
1301#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED) && \
1302 defined(LIBXML_ISO8859X_ENABLED)
1303 ,MAKE_HANDLER("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2)
1304 ,MAKE_HANDLER("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3)
1305 ,MAKE_HANDLER("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4)
1306 ,MAKE_HANDLER("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5)
1307 ,MAKE_HANDLER("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6)
1308 ,MAKE_HANDLER("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7)
1309 ,MAKE_HANDLER("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8)
1310 ,MAKE_HANDLER("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9)
1311 ,MAKE_HANDLER("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10)
1312 ,MAKE_HANDLER("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11)
1313 ,MAKE_HANDLER("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13)
1314 ,MAKE_HANDLER("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14)
1315 ,MAKE_HANDLER("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15)
1316 ,MAKE_HANDLER("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16)
1317#endif
1318};
1319
1320#define NUM_DEFAULT_HANDLERS \
1321 (sizeof(defaultHandlers) / sizeof(defaultHandlers[0]))
1322
1323static const xmlCharEncodingHandler xmlUTF8Handler =
1324 MAKE_HANDLER("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1325
1326static const xmlCharEncodingHandler *xmlUTF16LEHandler = &defaultHandlers[0];
1327static const xmlCharEncodingHandler *xmlUTF16BEHandler = &defaultHandlers[1];
1328static const xmlCharEncodingHandler *xmlLatin1Handler = &defaultHandlers[3];
1329static const xmlCharEncodingHandler *xmlAsciiHandler = &defaultHandlers[4];
1330
1331/* the size should be growable, but it's not a big deal ... */
1332#define MAX_ENCODING_HANDLERS 50
1333static xmlCharEncodingHandlerPtr *handlers = NULL;
1334static int nbCharEncodingHandler = 0;
1335
1336/**
1337 * xmlNewCharEncodingHandler:
1338 * @name: the encoding name, in UTF-8 format (ASCII actually)
1339 * @input: the xmlCharEncodingInputFunc to read that encoding
1340 * @output: the xmlCharEncodingOutputFunc to write that encoding
1341 *
1342 * Create and registers an xmlCharEncodingHandler.
1343 *
1344 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1345 */
1346xmlCharEncodingHandlerPtr
1347xmlNewCharEncodingHandler(const char *name,
1348 xmlCharEncodingInputFunc input,
1349 xmlCharEncodingOutputFunc output) {
1350 xmlCharEncodingHandlerPtr handler;
1351 const char *alias;
1352 char upper[500];
1353 int i;
1354 char *up = NULL;
1355
1356 /*
1357 * Do the alias resolution
1358 */
1359 alias = xmlGetEncodingAlias(name);
1360 if (alias != NULL)
1361 name = alias;
1362
1363 /*
1364 * Keep only the uppercase version of the encoding.
1365 */
1366 if (name == NULL)
1367 return(NULL);
1368 for (i = 0;i < 499;i++) {
1369 upper[i] = (char) toupper((unsigned char) name[i]);
1370 if (upper[i] == 0) break;
1371 }
1372 upper[i] = 0;
1373 up = xmlMemStrdup(upper);
1374 if (up == NULL)
1375 return(NULL);
1376
1377 /*
1378 * allocate and fill-up an handler block.
1379 */
1380 handler = (xmlCharEncodingHandlerPtr)
1381 xmlMalloc(sizeof(xmlCharEncodingHandler));
1382 if (handler == NULL) {
1383 xmlFree(up);
1384 return(NULL);
1385 }
1386 memset(handler, 0, sizeof(xmlCharEncodingHandler));
1387 handler->input = input;
1388 handler->output = output;
1389 handler->name = up;
1390
1391#ifdef LIBXML_ICONV_ENABLED
1392 handler->iconv_in = NULL;
1393 handler->iconv_out = NULL;
1394#endif
1395#ifdef LIBXML_ICU_ENABLED
1396 handler->uconv_in = NULL;
1397 handler->uconv_out = NULL;
1398#endif
1399
1400 /*
1401 * registers and returns the handler.
1402 */
1403 xmlRegisterCharEncodingHandler(handler);
1404 return(handler);
1405}
1406
/**
 * xmlInitCharEncodingHandlers:
 *
 * DEPRECATED: Alias for xmlInitParser.
 *
 * This function only forwards to xmlInitParser().
 */
void
xmlInitCharEncodingHandlers(void)
{
    /* Nothing encoding-specific is done here. */
    xmlInitParser();
}
1416
1417/**
1418 * xmlInitEncodingInternal:
1419 *
1420 * Initialize the char encoding support.
1421 */
1422void
1423xmlInitEncodingInternal(void) {
1424 unsigned short int tst = 0x1234;
1425 unsigned char *ptr = (unsigned char *) &tst;
1426
1427 if (*ptr == 0x12) xmlLittleEndian = 0;
1428 else xmlLittleEndian = 1;
1429}
1430
1431/**
1432 * xmlCleanupCharEncodingHandlers:
1433 *
1434 * DEPRECATED: This function will be made private. Call xmlCleanupParser
1435 * to free global state but see the warnings there. xmlCleanupParser
1436 * should be only called once at program exit. In most cases, you don't
1437 * have call cleanup functions at all.
1438 *
1439 * Cleanup the memory allocated for the char encoding support, it
1440 * unregisters all the encoding handlers and the aliases.
1441 */
1442void
1443xmlCleanupCharEncodingHandlers(void) {
1444 xmlCleanupEncodingAliases();
1445
1446 if (handlers == NULL) return;
1447
1448 for (;nbCharEncodingHandler > 0;) {
1449 nbCharEncodingHandler--;
1450 if (handlers[nbCharEncodingHandler] != NULL) {
1451 if (handlers[nbCharEncodingHandler]->name != NULL)
1452 xmlFree(handlers[nbCharEncodingHandler]->name);
1453 xmlFree(handlers[nbCharEncodingHandler]);
1454 }
1455 }
1456 xmlFree(handlers);
1457 handlers = NULL;
1458 nbCharEncodingHandler = 0;
1459}
1460
1461/**
1462 * xmlRegisterCharEncodingHandler:
1463 * @handler: the xmlCharEncodingHandlerPtr handler block
1464 *
1465 * Register the char encoding handler, surprising, isn't it ?
1466 */
1467void
1468xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1469 if (handler == NULL)
1470 return;
1471 if (handlers == NULL) {
1472 handlers = xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(handlers[0]));
1473 if (handlers == NULL)
1474 goto free_handler;
1475 }
1476
1477 if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS)
1478 goto free_handler;
1479 handlers[nbCharEncodingHandler++] = handler;
1480 return;
1481
1482free_handler:
1483 if (handler != NULL) {
1484 if (handler->name != NULL) {
1485 xmlFree(handler->name);
1486 }
1487 xmlFree(handler);
1488 }
1489}
1490
1491#ifdef LIBXML_ICONV_ENABLED
1492static int
1493xmlCreateIconvHandler(const char *name, xmlCharEncodingHandler **out) {
1494 xmlCharEncodingHandlerPtr enc = NULL;
1495 iconv_t icv_in = (iconv_t) -1;
1496 iconv_t icv_out = (iconv_t) -1;
1497 int ret;
1498
1499 *out = NULL;
1500
1501 icv_in = iconv_open("UTF-8", name);
1502 if (icv_in == (iconv_t) -1) {
1503 if (errno == EINVAL)
1504 ret = XML_ERR_UNSUPPORTED_ENCODING;
1505 else if (errno == ENOMEM)
1506 ret = XML_ERR_NO_MEMORY;
1507 else
1508 ret = XML_ERR_SYSTEM;
1509 goto error;
1510 }
1511
1512 icv_out = iconv_open(name, "UTF-8");
1513 if (icv_out == (iconv_t) -1) {
1514 if (errno == EINVAL)
1515 ret = XML_ERR_UNSUPPORTED_ENCODING;
1516 else if (errno == ENOMEM)
1517 ret = XML_ERR_NO_MEMORY;
1518 else
1519 ret = XML_ERR_SYSTEM;
1520 goto error;
1521 }
1522
1523 enc = xmlMalloc(sizeof(*enc));
1524 if (enc == NULL) {
1525 ret = XML_ERR_NO_MEMORY;
1526 goto error;
1527 }
1528 memset(enc, 0, sizeof(*enc));
1529
1530 enc->name = xmlMemStrdup(name);
1531 if (enc->name == NULL) {
1532 ret = XML_ERR_NO_MEMORY;
1533 goto error;
1534 }
1535 enc->iconv_in = icv_in;
1536 enc->iconv_out = icv_out;
1537
1538 *out = enc;
1539 return(0);
1540
1541error:
1542 if (enc != NULL)
1543 xmlFree(enc);
1544 if (icv_in != (iconv_t) -1)
1545 iconv_close(icv_in);
1546 if (icv_out != (iconv_t) -1)
1547 iconv_close(icv_out);
1548 return(ret);
1549}
1550#endif /* LIBXML_ICONV_ENABLED */
1551
1552#ifdef LIBXML_ICU_ENABLED
1553static int
1554openIcuConverter(const char* name, int toUnicode, uconv_t **out)
1555{
1556 UErrorCode status;
1557 uconv_t *conv;
1558
1559 *out = NULL;
1560
1561 conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
1562 if (conv == NULL)
1563 return(XML_ERR_NO_MEMORY);
1564
1565 conv->pivot_source = conv->pivot_buf;
1566 conv->pivot_target = conv->pivot_buf;
1567
1568 status = U_ZERO_ERROR;
1569 conv->uconv = ucnv_open(name, &status);
1570 if (U_FAILURE(status))
1571 goto error;
1572
1573 status = U_ZERO_ERROR;
1574 if (toUnicode) {
1575 ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
1576 NULL, NULL, NULL, &status);
1577 }
1578 else {
1579 ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
1580 NULL, NULL, NULL, &status);
1581 }
1582 if (U_FAILURE(status))
1583 goto error;
1584
1585 status = U_ZERO_ERROR;
1586 conv->utf8 = ucnv_open("UTF-8", &status);
1587 if (U_FAILURE(status))
1588 goto error;
1589
1590 *out = conv;
1591 return(0);
1592
1593error:
1594 if (conv->uconv)
1595 ucnv_close(conv->uconv);
1596 xmlFree(conv);
1597
1598 if (status == U_FILE_ACCESS_ERROR)
1599 return(XML_ERR_UNSUPPORTED_ENCODING);
1600 if (status == U_MEMORY_ALLOCATION_ERROR)
1601 return(XML_ERR_NO_MEMORY);
1602 return(XML_ERR_SYSTEM);
1603}
1604
1605static void
1606closeIcuConverter(uconv_t *conv)
1607{
1608 if (conv == NULL)
1609 return;
1610 ucnv_close(conv->uconv);
1611 ucnv_close(conv->utf8);
1612 xmlFree(conv);
1613}
1614
1615static int
1616xmlCreateUconvHandler(const char *name, xmlCharEncodingHandler **out) {
1617 xmlCharEncodingHandlerPtr enc = NULL;
1618 uconv_t *ucv_in = NULL;
1619 uconv_t *ucv_out = NULL;
1620 int ret;
1621
1622 ret = openIcuConverter(name, 1, &ucv_in);
1623 if (ret != 0)
1624 goto error;
1625 ret = openIcuConverter(name, 0, &ucv_out);
1626 if (ret != 0)
1627 goto error;
1628
1629 enc = (xmlCharEncodingHandlerPtr)
1630 xmlMalloc(sizeof(xmlCharEncodingHandler));
1631 if (enc == NULL) {
1632 ret = XML_ERR_NO_MEMORY;
1633 goto error;
1634 }
1635 memset(enc, 0, sizeof(xmlCharEncodingHandler));
1636
1637 enc->name = xmlMemStrdup(name);
1638 if (enc->name == NULL) {
1639 ret = XML_ERR_NO_MEMORY;
1640 goto error;
1641 }
1642 enc->input = NULL;
1643 enc->output = NULL;
1644 enc->uconv_in = ucv_in;
1645 enc->uconv_out = ucv_out;
1646
1647 *out = enc;
1648 return(0);
1649
1650error:
1651 if (enc != NULL)
1652 xmlFree(enc);
1653 if (ucv_in != NULL)
1654 closeIcuConverter(ucv_in);
1655 if (ucv_out != NULL)
1656 closeIcuConverter(ucv_out);
1657 return(ret);
1658}
1659#endif /* LIBXML_ICU_ENABLED */
1660
1661/**
1662 * xmlFindExtraHandler:
1663 * @name: a string describing the char encoding.
1664 * @output: boolean, use handler for output
1665 * @out: pointer to resulting handler
1666 *
1667 * Search the non-default handlers for an exact match.
1668 *
1669 * Returns 0 on success, 1 if no handler was found, -1 if a memory
1670 * allocation failed.
1671 */
1672static int
1673xmlFindExtraHandler(const char *name, int output,
1674 xmlCharEncodingHandler **out) {
1675 int ret;
1676 int i;
1677
1678 (void) ret;
1679
1680 if (handlers != NULL) {
1681 for (i = 0; i < nbCharEncodingHandler; i++) {
1682 xmlCharEncodingHandler *handler = handlers[i];
1683
1684 if (!xmlStrcasecmp((const xmlChar *) name,
1685 (const xmlChar *) handler->name)) {
1686 if (output) {
1687 if (handler->output != NULL) {
1688 *out = handler;
1689 return(0);
1690 }
1691 } else {
1692 if (handler->input != NULL) {
1693 *out = handler;
1694 return(0);
1695 }
1696 }
1697 }
1698 }
1699 }
1700
1701#ifdef LIBXML_ICONV_ENABLED
1702 ret = xmlCreateIconvHandler(name, out);
1703 if (*out != NULL)
1704 return(0);
1705 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1706 return(ret);
1707#endif /* LIBXML_ICONV_ENABLED */
1708
1709#ifdef LIBXML_ICU_ENABLED
1710 ret = xmlCreateUconvHandler(name, out);
1711 if (*out != NULL)
1712 return(0);
1713 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1714 return(ret);
1715#endif /* LIBXML_ICU_ENABLED */
1716
1717 return(XML_ERR_UNSUPPORTED_ENCODING);
1718}
1719
1720/**
1721 * xmlFindHandler:
1722 * @name: a string describing the char encoding.
1723 * @output: boolean, use handler for output
1724 * @out: pointer to resulting handler
1725 *
1726 * Search all handlers for an exact match.
1727 *
1728 * Returns 0 on success, 1 if no handler was found, -1 if a memory
1729 * allocation failed.
1730 */
1731static int
1732xmlFindHandler(const char *name, int output, xmlCharEncodingHandler **out) {
1733 int i;
1734
1735 /*
1736 * Check for default handlers
1737 */
1738 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
1739 xmlCharEncodingHandler *handler;
1740
1741 handler = (xmlCharEncodingHandler *) &defaultHandlers[i];
1742
1743 if (xmlStrcasecmp((const xmlChar *) name,
1744 (const xmlChar *) handler->name) == 0) {
1745 if (output) {
1746 if (handler->output != NULL) {
1747 *out = handler;
1748 return(0);
1749 }
1750 } else {
1751 if (handler->input != NULL) {
1752 *out = handler;
1753 return(0);
1754 }
1755 }
1756 }
1757 }
1758
1759 /*
1760 * Check for other handlers
1761 */
1762 return(xmlFindExtraHandler(name, output, out));
1763}
1764
1765/**
1766 * xmlLookupCharEncodingHandler:
1767 * @enc: an xmlCharEncoding value.
1768 * @out: pointer to result
1769 *
1770 * Find or create a handler matching the encoding. If no default or
1771 * registered handler could be found, try to create a handler using
1772 * iconv or ICU if supported.
1773 *
1774 * The handler must be closed with xmlCharEncCloseFunc.
1775 *
1776 * Available since 2.13.0.
1777 *
1778 * Returns an xmlParserErrors error code.
1779 */
1780int
1781xmlLookupCharEncodingHandler(xmlCharEncoding enc,
1782 xmlCharEncodingHandler **out) {
1783 const char *name = NULL;
1784 static const char *const ebcdicNames[] = {
1785 "EBCDIC", "ebcdic", "EBCDIC-US", "IBM-037"
1786 };
1787 static const char *const ucs4Names[] = {
1788 "ISO-10646-UCS-4", "UCS-4", "UCS4"
1789 };
1790 static const char *const ucs2Names[] = {
1791 "ISO-10646-UCS-2", "UCS-2", "UCS2"
1792 };
1793 static const char *const shiftJisNames[] = {
1794 "SHIFT-JIS", "SHIFT_JIS", "Shift_JIS",
1795 };
1796 const char *const *names = NULL;
1797 int numNames = 0;
1798 int ret;
1799 int i;
1800
1801 if (out == NULL)
1802 return(XML_ERR_ARGUMENT);
1803 *out = NULL;
1804
1805 switch (enc) {
1806 case XML_CHAR_ENCODING_ERROR:
1807 return(XML_ERR_UNSUPPORTED_ENCODING);
1808 case XML_CHAR_ENCODING_NONE:
1809 return(0);
1810 case XML_CHAR_ENCODING_UTF8:
1811 return(0);
1812 case XML_CHAR_ENCODING_UTF16LE:
1813 *out = (xmlCharEncodingHandler *) xmlUTF16LEHandler;
1814 return(0);
1815 case XML_CHAR_ENCODING_UTF16BE:
1816 *out = (xmlCharEncodingHandler *) xmlUTF16BEHandler;
1817 return(0);
1818 case XML_CHAR_ENCODING_EBCDIC:
1819 names = ebcdicNames;
1820 numNames = sizeof(ebcdicNames) / sizeof(ebcdicNames[0]);
1821 break;
1822 case XML_CHAR_ENCODING_UCS4BE:
1823 case XML_CHAR_ENCODING_UCS4LE:
1824 names = ucs4Names;
1825 numNames = sizeof(ucs4Names) / sizeof(ucs4Names[0]);
1826 break;
1827 case XML_CHAR_ENCODING_UCS4_2143:
1828 break;
1829 case XML_CHAR_ENCODING_UCS4_3412:
1830 break;
1831 case XML_CHAR_ENCODING_UCS2:
1832 names = ucs2Names;
1833 numNames = sizeof(ucs2Names) / sizeof(ucs2Names[0]);
1834 break;
1835
1836 case XML_CHAR_ENCODING_ASCII:
1837 *out = (xmlCharEncodingHandler *) xmlAsciiHandler;
1838 return(0);
1839 case XML_CHAR_ENCODING_8859_1:
1840 *out = (xmlCharEncodingHandler *) xmlLatin1Handler;
1841 return(0);
1842 case XML_CHAR_ENCODING_8859_2:
1843 name = "ISO-8859-2";
1844 break;
1845 case XML_CHAR_ENCODING_8859_3:
1846 name = "ISO-8859-3";
1847 break;
1848 case XML_CHAR_ENCODING_8859_4:
1849 name = "ISO-8859-4";
1850 break;
1851 case XML_CHAR_ENCODING_8859_5:
1852 name = "ISO-8859-5";
1853 break;
1854 case XML_CHAR_ENCODING_8859_6:
1855 name = "ISO-8859-6";
1856 break;
1857 case XML_CHAR_ENCODING_8859_7:
1858 name = "ISO-8859-7";
1859 break;
1860 case XML_CHAR_ENCODING_8859_8:
1861 name = "ISO-8859-8";
1862 break;
1863 case XML_CHAR_ENCODING_8859_9:
1864 name = "ISO-8859-9";
1865 break;
1866
1867 case XML_CHAR_ENCODING_2022_JP:
1868 name = "ISO-2022-JP";
1869 break;
1870 case XML_CHAR_ENCODING_SHIFT_JIS:
1871 names = shiftJisNames;
1872 numNames = sizeof(shiftJisNames) / sizeof(shiftJisNames[0]);
1873 break;
1874 case XML_CHAR_ENCODING_EUC_JP:
1875 name = "EUC-JP";
1876 break;
1877 default:
1878 break;
1879 }
1880
1881 if (name != NULL)
1882 return(xmlFindExtraHandler(name, 0, out));
1883
1884 if (names != NULL) {
1885 for (i = 0; i < numNames; i++) {
1886 ret = xmlFindExtraHandler(names[i], 0, out);
1887 if (*out != NULL)
1888 return(0);
1889 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1890 return(ret);
1891 }
1892 }
1893
1894 return(XML_ERR_UNSUPPORTED_ENCODING);
1895}
1896
1897/**
1898 * xmlGetCharEncodingHandler:
1899 * @enc: an xmlCharEncoding value.
1900 *
1901 * DEPRECATED: Use xmlLookupCharEncodingHandler which has better error
1902 * reporting.
1903 *
1904 * Returns the handler or NULL if no handler was found or an error
1905 * occurred.
1906 */
1907xmlCharEncodingHandlerPtr
1908xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1909 xmlCharEncodingHandler *ret;
1910
1911 xmlLookupCharEncodingHandler(enc, &ret);
1912 return(ret);
1913}
1914
1915/**
1916 * xmlOpenCharEncodingHandler:
1917 * @name: a string describing the char encoding.
1918 * @output: boolean, use handler for output
1919 * @out: pointer to result
1920 *
1921 * Find or create a handler matching the encoding. If no default or
1922 * registered handler could be found, try to create a handler using
1923 * iconv or ICU if supported.
1924 *
1925 * The handler must be closed with xmlCharEncCloseFunc.
1926 *
1927 * If the encoding is UTF-8, a NULL handler and no error code will
1928 * be returned.
1929 *
1930 * Available since 2.13.0.
1931 *
1932 * Returns an xmlParserErrors error code.
1933 */
1934int
1935xmlOpenCharEncodingHandler(const char *name, int output,
1936 xmlCharEncodingHandler **out) {
1937 const char *nalias;
1938 const char *norig;
1939 xmlCharEncoding enc;
1940 int ret;
1941
1942 if (out == NULL)
1943 return(XML_ERR_ARGUMENT);
1944 *out = NULL;
1945
1946 if (name == NULL)
1947 return(XML_ERR_ARGUMENT);
1948
1949 if ((xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-8") == 0) ||
1950 (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF8") == 0))
1951 return(XML_ERR_OK);
1952
1953 /*
1954 * Do the alias resolution
1955 */
1956 norig = name;
1957 nalias = xmlGetEncodingAlias(name);
1958 if (nalias != NULL)
1959 name = nalias;
1960
1961 ret = xmlFindHandler(name, output, out);
1962 if (*out != NULL)
1963 return(0);
1964 if (ret != XML_ERR_UNSUPPORTED_ENCODING)
1965 return(ret);
1966
1967 /*
1968 * Fallback using the canonical names
1969 *
1970 * TODO: We should make sure that the name of the returned
1971 * handler equals norig.
1972 */
1973 enc = xmlParseCharEncoding(norig);
1974 return(xmlLookupCharEncodingHandler(enc, out));
1975}
1976
1977/**
1978 * xmlFindCharEncodingHandler:
1979 * @name: a string describing the char encoding.
1980 *
1981 * DEPRECATED: Use xmlOpenCharEncodingHandler which has better error
1982 * reporting.
1983 *
1984 * Returns the handler or NULL if no handler was found or an error
1985 * occurred.
1986 */
1987xmlCharEncodingHandlerPtr
1988xmlFindCharEncodingHandler(const char *name) {
1989 xmlCharEncodingHandler *ret;
1990
1991 /*
1992 * This handler shouldn't be used, but we must return a non-NULL
1993 * handler.
1994 */
1995 if ((xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-8") == 0) ||
1996 (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF8") == 0))
1997 return((xmlCharEncodingHandlerPtr) &xmlUTF8Handler);
1998
1999 xmlOpenCharEncodingHandler(name, 0, &ret);
2000 return(ret);
2001}
2002
2003/************************************************************************
2004 * *
2005 * ICONV based generic conversion functions *
2006 * *
2007 ************************************************************************/
2008
2009#ifdef LIBXML_ICONV_ENABLED
2010/**
2011 * xmlIconvWrapper:
2012 * @cd: iconv converter data structure
2013 * @out: a pointer to an array of bytes to store the result
2014 * @outlen: the length of @out
2015 * @in: a pointer to an array of input bytes
2016 * @inlen: the length of @in
2017 *
2018 * Returns an XML_ENC_ERR code.
2019 *
2020 * The value of @inlen after return is the number of octets consumed
2021 * as the return value is positive, else unpredictable.
2022 * The value of @outlen after return is the number of octets produced.
2023 */
2024static int
2025xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
2026 const unsigned char *in, int *inlen) {
2027 size_t icv_inlen, icv_outlen;
2028 const char *icv_in = (const char *) in;
2029 char *icv_out = (char *) out;
2030 size_t ret;
2031
2032 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
2033 if (outlen != NULL) *outlen = 0;
2034 return(XML_ENC_ERR_INTERNAL);
2035 }
2036 icv_inlen = *inlen;
2037 icv_outlen = *outlen;
2038 /*
2039 * Some versions take const, other versions take non-const input.
2040 */
2041 ret = iconv(cd, (void *) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
2042 *inlen -= icv_inlen;
2043 *outlen -= icv_outlen;
2044 if (ret == (size_t) -1) {
2045 if (errno == EILSEQ)
2046 return(XML_ENC_ERR_INPUT);
2047 if (errno == E2BIG)
2048 return(XML_ENC_ERR_SPACE);
2049 if (errno == EINVAL)
2050 return(XML_ENC_ERR_PARTIAL);
2051 return(XML_ENC_ERR_INTERNAL);
2052 }
2053 return(XML_ENC_ERR_SUCCESS);
2054}
2055#endif /* LIBXML_ICONV_ENABLED */
2056
2057/************************************************************************
2058 * *
2059 * ICU based generic conversion functions *
2060 * *
2061 ************************************************************************/
2062
2063#ifdef LIBXML_ICU_ENABLED
2064/**
2065 * xmlUconvWrapper:
2066 * @cd: ICU uconverter data structure
2067 * @toUnicode : non-zero if toUnicode. 0 otherwise.
2068 * @out: a pointer to an array of bytes to store the result
2069 * @outlen: the length of @out
2070 * @in: a pointer to an array of input bytes
2071 * @inlen: the length of @in
2072 *
2073 * Returns an XML_ENC_ERR code.
2074 *
2075 * The value of @inlen after return is the number of octets consumed
2076 * as the return value is positive, else unpredictable.
2077 * The value of @outlen after return is the number of octets produced.
2078 */
2079static int
2080xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
2081 const unsigned char *in, int *inlen) {
2082 const char *ucv_in = (const char *) in;
2083 char *ucv_out = (char *) out;
2084 UErrorCode err = U_ZERO_ERROR;
2085
2086 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
2087 if (outlen != NULL) *outlen = 0;
2088 return(XML_ENC_ERR_INTERNAL);
2089 }
2090
2091 /*
2092 * Note that the ICU API is stateful. It can always consume a certain
2093 * amount of input even if the output buffer would overflow. The
2094 * remaining input must be processed by calling ucnv_convertEx with a
2095 * possibly empty input buffer.
2096 *
2097 * ucnv_convertEx is always called with reset and flush set to 0,
2098 * so we don't mess up the state. This should never generate
2099 * U_TRUNCATED_CHAR_FOUND errors.
2100 *
2101 * This also means that ICU xmlCharEncodingHandlers should never be
2102 * reused. It would be a lot nicer if there was a way to emulate the
2103 * stateless iconv API.
2104 */
2105 if (toUnicode) {
2106 /* encoding => UTF-16 => UTF-8 */
2107 ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
2108 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
2109 &cd->pivot_source, &cd->pivot_target,
2110 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
2111 } else {
2112 /* UTF-8 => UTF-16 => encoding */
2113 ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
2114 &ucv_in, ucv_in + *inlen, cd->pivot_buf,
2115 &cd->pivot_source, &cd->pivot_target,
2116 cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
2117 }
2118 *inlen = ucv_in - (const char*) in;
2119 *outlen = ucv_out - (char *) out;
2120 if (U_SUCCESS(err)) {
2121 return(XML_ENC_ERR_SUCCESS);
2122 }
2123 if (err == U_BUFFER_OVERFLOW_ERROR)
2124 return(XML_ENC_ERR_SPACE);
2125 if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
2126 return(XML_ENC_ERR_INPUT);
2127 return(XML_ENC_ERR_PARTIAL);
2128}
2129#endif /* LIBXML_ICU_ENABLED */
2130
2131/************************************************************************
2132 * *
2133 * The real API used by libxml for on-the-fly conversion *
2134 * *
2135 ************************************************************************/
2136
2137/**
2138 * xmlEncConvertError:
2139 * @code: XML_ENC_ERR code
2140 *
2141 * Convert XML_ENC_ERR to libxml2 error codes.
2142 */
2143static int
2144xmlEncConvertError(int code) {
2145 int ret;
2146
2147 switch (code) {
2148 case XML_ENC_ERR_SUCCESS:
2149 ret = XML_ERR_OK;
2150 break;
2151 case XML_ENC_ERR_INPUT:
2152 ret = XML_ERR_INVALID_ENCODING;
2153 break;
2154 case XML_ENC_ERR_MEMORY:
2155 ret = XML_ERR_NO_MEMORY;
2156 break;
2157 default:
2158 ret = XML_ERR_INTERNAL_ERROR;
2159 break;
2160 }
2161
2162 return(ret);
2163}
2164
2165/**
2166 * xmlEncInputChunk:
2167 * @handler: encoding handler
2168 * @out: a pointer to an array of bytes to store the result
2169 * @outlen: the length of @out
2170 * @in: a pointer to an array of input bytes
2171 * @inlen: the length of @in
2172 *
2173 * The value of @inlen after return is the number of octets consumed
2174 * as the return value is 0, else unpredictable.
2175 * The value of @outlen after return is the number of octets produced.
2176 *
2177 * Returns an XML_ENC_ERR code.
2178 */
2179int
2180xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2181 int *outlen, const unsigned char *in, int *inlen) {
2182 int ret;
2183
2184 if (handler->input != NULL) {
2185 int oldinlen = *inlen;
2186
2187 ret = handler->input(out, outlen, in, inlen);
2188 if (ret >= 0) {
2189 /*
2190 * The built-in converters don't signal XML_ENC_ERR_SPACE.
2191 */
2192 if (*inlen < oldinlen) {
2193 if (*outlen > 0)
2194 ret = XML_ENC_ERR_SPACE;
2195 else
2196 ret = XML_ENC_ERR_PARTIAL;
2197 } else {
2198 ret = XML_ENC_ERR_SUCCESS;
2199 }
2200 }
2201 }
2202#ifdef LIBXML_ICONV_ENABLED
2203 else if (handler->iconv_in != NULL) {
2204 ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
2205 }
2206#endif /* LIBXML_ICONV_ENABLED */
2207#ifdef LIBXML_ICU_ENABLED
2208 else if (handler->uconv_in != NULL) {
2209 ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen);
2210 }
2211#endif /* LIBXML_ICU_ENABLED */
2212 else {
2213 *outlen = 0;
2214 *inlen = 0;
2215 ret = XML_ENC_ERR_INTERNAL;
2216 }
2217
2218 /* Ignore partial errors when reading. */
2219 if (ret == XML_ENC_ERR_PARTIAL)
2220 ret = XML_ENC_ERR_SUCCESS;
2221
2222 return(ret);
2223}
2224
2225/**
2226 * xmlEncOutputChunk:
2227 * @handler: encoding handler
2228 * @out: a pointer to an array of bytes to store the result
2229 * @outlen: the length of @out
2230 * @in: a pointer to an array of input bytes
2231 * @inlen: the length of @in
2232 *
2233 * Returns an XML_ENC_ERR code.
2234 *
2235 * The value of @inlen after return is the number of octets consumed
2236 * as the return value is 0, else unpredictable.
2237 * The value of @outlen after return is the number of octets produced.
2238 */
2239static int
2240xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2241 int *outlen, const unsigned char *in, int *inlen) {
2242 int ret;
2243
2244 if (handler->output != NULL) {
2245 int oldinlen = *inlen;
2246
2247 ret = handler->output(out, outlen, in, inlen);
2248 if (ret >= 0) {
2249 /*
2250 * The built-in converters don't signal XML_ENC_ERR_SPACE.
2251 */
2252 if (*inlen < oldinlen) {
2253 if (*outlen > 0)
2254 ret = XML_ENC_ERR_SPACE;
2255 else
2256 ret = XML_ENC_ERR_PARTIAL;
2257 } else {
2258 ret = XML_ENC_ERR_SUCCESS;
2259 }
2260 }
2261 }
2262#ifdef LIBXML_ICONV_ENABLED
2263 else if (handler->iconv_out != NULL) {
2264 ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2265 }
2266#endif /* LIBXML_ICONV_ENABLED */
2267#ifdef LIBXML_ICU_ENABLED
2268 else if (handler->uconv_out != NULL) {
2269 ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen);
2270 }
2271#endif /* LIBXML_ICU_ENABLED */
2272 else {
2273 *outlen = 0;
2274 *inlen = 0;
2275 ret = XML_ENC_ERR_INTERNAL;
2276 }
2277
2278 /* We shouldn't generate partial sequences when writing. */
2279 if (ret == XML_ENC_ERR_PARTIAL)
2280 ret = XML_ENC_ERR_INTERNAL;
2281
2282 return(ret);
2283}
2284
2285/**
2286 * xmlCharEncFirstLine:
2287 * @handler: char encoding transformation data structure
2288 * @out: an xmlBuffer for the output.
2289 * @in: an xmlBuffer for the input
2290 *
2291 * DEPERECATED: Don't use.
2292 *
2293 * Returns the number of bytes written or an XML_ENC_ERR code.
2294 */
2295int
2296xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2297 xmlBufferPtr in) {
2298 return(xmlCharEncInFunc(handler, out, in));
2299}
2300
2301/**
2302 * xmlCharEncInput:
2303 * @input: a parser input buffer
2304 *
2305 * Generic front-end for the encoding handler on parser input
2306 *
2307 * Returns the number of bytes written or an XML_ENC_ERR code.
2308 */
2309int
2310xmlCharEncInput(xmlParserInputBufferPtr input)
2311{
2312 int ret;
2313 size_t avail;
2314 size_t toconv;
2315 int c_in;
2316 int c_out;
2317 xmlBufPtr in;
2318 xmlBufPtr out;
2319 const xmlChar *inData;
2320 size_t inTotal = 0;
2321
2322 if ((input == NULL) || (input->encoder == NULL) ||
2323 (input->buffer == NULL) || (input->raw == NULL))
2324 return(XML_ENC_ERR_INTERNAL);
2325 out = input->buffer;
2326 in = input->raw;
2327
2328 toconv = xmlBufUse(in);
2329 if (toconv == 0)
2330 return (0);
2331 inData = xmlBufContent(in);
2332 inTotal = 0;
2333
2334 do {
2335 c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv;
2336
2337 avail = xmlBufAvail(out);
2338 if (avail > INT_MAX)
2339 avail = INT_MAX;
2340 if (avail < 4096) {
2341 if (xmlBufGrow(out, 4096) < 0) {
2342 input->error = XML_ERR_NO_MEMORY;
2343 return(XML_ENC_ERR_MEMORY);
2344 }
2345 avail = xmlBufAvail(out);
2346 }
2347
2348 c_in = toconv;
2349 c_out = avail;
2350 ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2351 inData, &c_in);
2352 inTotal += c_in;
2353 inData += c_in;
2354 toconv -= c_in;
2355 xmlBufAddLen(out, c_out);
2356 } while (ret == XML_ENC_ERR_SPACE);
2357
2358 xmlBufShrink(in, inTotal);
2359
2360 if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in)
2361 input->rawconsumed = ULONG_MAX;
2362 else
2363 input->rawconsumed += c_in;
2364
2365 if (((ret != 0) && (c_out == 0)) ||
2366 (ret == XML_ENC_ERR_MEMORY)) {
2367 if (input->error == 0)
2368 input->error = xmlEncConvertError(ret);
2369 return(ret);
2370 }
2371
2372 return (c_out);
2373}
2374
2375/**
2376 * xmlCharEncInFunc:
2377 * @handler: char encoding transformation data structure
2378 * @out: an xmlBuffer for the output.
2379 * @in: an xmlBuffer for the input
2380 *
2381 * Generic front-end for the encoding handler input function
2382 *
2383 * Returns the number of bytes written or an XML_ENC_ERR code.
2384 */
2385int
2386xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2387 xmlBufferPtr in)
2388{
2389 int ret;
2390 int written;
2391 int toconv;
2392
2393 if (handler == NULL)
2394 return(XML_ENC_ERR_INTERNAL);
2395 if (out == NULL)
2396 return(XML_ENC_ERR_INTERNAL);
2397 if (in == NULL)
2398 return(XML_ENC_ERR_INTERNAL);
2399
2400 toconv = in->use;
2401 if (toconv == 0)
2402 return (0);
2403 written = out->size - out->use -1; /* count '\0' */
2404 if (toconv * 2 >= written) {
2405 xmlBufferGrow(out, out->size + toconv * 2);
2406 written = out->size - out->use - 1;
2407 }
2408 ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2409 in->content, &toconv);
2410 xmlBufferShrink(in, toconv);
2411 out->use += written;
2412 out->content[out->use] = 0;
2413
2414 return (written? written : ret);
2415}
2416
2417#ifdef LIBXML_OUTPUT_ENABLED
2418/**
2419 * xmlCharEncOutput:
2420 * @output: a parser output buffer
2421 * @init: is this an initialization call without data
2422 *
2423 * Generic front-end for the encoding handler on parser output
2424 * a first call with @init == 1 has to be made first to initiate the
2425 * output in case of non-stateless encoding needing to initiate their
2426 * state or the output (like the BOM in UTF16).
2427 * In case of UTF8 sequence conversion errors for the given encoder,
2428 * the content will be automatically remapped to a CharRef sequence.
2429 *
2430 * Returns the number of bytes written or an XML_ENC_ERR code.
2431 */
2432int
2433xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2434{
2435 int ret;
2436 size_t written;
2437 int writtentot = 0;
2438 size_t toconv;
2439 int c_in;
2440 int c_out;
2441 xmlBufPtr in;
2442 xmlBufPtr out;
2443
2444 if ((output == NULL) || (output->encoder == NULL) ||
2445 (output->buffer == NULL) || (output->conv == NULL))
2446 return(XML_ENC_ERR_INTERNAL);
2447 out = output->conv;
2448 in = output->buffer;
2449
2450retry:
2451
2452 written = xmlBufAvail(out);
2453
2454 /*
2455 * First specific handling of the initialization call
2456 */
2457 if (init) {
2458 c_in = 0;
2459 c_out = written;
2460 /* TODO: Check return value. */
2461 xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2462 NULL, &c_in);
2463 xmlBufAddLen(out, c_out);
2464 return(c_out);
2465 }
2466
2467 /*
2468 * Conversion itself.
2469 */
2470 toconv = xmlBufUse(in);
2471 if (toconv > 64 * 1024)
2472 toconv = 64 * 1024;
2473 if (toconv * 4 >= written) {
2474 if (xmlBufGrow(out, toconv * 4) < 0) {
2475 ret = XML_ENC_ERR_MEMORY;
2476 goto error;
2477 }
2478 written = xmlBufAvail(out);
2479 }
2480 if (written > 256 * 1024)
2481 written = 256 * 1024;
2482
2483 c_in = toconv;
2484 c_out = written;
2485 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2486 xmlBufContent(in), &c_in);
2487 xmlBufShrink(in, c_in);
2488 xmlBufAddLen(out, c_out);
2489 writtentot += c_out;
2490
2491 if (ret == XML_ENC_ERR_SPACE)
2492 goto retry;
2493
2494 /*
2495 * Attempt to handle error cases
2496 */
2497 if (ret == XML_ENC_ERR_INPUT) {
2498 xmlChar charref[20];
2499 int len = xmlBufUse(in);
2500 xmlChar *content = xmlBufContent(in);
2501 int cur, charrefLen;
2502
2503 cur = xmlGetUTF8Char(content, &len);
2504 if (cur <= 0)
2505 goto error;
2506
2507 /*
2508 * Removes the UTF8 sequence, and replace it by a charref
2509 * and continue the transcoding phase, hoping the error
2510 * did not mangle the encoder state.
2511 */
2512 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2513 "&#%d;", cur);
2514 xmlBufGrow(out, charrefLen * 4);
2515 c_out = xmlBufAvail(out);
2516 c_in = charrefLen;
2517 ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2518 charref, &c_in);
2519 if ((ret < 0) || (c_in != charrefLen)) {
2520 ret = XML_ENC_ERR_INTERNAL;
2521 goto error;
2522 }
2523
2524 xmlBufShrink(in, len);
2525 xmlBufAddLen(out, c_out);
2526 writtentot += c_out;
2527 goto retry;
2528 }
2529
2530error:
2531 if (((writtentot <= 0) && (ret != 0)) ||
2532 (ret == XML_ENC_ERR_MEMORY)) {
2533 if (output->error == 0)
2534 output->error = xmlEncConvertError(ret);
2535 return(ret);
2536 }
2537
2538 return(writtentot);
2539}
2540#endif
2541
2542/**
2543 * xmlCharEncOutFunc:
2544 * @handler: char encoding transformation data structure
2545 * @out: an xmlBuffer for the output.
2546 * @in: an xmlBuffer for the input
2547 *
2548 * Generic front-end for the encoding handler output function
2549 * a first call with @in == NULL has to be made firs to initiate the
2550 * output in case of non-stateless encoding needing to initiate their
2551 * state or the output (like the BOM in UTF16).
2552 * In case of UTF8 sequence conversion errors for the given encoder,
2553 * the content will be automatically remapped to a CharRef sequence.
2554 *
2555 * Returns the number of bytes written or an XML_ENC_ERR code.
2556 */
2557int
2558xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2559 xmlBufferPtr in) {
2560 int ret;
2561 int written;
2562 int writtentot = 0;
2563 int toconv;
2564
2565 if (handler == NULL) return(XML_ENC_ERR_INTERNAL);
2566 if (out == NULL) return(XML_ENC_ERR_INTERNAL);
2567
2568retry:
2569
2570 written = out->size - out->use;
2571
2572 if (written > 0)
2573 written--; /* Gennady: count '/0' */
2574
2575 /*
2576 * First specific handling of in = NULL, i.e. the initialization call
2577 */
2578 if (in == NULL) {
2579 toconv = 0;
2580 /* TODO: Check return value. */
2581 xmlEncOutputChunk(handler, &out->content[out->use], &written,
2582 NULL, &toconv);
2583 out->use += written;
2584 out->content[out->use] = 0;
2585 return(0);
2586 }
2587
2588 /*
2589 * Conversion itself.
2590 */
2591 toconv = in->use;
2592 if (toconv * 4 >= written) {
2593 xmlBufferGrow(out, toconv * 4);
2594 written = out->size - out->use - 1;
2595 }
2596 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2597 in->content, &toconv);
2598 xmlBufferShrink(in, toconv);
2599 out->use += written;
2600 writtentot += written;
2601 out->content[out->use] = 0;
2602
2603 if (ret == XML_ENC_ERR_SPACE)
2604 goto retry;
2605
2606 /*
2607 * Attempt to handle error cases
2608 */
2609 if (ret == XML_ENC_ERR_INPUT) {
2610 xmlChar charref[20];
2611 int len = in->use;
2612 const xmlChar *utf = (const xmlChar *) in->content;
2613 int cur, charrefLen;
2614
2615 cur = xmlGetUTF8Char(utf, &len);
2616 if (cur <= 0)
2617 return(ret);
2618
2619 /*
2620 * Removes the UTF8 sequence, and replace it by a charref
2621 * and continue the transcoding phase, hoping the error
2622 * did not mangle the encoder state.
2623 */
2624 charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2625 "&#%d;", cur);
2626 xmlBufferShrink(in, len);
2627 xmlBufferGrow(out, charrefLen * 4);
2628 written = out->size - out->use - 1;
2629 toconv = charrefLen;
2630 ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2631 charref, &toconv);
2632 if ((ret < 0) || (toconv != charrefLen))
2633 return(XML_ENC_ERR_INTERNAL);
2634
2635 out->use += written;
2636 writtentot += written;
2637 out->content[out->use] = 0;
2638 goto retry;
2639 }
2640 return(writtentot ? writtentot : ret);
2641}
2642
2643/**
2644 * xmlCharEncCloseFunc:
2645 * @handler: char encoding transformation data structure
2646 *
2647 * Generic front-end for encoding handler close function
2648 *
2649 * Returns 0 if success, or -1 in case of error
2650 */
2651int
2652xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2653 int ret = 0;
2654 int tofree = 0;
2655 int i = 0;
2656
2657 if (handler == NULL) return(-1);
2658
2659 for (i = 0; i < (int) NUM_DEFAULT_HANDLERS; i++) {
2660 if (handler == &defaultHandlers[i])
2661 return(0);
2662 }
2663
2664 if (handlers != NULL) {
2665 for (i = 0;i < nbCharEncodingHandler; i++) {
2666 if (handler == handlers[i])
2667 return(0);
2668 }
2669 }
2670#ifdef LIBXML_ICONV_ENABLED
2671 /*
2672 * Iconv handlers can be used only once, free the whole block.
2673 * and the associated icon resources.
2674 */
2675 if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2676 tofree = 1;
2677 if (handler->iconv_out != NULL) {
2678 if (iconv_close(handler->iconv_out))
2679 ret = -1;
2680 handler->iconv_out = NULL;
2681 }
2682 if (handler->iconv_in != NULL) {
2683 if (iconv_close(handler->iconv_in))
2684 ret = -1;
2685 handler->iconv_in = NULL;
2686 }
2687 }
2688#endif /* LIBXML_ICONV_ENABLED */
2689#ifdef LIBXML_ICU_ENABLED
2690 if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2691 tofree = 1;
2692 if (handler->uconv_out != NULL) {
2693 closeIcuConverter(handler->uconv_out);
2694 handler->uconv_out = NULL;
2695 }
2696 if (handler->uconv_in != NULL) {
2697 closeIcuConverter(handler->uconv_in);
2698 handler->uconv_in = NULL;
2699 }
2700 }
2701#endif
2702 if (tofree) {
2703 /* free up only dynamic handlers iconv/uconv */
2704 if (handler->name != NULL)
2705 xmlFree(handler->name);
2706 handler->name = NULL;
2707 xmlFree(handler);
2708 }
2709
2710 return(ret);
2711}
2712
2713/**
2714 * xmlByteConsumed:
2715 * @ctxt: an XML parser context
2716 *
2717 * This function provides the current index of the parser relative
2718 * to the start of the current entity. This function is computed in
2719 * bytes from the beginning starting at zero and finishing at the
2720 * size in byte of the file if parsing a file. The function is
2721 * of constant cost if the input is UTF-8 but can be costly if run
2722 * on non-UTF-8 input.
2723 *
2724 * Returns the index in bytes from the beginning of the entity or -1
2725 * in case the index could not be computed.
2726 */
2727long
2728xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2729 xmlParserInputPtr in;
2730
2731 if (ctxt == NULL) return(-1);
2732 in = ctxt->input;
2733 if (in == NULL) return(-1);
2734 if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2735 unsigned int unused = 0;
2736 xmlCharEncodingHandler * handler = in->buf->encoder;
2737 /*
2738 * Encoding conversion, compute the number of unused original
2739 * bytes from the input not consumed and subtract that from
2740 * the raw consumed value, this is not a cheap operation
2741 */
2742 if (in->end - in->cur > 0) {
2743 unsigned char convbuf[32000];
2744 const unsigned char *cur = (const unsigned char *)in->cur;
2745 int toconv = in->end - in->cur, written = 32000;
2746
2747 int ret;
2748
2749 do {
2750 toconv = in->end - cur;
2751 written = 32000;
2752 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2753 cur, &toconv);
2754 if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE))
2755 return(-1);
2756 unused += written;
2757 cur += toconv;
2758 } while (ret == XML_ENC_ERR_SPACE);
2759 }
2760 if (in->buf->rawconsumed < unused)
2761 return(-1);
2762 return(in->buf->rawconsumed - unused);
2763 }
2764 return(in->consumed + (in->cur - in->base));
2765}
2766
2767#if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2768#ifdef LIBXML_ISO8859X_ENABLED
2769
2770/**
2771 * UTF8ToISO8859x:
2772 * @out: a pointer to an array of bytes to store the result
2773 * @outlen: the length of @out
2774 * @in: a pointer to an array of UTF-8 chars
2775 * @inlen: the length of @in
2776 * @xlattable: the 2-level transcoding table
2777 *
2778 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
2779 * block of chars out.
2780 *
2781 * Returns the number of bytes written or an XML_ENC_ERR code.
2782 *
2783 * The value of @inlen after return is the number of octets consumed
2784 * as the return value is positive, else unpredictable.
2785 * The value of @outlen after return is the number of octets consumed.
2786 */
2787static int
2788UTF8ToISO8859x(unsigned char* out, int *outlen,
2789 const unsigned char* in, int *inlen,
2790 const unsigned char* const xlattable) {
2791 const unsigned char* outstart = out;
2792 const unsigned char* inend;
2793 const unsigned char* instart = in;
2794 const unsigned char* processed = in;
2795
2796 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2797 (xlattable == NULL))
2798 return(XML_ENC_ERR_INTERNAL);
2799 if (in == NULL) {
2800 /*
2801 * initialization nothing to do
2802 */
2803 *outlen = 0;
2804 *inlen = 0;
2805 return(0);
2806 }
2807 inend = in + (*inlen);
2808 while (in < inend) {
2809 unsigned char d = *in++;
2810 if (d < 0x80) {
2811 *out++ = d;
2812 } else if (d < 0xC0) {
2813 /* trailing byte in leading position */
2814 *outlen = out - outstart;
2815 *inlen = processed - instart;
2816 return(XML_ENC_ERR_INPUT);
2817 } else if (d < 0xE0) {
2818 unsigned char c;
2819 if (!(in < inend)) {
2820 /* trailing byte not in input buffer */
2821 *outlen = out - outstart;
2822 *inlen = processed - instart;
2823 return(XML_ENC_ERR_PARTIAL);
2824 }
2825 c = *in++;
2826 if ((c & 0xC0) != 0x80) {
2827 /* not a trailing byte */
2828 *outlen = out - outstart;
2829 *inlen = processed - instart;
2830 return(XML_ENC_ERR_INPUT);
2831 }
2832 c = c & 0x3F;
2833 d = d & 0x1F;
2834 d = xlattable [48 + c + xlattable [d] * 64];
2835 if (d == 0) {
2836 /* not in character set */
2837 *outlen = out - outstart;
2838 *inlen = processed - instart;
2839 return(XML_ENC_ERR_INPUT);
2840 }
2841 *out++ = d;
2842 } else if (d < 0xF0) {
2843 unsigned char c1;
2844 unsigned char c2;
2845 if (!(in < inend - 1)) {
2846 /* trailing bytes not in input buffer */
2847 *outlen = out - outstart;
2848 *inlen = processed - instart;
2849 return(XML_ENC_ERR_PARTIAL);
2850 }
2851 c1 = *in++;
2852 if ((c1 & 0xC0) != 0x80) {
2853 /* not a trailing byte (c1) */
2854 *outlen = out - outstart;
2855 *inlen = processed - instart;
2856 return(XML_ENC_ERR_INPUT);
2857 }
2858 c2 = *in++;
2859 if ((c2 & 0xC0) != 0x80) {
2860 /* not a trailing byte (c2) */
2861 *outlen = out - outstart;
2862 *inlen = processed - instart;
2863 return(XML_ENC_ERR_INPUT);
2864 }
2865 c1 = c1 & 0x3F;
2866 c2 = c2 & 0x3F;
2867 d = d & 0x0F;
2868 d = xlattable [48 + c2 + xlattable [48 + c1 +
2869 xlattable [32 + d] * 64] * 64];
2870 if (d == 0) {
2871 /* not in character set */
2872 *outlen = out - outstart;
2873 *inlen = processed - instart;
2874 return(XML_ENC_ERR_INPUT);
2875 }
2876 *out++ = d;
2877 } else {
2878 /* cannot transcode >= U+010000 */
2879 *outlen = out - outstart;
2880 *inlen = processed - instart;
2881 return(XML_ENC_ERR_INPUT);
2882 }
2883 processed = in;
2884 }
2885 *outlen = out - outstart;
2886 *inlen = processed - instart;
2887 return(*outlen);
2888}
2889
2890/**
2891 * ISO8859xToUTF8
2892 * @out: a pointer to an array of bytes to store the result
2893 * @outlen: the length of @out
2894 * @in: a pointer to an array of ISO Latin 1 chars
2895 * @inlen: the length of @in
2896 *
2897 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
2898 * block of chars out.
2899 *
2900 * Returns the number of bytes written or an XML_ENC_ERR code.
2901 *
2902 * The value of @inlen after return is the number of octets consumed
2903 * The value of @outlen after return is the number of octets produced.
2904 */
2905static int
2906ISO8859xToUTF8(unsigned char* out, int *outlen,
2907 const unsigned char* in, int *inlen,
2908 unsigned short const *unicodetable) {
2909 unsigned char* outstart = out;
2910 unsigned char* outend;
2911 const unsigned char* instart = in;
2912 const unsigned char* inend;
2913 const unsigned char* instop;
2914 unsigned int c;
2915
2916 if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
2917 (in == NULL) || (unicodetable == NULL))
2918 return(XML_ENC_ERR_INTERNAL);
2919 outend = out + *outlen;
2920 inend = in + *inlen;
2921 instop = inend;
2922
2923 while ((in < inend) && (out < outend - 2)) {
2924 if (*in >= 0x80) {
2925 c = unicodetable [*in - 0x80];
2926 if (c == 0) {
2927 /* undefined code point */
2928 *outlen = out - outstart;
2929 *inlen = in - instart;
2930 return(XML_ENC_ERR_INPUT);
2931 }
2932 if (c < 0x800) {
2933 *out++ = ((c >> 6) & 0x1F) | 0xC0;
2934 *out++ = (c & 0x3F) | 0x80;
2935 } else {
2936 *out++ = ((c >> 12) & 0x0F) | 0xE0;
2937 *out++ = ((c >> 6) & 0x3F) | 0x80;
2938 *out++ = (c & 0x3F) | 0x80;
2939 }
2940 ++in;
2941 }
2942 if (instop - in > outend - out) instop = in + (outend - out);
2943 while ((*in < 0x80) && (in < instop)) {
2944 *out++ = *in++;
2945 }
2946 }
2947 if ((in < inend) && (out < outend) && (*in < 0x80)) {
2948 *out++ = *in++;
2949 }
2950 if ((in < inend) && (out < outend) && (*in < 0x80)) {
2951 *out++ = *in++;
2952 }
2953 *outlen = out - outstart;
2954 *inlen = in - instart;
2955 return (*outlen);
2956}
2957
2958
2959/************************************************************************
2960 * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding *
2961 ************************************************************************/
2962
/*
 * 128 entry code point table; going by its name, the values for the
 * ISO-8859-2 bytes 0x80..0xFF, one per byte in ascending order.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as undefined.
 */
static const unsigned short xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
2981
/*
 * Byte table of 48 + 6 * 64 entries: a 48 byte index followed by six
 * 64 byte blocks. The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably the @xlattable UTF8ToISO8859x() uses for
 * ISO-8859-2; there, 0 means "not in the character set".
 */
static const unsigned char xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3011
/*
 * 128 entry code point table; going by its name, the values for the
 * ISO-8859-3 bytes 0x80..0xFF, one per byte in ascending order.
 * Several entries are 0x0000.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as undefined.
 */
static const unsigned short xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3030
/*
 * Byte table of 48 + 7 * 64 entries: a 48 byte index followed by seven
 * 64 byte blocks. The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably the @xlattable UTF8ToISO8859x() uses for
 * ISO-8859-3; there, 0 means "not in the character set".
 */
static const unsigned char xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3064
/*
 * 128 entry code point table; going by its name, the values for the
 * ISO-8859-4 bytes 0x80..0xFF, one per byte in ascending order.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as undefined.
 */
static const unsigned short xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3083
/*
 * Byte table of 48 + 6 * 64 entries: a 48 byte index followed by six
 * 64 byte blocks. The string literal fills the array exactly, so no
 * terminating NUL is stored.
 * NOTE(review): presumably the @xlattable UTF8ToISO8859x() uses for
 * ISO-8859-4; there, 0 means "not in the character set".
 */
static const unsigned char xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3113
/*
 * 128 entry code point table; going by its name, the values for the
 * ISO-8859-5 bytes 0x80..0xFF, one per byte in ascending order.
 * NOTE(review): presumably handed to ISO8859xToUTF8() as @unicodetable,
 * which indexes it with (byte - 0x80) and treats 0 as undefined.
 */
static const unsigned short xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3132
3133static const unsigned char xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3134 "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135 "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3140 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3141 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3142 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3143 "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3144 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3146 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3147 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3148 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3149 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3150 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3151 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3153 "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3154 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3155 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3156 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3157 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3158 "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3159 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3160 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3161};
3162
3163static const unsigned short xmlunicodetable_ISO8859_6 [128] = {
3164 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3165 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3166 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3167 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3168 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3169 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3170 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3171 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3172 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3173 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3174 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3175 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3176 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3177 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3178 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3179 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3180};
3181
3182static const unsigned char xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3183 "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3185 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3191 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3192 "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3193 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3194 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3195 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3196 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3197 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3198 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3199 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3200 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3201 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3202 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3203 "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3204 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3205 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3206};
3207
3208static const unsigned short xmlunicodetable_ISO8859_7 [128] = {
3209 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3210 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3211 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3212 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3213 0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3214 0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3215 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3216 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3217 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3218 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3219 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3220 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3221 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3222 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3223 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3224 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3225};
3226
3227static const unsigned char xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3228 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3229 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3236 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3237 "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3238 "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3239 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3242 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3243 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3244 "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3245 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3247 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3249 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251 "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3252 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3253 "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3254 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3255 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3256 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259};
3260
3261static const unsigned short xmlunicodetable_ISO8859_8 [128] = {
3262 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3263 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3264 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3265 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3266 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3267 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3268 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3269 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
3270 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3271 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3272 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3273 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
3274 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
3275 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
3276 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
3277 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
3278};
3279
3280static const unsigned char xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
3281 "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282 "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
3283 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3284 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3285 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3286 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3287 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3288 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3289 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3290 "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
3291 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
3292 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297 "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
3298 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299 "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
3300 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3301 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3302 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3303 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
3305 "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
3306 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3310 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3311 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312};
3313
3314static const unsigned short xmlunicodetable_ISO8859_9 [128] = {
3315 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3316 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3317 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3318 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3319 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3320 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3321 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3322 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3323 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3324 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3325 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3326 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3327 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3328 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3329 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3330 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3331};
3332
3333static const unsigned char xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3334 "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3336 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3337 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3338 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3339 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3340 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3341 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3342 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3343 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3344 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3345 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3346 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3347 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3348 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3349 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3350 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3351 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3352 "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3353 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3354 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3355 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357};
3358
3359static const unsigned short xmlunicodetable_ISO8859_10 [128] = {
3360 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3361 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3362 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3363 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3364 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3365 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3366 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3367 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3368 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3369 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3370 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3371 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3372 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3373 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3374 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3375 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3376};
3377
3378static const unsigned char xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3379 "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3380 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3381 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3382 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3383 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3384 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3385 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3386 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3387 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3388 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3389 "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3390 "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3391 "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3392 "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3393 "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3394 "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3395 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3396 "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3397 "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3398 "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3399 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3400 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3401 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3402 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3403 "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3404 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3405 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3406 "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3407 "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3408 "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3409 "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3410};
3411
3412static const unsigned short xmlunicodetable_ISO8859_11 [128] = {
3413 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3414 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3415 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3416 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3417 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3418 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3419 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3420 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3421 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3422 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3423 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3424 0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3425 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3426 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3427 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3428 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3429};
3430
3431static const unsigned char xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3432 "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3433 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3434 "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3435 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3436 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3437 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3438 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3439 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3440 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3441 "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3442 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3443 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3444 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3445 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3446 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3447 "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3448 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3449 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3450 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3451 "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3452 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3453 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3454 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3455 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3456 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3457 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3458 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3459};
3460
3461static const unsigned short xmlunicodetable_ISO8859_13 [128] = {
3462 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3463 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3464 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3465 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3466 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3467 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3468 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3469 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3470 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3471 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3472 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3473 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3474 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3475 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3476 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3477 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3478};
3479
3480static const unsigned char xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3481 "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3482 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3483 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3484 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3485 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3486 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3487 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3488 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3489 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3490 "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3491 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3492 "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3493 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3494 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3495 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3496 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3497 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3498 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3499 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3500 "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3501 "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3502 "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3503 "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3504 "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3505 "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3506 "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3507 "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3508 "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3509 "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3510 "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3511 "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3512};
3513
3514static const unsigned short xmlunicodetable_ISO8859_14 [128] = {
3515 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3516 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3517 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3518 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3519 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3520 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3521 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3522 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3523 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3524 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3525 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3526 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3527 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3528 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3529 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3530 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3531};
3532
3533static const unsigned char xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3534 "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3535 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3536 "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3537 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3538 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3539 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3540 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3541 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3542 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3543 "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3544 "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3545 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3546 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3547 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3548 "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3549 "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3550 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3551 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3552 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3553 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3554 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3555 "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3556 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3557 "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3558 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3559 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3560 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3561 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3562 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3563 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3564 "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3565 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3566 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3567 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3568 "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3569 "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3570 "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3571 "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3572 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3573 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3574 "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3575 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3576 "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3577};
3578
3579static const unsigned short xmlunicodetable_ISO8859_15 [128] = {
3580 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3581 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3582 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3583 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3584 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3585 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3586 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3587 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3588 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3589 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3590 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3591 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3592 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3593 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3594 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3595 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3596};
3597
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Byte lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_15.
 * The array holds 48 + 6 * 64 = 432 bytes and the initializer supplies
 * exactly 27 literals of 16 bytes each, so the array is filled completely
 * and the string literal's terminating NUL is not stored.
 *
 * NOTE(review): the declared size suggests a 48-byte index header followed
 * by six 64-byte blocks, with small values (0x01..0x05) in the header
 * selecting a block and 0x00 meaning "no mapping". The indexing itself is
 * done in UTF8ToISO8859x, which is outside this view -- confirm there
 * before editing any byte.
 *
 * The bytes 0xa4, 0xa6, 0xa8, 0xb4, 0xb8, 0xbc, 0xbd and 0xbe are zeroed in
 * the otherwise identity-like 0xa0..0xbf rows and reappear in later blocks.
 */
3598static const unsigned char xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3599 "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3600 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3601 "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3602 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3603 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3604 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3605 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3606 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3607 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3608 "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3609 "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3610 "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3611 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3612 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3613 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3614 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3615 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3616 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3617 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3618 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3619 "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3620 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3621 "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3622 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3623 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3624 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3625 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3626};
3627
/*
 * xmlunicodetable_ISO8859_16:
 *
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_16ToUTF8. The first 32 entries are the identity run
 * 0x0080..0x009f and entry 32 is 0x00a0.
 *
 * NOTE(review): presumably entry i is the Unicode code point for input
 * byte 0x80 + i; the lookup itself lives in ISO8859xToUTF8, which is
 * outside this view -- confirm there.
 */
3628static const unsigned short xmlunicodetable_ISO8859_16 [128] = {
3629 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3630 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3631 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3632 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3633 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3634 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3635 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3636 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3637 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3638 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3639 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3640 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3641 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3642 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3643 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3644 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3645};
3646
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Byte lookup table handed to UTF8ToISO8859x by UTF8ToISO8859_16.
 * The array holds 48 + 9 * 64 = 624 bytes and the initializer supplies
 * exactly 39 literals of 16 bytes each, so the array is filled completely
 * and the string literal's terminating NUL is not stored.
 *
 * NOTE(review): the declared size suggests a 48-byte index header followed
 * by nine 64-byte blocks, with small values (0x01..0x08) acting as block
 * selectors and 0x00 meaning "no mapping". The indexing itself is done in
 * UTF8ToISO8859x, which is outside this view -- confirm there before
 * editing any byte.
 */
3647static const unsigned char xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3648 "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3649 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3650 "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3651 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3652 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3653 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3654 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3655 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3656 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3657 "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3658 "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3659 "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3660 "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3661 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3662 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3663 "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3664 "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3665 "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3666 "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3667 "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3668 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3669 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3670 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3671 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3672 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3673 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3674 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3675 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3676 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3677 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3678 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3679 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3680 "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3681 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3682 "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3683 "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3684 "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3685 "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3686 "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3687};
3688
3689
3690/*
3691 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3692 */
3693
3694static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3695 const unsigned char* in, int *inlen) {
3696 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3697}
3698static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3699 const unsigned char* in, int *inlen) {
3700 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3701}
3702
3703static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3704 const unsigned char* in, int *inlen) {
3705 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3706}
3707static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3708 const unsigned char* in, int *inlen) {
3709 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3710}
3711
3712static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3713 const unsigned char* in, int *inlen) {
3714 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3715}
3716static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3717 const unsigned char* in, int *inlen) {
3718 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3719}
3720
3721static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3722 const unsigned char* in, int *inlen) {
3723 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3724}
3725static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3726 const unsigned char* in, int *inlen) {
3727 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3728}
3729
3730static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3731 const unsigned char* in, int *inlen) {
3732 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3733}
3734static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3735 const unsigned char* in, int *inlen) {
3736 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3737}
3738
3739static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3740 const unsigned char* in, int *inlen) {
3741 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3742}
3743static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3744 const unsigned char* in, int *inlen) {
3745 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3746}
3747
3748static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3749 const unsigned char* in, int *inlen) {
3750 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3751}
3752static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3753 const unsigned char* in, int *inlen) {
3754 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3755}
3756
3757static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3758 const unsigned char* in, int *inlen) {
3759 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3760}
3761static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3762 const unsigned char* in, int *inlen) {
3763 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3764}
3765
3766static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3767 const unsigned char* in, int *inlen) {
3768 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3769}
3770static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3771 const unsigned char* in, int *inlen) {
3772 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3773}
3774
3775static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3776 const unsigned char* in, int *inlen) {
3777 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3778}
3779static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3780 const unsigned char* in, int *inlen) {
3781 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3782}
3783
3784static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3785 const unsigned char* in, int *inlen) {
3786 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3787}
3788static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3789 const unsigned char* in, int *inlen) {
3790 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3791}
3792
3793static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3794 const unsigned char* in, int *inlen) {
3795 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3796}
3797static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3798 const unsigned char* in, int *inlen) {
3799 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3800}
3801
3802static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3803 const unsigned char* in, int *inlen) {
3804 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3805}
3806static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3807 const unsigned char* in, int *inlen) {
3808 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3809}
3810
3811static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3812 const unsigned char* in, int *inlen) {
3813 return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3814}
3815static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3816 const unsigned char* in, int *inlen) {
3817 return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3818}
3819
3820#endif
3821#endif
3822
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette