VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/parserInternals.c@ 104201

最後變更 在這個檔案從104201是 104106,由 vboxsync 提交於 10 月 前

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • 屬性 svn:eol-style 設為 native
檔案大小: 68.6 KB
 
1/*
2 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3 * XML and HTML parsers.
4 *
5 * See Copyright for the status of this software.
6 *
7 * daniel@veillard.com
8 */
9
10#define IN_LIBXML
11#include "libxml.h"
12
13#if defined(_WIN32)
14#define XML_DIR_SEP '\\'
15#else
16#define XML_DIR_SEP '/'
17#endif
18
19#include <string.h>
20#include <ctype.h>
21#include <stdlib.h>
22
23#include <libxml/xmlmemory.h>
24#include <libxml/tree.h>
25#include <libxml/parser.h>
26#include <libxml/parserInternals.h>
27#include <libxml/entities.h>
28#include <libxml/xmlerror.h>
29#include <libxml/encoding.h>
30#include <libxml/xmlIO.h>
31#include <libxml/uri.h>
32#include <libxml/dict.h>
33#include <libxml/xmlsave.h>
34#ifdef LIBXML_CATALOG_ENABLED
35#include <libxml/catalog.h>
36#endif
37#include <libxml/chvalid.h>
38
39#define CUR(ctxt) ctxt->input->cur
40#define END(ctxt) ctxt->input->end
41
42#include "private/buf.h"
43#include "private/enc.h"
44#include "private/error.h"
45#include "private/io.h"
46#include "private/parser.h"
47
48/*
49 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
50 * factor of serialized output after entity expansion.
51 */
52#define XML_MAX_AMPLIFICATION_DEFAULT 5
53
54/*
55 * Various global defaults for parsing
56 */
57
58/**
59 * xmlCheckVersion:
60 * @version: the include version number
61 *
62 * check the compiled lib version against the include one.
63 * This can warn or immediately kill the application
64 */
65void
66xmlCheckVersion(int version) {
67 int myversion = LIBXML_VERSION;
68
69 xmlInitParser();
70
71 if ((myversion / 10000) != (version / 10000)) {
72 xmlGenericError(xmlGenericErrorContext,
73 "Fatal: program compiled against libxml %d using libxml %d\n",
74 (version / 10000), (myversion / 10000));
75 fprintf(stderr,
76 "Fatal: program compiled against libxml %d using libxml %d\n",
77 (version / 10000), (myversion / 10000));
78 }
79 if ((myversion / 100) < (version / 100)) {
80 xmlGenericError(xmlGenericErrorContext,
81 "Warning: program compiled against libxml %d using older %d\n",
82 (version / 100), (myversion / 100));
83 }
84}
85
86
87/************************************************************************
88 * *
89 * Some factorized error routines *
90 * *
91 ************************************************************************/
92
93
94/**
95 * xmlErrMemory:
96 * @ctxt: an XML parser context
97 * @extra: extra information
98 *
99 * Handle a redefinition of attribute error
100 */
101void
102xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
103{
104 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
105 (ctxt->instate == XML_PARSER_EOF))
106 return;
107 if (ctxt != NULL) {
108 ctxt->errNo = XML_ERR_NO_MEMORY;
109 ctxt->instate = XML_PARSER_EOF;
110 ctxt->disableSAX = 1;
111 }
112 if (extra)
113 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
114 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
115 NULL, NULL, 0, 0,
116 "Memory allocation failed : %s\n", extra);
117 else
118 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
119 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
120 NULL, NULL, 0, 0, "Memory allocation failed\n");
121}
122
123/**
124 * __xmlErrEncoding:
125 * @ctxt: an XML parser context
126 * @xmlerr: the error number
127 * @msg: the error message
128 * @str1: an string info
129 * @str2: an string info
130 *
131 * Handle an encoding error
132 */
133void
134__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
135 const char *msg, const xmlChar * str1, const xmlChar * str2)
136{
137 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
138 (ctxt->instate == XML_PARSER_EOF))
139 return;
140 if (ctxt != NULL)
141 ctxt->errNo = xmlerr;
142 __xmlRaiseError(NULL, NULL, NULL,
143 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
144 NULL, 0, (const char *) str1, (const char *) str2,
145 NULL, 0, 0, msg, str1, str2);
146 if (ctxt != NULL) {
147 ctxt->wellFormed = 0;
148 if (ctxt->recovery == 0)
149 ctxt->disableSAX = 1;
150 }
151}
152
153/**
154 * xmlErrInternal:
155 * @ctxt: an XML parser context
156 * @msg: the error message
157 * @str: error information
158 *
159 * Handle an internal error
160 */
161static void LIBXML_ATTR_FORMAT(2,0)
162xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
163{
164 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
165 (ctxt->instate == XML_PARSER_EOF))
166 return;
167 if (ctxt != NULL)
168 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
169 __xmlRaiseError(NULL, NULL, NULL,
170 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
171 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
172 0, 0, msg, str);
173 if (ctxt != NULL) {
174 ctxt->wellFormed = 0;
175 if (ctxt->recovery == 0)
176 ctxt->disableSAX = 1;
177 }
178}
179
180/**
181 * xmlFatalErr:
182 * @ctxt: an XML parser context
183 * @error: the error number
184 * @info: extra information string
185 *
186 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
187 */
188void
189xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
190{
191 const char *errmsg;
192
193 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
194 (ctxt->instate == XML_PARSER_EOF))
195 return;
196 switch (error) {
197 case XML_ERR_INVALID_HEX_CHARREF:
198 errmsg = "CharRef: invalid hexadecimal value";
199 break;
200 case XML_ERR_INVALID_DEC_CHARREF:
201 errmsg = "CharRef: invalid decimal value";
202 break;
203 case XML_ERR_INVALID_CHARREF:
204 errmsg = "CharRef: invalid value";
205 break;
206 case XML_ERR_INTERNAL_ERROR:
207 errmsg = "internal error";
208 break;
209 case XML_ERR_PEREF_AT_EOF:
210 errmsg = "PEReference at end of document";
211 break;
212 case XML_ERR_PEREF_IN_PROLOG:
213 errmsg = "PEReference in prolog";
214 break;
215 case XML_ERR_PEREF_IN_EPILOG:
216 errmsg = "PEReference in epilog";
217 break;
218 case XML_ERR_PEREF_NO_NAME:
219 errmsg = "PEReference: no name";
220 break;
221 case XML_ERR_PEREF_SEMICOL_MISSING:
222 errmsg = "PEReference: expecting ';'";
223 break;
224 case XML_ERR_ENTITY_LOOP:
225 errmsg = "Detected an entity reference loop";
226 break;
227 case XML_ERR_ENTITY_NOT_STARTED:
228 errmsg = "EntityValue: \" or ' expected";
229 break;
230 case XML_ERR_ENTITY_PE_INTERNAL:
231 errmsg = "PEReferences forbidden in internal subset";
232 break;
233 case XML_ERR_ENTITY_NOT_FINISHED:
234 errmsg = "EntityValue: \" or ' expected";
235 break;
236 case XML_ERR_ATTRIBUTE_NOT_STARTED:
237 errmsg = "AttValue: \" or ' expected";
238 break;
239 case XML_ERR_LT_IN_ATTRIBUTE:
240 errmsg = "Unescaped '<' not allowed in attributes values";
241 break;
242 case XML_ERR_LITERAL_NOT_STARTED:
243 errmsg = "SystemLiteral \" or ' expected";
244 break;
245 case XML_ERR_LITERAL_NOT_FINISHED:
246 errmsg = "Unfinished System or Public ID \" or ' expected";
247 break;
248 case XML_ERR_MISPLACED_CDATA_END:
249 errmsg = "Sequence ']]>' not allowed in content";
250 break;
251 case XML_ERR_URI_REQUIRED:
252 errmsg = "SYSTEM or PUBLIC, the URI is missing";
253 break;
254 case XML_ERR_PUBID_REQUIRED:
255 errmsg = "PUBLIC, the Public Identifier is missing";
256 break;
257 case XML_ERR_HYPHEN_IN_COMMENT:
258 errmsg = "Comment must not contain '--' (double-hyphen)";
259 break;
260 case XML_ERR_PI_NOT_STARTED:
261 errmsg = "xmlParsePI : no target name";
262 break;
263 case XML_ERR_RESERVED_XML_NAME:
264 errmsg = "Invalid PI name";
265 break;
266 case XML_ERR_NOTATION_NOT_STARTED:
267 errmsg = "NOTATION: Name expected here";
268 break;
269 case XML_ERR_NOTATION_NOT_FINISHED:
270 errmsg = "'>' required to close NOTATION declaration";
271 break;
272 case XML_ERR_VALUE_REQUIRED:
273 errmsg = "Entity value required";
274 break;
275 case XML_ERR_URI_FRAGMENT:
276 errmsg = "Fragment not allowed";
277 break;
278 case XML_ERR_ATTLIST_NOT_STARTED:
279 errmsg = "'(' required to start ATTLIST enumeration";
280 break;
281 case XML_ERR_NMTOKEN_REQUIRED:
282 errmsg = "NmToken expected in ATTLIST enumeration";
283 break;
284 case XML_ERR_ATTLIST_NOT_FINISHED:
285 errmsg = "')' required to finish ATTLIST enumeration";
286 break;
287 case XML_ERR_MIXED_NOT_STARTED:
288 errmsg = "MixedContentDecl : '|' or ')*' expected";
289 break;
290 case XML_ERR_PCDATA_REQUIRED:
291 errmsg = "MixedContentDecl : '#PCDATA' expected";
292 break;
293 case XML_ERR_ELEMCONTENT_NOT_STARTED:
294 errmsg = "ContentDecl : Name or '(' expected";
295 break;
296 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
297 errmsg = "ContentDecl : ',' '|' or ')' expected";
298 break;
299 case XML_ERR_PEREF_IN_INT_SUBSET:
300 errmsg =
301 "PEReference: forbidden within markup decl in internal subset";
302 break;
303 case XML_ERR_GT_REQUIRED:
304 errmsg = "expected '>'";
305 break;
306 case XML_ERR_CONDSEC_INVALID:
307 errmsg = "XML conditional section '[' expected";
308 break;
309 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
310 errmsg = "Content error in the external subset";
311 break;
312 case XML_ERR_CONDSEC_INVALID_KEYWORD:
313 errmsg =
314 "conditional section INCLUDE or IGNORE keyword expected";
315 break;
316 case XML_ERR_CONDSEC_NOT_FINISHED:
317 errmsg = "XML conditional section not closed";
318 break;
319 case XML_ERR_XMLDECL_NOT_STARTED:
320 errmsg = "Text declaration '<?xml' required";
321 break;
322 case XML_ERR_XMLDECL_NOT_FINISHED:
323 errmsg = "parsing XML declaration: '?>' expected";
324 break;
325 case XML_ERR_EXT_ENTITY_STANDALONE:
326 errmsg = "external parsed entities cannot be standalone";
327 break;
328 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
329 errmsg = "EntityRef: expecting ';'";
330 break;
331 case XML_ERR_DOCTYPE_NOT_FINISHED:
332 errmsg = "DOCTYPE improperly terminated";
333 break;
334 case XML_ERR_LTSLASH_REQUIRED:
335 errmsg = "EndTag: '</' not found";
336 break;
337 case XML_ERR_EQUAL_REQUIRED:
338 errmsg = "expected '='";
339 break;
340 case XML_ERR_STRING_NOT_CLOSED:
341 errmsg = "String not closed expecting \" or '";
342 break;
343 case XML_ERR_STRING_NOT_STARTED:
344 errmsg = "String not started expecting ' or \"";
345 break;
346 case XML_ERR_ENCODING_NAME:
347 errmsg = "Invalid XML encoding name";
348 break;
349 case XML_ERR_STANDALONE_VALUE:
350 errmsg = "standalone accepts only 'yes' or 'no'";
351 break;
352 case XML_ERR_DOCUMENT_EMPTY:
353 errmsg = "Document is empty";
354 break;
355 case XML_ERR_DOCUMENT_END:
356 errmsg = "Extra content at the end of the document";
357 break;
358 case XML_ERR_NOT_WELL_BALANCED:
359 errmsg = "chunk is not well balanced";
360 break;
361 case XML_ERR_EXTRA_CONTENT:
362 errmsg = "extra content at the end of well balanced chunk";
363 break;
364 case XML_ERR_VERSION_MISSING:
365 errmsg = "Malformed declaration expecting version";
366 break;
367 case XML_ERR_NAME_TOO_LONG:
368 errmsg = "Name too long";
369 break;
370 case XML_ERR_INVALID_ENCODING:
371 errmsg = "Invalid bytes in character encoding";
372 break;
373 case XML_IO_UNKNOWN:
374 errmsg = "I/O error";
375 break;
376#if 0
377 case:
378 errmsg = "";
379 break;
380#endif
381 default:
382 errmsg = "Unregistered error message";
383 }
384 if (ctxt != NULL)
385 ctxt->errNo = error;
386 if (info == NULL) {
387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
388 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
389 errmsg);
390 } else {
391 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
392 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
393 errmsg, info);
394 }
395 if (ctxt != NULL) {
396 ctxt->wellFormed = 0;
397 if (ctxt->recovery == 0)
398 ctxt->disableSAX = 1;
399 }
400}
401
402/**
403 * xmlErrEncodingInt:
404 * @ctxt: an XML parser context
405 * @error: the error number
406 * @msg: the error message
407 * @val: an integer value
408 *
409 * n encoding error
410 */
411static void LIBXML_ATTR_FORMAT(3,0)
412xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
413 const char *msg, int val)
414{
415 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
416 (ctxt->instate == XML_PARSER_EOF))
417 return;
418 if (ctxt != NULL)
419 ctxt->errNo = error;
420 __xmlRaiseError(NULL, NULL, NULL,
421 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
422 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
423 if (ctxt != NULL) {
424 ctxt->wellFormed = 0;
425 if (ctxt->recovery == 0)
426 ctxt->disableSAX = 1;
427 }
428}
429
/**
 * xmlIsLetter:
 * @c:  a Unicode code point
 *
 * Test the code point against the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
443
444/************************************************************************
445 * *
446 * Input handling functions for progressive parsing *
447 * *
448 ************************************************************************/
449
450/* we need to keep enough input to show errors in context */
451#define LINE_LEN 80
452
453/**
454 * xmlHaltParser:
455 * @ctxt: an XML parser context
456 *
457 * Blocks further parser processing don't override error
458 * for internal use
459 */
460void
461xmlHaltParser(xmlParserCtxtPtr ctxt) {
462 if (ctxt == NULL)
463 return;
464 ctxt->instate = XML_PARSER_EOF;
465 ctxt->disableSAX = 1;
466 while (ctxt->inputNr > 1)
467 xmlFreeInputStream(inputPop(ctxt));
468 if (ctxt->input != NULL) {
469 /*
470 * in case there was a specific allocation deallocate before
471 * overriding base
472 */
473 if (ctxt->input->free != NULL) {
474 ctxt->input->free((xmlChar *) ctxt->input->base);
475 ctxt->input->free = NULL;
476 }
477 if (ctxt->input->buf != NULL) {
478 xmlFreeParserInputBuffer(ctxt->input->buf);
479 ctxt->input->buf = NULL;
480 }
481 ctxt->input->cur = BAD_CAST"";
482 ctxt->input->length = 0;
483 ctxt->input->base = ctxt->input->cur;
484 ctxt->input->end = ctxt->input->cur;
485 }
486}
487
488/**
489 * xmlParserInputRead:
490 * @in: an XML parser input
491 * @len: an indicative size for the lookahead
492 *
493 * DEPRECATED: This function was internal and is deprecated.
494 *
495 * Returns -1 as this is an error to use it.
496 */
497int
498xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
499 return(-1);
500}
501
502/**
503 * xmlParserGrow:
504 * @ctxt: an XML parser context
505 *
506 * Grow the input buffer.
507 *
508 * Returns the number of bytes read or -1 in case of error.
509 */
510int
511xmlParserGrow(xmlParserCtxtPtr ctxt) {
512 xmlParserInputPtr in = ctxt->input;
513 xmlParserInputBufferPtr buf = in->buf;
514 ptrdiff_t curEnd = in->end - in->cur;
515 ptrdiff_t curBase = in->cur - in->base;
516 int ret;
517
518 if (buf == NULL)
519 return(0);
520 /* Don't grow push parser buffer. */
521 if ((ctxt->progressive) && (ctxt->inputNr <= 1))
522 return(0);
523 /* Don't grow memory buffers. */
524 if ((buf->encoder == NULL) && (buf->readcallback == NULL))
525 return(0);
526 if (buf->error != 0)
527 return(-1);
528
529 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
530 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
531 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
532 xmlErrMemory(ctxt, "Huge input lookup");
533 xmlHaltParser(ctxt);
534 return(-1);
535 }
536
537 if (curEnd >= INPUT_CHUNK)
538 return(0);
539
540 ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
541 xmlBufUpdateInput(buf->buffer, in, curBase);
542
543 if (ret < 0) {
544 xmlFatalErr(ctxt, buf->error, NULL);
545 /* Buffer contents may be lost in case of memory errors. */
546 if (buf->error == XML_ERR_NO_MEMORY)
547 xmlHaltParser(ctxt);
548 }
549
550 return(ret);
551}
552
553/**
554 * xmlParserInputGrow:
555 * @in: an XML parser input
556 * @len: an indicative size for the lookahead
557 *
558 * DEPRECATED: Don't use.
559 *
560 * This function increase the input for the parser. It tries to
561 * preserve pointers to the input buffer, and keep already read data
562 *
563 * Returns the amount of char read, or -1 in case of error, 0 indicate the
564 * end of this entity
565 */
566int
567xmlParserInputGrow(xmlParserInputPtr in, int len) {
568 int ret;
569 size_t indx;
570
571 if ((in == NULL) || (len < 0)) return(-1);
572 if (in->buf == NULL) return(-1);
573 if (in->base == NULL) return(-1);
574 if (in->cur == NULL) return(-1);
575 if (in->buf->buffer == NULL) return(-1);
576
577 /* Don't grow memory buffers. */
578 if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
579 return(0);
580
581 indx = in->cur - in->base;
582 if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
583 return(0);
584 }
585 ret = xmlParserInputBufferGrow(in->buf, len);
586
587 in->base = xmlBufContent(in->buf->buffer);
588 if (in->base == NULL) {
589 in->base = BAD_CAST "";
590 in->cur = in->base;
591 in->end = in->base;
592 return(-1);
593 }
594 in->cur = in->base + indx;
595 in->end = xmlBufEnd(in->buf->buffer);
596
597 return(ret);
598}
599
600/**
601 * xmlParserShrink:
602 * @ctxt: an XML parser context
603 *
604 * Shrink the input buffer.
605 */
606void
607xmlParserShrink(xmlParserCtxtPtr ctxt) {
608 xmlParserInputPtr in = ctxt->input;
609 xmlParserInputBufferPtr buf = in->buf;
610 size_t used;
611
612 if (buf == NULL)
613 return;
614 /* Don't shrink pull parser memory buffers. */
615 if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) &&
616 (buf->encoder == NULL) &&
617 (buf->readcallback == NULL))
618 return;
619
620 used = in->cur - in->base;
621 /*
622 * Do not shrink on large buffers whose only a tiny fraction
623 * was consumed
624 */
625 if (used > INPUT_CHUNK) {
626 size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
627
628 if (res > 0) {
629 used -= res;
630 if ((res > ULONG_MAX) ||
631 (in->consumed > ULONG_MAX - (unsigned long)res))
632 in->consumed = ULONG_MAX;
633 else
634 in->consumed += res;
635 }
636 }
637
638 xmlBufUpdateInput(buf->buffer, in, used);
639}
640
641/**
642 * xmlParserInputShrink:
643 * @in: an XML parser input
644 *
645 * DEPRECATED: Don't use.
646 *
647 * This function removes used input for the parser.
648 */
649void
650xmlParserInputShrink(xmlParserInputPtr in) {
651 size_t used;
652 size_t ret;
653
654 if (in == NULL) return;
655 if (in->buf == NULL) return;
656 if (in->base == NULL) return;
657 if (in->cur == NULL) return;
658 if (in->buf->buffer == NULL) return;
659
660 used = in->cur - in->base;
661 /*
662 * Do not shrink on large buffers whose only a tiny fraction
663 * was consumed
664 */
665 if (used > INPUT_CHUNK) {
666 ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
667 if (ret > 0) {
668 used -= ret;
669 if ((ret > ULONG_MAX) ||
670 (in->consumed > ULONG_MAX - (unsigned long)ret))
671 in->consumed = ULONG_MAX;
672 else
673 in->consumed += ret;
674 }
675 }
676
677 if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
678 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
679 }
680
681 in->base = xmlBufContent(in->buf->buffer);
682 if (in->base == NULL) {
683 /* TODO: raise error */
684 in->base = BAD_CAST "";
685 in->cur = in->base;
686 in->end = in->base;
687 return;
688 }
689 in->cur = in->base + used;
690 in->end = xmlBufEnd(in->buf->buffer);
691}
692
693/************************************************************************
694 * *
695 * UTF8 character input and related functions *
696 * *
697 ************************************************************************/
698
699/**
700 * xmlNextChar:
701 * @ctxt: the XML parser context
702 *
703 * DEPRECATED: Internal function, do not use.
704 *
705 * Skip to the next char input char.
706 */
707
708void
709xmlNextChar(xmlParserCtxtPtr ctxt)
710{
711 const unsigned char *cur;
712 size_t avail;
713 int c;
714
715 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
716 (ctxt->input == NULL))
717 return;
718
719 avail = ctxt->input->end - ctxt->input->cur;
720
721 if (avail < INPUT_CHUNK) {
722 xmlParserGrow(ctxt);
723 if ((ctxt->instate == XML_PARSER_EOF) ||
724 (ctxt->input->cur >= ctxt->input->end))
725 return;
726 avail = ctxt->input->end - ctxt->input->cur;
727 }
728
729 cur = ctxt->input->cur;
730 c = *cur;
731
732 if (c < 0x80) {
733 if (c == '\n') {
734 ctxt->input->cur++;
735 ctxt->input->line++;
736 ctxt->input->col = 1;
737 } else if (c == '\r') {
738 /*
739 * 2.11 End-of-Line Handling
740 * the literal two-character sequence "#xD#xA" or a standalone
741 * literal #xD, an XML processor must pass to the application
742 * the single character #xA.
743 */
744 ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
745 ctxt->input->line++;
746 ctxt->input->col = 1;
747 return;
748 } else {
749 ctxt->input->cur++;
750 ctxt->input->col++;
751 }
752 } else {
753 ctxt->input->col++;
754
755 if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
756 goto encoding_error;
757
758 if (c < 0xe0) {
759 /* 2-byte code */
760 if (c < 0xc2)
761 goto encoding_error;
762 ctxt->input->cur += 2;
763 } else {
764 unsigned int val = (c << 8) | cur[1];
765
766 if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
767 goto encoding_error;
768
769 if (c < 0xf0) {
770 /* 3-byte code */
771 if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
772 goto encoding_error;
773 ctxt->input->cur += 3;
774 } else {
775 if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
776 goto encoding_error;
777
778 /* 4-byte code */
779 if ((val < 0xf090) || (val >= 0xf490))
780 goto encoding_error;
781 ctxt->input->cur += 4;
782 }
783 }
784 }
785
786 return;
787
788encoding_error:
789 /* Only report the first error */
790 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
791 if ((ctxt == NULL) || (ctxt->input == NULL) ||
792 (ctxt->input->end - ctxt->input->cur < 4)) {
793 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
794 "Input is not proper UTF-8, indicate encoding !\n",
795 NULL, NULL);
796 } else {
797 char buffer[150];
798
799 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
800 ctxt->input->cur[0], ctxt->input->cur[1],
801 ctxt->input->cur[2], ctxt->input->cur[3]);
802 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
803 "Input is not proper UTF-8, indicate encoding !\n%s",
804 BAD_CAST buffer, NULL);
805 }
806 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
807 }
808 ctxt->input->cur++;
809 return;
810}
811
812/**
813 * xmlCurrentChar:
814 * @ctxt: the XML parser context
815 * @len: pointer to the length of the char read
816 *
817 * DEPRECATED: Internal function, do not use.
818 *
819 * The current char value, if using UTF-8 this may actually span multiple
820 * bytes in the input buffer. Implement the end of line normalization:
821 * 2.11 End-of-Line Handling
822 * Wherever an external parsed entity or the literal entity value
823 * of an internal parsed entity contains either the literal two-character
824 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
825 * must pass to the application the single character #xA.
826 * This behavior can conveniently be produced by normalizing all
827 * line breaks to #xA on input, before parsing.)
828 *
829 * Returns the current char value and its length
830 */
831
832int
833xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
834 const unsigned char *cur;
835 size_t avail;
836 int c;
837
838 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
839 if (ctxt->instate == XML_PARSER_EOF)
840 return(0);
841
842 avail = ctxt->input->end - ctxt->input->cur;
843
844 if (avail < INPUT_CHUNK) {
845 xmlParserGrow(ctxt);
846 if (ctxt->instate == XML_PARSER_EOF)
847 return(0);
848 avail = ctxt->input->end - ctxt->input->cur;
849 }
850
851 cur = ctxt->input->cur;
852 c = *cur;
853
854 if (c < 0x80) {
855 /* 1-byte code */
856 if (c < 0x20) {
857 /*
858 * 2.11 End-of-Line Handling
859 * the literal two-character sequence "#xD#xA" or a standalone
860 * literal #xD, an XML processor must pass to the application
861 * the single character #xA.
862 */
863 if (c == '\r') {
864 /*
865 * TODO: This function shouldn't change the 'cur' pointer
866 * as side effect, but the NEXTL macro in parser.c relies
867 * on this behavior when incrementing line numbers.
868 */
869 if (cur[1] == '\n')
870 ctxt->input->cur++;
871 *len = 1;
872 c = '\n';
873 } else if (c == 0) {
874 if (ctxt->input->cur >= ctxt->input->end) {
875 *len = 0;
876 } else {
877 *len = 1;
878 /*
879 * TODO: Null bytes should be handled by callers,
880 * but this can be tricky.
881 */
882 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
883 "Char 0x0 out of allowed range\n", c);
884 }
885 } else {
886 *len = 1;
887 }
888 } else {
889 *len = 1;
890 }
891
892 return(c);
893 } else {
894 int val;
895
896 if (avail < 2)
897 goto incomplete_sequence;
898 if ((cur[1] & 0xc0) != 0x80)
899 goto encoding_error;
900
901 if (c < 0xe0) {
902 /* 2-byte code */
903 if (c < 0xc2)
904 goto encoding_error;
905 val = (c & 0x1f) << 6;
906 val |= cur[1] & 0x3f;
907 *len = 2;
908 } else {
909 if (avail < 3)
910 goto incomplete_sequence;
911 if ((cur[2] & 0xc0) != 0x80)
912 goto encoding_error;
913
914 if (c < 0xf0) {
915 /* 3-byte code */
916 val = (c & 0xf) << 12;
917 val |= (cur[1] & 0x3f) << 6;
918 val |= cur[2] & 0x3f;
919 if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
920 goto encoding_error;
921 *len = 3;
922 } else {
923 if (avail < 4)
924 goto incomplete_sequence;
925 if ((cur[3] & 0xc0) != 0x80)
926 goto encoding_error;
927
928 /* 4-byte code */
929 val = (c & 0x0f) << 18;
930 val |= (cur[1] & 0x3f) << 12;
931 val |= (cur[2] & 0x3f) << 6;
932 val |= cur[3] & 0x3f;
933 if ((val < 0x10000) || (val >= 0x110000))
934 goto encoding_error;
935 *len = 4;
936 }
937 }
938
939 return(val);
940 }
941
942encoding_error:
943 /* Only report the first error */
944 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
945 if (ctxt->input->end - ctxt->input->cur < 4) {
946 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
947 "Input is not proper UTF-8, indicate encoding !\n",
948 NULL, NULL);
949 } else {
950 char buffer[150];
951
952 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
953 ctxt->input->cur[0], ctxt->input->cur[1],
954 ctxt->input->cur[2], ctxt->input->cur[3]);
955 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
956 "Input is not proper UTF-8, indicate encoding !\n%s",
957 BAD_CAST buffer, NULL);
958 }
959 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
960 }
961 *len = 1;
962 return(0xFFFD); /* U+FFFD Replacement Character */
963
964incomplete_sequence:
965 /*
966 * An encoding problem may arise from a truncated input buffer
967 * splitting a character in the middle. In that case do not raise
968 * an error but return 0. This should only happen when push parsing
969 * char data.
970 */
971 *len = 0;
972 return(0);
973}
974
975/**
976 * xmlStringCurrentChar:
977 * @ctxt: the XML parser context
978 * @cur: pointer to the beginning of the char
979 * @len: pointer to the length of the char read
980 *
981 * DEPRECATED: Internal function, do not use.
982 *
983 * The current char value, if using UTF-8 this may actually span multiple
984 * bytes in the input buffer.
985 *
986 * Returns the current char value and its length
987 */
988
989int
990xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
991 const xmlChar *cur, int *len) {
992 int c;
993
994 if ((cur == NULL) || (len == NULL))
995 return(0);
996
997 /* cur is zero-terminated, so we can lie about its length. */
998 *len = 4;
999 c = xmlGetUTF8Char(cur, len);
1000
1001 return((c < 0) ? 0 : c);
1002}
1003
1004/**
1005 * xmlCopyCharMultiByte:
1006 * @out: pointer to an array of xmlChar
1007 * @val: the char value
1008 *
1009 * append the char value in the array
1010 *
1011 * Returns the number of xmlChar written
1012 */
1013int
1014xmlCopyCharMultiByte(xmlChar *out, int val) {
1015 if ((out == NULL) || (val < 0)) return(0);
1016 /*
1017 * We are supposed to handle UTF8, check it's valid
1018 * From rfc2044: encoding of the Unicode values on UTF-8:
1019 *
1020 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1021 * 0000 0000-0000 007F 0xxxxxxx
1022 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1023 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1024 */
1025 if (val >= 0x80) {
1026 xmlChar *savedout = out;
1027 int bits;
1028 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1029 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1030 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1031 else {
1032 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
1033 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1034 val);
1035 return(0);
1036 }
1037 for ( ; bits >= 0; bits-= 6)
1038 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1039 return (out - savedout);
1040 }
1041 *out = val;
1042 return 1;
1043}
1044
1045/**
1046 * xmlCopyChar:
1047 * @len: Ignored, compatibility
1048 * @out: pointer to an array of xmlChar
1049 * @val: the char value
1050 *
1051 * append the char value in the array
1052 *
1053 * Returns the number of xmlChar written
1054 */
1055
1056int
1057xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1058 if ((out == NULL) || (val < 0)) return(0);
1059 /* the len parameter is ignored */
1060 if (val >= 0x80) {
1061 return(xmlCopyCharMultiByte (out, val));
1062 }
1063 *out = val;
1064 return 1;
1065}
1066
1067/************************************************************************
1068 * *
1069 * Commodity functions to switch encodings *
1070 * *
1071 ************************************************************************/
1072
1073static xmlCharEncodingHandlerPtr
1074xmlDetectEBCDIC(xmlParserInputPtr input) {
1075 xmlChar out[200];
1076 xmlCharEncodingHandlerPtr handler;
1077 int inlen, outlen, res, i;
1078
1079 /*
1080 * To detect the EBCDIC code page, we convert the first 200 bytes
1081 * to EBCDIC-US and try to find the encoding declaration.
1082 */
1083 handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1084 if (handler == NULL)
1085 return(NULL);
1086 outlen = sizeof(out) - 1;
1087 inlen = input->end - input->cur;
1088 res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1089 if (res < 0)
1090 return(handler);
1091 out[outlen] = 0;
1092
1093 for (i = 0; i < outlen; i++) {
1094 if (out[i] == '>')
1095 break;
1096 if ((out[i] == 'e') &&
1097 (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1098 int start, cur, quote;
1099
1100 i += 8;
1101 while (IS_BLANK_CH(out[i]))
1102 i += 1;
1103 if (out[i++] != '=')
1104 break;
1105 while (IS_BLANK_CH(out[i]))
1106 i += 1;
1107 quote = out[i++];
1108 if ((quote != '\'') && (quote != '"'))
1109 break;
1110 start = i;
1111 cur = out[i];
1112 while (((cur >= 'a') && (cur <= 'z')) ||
1113 ((cur >= 'A') && (cur <= 'Z')) ||
1114 ((cur >= '0') && (cur <= '9')) ||
1115 (cur == '.') || (cur == '_') ||
1116 (cur == '-'))
1117 cur = out[++i];
1118 if (cur != quote)
1119 break;
1120 out[i] = 0;
1121 xmlCharEncCloseFunc(handler);
1122 return(xmlFindCharEncodingHandler((char *) out + start));
1123 }
1124 }
1125
1126 /*
1127 * ICU handlers are stateful, so we have to recreate them.
1128 */
1129 xmlCharEncCloseFunc(handler);
1130 return(xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC));
1131}
1132
1133/**
1134 * xmlSwitchEncoding:
1135 * @ctxt: the parser context
1136 * @enc: the encoding value (number)
1137 *
1138 * Use encoding specified by enum to decode input data.
1139 *
1140 * This function can be used to enforce the encoding of chunks passed
1141 * to xmlParseChunk.
1142 *
1143 * Returns 0 in case of success, -1 otherwise
1144 */
1145int
1146xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1147{
1148 xmlCharEncodingHandlerPtr handler = NULL;
1149 int check = 1;
1150 int ret;
1151
1152 if ((ctxt == NULL) || (ctxt->input == NULL))
1153 return(-1);
1154
1155 switch (enc) {
1156 case XML_CHAR_ENCODING_NONE:
1157 case XML_CHAR_ENCODING_UTF8:
1158 case XML_CHAR_ENCODING_ASCII:
1159 check = 0;
1160 break;
1161 case XML_CHAR_ENCODING_EBCDIC:
1162 handler = xmlDetectEBCDIC(ctxt->input);
1163 break;
1164 default:
1165 handler = xmlGetCharEncodingHandler(enc);
1166 break;
1167 }
1168
1169 if ((check) && (handler == NULL)) {
1170 const char *name = xmlGetCharEncodingName(enc);
1171
1172 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1173 "encoding not supported: %s\n",
1174 BAD_CAST (name ? name : "<null>"), NULL);
1175 /*
1176 * TODO: We could recover from errors in external entities
1177 * if we didn't stop the parser. But most callers of this
1178 * function don't check the return value.
1179 */
1180 xmlStopParser(ctxt);
1181 return(-1);
1182 }
1183
1184 ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1185
1186 if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1187 ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1188 }
1189
1190 return(ret);
1191}
1192
1193/**
1194 * xmlSwitchInputEncoding:
1195 * @ctxt: the parser context
1196 * @input: the input stream
1197 * @handler: the encoding handler
1198 *
1199 * DEPRECATED: Internal function, don't use.
1200 *
1201 * Use encoding handler to decode input data.
1202 *
1203 * Returns 0 in case of success, -1 otherwise
1204 */
1205int
1206xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1207 xmlCharEncodingHandlerPtr handler)
1208{
1209 int nbchars;
1210 xmlParserInputBufferPtr in;
1211
1212 if ((input == NULL) || (input->buf == NULL)) {
1213 xmlCharEncCloseFunc(handler);
1214 return (-1);
1215 }
1216 in = input->buf;
1217
1218 input->flags |= XML_INPUT_HAS_ENCODING;
1219
1220 /*
1221 * UTF-8 requires no encoding handler.
1222 */
1223 if ((handler != NULL) &&
1224 (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1225 xmlCharEncCloseFunc(handler);
1226 handler = NULL;
1227 }
1228
1229 if (in->encoder == handler)
1230 return (0);
1231
1232 if (in->encoder != NULL) {
1233 /*
1234 * Switching encodings during parsing is a really bad idea,
1235 * but Chromium can switch between ISO-8859-1 and UTF-16 before
1236 * separate calls to xmlParseChunk.
1237 *
1238 * TODO: We should check whether the "raw" input buffer is empty and
1239 * convert the old content using the old encoder.
1240 */
1241
1242 xmlCharEncCloseFunc(in->encoder);
1243 in->encoder = handler;
1244 return (0);
1245 }
1246
1247 in->encoder = handler;
1248
1249 /*
1250 * Is there already some content down the pipe to convert ?
1251 */
1252 if (xmlBufIsEmpty(in->buffer) == 0) {
1253 size_t processed;
1254
1255 /*
1256 * Shrink the current input buffer.
1257 * Move it as the raw buffer and create a new input buffer
1258 */
1259 processed = input->cur - input->base;
1260 xmlBufShrink(in->buffer, processed);
1261 input->consumed += processed;
1262 in->raw = in->buffer;
1263 in->buffer = xmlBufCreate();
1264 in->rawconsumed = processed;
1265
1266 nbchars = xmlCharEncInput(in);
1267 xmlBufResetInput(in->buffer, input);
1268 if (nbchars < 0) {
1269 /* TODO: This could be an out of memory or an encoding error. */
1270 xmlErrInternal(ctxt,
1271 "switching encoding: encoder error\n",
1272 NULL);
1273 xmlHaltParser(ctxt);
1274 return (-1);
1275 }
1276 }
1277 return (0);
1278}
1279
1280/**
1281 * xmlSwitchToEncoding:
1282 * @ctxt: the parser context
1283 * @handler: the encoding handler
1284 *
1285 * Use encoding handler to decode input data.
1286 *
1287 * This function can be used to enforce the encoding of chunks passed
1288 * to xmlParseChunk.
1289 *
1290 * Returns 0 in case of success, -1 otherwise
1291 */
1292int
1293xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1294{
1295 if (ctxt == NULL)
1296 return(-1);
1297 return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1298}
1299
1300/**
1301 * xmlDetectEncoding:
1302 * @ctxt: the parser context
1303 *
1304 * Handle optional BOM, detect and switch to encoding.
1305 *
1306 * Assumes that there are at least four bytes in the input buffer.
1307 */
1308void
1309xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1310 const xmlChar *in;
1311 xmlCharEncoding enc;
1312 int bomSize;
1313 int autoFlag = 0;
1314
1315 if (xmlParserGrow(ctxt) < 0)
1316 return;
1317 in = ctxt->input->cur;
1318 if (ctxt->input->end - in < 4)
1319 return;
1320
1321 if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1322 /*
1323 * If the encoding was already set, only skip the BOM which was
1324 * possibly decoded to UTF-8.
1325 */
1326 if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1327 ctxt->input->cur += 3;
1328 }
1329
1330 return;
1331 }
1332
1333 enc = XML_CHAR_ENCODING_NONE;
1334 bomSize = 0;
1335
1336 switch (in[0]) {
1337 case 0x00:
1338 if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1339 enc = XML_CHAR_ENCODING_UCS4BE;
1340 autoFlag = XML_INPUT_AUTO_OTHER;
1341 } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1342 enc = XML_CHAR_ENCODING_UTF16BE;
1343 autoFlag = XML_INPUT_AUTO_UTF16BE;
1344 }
1345 break;
1346
1347 case 0x3C:
1348 if (in[1] == 0x00) {
1349 if ((in[2] == 0x00) && (in[3] == 0x00)) {
1350 enc = XML_CHAR_ENCODING_UCS4LE;
1351 autoFlag = XML_INPUT_AUTO_OTHER;
1352 } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1353 enc = XML_CHAR_ENCODING_UTF16LE;
1354 autoFlag = XML_INPUT_AUTO_UTF16LE;
1355 }
1356 }
1357 break;
1358
1359 case 0x4C:
1360 if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1361 enc = XML_CHAR_ENCODING_EBCDIC;
1362 autoFlag = XML_INPUT_AUTO_OTHER;
1363 }
1364 break;
1365
1366 case 0xEF:
1367 if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1368 enc = XML_CHAR_ENCODING_UTF8;
1369 autoFlag = XML_INPUT_AUTO_UTF8;
1370 bomSize = 3;
1371 }
1372 break;
1373
1374 case 0xFE:
1375 if (in[1] == 0xFF) {
1376 enc = XML_CHAR_ENCODING_UTF16BE;
1377 autoFlag = XML_INPUT_AUTO_UTF16BE;
1378 bomSize = 2;
1379 }
1380 break;
1381
1382 case 0xFF:
1383 if (in[1] == 0xFE) {
1384 enc = XML_CHAR_ENCODING_UTF16LE;
1385 autoFlag = XML_INPUT_AUTO_UTF16LE;
1386 bomSize = 2;
1387 }
1388 break;
1389 }
1390
1391 if (bomSize > 0) {
1392 ctxt->input->cur += bomSize;
1393 }
1394
1395 if (enc != XML_CHAR_ENCODING_NONE) {
1396 ctxt->input->flags |= autoFlag;
1397 xmlSwitchEncoding(ctxt, enc);
1398 }
1399}
1400
1401/**
1402 * xmlSetDeclaredEncoding:
1403 * @ctxt: the parser context
1404 * @encoding: declared encoding
1405 *
1406 * Set the encoding from a declaration in the document.
1407 *
1408 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1409 * about encoding mismatches.
1410 *
1411 * Takes ownership of 'encoding'.
1412 */
1413void
1414xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1415 if (ctxt->encoding != NULL)
1416 xmlFree((xmlChar *) ctxt->encoding);
1417 ctxt->encoding = encoding;
1418
1419 if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1420 ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1421 xmlCharEncodingHandlerPtr handler;
1422
1423 handler = xmlFindCharEncodingHandler((const char *) encoding);
1424 if (handler == NULL) {
1425 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1426 "Unsupported encoding: %s\n",
1427 encoding, NULL);
1428 return;
1429 }
1430
1431 xmlSwitchToEncoding(ctxt, handler);
1432 ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1433 } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1434 static const char *allowedUTF8[] = {
1435 "UTF-8", "UTF8", NULL
1436 };
1437 static const char *allowedUTF16LE[] = {
1438 "UTF-16", "UTF-16LE", "UTF16", NULL
1439 };
1440 static const char *allowedUTF16BE[] = {
1441 "UTF-16", "UTF-16BE", "UTF16", NULL
1442 };
1443 const char **allowed = NULL;
1444 const char *autoEnc = NULL;
1445
1446 switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1447 case XML_INPUT_AUTO_UTF8:
1448 allowed = allowedUTF8;
1449 autoEnc = "UTF-8";
1450 break;
1451 case XML_INPUT_AUTO_UTF16LE:
1452 allowed = allowedUTF16LE;
1453 autoEnc = "UTF-16LE";
1454 break;
1455 case XML_INPUT_AUTO_UTF16BE:
1456 allowed = allowedUTF16BE;
1457 autoEnc = "UTF-16BE";
1458 break;
1459 }
1460
1461 if (allowed != NULL) {
1462 const char **p;
1463 int match = 0;
1464
1465 for (p = allowed; *p != NULL; p++) {
1466 if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1467 match = 1;
1468 break;
1469 }
1470 }
1471
1472 if (match == 0) {
1473 xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1474 "Encoding '%s' doesn't match "
1475 "auto-detected '%s'\n",
1476 encoding, BAD_CAST autoEnc);
1477 }
1478 }
1479 }
1480}
1481
1482/**
1483 * xmlGetActualEncoding:
1484 * @ctxt: the parser context
1485 *
1486 * Returns the actual used to parse the document. This can differ from
1487 * the declared encoding.
1488 */
1489const xmlChar *
1490xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1491 const xmlChar *encoding = NULL;
1492
1493 if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1494 (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1495 /* Preserve encoding exactly */
1496 encoding = ctxt->encoding;
1497 } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1498 encoding = BAD_CAST ctxt->input->buf->encoder->name;
1499 } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1500 encoding = BAD_CAST "UTF-8";
1501 }
1502
1503 return(encoding);
1504}
1505
1506/************************************************************************
1507 * *
1508 * Commodity functions to handle entities processing *
1509 * *
1510 ************************************************************************/
1511
1512/**
1513 * xmlFreeInputStream:
1514 * @input: an xmlParserInputPtr
1515 *
1516 * Free up an input stream.
1517 */
1518void
1519xmlFreeInputStream(xmlParserInputPtr input) {
1520 if (input == NULL) return;
1521
1522 if (input->filename != NULL) xmlFree((char *) input->filename);
1523 if (input->directory != NULL) xmlFree((char *) input->directory);
1524 if (input->version != NULL) xmlFree((char *) input->version);
1525 if ((input->free != NULL) && (input->base != NULL))
1526 input->free((xmlChar *) input->base);
1527 if (input->buf != NULL)
1528 xmlFreeParserInputBuffer(input->buf);
1529 xmlFree(input);
1530}
1531
1532/**
1533 * xmlNewInputStream:
1534 * @ctxt: an XML parser context
1535 *
1536 * Create a new input stream structure.
1537 *
1538 * Returns the new input stream or NULL
1539 */
1540xmlParserInputPtr
1541xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1542 xmlParserInputPtr input;
1543
1544 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1545 if (input == NULL) {
1546 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1547 return(NULL);
1548 }
1549 memset(input, 0, sizeof(xmlParserInput));
1550 input->line = 1;
1551 input->col = 1;
1552
1553 /*
1554 * If the context is NULL the id cannot be initialized, but that
1555 * should not happen while parsing which is the situation where
1556 * the id is actually needed.
1557 */
1558 if (ctxt != NULL) {
1559 if (input->id >= INT_MAX) {
1560 xmlErrMemory(ctxt, "Input ID overflow\n");
1561 return(NULL);
1562 }
1563 input->id = ctxt->input_id++;
1564 }
1565
1566 return(input);
1567}
1568
1569/**
1570 * xmlNewIOInputStream:
1571 * @ctxt: an XML parser context
1572 * @input: an I/O Input
1573 * @enc: the charset encoding if known
1574 *
1575 * Create a new input stream structure encapsulating the @input into
1576 * a stream suitable for the parser.
1577 *
1578 * Returns the new input stream or NULL
1579 */
1580xmlParserInputPtr
1581xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1582 xmlCharEncoding enc) {
1583 xmlParserInputPtr inputStream;
1584
1585 if (input == NULL) return(NULL);
1586 if (xmlParserDebugEntities)
1587 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1588 inputStream = xmlNewInputStream(ctxt);
1589 if (inputStream == NULL) {
1590 return(NULL);
1591 }
1592 inputStream->filename = NULL;
1593 inputStream->buf = input;
1594 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1595
1596 if (enc != XML_CHAR_ENCODING_NONE) {
1597 xmlSwitchEncoding(ctxt, enc);
1598 }
1599
1600 return(inputStream);
1601}
1602
1603/**
1604 * xmlNewEntityInputStream:
1605 * @ctxt: an XML parser context
1606 * @entity: an Entity pointer
1607 *
1608 * DEPRECATED: Internal function, do not use.
1609 *
1610 * Create a new input stream based on an xmlEntityPtr
1611 *
1612 * Returns the new input stream or NULL
1613 */
1614xmlParserInputPtr
1615xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1616 xmlParserInputPtr input;
1617
1618 if (entity == NULL) {
1619 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1620 NULL);
1621 return(NULL);
1622 }
1623 if (xmlParserDebugEntities)
1624 xmlGenericError(xmlGenericErrorContext,
1625 "new input from entity: %s\n", entity->name);
1626 if (entity->content == NULL) {
1627 switch (entity->etype) {
1628 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1629 xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1630 entity->name);
1631 break;
1632 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1633 case XML_EXTERNAL_PARAMETER_ENTITY:
1634 input = xmlLoadExternalEntity((char *) entity->URI,
1635 (char *) entity->ExternalID, ctxt);
1636 if (input != NULL)
1637 input->entity = entity;
1638 return(input);
1639 case XML_INTERNAL_GENERAL_ENTITY:
1640 xmlErrInternal(ctxt,
1641 "Internal entity %s without content !\n",
1642 entity->name);
1643 break;
1644 case XML_INTERNAL_PARAMETER_ENTITY:
1645 xmlErrInternal(ctxt,
1646 "Internal parameter entity %s without content !\n",
1647 entity->name);
1648 break;
1649 case XML_INTERNAL_PREDEFINED_ENTITY:
1650 xmlErrInternal(ctxt,
1651 "Predefined entity %s without content !\n",
1652 entity->name);
1653 break;
1654 }
1655 return(NULL);
1656 }
1657 input = xmlNewInputStream(ctxt);
1658 if (input == NULL) {
1659 return(NULL);
1660 }
1661 if (entity->URI != NULL)
1662 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1663 input->base = entity->content;
1664 if (entity->length == 0)
1665 entity->length = xmlStrlen(entity->content);
1666 input->cur = entity->content;
1667 input->length = entity->length;
1668 input->end = &entity->content[input->length];
1669 input->entity = entity;
1670 return(input);
1671}
1672
1673/**
1674 * xmlNewStringInputStream:
1675 * @ctxt: an XML parser context
1676 * @buffer: an memory buffer
1677 *
1678 * Create a new input stream based on a memory buffer.
1679 * Returns the new input stream
1680 */
1681xmlParserInputPtr
1682xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1683 xmlParserInputPtr input;
1684 xmlParserInputBufferPtr buf;
1685
1686 if (buffer == NULL) {
1687 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1688 NULL);
1689 return(NULL);
1690 }
1691 if (xmlParserDebugEntities)
1692 xmlGenericError(xmlGenericErrorContext,
1693 "new fixed input: %.30s\n", buffer);
1694 buf = xmlParserInputBufferCreateString(buffer);
1695 if (buf == NULL) {
1696 xmlErrMemory(ctxt, NULL);
1697 return(NULL);
1698 }
1699 input = xmlNewInputStream(ctxt);
1700 if (input == NULL) {
1701 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1702 xmlFreeParserInputBuffer(buf);
1703 return(NULL);
1704 }
1705 input->buf = buf;
1706 xmlBufResetInput(input->buf->buffer, input);
1707 return(input);
1708}
1709
1710/**
1711 * xmlNewInputFromFile:
1712 * @ctxt: an XML parser context
1713 * @filename: the filename to use as entity
1714 *
1715 * Create a new input stream based on a file or an URL.
1716 *
1717 * Returns the new input stream or NULL in case of error
1718 */
1719xmlParserInputPtr
1720xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1721 xmlParserInputBufferPtr buf;
1722 xmlParserInputPtr inputStream;
1723 char *directory = NULL;
1724 xmlChar *URI = NULL;
1725
1726 if (xmlParserDebugEntities)
1727 xmlGenericError(xmlGenericErrorContext,
1728 "new input from file: %s\n", filename);
1729 if (ctxt == NULL) return(NULL);
1730 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1731 if (buf == NULL) {
1732 if (filename == NULL)
1733 __xmlLoaderErr(ctxt,
1734 "failed to load external entity: NULL filename \n",
1735 NULL);
1736 else
1737 __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1738 (const char *) filename);
1739 return(NULL);
1740 }
1741
1742 inputStream = xmlNewInputStream(ctxt);
1743 if (inputStream == NULL) {
1744 xmlFreeParserInputBuffer(buf);
1745 return(NULL);
1746 }
1747
1748 inputStream->buf = buf;
1749 inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1750 if (inputStream == NULL)
1751 return(NULL);
1752
1753 if (inputStream->filename == NULL)
1754 URI = xmlStrdup((xmlChar *) filename);
1755 else
1756 URI = xmlStrdup((xmlChar *) inputStream->filename);
1757 directory = xmlParserGetDirectory((const char *) URI);
1758 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1759 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1760 if (URI != NULL) xmlFree((char *) URI);
1761 inputStream->directory = directory;
1762
1763 xmlBufResetInput(inputStream->buf->buffer, inputStream);
1764 if ((ctxt->directory == NULL) && (directory != NULL))
1765 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1766 return(inputStream);
1767}
1768
1769/************************************************************************
1770 * *
1771 * Commodity functions to handle parser contexts *
1772 * *
1773 ************************************************************************/
1774
1775/**
1776 * xmlInitSAXParserCtxt:
1777 * @ctxt: XML parser context
1778 * @sax: SAX handlert
1779 * @userData: user data
1780 *
1781 * Initialize a SAX parser context
1782 *
1783 * Returns 0 in case of success and -1 in case of error
1784 */
1785
1786static int
1787xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1788 void *userData)
1789{
1790 xmlParserInputPtr input;
1791
1792 if(ctxt==NULL) {
1793 xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1794 return(-1);
1795 }
1796
1797 xmlInitParser();
1798
1799 if (ctxt->dict == NULL)
1800 ctxt->dict = xmlDictCreate();
1801 if (ctxt->dict == NULL) {
1802 xmlErrMemory(NULL, "cannot initialize parser context\n");
1803 return(-1);
1804 }
1805 xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1806
1807 if (ctxt->sax == NULL)
1808 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1809 if (ctxt->sax == NULL) {
1810 xmlErrMemory(NULL, "cannot initialize parser context\n");
1811 return(-1);
1812 }
1813 if (sax == NULL) {
1814 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1815 xmlSAXVersion(ctxt->sax, 2);
1816 ctxt->userData = ctxt;
1817 } else {
1818 if (sax->initialized == XML_SAX2_MAGIC) {
1819 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1820 } else {
1821 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1822 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1823 }
1824 ctxt->userData = userData ? userData : ctxt;
1825 }
1826
1827 ctxt->maxatts = 0;
1828 ctxt->atts = NULL;
1829 /* Allocate the Input stack */
1830 if (ctxt->inputTab == NULL) {
1831 ctxt->inputTab = (xmlParserInputPtr *)
1832 xmlMalloc(5 * sizeof(xmlParserInputPtr));
1833 ctxt->inputMax = 5;
1834 }
1835 if (ctxt->inputTab == NULL) {
1836 xmlErrMemory(NULL, "cannot initialize parser context\n");
1837 ctxt->inputNr = 0;
1838 ctxt->inputMax = 0;
1839 ctxt->input = NULL;
1840 return(-1);
1841 }
1842 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1843 xmlFreeInputStream(input);
1844 }
1845 ctxt->inputNr = 0;
1846 ctxt->input = NULL;
1847
1848 ctxt->version = NULL;
1849 ctxt->encoding = NULL;
1850 ctxt->standalone = -1;
1851 ctxt->hasExternalSubset = 0;
1852 ctxt->hasPErefs = 0;
1853 ctxt->html = 0;
1854 ctxt->external = 0;
1855 ctxt->instate = XML_PARSER_START;
1856 ctxt->token = 0;
1857 ctxt->directory = NULL;
1858
1859 /* Allocate the Node stack */
1860 if (ctxt->nodeTab == NULL) {
1861 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1862 ctxt->nodeMax = 10;
1863 }
1864 if (ctxt->nodeTab == NULL) {
1865 xmlErrMemory(NULL, "cannot initialize parser context\n");
1866 ctxt->nodeNr = 0;
1867 ctxt->nodeMax = 0;
1868 ctxt->node = NULL;
1869 ctxt->inputNr = 0;
1870 ctxt->inputMax = 0;
1871 ctxt->input = NULL;
1872 return(-1);
1873 }
1874 ctxt->nodeNr = 0;
1875 ctxt->node = NULL;
1876
1877 /* Allocate the Name stack */
1878 if (ctxt->nameTab == NULL) {
1879 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1880 ctxt->nameMax = 10;
1881 }
1882 if (ctxt->nameTab == NULL) {
1883 xmlErrMemory(NULL, "cannot initialize parser context\n");
1884 ctxt->nodeNr = 0;
1885 ctxt->nodeMax = 0;
1886 ctxt->node = NULL;
1887 ctxt->inputNr = 0;
1888 ctxt->inputMax = 0;
1889 ctxt->input = NULL;
1890 ctxt->nameNr = 0;
1891 ctxt->nameMax = 0;
1892 ctxt->name = NULL;
1893 return(-1);
1894 }
1895 ctxt->nameNr = 0;
1896 ctxt->name = NULL;
1897
1898 /* Allocate the space stack */
1899 if (ctxt->spaceTab == NULL) {
1900 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1901 ctxt->spaceMax = 10;
1902 }
1903 if (ctxt->spaceTab == NULL) {
1904 xmlErrMemory(NULL, "cannot initialize parser context\n");
1905 ctxt->nodeNr = 0;
1906 ctxt->nodeMax = 0;
1907 ctxt->node = NULL;
1908 ctxt->inputNr = 0;
1909 ctxt->inputMax = 0;
1910 ctxt->input = NULL;
1911 ctxt->nameNr = 0;
1912 ctxt->nameMax = 0;
1913 ctxt->name = NULL;
1914 ctxt->spaceNr = 0;
1915 ctxt->spaceMax = 0;
1916 ctxt->space = NULL;
1917 return(-1);
1918 }
1919 ctxt->spaceNr = 1;
1920 ctxt->spaceMax = 10;
1921 ctxt->spaceTab[0] = -1;
1922 ctxt->space = &ctxt->spaceTab[0];
1923 ctxt->myDoc = NULL;
1924 ctxt->wellFormed = 1;
1925 ctxt->nsWellFormed = 1;
1926 ctxt->valid = 1;
1927 ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1928 if (ctxt->loadsubset) {
1929 ctxt->options |= XML_PARSE_DTDLOAD;
1930 }
1931 ctxt->validate = xmlDoValidityCheckingDefaultValue;
1932 ctxt->pedantic = xmlPedanticParserDefaultValue;
1933 if (ctxt->pedantic) {
1934 ctxt->options |= XML_PARSE_PEDANTIC;
1935 }
1936 ctxt->linenumbers = xmlLineNumbersDefaultValue;
1937 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1938 if (ctxt->keepBlanks == 0) {
1939 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1940 ctxt->options |= XML_PARSE_NOBLANKS;
1941 }
1942
1943 ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1944 ctxt->vctxt.userData = ctxt;
1945 ctxt->vctxt.error = xmlParserValidityError;
1946 ctxt->vctxt.warning = xmlParserValidityWarning;
1947 if (ctxt->validate) {
1948 if (xmlGetWarningsDefaultValue == 0)
1949 ctxt->vctxt.warning = NULL;
1950 else
1951 ctxt->vctxt.warning = xmlParserValidityWarning;
1952 ctxt->vctxt.nodeMax = 0;
1953 ctxt->options |= XML_PARSE_DTDVALID;
1954 }
1955 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1956 if (ctxt->replaceEntities) {
1957 ctxt->options |= XML_PARSE_NOENT;
1958 }
1959 ctxt->record_info = 0;
1960 ctxt->checkIndex = 0;
1961 ctxt->inSubset = 0;
1962 ctxt->errNo = XML_ERR_OK;
1963 ctxt->depth = 0;
1964 ctxt->catalogs = NULL;
1965 ctxt->sizeentities = 0;
1966 ctxt->sizeentcopy = 0;
1967 ctxt->input_id = 1;
1968 ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
1969 xmlInitNodeInfoSeq(&ctxt->node_seq);
1970
1971 if (ctxt->nsdb == NULL) {
1972 ctxt->nsdb = xmlParserNsCreate();
1973 if (ctxt->nsdb == NULL) {
1974 xmlErrMemory(ctxt, NULL);
1975 return(-1);
1976 }
1977 }
1978
1979 return(0);
1980}
1981
1982/**
1983 * xmlInitParserCtxt:
1984 * @ctxt: an XML parser context
1985 *
1986 * DEPRECATED: Internal function which will be made private in a future
1987 * version.
1988 *
1989 * Initialize a parser context
1990 *
1991 * Returns 0 in case of success and -1 in case of error
1992 */
1993
1994int
1995xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1996{
1997 return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1998}
1999
2000/**
2001 * xmlFreeParserCtxt:
2002 * @ctxt: an XML parser context
2003 *
2004 * Free all the memory used by a parser context. However the parsed
2005 * document in ctxt->myDoc is not freed.
2006 */
2007
2008void
2009xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2010{
2011 xmlParserInputPtr input;
2012
2013 if (ctxt == NULL) return;
2014
2015 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2016 xmlFreeInputStream(input);
2017 }
2018 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2019 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2020 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2021 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2022 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2023 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2024 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2025 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2026 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2027#ifdef LIBXML_SAX1_ENABLED
2028 if ((ctxt->sax != NULL) &&
2029 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2030#else
2031 if (ctxt->sax != NULL)
2032#endif /* LIBXML_SAX1_ENABLED */
2033 xmlFree(ctxt->sax);
2034 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2035 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2036 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2037 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2038 if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2039 if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2040 if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2041 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2042 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2043 if (ctxt->attsDefault != NULL)
2044 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2045 if (ctxt->attsSpecial != NULL)
2046 xmlHashFree(ctxt->attsSpecial, NULL);
2047 if (ctxt->freeElems != NULL) {
2048 xmlNodePtr cur, next;
2049
2050 cur = ctxt->freeElems;
2051 while (cur != NULL) {
2052 next = cur->next;
2053 xmlFree(cur);
2054 cur = next;
2055 }
2056 }
2057 if (ctxt->freeAttrs != NULL) {
2058 xmlAttrPtr cur, next;
2059
2060 cur = ctxt->freeAttrs;
2061 while (cur != NULL) {
2062 next = cur->next;
2063 xmlFree(cur);
2064 cur = next;
2065 }
2066 }
2067 /*
2068 * cleanup the error strings
2069 */
2070 if (ctxt->lastError.message != NULL)
2071 xmlFree(ctxt->lastError.message);
2072 if (ctxt->lastError.file != NULL)
2073 xmlFree(ctxt->lastError.file);
2074 if (ctxt->lastError.str1 != NULL)
2075 xmlFree(ctxt->lastError.str1);
2076 if (ctxt->lastError.str2 != NULL)
2077 xmlFree(ctxt->lastError.str2);
2078 if (ctxt->lastError.str3 != NULL)
2079 xmlFree(ctxt->lastError.str3);
2080
2081#ifdef LIBXML_CATALOG_ENABLED
2082 if (ctxt->catalogs != NULL)
2083 xmlCatalogFreeLocal(ctxt->catalogs);
2084#endif
2085 xmlFree(ctxt);
2086}
2087
2088/**
2089 * xmlNewParserCtxt:
2090 *
2091 * Allocate and initialize a new parser context.
2092 *
2093 * Returns the xmlParserCtxtPtr or NULL
2094 */
2095
2096xmlParserCtxtPtr
2097xmlNewParserCtxt(void)
2098{
2099 return(xmlNewSAXParserCtxt(NULL, NULL));
2100}
2101
2102/**
2103 * xmlNewSAXParserCtxt:
2104 * @sax: SAX handler
2105 * @userData: user data
2106 *
2107 * Allocate and initialize a new SAX parser context. If userData is NULL,
2108 * the parser context will be passed as user data.
2109 *
2110 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2111 */
2112
2113xmlParserCtxtPtr
2114xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2115{
2116 xmlParserCtxtPtr ctxt;
2117
2118 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2119 if (ctxt == NULL) {
2120 xmlErrMemory(NULL, "cannot allocate parser context\n");
2121 return(NULL);
2122 }
2123 memset(ctxt, 0, sizeof(xmlParserCtxt));
2124 if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2125 xmlFreeParserCtxt(ctxt);
2126 return(NULL);
2127 }
2128 return(ctxt);
2129}
2130
2131/************************************************************************
2132 * *
2133 * Handling of node information *
2134 * *
2135 ************************************************************************/
2136
2137/**
2138 * xmlClearParserCtxt:
2139 * @ctxt: an XML parser context
2140 *
2141 * Clear (release owned resources) and reinitialize a parser context
2142 */
2143
2144void
2145xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2146{
2147 if (ctxt==NULL)
2148 return;
2149 xmlClearNodeInfoSeq(&ctxt->node_seq);
2150 xmlCtxtReset(ctxt);
2151}
2152
2153
2154/**
2155 * xmlParserFindNodeInfo:
2156 * @ctx: an XML parser context
2157 * @node: an XML node within the tree
2158 *
2159 * DEPRECATED: Don't use.
2160 *
2161 * Find the parser node info struct for a given node
2162 *
2163 * Returns an xmlParserNodeInfo block pointer or NULL
2164 */
2165const xmlParserNodeInfo *
2166xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
2167{
2168 unsigned long pos;
2169
2170 if ((ctx == NULL) || (node == NULL))
2171 return (NULL);
2172 /* Find position where node should be at */
2173 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2174 if (pos < ctx->node_seq.length
2175 && ctx->node_seq.buffer[pos].node == node)
2176 return &ctx->node_seq.buffer[pos];
2177 else
2178 return NULL;
2179}
2180
2181
2182/**
2183 * xmlInitNodeInfoSeq:
2184 * @seq: a node info sequence pointer
2185 *
2186 * DEPRECATED: Don't use.
2187 *
2188 * -- Initialize (set to initial state) node info sequence
2189 */
2190void
2191xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2192{
2193 if (seq == NULL)
2194 return;
2195 seq->length = 0;
2196 seq->maximum = 0;
2197 seq->buffer = NULL;
2198}
2199
2200/**
2201 * xmlClearNodeInfoSeq:
2202 * @seq: a node info sequence pointer
2203 *
2204 * DEPRECATED: Don't use.
2205 *
2206 * -- Clear (release memory and reinitialize) node
2207 * info sequence
2208 */
2209void
2210xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2211{
2212 if (seq == NULL)
2213 return;
2214 if (seq->buffer != NULL)
2215 xmlFree(seq->buffer);
2216 xmlInitNodeInfoSeq(seq);
2217}
2218
2219/**
2220 * xmlParserFindNodeInfoIndex:
2221 * @seq: a node info sequence pointer
2222 * @node: an XML node pointer
2223 *
2224 * DEPRECATED: Don't use.
2225 *
2226 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2227 * the given node is or should be at in a sorted sequence
2228 *
2229 * Returns a long indicating the position of the record
2230 */
2231unsigned long
2232xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2233 const xmlNodePtr node)
2234{
2235 unsigned long upper, lower, middle;
2236 int found = 0;
2237
2238 if ((seq == NULL) || (node == NULL))
2239 return ((unsigned long) -1);
2240
2241 /* Do a binary search for the key */
2242 lower = 1;
2243 upper = seq->length;
2244 middle = 0;
2245 while (lower <= upper && !found) {
2246 middle = lower + (upper - lower) / 2;
2247 if (node == seq->buffer[middle - 1].node)
2248 found = 1;
2249 else if (node < seq->buffer[middle - 1].node)
2250 upper = middle - 1;
2251 else
2252 lower = middle + 1;
2253 }
2254
2255 /* Return position */
2256 if (middle == 0 || seq->buffer[middle - 1].node < node)
2257 return middle;
2258 else
2259 return middle - 1;
2260}
2261
2262
2263/**
2264 * xmlParserAddNodeInfo:
2265 * @ctxt: an XML parser context
2266 * @info: a node info sequence pointer
2267 *
2268 * DEPRECATED: Don't use.
2269 *
2270 * Insert node info record into the sorted sequence
2271 */
2272void
2273xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2274 const xmlParserNodeInfoPtr info)
2275{
2276 unsigned long pos;
2277
2278 if ((ctxt == NULL) || (info == NULL)) return;
2279
2280 /* Find pos and check to see if node is already in the sequence */
2281 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2282 info->node);
2283
2284 if ((pos < ctxt->node_seq.length) &&
2285 (ctxt->node_seq.buffer != NULL) &&
2286 (ctxt->node_seq.buffer[pos].node == info->node)) {
2287 ctxt->node_seq.buffer[pos] = *info;
2288 }
2289
2290 /* Otherwise, we need to add new node to buffer */
2291 else {
2292 if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2293 (ctxt->node_seq.buffer == NULL)) {
2294 xmlParserNodeInfo *tmp_buffer;
2295 unsigned int byte_size;
2296
2297 if (ctxt->node_seq.maximum == 0)
2298 ctxt->node_seq.maximum = 2;
2299 byte_size = (sizeof(*ctxt->node_seq.buffer) *
2300 (2 * ctxt->node_seq.maximum));
2301
2302 if (ctxt->node_seq.buffer == NULL)
2303 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2304 else
2305 tmp_buffer =
2306 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2307 byte_size);
2308
2309 if (tmp_buffer == NULL) {
2310 xmlErrMemory(ctxt, "failed to allocate buffer\n");
2311 return;
2312 }
2313 ctxt->node_seq.buffer = tmp_buffer;
2314 ctxt->node_seq.maximum *= 2;
2315 }
2316
2317 /* If position is not at end, move elements out of the way */
2318 if (pos != ctxt->node_seq.length) {
2319 unsigned long i;
2320
2321 for (i = ctxt->node_seq.length; i > pos; i--)
2322 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2323 }
2324
2325 /* Copy element and increase length */
2326 ctxt->node_seq.buffer[pos] = *info;
2327 ctxt->node_seq.length++;
2328 }
2329}
2330
2331/************************************************************************
2332 * *
2333 * Defaults settings *
2334 * *
2335 ************************************************************************/
2336/**
2337 * xmlPedanticParserDefault:
2338 * @val: int 0 or 1
2339 *
2340 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2341 *
2342 * Set and return the previous value for enabling pedantic warnings.
2343 *
2344 * Returns the last value for 0 for no substitution, 1 for substitution.
2345 */
2346
2347int
2348xmlPedanticParserDefault(int val) {
2349 int old = xmlPedanticParserDefaultValue;
2350
2351 xmlPedanticParserDefaultValue = val;
2352 return(old);
2353}
2354
2355/**
2356 * xmlLineNumbersDefault:
2357 * @val: int 0 or 1
2358 *
2359 * DEPRECATED: The modern options API always enables line numbers.
2360 *
2361 * Set and return the previous value for enabling line numbers in elements
2362 * contents. This may break on old application and is turned off by default.
2363 *
2364 * Returns the last value for 0 for no substitution, 1 for substitution.
2365 */
2366
2367int
2368xmlLineNumbersDefault(int val) {
2369 int old = xmlLineNumbersDefaultValue;
2370
2371 xmlLineNumbersDefaultValue = val;
2372 return(old);
2373}
2374
2375/**
2376 * xmlSubstituteEntitiesDefault:
2377 * @val: int 0 or 1
2378 *
2379 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2380 *
2381 * Set and return the previous value for default entity support.
2382 * Initially the parser always keep entity references instead of substituting
2383 * entity values in the output. This function has to be used to change the
2384 * default parser behavior
2385 * SAX::substituteEntities() has to be used for changing that on a file by
2386 * file basis.
2387 *
2388 * Returns the last value for 0 for no substitution, 1 for substitution.
2389 */
2390
2391int
2392xmlSubstituteEntitiesDefault(int val) {
2393 int old = xmlSubstituteEntitiesDefaultValue;
2394
2395 xmlSubstituteEntitiesDefaultValue = val;
2396 return(old);
2397}
2398
2399/**
2400 * xmlKeepBlanksDefault:
2401 * @val: int 0 or 1
2402 *
2403 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2404 *
2405 * Set and return the previous value for default blanks text nodes support.
2406 * The 1.x version of the parser used an heuristic to try to detect
2407 * ignorable white spaces. As a result the SAX callback was generating
2408 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2409 * using the DOM output text nodes containing those blanks were not generated.
2410 * The 2.x and later version will switch to the XML standard way and
2411 * ignorableWhitespace() are only generated when running the parser in
2412 * validating mode and when the current element doesn't allow CDATA or
2413 * mixed content.
2414 * This function is provided as a way to force the standard behavior
2415 * on 1.X libs and to switch back to the old mode for compatibility when
2416 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2417 * by using xmlIsBlankNode() commodity function to detect the "empty"
2418 * nodes generated.
2419 * This value also affect autogeneration of indentation when saving code
2420 * if blanks sections are kept, indentation is not generated.
2421 *
2422 * Returns the last value for 0 for no substitution, 1 for substitution.
2423 */
2424
2425int
2426xmlKeepBlanksDefault(int val) {
2427 int old = xmlKeepBlanksDefaultValue;
2428
2429 xmlKeepBlanksDefaultValue = val;
2430#ifdef LIBXML_OUTPUT_ENABLED
2431 if (!val)
2432 xmlIndentTreeOutput = 1;
2433#endif
2434 return(old);
2435}
2436
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette