VirtualBox

source: vbox/trunk/src/libs/libxml2-2.9.4/parser.c@ 70579

最後變更 在這個檔案從70579是 65950,由 vboxsync 提交於 8 年 前

libxml 2.9.4: fix export

  • 屬性 svn:eol-style 設為 native
檔案大小: 430.7 KB
 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <limits.h>
44#include <string.h>
45#include <stdarg.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/threads.h>
48#include <libxml/globals.h>
49#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
58#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
61#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
65#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
83#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
86
87#include "buf.h"
88#include "enc.h"
89
90static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
93static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
97static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
99/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_PARSER_BIG_ENTITY 1000
106#define XML_PARSER_LOT_ENTITY 5000
107
108/*
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in byte of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacements per byte of input.
113 */
114#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127 xmlEntityPtr ent, size_t replacement)
128{
129 size_t consumed = 0;
130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
148 ++ctxt->depth;
149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
151 --ctxt->depth;
152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
153 ent->content[0] = 0;
154 }
155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
204 size = ent->checked / 2;
205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
223 * strange we got no data for checking
224 */
225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
229 }
230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232}
233
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option. The initial value is 256.
 */
unsigned int xmlParserMaxDepth = 256;
243
244
245
246#define SAX2 1
247#define XML_PARSER_BIG_BUFFER_SIZE 300
248#define XML_PARSER_BUFFER_SIZE 100
249#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250
251/**
252 * XML_PARSER_CHUNK_SIZE
253 *
254 * When calling GROW that's the minimal amount of data
255 * the parser expected to have received. It is not a hard
256 * limit but an optimization when reading strings like Names
257 * It is not strictly needed as long as inputs available characters
258 * are followed by 0, which should be provided by the I/O level
259 */
260#define XML_PARSER_CHUNK_SIZE 100
261
/*
 * List of XML prefixed PI targets allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
272
273/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
274static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
276
277static xmlParserErrors
278xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
280 void *user_data, int depth, const xmlChar *URL,
281 const xmlChar *ID, xmlNodePtr *list);
282
283static int
284xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
286#ifdef LIBXML_LEGACY_ENABLED
287static void
288xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
290#endif /* LIBXML_LEGACY_ENABLED */
291
292static xmlParserErrors
293xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
295
296static int
297xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
299/************************************************************************
300 * *
301 * Some factorized error routines *
302 * *
303 ************************************************************************/
304
305/**
306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313static void
314xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316{
317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
322
323 if (prefix == NULL)
324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
328 else
329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
339}
340
341/**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349static void
350xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
351{
352 const char *errmsg;
353
354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
359 errmsg = "CharRef: invalid hexadecimal value";
360 break;
361 case XML_ERR_INVALID_DEC_CHARREF:
362 errmsg = "CharRef: invalid decimal value";
363 break;
364 case XML_ERR_INVALID_CHARREF:
365 errmsg = "CharRef: invalid value";
366 break;
367 case XML_ERR_INTERNAL_ERROR:
368 errmsg = "internal error";
369 break;
370 case XML_ERR_PEREF_AT_EOF:
371 errmsg = "PEReference at end of document";
372 break;
373 case XML_ERR_PEREF_IN_PROLOG:
374 errmsg = "PEReference in prolog";
375 break;
376 case XML_ERR_PEREF_IN_EPILOG:
377 errmsg = "PEReference in epilog";
378 break;
379 case XML_ERR_PEREF_NO_NAME:
380 errmsg = "PEReference: no name";
381 break;
382 case XML_ERR_PEREF_SEMICOL_MISSING:
383 errmsg = "PEReference: expecting ';'";
384 break;
385 case XML_ERR_ENTITY_LOOP:
386 errmsg = "Detected an entity reference loop";
387 break;
388 case XML_ERR_ENTITY_NOT_STARTED:
389 errmsg = "EntityValue: \" or ' expected";
390 break;
391 case XML_ERR_ENTITY_PE_INTERNAL:
392 errmsg = "PEReferences forbidden in internal subset";
393 break;
394 case XML_ERR_ENTITY_NOT_FINISHED:
395 errmsg = "EntityValue: \" or ' expected";
396 break;
397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
398 errmsg = "AttValue: \" or ' expected";
399 break;
400 case XML_ERR_LT_IN_ATTRIBUTE:
401 errmsg = "Unescaped '<' not allowed in attributes values";
402 break;
403 case XML_ERR_LITERAL_NOT_STARTED:
404 errmsg = "SystemLiteral \" or ' expected";
405 break;
406 case XML_ERR_LITERAL_NOT_FINISHED:
407 errmsg = "Unfinished System or Public ID \" or ' expected";
408 break;
409 case XML_ERR_MISPLACED_CDATA_END:
410 errmsg = "Sequence ']]>' not allowed in content";
411 break;
412 case XML_ERR_URI_REQUIRED:
413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
414 break;
415 case XML_ERR_PUBID_REQUIRED:
416 errmsg = "PUBLIC, the Public Identifier is missing";
417 break;
418 case XML_ERR_HYPHEN_IN_COMMENT:
419 errmsg = "Comment must not contain '--' (double-hyphen)";
420 break;
421 case XML_ERR_PI_NOT_STARTED:
422 errmsg = "xmlParsePI : no target name";
423 break;
424 case XML_ERR_RESERVED_XML_NAME:
425 errmsg = "Invalid PI name";
426 break;
427 case XML_ERR_NOTATION_NOT_STARTED:
428 errmsg = "NOTATION: Name expected here";
429 break;
430 case XML_ERR_NOTATION_NOT_FINISHED:
431 errmsg = "'>' required to close NOTATION declaration";
432 break;
433 case XML_ERR_VALUE_REQUIRED:
434 errmsg = "Entity value required";
435 break;
436 case XML_ERR_URI_FRAGMENT:
437 errmsg = "Fragment not allowed";
438 break;
439 case XML_ERR_ATTLIST_NOT_STARTED:
440 errmsg = "'(' required to start ATTLIST enumeration";
441 break;
442 case XML_ERR_NMTOKEN_REQUIRED:
443 errmsg = "NmToken expected in ATTLIST enumeration";
444 break;
445 case XML_ERR_ATTLIST_NOT_FINISHED:
446 errmsg = "')' required to finish ATTLIST enumeration";
447 break;
448 case XML_ERR_MIXED_NOT_STARTED:
449 errmsg = "MixedContentDecl : '|' or ')*' expected";
450 break;
451 case XML_ERR_PCDATA_REQUIRED:
452 errmsg = "MixedContentDecl : '#PCDATA' expected";
453 break;
454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
455 errmsg = "ContentDecl : Name or '(' expected";
456 break;
457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
458 errmsg = "ContentDecl : ',' '|' or ')' expected";
459 break;
460 case XML_ERR_PEREF_IN_INT_SUBSET:
461 errmsg =
462 "PEReference: forbidden within markup decl in internal subset";
463 break;
464 case XML_ERR_GT_REQUIRED:
465 errmsg = "expected '>'";
466 break;
467 case XML_ERR_CONDSEC_INVALID:
468 errmsg = "XML conditional section '[' expected";
469 break;
470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
471 errmsg = "Content error in the external subset";
472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
475 "conditional section INCLUDE or IGNORE keyword expected";
476 break;
477 case XML_ERR_CONDSEC_NOT_FINISHED:
478 errmsg = "XML conditional section not closed";
479 break;
480 case XML_ERR_XMLDECL_NOT_STARTED:
481 errmsg = "Text declaration '<?xml' required";
482 break;
483 case XML_ERR_XMLDECL_NOT_FINISHED:
484 errmsg = "parsing XML declaration: '?>' expected";
485 break;
486 case XML_ERR_EXT_ENTITY_STANDALONE:
487 errmsg = "external parsed entities cannot be standalone";
488 break;
489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
490 errmsg = "EntityRef: expecting ';'";
491 break;
492 case XML_ERR_DOCTYPE_NOT_FINISHED:
493 errmsg = "DOCTYPE improperly terminated";
494 break;
495 case XML_ERR_LTSLASH_REQUIRED:
496 errmsg = "EndTag: '</' not found";
497 break;
498 case XML_ERR_EQUAL_REQUIRED:
499 errmsg = "expected '='";
500 break;
501 case XML_ERR_STRING_NOT_CLOSED:
502 errmsg = "String not closed expecting \" or '";
503 break;
504 case XML_ERR_STRING_NOT_STARTED:
505 errmsg = "String not started expecting ' or \"";
506 break;
507 case XML_ERR_ENCODING_NAME:
508 errmsg = "Invalid XML encoding name";
509 break;
510 case XML_ERR_STANDALONE_VALUE:
511 errmsg = "standalone accepts only 'yes' or 'no'";
512 break;
513 case XML_ERR_DOCUMENT_EMPTY:
514 errmsg = "Document is empty";
515 break;
516 case XML_ERR_DOCUMENT_END:
517 errmsg = "Extra content at the end of the document";
518 break;
519 case XML_ERR_NOT_WELL_BALANCED:
520 errmsg = "chunk is not well balanced";
521 break;
522 case XML_ERR_EXTRA_CONTENT:
523 errmsg = "extra content at the end of well balanced chunk";
524 break;
525 case XML_ERR_VERSION_MISSING:
526 errmsg = "Malformed declaration expecting version";
527 break;
528 case XML_ERR_NAME_TOO_LONG:
529 errmsg = "Name too long use XML_PARSE_HUGE option";
530 break;
531#if 0
532 case:
533 errmsg = "";
534 break;
535#endif
536 default:
537 errmsg = "Unregistered error message";
538 }
539 if (ctxt != NULL)
540 ctxt->errNo = error;
541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555}
556
557/**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
565static void LIBXML_ATTR_FORMAT(3,0)
566xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
568{
569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
572 if (ctxt != NULL)
573 ctxt->errNo = error;
574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
581}
582
583/**
584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
593static void LIBXML_ATTR_FORMAT(3,0)
594xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596{
597 xmlStructuredErrorFunc schannel = NULL;
598
599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
604 schannel = ctxt->sax->serror;
605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
620}
621
622/**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
629 * Handle a validity error.
630 */
631static void LIBXML_ATTR_FORMAT(3,0)
632xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, const xmlChar *str2)
634{
635 xmlStructuredErrorFunc schannel = NULL;
636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
647 ctxt->vctxt.error, ctxt->vctxt.userData,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
652 ctxt->valid = 0;
653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
659 }
660}
661
662/**
663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
671static void LIBXML_ATTR_FORMAT(3,0)
672xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
673 const char *msg, int val)
674{
675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
678 if (ctxt != NULL)
679 ctxt->errNo = error;
680 __xmlRaiseError(NULL, NULL, NULL,
681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
688}
689
690/**
691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
701static void LIBXML_ATTR_FORMAT(3,0)
702xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
703 const char *msg, const xmlChar *str1, int val,
704 const xmlChar *str2)
705{
706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
709 if (ctxt != NULL)
710 ctxt->errNo = error;
711 __xmlRaiseError(NULL, NULL, NULL,
712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
720}
721
722/**
723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
731static void LIBXML_ATTR_FORMAT(3,0)
732xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
733 const char *msg, const xmlChar * val)
734{
735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
738 if (ctxt != NULL)
739 ctxt->errNo = error;
740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
749}
750
751/**
752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
760static void LIBXML_ATTR_FORMAT(3,0)
761xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763{
764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
767 if (ctxt != NULL)
768 ctxt->errNo = error;
769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773}
774
775/**
776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
785static void LIBXML_ATTR_FORMAT(3,0)
786xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
790{
791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
794 if (ctxt != NULL)
795 ctxt->errNo = error;
796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
802}
803
804/**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
812 * Handle a namespace warning error
813 */
814static void LIBXML_ATTR_FORMAT(3,0)
815xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819{
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827}
828
829/************************************************************************
830 * *
831 * Library wide options *
832 * *
833 ************************************************************************/
834
835/**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845int
846xmlHasFeature(xmlFeature feature)
847{
848 switch (feature) {
849 case XML_WITH_THREAD:
850#ifdef LIBXML_THREAD_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
855 case XML_WITH_TREE:
856#ifdef LIBXML_TREE_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
861 case XML_WITH_OUTPUT:
862#ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
867 case XML_WITH_PUSH:
868#ifdef LIBXML_PUSH_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
873 case XML_WITH_READER:
874#ifdef LIBXML_READER_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
879 case XML_WITH_PATTERN:
880#ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
885 case XML_WITH_WRITER:
886#ifdef LIBXML_WRITER_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
891 case XML_WITH_SAX1:
892#ifdef LIBXML_SAX1_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
897 case XML_WITH_FTP:
898#ifdef LIBXML_FTP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
903 case XML_WITH_HTTP:
904#ifdef LIBXML_HTTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
909 case XML_WITH_VALID:
910#ifdef LIBXML_VALID_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
915 case XML_WITH_HTML:
916#ifdef LIBXML_HTML_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
921 case XML_WITH_LEGACY:
922#ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
927 case XML_WITH_C14N:
928#ifdef LIBXML_C14N_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
933 case XML_WITH_CATALOG:
934#ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
939 case XML_WITH_XPATH:
940#ifdef LIBXML_XPATH_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
945 case XML_WITH_XPTR:
946#ifdef LIBXML_XPTR_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
951 case XML_WITH_XINCLUDE:
952#ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
957 case XML_WITH_ICONV:
958#ifdef LIBXML_ICONV_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
963 case XML_WITH_ISO8859X:
964#ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
969 case XML_WITH_UNICODE:
970#ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
975 case XML_WITH_REGEXP:
976#ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
981 case XML_WITH_AUTOMATA:
982#ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
987 case XML_WITH_EXPR:
988#ifdef LIBXML_EXPR_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
993 case XML_WITH_SCHEMAS:
994#ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
999 case XML_WITH_SCHEMATRON:
1000#ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
1005 case XML_WITH_MODULES:
1006#ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
1011 case XML_WITH_DEBUG:
1012#ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
1017 case XML_WITH_DEBUG_MEM:
1018#ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020#else
1021 return(0);
1022#endif
1023 case XML_WITH_DEBUG_RUN:
1024#ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026#else
1027 return(0);
1028#endif
1029 case XML_WITH_ZLIB:
1030#ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032#else
1033 return(0);
1034#endif
1035 case XML_WITH_LZMA:
1036#ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
1041 case XML_WITH_ICU:
1042#ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
1047 default:
1048 break;
1049 }
1050 return(0);
1051}
1052
1053/************************************************************************
1054 * *
1055 * SAX2 defaulted attributes handling *
1056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065static void
1066xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
1068#ifdef LIBXML_SAX1_ENABLED
1069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1072#else
1073 ctxt->sax2 = 1;
1074#endif /* LIBXML_SAX1_ENABLED */
1075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
1081 xmlErrMemory(ctxt, NULL);
1082 }
1083}
1084
/*
 * Record holding the defaulted attributes of one element.
 * NOTE(review): values is declared with 5 slots while its comment lists
 * four kinds of entries; presumably the record is over-allocated and each
 * attribute uses a fixed-size group of slots - confirm against the code
 * that fills it (xmlAddDefAttrs).
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
    int nbAttrs; /* number of defaulted attributes on that element */
    int maxAttrs; /* the size of the array */
    const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
1092
1093/**
1094 * xmlAttrNormalizeSpace:
1095 * @src: the source string
1096 * @dst: the target string
1097 *
1098 * Normalize the space in non CDATA attribute values:
1099 * If the attribute type is not CDATA, then the XML processor MUST further
1100 * process the normalized attribute value by discarding any leading and
1101 * trailing space (#x20) characters, and by replacing sequences of space
1102 * (#x20) characters by a single space (#x20) character.
1103 * Note that the size of dst need to be at least src, and if one doesn't need
1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1105 * passing src as dst is just fine.
1106 *
1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1108 * is needed.
1109 */
1110static xmlChar *
1111xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112{
1113 if ((src == NULL) || (dst == NULL))
1114 return(NULL);
1115
1116 while (*src == 0x20) src++;
1117 while (*src != 0) {
1118 if (*src == 0x20) {
1119 while (*src == 0x20) src++;
1120 if (*src != 0)
1121 *dst++ = 0x20;
1122 } else {
1123 *dst++ = *src++;
1124 }
1125 }
1126 *dst = 0;
1127 if (dst == src)
1128 return(NULL);
1129 return(dst);
1130}
1131
1132/**
1133 * xmlAttrNormalizeSpace2:
1134 * @src: the source string
1135 *
1136 * Normalize the space in non CDATA attribute values, a slightly more complex
1137 * front end to avoid allocation problems when running on attribute values
1138 * coming from the input.
1139 *
1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1141 * is needed.
1142 */
1143static const xmlChar *
1144xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1145{
1146 int i;
1147 int remove_head = 0;
1148 int need_realloc = 0;
1149 const xmlChar *cur;
1150
1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1152 return(NULL);
1153 i = *len;
1154 if (i <= 0)
1155 return(NULL);
1156
1157 cur = src;
1158 while (*cur == 0x20) {
1159 cur++;
1160 remove_head++;
1161 }
1162 while (*cur != 0) {
1163 if (*cur == 0x20) {
1164 cur++;
1165 if ((*cur == 0x20) || (*cur == 0)) {
1166 need_realloc = 1;
1167 break;
1168 }
1169 } else
1170 cur++;
1171 }
1172 if (need_realloc) {
1173 xmlChar *ret;
1174
1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1176 if (ret == NULL) {
1177 xmlErrMemory(ctxt, NULL);
1178 return(NULL);
1179 }
1180 xmlAttrNormalizeSpace(ret, ret);
1181 *len = (int) strlen((const char *)ret);
1182 return(ret);
1183 } else if (remove_head) {
1184 *len -= remove_head;
1185 memmove(src, src + remove_head, 1 + *len);
1186 return(src);
1187 }
1188 return(NULL);
1189}
1190
1191/**
1192 * xmlAddDefAttrs:
1193 * @ctxt: an XML parser context
1194 * @fullname: the element fullname
1195 * @fullattr: the attribute fullname
1196 * @value: the attribute value
1197 *
1198 * Add a defaulted attribute for an element
1199 */
1200static void
1201xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1202 const xmlChar *fullname,
1203 const xmlChar *fullattr,
1204 const xmlChar *value) {
1205 xmlDefAttrsPtr defaults;
1206 int len;
1207 const xmlChar *name;
1208 const xmlChar *prefix;
1209
1210 /*
1211 * Allows to detect attribute redefinitions
1212 */
1213 if (ctxt->attsSpecial != NULL) {
1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1215 return;
1216 }
1217
1218 if (ctxt->attsDefault == NULL) {
1219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1220 if (ctxt->attsDefault == NULL)
1221 goto mem_error;
1222 }
1223
1224 /*
1225 * split the element name into prefix:localname , the string found
1226 * are within the DTD and then not associated to namespace names.
1227 */
1228 name = xmlSplitQName3(fullname, &len);
1229 if (name == NULL) {
1230 name = xmlDictLookup(ctxt->dict, fullname, -1);
1231 prefix = NULL;
1232 } else {
1233 name = xmlDictLookup(ctxt->dict, name, -1);
1234 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1235 }
1236
1237 /*
1238 * make sure there is some storage
1239 */
1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1241 if (defaults == NULL) {
1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1243 (4 * 5) * sizeof(const xmlChar *));
1244 if (defaults == NULL)
1245 goto mem_error;
1246 defaults->nbAttrs = 0;
1247 defaults->maxAttrs = 4;
1248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1249 defaults, NULL) < 0) {
1250 xmlFree(defaults);
1251 goto mem_error;
1252 }
1253 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1254 xmlDefAttrsPtr temp;
1255
1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1258 if (temp == NULL)
1259 goto mem_error;
1260 defaults = temp;
1261 defaults->maxAttrs *= 2;
1262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 defaults, NULL) < 0) {
1264 xmlFree(defaults);
1265 goto mem_error;
1266 }
1267 }
1268
1269 /*
1270 * Split the element name into prefix:localname , the string found
1271 * are within the DTD and hen not associated to namespace names.
1272 */
1273 name = xmlSplitQName3(fullattr, &len);
1274 if (name == NULL) {
1275 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1276 prefix = NULL;
1277 } else {
1278 name = xmlDictLookup(ctxt->dict, name, -1);
1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1280 }
1281
1282 defaults->values[5 * defaults->nbAttrs] = name;
1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1284 /* intern the string and precompute the end */
1285 len = xmlStrlen(value);
1286 value = xmlDictLookup(ctxt->dict, value, len);
1287 defaults->values[5 * defaults->nbAttrs + 2] = value;
1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1289 if (ctxt->external)
1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1291 else
1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1293 defaults->nbAttrs++;
1294
1295 return;
1296
1297mem_error:
1298 xmlErrMemory(ctxt, NULL);
1299 return;
1300}
1301
1302/**
1303 * xmlAddSpecialAttr:
1304 * @ctxt: an XML parser context
1305 * @fullname: the element fullname
1306 * @fullattr: the attribute fullname
1307 * @type: the attribute type
1308 *
1309 * Register this attribute type
1310 */
1311static void
1312xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1313 const xmlChar *fullname,
1314 const xmlChar *fullattr,
1315 int type)
1316{
1317 if (ctxt->attsSpecial == NULL) {
1318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1319 if (ctxt->attsSpecial == NULL)
1320 goto mem_error;
1321 }
1322
1323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1324 return;
1325
1326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1327 (void *) (long) type);
1328 return;
1329
1330mem_error:
1331 xmlErrMemory(ctxt, NULL);
1332 return;
1333}
1334
1335/**
1336 * xmlCleanSpecialAttrCallback:
1337 *
1338 * Removes CDATA attributes from the special attribute table
1339 */
1340static void
1341xmlCleanSpecialAttrCallback(void *payload, void *data,
1342 const xmlChar *fullname, const xmlChar *fullattr,
1343 const xmlChar *unused ATTRIBUTE_UNUSED) {
1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1345
1346 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1348 }
1349}
1350
1351/**
1352 * xmlCleanSpecialAttr:
1353 * @ctxt: an XML parser context
1354 *
1355 * Trim the list of attributes defined to remove all those of type
1356 * CDATA as they are not special. This call should be done when finishing
1357 * to parse the DTD and before starting to parse the document root.
1358 */
1359static void
1360xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1361{
1362 if (ctxt->attsSpecial == NULL)
1363 return;
1364
1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1366
1367 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1368 xmlHashFree(ctxt->attsSpecial, NULL);
1369 ctxt->attsSpecial = NULL;
1370 }
1371 return;
1372}
1373
1374/**
1375 * xmlCheckLanguageID:
1376 * @lang: pointer to the string value
1377 *
1378 * Checks that the value conforms to the LanguageID production:
1379 *
1380 * NOTE: this is somewhat deprecated, those productions were removed from
1381 * the XML Second edition.
1382 *
1383 * [33] LanguageID ::= Langcode ('-' Subcode)*
1384 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1385 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1386 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1387 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1388 * [38] Subcode ::= ([a-z] | [A-Z])+
1389 *
1390 * The current REC reference the sucessors of RFC 1766, currently 5646
1391 *
1392 * http://www.rfc-editor.org/rfc/rfc5646.txt
1393 * langtag = language
1394 * ["-" script]
1395 * ["-" region]
1396 * *("-" variant)
1397 * *("-" extension)
1398 * ["-" privateuse]
1399 * language = 2*3ALPHA ; shortest ISO 639 code
1400 * ["-" extlang] ; sometimes followed by
1401 * ; extended language subtags
1402 * / 4ALPHA ; or reserved for future use
1403 * / 5*8ALPHA ; or registered language subtag
1404 *
1405 * extlang = 3ALPHA ; selected ISO 639 codes
1406 * *2("-" 3ALPHA) ; permanently reserved
1407 *
1408 * script = 4ALPHA ; ISO 15924 code
1409 *
1410 * region = 2ALPHA ; ISO 3166-1 code
1411 * / 3DIGIT ; UN M.49 code
1412 *
1413 * variant = 5*8alphanum ; registered variants
1414 * / (DIGIT 3alphanum)
1415 *
1416 * extension = singleton 1*("-" (2*8alphanum))
1417 *
1418 * ; Single alphanumerics
1419 * ; "x" reserved for private use
1420 * singleton = DIGIT ; 0 - 9
1421 * / %x41-57 ; A - W
1422 * / %x59-5A ; Y - Z
1423 * / %x61-77 ; a - w
1424 * / %x79-7A ; y - z
1425 *
1426 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1427 * The parser below doesn't try to cope with extension or privateuse
1428 * that could be added but that's not interoperable anyway
1429 *
1430 * Returns 1 if correct 0 otherwise
1431 **/
1432int
1433xmlCheckLanguageID(const xmlChar * lang)
1434{
1435 const xmlChar *cur = lang, *nxt;
1436
1437 if (cur == NULL)
1438 return (0);
1439 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1440 ((cur[0] == 'I') && (cur[1] == '-')) ||
1441 ((cur[0] == 'x') && (cur[1] == '-')) ||
1442 ((cur[0] == 'X') && (cur[1] == '-'))) {
1443 /*
1444 * Still allow IANA code and user code which were coming
1445 * from the previous version of the XML-1.0 specification
1446 * it's deprecated but we should not fail
1447 */
1448 cur += 2;
1449 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1450 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1451 cur++;
1452 return(cur[0] == 0);
1453 }
1454 nxt = cur;
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457 nxt++;
1458 if (nxt - cur >= 4) {
1459 /*
1460 * Reserved
1461 */
1462 if ((nxt - cur > 8) || (nxt[0] != 0))
1463 return(0);
1464 return(1);
1465 }
1466 if (nxt - cur < 2)
1467 return(0);
1468 /* we got an ISO 639 code */
1469 if (nxt[0] == 0)
1470 return(1);
1471 if (nxt[0] != '-')
1472 return(0);
1473
1474 nxt++;
1475 cur = nxt;
1476 /* now we can have extlang or script or region or variant */
1477 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1478 goto region_m49;
1479
1480 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1481 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1482 nxt++;
1483 if (nxt - cur == 4)
1484 goto script;
1485 if (nxt - cur == 2)
1486 goto region;
1487 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1488 goto variant;
1489 if (nxt - cur != 3)
1490 return(0);
1491 /* we parsed an extlang */
1492 if (nxt[0] == 0)
1493 return(1);
1494 if (nxt[0] != '-')
1495 return(0);
1496
1497 nxt++;
1498 cur = nxt;
1499 /* now we can have script or region or variant */
1500 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1501 goto region_m49;
1502
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur == 2)
1507 goto region;
1508 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1509 goto variant;
1510 if (nxt - cur != 4)
1511 return(0);
1512 /* we parsed a script */
1513script:
1514 if (nxt[0] == 0)
1515 return(1);
1516 if (nxt[0] != '-')
1517 return(0);
1518
1519 nxt++;
1520 cur = nxt;
1521 /* now we can have region or variant */
1522 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1523 goto region_m49;
1524
1525 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1526 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1527 nxt++;
1528
1529 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1530 goto variant;
1531 if (nxt - cur != 2)
1532 return(0);
1533 /* we parsed a region */
1534region:
1535 if (nxt[0] == 0)
1536 return(1);
1537 if (nxt[0] != '-')
1538 return(0);
1539
1540 nxt++;
1541 cur = nxt;
1542 /* now we can just have a variant */
1543 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1544 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1545 nxt++;
1546
1547 if ((nxt - cur < 5) || (nxt - cur > 8))
1548 return(0);
1549
1550 /* we parsed a variant */
1551variant:
1552 if (nxt[0] == 0)
1553 return(1);
1554 if (nxt[0] != '-')
1555 return(0);
1556 /* extensions and private use subtags not checked */
1557 return (1);
1558
1559region_m49:
1560 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1561 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1562 nxt += 3;
1563 goto region;
1564 }
1565 return(0);
1566}
1567
1568/************************************************************************
1569 * *
1570 * Parser stacks related functions and macros *
1571 * *
1572 ************************************************************************/
1573
1574static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1575 const xmlChar ** str);
1576
1577#ifdef SAX2
1578/**
1579 * nsPush:
1580 * @ctxt: an XML parser context
1581 * @prefix: the namespace prefix or NULL
1582 * @URL: the namespace name
1583 *
1584 * Pushes a new parser namespace on top of the ns stack
1585 *
1586 * Returns -1 in case of error, -2 if the namespace should be discarded
1587 * and the index in the stack otherwise.
1588 */
1589static int
1590nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1591{
1592 if (ctxt->options & XML_PARSE_NSCLEAN) {
1593 int i;
1594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1595 if (ctxt->nsTab[i] == prefix) {
1596 /* in scope */
1597 if (ctxt->nsTab[i + 1] == URL)
1598 return(-2);
1599 /* out of scope keep it */
1600 break;
1601 }
1602 }
1603 }
1604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1605 ctxt->nsMax = 10;
1606 ctxt->nsNr = 0;
1607 ctxt->nsTab = (const xmlChar **)
1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1609 if (ctxt->nsTab == NULL) {
1610 xmlErrMemory(ctxt, NULL);
1611 ctxt->nsMax = 0;
1612 return (-1);
1613 }
1614 } else if (ctxt->nsNr >= ctxt->nsMax) {
1615 const xmlChar ** tmp;
1616 ctxt->nsMax *= 2;
1617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1618 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1619 if (tmp == NULL) {
1620 xmlErrMemory(ctxt, NULL);
1621 ctxt->nsMax /= 2;
1622 return (-1);
1623 }
1624 ctxt->nsTab = tmp;
1625 }
1626 ctxt->nsTab[ctxt->nsNr++] = prefix;
1627 ctxt->nsTab[ctxt->nsNr++] = URL;
1628 return (ctxt->nsNr);
1629}
1630/**
1631 * nsPop:
1632 * @ctxt: an XML parser context
1633 * @nr: the number to pop
1634 *
1635 * Pops the top @nr parser prefix/namespace from the ns stack
1636 *
1637 * Returns the number of namespaces removed
1638 */
1639static int
1640nsPop(xmlParserCtxtPtr ctxt, int nr)
1641{
1642 int i;
1643
1644 if (ctxt->nsTab == NULL) return(0);
1645 if (ctxt->nsNr < nr) {
1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1647 nr = ctxt->nsNr;
1648 }
1649 if (ctxt->nsNr <= 0)
1650 return (0);
1651
1652 for (i = 0;i < nr;i++) {
1653 ctxt->nsNr--;
1654 ctxt->nsTab[ctxt->nsNr] = NULL;
1655 }
1656 return(nr);
1657}
1658#endif
1659
1660static int
1661xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1662 const xmlChar **atts;
1663 int *attallocs;
1664 int maxatts;
1665
1666 if (ctxt->atts == NULL) {
1667 maxatts = 55; /* allow for 10 attrs by default */
1668 atts = (const xmlChar **)
1669 xmlMalloc(maxatts * sizeof(xmlChar *));
1670 if (atts == NULL) goto mem_error;
1671 ctxt->atts = atts;
1672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1673 if (attallocs == NULL) goto mem_error;
1674 ctxt->attallocs = attallocs;
1675 ctxt->maxatts = maxatts;
1676 } else if (nr + 5 > ctxt->maxatts) {
1677 maxatts = (nr + 5) * 2;
1678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1679 maxatts * sizeof(const xmlChar *));
1680 if (atts == NULL) goto mem_error;
1681 ctxt->atts = atts;
1682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1683 (maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
1686 ctxt->maxatts = maxatts;
1687 }
1688 return(ctxt->maxatts);
1689mem_error:
1690 xmlErrMemory(ctxt, NULL);
1691 return(-1);
1692}
1693
1694/**
1695 * inputPush:
1696 * @ctxt: an XML parser context
1697 * @value: the parser input
1698 *
1699 * Pushes a new parser input on top of the input stack
1700 *
1701 * Returns -1 in case of error, the index in the stack otherwise
1702 */
1703int
1704inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705{
1706 if ((ctxt == NULL) || (value == NULL))
1707 return(-1);
1708 if (ctxt->inputNr >= ctxt->inputMax) {
1709 ctxt->inputMax *= 2;
1710 ctxt->inputTab =
1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712 ctxt->inputMax *
1713 sizeof(ctxt->inputTab[0]));
1714 if (ctxt->inputTab == NULL) {
1715 xmlErrMemory(ctxt, NULL);
1716 xmlFreeInputStream(value);
1717 ctxt->inputMax /= 2;
1718 value = NULL;
1719 return (-1);
1720 }
1721 }
1722 ctxt->inputTab[ctxt->inputNr] = value;
1723 ctxt->input = value;
1724 return (ctxt->inputNr++);
1725}
1726/**
1727 * inputPop:
1728 * @ctxt: an XML parser context
1729 *
1730 * Pops the top parser input from the input stack
1731 *
1732 * Returns the input just removed
1733 */
1734xmlParserInputPtr
1735inputPop(xmlParserCtxtPtr ctxt)
1736{
1737 xmlParserInputPtr ret;
1738
1739 if (ctxt == NULL)
1740 return(NULL);
1741 if (ctxt->inputNr <= 0)
1742 return (NULL);
1743 ctxt->inputNr--;
1744 if (ctxt->inputNr > 0)
1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1746 else
1747 ctxt->input = NULL;
1748 ret = ctxt->inputTab[ctxt->inputNr];
1749 ctxt->inputTab[ctxt->inputNr] = NULL;
1750 return (ret);
1751}
1752/**
1753 * nodePush:
1754 * @ctxt: an XML parser context
1755 * @value: the element node
1756 *
1757 * Pushes a new element node on top of the node stack
1758 *
1759 * Returns -1 in case of error, the index in the stack otherwise
1760 */
1761int
1762nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763{
1764 if (ctxt == NULL) return(0);
1765 if (ctxt->nodeNr >= ctxt->nodeMax) {
1766 xmlNodePtr *tmp;
1767
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769 ctxt->nodeMax * 2 *
1770 sizeof(ctxt->nodeTab[0]));
1771 if (tmp == NULL) {
1772 xmlErrMemory(ctxt, NULL);
1773 return (-1);
1774 }
1775 ctxt->nodeTab = tmp;
1776 ctxt->nodeMax *= 2;
1777 }
1778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782 xmlParserMaxDepth);
1783 xmlHaltParser(ctxt);
1784 return(-1);
1785 }
1786 ctxt->nodeTab[ctxt->nodeNr] = value;
1787 ctxt->node = value;
1788 return (ctxt->nodeNr++);
1789}
1790
1791/**
1792 * nodePop:
1793 * @ctxt: an XML parser context
1794 *
1795 * Pops the top element node from the node stack
1796 *
1797 * Returns the node just removed
1798 */
1799xmlNodePtr
1800nodePop(xmlParserCtxtPtr ctxt)
1801{
1802 xmlNodePtr ret;
1803
1804 if (ctxt == NULL) return(NULL);
1805 if (ctxt->nodeNr <= 0)
1806 return (NULL);
1807 ctxt->nodeNr--;
1808 if (ctxt->nodeNr > 0)
1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1810 else
1811 ctxt->node = NULL;
1812 ret = ctxt->nodeTab[ctxt->nodeNr];
1813 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1814 return (ret);
1815}
1816
1817#ifdef LIBXML_PUSH_ENABLED
1818/**
1819 * nameNsPush:
1820 * @ctxt: an XML parser context
1821 * @value: the element name
1822 * @prefix: the element prefix
1823 * @URI: the element namespace name
1824 *
1825 * Pushes a new element name/prefix/URL on top of the name stack
1826 *
1827 * Returns -1 in case of error, the index in the stack otherwise
1828 */
1829static int
1830nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1831 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1832{
1833 if (ctxt->nameNr >= ctxt->nameMax) {
1834 const xmlChar * *tmp;
1835 void **tmp2;
1836 ctxt->nameMax *= 2;
1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1838 ctxt->nameMax *
1839 sizeof(ctxt->nameTab[0]));
1840 if (tmp == NULL) {
1841 ctxt->nameMax /= 2;
1842 goto mem_error;
1843 }
1844 ctxt->nameTab = tmp;
1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1846 ctxt->nameMax * 3 *
1847 sizeof(ctxt->pushTab[0]));
1848 if (tmp2 == NULL) {
1849 ctxt->nameMax /= 2;
1850 goto mem_error;
1851 }
1852 ctxt->pushTab = tmp2;
1853 }
1854 ctxt->nameTab[ctxt->nameNr] = value;
1855 ctxt->name = value;
1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1859 return (ctxt->nameNr++);
1860mem_error:
1861 xmlErrMemory(ctxt, NULL);
1862 return (-1);
1863}
1864/**
1865 * nameNsPop:
1866 * @ctxt: an XML parser context
1867 *
1868 * Pops the top element/prefix/URI name from the name stack
1869 *
1870 * Returns the name just removed
1871 */
1872static const xmlChar *
1873nameNsPop(xmlParserCtxtPtr ctxt)
1874{
1875 const xmlChar *ret;
1876
1877 if (ctxt->nameNr <= 0)
1878 return (NULL);
1879 ctxt->nameNr--;
1880 if (ctxt->nameNr > 0)
1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1882 else
1883 ctxt->name = NULL;
1884 ret = ctxt->nameTab[ctxt->nameNr];
1885 ctxt->nameTab[ctxt->nameNr] = NULL;
1886 return (ret);
1887}
1888#endif /* LIBXML_PUSH_ENABLED */
1889
1890/**
1891 * namePush:
1892 * @ctxt: an XML parser context
1893 * @value: the element name
1894 *
1895 * Pushes a new element name on top of the name stack
1896 *
1897 * Returns -1 in case of error, the index in the stack otherwise
1898 */
1899int
1900namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1901{
1902 if (ctxt == NULL) return (-1);
1903
1904 if (ctxt->nameNr >= ctxt->nameMax) {
1905 const xmlChar * *tmp;
1906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1907 ctxt->nameMax * 2 *
1908 sizeof(ctxt->nameTab[0]));
1909 if (tmp == NULL) {
1910 goto mem_error;
1911 }
1912 ctxt->nameTab = tmp;
1913 ctxt->nameMax *= 2;
1914 }
1915 ctxt->nameTab[ctxt->nameNr] = value;
1916 ctxt->name = value;
1917 return (ctxt->nameNr++);
1918mem_error:
1919 xmlErrMemory(ctxt, NULL);
1920 return (-1);
1921}
1922/**
1923 * namePop:
1924 * @ctxt: an XML parser context
1925 *
1926 * Pops the top element name from the name stack
1927 *
1928 * Returns the name just removed
1929 */
1930const xmlChar *
1931namePop(xmlParserCtxtPtr ctxt)
1932{
1933 const xmlChar *ret;
1934
1935 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1936 return (NULL);
1937 ctxt->nameNr--;
1938 if (ctxt->nameNr > 0)
1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1940 else
1941 ctxt->name = NULL;
1942 ret = ctxt->nameTab[ctxt->nameNr];
1943 ctxt->nameTab[ctxt->nameNr] = NULL;
1944 return (ret);
1945}
1946
1947static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1948 if (ctxt->spaceNr >= ctxt->spaceMax) {
1949 int *tmp;
1950
1951 ctxt->spaceMax *= 2;
1952 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1953 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1954 if (tmp == NULL) {
1955 xmlErrMemory(ctxt, NULL);
1956 ctxt->spaceMax /=2;
1957 return(-1);
1958 }
1959 ctxt->spaceTab = tmp;
1960 }
1961 ctxt->spaceTab[ctxt->spaceNr] = val;
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1963 return(ctxt->spaceNr++);
1964}
1965
1966static int spacePop(xmlParserCtxtPtr ctxt) {
1967 int ret;
1968 if (ctxt->spaceNr <= 0) return(0);
1969 ctxt->spaceNr--;
1970 if (ctxt->spaceNr > 0)
1971 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1972 else
1973 ctxt->space = &ctxt->spaceTab[0];
1974 ret = ctxt->spaceTab[ctxt->spaceNr];
1975 ctxt->spaceTab[ctxt->spaceNr] = -1;
1976 return(ret);
1977}
1978
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt (the parser context) to be in
 * scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR return the base pointer of the current input
 *           (ctxt->input->base).
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done. Both macros expand to
 *           the same expression here.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
2019
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are c1 ... cn.
 * The bytes are compared as unsigned char values, left to right, and the
 * comparison stops at the first mismatch. @s is expanded once per byte
 * that gets compared, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2037
/*
 * SKIP(val): advance the current input by @val xmlChars, adding @val to
 * ctxt->nbChars and to the column of the input. The line counter is not
 * touched. After the move, a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds a 0 and
 * xmlParserInputGrow() does not return a positive value the input is
 * popped with xmlPopInput().
 * @val is expanded three times, so it must not have side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2045
/*
 * SKIPL(val): advance the current input by @val xmlChars one at a time,
 * keeping the line and column of the input up to date (a '\n' starts a
 * new line at column 1) and counting each xmlChar in ctxt->nbChars.
 * After the move, a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds a 0 and
 * xmlParserInputGrow() does not return a positive value the input is
 * popped with xmlPopInput().
 * @val is parenthesized so that any expression can be given, but it is
 * evaluated on every iteration and therefore must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2060
/*
 * SHRINK: outside of progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK xmlChars lie between the base of the input and the
 * current position while fewer than 2 * INPUT_CHUNK remain ahead of it.
 * The macro expands to a complete if statement, trailing semicolon
 * included.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK: shrink the current input with xmlParserInputShrink(); if the
 * current position then holds a 0 and xmlParserInputGrow() does not
 * return a positive value, the input is popped with xmlPopInput().
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }
2072
/*
 * GROW: outside of progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK xmlChars remain between the current position and the end
 * of the input. The macro expands to a complete if statement, trailing
 * semicolon included.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * xmlGROW: try to get INPUT_CHUNK more xmlChars into the current input.
 *
 * The parser is halted with an "Huge input lookup" error instead when
 * either the amount left ahead of the current position or the amount
 * consumed since the base exceeds XML_MAX_LOOKUP_LIMIT, provided the
 * input has a buffer whose read callback is not xmlNop and the
 * XML_PARSE_HUGE option is not set.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
    unsigned long curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
         (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
	return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    /* sanity check: the current position must lie within [base, end] */
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): the parser is halted before the error is raised
         * here, the reverse of the order used above; presumably
         * deliberate since cur is out of bounds at this point - confirm
         * before changing.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
	return;
    }
    /* nothing more could be read: leave the exhausted input */
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}
2100
/* SKIP_BLANKS: skip the blanks at the current position, see xmlSkipBlankChars */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: skip the current character with xmlNextChar */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, updating the column and ctxt->nbChars
 * but not the line; more input is requested when the new position holds
 * a 0. Expands to a brace block, not to a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character which is @l xmlChars long,
 * updating the line and the column of the input, then hand a '%' found
 * at the new position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): the current character of the input, its length goes to @l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same for the string @s instead of the input */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character @v to the buffer @b at index
 * @i and advance @i: a direct store when the length @l is 1, a call to
 * xmlCopyCharMultiByte() otherwise. The arguments are parenthesized so
 * that expressions can be given; @i must be a modifiable lvalue.
 * The expansion is an if/else statement lacking its final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2127
2128/**
2129 * xmlSkipBlankChars:
2130 * @ctxt: the XML parser context
2131 *
2132 * skip all blanks character found at that point in the input streams.
2133 * It pops up finished entities in the process if allowable at that point.
2134 *
2135 * Returns the number of space chars skipped
2136 */
2137
2138int
2139xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2140 int res = 0;
2141
2142 /*
2143 * It's Okay to use CUR/NEXT here since all the blanks are on
2144 * the ASCII range.
2145 */
2146 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2147 const xmlChar *cur;
2148 /*
2149 * if we are in the document content, go really fast
2150 */
2151 cur = ctxt->input->cur;
2152 while (IS_BLANK_CH(*cur)) {
2153 if (*cur == '\n') {
2154 ctxt->input->line++; ctxt->input->col = 1;
2155 } else {
2156 ctxt->input->col++;
2157 }
2158 cur++;
2159 res++;
2160 if (*cur == 0) {
2161 ctxt->input->cur = cur;
2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2163 cur = ctxt->input->cur;
2164 }
2165 }
2166 ctxt->input->cur = cur;
2167 } else {
2168 int cur;
2169 do {
2170 cur = CUR;
2171 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2172 (ctxt->instate != XML_PARSER_EOF))) {
2173 NEXT;
2174 cur = CUR;
2175 res++;
2176 }
2177 while ((cur == 0) && (ctxt->inputNr > 1) &&
2178 (ctxt->instate != XML_PARSER_COMMENT)) {
2179 xmlPopInput(ctxt);
2180 cur = CUR;
2181 }
2182 /*
2183 * Need to handle support of entities branching here
2184 */
2185 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2186 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2187 (ctxt->instate != XML_PARSER_EOF));
2188 }
2189 return(res);
2190}
2191
2192/************************************************************************
2193 * *
2194 * Commodity functions to handle entities *
2195 * *
2196 ************************************************************************/
2197
2198/**
2199 * xmlPopInput:
2200 * @ctxt: an XML parser context
2201 *
2202 * xmlPopInput: the current input pointed by ctxt->input came to an end
2203 * pop it and return the next char.
2204 *
2205 * Returns the current xmlChar in the parser context
2206 */
2207xmlChar
2208xmlPopInput(xmlParserCtxtPtr ctxt) {
2209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2210 if (xmlParserDebugEntities)
2211 xmlGenericError(xmlGenericErrorContext,
2212 "Popping input %d\n", ctxt->inputNr);
2213 xmlFreeInputStream(inputPop(ctxt));
2214 if ((*ctxt->input->cur == 0) &&
2215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2216 return(xmlPopInput(ctxt));
2217 return(CUR);
2218}
2219
2220/**
2221 * xmlPushInput:
2222 * @ctxt: an XML parser context
2223 * @input: an XML parser input fragment (entity, XML fragment ...).
2224 *
2225 * xmlPushInput: switch to a new input stream which is stacked on top
2226 * of the previous one(s).
2227 * Returns -1 in case of error or the index in the input stack
2228 */
2229int
2230xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2231 int ret;
2232 if (input == NULL) return(-1);
2233
2234 if (xmlParserDebugEntities) {
2235 if ((ctxt->input != NULL) && (ctxt->input->filename))
2236 xmlGenericError(xmlGenericErrorContext,
2237 "%s(%d): ", ctxt->input->filename,
2238 ctxt->input->line);
2239 xmlGenericError(xmlGenericErrorContext,
2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2241 }
2242 ret = inputPush(ctxt, input);
2243 if (ctxt->instate == XML_PARSER_EOF)
2244 return(-1);
2245 GROW;
2246 return(ret);
2247}
2248
2249/**
2250 * xmlParseCharRef:
2251 * @ctxt: an XML parser context
2252 *
2253 * parse Reference declarations
2254 *
2255 * [66] CharRef ::= '&#' [0-9]+ ';' |
2256 * '&#x' [0-9a-fA-F]+ ';'
2257 *
2258 * [ WFC: Legal Character ]
2259 * Characters referred to using character references must match the
2260 * production for Char.
2261 *
2262 * Returns the value parsed (as an int), 0 in case of error
2263 */
2264int
2265xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2266 unsigned int val = 0;
2267 int count = 0;
2268 unsigned int outofrange = 0;
2269
2270 /*
2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2272 */
2273 if ((RAW == '&') && (NXT(1) == '#') &&
2274 (NXT(2) == 'x')) {
2275 SKIP(3);
2276 GROW;
2277 while (RAW != ';') { /* loop blocked by count */
2278 if (count++ > 20) {
2279 count = 0;
2280 GROW;
2281 if (ctxt->instate == XML_PARSER_EOF)
2282 return(0);
2283 }
2284 if ((RAW >= '0') && (RAW <= '9'))
2285 val = val * 16 + (CUR - '0');
2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2287 val = val * 16 + (CUR - 'a') + 10;
2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2289 val = val * 16 + (CUR - 'A') + 10;
2290 else {
2291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2292 val = 0;
2293 break;
2294 }
2295 if (val > 0x10FFFF)
2296 outofrange = val;
2297
2298 NEXT;
2299 count++;
2300 }
2301 if (RAW == ';') {
2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2303 ctxt->input->col++;
2304 ctxt->nbChars ++;
2305 ctxt->input->cur++;
2306 }
2307 } else if ((RAW == '&') && (NXT(1) == '#')) {
2308 SKIP(2);
2309 GROW;
2310 while (RAW != ';') { /* loop blocked by count */
2311 if (count++ > 20) {
2312 count = 0;
2313 GROW;
2314 if (ctxt->instate == XML_PARSER_EOF)
2315 return(0);
2316 }
2317 if ((RAW >= '0') && (RAW <= '9'))
2318 val = val * 10 + (CUR - '0');
2319 else {
2320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2321 val = 0;
2322 break;
2323 }
2324 if (val > 0x10FFFF)
2325 outofrange = val;
2326
2327 NEXT;
2328 count++;
2329 }
2330 if (RAW == ';') {
2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2332 ctxt->input->col++;
2333 ctxt->nbChars ++;
2334 ctxt->input->cur++;
2335 }
2336 } else {
2337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2338 }
2339
2340 /*
2341 * [ WFC: Legal Character ]
2342 * Characters referred to using character references must match the
2343 * production for Char.
2344 */
2345 if ((IS_CHAR(val) && (outofrange == 0))) {
2346 return(val);
2347 } else {
2348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2349 "xmlParseCharRef: invalid xmlChar value %d\n",
2350 val);
2351 }
2352 return(0);
2353}
2354
2355/**
2356 * xmlParseStringCharRef:
2357 * @ctxt: an XML parser context
2358 * @str: a pointer to an index in the string
2359 *
2360 * parse Reference declarations, variant parsing from a string rather
2361 * than an an input flow.
2362 *
2363 * [66] CharRef ::= '&#' [0-9]+ ';' |
2364 * '&#x' [0-9a-fA-F]+ ';'
2365 *
2366 * [ WFC: Legal Character ]
2367 * Characters referred to using character references must match the
2368 * production for Char.
2369 *
2370 * Returns the value parsed (as an int), 0 in case of error, str will be
2371 * updated to the current value of the index
2372 */
2373static int
2374xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2375 const xmlChar *ptr;
2376 xmlChar cur;
2377 unsigned int val = 0;
2378 unsigned int outofrange = 0;
2379
2380 if ((str == NULL) || (*str == NULL)) return(0);
2381 ptr = *str;
2382 cur = *ptr;
2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2384 ptr += 3;
2385 cur = *ptr;
2386 while (cur != ';') { /* Non input consuming loop */
2387 if ((cur >= '0') && (cur <= '9'))
2388 val = val * 16 + (cur - '0');
2389 else if ((cur >= 'a') && (cur <= 'f'))
2390 val = val * 16 + (cur - 'a') + 10;
2391 else if ((cur >= 'A') && (cur <= 'F'))
2392 val = val * 16 + (cur - 'A') + 10;
2393 else {
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2395 val = 0;
2396 break;
2397 }
2398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
2401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else if ((cur == '&') && (ptr[1] == '#')){
2407 ptr += 2;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loops */
2410 if ((cur >= '0') && (cur <= '9'))
2411 val = val * 10 + (cur - '0');
2412 else {
2413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2414 val = 0;
2415 break;
2416 }
2417 if (val > 0x10FFFF)
2418 outofrange = val;
2419
2420 ptr++;
2421 cur = *ptr;
2422 }
2423 if (cur == ';')
2424 ptr++;
2425 } else {
2426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2427 return(0);
2428 }
2429 *str = ptr;
2430
2431 /*
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
2434 * production for Char.
2435 */
2436 if ((IS_CHAR(val) && (outofrange == 0))) {
2437 return(val);
2438 } else {
2439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2440 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2441 val);
2442 }
2443 return(0);
2444}
2445
2446/**
2447 * xmlNewBlanksWrapperInputStream:
2448 * @ctxt: an XML parser context
2449 * @entity: an Entity pointer
2450 *
2451 * Create a new input stream for wrapping
2452 * blanks around a PEReference
2453 *
2454 * Returns the new input stream or NULL
2455 */
2456
2457static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2458
2459static xmlParserInputPtr
2460xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2461 xmlParserInputPtr input;
2462 xmlChar *buffer;
2463 size_t length;
2464 if (entity == NULL) {
2465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2466 "xmlNewBlanksWrapperInputStream entity\n");
2467 return(NULL);
2468 }
2469 if (xmlParserDebugEntities)
2470 xmlGenericError(xmlGenericErrorContext,
2471 "new blanks wrapper for entity: %s\n", entity->name);
2472 input = xmlNewInputStream(ctxt);
2473 if (input == NULL) {
2474 return(NULL);
2475 }
2476 length = xmlStrlen(entity->name) + 5;
2477 buffer = xmlMallocAtomic(length);
2478 if (buffer == NULL) {
2479 xmlErrMemory(ctxt, NULL);
2480 xmlFree(input);
2481 return(NULL);
2482 }
2483 buffer [0] = ' ';
2484 buffer [1] = '%';
2485 buffer [length-3] = ';';
2486 buffer [length-2] = ' ';
2487 buffer [length-1] = 0;
2488 memcpy(buffer + 2, entity->name, length - 5);
2489 input->free = deallocblankswrapper;
2490 input->base = buffer;
2491 input->cur = buffer;
2492 input->length = length;
2493 input->end = &buffer[length];
2494 return(input);
2495}
2496
2497/**
2498 * xmlParserHandlePEReference:
2499 * @ctxt: the parser context
2500 *
2501 * [69] PEReference ::= '%' Name ';'
2502 *
2503 * [ WFC: No Recursion ]
2504 * A parsed entity must not contain a recursive
2505 * reference to itself, either directly or indirectly.
2506 *
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an internal DTD
2509 * subset which contains no parameter entity references, or a document
2510 * with "standalone='yes'", ... ... The declaration of a parameter
2511 * entity must precede any reference to it...
2512 *
2513 * [ VC: Entity Declared ]
2514 * In a document with an external subset or external parameter entities
2515 * with "standalone='no'", ... ... The declaration of a parameter entity
2516 * must precede any reference to it...
2517 *
2518 * [ WFC: In DTD ]
2519 * Parameter-entity references may only appear in the DTD.
2520 * NOTE: misleading but this is handled.
2521 *
2522 * A PEReference may have been detected in the current input stream
2523 * the handling is done accordingly to
2524 * http://www.w3.org/TR/REC-xml#entproc
2525 * i.e.
2526 * - Included in literal in entity values
2527 * - Included as Parameter Entity reference within DTDs
2528 */
2529void
2530xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2531 const xmlChar *name;
2532 xmlEntityPtr entity = NULL;
2533 xmlParserInputPtr input;
2534
2535 if (RAW != '%') return;
2536 switch(ctxt->instate) {
2537 case XML_PARSER_CDATA_SECTION:
2538 return;
2539 case XML_PARSER_COMMENT:
2540 return;
2541 case XML_PARSER_START_TAG:
2542 return;
2543 case XML_PARSER_END_TAG:
2544 return;
2545 case XML_PARSER_EOF:
2546 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2547 return;
2548 case XML_PARSER_PROLOG:
2549 case XML_PARSER_START:
2550 case XML_PARSER_MISC:
2551 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2552 return;
2553 case XML_PARSER_ENTITY_DECL:
2554 case XML_PARSER_CONTENT:
2555 case XML_PARSER_ATTRIBUTE_VALUE:
2556 case XML_PARSER_PI:
2557 case XML_PARSER_SYSTEM_LITERAL:
2558 case XML_PARSER_PUBLIC_LITERAL:
2559 /* we just ignore it there */
2560 return;
2561 case XML_PARSER_EPILOG:
2562 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2563 return;
2564 case XML_PARSER_ENTITY_VALUE:
2565 /*
2566 * NOTE: in the case of entity values, we don't do the
2567 * substitution here since we need the literal
2568 * entity value to be able to save the internal
2569 * subset of the document.
2570 * This will be handled by xmlStringDecodeEntities
2571 */
2572 return;
2573 case XML_PARSER_DTD:
2574 /*
2575 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2576 * In the internal DTD subset, parameter-entity references
2577 * can occur only where markup declarations can occur, not
2578 * within markup declarations.
2579 * In that case this is handled in xmlParseMarkupDecl
2580 */
2581 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2582 return;
2583 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2584 return;
2585 break;
2586 case XML_PARSER_IGNORE:
2587 return;
2588 }
2589
2590 NEXT;
2591 name = xmlParseName(ctxt);
2592 if (xmlParserDebugEntities)
2593 xmlGenericError(xmlGenericErrorContext,
2594 "PEReference: %s\n", name);
2595 if (name == NULL) {
2596 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2597 } else {
2598 if (RAW == ';') {
2599 NEXT;
2600 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2601 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2602 if (ctxt->instate == XML_PARSER_EOF)
2603 return;
2604 if (entity == NULL) {
2605
2606 /*
2607 * [ WFC: Entity Declared ]
2608 * In a document without any DTD, a document with only an
2609 * internal DTD subset which contains no parameter entity
2610 * references, or a document with "standalone='yes'", ...
2611 * ... The declaration of a parameter entity must precede
2612 * any reference to it...
2613 */
2614 if ((ctxt->standalone == 1) ||
2615 ((ctxt->hasExternalSubset == 0) &&
2616 (ctxt->hasPErefs == 0))) {
2617 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2618 "PEReference: %%%s; not found\n", name);
2619 } else {
2620 /*
2621 * [ VC: Entity Declared ]
2622 * In a document with an external subset or external
2623 * parameter entities with "standalone='no'", ...
2624 * ... The declaration of a parameter entity must precede
2625 * any reference to it...
2626 */
2627 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2628 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2629 "PEReference: %%%s; not found\n",
2630 name, NULL);
2631 } else
2632 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2633 "PEReference: %%%s; not found\n",
2634 name, NULL);
2635 ctxt->valid = 0;
2636 }
2637 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2638 } else if (ctxt->input->free != deallocblankswrapper) {
2639 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2640 if (xmlPushInput(ctxt, input) < 0)
2641 return;
2642 } else {
2643 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2644 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2645 xmlChar start[4];
2646 xmlCharEncoding enc;
2647
2648 /*
2649 * Note: external parameter entities will not be loaded, it
2650 * is not required for a non-validating parser, unless the
2651 * option of validating, or substituting entities were
2652 * given. Doing so is far more secure as the parser will
2653 * only process data coming from the document entity by
2654 * default.
2655 */
2656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2657 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2658 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2659 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2660 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2661 (ctxt->replaceEntities == 0) &&
2662 (ctxt->validate == 0))
2663 return;
2664
2665 /*
2666 * handle the extra spaces added before and after
2667 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2668 * this is done independently.
2669 */
2670 input = xmlNewEntityInputStream(ctxt, entity);
2671 if (xmlPushInput(ctxt, input) < 0)
2672 return;
2673
2674 /*
2675 * Get the 4 first bytes and decode the charset
2676 * if enc != XML_CHAR_ENCODING_NONE
2677 * plug some encoding conversion routines.
2678 * Note that, since we may have some non-UTF8
2679 * encoding (like UTF16, bug 135229), the 'length'
2680 * is not known, but we can calculate based upon
2681 * the amount of data in the buffer.
2682 */
2683 GROW
2684 if (ctxt->instate == XML_PARSER_EOF)
2685 return;
2686 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2687 start[0] = RAW;
2688 start[1] = NXT(1);
2689 start[2] = NXT(2);
2690 start[3] = NXT(3);
2691 enc = xmlDetectCharEncoding(start, 4);
2692 if (enc != XML_CHAR_ENCODING_NONE) {
2693 xmlSwitchEncoding(ctxt, enc);
2694 }
2695 }
2696
2697 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2698 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2699 (IS_BLANK_CH(NXT(5)))) {
2700 xmlParseTextDecl(ctxt);
2701 }
2702 } else {
2703 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2704 "PEReference: %s is not a parameter entity\n",
2705 name);
2706 }
2707 }
2708 } else {
2709 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2710 }
2711 }
2712}
2713
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the old one plus n; the macro jumps to
 * mem_error if that computation wraps around or if the reallocation
 * fails, in which case buffer and buffer##_size are left unchanged.
 * n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2728
2729/**
2730 * xmlStringLenDecodeEntities:
2731 * @ctxt: the parser context
2732 * @str: the input string
2733 * @len: the string length
2734 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2735 * @end: an end marker xmlChar, 0 if none
2736 * @end2: an end marker xmlChar, 0 if none
2737 * @end3: an end marker xmlChar, 0 if none
2738 *
2739 * Takes a entity string content and process to do the adequate substitutions.
2740 *
2741 * [67] Reference ::= EntityRef | CharRef
2742 *
2743 * [69] PEReference ::= '%' Name ';'
2744 *
2745 * Returns A newly allocated string with the substitution done. The caller
2746 * must deallocate it !
2747 */
2748xmlChar *
2749xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2750 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2751 xmlChar *buffer = NULL;
2752 size_t buffer_size = 0;
2753 size_t nbchars = 0;
2754
2755 xmlChar *current = NULL;
2756 xmlChar *rep = NULL;
2757 const xmlChar *last;
2758 xmlEntityPtr ent;
2759 int c,l;
2760
2761 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2762 return(NULL);
2763 last = str + len;
2764
2765 if (((ctxt->depth > 40) &&
2766 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2767 (ctxt->depth > 1024)) {
2768 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2769 return(NULL);
2770 }
2771
2772 /*
2773 * allocate a translation buffer.
2774 */
2775 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2776 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2777 if (buffer == NULL) goto mem_error;
2778
2779 /*
2780 * OK loop until we reach one of the ending char or a size limit.
2781 * we are operating on already parsed values.
2782 */
2783 if (str < last)
2784 c = CUR_SCHAR(str, l);
2785 else
2786 c = 0;
2787 while ((c != 0) && (c != end) && /* non input consuming loop */
2788 (c != end2) && (c != end3)) {
2789
2790 if (c == 0) break;
2791 if ((c == '&') && (str[1] == '#')) {
2792 int val = xmlParseStringCharRef(ctxt, &str);
2793 if (val != 0) {
2794 COPY_BUF(0,buffer,nbchars,val);
2795 }
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 }
2799 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2800 if (xmlParserDebugEntities)
2801 xmlGenericError(xmlGenericErrorContext,
2802 "String decoding Entity Reference: %.30s\n",
2803 str);
2804 ent = xmlParseStringEntityRef(ctxt, &str);
2805 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2806 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2807 goto int_error;
2808 xmlParserEntityCheck(ctxt, 0, ent, 0);
2809 if (ent != NULL)
2810 ctxt->nbentities += ent->checked / 2;
2811 if ((ent != NULL) &&
2812 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2813 if (ent->content != NULL) {
2814 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2815 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2817 }
2818 } else {
2819 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2820 "predefined entity has no content\n");
2821 }
2822 } else if ((ent != NULL) && (ent->content != NULL)) {
2823 ctxt->depth++;
2824 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2825 0, 0, 0);
2826 ctxt->depth--;
2827
2828 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2829 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2830 goto int_error;
2831
2832 if (rep != NULL) {
2833 current = rep;
2834 while (*current != 0) { /* non input consuming loop */
2835 buffer[nbchars++] = *current++;
2836 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2837 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2838 goto int_error;
2839 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2840 }
2841 }
2842 xmlFree(rep);
2843 rep = NULL;
2844 }
2845 } else if (ent != NULL) {
2846 int i = xmlStrlen(ent->name);
2847 const xmlChar *cur = ent->name;
2848
2849 buffer[nbchars++] = '&';
2850 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2851 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2852 }
2853 for (;i > 0;i--)
2854 buffer[nbchars++] = *cur++;
2855 buffer[nbchars++] = ';';
2856 }
2857 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2858 if (xmlParserDebugEntities)
2859 xmlGenericError(xmlGenericErrorContext,
2860 "String decoding PE Reference: %.30s\n", str);
2861 ent = xmlParseStringPEReference(ctxt, &str);
2862 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2863 goto int_error;
2864 xmlParserEntityCheck(ctxt, 0, ent, 0);
2865 if (ent != NULL)
2866 ctxt->nbentities += ent->checked / 2;
2867 if (ent != NULL) {
2868 if (ent->content == NULL) {
2869 /*
2870 * Note: external parsed entities will not be loaded,
2871 * it is not required for a non-validating parser to
2872 * complete external PEreferences coming from the
2873 * internal subset
2874 */
2875 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2876 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2877 (ctxt->validate != 0)) {
2878 xmlLoadEntityContent(ctxt, ent);
2879 } else {
2880 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2881 "not validating will not read content for PE entity %s\n",
2882 ent->name, NULL);
2883 }
2884 }
2885 ctxt->depth++;
2886 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2887 0, 0, 0);
2888 ctxt->depth--;
2889 if (rep != NULL) {
2890 current = rep;
2891 while (*current != 0) { /* non input consuming loop */
2892 buffer[nbchars++] = *current++;
2893 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2894 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2895 goto int_error;
2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2897 }
2898 }
2899 xmlFree(rep);
2900 rep = NULL;
2901 }
2902 }
2903 } else {
2904 COPY_BUF(l,buffer,nbchars,c);
2905 str += l;
2906 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2908 }
2909 }
2910 if (str < last)
2911 c = CUR_SCHAR(str, l);
2912 else
2913 c = 0;
2914 }
2915 buffer[nbchars] = 0;
2916 return(buffer);
2917
2918mem_error:
2919 xmlErrMemory(ctxt, NULL);
2920int_error:
2921 if (rep != NULL)
2922 xmlFree(rep);
2923 if (buffer != NULL)
2924 xmlFree(buffer);
2925 return(NULL);
2926}
2927
2928/**
2929 * xmlStringDecodeEntities:
2930 * @ctxt: the parser context
2931 * @str: the input string
2932 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2933 * @end: an end marker xmlChar, 0 if none
2934 * @end2: an end marker xmlChar, 0 if none
2935 * @end3: an end marker xmlChar, 0 if none
2936 *
2937 * Takes a entity string content and process to do the adequate substitutions.
2938 *
2939 * [67] Reference ::= EntityRef | CharRef
2940 *
2941 * [69] PEReference ::= '%' Name ';'
2942 *
2943 * Returns A newly allocated string with the substitution done. The caller
2944 * must deallocate it !
2945 */
2946xmlChar *
2947xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2948 xmlChar end, xmlChar end2, xmlChar end3) {
2949 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2950 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2951 end, end2, end3));
2952}
2953
2954/************************************************************************
2955 * *
2956 * Commodity functions, cleanup needed ? *
2957 * *
2958 ************************************************************************/
2959
2960/**
2961 * areBlanks:
2962 * @ctxt: an XML parser context
2963 * @str: a xmlChar *
2964 * @len: the size of @str
2965 * @blank_chars: we know the chars are blanks
2966 *
2967 * Is this a sequence of blank chars that one can ignore ?
2968 *
2969 * Returns 1 if ignorable 0 otherwise.
2970 */
2971
2972static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2973 int blank_chars) {
2974 int i, ret;
2975 xmlNodePtr lastChild;
2976
2977 /*
2978 * Don't spend time trying to differentiate them, the same callback is
2979 * used !
2980 */
2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2982 return(0);
2983
2984 /*
2985 * Check for xml:space value.
2986 */
2987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2988 (*(ctxt->space) == -2))
2989 return(0);
2990
2991 /*
2992 * Check that the string is made of blanks
2993 */
2994 if (blank_chars == 0) {
2995 for (i = 0;i < len;i++)
2996 if (!(IS_BLANK_CH(str[i]))) return(0);
2997 }
2998
2999 /*
3000 * Look if the element is mixed content in the DTD if available
3001 */
3002 if (ctxt->node == NULL) return(0);
3003 if (ctxt->myDoc != NULL) {
3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3005 if (ret == 0) return(1);
3006 if (ret == 1) return(0);
3007 }
3008
3009 /*
3010 * Otherwise, heuristic :-\
3011 */
3012 if ((RAW != '<') && (RAW != 0xD)) return(0);
3013 if ((ctxt->node->children == NULL) &&
3014 (RAW == '<') && (NXT(1) == '/')) return(0);
3015
3016 lastChild = xmlGetLastChild(ctxt->node);
3017 if (lastChild == NULL) {
3018 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3019 (ctxt->node->content != NULL)) return(0);
3020 } else if (xmlNodeIsText(lastChild))
3021 return(0);
3022 else if ((ctxt->node->children != NULL) &&
3023 (xmlNodeIsText(ctxt->node->children)))
3024 return(0);
3025 return(1);
3026}
3027
3028/************************************************************************
3029 * *
3030 * Extra stuff for namespace support *
3031 * Relates to http://www.w3.org/TR/WD-xml-names *
3032 * *
3033 ************************************************************************/
3034
3035/**
3036 * xmlSplitQName:
3037 * @ctxt: an XML parser context
3038 * @name: an XML parser context
3039 * @prefix: a xmlChar **
3040 *
3041 * parse an UTF8 encoded XML qualified name string
3042 *
3043 * [NS 5] QName ::= (Prefix ':')? LocalPart
3044 *
3045 * [NS 6] Prefix ::= NCName
3046 *
3047 * [NS 7] LocalPart ::= NCName
3048 *
3049 * Returns the local part, and prefix is updated
3050 * to get the Prefix if any.
3051 */
3052
3053xmlChar *
3054xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3055 xmlChar buf[XML_MAX_NAMELEN + 5];
3056 xmlChar *buffer = NULL;
3057 int len = 0;
3058 int max = XML_MAX_NAMELEN;
3059 xmlChar *ret = NULL;
3060 const xmlChar *cur = name;
3061 int c;
3062
3063 if (prefix == NULL) return(NULL);
3064 *prefix = NULL;
3065
3066 if (cur == NULL) return(NULL);
3067
3068#ifndef XML_XML_NAMESPACE
3069 /* xml: prefix is not really a namespace */
3070 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071 (cur[2] == 'l') && (cur[3] == ':'))
3072 return(xmlStrdup(name));
3073#endif
3074
3075 /* nasty but well=formed */
3076 if (cur[0] == ':')
3077 return(xmlStrdup(name));
3078
3079 c = *cur++;
3080 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3081 buf[len++] = c;
3082 c = *cur++;
3083 }
3084 if (len >= max) {
3085 /*
3086 * Okay someone managed to make a huge name, so he's ready to pay
3087 * for the processing speed.
3088 */
3089 max = len * 2;
3090
3091 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3092 if (buffer == NULL) {
3093 xmlErrMemory(ctxt, NULL);
3094 return(NULL);
3095 }
3096 memcpy(buffer, buf, len);
3097 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3098 if (len + 10 > max) {
3099 xmlChar *tmp;
3100
3101 max *= 2;
3102 tmp = (xmlChar *) xmlRealloc(buffer,
3103 max * sizeof(xmlChar));
3104 if (tmp == NULL) {
3105 xmlFree(buffer);
3106 xmlErrMemory(ctxt, NULL);
3107 return(NULL);
3108 }
3109 buffer = tmp;
3110 }
3111 buffer[len++] = c;
3112 c = *cur++;
3113 }
3114 buffer[len] = 0;
3115 }
3116
3117 if ((c == ':') && (*cur == 0)) {
3118 if (buffer != NULL)
3119 xmlFree(buffer);
3120 *prefix = NULL;
3121 return(xmlStrdup(name));
3122 }
3123
3124 if (buffer == NULL)
3125 ret = xmlStrndup(buf, len);
3126 else {
3127 ret = buffer;
3128 buffer = NULL;
3129 max = XML_MAX_NAMELEN;
3130 }
3131
3132
3133 if (c == ':') {
3134 c = *cur;
3135 *prefix = ret;
3136 if (c == 0) {
3137 return(xmlStrndup(BAD_CAST "", 0));
3138 }
3139 len = 0;
3140
3141 /*
3142 * Check that the first character is proper to start
3143 * a new name
3144 */
3145 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3146 ((c >= 0x41) && (c <= 0x5A)) ||
3147 (c == '_') || (c == ':'))) {
3148 int l;
3149 int first = CUR_SCHAR(cur, l);
3150
3151 if (!IS_LETTER(first) && (first != '_')) {
3152 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3153 "Name %s is not XML Namespace compliant\n",
3154 name);
3155 }
3156 }
3157 cur++;
3158
3159 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3160 buf[len++] = c;
3161 c = *cur++;
3162 }
3163 if (len >= max) {
3164 /*
3165 * Okay someone managed to make a huge name, so he's ready to pay
3166 * for the processing speed.
3167 */
3168 max = len * 2;
3169
3170 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3171 if (buffer == NULL) {
3172 xmlErrMemory(ctxt, NULL);
3173 return(NULL);
3174 }
3175 memcpy(buffer, buf, len);
3176 while (c != 0) { /* tested bigname2.xml */
3177 if (len + 10 > max) {
3178 xmlChar *tmp;
3179
3180 max *= 2;
3181 tmp = (xmlChar *) xmlRealloc(buffer,
3182 max * sizeof(xmlChar));
3183 if (tmp == NULL) {
3184 xmlErrMemory(ctxt, NULL);
3185 xmlFree(buffer);
3186 return(NULL);
3187 }
3188 buffer = tmp;
3189 }
3190 buffer[len++] = c;
3191 c = *cur++;
3192 }
3193 buffer[len] = 0;
3194 }
3195
3196 if (buffer == NULL)
3197 ret = xmlStrndup(buf, len);
3198 else {
3199 ret = buffer;
3200 }
3201 }
3202
3203 return(ret);
3204}
3205
3206/************************************************************************
3207 * *
3208 * The parser itself *
3209 * Relates to http://www.w3.org/TR/REC-xml *
3210 * *
3211 ************************************************************************/
3212
3213/************************************************************************
3214 * *
3215 * Routines to parse Name, NCName and NmToken *
3216 * *
3217 ************************************************************************/
#ifdef DEBUG
/*
 * Counters only compiled in with DEBUG. Objects with static storage
 * duration start at zero, so no explicit initializer is needed.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3226
3227/*
3228 * The two following functions are related to the change of accepted
3229 * characters for Name and NmToken in the Revision 5 of XML-1.0
3230 * They correspond to the modified production [4] and the new production [4a]
3231 * changes in that revision. Also note that the macros used for the
3232 * productions Letter, Digit, CombiningChar and Extender are not needed
3233 * anymore.
3234 * We still keep compatibility to pre-revision5 parsing semantic if the
3235 * new XML_PARSE_OLD10 option is given to the parser.
3236 */
3237static int
3238xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240 /*
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3243 */
3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3245 (((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))
3260 return(1);
3261 } else {
3262 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3263 return(1);
3264 }
3265 return(0);
3266}
3267
3268static int
3269xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3271 /*
3272 * Use the new checks of production [4] [4a] amd [5] of the
3273 * Update 5 of XML-1.0
3274 */
3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3276 (((c >= 'a') && (c <= 'z')) ||
3277 ((c >= 'A') && (c <= 'Z')) ||
3278 ((c >= '0') && (c <= '9')) || /* !start */
3279 (c == '_') || (c == ':') ||
3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3281 ((c >= 0xC0) && (c <= 0xD6)) ||
3282 ((c >= 0xD8) && (c <= 0xF6)) ||
3283 ((c >= 0xF8) && (c <= 0x2FF)) ||
3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3285 ((c >= 0x370) && (c <= 0x37D)) ||
3286 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3287 ((c >= 0x200C) && (c <= 0x200D)) ||
3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3289 ((c >= 0x2070) && (c <= 0x218F)) ||
3290 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3291 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3292 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3294 ((c >= 0x10000) && (c <= 0xEFFFF))))
3295 return(1);
3296 } else {
3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 (c == '.') || (c == '-') ||
3299 (c == '_') || (c == ':') ||
3300 (IS_COMBINING(c)) ||
3301 (IS_EXTENDER(c)))
3302 return(1);
3303 }
3304 return(0);
3305}
3306
3307static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3308 int *len, int *alloc, int normalize);
3309
3310static const xmlChar *
3311xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3312 int len = 0, l;
3313 int c;
3314 int count = 0;
3315
3316#ifdef DEBUG
3317 nbParseNameComplex++;
3318#endif
3319
3320 /*
3321 * Handler for more complex cases
3322 */
3323 GROW;
3324 if (ctxt->instate == XML_PARSER_EOF)
3325 return(NULL);
3326 c = CUR_CHAR(l);
3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3328 /*
3329 * Use the new checks of production [4] [4a] amd [5] of the
3330 * Update 5 of XML-1.0
3331 */
3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3333 (!(((c >= 'a') && (c <= 'z')) ||
3334 ((c >= 'A') && (c <= 'Z')) ||
3335 (c == '_') || (c == ':') ||
3336 ((c >= 0xC0) && (c <= 0xD6)) ||
3337 ((c >= 0xD8) && (c <= 0xF6)) ||
3338 ((c >= 0xF8) && (c <= 0x2FF)) ||
3339 ((c >= 0x370) && (c <= 0x37D)) ||
3340 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 ((c >= 0x200C) && (c <= 0x200D)) ||
3342 ((c >= 0x2070) && (c <= 0x218F)) ||
3343 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3348 return(NULL);
3349 }
3350 len += l;
3351 NEXTL(l);
3352 c = CUR_CHAR(l);
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3354 (((c >= 'a') && (c <= 'z')) ||
3355 ((c >= 'A') && (c <= 'Z')) ||
3356 ((c >= '0') && (c <= '9')) || /* !start */
3357 (c == '_') || (c == ':') ||
3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3359 ((c >= 0xC0) && (c <= 0xD6)) ||
3360 ((c >= 0xD8) && (c <= 0xF6)) ||
3361 ((c >= 0xF8) && (c <= 0x2FF)) ||
3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3363 ((c >= 0x370) && (c <= 0x37D)) ||
3364 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3365 ((c >= 0x200C) && (c <= 0x200D)) ||
3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3367 ((c >= 0x2070) && (c <= 0x218F)) ||
3368 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3369 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3370 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3372 ((c >= 0x10000) && (c <= 0xEFFFF))
3373 )) {
3374 if (count++ > XML_PARSER_CHUNK_SIZE) {
3375 count = 0;
3376 GROW;
3377 if (ctxt->instate == XML_PARSER_EOF)
3378 return(NULL);
3379 }
3380 len += l;
3381 NEXTL(l);
3382 c = CUR_CHAR(l);
3383 }
3384 } else {
3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3386 (!IS_LETTER(c) && (c != '_') &&
3387 (c != ':'))) {
3388 return(NULL);
3389 }
3390 len += l;
3391 NEXTL(l);
3392 c = CUR_CHAR(l);
3393
3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3396 (c == '.') || (c == '-') ||
3397 (c == '_') || (c == ':') ||
3398 (IS_COMBINING(c)) ||
3399 (IS_EXTENDER(c)))) {
3400 if (count++ > XML_PARSER_CHUNK_SIZE) {
3401 count = 0;
3402 GROW;
3403 if (ctxt->instate == XML_PARSER_EOF)
3404 return(NULL);
3405 }
3406 len += l;
3407 NEXTL(l);
3408 c = CUR_CHAR(l);
3409 if (c == 0) {
3410 count = 0;
3411 GROW;
3412 if (ctxt->instate == XML_PARSER_EOF)
3413 return(NULL);
3414 c = CUR_CHAR(l);
3415 }
3416 }
3417 }
3418 if ((len > XML_MAX_NAME_LENGTH) &&
3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3421 return(NULL);
3422 }
3423 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3424 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3425 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3426}
3427
3428/**
3429 * xmlParseName:
3430 * @ctxt: an XML parser context
3431 *
3432 * parse an XML name.
3433 *
3434 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3435 * CombiningChar | Extender
3436 *
3437 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3438 *
3439 * [6] Names ::= Name (#x20 Name)*
3440 *
3441 * Returns the Name parsed or NULL
3442 */
3443
3444const xmlChar *
3445xmlParseName(xmlParserCtxtPtr ctxt) {
3446 const xmlChar *in;
3447 const xmlChar *ret;
3448 int count = 0;
3449
3450 GROW;
3451
3452#ifdef DEBUG
3453 nbParseName++;
3454#endif
3455
3456 /*
3457 * Accelerator for simple ASCII names
3458 */
3459 in = ctxt->input->cur;
3460 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3461 ((*in >= 0x41) && (*in <= 0x5A)) ||
3462 (*in == '_') || (*in == ':')) {
3463 in++;
3464 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3465 ((*in >= 0x41) && (*in <= 0x5A)) ||
3466 ((*in >= 0x30) && (*in <= 0x39)) ||
3467 (*in == '_') || (*in == '-') ||
3468 (*in == ':') || (*in == '.'))
3469 in++;
3470 if ((*in > 0) && (*in < 0x80)) {
3471 count = in - ctxt->input->cur;
3472 if ((count > XML_MAX_NAME_LENGTH) &&
3473 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3474 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3475 return(NULL);
3476 }
3477 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3478 ctxt->input->cur = in;
3479 ctxt->nbChars += count;
3480 ctxt->input->col += count;
3481 if (ret == NULL)
3482 xmlErrMemory(ctxt, NULL);
3483 return(ret);
3484 }
3485 }
3486 /* accelerator for special cases */
3487 return(xmlParseNameComplex(ctxt));
3488}
3489
3490static const xmlChar *
3491xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3492 int len = 0, l;
3493 int c;
3494 int count = 0;
3495 size_t startPosition = 0;
3496
3497#ifdef DEBUG
3498 nbParseNCNameComplex++;
3499#endif
3500
3501 /*
3502 * Handler for more complex cases
3503 */
3504 GROW;
3505 startPosition = CUR_PTR - BASE_PTR;
3506 c = CUR_CHAR(l);
3507 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3508 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3509 return(NULL);
3510 }
3511
3512 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3513 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3514 if (count++ > XML_PARSER_CHUNK_SIZE) {
3515 if ((len > XML_MAX_NAME_LENGTH) &&
3516 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3517 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3518 return(NULL);
3519 }
3520 count = 0;
3521 GROW;
3522 if (ctxt->instate == XML_PARSER_EOF)
3523 return(NULL);
3524 }
3525 len += l;
3526 NEXTL(l);
3527 c = CUR_CHAR(l);
3528 if (c == 0) {
3529 count = 0;
3530 /*
3531 * when shrinking to extend the buffer we really need to preserve
3532 * the part of the name we already parsed. Hence rolling back
3533 * by current lenght.
3534 */
3535 ctxt->input->cur -= l;
3536 GROW;
3537 ctxt->input->cur += l;
3538 if (ctxt->instate == XML_PARSER_EOF)
3539 return(NULL);
3540 c = CUR_CHAR(l);
3541 }
3542 }
3543 if ((len > XML_MAX_NAME_LENGTH) &&
3544 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3545 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3546 return(NULL);
3547 }
3548 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3549}
3550
3551/**
3552 * xmlParseNCName:
3553 * @ctxt: an XML parser context
3554 * @len: length of the string parsed
3555 *
3556 * parse an XML name.
3557 *
3558 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3559 * CombiningChar | Extender
3560 *
3561 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3562 *
3563 * Returns the Name parsed or NULL
3564 */
3565
3566static const xmlChar *
3567xmlParseNCName(xmlParserCtxtPtr ctxt) {
3568 const xmlChar *in, *e;
3569 const xmlChar *ret;
3570 int count = 0;
3571
3572#ifdef DEBUG
3573 nbParseNCName++;
3574#endif
3575
3576 /*
3577 * Accelerator for simple ASCII names
3578 */
3579 in = ctxt->input->cur;
3580 e = ctxt->input->end;
3581 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3582 ((*in >= 0x41) && (*in <= 0x5A)) ||
3583 (*in == '_')) && (in < e)) {
3584 in++;
3585 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3586 ((*in >= 0x41) && (*in <= 0x5A)) ||
3587 ((*in >= 0x30) && (*in <= 0x39)) ||
3588 (*in == '_') || (*in == '-') ||
3589 (*in == '.')) && (in < e))
3590 in++;
3591 if (in >= e)
3592 goto complex;
3593 if ((*in > 0) && (*in < 0x80)) {
3594 count = in - ctxt->input->cur;
3595 if ((count > XML_MAX_NAME_LENGTH) &&
3596 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3597 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3598 return(NULL);
3599 }
3600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3601 ctxt->input->cur = in;
3602 ctxt->nbChars += count;
3603 ctxt->input->col += count;
3604 if (ret == NULL) {
3605 xmlErrMemory(ctxt, NULL);
3606 }
3607 return(ret);
3608 }
3609 }
3610complex:
3611 return(xmlParseNCNameComplex(ctxt));
3612}
3613
3614/**
3615 * xmlParseNameAndCompare:
3616 * @ctxt: an XML parser context
3617 *
3618 * parse an XML name and compares for match
3619 * (specialized for endtag parsing)
3620 *
3621 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3622 * and the name for mismatch
3623 */
3624
3625static const xmlChar *
3626xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3627 register const xmlChar *cmp = other;
3628 register const xmlChar *in;
3629 const xmlChar *ret;
3630
3631 GROW;
3632 if (ctxt->instate == XML_PARSER_EOF)
3633 return(NULL);
3634
3635 in = ctxt->input->cur;
3636 while (*in != 0 && *in == *cmp) {
3637 ++in;
3638 ++cmp;
3639 ctxt->input->col++;
3640 }
3641 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3642 /* success */
3643 ctxt->input->cur = in;
3644 return (const xmlChar*) 1;
3645 }
3646 /* failure (or end of input buffer), check with full function */
3647 ret = xmlParseName (ctxt);
3648 /* strings coming from the dictionary direct compare possible */
3649 if (ret == other) {
3650 return (const xmlChar*) 1;
3651 }
3652 return ret;
3653}
3654
3655/**
3656 * xmlParseStringName:
3657 * @ctxt: an XML parser context
3658 * @str: a pointer to the string pointer (IN/OUT)
3659 *
3660 * parse an XML name.
3661 *
3662 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3663 * CombiningChar | Extender
3664 *
3665 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3666 *
3667 * [6] Names ::= Name (#x20 Name)*
3668 *
3669 * Returns the Name parsed or NULL. The @str pointer
3670 * is updated to the current location in the string.
3671 */
3672
3673static xmlChar *
3674xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3675 xmlChar buf[XML_MAX_NAMELEN + 5];
3676 const xmlChar *cur = *str;
3677 int len = 0, l;
3678 int c;
3679
3680#ifdef DEBUG
3681 nbParseStringName++;
3682#endif
3683
3684 c = CUR_SCHAR(cur, l);
3685 if (!xmlIsNameStartChar(ctxt, c)) {
3686 return(NULL);
3687 }
3688
3689 COPY_BUF(l,buf,len,c);
3690 cur += l;
3691 c = CUR_SCHAR(cur, l);
3692 while (xmlIsNameChar(ctxt, c)) {
3693 COPY_BUF(l,buf,len,c);
3694 cur += l;
3695 c = CUR_SCHAR(cur, l);
3696 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3697 /*
3698 * Okay someone managed to make a huge name, so he's ready to pay
3699 * for the processing speed.
3700 */
3701 xmlChar *buffer;
3702 int max = len * 2;
3703
3704 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3705 if (buffer == NULL) {
3706 xmlErrMemory(ctxt, NULL);
3707 return(NULL);
3708 }
3709 memcpy(buffer, buf, len);
3710 while (xmlIsNameChar(ctxt, c)) {
3711 if (len + 10 > max) {
3712 xmlChar *tmp;
3713
3714 if ((len > XML_MAX_NAME_LENGTH) &&
3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3717 xmlFree(buffer);
3718 return(NULL);
3719 }
3720 max *= 2;
3721 tmp = (xmlChar *) xmlRealloc(buffer,
3722 max * sizeof(xmlChar));
3723 if (tmp == NULL) {
3724 xmlErrMemory(ctxt, NULL);
3725 xmlFree(buffer);
3726 return(NULL);
3727 }
3728 buffer = tmp;
3729 }
3730 COPY_BUF(l,buffer,len,c);
3731 cur += l;
3732 c = CUR_SCHAR(cur, l);
3733 }
3734 buffer[len] = 0;
3735 *str = cur;
3736 return(buffer);
3737 }
3738 }
3739 if ((len > XML_MAX_NAME_LENGTH) &&
3740 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3741 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3742 return(NULL);
3743 }
3744 *str = cur;
3745 return(xmlStrndup(buf, len));
3746}
3747
3748/**
3749 * xmlParseNmtoken:
3750 * @ctxt: an XML parser context
3751 *
3752 * parse an XML Nmtoken.
3753 *
3754 * [7] Nmtoken ::= (NameChar)+
3755 *
3756 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3757 *
3758 * Returns the Nmtoken parsed or NULL
3759 */
3760
3761xmlChar *
3762xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3763 xmlChar buf[XML_MAX_NAMELEN + 5];
3764 int len = 0, l;
3765 int c;
3766 int count = 0;
3767
3768#ifdef DEBUG
3769 nbParseNmToken++;
3770#endif
3771
3772 GROW;
3773 if (ctxt->instate == XML_PARSER_EOF)
3774 return(NULL);
3775 c = CUR_CHAR(l);
3776
3777 while (xmlIsNameChar(ctxt, c)) {
3778 if (count++ > XML_PARSER_CHUNK_SIZE) {
3779 count = 0;
3780 GROW;
3781 }
3782 COPY_BUF(l,buf,len,c);
3783 NEXTL(l);
3784 c = CUR_CHAR(l);
3785 if (c == 0) {
3786 count = 0;
3787 GROW;
3788 if (ctxt->instate == XML_PARSER_EOF)
3789 return(NULL);
3790 c = CUR_CHAR(l);
3791 }
3792 if (len >= XML_MAX_NAMELEN) {
3793 /*
3794 * Okay someone managed to make a huge token, so he's ready to pay
3795 * for the processing speed.
3796 */
3797 xmlChar *buffer;
3798 int max = len * 2;
3799
3800 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3801 if (buffer == NULL) {
3802 xmlErrMemory(ctxt, NULL);
3803 return(NULL);
3804 }
3805 memcpy(buffer, buf, len);
3806 while (xmlIsNameChar(ctxt, c)) {
3807 if (count++ > XML_PARSER_CHUNK_SIZE) {
3808 count = 0;
3809 GROW;
3810 if (ctxt->instate == XML_PARSER_EOF) {
3811 xmlFree(buffer);
3812 return(NULL);
3813 }
3814 }
3815 if (len + 10 > max) {
3816 xmlChar *tmp;
3817
3818 if ((max > XML_MAX_NAME_LENGTH) &&
3819 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3820 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3821 xmlFree(buffer);
3822 return(NULL);
3823 }
3824 max *= 2;
3825 tmp = (xmlChar *) xmlRealloc(buffer,
3826 max * sizeof(xmlChar));
3827 if (tmp == NULL) {
3828 xmlErrMemory(ctxt, NULL);
3829 xmlFree(buffer);
3830 return(NULL);
3831 }
3832 buffer = tmp;
3833 }
3834 COPY_BUF(l,buffer,len,c);
3835 NEXTL(l);
3836 c = CUR_CHAR(l);
3837 }
3838 buffer[len] = 0;
3839 return(buffer);
3840 }
3841 }
3842 if (len == 0)
3843 return(NULL);
3844 if ((len > XML_MAX_NAME_LENGTH) &&
3845 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3846 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3847 return(NULL);
3848 }
3849 return(xmlStrndup(buf, len));
3850}
3851
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function works in three steps: the raw literal is copied into a
 * buffer, the buffer is checked for '%' and '&' that do not start a
 * well-formed reference, and finally parameter-entity references are
 * substituted with xmlStringDecodeEntities().
 *
 * ctxt->instate is set to XML_PARSER_ENTITY_VALUE and is not restored by
 * this function.  @orig is only written when the closing quote was found;
 * it then receives the raw buffer (NOTE(review): presumably to be freed by
 * the caller - confirm against the callers).
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* The opening quote decides which character closes the literal. */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the input the literal started in, see the loop condition */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }
    /* skip the opening quote */
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* keep room for one more character and the terminator */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        if (c == 0) {
            /* possibly just the end of the buffered data: refill once more */
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return(NULL);
    }

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            /* a reference must be '%' or '&', a Name, then ';' */
            cur++;
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            /*
             * a '%' found while ctxt->inSubset is 1 and only one input is
             * on the stack is reported as XML_ERR_ENTITY_PE_INTERNAL
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* the name parser may have stopped on the terminator */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted, provided the copy loop
     * stopped on the closing quote.  Otherwise the literal is unfinished:
     * the buffer is released and NULL is returned.
     */
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        /* skip the closing quote */
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ++ctxt->depth;
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        --ctxt->depth;
        /* hand the raw text over if requested, release it otherwise */
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
4001
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the length of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * The value is rebuilt in a heap buffer: whitespace characters are turned
 * into #x20, character references are decoded, and entity references are
 * either expanded (ctxt->replaceEntities set) or copied as "&name;".
 * With @normalize set, leading spaces are dropped, runs of spaces are
 * collapsed and trailing spaces are trimmed.
 *
 * NOTE(review): growBuffer() is a macro defined outside this function; it
 * is assumed to enlarge buf/buf_size and to jump to mem_error on failure.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* The opening quote decides which character closes the value. */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        /* keep the ampersand escaped as "&#38;" */
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /* predefined entity: a single character is emitted */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    /* substitution requested: inline the decoded content */
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        --ctxt->depth;
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                /* CR, LF and TAB become a plain space */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    /* no substitution: the reference is copied as "&name;" */
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities;

                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);
                        --ctxt->depth;

                        /*
                         * ent->checked stores twice the number of entities
                         * met while decoding (plus one); the low bit is
                         * set when the decoded content contains a '<'
                         */
                        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        }
                    }

                    /*
                     * Just output the reference
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * whitespace: always emitted as #x20; when normalizing,
                 * leading whitespace is skipped and runs are collapsed
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* when normalizing, drop the trailing spaces */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Report why the loop stopped; the value collected so far is still
     * returned after these errors.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

    /* mem_error reports a memory error, then shares the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4248
4249/**
4250 * xmlParseAttValue:
4251 * @ctxt: an XML parser context
4252 *
4253 * parse a value for an attribute
4254 * Note: the parser won't do substitution of entities here, this
4255 * will be handled later in xmlStringGetNodeList
4256 *
4257 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4258 * "'" ([^<&'] | Reference)* "'"
4259 *
4260 * 3.3.3 Attribute-Value Normalization:
4261 * Before the value of an attribute is passed to the application or
4262 * checked for validity, the XML processor must normalize it as follows:
4263 * - a character reference is processed by appending the referenced
4264 * character to the attribute value
4265 * - an entity reference is processed by recursively processing the
4266 * replacement text of the entity
4267 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4268 * appending #x20 to the normalized value, except that only a single
4269 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4270 * parsed entity or the literal entity value of an internal parsed entity
4271 * - other characters are processed by appending them to the normalized value
4272 * If the declared value is not CDATA, then the XML processor must further
4273 * process the normalized attribute value by discarding any leading and
4274 * trailing space (#x20) characters, and by replacing sequences of space
4275 * (#x20) characters by a single space (#x20) character.
4276 * All attributes for which no declaration has been read should be treated
4277 * by a non-validating parser as if declared CDATA.
4278 *
4279 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4280 */
4281
4282
4283xmlChar *
4284xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4285 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4286 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4287}
4288
4289/**
4290 * xmlParseSystemLiteral:
4291 * @ctxt: an XML parser context
4292 *
4293 * parse an XML Literal
4294 *
4295 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4296 *
4297 * Returns the SystemLiteral parsed or NULL
4298 */
4299
4300xmlChar *
4301xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4302 xmlChar *buf = NULL;
4303 int len = 0;
4304 int size = XML_PARSER_BUFFER_SIZE;
4305 int cur, l;
4306 xmlChar stop;
4307 int state = ctxt->instate;
4308 int count = 0;
4309
4310 SHRINK;
4311 if (RAW == '"') {
4312 NEXT;
4313 stop = '"';
4314 } else if (RAW == '\'') {
4315 NEXT;
4316 stop = '\'';
4317 } else {
4318 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4319 return(NULL);
4320 }
4321
4322 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4323 if (buf == NULL) {
4324 xmlErrMemory(ctxt, NULL);
4325 return(NULL);
4326 }
4327 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4328 cur = CUR_CHAR(l);
4329 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4330 if (len + 5 >= size) {
4331 xmlChar *tmp;
4332
4333 if ((size > XML_MAX_NAME_LENGTH) &&
4334 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4335 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4336 xmlFree(buf);
4337 ctxt->instate = (xmlParserInputState) state;
4338 return(NULL);
4339 }
4340 size *= 2;
4341 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4342 if (tmp == NULL) {
4343 xmlFree(buf);
4344 xmlErrMemory(ctxt, NULL);
4345 ctxt->instate = (xmlParserInputState) state;
4346 return(NULL);
4347 }
4348 buf = tmp;
4349 }
4350 count++;
4351 if (count > 50) {
4352 GROW;
4353 count = 0;
4354 if (ctxt->instate == XML_PARSER_EOF) {
4355 xmlFree(buf);
4356 return(NULL);
4357 }
4358 }
4359 COPY_BUF(l,buf,len,cur);
4360 NEXTL(l);
4361 cur = CUR_CHAR(l);
4362 if (cur == 0) {
4363 GROW;
4364 SHRINK;
4365 cur = CUR_CHAR(l);
4366 }
4367 }
4368 buf[len] = 0;
4369 ctxt->instate = (xmlParserInputState) state;
4370 if (!IS_CHAR(cur)) {
4371 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4372 } else {
4373 NEXT;
4374 }
4375 return(buf);
4376}
4377
4378/**
4379 * xmlParsePubidLiteral:
4380 * @ctxt: an XML parser context
4381 *
4382 * parse an XML public literal
4383 *
4384 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4385 *
4386 * Returns the PubidLiteral parsed or NULL.
4387 */
4388
4389xmlChar *
4390xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4391 xmlChar *buf = NULL;
4392 int len = 0;
4393 int size = XML_PARSER_BUFFER_SIZE;
4394 xmlChar cur;
4395 xmlChar stop;
4396 int count = 0;
4397 xmlParserInputState oldstate = ctxt->instate;
4398
4399 SHRINK;
4400 if (RAW == '"') {
4401 NEXT;
4402 stop = '"';
4403 } else if (RAW == '\'') {
4404 NEXT;
4405 stop = '\'';
4406 } else {
4407 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4408 return(NULL);
4409 }
4410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4411 if (buf == NULL) {
4412 xmlErrMemory(ctxt, NULL);
4413 return(NULL);
4414 }
4415 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4416 cur = CUR;
4417 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4418 if (len + 1 >= size) {
4419 xmlChar *tmp;
4420
4421 if ((size > XML_MAX_NAME_LENGTH) &&
4422 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4423 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4424 xmlFree(buf);
4425 return(NULL);
4426 }
4427 size *= 2;
4428 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4429 if (tmp == NULL) {
4430 xmlErrMemory(ctxt, NULL);
4431 xmlFree(buf);
4432 return(NULL);
4433 }
4434 buf = tmp;
4435 }
4436 buf[len++] = cur;
4437 count++;
4438 if (count > 50) {
4439 GROW;
4440 count = 0;
4441 if (ctxt->instate == XML_PARSER_EOF) {
4442 xmlFree(buf);
4443 return(NULL);
4444 }
4445 }
4446 NEXT;
4447 cur = CUR;
4448 if (cur == 0) {
4449 GROW;
4450 SHRINK;
4451 cur = CUR;
4452 }
4453 }
4454 buf[len] = 0;
4455 if (cur != stop) {
4456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4457 } else {
4458 NEXT;
4459 }
4460 ctxt->instate = oldstate;
4461 return(buf);
4462}
4463
4464static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4465
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value.  An entry is either the byte value
 * itself or 0.  The entries set to 0 are: 0x00-0x08, 0x0A-0x1F (which
 * covers LF and CR), '&' (0x26), '<' (0x3C), ']' (0x5D) and every byte
 * from 0x80 up.  TAB (0x09), space and the remaining ASCII bytes up to
 * 0x7F map to themselves.
 * NOTE(review): the consumer of the table is not part of this excerpt;
 * presumably a zero entry makes the fast scanning loop stop - confirm in
 * xmlParseCharData().
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4503
4504/**
4505 * xmlParseCharData:
4506 * @ctxt: an XML parser context
4507 * @cdata: int indicating whether we are within a CDATA section
4508 *
4509 * parse a CharData section.
4510 * if we are within a CDATA section ']]>' marks an end of section.
4511 *
4512 * The right angle bracket (>) may be represented using the string "&gt;",
4513 * and must, for compatibility, be escaped using "&gt;" or a character
4514 * reference when it appears in the string "]]>" in content, when that
4515 * string is not marking the end of a CDATA section.
4516 *
4517 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4518 */
4519
4520void
4521xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4522 const xmlChar *in;
4523 int nbchar = 0;
4524 int line = ctxt->input->line;
4525 int col = ctxt->input->col;
4526 int ccol;
4527
4528 SHRINK;
4529 GROW;
4530 /*
4531 * Accelerated common case where input don't need to be
4532 * modified before passing it to the handler.
4533 */
4534 if (!cdata) {
4535 in = ctxt->input->cur;
4536 do {
4537get_more_space:
4538 while (*in == 0x20) { in++; ctxt->input->col++; }
4539 if (*in == 0xA) {
4540 do {
4541 ctxt->input->line++; ctxt->input->col = 1;
4542 in++;
4543 } while (*in == 0xA);
4544 goto get_more_space;
4545 }
4546 if (*in == '<') {
4547 nbchar = in - ctxt->input->cur;
4548 if (nbchar > 0) {
4549 const xmlChar *tmp = ctxt->input->cur;
4550 ctxt->input->cur = in;
4551
4552 if ((ctxt->sax != NULL) &&
4553 (ctxt->sax->ignorableWhitespace !=
4554 ctxt->sax->characters)) {
4555 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4556 if (ctxt->sax->ignorableWhitespace != NULL)
4557 ctxt->sax->ignorableWhitespace(ctxt->userData,
4558 tmp, nbchar);
4559 } else {
4560 if (ctxt->sax->characters != NULL)
4561 ctxt->sax->characters(ctxt->userData,
4562 tmp, nbchar);
4563 if (*ctxt->space == -1)
4564 *ctxt->space = -2;
4565 }
4566 } else if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->characters != NULL)) {
4568 ctxt->sax->characters(ctxt->userData,
4569 tmp, nbchar);
4570 }
4571 }
4572 return;
4573 }
4574
4575get_more:
4576 ccol = ctxt->input->col;
4577 while (test_char_data[*in]) {
4578 in++;
4579 ccol++;
4580 }
4581 ctxt->input->col = ccol;
4582 if (*in == 0xA) {
4583 do {
4584 ctxt->input->line++; ctxt->input->col = 1;
4585 in++;
4586 } while (*in == 0xA);
4587 goto get_more;
4588 }
4589 if (*in == ']') {
4590 if ((in[1] == ']') && (in[2] == '>')) {
4591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4592 ctxt->input->cur = in;
4593 return;
4594 }
4595 in++;
4596 ctxt->input->col++;
4597 goto get_more;
4598 }
4599 nbchar = in - ctxt->input->cur;
4600 if (nbchar > 0) {
4601 if ((ctxt->sax != NULL) &&
4602 (ctxt->sax->ignorableWhitespace !=
4603 ctxt->sax->characters) &&
4604 (IS_BLANK_CH(*ctxt->input->cur))) {
4605 const xmlChar *tmp = ctxt->input->cur;
4606 ctxt->input->cur = in;
4607
4608 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4609 if (ctxt->sax->ignorableWhitespace != NULL)
4610 ctxt->sax->ignorableWhitespace(ctxt->userData,
4611 tmp, nbchar);
4612 } else {
4613 if (ctxt->sax->characters != NULL)
4614 ctxt->sax->characters(ctxt->userData,
4615 tmp, nbchar);
4616 if (*ctxt->space == -1)
4617 *ctxt->space = -2;
4618 }
4619 line = ctxt->input->line;
4620 col = ctxt->input->col;
4621 } else if (ctxt->sax != NULL) {
4622 if (ctxt->sax->characters != NULL)
4623 ctxt->sax->characters(ctxt->userData,
4624 ctxt->input->cur, nbchar);
4625 line = ctxt->input->line;
4626 col = ctxt->input->col;
4627 }
4628 /* something really bad happened in the SAX callback */
4629 if (ctxt->instate != XML_PARSER_CONTENT)
4630 return;
4631 }
4632 ctxt->input->cur = in;
4633 if (*in == 0xD) {
4634 in++;
4635 if (*in == 0xA) {
4636 ctxt->input->cur = in;
4637 in++;
4638 ctxt->input->line++; ctxt->input->col = 1;
4639 continue; /* while */
4640 }
4641 in--;
4642 }
4643 if (*in == '<') {
4644 return;
4645 }
4646 if (*in == '&') {
4647 return;
4648 }
4649 SHRINK;
4650 GROW;
4651 if (ctxt->instate == XML_PARSER_EOF)
4652 return;
4653 in = ctxt->input->cur;
4654 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4655 nbchar = 0;
4656 }
4657 ctxt->input->line = line;
4658 ctxt->input->col = col;
4659 xmlParseCharDataComplex(ctxt, cdata);
4660}
4661
4662/**
4663 * xmlParseCharDataComplex:
4664 * @ctxt: an XML parser context
4665 * @cdata: int indicating whether we are within a CDATA section
4666 *
4667 * parse a CharData section.this is the fallback function
4668 * of xmlParseCharData() when the parsing requires handling
4669 * of non-ASCII characters.
4670 */
4671static void
4672xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4673 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4674 int nbchar = 0;
4675 int cur, l;
4676 int count = 0;
4677
4678 SHRINK;
4679 GROW;
4680 cur = CUR_CHAR(l);
4681 while ((cur != '<') && /* checked */
4682 (cur != '&') &&
4683 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4684 if ((cur == ']') && (NXT(1) == ']') &&
4685 (NXT(2) == '>')) {
4686 if (cdata) break;
4687 else {
4688 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4689 }
4690 }
4691 COPY_BUF(l,buf,nbchar,cur);
4692 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4693 buf[nbchar] = 0;
4694
4695 /*
4696 * OK the segment is to be consumed as chars.
4697 */
4698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4699 if (areBlanks(ctxt, buf, nbchar, 0)) {
4700 if (ctxt->sax->ignorableWhitespace != NULL)
4701 ctxt->sax->ignorableWhitespace(ctxt->userData,
4702 buf, nbchar);
4703 } else {
4704 if (ctxt->sax->characters != NULL)
4705 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4706 if ((ctxt->sax->characters !=
4707 ctxt->sax->ignorableWhitespace) &&
4708 (*ctxt->space == -1))
4709 *ctxt->space = -2;
4710 }
4711 }
4712 nbchar = 0;
4713 /* something really bad happened in the SAX callback */
4714 if (ctxt->instate != XML_PARSER_CONTENT)
4715 return;
4716 }
4717 count++;
4718 if (count > 50) {
4719 GROW;
4720 count = 0;
4721 if (ctxt->instate == XML_PARSER_EOF)
4722 return;
4723 }
4724 NEXTL(l);
4725 cur = CUR_CHAR(l);
4726 }
4727 if (nbchar != 0) {
4728 buf[nbchar] = 0;
4729 /*
4730 * OK the segment is to be consumed as chars.
4731 */
4732 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4733 if (areBlanks(ctxt, buf, nbchar, 0)) {
4734 if (ctxt->sax->ignorableWhitespace != NULL)
4735 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4736 } else {
4737 if (ctxt->sax->characters != NULL)
4738 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4739 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4740 (*ctxt->space == -1))
4741 *ctxt->space = -2;
4742 }
4743 }
4744 }
4745 if ((cur != 0) && (!IS_CHAR(cur))) {
4746 /* Generate the error and skip the offending character */
4747 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4748 "PCDATA invalid Char value %d\n",
4749 cur);
4750 NEXTL(l);
4751 }
4752}
4753
4754/**
4755 * xmlParseExternalID:
4756 * @ctxt: an XML parser context
4757 * @publicID: a xmlChar** receiving PubidLiteral
4758 * @strict: indicate whether we should restrict parsing to only
4759 * production [75], see NOTE below
4760 *
4761 * Parse an External ID or a Public ID
4762 *
4763 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4764 * 'PUBLIC' S PubidLiteral S SystemLiteral
4765 *
4766 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4767 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4768 *
4769 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4770 *
4771 * Returns the function returns SystemLiteral and in the second
4772 * case publicID receives PubidLiteral, is strict is off
4773 * it is possible to return NULL and have publicID set.
4774 */
4775
4776xmlChar *
4777xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4778 xmlChar *URI = NULL;
4779
4780 SHRINK;
4781
4782 *publicID = NULL;
4783 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4784 SKIP(6);
4785 if (!IS_BLANK_CH(CUR)) {
4786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4787 "Space required after 'SYSTEM'\n");
4788 }
4789 SKIP_BLANKS;
4790 URI = xmlParseSystemLiteral(ctxt);
4791 if (URI == NULL) {
4792 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4793 }
4794 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4795 SKIP(6);
4796 if (!IS_BLANK_CH(CUR)) {
4797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4798 "Space required after 'PUBLIC'\n");
4799 }
4800 SKIP_BLANKS;
4801 *publicID = xmlParsePubidLiteral(ctxt);
4802 if (*publicID == NULL) {
4803 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4804 }
4805 if (strict) {
4806 /*
4807 * We don't handle [83] so "S SystemLiteral" is required.
4808 */
4809 if (!IS_BLANK_CH(CUR)) {
4810 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4811 "Space required after the Public Identifier\n");
4812 }
4813 } else {
4814 /*
4815 * We handle [83] so we return immediately, if
4816 * "S SystemLiteral" is not detected. From a purely parsing
4817 * point of view that's a nice mess.
4818 */
4819 const xmlChar *ptr;
4820 GROW;
4821
4822 ptr = CUR_PTR;
4823 if (!IS_BLANK_CH(*ptr)) return(NULL);
4824
4825 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4826 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4827 }
4828 SKIP_BLANKS;
4829 URI = xmlParseSystemLiteral(ctxt);
4830 if (URI == NULL) {
4831 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4832 }
4833 }
4834 return(URI);
4835}
4836
4837/**
4838 * xmlParseCommentComplex:
4839 * @ctxt: an XML parser context
4840 * @buf: the already parsed part of the buffer
4841 * @len: number of bytes filles in the buffer
4842 * @size: allocated size of the buffer
4843 *
4844 * Skip an XML (SGML) comment <!-- .... -->
4845 * The spec says that "For compatibility, the string "--" (double-hyphen)
4846 * must not occur within comments. "
4847 * This is the slow routine in case the accelerator for ascii didn't work
4848 *
4849 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4850 */
4851static void
4852xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4853 size_t len, size_t size) {
4854 int q, ql;
4855 int r, rl;
4856 int cur, l;
4857 size_t count = 0;
4858 int inputid;
4859
4860 inputid = ctxt->input->id;
4861
4862 if (buf == NULL) {
4863 len = 0;
4864 size = XML_PARSER_BUFFER_SIZE;
4865 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4866 if (buf == NULL) {
4867 xmlErrMemory(ctxt, NULL);
4868 return;
4869 }
4870 }
4871 GROW; /* Assure there's enough input data */
4872 q = CUR_CHAR(ql);
4873 if (q == 0)
4874 goto not_terminated;
4875 if (!IS_CHAR(q)) {
4876 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4877 "xmlParseComment: invalid xmlChar value %d\n",
4878 q);
4879 xmlFree (buf);
4880 return;
4881 }
4882 NEXTL(ql);
4883 r = CUR_CHAR(rl);
4884 if (r == 0)
4885 goto not_terminated;
4886 if (!IS_CHAR(r)) {
4887 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4888 "xmlParseComment: invalid xmlChar value %d\n",
4889 q);
4890 xmlFree (buf);
4891 return;
4892 }
4893 NEXTL(rl);
4894 cur = CUR_CHAR(l);
4895 if (cur == 0)
4896 goto not_terminated;
4897 while (IS_CHAR(cur) && /* checked */
4898 ((cur != '>') ||
4899 (r != '-') || (q != '-'))) {
4900 if ((r == '-') && (q == '-')) {
4901 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4902 }
4903 if ((len > XML_MAX_TEXT_LENGTH) &&
4904 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4905 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4906 "Comment too big found", NULL);
4907 xmlFree (buf);
4908 return;
4909 }
4910 if (len + 5 >= size) {
4911 xmlChar *new_buf;
4912 size_t new_size;
4913
4914 new_size = size * 2;
4915 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4916 if (new_buf == NULL) {
4917 xmlFree (buf);
4918 xmlErrMemory(ctxt, NULL);
4919 return;
4920 }
4921 buf = new_buf;
4922 size = new_size;
4923 }
4924 COPY_BUF(ql,buf,len,q);
4925 q = r;
4926 ql = rl;
4927 r = cur;
4928 rl = l;
4929
4930 count++;
4931 if (count > 50) {
4932 GROW;
4933 count = 0;
4934 if (ctxt->instate == XML_PARSER_EOF) {
4935 xmlFree(buf);
4936 return;
4937 }
4938 }
4939 NEXTL(l);
4940 cur = CUR_CHAR(l);
4941 if (cur == 0) {
4942 SHRINK;
4943 GROW;
4944 cur = CUR_CHAR(l);
4945 }
4946 }
4947 buf[len] = 0;
4948 if (cur == 0) {
4949 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4950 "Comment not terminated \n<!--%.50s\n", buf);
4951 } else if (!IS_CHAR(cur)) {
4952 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4953 "xmlParseComment: invalid xmlChar value %d\n",
4954 cur);
4955 } else {
4956 if (inputid != ctxt->input->id) {
4957 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4958 "Comment doesn't start and stop in the same entity\n");
4959 }
4960 NEXT;
4961 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4962 (!ctxt->disableSAX))
4963 ctxt->sax->comment(ctxt->userData, buf);
4964 }
4965 xmlFree(buf);
4966 return;
4967not_terminated:
4968 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4969 "Comment not terminated\n", NULL);
4970 xmlFree(buf);
4971 return;
4972}
4973
4974/**
4975 * xmlParseComment:
4976 * @ctxt: an XML parser context
4977 *
4978 * Skip an XML (SGML) comment <!-- .... -->
4979 * The spec says that "For compatibility, the string "--" (double-hyphen)
4980 * must not occur within comments. "
4981 *
4982 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4983 */
4984void
4985xmlParseComment(xmlParserCtxtPtr ctxt) {
4986 xmlChar *buf = NULL;
4987 size_t size = XML_PARSER_BUFFER_SIZE;
4988 size_t len = 0;
4989 xmlParserInputState state;
4990 const xmlChar *in;
4991 size_t nbchar = 0;
4992 int ccol;
4993 int inputid;
4994
4995 /*
4996 * Check that there is a comment right here.
4997 */
4998 if ((RAW != '<') || (NXT(1) != '!') ||
4999 (NXT(2) != '-') || (NXT(3) != '-')) return;
5000 state = ctxt->instate;
5001 ctxt->instate = XML_PARSER_COMMENT;
5002 inputid = ctxt->input->id;
5003 SKIP(4);
5004 SHRINK;
5005 GROW;
5006
5007 /*
5008 * Accelerated common case where input don't need to be
5009 * modified before passing it to the handler.
5010 */
5011 in = ctxt->input->cur;
5012 do {
5013 if (*in == 0xA) {
5014 do {
5015 ctxt->input->line++; ctxt->input->col = 1;
5016 in++;
5017 } while (*in == 0xA);
5018 }
5019get_more:
5020 ccol = ctxt->input->col;
5021 while (((*in > '-') && (*in <= 0x7F)) ||
5022 ((*in >= 0x20) && (*in < '-')) ||
5023 (*in == 0x09)) {
5024 in++;
5025 ccol++;
5026 }
5027 ctxt->input->col = ccol;
5028 if (*in == 0xA) {
5029 do {
5030 ctxt->input->line++; ctxt->input->col = 1;
5031 in++;
5032 } while (*in == 0xA);
5033 goto get_more;
5034 }
5035 nbchar = in - ctxt->input->cur;
5036 /*
5037 * save current set of data
5038 */
5039 if (nbchar > 0) {
5040 if ((ctxt->sax != NULL) &&
5041 (ctxt->sax->comment != NULL)) {
5042 if (buf == NULL) {
5043 if ((*in == '-') && (in[1] == '-'))
5044 size = nbchar + 1;
5045 else
5046 size = XML_PARSER_BUFFER_SIZE + nbchar;
5047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5048 if (buf == NULL) {
5049 xmlErrMemory(ctxt, NULL);
5050 ctxt->instate = state;
5051 return;
5052 }
5053 len = 0;
5054 } else if (len + nbchar + 1 >= size) {
5055 xmlChar *new_buf;
5056 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5057 new_buf = (xmlChar *) xmlRealloc(buf,
5058 size * sizeof(xmlChar));
5059 if (new_buf == NULL) {
5060 xmlFree (buf);
5061 xmlErrMemory(ctxt, NULL);
5062 ctxt->instate = state;
5063 return;
5064 }
5065 buf = new_buf;
5066 }
5067 memcpy(&buf[len], ctxt->input->cur, nbchar);
5068 len += nbchar;
5069 buf[len] = 0;
5070 }
5071 }
5072 if ((len > XML_MAX_TEXT_LENGTH) &&
5073 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5075 "Comment too big found", NULL);
5076 xmlFree (buf);
5077 return;
5078 }
5079 ctxt->input->cur = in;
5080 if (*in == 0xA) {
5081 in++;
5082 ctxt->input->line++; ctxt->input->col = 1;
5083 }
5084 if (*in == 0xD) {
5085 in++;
5086 if (*in == 0xA) {
5087 ctxt->input->cur = in;
5088 in++;
5089 ctxt->input->line++; ctxt->input->col = 1;
5090 continue; /* while */
5091 }
5092 in--;
5093 }
5094 SHRINK;
5095 GROW;
5096 if (ctxt->instate == XML_PARSER_EOF) {
5097 xmlFree(buf);
5098 return;
5099 }
5100 in = ctxt->input->cur;
5101 if (*in == '-') {
5102 if (in[1] == '-') {
5103 if (in[2] == '>') {
5104 if (ctxt->input->id != inputid) {
5105 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5106 "comment doesn't start and stop in the same entity\n");
5107 }
5108 SKIP(3);
5109 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5110 (!ctxt->disableSAX)) {
5111 if (buf != NULL)
5112 ctxt->sax->comment(ctxt->userData, buf);
5113 else
5114 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5115 }
5116 if (buf != NULL)
5117 xmlFree(buf);
5118 if (ctxt->instate != XML_PARSER_EOF)
5119 ctxt->instate = state;
5120 return;
5121 }
5122 if (buf != NULL) {
5123 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5124 "Double hyphen within comment: "
5125 "<!--%.50s\n",
5126 buf);
5127 } else
5128 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5129 "Double hyphen within comment\n", NULL);
5130 in++;
5131 ctxt->input->col++;
5132 }
5133 in++;
5134 ctxt->input->col++;
5135 goto get_more;
5136 }
5137 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5138 xmlParseCommentComplex(ctxt, buf, len, size);
5139 ctxt->instate = state;
5140 return;
5141}
5142
5143
5144/**
5145 * xmlParsePITarget:
5146 * @ctxt: an XML parser context
5147 *
5148 * parse the name of a PI
5149 *
5150 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5151 *
5152 * Returns the PITarget name or NULL
5153 */
5154
5155const xmlChar *
5156xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5157 const xmlChar *name;
5158
5159 name = xmlParseName(ctxt);
5160 if ((name != NULL) &&
5161 ((name[0] == 'x') || (name[0] == 'X')) &&
5162 ((name[1] == 'm') || (name[1] == 'M')) &&
5163 ((name[2] == 'l') || (name[2] == 'L'))) {
5164 int i;
5165 if ((name[0] == 'x') && (name[1] == 'm') &&
5166 (name[2] == 'l') && (name[3] == 0)) {
5167 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5168 "XML declaration allowed only at the start of the document\n");
5169 return(name);
5170 } else if (name[3] == 0) {
5171 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5172 return(name);
5173 }
5174 for (i = 0;;i++) {
5175 if (xmlW3CPIs[i] == NULL) break;
5176 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5177 return(name);
5178 }
5179 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5180 "xmlParsePITarget: invalid name prefix 'xml'\n",
5181 NULL, NULL);
5182 }
5183 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5184 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5185 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5186 }
5187 return(name);
5188}
5189
5190#ifdef LIBXML_CATALOG_ENABLED
5191/**
5192 * xmlParseCatalogPI:
5193 * @ctxt: an XML parser context
5194 * @catalog: the PI value string
5195 *
5196 * parse an XML Catalog Processing Instruction.
5197 *
5198 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5199 *
5200 * Occurs only if allowed by the user and if happening in the Misc
5201 * part of the document before any doctype informations
5202 * This will add the given catalog to the parsing context in order
5203 * to be used if there is a resolution need further down in the document
5204 */
5205
5206static void
5207xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5208 xmlChar *URL = NULL;
5209 const xmlChar *tmp, *base;
5210 xmlChar marker;
5211
5212 tmp = catalog;
5213 while (IS_BLANK_CH(*tmp)) tmp++;
5214 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5215 goto error;
5216 tmp += 7;
5217 while (IS_BLANK_CH(*tmp)) tmp++;
5218 if (*tmp != '=') {
5219 return;
5220 }
5221 tmp++;
5222 while (IS_BLANK_CH(*tmp)) tmp++;
5223 marker = *tmp;
5224 if ((marker != '\'') && (marker != '"'))
5225 goto error;
5226 tmp++;
5227 base = tmp;
5228 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5229 if (*tmp == 0)
5230 goto error;
5231 URL = xmlStrndup(base, tmp - base);
5232 tmp++;
5233 while (IS_BLANK_CH(*tmp)) tmp++;
5234 if (*tmp != 0)
5235 goto error;
5236
5237 if (URL != NULL) {
5238 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5239 xmlFree(URL);
5240 }
5241 return;
5242
5243error:
5244 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5245 "Catalog PI syntax error: %s\n",
5246 catalog, NULL);
5247 if (URL != NULL)
5248 xmlFree(URL);
5249}
5250#endif
5251
5252/**
5253 * xmlParsePI:
5254 * @ctxt: an XML parser context
5255 *
5256 * parse an XML Processing Instruction.
5257 *
5258 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5259 *
5260 * The processing is transfered to SAX once parsed.
5261 */
5262
5263void
5264xmlParsePI(xmlParserCtxtPtr ctxt) {
5265 xmlChar *buf = NULL;
5266 size_t len = 0;
5267 size_t size = XML_PARSER_BUFFER_SIZE;
5268 int cur, l;
5269 const xmlChar *target;
5270 xmlParserInputState state;
5271 int count = 0;
5272
5273 if ((RAW == '<') && (NXT(1) == '?')) {
5274 xmlParserInputPtr input = ctxt->input;
5275 state = ctxt->instate;
5276 ctxt->instate = XML_PARSER_PI;
5277 /*
5278 * this is a Processing Instruction.
5279 */
5280 SKIP(2);
5281 SHRINK;
5282
5283 /*
5284 * Parse the target name and check for special support like
5285 * namespace.
5286 */
5287 target = xmlParsePITarget(ctxt);
5288 if (target != NULL) {
5289 if ((RAW == '?') && (NXT(1) == '>')) {
5290 if (input != ctxt->input) {
5291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5292 "PI declaration doesn't start and stop in the same entity\n");
5293 }
5294 SKIP(2);
5295
5296 /*
5297 * SAX: PI detected.
5298 */
5299 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5300 (ctxt->sax->processingInstruction != NULL))
5301 ctxt->sax->processingInstruction(ctxt->userData,
5302 target, NULL);
5303 if (ctxt->instate != XML_PARSER_EOF)
5304 ctxt->instate = state;
5305 return;
5306 }
5307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5308 if (buf == NULL) {
5309 xmlErrMemory(ctxt, NULL);
5310 ctxt->instate = state;
5311 return;
5312 }
5313 cur = CUR;
5314 if (!IS_BLANK(cur)) {
5315 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5316 "ParsePI: PI %s space expected\n", target);
5317 }
5318 SKIP_BLANKS;
5319 cur = CUR_CHAR(l);
5320 while (IS_CHAR(cur) && /* checked */
5321 ((cur != '?') || (NXT(1) != '>'))) {
5322 if (len + 5 >= size) {
5323 xmlChar *tmp;
5324 size_t new_size = size * 2;
5325 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5326 if (tmp == NULL) {
5327 xmlErrMemory(ctxt, NULL);
5328 xmlFree(buf);
5329 ctxt->instate = state;
5330 return;
5331 }
5332 buf = tmp;
5333 size = new_size;
5334 }
5335 count++;
5336 if (count > 50) {
5337 GROW;
5338 if (ctxt->instate == XML_PARSER_EOF) {
5339 xmlFree(buf);
5340 return;
5341 }
5342 count = 0;
5343 if ((len > XML_MAX_TEXT_LENGTH) &&
5344 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 "PI %s too big found", target);
5347 xmlFree(buf);
5348 ctxt->instate = state;
5349 return;
5350 }
5351 }
5352 COPY_BUF(l,buf,len,cur);
5353 NEXTL(l);
5354 cur = CUR_CHAR(l);
5355 if (cur == 0) {
5356 SHRINK;
5357 GROW;
5358 cur = CUR_CHAR(l);
5359 }
5360 }
5361 if ((len > XML_MAX_TEXT_LENGTH) &&
5362 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5363 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5364 "PI %s too big found", target);
5365 xmlFree(buf);
5366 ctxt->instate = state;
5367 return;
5368 }
5369 buf[len] = 0;
5370 if (cur != '?') {
5371 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5372 "ParsePI: PI %s never end ...\n", target);
5373 } else {
5374 if (input != ctxt->input) {
5375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5376 "PI declaration doesn't start and stop in the same entity\n");
5377 }
5378 SKIP(2);
5379
5380#ifdef LIBXML_CATALOG_ENABLED
5381 if (((state == XML_PARSER_MISC) ||
5382 (state == XML_PARSER_START)) &&
5383 (xmlStrEqual(target, XML_CATALOG_PI))) {
5384 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5385 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5386 (allow == XML_CATA_ALLOW_ALL))
5387 xmlParseCatalogPI(ctxt, buf);
5388 }
5389#endif
5390
5391
5392 /*
5393 * SAX: PI detected.
5394 */
5395 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5396 (ctxt->sax->processingInstruction != NULL))
5397 ctxt->sax->processingInstruction(ctxt->userData,
5398 target, buf);
5399 }
5400 xmlFree(buf);
5401 } else {
5402 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5403 }
5404 if (ctxt->instate != XML_PARSER_EOF)
5405 ctxt->instate = state;
5406 }
5407}
5408
5409/**
5410 * xmlParseNotationDecl:
5411 * @ctxt: an XML parser context
5412 *
5413 * parse a notation declaration
5414 *
5415 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5416 *
5417 * Hence there is actually 3 choices:
5418 * 'PUBLIC' S PubidLiteral
5419 * 'PUBLIC' S PubidLiteral S SystemLiteral
5420 * and 'SYSTEM' S SystemLiteral
5421 *
5422 * See the NOTE on xmlParseExternalID().
5423 */
5424
5425void
5426xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5427 const xmlChar *name;
5428 xmlChar *Pubid;
5429 xmlChar *Systemid;
5430
5431 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5432 xmlParserInputPtr input = ctxt->input;
5433 SHRINK;
5434 SKIP(10);
5435 if (!IS_BLANK_CH(CUR)) {
5436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after '<!NOTATION'\n");
5438 return;
5439 }
5440 SKIP_BLANKS;
5441
5442 name = xmlParseName(ctxt);
5443 if (name == NULL) {
5444 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5445 return;
5446 }
5447 if (!IS_BLANK_CH(CUR)) {
5448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after the NOTATION name'\n");
5450 return;
5451 }
5452 if (xmlStrchr(name, ':') != NULL) {
5453 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5454 "colons are forbidden from notation names '%s'\n",
5455 name, NULL, NULL);
5456 }
5457 SKIP_BLANKS;
5458
5459 /*
5460 * Parse the IDs.
5461 */
5462 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5463 SKIP_BLANKS;
5464
5465 if (RAW == '>') {
5466 if (input != ctxt->input) {
5467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Notation declaration doesn't start and stop in the same entity\n");
5469 }
5470 NEXT;
5471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5472 (ctxt->sax->notationDecl != NULL))
5473 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5474 } else {
5475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5476 }
5477 if (Systemid != NULL) xmlFree(Systemid);
5478 if (Pubid != NULL) xmlFree(Pubid);
5479 }
5480}
5481
5482/**
5483 * xmlParseEntityDecl:
5484 * @ctxt: an XML parser context
5485 *
5486 * parse <!ENTITY declarations
5487 *
5488 * [70] EntityDecl ::= GEDecl | PEDecl
5489 *
5490 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5491 *
5492 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5493 *
5494 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5495 *
5496 * [74] PEDef ::= EntityValue | ExternalID
5497 *
5498 * [76] NDataDecl ::= S 'NDATA' S Name
5499 *
5500 * [ VC: Notation Declared ]
5501 * The Name must match the declared name of a notation.
5502 */
5503
5504void
5505xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5506 const xmlChar *name = NULL;
5507 xmlChar *value = NULL;
5508 xmlChar *URI = NULL, *literal = NULL;
5509 const xmlChar *ndata = NULL;
5510 int isParameter = 0;
5511 xmlChar *orig = NULL;
5512 int skipped;
5513
5514 /* GROW; done in the caller */
5515 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5516 xmlParserInputPtr input = ctxt->input;
5517 SHRINK;
5518 SKIP(8);
5519 skipped = SKIP_BLANKS;
5520 if (skipped == 0) {
5521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522 "Space required after '<!ENTITY'\n");
5523 }
5524
5525 if (RAW == '%') {
5526 NEXT;
5527 skipped = SKIP_BLANKS;
5528 if (skipped == 0) {
5529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5530 "Space required after '%%'\n");
5531 }
5532 isParameter = 1;
5533 }
5534
5535 name = xmlParseName(ctxt);
5536 if (name == NULL) {
5537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5538 "xmlParseEntityDecl: no name\n");
5539 return;
5540 }
5541 if (xmlStrchr(name, ':') != NULL) {
5542 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5543 "colons are forbidden from entities names '%s'\n",
5544 name, NULL, NULL);
5545 }
5546 skipped = SKIP_BLANKS;
5547 if (skipped == 0) {
5548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5549 "Space required after the entity name\n");
5550 }
5551
5552 ctxt->instate = XML_PARSER_ENTITY_DECL;
5553 /*
5554 * handle the various case of definitions...
5555 */
5556 if (isParameter) {
5557 if ((RAW == '"') || (RAW == '\'')) {
5558 value = xmlParseEntityValue(ctxt, &orig);
5559 if (value) {
5560 if ((ctxt->sax != NULL) &&
5561 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5562 ctxt->sax->entityDecl(ctxt->userData, name,
5563 XML_INTERNAL_PARAMETER_ENTITY,
5564 NULL, NULL, value);
5565 }
5566 } else {
5567 URI = xmlParseExternalID(ctxt, &literal, 1);
5568 if ((URI == NULL) && (literal == NULL)) {
5569 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5570 }
5571 if (URI) {
5572 xmlURIPtr uri;
5573
5574 uri = xmlParseURI((const char *) URI);
5575 if (uri == NULL) {
5576 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5577 "Invalid URI: %s\n", URI);
5578 /*
5579 * This really ought to be a well formedness error
5580 * but the XML Core WG decided otherwise c.f. issue
5581 * E26 of the XML erratas.
5582 */
5583 } else {
5584 if (uri->fragment != NULL) {
5585 /*
5586 * Okay this is foolish to block those but not
5587 * invalid URIs.
5588 */
5589 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5590 } else {
5591 if ((ctxt->sax != NULL) &&
5592 (!ctxt->disableSAX) &&
5593 (ctxt->sax->entityDecl != NULL))
5594 ctxt->sax->entityDecl(ctxt->userData, name,
5595 XML_EXTERNAL_PARAMETER_ENTITY,
5596 literal, URI, NULL);
5597 }
5598 xmlFreeURI(uri);
5599 }
5600 }
5601 }
5602 } else {
5603 if ((RAW == '"') || (RAW == '\'')) {
5604 value = xmlParseEntityValue(ctxt, &orig);
5605 if ((ctxt->sax != NULL) &&
5606 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5607 ctxt->sax->entityDecl(ctxt->userData, name,
5608 XML_INTERNAL_GENERAL_ENTITY,
5609 NULL, NULL, value);
5610 /*
5611 * For expat compatibility in SAX mode.
5612 */
5613 if ((ctxt->myDoc == NULL) ||
5614 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5615 if (ctxt->myDoc == NULL) {
5616 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5617 if (ctxt->myDoc == NULL) {
5618 xmlErrMemory(ctxt, "New Doc failed");
5619 return;
5620 }
5621 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5622 }
5623 if (ctxt->myDoc->intSubset == NULL)
5624 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5625 BAD_CAST "fake", NULL, NULL);
5626
5627 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5628 NULL, NULL, value);
5629 }
5630 } else {
5631 URI = xmlParseExternalID(ctxt, &literal, 1);
5632 if ((URI == NULL) && (literal == NULL)) {
5633 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5634 }
5635 if (URI) {
5636 xmlURIPtr uri;
5637
5638 uri = xmlParseURI((const char *)URI);
5639 if (uri == NULL) {
5640 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5641 "Invalid URI: %s\n", URI);
5642 /*
5643 * This really ought to be a well formedness error
5644 * but the XML Core WG decided otherwise c.f. issue
5645 * E26 of the XML erratas.
5646 */
5647 } else {
5648 if (uri->fragment != NULL) {
5649 /*
5650 * Okay this is foolish to block those but not
5651 * invalid URIs.
5652 */
5653 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5654 }
5655 xmlFreeURI(uri);
5656 }
5657 }
5658 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5660 "Space required before 'NDATA'\n");
5661 }
5662 SKIP_BLANKS;
5663 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5664 SKIP(5);
5665 if (!IS_BLANK_CH(CUR)) {
5666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 "Space required after 'NDATA'\n");
5668 }
5669 SKIP_BLANKS;
5670 ndata = xmlParseName(ctxt);
5671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5672 (ctxt->sax->unparsedEntityDecl != NULL))
5673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5674 literal, URI, ndata);
5675 } else {
5676 if ((ctxt->sax != NULL) &&
5677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5678 ctxt->sax->entityDecl(ctxt->userData, name,
5679 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5680 literal, URI, NULL);
5681 /*
5682 * For expat compatibility in SAX mode.
5683 * assuming the entity repalcement was asked for
5684 */
5685 if ((ctxt->replaceEntities != 0) &&
5686 ((ctxt->myDoc == NULL) ||
5687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5688 if (ctxt->myDoc == NULL) {
5689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5690 if (ctxt->myDoc == NULL) {
5691 xmlErrMemory(ctxt, "New Doc failed");
5692 return;
5693 }
5694 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5695 }
5696
5697 if (ctxt->myDoc->intSubset == NULL)
5698 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5699 BAD_CAST "fake", NULL, NULL);
5700 xmlSAX2EntityDecl(ctxt, name,
5701 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5702 literal, URI, NULL);
5703 }
5704 }
5705 }
5706 }
5707 if (ctxt->instate == XML_PARSER_EOF)
5708 return;
5709 SKIP_BLANKS;
5710 if (RAW != '>') {
5711 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5712 "xmlParseEntityDecl: entity %s not terminated\n", name);
5713 xmlHaltParser(ctxt);
5714 } else {
5715 if (input != ctxt->input) {
5716 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5717 "Entity declaration doesn't start and stop in the same entity\n");
5718 }
5719 NEXT;
5720 }
5721 if (orig != NULL) {
5722 /*
5723 * Ugly mechanism to save the raw entity value.
5724 */
5725 xmlEntityPtr cur = NULL;
5726
5727 if (isParameter) {
5728 if ((ctxt->sax != NULL) &&
5729 (ctxt->sax->getParameterEntity != NULL))
5730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5731 } else {
5732 if ((ctxt->sax != NULL) &&
5733 (ctxt->sax->getEntity != NULL))
5734 cur = ctxt->sax->getEntity(ctxt->userData, name);
5735 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5736 cur = xmlSAX2GetEntity(ctxt, name);
5737 }
5738 }
5739 if (cur != NULL) {
5740 if (cur->orig != NULL)
5741 xmlFree(orig);
5742 else
5743 cur->orig = orig;
5744 } else
5745 xmlFree(orig);
5746 }
5747 if (value != NULL) xmlFree(value);
5748 if (URI != NULL) xmlFree(URI);
5749 if (literal != NULL) xmlFree(literal);
5750 }
5751}
5752
5753/**
5754 * xmlParseDefaultDecl:
5755 * @ctxt: an XML parser context
5756 * @value: Receive a possible fixed default value for the attribute
5757 *
5758 * Parse an attribute default declaration
5759 *
5760 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5761 *
5762 * [ VC: Required Attribute ]
5763 * if the default declaration is the keyword #REQUIRED, then the
5764 * attribute must be specified for all elements of the type in the
5765 * attribute-list declaration.
5766 *
5767 * [ VC: Attribute Default Legal ]
5768 * The declared default value must meet the lexical constraints of
5769 * the declared attribute type c.f. xmlValidateAttributeDecl()
5770 *
5771 * [ VC: Fixed Attribute Default ]
5772 * if an attribute has a default value declared with the #FIXED
5773 * keyword, instances of that attribute must match the default value.
5774 *
5775 * [ WFC: No < in Attribute Values ]
5776 * handled in xmlParseAttValue()
5777 *
5778 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5779 * or XML_ATTRIBUTE_FIXED.
5780 */
5781
5782int
5783xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5784 int val;
5785 xmlChar *ret;
5786
5787 *value = NULL;
5788 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5789 SKIP(9);
5790 return(XML_ATTRIBUTE_REQUIRED);
5791 }
5792 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5793 SKIP(8);
5794 return(XML_ATTRIBUTE_IMPLIED);
5795 }
5796 val = XML_ATTRIBUTE_NONE;
5797 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5798 SKIP(6);
5799 val = XML_ATTRIBUTE_FIXED;
5800 if (!IS_BLANK_CH(CUR)) {
5801 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5802 "Space required after '#FIXED'\n");
5803 }
5804 SKIP_BLANKS;
5805 }
5806 ret = xmlParseAttValue(ctxt);
5807 ctxt->instate = XML_PARSER_DTD;
5808 if (ret == NULL) {
5809 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5810 "Attribute default value declaration error\n");
5811 } else
5812 *value = ret;
5813 return(val);
5814}
5815
5816/**
5817 * xmlParseNotationType:
5818 * @ctxt: an XML parser context
5819 *
5820 * parse an Notation attribute type.
5821 *
5822 * Note: the leading 'NOTATION' S part has already being parsed...
5823 *
5824 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5825 *
5826 * [ VC: Notation Attributes ]
5827 * Values of this type must match one of the notation names included
5828 * in the declaration; all notation names in the declaration must be declared.
5829 *
5830 * Returns: the notation attribute tree built while parsing
5831 */
5832
5833xmlEnumerationPtr
5834xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5835 const xmlChar *name;
5836 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5837
5838 if (RAW != '(') {
5839 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5840 return(NULL);
5841 }
5842 SHRINK;
5843 do {
5844 NEXT;
5845 SKIP_BLANKS;
5846 name = xmlParseName(ctxt);
5847 if (name == NULL) {
5848 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5849 "Name expected in NOTATION declaration\n");
5850 xmlFreeEnumeration(ret);
5851 return(NULL);
5852 }
5853 tmp = ret;
5854 while (tmp != NULL) {
5855 if (xmlStrEqual(name, tmp->name)) {
5856 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5857 "standalone: attribute notation value token %s duplicated\n",
5858 name, NULL);
5859 if (!xmlDictOwns(ctxt->dict, name))
5860 xmlFree((xmlChar *) name);
5861 break;
5862 }
5863 tmp = tmp->next;
5864 }
5865 if (tmp == NULL) {
5866 cur = xmlCreateEnumeration(name);
5867 if (cur == NULL) {
5868 xmlFreeEnumeration(ret);
5869 return(NULL);
5870 }
5871 if (last == NULL) ret = last = cur;
5872 else {
5873 last->next = cur;
5874 last = cur;
5875 }
5876 }
5877 SKIP_BLANKS;
5878 } while (RAW == '|');
5879 if (RAW != ')') {
5880 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5881 xmlFreeEnumeration(ret);
5882 return(NULL);
5883 }
5884 NEXT;
5885 return(ret);
5886}
5887
5888/**
5889 * xmlParseEnumerationType:
5890 * @ctxt: an XML parser context
5891 *
5892 * parse an Enumeration attribute type.
5893 *
5894 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5895 *
5896 * [ VC: Enumeration ]
5897 * Values of this type must match one of the Nmtoken tokens in
5898 * the declaration
5899 *
5900 * Returns: the enumeration attribute tree built while parsing
5901 */
5902
5903xmlEnumerationPtr
5904xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5905 xmlChar *name;
5906 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5907
5908 if (RAW != '(') {
5909 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5910 return(NULL);
5911 }
5912 SHRINK;
5913 do {
5914 NEXT;
5915 SKIP_BLANKS;
5916 name = xmlParseNmtoken(ctxt);
5917 if (name == NULL) {
5918 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5919 return(ret);
5920 }
5921 tmp = ret;
5922 while (tmp != NULL) {
5923 if (xmlStrEqual(name, tmp->name)) {
5924 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5925 "standalone: attribute enumeration value token %s duplicated\n",
5926 name, NULL);
5927 if (!xmlDictOwns(ctxt->dict, name))
5928 xmlFree(name);
5929 break;
5930 }
5931 tmp = tmp->next;
5932 }
5933 if (tmp == NULL) {
5934 cur = xmlCreateEnumeration(name);
5935 if (!xmlDictOwns(ctxt->dict, name))
5936 xmlFree(name);
5937 if (cur == NULL) {
5938 xmlFreeEnumeration(ret);
5939 return(NULL);
5940 }
5941 if (last == NULL) ret = last = cur;
5942 else {
5943 last->next = cur;
5944 last = cur;
5945 }
5946 }
5947 SKIP_BLANKS;
5948 } while (RAW == '|');
5949 if (RAW != ')') {
5950 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5951 return(ret);
5952 }
5953 NEXT;
5954 return(ret);
5955}
5956
5957/**
5958 * xmlParseEnumeratedType:
5959 * @ctxt: an XML parser context
5960 * @tree: the enumeration tree built while parsing
5961 *
5962 * parse an Enumerated attribute type.
5963 *
5964 * [57] EnumeratedType ::= NotationType | Enumeration
5965 *
5966 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5967 *
5968 *
5969 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5970 */
5971
5972int
5973xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5974 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5975 SKIP(8);
5976 if (!IS_BLANK_CH(CUR)) {
5977 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5978 "Space required after 'NOTATION'\n");
5979 return(0);
5980 }
5981 SKIP_BLANKS;
5982 *tree = xmlParseNotationType(ctxt);
5983 if (*tree == NULL) return(0);
5984 return(XML_ATTRIBUTE_NOTATION);
5985 }
5986 *tree = xmlParseEnumerationType(ctxt);
5987 if (*tree == NULL) return(0);
5988 return(XML_ATTRIBUTE_ENUMERATION);
5989}
5990
5991/**
5992 * xmlParseAttributeType:
5993 * @ctxt: an XML parser context
5994 * @tree: the enumeration tree built while parsing
5995 *
5996 * parse the Attribute list def for an element
5997 *
5998 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5999 *
6000 * [55] StringType ::= 'CDATA'
6001 *
6002 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6003 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6004 *
6005 * Validity constraints for attribute values syntax are checked in
6006 * xmlValidateAttributeValue()
6007 *
6008 * [ VC: ID ]
6009 * Values of type ID must match the Name production. A name must not
6010 * appear more than once in an XML document as a value of this type;
6011 * i.e., ID values must uniquely identify the elements which bear them.
6012 *
6013 * [ VC: One ID per Element Type ]
6014 * No element type may have more than one ID attribute specified.
6015 *
6016 * [ VC: ID Attribute Default ]
6017 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6018 *
6019 * [ VC: IDREF ]
6020 * Values of type IDREF must match the Name production, and values
6021 * of type IDREFS must match Names; each IDREF Name must match the value
6022 * of an ID attribute on some element in the XML document; i.e. IDREF
6023 * values must match the value of some ID attribute.
6024 *
6025 * [ VC: Entity Name ]
6026 * Values of type ENTITY must match the Name production, values
6027 * of type ENTITIES must match Names; each Entity Name must match the
6028 * name of an unparsed entity declared in the DTD.
6029 *
6030 * [ VC: Name Token ]
6031 * Values of type NMTOKEN must match the Nmtoken production; values
6032 * of type NMTOKENS must match Nmtokens.
6033 *
6034 * Returns the attribute type
6035 */
6036int
6037xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6038 SHRINK;
6039 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6040 SKIP(5);
6041 return(XML_ATTRIBUTE_CDATA);
6042 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6043 SKIP(6);
6044 return(XML_ATTRIBUTE_IDREFS);
6045 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6046 SKIP(5);
6047 return(XML_ATTRIBUTE_IDREF);
6048 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6049 SKIP(2);
6050 return(XML_ATTRIBUTE_ID);
6051 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6052 SKIP(6);
6053 return(XML_ATTRIBUTE_ENTITY);
6054 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6055 SKIP(8);
6056 return(XML_ATTRIBUTE_ENTITIES);
6057 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6058 SKIP(8);
6059 return(XML_ATTRIBUTE_NMTOKENS);
6060 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6061 SKIP(7);
6062 return(XML_ATTRIBUTE_NMTOKEN);
6063 }
6064 return(xmlParseEnumeratedType(ctxt, tree));
6065}
6066
6067/**
6068 * xmlParseAttributeListDecl:
6069 * @ctxt: an XML parser context
6070 *
6071 * : parse the Attribute list def for an element
6072 *
6073 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6074 *
6075 * [53] AttDef ::= S Name S AttType S DefaultDecl
6076 *
6077 */
6078void
6079xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6080 const xmlChar *elemName;
6081 const xmlChar *attrName;
6082 xmlEnumerationPtr tree;
6083
6084 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6085 xmlParserInputPtr input = ctxt->input;
6086
6087 SKIP(9);
6088 if (!IS_BLANK_CH(CUR)) {
6089 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6090 "Space required after '<!ATTLIST'\n");
6091 }
6092 SKIP_BLANKS;
6093 elemName = xmlParseName(ctxt);
6094 if (elemName == NULL) {
6095 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6096 "ATTLIST: no name for Element\n");
6097 return;
6098 }
6099 SKIP_BLANKS;
6100 GROW;
6101 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6102 const xmlChar *check = CUR_PTR;
6103 int type;
6104 int def;
6105 xmlChar *defaultValue = NULL;
6106
6107 GROW;
6108 tree = NULL;
6109 attrName = xmlParseName(ctxt);
6110 if (attrName == NULL) {
6111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6112 "ATTLIST: no name for Attribute\n");
6113 break;
6114 }
6115 GROW;
6116 if (!IS_BLANK_CH(CUR)) {
6117 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6118 "Space required after the attribute name\n");
6119 break;
6120 }
6121 SKIP_BLANKS;
6122
6123 type = xmlParseAttributeType(ctxt, &tree);
6124 if (type <= 0) {
6125 break;
6126 }
6127
6128 GROW;
6129 if (!IS_BLANK_CH(CUR)) {
6130 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6131 "Space required after the attribute type\n");
6132 if (tree != NULL)
6133 xmlFreeEnumeration(tree);
6134 break;
6135 }
6136 SKIP_BLANKS;
6137
6138 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6139 if (def <= 0) {
6140 if (defaultValue != NULL)
6141 xmlFree(defaultValue);
6142 if (tree != NULL)
6143 xmlFreeEnumeration(tree);
6144 break;
6145 }
6146 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6147 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6148
6149 GROW;
6150 if (RAW != '>') {
6151 if (!IS_BLANK_CH(CUR)) {
6152 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6153 "Space required after the attribute default value\n");
6154 if (defaultValue != NULL)
6155 xmlFree(defaultValue);
6156 if (tree != NULL)
6157 xmlFreeEnumeration(tree);
6158 break;
6159 }
6160 SKIP_BLANKS;
6161 }
6162 if (check == CUR_PTR) {
6163 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6164 "in xmlParseAttributeListDecl\n");
6165 if (defaultValue != NULL)
6166 xmlFree(defaultValue);
6167 if (tree != NULL)
6168 xmlFreeEnumeration(tree);
6169 break;
6170 }
6171 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6172 (ctxt->sax->attributeDecl != NULL))
6173 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6174 type, def, defaultValue, tree);
6175 else if (tree != NULL)
6176 xmlFreeEnumeration(tree);
6177
6178 if ((ctxt->sax2) && (defaultValue != NULL) &&
6179 (def != XML_ATTRIBUTE_IMPLIED) &&
6180 (def != XML_ATTRIBUTE_REQUIRED)) {
6181 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6182 }
6183 if (ctxt->sax2) {
6184 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6185 }
6186 if (defaultValue != NULL)
6187 xmlFree(defaultValue);
6188 GROW;
6189 }
6190 if (RAW == '>') {
6191 if (input != ctxt->input) {
6192 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6193 "Attribute list declaration doesn't start and stop in the same entity\n",
6194 NULL, NULL);
6195 }
6196 NEXT;
6197 }
6198 }
6199}
6200
6201/**
6202 * xmlParseElementMixedContentDecl:
6203 * @ctxt: an XML parser context
6204 * @inputchk: the input used for the current entity, needed for boundary checks
6205 *
6206 * parse the declaration for a Mixed Element content
6207 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6208 *
6209 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6210 * '(' S? '#PCDATA' S? ')'
6211 *
6212 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6213 *
6214 * [ VC: No Duplicate Types ]
6215 * The same name must not appear more than once in a single
6216 * mixed-content declaration.
6217 *
6218 * returns: the list of the xmlElementContentPtr describing the element choices
6219 */
6220xmlElementContentPtr
6221xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6222 xmlElementContentPtr ret = NULL, cur = NULL, n;
6223 const xmlChar *elem = NULL;
6224
6225 GROW;
6226 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6227 SKIP(7);
6228 SKIP_BLANKS;
6229 SHRINK;
6230 if (RAW == ')') {
6231 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6232 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6233"Element content declaration doesn't start and stop in the same entity\n",
6234 NULL, NULL);
6235 }
6236 NEXT;
6237 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6238 if (ret == NULL)
6239 return(NULL);
6240 if (RAW == '*') {
6241 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6242 NEXT;
6243 }
6244 return(ret);
6245 }
6246 if ((RAW == '(') || (RAW == '|')) {
6247 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6248 if (ret == NULL) return(NULL);
6249 }
6250 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6251 NEXT;
6252 if (elem == NULL) {
6253 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6254 if (ret == NULL) return(NULL);
6255 ret->c1 = cur;
6256 if (cur != NULL)
6257 cur->parent = ret;
6258 cur = ret;
6259 } else {
6260 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6261 if (n == NULL) return(NULL);
6262 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6263 if (n->c1 != NULL)
6264 n->c1->parent = n;
6265 cur->c2 = n;
6266 if (n != NULL)
6267 n->parent = cur;
6268 cur = n;
6269 }
6270 SKIP_BLANKS;
6271 elem = xmlParseName(ctxt);
6272 if (elem == NULL) {
6273 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6274 "xmlParseElementMixedContentDecl : Name expected\n");
6275 xmlFreeDocElementContent(ctxt->myDoc, cur);
6276 return(NULL);
6277 }
6278 SKIP_BLANKS;
6279 GROW;
6280 }
6281 if ((RAW == ')') && (NXT(1) == '*')) {
6282 if (elem != NULL) {
6283 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6284 XML_ELEMENT_CONTENT_ELEMENT);
6285 if (cur->c2 != NULL)
6286 cur->c2->parent = cur;
6287 }
6288 if (ret != NULL)
6289 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6290 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6291 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6292"Element content declaration doesn't start and stop in the same entity\n",
6293 NULL, NULL);
6294 }
6295 SKIP(2);
6296 } else {
6297 xmlFreeDocElementContent(ctxt->myDoc, ret);
6298 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6299 return(NULL);
6300 }
6301
6302 } else {
6303 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6304 }
6305 return(ret);
6306}
6307
6308/**
6309 * xmlParseElementChildrenContentDeclPriv:
6310 * @ctxt: an XML parser context
6311 * @inputchk: the input used for the current entity, needed for boundary checks
6312 * @depth: the level of recursion
6313 *
6314 * parse the declaration for a Mixed Element content
6315 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6316 *
6317 *
6318 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6319 *
6320 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6321 *
6322 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6323 *
6324 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6325 *
6326 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6327 * TODO Parameter-entity replacement text must be properly nested
6328 * with parenthesized groups. That is to say, if either of the
6329 * opening or closing parentheses in a choice, seq, or Mixed
6330 * construct is contained in the replacement text for a parameter
6331 * entity, both must be contained in the same replacement text. For
6332 * interoperability, if a parameter-entity reference appears in a
6333 * choice, seq, or Mixed construct, its replacement text should not
6334 * be empty, and neither the first nor last non-blank character of
6335 * the replacement text should be a connector (| or ,).
6336 *
6337 * Returns the tree of xmlElementContentPtr describing the element
6338 * hierarchy.
6339 */
6340static xmlElementContentPtr
6341xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6342 int depth) {
6343 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6344 const xmlChar *elem;
6345 xmlChar type = 0;
6346
6347 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6348 (depth > 2048)) {
6349 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6350"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6351 depth);
6352 return(NULL);
6353 }
6354 SKIP_BLANKS;
6355 GROW;
6356 if (RAW == '(') {
6357 int inputid = ctxt->input->id;
6358
6359 /* Recurse on first child */
6360 NEXT;
6361 SKIP_BLANKS;
6362 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6363 depth + 1);
6364 SKIP_BLANKS;
6365 GROW;
6366 } else {
6367 elem = xmlParseName(ctxt);
6368 if (elem == NULL) {
6369 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6370 return(NULL);
6371 }
6372 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6373 if (cur == NULL) {
6374 xmlErrMemory(ctxt, NULL);
6375 return(NULL);
6376 }
6377 GROW;
6378 if (RAW == '?') {
6379 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6380 NEXT;
6381 } else if (RAW == '*') {
6382 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6383 NEXT;
6384 } else if (RAW == '+') {
6385 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6386 NEXT;
6387 } else {
6388 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6389 }
6390 GROW;
6391 }
6392 SKIP_BLANKS;
6393 SHRINK;
6394 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6395 /*
6396 * Each loop we parse one separator and one element.
6397 */
6398 if (RAW == ',') {
6399 if (type == 0) type = CUR;
6400
6401 /*
6402 * Detect "Name | Name , Name" error
6403 */
6404 else if (type != CUR) {
6405 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6406 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6407 type);
6408 if ((last != NULL) && (last != ret))
6409 xmlFreeDocElementContent(ctxt->myDoc, last);
6410 if (ret != NULL)
6411 xmlFreeDocElementContent(ctxt->myDoc, ret);
6412 return(NULL);
6413 }
6414 NEXT;
6415
6416 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6417 if (op == NULL) {
6418 if ((last != NULL) && (last != ret))
6419 xmlFreeDocElementContent(ctxt->myDoc, last);
6420 xmlFreeDocElementContent(ctxt->myDoc, ret);
6421 return(NULL);
6422 }
6423 if (last == NULL) {
6424 op->c1 = ret;
6425 if (ret != NULL)
6426 ret->parent = op;
6427 ret = cur = op;
6428 } else {
6429 cur->c2 = op;
6430 if (op != NULL)
6431 op->parent = cur;
6432 op->c1 = last;
6433 if (last != NULL)
6434 last->parent = op;
6435 cur =op;
6436 last = NULL;
6437 }
6438 } else if (RAW == '|') {
6439 if (type == 0) type = CUR;
6440
6441 /*
6442 * Detect "Name , Name | Name" error
6443 */
6444 else if (type != CUR) {
6445 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6446 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6447 type);
6448 if ((last != NULL) && (last != ret))
6449 xmlFreeDocElementContent(ctxt->myDoc, last);
6450 if (ret != NULL)
6451 xmlFreeDocElementContent(ctxt->myDoc, ret);
6452 return(NULL);
6453 }
6454 NEXT;
6455
6456 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6457 if (op == NULL) {
6458 if ((last != NULL) && (last != ret))
6459 xmlFreeDocElementContent(ctxt->myDoc, last);
6460 if (ret != NULL)
6461 xmlFreeDocElementContent(ctxt->myDoc, ret);
6462 return(NULL);
6463 }
6464 if (last == NULL) {
6465 op->c1 = ret;
6466 if (ret != NULL)
6467 ret->parent = op;
6468 ret = cur = op;
6469 } else {
6470 cur->c2 = op;
6471 if (op != NULL)
6472 op->parent = cur;
6473 op->c1 = last;
6474 if (last != NULL)
6475 last->parent = op;
6476 cur =op;
6477 last = NULL;
6478 }
6479 } else {
6480 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6481 if ((last != NULL) && (last != ret))
6482 xmlFreeDocElementContent(ctxt->myDoc, last);
6483 if (ret != NULL)
6484 xmlFreeDocElementContent(ctxt->myDoc, ret);
6485 return(NULL);
6486 }
6487 GROW;
6488 SKIP_BLANKS;
6489 GROW;
6490 if (RAW == '(') {
6491 int inputid = ctxt->input->id;
6492 /* Recurse on second child */
6493 NEXT;
6494 SKIP_BLANKS;
6495 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6496 depth + 1);
6497 SKIP_BLANKS;
6498 } else {
6499 elem = xmlParseName(ctxt);
6500 if (elem == NULL) {
6501 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6502 if (ret != NULL)
6503 xmlFreeDocElementContent(ctxt->myDoc, ret);
6504 return(NULL);
6505 }
6506 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6507 if (last == NULL) {
6508 if (ret != NULL)
6509 xmlFreeDocElementContent(ctxt->myDoc, ret);
6510 return(NULL);
6511 }
6512 if (RAW == '?') {
6513 last->ocur = XML_ELEMENT_CONTENT_OPT;
6514 NEXT;
6515 } else if (RAW == '*') {
6516 last->ocur = XML_ELEMENT_CONTENT_MULT;
6517 NEXT;
6518 } else if (RAW == '+') {
6519 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6520 NEXT;
6521 } else {
6522 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6523 }
6524 }
6525 SKIP_BLANKS;
6526 GROW;
6527 }
6528 if ((cur != NULL) && (last != NULL)) {
6529 cur->c2 = last;
6530 if (last != NULL)
6531 last->parent = cur;
6532 }
6533 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6534 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6535"Element content declaration doesn't start and stop in the same entity\n",
6536 NULL, NULL);
6537 }
6538 NEXT;
6539 if (RAW == '?') {
6540 if (ret != NULL) {
6541 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6542 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6543 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6544 else
6545 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6546 }
6547 NEXT;
6548 } else if (RAW == '*') {
6549 if (ret != NULL) {
6550 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6551 cur = ret;
6552 /*
6553 * Some normalization:
6554 * (a | b* | c?)* == (a | b | c)*
6555 */
6556 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6557 if ((cur->c1 != NULL) &&
6558 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6560 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6561 if ((cur->c2 != NULL) &&
6562 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6563 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6564 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6565 cur = cur->c2;
6566 }
6567 }
6568 NEXT;
6569 } else if (RAW == '+') {
6570 if (ret != NULL) {
6571 int found = 0;
6572
6573 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6574 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6575 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6576 else
6577 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6578 /*
6579 * Some normalization:
6580 * (a | b*)+ == (a | b)*
6581 * (a | b?)+ == (a | b)*
6582 */
6583 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6584 if ((cur->c1 != NULL) &&
6585 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6586 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6587 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6588 found = 1;
6589 }
6590 if ((cur->c2 != NULL) &&
6591 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6592 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6593 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6594 found = 1;
6595 }
6596 cur = cur->c2;
6597 }
6598 if (found)
6599 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6600 }
6601 NEXT;
6602 }
6603 return(ret);
6604}
6605
6606/**
6607 * xmlParseElementChildrenContentDecl:
6608 * @ctxt: an XML parser context
6609 * @inputchk: the input used for the current entity, needed for boundary checks
6610 *
6611 * parse the declaration for a Mixed Element content
6612 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6613 *
6614 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6615 *
6616 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6617 *
6618 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6619 *
6620 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6621 *
6622 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6623 * TODO Parameter-entity replacement text must be properly nested
6624 * with parenthesized groups. That is to say, if either of the
6625 * opening or closing parentheses in a choice, seq, or Mixed
6626 * construct is contained in the replacement text for a parameter
6627 * entity, both must be contained in the same replacement text. For
6628 * interoperability, if a parameter-entity reference appears in a
6629 * choice, seq, or Mixed construct, its replacement text should not
6630 * be empty, and neither the first nor last non-blank character of
6631 * the replacement text should be a connector (| or ,).
6632 *
6633 * Returns the tree of xmlElementContentPtr describing the element
6634 * hierarchy.
6635 */
6636xmlElementContentPtr
6637xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6638 /* stub left for API/ABI compat */
6639 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6640}
6641
6642/**
6643 * xmlParseElementContentDecl:
6644 * @ctxt: an XML parser context
6645 * @name: the name of the element being defined.
6646 * @result: the Element Content pointer will be stored here if any
6647 *
6648 * parse the declaration for an Element content either Mixed or Children,
6649 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6650 *
6651 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6652 *
6653 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6654 */
6655
6656int
6657xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6658 xmlElementContentPtr *result) {
6659
6660 xmlElementContentPtr tree = NULL;
6661 int inputid = ctxt->input->id;
6662 int res;
6663
6664 *result = NULL;
6665
6666 if (RAW != '(') {
6667 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6668 "xmlParseElementContentDecl : %s '(' expected\n", name);
6669 return(-1);
6670 }
6671 NEXT;
6672 GROW;
6673 if (ctxt->instate == XML_PARSER_EOF)
6674 return(-1);
6675 SKIP_BLANKS;
6676 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6677 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6678 res = XML_ELEMENT_TYPE_MIXED;
6679 } else {
6680 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6681 res = XML_ELEMENT_TYPE_ELEMENT;
6682 }
6683 SKIP_BLANKS;
6684 *result = tree;
6685 return(res);
6686}
6687
6688/**
6689 * xmlParseElementDecl:
6690 * @ctxt: an XML parser context
6691 *
6692 * parse an Element declaration.
6693 *
6694 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6695 *
6696 * [ VC: Unique Element Type Declaration ]
6697 * No element type may be declared more than once
6698 *
6699 * Returns the type of the element, or -1 in case of error
6700 */
6701int
6702xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6703 const xmlChar *name;
6704 int ret = -1;
6705 xmlElementContentPtr content = NULL;
6706
6707 /* GROW; done in the caller */
6708 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6709 xmlParserInputPtr input = ctxt->input;
6710
6711 SKIP(9);
6712 if (!IS_BLANK_CH(CUR)) {
6713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6714 "Space required after 'ELEMENT'\n");
6715 return(-1);
6716 }
6717 SKIP_BLANKS;
6718 name = xmlParseName(ctxt);
6719 if (name == NULL) {
6720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6721 "xmlParseElementDecl: no name for Element\n");
6722 return(-1);
6723 }
6724 while ((RAW == 0) && (ctxt->inputNr > 1))
6725 xmlPopInput(ctxt);
6726 if (!IS_BLANK_CH(CUR)) {
6727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6728 "Space required after the element name\n");
6729 }
6730 SKIP_BLANKS;
6731 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6732 SKIP(5);
6733 /*
6734 * Element must always be empty.
6735 */
6736 ret = XML_ELEMENT_TYPE_EMPTY;
6737 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6738 (NXT(2) == 'Y')) {
6739 SKIP(3);
6740 /*
6741 * Element is a generic container.
6742 */
6743 ret = XML_ELEMENT_TYPE_ANY;
6744 } else if (RAW == '(') {
6745 ret = xmlParseElementContentDecl(ctxt, name, &content);
6746 } else {
6747 /*
6748 * [ WFC: PEs in Internal Subset ] error handling.
6749 */
6750 if ((RAW == '%') && (ctxt->external == 0) &&
6751 (ctxt->inputNr == 1)) {
6752 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6753 "PEReference: forbidden within markup decl in internal subset\n");
6754 } else {
6755 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6756 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6757 }
6758 return(-1);
6759 }
6760
6761 SKIP_BLANKS;
6762 /*
6763 * Pop-up of finished entities.
6764 */
6765 while ((RAW == 0) && (ctxt->inputNr > 1))
6766 xmlPopInput(ctxt);
6767 SKIP_BLANKS;
6768
6769 if (RAW != '>') {
6770 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6771 if (content != NULL) {
6772 xmlFreeDocElementContent(ctxt->myDoc, content);
6773 }
6774 } else {
6775 if (input != ctxt->input) {
6776 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777 "Element declaration doesn't start and stop in the same entity\n");
6778 }
6779
6780 NEXT;
6781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6782 (ctxt->sax->elementDecl != NULL)) {
6783 if (content != NULL)
6784 content->parent = NULL;
6785 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6786 content);
6787 if ((content != NULL) && (content->parent == NULL)) {
6788 /*
6789 * this is a trick: if xmlAddElementDecl is called,
6790 * instead of copying the full tree it is plugged directly
6791 * if called from the parser. Avoid duplicating the
6792 * interfaces or change the API/ABI
6793 */
6794 xmlFreeDocElementContent(ctxt->myDoc, content);
6795 }
6796 } else if (content != NULL) {
6797 xmlFreeDocElementContent(ctxt->myDoc, content);
6798 }
6799 }
6800 }
6801 return(ret);
6802}
6803
6804/**
6805 * xmlParseConditionalSections
6806 * @ctxt: an XML parser context
6807 *
6808 * [61] conditionalSect ::= includeSect | ignoreSect
6809 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6810 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6811 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6812 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6813 */
6814
6815static void
6816xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6817 int id = ctxt->input->id;
6818
6819 SKIP(3);
6820 SKIP_BLANKS;
6821 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6822 SKIP(7);
6823 SKIP_BLANKS;
6824 if (RAW != '[') {
6825 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6826 xmlHaltParser(ctxt);
6827 return;
6828 } else {
6829 if (ctxt->input->id != id) {
6830 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831 "All markup of the conditional section is not in the same entity\n",
6832 NULL, NULL);
6833 }
6834 NEXT;
6835 }
6836 if (xmlParserDebugEntities) {
6837 if ((ctxt->input != NULL) && (ctxt->input->filename))
6838 xmlGenericError(xmlGenericErrorContext,
6839 "%s(%d): ", ctxt->input->filename,
6840 ctxt->input->line);
6841 xmlGenericError(xmlGenericErrorContext,
6842 "Entering INCLUDE Conditional Section\n");
6843 }
6844
6845 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6846 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6847 const xmlChar *check = CUR_PTR;
6848 unsigned int cons = ctxt->input->consumed;
6849
6850 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6851 xmlParseConditionalSections(ctxt);
6852 } else if (IS_BLANK_CH(CUR)) {
6853 NEXT;
6854 } else if (RAW == '%') {
6855 xmlParsePEReference(ctxt);
6856 } else
6857 xmlParseMarkupDecl(ctxt);
6858
6859 /*
6860 * Pop-up of finished entities.
6861 */
6862 while ((RAW == 0) && (ctxt->inputNr > 1))
6863 xmlPopInput(ctxt);
6864
6865 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6866 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6867 xmlHaltParser(ctxt);
6868 break;
6869 }
6870 }
6871 if (xmlParserDebugEntities) {
6872 if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 xmlGenericError(xmlGenericErrorContext,
6874 "%s(%d): ", ctxt->input->filename,
6875 ctxt->input->line);
6876 xmlGenericError(xmlGenericErrorContext,
6877 "Leaving INCLUDE Conditional Section\n");
6878 }
6879
6880 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6881 int state;
6882 xmlParserInputState instate;
6883 int depth = 0;
6884
6885 SKIP(6);
6886 SKIP_BLANKS;
6887 if (RAW != '[') {
6888 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6889 xmlHaltParser(ctxt);
6890 return;
6891 } else {
6892 if (ctxt->input->id != id) {
6893 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6894 "All markup of the conditional section is not in the same entity\n",
6895 NULL, NULL);
6896 }
6897 NEXT;
6898 }
6899 if (xmlParserDebugEntities) {
6900 if ((ctxt->input != NULL) && (ctxt->input->filename))
6901 xmlGenericError(xmlGenericErrorContext,
6902 "%s(%d): ", ctxt->input->filename,
6903 ctxt->input->line);
6904 xmlGenericError(xmlGenericErrorContext,
6905 "Entering IGNORE Conditional Section\n");
6906 }
6907
6908 /*
6909 * Parse up to the end of the conditional section
6910 * But disable SAX event generating DTD building in the meantime
6911 */
6912 state = ctxt->disableSAX;
6913 instate = ctxt->instate;
6914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6915 ctxt->instate = XML_PARSER_IGNORE;
6916
6917 while (((depth >= 0) && (RAW != 0)) &&
6918 (ctxt->instate != XML_PARSER_EOF)) {
6919 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6920 depth++;
6921 SKIP(3);
6922 continue;
6923 }
6924 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6925 if (--depth >= 0) SKIP(3);
6926 continue;
6927 }
6928 NEXT;
6929 continue;
6930 }
6931
6932 ctxt->disableSAX = state;
6933 ctxt->instate = instate;
6934
6935 if (xmlParserDebugEntities) {
6936 if ((ctxt->input != NULL) && (ctxt->input->filename))
6937 xmlGenericError(xmlGenericErrorContext,
6938 "%s(%d): ", ctxt->input->filename,
6939 ctxt->input->line);
6940 xmlGenericError(xmlGenericErrorContext,
6941 "Leaving IGNORE Conditional Section\n");
6942 }
6943
6944 } else {
6945 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6946 xmlHaltParser(ctxt);
6947 return;
6948 }
6949
6950 if (RAW == 0)
6951 SHRINK;
6952
6953 if (RAW == 0) {
6954 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6955 } else {
6956 if (ctxt->input->id != id) {
6957 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6958 "All markup of the conditional section is not in the same entity\n",
6959 NULL, NULL);
6960 }
6961 if ((ctxt-> instate != XML_PARSER_EOF) &&
6962 ((ctxt->input->cur + 3) <= ctxt->input->end))
6963 SKIP(3);
6964 }
6965}
6966
6967/**
6968 * xmlParseMarkupDecl:
6969 * @ctxt: an XML parser context
6970 *
6971 * parse Markup declarations
6972 *
6973 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6974 * NotationDecl | PI | Comment
6975 *
6976 * [ VC: Proper Declaration/PE Nesting ]
6977 * Parameter-entity replacement text must be properly nested with
6978 * markup declarations. That is to say, if either the first character
6979 * or the last character of a markup declaration (markupdecl above) is
6980 * contained in the replacement text for a parameter-entity reference,
6981 * both must be contained in the same replacement text.
6982 *
6983 * [ WFC: PEs in Internal Subset ]
6984 * In the internal DTD subset, parameter-entity references can occur
6985 * only where markup declarations can occur, not within markup declarations.
6986 * (This does not apply to references that occur in external parameter
6987 * entities or to the external subset.)
6988 */
6989void
6990xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6991 GROW;
6992 if (CUR == '<') {
6993 if (NXT(1) == '!') {
6994 switch (NXT(2)) {
6995 case 'E':
6996 if (NXT(3) == 'L')
6997 xmlParseElementDecl(ctxt);
6998 else if (NXT(3) == 'N')
6999 xmlParseEntityDecl(ctxt);
7000 break;
7001 case 'A':
7002 xmlParseAttributeListDecl(ctxt);
7003 break;
7004 case 'N':
7005 xmlParseNotationDecl(ctxt);
7006 break;
7007 case '-':
7008 xmlParseComment(ctxt);
7009 break;
7010 default:
7011 /* there is an error but it will be detected later */
7012 break;
7013 }
7014 } else if (NXT(1) == '?') {
7015 xmlParsePI(ctxt);
7016 }
7017 }
7018
7019 /*
7020 * detect requirement to exit there and act accordingly
7021 * and avoid having instate overriden later on
7022 */
7023 if (ctxt->instate == XML_PARSER_EOF)
7024 return;
7025
7026 /*
7027 * This is only for internal subset. On external entities,
7028 * the replacement is done before parsing stage
7029 */
7030 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7031 xmlParsePEReference(ctxt);
7032
7033 /*
7034 * Conditional sections are allowed from entities included
7035 * by PE References in the internal subset.
7036 */
7037 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7038 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7039 xmlParseConditionalSections(ctxt);
7040 }
7041 }
7042
7043 ctxt->instate = XML_PARSER_DTD;
7044}
7045
7046/**
7047 * xmlParseTextDecl:
7048 * @ctxt: an XML parser context
7049 *
7050 * parse an XML declaration header for external entities
7051 *
7052 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7053 */
7054
7055void
7056xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7057 xmlChar *version;
7058 const xmlChar *encoding;
7059
7060 /*
7061 * We know that '<?xml' is here.
7062 */
7063 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7064 SKIP(5);
7065 } else {
7066 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7067 return;
7068 }
7069
7070 if (!IS_BLANK_CH(CUR)) {
7071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7072 "Space needed after '<?xml'\n");
7073 }
7074 SKIP_BLANKS;
7075
7076 /*
7077 * We may have the VersionInfo here.
7078 */
7079 version = xmlParseVersionInfo(ctxt);
7080 if (version == NULL)
7081 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7082 else {
7083 if (!IS_BLANK_CH(CUR)) {
7084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7085 "Space needed here\n");
7086 }
7087 }
7088 ctxt->input->version = version;
7089
7090 /*
7091 * We must have the encoding declaration
7092 */
7093 encoding = xmlParseEncodingDecl(ctxt);
7094 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7095 /*
7096 * The XML REC instructs us to stop parsing right here
7097 */
7098 return;
7099 }
7100 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7101 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7102 "Missing encoding in text declaration\n");
7103 }
7104
7105 SKIP_BLANKS;
7106 if ((RAW == '?') && (NXT(1) == '>')) {
7107 SKIP(2);
7108 } else if (RAW == '>') {
7109 /* Deprecated old WD ... */
7110 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7111 NEXT;
7112 } else {
7113 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7114 MOVETO_ENDTAG(CUR_PTR);
7115 NEXT;
7116 }
7117}
7118
7119/**
7120 * xmlParseExternalSubset:
7121 * @ctxt: an XML parser context
7122 * @ExternalID: the external identifier
7123 * @SystemID: the system identifier (or URL)
7124 *
7125 * parse Markup declarations from an external subset
7126 *
7127 * [30] extSubset ::= textDecl? extSubsetDecl
7128 *
7129 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7130 */
7131void
7132xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7133 const xmlChar *SystemID) {
7134 xmlDetectSAX2(ctxt);
7135 GROW;
7136
7137 if ((ctxt->encoding == NULL) &&
7138 (ctxt->input->end - ctxt->input->cur >= 4)) {
7139 xmlChar start[4];
7140 xmlCharEncoding enc;
7141
7142 start[0] = RAW;
7143 start[1] = NXT(1);
7144 start[2] = NXT(2);
7145 start[3] = NXT(3);
7146 enc = xmlDetectCharEncoding(start, 4);
7147 if (enc != XML_CHAR_ENCODING_NONE)
7148 xmlSwitchEncoding(ctxt, enc);
7149 }
7150
7151 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7152 xmlParseTextDecl(ctxt);
7153 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7154 /*
7155 * The XML REC instructs us to stop parsing right here
7156 */
7157 xmlHaltParser(ctxt);
7158 return;
7159 }
7160 }
7161 if (ctxt->myDoc == NULL) {
7162 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7163 if (ctxt->myDoc == NULL) {
7164 xmlErrMemory(ctxt, "New Doc failed");
7165 return;
7166 }
7167 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7168 }
7169 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7170 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7171
7172 ctxt->instate = XML_PARSER_DTD;
7173 ctxt->external = 1;
7174 while (((RAW == '<') && (NXT(1) == '?')) ||
7175 ((RAW == '<') && (NXT(1) == '!')) ||
7176 (RAW == '%') || IS_BLANK_CH(CUR)) {
7177 const xmlChar *check = CUR_PTR;
7178 unsigned int cons = ctxt->input->consumed;
7179
7180 GROW;
7181 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7182 xmlParseConditionalSections(ctxt);
7183 } else if (IS_BLANK_CH(CUR)) {
7184 NEXT;
7185 } else if (RAW == '%') {
7186 xmlParsePEReference(ctxt);
7187 } else
7188 xmlParseMarkupDecl(ctxt);
7189
7190 /*
7191 * Pop-up of finished entities.
7192 */
7193 while ((RAW == 0) && (ctxt->inputNr > 1))
7194 xmlPopInput(ctxt);
7195
7196 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7197 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7198 break;
7199 }
7200 }
7201
7202 if (RAW != 0) {
7203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7204 }
7205
7206}
7207
7208/**
7209 * xmlParseReference:
7210 * @ctxt: an XML parser context
7211 *
7212 * parse and handle entity references in content, depending on the SAX
7213 * interface, this may end-up in a call to character() if this is a
7214 * CharRef, a predefined entity, if there is no reference() callback.
7215 * or if the parser was asked to switch to that mode.
7216 *
7217 * [67] Reference ::= EntityRef | CharRef
7218 */
7219void
7220xmlParseReference(xmlParserCtxtPtr ctxt) {
7221 xmlEntityPtr ent;
7222 xmlChar *val;
7223 int was_checked;
7224 xmlNodePtr list = NULL;
7225 xmlParserErrors ret = XML_ERR_OK;
7226
7227
7228 if (RAW != '&')
7229 return;
7230
7231 /*
7232 * Simple case of a CharRef
7233 */
7234 if (NXT(1) == '#') {
7235 int i = 0;
7236 xmlChar out[10];
7237 int hex = NXT(2);
7238 int value = xmlParseCharRef(ctxt);
7239
7240 if (value == 0)
7241 return;
7242 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7243 /*
7244 * So we are using non-UTF-8 buffers
7245 * Check that the char fit on 8bits, if not
7246 * generate a CharRef.
7247 */
7248 if (value <= 0xFF) {
7249 out[0] = value;
7250 out[1] = 0;
7251 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7252 (!ctxt->disableSAX))
7253 ctxt->sax->characters(ctxt->userData, out, 1);
7254 } else {
7255 if ((hex == 'x') || (hex == 'X'))
7256 snprintf((char *)out, sizeof(out), "#x%X", value);
7257 else
7258 snprintf((char *)out, sizeof(out), "#%d", value);
7259 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7260 (!ctxt->disableSAX))
7261 ctxt->sax->reference(ctxt->userData, out);
7262 }
7263 } else {
7264 /*
7265 * Just encode the value in UTF-8
7266 */
7267 COPY_BUF(0 ,out, i, value);
7268 out[i] = 0;
7269 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7270 (!ctxt->disableSAX))
7271 ctxt->sax->characters(ctxt->userData, out, i);
7272 }
7273 return;
7274 }
7275
7276 /*
7277 * We are seeing an entity reference
7278 */
7279 ent = xmlParseEntityRef(ctxt);
7280 if (ent == NULL) return;
7281 if (!ctxt->wellFormed)
7282 return;
7283 was_checked = ent->checked;
7284
7285 /* special case of predefined entities */
7286 if ((ent->name == NULL) ||
7287 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7288 val = ent->content;
7289 if (val == NULL) return;
7290 /*
7291 * inline the entity.
7292 */
7293 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7294 (!ctxt->disableSAX))
7295 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7296 return;
7297 }
7298
7299 /*
7300 * The first reference to the entity trigger a parsing phase
7301 * where the ent->children is filled with the result from
7302 * the parsing.
7303 * Note: external parsed entities will not be loaded, it is not
7304 * required for a non-validating parser, unless the parsing option
7305 * of validating, or substituting entities were given. Doing so is
7306 * far more secure as the parser will only process data coming from
7307 * the document entity by default.
7308 */
7309 if (((ent->checked == 0) ||
7310 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7311 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7312 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7313 unsigned long oldnbent = ctxt->nbentities;
7314
7315 /*
7316 * This is a bit hackish but this seems the best
7317 * way to make sure both SAX and DOM entity support
7318 * behaves okay.
7319 */
7320 void *user_data;
7321 if (ctxt->userData == ctxt)
7322 user_data = NULL;
7323 else
7324 user_data = ctxt->userData;
7325
7326 /*
7327 * Check that this entity is well formed
7328 * 4.3.2: An internal general parsed entity is well-formed
7329 * if its replacement text matches the production labeled
7330 * content.
7331 */
7332 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7333 ctxt->depth++;
7334 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7335 user_data, &list);
7336 ctxt->depth--;
7337
7338 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7339 ctxt->depth++;
7340 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7341 user_data, ctxt->depth, ent->URI,
7342 ent->ExternalID, &list);
7343 ctxt->depth--;
7344 } else {
7345 ret = XML_ERR_ENTITY_PE_INTERNAL;
7346 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7347 "invalid entity type found\n", NULL);
7348 }
7349
7350 /*
7351 * Store the number of entities needing parsing for this entity
7352 * content and do checkings
7353 */
7354 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7355 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7356 ent->checked |= 1;
7357 if (ret == XML_ERR_ENTITY_LOOP) {
7358 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7359 xmlFreeNodeList(list);
7360 return;
7361 }
7362 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7363 xmlFreeNodeList(list);
7364 return;
7365 }
7366
7367 if ((ret == XML_ERR_OK) && (list != NULL)) {
7368 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7369 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7370 (ent->children == NULL)) {
7371 ent->children = list;
7372 if (ctxt->replaceEntities) {
7373 /*
7374 * Prune it directly in the generated document
7375 * except for single text nodes.
7376 */
7377 if (((list->type == XML_TEXT_NODE) &&
7378 (list->next == NULL)) ||
7379 (ctxt->parseMode == XML_PARSE_READER)) {
7380 list->parent = (xmlNodePtr) ent;
7381 list = NULL;
7382 ent->owner = 1;
7383 } else {
7384 ent->owner = 0;
7385 while (list != NULL) {
7386 list->parent = (xmlNodePtr) ctxt->node;
7387 list->doc = ctxt->myDoc;
7388 if (list->next == NULL)
7389 ent->last = list;
7390 list = list->next;
7391 }
7392 list = ent->children;
7393#ifdef LIBXML_LEGACY_ENABLED
7394 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7395 xmlAddEntityReference(ent, list, NULL);
7396#endif /* LIBXML_LEGACY_ENABLED */
7397 }
7398 } else {
7399 ent->owner = 1;
7400 while (list != NULL) {
7401 list->parent = (xmlNodePtr) ent;
7402 xmlSetTreeDoc(list, ent->doc);
7403 if (list->next == NULL)
7404 ent->last = list;
7405 list = list->next;
7406 }
7407 }
7408 } else {
7409 xmlFreeNodeList(list);
7410 list = NULL;
7411 }
7412 } else if ((ret != XML_ERR_OK) &&
7413 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7414 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7415 "Entity '%s' failed to parse\n", ent->name);
7416 xmlParserEntityCheck(ctxt, 0, ent, 0);
7417 } else if (list != NULL) {
7418 xmlFreeNodeList(list);
7419 list = NULL;
7420 }
7421 if (ent->checked == 0)
7422 ent->checked = 2;
7423 } else if (ent->checked != 1) {
7424 ctxt->nbentities += ent->checked / 2;
7425 }
7426
7427 /*
7428 * Now that the entity content has been gathered
7429 * provide it to the application, this can take different forms based
7430 * on the parsing modes.
7431 */
7432 if (ent->children == NULL) {
7433 /*
7434 * Probably running in SAX mode and the callbacks don't
7435 * build the entity content. So unless we already went
7436 * though parsing for first checking go though the entity
7437 * content to generate callbacks associated to the entity
7438 */
7439 if (was_checked != 0) {
7440 void *user_data;
7441 /*
7442 * This is a bit hackish but this seems the best
7443 * way to make sure both SAX and DOM entity support
7444 * behaves okay.
7445 */
7446 if (ctxt->userData == ctxt)
7447 user_data = NULL;
7448 else
7449 user_data = ctxt->userData;
7450
7451 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7452 ctxt->depth++;
7453 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7454 ent->content, user_data, NULL);
7455 ctxt->depth--;
7456 } else if (ent->etype ==
7457 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7458 ctxt->depth++;
7459 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7460 ctxt->sax, user_data, ctxt->depth,
7461 ent->URI, ent->ExternalID, NULL);
7462 ctxt->depth--;
7463 } else {
7464 ret = XML_ERR_ENTITY_PE_INTERNAL;
7465 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7466 "invalid entity type found\n", NULL);
7467 }
7468 if (ret == XML_ERR_ENTITY_LOOP) {
7469 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7470 return;
7471 }
7472 }
7473 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7474 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7475 /*
7476 * Entity reference callback comes second, it's somewhat
7477 * superfluous but a compatibility to historical behaviour
7478 */
7479 ctxt->sax->reference(ctxt->userData, ent->name);
7480 }
7481 return;
7482 }
7483
7484 /*
7485 * If we didn't get any children for the entity being built
7486 */
7487 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7488 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7489 /*
7490 * Create a node.
7491 */
7492 ctxt->sax->reference(ctxt->userData, ent->name);
7493 return;
7494 }
7495
7496 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7497 /*
7498 * There is a problem on the handling of _private for entities
7499 * (bug 155816): Should we copy the content of the field from
7500 * the entity (possibly overwriting some value set by the user
7501 * when a copy is created), should we leave it alone, or should
7502 * we try to take care of different situations? The problem
7503 * is exacerbated by the usage of this field by the xmlReader.
7504 * To fix this bug, we look at _private on the created node
7505 * and, if it's NULL, we copy in whatever was in the entity.
7506 * If it's not NULL we leave it alone. This is somewhat of a
7507 * hack - maybe we should have further tests to determine
7508 * what to do.
7509 */
7510 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7511 /*
7512 * Seems we are generating the DOM content, do
7513 * a simple tree copy for all references except the first
7514 * In the first occurrence list contains the replacement.
7515 */
7516 if (((list == NULL) && (ent->owner == 0)) ||
7517 (ctxt->parseMode == XML_PARSE_READER)) {
7518 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7519
7520 /*
7521 * We are copying here, make sure there is no abuse
7522 */
7523 ctxt->sizeentcopy += ent->length + 5;
7524 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7525 return;
7526
7527 /*
7528 * when operating on a reader, the entities definitions
7529 * are always owning the entities subtree.
7530 if (ctxt->parseMode == XML_PARSE_READER)
7531 ent->owner = 1;
7532 */
7533
7534 cur = ent->children;
7535 while (cur != NULL) {
7536 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7537 if (nw != NULL) {
7538 if (nw->_private == NULL)
7539 nw->_private = cur->_private;
7540 if (firstChild == NULL){
7541 firstChild = nw;
7542 }
7543 nw = xmlAddChild(ctxt->node, nw);
7544 }
7545 if (cur == ent->last) {
7546 /*
7547 * needed to detect some strange empty
7548 * node cases in the reader tests
7549 */
7550 if ((ctxt->parseMode == XML_PARSE_READER) &&
7551 (nw != NULL) &&
7552 (nw->type == XML_ELEMENT_NODE) &&
7553 (nw->children == NULL))
7554 nw->extra = 1;
7555
7556 break;
7557 }
7558 cur = cur->next;
7559 }
7560#ifdef LIBXML_LEGACY_ENABLED
7561 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7562 xmlAddEntityReference(ent, firstChild, nw);
7563#endif /* LIBXML_LEGACY_ENABLED */
7564 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7565 xmlNodePtr nw = NULL, cur, next, last,
7566 firstChild = NULL;
7567
7568 /*
7569 * We are copying here, make sure there is no abuse
7570 */
7571 ctxt->sizeentcopy += ent->length + 5;
7572 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7573 return;
7574
7575 /*
7576 * Copy the entity child list and make it the new
7577 * entity child list. The goal is to make sure any
7578 * ID or REF referenced will be the one from the
7579 * document content and not the entity copy.
7580 */
7581 cur = ent->children;
7582 ent->children = NULL;
7583 last = ent->last;
7584 ent->last = NULL;
7585 while (cur != NULL) {
7586 next = cur->next;
7587 cur->next = NULL;
7588 cur->parent = NULL;
7589 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7590 if (nw != NULL) {
7591 if (nw->_private == NULL)
7592 nw->_private = cur->_private;
7593 if (firstChild == NULL){
7594 firstChild = cur;
7595 }
7596 xmlAddChild((xmlNodePtr) ent, nw);
7597 xmlAddChild(ctxt->node, cur);
7598 }
7599 if (cur == last)
7600 break;
7601 cur = next;
7602 }
7603 if (ent->owner == 0)
7604 ent->owner = 1;
7605#ifdef LIBXML_LEGACY_ENABLED
7606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7607 xmlAddEntityReference(ent, firstChild, nw);
7608#endif /* LIBXML_LEGACY_ENABLED */
7609 } else {
7610 const xmlChar *nbktext;
7611
7612 /*
7613 * the name change is to avoid coalescing of the
7614 * node with a possible previous text one which
7615 * would make ent->children a dangling pointer
7616 */
7617 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7618 -1);
7619 if (ent->children->type == XML_TEXT_NODE)
7620 ent->children->name = nbktext;
7621 if ((ent->last != ent->children) &&
7622 (ent->last->type == XML_TEXT_NODE))
7623 ent->last->name = nbktext;
7624 xmlAddChildList(ctxt->node, ent->children);
7625 }
7626
7627 /*
7628 * This is to avoid a nasty side effect, see
7629 * characters() in SAX.c
7630 */
7631 ctxt->nodemem = 0;
7632 ctxt->nodelen = 0;
7633 return;
7634 }
7635 }
7636}
7637
7638/**
7639 * xmlParseEntityRef:
7640 * @ctxt: an XML parser context
7641 *
7642 * parse ENTITY references declarations
7643 *
7644 * [68] EntityRef ::= '&' Name ';'
7645 *
7646 * [ WFC: Entity Declared ]
7647 * In a document without any DTD, a document with only an internal DTD
7648 * subset which contains no parameter entity references, or a document
7649 * with "standalone='yes'", the Name given in the entity reference
7650 * must match that in an entity declaration, except that well-formed
7651 * documents need not declare any of the following entities: amp, lt,
7652 * gt, apos, quot. The declaration of a parameter entity must precede
7653 * any reference to it. Similarly, the declaration of a general entity
7654 * must precede any reference to it which appears in a default value in an
7655 * attribute-list declaration. Note that if entities are declared in the
7656 * external subset or in external parameter entities, a non-validating
7657 * processor is not obligated to read and process their declarations;
7658 * for such documents, the rule that an entity must be declared is a
7659 * well-formedness constraint only if standalone='yes'.
7660 *
7661 * [ WFC: Parsed Entity ]
7662 * An entity reference must not contain the name of an unparsed entity
7663 *
7664 * Returns the xmlEntityPtr if found, or NULL otherwise.
7665 */
7666xmlEntityPtr
7667xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7668 const xmlChar *name;
7669 xmlEntityPtr ent = NULL;
7670
7671 GROW;
7672 if (ctxt->instate == XML_PARSER_EOF)
7673 return(NULL);
7674
7675 if (RAW != '&')
7676 return(NULL);
7677 NEXT;
7678 name = xmlParseName(ctxt);
7679 if (name == NULL) {
7680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7681 "xmlParseEntityRef: no name\n");
7682 return(NULL);
7683 }
7684 if (RAW != ';') {
7685 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7686 return(NULL);
7687 }
7688 NEXT;
7689
7690 /*
7691 * Predefined entities override any extra definition
7692 */
7693 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7694 ent = xmlGetPredefinedEntity(name);
7695 if (ent != NULL)
7696 return(ent);
7697 }
7698
7699 /*
7700 * Increase the number of entity references parsed
7701 */
7702 ctxt->nbentities++;
7703
7704 /*
7705 * Ask first SAX for entity resolution, otherwise try the
7706 * entities which may have stored in the parser context.
7707 */
7708 if (ctxt->sax != NULL) {
7709 if (ctxt->sax->getEntity != NULL)
7710 ent = ctxt->sax->getEntity(ctxt->userData, name);
7711 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7712 (ctxt->options & XML_PARSE_OLDSAX))
7713 ent = xmlGetPredefinedEntity(name);
7714 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7715 (ctxt->userData==ctxt)) {
7716 ent = xmlSAX2GetEntity(ctxt, name);
7717 }
7718 }
7719 if (ctxt->instate == XML_PARSER_EOF)
7720 return(NULL);
7721 /*
7722 * [ WFC: Entity Declared ]
7723 * In a document without any DTD, a document with only an
7724 * internal DTD subset which contains no parameter entity
7725 * references, or a document with "standalone='yes'", the
7726 * Name given in the entity reference must match that in an
7727 * entity declaration, except that well-formed documents
7728 * need not declare any of the following entities: amp, lt,
7729 * gt, apos, quot.
7730 * The declaration of a parameter entity must precede any
7731 * reference to it.
7732 * Similarly, the declaration of a general entity must
7733 * precede any reference to it which appears in a default
7734 * value in an attribute-list declaration. Note that if
7735 * entities are declared in the external subset or in
7736 * external parameter entities, a non-validating processor
7737 * is not obligated to read and process their declarations;
7738 * for such documents, the rule that an entity must be
7739 * declared is a well-formedness constraint only if
7740 * standalone='yes'.
7741 */
7742 if (ent == NULL) {
7743 if ((ctxt->standalone == 1) ||
7744 ((ctxt->hasExternalSubset == 0) &&
7745 (ctxt->hasPErefs == 0))) {
7746 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7747 "Entity '%s' not defined\n", name);
7748 } else {
7749 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7750 "Entity '%s' not defined\n", name);
7751 if ((ctxt->inSubset == 0) &&
7752 (ctxt->sax != NULL) &&
7753 (ctxt->sax->reference != NULL)) {
7754 ctxt->sax->reference(ctxt->userData, name);
7755 }
7756 }
7757 xmlParserEntityCheck(ctxt, 0, ent, 0);
7758 ctxt->valid = 0;
7759 }
7760
7761 /*
7762 * [ WFC: Parsed Entity ]
7763 * An entity reference must not contain the name of an
7764 * unparsed entity
7765 */
7766 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7767 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7768 "Entity reference to unparsed entity %s\n", name);
7769 }
7770
7771 /*
7772 * [ WFC: No External Entity References ]
7773 * Attribute values cannot contain direct or indirect
7774 * entity references to external entities.
7775 */
7776 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7777 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7779 "Attribute references external entity '%s'\n", name);
7780 }
7781 /*
7782 * [ WFC: No < in Attribute Values ]
7783 * The replacement text of any entity referred to directly or
7784 * indirectly in an attribute value (other than "&lt;") must
7785 * not contain a <.
7786 */
7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7788 (ent != NULL) &&
7789 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7790 if (((ent->checked & 1) || (ent->checked == 0)) &&
7791 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7792 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7793 "'<' in entity '%s' is not allowed in attributes values\n", name);
7794 }
7795 }
7796
7797 /*
7798 * Internal check, no parameter entities here ...
7799 */
7800 else {
7801 switch (ent->etype) {
7802 case XML_INTERNAL_PARAMETER_ENTITY:
7803 case XML_EXTERNAL_PARAMETER_ENTITY:
7804 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7805 "Attempt to reference the parameter entity '%s'\n",
7806 name);
7807 break;
7808 default:
7809 break;
7810 }
7811 }
7812
7813 /*
7814 * [ WFC: No Recursion ]
7815 * A parsed entity must not contain a recursive reference
7816 * to itself, either directly or indirectly.
7817 * Done somewhere else
7818 */
7819 return(ent);
7820}
7821
7822/**
7823 * xmlParseStringEntityRef:
7824 * @ctxt: an XML parser context
7825 * @str: a pointer to an index in the string
7826 *
7827 * parse ENTITY references declarations, but this version parses it from
7828 * a string value.
7829 *
7830 * [68] EntityRef ::= '&' Name ';'
7831 *
7832 * [ WFC: Entity Declared ]
7833 * In a document without any DTD, a document with only an internal DTD
7834 * subset which contains no parameter entity references, or a document
7835 * with "standalone='yes'", the Name given in the entity reference
7836 * must match that in an entity declaration, except that well-formed
7837 * documents need not declare any of the following entities: amp, lt,
7838 * gt, apos, quot. The declaration of a parameter entity must precede
7839 * any reference to it. Similarly, the declaration of a general entity
7840 * must precede any reference to it which appears in a default value in an
7841 * attribute-list declaration. Note that if entities are declared in the
7842 * external subset or in external parameter entities, a non-validating
7843 * processor is not obligated to read and process their declarations;
7844 * for such documents, the rule that an entity must be declared is a
7845 * well-formedness constraint only if standalone='yes'.
7846 *
7847 * [ WFC: Parsed Entity ]
7848 * An entity reference must not contain the name of an unparsed entity
7849 *
7850 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7851 * is updated to the current location in the string.
7852 */
7853static xmlEntityPtr
7854xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7855 xmlChar *name;
7856 const xmlChar *ptr;
7857 xmlChar cur;
7858 xmlEntityPtr ent = NULL;
7859
7860 if ((str == NULL) || (*str == NULL))
7861 return(NULL);
7862 ptr = *str;
7863 cur = *ptr;
7864 if (cur != '&')
7865 return(NULL);
7866
7867 ptr++;
7868 name = xmlParseStringName(ctxt, &ptr);
7869 if (name == NULL) {
7870 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7871 "xmlParseStringEntityRef: no name\n");
7872 *str = ptr;
7873 return(NULL);
7874 }
7875 if (*ptr != ';') {
7876 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7877 xmlFree(name);
7878 *str = ptr;
7879 return(NULL);
7880 }
7881 ptr++;
7882
7883
7884 /*
7885 * Predefined entities override any extra definition
7886 */
7887 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7888 ent = xmlGetPredefinedEntity(name);
7889 if (ent != NULL) {
7890 xmlFree(name);
7891 *str = ptr;
7892 return(ent);
7893 }
7894 }
7895
7896 /*
7897 * Increate the number of entity references parsed
7898 */
7899 ctxt->nbentities++;
7900
7901 /*
7902 * Ask first SAX for entity resolution, otherwise try the
7903 * entities which may have stored in the parser context.
7904 */
7905 if (ctxt->sax != NULL) {
7906 if (ctxt->sax->getEntity != NULL)
7907 ent = ctxt->sax->getEntity(ctxt->userData, name);
7908 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7909 ent = xmlGetPredefinedEntity(name);
7910 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7911 ent = xmlSAX2GetEntity(ctxt, name);
7912 }
7913 }
7914 if (ctxt->instate == XML_PARSER_EOF) {
7915 xmlFree(name);
7916 return(NULL);
7917 }
7918
7919 /*
7920 * [ WFC: Entity Declared ]
7921 * In a document without any DTD, a document with only an
7922 * internal DTD subset which contains no parameter entity
7923 * references, or a document with "standalone='yes'", the
7924 * Name given in the entity reference must match that in an
7925 * entity declaration, except that well-formed documents
7926 * need not declare any of the following entities: amp, lt,
7927 * gt, apos, quot.
7928 * The declaration of a parameter entity must precede any
7929 * reference to it.
7930 * Similarly, the declaration of a general entity must
7931 * precede any reference to it which appears in a default
7932 * value in an attribute-list declaration. Note that if
7933 * entities are declared in the external subset or in
7934 * external parameter entities, a non-validating processor
7935 * is not obligated to read and process their declarations;
7936 * for such documents, the rule that an entity must be
7937 * declared is a well-formedness constraint only if
7938 * standalone='yes'.
7939 */
7940 if (ent == NULL) {
7941 if ((ctxt->standalone == 1) ||
7942 ((ctxt->hasExternalSubset == 0) &&
7943 (ctxt->hasPErefs == 0))) {
7944 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7945 "Entity '%s' not defined\n", name);
7946 } else {
7947 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7948 "Entity '%s' not defined\n",
7949 name);
7950 }
7951 xmlParserEntityCheck(ctxt, 0, ent, 0);
7952 /* TODO ? check regressions ctxt->valid = 0; */
7953 }
7954
7955 /*
7956 * [ WFC: Parsed Entity ]
7957 * An entity reference must not contain the name of an
7958 * unparsed entity
7959 */
7960 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7962 "Entity reference to unparsed entity %s\n", name);
7963 }
7964
7965 /*
7966 * [ WFC: No External Entity References ]
7967 * Attribute values cannot contain direct or indirect
7968 * entity references to external entities.
7969 */
7970 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7971 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7972 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7973 "Attribute references external entity '%s'\n", name);
7974 }
7975 /*
7976 * [ WFC: No < in Attribute Values ]
7977 * The replacement text of any entity referred to directly or
7978 * indirectly in an attribute value (other than "&lt;") must
7979 * not contain a <.
7980 */
7981 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7982 (ent != NULL) && (ent->content != NULL) &&
7983 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7984 (xmlStrchr(ent->content, '<'))) {
7985 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7986 "'<' in entity '%s' is not allowed in attributes values\n",
7987 name);
7988 }
7989
7990 /*
7991 * Internal check, no parameter entities here ...
7992 */
7993 else {
7994 switch (ent->etype) {
7995 case XML_INTERNAL_PARAMETER_ENTITY:
7996 case XML_EXTERNAL_PARAMETER_ENTITY:
7997 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7998 "Attempt to reference the parameter entity '%s'\n",
7999 name);
8000 break;
8001 default:
8002 break;
8003 }
8004 }
8005
8006 /*
8007 * [ WFC: No Recursion ]
8008 * A parsed entity must not contain a recursive reference
8009 * to itself, either directly or indirectly.
8010 * Done somewhere else
8011 */
8012
8013 xmlFree(name);
8014 *str = ptr;
8015 return(ent);
8016}
8017
8018/**
8019 * xmlParsePEReference:
8020 * @ctxt: an XML parser context
8021 *
8022 * parse PEReference declarations
8023 * The entity content is handled directly by pushing it's content as
8024 * a new input stream.
8025 *
8026 * [69] PEReference ::= '%' Name ';'
8027 *
8028 * [ WFC: No Recursion ]
8029 * A parsed entity must not contain a recursive
8030 * reference to itself, either directly or indirectly.
8031 *
8032 * [ WFC: Entity Declared ]
8033 * In a document without any DTD, a document with only an internal DTD
8034 * subset which contains no parameter entity references, or a document
8035 * with "standalone='yes'", ... ... The declaration of a parameter
8036 * entity must precede any reference to it...
8037 *
8038 * [ VC: Entity Declared ]
8039 * In a document with an external subset or external parameter entities
8040 * with "standalone='no'", ... ... The declaration of a parameter entity
8041 * must precede any reference to it...
8042 *
8043 * [ WFC: In DTD ]
8044 * Parameter-entity references may only appear in the DTD.
8045 * NOTE: misleading but this is handled.
8046 */
8047void
8048xmlParsePEReference(xmlParserCtxtPtr ctxt)
8049{
8050 const xmlChar *name;
8051 xmlEntityPtr entity = NULL;
8052 xmlParserInputPtr input;
8053
8054 if (RAW != '%')
8055 return;
8056 NEXT;
8057 name = xmlParseName(ctxt);
8058 if (name == NULL) {
8059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8060 "xmlParsePEReference: no name\n");
8061 return;
8062 }
8063 if (RAW != ';') {
8064 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8065 return;
8066 }
8067
8068 NEXT;
8069
8070 /*
8071 * Increate the number of entity references parsed
8072 */
8073 ctxt->nbentities++;
8074
8075 /*
8076 * Request the entity from SAX
8077 */
8078 if ((ctxt->sax != NULL) &&
8079 (ctxt->sax->getParameterEntity != NULL))
8080 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8081 if (ctxt->instate == XML_PARSER_EOF)
8082 return;
8083 if (entity == NULL) {
8084 /*
8085 * [ WFC: Entity Declared ]
8086 * In a document without any DTD, a document with only an
8087 * internal DTD subset which contains no parameter entity
8088 * references, or a document with "standalone='yes'", ...
8089 * ... The declaration of a parameter entity must precede
8090 * any reference to it...
8091 */
8092 if ((ctxt->standalone == 1) ||
8093 ((ctxt->hasExternalSubset == 0) &&
8094 (ctxt->hasPErefs == 0))) {
8095 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8096 "PEReference: %%%s; not found\n",
8097 name);
8098 } else {
8099 /*
8100 * [ VC: Entity Declared ]
8101 * In a document with an external subset or external
8102 * parameter entities with "standalone='no'", ...
8103 * ... The declaration of a parameter entity must
8104 * precede any reference to it...
8105 */
8106 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8107 "PEReference: %%%s; not found\n",
8108 name, NULL);
8109 ctxt->valid = 0;
8110 }
8111 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8112 } else {
8113 /*
8114 * Internal checking in case the entity quest barfed
8115 */
8116 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8117 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8119 "Internal: %%%s; is not a parameter entity\n",
8120 name, NULL);
8121 } else if (ctxt->input->free != deallocblankswrapper) {
8122 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8123 if (xmlPushInput(ctxt, input) < 0)
8124 return;
8125 } else {
8126 /*
8127 * TODO !!!
8128 * handle the extra spaces added before and after
8129 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8130 */
8131 input = xmlNewEntityInputStream(ctxt, entity);
8132 if (xmlPushInput(ctxt, input) < 0)
8133 return;
8134 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8135 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8136 (IS_BLANK_CH(NXT(5)))) {
8137 xmlParseTextDecl(ctxt);
8138 if (ctxt->errNo ==
8139 XML_ERR_UNSUPPORTED_ENCODING) {
8140 /*
8141 * The XML REC instructs us to stop parsing
8142 * right here
8143 */
8144 xmlHaltParser(ctxt);
8145 return;
8146 }
8147 }
8148 }
8149 }
8150 ctxt->hasPErefs = 1;
8151}
8152
8153/**
8154 * xmlLoadEntityContent:
8155 * @ctxt: an XML parser context
8156 * @entity: an unloaded system entity
8157 *
8158 * Load the original content of the given system entity from the
8159 * ExternalID/SystemID given. This is to be used for Included in Literal
8160 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8161 *
8162 * Returns 0 in case of success and -1 in case of failure
8163 */
8164static int
8165xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8166 xmlParserInputPtr input;
8167 xmlBufferPtr buf;
8168 int l, c;
8169 int count = 0;
8170
8171 if ((ctxt == NULL) || (entity == NULL) ||
8172 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8173 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8174 (entity->content != NULL)) {
8175 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8176 "xmlLoadEntityContent parameter error");
8177 return(-1);
8178 }
8179
8180 if (xmlParserDebugEntities)
8181 xmlGenericError(xmlGenericErrorContext,
8182 "Reading %s entity content input\n", entity->name);
8183
8184 buf = xmlBufferCreate();
8185 if (buf == NULL) {
8186 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8187 "xmlLoadEntityContent parameter error");
8188 return(-1);
8189 }
8190
8191 input = xmlNewEntityInputStream(ctxt, entity);
8192 if (input == NULL) {
8193 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8194 "xmlLoadEntityContent input error");
8195 xmlBufferFree(buf);
8196 return(-1);
8197 }
8198
8199 /*
8200 * Push the entity as the current input, read char by char
8201 * saving to the buffer until the end of the entity or an error
8202 */
8203 if (xmlPushInput(ctxt, input) < 0) {
8204 xmlBufferFree(buf);
8205 return(-1);
8206 }
8207
8208 GROW;
8209 c = CUR_CHAR(l);
8210 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8211 (IS_CHAR(c))) {
8212 xmlBufferAdd(buf, ctxt->input->cur, l);
8213 if (count++ > XML_PARSER_CHUNK_SIZE) {
8214 count = 0;
8215 GROW;
8216 if (ctxt->instate == XML_PARSER_EOF) {
8217 xmlBufferFree(buf);
8218 return(-1);
8219 }
8220 }
8221 NEXTL(l);
8222 c = CUR_CHAR(l);
8223 if (c == 0) {
8224 count = 0;
8225 GROW;
8226 if (ctxt->instate == XML_PARSER_EOF) {
8227 xmlBufferFree(buf);
8228 return(-1);
8229 }
8230 c = CUR_CHAR(l);
8231 }
8232 }
8233
8234 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8235 xmlPopInput(ctxt);
8236 } else if (!IS_CHAR(c)) {
8237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8238 "xmlLoadEntityContent: invalid char value %d\n",
8239 c);
8240 xmlBufferFree(buf);
8241 return(-1);
8242 }
8243 entity->content = buf->content;
8244 buf->content = NULL;
8245 xmlBufferFree(buf);
8246
8247 return(0);
8248}
8249
8250/**
8251 * xmlParseStringPEReference:
8252 * @ctxt: an XML parser context
8253 * @str: a pointer to an index in the string
8254 *
8255 * parse PEReference declarations
8256 *
8257 * [69] PEReference ::= '%' Name ';'
8258 *
8259 * [ WFC: No Recursion ]
8260 * A parsed entity must not contain a recursive
8261 * reference to itself, either directly or indirectly.
8262 *
8263 * [ WFC: Entity Declared ]
8264 * In a document without any DTD, a document with only an internal DTD
8265 * subset which contains no parameter entity references, or a document
8266 * with "standalone='yes'", ... ... The declaration of a parameter
8267 * entity must precede any reference to it...
8268 *
8269 * [ VC: Entity Declared ]
8270 * In a document with an external subset or external parameter entities
8271 * with "standalone='no'", ... ... The declaration of a parameter entity
8272 * must precede any reference to it...
8273 *
8274 * [ WFC: In DTD ]
8275 * Parameter-entity references may only appear in the DTD.
8276 * NOTE: misleading but this is handled.
8277 *
8278 * Returns the string of the entity content.
8279 * str is updated to the current value of the index
8280 */
8281static xmlEntityPtr
8282xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8283 const xmlChar *ptr;
8284 xmlChar cur;
8285 xmlChar *name;
8286 xmlEntityPtr entity = NULL;
8287
8288 if ((str == NULL) || (*str == NULL)) return(NULL);
8289 ptr = *str;
8290 cur = *ptr;
8291 if (cur != '%')
8292 return(NULL);
8293 ptr++;
8294 name = xmlParseStringName(ctxt, &ptr);
8295 if (name == NULL) {
8296 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8297 "xmlParseStringPEReference: no name\n");
8298 *str = ptr;
8299 return(NULL);
8300 }
8301 cur = *ptr;
8302 if (cur != ';') {
8303 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8304 xmlFree(name);
8305 *str = ptr;
8306 return(NULL);
8307 }
8308 ptr++;
8309
8310 /*
8311 * Increate the number of entity references parsed
8312 */
8313 ctxt->nbentities++;
8314
8315 /*
8316 * Request the entity from SAX
8317 */
8318 if ((ctxt->sax != NULL) &&
8319 (ctxt->sax->getParameterEntity != NULL))
8320 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8321 if (ctxt->instate == XML_PARSER_EOF) {
8322 xmlFree(name);
8323 return(NULL);
8324 }
8325 if (entity == NULL) {
8326 /*
8327 * [ WFC: Entity Declared ]
8328 * In a document without any DTD, a document with only an
8329 * internal DTD subset which contains no parameter entity
8330 * references, or a document with "standalone='yes'", ...
8331 * ... The declaration of a parameter entity must precede
8332 * any reference to it...
8333 */
8334 if ((ctxt->standalone == 1) ||
8335 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8336 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8337 "PEReference: %%%s; not found\n", name);
8338 } else {
8339 /*
8340 * [ VC: Entity Declared ]
8341 * In a document with an external subset or external
8342 * parameter entities with "standalone='no'", ...
8343 * ... The declaration of a parameter entity must
8344 * precede any reference to it...
8345 */
8346 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8347 "PEReference: %%%s; not found\n",
8348 name, NULL);
8349 ctxt->valid = 0;
8350 }
8351 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8352 } else {
8353 /*
8354 * Internal checking in case the entity quest barfed
8355 */
8356 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8357 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8358 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8359 "%%%s; is not a parameter entity\n",
8360 name, NULL);
8361 }
8362 }
8363 ctxt->hasPErefs = 1;
8364 xmlFree(name);
8365 *str = ptr;
8366 return(entity);
8367}
8368
8369/**
8370 * xmlParseDocTypeDecl:
8371 * @ctxt: an XML parser context
8372 *
8373 * parse a DOCTYPE declaration
8374 *
8375 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8376 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8377 *
8378 * [ VC: Root Element Type ]
8379 * The Name in the document type declaration must match the element
8380 * type of the root element.
8381 */
8382
8383void
8384xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8385 const xmlChar *name = NULL;
8386 xmlChar *ExternalID = NULL;
8387 xmlChar *URI = NULL;
8388
8389 /*
8390 * We know that '<!DOCTYPE' has been detected.
8391 */
8392 SKIP(9);
8393
8394 SKIP_BLANKS;
8395
8396 /*
8397 * Parse the DOCTYPE name.
8398 */
8399 name = xmlParseName(ctxt);
8400 if (name == NULL) {
8401 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8402 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8403 }
8404 ctxt->intSubName = name;
8405
8406 SKIP_BLANKS;
8407
8408 /*
8409 * Check for SystemID and ExternalID
8410 */
8411 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8412
8413 if ((URI != NULL) || (ExternalID != NULL)) {
8414 ctxt->hasExternalSubset = 1;
8415 }
8416 ctxt->extSubURI = URI;
8417 ctxt->extSubSystem = ExternalID;
8418
8419 SKIP_BLANKS;
8420
8421 /*
8422 * Create and update the internal subset.
8423 */
8424 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8425 (!ctxt->disableSAX))
8426 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8427 if (ctxt->instate == XML_PARSER_EOF)
8428 return;
8429
8430 /*
8431 * Is there any internal subset declarations ?
8432 * they are handled separately in xmlParseInternalSubset()
8433 */
8434 if (RAW == '[')
8435 return;
8436
8437 /*
8438 * We should be at the end of the DOCTYPE declaration.
8439 */
8440 if (RAW != '>') {
8441 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8442 }
8443 NEXT;
8444}
8445
8446/**
8447 * xmlParseInternalSubset:
8448 * @ctxt: an XML parser context
8449 *
8450 * parse the internal subset declaration
8451 *
8452 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8453 */
8454
8455static void
8456xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8457 /*
8458 * Is there any DTD definition ?
8459 */
8460 if (RAW == '[') {
8461 ctxt->instate = XML_PARSER_DTD;
8462 NEXT;
8463 /*
8464 * Parse the succession of Markup declarations and
8465 * PEReferences.
8466 * Subsequence (markupdecl | PEReference | S)*
8467 */
8468 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8469 const xmlChar *check = CUR_PTR;
8470 unsigned int cons = ctxt->input->consumed;
8471
8472 SKIP_BLANKS;
8473 xmlParseMarkupDecl(ctxt);
8474 xmlParsePEReference(ctxt);
8475
8476 /*
8477 * Pop-up of finished entities.
8478 */
8479 while ((RAW == 0) && (ctxt->inputNr > 1))
8480 xmlPopInput(ctxt);
8481
8482 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8483 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8484 "xmlParseInternalSubset: error detected in Markup declaration\n");
8485 break;
8486 }
8487 }
8488 if (RAW == ']') {
8489 NEXT;
8490 SKIP_BLANKS;
8491 }
8492 }
8493
8494 /*
8495 * We should be at the end of the DOCTYPE declaration.
8496 */
8497 if (RAW != '>') {
8498 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8499 return;
8500 }
8501 NEXT;
8502}
8503
8504#ifdef LIBXML_SAX1_ENABLED
8505/**
8506 * xmlParseAttribute:
8507 * @ctxt: an XML parser context
8508 * @value: a xmlChar ** used to store the value of the attribute
8509 *
8510 * parse an attribute
8511 *
8512 * [41] Attribute ::= Name Eq AttValue
8513 *
8514 * [ WFC: No External Entity References ]
8515 * Attribute values cannot contain direct or indirect entity references
8516 * to external entities.
8517 *
8518 * [ WFC: No < in Attribute Values ]
8519 * The replacement text of any entity referred to directly or indirectly in
8520 * an attribute value (other than "&lt;") must not contain a <.
8521 *
8522 * [ VC: Attribute Value Type ]
8523 * The attribute must have been declared; the value must be of the type
8524 * declared for it.
8525 *
8526 * [25] Eq ::= S? '=' S?
8527 *
8528 * With namespace:
8529 *
8530 * [NS 11] Attribute ::= QName Eq AttValue
8531 *
8532 * Also the case QName == xmlns:??? is handled independently as a namespace
8533 * definition.
8534 *
8535 * Returns the attribute name, and the value in *value.
8536 */
8537
8538const xmlChar *
8539xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8540 const xmlChar *name;
8541 xmlChar *val;
8542
8543 *value = NULL;
8544 GROW;
8545 name = xmlParseName(ctxt);
8546 if (name == NULL) {
8547 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8548 "error parsing attribute name\n");
8549 return(NULL);
8550 }
8551
8552 /*
8553 * read the value
8554 */
8555 SKIP_BLANKS;
8556 if (RAW == '=') {
8557 NEXT;
8558 SKIP_BLANKS;
8559 val = xmlParseAttValue(ctxt);
8560 ctxt->instate = XML_PARSER_CONTENT;
8561 } else {
8562 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8563 "Specification mandate value for attribute %s\n", name);
8564 return(NULL);
8565 }
8566
8567 /*
8568 * Check that xml:lang conforms to the specification
8569 * No more registered as an error, just generate a warning now
8570 * since this was deprecated in XML second edition
8571 */
8572 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8573 if (!xmlCheckLanguageID(val)) {
8574 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8575 "Malformed value for xml:lang : %s\n",
8576 val, NULL);
8577 }
8578 }
8579
8580 /*
8581 * Check that xml:space conforms to the specification
8582 */
8583 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8584 if (xmlStrEqual(val, BAD_CAST "default"))
8585 *(ctxt->space) = 0;
8586 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8587 *(ctxt->space) = 1;
8588 else {
8589 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8590"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8591 val, NULL);
8592 }
8593 }
8594
8595 *value = val;
8596 return(name);
8597}
8598
8599/**
8600 * xmlParseStartTag:
8601 * @ctxt: an XML parser context
8602 *
8603 * parse a start of tag either for rule element or
8604 * EmptyElement. In both case we don't parse the tag closing chars.
8605 *
8606 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8607 *
8608 * [ WFC: Unique Att Spec ]
8609 * No attribute name may appear more than once in the same start-tag or
8610 * empty-element tag.
8611 *
8612 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8613 *
8614 * [ WFC: Unique Att Spec ]
8615 * No attribute name may appear more than once in the same start-tag or
8616 * empty-element tag.
8617 *
8618 * With namespace:
8619 *
8620 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8621 *
8622 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8623 *
8624 * Returns the element name parsed
8625 */
8626
8627const xmlChar *
8628xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8629 const xmlChar *name;
8630 const xmlChar *attname;
8631 xmlChar *attvalue;
8632 const xmlChar **atts = ctxt->atts;
8633 int nbatts = 0;
8634 int maxatts = ctxt->maxatts;
8635 int i;
8636
8637 if (RAW != '<') return(NULL);
8638 NEXT1;
8639
8640 name = xmlParseName(ctxt);
8641 if (name == NULL) {
8642 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8643 "xmlParseStartTag: invalid element name\n");
8644 return(NULL);
8645 }
8646
8647 /*
8648 * Now parse the attributes, it ends up with the ending
8649 *
8650 * (S Attribute)* S?
8651 */
8652 SKIP_BLANKS;
8653 GROW;
8654
8655 while (((RAW != '>') &&
8656 ((RAW != '/') || (NXT(1) != '>')) &&
8657 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8658 const xmlChar *q = CUR_PTR;
8659 unsigned int cons = ctxt->input->consumed;
8660
8661 attname = xmlParseAttribute(ctxt, &attvalue);
8662 if ((attname != NULL) && (attvalue != NULL)) {
8663 /*
8664 * [ WFC: Unique Att Spec ]
8665 * No attribute name may appear more than once in the same
8666 * start-tag or empty-element tag.
8667 */
8668 for (i = 0; i < nbatts;i += 2) {
8669 if (xmlStrEqual(atts[i], attname)) {
8670 xmlErrAttributeDup(ctxt, NULL, attname);
8671 xmlFree(attvalue);
8672 goto failed;
8673 }
8674 }
8675 /*
8676 * Add the pair to atts
8677 */
8678 if (atts == NULL) {
8679 maxatts = 22; /* allow for 10 attrs by default */
8680 atts = (const xmlChar **)
8681 xmlMalloc(maxatts * sizeof(xmlChar *));
8682 if (atts == NULL) {
8683 xmlErrMemory(ctxt, NULL);
8684 if (attvalue != NULL)
8685 xmlFree(attvalue);
8686 goto failed;
8687 }
8688 ctxt->atts = atts;
8689 ctxt->maxatts = maxatts;
8690 } else if (nbatts + 4 > maxatts) {
8691 const xmlChar **n;
8692
8693 maxatts *= 2;
8694 n = (const xmlChar **) xmlRealloc((void *) atts,
8695 maxatts * sizeof(const xmlChar *));
8696 if (n == NULL) {
8697 xmlErrMemory(ctxt, NULL);
8698 if (attvalue != NULL)
8699 xmlFree(attvalue);
8700 goto failed;
8701 }
8702 atts = n;
8703 ctxt->atts = atts;
8704 ctxt->maxatts = maxatts;
8705 }
8706 atts[nbatts++] = attname;
8707 atts[nbatts++] = attvalue;
8708 atts[nbatts] = NULL;
8709 atts[nbatts + 1] = NULL;
8710 } else {
8711 if (attvalue != NULL)
8712 xmlFree(attvalue);
8713 }
8714
8715failed:
8716
8717 GROW
8718 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8719 break;
8720 if (!IS_BLANK_CH(RAW)) {
8721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8722 "attributes construct error\n");
8723 }
8724 SKIP_BLANKS;
8725 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8726 (attname == NULL) && (attvalue == NULL)) {
8727 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8728 "xmlParseStartTag: problem parsing attributes\n");
8729 break;
8730 }
8731 SHRINK;
8732 GROW;
8733 }
8734
8735 /*
8736 * SAX: Start of Element !
8737 */
8738 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8739 (!ctxt->disableSAX)) {
8740 if (nbatts > 0)
8741 ctxt->sax->startElement(ctxt->userData, name, atts);
8742 else
8743 ctxt->sax->startElement(ctxt->userData, name, NULL);
8744 }
8745
8746 if (atts != NULL) {
8747 /* Free only the content strings */
8748 for (i = 1;i < nbatts;i+=2)
8749 if (atts[i] != NULL)
8750 xmlFree((xmlChar *) atts[i]);
8751 }
8752 return(name);
8753}
8754
8755/**
8756 * xmlParseEndTag1:
8757 * @ctxt: an XML parser context
8758 * @line: line of the start tag
8759 * @nsNr: number of namespaces on the start tag
8760 *
8761 * parse an end of tag
8762 *
8763 * [42] ETag ::= '</' Name S? '>'
8764 *
8765 * With namespace
8766 *
8767 * [NS 9] ETag ::= '</' QName S? '>'
8768 */
8769
8770static void
8771xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8772 const xmlChar *name;
8773
8774 GROW;
8775 if ((RAW != '<') || (NXT(1) != '/')) {
8776 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8777 "xmlParseEndTag: '</' not found\n");
8778 return;
8779 }
8780 SKIP(2);
8781
8782 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8783
8784 /*
8785 * We should definitely be at the ending "S? '>'" part
8786 */
8787 GROW;
8788 SKIP_BLANKS;
8789 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8790 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8791 } else
8792 NEXT1;
8793
8794 /*
8795 * [ WFC: Element Type Match ]
8796 * The Name in an element's end-tag must match the element type in the
8797 * start-tag.
8798 *
8799 */
8800 if (name != (xmlChar*)1) {
8801 if (name == NULL) name = BAD_CAST "unparseable";
8802 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8803 "Opening and ending tag mismatch: %s line %d and %s\n",
8804 ctxt->name, line, name);
8805 }
8806
8807 /*
8808 * SAX: End of Tag
8809 */
8810 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8811 (!ctxt->disableSAX))
8812 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8813
8814 namePop(ctxt);
8815 spacePop(ctxt);
8816 return;
8817}
8818
8819/**
8820 * xmlParseEndTag:
8821 * @ctxt: an XML parser context
8822 *
8823 * parse an end of tag
8824 *
8825 * [42] ETag ::= '</' Name S? '>'
8826 *
8827 * With namespace
8828 *
8829 * [NS 9] ETag ::= '</' QName S? '>'
8830 */
8831
8832void
8833xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8834 xmlParseEndTag1(ctxt, 0);
8835}
8836#endif /* LIBXML_SAX1_ENABLED */
8837
8838/************************************************************************
8839 * *
8840 * SAX 2 specific operations *
8841 * *
8842 ************************************************************************/
8843
8844/*
8845 * xmlGetNamespace:
8846 * @ctxt: an XML parser context
8847 * @prefix: the prefix to lookup
8848 *
8849 * Lookup the namespace name for the @prefix (which ca be NULL)
8850 * The prefix must come from the @ctxt->dict dictionary
8851 *
8852 * Returns the namespace name or NULL if not bound
8853 */
8854static const xmlChar *
8855xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8856 int i;
8857
8858 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8859 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8860 if (ctxt->nsTab[i] == prefix) {
8861 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8862 return(NULL);
8863 return(ctxt->nsTab[i + 1]);
8864 }
8865 return(NULL);
8866}
8867
8868/**
8869 * xmlParseQName:
8870 * @ctxt: an XML parser context
8871 * @prefix: pointer to store the prefix part
8872 *
8873 * parse an XML Namespace QName
8874 *
8875 * [6] QName ::= (Prefix ':')? LocalPart
8876 * [7] Prefix ::= NCName
8877 * [8] LocalPart ::= NCName
8878 *
8879 * Returns the Name parsed or NULL
8880 */
8881
8882static const xmlChar *
8883xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8884 const xmlChar *l, *p;
8885
8886 GROW;
8887
8888 l = xmlParseNCName(ctxt);
8889 if (l == NULL) {
8890 if (CUR == ':') {
8891 l = xmlParseName(ctxt);
8892 if (l != NULL) {
8893 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8894 "Failed to parse QName '%s'\n", l, NULL, NULL);
8895 *prefix = NULL;
8896 return(l);
8897 }
8898 }
8899 return(NULL);
8900 }
8901 if (CUR == ':') {
8902 NEXT;
8903 p = l;
8904 l = xmlParseNCName(ctxt);
8905 if (l == NULL) {
8906 xmlChar *tmp;
8907
8908 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8909 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8910 l = xmlParseNmtoken(ctxt);
8911 if (l == NULL)
8912 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8913 else {
8914 tmp = xmlBuildQName(l, p, NULL, 0);
8915 xmlFree((char *)l);
8916 }
8917 p = xmlDictLookup(ctxt->dict, tmp, -1);
8918 if (tmp != NULL) xmlFree(tmp);
8919 *prefix = NULL;
8920 return(p);
8921 }
8922 if (CUR == ':') {
8923 xmlChar *tmp;
8924
8925 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8926 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8927 NEXT;
8928 tmp = (xmlChar *) xmlParseName(ctxt);
8929 if (tmp != NULL) {
8930 tmp = xmlBuildQName(tmp, l, NULL, 0);
8931 l = xmlDictLookup(ctxt->dict, tmp, -1);
8932 if (tmp != NULL) xmlFree(tmp);
8933 *prefix = p;
8934 return(l);
8935 }
8936 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8937 l = xmlDictLookup(ctxt->dict, tmp, -1);
8938 if (tmp != NULL) xmlFree(tmp);
8939 *prefix = p;
8940 return(l);
8941 }
8942 *prefix = p;
8943 } else
8944 *prefix = NULL;
8945 return(l);
8946}
8947
8948/**
8949 * xmlParseQNameAndCompare:
8950 * @ctxt: an XML parser context
8951 * @name: the localname
8952 * @prefix: the prefix, if any.
8953 *
8954 * parse an XML name and compares for match
8955 * (specialized for endtag parsing)
8956 *
8957 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8958 * and the name for mismatch
8959 */
8960
8961static const xmlChar *
8962xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8963 xmlChar const *prefix) {
8964 const xmlChar *cmp;
8965 const xmlChar *in;
8966 const xmlChar *ret;
8967 const xmlChar *prefix2;
8968
8969 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8970
8971 GROW;
8972 in = ctxt->input->cur;
8973
8974 cmp = prefix;
8975 while (*in != 0 && *in == *cmp) {
8976 ++in;
8977 ++cmp;
8978 }
8979 if ((*cmp == 0) && (*in == ':')) {
8980 in++;
8981 cmp = name;
8982 while (*in != 0 && *in == *cmp) {
8983 ++in;
8984 ++cmp;
8985 }
8986 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8987 /* success */
8988 ctxt->input->cur = in;
8989 return((const xmlChar*) 1);
8990 }
8991 }
8992 /*
8993 * all strings coms from the dictionary, equality can be done directly
8994 */
8995 ret = xmlParseQName (ctxt, &prefix2);
8996 if ((ret == name) && (prefix == prefix2))
8997 return((const xmlChar*) 1);
8998 return ret;
8999}
9000
9001/**
9002 * xmlParseAttValueInternal:
9003 * @ctxt: an XML parser context
9004 * @len: attribute len result
9005 * @alloc: whether the attribute was reallocated as a new string
9006 * @normalize: if 1 then further non-CDATA normalization must be done
9007 *
9008 * parse a value for an attribute.
9009 * NOTE: if no normalization is needed, the routine will return pointers
9010 * directly from the data buffer.
9011 *
9012 * 3.3.3 Attribute-Value Normalization:
9013 * Before the value of an attribute is passed to the application or
9014 * checked for validity, the XML processor must normalize it as follows:
9015 * - a character reference is processed by appending the referenced
9016 * character to the attribute value
9017 * - an entity reference is processed by recursively processing the
9018 * replacement text of the entity
9019 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9020 * appending #x20 to the normalized value, except that only a single
9021 * #x20 is appended for a "#xD#xA" sequence that is part of an external
9022 * parsed entity or the literal entity value of an internal parsed entity
9023 * - other characters are processed by appending them to the normalized value
9024 * If the declared value is not CDATA, then the XML processor must further
9025 * process the normalized attribute value by discarding any leading and
9026 * trailing space (#x20) characters, and by replacing sequences of space
9027 * (#x20) characters by a single space (#x20) character.
9028 * All attributes for which no declaration has been read should be treated
9029 * by a non-validating parser as if declared CDATA.
9030 *
9031 * Returns the AttValue parsed or NULL. The value has to be freed by the
9032 * caller if it was copied, this can be detected by val[*len] == 0.
9033 */
9034
9035static xmlChar *
9036xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9037 int normalize)
9038{
9039 xmlChar limit = 0;
9040 const xmlChar *in = NULL, *start, *end, *last;
9041 xmlChar *ret = NULL;
9042 int line, col;
9043
9044 GROW;
9045 in = (xmlChar *) CUR_PTR;
9046 line = ctxt->input->line;
9047 col = ctxt->input->col;
9048 if (*in != '"' && *in != '\'') {
9049 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9050 return (NULL);
9051 }
9052 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9053
9054 /*
9055 * try to handle in this routine the most common case where no
9056 * allocation of a new string is required and where content is
9057 * pure ASCII.
9058 */
9059 limit = *in++;
9060 col++;
9061 end = ctxt->input->end;
9062 start = in;
9063 if (in >= end) {
9064 const xmlChar *oldbase = ctxt->input->base;
9065 GROW;
9066 if (oldbase != ctxt->input->base) {
9067 long delta = ctxt->input->base - oldbase;
9068 start = start + delta;
9069 in = in + delta;
9070 }
9071 end = ctxt->input->end;
9072 }
9073 if (normalize) {
9074 /*
9075 * Skip any leading spaces
9076 */
9077 while ((in < end) && (*in != limit) &&
9078 ((*in == 0x20) || (*in == 0x9) ||
9079 (*in == 0xA) || (*in == 0xD))) {
9080 if (*in == 0xA) {
9081 line++; col = 1;
9082 } else {
9083 col++;
9084 }
9085 in++;
9086 start = in;
9087 if (in >= end) {
9088 const xmlChar *oldbase = ctxt->input->base;
9089 GROW;
9090 if (ctxt->instate == XML_PARSER_EOF)
9091 return(NULL);
9092 if (oldbase != ctxt->input->base) {
9093 long delta = ctxt->input->base - oldbase;
9094 start = start + delta;
9095 in = in + delta;
9096 }
9097 end = ctxt->input->end;
9098 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9099 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9100 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9101 "AttValue length too long\n");
9102 return(NULL);
9103 }
9104 }
9105 }
9106 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9107 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9108 col++;
9109 if ((*in++ == 0x20) && (*in == 0x20)) break;
9110 if (in >= end) {
9111 const xmlChar *oldbase = ctxt->input->base;
9112 GROW;
9113 if (ctxt->instate == XML_PARSER_EOF)
9114 return(NULL);
9115 if (oldbase != ctxt->input->base) {
9116 long delta = ctxt->input->base - oldbase;
9117 start = start + delta;
9118 in = in + delta;
9119 }
9120 end = ctxt->input->end;
9121 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9122 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9123 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9124 "AttValue length too long\n");
9125 return(NULL);
9126 }
9127 }
9128 }
9129 last = in;
9130 /*
9131 * skip the trailing blanks
9132 */
9133 while ((last[-1] == 0x20) && (last > start)) last--;
9134 while ((in < end) && (*in != limit) &&
9135 ((*in == 0x20) || (*in == 0x9) ||
9136 (*in == 0xA) || (*in == 0xD))) {
9137 if (*in == 0xA) {
9138 line++, col = 1;
9139 } else {
9140 col++;
9141 }
9142 in++;
9143 if (in >= end) {
9144 const xmlChar *oldbase = ctxt->input->base;
9145 GROW;
9146 if (ctxt->instate == XML_PARSER_EOF)
9147 return(NULL);
9148 if (oldbase != ctxt->input->base) {
9149 long delta = ctxt->input->base - oldbase;
9150 start = start + delta;
9151 in = in + delta;
9152 last = last + delta;
9153 }
9154 end = ctxt->input->end;
9155 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9156 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9157 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9158 "AttValue length too long\n");
9159 return(NULL);
9160 }
9161 }
9162 }
9163 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9164 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9165 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9166 "AttValue length too long\n");
9167 return(NULL);
9168 }
9169 if (*in != limit) goto need_complex;
9170 } else {
9171 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9172 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9173 in++;
9174 col++;
9175 if (in >= end) {
9176 const xmlChar *oldbase = ctxt->input->base;
9177 GROW;
9178 if (ctxt->instate == XML_PARSER_EOF)
9179 return(NULL);
9180 if (oldbase != ctxt->input->base) {
9181 long delta = ctxt->input->base - oldbase;
9182 start = start + delta;
9183 in = in + delta;
9184 }
9185 end = ctxt->input->end;
9186 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9187 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9188 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9189 "AttValue length too long\n");
9190 return(NULL);
9191 }
9192 }
9193 }
9194 last = in;
9195 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9196 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9197 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9198 "AttValue length too long\n");
9199 return(NULL);
9200 }
9201 if (*in != limit) goto need_complex;
9202 }
9203 in++;
9204 col++;
9205 if (len != NULL) {
9206 *len = last - start;
9207 ret = (xmlChar *) start;
9208 } else {
9209 if (alloc) *alloc = 1;
9210 ret = xmlStrndup(start, last - start);
9211 }
9212 CUR_PTR = in;
9213 ctxt->input->line = line;
9214 ctxt->input->col = col;
9215 if (alloc) *alloc = 0;
9216 return ret;
9217need_complex:
9218 if (alloc) *alloc = 1;
9219 return xmlParseAttValueComplex(ctxt, len, normalize);
9220}
9221
9222/**
9223 * xmlParseAttribute2:
9224 * @ctxt: an XML parser context
9225 * @pref: the element prefix
9226 * @elem: the element name
9227 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9228 * @value: a xmlChar ** used to store the value of the attribute
9229 * @len: an int * to save the length of the attribute
9230 * @alloc: an int * to indicate if the attribute was allocated
9231 *
9232 * parse an attribute in the new SAX2 framework.
9233 *
9234 * Returns the attribute name, and the value in *value, .
9235 */
9236
9237static const xmlChar *
9238xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9239 const xmlChar * pref, const xmlChar * elem,
9240 const xmlChar ** prefix, xmlChar ** value,
9241 int *len, int *alloc)
9242{
9243 const xmlChar *name;
9244 xmlChar *val, *internal_val = NULL;
9245 int normalize = 0;
9246
9247 *value = NULL;
9248 GROW;
9249 name = xmlParseQName(ctxt, prefix);
9250 if (name == NULL) {
9251 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9252 "error parsing attribute name\n");
9253 return (NULL);
9254 }
9255
9256 /*
9257 * get the type if needed
9258 */
9259 if (ctxt->attsSpecial != NULL) {
9260 int type;
9261
9262 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9263 pref, elem, *prefix, name);
9264 if (type != 0)
9265 normalize = 1;
9266 }
9267
9268 /*
9269 * read the value
9270 */
9271 SKIP_BLANKS;
9272 if (RAW == '=') {
9273 NEXT;
9274 SKIP_BLANKS;
9275 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9276 if (normalize) {
9277 /*
9278 * Sometimes a second normalisation pass for spaces is needed
9279 * but that only happens if charrefs or entities refernces
9280 * have been used in the attribute value, i.e. the attribute
9281 * value have been extracted in an allocated string already.
9282 */
9283 if (*alloc) {
9284 const xmlChar *val2;
9285
9286 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9287 if ((val2 != NULL) && (val2 != val)) {
9288 xmlFree(val);
9289 val = (xmlChar *) val2;
9290 }
9291 }
9292 }
9293 ctxt->instate = XML_PARSER_CONTENT;
9294 } else {
9295 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9296 "Specification mandate value for attribute %s\n",
9297 name);
9298 return (NULL);
9299 }
9300
9301 if (*prefix == ctxt->str_xml) {
9302 /*
9303 * Check that xml:lang conforms to the specification
9304 * No more registered as an error, just generate a warning now
9305 * since this was deprecated in XML second edition
9306 */
9307 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9308 internal_val = xmlStrndup(val, *len);
9309 if (!xmlCheckLanguageID(internal_val)) {
9310 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9311 "Malformed value for xml:lang : %s\n",
9312 internal_val, NULL);
9313 }
9314 }
9315
9316 /*
9317 * Check that xml:space conforms to the specification
9318 */
9319 if (xmlStrEqual(name, BAD_CAST "space")) {
9320 internal_val = xmlStrndup(val, *len);
9321 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9322 *(ctxt->space) = 0;
9323 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9324 *(ctxt->space) = 1;
9325 else {
9326 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9327 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9328 internal_val, NULL);
9329 }
9330 }
9331 if (internal_val) {
9332 xmlFree(internal_val);
9333 }
9334 }
9335
9336 *value = val;
9337 return (name);
9338}
9339/**
9340 * xmlParseStartTag2:
9341 * @ctxt: an XML parser context
9342 *
9343 * parse a start of tag either for rule element or
9344 * EmptyElement. In both case we don't parse the tag closing chars.
9345 * This routine is called when running SAX2 parsing
9346 *
9347 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9348 *
9349 * [ WFC: Unique Att Spec ]
9350 * No attribute name may appear more than once in the same start-tag or
9351 * empty-element tag.
9352 *
9353 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9354 *
9355 * [ WFC: Unique Att Spec ]
9356 * No attribute name may appear more than once in the same start-tag or
9357 * empty-element tag.
9358 *
9359 * With namespace:
9360 *
9361 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9362 *
9363 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9364 *
9365 * Returns the element name parsed
9366 */
9367
9368static const xmlChar *
9369xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9370 const xmlChar **URI, int *tlen) {
9371 const xmlChar *localname;
9372 const xmlChar *prefix;
9373 const xmlChar *attname;
9374 const xmlChar *aprefix;
9375 const xmlChar *nsname;
9376 xmlChar *attvalue;
9377 const xmlChar **atts = ctxt->atts;
9378 int maxatts = ctxt->maxatts;
9379 int nratts, nbatts, nbdef;
9380 int i, j, nbNs, attval, oldline, oldcol, inputNr;
9381 const xmlChar *base;
9382 unsigned long cur;
9383 int nsNr = ctxt->nsNr;
9384
9385 if (RAW != '<') return(NULL);
9386 NEXT1;
9387
9388 /*
9389 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9390 * point since the attribute values may be stored as pointers to
9391 * the buffer and calling SHRINK would destroy them !
9392 * The Shrinking is only possible once the full set of attribute
9393 * callbacks have been done.
9394 */
9395reparse:
9396 SHRINK;
9397 base = ctxt->input->base;
9398 cur = ctxt->input->cur - ctxt->input->base;
9399 inputNr = ctxt->inputNr;
9400 oldline = ctxt->input->line;
9401 oldcol = ctxt->input->col;
9402 nbatts = 0;
9403 nratts = 0;
9404 nbdef = 0;
9405 nbNs = 0;
9406 attval = 0;
9407 /* Forget any namespaces added during an earlier parse of this element. */
9408 ctxt->nsNr = nsNr;
9409
9410 localname = xmlParseQName(ctxt, &prefix);
9411 if (localname == NULL) {
9412 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9413 "StartTag: invalid element name\n");
9414 return(NULL);
9415 }
9416 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9417
9418 /*
9419 * Now parse the attributes, it ends up with the ending
9420 *
9421 * (S Attribute)* S?
9422 */
9423 SKIP_BLANKS;
9424 GROW;
9425 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9426 goto base_changed;
9427
9428 while (((RAW != '>') &&
9429 ((RAW != '/') || (NXT(1) != '>')) &&
9430 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9431 const xmlChar *q = CUR_PTR;
9432 unsigned int cons = ctxt->input->consumed;
9433 int len = -1, alloc = 0;
9434
9435 attname = xmlParseAttribute2(ctxt, prefix, localname,
9436 &aprefix, &attvalue, &len, &alloc);
9437 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
9438 if ((attvalue != NULL) && (alloc != 0))
9439 xmlFree(attvalue);
9440 attvalue = NULL;
9441 goto base_changed;
9442 }
9443 if ((attname != NULL) && (attvalue != NULL)) {
9444 if (len < 0) len = xmlStrlen(attvalue);
9445 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9446 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9447 xmlURIPtr uri;
9448
9449 if (URL == NULL) {
9450 xmlErrMemory(ctxt, "dictionary allocation failure");
9451 if ((attvalue != NULL) && (alloc != 0))
9452 xmlFree(attvalue);
9453 return(NULL);
9454 }
9455 if (*URL != 0) {
9456 uri = xmlParseURI((const char *) URL);
9457 if (uri == NULL) {
9458 xmlNsErr(ctxt, XML_WAR_NS_URI,
9459 "xmlns: '%s' is not a valid URI\n",
9460 URL, NULL, NULL);
9461 } else {
9462 if (uri->scheme == NULL) {
9463 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9464 "xmlns: URI %s is not absolute\n",
9465 URL, NULL, NULL);
9466 }
9467 xmlFreeURI(uri);
9468 }
9469 if (URL == ctxt->str_xml_ns) {
9470 if (attname != ctxt->str_xml) {
9471 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9472 "xml namespace URI cannot be the default namespace\n",
9473 NULL, NULL, NULL);
9474 }
9475 goto skip_default_ns;
9476 }
9477 if ((len == 29) &&
9478 (xmlStrEqual(URL,
9479 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9480 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9481 "reuse of the xmlns namespace name is forbidden\n",
9482 NULL, NULL, NULL);
9483 goto skip_default_ns;
9484 }
9485 }
9486 /*
9487 * check that it's not a defined namespace
9488 */
9489 for (j = 1;j <= nbNs;j++)
9490 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9491 break;
9492 if (j <= nbNs)
9493 xmlErrAttributeDup(ctxt, NULL, attname);
9494 else
9495 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9496skip_default_ns:
9497 if ((attvalue != NULL) && (alloc != 0)) {
9498 xmlFree(attvalue);
9499 attvalue = NULL;
9500 }
9501 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9502 break;
9503 if (!IS_BLANK_CH(RAW)) {
9504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9505 "attributes construct error\n");
9506 break;
9507 }
9508 SKIP_BLANKS;
9509 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9510 goto base_changed;
9511 continue;
9512 }
9513 if (aprefix == ctxt->str_xmlns) {
9514 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9515 xmlURIPtr uri;
9516
9517 if (attname == ctxt->str_xml) {
9518 if (URL != ctxt->str_xml_ns) {
9519 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9520 "xml namespace prefix mapped to wrong URI\n",
9521 NULL, NULL, NULL);
9522 }
9523 /*
9524 * Do not keep a namespace definition node
9525 */
9526 goto skip_ns;
9527 }
9528 if (URL == ctxt->str_xml_ns) {
9529 if (attname != ctxt->str_xml) {
9530 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9531 "xml namespace URI mapped to wrong prefix\n",
9532 NULL, NULL, NULL);
9533 }
9534 goto skip_ns;
9535 }
9536 if (attname == ctxt->str_xmlns) {
9537 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9538 "redefinition of the xmlns prefix is forbidden\n",
9539 NULL, NULL, NULL);
9540 goto skip_ns;
9541 }
9542 if ((len == 29) &&
9543 (xmlStrEqual(URL,
9544 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9546 "reuse of the xmlns namespace name is forbidden\n",
9547 NULL, NULL, NULL);
9548 goto skip_ns;
9549 }
9550 if ((URL == NULL) || (URL[0] == 0)) {
9551 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9552 "xmlns:%s: Empty XML namespace is not allowed\n",
9553 attname, NULL, NULL);
9554 goto skip_ns;
9555 } else {
9556 uri = xmlParseURI((const char *) URL);
9557 if (uri == NULL) {
9558 xmlNsErr(ctxt, XML_WAR_NS_URI,
9559 "xmlns:%s: '%s' is not a valid URI\n",
9560 attname, URL, NULL);
9561 } else {
9562 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9563 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9564 "xmlns:%s: URI %s is not absolute\n",
9565 attname, URL, NULL);
9566 }
9567 xmlFreeURI(uri);
9568 }
9569 }
9570
9571 /*
9572 * check that it's not a defined namespace
9573 */
9574 for (j = 1;j <= nbNs;j++)
9575 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9576 break;
9577 if (j <= nbNs)
9578 xmlErrAttributeDup(ctxt, aprefix, attname);
9579 else
9580 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9581skip_ns:
9582 if ((attvalue != NULL) && (alloc != 0)) {
9583 xmlFree(attvalue);
9584 attvalue = NULL;
9585 }
9586 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9587 break;
9588 if (!IS_BLANK_CH(RAW)) {
9589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9590 "attributes construct error\n");
9591 break;
9592 }
9593 SKIP_BLANKS;
9594 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9595 goto base_changed;
9596 continue;
9597 }
9598
9599 /*
9600 * Add the pair to atts
9601 */
9602 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9603 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9604 if (attvalue[len] == 0)
9605 xmlFree(attvalue);
9606 goto failed;
9607 }
9608 maxatts = ctxt->maxatts;
9609 atts = ctxt->atts;
9610 }
9611 ctxt->attallocs[nratts++] = alloc;
9612 atts[nbatts++] = attname;
9613 atts[nbatts++] = aprefix;
9614 atts[nbatts++] = NULL; /* the URI will be fetched later */
9615 atts[nbatts++] = attvalue;
9616 attvalue += len;
9617 atts[nbatts++] = attvalue;
9618 /*
9619 * tag if some deallocation is needed
9620 */
9621 if (alloc != 0) attval = 1;
9622 } else {
9623 if ((attvalue != NULL) && (attvalue[len] == 0))
9624 xmlFree(attvalue);
9625 }
9626
9627failed:
9628
9629 GROW
9630 if (ctxt->instate == XML_PARSER_EOF)
9631 break;
9632 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9633 goto base_changed;
9634 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9635 break;
9636 if (!IS_BLANK_CH(RAW)) {
9637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9638 "attributes construct error\n");
9639 break;
9640 }
9641 SKIP_BLANKS;
9642 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9643 (attname == NULL) && (attvalue == NULL)) {
9644 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9645 "xmlParseStartTag: problem parsing attributes\n");
9646 break;
9647 }
9648 GROW;
9649 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9650 goto base_changed;
9651 }
9652
9653 /*
9654 * The attributes defaulting
9655 */
9656 if (ctxt->attsDefault != NULL) {
9657 xmlDefAttrsPtr defaults;
9658
9659 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9660 if (defaults != NULL) {
9661 for (i = 0;i < defaults->nbAttrs;i++) {
9662 attname = defaults->values[5 * i];
9663 aprefix = defaults->values[5 * i + 1];
9664
9665 /*
9666 * special work for namespaces defaulted defs
9667 */
9668 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9669 /*
9670 * check that it's not a defined namespace
9671 */
9672 for (j = 1;j <= nbNs;j++)
9673 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9674 break;
9675 if (j <= nbNs) continue;
9676
9677 nsname = xmlGetNamespace(ctxt, NULL);
9678 if (nsname != defaults->values[5 * i + 2]) {
9679 if (nsPush(ctxt, NULL,
9680 defaults->values[5 * i + 2]) > 0)
9681 nbNs++;
9682 }
9683 } else if (aprefix == ctxt->str_xmlns) {
9684 /*
9685 * check that it's not a defined namespace
9686 */
9687 for (j = 1;j <= nbNs;j++)
9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9689 break;
9690 if (j <= nbNs) continue;
9691
9692 nsname = xmlGetNamespace(ctxt, attname);
9693 if (nsname != defaults->values[2]) {
9694 if (nsPush(ctxt, attname,
9695 defaults->values[5 * i + 2]) > 0)
9696 nbNs++;
9697 }
9698 } else {
9699 /*
9700 * check that it's not a defined attribute
9701 */
9702 for (j = 0;j < nbatts;j+=5) {
9703 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9704 break;
9705 }
9706 if (j < nbatts) continue;
9707
9708 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9709 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9710 return(NULL);
9711 }
9712 maxatts = ctxt->maxatts;
9713 atts = ctxt->atts;
9714 }
9715 atts[nbatts++] = attname;
9716 atts[nbatts++] = aprefix;
9717 if (aprefix == NULL)
9718 atts[nbatts++] = NULL;
9719 else
9720 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9721 atts[nbatts++] = defaults->values[5 * i + 2];
9722 atts[nbatts++] = defaults->values[5 * i + 3];
9723 if ((ctxt->standalone == 1) &&
9724 (defaults->values[5 * i + 4] != NULL)) {
9725 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9726 "standalone: attribute %s on %s defaulted from external subset\n",
9727 attname, localname);
9728 }
9729 nbdef++;
9730 }
9731 }
9732 }
9733 }
9734
9735 /*
9736 * The attributes checkings
9737 */
9738 for (i = 0; i < nbatts;i += 5) {
9739 /*
9740 * The default namespace does not apply to attribute names.
9741 */
9742 if (atts[i + 1] != NULL) {
9743 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9744 if (nsname == NULL) {
9745 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9746 "Namespace prefix %s for %s on %s is not defined\n",
9747 atts[i + 1], atts[i], localname);
9748 }
9749 atts[i + 2] = nsname;
9750 } else
9751 nsname = NULL;
9752 /*
9753 * [ WFC: Unique Att Spec ]
9754 * No attribute name may appear more than once in the same
9755 * start-tag or empty-element tag.
9756 * As extended by the Namespace in XML REC.
9757 */
9758 for (j = 0; j < i;j += 5) {
9759 if (atts[i] == atts[j]) {
9760 if (atts[i+1] == atts[j+1]) {
9761 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9762 break;
9763 }
9764 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9765 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9766 "Namespaced Attribute %s in '%s' redefined\n",
9767 atts[i], nsname, NULL);
9768 break;
9769 }
9770 }
9771 }
9772 }
9773
9774 nsname = xmlGetNamespace(ctxt, prefix);
9775 if ((prefix != NULL) && (nsname == NULL)) {
9776 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9777 "Namespace prefix %s on %s is not defined\n",
9778 prefix, localname, NULL);
9779 }
9780 *pref = prefix;
9781 *URI = nsname;
9782
9783 /*
9784 * SAX: Start of Element !
9785 */
9786 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9787 (!ctxt->disableSAX)) {
9788 if (nbNs > 0)
9789 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9790 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9791 nbatts / 5, nbdef, atts);
9792 else
9793 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9794 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9795 }
9796
9797 /*
9798 * Free up attribute allocated strings if needed
9799 */
9800 if (attval != 0) {
9801 for (i = 3,j = 0; j < nratts;i += 5,j++)
9802 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9803 xmlFree((xmlChar *) atts[i]);
9804 }
9805
9806 return(localname);
9807
9808base_changed:
9809 /*
9810 * the attribute strings are valid iif the base didn't changed
9811 */
9812 if (attval != 0) {
9813 for (i = 3,j = 0; j < nratts;i += 5,j++)
9814 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9815 xmlFree((xmlChar *) atts[i]);
9816 }
9817
9818 /*
9819 * We can't switch from one entity to another in the middle
9820 * of a start tag
9821 */
9822 if (inputNr != ctxt->inputNr) {
9823 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9824 "Start tag doesn't start and stop in the same entity\n");
9825 return(NULL);
9826 }
9827
9828 ctxt->input->cur = ctxt->input->base + cur;
9829 ctxt->input->line = oldline;
9830 ctxt->input->col = oldcol;
9831 if (ctxt->wellFormed == 1) {
9832 goto reparse;
9833 }
9834 return(NULL);
9835}
9836
9837/**
9838 * xmlParseEndTag2:
9839 * @ctxt: an XML parser context
9840 * @line: line of the start tag
9841 * @nsNr: number of namespaces on the start tag
9842 *
9843 * parse an end of tag
9844 *
9845 * [42] ETag ::= '</' Name S? '>'
9846 *
9847 * With namespace
9848 *
9849 * [NS 9] ETag ::= '</' QName S? '>'
9850 */
9851
9852static void
9853xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9854 const xmlChar *URI, int line, int nsNr, int tlen) {
9855 const xmlChar *name;
9856 size_t curLength;
9857
9858 GROW;
9859 if ((RAW != '<') || (NXT(1) != '/')) {
9860 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9861 return;
9862 }
9863 SKIP(2);
9864
9865 curLength = ctxt->input->end - ctxt->input->cur;
9866 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9867 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9868 if ((curLength >= (size_t)(tlen + 1)) &&
9869 (ctxt->input->cur[tlen] == '>')) {
9870 ctxt->input->cur += tlen + 1;
9871 ctxt->input->col += tlen + 1;
9872 goto done;
9873 }
9874 ctxt->input->cur += tlen;
9875 ctxt->input->col += tlen;
9876 name = (xmlChar*)1;
9877 } else {
9878 if (prefix == NULL)
9879 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9880 else
9881 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9882 }
9883
9884 /*
9885 * We should definitely be at the ending "S? '>'" part
9886 */
9887 GROW;
9888 if (ctxt->instate == XML_PARSER_EOF)
9889 return;
9890 SKIP_BLANKS;
9891 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9892 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9893 } else
9894 NEXT1;
9895
9896 /*
9897 * [ WFC: Element Type Match ]
9898 * The Name in an element's end-tag must match the element type in the
9899 * start-tag.
9900 *
9901 */
9902 if (name != (xmlChar*)1) {
9903 if (name == NULL) name = BAD_CAST "unparseable";
9904 if ((line == 0) && (ctxt->node != NULL))
9905 line = ctxt->node->line;
9906 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9907 "Opening and ending tag mismatch: %s line %d and %s\n",
9908 ctxt->name, line, name);
9909 }
9910
9911 /*
9912 * SAX: End of Tag
9913 */
9914done:
9915 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9916 (!ctxt->disableSAX))
9917 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9918
9919 spacePop(ctxt);
9920 if (nsNr != 0)
9921 nsPop(ctxt, nsNr);
9922 return;
9923}
9924
9925/**
9926 * xmlParseCDSect:
9927 * @ctxt: an XML parser context
9928 *
9929 * Parse escaped pure raw content.
9930 *
9931 * [18] CDSect ::= CDStart CData CDEnd
9932 *
9933 * [19] CDStart ::= '<![CDATA['
9934 *
9935 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9936 *
9937 * [21] CDEnd ::= ']]>'
9938 */
9939void
9940xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9941 xmlChar *buf = NULL;
9942 int len = 0;
9943 int size = XML_PARSER_BUFFER_SIZE;
9944 int r, rl;
9945 int s, sl;
9946 int cur, l;
9947 int count = 0;
9948
9949 /* Check 2.6.0 was NXT(0) not RAW */
9950 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9951 SKIP(9);
9952 } else
9953 return;
9954
9955 ctxt->instate = XML_PARSER_CDATA_SECTION;
9956 r = CUR_CHAR(rl);
9957 if (!IS_CHAR(r)) {
9958 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9959 ctxt->instate = XML_PARSER_CONTENT;
9960 return;
9961 }
9962 NEXTL(rl);
9963 s = CUR_CHAR(sl);
9964 if (!IS_CHAR(s)) {
9965 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9966 ctxt->instate = XML_PARSER_CONTENT;
9967 return;
9968 }
9969 NEXTL(sl);
9970 cur = CUR_CHAR(l);
9971 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9972 if (buf == NULL) {
9973 xmlErrMemory(ctxt, NULL);
9974 return;
9975 }
9976 while (IS_CHAR(cur) &&
9977 ((r != ']') || (s != ']') || (cur != '>'))) {
9978 if (len + 5 >= size) {
9979 xmlChar *tmp;
9980
9981 if ((size > XML_MAX_TEXT_LENGTH) &&
9982 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9983 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9984 "CData section too big found", NULL);
9985 xmlFree (buf);
9986 return;
9987 }
9988 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9989 if (tmp == NULL) {
9990 xmlFree(buf);
9991 xmlErrMemory(ctxt, NULL);
9992 return;
9993 }
9994 buf = tmp;
9995 size *= 2;
9996 }
9997 COPY_BUF(rl,buf,len,r);
9998 r = s;
9999 rl = sl;
10000 s = cur;
10001 sl = l;
10002 count++;
10003 if (count > 50) {
10004 GROW;
10005 if (ctxt->instate == XML_PARSER_EOF) {
10006 xmlFree(buf);
10007 return;
10008 }
10009 count = 0;
10010 }
10011 NEXTL(l);
10012 cur = CUR_CHAR(l);
10013 }
10014 buf[len] = 0;
10015 ctxt->instate = XML_PARSER_CONTENT;
10016 if (cur != '>') {
10017 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10018 "CData section not finished\n%.50s\n", buf);
10019 xmlFree(buf);
10020 return;
10021 }
10022 NEXTL(l);
10023
10024 /*
10025 * OK the buffer is to be consumed as cdata.
10026 */
10027 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10028 if (ctxt->sax->cdataBlock != NULL)
10029 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10030 else if (ctxt->sax->characters != NULL)
10031 ctxt->sax->characters(ctxt->userData, buf, len);
10032 }
10033 xmlFree(buf);
10034}
10035
10036/**
10037 * xmlParseContent:
10038 * @ctxt: an XML parser context
10039 *
10040 * Parse a content:
10041 *
10042 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10043 */
10044
10045void
10046xmlParseContent(xmlParserCtxtPtr ctxt) {
10047 GROW;
10048 while ((RAW != 0) &&
10049 ((RAW != '<') || (NXT(1) != '/')) &&
10050 (ctxt->instate != XML_PARSER_EOF)) {
10051 const xmlChar *test = CUR_PTR;
10052 unsigned int cons = ctxt->input->consumed;
10053 const xmlChar *cur = ctxt->input->cur;
10054
10055 /*
10056 * First case : a Processing Instruction.
10057 */
10058 if ((*cur == '<') && (cur[1] == '?')) {
10059 xmlParsePI(ctxt);
10060 }
10061
10062 /*
10063 * Second case : a CDSection
10064 */
10065 /* 2.6.0 test was *cur not RAW */
10066 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10067 xmlParseCDSect(ctxt);
10068 }
10069
10070 /*
10071 * Third case : a comment
10072 */
10073 else if ((*cur == '<') && (NXT(1) == '!') &&
10074 (NXT(2) == '-') && (NXT(3) == '-')) {
10075 xmlParseComment(ctxt);
10076 ctxt->instate = XML_PARSER_CONTENT;
10077 }
10078
10079 /*
10080 * Fourth case : a sub-element.
10081 */
10082 else if (*cur == '<') {
10083 xmlParseElement(ctxt);
10084 }
10085
10086 /*
10087 * Fifth case : a reference. If if has not been resolved,
10088 * parsing returns it's Name, create the node
10089 */
10090
10091 else if (*cur == '&') {
10092 xmlParseReference(ctxt);
10093 }
10094
10095 /*
10096 * Last case, text. Note that References are handled directly.
10097 */
10098 else {
10099 xmlParseCharData(ctxt, 0);
10100 }
10101
10102 GROW;
10103 /*
10104 * Pop-up of finished entities.
10105 */
10106 while ((RAW == 0) && (ctxt->inputNr > 1))
10107 xmlPopInput(ctxt);
10108 SHRINK;
10109
10110 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10111 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10112 "detected an error in element content\n");
10113 xmlHaltParser(ctxt);
10114 break;
10115 }
10116 }
10117}
10118
10119/**
10120 * xmlParseElement:
10121 * @ctxt: an XML parser context
10122 *
10123 * parse an XML element, this is highly recursive
10124 *
10125 * [39] element ::= EmptyElemTag | STag content ETag
10126 *
10127 * [ WFC: Element Type Match ]
10128 * The Name in an element's end-tag must match the element type in the
10129 * start-tag.
10130 *
10131 */
10132
10133void
10134xmlParseElement(xmlParserCtxtPtr ctxt) {
10135 const xmlChar *name;
10136 const xmlChar *prefix = NULL;
10137 const xmlChar *URI = NULL;
10138 xmlParserNodeInfo node_info;
10139 int line, tlen = 0;
10140 xmlNodePtr ret;
10141 int nsNr = ctxt->nsNr;
10142
10143 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10144 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10145 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10146 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10147 xmlParserMaxDepth);
10148 xmlHaltParser(ctxt);
10149 return;
10150 }
10151
10152 /* Capture start position */
10153 if (ctxt->record_info) {
10154 node_info.begin_pos = ctxt->input->consumed +
10155 (CUR_PTR - ctxt->input->base);
10156 node_info.begin_line = ctxt->input->line;
10157 }
10158
10159 if (ctxt->spaceNr == 0)
10160 spacePush(ctxt, -1);
10161 else if (*ctxt->space == -2)
10162 spacePush(ctxt, -1);
10163 else
10164 spacePush(ctxt, *ctxt->space);
10165
10166 line = ctxt->input->line;
10167#ifdef LIBXML_SAX1_ENABLED
10168 if (ctxt->sax2)
10169#endif /* LIBXML_SAX1_ENABLED */
10170 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10171#ifdef LIBXML_SAX1_ENABLED
10172 else
10173 name = xmlParseStartTag(ctxt);
10174#endif /* LIBXML_SAX1_ENABLED */
10175 if (ctxt->instate == XML_PARSER_EOF)
10176 return;
10177 if (name == NULL) {
10178 spacePop(ctxt);
10179 return;
10180 }
10181 namePush(ctxt, name);
10182 ret = ctxt->node;
10183
10184#ifdef LIBXML_VALID_ENABLED
10185 /*
10186 * [ VC: Root Element Type ]
10187 * The Name in the document type declaration must match the element
10188 * type of the root element.
10189 */
10190 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10191 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10192 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10193#endif /* LIBXML_VALID_ENABLED */
10194
10195 /*
10196 * Check for an Empty Element.
10197 */
10198 if ((RAW == '/') && (NXT(1) == '>')) {
10199 SKIP(2);
10200 if (ctxt->sax2) {
10201 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10202 (!ctxt->disableSAX))
10203 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10204#ifdef LIBXML_SAX1_ENABLED
10205 } else {
10206 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10207 (!ctxt->disableSAX))
10208 ctxt->sax->endElement(ctxt->userData, name);
10209#endif /* LIBXML_SAX1_ENABLED */
10210 }
10211 namePop(ctxt);
10212 spacePop(ctxt);
10213 if (nsNr != ctxt->nsNr)
10214 nsPop(ctxt, ctxt->nsNr - nsNr);
10215 if ( ret != NULL && ctxt->record_info ) {
10216 node_info.end_pos = ctxt->input->consumed +
10217 (CUR_PTR - ctxt->input->base);
10218 node_info.end_line = ctxt->input->line;
10219 node_info.node = ret;
10220 xmlParserAddNodeInfo(ctxt, &node_info);
10221 }
10222 return;
10223 }
10224 if (RAW == '>') {
10225 NEXT1;
10226 } else {
10227 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10228 "Couldn't find end of Start Tag %s line %d\n",
10229 name, line, NULL);
10230
10231 /*
10232 * end of parsing of this node.
10233 */
10234 nodePop(ctxt);
10235 namePop(ctxt);
10236 spacePop(ctxt);
10237 if (nsNr != ctxt->nsNr)
10238 nsPop(ctxt, ctxt->nsNr - nsNr);
10239
10240 /*
10241 * Capture end position and add node
10242 */
10243 if ( ret != NULL && ctxt->record_info ) {
10244 node_info.end_pos = ctxt->input->consumed +
10245 (CUR_PTR - ctxt->input->base);
10246 node_info.end_line = ctxt->input->line;
10247 node_info.node = ret;
10248 xmlParserAddNodeInfo(ctxt, &node_info);
10249 }
10250 return;
10251 }
10252
10253 /*
10254 * Parse the content of the element:
10255 */
10256 xmlParseContent(ctxt);
10257 if (ctxt->instate == XML_PARSER_EOF)
10258 return;
10259 if (!IS_BYTE_CHAR(RAW)) {
10260 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10261 "Premature end of data in tag %s line %d\n",
10262 name, line, NULL);
10263
10264 /*
10265 * end of parsing of this node.
10266 */
10267 nodePop(ctxt);
10268 namePop(ctxt);
10269 spacePop(ctxt);
10270 if (nsNr != ctxt->nsNr)
10271 nsPop(ctxt, ctxt->nsNr - nsNr);
10272 return;
10273 }
10274
10275 /*
10276 * parse the end of tag: '</' should be here.
10277 */
10278 if (ctxt->sax2) {
10279 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10280 namePop(ctxt);
10281 }
10282#ifdef LIBXML_SAX1_ENABLED
10283 else
10284 xmlParseEndTag1(ctxt, line);
10285#endif /* LIBXML_SAX1_ENABLED */
10286
10287 /*
10288 * Capture end position and add node
10289 */
10290 if ( ret != NULL && ctxt->record_info ) {
10291 node_info.end_pos = ctxt->input->consumed +
10292 (CUR_PTR - ctxt->input->base);
10293 node_info.end_line = ctxt->input->line;
10294 node_info.node = ret;
10295 xmlParserAddNodeInfo(ctxt, &node_info);
10296 }
10297}
10298
10299/**
10300 * xmlParseVersionNum:
10301 * @ctxt: an XML parser context
10302 *
10303 * parse the XML version value.
10304 *
10305 * [26] VersionNum ::= '1.' [0-9]+
10306 *
10307 * In practice allow [0-9].[0-9]+ at that level
10308 *
10309 * Returns the string giving the XML version number, or NULL
10310 */
10311xmlChar *
10312xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10313 xmlChar *buf = NULL;
10314 int len = 0;
10315 int size = 10;
10316 xmlChar cur;
10317
10318 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10319 if (buf == NULL) {
10320 xmlErrMemory(ctxt, NULL);
10321 return(NULL);
10322 }
10323 cur = CUR;
10324 if (!((cur >= '0') && (cur <= '9'))) {
10325 xmlFree(buf);
10326 return(NULL);
10327 }
10328 buf[len++] = cur;
10329 NEXT;
10330 cur=CUR;
10331 if (cur != '.') {
10332 xmlFree(buf);
10333 return(NULL);
10334 }
10335 buf[len++] = cur;
10336 NEXT;
10337 cur=CUR;
10338 while ((cur >= '0') && (cur <= '9')) {
10339 if (len + 1 >= size) {
10340 xmlChar *tmp;
10341
10342 size *= 2;
10343 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10344 if (tmp == NULL) {
10345 xmlFree(buf);
10346 xmlErrMemory(ctxt, NULL);
10347 return(NULL);
10348 }
10349 buf = tmp;
10350 }
10351 buf[len++] = cur;
10352 NEXT;
10353 cur=CUR;
10354 }
10355 buf[len] = 0;
10356 return(buf);
10357}
10358
10359/**
10360 * xmlParseVersionInfo:
10361 * @ctxt: an XML parser context
10362 *
10363 * parse the XML version.
10364 *
10365 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10366 *
10367 * [25] Eq ::= S? '=' S?
10368 *
10369 * Returns the version string, e.g. "1.0"
10370 */
10371
10372xmlChar *
10373xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10374 xmlChar *version = NULL;
10375
10376 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10377 SKIP(7);
10378 SKIP_BLANKS;
10379 if (RAW != '=') {
10380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10381 return(NULL);
10382 }
10383 NEXT;
10384 SKIP_BLANKS;
10385 if (RAW == '"') {
10386 NEXT;
10387 version = xmlParseVersionNum(ctxt);
10388 if (RAW != '"') {
10389 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10390 } else
10391 NEXT;
10392 } else if (RAW == '\''){
10393 NEXT;
10394 version = xmlParseVersionNum(ctxt);
10395 if (RAW != '\'') {
10396 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10397 } else
10398 NEXT;
10399 } else {
10400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10401 }
10402 }
10403 return(version);
10404}
10405
10406/**
10407 * xmlParseEncName:
10408 * @ctxt: an XML parser context
10409 *
10410 * parse the XML encoding name
10411 *
10412 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10413 *
10414 * Returns the encoding name value or NULL
10415 */
10416xmlChar *
10417xmlParseEncName(xmlParserCtxtPtr ctxt) {
10418 xmlChar *buf = NULL;
10419 int len = 0;
10420 int size = 10;
10421 xmlChar cur;
10422
10423 cur = CUR;
10424 if (((cur >= 'a') && (cur <= 'z')) ||
10425 ((cur >= 'A') && (cur <= 'Z'))) {
10426 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10427 if (buf == NULL) {
10428 xmlErrMemory(ctxt, NULL);
10429 return(NULL);
10430 }
10431
10432 buf[len++] = cur;
10433 NEXT;
10434 cur = CUR;
10435 while (((cur >= 'a') && (cur <= 'z')) ||
10436 ((cur >= 'A') && (cur <= 'Z')) ||
10437 ((cur >= '0') && (cur <= '9')) ||
10438 (cur == '.') || (cur == '_') ||
10439 (cur == '-')) {
10440 if (len + 1 >= size) {
10441 xmlChar *tmp;
10442
10443 size *= 2;
10444 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10445 if (tmp == NULL) {
10446 xmlErrMemory(ctxt, NULL);
10447 xmlFree(buf);
10448 return(NULL);
10449 }
10450 buf = tmp;
10451 }
10452 buf[len++] = cur;
10453 NEXT;
10454 cur = CUR;
10455 if (cur == 0) {
10456 SHRINK;
10457 GROW;
10458 cur = CUR;
10459 }
10460 }
10461 buf[len] = 0;
10462 } else {
10463 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10464 }
10465 return(buf);
10466}
10467
10468/**
10469 * xmlParseEncodingDecl:
10470 * @ctxt: an XML parser context
10471 *
10472 * parse the XML encoding declaration
10473 *
10474 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10475 *
10476 * this setups the conversion filters.
10477 *
10478 * Returns the encoding value or NULL
10479 */
10480
10481const xmlChar *
10482xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10483 xmlChar *encoding = NULL;
10484
10485 SKIP_BLANKS;
10486 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10487 SKIP(8);
10488 SKIP_BLANKS;
10489 if (RAW != '=') {
10490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10491 return(NULL);
10492 }
10493 NEXT;
10494 SKIP_BLANKS;
10495 if (RAW == '"') {
10496 NEXT;
10497 encoding = xmlParseEncName(ctxt);
10498 if (RAW != '"') {
10499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10500 xmlFree((xmlChar *) encoding);
10501 return(NULL);
10502 } else
10503 NEXT;
10504 } else if (RAW == '\''){
10505 NEXT;
10506 encoding = xmlParseEncName(ctxt);
10507 if (RAW != '\'') {
10508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10509 xmlFree((xmlChar *) encoding);
10510 return(NULL);
10511 } else
10512 NEXT;
10513 } else {
10514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10515 }
10516
10517 /*
10518 * Non standard parsing, allowing the user to ignore encoding
10519 */
10520 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10521 xmlFree((xmlChar *) encoding);
10522 return(NULL);
10523 }
10524
10525 /*
10526 * UTF-16 encoding stwich has already taken place at this stage,
10527 * more over the little-endian/big-endian selection is already done
10528 */
10529 if ((encoding != NULL) &&
10530 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10531 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10532 /*
10533 * If no encoding was passed to the parser, that we are
10534 * using UTF-16 and no decoder is present i.e. the
10535 * document is apparently UTF-8 compatible, then raise an
10536 * encoding mismatch fatal error
10537 */
10538 if ((ctxt->encoding == NULL) &&
10539 (ctxt->input->buf != NULL) &&
10540 (ctxt->input->buf->encoder == NULL)) {
10541 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10542 "Document labelled UTF-16 but has UTF-8 content\n");
10543 }
10544 if (ctxt->encoding != NULL)
10545 xmlFree((xmlChar *) ctxt->encoding);
10546 ctxt->encoding = encoding;
10547 }
10548 /*
10549 * UTF-8 encoding is handled natively
10550 */
10551 else if ((encoding != NULL) &&
10552 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10553 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10554 if (ctxt->encoding != NULL)
10555 xmlFree((xmlChar *) ctxt->encoding);
10556 ctxt->encoding = encoding;
10557 }
10558 else if (encoding != NULL) {
10559 xmlCharEncodingHandlerPtr handler;
10560
10561 if (ctxt->input->encoding != NULL)
10562 xmlFree((xmlChar *) ctxt->input->encoding);
10563 ctxt->input->encoding = encoding;
10564
10565 handler = xmlFindCharEncodingHandler((const char *) encoding);
10566 if (handler != NULL) {
10567 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10568 /* failed to convert */
10569 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10570 return(NULL);
10571 }
10572 } else {
10573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10574 "Unsupported encoding %s\n", encoding);
10575 return(NULL);
10576 }
10577 }
10578 }
10579 return(encoding);
10580}
10581
10582/**
10583 * xmlParseSDDecl:
10584 * @ctxt: an XML parser context
10585 *
10586 * parse the XML standalone declaration
10587 *
10588 * [32] SDDecl ::= S 'standalone' Eq
10589 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10590 *
10591 * [ VC: Standalone Document Declaration ]
10592 * TODO The standalone document declaration must have the value "no"
10593 * if any external markup declarations contain declarations of:
10594 * - attributes with default values, if elements to which these
10595 * attributes apply appear in the document without specifications
10596 * of values for these attributes, or
10597 * - entities (other than amp, lt, gt, apos, quot), if references
10598 * to those entities appear in the document, or
10599 * - attributes with values subject to normalization, where the
10600 * attribute appears in the document with a value which will change
10601 * as a result of normalization, or
10602 * - element types with element content, if white space occurs directly
10603 * within any instance of those types.
10604 *
10605 * Returns:
10606 * 1 if standalone="yes"
10607 * 0 if standalone="no"
10608 * -2 if standalone attribute is missing or invalid
10609 * (A standalone value of -2 means that the XML declaration was found,
10610 * but no value was specified for the standalone attribute).
10611 */
10612
10613int
10614xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10615 int standalone = -2;
10616
10617 SKIP_BLANKS;
10618 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10619 SKIP(10);
10620 SKIP_BLANKS;
10621 if (RAW != '=') {
10622 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10623 return(standalone);
10624 }
10625 NEXT;
10626 SKIP_BLANKS;
10627 if (RAW == '\''){
10628 NEXT;
10629 if ((RAW == 'n') && (NXT(1) == 'o')) {
10630 standalone = 0;
10631 SKIP(2);
10632 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10633 (NXT(2) == 's')) {
10634 standalone = 1;
10635 SKIP(3);
10636 } else {
10637 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10638 }
10639 if (RAW != '\'') {
10640 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10641 } else
10642 NEXT;
10643 } else if (RAW == '"'){
10644 NEXT;
10645 if ((RAW == 'n') && (NXT(1) == 'o')) {
10646 standalone = 0;
10647 SKIP(2);
10648 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10649 (NXT(2) == 's')) {
10650 standalone = 1;
10651 SKIP(3);
10652 } else {
10653 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10654 }
10655 if (RAW != '"') {
10656 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10657 } else
10658 NEXT;
10659 } else {
10660 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10661 }
10662 }
10663 return(standalone);
10664}
10665
10666/**
10667 * xmlParseXMLDecl:
10668 * @ctxt: an XML parser context
10669 *
10670 * parse an XML declaration header
10671 *
10672 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10673 */
10674
10675void
10676xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10677 xmlChar *version;
10678
10679 /*
10680 * This value for standalone indicates that the document has an
10681 * XML declaration but it does not have a standalone attribute.
10682 * It will be overwritten later if a standalone attribute is found.
10683 */
10684 ctxt->input->standalone = -2;
10685
10686 /*
10687 * We know that '<?xml' is here.
10688 */
10689 SKIP(5);
10690
10691 if (!IS_BLANK_CH(RAW)) {
10692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10693 "Blank needed after '<?xml'\n");
10694 }
10695 SKIP_BLANKS;
10696
10697 /*
10698 * We must have the VersionInfo here.
10699 */
10700 version = xmlParseVersionInfo(ctxt);
10701 if (version == NULL) {
10702 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10703 } else {
10704 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10705 /*
10706 * Changed here for XML-1.0 5th edition
10707 */
10708 if (ctxt->options & XML_PARSE_OLD10) {
10709 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10710 "Unsupported version '%s'\n",
10711 version);
10712 } else {
10713 if ((version[0] == '1') && ((version[1] == '.'))) {
10714 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10715 "Unsupported version '%s'\n",
10716 version, NULL);
10717 } else {
10718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10719 "Unsupported version '%s'\n",
10720 version);
10721 }
10722 }
10723 }
10724 if (ctxt->version != NULL)
10725 xmlFree((void *) ctxt->version);
10726 ctxt->version = version;
10727 }
10728
10729 /*
10730 * We may have the encoding declaration
10731 */
10732 if (!IS_BLANK_CH(RAW)) {
10733 if ((RAW == '?') && (NXT(1) == '>')) {
10734 SKIP(2);
10735 return;
10736 }
10737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10738 }
10739 xmlParseEncodingDecl(ctxt);
10740 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10741 (ctxt->instate == XML_PARSER_EOF)) {
10742 /*
10743 * The XML REC instructs us to stop parsing right here
10744 */
10745 return;
10746 }
10747
10748 /*
10749 * We may have the standalone status.
10750 */
10751 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10752 if ((RAW == '?') && (NXT(1) == '>')) {
10753 SKIP(2);
10754 return;
10755 }
10756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10757 }
10758
10759 /*
10760 * We can grow the input buffer freely at that point
10761 */
10762 GROW;
10763
10764 SKIP_BLANKS;
10765 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10766
10767 SKIP_BLANKS;
10768 if ((RAW == '?') && (NXT(1) == '>')) {
10769 SKIP(2);
10770 } else if (RAW == '>') {
10771 /* Deprecated old WD ... */
10772 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10773 NEXT;
10774 } else {
10775 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10776 MOVETO_ENDTAG(CUR_PTR);
10777 NEXT;
10778 }
10779}
10780
10781/**
10782 * xmlParseMisc:
10783 * @ctxt: an XML parser context
10784 *
10785 * parse an XML Misc* optional field.
10786 *
10787 * [27] Misc ::= Comment | PI | S
10788 */
10789
10790void
10791xmlParseMisc(xmlParserCtxtPtr ctxt) {
10792 while ((ctxt->instate != XML_PARSER_EOF) &&
10793 (((RAW == '<') && (NXT(1) == '?')) ||
10794 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10795 IS_BLANK_CH(CUR))) {
10796 if ((RAW == '<') && (NXT(1) == '?')) {
10797 xmlParsePI(ctxt);
10798 } else if (IS_BLANK_CH(CUR)) {
10799 NEXT;
10800 } else
10801 xmlParseComment(ctxt);
10802 }
10803}
10804
10805/**
10806 * xmlParseDocument:
10807 * @ctxt: an XML parser context
10808 *
10809 * parse an XML document (and build a tree if using the standard SAX
10810 * interface).
10811 *
10812 * [1] document ::= prolog element Misc*
10813 *
10814 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10815 *
10816 * Returns 0, -1 in case of error. the parser context is augmented
10817 * as a result of the parsing.
10818 */
10819
10820int
10821xmlParseDocument(xmlParserCtxtPtr ctxt) {
10822 xmlChar start[4];
10823 xmlCharEncoding enc;
10824
10825 xmlInitParser();
10826
10827 if ((ctxt == NULL) || (ctxt->input == NULL))
10828 return(-1);
10829
10830 GROW;
10831
10832 /*
10833 * SAX: detecting the level.
10834 */
10835 xmlDetectSAX2(ctxt);
10836
10837 /*
10838 * SAX: beginning of the document processing.
10839 */
10840 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10841 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10842 if (ctxt->instate == XML_PARSER_EOF)
10843 return(-1);
10844
10845 if ((ctxt->encoding == NULL) &&
10846 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10847 /*
10848 * Get the 4 first bytes and decode the charset
10849 * if enc != XML_CHAR_ENCODING_NONE
10850 * plug some encoding conversion routines.
10851 */
10852 start[0] = RAW;
10853 start[1] = NXT(1);
10854 start[2] = NXT(2);
10855 start[3] = NXT(3);
10856 enc = xmlDetectCharEncoding(&start[0], 4);
10857 if (enc != XML_CHAR_ENCODING_NONE) {
10858 xmlSwitchEncoding(ctxt, enc);
10859 }
10860 }
10861
10862
10863 if (CUR == 0) {
10864 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10865 return(-1);
10866 }
10867
10868 /*
10869 * Check for the XMLDecl in the Prolog.
10870 * do not GROW here to avoid the detected encoder to decode more
10871 * than just the first line, unless the amount of data is really
10872 * too small to hold "<?xml version="1.0" encoding="foo"
10873 */
10874 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10875 GROW;
10876 }
10877 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10878
10879 /*
10880 * Note that we will switch encoding on the fly.
10881 */
10882 xmlParseXMLDecl(ctxt);
10883 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10884 (ctxt->instate == XML_PARSER_EOF)) {
10885 /*
10886 * The XML REC instructs us to stop parsing right here
10887 */
10888 return(-1);
10889 }
10890 ctxt->standalone = ctxt->input->standalone;
10891 SKIP_BLANKS;
10892 } else {
10893 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10894 }
10895 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10896 ctxt->sax->startDocument(ctxt->userData);
10897 if (ctxt->instate == XML_PARSER_EOF)
10898 return(-1);
10899 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10900 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10901 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10902 }
10903
10904 /*
10905 * The Misc part of the Prolog
10906 */
10907 GROW;
10908 xmlParseMisc(ctxt);
10909
10910 /*
10911 * Then possibly doc type declaration(s) and more Misc
10912 * (doctypedecl Misc*)?
10913 */
10914 GROW;
10915 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10916
10917 ctxt->inSubset = 1;
10918 xmlParseDocTypeDecl(ctxt);
10919 if (RAW == '[') {
10920 ctxt->instate = XML_PARSER_DTD;
10921 xmlParseInternalSubset(ctxt);
10922 if (ctxt->instate == XML_PARSER_EOF)
10923 return(-1);
10924 }
10925
10926 /*
10927 * Create and update the external subset.
10928 */
10929 ctxt->inSubset = 2;
10930 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10931 (!ctxt->disableSAX))
10932 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10933 ctxt->extSubSystem, ctxt->extSubURI);
10934 if (ctxt->instate == XML_PARSER_EOF)
10935 return(-1);
10936 ctxt->inSubset = 0;
10937
10938 xmlCleanSpecialAttr(ctxt);
10939
10940 ctxt->instate = XML_PARSER_PROLOG;
10941 xmlParseMisc(ctxt);
10942 }
10943
10944 /*
10945 * Time to start parsing the tree itself
10946 */
10947 GROW;
10948 if (RAW != '<') {
10949 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10950 "Start tag expected, '<' not found\n");
10951 } else {
10952 ctxt->instate = XML_PARSER_CONTENT;
10953 xmlParseElement(ctxt);
10954 ctxt->instate = XML_PARSER_EPILOG;
10955
10956
10957 /*
10958 * The Misc part at the end
10959 */
10960 xmlParseMisc(ctxt);
10961
10962 if (RAW != 0) {
10963 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10964 }
10965 ctxt->instate = XML_PARSER_EOF;
10966 }
10967
10968 /*
10969 * SAX: end of the document processing.
10970 */
10971 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10972 ctxt->sax->endDocument(ctxt->userData);
10973
10974 /*
10975 * Remove locally kept entity definitions if the tree was not built
10976 */
10977 if ((ctxt->myDoc != NULL) &&
10978 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10979 xmlFreeDoc(ctxt->myDoc);
10980 ctxt->myDoc = NULL;
10981 }
10982
10983 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10984 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10985 if (ctxt->valid)
10986 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10987 if (ctxt->nsWellFormed)
10988 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10989 if (ctxt->options & XML_PARSE_OLD10)
10990 ctxt->myDoc->properties |= XML_DOC_OLD10;
10991 }
10992 if (! ctxt->wellFormed) {
10993 ctxt->valid = 0;
10994 return(-1);
10995 }
10996 return(0);
10997}
10998
10999/**
11000 * xmlParseExtParsedEnt:
11001 * @ctxt: an XML parser context
11002 *
11003 * parse a general parsed entity
11004 * An external general parsed entity is well-formed if it matches the
11005 * production labeled extParsedEnt.
11006 *
11007 * [78] extParsedEnt ::= TextDecl? content
11008 *
11009 * Returns 0, -1 in case of error. the parser context is augmented
11010 * as a result of the parsing.
11011 */
11012
11013int
11014xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11015 xmlChar start[4];
11016 xmlCharEncoding enc;
11017
11018 if ((ctxt == NULL) || (ctxt->input == NULL))
11019 return(-1);
11020
11021 xmlDefaultSAXHandlerInit();
11022
11023 xmlDetectSAX2(ctxt);
11024
11025 GROW;
11026
11027 /*
11028 * SAX: beginning of the document processing.
11029 */
11030 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11031 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11032
11033 /*
11034 * Get the 4 first bytes and decode the charset
11035 * if enc != XML_CHAR_ENCODING_NONE
11036 * plug some encoding conversion routines.
11037 */
11038 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11039 start[0] = RAW;
11040 start[1] = NXT(1);
11041 start[2] = NXT(2);
11042 start[3] = NXT(3);
11043 enc = xmlDetectCharEncoding(start, 4);
11044 if (enc != XML_CHAR_ENCODING_NONE) {
11045 xmlSwitchEncoding(ctxt, enc);
11046 }
11047 }
11048
11049
11050 if (CUR == 0) {
11051 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11052 }
11053
11054 /*
11055 * Check for the XMLDecl in the Prolog.
11056 */
11057 GROW;
11058 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11059
11060 /*
11061 * Note that we will switch encoding on the fly.
11062 */
11063 xmlParseXMLDecl(ctxt);
11064 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11065 /*
11066 * The XML REC instructs us to stop parsing right here
11067 */
11068 return(-1);
11069 }
11070 SKIP_BLANKS;
11071 } else {
11072 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11073 }
11074 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11075 ctxt->sax->startDocument(ctxt->userData);
11076 if (ctxt->instate == XML_PARSER_EOF)
11077 return(-1);
11078
11079 /*
11080 * Doing validity checking on chunk doesn't make sense
11081 */
11082 ctxt->instate = XML_PARSER_CONTENT;
11083 ctxt->validate = 0;
11084 ctxt->loadsubset = 0;
11085 ctxt->depth = 0;
11086
11087 xmlParseContent(ctxt);
11088 if (ctxt->instate == XML_PARSER_EOF)
11089 return(-1);
11090
11091 if ((RAW == '<') && (NXT(1) == '/')) {
11092 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11093 } else if (RAW != 0) {
11094 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11095 }
11096
11097 /*
11098 * SAX: end of the document processing.
11099 */
11100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11101 ctxt->sax->endDocument(ctxt->userData);
11102
11103 if (! ctxt->wellFormed) return(-1);
11104 return(0);
11105}
11106
11107#ifdef LIBXML_PUSH_ENABLED
11108/************************************************************************
11109 * *
11110 * Progressive parsing interfaces *
11111 * *
11112 ************************************************************************/
11113
11114/**
11115 * xmlParseLookupSequence:
11116 * @ctxt: an XML parser context
11117 * @first: the first char to lookup
11118 * @next: the next char to lookup or zero
11119 * @third: the next char to lookup or zero
11120 *
11121 * Try to find if a sequence (first, next, third) or just (first next) or
11122 * (first) is available in the input stream.
11123 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11124 * to avoid rescanning sequences of bytes, it DOES change the state of the
11125 * parser, do not use liberally.
11126 *
11127 * Returns the index to the current parsing point if the full sequence
11128 * is available, -1 otherwise.
11129 */
11130static int
11131xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11132 xmlChar next, xmlChar third) {
11133 int base, len;
11134 xmlParserInputPtr in;
11135 const xmlChar *buf;
11136
11137 in = ctxt->input;
11138 if (in == NULL) return(-1);
11139 base = in->cur - in->base;
11140 if (base < 0) return(-1);
11141 if (ctxt->checkIndex > base)
11142 base = ctxt->checkIndex;
11143 if (in->buf == NULL) {
11144 buf = in->base;
11145 len = in->length;
11146 } else {
11147 buf = xmlBufContent(in->buf->buffer);
11148 len = xmlBufUse(in->buf->buffer);
11149 }
11150 /* take into account the sequence length */
11151 if (third) len -= 2;
11152 else if (next) len --;
11153 for (;base < len;base++) {
11154 if (buf[base] == first) {
11155 if (third != 0) {
11156 if ((buf[base + 1] != next) ||
11157 (buf[base + 2] != third)) continue;
11158 } else if (next != 0) {
11159 if (buf[base + 1] != next) continue;
11160 }
11161 ctxt->checkIndex = 0;
11162#ifdef DEBUG_PUSH
11163 if (next == 0)
11164 xmlGenericError(xmlGenericErrorContext,
11165 "PP: lookup '%c' found at %d\n",
11166 first, base);
11167 else if (third == 0)
11168 xmlGenericError(xmlGenericErrorContext,
11169 "PP: lookup '%c%c' found at %d\n",
11170 first, next, base);
11171 else
11172 xmlGenericError(xmlGenericErrorContext,
11173 "PP: lookup '%c%c%c' found at %d\n",
11174 first, next, third, base);
11175#endif
11176 return(base - (in->cur - in->base));
11177 }
11178 }
11179 ctxt->checkIndex = base;
11180#ifdef DEBUG_PUSH
11181 if (next == 0)
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: lookup '%c' failed\n", first);
11184 else if (third == 0)
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: lookup '%c%c' failed\n", first, next);
11187 else
11188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: lookup '%c%c%c' failed\n", first, next, third);
11190#endif
11191 return(-1);
11192}
11193
11194/**
11195 * xmlParseGetLasts:
11196 * @ctxt: an XML parser context
11197 * @lastlt: pointer to store the last '<' from the input
11198 * @lastgt: pointer to store the last '>' from the input
11199 *
11200 * Lookup the last < and > in the current chunk
11201 */
11202static void
11203xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11204 const xmlChar **lastgt) {
11205 const xmlChar *tmp;
11206
11207 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11208 xmlGenericError(xmlGenericErrorContext,
11209 "Internal error: xmlParseGetLasts\n");
11210 return;
11211 }
11212 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11213 tmp = ctxt->input->end;
11214 tmp--;
11215 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11216 if (tmp < ctxt->input->base) {
11217 *lastlt = NULL;
11218 *lastgt = NULL;
11219 } else {
11220 *lastlt = tmp;
11221 tmp++;
11222 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11223 if (*tmp == '\'') {
11224 tmp++;
11225 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11226 if (tmp < ctxt->input->end) tmp++;
11227 } else if (*tmp == '"') {
11228 tmp++;
11229 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11230 if (tmp < ctxt->input->end) tmp++;
11231 } else
11232 tmp++;
11233 }
11234 if (tmp < ctxt->input->end)
11235 *lastgt = tmp;
11236 else {
11237 tmp = *lastlt;
11238 tmp--;
11239 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11240 if (tmp >= ctxt->input->base)
11241 *lastgt = tmp;
11242 else
11243 *lastgt = NULL;
11244 }
11245 }
11246 } else {
11247 *lastlt = NULL;
11248 *lastgt = NULL;
11249 }
11250}
11251/**
11252 * xmlCheckCdataPush:
11253 * @cur: pointer to the block of characters
11254 * @len: length of the block in bytes
11255 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11256 *
11257 * Check that the block of characters is okay as SCdata content [20]
11258 *
11259 * Returns the number of bytes to pass if okay, a negative index where an
11260 * UTF-8 error occured otherwise
11261 */
11262static int
11263xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11264 int ix;
11265 unsigned char c;
11266 int codepoint;
11267
11268 if ((utf == NULL) || (len <= 0))
11269 return(0);
11270
11271 for (ix = 0; ix < len;) { /* string is 0-terminated */
11272 c = utf[ix];
11273 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11274 if (c >= 0x20)
11275 ix++;
11276 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11277 ix++;
11278 else
11279 return(-ix);
11280 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11281 if (ix + 2 > len) return(complete ? -ix : ix);
11282 if ((utf[ix+1] & 0xc0 ) != 0x80)
11283 return(-ix);
11284 codepoint = (utf[ix] & 0x1f) << 6;
11285 codepoint |= utf[ix+1] & 0x3f;
11286 if (!xmlIsCharQ(codepoint))
11287 return(-ix);
11288 ix += 2;
11289 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11290 if (ix + 3 > len) return(complete ? -ix : ix);
11291 if (((utf[ix+1] & 0xc0) != 0x80) ||
11292 ((utf[ix+2] & 0xc0) != 0x80))
11293 return(-ix);
11294 codepoint = (utf[ix] & 0xf) << 12;
11295 codepoint |= (utf[ix+1] & 0x3f) << 6;
11296 codepoint |= utf[ix+2] & 0x3f;
11297 if (!xmlIsCharQ(codepoint))
11298 return(-ix);
11299 ix += 3;
11300 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11301 if (ix + 4 > len) return(complete ? -ix : ix);
11302 if (((utf[ix+1] & 0xc0) != 0x80) ||
11303 ((utf[ix+2] & 0xc0) != 0x80) ||
11304 ((utf[ix+3] & 0xc0) != 0x80))
11305 return(-ix);
11306 codepoint = (utf[ix] & 0x7) << 18;
11307 codepoint |= (utf[ix+1] & 0x3f) << 12;
11308 codepoint |= (utf[ix+2] & 0x3f) << 6;
11309 codepoint |= utf[ix+3] & 0x3f;
11310 if (!xmlIsCharQ(codepoint))
11311 return(-ix);
11312 ix += 4;
11313 } else /* unknown encoding */
11314 return(-ix);
11315 }
11316 return(ix);
11317}
11318
11319/**
11320 * xmlParseTryOrFinish:
11321 * @ctxt: an XML parser context
11322 * @terminate: last chunk indicator
11323 *
11324 * Try to progress on parsing
11325 *
11326 * Returns zero if no parsing was possible
11327 */
11328static int
11329xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11330 int ret = 0;
11331 int avail, tlen;
11332 xmlChar cur, next;
11333 const xmlChar *lastlt, *lastgt;
11334
11335 if (ctxt->input == NULL)
11336 return(0);
11337
11338#ifdef DEBUG_PUSH
11339 switch (ctxt->instate) {
11340 case XML_PARSER_EOF:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try EOF\n"); break;
11343 case XML_PARSER_START:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: try START\n"); break;
11346 case XML_PARSER_MISC:
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: try MISC\n");break;
11349 case XML_PARSER_COMMENT:
11350 xmlGenericError(xmlGenericErrorContext,
11351 "PP: try COMMENT\n");break;
11352 case XML_PARSER_PROLOG:
11353 xmlGenericError(xmlGenericErrorContext,
11354 "PP: try PROLOG\n");break;
11355 case XML_PARSER_START_TAG:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: try START_TAG\n");break;
11358 case XML_PARSER_CONTENT:
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: try CONTENT\n");break;
11361 case XML_PARSER_CDATA_SECTION:
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: try CDATA_SECTION\n");break;
11364 case XML_PARSER_END_TAG:
11365 xmlGenericError(xmlGenericErrorContext,
11366 "PP: try END_TAG\n");break;
11367 case XML_PARSER_ENTITY_DECL:
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: try ENTITY_DECL\n");break;
11370 case XML_PARSER_ENTITY_VALUE:
11371 xmlGenericError(xmlGenericErrorContext,
11372 "PP: try ENTITY_VALUE\n");break;
11373 case XML_PARSER_ATTRIBUTE_VALUE:
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: try ATTRIBUTE_VALUE\n");break;
11376 case XML_PARSER_DTD:
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: try DTD\n");break;
11379 case XML_PARSER_EPILOG:
11380 xmlGenericError(xmlGenericErrorContext,
11381 "PP: try EPILOG\n");break;
11382 case XML_PARSER_PI:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: try PI\n");break;
11385 case XML_PARSER_IGNORE:
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: try IGNORE\n");break;
11388 }
11389#endif
11390
11391 if ((ctxt->input != NULL) &&
11392 (ctxt->input->cur - ctxt->input->base > 4096)) {
11393 xmlSHRINK(ctxt);
11394 ctxt->checkIndex = 0;
11395 }
11396 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11397
11398 while (ctxt->instate != XML_PARSER_EOF) {
11399 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11400 return(0);
11401
11402
11403 /*
11404 * Pop-up of finished entities.
11405 */
11406 while ((RAW == 0) && (ctxt->inputNr > 1))
11407 xmlPopInput(ctxt);
11408
11409 if (ctxt->input == NULL) break;
11410 if (ctxt->input->buf == NULL)
11411 avail = ctxt->input->length -
11412 (ctxt->input->cur - ctxt->input->base);
11413 else {
11414 /*
11415 * If we are operating on converted input, try to flush
11416 * remainng chars to avoid them stalling in the non-converted
11417 * buffer. But do not do this in document start where
11418 * encoding="..." may not have been read and we work on a
11419 * guessed encoding.
11420 */
11421 if ((ctxt->instate != XML_PARSER_START) &&
11422 (ctxt->input->buf->raw != NULL) &&
11423 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11425 ctxt->input);
11426 size_t current = ctxt->input->cur - ctxt->input->base;
11427
11428 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11429 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11430 base, current);
11431 }
11432 avail = xmlBufUse(ctxt->input->buf->buffer) -
11433 (ctxt->input->cur - ctxt->input->base);
11434 }
11435 if (avail < 1)
11436 goto done;
11437 switch (ctxt->instate) {
11438 case XML_PARSER_EOF:
11439 /*
11440 * Document parsing is done !
11441 */
11442 goto done;
11443 case XML_PARSER_START:
11444 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11445 xmlChar start[4];
11446 xmlCharEncoding enc;
11447
11448 /*
11449 * Very first chars read from the document flow.
11450 */
11451 if (avail < 4)
11452 goto done;
11453
11454 /*
11455 * Get the 4 first bytes and decode the charset
11456 * if enc != XML_CHAR_ENCODING_NONE
11457 * plug some encoding conversion routines,
11458 * else xmlSwitchEncoding will set to (default)
11459 * UTF8.
11460 */
11461 start[0] = RAW;
11462 start[1] = NXT(1);
11463 start[2] = NXT(2);
11464 start[3] = NXT(3);
11465 enc = xmlDetectCharEncoding(start, 4);
11466 xmlSwitchEncoding(ctxt, enc);
11467 break;
11468 }
11469
11470 if (avail < 2)
11471 goto done;
11472 cur = ctxt->input->cur[0];
11473 next = ctxt->input->cur[1];
11474 if (cur == 0) {
11475 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11476 ctxt->sax->setDocumentLocator(ctxt->userData,
11477 &xmlDefaultSAXLocator);
11478 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11479 xmlHaltParser(ctxt);
11480#ifdef DEBUG_PUSH
11481 xmlGenericError(xmlGenericErrorContext,
11482 "PP: entering EOF\n");
11483#endif
11484 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11485 ctxt->sax->endDocument(ctxt->userData);
11486 goto done;
11487 }
11488 if ((cur == '<') && (next == '?')) {
11489 /* PI or XML decl */
11490 if (avail < 5) return(ret);
11491 if ((!terminate) &&
11492 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11493 return(ret);
11494 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11495 ctxt->sax->setDocumentLocator(ctxt->userData,
11496 &xmlDefaultSAXLocator);
11497 if ((ctxt->input->cur[2] == 'x') &&
11498 (ctxt->input->cur[3] == 'm') &&
11499 (ctxt->input->cur[4] == 'l') &&
11500 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11501 ret += 5;
11502#ifdef DEBUG_PUSH
11503 xmlGenericError(xmlGenericErrorContext,
11504 "PP: Parsing XML Decl\n");
11505#endif
11506 xmlParseXMLDecl(ctxt);
11507 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11508 /*
11509 * The XML REC instructs us to stop parsing right
11510 * here
11511 */
11512 xmlHaltParser(ctxt);
11513 return(0);
11514 }
11515 ctxt->standalone = ctxt->input->standalone;
11516 if ((ctxt->encoding == NULL) &&
11517 (ctxt->input->encoding != NULL))
11518 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11519 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11520 (!ctxt->disableSAX))
11521 ctxt->sax->startDocument(ctxt->userData);
11522 ctxt->instate = XML_PARSER_MISC;
11523#ifdef DEBUG_PUSH
11524 xmlGenericError(xmlGenericErrorContext,
11525 "PP: entering MISC\n");
11526#endif
11527 } else {
11528 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11529 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11530 (!ctxt->disableSAX))
11531 ctxt->sax->startDocument(ctxt->userData);
11532 ctxt->instate = XML_PARSER_MISC;
11533#ifdef DEBUG_PUSH
11534 xmlGenericError(xmlGenericErrorContext,
11535 "PP: entering MISC\n");
11536#endif
11537 }
11538 } else {
11539 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11540 ctxt->sax->setDocumentLocator(ctxt->userData,
11541 &xmlDefaultSAXLocator);
11542 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11543 if (ctxt->version == NULL) {
11544 xmlErrMemory(ctxt, NULL);
11545 break;
11546 }
11547 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11548 (!ctxt->disableSAX))
11549 ctxt->sax->startDocument(ctxt->userData);
11550 ctxt->instate = XML_PARSER_MISC;
11551#ifdef DEBUG_PUSH
11552 xmlGenericError(xmlGenericErrorContext,
11553 "PP: entering MISC\n");
11554#endif
11555 }
11556 break;
11557 case XML_PARSER_START_TAG: {
11558 const xmlChar *name;
11559 const xmlChar *prefix = NULL;
11560 const xmlChar *URI = NULL;
11561 int nsNr = ctxt->nsNr;
11562
11563 if ((avail < 2) && (ctxt->inputNr == 1))
11564 goto done;
11565 cur = ctxt->input->cur[0];
11566 if (cur != '<') {
11567 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11568 xmlHaltParser(ctxt);
11569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11570 ctxt->sax->endDocument(ctxt->userData);
11571 goto done;
11572 }
11573 if (!terminate) {
11574 if (ctxt->progressive) {
11575 /* > can be found unescaped in attribute values */
11576 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11577 goto done;
11578 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11579 goto done;
11580 }
11581 }
11582 if (ctxt->spaceNr == 0)
11583 spacePush(ctxt, -1);
11584 else if (*ctxt->space == -2)
11585 spacePush(ctxt, -1);
11586 else
11587 spacePush(ctxt, *ctxt->space);
11588#ifdef LIBXML_SAX1_ENABLED
11589 if (ctxt->sax2)
11590#endif /* LIBXML_SAX1_ENABLED */
11591 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11592#ifdef LIBXML_SAX1_ENABLED
11593 else
11594 name = xmlParseStartTag(ctxt);
11595#endif /* LIBXML_SAX1_ENABLED */
11596 if (ctxt->instate == XML_PARSER_EOF)
11597 goto done;
11598 if (name == NULL) {
11599 spacePop(ctxt);
11600 xmlHaltParser(ctxt);
11601 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11602 ctxt->sax->endDocument(ctxt->userData);
11603 goto done;
11604 }
11605#ifdef LIBXML_VALID_ENABLED
11606 /*
11607 * [ VC: Root Element Type ]
11608 * The Name in the document type declaration must match
11609 * the element type of the root element.
11610 */
11611 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11612 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11613 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11614#endif /* LIBXML_VALID_ENABLED */
11615
11616 /*
11617 * Check for an Empty Element.
11618 */
11619 if ((RAW == '/') && (NXT(1) == '>')) {
11620 SKIP(2);
11621
11622 if (ctxt->sax2) {
11623 if ((ctxt->sax != NULL) &&
11624 (ctxt->sax->endElementNs != NULL) &&
11625 (!ctxt->disableSAX))
11626 ctxt->sax->endElementNs(ctxt->userData, name,
11627 prefix, URI);
11628 if (ctxt->nsNr - nsNr > 0)
11629 nsPop(ctxt, ctxt->nsNr - nsNr);
11630#ifdef LIBXML_SAX1_ENABLED
11631 } else {
11632 if ((ctxt->sax != NULL) &&
11633 (ctxt->sax->endElement != NULL) &&
11634 (!ctxt->disableSAX))
11635 ctxt->sax->endElement(ctxt->userData, name);
11636#endif /* LIBXML_SAX1_ENABLED */
11637 }
11638 if (ctxt->instate == XML_PARSER_EOF)
11639 goto done;
11640 spacePop(ctxt);
11641 if (ctxt->nameNr == 0) {
11642 ctxt->instate = XML_PARSER_EPILOG;
11643 } else {
11644 ctxt->instate = XML_PARSER_CONTENT;
11645 }
11646 ctxt->progressive = 1;
11647 break;
11648 }
11649 if (RAW == '>') {
11650 NEXT;
11651 } else {
11652 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11653 "Couldn't find end of Start Tag %s\n",
11654 name);
11655 nodePop(ctxt);
11656 spacePop(ctxt);
11657 }
11658 if (ctxt->sax2)
11659 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11660#ifdef LIBXML_SAX1_ENABLED
11661 else
11662 namePush(ctxt, name);
11663#endif /* LIBXML_SAX1_ENABLED */
11664
11665 ctxt->instate = XML_PARSER_CONTENT;
11666 ctxt->progressive = 1;
11667 break;
11668 }
11669 case XML_PARSER_CONTENT: {
11670 const xmlChar *test;
11671 unsigned int cons;
11672 if ((avail < 2) && (ctxt->inputNr == 1))
11673 goto done;
11674 cur = ctxt->input->cur[0];
11675 next = ctxt->input->cur[1];
11676
11677 test = CUR_PTR;
11678 cons = ctxt->input->consumed;
11679 if ((cur == '<') && (next == '/')) {
11680 ctxt->instate = XML_PARSER_END_TAG;
11681 break;
11682 } else if ((cur == '<') && (next == '?')) {
11683 if ((!terminate) &&
11684 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11685 ctxt->progressive = XML_PARSER_PI;
11686 goto done;
11687 }
11688 xmlParsePI(ctxt);
11689 ctxt->instate = XML_PARSER_CONTENT;
11690 ctxt->progressive = 1;
11691 } else if ((cur == '<') && (next != '!')) {
11692 ctxt->instate = XML_PARSER_START_TAG;
11693 break;
11694 } else if ((cur == '<') && (next == '!') &&
11695 (ctxt->input->cur[2] == '-') &&
11696 (ctxt->input->cur[3] == '-')) {
11697 int term;
11698
11699 if (avail < 4)
11700 goto done;
11701 ctxt->input->cur += 4;
11702 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11703 ctxt->input->cur -= 4;
11704 if ((!terminate) && (term < 0)) {
11705 ctxt->progressive = XML_PARSER_COMMENT;
11706 goto done;
11707 }
11708 xmlParseComment(ctxt);
11709 ctxt->instate = XML_PARSER_CONTENT;
11710 ctxt->progressive = 1;
11711 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11712 (ctxt->input->cur[2] == '[') &&
11713 (ctxt->input->cur[3] == 'C') &&
11714 (ctxt->input->cur[4] == 'D') &&
11715 (ctxt->input->cur[5] == 'A') &&
11716 (ctxt->input->cur[6] == 'T') &&
11717 (ctxt->input->cur[7] == 'A') &&
11718 (ctxt->input->cur[8] == '[')) {
11719 SKIP(9);
11720 ctxt->instate = XML_PARSER_CDATA_SECTION;
11721 break;
11722 } else if ((cur == '<') && (next == '!') &&
11723 (avail < 9)) {
11724 goto done;
11725 } else if (cur == '&') {
11726 if ((!terminate) &&
11727 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11728 goto done;
11729 xmlParseReference(ctxt);
11730 } else {
11731 /* TODO Avoid the extra copy, handle directly !!! */
11732 /*
11733 * Goal of the following test is:
11734 * - minimize calls to the SAX 'character' callback
11735 * when they are mergeable
11736 * - handle an problem for isBlank when we only parse
11737 * a sequence of blank chars and the next one is
11738 * not available to check against '<' presence.
11739 * - tries to homogenize the differences in SAX
11740 * callbacks between the push and pull versions
11741 * of the parser.
11742 */
11743 if ((ctxt->inputNr == 1) &&
11744 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11745 if (!terminate) {
11746 if (ctxt->progressive) {
11747 if ((lastlt == NULL) ||
11748 (ctxt->input->cur > lastlt))
11749 goto done;
11750 } else if (xmlParseLookupSequence(ctxt,
11751 '<', 0, 0) < 0) {
11752 goto done;
11753 }
11754 }
11755 }
11756 ctxt->checkIndex = 0;
11757 xmlParseCharData(ctxt, 0);
11758 }
11759 /*
11760 * Pop-up of finished entities.
11761 */
11762 while ((RAW == 0) && (ctxt->inputNr > 1))
11763 xmlPopInput(ctxt);
11764 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11765 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11766 "detected an error in element content\n");
11767 xmlHaltParser(ctxt);
11768 break;
11769 }
11770 break;
11771 }
11772 case XML_PARSER_END_TAG:
11773 if (avail < 2)
11774 goto done;
11775 if (!terminate) {
11776 if (ctxt->progressive) {
11777 /* > can be found unescaped in attribute values */
11778 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11779 goto done;
11780 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11781 goto done;
11782 }
11783 }
11784 if (ctxt->sax2) {
11785 xmlParseEndTag2(ctxt,
11786 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11787 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11788 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11789 nameNsPop(ctxt);
11790 }
11791#ifdef LIBXML_SAX1_ENABLED
11792 else
11793 xmlParseEndTag1(ctxt, 0);
11794#endif /* LIBXML_SAX1_ENABLED */
11795 if (ctxt->instate == XML_PARSER_EOF) {
11796 /* Nothing */
11797 } else if (ctxt->nameNr == 0) {
11798 ctxt->instate = XML_PARSER_EPILOG;
11799 } else {
11800 ctxt->instate = XML_PARSER_CONTENT;
11801 }
11802 break;
11803 case XML_PARSER_CDATA_SECTION: {
11804 /*
11805 * The Push mode need to have the SAX callback for
11806 * cdataBlock merge back contiguous callbacks.
11807 */
11808 int base;
11809
11810 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11811 if (base < 0) {
11812 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11813 int tmp;
11814
11815 tmp = xmlCheckCdataPush(ctxt->input->cur,
11816 XML_PARSER_BIG_BUFFER_SIZE, 0);
11817 if (tmp < 0) {
11818 tmp = -tmp;
11819 ctxt->input->cur += tmp;
11820 goto encoding_error;
11821 }
11822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11823 if (ctxt->sax->cdataBlock != NULL)
11824 ctxt->sax->cdataBlock(ctxt->userData,
11825 ctxt->input->cur, tmp);
11826 else if (ctxt->sax->characters != NULL)
11827 ctxt->sax->characters(ctxt->userData,
11828 ctxt->input->cur, tmp);
11829 }
11830 if (ctxt->instate == XML_PARSER_EOF)
11831 goto done;
11832 SKIPL(tmp);
11833 ctxt->checkIndex = 0;
11834 }
11835 goto done;
11836 } else {
11837 int tmp;
11838
11839 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11840 if ((tmp < 0) || (tmp != base)) {
11841 tmp = -tmp;
11842 ctxt->input->cur += tmp;
11843 goto encoding_error;
11844 }
11845 if ((ctxt->sax != NULL) && (base == 0) &&
11846 (ctxt->sax->cdataBlock != NULL) &&
11847 (!ctxt->disableSAX)) {
11848 /*
11849 * Special case to provide identical behaviour
11850 * between pull and push parsers on enpty CDATA
11851 * sections
11852 */
11853 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11854 (!strncmp((const char *)&ctxt->input->cur[-9],
11855 "<![CDATA[", 9)))
11856 ctxt->sax->cdataBlock(ctxt->userData,
11857 BAD_CAST "", 0);
11858 } else if ((ctxt->sax != NULL) && (base > 0) &&
11859 (!ctxt->disableSAX)) {
11860 if (ctxt->sax->cdataBlock != NULL)
11861 ctxt->sax->cdataBlock(ctxt->userData,
11862 ctxt->input->cur, base);
11863 else if (ctxt->sax->characters != NULL)
11864 ctxt->sax->characters(ctxt->userData,
11865 ctxt->input->cur, base);
11866 }
11867 if (ctxt->instate == XML_PARSER_EOF)
11868 goto done;
11869 SKIPL(base + 3);
11870 ctxt->checkIndex = 0;
11871 ctxt->instate = XML_PARSER_CONTENT;
11872#ifdef DEBUG_PUSH
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: entering CONTENT\n");
11875#endif
11876 }
11877 break;
11878 }
11879 case XML_PARSER_MISC:
11880 SKIP_BLANKS;
11881 if (ctxt->input->buf == NULL)
11882 avail = ctxt->input->length -
11883 (ctxt->input->cur - ctxt->input->base);
11884 else
11885 avail = xmlBufUse(ctxt->input->buf->buffer) -
11886 (ctxt->input->cur - ctxt->input->base);
11887 if (avail < 2)
11888 goto done;
11889 cur = ctxt->input->cur[0];
11890 next = ctxt->input->cur[1];
11891 if ((cur == '<') && (next == '?')) {
11892 if ((!terminate) &&
11893 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11894 ctxt->progressive = XML_PARSER_PI;
11895 goto done;
11896 }
11897#ifdef DEBUG_PUSH
11898 xmlGenericError(xmlGenericErrorContext,
11899 "PP: Parsing PI\n");
11900#endif
11901 xmlParsePI(ctxt);
11902 if (ctxt->instate == XML_PARSER_EOF)
11903 goto done;
11904 ctxt->instate = XML_PARSER_MISC;
11905 ctxt->progressive = 1;
11906 ctxt->checkIndex = 0;
11907 } else if ((cur == '<') && (next == '!') &&
11908 (ctxt->input->cur[2] == '-') &&
11909 (ctxt->input->cur[3] == '-')) {
11910 if ((!terminate) &&
11911 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11912 ctxt->progressive = XML_PARSER_COMMENT;
11913 goto done;
11914 }
11915#ifdef DEBUG_PUSH
11916 xmlGenericError(xmlGenericErrorContext,
11917 "PP: Parsing Comment\n");
11918#endif
11919 xmlParseComment(ctxt);
11920 if (ctxt->instate == XML_PARSER_EOF)
11921 goto done;
11922 ctxt->instate = XML_PARSER_MISC;
11923 ctxt->progressive = 1;
11924 ctxt->checkIndex = 0;
11925 } else if ((cur == '<') && (next == '!') &&
11926 (ctxt->input->cur[2] == 'D') &&
11927 (ctxt->input->cur[3] == 'O') &&
11928 (ctxt->input->cur[4] == 'C') &&
11929 (ctxt->input->cur[5] == 'T') &&
11930 (ctxt->input->cur[6] == 'Y') &&
11931 (ctxt->input->cur[7] == 'P') &&
11932 (ctxt->input->cur[8] == 'E')) {
11933 if ((!terminate) &&
11934 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11935 ctxt->progressive = XML_PARSER_DTD;
11936 goto done;
11937 }
11938#ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: Parsing internal subset\n");
11941#endif
11942 ctxt->inSubset = 1;
11943 ctxt->progressive = 0;
11944 ctxt->checkIndex = 0;
11945 xmlParseDocTypeDecl(ctxt);
11946 if (ctxt->instate == XML_PARSER_EOF)
11947 goto done;
11948 if (RAW == '[') {
11949 ctxt->instate = XML_PARSER_DTD;
11950#ifdef DEBUG_PUSH
11951 xmlGenericError(xmlGenericErrorContext,
11952 "PP: entering DTD\n");
11953#endif
11954 } else {
11955 /*
11956 * Create and update the external subset.
11957 */
11958 ctxt->inSubset = 2;
11959 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11960 (ctxt->sax->externalSubset != NULL))
11961 ctxt->sax->externalSubset(ctxt->userData,
11962 ctxt->intSubName, ctxt->extSubSystem,
11963 ctxt->extSubURI);
11964 ctxt->inSubset = 0;
11965 xmlCleanSpecialAttr(ctxt);
11966 ctxt->instate = XML_PARSER_PROLOG;
11967#ifdef DEBUG_PUSH
11968 xmlGenericError(xmlGenericErrorContext,
11969 "PP: entering PROLOG\n");
11970#endif
11971 }
11972 } else if ((cur == '<') && (next == '!') &&
11973 (avail < 9)) {
11974 goto done;
11975 } else {
11976 ctxt->instate = XML_PARSER_START_TAG;
11977 ctxt->progressive = XML_PARSER_START_TAG;
11978 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11979#ifdef DEBUG_PUSH
11980 xmlGenericError(xmlGenericErrorContext,
11981 "PP: entering START_TAG\n");
11982#endif
11983 }
11984 break;
11985 case XML_PARSER_PROLOG:
11986 SKIP_BLANKS;
11987 if (ctxt->input->buf == NULL)
11988 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11989 else
11990 avail = xmlBufUse(ctxt->input->buf->buffer) -
11991 (ctxt->input->cur - ctxt->input->base);
11992 if (avail < 2)
11993 goto done;
11994 cur = ctxt->input->cur[0];
11995 next = ctxt->input->cur[1];
11996 if ((cur == '<') && (next == '?')) {
11997 if ((!terminate) &&
11998 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11999 ctxt->progressive = XML_PARSER_PI;
12000 goto done;
12001 }
12002#ifdef DEBUG_PUSH
12003 xmlGenericError(xmlGenericErrorContext,
12004 "PP: Parsing PI\n");
12005#endif
12006 xmlParsePI(ctxt);
12007 if (ctxt->instate == XML_PARSER_EOF)
12008 goto done;
12009 ctxt->instate = XML_PARSER_PROLOG;
12010 ctxt->progressive = 1;
12011 } else if ((cur == '<') && (next == '!') &&
12012 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12013 if ((!terminate) &&
12014 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12015 ctxt->progressive = XML_PARSER_COMMENT;
12016 goto done;
12017 }
12018#ifdef DEBUG_PUSH
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: Parsing Comment\n");
12021#endif
12022 xmlParseComment(ctxt);
12023 if (ctxt->instate == XML_PARSER_EOF)
12024 goto done;
12025 ctxt->instate = XML_PARSER_PROLOG;
12026 ctxt->progressive = 1;
12027 } else if ((cur == '<') && (next == '!') &&
12028 (avail < 4)) {
12029 goto done;
12030 } else {
12031 ctxt->instate = XML_PARSER_START_TAG;
12032 if (ctxt->progressive == 0)
12033 ctxt->progressive = XML_PARSER_START_TAG;
12034 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
12035#ifdef DEBUG_PUSH
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12038#endif
12039 }
12040 break;
12041 case XML_PARSER_EPILOG:
12042 SKIP_BLANKS;
12043 if (ctxt->input->buf == NULL)
12044 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12045 else
12046 avail = xmlBufUse(ctxt->input->buf->buffer) -
12047 (ctxt->input->cur - ctxt->input->base);
12048 if (avail < 2)
12049 goto done;
12050 cur = ctxt->input->cur[0];
12051 next = ctxt->input->cur[1];
12052 if ((cur == '<') && (next == '?')) {
12053 if ((!terminate) &&
12054 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12055 ctxt->progressive = XML_PARSER_PI;
12056 goto done;
12057 }
12058#ifdef DEBUG_PUSH
12059 xmlGenericError(xmlGenericErrorContext,
12060 "PP: Parsing PI\n");
12061#endif
12062 xmlParsePI(ctxt);
12063 if (ctxt->instate == XML_PARSER_EOF)
12064 goto done;
12065 ctxt->instate = XML_PARSER_EPILOG;
12066 ctxt->progressive = 1;
12067 } else if ((cur == '<') && (next == '!') &&
12068 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12069 if ((!terminate) &&
12070 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12071 ctxt->progressive = XML_PARSER_COMMENT;
12072 goto done;
12073 }
12074#ifdef DEBUG_PUSH
12075 xmlGenericError(xmlGenericErrorContext,
12076 "PP: Parsing Comment\n");
12077#endif
12078 xmlParseComment(ctxt);
12079 if (ctxt->instate == XML_PARSER_EOF)
12080 goto done;
12081 ctxt->instate = XML_PARSER_EPILOG;
12082 ctxt->progressive = 1;
12083 } else if ((cur == '<') && (next == '!') &&
12084 (avail < 4)) {
12085 goto done;
12086 } else {
12087 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12088 xmlHaltParser(ctxt);
12089#ifdef DEBUG_PUSH
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: entering EOF\n");
12092#endif
12093 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12094 ctxt->sax->endDocument(ctxt->userData);
12095 goto done;
12096 }
12097 break;
12098 case XML_PARSER_DTD: {
12099 /*
12100 * Sorry but progressive parsing of the internal subset
12101 * is not expected to be supported. We first check that
12102 * the full content of the internal subset is available and
12103 * the parsing is launched only at that point.
12104 * Internal subset ends up with "']' S? '>'" in an unescaped
12105 * section and not in a ']]>' sequence which are conditional
12106 * sections (whoever argued to keep that crap in XML deserve
12107 * a place in hell !).
12108 */
12109 int base, i;
12110 xmlChar *buf;
12111 xmlChar quote = 0;
12112 size_t use;
12113
12114 base = ctxt->input->cur - ctxt->input->base;
12115 if (base < 0) return(0);
12116 if (ctxt->checkIndex > base)
12117 base = ctxt->checkIndex;
12118 buf = xmlBufContent(ctxt->input->buf->buffer);
12119 use = xmlBufUse(ctxt->input->buf->buffer);
12120 for (;(unsigned int) base < use; base++) {
12121 if (quote != 0) {
12122 if (buf[base] == quote)
12123 quote = 0;
12124 continue;
12125 }
12126 if ((quote == 0) && (buf[base] == '<')) {
12127 int found = 0;
12128 /* special handling of comments */
12129 if (((unsigned int) base + 4 < use) &&
12130 (buf[base + 1] == '!') &&
12131 (buf[base + 2] == '-') &&
12132 (buf[base + 3] == '-')) {
12133 for (;(unsigned int) base + 3 < use; base++) {
12134 if ((buf[base] == '-') &&
12135 (buf[base + 1] == '-') &&
12136 (buf[base + 2] == '>')) {
12137 found = 1;
12138 base += 2;
12139 break;
12140 }
12141 }
12142 if (!found) {
12143#if 0
12144 fprintf(stderr, "unfinished comment\n");
12145#endif
12146 break; /* for */
12147 }
12148 continue;
12149 }
12150 }
12151 if (buf[base] == '"') {
12152 quote = '"';
12153 continue;
12154 }
12155 if (buf[base] == '\'') {
12156 quote = '\'';
12157 continue;
12158 }
12159 if (buf[base] == ']') {
12160#if 0
12161 fprintf(stderr, "%c%c%c%c: ", buf[base],
12162 buf[base + 1], buf[base + 2], buf[base + 3]);
12163#endif
12164 if ((unsigned int) base +1 >= use)
12165 break;
12166 if (buf[base + 1] == ']') {
12167 /* conditional crap, skip both ']' ! */
12168 base++;
12169 continue;
12170 }
12171 for (i = 1; (unsigned int) base + i < use; i++) {
12172 if (buf[base + i] == '>') {
12173#if 0
12174 fprintf(stderr, "found\n");
12175#endif
12176 goto found_end_int_subset;
12177 }
12178 if (!IS_BLANK_CH(buf[base + i])) {
12179#if 0
12180 fprintf(stderr, "not found\n");
12181#endif
12182 goto not_end_of_int_subset;
12183 }
12184 }
12185#if 0
12186 fprintf(stderr, "end of stream\n");
12187#endif
12188 break;
12189
12190 }
12191not_end_of_int_subset:
12192 continue; /* for */
12193 }
12194 /*
12195 * We didn't found the end of the Internal subset
12196 */
12197 if (quote == 0)
12198 ctxt->checkIndex = base;
12199 else
12200 ctxt->checkIndex = 0;
12201#ifdef DEBUG_PUSH
12202 if (next == 0)
12203 xmlGenericError(xmlGenericErrorContext,
12204 "PP: lookup of int subset end filed\n");
12205#endif
12206 goto done;
12207
12208found_end_int_subset:
12209 ctxt->checkIndex = 0;
12210 xmlParseInternalSubset(ctxt);
12211 if (ctxt->instate == XML_PARSER_EOF)
12212 goto done;
12213 ctxt->inSubset = 2;
12214 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12215 (ctxt->sax->externalSubset != NULL))
12216 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12217 ctxt->extSubSystem, ctxt->extSubURI);
12218 ctxt->inSubset = 0;
12219 xmlCleanSpecialAttr(ctxt);
12220 if (ctxt->instate == XML_PARSER_EOF)
12221 goto done;
12222 ctxt->instate = XML_PARSER_PROLOG;
12223 ctxt->checkIndex = 0;
12224#ifdef DEBUG_PUSH
12225 xmlGenericError(xmlGenericErrorContext,
12226 "PP: entering PROLOG\n");
12227#endif
12228 break;
12229 }
12230 case XML_PARSER_COMMENT:
12231 xmlGenericError(xmlGenericErrorContext,
12232 "PP: internal error, state == COMMENT\n");
12233 ctxt->instate = XML_PARSER_CONTENT;
12234#ifdef DEBUG_PUSH
12235 xmlGenericError(xmlGenericErrorContext,
12236 "PP: entering CONTENT\n");
12237#endif
12238 break;
12239 case XML_PARSER_IGNORE:
12240 xmlGenericError(xmlGenericErrorContext,
12241 "PP: internal error, state == IGNORE");
12242 ctxt->instate = XML_PARSER_DTD;
12243#ifdef DEBUG_PUSH
12244 xmlGenericError(xmlGenericErrorContext,
12245 "PP: entering DTD\n");
12246#endif
12247 break;
12248 case XML_PARSER_PI:
12249 xmlGenericError(xmlGenericErrorContext,
12250 "PP: internal error, state == PI\n");
12251 ctxt->instate = XML_PARSER_CONTENT;
12252#ifdef DEBUG_PUSH
12253 xmlGenericError(xmlGenericErrorContext,
12254 "PP: entering CONTENT\n");
12255#endif
12256 break;
12257 case XML_PARSER_ENTITY_DECL:
12258 xmlGenericError(xmlGenericErrorContext,
12259 "PP: internal error, state == ENTITY_DECL\n");
12260 ctxt->instate = XML_PARSER_DTD;
12261#ifdef DEBUG_PUSH
12262 xmlGenericError(xmlGenericErrorContext,
12263 "PP: entering DTD\n");
12264#endif
12265 break;
12266 case XML_PARSER_ENTITY_VALUE:
12267 xmlGenericError(xmlGenericErrorContext,
12268 "PP: internal error, state == ENTITY_VALUE\n");
12269 ctxt->instate = XML_PARSER_CONTENT;
12270#ifdef DEBUG_PUSH
12271 xmlGenericError(xmlGenericErrorContext,
12272 "PP: entering DTD\n");
12273#endif
12274 break;
12275 case XML_PARSER_ATTRIBUTE_VALUE:
12276 xmlGenericError(xmlGenericErrorContext,
12277 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12278 ctxt->instate = XML_PARSER_START_TAG;
12279#ifdef DEBUG_PUSH
12280 xmlGenericError(xmlGenericErrorContext,
12281 "PP: entering START_TAG\n");
12282#endif
12283 break;
12284 case XML_PARSER_SYSTEM_LITERAL:
12285 xmlGenericError(xmlGenericErrorContext,
12286 "PP: internal error, state == SYSTEM_LITERAL\n");
12287 ctxt->instate = XML_PARSER_START_TAG;
12288#ifdef DEBUG_PUSH
12289 xmlGenericError(xmlGenericErrorContext,
12290 "PP: entering START_TAG\n");
12291#endif
12292 break;
12293 case XML_PARSER_PUBLIC_LITERAL:
12294 xmlGenericError(xmlGenericErrorContext,
12295 "PP: internal error, state == PUBLIC_LITERAL\n");
12296 ctxt->instate = XML_PARSER_START_TAG;
12297#ifdef DEBUG_PUSH
12298 xmlGenericError(xmlGenericErrorContext,
12299 "PP: entering START_TAG\n");
12300#endif
12301 break;
12302 }
12303 }
12304done:
12305#ifdef DEBUG_PUSH
12306 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12307#endif
12308 return(ret);
12309encoding_error:
12310 {
12311 char buffer[150];
12312
12313 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12314 ctxt->input->cur[0], ctxt->input->cur[1],
12315 ctxt->input->cur[2], ctxt->input->cur[3]);
12316 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12317 "Input is not proper UTF-8, indicate encoding !\n%s",
12318 BAD_CAST buffer, NULL);
12319 }
12320 return(0);
12321}
12322
12323/**
12324 * xmlParseCheckTransition:
12325 * @ctxt: an XML parser context
12326 * @chunk: a char array
12327 * @size: the size in byte of the chunk
12328 *
12329 * Check depending on the current parser state if the chunk given must be
12330 * processed immediately or one need more data to advance on parsing.
12331 *
12332 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12333 */
12334static int
12335xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12336 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12337 return(-1);
12338 if (ctxt->instate == XML_PARSER_START_TAG) {
12339 if (memchr(chunk, '>', size) != NULL)
12340 return(1);
12341 return(0);
12342 }
12343 if (ctxt->progressive == XML_PARSER_COMMENT) {
12344 if (memchr(chunk, '>', size) != NULL)
12345 return(1);
12346 return(0);
12347 }
12348 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12349 if (memchr(chunk, '>', size) != NULL)
12350 return(1);
12351 return(0);
12352 }
12353 if (ctxt->progressive == XML_PARSER_PI) {
12354 if (memchr(chunk, '>', size) != NULL)
12355 return(1);
12356 return(0);
12357 }
12358 if (ctxt->instate == XML_PARSER_END_TAG) {
12359 if (memchr(chunk, '>', size) != NULL)
12360 return(1);
12361 return(0);
12362 }
12363 if ((ctxt->progressive == XML_PARSER_DTD) ||
12364 (ctxt->instate == XML_PARSER_DTD)) {
12365 if (memchr(chunk, '>', size) != NULL)
12366 return(1);
12367 return(0);
12368 }
12369 return(1);
12370}
12371
12372/**
12373 * xmlParseChunk:
12374 * @ctxt: an XML parser context
12375 * @chunk: an char array
12376 * @size: the size in byte of the chunk
12377 * @terminate: last chunk indicator
12378 *
12379 * Parse a Chunk of memory
12380 *
12381 * Returns zero if no error, the xmlParserErrors otherwise.
12382 */
12383int
12384xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12385 int terminate) {
12386 int end_in_lf = 0;
12387 int remain = 0;
12388 size_t old_avail = 0;
12389 size_t avail = 0;
12390
12391 if (ctxt == NULL)
12392 return(XML_ERR_INTERNAL_ERROR);
12393 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12394 return(ctxt->errNo);
12395 if (ctxt->instate == XML_PARSER_EOF)
12396 return(-1);
12397 if (ctxt->instate == XML_PARSER_START)
12398 xmlDetectSAX2(ctxt);
12399 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12400 (chunk[size - 1] == '\r')) {
12401 end_in_lf = 1;
12402 size--;
12403 }
12404
12405xmldecl_done:
12406
12407 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12408 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12409 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12410 size_t cur = ctxt->input->cur - ctxt->input->base;
12411 int res;
12412
12413 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12414 /*
12415 * Specific handling if we autodetected an encoding, we should not
12416 * push more than the first line ... which depend on the encoding
12417 * And only push the rest once the final encoding was detected
12418 */
12419 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12420 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12421 unsigned int len = 45;
12422
12423 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12424 BAD_CAST "UTF-16")) ||
12425 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12426 BAD_CAST "UTF16")))
12427 len = 90;
12428 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12429 BAD_CAST "UCS-4")) ||
12430 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12431 BAD_CAST "UCS4")))
12432 len = 180;
12433
12434 if (ctxt->input->buf->rawconsumed < len)
12435 len -= ctxt->input->buf->rawconsumed;
12436
12437 /*
12438 * Change size for reading the initial declaration only
12439 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12440 * will blindly copy extra bytes from memory.
12441 */
12442 if ((unsigned int) size > len) {
12443 remain = size - len;
12444 size = len;
12445 } else {
12446 remain = 0;
12447 }
12448 }
12449 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12450 if (res < 0) {
12451 ctxt->errNo = XML_PARSER_EOF;
12452 xmlHaltParser(ctxt);
12453 return (XML_PARSER_EOF);
12454 }
12455 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12456#ifdef DEBUG_PUSH
12457 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12458#endif
12459
12460 } else if (ctxt->instate != XML_PARSER_EOF) {
12461 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12462 xmlParserInputBufferPtr in = ctxt->input->buf;
12463 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12464 (in->raw != NULL)) {
12465 int nbchars;
12466 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12467 size_t current = ctxt->input->cur - ctxt->input->base;
12468
12469 nbchars = xmlCharEncInput(in, terminate);
12470 if (nbchars < 0) {
12471 /* TODO 2.6.0 */
12472 xmlGenericError(xmlGenericErrorContext,
12473 "xmlParseChunk: encoder error\n");
12474 return(XML_ERR_INVALID_ENCODING);
12475 }
12476 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12477 }
12478 }
12479 }
12480 if (remain != 0) {
12481 xmlParseTryOrFinish(ctxt, 0);
12482 } else {
12483 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12484 avail = xmlBufUse(ctxt->input->buf->buffer);
12485 /*
12486 * Depending on the current state it may not be such
12487 * a good idea to try parsing if there is nothing in the chunk
12488 * which would be worth doing a parser state transition and we
12489 * need to wait for more data
12490 */
12491 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12492 (old_avail == 0) || (avail == 0) ||
12493 (xmlParseCheckTransition(ctxt,
12494 (const char *)&ctxt->input->base[old_avail],
12495 avail - old_avail)))
12496 xmlParseTryOrFinish(ctxt, terminate);
12497 }
12498 if (ctxt->instate == XML_PARSER_EOF)
12499 return(ctxt->errNo);
12500
12501 if ((ctxt->input != NULL) &&
12502 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12503 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12504 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12505 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12506 xmlHaltParser(ctxt);
12507 }
12508 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12509 return(ctxt->errNo);
12510
12511 if (remain != 0) {
12512 chunk += size;
12513 size = remain;
12514 remain = 0;
12515 goto xmldecl_done;
12516 }
12517 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12518 (ctxt->input->buf != NULL)) {
12519 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12520 ctxt->input);
12521 size_t current = ctxt->input->cur - ctxt->input->base;
12522
12523 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12524
12525 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12526 base, current);
12527 }
12528 if (terminate) {
12529 /*
12530 * Check for termination
12531 */
12532 int cur_avail = 0;
12533
12534 if (ctxt->input != NULL) {
12535 if (ctxt->input->buf == NULL)
12536 cur_avail = ctxt->input->length -
12537 (ctxt->input->cur - ctxt->input->base);
12538 else
12539 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12540 (ctxt->input->cur - ctxt->input->base);
12541 }
12542
12543 if ((ctxt->instate != XML_PARSER_EOF) &&
12544 (ctxt->instate != XML_PARSER_EPILOG)) {
12545 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12546 }
12547 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12548 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12549 }
12550 if (ctxt->instate != XML_PARSER_EOF) {
12551 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12552 ctxt->sax->endDocument(ctxt->userData);
12553 }
12554 ctxt->instate = XML_PARSER_EOF;
12555 }
12556 if (ctxt->wellFormed == 0)
12557 return((xmlParserErrors) ctxt->errNo);
12558 else
12559 return(0);
12560}
12561
12562/************************************************************************
12563 * *
12564 * I/O front end functions to the parser *
12565 * *
12566 ************************************************************************/
12567
12568/**
12569 * xmlCreatePushParserCtxt:
12570 * @sax: a SAX handler
12571 * @user_data: The user data returned on SAX callbacks
12572 * @chunk: a pointer to an array of chars
12573 * @size: number of chars in the array
12574 * @filename: an optional file name or URI
12575 *
12576 * Create a parser context for using the XML parser in push mode.
12577 * If @buffer and @size are non-NULL, the data is used to detect
12578 * the encoding. The remaining characters will be parsed so they
12579 * don't need to be fed in again through xmlParseChunk.
12580 * To allow content encoding detection, @size should be >= 4
12581 * The value of @filename is used for fetching external entities
12582 * and error/warning reports.
12583 *
12584 * Returns the new parser context or NULL
12585 */
12586
12587xmlParserCtxtPtr
12588xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12589 const char *chunk, int size, const char *filename) {
12590 xmlParserCtxtPtr ctxt;
12591 xmlParserInputPtr inputStream;
12592 xmlParserInputBufferPtr buf;
12593 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12594
12595 /*
12596 * plug some encoding conversion routines
12597 */
12598 if ((chunk != NULL) && (size >= 4))
12599 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12600
12601 buf = xmlAllocParserInputBuffer(enc);
12602 if (buf == NULL) return(NULL);
12603
12604 ctxt = xmlNewParserCtxt();
12605 if (ctxt == NULL) {
12606 xmlErrMemory(NULL, "creating parser: out of memory\n");
12607 xmlFreeParserInputBuffer(buf);
12608 return(NULL);
12609 }
12610 ctxt->dictNames = 1;
12611 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12612 if (ctxt->pushTab == NULL) {
12613 xmlErrMemory(ctxt, NULL);
12614 xmlFreeParserInputBuffer(buf);
12615 xmlFreeParserCtxt(ctxt);
12616 return(NULL);
12617 }
12618 if (sax != NULL) {
12619#ifdef LIBXML_SAX1_ENABLED
12620 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12621#endif /* LIBXML_SAX1_ENABLED */
12622 xmlFree(ctxt->sax);
12623 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12624 if (ctxt->sax == NULL) {
12625 xmlErrMemory(ctxt, NULL);
12626 xmlFreeParserInputBuffer(buf);
12627 xmlFreeParserCtxt(ctxt);
12628 return(NULL);
12629 }
12630 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12631 if (sax->initialized == XML_SAX2_MAGIC)
12632 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12633 else
12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12635 if (user_data != NULL)
12636 ctxt->userData = user_data;
12637 }
12638 if (filename == NULL) {
12639 ctxt->directory = NULL;
12640 } else {
12641 ctxt->directory = xmlParserGetDirectory(filename);
12642 }
12643
12644 inputStream = xmlNewInputStream(ctxt);
12645 if (inputStream == NULL) {
12646 xmlFreeParserCtxt(ctxt);
12647 xmlFreeParserInputBuffer(buf);
12648 return(NULL);
12649 }
12650
12651 if (filename == NULL)
12652 inputStream->filename = NULL;
12653 else {
12654 inputStream->filename = (char *)
12655 xmlCanonicPath((const xmlChar *) filename);
12656 if (inputStream->filename == NULL) {
12657 xmlFreeParserCtxt(ctxt);
12658 xmlFreeParserInputBuffer(buf);
12659 return(NULL);
12660 }
12661 }
12662 inputStream->buf = buf;
12663 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12664 inputPush(ctxt, inputStream);
12665
12666 /*
12667 * If the caller didn't provide an initial 'chunk' for determining
12668 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12669 * that it can be automatically determined later
12670 */
12671 if ((size == 0) || (chunk == NULL)) {
12672 ctxt->charset = XML_CHAR_ENCODING_NONE;
12673 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12674 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12675 size_t cur = ctxt->input->cur - ctxt->input->base;
12676
12677 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12678
12679 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12680#ifdef DEBUG_PUSH
12681 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12682#endif
12683 }
12684
12685 if (enc != XML_CHAR_ENCODING_NONE) {
12686 xmlSwitchEncoding(ctxt, enc);
12687 }
12688
12689 return(ctxt);
12690}
12691#endif /* LIBXML_PUSH_ENABLED */
12692
12693/**
12694 * xmlHaltParser:
12695 * @ctxt: an XML parser context
12696 *
12697 * Blocks further parser processing don't override error
12698 * for internal use
12699 */
12700static void
12701xmlHaltParser(xmlParserCtxtPtr ctxt) {
12702 if (ctxt == NULL)
12703 return;
12704 ctxt->instate = XML_PARSER_EOF;
12705 ctxt->disableSAX = 1;
12706 if (ctxt->input != NULL) {
12707 /*
12708 * in case there was a specific allocation deallocate before
12709 * overriding base
12710 */
12711 if (ctxt->input->free != NULL) {
12712 ctxt->input->free((xmlChar *) ctxt->input->base);
12713 ctxt->input->free = NULL;
12714 }
12715 ctxt->input->cur = BAD_CAST"";
12716 ctxt->input->base = ctxt->input->cur;
12717 }
12718}
12719
12720/**
12721 * xmlStopParser:
12722 * @ctxt: an XML parser context
12723 *
12724 * Blocks further parser processing
12725 */
12726void
12727xmlStopParser(xmlParserCtxtPtr ctxt) {
12728 if (ctxt == NULL)
12729 return;
12730 xmlHaltParser(ctxt);
12731 ctxt->errNo = XML_ERR_USER_STOP;
12732}
12733
12734/**
12735 * xmlCreateIOParserCtxt:
12736 * @sax: a SAX handler
12737 * @user_data: The user data returned on SAX callbacks
12738 * @ioread: an I/O read function
12739 * @ioclose: an I/O close function
12740 * @ioctx: an I/O handler
12741 * @enc: the charset encoding if known
12742 *
12743 * Create a parser context for using the XML parser with an existing
12744 * I/O stream
12745 *
12746 * Returns the new parser context or NULL
12747 */
12748xmlParserCtxtPtr
12749xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12750 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12751 void *ioctx, xmlCharEncoding enc) {
12752 xmlParserCtxtPtr ctxt;
12753 xmlParserInputPtr inputStream;
12754 xmlParserInputBufferPtr buf;
12755
12756 if (ioread == NULL) return(NULL);
12757
12758 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12759 if (buf == NULL) {
12760 if (ioclose != NULL)
12761 ioclose(ioctx);
12762 return (NULL);
12763 }
12764
12765 ctxt = xmlNewParserCtxt();
12766 if (ctxt == NULL) {
12767 xmlFreeParserInputBuffer(buf);
12768 return(NULL);
12769 }
12770 if (sax != NULL) {
12771#ifdef LIBXML_SAX1_ENABLED
12772 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12773#endif /* LIBXML_SAX1_ENABLED */
12774 xmlFree(ctxt->sax);
12775 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12776 if (ctxt->sax == NULL) {
12777 xmlErrMemory(ctxt, NULL);
12778 xmlFreeParserCtxt(ctxt);
12779 return(NULL);
12780 }
12781 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12782 if (sax->initialized == XML_SAX2_MAGIC)
12783 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12784 else
12785 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12786 if (user_data != NULL)
12787 ctxt->userData = user_data;
12788 }
12789
12790 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12791 if (inputStream == NULL) {
12792 xmlFreeParserCtxt(ctxt);
12793 return(NULL);
12794 }
12795 inputPush(ctxt, inputStream);
12796
12797 return(ctxt);
12798}
12799
12800#ifdef LIBXML_VALID_ENABLED
12801/************************************************************************
12802 * *
12803 * Front ends when parsing a DTD *
12804 * *
12805 ************************************************************************/
12806
12807/**
12808 * xmlIOParseDTD:
12809 * @sax: the SAX handler block or NULL
12810 * @input: an Input Buffer
12811 * @enc: the charset encoding if known
12812 *
12813 * Load and parse a DTD
12814 *
12815 * Returns the resulting xmlDtdPtr or NULL in case of error.
12816 * @input will be freed by the function in any case.
12817 */
12818
12819xmlDtdPtr
12820xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12821 xmlCharEncoding enc) {
12822 xmlDtdPtr ret = NULL;
12823 xmlParserCtxtPtr ctxt;
12824 xmlParserInputPtr pinput = NULL;
12825 xmlChar start[4];
12826
12827 if (input == NULL)
12828 return(NULL);
12829
12830 ctxt = xmlNewParserCtxt();
12831 if (ctxt == NULL) {
12832 xmlFreeParserInputBuffer(input);
12833 return(NULL);
12834 }
12835
12836 /* We are loading a DTD */
12837 ctxt->options |= XML_PARSE_DTDLOAD;
12838
12839 /*
12840 * Set-up the SAX context
12841 */
12842 if (sax != NULL) {
12843 if (ctxt->sax != NULL)
12844 xmlFree(ctxt->sax);
12845 ctxt->sax = sax;
12846 ctxt->userData = ctxt;
12847 }
12848 xmlDetectSAX2(ctxt);
12849
12850 /*
12851 * generate a parser input from the I/O handler
12852 */
12853
12854 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12855 if (pinput == NULL) {
12856 if (sax != NULL) ctxt->sax = NULL;
12857 xmlFreeParserInputBuffer(input);
12858 xmlFreeParserCtxt(ctxt);
12859 return(NULL);
12860 }
12861
12862 /*
12863 * plug some encoding conversion routines here.
12864 */
12865 if (xmlPushInput(ctxt, pinput) < 0) {
12866 if (sax != NULL) ctxt->sax = NULL;
12867 xmlFreeParserCtxt(ctxt);
12868 return(NULL);
12869 }
12870 if (enc != XML_CHAR_ENCODING_NONE) {
12871 xmlSwitchEncoding(ctxt, enc);
12872 }
12873
12874 pinput->filename = NULL;
12875 pinput->line = 1;
12876 pinput->col = 1;
12877 pinput->base = ctxt->input->cur;
12878 pinput->cur = ctxt->input->cur;
12879 pinput->free = NULL;
12880
12881 /*
12882 * let's parse that entity knowing it's an external subset.
12883 */
12884 ctxt->inSubset = 2;
12885 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12886 if (ctxt->myDoc == NULL) {
12887 xmlErrMemory(ctxt, "New Doc failed");
12888 return(NULL);
12889 }
12890 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12892 BAD_CAST "none", BAD_CAST "none");
12893
12894 if ((enc == XML_CHAR_ENCODING_NONE) &&
12895 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12896 /*
12897 * Get the 4 first bytes and decode the charset
12898 * if enc != XML_CHAR_ENCODING_NONE
12899 * plug some encoding conversion routines.
12900 */
12901 start[0] = RAW;
12902 start[1] = NXT(1);
12903 start[2] = NXT(2);
12904 start[3] = NXT(3);
12905 enc = xmlDetectCharEncoding(start, 4);
12906 if (enc != XML_CHAR_ENCODING_NONE) {
12907 xmlSwitchEncoding(ctxt, enc);
12908 }
12909 }
12910
12911 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12912
12913 if (ctxt->myDoc != NULL) {
12914 if (ctxt->wellFormed) {
12915 ret = ctxt->myDoc->extSubset;
12916 ctxt->myDoc->extSubset = NULL;
12917 if (ret != NULL) {
12918 xmlNodePtr tmp;
12919
12920 ret->doc = NULL;
12921 tmp = ret->children;
12922 while (tmp != NULL) {
12923 tmp->doc = NULL;
12924 tmp = tmp->next;
12925 }
12926 }
12927 } else {
12928 ret = NULL;
12929 }
12930 xmlFreeDoc(ctxt->myDoc);
12931 ctxt->myDoc = NULL;
12932 }
12933 if (sax != NULL) ctxt->sax = NULL;
12934 xmlFreeParserCtxt(ctxt);
12935
12936 return(ret);
12937}
12938
12939/**
12940 * xmlSAXParseDTD:
12941 * @sax: the SAX handler block
12942 * @ExternalID: a NAME* containing the External ID of the DTD
12943 * @SystemID: a NAME* containing the URL to the DTD
12944 *
12945 * Load and parse an external subset.
12946 *
12947 * Returns the resulting xmlDtdPtr or NULL in case of error.
12948 */
12949
12950xmlDtdPtr
12951xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12952 const xmlChar *SystemID) {
12953 xmlDtdPtr ret = NULL;
12954 xmlParserCtxtPtr ctxt;
12955 xmlParserInputPtr input = NULL;
12956 xmlCharEncoding enc;
12957 xmlChar* systemIdCanonic;
12958
12959 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12960
12961 ctxt = xmlNewParserCtxt();
12962 if (ctxt == NULL) {
12963 return(NULL);
12964 }
12965
12966 /* We are loading a DTD */
12967 ctxt->options |= XML_PARSE_DTDLOAD;
12968
12969 /*
12970 * Set-up the SAX context
12971 */
12972 if (sax != NULL) {
12973 if (ctxt->sax != NULL)
12974 xmlFree(ctxt->sax);
12975 ctxt->sax = sax;
12976 ctxt->userData = ctxt;
12977 }
12978
12979 /*
12980 * Canonicalise the system ID
12981 */
12982 systemIdCanonic = xmlCanonicPath(SystemID);
12983 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12984 xmlFreeParserCtxt(ctxt);
12985 return(NULL);
12986 }
12987
12988 /*
12989 * Ask the Entity resolver to load the damn thing
12990 */
12991
12992 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12993 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12994 systemIdCanonic);
12995 if (input == NULL) {
12996 if (sax != NULL) ctxt->sax = NULL;
12997 xmlFreeParserCtxt(ctxt);
12998 if (systemIdCanonic != NULL)
12999 xmlFree(systemIdCanonic);
13000 return(NULL);
13001 }
13002
13003 /*
13004 * plug some encoding conversion routines here.
13005 */
13006 if (xmlPushInput(ctxt, input) < 0) {
13007 if (sax != NULL) ctxt->sax = NULL;
13008 xmlFreeParserCtxt(ctxt);
13009 if (systemIdCanonic != NULL)
13010 xmlFree(systemIdCanonic);
13011 return(NULL);
13012 }
13013 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13014 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
13015 xmlSwitchEncoding(ctxt, enc);
13016 }
13017
13018 if (input->filename == NULL)
13019 input->filename = (char *) systemIdCanonic;
13020 else
13021 xmlFree(systemIdCanonic);
13022 input->line = 1;
13023 input->col = 1;
13024 input->base = ctxt->input->cur;
13025 input->cur = ctxt->input->cur;
13026 input->free = NULL;
13027
13028 /*
13029 * let's parse that entity knowing it's an external subset.
13030 */
13031 ctxt->inSubset = 2;
13032 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
13033 if (ctxt->myDoc == NULL) {
13034 xmlErrMemory(ctxt, "New Doc failed");
13035 if (sax != NULL) ctxt->sax = NULL;
13036 xmlFreeParserCtxt(ctxt);
13037 return(NULL);
13038 }
13039 ctxt->myDoc->properties = XML_DOC_INTERNAL;
13040 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13041 ExternalID, SystemID);
13042 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13043
13044 if (ctxt->myDoc != NULL) {
13045 if (ctxt->wellFormed) {
13046 ret = ctxt->myDoc->extSubset;
13047 ctxt->myDoc->extSubset = NULL;
13048 if (ret != NULL) {
13049 xmlNodePtr tmp;
13050
13051 ret->doc = NULL;
13052 tmp = ret->children;
13053 while (tmp != NULL) {
13054 tmp->doc = NULL;
13055 tmp = tmp->next;
13056 }
13057 }
13058 } else {
13059 ret = NULL;
13060 }
13061 xmlFreeDoc(ctxt->myDoc);
13062 ctxt->myDoc = NULL;
13063 }
13064 if (sax != NULL) ctxt->sax = NULL;
13065 xmlFreeParserCtxt(ctxt);
13066
13067 return(ret);
13068}
13069
13070
13071/**
13072 * xmlParseDTD:
13073 * @ExternalID: a NAME* containing the External ID of the DTD
13074 * @SystemID: a NAME* containing the URL to the DTD
13075 *
13076 * Load and parse an external subset.
13077 *
13078 * Returns the resulting xmlDtdPtr or NULL in case of error.
13079 */
13080
13081xmlDtdPtr
13082xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13083 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13084}
13085#endif /* LIBXML_VALID_ENABLED */
13086
13087/************************************************************************
13088 * *
13089 * Front ends when parsing an Entity *
13090 * *
13091 ************************************************************************/
13092
13093/**
13094 * xmlParseCtxtExternalEntity:
13095 * @ctx: the existing parsing context
13096 * @URL: the URL for the entity to load
13097 * @ID: the System ID for the entity to load
13098 * @lst: the return value for the set of parsed nodes
13099 *
13100 * Parse an external general entity within an existing parsing context
13101 * An external general parsed entity is well-formed if it matches the
13102 * production labeled extParsedEnt.
13103 *
13104 * [78] extParsedEnt ::= TextDecl? content
13105 *
13106 * Returns 0 if the entity is well formed, -1 in case of args problem and
13107 * the parser error code otherwise
13108 */
13109
13110int
13111xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
13112 const xmlChar *ID, xmlNodePtr *lst) {
13113 xmlParserCtxtPtr ctxt;
13114 xmlDocPtr newDoc;
13115 xmlNodePtr newRoot;
13116 xmlSAXHandlerPtr oldsax = NULL;
13117 int ret = 0;
13118 xmlChar start[4];
13119 xmlCharEncoding enc;
13120
13121 if (ctx == NULL) return(-1);
13122
13123 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13124 (ctx->depth > 1024)) {
13125 return(XML_ERR_ENTITY_LOOP);
13126 }
13127
13128 if (lst != NULL)
13129 *lst = NULL;
13130 if ((URL == NULL) && (ID == NULL))
13131 return(-1);
13132 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13133 return(-1);
13134
13135 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13136 if (ctxt == NULL) {
13137 return(-1);
13138 }
13139
13140 oldsax = ctxt->sax;
13141 ctxt->sax = ctx->sax;
13142 xmlDetectSAX2(ctxt);
13143 newDoc = xmlNewDoc(BAD_CAST "1.0");
13144 if (newDoc == NULL) {
13145 xmlFreeParserCtxt(ctxt);
13146 return(-1);
13147 }
13148 newDoc->properties = XML_DOC_INTERNAL;
13149 if (ctx->myDoc->dict) {
13150 newDoc->dict = ctx->myDoc->dict;
13151 xmlDictReference(newDoc->dict);
13152 }
13153 if (ctx->myDoc != NULL) {
13154 newDoc->intSubset = ctx->myDoc->intSubset;
13155 newDoc->extSubset = ctx->myDoc->extSubset;
13156 }
13157 if (ctx->myDoc->URL != NULL) {
13158 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13159 }
13160 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13161 if (newRoot == NULL) {
13162 ctxt->sax = oldsax;
13163 xmlFreeParserCtxt(ctxt);
13164 newDoc->intSubset = NULL;
13165 newDoc->extSubset = NULL;
13166 xmlFreeDoc(newDoc);
13167 return(-1);
13168 }
13169 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13170 nodePush(ctxt, newDoc->children);
13171 if (ctx->myDoc == NULL) {
13172 ctxt->myDoc = newDoc;
13173 } else {
13174 ctxt->myDoc = ctx->myDoc;
13175 newDoc->children->doc = ctx->myDoc;
13176 }
13177
13178 /*
13179 * Get the 4 first bytes and decode the charset
13180 * if enc != XML_CHAR_ENCODING_NONE
13181 * plug some encoding conversion routines.
13182 */
13183 GROW
13184 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13185 start[0] = RAW;
13186 start[1] = NXT(1);
13187 start[2] = NXT(2);
13188 start[3] = NXT(3);
13189 enc = xmlDetectCharEncoding(start, 4);
13190 if (enc != XML_CHAR_ENCODING_NONE) {
13191 xmlSwitchEncoding(ctxt, enc);
13192 }
13193 }
13194
13195 /*
13196 * Parse a possible text declaration first
13197 */
13198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13199 xmlParseTextDecl(ctxt);
13200 /*
13201 * An XML-1.0 document can't reference an entity not XML-1.0
13202 */
13203 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13204 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13205 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13206 "Version mismatch between document and entity\n");
13207 }
13208 }
13209
13210 /*
13211 * If the user provided its own SAX callbacks then reuse the
13212 * useData callback field, otherwise the expected setup in a
13213 * DOM builder is to have userData == ctxt
13214 */
13215 if (ctx->userData == ctx)
13216 ctxt->userData = ctxt;
13217 else
13218 ctxt->userData = ctx->userData;
13219
13220 /*
13221 * Doing validity checking on chunk doesn't make sense
13222 */
13223 ctxt->instate = XML_PARSER_CONTENT;
13224 ctxt->validate = ctx->validate;
13225 ctxt->valid = ctx->valid;
13226 ctxt->loadsubset = ctx->loadsubset;
13227 ctxt->depth = ctx->depth + 1;
13228 ctxt->replaceEntities = ctx->replaceEntities;
13229 if (ctxt->validate) {
13230 ctxt->vctxt.error = ctx->vctxt.error;
13231 ctxt->vctxt.warning = ctx->vctxt.warning;
13232 } else {
13233 ctxt->vctxt.error = NULL;
13234 ctxt->vctxt.warning = NULL;
13235 }
13236 ctxt->vctxt.nodeTab = NULL;
13237 ctxt->vctxt.nodeNr = 0;
13238 ctxt->vctxt.nodeMax = 0;
13239 ctxt->vctxt.node = NULL;
13240 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13241 ctxt->dict = ctx->dict;
13242 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13243 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13244 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13245 ctxt->dictNames = ctx->dictNames;
13246 ctxt->attsDefault = ctx->attsDefault;
13247 ctxt->attsSpecial = ctx->attsSpecial;
13248 ctxt->linenumbers = ctx->linenumbers;
13249
13250 xmlParseContent(ctxt);
13251
13252 ctx->validate = ctxt->validate;
13253 ctx->valid = ctxt->valid;
13254 if ((RAW == '<') && (NXT(1) == '/')) {
13255 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13256 } else if (RAW != 0) {
13257 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13258 }
13259 if (ctxt->node != newDoc->children) {
13260 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13261 }
13262
13263 if (!ctxt->wellFormed) {
13264 if (ctxt->errNo == 0)
13265 ret = 1;
13266 else
13267 ret = ctxt->errNo;
13268 } else {
13269 if (lst != NULL) {
13270 xmlNodePtr cur;
13271
13272 /*
13273 * Return the newly created nodeset after unlinking it from
13274 * they pseudo parent.
13275 */
13276 cur = newDoc->children->children;
13277 *lst = cur;
13278 while (cur != NULL) {
13279 cur->parent = NULL;
13280 cur = cur->next;
13281 }
13282 newDoc->children->children = NULL;
13283 }
13284 ret = 0;
13285 }
13286 ctxt->sax = oldsax;
13287 ctxt->dict = NULL;
13288 ctxt->attsDefault = NULL;
13289 ctxt->attsSpecial = NULL;
13290 xmlFreeParserCtxt(ctxt);
13291 newDoc->intSubset = NULL;
13292 newDoc->extSubset = NULL;
13293 xmlFreeDoc(newDoc);
13294
13295 return(ret);
13296}
13297
13298/**
13299 * xmlParseExternalEntityPrivate:
13300 * @doc: the document the chunk pertains to
13301 * @oldctxt: the previous parser context if available
13302 * @sax: the SAX handler bloc (possibly NULL)
13303 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13304 * @depth: Used for loop detection, use 0
13305 * @URL: the URL for the entity to load
13306 * @ID: the System ID for the entity to load
13307 * @list: the return value for the set of parsed nodes
13308 *
13309 * Private version of xmlParseExternalEntity()
13310 *
13311 * Returns 0 if the entity is well formed, -1 in case of args problem and
13312 * the parser error code otherwise
13313 */
13314
13315static xmlParserErrors
13316xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13317 xmlSAXHandlerPtr sax,
13318 void *user_data, int depth, const xmlChar *URL,
13319 const xmlChar *ID, xmlNodePtr *list) {
13320 xmlParserCtxtPtr ctxt;
13321 xmlDocPtr newDoc;
13322 xmlNodePtr newRoot;
13323 xmlSAXHandlerPtr oldsax = NULL;
13324 xmlParserErrors ret = XML_ERR_OK;
13325 xmlChar start[4];
13326 xmlCharEncoding enc;
13327
13328 if (((depth > 40) &&
13329 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13330 (depth > 1024)) {
13331 return(XML_ERR_ENTITY_LOOP);
13332 }
13333
13334 if (list != NULL)
13335 *list = NULL;
13336 if ((URL == NULL) && (ID == NULL))
13337 return(XML_ERR_INTERNAL_ERROR);
13338 if (doc == NULL)
13339 return(XML_ERR_INTERNAL_ERROR);
13340
13341
13342 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13343 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13344 ctxt->userData = ctxt;
13345 if (oldctxt != NULL) {
13346 ctxt->_private = oldctxt->_private;
13347 ctxt->loadsubset = oldctxt->loadsubset;
13348 ctxt->validate = oldctxt->validate;
13349 ctxt->external = oldctxt->external;
13350 ctxt->record_info = oldctxt->record_info;
13351 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13352 ctxt->node_seq.length = oldctxt->node_seq.length;
13353 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13354 } else {
13355 /*
13356 * Doing validity checking on chunk without context
13357 * doesn't make sense
13358 */
13359 ctxt->_private = NULL;
13360 ctxt->validate = 0;
13361 ctxt->external = 2;
13362 ctxt->loadsubset = 0;
13363 }
13364 if (sax != NULL) {
13365 oldsax = ctxt->sax;
13366 ctxt->sax = sax;
13367 if (user_data != NULL)
13368 ctxt->userData = user_data;
13369 }
13370 xmlDetectSAX2(ctxt);
13371 newDoc = xmlNewDoc(BAD_CAST "1.0");
13372 if (newDoc == NULL) {
13373 ctxt->node_seq.maximum = 0;
13374 ctxt->node_seq.length = 0;
13375 ctxt->node_seq.buffer = NULL;
13376 xmlFreeParserCtxt(ctxt);
13377 return(XML_ERR_INTERNAL_ERROR);
13378 }
13379 newDoc->properties = XML_DOC_INTERNAL;
13380 newDoc->intSubset = doc->intSubset;
13381 newDoc->extSubset = doc->extSubset;
13382 newDoc->dict = doc->dict;
13383 xmlDictReference(newDoc->dict);
13384
13385 if (doc->URL != NULL) {
13386 newDoc->URL = xmlStrdup(doc->URL);
13387 }
13388 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13389 if (newRoot == NULL) {
13390 if (sax != NULL)
13391 ctxt->sax = oldsax;
13392 ctxt->node_seq.maximum = 0;
13393 ctxt->node_seq.length = 0;
13394 ctxt->node_seq.buffer = NULL;
13395 xmlFreeParserCtxt(ctxt);
13396 newDoc->intSubset = NULL;
13397 newDoc->extSubset = NULL;
13398 xmlFreeDoc(newDoc);
13399 return(XML_ERR_INTERNAL_ERROR);
13400 }
13401 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13402 nodePush(ctxt, newDoc->children);
13403 ctxt->myDoc = doc;
13404 newRoot->doc = doc;
13405
13406 /*
13407 * Get the 4 first bytes and decode the charset
13408 * if enc != XML_CHAR_ENCODING_NONE
13409 * plug some encoding conversion routines.
13410 */
13411 GROW;
13412 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13413 start[0] = RAW;
13414 start[1] = NXT(1);
13415 start[2] = NXT(2);
13416 start[3] = NXT(3);
13417 enc = xmlDetectCharEncoding(start, 4);
13418 if (enc != XML_CHAR_ENCODING_NONE) {
13419 xmlSwitchEncoding(ctxt, enc);
13420 }
13421 }
13422
13423 /*
13424 * Parse a possible text declaration first
13425 */
13426 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13427 xmlParseTextDecl(ctxt);
13428 }
13429
13430 ctxt->instate = XML_PARSER_CONTENT;
13431 ctxt->depth = depth;
13432
13433 xmlParseContent(ctxt);
13434
13435 if ((RAW == '<') && (NXT(1) == '/')) {
13436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13437 } else if (RAW != 0) {
13438 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13439 }
13440 if (ctxt->node != newDoc->children) {
13441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13442 }
13443
13444 if (!ctxt->wellFormed) {
13445 if (ctxt->errNo == 0)
13446 ret = XML_ERR_INTERNAL_ERROR;
13447 else
13448 ret = (xmlParserErrors)ctxt->errNo;
13449 } else {
13450 if (list != NULL) {
13451 xmlNodePtr cur;
13452
13453 /*
13454 * Return the newly created nodeset after unlinking it from
13455 * they pseudo parent.
13456 */
13457 cur = newDoc->children->children;
13458 *list = cur;
13459 while (cur != NULL) {
13460 cur->parent = NULL;
13461 cur = cur->next;
13462 }
13463 newDoc->children->children = NULL;
13464 }
13465 ret = XML_ERR_OK;
13466 }
13467
13468 /*
13469 * Record in the parent context the number of entities replacement
13470 * done when parsing that reference.
13471 */
13472 if (oldctxt != NULL)
13473 oldctxt->nbentities += ctxt->nbentities;
13474
13475 /*
13476 * Also record the size of the entity parsed
13477 */
13478 if (ctxt->input != NULL && oldctxt != NULL) {
13479 oldctxt->sizeentities += ctxt->input->consumed;
13480 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13481 }
13482 /*
13483 * And record the last error if any
13484 */
13485 if (ctxt->lastError.code != XML_ERR_OK)
13486 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13487
13488 if (sax != NULL)
13489 ctxt->sax = oldsax;
13490 if (oldctxt != NULL) {
13491 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13492 oldctxt->node_seq.length = ctxt->node_seq.length;
13493 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13494 }
13495 ctxt->node_seq.maximum = 0;
13496 ctxt->node_seq.length = 0;
13497 ctxt->node_seq.buffer = NULL;
13498 xmlFreeParserCtxt(ctxt);
13499 newDoc->intSubset = NULL;
13500 newDoc->extSubset = NULL;
13501 xmlFreeDoc(newDoc);
13502
13503 return(ret);
13504}
13505
13506#ifdef LIBXML_SAX1_ENABLED
13507/**
13508 * xmlParseExternalEntity:
13509 * @doc: the document the chunk pertains to
13510 * @sax: the SAX handler bloc (possibly NULL)
13511 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13512 * @depth: Used for loop detection, use 0
13513 * @URL: the URL for the entity to load
13514 * @ID: the System ID for the entity to load
13515 * @lst: the return value for the set of parsed nodes
13516 *
13517 * Parse an external general entity
13518 * An external general parsed entity is well-formed if it matches the
13519 * production labeled extParsedEnt.
13520 *
13521 * [78] extParsedEnt ::= TextDecl? content
13522 *
13523 * Returns 0 if the entity is well formed, -1 in case of args problem and
13524 * the parser error code otherwise
13525 */
13526
13527int
13528xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13529 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13530 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13531 ID, lst));
13532}
13533
13534/**
13535 * xmlParseBalancedChunkMemory:
13536 * @doc: the document the chunk pertains to
13537 * @sax: the SAX handler bloc (possibly NULL)
13538 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13539 * @depth: Used for loop detection, use 0
13540 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13541 * @lst: the return value for the set of parsed nodes
13542 *
13543 * Parse a well-balanced chunk of an XML document
13544 * called by the parser
13545 * The allowed sequence for the Well Balanced Chunk is the one defined by
13546 * the content production in the XML grammar:
13547 *
13548 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13549 *
13550 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13551 * the parser error code otherwise
13552 */
13553
13554int
13555xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13557 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13558 depth, string, lst, 0 );
13559}
13560#endif /* LIBXML_SAX1_ENABLED */
13561
13562/**
13563 * xmlParseBalancedChunkMemoryInternal:
13564 * @oldctxt: the existing parsing context
13565 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13566 * @user_data: the user data field for the parser context
13567 * @lst: the return value for the set of parsed nodes
13568 *
13569 *
13570 * Parse a well-balanced chunk of an XML document
13571 * called by the parser
13572 * The allowed sequence for the Well Balanced Chunk is the one defined by
13573 * the content production in the XML grammar:
13574 *
13575 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13576 *
13577 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13578 * error code otherwise
13579 *
13580 * In case recover is set to 1, the nodelist will not be empty even if
13581 * the parsed chunk is not well balanced.
13582 */
13583static xmlParserErrors
13584xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13585 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13586 xmlParserCtxtPtr ctxt;
13587 xmlDocPtr newDoc = NULL;
13588 xmlNodePtr newRoot;
13589 xmlSAXHandlerPtr oldsax = NULL;
13590 xmlNodePtr content = NULL;
13591 xmlNodePtr last = NULL;
13592 int size;
13593 xmlParserErrors ret = XML_ERR_OK;
13594#ifdef SAX2
13595 int i;
13596#endif
13597
13598 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13599 (oldctxt->depth > 1024)) {
13600 return(XML_ERR_ENTITY_LOOP);
13601 }
13602
13603
13604 if (lst != NULL)
13605 *lst = NULL;
13606 if (string == NULL)
13607 return(XML_ERR_INTERNAL_ERROR);
13608
13609 size = xmlStrlen(string);
13610
13611 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13612 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13613 if (user_data != NULL)
13614 ctxt->userData = user_data;
13615 else
13616 ctxt->userData = ctxt;
13617 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13618 ctxt->dict = oldctxt->dict;
13619 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13620 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13621 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13622
13623#ifdef SAX2
13624 /* propagate namespaces down the entity */
13625 for (i = 0;i < oldctxt->nsNr;i += 2) {
13626 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13627 }
13628#endif
13629
13630 oldsax = ctxt->sax;
13631 ctxt->sax = oldctxt->sax;
13632 xmlDetectSAX2(ctxt);
13633 ctxt->replaceEntities = oldctxt->replaceEntities;
13634 ctxt->options = oldctxt->options;
13635
13636 ctxt->_private = oldctxt->_private;
13637 if (oldctxt->myDoc == NULL) {
13638 newDoc = xmlNewDoc(BAD_CAST "1.0");
13639 if (newDoc == NULL) {
13640 ctxt->sax = oldsax;
13641 ctxt->dict = NULL;
13642 xmlFreeParserCtxt(ctxt);
13643 return(XML_ERR_INTERNAL_ERROR);
13644 }
13645 newDoc->properties = XML_DOC_INTERNAL;
13646 newDoc->dict = ctxt->dict;
13647 xmlDictReference(newDoc->dict);
13648 ctxt->myDoc = newDoc;
13649 } else {
13650 ctxt->myDoc = oldctxt->myDoc;
13651 content = ctxt->myDoc->children;
13652 last = ctxt->myDoc->last;
13653 }
13654 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13655 if (newRoot == NULL) {
13656 ctxt->sax = oldsax;
13657 ctxt->dict = NULL;
13658 xmlFreeParserCtxt(ctxt);
13659 if (newDoc != NULL) {
13660 xmlFreeDoc(newDoc);
13661 }
13662 return(XML_ERR_INTERNAL_ERROR);
13663 }
13664 ctxt->myDoc->children = NULL;
13665 ctxt->myDoc->last = NULL;
13666 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13667 nodePush(ctxt, ctxt->myDoc->children);
13668 ctxt->instate = XML_PARSER_CONTENT;
13669 ctxt->depth = oldctxt->depth + 1;
13670
13671 ctxt->validate = 0;
13672 ctxt->loadsubset = oldctxt->loadsubset;
13673 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13674 /*
13675 * ID/IDREF registration will be done in xmlValidateElement below
13676 */
13677 ctxt->loadsubset |= XML_SKIP_IDS;
13678 }
13679 ctxt->dictNames = oldctxt->dictNames;
13680 ctxt->attsDefault = oldctxt->attsDefault;
13681 ctxt->attsSpecial = oldctxt->attsSpecial;
13682
13683 xmlParseContent(ctxt);
13684 if ((RAW == '<') && (NXT(1) == '/')) {
13685 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13686 } else if (RAW != 0) {
13687 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13688 }
13689 if (ctxt->node != ctxt->myDoc->children) {
13690 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13691 }
13692
13693 if (!ctxt->wellFormed) {
13694 if (ctxt->errNo == 0)
13695 ret = XML_ERR_INTERNAL_ERROR;
13696 else
13697 ret = (xmlParserErrors)ctxt->errNo;
13698 } else {
13699 ret = XML_ERR_OK;
13700 }
13701
13702 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13703 xmlNodePtr cur;
13704
13705 /*
13706 * Return the newly created nodeset after unlinking it from
13707 * they pseudo parent.
13708 */
13709 cur = ctxt->myDoc->children->children;
13710 *lst = cur;
13711 while (cur != NULL) {
13712#ifdef LIBXML_VALID_ENABLED
13713 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13714 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13715 (cur->type == XML_ELEMENT_NODE)) {
13716 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13717 oldctxt->myDoc, cur);
13718 }
13719#endif /* LIBXML_VALID_ENABLED */
13720 cur->parent = NULL;
13721 cur = cur->next;
13722 }
13723 ctxt->myDoc->children->children = NULL;
13724 }
13725 if (ctxt->myDoc != NULL) {
13726 xmlFreeNode(ctxt->myDoc->children);
13727 ctxt->myDoc->children = content;
13728 ctxt->myDoc->last = last;
13729 }
13730
13731 /*
13732 * Record in the parent context the number of entities replacement
13733 * done when parsing that reference.
13734 */
13735 if (oldctxt != NULL)
13736 oldctxt->nbentities += ctxt->nbentities;
13737
13738 /*
13739 * Also record the last error if any
13740 */
13741 if (ctxt->lastError.code != XML_ERR_OK)
13742 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13743
13744 ctxt->sax = oldsax;
13745 ctxt->dict = NULL;
13746 ctxt->attsDefault = NULL;
13747 ctxt->attsSpecial = NULL;
13748 xmlFreeParserCtxt(ctxt);
13749 if (newDoc != NULL) {
13750 xmlFreeDoc(newDoc);
13751 }
13752
13753 return(ret);
13754}
13755
13756/**
13757 * xmlParseInNodeContext:
13758 * @node: the context node
13759 * @data: the input string
13760 * @datalen: the input string length in bytes
13761 * @options: a combination of xmlParserOption
13762 * @lst: the return value for the set of parsed nodes
13763 *
13764 * Parse a well-balanced chunk of an XML document
13765 * within the context (DTD, namespaces, etc ...) of the given node.
13766 *
13767 * The allowed sequence for the data is a Well Balanced Chunk defined by
13768 * the content production in the XML grammar:
13769 *
13770 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13771 *
13772 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13773 * error code otherwise
13774 */
13775xmlParserErrors
13776xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13777 int options, xmlNodePtr *lst) {
13778#ifdef SAX2
13779 xmlParserCtxtPtr ctxt;
13780 xmlDocPtr doc = NULL;
13781 xmlNodePtr fake, cur;
13782 int nsnr = 0;
13783
13784 xmlParserErrors ret = XML_ERR_OK;
13785
13786 /*
13787 * check all input parameters, grab the document
13788 */
13789 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13790 return(XML_ERR_INTERNAL_ERROR);
13791 switch (node->type) {
13792 case XML_ELEMENT_NODE:
13793 case XML_ATTRIBUTE_NODE:
13794 case XML_TEXT_NODE:
13795 case XML_CDATA_SECTION_NODE:
13796 case XML_ENTITY_REF_NODE:
13797 case XML_PI_NODE:
13798 case XML_COMMENT_NODE:
13799 case XML_DOCUMENT_NODE:
13800 case XML_HTML_DOCUMENT_NODE:
13801 break;
13802 default:
13803 return(XML_ERR_INTERNAL_ERROR);
13804
13805 }
13806 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13807 (node->type != XML_DOCUMENT_NODE) &&
13808 (node->type != XML_HTML_DOCUMENT_NODE))
13809 node = node->parent;
13810 if (node == NULL)
13811 return(XML_ERR_INTERNAL_ERROR);
13812 if (node->type == XML_ELEMENT_NODE)
13813 doc = node->doc;
13814 else
13815 doc = (xmlDocPtr) node;
13816 if (doc == NULL)
13817 return(XML_ERR_INTERNAL_ERROR);
13818
13819 /*
13820 * allocate a context and set-up everything not related to the
13821 * node position in the tree
13822 */
13823 if (doc->type == XML_DOCUMENT_NODE)
13824 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13825#ifdef LIBXML_HTML_ENABLED
13826 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13827 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13828 /*
13829 * When parsing in context, it makes no sense to add implied
13830 * elements like html/body/etc...
13831 */
13832 options |= HTML_PARSE_NOIMPLIED;
13833 }
13834#endif
13835 else
13836 return(XML_ERR_INTERNAL_ERROR);
13837
13838 if (ctxt == NULL)
13839 return(XML_ERR_NO_MEMORY);
13840
13841 /*
13842 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13843 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13844 * we must wait until the last moment to free the original one.
13845 */
13846 if (doc->dict != NULL) {
13847 if (ctxt->dict != NULL)
13848 xmlDictFree(ctxt->dict);
13849 ctxt->dict = doc->dict;
13850 } else
13851 options |= XML_PARSE_NODICT;
13852
13853 if (doc->encoding != NULL) {
13854 xmlCharEncodingHandlerPtr hdlr;
13855
13856 if (ctxt->encoding != NULL)
13857 xmlFree((xmlChar *) ctxt->encoding);
13858 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13859
13860 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13861 if (hdlr != NULL) {
13862 xmlSwitchToEncoding(ctxt, hdlr);
13863 } else {
13864 return(XML_ERR_UNSUPPORTED_ENCODING);
13865 }
13866 }
13867
13868 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13869 xmlDetectSAX2(ctxt);
13870 ctxt->myDoc = doc;
13871 /* parsing in context, i.e. as within existing content */
13872 ctxt->instate = XML_PARSER_CONTENT;
13873
13874 fake = xmlNewComment(NULL);
13875 if (fake == NULL) {
13876 xmlFreeParserCtxt(ctxt);
13877 return(XML_ERR_NO_MEMORY);
13878 }
13879 xmlAddChild(node, fake);
13880
13881 if (node->type == XML_ELEMENT_NODE) {
13882 nodePush(ctxt, node);
13883 /*
13884 * initialize the SAX2 namespaces stack
13885 */
13886 cur = node;
13887 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13888 xmlNsPtr ns = cur->nsDef;
13889 const xmlChar *iprefix, *ihref;
13890
13891 while (ns != NULL) {
13892 if (ctxt->dict) {
13893 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13894 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13895 } else {
13896 iprefix = ns->prefix;
13897 ihref = ns->href;
13898 }
13899
13900 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13901 nsPush(ctxt, iprefix, ihref);
13902 nsnr++;
13903 }
13904 ns = ns->next;
13905 }
13906 cur = cur->parent;
13907 }
13908 }
13909
13910 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13911 /*
13912 * ID/IDREF registration will be done in xmlValidateElement below
13913 */
13914 ctxt->loadsubset |= XML_SKIP_IDS;
13915 }
13916
13917#ifdef LIBXML_HTML_ENABLED
13918 if (doc->type == XML_HTML_DOCUMENT_NODE)
13919 __htmlParseContent(ctxt);
13920 else
13921#endif
13922 xmlParseContent(ctxt);
13923
13924 nsPop(ctxt, nsnr);
13925 if ((RAW == '<') && (NXT(1) == '/')) {
13926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13927 } else if (RAW != 0) {
13928 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13929 }
13930 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13931 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13932 ctxt->wellFormed = 0;
13933 }
13934
13935 if (!ctxt->wellFormed) {
13936 if (ctxt->errNo == 0)
13937 ret = XML_ERR_INTERNAL_ERROR;
13938 else
13939 ret = (xmlParserErrors)ctxt->errNo;
13940 } else {
13941 ret = XML_ERR_OK;
13942 }
13943
13944 /*
13945 * Return the newly created nodeset after unlinking it from
13946 * the pseudo sibling.
13947 */
13948
13949 cur = fake->next;
13950 fake->next = NULL;
13951 node->last = fake;
13952
13953 if (cur != NULL) {
13954 cur->prev = NULL;
13955 }
13956
13957 *lst = cur;
13958
13959 while (cur != NULL) {
13960 cur->parent = NULL;
13961 cur = cur->next;
13962 }
13963
13964 xmlUnlinkNode(fake);
13965 xmlFreeNode(fake);
13966
13967
13968 if (ret != XML_ERR_OK) {
13969 xmlFreeNodeList(*lst);
13970 *lst = NULL;
13971 }
13972
13973 if (doc->dict != NULL)
13974 ctxt->dict = NULL;
13975 xmlFreeParserCtxt(ctxt);
13976
13977 return(ret);
13978#else /* !SAX2 */
13979 return(XML_ERR_INTERNAL_ERROR);
13980#endif
13981}
13982
13983#ifdef LIBXML_SAX1_ENABLED
13984/**
13985 * xmlParseBalancedChunkMemoryRecover:
13986 * @doc: the document the chunk pertains to
13987 * @sax: the SAX handler bloc (possibly NULL)
13988 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13989 * @depth: Used for loop detection, use 0
13990 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13991 * @lst: the return value for the set of parsed nodes
13992 * @recover: return nodes even if the data is broken (use 0)
13993 *
13994 *
13995 * Parse a well-balanced chunk of an XML document
13996 * called by the parser
13997 * The allowed sequence for the Well Balanced Chunk is the one defined by
13998 * the content production in the XML grammar:
13999 *
14000 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
14001 *
14002 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
14003 * the parser error code otherwise
14004 *
14005 * In case recover is set to 1, the nodelist will not be empty even if
14006 * the parsed chunk is not well balanced, assuming the parsing succeeded to
14007 * some extent.
14008 */
14009int
14010xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
14011 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
14012 int recover) {
14013 xmlParserCtxtPtr ctxt;
14014 xmlDocPtr newDoc;
14015 xmlSAXHandlerPtr oldsax = NULL;
14016 xmlNodePtr content, newRoot;
14017 int size;
14018 int ret = 0;
14019
14020 if (depth > 40) {
14021 return(XML_ERR_ENTITY_LOOP);
14022 }
14023
14024
14025 if (lst != NULL)
14026 *lst = NULL;
14027 if (string == NULL)
14028 return(-1);
14029
14030 size = xmlStrlen(string);
14031
14032 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
14033 if (ctxt == NULL) return(-1);
14034 ctxt->userData = ctxt;
14035 if (sax != NULL) {
14036 oldsax = ctxt->sax;
14037 ctxt->sax = sax;
14038 if (user_data != NULL)
14039 ctxt->userData = user_data;
14040 }
14041 newDoc = xmlNewDoc(BAD_CAST "1.0");
14042 if (newDoc == NULL) {
14043 xmlFreeParserCtxt(ctxt);
14044 return(-1);
14045 }
14046 newDoc->properties = XML_DOC_INTERNAL;
14047 if ((doc != NULL) && (doc->dict != NULL)) {
14048 xmlDictFree(ctxt->dict);
14049 ctxt->dict = doc->dict;
14050 xmlDictReference(ctxt->dict);
14051 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14052 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14053 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14054 ctxt->dictNames = 1;
14055 } else {
14056 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
14057 }
14058 if (doc != NULL) {
14059 newDoc->intSubset = doc->intSubset;
14060 newDoc->extSubset = doc->extSubset;
14061 }
14062 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14063 if (newRoot == NULL) {
14064 if (sax != NULL)
14065 ctxt->sax = oldsax;
14066 xmlFreeParserCtxt(ctxt);
14067 newDoc->intSubset = NULL;
14068 newDoc->extSubset = NULL;
14069 xmlFreeDoc(newDoc);
14070 return(-1);
14071 }
14072 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14073 nodePush(ctxt, newRoot);
14074 if (doc == NULL) {
14075 ctxt->myDoc = newDoc;
14076 } else {
14077 ctxt->myDoc = newDoc;
14078 newDoc->children->doc = doc;
14079 /* Ensure that doc has XML spec namespace */
14080 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14081 newDoc->oldNs = doc->oldNs;
14082 }
14083 ctxt->instate = XML_PARSER_CONTENT;
14084 ctxt->depth = depth;
14085
14086 /*
14087 * Doing validity checking on chunk doesn't make sense
14088 */
14089 ctxt->validate = 0;
14090 ctxt->loadsubset = 0;
14091 xmlDetectSAX2(ctxt);
14092
14093 if ( doc != NULL ){
14094 content = doc->children;
14095 doc->children = NULL;
14096 xmlParseContent(ctxt);
14097 doc->children = content;
14098 }
14099 else {
14100 xmlParseContent(ctxt);
14101 }
14102 if ((RAW == '<') && (NXT(1) == '/')) {
14103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14104 } else if (RAW != 0) {
14105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
14106 }
14107 if (ctxt->node != newDoc->children) {
14108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
14109 }
14110
14111 if (!ctxt->wellFormed) {
14112 if (ctxt->errNo == 0)
14113 ret = 1;
14114 else
14115 ret = ctxt->errNo;
14116 } else {
14117 ret = 0;
14118 }
14119
14120 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14121 xmlNodePtr cur;
14122
14123 /*
14124 * Return the newly created nodeset after unlinking it from
14125 * they pseudo parent.
14126 */
14127 cur = newDoc->children->children;
14128 *lst = cur;
14129 while (cur != NULL) {
14130 xmlSetTreeDoc(cur, doc);
14131 cur->parent = NULL;
14132 cur = cur->next;
14133 }
14134 newDoc->children->children = NULL;
14135 }
14136
14137 if (sax != NULL)
14138 ctxt->sax = oldsax;
14139 xmlFreeParserCtxt(ctxt);
14140 newDoc->intSubset = NULL;
14141 newDoc->extSubset = NULL;
14142 newDoc->oldNs = NULL;
14143 xmlFreeDoc(newDoc);
14144
14145 return(ret);
14146}
14147
14148/**
14149 * xmlSAXParseEntity:
14150 * @sax: the SAX handler block
14151 * @filename: the filename
14152 *
14153 * parse an XML external entity out of context and build a tree.
14154 * It use the given SAX function block to handle the parsing callback.
14155 * If sax is NULL, fallback to the default DOM tree building routines.
14156 *
14157 * [78] extParsedEnt ::= TextDecl? content
14158 *
14159 * This correspond to a "Well Balanced" chunk
14160 *
14161 * Returns the resulting document tree
14162 */
14163
14164xmlDocPtr
14165xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14166 xmlDocPtr ret;
14167 xmlParserCtxtPtr ctxt;
14168
14169 ctxt = xmlCreateFileParserCtxt(filename);
14170 if (ctxt == NULL) {
14171 return(NULL);
14172 }
14173 if (sax != NULL) {
14174 if (ctxt->sax != NULL)
14175 xmlFree(ctxt->sax);
14176 ctxt->sax = sax;
14177 ctxt->userData = NULL;
14178 }
14179
14180 xmlParseExtParsedEnt(ctxt);
14181
14182 if (ctxt->wellFormed)
14183 ret = ctxt->myDoc;
14184 else {
14185 ret = NULL;
14186 xmlFreeDoc(ctxt->myDoc);
14187 ctxt->myDoc = NULL;
14188 }
14189 if (sax != NULL)
14190 ctxt->sax = NULL;
14191 xmlFreeParserCtxt(ctxt);
14192
14193 return(ret);
14194}
14195
14196/**
14197 * xmlParseEntity:
14198 * @filename: the filename
14199 *
14200 * parse an XML external entity out of context and build a tree.
14201 *
14202 * [78] extParsedEnt ::= TextDecl? content
14203 *
14204 * This correspond to a "Well Balanced" chunk
14205 *
14206 * Returns the resulting document tree
14207 */
14208
14209xmlDocPtr
14210xmlParseEntity(const char *filename) {
14211 return(xmlSAXParseEntity(NULL, filename));
14212}
14213#endif /* LIBXML_SAX1_ENABLED */
14214
14215/**
14216 * xmlCreateEntityParserCtxtInternal:
14217 * @URL: the entity URL
14218 * @ID: the entity PUBLIC ID
14219 * @base: a possible base for the target URI
14220 * @pctx: parser context used to set options on new context
14221 *
14222 * Create a parser context for an external entity
14223 * Automatic support for ZLIB/Compress compressed document is provided
14224 * by default if found at compile-time.
14225 *
14226 * Returns the new parser context or NULL
14227 */
14228static xmlParserCtxtPtr
14229xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14230 const xmlChar *base, xmlParserCtxtPtr pctx) {
14231 xmlParserCtxtPtr ctxt;
14232 xmlParserInputPtr inputStream;
14233 char *directory = NULL;
14234 xmlChar *uri;
14235
14236 ctxt = xmlNewParserCtxt();
14237 if (ctxt == NULL) {
14238 return(NULL);
14239 }
14240
14241 if (pctx != NULL) {
14242 ctxt->options = pctx->options;
14243 ctxt->_private = pctx->_private;
14244 }
14245
14246 uri = xmlBuildURI(URL, base);
14247
14248 if (uri == NULL) {
14249 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14250 if (inputStream == NULL) {
14251 xmlFreeParserCtxt(ctxt);
14252 return(NULL);
14253 }
14254
14255 inputPush(ctxt, inputStream);
14256
14257 if ((ctxt->directory == NULL) && (directory == NULL))
14258 directory = xmlParserGetDirectory((char *)URL);
14259 if ((ctxt->directory == NULL) && (directory != NULL))
14260 ctxt->directory = directory;
14261 } else {
14262 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14263 if (inputStream == NULL) {
14264 xmlFree(uri);
14265 xmlFreeParserCtxt(ctxt);
14266 return(NULL);
14267 }
14268
14269 inputPush(ctxt, inputStream);
14270
14271 if ((ctxt->directory == NULL) && (directory == NULL))
14272 directory = xmlParserGetDirectory((char *)uri);
14273 if ((ctxt->directory == NULL) && (directory != NULL))
14274 ctxt->directory = directory;
14275 xmlFree(uri);
14276 }
14277 return(ctxt);
14278}
14279
14280/**
14281 * xmlCreateEntityParserCtxt:
14282 * @URL: the entity URL
14283 * @ID: the entity PUBLIC ID
14284 * @base: a possible base for the target URI
14285 *
14286 * Create a parser context for an external entity
14287 * Automatic support for ZLIB/Compress compressed document is provided
14288 * by default if found at compile-time.
14289 *
14290 * Returns the new parser context or NULL
14291 */
14292xmlParserCtxtPtr
14293xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14294 const xmlChar *base) {
14295 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14296
14297}
14298
14299/************************************************************************
14300 * *
14301 * Front ends when parsing from a file *
14302 * *
14303 ************************************************************************/
14304
14305/**
14306 * xmlCreateURLParserCtxt:
14307 * @filename: the filename or URL
14308 * @options: a combination of xmlParserOption
14309 *
14310 * Create a parser context for a file or URL content.
14311 * Automatic support for ZLIB/Compress compressed document is provided
14312 * by default if found at compile-time and for file accesses
14313 *
14314 * Returns the new parser context or NULL
14315 */
14316xmlParserCtxtPtr
14317xmlCreateURLParserCtxt(const char *filename, int options)
14318{
14319 xmlParserCtxtPtr ctxt;
14320 xmlParserInputPtr inputStream;
14321 char *directory = NULL;
14322
14323 ctxt = xmlNewParserCtxt();
14324 if (ctxt == NULL) {
14325 xmlErrMemory(NULL, "cannot allocate parser context");
14326 return(NULL);
14327 }
14328
14329 if (options)
14330 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14331 ctxt->linenumbers = 1;
14332
14333 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14334 if (inputStream == NULL) {
14335 xmlFreeParserCtxt(ctxt);
14336 return(NULL);
14337 }
14338
14339 inputPush(ctxt, inputStream);
14340 if ((ctxt->directory == NULL) && (directory == NULL))
14341 directory = xmlParserGetDirectory(filename);
14342 if ((ctxt->directory == NULL) && (directory != NULL))
14343 ctxt->directory = directory;
14344
14345 return(ctxt);
14346}
14347
14348/**
14349 * xmlCreateFileParserCtxt:
14350 * @filename: the filename
14351 *
14352 * Create a parser context for a file content.
14353 * Automatic support for ZLIB/Compress compressed document is provided
14354 * by default if found at compile-time.
14355 *
14356 * Returns the new parser context or NULL
14357 */
14358xmlParserCtxtPtr
14359xmlCreateFileParserCtxt(const char *filename)
14360{
14361 return(xmlCreateURLParserCtxt(filename, 0));
14362}
14363
14364#ifdef LIBXML_SAX1_ENABLED
14365/**
14366 * xmlSAXParseFileWithData:
14367 * @sax: the SAX handler block
14368 * @filename: the filename
14369 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14370 * documents
14371 * @data: the userdata
14372 *
14373 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14374 * compressed document is provided by default if found at compile-time.
14375 * It use the given SAX function block to handle the parsing callback.
14376 * If sax is NULL, fallback to the default DOM tree building routines.
14377 *
14378 * User data (void *) is stored within the parser context in the
14379 * context's _private member, so it is available nearly everywhere in libxml
14380 *
14381 * Returns the resulting document tree
14382 */
14383
14384xmlDocPtr
14385xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14386 int recovery, void *data) {
14387 xmlDocPtr ret;
14388 xmlParserCtxtPtr ctxt;
14389
14390 xmlInitParser();
14391
14392 ctxt = xmlCreateFileParserCtxt(filename);
14393 if (ctxt == NULL) {
14394 return(NULL);
14395 }
14396 if (sax != NULL) {
14397 if (ctxt->sax != NULL)
14398 xmlFree(ctxt->sax);
14399 ctxt->sax = sax;
14400 }
14401 xmlDetectSAX2(ctxt);
14402 if (data!=NULL) {
14403 ctxt->_private = data;
14404 }
14405
14406 if (ctxt->directory == NULL)
14407 ctxt->directory = xmlParserGetDirectory(filename);
14408
14409 ctxt->recovery = recovery;
14410
14411 xmlParseDocument(ctxt);
14412
14413 if ((ctxt->wellFormed) || recovery) {
14414 ret = ctxt->myDoc;
14415 if (ret != NULL) {
14416 if (ctxt->input->buf->compressed > 0)
14417 ret->compression = 9;
14418 else
14419 ret->compression = ctxt->input->buf->compressed;
14420 }
14421 }
14422 else {
14423 ret = NULL;
14424 xmlFreeDoc(ctxt->myDoc);
14425 ctxt->myDoc = NULL;
14426 }
14427 if (sax != NULL)
14428 ctxt->sax = NULL;
14429 xmlFreeParserCtxt(ctxt);
14430
14431 return(ret);
14432}
14433
14434/**
14435 * xmlSAXParseFile:
14436 * @sax: the SAX handler block
14437 * @filename: the filename
14438 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14439 * documents
14440 *
14441 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14442 * compressed document is provided by default if found at compile-time.
14443 * It use the given SAX function block to handle the parsing callback.
14444 * If sax is NULL, fallback to the default DOM tree building routines.
14445 *
14446 * Returns the resulting document tree
14447 */
14448
14449xmlDocPtr
14450xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14451 int recovery) {
14452 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14453}
14454
14455/**
14456 * xmlRecoverDoc:
14457 * @cur: a pointer to an array of xmlChar
14458 *
14459 * parse an XML in-memory document and build a tree.
14460 * In the case the document is not Well Formed, a attempt to build a
14461 * tree is tried anyway
14462 *
14463 * Returns the resulting document tree or NULL in case of failure
14464 */
14465
14466xmlDocPtr
14467xmlRecoverDoc(const xmlChar *cur) {
14468 return(xmlSAXParseDoc(NULL, cur, 1));
14469}
14470
14471/**
14472 * xmlParseFile:
14473 * @filename: the filename
14474 *
14475 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14476 * compressed document is provided by default if found at compile-time.
14477 *
14478 * Returns the resulting document tree if the file was wellformed,
14479 * NULL otherwise.
14480 */
14481
14482xmlDocPtr
14483xmlParseFile(const char *filename) {
14484 return(xmlSAXParseFile(NULL, filename, 0));
14485}
14486
14487/**
14488 * xmlRecoverFile:
14489 * @filename: the filename
14490 *
14491 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14492 * compressed document is provided by default if found at compile-time.
14493 * In the case the document is not Well Formed, it attempts to build
14494 * a tree anyway
14495 *
14496 * Returns the resulting document tree or NULL in case of failure
14497 */
14498
14499xmlDocPtr
14500xmlRecoverFile(const char *filename) {
14501 return(xmlSAXParseFile(NULL, filename, 1));
14502}
14503
14504
14505/**
14506 * xmlSetupParserForBuffer:
14507 * @ctxt: an XML parser context
14508 * @buffer: a xmlChar * buffer
14509 * @filename: a file name
14510 *
14511 * Setup the parser context to parse a new buffer; Clears any prior
14512 * contents from the parser context. The buffer parameter must not be
14513 * NULL, but the filename parameter can be
14514 */
14515void
14516xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14517 const char* filename)
14518{
14519 xmlParserInputPtr input;
14520
14521 if ((ctxt == NULL) || (buffer == NULL))
14522 return;
14523
14524 input = xmlNewInputStream(ctxt);
14525 if (input == NULL) {
14526 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14527 xmlClearParserCtxt(ctxt);
14528 return;
14529 }
14530
14531 xmlClearParserCtxt(ctxt);
14532 if (filename != NULL)
14533 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14534 input->base = buffer;
14535 input->cur = buffer;
14536 input->end = &buffer[xmlStrlen(buffer)];
14537 inputPush(ctxt, input);
14538}
14539
14540/**
14541 * xmlSAXUserParseFile:
14542 * @sax: a SAX handler
14543 * @user_data: The user data returned on SAX callbacks
14544 * @filename: a file name
14545 *
14546 * parse an XML file and call the given SAX handler routines.
14547 * Automatic support for ZLIB/Compress compressed document is provided
14548 *
14549 * Returns 0 in case of success or a error number otherwise
14550 */
14551int
14552xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14553 const char *filename) {
14554 int ret = 0;
14555 xmlParserCtxtPtr ctxt;
14556
14557 ctxt = xmlCreateFileParserCtxt(filename);
14558 if (ctxt == NULL) return -1;
14559 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14560 xmlFree(ctxt->sax);
14561 ctxt->sax = sax;
14562 xmlDetectSAX2(ctxt);
14563
14564 if (user_data != NULL)
14565 ctxt->userData = user_data;
14566
14567 xmlParseDocument(ctxt);
14568
14569 if (ctxt->wellFormed)
14570 ret = 0;
14571 else {
14572 if (ctxt->errNo != 0)
14573 ret = ctxt->errNo;
14574 else
14575 ret = -1;
14576 }
14577 if (sax != NULL)
14578 ctxt->sax = NULL;
14579 if (ctxt->myDoc != NULL) {
14580 xmlFreeDoc(ctxt->myDoc);
14581 ctxt->myDoc = NULL;
14582 }
14583 xmlFreeParserCtxt(ctxt);
14584
14585 return ret;
14586}
14587#endif /* LIBXML_SAX1_ENABLED */
14588
14589/************************************************************************
14590 * *
14591 * Front ends when parsing from memory *
14592 * *
14593 ************************************************************************/
14594
14595/**
14596 * xmlCreateMemoryParserCtxt:
14597 * @buffer: a pointer to a char array
14598 * @size: the size of the array
14599 *
14600 * Create a parser context for an XML in-memory document.
14601 *
14602 * Returns the new parser context or NULL
14603 */
14604xmlParserCtxtPtr
14605xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14606 xmlParserCtxtPtr ctxt;
14607 xmlParserInputPtr input;
14608 xmlParserInputBufferPtr buf;
14609
14610 if (buffer == NULL)
14611 return(NULL);
14612 if (size <= 0)
14613 return(NULL);
14614
14615 ctxt = xmlNewParserCtxt();
14616 if (ctxt == NULL)
14617 return(NULL);
14618
14619 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14620 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14621 if (buf == NULL) {
14622 xmlFreeParserCtxt(ctxt);
14623 return(NULL);
14624 }
14625
14626 input = xmlNewInputStream(ctxt);
14627 if (input == NULL) {
14628 xmlFreeParserInputBuffer(buf);
14629 xmlFreeParserCtxt(ctxt);
14630 return(NULL);
14631 }
14632
14633 input->filename = NULL;
14634 input->buf = buf;
14635 xmlBufResetInput(input->buf->buffer, input);
14636
14637 inputPush(ctxt, input);
14638 return(ctxt);
14639}
14640
14641#ifdef LIBXML_SAX1_ENABLED
14642/**
14643 * xmlSAXParseMemoryWithData:
14644 * @sax: the SAX handler block
14645 * @buffer: an pointer to a char array
14646 * @size: the size of the array
14647 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14648 * documents
14649 * @data: the userdata
14650 *
14651 * parse an XML in-memory block and use the given SAX function block
14652 * to handle the parsing callback. If sax is NULL, fallback to the default
14653 * DOM tree building routines.
14654 *
14655 * User data (void *) is stored within the parser context in the
14656 * context's _private member, so it is available nearly everywhere in libxml
14657 *
14658 * Returns the resulting document tree
14659 */
14660
14661xmlDocPtr
14662xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14663 int size, int recovery, void *data) {
14664 xmlDocPtr ret;
14665 xmlParserCtxtPtr ctxt;
14666
14667 xmlInitParser();
14668
14669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14670 if (ctxt == NULL) return(NULL);
14671 if (sax != NULL) {
14672 if (ctxt->sax != NULL)
14673 xmlFree(ctxt->sax);
14674 ctxt->sax = sax;
14675 }
14676 xmlDetectSAX2(ctxt);
14677 if (data!=NULL) {
14678 ctxt->_private=data;
14679 }
14680
14681 ctxt->recovery = recovery;
14682
14683 xmlParseDocument(ctxt);
14684
14685 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14686 else {
14687 ret = NULL;
14688 xmlFreeDoc(ctxt->myDoc);
14689 ctxt->myDoc = NULL;
14690 }
14691 if (sax != NULL)
14692 ctxt->sax = NULL;
14693 xmlFreeParserCtxt(ctxt);
14694
14695 return(ret);
14696}
14697
14698/**
14699 * xmlSAXParseMemory:
14700 * @sax: the SAX handler block
14701 * @buffer: an pointer to a char array
14702 * @size: the size of the array
14703 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14704 * documents
14705 *
14706 * parse an XML in-memory block and use the given SAX function block
14707 * to handle the parsing callback. If sax is NULL, fallback to the default
14708 * DOM tree building routines.
14709 *
14710 * Returns the resulting document tree
14711 */
14712xmlDocPtr
14713xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14714 int size, int recovery) {
14715 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14716}
14717
14718/**
14719 * xmlParseMemory:
14720 * @buffer: an pointer to a char array
14721 * @size: the size of the array
14722 *
14723 * parse an XML in-memory block and build a tree.
14724 *
14725 * Returns the resulting document tree
14726 */
14727
14728xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14729 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14730}
14731
14732/**
14733 * xmlRecoverMemory:
14734 * @buffer: an pointer to a char array
14735 * @size: the size of the array
14736 *
14737 * parse an XML in-memory block and build a tree.
14738 * In the case the document is not Well Formed, an attempt to
14739 * build a tree is tried anyway
14740 *
14741 * Returns the resulting document tree or NULL in case of error
14742 */
14743
14744xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14745 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14746}
14747
14748/**
14749 * xmlSAXUserParseMemory:
14750 * @sax: a SAX handler
14751 * @user_data: The user data returned on SAX callbacks
14752 * @buffer: an in-memory XML document input
14753 * @size: the length of the XML document in bytes
14754 *
14755 * A better SAX parsing routine.
14756 * parse an XML in-memory buffer and call the given SAX handler routines.
14757 *
14758 * Returns 0 in case of success or a error number otherwise
14759 */
14760int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14761 const char *buffer, int size) {
14762 int ret = 0;
14763 xmlParserCtxtPtr ctxt;
14764
14765 xmlInitParser();
14766
14767 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14768 if (ctxt == NULL) return -1;
14769 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14770 xmlFree(ctxt->sax);
14771 ctxt->sax = sax;
14772 xmlDetectSAX2(ctxt);
14773
14774 if (user_data != NULL)
14775 ctxt->userData = user_data;
14776
14777 xmlParseDocument(ctxt);
14778
14779 if (ctxt->wellFormed)
14780 ret = 0;
14781 else {
14782 if (ctxt->errNo != 0)
14783 ret = ctxt->errNo;
14784 else
14785 ret = -1;
14786 }
14787 if (sax != NULL)
14788 ctxt->sax = NULL;
14789 if (ctxt->myDoc != NULL) {
14790 xmlFreeDoc(ctxt->myDoc);
14791 ctxt->myDoc = NULL;
14792 }
14793 xmlFreeParserCtxt(ctxt);
14794
14795 return ret;
14796}
14797#endif /* LIBXML_SAX1_ENABLED */
14798
14799/**
14800 * xmlCreateDocParserCtxt:
14801 * @cur: a pointer to an array of xmlChar
14802 *
14803 * Creates a parser context for an XML in-memory document.
14804 *
14805 * Returns the new parser context or NULL
14806 */
14807xmlParserCtxtPtr
14808xmlCreateDocParserCtxt(const xmlChar *cur) {
14809 int len;
14810
14811 if (cur == NULL)
14812 return(NULL);
14813 len = xmlStrlen(cur);
14814 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14815}
14816
14817#ifdef LIBXML_SAX1_ENABLED
14818/**
14819 * xmlSAXParseDoc:
14820 * @sax: the SAX handler block
14821 * @cur: a pointer to an array of xmlChar
14822 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14823 * documents
14824 *
14825 * parse an XML in-memory document and build a tree.
14826 * It use the given SAX function block to handle the parsing callback.
14827 * If sax is NULL, fallback to the default DOM tree building routines.
14828 *
14829 * Returns the resulting document tree
14830 */
14831
14832xmlDocPtr
14833xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14834 xmlDocPtr ret;
14835 xmlParserCtxtPtr ctxt;
14836 xmlSAXHandlerPtr oldsax = NULL;
14837
14838 if (cur == NULL) return(NULL);
14839
14840
14841 ctxt = xmlCreateDocParserCtxt(cur);
14842 if (ctxt == NULL) return(NULL);
14843 if (sax != NULL) {
14844 oldsax = ctxt->sax;
14845 ctxt->sax = sax;
14846 ctxt->userData = NULL;
14847 }
14848 xmlDetectSAX2(ctxt);
14849
14850 xmlParseDocument(ctxt);
14851 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14852 else {
14853 ret = NULL;
14854 xmlFreeDoc(ctxt->myDoc);
14855 ctxt->myDoc = NULL;
14856 }
14857 if (sax != NULL)
14858 ctxt->sax = oldsax;
14859 xmlFreeParserCtxt(ctxt);
14860
14861 return(ret);
14862}
14863
14864/**
14865 * xmlParseDoc:
14866 * @cur: a pointer to an array of xmlChar
14867 *
14868 * parse an XML in-memory document and build a tree.
14869 *
14870 * Returns the resulting document tree
14871 */
14872
14873xmlDocPtr
14874xmlParseDoc(const xmlChar *cur) {
14875 return(xmlSAXParseDoc(NULL, cur, 0));
14876}
14877#endif /* LIBXML_SAX1_ENABLED */
14878
14879#ifdef LIBXML_LEGACY_ENABLED
14880/************************************************************************
14881 * *
14882 * Specific function to keep track of entities references *
14883 * and used by the XSLT debugger *
14884 * *
14885 ************************************************************************/
14886
14887static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14888
14889/**
14890 * xmlAddEntityReference:
14891 * @ent : A valid entity
14892 * @firstNode : A valid first node for children of entity
14893 * @lastNode : A valid last node of children entity
14894 *
14895 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14896 */
14897static void
14898xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14899 xmlNodePtr lastNode)
14900{
14901 if (xmlEntityRefFunc != NULL) {
14902 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14903 }
14904}
14905
14906
14907/**
14908 * xmlSetEntityReferenceFunc:
14909 * @func: A valid function
14910 *
14911 * Set the function to call call back when a xml reference has been made
14912 */
14913void
14914xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14915{
14916 xmlEntityRefFunc = func;
14917}
14918#endif /* LIBXML_LEGACY_ENABLED */
14919
14920/************************************************************************
14921 * *
14922 * Miscellaneous *
14923 * *
14924 ************************************************************************/
14925
14926#ifdef LIBXML_XPATH_ENABLED
14927#include <libxml/xpath.h>
14928#endif
14929
14930extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14931static int xmlParserInitialized = 0;
14932
14933/**
14934 * xmlInitParser:
14935 *
14936 * Initialization function for the XML parser.
14937 * This is not reentrant. Call once before processing in case of
14938 * use in multithreaded programs.
14939 */
14940
14941void
14942xmlInitParser(void) {
14943 if (xmlParserInitialized != 0)
14944 return;
14945
14946#ifdef LIBXML_THREAD_ENABLED
14947 __xmlGlobalInitMutexLock();
14948 if (xmlParserInitialized == 0) {
14949#endif
14950 xmlInitThreads();
14951 xmlInitGlobals();
14952 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14953 (xmlGenericError == NULL))
14954 initGenericErrorDefaultFunc(NULL);
14955 xmlInitMemory();
14956 xmlInitializeDict();
14957 xmlInitCharEncodingHandlers();
14958 xmlDefaultSAXHandlerInit();
14959 xmlRegisterDefaultInputCallbacks();
14960#ifdef LIBXML_OUTPUT_ENABLED
14961 xmlRegisterDefaultOutputCallbacks();
14962#endif /* LIBXML_OUTPUT_ENABLED */
14963#ifdef LIBXML_HTML_ENABLED
14964 htmlInitAutoClose();
14965 htmlDefaultSAXHandlerInit();
14966#endif
14967#ifdef LIBXML_XPATH_ENABLED
14968 xmlXPathInit();
14969#endif
14970 xmlParserInitialized = 1;
14971#ifdef LIBXML_THREAD_ENABLED
14972 }
14973 __xmlGlobalInitMutexUnlock();
14974#endif
14975}
14976
14977/**
14978 * xmlCleanupParser:
14979 *
14980 * This function name is somewhat misleading. It does not clean up
14981 * parser state, it cleans up memory allocated by the library itself.
14982 * It is a cleanup function for the XML library. It tries to reclaim all
14983 * related global memory allocated for the library processing.
14984 * It doesn't deallocate any document related memory. One should
14985 * call xmlCleanupParser() only when the process has finished using
14986 * the library and all XML/HTML documents built with it.
14987 * See also xmlInitParser() which has the opposite function of preparing
14988 * the library for operations.
14989 *
14990 * WARNING: if your application is multithreaded or has plugin support
14991 * calling this may crash the application if another thread or
14992 * a plugin is still using libxml2. It's sometimes very hard to
14993 * guess if libxml2 is in use in the application, some libraries
14994 * or plugins may use it without notice. In case of doubt abstain
14995 * from calling this function or do it just before calling exit()
14996 * to avoid leak reports from valgrind !
14997 */
14998
14999void
15000xmlCleanupParser(void) {
15001 if (!xmlParserInitialized)
15002 return;
15003
15004 xmlCleanupCharEncodingHandlers();
15005#ifdef LIBXML_CATALOG_ENABLED
15006 xmlCatalogCleanup();
15007#endif
15008 xmlDictCleanup();
15009 xmlCleanupInputCallbacks();
15010#ifdef LIBXML_OUTPUT_ENABLED
15011 xmlCleanupOutputCallbacks();
15012#endif
15013#ifdef LIBXML_SCHEMAS_ENABLED
15014 xmlSchemaCleanupTypes();
15015 xmlRelaxNGCleanupTypes();
15016#endif
15017 xmlResetLastError();
15018 xmlCleanupGlobals();
15019 xmlCleanupThreads(); /* must be last if called not from the main thread */
15020 xmlCleanupMemory();
15021 xmlParserInitialized = 0;
15022}
15023
15024/************************************************************************
15025 * *
15026 * New set (2.6.0) of simpler and more flexible APIs *
15027 * *
15028 ************************************************************************/
15029
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used. NULL strings are ignored, and a NULL dict means the string is
 * always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as one
 * statement: it must be followed by a semicolon and can safely be used
 * as the body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
15041
15042/**
15043 * xmlCtxtReset:
15044 * @ctxt: an XML parser context
15045 *
15046 * Reset a parser context
15047 */
15048void
15049xmlCtxtReset(xmlParserCtxtPtr ctxt)
15050{
15051 xmlParserInputPtr input;
15052 xmlDictPtr dict;
15053
15054 if (ctxt == NULL)
15055 return;
15056
15057 dict = ctxt->dict;
15058
15059 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15060 xmlFreeInputStream(input);
15061 }
15062 ctxt->inputNr = 0;
15063 ctxt->input = NULL;
15064
15065 ctxt->spaceNr = 0;
15066 if (ctxt->spaceTab != NULL) {
15067 ctxt->spaceTab[0] = -1;
15068 ctxt->space = &ctxt->spaceTab[0];
15069 } else {
15070 ctxt->space = NULL;
15071 }
15072
15073
15074 ctxt->nodeNr = 0;
15075 ctxt->node = NULL;
15076
15077 ctxt->nameNr = 0;
15078 ctxt->name = NULL;
15079
15080 DICT_FREE(ctxt->version);
15081 ctxt->version = NULL;
15082 DICT_FREE(ctxt->encoding);
15083 ctxt->encoding = NULL;
15084 DICT_FREE(ctxt->directory);
15085 ctxt->directory = NULL;
15086 DICT_FREE(ctxt->extSubURI);
15087 ctxt->extSubURI = NULL;
15088 DICT_FREE(ctxt->extSubSystem);
15089 ctxt->extSubSystem = NULL;
15090 if (ctxt->myDoc != NULL)
15091 xmlFreeDoc(ctxt->myDoc);
15092 ctxt->myDoc = NULL;
15093
15094 ctxt->standalone = -1;
15095 ctxt->hasExternalSubset = 0;
15096 ctxt->hasPErefs = 0;
15097 ctxt->html = 0;
15098 ctxt->external = 0;
15099 ctxt->instate = XML_PARSER_START;
15100 ctxt->token = 0;
15101
15102 ctxt->wellFormed = 1;
15103 ctxt->nsWellFormed = 1;
15104 ctxt->disableSAX = 0;
15105 ctxt->valid = 1;
15106#if 0
15107 ctxt->vctxt.userData = ctxt;
15108 ctxt->vctxt.error = xmlParserValidityError;
15109 ctxt->vctxt.warning = xmlParserValidityWarning;
15110#endif
15111 ctxt->record_info = 0;
15112 ctxt->nbChars = 0;
15113 ctxt->checkIndex = 0;
15114 ctxt->inSubset = 0;
15115 ctxt->errNo = XML_ERR_OK;
15116 ctxt->depth = 0;
15117 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15118 ctxt->catalogs = NULL;
15119 ctxt->nbentities = 0;
15120 ctxt->sizeentities = 0;
15121 ctxt->sizeentcopy = 0;
15122 xmlInitNodeInfoSeq(&ctxt->node_seq);
15123
15124 if (ctxt->attsDefault != NULL) {
15125 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15126 ctxt->attsDefault = NULL;
15127 }
15128 if (ctxt->attsSpecial != NULL) {
15129 xmlHashFree(ctxt->attsSpecial, NULL);
15130 ctxt->attsSpecial = NULL;
15131 }
15132
15133#ifdef LIBXML_CATALOG_ENABLED
15134 if (ctxt->catalogs != NULL)
15135 xmlCatalogFreeLocal(ctxt->catalogs);
15136#endif
15137 if (ctxt->lastError.code != XML_ERR_OK)
15138 xmlResetError(&ctxt->lastError);
15139}
15140
15141/**
15142 * xmlCtxtResetPush:
15143 * @ctxt: an XML parser context
15144 * @chunk: a pointer to an array of chars
15145 * @size: number of chars in the array
15146 * @filename: an optional file name or URI
15147 * @encoding: the document encoding, or NULL
15148 *
15149 * Reset a push parser context
15150 *
15151 * Returns 0 in case of success and 1 in case of error
15152 */
15153int
15154xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15155 int size, const char *filename, const char *encoding)
15156{
15157 xmlParserInputPtr inputStream;
15158 xmlParserInputBufferPtr buf;
15159 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15160
15161 if (ctxt == NULL)
15162 return(1);
15163
15164 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15165 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15166
15167 buf = xmlAllocParserInputBuffer(enc);
15168 if (buf == NULL)
15169 return(1);
15170
15171 if (ctxt == NULL) {
15172 xmlFreeParserInputBuffer(buf);
15173 return(1);
15174 }
15175
15176 xmlCtxtReset(ctxt);
15177
15178 if (ctxt->pushTab == NULL) {
15179 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15180 sizeof(xmlChar *));
15181 if (ctxt->pushTab == NULL) {
15182 xmlErrMemory(ctxt, NULL);
15183 xmlFreeParserInputBuffer(buf);
15184 return(1);
15185 }
15186 }
15187
15188 if (filename == NULL) {
15189 ctxt->directory = NULL;
15190 } else {
15191 ctxt->directory = xmlParserGetDirectory(filename);
15192 }
15193
15194 inputStream = xmlNewInputStream(ctxt);
15195 if (inputStream == NULL) {
15196 xmlFreeParserInputBuffer(buf);
15197 return(1);
15198 }
15199
15200 if (filename == NULL)
15201 inputStream->filename = NULL;
15202 else
15203 inputStream->filename = (char *)
15204 xmlCanonicPath((const xmlChar *) filename);
15205 inputStream->buf = buf;
15206 xmlBufResetInput(buf->buffer, inputStream);
15207
15208 inputPush(ctxt, inputStream);
15209
15210 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15211 (ctxt->input->buf != NULL)) {
15212 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15213 size_t cur = ctxt->input->cur - ctxt->input->base;
15214
15215 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15216
15217 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15218#ifdef DEBUG_PUSH
15219 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15220#endif
15221 }
15222
15223 if (encoding != NULL) {
15224 xmlCharEncodingHandlerPtr hdlr;
15225
15226 if (ctxt->encoding != NULL)
15227 xmlFree((xmlChar *) ctxt->encoding);
15228 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15229
15230 hdlr = xmlFindCharEncodingHandler(encoding);
15231 if (hdlr != NULL) {
15232 xmlSwitchToEncoding(ctxt, hdlr);
15233 } else {
15234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15235 "Unsupported encoding %s\n", BAD_CAST encoding);
15236 }
15237 } else if (enc != XML_CHAR_ENCODING_NONE) {
15238 xmlSwitchEncoding(ctxt, enc);
15239 }
15240
15241 return(0);
15242}
15243
15244
15245/**
15246 * xmlCtxtUseOptionsInternal:
15247 * @ctxt: an XML parser context
15248 * @options: a combination of xmlParserOption
15249 * @encoding: the user provided encoding to use
15250 *
15251 * Applies the options to the parser context
15252 *
15253 * Returns 0 in case of success, the set of unknown or unimplemented options
15254 * in case of error.
15255 */
15256static int
15257xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15258{
15259 if (ctxt == NULL)
15260 return(-1);
15261 if (encoding != NULL) {
15262 if (ctxt->encoding != NULL)
15263 xmlFree((xmlChar *) ctxt->encoding);
15264 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15265 }
15266 if (options & XML_PARSE_RECOVER) {
15267 ctxt->recovery = 1;
15268 options -= XML_PARSE_RECOVER;
15269 ctxt->options |= XML_PARSE_RECOVER;
15270 } else
15271 ctxt->recovery = 0;
15272 if (options & XML_PARSE_DTDLOAD) {
15273 ctxt->loadsubset = XML_DETECT_IDS;
15274 options -= XML_PARSE_DTDLOAD;
15275 ctxt->options |= XML_PARSE_DTDLOAD;
15276 } else
15277 ctxt->loadsubset = 0;
15278 if (options & XML_PARSE_DTDATTR) {
15279 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15280 options -= XML_PARSE_DTDATTR;
15281 ctxt->options |= XML_PARSE_DTDATTR;
15282 }
15283 if (options & XML_PARSE_NOENT) {
15284 ctxt->replaceEntities = 1;
15285 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15286 options -= XML_PARSE_NOENT;
15287 ctxt->options |= XML_PARSE_NOENT;
15288 } else
15289 ctxt->replaceEntities = 0;
15290 if (options & XML_PARSE_PEDANTIC) {
15291 ctxt->pedantic = 1;
15292 options -= XML_PARSE_PEDANTIC;
15293 ctxt->options |= XML_PARSE_PEDANTIC;
15294 } else
15295 ctxt->pedantic = 0;
15296 if (options & XML_PARSE_NOBLANKS) {
15297 ctxt->keepBlanks = 0;
15298 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15299 options -= XML_PARSE_NOBLANKS;
15300 ctxt->options |= XML_PARSE_NOBLANKS;
15301 } else
15302 ctxt->keepBlanks = 1;
15303 if (options & XML_PARSE_DTDVALID) {
15304 ctxt->validate = 1;
15305 if (options & XML_PARSE_NOWARNING)
15306 ctxt->vctxt.warning = NULL;
15307 if (options & XML_PARSE_NOERROR)
15308 ctxt->vctxt.error = NULL;
15309 options -= XML_PARSE_DTDVALID;
15310 ctxt->options |= XML_PARSE_DTDVALID;
15311 } else
15312 ctxt->validate = 0;
15313 if (options & XML_PARSE_NOWARNING) {
15314 ctxt->sax->warning = NULL;
15315 options -= XML_PARSE_NOWARNING;
15316 }
15317 if (options & XML_PARSE_NOERROR) {
15318 ctxt->sax->error = NULL;
15319 ctxt->sax->fatalError = NULL;
15320 options -= XML_PARSE_NOERROR;
15321 }
15322#ifdef LIBXML_SAX1_ENABLED
15323 if (options & XML_PARSE_SAX1) {
15324 ctxt->sax->startElement = xmlSAX2StartElement;
15325 ctxt->sax->endElement = xmlSAX2EndElement;
15326 ctxt->sax->startElementNs = NULL;
15327 ctxt->sax->endElementNs = NULL;
15328 ctxt->sax->initialized = 1;
15329 options -= XML_PARSE_SAX1;
15330 ctxt->options |= XML_PARSE_SAX1;
15331 }
15332#endif /* LIBXML_SAX1_ENABLED */
15333 if (options & XML_PARSE_NODICT) {
15334 ctxt->dictNames = 0;
15335 options -= XML_PARSE_NODICT;
15336 ctxt->options |= XML_PARSE_NODICT;
15337 } else {
15338 ctxt->dictNames = 1;
15339 }
15340 if (options & XML_PARSE_NOCDATA) {
15341 ctxt->sax->cdataBlock = NULL;
15342 options -= XML_PARSE_NOCDATA;
15343 ctxt->options |= XML_PARSE_NOCDATA;
15344 }
15345 if (options & XML_PARSE_NSCLEAN) {
15346 ctxt->options |= XML_PARSE_NSCLEAN;
15347 options -= XML_PARSE_NSCLEAN;
15348 }
15349 if (options & XML_PARSE_NONET) {
15350 ctxt->options |= XML_PARSE_NONET;
15351 options -= XML_PARSE_NONET;
15352 }
15353 if (options & XML_PARSE_COMPACT) {
15354 ctxt->options |= XML_PARSE_COMPACT;
15355 options -= XML_PARSE_COMPACT;
15356 }
15357 if (options & XML_PARSE_OLD10) {
15358 ctxt->options |= XML_PARSE_OLD10;
15359 options -= XML_PARSE_OLD10;
15360 }
15361 if (options & XML_PARSE_NOBASEFIX) {
15362 ctxt->options |= XML_PARSE_NOBASEFIX;
15363 options -= XML_PARSE_NOBASEFIX;
15364 }
15365 if (options & XML_PARSE_HUGE) {
15366 ctxt->options |= XML_PARSE_HUGE;
15367 options -= XML_PARSE_HUGE;
15368 if (ctxt->dict != NULL)
15369 xmlDictSetLimit(ctxt->dict, 0);
15370 }
15371 if (options & XML_PARSE_OLDSAX) {
15372 ctxt->options |= XML_PARSE_OLDSAX;
15373 options -= XML_PARSE_OLDSAX;
15374 }
15375 if (options & XML_PARSE_IGNORE_ENC) {
15376 ctxt->options |= XML_PARSE_IGNORE_ENC;
15377 options -= XML_PARSE_IGNORE_ENC;
15378 }
15379 if (options & XML_PARSE_BIG_LINES) {
15380 ctxt->options |= XML_PARSE_BIG_LINES;
15381 options -= XML_PARSE_BIG_LINES;
15382 }
15383 ctxt->linenumbers = 1;
15384 return (options);
15385}
15386
15387/**
15388 * xmlCtxtUseOptions:
15389 * @ctxt: an XML parser context
15390 * @options: a combination of xmlParserOption
15391 *
15392 * Applies the options to the parser context
15393 *
15394 * Returns 0 in case of success, the set of unknown or unimplemented options
15395 * in case of error.
15396 */
15397int
15398xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15399{
15400 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15401}
15402
15403/**
15404 * xmlDoRead:
15405 * @ctxt: an XML parser context
15406 * @URL: the base URL to use for the document
15407 * @encoding: the document encoding, or NULL
15408 * @options: a combination of xmlParserOption
15409 * @reuse: keep the context for reuse
15410 *
15411 * Common front-end for the xmlRead functions
15412 *
15413 * Returns the resulting document tree or NULL
15414 */
15415static xmlDocPtr
15416xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15417 int options, int reuse)
15418{
15419 xmlDocPtr ret;
15420
15421 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15422 if (encoding != NULL) {
15423 xmlCharEncodingHandlerPtr hdlr;
15424
15425 hdlr = xmlFindCharEncodingHandler(encoding);
15426 if (hdlr != NULL)
15427 xmlSwitchToEncoding(ctxt, hdlr);
15428 }
15429 if ((URL != NULL) && (ctxt->input != NULL) &&
15430 (ctxt->input->filename == NULL))
15431 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15432 xmlParseDocument(ctxt);
15433 if ((ctxt->wellFormed) || ctxt->recovery)
15434 ret = ctxt->myDoc;
15435 else {
15436 ret = NULL;
15437 if (ctxt->myDoc != NULL) {
15438 xmlFreeDoc(ctxt->myDoc);
15439 }
15440 }
15441 ctxt->myDoc = NULL;
15442 if (!reuse) {
15443 xmlFreeParserCtxt(ctxt);
15444 }
15445
15446 return (ret);
15447}
15448
15449/**
15450 * xmlReadDoc:
15451 * @cur: a pointer to a zero terminated string
15452 * @URL: the base URL to use for the document
15453 * @encoding: the document encoding, or NULL
15454 * @options: a combination of xmlParserOption
15455 *
15456 * parse an XML in-memory document and build a tree.
15457 *
15458 * Returns the resulting document tree
15459 */
15460xmlDocPtr
15461xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15462{
15463 xmlParserCtxtPtr ctxt;
15464
15465 if (cur == NULL)
15466 return (NULL);
15467 xmlInitParser();
15468
15469 ctxt = xmlCreateDocParserCtxt(cur);
15470 if (ctxt == NULL)
15471 return (NULL);
15472 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15473}
15474
15475/**
15476 * xmlReadFile:
15477 * @filename: a file or URL
15478 * @encoding: the document encoding, or NULL
15479 * @options: a combination of xmlParserOption
15480 *
15481 * parse an XML file from the filesystem or the network.
15482 *
15483 * Returns the resulting document tree
15484 */
15485xmlDocPtr
15486xmlReadFile(const char *filename, const char *encoding, int options)
15487{
15488 xmlParserCtxtPtr ctxt;
15489
15490 xmlInitParser();
15491 ctxt = xmlCreateURLParserCtxt(filename, options);
15492 if (ctxt == NULL)
15493 return (NULL);
15494 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15495}
15496
15497/**
15498 * xmlReadMemory:
15499 * @buffer: a pointer to a char array
15500 * @size: the size of the array
15501 * @URL: the base URL to use for the document
15502 * @encoding: the document encoding, or NULL
15503 * @options: a combination of xmlParserOption
15504 *
15505 * parse an XML in-memory document and build a tree.
15506 *
15507 * Returns the resulting document tree
15508 */
15509xmlDocPtr
15510xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15511{
15512 xmlParserCtxtPtr ctxt;
15513
15514 xmlInitParser();
15515 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15516 if (ctxt == NULL)
15517 return (NULL);
15518 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15519}
15520
15521/**
15522 * xmlReadFd:
15523 * @fd: an open file descriptor
15524 * @URL: the base URL to use for the document
15525 * @encoding: the document encoding, or NULL
15526 * @options: a combination of xmlParserOption
15527 *
15528 * parse an XML from a file descriptor and build a tree.
15529 * NOTE that the file descriptor will not be closed when the
15530 * reader is closed or reset.
15531 *
15532 * Returns the resulting document tree
15533 */
15534xmlDocPtr
15535xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15536{
15537 xmlParserCtxtPtr ctxt;
15538 xmlParserInputBufferPtr input;
15539 xmlParserInputPtr stream;
15540
15541 if (fd < 0)
15542 return (NULL);
15543 xmlInitParser();
15544
15545 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15546 if (input == NULL)
15547 return (NULL);
15548 input->closecallback = NULL;
15549 ctxt = xmlNewParserCtxt();
15550 if (ctxt == NULL) {
15551 xmlFreeParserInputBuffer(input);
15552 return (NULL);
15553 }
15554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15555 if (stream == NULL) {
15556 xmlFreeParserInputBuffer(input);
15557 xmlFreeParserCtxt(ctxt);
15558 return (NULL);
15559 }
15560 inputPush(ctxt, stream);
15561 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15562}
15563
15564/**
15565 * xmlReadIO:
15566 * @ioread: an I/O read function
15567 * @ioclose: an I/O close function
15568 * @ioctx: an I/O handler
15569 * @URL: the base URL to use for the document
15570 * @encoding: the document encoding, or NULL
15571 * @options: a combination of xmlParserOption
15572 *
15573 * parse an XML document from I/O functions and source and build a tree.
15574 *
15575 * Returns the resulting document tree
15576 */
15577xmlDocPtr
15578xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15579 void *ioctx, const char *URL, const char *encoding, int options)
15580{
15581 xmlParserCtxtPtr ctxt;
15582 xmlParserInputBufferPtr input;
15583 xmlParserInputPtr stream;
15584
15585 if (ioread == NULL)
15586 return (NULL);
15587 xmlInitParser();
15588
15589 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15590 XML_CHAR_ENCODING_NONE);
15591 if (input == NULL) {
15592 if (ioclose != NULL)
15593 ioclose(ioctx);
15594 return (NULL);
15595 }
15596 ctxt = xmlNewParserCtxt();
15597 if (ctxt == NULL) {
15598 xmlFreeParserInputBuffer(input);
15599 return (NULL);
15600 }
15601 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15602 if (stream == NULL) {
15603 xmlFreeParserInputBuffer(input);
15604 xmlFreeParserCtxt(ctxt);
15605 return (NULL);
15606 }
15607 inputPush(ctxt, stream);
15608 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15609}
15610
15611/**
15612 * xmlCtxtReadDoc:
15613 * @ctxt: an XML parser context
15614 * @cur: a pointer to a zero terminated string
15615 * @URL: the base URL to use for the document
15616 * @encoding: the document encoding, or NULL
15617 * @options: a combination of xmlParserOption
15618 *
15619 * parse an XML in-memory document and build a tree.
15620 * This reuses the existing @ctxt parser context
15621 *
15622 * Returns the resulting document tree
15623 */
15624xmlDocPtr
15625xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15626 const char *URL, const char *encoding, int options)
15627{
15628 xmlParserInputPtr stream;
15629
15630 if (cur == NULL)
15631 return (NULL);
15632 if (ctxt == NULL)
15633 return (NULL);
15634 xmlInitParser();
15635
15636 xmlCtxtReset(ctxt);
15637
15638 stream = xmlNewStringInputStream(ctxt, cur);
15639 if (stream == NULL) {
15640 return (NULL);
15641 }
15642 inputPush(ctxt, stream);
15643 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15644}
15645
15646/**
15647 * xmlCtxtReadFile:
15648 * @ctxt: an XML parser context
15649 * @filename: a file or URL
15650 * @encoding: the document encoding, or NULL
15651 * @options: a combination of xmlParserOption
15652 *
15653 * parse an XML file from the filesystem or the network.
15654 * This reuses the existing @ctxt parser context
15655 *
15656 * Returns the resulting document tree
15657 */
15658xmlDocPtr
15659xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15660 const char *encoding, int options)
15661{
15662 xmlParserInputPtr stream;
15663
15664 if (filename == NULL)
15665 return (NULL);
15666 if (ctxt == NULL)
15667 return (NULL);
15668 xmlInitParser();
15669
15670 xmlCtxtReset(ctxt);
15671
15672 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15673 if (stream == NULL) {
15674 return (NULL);
15675 }
15676 inputPush(ctxt, stream);
15677 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15678}
15679
15680/**
15681 * xmlCtxtReadMemory:
15682 * @ctxt: an XML parser context
15683 * @buffer: a pointer to a char array
15684 * @size: the size of the array
15685 * @URL: the base URL to use for the document
15686 * @encoding: the document encoding, or NULL
15687 * @options: a combination of xmlParserOption
15688 *
15689 * parse an XML in-memory document and build a tree.
15690 * This reuses the existing @ctxt parser context
15691 *
15692 * Returns the resulting document tree
15693 */
15694xmlDocPtr
15695xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15696 const char *URL, const char *encoding, int options)
15697{
15698 xmlParserInputBufferPtr input;
15699 xmlParserInputPtr stream;
15700
15701 if (ctxt == NULL)
15702 return (NULL);
15703 if (buffer == NULL)
15704 return (NULL);
15705 xmlInitParser();
15706
15707 xmlCtxtReset(ctxt);
15708
15709 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15710 if (input == NULL) {
15711 return(NULL);
15712 }
15713
15714 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15715 if (stream == NULL) {
15716 xmlFreeParserInputBuffer(input);
15717 return(NULL);
15718 }
15719
15720 inputPush(ctxt, stream);
15721 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15722}
15723
15724/**
15725 * xmlCtxtReadFd:
15726 * @ctxt: an XML parser context
15727 * @fd: an open file descriptor
15728 * @URL: the base URL to use for the document
15729 * @encoding: the document encoding, or NULL
15730 * @options: a combination of xmlParserOption
15731 *
15732 * parse an XML from a file descriptor and build a tree.
15733 * This reuses the existing @ctxt parser context
15734 * NOTE that the file descriptor will not be closed when the
15735 * reader is closed or reset.
15736 *
15737 * Returns the resulting document tree
15738 */
15739xmlDocPtr
15740xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15741 const char *URL, const char *encoding, int options)
15742{
15743 xmlParserInputBufferPtr input;
15744 xmlParserInputPtr stream;
15745
15746 if (fd < 0)
15747 return (NULL);
15748 if (ctxt == NULL)
15749 return (NULL);
15750 xmlInitParser();
15751
15752 xmlCtxtReset(ctxt);
15753
15754
15755 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15756 if (input == NULL)
15757 return (NULL);
15758 input->closecallback = NULL;
15759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15760 if (stream == NULL) {
15761 xmlFreeParserInputBuffer(input);
15762 return (NULL);
15763 }
15764 inputPush(ctxt, stream);
15765 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15766}
15767
15768/**
15769 * xmlCtxtReadIO:
15770 * @ctxt: an XML parser context
15771 * @ioread: an I/O read function
15772 * @ioclose: an I/O close function
15773 * @ioctx: an I/O handler
15774 * @URL: the base URL to use for the document
15775 * @encoding: the document encoding, or NULL
15776 * @options: a combination of xmlParserOption
15777 *
15778 * parse an XML document from I/O functions and source and build a tree.
15779 * This reuses the existing @ctxt parser context
15780 *
15781 * Returns the resulting document tree
15782 */
15783xmlDocPtr
15784xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15785 xmlInputCloseCallback ioclose, void *ioctx,
15786 const char *URL,
15787 const char *encoding, int options)
15788{
15789 xmlParserInputBufferPtr input;
15790 xmlParserInputPtr stream;
15791
15792 if (ioread == NULL)
15793 return (NULL);
15794 if (ctxt == NULL)
15795 return (NULL);
15796 xmlInitParser();
15797
15798 xmlCtxtReset(ctxt);
15799
15800 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15801 XML_CHAR_ENCODING_NONE);
15802 if (input == NULL) {
15803 if (ioclose != NULL)
15804 ioclose(ioctx);
15805 return (NULL);
15806 }
15807 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15808 if (stream == NULL) {
15809 xmlFreeParserInputBuffer(input);
15810 return (NULL);
15811 }
15812 inputPush(ctxt, stream);
15813 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15814}
15815
15816#define bottom_parser
15817#include "elfgcchack.h"
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette