VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.31/parser.c@ 41752

最後變更 在這個檔案從41752是 39921,由 vboxsync 提交於 13 年 前

libxml-2.6.31 upstream fixes

  • 屬性 svn:eol-style 設為 native
檔案大小: 373.3 KB
 
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <string.h>
44#include <stdarg.h>
45#include <libxml/xmlmemory.h>
46#include <libxml/threads.h>
47#include <libxml/globals.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
60#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
64#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
83static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86/************************************************************************
87 * *
88 * Arbitrary limits set in the parser. *
89 * *
90 ************************************************************************/
91
/*
 * XML_PARSER_BIG_ENTITY: replacement sizes below this value are never
 * reported by xmlParserEntityCheck().
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
102
103/*
104 * xmlParserEntityCheck
105 *
106 * Function to check non-linear entity expansion behaviour
107 * This is here to detect and stop exponential linear entity expansion
108 * This is not a limitation of the parser but a safety
109 * boundary feature.
110 */
111static int
112xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
113 xmlEntityPtr ent)
114{
115 unsigned long consumed = 0;
116
117 if (ctxt == NULL)
118 return (0);
119 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
120 return (1);
121 if (size != 0) {
122 /*
123 * Do the check based on the replacement size of the entity
124 */
125 if (size < XML_PARSER_BIG_ENTITY)
126 return(0);
127
128 /*
129 * A limit on the amount of text data reasonably used
130 */
131 if (ctxt->input != NULL) {
132 consumed = ctxt->input->consumed +
133 (ctxt->input->cur - ctxt->input->base);
134 }
135 consumed += ctxt->sizeentities;
136
137 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
138 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
139 return (0);
140 } else if (ent != NULL) {
141 /*
142 * use the number of parsed entities in the replacement
143 */
144 size = ent->owner;
145
146 /*
147 * The amount of data parsed counting entities size only once
148 */
149 if (ctxt->input != NULL) {
150 consumed = ctxt->input->consumed +
151 (ctxt->input->cur - ctxt->input->base);
152 }
153 consumed += ctxt->sizeentities;
154
155 /*
156 * Check the density of entities for the amount of data
157 * knowing an entity reference will take at least 3 bytes
158 */
159 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
160 return (0);
161 } else {
162 /*
163 * strange we got no data for checking just return
164 */
165 return (0);
166 }
167
168 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
169 return (1);
170}
171
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. The default value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
180
#define SAX2 1

#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL terminated list of the "xml" prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
196
197
198/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
199xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
200 const xmlChar **str);
201
202static xmlParserErrors
203xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
204 xmlSAXHandlerPtr sax,
205 void *user_data, int depth, const xmlChar *URL,
206 const xmlChar *ID, xmlNodePtr *list);
207
208#ifdef LIBXML_LEGACY_ENABLED
209static void
210xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
211 xmlNodePtr lastNode);
212#endif /* LIBXML_LEGACY_ENABLED */
213
214static xmlParserErrors
215xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
216 const xmlChar *string, void *user_data, xmlNodePtr *lst);
217
218/************************************************************************
219 * *
220 * Some factorized error routines *
221 * *
222 ************************************************************************/
223
224/**
225 * xmlErrAttributeDup:
226 * @ctxt: an XML parser context
227 * @prefix: the attribute prefix
228 * @localname: the attribute localname
229 *
230 * Handle a redefinition of attribute error
231 */
232static void
233xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
234 const xmlChar * localname)
235{
236 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
237 (ctxt->instate == XML_PARSER_EOF))
238 return;
239 if (ctxt != NULL)
240 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
241 if (prefix == NULL)
242 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
243 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
244 (const char *) localname, NULL, NULL, 0, 0,
245 "Attribute %s redefined\n", localname);
246 else
247 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
248 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
249 (const char *) prefix, (const char *) localname,
250 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
251 localname);
252 if (ctxt != NULL) {
253 ctxt->wellFormed = 0;
254 if (ctxt->recovery == 0)
255 ctxt->disableSAX = 1;
256 }
257}
258
259/**
260 * xmlFatalErr:
261 * @ctxt: an XML parser context
262 * @error: the error number
263 * @extra: extra information string
264 *
265 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
266 */
267static void
268xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
269{
270 const char *errmsg;
271
272 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
273 (ctxt->instate == XML_PARSER_EOF))
274 return;
275 switch (error) {
276 case XML_ERR_INVALID_HEX_CHARREF:
277 errmsg = "CharRef: invalid hexadecimal value\n";
278 break;
279 case XML_ERR_INVALID_DEC_CHARREF:
280 errmsg = "CharRef: invalid decimal value\n";
281 break;
282 case XML_ERR_INVALID_CHARREF:
283 errmsg = "CharRef: invalid value\n";
284 break;
285 case XML_ERR_INTERNAL_ERROR:
286 errmsg = "internal error";
287 break;
288 case XML_ERR_PEREF_AT_EOF:
289 errmsg = "PEReference at end of document\n";
290 break;
291 case XML_ERR_PEREF_IN_PROLOG:
292 errmsg = "PEReference in prolog\n";
293 break;
294 case XML_ERR_PEREF_IN_EPILOG:
295 errmsg = "PEReference in epilog\n";
296 break;
297 case XML_ERR_PEREF_NO_NAME:
298 errmsg = "PEReference: no name\n";
299 break;
300 case XML_ERR_PEREF_SEMICOL_MISSING:
301 errmsg = "PEReference: expecting ';'\n";
302 break;
303 case XML_ERR_ENTITY_LOOP:
304 errmsg = "Detected an entity reference loop\n";
305 break;
306 case XML_ERR_ENTITY_NOT_STARTED:
307 errmsg = "EntityValue: \" or ' expected\n";
308 break;
309 case XML_ERR_ENTITY_PE_INTERNAL:
310 errmsg = "PEReferences forbidden in internal subset\n";
311 break;
312 case XML_ERR_ENTITY_NOT_FINISHED:
313 errmsg = "EntityValue: \" or ' expected\n";
314 break;
315 case XML_ERR_ATTRIBUTE_NOT_STARTED:
316 errmsg = "AttValue: \" or ' expected\n";
317 break;
318 case XML_ERR_LT_IN_ATTRIBUTE:
319 errmsg = "Unescaped '<' not allowed in attributes values\n";
320 break;
321 case XML_ERR_LITERAL_NOT_STARTED:
322 errmsg = "SystemLiteral \" or ' expected\n";
323 break;
324 case XML_ERR_LITERAL_NOT_FINISHED:
325 errmsg = "Unfinished System or Public ID \" or ' expected\n";
326 break;
327 case XML_ERR_MISPLACED_CDATA_END:
328 errmsg = "Sequence ']]>' not allowed in content\n";
329 break;
330 case XML_ERR_URI_REQUIRED:
331 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
332 break;
333 case XML_ERR_PUBID_REQUIRED:
334 errmsg = "PUBLIC, the Public Identifier is missing\n";
335 break;
336 case XML_ERR_HYPHEN_IN_COMMENT:
337 errmsg = "Comment must not contain '--' (double-hyphen)\n";
338 break;
339 case XML_ERR_PI_NOT_STARTED:
340 errmsg = "xmlParsePI : no target name\n";
341 break;
342 case XML_ERR_RESERVED_XML_NAME:
343 errmsg = "Invalid PI name\n";
344 break;
345 case XML_ERR_NOTATION_NOT_STARTED:
346 errmsg = "NOTATION: Name expected here\n";
347 break;
348 case XML_ERR_NOTATION_NOT_FINISHED:
349 errmsg = "'>' required to close NOTATION declaration\n";
350 break;
351 case XML_ERR_VALUE_REQUIRED:
352 errmsg = "Entity value required\n";
353 break;
354 case XML_ERR_URI_FRAGMENT:
355 errmsg = "Fragment not allowed";
356 break;
357 case XML_ERR_ATTLIST_NOT_STARTED:
358 errmsg = "'(' required to start ATTLIST enumeration\n";
359 break;
360 case XML_ERR_NMTOKEN_REQUIRED:
361 errmsg = "NmToken expected in ATTLIST enumeration\n";
362 break;
363 case XML_ERR_ATTLIST_NOT_FINISHED:
364 errmsg = "')' required to finish ATTLIST enumeration\n";
365 break;
366 case XML_ERR_MIXED_NOT_STARTED:
367 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
368 break;
369 case XML_ERR_PCDATA_REQUIRED:
370 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
371 break;
372 case XML_ERR_ELEMCONTENT_NOT_STARTED:
373 errmsg = "ContentDecl : Name or '(' expected\n";
374 break;
375 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
376 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
377 break;
378 case XML_ERR_PEREF_IN_INT_SUBSET:
379 errmsg =
380 "PEReference: forbidden within markup decl in internal subset\n";
381 break;
382 case XML_ERR_GT_REQUIRED:
383 errmsg = "expected '>'\n";
384 break;
385 case XML_ERR_CONDSEC_INVALID:
386 errmsg = "XML conditional section '[' expected\n";
387 break;
388 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
389 errmsg = "Content error in the external subset\n";
390 break;
391 case XML_ERR_CONDSEC_INVALID_KEYWORD:
392 errmsg =
393 "conditional section INCLUDE or IGNORE keyword expected\n";
394 break;
395 case XML_ERR_CONDSEC_NOT_FINISHED:
396 errmsg = "XML conditional section not closed\n";
397 break;
398 case XML_ERR_XMLDECL_NOT_STARTED:
399 errmsg = "Text declaration '<?xml' required\n";
400 break;
401 case XML_ERR_XMLDECL_NOT_FINISHED:
402 errmsg = "parsing XML declaration: '?>' expected\n";
403 break;
404 case XML_ERR_EXT_ENTITY_STANDALONE:
405 errmsg = "external parsed entities cannot be standalone\n";
406 break;
407 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
408 errmsg = "EntityRef: expecting ';'\n";
409 break;
410 case XML_ERR_DOCTYPE_NOT_FINISHED:
411 errmsg = "DOCTYPE improperly terminated\n";
412 break;
413 case XML_ERR_LTSLASH_REQUIRED:
414 errmsg = "EndTag: '</' not found\n";
415 break;
416 case XML_ERR_EQUAL_REQUIRED:
417 errmsg = "expected '='\n";
418 break;
419 case XML_ERR_STRING_NOT_CLOSED:
420 errmsg = "String not closed expecting \" or '\n";
421 break;
422 case XML_ERR_STRING_NOT_STARTED:
423 errmsg = "String not started expecting ' or \"\n";
424 break;
425 case XML_ERR_ENCODING_NAME:
426 errmsg = "Invalid XML encoding name\n";
427 break;
428 case XML_ERR_STANDALONE_VALUE:
429 errmsg = "standalone accepts only 'yes' or 'no'\n";
430 break;
431 case XML_ERR_DOCUMENT_EMPTY:
432 errmsg = "Document is empty\n";
433 break;
434 case XML_ERR_DOCUMENT_END:
435 errmsg = "Extra content at the end of the document\n";
436 break;
437 case XML_ERR_NOT_WELL_BALANCED:
438 errmsg = "chunk is not well balanced\n";
439 break;
440 case XML_ERR_EXTRA_CONTENT:
441 errmsg = "extra content at the end of well balanced chunk\n";
442 break;
443 case XML_ERR_VERSION_MISSING:
444 errmsg = "Malformed declaration expecting version\n";
445 break;
446#if 0
447 case:
448 errmsg = "\n";
449 break;
450#endif
451 default:
452 errmsg = "Unregistered error message\n";
453 }
454 if (ctxt != NULL)
455 ctxt->errNo = error;
456 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
457 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
458 info);
459 if (ctxt != NULL) {
460 ctxt->wellFormed = 0;
461 if (ctxt->recovery == 0)
462 ctxt->disableSAX = 1;
463 }
464}
465
466/**
467 * xmlFatalErrMsg:
468 * @ctxt: an XML parser context
469 * @error: the error number
470 * @msg: the error message
471 *
472 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
473 */
474static void
475xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
476 const char *msg)
477{
478 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
479 (ctxt->instate == XML_PARSER_EOF))
480 return;
481 if (ctxt != NULL)
482 ctxt->errNo = error;
483 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
484 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
485 if (ctxt != NULL) {
486 ctxt->wellFormed = 0;
487 if (ctxt->recovery == 0)
488 ctxt->disableSAX = 1;
489 }
490}
491
492/**
493 * xmlWarningMsg:
494 * @ctxt: an XML parser context
495 * @error: the error number
496 * @msg: the error message
497 * @str1: extra data
498 * @str2: extra data
499 *
500 * Handle a warning.
501 */
502static void
503xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
504 const char *msg, const xmlChar *str1, const xmlChar *str2)
505{
506 xmlStructuredErrorFunc schannel = NULL;
507
508 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
509 (ctxt->instate == XML_PARSER_EOF))
510 return;
511 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
512 (ctxt->sax->initialized == XML_SAX2_MAGIC))
513 schannel = ctxt->sax->serror;
514 __xmlRaiseError(schannel,
515 (ctxt->sax) ? ctxt->sax->warning : NULL,
516 ctxt->userData,
517 ctxt, NULL, XML_FROM_PARSER, error,
518 XML_ERR_WARNING, NULL, 0,
519 (const char *) str1, (const char *) str2, NULL, 0, 0,
520 msg, (const char *) str1, (const char *) str2);
521}
522
523/**
524 * xmlValidityError:
525 * @ctxt: an XML parser context
526 * @error: the error number
527 * @msg: the error message
528 * @str1: extra data
529 *
530 * Handle a validity error.
531 */
532static void
533xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
534 const char *msg, const xmlChar *str1)
535{
536 xmlStructuredErrorFunc schannel = NULL;
537
538 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
539 (ctxt->instate == XML_PARSER_EOF))
540 return;
541 if (ctxt != NULL) {
542 ctxt->errNo = error;
543 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
544 schannel = ctxt->sax->serror;
545 }
546 __xmlRaiseError(schannel,
547 ctxt->vctxt.error, ctxt->vctxt.userData,
548 ctxt, NULL, XML_FROM_DTD, error,
549 XML_ERR_ERROR, NULL, 0, (const char *) str1,
550 NULL, NULL, 0, 0,
551 msg, (const char *) str1);
552 if (ctxt != NULL) {
553 ctxt->valid = 0;
554 }
555}
556
557/**
558 * xmlFatalErrMsgInt:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: an integer value
563 *
564 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
565 */
566static void
567xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, int val)
569{
570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
573 if (ctxt != NULL)
574 ctxt->errNo = error;
575 __xmlRaiseError(NULL, NULL, NULL,
576 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
577 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
578 if (ctxt != NULL) {
579 ctxt->wellFormed = 0;
580 if (ctxt->recovery == 0)
581 ctxt->disableSAX = 1;
582 }
583}
584
585/**
586 * xmlFatalErrMsgStrIntStr:
587 * @ctxt: an XML parser context
588 * @error: the error number
589 * @msg: the error message
590 * @str1: an string info
591 * @val: an integer value
592 * @str2: an string info
593 *
594 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
595 */
596static void
597xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
598 const char *msg, const xmlChar *str1, int val,
599 const xmlChar *str2)
600{
601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602 (ctxt->instate == XML_PARSER_EOF))
603 return;
604 if (ctxt != NULL)
605 ctxt->errNo = error;
606 __xmlRaiseError(NULL, NULL, NULL,
607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608 NULL, 0, (const char *) str1, (const char *) str2,
609 NULL, val, 0, msg, str1, val, str2);
610 if (ctxt != NULL) {
611 ctxt->wellFormed = 0;
612 if (ctxt->recovery == 0)
613 ctxt->disableSAX = 1;
614 }
615}
616
617/**
618 * xmlFatalErrMsgStr:
619 * @ctxt: an XML parser context
620 * @error: the error number
621 * @msg: the error message
622 * @val: a string value
623 *
624 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
625 */
626static void
627xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
628 const char *msg, const xmlChar * val)
629{
630 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631 (ctxt->instate == XML_PARSER_EOF))
632 return;
633 if (ctxt != NULL)
634 ctxt->errNo = error;
635 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
636 XML_FROM_PARSER, error, XML_ERR_FATAL,
637 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
638 val);
639 if (ctxt != NULL) {
640 ctxt->wellFormed = 0;
641 if (ctxt->recovery == 0)
642 ctxt->disableSAX = 1;
643 }
644}
645
646/**
647 * xmlErrMsgStr:
648 * @ctxt: an XML parser context
649 * @error: the error number
650 * @msg: the error message
651 * @val: a string value
652 *
653 * Handle a non fatal parser error
654 */
655static void
656xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
657 const char *msg, const xmlChar * val)
658{
659 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
660 (ctxt->instate == XML_PARSER_EOF))
661 return;
662 if (ctxt != NULL)
663 ctxt->errNo = error;
664 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
665 XML_FROM_PARSER, error, XML_ERR_ERROR,
666 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
667 val);
668}
669
670/**
671 * xmlNsErr:
672 * @ctxt: an XML parser context
673 * @error: the error number
674 * @msg: the message
675 * @info1: extra information string
676 * @info2: extra information string
677 *
678 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
679 */
680static void
681xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
682 const char *msg,
683 const xmlChar * info1, const xmlChar * info2,
684 const xmlChar * info3)
685{
686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
689 if (ctxt != NULL)
690 ctxt->errNo = error;
691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
692 XML_ERR_ERROR, NULL, 0, (const char *) info1,
693 (const char *) info2, (const char *) info3, 0, 0, msg,
694 info1, info2, info3);
695 if (ctxt != NULL)
696 ctxt->nsWellFormed = 0;
697}
698
699/************************************************************************
700 * *
701 * Library wide options *
702 * *
703 ************************************************************************/
704
705/**
706 * xmlHasFeature:
707 * @feature: the feature to be examined
708 *
709 * Examines if the library has been compiled with a given feature.
710 *
711 * Returns a non-zero value if the feature exist, otherwise zero.
712 * Returns zero (0) if the feature does not exist or an unknown
713 * unknown feature is requested, non-zero otherwise.
714 */
715int
716xmlHasFeature(xmlFeature feature)
717{
718 switch (feature) {
719 case XML_WITH_THREAD:
720#ifdef LIBXML_THREAD_ENABLED
721 return(1);
722#else
723 return(0);
724#endif
725 case XML_WITH_TREE:
726#ifdef LIBXML_TREE_ENABLED
727 return(1);
728#else
729 return(0);
730#endif
731 case XML_WITH_OUTPUT:
732#ifdef LIBXML_OUTPUT_ENABLED
733 return(1);
734#else
735 return(0);
736#endif
737 case XML_WITH_PUSH:
738#ifdef LIBXML_PUSH_ENABLED
739 return(1);
740#else
741 return(0);
742#endif
743 case XML_WITH_READER:
744#ifdef LIBXML_READER_ENABLED
745 return(1);
746#else
747 return(0);
748#endif
749 case XML_WITH_PATTERN:
750#ifdef LIBXML_PATTERN_ENABLED
751 return(1);
752#else
753 return(0);
754#endif
755 case XML_WITH_WRITER:
756#ifdef LIBXML_WRITER_ENABLED
757 return(1);
758#else
759 return(0);
760#endif
761 case XML_WITH_SAX1:
762#ifdef LIBXML_SAX1_ENABLED
763 return(1);
764#else
765 return(0);
766#endif
767 case XML_WITH_FTP:
768#ifdef LIBXML_FTP_ENABLED
769 return(1);
770#else
771 return(0);
772#endif
773 case XML_WITH_HTTP:
774#ifdef LIBXML_HTTP_ENABLED
775 return(1);
776#else
777 return(0);
778#endif
779 case XML_WITH_VALID:
780#ifdef LIBXML_VALID_ENABLED
781 return(1);
782#else
783 return(0);
784#endif
785 case XML_WITH_HTML:
786#ifdef LIBXML_HTML_ENABLED
787 return(1);
788#else
789 return(0);
790#endif
791 case XML_WITH_LEGACY:
792#ifdef LIBXML_LEGACY_ENABLED
793 return(1);
794#else
795 return(0);
796#endif
797 case XML_WITH_C14N:
798#ifdef LIBXML_C14N_ENABLED
799 return(1);
800#else
801 return(0);
802#endif
803 case XML_WITH_CATALOG:
804#ifdef LIBXML_CATALOG_ENABLED
805 return(1);
806#else
807 return(0);
808#endif
809 case XML_WITH_XPATH:
810#ifdef LIBXML_XPATH_ENABLED
811 return(1);
812#else
813 return(0);
814#endif
815 case XML_WITH_XPTR:
816#ifdef LIBXML_XPTR_ENABLED
817 return(1);
818#else
819 return(0);
820#endif
821 case XML_WITH_XINCLUDE:
822#ifdef LIBXML_XINCLUDE_ENABLED
823 return(1);
824#else
825 return(0);
826#endif
827 case XML_WITH_ICONV:
828#ifdef LIBXML_ICONV_ENABLED
829 return(1);
830#else
831 return(0);
832#endif
833 case XML_WITH_ISO8859X:
834#ifdef LIBXML_ISO8859X_ENABLED
835 return(1);
836#else
837 return(0);
838#endif
839 case XML_WITH_UNICODE:
840#ifdef LIBXML_UNICODE_ENABLED
841 return(1);
842#else
843 return(0);
844#endif
845 case XML_WITH_REGEXP:
846#ifdef LIBXML_REGEXP_ENABLED
847 return(1);
848#else
849 return(0);
850#endif
851 case XML_WITH_AUTOMATA:
852#ifdef LIBXML_AUTOMATA_ENABLED
853 return(1);
854#else
855 return(0);
856#endif
857 case XML_WITH_EXPR:
858#ifdef LIBXML_EXPR_ENABLED
859 return(1);
860#else
861 return(0);
862#endif
863 case XML_WITH_SCHEMAS:
864#ifdef LIBXML_SCHEMAS_ENABLED
865 return(1);
866#else
867 return(0);
868#endif
869 case XML_WITH_SCHEMATRON:
870#ifdef LIBXML_SCHEMATRON_ENABLED
871 return(1);
872#else
873 return(0);
874#endif
875 case XML_WITH_MODULES:
876#ifdef LIBXML_MODULES_ENABLED
877 return(1);
878#else
879 return(0);
880#endif
881 case XML_WITH_DEBUG:
882#ifdef LIBXML_DEBUG_ENABLED
883 return(1);
884#else
885 return(0);
886#endif
887 case XML_WITH_DEBUG_MEM:
888#ifdef DEBUG_MEMORY_LOCATION
889 return(1);
890#else
891 return(0);
892#endif
893 case XML_WITH_DEBUG_RUN:
894#ifdef LIBXML_DEBUG_RUNTIME
895 return(1);
896#else
897 return(0);
898#endif
899 case XML_WITH_ZLIB:
900#ifdef LIBXML_ZLIB_ENABLED
901 return(1);
902#else
903 return(0);
904#endif
905 default:
906 break;
907 }
908 return(0);
909}
910
911/************************************************************************
912 * *
913 * SAX2 defaulted attributes handling *
914 * *
915 ************************************************************************/
916
917/**
918 * xmlDetectSAX2:
919 * @ctxt: an XML parser context
920 *
921 * Do the SAX2 detection and specific intialization
922 */
923static void
924xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
925 if (ctxt == NULL) return;
926#ifdef LIBXML_SAX1_ENABLED
927 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
928 ((ctxt->sax->startElementNs != NULL) ||
929 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
930#else
931 ctxt->sax2 = 1;
932#endif /* LIBXML_SAX1_ENABLED */
933
934 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
935 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
936 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
937 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
938 (ctxt->str_xml_ns == NULL)) {
939 xmlErrMemory(ctxt, NULL);
940 }
941}
942
943typedef struct _xmlDefAttrs xmlDefAttrs;
944typedef xmlDefAttrs *xmlDefAttrsPtr;
945struct _xmlDefAttrs {
946 int nbAttrs; /* number of defaulted attributes on that element */
947 int maxAttrs; /* the size of the array */
948 const xmlChar *values[4]; /* array of localname/prefix/values */
949};
950
951/**
952 * xmlAddDefAttrs:
953 * @ctxt: an XML parser context
954 * @fullname: the element fullname
955 * @fullattr: the attribute fullname
956 * @value: the attribute value
957 *
958 * Add a defaulted attribute for an element
959 */
960static void
961xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
962 const xmlChar *fullname,
963 const xmlChar *fullattr,
964 const xmlChar *value) {
965 xmlDefAttrsPtr defaults;
966 int len;
967 const xmlChar *name;
968 const xmlChar *prefix;
969
970 if (ctxt->attsDefault == NULL) {
971 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
972 if (ctxt->attsDefault == NULL)
973 goto mem_error;
974 }
975
976 /*
977 * split the element name into prefix:localname , the string found
978 * are within the DTD and then not associated to namespace names.
979 */
980 name = xmlSplitQName3(fullname, &len);
981 if (name == NULL) {
982 name = xmlDictLookup(ctxt->dict, fullname, -1);
983 prefix = NULL;
984 } else {
985 name = xmlDictLookup(ctxt->dict, name, -1);
986 prefix = xmlDictLookup(ctxt->dict, fullname, len);
987 }
988
989 /*
990 * make sure there is some storage
991 */
992 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
993 if (defaults == NULL) {
994 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
995 (4 * 4) * sizeof(const xmlChar *));
996 if (defaults == NULL)
997 goto mem_error;
998 defaults->nbAttrs = 0;
999 defaults->maxAttrs = 4;
1000 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1001 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1002 xmlDefAttrsPtr temp;
1003
1004 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1005 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
1006 if (temp == NULL)
1007 goto mem_error;
1008 defaults = temp;
1009 defaults->maxAttrs *= 2;
1010 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
1011 }
1012
1013 /*
1014 * Split the element name into prefix:localname , the string found
1015 * are within the DTD and hen not associated to namespace names.
1016 */
1017 name = xmlSplitQName3(fullattr, &len);
1018 if (name == NULL) {
1019 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1020 prefix = NULL;
1021 } else {
1022 name = xmlDictLookup(ctxt->dict, name, -1);
1023 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1024 }
1025
1026 defaults->values[4 * defaults->nbAttrs] = name;
1027 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1028 /* intern the string and precompute the end */
1029 len = xmlStrlen(value);
1030 value = xmlDictLookup(ctxt->dict, value, len);
1031 defaults->values[4 * defaults->nbAttrs + 2] = value;
1032 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1033 defaults->nbAttrs++;
1034
1035 return;
1036
1037mem_error:
1038 xmlErrMemory(ctxt, NULL);
1039 return;
1040}
1041
1042/**
1043 * xmlAddSpecialAttr:
1044 * @ctxt: an XML parser context
1045 * @fullname: the element fullname
1046 * @fullattr: the attribute fullname
1047 * @type: the attribute type
1048 *
1049 * Register this attribute type
1050 */
1051static void
1052xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1053 const xmlChar *fullname,
1054 const xmlChar *fullattr,
1055 int type)
1056{
1057 if (ctxt->attsSpecial == NULL) {
1058 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1059 if (ctxt->attsSpecial == NULL)
1060 goto mem_error;
1061 }
1062
1063 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1064 return;
1065
1066 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1067 (void *) (long) type);
1068 return;
1069
1070mem_error:
1071 xmlErrMemory(ctxt, NULL);
1072 return;
1073}
1074
1075/**
1076 * xmlCleanSpecialAttrCallback:
1077 *
1078 * Removes CDATA attributes from the special attribute table
1079 */
1080static void
1081xmlCleanSpecialAttrCallback(void *payload, void *data,
1082 const xmlChar *fullname, const xmlChar *fullattr,
1083 const xmlChar *unused ATTRIBUTE_UNUSED) {
1084 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1085
1086 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1087 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1088 }
1089}
1090
1091/**
1092 * xmlCleanSpecialAttr:
1093 * @ctxt: an XML parser context
1094 *
1095 * Trim the list of attributes defined to remove all those of type
1096 * CDATA as they are not special. This call should be done when finishing
1097 * to parse the DTD and before starting to parse the document root.
1098 */
1099static void
1100xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1101{
1102 if (ctxt->attsSpecial == NULL)
1103 return;
1104
1105 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1106
1107 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1108 xmlHashFree(ctxt->attsSpecial, NULL);
1109 ctxt->attsSpecial = NULL;
1110 }
1111 return;
1112}
1113
/*
 * xmlLangIsAlpha:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise
 */
static int
xmlLangIsAlpha(xmlChar c)
{
    return ((((c >= 'A') && (c <= 'Z')) ||
             ((c >= 'a') && (c <= 'z'))) ? 1 : 0);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value, may be NULL
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: productions [36] and [37] require at
         * least one letter after the "i-" / "x-" prefix.
         */
        cur += 2;
        if (!xmlLangIsAlpha(cur[0]))
            return (0);
        while (xmlLangIsAlpha(cur[0]))  /* non input consuming */
            cur++;
    } else if (xmlLangIsAlpha(cur[0]) && xmlLangIsAlpha(cur[1])) {
        /*
         * ISO639: exactly two letters
         */
        cur += 2;
    } else
        return (0);

    /* any number of '-' Subcode, each subcode made of one or more letters */
    while (cur[0] != 0) {       /* non input consuming */
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsAlpha(cur[0]))
            return (0);
        while (xmlLangIsAlpha(cur[0]))  /* non input consuming */
            cur++;
    }
    return (1);
}
1185
1186/************************************************************************
1187 * *
1188 * Parser stacks related functions and macros *
1189 * *
1190 ************************************************************************/
1191
1192xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1193 const xmlChar ** str);
1194
1195#ifdef SAX2
1196/**
1197 * nsPush:
1198 * @ctxt: an XML parser context
1199 * @prefix: the namespace prefix or NULL
1200 * @URL: the namespace name
1201 *
1202 * Pushes a new parser namespace on top of the ns stack
1203 *
1204 * Returns -1 in case of error, -2 if the namespace should be discarded
1205 * and the index in the stack otherwise.
1206 */
1207static int
1208nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1209{
1210 if (ctxt->options & XML_PARSE_NSCLEAN) {
1211 int i;
1212 for (i = 0;i < ctxt->nsNr;i += 2) {
1213 if (ctxt->nsTab[i] == prefix) {
1214 /* in scope */
1215 if (ctxt->nsTab[i + 1] == URL)
1216 return(-2);
1217 /* out of scope keep it */
1218 break;
1219 }
1220 }
1221 }
1222 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1223 ctxt->nsMax = 10;
1224 ctxt->nsNr = 0;
1225 ctxt->nsTab = (const xmlChar **)
1226 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1227 if (ctxt->nsTab == NULL) {
1228 xmlErrMemory(ctxt, NULL);
1229 ctxt->nsMax = 0;
1230 return (-1);
1231 }
1232 } else if (ctxt->nsNr >= ctxt->nsMax) {
1233 ctxt->nsMax *= 2;
1234 ctxt->nsTab = (const xmlChar **)
1235 xmlRealloc((char *) ctxt->nsTab,
1236 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1237 if (ctxt->nsTab == NULL) {
1238 xmlErrMemory(ctxt, NULL);
1239 ctxt->nsMax /= 2;
1240 return (-1);
1241 }
1242 }
1243 ctxt->nsTab[ctxt->nsNr++] = prefix;
1244 ctxt->nsTab[ctxt->nsNr++] = URL;
1245 return (ctxt->nsNr);
1246}
1247/**
1248 * nsPop:
1249 * @ctxt: an XML parser context
1250 * @nr: the number to pop
1251 *
1252 * Pops the top @nr parser prefix/namespace from the ns stack
1253 *
1254 * Returns the number of namespaces removed
1255 */
1256static int
1257nsPop(xmlParserCtxtPtr ctxt, int nr)
1258{
1259 int i;
1260
1261 if (ctxt->nsTab == NULL) return(0);
1262 if (ctxt->nsNr < nr) {
1263 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1264 nr = ctxt->nsNr;
1265 }
1266 if (ctxt->nsNr <= 0)
1267 return (0);
1268
1269 for (i = 0;i < nr;i++) {
1270 ctxt->nsNr--;
1271 ctxt->nsTab[ctxt->nsNr] = NULL;
1272 }
1273 return(nr);
1274}
1275#endif
1276
1277static int
1278xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1279 const xmlChar **atts;
1280 int *attallocs;
1281 int maxatts;
1282
1283 if (ctxt->atts == NULL) {
1284 maxatts = 55; /* allow for 10 attrs by default */
1285 atts = (const xmlChar **)
1286 xmlMalloc(maxatts * sizeof(xmlChar *));
1287 if (atts == NULL) goto mem_error;
1288 ctxt->atts = atts;
1289 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1290 if (attallocs == NULL) goto mem_error;
1291 ctxt->attallocs = attallocs;
1292 ctxt->maxatts = maxatts;
1293 } else if (nr + 5 > ctxt->maxatts) {
1294 maxatts = (nr + 5) * 2;
1295 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1296 maxatts * sizeof(const xmlChar *));
1297 if (atts == NULL) goto mem_error;
1298 ctxt->atts = atts;
1299 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1300 (maxatts / 5) * sizeof(int));
1301 if (attallocs == NULL) goto mem_error;
1302 ctxt->attallocs = attallocs;
1303 ctxt->maxatts = maxatts;
1304 }
1305 return(ctxt->maxatts);
1306mem_error:
1307 xmlErrMemory(ctxt, NULL);
1308 return(-1);
1309}
1310
1311/**
1312 * inputPush:
1313 * @ctxt: an XML parser context
1314 * @value: the parser input
1315 *
1316 * Pushes a new parser input on top of the input stack
1317 *
1318 * Returns 0 in case of error, the index in the stack otherwise
1319 */
1320int
1321inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1322{
1323 if ((ctxt == NULL) || (value == NULL))
1324 return(0);
1325 if (ctxt->inputNr >= ctxt->inputMax) {
1326 ctxt->inputMax *= 2;
1327 ctxt->inputTab =
1328 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1329 ctxt->inputMax *
1330 sizeof(ctxt->inputTab[0]));
1331 if (ctxt->inputTab == NULL) {
1332 xmlErrMemory(ctxt, NULL);
1333 return (0);
1334 }
1335 }
1336 ctxt->inputTab[ctxt->inputNr] = value;
1337 ctxt->input = value;
1338 return (ctxt->inputNr++);
1339}
1340/**
1341 * inputPop:
1342 * @ctxt: an XML parser context
1343 *
1344 * Pops the top parser input from the input stack
1345 *
1346 * Returns the input just removed
1347 */
1348xmlParserInputPtr
1349inputPop(xmlParserCtxtPtr ctxt)
1350{
1351 xmlParserInputPtr ret;
1352
1353 if (ctxt == NULL)
1354 return(NULL);
1355 if (ctxt->inputNr <= 0)
1356 return (NULL);
1357 ctxt->inputNr--;
1358 if (ctxt->inputNr > 0)
1359 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1360 else
1361 ctxt->input = NULL;
1362 ret = ctxt->inputTab[ctxt->inputNr];
1363 ctxt->inputTab[ctxt->inputNr] = NULL;
1364 return (ret);
1365}
1366/**
1367 * nodePush:
1368 * @ctxt: an XML parser context
1369 * @value: the element node
1370 *
1371 * Pushes a new element node on top of the node stack
1372 *
1373 * Returns 0 in case of error, the index in the stack otherwise
1374 */
1375int
1376nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1377{
1378 if (ctxt == NULL) return(0);
1379 if (ctxt->nodeNr >= ctxt->nodeMax) {
1380 xmlNodePtr *tmp;
1381
1382 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1383 ctxt->nodeMax * 2 *
1384 sizeof(ctxt->nodeTab[0]));
1385 if (tmp == NULL) {
1386 xmlErrMemory(ctxt, NULL);
1387 return (0);
1388 }
1389 ctxt->nodeTab = tmp;
1390 ctxt->nodeMax *= 2;
1391 }
1392 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1393 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1394 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1395 xmlParserMaxDepth);
1396 ctxt->instate = XML_PARSER_EOF;
1397 return(0);
1398 }
1399 ctxt->nodeTab[ctxt->nodeNr] = value;
1400 ctxt->node = value;
1401 return (ctxt->nodeNr++);
1402}
1403/**
1404 * nodePop:
1405 * @ctxt: an XML parser context
1406 *
1407 * Pops the top element node from the node stack
1408 *
1409 * Returns the node just removed
1410 */
1411xmlNodePtr
1412nodePop(xmlParserCtxtPtr ctxt)
1413{
1414 xmlNodePtr ret;
1415
1416 if (ctxt == NULL) return(NULL);
1417 if (ctxt->nodeNr <= 0)
1418 return (NULL);
1419 ctxt->nodeNr--;
1420 if (ctxt->nodeNr > 0)
1421 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1422 else
1423 ctxt->node = NULL;
1424 ret = ctxt->nodeTab[ctxt->nodeNr];
1425 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1426 return (ret);
1427}
1428
1429#ifdef LIBXML_PUSH_ENABLED
1430/**
1431 * nameNsPush:
1432 * @ctxt: an XML parser context
1433 * @value: the element name
1434 * @prefix: the element prefix
1435 * @URI: the element namespace name
1436 *
1437 * Pushes a new element name/prefix/URL on top of the name stack
1438 *
1439 * Returns -1 in case of error, the index in the stack otherwise
1440 */
1441static int
1442nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1443 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1444{
1445 if (ctxt->nameNr >= ctxt->nameMax) {
1446 const xmlChar * *tmp;
1447 void **tmp2;
1448 ctxt->nameMax *= 2;
1449 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1450 ctxt->nameMax *
1451 sizeof(ctxt->nameTab[0]));
1452 if (tmp == NULL) {
1453 ctxt->nameMax /= 2;
1454 goto mem_error;
1455 }
1456 ctxt->nameTab = tmp;
1457 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1458 ctxt->nameMax * 3 *
1459 sizeof(ctxt->pushTab[0]));
1460 if (tmp2 == NULL) {
1461 ctxt->nameMax /= 2;
1462 goto mem_error;
1463 }
1464 ctxt->pushTab = tmp2;
1465 }
1466 ctxt->nameTab[ctxt->nameNr] = value;
1467 ctxt->name = value;
1468 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1469 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1470 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1471 return (ctxt->nameNr++);
1472mem_error:
1473 xmlErrMemory(ctxt, NULL);
1474 return (-1);
1475}
1476/**
1477 * nameNsPop:
1478 * @ctxt: an XML parser context
1479 *
1480 * Pops the top element/prefix/URI name from the name stack
1481 *
1482 * Returns the name just removed
1483 */
1484static const xmlChar *
1485nameNsPop(xmlParserCtxtPtr ctxt)
1486{
1487 const xmlChar *ret;
1488
1489 if (ctxt->nameNr <= 0)
1490 return (NULL);
1491 ctxt->nameNr--;
1492 if (ctxt->nameNr > 0)
1493 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1494 else
1495 ctxt->name = NULL;
1496 ret = ctxt->nameTab[ctxt->nameNr];
1497 ctxt->nameTab[ctxt->nameNr] = NULL;
1498 return (ret);
1499}
1500#endif /* LIBXML_PUSH_ENABLED */
1501
1502/**
1503 * namePush:
1504 * @ctxt: an XML parser context
1505 * @value: the element name
1506 *
1507 * Pushes a new element name on top of the name stack
1508 *
1509 * Returns -1 in case of error, the index in the stack otherwise
1510 */
1511int
1512namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1513{
1514 if (ctxt == NULL) return (-1);
1515
1516 if (ctxt->nameNr >= ctxt->nameMax) {
1517 const xmlChar * *tmp;
1518 ctxt->nameMax *= 2;
1519 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1520 ctxt->nameMax *
1521 sizeof(ctxt->nameTab[0]));
1522 if (tmp == NULL) {
1523 ctxt->nameMax /= 2;
1524 goto mem_error;
1525 }
1526 ctxt->nameTab = tmp;
1527 }
1528 ctxt->nameTab[ctxt->nameNr] = value;
1529 ctxt->name = value;
1530 return (ctxt->nameNr++);
1531mem_error:
1532 xmlErrMemory(ctxt, NULL);
1533 return (-1);
1534}
1535/**
1536 * namePop:
1537 * @ctxt: an XML parser context
1538 *
1539 * Pops the top element name from the name stack
1540 *
1541 * Returns the name just removed
1542 */
1543const xmlChar *
1544namePop(xmlParserCtxtPtr ctxt)
1545{
1546 const xmlChar *ret;
1547
1548 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1549 return (NULL);
1550 ctxt->nameNr--;
1551 if (ctxt->nameNr > 0)
1552 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1553 else
1554 ctxt->name = NULL;
1555 ret = ctxt->nameTab[ctxt->nameNr];
1556 ctxt->nameTab[ctxt->nameNr] = NULL;
1557 return (ret);
1558}
1559
1560static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1561 if (ctxt->spaceNr >= ctxt->spaceMax) {
1562 ctxt->spaceMax *= 2;
1563 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1564 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1565 if (ctxt->spaceTab == NULL) {
1566 xmlErrMemory(ctxt, NULL);
1567 return(0);
1568 }
1569 }
1570 ctxt->spaceTab[ctxt->spaceNr] = val;
1571 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1572 return(ctxt->spaceNr++);
1573}
1574
1575static int spacePop(xmlParserCtxtPtr ctxt) {
1576 int ret;
1577 if (ctxt->spaceNr <= 0) return(0);
1578 ctxt->spaceNr--;
1579 if (ctxt->spaceNr > 0)
1580 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1581 else
1582 ctxt->space = &ctxt->spaceTab[0];
1583 ret = ctxt->spaceTab[ctxt->spaceNr];
1584 ctxt->spaceTab[ctxt->spaceNr] = -1;
1585 return(ret);
1586}
1587
1588/*
1589 * Macros for accessing the content. Those should be used only by the parser,
1590 * and not exported.
1591 *
 *   Dirty macros, i.e. one often needs to make assumptions on the context
 *                 to use them
1594 *
1595 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1596 * To be used with extreme caution since operations consuming
1597 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1602 * RAW same as CUR but in the input buffer, bypass any token
1603 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
1606 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1607 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
1610 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1611 *
1612 * NEXT Skip to the next character, this does the proper decoding
1613 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
1614 * NEXTL(l) Skip the current unicode character of l xmlChars long.
1615 * CUR_CHAR(l) returns the current unicode character (int), set l
1616 * to the number of xmlChars used for the encoding [0-5].
1617 * CUR_SCHAR same but operate on a string instead of the context
1618 * COPY_BUF copy the current unicode char to the target buffer, increment
1619 * the index
1620 * GROW, SHRINK handling of input buffers
1621 */
1622
/*
 * Raw accessors on the current input. All of them expect a variable
 * named ctxt to be in scope at the point of use.
 * NOTE(review): NXT and CUR_PTR expand to unparenthesized expressions.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the n first bytes at s are c1 ... cn.
 * The bytes are compared in order with &&, so the comparison stops at
 * the first mismatch.
 * NOTE(review): the s argument is not parenthesized in the expansion.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance nbChars, the input cursor and the column by val,
 * without any newline accounting. Afterwards a '%' at the cursor is
 * handed to xmlParserHandlePEReference, and an input which is at a 0
 * byte and cannot be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that
 * line and column are kept up to date across newlines.
 * NOTE(review): val is used unparenthesized in the loop test.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK: only when not in progressive mode, and when more than
 * 2 * INPUT_CHUNK bytes lie before the cursor while fewer than
 * 2 * INPUT_CHUNK remain after it, call xmlSHRINK.
 * NOTE(review): expands to a bare "if (...) call;" including the
 * trailing semicolon, so it must not be used where a following "else"
 * could bind to it.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);

/*
 * xmlSHRINK: shrink the current input buffer, then pop the input if the
 * cursor sits on a 0 byte and no more data can be read.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputShrink(ctxt->input);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
  }

/*
 * GROW: only when not in progressive mode and fewer than INPUT_CHUNK
 * bytes remain after the cursor, call xmlGROW.
 * NOTE(review): same bare "if (...) call;" shape as SHRINK; some call
 * sites in this file use it without a trailing semicolon.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);

/*
 * xmlGROW: try to read more data into the current input, then pop the
 * input if the cursor sits on a 0 byte and a second attempt to grow
 * does not bring anything.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((*ctxt->input->cur == 0) &&
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
	    xmlPopInput(ctxt);
}

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, updating column and nbChars but not the
 * line number, and try to grow the input when a 0 byte is reached.
 * No PE reference handling and no input popping is done here.
 * NOTE(review): expands to a plain { } block, not do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character of l xmlChars, with line
 * and column accounting, then hand a '%' at the cursor to
 * xmlParserHandlePEReference. nbChars is not updated by this macro.
 * NOTE(review): l is used unparenthesized.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a single-xmlChar character (l == 1) is stored directly,
 * anything else goes through xmlCopyCharMultiByte.
 * NOTE(review): expands to an unbraced if/else without a trailing
 * semicolon, so it is only safe as a standalone statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
1719/**
1720 * xmlSkipBlankChars:
1721 * @ctxt: the XML parser context
1722 *
1723 * skip all blanks character found at that point in the input streams.
1724 * It pops up finished entities in the process if allowable at that point.
1725 *
1726 * Returns the number of space chars skipped
1727 */
1728
1729int
1730xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1731 int res = 0;
1732
1733 /*
1734 * It's Okay to use CUR/NEXT here since all the blanks are on
1735 * the ASCII range.
1736 */
1737 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1738 const xmlChar *cur;
1739 /*
1740 * if we are in the document content, go really fast
1741 */
1742 cur = ctxt->input->cur;
1743 while (IS_BLANK_CH(*cur)) {
1744 if (*cur == '\n') {
1745 ctxt->input->line++; ctxt->input->col = 1;
1746 }
1747 cur++;
1748 res++;
1749 if (*cur == 0) {
1750 ctxt->input->cur = cur;
1751 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1752 cur = ctxt->input->cur;
1753 }
1754 }
1755 ctxt->input->cur = cur;
1756 } else {
1757 int cur;
1758 do {
1759 cur = CUR;
1760 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1761 NEXT;
1762 cur = CUR;
1763 res++;
1764 }
1765 while ((cur == 0) && (ctxt->inputNr > 1) &&
1766 (ctxt->instate != XML_PARSER_COMMENT)) {
1767 xmlPopInput(ctxt);
1768 cur = CUR;
1769 }
1770 /*
1771 * Need to handle support of entities branching here
1772 */
1773 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1774 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1775 }
1776 return(res);
1777}
1778
1779/************************************************************************
1780 * *
1781 * Commodity functions to handle entities *
1782 * *
1783 ************************************************************************/
1784
1785/**
1786 * xmlPopInput:
1787 * @ctxt: an XML parser context
1788 *
1789 * xmlPopInput: the current input pointed by ctxt->input came to an end
1790 * pop it and return the next char.
1791 *
1792 * Returns the current xmlChar in the parser context
1793 */
1794xmlChar
1795xmlPopInput(xmlParserCtxtPtr ctxt) {
1796 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1797 if (xmlParserDebugEntities)
1798 xmlGenericError(xmlGenericErrorContext,
1799 "Popping input %d\n", ctxt->inputNr);
1800 xmlFreeInputStream(inputPop(ctxt));
1801 if ((*ctxt->input->cur == 0) &&
1802 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1803 return(xmlPopInput(ctxt));
1804 return(CUR);
1805}
1806
1807/**
1808 * xmlPushInput:
1809 * @ctxt: an XML parser context
1810 * @input: an XML parser input fragment (entity, XML fragment ...).
1811 *
1812 * xmlPushInput: switch to a new input stream which is stacked on top
1813 * of the previous one(s).
1814 */
1815void
1816xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1817 if (input == NULL) return;
1818
1819 if (xmlParserDebugEntities) {
1820 if ((ctxt->input != NULL) && (ctxt->input->filename))
1821 xmlGenericError(xmlGenericErrorContext,
1822 "%s(%d): ", ctxt->input->filename,
1823 ctxt->input->line);
1824 xmlGenericError(xmlGenericErrorContext,
1825 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1826 }
1827 inputPush(ctxt, input);
1828 GROW;
1829}
1830
/**
 * xmlParseCharRef:
 * @ctxt:  an XML parser context
 *
 * parse Reference declarations
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * [ WFC: Legal Character ]
 * Characters referred to using character references must match the
 * production for Char.
 *
 * The digits are accumulated in an unsigned value; as soon as that
 * value exceeds 0x10FFFF the reference is remembered as out of range
 * and will be rejected, whatever the following digits are.
 *
 * Returns the value parsed (as an int), 0 in case of error
 */
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
    unsigned int val = 0;
    int count = 0;
    unsigned int outofrange = 0;

    /*
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
     */
    if ((RAW == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
	/* hexadecimal form: &#x...; */
	SKIP(3);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    /*
	     * count is incremented both here and at the end of the loop
	     * body; it is reset each time it triggers a GROW.
	     */
	    if (count++ > 20) {
		count = 0;
		GROW;
	    }
	    /*
	     * NOTE(review): hex letters are only accepted while
	     * count < 20, decimal digits have no such restriction.
	     */
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 16 + (CUR - '0');
	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
	        val = val * 16 + (CUR - 'a') + 10;
	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
	        val = val * 16 + (CUR - 'A') + 10;
	    else {
		/* val is reset, so the final check below fails as well */
		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
	/* decimal form: &#...; */
	SKIP(2);
	GROW;
	while (RAW != ';') { /* loop blocked by count */
	    if (count++ > 20) {
		count = 0;
		GROW;
	    }
	    if ((RAW >= '0') && (RAW <= '9'))
	        val = val * 10 + (CUR - '0');
	    else {
		/* val is reset, so the final check below fails as well */
		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
		val = 0;
		break;
	    }
	    if (val > 0x10FFFF)
	        outofrange = val;

	    NEXT;
	    count++;
	}
	if (RAW == ';') {
	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
	    ctxt->input->col++;
	    ctxt->nbChars ++;
	    ctxt->input->cur++;
	}
    } else {
	/* not at "&#": nothing is consumed, val stays 0 */
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
    }

    /*
     * [ WFC: Legal Character ]
     * Characters referred to using character references must match the
     * production for Char.
     */
    if ((IS_CHAR(val) && (outofrange == 0))) {
        return(val);
    } else {
        /*
         * Also reached after the errors reported above, in which case
         * a second error is raised here.
         */
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseCharRef: invalid xmlChar value %d\n",
	                  val);
    }
    return(0);
}
1932
1933/**
1934 * xmlParseStringCharRef:
1935 * @ctxt: an XML parser context
1936 * @str: a pointer to an index in the string
1937 *
1938 * parse Reference declarations, variant parsing from a string rather
1939 * than an an input flow.
1940 *
1941 * [66] CharRef ::= '&#' [0-9]+ ';' |
1942 * '&#x' [0-9a-fA-F]+ ';'
1943 *
1944 * [ WFC: Legal Character ]
1945 * Characters referred to using character references must match the
1946 * production for Char.
1947 *
1948 * Returns the value parsed (as an int), 0 in case of error, str will be
1949 * updated to the current value of the index
1950 */
1951static int
1952xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1953 const xmlChar *ptr;
1954 xmlChar cur;
1955 unsigned int val = 0;
1956 unsigned int outofrange = 0;
1957
1958 if ((str == NULL) || (*str == NULL)) return(0);
1959 ptr = *str;
1960 cur = *ptr;
1961 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1962 ptr += 3;
1963 cur = *ptr;
1964 while (cur != ';') { /* Non input consuming loop */
1965 if ((cur >= '0') && (cur <= '9'))
1966 val = val * 16 + (cur - '0');
1967 else if ((cur >= 'a') && (cur <= 'f'))
1968 val = val * 16 + (cur - 'a') + 10;
1969 else if ((cur >= 'A') && (cur <= 'F'))
1970 val = val * 16 + (cur - 'A') + 10;
1971 else {
1972 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1973 val = 0;
1974 break;
1975 }
1976 if (val > 0x10FFFF)
1977 outofrange = val;
1978
1979 ptr++;
1980 cur = *ptr;
1981 }
1982 if (cur == ';')
1983 ptr++;
1984 } else if ((cur == '&') && (ptr[1] == '#')){
1985 ptr += 2;
1986 cur = *ptr;
1987 while (cur != ';') { /* Non input consuming loops */
1988 if ((cur >= '0') && (cur <= '9'))
1989 val = val * 10 + (cur - '0');
1990 else {
1991 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1992 val = 0;
1993 break;
1994 }
1995 if (val > 0x10FFFF)
1996 outofrange = val;
1997
1998 ptr++;
1999 cur = *ptr;
2000 }
2001 if (cur == ';')
2002 ptr++;
2003 } else {
2004 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2005 return(0);
2006 }
2007 *str = ptr;
2008
2009 /*
2010 * [ WFC: Legal Character ]
2011 * Characters referred to using character references must match the
2012 * production for Char.
2013 */
2014 if ((IS_CHAR(val) && (outofrange == 0))) {
2015 return(val);
2016 } else {
2017 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2018 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2019 val);
2020 }
2021 return(0);
2022}
2023
2024/**
2025 * xmlNewBlanksWrapperInputStream:
2026 * @ctxt: an XML parser context
2027 * @entity: an Entity pointer
2028 *
2029 * Create a new input stream for wrapping
2030 * blanks around a PEReference
2031 *
2032 * Returns the new input stream or NULL
2033 */
2034
2035static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2036
2037static xmlParserInputPtr
2038xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2039 xmlParserInputPtr input;
2040 xmlChar *buffer;
2041 size_t length;
2042 if (entity == NULL) {
2043 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2044 "xmlNewBlanksWrapperInputStream entity\n");
2045 return(NULL);
2046 }
2047 if (xmlParserDebugEntities)
2048 xmlGenericError(xmlGenericErrorContext,
2049 "new blanks wrapper for entity: %s\n", entity->name);
2050 input = xmlNewInputStream(ctxt);
2051 if (input == NULL) {
2052 return(NULL);
2053 }
2054 length = xmlStrlen(entity->name) + 5;
2055 buffer = xmlMallocAtomic(length);
2056 if (buffer == NULL) {
2057 xmlErrMemory(ctxt, NULL);
2058 return(NULL);
2059 }
2060 buffer [0] = ' ';
2061 buffer [1] = '%';
2062 buffer [length-3] = ';';
2063 buffer [length-2] = ' ';
2064 buffer [length-1] = 0;
2065 memcpy(buffer + 2, entity->name, length - 5);
2066 input->free = deallocblankswrapper;
2067 input->base = buffer;
2068 input->cur = buffer;
2069 input->length = length;
2070 input->end = &buffer[length];
2071 return(input);
2072}
2073
/**
 * xmlParserHandlePEReference:
 * @ctxt:  the parser context
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * A PEReference may have been detected in the current input stream
 * the handling is done accordingly to
 *      http://www.w3.org/TR/REC-xml#entproc
 * i.e.
 *   - Included in literal in entity values
 *   - Included as Parameter Entity reference within DTDs
 *
 * The function does nothing unless the current character is '%'. Among
 * the parser states listed in the switch below, only XML_PARSER_DTD
 * leads to the reference being parsed and expanded; the other ones
 * return, some of them after raising a fatal error.
 */
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%') return;
    switch(ctxt->instate) {
	case XML_PARSER_CDATA_SECTION:
	    return;
        case XML_PARSER_COMMENT:
	    return;
	case XML_PARSER_START_TAG:
	    return;
	case XML_PARSER_END_TAG:
	    return;
        case XML_PARSER_EOF:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
	    return;
        case XML_PARSER_PROLOG:
	case XML_PARSER_START:
	case XML_PARSER_MISC:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
	    return;
	case XML_PARSER_ENTITY_DECL:
        case XML_PARSER_CONTENT:
        case XML_PARSER_ATTRIBUTE_VALUE:
        case XML_PARSER_PI:
	case XML_PARSER_SYSTEM_LITERAL:
	case XML_PARSER_PUBLIC_LITERAL:
	    /* we just ignore it there */
	    return;
        case XML_PARSER_EPILOG:
	    xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
	    return;
	case XML_PARSER_ENTITY_VALUE:
	    /*
	     * NOTE: in the case of entity values, we don't do the
	     *       substitution here since we need the literal
	     *       entity value to be able to save the internal
	     *       subset of the document.
	     *       This will be handled by xmlStringDecodeEntities
	     */
	    return;
        case XML_PARSER_DTD:
	    /*
	     * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
	     * In the internal DTD subset, parameter-entity references
	     * can occur only where markup declarations can occur, not
	     * within markup declarations.
	     * In that case this is handled in xmlParseMarkupDecl
	     */
	    if ((ctxt->external == 0) && (ctxt->inputNr == 1))
		return;
	    /* a '%' followed by a blank or the end of data is no reference */
	    if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
		return;
            break;
        case XML_PARSER_IGNORE:
            return;
    }

    /* skip the '%' and read the entity name */
    NEXT;
    name = xmlParseName(ctxt);
    if (xmlParserDebugEntities)
	xmlGenericError(xmlGenericErrorContext,
		"PEReference: %s\n", name);
    if (name == NULL) {
	xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
    } else {
	if (RAW == ';') {
	    NEXT;
	    /* the lookup of the entity is delegated to the SAX handler */
	    if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
		entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
	    if (entity == NULL) {

		/*
		 * [ WFC: Entity Declared ]
		 * In a document without any DTD, a document with only an
		 * internal DTD subset which contains no parameter entity
		 * references, or a document with "standalone='yes'", ...
		 * ... The declaration of a parameter entity must precede
		 * any reference to it...
		 */
		if ((ctxt->standalone == 1) ||
		    ((ctxt->hasExternalSubset == 0) &&
		     (ctxt->hasPErefs == 0))) {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
			 "PEReference: %%%s; not found\n", name);
	        } else {
		    /*
		     * [ VC: Entity Declared ]
		     * In a document with an external subset or external
		     * parameter entities with "standalone='no'", ...
		     * ... The declaration of a parameter entity must precede
		     * any reference to it...
		     */
		    if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
		        xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
			                 "PEReference: %%%s; not found\n",
				         name);
		    } else
		        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
			              "PEReference: %%%s; not found\n",
				      name, NULL);
		    ctxt->valid = 0;
		}
	    } else if (ctxt->input->free != deallocblankswrapper) {
		/*
		 * The current input is not a blanks wrapper: push one,
		 * whose content is the same reference surrounded by
		 * blanks. Presumably the expansion itself then happens
		 * when the reference is met again inside the wrapper.
		 * A NULL input (allocation failure) is ignored by
		 * xmlPushInput.
		 */
		    input = xmlNewBlanksWrapperInputStream(ctxt, entity);
		    xmlPushInput(ctxt, input);
	    } else {
	        if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
		    (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
		    xmlChar start[4];
		    xmlCharEncoding enc;

		    /*
		     * handle the extra spaces added before and after
		     * c.f. http://www.w3.org/TR/REC-xml#as-PE
		     * this is done independently.
		     */
		    input = xmlNewEntityInputStream(ctxt, entity);
		    /*
		     * NOTE(review): if input is NULL nothing is pushed and
		     * the code below keeps working on the previous input.
		     */
		    xmlPushInput(ctxt, input);

		    /*
		     * Get the 4 first bytes and decode the charset
		     * if enc != XML_CHAR_ENCODING_NONE
		     * plug some encoding conversion routines.
		     * Note that, since we may have some non-UTF8
		     * encoding (like UTF16, bug 135229), the 'length'
		     * is not known, but we can calculate based upon
		     * the amount of data in the buffer.
		     */
		    GROW
		    if ((ctxt->input->end - ctxt->input->cur)>=4) {
			start[0] = RAW;
			start[1] = NXT(1);
			start[2] = NXT(2);
			start[3] = NXT(3);
			enc = xmlDetectCharEncoding(start, 4);
			if (enc != XML_CHAR_ENCODING_NONE) {
			    xmlSwitchEncoding(ctxt, enc);
			}
		    }

		    /* an external entity may start with a text declaration */
		    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
			(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
			(IS_BLANK_CH(NXT(5)))) {
			xmlParseTextDecl(ctxt);
		    }
		} else {
		    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
			     "PEReference: %s is not a parameter entity\n",
				      name);
		}
	    }
	} else {
	    xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
	}
    }
}
2266
/*
 * growBuffer:
 * @buffer: name of the xmlChar * buffer variable; a companion int variable
 *          named <buffer>_size must hold its current capacity
 * @n: extra room added on top of doubling the capacity
 *
 * Macro used to grow the current buffer to (2 * capacity + n) xmlChar.
 * The capacity variable is updated first, then the storage is enlarged
 * with xmlRealloc(). When that fails @buffer still points to the old
 * block and control jumps to the mem_error label, which the enclosing
 * function has to provide.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size = (buffer##_size * 2) + (n);                          \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2279
2280/**
2281 * xmlStringLenDecodeEntities:
2282 * @ctxt: the parser context
2283 * @str: the input string
2284 * @len: the string length
2285 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2286 * @end: an end marker xmlChar, 0 if none
2287 * @end2: an end marker xmlChar, 0 if none
2288 * @end3: an end marker xmlChar, 0 if none
2289 *
2290 * Takes a entity string content and process to do the adequate substitutions.
2291 *
2292 * [67] Reference ::= EntityRef | CharRef
2293 *
2294 * [69] PEReference ::= '%' Name ';'
2295 *
2296 * Returns A newly allocated string with the substitution done. The caller
2297 * must deallocate it !
2298 */
2299xmlChar *
2300xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2301 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2302 xmlChar *buffer = NULL;
2303 int buffer_size = 0;
2304
2305 xmlChar *current = NULL;
2306 const xmlChar *last;
2307 xmlEntityPtr ent;
2308 int c,l;
2309 int nbchars = 0;
2310
2311 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2312 return(NULL);
2313 last = str + len;
2314
2315 if (ctxt->depth > 40) {
2316 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2317 return(NULL);
2318 }
2319
2320 /*
2321 * allocate a translation buffer.
2322 */
2323 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2324 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2325 if (buffer == NULL) goto mem_error;
2326
2327 /*
2328 * OK loop until we reach one of the ending char or a size limit.
2329 * we are operating on already parsed values.
2330 */
2331 if (str < last)
2332 c = CUR_SCHAR(str, l);
2333 else
2334 c = 0;
2335 while ((c != 0) && (c != end) && /* non input consuming loop */
2336 (c != end2) && (c != end3)) {
2337
2338 if (c == 0) break;
2339 if ((c == '&') && (str[1] == '#')) {
2340 int val = xmlParseStringCharRef(ctxt, &str);
2341 if (val != 0) {
2342 COPY_BUF(0,buffer,nbchars,val);
2343 }
2344 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2345 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2346 }
2347 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2348 if (xmlParserDebugEntities)
2349 xmlGenericError(xmlGenericErrorContext,
2350 "String decoding Entity Reference: %.30s\n",
2351 str);
2352 ent = xmlParseStringEntityRef(ctxt, &str);
2353 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2354 goto int_error;
2355 if (ent != NULL)
2356 ctxt->nbentities += ent->owner;
2357 if ((ent != NULL) &&
2358 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2359 if (ent->content != NULL) {
2360 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2361 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2362 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2363 }
2364 } else {
2365 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2366 "predefined entity has no content\n");
2367 }
2368 } else if ((ent != NULL) && (ent->content != NULL)) {
2369 xmlChar *rep;
2370
2371 ctxt->depth++;
2372 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2373 0, 0, 0);
2374 ctxt->depth--;
2375 if (rep != NULL) {
2376 current = rep;
2377 while (*current != 0) { /* non input consuming loop */
2378 buffer[nbchars++] = *current++;
2379 if (nbchars >
2380 buffer_size - XML_PARSER_BUFFER_SIZE) {
2381 if (xmlParserEntityCheck(ctxt, nbchars, ent)) {
2382 xmlFree(rep);
2383 goto int_error;
2384 }
2385 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2386 }
2387 }
2388 xmlFree(rep);
2389 }
2390 } else if (ent != NULL) {
2391 int i = xmlStrlen(ent->name);
2392 const xmlChar *cur = ent->name;
2393
2394 buffer[nbchars++] = '&';
2395 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2396 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2397 }
2398 for (;i > 0;i--)
2399 buffer[nbchars++] = *cur++;
2400 buffer[nbchars++] = ';';
2401 }
2402 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2403 if (xmlParserDebugEntities)
2404 xmlGenericError(xmlGenericErrorContext,
2405 "String decoding PE Reference: %.30s\n", str);
2406 ent = xmlParseStringPEReference(ctxt, &str);
2407 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2408 goto int_error;
2409 if (ent != NULL)
2410 ctxt->nbentities += ent->owner;
2411 if (ent != NULL) {
2412 xmlChar *rep;
2413
2414 ctxt->depth++;
2415 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2416 0, 0, 0);
2417 ctxt->depth--;
2418 if (rep != NULL) {
2419 current = rep;
2420 while (*current != 0) { /* non input consuming loop */
2421 buffer[nbchars++] = *current++;
2422 if (nbchars >
2423 buffer_size - XML_PARSER_BUFFER_SIZE) {
2424 if (xmlParserEntityCheck(ctxt, nbchars, ent)) {
2425 xmlFree(rep);
2426 goto int_error;
2427 }
2428 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2429 }
2430 }
2431 xmlFree(rep);
2432 }
2433 }
2434 } else {
2435 COPY_BUF(l,buffer,nbchars,c);
2436 str += l;
2437 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2438 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2439 }
2440 }
2441 if (str < last)
2442 c = CUR_SCHAR(str, l);
2443 else
2444 c = 0;
2445 }
2446 buffer[nbchars++] = 0;
2447 return(buffer);
2448
2449mem_error:
2450 xmlErrMemory(ctxt, NULL);
2451int_error:
2452 if (buffer != NULL)
2453 xmlFree(buffer);
2454 return(NULL);
2455}
2456
2457/**
2458 * xmlStringDecodeEntities:
2459 * @ctxt: the parser context
2460 * @str: the input string
2461 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2462 * @end: an end marker xmlChar, 0 if none
2463 * @end2: an end marker xmlChar, 0 if none
2464 * @end3: an end marker xmlChar, 0 if none
2465 *
2466 * Takes a entity string content and process to do the adequate substitutions.
2467 *
2468 * [67] Reference ::= EntityRef | CharRef
2469 *
2470 * [69] PEReference ::= '%' Name ';'
2471 *
2472 * Returns A newly allocated string with the substitution done. The caller
2473 * must deallocate it !
2474 */
2475xmlChar *
2476xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2477 xmlChar end, xmlChar end2, xmlChar end3) {
2478 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2479 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2480 end, end2, end3));
2481}
2482
2483/************************************************************************
2484 * *
2485 * Commodity functions, cleanup needed ? *
2486 * *
2487 ************************************************************************/
2488
2489/**
2490 * areBlanks:
2491 * @ctxt: an XML parser context
2492 * @str: a xmlChar *
2493 * @len: the size of @str
2494 * @blank_chars: we know the chars are blanks
2495 *
2496 * Is this a sequence of blank chars that one can ignore ?
2497 *
2498 * Returns 1 if ignorable 0 otherwise.
2499 */
2500
2501static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2502 int blank_chars) {
2503 int i, ret;
2504 xmlNodePtr lastChild;
2505
2506 /*
2507 * Don't spend time trying to differentiate them, the same callback is
2508 * used !
2509 */
2510 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2511 return(0);
2512
2513 /*
2514 * Check for xml:space value.
2515 */
2516 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2517 (*(ctxt->space) == -2))
2518 return(0);
2519
2520 /*
2521 * Check that the string is made of blanks
2522 */
2523 if (blank_chars == 0) {
2524 for (i = 0;i < len;i++)
2525 if (!(IS_BLANK_CH(str[i]))) return(0);
2526 }
2527
2528 /*
2529 * Look if the element is mixed content in the DTD if available
2530 */
2531 if (ctxt->node == NULL) return(0);
2532 if (ctxt->myDoc != NULL) {
2533 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2534 if (ret == 0) return(1);
2535 if (ret == 1) return(0);
2536 }
2537
2538 /*
2539 * Otherwise, heuristic :-\
2540 */
2541 if ((RAW != '<') && (RAW != 0xD)) return(0);
2542 if ((ctxt->node->children == NULL) &&
2543 (RAW == '<') && (NXT(1) == '/')) return(0);
2544
2545 lastChild = xmlGetLastChild(ctxt->node);
2546 if (lastChild == NULL) {
2547 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2548 (ctxt->node->content != NULL)) return(0);
2549 } else if (xmlNodeIsText(lastChild))
2550 return(0);
2551 else if ((ctxt->node->children != NULL) &&
2552 (xmlNodeIsText(ctxt->node->children)))
2553 return(0);
2554 return(1);
2555}
2556
2557/************************************************************************
2558 * *
2559 * Extra stuff for namespace support *
2560 * Relates to http://www.w3.org/TR/WD-xml-names *
2561 * *
2562 ************************************************************************/
2563
2564/**
2565 * xmlSplitQName:
2566 * @ctxt: an XML parser context
2567 * @name: an XML parser context
2568 * @prefix: a xmlChar **
2569 *
2570 * parse an UTF8 encoded XML qualified name string
2571 *
2572 * [NS 5] QName ::= (Prefix ':')? LocalPart
2573 *
2574 * [NS 6] Prefix ::= NCName
2575 *
2576 * [NS 7] LocalPart ::= NCName
2577 *
2578 * Returns the local part, and prefix is updated
2579 * to get the Prefix if any.
2580 */
2581
2582xmlChar *
2583xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2584 xmlChar buf[XML_MAX_NAMELEN + 5];
2585 xmlChar *buffer = NULL;
2586 int len = 0;
2587 int max = XML_MAX_NAMELEN;
2588 xmlChar *ret = NULL;
2589 const xmlChar *cur = name;
2590 int c;
2591
2592 if (prefix == NULL) return(NULL);
2593 *prefix = NULL;
2594
2595 if (cur == NULL) return(NULL);
2596
2597#ifndef XML_XML_NAMESPACE
2598 /* xml: prefix is not really a namespace */
2599 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2600 (cur[2] == 'l') && (cur[3] == ':'))
2601 return(xmlStrdup(name));
2602#endif
2603
2604 /* nasty but well=formed */
2605 if (cur[0] == ':')
2606 return(xmlStrdup(name));
2607
2608 c = *cur++;
2609 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2610 buf[len++] = c;
2611 c = *cur++;
2612 }
2613 if (len >= max) {
2614 /*
2615 * Okay someone managed to make a huge name, so he's ready to pay
2616 * for the processing speed.
2617 */
2618 max = len * 2;
2619
2620 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2621 if (buffer == NULL) {
2622 xmlErrMemory(ctxt, NULL);
2623 return(NULL);
2624 }
2625 memcpy(buffer, buf, len);
2626 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2627 if (len + 10 > max) {
2628 xmlChar *tmp;
2629
2630 max *= 2;
2631 tmp = (xmlChar *) xmlRealloc(buffer,
2632 max * sizeof(xmlChar));
2633 if (tmp == NULL) {
2634 xmlFree(tmp);
2635 xmlErrMemory(ctxt, NULL);
2636 return(NULL);
2637 }
2638 buffer = tmp;
2639 }
2640 buffer[len++] = c;
2641 c = *cur++;
2642 }
2643 buffer[len] = 0;
2644 }
2645
2646 if ((c == ':') && (*cur == 0)) {
2647 if (buffer != NULL)
2648 xmlFree(buffer);
2649 *prefix = NULL;
2650 return(xmlStrdup(name));
2651 }
2652
2653 if (buffer == NULL)
2654 ret = xmlStrndup(buf, len);
2655 else {
2656 ret = buffer;
2657 buffer = NULL;
2658 max = XML_MAX_NAMELEN;
2659 }
2660
2661
2662 if (c == ':') {
2663 c = *cur;
2664 *prefix = ret;
2665 if (c == 0) {
2666 return(xmlStrndup(BAD_CAST "", 0));
2667 }
2668 len = 0;
2669
2670 /*
2671 * Check that the first character is proper to start
2672 * a new name
2673 */
2674 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2675 ((c >= 0x41) && (c <= 0x5A)) ||
2676 (c == '_') || (c == ':'))) {
2677 int l;
2678 int first = CUR_SCHAR(cur, l);
2679
2680 if (!IS_LETTER(first) && (first != '_')) {
2681 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2682 "Name %s is not XML Namespace compliant\n",
2683 name);
2684 }
2685 }
2686 cur++;
2687
2688 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2689 buf[len++] = c;
2690 c = *cur++;
2691 }
2692 if (len >= max) {
2693 /*
2694 * Okay someone managed to make a huge name, so he's ready to pay
2695 * for the processing speed.
2696 */
2697 max = len * 2;
2698
2699 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2700 if (buffer == NULL) {
2701 xmlErrMemory(ctxt, NULL);
2702 return(NULL);
2703 }
2704 memcpy(buffer, buf, len);
2705 while (c != 0) { /* tested bigname2.xml */
2706 if (len + 10 > max) {
2707 xmlChar *tmp;
2708
2709 max *= 2;
2710 tmp = (xmlChar *) xmlRealloc(buffer,
2711 max * sizeof(xmlChar));
2712 if (tmp == NULL) {
2713 xmlErrMemory(ctxt, NULL);
2714 xmlFree(buffer);
2715 return(NULL);
2716 }
2717 buffer = tmp;
2718 }
2719 buffer[len++] = c;
2720 c = *cur++;
2721 }
2722 buffer[len] = 0;
2723 }
2724
2725 if (buffer == NULL)
2726 ret = xmlStrndup(buf, len);
2727 else {
2728 ret = buffer;
2729 }
2730 }
2731
2732 return(ret);
2733}
2734
2735/************************************************************************
2736 * *
2737 * The parser itself *
2738 * Relates to http://www.w3.org/TR/REC-xml *
2739 * *
2740 ************************************************************************/
2741
2742static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2743static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2744 int *len, int *alloc, int normalize);
2745
2746/**
2747 * xmlParseName:
2748 * @ctxt: an XML parser context
2749 *
2750 * parse an XML name.
2751 *
2752 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2753 * CombiningChar | Extender
2754 *
2755 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2756 *
2757 * [6] Names ::= Name (#x20 Name)*
2758 *
2759 * Returns the Name parsed or NULL
2760 */
2761
2762const xmlChar *
2763xmlParseName(xmlParserCtxtPtr ctxt) {
2764 const xmlChar *in;
2765 const xmlChar *ret;
2766 int count = 0;
2767
2768 GROW;
2769
2770 /*
2771 * Accelerator for simple ASCII names
2772 */
2773 in = ctxt->input->cur;
2774 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2775 ((*in >= 0x41) && (*in <= 0x5A)) ||
2776 (*in == '_') || (*in == ':')) {
2777 in++;
2778 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2779 ((*in >= 0x41) && (*in <= 0x5A)) ||
2780 ((*in >= 0x30) && (*in <= 0x39)) ||
2781 (*in == '_') || (*in == '-') ||
2782 (*in == ':') || (*in == '.'))
2783 in++;
2784 if ((*in > 0) && (*in < 0x80)) {
2785 count = in - ctxt->input->cur;
2786 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2787 ctxt->input->cur = in;
2788 ctxt->nbChars += count;
2789 ctxt->input->col += count;
2790 if (ret == NULL)
2791 xmlErrMemory(ctxt, NULL);
2792 return(ret);
2793 }
2794 }
2795 return(xmlParseNameComplex(ctxt));
2796}
2797
2798/**
2799 * xmlParseNameAndCompare:
2800 * @ctxt: an XML parser context
2801 *
2802 * parse an XML name and compares for match
2803 * (specialized for endtag parsing)
2804 *
2805 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2806 * and the name for mismatch
2807 */
2808
2809static const xmlChar *
2810xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2811 register const xmlChar *cmp = other;
2812 register const xmlChar *in;
2813 const xmlChar *ret;
2814
2815 GROW;
2816
2817 in = ctxt->input->cur;
2818 while (*in != 0 && *in == *cmp) {
2819 ++in;
2820 ++cmp;
2821 ctxt->input->col++;
2822 }
2823 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2824 /* success */
2825 ctxt->input->cur = in;
2826 return (const xmlChar*) 1;
2827 }
2828 /* failure (or end of input buffer), check with full function */
2829 ret = xmlParseName (ctxt);
2830 /* strings coming from the dictionnary direct compare possible */
2831 if (ret == other) {
2832 return (const xmlChar*) 1;
2833 }
2834 return ret;
2835}
2836
2837static const xmlChar *
2838xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2839 int len = 0, l;
2840 int c;
2841 int count = 0;
2842
2843 /*
2844 * Handler for more complex cases
2845 */
2846 GROW;
2847 c = CUR_CHAR(l);
2848 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2849 (!IS_LETTER(c) && (c != '_') &&
2850 (c != ':'))) {
2851 return(NULL);
2852 }
2853
2854 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2855 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
2858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c)))) {
2860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 len += l;
2865 NEXTL(l);
2866 c = CUR_CHAR(l);
2867 }
2868 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2869 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2870 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2871}
2872
2873/**
2874 * xmlParseStringName:
2875 * @ctxt: an XML parser context
2876 * @str: a pointer to the string pointer (IN/OUT)
2877 *
2878 * parse an XML name.
2879 *
2880 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2881 * CombiningChar | Extender
2882 *
2883 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2884 *
2885 * [6] Names ::= Name (#x20 Name)*
2886 *
2887 * Returns the Name parsed or NULL. The @str pointer
2888 * is updated to the current location in the string.
2889 */
2890
2891static xmlChar *
2892xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2893 xmlChar buf[XML_MAX_NAMELEN + 5];
2894 const xmlChar *cur = *str;
2895 int len = 0, l;
2896 int c;
2897
2898 c = CUR_SCHAR(cur, l);
2899 if (!IS_LETTER(c) && (c != '_') &&
2900 (c != ':')) {
2901 return(NULL);
2902 }
2903
2904 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2905 (c == '.') || (c == '-') ||
2906 (c == '_') || (c == ':') ||
2907 (IS_COMBINING(c)) ||
2908 (IS_EXTENDER(c))) {
2909 COPY_BUF(l,buf,len,c);
2910 cur += l;
2911 c = CUR_SCHAR(cur, l);
2912 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2913 /*
2914 * Okay someone managed to make a huge name, so he's ready to pay
2915 * for the processing speed.
2916 */
2917 xmlChar *buffer;
2918 int max = len * 2;
2919
2920 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2921 if (buffer == NULL) {
2922 xmlErrMemory(ctxt, NULL);
2923 return(NULL);
2924 }
2925 memcpy(buffer, buf, len);
2926 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2927 /* test bigentname.xml */
2928 (c == '.') || (c == '-') ||
2929 (c == '_') || (c == ':') ||
2930 (IS_COMBINING(c)) ||
2931 (IS_EXTENDER(c))) {
2932 if (len + 10 > max) {
2933 xmlChar *tmp;
2934 max *= 2;
2935 tmp = (xmlChar *) xmlRealloc(buffer,
2936 max * sizeof(xmlChar));
2937 if (tmp == NULL) {
2938 xmlErrMemory(ctxt, NULL);
2939 xmlFree(buffer);
2940 return(NULL);
2941 }
2942 buffer = tmp;
2943 }
2944 COPY_BUF(l,buffer,len,c);
2945 cur += l;
2946 c = CUR_SCHAR(cur, l);
2947 }
2948 buffer[len] = 0;
2949 *str = cur;
2950 return(buffer);
2951 }
2952 }
2953 *str = cur;
2954 return(xmlStrndup(buf, len));
2955}
2956
2957/**
2958 * xmlParseNmtoken:
2959 * @ctxt: an XML parser context
2960 *
2961 * parse an XML Nmtoken.
2962 *
2963 * [7] Nmtoken ::= (NameChar)+
2964 *
2965 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2966 *
2967 * Returns the Nmtoken parsed or NULL
2968 */
2969
2970xmlChar *
2971xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2972 xmlChar buf[XML_MAX_NAMELEN + 5];
2973 int len = 0, l;
2974 int c;
2975 int count = 0;
2976
2977 GROW;
2978 c = CUR_CHAR(l);
2979
2980 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2981 (c == '.') || (c == '-') ||
2982 (c == '_') || (c == ':') ||
2983 (IS_COMBINING(c)) ||
2984 (IS_EXTENDER(c))) {
2985 if (count++ > 100) {
2986 count = 0;
2987 GROW;
2988 }
2989 COPY_BUF(l,buf,len,c);
2990 NEXTL(l);
2991 c = CUR_CHAR(l);
2992 if (len >= XML_MAX_NAMELEN) {
2993 /*
2994 * Okay someone managed to make a huge token, so he's ready to pay
2995 * for the processing speed.
2996 */
2997 xmlChar *buffer;
2998 int max = len * 2;
2999
3000 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3001 if (buffer == NULL) {
3002 xmlErrMemory(ctxt, NULL);
3003 return(NULL);
3004 }
3005 memcpy(buffer, buf, len);
3006 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
3007 (c == '.') || (c == '-') ||
3008 (c == '_') || (c == ':') ||
3009 (IS_COMBINING(c)) ||
3010 (IS_EXTENDER(c))) {
3011 if (count++ > 100) {
3012 count = 0;
3013 GROW;
3014 }
3015 if (len + 10 > max) {
3016 xmlChar *tmp;
3017
3018 max *= 2;
3019 tmp = (xmlChar *) xmlRealloc(buffer,
3020 max * sizeof(xmlChar));
3021 if (tmp == NULL) {
3022 xmlErrMemory(ctxt, NULL);
3023 xmlFree(buffer);
3024 return(NULL);
3025 }
3026 buffer = tmp;
3027 }
3028 COPY_BUF(l,buffer,len,c);
3029 NEXTL(l);
3030 c = CUR_CHAR(l);
3031 }
3032 buffer[len] = 0;
3033 return(buffer);
3034 }
3035 }
3036 if (len == 0)
3037 return(NULL);
3038 return(xmlStrndup(buf, len));
3039}
3040
3041/**
3042 * xmlParseEntityValue:
3043 * @ctxt: an XML parser context
3044 * @orig: if non-NULL store a copy of the original entity value
3045 *
3046 * parse a value for ENTITY declarations
3047 *
3048 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3049 * "'" ([^%&'] | PEReference | Reference)* "'"
3050 *
3051 * Returns the EntityValue parsed with reference substituted or NULL
3052 */
3053
3054xmlChar *
3055xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3056 xmlChar *buf = NULL;
3057 int len = 0;
3058 int size = XML_PARSER_BUFFER_SIZE;
3059 int c, l;
3060 xmlChar stop;
3061 xmlChar *ret = NULL;
3062 const xmlChar *cur = NULL;
3063 xmlParserInputPtr input;
3064
3065 if (RAW == '"') stop = '"';
3066 else if (RAW == '\'') stop = '\'';
3067 else {
3068 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3069 return(NULL);
3070 }
3071 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3072 if (buf == NULL) {
3073 xmlErrMemory(ctxt, NULL);
3074 return(NULL);
3075 }
3076
3077 /*
3078 * The content of the entity definition is copied in a buffer.
3079 */
3080
3081 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3082 input = ctxt->input;
3083 GROW;
3084 NEXT;
3085 c = CUR_CHAR(l);
3086 /*
3087 * NOTE: 4.4.5 Included in Literal
3088 * When a parameter entity reference appears in a literal entity
3089 * value, ... a single or double quote character in the replacement
3090 * text is always treated as a normal data character and will not
3091 * terminate the literal.
3092 * In practice it means we stop the loop only when back at parsing
3093 * the initial entity and the quote is found
3094 */
3095 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
3096 (ctxt->input != input))) {
3097 if (len + 5 >= size) {
3098 xmlChar *tmp;
3099
3100 size *= 2;
3101 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3102 if (tmp == NULL) {
3103 xmlErrMemory(ctxt, NULL);
3104 xmlFree(buf);
3105 return(NULL);
3106 }
3107 buf = tmp;
3108 }
3109 COPY_BUF(l,buf,len,c);
3110 NEXTL(l);
3111 /*
3112 * Pop-up of finished entities.
3113 */
3114 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3115 xmlPopInput(ctxt);
3116
3117 GROW;
3118 c = CUR_CHAR(l);
3119 if (c == 0) {
3120 GROW;
3121 c = CUR_CHAR(l);
3122 }
3123 }
3124 buf[len] = 0;
3125
3126 /*
3127 * Raise problem w.r.t. '&' and '%' being used in non-entities
3128 * reference constructs. Note Charref will be handled in
3129 * xmlStringDecodeEntities()
3130 */
3131 cur = buf;
3132 while (*cur != 0) { /* non input consuming */
3133 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3134 xmlChar *name;
3135 xmlChar tmp = *cur;
3136
3137 cur++;
3138 name = xmlParseStringName(ctxt, &cur);
3139 if ((name == NULL) || (*cur != ';')) {
3140 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3141 "EntityValue: '%c' forbidden except for entities references\n",
3142 tmp);
3143 }
3144 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3145 (ctxt->inputNr == 1)) {
3146 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3147 }
3148 if (name != NULL)
3149 xmlFree(name);
3150 if (*cur == 0)
3151 break;
3152 }
3153 cur++;
3154 }
3155
3156 /*
3157 * Then PEReference entities are substituted.
3158 */
3159 if (c != stop) {
3160 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3161 xmlFree(buf);
3162 } else {
3163 NEXT;
3164 /*
3165 * NOTE: 4.4.7 Bypassed
3166 * When a general entity reference appears in the EntityValue in
3167 * an entity declaration, it is bypassed and left as is.
3168 * so XML_SUBSTITUTE_REF is not set here.
3169 */
3170 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3171 0, 0, 0);
3172 if (orig != NULL)
3173 *orig = buf;
3174 else
3175 xmlFree(buf);
3176 }
3177
3178 return(ret);
3179}
3180
3181/**
3182 * xmlParseAttValueComplex:
3183 * @ctxt: an XML parser context
3184 * @len: the resulting attribute len
3185 * @normalize: wether to apply the inner normalization
3186 *
3187 * parse a value for an attribute, this is the fallback function
3188 * of xmlParseAttValue() when the attribute parsing requires handling
3189 * of non-ASCII characters, or normalization compaction.
3190 *
3191 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3192 */
3193static xmlChar *
3194xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3195 xmlChar limit = 0;
3196 xmlChar *buf = NULL;
3197 int len = 0;
3198 int buf_size = 0;
3199 int c, l, in_space = 0;
3200 xmlChar *current = NULL;
3201 xmlEntityPtr ent;
3202
3203 if (NXT(0) == '"') {
3204 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3205 limit = '"';
3206 NEXT;
3207 } else if (NXT(0) == '\'') {
3208 limit = '\'';
3209 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3210 NEXT;
3211 } else {
3212 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3213 return(NULL);
3214 }
3215
3216 /*
3217 * allocate a translation buffer.
3218 */
3219 buf_size = XML_PARSER_BUFFER_SIZE;
3220 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3221 if (buf == NULL) goto mem_error;
3222
3223 /*
3224 * OK loop until we reach one of the ending char or a size limit.
3225 */
3226 c = CUR_CHAR(l);
3227 while ((NXT(0) != limit) && /* checked */
3228 (IS_CHAR(c)) && (c != '<')) {
3229 if (c == 0) break;
3230 if (c == '&') {
3231 in_space = 0;
3232 if (NXT(1) == '#') {
3233 int val = xmlParseCharRef(ctxt);
3234
3235 if (val == '&') {
3236 if (ctxt->replaceEntities) {
3237 if (len > buf_size - 10) {
3238 growBuffer(buf, 10);
3239 }
3240 buf[len++] = '&';
3241 } else {
3242 /*
3243 * The reparsing will be done in xmlStringGetNodeList()
3244 * called by the attribute() function in SAX.c
3245 */
3246 if (len > buf_size - 10) {
3247 growBuffer(buf, 10);
3248 }
3249 buf[len++] = '&';
3250 buf[len++] = '#';
3251 buf[len++] = '3';
3252 buf[len++] = '8';
3253 buf[len++] = ';';
3254 }
3255 } else {
3256 if (len > buf_size - 10) {
3257 growBuffer(buf, 10);
3258 }
3259 len += xmlCopyChar(0, &buf[len], val);
3260 }
3261 } else {
3262 ent = xmlParseEntityRef(ctxt);
3263 ctxt->nbentities++;
3264 if (ent != NULL)
3265 ctxt->nbentities += ent->owner;
3266 if ((ent != NULL) &&
3267 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3268 if (len > buf_size - 10) {
3269 growBuffer(buf, 10);
3270 }
3271 if ((ctxt->replaceEntities == 0) &&
3272 (ent->content[0] == '&')) {
3273 buf[len++] = '&';
3274 buf[len++] = '#';
3275 buf[len++] = '3';
3276 buf[len++] = '8';
3277 buf[len++] = ';';
3278 } else {
3279 buf[len++] = ent->content[0];
3280 }
3281 } else if ((ent != NULL) &&
3282 (ctxt->replaceEntities != 0)) {
3283 xmlChar *rep;
3284
3285 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3286 rep = xmlStringDecodeEntities(ctxt, ent->content,
3287 XML_SUBSTITUTE_REF,
3288 0, 0, 0);
3289 if (rep != NULL) {
3290 current = rep;
3291 while (*current != 0) { /* non input consuming */
3292 buf[len++] = *current++;
3293 if (len > buf_size - 10) {
3294 growBuffer(buf, 10);
3295 }
3296 }
3297 xmlFree(rep);
3298 }
3299 } else {
3300 if (len > buf_size - 10) {
3301 growBuffer(buf, 10);
3302 }
3303 if (ent->content != NULL)
3304 buf[len++] = ent->content[0];
3305 }
3306 } else if (ent != NULL) {
3307 int i = xmlStrlen(ent->name);
3308 const xmlChar *cur = ent->name;
3309
3310 /*
3311 * This may look absurd but is needed to detect
3312 * entities problems
3313 */
3314 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3315 (ent->content != NULL)) {
3316 xmlChar *rep;
3317 rep = xmlStringDecodeEntities(ctxt, ent->content,
3318 XML_SUBSTITUTE_REF, 0, 0, 0);
3319 if (rep != NULL)
3320 xmlFree(rep);
3321 }
3322
3323 /*
3324 * Just output the reference
3325 */
3326 buf[len++] = '&';
3327 while (len > buf_size - i - 10) {
3328 growBuffer(buf, i + 10);
3329 }
3330 for (;i > 0;i--)
3331 buf[len++] = *cur++;
3332 buf[len++] = ';';
3333 }
3334 }
3335 } else {
3336 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3337 if ((len != 0) || (!normalize)) {
3338 if ((!normalize) || (!in_space)) {
3339 COPY_BUF(l,buf,len,0x20);
3340 if (len > buf_size - 10) {
3341 growBuffer(buf, 10);
3342 }
3343 }
3344 in_space = 1;
3345 }
3346 } else {
3347 in_space = 0;
3348 COPY_BUF(l,buf,len,c);
3349 if (len > buf_size - 10) {
3350 growBuffer(buf, 10);
3351 }
3352 }
3353 NEXTL(l);
3354 }
3355 GROW;
3356 c = CUR_CHAR(l);
3357 }
3358 if ((in_space) && (normalize)) {
3359 while (buf[len - 1] == 0x20) len--;
3360 }
3361 buf[len] = 0;
3362 if (RAW == '<') {
3363 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3364 } else if (RAW != limit) {
3365 if ((c != 0) && (!IS_CHAR(c))) {
3366 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3367 "invalid character in attribute value\n");
3368 } else {
3369 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3370 "AttValue: ' expected\n");
3371 }
3372 } else
3373 NEXT;
3374 if (attlen != NULL) *attlen = len;
3375 return(buf);
3376
3377mem_error:
3378 xmlErrMemory(ctxt, NULL);
3379 return(NULL);
3380}
3381
3382/**
3383 * xmlParseAttValue:
3384 * @ctxt: an XML parser context
3385 *
3386 * parse a value for an attribute
3387 * Note: the parser won't do substitution of entities here, this
3388 * will be handled later in xmlStringGetNodeList
3389 *
3390 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3391 * "'" ([^<&'] | Reference)* "'"
3392 *
3393 * 3.3.3 Attribute-Value Normalization:
3394 * Before the value of an attribute is passed to the application or
3395 * checked for validity, the XML processor must normalize it as follows:
3396 * - a character reference is processed by appending the referenced
3397 * character to the attribute value
3398 * - an entity reference is processed by recursively processing the
3399 * replacement text of the entity
3400 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3401 * appending #x20 to the normalized value, except that only a single
3402 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3403 * parsed entity or the literal entity value of an internal parsed entity
3404 * - other characters are processed by appending them to the normalized value
3405 * If the declared value is not CDATA, then the XML processor must further
3406 * process the normalized attribute value by discarding any leading and
3407 * trailing space (#x20) characters, and by replacing sequences of space
3408 * (#x20) characters by a single space (#x20) character.
3409 * All attributes for which no declaration has been read should be treated
3410 * by a non-validating parser as if declared CDATA.
3411 *
3412 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3413 */
3414
3415
3416xmlChar *
3417xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3418 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3419 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3420}
3421
3422/**
3423 * xmlParseSystemLiteral:
3424 * @ctxt: an XML parser context
3425 *
3426 * parse an XML Literal
3427 *
3428 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3429 *
3430 * Returns the SystemLiteral parsed or NULL
3431 */
3432
3433xmlChar *
3434xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3435 xmlChar *buf = NULL;
3436 int len = 0;
3437 int size = XML_PARSER_BUFFER_SIZE;
3438 int cur, l;
3439 xmlChar stop;
3440 int state = ctxt->instate;
3441 int count = 0;
3442
3443 SHRINK;
3444 if (RAW == '"') {
3445 NEXT;
3446 stop = '"';
3447 } else if (RAW == '\'') {
3448 NEXT;
3449 stop = '\'';
3450 } else {
3451 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3452 return(NULL);
3453 }
3454
3455 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3456 if (buf == NULL) {
3457 xmlErrMemory(ctxt, NULL);
3458 return(NULL);
3459 }
3460 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3461 cur = CUR_CHAR(l);
3462 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3463 if (len + 5 >= size) {
3464 xmlChar *tmp;
3465
3466 size *= 2;
3467 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3468 if (tmp == NULL) {
3469 xmlFree(buf);
3470 xmlErrMemory(ctxt, NULL);
3471 ctxt->instate = (xmlParserInputState) state;
3472 return(NULL);
3473 }
3474 buf = tmp;
3475 }
3476 count++;
3477 if (count > 50) {
3478 GROW;
3479 count = 0;
3480 }
3481 COPY_BUF(l,buf,len,cur);
3482 NEXTL(l);
3483 cur = CUR_CHAR(l);
3484 if (cur == 0) {
3485 GROW;
3486 SHRINK;
3487 cur = CUR_CHAR(l);
3488 }
3489 }
3490 buf[len] = 0;
3491 ctxt->instate = (xmlParserInputState) state;
3492 if (!IS_CHAR(cur)) {
3493 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3494 } else {
3495 NEXT;
3496 }
3497 return(buf);
3498}
3499
3500/**
3501 * xmlParsePubidLiteral:
3502 * @ctxt: an XML parser context
3503 *
3504 * parse an XML public literal
3505 *
3506 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3507 *
3508 * Returns the PubidLiteral parsed or NULL.
3509 */
3510
3511xmlChar *
3512xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3513 xmlChar *buf = NULL;
3514 int len = 0;
3515 int size = XML_PARSER_BUFFER_SIZE;
3516 xmlChar cur;
3517 xmlChar stop;
3518 int count = 0;
3519 xmlParserInputState oldstate = ctxt->instate;
3520
3521 SHRINK;
3522 if (RAW == '"') {
3523 NEXT;
3524 stop = '"';
3525 } else if (RAW == '\'') {
3526 NEXT;
3527 stop = '\'';
3528 } else {
3529 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3530 return(NULL);
3531 }
3532 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3533 if (buf == NULL) {
3534 xmlErrMemory(ctxt, NULL);
3535 return(NULL);
3536 }
3537 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3538 cur = CUR;
3539 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3540 if (len + 1 >= size) {
3541 xmlChar *tmp;
3542
3543 size *= 2;
3544 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3545 if (tmp == NULL) {
3546 xmlErrMemory(ctxt, NULL);
3547 xmlFree(buf);
3548 return(NULL);
3549 }
3550 buf = tmp;
3551 }
3552 buf[len++] = cur;
3553 count++;
3554 if (count > 50) {
3555 GROW;
3556 count = 0;
3557 }
3558 NEXT;
3559 cur = CUR;
3560 if (cur == 0) {
3561 GROW;
3562 SHRINK;
3563 cur = CUR;
3564 }
3565 }
3566 buf[len] = 0;
3567 if (cur != stop) {
3568 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3569 } else {
3570 NEXT;
3571 }
3572 ctxt->instate = oldstate;
3573 return(buf);
3574}
3575
3576void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3577
/*
 * Lookup table driving the inner loop of the accelerated character data
 * scan. Entry i is non-zero (and equal to i) when byte i may simply be
 * skipped over: TAB and 0x20..0x7F with the exception of '&', '<' and
 * ']'. Every other byte, including LF, CR and all non-ASCII bytes, maps
 * to zero and stops the loop. One row covers 16 consecutive byte values.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: control characters, only TAB is accepted (CR/LF handled apart) */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3615
3616/**
3617 * xmlParseCharData:
3618 * @ctxt: an XML parser context
3619 * @cdata: int indicating whether we are within a CDATA section
3620 *
3621 * parse a CharData section.
3622 * if we are within a CDATA section ']]>' marks an end of section.
3623 *
3624 * The right angle bracket (>) may be represented using the string "&gt;",
3625 * and must, for compatibility, be escaped using "&gt;" or a character
3626 * reference when it appears in the string "]]>" in content, when that
3627 * string is not marking the end of a CDATA section.
3628 *
3629 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3630 */
3631
3632void
3633xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3634 const xmlChar *in;
3635 int nbchar = 0;
3636 int line = ctxt->input->line;
3637 int col = ctxt->input->col;
3638 int ccol;
3639
3640 SHRINK;
3641 GROW;
3642 /*
3643 * Accelerated common case where input don't need to be
3644 * modified before passing it to the handler.
3645 */
3646 if (!cdata) {
3647 in = ctxt->input->cur;
3648 do {
3649get_more_space:
3650 while (*in == 0x20) { in++; ctxt->input->col++; }
3651 if (*in == 0xA) {
3652 do {
3653 ctxt->input->line++; ctxt->input->col = 1;
3654 in++;
3655 } while (*in == 0xA);
3656 goto get_more_space;
3657 }
3658 if (*in == '<') {
3659 nbchar = in - ctxt->input->cur;
3660 if (nbchar > 0) {
3661 const xmlChar *tmp = ctxt->input->cur;
3662 ctxt->input->cur = in;
3663
3664 if ((ctxt->sax != NULL) &&
3665 (ctxt->sax->ignorableWhitespace !=
3666 ctxt->sax->characters)) {
3667 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3668 if (ctxt->sax->ignorableWhitespace != NULL)
3669 ctxt->sax->ignorableWhitespace(ctxt->userData,
3670 tmp, nbchar);
3671 } else {
3672 if (ctxt->sax->characters != NULL)
3673 ctxt->sax->characters(ctxt->userData,
3674 tmp, nbchar);
3675 if (*ctxt->space == -1)
3676 *ctxt->space = -2;
3677 }
3678 } else if ((ctxt->sax != NULL) &&
3679 (ctxt->sax->characters != NULL)) {
3680 ctxt->sax->characters(ctxt->userData,
3681 tmp, nbchar);
3682 }
3683 }
3684 return;
3685 }
3686
3687get_more:
3688 ccol = ctxt->input->col;
3689 while (test_char_data[*in]) {
3690 in++;
3691 ccol++;
3692 }
3693 ctxt->input->col = ccol;
3694 if (*in == 0xA) {
3695 do {
3696 ctxt->input->line++; ctxt->input->col = 1;
3697 in++;
3698 } while (*in == 0xA);
3699 goto get_more;
3700 }
3701 if (*in == ']') {
3702 if ((in[1] == ']') && (in[2] == '>')) {
3703 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3704 ctxt->input->cur = in;
3705 return;
3706 }
3707 in++;
3708 ctxt->input->col++;
3709 goto get_more;
3710 }
3711 nbchar = in - ctxt->input->cur;
3712 if (nbchar > 0) {
3713 if ((ctxt->sax != NULL) &&
3714 (ctxt->sax->ignorableWhitespace !=
3715 ctxt->sax->characters) &&
3716 (IS_BLANK_CH(*ctxt->input->cur))) {
3717 const xmlChar *tmp = ctxt->input->cur;
3718 ctxt->input->cur = in;
3719
3720 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3721 if (ctxt->sax->ignorableWhitespace != NULL)
3722 ctxt->sax->ignorableWhitespace(ctxt->userData,
3723 tmp, nbchar);
3724 } else {
3725 if (ctxt->sax->characters != NULL)
3726 ctxt->sax->characters(ctxt->userData,
3727 tmp, nbchar);
3728 if (*ctxt->space == -1)
3729 *ctxt->space = -2;
3730 }
3731 line = ctxt->input->line;
3732 col = ctxt->input->col;
3733 } else if (ctxt->sax != NULL) {
3734 if (ctxt->sax->characters != NULL)
3735 ctxt->sax->characters(ctxt->userData,
3736 ctxt->input->cur, nbchar);
3737 line = ctxt->input->line;
3738 col = ctxt->input->col;
3739 }
3740 }
3741 ctxt->input->cur = in;
3742 if (*in == 0xD) {
3743 in++;
3744 if (*in == 0xA) {
3745 ctxt->input->cur = in;
3746 in++;
3747 ctxt->input->line++; ctxt->input->col = 1;
3748 continue; /* while */
3749 }
3750 in--;
3751 }
3752 if (*in == '<') {
3753 return;
3754 }
3755 if (*in == '&') {
3756 return;
3757 }
3758 SHRINK;
3759 GROW;
3760 in = ctxt->input->cur;
3761 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3762 nbchar = 0;
3763 }
3764 ctxt->input->line = line;
3765 ctxt->input->col = col;
3766 xmlParseCharDataComplex(ctxt, cdata);
3767}
3768
3769/**
3770 * xmlParseCharDataComplex:
3771 * @ctxt: an XML parser context
3772 * @cdata: int indicating whether we are within a CDATA section
3773 *
3774 * parse a CharData section.this is the fallback function
3775 * of xmlParseCharData() when the parsing requires handling
3776 * of non-ASCII characters.
3777 */
3778void
3779xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3780 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3781 int nbchar = 0;
3782 int cur, l;
3783 int count = 0;
3784
3785 SHRINK;
3786 GROW;
3787 cur = CUR_CHAR(l);
3788 while ((cur != '<') && /* checked */
3789 (cur != '&') &&
3790 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3791 if ((cur == ']') && (NXT(1) == ']') &&
3792 (NXT(2) == '>')) {
3793 if (cdata) break;
3794 else {
3795 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3796 }
3797 }
3798 COPY_BUF(l,buf,nbchar,cur);
3799 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3800 buf[nbchar] = 0;
3801
3802 /*
3803 * OK the segment is to be consumed as chars.
3804 */
3805 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3806 if (areBlanks(ctxt, buf, nbchar, 0)) {
3807 if (ctxt->sax->ignorableWhitespace != NULL)
3808 ctxt->sax->ignorableWhitespace(ctxt->userData,
3809 buf, nbchar);
3810 } else {
3811 if (ctxt->sax->characters != NULL)
3812 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3813 if ((ctxt->sax->characters !=
3814 ctxt->sax->ignorableWhitespace) &&
3815 (*ctxt->space == -1))
3816 *ctxt->space = -2;
3817 }
3818 }
3819 nbchar = 0;
3820 }
3821 count++;
3822 if (count > 50) {
3823 GROW;
3824 count = 0;
3825 }
3826 NEXTL(l);
3827 cur = CUR_CHAR(l);
3828 }
3829 if (nbchar != 0) {
3830 buf[nbchar] = 0;
3831 /*
3832 * OK the segment is to be consumed as chars.
3833 */
3834 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3835 if (areBlanks(ctxt, buf, nbchar, 0)) {
3836 if (ctxt->sax->ignorableWhitespace != NULL)
3837 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3838 } else {
3839 if (ctxt->sax->characters != NULL)
3840 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3841 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3842 (*ctxt->space == -1))
3843 *ctxt->space = -2;
3844 }
3845 }
3846 }
3847 if ((cur != 0) && (!IS_CHAR(cur))) {
3848 /* Generate the error and skip the offending character */
3849 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3850 "PCDATA invalid Char value %d\n",
3851 cur);
3852 NEXTL(l);
3853 }
3854}
3855
3856/**
3857 * xmlParseExternalID:
3858 * @ctxt: an XML parser context
3859 * @publicID: a xmlChar** receiving PubidLiteral
3860 * @strict: indicate whether we should restrict parsing to only
3861 * production [75], see NOTE below
3862 *
3863 * Parse an External ID or a Public ID
3864 *
3865 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3866 * 'PUBLIC' S PubidLiteral S SystemLiteral
3867 *
3868 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3869 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3870 *
3871 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3872 *
3873 * Returns the function returns SystemLiteral and in the second
3874 * case publicID receives PubidLiteral, is strict is off
3875 * it is possible to return NULL and have publicID set.
3876 */
3877
3878xmlChar *
3879xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3880 xmlChar *URI = NULL;
3881
3882 SHRINK;
3883
3884 *publicID = NULL;
3885 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3886 SKIP(6);
3887 if (!IS_BLANK_CH(CUR)) {
3888 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3889 "Space required after 'SYSTEM'\n");
3890 }
3891 SKIP_BLANKS;
3892 URI = xmlParseSystemLiteral(ctxt);
3893 if (URI == NULL) {
3894 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3895 }
3896 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3897 SKIP(6);
3898 if (!IS_BLANK_CH(CUR)) {
3899 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3900 "Space required after 'PUBLIC'\n");
3901 }
3902 SKIP_BLANKS;
3903 *publicID = xmlParsePubidLiteral(ctxt);
3904 if (*publicID == NULL) {
3905 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3906 }
3907 if (strict) {
3908 /*
3909 * We don't handle [83] so "S SystemLiteral" is required.
3910 */
3911 if (!IS_BLANK_CH(CUR)) {
3912 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3913 "Space required after the Public Identifier\n");
3914 }
3915 } else {
3916 /*
3917 * We handle [83] so we return immediately, if
3918 * "S SystemLiteral" is not detected. From a purely parsing
3919 * point of view that's a nice mess.
3920 */
3921 const xmlChar *ptr;
3922 GROW;
3923
3924 ptr = CUR_PTR;
3925 if (!IS_BLANK_CH(*ptr)) return(NULL);
3926
3927 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3928 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3929 }
3930 SKIP_BLANKS;
3931 URI = xmlParseSystemLiteral(ctxt);
3932 if (URI == NULL) {
3933 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3934 }
3935 }
3936 return(URI);
3937}
3938
3939/**
3940 * xmlParseCommentComplex:
3941 * @ctxt: an XML parser context
3942 * @buf: the already parsed part of the buffer
3943 * @len: number of bytes filles in the buffer
3944 * @size: allocated size of the buffer
3945 *
3946 * Skip an XML (SGML) comment <!-- .... -->
3947 * The spec says that "For compatibility, the string "--" (double-hyphen)
3948 * must not occur within comments. "
3949 * This is the slow routine in case the accelerator for ascii didn't work
3950 *
3951 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3952 */
3953static void
3954xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3955 int q, ql;
3956 int r, rl;
3957 int cur, l;
3958 xmlParserInputPtr input = ctxt->input;
3959 int count = 0;
3960
3961 if (buf == NULL) {
3962 len = 0;
3963 size = XML_PARSER_BUFFER_SIZE;
3964 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3965 if (buf == NULL) {
3966 xmlErrMemory(ctxt, NULL);
3967 return;
3968 }
3969 }
3970 GROW; /* Assure there's enough input data */
3971 q = CUR_CHAR(ql);
3972 if (q == 0)
3973 goto not_terminated;
3974 if (!IS_CHAR(q)) {
3975 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3976 "xmlParseComment: invalid xmlChar value %d\n",
3977 q);
3978 xmlFree (buf);
3979 return;
3980 }
3981 NEXTL(ql);
3982 r = CUR_CHAR(rl);
3983 if (r == 0)
3984 goto not_terminated;
3985 if (!IS_CHAR(r)) {
3986 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3987 "xmlParseComment: invalid xmlChar value %d\n",
3988 q);
3989 xmlFree (buf);
3990 return;
3991 }
3992 NEXTL(rl);
3993 cur = CUR_CHAR(l);
3994 if (cur == 0)
3995 goto not_terminated;
3996 while (IS_CHAR(cur) && /* checked */
3997 ((cur != '>') ||
3998 (r != '-') || (q != '-'))) {
3999 if ((r == '-') && (q == '-')) {
4000 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4001 }
4002 if (len + 5 >= size) {
4003 xmlChar *new_buf;
4004 size *= 2;
4005 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4006 if (new_buf == NULL) {
4007 xmlFree (buf);
4008 xmlErrMemory(ctxt, NULL);
4009 return;
4010 }
4011 buf = new_buf;
4012 }
4013 COPY_BUF(ql,buf,len,q);
4014 q = r;
4015 ql = rl;
4016 r = cur;
4017 rl = l;
4018
4019 count++;
4020 if (count > 50) {
4021 GROW;
4022 count = 0;
4023 }
4024 NEXTL(l);
4025 cur = CUR_CHAR(l);
4026 if (cur == 0) {
4027 SHRINK;
4028 GROW;
4029 cur = CUR_CHAR(l);
4030 }
4031 }
4032 buf[len] = 0;
4033 if (cur == 0) {
4034 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4035 "Comment not terminated \n<!--%.50s\n", buf);
4036 } else if (!IS_CHAR(cur)) {
4037 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4038 "xmlParseComment: invalid xmlChar value %d\n",
4039 cur);
4040 } else {
4041 if (input != ctxt->input) {
4042 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4043 "Comment doesn't start and stop in the same entity\n");
4044 }
4045 NEXT;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4047 (!ctxt->disableSAX))
4048 ctxt->sax->comment(ctxt->userData, buf);
4049 }
4050 xmlFree(buf);
4051 return;
4052not_terminated:
4053 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4054 "Comment not terminated\n", NULL);
4055 xmlFree(buf);
4056 return;
4057}
4058
4059/**
4060 * xmlParseComment:
4061 * @ctxt: an XML parser context
4062 *
4063 * Skip an XML (SGML) comment <!-- .... -->
4064 * The spec says that "For compatibility, the string "--" (double-hyphen)
4065 * must not occur within comments. "
4066 *
4067 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4068 */
4069void
4070xmlParseComment(xmlParserCtxtPtr ctxt) {
4071 xmlChar *buf = NULL;
4072 int size = XML_PARSER_BUFFER_SIZE;
4073 int len = 0;
4074 xmlParserInputState state;
4075 const xmlChar *in;
4076 int nbchar = 0, ccol;
4077
4078 /*
4079 * Check that there is a comment right here.
4080 */
4081 if ((RAW != '<') || (NXT(1) != '!') ||
4082 (NXT(2) != '-') || (NXT(3) != '-')) return;
4083
4084 state = ctxt->instate;
4085 ctxt->instate = XML_PARSER_COMMENT;
4086 SKIP(4);
4087 SHRINK;
4088 GROW;
4089
4090 /*
4091 * Accelerated common case where input don't need to be
4092 * modified before passing it to the handler.
4093 */
4094 in = ctxt->input->cur;
4095 do {
4096 if (*in == 0xA) {
4097 do {
4098 ctxt->input->line++; ctxt->input->col = 1;
4099 in++;
4100 } while (*in == 0xA);
4101 }
4102get_more:
4103 ccol = ctxt->input->col;
4104 while (((*in > '-') && (*in <= 0x7F)) ||
4105 ((*in >= 0x20) && (*in < '-')) ||
4106 (*in == 0x09)) {
4107 in++;
4108 ccol++;
4109 }
4110 ctxt->input->col = ccol;
4111 if (*in == 0xA) {
4112 do {
4113 ctxt->input->line++; ctxt->input->col = 1;
4114 in++;
4115 } while (*in == 0xA);
4116 goto get_more;
4117 }
4118 nbchar = in - ctxt->input->cur;
4119 /*
4120 * save current set of data
4121 */
4122 if (nbchar > 0) {
4123 if ((ctxt->sax != NULL) &&
4124 (ctxt->sax->comment != NULL)) {
4125 if (buf == NULL) {
4126 if ((*in == '-') && (in[1] == '-'))
4127 size = nbchar + 1;
4128 else
4129 size = XML_PARSER_BUFFER_SIZE + nbchar;
4130 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4131 if (buf == NULL) {
4132 xmlErrMemory(ctxt, NULL);
4133 ctxt->instate = state;
4134 return;
4135 }
4136 len = 0;
4137 } else if (len + nbchar + 1 >= size) {
4138 xmlChar *new_buf;
4139 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4140 new_buf = (xmlChar *) xmlRealloc(buf,
4141 size * sizeof(xmlChar));
4142 if (new_buf == NULL) {
4143 xmlFree (buf);
4144 xmlErrMemory(ctxt, NULL);
4145 ctxt->instate = state;
4146 return;
4147 }
4148 buf = new_buf;
4149 }
4150 memcpy(&buf[len], ctxt->input->cur, nbchar);
4151 len += nbchar;
4152 buf[len] = 0;
4153 }
4154 }
4155 ctxt->input->cur = in;
4156 if (*in == 0xA) {
4157 in++;
4158 ctxt->input->line++; ctxt->input->col = 1;
4159 }
4160 if (*in == 0xD) {
4161 in++;
4162 if (*in == 0xA) {
4163 ctxt->input->cur = in;
4164 in++;
4165 ctxt->input->line++; ctxt->input->col = 1;
4166 continue; /* while */
4167 }
4168 in--;
4169 }
4170 SHRINK;
4171 GROW;
4172 in = ctxt->input->cur;
4173 if (*in == '-') {
4174 if (in[1] == '-') {
4175 if (in[2] == '>') {
4176 SKIP(3);
4177 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4178 (!ctxt->disableSAX)) {
4179 if (buf != NULL)
4180 ctxt->sax->comment(ctxt->userData, buf);
4181 else
4182 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4183 }
4184 if (buf != NULL)
4185 xmlFree(buf);
4186 ctxt->instate = state;
4187 return;
4188 }
4189 if (buf != NULL)
4190 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4191 "Comment not terminated \n<!--%.50s\n",
4192 buf);
4193 else
4194 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4195 "Comment not terminated \n", NULL);
4196 in++;
4197 ctxt->input->col++;
4198 }
4199 in++;
4200 ctxt->input->col++;
4201 goto get_more;
4202 }
4203 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4204 xmlParseCommentComplex(ctxt, buf, len, size);
4205 ctxt->instate = state;
4206 return;
4207}
4208
4209
4210/**
4211 * xmlParsePITarget:
4212 * @ctxt: an XML parser context
4213 *
4214 * parse the name of a PI
4215 *
4216 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4217 *
4218 * Returns the PITarget name or NULL
4219 */
4220
4221const xmlChar *
4222xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4223 const xmlChar *name;
4224
4225 name = xmlParseName(ctxt);
4226 if ((name != NULL) &&
4227 ((name[0] == 'x') || (name[0] == 'X')) &&
4228 ((name[1] == 'm') || (name[1] == 'M')) &&
4229 ((name[2] == 'l') || (name[2] == 'L'))) {
4230 int i;
4231 if ((name[0] == 'x') && (name[1] == 'm') &&
4232 (name[2] == 'l') && (name[3] == 0)) {
4233 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4234 "XML declaration allowed only at the start of the document\n");
4235 return(name);
4236 } else if (name[3] == 0) {
4237 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4238 return(name);
4239 }
4240 for (i = 0;;i++) {
4241 if (xmlW3CPIs[i] == NULL) break;
4242 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4243 return(name);
4244 }
4245 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4246 "xmlParsePITarget: invalid name prefix 'xml'\n",
4247 NULL, NULL);
4248 }
4249 return(name);
4250}
4251
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be: catalog, optional blanks, '=', optional blanks, a
 * quoted URL, with nothing but blanks after the closing quote.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *cursor, *start;
    xmlChar quote;

    /* Leading blanks, then the "catalog" pseudo attribute name. */
    for (cursor = catalog; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7))
        goto error;

    for (cursor += 7; IS_BLANK_CH(*cursor); cursor++)
        ;
    /* NOTE(review): a missing '=' is dropped silently, without warning. */
    if (*cursor != '=') {
        return;
    }

    for (cursor++; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* The URL runs up to the matching quote. */
    start = ++cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto error;
    url = xmlStrndup(start, cursor - start);

    /* Only blanks may follow the closing quote. */
    for (cursor++; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4313
4314/**
4315 * xmlParsePI:
4316 * @ctxt: an XML parser context
4317 *
4318 * parse an XML Processing Instruction.
4319 *
4320 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4321 *
4322 * The processing is transfered to SAX once parsed.
4323 */
4324
4325void
4326xmlParsePI(xmlParserCtxtPtr ctxt) {
4327 xmlChar *buf = NULL;
4328 int len = 0;
4329 int size = XML_PARSER_BUFFER_SIZE;
4330 int cur, l;
4331 const xmlChar *target;
4332 xmlParserInputState state;
4333 int count = 0;
4334
4335 if ((RAW == '<') && (NXT(1) == '?')) {
4336 xmlParserInputPtr input = ctxt->input;
4337 state = ctxt->instate;
4338 ctxt->instate = XML_PARSER_PI;
4339 /*
4340 * this is a Processing Instruction.
4341 */
4342 SKIP(2);
4343 SHRINK;
4344
4345 /*
4346 * Parse the target name and check for special support like
4347 * namespace.
4348 */
4349 target = xmlParsePITarget(ctxt);
4350 if (target != NULL) {
4351 if ((RAW == '?') && (NXT(1) == '>')) {
4352 if (input != ctxt->input) {
4353 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4354 "PI declaration doesn't start and stop in the same entity\n");
4355 }
4356 SKIP(2);
4357
4358 /*
4359 * SAX: PI detected.
4360 */
4361 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4362 (ctxt->sax->processingInstruction != NULL))
4363 ctxt->sax->processingInstruction(ctxt->userData,
4364 target, NULL);
4365 if (ctxt->instate != XML_PARSER_EOF)
4366 ctxt->instate = state;
4367 return;
4368 }
4369 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4370 if (buf == NULL) {
4371 xmlErrMemory(ctxt, NULL);
4372 ctxt->instate = state;
4373 return;
4374 }
4375 cur = CUR;
4376 if (!IS_BLANK(cur)) {
4377 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4378 "ParsePI: PI %s space expected\n", target);
4379 }
4380 SKIP_BLANKS;
4381 cur = CUR_CHAR(l);
4382 while (IS_CHAR(cur) && /* checked */
4383 ((cur != '?') || (NXT(1) != '>'))) {
4384 if (len + 5 >= size) {
4385 xmlChar *tmp;
4386
4387 size *= 2;
4388 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4389 if (tmp == NULL) {
4390 xmlErrMemory(ctxt, NULL);
4391 xmlFree(buf);
4392 ctxt->instate = state;
4393 return;
4394 }
4395 buf = tmp;
4396 }
4397 count++;
4398 if (count > 50) {
4399 GROW;
4400 count = 0;
4401 }
4402 COPY_BUF(l,buf,len,cur);
4403 NEXTL(l);
4404 cur = CUR_CHAR(l);
4405 if (cur == 0) {
4406 SHRINK;
4407 GROW;
4408 cur = CUR_CHAR(l);
4409 }
4410 }
4411 buf[len] = 0;
4412 if (cur != '?') {
4413 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4414 "ParsePI: PI %s never end ...\n", target);
4415 } else {
4416 if (input != ctxt->input) {
4417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4418 "PI declaration doesn't start and stop in the same entity\n");
4419 }
4420 SKIP(2);
4421
4422#ifdef LIBXML_CATALOG_ENABLED
4423 if (((state == XML_PARSER_MISC) ||
4424 (state == XML_PARSER_START)) &&
4425 (xmlStrEqual(target, XML_CATALOG_PI))) {
4426 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4427 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4428 (allow == XML_CATA_ALLOW_ALL))
4429 xmlParseCatalogPI(ctxt, buf);
4430 }
4431#endif
4432
4433
4434 /*
4435 * SAX: PI detected.
4436 */
4437 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4438 (ctxt->sax->processingInstruction != NULL))
4439 ctxt->sax->processingInstruction(ctxt->userData,
4440 target, buf);
4441 }
4442 xmlFree(buf);
4443 } else {
4444 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4445 }
4446 if (ctxt->instate != XML_PARSER_EOF)
4447 ctxt->instate = state;
4448 }
4449}
4450
4451/**
4452 * xmlParseNotationDecl:
4453 * @ctxt: an XML parser context
4454 *
4455 * parse a notation declaration
4456 *
4457 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4458 *
4459 * Hence there is actually 3 choices:
4460 * 'PUBLIC' S PubidLiteral
4461 * 'PUBLIC' S PubidLiteral S SystemLiteral
4462 * and 'SYSTEM' S SystemLiteral
4463 *
4464 * See the NOTE on xmlParseExternalID().
4465 */
4466
4467void
4468xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4469 const xmlChar *name;
4470 xmlChar *Pubid;
4471 xmlChar *Systemid;
4472
4473 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4474 xmlParserInputPtr input = ctxt->input;
4475 SHRINK;
4476 SKIP(10);
4477 if (!IS_BLANK_CH(CUR)) {
4478 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4479 "Space required after '<!NOTATION'\n");
4480 return;
4481 }
4482 SKIP_BLANKS;
4483
4484 name = xmlParseName(ctxt);
4485 if (name == NULL) {
4486 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4487 return;
4488 }
4489 if (!IS_BLANK_CH(CUR)) {
4490 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4491 "Space required after the NOTATION name'\n");
4492 return;
4493 }
4494 SKIP_BLANKS;
4495
4496 /*
4497 * Parse the IDs.
4498 */
4499 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4500 SKIP_BLANKS;
4501
4502 if (RAW == '>') {
4503 if (input != ctxt->input) {
4504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4505 "Notation declaration doesn't start and stop in the same entity\n");
4506 }
4507 NEXT;
4508 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4509 (ctxt->sax->notationDecl != NULL))
4510 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4511 } else {
4512 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4513 }
4514 if (Systemid != NULL) xmlFree(Systemid);
4515 if (Pubid != NULL) xmlFree(Pubid);
4516 }
4517}
4518
4519/**
4520 * xmlParseEntityDecl:
4521 * @ctxt: an XML parser context
4522 *
4523 * parse <!ENTITY declarations
4524 *
4525 * [70] EntityDecl ::= GEDecl | PEDecl
4526 *
4527 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4528 *
4529 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4530 *
4531 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4532 *
4533 * [74] PEDef ::= EntityValue | ExternalID
4534 *
4535 * [76] NDataDecl ::= S 'NDATA' S Name
4536 *
4537 * [ VC: Notation Declared ]
4538 * The Name must match the declared name of a notation.
4539 */
4540
4541void
4542xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4543 const xmlChar *name = NULL;
4544 xmlChar *value = NULL;
4545 xmlChar *URI = NULL, *literal = NULL;
4546 const xmlChar *ndata = NULL;
4547 int isParameter = 0;
4548 xmlChar *orig = NULL;
4549 int skipped;
4550 unsigned long oldnbent = ctxt->nbentities;
4551
4552 /* GROW; done in the caller */
4553 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4554 xmlParserInputPtr input = ctxt->input;
4555 SHRINK;
4556 SKIP(8);
4557 skipped = SKIP_BLANKS;
4558 if (skipped == 0) {
4559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4560 "Space required after '<!ENTITY'\n");
4561 }
4562
4563 if (RAW == '%') {
4564 NEXT;
4565 skipped = SKIP_BLANKS;
4566 if (skipped == 0) {
4567 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4568 "Space required after '%'\n");
4569 }
4570 isParameter = 1;
4571 }
4572
4573 name = xmlParseName(ctxt);
4574 if (name == NULL) {
4575 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4576 "xmlParseEntityDecl: no name\n");
4577 return;
4578 }
4579 skipped = SKIP_BLANKS;
4580 if (skipped == 0) {
4581 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4582 "Space required after the entity name\n");
4583 }
4584
4585 ctxt->instate = XML_PARSER_ENTITY_DECL;
4586 /*
4587 * handle the various case of definitions...
4588 */
4589 if (isParameter) {
4590 if ((RAW == '"') || (RAW == '\'')) {
4591 value = xmlParseEntityValue(ctxt, &orig);
4592 if (value) {
4593 if ((ctxt->sax != NULL) &&
4594 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4595 ctxt->sax->entityDecl(ctxt->userData, name,
4596 XML_INTERNAL_PARAMETER_ENTITY,
4597 NULL, NULL, value);
4598 }
4599 } else {
4600 URI = xmlParseExternalID(ctxt, &literal, 1);
4601 if ((URI == NULL) && (literal == NULL)) {
4602 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4603 }
4604 if (URI) {
4605 xmlURIPtr uri;
4606
4607 uri = xmlParseURI((const char *) URI);
4608 if (uri == NULL) {
4609 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4610 "Invalid URI: %s\n", URI);
4611 /*
4612 * This really ought to be a well formedness error
4613 * but the XML Core WG decided otherwise c.f. issue
4614 * E26 of the XML erratas.
4615 */
4616 } else {
4617 if (uri->fragment != NULL) {
4618 /*
4619 * Okay this is foolish to block those but not
4620 * invalid URIs.
4621 */
4622 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4623 } else {
4624 if ((ctxt->sax != NULL) &&
4625 (!ctxt->disableSAX) &&
4626 (ctxt->sax->entityDecl != NULL))
4627 ctxt->sax->entityDecl(ctxt->userData, name,
4628 XML_EXTERNAL_PARAMETER_ENTITY,
4629 literal, URI, NULL);
4630 }
4631 xmlFreeURI(uri);
4632 }
4633 }
4634 }
4635 } else {
4636 if ((RAW == '"') || (RAW == '\'')) {
4637 value = xmlParseEntityValue(ctxt, &orig);
4638 if ((ctxt->sax != NULL) &&
4639 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4640 ctxt->sax->entityDecl(ctxt->userData, name,
4641 XML_INTERNAL_GENERAL_ENTITY,
4642 NULL, NULL, value);
4643 /*
4644 * For expat compatibility in SAX mode.
4645 */
4646 if ((ctxt->myDoc == NULL) ||
4647 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4648 if (ctxt->myDoc == NULL) {
4649 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4650 }
4651 if (ctxt->myDoc->intSubset == NULL)
4652 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4653 BAD_CAST "fake", NULL, NULL);
4654
4655 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4656 NULL, NULL, value);
4657 }
4658 } else {
4659 URI = xmlParseExternalID(ctxt, &literal, 1);
4660 if ((URI == NULL) && (literal == NULL)) {
4661 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4662 }
4663 if (URI) {
4664 xmlURIPtr uri;
4665
4666 uri = xmlParseURI((const char *)URI);
4667 if (uri == NULL) {
4668 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4669 "Invalid URI: %s\n", URI);
4670 /*
4671 * This really ought to be a well formedness error
4672 * but the XML Core WG decided otherwise c.f. issue
4673 * E26 of the XML erratas.
4674 */
4675 } else {
4676 if (uri->fragment != NULL) {
4677 /*
4678 * Okay this is foolish to block those but not
4679 * invalid URIs.
4680 */
4681 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4682 }
4683 xmlFreeURI(uri);
4684 }
4685 }
4686 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4688 "Space required before 'NDATA'\n");
4689 }
4690 SKIP_BLANKS;
4691 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4692 SKIP(5);
4693 if (!IS_BLANK_CH(CUR)) {
4694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4695 "Space required after 'NDATA'\n");
4696 }
4697 SKIP_BLANKS;
4698 ndata = xmlParseName(ctxt);
4699 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4700 (ctxt->sax->unparsedEntityDecl != NULL))
4701 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4702 literal, URI, ndata);
4703 } else {
4704 if ((ctxt->sax != NULL) &&
4705 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4706 ctxt->sax->entityDecl(ctxt->userData, name,
4707 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4708 literal, URI, NULL);
4709 /*
4710 * For expat compatibility in SAX mode.
4711 * assuming the entity repalcement was asked for
4712 */
4713 if ((ctxt->replaceEntities != 0) &&
4714 ((ctxt->myDoc == NULL) ||
4715 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4716 if (ctxt->myDoc == NULL) {
4717 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4718 }
4719
4720 if (ctxt->myDoc->intSubset == NULL)
4721 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4722 BAD_CAST "fake", NULL, NULL);
4723 xmlSAX2EntityDecl(ctxt, name,
4724 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4725 literal, URI, NULL);
4726 }
4727 }
4728 }
4729 }
4730 SKIP_BLANKS;
4731 if (RAW != '>') {
4732 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4733 "xmlParseEntityDecl: entity %s not terminated\n", name);
4734 } else {
4735 if (input != ctxt->input) {
4736 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4737 "Entity declaration doesn't start and stop in the same entity\n");
4738 }
4739 NEXT;
4740 }
4741 if (orig != NULL) {
4742 /*
4743 * Ugly mechanism to save the raw entity value.
4744 */
4745 xmlEntityPtr cur = NULL;
4746
4747 if (isParameter) {
4748 if ((ctxt->sax != NULL) &&
4749 (ctxt->sax->getParameterEntity != NULL))
4750 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4751 } else {
4752 if ((ctxt->sax != NULL) &&
4753 (ctxt->sax->getEntity != NULL))
4754 cur = ctxt->sax->getEntity(ctxt->userData, name);
4755 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4756 cur = xmlSAX2GetEntity(ctxt, name);
4757 }
4758 }
4759 if (cur != NULL) {
4760 if ((cur->owner != 0) || (cur->children == NULL)) {
4761 cur->owner = ctxt->nbentities - oldnbent;
4762 if (cur->owner == 0)
4763 cur->owner = 1;
4764 }
4765 if (cur->orig != NULL)
4766 xmlFree(orig);
4767 else
4768 cur->orig = orig;
4769 } else
4770 xmlFree(orig);
4771 }
4772 if (value != NULL) xmlFree(value);
4773 if (URI != NULL) xmlFree(URI);
4774 if (literal != NULL) xmlFree(literal);
4775 }
4776}
4777
4778/**
4779 * xmlParseDefaultDecl:
4780 * @ctxt: an XML parser context
4781 * @value: Receive a possible fixed default value for the attribute
4782 *
4783 * Parse an attribute default declaration
4784 *
4785 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4786 *
4787 * [ VC: Required Attribute ]
4788 * if the default declaration is the keyword #REQUIRED, then the
4789 * attribute must be specified for all elements of the type in the
4790 * attribute-list declaration.
4791 *
4792 * [ VC: Attribute Default Legal ]
4793 * The declared default value must meet the lexical constraints of
4794 * the declared attribute type c.f. xmlValidateAttributeDecl()
4795 *
4796 * [ VC: Fixed Attribute Default ]
4797 * if an attribute has a default value declared with the #FIXED
4798 * keyword, instances of that attribute must match the default value.
4799 *
4800 * [ WFC: No < in Attribute Values ]
4801 * handled in xmlParseAttValue()
4802 *
4803 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4804 * or XML_ATTRIBUTE_FIXED.
4805 */
4806
4807int
4808xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4809 int val;
4810 xmlChar *ret;
4811
4812 *value = NULL;
4813 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4814 SKIP(9);
4815 return(XML_ATTRIBUTE_REQUIRED);
4816 }
4817 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4818 SKIP(8);
4819 return(XML_ATTRIBUTE_IMPLIED);
4820 }
4821 val = XML_ATTRIBUTE_NONE;
4822 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4823 SKIP(6);
4824 val = XML_ATTRIBUTE_FIXED;
4825 if (!IS_BLANK_CH(CUR)) {
4826 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4827 "Space required after '#FIXED'\n");
4828 }
4829 SKIP_BLANKS;
4830 }
4831 ret = xmlParseAttValue(ctxt);
4832 ctxt->instate = XML_PARSER_DTD;
4833 if (ret == NULL) {
4834 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4835 "Attribute default value declaration error\n");
4836 } else
4837 *value = ret;
4838 return(val);
4839}
4840
4841/**
4842 * xmlParseNotationType:
4843 * @ctxt: an XML parser context
4844 *
4845 * parse an Notation attribute type.
4846 *
4847 * Note: the leading 'NOTATION' S part has already being parsed...
4848 *
4849 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4850 *
4851 * [ VC: Notation Attributes ]
4852 * Values of this type must match one of the notation names included
4853 * in the declaration; all notation names in the declaration must be declared.
4854 *
4855 * Returns: the notation attribute tree built while parsing
4856 */
4857
4858xmlEnumerationPtr
4859xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4860 const xmlChar *name;
4861 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4862
4863 if (RAW != '(') {
4864 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4865 return(NULL);
4866 }
4867 SHRINK;
4868 do {
4869 NEXT;
4870 SKIP_BLANKS;
4871 name = xmlParseName(ctxt);
4872 if (name == NULL) {
4873 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4874 "Name expected in NOTATION declaration\n");
4875 xmlFreeEnumeration(ret);
4876 return(NULL);
4877 }
4878 cur = xmlCreateEnumeration(name);
4879 if (cur == NULL) {
4880 xmlFreeEnumeration(ret);
4881 return(NULL);
4882 }
4883 if (last == NULL) ret = last = cur;
4884 else {
4885 last->next = cur;
4886 last = cur;
4887 }
4888 SKIP_BLANKS;
4889 } while (RAW == '|');
4890 if (RAW != ')') {
4891 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4892 xmlFreeEnumeration(ret);
4893 return(NULL);
4894 }
4895 NEXT;
4896 return(ret);
4897}
4898
4899/**
4900 * xmlParseEnumerationType:
4901 * @ctxt: an XML parser context
4902 *
4903 * parse an Enumeration attribute type.
4904 *
4905 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4906 *
4907 * [ VC: Enumeration ]
4908 * Values of this type must match one of the Nmtoken tokens in
4909 * the declaration
4910 *
4911 * Returns: the enumeration attribute tree built while parsing
4912 */
4913
4914xmlEnumerationPtr
4915xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4916 xmlChar *name;
4917 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4918
4919 if (RAW != '(') {
4920 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4921 return(NULL);
4922 }
4923 SHRINK;
4924 do {
4925 NEXT;
4926 SKIP_BLANKS;
4927 name = xmlParseNmtoken(ctxt);
4928 if (name == NULL) {
4929 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4930 return(ret);
4931 }
4932 cur = xmlCreateEnumeration(name);
4933 xmlFree(name);
4934 if (cur == NULL) {
4935 xmlFreeEnumeration(ret);
4936 return(NULL);
4937 }
4938 if (last == NULL) ret = last = cur;
4939 else {
4940 last->next = cur;
4941 last = cur;
4942 }
4943 SKIP_BLANKS;
4944 } while (RAW == '|');
4945 if (RAW != ')') {
4946 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4947 return(ret);
4948 }
4949 NEXT;
4950 return(ret);
4951}
4952
4953/**
4954 * xmlParseEnumeratedType:
4955 * @ctxt: an XML parser context
4956 * @tree: the enumeration tree built while parsing
4957 *
4958 * parse an Enumerated attribute type.
4959 *
4960 * [57] EnumeratedType ::= NotationType | Enumeration
4961 *
4962 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4963 *
4964 *
4965 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4966 */
4967
4968int
4969xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4970 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4971 SKIP(8);
4972 if (!IS_BLANK_CH(CUR)) {
4973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4974 "Space required after 'NOTATION'\n");
4975 return(0);
4976 }
4977 SKIP_BLANKS;
4978 *tree = xmlParseNotationType(ctxt);
4979 if (*tree == NULL) return(0);
4980 return(XML_ATTRIBUTE_NOTATION);
4981 }
4982 *tree = xmlParseEnumerationType(ctxt);
4983 if (*tree == NULL) return(0);
4984 return(XML_ATTRIBUTE_ENUMERATION);
4985}
4986
4987/**
4988 * xmlParseAttributeType:
4989 * @ctxt: an XML parser context
4990 * @tree: the enumeration tree built while parsing
4991 *
4992 * parse the Attribute list def for an element
4993 *
4994 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4995 *
4996 * [55] StringType ::= 'CDATA'
4997 *
4998 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4999 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5000 *
5001 * Validity constraints for attribute values syntax are checked in
5002 * xmlValidateAttributeValue()
5003 *
5004 * [ VC: ID ]
5005 * Values of type ID must match the Name production. A name must not
5006 * appear more than once in an XML document as a value of this type;
5007 * i.e., ID values must uniquely identify the elements which bear them.
5008 *
5009 * [ VC: One ID per Element Type ]
5010 * No element type may have more than one ID attribute specified.
5011 *
5012 * [ VC: ID Attribute Default ]
5013 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5014 *
5015 * [ VC: IDREF ]
5016 * Values of type IDREF must match the Name production, and values
5017 * of type IDREFS must match Names; each IDREF Name must match the value
5018 * of an ID attribute on some element in the XML document; i.e. IDREF
5019 * values must match the value of some ID attribute.
5020 *
5021 * [ VC: Entity Name ]
5022 * Values of type ENTITY must match the Name production, values
5023 * of type ENTITIES must match Names; each Entity Name must match the
5024 * name of an unparsed entity declared in the DTD.
5025 *
5026 * [ VC: Name Token ]
5027 * Values of type NMTOKEN must match the Nmtoken production; values
5028 * of type NMTOKENS must match Nmtokens.
5029 *
5030 * Returns the attribute type
5031 */
5032int
5033xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5034 SHRINK;
5035 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5036 SKIP(5);
5037 return(XML_ATTRIBUTE_CDATA);
5038 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5039 SKIP(6);
5040 return(XML_ATTRIBUTE_IDREFS);
5041 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5042 SKIP(5);
5043 return(XML_ATTRIBUTE_IDREF);
5044 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5045 SKIP(2);
5046 return(XML_ATTRIBUTE_ID);
5047 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5048 SKIP(6);
5049 return(XML_ATTRIBUTE_ENTITY);
5050 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5051 SKIP(8);
5052 return(XML_ATTRIBUTE_ENTITIES);
5053 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5054 SKIP(8);
5055 return(XML_ATTRIBUTE_NMTOKENS);
5056 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5057 SKIP(7);
5058 return(XML_ATTRIBUTE_NMTOKEN);
5059 }
5060 return(xmlParseEnumeratedType(ctxt, tree));
5061}
5062
5063/**
5064 * xmlParseAttributeListDecl:
5065 * @ctxt: an XML parser context
5066 *
5067 * : parse the Attribute list def for an element
5068 *
5069 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5070 *
5071 * [53] AttDef ::= S Name S AttType S DefaultDecl
5072 *
5073 */
5074void
5075xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5076 const xmlChar *elemName;
5077 const xmlChar *attrName;
5078 xmlEnumerationPtr tree;
5079
5080 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5081 xmlParserInputPtr input = ctxt->input;
5082
5083 SKIP(9);
5084 if (!IS_BLANK_CH(CUR)) {
5085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5086 "Space required after '<!ATTLIST'\n");
5087 }
5088 SKIP_BLANKS;
5089 elemName = xmlParseName(ctxt);
5090 if (elemName == NULL) {
5091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5092 "ATTLIST: no name for Element\n");
5093 return;
5094 }
5095 SKIP_BLANKS;
5096 GROW;
5097 while (RAW != '>') {
5098 const xmlChar *check = CUR_PTR;
5099 int type;
5100 int def;
5101 xmlChar *defaultValue = NULL;
5102
5103 GROW;
5104 tree = NULL;
5105 attrName = xmlParseName(ctxt);
5106 if (attrName == NULL) {
5107 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5108 "ATTLIST: no name for Attribute\n");
5109 break;
5110 }
5111 GROW;
5112 if (!IS_BLANK_CH(CUR)) {
5113 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5114 "Space required after the attribute name\n");
5115 break;
5116 }
5117 SKIP_BLANKS;
5118
5119 type = xmlParseAttributeType(ctxt, &tree);
5120 if (type <= 0) {
5121 break;
5122 }
5123
5124 GROW;
5125 if (!IS_BLANK_CH(CUR)) {
5126 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5127 "Space required after the attribute type\n");
5128 if (tree != NULL)
5129 xmlFreeEnumeration(tree);
5130 break;
5131 }
5132 SKIP_BLANKS;
5133
5134 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5135 if (def <= 0) {
5136 if (defaultValue != NULL)
5137 xmlFree(defaultValue);
5138 if (tree != NULL)
5139 xmlFreeEnumeration(tree);
5140 break;
5141 }
5142
5143 GROW;
5144 if (RAW != '>') {
5145 if (!IS_BLANK_CH(CUR)) {
5146 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5147 "Space required after the attribute default value\n");
5148 if (defaultValue != NULL)
5149 xmlFree(defaultValue);
5150 if (tree != NULL)
5151 xmlFreeEnumeration(tree);
5152 break;
5153 }
5154 SKIP_BLANKS;
5155 }
5156 if (check == CUR_PTR) {
5157 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5158 "in xmlParseAttributeListDecl\n");
5159 if (defaultValue != NULL)
5160 xmlFree(defaultValue);
5161 if (tree != NULL)
5162 xmlFreeEnumeration(tree);
5163 break;
5164 }
5165 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5166 (ctxt->sax->attributeDecl != NULL))
5167 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5168 type, def, defaultValue, tree);
5169 else if (tree != NULL)
5170 xmlFreeEnumeration(tree);
5171
5172 if ((ctxt->sax2) && (defaultValue != NULL) &&
5173 (def != XML_ATTRIBUTE_IMPLIED) &&
5174 (def != XML_ATTRIBUTE_REQUIRED)) {
5175 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5176 }
5177 if (ctxt->sax2) {
5178 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5179 }
5180 if (defaultValue != NULL)
5181 xmlFree(defaultValue);
5182 GROW;
5183 }
5184 if (RAW == '>') {
5185 if (input != ctxt->input) {
5186 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5187 "Attribute list declaration doesn't start and stop in the same entity\n");
5188 }
5189 NEXT;
5190 }
5191 }
5192}
5193
5194/**
5195 * xmlParseElementMixedContentDecl:
5196 * @ctxt: an XML parser context
5197 * @inputchk: the input used for the current entity, needed for boundary checks
5198 *
5199 * parse the declaration for a Mixed Element content
5200 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5201 *
5202 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5203 * '(' S? '#PCDATA' S? ')'
5204 *
5205 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5206 *
5207 * [ VC: No Duplicate Types ]
5208 * The same name must not appear more than once in a single
5209 * mixed-content declaration.
5210 *
5211 * returns: the list of the xmlElementContentPtr describing the element choices
5212 */
5213xmlElementContentPtr
5214xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5215 xmlElementContentPtr ret = NULL, cur = NULL, n;
5216 const xmlChar *elem = NULL;
5217
5218 GROW;
5219 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5220 SKIP(7);
5221 SKIP_BLANKS;
5222 SHRINK;
5223 if (RAW == ')') {
5224 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5225 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5226"Element content declaration doesn't start and stop in the same entity\n",
5227 NULL);
5228 }
5229 NEXT;
5230 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5231 if (RAW == '*') {
5232 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5233 NEXT;
5234 }
5235 return(ret);
5236 }
5237 if ((RAW == '(') || (RAW == '|')) {
5238 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5239 if (ret == NULL) return(NULL);
5240 }
5241 while (RAW == '|') {
5242 NEXT;
5243 if (elem == NULL) {
5244 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5245 if (ret == NULL) return(NULL);
5246 ret->c1 = cur;
5247 if (cur != NULL)
5248 cur->parent = ret;
5249 cur = ret;
5250 } else {
5251 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5252 if (n == NULL) return(NULL);
5253 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5254 if (n->c1 != NULL)
5255 n->c1->parent = n;
5256 cur->c2 = n;
5257 if (n != NULL)
5258 n->parent = cur;
5259 cur = n;
5260 }
5261 SKIP_BLANKS;
5262 elem = xmlParseName(ctxt);
5263 if (elem == NULL) {
5264 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5265 "xmlParseElementMixedContentDecl : Name expected\n");
5266 xmlFreeDocElementContent(ctxt->myDoc, cur);
5267 return(NULL);
5268 }
5269 SKIP_BLANKS;
5270 GROW;
5271 }
5272 if ((RAW == ')') && (NXT(1) == '*')) {
5273 if (elem != NULL) {
5274 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5275 XML_ELEMENT_CONTENT_ELEMENT);
5276 if (cur->c2 != NULL)
5277 cur->c2->parent = cur;
5278 }
5279 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5280 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5281 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5282"Element content declaration doesn't start and stop in the same entity\n",
5283 NULL);
5284 }
5285 SKIP(2);
5286 } else {
5287 xmlFreeDocElementContent(ctxt->myDoc, ret);
5288 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5289 return(NULL);
5290 }
5291
5292 } else {
5293 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5294 }
5295 return(ret);
5296}
5297
5298/**
5299 * xmlParseElementChildrenContentDecl:
5300 * @ctxt: an XML parser context
5301 * @inputchk: the input used for the current entity, needed for boundary checks
5302 *
5303 * parse the declaration for a Mixed Element content
5304 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5305 *
5306 *
5307 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5308 *
5309 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5310 *
5311 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5312 *
5313 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5314 *
5315 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5316 * TODO Parameter-entity replacement text must be properly nested
5317 * with parenthesized groups. That is to say, if either of the
5318 * opening or closing parentheses in a choice, seq, or Mixed
5319 * construct is contained in the replacement text for a parameter
5320 * entity, both must be contained in the same replacement text. For
5321 * interoperability, if a parameter-entity reference appears in a
5322 * choice, seq, or Mixed construct, its replacement text should not
5323 * be empty, and neither the first nor last non-blank character of
5324 * the replacement text should be a connector (| or ,).
5325 *
5326 * Returns the tree of xmlElementContentPtr describing the element
5327 * hierarchy.
5328 */
5329xmlElementContentPtr
5330xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5331 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5332 const xmlChar *elem;
5333 xmlChar type = 0;
5334
5335 if (ctxt->depth > 128) {
5336 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5337 "xmlParseElementChildrenContentDecl : depth %d too deep\n",
5338 ctxt->depth);
5339 return(NULL);
5340 }
5341 SKIP_BLANKS;
5342 GROW;
5343 if (RAW == '(') {
5344 int inputid = ctxt->input->id;
5345
5346 /* Recurse on first child */
5347 NEXT;
5348 SKIP_BLANKS;
5349 ctxt->depth++;
5350 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5351 ctxt->depth--;
5352 SKIP_BLANKS;
5353 GROW;
5354 } else {
5355 elem = xmlParseName(ctxt);
5356 if (elem == NULL) {
5357 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5358 return(NULL);
5359 }
5360 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5361 if (cur == NULL) {
5362 xmlErrMemory(ctxt, NULL);
5363 return(NULL);
5364 }
5365 GROW;
5366 if (RAW == '?') {
5367 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5368 NEXT;
5369 } else if (RAW == '*') {
5370 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5371 NEXT;
5372 } else if (RAW == '+') {
5373 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5374 NEXT;
5375 } else {
5376 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5377 }
5378 GROW;
5379 }
5380 SKIP_BLANKS;
5381 SHRINK;
5382 while (RAW != ')') {
5383 /*
5384 * Each loop we parse one separator and one element.
5385 */
5386 if (RAW == ',') {
5387 if (type == 0) type = CUR;
5388
5389 /*
5390 * Detect "Name | Name , Name" error
5391 */
5392 else if (type != CUR) {
5393 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5394 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5395 type);
5396 if ((last != NULL) && (last != ret))
5397 xmlFreeDocElementContent(ctxt->myDoc, last);
5398 if (ret != NULL)
5399 xmlFreeDocElementContent(ctxt->myDoc, ret);
5400 return(NULL);
5401 }
5402 NEXT;
5403
5404 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5405 if (op == NULL) {
5406 if ((last != NULL) && (last != ret))
5407 xmlFreeDocElementContent(ctxt->myDoc, last);
5408 xmlFreeDocElementContent(ctxt->myDoc, ret);
5409 return(NULL);
5410 }
5411 if (last == NULL) {
5412 op->c1 = ret;
5413 if (ret != NULL)
5414 ret->parent = op;
5415 ret = cur = op;
5416 } else {
5417 cur->c2 = op;
5418 if (op != NULL)
5419 op->parent = cur;
5420 op->c1 = last;
5421 if (last != NULL)
5422 last->parent = op;
5423 cur =op;
5424 last = NULL;
5425 }
5426 } else if (RAW == '|') {
5427 if (type == 0) type = CUR;
5428
5429 /*
5430 * Detect "Name , Name | Name" error
5431 */
5432 else if (type != CUR) {
5433 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5434 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5435 type);
5436 if ((last != NULL) && (last != ret))
5437 xmlFreeDocElementContent(ctxt->myDoc, last);
5438 if (ret != NULL)
5439 xmlFreeDocElementContent(ctxt->myDoc, ret);
5440 return(NULL);
5441 }
5442 NEXT;
5443
5444 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5445 if (op == NULL) {
5446 if ((last != NULL) && (last != ret))
5447 xmlFreeDocElementContent(ctxt->myDoc, last);
5448 if (ret != NULL)
5449 xmlFreeDocElementContent(ctxt->myDoc, ret);
5450 return(NULL);
5451 }
5452 if (last == NULL) {
5453 op->c1 = ret;
5454 if (ret != NULL)
5455 ret->parent = op;
5456 ret = cur = op;
5457 } else {
5458 cur->c2 = op;
5459 if (op != NULL)
5460 op->parent = cur;
5461 op->c1 = last;
5462 if (last != NULL)
5463 last->parent = op;
5464 cur =op;
5465 last = NULL;
5466 }
5467 } else {
5468 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5469 if (ret != NULL)
5470 xmlFreeDocElementContent(ctxt->myDoc, ret);
5471 return(NULL);
5472 }
5473 GROW;
5474 SKIP_BLANKS;
5475 GROW;
5476 if (RAW == '(') {
5477 int inputid = ctxt->input->id;
5478 /* Recurse on second child */
5479 NEXT;
5480 SKIP_BLANKS;
5481 ctxt->depth++;
5482 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5483 ctxt->depth--;
5484 SKIP_BLANKS;
5485 } else {
5486 elem = xmlParseName(ctxt);
5487 if (elem == NULL) {
5488 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5489 if (ret != NULL)
5490 xmlFreeDocElementContent(ctxt->myDoc, ret);
5491 return(NULL);
5492 }
5493 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5494 if (RAW == '?') {
5495 last->ocur = XML_ELEMENT_CONTENT_OPT;
5496 NEXT;
5497 } else if (RAW == '*') {
5498 last->ocur = XML_ELEMENT_CONTENT_MULT;
5499 NEXT;
5500 } else if (RAW == '+') {
5501 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5502 NEXT;
5503 } else {
5504 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5505 }
5506 }
5507 SKIP_BLANKS;
5508 GROW;
5509 }
5510 if ((cur != NULL) && (last != NULL)) {
5511 cur->c2 = last;
5512 if (last != NULL)
5513 last->parent = cur;
5514 }
5515 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5516 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5517"Element content declaration doesn't start and stop in the same entity\n",
5518 NULL);
5519 }
5520 NEXT;
5521 if (RAW == '?') {
5522 if (ret != NULL) {
5523 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5524 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5525 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5526 else
5527 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5528 }
5529 NEXT;
5530 } else if (RAW == '*') {
5531 if (ret != NULL) {
5532 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5533 cur = ret;
5534 /*
5535 * Some normalization:
5536 * (a | b* | c?)* == (a | b | c)*
5537 */
5538 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5539 if ((cur->c1 != NULL) &&
5540 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5541 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5542 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5543 if ((cur->c2 != NULL) &&
5544 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5545 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5546 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5547 cur = cur->c2;
5548 }
5549 }
5550 NEXT;
5551 } else if (RAW == '+') {
5552 if (ret != NULL) {
5553 int found = 0;
5554
5555 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5556 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5557 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5558 else
5559 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5560 /*
5561 * Some normalization:
5562 * (a | b*)+ == (a | b)*
5563 * (a | b?)+ == (a | b)*
5564 */
5565 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5566 if ((cur->c1 != NULL) &&
5567 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5568 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5569 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5570 found = 1;
5571 }
5572 if ((cur->c2 != NULL) &&
5573 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5574 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5575 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5576 found = 1;
5577 }
5578 cur = cur->c2;
5579 }
5580 if (found)
5581 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5582 }
5583 NEXT;
5584 }
5585 return(ret);
5586}
5587
5588/**
5589 * xmlParseElementContentDecl:
5590 * @ctxt: an XML parser context
5591 * @name: the name of the element being defined.
5592 * @result: the Element Content pointer will be stored here if any
5593 *
5594 * parse the declaration for an Element content either Mixed or Children,
5595 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5596 *
5597 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5598 *
5599 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5600 */
5601
5602int
5603xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5604 xmlElementContentPtr *result) {
5605
5606 xmlElementContentPtr tree = NULL;
5607 int inputid = ctxt->input->id;
5608 int res;
5609
5610 *result = NULL;
5611
5612 if (RAW != '(') {
5613 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5614 "xmlParseElementContentDecl : %s '(' expected\n", name);
5615 return(-1);
5616 }
5617 NEXT;
5618 GROW;
5619 SKIP_BLANKS;
5620 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5621 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5622 res = XML_ELEMENT_TYPE_MIXED;
5623 } else {
5624 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5625 res = XML_ELEMENT_TYPE_ELEMENT;
5626 }
5627 SKIP_BLANKS;
5628 *result = tree;
5629 return(res);
5630}
5631
5632/**
5633 * xmlParseElementDecl:
5634 * @ctxt: an XML parser context
5635 *
5636 * parse an Element declaration.
5637 *
5638 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5639 *
5640 * [ VC: Unique Element Type Declaration ]
5641 * No element type may be declared more than once
5642 *
5643 * Returns the type of the element, or -1 in case of error
5644 */
5645int
5646xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5647 const xmlChar *name;
5648 int ret = -1;
5649 xmlElementContentPtr content = NULL;
5650
5651 /* GROW; done in the caller */
5652 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5653 xmlParserInputPtr input = ctxt->input;
5654
5655 SKIP(9);
5656 if (!IS_BLANK_CH(CUR)) {
5657 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5658 "Space required after 'ELEMENT'\n");
5659 }
5660 SKIP_BLANKS;
5661 name = xmlParseName(ctxt);
5662 if (name == NULL) {
5663 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5664 "xmlParseElementDecl: no name for Element\n");
5665 return(-1);
5666 }
5667 while ((RAW == 0) && (ctxt->inputNr > 1))
5668 xmlPopInput(ctxt);
5669 if (!IS_BLANK_CH(CUR)) {
5670 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5671 "Space required after the element name\n");
5672 }
5673 SKIP_BLANKS;
5674 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5675 SKIP(5);
5676 /*
5677 * Element must always be empty.
5678 */
5679 ret = XML_ELEMENT_TYPE_EMPTY;
5680 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5681 (NXT(2) == 'Y')) {
5682 SKIP(3);
5683 /*
5684 * Element is a generic container.
5685 */
5686 ret = XML_ELEMENT_TYPE_ANY;
5687 } else if (RAW == '(') {
5688 ret = xmlParseElementContentDecl(ctxt, name, &content);
5689 } else {
5690 /*
5691 * [ WFC: PEs in Internal Subset ] error handling.
5692 */
5693 if ((RAW == '%') && (ctxt->external == 0) &&
5694 (ctxt->inputNr == 1)) {
5695 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5696 "PEReference: forbidden within markup decl in internal subset\n");
5697 } else {
5698 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5699 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5700 }
5701 return(-1);
5702 }
5703
5704 SKIP_BLANKS;
5705 /*
5706 * Pop-up of finished entities.
5707 */
5708 while ((RAW == 0) && (ctxt->inputNr > 1))
5709 xmlPopInput(ctxt);
5710 SKIP_BLANKS;
5711
5712 if (RAW != '>') {
5713 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5714 if (content != NULL) {
5715 xmlFreeDocElementContent(ctxt->myDoc, content);
5716 }
5717 } else {
5718 if (input != ctxt->input) {
5719 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5720 "Element declaration doesn't start and stop in the same entity\n");
5721 }
5722
5723 NEXT;
5724 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5725 (ctxt->sax->elementDecl != NULL)) {
5726 if (content != NULL)
5727 content->parent = NULL;
5728 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5729 content);
5730 if ((content != NULL) && (content->parent == NULL)) {
5731 /*
5732 * this is a trick: if xmlAddElementDecl is called,
5733 * instead of copying the full tree it is plugged directly
5734 * if called from the parser. Avoid duplicating the
5735 * interfaces or change the API/ABI
5736 */
5737 xmlFreeDocElementContent(ctxt->myDoc, content);
5738 }
5739 } else if (content != NULL) {
5740 xmlFreeDocElementContent(ctxt->myDoc, content);
5741 }
5742 }
5743 }
5744 return(ret);
5745}
5746
5747/**
5748 * xmlParseConditionalSections
5749 * @ctxt: an XML parser context
5750 *
5751 * [61] conditionalSect ::= includeSect | ignoreSect
5752 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5753 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5754 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5755 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5756 */
5757
5758static void
5759xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5760 SKIP(3);
5761 SKIP_BLANKS;
5762 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5763 SKIP(7);
5764 SKIP_BLANKS;
5765 if (RAW != '[') {
5766 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5767 } else {
5768 NEXT;
5769 }
5770 if (xmlParserDebugEntities) {
5771 if ((ctxt->input != NULL) && (ctxt->input->filename))
5772 xmlGenericError(xmlGenericErrorContext,
5773 "%s(%d): ", ctxt->input->filename,
5774 ctxt->input->line);
5775 xmlGenericError(xmlGenericErrorContext,
5776 "Entering INCLUDE Conditional Section\n");
5777 }
5778
5779 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5780 (NXT(2) != '>'))) {
5781 const xmlChar *check = CUR_PTR;
5782 unsigned int cons = ctxt->input->consumed;
5783
5784 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5785 xmlParseConditionalSections(ctxt);
5786 } else if (IS_BLANK_CH(CUR)) {
5787 NEXT;
5788 } else if (RAW == '%') {
5789 xmlParsePEReference(ctxt);
5790 } else
5791 xmlParseMarkupDecl(ctxt);
5792
5793 /*
5794 * Pop-up of finished entities.
5795 */
5796 while ((RAW == 0) && (ctxt->inputNr > 1))
5797 xmlPopInput(ctxt);
5798
5799 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5800 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5801 break;
5802 }
5803 }
5804 if (xmlParserDebugEntities) {
5805 if ((ctxt->input != NULL) && (ctxt->input->filename))
5806 xmlGenericError(xmlGenericErrorContext,
5807 "%s(%d): ", ctxt->input->filename,
5808 ctxt->input->line);
5809 xmlGenericError(xmlGenericErrorContext,
5810 "Leaving INCLUDE Conditional Section\n");
5811 }
5812
5813 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5814 int state;
5815 xmlParserInputState instate;
5816 int depth = 0;
5817
5818 SKIP(6);
5819 SKIP_BLANKS;
5820 if (RAW != '[') {
5821 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5822 } else {
5823 NEXT;
5824 }
5825 if (xmlParserDebugEntities) {
5826 if ((ctxt->input != NULL) && (ctxt->input->filename))
5827 xmlGenericError(xmlGenericErrorContext,
5828 "%s(%d): ", ctxt->input->filename,
5829 ctxt->input->line);
5830 xmlGenericError(xmlGenericErrorContext,
5831 "Entering IGNORE Conditional Section\n");
5832 }
5833
5834 /*
5835 * Parse up to the end of the conditional section
5836 * But disable SAX event generating DTD building in the meantime
5837 */
5838 state = ctxt->disableSAX;
5839 instate = ctxt->instate;
5840 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5841 ctxt->instate = XML_PARSER_IGNORE;
5842
5843 while ((depth >= 0) && (RAW != 0)) {
5844 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5845 depth++;
5846 SKIP(3);
5847 continue;
5848 }
5849 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5850 if (--depth >= 0) SKIP(3);
5851 continue;
5852 }
5853 NEXT;
5854 continue;
5855 }
5856
5857 ctxt->disableSAX = state;
5858 ctxt->instate = instate;
5859
5860 if (xmlParserDebugEntities) {
5861 if ((ctxt->input != NULL) && (ctxt->input->filename))
5862 xmlGenericError(xmlGenericErrorContext,
5863 "%s(%d): ", ctxt->input->filename,
5864 ctxt->input->line);
5865 xmlGenericError(xmlGenericErrorContext,
5866 "Leaving IGNORE Conditional Section\n");
5867 }
5868
5869 } else {
5870 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5871 }
5872
5873 if (RAW == 0)
5874 SHRINK;
5875
5876 if (RAW == 0) {
5877 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5878 } else {
5879 SKIP(3);
5880 }
5881}
5882
5883/**
5884 * xmlParseMarkupDecl:
5885 * @ctxt: an XML parser context
5886 *
5887 * parse Markup declarations
5888 *
5889 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5890 * NotationDecl | PI | Comment
5891 *
5892 * [ VC: Proper Declaration/PE Nesting ]
5893 * Parameter-entity replacement text must be properly nested with
5894 * markup declarations. That is to say, if either the first character
5895 * or the last character of a markup declaration (markupdecl above) is
5896 * contained in the replacement text for a parameter-entity reference,
5897 * both must be contained in the same replacement text.
5898 *
5899 * [ WFC: PEs in Internal Subset ]
5900 * In the internal DTD subset, parameter-entity references can occur
5901 * only where markup declarations can occur, not within markup declarations.
5902 * (This does not apply to references that occur in external parameter
5903 * entities or to the external subset.)
5904 */
5905void
5906xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5907 GROW;
5908 if (CUR == '<') {
5909 if (NXT(1) == '!') {
5910 switch (NXT(2)) {
5911 case 'E':
5912 if (NXT(3) == 'L')
5913 xmlParseElementDecl(ctxt);
5914 else if (NXT(3) == 'N')
5915 xmlParseEntityDecl(ctxt);
5916 break;
5917 case 'A':
5918 xmlParseAttributeListDecl(ctxt);
5919 break;
5920 case 'N':
5921 xmlParseNotationDecl(ctxt);
5922 break;
5923 case '-':
5924 xmlParseComment(ctxt);
5925 break;
5926 default:
5927 /* there is an error but it will be detected later */
5928 break;
5929 }
5930 } else if (NXT(1) == '?') {
5931 xmlParsePI(ctxt);
5932 }
5933 }
5934 /*
5935 * This is only for internal subset. On external entities,
5936 * the replacement is done before parsing stage
5937 */
5938 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5939 xmlParsePEReference(ctxt);
5940
5941 /*
5942 * Conditional sections are allowed from entities included
5943 * by PE References in the internal subset.
5944 */
5945 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5946 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5947 xmlParseConditionalSections(ctxt);
5948 }
5949 }
5950
5951 ctxt->instate = XML_PARSER_DTD;
5952}
5953
5954/**
5955 * xmlParseTextDecl:
5956 * @ctxt: an XML parser context
5957 *
5958 * parse an XML declaration header for external entities
5959 *
5960 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5961 *
5962 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5963 */
5964
5965void
5966xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5967 xmlChar *version;
5968 const xmlChar *encoding;
5969
5970 /*
5971 * We know that '<?xml' is here.
5972 */
5973 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5974 SKIP(5);
5975 } else {
5976 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5977 return;
5978 }
5979
5980 if (!IS_BLANK_CH(CUR)) {
5981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982 "Space needed after '<?xml'\n");
5983 }
5984 SKIP_BLANKS;
5985
5986 /*
5987 * We may have the VersionInfo here.
5988 */
5989 version = xmlParseVersionInfo(ctxt);
5990 if (version == NULL)
5991 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5992 else {
5993 if (!IS_BLANK_CH(CUR)) {
5994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5995 "Space needed here\n");
5996 }
5997 }
5998 ctxt->input->version = version;
5999
6000 /*
6001 * We must have the encoding declaration
6002 */
6003 encoding = xmlParseEncodingDecl(ctxt);
6004 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6005 /*
6006 * The XML REC instructs us to stop parsing right here
6007 */
6008 return;
6009 }
6010 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6011 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6012 "Missing encoding in text declaration\n");
6013 }
6014
6015 SKIP_BLANKS;
6016 if ((RAW == '?') && (NXT(1) == '>')) {
6017 SKIP(2);
6018 } else if (RAW == '>') {
6019 /* Deprecated old WD ... */
6020 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6021 NEXT;
6022 } else {
6023 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6024 MOVETO_ENDTAG(CUR_PTR);
6025 NEXT;
6026 }
6027}
6028
6029/**
6030 * xmlParseExternalSubset:
6031 * @ctxt: an XML parser context
6032 * @ExternalID: the external identifier
6033 * @SystemID: the system identifier (or URL)
6034 *
6035 * parse Markup declarations from an external subset
6036 *
6037 * [30] extSubset ::= textDecl? extSubsetDecl
6038 *
6039 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6040 */
6041void
6042xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6043 const xmlChar *SystemID) {
6044 xmlDetectSAX2(ctxt);
6045 GROW;
6046 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6047 xmlParseTextDecl(ctxt);
6048 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6049 /*
6050 * The XML REC instructs us to stop parsing right here
6051 */
6052 ctxt->instate = XML_PARSER_EOF;
6053 return;
6054 }
6055 }
6056 if (ctxt->myDoc == NULL) {
6057 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6058 }
6059 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6060 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6061
6062 ctxt->instate = XML_PARSER_DTD;
6063 ctxt->external = 1;
6064 while (((RAW == '<') && (NXT(1) == '?')) ||
6065 ((RAW == '<') && (NXT(1) == '!')) ||
6066 (RAW == '%') || IS_BLANK_CH(CUR)) {
6067 const xmlChar *check = CUR_PTR;
6068 unsigned int cons = ctxt->input->consumed;
6069
6070 GROW;
6071 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6072 xmlParseConditionalSections(ctxt);
6073 } else if (IS_BLANK_CH(CUR)) {
6074 NEXT;
6075 } else if (RAW == '%') {
6076 xmlParsePEReference(ctxt);
6077 } else
6078 xmlParseMarkupDecl(ctxt);
6079
6080 /*
6081 * Pop-up of finished entities.
6082 */
6083 while ((RAW == 0) && (ctxt->inputNr > 1))
6084 xmlPopInput(ctxt);
6085
6086 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6087 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6088 break;
6089 }
6090 }
6091
6092 if (RAW != 0) {
6093 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6094 }
6095
6096}
6097
6098/**
6099 * xmlParseReference:
6100 * @ctxt: an XML parser context
6101 *
6102 * parse and handle entity references in content, depending on the SAX
6103 * interface, this may end-up in a call to character() if this is a
6104 * CharRef, a predefined entity, if there is no reference() callback.
6105 * or if the parser was asked to switch to that mode.
6106 *
6107 * [67] Reference ::= EntityRef | CharRef
6108 */
6109void
6110xmlParseReference(xmlParserCtxtPtr ctxt) {
6111 xmlEntityPtr ent;
6112 xmlChar *val;
6113 if (RAW != '&') return;
6114
6115 if (NXT(1) == '#') {
6116 int i = 0;
6117 xmlChar out[10];
6118 int hex = NXT(2);
6119 int value = xmlParseCharRef(ctxt);
6120
6121 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6122 /*
6123 * So we are using non-UTF-8 buffers
6124 * Check that the char fit on 8bits, if not
6125 * generate a CharRef.
6126 */
6127 if (value <= 0xFF) {
6128 out[0] = value;
6129 out[1] = 0;
6130 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6131 (!ctxt->disableSAX))
6132 ctxt->sax->characters(ctxt->userData, out, 1);
6133 } else {
6134 if ((hex == 'x') || (hex == 'X'))
6135 snprintf((char *)out, sizeof(out), "#x%X", value);
6136 else
6137 snprintf((char *)out, sizeof(out), "#%d", value);
6138 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6139 (!ctxt->disableSAX))
6140 ctxt->sax->reference(ctxt->userData, out);
6141 }
6142 } else {
6143 /*
6144 * Just encode the value in UTF-8
6145 */
6146 COPY_BUF(0 ,out, i, value);
6147 out[i] = 0;
6148 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6149 (!ctxt->disableSAX))
6150 ctxt->sax->characters(ctxt->userData, out, i);
6151 }
6152 } else {
6153 int was_checked;
6154
6155 ent = xmlParseEntityRef(ctxt);
6156 if (ent == NULL) return;
6157 if (!ctxt->wellFormed)
6158 return;
6159 was_checked = ent->checked;
6160 if ((ent->name != NULL) &&
6161 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6162 xmlNodePtr list = NULL;
6163 xmlParserErrors ret = XML_ERR_OK;
6164
6165
6166 /*
6167 * The first reference to the entity trigger a parsing phase
6168 * where the ent->children is filled with the result from
6169 * the parsing.
6170 */
6171 if (ent->checked == 0) {
6172 xmlChar *value;
6173
6174 value = ent->content;
6175
6176 /*
6177 * Check that this entity is well formed
6178 */
6179 if ((value != NULL) && (value[0] != 0) &&
6180 (value[1] == 0) && (value[0] == '<') &&
6181 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6182 /*
6183 * DONE: get definite answer on this !!!
6184 * Lots of entity decls are used to declare a single
6185 * char
6186 * <!ENTITY lt "<">
6187 * Which seems to be valid since
6188 * 2.4: The ampersand character (&) and the left angle
6189 * bracket (<) may appear in their literal form only
6190 * when used ... They are also legal within the literal
6191 * entity value of an internal entity declaration;i
6192 * see "4.3.2 Well-Formed Parsed Entities".
6193 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6194 * Looking at the OASIS test suite and James Clark
6195 * tests, this is broken. However the XML REC uses
6196 * it. Is the XML REC not well-formed ????
6197 * This is a hack to avoid this problem
6198 *
6199 * ANSWER: since lt gt amp .. are already defined,
6200 * this is a redefinition and hence the fact that the
6201 * content is not well balanced is not a Wf error, this
6202 * is lousy but acceptable.
6203 */
6204 list = xmlNewDocText(ctxt->myDoc, value);
6205 if (list != NULL) {
6206 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6207 (ent->children == NULL)) {
6208 ent->children = list;
6209 ent->last = list;
6210 if (ent->owner == 0)
6211 ent->owner = 1;
6212 list->parent = (xmlNodePtr) ent;
6213 } else {
6214 xmlFreeNodeList(list);
6215 }
6216 } else if (list != NULL) {
6217 xmlFreeNodeList(list);
6218 }
6219 } else {
6220 unsigned long oldnbent = ctxt->nbentities;
6221 /*
6222 * 4.3.2: An internal general parsed entity is well-formed
6223 * if its replacement text matches the production labeled
6224 * content.
6225 */
6226
6227 void *user_data;
6228 /*
6229 * This is a bit hackish but this seems the best
6230 * way to make sure both SAX and DOM entity support
6231 * behaves okay.
6232 */
6233 if (ctxt->userData == ctxt)
6234 user_data = NULL;
6235 else
6236 user_data = ctxt->userData;
6237
6238 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6239 ctxt->depth++;
6240 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6241 value, user_data, &list);
6242 ctxt->depth--;
6243
6244 } else if (ent->etype ==
6245 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6246 ctxt->depth++;
6247 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6248 ctxt->sax, user_data, ctxt->depth,
6249 ent->URI, ent->ExternalID, &list);
6250 ctxt->depth--;
6251 } else {
6252 ret = XML_ERR_ENTITY_PE_INTERNAL;
6253 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6254 "invalid entity type found\n", NULL);
6255 }
6256 /*
6257 * Store the number of entities needing parsing for entity
6258 * content and do checkings
6259 */
6260 if ((ent->owner != 0) || (ent->children == NULL)) {
6261 ent->owner = ctxt->nbentities - oldnbent;
6262 if (ent->owner == 0)
6263 ent->owner = 1;
6264 }
6265 if (ret == XML_ERR_ENTITY_LOOP) {
6266 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6267 xmlFreeNodeList(list);
6268 return;
6269 }
6270 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6271 xmlFreeNodeList(list);
6272 return;
6273 }
6274 if (ret == XML_ERR_ENTITY_LOOP) {
6275 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6276 return;
6277 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6278 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6279 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6280 (ent->children == NULL)) {
6281 ent->children = list;
6282 if (ctxt->replaceEntities) {
6283 /*
6284 * Prune it directly in the generated document
6285 * except for single text nodes.
6286 */
6287 if (((list->type == XML_TEXT_NODE) &&
6288 (list->next == NULL)) ||
6289 (ctxt->parseMode == XML_PARSE_READER)) {
6290 list->parent = (xmlNodePtr) ent;
6291 list = NULL;
6292 if (ent->owner == 0)
6293 ent->owner = 1;
6294 } else {
6295 ent->owner = 0;
6296 while (list != NULL) {
6297 list->parent = (xmlNodePtr) ctxt->node;
6298 list->doc = ctxt->myDoc;
6299 if (list->next == NULL)
6300 ent->last = list;
6301 list = list->next;
6302 }
6303 list = ent->children;
6304#ifdef LIBXML_LEGACY_ENABLED
6305 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6306 xmlAddEntityReference(ent, list, NULL);
6307#endif /* LIBXML_LEGACY_ENABLED */
6308 }
6309 } else {
6310 if (ent->owner == 0)
6311 ent->owner = 1;
6312 while (list != NULL) {
6313 list->parent = (xmlNodePtr) ent;
6314 if (list->next == NULL)
6315 ent->last = list;
6316 list = list->next;
6317 }
6318 }
6319 } else {
6320 xmlFreeNodeList(list);
6321 list = NULL;
6322 }
6323 } else if ((ret != XML_ERR_OK) &&
6324 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6325 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6326 "Entity '%s' failed to parse\n", ent->name);
6327 } else if (list != NULL) {
6328 xmlFreeNodeList(list);
6329 list = NULL;
6330 } else if (ent->owner != 1) {
6331 ctxt->nbentities += ent->owner;
6332 }
6333 }
6334 ent->checked = 1;
6335 }
6336
6337 if (ent->children == NULL) {
6338 /*
6339 * Probably running in SAX mode and the callbacks don't
6340 * build the entity content. So unless we already went
6341 * though parsing for first checking go though the entity
6342 * content to generate callbacks associated to the entity
6343 */
6344 if (was_checked == 1) {
6345 void *user_data;
6346 /*
6347 * This is a bit hackish but this seems the best
6348 * way to make sure both SAX and DOM entity support
6349 * behaves okay.
6350 */
6351 if (ctxt->userData == ctxt)
6352 user_data = NULL;
6353 else
6354 user_data = ctxt->userData;
6355
6356 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6357 ctxt->depth++;
6358 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6359 ent->content, user_data, NULL);
6360 ctxt->depth--;
6361 } else if (ent->etype ==
6362 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6363 ctxt->depth++;
6364 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6365 ctxt->sax, user_data, ctxt->depth,
6366 ent->URI, ent->ExternalID, NULL);
6367 ctxt->depth--;
6368 } else {
6369 ret = XML_ERR_ENTITY_PE_INTERNAL;
6370 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6371 "invalid entity type found\n", NULL);
6372 }
6373 if (ret == XML_ERR_ENTITY_LOOP) {
6374 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6375 return;
6376 }
6377 }
6378 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6379 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6380 /*
6381 * Entity reference callback comes second, it's somewhat
6382 * superfluous but a compatibility to historical behaviour
6383 */
6384 ctxt->sax->reference(ctxt->userData, ent->name);
6385 }
6386 return;
6387 }
6388 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6389 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6390 /*
6391 * Create a node.
6392 */
6393 ctxt->sax->reference(ctxt->userData, ent->name);
6394 return;
6395 }
6396 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6397 /*
6398 * There is a problem on the handling of _private for entities
6399 * (bug 155816): Should we copy the content of the field from
6400 * the entity (possibly overwriting some value set by the user
6401 * when a copy is created), should we leave it alone, or should
6402 * we try to take care of different situations? The problem
6403 * is exacerbated by the usage of this field by the xmlReader.
6404 * To fix this bug, we look at _private on the created node
6405 * and, if it's NULL, we copy in whatever was in the entity.
6406 * If it's not NULL we leave it alone. This is somewhat of a
6407 * hack - maybe we should have further tests to determine
6408 * what to do.
6409 */
6410 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6411 /*
6412 * Seems we are generating the DOM content, do
6413 * a simple tree copy for all references except the first
6414 * In the first occurrence list contains the replacement.
6415 * progressive == 2 means we are operating on the Reader
6416 * and since nodes are discarded we must copy all the time.
6417 */
6418 if (((list == NULL) && (ent->owner == 0)) ||
6419 (ctxt->parseMode == XML_PARSE_READER)) {
6420 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6421
6422 /*
6423 * when operating on a reader, the entities definitions
6424 * are always owning the entities subtree.
6425 if (ctxt->parseMode == XML_PARSE_READER)
6426 ent->owner = 1;
6427 */
6428
6429 cur = ent->children;
6430 while (cur != NULL) {
6431 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6432 if (nw != NULL) {
6433 if (nw->_private == NULL)
6434 nw->_private = cur->_private;
6435 if (firstChild == NULL){
6436 firstChild = nw;
6437 }
6438 nw = xmlAddChild(ctxt->node, nw);
6439 }
6440 if (cur == ent->last) {
6441 /*
6442 * needed to detect some strange empty
6443 * node cases in the reader tests
6444 */
6445 if ((ctxt->parseMode == XML_PARSE_READER) &&
6446 (nw != NULL) &&
6447 (nw->type == XML_ELEMENT_NODE) &&
6448 (nw->children == NULL))
6449 nw->extra = 1;
6450
6451 break;
6452 }
6453 cur = cur->next;
6454 }
6455#ifdef LIBXML_LEGACY_ENABLED
6456 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6457 xmlAddEntityReference(ent, firstChild, nw);
6458#endif /* LIBXML_LEGACY_ENABLED */
6459 } else if (list == NULL) {
6460 xmlNodePtr nw = NULL, cur, next, last,
6461 firstChild = NULL;
6462 /*
6463 * Copy the entity child list and make it the new
6464 * entity child list. The goal is to make sure any
6465 * ID or REF referenced will be the one from the
6466 * document content and not the entity copy.
6467 */
6468 cur = ent->children;
6469 ent->children = NULL;
6470 last = ent->last;
6471 ent->last = NULL;
6472 while (cur != NULL) {
6473 next = cur->next;
6474 cur->next = NULL;
6475 cur->parent = NULL;
6476 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6477 if (nw != NULL) {
6478 if (nw->_private == NULL)
6479 nw->_private = cur->_private;
6480 if (firstChild == NULL){
6481 firstChild = cur;
6482 }
6483 xmlAddChild((xmlNodePtr) ent, nw);
6484 xmlAddChild(ctxt->node, cur);
6485 }
6486 if (cur == last)
6487 break;
6488 cur = next;
6489 }
6490 if (ent->owner == 0)
6491 ent->owner = 1;
6492#ifdef LIBXML_LEGACY_ENABLED
6493 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6494 xmlAddEntityReference(ent, firstChild, nw);
6495#endif /* LIBXML_LEGACY_ENABLED */
6496 } else {
6497 const xmlChar *nbktext;
6498
6499 /*
6500 * the name change is to avoid coalescing of the
6501 * node with a possible previous text one which
6502 * would make ent->children a dangling pointer
6503 */
6504 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6505 -1);
6506 if (ent->children->type == XML_TEXT_NODE)
6507 ent->children->name = nbktext;
6508 if ((ent->last != ent->children) &&
6509 (ent->last->type == XML_TEXT_NODE))
6510 ent->last->name = nbktext;
6511 xmlAddChildList(ctxt->node, ent->children);
6512 }
6513
6514 /*
6515 * This is to avoid a nasty side effect, see
6516 * characters() in SAX.c
6517 */
6518 ctxt->nodemem = 0;
6519 ctxt->nodelen = 0;
6520 return;
6521 }
6522 }
6523 } else {
6524 val = ent->content;
6525 if (val == NULL) return;
6526 /*
6527 * inline the entity.
6528 */
6529 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6530 (!ctxt->disableSAX))
6531 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6532 }
6533 }
6534}
6535
6536/**
6537 * xmlParseEntityRef:
6538 * @ctxt: an XML parser context
6539 *
6540 * parse ENTITY references declarations
6541 *
6542 * [68] EntityRef ::= '&' Name ';'
6543 *
6544 * [ WFC: Entity Declared ]
6545 * In a document without any DTD, a document with only an internal DTD
6546 * subset which contains no parameter entity references, or a document
6547 * with "standalone='yes'", the Name given in the entity reference
6548 * must match that in an entity declaration, except that well-formed
6549 * documents need not declare any of the following entities: amp, lt,
6550 * gt, apos, quot. The declaration of a parameter entity must precede
6551 * any reference to it. Similarly, the declaration of a general entity
6552 * must precede any reference to it which appears in a default value in an
6553 * attribute-list declaration. Note that if entities are declared in the
6554 * external subset or in external parameter entities, a non-validating
6555 * processor is not obligated to read and process their declarations;
6556 * for such documents, the rule that an entity must be declared is a
6557 * well-formedness constraint only if standalone='yes'.
6558 *
6559 * [ WFC: Parsed Entity ]
6560 * An entity reference must not contain the name of an unparsed entity
6561 *
6562 * Returns the xmlEntityPtr if found, or NULL otherwise.
6563 */
6564xmlEntityPtr
6565xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6566 const xmlChar *name;
6567 xmlEntityPtr ent = NULL;
6568
6569 GROW;
6570
6571 if (RAW == '&') {
6572 NEXT;
6573 name = xmlParseName(ctxt);
6574 if (name == NULL) {
6575 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6576 "xmlParseEntityRef: no name\n");
6577 } else {
6578 if (RAW == ';') {
6579 NEXT;
6580 /*
6581 * Increase the number of entity references parsed
6582 */
6583 ctxt->nbentities++;
6584
6585 /*
6586 * Ask first SAX for entity resolution, otherwise try the
6587 * predefined set.
6588 */
6589 if (ctxt->sax != NULL) {
6590 if (ctxt->sax->getEntity != NULL)
6591 ent = ctxt->sax->getEntity(ctxt->userData, name);
6592 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6593 ent = xmlGetPredefinedEntity(name);
6594 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6595 (ctxt->userData==ctxt)) {
6596 ent = xmlSAX2GetEntity(ctxt, name);
6597 }
6598 }
6599 /*
6600 * [ WFC: Entity Declared ]
6601 * In a document without any DTD, a document with only an
6602 * internal DTD subset which contains no parameter entity
6603 * references, or a document with "standalone='yes'", the
6604 * Name given in the entity reference must match that in an
6605 * entity declaration, except that well-formed documents
6606 * need not declare any of the following entities: amp, lt,
6607 * gt, apos, quot.
6608 * The declaration of a parameter entity must precede any
6609 * reference to it.
6610 * Similarly, the declaration of a general entity must
6611 * precede any reference to it which appears in a default
6612 * value in an attribute-list declaration. Note that if
6613 * entities are declared in the external subset or in
6614 * external parameter entities, a non-validating processor
6615 * is not obligated to read and process their declarations;
6616 * for such documents, the rule that an entity must be
6617 * declared is a well-formedness constraint only if
6618 * standalone='yes'.
6619 */
6620 if (ent == NULL) {
6621 if ((ctxt->standalone == 1) ||
6622 ((ctxt->hasExternalSubset == 0) &&
6623 (ctxt->hasPErefs == 0))) {
6624 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6625 "Entity '%s' not defined\n", name);
6626 } else {
6627 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6628 "Entity '%s' not defined\n", name);
6629 if ((ctxt->inSubset == 0) &&
6630 (ctxt->sax != NULL) &&
6631 (ctxt->sax->reference != NULL)) {
6632 ctxt->sax->reference(ctxt->userData, name);
6633 }
6634 }
6635 ctxt->valid = 0;
6636 }
6637
6638 /*
6639 * [ WFC: Parsed Entity ]
6640 * An entity reference must not contain the name of an
6641 * unparsed entity
6642 */
6643 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6644 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6645 "Entity reference to unparsed entity %s\n", name);
6646 }
6647
6648 /*
6649 * [ WFC: No External Entity References ]
6650 * Attribute values cannot contain direct or indirect
6651 * entity references to external entities.
6652 */
6653 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6654 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6655 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6656 "Attribute references external entity '%s'\n", name);
6657 }
6658 /*
6659 * [ WFC: No < in Attribute Values ]
6660 * The replacement text of any entity referred to directly or
6661 * indirectly in an attribute value (other than "&lt;") must
6662 * not contain a <.
6663 */
6664 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6665 (ent != NULL) &&
6666 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6667 (ent->content != NULL) &&
6668 (xmlStrchr(ent->content, '<'))) {
6669 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6670 "'<' in entity '%s' is not allowed in attributes values\n", name);
6671 }
6672
6673 /*
6674 * Internal check, no parameter entities here ...
6675 */
6676 else {
6677 switch (ent->etype) {
6678 case XML_INTERNAL_PARAMETER_ENTITY:
6679 case XML_EXTERNAL_PARAMETER_ENTITY:
6680 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6681 "Attempt to reference the parameter entity '%s'\n",
6682 name);
6683 break;
6684 default:
6685 break;
6686 }
6687 }
6688
6689 /*
6690 * [ WFC: No Recursion ]
6691 * A parsed entity must not contain a recursive reference
6692 * to itself, either directly or indirectly.
6693 * Done somewhere else
6694 */
6695
6696 } else {
6697 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6698 }
6699 }
6700 }
6701 return(ent);
6702}
6703
6704/**
6705 * xmlParseStringEntityRef:
6706 * @ctxt: an XML parser context
6707 * @str: a pointer to an index in the string
6708 *
6709 * parse ENTITY references declarations, but this version parses it from
6710 * a string value.
6711 *
6712 * [68] EntityRef ::= '&' Name ';'
6713 *
6714 * [ WFC: Entity Declared ]
6715 * In a document without any DTD, a document with only an internal DTD
6716 * subset which contains no parameter entity references, or a document
6717 * with "standalone='yes'", the Name given in the entity reference
6718 * must match that in an entity declaration, except that well-formed
6719 * documents need not declare any of the following entities: amp, lt,
6720 * gt, apos, quot. The declaration of a parameter entity must precede
6721 * any reference to it. Similarly, the declaration of a general entity
6722 * must precede any reference to it which appears in a default value in an
6723 * attribute-list declaration. Note that if entities are declared in the
6724 * external subset or in external parameter entities, a non-validating
6725 * processor is not obligated to read and process their declarations;
6726 * for such documents, the rule that an entity must be declared is a
6727 * well-formedness constraint only if standalone='yes'.
6728 *
6729 * [ WFC: Parsed Entity ]
6730 * An entity reference must not contain the name of an unparsed entity
6731 *
6732 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6733 * is updated to the current location in the string.
6734 */
6735xmlEntityPtr
6736xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6737 xmlChar *name;
6738 const xmlChar *ptr;
6739 xmlChar cur;
6740 xmlEntityPtr ent = NULL;
6741
6742 if ((str == NULL) || (*str == NULL))
6743 return(NULL);
6744 ptr = *str;
6745 cur = *ptr;
6746 if (cur == '&') {
6747 ptr++;
6748 cur = *ptr;
6749 name = xmlParseStringName(ctxt, &ptr);
6750 if (name == NULL) {
6751 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6752 "xmlParseStringEntityRef: no name\n");
6753 } else {
6754 if (*ptr == ';') {
6755 ptr++;
6756 /*
6757 * Increase the number of entity references parsed
6758 */
6759 ctxt->nbentities++;
6760 /*
6761 * Ask first SAX for entity resolution, otherwise try the
6762 * predefined set.
6763 */
6764 if (ctxt->sax != NULL) {
6765 if (ctxt->sax->getEntity != NULL)
6766 ent = ctxt->sax->getEntity(ctxt->userData, name);
6767 if (ent == NULL)
6768 ent = xmlGetPredefinedEntity(name);
6769 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6770 ent = xmlSAX2GetEntity(ctxt, name);
6771 }
6772 }
6773 /*
6774 * [ WFC: Entity Declared ]
6775 * In a document without any DTD, a document with only an
6776 * internal DTD subset which contains no parameter entity
6777 * references, or a document with "standalone='yes'", the
6778 * Name given in the entity reference must match that in an
6779 * entity declaration, except that well-formed documents
6780 * need not declare any of the following entities: amp, lt,
6781 * gt, apos, quot.
6782 * The declaration of a parameter entity must precede any
6783 * reference to it.
6784 * Similarly, the declaration of a general entity must
6785 * precede any reference to it which appears in a default
6786 * value in an attribute-list declaration. Note that if
6787 * entities are declared in the external subset or in
6788 * external parameter entities, a non-validating processor
6789 * is not obligated to read and process their declarations;
6790 * for such documents, the rule that an entity must be
6791 * declared is a well-formedness constraint only if
6792 * standalone='yes'.
6793 */
6794 if (ent == NULL) {
6795 if ((ctxt->standalone == 1) ||
6796 ((ctxt->hasExternalSubset == 0) &&
6797 (ctxt->hasPErefs == 0))) {
6798 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6799 "Entity '%s' not defined\n", name);
6800 } else {
6801 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6802 "Entity '%s' not defined\n",
6803 name);
6804 }
6805 /* TODO ? check regressions ctxt->valid = 0; */
6806 }
6807
6808 /*
6809 * [ WFC: Parsed Entity ]
6810 * An entity reference must not contain the name of an
6811 * unparsed entity
6812 */
6813 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6814 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6815 "Entity reference to unparsed entity %s\n", name);
6816 }
6817
6818 /*
6819 * [ WFC: No External Entity References ]
6820 * Attribute values cannot contain direct or indirect
6821 * entity references to external entities.
6822 */
6823 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6824 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6825 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6826 "Attribute references external entity '%s'\n", name);
6827 }
6828 /*
6829 * [ WFC: No < in Attribute Values ]
6830 * The replacement text of any entity referred to directly or
6831 * indirectly in an attribute value (other than "&lt;") must
6832 * not contain a <.
6833 */
6834 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6835 (ent != NULL) &&
6836 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6837 (ent->content != NULL) &&
6838 (xmlStrchr(ent->content, '<'))) {
6839 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6840 "'<' in entity '%s' is not allowed in attributes values\n",
6841 name);
6842 }
6843
6844 /*
6845 * Internal check, no parameter entities here ...
6846 */
6847 else {
6848 switch (ent->etype) {
6849 case XML_INTERNAL_PARAMETER_ENTITY:
6850 case XML_EXTERNAL_PARAMETER_ENTITY:
6851 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6852 "Attempt to reference the parameter entity '%s'\n",
6853 name);
6854 break;
6855 default:
6856 break;
6857 }
6858 }
6859
6860 /*
6861 * [ WFC: No Recursion ]
6862 * A parsed entity must not contain a recursive reference
6863 * to itself, either directly or indirectly.
6864 * Done somewhere else
6865 */
6866
6867 } else {
6868 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6869 }
6870 xmlFree(name);
6871 }
6872 }
6873 *str = ptr;
6874 return(ent);
6875}
6876
6877/**
6878 * xmlParsePEReference:
6879 * @ctxt: an XML parser context
6880 *
6881 * parse PEReference declarations
6882 * The entity content is handled directly by pushing it's content as
6883 * a new input stream.
6884 *
6885 * [69] PEReference ::= '%' Name ';'
6886 *
6887 * [ WFC: No Recursion ]
6888 * A parsed entity must not contain a recursive
6889 * reference to itself, either directly or indirectly.
6890 *
6891 * [ WFC: Entity Declared ]
6892 * In a document without any DTD, a document with only an internal DTD
6893 * subset which contains no parameter entity references, or a document
6894 * with "standalone='yes'", ... ... The declaration of a parameter
6895 * entity must precede any reference to it...
6896 *
6897 * [ VC: Entity Declared ]
6898 * In a document with an external subset or external parameter entities
6899 * with "standalone='no'", ... ... The declaration of a parameter entity
6900 * must precede any reference to it...
6901 *
6902 * [ WFC: In DTD ]
6903 * Parameter-entity references may only appear in the DTD.
6904 * NOTE: misleading but this is handled.
6905 */
6906void
6907xmlParsePEReference(xmlParserCtxtPtr ctxt)
6908{
6909 const xmlChar *name;
6910 xmlEntityPtr entity = NULL;
6911 xmlParserInputPtr input;
6912
6913 if (RAW == '%') {
6914 NEXT;
6915 name = xmlParseName(ctxt);
6916 if (name == NULL) {
6917 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6918 "xmlParsePEReference: no name\n");
6919 } else {
6920 if (RAW == ';') {
6921 NEXT;
6922 /*
6923 * Increase the number of entity references parsed
6924 */
6925 ctxt->nbentities++;
6926
6927 if ((ctxt->sax != NULL) &&
6928 (ctxt->sax->getParameterEntity != NULL))
6929 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6930 name);
6931 if (entity == NULL) {
6932 /*
6933 * [ WFC: Entity Declared ]
6934 * In a document without any DTD, a document with only an
6935 * internal DTD subset which contains no parameter entity
6936 * references, or a document with "standalone='yes'", ...
6937 * ... The declaration of a parameter entity must precede
6938 * any reference to it...
6939 */
6940 if ((ctxt->standalone == 1) ||
6941 ((ctxt->hasExternalSubset == 0) &&
6942 (ctxt->hasPErefs == 0))) {
6943 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6944 "PEReference: %%%s; not found\n",
6945 name);
6946 } else {
6947 /*
6948 * [ VC: Entity Declared ]
6949 * In a document with an external subset or external
6950 * parameter entities with "standalone='no'", ...
6951 * ... The declaration of a parameter entity must
6952 * precede any reference to it...
6953 */
6954 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6955 "PEReference: %%%s; not found\n",
6956 name, NULL);
6957 ctxt->valid = 0;
6958 }
6959 } else {
6960 /*
6961 * Internal checking in case the entity quest barfed
6962 */
6963 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6964 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6965 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6966 "Internal: %%%s; is not a parameter entity\n",
6967 name, NULL);
6968 } else if (ctxt->input->free != deallocblankswrapper) {
6969 input =
6970 xmlNewBlanksWrapperInputStream(ctxt, entity);
6971 xmlPushInput(ctxt, input);
6972 } else {
6973 /*
6974 * TODO !!!
6975 * handle the extra spaces added before and after
6976 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6977 */
6978 input = xmlNewEntityInputStream(ctxt, entity);
6979 xmlPushInput(ctxt, input);
6980 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6981 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6982 (IS_BLANK_CH(NXT(5)))) {
6983 xmlParseTextDecl(ctxt);
6984 if (ctxt->errNo ==
6985 XML_ERR_UNSUPPORTED_ENCODING) {
6986 /*
6987 * The XML REC instructs us to stop parsing
6988 * right here
6989 */
6990 ctxt->instate = XML_PARSER_EOF;
6991 return;
6992 }
6993 }
6994 }
6995 }
6996 ctxt->hasPErefs = 1;
6997 } else {
6998 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6999 }
7000 }
7001 }
7002}
7003
7004/**
7005 * xmlParseStringPEReference:
7006 * @ctxt: an XML parser context
7007 * @str: a pointer to an index in the string
7008 *
7009 * parse PEReference declarations
7010 *
7011 * [69] PEReference ::= '%' Name ';'
7012 *
7013 * [ WFC: No Recursion ]
7014 * A parsed entity must not contain a recursive
7015 * reference to itself, either directly or indirectly.
7016 *
7017 * [ WFC: Entity Declared ]
7018 * In a document without any DTD, a document with only an internal DTD
7019 * subset which contains no parameter entity references, or a document
7020 * with "standalone='yes'", ... ... The declaration of a parameter
7021 * entity must precede any reference to it...
7022 *
7023 * [ VC: Entity Declared ]
7024 * In a document with an external subset or external parameter entities
7025 * with "standalone='no'", ... ... The declaration of a parameter entity
7026 * must precede any reference to it...
7027 *
7028 * [ WFC: In DTD ]
7029 * Parameter-entity references may only appear in the DTD.
7030 * NOTE: misleading but this is handled.
7031 *
7032 * Returns the string of the entity content.
7033 * str is updated to the current value of the index
7034 */
7035xmlEntityPtr
7036xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7037 const xmlChar *ptr;
7038 xmlChar cur;
7039 xmlChar *name;
7040 xmlEntityPtr entity = NULL;
7041
7042 if ((str == NULL) || (*str == NULL)) return(NULL);
7043 ptr = *str;
7044 cur = *ptr;
7045 if (cur == '%') {
7046 ptr++;
7047 cur = *ptr;
7048 name = xmlParseStringName(ctxt, &ptr);
7049 if (name == NULL) {
7050 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7051 "xmlParseStringPEReference: no name\n");
7052 } else {
7053 cur = *ptr;
7054 if (cur == ';') {
7055 ptr++;
7056 cur = *ptr;
7057 /*
7058 * Increase the number of entity references parsed
7059 */
7060 ctxt->nbentities++;
7061
7062 if ((ctxt->sax != NULL) &&
7063 (ctxt->sax->getParameterEntity != NULL))
7064 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7065 name);
7066 if (entity == NULL) {
7067 /*
7068 * [ WFC: Entity Declared ]
7069 * In a document without any DTD, a document with only an
7070 * internal DTD subset which contains no parameter entity
7071 * references, or a document with "standalone='yes'", ...
7072 * ... The declaration of a parameter entity must precede
7073 * any reference to it...
7074 */
7075 if ((ctxt->standalone == 1) ||
7076 ((ctxt->hasExternalSubset == 0) &&
7077 (ctxt->hasPErefs == 0))) {
7078 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7079 "PEReference: %%%s; not found\n", name);
7080 } else {
7081 /*
7082 * [ VC: Entity Declared ]
7083 * In a document with an external subset or external
7084 * parameter entities with "standalone='no'", ...
7085 * ... The declaration of a parameter entity must
7086 * precede any reference to it...
7087 */
7088 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7089 "PEReference: %%%s; not found\n",
7090 name, NULL);
7091 ctxt->valid = 0;
7092 }
7093 } else {
7094 /*
7095 * Internal checking in case the entity quest barfed
7096 */
7097 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7098 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7099 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7100 "%%%s; is not a parameter entity\n",
7101 name, NULL);
7102 }
7103 }
7104 ctxt->hasPErefs = 1;
7105 } else {
7106 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7107 }
7108 xmlFree(name);
7109 }
7110 }
7111 *str = ptr;
7112 return(entity);
7113}
7114
7115/**
7116 * xmlParseDocTypeDecl:
7117 * @ctxt: an XML parser context
7118 *
7119 * parse a DOCTYPE declaration
7120 *
7121 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7122 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7123 *
7124 * [ VC: Root Element Type ]
7125 * The Name in the document type declaration must match the element
7126 * type of the root element.
7127 */
7128
7129void
7130xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7131 const xmlChar *name = NULL;
7132 xmlChar *ExternalID = NULL;
7133 xmlChar *URI = NULL;
7134
7135 /*
7136 * We know that '<!DOCTYPE' has been detected.
7137 */
7138 SKIP(9);
7139
7140 SKIP_BLANKS;
7141
7142 /*
7143 * Parse the DOCTYPE name.
7144 */
7145 name = xmlParseName(ctxt);
7146 if (name == NULL) {
7147 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7148 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7149 }
7150 ctxt->intSubName = name;
7151
7152 SKIP_BLANKS;
7153
7154 /*
7155 * Check for SystemID and ExternalID
7156 */
7157 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7158
7159 if ((URI != NULL) || (ExternalID != NULL)) {
7160 ctxt->hasExternalSubset = 1;
7161 }
7162 ctxt->extSubURI = URI;
7163 ctxt->extSubSystem = ExternalID;
7164
7165 SKIP_BLANKS;
7166
7167 /*
7168 * Create and update the internal subset.
7169 */
7170 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7171 (!ctxt->disableSAX))
7172 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7173
7174 /*
7175 * Is there any internal subset declarations ?
7176 * they are handled separately in xmlParseInternalSubset()
7177 */
7178 if (RAW == '[')
7179 return;
7180
7181 /*
7182 * We should be at the end of the DOCTYPE declaration.
7183 */
7184 if (RAW != '>') {
7185 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7186 }
7187 NEXT;
7188}
7189
7190/**
7191 * xmlParseInternalSubset:
7192 * @ctxt: an XML parser context
7193 *
7194 * parse the internal subset declaration
7195 *
7196 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7197 */
7198
7199static void
7200xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7201 /*
7202 * Is there any DTD definition ?
7203 */
7204 if (RAW == '[') {
7205 ctxt->instate = XML_PARSER_DTD;
7206 NEXT;
7207 /*
7208 * Parse the succession of Markup declarations and
7209 * PEReferences.
7210 * Subsequence (markupdecl | PEReference | S)*
7211 */
7212 while (RAW != ']') {
7213 const xmlChar *check = CUR_PTR;
7214 unsigned int cons = ctxt->input->consumed;
7215
7216 SKIP_BLANKS;
7217 xmlParseMarkupDecl(ctxt);
7218 xmlParsePEReference(ctxt);
7219
7220 /*
7221 * Pop-up of finished entities.
7222 */
7223 while ((RAW == 0) && (ctxt->inputNr > 1))
7224 xmlPopInput(ctxt);
7225
7226 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7227 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7228 "xmlParseInternalSubset: error detected in Markup declaration\n");
7229 break;
7230 }
7231 }
7232 if (RAW == ']') {
7233 NEXT;
7234 SKIP_BLANKS;
7235 }
7236 }
7237
7238 /*
7239 * We should be at the end of the DOCTYPE declaration.
7240 */
7241 if (RAW != '>') {
7242 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7243 }
7244 NEXT;
7245}
7246
7247#ifdef LIBXML_SAX1_ENABLED
7248/**
7249 * xmlParseAttribute:
7250 * @ctxt: an XML parser context
7251 * @value: a xmlChar ** used to store the value of the attribute
7252 *
7253 * parse an attribute
7254 *
7255 * [41] Attribute ::= Name Eq AttValue
7256 *
7257 * [ WFC: No External Entity References ]
7258 * Attribute values cannot contain direct or indirect entity references
7259 * to external entities.
7260 *
7261 * [ WFC: No < in Attribute Values ]
7262 * The replacement text of any entity referred to directly or indirectly in
7263 * an attribute value (other than "&lt;") must not contain a <.
7264 *
7265 * [ VC: Attribute Value Type ]
7266 * The attribute must have been declared; the value must be of the type
7267 * declared for it.
7268 *
7269 * [25] Eq ::= S? '=' S?
7270 *
7271 * With namespace:
7272 *
7273 * [NS 11] Attribute ::= QName Eq AttValue
7274 *
7275 * Also the case QName == xmlns:??? is handled independently as a namespace
7276 * definition.
7277 *
7278 * Returns the attribute name, and the value in *value.
7279 */
7280
7281const xmlChar *
7282xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7283 const xmlChar *name;
7284 xmlChar *val;
7285
7286 *value = NULL;
7287 GROW;
7288 name = xmlParseName(ctxt);
7289 if (name == NULL) {
7290 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7291 "error parsing attribute name\n");
7292 return(NULL);
7293 }
7294
7295 /*
7296 * read the value
7297 */
7298 SKIP_BLANKS;
7299 if (RAW == '=') {
7300 NEXT;
7301 SKIP_BLANKS;
7302 val = xmlParseAttValue(ctxt);
7303 ctxt->instate = XML_PARSER_CONTENT;
7304 } else {
7305 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7306 "Specification mandate value for attribute %s\n", name);
7307 return(NULL);
7308 }
7309
7310 /*
7311 * Check that xml:lang conforms to the specification
7312 * No more registered as an error, just generate a warning now
7313 * since this was deprecated in XML second edition
7314 */
7315 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7316 if (!xmlCheckLanguageID(val)) {
7317 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7318 "Malformed value for xml:lang : %s\n",
7319 val, NULL);
7320 }
7321 }
7322
7323 /*
7324 * Check that xml:space conforms to the specification
7325 */
7326 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7327 if (xmlStrEqual(val, BAD_CAST "default"))
7328 *(ctxt->space) = 0;
7329 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7330 *(ctxt->space) = 1;
7331 else {
7332 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7333"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7334 val, NULL);
7335 }
7336 }
7337
7338 *value = val;
7339 return(name);
7340}
7341
7342/**
7343 * xmlParseStartTag:
7344 * @ctxt: an XML parser context
7345 *
7346 * parse a start of tag either for rule element or
7347 * EmptyElement. In both case we don't parse the tag closing chars.
7348 *
7349 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7350 *
7351 * [ WFC: Unique Att Spec ]
7352 * No attribute name may appear more than once in the same start-tag or
7353 * empty-element tag.
7354 *
7355 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7356 *
7357 * [ WFC: Unique Att Spec ]
7358 * No attribute name may appear more than once in the same start-tag or
7359 * empty-element tag.
7360 *
7361 * With namespace:
7362 *
7363 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7364 *
7365 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7366 *
7367 * Returns the element name parsed
7368 */
7369
7370const xmlChar *
7371xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7372 const xmlChar *name;
7373 const xmlChar *attname;
7374 xmlChar *attvalue;
7375 const xmlChar **atts = ctxt->atts;
7376 int nbatts = 0;
7377 int maxatts = ctxt->maxatts;
7378 int i;
7379
7380 if (RAW != '<') return(NULL);
7381 NEXT1;
7382
7383 name = xmlParseName(ctxt);
7384 if (name == NULL) {
7385 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7386 "xmlParseStartTag: invalid element name\n");
7387 return(NULL);
7388 }
7389
7390 /*
7391 * Now parse the attributes, it ends up with the ending
7392 *
7393 * (S Attribute)* S?
7394 */
7395 SKIP_BLANKS;
7396 GROW;
7397
7398 while ((RAW != '>') &&
7399 ((RAW != '/') || (NXT(1) != '>')) &&
7400 (IS_BYTE_CHAR(RAW))) {
7401 const xmlChar *q = CUR_PTR;
7402 unsigned int cons = ctxt->input->consumed;
7403
7404 attname = xmlParseAttribute(ctxt, &attvalue);
7405 if ((attname != NULL) && (attvalue != NULL)) {
7406 /*
7407 * [ WFC: Unique Att Spec ]
7408 * No attribute name may appear more than once in the same
7409 * start-tag or empty-element tag.
7410 */
7411 for (i = 0; i < nbatts;i += 2) {
7412 if (xmlStrEqual(atts[i], attname)) {
7413 xmlErrAttributeDup(ctxt, NULL, attname);
7414 xmlFree(attvalue);
7415 goto failed;
7416 }
7417 }
7418 /*
7419 * Add the pair to atts
7420 */
7421 if (atts == NULL) {
7422 maxatts = 22; /* allow for 10 attrs by default */
7423 atts = (const xmlChar **)
7424 xmlMalloc(maxatts * sizeof(xmlChar *));
7425 if (atts == NULL) {
7426 xmlErrMemory(ctxt, NULL);
7427 if (attvalue != NULL)
7428 xmlFree(attvalue);
7429 goto failed;
7430 }
7431 ctxt->atts = atts;
7432 ctxt->maxatts = maxatts;
7433 } else if (nbatts + 4 > maxatts) {
7434 const xmlChar **n;
7435
7436 maxatts *= 2;
7437 n = (const xmlChar **) xmlRealloc((void *) atts,
7438 maxatts * sizeof(const xmlChar *));
7439 if (n == NULL) {
7440 xmlErrMemory(ctxt, NULL);
7441 if (attvalue != NULL)
7442 xmlFree(attvalue);
7443 goto failed;
7444 }
7445 atts = n;
7446 ctxt->atts = atts;
7447 ctxt->maxatts = maxatts;
7448 }
7449 atts[nbatts++] = attname;
7450 atts[nbatts++] = attvalue;
7451 atts[nbatts] = NULL;
7452 atts[nbatts + 1] = NULL;
7453 } else {
7454 if (attvalue != NULL)
7455 xmlFree(attvalue);
7456 }
7457
7458failed:
7459
7460 GROW
7461 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7462 break;
7463 if (!IS_BLANK_CH(RAW)) {
7464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7465 "attributes construct error\n");
7466 }
7467 SKIP_BLANKS;
7468 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7469 (attname == NULL) && (attvalue == NULL)) {
7470 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7471 "xmlParseStartTag: problem parsing attributes\n");
7472 break;
7473 }
7474 SHRINK;
7475 GROW;
7476 }
7477
7478 /*
7479 * SAX: Start of Element !
7480 */
7481 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7482 (!ctxt->disableSAX)) {
7483 if (nbatts > 0)
7484 ctxt->sax->startElement(ctxt->userData, name, atts);
7485 else
7486 ctxt->sax->startElement(ctxt->userData, name, NULL);
7487 }
7488
7489 if (atts != NULL) {
7490 /* Free only the content strings */
7491 for (i = 1;i < nbatts;i+=2)
7492 if (atts[i] != NULL)
7493 xmlFree((xmlChar *) atts[i]);
7494 }
7495 return(name);
7496}
7497
7498/**
7499 * xmlParseEndTag1:
7500 * @ctxt: an XML parser context
7501 * @line: line of the start tag
7502 * @nsNr: number of namespaces on the start tag
7503 *
7504 * parse an end of tag
7505 *
7506 * [42] ETag ::= '</' Name S? '>'
7507 *
7508 * With namespace
7509 *
7510 * [NS 9] ETag ::= '</' QName S? '>'
7511 */
7512
7513static void
7514xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7515 const xmlChar *name;
7516
7517 GROW;
7518 if ((RAW != '<') || (NXT(1) != '/')) {
7519 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7520 "xmlParseEndTag: '</' not found\n");
7521 return;
7522 }
7523 SKIP(2);
7524
7525 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7526
7527 /*
7528 * We should definitely be at the ending "S? '>'" part
7529 */
7530 GROW;
7531 SKIP_BLANKS;
7532 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7533 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7534 } else
7535 NEXT1;
7536
7537 /*
7538 * [ WFC: Element Type Match ]
7539 * The Name in an element's end-tag must match the element type in the
7540 * start-tag.
7541 *
7542 */
7543 if (name != (xmlChar*)1) {
7544 if (name == NULL) name = BAD_CAST "unparseable";
7545 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7546 "Opening and ending tag mismatch: %s line %d and %s\n",
7547 ctxt->name, line, name);
7548 }
7549
7550 /*
7551 * SAX: End of Tag
7552 */
7553 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7554 (!ctxt->disableSAX))
7555 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7556
7557 namePop(ctxt);
7558 spacePop(ctxt);
7559 return;
7560}
7561
7562/**
7563 * xmlParseEndTag:
7564 * @ctxt: an XML parser context
7565 *
7566 * parse an end of tag
7567 *
7568 * [42] ETag ::= '</' Name S? '>'
7569 *
7570 * With namespace
7571 *
7572 * [NS 9] ETag ::= '</' QName S? '>'
7573 */
7574
7575void
7576xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7577 xmlParseEndTag1(ctxt, 0);
7578}
7579#endif /* LIBXML_SAX1_ENABLED */
7580
7581/************************************************************************
7582 * *
7583 * SAX 2 specific operations *
7584 * *
7585 ************************************************************************/
7586
7587static const xmlChar *
7588xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7589 int len = 0, l;
7590 int c;
7591 int count = 0;
7592
7593 /*
7594 * Handler for more complex cases
7595 */
7596 GROW;
7597 c = CUR_CHAR(l);
7598 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7599 (!IS_LETTER(c) && (c != '_'))) {
7600 return(NULL);
7601 }
7602
7603 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7604 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7605 (c == '.') || (c == '-') || (c == '_') ||
7606 (IS_COMBINING(c)) ||
7607 (IS_EXTENDER(c)))) {
7608 if (count++ > 100) {
7609 count = 0;
7610 GROW;
7611 }
7612 len += l;
7613 NEXTL(l);
7614 c = CUR_CHAR(l);
7615 }
7616 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7617}
7618
7619/*
7620 * xmlGetNamespace:
7621 * @ctxt: an XML parser context
7622 * @prefix: the prefix to lookup
7623 *
7624 * Lookup the namespace name for the @prefix (which ca be NULL)
7625 * The prefix must come from the @ctxt->dict dictionnary
7626 *
7627 * Returns the namespace name or NULL if not bound
7628 */
7629static const xmlChar *
7630xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7631 int i;
7632
7633 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7634 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7635 if (ctxt->nsTab[i] == prefix) {
7636 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7637 return(NULL);
7638 return(ctxt->nsTab[i + 1]);
7639 }
7640 return(NULL);
7641}
7642
7643/**
7644 * xmlParseNCName:
7645 * @ctxt: an XML parser context
7646 * @len: lenght of the string parsed
7647 *
7648 * parse an XML name.
7649 *
7650 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7651 * CombiningChar | Extender
7652 *
7653 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7654 *
7655 * Returns the Name parsed or NULL
7656 */
7657
7658static const xmlChar *
7659xmlParseNCName(xmlParserCtxtPtr ctxt) {
7660 const xmlChar *in;
7661 const xmlChar *ret;
7662 int count = 0;
7663
7664 /*
7665 * Accelerator for simple ASCII names
7666 */
7667 in = ctxt->input->cur;
7668 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7669 ((*in >= 0x41) && (*in <= 0x5A)) ||
7670 (*in == '_')) {
7671 in++;
7672 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7673 ((*in >= 0x41) && (*in <= 0x5A)) ||
7674 ((*in >= 0x30) && (*in <= 0x39)) ||
7675 (*in == '_') || (*in == '-') ||
7676 (*in == '.'))
7677 in++;
7678 if ((*in > 0) && (*in < 0x80)) {
7679 count = in - ctxt->input->cur;
7680 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7681 ctxt->input->cur = in;
7682 ctxt->nbChars += count;
7683 ctxt->input->col += count;
7684 if (ret == NULL) {
7685 xmlErrMemory(ctxt, NULL);
7686 }
7687 return(ret);
7688 }
7689 }
7690 return(xmlParseNCNameComplex(ctxt));
7691}
7692
7693/**
7694 * xmlParseQName:
7695 * @ctxt: an XML parser context
7696 * @prefix: pointer to store the prefix part
7697 *
7698 * parse an XML Namespace QName
7699 *
7700 * [6] QName ::= (Prefix ':')? LocalPart
7701 * [7] Prefix ::= NCName
7702 * [8] LocalPart ::= NCName
7703 *
7704 * Returns the Name parsed or NULL
7705 */
7706
7707static const xmlChar *
7708xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7709 const xmlChar *l, *p;
7710
7711 GROW;
7712
7713 l = xmlParseNCName(ctxt);
7714 if (l == NULL) {
7715 if (CUR == ':') {
7716 l = xmlParseName(ctxt);
7717 if (l != NULL) {
7718 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7719 "Failed to parse QName '%s'\n", l, NULL, NULL);
7720 *prefix = NULL;
7721 return(l);
7722 }
7723 }
7724 return(NULL);
7725 }
7726 if (CUR == ':') {
7727 NEXT;
7728 p = l;
7729 l = xmlParseNCName(ctxt);
7730 if (l == NULL) {
7731 xmlChar *tmp;
7732
7733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7734 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7735 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7736 p = xmlDictLookup(ctxt->dict, tmp, -1);
7737 if (tmp != NULL) xmlFree(tmp);
7738 *prefix = NULL;
7739 return(p);
7740 }
7741 if (CUR == ':') {
7742 xmlChar *tmp;
7743
7744 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7745 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7746 NEXT;
7747 tmp = (xmlChar *) xmlParseName(ctxt);
7748 if (tmp != NULL) {
7749 tmp = xmlBuildQName(tmp, l, NULL, 0);
7750 l = xmlDictLookup(ctxt->dict, tmp, -1);
7751 if (tmp != NULL) xmlFree(tmp);
7752 *prefix = p;
7753 return(l);
7754 }
7755 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7756 l = xmlDictLookup(ctxt->dict, tmp, -1);
7757 if (tmp != NULL) xmlFree(tmp);
7758 *prefix = p;
7759 return(l);
7760 }
7761 *prefix = p;
7762 } else
7763 *prefix = NULL;
7764 return(l);
7765}
7766
7767/**
7768 * xmlParseQNameAndCompare:
7769 * @ctxt: an XML parser context
7770 * @name: the localname
7771 * @prefix: the prefix, if any.
7772 *
7773 * parse an XML name and compares for match
7774 * (specialized for endtag parsing)
7775 *
7776 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7777 * and the name for mismatch
7778 */
7779
7780static const xmlChar *
7781xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7782 xmlChar const *prefix) {
7783 const xmlChar *cmp = name;
7784 const xmlChar *in;
7785 const xmlChar *ret;
7786 const xmlChar *prefix2;
7787
7788 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7789
7790 GROW;
7791 in = ctxt->input->cur;
7792
7793 cmp = prefix;
7794 while (*in != 0 && *in == *cmp) {
7795 ++in;
7796 ++cmp;
7797 }
7798 if ((*cmp == 0) && (*in == ':')) {
7799 in++;
7800 cmp = name;
7801 while (*in != 0 && *in == *cmp) {
7802 ++in;
7803 ++cmp;
7804 }
7805 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7806 /* success */
7807 ctxt->input->cur = in;
7808 return((const xmlChar*) 1);
7809 }
7810 }
7811 /*
7812 * all strings coms from the dictionary, equality can be done directly
7813 */
7814 ret = xmlParseQName (ctxt, &prefix2);
7815 if ((ret == name) && (prefix == prefix2))
7816 return((const xmlChar*) 1);
7817 return ret;
7818}
7819
7820/**
7821 * xmlParseAttValueInternal:
7822 * @ctxt: an XML parser context
7823 * @len: attribute len result
7824 * @alloc: whether the attribute was reallocated as a new string
7825 * @normalize: if 1 then further non-CDATA normalization must be done
7826 *
7827 * parse a value for an attribute.
7828 * NOTE: if no normalization is needed, the routine will return pointers
7829 * directly from the data buffer.
7830 *
7831 * 3.3.3 Attribute-Value Normalization:
7832 * Before the value of an attribute is passed to the application or
7833 * checked for validity, the XML processor must normalize it as follows:
7834 * - a character reference is processed by appending the referenced
7835 * character to the attribute value
7836 * - an entity reference is processed by recursively processing the
7837 * replacement text of the entity
7838 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7839 * appending #x20 to the normalized value, except that only a single
7840 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7841 * parsed entity or the literal entity value of an internal parsed entity
7842 * - other characters are processed by appending them to the normalized value
7843 * If the declared value is not CDATA, then the XML processor must further
7844 * process the normalized attribute value by discarding any leading and
7845 * trailing space (#x20) characters, and by replacing sequences of space
7846 * (#x20) characters by a single space (#x20) character.
7847 * All attributes for which no declaration has been read should be treated
7848 * by a non-validating parser as if declared CDATA.
7849 *
7850 * Returns the AttValue parsed or NULL. The value has to be freed by the
7851 * caller if it was copied, this can be detected by val[*len] == 0.
7852 */
7853
7854static xmlChar *
7855xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7856 int normalize)
7857{
7858 xmlChar limit = 0;
7859 const xmlChar *in = NULL, *start, *end, *last;
7860 xmlChar *ret = NULL;
7861
7862 GROW;
7863 in = (xmlChar *) CUR_PTR;
7864 if (*in != '"' && *in != '\'') {
7865 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7866 return (NULL);
7867 }
7868 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7869
7870 /*
7871 * try to handle in this routine the most common case where no
7872 * allocation of a new string is required and where content is
7873 * pure ASCII.
7874 */
7875 limit = *in++;
7876 end = ctxt->input->end;
7877 start = in;
7878 if (in >= end) {
7879 const xmlChar *oldbase = ctxt->input->base;
7880 GROW;
7881 if (oldbase != ctxt->input->base) {
7882 long delta = ctxt->input->base - oldbase;
7883 start = start + delta;
7884 in = in + delta;
7885 }
7886 end = ctxt->input->end;
7887 }
7888 if (normalize) {
7889 /*
7890 * Skip any leading spaces
7891 */
7892 while ((in < end) && (*in != limit) &&
7893 ((*in == 0x20) || (*in == 0x9) ||
7894 (*in == 0xA) || (*in == 0xD))) {
7895 in++;
7896 start = in;
7897 if (in >= end) {
7898 const xmlChar *oldbase = ctxt->input->base;
7899 GROW;
7900 if (oldbase != ctxt->input->base) {
7901 long delta = ctxt->input->base - oldbase;
7902 start = start + delta;
7903 in = in + delta;
7904 }
7905 end = ctxt->input->end;
7906 }
7907 }
7908 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7909 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7910 if ((*in++ == 0x20) && (*in == 0x20)) break;
7911 if (in >= end) {
7912 const xmlChar *oldbase = ctxt->input->base;
7913 GROW;
7914 if (oldbase != ctxt->input->base) {
7915 long delta = ctxt->input->base - oldbase;
7916 start = start + delta;
7917 in = in + delta;
7918 }
7919 end = ctxt->input->end;
7920 }
7921 }
7922 last = in;
7923 /*
7924 * skip the trailing blanks
7925 */
7926 while ((last[-1] == 0x20) && (last > start)) last--;
7927 while ((in < end) && (*in != limit) &&
7928 ((*in == 0x20) || (*in == 0x9) ||
7929 (*in == 0xA) || (*in == 0xD))) {
7930 in++;
7931 if (in >= end) {
7932 const xmlChar *oldbase = ctxt->input->base;
7933 GROW;
7934 if (oldbase != ctxt->input->base) {
7935 long delta = ctxt->input->base - oldbase;
7936 start = start + delta;
7937 in = in + delta;
7938 last = last + delta;
7939 }
7940 end = ctxt->input->end;
7941 }
7942 }
7943 if (*in != limit) goto need_complex;
7944 } else {
7945 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7946 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7947 in++;
7948 if (in >= end) {
7949 const xmlChar *oldbase = ctxt->input->base;
7950 GROW;
7951 if (oldbase != ctxt->input->base) {
7952 long delta = ctxt->input->base - oldbase;
7953 start = start + delta;
7954 in = in + delta;
7955 }
7956 end = ctxt->input->end;
7957 }
7958 }
7959 last = in;
7960 if (*in != limit) goto need_complex;
7961 }
7962 in++;
7963 if (len != NULL) {
7964 *len = last - start;
7965 ret = (xmlChar *) start;
7966 } else {
7967 if (alloc) *alloc = 1;
7968 ret = xmlStrndup(start, last - start);
7969 }
7970 CUR_PTR = in;
7971 if (alloc) *alloc = 0;
7972 return ret;
7973need_complex:
7974 if (alloc) *alloc = 1;
7975 return xmlParseAttValueComplex(ctxt, len, normalize);
7976}
7977
7978/**
7979 * xmlParseAttribute2:
7980 * @ctxt: an XML parser context
7981 * @pref: the element prefix
7982 * @elem: the element name
7983 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7984 * @value: a xmlChar ** used to store the value of the attribute
7985 * @len: an int * to save the length of the attribute
7986 * @alloc: an int * to indicate if the attribute was allocated
7987 *
7988 * parse an attribute in the new SAX2 framework.
7989 *
7990 * Returns the attribute name, and the value in *value, .
7991 */
7992
7993static const xmlChar *
7994xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7995 const xmlChar *pref, const xmlChar *elem,
7996 const xmlChar **prefix, xmlChar **value,
7997 int *len, int *alloc) {
7998 const xmlChar *name;
7999 xmlChar *val, *internal_val = NULL;
8000 int normalize = 0;
8001
8002 *value = NULL;
8003 GROW;
8004 name = xmlParseQName(ctxt, prefix);
8005 if (name == NULL) {
8006 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8007 "error parsing attribute name\n");
8008 return(NULL);
8009 }
8010
8011 /*
8012 * get the type if needed
8013 */
8014 if (ctxt->attsSpecial != NULL) {
8015 int type;
8016
8017 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
8018 pref, elem, *prefix, name);
8019 if (type != 0) normalize = 1;
8020 }
8021
8022 /*
8023 * read the value
8024 */
8025 SKIP_BLANKS;
8026 if (RAW == '=') {
8027 NEXT;
8028 SKIP_BLANKS;
8029 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8030 ctxt->instate = XML_PARSER_CONTENT;
8031 } else {
8032 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8033 "Specification mandate value for attribute %s\n", name);
8034 return(NULL);
8035 }
8036
8037 if (*prefix == ctxt->str_xml) {
8038 /*
8039 * Check that xml:lang conforms to the specification
8040 * No more registered as an error, just generate a warning now
8041 * since this was deprecated in XML second edition
8042 */
8043 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8044 internal_val = xmlStrndup(val, *len);
8045 if (!xmlCheckLanguageID(internal_val)) {
8046 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8047 "Malformed value for xml:lang : %s\n",
8048 internal_val, NULL);
8049 }
8050 }
8051
8052 /*
8053 * Check that xml:space conforms to the specification
8054 */
8055 if (xmlStrEqual(name, BAD_CAST "space")) {
8056 internal_val = xmlStrndup(val, *len);
8057 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8058 *(ctxt->space) = 0;
8059 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8060 *(ctxt->space) = 1;
8061 else {
8062 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8063"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8064 internal_val, NULL);
8065 }
8066 }
8067 if (internal_val) {
8068 xmlFree(internal_val);
8069 }
8070 }
8071
8072 *value = val;
8073 return(name);
8074}
8075
8076/**
8077 * xmlParseStartTag2:
8078 * @ctxt: an XML parser context
8079 *
8080 * parse a start of tag either for rule element or
8081 * EmptyElement. In both case we don't parse the tag closing chars.
8082 * This routine is called when running SAX2 parsing
8083 *
8084 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8085 *
8086 * [ WFC: Unique Att Spec ]
8087 * No attribute name may appear more than once in the same start-tag or
8088 * empty-element tag.
8089 *
8090 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8091 *
8092 * [ WFC: Unique Att Spec ]
8093 * No attribute name may appear more than once in the same start-tag or
8094 * empty-element tag.
8095 *
8096 * With namespace:
8097 *
8098 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8099 *
8100 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8101 *
8102 * Returns the element name parsed
8103 */
8104
8105static const xmlChar *
8106xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8107 const xmlChar **URI, int *tlen) {
8108 const xmlChar *localname;
8109 const xmlChar *prefix;
8110 const xmlChar *attname;
8111 const xmlChar *aprefix;
8112 const xmlChar *nsname;
8113 xmlChar *attvalue;
8114 const xmlChar **atts = ctxt->atts;
8115 int maxatts = ctxt->maxatts;
8116 int nratts, nbatts, nbdef;
8117 int i, j, nbNs, attval, oldline, oldcol;
8118 const xmlChar *base;
8119 unsigned long cur;
8120 int nsNr = ctxt->nsNr;
8121
8122 if (RAW != '<') return(NULL);
8123 NEXT1;
8124
8125 /*
8126 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8127 * point since the attribute values may be stored as pointers to
8128 * the buffer and calling SHRINK would destroy them !
8129 * The Shrinking is only possible once the full set of attribute
8130 * callbacks have been done.
8131 */
8132reparse:
8133 SHRINK;
8134 base = ctxt->input->base;
8135 cur = ctxt->input->cur - ctxt->input->base;
8136 oldline = ctxt->input->line;
8137 oldcol = ctxt->input->col;
8138 nbatts = 0;
8139 nratts = 0;
8140 nbdef = 0;
8141 nbNs = 0;
8142 attval = 0;
8143 /* Forget any namespaces added during an earlier parse of this element. */
8144 ctxt->nsNr = nsNr;
8145
8146 localname = xmlParseQName(ctxt, &prefix);
8147 if (localname == NULL) {
8148 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8149 "StartTag: invalid element name\n");
8150 return(NULL);
8151 }
8152 *tlen = ctxt->input->cur - ctxt->input->base - cur;
8153
8154 /*
8155 * Now parse the attributes, it ends up with the ending
8156 *
8157 * (S Attribute)* S?
8158 */
8159 SKIP_BLANKS;
8160 GROW;
8161 if (ctxt->input->base != base) goto base_changed;
8162
8163 while ((RAW != '>') &&
8164 ((RAW != '/') || (NXT(1) != '>')) &&
8165 (IS_BYTE_CHAR(RAW))) {
8166 const xmlChar *q = CUR_PTR;
8167 unsigned int cons = ctxt->input->consumed;
8168 int len = -1, alloc = 0;
8169
8170 attname = xmlParseAttribute2(ctxt, prefix, localname,
8171 &aprefix, &attvalue, &len, &alloc);
8172 if (ctxt->input->base != base) {
8173 if ((attvalue != NULL) && (alloc != 0))
8174 xmlFree(attvalue);
8175 attvalue = NULL;
8176 goto base_changed;
8177 }
8178 if ((attname != NULL) && (attvalue != NULL)) {
8179 if (len < 0) len = xmlStrlen(attvalue);
8180 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8181 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8182 xmlURIPtr uri;
8183
8184 if (*URL != 0) {
8185 uri = xmlParseURI((const char *) URL);
8186 if (uri == NULL) {
8187 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8188 "xmlns: %s not a valid URI\n",
8189 URL, NULL);
8190 } else {
8191 if (uri->scheme == NULL) {
8192 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8193 "xmlns: URI %s is not absolute\n",
8194 URL, NULL);
8195 }
8196 xmlFreeURI(uri);
8197 }
8198 }
8199 /*
8200 * check that it's not a defined namespace
8201 */
8202 for (j = 1;j <= nbNs;j++)
8203 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8204 break;
8205 if (j <= nbNs)
8206 xmlErrAttributeDup(ctxt, NULL, attname);
8207 else
8208 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8209 if (alloc != 0) xmlFree(attvalue);
8210 SKIP_BLANKS;
8211 continue;
8212 }
8213 if (aprefix == ctxt->str_xmlns) {
8214 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8215 xmlURIPtr uri;
8216
8217 if (attname == ctxt->str_xml) {
8218 if (URL != ctxt->str_xml_ns) {
8219 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8220 "xml namespace prefix mapped to wrong URI\n",
8221 NULL, NULL, NULL);
8222 }
8223 /*
8224 * Do not keep a namespace definition node
8225 */
8226 if (alloc != 0) xmlFree(attvalue);
8227 SKIP_BLANKS;
8228 continue;
8229 }
8230 uri = xmlParseURI((const char *) URL);
8231 if (uri == NULL) {
8232 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8233 "xmlns:%s: '%s' is not a valid URI\n",
8234 attname, URL);
8235 } else {
8236 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8237 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8238 "xmlns:%s: URI %s is not absolute\n",
8239 attname, URL);
8240 }
8241 xmlFreeURI(uri);
8242 }
8243
8244 /*
8245 * check that it's not a defined namespace
8246 */
8247 for (j = 1;j <= nbNs;j++)
8248 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8249 break;
8250 if (j <= nbNs)
8251 xmlErrAttributeDup(ctxt, aprefix, attname);
8252 else
8253 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8254 if (alloc != 0) xmlFree(attvalue);
8255 SKIP_BLANKS;
8256 if (ctxt->input->base != base) goto base_changed;
8257 continue;
8258 }
8259
8260 /*
8261 * Add the pair to atts
8262 */
8263 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8264 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8265 if (attvalue[len] == 0)
8266 xmlFree(attvalue);
8267 goto failed;
8268 }
8269 maxatts = ctxt->maxatts;
8270 atts = ctxt->atts;
8271 }
8272 ctxt->attallocs[nratts++] = alloc;
8273 atts[nbatts++] = attname;
8274 atts[nbatts++] = aprefix;
8275 atts[nbatts++] = NULL; /* the URI will be fetched later */
8276 atts[nbatts++] = attvalue;
8277 attvalue += len;
8278 atts[nbatts++] = attvalue;
8279 /*
8280 * tag if some deallocation is needed
8281 */
8282 if (alloc != 0) attval = 1;
8283 } else {
8284 if ((attvalue != NULL) && (attvalue[len] == 0))
8285 xmlFree(attvalue);
8286 }
8287
8288failed:
8289
8290 GROW
8291 if (ctxt->input->base != base) goto base_changed;
8292 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8293 break;
8294 if (!IS_BLANK_CH(RAW)) {
8295 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8296 "attributes construct error\n");
8297 break;
8298 }
8299 SKIP_BLANKS;
8300 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8301 (attname == NULL) && (attvalue == NULL)) {
8302 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8303 "xmlParseStartTag: problem parsing attributes\n");
8304 break;
8305 }
8306 GROW;
8307 if (ctxt->input->base != base) goto base_changed;
8308 }
8309
8310 /*
8311 * The attributes defaulting
8312 */
8313 if (ctxt->attsDefault != NULL) {
8314 xmlDefAttrsPtr defaults;
8315
8316 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8317 if (defaults != NULL) {
8318 for (i = 0;i < defaults->nbAttrs;i++) {
8319 attname = defaults->values[4 * i];
8320 aprefix = defaults->values[4 * i + 1];
8321
8322 /*
8323 * special work for namespaces defaulted defs
8324 */
8325 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8326 /*
8327 * check that it's not a defined namespace
8328 */
8329 for (j = 1;j <= nbNs;j++)
8330 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8331 break;
8332 if (j <= nbNs) continue;
8333
8334 nsname = xmlGetNamespace(ctxt, NULL);
8335 if (nsname != defaults->values[4 * i + 2]) {
8336 if (nsPush(ctxt, NULL,
8337 defaults->values[4 * i + 2]) > 0)
8338 nbNs++;
8339 }
8340 } else if (aprefix == ctxt->str_xmlns) {
8341 /*
8342 * check that it's not a defined namespace
8343 */
8344 for (j = 1;j <= nbNs;j++)
8345 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8346 break;
8347 if (j <= nbNs) continue;
8348
8349 nsname = xmlGetNamespace(ctxt, attname);
8350 if (nsname != defaults->values[2]) {
8351 if (nsPush(ctxt, attname,
8352 defaults->values[4 * i + 2]) > 0)
8353 nbNs++;
8354 }
8355 } else {
8356 /*
8357 * check that it's not a defined attribute
8358 */
8359 for (j = 0;j < nbatts;j+=5) {
8360 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8361 break;
8362 }
8363 if (j < nbatts) continue;
8364
8365 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8366 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8367 return(NULL);
8368 }
8369 maxatts = ctxt->maxatts;
8370 atts = ctxt->atts;
8371 }
8372 atts[nbatts++] = attname;
8373 atts[nbatts++] = aprefix;
8374 if (aprefix == NULL)
8375 atts[nbatts++] = NULL;
8376 else
8377 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8378 atts[nbatts++] = defaults->values[4 * i + 2];
8379 atts[nbatts++] = defaults->values[4 * i + 3];
8380 nbdef++;
8381 }
8382 }
8383 }
8384 }
8385
8386 /*
8387 * The attributes checkings
8388 */
8389 for (i = 0; i < nbatts;i += 5) {
8390 /*
8391 * The default namespace does not apply to attribute names.
8392 */
8393 if (atts[i + 1] != NULL) {
8394 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8395 if (nsname == NULL) {
8396 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8397 "Namespace prefix %s for %s on %s is not defined\n",
8398 atts[i + 1], atts[i], localname);
8399 }
8400 atts[i + 2] = nsname;
8401 } else
8402 nsname = NULL;
8403 /*
8404 * [ WFC: Unique Att Spec ]
8405 * No attribute name may appear more than once in the same
8406 * start-tag or empty-element tag.
8407 * As extended by the Namespace in XML REC.
8408 */
8409 for (j = 0; j < i;j += 5) {
8410 if (atts[i] == atts[j]) {
8411 if (atts[i+1] == atts[j+1]) {
8412 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8413 break;
8414 }
8415 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8416 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8417 "Namespaced Attribute %s in '%s' redefined\n",
8418 atts[i], nsname, NULL);
8419 break;
8420 }
8421 }
8422 }
8423 }
8424
8425 nsname = xmlGetNamespace(ctxt, prefix);
8426 if ((prefix != NULL) && (nsname == NULL)) {
8427 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8428 "Namespace prefix %s on %s is not defined\n",
8429 prefix, localname, NULL);
8430 }
8431 *pref = prefix;
8432 *URI = nsname;
8433
8434 /*
8435 * SAX: Start of Element !
8436 */
8437 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8438 (!ctxt->disableSAX)) {
8439 if (nbNs > 0)
8440 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8441 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8442 nbatts / 5, nbdef, atts);
8443 else
8444 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8445 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8446 }
8447
8448 /*
8449 * Free up attribute allocated strings if needed
8450 */
8451 if (attval != 0) {
8452 for (i = 3,j = 0; j < nratts;i += 5,j++)
8453 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8454 xmlFree((xmlChar *) atts[i]);
8455 }
8456
8457 return(localname);
8458
8459base_changed:
8460 /*
8461 * the attribute strings are valid iif the base didn't changed
8462 */
8463 if (attval != 0) {
8464 for (i = 3,j = 0; j < nratts;i += 5,j++)
8465 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8466 xmlFree((xmlChar *) atts[i]);
8467 }
8468 ctxt->input->cur = ctxt->input->base + cur;
8469 ctxt->input->line = oldline;
8470 ctxt->input->col = oldcol;
8471 if (ctxt->wellFormed == 1) {
8472 goto reparse;
8473 }
8474 return(NULL);
8475}
8476
8477/**
8478 * xmlParseEndTag2:
8479 * @ctxt: an XML parser context
8480 * @line: line of the start tag
8481 * @nsNr: number of namespaces on the start tag
8482 *
8483 * parse an end of tag
8484 *
8485 * [42] ETag ::= '</' Name S? '>'
8486 *
8487 * With namespace
8488 *
8489 * [NS 9] ETag ::= '</' QName S? '>'
8490 */
8491
8492static void
8493xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8494 const xmlChar *URI, int line, int nsNr, int tlen) {
8495 const xmlChar *name;
8496
8497 GROW;
8498 if ((RAW != '<') || (NXT(1) != '/')) {
8499 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8500 return;
8501 }
8502 SKIP(2);
8503
8504 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8505 if (ctxt->input->cur[tlen] == '>') {
8506 ctxt->input->cur += tlen + 1;
8507 goto done;
8508 }
8509 ctxt->input->cur += tlen;
8510 name = (xmlChar*)1;
8511 } else {
8512 if (prefix == NULL)
8513 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8514 else
8515 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8516 }
8517
8518 /*
8519 * We should definitely be at the ending "S? '>'" part
8520 */
8521 GROW;
8522 SKIP_BLANKS;
8523 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8524 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8525 } else
8526 NEXT1;
8527
8528 /*
8529 * [ WFC: Element Type Match ]
8530 * The Name in an element's end-tag must match the element type in the
8531 * start-tag.
8532 *
8533 */
8534 if (name != (xmlChar*)1) {
8535 if (name == NULL) name = BAD_CAST "unparseable";
8536 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8537 "Opening and ending tag mismatch: %s line %d and %s\n",
8538 ctxt->name, line, name);
8539 }
8540
8541 /*
8542 * SAX: End of Tag
8543 */
8544done:
8545 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8546 (!ctxt->disableSAX))
8547 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8548
8549 spacePop(ctxt);
8550 if (nsNr != 0)
8551 nsPop(ctxt, nsNr);
8552 return;
8553}
8554
8555/**
8556 * xmlParseCDSect:
8557 * @ctxt: an XML parser context
8558 *
8559 * Parse escaped pure raw content.
8560 *
8561 * [18] CDSect ::= CDStart CData CDEnd
8562 *
8563 * [19] CDStart ::= '<![CDATA['
8564 *
8565 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8566 *
8567 * [21] CDEnd ::= ']]>'
8568 */
8569void
8570xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8571 xmlChar *buf = NULL;
8572 int len = 0;
8573 int size = XML_PARSER_BUFFER_SIZE;
8574 int r, rl;
8575 int s, sl;
8576 int cur, l;
8577 int count = 0;
8578
8579 /* Check 2.6.0 was NXT(0) not RAW */
8580 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8581 SKIP(9);
8582 } else
8583 return;
8584
8585 ctxt->instate = XML_PARSER_CDATA_SECTION;
8586 r = CUR_CHAR(rl);
8587 if (!IS_CHAR(r)) {
8588 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8589 ctxt->instate = XML_PARSER_CONTENT;
8590 return;
8591 }
8592 NEXTL(rl);
8593 s = CUR_CHAR(sl);
8594 if (!IS_CHAR(s)) {
8595 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8596 ctxt->instate = XML_PARSER_CONTENT;
8597 return;
8598 }
8599 NEXTL(sl);
8600 cur = CUR_CHAR(l);
8601 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8602 if (buf == NULL) {
8603 xmlErrMemory(ctxt, NULL);
8604 return;
8605 }
8606 while (IS_CHAR(cur) &&
8607 ((r != ']') || (s != ']') || (cur != '>'))) {
8608 if (len + 5 >= size) {
8609 xmlChar *tmp;
8610
8611 size *= 2;
8612 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8613 if (tmp == NULL) {
8614 xmlFree(buf);
8615 xmlErrMemory(ctxt, NULL);
8616 return;
8617 }
8618 buf = tmp;
8619 }
8620 COPY_BUF(rl,buf,len,r);
8621 r = s;
8622 rl = sl;
8623 s = cur;
8624 sl = l;
8625 count++;
8626 if (count > 50) {
8627 GROW;
8628 count = 0;
8629 }
8630 NEXTL(l);
8631 cur = CUR_CHAR(l);
8632 }
8633 buf[len] = 0;
8634 ctxt->instate = XML_PARSER_CONTENT;
8635 if (cur != '>') {
8636 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8637 "CData section not finished\n%.50s\n", buf);
8638 xmlFree(buf);
8639 return;
8640 }
8641 NEXTL(l);
8642
8643 /*
8644 * OK the buffer is to be consumed as cdata.
8645 */
8646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8647 if (ctxt->sax->cdataBlock != NULL)
8648 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8649 else if (ctxt->sax->characters != NULL)
8650 ctxt->sax->characters(ctxt->userData, buf, len);
8651 }
8652 xmlFree(buf);
8653}
8654
8655/**
8656 * xmlParseContent:
8657 * @ctxt: an XML parser context
8658 *
8659 * Parse a content:
8660 *
8661 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8662 */
8663
8664void
8665xmlParseContent(xmlParserCtxtPtr ctxt) {
8666 GROW;
8667 while ((RAW != 0) &&
8668 ((RAW != '<') || (NXT(1) != '/')) &&
8669 (ctxt->instate != XML_PARSER_EOF)) {
8670 const xmlChar *test = CUR_PTR;
8671 unsigned int cons = ctxt->input->consumed;
8672 const xmlChar *cur = ctxt->input->cur;
8673
8674 /*
8675 * First case : a Processing Instruction.
8676 */
8677 if ((*cur == '<') && (cur[1] == '?')) {
8678 xmlParsePI(ctxt);
8679 }
8680
8681 /*
8682 * Second case : a CDSection
8683 */
8684 /* 2.6.0 test was *cur not RAW */
8685 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8686 xmlParseCDSect(ctxt);
8687 }
8688
8689 /*
8690 * Third case : a comment
8691 */
8692 else if ((*cur == '<') && (NXT(1) == '!') &&
8693 (NXT(2) == '-') && (NXT(3) == '-')) {
8694 xmlParseComment(ctxt);
8695 ctxt->instate = XML_PARSER_CONTENT;
8696 }
8697
8698 /*
8699 * Fourth case : a sub-element.
8700 */
8701 else if (*cur == '<') {
8702 xmlParseElement(ctxt);
8703 }
8704
8705 /*
8706 * Fifth case : a reference. If if has not been resolved,
8707 * parsing returns it's Name, create the node
8708 */
8709
8710 else if (*cur == '&') {
8711 xmlParseReference(ctxt);
8712 }
8713
8714 /*
8715 * Last case, text. Note that References are handled directly.
8716 */
8717 else {
8718 xmlParseCharData(ctxt, 0);
8719 }
8720
8721 GROW;
8722 /*
8723 * Pop-up of finished entities.
8724 */
8725 while ((RAW == 0) && (ctxt->inputNr > 1))
8726 xmlPopInput(ctxt);
8727 SHRINK;
8728
8729 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8730 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8731 "detected an error in element content\n");
8732 ctxt->instate = XML_PARSER_EOF;
8733 break;
8734 }
8735 }
8736}
8737
8738/**
8739 * xmlParseElement:
8740 * @ctxt: an XML parser context
8741 *
8742 * parse an XML element, this is highly recursive
8743 *
8744 * [39] element ::= EmptyElemTag | STag content ETag
8745 *
8746 * [ WFC: Element Type Match ]
8747 * The Name in an element's end-tag must match the element type in the
8748 * start-tag.
8749 *
8750 */
8751
8752void
8753xmlParseElement(xmlParserCtxtPtr ctxt) {
8754 const xmlChar *name;
8755 const xmlChar *prefix;
8756 const xmlChar *URI;
8757 xmlParserNodeInfo node_info;
8758 int line, tlen;
8759 xmlNodePtr ret;
8760 int nsNr = ctxt->nsNr;
8761
8762 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8763 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8764 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8765 xmlParserMaxDepth);
8766 ctxt->instate = XML_PARSER_EOF;
8767 return;
8768 }
8769
8770 /* Capture start position */
8771 if (ctxt->record_info) {
8772 node_info.begin_pos = ctxt->input->consumed +
8773 (CUR_PTR - ctxt->input->base);
8774 node_info.begin_line = ctxt->input->line;
8775 }
8776
8777 if (ctxt->spaceNr == 0)
8778 spacePush(ctxt, -1);
8779 else if (*ctxt->space == -2)
8780 spacePush(ctxt, -1);
8781 else
8782 spacePush(ctxt, *ctxt->space);
8783
8784 line = ctxt->input->line;
8785#ifdef LIBXML_SAX1_ENABLED
8786 if (ctxt->sax2)
8787#endif /* LIBXML_SAX1_ENABLED */
8788 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8789#ifdef LIBXML_SAX1_ENABLED
8790 else
8791 name = xmlParseStartTag(ctxt);
8792#endif /* LIBXML_SAX1_ENABLED */
8793 if (ctxt->instate == XML_PARSER_EOF)
8794 return;
8795 if (name == NULL) {
8796 spacePop(ctxt);
8797 return;
8798 }
8799 namePush(ctxt, name);
8800 ret = ctxt->node;
8801
8802#ifdef LIBXML_VALID_ENABLED
8803 /*
8804 * [ VC: Root Element Type ]
8805 * The Name in the document type declaration must match the element
8806 * type of the root element.
8807 */
8808 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8809 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8810 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8811#endif /* LIBXML_VALID_ENABLED */
8812
8813 /*
8814 * Check for an Empty Element.
8815 */
8816 if ((RAW == '/') && (NXT(1) == '>')) {
8817 SKIP(2);
8818 if (ctxt->sax2) {
8819 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8820 (!ctxt->disableSAX))
8821 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8822#ifdef LIBXML_SAX1_ENABLED
8823 } else {
8824 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8825 (!ctxt->disableSAX))
8826 ctxt->sax->endElement(ctxt->userData, name);
8827#endif /* LIBXML_SAX1_ENABLED */
8828 }
8829 namePop(ctxt);
8830 spacePop(ctxt);
8831 if (nsNr != ctxt->nsNr)
8832 nsPop(ctxt, ctxt->nsNr - nsNr);
8833 if ( ret != NULL && ctxt->record_info ) {
8834 node_info.end_pos = ctxt->input->consumed +
8835 (CUR_PTR - ctxt->input->base);
8836 node_info.end_line = ctxt->input->line;
8837 node_info.node = ret;
8838 xmlParserAddNodeInfo(ctxt, &node_info);
8839 }
8840 return;
8841 }
8842 if (RAW == '>') {
8843 NEXT1;
8844 } else {
8845 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8846 "Couldn't find end of Start Tag %s line %d\n",
8847 name, line, NULL);
8848
8849 /*
8850 * end of parsing of this node.
8851 */
8852 nodePop(ctxt);
8853 namePop(ctxt);
8854 spacePop(ctxt);
8855 if (nsNr != ctxt->nsNr)
8856 nsPop(ctxt, ctxt->nsNr - nsNr);
8857
8858 /*
8859 * Capture end position and add node
8860 */
8861 if ( ret != NULL && ctxt->record_info ) {
8862 node_info.end_pos = ctxt->input->consumed +
8863 (CUR_PTR - ctxt->input->base);
8864 node_info.end_line = ctxt->input->line;
8865 node_info.node = ret;
8866 xmlParserAddNodeInfo(ctxt, &node_info);
8867 }
8868 return;
8869 }
8870
8871 /*
8872 * Parse the content of the element:
8873 */
8874 xmlParseContent(ctxt);
8875 if (!IS_BYTE_CHAR(RAW)) {
8876 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8877 "Premature end of data in tag %s line %d\n",
8878 name, line, NULL);
8879
8880 /*
8881 * end of parsing of this node.
8882 */
8883 nodePop(ctxt);
8884 namePop(ctxt);
8885 spacePop(ctxt);
8886 if (nsNr != ctxt->nsNr)
8887 nsPop(ctxt, ctxt->nsNr - nsNr);
8888 return;
8889 }
8890
8891 /*
8892 * parse the end of tag: '</' should be here.
8893 */
8894 if (ctxt->sax2) {
8895 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8896 namePop(ctxt);
8897 }
8898#ifdef LIBXML_SAX1_ENABLED
8899 else
8900 xmlParseEndTag1(ctxt, line);
8901#endif /* LIBXML_SAX1_ENABLED */
8902
8903 /*
8904 * Capture end position and add node
8905 */
8906 if ( ret != NULL && ctxt->record_info ) {
8907 node_info.end_pos = ctxt->input->consumed +
8908 (CUR_PTR - ctxt->input->base);
8909 node_info.end_line = ctxt->input->line;
8910 node_info.node = ret;
8911 xmlParserAddNodeInfo(ctxt, &node_info);
8912 }
8913}
8914
8915/**
8916 * xmlParseVersionNum:
8917 * @ctxt: an XML parser context
8918 *
8919 * parse the XML version value.
8920 *
8921 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8922 *
8923 * Returns the string giving the XML version number, or NULL
8924 */
8925xmlChar *
8926xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8927 xmlChar *buf = NULL;
8928 int len = 0;
8929 int size = 10;
8930 xmlChar cur;
8931
8932 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8933 if (buf == NULL) {
8934 xmlErrMemory(ctxt, NULL);
8935 return(NULL);
8936 }
8937 cur = CUR;
8938 while (((cur >= 'a') && (cur <= 'z')) ||
8939 ((cur >= 'A') && (cur <= 'Z')) ||
8940 ((cur >= '0') && (cur <= '9')) ||
8941 (cur == '_') || (cur == '.') ||
8942 (cur == ':') || (cur == '-')) {
8943 if (len + 1 >= size) {
8944 xmlChar *tmp;
8945
8946 size *= 2;
8947 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8948 if (tmp == NULL) {
8949 xmlErrMemory(ctxt, NULL);
8950 return(NULL);
8951 }
8952 buf = tmp;
8953 }
8954 buf[len++] = cur;
8955 NEXT;
8956 cur=CUR;
8957 }
8958 buf[len] = 0;
8959 return(buf);
8960}
8961
8962/**
8963 * xmlParseVersionInfo:
8964 * @ctxt: an XML parser context
8965 *
8966 * parse the XML version.
8967 *
8968 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8969 *
8970 * [25] Eq ::= S? '=' S?
8971 *
8972 * Returns the version string, e.g. "1.0"
8973 */
8974
8975xmlChar *
8976xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8977 xmlChar *version = NULL;
8978
8979 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8980 SKIP(7);
8981 SKIP_BLANKS;
8982 if (RAW != '=') {
8983 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8984 return(NULL);
8985 }
8986 NEXT;
8987 SKIP_BLANKS;
8988 if (RAW == '"') {
8989 NEXT;
8990 version = xmlParseVersionNum(ctxt);
8991 if (RAW != '"') {
8992 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8993 } else
8994 NEXT;
8995 } else if (RAW == '\''){
8996 NEXT;
8997 version = xmlParseVersionNum(ctxt);
8998 if (RAW != '\'') {
8999 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9000 } else
9001 NEXT;
9002 } else {
9003 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9004 }
9005 }
9006 return(version);
9007}
9008
9009/**
9010 * xmlParseEncName:
9011 * @ctxt: an XML parser context
9012 *
9013 * parse the XML encoding name
9014 *
9015 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9016 *
9017 * Returns the encoding name value or NULL
9018 */
9019xmlChar *
9020xmlParseEncName(xmlParserCtxtPtr ctxt) {
9021 xmlChar *buf = NULL;
9022 int len = 0;
9023 int size = 10;
9024 xmlChar cur;
9025
9026 cur = CUR;
9027 if (((cur >= 'a') && (cur <= 'z')) ||
9028 ((cur >= 'A') && (cur <= 'Z'))) {
9029 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9030 if (buf == NULL) {
9031 xmlErrMemory(ctxt, NULL);
9032 return(NULL);
9033 }
9034
9035 buf[len++] = cur;
9036 NEXT;
9037 cur = CUR;
9038 while (((cur >= 'a') && (cur <= 'z')) ||
9039 ((cur >= 'A') && (cur <= 'Z')) ||
9040 ((cur >= '0') && (cur <= '9')) ||
9041 (cur == '.') || (cur == '_') ||
9042 (cur == '-')) {
9043 if (len + 1 >= size) {
9044 xmlChar *tmp;
9045
9046 size *= 2;
9047 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9048 if (tmp == NULL) {
9049 xmlErrMemory(ctxt, NULL);
9050 xmlFree(buf);
9051 return(NULL);
9052 }
9053 buf = tmp;
9054 }
9055 buf[len++] = cur;
9056 NEXT;
9057 cur = CUR;
9058 if (cur == 0) {
9059 SHRINK;
9060 GROW;
9061 cur = CUR;
9062 }
9063 }
9064 buf[len] = 0;
9065 } else {
9066 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
9067 }
9068 return(buf);
9069}
9070
9071/**
9072 * xmlParseEncodingDecl:
9073 * @ctxt: an XML parser context
9074 *
9075 * parse the XML encoding declaration
9076 *
9077 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9078 *
9079 * this setups the conversion filters.
9080 *
9081 * Returns the encoding value or NULL
9082 */
9083
9084const xmlChar *
9085xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9086 xmlChar *encoding = NULL;
9087
9088 SKIP_BLANKS;
9089 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
9090 SKIP(8);
9091 SKIP_BLANKS;
9092 if (RAW != '=') {
9093 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9094 return(NULL);
9095 }
9096 NEXT;
9097 SKIP_BLANKS;
9098 if (RAW == '"') {
9099 NEXT;
9100 encoding = xmlParseEncName(ctxt);
9101 if (RAW != '"') {
9102 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9103 } else
9104 NEXT;
9105 } else if (RAW == '\''){
9106 NEXT;
9107 encoding = xmlParseEncName(ctxt);
9108 if (RAW != '\'') {
9109 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9110 } else
9111 NEXT;
9112 } else {
9113 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9114 }
9115 /*
9116 * UTF-16 encoding stwich has already taken place at this stage,
9117 * more over the little-endian/big-endian selection is already done
9118 */
9119 if ((encoding != NULL) &&
9120 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9121 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
9122 if (ctxt->encoding != NULL)
9123 xmlFree((xmlChar *) ctxt->encoding);
9124 ctxt->encoding = encoding;
9125 }
9126 /*
9127 * UTF-8 encoding is handled natively
9128 */
9129 else if ((encoding != NULL) &&
9130 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9131 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
9132 if (ctxt->encoding != NULL)
9133 xmlFree((xmlChar *) ctxt->encoding);
9134 ctxt->encoding = encoding;
9135 }
9136 else if (encoding != NULL) {
9137 xmlCharEncodingHandlerPtr handler;
9138
9139 if (ctxt->input->encoding != NULL)
9140 xmlFree((xmlChar *) ctxt->input->encoding);
9141 ctxt->input->encoding = encoding;
9142
9143 handler = xmlFindCharEncodingHandler((const char *) encoding);
9144 if (handler != NULL) {
9145 xmlSwitchToEncoding(ctxt, handler);
9146 } else {
9147 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
9148 "Unsupported encoding %s\n", encoding);
9149 return(NULL);
9150 }
9151 }
9152 }
9153 return(encoding);
9154}
9155
9156/**
9157 * xmlParseSDDecl:
9158 * @ctxt: an XML parser context
9159 *
9160 * parse the XML standalone declaration
9161 *
9162 * [32] SDDecl ::= S 'standalone' Eq
9163 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9164 *
9165 * [ VC: Standalone Document Declaration ]
9166 * TODO The standalone document declaration must have the value "no"
9167 * if any external markup declarations contain declarations of:
9168 * - attributes with default values, if elements to which these
9169 * attributes apply appear in the document without specifications
9170 * of values for these attributes, or
9171 * - entities (other than amp, lt, gt, apos, quot), if references
9172 * to those entities appear in the document, or
9173 * - attributes with values subject to normalization, where the
9174 * attribute appears in the document with a value which will change
9175 * as a result of normalization, or
9176 * - element types with element content, if white space occurs directly
9177 * within any instance of those types.
9178 *
9179 * Returns:
9180 * 1 if standalone="yes"
9181 * 0 if standalone="no"
9182 * -2 if standalone attribute is missing or invalid
9183 * (A standalone value of -2 means that the XML declaration was found,
9184 * but no value was specified for the standalone attribute).
9185 */
9186
9187int
9188xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
9189 int standalone = -2;
9190
9191 SKIP_BLANKS;
9192 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9193 SKIP(10);
9194 SKIP_BLANKS;
9195 if (RAW != '=') {
9196 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9197 return(standalone);
9198 }
9199 NEXT;
9200 SKIP_BLANKS;
9201 if (RAW == '\''){
9202 NEXT;
9203 if ((RAW == 'n') && (NXT(1) == 'o')) {
9204 standalone = 0;
9205 SKIP(2);
9206 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9207 (NXT(2) == 's')) {
9208 standalone = 1;
9209 SKIP(3);
9210 } else {
9211 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9212 }
9213 if (RAW != '\'') {
9214 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9215 } else
9216 NEXT;
9217 } else if (RAW == '"'){
9218 NEXT;
9219 if ((RAW == 'n') && (NXT(1) == 'o')) {
9220 standalone = 0;
9221 SKIP(2);
9222 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9223 (NXT(2) == 's')) {
9224 standalone = 1;
9225 SKIP(3);
9226 } else {
9227 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9228 }
9229 if (RAW != '"') {
9230 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9231 } else
9232 NEXT;
9233 } else {
9234 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9235 }
9236 }
9237 return(standalone);
9238}
9239
9240/**
9241 * xmlParseXMLDecl:
9242 * @ctxt: an XML parser context
9243 *
9244 * parse an XML declaration header
9245 *
9246 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9247 */
9248
9249void
9250xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9251 xmlChar *version;
9252
9253 /*
9254 * This value for standalone indicates that the document has an
9255 * XML declaration but it does not have a standalone attribute.
9256 * It will be overwritten later if a standalone attribute is found.
9257 */
9258 ctxt->input->standalone = -2;
9259
9260 /*
9261 * We know that '<?xml' is here.
9262 */
9263 SKIP(5);
9264
9265 if (!IS_BLANK_CH(RAW)) {
9266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9267 "Blank needed after '<?xml'\n");
9268 }
9269 SKIP_BLANKS;
9270
9271 /*
9272 * We must have the VersionInfo here.
9273 */
9274 version = xmlParseVersionInfo(ctxt);
9275 if (version == NULL) {
9276 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9277 } else {
9278 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9279 /*
9280 * TODO: Blueberry should be detected here
9281 */
9282 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9283 "Unsupported version '%s'\n",
9284 version, NULL);
9285 }
9286 if (ctxt->version != NULL)
9287 xmlFree((void *) ctxt->version);
9288 ctxt->version = version;
9289 }
9290
9291 /*
9292 * We may have the encoding declaration
9293 */
9294 if (!IS_BLANK_CH(RAW)) {
9295 if ((RAW == '?') && (NXT(1) == '>')) {
9296 SKIP(2);
9297 return;
9298 }
9299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9300 }
9301 xmlParseEncodingDecl(ctxt);
9302 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9303 /*
9304 * The XML REC instructs us to stop parsing right here
9305 */
9306 return;
9307 }
9308
9309 /*
9310 * We may have the standalone status.
9311 */
9312 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9313 if ((RAW == '?') && (NXT(1) == '>')) {
9314 SKIP(2);
9315 return;
9316 }
9317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9318 }
9319 SKIP_BLANKS;
9320 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9321
9322 SKIP_BLANKS;
9323 if ((RAW == '?') && (NXT(1) == '>')) {
9324 SKIP(2);
9325 } else if (RAW == '>') {
9326 /* Deprecated old WD ... */
9327 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9328 NEXT;
9329 } else {
9330 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9331 MOVETO_ENDTAG(CUR_PTR);
9332 NEXT;
9333 }
9334}
9335
9336/**
9337 * xmlParseMisc:
9338 * @ctxt: an XML parser context
9339 *
9340 * parse an XML Misc* optional field.
9341 *
9342 * [27] Misc ::= Comment | PI | S
9343 */
9344
9345void
9346xmlParseMisc(xmlParserCtxtPtr ctxt) {
9347 while (((RAW == '<') && (NXT(1) == '?')) ||
9348 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9349 IS_BLANK_CH(CUR)) {
9350 if ((RAW == '<') && (NXT(1) == '?')) {
9351 xmlParsePI(ctxt);
9352 } else if (IS_BLANK_CH(CUR)) {
9353 NEXT;
9354 } else
9355 xmlParseComment(ctxt);
9356 }
9357}
9358
9359/**
9360 * xmlParseDocument:
9361 * @ctxt: an XML parser context
9362 *
9363 * parse an XML document (and build a tree if using the standard SAX
9364 * interface).
9365 *
9366 * [1] document ::= prolog element Misc*
9367 *
9368 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9369 *
9370 * Returns 0, -1 in case of error. the parser context is augmented
9371 * as a result of the parsing.
9372 */
9373
9374int
9375xmlParseDocument(xmlParserCtxtPtr ctxt) {
9376 xmlChar start[4];
9377 xmlCharEncoding enc;
9378
9379 xmlInitParser();
9380
9381 if ((ctxt == NULL) || (ctxt->input == NULL))
9382 return(-1);
9383
9384 GROW;
9385
9386 /*
9387 * SAX: detecting the level.
9388 */
9389 xmlDetectSAX2(ctxt);
9390
9391 /*
9392 * SAX: beginning of the document processing.
9393 */
9394 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9395 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9396
9397 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9398 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9399 /*
9400 * Get the 4 first bytes and decode the charset
9401 * if enc != XML_CHAR_ENCODING_NONE
9402 * plug some encoding conversion routines.
9403 */
9404 start[0] = RAW;
9405 start[1] = NXT(1);
9406 start[2] = NXT(2);
9407 start[3] = NXT(3);
9408 enc = xmlDetectCharEncoding(&start[0], 4);
9409 if (enc != XML_CHAR_ENCODING_NONE) {
9410 xmlSwitchEncoding(ctxt, enc);
9411 }
9412 }
9413
9414
9415 if (CUR == 0) {
9416 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9417 }
9418
9419 /*
9420 * Check for the XMLDecl in the Prolog.
9421 */
9422 GROW;
9423 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9424
9425 /*
9426 * Note that we will switch encoding on the fly.
9427 */
9428 xmlParseXMLDecl(ctxt);
9429 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9430 /*
9431 * The XML REC instructs us to stop parsing right here
9432 */
9433 return(-1);
9434 }
9435 ctxt->standalone = ctxt->input->standalone;
9436 SKIP_BLANKS;
9437 } else {
9438 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9439 }
9440 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9441 ctxt->sax->startDocument(ctxt->userData);
9442
9443 /*
9444 * The Misc part of the Prolog
9445 */
9446 GROW;
9447 xmlParseMisc(ctxt);
9448
9449 /*
9450 * Then possibly doc type declaration(s) and more Misc
9451 * (doctypedecl Misc*)?
9452 */
9453 GROW;
9454 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9455
9456 ctxt->inSubset = 1;
9457 xmlParseDocTypeDecl(ctxt);
9458 if (RAW == '[') {
9459 ctxt->instate = XML_PARSER_DTD;
9460 xmlParseInternalSubset(ctxt);
9461 }
9462
9463 /*
9464 * Create and update the external subset.
9465 */
9466 ctxt->inSubset = 2;
9467 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9468 (!ctxt->disableSAX))
9469 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9470 ctxt->extSubSystem, ctxt->extSubURI);
9471 ctxt->inSubset = 0;
9472
9473 xmlCleanSpecialAttr(ctxt);
9474
9475 ctxt->instate = XML_PARSER_PROLOG;
9476 xmlParseMisc(ctxt);
9477 }
9478
9479 /*
9480 * Time to start parsing the tree itself
9481 */
9482 GROW;
9483 if (RAW != '<') {
9484 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9485 "Start tag expected, '<' not found\n");
9486 } else {
9487 ctxt->instate = XML_PARSER_CONTENT;
9488 xmlParseElement(ctxt);
9489 ctxt->instate = XML_PARSER_EPILOG;
9490
9491
9492 /*
9493 * The Misc part at the end
9494 */
9495 xmlParseMisc(ctxt);
9496
9497 if (RAW != 0) {
9498 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9499 }
9500 ctxt->instate = XML_PARSER_EOF;
9501 }
9502
9503 /*
9504 * SAX: end of the document processing.
9505 */
9506 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9507 ctxt->sax->endDocument(ctxt->userData);
9508
9509 /*
9510 * Remove locally kept entity definitions if the tree was not built
9511 */
9512 if ((ctxt->myDoc != NULL) &&
9513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9514 xmlFreeDoc(ctxt->myDoc);
9515 ctxt->myDoc = NULL;
9516 }
9517
9518 if (! ctxt->wellFormed) {
9519 ctxt->valid = 0;
9520 return(-1);
9521 }
9522 return(0);
9523}
9524
9525/**
9526 * xmlParseExtParsedEnt:
9527 * @ctxt: an XML parser context
9528 *
9529 * parse a general parsed entity
9530 * An external general parsed entity is well-formed if it matches the
9531 * production labeled extParsedEnt.
9532 *
9533 * [78] extParsedEnt ::= TextDecl? content
9534 *
9535 * Returns 0, -1 in case of error. the parser context is augmented
9536 * as a result of the parsing.
9537 */
9538
9539int
9540xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9541 xmlChar start[4];
9542 xmlCharEncoding enc;
9543
9544 if ((ctxt == NULL) || (ctxt->input == NULL))
9545 return(-1);
9546
9547 xmlDefaultSAXHandlerInit();
9548
9549 xmlDetectSAX2(ctxt);
9550
9551 GROW;
9552
9553 /*
9554 * SAX: beginning of the document processing.
9555 */
9556 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9557 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9558
9559 /*
9560 * Get the 4 first bytes and decode the charset
9561 * if enc != XML_CHAR_ENCODING_NONE
9562 * plug some encoding conversion routines.
9563 */
9564 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9565 start[0] = RAW;
9566 start[1] = NXT(1);
9567 start[2] = NXT(2);
9568 start[3] = NXT(3);
9569 enc = xmlDetectCharEncoding(start, 4);
9570 if (enc != XML_CHAR_ENCODING_NONE) {
9571 xmlSwitchEncoding(ctxt, enc);
9572 }
9573 }
9574
9575
9576 if (CUR == 0) {
9577 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9578 }
9579
9580 /*
9581 * Check for the XMLDecl in the Prolog.
9582 */
9583 GROW;
9584 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9585
9586 /*
9587 * Note that we will switch encoding on the fly.
9588 */
9589 xmlParseXMLDecl(ctxt);
9590 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9591 /*
9592 * The XML REC instructs us to stop parsing right here
9593 */
9594 return(-1);
9595 }
9596 SKIP_BLANKS;
9597 } else {
9598 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9599 }
9600 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9601 ctxt->sax->startDocument(ctxt->userData);
9602
9603 /*
9604 * Doing validity checking on chunk doesn't make sense
9605 */
9606 ctxt->instate = XML_PARSER_CONTENT;
9607 ctxt->validate = 0;
9608 ctxt->loadsubset = 0;
9609 ctxt->depth = 0;
9610
9611 xmlParseContent(ctxt);
9612
9613 if ((RAW == '<') && (NXT(1) == '/')) {
9614 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9615 } else if (RAW != 0) {
9616 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9617 }
9618
9619 /*
9620 * SAX: end of the document processing.
9621 */
9622 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9623 ctxt->sax->endDocument(ctxt->userData);
9624
9625 if (! ctxt->wellFormed) return(-1);
9626 return(0);
9627}
9628
9629#ifdef LIBXML_PUSH_ENABLED
9630/************************************************************************
9631 * *
9632 * Progressive parsing interfaces *
9633 * *
9634 ************************************************************************/
9635
9636/**
9637 * xmlParseLookupSequence:
9638 * @ctxt: an XML parser context
9639 * @first: the first char to lookup
9640 * @next: the next char to lookup or zero
9641 * @third: the next char to lookup or zero
9642 *
9643 * Try to find if a sequence (first, next, third) or just (first next) or
9644 * (first) is available in the input stream.
9645 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9646 * to avoid rescanning sequences of bytes, it DOES change the state of the
9647 * parser, do not use liberally.
9648 *
9649 * Returns the index to the current parsing point if the full sequence
9650 * is available, -1 otherwise.
9651 */
9652static int
9653xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9654 xmlChar next, xmlChar third) {
9655 int base, len;
9656 xmlParserInputPtr in;
9657 const xmlChar *buf;
9658
9659 in = ctxt->input;
9660 if (in == NULL) return(-1);
9661 base = in->cur - in->base;
9662 if (base < 0) return(-1);
9663 if (ctxt->checkIndex > base)
9664 base = ctxt->checkIndex;
9665 if (in->buf == NULL) {
9666 buf = in->base;
9667 len = in->length;
9668 } else {
9669 buf = in->buf->buffer->content;
9670 len = in->buf->buffer->use;
9671 }
9672 /* take into account the sequence length */
9673 if (third) len -= 2;
9674 else if (next) len --;
9675 for (;base < len;base++) {
9676 if (buf[base] == first) {
9677 if (third != 0) {
9678 if ((buf[base + 1] != next) ||
9679 (buf[base + 2] != third)) continue;
9680 } else if (next != 0) {
9681 if (buf[base + 1] != next) continue;
9682 }
9683 ctxt->checkIndex = 0;
9684#ifdef DEBUG_PUSH
9685 if (next == 0)
9686 xmlGenericError(xmlGenericErrorContext,
9687 "PP: lookup '%c' found at %d\n",
9688 first, base);
9689 else if (third == 0)
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: lookup '%c%c' found at %d\n",
9692 first, next, base);
9693 else
9694 xmlGenericError(xmlGenericErrorContext,
9695 "PP: lookup '%c%c%c' found at %d\n",
9696 first, next, third, base);
9697#endif
9698 return(base - (in->cur - in->base));
9699 }
9700 }
9701 ctxt->checkIndex = base;
9702#ifdef DEBUG_PUSH
9703 if (next == 0)
9704 xmlGenericError(xmlGenericErrorContext,
9705 "PP: lookup '%c' failed\n", first);
9706 else if (third == 0)
9707 xmlGenericError(xmlGenericErrorContext,
9708 "PP: lookup '%c%c' failed\n", first, next);
9709 else
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: lookup '%c%c%c' failed\n", first, next, third);
9712#endif
9713 return(-1);
9714}
9715
9716/**
9717 * xmlParseGetLasts:
9718 * @ctxt: an XML parser context
9719 * @lastlt: pointer to store the last '<' from the input
9720 * @lastgt: pointer to store the last '>' from the input
9721 *
9722 * Lookup the last < and > in the current chunk
9723 */
9724static void
9725xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9726 const xmlChar **lastgt) {
9727 const xmlChar *tmp;
9728
9729 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9730 xmlGenericError(xmlGenericErrorContext,
9731 "Internal error: xmlParseGetLasts\n");
9732 return;
9733 }
9734 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9735 tmp = ctxt->input->end;
9736 tmp--;
9737 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9738 if (tmp < ctxt->input->base) {
9739 *lastlt = NULL;
9740 *lastgt = NULL;
9741 } else {
9742 *lastlt = tmp;
9743 tmp++;
9744 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9745 if (*tmp == '\'') {
9746 tmp++;
9747 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9748 if (tmp < ctxt->input->end) tmp++;
9749 } else if (*tmp == '"') {
9750 tmp++;
9751 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9752 if (tmp < ctxt->input->end) tmp++;
9753 } else
9754 tmp++;
9755 }
9756 if (tmp < ctxt->input->end)
9757 *lastgt = tmp;
9758 else {
9759 tmp = *lastlt;
9760 tmp--;
9761 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9762 if (tmp >= ctxt->input->base)
9763 *lastgt = tmp;
9764 else
9765 *lastgt = NULL;
9766 }
9767 }
9768 } else {
9769 *lastlt = NULL;
9770 *lastgt = NULL;
9771 }
9772}
9773/**
9774 * xmlCheckCdataPush:
9775 * @cur: pointer to the bock of characters
9776 * @len: length of the block in bytes
9777 *
9778 * Check that the block of characters is okay as SCdata content [20]
9779 *
9780 * Returns the number of bytes to pass if okay, a negative index where an
9781 * UTF-8 error occured otherwise
9782 */
9783static int
9784xmlCheckCdataPush(const xmlChar *utf, int len) {
9785 int ix;
9786 unsigned char c;
9787 int codepoint;
9788
9789 if ((utf == NULL) || (len <= 0))
9790 return(0);
9791
9792 for (ix = 0; ix < len;) { /* string is 0-terminated */
9793 c = utf[ix];
9794 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9795 if (c >= 0x20)
9796 ix++;
9797 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9798 ix++;
9799 else
9800 return(-ix);
9801 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9802 if (ix + 2 > len) return(ix);
9803 if ((utf[ix+1] & 0xc0 ) != 0x80)
9804 return(-ix);
9805 codepoint = (utf[ix] & 0x1f) << 6;
9806 codepoint |= utf[ix+1] & 0x3f;
9807 if (!xmlIsCharQ(codepoint))
9808 return(-ix);
9809 ix += 2;
9810 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9811 if (ix + 3 > len) return(ix);
9812 if (((utf[ix+1] & 0xc0) != 0x80) ||
9813 ((utf[ix+2] & 0xc0) != 0x80))
9814 return(-ix);
9815 codepoint = (utf[ix] & 0xf) << 12;
9816 codepoint |= (utf[ix+1] & 0x3f) << 6;
9817 codepoint |= utf[ix+2] & 0x3f;
9818 if (!xmlIsCharQ(codepoint))
9819 return(-ix);
9820 ix += 3;
9821 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9822 if (ix + 4 > len) return(ix);
9823 if (((utf[ix+1] & 0xc0) != 0x80) ||
9824 ((utf[ix+2] & 0xc0) != 0x80) ||
9825 ((utf[ix+3] & 0xc0) != 0x80))
9826 return(-ix);
9827 codepoint = (utf[ix] & 0x7) << 18;
9828 codepoint |= (utf[ix+1] & 0x3f) << 12;
9829 codepoint |= (utf[ix+2] & 0x3f) << 6;
9830 codepoint |= utf[ix+3] & 0x3f;
9831 if (!xmlIsCharQ(codepoint))
9832 return(-ix);
9833 ix += 4;
9834 } else /* unknown encoding */
9835 return(-ix);
9836 }
9837 return(ix);
9838}
9839
9840/**
9841 * xmlParseTryOrFinish:
9842 * @ctxt: an XML parser context
9843 * @terminate: last chunk indicator
9844 *
9845 * Try to progress on parsing
9846 *
9847 * Returns zero if no parsing was possible
9848 */
9849static int
9850xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9851 int ret = 0;
9852 int avail, tlen;
9853 xmlChar cur, next;
9854 const xmlChar *lastlt, *lastgt;
9855
9856 if (ctxt->input == NULL)
9857 return(0);
9858
9859#ifdef DEBUG_PUSH
9860 switch (ctxt->instate) {
9861 case XML_PARSER_EOF:
9862 xmlGenericError(xmlGenericErrorContext,
9863 "PP: try EOF\n"); break;
9864 case XML_PARSER_START:
9865 xmlGenericError(xmlGenericErrorContext,
9866 "PP: try START\n"); break;
9867 case XML_PARSER_MISC:
9868 xmlGenericError(xmlGenericErrorContext,
9869 "PP: try MISC\n");break;
9870 case XML_PARSER_COMMENT:
9871 xmlGenericError(xmlGenericErrorContext,
9872 "PP: try COMMENT\n");break;
9873 case XML_PARSER_PROLOG:
9874 xmlGenericError(xmlGenericErrorContext,
9875 "PP: try PROLOG\n");break;
9876 case XML_PARSER_START_TAG:
9877 xmlGenericError(xmlGenericErrorContext,
9878 "PP: try START_TAG\n");break;
9879 case XML_PARSER_CONTENT:
9880 xmlGenericError(xmlGenericErrorContext,
9881 "PP: try CONTENT\n");break;
9882 case XML_PARSER_CDATA_SECTION:
9883 xmlGenericError(xmlGenericErrorContext,
9884 "PP: try CDATA_SECTION\n");break;
9885 case XML_PARSER_END_TAG:
9886 xmlGenericError(xmlGenericErrorContext,
9887 "PP: try END_TAG\n");break;
9888 case XML_PARSER_ENTITY_DECL:
9889 xmlGenericError(xmlGenericErrorContext,
9890 "PP: try ENTITY_DECL\n");break;
9891 case XML_PARSER_ENTITY_VALUE:
9892 xmlGenericError(xmlGenericErrorContext,
9893 "PP: try ENTITY_VALUE\n");break;
9894 case XML_PARSER_ATTRIBUTE_VALUE:
9895 xmlGenericError(xmlGenericErrorContext,
9896 "PP: try ATTRIBUTE_VALUE\n");break;
9897 case XML_PARSER_DTD:
9898 xmlGenericError(xmlGenericErrorContext,
9899 "PP: try DTD\n");break;
9900 case XML_PARSER_EPILOG:
9901 xmlGenericError(xmlGenericErrorContext,
9902 "PP: try EPILOG\n");break;
9903 case XML_PARSER_PI:
9904 xmlGenericError(xmlGenericErrorContext,
9905 "PP: try PI\n");break;
9906 case XML_PARSER_IGNORE:
9907 xmlGenericError(xmlGenericErrorContext,
9908 "PP: try IGNORE\n");break;
9909 }
9910#endif
9911
9912 if ((ctxt->input != NULL) &&
9913 (ctxt->input->cur - ctxt->input->base > 4096)) {
9914 xmlSHRINK(ctxt);
9915 ctxt->checkIndex = 0;
9916 }
9917 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9918
9919 while (1) {
9920 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9921 return(0);
9922
9923
9924 /*
9925 * Pop-up of finished entities.
9926 */
9927 while ((RAW == 0) && (ctxt->inputNr > 1))
9928 xmlPopInput(ctxt);
9929
9930 if (ctxt->input == NULL) break;
9931 if (ctxt->input->buf == NULL)
9932 avail = ctxt->input->length -
9933 (ctxt->input->cur - ctxt->input->base);
9934 else {
9935 /*
9936 * If we are operating on converted input, try to flush
9937 * remainng chars to avoid them stalling in the non-converted
9938 * buffer.
9939 */
9940 if ((ctxt->input->buf->raw != NULL) &&
9941 (ctxt->input->buf->raw->use > 0)) {
9942 int base = ctxt->input->base -
9943 ctxt->input->buf->buffer->content;
9944 int current = ctxt->input->cur - ctxt->input->base;
9945
9946 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9947 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9948 ctxt->input->cur = ctxt->input->base + current;
9949 ctxt->input->end =
9950 &ctxt->input->buf->buffer->content[
9951 ctxt->input->buf->buffer->use];
9952 }
9953 avail = ctxt->input->buf->buffer->use -
9954 (ctxt->input->cur - ctxt->input->base);
9955 }
9956 if (avail < 1)
9957 goto done;
9958 switch (ctxt->instate) {
9959 case XML_PARSER_EOF:
9960 /*
9961 * Document parsing is done !
9962 */
9963 goto done;
9964 case XML_PARSER_START:
9965 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9966 xmlChar start[4];
9967 xmlCharEncoding enc;
9968
9969 /*
9970 * Very first chars read from the document flow.
9971 */
9972 if (avail < 4)
9973 goto done;
9974
9975 /*
9976 * Get the 4 first bytes and decode the charset
9977 * if enc != XML_CHAR_ENCODING_NONE
9978 * plug some encoding conversion routines,
9979 * else xmlSwitchEncoding will set to (default)
9980 * UTF8.
9981 */
9982 start[0] = RAW;
9983 start[1] = NXT(1);
9984 start[2] = NXT(2);
9985 start[3] = NXT(3);
9986 enc = xmlDetectCharEncoding(start, 4);
9987 xmlSwitchEncoding(ctxt, enc);
9988 break;
9989 }
9990
9991 if (avail < 2)
9992 goto done;
9993 cur = ctxt->input->cur[0];
9994 next = ctxt->input->cur[1];
9995 if (cur == 0) {
9996 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9997 ctxt->sax->setDocumentLocator(ctxt->userData,
9998 &xmlDefaultSAXLocator);
9999 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10000 ctxt->instate = XML_PARSER_EOF;
10001#ifdef DEBUG_PUSH
10002 xmlGenericError(xmlGenericErrorContext,
10003 "PP: entering EOF\n");
10004#endif
10005 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10006 ctxt->sax->endDocument(ctxt->userData);
10007 goto done;
10008 }
10009 if ((cur == '<') && (next == '?')) {
10010 /* PI or XML decl */
10011 if (avail < 5) return(ret);
10012 if ((!terminate) &&
10013 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10014 return(ret);
10015 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10016 ctxt->sax->setDocumentLocator(ctxt->userData,
10017 &xmlDefaultSAXLocator);
10018 if ((ctxt->input->cur[2] == 'x') &&
10019 (ctxt->input->cur[3] == 'm') &&
10020 (ctxt->input->cur[4] == 'l') &&
10021 (IS_BLANK_CH(ctxt->input->cur[5]))) {
10022 ret += 5;
10023#ifdef DEBUG_PUSH
10024 xmlGenericError(xmlGenericErrorContext,
10025 "PP: Parsing XML Decl\n");
10026#endif
10027 xmlParseXMLDecl(ctxt);
10028 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10029 /*
10030 * The XML REC instructs us to stop parsing right
10031 * here
10032 */
10033 ctxt->instate = XML_PARSER_EOF;
10034 return(0);
10035 }
10036 ctxt->standalone = ctxt->input->standalone;
10037 if ((ctxt->encoding == NULL) &&
10038 (ctxt->input->encoding != NULL))
10039 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10040 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10041 (!ctxt->disableSAX))
10042 ctxt->sax->startDocument(ctxt->userData);
10043 ctxt->instate = XML_PARSER_MISC;
10044#ifdef DEBUG_PUSH
10045 xmlGenericError(xmlGenericErrorContext,
10046 "PP: entering MISC\n");
10047#endif
10048 } else {
10049 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10050 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10051 (!ctxt->disableSAX))
10052 ctxt->sax->startDocument(ctxt->userData);
10053 ctxt->instate = XML_PARSER_MISC;
10054#ifdef DEBUG_PUSH
10055 xmlGenericError(xmlGenericErrorContext,
10056 "PP: entering MISC\n");
10057#endif
10058 }
10059 } else {
10060 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10061 ctxt->sax->setDocumentLocator(ctxt->userData,
10062 &xmlDefaultSAXLocator);
10063 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10064 if (ctxt->version == NULL) {
10065 xmlErrMemory(ctxt, NULL);
10066 break;
10067 }
10068 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10069 (!ctxt->disableSAX))
10070 ctxt->sax->startDocument(ctxt->userData);
10071 ctxt->instate = XML_PARSER_MISC;
10072#ifdef DEBUG_PUSH
10073 xmlGenericError(xmlGenericErrorContext,
10074 "PP: entering MISC\n");
10075#endif
10076 }
10077 break;
10078 case XML_PARSER_START_TAG: {
10079 const xmlChar *name;
10080 const xmlChar *prefix;
10081 const xmlChar *URI;
10082 int nsNr = ctxt->nsNr;
10083
10084 if ((avail < 2) && (ctxt->inputNr == 1))
10085 goto done;
10086 cur = ctxt->input->cur[0];
10087 if (cur != '<') {
10088 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10089 ctxt->instate = XML_PARSER_EOF;
10090 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10091 ctxt->sax->endDocument(ctxt->userData);
10092 goto done;
10093 }
10094 if (!terminate) {
10095 if (ctxt->progressive) {
10096 /* > can be found unescaped in attribute values */
10097 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10098 goto done;
10099 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10100 goto done;
10101 }
10102 }
10103 if (ctxt->spaceNr == 0)
10104 spacePush(ctxt, -1);
10105 else if (*ctxt->space == -2)
10106 spacePush(ctxt, -1);
10107 else
10108 spacePush(ctxt, *ctxt->space);
10109#ifdef LIBXML_SAX1_ENABLED
10110 if (ctxt->sax2)
10111#endif /* LIBXML_SAX1_ENABLED */
10112 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10113#ifdef LIBXML_SAX1_ENABLED
10114 else
10115 name = xmlParseStartTag(ctxt);
10116#endif /* LIBXML_SAX1_ENABLED */
10117 if (ctxt->instate == XML_PARSER_EOF)
10118 goto done;
10119 if (name == NULL) {
10120 spacePop(ctxt);
10121 ctxt->instate = XML_PARSER_EOF;
10122 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10123 ctxt->sax->endDocument(ctxt->userData);
10124 goto done;
10125 }
10126#ifdef LIBXML_VALID_ENABLED
10127 /*
10128 * [ VC: Root Element Type ]
10129 * The Name in the document type declaration must match
10130 * the element type of the root element.
10131 */
10132 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10133 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10134 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10135#endif /* LIBXML_VALID_ENABLED */
10136
10137 /*
10138 * Check for an Empty Element.
10139 */
10140 if ((RAW == '/') && (NXT(1) == '>')) {
10141 SKIP(2);
10142
10143 if (ctxt->sax2) {
10144 if ((ctxt->sax != NULL) &&
10145 (ctxt->sax->endElementNs != NULL) &&
10146 (!ctxt->disableSAX))
10147 ctxt->sax->endElementNs(ctxt->userData, name,
10148 prefix, URI);
10149 if (ctxt->nsNr - nsNr > 0)
10150 nsPop(ctxt, ctxt->nsNr - nsNr);
10151#ifdef LIBXML_SAX1_ENABLED
10152 } else {
10153 if ((ctxt->sax != NULL) &&
10154 (ctxt->sax->endElement != NULL) &&
10155 (!ctxt->disableSAX))
10156 ctxt->sax->endElement(ctxt->userData, name);
10157#endif /* LIBXML_SAX1_ENABLED */
10158 }
10159 spacePop(ctxt);
10160 if (ctxt->nameNr == 0) {
10161 ctxt->instate = XML_PARSER_EPILOG;
10162 } else {
10163 ctxt->instate = XML_PARSER_CONTENT;
10164 }
10165 break;
10166 }
10167 if (RAW == '>') {
10168 NEXT;
10169 } else {
10170 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10171 "Couldn't find end of Start Tag %s\n",
10172 name);
10173 nodePop(ctxt);
10174 spacePop(ctxt);
10175 }
10176 if (ctxt->sax2)
10177 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
10178#ifdef LIBXML_SAX1_ENABLED
10179 else
10180 namePush(ctxt, name);
10181#endif /* LIBXML_SAX1_ENABLED */
10182
10183 ctxt->instate = XML_PARSER_CONTENT;
10184 break;
10185 }
10186 case XML_PARSER_CONTENT: {
10187 const xmlChar *test;
10188 unsigned int cons;
10189 if ((avail < 2) && (ctxt->inputNr == 1))
10190 goto done;
10191 cur = ctxt->input->cur[0];
10192 next = ctxt->input->cur[1];
10193
10194 test = CUR_PTR;
10195 cons = ctxt->input->consumed;
10196 if ((cur == '<') && (next == '/')) {
10197 ctxt->instate = XML_PARSER_END_TAG;
10198 break;
10199 } else if ((cur == '<') && (next == '?')) {
10200 if ((!terminate) &&
10201 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10202 goto done;
10203 xmlParsePI(ctxt);
10204 } else if ((cur == '<') && (next != '!')) {
10205 ctxt->instate = XML_PARSER_START_TAG;
10206 break;
10207 } else if ((cur == '<') && (next == '!') &&
10208 (ctxt->input->cur[2] == '-') &&
10209 (ctxt->input->cur[3] == '-')) {
10210 int term;
10211
10212 if (avail < 4)
10213 goto done;
10214 ctxt->input->cur += 4;
10215 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10216 ctxt->input->cur -= 4;
10217 if ((!terminate) && (term < 0))
10218 goto done;
10219 xmlParseComment(ctxt);
10220 ctxt->instate = XML_PARSER_CONTENT;
10221 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10222 (ctxt->input->cur[2] == '[') &&
10223 (ctxt->input->cur[3] == 'C') &&
10224 (ctxt->input->cur[4] == 'D') &&
10225 (ctxt->input->cur[5] == 'A') &&
10226 (ctxt->input->cur[6] == 'T') &&
10227 (ctxt->input->cur[7] == 'A') &&
10228 (ctxt->input->cur[8] == '[')) {
10229 SKIP(9);
10230 ctxt->instate = XML_PARSER_CDATA_SECTION;
10231 break;
10232 } else if ((cur == '<') && (next == '!') &&
10233 (avail < 9)) {
10234 goto done;
10235 } else if (cur == '&') {
10236 if ((!terminate) &&
10237 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10238 goto done;
10239 xmlParseReference(ctxt);
10240 } else {
10241 /* TODO Avoid the extra copy, handle directly !!! */
10242 /*
10243 * Goal of the following test is:
10244 * - minimize calls to the SAX 'character' callback
10245 * when they are mergeable
10246 * - handle an problem for isBlank when we only parse
10247 * a sequence of blank chars and the next one is
10248 * not available to check against '<' presence.
10249 * - tries to homogenize the differences in SAX
10250 * callbacks between the push and pull versions
10251 * of the parser.
10252 */
10253 if ((ctxt->inputNr == 1) &&
10254 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10255 if (!terminate) {
10256 if (ctxt->progressive) {
10257 if ((lastlt == NULL) ||
10258 (ctxt->input->cur > lastlt))
10259 goto done;
10260 } else if (xmlParseLookupSequence(ctxt,
10261 '<', 0, 0) < 0) {
10262 goto done;
10263 }
10264 }
10265 }
10266 ctxt->checkIndex = 0;
10267 xmlParseCharData(ctxt, 0);
10268 }
10269 /*
10270 * Pop-up of finished entities.
10271 */
10272 while ((RAW == 0) && (ctxt->inputNr > 1))
10273 xmlPopInput(ctxt);
10274 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10275 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10276 "detected an error in element content\n");
10277 ctxt->instate = XML_PARSER_EOF;
10278 break;
10279 }
10280 break;
10281 }
10282 case XML_PARSER_END_TAG:
10283 if (avail < 2)
10284 goto done;
10285 if (!terminate) {
10286 if (ctxt->progressive) {
10287 /* > can be found unescaped in attribute values */
10288 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10289 goto done;
10290 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10291 goto done;
10292 }
10293 }
10294 if (ctxt->sax2) {
10295 xmlParseEndTag2(ctxt,
10296 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10297 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10298 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10299 nameNsPop(ctxt);
10300 }
10301#ifdef LIBXML_SAX1_ENABLED
10302 else
10303 xmlParseEndTag1(ctxt, 0);
10304#endif /* LIBXML_SAX1_ENABLED */
10305 if (ctxt->instate == XML_PARSER_EOF) {
10306 /* Nothing */
10307 } else if (ctxt->nameNr == 0) {
10308 ctxt->instate = XML_PARSER_EPILOG;
10309 } else {
10310 ctxt->instate = XML_PARSER_CONTENT;
10311 }
10312 break;
10313 case XML_PARSER_CDATA_SECTION: {
10314 /*
10315 * The Push mode need to have the SAX callback for
10316 * cdataBlock merge back contiguous callbacks.
10317 */
10318 int base;
10319
10320 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10321 if (base < 0) {
10322 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10323 int tmp;
10324
10325 tmp = xmlCheckCdataPush(ctxt->input->cur,
10326 XML_PARSER_BIG_BUFFER_SIZE);
10327 if (tmp < 0) {
10328 tmp = -tmp;
10329 ctxt->input->cur += tmp;
10330 goto encoding_error;
10331 }
10332 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10333 if (ctxt->sax->cdataBlock != NULL)
10334 ctxt->sax->cdataBlock(ctxt->userData,
10335 ctxt->input->cur, tmp);
10336 else if (ctxt->sax->characters != NULL)
10337 ctxt->sax->characters(ctxt->userData,
10338 ctxt->input->cur, tmp);
10339 }
10340 SKIPL(tmp);
10341 ctxt->checkIndex = 0;
10342 }
10343 goto done;
10344 } else {
10345 int tmp;
10346
10347 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10348 if ((tmp < 0) || (tmp != base)) {
10349 tmp = -tmp;
10350 ctxt->input->cur += tmp;
10351 goto encoding_error;
10352 }
10353 if ((ctxt->sax != NULL) && (base > 0) &&
10354 (!ctxt->disableSAX)) {
10355 if (ctxt->sax->cdataBlock != NULL)
10356 ctxt->sax->cdataBlock(ctxt->userData,
10357 ctxt->input->cur, base);
10358 else if (ctxt->sax->characters != NULL)
10359 ctxt->sax->characters(ctxt->userData,
10360 ctxt->input->cur, base);
10361 }
10362 SKIPL(base + 3);
10363 ctxt->checkIndex = 0;
10364 ctxt->instate = XML_PARSER_CONTENT;
10365#ifdef DEBUG_PUSH
10366 xmlGenericError(xmlGenericErrorContext,
10367 "PP: entering CONTENT\n");
10368#endif
10369 }
10370 break;
10371 }
10372 case XML_PARSER_MISC:
10373 SKIP_BLANKS;
10374 if (ctxt->input->buf == NULL)
10375 avail = ctxt->input->length -
10376 (ctxt->input->cur - ctxt->input->base);
10377 else
10378 avail = ctxt->input->buf->buffer->use -
10379 (ctxt->input->cur - ctxt->input->base);
10380 if (avail < 2)
10381 goto done;
10382 cur = ctxt->input->cur[0];
10383 next = ctxt->input->cur[1];
10384 if ((cur == '<') && (next == '?')) {
10385 if ((!terminate) &&
10386 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10387 goto done;
10388#ifdef DEBUG_PUSH
10389 xmlGenericError(xmlGenericErrorContext,
10390 "PP: Parsing PI\n");
10391#endif
10392 xmlParsePI(ctxt);
10393 ctxt->checkIndex = 0;
10394 } else if ((cur == '<') && (next == '!') &&
10395 (ctxt->input->cur[2] == '-') &&
10396 (ctxt->input->cur[3] == '-')) {
10397 if ((!terminate) &&
10398 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10399 goto done;
10400#ifdef DEBUG_PUSH
10401 xmlGenericError(xmlGenericErrorContext,
10402 "PP: Parsing Comment\n");
10403#endif
10404 xmlParseComment(ctxt);
10405 ctxt->instate = XML_PARSER_MISC;
10406 ctxt->checkIndex = 0;
10407 } else if ((cur == '<') && (next == '!') &&
10408 (ctxt->input->cur[2] == 'D') &&
10409 (ctxt->input->cur[3] == 'O') &&
10410 (ctxt->input->cur[4] == 'C') &&
10411 (ctxt->input->cur[5] == 'T') &&
10412 (ctxt->input->cur[6] == 'Y') &&
10413 (ctxt->input->cur[7] == 'P') &&
10414 (ctxt->input->cur[8] == 'E')) {
10415 if ((!terminate) &&
10416 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10417 goto done;
10418#ifdef DEBUG_PUSH
10419 xmlGenericError(xmlGenericErrorContext,
10420 "PP: Parsing internal subset\n");
10421#endif
10422 ctxt->inSubset = 1;
10423 xmlParseDocTypeDecl(ctxt);
10424 if (RAW == '[') {
10425 ctxt->instate = XML_PARSER_DTD;
10426#ifdef DEBUG_PUSH
10427 xmlGenericError(xmlGenericErrorContext,
10428 "PP: entering DTD\n");
10429#endif
10430 } else {
10431 /*
10432 * Create and update the external subset.
10433 */
10434 ctxt->inSubset = 2;
10435 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10436 (ctxt->sax->externalSubset != NULL))
10437 ctxt->sax->externalSubset(ctxt->userData,
10438 ctxt->intSubName, ctxt->extSubSystem,
10439 ctxt->extSubURI);
10440 ctxt->inSubset = 0;
10441 xmlCleanSpecialAttr(ctxt);
10442 ctxt->instate = XML_PARSER_PROLOG;
10443#ifdef DEBUG_PUSH
10444 xmlGenericError(xmlGenericErrorContext,
10445 "PP: entering PROLOG\n");
10446#endif
10447 }
10448 } else if ((cur == '<') && (next == '!') &&
10449 (avail < 9)) {
10450 goto done;
10451 } else {
10452 ctxt->instate = XML_PARSER_START_TAG;
10453 ctxt->progressive = 1;
10454 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10455#ifdef DEBUG_PUSH
10456 xmlGenericError(xmlGenericErrorContext,
10457 "PP: entering START_TAG\n");
10458#endif
10459 }
10460 break;
10461 case XML_PARSER_PROLOG:
10462 SKIP_BLANKS;
10463 if (ctxt->input->buf == NULL)
10464 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10465 else
10466 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10467 if (avail < 2)
10468 goto done;
10469 cur = ctxt->input->cur[0];
10470 next = ctxt->input->cur[1];
10471 if ((cur == '<') && (next == '?')) {
10472 if ((!terminate) &&
10473 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10474 goto done;
10475#ifdef DEBUG_PUSH
10476 xmlGenericError(xmlGenericErrorContext,
10477 "PP: Parsing PI\n");
10478#endif
10479 xmlParsePI(ctxt);
10480 } else if ((cur == '<') && (next == '!') &&
10481 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10482 if ((!terminate) &&
10483 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10484 goto done;
10485#ifdef DEBUG_PUSH
10486 xmlGenericError(xmlGenericErrorContext,
10487 "PP: Parsing Comment\n");
10488#endif
10489 xmlParseComment(ctxt);
10490 ctxt->instate = XML_PARSER_PROLOG;
10491 } else if ((cur == '<') && (next == '!') &&
10492 (avail < 4)) {
10493 goto done;
10494 } else {
10495 ctxt->instate = XML_PARSER_START_TAG;
10496 if (ctxt->progressive == 0)
10497 ctxt->progressive = 1;
10498 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10499#ifdef DEBUG_PUSH
10500 xmlGenericError(xmlGenericErrorContext,
10501 "PP: entering START_TAG\n");
10502#endif
10503 }
10504 break;
10505 case XML_PARSER_EPILOG:
10506 SKIP_BLANKS;
10507 if (ctxt->input->buf == NULL)
10508 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10509 else
10510 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10511 if (avail < 2)
10512 goto done;
10513 cur = ctxt->input->cur[0];
10514 next = ctxt->input->cur[1];
10515 if ((cur == '<') && (next == '?')) {
10516 if ((!terminate) &&
10517 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10518 goto done;
10519#ifdef DEBUG_PUSH
10520 xmlGenericError(xmlGenericErrorContext,
10521 "PP: Parsing PI\n");
10522#endif
10523 xmlParsePI(ctxt);
10524 ctxt->instate = XML_PARSER_EPILOG;
10525 } else if ((cur == '<') && (next == '!') &&
10526 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10527 if ((!terminate) &&
10528 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10529 goto done;
10530#ifdef DEBUG_PUSH
10531 xmlGenericError(xmlGenericErrorContext,
10532 "PP: Parsing Comment\n");
10533#endif
10534 xmlParseComment(ctxt);
10535 ctxt->instate = XML_PARSER_EPILOG;
10536 } else if ((cur == '<') && (next == '!') &&
10537 (avail < 4)) {
10538 goto done;
10539 } else {
10540 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10541 ctxt->instate = XML_PARSER_EOF;
10542#ifdef DEBUG_PUSH
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: entering EOF\n");
10545#endif
10546 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10547 ctxt->sax->endDocument(ctxt->userData);
10548 goto done;
10549 }
10550 break;
10551 case XML_PARSER_DTD: {
10552 /*
10553 * Sorry but progressive parsing of the internal subset
10554 * is not expected to be supported. We first check that
10555 * the full content of the internal subset is available and
10556 * the parsing is launched only at that point.
10557 * Internal subset ends up with "']' S? '>'" in an unescaped
10558 * section and not in a ']]>' sequence which are conditional
10559 * sections (whoever argued to keep that crap in XML deserve
10560 * a place in hell !).
10561 */
10562 int base, i;
10563 xmlChar *buf;
10564 xmlChar quote = 0;
10565
10566 base = ctxt->input->cur - ctxt->input->base;
10567 if (base < 0) return(0);
10568 if (ctxt->checkIndex > base)
10569 base = ctxt->checkIndex;
10570 buf = ctxt->input->buf->buffer->content;
10571 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10572 base++) {
10573 if (quote != 0) {
10574 if (buf[base] == quote)
10575 quote = 0;
10576 continue;
10577 }
10578 if ((quote == 0) && (buf[base] == '<')) {
10579 int found = 0;
10580 /* special handling of comments */
10581 if (((unsigned int) base + 4 <
10582 ctxt->input->buf->buffer->use) &&
10583 (buf[base + 1] == '!') &&
10584 (buf[base + 2] == '-') &&
10585 (buf[base + 3] == '-')) {
10586 for (;(unsigned int) base + 3 <
10587 ctxt->input->buf->buffer->use; base++) {
10588 if ((buf[base] == '-') &&
10589 (buf[base + 1] == '-') &&
10590 (buf[base + 2] == '>')) {
10591 found = 1;
10592 base += 2;
10593 break;
10594 }
10595 }
10596 if (!found) {
10597#if 0
10598 fprintf(stderr, "unfinished comment\n");
10599#endif
10600 break; /* for */
10601 }
10602 continue;
10603 }
10604 }
10605 if (buf[base] == '"') {
10606 quote = '"';
10607 continue;
10608 }
10609 if (buf[base] == '\'') {
10610 quote = '\'';
10611 continue;
10612 }
10613 if (buf[base] == ']') {
10614#if 0
10615 fprintf(stderr, "%c%c%c%c: ", buf[base],
10616 buf[base + 1], buf[base + 2], buf[base + 3]);
10617#endif
10618 if ((unsigned int) base +1 >=
10619 ctxt->input->buf->buffer->use)
10620 break;
10621 if (buf[base + 1] == ']') {
10622 /* conditional crap, skip both ']' ! */
10623 base++;
10624 continue;
10625 }
10626 for (i = 1;
10627 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10628 i++) {
10629 if (buf[base + i] == '>') {
10630#if 0
10631 fprintf(stderr, "found\n");
10632#endif
10633 goto found_end_int_subset;
10634 }
10635 if (!IS_BLANK_CH(buf[base + i])) {
10636#if 0
10637 fprintf(stderr, "not found\n");
10638#endif
10639 goto not_end_of_int_subset;
10640 }
10641 }
10642#if 0
10643 fprintf(stderr, "end of stream\n");
10644#endif
10645 break;
10646
10647 }
10648not_end_of_int_subset:
10649 continue; /* for */
10650 }
10651 /*
10652 * We didn't found the end of the Internal subset
10653 */
10654#ifdef DEBUG_PUSH
10655 if (next == 0)
10656 xmlGenericError(xmlGenericErrorContext,
10657 "PP: lookup of int subset end filed\n");
10658#endif
10659 goto done;
10660
10661found_end_int_subset:
10662 xmlParseInternalSubset(ctxt);
10663 ctxt->inSubset = 2;
10664 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10665 (ctxt->sax->externalSubset != NULL))
10666 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10667 ctxt->extSubSystem, ctxt->extSubURI);
10668 ctxt->inSubset = 0;
10669 xmlCleanSpecialAttr(ctxt);
10670 ctxt->instate = XML_PARSER_PROLOG;
10671 ctxt->checkIndex = 0;
10672#ifdef DEBUG_PUSH
10673 xmlGenericError(xmlGenericErrorContext,
10674 "PP: entering PROLOG\n");
10675#endif
10676 break;
10677 }
10678 case XML_PARSER_COMMENT:
10679 xmlGenericError(xmlGenericErrorContext,
10680 "PP: internal error, state == COMMENT\n");
10681 ctxt->instate = XML_PARSER_CONTENT;
10682#ifdef DEBUG_PUSH
10683 xmlGenericError(xmlGenericErrorContext,
10684 "PP: entering CONTENT\n");
10685#endif
10686 break;
10687 case XML_PARSER_IGNORE:
10688 xmlGenericError(xmlGenericErrorContext,
10689 "PP: internal error, state == IGNORE");
10690 ctxt->instate = XML_PARSER_DTD;
10691#ifdef DEBUG_PUSH
10692 xmlGenericError(xmlGenericErrorContext,
10693 "PP: entering DTD\n");
10694#endif
10695 break;
10696 case XML_PARSER_PI:
10697 xmlGenericError(xmlGenericErrorContext,
10698 "PP: internal error, state == PI\n");
10699 ctxt->instate = XML_PARSER_CONTENT;
10700#ifdef DEBUG_PUSH
10701 xmlGenericError(xmlGenericErrorContext,
10702 "PP: entering CONTENT\n");
10703#endif
10704 break;
10705 case XML_PARSER_ENTITY_DECL:
10706 xmlGenericError(xmlGenericErrorContext,
10707 "PP: internal error, state == ENTITY_DECL\n");
10708 ctxt->instate = XML_PARSER_DTD;
10709#ifdef DEBUG_PUSH
10710 xmlGenericError(xmlGenericErrorContext,
10711 "PP: entering DTD\n");
10712#endif
10713 break;
10714 case XML_PARSER_ENTITY_VALUE:
10715 xmlGenericError(xmlGenericErrorContext,
10716 "PP: internal error, state == ENTITY_VALUE\n");
10717 ctxt->instate = XML_PARSER_CONTENT;
10718#ifdef DEBUG_PUSH
10719 xmlGenericError(xmlGenericErrorContext,
10720 "PP: entering DTD\n");
10721#endif
10722 break;
10723 case XML_PARSER_ATTRIBUTE_VALUE:
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10726 ctxt->instate = XML_PARSER_START_TAG;
10727#ifdef DEBUG_PUSH
10728 xmlGenericError(xmlGenericErrorContext,
10729 "PP: entering START_TAG\n");
10730#endif
10731 break;
10732 case XML_PARSER_SYSTEM_LITERAL:
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: internal error, state == SYSTEM_LITERAL\n");
10735 ctxt->instate = XML_PARSER_START_TAG;
10736#ifdef DEBUG_PUSH
10737 xmlGenericError(xmlGenericErrorContext,
10738 "PP: entering START_TAG\n");
10739#endif
10740 break;
10741 case XML_PARSER_PUBLIC_LITERAL:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: internal error, state == PUBLIC_LITERAL\n");
10744 ctxt->instate = XML_PARSER_START_TAG;
10745#ifdef DEBUG_PUSH
10746 xmlGenericError(xmlGenericErrorContext,
10747 "PP: entering START_TAG\n");
10748#endif
10749 break;
10750 }
10751 }
10752done:
10753#ifdef DEBUG_PUSH
10754 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10755#endif
10756 return(ret);
10757encoding_error:
10758 {
10759 char buffer[150];
10760
10761 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10762 ctxt->input->cur[0], ctxt->input->cur[1],
10763 ctxt->input->cur[2], ctxt->input->cur[3]);
10764 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10765 "Input is not proper UTF-8, indicate encoding !\n%s",
10766 BAD_CAST buffer, NULL);
10767 }
10768 return(0);
10769}
10770
10771/**
10772 * xmlParseChunk:
10773 * @ctxt: an XML parser context
10774 * @chunk: an char array
10775 * @size: the size in byte of the chunk
10776 * @terminate: last chunk indicator
10777 *
10778 * Parse a Chunk of memory
10779 *
10780 * Returns zero if no error, the xmlParserErrors otherwise.
10781 */
10782int
10783xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10784 int terminate) {
10785 int end_in_lf = 0;
10786
10787 if (ctxt == NULL)
10788 return(XML_ERR_INTERNAL_ERROR);
10789 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10790 return(ctxt->errNo);
10791 if (ctxt->instate == XML_PARSER_START)
10792 xmlDetectSAX2(ctxt);
10793 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10794 (chunk[size - 1] == '\r')) {
10795 end_in_lf = 1;
10796 size--;
10797 }
10798 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10799 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10800 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10801 int cur = ctxt->input->cur - ctxt->input->base;
10802 int res;
10803
10804 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10805 if (res < 0) {
10806 ctxt->errNo = XML_PARSER_EOF;
10807 ctxt->disableSAX = 1;
10808 return (XML_PARSER_EOF);
10809 }
10810 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10811 ctxt->input->cur = ctxt->input->base + cur;
10812 ctxt->input->end =
10813 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10814#ifdef DEBUG_PUSH
10815 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10816#endif
10817
10818 } else if (ctxt->instate != XML_PARSER_EOF) {
10819 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10820 xmlParserInputBufferPtr in = ctxt->input->buf;
10821 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10822 (in->raw != NULL)) {
10823 int nbchars;
10824
10825 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10826 if (nbchars < 0) {
10827 /* TODO 2.6.0 */
10828 xmlGenericError(xmlGenericErrorContext,
10829 "xmlParseChunk: encoder error\n");
10830 return(XML_ERR_INVALID_ENCODING);
10831 }
10832 }
10833 }
10834 }
10835 xmlParseTryOrFinish(ctxt, terminate);
10836 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10837 (ctxt->input->buf != NULL)) {
10838 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10839 }
10840 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10841 return(ctxt->errNo);
10842 if (terminate) {
10843 /*
10844 * Check for termination
10845 */
10846 int avail = 0;
10847
10848 if (ctxt->input != NULL) {
10849 if (ctxt->input->buf == NULL)
10850 avail = ctxt->input->length -
10851 (ctxt->input->cur - ctxt->input->base);
10852 else
10853 avail = ctxt->input->buf->buffer->use -
10854 (ctxt->input->cur - ctxt->input->base);
10855 }
10856
10857 if ((ctxt->instate != XML_PARSER_EOF) &&
10858 (ctxt->instate != XML_PARSER_EPILOG)) {
10859 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10860 }
10861 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10862 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10863 }
10864 if (ctxt->instate != XML_PARSER_EOF) {
10865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10866 ctxt->sax->endDocument(ctxt->userData);
10867 }
10868 ctxt->instate = XML_PARSER_EOF;
10869 }
10870 return((xmlParserErrors) ctxt->errNo);
10871}
10872
10873/************************************************************************
10874 * *
10875 * I/O front end functions to the parser *
10876 * *
10877 ************************************************************************/
10878
10879/**
10880 * xmlCreatePushParserCtxt:
10881 * @sax: a SAX handler
10882 * @user_data: The user data returned on SAX callbacks
10883 * @chunk: a pointer to an array of chars
10884 * @size: number of chars in the array
10885 * @filename: an optional file name or URI
10886 *
10887 * Create a parser context for using the XML parser in push mode.
10888 * If @buffer and @size are non-NULL, the data is used to detect
10889 * the encoding. The remaining characters will be parsed so they
10890 * don't need to be fed in again through xmlParseChunk.
10891 * To allow content encoding detection, @size should be >= 4
10892 * The value of @filename is used for fetching external entities
10893 * and error/warning reports.
10894 *
10895 * Returns the new parser context or NULL
10896 */
10897
10898xmlParserCtxtPtr
10899xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10900 const char *chunk, int size, const char *filename) {
10901 xmlParserCtxtPtr ctxt;
10902 xmlParserInputPtr inputStream;
10903 xmlParserInputBufferPtr buf;
10904 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10905
10906 /*
10907 * plug some encoding conversion routines
10908 */
10909 if ((chunk != NULL) && (size >= 4))
10910 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10911
10912 buf = xmlAllocParserInputBuffer(enc);
10913 if (buf == NULL) return(NULL);
10914
10915 ctxt = xmlNewParserCtxt();
10916 if (ctxt == NULL) {
10917 xmlErrMemory(NULL, "creating parser: out of memory\n");
10918 xmlFreeParserInputBuffer(buf);
10919 return(NULL);
10920 }
10921 ctxt->dictNames = 1;
10922 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10923 if (ctxt->pushTab == NULL) {
10924 xmlErrMemory(ctxt, NULL);
10925 xmlFreeParserInputBuffer(buf);
10926 xmlFreeParserCtxt(ctxt);
10927 return(NULL);
10928 }
10929 if (sax != NULL) {
10930#ifdef LIBXML_SAX1_ENABLED
10931 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10932#endif /* LIBXML_SAX1_ENABLED */
10933 xmlFree(ctxt->sax);
10934 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10935 if (ctxt->sax == NULL) {
10936 xmlErrMemory(ctxt, NULL);
10937 xmlFreeParserInputBuffer(buf);
10938 xmlFreeParserCtxt(ctxt);
10939 return(NULL);
10940 }
10941 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10942 if (sax->initialized == XML_SAX2_MAGIC)
10943 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10944 else
10945 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10946 if (user_data != NULL)
10947 ctxt->userData = user_data;
10948 }
10949 if (filename == NULL) {
10950 ctxt->directory = NULL;
10951 } else {
10952 ctxt->directory = xmlParserGetDirectory(filename);
10953 }
10954
10955 inputStream = xmlNewInputStream(ctxt);
10956 if (inputStream == NULL) {
10957 xmlFreeParserCtxt(ctxt);
10958 xmlFreeParserInputBuffer(buf);
10959 return(NULL);
10960 }
10961
10962 if (filename == NULL)
10963 inputStream->filename = NULL;
10964 else {
10965 inputStream->filename = (char *)
10966 xmlCanonicPath((const xmlChar *) filename);
10967 if (inputStream->filename == NULL) {
10968 xmlFreeParserCtxt(ctxt);
10969 xmlFreeParserInputBuffer(buf);
10970 return(NULL);
10971 }
10972 }
10973 inputStream->buf = buf;
10974 inputStream->base = inputStream->buf->buffer->content;
10975 inputStream->cur = inputStream->buf->buffer->content;
10976 inputStream->end =
10977 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10978
10979 inputPush(ctxt, inputStream);
10980
10981 /*
10982 * If the caller didn't provide an initial 'chunk' for determining
10983 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10984 * that it can be automatically determined later
10985 */
10986 if ((size == 0) || (chunk == NULL)) {
10987 ctxt->charset = XML_CHAR_ENCODING_NONE;
10988 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10989 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10990 int cur = ctxt->input->cur - ctxt->input->base;
10991
10992 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10993
10994 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10995 ctxt->input->cur = ctxt->input->base + cur;
10996 ctxt->input->end =
10997 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10998#ifdef DEBUG_PUSH
10999 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11000#endif
11001 }
11002
11003 if (enc != XML_CHAR_ENCODING_NONE) {
11004 xmlSwitchEncoding(ctxt, enc);
11005 }
11006
11007 return(ctxt);
11008}
11009#endif /* LIBXML_PUSH_ENABLED */
11010
11011/**
11012 * xmlStopParser:
11013 * @ctxt: an XML parser context
11014 *
11015 * Blocks further parser processing
11016 */
11017void
11018xmlStopParser(xmlParserCtxtPtr ctxt) {
11019 if (ctxt == NULL)
11020 return;
11021 ctxt->instate = XML_PARSER_EOF;
11022 ctxt->disableSAX = 1;
11023 if (ctxt->input != NULL) {
11024 ctxt->input->cur = BAD_CAST"";
11025 ctxt->input->base = ctxt->input->cur;
11026 }
11027}
11028
11029/**
11030 * xmlCreateIOParserCtxt:
11031 * @sax: a SAX handler
11032 * @user_data: The user data returned on SAX callbacks
11033 * @ioread: an I/O read function
11034 * @ioclose: an I/O close function
11035 * @ioctx: an I/O handler
11036 * @enc: the charset encoding if known
11037 *
11038 * Create a parser context for using the XML parser with an existing
11039 * I/O stream
11040 *
11041 * Returns the new parser context or NULL
11042 */
11043xmlParserCtxtPtr
11044xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11045 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11046 void *ioctx, xmlCharEncoding enc) {
11047 xmlParserCtxtPtr ctxt;
11048 xmlParserInputPtr inputStream;
11049 xmlParserInputBufferPtr buf;
11050
11051 if (ioread == NULL) return(NULL);
11052
11053 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11054 if (buf == NULL) return(NULL);
11055
11056 ctxt = xmlNewParserCtxt();
11057 if (ctxt == NULL) {
11058 xmlFreeParserInputBuffer(buf);
11059 return(NULL);
11060 }
11061 if (sax != NULL) {
11062#ifdef LIBXML_SAX1_ENABLED
11063 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
11064#endif /* LIBXML_SAX1_ENABLED */
11065 xmlFree(ctxt->sax);
11066 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11067 if (ctxt->sax == NULL) {
11068 xmlErrMemory(ctxt, NULL);
11069 xmlFreeParserCtxt(ctxt);
11070 return(NULL);
11071 }
11072 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11073 if (sax->initialized == XML_SAX2_MAGIC)
11074 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11075 else
11076 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
11077 if (user_data != NULL)
11078 ctxt->userData = user_data;
11079 }
11080
11081 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11082 if (inputStream == NULL) {
11083 xmlFreeParserCtxt(ctxt);
11084 return(NULL);
11085 }
11086 inputPush(ctxt, inputStream);
11087
11088 return(ctxt);
11089}
11090
11091#ifdef LIBXML_VALID_ENABLED
11092/************************************************************************
11093 * *
11094 * Front ends when parsing a DTD *
11095 * *
11096 ************************************************************************/
11097
11098/**
11099 * xmlIOParseDTD:
11100 * @sax: the SAX handler block or NULL
11101 * @input: an Input Buffer
11102 * @enc: the charset encoding if known
11103 *
11104 * Load and parse a DTD
11105 *
11106 * Returns the resulting xmlDtdPtr or NULL in case of error.
11107 * @input will be freed by the function in any case.
11108 */
11109
11110xmlDtdPtr
11111xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11112 xmlCharEncoding enc) {
11113 xmlDtdPtr ret = NULL;
11114 xmlParserCtxtPtr ctxt;
11115 xmlParserInputPtr pinput = NULL;
11116 xmlChar start[4];
11117
11118 if (input == NULL)
11119 return(NULL);
11120
11121 ctxt = xmlNewParserCtxt();
11122 if (ctxt == NULL) {
11123 xmlFreeParserInputBuffer(input);
11124 return(NULL);
11125 }
11126
11127 /*
11128 * Set-up the SAX context
11129 */
11130 if (sax != NULL) {
11131 if (ctxt->sax != NULL)
11132 xmlFree(ctxt->sax);
11133 ctxt->sax = sax;
11134 ctxt->userData = ctxt;
11135 }
11136 xmlDetectSAX2(ctxt);
11137
11138 /*
11139 * generate a parser input from the I/O handler
11140 */
11141
11142 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11143 if (pinput == NULL) {
11144 if (sax != NULL) ctxt->sax = NULL;
11145 xmlFreeParserInputBuffer(input);
11146 xmlFreeParserCtxt(ctxt);
11147 return(NULL);
11148 }
11149
11150 /*
11151 * plug some encoding conversion routines here.
11152 */
11153 xmlPushInput(ctxt, pinput);
11154 if (enc != XML_CHAR_ENCODING_NONE) {
11155 xmlSwitchEncoding(ctxt, enc);
11156 }
11157
11158 pinput->filename = NULL;
11159 pinput->line = 1;
11160 pinput->col = 1;
11161 pinput->base = ctxt->input->cur;
11162 pinput->cur = ctxt->input->cur;
11163 pinput->free = NULL;
11164
11165 /*
11166 * let's parse that entity knowing it's an external subset.
11167 */
11168 ctxt->inSubset = 2;
11169 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11170 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11171 BAD_CAST "none", BAD_CAST "none");
11172
11173 if ((enc == XML_CHAR_ENCODING_NONE) &&
11174 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
11175 /*
11176 * Get the 4 first bytes and decode the charset
11177 * if enc != XML_CHAR_ENCODING_NONE
11178 * plug some encoding conversion routines.
11179 */
11180 start[0] = RAW;
11181 start[1] = NXT(1);
11182 start[2] = NXT(2);
11183 start[3] = NXT(3);
11184 enc = xmlDetectCharEncoding(start, 4);
11185 if (enc != XML_CHAR_ENCODING_NONE) {
11186 xmlSwitchEncoding(ctxt, enc);
11187 }
11188 }
11189
11190 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11191
11192 if (ctxt->myDoc != NULL) {
11193 if (ctxt->wellFormed) {
11194 ret = ctxt->myDoc->extSubset;
11195 ctxt->myDoc->extSubset = NULL;
11196 if (ret != NULL) {
11197 xmlNodePtr tmp;
11198
11199 ret->doc = NULL;
11200 tmp = ret->children;
11201 while (tmp != NULL) {
11202 tmp->doc = NULL;
11203 tmp = tmp->next;
11204 }
11205 }
11206 } else {
11207 ret = NULL;
11208 }
11209 xmlFreeDoc(ctxt->myDoc);
11210 ctxt->myDoc = NULL;
11211 }
11212 if (sax != NULL) ctxt->sax = NULL;
11213 xmlFreeParserCtxt(ctxt);
11214
11215 return(ret);
11216}
11217
11218/**
11219 * xmlSAXParseDTD:
11220 * @sax: the SAX handler block
11221 * @ExternalID: a NAME* containing the External ID of the DTD
11222 * @SystemID: a NAME* containing the URL to the DTD
11223 *
11224 * Load and parse an external subset.
11225 *
11226 * Returns the resulting xmlDtdPtr or NULL in case of error.
11227 */
11228
11229xmlDtdPtr
11230xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11231 const xmlChar *SystemID) {
11232 xmlDtdPtr ret = NULL;
11233 xmlParserCtxtPtr ctxt;
11234 xmlParserInputPtr input = NULL;
11235 xmlCharEncoding enc;
11236 xmlChar* systemIdCanonic;
11237
11238 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11239
11240 ctxt = xmlNewParserCtxt();
11241 if (ctxt == NULL) {
11242 return(NULL);
11243 }
11244
11245 /*
11246 * Set-up the SAX context
11247 */
11248 if (sax != NULL) {
11249 if (ctxt->sax != NULL)
11250 xmlFree(ctxt->sax);
11251 ctxt->sax = sax;
11252 ctxt->userData = ctxt;
11253 }
11254
11255 /*
11256 * Canonicalise the system ID
11257 */
11258 systemIdCanonic = xmlCanonicPath(SystemID);
11259 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11260 xmlFreeParserCtxt(ctxt);
11261 return(NULL);
11262 }
11263
11264 /*
11265 * Ask the Entity resolver to load the damn thing
11266 */
11267
11268 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11269 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11270 systemIdCanonic);
11271 if (input == NULL) {
11272 if (sax != NULL) ctxt->sax = NULL;
11273 xmlFreeParserCtxt(ctxt);
11274 if (systemIdCanonic != NULL)
11275 xmlFree(systemIdCanonic);
11276 return(NULL);
11277 }
11278
11279 /*
11280 * plug some encoding conversion routines here.
11281 */
11282 xmlPushInput(ctxt, input);
11283 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11284 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11285 xmlSwitchEncoding(ctxt, enc);
11286 }
11287
11288 if (input->filename == NULL)
11289 input->filename = (char *) systemIdCanonic;
11290 else
11291 xmlFree(systemIdCanonic);
11292 input->line = 1;
11293 input->col = 1;
11294 input->base = ctxt->input->cur;
11295 input->cur = ctxt->input->cur;
11296 input->free = NULL;
11297
11298 /*
11299 * let's parse that entity knowing it's an external subset.
11300 */
11301 ctxt->inSubset = 2;
11302 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11303 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11304 ExternalID, SystemID);
11305 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11306
11307 if (ctxt->myDoc != NULL) {
11308 if (ctxt->wellFormed) {
11309 ret = ctxt->myDoc->extSubset;
11310 ctxt->myDoc->extSubset = NULL;
11311 if (ret != NULL) {
11312 xmlNodePtr tmp;
11313
11314 ret->doc = NULL;
11315 tmp = ret->children;
11316 while (tmp != NULL) {
11317 tmp->doc = NULL;
11318 tmp = tmp->next;
11319 }
11320 }
11321 } else {
11322 ret = NULL;
11323 }
11324 xmlFreeDoc(ctxt->myDoc);
11325 ctxt->myDoc = NULL;
11326 }
11327 if (sax != NULL) ctxt->sax = NULL;
11328 xmlFreeParserCtxt(ctxt);
11329
11330 return(ret);
11331}
11332
11333
11334/**
11335 * xmlParseDTD:
11336 * @ExternalID: a NAME* containing the External ID of the DTD
11337 * @SystemID: a NAME* containing the URL to the DTD
11338 *
11339 * Load and parse an external subset.
11340 *
11341 * Returns the resulting xmlDtdPtr or NULL in case of error.
11342 */
11343
11344xmlDtdPtr
11345xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11346 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11347}
11348#endif /* LIBXML_VALID_ENABLED */
11349
11350/************************************************************************
11351 * *
11352 * Front ends when parsing an Entity *
11353 * *
11354 ************************************************************************/
11355
11356/**
11357 * xmlParseCtxtExternalEntity:
11358 * @ctx: the existing parsing context
11359 * @URL: the URL for the entity to load
11360 * @ID: the System ID for the entity to load
11361 * @lst: the return value for the set of parsed nodes
11362 *
11363 * Parse an external general entity within an existing parsing context
11364 * An external general parsed entity is well-formed if it matches the
11365 * production labeled extParsedEnt.
11366 *
11367 * [78] extParsedEnt ::= TextDecl? content
11368 *
11369 * Returns 0 if the entity is well formed, -1 in case of args problem and
11370 * the parser error code otherwise
11371 */
11372
11373int
11374xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11375 const xmlChar *ID, xmlNodePtr *lst) {
11376 xmlParserCtxtPtr ctxt;
11377 xmlDocPtr newDoc;
11378 xmlNodePtr newRoot;
11379 xmlSAXHandlerPtr oldsax = NULL;
11380 int ret = 0;
11381 xmlChar start[4];
11382 xmlCharEncoding enc;
11383 xmlParserInputPtr inputStream;
11384 char *directory = NULL;
11385
11386 if (ctx == NULL) return(-1);
11387
11388 if (ctx->depth > 40) {
11389 return(XML_ERR_ENTITY_LOOP);
11390 }
11391
11392 if (lst != NULL)
11393 *lst = NULL;
11394 if ((URL == NULL) && (ID == NULL))
11395 return(-1);
11396 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11397 return(-1);
11398
11399 ctxt = xmlNewParserCtxt();
11400 if (ctxt == NULL) {
11401 return(-1);
11402 }
11403
11404 ctxt->userData = ctxt;
11405 ctxt->_private = ctx->_private;
11406
11407 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11408 if (inputStream == NULL) {
11409 xmlFreeParserCtxt(ctxt);
11410 return(-1);
11411 }
11412
11413 inputPush(ctxt, inputStream);
11414
11415 if ((ctxt->directory == NULL) && (directory == NULL))
11416 directory = xmlParserGetDirectory((char *)URL);
11417 if ((ctxt->directory == NULL) && (directory != NULL))
11418 ctxt->directory = directory;
11419
11420 oldsax = ctxt->sax;
11421 ctxt->sax = ctx->sax;
11422 xmlDetectSAX2(ctxt);
11423 newDoc = xmlNewDoc(BAD_CAST "1.0");
11424 if (newDoc == NULL) {
11425 xmlFreeParserCtxt(ctxt);
11426 return(-1);
11427 }
11428 if (ctx->myDoc->dict) {
11429 newDoc->dict = ctx->myDoc->dict;
11430 xmlDictReference(newDoc->dict);
11431 }
11432 if (ctx->myDoc != NULL) {
11433 newDoc->intSubset = ctx->myDoc->intSubset;
11434 newDoc->extSubset = ctx->myDoc->extSubset;
11435 }
11436 if (ctx->myDoc->URL != NULL) {
11437 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11438 }
11439 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11440 if (newRoot == NULL) {
11441 ctxt->sax = oldsax;
11442 xmlFreeParserCtxt(ctxt);
11443 newDoc->intSubset = NULL;
11444 newDoc->extSubset = NULL;
11445 xmlFreeDoc(newDoc);
11446 return(-1);
11447 }
11448 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11449 nodePush(ctxt, newDoc->children);
11450 if (ctx->myDoc == NULL) {
11451 ctxt->myDoc = newDoc;
11452 } else {
11453 ctxt->myDoc = ctx->myDoc;
11454 newDoc->children->doc = ctx->myDoc;
11455 }
11456
11457 /*
11458 * Get the 4 first bytes and decode the charset
11459 * if enc != XML_CHAR_ENCODING_NONE
11460 * plug some encoding conversion routines.
11461 */
11462 GROW
11463 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11464 start[0] = RAW;
11465 start[1] = NXT(1);
11466 start[2] = NXT(2);
11467 start[3] = NXT(3);
11468 enc = xmlDetectCharEncoding(start, 4);
11469 if (enc != XML_CHAR_ENCODING_NONE) {
11470 xmlSwitchEncoding(ctxt, enc);
11471 }
11472 }
11473
11474 /*
11475 * Parse a possible text declaration first
11476 */
11477 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11478 xmlParseTextDecl(ctxt);
11479 }
11480
11481 /*
11482 * Doing validity checking on chunk doesn't make sense
11483 */
11484 ctxt->instate = XML_PARSER_CONTENT;
11485 ctxt->validate = ctx->validate;
11486 ctxt->valid = ctx->valid;
11487 ctxt->loadsubset = ctx->loadsubset;
11488 ctxt->depth = ctx->depth + 1;
11489 ctxt->replaceEntities = ctx->replaceEntities;
11490 if (ctxt->validate) {
11491 ctxt->vctxt.error = ctx->vctxt.error;
11492 ctxt->vctxt.warning = ctx->vctxt.warning;
11493 } else {
11494 ctxt->vctxt.error = NULL;
11495 ctxt->vctxt.warning = NULL;
11496 }
11497 ctxt->vctxt.nodeTab = NULL;
11498 ctxt->vctxt.nodeNr = 0;
11499 ctxt->vctxt.nodeMax = 0;
11500 ctxt->vctxt.node = NULL;
11501 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11502 ctxt->dict = ctx->dict;
11503 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11504 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11505 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11506 ctxt->dictNames = ctx->dictNames;
11507 ctxt->attsDefault = ctx->attsDefault;
11508 ctxt->attsSpecial = ctx->attsSpecial;
11509 ctxt->linenumbers = ctx->linenumbers;
11510
11511 xmlParseContent(ctxt);
11512
11513 ctx->validate = ctxt->validate;
11514 ctx->valid = ctxt->valid;
11515 if ((RAW == '<') && (NXT(1) == '/')) {
11516 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11517 } else if (RAW != 0) {
11518 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11519 }
11520 if (ctxt->node != newDoc->children) {
11521 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11522 }
11523
11524 if (!ctxt->wellFormed) {
11525 if (ctxt->errNo == 0)
11526 ret = 1;
11527 else
11528 ret = ctxt->errNo;
11529 } else {
11530 if (lst != NULL) {
11531 xmlNodePtr cur;
11532
11533 /*
11534 * Return the newly created nodeset after unlinking it from
11535 * they pseudo parent.
11536 */
11537 cur = newDoc->children->children;
11538 *lst = cur;
11539 while (cur != NULL) {
11540 cur->parent = NULL;
11541 cur = cur->next;
11542 }
11543 newDoc->children->children = NULL;
11544 }
11545 ret = 0;
11546 }
11547 ctxt->sax = oldsax;
11548 ctxt->dict = NULL;
11549 ctxt->attsDefault = NULL;
11550 ctxt->attsSpecial = NULL;
11551 xmlFreeParserCtxt(ctxt);
11552 newDoc->intSubset = NULL;
11553 newDoc->extSubset = NULL;
11554 xmlFreeDoc(newDoc);
11555
11556 return(ret);
11557}
11558
11559/**
11560 * xmlParseExternalEntityPrivate:
11561 * @doc: the document the chunk pertains to
11562 * @oldctxt: the previous parser context if available
11563 * @sax: the SAX handler bloc (possibly NULL)
11564 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11565 * @depth: Used for loop detection, use 0
11566 * @URL: the URL for the entity to load
11567 * @ID: the System ID for the entity to load
11568 * @list: the return value for the set of parsed nodes
11569 *
11570 * Private version of xmlParseExternalEntity()
11571 *
11572 * Returns 0 if the entity is well formed, -1 in case of args problem and
11573 * the parser error code otherwise
11574 */
11575
11576static xmlParserErrors
11577xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11578 xmlSAXHandlerPtr sax,
11579 void *user_data, int depth, const xmlChar *URL,
11580 const xmlChar *ID, xmlNodePtr *list) {
11581 xmlParserCtxtPtr ctxt;
11582 xmlDocPtr newDoc;
11583 xmlNodePtr newRoot;
11584 xmlSAXHandlerPtr oldsax = NULL;
11585 xmlParserErrors ret = XML_ERR_OK;
11586 xmlChar start[4];
11587 xmlCharEncoding enc;
11588
11589 if (depth > 40) {
11590 return(XML_ERR_ENTITY_LOOP);
11591 }
11592
11593
11594
11595 if (list != NULL)
11596 *list = NULL;
11597 if ((URL == NULL) && (ID == NULL))
11598 return(XML_ERR_INTERNAL_ERROR);
11599 if (doc == NULL)
11600 return(XML_ERR_INTERNAL_ERROR);
11601
11602
11603 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11604 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11605 ctxt->userData = ctxt;
11606 if (oldctxt != NULL) {
11607 ctxt->_private = oldctxt->_private;
11608 ctxt->loadsubset = oldctxt->loadsubset;
11609 ctxt->validate = oldctxt->validate;
11610 ctxt->external = oldctxt->external;
11611 ctxt->record_info = oldctxt->record_info;
11612 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11613 ctxt->node_seq.length = oldctxt->node_seq.length;
11614 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11615 } else {
11616 /*
11617 * Doing validity checking on chunk without context
11618 * doesn't make sense
11619 */
11620 ctxt->_private = NULL;
11621 ctxt->validate = 0;
11622 ctxt->external = 2;
11623 ctxt->loadsubset = 0;
11624 }
11625 if (sax != NULL) {
11626 oldsax = ctxt->sax;
11627 ctxt->sax = sax;
11628 if (user_data != NULL)
11629 ctxt->userData = user_data;
11630 }
11631 xmlDetectSAX2(ctxt);
11632 newDoc = xmlNewDoc(BAD_CAST "1.0");
11633 if (newDoc == NULL) {
11634 ctxt->node_seq.maximum = 0;
11635 ctxt->node_seq.length = 0;
11636 ctxt->node_seq.buffer = NULL;
11637 xmlFreeParserCtxt(ctxt);
11638 return(XML_ERR_INTERNAL_ERROR);
11639 }
11640 newDoc->intSubset = doc->intSubset;
11641 newDoc->extSubset = doc->extSubset;
11642 newDoc->dict = doc->dict;
11643 xmlDictReference(newDoc->dict);
11644
11645 if (doc->URL != NULL) {
11646 newDoc->URL = xmlStrdup(doc->URL);
11647 }
11648 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11649 if (newRoot == NULL) {
11650 if (sax != NULL)
11651 ctxt->sax = oldsax;
11652 ctxt->node_seq.maximum = 0;
11653 ctxt->node_seq.length = 0;
11654 ctxt->node_seq.buffer = NULL;
11655 xmlFreeParserCtxt(ctxt);
11656 newDoc->intSubset = NULL;
11657 newDoc->extSubset = NULL;
11658 xmlFreeDoc(newDoc);
11659 return(XML_ERR_INTERNAL_ERROR);
11660 }
11661 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11662 nodePush(ctxt, newDoc->children);
11663 ctxt->myDoc = doc;
11664 newRoot->doc = doc;
11665
11666 /*
11667 * Get the 4 first bytes and decode the charset
11668 * if enc != XML_CHAR_ENCODING_NONE
11669 * plug some encoding conversion routines.
11670 */
11671 GROW;
11672 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11673 start[0] = RAW;
11674 start[1] = NXT(1);
11675 start[2] = NXT(2);
11676 start[3] = NXT(3);
11677 enc = xmlDetectCharEncoding(start, 4);
11678 if (enc != XML_CHAR_ENCODING_NONE) {
11679 xmlSwitchEncoding(ctxt, enc);
11680 }
11681 }
11682
11683 /*
11684 * Parse a possible text declaration first
11685 */
11686 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11687 xmlParseTextDecl(ctxt);
11688 }
11689
11690 ctxt->instate = XML_PARSER_CONTENT;
11691 ctxt->depth = depth;
11692
11693 xmlParseContent(ctxt);
11694
11695 if ((RAW == '<') && (NXT(1) == '/')) {
11696 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11697 } else if (RAW != 0) {
11698 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11699 }
11700 if (ctxt->node != newDoc->children) {
11701 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11702 }
11703
11704 if (!ctxt->wellFormed) {
11705 if (ctxt->errNo == 0)
11706 ret = XML_ERR_INTERNAL_ERROR;
11707 else
11708 ret = (xmlParserErrors)ctxt->errNo;
11709 } else {
11710 if (list != NULL) {
11711 xmlNodePtr cur;
11712
11713 /*
11714 * Return the newly created nodeset after unlinking it from
11715 * they pseudo parent.
11716 */
11717 cur = newDoc->children->children;
11718 *list = cur;
11719 while (cur != NULL) {
11720 cur->parent = NULL;
11721 cur = cur->next;
11722 }
11723 newDoc->children->children = NULL;
11724 }
11725 ret = XML_ERR_OK;
11726 }
11727
11728 /*
11729 * Record in the parent context the number of entities replacement
11730 * done when parsing that reference.
11731 */
11732 oldctxt->nbentities += ctxt->nbentities;
11733 /*
11734 * Also record the size of the entity parsed
11735 */
11736 if (ctxt->input != NULL) {
11737 oldctxt->sizeentities += ctxt->input->consumed;
11738 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
11739 }
11740 /*
11741 * And record the last error if any
11742 */
11743 if (ctxt->lastError.code != XML_ERR_OK)
11744 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
11745
11746 if (sax != NULL)
11747 ctxt->sax = oldsax;
11748 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11749 oldctxt->node_seq.length = ctxt->node_seq.length;
11750 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11751 oldctxt->nbentities += ctxt->nbentities;
11752 ctxt->node_seq.maximum = 0;
11753 ctxt->node_seq.length = 0;
11754 ctxt->node_seq.buffer = NULL;
11755 xmlFreeParserCtxt(ctxt);
11756 newDoc->intSubset = NULL;
11757 newDoc->extSubset = NULL;
11758 xmlFreeDoc(newDoc);
11759
11760 return(ret);
11761}
11762
11763#ifdef LIBXML_SAX1_ENABLED
11764/**
11765 * xmlParseExternalEntity:
11766 * @doc: the document the chunk pertains to
11767 * @sax: the SAX handler bloc (possibly NULL)
11768 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11769 * @depth: Used for loop detection, use 0
11770 * @URL: the URL for the entity to load
11771 * @ID: the System ID for the entity to load
11772 * @lst: the return value for the set of parsed nodes
11773 *
11774 * Parse an external general entity
11775 * An external general parsed entity is well-formed if it matches the
11776 * production labeled extParsedEnt.
11777 *
11778 * [78] extParsedEnt ::= TextDecl? content
11779 *
11780 * Returns 0 if the entity is well formed, -1 in case of args problem and
11781 * the parser error code otherwise
11782 */
11783
11784int
11785xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11786 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11787 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11788 ID, lst));
11789}
11790
11791/**
11792 * xmlParseBalancedChunkMemory:
11793 * @doc: the document the chunk pertains to
11794 * @sax: the SAX handler bloc (possibly NULL)
11795 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11796 * @depth: Used for loop detection, use 0
11797 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11798 * @lst: the return value for the set of parsed nodes
11799 *
11800 * Parse a well-balanced chunk of an XML document
11801 * called by the parser
11802 * The allowed sequence for the Well Balanced Chunk is the one defined by
11803 * the content production in the XML grammar:
11804 *
11805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11806 *
11807 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11808 * the parser error code otherwise
11809 */
11810
11811int
11812xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11813 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11814 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11815 depth, string, lst, 0 );
11816}
11817#endif /* LIBXML_SAX1_ENABLED */
11818
11819/**
11820 * xmlParseBalancedChunkMemoryInternal:
11821 * @oldctxt: the existing parsing context
11822 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11823 * @user_data: the user data field for the parser context
11824 * @lst: the return value for the set of parsed nodes
11825 *
11826 *
11827 * Parse a well-balanced chunk of an XML document
11828 * called by the parser
11829 * The allowed sequence for the Well Balanced Chunk is the one defined by
11830 * the content production in the XML grammar:
11831 *
11832 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11833 *
11834 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11835 * error code otherwise
11836 *
11837 * In case recover is set to 1, the nodelist will not be empty even if
11838 * the parsed chunk is not well balanced.
11839 */
11840static xmlParserErrors
11841xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11842 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11843 xmlParserCtxtPtr ctxt;
11844 xmlDocPtr newDoc = NULL;
11845 xmlNodePtr newRoot;
11846 xmlSAXHandlerPtr oldsax = NULL;
11847 xmlNodePtr content = NULL;
11848 xmlNodePtr last = NULL;
11849 int size;
11850 xmlParserErrors ret = XML_ERR_OK;
11851
11852 if (oldctxt->depth > 40) {
11853 return(XML_ERR_ENTITY_LOOP);
11854 }
11855
11856
11857 if (lst != NULL)
11858 *lst = NULL;
11859 if (string == NULL)
11860 return(XML_ERR_INTERNAL_ERROR);
11861
11862 size = xmlStrlen(string);
11863
11864 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11865 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11866 if (user_data != NULL)
11867 ctxt->userData = user_data;
11868 else
11869 ctxt->userData = ctxt;
11870 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11871 ctxt->dict = oldctxt->dict;
11872 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11873 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11874 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11875
11876 oldsax = ctxt->sax;
11877 ctxt->sax = oldctxt->sax;
11878 xmlDetectSAX2(ctxt);
11879 ctxt->replaceEntities = oldctxt->replaceEntities;
11880 ctxt->options = oldctxt->options;
11881
11882 ctxt->_private = oldctxt->_private;
11883 if (oldctxt->myDoc == NULL) {
11884 newDoc = xmlNewDoc(BAD_CAST "1.0");
11885 if (newDoc == NULL) {
11886 ctxt->sax = oldsax;
11887 ctxt->dict = NULL;
11888 xmlFreeParserCtxt(ctxt);
11889 return(XML_ERR_INTERNAL_ERROR);
11890 }
11891 newDoc->dict = ctxt->dict;
11892 xmlDictReference(newDoc->dict);
11893 ctxt->myDoc = newDoc;
11894 } else {
11895 ctxt->myDoc = oldctxt->myDoc;
11896 content = ctxt->myDoc->children;
11897 last = ctxt->myDoc->last;
11898 }
11899 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11900 if (newRoot == NULL) {
11901 ctxt->sax = oldsax;
11902 ctxt->dict = NULL;
11903 xmlFreeParserCtxt(ctxt);
11904 if (newDoc != NULL) {
11905 xmlFreeDoc(newDoc);
11906 }
11907 return(XML_ERR_INTERNAL_ERROR);
11908 }
11909 ctxt->myDoc->children = NULL;
11910 ctxt->myDoc->last = NULL;
11911 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11912 nodePush(ctxt, ctxt->myDoc->children);
11913 ctxt->instate = XML_PARSER_CONTENT;
11914 ctxt->depth = oldctxt->depth + 1;
11915
11916 ctxt->validate = 0;
11917 ctxt->loadsubset = oldctxt->loadsubset;
11918 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11919 /*
11920 * ID/IDREF registration will be done in xmlValidateElement below
11921 */
11922 ctxt->loadsubset |= XML_SKIP_IDS;
11923 }
11924 ctxt->dictNames = oldctxt->dictNames;
11925 ctxt->attsDefault = oldctxt->attsDefault;
11926 ctxt->attsSpecial = oldctxt->attsSpecial;
11927
11928 xmlParseContent(ctxt);
11929 if ((RAW == '<') && (NXT(1) == '/')) {
11930 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11931 } else if (RAW != 0) {
11932 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11933 }
11934 if (ctxt->node != ctxt->myDoc->children) {
11935 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11936 }
11937
11938 if (!ctxt->wellFormed) {
11939 if (ctxt->errNo == 0)
11940 ret = XML_ERR_INTERNAL_ERROR;
11941 else
11942 ret = (xmlParserErrors)ctxt->errNo;
11943 } else {
11944 ret = XML_ERR_OK;
11945 }
11946
11947 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11948 xmlNodePtr cur;
11949
11950 /*
11951 * Return the newly created nodeset after unlinking it from
11952 * they pseudo parent.
11953 */
11954 cur = ctxt->myDoc->children->children;
11955 *lst = cur;
11956 while (cur != NULL) {
11957#ifdef LIBXML_VALID_ENABLED
11958 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11959 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11960 (cur->type == XML_ELEMENT_NODE)) {
11961 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11962 oldctxt->myDoc, cur);
11963 }
11964#endif /* LIBXML_VALID_ENABLED */
11965 cur->parent = NULL;
11966 cur = cur->next;
11967 }
11968 ctxt->myDoc->children->children = NULL;
11969 }
11970 if (ctxt->myDoc != NULL) {
11971 xmlFreeNode(ctxt->myDoc->children);
11972 ctxt->myDoc->children = content;
11973 ctxt->myDoc->last = last;
11974 }
11975
11976 /*
11977 * Record in the parent context the number of entities replacement
11978 * done when parsing that reference.
11979 */
11980 oldctxt->nbentities += ctxt->nbentities;
11981 /*
11982 * Also record the last error if any
11983 */
11984 if (ctxt->lastError.code != XML_ERR_OK)
11985 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
11986
11987 ctxt->sax = oldsax;
11988 ctxt->dict = NULL;
11989 ctxt->attsDefault = NULL;
11990 ctxt->attsSpecial = NULL;
11991 xmlFreeParserCtxt(ctxt);
11992 if (newDoc != NULL) {
11993 xmlFreeDoc(newDoc);
11994 }
11995
11996 return(ret);
11997}
11998
11999/**
12000 * xmlParseInNodeContext:
12001 * @node: the context node
12002 * @data: the input string
12003 * @datalen: the input string length in bytes
12004 * @options: a combination of xmlParserOption
12005 * @lst: the return value for the set of parsed nodes
12006 *
12007 * Parse a well-balanced chunk of an XML document
12008 * within the context (DTD, namespaces, etc ...) of the given node.
12009 *
12010 * The allowed sequence for the data is a Well Balanced Chunk defined by
12011 * the content production in the XML grammar:
12012 *
12013 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12014 *
12015 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12016 * error code otherwise
12017 */
12018xmlParserErrors
12019xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12020 int options, xmlNodePtr *lst) {
12021#ifdef SAX2
12022 xmlParserCtxtPtr ctxt;
12023 xmlDocPtr doc = NULL;
12024 xmlNodePtr fake, cur;
12025 int nsnr = 0;
12026
12027 xmlParserErrors ret = XML_ERR_OK;
12028
12029 /*
12030 * check all input parameters, grab the document
12031 */
12032 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12033 return(XML_ERR_INTERNAL_ERROR);
12034 switch (node->type) {
12035 case XML_ELEMENT_NODE:
12036 case XML_ATTRIBUTE_NODE:
12037 case XML_TEXT_NODE:
12038 case XML_CDATA_SECTION_NODE:
12039 case XML_ENTITY_REF_NODE:
12040 case XML_PI_NODE:
12041 case XML_COMMENT_NODE:
12042 case XML_DOCUMENT_NODE:
12043 case XML_HTML_DOCUMENT_NODE:
12044 break;
12045 default:
12046 return(XML_ERR_INTERNAL_ERROR);
12047
12048 }
12049 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12050 (node->type != XML_DOCUMENT_NODE) &&
12051 (node->type != XML_HTML_DOCUMENT_NODE))
12052 node = node->parent;
12053 if (node == NULL)
12054 return(XML_ERR_INTERNAL_ERROR);
12055 if (node->type == XML_ELEMENT_NODE)
12056 doc = node->doc;
12057 else
12058 doc = (xmlDocPtr) node;
12059 if (doc == NULL)
12060 return(XML_ERR_INTERNAL_ERROR);
12061
12062 /*
12063 * allocate a context and set-up everything not related to the
12064 * node position in the tree
12065 */
12066 if (doc->type == XML_DOCUMENT_NODE)
12067 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12068#ifdef LIBXML_HTML_ENABLED
12069 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12070 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12071#endif
12072 else
12073 return(XML_ERR_INTERNAL_ERROR);
12074
12075 if (ctxt == NULL)
12076 return(XML_ERR_NO_MEMORY);
12077 fake = xmlNewComment(NULL);
12078 if (fake == NULL) {
12079 xmlFreeParserCtxt(ctxt);
12080 return(XML_ERR_NO_MEMORY);
12081 }
12082 xmlAddChild(node, fake);
12083
12084 /*
12085 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12086 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12087 * we must wait until the last moment to free the original one.
12088 */
12089 if (doc->dict != NULL) {
12090 if (ctxt->dict != NULL)
12091 xmlDictFree(ctxt->dict);
12092 ctxt->dict = doc->dict;
12093 } else
12094 options |= XML_PARSE_NODICT;
12095
12096 xmlCtxtUseOptions(ctxt, options);
12097 xmlDetectSAX2(ctxt);
12098 ctxt->myDoc = doc;
12099
12100 if (node->type == XML_ELEMENT_NODE) {
12101 nodePush(ctxt, node);
12102 /*
12103 * initialize the SAX2 namespaces stack
12104 */
12105 cur = node;
12106 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12107 xmlNsPtr ns = cur->nsDef;
12108 const xmlChar *iprefix, *ihref;
12109
12110 while (ns != NULL) {
12111 if (ctxt->dict) {
12112 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12113 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12114 } else {
12115 iprefix = ns->prefix;
12116 ihref = ns->href;
12117 }
12118
12119 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12120 nsPush(ctxt, iprefix, ihref);
12121 nsnr++;
12122 }
12123 ns = ns->next;
12124 }
12125 cur = cur->parent;
12126 }
12127 ctxt->instate = XML_PARSER_CONTENT;
12128 }
12129
12130 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12131 /*
12132 * ID/IDREF registration will be done in xmlValidateElement below
12133 */
12134 ctxt->loadsubset |= XML_SKIP_IDS;
12135 }
12136
12137#ifdef LIBXML_HTML_ENABLED
12138 if (doc->type == XML_HTML_DOCUMENT_NODE)
12139 __htmlParseContent(ctxt);
12140 else
12141#endif
12142 xmlParseContent(ctxt);
12143
12144 nsPop(ctxt, nsnr);
12145 if ((RAW == '<') && (NXT(1) == '/')) {
12146 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12147 } else if (RAW != 0) {
12148 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12149 }
12150 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12151 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12152 ctxt->wellFormed = 0;
12153 }
12154
12155 if (!ctxt->wellFormed) {
12156 if (ctxt->errNo == 0)
12157 ret = XML_ERR_INTERNAL_ERROR;
12158 else
12159 ret = (xmlParserErrors)ctxt->errNo;
12160 } else {
12161 ret = XML_ERR_OK;
12162 }
12163
12164 /*
12165 * Return the newly created nodeset after unlinking it from
12166 * the pseudo sibling.
12167 */
12168
12169 cur = fake->next;
12170 fake->next = NULL;
12171 node->last = fake;
12172
12173 if (cur != NULL) {
12174 cur->prev = NULL;
12175 }
12176
12177 *lst = cur;
12178
12179 while (cur != NULL) {
12180 cur->parent = NULL;
12181 cur = cur->next;
12182 }
12183
12184 xmlUnlinkNode(fake);
12185 xmlFreeNode(fake);
12186
12187
12188 if (ret != XML_ERR_OK) {
12189 xmlFreeNodeList(*lst);
12190 *lst = NULL;
12191 }
12192
12193 if (doc->dict != NULL)
12194 ctxt->dict = NULL;
12195 xmlFreeParserCtxt(ctxt);
12196
12197 return(ret);
12198#else /* !SAX2 */
12199 return(XML_ERR_INTERNAL_ERROR);
12200#endif
12201}
12202
12203#ifdef LIBXML_SAX1_ENABLED
12204/**
12205 * xmlParseBalancedChunkMemoryRecover:
12206 * @doc: the document the chunk pertains to
12207 * @sax: the SAX handler bloc (possibly NULL)
12208 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12209 * @depth: Used for loop detection, use 0
12210 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12211 * @lst: the return value for the set of parsed nodes
12212 * @recover: return nodes even if the data is broken (use 0)
12213 *
12214 *
12215 * Parse a well-balanced chunk of an XML document
12216 * called by the parser
12217 * The allowed sequence for the Well Balanced Chunk is the one defined by
12218 * the content production in the XML grammar:
12219 *
12220 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12221 *
12222 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12223 * the parser error code otherwise
12224 *
12225 * In case recover is set to 1, the nodelist will not be empty even if
12226 * the parsed chunk is not well balanced.
12227 */
12228int
12229xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12230 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12231 int recover) {
12232 xmlParserCtxtPtr ctxt;
12233 xmlDocPtr newDoc;
12234 xmlSAXHandlerPtr oldsax = NULL;
12235 xmlNodePtr content, newRoot;
12236 int size;
12237 int ret = 0;
12238
12239 if (depth > 40) {
12240 return(XML_ERR_ENTITY_LOOP);
12241 }
12242
12243
12244 if (lst != NULL)
12245 *lst = NULL;
12246 if (string == NULL)
12247 return(-1);
12248
12249 size = xmlStrlen(string);
12250
12251 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12252 if (ctxt == NULL) return(-1);
12253 ctxt->userData = ctxt;
12254 if (sax != NULL) {
12255 oldsax = ctxt->sax;
12256 ctxt->sax = sax;
12257 if (user_data != NULL)
12258 ctxt->userData = user_data;
12259 }
12260 newDoc = xmlNewDoc(BAD_CAST "1.0");
12261 if (newDoc == NULL) {
12262 xmlFreeParserCtxt(ctxt);
12263 return(-1);
12264 }
12265 if ((doc != NULL) && (doc->dict != NULL)) {
12266 xmlDictFree(ctxt->dict);
12267 ctxt->dict = doc->dict;
12268 xmlDictReference(ctxt->dict);
12269 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12270 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12271 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12272 ctxt->dictNames = 1;
12273 } else {
12274 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12275 }
12276 if (doc != NULL) {
12277 newDoc->intSubset = doc->intSubset;
12278 newDoc->extSubset = doc->extSubset;
12279 }
12280 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12281 if (newRoot == NULL) {
12282 if (sax != NULL)
12283 ctxt->sax = oldsax;
12284 xmlFreeParserCtxt(ctxt);
12285 newDoc->intSubset = NULL;
12286 newDoc->extSubset = NULL;
12287 xmlFreeDoc(newDoc);
12288 return(-1);
12289 }
12290 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12291 nodePush(ctxt, newRoot);
12292 if (doc == NULL) {
12293 ctxt->myDoc = newDoc;
12294 } else {
12295 ctxt->myDoc = newDoc;
12296 newDoc->children->doc = doc;
12297 /* Ensure that doc has XML spec namespace */
12298 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12299 newDoc->oldNs = doc->oldNs;
12300 }
12301 ctxt->instate = XML_PARSER_CONTENT;
12302 ctxt->depth = depth;
12303
12304 /*
12305 * Doing validity checking on chunk doesn't make sense
12306 */
12307 ctxt->validate = 0;
12308 ctxt->loadsubset = 0;
12309 xmlDetectSAX2(ctxt);
12310
12311 if ( doc != NULL ){
12312 content = doc->children;
12313 doc->children = NULL;
12314 xmlParseContent(ctxt);
12315 doc->children = content;
12316 }
12317 else {
12318 xmlParseContent(ctxt);
12319 }
12320 if ((RAW == '<') && (NXT(1) == '/')) {
12321 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12322 } else if (RAW != 0) {
12323 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12324 }
12325 if (ctxt->node != newDoc->children) {
12326 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12327 }
12328
12329 if (!ctxt->wellFormed) {
12330 if (ctxt->errNo == 0)
12331 ret = 1;
12332 else
12333 ret = ctxt->errNo;
12334 } else {
12335 ret = 0;
12336 }
12337
12338 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12339 xmlNodePtr cur;
12340
12341 /*
12342 * Return the newly created nodeset after unlinking it from
12343 * they pseudo parent.
12344 */
12345 cur = newDoc->children->children;
12346 *lst = cur;
12347 while (cur != NULL) {
12348 xmlSetTreeDoc(cur, doc);
12349 cur->parent = NULL;
12350 cur = cur->next;
12351 }
12352 newDoc->children->children = NULL;
12353 }
12354
12355 if (sax != NULL)
12356 ctxt->sax = oldsax;
12357 xmlFreeParserCtxt(ctxt);
12358 newDoc->intSubset = NULL;
12359 newDoc->extSubset = NULL;
12360 newDoc->oldNs = NULL;
12361 xmlFreeDoc(newDoc);
12362
12363 return(ret);
12364}
12365
12366/**
12367 * xmlSAXParseEntity:
12368 * @sax: the SAX handler block
12369 * @filename: the filename
12370 *
12371 * parse an XML external entity out of context and build a tree.
12372 * It use the given SAX function block to handle the parsing callback.
12373 * If sax is NULL, fallback to the default DOM tree building routines.
12374 *
12375 * [78] extParsedEnt ::= TextDecl? content
12376 *
12377 * This correspond to a "Well Balanced" chunk
12378 *
12379 * Returns the resulting document tree
12380 */
12381
12382xmlDocPtr
12383xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12384 xmlDocPtr ret;
12385 xmlParserCtxtPtr ctxt;
12386
12387 ctxt = xmlCreateFileParserCtxt(filename);
12388 if (ctxt == NULL) {
12389 return(NULL);
12390 }
12391 if (sax != NULL) {
12392 if (ctxt->sax != NULL)
12393 xmlFree(ctxt->sax);
12394 ctxt->sax = sax;
12395 ctxt->userData = NULL;
12396 }
12397
12398 xmlParseExtParsedEnt(ctxt);
12399
12400 if (ctxt->wellFormed)
12401 ret = ctxt->myDoc;
12402 else {
12403 ret = NULL;
12404 xmlFreeDoc(ctxt->myDoc);
12405 ctxt->myDoc = NULL;
12406 }
12407 if (sax != NULL)
12408 ctxt->sax = NULL;
12409 xmlFreeParserCtxt(ctxt);
12410
12411 return(ret);
12412}
12413
12414/**
12415 * xmlParseEntity:
12416 * @filename: the filename
12417 *
12418 * parse an XML external entity out of context and build a tree.
12419 *
12420 * [78] extParsedEnt ::= TextDecl? content
12421 *
12422 * This correspond to a "Well Balanced" chunk
12423 *
12424 * Returns the resulting document tree
12425 */
12426
12427xmlDocPtr
12428xmlParseEntity(const char *filename) {
12429 return(xmlSAXParseEntity(NULL, filename));
12430}
12431#endif /* LIBXML_SAX1_ENABLED */
12432
12433/**
12434 * xmlCreateEntityParserCtxt:
12435 * @URL: the entity URL
12436 * @ID: the entity PUBLIC ID
12437 * @base: a possible base for the target URI
12438 *
12439 * Create a parser context for an external entity
12440 * Automatic support for ZLIB/Compress compressed document is provided
12441 * by default if found at compile-time.
12442 *
12443 * Returns the new parser context or NULL
12444 */
12445xmlParserCtxtPtr
12446xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12447 const xmlChar *base) {
12448 xmlParserCtxtPtr ctxt;
12449 xmlParserInputPtr inputStream;
12450 char *directory = NULL;
12451 xmlChar *uri;
12452
12453 ctxt = xmlNewParserCtxt();
12454 if (ctxt == NULL) {
12455 return(NULL);
12456 }
12457
12458 uri = xmlBuildURI(URL, base);
12459
12460 if (uri == NULL) {
12461 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12462 if (inputStream == NULL) {
12463 xmlFreeParserCtxt(ctxt);
12464 return(NULL);
12465 }
12466
12467 inputPush(ctxt, inputStream);
12468
12469 if ((ctxt->directory == NULL) && (directory == NULL))
12470 directory = xmlParserGetDirectory((char *)URL);
12471 if ((ctxt->directory == NULL) && (directory != NULL))
12472 ctxt->directory = directory;
12473 } else {
12474 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12475 if (inputStream == NULL) {
12476 xmlFree(uri);
12477 xmlFreeParserCtxt(ctxt);
12478 return(NULL);
12479 }
12480
12481 inputPush(ctxt, inputStream);
12482
12483 if ((ctxt->directory == NULL) && (directory == NULL))
12484 directory = xmlParserGetDirectory((char *)uri);
12485 if ((ctxt->directory == NULL) && (directory != NULL))
12486 ctxt->directory = directory;
12487 xmlFree(uri);
12488 }
12489 return(ctxt);
12490}
12491
12492/************************************************************************
12493 * *
12494 * Front ends when parsing from a file *
12495 * *
12496 ************************************************************************/
12497
12498/**
12499 * xmlCreateURLParserCtxt:
12500 * @filename: the filename or URL
12501 * @options: a combination of xmlParserOption
12502 *
12503 * Create a parser context for a file or URL content.
12504 * Automatic support for ZLIB/Compress compressed document is provided
12505 * by default if found at compile-time and for file accesses
12506 *
12507 * Returns the new parser context or NULL
12508 */
12509xmlParserCtxtPtr
12510xmlCreateURLParserCtxt(const char *filename, int options)
12511{
12512 xmlParserCtxtPtr ctxt;
12513 xmlParserInputPtr inputStream;
12514 char *directory = NULL;
12515
12516 ctxt = xmlNewParserCtxt();
12517 if (ctxt == NULL) {
12518 xmlErrMemory(NULL, "cannot allocate parser context");
12519 return(NULL);
12520 }
12521
12522 if (options)
12523 xmlCtxtUseOptions(ctxt, options);
12524 ctxt->linenumbers = 1;
12525
12526 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12527 if (inputStream == NULL) {
12528 xmlFreeParserCtxt(ctxt);
12529 return(NULL);
12530 }
12531
12532 inputPush(ctxt, inputStream);
12533 if ((ctxt->directory == NULL) && (directory == NULL))
12534 directory = xmlParserGetDirectory(filename);
12535 if ((ctxt->directory == NULL) && (directory != NULL))
12536 ctxt->directory = directory;
12537
12538 return(ctxt);
12539}
12540
12541/**
12542 * xmlCreateFileParserCtxt:
12543 * @filename: the filename
12544 *
12545 * Create a parser context for a file content.
12546 * Automatic support for ZLIB/Compress compressed document is provided
12547 * by default if found at compile-time.
12548 *
12549 * Returns the new parser context or NULL
12550 */
12551xmlParserCtxtPtr
12552xmlCreateFileParserCtxt(const char *filename)
12553{
12554 return(xmlCreateURLParserCtxt(filename, 0));
12555}
12556
12557#ifdef LIBXML_SAX1_ENABLED
12558/**
12559 * xmlSAXParseFileWithData:
12560 * @sax: the SAX handler block
12561 * @filename: the filename
12562 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12563 * documents
12564 * @data: the userdata
12565 *
12566 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12567 * compressed document is provided by default if found at compile-time.
12568 * It use the given SAX function block to handle the parsing callback.
12569 * If sax is NULL, fallback to the default DOM tree building routines.
12570 *
12571 * User data (void *) is stored within the parser context in the
12572 * context's _private member, so it is available nearly everywhere in libxml
12573 *
12574 * Returns the resulting document tree
12575 */
12576
12577xmlDocPtr
12578xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12579 int recovery, void *data) {
12580 xmlDocPtr ret;
12581 xmlParserCtxtPtr ctxt;
12582 char *directory = NULL;
12583
12584 xmlInitParser();
12585
12586 ctxt = xmlCreateFileParserCtxt(filename);
12587 if (ctxt == NULL) {
12588 return(NULL);
12589 }
12590 if (sax != NULL) {
12591 if (ctxt->sax != NULL)
12592 xmlFree(ctxt->sax);
12593 ctxt->sax = sax;
12594 }
12595 xmlDetectSAX2(ctxt);
12596 if (data!=NULL) {
12597 ctxt->_private = data;
12598 }
12599
12600 if ((ctxt->directory == NULL) && (directory == NULL))
12601 directory = xmlParserGetDirectory(filename);
12602 if ((ctxt->directory == NULL) && (directory != NULL))
12603 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12604
12605 ctxt->recovery = recovery;
12606
12607 xmlParseDocument(ctxt);
12608
12609 if ((ctxt->wellFormed) || recovery) {
12610 ret = ctxt->myDoc;
12611 if (ret != NULL) {
12612 if (ctxt->input->buf->compressed > 0)
12613 ret->compression = 9;
12614 else
12615 ret->compression = ctxt->input->buf->compressed;
12616 }
12617 }
12618 else {
12619 ret = NULL;
12620 xmlFreeDoc(ctxt->myDoc);
12621 ctxt->myDoc = NULL;
12622 }
12623 if (sax != NULL)
12624 ctxt->sax = NULL;
12625 xmlFreeParserCtxt(ctxt);
12626
12627 return(ret);
12628}
12629
12630/**
12631 * xmlSAXParseFile:
12632 * @sax: the SAX handler block
12633 * @filename: the filename
12634 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12635 * documents
12636 *
12637 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12638 * compressed document is provided by default if found at compile-time.
12639 * It use the given SAX function block to handle the parsing callback.
12640 * If sax is NULL, fallback to the default DOM tree building routines.
12641 *
12642 * Returns the resulting document tree
12643 */
12644
12645xmlDocPtr
12646xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12647 int recovery) {
12648 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12649}
12650
12651/**
12652 * xmlRecoverDoc:
12653 * @cur: a pointer to an array of xmlChar
12654 *
12655 * parse an XML in-memory document and build a tree.
12656 * In the case the document is not Well Formed, a tree is built anyway
12657 *
12658 * Returns the resulting document tree
12659 */
12660
12661xmlDocPtr
12662xmlRecoverDoc(xmlChar *cur) {
12663 return(xmlSAXParseDoc(NULL, cur, 1));
12664}
12665
12666/**
12667 * xmlParseFile:
12668 * @filename: the filename
12669 *
12670 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12671 * compressed document is provided by default if found at compile-time.
12672 *
12673 * Returns the resulting document tree if the file was wellformed,
12674 * NULL otherwise.
12675 */
12676
12677xmlDocPtr
12678xmlParseFile(const char *filename) {
12679 return(xmlSAXParseFile(NULL, filename, 0));
12680}
12681
12682/**
12683 * xmlRecoverFile:
12684 * @filename: the filename
12685 *
12686 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12687 * compressed document is provided by default if found at compile-time.
12688 * In the case the document is not Well Formed, a tree is built anyway
12689 *
12690 * Returns the resulting document tree
12691 */
12692
12693xmlDocPtr
12694xmlRecoverFile(const char *filename) {
12695 return(xmlSAXParseFile(NULL, filename, 1));
12696}
12697
12698
12699/**
12700 * xmlSetupParserForBuffer:
12701 * @ctxt: an XML parser context
12702 * @buffer: a xmlChar * buffer
12703 * @filename: a file name
12704 *
12705 * Setup the parser context to parse a new buffer; Clears any prior
12706 * contents from the parser context. The buffer parameter must not be
12707 * NULL, but the filename parameter can be
12708 */
12709void
12710xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12711 const char* filename)
12712{
12713 xmlParserInputPtr input;
12714
12715 if ((ctxt == NULL) || (buffer == NULL))
12716 return;
12717
12718 input = xmlNewInputStream(ctxt);
12719 if (input == NULL) {
12720 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12721 xmlClearParserCtxt(ctxt);
12722 return;
12723 }
12724
12725 xmlClearParserCtxt(ctxt);
12726 if (filename != NULL)
12727 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12728 input->base = buffer;
12729 input->cur = buffer;
12730 input->end = &buffer[xmlStrlen(buffer)];
12731 inputPush(ctxt, input);
12732}
12733
12734/**
12735 * xmlSAXUserParseFile:
12736 * @sax: a SAX handler
12737 * @user_data: The user data returned on SAX callbacks
12738 * @filename: a file name
12739 *
12740 * parse an XML file and call the given SAX handler routines.
12741 * Automatic support for ZLIB/Compress compressed document is provided
12742 *
12743 * Returns 0 in case of success or a error number otherwise
12744 */
12745int
12746xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12747 const char *filename) {
12748 int ret = 0;
12749 xmlParserCtxtPtr ctxt;
12750
12751 ctxt = xmlCreateFileParserCtxt(filename);
12752 if (ctxt == NULL) return -1;
12753 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12754 xmlFree(ctxt->sax);
12755 ctxt->sax = sax;
12756 xmlDetectSAX2(ctxt);
12757
12758 if (user_data != NULL)
12759 ctxt->userData = user_data;
12760
12761 xmlParseDocument(ctxt);
12762
12763 if (ctxt->wellFormed)
12764 ret = 0;
12765 else {
12766 if (ctxt->errNo != 0)
12767 ret = ctxt->errNo;
12768 else
12769 ret = -1;
12770 }
12771 if (sax != NULL)
12772 ctxt->sax = NULL;
12773 if (ctxt->myDoc != NULL) {
12774 xmlFreeDoc(ctxt->myDoc);
12775 ctxt->myDoc = NULL;
12776 }
12777 xmlFreeParserCtxt(ctxt);
12778
12779 return ret;
12780}
12781#endif /* LIBXML_SAX1_ENABLED */
12782
12783/************************************************************************
12784 * *
12785 * Front ends when parsing from memory *
12786 * *
12787 ************************************************************************/
12788
12789/**
12790 * xmlCreateMemoryParserCtxt:
12791 * @buffer: a pointer to a char array
12792 * @size: the size of the array
12793 *
12794 * Create a parser context for an XML in-memory document.
12795 *
12796 * Returns the new parser context or NULL
12797 */
12798xmlParserCtxtPtr
12799xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12800 xmlParserCtxtPtr ctxt;
12801 xmlParserInputPtr input;
12802 xmlParserInputBufferPtr buf;
12803
12804 if (buffer == NULL)
12805 return(NULL);
12806 if (size <= 0)
12807 return(NULL);
12808
12809 ctxt = xmlNewParserCtxt();
12810 if (ctxt == NULL)
12811 return(NULL);
12812
12813 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12814 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12815 if (buf == NULL) {
12816 xmlFreeParserCtxt(ctxt);
12817 return(NULL);
12818 }
12819
12820 input = xmlNewInputStream(ctxt);
12821 if (input == NULL) {
12822 xmlFreeParserInputBuffer(buf);
12823 xmlFreeParserCtxt(ctxt);
12824 return(NULL);
12825 }
12826
12827 input->filename = NULL;
12828 input->buf = buf;
12829 input->base = input->buf->buffer->content;
12830 input->cur = input->buf->buffer->content;
12831 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12832
12833 inputPush(ctxt, input);
12834 return(ctxt);
12835}
12836
12837#ifdef LIBXML_SAX1_ENABLED
12838/**
12839 * xmlSAXParseMemoryWithData:
12840 * @sax: the SAX handler block
12841 * @buffer: an pointer to a char array
12842 * @size: the size of the array
12843 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12844 * documents
12845 * @data: the userdata
12846 *
12847 * parse an XML in-memory block and use the given SAX function block
12848 * to handle the parsing callback. If sax is NULL, fallback to the default
12849 * DOM tree building routines.
12850 *
12851 * User data (void *) is stored within the parser context in the
12852 * context's _private member, so it is available nearly everywhere in libxml
12853 *
12854 * Returns the resulting document tree
12855 */
12856
12857xmlDocPtr
12858xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12859 int size, int recovery, void *data) {
12860 xmlDocPtr ret;
12861 xmlParserCtxtPtr ctxt;
12862
12863 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12864 if (ctxt == NULL) return(NULL);
12865 if (sax != NULL) {
12866 if (ctxt->sax != NULL)
12867 xmlFree(ctxt->sax);
12868 ctxt->sax = sax;
12869 }
12870 xmlDetectSAX2(ctxt);
12871 if (data!=NULL) {
12872 ctxt->_private=data;
12873 }
12874
12875 ctxt->recovery = recovery;
12876
12877 xmlParseDocument(ctxt);
12878
12879 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12880 else {
12881 ret = NULL;
12882 xmlFreeDoc(ctxt->myDoc);
12883 ctxt->myDoc = NULL;
12884 }
12885 if (sax != NULL)
12886 ctxt->sax = NULL;
12887 xmlFreeParserCtxt(ctxt);
12888
12889 return(ret);
12890}
12891
12892/**
12893 * xmlSAXParseMemory:
12894 * @sax: the SAX handler block
12895 * @buffer: an pointer to a char array
12896 * @size: the size of the array
12897 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12898 * documents
12899 *
12900 * parse an XML in-memory block and use the given SAX function block
12901 * to handle the parsing callback. If sax is NULL, fallback to the default
12902 * DOM tree building routines.
12903 *
12904 * Returns the resulting document tree
12905 */
12906xmlDocPtr
12907xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12908 int size, int recovery) {
12909 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12910}
12911
12912/**
12913 * xmlParseMemory:
12914 * @buffer: an pointer to a char array
12915 * @size: the size of the array
12916 *
12917 * parse an XML in-memory block and build a tree.
12918 *
12919 * Returns the resulting document tree
12920 */
12921
12922xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12923 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12924}
12925
12926/**
12927 * xmlRecoverMemory:
12928 * @buffer: an pointer to a char array
12929 * @size: the size of the array
12930 *
12931 * parse an XML in-memory block and build a tree.
12932 * In the case the document is not Well Formed, a tree is built anyway
12933 *
12934 * Returns the resulting document tree
12935 */
12936
12937xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12938 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12939}
12940
12941/**
12942 * xmlSAXUserParseMemory:
12943 * @sax: a SAX handler
12944 * @user_data: The user data returned on SAX callbacks
12945 * @buffer: an in-memory XML document input
12946 * @size: the length of the XML document in bytes
12947 *
12948 * A better SAX parsing routine.
12949 * parse an XML in-memory buffer and call the given SAX handler routines.
12950 *
12951 * Returns 0 in case of success or a error number otherwise
12952 */
12953int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12954 const char *buffer, int size) {
12955 int ret = 0;
12956 xmlParserCtxtPtr ctxt;
12957
12958 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12959 if (ctxt == NULL) return -1;
12960 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12961 xmlFree(ctxt->sax);
12962 ctxt->sax = sax;
12963 xmlDetectSAX2(ctxt);
12964
12965 if (user_data != NULL)
12966 ctxt->userData = user_data;
12967
12968 xmlParseDocument(ctxt);
12969
12970 if (ctxt->wellFormed)
12971 ret = 0;
12972 else {
12973 if (ctxt->errNo != 0)
12974 ret = ctxt->errNo;
12975 else
12976 ret = -1;
12977 }
12978 if (sax != NULL)
12979 ctxt->sax = NULL;
12980 if (ctxt->myDoc != NULL) {
12981 xmlFreeDoc(ctxt->myDoc);
12982 ctxt->myDoc = NULL;
12983 }
12984 xmlFreeParserCtxt(ctxt);
12985
12986 return ret;
12987}
12988#endif /* LIBXML_SAX1_ENABLED */
12989
12990/**
12991 * xmlCreateDocParserCtxt:
12992 * @cur: a pointer to an array of xmlChar
12993 *
12994 * Creates a parser context for an XML in-memory document.
12995 *
12996 * Returns the new parser context or NULL
12997 */
12998xmlParserCtxtPtr
12999xmlCreateDocParserCtxt(const xmlChar *cur) {
13000 int len;
13001
13002 if (cur == NULL)
13003 return(NULL);
13004 len = xmlStrlen(cur);
13005 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13006}
13007
13008#ifdef LIBXML_SAX1_ENABLED
13009/**
13010 * xmlSAXParseDoc:
13011 * @sax: the SAX handler block
13012 * @cur: a pointer to an array of xmlChar
13013 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13014 * documents
13015 *
13016 * parse an XML in-memory document and build a tree.
13017 * It use the given SAX function block to handle the parsing callback.
13018 * If sax is NULL, fallback to the default DOM tree building routines.
13019 *
13020 * Returns the resulting document tree
13021 */
13022
13023xmlDocPtr
13024xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13025 xmlDocPtr ret;
13026 xmlParserCtxtPtr ctxt;
13027 xmlSAXHandlerPtr oldsax = NULL;
13028
13029 if (cur == NULL) return(NULL);
13030
13031
13032 ctxt = xmlCreateDocParserCtxt(cur);
13033 if (ctxt == NULL) return(NULL);
13034 if (sax != NULL) {
13035 oldsax = ctxt->sax;
13036 ctxt->sax = sax;
13037 ctxt->userData = NULL;
13038 }
13039 xmlDetectSAX2(ctxt);
13040
13041 xmlParseDocument(ctxt);
13042 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13043 else {
13044 ret = NULL;
13045 xmlFreeDoc(ctxt->myDoc);
13046 ctxt->myDoc = NULL;
13047 }
13048 if (sax != NULL)
13049 ctxt->sax = oldsax;
13050 xmlFreeParserCtxt(ctxt);
13051
13052 return(ret);
13053}
13054
13055/**
13056 * xmlParseDoc:
13057 * @cur: a pointer to an array of xmlChar
13058 *
13059 * parse an XML in-memory document and build a tree.
13060 *
13061 * Returns the resulting document tree
13062 */
13063
13064xmlDocPtr
13065xmlParseDoc(const xmlChar *cur) {
13066 return(xmlSAXParseDoc(NULL, cur, 0));
13067}
13068#endif /* LIBXML_SAX1_ENABLED */
13069
13070#ifdef LIBXML_LEGACY_ENABLED
13071/************************************************************************
13072 * *
13073 * Specific function to keep track of entities references *
13074 * and used by the XSLT debugger *
13075 * *
13076 ************************************************************************/
13077
13078static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13079
13080/**
13081 * xmlAddEntityReference:
13082 * @ent : A valid entity
13083 * @firstNode : A valid first node for children of entity
13084 * @lastNode : A valid last node of children entity
13085 *
13086 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13087 */
13088static void
13089xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13090 xmlNodePtr lastNode)
13091{
13092 if (xmlEntityRefFunc != NULL) {
13093 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13094 }
13095}
13096
13097
13098/**
13099 * xmlSetEntityReferenceFunc:
13100 * @func: A valid function
13101 *
13102 * Set the function to call call back when a xml reference has been made
13103 */
13104void
13105xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13106{
13107 xmlEntityRefFunc = func;
13108}
13109#endif /* LIBXML_LEGACY_ENABLED */
13110
13111/************************************************************************
13112 * *
13113 * Miscellaneous *
13114 * *
13115 ************************************************************************/
13116
13117#ifdef LIBXML_XPATH_ENABLED
13118#include <libxml/xpath.h>
13119#endif
13120
13121extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
13122static int xmlParserInitialized = 0;
13123
13124/**
13125 * xmlInitParser:
13126 *
13127 * Initialization function for the XML parser.
13128 * This is not reentrant. Call once before processing in case of
13129 * use in multithreaded programs.
13130 */
13131
13132void
13133xmlInitParser(void) {
13134 if (xmlParserInitialized != 0)
13135 return;
13136
13137#ifdef LIBXML_THREAD_ENABLED
13138 __xmlGlobalInitMutexLock();
13139 if (xmlParserInitialized == 0) {
13140#endif
13141 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13142 (xmlGenericError == NULL))
13143 initGenericErrorDefaultFunc(NULL);
13144 xmlInitGlobals();
13145 xmlInitThreads();
13146 xmlInitMemory();
13147 xmlInitCharEncodingHandlers();
13148 xmlDefaultSAXHandlerInit();
13149 xmlRegisterDefaultInputCallbacks();
13150#ifdef LIBXML_OUTPUT_ENABLED
13151 xmlRegisterDefaultOutputCallbacks();
13152#endif /* LIBXML_OUTPUT_ENABLED */
13153#ifdef LIBXML_HTML_ENABLED
13154 htmlInitAutoClose();
13155 htmlDefaultSAXHandlerInit();
13156#endif
13157#ifdef LIBXML_XPATH_ENABLED
13158 xmlXPathInit();
13159#endif
13160 xmlParserInitialized = 1;
13161#ifdef LIBXML_THREAD_ENABLED
13162 }
13163 __xmlGlobalInitMutexUnlock();
13164#endif
13165}
13166
13167/**
13168 * xmlCleanupParser:
13169 *
13170 * Cleanup function for the XML library. It tries to reclaim all
13171 * parsing related global memory allocated for the library processing.
13172 * It doesn't deallocate any document related memory. Calling this
13173 * function should not prevent reusing the library but one should
13174 * call xmlCleanupParser() only when the process has
13175 * finished using the library or XML document built with it.
13176 */
13177
13178void
13179xmlCleanupParser(void) {
13180 if (!xmlParserInitialized)
13181 return;
13182
13183 xmlCleanupCharEncodingHandlers();
13184#ifdef LIBXML_CATALOG_ENABLED
13185 xmlCatalogCleanup();
13186#endif
13187 xmlDictCleanup();
13188 xmlCleanupInputCallbacks();
13189#ifdef LIBXML_OUTPUT_ENABLED
13190 xmlCleanupOutputCallbacks();
13191#endif
13192#ifdef LIBXML_SCHEMAS_ENABLED
13193 xmlSchemaCleanupTypes();
13194 xmlRelaxNGCleanupTypes();
13195#endif
13196 xmlCleanupGlobals();
13197 xmlResetLastError();
13198 xmlCleanupThreads(); /* must be last if called not from the main thread */
13199 xmlCleanupMemory();
13200 xmlParserInitialized = 0;
13201}
13202
13203/************************************************************************
13204 * *
13205 * New set (2.6.0) of simpler and more flexible APIs *
13206 * *
13207 ************************************************************************/
13208
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope (a variable named dict must be visible where the macro
 * is used).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: it is safe in an unbraced if/else and the
 * terminating semicolon is supplied by the caller.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
13220
13221/**
13222 * xmlCtxtReset:
13223 * @ctxt: an XML parser context
13224 *
13225 * Reset a parser context
13226 */
13227void
13228xmlCtxtReset(xmlParserCtxtPtr ctxt)
13229{
13230 xmlParserInputPtr input;
13231 xmlDictPtr dict;
13232
13233 if (ctxt == NULL)
13234 return;
13235
13236 dict = ctxt->dict;
13237
13238 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13239 xmlFreeInputStream(input);
13240 }
13241 ctxt->inputNr = 0;
13242 ctxt->input = NULL;
13243
13244 ctxt->spaceNr = 0;
13245 if (ctxt->spaceTab != NULL) {
13246 ctxt->spaceTab[0] = -1;
13247 ctxt->space = &ctxt->spaceTab[0];
13248 } else {
13249 ctxt->space = NULL;
13250 }
13251
13252
13253 ctxt->nodeNr = 0;
13254 ctxt->node = NULL;
13255
13256 ctxt->nameNr = 0;
13257 ctxt->name = NULL;
13258
13259 DICT_FREE(ctxt->version);
13260 ctxt->version = NULL;
13261 DICT_FREE(ctxt->encoding);
13262 ctxt->encoding = NULL;
13263 DICT_FREE(ctxt->directory);
13264 ctxt->directory = NULL;
13265 DICT_FREE(ctxt->extSubURI);
13266 ctxt->extSubURI = NULL;
13267 DICT_FREE(ctxt->extSubSystem);
13268 ctxt->extSubSystem = NULL;
13269 if (ctxt->myDoc != NULL)
13270 xmlFreeDoc(ctxt->myDoc);
13271 ctxt->myDoc = NULL;
13272
13273 ctxt->standalone = -1;
13274 ctxt->hasExternalSubset = 0;
13275 ctxt->hasPErefs = 0;
13276 ctxt->html = 0;
13277 ctxt->external = 0;
13278 ctxt->instate = XML_PARSER_START;
13279 ctxt->token = 0;
13280
13281 ctxt->wellFormed = 1;
13282 ctxt->nsWellFormed = 1;
13283 ctxt->disableSAX = 0;
13284 ctxt->valid = 1;
13285#if 0
13286 ctxt->vctxt.userData = ctxt;
13287 ctxt->vctxt.error = xmlParserValidityError;
13288 ctxt->vctxt.warning = xmlParserValidityWarning;
13289#endif
13290 ctxt->record_info = 0;
13291 ctxt->nbChars = 0;
13292 ctxt->checkIndex = 0;
13293 ctxt->inSubset = 0;
13294 ctxt->errNo = XML_ERR_OK;
13295 ctxt->depth = 0;
13296 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13297 ctxt->catalogs = NULL;
13298 ctxt->nbentities = 0;
13299 ctxt->sizeentities = 0;
13300 xmlInitNodeInfoSeq(&ctxt->node_seq);
13301
13302 if (ctxt->attsDefault != NULL) {
13303 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13304 ctxt->attsDefault = NULL;
13305 }
13306 if (ctxt->attsSpecial != NULL) {
13307 xmlHashFree(ctxt->attsSpecial, NULL);
13308 ctxt->attsSpecial = NULL;
13309 }
13310
13311#ifdef LIBXML_CATALOG_ENABLED
13312 if (ctxt->catalogs != NULL)
13313 xmlCatalogFreeLocal(ctxt->catalogs);
13314#endif
13315 if (ctxt->lastError.code != XML_ERR_OK)
13316 xmlResetError(&ctxt->lastError);
13317}
13318
13319/**
13320 * xmlCtxtResetPush:
13321 * @ctxt: an XML parser context
13322 * @chunk: a pointer to an array of chars
13323 * @size: number of chars in the array
13324 * @filename: an optional file name or URI
13325 * @encoding: the document encoding, or NULL
13326 *
13327 * Reset a push parser context
13328 *
13329 * Returns 0 in case of success and 1 in case of error
13330 */
13331int
13332xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13333 int size, const char *filename, const char *encoding)
13334{
13335 xmlParserInputPtr inputStream;
13336 xmlParserInputBufferPtr buf;
13337 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13338
13339 if (ctxt == NULL)
13340 return(1);
13341
13342 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13343 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13344
13345 buf = xmlAllocParserInputBuffer(enc);
13346 if (buf == NULL)
13347 return(1);
13348
13349 if (ctxt == NULL) {
13350 xmlFreeParserInputBuffer(buf);
13351 return(1);
13352 }
13353
13354 xmlCtxtReset(ctxt);
13355
13356 if (ctxt->pushTab == NULL) {
13357 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13358 sizeof(xmlChar *));
13359 if (ctxt->pushTab == NULL) {
13360 xmlErrMemory(ctxt, NULL);
13361 xmlFreeParserInputBuffer(buf);
13362 return(1);
13363 }
13364 }
13365
13366 if (filename == NULL) {
13367 ctxt->directory = NULL;
13368 } else {
13369 ctxt->directory = xmlParserGetDirectory(filename);
13370 }
13371
13372 inputStream = xmlNewInputStream(ctxt);
13373 if (inputStream == NULL) {
13374 xmlFreeParserInputBuffer(buf);
13375 return(1);
13376 }
13377
13378 if (filename == NULL)
13379 inputStream->filename = NULL;
13380 else
13381 inputStream->filename = (char *)
13382 xmlCanonicPath((const xmlChar *) filename);
13383 inputStream->buf = buf;
13384 inputStream->base = inputStream->buf->buffer->content;
13385 inputStream->cur = inputStream->buf->buffer->content;
13386 inputStream->end =
13387 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13388
13389 inputPush(ctxt, inputStream);
13390
13391 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13392 (ctxt->input->buf != NULL)) {
13393 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13394 int cur = ctxt->input->cur - ctxt->input->base;
13395
13396 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13397
13398 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13399 ctxt->input->cur = ctxt->input->base + cur;
13400 ctxt->input->end =
13401 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13402 use];
13403#ifdef DEBUG_PUSH
13404 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13405#endif
13406 }
13407
13408 if (encoding != NULL) {
13409 xmlCharEncodingHandlerPtr hdlr;
13410
13411 hdlr = xmlFindCharEncodingHandler(encoding);
13412 if (hdlr != NULL) {
13413 xmlSwitchToEncoding(ctxt, hdlr);
13414 } else {
13415 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13416 "Unsupported encoding %s\n", BAD_CAST encoding);
13417 }
13418 } else if (enc != XML_CHAR_ENCODING_NONE) {
13419 xmlSwitchEncoding(ctxt, enc);
13420 }
13421
13422 return(0);
13423}
13424
13425/**
13426 * xmlCtxtUseOptions:
13427 * @ctxt: an XML parser context
13428 * @options: a combination of xmlParserOption
13429 *
13430 * Applies the options to the parser context
13431 *
13432 * Returns 0 in case of success, the set of unknown or unimplemented options
13433 * in case of error.
13434 */
13435int
13436xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13437{
13438 if (ctxt == NULL)
13439 return(-1);
13440 if (options & XML_PARSE_RECOVER) {
13441 ctxt->recovery = 1;
13442 options -= XML_PARSE_RECOVER;
13443 } else
13444 ctxt->recovery = 0;
13445 if (options & XML_PARSE_DTDLOAD) {
13446 ctxt->loadsubset = XML_DETECT_IDS;
13447 options -= XML_PARSE_DTDLOAD;
13448 } else
13449 ctxt->loadsubset = 0;
13450 if (options & XML_PARSE_DTDATTR) {
13451 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13452 options -= XML_PARSE_DTDATTR;
13453 }
13454 if (options & XML_PARSE_NOENT) {
13455 ctxt->replaceEntities = 1;
13456 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13457 options -= XML_PARSE_NOENT;
13458 } else
13459 ctxt->replaceEntities = 0;
13460 if (options & XML_PARSE_PEDANTIC) {
13461 ctxt->pedantic = 1;
13462 options -= XML_PARSE_PEDANTIC;
13463 } else
13464 ctxt->pedantic = 0;
13465 if (options & XML_PARSE_NOBLANKS) {
13466 ctxt->keepBlanks = 0;
13467 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13468 options -= XML_PARSE_NOBLANKS;
13469 } else
13470 ctxt->keepBlanks = 1;
13471 if (options & XML_PARSE_DTDVALID) {
13472 ctxt->validate = 1;
13473 if (options & XML_PARSE_NOWARNING)
13474 ctxt->vctxt.warning = NULL;
13475 if (options & XML_PARSE_NOERROR)
13476 ctxt->vctxt.error = NULL;
13477 options -= XML_PARSE_DTDVALID;
13478 } else
13479 ctxt->validate = 0;
13480 if (options & XML_PARSE_NOWARNING) {
13481 ctxt->sax->warning = NULL;
13482 options -= XML_PARSE_NOWARNING;
13483 }
13484 if (options & XML_PARSE_NOERROR) {
13485 ctxt->sax->error = NULL;
13486 ctxt->sax->fatalError = NULL;
13487 options -= XML_PARSE_NOERROR;
13488 }
13489#ifdef LIBXML_SAX1_ENABLED
13490 if (options & XML_PARSE_SAX1) {
13491 ctxt->sax->startElement = xmlSAX2StartElement;
13492 ctxt->sax->endElement = xmlSAX2EndElement;
13493 ctxt->sax->startElementNs = NULL;
13494 ctxt->sax->endElementNs = NULL;
13495 ctxt->sax->initialized = 1;
13496 options -= XML_PARSE_SAX1;
13497 }
13498#endif /* LIBXML_SAX1_ENABLED */
13499 if (options & XML_PARSE_NODICT) {
13500 ctxt->dictNames = 0;
13501 options -= XML_PARSE_NODICT;
13502 } else {
13503 ctxt->dictNames = 1;
13504 }
13505 if (options & XML_PARSE_NOCDATA) {
13506 ctxt->sax->cdataBlock = NULL;
13507 options -= XML_PARSE_NOCDATA;
13508 }
13509 if (options & XML_PARSE_NSCLEAN) {
13510 ctxt->options |= XML_PARSE_NSCLEAN;
13511 options -= XML_PARSE_NSCLEAN;
13512 }
13513 if (options & XML_PARSE_NONET) {
13514 ctxt->options |= XML_PARSE_NONET;
13515 options -= XML_PARSE_NONET;
13516 }
13517 if (options & XML_PARSE_COMPACT) {
13518 ctxt->options |= XML_PARSE_COMPACT;
13519 options -= XML_PARSE_COMPACT;
13520 }
13521 ctxt->linenumbers = 1;
13522 return (options);
13523}
13524
13525/**
13526 * xmlDoRead:
13527 * @ctxt: an XML parser context
13528 * @URL: the base URL to use for the document
13529 * @encoding: the document encoding, or NULL
13530 * @options: a combination of xmlParserOption
13531 * @reuse: keep the context for reuse
13532 *
13533 * Common front-end for the xmlRead functions
13534 *
13535 * Returns the resulting document tree or NULL
13536 */
13537static xmlDocPtr
13538xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13539 int options, int reuse)
13540{
13541 xmlDocPtr ret;
13542
13543 xmlCtxtUseOptions(ctxt, options);
13544 if (encoding != NULL) {
13545 xmlCharEncodingHandlerPtr hdlr;
13546
13547 hdlr = xmlFindCharEncodingHandler(encoding);
13548 if (hdlr != NULL)
13549 xmlSwitchToEncoding(ctxt, hdlr);
13550 }
13551 if ((URL != NULL) && (ctxt->input != NULL) &&
13552 (ctxt->input->filename == NULL))
13553 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13554 xmlParseDocument(ctxt);
13555 if ((ctxt->wellFormed) || ctxt->recovery)
13556 ret = ctxt->myDoc;
13557 else {
13558 ret = NULL;
13559 if (ctxt->myDoc != NULL) {
13560 xmlFreeDoc(ctxt->myDoc);
13561 }
13562 }
13563 ctxt->myDoc = NULL;
13564 if (!reuse) {
13565 xmlFreeParserCtxt(ctxt);
13566 }
13567
13568 return (ret);
13569}
13570
13571/**
13572 * xmlReadDoc:
13573 * @cur: a pointer to a zero terminated string
13574 * @URL: the base URL to use for the document
13575 * @encoding: the document encoding, or NULL
13576 * @options: a combination of xmlParserOption
13577 *
13578 * parse an XML in-memory document and build a tree.
13579 *
13580 * Returns the resulting document tree
13581 */
13582xmlDocPtr
13583xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13584{
13585 xmlParserCtxtPtr ctxt;
13586
13587 if (cur == NULL)
13588 return (NULL);
13589
13590 ctxt = xmlCreateDocParserCtxt(cur);
13591 if (ctxt == NULL)
13592 return (NULL);
13593 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13594}
13595
13596/**
13597 * xmlReadFile:
13598 * @filename: a file or URL
13599 * @encoding: the document encoding, or NULL
13600 * @options: a combination of xmlParserOption
13601 *
13602 * parse an XML file from the filesystem or the network.
13603 *
13604 * Returns the resulting document tree
13605 */
13606xmlDocPtr
13607xmlReadFile(const char *filename, const char *encoding, int options)
13608{
13609 xmlParserCtxtPtr ctxt;
13610
13611 ctxt = xmlCreateURLParserCtxt(filename, options);
13612 if (ctxt == NULL)
13613 return (NULL);
13614 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13615}
13616
13617/**
13618 * xmlReadMemory:
13619 * @buffer: a pointer to a char array
13620 * @size: the size of the array
13621 * @URL: the base URL to use for the document
13622 * @encoding: the document encoding, or NULL
13623 * @options: a combination of xmlParserOption
13624 *
13625 * parse an XML in-memory document and build a tree.
13626 *
13627 * Returns the resulting document tree
13628 */
13629xmlDocPtr
13630xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13631{
13632 xmlParserCtxtPtr ctxt;
13633
13634 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13635 if (ctxt == NULL)
13636 return (NULL);
13637 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13638}
13639
13640/**
13641 * xmlReadFd:
13642 * @fd: an open file descriptor
13643 * @URL: the base URL to use for the document
13644 * @encoding: the document encoding, or NULL
13645 * @options: a combination of xmlParserOption
13646 *
13647 * parse an XML from a file descriptor and build a tree.
13648 * NOTE that the file descriptor will not be closed when the
13649 * reader is closed or reset.
13650 *
13651 * Returns the resulting document tree
13652 */
13653xmlDocPtr
13654xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13655{
13656 xmlParserCtxtPtr ctxt;
13657 xmlParserInputBufferPtr input;
13658 xmlParserInputPtr stream;
13659
13660 if (fd < 0)
13661 return (NULL);
13662
13663 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13664 if (input == NULL)
13665 return (NULL);
13666 input->closecallback = NULL;
13667 ctxt = xmlNewParserCtxt();
13668 if (ctxt == NULL) {
13669 xmlFreeParserInputBuffer(input);
13670 return (NULL);
13671 }
13672 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13673 if (stream == NULL) {
13674 xmlFreeParserInputBuffer(input);
13675 xmlFreeParserCtxt(ctxt);
13676 return (NULL);
13677 }
13678 inputPush(ctxt, stream);
13679 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13680}
13681
13682/**
13683 * xmlReadIO:
13684 * @ioread: an I/O read function
13685 * @ioclose: an I/O close function
13686 * @ioctx: an I/O handler
13687 * @URL: the base URL to use for the document
13688 * @encoding: the document encoding, or NULL
13689 * @options: a combination of xmlParserOption
13690 *
13691 * parse an XML document from I/O functions and source and build a tree.
13692 *
13693 * Returns the resulting document tree
13694 */
13695xmlDocPtr
13696xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13697 void *ioctx, const char *URL, const char *encoding, int options)
13698{
13699 xmlParserCtxtPtr ctxt;
13700 xmlParserInputBufferPtr input;
13701 xmlParserInputPtr stream;
13702
13703 if (ioread == NULL)
13704 return (NULL);
13705
13706 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13707 XML_CHAR_ENCODING_NONE);
13708 if (input == NULL)
13709 return (NULL);
13710 ctxt = xmlNewParserCtxt();
13711 if (ctxt == NULL) {
13712 xmlFreeParserInputBuffer(input);
13713 return (NULL);
13714 }
13715 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13716 if (stream == NULL) {
13717 xmlFreeParserInputBuffer(input);
13718 xmlFreeParserCtxt(ctxt);
13719 return (NULL);
13720 }
13721 inputPush(ctxt, stream);
13722 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13723}
13724
13725/**
13726 * xmlCtxtReadDoc:
13727 * @ctxt: an XML parser context
13728 * @cur: a pointer to a zero terminated string
13729 * @URL: the base URL to use for the document
13730 * @encoding: the document encoding, or NULL
13731 * @options: a combination of xmlParserOption
13732 *
13733 * parse an XML in-memory document and build a tree.
13734 * This reuses the existing @ctxt parser context
13735 *
13736 * Returns the resulting document tree
13737 */
13738xmlDocPtr
13739xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13740 const char *URL, const char *encoding, int options)
13741{
13742 xmlParserInputPtr stream;
13743
13744 if (cur == NULL)
13745 return (NULL);
13746 if (ctxt == NULL)
13747 return (NULL);
13748
13749 xmlCtxtReset(ctxt);
13750
13751 stream = xmlNewStringInputStream(ctxt, cur);
13752 if (stream == NULL) {
13753 return (NULL);
13754 }
13755 inputPush(ctxt, stream);
13756 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13757}
13758
13759/**
13760 * xmlCtxtReadFile:
13761 * @ctxt: an XML parser context
13762 * @filename: a file or URL
13763 * @encoding: the document encoding, or NULL
13764 * @options: a combination of xmlParserOption
13765 *
13766 * parse an XML file from the filesystem or the network.
13767 * This reuses the existing @ctxt parser context
13768 *
13769 * Returns the resulting document tree
13770 */
13771xmlDocPtr
13772xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13773 const char *encoding, int options)
13774{
13775 xmlParserInputPtr stream;
13776
13777 if (filename == NULL)
13778 return (NULL);
13779 if (ctxt == NULL)
13780 return (NULL);
13781
13782 xmlCtxtReset(ctxt);
13783
13784 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13785 if (stream == NULL) {
13786 return (NULL);
13787 }
13788 inputPush(ctxt, stream);
13789 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13790}
13791
13792/**
13793 * xmlCtxtReadMemory:
13794 * @ctxt: an XML parser context
13795 * @buffer: a pointer to a char array
13796 * @size: the size of the array
13797 * @URL: the base URL to use for the document
13798 * @encoding: the document encoding, or NULL
13799 * @options: a combination of xmlParserOption
13800 *
13801 * parse an XML in-memory document and build a tree.
13802 * This reuses the existing @ctxt parser context
13803 *
13804 * Returns the resulting document tree
13805 */
13806xmlDocPtr
13807xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13808 const char *URL, const char *encoding, int options)
13809{
13810 xmlParserInputBufferPtr input;
13811 xmlParserInputPtr stream;
13812
13813 if (ctxt == NULL)
13814 return (NULL);
13815 if (buffer == NULL)
13816 return (NULL);
13817
13818 xmlCtxtReset(ctxt);
13819
13820 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13821 if (input == NULL) {
13822 return(NULL);
13823 }
13824
13825 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13826 if (stream == NULL) {
13827 xmlFreeParserInputBuffer(input);
13828 return(NULL);
13829 }
13830
13831 inputPush(ctxt, stream);
13832 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13833}
13834
13835/**
13836 * xmlCtxtReadFd:
13837 * @ctxt: an XML parser context
13838 * @fd: an open file descriptor
13839 * @URL: the base URL to use for the document
13840 * @encoding: the document encoding, or NULL
13841 * @options: a combination of xmlParserOption
13842 *
13843 * parse an XML from a file descriptor and build a tree.
13844 * This reuses the existing @ctxt parser context
13845 * NOTE that the file descriptor will not be closed when the
13846 * reader is closed or reset.
13847 *
13848 * Returns the resulting document tree
13849 */
13850xmlDocPtr
13851xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13852 const char *URL, const char *encoding, int options)
13853{
13854 xmlParserInputBufferPtr input;
13855 xmlParserInputPtr stream;
13856
13857 if (fd < 0)
13858 return (NULL);
13859 if (ctxt == NULL)
13860 return (NULL);
13861
13862 xmlCtxtReset(ctxt);
13863
13864
13865 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13866 if (input == NULL)
13867 return (NULL);
13868 input->closecallback = NULL;
13869 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13870 if (stream == NULL) {
13871 xmlFreeParserInputBuffer(input);
13872 return (NULL);
13873 }
13874 inputPush(ctxt, stream);
13875 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13876}
13877
13878/**
13879 * xmlCtxtReadIO:
13880 * @ctxt: an XML parser context
13881 * @ioread: an I/O read function
13882 * @ioclose: an I/O close function
13883 * @ioctx: an I/O handler
13884 * @URL: the base URL to use for the document
13885 * @encoding: the document encoding, or NULL
13886 * @options: a combination of xmlParserOption
13887 *
13888 * parse an XML document from I/O functions and source and build a tree.
13889 * This reuses the existing @ctxt parser context
13890 *
13891 * Returns the resulting document tree
13892 */
13893xmlDocPtr
13894xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13895 xmlInputCloseCallback ioclose, void *ioctx,
13896 const char *URL,
13897 const char *encoding, int options)
13898{
13899 xmlParserInputBufferPtr input;
13900 xmlParserInputPtr stream;
13901
13902 if (ioread == NULL)
13903 return (NULL);
13904 if (ctxt == NULL)
13905 return (NULL);
13906
13907 xmlCtxtReset(ctxt);
13908
13909 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13910 XML_CHAR_ENCODING_NONE);
13911 if (input == NULL)
13912 return (NULL);
13913 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13914 if (stream == NULL) {
13915 xmlFreeParserInputBuffer(input);
13916 return (NULL);
13917 }
13918 inputPush(ctxt, stream);
13919 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13920}
13921
13922#define bottom_parser
13923#include "elfgcchack.h"
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette