VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.30/parser.c@ 18639

最後變更 在這個檔案從18639是 16778,由 vboxsync 提交於 16 年 前

libxml2: some fixes from upstream

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Date Revision Author Id
檔案大小: 366.7 KB
 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <string.h>
44#include <stdarg.h>
45#include <libxml/xmlmemory.h>
46#include <libxml/threads.h>
47#include <libxml/globals.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
60#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
64#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the nesting depth accepted while processing an XML
 * document.  The value is an arbitrary safety boundary, not an inherent
 * limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024U;
91
92#define SAX2 1
93
94#define XML_PARSER_BIG_BUFFER_SIZE 300
95#define XML_PARSER_BUFFER_SIZE 100
96
97#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * Processing-instruction targets with an "xml" prefix that the W3C
 * specifications allow.  The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
108
109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113static xmlParserErrors
114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
116 void *user_data, int depth, const xmlChar *URL,
117 const xmlChar *ID, xmlNodePtr *list);
118
119#ifdef LIBXML_LEGACY_ENABLED
120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
123#endif /* LIBXML_LEGACY_ENABLED */
124
125static xmlParserErrors
126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
128
129/************************************************************************
130 * *
131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
152 if (prefix == NULL)
153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
157 else
158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
180{
181 const char *errmsg;
182
183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
190 case XML_ERR_INVALID_DEC_CHARREF:
191 errmsg = "CharRef: invalid decimal value\n";
192 break;
193 case XML_ERR_INVALID_CHARREF:
194 errmsg = "CharRef: invalid value\n";
195 break;
196 case XML_ERR_INTERNAL_ERROR:
197 errmsg = "internal error";
198 break;
199 case XML_ERR_PEREF_AT_EOF:
200 errmsg = "PEReference at end of document\n";
201 break;
202 case XML_ERR_PEREF_IN_PROLOG:
203 errmsg = "PEReference in prolog\n";
204 break;
205 case XML_ERR_PEREF_IN_EPILOG:
206 errmsg = "PEReference in epilog\n";
207 break;
208 case XML_ERR_PEREF_NO_NAME:
209 errmsg = "PEReference: no name\n";
210 break;
211 case XML_ERR_PEREF_SEMICOL_MISSING:
212 errmsg = "PEReference: expecting ';'\n";
213 break;
214 case XML_ERR_ENTITY_LOOP:
215 errmsg = "Detected an entity reference loop\n";
216 break;
217 case XML_ERR_ENTITY_NOT_STARTED:
218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
220 case XML_ERR_ENTITY_PE_INTERNAL:
221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
223 case XML_ERR_ENTITY_NOT_FINISHED:
224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
227 errmsg = "AttValue: \" or ' expected\n";
228 break;
229 case XML_ERR_LT_IN_ATTRIBUTE:
230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
232 case XML_ERR_LITERAL_NOT_STARTED:
233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
235 case XML_ERR_LITERAL_NOT_FINISHED:
236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
238 case XML_ERR_MISPLACED_CDATA_END:
239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
241 case XML_ERR_URI_REQUIRED:
242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
244 case XML_ERR_PUBID_REQUIRED:
245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
247 case XML_ERR_HYPHEN_IN_COMMENT:
248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
250 case XML_ERR_PI_NOT_STARTED:
251 errmsg = "xmlParsePI : no target name\n";
252 break;
253 case XML_ERR_RESERVED_XML_NAME:
254 errmsg = "Invalid PI name\n";
255 break;
256 case XML_ERR_NOTATION_NOT_STARTED:
257 errmsg = "NOTATION: Name expected here\n";
258 break;
259 case XML_ERR_NOTATION_NOT_FINISHED:
260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
262 case XML_ERR_VALUE_REQUIRED:
263 errmsg = "Entity value required\n";
264 break;
265 case XML_ERR_URI_FRAGMENT:
266 errmsg = "Fragment not allowed";
267 break;
268 case XML_ERR_ATTLIST_NOT_STARTED:
269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
271 case XML_ERR_NMTOKEN_REQUIRED:
272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
274 case XML_ERR_ATTLIST_NOT_FINISHED:
275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
277 case XML_ERR_MIXED_NOT_STARTED:
278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
280 case XML_ERR_PCDATA_REQUIRED:
281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
289 case XML_ERR_PEREF_IN_INT_SUBSET:
290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
293 case XML_ERR_GT_REQUIRED:
294 errmsg = "expected '>'\n";
295 break;
296 case XML_ERR_CONDSEC_INVALID:
297 errmsg = "XML conditional section '[' expected\n";
298 break;
299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
306 case XML_ERR_CONDSEC_NOT_FINISHED:
307 errmsg = "XML conditional section not closed\n";
308 break;
309 case XML_ERR_XMLDECL_NOT_STARTED:
310 errmsg = "Text declaration '<?xml' required\n";
311 break;
312 case XML_ERR_XMLDECL_NOT_FINISHED:
313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
315 case XML_ERR_EXT_ENTITY_STANDALONE:
316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
319 errmsg = "EntityRef: expecting ';'\n";
320 break;
321 case XML_ERR_DOCTYPE_NOT_FINISHED:
322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
324 case XML_ERR_LTSLASH_REQUIRED:
325 errmsg = "EndTag: '</' not found\n";
326 break;
327 case XML_ERR_EQUAL_REQUIRED:
328 errmsg = "expected '='\n";
329 break;
330 case XML_ERR_STRING_NOT_CLOSED:
331 errmsg = "String not closed expecting \" or '\n";
332 break;
333 case XML_ERR_STRING_NOT_STARTED:
334 errmsg = "String not started expecting ' or \"\n";
335 break;
336 case XML_ERR_ENCODING_NAME:
337 errmsg = "Invalid XML encoding name\n";
338 break;
339 case XML_ERR_STANDALONE_VALUE:
340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
342 case XML_ERR_DOCUMENT_EMPTY:
343 errmsg = "Document is empty\n";
344 break;
345 case XML_ERR_DOCUMENT_END:
346 errmsg = "Extra content at the end of the document\n";
347 break;
348 case XML_ERR_NOT_WELL_BALANCED:
349 errmsg = "chunk is not well balanced\n";
350 break;
351 case XML_ERR_EXTRA_CONTENT:
352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
354 case XML_ERR_VERSION_MISSING:
355 errmsg = "Malformed declaration expecting version\n";
356 break;
357#if 0
358 case:
359 errmsg = "\n";
360 break;
361#endif
362 default:
363 errmsg = "Unregistered error message\n";
364 }
365 if (ctxt != NULL)
366 ctxt->errNo = error;
367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
375}
376
377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
388{
389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
392 if (ctxt != NULL)
393 ctxt->errNo = error;
394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
401}
402
403/**
404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
417 xmlStructuredErrorFunc schannel = NULL;
418
419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
441 * Handle a validity error.
442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
447 xmlStructuredErrorFunc schannel = NULL;
448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
457 __xmlRaiseError(schannel,
458 ctxt->vctxt.error, ctxt->vctxt.userData,
459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
466}
467
468/**
469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
479 const char *msg, int val)
480{
481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
484 if (ctxt != NULL)
485 ctxt->errNo = error;
486 __xmlRaiseError(NULL, NULL, NULL,
487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
494}
495
496/**
497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
515 if (ctxt != NULL)
516 ctxt->errNo = error;
517 __xmlRaiseError(NULL, NULL, NULL,
518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
526}
527
528/**
529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg, const xmlChar * val)
540{
541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
544 if (ctxt != NULL)
545 ctxt->errNo = error;
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
555}
556
557/**
558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
573 if (ctxt != NULL)
574 ctxt->errNo = error;
575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
596{
597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
600 if (ctxt != NULL)
601 ctxt->errNo = error;
602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
608}
609
610/************************************************************************
611 * *
612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the feature exist, otherwise zero.
623 * Returns zero (0) if the feature does not exist or an unknown
624 * unknown feature is requested, non-zero otherwise.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
630 case XML_WITH_THREAD:
631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
636 case XML_WITH_TREE:
637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
642 case XML_WITH_OUTPUT:
643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
648 case XML_WITH_PUSH:
649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
654 case XML_WITH_READER:
655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
660 case XML_WITH_PATTERN:
661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
666 case XML_WITH_WRITER:
667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
672 case XML_WITH_SAX1:
673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
678 case XML_WITH_FTP:
679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
684 case XML_WITH_HTTP:
685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
690 case XML_WITH_VALID:
691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
696 case XML_WITH_HTML:
697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
702 case XML_WITH_LEGACY:
703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
708 case XML_WITH_C14N:
709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
714 case XML_WITH_CATALOG:
715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
720 case XML_WITH_XPATH:
721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
726 case XML_WITH_XPTR:
727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
732 case XML_WITH_XINCLUDE:
733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
738 case XML_WITH_ICONV:
739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
744 case XML_WITH_ISO8859X:
745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
750 case XML_WITH_UNICODE:
751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
756 case XML_WITH_REGEXP:
757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
762 case XML_WITH_AUTOMATA:
763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
768 case XML_WITH_EXPR:
769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
774 case XML_WITH_SCHEMAS:
775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
780 case XML_WITH_SCHEMATRON:
781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
786 case XML_WITH_MODULES:
787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
792 case XML_WITH_DEBUG:
793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
798 case XML_WITH_DEBUG_MEM:
799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
804 case XML_WITH_DEBUG_RUN:
805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
837#ifdef LIBXML_SAX1_ENABLED
838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
852}
853
854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
906 (4 * 4) * sizeof(const xmlChar *));
907 if (defaults == NULL)
908 goto mem_error;
909 defaults->nbAttrs = 0;
910 defaults->maxAttrs = 4;
911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
917 if (temp == NULL)
918 goto mem_error;
919 defaults = temp;
920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
925 * Split the element name into prefix:localname , the string found
926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
949 xmlErrMemory(ctxt, NULL);
950 return;
951}
952
953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
976 return;
977
978mem_error:
979 xmlErrMemory(ctxt, NULL);
980 return;
981}
982
983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
1055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
1064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
1073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
1075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
1079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
1091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
1097 xmlErrMemory(ctxt, NULL);
1098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
1104 xmlRealloc((char *) ctxt->nsTab,
1105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
1107 xmlErrMemory(ctxt, NULL);
1108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
1149 int *attallocs;
1150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
1153 maxatts = 55; /* allow for 10 attrs by default */
1154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
1156 if (atts == NULL) goto mem_error;
1157 ctxt->atts = atts;
1158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
1161 ctxt->maxatts = maxatts;
1162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
1164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
1166 if (atts == NULL) goto mem_error;
1167 ctxt->atts = atts;
1168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
1172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
1175mem_error:
1176 xmlErrMemory(ctxt, NULL);
1177 return(-1);
1178}
1179
1180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
1183 * @value: the parser input
1184 *
1185 * Pushes a new parser input on top of the input stack
1186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
1188 */
1189int
1190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
1192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
1194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
1201 xmlErrMemory(ctxt, NULL);
1202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
1209/**
1210 * inputPop:
1211 * @ctxt: an XML parser context
1212 *
1213 * Pops the top parser input from the input stack
1214 *
1215 * Returns the input just removed
1216 */
1217xmlParserInputPtr
1218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
1222 if (ctxt == NULL)
1223 return(NULL);
1224 if (ctxt->inputNr <= 0)
1225 return (NULL);
1226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
1232 ctxt->inputTab[ctxt->inputNr] = NULL;
1233 return (ret);
1234}
1235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
1238 * @value: the element node
1239 *
1240 * Pushes a new element node on top of the node stack
1241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
1243 */
1244int
1245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
1247 if (ctxt == NULL) return(0);
1248 if (ctxt->nodeNr >= ctxt->nodeMax) {
1249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
1253 sizeof(ctxt->nodeTab[0]));
1254 if (tmp == NULL) {
1255 xmlErrMemory(ctxt, NULL);
1256 return (0);
1257 }
1258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
1260 }
1261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
1262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
1265 ctxt->instate = XML_PARSER_EOF;
1266 return(0);
1267 }
1268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
1279 */
1280xmlNodePtr
1281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
1285 if (ctxt == NULL) return(NULL);
1286 if (ctxt->nodeNr <= 0)
1287 return (NULL);
1288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
1294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1295 return (ret);
1296}
1297
1298#ifdef LIBXML_PUSH_ENABLED
1299/**
1300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1340 return (ctxt->nameNr++);
1341mem_error:
1342 xmlErrMemory(ctxt, NULL);
1343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
1359 return (NULL);
1360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
1369#endif /* LIBXML_PUSH_ENABLED */
1370
1371/**
1372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
1378 * Returns -1 in case of error, the index in the stack otherwise
1379 */
1380int
1381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1382{
1383 if (ctxt == NULL) return (-1);
1384
1385 if (ctxt->nameNr >= ctxt->nameMax) {
1386 const xmlChar * *tmp;
1387 ctxt->nameMax *= 2;
1388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
1391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
1394 }
1395 ctxt->nameTab = tmp;
1396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
1400mem_error:
1401 xmlErrMemory(ctxt, NULL);
1402 return (-1);
1403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
1412const xmlChar *
1413namePop(xmlParserCtxtPtr ctxt)
1414{
1415 const xmlChar *ret;
1416
1417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
1419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
1425 ctxt->nameTab[ctxt->nameNr] = NULL;
1426 return (ret);
1427}
1428
1429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
1435 xmlErrMemory(ctxt, NULL);
1436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
1444static int spacePop(xmlParserCtxtPtr ctxt) {
1445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
1451 ctxt->space = &ctxt->spaceTab[0];
1452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
/*
 * Macros for accessing the content. They are meant to be used by the
 * parser only and must not be exported. All of them expect a variable
 * named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different
 *            location !
 *   CUR      the current xmlChar value, i.e. an 8 bit value.
 *            It should be used internally by the parser only to compare
 *            against ASCII values, otherwise it would break when running
 *            with UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done. Both macros expand to
 *            the same expression here.
 *   NXT(n)   the n'th next xmlChar. Like CUR it should only be used to
 *            compare against ASCII based substrings.
 *   SKIP(n)  skips n xmlChars; must only be used to skip ASCII defined
 *            strings without newlines within the parser.
 *   NEXT1    skips 1 xmlChar; must only be used to skip 1 non-newline
 *            ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8
 * encoding:
 *
 *   NEXT         skips to the next character, doing the proper decoding
 *                in UTF-8 mode. It also pops finished entities on the
 *                fly.
 *   NEXTL(l)     skips the current unicode character of l xmlChars long.
 *   CUR_CHAR(l)  the current unicode character (int); sets l to the
 *                number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR    same but operates on a string instead of the context.
 *   COPY_BUF     copies the current unicode char to the target buffer and
 *                increments the index.
 *   GROW, SHRINK handling of input buffers.
 */

#define RAW       (*ctxt->input->cur)
#define CUR       (*ctxt->input->cur)
#define NXT(val)  ctxt->input->cur[(val)]
#define CUR_PTR   ctxt->input->cur
1496
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 .. cn. Bytes are read as unsigned char and compared from left to
 * right with short-circuit evaluation, so later bytes are only read once
 * all earlier ones matched.
 *
 * Every argument is parenthesized in the expansion so that arbitrary
 * expressions can be passed. s is expanded once per compared byte and
 * must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * character and column counters. A '%' found at the new position is
 * handed to xmlParserHandlePEReference(), and an exhausted input that
 * cannot be grown any further is popped.
 * The line counter is not maintained; see SKIPL for that.
 * val is expanded three times and must be free of side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that the
 * line and column counters stay correct when newlines are crossed.
 * val is parenthesized so that any integer expression can be passed; it
 * is expanded on every loop iteration and must be free of side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1537
1538#define SHRINK if ((ctxt->progressive == 0) && \
1539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
1548 }
1549
1550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
1552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
1559}
1560
/* Skip blanks at the current position; yields the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character of the current input. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating the column and character
 * counters, and try to read more input when a 0 byte is reached.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping line and column up to date. A '%' found at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input / of string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A length l of 1 stores a single xmlChar, any other length
 * goes through xmlCopyCharMultiByte().
 * The expansion is an unterminated if/else statement: the caller supplies
 * the final ';' and must not use it as the body of an outer if that has
 * its own else.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
1587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1600 int res = 0;
1601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
1606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
1608 /*
1609 * if we are in the document content, go really fast
1610 */
1611 cur = ctxt->input->cur;
1612 while (IS_BLANK_CH(*cur)) {
1613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
1629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
1630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
1645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
1665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
1666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
1670 if ((*ctxt->input->cur == 0) &&
1671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1717 unsigned int val = 0;
1718 int count = 0;
1719 unsigned int outofrange = 0;
1720
1721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
1724 if ((RAW == '&') && (NXT(1) == '#') &&
1725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
1729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
1734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
1740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1741 val = 0;
1742 break;
1743 }
1744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
1747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1752 ctxt->input->col++;
1753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
1756 } else if ((RAW == '&') && (NXT(1) == '#')) {
1757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
1760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
1765 val = val * 10 + (CUR - '0');
1766 else {
1767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1768 val = 0;
1769 break;
1770 }
1771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
1774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1779 ctxt->input->col++;
1780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
1784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
1792 if ((IS_CHAR(val) && (outofrange == 0))) {
1793 return(val);
1794 } else {
1795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
1798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
1820static int
1821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
1824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
1826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
1841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
1842 val = 0;
1843 break;
1844 }
1845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
1848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
1860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
1861 val = 0;
1862 break;
1863 }
1864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
1867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
1873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
1874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
1883 if ((IS_CHAR(val) && (outofrange == 0))) {
1884 return(val);
1885 } else {
1886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
1889 }
1890 return(0);
1891}
1892
1893/**
1894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
1906static xmlParserInputPtr
1907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
1912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
1914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
1924 buffer = xmlMallocAtomic(length);
1925 if (buffer == NULL) {
1926 xmlErrMemory(ctxt, NULL);
1927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
1944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
1973 * - Included as Parameter Entity reference within DTDs
1974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1977 const xmlChar *name;
1978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
1981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
1992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
1993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
1997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
1998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
2004 case XML_PARSER_PUBLIC_LITERAL:
2005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
2008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
2029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2030 return;
2031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
2040 "PEReference: %s\n", name);
2041 if (name == NULL) {
2042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
2061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2062 "PEReference: %%%s; not found\n", name);
2063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
2071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
2079 ctxt->valid = 0;
2080 }
2081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
2084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
2090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2093 * this is done independently.
2094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
2097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
2102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
2106 */
2107 GROW
2108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
2117 }
2118
2119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
2122 xmlParseTextDecl(ctxt);
2123 }
2124 } else {
2125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
2128 }
2129 }
2130 } else {
2131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2132 }
2133 }
2134}
2135
/*
 * growBuffer(buffer): double the capacity of an xmlChar buffer.
 *
 * The macro expects a companion variable named <buffer>_size holding the
 * current capacity, and a label mem_error in the enclosing function which
 * is jumped to when the reallocation fails. On failure <buffer> still
 * points to the old block, while <buffer>_size has already been doubled.
 * The expansion is a brace block.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
									\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2147
2148/**
2149 * xmlStringLenDecodeEntities:
2150 * @ctxt: the parser context
2151 * @str: the input string
2152 * @len: the string length
2153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
2158 * Takes a entity string content and process to do the adequate substitutions.
2159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
2168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
2174 const xmlChar *last;
2175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
2179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2180 return(NULL);
2181 last = str + len;
2182
2183 if ((ctxt->depth > 40) || (ctxt->nbentities >= 500000)) {
2184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2193 if (buffer == NULL) goto mem_error;
2194
2195 /*
2196 * OK loop until we reach one of the ending char or a size limit.
2197 * we are operating on already parsed values.
2198 */
2199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
2203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
2212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2222 goto int_error;
2223 ctxt->nbentities++;
2224 if (ent != NULL)
2225 ctxt->nbentities += ent->checked;
2226 if ((ent != NULL) &&
2227 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2228 if (ent->content != NULL) {
2229 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2230 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2231 growBuffer(buffer);
2232 }
2233 } else {
2234 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2235 "predefined entity has no content\n");
2236 }
2237 } else if ((ent != NULL) && (ent->content != NULL)) {
2238 xmlChar *rep;
2239
2240 ctxt->depth++;
2241 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2242 0, 0, 0);
2243 ctxt->depth--;
2244 if (rep != NULL) {
2245 current = rep;
2246 while (*current != 0) { /* non input consuming loop */
2247 buffer[nbchars++] = *current++;
2248 if (nbchars >
2249 buffer_size - XML_PARSER_BUFFER_SIZE) {
2250 growBuffer(buffer);
2251 }
2252 }
2253 xmlFree(rep);
2254 }
2255 } else if (ent != NULL) {
2256 int i = xmlStrlen(ent->name);
2257 const xmlChar *cur = ent->name;
2258
2259 buffer[nbchars++] = '&';
2260 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2261 growBuffer(buffer);
2262 }
2263 for (;i > 0;i--)
2264 buffer[nbchars++] = *cur++;
2265 buffer[nbchars++] = ';';
2266 }
2267 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2268 if (xmlParserDebugEntities)
2269 xmlGenericError(xmlGenericErrorContext,
2270 "String decoding PE Reference: %.30s\n", str);
2271 ent = xmlParseStringPEReference(ctxt, &str);
2272 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2273 goto int_error;
2274 ctxt->nbentities++;
2275 if (ent != NULL)
2276 ctxt->nbentities += ent->checked;
2277 if (ent != NULL) {
2278 xmlChar *rep;
2279
2280 ctxt->depth++;
2281 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2282 0, 0, 0);
2283 ctxt->depth--;
2284 if (rep != NULL) {
2285 current = rep;
2286 while (*current != 0) { /* non input consuming loop */
2287 buffer[nbchars++] = *current++;
2288 if (nbchars >
2289 buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
2293 xmlFree(rep);
2294 }
2295 }
2296 } else {
2297 COPY_BUF(l,buffer,nbchars,c);
2298 str += l;
2299 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2300 growBuffer(buffer);
2301 }
2302 }
2303 if (str < last)
2304 c = CUR_SCHAR(str, l);
2305 else
2306 c = 0;
2307 }
2308 buffer[nbchars++] = 0;
2309 return(buffer);
2310
2311mem_error:
2312 xmlErrMemory(ctxt, NULL);
2313int_error:
2314 if (buffer != NULL)
2315 xmlFree(buffer);
2316 return(NULL);
2317}
2318
2319/**
2320 * xmlStringDecodeEntities:
2321 * @ctxt: the parser context
2322 * @str: the input string
2323 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2324 * @end: an end marker xmlChar, 0 if none
2325 * @end2: an end marker xmlChar, 0 if none
2326 * @end3: an end marker xmlChar, 0 if none
2327 *
2328 * Takes a entity string content and process to do the adequate substitutions.
2329 *
2330 * [67] Reference ::= EntityRef | CharRef
2331 *
2332 * [69] PEReference ::= '%' Name ';'
2333 *
2334 * Returns A newly allocated string with the substitution done. The caller
2335 * must deallocate it !
2336 */
2337xmlChar *
2338xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2339 xmlChar end, xmlChar end2, xmlChar end3) {
2340 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2341 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2342 end, end2, end3));
2343}
2344
2345/************************************************************************
2346 * *
2347 * Commodity functions, cleanup needed ? *
2348 * *
2349 ************************************************************************/
2350
2351/**
2352 * areBlanks:
2353 * @ctxt: an XML parser context
2354 * @str: a xmlChar *
2355 * @len: the size of @str
2356 * @blank_chars: we know the chars are blanks
2357 *
2358 * Is this a sequence of blank chars that one can ignore ?
2359 *
2360 * Returns 1 if ignorable 0 otherwise.
2361 */
2362
2363static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2364 int blank_chars) {
2365 int i, ret;
2366 xmlNodePtr lastChild;
2367
2368 /*
2369 * Don't spend time trying to differentiate them, the same callback is
2370 * used !
2371 */
2372 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2373 return(0);
2374
2375 /*
2376 * Check for xml:space value.
2377 */
2378 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2379 (*(ctxt->space) == -2))
2380 return(0);
2381
2382 /*
2383 * Check that the string is made of blanks
2384 */
2385 if (blank_chars == 0) {
2386 for (i = 0;i < len;i++)
2387 if (!(IS_BLANK_CH(str[i]))) return(0);
2388 }
2389
2390 /*
2391 * Look if the element is mixed content in the DTD if available
2392 */
2393 if (ctxt->node == NULL) return(0);
2394 if (ctxt->myDoc != NULL) {
2395 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2396 if (ret == 0) return(1);
2397 if (ret == 1) return(0);
2398 }
2399
2400 /*
2401 * Otherwise, heuristic :-\
2402 */
2403 if ((RAW != '<') && (RAW != 0xD)) return(0);
2404 if ((ctxt->node->children == NULL) &&
2405 (RAW == '<') && (NXT(1) == '/')) return(0);
2406
2407 lastChild = xmlGetLastChild(ctxt->node);
2408 if (lastChild == NULL) {
2409 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2410 (ctxt->node->content != NULL)) return(0);
2411 } else if (xmlNodeIsText(lastChild))
2412 return(0);
2413 else if ((ctxt->node->children != NULL) &&
2414 (xmlNodeIsText(ctxt->node->children)))
2415 return(0);
2416 return(1);
2417}
2418
2419/************************************************************************
2420 * *
2421 * Extra stuff for namespace support *
2422 * Relates to http://www.w3.org/TR/WD-xml-names *
2423 * *
2424 ************************************************************************/
2425
2426/**
2427 * xmlSplitQName:
2428 * @ctxt: an XML parser context
2429 * @name: an XML parser context
2430 * @prefix: a xmlChar **
2431 *
2432 * parse an UTF8 encoded XML qualified name string
2433 *
2434 * [NS 5] QName ::= (Prefix ':')? LocalPart
2435 *
2436 * [NS 6] Prefix ::= NCName
2437 *
2438 * [NS 7] LocalPart ::= NCName
2439 *
2440 * Returns the local part, and prefix is updated
2441 * to get the Prefix if any.
2442 */
2443
2444xmlChar *
2445xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2446 xmlChar buf[XML_MAX_NAMELEN + 5];
2447 xmlChar *buffer = NULL;
2448 int len = 0;
2449 int max = XML_MAX_NAMELEN;
2450 xmlChar *ret = NULL;
2451 const xmlChar *cur = name;
2452 int c;
2453
2454 if (prefix == NULL) return(NULL);
2455 *prefix = NULL;
2456
2457 if (cur == NULL) return(NULL);
2458
2459#ifndef XML_XML_NAMESPACE
2460 /* xml: prefix is not really a namespace */
2461 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2462 (cur[2] == 'l') && (cur[3] == ':'))
2463 return(xmlStrdup(name));
2464#endif
2465
2466 /* nasty but well=formed */
2467 if (cur[0] == ':')
2468 return(xmlStrdup(name));
2469
2470 c = *cur++;
2471 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2472 buf[len++] = c;
2473 c = *cur++;
2474 }
2475 if (len >= max) {
2476 /*
2477 * Okay someone managed to make a huge name, so he's ready to pay
2478 * for the processing speed.
2479 */
2480 max = len * 2;
2481
2482 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2483 if (buffer == NULL) {
2484 xmlErrMemory(ctxt, NULL);
2485 return(NULL);
2486 }
2487 memcpy(buffer, buf, len);
2488 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2489 if (len + 10 > max) {
2490 xmlChar *tmp;
2491
2492 max *= 2;
2493 tmp = (xmlChar *) xmlRealloc(buffer,
2494 max * sizeof(xmlChar));
2495 if (tmp == NULL) {
2496 xmlFree(tmp);
2497 xmlErrMemory(ctxt, NULL);
2498 return(NULL);
2499 }
2500 buffer = tmp;
2501 }
2502 buffer[len++] = c;
2503 c = *cur++;
2504 }
2505 buffer[len] = 0;
2506 }
2507
2508 if ((c == ':') && (*cur == 0)) {
2509 if (buffer != NULL)
2510 xmlFree(buffer);
2511 *prefix = NULL;
2512 return(xmlStrdup(name));
2513 }
2514
2515 if (buffer == NULL)
2516 ret = xmlStrndup(buf, len);
2517 else {
2518 ret = buffer;
2519 buffer = NULL;
2520 max = XML_MAX_NAMELEN;
2521 }
2522
2523
2524 if (c == ':') {
2525 c = *cur;
2526 *prefix = ret;
2527 if (c == 0) {
2528 return(xmlStrndup(BAD_CAST "", 0));
2529 }
2530 len = 0;
2531
2532 /*
2533 * Check that the first character is proper to start
2534 * a new name
2535 */
2536 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2537 ((c >= 0x41) && (c <= 0x5A)) ||
2538 (c == '_') || (c == ':'))) {
2539 int l;
2540 int first = CUR_SCHAR(cur, l);
2541
2542 if (!IS_LETTER(first) && (first != '_')) {
2543 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2544 "Name %s is not XML Namespace compliant\n",
2545 name);
2546 }
2547 }
2548 cur++;
2549
2550 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2551 buf[len++] = c;
2552 c = *cur++;
2553 }
2554 if (len >= max) {
2555 /*
2556 * Okay someone managed to make a huge name, so he's ready to pay
2557 * for the processing speed.
2558 */
2559 max = len * 2;
2560
2561 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2562 if (buffer == NULL) {
2563 xmlErrMemory(ctxt, NULL);
2564 return(NULL);
2565 }
2566 memcpy(buffer, buf, len);
2567 while (c != 0) { /* tested bigname2.xml */
2568 if (len + 10 > max) {
2569 xmlChar *tmp;
2570
2571 max *= 2;
2572 tmp = (xmlChar *) xmlRealloc(buffer,
2573 max * sizeof(xmlChar));
2574 if (tmp == NULL) {
2575 xmlErrMemory(ctxt, NULL);
2576 xmlFree(buffer);
2577 return(NULL);
2578 }
2579 buffer = tmp;
2580 }
2581 buffer[len++] = c;
2582 c = *cur++;
2583 }
2584 buffer[len] = 0;
2585 }
2586
2587 if (buffer == NULL)
2588 ret = xmlStrndup(buf, len);
2589 else {
2590 ret = buffer;
2591 }
2592 }
2593
2594 return(ret);
2595}
2596
2597/************************************************************************
2598 * *
2599 * The parser itself *
2600 * Relates to http://www.w3.org/TR/REC-xml *
2601 * *
2602 ************************************************************************/
2603
2604static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
2605static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2606 int *len, int *alloc, int normalize);
2607
2608/**
2609 * xmlParseName:
2610 * @ctxt: an XML parser context
2611 *
2612 * parse an XML name.
2613 *
2614 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2615 * CombiningChar | Extender
2616 *
2617 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2618 *
2619 * [6] Names ::= Name (#x20 Name)*
2620 *
2621 * Returns the Name parsed or NULL
2622 */
2623
2624const xmlChar *
2625xmlParseName(xmlParserCtxtPtr ctxt) {
2626 const xmlChar *in;
2627 const xmlChar *ret;
2628 int count = 0;
2629
2630 GROW;
2631
2632 /*
2633 * Accelerator for simple ASCII names
2634 */
2635 in = ctxt->input->cur;
2636 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2637 ((*in >= 0x41) && (*in <= 0x5A)) ||
2638 (*in == '_') || (*in == ':')) {
2639 in++;
2640 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2641 ((*in >= 0x41) && (*in <= 0x5A)) ||
2642 ((*in >= 0x30) && (*in <= 0x39)) ||
2643 (*in == '_') || (*in == '-') ||
2644 (*in == ':') || (*in == '.'))
2645 in++;
2646 if ((*in > 0) && (*in < 0x80)) {
2647 count = in - ctxt->input->cur;
2648 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2649 ctxt->input->cur = in;
2650 ctxt->nbChars += count;
2651 ctxt->input->col += count;
2652 if (ret == NULL)
2653 xmlErrMemory(ctxt, NULL);
2654 return(ret);
2655 }
2656 }
2657 return(xmlParseNameComplex(ctxt));
2658}
2659
2660/**
2661 * xmlParseNameAndCompare:
2662 * @ctxt: an XML parser context
2663 *
2664 * parse an XML name and compares for match
2665 * (specialized for endtag parsing)
2666 *
2667 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2668 * and the name for mismatch
2669 */
2670
2671static const xmlChar *
2672xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2673 register const xmlChar *cmp = other;
2674 register const xmlChar *in;
2675 const xmlChar *ret;
2676
2677 GROW;
2678
2679 in = ctxt->input->cur;
2680 while (*in != 0 && *in == *cmp) {
2681 ++in;
2682 ++cmp;
2683 ctxt->input->col++;
2684 }
2685 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
2686 /* success */
2687 ctxt->input->cur = in;
2688 return (const xmlChar*) 1;
2689 }
2690 /* failure (or end of input buffer), check with full function */
2691 ret = xmlParseName (ctxt);
2692 /* strings coming from the dictionnary direct compare possible */
2693 if (ret == other) {
2694 return (const xmlChar*) 1;
2695 }
2696 return ret;
2697}
2698
2699static const xmlChar *
2700xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2701 int len = 0, l;
2702 int c;
2703 int count = 0;
2704
2705 /*
2706 * Handler for more complex cases
2707 */
2708 GROW;
2709 c = CUR_CHAR(l);
2710 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2711 (!IS_LETTER(c) && (c != '_') &&
2712 (c != ':'))) {
2713 return(NULL);
2714 }
2715
2716 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2717 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2718 (c == '.') || (c == '-') ||
2719 (c == '_') || (c == ':') ||
2720 (IS_COMBINING(c)) ||
2721 (IS_EXTENDER(c)))) {
2722 if (count++ > 100) {
2723 count = 0;
2724 GROW;
2725 }
2726 len += l;
2727 NEXTL(l);
2728 c = CUR_CHAR(l);
2729 }
2730 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2731 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2732 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2733}
2734
2735/**
2736 * xmlParseStringName:
2737 * @ctxt: an XML parser context
2738 * @str: a pointer to the string pointer (IN/OUT)
2739 *
2740 * parse an XML name.
2741 *
2742 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2743 * CombiningChar | Extender
2744 *
2745 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2746 *
2747 * [6] Names ::= Name (#x20 Name)*
2748 *
2749 * Returns the Name parsed or NULL. The @str pointer
2750 * is updated to the current location in the string.
2751 */
2752
2753static xmlChar *
2754xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2755 xmlChar buf[XML_MAX_NAMELEN + 5];
2756 const xmlChar *cur = *str;
2757 int len = 0, l;
2758 int c;
2759
2760 c = CUR_SCHAR(cur, l);
2761 if (!IS_LETTER(c) && (c != '_') &&
2762 (c != ':')) {
2763 return(NULL);
2764 }
2765
2766 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
2767 (c == '.') || (c == '-') ||
2768 (c == '_') || (c == ':') ||
2769 (IS_COMBINING(c)) ||
2770 (IS_EXTENDER(c))) {
2771 COPY_BUF(l,buf,len,c);
2772 cur += l;
2773 c = CUR_SCHAR(cur, l);
2774 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2775 /*
2776 * Okay someone managed to make a huge name, so he's ready to pay
2777 * for the processing speed.
2778 */
2779 xmlChar *buffer;
2780 int max = len * 2;
2781
2782 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2783 if (buffer == NULL) {
2784 xmlErrMemory(ctxt, NULL);
2785 return(NULL);
2786 }
2787 memcpy(buffer, buf, len);
2788 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2789 /* test bigentname.xml */
2790 (c == '.') || (c == '-') ||
2791 (c == '_') || (c == ':') ||
2792 (IS_COMBINING(c)) ||
2793 (IS_EXTENDER(c))) {
2794 if (len + 10 > max) {
2795 xmlChar *tmp;
2796 max *= 2;
2797 tmp = (xmlChar *) xmlRealloc(buffer,
2798 max * sizeof(xmlChar));
2799 if (tmp == NULL) {
2800 xmlErrMemory(ctxt, NULL);
2801 xmlFree(buffer);
2802 return(NULL);
2803 }
2804 buffer = tmp;
2805 }
2806 COPY_BUF(l,buffer,len,c);
2807 cur += l;
2808 c = CUR_SCHAR(cur, l);
2809 }
2810 buffer[len] = 0;
2811 *str = cur;
2812 return(buffer);
2813 }
2814 }
2815 *str = cur;
2816 return(xmlStrndup(buf, len));
2817}
2818
2819/**
2820 * xmlParseNmtoken:
2821 * @ctxt: an XML parser context
2822 *
2823 * parse an XML Nmtoken.
2824 *
2825 * [7] Nmtoken ::= (NameChar)+
2826 *
2827 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
2828 *
2829 * Returns the Nmtoken parsed or NULL
2830 */
2831
2832xmlChar *
2833xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2834 xmlChar buf[XML_MAX_NAMELEN + 5];
2835 int len = 0, l;
2836 int c;
2837 int count = 0;
2838
2839 GROW;
2840 c = CUR_CHAR(l);
2841
2842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2843 (c == '.') || (c == '-') ||
2844 (c == '_') || (c == ':') ||
2845 (IS_COMBINING(c)) ||
2846 (IS_EXTENDER(c))) {
2847 if (count++ > 100) {
2848 count = 0;
2849 GROW;
2850 }
2851 COPY_BUF(l,buf,len,c);
2852 NEXTL(l);
2853 c = CUR_CHAR(l);
2854 if (len >= XML_MAX_NAMELEN) {
2855 /*
2856 * Okay someone managed to make a huge token, so he's ready to pay
2857 * for the processing speed.
2858 */
2859 xmlChar *buffer;
2860 int max = len * 2;
2861
2862 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2863 if (buffer == NULL) {
2864 xmlErrMemory(ctxt, NULL);
2865 return(NULL);
2866 }
2867 memcpy(buffer, buf, len);
2868 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
2869 (c == '.') || (c == '-') ||
2870 (c == '_') || (c == ':') ||
2871 (IS_COMBINING(c)) ||
2872 (IS_EXTENDER(c))) {
2873 if (count++ > 100) {
2874 count = 0;
2875 GROW;
2876 }
2877 if (len + 10 > max) {
2878 xmlChar *tmp;
2879
2880 max *= 2;
2881 tmp = (xmlChar *) xmlRealloc(buffer,
2882 max * sizeof(xmlChar));
2883 if (tmp == NULL) {
2884 xmlErrMemory(ctxt, NULL);
2885 xmlFree(buffer);
2886 return(NULL);
2887 }
2888 buffer = tmp;
2889 }
2890 COPY_BUF(l,buffer,len,c);
2891 NEXTL(l);
2892 c = CUR_CHAR(l);
2893 }
2894 buffer[len] = 0;
2895 return(buffer);
2896 }
2897 }
2898 if (len == 0)
2899 return(NULL);
2900 return(xmlStrndup(buf, len));
2901}
2902
2903/**
2904 * xmlParseEntityValue:
2905 * @ctxt: an XML parser context
2906 * @orig: if non-NULL store a copy of the original entity value
2907 *
2908 * parse a value for ENTITY declarations
2909 *
2910 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2911 * "'" ([^%&'] | PEReference | Reference)* "'"
2912 *
2913 * Returns the EntityValue parsed with reference substituted or NULL
2914 */
2915
2916xmlChar *
2917xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2918 xmlChar *buf = NULL;
2919 int len = 0;
2920 int size = XML_PARSER_BUFFER_SIZE;
2921 int c, l;
2922 xmlChar stop;
2923 xmlChar *ret = NULL;
2924 const xmlChar *cur = NULL;
2925 xmlParserInputPtr input;
2926
2927 if (RAW == '"') stop = '"';
2928 else if (RAW == '\'') stop = '\'';
2929 else {
2930 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
2931 return(NULL);
2932 }
2933 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
2934 if (buf == NULL) {
2935 xmlErrMemory(ctxt, NULL);
2936 return(NULL);
2937 }
2938
2939 /*
2940 * The content of the entity definition is copied in a buffer.
2941 */
2942
2943 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2944 input = ctxt->input;
2945 GROW;
2946 NEXT;
2947 c = CUR_CHAR(l);
2948 /*
2949 * NOTE: 4.4.5 Included in Literal
2950 * When a parameter entity reference appears in a literal entity
2951 * value, ... a single or double quote character in the replacement
2952 * text is always treated as a normal data character and will not
2953 * terminate the literal.
2954 * In practice it means we stop the loop only when back at parsing
2955 * the initial entity and the quote is found
2956 */
2957 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2958 (ctxt->input != input))) {
2959 if (len + 5 >= size) {
2960 xmlChar *tmp;
2961
2962 size *= 2;
2963 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2964 if (tmp == NULL) {
2965 xmlErrMemory(ctxt, NULL);
2966 xmlFree(buf);
2967 return(NULL);
2968 }
2969 buf = tmp;
2970 }
2971 COPY_BUF(l,buf,len,c);
2972 NEXTL(l);
2973 /*
2974 * Pop-up of finished entities.
2975 */
2976 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2977 xmlPopInput(ctxt);
2978
2979 GROW;
2980 c = CUR_CHAR(l);
2981 if (c == 0) {
2982 GROW;
2983 c = CUR_CHAR(l);
2984 }
2985 }
2986 buf[len] = 0;
2987
2988 /*
2989 * Raise problem w.r.t. '&' and '%' being used in non-entities
2990 * reference constructs. Note Charref will be handled in
2991 * xmlStringDecodeEntities()
2992 */
2993 cur = buf;
2994 while (*cur != 0) { /* non input consuming */
2995 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2996 xmlChar *name;
2997 xmlChar tmp = *cur;
2998
2999 cur++;
3000 name = xmlParseStringName(ctxt, &cur);
3001 if ((name == NULL) || (*cur != ';')) {
3002 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3003 "EntityValue: '%c' forbidden except for entities references\n",
3004 tmp);
3005 }
3006 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3007 (ctxt->inputNr == 1)) {
3008 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3009 }
3010 if (name != NULL)
3011 xmlFree(name);
3012 if (*cur == 0)
3013 break;
3014 }
3015 cur++;
3016 }
3017
3018 /*
3019 * Then PEReference entities are substituted.
3020 */
3021 if (c != stop) {
3022 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3023 xmlFree(buf);
3024 } else {
3025 NEXT;
3026 /*
3027 * NOTE: 4.4.7 Bypassed
3028 * When a general entity reference appears in the EntityValue in
3029 * an entity declaration, it is bypassed and left as is.
3030 * so XML_SUBSTITUTE_REF is not set here.
3031 */
3032 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3033 0, 0, 0);
3034 if (orig != NULL)
3035 *orig = buf;
3036 else
3037 xmlFree(buf);
3038 }
3039
3040 return(ret);
3041}
3042
3043/**
3044 * xmlParseAttValueComplex:
3045 * @ctxt: an XML parser context
3046 * @len: the resulting attribute len
3047 * @normalize: wether to apply the inner normalization
3048 *
3049 * parse a value for an attribute, this is the fallback function
3050 * of xmlParseAttValue() when the attribute parsing requires handling
3051 * of non-ASCII characters, or normalization compaction.
3052 *
3053 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3054 */
3055static xmlChar *
3056xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3057 xmlChar limit = 0;
3058 xmlChar *buf = NULL;
3059 int len = 0;
3060 int buf_size = 0;
3061 int c, l, in_space = 0;
3062 xmlChar *current = NULL;
3063 xmlEntityPtr ent;
3064
3065 if (NXT(0) == '"') {
3066 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3067 limit = '"';
3068 NEXT;
3069 } else if (NXT(0) == '\'') {
3070 limit = '\'';
3071 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3072 NEXT;
3073 } else {
3074 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3075 return(NULL);
3076 }
3077
3078 /*
3079 * allocate a translation buffer.
3080 */
3081 buf_size = XML_PARSER_BUFFER_SIZE;
3082 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
3083 if (buf == NULL) goto mem_error;
3084
3085 /*
3086 * OK loop until we reach one of the ending char or a size limit.
3087 */
3088 c = CUR_CHAR(l);
3089 while ((NXT(0) != limit) && /* checked */
3090 (IS_CHAR(c)) && (c != '<')) {
3091 if (c == 0) break;
3092 if (c == '&') {
3093 in_space = 0;
3094 if (NXT(1) == '#') {
3095 int val = xmlParseCharRef(ctxt);
3096
3097 if (val == '&') {
3098 if (ctxt->replaceEntities) {
3099 if (len > buf_size - 10) {
3100 growBuffer(buf);
3101 }
3102 buf[len++] = '&';
3103 } else {
3104 /*
3105 * The reparsing will be done in xmlStringGetNodeList()
3106 * called by the attribute() function in SAX.c
3107 */
3108 if (len > buf_size - 10) {
3109 growBuffer(buf);
3110 }
3111 buf[len++] = '&';
3112 buf[len++] = '#';
3113 buf[len++] = '3';
3114 buf[len++] = '8';
3115 buf[len++] = ';';
3116 }
3117 } else {
3118 if (len > buf_size - 10) {
3119 growBuffer(buf);
3120 }
3121 len += xmlCopyChar(0, &buf[len], val);
3122 }
3123 } else {
3124 ent = xmlParseEntityRef(ctxt);
3125 ctxt->nbentities++;
3126 if (ent != NULL)
3127 ctxt->nbentities += ent->checked;
3128 if ((ent != NULL) &&
3129 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3130 if (len > buf_size - 10) {
3131 growBuffer(buf);
3132 }
3133 if ((ctxt->replaceEntities == 0) &&
3134 (ent->content[0] == '&')) {
3135 buf[len++] = '&';
3136 buf[len++] = '#';
3137 buf[len++] = '3';
3138 buf[len++] = '8';
3139 buf[len++] = ';';
3140 } else {
3141 buf[len++] = ent->content[0];
3142 }
3143 } else if ((ent != NULL) &&
3144 (ctxt->replaceEntities != 0)) {
3145 xmlChar *rep;
3146
3147 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3148 rep = xmlStringDecodeEntities(ctxt, ent->content,
3149 XML_SUBSTITUTE_REF,
3150 0, 0, 0);
3151 if (rep != NULL) {
3152 current = rep;
3153 while (*current != 0) { /* non input consuming */
3154 buf[len++] = *current++;
3155 if (len > buf_size - 10) {
3156 growBuffer(buf);
3157 }
3158 }
3159 xmlFree(rep);
3160 }
3161 } else {
3162 if (len > buf_size - 10) {
3163 growBuffer(buf);
3164 }
3165 if (ent->content != NULL)
3166 buf[len++] = ent->content[0];
3167 }
3168 } else if (ent != NULL) {
3169 int i = xmlStrlen(ent->name);
3170 const xmlChar *cur = ent->name;
3171
3172 /*
3173 * This may look absurd but is needed to detect
3174 * entities problems
3175 */
3176 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3177 (ent->content != NULL)) {
3178 xmlChar *rep;
3179 rep = xmlStringDecodeEntities(ctxt, ent->content,
3180 XML_SUBSTITUTE_REF, 0, 0, 0);
3181 if (rep != NULL)
3182 xmlFree(rep);
3183 }
3184
3185 /*
3186 * Just output the reference
3187 */
3188 buf[len++] = '&';
3189 if (len > buf_size - i - 10) {
3190 growBuffer(buf);
3191 }
3192 for (;i > 0;i--)
3193 buf[len++] = *cur++;
3194 buf[len++] = ';';
3195 }
3196 }
3197 } else {
3198 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3199 if ((len != 0) || (!normalize)) {
3200 if ((!normalize) || (!in_space)) {
3201 COPY_BUF(l,buf,len,0x20);
3202 if (len > buf_size - 10) {
3203 growBuffer(buf);
3204 }
3205 }
3206 in_space = 1;
3207 }
3208 } else {
3209 in_space = 0;
3210 COPY_BUF(l,buf,len,c);
3211 if (len > buf_size - 10) {
3212 growBuffer(buf);
3213 }
3214 }
3215 NEXTL(l);
3216 }
3217 GROW;
3218 c = CUR_CHAR(l);
3219 }
3220 if ((in_space) && (normalize)) {
3221 while (buf[len - 1] == 0x20) len--;
3222 }
3223 buf[len] = 0;
3224 if (RAW == '<') {
3225 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3226 } else if (RAW != limit) {
3227 if ((c != 0) && (!IS_CHAR(c))) {
3228 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3229 "invalid character in attribute value\n");
3230 } else {
3231 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3232 "AttValue: ' expected\n");
3233 }
3234 } else
3235 NEXT;
3236 if (attlen != NULL) *attlen = len;
3237 return(buf);
3238
3239mem_error:
3240 xmlErrMemory(ctxt, NULL);
3241 return(NULL);
3242}
3243
3244/**
3245 * xmlParseAttValue:
3246 * @ctxt: an XML parser context
3247 *
3248 * parse a value for an attribute
3249 * Note: the parser won't do substitution of entities here, this
3250 * will be handled later in xmlStringGetNodeList
3251 *
3252 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3253 * "'" ([^<&'] | Reference)* "'"
3254 *
3255 * 3.3.3 Attribute-Value Normalization:
3256 * Before the value of an attribute is passed to the application or
3257 * checked for validity, the XML processor must normalize it as follows:
3258 * - a character reference is processed by appending the referenced
3259 * character to the attribute value
3260 * - an entity reference is processed by recursively processing the
3261 * replacement text of the entity
3262 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3263 * appending #x20 to the normalized value, except that only a single
3264 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3265 * parsed entity or the literal entity value of an internal parsed entity
3266 * - other characters are processed by appending them to the normalized value
3267 * If the declared value is not CDATA, then the XML processor must further
3268 * process the normalized attribute value by discarding any leading and
3269 * trailing space (#x20) characters, and by replacing sequences of space
3270 * (#x20) characters by a single space (#x20) character.
3271 * All attributes for which no declaration has been read should be treated
3272 * by a non-validating parser as if declared CDATA.
3273 *
3274 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3275 */
3276
3277
3278xmlChar *
3279xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3280 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3281 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3282}
3283
3284/**
3285 * xmlParseSystemLiteral:
3286 * @ctxt: an XML parser context
3287 *
3288 * parse an XML Literal
3289 *
3290 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3291 *
3292 * Returns the SystemLiteral parsed or NULL
3293 */
3294
3295xmlChar *
3296xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3297 xmlChar *buf = NULL;
3298 int len = 0;
3299 int size = XML_PARSER_BUFFER_SIZE;
3300 int cur, l;
3301 xmlChar stop;
3302 int state = ctxt->instate;
3303 int count = 0;
3304
3305 SHRINK;
3306 if (RAW == '"') {
3307 NEXT;
3308 stop = '"';
3309 } else if (RAW == '\'') {
3310 NEXT;
3311 stop = '\'';
3312 } else {
3313 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3314 return(NULL);
3315 }
3316
3317 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3318 if (buf == NULL) {
3319 xmlErrMemory(ctxt, NULL);
3320 return(NULL);
3321 }
3322 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3323 cur = CUR_CHAR(l);
3324 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
3325 if (len + 5 >= size) {
3326 xmlChar *tmp;
3327
3328 size *= 2;
3329 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3330 if (tmp == NULL) {
3331 xmlFree(buf);
3332 xmlErrMemory(ctxt, NULL);
3333 ctxt->instate = (xmlParserInputState) state;
3334 return(NULL);
3335 }
3336 buf = tmp;
3337 }
3338 count++;
3339 if (count > 50) {
3340 GROW;
3341 count = 0;
3342 }
3343 COPY_BUF(l,buf,len,cur);
3344 NEXTL(l);
3345 cur = CUR_CHAR(l);
3346 if (cur == 0) {
3347 GROW;
3348 SHRINK;
3349 cur = CUR_CHAR(l);
3350 }
3351 }
3352 buf[len] = 0;
3353 ctxt->instate = (xmlParserInputState) state;
3354 if (!IS_CHAR(cur)) {
3355 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3356 } else {
3357 NEXT;
3358 }
3359 return(buf);
3360}
3361
3362/**
3363 * xmlParsePubidLiteral:
3364 * @ctxt: an XML parser context
3365 *
3366 * parse an XML public literal
3367 *
3368 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3369 *
3370 * Returns the PubidLiteral parsed or NULL.
3371 */
3372
3373xmlChar *
3374xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3375 xmlChar *buf = NULL;
3376 int len = 0;
3377 int size = XML_PARSER_BUFFER_SIZE;
3378 xmlChar cur;
3379 xmlChar stop;
3380 int count = 0;
3381 xmlParserInputState oldstate = ctxt->instate;
3382
3383 SHRINK;
3384 if (RAW == '"') {
3385 NEXT;
3386 stop = '"';
3387 } else if (RAW == '\'') {
3388 NEXT;
3389 stop = '\'';
3390 } else {
3391 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3392 return(NULL);
3393 }
3394 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3395 if (buf == NULL) {
3396 xmlErrMemory(ctxt, NULL);
3397 return(NULL);
3398 }
3399 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
3400 cur = CUR;
3401 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
3402 if (len + 1 >= size) {
3403 xmlChar *tmp;
3404
3405 size *= 2;
3406 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3407 if (tmp == NULL) {
3408 xmlErrMemory(ctxt, NULL);
3409 xmlFree(buf);
3410 return(NULL);
3411 }
3412 buf = tmp;
3413 }
3414 buf[len++] = cur;
3415 count++;
3416 if (count > 50) {
3417 GROW;
3418 count = 0;
3419 }
3420 NEXT;
3421 cur = CUR;
3422 if (cur == 0) {
3423 GROW;
3424 SHRINK;
3425 cur = CUR;
3426 }
3427 }
3428 buf[len] = 0;
3429 if (cur != stop) {
3430 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
3431 } else {
3432 NEXT;
3433 }
3434 ctxt->instate = oldstate;
3435 return(buf);
3436}
3437
3438void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
3439
/*
 * used for the test in the inner loop of the char data testing
 *
 * One entry per byte value.  An entry equals its own index for bytes
 * 0x09 and 0x20-0x7F other than '&', '<' and ']'; every other entry
 * (remaining control bytes including CR and LF, and all bytes >= 0x80)
 * is 0x00.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is cleared */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is cleared */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is cleared */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3477
3478/**
3479 * xmlParseCharData:
3480 * @ctxt: an XML parser context
3481 * @cdata: int indicating whether we are within a CDATA section
3482 *
3483 * parse a CharData section.
3484 * if we are within a CDATA section ']]>' marks an end of section.
3485 *
3486 * The right angle bracket (>) may be represented using the string "&gt;",
3487 * and must, for compatibility, be escaped using "&gt;" or a character
3488 * reference when it appears in the string "]]>" in content, when that
3489 * string is not marking the end of a CDATA section.
3490 *
3491 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3492 */
3493
3494void
3495xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
3496 const xmlChar *in;
3497 int nbchar = 0;
3498 int line = ctxt->input->line;
3499 int col = ctxt->input->col;
3500 int ccol;
3501
3502 SHRINK;
3503 GROW;
3504 /*
3505 * Accelerated common case where input don't need to be
3506 * modified before passing it to the handler.
3507 */
3508 if (!cdata) {
3509 in = ctxt->input->cur;
3510 do {
3511get_more_space:
3512 while (*in == 0x20) in++;
3513 if (*in == 0xA) {
3514 do {
3515 ctxt->input->line++; ctxt->input->col = 1;
3516 in++;
3517 } while (*in == 0xA);
3518 goto get_more_space;
3519 }
3520 if (*in == '<') {
3521 nbchar = in - ctxt->input->cur;
3522 if (nbchar > 0) {
3523 const xmlChar *tmp = ctxt->input->cur;
3524 ctxt->input->cur = in;
3525
3526 if ((ctxt->sax != NULL) &&
3527 (ctxt->sax->ignorableWhitespace !=
3528 ctxt->sax->characters)) {
3529 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3530 if (ctxt->sax->ignorableWhitespace != NULL)
3531 ctxt->sax->ignorableWhitespace(ctxt->userData,
3532 tmp, nbchar);
3533 } else {
3534 if (ctxt->sax->characters != NULL)
3535 ctxt->sax->characters(ctxt->userData,
3536 tmp, nbchar);
3537 if (*ctxt->space == -1)
3538 *ctxt->space = -2;
3539 }
3540 } else if ((ctxt->sax != NULL) &&
3541 (ctxt->sax->characters != NULL)) {
3542 ctxt->sax->characters(ctxt->userData,
3543 tmp, nbchar);
3544 }
3545 }
3546 return;
3547 }
3548
3549get_more:
3550 ccol = ctxt->input->col;
3551 while (test_char_data[*in]) {
3552 in++;
3553 ccol++;
3554 }
3555 ctxt->input->col = ccol;
3556 if (*in == 0xA) {
3557 do {
3558 ctxt->input->line++; ctxt->input->col = 1;
3559 in++;
3560 } while (*in == 0xA);
3561 goto get_more;
3562 }
3563 if (*in == ']') {
3564 if ((in[1] == ']') && (in[2] == '>')) {
3565 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3566 ctxt->input->cur = in;
3567 return;
3568 }
3569 in++;
3570 ctxt->input->col++;
3571 goto get_more;
3572 }
3573 nbchar = in - ctxt->input->cur;
3574 if (nbchar > 0) {
3575 if ((ctxt->sax != NULL) &&
3576 (ctxt->sax->ignorableWhitespace !=
3577 ctxt->sax->characters) &&
3578 (IS_BLANK_CH(*ctxt->input->cur))) {
3579 const xmlChar *tmp = ctxt->input->cur;
3580 ctxt->input->cur = in;
3581
3582 if (areBlanks(ctxt, tmp, nbchar, 0)) {
3583 if (ctxt->sax->ignorableWhitespace != NULL)
3584 ctxt->sax->ignorableWhitespace(ctxt->userData,
3585 tmp, nbchar);
3586 } else {
3587 if (ctxt->sax->characters != NULL)
3588 ctxt->sax->characters(ctxt->userData,
3589 tmp, nbchar);
3590 if (*ctxt->space == -1)
3591 *ctxt->space = -2;
3592 }
3593 line = ctxt->input->line;
3594 col = ctxt->input->col;
3595 } else if (ctxt->sax != NULL) {
3596 if (ctxt->sax->characters != NULL)
3597 ctxt->sax->characters(ctxt->userData,
3598 ctxt->input->cur, nbchar);
3599 line = ctxt->input->line;
3600 col = ctxt->input->col;
3601 }
3602 /* something really bad happened in the SAX callback */
3603 if (ctxt->instate != XML_PARSER_CONTENT)
3604 return;
3605 }
3606 ctxt->input->cur = in;
3607 if (*in == 0xD) {
3608 in++;
3609 if (*in == 0xA) {
3610 ctxt->input->cur = in;
3611 in++;
3612 ctxt->input->line++; ctxt->input->col = 1;
3613 continue; /* while */
3614 }
3615 in--;
3616 }
3617 if (*in == '<') {
3618 return;
3619 }
3620 if (*in == '&') {
3621 return;
3622 }
3623 SHRINK;
3624 GROW;
3625 in = ctxt->input->cur;
3626 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3627 nbchar = 0;
3628 }
3629 ctxt->input->line = line;
3630 ctxt->input->col = col;
3631 xmlParseCharDataComplex(ctxt, cdata);
3632}
3633
3634/**
3635 * xmlParseCharDataComplex:
3636 * @ctxt: an XML parser context
3637 * @cdata: int indicating whether we are within a CDATA section
3638 *
3639 * parse a CharData section.this is the fallback function
3640 * of xmlParseCharData() when the parsing requires handling
3641 * of non-ASCII characters.
3642 */
3643void
3644xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
3645 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3646 int nbchar = 0;
3647 int cur, l;
3648 int count = 0;
3649
3650 SHRINK;
3651 GROW;
3652 cur = CUR_CHAR(l);
3653 while ((cur != '<') && /* checked */
3654 (cur != '&') &&
3655 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
3656 if ((cur == ']') && (NXT(1) == ']') &&
3657 (NXT(2) == '>')) {
3658 if (cdata) break;
3659 else {
3660 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
3661 }
3662 }
3663 COPY_BUF(l,buf,nbchar,cur);
3664 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
3665 buf[nbchar] = 0;
3666
3667 /*
3668 * OK the segment is to be consumed as chars.
3669 */
3670 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3671 if (areBlanks(ctxt, buf, nbchar, 0)) {
3672 if (ctxt->sax->ignorableWhitespace != NULL)
3673 ctxt->sax->ignorableWhitespace(ctxt->userData,
3674 buf, nbchar);
3675 } else {
3676 if (ctxt->sax->characters != NULL)
3677 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3678 if ((ctxt->sax->characters !=
3679 ctxt->sax->ignorableWhitespace) &&
3680 (*ctxt->space == -1))
3681 *ctxt->space = -2;
3682 }
3683 }
3684 nbchar = 0;
3685 /* something really bad happened in the SAX callback */
3686 if (ctxt->instate != XML_PARSER_CONTENT)
3687 return;
3688 }
3689 count++;
3690 if (count > 50) {
3691 GROW;
3692 count = 0;
3693 }
3694 NEXTL(l);
3695 cur = CUR_CHAR(l);
3696 }
3697 if (nbchar != 0) {
3698 buf[nbchar] = 0;
3699 /*
3700 * OK the segment is to be consumed as chars.
3701 */
3702 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3703 if (areBlanks(ctxt, buf, nbchar, 0)) {
3704 if (ctxt->sax->ignorableWhitespace != NULL)
3705 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3706 } else {
3707 if (ctxt->sax->characters != NULL)
3708 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3709 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3710 (*ctxt->space == -1))
3711 *ctxt->space = -2;
3712 }
3713 }
3714 }
3715 if ((cur != 0) && (!IS_CHAR(cur))) {
3716 /* Generate the error and skip the offending character */
3717 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3718 "PCDATA invalid Char value %d\n",
3719 cur);
3720 NEXTL(l);
3721 }
3722}
3723
3724/**
3725 * xmlParseExternalID:
3726 * @ctxt: an XML parser context
3727 * @publicID: a xmlChar** receiving PubidLiteral
3728 * @strict: indicate whether we should restrict parsing to only
3729 * production [75], see NOTE below
3730 *
3731 * Parse an External ID or a Public ID
3732 *
3733 * NOTE: Productions [75] and [83] interact badly since [75] can generate
3734 * 'PUBLIC' S PubidLiteral S SystemLiteral
3735 *
3736 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3737 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3738 *
3739 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3740 *
3741 * Returns the function returns SystemLiteral and in the second
3742 * case publicID receives PubidLiteral, is strict is off
3743 * it is possible to return NULL and have publicID set.
3744 */
3745
3746xmlChar *
3747xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3748 xmlChar *URI = NULL;
3749
3750 SHRINK;
3751
3752 *publicID = NULL;
3753 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
3754 SKIP(6);
3755 if (!IS_BLANK_CH(CUR)) {
3756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3757 "Space required after 'SYSTEM'\n");
3758 }
3759 SKIP_BLANKS;
3760 URI = xmlParseSystemLiteral(ctxt);
3761 if (URI == NULL) {
3762 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3763 }
3764 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
3765 SKIP(6);
3766 if (!IS_BLANK_CH(CUR)) {
3767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3768 "Space required after 'PUBLIC'\n");
3769 }
3770 SKIP_BLANKS;
3771 *publicID = xmlParsePubidLiteral(ctxt);
3772 if (*publicID == NULL) {
3773 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
3774 }
3775 if (strict) {
3776 /*
3777 * We don't handle [83] so "S SystemLiteral" is required.
3778 */
3779 if (!IS_BLANK_CH(CUR)) {
3780 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3781 "Space required after the Public Identifier\n");
3782 }
3783 } else {
3784 /*
3785 * We handle [83] so we return immediately, if
3786 * "S SystemLiteral" is not detected. From a purely parsing
3787 * point of view that's a nice mess.
3788 */
3789 const xmlChar *ptr;
3790 GROW;
3791
3792 ptr = CUR_PTR;
3793 if (!IS_BLANK_CH(*ptr)) return(NULL);
3794
3795 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3796 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3797 }
3798 SKIP_BLANKS;
3799 URI = xmlParseSystemLiteral(ctxt);
3800 if (URI == NULL) {
3801 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
3802 }
3803 }
3804 return(URI);
3805}
3806
3807/**
3808 * xmlParseCommentComplex:
3809 * @ctxt: an XML parser context
3810 * @buf: the already parsed part of the buffer
3811 * @len: number of bytes filles in the buffer
3812 * @size: allocated size of the buffer
3813 *
3814 * Skip an XML (SGML) comment <!-- .... -->
3815 * The spec says that "For compatibility, the string "--" (double-hyphen)
3816 * must not occur within comments. "
3817 * This is the slow routine in case the accelerator for ascii didn't work
3818 *
3819 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3820 */
3821static void
3822xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
3823 int q, ql;
3824 int r, rl;
3825 int cur, l;
3826 xmlParserInputPtr input = ctxt->input;
3827 int count = 0;
3828
3829 if (buf == NULL) {
3830 len = 0;
3831 size = XML_PARSER_BUFFER_SIZE;
3832 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3833 if (buf == NULL) {
3834 xmlErrMemory(ctxt, NULL);
3835 return;
3836 }
3837 }
3838 GROW; /* Assure there's enough input data */
3839 q = CUR_CHAR(ql);
3840 if (q == 0)
3841 goto not_terminated;
3842 if (!IS_CHAR(q)) {
3843 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3844 "xmlParseComment: invalid xmlChar value %d\n",
3845 q);
3846 xmlFree (buf);
3847 return;
3848 }
3849 NEXTL(ql);
3850 r = CUR_CHAR(rl);
3851 if (r == 0)
3852 goto not_terminated;
3853 if (!IS_CHAR(r)) {
3854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3855 "xmlParseComment: invalid xmlChar value %d\n",
3856 q);
3857 xmlFree (buf);
3858 return;
3859 }
3860 NEXTL(rl);
3861 cur = CUR_CHAR(l);
3862 if (cur == 0)
3863 goto not_terminated;
3864 while (IS_CHAR(cur) && /* checked */
3865 ((cur != '>') ||
3866 (r != '-') || (q != '-'))) {
3867 if ((r == '-') && (q == '-')) {
3868 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
3869 }
3870 if (len + 5 >= size) {
3871 xmlChar *new_buf;
3872 size *= 2;
3873 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3874 if (new_buf == NULL) {
3875 xmlFree (buf);
3876 xmlErrMemory(ctxt, NULL);
3877 return;
3878 }
3879 buf = new_buf;
3880 }
3881 COPY_BUF(ql,buf,len,q);
3882 q = r;
3883 ql = rl;
3884 r = cur;
3885 rl = l;
3886
3887 count++;
3888 if (count > 50) {
3889 GROW;
3890 count = 0;
3891 }
3892 NEXTL(l);
3893 cur = CUR_CHAR(l);
3894 if (cur == 0) {
3895 SHRINK;
3896 GROW;
3897 cur = CUR_CHAR(l);
3898 }
3899 }
3900 buf[len] = 0;
3901 if (cur == 0) {
3902 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3903 "Comment not terminated \n<!--%.50s\n", buf);
3904 } else if (!IS_CHAR(cur)) {
3905 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3906 "xmlParseComment: invalid xmlChar value %d\n",
3907 cur);
3908 } else {
3909 if (input != ctxt->input) {
3910 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3911 "Comment doesn't start and stop in the same entity\n");
3912 }
3913 NEXT;
3914 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3915 (!ctxt->disableSAX))
3916 ctxt->sax->comment(ctxt->userData, buf);
3917 }
3918 xmlFree(buf);
3919 return;
3920not_terminated:
3921 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3922 "Comment not terminated\n", NULL);
3923 xmlFree(buf);
3924 return;
3925}
3926
3927/**
3928 * xmlParseComment:
3929 * @ctxt: an XML parser context
3930 *
3931 * Skip an XML (SGML) comment <!-- .... -->
3932 * The spec says that "For compatibility, the string "--" (double-hyphen)
3933 * must not occur within comments. "
3934 *
3935 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3936 */
3937void
3938xmlParseComment(xmlParserCtxtPtr ctxt) {
3939 xmlChar *buf = NULL;
3940 int size = XML_PARSER_BUFFER_SIZE;
3941 int len = 0;
3942 xmlParserInputState state;
3943 const xmlChar *in;
3944 int nbchar = 0, ccol;
3945
3946 /*
3947 * Check that there is a comment right here.
3948 */
3949 if ((RAW != '<') || (NXT(1) != '!') ||
3950 (NXT(2) != '-') || (NXT(3) != '-')) return;
3951
3952 state = ctxt->instate;
3953 ctxt->instate = XML_PARSER_COMMENT;
3954 SKIP(4);
3955 SHRINK;
3956 GROW;
3957
3958 /*
3959 * Accelerated common case where input don't need to be
3960 * modified before passing it to the handler.
3961 */
3962 in = ctxt->input->cur;
3963 do {
3964 if (*in == 0xA) {
3965 do {
3966 ctxt->input->line++; ctxt->input->col = 1;
3967 in++;
3968 } while (*in == 0xA);
3969 }
3970get_more:
3971 ccol = ctxt->input->col;
3972 while (((*in > '-') && (*in <= 0x7F)) ||
3973 ((*in >= 0x20) && (*in < '-')) ||
3974 (*in == 0x09)) {
3975 in++;
3976 ccol++;
3977 }
3978 ctxt->input->col = ccol;
3979 if (*in == 0xA) {
3980 do {
3981 ctxt->input->line++; ctxt->input->col = 1;
3982 in++;
3983 } while (*in == 0xA);
3984 goto get_more;
3985 }
3986 nbchar = in - ctxt->input->cur;
3987 /*
3988 * save current set of data
3989 */
3990 if (nbchar > 0) {
3991 if ((ctxt->sax != NULL) &&
3992 (ctxt->sax->comment != NULL)) {
3993 if (buf == NULL) {
3994 if ((*in == '-') && (in[1] == '-'))
3995 size = nbchar + 1;
3996 else
3997 size = XML_PARSER_BUFFER_SIZE + nbchar;
3998 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3999 if (buf == NULL) {
4000 xmlErrMemory(ctxt, NULL);
4001 ctxt->instate = state;
4002 return;
4003 }
4004 len = 0;
4005 } else if (len + nbchar + 1 >= size) {
4006 xmlChar *new_buf;
4007 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4008 new_buf = (xmlChar *) xmlRealloc(buf,
4009 size * sizeof(xmlChar));
4010 if (new_buf == NULL) {
4011 xmlFree (buf);
4012 xmlErrMemory(ctxt, NULL);
4013 ctxt->instate = state;
4014 return;
4015 }
4016 buf = new_buf;
4017 }
4018 memcpy(&buf[len], ctxt->input->cur, nbchar);
4019 len += nbchar;
4020 buf[len] = 0;
4021 }
4022 }
4023 ctxt->input->cur = in;
4024 if (*in == 0xA) {
4025 in++;
4026 ctxt->input->line++; ctxt->input->col = 1;
4027 }
4028 if (*in == 0xD) {
4029 in++;
4030 if (*in == 0xA) {
4031 ctxt->input->cur = in;
4032 in++;
4033 ctxt->input->line++; ctxt->input->col = 1;
4034 continue; /* while */
4035 }
4036 in--;
4037 }
4038 SHRINK;
4039 GROW;
4040 in = ctxt->input->cur;
4041 if (*in == '-') {
4042 if (in[1] == '-') {
4043 if (in[2] == '>') {
4044 SKIP(3);
4045 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4046 (!ctxt->disableSAX)) {
4047 if (buf != NULL)
4048 ctxt->sax->comment(ctxt->userData, buf);
4049 else
4050 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4051 }
4052 if (buf != NULL)
4053 xmlFree(buf);
4054 ctxt->instate = state;
4055 return;
4056 }
4057 if (buf != NULL)
4058 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4059 "Comment not terminated \n<!--%.50s\n",
4060 buf);
4061 else
4062 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4063 "Comment not terminated \n", NULL);
4064 in++;
4065 ctxt->input->col++;
4066 }
4067 in++;
4068 ctxt->input->col++;
4069 goto get_more;
4070 }
4071 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4072 xmlParseCommentComplex(ctxt, buf, len, size);
4073 ctxt->instate = state;
4074 return;
4075}
4076
4077
4078/**
4079 * xmlParsePITarget:
4080 * @ctxt: an XML parser context
4081 *
4082 * parse the name of a PI
4083 *
4084 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4085 *
4086 * Returns the PITarget name or NULL
4087 */
4088
4089const xmlChar *
4090xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4091 const xmlChar *name;
4092
4093 name = xmlParseName(ctxt);
4094 if ((name != NULL) &&
4095 ((name[0] == 'x') || (name[0] == 'X')) &&
4096 ((name[1] == 'm') || (name[1] == 'M')) &&
4097 ((name[2] == 'l') || (name[2] == 'L'))) {
4098 int i;
4099 if ((name[0] == 'x') && (name[1] == 'm') &&
4100 (name[2] == 'l') && (name[3] == 0)) {
4101 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4102 "XML declaration allowed only at the start of the document\n");
4103 return(name);
4104 } else if (name[3] == 0) {
4105 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4106 return(name);
4107 }
4108 for (i = 0;;i++) {
4109 if (xmlW3CPIs[i] == NULL) break;
4110 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4111 return(name);
4112 }
4113 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4114 "xmlParsePITarget: invalid name prefix 'xml'\n",
4115 NULL, NULL);
4116 }
4117 return(name);
4118}
4119
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must read: catalog, '=', a quoted URL, with optional blanks
 * around each part and nothing else after the closing quote. Syntax
 * errors raise a XML_WAR_CATALOG_PI warning, except a missing '=' which
 * makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *location = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != '=') {
        return;
    }
    cursor++;

    /* opening quote, either kind */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;

    /* the URL runs up to the matching quote */
    for (start = cursor; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto error;
    location = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto error;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (location != NULL)
        xmlFree(location);
}
#endif
4181
4182/**
4183 * xmlParsePI:
4184 * @ctxt: an XML parser context
4185 *
4186 * parse an XML Processing Instruction.
4187 *
4188 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4189 *
4190 * The processing is transfered to SAX once parsed.
4191 */
4192
4193void
4194xmlParsePI(xmlParserCtxtPtr ctxt) {
4195 xmlChar *buf = NULL;
4196 int len = 0;
4197 int size = XML_PARSER_BUFFER_SIZE;
4198 int cur, l;
4199 const xmlChar *target;
4200 xmlParserInputState state;
4201 int count = 0;
4202
4203 if ((RAW == '<') && (NXT(1) == '?')) {
4204 xmlParserInputPtr input = ctxt->input;
4205 state = ctxt->instate;
4206 ctxt->instate = XML_PARSER_PI;
4207 /*
4208 * this is a Processing Instruction.
4209 */
4210 SKIP(2);
4211 SHRINK;
4212
4213 /*
4214 * Parse the target name and check for special support like
4215 * namespace.
4216 */
4217 target = xmlParsePITarget(ctxt);
4218 if (target != NULL) {
4219 if ((RAW == '?') && (NXT(1) == '>')) {
4220 if (input != ctxt->input) {
4221 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4222 "PI declaration doesn't start and stop in the same entity\n");
4223 }
4224 SKIP(2);
4225
4226 /*
4227 * SAX: PI detected.
4228 */
4229 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4230 (ctxt->sax->processingInstruction != NULL))
4231 ctxt->sax->processingInstruction(ctxt->userData,
4232 target, NULL);
4233 ctxt->instate = state;
4234 return;
4235 }
4236 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4237 if (buf == NULL) {
4238 xmlErrMemory(ctxt, NULL);
4239 ctxt->instate = state;
4240 return;
4241 }
4242 cur = CUR;
4243 if (!IS_BLANK(cur)) {
4244 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4245 "ParsePI: PI %s space expected\n", target);
4246 }
4247 SKIP_BLANKS;
4248 cur = CUR_CHAR(l);
4249 while (IS_CHAR(cur) && /* checked */
4250 ((cur != '?') || (NXT(1) != '>'))) {
4251 if (len + 5 >= size) {
4252 xmlChar *tmp;
4253
4254 size *= 2;
4255 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4256 if (tmp == NULL) {
4257 xmlErrMemory(ctxt, NULL);
4258 xmlFree(buf);
4259 ctxt->instate = state;
4260 return;
4261 }
4262 buf = tmp;
4263 }
4264 count++;
4265 if (count > 50) {
4266 GROW;
4267 count = 0;
4268 }
4269 COPY_BUF(l,buf,len,cur);
4270 NEXTL(l);
4271 cur = CUR_CHAR(l);
4272 if (cur == 0) {
4273 SHRINK;
4274 GROW;
4275 cur = CUR_CHAR(l);
4276 }
4277 }
4278 buf[len] = 0;
4279 if (cur != '?') {
4280 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4281 "ParsePI: PI %s never end ...\n", target);
4282 } else {
4283 if (input != ctxt->input) {
4284 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4285 "PI declaration doesn't start and stop in the same entity\n");
4286 }
4287 SKIP(2);
4288
4289#ifdef LIBXML_CATALOG_ENABLED
4290 if (((state == XML_PARSER_MISC) ||
4291 (state == XML_PARSER_START)) &&
4292 (xmlStrEqual(target, XML_CATALOG_PI))) {
4293 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4294 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4295 (allow == XML_CATA_ALLOW_ALL))
4296 xmlParseCatalogPI(ctxt, buf);
4297 }
4298#endif
4299
4300
4301 /*
4302 * SAX: PI detected.
4303 */
4304 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4305 (ctxt->sax->processingInstruction != NULL))
4306 ctxt->sax->processingInstruction(ctxt->userData,
4307 target, buf);
4308 }
4309 xmlFree(buf);
4310 } else {
4311 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
4312 }
4313 ctxt->instate = state;
4314 }
4315}
4316
4317/**
4318 * xmlParseNotationDecl:
4319 * @ctxt: an XML parser context
4320 *
4321 * parse a notation declaration
4322 *
4323 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4324 *
4325 * Hence there is actually 3 choices:
4326 * 'PUBLIC' S PubidLiteral
4327 * 'PUBLIC' S PubidLiteral S SystemLiteral
4328 * and 'SYSTEM' S SystemLiteral
4329 *
4330 * See the NOTE on xmlParseExternalID().
4331 */
4332
4333void
4334xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
4335 const xmlChar *name;
4336 xmlChar *Pubid;
4337 xmlChar *Systemid;
4338
4339 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4340 xmlParserInputPtr input = ctxt->input;
4341 SHRINK;
4342 SKIP(10);
4343 if (!IS_BLANK_CH(CUR)) {
4344 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4345 "Space required after '<!NOTATION'\n");
4346 return;
4347 }
4348 SKIP_BLANKS;
4349
4350 name = xmlParseName(ctxt);
4351 if (name == NULL) {
4352 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4353 return;
4354 }
4355 if (!IS_BLANK_CH(CUR)) {
4356 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4357 "Space required after the NOTATION name'\n");
4358 return;
4359 }
4360 SKIP_BLANKS;
4361
4362 /*
4363 * Parse the IDs.
4364 */
4365 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4366 SKIP_BLANKS;
4367
4368 if (RAW == '>') {
4369 if (input != ctxt->input) {
4370 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4371 "Notation declaration doesn't start and stop in the same entity\n");
4372 }
4373 NEXT;
4374 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4375 (ctxt->sax->notationDecl != NULL))
4376 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4377 } else {
4378 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4379 }
4380 if (Systemid != NULL) xmlFree(Systemid);
4381 if (Pubid != NULL) xmlFree(Pubid);
4382 }
4383}
4384
4385/**
4386 * xmlParseEntityDecl:
4387 * @ctxt: an XML parser context
4388 *
4389 * parse <!ENTITY declarations
4390 *
4391 * [70] EntityDecl ::= GEDecl | PEDecl
4392 *
4393 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4394 *
4395 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4396 *
4397 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4398 *
4399 * [74] PEDef ::= EntityValue | ExternalID
4400 *
4401 * [76] NDataDecl ::= S 'NDATA' S Name
4402 *
4403 * [ VC: Notation Declared ]
4404 * The Name must match the declared name of a notation.
4405 */
4406
4407void
4408xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
4409 const xmlChar *name = NULL;
4410 xmlChar *value = NULL;
4411 xmlChar *URI = NULL, *literal = NULL;
4412 const xmlChar *ndata = NULL;
4413 int isParameter = 0;
4414 xmlChar *orig = NULL;
4415 int skipped;
4416 unsigned long oldnbent = ctxt->nbentities;
4417
4418 /* GROW; done in the caller */
4419 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
4420 xmlParserInputPtr input = ctxt->input;
4421 SHRINK;
4422 SKIP(8);
4423 skipped = SKIP_BLANKS;
4424 if (skipped == 0) {
4425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4426 "Space required after '<!ENTITY'\n");
4427 }
4428
4429 if (RAW == '%') {
4430 NEXT;
4431 skipped = SKIP_BLANKS;
4432 if (skipped == 0) {
4433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4434 "Space required after '%'\n");
4435 }
4436 isParameter = 1;
4437 }
4438
4439 name = xmlParseName(ctxt);
4440 if (name == NULL) {
4441 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4442 "xmlParseEntityDecl: no name\n");
4443 return;
4444 }
4445 skipped = SKIP_BLANKS;
4446 if (skipped == 0) {
4447 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4448 "Space required after the entity name\n");
4449 }
4450
4451 ctxt->instate = XML_PARSER_ENTITY_DECL;
4452 /*
4453 * handle the various case of definitions...
4454 */
4455 if (isParameter) {
4456 if ((RAW == '"') || (RAW == '\'')) {
4457 value = xmlParseEntityValue(ctxt, &orig);
4458 if (value) {
4459 if ((ctxt->sax != NULL) &&
4460 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4461 ctxt->sax->entityDecl(ctxt->userData, name,
4462 XML_INTERNAL_PARAMETER_ENTITY,
4463 NULL, NULL, value);
4464 }
4465 } else {
4466 URI = xmlParseExternalID(ctxt, &literal, 1);
4467 if ((URI == NULL) && (literal == NULL)) {
4468 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4469 }
4470 if (URI) {
4471 xmlURIPtr uri;
4472
4473 uri = xmlParseURI((const char *) URI);
4474 if (uri == NULL) {
4475 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4476 "Invalid URI: %s\n", URI);
4477 /*
4478 * This really ought to be a well formedness error
4479 * but the XML Core WG decided otherwise c.f. issue
4480 * E26 of the XML erratas.
4481 */
4482 } else {
4483 if (uri->fragment != NULL) {
4484 /*
4485 * Okay this is foolish to block those but not
4486 * invalid URIs.
4487 */
4488 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4489 } else {
4490 if ((ctxt->sax != NULL) &&
4491 (!ctxt->disableSAX) &&
4492 (ctxt->sax->entityDecl != NULL))
4493 ctxt->sax->entityDecl(ctxt->userData, name,
4494 XML_EXTERNAL_PARAMETER_ENTITY,
4495 literal, URI, NULL);
4496 }
4497 xmlFreeURI(uri);
4498 }
4499 }
4500 }
4501 } else {
4502 if ((RAW == '"') || (RAW == '\'')) {
4503 value = xmlParseEntityValue(ctxt, &orig);
4504 if ((ctxt->sax != NULL) &&
4505 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4506 ctxt->sax->entityDecl(ctxt->userData, name,
4507 XML_INTERNAL_GENERAL_ENTITY,
4508 NULL, NULL, value);
4509 /*
4510 * For expat compatibility in SAX mode.
4511 */
4512 if ((ctxt->myDoc == NULL) ||
4513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4514 if (ctxt->myDoc == NULL) {
4515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4516 }
4517 if (ctxt->myDoc->intSubset == NULL)
4518 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4519 BAD_CAST "fake", NULL, NULL);
4520
4521 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4522 NULL, NULL, value);
4523 }
4524 } else {
4525 URI = xmlParseExternalID(ctxt, &literal, 1);
4526 if ((URI == NULL) && (literal == NULL)) {
4527 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
4528 }
4529 if (URI) {
4530 xmlURIPtr uri;
4531
4532 uri = xmlParseURI((const char *)URI);
4533 if (uri == NULL) {
4534 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4535 "Invalid URI: %s\n", URI);
4536 /*
4537 * This really ought to be a well formedness error
4538 * but the XML Core WG decided otherwise c.f. issue
4539 * E26 of the XML erratas.
4540 */
4541 } else {
4542 if (uri->fragment != NULL) {
4543 /*
4544 * Okay this is foolish to block those but not
4545 * invalid URIs.
4546 */
4547 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
4548 }
4549 xmlFreeURI(uri);
4550 }
4551 }
4552 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
4553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4554 "Space required before 'NDATA'\n");
4555 }
4556 SKIP_BLANKS;
4557 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
4558 SKIP(5);
4559 if (!IS_BLANK_CH(CUR)) {
4560 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4561 "Space required after 'NDATA'\n");
4562 }
4563 SKIP_BLANKS;
4564 ndata = xmlParseName(ctxt);
4565 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4566 (ctxt->sax->unparsedEntityDecl != NULL))
4567 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4568 literal, URI, ndata);
4569 } else {
4570 if ((ctxt->sax != NULL) &&
4571 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4572 ctxt->sax->entityDecl(ctxt->userData, name,
4573 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4574 literal, URI, NULL);
4575 /*
4576 * For expat compatibility in SAX mode.
4577 * assuming the entity repalcement was asked for
4578 */
4579 if ((ctxt->replaceEntities != 0) &&
4580 ((ctxt->myDoc == NULL) ||
4581 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4582 if (ctxt->myDoc == NULL) {
4583 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4584 }
4585
4586 if (ctxt->myDoc->intSubset == NULL)
4587 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4588 BAD_CAST "fake", NULL, NULL);
4589 xmlSAX2EntityDecl(ctxt, name,
4590 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4591 literal, URI, NULL);
4592 }
4593 }
4594 }
4595 }
4596 SKIP_BLANKS;
4597 if (RAW != '>') {
4598 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
4599 "xmlParseEntityDecl: entity %s not terminated\n", name);
4600 } else {
4601 if (input != ctxt->input) {
4602 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4603 "Entity declaration doesn't start and stop in the same entity\n");
4604 }
4605 NEXT;
4606 }
4607 if (orig != NULL) {
4608 /*
4609 * Ugly mechanism to save the raw entity value.
4610 */
4611 xmlEntityPtr cur = NULL;
4612
4613 if (isParameter) {
4614 if ((ctxt->sax != NULL) &&
4615 (ctxt->sax->getParameterEntity != NULL))
4616 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4617 } else {
4618 if ((ctxt->sax != NULL) &&
4619 (ctxt->sax->getEntity != NULL))
4620 cur = ctxt->sax->getEntity(ctxt->userData, name);
4621 if ((cur == NULL) && (ctxt->userData==ctxt)) {
4622 cur = xmlSAX2GetEntity(ctxt, name);
4623 }
4624 }
4625 if (cur != NULL) {
4626 cur->checked = ctxt->nbentities - oldnbent;
4627 if (cur->orig != NULL)
4628 xmlFree(orig);
4629 else
4630 cur->orig = orig;
4631 } else
4632 xmlFree(orig);
4633 }
4634 if (value != NULL) xmlFree(value);
4635 if (URI != NULL) xmlFree(URI);
4636 if (literal != NULL) xmlFree(literal);
4637 }
4638}
4639
4640/**
4641 * xmlParseDefaultDecl:
4642 * @ctxt: an XML parser context
4643 * @value: Receive a possible fixed default value for the attribute
4644 *
4645 * Parse an attribute default declaration
4646 *
4647 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4648 *
4649 * [ VC: Required Attribute ]
4650 * if the default declaration is the keyword #REQUIRED, then the
4651 * attribute must be specified for all elements of the type in the
4652 * attribute-list declaration.
4653 *
4654 * [ VC: Attribute Default Legal ]
4655 * The declared default value must meet the lexical constraints of
4656 * the declared attribute type c.f. xmlValidateAttributeDecl()
4657 *
4658 * [ VC: Fixed Attribute Default ]
4659 * if an attribute has a default value declared with the #FIXED
4660 * keyword, instances of that attribute must match the default value.
4661 *
4662 * [ WFC: No < in Attribute Values ]
4663 * handled in xmlParseAttValue()
4664 *
4665 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4666 * or XML_ATTRIBUTE_FIXED.
4667 */
4668
4669int
4670xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4671 int val;
4672 xmlChar *ret;
4673
4674 *value = NULL;
4675 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
4676 SKIP(9);
4677 return(XML_ATTRIBUTE_REQUIRED);
4678 }
4679 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
4680 SKIP(8);
4681 return(XML_ATTRIBUTE_IMPLIED);
4682 }
4683 val = XML_ATTRIBUTE_NONE;
4684 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
4685 SKIP(6);
4686 val = XML_ATTRIBUTE_FIXED;
4687 if (!IS_BLANK_CH(CUR)) {
4688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4689 "Space required after '#FIXED'\n");
4690 }
4691 SKIP_BLANKS;
4692 }
4693 ret = xmlParseAttValue(ctxt);
4694 ctxt->instate = XML_PARSER_DTD;
4695 if (ret == NULL) {
4696 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
4697 "Attribute default value declaration error\n");
4698 } else
4699 *value = ret;
4700 return(val);
4701}
4702
4703/**
4704 * xmlParseNotationType:
4705 * @ctxt: an XML parser context
4706 *
4707 * parse an Notation attribute type.
4708 *
4709 * Note: the leading 'NOTATION' S part has already being parsed...
4710 *
4711 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4712 *
4713 * [ VC: Notation Attributes ]
4714 * Values of this type must match one of the notation names included
4715 * in the declaration; all notation names in the declaration must be declared.
4716 *
4717 * Returns: the notation attribute tree built while parsing
4718 */
4719
4720xmlEnumerationPtr
4721xmlParseNotationType(xmlParserCtxtPtr ctxt) {
4722 const xmlChar *name;
4723 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4724
4725 if (RAW != '(') {
4726 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
4727 return(NULL);
4728 }
4729 SHRINK;
4730 do {
4731 NEXT;
4732 SKIP_BLANKS;
4733 name = xmlParseName(ctxt);
4734 if (name == NULL) {
4735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4736 "Name expected in NOTATION declaration\n");
4737 return(ret);
4738 }
4739 cur = xmlCreateEnumeration(name);
4740 if (cur == NULL) return(ret);
4741 if (last == NULL) ret = last = cur;
4742 else {
4743 last->next = cur;
4744 last = cur;
4745 }
4746 SKIP_BLANKS;
4747 } while (RAW == '|');
4748 if (RAW != ')') {
4749 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
4750 if ((last != NULL) && (last != ret))
4751 xmlFreeEnumeration(last);
4752 return(ret);
4753 }
4754 NEXT;
4755 return(ret);
4756}
4757
4758/**
4759 * xmlParseEnumerationType:
4760 * @ctxt: an XML parser context
4761 *
4762 * parse an Enumeration attribute type.
4763 *
4764 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4765 *
4766 * [ VC: Enumeration ]
4767 * Values of this type must match one of the Nmtoken tokens in
4768 * the declaration
4769 *
4770 * Returns: the enumeration attribute tree built while parsing
4771 */
4772
4773xmlEnumerationPtr
4774xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4775 xmlChar *name;
4776 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4777
4778 if (RAW != '(') {
4779 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
4780 return(NULL);
4781 }
4782 SHRINK;
4783 do {
4784 NEXT;
4785 SKIP_BLANKS;
4786 name = xmlParseNmtoken(ctxt);
4787 if (name == NULL) {
4788 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
4789 return(ret);
4790 }
4791 cur = xmlCreateEnumeration(name);
4792 xmlFree(name);
4793 if (cur == NULL) return(ret);
4794 if (last == NULL) ret = last = cur;
4795 else {
4796 last->next = cur;
4797 last = cur;
4798 }
4799 SKIP_BLANKS;
4800 } while (RAW == '|');
4801 if (RAW != ')') {
4802 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
4803 return(ret);
4804 }
4805 NEXT;
4806 return(ret);
4807}
4808
4809/**
4810 * xmlParseEnumeratedType:
4811 * @ctxt: an XML parser context
4812 * @tree: the enumeration tree built while parsing
4813 *
4814 * parse an Enumerated attribute type.
4815 *
4816 * [57] EnumeratedType ::= NotationType | Enumeration
4817 *
4818 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4819 *
4820 *
4821 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4822 */
4823
4824int
4825xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4826 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
4827 SKIP(8);
4828 if (!IS_BLANK_CH(CUR)) {
4829 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4830 "Space required after 'NOTATION'\n");
4831 return(0);
4832 }
4833 SKIP_BLANKS;
4834 *tree = xmlParseNotationType(ctxt);
4835 if (*tree == NULL) return(0);
4836 return(XML_ATTRIBUTE_NOTATION);
4837 }
4838 *tree = xmlParseEnumerationType(ctxt);
4839 if (*tree == NULL) return(0);
4840 return(XML_ATTRIBUTE_ENUMERATION);
4841}
4842
4843/**
4844 * xmlParseAttributeType:
4845 * @ctxt: an XML parser context
4846 * @tree: the enumeration tree built while parsing
4847 *
4848 * parse the Attribute list def for an element
4849 *
4850 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4851 *
4852 * [55] StringType ::= 'CDATA'
4853 *
4854 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4855 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4856 *
4857 * Validity constraints for attribute values syntax are checked in
4858 * xmlValidateAttributeValue()
4859 *
4860 * [ VC: ID ]
4861 * Values of type ID must match the Name production. A name must not
4862 * appear more than once in an XML document as a value of this type;
4863 * i.e., ID values must uniquely identify the elements which bear them.
4864 *
4865 * [ VC: One ID per Element Type ]
4866 * No element type may have more than one ID attribute specified.
4867 *
4868 * [ VC: ID Attribute Default ]
4869 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4870 *
4871 * [ VC: IDREF ]
4872 * Values of type IDREF must match the Name production, and values
4873 * of type IDREFS must match Names; each IDREF Name must match the value
4874 * of an ID attribute on some element in the XML document; i.e. IDREF
4875 * values must match the value of some ID attribute.
4876 *
4877 * [ VC: Entity Name ]
4878 * Values of type ENTITY must match the Name production, values
4879 * of type ENTITIES must match Names; each Entity Name must match the
4880 * name of an unparsed entity declared in the DTD.
4881 *
4882 * [ VC: Name Token ]
4883 * Values of type NMTOKEN must match the Nmtoken production; values
4884 * of type NMTOKENS must match Nmtokens.
4885 *
4886 * Returns the attribute type
4887 */
4888int
4889xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4890 SHRINK;
4891 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
4892 SKIP(5);
4893 return(XML_ATTRIBUTE_CDATA);
4894 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
4895 SKIP(6);
4896 return(XML_ATTRIBUTE_IDREFS);
4897 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
4898 SKIP(5);
4899 return(XML_ATTRIBUTE_IDREF);
4900 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4901 SKIP(2);
4902 return(XML_ATTRIBUTE_ID);
4903 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
4904 SKIP(6);
4905 return(XML_ATTRIBUTE_ENTITY);
4906 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
4907 SKIP(8);
4908 return(XML_ATTRIBUTE_ENTITIES);
4909 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
4910 SKIP(8);
4911 return(XML_ATTRIBUTE_NMTOKENS);
4912 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
4913 SKIP(7);
4914 return(XML_ATTRIBUTE_NMTOKEN);
4915 }
4916 return(xmlParseEnumeratedType(ctxt, tree));
4917}
4918
4919/**
4920 * xmlParseAttributeListDecl:
4921 * @ctxt: an XML parser context
4922 *
4923 * : parse the Attribute list def for an element
4924 *
4925 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4926 *
4927 * [53] AttDef ::= S Name S AttType S DefaultDecl
4928 *
4929 */
4930void
4931xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
4932 const xmlChar *elemName;
4933 const xmlChar *attrName;
4934 xmlEnumerationPtr tree;
4935
4936 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
4937 xmlParserInputPtr input = ctxt->input;
4938
4939 SKIP(9);
4940 if (!IS_BLANK_CH(CUR)) {
4941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4942 "Space required after '<!ATTLIST'\n");
4943 }
4944 SKIP_BLANKS;
4945 elemName = xmlParseName(ctxt);
4946 if (elemName == NULL) {
4947 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4948 "ATTLIST: no name for Element\n");
4949 return;
4950 }
4951 SKIP_BLANKS;
4952 GROW;
4953 while (RAW != '>') {
4954 const xmlChar *check = CUR_PTR;
4955 int type;
4956 int def;
4957 xmlChar *defaultValue = NULL;
4958
4959 GROW;
4960 tree = NULL;
4961 attrName = xmlParseName(ctxt);
4962 if (attrName == NULL) {
4963 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4964 "ATTLIST: no name for Attribute\n");
4965 break;
4966 }
4967 GROW;
4968 if (!IS_BLANK_CH(CUR)) {
4969 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4970 "Space required after the attribute name\n");
4971 break;
4972 }
4973 SKIP_BLANKS;
4974
4975 type = xmlParseAttributeType(ctxt, &tree);
4976 if (type <= 0) {
4977 break;
4978 }
4979
4980 GROW;
4981 if (!IS_BLANK_CH(CUR)) {
4982 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4983 "Space required after the attribute type\n");
4984 if (tree != NULL)
4985 xmlFreeEnumeration(tree);
4986 break;
4987 }
4988 SKIP_BLANKS;
4989
4990 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4991 if (def <= 0) {
4992 if (defaultValue != NULL)
4993 xmlFree(defaultValue);
4994 if (tree != NULL)
4995 xmlFreeEnumeration(tree);
4996 break;
4997 }
4998
4999 GROW;
5000 if (RAW != '>') {
5001 if (!IS_BLANK_CH(CUR)) {
5002 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5003 "Space required after the attribute default value\n");
5004 if (defaultValue != NULL)
5005 xmlFree(defaultValue);
5006 if (tree != NULL)
5007 xmlFreeEnumeration(tree);
5008 break;
5009 }
5010 SKIP_BLANKS;
5011 }
5012 if (check == CUR_PTR) {
5013 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5014 "in xmlParseAttributeListDecl\n");
5015 if (defaultValue != NULL)
5016 xmlFree(defaultValue);
5017 if (tree != NULL)
5018 xmlFreeEnumeration(tree);
5019 break;
5020 }
5021 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5022 (ctxt->sax->attributeDecl != NULL))
5023 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5024 type, def, defaultValue, tree);
5025 else if (tree != NULL)
5026 xmlFreeEnumeration(tree);
5027
5028 if ((ctxt->sax2) && (defaultValue != NULL) &&
5029 (def != XML_ATTRIBUTE_IMPLIED) &&
5030 (def != XML_ATTRIBUTE_REQUIRED)) {
5031 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5032 }
5033 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
5034 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5035 }
5036 if (defaultValue != NULL)
5037 xmlFree(defaultValue);
5038 GROW;
5039 }
5040 if (RAW == '>') {
5041 if (input != ctxt->input) {
5042 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5043 "Attribute list declaration doesn't start and stop in the same entity\n");
5044 }
5045 NEXT;
5046 }
5047 }
5048}
5049
5050/**
5051 * xmlParseElementMixedContentDecl:
5052 * @ctxt: an XML parser context
5053 * @inputchk: the input used for the current entity, needed for boundary checks
5054 *
5055 * parse the declaration for a Mixed Element content
5056 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5057 *
5058 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5059 * '(' S? '#PCDATA' S? ')'
5060 *
5061 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5062 *
5063 * [ VC: No Duplicate Types ]
5064 * The same name must not appear more than once in a single
5065 * mixed-content declaration.
5066 *
5067 * returns: the list of the xmlElementContentPtr describing the element choices
5068 */
5069xmlElementContentPtr
5070xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5071 xmlElementContentPtr ret = NULL, cur = NULL, n;
5072 const xmlChar *elem = NULL;
5073
5074 GROW;
5075 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5076 SKIP(7);
5077 SKIP_BLANKS;
5078 SHRINK;
5079 if (RAW == ')') {
5080 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5081 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5082"Element content declaration doesn't start and stop in the same entity\n",
5083 NULL);
5084 }
5085 NEXT;
5086 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5087 if (RAW == '*') {
5088 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5089 NEXT;
5090 }
5091 return(ret);
5092 }
5093 if ((RAW == '(') || (RAW == '|')) {
5094 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5095 if (ret == NULL) return(NULL);
5096 }
5097 while (RAW == '|') {
5098 NEXT;
5099 if (elem == NULL) {
5100 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5101 if (ret == NULL) return(NULL);
5102 ret->c1 = cur;
5103 if (cur != NULL)
5104 cur->parent = ret;
5105 cur = ret;
5106 } else {
5107 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5108 if (n == NULL) return(NULL);
5109 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5110 if (n->c1 != NULL)
5111 n->c1->parent = n;
5112 cur->c2 = n;
5113 if (n != NULL)
5114 n->parent = cur;
5115 cur = n;
5116 }
5117 SKIP_BLANKS;
5118 elem = xmlParseName(ctxt);
5119 if (elem == NULL) {
5120 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5121 "xmlParseElementMixedContentDecl : Name expected\n");
5122 xmlFreeDocElementContent(ctxt->myDoc, cur);
5123 return(NULL);
5124 }
5125 SKIP_BLANKS;
5126 GROW;
5127 }
5128 if ((RAW == ')') && (NXT(1) == '*')) {
5129 if (elem != NULL) {
5130 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5131 XML_ELEMENT_CONTENT_ELEMENT);
5132 if (cur->c2 != NULL)
5133 cur->c2->parent = cur;
5134 }
5135 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5136 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5137 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5138"Element content declaration doesn't start and stop in the same entity\n",
5139 NULL);
5140 }
5141 SKIP(2);
5142 } else {
5143 xmlFreeDocElementContent(ctxt->myDoc, ret);
5144 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5145 return(NULL);
5146 }
5147
5148 } else {
5149 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5150 }
5151 return(ret);
5152}
5153
5154/**
5155 * xmlParseElementChildrenContentDecl:
5156 * @ctxt: an XML parser context
5157 * @inputchk: the input used for the current entity, needed for boundary checks
5158 *
5159 * parse the declaration for a Mixed Element content
5160 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5161 *
5162 *
5163 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5164 *
5165 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5166 *
5167 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5168 *
5169 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5170 *
5171 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5172 * TODO Parameter-entity replacement text must be properly nested
5173 * with parenthesized groups. That is to say, if either of the
5174 * opening or closing parentheses in a choice, seq, or Mixed
5175 * construct is contained in the replacement text for a parameter
5176 * entity, both must be contained in the same replacement text. For
5177 * interoperability, if a parameter-entity reference appears in a
5178 * choice, seq, or Mixed construct, its replacement text should not
5179 * be empty, and neither the first nor last non-blank character of
5180 * the replacement text should be a connector (| or ,).
5181 *
5182 * Returns the tree of xmlElementContentPtr describing the element
5183 * hierarchy.
5184 */
5185xmlElementContentPtr
5186xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
5187 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5188 const xmlChar *elem;
5189 xmlChar type = 0;
5190
5191 SKIP_BLANKS;
5192 GROW;
5193 if (RAW == '(') {
5194 int inputid = ctxt->input->id;
5195
5196 /* Recurse on first child */
5197 NEXT;
5198 SKIP_BLANKS;
5199 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
5200 SKIP_BLANKS;
5201 GROW;
5202 } else {
5203 elem = xmlParseName(ctxt);
5204 if (elem == NULL) {
5205 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5206 return(NULL);
5207 }
5208 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5209 if (cur == NULL) {
5210 xmlErrMemory(ctxt, NULL);
5211 return(NULL);
5212 }
5213 GROW;
5214 if (RAW == '?') {
5215 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5216 NEXT;
5217 } else if (RAW == '*') {
5218 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5219 NEXT;
5220 } else if (RAW == '+') {
5221 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5222 NEXT;
5223 } else {
5224 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5225 }
5226 GROW;
5227 }
5228 SKIP_BLANKS;
5229 SHRINK;
5230 while (RAW != ')') {
5231 /*
5232 * Each loop we parse one separator and one element.
5233 */
5234 if (RAW == ',') {
5235 if (type == 0) type = CUR;
5236
5237 /*
5238 * Detect "Name | Name , Name" error
5239 */
5240 else if (type != CUR) {
5241 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5242 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5243 type);
5244 if ((last != NULL) && (last != ret))
5245 xmlFreeDocElementContent(ctxt->myDoc, last);
5246 if (ret != NULL)
5247 xmlFreeDocElementContent(ctxt->myDoc, ret);
5248 return(NULL);
5249 }
5250 NEXT;
5251
5252 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
5253 if (op == NULL) {
5254 if ((last != NULL) && (last != ret))
5255 xmlFreeDocElementContent(ctxt->myDoc, last);
5256 xmlFreeDocElementContent(ctxt->myDoc, ret);
5257 return(NULL);
5258 }
5259 if (last == NULL) {
5260 op->c1 = ret;
5261 if (ret != NULL)
5262 ret->parent = op;
5263 ret = cur = op;
5264 } else {
5265 cur->c2 = op;
5266 if (op != NULL)
5267 op->parent = cur;
5268 op->c1 = last;
5269 if (last != NULL)
5270 last->parent = op;
5271 cur =op;
5272 last = NULL;
5273 }
5274 } else if (RAW == '|') {
5275 if (type == 0) type = CUR;
5276
5277 /*
5278 * Detect "Name , Name | Name" error
5279 */
5280 else if (type != CUR) {
5281 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
5282 "xmlParseElementChildrenContentDecl : '%c' expected\n",
5283 type);
5284 if ((last != NULL) && (last != ret))
5285 xmlFreeDocElementContent(ctxt->myDoc, last);
5286 if (ret != NULL)
5287 xmlFreeDocElementContent(ctxt->myDoc, ret);
5288 return(NULL);
5289 }
5290 NEXT;
5291
5292 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5293 if (op == NULL) {
5294 if ((last != NULL) && (last != ret))
5295 xmlFreeDocElementContent(ctxt->myDoc, last);
5296 if (ret != NULL)
5297 xmlFreeDocElementContent(ctxt->myDoc, ret);
5298 return(NULL);
5299 }
5300 if (last == NULL) {
5301 op->c1 = ret;
5302 if (ret != NULL)
5303 ret->parent = op;
5304 ret = cur = op;
5305 } else {
5306 cur->c2 = op;
5307 if (op != NULL)
5308 op->parent = cur;
5309 op->c1 = last;
5310 if (last != NULL)
5311 last->parent = op;
5312 cur =op;
5313 last = NULL;
5314 }
5315 } else {
5316 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
5317 if (ret != NULL)
5318 xmlFreeDocElementContent(ctxt->myDoc, ret);
5319 return(NULL);
5320 }
5321 GROW;
5322 SKIP_BLANKS;
5323 GROW;
5324 if (RAW == '(') {
5325 int inputid = ctxt->input->id;
5326 /* Recurse on second child */
5327 NEXT;
5328 SKIP_BLANKS;
5329 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
5330 SKIP_BLANKS;
5331 } else {
5332 elem = xmlParseName(ctxt);
5333 if (elem == NULL) {
5334 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
5335 if (ret != NULL)
5336 xmlFreeDocElementContent(ctxt->myDoc, ret);
5337 return(NULL);
5338 }
5339 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5340 if (RAW == '?') {
5341 last->ocur = XML_ELEMENT_CONTENT_OPT;
5342 NEXT;
5343 } else if (RAW == '*') {
5344 last->ocur = XML_ELEMENT_CONTENT_MULT;
5345 NEXT;
5346 } else if (RAW == '+') {
5347 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5348 NEXT;
5349 } else {
5350 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5351 }
5352 }
5353 SKIP_BLANKS;
5354 GROW;
5355 }
5356 if ((cur != NULL) && (last != NULL)) {
5357 cur->c2 = last;
5358 if (last != NULL)
5359 last->parent = cur;
5360 }
5361 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
5362 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5363"Element content declaration doesn't start and stop in the same entity\n",
5364 NULL);
5365 }
5366 NEXT;
5367 if (RAW == '?') {
5368 if (ret != NULL) {
5369 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5370 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5371 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5372 else
5373 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5374 }
5375 NEXT;
5376 } else if (RAW == '*') {
5377 if (ret != NULL) {
5378 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5379 cur = ret;
5380 /*
5381 * Some normalization:
5382 * (a | b* | c?)* == (a | b | c)*
5383 */
5384 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5385 if ((cur->c1 != NULL) &&
5386 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5387 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5388 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5389 if ((cur->c2 != NULL) &&
5390 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5391 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5392 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5393 cur = cur->c2;
5394 }
5395 }
5396 NEXT;
5397 } else if (RAW == '+') {
5398 if (ret != NULL) {
5399 int found = 0;
5400
5401 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5402 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5403 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5404 else
5405 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
5406 /*
5407 * Some normalization:
5408 * (a | b*)+ == (a | b)*
5409 * (a | b?)+ == (a | b)*
5410 */
5411 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
5412 if ((cur->c1 != NULL) &&
5413 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5414 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5415 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5416 found = 1;
5417 }
5418 if ((cur->c2 != NULL) &&
5419 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5420 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5421 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5422 found = 1;
5423 }
5424 cur = cur->c2;
5425 }
5426 if (found)
5427 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5428 }
5429 NEXT;
5430 }
5431 return(ret);
5432}
5433
5434/**
5435 * xmlParseElementContentDecl:
5436 * @ctxt: an XML parser context
5437 * @name: the name of the element being defined.
5438 * @result: the Element Content pointer will be stored here if any
5439 *
5440 * parse the declaration for an Element content either Mixed or Children,
5441 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5442 *
5443 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5444 *
5445 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5446 */
5447
5448int
5449xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
5450 xmlElementContentPtr *result) {
5451
5452 xmlElementContentPtr tree = NULL;
5453 int inputid = ctxt->input->id;
5454 int res;
5455
5456 *result = NULL;
5457
5458 if (RAW != '(') {
5459 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5460 "xmlParseElementContentDecl : %s '(' expected\n", name);
5461 return(-1);
5462 }
5463 NEXT;
5464 GROW;
5465 SKIP_BLANKS;
5466 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5467 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
5468 res = XML_ELEMENT_TYPE_MIXED;
5469 } else {
5470 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
5471 res = XML_ELEMENT_TYPE_ELEMENT;
5472 }
5473 SKIP_BLANKS;
5474 *result = tree;
5475 return(res);
5476}
5477
5478/**
5479 * xmlParseElementDecl:
5480 * @ctxt: an XML parser context
5481 *
5482 * parse an Element declaration.
5483 *
5484 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5485 *
5486 * [ VC: Unique Element Type Declaration ]
5487 * No element type may be declared more than once
5488 *
5489 * Returns the type of the element, or -1 in case of error
5490 */
5491int
5492xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
5493 const xmlChar *name;
5494 int ret = -1;
5495 xmlElementContentPtr content = NULL;
5496
5497 /* GROW; done in the caller */
5498 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
5499 xmlParserInputPtr input = ctxt->input;
5500
5501 SKIP(9);
5502 if (!IS_BLANK_CH(CUR)) {
5503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5504 "Space required after 'ELEMENT'\n");
5505 }
5506 SKIP_BLANKS;
5507 name = xmlParseName(ctxt);
5508 if (name == NULL) {
5509 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510 "xmlParseElementDecl: no name for Element\n");
5511 return(-1);
5512 }
5513 while ((RAW == 0) && (ctxt->inputNr > 1))
5514 xmlPopInput(ctxt);
5515 if (!IS_BLANK_CH(CUR)) {
5516 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5517 "Space required after the element name\n");
5518 }
5519 SKIP_BLANKS;
5520 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
5521 SKIP(5);
5522 /*
5523 * Element must always be empty.
5524 */
5525 ret = XML_ELEMENT_TYPE_EMPTY;
5526 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5527 (NXT(2) == 'Y')) {
5528 SKIP(3);
5529 /*
5530 * Element is a generic container.
5531 */
5532 ret = XML_ELEMENT_TYPE_ANY;
5533 } else if (RAW == '(') {
5534 ret = xmlParseElementContentDecl(ctxt, name, &content);
5535 } else {
5536 /*
5537 * [ WFC: PEs in Internal Subset ] error handling.
5538 */
5539 if ((RAW == '%') && (ctxt->external == 0) &&
5540 (ctxt->inputNr == 1)) {
5541 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
5542 "PEReference: forbidden within markup decl in internal subset\n");
5543 } else {
5544 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
5545 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5546 }
5547 return(-1);
5548 }
5549
5550 SKIP_BLANKS;
5551 /*
5552 * Pop-up of finished entities.
5553 */
5554 while ((RAW == 0) && (ctxt->inputNr > 1))
5555 xmlPopInput(ctxt);
5556 SKIP_BLANKS;
5557
5558 if (RAW != '>') {
5559 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
5560 if (content != NULL) {
5561 xmlFreeDocElementContent(ctxt->myDoc, content);
5562 }
5563 } else {
5564 if (input != ctxt->input) {
5565 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5566 "Element declaration doesn't start and stop in the same entity\n");
5567 }
5568
5569 NEXT;
5570 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5571 (ctxt->sax->elementDecl != NULL)) {
5572 if (content != NULL)
5573 content->parent = NULL;
5574 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5575 content);
5576 if ((content != NULL) && (content->parent == NULL)) {
5577 /*
5578 * this is a trick: if xmlAddElementDecl is called,
5579 * instead of copying the full tree it is plugged directly
5580 * if called from the parser. Avoid duplicating the
5581 * interfaces or change the API/ABI
5582 */
5583 xmlFreeDocElementContent(ctxt->myDoc, content);
5584 }
5585 } else if (content != NULL) {
5586 xmlFreeDocElementContent(ctxt->myDoc, content);
5587 }
5588 }
5589 }
5590 return(ret);
5591}
5592
5593/**
5594 * xmlParseConditionalSections
5595 * @ctxt: an XML parser context
5596 *
5597 * [61] conditionalSect ::= includeSect | ignoreSect
5598 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5599 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5600 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5601 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5602 */
5603
5604static void
5605xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5606 SKIP(3);
5607 SKIP_BLANKS;
5608 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
5609 SKIP(7);
5610 SKIP_BLANKS;
5611 if (RAW != '[') {
5612 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5613 } else {
5614 NEXT;
5615 }
5616 if (xmlParserDebugEntities) {
5617 if ((ctxt->input != NULL) && (ctxt->input->filename))
5618 xmlGenericError(xmlGenericErrorContext,
5619 "%s(%d): ", ctxt->input->filename,
5620 ctxt->input->line);
5621 xmlGenericError(xmlGenericErrorContext,
5622 "Entering INCLUDE Conditional Section\n");
5623 }
5624
5625 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5626 (NXT(2) != '>'))) {
5627 const xmlChar *check = CUR_PTR;
5628 unsigned int cons = ctxt->input->consumed;
5629
5630 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5631 xmlParseConditionalSections(ctxt);
5632 } else if (IS_BLANK_CH(CUR)) {
5633 NEXT;
5634 } else if (RAW == '%') {
5635 xmlParsePEReference(ctxt);
5636 } else
5637 xmlParseMarkupDecl(ctxt);
5638
5639 /*
5640 * Pop-up of finished entities.
5641 */
5642 while ((RAW == 0) && (ctxt->inputNr > 1))
5643 xmlPopInput(ctxt);
5644
5645 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5646 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5647 break;
5648 }
5649 }
5650 if (xmlParserDebugEntities) {
5651 if ((ctxt->input != NULL) && (ctxt->input->filename))
5652 xmlGenericError(xmlGenericErrorContext,
5653 "%s(%d): ", ctxt->input->filename,
5654 ctxt->input->line);
5655 xmlGenericError(xmlGenericErrorContext,
5656 "Leaving INCLUDE Conditional Section\n");
5657 }
5658
5659 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
5660 int state;
5661 xmlParserInputState instate;
5662 int depth = 0;
5663
5664 SKIP(6);
5665 SKIP_BLANKS;
5666 if (RAW != '[') {
5667 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
5668 } else {
5669 NEXT;
5670 }
5671 if (xmlParserDebugEntities) {
5672 if ((ctxt->input != NULL) && (ctxt->input->filename))
5673 xmlGenericError(xmlGenericErrorContext,
5674 "%s(%d): ", ctxt->input->filename,
5675 ctxt->input->line);
5676 xmlGenericError(xmlGenericErrorContext,
5677 "Entering IGNORE Conditional Section\n");
5678 }
5679
5680 /*
5681 * Parse up to the end of the conditional section
5682 * But disable SAX event generating DTD building in the meantime
5683 */
5684 state = ctxt->disableSAX;
5685 instate = ctxt->instate;
5686 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
5687 ctxt->instate = XML_PARSER_IGNORE;
5688
5689 while ((depth >= 0) && (RAW != 0)) {
5690 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5691 depth++;
5692 SKIP(3);
5693 continue;
5694 }
5695 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5696 if (--depth >= 0) SKIP(3);
5697 continue;
5698 }
5699 NEXT;
5700 continue;
5701 }
5702
5703 ctxt->disableSAX = state;
5704 ctxt->instate = instate;
5705
5706 if (xmlParserDebugEntities) {
5707 if ((ctxt->input != NULL) && (ctxt->input->filename))
5708 xmlGenericError(xmlGenericErrorContext,
5709 "%s(%d): ", ctxt->input->filename,
5710 ctxt->input->line);
5711 xmlGenericError(xmlGenericErrorContext,
5712 "Leaving IGNORE Conditional Section\n");
5713 }
5714
5715 } else {
5716 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
5717 }
5718
5719 if (RAW == 0)
5720 SHRINK;
5721
5722 if (RAW == 0) {
5723 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
5724 } else {
5725 SKIP(3);
5726 }
5727}
5728
5729/**
5730 * xmlParseMarkupDecl:
5731 * @ctxt: an XML parser context
5732 *
5733 * parse Markup declarations
5734 *
5735 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5736 * NotationDecl | PI | Comment
5737 *
5738 * [ VC: Proper Declaration/PE Nesting ]
5739 * Parameter-entity replacement text must be properly nested with
5740 * markup declarations. That is to say, if either the first character
5741 * or the last character of a markup declaration (markupdecl above) is
5742 * contained in the replacement text for a parameter-entity reference,
5743 * both must be contained in the same replacement text.
5744 *
5745 * [ WFC: PEs in Internal Subset ]
5746 * In the internal DTD subset, parameter-entity references can occur
5747 * only where markup declarations can occur, not within markup declarations.
5748 * (This does not apply to references that occur in external parameter
5749 * entities or to the external subset.)
5750 */
5751void
5752xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5753 GROW;
5754 if (CUR == '<') {
5755 if (NXT(1) == '!') {
5756 switch (NXT(2)) {
5757 case 'E':
5758 if (NXT(3) == 'L')
5759 xmlParseElementDecl(ctxt);
5760 else if (NXT(3) == 'N')
5761 xmlParseEntityDecl(ctxt);
5762 break;
5763 case 'A':
5764 xmlParseAttributeListDecl(ctxt);
5765 break;
5766 case 'N':
5767 xmlParseNotationDecl(ctxt);
5768 break;
5769 case '-':
5770 xmlParseComment(ctxt);
5771 break;
5772 default:
5773 /* there is an error but it will be detected later */
5774 break;
5775 }
5776 } else if (NXT(1) == '?') {
5777 xmlParsePI(ctxt);
5778 }
5779 }
5780 /*
5781 * This is only for internal subset. On external entities,
5782 * the replacement is done before parsing stage
5783 */
5784 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5785 xmlParsePEReference(ctxt);
5786
5787 /*
5788 * Conditional sections are allowed from entities included
5789 * by PE References in the internal subset.
5790 */
5791 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5792 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5793 xmlParseConditionalSections(ctxt);
5794 }
5795 }
5796
5797 ctxt->instate = XML_PARSER_DTD;
5798}
5799
5800/**
5801 * xmlParseTextDecl:
5802 * @ctxt: an XML parser context
5803 *
5804 * parse an XML declaration header for external entities
5805 *
5806 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5807 *
5808 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5809 */
5810
5811void
5812xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5813 xmlChar *version;
5814 const xmlChar *encoding;
5815
5816 /*
5817 * We know that '<?xml' is here.
5818 */
5819 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
5820 SKIP(5);
5821 } else {
5822 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
5823 return;
5824 }
5825
5826 if (!IS_BLANK_CH(CUR)) {
5827 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5828 "Space needed after '<?xml'\n");
5829 }
5830 SKIP_BLANKS;
5831
5832 /*
5833 * We may have the VersionInfo here.
5834 */
5835 version = xmlParseVersionInfo(ctxt);
5836 if (version == NULL)
5837 version = xmlCharStrdup(XML_DEFAULT_VERSION);
5838 else {
5839 if (!IS_BLANK_CH(CUR)) {
5840 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5841 "Space needed here\n");
5842 }
5843 }
5844 ctxt->input->version = version;
5845
5846 /*
5847 * We must have the encoding declaration
5848 */
5849 encoding = xmlParseEncodingDecl(ctxt);
5850 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5851 /*
5852 * The XML REC instructs us to stop parsing right here
5853 */
5854 return;
5855 }
5856 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5857 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5858 "Missing encoding in text declaration\n");
5859 }
5860
5861 SKIP_BLANKS;
5862 if ((RAW == '?') && (NXT(1) == '>')) {
5863 SKIP(2);
5864 } else if (RAW == '>') {
5865 /* Deprecated old WD ... */
5866 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5867 NEXT;
5868 } else {
5869 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
5870 MOVETO_ENDTAG(CUR_PTR);
5871 NEXT;
5872 }
5873}
5874
5875/**
5876 * xmlParseExternalSubset:
5877 * @ctxt: an XML parser context
5878 * @ExternalID: the external identifier
5879 * @SystemID: the system identifier (or URL)
5880 *
5881 * parse Markup declarations from an external subset
5882 *
5883 * [30] extSubset ::= textDecl? extSubsetDecl
5884 *
5885 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5886 */
5887void
5888xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5889 const xmlChar *SystemID) {
5890 xmlDetectSAX2(ctxt);
5891 GROW;
5892 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
5893 xmlParseTextDecl(ctxt);
5894 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5895 /*
5896 * The XML REC instructs us to stop parsing right here
5897 */
5898 ctxt->instate = XML_PARSER_EOF;
5899 return;
5900 }
5901 }
5902 if (ctxt->myDoc == NULL) {
5903 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5904 }
5905 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5906 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5907
5908 ctxt->instate = XML_PARSER_DTD;
5909 ctxt->external = 1;
5910 while (((RAW == '<') && (NXT(1) == '?')) ||
5911 ((RAW == '<') && (NXT(1) == '!')) ||
5912 (RAW == '%') || IS_BLANK_CH(CUR)) {
5913 const xmlChar *check = CUR_PTR;
5914 unsigned int cons = ctxt->input->consumed;
5915
5916 GROW;
5917 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5918 xmlParseConditionalSections(ctxt);
5919 } else if (IS_BLANK_CH(CUR)) {
5920 NEXT;
5921 } else if (RAW == '%') {
5922 xmlParsePEReference(ctxt);
5923 } else
5924 xmlParseMarkupDecl(ctxt);
5925
5926 /*
5927 * Pop-up of finished entities.
5928 */
5929 while ((RAW == 0) && (ctxt->inputNr > 1))
5930 xmlPopInput(ctxt);
5931
5932 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5933 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5934 break;
5935 }
5936 }
5937
5938 if (RAW != 0) {
5939 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
5940 }
5941
5942}
5943
5944/**
5945 * xmlParseReference:
5946 * @ctxt: an XML parser context
5947 *
5948 * parse and handle entity references in content, depending on the SAX
5949 * interface, this may end-up in a call to character() if this is a
5950 * CharRef, a predefined entity, if there is no reference() callback.
5951 * or if the parser was asked to switch to that mode.
5952 *
5953 * [67] Reference ::= EntityRef | CharRef
5954 */
5955void
5956xmlParseReference(xmlParserCtxtPtr ctxt) {
5957 xmlEntityPtr ent;
5958 xmlChar *val;
5959 if (RAW != '&') return;
5960
5961 if (NXT(1) == '#') {
5962 int i = 0;
5963 xmlChar out[10];
5964 int hex = NXT(2);
5965 int value = xmlParseCharRef(ctxt);
5966
5967 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5968 /*
5969 * So we are using non-UTF-8 buffers
5970 * Check that the char fit on 8bits, if not
5971 * generate a CharRef.
5972 */
5973 if (value <= 0xFF) {
5974 out[0] = value;
5975 out[1] = 0;
5976 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5977 (!ctxt->disableSAX))
5978 ctxt->sax->characters(ctxt->userData, out, 1);
5979 } else {
5980 if ((hex == 'x') || (hex == 'X'))
5981 snprintf((char *)out, sizeof(out), "#x%X", value);
5982 else
5983 snprintf((char *)out, sizeof(out), "#%d", value);
5984 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5985 (!ctxt->disableSAX))
5986 ctxt->sax->reference(ctxt->userData, out);
5987 }
5988 } else {
5989 /*
5990 * Just encode the value in UTF-8
5991 */
5992 COPY_BUF(0 ,out, i, value);
5993 out[i] = 0;
5994 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5995 (!ctxt->disableSAX))
5996 ctxt->sax->characters(ctxt->userData, out, i);
5997 }
5998 } else {
5999 int was_checked;
6000
6001 ent = xmlParseEntityRef(ctxt);
6002 if (ent == NULL) return;
6003 if (!ctxt->wellFormed)
6004 return;
6005 ctxt->nbentities++;
6006 if (ctxt->nbentities >= 500000) {
6007 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6008 return;
6009 }
6010 was_checked = ent->checked;
6011 if ((ent->name != NULL) &&
6012 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6013 xmlNodePtr list = NULL;
6014 xmlParserErrors ret = XML_ERR_OK;
6015
6016
6017 /*
6018 * The first reference to the entity trigger a parsing phase
6019 * where the ent->children is filled with the result from
6020 * the parsing.
6021 */
6022 if (ent->checked == 0) {
6023 xmlChar *value;
6024
6025 value = ent->content;
6026
6027 /*
6028 * Check that this entity is well formed
6029 */
6030 if ((value != NULL) && (value[0] != 0) &&
6031 (value[1] == 0) && (value[0] == '<') &&
6032 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6033 /*
6034 * DONE: get definite answer on this !!!
6035 * Lots of entity decls are used to declare a single
6036 * char
6037 * <!ENTITY lt "<">
6038 * Which seems to be valid since
6039 * 2.4: The ampersand character (&) and the left angle
6040 * bracket (<) may appear in their literal form only
6041 * when used ... They are also legal within the literal
6042 * entity value of an internal entity declaration;i
6043 * see "4.3.2 Well-Formed Parsed Entities".
6044 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6045 * Looking at the OASIS test suite and James Clark
6046 * tests, this is broken. However the XML REC uses
6047 * it. Is the XML REC not well-formed ????
6048 * This is a hack to avoid this problem
6049 *
6050 * ANSWER: since lt gt amp .. are already defined,
6051 * this is a redefinition and hence the fact that the
6052 * content is not well balanced is not a Wf error, this
6053 * is lousy but acceptable.
6054 */
6055 list = xmlNewDocText(ctxt->myDoc, value);
6056 if (list != NULL) {
6057 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6058 (ent->children == NULL)) {
6059 ent->children = list;
6060 ent->last = list;
6061 ent->owner = 1;
6062 list->parent = (xmlNodePtr) ent;
6063 } else {
6064 xmlFreeNodeList(list);
6065 }
6066 } else if (list != NULL) {
6067 xmlFreeNodeList(list);
6068 }
6069 } else {
6070 unsigned long oldnbent = ctxt->nbentities;
6071 /*
6072 * 4.3.2: An internal general parsed entity is well-formed
6073 * if its replacement text matches the production labeled
6074 * content.
6075 */
6076
6077 void *user_data;
6078 /*
6079 * This is a bit hackish but this seems the best
6080 * way to make sure both SAX and DOM entity support
6081 * behaves okay.
6082 */
6083 if (ctxt->userData == ctxt)
6084 user_data = NULL;
6085 else
6086 user_data = ctxt->userData;
6087
6088 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6089 ctxt->depth++;
6090 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6091 value, user_data, &list);
6092 ctxt->depth--;
6093 } else if (ent->etype ==
6094 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6095 ctxt->depth++;
6096 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6097 ctxt->sax, user_data, ctxt->depth,
6098 ent->URI, ent->ExternalID, &list);
6099 ctxt->depth--;
6100 } else {
6101 ret = XML_ERR_ENTITY_PE_INTERNAL;
6102 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6103 "invalid entity type found\n", NULL);
6104 }
6105 ent->checked = ctxt->nbentities - oldnbent;
6106 if (ret == XML_ERR_ENTITY_LOOP) {
6107 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6108 return;
6109 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
6110 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6111 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6112 (ent->children == NULL)) {
6113 ent->children = list;
6114 if (ctxt->replaceEntities) {
6115 /*
6116 * Prune it directly in the generated document
6117 * except for single text nodes.
6118 */
6119 if (((list->type == XML_TEXT_NODE) &&
6120 (list->next == NULL)) ||
6121 (ctxt->parseMode == XML_PARSE_READER)) {
6122 list->parent = (xmlNodePtr) ent;
6123 list = NULL;
6124 ent->owner = 1;
6125 } else {
6126 ent->owner = 0;
6127 while (list != NULL) {
6128 list->parent = (xmlNodePtr) ctxt->node;
6129 list->doc = ctxt->myDoc;
6130 if (list->next == NULL)
6131 ent->last = list;
6132 list = list->next;
6133 }
6134 list = ent->children;
6135#ifdef LIBXML_LEGACY_ENABLED
6136 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6137 xmlAddEntityReference(ent, list, NULL);
6138#endif /* LIBXML_LEGACY_ENABLED */
6139 }
6140 } else {
6141 ent->owner = 1;
6142 while (list != NULL) {
6143 list->parent = (xmlNodePtr) ent;
6144 if (list->next == NULL)
6145 ent->last = list;
6146 list = list->next;
6147 }
6148 }
6149 } else {
6150 xmlFreeNodeList(list);
6151 list = NULL;
6152 }
6153 } else if ((ret != XML_ERR_OK) &&
6154 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6155 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6156 "Entity '%s' failed to parse\n", ent->name);
6157 } else if (list != NULL) {
6158 xmlFreeNodeList(list);
6159 list = NULL;
6160 }
6161 }
6162 if (ent->checked == 0)
6163 ent->checked = 1;
6164 }
6165 ctxt->nbentities += ent->checked;
6166
6167 if (ent->children == NULL) {
6168 /*
6169 * Probably running in SAX mode and the callbacks don't
6170 * build the entity content. So unless we already went
6171 * though parsing for first checking go though the entity
6172 * content to generate callbacks associated to the entity
6173 */
6174 if (was_checked != 0) {
6175 void *user_data;
6176 /*
6177 * This is a bit hackish but this seems the best
6178 * way to make sure both SAX and DOM entity support
6179 * behaves okay.
6180 */
6181 if (ctxt->userData == ctxt)
6182 user_data = NULL;
6183 else
6184 user_data = ctxt->userData;
6185
6186 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6187 ctxt->depth++;
6188 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6189 ent->content, user_data, NULL);
6190 ctxt->depth--;
6191 } else if (ent->etype ==
6192 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6193 ctxt->depth++;
6194 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6195 ctxt->sax, user_data, ctxt->depth,
6196 ent->URI, ent->ExternalID, NULL);
6197 ctxt->depth--;
6198 } else {
6199 ret = XML_ERR_ENTITY_PE_INTERNAL;
6200 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6201 "invalid entity type found\n", NULL);
6202 }
6203 if (ret == XML_ERR_ENTITY_LOOP) {
6204 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6205 return;
6206 }
6207 }
6208 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6209 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6210 /*
6211 * Entity reference callback comes second, it's somewhat
6212 * superfluous but a compatibility to historical behaviour
6213 */
6214 ctxt->sax->reference(ctxt->userData, ent->name);
6215 }
6216 return;
6217 }
6218 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6219 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6220 /*
6221 * Create a node.
6222 */
6223 ctxt->sax->reference(ctxt->userData, ent->name);
6224 return;
6225 }
6226 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6227 /*
6228 * There is a problem on the handling of _private for entities
6229 * (bug 155816): Should we copy the content of the field from
6230 * the entity (possibly overwriting some value set by the user
6231 * when a copy is created), should we leave it alone, or should
6232 * we try to take care of different situations? The problem
6233 * is exacerbated by the usage of this field by the xmlReader.
6234 * To fix this bug, we look at _private on the created node
6235 * and, if it's NULL, we copy in whatever was in the entity.
6236 * If it's not NULL we leave it alone. This is somewhat of a
6237 * hack - maybe we should have further tests to determine
6238 * what to do.
6239 */
6240 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6241 /*
6242 * Seems we are generating the DOM content, do
6243 * a simple tree copy for all references except the first
6244 * In the first occurrence list contains the replacement.
6245 * progressive == 2 means we are operating on the Reader
6246 * and since nodes are discarded we must copy all the time.
6247 */
6248 if (((list == NULL) && (ent->owner == 0)) ||
6249 (ctxt->parseMode == XML_PARSE_READER)) {
6250 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6251
6252 /*
6253 * when operating on a reader, the entities definitions
6254 * are always owning the entities subtree.
6255 if (ctxt->parseMode == XML_PARSE_READER)
6256 ent->owner = 1;
6257 */
6258
6259 cur = ent->children;
6260 while (cur != NULL) {
6261 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6262 if (nw != NULL) {
6263 if (nw->_private == NULL)
6264 nw->_private = cur->_private;
6265 if (firstChild == NULL){
6266 firstChild = nw;
6267 }
6268 nw = xmlAddChild(ctxt->node, nw);
6269 }
6270 if (cur == ent->last) {
6271 /*
6272 * needed to detect some strange empty
6273 * node cases in the reader tests
6274 */
6275 if ((ctxt->parseMode == XML_PARSE_READER) &&
6276 (nw != NULL) &&
6277 (nw->type == XML_ELEMENT_NODE) &&
6278 (nw->children == NULL))
6279 nw->extra = 1;
6280
6281 break;
6282 }
6283 cur = cur->next;
6284 }
6285#ifdef LIBXML_LEGACY_ENABLED
6286 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6287 xmlAddEntityReference(ent, firstChild, nw);
6288#endif /* LIBXML_LEGACY_ENABLED */
6289 } else if (list == NULL) {
6290 xmlNodePtr nw = NULL, cur, next, last,
6291 firstChild = NULL;
6292 /*
6293 * Copy the entity child list and make it the new
6294 * entity child list. The goal is to make sure any
6295 * ID or REF referenced will be the one from the
6296 * document content and not the entity copy.
6297 */
6298 cur = ent->children;
6299 ent->children = NULL;
6300 last = ent->last;
6301 ent->last = NULL;
6302 while (cur != NULL) {
6303 next = cur->next;
6304 cur->next = NULL;
6305 cur->parent = NULL;
6306 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6307 if (nw != NULL) {
6308 if (nw->_private == NULL)
6309 nw->_private = cur->_private;
6310 if (firstChild == NULL){
6311 firstChild = cur;
6312 }
6313 xmlAddChild((xmlNodePtr) ent, nw);
6314 xmlAddChild(ctxt->node, cur);
6315 }
6316 if (cur == last)
6317 break;
6318 cur = next;
6319 }
6320 ent->owner = 1;
6321#ifdef LIBXML_LEGACY_ENABLED
6322 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6323 xmlAddEntityReference(ent, firstChild, nw);
6324#endif /* LIBXML_LEGACY_ENABLED */
6325 } else {
6326 const xmlChar *nbktext;
6327
6328 /*
6329 * the name change is to avoid coalescing of the
6330 * node with a possible previous text one which
6331 * would make ent->children a dangling pointer
6332 */
6333 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6334 -1);
6335 if (ent->children->type == XML_TEXT_NODE)
6336 ent->children->name = nbktext;
6337 if ((ent->last != ent->children) &&
6338 (ent->last->type == XML_TEXT_NODE))
6339 ent->last->name = nbktext;
6340 xmlAddChildList(ctxt->node, ent->children);
6341 }
6342
6343 /*
6344 * This is to avoid a nasty side effect, see
6345 * characters() in SAX.c
6346 */
6347 ctxt->nodemem = 0;
6348 ctxt->nodelen = 0;
6349 return;
6350 }
6351 }
6352 } else {
6353 val = ent->content;
6354 if (val == NULL) return;
6355 /*
6356 * inline the entity.
6357 */
6358 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6359 (!ctxt->disableSAX))
6360 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6361 }
6362 }
6363}
6364
6365/**
6366 * xmlParseEntityRef:
6367 * @ctxt: an XML parser context
6368 *
6369 * parse ENTITY references declarations
6370 *
6371 * [68] EntityRef ::= '&' Name ';'
6372 *
6373 * [ WFC: Entity Declared ]
6374 * In a document without any DTD, a document with only an internal DTD
6375 * subset which contains no parameter entity references, or a document
6376 * with "standalone='yes'", the Name given in the entity reference
6377 * must match that in an entity declaration, except that well-formed
6378 * documents need not declare any of the following entities: amp, lt,
6379 * gt, apos, quot. The declaration of a parameter entity must precede
6380 * any reference to it. Similarly, the declaration of a general entity
6381 * must precede any reference to it which appears in a default value in an
6382 * attribute-list declaration. Note that if entities are declared in the
6383 * external subset or in external parameter entities, a non-validating
6384 * processor is not obligated to read and process their declarations;
6385 * for such documents, the rule that an entity must be declared is a
6386 * well-formedness constraint only if standalone='yes'.
6387 *
6388 * [ WFC: Parsed Entity ]
6389 * An entity reference must not contain the name of an unparsed entity
6390 *
6391 * Returns the xmlEntityPtr if found, or NULL otherwise.
6392 */
6393xmlEntityPtr
6394xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
6395 const xmlChar *name;
6396 xmlEntityPtr ent = NULL;
6397
6398 GROW;
6399
6400 if (RAW == '&') {
6401 NEXT;
6402 name = xmlParseName(ctxt);
6403 if (name == NULL) {
6404 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6405 "xmlParseEntityRef: no name\n");
6406 } else {
6407 if (RAW == ';') {
6408 NEXT;
6409 /*
6410 * Ask first SAX for entity resolution, otherwise try the
6411 * predefined set.
6412 */
6413 if (ctxt->sax != NULL) {
6414 if (ctxt->sax->getEntity != NULL)
6415 ent = ctxt->sax->getEntity(ctxt->userData, name);
6416 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
6417 ent = xmlGetPredefinedEntity(name);
6418 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6419 (ctxt->userData==ctxt)) {
6420 ent = xmlSAX2GetEntity(ctxt, name);
6421 }
6422 }
6423 /*
6424 * [ WFC: Entity Declared ]
6425 * In a document without any DTD, a document with only an
6426 * internal DTD subset which contains no parameter entity
6427 * references, or a document with "standalone='yes'", the
6428 * Name given in the entity reference must match that in an
6429 * entity declaration, except that well-formed documents
6430 * need not declare any of the following entities: amp, lt,
6431 * gt, apos, quot.
6432 * The declaration of a parameter entity must precede any
6433 * reference to it.
6434 * Similarly, the declaration of a general entity must
6435 * precede any reference to it which appears in a default
6436 * value in an attribute-list declaration. Note that if
6437 * entities are declared in the external subset or in
6438 * external parameter entities, a non-validating processor
6439 * is not obligated to read and process their declarations;
6440 * for such documents, the rule that an entity must be
6441 * declared is a well-formedness constraint only if
6442 * standalone='yes'.
6443 */
6444 if (ent == NULL) {
6445 if ((ctxt->standalone == 1) ||
6446 ((ctxt->hasExternalSubset == 0) &&
6447 (ctxt->hasPErefs == 0))) {
6448 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6449 "Entity '%s' not defined\n", name);
6450 } else {
6451 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6452 "Entity '%s' not defined\n", name);
6453 if ((ctxt->inSubset == 0) &&
6454 (ctxt->sax != NULL) &&
6455 (ctxt->sax->reference != NULL)) {
6456 ctxt->sax->reference(ctxt->userData, name);
6457 }
6458 }
6459 ctxt->valid = 0;
6460 }
6461
6462 /*
6463 * [ WFC: Parsed Entity ]
6464 * An entity reference must not contain the name of an
6465 * unparsed entity
6466 */
6467 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6468 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6469 "Entity reference to unparsed entity %s\n", name);
6470 }
6471
6472 /*
6473 * [ WFC: No External Entity References ]
6474 * Attribute values cannot contain direct or indirect
6475 * entity references to external entities.
6476 */
6477 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6478 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6479 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6480 "Attribute references external entity '%s'\n", name);
6481 }
6482 /*
6483 * [ WFC: No < in Attribute Values ]
6484 * The replacement text of any entity referred to directly or
6485 * indirectly in an attribute value (other than "&lt;") must
6486 * not contain a <.
6487 */
6488 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6489 (ent != NULL) &&
6490 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6491 (ent->content != NULL) &&
6492 (xmlStrchr(ent->content, '<'))) {
6493 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6494 "'<' in entity '%s' is not allowed in attributes values\n", name);
6495 }
6496
6497 /*
6498 * Internal check, no parameter entities here ...
6499 */
6500 else {
6501 switch (ent->etype) {
6502 case XML_INTERNAL_PARAMETER_ENTITY:
6503 case XML_EXTERNAL_PARAMETER_ENTITY:
6504 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6505 "Attempt to reference the parameter entity '%s'\n",
6506 name);
6507 break;
6508 default:
6509 break;
6510 }
6511 }
6512
6513 /*
6514 * [ WFC: No Recursion ]
6515 * A parsed entity must not contain a recursive reference
6516 * to itself, either directly or indirectly.
6517 * Done somewhere else
6518 */
6519
6520 } else {
6521 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6522 }
6523 }
6524 }
6525 return(ent);
6526}
6527
6528/**
6529 * xmlParseStringEntityRef:
6530 * @ctxt: an XML parser context
6531 * @str: a pointer to an index in the string
6532 *
6533 * parse ENTITY references declarations, but this version parses it from
6534 * a string value.
6535 *
6536 * [68] EntityRef ::= '&' Name ';'
6537 *
6538 * [ WFC: Entity Declared ]
6539 * In a document without any DTD, a document with only an internal DTD
6540 * subset which contains no parameter entity references, or a document
6541 * with "standalone='yes'", the Name given in the entity reference
6542 * must match that in an entity declaration, except that well-formed
6543 * documents need not declare any of the following entities: amp, lt,
6544 * gt, apos, quot. The declaration of a parameter entity must precede
6545 * any reference to it. Similarly, the declaration of a general entity
6546 * must precede any reference to it which appears in a default value in an
6547 * attribute-list declaration. Note that if entities are declared in the
6548 * external subset or in external parameter entities, a non-validating
6549 * processor is not obligated to read and process their declarations;
6550 * for such documents, the rule that an entity must be declared is a
6551 * well-formedness constraint only if standalone='yes'.
6552 *
6553 * [ WFC: Parsed Entity ]
6554 * An entity reference must not contain the name of an unparsed entity
6555 *
6556 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6557 * is updated to the current location in the string.
6558 */
6559xmlEntityPtr
6560xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6561 xmlChar *name;
6562 const xmlChar *ptr;
6563 xmlChar cur;
6564 xmlEntityPtr ent = NULL;
6565
6566 if ((str == NULL) || (*str == NULL))
6567 return(NULL);
6568 ptr = *str;
6569 cur = *ptr;
6570 if (cur == '&') {
6571 ptr++;
6572 cur = *ptr;
6573 name = xmlParseStringName(ctxt, &ptr);
6574 if (name == NULL) {
6575 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6576 "xmlParseStringEntityRef: no name\n");
6577 } else {
6578 if (*ptr == ';') {
6579 ptr++;
6580 /*
6581 * Ask first SAX for entity resolution, otherwise try the
6582 * predefined set.
6583 */
6584 if (ctxt->sax != NULL) {
6585 if (ctxt->sax->getEntity != NULL)
6586 ent = ctxt->sax->getEntity(ctxt->userData, name);
6587 if (ent == NULL)
6588 ent = xmlGetPredefinedEntity(name);
6589 if ((ent == NULL) && (ctxt->userData==ctxt)) {
6590 ent = xmlSAX2GetEntity(ctxt, name);
6591 }
6592 }
6593 /*
6594 * [ WFC: Entity Declared ]
6595 * In a document without any DTD, a document with only an
6596 * internal DTD subset which contains no parameter entity
6597 * references, or a document with "standalone='yes'", the
6598 * Name given in the entity reference must match that in an
6599 * entity declaration, except that well-formed documents
6600 * need not declare any of the following entities: amp, lt,
6601 * gt, apos, quot.
6602 * The declaration of a parameter entity must precede any
6603 * reference to it.
6604 * Similarly, the declaration of a general entity must
6605 * precede any reference to it which appears in a default
6606 * value in an attribute-list declaration. Note that if
6607 * entities are declared in the external subset or in
6608 * external parameter entities, a non-validating processor
6609 * is not obligated to read and process their declarations;
6610 * for such documents, the rule that an entity must be
6611 * declared is a well-formedness constraint only if
6612 * standalone='yes'.
6613 */
6614 if (ent == NULL) {
6615 if ((ctxt->standalone == 1) ||
6616 ((ctxt->hasExternalSubset == 0) &&
6617 (ctxt->hasPErefs == 0))) {
6618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6619 "Entity '%s' not defined\n", name);
6620 } else {
6621 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
6622 "Entity '%s' not defined\n",
6623 name);
6624 }
6625 /* TODO ? check regressions ctxt->valid = 0; */
6626 }
6627
6628 /*
6629 * [ WFC: Parsed Entity ]
6630 * An entity reference must not contain the name of an
6631 * unparsed entity
6632 */
6633 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
6634 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
6635 "Entity reference to unparsed entity %s\n", name);
6636 }
6637
6638 /*
6639 * [ WFC: No External Entity References ]
6640 * Attribute values cannot contain direct or indirect
6641 * entity references to external entities.
6642 */
6643 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6644 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
6645 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6646 "Attribute references external entity '%s'\n", name);
6647 }
6648 /*
6649 * [ WFC: No < in Attribute Values ]
6650 * The replacement text of any entity referred to directly or
6651 * indirectly in an attribute value (other than "&lt;") must
6652 * not contain a <.
6653 */
6654 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6655 (ent != NULL) &&
6656 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6657 (ent->content != NULL) &&
6658 (xmlStrchr(ent->content, '<'))) {
6659 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6660 "'<' in entity '%s' is not allowed in attributes values\n",
6661 name);
6662 }
6663
6664 /*
6665 * Internal check, no parameter entities here ...
6666 */
6667 else {
6668 switch (ent->etype) {
6669 case XML_INTERNAL_PARAMETER_ENTITY:
6670 case XML_EXTERNAL_PARAMETER_ENTITY:
6671 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6672 "Attempt to reference the parameter entity '%s'\n",
6673 name);
6674 break;
6675 default:
6676 break;
6677 }
6678 }
6679
6680 /*
6681 * [ WFC: No Recursion ]
6682 * A parsed entity must not contain a recursive reference
6683 * to itself, either directly or indirectly.
6684 * Done somewhere else
6685 */
6686
6687 } else {
6688 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6689 }
6690 xmlFree(name);
6691 }
6692 }
6693 *str = ptr;
6694 return(ent);
6695}
6696
6697/**
6698 * xmlParsePEReference:
6699 * @ctxt: an XML parser context
6700 *
6701 * parse PEReference declarations
6702 * The entity content is handled directly by pushing it's content as
6703 * a new input stream.
6704 *
6705 * [69] PEReference ::= '%' Name ';'
6706 *
6707 * [ WFC: No Recursion ]
6708 * A parsed entity must not contain a recursive
6709 * reference to itself, either directly or indirectly.
6710 *
6711 * [ WFC: Entity Declared ]
6712 * In a document without any DTD, a document with only an internal DTD
6713 * subset which contains no parameter entity references, or a document
6714 * with "standalone='yes'", ... ... The declaration of a parameter
6715 * entity must precede any reference to it...
6716 *
6717 * [ VC: Entity Declared ]
6718 * In a document with an external subset or external parameter entities
6719 * with "standalone='no'", ... ... The declaration of a parameter entity
6720 * must precede any reference to it...
6721 *
6722 * [ WFC: In DTD ]
6723 * Parameter-entity references may only appear in the DTD.
6724 * NOTE: misleading but this is handled.
6725 */
6726void
6727xmlParsePEReference(xmlParserCtxtPtr ctxt)
6728{
6729 const xmlChar *name;
6730 xmlEntityPtr entity = NULL;
6731 xmlParserInputPtr input;
6732
6733 if (RAW == '%') {
6734 NEXT;
6735 name = xmlParseName(ctxt);
6736 if (name == NULL) {
6737 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6738 "xmlParsePEReference: no name\n");
6739 } else {
6740 if (RAW == ';') {
6741 NEXT;
6742 if ((ctxt->sax != NULL) &&
6743 (ctxt->sax->getParameterEntity != NULL))
6744 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6745 name);
6746 if (entity == NULL) {
6747 /*
6748 * [ WFC: Entity Declared ]
6749 * In a document without any DTD, a document with only an
6750 * internal DTD subset which contains no parameter entity
6751 * references, or a document with "standalone='yes'", ...
6752 * ... The declaration of a parameter entity must precede
6753 * any reference to it...
6754 */
6755 if ((ctxt->standalone == 1) ||
6756 ((ctxt->hasExternalSubset == 0) &&
6757 (ctxt->hasPErefs == 0))) {
6758 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6759 "PEReference: %%%s; not found\n",
6760 name);
6761 } else {
6762 /*
6763 * [ VC: Entity Declared ]
6764 * In a document with an external subset or external
6765 * parameter entities with "standalone='no'", ...
6766 * ... The declaration of a parameter entity must
6767 * precede any reference to it...
6768 */
6769 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6770 "PEReference: %%%s; not found\n",
6771 name, NULL);
6772 ctxt->valid = 0;
6773 }
6774 } else {
6775 /*
6776 * Internal checking in case the entity quest barfed
6777 */
6778 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6779 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6780 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6781 "Internal: %%%s; is not a parameter entity\n",
6782 name, NULL);
6783 } else if (ctxt->input->free != deallocblankswrapper) {
6784 input =
6785 xmlNewBlanksWrapperInputStream(ctxt, entity);
6786 xmlPushInput(ctxt, input);
6787 } else {
6788 /*
6789 * TODO !!!
6790 * handle the extra spaces added before and after
6791 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6792 */
6793 input = xmlNewEntityInputStream(ctxt, entity);
6794 xmlPushInput(ctxt, input);
6795 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6796 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6797 (IS_BLANK_CH(NXT(5)))) {
6798 xmlParseTextDecl(ctxt);
6799 if (ctxt->errNo ==
6800 XML_ERR_UNSUPPORTED_ENCODING) {
6801 /*
6802 * The XML REC instructs us to stop parsing
6803 * right here
6804 */
6805 ctxt->instate = XML_PARSER_EOF;
6806 return;
6807 }
6808 }
6809 }
6810 }
6811 ctxt->hasPErefs = 1;
6812 } else {
6813 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6814 }
6815 }
6816 }
6817}
6818
6819/**
6820 * xmlParseStringPEReference:
6821 * @ctxt: an XML parser context
6822 * @str: a pointer to an index in the string
6823 *
6824 * parse PEReference declarations
6825 *
6826 * [69] PEReference ::= '%' Name ';'
6827 *
6828 * [ WFC: No Recursion ]
6829 * A parsed entity must not contain a recursive
6830 * reference to itself, either directly or indirectly.
6831 *
6832 * [ WFC: Entity Declared ]
6833 * In a document without any DTD, a document with only an internal DTD
6834 * subset which contains no parameter entity references, or a document
6835 * with "standalone='yes'", ... ... The declaration of a parameter
6836 * entity must precede any reference to it...
6837 *
6838 * [ VC: Entity Declared ]
6839 * In a document with an external subset or external parameter entities
6840 * with "standalone='no'", ... ... The declaration of a parameter entity
6841 * must precede any reference to it...
6842 *
6843 * [ WFC: In DTD ]
6844 * Parameter-entity references may only appear in the DTD.
6845 * NOTE: misleading but this is handled.
6846 *
6847 * Returns the string of the entity content.
6848 * str is updated to the current value of the index
6849 */
6850xmlEntityPtr
6851xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6852 const xmlChar *ptr;
6853 xmlChar cur;
6854 xmlChar *name;
6855 xmlEntityPtr entity = NULL;
6856
6857 if ((str == NULL) || (*str == NULL)) return(NULL);
6858 ptr = *str;
6859 cur = *ptr;
6860 if (cur == '%') {
6861 ptr++;
6862 cur = *ptr;
6863 name = xmlParseStringName(ctxt, &ptr);
6864 if (name == NULL) {
6865 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6866 "xmlParseStringPEReference: no name\n");
6867 } else {
6868 cur = *ptr;
6869 if (cur == ';') {
6870 ptr++;
6871 cur = *ptr;
6872 if ((ctxt->sax != NULL) &&
6873 (ctxt->sax->getParameterEntity != NULL))
6874 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6875 name);
6876 if (entity == NULL) {
6877 /*
6878 * [ WFC: Entity Declared ]
6879 * In a document without any DTD, a document with only an
6880 * internal DTD subset which contains no parameter entity
6881 * references, or a document with "standalone='yes'", ...
6882 * ... The declaration of a parameter entity must precede
6883 * any reference to it...
6884 */
6885 if ((ctxt->standalone == 1) ||
6886 ((ctxt->hasExternalSubset == 0) &&
6887 (ctxt->hasPErefs == 0))) {
6888 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6889 "PEReference: %%%s; not found\n", name);
6890 } else {
6891 /*
6892 * [ VC: Entity Declared ]
6893 * In a document with an external subset or external
6894 * parameter entities with "standalone='no'", ...
6895 * ... The declaration of a parameter entity must
6896 * precede any reference to it...
6897 */
6898 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6899 "PEReference: %%%s; not found\n",
6900 name, NULL);
6901 ctxt->valid = 0;
6902 }
6903 } else {
6904 /*
6905 * Internal checking in case the entity quest barfed
6906 */
6907 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6908 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6910 "%%%s; is not a parameter entity\n",
6911 name, NULL);
6912 }
6913 }
6914 ctxt->hasPErefs = 1;
6915 } else {
6916 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6917 }
6918 xmlFree(name);
6919 }
6920 }
6921 *str = ptr;
6922 return(entity);
6923}
6924
6925/**
6926 * xmlParseDocTypeDecl:
6927 * @ctxt: an XML parser context
6928 *
6929 * parse a DOCTYPE declaration
6930 *
6931 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6932 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6933 *
6934 * [ VC: Root Element Type ]
6935 * The Name in the document type declaration must match the element
6936 * type of the root element.
6937 */
6938
6939void
6940xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6941 const xmlChar *name = NULL;
6942 xmlChar *ExternalID = NULL;
6943 xmlChar *URI = NULL;
6944
6945 /*
6946 * We know that '<!DOCTYPE' has been detected.
6947 */
6948 SKIP(9);
6949
6950 SKIP_BLANKS;
6951
6952 /*
6953 * Parse the DOCTYPE name.
6954 */
6955 name = xmlParseName(ctxt);
6956 if (name == NULL) {
6957 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6958 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6959 }
6960 ctxt->intSubName = name;
6961
6962 SKIP_BLANKS;
6963
6964 /*
6965 * Check for SystemID and ExternalID
6966 */
6967 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6968
6969 if ((URI != NULL) || (ExternalID != NULL)) {
6970 ctxt->hasExternalSubset = 1;
6971 }
6972 ctxt->extSubURI = URI;
6973 ctxt->extSubSystem = ExternalID;
6974
6975 SKIP_BLANKS;
6976
6977 /*
6978 * Create and update the internal subset.
6979 */
6980 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6981 (!ctxt->disableSAX))
6982 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6983
6984 /*
6985 * Is there any internal subset declarations ?
6986 * they are handled separately in xmlParseInternalSubset()
6987 */
6988 if (RAW == '[')
6989 return;
6990
6991 /*
6992 * We should be at the end of the DOCTYPE declaration.
6993 */
6994 if (RAW != '>') {
6995 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
6996 }
6997 NEXT;
6998}
6999
7000/**
7001 * xmlParseInternalSubset:
7002 * @ctxt: an XML parser context
7003 *
7004 * parse the internal subset declaration
7005 *
7006 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7007 */
7008
7009static void
7010xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7011 /*
7012 * Is there any DTD definition ?
7013 */
7014 if (RAW == '[') {
7015 ctxt->instate = XML_PARSER_DTD;
7016 NEXT;
7017 /*
7018 * Parse the succession of Markup declarations and
7019 * PEReferences.
7020 * Subsequence (markupdecl | PEReference | S)*
7021 */
7022 while (RAW != ']') {
7023 const xmlChar *check = CUR_PTR;
7024 unsigned int cons = ctxt->input->consumed;
7025
7026 SKIP_BLANKS;
7027 xmlParseMarkupDecl(ctxt);
7028 xmlParsePEReference(ctxt);
7029
7030 /*
7031 * Pop-up of finished entities.
7032 */
7033 while ((RAW == 0) && (ctxt->inputNr > 1))
7034 xmlPopInput(ctxt);
7035
7036 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7037 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7038 "xmlParseInternalSubset: error detected in Markup declaration\n");
7039 break;
7040 }
7041 }
7042 if (RAW == ']') {
7043 NEXT;
7044 SKIP_BLANKS;
7045 }
7046 }
7047
7048 /*
7049 * We should be at the end of the DOCTYPE declaration.
7050 */
7051 if (RAW != '>') {
7052 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7053 }
7054 NEXT;
7055}
7056
7057#ifdef LIBXML_SAX1_ENABLED
7058/**
7059 * xmlParseAttribute:
7060 * @ctxt: an XML parser context
7061 * @value: a xmlChar ** used to store the value of the attribute
7062 *
7063 * parse an attribute
7064 *
7065 * [41] Attribute ::= Name Eq AttValue
7066 *
7067 * [ WFC: No External Entity References ]
7068 * Attribute values cannot contain direct or indirect entity references
7069 * to external entities.
7070 *
7071 * [ WFC: No < in Attribute Values ]
7072 * The replacement text of any entity referred to directly or indirectly in
7073 * an attribute value (other than "&lt;") must not contain a <.
7074 *
7075 * [ VC: Attribute Value Type ]
7076 * The attribute must have been declared; the value must be of the type
7077 * declared for it.
7078 *
7079 * [25] Eq ::= S? '=' S?
7080 *
7081 * With namespace:
7082 *
7083 * [NS 11] Attribute ::= QName Eq AttValue
7084 *
7085 * Also the case QName == xmlns:??? is handled independently as a namespace
7086 * definition.
7087 *
7088 * Returns the attribute name, and the value in *value.
7089 */
7090
7091const xmlChar *
7092xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
7093 const xmlChar *name;
7094 xmlChar *val;
7095
7096 *value = NULL;
7097 GROW;
7098 name = xmlParseName(ctxt);
7099 if (name == NULL) {
7100 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7101 "error parsing attribute name\n");
7102 return(NULL);
7103 }
7104
7105 /*
7106 * read the value
7107 */
7108 SKIP_BLANKS;
7109 if (RAW == '=') {
7110 NEXT;
7111 SKIP_BLANKS;
7112 val = xmlParseAttValue(ctxt);
7113 ctxt->instate = XML_PARSER_CONTENT;
7114 } else {
7115 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7116 "Specification mandate value for attribute %s\n", name);
7117 return(NULL);
7118 }
7119
7120 /*
7121 * Check that xml:lang conforms to the specification
7122 * No more registered as an error, just generate a warning now
7123 * since this was deprecated in XML second edition
7124 */
7125 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7126 if (!xmlCheckLanguageID(val)) {
7127 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7128 "Malformed value for xml:lang : %s\n",
7129 val, NULL);
7130 }
7131 }
7132
7133 /*
7134 * Check that xml:space conforms to the specification
7135 */
7136 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7137 if (xmlStrEqual(val, BAD_CAST "default"))
7138 *(ctxt->space) = 0;
7139 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7140 *(ctxt->space) = 1;
7141 else {
7142 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7143"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7144 val, NULL);
7145 }
7146 }
7147
7148 *value = val;
7149 return(name);
7150}
7151
7152/**
7153 * xmlParseStartTag:
7154 * @ctxt: an XML parser context
7155 *
7156 * parse a start of tag either for rule element or
7157 * EmptyElement. In both case we don't parse the tag closing chars.
7158 *
7159 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7160 *
7161 * [ WFC: Unique Att Spec ]
7162 * No attribute name may appear more than once in the same start-tag or
7163 * empty-element tag.
7164 *
7165 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7166 *
7167 * [ WFC: Unique Att Spec ]
7168 * No attribute name may appear more than once in the same start-tag or
7169 * empty-element tag.
7170 *
7171 * With namespace:
7172 *
7173 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7174 *
7175 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7176 *
7177 * Returns the element name parsed
7178 */
7179
7180const xmlChar *
7181xmlParseStartTag(xmlParserCtxtPtr ctxt) {
7182 const xmlChar *name;
7183 const xmlChar *attname;
7184 xmlChar *attvalue;
7185 const xmlChar **atts = ctxt->atts;
7186 int nbatts = 0;
7187 int maxatts = ctxt->maxatts;
7188 int i;
7189
7190 if (RAW != '<') return(NULL);
7191 NEXT1;
7192
7193 name = xmlParseName(ctxt);
7194 if (name == NULL) {
7195 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7196 "xmlParseStartTag: invalid element name\n");
7197 return(NULL);
7198 }
7199
7200 /*
7201 * Now parse the attributes, it ends up with the ending
7202 *
7203 * (S Attribute)* S?
7204 */
7205 SKIP_BLANKS;
7206 GROW;
7207
7208 while ((RAW != '>') &&
7209 ((RAW != '/') || (NXT(1) != '>')) &&
7210 (IS_BYTE_CHAR(RAW))) {
7211 const xmlChar *q = CUR_PTR;
7212 unsigned int cons = ctxt->input->consumed;
7213
7214 attname = xmlParseAttribute(ctxt, &attvalue);
7215 if ((attname != NULL) && (attvalue != NULL)) {
7216 /*
7217 * [ WFC: Unique Att Spec ]
7218 * No attribute name may appear more than once in the same
7219 * start-tag or empty-element tag.
7220 */
7221 for (i = 0; i < nbatts;i += 2) {
7222 if (xmlStrEqual(atts[i], attname)) {
7223 xmlErrAttributeDup(ctxt, NULL, attname);
7224 xmlFree(attvalue);
7225 goto failed;
7226 }
7227 }
7228 /*
7229 * Add the pair to atts
7230 */
7231 if (atts == NULL) {
7232 maxatts = 22; /* allow for 10 attrs by default */
7233 atts = (const xmlChar **)
7234 xmlMalloc(maxatts * sizeof(xmlChar *));
7235 if (atts == NULL) {
7236 xmlErrMemory(ctxt, NULL);
7237 if (attvalue != NULL)
7238 xmlFree(attvalue);
7239 goto failed;
7240 }
7241 ctxt->atts = atts;
7242 ctxt->maxatts = maxatts;
7243 } else if (nbatts + 4 > maxatts) {
7244 const xmlChar **n;
7245
7246 maxatts *= 2;
7247 n = (const xmlChar **) xmlRealloc((void *) atts,
7248 maxatts * sizeof(const xmlChar *));
7249 if (n == NULL) {
7250 xmlErrMemory(ctxt, NULL);
7251 if (attvalue != NULL)
7252 xmlFree(attvalue);
7253 goto failed;
7254 }
7255 atts = n;
7256 ctxt->atts = atts;
7257 ctxt->maxatts = maxatts;
7258 }
7259 atts[nbatts++] = attname;
7260 atts[nbatts++] = attvalue;
7261 atts[nbatts] = NULL;
7262 atts[nbatts + 1] = NULL;
7263 } else {
7264 if (attvalue != NULL)
7265 xmlFree(attvalue);
7266 }
7267
7268failed:
7269
7270 GROW
7271 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7272 break;
7273 if (!IS_BLANK_CH(RAW)) {
7274 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7275 "attributes construct error\n");
7276 }
7277 SKIP_BLANKS;
7278 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7279 (attname == NULL) && (attvalue == NULL)) {
7280 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7281 "xmlParseStartTag: problem parsing attributes\n");
7282 break;
7283 }
7284 SHRINK;
7285 GROW;
7286 }
7287
7288 /*
7289 * SAX: Start of Element !
7290 */
7291 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
7292 (!ctxt->disableSAX)) {
7293 if (nbatts > 0)
7294 ctxt->sax->startElement(ctxt->userData, name, atts);
7295 else
7296 ctxt->sax->startElement(ctxt->userData, name, NULL);
7297 }
7298
7299 if (atts != NULL) {
7300 /* Free only the content strings */
7301 for (i = 1;i < nbatts;i+=2)
7302 if (atts[i] != NULL)
7303 xmlFree((xmlChar *) atts[i]);
7304 }
7305 return(name);
7306}
7307
7308/**
7309 * xmlParseEndTag1:
7310 * @ctxt: an XML parser context
7311 * @line: line of the start tag
7312 * @nsNr: number of namespaces on the start tag
7313 *
7314 * parse an end of tag
7315 *
7316 * [42] ETag ::= '</' Name S? '>'
7317 *
7318 * With namespace
7319 *
7320 * [NS 9] ETag ::= '</' QName S? '>'
7321 */
7322
7323static void
7324xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
7325 const xmlChar *name;
7326
7327 GROW;
7328 if ((RAW != '<') || (NXT(1) != '/')) {
7329 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
7330 "xmlParseEndTag: '</' not found\n");
7331 return;
7332 }
7333 SKIP(2);
7334
7335 name = xmlParseNameAndCompare(ctxt,ctxt->name);
7336
7337 /*
7338 * We should definitely be at the ending "S? '>'" part
7339 */
7340 GROW;
7341 SKIP_BLANKS;
7342 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
7343 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
7344 } else
7345 NEXT1;
7346
7347 /*
7348 * [ WFC: Element Type Match ]
7349 * The Name in an element's end-tag must match the element type in the
7350 * start-tag.
7351 *
7352 */
7353 if (name != (xmlChar*)1) {
7354 if (name == NULL) name = BAD_CAST "unparseable";
7355 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
7356 "Opening and ending tag mismatch: %s line %d and %s\n",
7357 ctxt->name, line, name);
7358 }
7359
7360 /*
7361 * SAX: End of Tag
7362 */
7363 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7364 (!ctxt->disableSAX))
7365 ctxt->sax->endElement(ctxt->userData, ctxt->name);
7366
7367 namePop(ctxt);
7368 spacePop(ctxt);
7369 return;
7370}
7371
7372/**
7373 * xmlParseEndTag:
7374 * @ctxt: an XML parser context
7375 *
7376 * parse an end of tag
7377 *
7378 * [42] ETag ::= '</' Name S? '>'
7379 *
7380 * With namespace
7381 *
7382 * [NS 9] ETag ::= '</' QName S? '>'
7383 */
7384
7385void
7386xmlParseEndTag(xmlParserCtxtPtr ctxt) {
7387 xmlParseEndTag1(ctxt, 0);
7388}
7389#endif /* LIBXML_SAX1_ENABLED */
7390
7391/************************************************************************
7392 * *
7393 * SAX 2 specific operations *
7394 * *
7395 ************************************************************************/
7396
7397static const xmlChar *
7398xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7399 int len = 0, l;
7400 int c;
7401 int count = 0;
7402
7403 /*
7404 * Handler for more complex cases
7405 */
7406 GROW;
7407 c = CUR_CHAR(l);
7408 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
7409 (!IS_LETTER(c) && (c != '_'))) {
7410 return(NULL);
7411 }
7412
7413 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
7414 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
7415 (c == '.') || (c == '-') || (c == '_') ||
7416 (IS_COMBINING(c)) ||
7417 (IS_EXTENDER(c)))) {
7418 if (count++ > 100) {
7419 count = 0;
7420 GROW;
7421 }
7422 len += l;
7423 NEXTL(l);
7424 c = CUR_CHAR(l);
7425 }
7426 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7427}
7428
7429/*
7430 * xmlGetNamespace:
7431 * @ctxt: an XML parser context
7432 * @prefix: the prefix to lookup
7433 *
7434 * Lookup the namespace name for the @prefix (which ca be NULL)
7435 * The prefix must come from the @ctxt->dict dictionnary
7436 *
7437 * Returns the namespace name or NULL if not bound
7438 */
7439static const xmlChar *
7440xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7441 int i;
7442
7443 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
7444 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
7445 if (ctxt->nsTab[i] == prefix) {
7446 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7447 return(NULL);
7448 return(ctxt->nsTab[i + 1]);
7449 }
7450 return(NULL);
7451}
7452
7453/**
7454 * xmlParseNCName:
7455 * @ctxt: an XML parser context
7456 * @len: lenght of the string parsed
7457 *
7458 * parse an XML name.
7459 *
7460 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7461 * CombiningChar | Extender
7462 *
7463 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7464 *
7465 * Returns the Name parsed or NULL
7466 */
7467
7468static const xmlChar *
7469xmlParseNCName(xmlParserCtxtPtr ctxt) {
7470 const xmlChar *in;
7471 const xmlChar *ret;
7472 int count = 0;
7473
7474 /*
7475 * Accelerator for simple ASCII names
7476 */
7477 in = ctxt->input->cur;
7478 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7479 ((*in >= 0x41) && (*in <= 0x5A)) ||
7480 (*in == '_')) {
7481 in++;
7482 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7483 ((*in >= 0x41) && (*in <= 0x5A)) ||
7484 ((*in >= 0x30) && (*in <= 0x39)) ||
7485 (*in == '_') || (*in == '-') ||
7486 (*in == '.'))
7487 in++;
7488 if ((*in > 0) && (*in < 0x80)) {
7489 count = in - ctxt->input->cur;
7490 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7491 ctxt->input->cur = in;
7492 ctxt->nbChars += count;
7493 ctxt->input->col += count;
7494 if (ret == NULL) {
7495 xmlErrMemory(ctxt, NULL);
7496 }
7497 return(ret);
7498 }
7499 }
7500 return(xmlParseNCNameComplex(ctxt));
7501}
7502
7503/**
7504 * xmlParseQName:
7505 * @ctxt: an XML parser context
7506 * @prefix: pointer to store the prefix part
7507 *
7508 * parse an XML Namespace QName
7509 *
7510 * [6] QName ::= (Prefix ':')? LocalPart
7511 * [7] Prefix ::= NCName
7512 * [8] LocalPart ::= NCName
7513 *
7514 * Returns the Name parsed or NULL
7515 */
7516
7517static const xmlChar *
7518xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7519 const xmlChar *l, *p;
7520
7521 GROW;
7522
7523 l = xmlParseNCName(ctxt);
7524 if (l == NULL) {
7525 if (CUR == ':') {
7526 l = xmlParseName(ctxt);
7527 if (l != NULL) {
7528 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7529 "Failed to parse QName '%s'\n", l, NULL, NULL);
7530 *prefix = NULL;
7531 return(l);
7532 }
7533 }
7534 return(NULL);
7535 }
7536 if (CUR == ':') {
7537 NEXT;
7538 p = l;
7539 l = xmlParseNCName(ctxt);
7540 if (l == NULL) {
7541 xmlChar *tmp;
7542
7543 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7544 "Failed to parse QName '%s:'\n", p, NULL, NULL);
7545 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7546 p = xmlDictLookup(ctxt->dict, tmp, -1);
7547 if (tmp != NULL) xmlFree(tmp);
7548 *prefix = NULL;
7549 return(p);
7550 }
7551 if (CUR == ':') {
7552 xmlChar *tmp;
7553
7554 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7555 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
7556 NEXT;
7557 tmp = (xmlChar *) xmlParseName(ctxt);
7558 if (tmp != NULL) {
7559 tmp = xmlBuildQName(tmp, l, NULL, 0);
7560 l = xmlDictLookup(ctxt->dict, tmp, -1);
7561 if (tmp != NULL) xmlFree(tmp);
7562 *prefix = p;
7563 return(l);
7564 }
7565 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7566 l = xmlDictLookup(ctxt->dict, tmp, -1);
7567 if (tmp != NULL) xmlFree(tmp);
7568 *prefix = p;
7569 return(l);
7570 }
7571 *prefix = p;
7572 } else
7573 *prefix = NULL;
7574 return(l);
7575}
7576
7577/**
7578 * xmlParseQNameAndCompare:
7579 * @ctxt: an XML parser context
7580 * @name: the localname
7581 * @prefix: the prefix, if any.
7582 *
7583 * parse an XML name and compares for match
7584 * (specialized for endtag parsing)
7585 *
7586 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7587 * and the name for mismatch
7588 */
7589
7590static const xmlChar *
7591xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7592 xmlChar const *prefix) {
7593 const xmlChar *cmp = name;
7594 const xmlChar *in;
7595 const xmlChar *ret;
7596 const xmlChar *prefix2;
7597
7598 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7599
7600 GROW;
7601 in = ctxt->input->cur;
7602
7603 cmp = prefix;
7604 while (*in != 0 && *in == *cmp) {
7605 ++in;
7606 ++cmp;
7607 }
7608 if ((*cmp == 0) && (*in == ':')) {
7609 in++;
7610 cmp = name;
7611 while (*in != 0 && *in == *cmp) {
7612 ++in;
7613 ++cmp;
7614 }
7615 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
7616 /* success */
7617 ctxt->input->cur = in;
7618 return((const xmlChar*) 1);
7619 }
7620 }
7621 /*
7622 * all strings coms from the dictionary, equality can be done directly
7623 */
7624 ret = xmlParseQName (ctxt, &prefix2);
7625 if ((ret == name) && (prefix == prefix2))
7626 return((const xmlChar*) 1);
7627 return ret;
7628}
7629
7630/**
7631 * xmlParseAttValueInternal:
7632 * @ctxt: an XML parser context
7633 * @len: attribute len result
7634 * @alloc: whether the attribute was reallocated as a new string
7635 * @normalize: if 1 then further non-CDATA normalization must be done
7636 *
7637 * parse a value for an attribute.
7638 * NOTE: if no normalization is needed, the routine will return pointers
7639 * directly from the data buffer.
7640 *
7641 * 3.3.3 Attribute-Value Normalization:
7642 * Before the value of an attribute is passed to the application or
7643 * checked for validity, the XML processor must normalize it as follows:
7644 * - a character reference is processed by appending the referenced
7645 * character to the attribute value
7646 * - an entity reference is processed by recursively processing the
7647 * replacement text of the entity
7648 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7649 * appending #x20 to the normalized value, except that only a single
7650 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7651 * parsed entity or the literal entity value of an internal parsed entity
7652 * - other characters are processed by appending them to the normalized value
7653 * If the declared value is not CDATA, then the XML processor must further
7654 * process the normalized attribute value by discarding any leading and
7655 * trailing space (#x20) characters, and by replacing sequences of space
7656 * (#x20) characters by a single space (#x20) character.
7657 * All attributes for which no declaration has been read should be treated
7658 * by a non-validating parser as if declared CDATA.
7659 *
7660 * Returns the AttValue parsed or NULL. The value has to be freed by the
7661 * caller if it was copied, this can be detected by val[*len] == 0.
7662 */
7663
7664static xmlChar *
7665xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7666 int normalize)
7667{
7668 xmlChar limit = 0;
7669 const xmlChar *in = NULL, *start, *end, *last;
7670 xmlChar *ret = NULL;
7671
7672 GROW;
7673 in = (xmlChar *) CUR_PTR;
7674 if (*in != '"' && *in != '\'') {
7675 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
7676 return (NULL);
7677 }
7678 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
7679
7680 /*
7681 * try to handle in this routine the most common case where no
7682 * allocation of a new string is required and where content is
7683 * pure ASCII.
7684 */
7685 limit = *in++;
7686 end = ctxt->input->end;
7687 start = in;
7688 if (in >= end) {
7689 const xmlChar *oldbase = ctxt->input->base;
7690 GROW;
7691 if (oldbase != ctxt->input->base) {
7692 long delta = ctxt->input->base - oldbase;
7693 start = start + delta;
7694 in = in + delta;
7695 }
7696 end = ctxt->input->end;
7697 }
7698 if (normalize) {
7699 /*
7700 * Skip any leading spaces
7701 */
7702 while ((in < end) && (*in != limit) &&
7703 ((*in == 0x20) || (*in == 0x9) ||
7704 (*in == 0xA) || (*in == 0xD))) {
7705 in++;
7706 start = in;
7707 if (in >= end) {
7708 const xmlChar *oldbase = ctxt->input->base;
7709 GROW;
7710 if (oldbase != ctxt->input->base) {
7711 long delta = ctxt->input->base - oldbase;
7712 start = start + delta;
7713 in = in + delta;
7714 }
7715 end = ctxt->input->end;
7716 }
7717 }
7718 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7719 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7720 if ((*in++ == 0x20) && (*in == 0x20)) break;
7721 if (in >= end) {
7722 const xmlChar *oldbase = ctxt->input->base;
7723 GROW;
7724 if (oldbase != ctxt->input->base) {
7725 long delta = ctxt->input->base - oldbase;
7726 start = start + delta;
7727 in = in + delta;
7728 }
7729 end = ctxt->input->end;
7730 }
7731 }
7732 last = in;
7733 /*
7734 * skip the trailing blanks
7735 */
7736 while ((last[-1] == 0x20) && (last > start)) last--;
7737 while ((in < end) && (*in != limit) &&
7738 ((*in == 0x20) || (*in == 0x9) ||
7739 (*in == 0xA) || (*in == 0xD))) {
7740 in++;
7741 if (in >= end) {
7742 const xmlChar *oldbase = ctxt->input->base;
7743 GROW;
7744 if (oldbase != ctxt->input->base) {
7745 long delta = ctxt->input->base - oldbase;
7746 start = start + delta;
7747 in = in + delta;
7748 last = last + delta;
7749 }
7750 end = ctxt->input->end;
7751 }
7752 }
7753 if (*in != limit) goto need_complex;
7754 } else {
7755 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7756 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7757 in++;
7758 if (in >= end) {
7759 const xmlChar *oldbase = ctxt->input->base;
7760 GROW;
7761 if (oldbase != ctxt->input->base) {
7762 long delta = ctxt->input->base - oldbase;
7763 start = start + delta;
7764 in = in + delta;
7765 }
7766 end = ctxt->input->end;
7767 }
7768 }
7769 last = in;
7770 if (*in != limit) goto need_complex;
7771 }
7772 in++;
7773 if (len != NULL) {
7774 *len = last - start;
7775 ret = (xmlChar *) start;
7776 } else {
7777 if (alloc) *alloc = 1;
7778 ret = xmlStrndup(start, last - start);
7779 }
7780 CUR_PTR = in;
7781 if (alloc) *alloc = 0;
7782 return ret;
7783need_complex:
7784 if (alloc) *alloc = 1;
7785 return xmlParseAttValueComplex(ctxt, len, normalize);
7786}
7787
7788/**
7789 * xmlParseAttribute2:
7790 * @ctxt: an XML parser context
7791 * @pref: the element prefix
7792 * @elem: the element name
7793 * @prefix: a xmlChar ** used to store the value of the attribute prefix
7794 * @value: a xmlChar ** used to store the value of the attribute
7795 * @len: an int * to save the length of the attribute
7796 * @alloc: an int * to indicate if the attribute was allocated
7797 *
7798 * parse an attribute in the new SAX2 framework.
7799 *
7800 * Returns the attribute name, and the value in *value, .
7801 */
7802
7803static const xmlChar *
7804xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7805 const xmlChar *pref, const xmlChar *elem,
7806 const xmlChar **prefix, xmlChar **value,
7807 int *len, int *alloc) {
7808 const xmlChar *name;
7809 xmlChar *val, *internal_val = NULL;
7810 int normalize = 0;
7811
7812 *value = NULL;
7813 GROW;
7814 name = xmlParseQName(ctxt, prefix);
7815 if (name == NULL) {
7816 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7817 "error parsing attribute name\n");
7818 return(NULL);
7819 }
7820
7821 /*
7822 * get the type if needed
7823 */
7824 if (ctxt->attsSpecial != NULL) {
7825 int type;
7826
7827 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7828 pref, elem, *prefix, name);
7829 if (type != 0) normalize = 1;
7830 }
7831
7832 /*
7833 * read the value
7834 */
7835 SKIP_BLANKS;
7836 if (RAW == '=') {
7837 NEXT;
7838 SKIP_BLANKS;
7839 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
7840 ctxt->instate = XML_PARSER_CONTENT;
7841 } else {
7842 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
7843 "Specification mandate value for attribute %s\n", name);
7844 return(NULL);
7845 }
7846
7847 if (*prefix == ctxt->str_xml) {
7848 /*
7849 * Check that xml:lang conforms to the specification
7850 * No more registered as an error, just generate a warning now
7851 * since this was deprecated in XML second edition
7852 */
7853 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7854 internal_val = xmlStrndup(val, *len);
7855 if (!xmlCheckLanguageID(internal_val)) {
7856 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7857 "Malformed value for xml:lang : %s\n",
7858 internal_val, NULL);
7859 }
7860 }
7861
7862 /*
7863 * Check that xml:space conforms to the specification
7864 */
7865 if (xmlStrEqual(name, BAD_CAST "space")) {
7866 internal_val = xmlStrndup(val, *len);
7867 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7868 *(ctxt->space) = 0;
7869 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7870 *(ctxt->space) = 1;
7871 else {
7872 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
7873"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7874 internal_val, NULL);
7875 }
7876 }
7877 if (internal_val) {
7878 xmlFree(internal_val);
7879 }
7880 }
7881
7882 *value = val;
7883 return(name);
7884}
7885
7886/**
7887 * xmlParseStartTag2:
7888 * @ctxt: an XML parser context
7889 *
7890 * parse a start of tag either for rule element or
7891 * EmptyElement. In both case we don't parse the tag closing chars.
7892 * This routine is called when running SAX2 parsing
7893 *
7894 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7895 *
7896 * [ WFC: Unique Att Spec ]
7897 * No attribute name may appear more than once in the same start-tag or
7898 * empty-element tag.
7899 *
7900 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7901 *
7902 * [ WFC: Unique Att Spec ]
7903 * No attribute name may appear more than once in the same start-tag or
7904 * empty-element tag.
7905 *
7906 * With namespace:
7907 *
7908 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7909 *
7910 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7911 *
7912 * Returns the element name parsed
7913 */
7914
7915static const xmlChar *
7916xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7917 const xmlChar **URI, int *tlen) {
7918 const xmlChar *localname;
7919 const xmlChar *prefix;
7920 const xmlChar *attname;
7921 const xmlChar *aprefix;
7922 const xmlChar *nsname;
7923 xmlChar *attvalue;
7924 const xmlChar **atts = ctxt->atts;
7925 int maxatts = ctxt->maxatts;
7926 int nratts, nbatts, nbdef;
7927 int i, j, nbNs, attval, oldline, oldcol;
7928 const xmlChar *base;
7929 unsigned long cur;
7930 int nsNr = ctxt->nsNr;
7931
7932 if (RAW != '<') return(NULL);
7933 NEXT1;
7934
7935 /*
7936 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7937 * point since the attribute values may be stored as pointers to
7938 * the buffer and calling SHRINK would destroy them !
7939 * The Shrinking is only possible once the full set of attribute
7940 * callbacks have been done.
7941 */
7942reparse:
7943 SHRINK;
7944 base = ctxt->input->base;
7945 cur = ctxt->input->cur - ctxt->input->base;
7946 oldline = ctxt->input->line;
7947 oldcol = ctxt->input->col;
7948 nbatts = 0;
7949 nratts = 0;
7950 nbdef = 0;
7951 nbNs = 0;
7952 attval = 0;
7953 /* Forget any namespaces added during an earlier parse of this element. */
7954 ctxt->nsNr = nsNr;
7955
7956 localname = xmlParseQName(ctxt, &prefix);
7957 if (localname == NULL) {
7958 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7959 "StartTag: invalid element name\n");
7960 return(NULL);
7961 }
7962 *tlen = ctxt->input->cur - ctxt->input->base - cur;
7963
7964 /*
7965 * Now parse the attributes, it ends up with the ending
7966 *
7967 * (S Attribute)* S?
7968 */
7969 SKIP_BLANKS;
7970 GROW;
7971 if (ctxt->input->base != base) goto base_changed;
7972
7973 while ((RAW != '>') &&
7974 ((RAW != '/') || (NXT(1) != '>')) &&
7975 (IS_BYTE_CHAR(RAW))) {
7976 const xmlChar *q = CUR_PTR;
7977 unsigned int cons = ctxt->input->consumed;
7978 int len = -1, alloc = 0;
7979
7980 attname = xmlParseAttribute2(ctxt, prefix, localname,
7981 &aprefix, &attvalue, &len, &alloc);
7982 if (ctxt->input->base != base) {
7983 if ((attvalue != NULL) && (alloc != 0))
7984 xmlFree(attvalue);
7985 attvalue = NULL;
7986 goto base_changed;
7987 }
7988 if ((attname != NULL) && (attvalue != NULL)) {
7989 if (len < 0) len = xmlStrlen(attvalue);
7990 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7991 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7992 xmlURIPtr uri;
7993
7994 if (*URL != 0) {
7995 uri = xmlParseURI((const char *) URL);
7996 if (uri == NULL) {
7997 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7998 "xmlns: %s not a valid URI\n",
7999 URL, NULL);
8000 } else {
8001 if (uri->scheme == NULL) {
8002 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8003 "xmlns: URI %s is not absolute\n",
8004 URL, NULL);
8005 }
8006 xmlFreeURI(uri);
8007 }
8008 }
8009 /*
8010 * check that it's not a defined namespace
8011 */
8012 for (j = 1;j <= nbNs;j++)
8013 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8014 break;
8015 if (j <= nbNs)
8016 xmlErrAttributeDup(ctxt, NULL, attname);
8017 else
8018 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
8019 if (alloc != 0) xmlFree(attvalue);
8020 SKIP_BLANKS;
8021 continue;
8022 }
8023 if (aprefix == ctxt->str_xmlns) {
8024 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8025 xmlURIPtr uri;
8026
8027 if (attname == ctxt->str_xml) {
8028 if (URL != ctxt->str_xml_ns) {
8029 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8030 "xml namespace prefix mapped to wrong URI\n",
8031 NULL, NULL, NULL);
8032 }
8033 /*
8034 * Do not keep a namespace definition node
8035 */
8036 if (alloc != 0) xmlFree(attvalue);
8037 SKIP_BLANKS;
8038 continue;
8039 }
8040 uri = xmlParseURI((const char *) URL);
8041 if (uri == NULL) {
8042 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
8043 "xmlns:%s: '%s' is not a valid URI\n",
8044 attname, URL);
8045 } else {
8046 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8047 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
8048 "xmlns:%s: URI %s is not absolute\n",
8049 attname, URL);
8050 }
8051 xmlFreeURI(uri);
8052 }
8053
8054 /*
8055 * check that it's not a defined namespace
8056 */
8057 for (j = 1;j <= nbNs;j++)
8058 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8059 break;
8060 if (j <= nbNs)
8061 xmlErrAttributeDup(ctxt, aprefix, attname);
8062 else
8063 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
8064 if (alloc != 0) xmlFree(attvalue);
8065 SKIP_BLANKS;
8066 if (ctxt->input->base != base) goto base_changed;
8067 continue;
8068 }
8069
8070 /*
8071 * Add the pair to atts
8072 */
8073 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8074 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8075 if (attvalue[len] == 0)
8076 xmlFree(attvalue);
8077 goto failed;
8078 }
8079 maxatts = ctxt->maxatts;
8080 atts = ctxt->atts;
8081 }
8082 ctxt->attallocs[nratts++] = alloc;
8083 atts[nbatts++] = attname;
8084 atts[nbatts++] = aprefix;
8085 atts[nbatts++] = NULL; /* the URI will be fetched later */
8086 atts[nbatts++] = attvalue;
8087 attvalue += len;
8088 atts[nbatts++] = attvalue;
8089 /*
8090 * tag if some deallocation is needed
8091 */
8092 if (alloc != 0) attval = 1;
8093 } else {
8094 if ((attvalue != NULL) && (attvalue[len] == 0))
8095 xmlFree(attvalue);
8096 }
8097
8098failed:
8099
8100 GROW
8101 if (ctxt->input->base != base) goto base_changed;
8102 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8103 break;
8104 if (!IS_BLANK_CH(RAW)) {
8105 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8106 "attributes construct error\n");
8107 break;
8108 }
8109 SKIP_BLANKS;
8110 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8111 (attname == NULL) && (attvalue == NULL)) {
8112 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8113 "xmlParseStartTag: problem parsing attributes\n");
8114 break;
8115 }
8116 GROW;
8117 if (ctxt->input->base != base) goto base_changed;
8118 }
8119
8120 /*
8121 * The attributes defaulting
8122 */
8123 if (ctxt->attsDefault != NULL) {
8124 xmlDefAttrsPtr defaults;
8125
8126 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8127 if (defaults != NULL) {
8128 for (i = 0;i < defaults->nbAttrs;i++) {
8129 attname = defaults->values[4 * i];
8130 aprefix = defaults->values[4 * i + 1];
8131
8132 /*
8133 * special work for namespaces defaulted defs
8134 */
8135 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8136 /*
8137 * check that it's not a defined namespace
8138 */
8139 for (j = 1;j <= nbNs;j++)
8140 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8141 break;
8142 if (j <= nbNs) continue;
8143
8144 nsname = xmlGetNamespace(ctxt, NULL);
8145 if (nsname != defaults->values[4 * i + 2]) {
8146 if (nsPush(ctxt, NULL,
8147 defaults->values[4 * i + 2]) > 0)
8148 nbNs++;
8149 }
8150 } else if (aprefix == ctxt->str_xmlns) {
8151 /*
8152 * check that it's not a defined namespace
8153 */
8154 for (j = 1;j <= nbNs;j++)
8155 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8156 break;
8157 if (j <= nbNs) continue;
8158
8159 nsname = xmlGetNamespace(ctxt, attname);
8160 if (nsname != defaults->values[2]) {
8161 if (nsPush(ctxt, attname,
8162 defaults->values[4 * i + 2]) > 0)
8163 nbNs++;
8164 }
8165 } else {
8166 /*
8167 * check that it's not a defined attribute
8168 */
8169 for (j = 0;j < nbatts;j+=5) {
8170 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8171 break;
8172 }
8173 if (j < nbatts) continue;
8174
8175 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8176 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
8177 return(NULL);
8178 }
8179 maxatts = ctxt->maxatts;
8180 atts = ctxt->atts;
8181 }
8182 atts[nbatts++] = attname;
8183 atts[nbatts++] = aprefix;
8184 if (aprefix == NULL)
8185 atts[nbatts++] = NULL;
8186 else
8187 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8188 atts[nbatts++] = defaults->values[4 * i + 2];
8189 atts[nbatts++] = defaults->values[4 * i + 3];
8190 nbdef++;
8191 }
8192 }
8193 }
8194 }
8195
8196 /*
8197 * The attributes checkings
8198 */
8199 for (i = 0; i < nbatts;i += 5) {
8200 /*
8201 * The default namespace does not apply to attribute names.
8202 */
8203 if (atts[i + 1] != NULL) {
8204 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8205 if (nsname == NULL) {
8206 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8207 "Namespace prefix %s for %s on %s is not defined\n",
8208 atts[i + 1], atts[i], localname);
8209 }
8210 atts[i + 2] = nsname;
8211 } else
8212 nsname = NULL;
8213 /*
8214 * [ WFC: Unique Att Spec ]
8215 * No attribute name may appear more than once in the same
8216 * start-tag or empty-element tag.
8217 * As extended by the Namespace in XML REC.
8218 */
8219 for (j = 0; j < i;j += 5) {
8220 if (atts[i] == atts[j]) {
8221 if (atts[i+1] == atts[j+1]) {
8222 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8223 break;
8224 }
8225 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8226 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8227 "Namespaced Attribute %s in '%s' redefined\n",
8228 atts[i], nsname, NULL);
8229 break;
8230 }
8231 }
8232 }
8233 }
8234
8235 nsname = xmlGetNamespace(ctxt, prefix);
8236 if ((prefix != NULL) && (nsname == NULL)) {
8237 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8238 "Namespace prefix %s on %s is not defined\n",
8239 prefix, localname, NULL);
8240 }
8241 *pref = prefix;
8242 *URI = nsname;
8243
8244 /*
8245 * SAX: Start of Element !
8246 */
8247 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8248 (!ctxt->disableSAX)) {
8249 if (nbNs > 0)
8250 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8251 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8252 nbatts / 5, nbdef, atts);
8253 else
8254 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8255 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8256 }
8257
8258 /*
8259 * Free up attribute allocated strings if needed
8260 */
8261 if (attval != 0) {
8262 for (i = 3,j = 0; j < nratts;i += 5,j++)
8263 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8264 xmlFree((xmlChar *) atts[i]);
8265 }
8266
8267 return(localname);
8268
8269base_changed:
8270 /*
8271 * the attribute strings are valid iif the base didn't changed
8272 */
8273 if (attval != 0) {
8274 for (i = 3,j = 0; j < nratts;i += 5,j++)
8275 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8276 xmlFree((xmlChar *) atts[i]);
8277 }
8278 ctxt->input->cur = ctxt->input->base + cur;
8279 ctxt->input->line = oldline;
8280 ctxt->input->col = oldcol;
8281 if (ctxt->wellFormed == 1) {
8282 goto reparse;
8283 }
8284 return(NULL);
8285}
8286
8287/**
8288 * xmlParseEndTag2:
8289 * @ctxt: an XML parser context
8290 * @line: line of the start tag
8291 * @nsNr: number of namespaces on the start tag
8292 *
8293 * parse an end of tag
8294 *
8295 * [42] ETag ::= '</' Name S? '>'
8296 *
8297 * With namespace
8298 *
8299 * [NS 9] ETag ::= '</' QName S? '>'
8300 */
8301
8302static void
8303xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
8304 const xmlChar *URI, int line, int nsNr, int tlen) {
8305 const xmlChar *name;
8306
8307 GROW;
8308 if ((RAW != '<') || (NXT(1) != '/')) {
8309 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
8310 return;
8311 }
8312 SKIP(2);
8313
8314 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
8315 if (ctxt->input->cur[tlen] == '>') {
8316 ctxt->input->cur += tlen + 1;
8317 goto done;
8318 }
8319 ctxt->input->cur += tlen;
8320 name = (xmlChar*)1;
8321 } else {
8322 if (prefix == NULL)
8323 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8324 else
8325 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8326 }
8327
8328 /*
8329 * We should definitely be at the ending "S? '>'" part
8330 */
8331 GROW;
8332 SKIP_BLANKS;
8333 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8334 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8335 } else
8336 NEXT1;
8337
8338 /*
8339 * [ WFC: Element Type Match ]
8340 * The Name in an element's end-tag must match the element type in the
8341 * start-tag.
8342 *
8343 */
8344 if (name != (xmlChar*)1) {
8345 if (name == NULL) name = BAD_CAST "unparseable";
8346 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8347 "Opening and ending tag mismatch: %s line %d and %s\n",
8348 ctxt->name, line, name);
8349 }
8350
8351 /*
8352 * SAX: End of Tag
8353 */
8354done:
8355 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8356 (!ctxt->disableSAX))
8357 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8358
8359 spacePop(ctxt);
8360 if (nsNr != 0)
8361 nsPop(ctxt, nsNr);
8362 return;
8363}
8364
8365/**
8366 * xmlParseCDSect:
8367 * @ctxt: an XML parser context
8368 *
8369 * Parse escaped pure raw content.
8370 *
8371 * [18] CDSect ::= CDStart CData CDEnd
8372 *
8373 * [19] CDStart ::= '<![CDATA['
8374 *
8375 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8376 *
8377 * [21] CDEnd ::= ']]>'
8378 */
8379void
8380xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8381 xmlChar *buf = NULL;
8382 int len = 0;
8383 int size = XML_PARSER_BUFFER_SIZE;
8384 int r, rl;
8385 int s, sl;
8386 int cur, l;
8387 int count = 0;
8388
8389 /* Check 2.6.0 was NXT(0) not RAW */
8390 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8391 SKIP(9);
8392 } else
8393 return;
8394
8395 ctxt->instate = XML_PARSER_CDATA_SECTION;
8396 r = CUR_CHAR(rl);
8397 if (!IS_CHAR(r)) {
8398 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8399 ctxt->instate = XML_PARSER_CONTENT;
8400 return;
8401 }
8402 NEXTL(rl);
8403 s = CUR_CHAR(sl);
8404 if (!IS_CHAR(s)) {
8405 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
8406 ctxt->instate = XML_PARSER_CONTENT;
8407 return;
8408 }
8409 NEXTL(sl);
8410 cur = CUR_CHAR(l);
8411 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8412 if (buf == NULL) {
8413 xmlErrMemory(ctxt, NULL);
8414 return;
8415 }
8416 while (IS_CHAR(cur) &&
8417 ((r != ']') || (s != ']') || (cur != '>'))) {
8418 if (len + 5 >= size) {
8419 xmlChar *tmp;
8420
8421 size *= 2;
8422 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8423 if (tmp == NULL) {
8424 xmlFree(buf);
8425 xmlErrMemory(ctxt, NULL);
8426 return;
8427 }
8428 buf = tmp;
8429 }
8430 COPY_BUF(rl,buf,len,r);
8431 r = s;
8432 rl = sl;
8433 s = cur;
8434 sl = l;
8435 count++;
8436 if (count > 50) {
8437 GROW;
8438 count = 0;
8439 }
8440 NEXTL(l);
8441 cur = CUR_CHAR(l);
8442 }
8443 buf[len] = 0;
8444 ctxt->instate = XML_PARSER_CONTENT;
8445 if (cur != '>') {
8446 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
8447 "CData section not finished\n%.50s\n", buf);
8448 xmlFree(buf);
8449 return;
8450 }
8451 NEXTL(l);
8452
8453 /*
8454 * OK the buffer is to be consumed as cdata.
8455 */
8456 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8457 if (ctxt->sax->cdataBlock != NULL)
8458 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
8459 else if (ctxt->sax->characters != NULL)
8460 ctxt->sax->characters(ctxt->userData, buf, len);
8461 }
8462 xmlFree(buf);
8463}
8464
8465/**
8466 * xmlParseContent:
8467 * @ctxt: an XML parser context
8468 *
8469 * Parse a content:
8470 *
8471 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8472 */
8473
8474void
8475xmlParseContent(xmlParserCtxtPtr ctxt) {
8476 GROW;
8477 while ((RAW != 0) &&
8478 ((RAW != '<') || (NXT(1) != '/')) &&
8479 (ctxt->instate != XML_PARSER_EOF)) {
8480 const xmlChar *test = CUR_PTR;
8481 unsigned int cons = ctxt->input->consumed;
8482 const xmlChar *cur = ctxt->input->cur;
8483
8484 /*
8485 * First case : a Processing Instruction.
8486 */
8487 if ((*cur == '<') && (cur[1] == '?')) {
8488 xmlParsePI(ctxt);
8489 }
8490
8491 /*
8492 * Second case : a CDSection
8493 */
8494 /* 2.6.0 test was *cur not RAW */
8495 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
8496 xmlParseCDSect(ctxt);
8497 }
8498
8499 /*
8500 * Third case : a comment
8501 */
8502 else if ((*cur == '<') && (NXT(1) == '!') &&
8503 (NXT(2) == '-') && (NXT(3) == '-')) {
8504 xmlParseComment(ctxt);
8505 ctxt->instate = XML_PARSER_CONTENT;
8506 }
8507
8508 /*
8509 * Fourth case : a sub-element.
8510 */
8511 else if (*cur == '<') {
8512 xmlParseElement(ctxt);
8513 }
8514
8515 /*
8516 * Fifth case : a reference. If if has not been resolved,
8517 * parsing returns it's Name, create the node
8518 */
8519
8520 else if (*cur == '&') {
8521 xmlParseReference(ctxt);
8522 }
8523
8524 /*
8525 * Last case, text. Note that References are handled directly.
8526 */
8527 else {
8528 xmlParseCharData(ctxt, 0);
8529 }
8530
8531 GROW;
8532 /*
8533 * Pop-up of finished entities.
8534 */
8535 while ((RAW == 0) && (ctxt->inputNr > 1))
8536 xmlPopInput(ctxt);
8537 SHRINK;
8538
8539 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
8540 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8541 "detected an error in element content\n");
8542 ctxt->instate = XML_PARSER_EOF;
8543 break;
8544 }
8545 }
8546}
8547
8548/**
8549 * xmlParseElement:
8550 * @ctxt: an XML parser context
8551 *
8552 * parse an XML element, this is highly recursive
8553 *
8554 * [39] element ::= EmptyElemTag | STag content ETag
8555 *
8556 * [ WFC: Element Type Match ]
8557 * The Name in an element's end-tag must match the element type in the
8558 * start-tag.
8559 *
8560 */
8561
8562void
8563xmlParseElement(xmlParserCtxtPtr ctxt) {
8564 const xmlChar *name;
8565 const xmlChar *prefix;
8566 const xmlChar *URI;
8567 xmlParserNodeInfo node_info;
8568 int line, tlen;
8569 xmlNodePtr ret;
8570 int nsNr = ctxt->nsNr;
8571
8572 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8573 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8574 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8575 xmlParserMaxDepth);
8576 ctxt->instate = XML_PARSER_EOF;
8577 return;
8578 }
8579
8580 /* Capture start position */
8581 if (ctxt->record_info) {
8582 node_info.begin_pos = ctxt->input->consumed +
8583 (CUR_PTR - ctxt->input->base);
8584 node_info.begin_line = ctxt->input->line;
8585 }
8586
8587 if (ctxt->spaceNr == 0)
8588 spacePush(ctxt, -1);
8589 else if (*ctxt->space == -2)
8590 spacePush(ctxt, -1);
8591 else
8592 spacePush(ctxt, *ctxt->space);
8593
8594 line = ctxt->input->line;
8595#ifdef LIBXML_SAX1_ENABLED
8596 if (ctxt->sax2)
8597#endif /* LIBXML_SAX1_ENABLED */
8598 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
8599#ifdef LIBXML_SAX1_ENABLED
8600 else
8601 name = xmlParseStartTag(ctxt);
8602#endif /* LIBXML_SAX1_ENABLED */
8603 if (name == NULL) {
8604 spacePop(ctxt);
8605 return;
8606 }
8607 namePush(ctxt, name);
8608 ret = ctxt->node;
8609
8610#ifdef LIBXML_VALID_ENABLED
8611 /*
8612 * [ VC: Root Element Type ]
8613 * The Name in the document type declaration must match the element
8614 * type of the root element.
8615 */
8616 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8617 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8618 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8619#endif /* LIBXML_VALID_ENABLED */
8620
8621 /*
8622 * Check for an Empty Element.
8623 */
8624 if ((RAW == '/') && (NXT(1) == '>')) {
8625 SKIP(2);
8626 if (ctxt->sax2) {
8627 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8628 (!ctxt->disableSAX))
8629 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
8630#ifdef LIBXML_SAX1_ENABLED
8631 } else {
8632 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8633 (!ctxt->disableSAX))
8634 ctxt->sax->endElement(ctxt->userData, name);
8635#endif /* LIBXML_SAX1_ENABLED */
8636 }
8637 namePop(ctxt);
8638 spacePop(ctxt);
8639 if (nsNr != ctxt->nsNr)
8640 nsPop(ctxt, ctxt->nsNr - nsNr);
8641 if ( ret != NULL && ctxt->record_info ) {
8642 node_info.end_pos = ctxt->input->consumed +
8643 (CUR_PTR - ctxt->input->base);
8644 node_info.end_line = ctxt->input->line;
8645 node_info.node = ret;
8646 xmlParserAddNodeInfo(ctxt, &node_info);
8647 }
8648 return;
8649 }
8650 if (RAW == '>') {
8651 NEXT1;
8652 } else {
8653 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8654 "Couldn't find end of Start Tag %s line %d\n",
8655 name, line, NULL);
8656
8657 /*
8658 * end of parsing of this node.
8659 */
8660 nodePop(ctxt);
8661 namePop(ctxt);
8662 spacePop(ctxt);
8663 if (nsNr != ctxt->nsNr)
8664 nsPop(ctxt, ctxt->nsNr - nsNr);
8665
8666 /*
8667 * Capture end position and add node
8668 */
8669 if ( ret != NULL && ctxt->record_info ) {
8670 node_info.end_pos = ctxt->input->consumed +
8671 (CUR_PTR - ctxt->input->base);
8672 node_info.end_line = ctxt->input->line;
8673 node_info.node = ret;
8674 xmlParserAddNodeInfo(ctxt, &node_info);
8675 }
8676 return;
8677 }
8678
8679 /*
8680 * Parse the content of the element:
8681 */
8682 xmlParseContent(ctxt);
8683 if (!IS_BYTE_CHAR(RAW)) {
8684 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8685 "Premature end of data in tag %s line %d\n",
8686 name, line, NULL);
8687
8688 /*
8689 * end of parsing of this node.
8690 */
8691 nodePop(ctxt);
8692 namePop(ctxt);
8693 spacePop(ctxt);
8694 if (nsNr != ctxt->nsNr)
8695 nsPop(ctxt, ctxt->nsNr - nsNr);
8696 return;
8697 }
8698
8699 /*
8700 * parse the end of tag: '</' should be here.
8701 */
8702 if (ctxt->sax2) {
8703 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
8704 namePop(ctxt);
8705 }
8706#ifdef LIBXML_SAX1_ENABLED
8707 else
8708 xmlParseEndTag1(ctxt, line);
8709#endif /* LIBXML_SAX1_ENABLED */
8710
8711 /*
8712 * Capture end position and add node
8713 */
8714 if ( ret != NULL && ctxt->record_info ) {
8715 node_info.end_pos = ctxt->input->consumed +
8716 (CUR_PTR - ctxt->input->base);
8717 node_info.end_line = ctxt->input->line;
8718 node_info.node = ret;
8719 xmlParserAddNodeInfo(ctxt, &node_info);
8720 }
8721}
8722
8723/**
8724 * xmlParseVersionNum:
8725 * @ctxt: an XML parser context
8726 *
8727 * parse the XML version value.
8728 *
8729 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8730 *
8731 * Returns the string giving the XML version number, or NULL
8732 */
8733xmlChar *
8734xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8735 xmlChar *buf = NULL;
8736 int len = 0;
8737 int size = 10;
8738 xmlChar cur;
8739
8740 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8741 if (buf == NULL) {
8742 xmlErrMemory(ctxt, NULL);
8743 return(NULL);
8744 }
8745 cur = CUR;
8746 while (((cur >= 'a') && (cur <= 'z')) ||
8747 ((cur >= 'A') && (cur <= 'Z')) ||
8748 ((cur >= '0') && (cur <= '9')) ||
8749 (cur == '_') || (cur == '.') ||
8750 (cur == ':') || (cur == '-')) {
8751 if (len + 1 >= size) {
8752 xmlChar *tmp;
8753
8754 size *= 2;
8755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8756 if (tmp == NULL) {
8757 xmlErrMemory(ctxt, NULL);
8758 return(NULL);
8759 }
8760 buf = tmp;
8761 }
8762 buf[len++] = cur;
8763 NEXT;
8764 cur=CUR;
8765 }
8766 buf[len] = 0;
8767 return(buf);
8768}
8769
8770/**
8771 * xmlParseVersionInfo:
8772 * @ctxt: an XML parser context
8773 *
8774 * parse the XML version.
8775 *
8776 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8777 *
8778 * [25] Eq ::= S? '=' S?
8779 *
8780 * Returns the version string, e.g. "1.0"
8781 */
8782
8783xmlChar *
8784xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8785 xmlChar *version = NULL;
8786
8787 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
8788 SKIP(7);
8789 SKIP_BLANKS;
8790 if (RAW != '=') {
8791 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8792 return(NULL);
8793 }
8794 NEXT;
8795 SKIP_BLANKS;
8796 if (RAW == '"') {
8797 NEXT;
8798 version = xmlParseVersionNum(ctxt);
8799 if (RAW != '"') {
8800 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8801 } else
8802 NEXT;
8803 } else if (RAW == '\''){
8804 NEXT;
8805 version = xmlParseVersionNum(ctxt);
8806 if (RAW != '\'') {
8807 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8808 } else
8809 NEXT;
8810 } else {
8811 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8812 }
8813 }
8814 return(version);
8815}
8816
8817/**
8818 * xmlParseEncName:
8819 * @ctxt: an XML parser context
8820 *
8821 * parse the XML encoding name
8822 *
8823 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8824 *
8825 * Returns the encoding name value or NULL
8826 */
8827xmlChar *
8828xmlParseEncName(xmlParserCtxtPtr ctxt) {
8829 xmlChar *buf = NULL;
8830 int len = 0;
8831 int size = 10;
8832 xmlChar cur;
8833
8834 cur = CUR;
8835 if (((cur >= 'a') && (cur <= 'z')) ||
8836 ((cur >= 'A') && (cur <= 'Z'))) {
8837 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
8838 if (buf == NULL) {
8839 xmlErrMemory(ctxt, NULL);
8840 return(NULL);
8841 }
8842
8843 buf[len++] = cur;
8844 NEXT;
8845 cur = CUR;
8846 while (((cur >= 'a') && (cur <= 'z')) ||
8847 ((cur >= 'A') && (cur <= 'Z')) ||
8848 ((cur >= '0') && (cur <= '9')) ||
8849 (cur == '.') || (cur == '_') ||
8850 (cur == '-')) {
8851 if (len + 1 >= size) {
8852 xmlChar *tmp;
8853
8854 size *= 2;
8855 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8856 if (tmp == NULL) {
8857 xmlErrMemory(ctxt, NULL);
8858 xmlFree(buf);
8859 return(NULL);
8860 }
8861 buf = tmp;
8862 }
8863 buf[len++] = cur;
8864 NEXT;
8865 cur = CUR;
8866 if (cur == 0) {
8867 SHRINK;
8868 GROW;
8869 cur = CUR;
8870 }
8871 }
8872 buf[len] = 0;
8873 } else {
8874 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
8875 }
8876 return(buf);
8877}
8878
8879/**
8880 * xmlParseEncodingDecl:
8881 * @ctxt: an XML parser context
8882 *
8883 * parse the XML encoding declaration
8884 *
8885 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8886 *
8887 * this setups the conversion filters.
8888 *
8889 * Returns the encoding value or NULL
8890 */
8891
8892const xmlChar *
8893xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8894 xmlChar *encoding = NULL;
8895
8896 SKIP_BLANKS;
8897 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
8898 SKIP(8);
8899 SKIP_BLANKS;
8900 if (RAW != '=') {
8901 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
8902 return(NULL);
8903 }
8904 NEXT;
8905 SKIP_BLANKS;
8906 if (RAW == '"') {
8907 NEXT;
8908 encoding = xmlParseEncName(ctxt);
8909 if (RAW != '"') {
8910 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8911 } else
8912 NEXT;
8913 } else if (RAW == '\''){
8914 NEXT;
8915 encoding = xmlParseEncName(ctxt);
8916 if (RAW != '\'') {
8917 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
8918 } else
8919 NEXT;
8920 } else {
8921 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
8922 }
8923 /*
8924 * UTF-16 encoding stwich has already taken place at this stage,
8925 * more over the little-endian/big-endian selection is already done
8926 */
8927 if ((encoding != NULL) &&
8928 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8929 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
8930 if (ctxt->encoding != NULL)
8931 xmlFree((xmlChar *) ctxt->encoding);
8932 ctxt->encoding = encoding;
8933 }
8934 /*
8935 * UTF-8 encoding is handled natively
8936 */
8937 else if ((encoding != NULL) &&
8938 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8939 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
8940 if (ctxt->encoding != NULL)
8941 xmlFree((xmlChar *) ctxt->encoding);
8942 ctxt->encoding = encoding;
8943 }
8944 else if (encoding != NULL) {
8945 xmlCharEncodingHandlerPtr handler;
8946
8947 if (ctxt->input->encoding != NULL)
8948 xmlFree((xmlChar *) ctxt->input->encoding);
8949 ctxt->input->encoding = encoding;
8950
8951 handler = xmlFindCharEncodingHandler((const char *) encoding);
8952 if (handler != NULL) {
8953 xmlSwitchToEncoding(ctxt, handler);
8954 } else {
8955 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
8956 "Unsupported encoding %s\n", encoding);
8957 return(NULL);
8958 }
8959 }
8960 }
8961 return(encoding);
8962}
8963
8964/**
8965 * xmlParseSDDecl:
8966 * @ctxt: an XML parser context
8967 *
8968 * parse the XML standalone declaration
8969 *
8970 * [32] SDDecl ::= S 'standalone' Eq
8971 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8972 *
8973 * [ VC: Standalone Document Declaration ]
8974 * TODO The standalone document declaration must have the value "no"
8975 * if any external markup declarations contain declarations of:
8976 * - attributes with default values, if elements to which these
8977 * attributes apply appear in the document without specifications
8978 * of values for these attributes, or
8979 * - entities (other than amp, lt, gt, apos, quot), if references
8980 * to those entities appear in the document, or
8981 * - attributes with values subject to normalization, where the
8982 * attribute appears in the document with a value which will change
8983 * as a result of normalization, or
8984 * - element types with element content, if white space occurs directly
8985 * within any instance of those types.
8986 *
8987 * Returns:
8988 * 1 if standalone="yes"
8989 * 0 if standalone="no"
8990 * -2 if standalone attribute is missing or invalid
8991 * (A standalone value of -2 means that the XML declaration was found,
8992 * but no value was specified for the standalone attribute).
8993 */
8994
8995int
8996xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8997 int standalone = -2;
8998
8999 SKIP_BLANKS;
9000 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
9001 SKIP(10);
9002 SKIP_BLANKS;
9003 if (RAW != '=') {
9004 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9005 return(standalone);
9006 }
9007 NEXT;
9008 SKIP_BLANKS;
9009 if (RAW == '\''){
9010 NEXT;
9011 if ((RAW == 'n') && (NXT(1) == 'o')) {
9012 standalone = 0;
9013 SKIP(2);
9014 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9015 (NXT(2) == 's')) {
9016 standalone = 1;
9017 SKIP(3);
9018 } else {
9019 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9020 }
9021 if (RAW != '\'') {
9022 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9023 } else
9024 NEXT;
9025 } else if (RAW == '"'){
9026 NEXT;
9027 if ((RAW == 'n') && (NXT(1) == 'o')) {
9028 standalone = 0;
9029 SKIP(2);
9030 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9031 (NXT(2) == 's')) {
9032 standalone = 1;
9033 SKIP(3);
9034 } else {
9035 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
9036 }
9037 if (RAW != '"') {
9038 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9039 } else
9040 NEXT;
9041 } else {
9042 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9043 }
9044 }
9045 return(standalone);
9046}
9047
9048/**
9049 * xmlParseXMLDecl:
9050 * @ctxt: an XML parser context
9051 *
9052 * parse an XML declaration header
9053 *
9054 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9055 */
9056
9057void
9058xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9059 xmlChar *version;
9060
9061 /*
9062 * This value for standalone indicates that the document has an
9063 * XML declaration but it does not have a standalone attribute.
9064 * It will be overwritten later if a standalone attribute is found.
9065 */
9066 ctxt->input->standalone = -2;
9067
9068 /*
9069 * We know that '<?xml' is here.
9070 */
9071 SKIP(5);
9072
9073 if (!IS_BLANK_CH(RAW)) {
9074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9075 "Blank needed after '<?xml'\n");
9076 }
9077 SKIP_BLANKS;
9078
9079 /*
9080 * We must have the VersionInfo here.
9081 */
9082 version = xmlParseVersionInfo(ctxt);
9083 if (version == NULL) {
9084 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
9085 } else {
9086 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9087 /*
9088 * TODO: Blueberry should be detected here
9089 */
9090 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9091 "Unsupported version '%s'\n",
9092 version, NULL);
9093 }
9094 if (ctxt->version != NULL)
9095 xmlFree((void *) ctxt->version);
9096 ctxt->version = version;
9097 }
9098
9099 /*
9100 * We may have the encoding declaration
9101 */
9102 if (!IS_BLANK_CH(RAW)) {
9103 if ((RAW == '?') && (NXT(1) == '>')) {
9104 SKIP(2);
9105 return;
9106 }
9107 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9108 }
9109 xmlParseEncodingDecl(ctxt);
9110 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9111 /*
9112 * The XML REC instructs us to stop parsing right here
9113 */
9114 return;
9115 }
9116
9117 /*
9118 * We may have the standalone status.
9119 */
9120 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
9121 if ((RAW == '?') && (NXT(1) == '>')) {
9122 SKIP(2);
9123 return;
9124 }
9125 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
9126 }
9127 SKIP_BLANKS;
9128 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9129
9130 SKIP_BLANKS;
9131 if ((RAW == '?') && (NXT(1) == '>')) {
9132 SKIP(2);
9133 } else if (RAW == '>') {
9134 /* Deprecated old WD ... */
9135 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9136 NEXT;
9137 } else {
9138 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
9139 MOVETO_ENDTAG(CUR_PTR);
9140 NEXT;
9141 }
9142}
9143
9144/**
9145 * xmlParseMisc:
9146 * @ctxt: an XML parser context
9147 *
9148 * parse an XML Misc* optional field.
9149 *
9150 * [27] Misc ::= Comment | PI | S
9151 */
9152
9153void
9154xmlParseMisc(xmlParserCtxtPtr ctxt) {
9155 while (((RAW == '<') && (NXT(1) == '?')) ||
9156 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
9157 IS_BLANK_CH(CUR)) {
9158 if ((RAW == '<') && (NXT(1) == '?')) {
9159 xmlParsePI(ctxt);
9160 } else if (IS_BLANK_CH(CUR)) {
9161 NEXT;
9162 } else
9163 xmlParseComment(ctxt);
9164 }
9165}
9166
9167/**
9168 * xmlParseDocument:
9169 * @ctxt: an XML parser context
9170 *
9171 * parse an XML document (and build a tree if using the standard SAX
9172 * interface).
9173 *
9174 * [1] document ::= prolog element Misc*
9175 *
9176 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9177 *
9178 * Returns 0, -1 in case of error. the parser context is augmented
9179 * as a result of the parsing.
9180 */
9181
9182int
9183xmlParseDocument(xmlParserCtxtPtr ctxt) {
9184 xmlChar start[4];
9185 xmlCharEncoding enc;
9186
9187 xmlInitParser();
9188
9189 if ((ctxt == NULL) || (ctxt->input == NULL))
9190 return(-1);
9191
9192 GROW;
9193
9194 /*
9195 * SAX: detecting the level.
9196 */
9197 xmlDetectSAX2(ctxt);
9198
9199 /*
9200 * SAX: beginning of the document processing.
9201 */
9202 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9203 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9204
9205 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9206 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
9207 /*
9208 * Get the 4 first bytes and decode the charset
9209 * if enc != XML_CHAR_ENCODING_NONE
9210 * plug some encoding conversion routines.
9211 */
9212 start[0] = RAW;
9213 start[1] = NXT(1);
9214 start[2] = NXT(2);
9215 start[3] = NXT(3);
9216 enc = xmlDetectCharEncoding(&start[0], 4);
9217 if (enc != XML_CHAR_ENCODING_NONE) {
9218 xmlSwitchEncoding(ctxt, enc);
9219 }
9220 }
9221
9222
9223 if (CUR == 0) {
9224 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9225 }
9226
9227 /*
9228 * Check for the XMLDecl in the Prolog.
9229 */
9230 GROW;
9231 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9232
9233 /*
9234 * Note that we will switch encoding on the fly.
9235 */
9236 xmlParseXMLDecl(ctxt);
9237 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9238 /*
9239 * The XML REC instructs us to stop parsing right here
9240 */
9241 return(-1);
9242 }
9243 ctxt->standalone = ctxt->input->standalone;
9244 SKIP_BLANKS;
9245 } else {
9246 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9247 }
9248 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9249 ctxt->sax->startDocument(ctxt->userData);
9250
9251 /*
9252 * The Misc part of the Prolog
9253 */
9254 GROW;
9255 xmlParseMisc(ctxt);
9256
9257 /*
9258 * Then possibly doc type declaration(s) and more Misc
9259 * (doctypedecl Misc*)?
9260 */
9261 GROW;
9262 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
9263
9264 ctxt->inSubset = 1;
9265 xmlParseDocTypeDecl(ctxt);
9266 if (RAW == '[') {
9267 ctxt->instate = XML_PARSER_DTD;
9268 xmlParseInternalSubset(ctxt);
9269 }
9270
9271 /*
9272 * Create and update the external subset.
9273 */
9274 ctxt->inSubset = 2;
9275 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9276 (!ctxt->disableSAX))
9277 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9278 ctxt->extSubSystem, ctxt->extSubURI);
9279 ctxt->inSubset = 0;
9280
9281
9282 ctxt->instate = XML_PARSER_PROLOG;
9283 xmlParseMisc(ctxt);
9284 }
9285
9286 /*
9287 * Time to start parsing the tree itself
9288 */
9289 GROW;
9290 if (RAW != '<') {
9291 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9292 "Start tag expected, '<' not found\n");
9293 } else {
9294 ctxt->instate = XML_PARSER_CONTENT;
9295 xmlParseElement(ctxt);
9296 ctxt->instate = XML_PARSER_EPILOG;
9297
9298
9299 /*
9300 * The Misc part at the end
9301 */
9302 xmlParseMisc(ctxt);
9303
9304 if (RAW != 0) {
9305 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
9306 }
9307 ctxt->instate = XML_PARSER_EOF;
9308 }
9309
9310 /*
9311 * SAX: end of the document processing.
9312 */
9313 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9314 ctxt->sax->endDocument(ctxt->userData);
9315
9316 /*
9317 * Remove locally kept entity definitions if the tree was not built
9318 */
9319 if ((ctxt->myDoc != NULL) &&
9320 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9321 xmlFreeDoc(ctxt->myDoc);
9322 ctxt->myDoc = NULL;
9323 }
9324
9325 if (! ctxt->wellFormed) {
9326 ctxt->valid = 0;
9327 return(-1);
9328 }
9329 return(0);
9330}
9331
9332/**
9333 * xmlParseExtParsedEnt:
9334 * @ctxt: an XML parser context
9335 *
9336 * parse a general parsed entity
9337 * An external general parsed entity is well-formed if it matches the
9338 * production labeled extParsedEnt.
9339 *
9340 * [78] extParsedEnt ::= TextDecl? content
9341 *
9342 * Returns 0, -1 in case of error. the parser context is augmented
9343 * as a result of the parsing.
9344 */
9345
9346int
9347xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9348 xmlChar start[4];
9349 xmlCharEncoding enc;
9350
9351 if ((ctxt == NULL) || (ctxt->input == NULL))
9352 return(-1);
9353
9354 xmlDefaultSAXHandlerInit();
9355
9356 xmlDetectSAX2(ctxt);
9357
9358 GROW;
9359
9360 /*
9361 * SAX: beginning of the document processing.
9362 */
9363 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9364 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9365
9366 /*
9367 * Get the 4 first bytes and decode the charset
9368 * if enc != XML_CHAR_ENCODING_NONE
9369 * plug some encoding conversion routines.
9370 */
9371 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9372 start[0] = RAW;
9373 start[1] = NXT(1);
9374 start[2] = NXT(2);
9375 start[3] = NXT(3);
9376 enc = xmlDetectCharEncoding(start, 4);
9377 if (enc != XML_CHAR_ENCODING_NONE) {
9378 xmlSwitchEncoding(ctxt, enc);
9379 }
9380 }
9381
9382
9383 if (CUR == 0) {
9384 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9385 }
9386
9387 /*
9388 * Check for the XMLDecl in the Prolog.
9389 */
9390 GROW;
9391 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
9392
9393 /*
9394 * Note that we will switch encoding on the fly.
9395 */
9396 xmlParseXMLDecl(ctxt);
9397 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9398 /*
9399 * The XML REC instructs us to stop parsing right here
9400 */
9401 return(-1);
9402 }
9403 SKIP_BLANKS;
9404 } else {
9405 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9406 }
9407 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9408 ctxt->sax->startDocument(ctxt->userData);
9409
9410 /*
9411 * Doing validity checking on chunk doesn't make sense
9412 */
9413 ctxt->instate = XML_PARSER_CONTENT;
9414 ctxt->validate = 0;
9415 ctxt->loadsubset = 0;
9416 ctxt->depth = 0;
9417
9418 xmlParseContent(ctxt);
9419
9420 if ((RAW == '<') && (NXT(1) == '/')) {
9421 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9422 } else if (RAW != 0) {
9423 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
9424 }
9425
9426 /*
9427 * SAX: end of the document processing.
9428 */
9429 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9430 ctxt->sax->endDocument(ctxt->userData);
9431
9432 if (! ctxt->wellFormed) return(-1);
9433 return(0);
9434}
9435
9436#ifdef LIBXML_PUSH_ENABLED
9437/************************************************************************
9438 * *
9439 * Progressive parsing interfaces *
9440 * *
9441 ************************************************************************/
9442
9443/**
9444 * xmlParseLookupSequence:
9445 * @ctxt: an XML parser context
9446 * @first: the first char to lookup
9447 * @next: the next char to lookup or zero
9448 * @third: the next char to lookup or zero
9449 *
9450 * Try to find if a sequence (first, next, third) or just (first next) or
9451 * (first) is available in the input stream.
9452 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9453 * to avoid rescanning sequences of bytes, it DOES change the state of the
9454 * parser, do not use liberally.
9455 *
9456 * Returns the index to the current parsing point if the full sequence
9457 * is available, -1 otherwise.
9458 */
9459static int
9460xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9461 xmlChar next, xmlChar third) {
9462 int base, len;
9463 xmlParserInputPtr in;
9464 const xmlChar *buf;
9465
9466 in = ctxt->input;
9467 if (in == NULL) return(-1);
9468 base = in->cur - in->base;
9469 if (base < 0) return(-1);
9470 if (ctxt->checkIndex > base)
9471 base = ctxt->checkIndex;
9472 if (in->buf == NULL) {
9473 buf = in->base;
9474 len = in->length;
9475 } else {
9476 buf = in->buf->buffer->content;
9477 len = in->buf->buffer->use;
9478 }
9479 /* take into account the sequence length */
9480 if (third) len -= 2;
9481 else if (next) len --;
9482 for (;base < len;base++) {
9483 if (buf[base] == first) {
9484 if (third != 0) {
9485 if ((buf[base + 1] != next) ||
9486 (buf[base + 2] != third)) continue;
9487 } else if (next != 0) {
9488 if (buf[base + 1] != next) continue;
9489 }
9490 ctxt->checkIndex = 0;
9491#ifdef DEBUG_PUSH
9492 if (next == 0)
9493 xmlGenericError(xmlGenericErrorContext,
9494 "PP: lookup '%c' found at %d\n",
9495 first, base);
9496 else if (third == 0)
9497 xmlGenericError(xmlGenericErrorContext,
9498 "PP: lookup '%c%c' found at %d\n",
9499 first, next, base);
9500 else
9501 xmlGenericError(xmlGenericErrorContext,
9502 "PP: lookup '%c%c%c' found at %d\n",
9503 first, next, third, base);
9504#endif
9505 return(base - (in->cur - in->base));
9506 }
9507 }
9508 ctxt->checkIndex = base;
9509#ifdef DEBUG_PUSH
9510 if (next == 0)
9511 xmlGenericError(xmlGenericErrorContext,
9512 "PP: lookup '%c' failed\n", first);
9513 else if (third == 0)
9514 xmlGenericError(xmlGenericErrorContext,
9515 "PP: lookup '%c%c' failed\n", first, next);
9516 else
9517 xmlGenericError(xmlGenericErrorContext,
9518 "PP: lookup '%c%c%c' failed\n", first, next, third);
9519#endif
9520 return(-1);
9521}
9522
9523/**
9524 * xmlParseGetLasts:
9525 * @ctxt: an XML parser context
9526 * @lastlt: pointer to store the last '<' from the input
9527 * @lastgt: pointer to store the last '>' from the input
9528 *
9529 * Lookup the last < and > in the current chunk
9530 */
9531static void
9532xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9533 const xmlChar **lastgt) {
9534 const xmlChar *tmp;
9535
9536 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9537 xmlGenericError(xmlGenericErrorContext,
9538 "Internal error: xmlParseGetLasts\n");
9539 return;
9540 }
9541 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
9542 tmp = ctxt->input->end;
9543 tmp--;
9544 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9545 if (tmp < ctxt->input->base) {
9546 *lastlt = NULL;
9547 *lastgt = NULL;
9548 } else {
9549 *lastlt = tmp;
9550 tmp++;
9551 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9552 if (*tmp == '\'') {
9553 tmp++;
9554 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9555 if (tmp < ctxt->input->end) tmp++;
9556 } else if (*tmp == '"') {
9557 tmp++;
9558 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9559 if (tmp < ctxt->input->end) tmp++;
9560 } else
9561 tmp++;
9562 }
9563 if (tmp < ctxt->input->end)
9564 *lastgt = tmp;
9565 else {
9566 tmp = *lastlt;
9567 tmp--;
9568 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9569 if (tmp >= ctxt->input->base)
9570 *lastgt = tmp;
9571 else
9572 *lastgt = NULL;
9573 }
9574 }
9575 } else {
9576 *lastlt = NULL;
9577 *lastgt = NULL;
9578 }
9579}
9580/**
9581 * xmlCheckCdataPush:
9582 * @cur: pointer to the bock of characters
9583 * @len: length of the block in bytes
9584 *
9585 * Check that the block of characters is okay as SCdata content [20]
9586 *
9587 * Returns the number of bytes to pass if okay, a negative index where an
9588 * UTF-8 error occured otherwise
9589 */
9590static int
9591xmlCheckCdataPush(const xmlChar *utf, int len) {
9592 int ix;
9593 unsigned char c;
9594 int codepoint;
9595
9596 if ((utf == NULL) || (len <= 0))
9597 return(0);
9598
9599 for (ix = 0; ix < len;) { /* string is 0-terminated */
9600 c = utf[ix];
9601 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9602 if (c >= 0x20)
9603 ix++;
9604 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9605 ix++;
9606 else
9607 return(-ix);
9608 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9609 if (ix + 2 > len) return(ix);
9610 if ((utf[ix+1] & 0xc0 ) != 0x80)
9611 return(-ix);
9612 codepoint = (utf[ix] & 0x1f) << 6;
9613 codepoint |= utf[ix+1] & 0x3f;
9614 if (!xmlIsCharQ(codepoint))
9615 return(-ix);
9616 ix += 2;
9617 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9618 if (ix + 3 > len) return(ix);
9619 if (((utf[ix+1] & 0xc0) != 0x80) ||
9620 ((utf[ix+2] & 0xc0) != 0x80))
9621 return(-ix);
9622 codepoint = (utf[ix] & 0xf) << 12;
9623 codepoint |= (utf[ix+1] & 0x3f) << 6;
9624 codepoint |= utf[ix+2] & 0x3f;
9625 if (!xmlIsCharQ(codepoint))
9626 return(-ix);
9627 ix += 3;
9628 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9629 if (ix + 4 > len) return(ix);
9630 if (((utf[ix+1] & 0xc0) != 0x80) ||
9631 ((utf[ix+2] & 0xc0) != 0x80) ||
9632 ((utf[ix+3] & 0xc0) != 0x80))
9633 return(-ix);
9634 codepoint = (utf[ix] & 0x7) << 18;
9635 codepoint |= (utf[ix+1] & 0x3f) << 12;
9636 codepoint |= (utf[ix+2] & 0x3f) << 6;
9637 codepoint |= utf[ix+3] & 0x3f;
9638 if (!xmlIsCharQ(codepoint))
9639 return(-ix);
9640 ix += 4;
9641 } else /* unknown encoding */
9642 return(-ix);
9643 }
9644 return(ix);
9645}
9646
9647/**
9648 * xmlParseTryOrFinish:
9649 * @ctxt: an XML parser context
9650 * @terminate: last chunk indicator
9651 *
9652 * Try to progress on parsing
9653 *
9654 * Returns zero if no parsing was possible
9655 */
9656static int
9657xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9658 int ret = 0;
9659 int avail, tlen;
9660 xmlChar cur, next;
9661 const xmlChar *lastlt, *lastgt;
9662
9663 if (ctxt->input == NULL)
9664 return(0);
9665
9666#ifdef DEBUG_PUSH
9667 switch (ctxt->instate) {
9668 case XML_PARSER_EOF:
9669 xmlGenericError(xmlGenericErrorContext,
9670 "PP: try EOF\n"); break;
9671 case XML_PARSER_START:
9672 xmlGenericError(xmlGenericErrorContext,
9673 "PP: try START\n"); break;
9674 case XML_PARSER_MISC:
9675 xmlGenericError(xmlGenericErrorContext,
9676 "PP: try MISC\n");break;
9677 case XML_PARSER_COMMENT:
9678 xmlGenericError(xmlGenericErrorContext,
9679 "PP: try COMMENT\n");break;
9680 case XML_PARSER_PROLOG:
9681 xmlGenericError(xmlGenericErrorContext,
9682 "PP: try PROLOG\n");break;
9683 case XML_PARSER_START_TAG:
9684 xmlGenericError(xmlGenericErrorContext,
9685 "PP: try START_TAG\n");break;
9686 case XML_PARSER_CONTENT:
9687 xmlGenericError(xmlGenericErrorContext,
9688 "PP: try CONTENT\n");break;
9689 case XML_PARSER_CDATA_SECTION:
9690 xmlGenericError(xmlGenericErrorContext,
9691 "PP: try CDATA_SECTION\n");break;
9692 case XML_PARSER_END_TAG:
9693 xmlGenericError(xmlGenericErrorContext,
9694 "PP: try END_TAG\n");break;
9695 case XML_PARSER_ENTITY_DECL:
9696 xmlGenericError(xmlGenericErrorContext,
9697 "PP: try ENTITY_DECL\n");break;
9698 case XML_PARSER_ENTITY_VALUE:
9699 xmlGenericError(xmlGenericErrorContext,
9700 "PP: try ENTITY_VALUE\n");break;
9701 case XML_PARSER_ATTRIBUTE_VALUE:
9702 xmlGenericError(xmlGenericErrorContext,
9703 "PP: try ATTRIBUTE_VALUE\n");break;
9704 case XML_PARSER_DTD:
9705 xmlGenericError(xmlGenericErrorContext,
9706 "PP: try DTD\n");break;
9707 case XML_PARSER_EPILOG:
9708 xmlGenericError(xmlGenericErrorContext,
9709 "PP: try EPILOG\n");break;
9710 case XML_PARSER_PI:
9711 xmlGenericError(xmlGenericErrorContext,
9712 "PP: try PI\n");break;
9713 case XML_PARSER_IGNORE:
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: try IGNORE\n");break;
9716 }
9717#endif
9718
9719 if ((ctxt->input != NULL) &&
9720 (ctxt->input->cur - ctxt->input->base > 4096)) {
9721 xmlSHRINK(ctxt);
9722 ctxt->checkIndex = 0;
9723 }
9724 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
9725
9726 while (1) {
9727 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9728 return(0);
9729
9730
9731 /*
9732 * Pop-up of finished entities.
9733 */
9734 while ((RAW == 0) && (ctxt->inputNr > 1))
9735 xmlPopInput(ctxt);
9736
9737 if (ctxt->input == NULL) break;
9738 if (ctxt->input->buf == NULL)
9739 avail = ctxt->input->length -
9740 (ctxt->input->cur - ctxt->input->base);
9741 else {
9742 /*
9743 * If we are operating on converted input, try to flush
9744 * remainng chars to avoid them stalling in the non-converted
9745 * buffer.
9746 */
9747 if ((ctxt->input->buf->raw != NULL) &&
9748 (ctxt->input->buf->raw->use > 0)) {
9749 int base = ctxt->input->base -
9750 ctxt->input->buf->buffer->content;
9751 int current = ctxt->input->cur - ctxt->input->base;
9752
9753 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9754 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9755 ctxt->input->cur = ctxt->input->base + current;
9756 ctxt->input->end =
9757 &ctxt->input->buf->buffer->content[
9758 ctxt->input->buf->buffer->use];
9759 }
9760 avail = ctxt->input->buf->buffer->use -
9761 (ctxt->input->cur - ctxt->input->base);
9762 }
9763 if (avail < 1)
9764 goto done;
9765 switch (ctxt->instate) {
9766 case XML_PARSER_EOF:
9767 /*
9768 * Document parsing is done !
9769 */
9770 goto done;
9771 case XML_PARSER_START:
9772 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9773 xmlChar start[4];
9774 xmlCharEncoding enc;
9775
9776 /*
9777 * Very first chars read from the document flow.
9778 */
9779 if (avail < 4)
9780 goto done;
9781
9782 /*
9783 * Get the 4 first bytes and decode the charset
9784 * if enc != XML_CHAR_ENCODING_NONE
9785 * plug some encoding conversion routines,
9786 * else xmlSwitchEncoding will set to (default)
9787 * UTF8.
9788 */
9789 start[0] = RAW;
9790 start[1] = NXT(1);
9791 start[2] = NXT(2);
9792 start[3] = NXT(3);
9793 enc = xmlDetectCharEncoding(start, 4);
9794 xmlSwitchEncoding(ctxt, enc);
9795 break;
9796 }
9797
9798 if (avail < 2)
9799 goto done;
9800 cur = ctxt->input->cur[0];
9801 next = ctxt->input->cur[1];
9802 if (cur == 0) {
9803 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9804 ctxt->sax->setDocumentLocator(ctxt->userData,
9805 &xmlDefaultSAXLocator);
9806 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9807 ctxt->instate = XML_PARSER_EOF;
9808#ifdef DEBUG_PUSH
9809 xmlGenericError(xmlGenericErrorContext,
9810 "PP: entering EOF\n");
9811#endif
9812 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9813 ctxt->sax->endDocument(ctxt->userData);
9814 goto done;
9815 }
9816 if ((cur == '<') && (next == '?')) {
9817 /* PI or XML decl */
9818 if (avail < 5) return(ret);
9819 if ((!terminate) &&
9820 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9821 return(ret);
9822 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9823 ctxt->sax->setDocumentLocator(ctxt->userData,
9824 &xmlDefaultSAXLocator);
9825 if ((ctxt->input->cur[2] == 'x') &&
9826 (ctxt->input->cur[3] == 'm') &&
9827 (ctxt->input->cur[4] == 'l') &&
9828 (IS_BLANK_CH(ctxt->input->cur[5]))) {
9829 ret += 5;
9830#ifdef DEBUG_PUSH
9831 xmlGenericError(xmlGenericErrorContext,
9832 "PP: Parsing XML Decl\n");
9833#endif
9834 xmlParseXMLDecl(ctxt);
9835 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9836 /*
9837 * The XML REC instructs us to stop parsing right
9838 * here
9839 */
9840 ctxt->instate = XML_PARSER_EOF;
9841 return(0);
9842 }
9843 ctxt->standalone = ctxt->input->standalone;
9844 if ((ctxt->encoding == NULL) &&
9845 (ctxt->input->encoding != NULL))
9846 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9847 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9848 (!ctxt->disableSAX))
9849 ctxt->sax->startDocument(ctxt->userData);
9850 ctxt->instate = XML_PARSER_MISC;
9851#ifdef DEBUG_PUSH
9852 xmlGenericError(xmlGenericErrorContext,
9853 "PP: entering MISC\n");
9854#endif
9855 } else {
9856 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9857 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9858 (!ctxt->disableSAX))
9859 ctxt->sax->startDocument(ctxt->userData);
9860 ctxt->instate = XML_PARSER_MISC;
9861#ifdef DEBUG_PUSH
9862 xmlGenericError(xmlGenericErrorContext,
9863 "PP: entering MISC\n");
9864#endif
9865 }
9866 } else {
9867 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9868 ctxt->sax->setDocumentLocator(ctxt->userData,
9869 &xmlDefaultSAXLocator);
9870 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9871 if (ctxt->version == NULL) {
9872 xmlErrMemory(ctxt, NULL);
9873 break;
9874 }
9875 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9876 (!ctxt->disableSAX))
9877 ctxt->sax->startDocument(ctxt->userData);
9878 ctxt->instate = XML_PARSER_MISC;
9879#ifdef DEBUG_PUSH
9880 xmlGenericError(xmlGenericErrorContext,
9881 "PP: entering MISC\n");
9882#endif
9883 }
9884 break;
9885 case XML_PARSER_START_TAG: {
9886 const xmlChar *name;
9887 const xmlChar *prefix;
9888 const xmlChar *URI;
9889 int nsNr = ctxt->nsNr;
9890
9891 if ((avail < 2) && (ctxt->inputNr == 1))
9892 goto done;
9893 cur = ctxt->input->cur[0];
9894 if (cur != '<') {
9895 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
9896 ctxt->instate = XML_PARSER_EOF;
9897 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9898 ctxt->sax->endDocument(ctxt->userData);
9899 goto done;
9900 }
9901 if (!terminate) {
9902 if (ctxt->progressive) {
9903 /* > can be found unescaped in attribute values */
9904 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
9905 goto done;
9906 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9907 goto done;
9908 }
9909 }
9910 if (ctxt->spaceNr == 0)
9911 spacePush(ctxt, -1);
9912 else if (*ctxt->space == -2)
9913 spacePush(ctxt, -1);
9914 else
9915 spacePush(ctxt, *ctxt->space);
9916#ifdef LIBXML_SAX1_ENABLED
9917 if (ctxt->sax2)
9918#endif /* LIBXML_SAX1_ENABLED */
9919 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9920#ifdef LIBXML_SAX1_ENABLED
9921 else
9922 name = xmlParseStartTag(ctxt);
9923#endif /* LIBXML_SAX1_ENABLED */
9924 if (name == NULL) {
9925 spacePop(ctxt);
9926 ctxt->instate = XML_PARSER_EOF;
9927 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9928 ctxt->sax->endDocument(ctxt->userData);
9929 goto done;
9930 }
9931#ifdef LIBXML_VALID_ENABLED
9932 /*
9933 * [ VC: Root Element Type ]
9934 * The Name in the document type declaration must match
9935 * the element type of the root element.
9936 */
9937 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9938 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9939 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9940#endif /* LIBXML_VALID_ENABLED */
9941
9942 /*
9943 * Check for an Empty Element.
9944 */
9945 if ((RAW == '/') && (NXT(1) == '>')) {
9946 SKIP(2);
9947
9948 if (ctxt->sax2) {
9949 if ((ctxt->sax != NULL) &&
9950 (ctxt->sax->endElementNs != NULL) &&
9951 (!ctxt->disableSAX))
9952 ctxt->sax->endElementNs(ctxt->userData, name,
9953 prefix, URI);
9954 if (ctxt->nsNr - nsNr > 0)
9955 nsPop(ctxt, ctxt->nsNr - nsNr);
9956#ifdef LIBXML_SAX1_ENABLED
9957 } else {
9958 if ((ctxt->sax != NULL) &&
9959 (ctxt->sax->endElement != NULL) &&
9960 (!ctxt->disableSAX))
9961 ctxt->sax->endElement(ctxt->userData, name);
9962#endif /* LIBXML_SAX1_ENABLED */
9963 }
9964 spacePop(ctxt);
9965 if (ctxt->nameNr == 0) {
9966 ctxt->instate = XML_PARSER_EPILOG;
9967 } else {
9968 ctxt->instate = XML_PARSER_CONTENT;
9969 }
9970 break;
9971 }
9972 if (RAW == '>') {
9973 NEXT;
9974 } else {
9975 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
9976 "Couldn't find end of Start Tag %s\n",
9977 name);
9978 nodePop(ctxt);
9979 spacePop(ctxt);
9980 }
9981 if (ctxt->sax2)
9982 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9983#ifdef LIBXML_SAX1_ENABLED
9984 else
9985 namePush(ctxt, name);
9986#endif /* LIBXML_SAX1_ENABLED */
9987
9988 ctxt->instate = XML_PARSER_CONTENT;
9989 break;
9990 }
9991 case XML_PARSER_CONTENT: {
9992 const xmlChar *test;
9993 unsigned int cons;
9994 if ((avail < 2) && (ctxt->inputNr == 1))
9995 goto done;
9996 cur = ctxt->input->cur[0];
9997 next = ctxt->input->cur[1];
9998
9999 test = CUR_PTR;
10000 cons = ctxt->input->consumed;
10001 if ((cur == '<') && (next == '/')) {
10002 ctxt->instate = XML_PARSER_END_TAG;
10003 break;
10004 } else if ((cur == '<') && (next == '?')) {
10005 if ((!terminate) &&
10006 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10007 goto done;
10008 xmlParsePI(ctxt);
10009 } else if ((cur == '<') && (next != '!')) {
10010 ctxt->instate = XML_PARSER_START_TAG;
10011 break;
10012 } else if ((cur == '<') && (next == '!') &&
10013 (ctxt->input->cur[2] == '-') &&
10014 (ctxt->input->cur[3] == '-')) {
10015 int term;
10016
10017 if (avail < 4)
10018 goto done;
10019 ctxt->input->cur += 4;
10020 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10021 ctxt->input->cur -= 4;
10022 if ((!terminate) && (term < 0))
10023 goto done;
10024 xmlParseComment(ctxt);
10025 ctxt->instate = XML_PARSER_CONTENT;
10026 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10027 (ctxt->input->cur[2] == '[') &&
10028 (ctxt->input->cur[3] == 'C') &&
10029 (ctxt->input->cur[4] == 'D') &&
10030 (ctxt->input->cur[5] == 'A') &&
10031 (ctxt->input->cur[6] == 'T') &&
10032 (ctxt->input->cur[7] == 'A') &&
10033 (ctxt->input->cur[8] == '[')) {
10034 SKIP(9);
10035 ctxt->instate = XML_PARSER_CDATA_SECTION;
10036 break;
10037 } else if ((cur == '<') && (next == '!') &&
10038 (avail < 9)) {
10039 goto done;
10040 } else if (cur == '&') {
10041 if ((!terminate) &&
10042 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10043 goto done;
10044 xmlParseReference(ctxt);
10045 } else {
10046 /* TODO Avoid the extra copy, handle directly !!! */
10047 /*
10048 * Goal of the following test is:
10049 * - minimize calls to the SAX 'character' callback
10050 * when they are mergeable
10051 * - handle an problem for isBlank when we only parse
10052 * a sequence of blank chars and the next one is
10053 * not available to check against '<' presence.
10054 * - tries to homogenize the differences in SAX
10055 * callbacks between the push and pull versions
10056 * of the parser.
10057 */
10058 if ((ctxt->inputNr == 1) &&
10059 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10060 if (!terminate) {
10061 if (ctxt->progressive) {
10062 if ((lastlt == NULL) ||
10063 (ctxt->input->cur > lastlt))
10064 goto done;
10065 } else if (xmlParseLookupSequence(ctxt,
10066 '<', 0, 0) < 0) {
10067 goto done;
10068 }
10069 }
10070 }
10071 ctxt->checkIndex = 0;
10072 xmlParseCharData(ctxt, 0);
10073 }
10074 /*
10075 * Pop-up of finished entities.
10076 */
10077 while ((RAW == 0) && (ctxt->inputNr > 1))
10078 xmlPopInput(ctxt);
10079 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10080 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10081 "detected an error in element content\n");
10082 ctxt->instate = XML_PARSER_EOF;
10083 break;
10084 }
10085 break;
10086 }
10087 case XML_PARSER_END_TAG:
10088 if (avail < 2)
10089 goto done;
10090 if (!terminate) {
10091 if (ctxt->progressive) {
10092 /* > can be found unescaped in attribute values */
10093 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
10094 goto done;
10095 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10096 goto done;
10097 }
10098 }
10099 if (ctxt->sax2) {
10100 xmlParseEndTag2(ctxt,
10101 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10102 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
10103 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
10104 nameNsPop(ctxt);
10105 }
10106#ifdef LIBXML_SAX1_ENABLED
10107 else
10108 xmlParseEndTag1(ctxt, 0);
10109#endif /* LIBXML_SAX1_ENABLED */
10110 if (ctxt->nameNr == 0) {
10111 ctxt->instate = XML_PARSER_EPILOG;
10112 } else {
10113 ctxt->instate = XML_PARSER_CONTENT;
10114 }
10115 break;
10116 case XML_PARSER_CDATA_SECTION: {
10117 /*
10118 * The Push mode need to have the SAX callback for
10119 * cdataBlock merge back contiguous callbacks.
10120 */
10121 int base;
10122
10123 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10124 if (base < 0) {
10125 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
10126 int tmp;
10127
10128 tmp = xmlCheckCdataPush(ctxt->input->cur,
10129 XML_PARSER_BIG_BUFFER_SIZE);
10130 if (tmp < 0) {
10131 tmp = -tmp;
10132 ctxt->input->cur += tmp;
10133 goto encoding_error;
10134 }
10135 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10136 if (ctxt->sax->cdataBlock != NULL)
10137 ctxt->sax->cdataBlock(ctxt->userData,
10138 ctxt->input->cur, tmp);
10139 else if (ctxt->sax->characters != NULL)
10140 ctxt->sax->characters(ctxt->userData,
10141 ctxt->input->cur, tmp);
10142 }
10143 SKIPL(tmp);
10144 ctxt->checkIndex = 0;
10145 }
10146 goto done;
10147 } else {
10148 int tmp;
10149
10150 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10151 if ((tmp < 0) || (tmp != base)) {
10152 tmp = -tmp;
10153 ctxt->input->cur += tmp;
10154 goto encoding_error;
10155 }
10156 if ((ctxt->sax != NULL) && (base > 0) &&
10157 (!ctxt->disableSAX)) {
10158 if (ctxt->sax->cdataBlock != NULL)
10159 ctxt->sax->cdataBlock(ctxt->userData,
10160 ctxt->input->cur, base);
10161 else if (ctxt->sax->characters != NULL)
10162 ctxt->sax->characters(ctxt->userData,
10163 ctxt->input->cur, base);
10164 }
10165 SKIPL(base + 3);
10166 ctxt->checkIndex = 0;
10167 ctxt->instate = XML_PARSER_CONTENT;
10168#ifdef DEBUG_PUSH
10169 xmlGenericError(xmlGenericErrorContext,
10170 "PP: entering CONTENT\n");
10171#endif
10172 }
10173 break;
10174 }
10175 case XML_PARSER_MISC:
10176 SKIP_BLANKS;
10177 if (ctxt->input->buf == NULL)
10178 avail = ctxt->input->length -
10179 (ctxt->input->cur - ctxt->input->base);
10180 else
10181 avail = ctxt->input->buf->buffer->use -
10182 (ctxt->input->cur - ctxt->input->base);
10183 if (avail < 2)
10184 goto done;
10185 cur = ctxt->input->cur[0];
10186 next = ctxt->input->cur[1];
10187 if ((cur == '<') && (next == '?')) {
10188 if ((!terminate) &&
10189 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10190 goto done;
10191#ifdef DEBUG_PUSH
10192 xmlGenericError(xmlGenericErrorContext,
10193 "PP: Parsing PI\n");
10194#endif
10195 xmlParsePI(ctxt);
10196 ctxt->checkIndex = 0;
10197 } else if ((cur == '<') && (next == '!') &&
10198 (ctxt->input->cur[2] == '-') &&
10199 (ctxt->input->cur[3] == '-')) {
10200 if ((!terminate) &&
10201 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10202 goto done;
10203#ifdef DEBUG_PUSH
10204 xmlGenericError(xmlGenericErrorContext,
10205 "PP: Parsing Comment\n");
10206#endif
10207 xmlParseComment(ctxt);
10208 ctxt->instate = XML_PARSER_MISC;
10209 ctxt->checkIndex = 0;
10210 } else if ((cur == '<') && (next == '!') &&
10211 (ctxt->input->cur[2] == 'D') &&
10212 (ctxt->input->cur[3] == 'O') &&
10213 (ctxt->input->cur[4] == 'C') &&
10214 (ctxt->input->cur[5] == 'T') &&
10215 (ctxt->input->cur[6] == 'Y') &&
10216 (ctxt->input->cur[7] == 'P') &&
10217 (ctxt->input->cur[8] == 'E')) {
10218 if ((!terminate) &&
10219 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10220 goto done;
10221#ifdef DEBUG_PUSH
10222 xmlGenericError(xmlGenericErrorContext,
10223 "PP: Parsing internal subset\n");
10224#endif
10225 ctxt->inSubset = 1;
10226 xmlParseDocTypeDecl(ctxt);
10227 if (RAW == '[') {
10228 ctxt->instate = XML_PARSER_DTD;
10229#ifdef DEBUG_PUSH
10230 xmlGenericError(xmlGenericErrorContext,
10231 "PP: entering DTD\n");
10232#endif
10233 } else {
10234 /*
10235 * Create and update the external subset.
10236 */
10237 ctxt->inSubset = 2;
10238 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10239 (ctxt->sax->externalSubset != NULL))
10240 ctxt->sax->externalSubset(ctxt->userData,
10241 ctxt->intSubName, ctxt->extSubSystem,
10242 ctxt->extSubURI);
10243 ctxt->inSubset = 0;
10244 ctxt->instate = XML_PARSER_PROLOG;
10245#ifdef DEBUG_PUSH
10246 xmlGenericError(xmlGenericErrorContext,
10247 "PP: entering PROLOG\n");
10248#endif
10249 }
10250 } else if ((cur == '<') && (next == '!') &&
10251 (avail < 9)) {
10252 goto done;
10253 } else {
10254 ctxt->instate = XML_PARSER_START_TAG;
10255 ctxt->progressive = 1;
10256 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10257#ifdef DEBUG_PUSH
10258 xmlGenericError(xmlGenericErrorContext,
10259 "PP: entering START_TAG\n");
10260#endif
10261 }
10262 break;
10263 case XML_PARSER_PROLOG:
10264 SKIP_BLANKS;
10265 if (ctxt->input->buf == NULL)
10266 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10267 else
10268 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10269 if (avail < 2)
10270 goto done;
10271 cur = ctxt->input->cur[0];
10272 next = ctxt->input->cur[1];
10273 if ((cur == '<') && (next == '?')) {
10274 if ((!terminate) &&
10275 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10276 goto done;
10277#ifdef DEBUG_PUSH
10278 xmlGenericError(xmlGenericErrorContext,
10279 "PP: Parsing PI\n");
10280#endif
10281 xmlParsePI(ctxt);
10282 } else if ((cur == '<') && (next == '!') &&
10283 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10284 if ((!terminate) &&
10285 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10286 goto done;
10287#ifdef DEBUG_PUSH
10288 xmlGenericError(xmlGenericErrorContext,
10289 "PP: Parsing Comment\n");
10290#endif
10291 xmlParseComment(ctxt);
10292 ctxt->instate = XML_PARSER_PROLOG;
10293 } else if ((cur == '<') && (next == '!') &&
10294 (avail < 4)) {
10295 goto done;
10296 } else {
10297 ctxt->instate = XML_PARSER_START_TAG;
10298 if (ctxt->progressive == 0)
10299 ctxt->progressive = 1;
10300 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
10301#ifdef DEBUG_PUSH
10302 xmlGenericError(xmlGenericErrorContext,
10303 "PP: entering START_TAG\n");
10304#endif
10305 }
10306 break;
10307 case XML_PARSER_EPILOG:
10308 SKIP_BLANKS;
10309 if (ctxt->input->buf == NULL)
10310 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10311 else
10312 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10313 if (avail < 2)
10314 goto done;
10315 cur = ctxt->input->cur[0];
10316 next = ctxt->input->cur[1];
10317 if ((cur == '<') && (next == '?')) {
10318 if ((!terminate) &&
10319 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10320 goto done;
10321#ifdef DEBUG_PUSH
10322 xmlGenericError(xmlGenericErrorContext,
10323 "PP: Parsing PI\n");
10324#endif
10325 xmlParsePI(ctxt);
10326 ctxt->instate = XML_PARSER_EPILOG;
10327 } else if ((cur == '<') && (next == '!') &&
10328 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10329 if ((!terminate) &&
10330 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10331 goto done;
10332#ifdef DEBUG_PUSH
10333 xmlGenericError(xmlGenericErrorContext,
10334 "PP: Parsing Comment\n");
10335#endif
10336 xmlParseComment(ctxt);
10337 ctxt->instate = XML_PARSER_EPILOG;
10338 } else if ((cur == '<') && (next == '!') &&
10339 (avail < 4)) {
10340 goto done;
10341 } else {
10342 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10343 ctxt->instate = XML_PARSER_EOF;
10344#ifdef DEBUG_PUSH
10345 xmlGenericError(xmlGenericErrorContext,
10346 "PP: entering EOF\n");
10347#endif
10348 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10349 ctxt->sax->endDocument(ctxt->userData);
10350 goto done;
10351 }
10352 break;
10353 case XML_PARSER_DTD: {
10354 /*
10355 * Sorry but progressive parsing of the internal subset
10356 * is not expected to be supported. We first check that
10357 * the full content of the internal subset is available and
10358 * the parsing is launched only at that point.
10359 * Internal subset ends up with "']' S? '>'" in an unescaped
10360 * section and not in a ']]>' sequence which are conditional
10361 * sections (whoever argued to keep that crap in XML deserve
10362 * a place in hell !).
10363 */
10364 int base, i;
10365 xmlChar *buf;
10366 xmlChar quote = 0;
10367
10368 base = ctxt->input->cur - ctxt->input->base;
10369 if (base < 0) return(0);
10370 if (ctxt->checkIndex > base)
10371 base = ctxt->checkIndex;
10372 buf = ctxt->input->buf->buffer->content;
10373 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10374 base++) {
10375 if (quote != 0) {
10376 if (buf[base] == quote)
10377 quote = 0;
10378 continue;
10379 }
10380 if ((quote == 0) && (buf[base] == '<')) {
10381 int found = 0;
10382 /* special handling of comments */
10383 if (((unsigned int) base + 4 <
10384 ctxt->input->buf->buffer->use) &&
10385 (buf[base + 1] == '!') &&
10386 (buf[base + 2] == '-') &&
10387 (buf[base + 3] == '-')) {
10388 for (;(unsigned int) base + 3 <
10389 ctxt->input->buf->buffer->use; base++) {
10390 if ((buf[base] == '-') &&
10391 (buf[base + 1] == '-') &&
10392 (buf[base + 2] == '>')) {
10393 found = 1;
10394 base += 2;
10395 break;
10396 }
10397 }
10398 if (!found) {
10399#if 0
10400 fprintf(stderr, "unfinished comment\n");
10401#endif
10402 break; /* for */
10403 }
10404 continue;
10405 }
10406 }
10407 if (buf[base] == '"') {
10408 quote = '"';
10409 continue;
10410 }
10411 if (buf[base] == '\'') {
10412 quote = '\'';
10413 continue;
10414 }
10415 if (buf[base] == ']') {
10416#if 0
10417 fprintf(stderr, "%c%c%c%c: ", buf[base],
10418 buf[base + 1], buf[base + 2], buf[base + 3]);
10419#endif
10420 if ((unsigned int) base +1 >=
10421 ctxt->input->buf->buffer->use)
10422 break;
10423 if (buf[base + 1] == ']') {
10424 /* conditional crap, skip both ']' ! */
10425 base++;
10426 continue;
10427 }
10428 for (i = 1;
10429 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10430 i++) {
10431 if (buf[base + i] == '>') {
10432#if 0
10433 fprintf(stderr, "found\n");
10434#endif
10435 goto found_end_int_subset;
10436 }
10437 if (!IS_BLANK_CH(buf[base + i])) {
10438#if 0
10439 fprintf(stderr, "not found\n");
10440#endif
10441 goto not_end_of_int_subset;
10442 }
10443 }
10444#if 0
10445 fprintf(stderr, "end of stream\n");
10446#endif
10447 break;
10448
10449 }
10450not_end_of_int_subset:
10451 continue; /* for */
10452 }
10453 /*
10454 * We didn't found the end of the Internal subset
10455 */
10456#ifdef DEBUG_PUSH
10457 if (next == 0)
10458 xmlGenericError(xmlGenericErrorContext,
10459 "PP: lookup of int subset end filed\n");
10460#endif
10461 goto done;
10462
10463found_end_int_subset:
10464 xmlParseInternalSubset(ctxt);
10465 ctxt->inSubset = 2;
10466 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10467 (ctxt->sax->externalSubset != NULL))
10468 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10469 ctxt->extSubSystem, ctxt->extSubURI);
10470 ctxt->inSubset = 0;
10471 ctxt->instate = XML_PARSER_PROLOG;
10472 ctxt->checkIndex = 0;
10473#ifdef DEBUG_PUSH
10474 xmlGenericError(xmlGenericErrorContext,
10475 "PP: entering PROLOG\n");
10476#endif
10477 break;
10478 }
10479 case XML_PARSER_COMMENT:
10480 xmlGenericError(xmlGenericErrorContext,
10481 "PP: internal error, state == COMMENT\n");
10482 ctxt->instate = XML_PARSER_CONTENT;
10483#ifdef DEBUG_PUSH
10484 xmlGenericError(xmlGenericErrorContext,
10485 "PP: entering CONTENT\n");
10486#endif
10487 break;
10488 case XML_PARSER_IGNORE:
10489 xmlGenericError(xmlGenericErrorContext,
10490 "PP: internal error, state == IGNORE");
10491 ctxt->instate = XML_PARSER_DTD;
10492#ifdef DEBUG_PUSH
10493 xmlGenericError(xmlGenericErrorContext,
10494 "PP: entering DTD\n");
10495#endif
10496 break;
10497 case XML_PARSER_PI:
10498 xmlGenericError(xmlGenericErrorContext,
10499 "PP: internal error, state == PI\n");
10500 ctxt->instate = XML_PARSER_CONTENT;
10501#ifdef DEBUG_PUSH
10502 xmlGenericError(xmlGenericErrorContext,
10503 "PP: entering CONTENT\n");
10504#endif
10505 break;
10506 case XML_PARSER_ENTITY_DECL:
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: internal error, state == ENTITY_DECL\n");
10509 ctxt->instate = XML_PARSER_DTD;
10510#ifdef DEBUG_PUSH
10511 xmlGenericError(xmlGenericErrorContext,
10512 "PP: entering DTD\n");
10513#endif
10514 break;
10515 case XML_PARSER_ENTITY_VALUE:
10516 xmlGenericError(xmlGenericErrorContext,
10517 "PP: internal error, state == ENTITY_VALUE\n");
10518 ctxt->instate = XML_PARSER_CONTENT;
10519#ifdef DEBUG_PUSH
10520 xmlGenericError(xmlGenericErrorContext,
10521 "PP: entering DTD\n");
10522#endif
10523 break;
10524 case XML_PARSER_ATTRIBUTE_VALUE:
10525 xmlGenericError(xmlGenericErrorContext,
10526 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10527 ctxt->instate = XML_PARSER_START_TAG;
10528#ifdef DEBUG_PUSH
10529 xmlGenericError(xmlGenericErrorContext,
10530 "PP: entering START_TAG\n");
10531#endif
10532 break;
10533 case XML_PARSER_SYSTEM_LITERAL:
10534 xmlGenericError(xmlGenericErrorContext,
10535 "PP: internal error, state == SYSTEM_LITERAL\n");
10536 ctxt->instate = XML_PARSER_START_TAG;
10537#ifdef DEBUG_PUSH
10538 xmlGenericError(xmlGenericErrorContext,
10539 "PP: entering START_TAG\n");
10540#endif
10541 break;
10542 case XML_PARSER_PUBLIC_LITERAL:
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: internal error, state == PUBLIC_LITERAL\n");
10545 ctxt->instate = XML_PARSER_START_TAG;
10546#ifdef DEBUG_PUSH
10547 xmlGenericError(xmlGenericErrorContext,
10548 "PP: entering START_TAG\n");
10549#endif
10550 break;
10551 }
10552 }
10553done:
10554#ifdef DEBUG_PUSH
10555 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10556#endif
10557 return(ret);
10558encoding_error:
10559 {
10560 char buffer[150];
10561
10562 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10563 ctxt->input->cur[0], ctxt->input->cur[1],
10564 ctxt->input->cur[2], ctxt->input->cur[3]);
10565 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10566 "Input is not proper UTF-8, indicate encoding !\n%s",
10567 BAD_CAST buffer, NULL);
10568 }
10569 return(0);
10570}
10571
10572/**
10573 * xmlParseChunk:
10574 * @ctxt: an XML parser context
10575 * @chunk: an char array
10576 * @size: the size in byte of the chunk
10577 * @terminate: last chunk indicator
10578 *
10579 * Parse a Chunk of memory
10580 *
10581 * Returns zero if no error, the xmlParserErrors otherwise.
10582 */
10583int
10584xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10585 int terminate) {
10586 int end_in_lf = 0;
10587
10588 if (ctxt == NULL)
10589 return(XML_ERR_INTERNAL_ERROR);
10590 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10591 return(ctxt->errNo);
10592 if (ctxt->instate == XML_PARSER_START)
10593 xmlDetectSAX2(ctxt);
10594 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10595 (chunk[size - 1] == '\r')) {
10596 end_in_lf = 1;
10597 size--;
10598 }
10599 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10600 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10601 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10602 int cur = ctxt->input->cur - ctxt->input->base;
10603 int res;
10604
10605 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10606 if (res < 0) {
10607 ctxt->errNo = XML_PARSER_EOF;
10608 ctxt->disableSAX = 1;
10609 return (XML_PARSER_EOF);
10610 }
10611 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10612 ctxt->input->cur = ctxt->input->base + cur;
10613 ctxt->input->end =
10614 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10615#ifdef DEBUG_PUSH
10616 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10617#endif
10618
10619 } else if (ctxt->instate != XML_PARSER_EOF) {
10620 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10621 xmlParserInputBufferPtr in = ctxt->input->buf;
10622 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10623 (in->raw != NULL)) {
10624 int nbchars;
10625
10626 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10627 if (nbchars < 0) {
10628 /* TODO 2.6.0 */
10629 xmlGenericError(xmlGenericErrorContext,
10630 "xmlParseChunk: encoder error\n");
10631 return(XML_ERR_INVALID_ENCODING);
10632 }
10633 }
10634 }
10635 }
10636 xmlParseTryOrFinish(ctxt, terminate);
10637 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10638 (ctxt->input->buf != NULL)) {
10639 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10640 }
10641 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10642 return(ctxt->errNo);
10643 if (terminate) {
10644 /*
10645 * Check for termination
10646 */
10647 int avail = 0;
10648
10649 if (ctxt->input != NULL) {
10650 if (ctxt->input->buf == NULL)
10651 avail = ctxt->input->length -
10652 (ctxt->input->cur - ctxt->input->base);
10653 else
10654 avail = ctxt->input->buf->buffer->use -
10655 (ctxt->input->cur - ctxt->input->base);
10656 }
10657
10658 if ((ctxt->instate != XML_PARSER_EOF) &&
10659 (ctxt->instate != XML_PARSER_EPILOG)) {
10660 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10661 }
10662 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
10663 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10664 }
10665 if (ctxt->instate != XML_PARSER_EOF) {
10666 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10667 ctxt->sax->endDocument(ctxt->userData);
10668 }
10669 ctxt->instate = XML_PARSER_EOF;
10670 }
10671 return((xmlParserErrors) ctxt->errNo);
10672}
10673
10674/************************************************************************
10675 * *
10676 * I/O front end functions to the parser *
10677 * *
10678 ************************************************************************/
10679
10680/**
10681 * xmlCreatePushParserCtxt:
10682 * @sax: a SAX handler
10683 * @user_data: The user data returned on SAX callbacks
10684 * @chunk: a pointer to an array of chars
10685 * @size: number of chars in the array
10686 * @filename: an optional file name or URI
10687 *
10688 * Create a parser context for using the XML parser in push mode.
10689 * If @buffer and @size are non-NULL, the data is used to detect
10690 * the encoding. The remaining characters will be parsed so they
10691 * don't need to be fed in again through xmlParseChunk.
10692 * To allow content encoding detection, @size should be >= 4
10693 * The value of @filename is used for fetching external entities
10694 * and error/warning reports.
10695 *
10696 * Returns the new parser context or NULL
10697 */
10698
10699xmlParserCtxtPtr
10700xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10701 const char *chunk, int size, const char *filename) {
10702 xmlParserCtxtPtr ctxt;
10703 xmlParserInputPtr inputStream;
10704 xmlParserInputBufferPtr buf;
10705 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10706
10707 /*
10708 * plug some encoding conversion routines
10709 */
10710 if ((chunk != NULL) && (size >= 4))
10711 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10712
10713 buf = xmlAllocParserInputBuffer(enc);
10714 if (buf == NULL) return(NULL);
10715
10716 ctxt = xmlNewParserCtxt();
10717 if (ctxt == NULL) {
10718 xmlErrMemory(NULL, "creating parser: out of memory\n");
10719 xmlFreeParserInputBuffer(buf);
10720 return(NULL);
10721 }
10722 ctxt->dictNames = 1;
10723 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10724 if (ctxt->pushTab == NULL) {
10725 xmlErrMemory(ctxt, NULL);
10726 xmlFreeParserInputBuffer(buf);
10727 xmlFreeParserCtxt(ctxt);
10728 return(NULL);
10729 }
10730 if (sax != NULL) {
10731#ifdef LIBXML_SAX1_ENABLED
10732 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10733#endif /* LIBXML_SAX1_ENABLED */
10734 xmlFree(ctxt->sax);
10735 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10736 if (ctxt->sax == NULL) {
10737 xmlErrMemory(ctxt, NULL);
10738 xmlFreeParserInputBuffer(buf);
10739 xmlFreeParserCtxt(ctxt);
10740 return(NULL);
10741 }
10742 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10743 if (sax->initialized == XML_SAX2_MAGIC)
10744 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10745 else
10746 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10747 if (user_data != NULL)
10748 ctxt->userData = user_data;
10749 }
10750 if (filename == NULL) {
10751 ctxt->directory = NULL;
10752 } else {
10753 ctxt->directory = xmlParserGetDirectory(filename);
10754 }
10755
10756 inputStream = xmlNewInputStream(ctxt);
10757 if (inputStream == NULL) {
10758 xmlFreeParserCtxt(ctxt);
10759 xmlFreeParserInputBuffer(buf);
10760 return(NULL);
10761 }
10762
10763 if (filename == NULL)
10764 inputStream->filename = NULL;
10765 else {
10766 inputStream->filename = (char *)
10767 xmlCanonicPath((const xmlChar *) filename);
10768 if (inputStream->filename == NULL) {
10769 xmlFreeParserCtxt(ctxt);
10770 xmlFreeParserInputBuffer(buf);
10771 return(NULL);
10772 }
10773 }
10774 inputStream->buf = buf;
10775 inputStream->base = inputStream->buf->buffer->content;
10776 inputStream->cur = inputStream->buf->buffer->content;
10777 inputStream->end =
10778 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
10779
10780 inputPush(ctxt, inputStream);
10781
10782 /*
10783 * If the caller didn't provide an initial 'chunk' for determining
10784 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10785 * that it can be automatically determined later
10786 */
10787 if ((size == 0) || (chunk == NULL)) {
10788 ctxt->charset = XML_CHAR_ENCODING_NONE;
10789 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
10790 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10791 int cur = ctxt->input->cur - ctxt->input->base;
10792
10793 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10794
10795 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10796 ctxt->input->cur = ctxt->input->base + cur;
10797 ctxt->input->end =
10798 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
10799#ifdef DEBUG_PUSH
10800 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10801#endif
10802 }
10803
10804 if (enc != XML_CHAR_ENCODING_NONE) {
10805 xmlSwitchEncoding(ctxt, enc);
10806 }
10807
10808 return(ctxt);
10809}
10810#endif /* LIBXML_PUSH_ENABLED */
10811
10812/**
10813 * xmlStopParser:
10814 * @ctxt: an XML parser context
10815 *
10816 * Blocks further parser processing
10817 */
10818void
10819xmlStopParser(xmlParserCtxtPtr ctxt) {
10820 if (ctxt == NULL)
10821 return;
10822 ctxt->instate = XML_PARSER_EOF;
10823 ctxt->disableSAX = 1;
10824 if (ctxt->input != NULL) {
10825 ctxt->input->cur = BAD_CAST"";
10826 ctxt->input->base = ctxt->input->cur;
10827 }
10828}
10829
10830/**
10831 * xmlCreateIOParserCtxt:
10832 * @sax: a SAX handler
10833 * @user_data: The user data returned on SAX callbacks
10834 * @ioread: an I/O read function
10835 * @ioclose: an I/O close function
10836 * @ioctx: an I/O handler
10837 * @enc: the charset encoding if known
10838 *
10839 * Create a parser context for using the XML parser with an existing
10840 * I/O stream
10841 *
10842 * Returns the new parser context or NULL
10843 */
10844xmlParserCtxtPtr
10845xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10846 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10847 void *ioctx, xmlCharEncoding enc) {
10848 xmlParserCtxtPtr ctxt;
10849 xmlParserInputPtr inputStream;
10850 xmlParserInputBufferPtr buf;
10851
10852 if (ioread == NULL) return(NULL);
10853
10854 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10855 if (buf == NULL) return(NULL);
10856
10857 ctxt = xmlNewParserCtxt();
10858 if (ctxt == NULL) {
10859 xmlFreeParserInputBuffer(buf);
10860 return(NULL);
10861 }
10862 if (sax != NULL) {
10863#ifdef LIBXML_SAX1_ENABLED
10864 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
10865#endif /* LIBXML_SAX1_ENABLED */
10866 xmlFree(ctxt->sax);
10867 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10868 if (ctxt->sax == NULL) {
10869 xmlErrMemory(ctxt, NULL);
10870 xmlFreeParserCtxt(ctxt);
10871 return(NULL);
10872 }
10873 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10874 if (sax->initialized == XML_SAX2_MAGIC)
10875 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10876 else
10877 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
10878 if (user_data != NULL)
10879 ctxt->userData = user_data;
10880 }
10881
10882 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10883 if (inputStream == NULL) {
10884 xmlFreeParserCtxt(ctxt);
10885 return(NULL);
10886 }
10887 inputPush(ctxt, inputStream);
10888
10889 return(ctxt);
10890}
10891
10892#ifdef LIBXML_VALID_ENABLED
10893/************************************************************************
10894 * *
10895 * Front ends when parsing a DTD *
10896 * *
10897 ************************************************************************/
10898
10899/**
10900 * xmlIOParseDTD:
10901 * @sax: the SAX handler block or NULL
10902 * @input: an Input Buffer
10903 * @enc: the charset encoding if known
10904 *
10905 * Load and parse a DTD
10906 *
10907 * Returns the resulting xmlDtdPtr or NULL in case of error.
10908 * @input will be freed by the function in any case.
10909 */
10910
10911xmlDtdPtr
10912xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10913 xmlCharEncoding enc) {
10914 xmlDtdPtr ret = NULL;
10915 xmlParserCtxtPtr ctxt;
10916 xmlParserInputPtr pinput = NULL;
10917 xmlChar start[4];
10918
10919 if (input == NULL)
10920 return(NULL);
10921
10922 ctxt = xmlNewParserCtxt();
10923 if (ctxt == NULL) {
10924 xmlFreeParserInputBuffer(input);
10925 return(NULL);
10926 }
10927
10928 /*
10929 * Set-up the SAX context
10930 */
10931 if (sax != NULL) {
10932 if (ctxt->sax != NULL)
10933 xmlFree(ctxt->sax);
10934 ctxt->sax = sax;
10935 ctxt->userData = ctxt;
10936 }
10937 xmlDetectSAX2(ctxt);
10938
10939 /*
10940 * generate a parser input from the I/O handler
10941 */
10942
10943 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
10944 if (pinput == NULL) {
10945 if (sax != NULL) ctxt->sax = NULL;
10946 xmlFreeParserInputBuffer(input);
10947 xmlFreeParserCtxt(ctxt);
10948 return(NULL);
10949 }
10950
10951 /*
10952 * plug some encoding conversion routines here.
10953 */
10954 xmlPushInput(ctxt, pinput);
10955 if (enc != XML_CHAR_ENCODING_NONE) {
10956 xmlSwitchEncoding(ctxt, enc);
10957 }
10958
10959 pinput->filename = NULL;
10960 pinput->line = 1;
10961 pinput->col = 1;
10962 pinput->base = ctxt->input->cur;
10963 pinput->cur = ctxt->input->cur;
10964 pinput->free = NULL;
10965
10966 /*
10967 * let's parse that entity knowing it's an external subset.
10968 */
10969 ctxt->inSubset = 2;
10970 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10971 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10972 BAD_CAST "none", BAD_CAST "none");
10973
10974 if ((enc == XML_CHAR_ENCODING_NONE) &&
10975 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10976 /*
10977 * Get the 4 first bytes and decode the charset
10978 * if enc != XML_CHAR_ENCODING_NONE
10979 * plug some encoding conversion routines.
10980 */
10981 start[0] = RAW;
10982 start[1] = NXT(1);
10983 start[2] = NXT(2);
10984 start[3] = NXT(3);
10985 enc = xmlDetectCharEncoding(start, 4);
10986 if (enc != XML_CHAR_ENCODING_NONE) {
10987 xmlSwitchEncoding(ctxt, enc);
10988 }
10989 }
10990
10991 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10992
10993 if (ctxt->myDoc != NULL) {
10994 if (ctxt->wellFormed) {
10995 ret = ctxt->myDoc->extSubset;
10996 ctxt->myDoc->extSubset = NULL;
10997 if (ret != NULL) {
10998 xmlNodePtr tmp;
10999
11000 ret->doc = NULL;
11001 tmp = ret->children;
11002 while (tmp != NULL) {
11003 tmp->doc = NULL;
11004 tmp = tmp->next;
11005 }
11006 }
11007 } else {
11008 ret = NULL;
11009 }
11010 xmlFreeDoc(ctxt->myDoc);
11011 ctxt->myDoc = NULL;
11012 }
11013 if (sax != NULL) ctxt->sax = NULL;
11014 xmlFreeParserCtxt(ctxt);
11015
11016 return(ret);
11017}
11018
11019/**
11020 * xmlSAXParseDTD:
11021 * @sax: the SAX handler block
11022 * @ExternalID: a NAME* containing the External ID of the DTD
11023 * @SystemID: a NAME* containing the URL to the DTD
11024 *
11025 * Load and parse an external subset.
11026 *
11027 * Returns the resulting xmlDtdPtr or NULL in case of error.
11028 */
11029
11030xmlDtdPtr
11031xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11032 const xmlChar *SystemID) {
11033 xmlDtdPtr ret = NULL;
11034 xmlParserCtxtPtr ctxt;
11035 xmlParserInputPtr input = NULL;
11036 xmlCharEncoding enc;
11037 xmlChar* systemIdCanonic;
11038
11039 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11040
11041 ctxt = xmlNewParserCtxt();
11042 if (ctxt == NULL) {
11043 return(NULL);
11044 }
11045
11046 /*
11047 * Set-up the SAX context
11048 */
11049 if (sax != NULL) {
11050 if (ctxt->sax != NULL)
11051 xmlFree(ctxt->sax);
11052 ctxt->sax = sax;
11053 ctxt->userData = ctxt;
11054 }
11055
11056 /*
11057 * Canonicalise the system ID
11058 */
11059 systemIdCanonic = xmlCanonicPath(SystemID);
11060 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11061 xmlFreeParserCtxt(ctxt);
11062 return(NULL);
11063 }
11064
11065 /*
11066 * Ask the Entity resolver to load the damn thing
11067 */
11068
11069 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11070 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11071 systemIdCanonic);
11072 if (input == NULL) {
11073 if (sax != NULL) ctxt->sax = NULL;
11074 xmlFreeParserCtxt(ctxt);
11075 if (systemIdCanonic != NULL)
11076 xmlFree(systemIdCanonic);
11077 return(NULL);
11078 }
11079
11080 /*
11081 * plug some encoding conversion routines here.
11082 */
11083 xmlPushInput(ctxt, input);
11084 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11085 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11086 xmlSwitchEncoding(ctxt, enc);
11087 }
11088
11089 if (input->filename == NULL)
11090 input->filename = (char *) systemIdCanonic;
11091 else
11092 xmlFree(systemIdCanonic);
11093 input->line = 1;
11094 input->col = 1;
11095 input->base = ctxt->input->cur;
11096 input->cur = ctxt->input->cur;
11097 input->free = NULL;
11098
11099 /*
11100 * let's parse that entity knowing it's an external subset.
11101 */
11102 ctxt->inSubset = 2;
11103 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11104 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11105 ExternalID, SystemID);
11106 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11107
11108 if (ctxt->myDoc != NULL) {
11109 if (ctxt->wellFormed) {
11110 ret = ctxt->myDoc->extSubset;
11111 ctxt->myDoc->extSubset = NULL;
11112 if (ret != NULL) {
11113 xmlNodePtr tmp;
11114
11115 ret->doc = NULL;
11116 tmp = ret->children;
11117 while (tmp != NULL) {
11118 tmp->doc = NULL;
11119 tmp = tmp->next;
11120 }
11121 }
11122 } else {
11123 ret = NULL;
11124 }
11125 xmlFreeDoc(ctxt->myDoc);
11126 ctxt->myDoc = NULL;
11127 }
11128 if (sax != NULL) ctxt->sax = NULL;
11129 xmlFreeParserCtxt(ctxt);
11130
11131 return(ret);
11132}
11133
11134
11135/**
11136 * xmlParseDTD:
11137 * @ExternalID: a NAME* containing the External ID of the DTD
11138 * @SystemID: a NAME* containing the URL to the DTD
11139 *
11140 * Load and parse an external subset.
11141 *
11142 * Returns the resulting xmlDtdPtr or NULL in case of error.
11143 */
11144
11145xmlDtdPtr
11146xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11147 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11148}
11149#endif /* LIBXML_VALID_ENABLED */
11150
11151/************************************************************************
11152 * *
11153 * Front ends when parsing an Entity *
11154 * *
11155 ************************************************************************/
11156
11157/**
11158 * xmlParseCtxtExternalEntity:
11159 * @ctx: the existing parsing context
11160 * @URL: the URL for the entity to load
11161 * @ID: the System ID for the entity to load
11162 * @lst: the return value for the set of parsed nodes
11163 *
11164 * Parse an external general entity within an existing parsing context
11165 * An external general parsed entity is well-formed if it matches the
11166 * production labeled extParsedEnt.
11167 *
11168 * [78] extParsedEnt ::= TextDecl? content
11169 *
11170 * Returns 0 if the entity is well formed, -1 in case of args problem and
11171 * the parser error code otherwise
11172 */
11173
11174int
11175xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
11176 const xmlChar *ID, xmlNodePtr *lst) {
11177 xmlParserCtxtPtr ctxt;
11178 xmlDocPtr newDoc;
11179 xmlNodePtr newRoot;
11180 xmlSAXHandlerPtr oldsax = NULL;
11181 int ret = 0;
11182 xmlChar start[4];
11183 xmlCharEncoding enc;
11184 xmlParserInputPtr inputStream;
11185 char *directory = NULL;
11186
11187 if (ctx == NULL) return(-1);
11188
11189 if ((ctx->depth > 40) || (ctx->nbentities >= 500000)) {
11190 return(XML_ERR_ENTITY_LOOP);
11191 }
11192
11193 if (lst != NULL)
11194 *lst = NULL;
11195 if ((URL == NULL) && (ID == NULL))
11196 return(-1);
11197 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11198 return(-1);
11199
11200 ctxt = xmlNewParserCtxt();
11201 if (ctxt == NULL) {
11202 return(-1);
11203 }
11204
11205 ctxt->userData = ctxt;
11206 ctxt->_private = ctx->_private;
11207
11208 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11209 if (inputStream == NULL) {
11210 xmlFreeParserCtxt(ctxt);
11211 return(-1);
11212 }
11213
11214 inputPush(ctxt, inputStream);
11215
11216 if ((ctxt->directory == NULL) && (directory == NULL))
11217 directory = xmlParserGetDirectory((char *)URL);
11218 if ((ctxt->directory == NULL) && (directory != NULL))
11219 ctxt->directory = directory;
11220
11221 oldsax = ctxt->sax;
11222 ctxt->sax = ctx->sax;
11223 xmlDetectSAX2(ctxt);
11224 newDoc = xmlNewDoc(BAD_CAST "1.0");
11225 if (newDoc == NULL) {
11226 xmlFreeParserCtxt(ctxt);
11227 return(-1);
11228 }
11229 if (ctx->myDoc->dict) {
11230 newDoc->dict = ctx->myDoc->dict;
11231 xmlDictReference(newDoc->dict);
11232 }
11233 if (ctx->myDoc != NULL) {
11234 newDoc->intSubset = ctx->myDoc->intSubset;
11235 newDoc->extSubset = ctx->myDoc->extSubset;
11236 }
11237 if (ctx->myDoc->URL != NULL) {
11238 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11239 }
11240 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11241 if (newRoot == NULL) {
11242 ctxt->sax = oldsax;
11243 xmlFreeParserCtxt(ctxt);
11244 newDoc->intSubset = NULL;
11245 newDoc->extSubset = NULL;
11246 xmlFreeDoc(newDoc);
11247 return(-1);
11248 }
11249 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11250 nodePush(ctxt, newDoc->children);
11251 if (ctx->myDoc == NULL) {
11252 ctxt->myDoc = newDoc;
11253 } else {
11254 ctxt->myDoc = ctx->myDoc;
11255 newDoc->children->doc = ctx->myDoc;
11256 }
11257
11258 /*
11259 * Get the 4 first bytes and decode the charset
11260 * if enc != XML_CHAR_ENCODING_NONE
11261 * plug some encoding conversion routines.
11262 */
11263 GROW
11264 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11265 start[0] = RAW;
11266 start[1] = NXT(1);
11267 start[2] = NXT(2);
11268 start[3] = NXT(3);
11269 enc = xmlDetectCharEncoding(start, 4);
11270 if (enc != XML_CHAR_ENCODING_NONE) {
11271 xmlSwitchEncoding(ctxt, enc);
11272 }
11273 }
11274
11275 /*
11276 * Parse a possible text declaration first
11277 */
11278 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11279 xmlParseTextDecl(ctxt);
11280 }
11281
11282 /*
11283 * Doing validity checking on chunk doesn't make sense
11284 */
11285 ctxt->instate = XML_PARSER_CONTENT;
11286 ctxt->validate = ctx->validate;
11287 ctxt->valid = ctx->valid;
11288 ctxt->loadsubset = ctx->loadsubset;
11289 ctxt->depth = ctx->depth + 1;
11290 ctxt->replaceEntities = ctx->replaceEntities;
11291 if (ctxt->validate) {
11292 ctxt->vctxt.error = ctx->vctxt.error;
11293 ctxt->vctxt.warning = ctx->vctxt.warning;
11294 } else {
11295 ctxt->vctxt.error = NULL;
11296 ctxt->vctxt.warning = NULL;
11297 }
11298 ctxt->vctxt.nodeTab = NULL;
11299 ctxt->vctxt.nodeNr = 0;
11300 ctxt->vctxt.nodeMax = 0;
11301 ctxt->vctxt.node = NULL;
11302 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11303 ctxt->dict = ctx->dict;
11304 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11305 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11306 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11307 ctxt->dictNames = ctx->dictNames;
11308 ctxt->attsDefault = ctx->attsDefault;
11309 ctxt->attsSpecial = ctx->attsSpecial;
11310 ctxt->linenumbers = ctx->linenumbers;
11311
11312 xmlParseContent(ctxt);
11313
11314 ctx->validate = ctxt->validate;
11315 ctx->valid = ctxt->valid;
11316 if ((RAW == '<') && (NXT(1) == '/')) {
11317 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11318 } else if (RAW != 0) {
11319 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11320 }
11321 if (ctxt->node != newDoc->children) {
11322 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11323 }
11324
11325 if (!ctxt->wellFormed) {
11326 if (ctxt->errNo == 0)
11327 ret = 1;
11328 else
11329 ret = ctxt->errNo;
11330 } else {
11331 if (lst != NULL) {
11332 xmlNodePtr cur;
11333
11334 /*
11335 * Return the newly created nodeset after unlinking it from
11336 * they pseudo parent.
11337 */
11338 cur = newDoc->children->children;
11339 *lst = cur;
11340 while (cur != NULL) {
11341 cur->parent = NULL;
11342 cur = cur->next;
11343 }
11344 newDoc->children->children = NULL;
11345 }
11346 ret = 0;
11347 }
11348 ctxt->sax = oldsax;
11349 ctxt->dict = NULL;
11350 ctxt->attsDefault = NULL;
11351 ctxt->attsSpecial = NULL;
11352 xmlFreeParserCtxt(ctxt);
11353 newDoc->intSubset = NULL;
11354 newDoc->extSubset = NULL;
11355 xmlFreeDoc(newDoc);
11356
11357 return(ret);
11358}
11359
11360/**
11361 * xmlParseExternalEntityPrivate:
11362 * @doc: the document the chunk pertains to
11363 * @oldctxt: the previous parser context if available
11364 * @sax: the SAX handler bloc (possibly NULL)
11365 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11366 * @depth: Used for loop detection, use 0
11367 * @URL: the URL for the entity to load
11368 * @ID: the System ID for the entity to load
11369 * @list: the return value for the set of parsed nodes
11370 *
11371 * Private version of xmlParseExternalEntity()
11372 *
11373 * Returns 0 if the entity is well formed, -1 in case of args problem and
11374 * the parser error code otherwise
11375 */
11376
11377static xmlParserErrors
11378xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11379 xmlSAXHandlerPtr sax,
11380 void *user_data, int depth, const xmlChar *URL,
11381 const xmlChar *ID, xmlNodePtr *list) {
11382 xmlParserCtxtPtr ctxt;
11383 xmlDocPtr newDoc;
11384 xmlNodePtr newRoot;
11385 xmlSAXHandlerPtr oldsax = NULL;
11386 xmlParserErrors ret = XML_ERR_OK;
11387 xmlChar start[4];
11388 xmlCharEncoding enc;
11389
11390 if ((depth > 40) ||
11391 ((oldctxt != NULL) && (oldctxt->nbentities >= 500000))) {
11392 return(XML_ERR_ENTITY_LOOP);
11393 }
11394
11395
11396
11397 if (list != NULL)
11398 *list = NULL;
11399 if ((URL == NULL) && (ID == NULL))
11400 return(XML_ERR_INTERNAL_ERROR);
11401 if (doc == NULL)
11402 return(XML_ERR_INTERNAL_ERROR);
11403
11404
11405 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11406 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11407 ctxt->userData = ctxt;
11408 if (oldctxt != NULL) {
11409 ctxt->_private = oldctxt->_private;
11410 ctxt->loadsubset = oldctxt->loadsubset;
11411 ctxt->validate = oldctxt->validate;
11412 ctxt->external = oldctxt->external;
11413 ctxt->record_info = oldctxt->record_info;
11414 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11415 ctxt->node_seq.length = oldctxt->node_seq.length;
11416 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
11417 } else {
11418 /*
11419 * Doing validity checking on chunk without context
11420 * doesn't make sense
11421 */
11422 ctxt->_private = NULL;
11423 ctxt->validate = 0;
11424 ctxt->external = 2;
11425 ctxt->loadsubset = 0;
11426 }
11427 if (sax != NULL) {
11428 oldsax = ctxt->sax;
11429 ctxt->sax = sax;
11430 if (user_data != NULL)
11431 ctxt->userData = user_data;
11432 }
11433 xmlDetectSAX2(ctxt);
11434 newDoc = xmlNewDoc(BAD_CAST "1.0");
11435 if (newDoc == NULL) {
11436 ctxt->node_seq.maximum = 0;
11437 ctxt->node_seq.length = 0;
11438 ctxt->node_seq.buffer = NULL;
11439 xmlFreeParserCtxt(ctxt);
11440 return(XML_ERR_INTERNAL_ERROR);
11441 }
11442 newDoc->intSubset = doc->intSubset;
11443 newDoc->extSubset = doc->extSubset;
11444 newDoc->dict = doc->dict;
11445 xmlDictReference(newDoc->dict);
11446
11447 if (doc->URL != NULL) {
11448 newDoc->URL = xmlStrdup(doc->URL);
11449 }
11450 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11451 if (newRoot == NULL) {
11452 if (sax != NULL)
11453 ctxt->sax = oldsax;
11454 ctxt->node_seq.maximum = 0;
11455 ctxt->node_seq.length = 0;
11456 ctxt->node_seq.buffer = NULL;
11457 xmlFreeParserCtxt(ctxt);
11458 newDoc->intSubset = NULL;
11459 newDoc->extSubset = NULL;
11460 xmlFreeDoc(newDoc);
11461 return(XML_ERR_INTERNAL_ERROR);
11462 }
11463 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11464 nodePush(ctxt, newDoc->children);
11465 ctxt->myDoc = doc;
11466 newRoot->doc = doc;
11467
11468 /*
11469 * Get the 4 first bytes and decode the charset
11470 * if enc != XML_CHAR_ENCODING_NONE
11471 * plug some encoding conversion routines.
11472 */
11473 GROW;
11474 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11475 start[0] = RAW;
11476 start[1] = NXT(1);
11477 start[2] = NXT(2);
11478 start[3] = NXT(3);
11479 enc = xmlDetectCharEncoding(start, 4);
11480 if (enc != XML_CHAR_ENCODING_NONE) {
11481 xmlSwitchEncoding(ctxt, enc);
11482 }
11483 }
11484
11485 /*
11486 * Parse a possible text declaration first
11487 */
11488 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11489 xmlParseTextDecl(ctxt);
11490 }
11491
11492 ctxt->instate = XML_PARSER_CONTENT;
11493 ctxt->depth = depth;
11494
11495 xmlParseContent(ctxt);
11496
11497 if ((RAW == '<') && (NXT(1) == '/')) {
11498 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11499 } else if (RAW != 0) {
11500 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11501 }
11502 if (ctxt->node != newDoc->children) {
11503 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11504 }
11505
11506 if (!ctxt->wellFormed) {
11507 if (ctxt->errNo == 0)
11508 ret = XML_ERR_INTERNAL_ERROR;
11509 else
11510 ret = (xmlParserErrors)ctxt->errNo;
11511 } else {
11512 if (list != NULL) {
11513 xmlNodePtr cur;
11514
11515 /*
11516 * Return the newly created nodeset after unlinking it from
11517 * they pseudo parent.
11518 */
11519 cur = newDoc->children->children;
11520 *list = cur;
11521 while (cur != NULL) {
11522 cur->parent = NULL;
11523 cur = cur->next;
11524 }
11525 newDoc->children->children = NULL;
11526 }
11527 ret = XML_ERR_OK;
11528 }
11529 if (sax != NULL)
11530 ctxt->sax = oldsax;
11531 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11532 oldctxt->node_seq.length = ctxt->node_seq.length;
11533 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
11534 oldctxt->nbentities += ctxt->nbentities;
11535 ctxt->node_seq.maximum = 0;
11536 ctxt->node_seq.length = 0;
11537 ctxt->node_seq.buffer = NULL;
11538 xmlFreeParserCtxt(ctxt);
11539 newDoc->intSubset = NULL;
11540 newDoc->extSubset = NULL;
11541 xmlFreeDoc(newDoc);
11542
11543 return(ret);
11544}
11545
11546#ifdef LIBXML_SAX1_ENABLED
11547/**
11548 * xmlParseExternalEntity:
11549 * @doc: the document the chunk pertains to
11550 * @sax: the SAX handler bloc (possibly NULL)
11551 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11552 * @depth: Used for loop detection, use 0
11553 * @URL: the URL for the entity to load
11554 * @ID: the System ID for the entity to load
11555 * @lst: the return value for the set of parsed nodes
11556 *
11557 * Parse an external general entity
11558 * An external general parsed entity is well-formed if it matches the
11559 * production labeled extParsedEnt.
11560 *
11561 * [78] extParsedEnt ::= TextDecl? content
11562 *
11563 * Returns 0 if the entity is well formed, -1 in case of args problem and
11564 * the parser error code otherwise
11565 */
11566
11567int
11568xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11569 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
11570 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
11571 ID, lst));
11572}
11573
11574/**
11575 * xmlParseBalancedChunkMemory:
11576 * @doc: the document the chunk pertains to
11577 * @sax: the SAX handler bloc (possibly NULL)
11578 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11579 * @depth: Used for loop detection, use 0
11580 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11581 * @lst: the return value for the set of parsed nodes
11582 *
11583 * Parse a well-balanced chunk of an XML document
11584 * called by the parser
11585 * The allowed sequence for the Well Balanced Chunk is the one defined by
11586 * the content production in the XML grammar:
11587 *
11588 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11589 *
11590 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11591 * the parser error code otherwise
11592 */
11593
11594int
11595xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11596 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
11597 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11598 depth, string, lst, 0 );
11599}
11600#endif /* LIBXML_SAX1_ENABLED */
11601
11602/**
11603 * xmlParseBalancedChunkMemoryInternal:
11604 * @oldctxt: the existing parsing context
11605 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11606 * @user_data: the user data field for the parser context
11607 * @lst: the return value for the set of parsed nodes
11608 *
11609 *
11610 * Parse a well-balanced chunk of an XML document
11611 * called by the parser
11612 * The allowed sequence for the Well Balanced Chunk is the one defined by
11613 * the content production in the XML grammar:
11614 *
11615 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11616 *
11617 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11618 * error code otherwise
11619 *
11620 * In case recover is set to 1, the nodelist will not be empty even if
11621 * the parsed chunk is not well balanced.
11622 */
11623static xmlParserErrors
11624xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11625 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11626 xmlParserCtxtPtr ctxt;
11627 xmlDocPtr newDoc = NULL;
11628 xmlNodePtr newRoot;
11629 xmlSAXHandlerPtr oldsax = NULL;
11630 xmlNodePtr content = NULL;
11631 xmlNodePtr last = NULL;
11632 int size;
11633 xmlParserErrors ret = XML_ERR_OK;
11634
11635 if ((oldctxt->depth > 40) || (oldctxt->nbentities >= 500000)) {
11636 return(XML_ERR_ENTITY_LOOP);
11637 }
11638
11639
11640 if (lst != NULL)
11641 *lst = NULL;
11642 if (string == NULL)
11643 return(XML_ERR_INTERNAL_ERROR);
11644
11645 size = xmlStrlen(string);
11646
11647 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11648 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
11649 if (user_data != NULL)
11650 ctxt->userData = user_data;
11651 else
11652 ctxt->userData = ctxt;
11653 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11654 ctxt->dict = oldctxt->dict;
11655 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11656 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11657 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11658
11659 oldsax = ctxt->sax;
11660 ctxt->sax = oldctxt->sax;
11661 xmlDetectSAX2(ctxt);
11662 ctxt->replaceEntities = oldctxt->replaceEntities;
11663 ctxt->options = oldctxt->options;
11664
11665 ctxt->_private = oldctxt->_private;
11666 if (oldctxt->myDoc == NULL) {
11667 newDoc = xmlNewDoc(BAD_CAST "1.0");
11668 if (newDoc == NULL) {
11669 ctxt->sax = oldsax;
11670 ctxt->dict = NULL;
11671 xmlFreeParserCtxt(ctxt);
11672 return(XML_ERR_INTERNAL_ERROR);
11673 }
11674 newDoc->dict = ctxt->dict;
11675 xmlDictReference(newDoc->dict);
11676 ctxt->myDoc = newDoc;
11677 } else {
11678 ctxt->myDoc = oldctxt->myDoc;
11679 content = ctxt->myDoc->children;
11680 last = ctxt->myDoc->last;
11681 }
11682 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11683 if (newRoot == NULL) {
11684 ctxt->sax = oldsax;
11685 ctxt->dict = NULL;
11686 xmlFreeParserCtxt(ctxt);
11687 if (newDoc != NULL) {
11688 xmlFreeDoc(newDoc);
11689 }
11690 return(XML_ERR_INTERNAL_ERROR);
11691 }
11692 ctxt->myDoc->children = NULL;
11693 ctxt->myDoc->last = NULL;
11694 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
11695 nodePush(ctxt, ctxt->myDoc->children);
11696 ctxt->instate = XML_PARSER_CONTENT;
11697 ctxt->depth = oldctxt->depth + 1;
11698
11699 ctxt->validate = 0;
11700 ctxt->loadsubset = oldctxt->loadsubset;
11701 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11702 /*
11703 * ID/IDREF registration will be done in xmlValidateElement below
11704 */
11705 ctxt->loadsubset |= XML_SKIP_IDS;
11706 }
11707 ctxt->dictNames = oldctxt->dictNames;
11708 ctxt->attsDefault = oldctxt->attsDefault;
11709 ctxt->attsSpecial = oldctxt->attsSpecial;
11710
11711 xmlParseContent(ctxt);
11712 if ((RAW == '<') && (NXT(1) == '/')) {
11713 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11714 } else if (RAW != 0) {
11715 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11716 }
11717 if (ctxt->node != ctxt->myDoc->children) {
11718 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11719 }
11720
11721 if (!ctxt->wellFormed) {
11722 if (ctxt->errNo == 0)
11723 ret = XML_ERR_INTERNAL_ERROR;
11724 else
11725 ret = (xmlParserErrors)ctxt->errNo;
11726 } else {
11727 ret = XML_ERR_OK;
11728 }
11729
11730 if ((lst != NULL) && (ret == XML_ERR_OK)) {
11731 xmlNodePtr cur;
11732
11733 /*
11734 * Return the newly created nodeset after unlinking it from
11735 * they pseudo parent.
11736 */
11737 cur = ctxt->myDoc->children->children;
11738 *lst = cur;
11739 while (cur != NULL) {
11740#ifdef LIBXML_VALID_ENABLED
11741 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11742 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11743 (cur->type == XML_ELEMENT_NODE)) {
11744 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11745 oldctxt->myDoc, cur);
11746 }
11747#endif /* LIBXML_VALID_ENABLED */
11748 cur->parent = NULL;
11749 cur = cur->next;
11750 }
11751 ctxt->myDoc->children->children = NULL;
11752 }
11753 if (ctxt->myDoc != NULL) {
11754 xmlFreeNode(ctxt->myDoc->children);
11755 ctxt->myDoc->children = content;
11756 ctxt->myDoc->last = last;
11757 }
11758
11759 oldctxt->nbentities += ctxt->nbentities;
11760 ctxt->sax = oldsax;
11761 ctxt->dict = NULL;
11762 ctxt->attsDefault = NULL;
11763 ctxt->attsSpecial = NULL;
11764 xmlFreeParserCtxt(ctxt);
11765 if (newDoc != NULL) {
11766 xmlFreeDoc(newDoc);
11767 }
11768
11769 return(ret);
11770}
11771
11772/**
11773 * xmlParseInNodeContext:
11774 * @node: the context node
11775 * @data: the input string
11776 * @datalen: the input string length in bytes
11777 * @options: a combination of xmlParserOption
11778 * @lst: the return value for the set of parsed nodes
11779 *
11780 * Parse a well-balanced chunk of an XML document
11781 * within the context (DTD, namespaces, etc ...) of the given node.
11782 *
11783 * The allowed sequence for the data is a Well Balanced Chunk defined by
11784 * the content production in the XML grammar:
11785 *
11786 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11787 *
11788 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11789 * error code otherwise
11790 */
11791xmlParserErrors
11792xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11793 int options, xmlNodePtr *lst) {
11794#ifdef SAX2
11795 xmlParserCtxtPtr ctxt;
11796 xmlDocPtr doc = NULL;
11797 xmlNodePtr fake, cur;
11798 int nsnr = 0;
11799
11800 xmlParserErrors ret = XML_ERR_OK;
11801
11802 /*
11803 * check all input parameters, grab the document
11804 */
11805 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11806 return(XML_ERR_INTERNAL_ERROR);
11807 switch (node->type) {
11808 case XML_ELEMENT_NODE:
11809 case XML_ATTRIBUTE_NODE:
11810 case XML_TEXT_NODE:
11811 case XML_CDATA_SECTION_NODE:
11812 case XML_ENTITY_REF_NODE:
11813 case XML_PI_NODE:
11814 case XML_COMMENT_NODE:
11815 case XML_DOCUMENT_NODE:
11816 case XML_HTML_DOCUMENT_NODE:
11817 break;
11818 default:
11819 return(XML_ERR_INTERNAL_ERROR);
11820
11821 }
11822 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11823 (node->type != XML_DOCUMENT_NODE) &&
11824 (node->type != XML_HTML_DOCUMENT_NODE))
11825 node = node->parent;
11826 if (node == NULL)
11827 return(XML_ERR_INTERNAL_ERROR);
11828 if (node->type == XML_ELEMENT_NODE)
11829 doc = node->doc;
11830 else
11831 doc = (xmlDocPtr) node;
11832 if (doc == NULL)
11833 return(XML_ERR_INTERNAL_ERROR);
11834
11835 /*
11836 * allocate a context and set-up everything not related to the
11837 * node position in the tree
11838 */
11839 if (doc->type == XML_DOCUMENT_NODE)
11840 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11841#ifdef LIBXML_HTML_ENABLED
11842 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11843 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11844#endif
11845 else
11846 return(XML_ERR_INTERNAL_ERROR);
11847
11848 if (ctxt == NULL)
11849 return(XML_ERR_NO_MEMORY);
11850 fake = xmlNewComment(NULL);
11851 if (fake == NULL) {
11852 xmlFreeParserCtxt(ctxt);
11853 return(XML_ERR_NO_MEMORY);
11854 }
11855 xmlAddChild(node, fake);
11856
11857 /*
11858 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11859 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11860 * we must wait until the last moment to free the original one.
11861 */
11862 if (doc->dict != NULL) {
11863 if (ctxt->dict != NULL)
11864 xmlDictFree(ctxt->dict);
11865 ctxt->dict = doc->dict;
11866 } else
11867 options |= XML_PARSE_NODICT;
11868
11869 xmlCtxtUseOptions(ctxt, options);
11870 xmlDetectSAX2(ctxt);
11871 ctxt->myDoc = doc;
11872
11873 if (node->type == XML_ELEMENT_NODE) {
11874 nodePush(ctxt, node);
11875 /*
11876 * initialize the SAX2 namespaces stack
11877 */
11878 cur = node;
11879 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11880 xmlNsPtr ns = cur->nsDef;
11881 const xmlChar *iprefix, *ihref;
11882
11883 while (ns != NULL) {
11884 if (ctxt->dict) {
11885 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11886 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11887 } else {
11888 iprefix = ns->prefix;
11889 ihref = ns->href;
11890 }
11891
11892 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11893 nsPush(ctxt, iprefix, ihref);
11894 nsnr++;
11895 }
11896 ns = ns->next;
11897 }
11898 cur = cur->parent;
11899 }
11900 ctxt->instate = XML_PARSER_CONTENT;
11901 }
11902
11903 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11904 /*
11905 * ID/IDREF registration will be done in xmlValidateElement below
11906 */
11907 ctxt->loadsubset |= XML_SKIP_IDS;
11908 }
11909
11910#ifdef LIBXML_HTML_ENABLED
11911 if (doc->type == XML_HTML_DOCUMENT_NODE)
11912 __htmlParseContent(ctxt);
11913 else
11914#endif
11915 xmlParseContent(ctxt);
11916
11917 nsPop(ctxt, nsnr);
11918 if ((RAW == '<') && (NXT(1) == '/')) {
11919 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11920 } else if (RAW != 0) {
11921 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11922 }
11923 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11924 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11925 ctxt->wellFormed = 0;
11926 }
11927
11928 if (!ctxt->wellFormed) {
11929 if (ctxt->errNo == 0)
11930 ret = XML_ERR_INTERNAL_ERROR;
11931 else
11932 ret = (xmlParserErrors)ctxt->errNo;
11933 } else {
11934 ret = XML_ERR_OK;
11935 }
11936
11937 /*
11938 * Return the newly created nodeset after unlinking it from
11939 * the pseudo sibling.
11940 */
11941
11942 cur = fake->next;
11943 fake->next = NULL;
11944 node->last = fake;
11945
11946 if (cur != NULL) {
11947 cur->prev = NULL;
11948 }
11949
11950 *lst = cur;
11951
11952 while (cur != NULL) {
11953 cur->parent = NULL;
11954 cur = cur->next;
11955 }
11956
11957 xmlUnlinkNode(fake);
11958 xmlFreeNode(fake);
11959
11960
11961 if (ret != XML_ERR_OK) {
11962 xmlFreeNodeList(*lst);
11963 *lst = NULL;
11964 }
11965
11966 if (doc->dict != NULL)
11967 ctxt->dict = NULL;
11968 xmlFreeParserCtxt(ctxt);
11969
11970 return(ret);
11971#else /* !SAX2 */
11972 return(XML_ERR_INTERNAL_ERROR);
11973#endif
11974}
11975
11976#ifdef LIBXML_SAX1_ENABLED
11977/**
11978 * xmlParseBalancedChunkMemoryRecover:
11979 * @doc: the document the chunk pertains to
11980 * @sax: the SAX handler bloc (possibly NULL)
11981 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11982 * @depth: Used for loop detection, use 0
11983 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11984 * @lst: the return value for the set of parsed nodes
11985 * @recover: return nodes even if the data is broken (use 0)
11986 *
11987 *
11988 * Parse a well-balanced chunk of an XML document
11989 * called by the parser
11990 * The allowed sequence for the Well Balanced Chunk is the one defined by
11991 * the content production in the XML grammar:
11992 *
11993 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11994 *
11995 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11996 * the parser error code otherwise
11997 *
11998 * In case recover is set to 1, the nodelist will not be empty even if
11999 * the parsed chunk is not well balanced.
12000 */
12001int
12002xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12003 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
12004 int recover) {
12005 xmlParserCtxtPtr ctxt;
12006 xmlDocPtr newDoc;
12007 xmlSAXHandlerPtr oldsax = NULL;
12008 xmlNodePtr content, newRoot;
12009 int size;
12010 int ret = 0;
12011
12012 if (depth > 40) {
12013 return(XML_ERR_ENTITY_LOOP);
12014 }
12015
12016
12017 if (lst != NULL)
12018 *lst = NULL;
12019 if (string == NULL)
12020 return(-1);
12021
12022 size = xmlStrlen(string);
12023
12024 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12025 if (ctxt == NULL) return(-1);
12026 ctxt->userData = ctxt;
12027 if (sax != NULL) {
12028 oldsax = ctxt->sax;
12029 ctxt->sax = sax;
12030 if (user_data != NULL)
12031 ctxt->userData = user_data;
12032 }
12033 newDoc = xmlNewDoc(BAD_CAST "1.0");
12034 if (newDoc == NULL) {
12035 xmlFreeParserCtxt(ctxt);
12036 return(-1);
12037 }
12038 if ((doc != NULL) && (doc->dict != NULL)) {
12039 xmlDictFree(ctxt->dict);
12040 ctxt->dict = doc->dict;
12041 xmlDictReference(ctxt->dict);
12042 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12043 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12044 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12045 ctxt->dictNames = 1;
12046 } else {
12047 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
12048 }
12049 if (doc != NULL) {
12050 newDoc->intSubset = doc->intSubset;
12051 newDoc->extSubset = doc->extSubset;
12052 }
12053 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12054 if (newRoot == NULL) {
12055 if (sax != NULL)
12056 ctxt->sax = oldsax;
12057 xmlFreeParserCtxt(ctxt);
12058 newDoc->intSubset = NULL;
12059 newDoc->extSubset = NULL;
12060 xmlFreeDoc(newDoc);
12061 return(-1);
12062 }
12063 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12064 nodePush(ctxt, newRoot);
12065 if (doc == NULL) {
12066 ctxt->myDoc = newDoc;
12067 } else {
12068 ctxt->myDoc = newDoc;
12069 newDoc->children->doc = doc;
12070 /* Ensure that doc has XML spec namespace */
12071 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12072 newDoc->oldNs = doc->oldNs;
12073 }
12074 ctxt->instate = XML_PARSER_CONTENT;
12075 ctxt->depth = depth;
12076
12077 /*
12078 * Doing validity checking on chunk doesn't make sense
12079 */
12080 ctxt->validate = 0;
12081 ctxt->loadsubset = 0;
12082 xmlDetectSAX2(ctxt);
12083
12084 if ( doc != NULL ){
12085 content = doc->children;
12086 doc->children = NULL;
12087 xmlParseContent(ctxt);
12088 doc->children = content;
12089 }
12090 else {
12091 xmlParseContent(ctxt);
12092 }
12093 if ((RAW == '<') && (NXT(1) == '/')) {
12094 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12095 } else if (RAW != 0) {
12096 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12097 }
12098 if (ctxt->node != newDoc->children) {
12099 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12100 }
12101
12102 if (!ctxt->wellFormed) {
12103 if (ctxt->errNo == 0)
12104 ret = 1;
12105 else
12106 ret = ctxt->errNo;
12107 } else {
12108 ret = 0;
12109 }
12110
12111 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12112 xmlNodePtr cur;
12113
12114 /*
12115 * Return the newly created nodeset after unlinking it from
12116 * they pseudo parent.
12117 */
12118 cur = newDoc->children->children;
12119 *lst = cur;
12120 while (cur != NULL) {
12121 xmlSetTreeDoc(cur, doc);
12122 cur->parent = NULL;
12123 cur = cur->next;
12124 }
12125 newDoc->children->children = NULL;
12126 }
12127
12128 if (sax != NULL)
12129 ctxt->sax = oldsax;
12130 xmlFreeParserCtxt(ctxt);
12131 newDoc->intSubset = NULL;
12132 newDoc->extSubset = NULL;
12133 newDoc->oldNs = NULL;
12134 xmlFreeDoc(newDoc);
12135
12136 return(ret);
12137}
12138
12139/**
12140 * xmlSAXParseEntity:
12141 * @sax: the SAX handler block
12142 * @filename: the filename
12143 *
12144 * parse an XML external entity out of context and build a tree.
12145 * It use the given SAX function block to handle the parsing callback.
12146 * If sax is NULL, fallback to the default DOM tree building routines.
12147 *
12148 * [78] extParsedEnt ::= TextDecl? content
12149 *
12150 * This correspond to a "Well Balanced" chunk
12151 *
12152 * Returns the resulting document tree
12153 */
12154
12155xmlDocPtr
12156xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12157 xmlDocPtr ret;
12158 xmlParserCtxtPtr ctxt;
12159
12160 ctxt = xmlCreateFileParserCtxt(filename);
12161 if (ctxt == NULL) {
12162 return(NULL);
12163 }
12164 if (sax != NULL) {
12165 if (ctxt->sax != NULL)
12166 xmlFree(ctxt->sax);
12167 ctxt->sax = sax;
12168 ctxt->userData = NULL;
12169 }
12170
12171 xmlParseExtParsedEnt(ctxt);
12172
12173 if (ctxt->wellFormed)
12174 ret = ctxt->myDoc;
12175 else {
12176 ret = NULL;
12177 xmlFreeDoc(ctxt->myDoc);
12178 ctxt->myDoc = NULL;
12179 }
12180 if (sax != NULL)
12181 ctxt->sax = NULL;
12182 xmlFreeParserCtxt(ctxt);
12183
12184 return(ret);
12185}
12186
12187/**
12188 * xmlParseEntity:
12189 * @filename: the filename
12190 *
12191 * parse an XML external entity out of context and build a tree.
12192 *
12193 * [78] extParsedEnt ::= TextDecl? content
12194 *
12195 * This correspond to a "Well Balanced" chunk
12196 *
12197 * Returns the resulting document tree
12198 */
12199
12200xmlDocPtr
12201xmlParseEntity(const char *filename) {
12202 return(xmlSAXParseEntity(NULL, filename));
12203}
12204#endif /* LIBXML_SAX1_ENABLED */
12205
12206/**
12207 * xmlCreateEntityParserCtxt:
12208 * @URL: the entity URL
12209 * @ID: the entity PUBLIC ID
12210 * @base: a possible base for the target URI
12211 *
12212 * Create a parser context for an external entity
12213 * Automatic support for ZLIB/Compress compressed document is provided
12214 * by default if found at compile-time.
12215 *
12216 * Returns the new parser context or NULL
12217 */
12218xmlParserCtxtPtr
12219xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12220 const xmlChar *base) {
12221 xmlParserCtxtPtr ctxt;
12222 xmlParserInputPtr inputStream;
12223 char *directory = NULL;
12224 xmlChar *uri;
12225
12226 ctxt = xmlNewParserCtxt();
12227 if (ctxt == NULL) {
12228 return(NULL);
12229 }
12230
12231 uri = xmlBuildURI(URL, base);
12232
12233 if (uri == NULL) {
12234 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12235 if (inputStream == NULL) {
12236 xmlFreeParserCtxt(ctxt);
12237 return(NULL);
12238 }
12239
12240 inputPush(ctxt, inputStream);
12241
12242 if ((ctxt->directory == NULL) && (directory == NULL))
12243 directory = xmlParserGetDirectory((char *)URL);
12244 if ((ctxt->directory == NULL) && (directory != NULL))
12245 ctxt->directory = directory;
12246 } else {
12247 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12248 if (inputStream == NULL) {
12249 xmlFree(uri);
12250 xmlFreeParserCtxt(ctxt);
12251 return(NULL);
12252 }
12253
12254 inputPush(ctxt, inputStream);
12255
12256 if ((ctxt->directory == NULL) && (directory == NULL))
12257 directory = xmlParserGetDirectory((char *)uri);
12258 if ((ctxt->directory == NULL) && (directory != NULL))
12259 ctxt->directory = directory;
12260 xmlFree(uri);
12261 }
12262 return(ctxt);
12263}
12264
12265/************************************************************************
12266 * *
12267 * Front ends when parsing from a file *
12268 * *
12269 ************************************************************************/
12270
12271/**
12272 * xmlCreateURLParserCtxt:
12273 * @filename: the filename or URL
12274 * @options: a combination of xmlParserOption
12275 *
12276 * Create a parser context for a file or URL content.
12277 * Automatic support for ZLIB/Compress compressed document is provided
12278 * by default if found at compile-time and for file accesses
12279 *
12280 * Returns the new parser context or NULL
12281 */
12282xmlParserCtxtPtr
12283xmlCreateURLParserCtxt(const char *filename, int options)
12284{
12285 xmlParserCtxtPtr ctxt;
12286 xmlParserInputPtr inputStream;
12287 char *directory = NULL;
12288
12289 ctxt = xmlNewParserCtxt();
12290 if (ctxt == NULL) {
12291 xmlErrMemory(NULL, "cannot allocate parser context");
12292 return(NULL);
12293 }
12294
12295 if (options)
12296 xmlCtxtUseOptions(ctxt, options);
12297 ctxt->linenumbers = 1;
12298
12299 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
12300 if (inputStream == NULL) {
12301 xmlFreeParserCtxt(ctxt);
12302 return(NULL);
12303 }
12304
12305 inputPush(ctxt, inputStream);
12306 if ((ctxt->directory == NULL) && (directory == NULL))
12307 directory = xmlParserGetDirectory(filename);
12308 if ((ctxt->directory == NULL) && (directory != NULL))
12309 ctxt->directory = directory;
12310
12311 return(ctxt);
12312}
12313
12314/**
12315 * xmlCreateFileParserCtxt:
12316 * @filename: the filename
12317 *
12318 * Create a parser context for a file content.
12319 * Automatic support for ZLIB/Compress compressed document is provided
12320 * by default if found at compile-time.
12321 *
12322 * Returns the new parser context or NULL
12323 */
12324xmlParserCtxtPtr
12325xmlCreateFileParserCtxt(const char *filename)
12326{
12327 return(xmlCreateURLParserCtxt(filename, 0));
12328}
12329
12330#ifdef LIBXML_SAX1_ENABLED
12331/**
12332 * xmlSAXParseFileWithData:
12333 * @sax: the SAX handler block
12334 * @filename: the filename
12335 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12336 * documents
12337 * @data: the userdata
12338 *
12339 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12340 * compressed document is provided by default if found at compile-time.
12341 * It use the given SAX function block to handle the parsing callback.
12342 * If sax is NULL, fallback to the default DOM tree building routines.
12343 *
12344 * User data (void *) is stored within the parser context in the
12345 * context's _private member, so it is available nearly everywhere in libxml
12346 *
12347 * Returns the resulting document tree
12348 */
12349
12350xmlDocPtr
12351xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12352 int recovery, void *data) {
12353 xmlDocPtr ret;
12354 xmlParserCtxtPtr ctxt;
12355 char *directory = NULL;
12356
12357 xmlInitParser();
12358
12359 ctxt = xmlCreateFileParserCtxt(filename);
12360 if (ctxt == NULL) {
12361 return(NULL);
12362 }
12363 if (sax != NULL) {
12364 if (ctxt->sax != NULL)
12365 xmlFree(ctxt->sax);
12366 ctxt->sax = sax;
12367 }
12368 xmlDetectSAX2(ctxt);
12369 if (data!=NULL) {
12370 ctxt->_private = data;
12371 }
12372
12373 if ((ctxt->directory == NULL) && (directory == NULL))
12374 directory = xmlParserGetDirectory(filename);
12375 if ((ctxt->directory == NULL) && (directory != NULL))
12376 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12377
12378 ctxt->recovery = recovery;
12379
12380 xmlParseDocument(ctxt);
12381
12382 if ((ctxt->wellFormed) || recovery) {
12383 ret = ctxt->myDoc;
12384 if (ret != NULL) {
12385 if (ctxt->input->buf->compressed > 0)
12386 ret->compression = 9;
12387 else
12388 ret->compression = ctxt->input->buf->compressed;
12389 }
12390 }
12391 else {
12392 ret = NULL;
12393 xmlFreeDoc(ctxt->myDoc);
12394 ctxt->myDoc = NULL;
12395 }
12396 if (sax != NULL)
12397 ctxt->sax = NULL;
12398 xmlFreeParserCtxt(ctxt);
12399
12400 return(ret);
12401}
12402
12403/**
12404 * xmlSAXParseFile:
12405 * @sax: the SAX handler block
12406 * @filename: the filename
12407 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12408 * documents
12409 *
12410 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12411 * compressed document is provided by default if found at compile-time.
12412 * It use the given SAX function block to handle the parsing callback.
12413 * If sax is NULL, fallback to the default DOM tree building routines.
12414 *
12415 * Returns the resulting document tree
12416 */
12417
12418xmlDocPtr
12419xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12420 int recovery) {
12421 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12422}
12423
12424/**
12425 * xmlRecoverDoc:
12426 * @cur: a pointer to an array of xmlChar
12427 *
12428 * parse an XML in-memory document and build a tree.
12429 * In the case the document is not Well Formed, a tree is built anyway
12430 *
12431 * Returns the resulting document tree
12432 */
12433
12434xmlDocPtr
12435xmlRecoverDoc(xmlChar *cur) {
12436 return(xmlSAXParseDoc(NULL, cur, 1));
12437}
12438
12439/**
12440 * xmlParseFile:
12441 * @filename: the filename
12442 *
12443 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12444 * compressed document is provided by default if found at compile-time.
12445 *
12446 * Returns the resulting document tree if the file was wellformed,
12447 * NULL otherwise.
12448 */
12449
12450xmlDocPtr
12451xmlParseFile(const char *filename) {
12452 return(xmlSAXParseFile(NULL, filename, 0));
12453}
12454
12455/**
12456 * xmlRecoverFile:
12457 * @filename: the filename
12458 *
12459 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12460 * compressed document is provided by default if found at compile-time.
12461 * In the case the document is not Well Formed, a tree is built anyway
12462 *
12463 * Returns the resulting document tree
12464 */
12465
12466xmlDocPtr
12467xmlRecoverFile(const char *filename) {
12468 return(xmlSAXParseFile(NULL, filename, 1));
12469}
12470
12471
12472/**
12473 * xmlSetupParserForBuffer:
12474 * @ctxt: an XML parser context
12475 * @buffer: a xmlChar * buffer
12476 * @filename: a file name
12477 *
12478 * Setup the parser context to parse a new buffer; Clears any prior
12479 * contents from the parser context. The buffer parameter must not be
12480 * NULL, but the filename parameter can be
12481 */
12482void
12483xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12484 const char* filename)
12485{
12486 xmlParserInputPtr input;
12487
12488 if ((ctxt == NULL) || (buffer == NULL))
12489 return;
12490
12491 input = xmlNewInputStream(ctxt);
12492 if (input == NULL) {
12493 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
12494 xmlClearParserCtxt(ctxt);
12495 return;
12496 }
12497
12498 xmlClearParserCtxt(ctxt);
12499 if (filename != NULL)
12500 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
12501 input->base = buffer;
12502 input->cur = buffer;
12503 input->end = &buffer[xmlStrlen(buffer)];
12504 inputPush(ctxt, input);
12505}
12506
12507/**
12508 * xmlSAXUserParseFile:
12509 * @sax: a SAX handler
12510 * @user_data: The user data returned on SAX callbacks
12511 * @filename: a file name
12512 *
12513 * parse an XML file and call the given SAX handler routines.
12514 * Automatic support for ZLIB/Compress compressed document is provided
12515 *
12516 * Returns 0 in case of success or a error number otherwise
12517 */
12518int
12519xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12520 const char *filename) {
12521 int ret = 0;
12522 xmlParserCtxtPtr ctxt;
12523
12524 ctxt = xmlCreateFileParserCtxt(filename);
12525 if (ctxt == NULL) return -1;
12526 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12527 xmlFree(ctxt->sax);
12528 ctxt->sax = sax;
12529 xmlDetectSAX2(ctxt);
12530
12531 if (user_data != NULL)
12532 ctxt->userData = user_data;
12533
12534 xmlParseDocument(ctxt);
12535
12536 if (ctxt->wellFormed)
12537 ret = 0;
12538 else {
12539 if (ctxt->errNo != 0)
12540 ret = ctxt->errNo;
12541 else
12542 ret = -1;
12543 }
12544 if (sax != NULL)
12545 ctxt->sax = NULL;
12546 if (ctxt->myDoc != NULL) {
12547 xmlFreeDoc(ctxt->myDoc);
12548 ctxt->myDoc = NULL;
12549 }
12550 xmlFreeParserCtxt(ctxt);
12551
12552 return ret;
12553}
12554#endif /* LIBXML_SAX1_ENABLED */
12555
12556/************************************************************************
12557 * *
12558 * Front ends when parsing from memory *
12559 * *
12560 ************************************************************************/
12561
12562/**
12563 * xmlCreateMemoryParserCtxt:
12564 * @buffer: a pointer to a char array
12565 * @size: the size of the array
12566 *
12567 * Create a parser context for an XML in-memory document.
12568 *
12569 * Returns the new parser context or NULL
12570 */
12571xmlParserCtxtPtr
12572xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12573 xmlParserCtxtPtr ctxt;
12574 xmlParserInputPtr input;
12575 xmlParserInputBufferPtr buf;
12576
12577 if (buffer == NULL)
12578 return(NULL);
12579 if (size <= 0)
12580 return(NULL);
12581
12582 ctxt = xmlNewParserCtxt();
12583 if (ctxt == NULL)
12584 return(NULL);
12585
12586 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
12587 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12588 if (buf == NULL) {
12589 xmlFreeParserCtxt(ctxt);
12590 return(NULL);
12591 }
12592
12593 input = xmlNewInputStream(ctxt);
12594 if (input == NULL) {
12595 xmlFreeParserInputBuffer(buf);
12596 xmlFreeParserCtxt(ctxt);
12597 return(NULL);
12598 }
12599
12600 input->filename = NULL;
12601 input->buf = buf;
12602 input->base = input->buf->buffer->content;
12603 input->cur = input->buf->buffer->content;
12604 input->end = &input->buf->buffer->content[input->buf->buffer->use];
12605
12606 inputPush(ctxt, input);
12607 return(ctxt);
12608}
12609
12610#ifdef LIBXML_SAX1_ENABLED
12611/**
12612 * xmlSAXParseMemoryWithData:
12613 * @sax: the SAX handler block
12614 * @buffer: an pointer to a char array
12615 * @size: the size of the array
12616 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12617 * documents
12618 * @data: the userdata
12619 *
12620 * parse an XML in-memory block and use the given SAX function block
12621 * to handle the parsing callback. If sax is NULL, fallback to the default
12622 * DOM tree building routines.
12623 *
12624 * User data (void *) is stored within the parser context in the
12625 * context's _private member, so it is available nearly everywhere in libxml
12626 *
12627 * Returns the resulting document tree
12628 */
12629
12630xmlDocPtr
12631xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12632 int size, int recovery, void *data) {
12633 xmlDocPtr ret;
12634 xmlParserCtxtPtr ctxt;
12635
12636 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12637 if (ctxt == NULL) return(NULL);
12638 if (sax != NULL) {
12639 if (ctxt->sax != NULL)
12640 xmlFree(ctxt->sax);
12641 ctxt->sax = sax;
12642 }
12643 xmlDetectSAX2(ctxt);
12644 if (data!=NULL) {
12645 ctxt->_private=data;
12646 }
12647
12648 ctxt->recovery = recovery;
12649
12650 xmlParseDocument(ctxt);
12651
12652 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12653 else {
12654 ret = NULL;
12655 xmlFreeDoc(ctxt->myDoc);
12656 ctxt->myDoc = NULL;
12657 }
12658 if (sax != NULL)
12659 ctxt->sax = NULL;
12660 xmlFreeParserCtxt(ctxt);
12661
12662 return(ret);
12663}
12664
12665/**
12666 * xmlSAXParseMemory:
12667 * @sax: the SAX handler block
12668 * @buffer: an pointer to a char array
12669 * @size: the size of the array
12670 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12671 * documents
12672 *
12673 * parse an XML in-memory block and use the given SAX function block
12674 * to handle the parsing callback. If sax is NULL, fallback to the default
12675 * DOM tree building routines.
12676 *
12677 * Returns the resulting document tree
12678 */
12679xmlDocPtr
12680xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12681 int size, int recovery) {
12682 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12683}
12684
12685/**
12686 * xmlParseMemory:
12687 * @buffer: an pointer to a char array
12688 * @size: the size of the array
12689 *
12690 * parse an XML in-memory block and build a tree.
12691 *
12692 * Returns the resulting document tree
12693 */
12694
12695xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12696 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12697}
12698
12699/**
12700 * xmlRecoverMemory:
12701 * @buffer: an pointer to a char array
12702 * @size: the size of the array
12703 *
12704 * parse an XML in-memory block and build a tree.
12705 * In the case the document is not Well Formed, a tree is built anyway
12706 *
12707 * Returns the resulting document tree
12708 */
12709
12710xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12711 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12712}
12713
12714/**
12715 * xmlSAXUserParseMemory:
12716 * @sax: a SAX handler
12717 * @user_data: The user data returned on SAX callbacks
12718 * @buffer: an in-memory XML document input
12719 * @size: the length of the XML document in bytes
12720 *
12721 * A better SAX parsing routine.
12722 * parse an XML in-memory buffer and call the given SAX handler routines.
12723 *
12724 * Returns 0 in case of success or a error number otherwise
12725 */
12726int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12727 const char *buffer, int size) {
12728 int ret = 0;
12729 xmlParserCtxtPtr ctxt;
12730
12731 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12732 if (ctxt == NULL) return -1;
12733 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12734 xmlFree(ctxt->sax);
12735 ctxt->sax = sax;
12736 xmlDetectSAX2(ctxt);
12737
12738 if (user_data != NULL)
12739 ctxt->userData = user_data;
12740
12741 xmlParseDocument(ctxt);
12742
12743 if (ctxt->wellFormed)
12744 ret = 0;
12745 else {
12746 if (ctxt->errNo != 0)
12747 ret = ctxt->errNo;
12748 else
12749 ret = -1;
12750 }
12751 if (sax != NULL)
12752 ctxt->sax = NULL;
12753 if (ctxt->myDoc != NULL) {
12754 xmlFreeDoc(ctxt->myDoc);
12755 ctxt->myDoc = NULL;
12756 }
12757 xmlFreeParserCtxt(ctxt);
12758
12759 return ret;
12760}
12761#endif /* LIBXML_SAX1_ENABLED */
12762
12763/**
12764 * xmlCreateDocParserCtxt:
12765 * @cur: a pointer to an array of xmlChar
12766 *
12767 * Creates a parser context for an XML in-memory document.
12768 *
12769 * Returns the new parser context or NULL
12770 */
12771xmlParserCtxtPtr
12772xmlCreateDocParserCtxt(const xmlChar *cur) {
12773 int len;
12774
12775 if (cur == NULL)
12776 return(NULL);
12777 len = xmlStrlen(cur);
12778 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
12779}
12780
12781#ifdef LIBXML_SAX1_ENABLED
12782/**
12783 * xmlSAXParseDoc:
12784 * @sax: the SAX handler block
12785 * @cur: a pointer to an array of xmlChar
12786 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12787 * documents
12788 *
12789 * parse an XML in-memory document and build a tree.
12790 * It use the given SAX function block to handle the parsing callback.
12791 * If sax is NULL, fallback to the default DOM tree building routines.
12792 *
12793 * Returns the resulting document tree
12794 */
12795
12796xmlDocPtr
12797xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12798 xmlDocPtr ret;
12799 xmlParserCtxtPtr ctxt;
12800 xmlSAXHandlerPtr oldsax = NULL;
12801
12802 if (cur == NULL) return(NULL);
12803
12804
12805 ctxt = xmlCreateDocParserCtxt(cur);
12806 if (ctxt == NULL) return(NULL);
12807 if (sax != NULL) {
12808 oldsax = ctxt->sax;
12809 ctxt->sax = sax;
12810 ctxt->userData = NULL;
12811 }
12812 xmlDetectSAX2(ctxt);
12813
12814 xmlParseDocument(ctxt);
12815 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12816 else {
12817 ret = NULL;
12818 xmlFreeDoc(ctxt->myDoc);
12819 ctxt->myDoc = NULL;
12820 }
12821 if (sax != NULL)
12822 ctxt->sax = oldsax;
12823 xmlFreeParserCtxt(ctxt);
12824
12825 return(ret);
12826}
12827
12828/**
12829 * xmlParseDoc:
12830 * @cur: a pointer to an array of xmlChar
12831 *
12832 * parse an XML in-memory document and build a tree.
12833 *
12834 * Returns the resulting document tree
12835 */
12836
12837xmlDocPtr
12838xmlParseDoc(const xmlChar *cur) {
12839 return(xmlSAXParseDoc(NULL, cur, 0));
12840}
12841#endif /* LIBXML_SAX1_ENABLED */
12842
12843#ifdef LIBXML_LEGACY_ENABLED
12844/************************************************************************
12845 * *
12846 * Specific function to keep track of entities references *
12847 * and used by the XSLT debugger *
12848 * *
12849 ************************************************************************/
12850
12851static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12852
12853/**
12854 * xmlAddEntityReference:
12855 * @ent : A valid entity
12856 * @firstNode : A valid first node for children of entity
12857 * @lastNode : A valid last node of children entity
12858 *
12859 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12860 */
12861static void
12862xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12863 xmlNodePtr lastNode)
12864{
12865 if (xmlEntityRefFunc != NULL) {
12866 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12867 }
12868}
12869
12870
12871/**
12872 * xmlSetEntityReferenceFunc:
12873 * @func: A valid function
12874 *
12875 * Set the function to call call back when a xml reference has been made
12876 */
12877void
12878xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12879{
12880 xmlEntityRefFunc = func;
12881}
12882#endif /* LIBXML_LEGACY_ENABLED */
12883
12884/************************************************************************
12885 * *
12886 * Miscellaneous *
12887 * *
12888 ************************************************************************/
12889
12890#ifdef LIBXML_XPATH_ENABLED
12891#include <libxml/xpath.h>
12892#endif
12893
12894extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
12895static int xmlParserInitialized = 0;
12896
12897/**
12898 * xmlInitParser:
12899 *
12900 * Initialization function for the XML parser.
12901 * This is not reentrant. Call once before processing in case of
12902 * use in multithreaded programs.
12903 */
12904
12905void
12906xmlInitParser(void) {
12907 if (xmlParserInitialized != 0)
12908 return;
12909
12910#ifdef LIBXML_THREAD_ENABLED
12911 __xmlGlobalInitMutexLock();
12912 if (xmlParserInitialized == 0) {
12913#endif
12914 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12915 (xmlGenericError == NULL))
12916 initGenericErrorDefaultFunc(NULL);
12917 xmlInitGlobals();
12918 xmlInitThreads();
12919 xmlInitMemory();
12920 xmlInitCharEncodingHandlers();
12921 xmlDefaultSAXHandlerInit();
12922 xmlRegisterDefaultInputCallbacks();
12923#ifdef LIBXML_OUTPUT_ENABLED
12924 xmlRegisterDefaultOutputCallbacks();
12925#endif /* LIBXML_OUTPUT_ENABLED */
12926#ifdef LIBXML_HTML_ENABLED
12927 htmlInitAutoClose();
12928 htmlDefaultSAXHandlerInit();
12929#endif
12930#ifdef LIBXML_XPATH_ENABLED
12931 xmlXPathInit();
12932#endif
12933 xmlParserInitialized = 1;
12934#ifdef LIBXML_THREAD_ENABLED
12935 }
12936 __xmlGlobalInitMutexUnlock();
12937#endif
12938}
12939
12940/**
12941 * xmlCleanupParser:
12942 *
12943 * Cleanup function for the XML library. It tries to reclaim all
12944 * parsing related global memory allocated for the library processing.
12945 * It doesn't deallocate any document related memory. Calling this
12946 * function should not prevent reusing the library but one should
12947 * call xmlCleanupParser() only when the process has
12948 * finished using the library or XML document built with it.
12949 */
12950
12951void
12952xmlCleanupParser(void) {
12953 if (!xmlParserInitialized)
12954 return;
12955
12956 xmlCleanupCharEncodingHandlers();
12957#ifdef LIBXML_CATALOG_ENABLED
12958 xmlCatalogCleanup();
12959#endif
12960 xmlDictCleanup();
12961 xmlCleanupInputCallbacks();
12962#ifdef LIBXML_OUTPUT_ENABLED
12963 xmlCleanupOutputCallbacks();
12964#endif
12965#ifdef LIBXML_SCHEMAS_ENABLED
12966 xmlSchemaCleanupTypes();
12967 xmlRelaxNGCleanupTypes();
12968#endif
12969 xmlCleanupGlobals();
12970 xmlResetLastError();
12971 xmlCleanupThreads(); /* must be last if called not from the main thread */
12972 xmlCleanupMemory();
12973 xmlParserInitialized = 0;
12974}
12975
12976/************************************************************************
12977 * *
12978 * New set (2.6.0) of simpler and more flexible APIs *
12979 * *
12980 ************************************************************************/
12981
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope
 *
 * Expands to a single statement, so it can be used safely as the body
 * of an if/else. A variable named dict must be visible where the macro
 * is used; @str is evaluated more than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12993
12994/**
12995 * xmlCtxtReset:
12996 * @ctxt: an XML parser context
12997 *
12998 * Reset a parser context
12999 */
13000void
13001xmlCtxtReset(xmlParserCtxtPtr ctxt)
13002{
13003 xmlParserInputPtr input;
13004 xmlDictPtr dict;
13005
13006 if (ctxt == NULL)
13007 return;
13008
13009 dict = ctxt->dict;
13010
13011 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13012 xmlFreeInputStream(input);
13013 }
13014 ctxt->inputNr = 0;
13015 ctxt->input = NULL;
13016
13017 ctxt->spaceNr = 0;
13018 if (ctxt->spaceTab != NULL) {
13019 ctxt->spaceTab[0] = -1;
13020 ctxt->space = &ctxt->spaceTab[0];
13021 } else {
13022 ctxt->space = NULL;
13023 }
13024
13025
13026 ctxt->nodeNr = 0;
13027 ctxt->node = NULL;
13028
13029 ctxt->nameNr = 0;
13030 ctxt->name = NULL;
13031
13032 DICT_FREE(ctxt->version);
13033 ctxt->version = NULL;
13034 DICT_FREE(ctxt->encoding);
13035 ctxt->encoding = NULL;
13036 DICT_FREE(ctxt->directory);
13037 ctxt->directory = NULL;
13038 DICT_FREE(ctxt->extSubURI);
13039 ctxt->extSubURI = NULL;
13040 DICT_FREE(ctxt->extSubSystem);
13041 ctxt->extSubSystem = NULL;
13042 if (ctxt->myDoc != NULL)
13043 xmlFreeDoc(ctxt->myDoc);
13044 ctxt->myDoc = NULL;
13045
13046 ctxt->standalone = -1;
13047 ctxt->hasExternalSubset = 0;
13048 ctxt->hasPErefs = 0;
13049 ctxt->html = 0;
13050 ctxt->external = 0;
13051 ctxt->instate = XML_PARSER_START;
13052 ctxt->token = 0;
13053
13054 ctxt->wellFormed = 1;
13055 ctxt->nsWellFormed = 1;
13056 ctxt->disableSAX = 0;
13057 ctxt->valid = 1;
13058#if 0
13059 ctxt->vctxt.userData = ctxt;
13060 ctxt->vctxt.error = xmlParserValidityError;
13061 ctxt->vctxt.warning = xmlParserValidityWarning;
13062#endif
13063 ctxt->record_info = 0;
13064 ctxt->nbChars = 0;
13065 ctxt->checkIndex = 0;
13066 ctxt->inSubset = 0;
13067 ctxt->errNo = XML_ERR_OK;
13068 ctxt->depth = 0;
13069 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13070 ctxt->catalogs = NULL;
13071 ctxt->nbentities = 0;
13072 xmlInitNodeInfoSeq(&ctxt->node_seq);
13073
13074 if (ctxt->attsDefault != NULL) {
13075 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13076 ctxt->attsDefault = NULL;
13077 }
13078 if (ctxt->attsSpecial != NULL) {
13079 xmlHashFree(ctxt->attsSpecial, NULL);
13080 ctxt->attsSpecial = NULL;
13081 }
13082
13083#ifdef LIBXML_CATALOG_ENABLED
13084 if (ctxt->catalogs != NULL)
13085 xmlCatalogFreeLocal(ctxt->catalogs);
13086#endif
13087 if (ctxt->lastError.code != XML_ERR_OK)
13088 xmlResetError(&ctxt->lastError);
13089}
13090
13091/**
13092 * xmlCtxtResetPush:
13093 * @ctxt: an XML parser context
13094 * @chunk: a pointer to an array of chars
13095 * @size: number of chars in the array
13096 * @filename: an optional file name or URI
13097 * @encoding: the document encoding, or NULL
13098 *
13099 * Reset a push parser context
13100 *
13101 * Returns 0 in case of success and 1 in case of error
13102 */
13103int
13104xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13105 int size, const char *filename, const char *encoding)
13106{
13107 xmlParserInputPtr inputStream;
13108 xmlParserInputBufferPtr buf;
13109 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13110
13111 if (ctxt == NULL)
13112 return(1);
13113
13114 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13115 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13116
13117 buf = xmlAllocParserInputBuffer(enc);
13118 if (buf == NULL)
13119 return(1);
13120
13121 if (ctxt == NULL) {
13122 xmlFreeParserInputBuffer(buf);
13123 return(1);
13124 }
13125
13126 xmlCtxtReset(ctxt);
13127
13128 if (ctxt->pushTab == NULL) {
13129 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13130 sizeof(xmlChar *));
13131 if (ctxt->pushTab == NULL) {
13132 xmlErrMemory(ctxt, NULL);
13133 xmlFreeParserInputBuffer(buf);
13134 return(1);
13135 }
13136 }
13137
13138 if (filename == NULL) {
13139 ctxt->directory = NULL;
13140 } else {
13141 ctxt->directory = xmlParserGetDirectory(filename);
13142 }
13143
13144 inputStream = xmlNewInputStream(ctxt);
13145 if (inputStream == NULL) {
13146 xmlFreeParserInputBuffer(buf);
13147 return(1);
13148 }
13149
13150 if (filename == NULL)
13151 inputStream->filename = NULL;
13152 else
13153 inputStream->filename = (char *)
13154 xmlCanonicPath((const xmlChar *) filename);
13155 inputStream->buf = buf;
13156 inputStream->base = inputStream->buf->buffer->content;
13157 inputStream->cur = inputStream->buf->buffer->content;
13158 inputStream->end =
13159 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13160
13161 inputPush(ctxt, inputStream);
13162
13163 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13164 (ctxt->input->buf != NULL)) {
13165 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13166 int cur = ctxt->input->cur - ctxt->input->base;
13167
13168 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13169
13170 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13171 ctxt->input->cur = ctxt->input->base + cur;
13172 ctxt->input->end =
13173 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13174 use];
13175#ifdef DEBUG_PUSH
13176 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13177#endif
13178 }
13179
13180 if (encoding != NULL) {
13181 xmlCharEncodingHandlerPtr hdlr;
13182
13183 hdlr = xmlFindCharEncodingHandler(encoding);
13184 if (hdlr != NULL) {
13185 xmlSwitchToEncoding(ctxt, hdlr);
13186 } else {
13187 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13188 "Unsupported encoding %s\n", BAD_CAST encoding);
13189 }
13190 } else if (enc != XML_CHAR_ENCODING_NONE) {
13191 xmlSwitchEncoding(ctxt, enc);
13192 }
13193
13194 return(0);
13195}
13196
13197/**
13198 * xmlCtxtUseOptions:
13199 * @ctxt: an XML parser context
13200 * @options: a combination of xmlParserOption
13201 *
13202 * Applies the options to the parser context
13203 *
13204 * Returns 0 in case of success, the set of unknown or unimplemented options
13205 * in case of error.
13206 */
13207int
13208xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13209{
13210 if (ctxt == NULL)
13211 return(-1);
13212 if (options & XML_PARSE_RECOVER) {
13213 ctxt->recovery = 1;
13214 options -= XML_PARSE_RECOVER;
13215 } else
13216 ctxt->recovery = 0;
13217 if (options & XML_PARSE_DTDLOAD) {
13218 ctxt->loadsubset = XML_DETECT_IDS;
13219 options -= XML_PARSE_DTDLOAD;
13220 } else
13221 ctxt->loadsubset = 0;
13222 if (options & XML_PARSE_DTDATTR) {
13223 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13224 options -= XML_PARSE_DTDATTR;
13225 }
13226 if (options & XML_PARSE_NOENT) {
13227 ctxt->replaceEntities = 1;
13228 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13229 options -= XML_PARSE_NOENT;
13230 } else
13231 ctxt->replaceEntities = 0;
13232 if (options & XML_PARSE_PEDANTIC) {
13233 ctxt->pedantic = 1;
13234 options -= XML_PARSE_PEDANTIC;
13235 } else
13236 ctxt->pedantic = 0;
13237 if (options & XML_PARSE_NOBLANKS) {
13238 ctxt->keepBlanks = 0;
13239 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13240 options -= XML_PARSE_NOBLANKS;
13241 } else
13242 ctxt->keepBlanks = 1;
13243 if (options & XML_PARSE_DTDVALID) {
13244 ctxt->validate = 1;
13245 if (options & XML_PARSE_NOWARNING)
13246 ctxt->vctxt.warning = NULL;
13247 if (options & XML_PARSE_NOERROR)
13248 ctxt->vctxt.error = NULL;
13249 options -= XML_PARSE_DTDVALID;
13250 } else
13251 ctxt->validate = 0;
13252 if (options & XML_PARSE_NOWARNING) {
13253 ctxt->sax->warning = NULL;
13254 options -= XML_PARSE_NOWARNING;
13255 }
13256 if (options & XML_PARSE_NOERROR) {
13257 ctxt->sax->error = NULL;
13258 ctxt->sax->fatalError = NULL;
13259 options -= XML_PARSE_NOERROR;
13260 }
13261#ifdef LIBXML_SAX1_ENABLED
13262 if (options & XML_PARSE_SAX1) {
13263 ctxt->sax->startElement = xmlSAX2StartElement;
13264 ctxt->sax->endElement = xmlSAX2EndElement;
13265 ctxt->sax->startElementNs = NULL;
13266 ctxt->sax->endElementNs = NULL;
13267 ctxt->sax->initialized = 1;
13268 options -= XML_PARSE_SAX1;
13269 }
13270#endif /* LIBXML_SAX1_ENABLED */
13271 if (options & XML_PARSE_NODICT) {
13272 ctxt->dictNames = 0;
13273 options -= XML_PARSE_NODICT;
13274 } else {
13275 ctxt->dictNames = 1;
13276 }
13277 if (options & XML_PARSE_NOCDATA) {
13278 ctxt->sax->cdataBlock = NULL;
13279 options -= XML_PARSE_NOCDATA;
13280 }
13281 if (options & XML_PARSE_NSCLEAN) {
13282 ctxt->options |= XML_PARSE_NSCLEAN;
13283 options -= XML_PARSE_NSCLEAN;
13284 }
13285 if (options & XML_PARSE_NONET) {
13286 ctxt->options |= XML_PARSE_NONET;
13287 options -= XML_PARSE_NONET;
13288 }
13289 if (options & XML_PARSE_COMPACT) {
13290 ctxt->options |= XML_PARSE_COMPACT;
13291 options -= XML_PARSE_COMPACT;
13292 }
13293 ctxt->linenumbers = 1;
13294 return (options);
13295}
13296
13297/**
13298 * xmlDoRead:
13299 * @ctxt: an XML parser context
13300 * @URL: the base URL to use for the document
13301 * @encoding: the document encoding, or NULL
13302 * @options: a combination of xmlParserOption
13303 * @reuse: keep the context for reuse
13304 *
13305 * Common front-end for the xmlRead functions
13306 *
13307 * Returns the resulting document tree or NULL
13308 */
13309static xmlDocPtr
13310xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13311 int options, int reuse)
13312{
13313 xmlDocPtr ret;
13314
13315 xmlCtxtUseOptions(ctxt, options);
13316 if (encoding != NULL) {
13317 xmlCharEncodingHandlerPtr hdlr;
13318
13319 hdlr = xmlFindCharEncodingHandler(encoding);
13320 if (hdlr != NULL)
13321 xmlSwitchToEncoding(ctxt, hdlr);
13322 }
13323 if ((URL != NULL) && (ctxt->input != NULL) &&
13324 (ctxt->input->filename == NULL))
13325 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
13326 xmlParseDocument(ctxt);
13327 if ((ctxt->wellFormed) || ctxt->recovery)
13328 ret = ctxt->myDoc;
13329 else {
13330 ret = NULL;
13331 if (ctxt->myDoc != NULL) {
13332 xmlFreeDoc(ctxt->myDoc);
13333 }
13334 }
13335 ctxt->myDoc = NULL;
13336 if (!reuse) {
13337 xmlFreeParserCtxt(ctxt);
13338 }
13339
13340 return (ret);
13341}
13342
13343/**
13344 * xmlReadDoc:
13345 * @cur: a pointer to a zero terminated string
13346 * @URL: the base URL to use for the document
13347 * @encoding: the document encoding, or NULL
13348 * @options: a combination of xmlParserOption
13349 *
13350 * parse an XML in-memory document and build a tree.
13351 *
13352 * Returns the resulting document tree
13353 */
13354xmlDocPtr
13355xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
13356{
13357 xmlParserCtxtPtr ctxt;
13358
13359 if (cur == NULL)
13360 return (NULL);
13361
13362 ctxt = xmlCreateDocParserCtxt(cur);
13363 if (ctxt == NULL)
13364 return (NULL);
13365 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13366}
13367
13368/**
13369 * xmlReadFile:
13370 * @filename: a file or URL
13371 * @encoding: the document encoding, or NULL
13372 * @options: a combination of xmlParserOption
13373 *
13374 * parse an XML file from the filesystem or the network.
13375 *
13376 * Returns the resulting document tree
13377 */
13378xmlDocPtr
13379xmlReadFile(const char *filename, const char *encoding, int options)
13380{
13381 xmlParserCtxtPtr ctxt;
13382
13383 ctxt = xmlCreateURLParserCtxt(filename, options);
13384 if (ctxt == NULL)
13385 return (NULL);
13386 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
13387}
13388
13389/**
13390 * xmlReadMemory:
13391 * @buffer: a pointer to a char array
13392 * @size: the size of the array
13393 * @URL: the base URL to use for the document
13394 * @encoding: the document encoding, or NULL
13395 * @options: a combination of xmlParserOption
13396 *
13397 * parse an XML in-memory document and build a tree.
13398 *
13399 * Returns the resulting document tree
13400 */
13401xmlDocPtr
13402xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
13403{
13404 xmlParserCtxtPtr ctxt;
13405
13406 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13407 if (ctxt == NULL)
13408 return (NULL);
13409 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13410}
13411
13412/**
13413 * xmlReadFd:
13414 * @fd: an open file descriptor
13415 * @URL: the base URL to use for the document
13416 * @encoding: the document encoding, or NULL
13417 * @options: a combination of xmlParserOption
13418 *
13419 * parse an XML from a file descriptor and build a tree.
13420 * NOTE that the file descriptor will not be closed when the
13421 * reader is closed or reset.
13422 *
13423 * Returns the resulting document tree
13424 */
13425xmlDocPtr
13426xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13427{
13428 xmlParserCtxtPtr ctxt;
13429 xmlParserInputBufferPtr input;
13430 xmlParserInputPtr stream;
13431
13432 if (fd < 0)
13433 return (NULL);
13434
13435 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13436 if (input == NULL)
13437 return (NULL);
13438 input->closecallback = NULL;
13439 ctxt = xmlNewParserCtxt();
13440 if (ctxt == NULL) {
13441 xmlFreeParserInputBuffer(input);
13442 return (NULL);
13443 }
13444 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13445 if (stream == NULL) {
13446 xmlFreeParserInputBuffer(input);
13447 xmlFreeParserCtxt(ctxt);
13448 return (NULL);
13449 }
13450 inputPush(ctxt, stream);
13451 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13452}
13453
13454/**
13455 * xmlReadIO:
13456 * @ioread: an I/O read function
13457 * @ioclose: an I/O close function
13458 * @ioctx: an I/O handler
13459 * @URL: the base URL to use for the document
13460 * @encoding: the document encoding, or NULL
13461 * @options: a combination of xmlParserOption
13462 *
13463 * parse an XML document from I/O functions and source and build a tree.
13464 *
13465 * Returns the resulting document tree
13466 */
13467xmlDocPtr
13468xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13469 void *ioctx, const char *URL, const char *encoding, int options)
13470{
13471 xmlParserCtxtPtr ctxt;
13472 xmlParserInputBufferPtr input;
13473 xmlParserInputPtr stream;
13474
13475 if (ioread == NULL)
13476 return (NULL);
13477
13478 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13479 XML_CHAR_ENCODING_NONE);
13480 if (input == NULL)
13481 return (NULL);
13482 ctxt = xmlNewParserCtxt();
13483 if (ctxt == NULL) {
13484 xmlFreeParserInputBuffer(input);
13485 return (NULL);
13486 }
13487 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13488 if (stream == NULL) {
13489 xmlFreeParserInputBuffer(input);
13490 xmlFreeParserCtxt(ctxt);
13491 return (NULL);
13492 }
13493 inputPush(ctxt, stream);
13494 return (xmlDoRead(ctxt, URL, encoding, options, 0));
13495}
13496
13497/**
13498 * xmlCtxtReadDoc:
13499 * @ctxt: an XML parser context
13500 * @cur: a pointer to a zero terminated string
13501 * @URL: the base URL to use for the document
13502 * @encoding: the document encoding, or NULL
13503 * @options: a combination of xmlParserOption
13504 *
13505 * parse an XML in-memory document and build a tree.
13506 * This reuses the existing @ctxt parser context
13507 *
13508 * Returns the resulting document tree
13509 */
13510xmlDocPtr
13511xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
13512 const char *URL, const char *encoding, int options)
13513{
13514 xmlParserInputPtr stream;
13515
13516 if (cur == NULL)
13517 return (NULL);
13518 if (ctxt == NULL)
13519 return (NULL);
13520
13521 xmlCtxtReset(ctxt);
13522
13523 stream = xmlNewStringInputStream(ctxt, cur);
13524 if (stream == NULL) {
13525 return (NULL);
13526 }
13527 inputPush(ctxt, stream);
13528 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13529}
13530
13531/**
13532 * xmlCtxtReadFile:
13533 * @ctxt: an XML parser context
13534 * @filename: a file or URL
13535 * @encoding: the document encoding, or NULL
13536 * @options: a combination of xmlParserOption
13537 *
13538 * parse an XML file from the filesystem or the network.
13539 * This reuses the existing @ctxt parser context
13540 *
13541 * Returns the resulting document tree
13542 */
13543xmlDocPtr
13544xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13545 const char *encoding, int options)
13546{
13547 xmlParserInputPtr stream;
13548
13549 if (filename == NULL)
13550 return (NULL);
13551 if (ctxt == NULL)
13552 return (NULL);
13553
13554 xmlCtxtReset(ctxt);
13555
13556 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
13557 if (stream == NULL) {
13558 return (NULL);
13559 }
13560 inputPush(ctxt, stream);
13561 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
13562}
13563
13564/**
13565 * xmlCtxtReadMemory:
13566 * @ctxt: an XML parser context
13567 * @buffer: a pointer to a char array
13568 * @size: the size of the array
13569 * @URL: the base URL to use for the document
13570 * @encoding: the document encoding, or NULL
13571 * @options: a combination of xmlParserOption
13572 *
13573 * parse an XML in-memory document and build a tree.
13574 * This reuses the existing @ctxt parser context
13575 *
13576 * Returns the resulting document tree
13577 */
13578xmlDocPtr
13579xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13580 const char *URL, const char *encoding, int options)
13581{
13582 xmlParserInputBufferPtr input;
13583 xmlParserInputPtr stream;
13584
13585 if (ctxt == NULL)
13586 return (NULL);
13587 if (buffer == NULL)
13588 return (NULL);
13589
13590 xmlCtxtReset(ctxt);
13591
13592 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13593 if (input == NULL) {
13594 return(NULL);
13595 }
13596
13597 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13598 if (stream == NULL) {
13599 xmlFreeParserInputBuffer(input);
13600 return(NULL);
13601 }
13602
13603 inputPush(ctxt, stream);
13604 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13605}
13606
13607/**
13608 * xmlCtxtReadFd:
13609 * @ctxt: an XML parser context
13610 * @fd: an open file descriptor
13611 * @URL: the base URL to use for the document
13612 * @encoding: the document encoding, or NULL
13613 * @options: a combination of xmlParserOption
13614 *
13615 * parse an XML from a file descriptor and build a tree.
13616 * This reuses the existing @ctxt parser context
13617 * NOTE that the file descriptor will not be closed when the
13618 * reader is closed or reset.
13619 *
13620 * Returns the resulting document tree
13621 */
13622xmlDocPtr
13623xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13624 const char *URL, const char *encoding, int options)
13625{
13626 xmlParserInputBufferPtr input;
13627 xmlParserInputPtr stream;
13628
13629 if (fd < 0)
13630 return (NULL);
13631 if (ctxt == NULL)
13632 return (NULL);
13633
13634 xmlCtxtReset(ctxt);
13635
13636
13637 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13638 if (input == NULL)
13639 return (NULL);
13640 input->closecallback = NULL;
13641 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13642 if (stream == NULL) {
13643 xmlFreeParserInputBuffer(input);
13644 return (NULL);
13645 }
13646 inputPush(ctxt, stream);
13647 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13648}
13649
13650/**
13651 * xmlCtxtReadIO:
13652 * @ctxt: an XML parser context
13653 * @ioread: an I/O read function
13654 * @ioclose: an I/O close function
13655 * @ioctx: an I/O handler
13656 * @URL: the base URL to use for the document
13657 * @encoding: the document encoding, or NULL
13658 * @options: a combination of xmlParserOption
13659 *
13660 * parse an XML document from I/O functions and source and build a tree.
13661 * This reuses the existing @ctxt parser context
13662 *
13663 * Returns the resulting document tree
13664 */
13665xmlDocPtr
13666xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13667 xmlInputCloseCallback ioclose, void *ioctx,
13668 const char *URL,
13669 const char *encoding, int options)
13670{
13671 xmlParserInputBufferPtr input;
13672 xmlParserInputPtr stream;
13673
13674 if (ioread == NULL)
13675 return (NULL);
13676 if (ctxt == NULL)
13677 return (NULL);
13678
13679 xmlCtxtReset(ctxt);
13680
13681 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13682 XML_CHAR_ENCODING_NONE);
13683 if (input == NULL)
13684 return (NULL);
13685 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13686 if (stream == NULL) {
13687 xmlFreeParserInputBuffer(input);
13688 return (NULL);
13689 }
13690 inputPush(ctxt, stream);
13691 return (xmlDoRead(ctxt, URL, encoding, options, 1));
13692}
13693
13694#define bottom_parser
13695#include "elfgcchack.h"
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette