HTMLparser.c@ 103285

最後變更在這個檔案從103285是 95312,由 vboxsync 提交於 2 年前
libs/{curl,libxml2}: OSE export fixes, bugref:8515
屬性 svn:eol-style 設為 `native`
檔案大小: 208.4 KB

行
1	/*
2	* HTMLparser.c : an HTML 4.0 non-verifying parser
3	*
4	* See Copyright for the status of this software.
5	*
6	* [email protected]
7	*/
8
9	#define IN_LIBXML
10	#include "libxml.h"
11	#ifdef LIBXML_HTML_ENABLED
12
13	#include <string.h>
14	#ifdef HAVE_CTYPE_H
15	#include <ctype.h>
16	#endif
17	#ifdef HAVE_STDLIB_H
18	#include <stdlib.h>
19	#endif
20	#ifdef HAVE_SYS_STAT_H
21	#include <sys/stat.h>
22	#endif
23	#ifdef HAVE_FCNTL_H
24	#include <fcntl.h>
25	#endif
26	#ifdef HAVE_UNISTD_H
27	#include <unistd.h>
28	#endif
29	#ifdef LIBXML_ZLIB_ENABLED
30	#include <zlib.h>
31	#endif
32
33	#include <libxml/xmlmemory.h>
34	#include <libxml/tree.h>
35	#include <libxml/parser.h>
36	#include <libxml/parserInternals.h>
37	#include <libxml/xmlerror.h>
38	#include <libxml/HTMLparser.h>
39	#include <libxml/HTMLtree.h>
40	#include <libxml/entities.h>
41	#include <libxml/encoding.h>
42	#include <libxml/valid.h>
43	#include <libxml/xmlIO.h>
44	#include <libxml/globals.h>
45	#include <libxml/uri.h>
46
47	#include "buf.h"
48	#include "enc.h"
49
/*
 * Size limits used by the HTML parser. The code consuming them is not part
 * of this section; presumably the maximum accepted name length and the sizes
 * of temporary character buffers -- TODO confirm against their users.
 */
#define HTML_MAX_NAMELEN 1000
#define HTML_PARSER_BIG_BUFFER_SIZE 1000
#define HTML_PARSER_BUFFER_SIZE 100

/* #define DEBUG */
/* #define DEBUG_PUSH */

/* NOTE(review): flag initialised to 1; its readers are outside this section. */
static int htmlOmittedDefaultValue = 1;

/* Forward declarations; the definitions are presumably later in the file. */
xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
			     xmlChar end, xmlChar end2, xmlChar end3);
static void htmlParseComment(htmlParserCtxtPtr ctxt);
62
63	/************************************************************************
64	* *
65	* Some factorized error routines *
66	* *
67	************************************************************************/
68
69	/**
70	* htmlErrMemory:
71	* @ctxt: an HTML parser context
72	* @extra: extra information
73	*
74	* Handle a redefinition of attribute error
75	*/
76	static void
77	htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
78	{
79	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
80	(ctxt->instate == XML_PARSER_EOF))
81	return;
82	if (ctxt != NULL) {
83	ctxt->errNo = XML_ERR_NO_MEMORY;
84	ctxt->instate = XML_PARSER_EOF;
85	ctxt->disableSAX = 1;
86	}
87	if (extra)
88	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
89	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
90	NULL, NULL, 0, 0,
91	"Memory allocation failed : %s\n", extra);
92	else
93	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
94	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
95	NULL, NULL, 0, 0, "Memory allocation failed\n");
96	}
97
98	/**
99	* htmlParseErr:
100	* @ctxt: an HTML parser context
101	* @error: the error number
102	* @msg: the error message
103	* @str1: string infor
104	* @str2: string infor
105	*
106	* Handle a fatal parser error, i.e. violating Well-Formedness constraints
107	*/
108	static void LIBXML_ATTR_FORMAT(3,0)
109	htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
110	const char msg, const xmlChar str1, const xmlChar *str2)
111	{
112	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
113	(ctxt->instate == XML_PARSER_EOF))
114	return;
115	if (ctxt != NULL)
116	ctxt->errNo = error;
117	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
118	XML_ERR_ERROR, NULL, 0,
119	(const char ) str1, (const char ) str2,
120	NULL, 0, 0,
121	msg, str1, str2);
122	if (ctxt != NULL)
123	ctxt->wellFormed = 0;
124	}
125
126	/**
127	* htmlParseErrInt:
128	* @ctxt: an HTML parser context
129	* @error: the error number
130	* @msg: the error message
131	* @val: integer info
132	*
133	* Handle a fatal parser error, i.e. violating Well-Formedness constraints
134	*/
135	static void LIBXML_ATTR_FORMAT(3,0)
136	htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
137	const char *msg, int val)
138	{
139	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
140	(ctxt->instate == XML_PARSER_EOF))
141	return;
142	if (ctxt != NULL)
143	ctxt->errNo = error;
144	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_HTML, error,
145	XML_ERR_ERROR, NULL, 0, NULL, NULL,
146	NULL, val, 0, msg, val);
147	if (ctxt != NULL)
148	ctxt->wellFormed = 0;
149	}
150
151	/************************************************************************
152	* *
153	* Parser stacks related functions and macros *
154	* *
155	************************************************************************/
156
157	/**
158	* htmlnamePush:
159	* @ctxt: an HTML parser context
160	* @value: the element name
161	*
162	* Pushes a new element name on top of the name stack
163	*
164	* Returns 0 in case of error, the index in the stack otherwise
165	*/
166	static int
167	htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
168	{
169	if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head")))
170	ctxt->html = 3;
171	if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body")))
172	ctxt->html = 10;
173	if (ctxt->nameNr >= ctxt->nameMax) {
174	ctxt->nameMax *= 2;
175	ctxt->nameTab = (const xmlChar * *)
176	xmlRealloc((xmlChar * *)ctxt->nameTab,
177	ctxt->nameMax *
178	sizeof(ctxt->nameTab[0]));
179	if (ctxt->nameTab == NULL) {
180	htmlErrMemory(ctxt, NULL);
181	return (0);
182	}
183	}
184	ctxt->nameTab[ctxt->nameNr] = value;
185	ctxt->name = value;
186	return (ctxt->nameNr++);
187	}
188	/**
189	* htmlnamePop:
190	* @ctxt: an HTML parser context
191	*
192	* Pops the top element name from the name stack
193	*
194	* Returns the name just removed
195	*/
196	static const xmlChar *
197	htmlnamePop(htmlParserCtxtPtr ctxt)
198	{
199	const xmlChar *ret;
200
201	if (ctxt->nameNr <= 0)
202	return (NULL);
203	ctxt->nameNr--;
204	if (ctxt->nameNr < 0)
205	return (NULL);
206	if (ctxt->nameNr > 0)
207	ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
208	else
209	ctxt->name = NULL;
210	ret = ctxt->nameTab[ctxt->nameNr];
211	ctxt->nameTab[ctxt->nameNr] = NULL;
212	return (ret);
213	}
214
215	/**
216	* htmlNodeInfoPush:
217	* @ctxt: an HTML parser context
218	* @value: the node info
219	*
220	* Pushes a new element name on top of the node info stack
221	*
222	* Returns 0 in case of error, the index in the stack otherwise
223	*/
224	static int
225	htmlNodeInfoPush(htmlParserCtxtPtr ctxt, htmlParserNodeInfo *value)
226	{
227	if (ctxt->nodeInfoNr >= ctxt->nodeInfoMax) {
228	if (ctxt->nodeInfoMax == 0)
229	ctxt->nodeInfoMax = 5;
230	ctxt->nodeInfoMax *= 2;
231	ctxt->nodeInfoTab = (htmlParserNodeInfo *)
232	xmlRealloc((htmlParserNodeInfo *)ctxt->nodeInfoTab,
233	ctxt->nodeInfoMax *
234	sizeof(ctxt->nodeInfoTab[0]));
235	if (ctxt->nodeInfoTab == NULL) {
236	htmlErrMemory(ctxt, NULL);
237	return (0);
238	}
239	}
240	ctxt->nodeInfoTab[ctxt->nodeInfoNr] = *value;
241	ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
242	return (ctxt->nodeInfoNr++);
243	}
244
245	/**
246	* htmlNodeInfoPop:
247	* @ctxt: an HTML parser context
248	*
249	* Pops the top element name from the node info stack
250	*
251	* Returns 0 in case of error, the pointer to NodeInfo otherwise
252	*/
253	static htmlParserNodeInfo *
254	htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
255	{
256	if (ctxt->nodeInfoNr <= 0)
257	return (NULL);
258	ctxt->nodeInfoNr--;
259	if (ctxt->nodeInfoNr < 0)
260	return (NULL);
261	if (ctxt->nodeInfoNr > 0)
262	ctxt->nodeInfo = &ctxt->nodeInfoTab[ctxt->nodeInfoNr - 1];
263	else
264	ctxt->nodeInfo = NULL;
265	return &ctxt->nodeInfoTab[ctxt->nodeInfoNr];
266	}
267
268	/*
269	* Macros for accessing the content. Those should be used only by the parser,
270	* and not exported.
271	*
272	* Dirty macros, i.e. one need to make assumption on the context to use them
273	*
274	* CUR_PTR return the current pointer to the xmlChar to be parsed.
275	* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
276	* in ISO-Latin or UTF-8, and the current 16 bit value if compiled
277	* in UNICODE mode. This should be used internally by the parser
278	* only to compare to ASCII values otherwise it would break when
279	* running with UTF-8 encoding.
280	* NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
281	* to compare on ASCII based substring.
282	* UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
283	* it should be used only to compare on ASCII based substring.
284	* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
285	* strings without newlines within the parser.
286	*
287	* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
288	*
289	* CURRENT Returns the current char value, with the full decoding of
290	* UTF-8 if we are using this mode. It returns an int.
291	* NEXT Skip to the next character, this does the proper decoding
292	* in UTF-8 mode. It also pop-up unfinished entities on the fly.
293	* NEXTL(l) Skip the current unicode character of l xmlChars long.
294	* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
295	*/
296
/*
 * All of the macros below expect a variable named `ctxt` (the parser
 * context) to be in scope at the point of use.
 *
 * NOTE(review): SKIP, SHRINK, GROW and COPY_BUF expand to bare statements
 * or comma expressions rather than do { } while (0) blocks, so they must
 * only be used as complete statements.
 */

/* Current byte, upper-cased. */
#define UPPER (toupper(*ctxt->input->cur))

/* Advance the input pointer and the column counter by val. */
#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val)

/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]

/* Same as NXT, upper-cased. */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* Shrink the input when much has been consumed and little remains ahead. */
#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserInputShrink(ctxt->input)

/* Outside progressive mode, read more input when fewer than INPUT_CHUNK
 * bytes remain ahead of the current position. */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt)

/* -1 while a token is pending in ctxt->token, the current byte otherwise. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))


/* Consume l bytes: update line/column from the current byte, clear any
 * pending token, then advance the input pointer. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
  } while (0)

/************
    \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/* Append character v, encoded on l bytes, to buffer b at index i. */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v)
348
349	/**
350	* htmlFindEncoding:
351	* @the HTML parser context
352	*
353	* Ty to find and encoding in the current data available in the input
354	* buffer this is needed to try to switch to the proper encoding when
355	* one face a character error.
356	* That's an heuristic, since it's operating outside of parsing it could
357	* try to use a meta which had been commented out, that's the reason it
358	* should only be used in case of error, not as a default.
359	*
360	* Returns an encoding string or NULL if not found, the string need to
361	* be freed
362	*/
363	static xmlChar *
364	htmlFindEncoding(xmlParserCtxtPtr ctxt) {
365	const xmlChar start, cur, *end;
366
367	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
368	(ctxt->input->encoding != NULL) \|\| (ctxt->input->buf == NULL) \|\|
369	(ctxt->input->buf->encoder != NULL))
370	return(NULL);
371	if ((ctxt->input->cur == NULL) \|\| (ctxt->input->end == NULL))
372	return(NULL);
373
374	start = ctxt->input->cur;
375	end = ctxt->input->end;
376	/* we also expect the input buffer to be zero terminated */
377	if (*end != 0)
378	return(NULL);
379
380	cur = xmlStrcasestr(start, BAD_CAST "HTTP-EQUIV");
381	if (cur == NULL)
382	return(NULL);
383	cur = xmlStrcasestr(cur, BAD_CAST "CONTENT");
384	if (cur == NULL)
385	return(NULL);
386	cur = xmlStrcasestr(cur, BAD_CAST "CHARSET=");
387	if (cur == NULL)
388	return(NULL);
389	cur += 8;
390	start = cur;
391	while (((cur >= 'A') && (cur <= 'Z')) \|\|
392	((cur >= 'a') && (cur <= 'z')) \|\|
393	((cur >= '0') && (cur <= '9')) \|\|
394	(cur == '-') \|\| (cur == '_') \|\| (cur == ':') \|\| (cur == '/'))
395	cur++;
396	if (cur == start)
397	return(NULL);
398	return(xmlStrndup(start, cur - start));
399	}
400
401	/**
402	* htmlCurrentChar:
403	* @ctxt: the HTML parser context
404	* @len: pointer to the length of the char read
405	*
406	* The current char value, if using UTF-8 this may actually span multiple
407	* bytes in the input buffer. Implement the end of line normalization:
408	* 2.11 End-of-Line Handling
409	* If the encoding is unspecified, in the case we find an ISO-Latin-1
410	* char, then the encoding converter is plugged in automatically.
411	*
412	* Returns the current char value and its length
413	*/
414
415	static int
416	htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
417	const unsigned char *cur;
418	unsigned char c;
419	unsigned int val;
420
421	if (ctxt->instate == XML_PARSER_EOF)
422	return(0);
423
424	if (ctxt->token != 0) {
425	*len = 0;
426	return(ctxt->token);
427	}
428	if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
429	xmlChar * guess;
430	xmlCharEncodingHandlerPtr handler;
431
432	/*
433	* Assume it's a fixed length encoding (1) with
434	* a compatible encoding for the ASCII set, since
435	* HTML constructs only use < 128 chars
436	*/
437	if ((int) *ctxt->input->cur < 0x80) {
438	*len = 1;
439	if ((*ctxt->input->cur == 0) &&
440	(ctxt->input->cur < ctxt->input->end)) {
441	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
442	"Char 0x%X out of allowed range\n", 0);
443	return(' ');
444	}
445	return((int) *ctxt->input->cur);
446	}
447
448	/*
449	* Humm this is bad, do an automatic flow conversion
450	*/
451	guess = htmlFindEncoding(ctxt);
452	if (guess == NULL) {
453	xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
454	} else {
455	if (ctxt->input->encoding != NULL)
456	xmlFree((xmlChar *) ctxt->input->encoding);
457	ctxt->input->encoding = guess;
458	handler = xmlFindCharEncodingHandler((const char *) guess);
459	if (handler != NULL) {
460	/*
461	* Don't use UTF-8 encoder which isn't required and
462	* can produce invalid UTF-8.
463	*/
464	if (!xmlStrEqual(BAD_CAST handler->name, BAD_CAST "UTF-8"))
465	xmlSwitchToEncoding(ctxt, handler);
466	} else {
467	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
468	"Unsupported encoding %s", guess, NULL);
469	}
470	}
471	ctxt->charset = XML_CHAR_ENCODING_UTF8;
472	}
473
474	/*
475	* We are supposed to handle UTF8, check it's valid
476	* From rfc2044: encoding of the Unicode values on UTF-8:
477	*
478	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
479	* 0000 0000-0000 007F 0xxxxxxx
480	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
481	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
482	*
483	* Check for the 0x110000 limit too
484	*/
485	cur = ctxt->input->cur;
486	c = *cur;
487	if (c & 0x80) {
488	if ((c & 0x40) == 0)
489	goto encoding_error;
490	if (cur[1] == 0) {
491	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
492	cur = ctxt->input->cur;
493	}
494	if ((cur[1] & 0xc0) != 0x80)
495	goto encoding_error;
496	if ((c & 0xe0) == 0xe0) {
497
498	if (cur[2] == 0) {
499	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
500	cur = ctxt->input->cur;
501	}
502	if ((cur[2] & 0xc0) != 0x80)
503	goto encoding_error;
504	if ((c & 0xf0) == 0xf0) {
505	if (cur[3] == 0) {
506	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
507	cur = ctxt->input->cur;
508	}
509	if (((c & 0xf8) != 0xf0) \|\|
510	((cur[3] & 0xc0) != 0x80))
511	goto encoding_error;
512	/* 4-byte code */
513	*len = 4;
514	val = (cur[0] & 0x7) << 18;
515	val \|= (cur[1] & 0x3f) << 12;
516	val \|= (cur[2] & 0x3f) << 6;
517	val \|= cur[3] & 0x3f;
518	if (val < 0x10000)
519	goto encoding_error;
520	} else {
521	/* 3-byte code */
522	*len = 3;
523	val = (cur[0] & 0xf) << 12;
524	val \|= (cur[1] & 0x3f) << 6;
525	val \|= cur[2] & 0x3f;
526	if (val < 0x800)
527	goto encoding_error;
528	}
529	} else {
530	/* 2-byte code */
531	*len = 2;
532	val = (cur[0] & 0x1f) << 6;
533	val \|= cur[1] & 0x3f;
534	if (val < 0x80)
535	goto encoding_error;
536	}
537	if (!IS_CHAR(val)) {
538	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
539	"Char 0x%X out of allowed range\n", val);
540	}
541	return(val);
542	} else {
543	if ((*ctxt->input->cur == 0) &&
544	(ctxt->input->cur < ctxt->input->end)) {
545	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
546	"Char 0x%X out of allowed range\n", 0);
547	*len = 1;
548	return(' ');
549	}
550	/* 1-byte code */
551	*len = 1;
552	return((int) *ctxt->input->cur);
553	}
554
555	encoding_error:
556	/*
557	* If we detect an UTF8 error that probably mean that the
558	* input encoding didn't get properly advertised in the
559	* declaration header. Report the error and switch the encoding
560	* to ISO-Latin-1 (if you don't like this policy, just declare the
561	* encoding !)
562	*/
563	{
564	char buffer[150];
565
566	if (ctxt->input->end - ctxt->input->cur >= 4) {
567	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
568	ctxt->input->cur[0], ctxt->input->cur[1],
569	ctxt->input->cur[2], ctxt->input->cur[3]);
570	} else {
571	snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
572	}
573	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
574	"Input is not proper UTF-8, indicate encoding !\n",
575	BAD_CAST buffer, NULL);
576	}
577
578	/*
579	* Don't switch encodings twice. Note that if there's an encoder, we
580	* shouldn't receive invalid UTF-8 anyway.
581	*
582	* Note that if ctxt->input->buf == NULL, switching encodings is
583	* impossible, see Gitlab issue #34.
584	*/
585	if ((ctxt->input->buf != NULL) &&
586	(ctxt->input->buf->encoder == NULL))
587	xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
588	*len = 1;
589	return((int) *ctxt->input->cur);
590	}
591
592	/**
593	* htmlSkipBlankChars:
594	* @ctxt: the HTML parser context
595	*
596	* skip all blanks character found at that point in the input streams.
597	*
598	* Returns the number of space chars skipped
599	*/
600
601	static int
602	htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
603	int res = 0;
604
605	while (IS_BLANK_CH(*(ctxt->input->cur))) {
606	if ((*ctxt->input->cur == 0) &&
607	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
608	xmlPopInput(ctxt);
609	} else {
610	if (*(ctxt->input->cur) == '\n') {
611	ctxt->input->line++; ctxt->input->col = 1;
612	} else ctxt->input->col++;
613	ctxt->input->cur++;
614	if (*ctxt->input->cur == 0)
615	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
616	}
617	if (res < INT_MAX)
618	res++;
619	}
620	return(res);
621	}
622
623
624
625	/************************************************************************
626	* *
627	* The list of HTML elements and their properties *
628	* *
629	************************************************************************/
630
631	/*
632	* Start Tag: 1 means the start tag can be omitted
633	* End Tag: 1 means the end tag can be omitted
634	* 2 means it's forbidden (empty elements)
635	* 3 means the tag is stylistic and should be closed easily
636	* Depr: this element is deprecated
637	* DTD: 1 means that this element is valid only in the Loose DTD
638	* 2 means that this element is valid only in the Frameset DTD
639	*
640	* Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
641	, subElements , impliedsubelt , Attributes, userdata
642	*/
643
644	/* Definitions and a couple of vars for HTML Elements */
645
/*
 * Element name groups and their sizes. Each group macro expands to a
 * comma-separated list of string literals; the matching NB_ macro is the
 * number of names in the list. The composite NB_ sums are parenthesized so
 * they stay correct inside larger expressions.
 */
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define NB_FONTSTYLE 8
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define NB_PHRASE 10
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
#define NB_SPECIAL 16
#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
#define FORMCTRL "input", "select", "textarea", "label", "button"
#define NB_FORMCTRL 5
#define PCDATA
#define NB_PCDATA 0
#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
#define NB_HEADING 6
#define LIST "ul", "ol", "dir", "menu"
#define NB_LIST 4
#define MODIFIER
#define NB_MODIFIER 0
#define FLOW BLOCK,INLINE
#define NB_FLOW (NB_BLOCK + NB_INLINE)
#define EMPTY NULL


/* NULL-terminated name lists built from the groups above. */
static const char* const html_flow[] = { FLOW, NULL } ;
static const char* const html_inline[] = { INLINE, NULL } ;

/* placeholders: elts with content but no subelements */
static const char* const html_pcdata[] = { NULL } ;
#define html_cdata html_pcdata
677
678
679	/* ... and for HTML Attributes */
680
/*
 * Attribute name groups and their sizes. Each group macro expands to a
 * comma-separated list of string literals; the matching NB_ macro is the
 * number of names in the list.
 */
#define COREATTRS "id", "class", "style", "title"
#define NB_COREATTRS 4
#define I18N "lang", "dir"
#define NB_I18N 2
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
#define NB_EVENTS 9
#define ATTRS COREATTRS,I18N,EVENTS
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
#define CELLHALIGN "align", "char", "charoff"
#define NB_CELLHALIGN 3
#define CELLVALIGN "valign"
#define NB_CELLVALIGN 1

/* NULL-terminated attribute lists built from the groups above. */
static const char* const html_attrs[] = { ATTRS, NULL } ;
static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
static const char* const core_attrs[] = { COREATTRS, NULL } ;
static const char* const i18n_attrs[] = { I18N, NULL } ;
698
699
700	/* Other declarations that should go inline ... */
701	static const char* const a_attrs[] = { ATTRS, "charset", "type", "name",
702	"href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
703	"tabindex", "onfocus", "onblur", NULL } ;
704	static const char* const target_attr[] = { "target", NULL } ;
705	static const char* const rows_cols_attr[] = { "rows", "cols", NULL } ;
706	static const char* const alt_attr[] = { "alt", NULL } ;
707	static const char* const src_alt_attrs[] = { "src", "alt", NULL } ;
708	static const char* const href_attrs[] = { "href", NULL } ;
709	static const char* const clear_attrs[] = { "clear", NULL } ;
710	static const char* const inline_p[] = { INLINE, "p", NULL } ;
711
712	static const char* const flow_param[] = { FLOW, "param", NULL } ;
713	static const char* const applet_attrs[] = { COREATTRS , "codebase",
714	"archive", "alt", "name", "height", "width", "align",
715	"hspace", "vspace", NULL } ;
716	static const char* const area_attrs[] = { "shape", "coords", "href", "nohref",
717	"tabindex", "accesskey", "onfocus", "onblur", NULL } ;
718	static const char* const basefont_attrs[] =
719	{ "id", "size", "color", "face", NULL } ;
720	static const char* const quote_attrs[] = { ATTRS, "cite", NULL } ;
721	static const char* const body_contents[] = { FLOW, "ins", "del", NULL } ;
722	static const char* const body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;
723	static const char* const body_depr[] = { "background", "bgcolor", "text",
724	"link", "vlink", "alink", NULL } ;
725	static const char* const button_attrs[] = { ATTRS, "name", "value", "type",
726	"disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;
727
728
729	static const char* const col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;
730	static const char* const col_elt[] = { "col", NULL } ;
731	static const char* const edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;
732	static const char* const compact_attrs[] = { ATTRS, "compact", NULL } ;
733	static const char* const dl_contents[] = { "dt", "dd", NULL } ;
734	static const char* const compact_attr[] = { "compact", NULL } ;
735	static const char* const label_attr[] = { "label", NULL } ;
736	static const char* const fieldset_contents[] = { FLOW, "legend" } ;
737	static const char* const font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;
738	static const char* const form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;
739	static const char* const form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;
740	static const char* const frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;
741	static const char* const frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;
742	static const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;
743	static const char* const head_attrs[] = { I18N, "profile", NULL } ;
744	static const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ;
745	static const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL } ;
746	static const char* const version_attr[] = { "version", NULL } ;
747	static const char* const html_content[] = { "head", "body", "frameset", NULL } ;
748	static const char* const iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;
749	static const char* const img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;
750	static const char* const embed_attrs[] = { COREATTRS, "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL } ;
751	static const char* const input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;
752	static const char* const prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;
753	static const char* const label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;
754	static const char* const legend_attrs[] = { ATTRS, "accesskey", NULL } ;
755	static const char* const align_attr[] = { "align", NULL } ;
756	static const char* const link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;
757	static const char* const map_contents[] = { BLOCK, "area", NULL } ;
758	static const char* const name_attr[] = { "name", NULL } ;
759	static const char* const action_attr[] = { "action", NULL } ;
760	static const char* const blockli_elt[] = { BLOCK, "li", NULL } ;
761	static const char* const meta_attrs[] = { I18N, "http-equiv", "name", "scheme", "charset", NULL } ;
762	static const char* const content_attr[] = { "content", NULL } ;
763	static const char* const type_attr[] = { "type", NULL } ;
764	static const char* const noframes_content[] = { "body", FLOW MODIFIER, NULL } ;
765	static const char* const object_contents[] = { FLOW, "param", NULL } ;
766	static const char* const object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;
767	static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;
768	static const char* const ol_attrs[] = { "type", "compact", "start", NULL} ;
769	static const char* const option_elt[] = { "option", NULL } ;
770	static const char* const optgroup_attrs[] = { ATTRS, "disabled", NULL } ;
771	static const char* const option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;
772	static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;
773	static const char* const width_attr[] = { "width", NULL } ;
774	static const char* const pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;
775	static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;
776	static const char* const language_attr[] = { "language", NULL } ;
777	static const char* const select_content[] = { "optgroup", "option", NULL } ;
778	static const char* const select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;
779	static const char* const style_attrs[] = { I18N, "media", "title", NULL } ;
780	static const char* const table_attrs[] = { ATTRS, "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;
781	static const char* const table_depr[] = { "align", "bgcolor", NULL } ;
782	static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;
783	static const char* const tr_elt[] = { "tr", NULL } ;
784	static const char* const talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;
785	static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;
786	static const char* const th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;
787	static const char* const textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;
788	static const char* const tr_contents[] = { "th", "td", NULL } ;
789	static const char* const bgcolor_attr[] = { "bgcolor", NULL } ;
790	static const char* const li_elt[] = { "li", NULL } ;
791	static const char* const ul_depr[] = { "type", "compact", NULL} ;
792	static const char* const dir_attr[] = { "dir", NULL} ;
793
794	#define DECL (const char**)
795
796	static const htmlElemDesc
797	html40ElementTable[] = {
798	{ "a", 0, 0, 0, 0, 0, 0, 1, "anchor ",
799	DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL
800	},
801	{ "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form",
802	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
803	},
804	{ "acronym", 0, 0, 0, 0, 0, 0, 1, "",
805	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
806	},
807	{ "address", 0, 0, 0, 0, 0, 0, 0, "information on author ",
808	DECL inline_p , NULL , DECL html_attrs, NULL, NULL
809	},
810	{ "applet", 0, 0, 0, 0, 1, 1, 2, "java applet ",
811	DECL flow_param , NULL , NULL , DECL applet_attrs, NULL
812	},
813	{ "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
814	EMPTY , NULL , DECL area_attrs , DECL target_attr, DECL alt_attr
815	},
816	{ "b", 0, 3, 0, 0, 0, 0, 1, "bold text style",
817	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
818	},
819	{ "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ",
820	EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs
821	},
822	{ "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " ,
823	EMPTY , NULL , NULL, DECL basefont_attrs, NULL
824	},
825	{ "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
826	DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr
827	},
828	{ "big", 0, 3, 0, 0, 0, 0, 1, "large text style",
829	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
830	},
831	{ "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ",
832	DECL html_flow , NULL , DECL quote_attrs , NULL, NULL
833	},
834	{ "body", 1, 1, 0, 0, 0, 0, 0, "document body ",
835	DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL
836	},
837	{ "br", 0, 2, 2, 1, 0, 0, 1, "forced line break ",
838	EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL
839	},
840	{ "button", 0, 0, 0, 0, 0, 0, 2, "push button ",
841	DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL
842	},
843	{ "caption", 0, 0, 0, 0, 0, 0, 0, "table caption ",
844	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
845	},
846	{ "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
847	DECL html_flow , NULL , NULL, DECL html_attrs, NULL
848	},
849	{ "cite", 0, 0, 0, 0, 0, 0, 1, "citation",
850	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
851	},
852	{ "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment",
853	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
854	},
855	{ "col", 0, 2, 2, 1, 0, 0, 0, "table column ",
856	EMPTY , NULL , DECL col_attrs , NULL, NULL
857	},
858	{ "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ",
859	DECL col_elt , "col" , DECL col_attrs , NULL, NULL
860	},
861	{ "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ",
862	DECL html_flow , NULL , DECL html_attrs, NULL, NULL
863	},
864	{ "del", 0, 0, 0, 0, 0, 0, 2, "deleted text ",
865	DECL html_flow , NULL , DECL edit_attrs , NULL, NULL
866	},
867	{ "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition",
868	DECL html_inline , NULL , DECL html_attrs, NULL, NULL
869	},
870	{ "dir", 0, 0, 0, 0, 1, 1, 0, "directory list",
871	DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL
872	},
873	{ "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container",
874	DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL
875	},
876	{ "dl", 0, 0, 0, 0, 0, 0, 0, "definition list ",
877	DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL
878	},
879	{ "dt", 0, 1, 0, 0, 0, 0, 0, "definition term ",
880	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
881	},
882	{ "em", 0, 3, 0, 0, 0, 0, 1, "emphasis",
883	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
884	},
885	{ "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ",
886	EMPTY, NULL, DECL embed_attrs, NULL, NULL
887	},
888	{ "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ",
889	DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL
890	},
891	{ "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ",
892	DECL html_inline, NULL, NULL, DECL font_attrs, NULL
893	},
894	{ "form", 0, 0, 0, 0, 0, 0, 0, "interactive form ",
895	DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr
896	},
897	{ "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " ,
898	EMPTY, NULL, NULL, DECL frame_attrs, NULL
899	},
900	{ "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
901	DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL
902	},
903	{ "h1", 0, 0, 0, 0, 0, 0, 0, "heading ",
904	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
905	},
906	{ "h2", 0, 0, 0, 0, 0, 0, 0, "heading ",
907	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
908	},
909	{ "h3", 0, 0, 0, 0, 0, 0, 0, "heading ",
910	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
911	},
912	{ "h4", 0, 0, 0, 0, 0, 0, 0, "heading ",
913	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
914	},
915	{ "h5", 0, 0, 0, 0, 0, 0, 0, "heading ",
916	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
917	},
918	{ "h6", 0, 0, 0, 0, 0, 0, 0, "heading ",
919	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
920	},
921	{ "head", 1, 1, 0, 0, 0, 0, 0, "document head ",
922	DECL head_contents, NULL, DECL head_attrs, NULL, NULL
923	},
924	{ "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,
925	EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL
926	},
927	{ "html", 1, 1, 0, 0, 0, 0, 0, "document root element ",
928	DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL
929	},
930	{ "i", 0, 3, 0, 0, 0, 0, 1, "italic text style",
931	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
932	},
933	{ "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
934	DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL
935	},
936	{ "img", 0, 2, 2, 1, 0, 0, 1, "embedded image ",
937	EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs
938	},
939	{ "input", 0, 2, 2, 1, 0, 0, 1, "form control ",
940	EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL
941	},
942	{ "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text",
943	DECL html_flow, NULL, DECL edit_attrs, NULL, NULL
944	},
945	{ "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ",
946	EMPTY, NULL, NULL, DECL prompt_attrs, NULL
947	},
948	{ "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
949	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
950	},
951	{ "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ",
952	DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL
953	},
954	{ "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
955	DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL
956	},
957	{ "li", 0, 1, 1, 0, 0, 0, 0, "list item ",
958	DECL html_flow, NULL, DECL html_attrs, NULL, NULL
959	},
960	{ "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
961	EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL
962	},
963	{ "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ",
964	DECL map_contents , NULL, DECL html_attrs , NULL, DECL name_attr
965	},
966	{ "menu", 0, 0, 0, 0, 1, 1, 0, "menu list ",
967	DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL
968	},
969	{ "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
970	EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr
971	},
972	{ "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
973	DECL noframes_content, "body" , DECL html_attrs, NULL, NULL
974	},
975	{ "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
976	DECL html_flow, "div", DECL html_attrs, NULL, NULL
977	},
978	{ "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
979	DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL
980	},
981	{ "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list ",
982	DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL
983	},
984	{ "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group ",
985	DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr
986	},
987	{ "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
988	DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL
989	},
990	{ "p", 0, 1, 0, 0, 0, 0, 0, "paragraph ",
991	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL
992	},
993	{ "param", 0, 2, 2, 1, 0, 0, 0, "named property value ",
994	EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr
995	},
996	{ "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text ",
997	DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL
998	},
999	{ "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
1000	DECL html_inline, NULL, DECL quote_attrs, NULL, NULL
1001	},
1002	{ "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style",
1003	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
1004	},
1005	{ "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
1006	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1007	},
1008	{ "script", 0, 0, 0, 0, 0, 0, 2, "script statements ",
1009	DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr
1010	},
1011	{ "select", 0, 0, 0, 0, 0, 0, 1, "option selector ",
1012	DECL select_content, NULL, DECL select_attrs, NULL, NULL
1013	},
1014	{ "small", 0, 3, 0, 0, 0, 0, 1, "small text style",
1015	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1016	},
1017	{ "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
1018	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1019	},
1020	{ "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text",
1021	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
1022	},
1023	{ "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis",
1024	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1025	},
1026	{ "style", 0, 0, 0, 0, 0, 0, 0, "style info ",
1027	DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr
1028	},
1029	{ "sub", 0, 3, 0, 0, 0, 0, 1, "subscript",
1030	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1031	},
1032	{ "sup", 0, 3, 0, 0, 0, 0, 1, "superscript ",
1033	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1034	},
1035	{ "table", 0, 0, 0, 0, 0, 0, 0, "",
1036	DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL
1037	},
1038	{ "tbody", 1, 0, 0, 0, 0, 0, 0, "table body ",
1039	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
1040	},
1041	{ "td", 0, 0, 0, 0, 0, 0, 0, "table data cell",
1042	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
1043	},
1044	{ "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
1045	DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr
1046	},
1047	{ "tfoot", 0, 1, 0, 0, 0, 0, 0, "table footer ",
1048	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
1049	},
1050	{ "th", 0, 1, 0, 0, 0, 0, 0, "table header cell",
1051	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL
1052	},
1053	{ "thead", 0, 1, 0, 0, 0, 0, 0, "table header ",
1054	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL
1055	},
1056	{ "title", 0, 0, 0, 0, 0, 0, 0, "document title ",
1057	DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL
1058	},
1059	{ "tr", 0, 0, 0, 0, 0, 0, 0, "table row ",
1060	DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL
1061	},
1062	{ "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
1063	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1064	},
1065	{ "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style",
1066	DECL html_inline, NULL, NULL, DECL html_attrs, NULL
1067	},
1068	{ "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list ",
1069	DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL
1070	},
1071	{ "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
1072	DECL html_inline, NULL, DECL html_attrs, NULL, NULL
1073	}
1074	};
1075
/*
 * One auto-close rule: opening the start tag `newTag` while an `oldTag`
 * element is the current one implies the end of that `oldTag` element.
 */
typedef struct {
    const char *oldTag;		/* element that gets closed */
    const char *newTag;		/* start tag that triggers the close */
} htmlStartCloseEntry;

/*
 * start tags that imply the end of current element
 *
 * htmlCheckAutoClose() looks rules up with bsearch(), comparing entries
 * with htmlCompareStartClose() (strcmp() on oldTag, then on newTag).
 * The rows below must therefore stay sorted by oldTag first and by newTag
 * second; each group of rows shares one oldTag.
 */
static const htmlStartCloseEntry htmlStartClose[] = {
    /* a */
    { "a", "a" }, { "a", "fieldset" }, { "a", "table" }, { "a", "td" },
    { "a", "th" },
    /* address */
    { "address", "dd" }, { "address", "dl" }, { "address", "dt" },
    { "address", "form" }, { "address", "li" }, { "address", "ul" },
    /* b */
    { "b", "center" }, { "b", "p" }, { "b", "td" }, { "b", "th" },
    /* big */
    { "big", "p" },
    /* caption */
    { "caption", "col" }, { "caption", "colgroup" }, { "caption", "tbody" },
    { "caption", "tfoot" }, { "caption", "thead" }, { "caption", "tr" },
    /* col */
    { "col", "col" }, { "col", "colgroup" }, { "col", "tbody" },
    { "col", "tfoot" }, { "col", "thead" }, { "col", "tr" },
    /* colgroup */
    { "colgroup", "colgroup" }, { "colgroup", "tbody" },
    { "colgroup", "tfoot" }, { "colgroup", "thead" }, { "colgroup", "tr" },
    /* dd */
    { "dd", "dt" },
    /* dir */
    { "dir", "dd" }, { "dir", "dl" }, { "dir", "dt" }, { "dir", "form" },
    { "dir", "ul" },
    /* dl */
    { "dl", "form" }, { "dl", "li" },
    /* dt */
    { "dt", "dd" }, { "dt", "dl" },
    /* font */
    { "font", "center" }, { "font", "td" }, { "font", "th" },
    /* form */
    { "form", "form" },
    /* h1 .. h6 */
    { "h1", "fieldset" }, { "h1", "form" }, { "h1", "li" }, { "h1", "p" },
    { "h1", "table" },
    { "h2", "fieldset" }, { "h2", "form" }, { "h2", "li" }, { "h2", "p" },
    { "h2", "table" },
    { "h3", "fieldset" }, { "h3", "form" }, { "h3", "li" }, { "h3", "p" },
    { "h3", "table" },
    { "h4", "fieldset" }, { "h4", "form" }, { "h4", "li" }, { "h4", "p" },
    { "h4", "table" },
    { "h5", "fieldset" }, { "h5", "form" }, { "h5", "li" }, { "h5", "p" },
    { "h5", "table" },
    { "h6", "fieldset" }, { "h6", "form" }, { "h6", "li" }, { "h6", "p" },
    { "h6", "table" },
    /* head */
    { "head", "a" }, { "head", "abbr" }, { "head", "acronym" },
    { "head", "address" }, { "head", "b" }, { "head", "bdo" },
    { "head", "big" }, { "head", "blockquote" }, { "head", "body" },
    { "head", "br" }, { "head", "center" }, { "head", "cite" },
    { "head", "code" }, { "head", "dd" }, { "head", "dfn" },
    { "head", "dir" }, { "head", "div" }, { "head", "dl" },
    { "head", "dt" }, { "head", "em" }, { "head", "fieldset" },
    { "head", "font" }, { "head", "form" }, { "head", "frameset" },
    { "head", "h1" }, { "head", "h2" }, { "head", "h3" },
    { "head", "h4" }, { "head", "h5" }, { "head", "h6" },
    { "head", "hr" }, { "head", "i" }, { "head", "iframe" },
    { "head", "img" }, { "head", "kbd" }, { "head", "li" },
    { "head", "listing" }, { "head", "map" }, { "head", "menu" },
    { "head", "ol" }, { "head", "p" }, { "head", "pre" },
    { "head", "q" }, { "head", "s" }, { "head", "samp" },
    { "head", "small" }, { "head", "span" }, { "head", "strike" },
    { "head", "strong" }, { "head", "sub" }, { "head", "sup" },
    { "head", "table" }, { "head", "tt" }, { "head", "u" },
    { "head", "ul" }, { "head", "var" }, { "head", "xmp" },
    /* hr */
    { "hr", "form" },
    /* i */
    { "i", "center" }, { "i", "p" }, { "i", "td" }, { "i", "th" },
    /* legend */
    { "legend", "fieldset" },
    /* li */
    { "li", "li" },
    /* link */
    { "link", "body" }, { "link", "frameset" },
    /* listing */
    { "listing", "dd" }, { "listing", "dl" }, { "listing", "dt" },
    { "listing", "fieldset" }, { "listing", "form" }, { "listing", "li" },
    { "listing", "table" }, { "listing", "ul" },
    /* menu */
    { "menu", "dd" }, { "menu", "dl" }, { "menu", "dt" }, { "menu", "form" },
    { "menu", "ul" },
    /* ol */
    { "ol", "form" }, { "ol", "ul" },
    /* option */
    { "option", "optgroup" }, { "option", "option" },
    /* p */
    { "p", "address" }, { "p", "blockquote" }, { "p", "body" },
    { "p", "caption" }, { "p", "center" }, { "p", "col" },
    { "p", "colgroup" }, { "p", "dd" }, { "p", "dir" },
    { "p", "div" }, { "p", "dl" }, { "p", "dt" },
    { "p", "fieldset" }, { "p", "form" }, { "p", "frameset" },
    { "p", "h1" }, { "p", "h2" }, { "p", "h3" },
    { "p", "h4" }, { "p", "h5" }, { "p", "h6" },
    { "p", "head" }, { "p", "hr" }, { "p", "li" },
    { "p", "listing" }, { "p", "menu" }, { "p", "ol" },
    { "p", "p" }, { "p", "pre" }, { "p", "table" },
    { "p", "tbody" }, { "p", "td" }, { "p", "tfoot" },
    { "p", "th" }, { "p", "title" }, { "p", "tr" },
    { "p", "ul" }, { "p", "xmp" },
    /* pre */
    { "pre", "dd" }, { "pre", "dl" }, { "pre", "dt" },
    { "pre", "fieldset" }, { "pre", "form" }, { "pre", "li" },
    { "pre", "table" }, { "pre", "ul" },
    /* s */
    { "s", "p" },
    /* script */
    { "script", "noscript" },
    /* small */
    { "small", "p" },
    /* span */
    { "span", "td" }, { "span", "th" },
    /* strike */
    { "strike", "p" },
    /* style */
    { "style", "body" }, { "style", "frameset" },
    /* tbody */
    { "tbody", "tbody" }, { "tbody", "tfoot" },
    /* td */
    { "td", "tbody" }, { "td", "td" }, { "td", "tfoot" }, { "td", "th" },
    { "td", "tr" },
    /* tfoot */
    { "tfoot", "tbody" },
    /* th */
    { "th", "tbody" }, { "th", "td" }, { "th", "tfoot" }, { "th", "th" },
    { "th", "tr" },
    /* thead */
    { "thead", "tbody" }, { "thead", "tfoot" },
    /* title */
    { "title", "body" }, { "title", "frameset" },
    /* tr */
    { "tr", "tbody" }, { "tr", "tfoot" }, { "tr", "tr" },
    /* tt */
    { "tt", "p" },
    /* u */
    { "u", "p" }, { "u", "td" }, { "u", "th" },
    /* ul */
    { "ul", "address" }, { "ul", "form" }, { "ul", "menu" }, { "ul", "ol" },
    { "ul", "pre" },
    /* xmp */
    { "xmp", "dd" }, { "xmp", "dl" }, { "xmp", "dt" },
    { "xmp", "fieldset" }, { "xmp", "form" }, { "xmp", "li" },
    { "xmp", "table" }, { "xmp", "ul" }
};
1337
/*
 * NULL-terminated list of the HTML elements which are supposed not to
 * have CDATA content: htmlCheckParagraph() implies a p element when the
 * current element is one of them.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *const htmlNoContentElements[] = { "html", "head", NULL };
1350
/*
 * The list of HTML attributes which are of content %Script;
 * The array has no terminator: users iterate over it with sizeof.
 * NOTE: when adding ones, check htmlIsScriptAttribute() since
 *       it assumes the name starts with 'on'
 */
static const char *const htmlScriptAttributes[] = {
    /* mouse */
    "onclick", "ondblclick", "onmousedown", "onmouseup",
    "onmouseover", "onmousemove", "onmouseout",
    /* keyboard */
    "onkeypress", "onkeydown", "onkeyup",
    /* document and form */
    "onload", "onunload", "onfocus", "onblur",
    "onsubmit", "onreset", "onchange", "onselect"
};
1376
/*
 * This table is used by the htmlparser to know what to do with
 * broken html pages. By assigning different priorities to different
 * elements the parser can decide how to handle extra endtags.
 * Endtags are only allowed to close elements with lower or equal
 * priority.
 */

typedef struct {
    const char *name;	/* element name, NULL in the terminating entry */
    int priority;	/* "endtag" priority of that element */
} elementPriority;

/*
 * htmlGetEndPriority() walks this table until it finds the name or hits
 * the NULL entry, so the last row supplies the priority of every element
 * that is not listed explicitly.
 */
static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 }, { "th",    160 },
    { "tr",    170 },
    { "thead", 180 }, { "tbody", 180 }, { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 }, { "body",  200 },
    { "html",  220 },
    { NULL,    100 } /* Default priority */
};
1404
1405	/************************************************************************
1406	* *
1407	* functions to handle HTML specific data *
1408	* *
1409	************************************************************************/
1410
/**
 * htmlInitAutoClose:
 *
 * Does nothing: the function body is empty and it is only kept as a
 * callable no-op.
 */
void
htmlInitAutoClose(void)
{
    /* intentionally empty */
}
1419
1420	static int
1421	htmlCompareTags(const void key, const void member) {
1422	const xmlChar tag = (const xmlChar ) key;
1423	const htmlElemDesc desc = (const htmlElemDesc ) member;
1424
1425	return(xmlStrcasecmp(tag, BAD_CAST desc->name));
1426	}
1427
1428	/**
1429	* htmlTagLookup:
1430	* @tag: The tag name in lowercase
1431	*
1432	* Lookup the HTML tag in the ElementTable
1433	*
1434	* Returns the related htmlElemDescPtr or NULL if not found.
1435	*/
1436	const htmlElemDesc *
1437	htmlTagLookup(const xmlChar *tag) {
1438	if (tag == NULL)
1439	return(NULL);
1440
1441	return((const htmlElemDesc *) bsearch(tag, html40ElementTable,
1442	sizeof(html40ElementTable) / sizeof(htmlElemDesc),
1443	sizeof(htmlElemDesc), htmlCompareTags));
1444	}
1445
1446	/**
1447	* htmlGetEndPriority:
1448	* @name: The name of the element to look up the priority for.
1449	*
1450	* Return value: The "endtag" priority.
1451	**/
1452	static int
1453	htmlGetEndPriority (const xmlChar *name) {
1454	int i = 0;
1455
1456	while ((htmlEndPriority[i].name != NULL) &&
1457	(!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
1458	i++;
1459
1460	return(htmlEndPriority[i].priority);
1461	}
1462
1463
1464	static int
1465	htmlCompareStartClose(const void vkey, const void member) {
1466	const htmlStartCloseEntry key = (const htmlStartCloseEntry ) vkey;
1467	const htmlStartCloseEntry entry = (const htmlStartCloseEntry ) member;
1468	int ret;
1469
1470	ret = strcmp(key->oldTag, entry->oldTag);
1471	if (ret == 0)
1472	ret = strcmp(key->newTag, entry->newTag);
1473
1474	return(ret);
1475	}
1476
1477	/**
1478	* htmlCheckAutoClose:
1479	* @newtag: The new tag name
1480	* @oldtag: The old tag name
1481	*
1482	* Checks whether the new tag is one of the registered valid tags for
1483	* closing old.
1484	*
1485	* Returns 0 if no, 1 if yes.
1486	*/
1487	static int
1488	htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
1489	{
1490	htmlStartCloseEntry key;
1491	void *res;
1492
1493	key.oldTag = (const char *) oldtag;
1494	key.newTag = (const char *) newtag;
1495	res = bsearch(&key, htmlStartClose,
1496	sizeof(htmlStartClose) / sizeof(htmlStartCloseEntry),
1497	sizeof(htmlStartCloseEntry), htmlCompareStartClose);
1498	return(res != NULL);
1499	}
1500
1501	/**
1502	* htmlAutoCloseOnClose:
1503	* @ctxt: an HTML parser context
1504	* @newtag: The new tag name
1505	* @force: force the tag closure
1506	*
1507	* The HTML DTD allows an ending tag to implicitly close other tags.
1508	*/
1509	static void
1510	htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1511	{
1512	const htmlElemDesc *info;
1513	int i, priority;
1514
1515	priority = htmlGetEndPriority(newtag);
1516
1517	for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1518
1519	if (xmlStrEqual(newtag, ctxt->nameTab[i]))
1520	break;
1521	/*
1522	* A misplaced endtag can only close elements with lower
1523	* or equal priority, so if we find an element with higher
1524	* priority before we find an element with
1525	* matching name, we just ignore this endtag
1526	*/
1527	if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1528	return;
1529	}
1530	if (i < 0)
1531	return;
1532
1533	while (!xmlStrEqual(newtag, ctxt->name)) {
1534	info = htmlTagLookup(ctxt->name);
1535	if ((info != NULL) && (info->endTag == 3)) {
1536	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1537	"Opening and ending tag mismatch: %s and %s\n",
1538	newtag, ctxt->name);
1539	}
1540	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1541	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1542	htmlnamePop(ctxt);
1543	}
1544	}
1545
1546	/**
1547	* htmlAutoCloseOnEnd:
1548	* @ctxt: an HTML parser context
1549	*
1550	* Close all remaining tags at the end of the stream
1551	*/
1552	static void
1553	htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1554	{
1555	int i;
1556
1557	if (ctxt->nameNr == 0)
1558	return;
1559	for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1560	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1561	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1562	htmlnamePop(ctxt);
1563	}
1564	}
1565
1566	/**
1567	* htmlAutoClose:
1568	* @ctxt: an HTML parser context
1569	* @newtag: The new tag name or NULL
1570	*
1571	* The HTML DTD allows a tag to implicitly close other tags.
1572	* The list is kept in htmlStartClose array. This function is
1573	* called when a new tag has been detected and generates the
1574	* appropriates closes if possible/needed.
1575	* If newtag is NULL this mean we are at the end of the resource
1576	* and we should check
1577	*/
1578	static void
1579	htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1580	{
1581	while ((newtag != NULL) && (ctxt->name != NULL) &&
1582	(htmlCheckAutoClose(newtag, ctxt->name))) {
1583	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1584	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1585	htmlnamePop(ctxt);
1586	}
1587	if (newtag == NULL) {
1588	htmlAutoCloseOnEnd(ctxt);
1589	return;
1590	}
1591	while ((newtag == NULL) && (ctxt->name != NULL) &&
1592	((xmlStrEqual(ctxt->name, BAD_CAST "head")) \|\|
1593	(xmlStrEqual(ctxt->name, BAD_CAST "body")) \|\|
1594	(xmlStrEqual(ctxt->name, BAD_CAST "html")))) {
1595	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1596	ctxt->sax->endElement(ctxt->userData, ctxt->name);
1597	htmlnamePop(ctxt);
1598	}
1599	}
1600
1601	/**
1602	* htmlAutoCloseTag:
1603	* @doc: the HTML document
1604	* @name: The tag name
1605	* @elem: the HTML element
1606	*
1607	* The HTML DTD allows a tag to implicitly close other tags.
1608	* The list is kept in htmlStartClose array. This function checks
1609	* if the element or one of it's children would autoclose the
1610	* given tag.
1611	*
1612	* Returns 1 if autoclose, 0 otherwise
1613	*/
1614	int
1615	htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
1616	htmlNodePtr child;
1617
1618	if (elem == NULL) return(1);
1619	if (xmlStrEqual(name, elem->name)) return(0);
1620	if (htmlCheckAutoClose(elem->name, name)) return(1);
1621	child = elem->children;
1622	while (child != NULL) {
1623	if (htmlAutoCloseTag(doc, name, child)) return(1);
1624	child = child->next;
1625	}
1626	return(0);
1627	}
1628
1629	/**
1630	* htmlIsAutoClosed:
1631	* @doc: the HTML document
1632	* @elem: the HTML element
1633	*
1634	* The HTML DTD allows a tag to implicitly close other tags.
1635	* The list is kept in htmlStartClose array. This function checks
1636	* if a tag is autoclosed by one of it's child
1637	*
1638	* Returns 1 if autoclosed, 0 otherwise
1639	*/
1640	int
1641	htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
1642	htmlNodePtr child;
1643
1644	if (elem == NULL) return(1);
1645	child = elem->children;
1646	while (child != NULL) {
1647	if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
1648	child = child->next;
1649	}
1650	return(0);
1651	}
1652
1653	/**
1654	* htmlCheckImplied:
1655	* @ctxt: an HTML parser context
1656	* @newtag: The new tag name
1657	*
1658	* The HTML DTD allows a tag to exists only implicitly
1659	* called when a new tag has been detected and generates the
1660	* appropriates implicit tags if missing
1661	*/
1662	static void
1663	htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1664	int i;
1665
1666	if (ctxt->options & HTML_PARSE_NOIMPLIED)
1667	return;
1668	if (!htmlOmittedDefaultValue)
1669	return;
1670	if (xmlStrEqual(newtag, BAD_CAST"html"))
1671	return;
1672	if (ctxt->nameNr <= 0) {
1673	htmlnamePush(ctxt, BAD_CAST"html");
1674	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1675	ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
1676	}
1677	if ((xmlStrEqual(newtag, BAD_CAST"body")) \|\| (xmlStrEqual(newtag, BAD_CAST"head")))
1678	return;
1679	if ((ctxt->nameNr <= 1) &&
1680	((xmlStrEqual(newtag, BAD_CAST"script")) \|\|
1681	(xmlStrEqual(newtag, BAD_CAST"style")) \|\|
1682	(xmlStrEqual(newtag, BAD_CAST"meta")) \|\|
1683	(xmlStrEqual(newtag, BAD_CAST"link")) \|\|
1684	(xmlStrEqual(newtag, BAD_CAST"title")) \|\|
1685	(xmlStrEqual(newtag, BAD_CAST"base")))) {
1686	if (ctxt->html >= 3) {
1687	/* we already saw or generated an <head> before */
1688	return;
1689	}
1690	/*
1691	* dropped OBJECT ... i you put it first BODY will be
1692	* assumed !
1693	*/
1694	htmlnamePush(ctxt, BAD_CAST"head");
1695	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1696	ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL);
1697	} else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) &&
1698	(!xmlStrEqual(newtag, BAD_CAST"frame")) &&
1699	(!xmlStrEqual(newtag, BAD_CAST"frameset"))) {
1700	if (ctxt->html >= 10) {
1701	/* we already saw or generated a <body> before */
1702	return;
1703	}
1704	for (i = 0;i < ctxt->nameNr;i++) {
1705	if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) {
1706	return;
1707	}
1708	if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) {
1709	return;
1710	}
1711	}
1712
1713	htmlnamePush(ctxt, BAD_CAST"body");
1714	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1715	ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL);
1716	}
1717	}
1718
1719	/**
1720	* htmlCheckParagraph
1721	* @ctxt: an HTML parser context
1722	*
1723	* Check whether a p element need to be implied before inserting
1724	* characters in the current element.
1725	*
1726	* Returns 1 if a paragraph has been inserted, 0 if not and -1
1727	* in case of error.
1728	*/
1729
1730	static int
1731	htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1732	const xmlChar *tag;
1733	int i;
1734
1735	if (ctxt == NULL)
1736	return(-1);
1737	tag = ctxt->name;
1738	if (tag == NULL) {
1739	htmlAutoClose(ctxt, BAD_CAST"p");
1740	htmlCheckImplied(ctxt, BAD_CAST"p");
1741	htmlnamePush(ctxt, BAD_CAST"p");
1742	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1743	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1744	return(1);
1745	}
1746	if (!htmlOmittedDefaultValue)
1747	return(0);
1748	for (i = 0; htmlNoContentElements[i] != NULL; i++) {
1749	if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
1750	htmlAutoClose(ctxt, BAD_CAST"p");
1751	htmlCheckImplied(ctxt, BAD_CAST"p");
1752	htmlnamePush(ctxt, BAD_CAST"p");
1753	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1754	ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
1755	return(1);
1756	}
1757	}
1758	return(0);
1759	}
1760
1761	/**
1762	* htmlIsScriptAttribute:
1763	* @name: an attribute name
1764	*
1765	* Check if an attribute is of content type Script
1766	*
1767	* Returns 1 is the attribute is a script 0 otherwise
1768	*/
1769	int
1770	htmlIsScriptAttribute(const xmlChar *name) {
1771	unsigned int i;
1772
1773	if (name == NULL)
1774	return(0);
1775	/*
1776	* all script attributes start with 'on'
1777	*/
1778	if ((name[0] != 'o') \|\| (name[1] != 'n'))
1779	return(0);
1780	for (i = 0;
1781	i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
1782	i++) {
1783	if (xmlStrEqual(name, (const xmlChar *) htmlScriptAttributes[i]))
1784	return(1);
1785	}
1786	return(0);
1787	}
1788
1789	/************************************************************************
1790	* *
1791	* The list of HTML predefined entities *
1792	* *
1793	************************************************************************/
1794
1795
1796	static const htmlEntityDesc html40EntitiesTable[] = {
1797	/*
1798	* the 4 absolute ones, plus apostrophe.
1799	*/
1800	{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
1801	{ 38, "amp", "ampersand, U+0026 ISOnum" },
1802	{ 39, "apos", "single quote" },
1803	{ 60, "lt", "less-than sign, U+003C ISOnum" },
1804	{ 62, "gt", "greater-than sign, U+003E ISOnum" },
1805
1806	/*
1807	* A bunch still in the 128-255 range
1808	* Replacing them depend really on the charset used.
1809	*/
1810	{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
1811	{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
1812	{ 162, "cent", "cent sign, U+00A2 ISOnum" },
1813	{ 163, "pound","pound sign, U+00A3 ISOnum" },
1814	{ 164, "curren","currency sign, U+00A4 ISOnum" },
1815	{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
1816	{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
1817	{ 167, "sect", "section sign, U+00A7 ISOnum" },
1818	{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
1819	{ 169, "copy", "copyright sign, U+00A9 ISOnum" },
1820	{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
1821	{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
1822	{ 172, "not", "not sign, U+00AC ISOnum" },
1823	{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
1824	{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
1825	{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
1826	{ 176, "deg", "degree sign, U+00B0 ISOnum" },
1827	{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
1828	{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
1829	{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
1830	{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
1831	{ 181, "micro","micro sign, U+00B5 ISOnum" },
1832	{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
1833	{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
1834	{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
1835	{ 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
1836	{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
1837	{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
1838	{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
1839	{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
1840	{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
1841	{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
1842	{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
1843	{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
1844	{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
1845	{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
1846	{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
1847	{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
1848	{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
1849	{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
1850	{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
1851	{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
1852	{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
1853	{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
1854	{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
1855	{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
1856	{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
1857	{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
1858	{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
1859	{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
1860	{ 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
1861	{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
1862	{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
1863	{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
1864	{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
1865	{ 215, "times","multiplication sign, U+00D7 ISOnum" },
1866	{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
1867	{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
1868	{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
1869	{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
1870	{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
1871	{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
1872	{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
1873	{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
1874	{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
1875	{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
1876	{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
1877	{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
1878	{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
1879	{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
1880	{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
1881	{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
1882	{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
1883	{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
1884	{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
1885	{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
1886	{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
1887	{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
1888	{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
1889	{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
1890	{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" },
1891	{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
1892	{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
1893	{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
1894	{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
1895	{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
1896	{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
1897	{ 247, "divide","division sign, U+00F7 ISOnum" },
1898	{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
1899	{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
1900	{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
1901	{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
1902	{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
1903	{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
1904	{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
1905	{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },
1906
1907	{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
1908	{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" },
1909	{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
1910	{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
1911	{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },
1912
1913	/*
1914	* Anything below should really be kept as entities references
1915	*/
1916	{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },
1917
1918	{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
1919	{ 732, "tilde","small tilde, U+02DC ISOdia" },
1920
1921	{ 913, "Alpha","greek capital letter alpha, U+0391" },
1922	{ 914, "Beta", "greek capital letter beta, U+0392" },
1923	{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
1924	{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
1925	{ 917, "Epsilon","greek capital letter epsilon, U+0395" },
1926	{ 918, "Zeta", "greek capital letter zeta, U+0396" },
1927	{ 919, "Eta", "greek capital letter eta, U+0397" },
1928	{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
1929	{ 921, "Iota", "greek capital letter iota, U+0399" },
1930	{ 922, "Kappa","greek capital letter kappa, U+039A" },
1931	{ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
1932	{ 924, "Mu", "greek capital letter mu, U+039C" },
1933	{ 925, "Nu", "greek capital letter nu, U+039D" },
1934	{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
1935	{ 927, "Omicron","greek capital letter omicron, U+039F" },
1936	{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
1937	{ 929, "Rho", "greek capital letter rho, U+03A1" },
1938	{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
1939	{ 932, "Tau", "greek capital letter tau, U+03A4" },
1940	{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
1941	{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
1942	{ 935, "Chi", "greek capital letter chi, U+03A7" },
1943	{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
1944	{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },
1945
1946	{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
1947	{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
1948	{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
1949	{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" },
1950	{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
1951	{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
1952	{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
1953	{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" },
1954	{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
1955	{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
1956	{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
1957	{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" },
1958	{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" },
1959	{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" },
1960	{ 959, "omicron","greek small letter omicron, U+03BF NEW" },
1961	{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
1962	{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
1963	{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
1964	{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
1965	{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
1966	{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
1967	{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
1968	{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
1969	{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
1970	{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" },
1971	{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
1972	{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
1973	{ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" },
1974
1975	{ 8194, "ensp", "en space, U+2002 ISOpub" },
1976	{ 8195, "emsp", "em space, U+2003 ISOpub" },
1977	{ 8201, "thinsp","thin space, U+2009 ISOpub" },
1978	{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
1979	{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
1980	{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
1981	{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
1982	{ 8211, "ndash","en dash, U+2013 ISOpub" },
1983	{ 8212, "mdash","em dash, U+2014 ISOpub" },
1984	{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" },
1985	{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" },
1986	{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" },
1987	{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" },
1988	{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" },
1989	{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" },
1990	{ 8224, "dagger","dagger, U+2020 ISOpub" },
1991	{ 8225, "Dagger","double dagger, U+2021 ISOpub" },
1992
1993	{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" },
1994	{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
1995
1996	{ 8240, "permil","per mille sign, U+2030 ISOtech" },
1997
1998	{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" },
1999	{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" },
2000
2001	{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
2002	{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
2003
2004	{ 8254, "oline","overline = spacing overscore, U+203E NEW" },
2005	{ 8260, "frasl","fraction slash, U+2044 NEW" },
2006
2007	{ 8364, "euro", "euro sign, U+20AC NEW" },
2008
2009	{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
2010	{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
2011	{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
2012	{ 8482, "trade","trade mark sign, U+2122 ISOnum" },
2013	{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
2014	{ 8592, "larr", "leftwards arrow, U+2190 ISOnum" },
2015	{ 8593, "uarr", "upwards arrow, U+2191 ISOnum" },
2016	{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" },
2017	{ 8595, "darr", "downwards arrow, U+2193 ISOnum" },
2018	{ 8596, "harr", "left right arrow, U+2194 ISOamsa" },
2019	{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
2020	{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
2021	{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
2022	{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
2023	{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
2024	{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" },
2025
2026	{ 8704, "forall","for all, U+2200 ISOtech" },
2027	{ 8706, "part", "partial differential, U+2202 ISOtech" },
2028	{ 8707, "exist","there exists, U+2203 ISOtech" },
2029	{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" },
2030	{ 8711, "nabla","nabla = backward difference, U+2207 ISOtech" },
2031	{ 8712, "isin", "element of, U+2208 ISOtech" },
2032	{ 8713, "notin","not an element of, U+2209 ISOtech" },
2033	{ 8715, "ni", "contains as member, U+220B ISOtech" },
2034	{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" },
2035	{ 8721, "sum", "n-ary summation, U+2211 ISOamsb" },
2036	{ 8722, "minus","minus sign, U+2212 ISOtech" },
2037	{ 8727, "lowast","asterisk operator, U+2217 ISOtech" },
2038	{ 8730, "radic","square root = radical sign, U+221A ISOtech" },
2039	{ 8733, "prop", "proportional to, U+221D ISOtech" },
2040	{ 8734, "infin","infinity, U+221E ISOtech" },
2041	{ 8736, "ang", "angle, U+2220 ISOamso" },
2042	{ 8743, "and", "logical and = wedge, U+2227 ISOtech" },
2043	{ 8744, "or", "logical or = vee, U+2228 ISOtech" },
2044	{ 8745, "cap", "intersection = cap, U+2229 ISOtech" },
2045	{ 8746, "cup", "union = cup, U+222A ISOtech" },
2046	{ 8747, "int", "integral, U+222B ISOtech" },
2047	{ 8756, "there4","therefore, U+2234 ISOtech" },
2048	{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
2049	{ 8773, "cong", "approximately equal to, U+2245 ISOtech" },
2050	{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
2051	{ 8800, "ne", "not equal to, U+2260 ISOtech" },
2052	{ 8801, "equiv","identical to, U+2261 ISOtech" },
2053	{ 8804, "le", "less-than or equal to, U+2264 ISOtech" },
2054	{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" },
2055	{ 8834, "sub", "subset of, U+2282 ISOtech" },
2056	{ 8835, "sup", "superset of, U+2283 ISOtech" },
2057	{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" },
2058	{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" },
2059	{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" },
2060	{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
2061	{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" },
2062	{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
2063	{ 8901, "sdot", "dot operator, U+22C5 ISOamsb" },
2064	{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
2065	{ 8969, "rceil","right ceiling, U+2309 ISOamsc" },
2066	{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
2067	{ 8971, "rfloor","right floor, U+230B ISOamsc" },
2068	{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
2069	{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
2070	{ 9674, "loz", "lozenge, U+25CA ISOpub" },
2071
2072	{ 9824, "spades","black spade suit, U+2660 ISOpub" },
2073	{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" },
2074	{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" },
2075	{ 9830, "diams","black diamond suit, U+2666 ISOpub" },
2076
2077	};
2078
2079	/************************************************************************
2080	* *
2081	* Commodity functions to handle entities *
2082	* *
2083	************************************************************************/
2084
/*
 * Macro used to double the capacity of a heap-allocated xmlChar buffer.
 *
 * For an argument named `buf` the expansion uses `buf` (the buffer
 * pointer), `buf_size` (its current capacity, updated in place) and a
 * variable named `ctxt`, all of which must exist in the expanding scope.
 *
 * If the reallocation fails, the error is reported through
 * htmlErrMemory(), the old buffer is freed and the ENCLOSING FUNCTION
 * returns NULL; the macro is therefore only usable inside functions
 * that return a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp == NULL) {							\
	htmlErrMemory(ctxt, "growing buffer\n");			\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = tmp;							\
}
2099
2100	/**
2101	* htmlEntityLookup:
2102	* @name: the entity name
2103	*
2104	* Lookup the given entity in EntitiesTable
2105	*
2106	* TODO: the linear scan is really ugly, an hash table is really needed.
2107	*
2108	* Returns the associated htmlEntityDescPtr if found, NULL otherwise.
2109	*/
2110	const htmlEntityDesc *
2111	htmlEntityLookup(const xmlChar *name) {
2112	unsigned int i;
2113
2114	for (i = 0;i < (sizeof(html40EntitiesTable)/
2115	sizeof(html40EntitiesTable[0]));i++) {
2116	if (xmlStrEqual(name, BAD_CAST html40EntitiesTable[i].name)) {
2117	return((htmlEntityDescPtr) &html40EntitiesTable[i]);
2118	}
2119	}
2120	return(NULL);
2121	}
2122
2123	/**
2124	* htmlEntityValueLookup:
2125	* @value: the entity's unicode value
2126	*
2127	* Lookup the given entity in EntitiesTable
2128	*
2129	* TODO: the linear scan is really ugly, an hash table is really needed.
2130	*
2131	* Returns the associated htmlEntityDescPtr if found, NULL otherwise.
2132	*/
2133	const htmlEntityDesc *
2134	htmlEntityValueLookup(unsigned int value) {
2135	unsigned int i;
2136
2137	for (i = 0;i < (sizeof(html40EntitiesTable)/
2138	sizeof(html40EntitiesTable[0]));i++) {
2139	if (html40EntitiesTable[i].value >= value) {
2140	if (html40EntitiesTable[i].value > value)
2141	break;
2142	return((htmlEntityDescPtr) &html40EntitiesTable[i]);
2143	}
2144	}
2145	return(NULL);
2146	}
2147
2148	/**
2149	* UTF8ToHtml:
2150	* @out: a pointer to an array of bytes to store the result
2151	* @outlen: the length of @out
2152	* @in: a pointer to an array of UTF-8 chars
2153	* @inlen: the length of @in
2154	*
2155	* Take a block of UTF-8 chars in and try to convert it to an ASCII
2156	* plus HTML entities block of chars out.
2157	*
2158	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2159	* The value of @inlen after return is the number of octets consumed
2160	* as the return value is positive, else unpredictable.
2161	* The value of @outlen after return is the number of octets consumed.
2162	*/
2163	int
2164	UTF8ToHtml(unsigned char* out, int *outlen,
2165	const unsigned char* in, int *inlen) {
2166	const unsigned char* processed = in;
2167	const unsigned char* outend;
2168	const unsigned char* outstart = out;
2169	const unsigned char* instart = in;
2170	const unsigned char* inend;
2171	unsigned int c, d;
2172	int trailing;
2173
2174	if ((out == NULL) \|\| (outlen == NULL) \|\| (inlen == NULL)) return(-1);
2175	if (in == NULL) {
2176	/*
2177	* initialization nothing to do
2178	*/
2179	*outlen = 0;
2180	*inlen = 0;
2181	return(0);
2182	}
2183	inend = in + (*inlen);
2184	outend = out + (*outlen);
2185	while (in < inend) {
2186	d = *in++;
2187	if (d < 0x80) { c= d; trailing= 0; }
2188	else if (d < 0xC0) {
2189	/* trailing byte in leading position */
2190	*outlen = out - outstart;
2191	*inlen = processed - instart;
2192	return(-2);
2193	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
2194	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
2195	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
2196	else {
2197	/* no chance for this in Ascii */
2198	*outlen = out - outstart;
2199	*inlen = processed - instart;
2200	return(-2);
2201	}
2202
2203	if (inend - in < trailing) {
2204	break;
2205	}
2206
2207	for ( ; trailing; trailing--) {
2208	if ((in >= inend) \|\| (((d= *in++) & 0xC0) != 0x80))
2209	break;
2210	c <<= 6;
2211	c \|= d & 0x3F;
2212	}
2213
2214	/* assertion: c is a single UTF-4 value */
2215	if (c < 0x80) {
2216	if (out + 1 >= outend)
2217	break;
2218	*out++ = c;
2219	} else {
2220	int len;
2221	const htmlEntityDesc * ent;
2222	const char *cp;
2223	char nbuf[16];
2224
2225	/*
2226	* Try to lookup a predefined HTML entity for it
2227	*/
2228
2229	ent = htmlEntityValueLookup(c);
2230	if (ent == NULL) {
2231	snprintf(nbuf, sizeof(nbuf), "#%u", c);
2232	cp = nbuf;
2233	}
2234	else
2235	cp = ent->name;
2236	len = strlen(cp);
2237	if (out + 2 + len >= outend)
2238	break;
2239	*out++ = '&';
2240	memcpy(out, cp, len);
2241	out += len;
2242	*out++ = ';';
2243	}
2244	processed = in;
2245	}
2246	*outlen = out - outstart;
2247	*inlen = processed - instart;
2248	return(0);
2249	}
2250
2251	/**
2252	* htmlEncodeEntities:
2253	* @out: a pointer to an array of bytes to store the result
2254	* @outlen: the length of @out
2255	* @in: a pointer to an array of UTF-8 chars
2256	* @inlen: the length of @in
2257	* @quoteChar: the quote character to escape (' or ") or zero.
2258	*
2259	* Take a block of UTF-8 chars in and try to convert it to an ASCII
2260	* plus HTML entities block of chars out.
2261	*
2262	* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
2263	* The value of @inlen after return is the number of octets consumed
2264	* as the return value is positive, else unpredictable.
2265	* The value of @outlen after return is the number of octets consumed.
2266	*/
2267	int
2268	htmlEncodeEntities(unsigned char* out, int *outlen,
2269	const unsigned char* in, int *inlen, int quoteChar) {
2270	const unsigned char* processed = in;
2271	const unsigned char* outend;
2272	const unsigned char* outstart = out;
2273	const unsigned char* instart = in;
2274	const unsigned char* inend;
2275	unsigned int c, d;
2276	int trailing;
2277
2278	if ((out == NULL) \|\| (outlen == NULL) \|\| (inlen == NULL) \|\| (in == NULL))
2279	return(-1);
2280	outend = out + (*outlen);
2281	inend = in + (*inlen);
2282	while (in < inend) {
2283	d = *in++;
2284	if (d < 0x80) { c= d; trailing= 0; }
2285	else if (d < 0xC0) {
2286	/* trailing byte in leading position */
2287	*outlen = out - outstart;
2288	*inlen = processed - instart;
2289	return(-2);
2290	} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
2291	else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
2292	else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
2293	else {
2294	/* no chance for this in Ascii */
2295	*outlen = out - outstart;
2296	*inlen = processed - instart;
2297	return(-2);
2298	}
2299
2300	if (inend - in < trailing)
2301	break;
2302
2303	while (trailing--) {
2304	if (((d= *in++) & 0xC0) != 0x80) {
2305	*outlen = out - outstart;
2306	*inlen = processed - instart;
2307	return(-2);
2308	}
2309	c <<= 6;
2310	c \|= d & 0x3F;
2311	}
2312
2313	/* assertion: c is a single UTF-4 value */
2314	if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
2315	(c != '&') && (c != '<') && (c != '>')) {
2316	if (out >= outend)
2317	break;
2318	*out++ = c;
2319	} else {
2320	const htmlEntityDesc * ent;
2321	const char *cp;
2322	char nbuf[16];
2323	int len;
2324
2325	/*
2326	* Try to lookup a predefined HTML entity for it
2327	*/
2328	ent = htmlEntityValueLookup(c);
2329	if (ent == NULL) {
2330	snprintf(nbuf, sizeof(nbuf), "#%u", c);
2331	cp = nbuf;
2332	}
2333	else
2334	cp = ent->name;
2335	len = strlen(cp);
2336	if (out + 2 + len > outend)
2337	break;
2338	*out++ = '&';
2339	memcpy(out, cp, len);
2340	out += len;
2341	*out++ = ';';
2342	}
2343	processed = in;
2344	}
2345	*outlen = out - outstart;
2346	*inlen = processed - instart;
2347	return(0);
2348	}
2349
2350	/************************************************************************
2351	* *
2352	* Commodity functions to handle streams *
2353	* *
2354	************************************************************************/
2355
#ifdef LIBXML_PUSH_ENABLED
/**
 * htmlNewInputStream:
 * @ctxt:  an HTML parser context
 *
 * Allocate a new input stream structure. The structure is zero-filled
 * and then given its initial field values: line and column start at 1,
 * every other field set here is NULL or 0. @ctxt is only used to
 * report an allocation failure.
 *
 * Returns the new input stream or NULL
 */
static htmlParserInputPtr
htmlNewInputStream(htmlParserCtxtPtr ctxt) {
    htmlParserInputPtr stream;

    stream = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
    if (stream == NULL) {
        htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
        return(NULL);
    }
    memset(stream, 0, sizeof(htmlParserInput));

    /* position counters are 1-based */
    stream->line = 1;
    stream->col = 1;

    /* everything else starts out empty */
    stream->buf = NULL;
    stream->filename = NULL;
    stream->directory = NULL;
    stream->base = NULL;
    stream->cur = NULL;
    stream->free = NULL;
    stream->version = NULL;
    stream->consumed = 0;
    stream->length = 0;
    return(stream);
}
#endif
2389
2390
2391	/************************************************************************
2392	* *
2393	* Commodity functions, cleanup needed ? *
2394	* *
2395	************************************************************************/
/*
 * Names of all the tags allowing pc data in the html 4.01 loose dtd.
 * NOTE: it might be more appropriate to integrate this information
 * into the html40ElementTable array but I don't want to risk any
 * binary incompatibility
 */
static const char *allowPCData[] = {
    "a", "abbr", "acronym", "address", "applet",
    "b", "bdo", "big", "blockquote", "body", "button",
    "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt",
    "em",
    "font", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "i", "iframe", "ins",
    "kbd",
    "label", "legend", "li",
    "noframes", "noscript",
    "object",
    "p", "pre",
    "q",
    "s", "samp", "small", "span", "strike", "strong",
    "td", "th", "tt",
    "u",
    "var"
};
2410
2411	/**
2412	* areBlanks:
2413	* @ctxt: an HTML parser context
2414	* @str: a xmlChar *
2415	* @len: the size of @str
2416	*
2417	* Is this a sequence of blank chars that one can ignore ?
2418	*
2419	* Returns 1 if ignorable 0 otherwise.
2420	*/
2421
2422	static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2423	unsigned int i;
2424	int j;
2425	xmlNodePtr lastChild;
2426	xmlDtdPtr dtd;
2427
2428	for (j = 0;j < len;j++)
2429	if (!(IS_BLANK_CH(str[j]))) return(0);
2430
2431	if (CUR == 0) return(1);
2432	if (CUR != '<') return(0);
2433	if (ctxt->name == NULL)
2434	return(1);
2435	if (xmlStrEqual(ctxt->name, BAD_CAST"html"))
2436	return(1);
2437	if (xmlStrEqual(ctxt->name, BAD_CAST"head"))
2438	return(1);
2439
2440	/* Only strip CDATA children of the body tag for strict HTML DTDs */
2441	if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) {
2442	dtd = xmlGetIntSubset(ctxt->myDoc);
2443	if (dtd != NULL && dtd->ExternalID != NULL) {
2444	if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") \|\|
2445	!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN"))
2446	return(1);
2447	}
2448	}
2449
2450	if (ctxt->node == NULL) return(0);
2451	lastChild = xmlGetLastChild(ctxt->node);
2452	while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
2453	lastChild = lastChild->prev;
2454	if (lastChild == NULL) {
2455	if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2456	(ctxt->node->content != NULL)) return(0);
2457	/* keep ws in constructs like ...<b> </b>...
2458	for all tags "b" allowing PCDATA */
2459	for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
2460	if ( xmlStrEqual(ctxt->name, BAD_CAST allowPCData[i]) ) {
2461	return(0);
2462	}
2463	}
2464	} else if (xmlNodeIsText(lastChild)) {
2465	return(0);
2466	} else {
2467	/* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
2468	for all tags "p" allowing PCDATA */
2469	for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
2470	if ( xmlStrEqual(lastChild->name, BAD_CAST allowPCData[i]) ) {
2471	return(0);
2472	}
2473	}
2474	}
2475	return(1);
2476	}
2477
2478	/**
2479	* htmlNewDocNoDtD:
2480	* @URI: URI for the dtd, or NULL
2481	* @ExternalID: the external ID of the DTD, or NULL
2482	*
2483	* Creates a new HTML document without a DTD node if @URI and @ExternalID
2484	* are NULL
2485	*
2486	* Returns a new document, do not initialize the DTD if not provided
2487	*/
2488	htmlDocPtr
2489	htmlNewDocNoDtD(const xmlChar URI, const xmlChar ExternalID) {
2490	xmlDocPtr cur;
2491
2492	/*
2493	* Allocate a new document and fill the fields.
2494	*/
2495	cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
2496	if (cur == NULL) {
2497	htmlErrMemory(NULL, "HTML document creation failed\n");
2498	return(NULL);
2499	}
2500	memset(cur, 0, sizeof(xmlDoc));
2501
2502	cur->type = XML_HTML_DOCUMENT_NODE;
2503	cur->version = NULL;
2504	cur->intSubset = NULL;
2505	cur->doc = cur;
2506	cur->name = NULL;
2507	cur->children = NULL;
2508	cur->extSubset = NULL;
2509	cur->oldNs = NULL;
2510	cur->encoding = NULL;
2511	cur->standalone = 1;
2512	cur->compression = 0;
2513	cur->ids = NULL;
2514	cur->refs = NULL;
2515	cur->_private = NULL;
2516	cur->charset = XML_CHAR_ENCODING_UTF8;
2517	cur->properties = XML_DOC_HTML \| XML_DOC_USERBUILT;
2518	if ((ExternalID != NULL) \|\|
2519	(URI != NULL))
2520	xmlCreateIntSubset(cur, BAD_CAST "html", ExternalID, URI);
2521	if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
2522	xmlRegisterNodeDefaultValue((xmlNodePtr)cur);
2523	return(cur);
2524	}
2525
2526	/**
2527	* htmlNewDoc:
2528	* @URI: URI for the dtd, or NULL
2529	* @ExternalID: the external ID of the DTD, or NULL
2530	*
2531	* Creates a new HTML document
2532	*
2533	* Returns a new document
2534	*/
2535	htmlDocPtr
2536	htmlNewDoc(const xmlChar URI, const xmlChar ExternalID) {
2537	if ((URI == NULL) && (ExternalID == NULL))
2538	return(htmlNewDocNoDtD(
2539	BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd",
2540	BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN"));
2541
2542	return(htmlNewDocNoDtD(URI, ExternalID));
2543	}
2544
2545
2546	/************************************************************************
2547	* *
2548	* The parser itself *
2549	* Relates to http://www.w3.org/TR/html40 *
2550	* *
2551	************************************************************************/
2552
2553	/************************************************************************
2554	* *
2555	* The parser itself *
2556	* *
2557	************************************************************************/
2558
2559	static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2560
2561	/**
2562	* htmlParseHTMLName:
2563	* @ctxt: an HTML parser context
2564	*
2565	* parse an HTML tag or attribute name, note that we convert it to lowercase
2566	* since HTML names are not case-sensitive.
2567	*
2568	* Returns the Tag Name parsed or NULL
2569	*/
2570
2571	static const xmlChar *
2572	htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2573	int i = 0;
2574	xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2575
2576	if (!IS_ASCII_LETTER(CUR) && (CUR != '_') &&
2577	(CUR != ':') && (CUR != '.')) return(NULL);
2578
2579	while ((i < HTML_PARSER_BUFFER_SIZE) &&
2580	((IS_ASCII_LETTER(CUR)) \|\| (IS_ASCII_DIGIT(CUR)) \|\|
2581	(CUR == ':') \|\| (CUR == '-') \|\| (CUR == '_') \|\|
2582	(CUR == '.'))) {
2583	if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;
2584	else loc[i] = CUR;
2585	i++;
2586
2587	NEXT;
2588	}
2589
2590	return(xmlDictLookup(ctxt->dict, loc, i));
2591	}
2592
2593
2594	/**
2595	* htmlParseHTMLName_nonInvasive:
2596	* @ctxt: an HTML parser context
2597	*
2598	* parse an HTML tag or attribute name, note that we convert it to lowercase
2599	* since HTML names are not case-sensitive, this doesn't consume the data
2600	* from the stream, it's a look-ahead
2601	*
2602	* Returns the Tag Name parsed or NULL
2603	*/
2604
2605	static const xmlChar *
2606	htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2607	int i = 0;
2608	xmlChar loc[HTML_PARSER_BUFFER_SIZE];
2609
2610	if (!IS_ASCII_LETTER(NXT(1)) && (NXT(1) != '_') &&
2611	(NXT(1) != ':')) return(NULL);
2612
2613	while ((i < HTML_PARSER_BUFFER_SIZE) &&
2614	((IS_ASCII_LETTER(NXT(1+i))) \|\| (IS_ASCII_DIGIT(NXT(1+i))) \|\|
2615	(NXT(1+i) == ':') \|\| (NXT(1+i) == '-') \|\| (NXT(1+i) == '_'))) {
2616	if ((NXT(1+i) >= 'A') && (NXT(1+i) <= 'Z')) loc[i] = NXT(1+i) + 0x20;
2617	else loc[i] = NXT(1+i);
2618	i++;
2619	}
2620
2621	return(xmlDictLookup(ctxt->dict, loc, i));
2622	}
2623
2624
2625	/**
2626	* htmlParseName:
2627	* @ctxt: an HTML parser context
2628	*
2629	* parse an HTML name, this routine is case sensitive.
2630	*
2631	* Returns the Name parsed or NULL
2632	*/
2633
2634	static const xmlChar *
2635	htmlParseName(htmlParserCtxtPtr ctxt) {
2636	const xmlChar *in;
2637	const xmlChar *ret;
2638	int count = 0;
2639
2640	GROW;
2641
2642	/*
2643	* Accelerator for simple ASCII names
2644	*/
2645	in = ctxt->input->cur;
2646	if (((in >= 0x61) && (in <= 0x7A)) \|\|
2647	((in >= 0x41) && (in <= 0x5A)) \|\|
2648	(in == '_') \|\| (in == ':')) {
2649	in++;
2650	while (((in >= 0x61) && (in <= 0x7A)) \|\|
2651	((in >= 0x41) && (in <= 0x5A)) \|\|
2652	((in >= 0x30) && (in <= 0x39)) \|\|
2653	(in == '_') \|\| (in == '-') \|\|
2654	(in == ':') \|\| (in == '.'))
2655	in++;
2656
2657	if (in == ctxt->input->end)
2658	return(NULL);
2659
2660	if ((in > 0) && (in < 0x80)) {
2661	count = in - ctxt->input->cur;
2662	ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
2663	ctxt->input->cur = in;
2664	ctxt->input->col += count;
2665	return(ret);
2666	}
2667	}
2668	return(htmlParseNameComplex(ctxt));
2669	}
2670
2671	static const xmlChar *
2672	htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2673	int len = 0, l;
2674	int c;
2675	int count = 0;
2676	const xmlChar *base = ctxt->input->base;
2677
2678	/*
2679	* Handler for more complex cases
2680	*/
2681	GROW;
2682	c = CUR_CHAR(l);
2683	if ((c == ' ') \|\| (c == '>') \|\| (c == '/') \|\| /* accelerators */
2684	(!IS_LETTER(c) && (c != '_') &&
2685	(c != ':'))) {
2686	return(NULL);
2687	}
2688
2689	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2690	((IS_LETTER(c)) \|\| (IS_DIGIT(c)) \|\|
2691	(c == '.') \|\| (c == '-') \|\|
2692	(c == '_') \|\| (c == ':') \|\|
2693	(IS_COMBINING(c)) \|\|
2694	(IS_EXTENDER(c)))) {
2695	if (count++ > 100) {
2696	count = 0;
2697	GROW;
2698	}
2699	len += l;
2700	NEXTL(l);
2701	c = CUR_CHAR(l);
2702	if (ctxt->input->base != base) {
2703	/*
2704	* We changed encoding from an unknown encoding
2705	* Input buffer changed location, so we better start again
2706	*/
2707	return(htmlParseNameComplex(ctxt));
2708	}
2709	}
2710
2711	if (ctxt->input->cur - ctxt->input->base < len) {
2712	/* Sanity check */
2713	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
2714	"unexpected change of input buffer", NULL, NULL);
2715	return (NULL);
2716	}
2717
2718	return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2719	}
2720
2721
2722	/**
2723	* htmlParseHTMLAttribute:
2724	* @ctxt: an HTML parser context
2725	* @stop: a char stop value
2726	*
2727	* parse an HTML attribute value till the stop (quote), if
2728	* stop is 0 then it stops at the first space
2729	*
2730	* Returns the attribute parsed or NULL
2731	*/
2732
2733	static xmlChar *
2734	htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2735	xmlChar *buffer = NULL;
2736	int buffer_size = 0;
2737	xmlChar *out = NULL;
2738	const xmlChar *name = NULL;
2739	const xmlChar *cur = NULL;
2740	const htmlEntityDesc * ent;
2741
2742	/*
2743	* allocate a translation buffer.
2744	*/
2745	buffer_size = HTML_PARSER_BUFFER_SIZE;
2746	buffer = (xmlChar ) xmlMallocAtomic(buffer_size sizeof(xmlChar));
2747	if (buffer == NULL) {
2748	htmlErrMemory(ctxt, "buffer allocation failed\n");
2749	return(NULL);
2750	}
2751	out = buffer;
2752
2753	/*
2754	* Ok loop until we reach one of the ending chars
2755	*/
2756	while ((CUR != 0) && (CUR != stop)) {
2757	if ((stop == 0) && (CUR == '>')) break;
2758	if ((stop == 0) && (IS_BLANK_CH(CUR))) break;
2759	if (CUR == '&') {
2760	if (NXT(1) == '#') {
2761	unsigned int c;
2762	int bits;
2763
2764	c = htmlParseCharRef(ctxt);
2765	if (c < 0x80)
2766	{ *out++ = c; bits= -6; }
2767	else if (c < 0x800)
2768	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
2769	else if (c < 0x10000)
2770	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
2771	else
2772	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
2773
2774	for ( ; bits >= 0; bits-= 6) {
2775	*out++ = ((c >> bits) & 0x3F) \| 0x80;
2776	}
2777
2778	if (out - buffer > buffer_size - 100) {
2779	int indx = out - buffer;
2780
2781	growBuffer(buffer);
2782	out = &buffer[indx];
2783	}
2784	} else {
2785	ent = htmlParseEntityRef(ctxt, &name);
2786	if (name == NULL) {
2787	*out++ = '&';
2788	if (out - buffer > buffer_size - 100) {
2789	int indx = out - buffer;
2790
2791	growBuffer(buffer);
2792	out = &buffer[indx];
2793	}
2794	} else if (ent == NULL) {
2795	*out++ = '&';
2796	cur = name;
2797	while (*cur != 0) {
2798	if (out - buffer > buffer_size - 100) {
2799	int indx = out - buffer;
2800
2801	growBuffer(buffer);
2802	out = &buffer[indx];
2803	}
2804	out++ = cur++;
2805	}
2806	} else {
2807	unsigned int c;
2808	int bits;
2809
2810	if (out - buffer > buffer_size - 100) {
2811	int indx = out - buffer;
2812
2813	growBuffer(buffer);
2814	out = &buffer[indx];
2815	}
2816	c = ent->value;
2817	if (c < 0x80)
2818	{ *out++ = c; bits= -6; }
2819	else if (c < 0x800)
2820	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
2821	else if (c < 0x10000)
2822	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
2823	else
2824	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
2825
2826	for ( ; bits >= 0; bits-= 6) {
2827	*out++ = ((c >> bits) & 0x3F) \| 0x80;
2828	}
2829	}
2830	}
2831	} else {
2832	unsigned int c;
2833	int bits, l;
2834
2835	if (out - buffer > buffer_size - 100) {
2836	int indx = out - buffer;
2837
2838	growBuffer(buffer);
2839	out = &buffer[indx];
2840	}
2841	c = CUR_CHAR(l);
2842	if (c < 0x80)
2843	{ *out++ = c; bits= -6; }
2844	else if (c < 0x800)
2845	{ *out++ =((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
2846	else if (c < 0x10000)
2847	{ *out++ =((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
2848	else
2849	{ *out++ =((c >> 18) & 0x07) \| 0xF0; bits= 12; }
2850
2851	for ( ; bits >= 0; bits-= 6) {
2852	*out++ = ((c >> bits) & 0x3F) \| 0x80;
2853	}
2854	NEXT;
2855	}
2856	}
2857	*out = 0;
2858	return(buffer);
2859	}
2860
2861	/**
2862	* htmlParseEntityRef:
2863	* @ctxt: an HTML parser context
2864	* @str: location to store the entity name
2865	*
2866	* parse an HTML ENTITY references
2867	*
2868	* [68] EntityRef ::= '&' Name ';'
2869	*
2870	* Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
2871	* if non-NULL *str will have to be freed by the caller.
2872	*/
2873	const htmlEntityDesc *
2874	htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2875	const xmlChar *name;
2876	const htmlEntityDesc * ent = NULL;
2877
2878	if (str != NULL) *str = NULL;
2879	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) return(NULL);
2880
2881	if (CUR == '&') {
2882	NEXT;
2883	name = htmlParseName(ctxt);
2884	if (name == NULL) {
2885	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2886	"htmlParseEntityRef: no name\n", NULL, NULL);
2887	} else {
2888	GROW;
2889	if (CUR == ';') {
2890	if (str != NULL)
2891	*str = name;
2892
2893	/*
2894	* Lookup the entity in the table.
2895	*/
2896	ent = htmlEntityLookup(name);
2897	if (ent != NULL) /* OK that's ugly !!! */
2898	NEXT;
2899	} else {
2900	htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2901	"htmlParseEntityRef: expecting ';'\n",
2902	NULL, NULL);
2903	if (str != NULL)
2904	*str = name;
2905	}
2906	}
2907	}
2908	return(ent);
2909	}
2910
2911	/**
2912	* htmlParseAttValue:
2913	* @ctxt: an HTML parser context
2914	*
2915	* parse a value for an attribute
2916	* Note: the parser won't do substitution of entities here, this
2917	* will be handled later in xmlStringGetNodeList, unless it was
2918	* asked for ctxt->replaceEntities != 0
2919	*
2920	* Returns the AttValue parsed or NULL.
2921	*/
2922
2923	static xmlChar *
2924	htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2925	xmlChar *ret = NULL;
2926
2927	if (CUR == '"') {
2928	NEXT;
2929	ret = htmlParseHTMLAttribute(ctxt, '"');
2930	if (CUR != '"') {
2931	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2932	"AttValue: \" expected\n", NULL, NULL);
2933	} else
2934	NEXT;
2935	} else if (CUR == '\'') {
2936	NEXT;
2937	ret = htmlParseHTMLAttribute(ctxt, '\'');
2938	if (CUR != '\'') {
2939	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2940	"AttValue: ' expected\n", NULL, NULL);
2941	} else
2942	NEXT;
2943	} else {
2944	/*
2945	* That's an HTMLism, the attribute value may not be quoted
2946	*/
2947	ret = htmlParseHTMLAttribute(ctxt, 0);
2948	if (ret == NULL) {
2949	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2950	"AttValue: no value found\n", NULL, NULL);
2951	}
2952	}
2953	return(ret);
2954	}
2955
2956	/**
2957	* htmlParseSystemLiteral:
2958	* @ctxt: an HTML parser context
2959	*
2960	* parse an HTML Literal
2961	*
2962	* [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")
2963	*
2964	* Returns the SystemLiteral parsed or NULL
2965	*/
2966
2967	static xmlChar *
2968	htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2969	size_t len = 0, startPosition = 0;
2970	int err = 0;
2971	int quote;
2972	xmlChar *ret = NULL;
2973
2974	if ((CUR != '"') && (CUR != '\'')) {
2975	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2976	"SystemLiteral \" or ' expected\n", NULL, NULL);
2977	return(NULL);
2978	}
2979	quote = CUR;
2980	NEXT;
2981
2982	if (CUR_PTR < BASE_PTR)
2983	return(ret);
2984	startPosition = CUR_PTR - BASE_PTR;
2985
2986	while ((CUR != 0) && (CUR != quote)) {
2987	/* TODO: Handle UTF-8 */
2988	if (!IS_CHAR_CH(CUR)) {
2989	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2990	"Invalid char in SystemLiteral 0x%X\n", CUR);
2991	err = 1;
2992	}
2993	NEXT;
2994	len++;
2995	}
2996	if (CUR != quote) {
2997	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2998	"Unfinished SystemLiteral\n", NULL, NULL);
2999	} else {
3000	NEXT;
3001	if (err == 0)
3002	ret = xmlStrndup((BASE_PTR+startPosition), len);
3003	}
3004
3005	return(ret);
3006	}
3007
3008	/**
3009	* htmlParsePubidLiteral:
3010	* @ctxt: an HTML parser context
3011	*
3012	* parse an HTML public literal
3013	*
3014	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
3015	*
3016	* Returns the PubidLiteral parsed or NULL.
3017	*/
3018
3019	static xmlChar *
3020	htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
3021	size_t len = 0, startPosition = 0;
3022	int err = 0;
3023	int quote;
3024	xmlChar *ret = NULL;
3025
3026	if ((CUR != '"') && (CUR != '\'')) {
3027	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
3028	"PubidLiteral \" or ' expected\n", NULL, NULL);
3029	return(NULL);
3030	}
3031	quote = CUR;
3032	NEXT;
3033
3034	/*
3035	* Name ::= (Letter \| '_') (NameChar)*
3036	*/
3037	if (CUR_PTR < BASE_PTR)
3038	return(ret);
3039	startPosition = CUR_PTR - BASE_PTR;
3040
3041	while ((CUR != 0) && (CUR != quote)) {
3042	if (!IS_PUBIDCHAR_CH(CUR)) {
3043	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3044	"Invalid char in PubidLiteral 0x%X\n", CUR);
3045	err = 1;
3046	}
3047	len++;
3048	NEXT;
3049	}
3050
3051	if (CUR != quote) {
3052	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
3053	"Unfinished PubidLiteral\n", NULL, NULL);
3054	} else {
3055	NEXT;
3056	if (err == 0)
3057	ret = xmlStrndup((BASE_PTR + startPosition), len);
3058	}
3059
3060	return(ret);
3061	}
3062
3063	/**
3064	* htmlParseScript:
3065	* @ctxt: an HTML parser context
3066	*
3067	* parse the content of an HTML SCRIPT or STYLE element
3068	* http://www.w3.org/TR/html4/sgml/dtd.html#Script
3069	* http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
3070	* http://www.w3.org/TR/html4/types.html#type-script
3071	* http://www.w3.org/TR/html4/types.html#h-6.15
3072	* http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
3073	*
3074	* Script data ( %Script; in the DTD) can be the content of the SCRIPT
3075	* element and the value of intrinsic event attributes. User agents must
3076	* not evaluate script data as HTML markup but instead must pass it on as
3077	* data to a script engine.
3078	* NOTES:
3079	* - The content is passed like CDATA
3080	* - the attributes for style and scripting "onXXX" are also described
3081	* as CDATA but SGML allows entities references in attributes so their
3082	* processing is identical as other attributes
3083	*/
3084	static void
3085	htmlParseScript(htmlParserCtxtPtr ctxt) {
3086	xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
3087	int nbchar = 0;
3088	int cur,l;
3089
3090	SHRINK;
3091	cur = CUR_CHAR(l);
3092	while (cur != 0) {
3093	if ((cur == '<') && (NXT(1) == '/')) {
3094	/*
3095	* One should break here, the specification is clear:
3096	* Authors should therefore escape "</" within the content.
3097	* Escape mechanisms are specific to each scripting or
3098	* style sheet language.
3099	*
3100	* In recovery mode, only break if end tag match the
3101	* current tag, effectively ignoring all tags inside the
3102	* script/style block and treating the entire block as
3103	* CDATA.
3104	*/
3105	if (ctxt->recovery) {
3106	if (xmlStrncasecmp(ctxt->name, ctxt->input->cur+2,
3107	xmlStrlen(ctxt->name)) == 0)
3108	{
3109	break; /* while */
3110	} else {
3111	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
3112	"Element %s embeds close tag\n",
3113	ctxt->name, NULL);
3114	}
3115	} else {
3116	if (((NXT(2) >= 'A') && (NXT(2) <= 'Z')) \|\|
3117	((NXT(2) >= 'a') && (NXT(2) <= 'z')))
3118	{
3119	break; /* while */
3120	}
3121	}
3122	}
3123	if (IS_CHAR(cur)) {
3124	COPY_BUF(l,buf,nbchar,cur);
3125	} else {
3126	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3127	"Invalid char in CDATA 0x%X\n", cur);
3128	}
3129	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3130	buf[nbchar] = 0;
3131	if (ctxt->sax->cdataBlock!= NULL) {
3132	/*
3133	* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
3134	*/
3135	ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
3136	} else if (ctxt->sax->characters != NULL) {
3137	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3138	}
3139	nbchar = 0;
3140	}
3141	GROW;
3142	NEXTL(l);
3143	cur = CUR_CHAR(l);
3144	}
3145
3146	if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3147	buf[nbchar] = 0;
3148	if (ctxt->sax->cdataBlock!= NULL) {
3149	/*
3150	* Insert as CDATA, which is the same as HTML_PRESERVE_NODE
3151	*/
3152	ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
3153	} else if (ctxt->sax->characters != NULL) {
3154	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3155	}
3156	}
3157	}
3158
3159
3160	/**
3161	* htmlParseCharDataInternal:
3162	* @ctxt: an HTML parser context
3163	* @readahead: optional read ahead character in ascii range
3164	*
3165	* parse a CharData section.
3166	* if we are within a CDATA section ']]>' marks an end of section.
3167	*
3168	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3169	*/
3170
3171	static void
3172	htmlParseCharDataInternal(htmlParserCtxtPtr ctxt, int readahead) {
3173	xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 6];
3174	int nbchar = 0;
3175	int cur, l;
3176	int chunk = 0;
3177
3178	if (readahead)
3179	buf[nbchar++] = readahead;
3180
3181	SHRINK;
3182	cur = CUR_CHAR(l);
3183	while (((cur != '<') \|\| (ctxt->token == '<')) &&
3184	((cur != '&') \|\| (ctxt->token == '&')) &&
3185	(cur != 0)) {
3186	if (!(IS_CHAR(cur))) {
3187	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3188	"Invalid char in CDATA 0x%X\n", cur);
3189	} else {
3190	COPY_BUF(l,buf,nbchar,cur);
3191	}
3192	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
3193	buf[nbchar] = 0;
3194
3195	/*
3196	* Ok the segment is to be consumed as chars.
3197	*/
3198	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3199	if (areBlanks(ctxt, buf, nbchar)) {
3200	if (ctxt->keepBlanks) {
3201	if (ctxt->sax->characters != NULL)
3202	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3203	} else {
3204	if (ctxt->sax->ignorableWhitespace != NULL)
3205	ctxt->sax->ignorableWhitespace(ctxt->userData,
3206	buf, nbchar);
3207	}
3208	} else {
3209	htmlCheckParagraph(ctxt);
3210	if (ctxt->sax->characters != NULL)
3211	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3212	}
3213	}
3214	nbchar = 0;
3215	}
3216	NEXTL(l);
3217	chunk++;
3218	if (chunk > HTML_PARSER_BUFFER_SIZE) {
3219	chunk = 0;
3220	SHRINK;
3221	GROW;
3222	}
3223	cur = CUR_CHAR(l);
3224	if (cur == 0) {
3225	SHRINK;
3226	GROW;
3227	cur = CUR_CHAR(l);
3228	}
3229	}
3230	if (nbchar != 0) {
3231	buf[nbchar] = 0;
3232
3233	/*
3234	* Ok the segment is to be consumed as chars.
3235	*/
3236	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3237	if (areBlanks(ctxt, buf, nbchar)) {
3238	if (ctxt->keepBlanks) {
3239	if (ctxt->sax->characters != NULL)
3240	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3241	} else {
3242	if (ctxt->sax->ignorableWhitespace != NULL)
3243	ctxt->sax->ignorableWhitespace(ctxt->userData,
3244	buf, nbchar);
3245	}
3246	} else {
3247	htmlCheckParagraph(ctxt);
3248	if (ctxt->sax->characters != NULL)
3249	ctxt->sax->characters(ctxt->userData, buf, nbchar);
3250	}
3251	}
3252	} else {
3253	/*
3254	* Loop detection
3255	*/
3256	if (cur == 0)
3257	ctxt->instate = XML_PARSER_EOF;
3258	}
3259	}
3260
3261	/**
3262	* htmlParseCharData:
3263	* @ctxt: an HTML parser context
3264	*
3265	* parse a CharData section.
3266	* if we are within a CDATA section ']]>' marks an end of section.
3267	*
3268	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3269	*/
3270
3271	static void
3272	htmlParseCharData(htmlParserCtxtPtr ctxt) {
3273	htmlParseCharDataInternal(ctxt, 0);
3274	}
3275
3276	/**
3277	* htmlParseExternalID:
3278	* @ctxt: an HTML parser context
3279	* @publicID: a xmlChar** receiving PubidLiteral
3280	*
3281	* Parse an External ID or a Public ID
3282	*
3283	* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3284	* \| 'PUBLIC' S PubidLiteral S SystemLiteral
3285	*
3286	* [83] PublicID ::= 'PUBLIC' S PubidLiteral
3287	*
3288	* Returns the function returns SystemLiteral and in the second
3289	* case publicID receives PubidLiteral, is strict is off
3290	* it is possible to return NULL and have publicID set.
3291	*/
3292
3293	static xmlChar *
3294	htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
3295	xmlChar *URI = NULL;
3296
3297	if ((UPPER == 'S') && (UPP(1) == 'Y') &&
3298	(UPP(2) == 'S') && (UPP(3) == 'T') &&
3299	(UPP(4) == 'E') && (UPP(5) == 'M')) {
3300	SKIP(6);
3301	if (!IS_BLANK_CH(CUR)) {
3302	htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3303	"Space required after 'SYSTEM'\n", NULL, NULL);
3304	}
3305	SKIP_BLANKS;
3306	URI = htmlParseSystemLiteral(ctxt);
3307	if (URI == NULL) {
3308	htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
3309	"htmlParseExternalID: SYSTEM, no URI\n", NULL, NULL);
3310	}
3311	} else if ((UPPER == 'P') && (UPP(1) == 'U') &&
3312	(UPP(2) == 'B') && (UPP(3) == 'L') &&
3313	(UPP(4) == 'I') && (UPP(5) == 'C')) {
3314	SKIP(6);
3315	if (!IS_BLANK_CH(CUR)) {
3316	htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3317	"Space required after 'PUBLIC'\n", NULL, NULL);
3318	}
3319	SKIP_BLANKS;
3320	*publicID = htmlParsePubidLiteral(ctxt);
3321	if (*publicID == NULL) {
3322	htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
3323	"htmlParseExternalID: PUBLIC, no Public Identifier\n",
3324	NULL, NULL);
3325	}
3326	SKIP_BLANKS;
3327	if ((CUR == '"') \|\| (CUR == '\'')) {
3328	URI = htmlParseSystemLiteral(ctxt);
3329	}
3330	}
3331	return(URI);
3332	}
3333
3334	/**
3335	* xmlParsePI:
3336	* @ctxt: an XML parser context
3337	*
3338	* parse an XML Processing Instruction.
3339	*
3340	* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3341	*/
3342	static void
3343	htmlParsePI(htmlParserCtxtPtr ctxt) {
3344	xmlChar *buf = NULL;
3345	int len = 0;
3346	int size = HTML_PARSER_BUFFER_SIZE;
3347	int cur, l;
3348	const xmlChar *target;
3349	xmlParserInputState state;
3350	int count = 0;
3351
3352	if ((RAW == '<') && (NXT(1) == '?')) {
3353	state = ctxt->instate;
3354	ctxt->instate = XML_PARSER_PI;
3355	/*
3356	* this is a Processing Instruction.
3357	*/
3358	SKIP(2);
3359	SHRINK;
3360
3361	/*
3362	* Parse the target name and check for special support like
3363	* namespace.
3364	*/
3365	target = htmlParseName(ctxt);
3366	if (target != NULL) {
3367	if (RAW == '>') {
3368	SKIP(1);
3369
3370	/*
3371	* SAX: PI detected.
3372	*/
3373	if ((ctxt->sax) && (!ctxt->disableSAX) &&
3374	(ctxt->sax->processingInstruction != NULL))
3375	ctxt->sax->processingInstruction(ctxt->userData,
3376	target, NULL);
3377	ctxt->instate = state;
3378	return;
3379	}
3380	buf = (xmlChar ) xmlMallocAtomic(size sizeof(xmlChar));
3381	if (buf == NULL) {
3382	htmlErrMemory(ctxt, NULL);
3383	ctxt->instate = state;
3384	return;
3385	}
3386	cur = CUR;
3387	if (!IS_BLANK(cur)) {
3388	htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
3389	"ParsePI: PI %s space expected\n", target, NULL);
3390	}
3391	SKIP_BLANKS;
3392	cur = CUR_CHAR(l);
3393	while ((cur != 0) && (cur != '>')) {
3394	if (len + 5 >= size) {
3395	xmlChar *tmp;
3396
3397	size *= 2;
3398	tmp = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
3399	if (tmp == NULL) {
3400	htmlErrMemory(ctxt, NULL);
3401	xmlFree(buf);
3402	ctxt->instate = state;
3403	return;
3404	}
3405	buf = tmp;
3406	}
3407	count++;
3408	if (count > 50) {
3409	GROW;
3410	count = 0;
3411	}
3412	if (IS_CHAR(cur)) {
3413	COPY_BUF(l,buf,len,cur);
3414	} else {
3415	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3416	"Invalid char in processing instruction "
3417	"0x%X\n", cur);
3418	}
3419	NEXTL(l);
3420	cur = CUR_CHAR(l);
3421	if (cur == 0) {
3422	SHRINK;
3423	GROW;
3424	cur = CUR_CHAR(l);
3425	}
3426	}
3427	buf[len] = 0;
3428	if (cur != '>') {
3429	htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
3430	"ParsePI: PI %s never end ...\n", target, NULL);
3431	} else {
3432	SKIP(1);
3433
3434	/*
3435	* SAX: PI detected.
3436	*/
3437	if ((ctxt->sax) && (!ctxt->disableSAX) &&
3438	(ctxt->sax->processingInstruction != NULL))
3439	ctxt->sax->processingInstruction(ctxt->userData,
3440	target, buf);
3441	}
3442	xmlFree(buf);
3443	} else {
3444	htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
3445	"PI is not started correctly", NULL, NULL);
3446	}
3447	ctxt->instate = state;
3448	}
3449	}
3450
3451	/**
3452	* htmlParseComment:
3453	* @ctxt: an HTML parser context
3454	*
3455	* Parse an XML (SGML) comment <!-- .... -->
3456	*
3457	* [15] Comment ::= '<!--' ((Char - '-') \| ('-' (Char - '-')))* '-->'
3458	*/
3459	static void
3460	htmlParseComment(htmlParserCtxtPtr ctxt) {
3461	xmlChar *buf = NULL;
3462	int len;
3463	int size = HTML_PARSER_BUFFER_SIZE;
3464	int q, ql;
3465	int r, rl;
3466	int cur, l;
3467	int next, nl;
3468	xmlParserInputState state;
3469
3470	/*
3471	* Check that there is a comment right here.
3472	*/
3473	if ((RAW != '<') \|\| (NXT(1) != '!') \|\|
3474	(NXT(2) != '-') \|\| (NXT(3) != '-')) return;
3475
3476	state = ctxt->instate;
3477	ctxt->instate = XML_PARSER_COMMENT;
3478	SHRINK;
3479	SKIP(4);
3480	buf = (xmlChar ) xmlMallocAtomic(size sizeof(xmlChar));
3481	if (buf == NULL) {
3482	htmlErrMemory(ctxt, "buffer allocation failed\n");
3483	ctxt->instate = state;
3484	return;
3485	}
3486	len = 0;
3487	buf[len] = 0;
3488	q = CUR_CHAR(ql);
3489	if (q == 0)
3490	goto unfinished;
3491	NEXTL(ql);
3492	r = CUR_CHAR(rl);
3493	if (r == 0)
3494	goto unfinished;
3495	NEXTL(rl);
3496	cur = CUR_CHAR(l);
3497	while ((cur != 0) &&
3498	((cur != '>') \|\|
3499	(r != '-') \|\| (q != '-'))) {
3500	NEXTL(l);
3501	next = CUR_CHAR(nl);
3502	if (next == 0) {
3503	SHRINK;
3504	GROW;
3505	next = CUR_CHAR(nl);
3506	}
3507
3508	if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
3509	htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3510	"Comment incorrectly closed by '--!>'", NULL, NULL);
3511	cur = '>';
3512	break;
3513	}
3514
3515	if (len + 5 >= size) {
3516	xmlChar *tmp;
3517
3518	size *= 2;
3519	tmp = (xmlChar ) xmlRealloc(buf, size sizeof(xmlChar));
3520	if (tmp == NULL) {
3521	xmlFree(buf);
3522	htmlErrMemory(ctxt, "growing buffer failed\n");
3523	ctxt->instate = state;
3524	return;
3525	}
3526	buf = tmp;
3527	}
3528	if (IS_CHAR(q)) {
3529	COPY_BUF(ql,buf,len,q);
3530	} else {
3531	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3532	"Invalid char in comment 0x%X\n", q);
3533	}
3534
3535	q = r;
3536	ql = rl;
3537	r = cur;
3538	rl = l;
3539	cur = next;
3540	l = nl;
3541	}
3542	buf[len] = 0;
3543	if (cur == '>') {
3544	NEXT;
3545	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3546	(!ctxt->disableSAX))
3547	ctxt->sax->comment(ctxt->userData, buf);
3548	xmlFree(buf);
3549	ctxt->instate = state;
3550	return;
3551	}
3552
3553	unfinished:
3554	htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3555	"Comment not terminated \n<!--%.50s\n", buf, NULL);
3556	xmlFree(buf);
3557	}
3558
3559	/**
3560	* htmlParseCharRef:
3561	* @ctxt: an HTML parser context
3562	*
3563	* parse Reference declarations
3564	*
3565	* [66] CharRef ::= '&#' [0-9]+ ';' \|
3566	* '&#x' [0-9a-fA-F]+ ';'
3567	*
3568	* Returns the value parsed (as an int)
3569	*/
3570	int
3571	htmlParseCharRef(htmlParserCtxtPtr ctxt) {
3572	int val = 0;
3573
3574	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
3575	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3576	"htmlParseCharRef: context error\n",
3577	NULL, NULL);
3578	return(0);
3579	}
3580	if ((CUR == '&') && (NXT(1) == '#') &&
3581	((NXT(2) == 'x') \|\| NXT(2) == 'X')) {
3582	SKIP(3);
3583	while (CUR != ';') {
3584	if ((CUR >= '0') && (CUR <= '9')) {
3585	if (val < 0x110000)
3586	val = val * 16 + (CUR - '0');
3587	} else if ((CUR >= 'a') && (CUR <= 'f')) {
3588	if (val < 0x110000)
3589	val = val * 16 + (CUR - 'a') + 10;
3590	} else if ((CUR >= 'A') && (CUR <= 'F')) {
3591	if (val < 0x110000)
3592	val = val * 16 + (CUR - 'A') + 10;
3593	} else {
3594	htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF,
3595	"htmlParseCharRef: missing semicolon\n",
3596	NULL, NULL);
3597	break;
3598	}
3599	NEXT;
3600	}
3601	if (CUR == ';')
3602	NEXT;
3603	} else if ((CUR == '&') && (NXT(1) == '#')) {
3604	SKIP(2);
3605	while (CUR != ';') {
3606	if ((CUR >= '0') && (CUR <= '9')) {
3607	if (val < 0x110000)
3608	val = val * 10 + (CUR - '0');
3609	} else {
3610	htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF,
3611	"htmlParseCharRef: missing semicolon\n",
3612	NULL, NULL);
3613	break;
3614	}
3615	NEXT;
3616	}
3617	if (CUR == ';')
3618	NEXT;
3619	} else {
3620	htmlParseErr(ctxt, XML_ERR_INVALID_CHARREF,
3621	"htmlParseCharRef: invalid value\n", NULL, NULL);
3622	}
3623	/*
3624	* Check the value IS_CHAR ...
3625	*/
3626	if (IS_CHAR(val)) {
3627	return(val);
3628	} else if (val >= 0x110000) {
3629	htmlParseErr(ctxt, XML_ERR_INVALID_CHAR,
3630	"htmlParseCharRef: value too large\n", NULL, NULL);
3631	} else {
3632	htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
3633	"htmlParseCharRef: invalid xmlChar value %d\n",
3634	val);
3635	}
3636	return(0);
3637	}
3638
3639
3640	/**
3641	* htmlParseDocTypeDecl:
3642	* @ctxt: an HTML parser context
3643	*
3644	* parse a DOCTYPE declaration
3645	*
3646	* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3647	* ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
3648	*/
3649
3650	static void
3651	htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
3652	const xmlChar *name;
3653	xmlChar *ExternalID = NULL;
3654	xmlChar *URI = NULL;
3655
3656	/*
3657	* We know that '<!DOCTYPE' has been detected.
3658	*/
3659	SKIP(9);
3660
3661	SKIP_BLANKS;
3662
3663	/*
3664	* Parse the DOCTYPE name.
3665	*/
3666	name = htmlParseName(ctxt);
3667	if (name == NULL) {
3668	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3669	"htmlParseDocTypeDecl : no DOCTYPE name !\n",
3670	NULL, NULL);
3671	}
3672	/*
3673	* Check that upper(name) == "HTML" !!!!!!!!!!!!!
3674	*/
3675
3676	SKIP_BLANKS;
3677
3678	/*
3679	* Check for SystemID and ExternalID
3680	*/
3681	URI = htmlParseExternalID(ctxt, &ExternalID);
3682	SKIP_BLANKS;
3683
3684	/*
3685	* We should be at the end of the DOCTYPE declaration.
3686	*/
3687	if (CUR != '>') {
3688	htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
3689	"DOCTYPE improperly terminated\n", NULL, NULL);
3690	/* Ignore bogus content */
3691	while ((CUR != 0) && (CUR != '>'))
3692	NEXT;
3693	}
3694	if (CUR == '>')
3695	NEXT;
3696
3697	/*
3698	* Create or update the document accordingly to the DOCTYPE
3699	*/
3700	if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
3701	(!ctxt->disableSAX))
3702	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
3703
3704	/*
3705	* Cleanup, since we don't use all those identifiers
3706	*/
3707	if (URI != NULL) xmlFree(URI);
3708	if (ExternalID != NULL) xmlFree(ExternalID);
3709	}
3710
3711	/**
3712	* htmlParseAttribute:
3713	* @ctxt: an HTML parser context
3714	* @value: a xmlChar ** used to store the value of the attribute
3715	*
3716	* parse an attribute
3717	*
3718	* [41] Attribute ::= Name Eq AttValue
3719	*
3720	* [25] Eq ::= S? '=' S?
3721	*
3722	* With namespace:
3723	*
3724	* [NS 11] Attribute ::= QName Eq AttValue
3725	*
3726	* Also the case QName == xmlns:??? is handled independently as a namespace
3727	* definition.
3728	*
3729	* Returns the attribute name, and the value in *value.
3730	*/
3731
3732	static const xmlChar *
3733	htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
3734	const xmlChar *name;
3735	xmlChar *val = NULL;
3736
3737	*value = NULL;
3738	name = htmlParseHTMLName(ctxt);
3739	if (name == NULL) {
3740	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3741	"error parsing attribute name\n", NULL, NULL);
3742	return(NULL);
3743	}
3744
3745	/*
3746	* read the value
3747	*/
3748	SKIP_BLANKS;
3749	if (CUR == '=') {
3750	NEXT;
3751	SKIP_BLANKS;
3752	val = htmlParseAttValue(ctxt);
3753	}
3754
3755	*value = val;
3756	return(name);
3757	}
3758
3759	/**
3760	* htmlCheckEncodingDirect:
3761	* @ctxt: an HTML parser context
3762	* @attvalue: the attribute value
3763	*
3764	* Checks an attribute value to detect
3765	* the encoding
3766	* If a new encoding is detected the parser is switched to decode
3767	* it and pass UTF8
3768	*/
3769	static void
3770	htmlCheckEncodingDirect(htmlParserCtxtPtr ctxt, const xmlChar *encoding) {
3771
3772	if ((ctxt == NULL) \|\| (encoding == NULL) \|\|
3773	(ctxt->options & HTML_PARSE_IGNORE_ENC))
3774	return;
3775
3776	/* do not change encoding */
3777	if (ctxt->input->encoding != NULL)
3778	return;
3779
3780	if (encoding != NULL) {
3781	xmlCharEncoding enc;
3782	xmlCharEncodingHandlerPtr handler;
3783
3784	while ((encoding == ' ') \|\| (encoding == '\t')) encoding++;
3785
3786	if (ctxt->input->encoding != NULL)
3787	xmlFree((xmlChar *) ctxt->input->encoding);
3788	ctxt->input->encoding = xmlStrdup(encoding);
3789
3790	enc = xmlParseCharEncoding((const char *) encoding);
3791	/*
3792	* registered set of known encodings
3793	*/
3794	if (enc != XML_CHAR_ENCODING_ERROR) {
3795	if (((enc == XML_CHAR_ENCODING_UTF16LE) \|\|
3796	(enc == XML_CHAR_ENCODING_UTF16BE) \|\|
3797	(enc == XML_CHAR_ENCODING_UCS4LE) \|\|
3798	(enc == XML_CHAR_ENCODING_UCS4BE)) &&
3799	(ctxt->input->buf != NULL) &&
3800	(ctxt->input->buf->encoder == NULL)) {
3801	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3802	"htmlCheckEncoding: wrong encoding meta\n",
3803	NULL, NULL);
3804	} else {
3805	xmlSwitchEncoding(ctxt, enc);
3806	}
3807	ctxt->charset = XML_CHAR_ENCODING_UTF8;
3808	} else {
3809	/*
3810	* fallback for unknown encodings
3811	*/
3812	handler = xmlFindCharEncodingHandler((const char *) encoding);
3813	if (handler != NULL) {
3814	xmlSwitchToEncoding(ctxt, handler);
3815	ctxt->charset = XML_CHAR_ENCODING_UTF8;
3816	} else {
3817	htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
3818	"htmlCheckEncoding: unknown encoding %s\n",
3819	encoding, NULL);
3820	}
3821	}
3822
3823	if ((ctxt->input->buf != NULL) &&
3824	(ctxt->input->buf->encoder != NULL) &&
3825	(ctxt->input->buf->raw != NULL) &&
3826	(ctxt->input->buf->buffer != NULL)) {
3827	int nbchars;
3828	int processed;
3829
3830	/*
3831	* convert as much as possible to the parser reading buffer.
3832	*/
3833	processed = ctxt->input->cur - ctxt->input->base;
3834	xmlBufShrink(ctxt->input->buf->buffer, processed);
3835	nbchars = xmlCharEncInput(ctxt->input->buf, 1);
3836	xmlBufResetInput(ctxt->input->buf->buffer, ctxt->input);
3837	if (nbchars < 0) {
3838	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
3839	"htmlCheckEncoding: encoder error\n",
3840	NULL, NULL);
3841	}
3842	}
3843	}
3844	}
3845
3846	/**
3847	* htmlCheckEncoding:
3848	* @ctxt: an HTML parser context
3849	* @attvalue: the attribute value
3850	*
3851	* Checks an http-equiv attribute from a Meta tag to detect
3852	* the encoding
3853	* If a new encoding is detected the parser is switched to decode
3854	* it and pass UTF8
3855	*/
3856	static void
3857	htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
3858	const xmlChar *encoding;
3859
3860	if (!attvalue)
3861	return;
3862
3863	encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
3864	if (encoding != NULL) {
3865	encoding += 7;
3866	}
3867	/*
3868	* skip blank
3869	*/
3870	if (encoding && IS_BLANK_CH(*encoding))
3871	encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
3872	if (encoding && *encoding == '=') {
3873	encoding ++;
3874	htmlCheckEncodingDirect(ctxt, encoding);
3875	}
3876	}
3877
3878	/**
3879	* htmlCheckMeta:
3880	* @ctxt: an HTML parser context
3881	* @atts: the attributes values
3882	*
3883	* Checks an attributes from a Meta tag
3884	*/
3885	static void
3886	htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
3887	int i;
3888	const xmlChar att, value;
3889	int http = 0;
3890	const xmlChar *content = NULL;
3891
3892	if ((ctxt == NULL) \|\| (atts == NULL))
3893	return;
3894
3895	i = 0;
3896	att = atts[i++];
3897	while (att != NULL) {
3898	value = atts[i++];
3899	if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"http-equiv"))
3900	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
3901	http = 1;
3902	else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"charset")))
3903	htmlCheckEncodingDirect(ctxt, value);
3904	else if ((value != NULL) && (!xmlStrcasecmp(att, BAD_CAST"content")))
3905	content = value;
3906	att = atts[i++];
3907	}
3908	if ((http) && (content != NULL))
3909	htmlCheckEncoding(ctxt, content);
3910
3911	}
3912
3913	/**
3914	* htmlParseStartTag:
3915	* @ctxt: an HTML parser context
3916	*
3917	* parse a start of tag either for rule element or
3918	* EmptyElement. In both case we don't parse the tag closing chars.
3919	*
3920	* [40] STag ::= '<' Name (S Attribute)* S? '>'
3921	*
3922	* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3923	*
3924	* With namespace:
3925	*
3926	* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3927	*
3928	* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
3929	*
3930	* Returns 0 in case of success, -1 in case of error and 1 if discarded
3931	*/
3932
3933	static int
3934	htmlParseStartTag(htmlParserCtxtPtr ctxt) {
3935	const xmlChar *name;
3936	const xmlChar *attname;
3937	xmlChar *attvalue;
3938	const xmlChar **atts;
3939	int nbatts = 0;
3940	int maxatts;
3941	int meta = 0;
3942	int i;
3943	int discardtag = 0;
3944
3945	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
3946	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
3947	"htmlParseStartTag: context error\n", NULL, NULL);
3948	return -1;
3949	}
3950	if (ctxt->instate == XML_PARSER_EOF)
3951	return(-1);
3952	if (CUR != '<') return -1;
3953	NEXT;
3954
3955	atts = ctxt->atts;
3956	maxatts = ctxt->maxatts;
3957
3958	GROW;
3959	name = htmlParseHTMLName(ctxt);
3960	if (name == NULL) {
3961	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
3962	"htmlParseStartTag: invalid element name\n",
3963	NULL, NULL);
3964	/* Dump the bogus tag like browsers do */
3965	while ((CUR != 0) && (CUR != '>') &&
3966	(ctxt->instate != XML_PARSER_EOF))
3967	NEXT;
3968	return -1;
3969	}
3970	if (xmlStrEqual(name, BAD_CAST"meta"))
3971	meta = 1;
3972
3973	/*
3974	* Check for auto-closure of HTML elements.
3975	*/
3976	htmlAutoClose(ctxt, name);
3977
3978	/*
3979	* Check for implied HTML elements.
3980	*/
3981	htmlCheckImplied(ctxt, name);
3982
3983	/*
3984	* Avoid html at any level > 0, head at any level != 1
3985	* or any attempt to recurse body
3986	*/
3987	if ((ctxt->nameNr > 0) && (xmlStrEqual(name, BAD_CAST"html"))) {
3988	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3989	"htmlParseStartTag: misplaced <html> tag\n",
3990	name, NULL);
3991	discardtag = 1;
3992	ctxt->depth++;
3993	}
3994	if ((ctxt->nameNr != 1) &&
3995	(xmlStrEqual(name, BAD_CAST"head"))) {
3996	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
3997	"htmlParseStartTag: misplaced <head> tag\n",
3998	name, NULL);
3999	discardtag = 1;
4000	ctxt->depth++;
4001	}
4002	if (xmlStrEqual(name, BAD_CAST"body")) {
4003	int indx;
4004	for (indx = 0;indx < ctxt->nameNr;indx++) {
4005	if (xmlStrEqual(ctxt->nameTab[indx], BAD_CAST"body")) {
4006	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4007	"htmlParseStartTag: misplaced <body> tag\n",
4008	name, NULL);
4009	discardtag = 1;
4010	ctxt->depth++;
4011	}
4012	}
4013	}
4014
4015	/*
4016	* Now parse the attributes, it ends up with the ending
4017	*
4018	* (S Attribute)* S?
4019	*/
4020	SKIP_BLANKS;
4021	while ((CUR != 0) &&
4022	(CUR != '>') &&
4023	((CUR != '/') \|\| (NXT(1) != '>'))) {
4024	GROW;
4025	attname = htmlParseAttribute(ctxt, &attvalue);
4026	if (attname != NULL) {
4027
4028	/*
4029	* Well formedness requires at most one declaration of an attribute
4030	*/
4031	for (i = 0; i < nbatts;i += 2) {
4032	if (xmlStrEqual(atts[i], attname)) {
4033	htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_REDEFINED,
4034	"Attribute %s redefined\n", attname, NULL);
4035	if (attvalue != NULL)
4036	xmlFree(attvalue);
4037	goto failed;
4038	}
4039	}
4040
4041	/*
4042	* Add the pair to atts
4043	*/
4044	if (atts == NULL) {
4045	maxatts = 22; /* allow for 10 attrs by default */
4046	atts = (const xmlChar **)
4047	xmlMalloc(maxatts * sizeof(xmlChar *));
4048	if (atts == NULL) {
4049	htmlErrMemory(ctxt, NULL);
4050	if (attvalue != NULL)
4051	xmlFree(attvalue);
4052	goto failed;
4053	}
4054	ctxt->atts = atts;
4055	ctxt->maxatts = maxatts;
4056	} else if (nbatts + 4 > maxatts) {
4057	const xmlChar **n;
4058
4059	maxatts *= 2;
4060	n = (const xmlChar *) xmlRealloc((void ) atts,
4061	maxatts * sizeof(const xmlChar *));
4062	if (n == NULL) {
4063	htmlErrMemory(ctxt, NULL);
4064	if (attvalue != NULL)
4065	xmlFree(attvalue);
4066	goto failed;
4067	}
4068	atts = n;
4069	ctxt->atts = atts;
4070	ctxt->maxatts = maxatts;
4071	}
4072	atts[nbatts++] = attname;
4073	atts[nbatts++] = attvalue;
4074	atts[nbatts] = NULL;
4075	atts[nbatts + 1] = NULL;
4076	}
4077	else {
4078	if (attvalue != NULL)
4079	xmlFree(attvalue);
4080	/* Dump the bogus attribute string up to the next blank or
4081	* the end of the tag. */
4082	while ((CUR != 0) &&
4083	!(IS_BLANK_CH(CUR)) && (CUR != '>') &&
4084	((CUR != '/') \|\| (NXT(1) != '>')))
4085	NEXT;
4086	}
4087
4088	failed:
4089	SKIP_BLANKS;
4090	}
4091
4092	/*
4093	* Handle specific association to the META tag
4094	*/
4095	if (meta && (nbatts != 0))
4096	htmlCheckMeta(ctxt, atts);
4097
4098	/*
4099	* SAX: Start of Element !
4100	*/
4101	if (!discardtag) {
4102	htmlnamePush(ctxt, name);
4103	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
4104	if (nbatts != 0)
4105	ctxt->sax->startElement(ctxt->userData, name, atts);
4106	else
4107	ctxt->sax->startElement(ctxt->userData, name, NULL);
4108	}
4109	}
4110
4111	if (atts != NULL) {
4112	for (i = 1;i < nbatts;i += 2) {
4113	if (atts[i] != NULL)
4114	xmlFree((xmlChar *) atts[i]);
4115	}
4116	}
4117
4118	return(discardtag);
4119	}
4120
4121	/**
4122	* htmlParseEndTag:
4123	* @ctxt: an HTML parser context
4124	*
4125	* parse an end of tag
4126	*
4127	* [42] ETag ::= '</' Name S? '>'
4128	*
4129	* With namespace
4130	*
4131	* [NS 9] ETag ::= '</' QName S? '>'
4132	*
4133	* Returns 1 if the current level should be closed.
4134	*/
4135
4136	static int
4137	htmlParseEndTag(htmlParserCtxtPtr ctxt)
4138	{
4139	const xmlChar *name;
4140	const xmlChar *oldname;
4141	int i, ret;
4142
4143	if ((CUR != '<') \|\| (NXT(1) != '/')) {
4144	htmlParseErr(ctxt, XML_ERR_LTSLASH_REQUIRED,
4145	"htmlParseEndTag: '</' not found\n", NULL, NULL);
4146	return (0);
4147	}
4148	SKIP(2);
4149
4150	name = htmlParseHTMLName(ctxt);
4151	if (name == NULL)
4152	return (0);
4153	/*
4154	* We should definitely be at the ending "S? '>'" part
4155	*/
4156	SKIP_BLANKS;
4157	if (CUR != '>') {
4158	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4159	"End tag : expected '>'\n", NULL, NULL);
4160	/* Skip to next '>' */
4161	while ((CUR != 0) && (CUR != '>'))
4162	NEXT;
4163	}
4164	if (CUR == '>')
4165	NEXT;
4166
4167	/*
4168	* if we ignored misplaced tags in htmlParseStartTag don't pop them
4169	* out now.
4170	*/
4171	if ((ctxt->depth > 0) &&
4172	(xmlStrEqual(name, BAD_CAST "html") \|\|
4173	xmlStrEqual(name, BAD_CAST "body") \|\|
4174	xmlStrEqual(name, BAD_CAST "head"))) {
4175	ctxt->depth--;
4176	return (0);
4177	}
4178
4179	/*
4180	* If the name read is not one of the element in the parsing stack
4181	* then return, it's just an error.
4182	*/
4183	for (i = (ctxt->nameNr - 1); i >= 0; i--) {
4184	if (xmlStrEqual(name, ctxt->nameTab[i]))
4185	break;
4186	}
4187	if (i < 0) {
4188	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
4189	"Unexpected end tag : %s\n", name, NULL);
4190	return (0);
4191	}
4192
4193
4194	/*
4195	* Check for auto-closure of HTML elements.
4196	*/
4197
4198	htmlAutoCloseOnClose(ctxt, name);
4199
4200	/*
4201	* Well formedness constraints, opening and closing must match.
4202	* With the exception that the autoclose may have popped stuff out
4203	* of the stack.
4204	*/
4205	if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) {
4206	htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
4207	"Opening and ending tag mismatch: %s and %s\n",
4208	name, ctxt->name);
4209	}
4210
4211	/*
4212	* SAX: End of Tag
4213	*/
4214	oldname = ctxt->name;
4215	if ((oldname != NULL) && (xmlStrEqual(oldname, name))) {
4216	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4217	ctxt->sax->endElement(ctxt->userData, name);
4218	htmlNodeInfoPop(ctxt);
4219	htmlnamePop(ctxt);
4220	ret = 1;
4221	} else {
4222	ret = 0;
4223	}
4224
4225	return (ret);
4226	}
4227
4228
4229	/**
4230	* htmlParseReference:
4231	* @ctxt: an HTML parser context
4232	*
4233	* parse and handle entity references in content,
4234	* this will end-up in a call to character() since this is either a
4235	* CharRef, or a predefined entity.
4236	*/
4237	static void
4238	htmlParseReference(htmlParserCtxtPtr ctxt) {
4239	const htmlEntityDesc * ent;
4240	xmlChar out[6];
4241	const xmlChar *name;
4242	if (CUR != '&') return;
4243
4244	if (NXT(1) == '#') {
4245	unsigned int c;
4246	int bits, i = 0;
4247
4248	c = htmlParseCharRef(ctxt);
4249	if (c == 0)
4250	return;
4251
4252	if (c < 0x80) { out[i++]= c; bits= -6; }
4253	else if (c < 0x800) { out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
4254	else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
4255	else { out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
4256
4257	for ( ; bits >= 0; bits-= 6) {
4258	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
4259	}
4260	out[i] = 0;
4261
4262	htmlCheckParagraph(ctxt);
4263	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4264	ctxt->sax->characters(ctxt->userData, out, i);
4265	} else {
4266	ent = htmlParseEntityRef(ctxt, &name);
4267	if (name == NULL) {
4268	htmlCheckParagraph(ctxt);
4269	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4270	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
4271	return;
4272	}
4273	if ((ent == NULL) \|\| !(ent->value > 0)) {
4274	htmlCheckParagraph(ctxt);
4275	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
4276	ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
4277	ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
4278	/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
4279	}
4280	} else {
4281	unsigned int c;
4282	int bits, i = 0;
4283
4284	c = ent->value;
4285	if (c < 0x80)
4286	{ out[i++]= c; bits= -6; }
4287	else if (c < 0x800)
4288	{ out[i++]=((c >> 6) & 0x1F) \| 0xC0; bits= 0; }
4289	else if (c < 0x10000)
4290	{ out[i++]=((c >> 12) & 0x0F) \| 0xE0; bits= 6; }
4291	else
4292	{ out[i++]=((c >> 18) & 0x07) \| 0xF0; bits= 12; }
4293
4294	for ( ; bits >= 0; bits-= 6) {
4295	out[i++]= ((c >> bits) & 0x3F) \| 0x80;
4296	}
4297	out[i] = 0;
4298
4299	htmlCheckParagraph(ctxt);
4300	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4301	ctxt->sax->characters(ctxt->userData, out, i);
4302	}
4303	}
4304	}
4305
4306	/**
4307	* htmlParseContent:
4308	* @ctxt: an HTML parser context
4309	*
4310	* Parse a content: comment, sub-element, reference or text.
4311	* Kept for compatibility with old code
4312	*/
4313
4314	static void
4315	htmlParseContent(htmlParserCtxtPtr ctxt) {
4316	xmlChar *currentNode;
4317	int depth;
4318	const xmlChar *name;
4319
4320	currentNode = xmlStrdup(ctxt->name);
4321	depth = ctxt->nameNr;
4322	while (1) {
4323	GROW;
4324
4325	if (ctxt->instate == XML_PARSER_EOF)
4326	break;
4327
4328	/*
4329	* Our tag or one of it's parent or children is ending.
4330	*/
4331	if ((CUR == '<') && (NXT(1) == '/')) {
4332	if (htmlParseEndTag(ctxt) &&
4333	((currentNode != NULL) \|\| (ctxt->nameNr == 0))) {
4334	if (currentNode != NULL)
4335	xmlFree(currentNode);
4336	return;
4337	}
4338	continue; /* while */
4339	}
4340
4341	else if ((CUR == '<') &&
4342	((IS_ASCII_LETTER(NXT(1))) \|\|
4343	(NXT(1) == '_') \|\| (NXT(1) == ':'))) {
4344	name = htmlParseHTMLName_nonInvasive(ctxt);
4345	if (name == NULL) {
4346	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4347	"htmlParseStartTag: invalid element name\n",
4348	NULL, NULL);
4349	/* Dump the bogus tag like browsers do */
4350	while ((CUR != 0) && (CUR != '>'))
4351	NEXT;
4352
4353	if (currentNode != NULL)
4354	xmlFree(currentNode);
4355	return;
4356	}
4357
4358	if (ctxt->name != NULL) {
4359	if (htmlCheckAutoClose(name, ctxt->name) == 1) {
4360	htmlAutoClose(ctxt, name);
4361	continue;
4362	}
4363	}
4364	}
4365
4366	/*
4367	* Has this node been popped out during parsing of
4368	* the next element
4369	*/
4370	if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
4371	(!xmlStrEqual(currentNode, ctxt->name)))
4372	{
4373	if (currentNode != NULL) xmlFree(currentNode);
4374	return;
4375	}
4376
4377	if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) \|\|
4378	(xmlStrEqual(currentNode, BAD_CAST"style")))) {
4379	/*
4380	* Handle SCRIPT/STYLE separately
4381	*/
4382	htmlParseScript(ctxt);
4383	} else {
4384	/*
4385	* Sometimes DOCTYPE arrives in the middle of the document
4386	*/
4387	if ((CUR == '<') && (NXT(1) == '!') &&
4388	(UPP(2) == 'D') && (UPP(3) == 'O') &&
4389	(UPP(4) == 'C') && (UPP(5) == 'T') &&
4390	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
4391	(UPP(8) == 'E')) {
4392	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4393	"Misplaced DOCTYPE declaration\n",
4394	BAD_CAST "DOCTYPE" , NULL);
4395	htmlParseDocTypeDecl(ctxt);
4396	}
4397
4398	/*
4399	* First case : a comment
4400	*/
4401	if ((CUR == '<') && (NXT(1) == '!') &&
4402	(NXT(2) == '-') && (NXT(3) == '-')) {
4403	htmlParseComment(ctxt);
4404	}
4405
4406	/*
4407	* Second case : a Processing Instruction.
4408	*/
4409	else if ((CUR == '<') && (NXT(1) == '?')) {
4410	htmlParsePI(ctxt);
4411	}
4412
4413	/*
4414	* Third case : a sub-element.
4415	*/
4416	else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) {
4417	htmlParseElement(ctxt);
4418	}
4419	else if (CUR == '<') {
4420	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4421	(ctxt->sax->characters != NULL))
4422	ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
4423	NEXT;
4424	}
4425
4426	/*
4427	* Fourth case : a reference. If if has not been resolved,
4428	* parsing returns it's Name, create the node
4429	*/
4430	else if (CUR == '&') {
4431	htmlParseReference(ctxt);
4432	}
4433
4434	/*
4435	* Fifth case : end of the resource
4436	*/
4437	else if (CUR == 0) {
4438	htmlAutoCloseOnEnd(ctxt);
4439	break;
4440	}
4441
4442	/*
4443	* Last case, text. Note that References are handled directly.
4444	*/
4445	else {
4446	htmlParseCharData(ctxt);
4447	}
4448	}
4449	GROW;
4450	}
4451	if (currentNode != NULL) xmlFree(currentNode);
4452	}
4453
4454	/**
4455	* htmlParseElement:
4456	* @ctxt: an HTML parser context
4457	*
4458	* parse an HTML element, this is highly recursive
4459	* this is kept for compatibility with previous code versions
4460	*
4461	* [39] element ::= EmptyElemTag \| STag content ETag
4462	*
4463	* [41] Attribute ::= Name Eq AttValue
4464	*/
4465
4466	void
4467	htmlParseElement(htmlParserCtxtPtr ctxt) {
4468	const xmlChar *name;
4469	xmlChar *currentNode = NULL;
4470	const htmlElemDesc * info;
4471	htmlParserNodeInfo node_info;
4472	int failed;
4473	int depth;
4474	const xmlChar *oldptr;
4475
4476	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
4477	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4478	"htmlParseElement: context error\n", NULL, NULL);
4479	return;
4480	}
4481
4482	if (ctxt->instate == XML_PARSER_EOF)
4483	return;
4484
4485	/* Capture start position */
4486	if (ctxt->record_info) {
4487	node_info.begin_pos = ctxt->input->consumed +
4488	(CUR_PTR - ctxt->input->base);
4489	node_info.begin_line = ctxt->input->line;
4490	}
4491
4492	failed = htmlParseStartTag(ctxt);
4493	name = ctxt->name;
4494	if ((failed == -1) \|\| (name == NULL)) {
4495	if (CUR == '>')
4496	NEXT;
4497	return;
4498	}
4499
4500	/*
4501	* Lookup the info for that element.
4502	*/
4503	info = htmlTagLookup(name);
4504	if (info == NULL) {
4505	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4506	"Tag %s invalid\n", name, NULL);
4507	}
4508
4509	/*
4510	* Check for an Empty Element labeled the XML/SGML way
4511	*/
4512	if ((CUR == '/') && (NXT(1) == '>')) {
4513	SKIP(2);
4514	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4515	ctxt->sax->endElement(ctxt->userData, name);
4516	htmlnamePop(ctxt);
4517	return;
4518	}
4519
4520	if (CUR == '>') {
4521	NEXT;
4522	} else {
4523	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4524	"Couldn't find end of Start Tag %s\n", name, NULL);
4525
4526	/*
4527	* end of parsing of this node.
4528	*/
4529	if (xmlStrEqual(name, ctxt->name)) {
4530	nodePop(ctxt);
4531	htmlnamePop(ctxt);
4532	}
4533
4534	/*
4535	* Capture end position and add node
4536	*/
4537	if (ctxt->record_info) {
4538	node_info.end_pos = ctxt->input->consumed +
4539	(CUR_PTR - ctxt->input->base);
4540	node_info.end_line = ctxt->input->line;
4541	node_info.node = ctxt->node;
4542	xmlParserAddNodeInfo(ctxt, &node_info);
4543	}
4544	return;
4545	}
4546
4547	/*
4548	* Check for an Empty Element from DTD definition
4549	*/
4550	if ((info != NULL) && (info->empty)) {
4551	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4552	ctxt->sax->endElement(ctxt->userData, name);
4553	htmlnamePop(ctxt);
4554	return;
4555	}
4556
4557	/*
4558	* Parse the content of the element:
4559	*/
4560	currentNode = xmlStrdup(ctxt->name);
4561	depth = ctxt->nameNr;
4562	while (CUR != 0) {
4563	oldptr = ctxt->input->cur;
4564	htmlParseContent(ctxt);
4565	if (oldptr==ctxt->input->cur) break;
4566	if (ctxt->nameNr < depth) break;
4567	}
4568
4569	/*
4570	* Capture end position and add node
4571	*/
4572	if ( currentNode != NULL && ctxt->record_info ) {
4573	node_info.end_pos = ctxt->input->consumed +
4574	(CUR_PTR - ctxt->input->base);
4575	node_info.end_line = ctxt->input->line;
4576	node_info.node = ctxt->node;
4577	xmlParserAddNodeInfo(ctxt, &node_info);
4578	}
4579	if (CUR == 0) {
4580	htmlAutoCloseOnEnd(ctxt);
4581	}
4582
4583	if (currentNode != NULL)
4584	xmlFree(currentNode);
4585	}
4586
4587	static void
4588	htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
4589	/*
4590	* Capture end position and add node
4591	*/
4592	if ( ctxt->node != NULL && ctxt->record_info ) {
4593	ctxt->nodeInfo->end_pos = ctxt->input->consumed +
4594	(CUR_PTR - ctxt->input->base);
4595	ctxt->nodeInfo->end_line = ctxt->input->line;
4596	ctxt->nodeInfo->node = ctxt->node;
4597	xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
4598	htmlNodeInfoPop(ctxt);
4599	}
4600	if (CUR == 0) {
4601	htmlAutoCloseOnEnd(ctxt);
4602	}
4603	}
4604
4605	/**
4606	* htmlParseElementInternal:
4607	* @ctxt: an HTML parser context
4608	*
4609	* parse an HTML element, new version, non recursive
4610	*
4611	* [39] element ::= EmptyElemTag \| STag content ETag
4612	*
4613	* [41] Attribute ::= Name Eq AttValue
4614	*/
4615
4616	static void
4617	htmlParseElementInternal(htmlParserCtxtPtr ctxt) {
4618	const xmlChar *name;
4619	const htmlElemDesc * info;
4620	htmlParserNodeInfo node_info = { NULL, 0, 0, 0, 0 };
4621	int failed;
4622
4623	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
4624	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4625	"htmlParseElementInternal: context error\n", NULL, NULL);
4626	return;
4627	}
4628
4629	if (ctxt->instate == XML_PARSER_EOF)
4630	return;
4631
4632	/* Capture start position */
4633	if (ctxt->record_info) {
4634	node_info.begin_pos = ctxt->input->consumed +
4635	(CUR_PTR - ctxt->input->base);
4636	node_info.begin_line = ctxt->input->line;
4637	}
4638
4639	failed = htmlParseStartTag(ctxt);
4640	name = ctxt->name;
4641	if ((failed == -1) \|\| (name == NULL)) {
4642	if (CUR == '>')
4643	NEXT;
4644	return;
4645	}
4646
4647	/*
4648	* Lookup the info for that element.
4649	*/
4650	info = htmlTagLookup(name);
4651	if (info == NULL) {
4652	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
4653	"Tag %s invalid\n", name, NULL);
4654	}
4655
4656	/*
4657	* Check for an Empty Element labeled the XML/SGML way
4658	*/
4659	if ((CUR == '/') && (NXT(1) == '>')) {
4660	SKIP(2);
4661	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4662	ctxt->sax->endElement(ctxt->userData, name);
4663	htmlnamePop(ctxt);
4664	return;
4665	}
4666
4667	if (CUR == '>') {
4668	NEXT;
4669	} else {
4670	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
4671	"Couldn't find end of Start Tag %s\n", name, NULL);
4672
4673	/*
4674	* end of parsing of this node.
4675	*/
4676	if (xmlStrEqual(name, ctxt->name)) {
4677	nodePop(ctxt);
4678	htmlnamePop(ctxt);
4679	}
4680
4681	if (ctxt->record_info)
4682	htmlNodeInfoPush(ctxt, &node_info);
4683	htmlParserFinishElementParsing(ctxt);
4684	return;
4685	}
4686
4687	/*
4688	* Check for an Empty Element from DTD definition
4689	*/
4690	if ((info != NULL) && (info->empty)) {
4691	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
4692	ctxt->sax->endElement(ctxt->userData, name);
4693	htmlnamePop(ctxt);
4694	return;
4695	}
4696
4697	if (ctxt->record_info)
4698	htmlNodeInfoPush(ctxt, &node_info);
4699	}
4700
4701	/**
4702	* htmlParseContentInternal:
4703	* @ctxt: an HTML parser context
4704	*
4705	* Parse a content: comment, sub-element, reference or text.
4706	* New version for non recursive htmlParseElementInternal
4707	*/
4708
4709	static void
4710	htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
4711	xmlChar *currentNode;
4712	int depth;
4713	const xmlChar *name;
4714
4715	currentNode = xmlStrdup(ctxt->name);
4716	depth = ctxt->nameNr;
4717	while (1) {
4718	GROW;
4719
4720	if (ctxt->instate == XML_PARSER_EOF)
4721	break;
4722
4723	/*
4724	* Our tag or one of it's parent or children is ending.
4725	*/
4726	if ((CUR == '<') && (NXT(1) == '/')) {
4727	if (htmlParseEndTag(ctxt) &&
4728	((currentNode != NULL) \|\| (ctxt->nameNr == 0))) {
4729	if (currentNode != NULL)
4730	xmlFree(currentNode);
4731
4732	currentNode = xmlStrdup(ctxt->name);
4733	depth = ctxt->nameNr;
4734	}
4735	continue; /* while */
4736	}
4737
4738	else if ((CUR == '<') &&
4739	((IS_ASCII_LETTER(NXT(1))) \|\|
4740	(NXT(1) == '_') \|\| (NXT(1) == ':'))) {
4741	name = htmlParseHTMLName_nonInvasive(ctxt);
4742	if (name == NULL) {
4743	htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
4744	"htmlParseStartTag: invalid element name\n",
4745	NULL, NULL);
4746	/* Dump the bogus tag like browsers do */
4747	while ((CUR == 0) && (CUR != '>'))
4748	NEXT;
4749
4750	htmlParserFinishElementParsing(ctxt);
4751	if (currentNode != NULL)
4752	xmlFree(currentNode);
4753
4754	currentNode = xmlStrdup(ctxt->name);
4755	depth = ctxt->nameNr;
4756	continue;
4757	}
4758
4759	if (ctxt->name != NULL) {
4760	if (htmlCheckAutoClose(name, ctxt->name) == 1) {
4761	htmlAutoClose(ctxt, name);
4762	continue;
4763	}
4764	}
4765	}
4766
4767	/*
4768	* Has this node been popped out during parsing of
4769	* the next element
4770	*/
4771	if ((ctxt->nameNr > 0) && (depth >= ctxt->nameNr) &&
4772	(!xmlStrEqual(currentNode, ctxt->name)))
4773	{
4774	htmlParserFinishElementParsing(ctxt);
4775	if (currentNode != NULL) xmlFree(currentNode);
4776
4777	currentNode = xmlStrdup(ctxt->name);
4778	depth = ctxt->nameNr;
4779	continue;
4780	}
4781
4782	if ((CUR != 0) && ((xmlStrEqual(currentNode, BAD_CAST"script")) \|\|
4783	(xmlStrEqual(currentNode, BAD_CAST"style")))) {
4784	/*
4785	* Handle SCRIPT/STYLE separately
4786	*/
4787	htmlParseScript(ctxt);
4788	} else {
4789	/*
4790	* Sometimes DOCTYPE arrives in the middle of the document
4791	*/
4792	if ((CUR == '<') && (NXT(1) == '!') &&
4793	(UPP(2) == 'D') && (UPP(3) == 'O') &&
4794	(UPP(4) == 'C') && (UPP(5) == 'T') &&
4795	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
4796	(UPP(8) == 'E')) {
4797	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
4798	"Misplaced DOCTYPE declaration\n",
4799	BAD_CAST "DOCTYPE" , NULL);
4800	htmlParseDocTypeDecl(ctxt);
4801	}
4802
4803	/*
4804	* First case : a comment
4805	*/
4806	if ((CUR == '<') && (NXT(1) == '!') &&
4807	(NXT(2) == '-') && (NXT(3) == '-')) {
4808	htmlParseComment(ctxt);
4809	}
4810
4811	/*
4812	* Second case : a Processing Instruction.
4813	*/
4814	else if ((CUR == '<') && (NXT(1) == '?')) {
4815	htmlParsePI(ctxt);
4816	}
4817
4818	/*
4819	* Third case : a sub-element.
4820	*/
4821	else if ((CUR == '<') && IS_ASCII_LETTER(NXT(1))) {
4822	htmlParseElementInternal(ctxt);
4823	if (currentNode != NULL) xmlFree(currentNode);
4824
4825	currentNode = xmlStrdup(ctxt->name);
4826	depth = ctxt->nameNr;
4827	}
4828	else if (CUR == '<') {
4829	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4830	(ctxt->sax->characters != NULL))
4831	ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
4832	NEXT;
4833	}
4834
4835	/*
4836	* Fourth case : a reference. If if has not been resolved,
4837	* parsing returns it's Name, create the node
4838	*/
4839	else if (CUR == '&') {
4840	htmlParseReference(ctxt);
4841	}
4842
4843	/*
4844	* Fifth case : end of the resource
4845	*/
4846	else if (CUR == 0) {
4847	htmlAutoCloseOnEnd(ctxt);
4848	break;
4849	}
4850
4851	/*
4852	* Last case, text. Note that References are handled directly.
4853	*/
4854	else {
4855	htmlParseCharData(ctxt);
4856	}
4857	}
4858	GROW;
4859	}
4860	if (currentNode != NULL) xmlFree(currentNode);
4861	}
4862
4863	/**
4864	* htmlParseContent:
4865	* @ctxt: an HTML parser context
4866	*
4867	* Parse a content: comment, sub-element, reference or text.
4868	* This is the entry point when called from parser.c
4869	*/
4870
4871	void
4872	__htmlParseContent(void *ctxt) {
4873	if (ctxt != NULL)
4874	htmlParseContentInternal((htmlParserCtxtPtr) ctxt);
4875	}
4876
4877	/**
4878	* htmlParseDocument:
4879	* @ctxt: an HTML parser context
4880	*
4881	* parse an HTML document (and build a tree if using the standard SAX
4882	* interface).
4883	*
4884	* Returns 0, -1 in case of error. the parser context is augmented
4885	* as a result of the parsing.
4886	*/
4887
4888	int
4889	htmlParseDocument(htmlParserCtxtPtr ctxt) {
4890	xmlChar start[4];
4891	xmlCharEncoding enc;
4892	xmlDtdPtr dtd;
4893
4894	xmlInitParser();
4895
4896	htmlDefaultSAXHandlerInit();
4897
4898	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
4899	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
4900	"htmlParseDocument: context error\n", NULL, NULL);
4901	return(XML_ERR_INTERNAL_ERROR);
4902	}
4903	ctxt->html = 1;
4904	ctxt->linenumbers = 1;
4905	GROW;
4906	/*
4907	* SAX: beginning of the document processing.
4908	*/
4909	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
4910	ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
4911
4912	if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
4913	((ctxt->input->end - ctxt->input->cur) >= 4)) {
4914	/*
4915	* Get the 4 first bytes and decode the charset
4916	* if enc != XML_CHAR_ENCODING_NONE
4917	* plug some encoding conversion routines.
4918	*/
4919	start[0] = RAW;
4920	start[1] = NXT(1);
4921	start[2] = NXT(2);
4922	start[3] = NXT(3);
4923	enc = xmlDetectCharEncoding(&start[0], 4);
4924	if (enc != XML_CHAR_ENCODING_NONE) {
4925	xmlSwitchEncoding(ctxt, enc);
4926	}
4927	}
4928
4929	/*
4930	* Wipe out everything which is before the first '<'
4931	*/
4932	SKIP_BLANKS;
4933	if (CUR == 0) {
4934	htmlParseErr(ctxt, XML_ERR_DOCUMENT_EMPTY,
4935	"Document is empty\n", NULL, NULL);
4936	}
4937
4938	if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
4939	ctxt->sax->startDocument(ctxt->userData);
4940
4941
4942	/*
4943	* Parse possible comments and PIs before any content
4944	*/
4945	while (((CUR == '<') && (NXT(1) == '!') &&
4946	(NXT(2) == '-') && (NXT(3) == '-')) \|\|
4947	((CUR == '<') && (NXT(1) == '?'))) {
4948	htmlParseComment(ctxt);
4949	htmlParsePI(ctxt);
4950	SKIP_BLANKS;
4951	}
4952
4953
4954	/*
4955	* Then possibly doc type declaration(s) and more Misc
4956	* (doctypedecl Misc*)?
4957	*/
4958	if ((CUR == '<') && (NXT(1) == '!') &&
4959	(UPP(2) == 'D') && (UPP(3) == 'O') &&
4960	(UPP(4) == 'C') && (UPP(5) == 'T') &&
4961	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
4962	(UPP(8) == 'E')) {
4963	htmlParseDocTypeDecl(ctxt);
4964	}
4965	SKIP_BLANKS;
4966
4967	/*
4968	* Parse possible comments and PIs before any content
4969	*/
4970	while (((CUR == '<') && (NXT(1) == '!') &&
4971	(NXT(2) == '-') && (NXT(3) == '-')) \|\|
4972	((CUR == '<') && (NXT(1) == '?'))) {
4973	htmlParseComment(ctxt);
4974	htmlParsePI(ctxt);
4975	SKIP_BLANKS;
4976	}
4977
4978	/*
4979	* Time to start parsing the tree itself
4980	*/
4981	htmlParseContentInternal(ctxt);
4982
4983	/*
4984	* autoclose
4985	*/
4986	if (CUR == 0)
4987	htmlAutoCloseOnEnd(ctxt);
4988
4989
4990	/*
4991	* SAX: end of the document processing.
4992	*/
4993	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
4994	ctxt->sax->endDocument(ctxt->userData);
4995
4996	if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
4997	dtd = xmlGetIntSubset(ctxt->myDoc);
4998	if (dtd == NULL)
4999	ctxt->myDoc->intSubset =
5000	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
5001	BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
5002	BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
5003	}
5004	if (! ctxt->wellFormed) return(-1);
5005	return(0);
5006	}
5007
5008
5009	/************************************************************************
5010	* *
5011	* Parser contexts handling *
5012	* *
5013	************************************************************************/
5014
5015	/**
5016	* htmlInitParserCtxt:
5017	* @ctxt: an HTML parser context
5018	*
5019	* Initialize a parser context
5020	*
5021	* Returns 0 in case of success and -1 in case of error
5022	*/
5023
5024	static int
5025	htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
5026	{
5027	htmlSAXHandler *sax;
5028
5029	if (ctxt == NULL) return(-1);
5030	memset(ctxt, 0, sizeof(htmlParserCtxt));
5031
5032	ctxt->dict = xmlDictCreate();
5033	if (ctxt->dict == NULL) {
5034	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
5035	return(-1);
5036	}
5037	sax = (htmlSAXHandler *) xmlMalloc(sizeof(htmlSAXHandler));
5038	if (sax == NULL) {
5039	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
5040	return(-1);
5041	}
5042	else
5043	memset(sax, 0, sizeof(htmlSAXHandler));
5044
5045	/* Allocate the Input stack */
5046	ctxt->inputTab = (htmlParserInputPtr *)
5047	xmlMalloc(5 * sizeof(htmlParserInputPtr));
5048	if (ctxt->inputTab == NULL) {
5049	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
5050	ctxt->inputNr = 0;
5051	ctxt->inputMax = 0;
5052	ctxt->input = NULL;
5053	return(-1);
5054	}
5055	ctxt->inputNr = 0;
5056	ctxt->inputMax = 5;
5057	ctxt->input = NULL;
5058	ctxt->version = NULL;
5059	ctxt->encoding = NULL;
5060	ctxt->standalone = -1;
5061	ctxt->instate = XML_PARSER_START;
5062
5063	/* Allocate the Node stack */
5064	ctxt->nodeTab = (htmlNodePtr ) xmlMalloc(10 sizeof(htmlNodePtr));
5065	if (ctxt->nodeTab == NULL) {
5066	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
5067	ctxt->nodeNr = 0;
5068	ctxt->nodeMax = 0;
5069	ctxt->node = NULL;
5070	ctxt->inputNr = 0;
5071	ctxt->inputMax = 0;
5072	ctxt->input = NULL;
5073	return(-1);
5074	}
5075	ctxt->nodeNr = 0;
5076	ctxt->nodeMax = 10;
5077	ctxt->node = NULL;
5078
5079	/* Allocate the Name stack */
5080	ctxt->nameTab = (const xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
5081	if (ctxt->nameTab == NULL) {
5082	htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
5083	ctxt->nameNr = 0;
5084	ctxt->nameMax = 0;
5085	ctxt->name = NULL;
5086	ctxt->nodeNr = 0;
5087	ctxt->nodeMax = 0;
5088	ctxt->node = NULL;
5089	ctxt->inputNr = 0;
5090	ctxt->inputMax = 0;
5091	ctxt->input = NULL;
5092	return(-1);
5093	}
5094	ctxt->nameNr = 0;
5095	ctxt->nameMax = 10;
5096	ctxt->name = NULL;
5097
5098	ctxt->nodeInfoTab = NULL;
5099	ctxt->nodeInfoNr = 0;
5100	ctxt->nodeInfoMax = 0;
5101
5102	if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
5103	else {
5104	ctxt->sax = sax;
5105	memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
5106	}
5107	ctxt->userData = ctxt;
5108	ctxt->myDoc = NULL;
5109	ctxt->wellFormed = 1;
5110	ctxt->replaceEntities = 0;
5111	ctxt->linenumbers = xmlLineNumbersDefaultValue;
5112	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
5113	ctxt->html = 1;
5114	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
5115	ctxt->vctxt.userData = ctxt;
5116	ctxt->vctxt.error = xmlParserValidityError;
5117	ctxt->vctxt.warning = xmlParserValidityWarning;
5118	ctxt->record_info = 0;
5119	ctxt->validate = 0;
5120	ctxt->checkIndex = 0;
5121	ctxt->catalogs = NULL;
5122	xmlInitNodeInfoSeq(&ctxt->node_seq);
5123	return(0);
5124	}
5125
5126	/**
5127	* htmlFreeParserCtxt:
5128	* @ctxt: an HTML parser context
5129	*
5130	* Free all the memory used by a parser context. However the parsed
5131	* document in ctxt->myDoc is not freed.
5132	*/
5133
5134	void
5135	htmlFreeParserCtxt(htmlParserCtxtPtr ctxt)
5136	{
5137	xmlFreeParserCtxt(ctxt);
5138	}
5139
5140	/**
5141	* htmlNewParserCtxt:
5142	*
5143	* Allocate and initialize a new parser context.
5144	*
5145	* Returns the htmlParserCtxtPtr or NULL in case of allocation error
5146	*/
5147
5148	htmlParserCtxtPtr
5149	htmlNewParserCtxt(void)
5150	{
5151	xmlParserCtxtPtr ctxt;
5152
5153	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
5154	if (ctxt == NULL) {
5155	htmlErrMemory(NULL, "NewParserCtxt: out of memory\n");
5156	return(NULL);
5157	}
5158	memset(ctxt, 0, sizeof(xmlParserCtxt));
5159	if (htmlInitParserCtxt(ctxt) < 0) {
5160	htmlFreeParserCtxt(ctxt);
5161	return(NULL);
5162	}
5163	return(ctxt);
5164	}
5165
5166	/**
5167	* htmlCreateMemoryParserCtxt:
5168	* @buffer: a pointer to a char array
5169	* @size: the size of the array
5170	*
5171	* Create a parser context for an HTML in-memory document.
5172	*
5173	* Returns the new parser context or NULL
5174	*/
5175	htmlParserCtxtPtr
5176	htmlCreateMemoryParserCtxt(const char *buffer, int size) {
5177	xmlParserCtxtPtr ctxt;
5178	xmlParserInputPtr input;
5179	xmlParserInputBufferPtr buf;
5180
5181	if (buffer == NULL)
5182	return(NULL);
5183	if (size <= 0)
5184	return(NULL);
5185
5186	ctxt = htmlNewParserCtxt();
5187	if (ctxt == NULL)
5188	return(NULL);
5189
5190	buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
5191	if (buf == NULL) return(NULL);
5192
5193	input = xmlNewInputStream(ctxt);
5194	if (input == NULL) {
5195	xmlFreeParserInputBuffer(buf);
5196	xmlFreeParserCtxt(ctxt);
5197	return(NULL);
5198	}
5199
5200	input->filename = NULL;
5201	input->buf = buf;
5202	xmlBufResetInput(buf->buffer, input);
5203
5204	inputPush(ctxt, input);
5205	return(ctxt);
5206	}
5207
5208	/**
5209	* htmlCreateDocParserCtxt:
5210	* @cur: a pointer to an array of xmlChar
5211	* @encoding: a free form C string describing the HTML document encoding, or NULL
5212	*
5213	* Create a parser context for an HTML document.
5214	*
5215	* TODO: check the need to add encoding handling there
5216	*
5217	* Returns the new parser context or NULL
5218	*/
5219	static htmlParserCtxtPtr
5220	htmlCreateDocParserCtxt(const xmlChar cur, const char encoding) {
5221	int len;
5222	htmlParserCtxtPtr ctxt;
5223
5224	if (cur == NULL)
5225	return(NULL);
5226	len = xmlStrlen(cur);
5227	ctxt = htmlCreateMemoryParserCtxt((char *)cur, len);
5228	if (ctxt == NULL)
5229	return(NULL);
5230
5231	if (encoding != NULL) {
5232	xmlCharEncoding enc;
5233	xmlCharEncodingHandlerPtr handler;
5234
5235	if (ctxt->input->encoding != NULL)
5236	xmlFree((xmlChar *) ctxt->input->encoding);
5237	ctxt->input->encoding = xmlStrdup((const xmlChar *) encoding);
5238
5239	enc = xmlParseCharEncoding(encoding);
5240	/*
5241	* registered set of known encodings
5242	*/
5243	if (enc != XML_CHAR_ENCODING_ERROR) {
5244	xmlSwitchEncoding(ctxt, enc);
5245	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5246	htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
5247	"Unsupported encoding %s\n",
5248	(const xmlChar *) encoding, NULL);
5249	}
5250	} else {
5251	/*
5252	* fallback for unknown encodings
5253	*/
5254	handler = xmlFindCharEncodingHandler((const char *) encoding);
5255	if (handler != NULL) {
5256	xmlSwitchToEncoding(ctxt, handler);
5257	} else {
5258	htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
5259	"Unsupported encoding %s\n",
5260	(const xmlChar *) encoding, NULL);
5261	}
5262	}
5263	}
5264	return(ctxt);
5265	}
5266
5267	#ifdef LIBXML_PUSH_ENABLED
5268	/************************************************************************
5269	* *
5270	* Progressive parsing interfaces *
5271	* *
5272	************************************************************************/
5273
5274	/**
5275	* htmlParseLookupSequence:
5276	* @ctxt: an HTML parser context
5277	* @first: the first char to lookup
5278	* @next: the next char to lookup or zero
5279	* @third: the next char to lookup or zero
5280	* @ignoreattrval: skip over attribute values
5281	*
5282	* Try to find if a sequence (first, next, third) or just (first next) or
5283	* (first) is available in the input stream.
5284	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
5285	* to avoid rescanning sequences of bytes, it DOES change the state of the
5286	* parser, do not use liberally.
5287	* This is basically similar to xmlParseLookupSequence()
5288	*
5289	* Returns the index to the current parsing point if the full sequence
5290	* is available, -1 otherwise.
5291	*/
5292	static int
5293	htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
5294	xmlChar next, xmlChar third, int ignoreattrval)
5295	{
5296	int base, len;
5297	htmlParserInputPtr in;
5298	const xmlChar *buf;
5299	int invalue = 0;
5300	char valdellim = 0x0;
5301
5302	in = ctxt->input;
5303	if (in == NULL)
5304	return (-1);
5305
5306	base = in->cur - in->base;
5307	if (base < 0)
5308	return (-1);
5309
5310	if (ctxt->checkIndex > base) {
5311	base = ctxt->checkIndex;
5312	/* Abuse hasPErefs member to restore current state. */
5313	invalue = ctxt->hasPErefs & 1 ? 1 : 0;
5314	}
5315
5316	if (in->buf == NULL) {
5317	buf = in->base;
5318	len = in->length;
5319	} else {
5320	buf = xmlBufContent(in->buf->buffer);
5321	len = xmlBufUse(in->buf->buffer);
5322	}
5323
5324	/* take into account the sequence length */
5325	if (third)
5326	len -= 2;
5327	else if (next)
5328	len--;
5329	for (; base < len; base++) {
5330	if (ignoreattrval) {
5331	if (buf[base] == '"' \|\| buf[base] == '\'') {
5332	if (invalue) {
5333	if (buf[base] == valdellim) {
5334	invalue = 0;
5335	continue;
5336	}
5337	} else {
5338	valdellim = buf[base];
5339	invalue = 1;
5340	continue;
5341	}
5342	} else if (invalue) {
5343	continue;
5344	}
5345	}
5346	if (buf[base] == first) {
5347	if (third != 0) {
5348	if ((buf[base + 1] != next) \|\| (buf[base + 2] != third))
5349	continue;
5350	} else if (next != 0) {
5351	if (buf[base + 1] != next)
5352	continue;
5353	}
5354	ctxt->checkIndex = 0;
5355	#ifdef DEBUG_PUSH
5356	if (next == 0)
5357	xmlGenericError(xmlGenericErrorContext,
5358	"HPP: lookup '%c' found at %d\n",
5359	first, base);
5360	else if (third == 0)
5361	xmlGenericError(xmlGenericErrorContext,
5362	"HPP: lookup '%c%c' found at %d\n",
5363	first, next, base);
5364	else
5365	xmlGenericError(xmlGenericErrorContext,
5366	"HPP: lookup '%c%c%c' found at %d\n",
5367	first, next, third, base);
5368	#endif
5369	return (base - (in->cur - in->base));
5370	}
5371	}
5372	ctxt->checkIndex = base;
5373	/* Abuse hasPErefs member to track current state. */
5374	if (invalue)
5375	ctxt->hasPErefs \|= 1;
5376	else
5377	ctxt->hasPErefs &= ~1;
5378	#ifdef DEBUG_PUSH
5379	if (next == 0)
5380	xmlGenericError(xmlGenericErrorContext,
5381	"HPP: lookup '%c' failed\n", first);
5382	else if (third == 0)
5383	xmlGenericError(xmlGenericErrorContext,
5384	"HPP: lookup '%c%c' failed\n", first, next);
5385	else
5386	xmlGenericError(xmlGenericErrorContext,
5387	"HPP: lookup '%c%c%c' failed\n", first, next,
5388	third);
5389	#endif
5390	return (-1);
5391	}
5392
5393	/**
5394	* htmlParseLookupCommentEnd:
5395	* @ctxt: an HTML parser context
5396	*
5397	* Try to find a comment end tag in the input stream
5398	* The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
5399	* (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
5400	* This function has a side effect of (possibly) incrementing ctxt->checkIndex
5401	* to avoid rescanning sequences of bytes, it DOES change the state of the
5402	* parser, do not use liberally.
5403	* This wraps to htmlParseLookupSequence()
5404	*
5405	* Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
5406	*/
5407	static int
5408	htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
5409	{
5410	int mark = 0;
5411	int cur = CUR_PTR - BASE_PTR;
5412
5413	while (mark >= 0) {
5414	mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 0);
5415	if ((mark < 0) \|\|
5416	(NXT(mark+2) == '>') \|\|
5417	((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
5418	return mark;
5419	}
5420	ctxt->checkIndex = cur + mark + 1;
5421	}
5422	return mark;
5423	}
5424
5425
5426	/**
5427	* htmlParseTryOrFinish:
5428	* @ctxt: an HTML parser context
5429	* @terminate: last chunk indicator
5430	*
5431	* Try to progress on parsing
5432	*
5433	* Returns zero if no parsing was possible
5434	*/
5435	static int
5436	htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
5437	int ret = 0;
5438	htmlParserInputPtr in;
5439	ptrdiff_t avail = 0;
5440	xmlChar cur, next;
5441
5442	htmlParserNodeInfo node_info;
5443
5444	#ifdef DEBUG_PUSH
5445	switch (ctxt->instate) {
5446	case XML_PARSER_EOF:
5447	xmlGenericError(xmlGenericErrorContext,
5448	"HPP: try EOF\n"); break;
5449	case XML_PARSER_START:
5450	xmlGenericError(xmlGenericErrorContext,
5451	"HPP: try START\n"); break;
5452	case XML_PARSER_MISC:
5453	xmlGenericError(xmlGenericErrorContext,
5454	"HPP: try MISC\n");break;
5455	case XML_PARSER_COMMENT:
5456	xmlGenericError(xmlGenericErrorContext,
5457	"HPP: try COMMENT\n");break;
5458	case XML_PARSER_PROLOG:
5459	xmlGenericError(xmlGenericErrorContext,
5460	"HPP: try PROLOG\n");break;
5461	case XML_PARSER_START_TAG:
5462	xmlGenericError(xmlGenericErrorContext,
5463	"HPP: try START_TAG\n");break;
5464	case XML_PARSER_CONTENT:
5465	xmlGenericError(xmlGenericErrorContext,
5466	"HPP: try CONTENT\n");break;
5467	case XML_PARSER_CDATA_SECTION:
5468	xmlGenericError(xmlGenericErrorContext,
5469	"HPP: try CDATA_SECTION\n");break;
5470	case XML_PARSER_END_TAG:
5471	xmlGenericError(xmlGenericErrorContext,
5472	"HPP: try END_TAG\n");break;
5473	case XML_PARSER_ENTITY_DECL:
5474	xmlGenericError(xmlGenericErrorContext,
5475	"HPP: try ENTITY_DECL\n");break;
5476	case XML_PARSER_ENTITY_VALUE:
5477	xmlGenericError(xmlGenericErrorContext,
5478	"HPP: try ENTITY_VALUE\n");break;
5479	case XML_PARSER_ATTRIBUTE_VALUE:
5480	xmlGenericError(xmlGenericErrorContext,
5481	"HPP: try ATTRIBUTE_VALUE\n");break;
5482	case XML_PARSER_DTD:
5483	xmlGenericError(xmlGenericErrorContext,
5484	"HPP: try DTD\n");break;
5485	case XML_PARSER_EPILOG:
5486	xmlGenericError(xmlGenericErrorContext,
5487	"HPP: try EPILOG\n");break;
5488	case XML_PARSER_PI:
5489	xmlGenericError(xmlGenericErrorContext,
5490	"HPP: try PI\n");break;
5491	case XML_PARSER_SYSTEM_LITERAL:
5492	xmlGenericError(xmlGenericErrorContext,
5493	"HPP: try SYSTEM_LITERAL\n");break;
5494	}
5495	#endif
5496
5497	while (1) {
5498
5499	in = ctxt->input;
5500	if (in == NULL) break;
5501	if (in->buf == NULL)
5502	avail = in->length - (in->cur - in->base);
5503	else
5504	avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
5505	(in->cur - in->base);
5506	if ((avail == 0) && (terminate)) {
5507	htmlAutoCloseOnEnd(ctxt);
5508	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
5509	/*
5510	* SAX: end of the document processing.
5511	*/
5512	ctxt->instate = XML_PARSER_EOF;
5513	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5514	ctxt->sax->endDocument(ctxt->userData);
5515	}
5516	}
5517	if (avail < 1)
5518	goto done;
5519	/*
5520	* This is done to make progress and avoid an infinite loop
5521	* if a parsing attempt was aborted by hitting a NUL byte. After
5522	* changing htmlCurrentChar, this probably isn't necessary anymore.
5523	* We should consider removing this check.
5524	*/
5525	cur = in->cur[0];
5526	if (cur == 0) {
5527	SKIP(1);
5528	continue;
5529	}
5530
5531	switch (ctxt->instate) {
5532	case XML_PARSER_EOF:
5533	/*
5534	* Document parsing is done !
5535	*/
5536	goto done;
5537	case XML_PARSER_START:
5538	/*
5539	* Very first chars read from the document flow.
5540	*/
5541	cur = in->cur[0];
5542	if (IS_BLANK_CH(cur)) {
5543	SKIP_BLANKS;
5544	if (in->buf == NULL)
5545	avail = in->length - (in->cur - in->base);
5546	else
5547	avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
5548	(in->cur - in->base);
5549	}
5550	if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
5551	ctxt->sax->setDocumentLocator(ctxt->userData,
5552	&xmlDefaultSAXLocator);
5553	if ((ctxt->sax) && (ctxt->sax->startDocument) &&
5554	(!ctxt->disableSAX))
5555	ctxt->sax->startDocument(ctxt->userData);
5556
5557	cur = in->cur[0];
5558	next = in->cur[1];
5559	if ((cur == '<') && (next == '!') &&
5560	(UPP(2) == 'D') && (UPP(3) == 'O') &&
5561	(UPP(4) == 'C') && (UPP(5) == 'T') &&
5562	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
5563	(UPP(8) == 'E')) {
5564	if ((!terminate) &&
5565	(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
5566	goto done;
5567	#ifdef DEBUG_PUSH
5568	xmlGenericError(xmlGenericErrorContext,
5569	"HPP: Parsing internal subset\n");
5570	#endif
5571	htmlParseDocTypeDecl(ctxt);
5572	ctxt->instate = XML_PARSER_PROLOG;
5573	#ifdef DEBUG_PUSH
5574	xmlGenericError(xmlGenericErrorContext,
5575	"HPP: entering PROLOG\n");
5576	#endif
5577	} else {
5578	ctxt->instate = XML_PARSER_MISC;
5579	#ifdef DEBUG_PUSH
5580	xmlGenericError(xmlGenericErrorContext,
5581	"HPP: entering MISC\n");
5582	#endif
5583	}
5584	break;
5585	case XML_PARSER_MISC:
5586	SKIP_BLANKS;
5587	if (in->buf == NULL)
5588	avail = in->length - (in->cur - in->base);
5589	else
5590	avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
5591	(in->cur - in->base);
5592	/*
5593	* no chars in buffer
5594	*/
5595	if (avail < 1)
5596	goto done;
5597	/*
5598	* not enough chars in buffer
5599	*/
5600	if (avail < 2) {
5601	if (!terminate)
5602	goto done;
5603	else
5604	next = ' ';
5605	} else {
5606	next = in->cur[1];
5607	}
5608	cur = in->cur[0];
5609	if ((cur == '<') && (next == '!') &&
5610	(in->cur[2] == '-') && (in->cur[3] == '-')) {
5611	if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5612	goto done;
5613	#ifdef DEBUG_PUSH
5614	xmlGenericError(xmlGenericErrorContext,
5615	"HPP: Parsing Comment\n");
5616	#endif
5617	htmlParseComment(ctxt);
5618	ctxt->instate = XML_PARSER_MISC;
5619	} else if ((cur == '<') && (next == '?')) {
5620	if ((!terminate) &&
5621	(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5622	goto done;
5623	#ifdef DEBUG_PUSH
5624	xmlGenericError(xmlGenericErrorContext,
5625	"HPP: Parsing PI\n");
5626	#endif
5627	htmlParsePI(ctxt);
5628	ctxt->instate = XML_PARSER_MISC;
5629	} else if ((cur == '<') && (next == '!') &&
5630	(UPP(2) == 'D') && (UPP(3) == 'O') &&
5631	(UPP(4) == 'C') && (UPP(5) == 'T') &&
5632	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
5633	(UPP(8) == 'E')) {
5634	if ((!terminate) &&
5635	(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
5636	goto done;
5637	#ifdef DEBUG_PUSH
5638	xmlGenericError(xmlGenericErrorContext,
5639	"HPP: Parsing internal subset\n");
5640	#endif
5641	htmlParseDocTypeDecl(ctxt);
5642	ctxt->instate = XML_PARSER_PROLOG;
5643	#ifdef DEBUG_PUSH
5644	xmlGenericError(xmlGenericErrorContext,
5645	"HPP: entering PROLOG\n");
5646	#endif
5647	} else if ((cur == '<') && (next == '!') &&
5648	(avail < 9)) {
5649	goto done;
5650	} else {
5651	ctxt->instate = XML_PARSER_CONTENT;
5652	#ifdef DEBUG_PUSH
5653	xmlGenericError(xmlGenericErrorContext,
5654	"HPP: entering START_TAG\n");
5655	#endif
5656	}
5657	break;
5658	case XML_PARSER_PROLOG:
5659	SKIP_BLANKS;
5660	if (in->buf == NULL)
5661	avail = in->length - (in->cur - in->base);
5662	else
5663	avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
5664	(in->cur - in->base);
5665	if (avail < 2)
5666	goto done;
5667	cur = in->cur[0];
5668	next = in->cur[1];
5669	if ((cur == '<') && (next == '!') &&
5670	(in->cur[2] == '-') && (in->cur[3] == '-')) {
5671	if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5672	goto done;
5673	#ifdef DEBUG_PUSH
5674	xmlGenericError(xmlGenericErrorContext,
5675	"HPP: Parsing Comment\n");
5676	#endif
5677	htmlParseComment(ctxt);
5678	ctxt->instate = XML_PARSER_PROLOG;
5679	} else if ((cur == '<') && (next == '?')) {
5680	if ((!terminate) &&
5681	(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5682	goto done;
5683	#ifdef DEBUG_PUSH
5684	xmlGenericError(xmlGenericErrorContext,
5685	"HPP: Parsing PI\n");
5686	#endif
5687	htmlParsePI(ctxt);
5688	ctxt->instate = XML_PARSER_PROLOG;
5689	} else if ((cur == '<') && (next == '!') &&
5690	(avail < 4)) {
5691	goto done;
5692	} else {
5693	ctxt->instate = XML_PARSER_CONTENT;
5694	#ifdef DEBUG_PUSH
5695	xmlGenericError(xmlGenericErrorContext,
5696	"HPP: entering START_TAG\n");
5697	#endif
5698	}
5699	break;
5700	case XML_PARSER_EPILOG:
5701	if (in->buf == NULL)
5702	avail = in->length - (in->cur - in->base);
5703	else
5704	avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) -
5705	(in->cur - in->base);
5706	if (avail < 1)
5707	goto done;
5708	cur = in->cur[0];
5709	if (IS_BLANK_CH(cur)) {
5710	htmlParseCharData(ctxt);
5711	goto done;
5712	}
5713	if (avail < 2)
5714	goto done;
5715	next = in->cur[1];
5716	if ((cur == '<') && (next == '!') &&
5717	(in->cur[2] == '-') && (in->cur[3] == '-')) {
5718	if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5719	goto done;
5720	#ifdef DEBUG_PUSH
5721	xmlGenericError(xmlGenericErrorContext,
5722	"HPP: Parsing Comment\n");
5723	#endif
5724	htmlParseComment(ctxt);
5725	ctxt->instate = XML_PARSER_EPILOG;
5726	} else if ((cur == '<') && (next == '?')) {
5727	if ((!terminate) &&
5728	(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5729	goto done;
5730	#ifdef DEBUG_PUSH
5731	xmlGenericError(xmlGenericErrorContext,
5732	"HPP: Parsing PI\n");
5733	#endif
5734	htmlParsePI(ctxt);
5735	ctxt->instate = XML_PARSER_EPILOG;
5736	} else if ((cur == '<') && (next == '!') &&
5737	(avail < 4)) {
5738	goto done;
5739	} else {
5740	ctxt->errNo = XML_ERR_DOCUMENT_END;
5741	ctxt->wellFormed = 0;
5742	ctxt->instate = XML_PARSER_EOF;
5743	#ifdef DEBUG_PUSH
5744	xmlGenericError(xmlGenericErrorContext,
5745	"HPP: entering EOF\n");
5746	#endif
5747	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
5748	ctxt->sax->endDocument(ctxt->userData);
5749	goto done;
5750	}
5751	break;
5752	case XML_PARSER_START_TAG: {
5753	const xmlChar *name;
5754	int failed;
5755	const htmlElemDesc * info;
5756
5757	/*
5758	* no chars in buffer
5759	*/
5760	if (avail < 1)
5761	goto done;
5762	/*
5763	* not enough chars in buffer
5764	*/
5765	if (avail < 2) {
5766	if (!terminate)
5767	goto done;
5768	else
5769	next = ' ';
5770	} else {
5771	next = in->cur[1];
5772	}
5773	cur = in->cur[0];
5774	if (cur != '<') {
5775	ctxt->instate = XML_PARSER_CONTENT;
5776	#ifdef DEBUG_PUSH
5777	xmlGenericError(xmlGenericErrorContext,
5778	"HPP: entering CONTENT\n");
5779	#endif
5780	break;
5781	}
5782	if (next == '/') {
5783	ctxt->instate = XML_PARSER_END_TAG;
5784	ctxt->checkIndex = 0;
5785	#ifdef DEBUG_PUSH
5786	xmlGenericError(xmlGenericErrorContext,
5787	"HPP: entering END_TAG\n");
5788	#endif
5789	break;
5790	}
5791	if ((!terminate) &&
5792	(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
5793	goto done;
5794
5795	/* Capture start position */
5796	if (ctxt->record_info) {
5797	node_info.begin_pos = ctxt->input->consumed +
5798	(CUR_PTR - ctxt->input->base);
5799	node_info.begin_line = ctxt->input->line;
5800	}
5801
5802
5803	failed = htmlParseStartTag(ctxt);
5804	name = ctxt->name;
5805	if ((failed == -1) \|\|
5806	(name == NULL)) {
5807	if (CUR == '>')
5808	NEXT;
5809	break;
5810	}
5811
5812	/*
5813	* Lookup the info for that element.
5814	*/
5815	info = htmlTagLookup(name);
5816	if (info == NULL) {
5817	htmlParseErr(ctxt, XML_HTML_UNKNOWN_TAG,
5818	"Tag %s invalid\n", name, NULL);
5819	}
5820
5821	/*
5822	* Check for an Empty Element labeled the XML/SGML way
5823	*/
5824	if ((CUR == '/') && (NXT(1) == '>')) {
5825	SKIP(2);
5826	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5827	ctxt->sax->endElement(ctxt->userData, name);
5828	htmlnamePop(ctxt);
5829	ctxt->instate = XML_PARSER_CONTENT;
5830	#ifdef DEBUG_PUSH
5831	xmlGenericError(xmlGenericErrorContext,
5832	"HPP: entering CONTENT\n");
5833	#endif
5834	break;
5835	}
5836
5837	if (CUR == '>') {
5838	NEXT;
5839	} else {
5840	htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
5841	"Couldn't find end of Start Tag %s\n",
5842	name, NULL);
5843
5844	/*
5845	* end of parsing of this node.
5846	*/
5847	if (xmlStrEqual(name, ctxt->name)) {
5848	nodePop(ctxt);
5849	htmlnamePop(ctxt);
5850	}
5851
5852	if (ctxt->record_info)
5853	htmlNodeInfoPush(ctxt, &node_info);
5854
5855	ctxt->instate = XML_PARSER_CONTENT;
5856	#ifdef DEBUG_PUSH
5857	xmlGenericError(xmlGenericErrorContext,
5858	"HPP: entering CONTENT\n");
5859	#endif
5860	break;
5861	}
5862
5863	/*
5864	* Check for an Empty Element from DTD definition
5865	*/
5866	if ((info != NULL) && (info->empty)) {
5867	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
5868	ctxt->sax->endElement(ctxt->userData, name);
5869	htmlnamePop(ctxt);
5870	}
5871
5872	if (ctxt->record_info)
5873	htmlNodeInfoPush(ctxt, &node_info);
5874
5875	ctxt->instate = XML_PARSER_CONTENT;
5876	#ifdef DEBUG_PUSH
5877	xmlGenericError(xmlGenericErrorContext,
5878	"HPP: entering CONTENT\n");
5879	#endif
5880	break;
5881	}
5882	case XML_PARSER_CONTENT: {
5883	xmlChar chr[2] = { 0, 0 };
5884
5885	/*
5886	* Handle preparsed entities and charRef
5887	*/
5888	if (ctxt->token != 0) {
5889	chr[0] = (xmlChar) ctxt->token;
5890	htmlCheckParagraph(ctxt);
5891	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
5892	ctxt->sax->characters(ctxt->userData, chr, 1);
5893	ctxt->token = 0;
5894	ctxt->checkIndex = 0;
5895	}
5896	if ((avail == 1) && (terminate)) {
5897	cur = in->cur[0];
5898	if ((cur != '<') && (cur != '&')) {
5899	if (ctxt->sax != NULL) {
5900	chr[0] = cur;
5901	if (IS_BLANK_CH(cur)) {
5902	if (ctxt->keepBlanks) {
5903	if (ctxt->sax->characters != NULL)
5904	ctxt->sax->characters(
5905	ctxt->userData, chr, 1);
5906	} else {
5907	if (ctxt->sax->ignorableWhitespace != NULL)
5908	ctxt->sax->ignorableWhitespace(
5909	ctxt->userData, chr, 1);
5910	}
5911	} else {
5912	htmlCheckParagraph(ctxt);
5913	if (ctxt->sax->characters != NULL)
5914	ctxt->sax->characters(
5915	ctxt->userData, chr, 1);
5916	}
5917	}
5918	ctxt->token = 0;
5919	ctxt->checkIndex = 0;
5920	in->cur++;
5921	break;
5922	}
5923	}
5924	if (avail < 2)
5925	goto done;
5926	cur = in->cur[0];
5927	next = in->cur[1];
5928	if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) \|\|
5929	(xmlStrEqual(ctxt->name, BAD_CAST"style"))) {
5930	/*
5931	* Handle SCRIPT/STYLE separately
5932	*/
5933	if (!terminate) {
5934	int idx;
5935	xmlChar val;
5936
5937	idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0);
5938	if (idx < 0)
5939	goto done;
5940	val = in->cur[idx + 2];
5941	if (val == 0) /* bad cut of input */
5942	goto done;
5943	}
5944	htmlParseScript(ctxt);
5945	if ((cur == '<') && (next == '/')) {
5946	ctxt->instate = XML_PARSER_END_TAG;
5947	ctxt->checkIndex = 0;
5948	#ifdef DEBUG_PUSH
5949	xmlGenericError(xmlGenericErrorContext,
5950	"HPP: entering END_TAG\n");
5951	#endif
5952	break;
5953	}
5954	} else {
5955	/*
5956	* Sometimes DOCTYPE arrives in the middle of the document
5957	*/
5958	if ((cur == '<') && (next == '!') &&
5959	(UPP(2) == 'D') && (UPP(3) == 'O') &&
5960	(UPP(4) == 'C') && (UPP(5) == 'T') &&
5961	(UPP(6) == 'Y') && (UPP(7) == 'P') &&
5962	(UPP(8) == 'E')) {
5963	if ((!terminate) &&
5964	(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
5965	goto done;
5966	htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
5967	"Misplaced DOCTYPE declaration\n",
5968	BAD_CAST "DOCTYPE" , NULL);
5969	htmlParseDocTypeDecl(ctxt);
5970	} else if ((cur == '<') && (next == '!') &&
5971	(in->cur[2] == '-') && (in->cur[3] == '-')) {
5972	if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
5973	goto done;
5974	#ifdef DEBUG_PUSH
5975	xmlGenericError(xmlGenericErrorContext,
5976	"HPP: Parsing Comment\n");
5977	#endif
5978	htmlParseComment(ctxt);
5979	ctxt->instate = XML_PARSER_CONTENT;
5980	} else if ((cur == '<') && (next == '?')) {
5981	if ((!terminate) &&
5982	(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
5983	goto done;
5984	#ifdef DEBUG_PUSH
5985	xmlGenericError(xmlGenericErrorContext,
5986	"HPP: Parsing PI\n");
5987	#endif
5988	htmlParsePI(ctxt);
5989	ctxt->instate = XML_PARSER_CONTENT;
5990	} else if ((cur == '<') && (next == '!') && (avail < 4)) {
5991	goto done;
5992	} else if ((cur == '<') && (next == '/')) {
5993	ctxt->instate = XML_PARSER_END_TAG;
5994	ctxt->checkIndex = 0;
5995	#ifdef DEBUG_PUSH
5996	xmlGenericError(xmlGenericErrorContext,
5997	"HPP: entering END_TAG\n");
5998	#endif
5999	break;
6000	} else if ((cur == '<') && IS_ASCII_LETTER(next)) {
6001	if ((!terminate) && (next == 0))
6002	goto done;
6003	ctxt->instate = XML_PARSER_START_TAG;
6004	ctxt->checkIndex = 0;
6005	#ifdef DEBUG_PUSH
6006	xmlGenericError(xmlGenericErrorContext,
6007	"HPP: entering START_TAG\n");
6008	#endif
6009	break;
6010	} else if (cur == '<') {
6011	if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6012	(ctxt->sax->characters != NULL))
6013	ctxt->sax->characters(ctxt->userData,
6014	BAD_CAST "<", 1);
6015	NEXT;
6016	} else {
6017	/*
6018	* check that the text sequence is complete
6019	* before handing out the data to the parser
6020	* to avoid problems with erroneous end of
6021	* data detection.
6022	*/
6023	if ((!terminate) &&
6024	(htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
6025	goto done;
6026	ctxt->checkIndex = 0;
6027	#ifdef DEBUG_PUSH
6028	xmlGenericError(xmlGenericErrorContext,
6029	"HPP: Parsing char data\n");
6030	#endif
6031	while ((ctxt->instate != XML_PARSER_EOF) &&
6032	(cur != '<') && (in->cur < in->end)) {
6033	if (cur == '&') {
6034	htmlParseReference(ctxt);
6035	} else {
6036	htmlParseCharData(ctxt);
6037	}
6038	cur = in->cur[0];
6039	}
6040	}
6041	}
6042
6043	break;
6044	}
6045	case XML_PARSER_END_TAG:
6046	if (avail < 2)
6047	goto done;
6048	if ((!terminate) &&
6049	(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
6050	goto done;
6051	htmlParseEndTag(ctxt);
6052	if (ctxt->nameNr == 0) {
6053	ctxt->instate = XML_PARSER_EPILOG;
6054	} else {
6055	ctxt->instate = XML_PARSER_CONTENT;
6056	}
6057	ctxt->checkIndex = 0;
6058	#ifdef DEBUG_PUSH
6059	xmlGenericError(xmlGenericErrorContext,
6060	"HPP: entering CONTENT\n");
6061	#endif
6062	break;
6063	case XML_PARSER_CDATA_SECTION:
6064	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6065	"HPP: internal error, state == CDATA\n",
6066	NULL, NULL);
6067	ctxt->instate = XML_PARSER_CONTENT;
6068	ctxt->checkIndex = 0;
6069	#ifdef DEBUG_PUSH
6070	xmlGenericError(xmlGenericErrorContext,
6071	"HPP: entering CONTENT\n");
6072	#endif
6073	break;
6074	case XML_PARSER_DTD:
6075	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6076	"HPP: internal error, state == DTD\n",
6077	NULL, NULL);
6078	ctxt->instate = XML_PARSER_CONTENT;
6079	ctxt->checkIndex = 0;
6080	#ifdef DEBUG_PUSH
6081	xmlGenericError(xmlGenericErrorContext,
6082	"HPP: entering CONTENT\n");
6083	#endif
6084	break;
6085	case XML_PARSER_COMMENT:
6086	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6087	"HPP: internal error, state == COMMENT\n",
6088	NULL, NULL);
6089	ctxt->instate = XML_PARSER_CONTENT;
6090	ctxt->checkIndex = 0;
6091	#ifdef DEBUG_PUSH
6092	xmlGenericError(xmlGenericErrorContext,
6093	"HPP: entering CONTENT\n");
6094	#endif
6095	break;
6096	case XML_PARSER_PI:
6097	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6098	"HPP: internal error, state == PI\n",
6099	NULL, NULL);
6100	ctxt->instate = XML_PARSER_CONTENT;
6101	ctxt->checkIndex = 0;
6102	#ifdef DEBUG_PUSH
6103	xmlGenericError(xmlGenericErrorContext,
6104	"HPP: entering CONTENT\n");
6105	#endif
6106	break;
6107	case XML_PARSER_ENTITY_DECL:
6108	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6109	"HPP: internal error, state == ENTITY_DECL\n",
6110	NULL, NULL);
6111	ctxt->instate = XML_PARSER_CONTENT;
6112	ctxt->checkIndex = 0;
6113	#ifdef DEBUG_PUSH
6114	xmlGenericError(xmlGenericErrorContext,
6115	"HPP: entering CONTENT\n");
6116	#endif
6117	break;
6118	case XML_PARSER_ENTITY_VALUE:
6119	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6120	"HPP: internal error, state == ENTITY_VALUE\n",
6121	NULL, NULL);
6122	ctxt->instate = XML_PARSER_CONTENT;
6123	ctxt->checkIndex = 0;
6124	#ifdef DEBUG_PUSH
6125	xmlGenericError(xmlGenericErrorContext,
6126	"HPP: entering DTD\n");
6127	#endif
6128	break;
6129	case XML_PARSER_ATTRIBUTE_VALUE:
6130	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6131	"HPP: internal error, state == ATTRIBUTE_VALUE\n",
6132	NULL, NULL);
6133	ctxt->instate = XML_PARSER_START_TAG;
6134	ctxt->checkIndex = 0;
6135	#ifdef DEBUG_PUSH
6136	xmlGenericError(xmlGenericErrorContext,
6137	"HPP: entering START_TAG\n");
6138	#endif
6139	break;
6140	case XML_PARSER_SYSTEM_LITERAL:
6141	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6142	"HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n",
6143	NULL, NULL);
6144	ctxt->instate = XML_PARSER_CONTENT;
6145	ctxt->checkIndex = 0;
6146	#ifdef DEBUG_PUSH
6147	xmlGenericError(xmlGenericErrorContext,
6148	"HPP: entering CONTENT\n");
6149	#endif
6150	break;
6151	case XML_PARSER_IGNORE:
6152	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6153	"HPP: internal error, state == XML_PARSER_IGNORE\n",
6154	NULL, NULL);
6155	ctxt->instate = XML_PARSER_CONTENT;
6156	ctxt->checkIndex = 0;
6157	#ifdef DEBUG_PUSH
6158	xmlGenericError(xmlGenericErrorContext,
6159	"HPP: entering CONTENT\n");
6160	#endif
6161	break;
6162	case XML_PARSER_PUBLIC_LITERAL:
6163	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6164	"HPP: internal error, state == XML_PARSER_LITERAL\n",
6165	NULL, NULL);
6166	ctxt->instate = XML_PARSER_CONTENT;
6167	ctxt->checkIndex = 0;
6168	#ifdef DEBUG_PUSH
6169	xmlGenericError(xmlGenericErrorContext,
6170	"HPP: entering CONTENT\n");
6171	#endif
6172	break;
6173
6174	}
6175	}
6176	done:
6177	if ((avail == 0) && (terminate)) {
6178	htmlAutoCloseOnEnd(ctxt);
6179	if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) {
6180	/*
6181	* SAX: end of the document processing.
6182	*/
6183	ctxt->instate = XML_PARSER_EOF;
6184	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
6185	ctxt->sax->endDocument(ctxt->userData);
6186	}
6187	}
6188	if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL) &&
6189	((terminate) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
6190	(ctxt->instate == XML_PARSER_EPILOG))) {
6191	xmlDtdPtr dtd;
6192	dtd = xmlGetIntSubset(ctxt->myDoc);
6193	if (dtd == NULL)
6194	ctxt->myDoc->intSubset =
6195	xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html",
6196	BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",
6197	BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");
6198	}
6199	#ifdef DEBUG_PUSH
6200	xmlGenericError(xmlGenericErrorContext, "HPP: done %d\n", ret);
6201	#endif
6202	return(ret);
6203	}
6204
6205	/**
6206	* htmlParseChunk:
6207	* @ctxt: an HTML parser context
6208	* @chunk: an char array
6209	* @size: the size in byte of the chunk
6210	* @terminate: last chunk indicator
6211	*
6212	* Parse a Chunk of memory
6213	*
6214	* Returns zero if no error, the xmlParserErrors otherwise.
6215	*/
6216	int
6217	htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
6218	int terminate) {
6219	if ((ctxt == NULL) \|\| (ctxt->input == NULL)) {
6220	htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
6221	"htmlParseChunk: context error\n", NULL, NULL);
6222	return(XML_ERR_INTERNAL_ERROR);
6223	}
6224	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
6225	(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
6226	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
6227	size_t cur = ctxt->input->cur - ctxt->input->base;
6228	int res;
6229
6230	res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6231	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
6232	if (res < 0) {
6233	ctxt->errNo = XML_PARSER_EOF;
6234	ctxt->disableSAX = 1;
6235	return (XML_PARSER_EOF);
6236	}
6237	#ifdef DEBUG_PUSH
6238	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
6239	#endif
6240
6241	#if 0
6242	if ((terminate) \|\| (ctxt->input->buf->buffer->use > 80))
6243	htmlParseTryOrFinish(ctxt, terminate);
6244	#endif
6245	} else if (ctxt->instate != XML_PARSER_EOF) {
6246	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
6247	xmlParserInputBufferPtr in = ctxt->input->buf;
6248	if ((in->encoder != NULL) && (in->buffer != NULL) &&
6249	(in->raw != NULL)) {
6250	int nbchars;
6251	size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
6252	size_t current = ctxt->input->cur - ctxt->input->base;
6253
6254	nbchars = xmlCharEncInput(in, terminate);
6255	xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
6256	if (nbchars < 0) {
6257	htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
6258	"encoder error\n", NULL, NULL);
6259	return(XML_ERR_INVALID_ENCODING);
6260	}
6261	}
6262	}
6263	}
6264	htmlParseTryOrFinish(ctxt, terminate);
6265	if (terminate) {
6266	if ((ctxt->instate != XML_PARSER_EOF) &&
6267	(ctxt->instate != XML_PARSER_EPILOG) &&
6268	(ctxt->instate != XML_PARSER_MISC)) {
6269	ctxt->errNo = XML_ERR_DOCUMENT_END;
6270	ctxt->wellFormed = 0;
6271	}
6272	if (ctxt->instate != XML_PARSER_EOF) {
6273	if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
6274	ctxt->sax->endDocument(ctxt->userData);
6275	}
6276	ctxt->instate = XML_PARSER_EOF;
6277	}
6278	return((xmlParserErrors) ctxt->errNo);
6279	}
6280
/************************************************************************
 *                                                                      *
 *                          User entry points                           *
 *                                                                      *
 ************************************************************************/
6286
6287	/**
6288	* htmlCreatePushParserCtxt:
6289	* @sax: a SAX handler
6290	* @user_data: The user data returned on SAX callbacks
6291	* @chunk: a pointer to an array of chars
6292	* @size: number of chars in the array
6293	* @filename: an optional file name or URI
6294	* @enc: an optional encoding
6295	*
6296	* Create a parser context for using the HTML parser in push mode
6297	* The value of @filename is used for fetching external entities
6298	* and error/warning reports.
6299	*
6300	* Returns the new parser context or NULL
6301	*/
6302	htmlParserCtxtPtr
6303	htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, void *user_data,
6304	const char chunk, int size, const char filename,
6305	xmlCharEncoding enc) {
6306	htmlParserCtxtPtr ctxt;
6307	htmlParserInputPtr inputStream;
6308	xmlParserInputBufferPtr buf;
6309
6310	xmlInitParser();
6311
6312	buf = xmlAllocParserInputBuffer(enc);
6313	if (buf == NULL) return(NULL);
6314
6315	ctxt = htmlNewParserCtxt();
6316	if (ctxt == NULL) {
6317	xmlFreeParserInputBuffer(buf);
6318	return(NULL);
6319	}
6320	if(enc==XML_CHAR_ENCODING_UTF8 \|\| buf->encoder)
6321	ctxt->charset=XML_CHAR_ENCODING_UTF8;
6322	if (sax != NULL) {
6323	if (ctxt->sax != (xmlSAXHandlerPtr) &htmlDefaultSAXHandler)
6324	xmlFree(ctxt->sax);
6325	ctxt->sax = (htmlSAXHandlerPtr) xmlMalloc(sizeof(htmlSAXHandler));
6326	if (ctxt->sax == NULL) {
6327	xmlFree(buf);
6328	xmlFree(ctxt);
6329	return(NULL);
6330	}
6331	memcpy(ctxt->sax, sax, sizeof(htmlSAXHandler));
6332	if (user_data != NULL)
6333	ctxt->userData = user_data;
6334	}
6335	if (filename == NULL) {
6336	ctxt->directory = NULL;
6337	} else {
6338	ctxt->directory = xmlParserGetDirectory(filename);
6339	}
6340
6341	inputStream = htmlNewInputStream(ctxt);
6342	if (inputStream == NULL) {
6343	xmlFreeParserCtxt(ctxt);
6344	xmlFree(buf);
6345	return(NULL);
6346	}
6347
6348	if (filename == NULL)
6349	inputStream->filename = NULL;
6350	else
6351	inputStream->filename = (char *)
6352	xmlCanonicPath((const xmlChar *) filename);
6353	inputStream->buf = buf;
6354	xmlBufResetInput(buf->buffer, inputStream);
6355
6356	inputPush(ctxt, inputStream);
6357
6358	if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
6359	(ctxt->input->buf != NULL)) {
6360	size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
6361	size_t cur = ctxt->input->cur - ctxt->input->base;
6362
6363	xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
6364
6365	xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
6366	#ifdef DEBUG_PUSH
6367	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
6368	#endif
6369	}
6370	ctxt->progressive = 1;
6371
6372	return(ctxt);
6373	}
6374	#endif /* LIBXML_PUSH_ENABLED */
6375
6376	/**
6377	* htmlSAXParseDoc:
6378	* @cur: a pointer to an array of xmlChar
6379	* @encoding: a free form C string describing the HTML document encoding, or NULL
6380	* @sax: the SAX handler block
6381	* @userData: if using SAX, this pointer will be provided on callbacks.
6382	*
6383	* Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
6384	* to handle parse events. If sax is NULL, fallback to the default DOM
6385	* behavior and return a tree.
6386	*
6387	* Returns the resulting document tree unless SAX is NULL or the document is
6388	* not well formed.
6389	*/
6390
6391	htmlDocPtr
6392	htmlSAXParseDoc(const xmlChar cur, const char encoding,
6393	htmlSAXHandlerPtr sax, void *userData) {
6394	htmlDocPtr ret;
6395	htmlParserCtxtPtr ctxt;
6396
6397	xmlInitParser();
6398
6399	if (cur == NULL) return(NULL);
6400
6401
6402	ctxt = htmlCreateDocParserCtxt(cur, encoding);
6403	if (ctxt == NULL) return(NULL);
6404	if (sax != NULL) {
6405	if (ctxt->sax != NULL) xmlFree (ctxt->sax);
6406	ctxt->sax = sax;
6407	ctxt->userData = userData;
6408	}
6409
6410	htmlParseDocument(ctxt);
6411	ret = ctxt->myDoc;
6412	if (sax != NULL) {
6413	ctxt->sax = NULL;
6414	ctxt->userData = NULL;
6415	}
6416	htmlFreeParserCtxt(ctxt);
6417
6418	return(ret);
6419	}
6420
6421	/**
6422	* htmlParseDoc:
6423	* @cur: a pointer to an array of xmlChar
6424	* @encoding: a free form C string describing the HTML document encoding, or NULL
6425	*
6426	* parse an HTML in-memory document and build a tree.
6427	*
6428	* Returns the resulting document tree
6429	*/
6430
6431	htmlDocPtr
6432	htmlParseDoc(const xmlChar cur, const char encoding) {
6433	return(htmlSAXParseDoc(cur, encoding, NULL, NULL));
6434	}
6435
6436
6437	/**
6438	* htmlCreateFileParserCtxt:
6439	* @filename: the filename
6440	* @encoding: a free form C string describing the HTML document encoding, or NULL
6441	*
6442	* Create a parser context for a file content.
6443	* Automatic support for ZLIB/Compress compressed document is provided
6444	* by default if found at compile-time.
6445	*
6446	* Returns the new parser context or NULL
6447	*/
6448	htmlParserCtxtPtr
6449	htmlCreateFileParserCtxt(const char filename, const char encoding)
6450	{
6451	htmlParserCtxtPtr ctxt;
6452	htmlParserInputPtr inputStream;
6453	char *canonicFilename;
6454	/* htmlCharEncoding enc; */
6455	xmlChar content, content_line = (xmlChar *) "charset=";
6456
6457	if (filename == NULL)
6458	return(NULL);
6459
6460	ctxt = htmlNewParserCtxt();
6461	if (ctxt == NULL) {
6462	return(NULL);
6463	}
6464	canonicFilename = (char ) xmlCanonicPath((const xmlChar ) filename);
6465	if (canonicFilename == NULL) {
6466	#ifdef LIBXML_SAX1_ENABLED
6467	if (xmlDefaultSAXHandler.error != NULL) {
6468	xmlDefaultSAXHandler.error(NULL, "out of memory\n");
6469	}
6470	#endif
6471	xmlFreeParserCtxt(ctxt);
6472	return(NULL);
6473	}
6474
6475	inputStream = xmlLoadExternalEntity(canonicFilename, NULL, ctxt);
6476	xmlFree(canonicFilename);
6477	if (inputStream == NULL) {
6478	xmlFreeParserCtxt(ctxt);
6479	return(NULL);
6480	}
6481
6482	inputPush(ctxt, inputStream);
6483
6484	/* set encoding */
6485	if (encoding) {
6486	size_t l = strlen(encoding);
6487
6488	if (l < 1000) {
6489	content = xmlMallocAtomic (xmlStrlen(content_line) + l + 1);
6490	if (content) {
6491	strcpy ((char )content, (char )content_line);
6492	strcat ((char )content, (char )encoding);
6493	htmlCheckEncoding (ctxt, content);
6494	xmlFree (content);
6495	}
6496	}
6497	}
6498
6499	return(ctxt);
6500	}
6501
6502	/**
6503	* htmlSAXParseFile:
6504	* @filename: the filename
6505	* @encoding: a free form C string describing the HTML document encoding, or NULL
6506	* @sax: the SAX handler block
6507	* @userData: if using SAX, this pointer will be provided on callbacks.
6508	*
6509	* parse an HTML file and build a tree. Automatic support for ZLIB/Compress
6510	* compressed document is provided by default if found at compile-time.
6511	* It use the given SAX function block to handle the parsing callback.
6512	* If sax is NULL, fallback to the default DOM tree building routines.
6513	*
6514	* Returns the resulting document tree unless SAX is NULL or the document is
6515	* not well formed.
6516	*/
6517
6518	htmlDocPtr
6519	htmlSAXParseFile(const char filename, const char encoding, htmlSAXHandlerPtr sax,
6520	void *userData) {
6521	htmlDocPtr ret;
6522	htmlParserCtxtPtr ctxt;
6523	htmlSAXHandlerPtr oldsax = NULL;
6524
6525	xmlInitParser();
6526
6527	ctxt = htmlCreateFileParserCtxt(filename, encoding);
6528	if (ctxt == NULL) return(NULL);
6529	if (sax != NULL) {
6530	oldsax = ctxt->sax;
6531	ctxt->sax = sax;
6532	ctxt->userData = userData;
6533	}
6534
6535	htmlParseDocument(ctxt);
6536
6537	ret = ctxt->myDoc;
6538	if (sax != NULL) {
6539	ctxt->sax = oldsax;
6540	ctxt->userData = NULL;
6541	}
6542	htmlFreeParserCtxt(ctxt);
6543
6544	return(ret);
6545	}
6546
6547	/**
6548	* htmlParseFile:
6549	* @filename: the filename
6550	* @encoding: a free form C string describing the HTML document encoding, or NULL
6551	*
6552	* parse an HTML file and build a tree. Automatic support for ZLIB/Compress
6553	* compressed document is provided by default if found at compile-time.
6554	*
6555	* Returns the resulting document tree
6556	*/
6557
6558	htmlDocPtr
6559	htmlParseFile(const char filename, const char encoding) {
6560	return(htmlSAXParseFile(filename, encoding, NULL, NULL));
6561	}
6562
6563	/**
6564	* htmlHandleOmittedElem:
6565	* @val: int 0 or 1
6566	*
6567	* Set and return the previous value for handling HTML omitted tags.
6568	*
6569	* Returns the last value for 0 for no handling, 1 for auto insertion.
6570	*/
6571
6572	int
6573	htmlHandleOmittedElem(int val) {
6574	int old = htmlOmittedDefaultValue;
6575
6576	htmlOmittedDefaultValue = val;
6577	return(old);
6578	}
6579
6580	/**
6581	* htmlElementAllowedHere:
6582	* @parent: HTML parent element
6583	* @elt: HTML element
6584	*
6585	* Checks whether an HTML element may be a direct child of a parent element.
6586	* Note - doesn't check for deprecated elements
6587	*
6588	* Returns 1 if allowed; 0 otherwise.
6589	*/
6590	int
6591	htmlElementAllowedHere(const htmlElemDesc* parent, const xmlChar* elt) {
6592	const char** p ;
6593
6594	if ( ! elt \|\| ! parent \|\| ! parent->subelts )
6595	return 0 ;
6596
6597	for ( p = parent->subelts; *p; ++p )
6598	if ( !xmlStrcmp((const xmlChar )p, elt) )
6599	return 1 ;
6600
6601	return 0 ;
6602	}
6603	/**
6604	* htmlElementStatusHere:
6605	* @parent: HTML parent element
6606	* @elt: HTML element
6607	*
6608	* Checks whether an HTML element may be a direct child of a parent element.
6609	* and if so whether it is valid or deprecated.
6610	*
6611	* Returns one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID
6612	*/
6613	htmlStatus
6614	htmlElementStatusHere(const htmlElemDesc* parent, const htmlElemDesc* elt) {
6615	if ( ! parent \|\| ! elt )
6616	return HTML_INVALID ;
6617	if ( ! htmlElementAllowedHere(parent, (const xmlChar*) elt->name ) )
6618	return HTML_INVALID ;
6619
6620	return ( elt->dtd == 0 ) ? HTML_VALID : HTML_DEPRECATED ;
6621	}
6622	/**
6623	* htmlAttrAllowed:
6624	* @elt: HTML element
6625	* @attr: HTML attribute
6626	* @legacy: whether to allow deprecated attributes
6627	*
6628	* Checks whether an attribute is valid for an element
6629	* Has full knowledge of Required and Deprecated attributes
6630	*
6631	* Returns one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID
6632	*/
6633	htmlStatus
6634	htmlAttrAllowed(const htmlElemDesc* elt, const xmlChar* attr, int legacy) {
6635	const char** p ;
6636
6637	if ( !elt \|\| ! attr )
6638	return HTML_INVALID ;
6639
6640	if ( elt->attrs_req )
6641	for ( p = elt->attrs_req; *p; ++p)
6642	if ( !xmlStrcmp((const xmlChar)p, attr) )
6643	return HTML_REQUIRED ;
6644
6645	if ( elt->attrs_opt )
6646	for ( p = elt->attrs_opt; *p; ++p)
6647	if ( !xmlStrcmp((const xmlChar)p, attr) )
6648	return HTML_VALID ;
6649
6650	if ( legacy && elt->attrs_depr )
6651	for ( p = elt->attrs_depr; *p; ++p)
6652	if ( !xmlStrcmp((const xmlChar)p, attr) )
6653	return HTML_DEPRECATED ;
6654
6655	return HTML_INVALID ;
6656	}
6657	/**
6658	* htmlNodeStatus:
6659	* @node: an htmlNodePtr in a tree
6660	* @legacy: whether to allow deprecated elements (YES is faster here
6661	* for Element nodes)
6662	*
6663	* Checks whether the tree node is valid. Experimental (the author
6664	* only uses the HTML enhancements in a SAX parser)
6665	*
6666	* Return: for Element nodes, a return from htmlElementAllowedHere (if
6667	* legacy allowed) or htmlElementStatusHere (otherwise).
6668	* for Attribute nodes, a return from htmlAttrAllowed
6669	* for other nodes, HTML_NA (no checks performed)
6670	*/
6671	htmlStatus
6672	htmlNodeStatus(const htmlNodePtr node, int legacy) {
6673	if ( ! node )
6674	return HTML_INVALID ;
6675
6676	switch ( node->type ) {
6677	case XML_ELEMENT_NODE:
6678	return legacy
6679	? ( htmlElementAllowedHere (
6680	htmlTagLookup(node->parent->name) , node->name
6681	) ? HTML_VALID : HTML_INVALID )
6682	: htmlElementStatusHere(
6683	htmlTagLookup(node->parent->name) ,
6684	htmlTagLookup(node->name) )
6685	;
6686	case XML_ATTRIBUTE_NODE:
6687	return htmlAttrAllowed(
6688	htmlTagLookup(node->parent->name) , node->name, legacy) ;
6689	default: return HTML_NA ;
6690	}
6691	}
/************************************************************************
 *                                                                      *
 *          New set (2.6.0) of simpler and more flexible APIs           *
 *                                                                      *
 ************************************************************************/
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; when dict is NULL any non-NULL string is freed.
 * Wrapped in do { } while (0) so that it behaves as a single statement;
 * the caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
6708
6709	/**
6710	* htmlCtxtReset:
6711	* @ctxt: an HTML parser context
6712	*
6713	* Reset a parser context
6714	*/
6715	void
6716	htmlCtxtReset(htmlParserCtxtPtr ctxt)
6717	{
6718	xmlParserInputPtr input;
6719	xmlDictPtr dict;
6720
6721	if (ctxt == NULL)
6722	return;
6723
6724	xmlInitParser();
6725	dict = ctxt->dict;
6726
6727	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
6728	xmlFreeInputStream(input);
6729	}
6730	ctxt->inputNr = 0;
6731	ctxt->input = NULL;
6732
6733	ctxt->spaceNr = 0;
6734	if (ctxt->spaceTab != NULL) {
6735	ctxt->spaceTab[0] = -1;
6736	ctxt->space = &ctxt->spaceTab[0];
6737	} else {
6738	ctxt->space = NULL;
6739	}
6740
6741
6742	ctxt->nodeNr = 0;
6743	ctxt->node = NULL;
6744
6745	ctxt->nameNr = 0;
6746	ctxt->name = NULL;
6747
6748	DICT_FREE(ctxt->version);
6749	ctxt->version = NULL;
6750	DICT_FREE(ctxt->encoding);
6751	ctxt->encoding = NULL;
6752	DICT_FREE(ctxt->directory);
6753	ctxt->directory = NULL;
6754	DICT_FREE(ctxt->extSubURI);
6755	ctxt->extSubURI = NULL;
6756	DICT_FREE(ctxt->extSubSystem);
6757	ctxt->extSubSystem = NULL;
6758	if (ctxt->myDoc != NULL)
6759	xmlFreeDoc(ctxt->myDoc);
6760	ctxt->myDoc = NULL;
6761
6762	ctxt->standalone = -1;
6763	ctxt->hasExternalSubset = 0;
6764	ctxt->hasPErefs = 0;
6765	ctxt->html = 1;
6766	ctxt->external = 0;
6767	ctxt->instate = XML_PARSER_START;
6768	ctxt->token = 0;
6769
6770	ctxt->wellFormed = 1;
6771	ctxt->nsWellFormed = 1;
6772	ctxt->disableSAX = 0;
6773	ctxt->valid = 1;
6774	ctxt->vctxt.userData = ctxt;
6775	ctxt->vctxt.error = xmlParserValidityError;
6776	ctxt->vctxt.warning = xmlParserValidityWarning;
6777	ctxt->record_info = 0;
6778	ctxt->checkIndex = 0;
6779	ctxt->inSubset = 0;
6780	ctxt->errNo = XML_ERR_OK;
6781	ctxt->depth = 0;
6782	ctxt->charset = XML_CHAR_ENCODING_NONE;
6783	ctxt->catalogs = NULL;
6784	xmlInitNodeInfoSeq(&ctxt->node_seq);
6785
6786	if (ctxt->attsDefault != NULL) {
6787	xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
6788	ctxt->attsDefault = NULL;
6789	}
6790	if (ctxt->attsSpecial != NULL) {
6791	xmlHashFree(ctxt->attsSpecial, NULL);
6792	ctxt->attsSpecial = NULL;
6793	}
6794	}
6795
6796	/**
6797	* htmlCtxtUseOptions:
6798	* @ctxt: an HTML parser context
6799	* @options: a combination of htmlParserOption(s)
6800	*
6801	* Applies the options to the parser context
6802	*
6803	* Returns 0 in case of success, the set of unknown or unimplemented options
6804	* in case of error.
6805	*/
6806	int
6807	htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
6808	{
6809	if (ctxt == NULL)
6810	return(-1);
6811
6812	if (options & HTML_PARSE_NOWARNING) {
6813	ctxt->sax->warning = NULL;
6814	ctxt->vctxt.warning = NULL;
6815	options -= XML_PARSE_NOWARNING;
6816	ctxt->options \|= XML_PARSE_NOWARNING;
6817	}
6818	if (options & HTML_PARSE_NOERROR) {
6819	ctxt->sax->error = NULL;
6820	ctxt->vctxt.error = NULL;
6821	ctxt->sax->fatalError = NULL;
6822	options -= XML_PARSE_NOERROR;
6823	ctxt->options \|= XML_PARSE_NOERROR;
6824	}
6825	if (options & HTML_PARSE_PEDANTIC) {
6826	ctxt->pedantic = 1;
6827	options -= XML_PARSE_PEDANTIC;
6828	ctxt->options \|= XML_PARSE_PEDANTIC;
6829	} else
6830	ctxt->pedantic = 0;
6831	if (options & XML_PARSE_NOBLANKS) {
6832	ctxt->keepBlanks = 0;
6833	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
6834	options -= XML_PARSE_NOBLANKS;
6835	ctxt->options \|= XML_PARSE_NOBLANKS;
6836	} else
6837	ctxt->keepBlanks = 1;
6838	if (options & HTML_PARSE_RECOVER) {
6839	ctxt->recovery = 1;
6840	options -= HTML_PARSE_RECOVER;
6841	} else
6842	ctxt->recovery = 0;
6843	if (options & HTML_PARSE_COMPACT) {
6844	ctxt->options \|= HTML_PARSE_COMPACT;
6845	options -= HTML_PARSE_COMPACT;
6846	}
6847	if (options & XML_PARSE_HUGE) {
6848	ctxt->options \|= XML_PARSE_HUGE;
6849	options -= XML_PARSE_HUGE;
6850	}
6851	if (options & HTML_PARSE_NODEFDTD) {
6852	ctxt->options \|= HTML_PARSE_NODEFDTD;
6853	options -= HTML_PARSE_NODEFDTD;
6854	}
6855	if (options & HTML_PARSE_IGNORE_ENC) {
6856	ctxt->options \|= HTML_PARSE_IGNORE_ENC;
6857	options -= HTML_PARSE_IGNORE_ENC;
6858	}
6859	if (options & HTML_PARSE_NOIMPLIED) {
6860	ctxt->options \|= HTML_PARSE_NOIMPLIED;
6861	options -= HTML_PARSE_NOIMPLIED;
6862	}
6863	ctxt->dictNames = 0;
6864	return (options);
6865	}
6866
6867	/**
6868	* htmlDoRead:
6869	* @ctxt: an HTML parser context
6870	* @URL: the base URL to use for the document
6871	* @encoding: the document encoding, or NULL
6872	* @options: a combination of htmlParserOption(s)
6873	* @reuse: keep the context for reuse
6874	*
6875	* Common front-end for the htmlRead functions
6876	*
6877	* Returns the resulting document tree or NULL
6878	*/
6879	static htmlDocPtr
6880	htmlDoRead(htmlParserCtxtPtr ctxt, const char URL, const char encoding,
6881	int options, int reuse)
6882	{
6883	htmlDocPtr ret;
6884
6885	htmlCtxtUseOptions(ctxt, options);
6886	ctxt->html = 1;
6887	if (encoding != NULL) {
6888	xmlCharEncodingHandlerPtr hdlr;
6889
6890	hdlr = xmlFindCharEncodingHandler(encoding);
6891	if (hdlr != NULL) {
6892	xmlSwitchToEncoding(ctxt, hdlr);
6893	if (ctxt->input->encoding != NULL)
6894	xmlFree((xmlChar *) ctxt->input->encoding);
6895	ctxt->input->encoding = xmlStrdup((xmlChar *)encoding);
6896	}
6897	}
6898	if ((URL != NULL) && (ctxt->input != NULL) &&
6899	(ctxt->input->filename == NULL))
6900	ctxt->input->filename = (char ) xmlStrdup((const xmlChar ) URL);
6901	htmlParseDocument(ctxt);
6902	ret = ctxt->myDoc;
6903	ctxt->myDoc = NULL;
6904	if (!reuse) {
6905	if ((ctxt->dictNames) &&
6906	(ret != NULL) &&
6907	(ret->dict == ctxt->dict))
6908	ctxt->dict = NULL;
6909	xmlFreeParserCtxt(ctxt);
6910	}
6911	return (ret);
6912	}
6913
6914	/**
6915	* htmlReadDoc:
6916	* @cur: a pointer to a zero terminated string
6917	* @URL: the base URL to use for the document
6918	* @encoding: the document encoding, or NULL
6919	* @options: a combination of htmlParserOption(s)
6920	*
6921	* parse an XML in-memory document and build a tree.
6922	*
6923	* Returns the resulting document tree
6924	*/
6925	htmlDocPtr
6926	htmlReadDoc(const xmlChar * cur, const char URL, const char encoding, int options)
6927	{
6928	htmlParserCtxtPtr ctxt;
6929
6930	if (cur == NULL)
6931	return (NULL);
6932
6933	xmlInitParser();
6934	ctxt = htmlCreateDocParserCtxt(cur, NULL);
6935	if (ctxt == NULL)
6936	return (NULL);
6937	return (htmlDoRead(ctxt, URL, encoding, options, 0));
6938	}
6939
6940	/**
6941	* htmlReadFile:
6942	* @filename: a file or URL
6943	* @encoding: the document encoding, or NULL
6944	* @options: a combination of htmlParserOption(s)
6945	*
6946	* parse an XML file from the filesystem or the network.
6947	*
6948	* Returns the resulting document tree
6949	*/
6950	htmlDocPtr
6951	htmlReadFile(const char filename, const char encoding, int options)
6952	{
6953	htmlParserCtxtPtr ctxt;
6954
6955	xmlInitParser();
6956	ctxt = htmlCreateFileParserCtxt(filename, encoding);
6957	if (ctxt == NULL)
6958	return (NULL);
6959	return (htmlDoRead(ctxt, NULL, NULL, options, 0));
6960	}
6961
6962	/**
6963	* htmlReadMemory:
6964	* @buffer: a pointer to a char array
6965	* @size: the size of the array
6966	* @URL: the base URL to use for the document
6967	* @encoding: the document encoding, or NULL
6968	* @options: a combination of htmlParserOption(s)
6969	*
6970	* parse an XML in-memory document and build a tree.
6971	*
6972	* Returns the resulting document tree
6973	*/
6974	htmlDocPtr
6975	htmlReadMemory(const char buffer, int size, const char URL, const char *encoding, int options)
6976	{
6977	htmlParserCtxtPtr ctxt;
6978
6979	xmlInitParser();
6980	ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6981	if (ctxt == NULL)
6982	return (NULL);
6983	htmlDefaultSAXHandlerInit();
6984	if (ctxt->sax != NULL)
6985	memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
6986	return (htmlDoRead(ctxt, URL, encoding, options, 0));
6987	}
6988
6989	/**
6990	* htmlReadFd:
6991	* @fd: an open file descriptor
6992	* @URL: the base URL to use for the document
6993	* @encoding: the document encoding, or NULL
6994	* @options: a combination of htmlParserOption(s)
6995	*
6996	* parse an HTML from a file descriptor and build a tree.
6997	* NOTE that the file descriptor will not be closed when the
6998	* reader is closed or reset.
6999	*
7000	* Returns the resulting document tree
7001	*/
7002	htmlDocPtr
7003	htmlReadFd(int fd, const char URL, const char encoding, int options)
7004	{
7005	htmlParserCtxtPtr ctxt;
7006	xmlParserInputBufferPtr input;
7007	htmlParserInputPtr stream;
7008
7009	if (fd < 0)
7010	return (NULL);
7011
7012	xmlInitParser();
7013	input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
7014	if (input == NULL)
7015	return (NULL);
7016	input->closecallback = NULL;
7017	ctxt = htmlNewParserCtxt();
7018	if (ctxt == NULL) {
7019	xmlFreeParserInputBuffer(input);
7020	return (NULL);
7021	}
7022	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7023	if (stream == NULL) {
7024	xmlFreeParserInputBuffer(input);
7025	htmlFreeParserCtxt(ctxt);
7026	return (NULL);
7027	}
7028	inputPush(ctxt, stream);
7029	return (htmlDoRead(ctxt, URL, encoding, options, 0));
7030	}
7031
7032	/**
7033	* htmlReadIO:
7034	* @ioread: an I/O read function
7035	* @ioclose: an I/O close function
7036	* @ioctx: an I/O handler
7037	* @URL: the base URL to use for the document
7038	* @encoding: the document encoding, or NULL
7039	* @options: a combination of htmlParserOption(s)
7040	*
7041	* parse an HTML document from I/O functions and source and build a tree.
7042	*
7043	* Returns the resulting document tree
7044	*/
7045	htmlDocPtr
7046	htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
7047	void ioctx, const char URL, const char *encoding, int options)
7048	{
7049	htmlParserCtxtPtr ctxt;
7050	xmlParserInputBufferPtr input;
7051	xmlParserInputPtr stream;
7052
7053	if (ioread == NULL)
7054	return (NULL);
7055	xmlInitParser();
7056
7057	input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
7058	XML_CHAR_ENCODING_NONE);
7059	if (input == NULL) {
7060	if (ioclose != NULL)
7061	ioclose(ioctx);
7062	return (NULL);
7063	}
7064	ctxt = htmlNewParserCtxt();
7065	if (ctxt == NULL) {
7066	xmlFreeParserInputBuffer(input);
7067	return (NULL);
7068	}
7069	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7070	if (stream == NULL) {
7071	xmlFreeParserInputBuffer(input);
7072	xmlFreeParserCtxt(ctxt);
7073	return (NULL);
7074	}
7075	inputPush(ctxt, stream);
7076	return (htmlDoRead(ctxt, URL, encoding, options, 0));
7077	}
7078
7079	/**
7080	* htmlCtxtReadDoc:
7081	* @ctxt: an HTML parser context
7082	* @cur: a pointer to a zero terminated string
7083	* @URL: the base URL to use for the document
7084	* @encoding: the document encoding, or NULL
7085	* @options: a combination of htmlParserOption(s)
7086	*
7087	* parse an XML in-memory document and build a tree.
7088	* This reuses the existing @ctxt parser context
7089	*
7090	* Returns the resulting document tree
7091	*/
7092	htmlDocPtr
7093	htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
7094	const char URL, const char encoding, int options)
7095	{
7096	xmlParserInputPtr stream;
7097
7098	if (cur == NULL)
7099	return (NULL);
7100	if (ctxt == NULL)
7101	return (NULL);
7102	xmlInitParser();
7103
7104	htmlCtxtReset(ctxt);
7105
7106	stream = xmlNewStringInputStream(ctxt, cur);
7107	if (stream == NULL) {
7108	return (NULL);
7109	}
7110	inputPush(ctxt, stream);
7111	return (htmlDoRead(ctxt, URL, encoding, options, 1));
7112	}
7113
7114	/**
7115	* htmlCtxtReadFile:
7116	* @ctxt: an HTML parser context
7117	* @filename: a file or URL
7118	* @encoding: the document encoding, or NULL
7119	* @options: a combination of htmlParserOption(s)
7120	*
7121	* parse an XML file from the filesystem or the network.
7122	* This reuses the existing @ctxt parser context
7123	*
7124	* Returns the resulting document tree
7125	*/
7126	htmlDocPtr
7127	htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
7128	const char *encoding, int options)
7129	{
7130	xmlParserInputPtr stream;
7131
7132	if (filename == NULL)
7133	return (NULL);
7134	if (ctxt == NULL)
7135	return (NULL);
7136	xmlInitParser();
7137
7138	htmlCtxtReset(ctxt);
7139
7140	stream = xmlLoadExternalEntity(filename, NULL, ctxt);
7141	if (stream == NULL) {
7142	return (NULL);
7143	}
7144	inputPush(ctxt, stream);
7145	return (htmlDoRead(ctxt, NULL, encoding, options, 1));
7146	}
7147
7148	/**
7149	* htmlCtxtReadMemory:
7150	* @ctxt: an HTML parser context
7151	* @buffer: a pointer to a char array
7152	* @size: the size of the array
7153	* @URL: the base URL to use for the document
7154	* @encoding: the document encoding, or NULL
7155	* @options: a combination of htmlParserOption(s)
7156	*
7157	* parse an XML in-memory document and build a tree.
7158	* This reuses the existing @ctxt parser context
7159	*
7160	* Returns the resulting document tree
7161	*/
7162	htmlDocPtr
7163	htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
7164	const char URL, const char encoding, int options)
7165	{
7166	xmlParserInputBufferPtr input;
7167	xmlParserInputPtr stream;
7168
7169	if (ctxt == NULL)
7170	return (NULL);
7171	if (buffer == NULL)
7172	return (NULL);
7173	xmlInitParser();
7174
7175	htmlCtxtReset(ctxt);
7176
7177	input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
7178	if (input == NULL) {
7179	return(NULL);
7180	}
7181
7182	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7183	if (stream == NULL) {
7184	xmlFreeParserInputBuffer(input);
7185	return(NULL);
7186	}
7187
7188	inputPush(ctxt, stream);
7189	return (htmlDoRead(ctxt, URL, encoding, options, 1));
7190	}
7191
7192	/**
7193	* htmlCtxtReadFd:
7194	* @ctxt: an HTML parser context
7195	* @fd: an open file descriptor
7196	* @URL: the base URL to use for the document
7197	* @encoding: the document encoding, or NULL
7198	* @options: a combination of htmlParserOption(s)
7199	*
7200	* parse an XML from a file descriptor and build a tree.
7201	* This reuses the existing @ctxt parser context
7202	*
7203	* Returns the resulting document tree
7204	*/
7205	htmlDocPtr
7206	htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
7207	const char URL, const char encoding, int options)
7208	{
7209	xmlParserInputBufferPtr input;
7210	xmlParserInputPtr stream;
7211
7212	if (fd < 0)
7213	return (NULL);
7214	if (ctxt == NULL)
7215	return (NULL);
7216	xmlInitParser();
7217
7218	htmlCtxtReset(ctxt);
7219
7220
7221	input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
7222	if (input == NULL)
7223	return (NULL);
7224	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7225	if (stream == NULL) {
7226	xmlFreeParserInputBuffer(input);
7227	return (NULL);
7228	}
7229	inputPush(ctxt, stream);
7230	return (htmlDoRead(ctxt, URL, encoding, options, 1));
7231	}
7232
7233	/**
7234	* htmlCtxtReadIO:
7235	* @ctxt: an HTML parser context
7236	* @ioread: an I/O read function
7237	* @ioclose: an I/O close function
7238	* @ioctx: an I/O handler
7239	* @URL: the base URL to use for the document
7240	* @encoding: the document encoding, or NULL
7241	* @options: a combination of htmlParserOption(s)
7242	*
7243	* parse an HTML document from I/O functions and source and build a tree.
7244	* This reuses the existing @ctxt parser context
7245	*
7246	* Returns the resulting document tree
7247	*/
7248	htmlDocPtr
7249	htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
7250	xmlInputCloseCallback ioclose, void *ioctx,
7251	const char *URL,
7252	const char *encoding, int options)
7253	{
7254	xmlParserInputBufferPtr input;
7255	xmlParserInputPtr stream;
7256
7257	if (ioread == NULL)
7258	return (NULL);
7259	if (ctxt == NULL)
7260	return (NULL);
7261	xmlInitParser();
7262
7263	htmlCtxtReset(ctxt);
7264
7265	input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
7266	XML_CHAR_ENCODING_NONE);
7267	if (input == NULL) {
7268	if (ioclose != NULL)
7269	ioclose(ioctx);
7270	return (NULL);
7271	}
7272	stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
7273	if (stream == NULL) {
7274	xmlFreeParserInputBuffer(input);
7275	return (NULL);
7276	}
7277	inputPush(ctxt, stream);
7278	return (htmlDoRead(ctxt, URL, encoding, options, 1));
7279	}
7280
7281	#define bottom_HTMLparser
7282	#include "elfgcchack.h"
7283	#endif /* LIBXML_HTML_ENABLED */

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/libxml2-2.9.14/HTMLparser.c@ 103285

以其他格式下載: