HTMLtree.c@ 70563

最後變更在這個檔案從70563是 65950,由 vboxsync 提交於 8 年前
libxml 2.9.4: fix export
屬性 svn:eol-style 設為 `native`
檔案大小: 33.2 KB

行
1	/*
2	* HTMLtree.c : implementation of access function for an HTML tree.
3	*
4	* See Copyright for the status of this software.
5	*
6	* [email protected]
7	*/
8
9
10	#define IN_LIBXML
11	#include "libxml.h"
12	#ifdef LIBXML_HTML_ENABLED
13
14	#include <string.h> /* for memset() only ! */
15
16	#ifdef HAVE_CTYPE_H
17	#include <ctype.h>
18	#endif
19	#ifdef HAVE_STDLIB_H
20	#include <stdlib.h>
21	#endif
22
23	#include <libxml/xmlmemory.h>
24	#include <libxml/HTMLparser.h>
25	#include <libxml/HTMLtree.h>
26	#include <libxml/entities.h>
27	#include <libxml/valid.h>
28	#include <libxml/xmlerror.h>
29	#include <libxml/parserInternals.h>
30	#include <libxml/globals.h>
31	#include <libxml/uri.h>
32
33	#include "buf.h"
34
35	/************************************************************************
36	* *
37	* Getting/Setting encoding meta tags *
38	* *
39	************************************************************************/
40
41	/**
42	* htmlGetMetaEncoding:
43	* @doc: the document
44	*
45	* Encoding definition lookup in the Meta tags
46	*
47	* Returns the current encoding as flagged in the HTML source
48	*/
49	const xmlChar *
50	htmlGetMetaEncoding(htmlDocPtr doc) {
51	htmlNodePtr cur;
52	const xmlChar *content;
53	const xmlChar *encoding;
54
55	if (doc == NULL)
56	return(NULL);
57	cur = doc->children;
58
59	/*
60	* Search the html
61	*/
62	while (cur != NULL) {
63	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
64	if (xmlStrEqual(cur->name, BAD_CAST"html"))
65	break;
66	if (xmlStrEqual(cur->name, BAD_CAST"head"))
67	goto found_head;
68	if (xmlStrEqual(cur->name, BAD_CAST"meta"))
69	goto found_meta;
70	}
71	cur = cur->next;
72	}
73	if (cur == NULL)
74	return(NULL);
75	cur = cur->children;
76
77	/*
78	* Search the head
79	*/
80	while (cur != NULL) {
81	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
82	if (xmlStrEqual(cur->name, BAD_CAST"head"))
83	break;
84	if (xmlStrEqual(cur->name, BAD_CAST"meta"))
85	goto found_meta;
86	}
87	cur = cur->next;
88	}
89	if (cur == NULL)
90	return(NULL);
91	found_head:
92	cur = cur->children;
93
94	/*
95	* Search the meta elements
96	*/
97	found_meta:
98	while (cur != NULL) {
99	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
100	if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
101	xmlAttrPtr attr = cur->properties;
102	int http;
103	const xmlChar *value;
104
105	content = NULL;
106	http = 0;
107	while (attr != NULL) {
108	if ((attr->children != NULL) &&
109	(attr->children->type == XML_TEXT_NODE) &&
110	(attr->children->next == NULL)) {
111	value = attr->children->content;
112	if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
113	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
114	http = 1;
115	else if ((value != NULL)
116	&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
117	content = value;
118	if ((http != 0) && (content != NULL))
119	goto found_content;
120	}
121	attr = attr->next;
122	}
123	}
124	}
125	cur = cur->next;
126	}
127	return(NULL);
128
129	found_content:
130	encoding = xmlStrstr(content, BAD_CAST"charset=");
131	if (encoding == NULL)
132	encoding = xmlStrstr(content, BAD_CAST"Charset=");
133	if (encoding == NULL)
134	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
135	if (encoding != NULL) {
136	encoding += 8;
137	} else {
138	encoding = xmlStrstr(content, BAD_CAST"charset =");
139	if (encoding == NULL)
140	encoding = xmlStrstr(content, BAD_CAST"Charset =");
141	if (encoding == NULL)
142	encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
143	if (encoding != NULL)
144	encoding += 9;
145	}
146	if (encoding != NULL) {
147	while ((encoding == ' ') \|\| (encoding == '\t')) encoding++;
148	}
149	return(encoding);
150	}
151
152	/**
153	* htmlSetMetaEncoding:
154	* @doc: the document
155	* @encoding: the encoding string
156	*
157	* Sets the current encoding in the Meta tags
158	* NOTE: this will not change the document content encoding, just
159	* the META flag associated.
160	*
161	* Returns 0 in case of success and -1 in case of error
162	*/
163	int
164	htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
165	htmlNodePtr cur, meta = NULL, head = NULL;
166	const xmlChar *content = NULL;
167	char newcontent[100];
168
169	newcontent[0] = 0;
170
171	if (doc == NULL)
172	return(-1);
173
174	/* html isn't a real encoding it's just libxml2 way to get entities */
175	if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
176	return(-1);
177
178	if (encoding != NULL) {
179	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
180	(char *)encoding);
181	newcontent[sizeof(newcontent) - 1] = 0;
182	}
183
184	cur = doc->children;
185
186	/*
187	* Search the html
188	*/
189	while (cur != NULL) {
190	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
191	if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
192	break;
193	if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
194	goto found_head;
195	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
196	goto found_meta;
197	}
198	cur = cur->next;
199	}
200	if (cur == NULL)
201	return(-1);
202	cur = cur->children;
203
204	/*
205	* Search the head
206	*/
207	while (cur != NULL) {
208	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
209	if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
210	break;
211	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
212	head = cur->parent;
213	goto found_meta;
214	}
215	}
216	cur = cur->next;
217	}
218	if (cur == NULL)
219	return(-1);
220	found_head:
221	head = cur;
222	if (cur->children == NULL)
223	goto create;
224	cur = cur->children;
225
226	found_meta:
227	/*
228	* Search and update all the remaining the meta elements carrying
229	* encoding informations
230	*/
231	while (cur != NULL) {
232	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
233	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
234	xmlAttrPtr attr = cur->properties;
235	int http;
236	const xmlChar *value;
237
238	content = NULL;
239	http = 0;
240	while (attr != NULL) {
241	if ((attr->children != NULL) &&
242	(attr->children->type == XML_TEXT_NODE) &&
243	(attr->children->next == NULL)) {
244	value = attr->children->content;
245	if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
246	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
247	http = 1;
248	else
249	{
250	if ((value != NULL) &&
251	(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
252	content = value;
253	}
254	if ((http != 0) && (content != NULL))
255	break;
256	}
257	attr = attr->next;
258	}
259	if ((http != 0) && (content != NULL)) {
260	meta = cur;
261	break;
262	}
263
264	}
265	}
266	cur = cur->next;
267	}
268	create:
269	if (meta == NULL) {
270	if ((encoding != NULL) && (head != NULL)) {
271	/*
272	* Create a new Meta element with the right attributes
273	*/
274
275	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
276	if (head->children == NULL)
277	xmlAddChild(head, meta);
278	else
279	xmlAddPrevSibling(head->children, meta);
280	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
281	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
282	}
283	} else {
284	/* remove the meta tag if NULL is passed */
285	if (encoding == NULL) {
286	xmlUnlinkNode(meta);
287	xmlFreeNode(meta);
288	}
289	/* change the document only if there is a real encoding change */
290	else if (xmlStrcasestr(content, encoding) == NULL) {
291	xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
292	}
293	}
294
295
296	return(0);
297	}
298
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
312
313
314	/**
315	* htmlIsBooleanAttr:
316	* @name: the name of the attribute to check
317	*
318	* Determine if a given attribute is a boolean attribute.
319	*
320	* returns: false if the attribute is not boolean, true otherwise.
321	*/
322	int
323	htmlIsBooleanAttr(const xmlChar *name)
324	{
325	int i = 0;
326
327	while (htmlBooleanAttrs[i] != NULL) {
328	if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
329	return 1;
330	i++;
331	}
332	return 0;
333	}
334
335	#ifdef LIBXML_OUTPUT_ENABLED
336	/*
337	* private routine exported from xmlIO.c
338	*/
339	xmlOutputBufferPtr
340	xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
341	/************************************************************************
342	* *
343	* Output error handlers *
344	* *
345	************************************************************************/
346	/**
347	* htmlSaveErrMemory:
348	* @extra: extra informations
349	*
350	* Handle an out of memory condition
351	*/
352	static void
353	htmlSaveErrMemory(const char *extra)
354	{
355	__xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
356	}
357
358	/**
359	* htmlSaveErr:
360	* @code: the error number
361	* @node: the location of the error.
362	* @extra: extra informations
363	*
364	* Handle an out of memory condition
365	*/
366	static void
367	htmlSaveErr(int code, xmlNodePtr node, const char *extra)
368	{
369	const char *msg = NULL;
370
371	switch(code) {
372	case XML_SAVE_NOT_UTF8:
373	msg = "string is not in UTF-8\n";
374	break;
375	case XML_SAVE_CHAR_INVALID:
376	msg = "invalid character value\n";
377	break;
378	case XML_SAVE_UNKNOWN_ENCODING:
379	msg = "unknown encoding %s\n";
380	break;
381	case XML_SAVE_NO_DOCTYPE:
382	msg = "HTML has no DOCTYPE\n";
383	break;
384	default:
385	msg = "unexpected error number\n";
386	}
387	__xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
388	}
389
390	/************************************************************************
391	* *
392	* Dumping HTML tree content to a simple buffer *
393	* *
394	************************************************************************/
395
396	/**
397	* htmlBufNodeDumpFormat:
398	* @buf: the xmlBufPtr output
399	* @doc: the document
400	* @cur: the current node
401	* @format: should formatting spaces been added
402	*
403	* Dump an HTML node, recursive behaviour,children are printed too.
404	*
405	* Returns the number of byte written or -1 in case of error
406	*/
407	static size_t
408	htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
409	int format) {
410	size_t use;
411	int ret;
412	xmlOutputBufferPtr outbuf;
413
414	if (cur == NULL) {
415	return (-1);
416	}
417	if (buf == NULL) {
418	return (-1);
419	}
420	outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
421	if (outbuf == NULL) {
422	htmlSaveErrMemory("allocating HTML output buffer");
423	return (-1);
424	}
425	memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
426	outbuf->buffer = buf;
427	outbuf->encoder = NULL;
428	outbuf->writecallback = NULL;
429	outbuf->closecallback = NULL;
430	outbuf->context = NULL;
431	outbuf->written = 0;
432
433	use = xmlBufUse(buf);
434	htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
435	xmlFree(outbuf);
436	ret = xmlBufUse(buf) - use;
437	return (ret);
438	}
439
440	/**
441	* htmlNodeDump:
442	* @buf: the HTML buffer output
443	* @doc: the document
444	* @cur: the current node
445	*
446	* Dump an HTML node, recursive behaviour,children are printed too,
447	* and formatting returns are added.
448	*
449	* Returns the number of byte written or -1 in case of error
450	*/
451	int
452	htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
453	xmlBufPtr buffer;
454	size_t ret;
455
456	if ((buf == NULL) \|\| (cur == NULL))
457	return(-1);
458
459	xmlInitParser();
460	buffer = xmlBufFromBuffer(buf);
461	if (buffer == NULL)
462	return(-1);
463
464	ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
465
466	xmlBufBackToBuffer(buffer);
467
468	if (ret > INT_MAX)
469	return(-1);
470	return((int) ret);
471	}
472
473	/**
474	* htmlNodeDumpFileFormat:
475	* @out: the FILE pointer
476	* @doc: the document
477	* @cur: the current node
478	* @encoding: the document encoding
479	* @format: should formatting spaces been added
480	*
481	* Dump an HTML node, recursive behaviour,children are printed too.
482	*
483	* TODO: if encoding == NULL try to save in the doc encoding
484	*
485	* returns: the number of byte written or -1 in case of failure.
486	*/
487	int
488	htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
489	xmlNodePtr cur, const char *encoding, int format) {
490	xmlOutputBufferPtr buf;
491	xmlCharEncodingHandlerPtr handler = NULL;
492	int ret;
493
494	xmlInitParser();
495
496	if (encoding != NULL) {
497	xmlCharEncoding enc;
498
499	enc = xmlParseCharEncoding(encoding);
500	if (enc != XML_CHAR_ENCODING_UTF8) {
501	handler = xmlFindCharEncodingHandler(encoding);
502	if (handler == NULL)
503	htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
504	}
505	}
506
507	/*
508	* Fallback to HTML or ASCII when the encoding is unspecified
509	*/
510	if (handler == NULL)
511	handler = xmlFindCharEncodingHandler("HTML");
512	if (handler == NULL)
513	handler = xmlFindCharEncodingHandler("ascii");
514
515	/*
516	* save the content to a temp buffer.
517	*/
518	buf = xmlOutputBufferCreateFile(out, handler);
519	if (buf == NULL) return(0);
520
521	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
522
523	ret = xmlOutputBufferClose(buf);
524	return(ret);
525	}
526
527	/**
528	* htmlNodeDumpFile:
529	* @out: the FILE pointer
530	* @doc: the document
531	* @cur: the current node
532	*
533	* Dump an HTML node, recursive behaviour,children are printed too,
534	* and formatting returns are added.
535	*/
536	void
537	htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
538	htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
539	}
540
541	/**
542	* htmlDocDumpMemoryFormat:
543	* @cur: the document
544	* @mem: OUT: the memory pointer
545	* @size: OUT: the memory length
546	* @format: should formatting spaces been added
547	*
548	* Dump an HTML document in memory and return the xmlChar * and it's size.
549	* It's up to the caller to free the memory.
550	*/
551	void
552	htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar*mem, int size, int format) {
553	xmlOutputBufferPtr buf;
554	xmlCharEncodingHandlerPtr handler = NULL;
555	const char *encoding;
556
557	xmlInitParser();
558
559	if ((mem == NULL) \|\| (size == NULL))
560	return;
561	if (cur == NULL) {
562	*mem = NULL;
563	*size = 0;
564	return;
565	}
566
567	encoding = (const char *) htmlGetMetaEncoding(cur);
568
569	if (encoding != NULL) {
570	xmlCharEncoding enc;
571
572	enc = xmlParseCharEncoding(encoding);
573	if (enc != cur->charset) {
574	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
575	/*
576	* Not supported yet
577	*/
578	*mem = NULL;
579	*size = 0;
580	return;
581	}
582
583	handler = xmlFindCharEncodingHandler(encoding);
584	if (handler == NULL)
585	htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
586
587	} else {
588	handler = xmlFindCharEncodingHandler(encoding);
589	}
590	}
591
592	/*
593	* Fallback to HTML or ASCII when the encoding is unspecified
594	*/
595	if (handler == NULL)
596	handler = xmlFindCharEncodingHandler("HTML");
597	if (handler == NULL)
598	handler = xmlFindCharEncodingHandler("ascii");
599
600	buf = xmlAllocOutputBufferInternal(handler);
601	if (buf == NULL) {
602	*mem = NULL;
603	*size = 0;
604	return;
605	}
606
607	htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
608
609	xmlOutputBufferFlush(buf);
610	if (buf->conv != NULL) {
611	*size = xmlBufUse(buf->conv);
612	mem = xmlStrndup(xmlBufContent(buf->conv), size);
613	} else {
614	*size = xmlBufUse(buf->buffer);
615	mem = xmlStrndup(xmlBufContent(buf->buffer), size);
616	}
617	(void)xmlOutputBufferClose(buf);
618	}
619
620	/**
621	* htmlDocDumpMemory:
622	* @cur: the document
623	* @mem: OUT: the memory pointer
624	* @size: OUT: the memory length
625	*
626	* Dump an HTML document in memory and return the xmlChar * and it's size.
627	* It's up to the caller to free the memory.
628	*/
629	void
630	htmlDocDumpMemory(xmlDocPtr cur, xmlChar*mem, int size) {
631	htmlDocDumpMemoryFormat(cur, mem, size, 1);
632	}
633
634
635	/************************************************************************
636	* *
637	* Dumping HTML tree content to an I/O output buffer *
638	* *
639	************************************************************************/
640
641	void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
642
643	/**
644	* htmlDtdDumpOutput:
645	* @buf: the HTML buffer output
646	* @doc: the document
647	* @encoding: the encoding string
648	*
649	* TODO: check whether encoding is needed
650	*
651	* Dump the HTML document DTD, if any.
652	*/
653	static void
654	htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
655	const char *encoding ATTRIBUTE_UNUSED) {
656	xmlDtdPtr cur = doc->intSubset;
657
658	if (cur == NULL) {
659	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
660	return;
661	}
662	xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
663	xmlOutputBufferWriteString(buf, (const char *)cur->name);
664	if (cur->ExternalID != NULL) {
665	xmlOutputBufferWriteString(buf, " PUBLIC ");
666	xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
667	if (cur->SystemID != NULL) {
668	xmlOutputBufferWriteString(buf, " ");
669	xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
670	}
671	} else if (cur->SystemID != NULL &&
672	xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
673	xmlOutputBufferWriteString(buf, " SYSTEM ");
674	xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
675	}
676	xmlOutputBufferWriteString(buf, ">\n");
677	}
678
679	/**
680	* htmlAttrDumpOutput:
681	* @buf: the HTML buffer output
682	* @doc: the document
683	* @cur: the attribute pointer
684	* @encoding: the encoding string
685	*
686	* Dump an HTML attribute
687	*/
688	static void
689	htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
690	const char *encoding ATTRIBUTE_UNUSED) {
691	xmlChar *value;
692
693	/*
694	* The html output method should not escape a & character
695	* occurring in an attribute value immediately followed by
696	* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
697	* This is implemented in xmlEncodeEntitiesReentrant
698	*/
699
700	if (cur == NULL) {
701	return;
702	}
703	xmlOutputBufferWriteString(buf, " ");
704	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
705	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
706	xmlOutputBufferWriteString(buf, ":");
707	}
708	xmlOutputBufferWriteString(buf, (const char *)cur->name);
709	if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
710	value = xmlNodeListGetString(doc, cur->children, 0);
711	if (value) {
712	xmlOutputBufferWriteString(buf, "=");
713	if ((cur->ns == NULL) && (cur->parent != NULL) &&
714	(cur->parent->ns == NULL) &&
715	((!xmlStrcasecmp(cur->name, BAD_CAST "href")) \|\|
716	(!xmlStrcasecmp(cur->name, BAD_CAST "action")) \|\|
717	(!xmlStrcasecmp(cur->name, BAD_CAST "src")) \|\|
718	((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
719	(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
720	xmlChar *tmp = value;
721	/* xmlURIEscapeStr() escapes '"' so it can be safely used. */
722	xmlBufCCat(buf->buffer, "\"");
723
724	while (IS_BLANK_CH(*tmp)) tmp++;
725
726	/* URI Escape everything, except server side includes. */
727	for ( ; ; ) {
728	xmlChar *escaped;
729	xmlChar endChar;
730	xmlChar *end = NULL;
731	xmlChar start = (xmlChar )xmlStrstr(tmp, BAD_CAST "<!--");
732	if (start != NULL) {
733	end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
734	if (end != NULL) {
735	*start = '\0';
736	}
737	}
738
739	/* Escape the whole string, or until start (set to '\0'). */
740	escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
741	if (escaped != NULL) {
742	xmlBufCat(buf->buffer, escaped);
743	xmlFree(escaped);
744	} else {
745	xmlBufCat(buf->buffer, tmp);
746	}
747
748	if (end == NULL) { /* Everything has been written. */
749	break;
750	}
751
752	/* Do not escape anything within server side includes. */
753	start = '<'; / Restore the first character of "<!--". */
754	end += 3; /* strlen("-->") */
755	endChar = *end;
756	*end = '\0';
757	xmlBufCat(buf->buffer, start);
758	*end = endChar;
759	tmp = end;
760	}
761
762	xmlBufCCat(buf->buffer, "\"");
763	} else {
764	xmlBufWriteQuotedString(buf->buffer, value);
765	}
766	xmlFree(value);
767	} else {
768	xmlOutputBufferWriteString(buf, "=\"\"");
769	}
770	}
771	}
772
773	/**
774	* htmlAttrListDumpOutput:
775	* @buf: the HTML buffer output
776	* @doc: the document
777	* @cur: the first attribute pointer
778	* @encoding: the encoding string
779	*
780	* Dump a list of HTML attributes
781	*/
782	static void
783	htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
784	if (cur == NULL) {
785	return;
786	}
787	while (cur != NULL) {
788	htmlAttrDumpOutput(buf, doc, cur, encoding);
789	cur = cur->next;
790	}
791	}
792
793
794
795	/**
796	* htmlNodeListDumpOutput:
797	* @buf: the HTML buffer output
798	* @doc: the document
799	* @cur: the first node
800	* @encoding: the encoding string
801	* @format: should formatting spaces been added
802	*
803	* Dump an HTML node list, recursive behaviour,children are printed too.
804	*/
805	static void
806	htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
807	xmlNodePtr cur, const char *encoding, int format) {
808	if (cur == NULL) {
809	return;
810	}
811	while (cur != NULL) {
812	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
813	cur = cur->next;
814	}
815	}
816
817	/**
818	* htmlNodeDumpFormatOutput:
819	* @buf: the HTML buffer output
820	* @doc: the document
821	* @cur: the current node
822	* @encoding: the encoding string
823	* @format: should formatting spaces been added
824	*
825	* Dump an HTML node, recursive behaviour,children are printed too.
826	*/
827	void
828	htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
829	xmlNodePtr cur, const char *encoding, int format) {
830	const htmlElemDesc * info;
831
832	xmlInitParser();
833
834	if ((cur == NULL) \|\| (buf == NULL)) {
835	return;
836	}
837	/*
838	* Special cases.
839	*/
840	if (cur->type == XML_DTD_NODE)
841	return;
842	if ((cur->type == XML_HTML_DOCUMENT_NODE) \|\|
843	(cur->type == XML_DOCUMENT_NODE)){
844	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
845	return;
846	}
847	if (cur->type == XML_ATTRIBUTE_NODE) {
848	htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
849	return;
850	}
851	if (cur->type == HTML_TEXT_NODE) {
852	if (cur->content != NULL) {
853	if (((cur->name == (const xmlChar *)xmlStringText) \|\|
854	(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
855	((cur->parent == NULL) \|\|
856	((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
857	(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
858	xmlChar *buffer;
859
860	buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
861	if (buffer != NULL) {
862	xmlOutputBufferWriteString(buf, (const char *)buffer);
863	xmlFree(buffer);
864	}
865	} else {
866	xmlOutputBufferWriteString(buf, (const char *)cur->content);
867	}
868	}
869	return;
870	}
871	if (cur->type == HTML_COMMENT_NODE) {
872	if (cur->content != NULL) {
873	xmlOutputBufferWriteString(buf, "<!--");
874	xmlOutputBufferWriteString(buf, (const char *)cur->content);
875	xmlOutputBufferWriteString(buf, "-->");
876	}
877	return;
878	}
879	if (cur->type == HTML_PI_NODE) {
880	if (cur->name == NULL)
881	return;
882	xmlOutputBufferWriteString(buf, "<?");
883	xmlOutputBufferWriteString(buf, (const char *)cur->name);
884	if (cur->content != NULL) {
885	xmlOutputBufferWriteString(buf, " ");
886	xmlOutputBufferWriteString(buf, (const char *)cur->content);
887	}
888	xmlOutputBufferWriteString(buf, ">");
889	return;
890	}
891	if (cur->type == HTML_ENTITY_REF_NODE) {
892	xmlOutputBufferWriteString(buf, "&");
893	xmlOutputBufferWriteString(buf, (const char *)cur->name);
894	xmlOutputBufferWriteString(buf, ";");
895	return;
896	}
897	if (cur->type == HTML_PRESERVE_NODE) {
898	if (cur->content != NULL) {
899	xmlOutputBufferWriteString(buf, (const char *)cur->content);
900	}
901	return;
902	}
903
904	/*
905	* Get specific HTML info for that node.
906	*/
907	if (cur->ns == NULL)
908	info = htmlTagLookup(cur->name);
909	else
910	info = NULL;
911
912	xmlOutputBufferWriteString(buf, "<");
913	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
914	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
915	xmlOutputBufferWriteString(buf, ":");
916	}
917	xmlOutputBufferWriteString(buf, (const char *)cur->name);
918	if (cur->nsDef)
919	xmlNsListDumpOutput(buf, cur->nsDef);
920	if (cur->properties != NULL)
921	htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
922
923	if ((info != NULL) && (info->empty)) {
924	xmlOutputBufferWriteString(buf, ">");
925	if ((format) && (!info->isinline) && (cur->next != NULL)) {
926	if ((cur->next->type != HTML_TEXT_NODE) &&
927	(cur->next->type != HTML_ENTITY_REF_NODE) &&
928	(cur->parent != NULL) &&
929	(cur->parent->name != NULL) &&
930	(cur->parent->name[0] != 'p')) /* p, pre, param */
931	xmlOutputBufferWriteString(buf, "\n");
932	}
933	return;
934	}
935	if (((cur->type == XML_ELEMENT_NODE) \|\| (cur->content == NULL)) &&
936	(cur->children == NULL)) {
937	if ((info != NULL) && (info->saveEndTag != 0) &&
938	(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
939	(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
940	xmlOutputBufferWriteString(buf, ">");
941	} else {
942	xmlOutputBufferWriteString(buf, "></");
943	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
944	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
945	xmlOutputBufferWriteString(buf, ":");
946	}
947	xmlOutputBufferWriteString(buf, (const char *)cur->name);
948	xmlOutputBufferWriteString(buf, ">");
949	}
950	if ((format) && (cur->next != NULL) &&
951	(info != NULL) && (!info->isinline)) {
952	if ((cur->next->type != HTML_TEXT_NODE) &&
953	(cur->next->type != HTML_ENTITY_REF_NODE) &&
954	(cur->parent != NULL) &&
955	(cur->parent->name != NULL) &&
956	(cur->parent->name[0] != 'p')) /* p, pre, param */
957	xmlOutputBufferWriteString(buf, "\n");
958	}
959	return;
960	}
961	xmlOutputBufferWriteString(buf, ">");
962	if ((cur->type != XML_ELEMENT_NODE) &&
963	(cur->content != NULL)) {
964	/*
965	* Uses the OutputBuffer property to automatically convert
966	* invalids to charrefs
967	*/
968
969	xmlOutputBufferWriteString(buf, (const char *) cur->content);
970	}
971	if (cur->children != NULL) {
972	if ((format) && (info != NULL) && (!info->isinline) &&
973	(cur->children->type != HTML_TEXT_NODE) &&
974	(cur->children->type != HTML_ENTITY_REF_NODE) &&
975	(cur->children != cur->last) &&
976	(cur->name != NULL) &&
977	(cur->name[0] != 'p')) /* p, pre, param */
978	xmlOutputBufferWriteString(buf, "\n");
979	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
980	if ((format) && (info != NULL) && (!info->isinline) &&
981	(cur->last->type != HTML_TEXT_NODE) &&
982	(cur->last->type != HTML_ENTITY_REF_NODE) &&
983	(cur->children != cur->last) &&
984	(cur->name != NULL) &&
985	(cur->name[0] != 'p')) /* p, pre, param */
986	xmlOutputBufferWriteString(buf, "\n");
987	}
988	xmlOutputBufferWriteString(buf, "</");
989	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
990	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
991	xmlOutputBufferWriteString(buf, ":");
992	}
993	xmlOutputBufferWriteString(buf, (const char *)cur->name);
994	xmlOutputBufferWriteString(buf, ">");
995	if ((format) && (info != NULL) && (!info->isinline) &&
996	(cur->next != NULL)) {
997	if ((cur->next->type != HTML_TEXT_NODE) &&
998	(cur->next->type != HTML_ENTITY_REF_NODE) &&
999	(cur->parent != NULL) &&
1000	(cur->parent->name != NULL) &&
1001	(cur->parent->name[0] != 'p')) /* p, pre, param */
1002	xmlOutputBufferWriteString(buf, "\n");
1003	}
1004	}
1005
1006	/**
1007	* htmlNodeDumpOutput:
1008	* @buf: the HTML buffer output
1009	* @doc: the document
1010	* @cur: the current node
1011	* @encoding: the encoding string
1012	*
1013	* Dump an HTML node, recursive behaviour,children are printed too,
1014	* and formatting returns/spaces are added.
1015	*/
1016	void
1017	htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
1018	xmlNodePtr cur, const char *encoding) {
1019	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
1020	}
1021
1022	/**
1023	* htmlDocContentDumpFormatOutput:
1024	* @buf: the HTML buffer output
1025	* @cur: the document
1026	* @encoding: the encoding string
1027	* @format: should formatting spaces been added
1028	*
1029	* Dump an HTML document.
1030	*/
1031	void
1032	htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1033	const char *encoding, int format) {
1034	int type;
1035
1036	xmlInitParser();
1037
1038	if ((buf == NULL) \|\| (cur == NULL))
1039	return;
1040
1041	/*
1042	* force to output the stuff as HTML, especially for entities
1043	*/
1044	type = cur->type;
1045	cur->type = XML_HTML_DOCUMENT_NODE;
1046	if (cur->intSubset != NULL) {
1047	htmlDtdDumpOutput(buf, cur, NULL);
1048	}
1049	if (cur->children != NULL) {
1050	htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1051	}
1052	xmlOutputBufferWriteString(buf, "\n");
1053	cur->type = (xmlElementType) type;
1054	}
1055
1056	/**
1057	* htmlDocContentDumpOutput:
1058	* @buf: the HTML buffer output
1059	* @cur: the document
1060	* @encoding: the encoding string
1061	*
1062	* Dump an HTML document. Formating return/spaces are added.
1063	*/
1064	void
1065	htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1066	const char *encoding) {
1067	htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1068	}
1069
1070	/************************************************************************
1071	* *
1072	* Saving functions front-ends *
1073	* *
1074	************************************************************************/
1075
1076	/**
1077	* htmlDocDump:
1078	* @f: the FILE*
1079	* @cur: the document
1080	*
1081	* Dump an HTML document to an open FILE.
1082	*
1083	* returns: the number of byte written or -1 in case of failure.
1084	*/
1085	int
1086	htmlDocDump(FILE *f, xmlDocPtr cur) {
1087	xmlOutputBufferPtr buf;
1088	xmlCharEncodingHandlerPtr handler = NULL;
1089	const char *encoding;
1090	int ret;
1091
1092	xmlInitParser();
1093
1094	if ((cur == NULL) \|\| (f == NULL)) {
1095	return(-1);
1096	}
1097
1098	encoding = (const char *) htmlGetMetaEncoding(cur);
1099
1100	if (encoding != NULL) {
1101	xmlCharEncoding enc;
1102
1103	enc = xmlParseCharEncoding(encoding);
1104	if (enc != cur->charset) {
1105	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1106	/*
1107	* Not supported yet
1108	*/
1109	return(-1);
1110	}
1111
1112	handler = xmlFindCharEncodingHandler(encoding);
1113	if (handler == NULL)
1114	htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1115	} else {
1116	handler = xmlFindCharEncodingHandler(encoding);
1117	}
1118	}
1119
1120	/*
1121	* Fallback to HTML or ASCII when the encoding is unspecified
1122	*/
1123	if (handler == NULL)
1124	handler = xmlFindCharEncodingHandler("HTML");
1125	if (handler == NULL)
1126	handler = xmlFindCharEncodingHandler("ascii");
1127
1128	buf = xmlOutputBufferCreateFile(f, handler);
1129	if (buf == NULL) return(-1);
1130	htmlDocContentDumpOutput(buf, cur, NULL);
1131
1132	ret = xmlOutputBufferClose(buf);
1133	return(ret);
1134	}
1135
1136	/**
1137	* htmlSaveFile:
1138	* @filename: the filename (or URL)
1139	* @cur: the document
1140	*
1141	* Dump an HTML document to a file. If @filename is "-" the stdout file is
1142	* used.
1143	* returns: the number of byte written or -1 in case of failure.
1144	*/
1145	int
1146	htmlSaveFile(const char *filename, xmlDocPtr cur) {
1147	xmlOutputBufferPtr buf;
1148	xmlCharEncodingHandlerPtr handler = NULL;
1149	const char *encoding;
1150	int ret;
1151
1152	if ((cur == NULL) \|\| (filename == NULL))
1153	return(-1);
1154
1155	xmlInitParser();
1156
1157	encoding = (const char *) htmlGetMetaEncoding(cur);
1158
1159	if (encoding != NULL) {
1160	xmlCharEncoding enc;
1161
1162	enc = xmlParseCharEncoding(encoding);
1163	if (enc != cur->charset) {
1164	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1165	/*
1166	* Not supported yet
1167	*/
1168	return(-1);
1169	}
1170
1171	handler = xmlFindCharEncodingHandler(encoding);
1172	if (handler == NULL)
1173	htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1174	}
1175	}
1176
1177	/*
1178	* Fallback to HTML or ASCII when the encoding is unspecified
1179	*/
1180	if (handler == NULL)
1181	handler = xmlFindCharEncodingHandler("HTML");
1182	if (handler == NULL)
1183	handler = xmlFindCharEncodingHandler("ascii");
1184
1185	/*
1186	* save the content to a temp buffer.
1187	*/
1188	buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1189	if (buf == NULL) return(0);
1190
1191	htmlDocContentDumpOutput(buf, cur, NULL);
1192
1193	ret = xmlOutputBufferClose(buf);
1194	return(ret);
1195	}
1196
1197	/**
1198	* htmlSaveFileFormat:
1199	* @filename: the filename
1200	* @cur: the document
1201	* @format: should formatting spaces been added
1202	* @encoding: the document encoding
1203	*
1204	* Dump an HTML document to a file using a given encoding.
1205	*
1206	* returns: the number of byte written or -1 in case of failure.
1207	*/
1208	int
1209	htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1210	const char *encoding, int format) {
1211	xmlOutputBufferPtr buf;
1212	xmlCharEncodingHandlerPtr handler = NULL;
1213	int ret;
1214
1215	if ((cur == NULL) \|\| (filename == NULL))
1216	return(-1);
1217
1218	xmlInitParser();
1219
1220	if (encoding != NULL) {
1221	xmlCharEncoding enc;
1222
1223	enc = xmlParseCharEncoding(encoding);
1224	if (enc != cur->charset) {
1225	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1226	/*
1227	* Not supported yet
1228	*/
1229	return(-1);
1230	}
1231
1232	handler = xmlFindCharEncodingHandler(encoding);
1233	if (handler == NULL)
1234	htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1235	}
1236	htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1237	} else {
1238	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1239	}
1240
1241	/*
1242	* Fallback to HTML or ASCII when the encoding is unspecified
1243	*/
1244	if (handler == NULL)
1245	handler = xmlFindCharEncodingHandler("HTML");
1246	if (handler == NULL)
1247	handler = xmlFindCharEncodingHandler("ascii");
1248
1249	/*
1250	* save the content to a temp buffer.
1251	*/
1252	buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1253	if (buf == NULL) return(0);
1254
1255	htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1256
1257	ret = xmlOutputBufferClose(buf);
1258	return(ret);
1259	}
1260
1261	/**
1262	* htmlSaveFileEnc:
1263	* @filename: the filename
1264	* @cur: the document
1265	* @encoding: the document encoding
1266	*
1267	* Dump an HTML document to a file using a given encoding
1268	* and formatting returns/spaces are added.
1269	*
1270	* returns: the number of byte written or -1 in case of failure.
1271	*/
1272	int
1273	htmlSaveFileEnc(const char filename, xmlDocPtr cur, const char encoding) {
1274	return(htmlSaveFileFormat(filename, cur, encoding, 1));
1275	}
1276
1277	#endif /* LIBXML_OUTPUT_ENABLED */
1278
1279	#define bottom_HTMLtree
1280	#include "elfgcchack.h"
1281	#endif /* LIBXML_HTML_ENABLED */

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/libxml2-2.9.4/HTMLtree.c@ 70563

以其他格式下載: