HTMLtree.c@ 105764

最後變更在這個檔案從105764是 105420,由 vboxsync 提交於 4 月前
libxml2-2.12.6: Applied and adjusted our libxml2 changes to 2.12.6. bugref:10730
屬性 svn:eol-style 設為 `native`
檔案大小: 31.9 KB

行
1	/*
2	* HTMLtree.c : implementation of access function for an HTML tree.
3	*
4	* See Copyright for the status of this software.
5	*
6	* daniel@veillard.com
7	*/
8
9
10	#define IN_LIBXML
11	#include "libxml.h"
12	#ifdef LIBXML_HTML_ENABLED
13
14	#include <string.h> /* for memset() only ! */
15	#include <ctype.h>
16	#include <stdlib.h>
17
18	#include <libxml/xmlmemory.h>
19	#include <libxml/HTMLparser.h>
20	#include <libxml/HTMLtree.h>
21	#include <libxml/entities.h>
22	#include <libxml/xmlerror.h>
23	#include <libxml/parserInternals.h>
24	#include <libxml/uri.h>
25
26	#include "private/buf.h"
27	#include "private/error.h"
28	#include "private/io.h"
29	#include "private/save.h"
30
31	/************************************************************************
32	* *
33	* Getting/Setting encoding meta tags *
34	* *
35	************************************************************************/
36
37	/**
38	* htmlGetMetaEncoding:
39	* @doc: the document
40	*
41	* Encoding definition lookup in the Meta tags
42	*
43	* Returns the current encoding as flagged in the HTML source
44	*/
45	const xmlChar *
46	htmlGetMetaEncoding(htmlDocPtr doc) {
47	htmlNodePtr cur;
48	const xmlChar *content;
49	const xmlChar *encoding;
50
51	if (doc == NULL)
52	return(NULL);
53	cur = doc->children;
54
55	/*
56	* Search the html
57	*/
58	while (cur != NULL) {
59	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
60	if (xmlStrEqual(cur->name, BAD_CAST"html"))
61	break;
62	if (xmlStrEqual(cur->name, BAD_CAST"head"))
63	goto found_head;
64	if (xmlStrEqual(cur->name, BAD_CAST"meta"))
65	goto found_meta;
66	}
67	cur = cur->next;
68	}
69	if (cur == NULL)
70	return(NULL);
71	cur = cur->children;
72
73	/*
74	* Search the head
75	*/
76	while (cur != NULL) {
77	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
78	if (xmlStrEqual(cur->name, BAD_CAST"head"))
79	break;
80	if (xmlStrEqual(cur->name, BAD_CAST"meta"))
81	goto found_meta;
82	}
83	cur = cur->next;
84	}
85	if (cur == NULL)
86	return(NULL);
87	found_head:
88	cur = cur->children;
89
90	/*
91	* Search the meta elements
92	*/
93	found_meta:
94	while (cur != NULL) {
95	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
96	if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
97	xmlAttrPtr attr = cur->properties;
98	int http;
99	const xmlChar *value;
100
101	content = NULL;
102	http = 0;
103	while (attr != NULL) {
104	if ((attr->children != NULL) &&
105	(attr->children->type == XML_TEXT_NODE) &&
106	(attr->children->next == NULL)) {
107	value = attr->children->content;
108	if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
109	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
110	http = 1;
111	else if ((value != NULL)
112	&& (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
113	content = value;
114	if ((http != 0) && (content != NULL))
115	goto found_content;
116	}
117	attr = attr->next;
118	}
119	}
120	}
121	cur = cur->next;
122	}
123	return(NULL);
124
125	found_content:
126	encoding = xmlStrstr(content, BAD_CAST"charset=");
127	if (encoding == NULL)
128	encoding = xmlStrstr(content, BAD_CAST"Charset=");
129	if (encoding == NULL)
130	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
131	if (encoding != NULL) {
132	encoding += 8;
133	} else {
134	encoding = xmlStrstr(content, BAD_CAST"charset =");
135	if (encoding == NULL)
136	encoding = xmlStrstr(content, BAD_CAST"Charset =");
137	if (encoding == NULL)
138	encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
139	if (encoding != NULL)
140	encoding += 9;
141	}
142	if (encoding != NULL) {
143	while ((encoding == ' ') \|\| (encoding == '\t')) encoding++;
144	}
145	return(encoding);
146	}
147
148	/**
149	* htmlSetMetaEncoding:
150	* @doc: the document
151	* @encoding: the encoding string
152	*
153	* Sets the current encoding in the Meta tags
154	* NOTE: this will not change the document content encoding, just
155	* the META flag associated.
156	*
157	* Returns 0 in case of success and -1 in case of error
158	*/
159	int
160	htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
161	htmlNodePtr cur, meta = NULL, head = NULL;
162	const xmlChar *content = NULL;
163	char newcontent[100];
164
165	newcontent[0] = 0;
166
167	if (doc == NULL)
168	return(-1);
169
170	/* html isn't a real encoding it's just libxml2 way to get entities */
171	if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
172	return(-1);
173
174	if (encoding != NULL) {
175	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
176	(char *)encoding);
177	newcontent[sizeof(newcontent) - 1] = 0;
178	}
179
180	cur = doc->children;
181
182	/*
183	* Search the html
184	*/
185	while (cur != NULL) {
186	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
187	if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
188	break;
189	if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
190	goto found_head;
191	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
192	goto found_meta;
193	}
194	cur = cur->next;
195	}
196	if (cur == NULL)
197	return(-1);
198	cur = cur->children;
199
200	/*
201	* Search the head
202	*/
203	while (cur != NULL) {
204	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
205	if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
206	break;
207	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
208	head = cur->parent;
209	goto found_meta;
210	}
211	}
212	cur = cur->next;
213	}
214	if (cur == NULL)
215	return(-1);
216	found_head:
217	head = cur;
218	if (cur->children == NULL)
219	goto create;
220	cur = cur->children;
221
222	found_meta:
223	/*
224	* Search and update all the remaining the meta elements carrying
225	* encoding information
226	*/
227	while (cur != NULL) {
228	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
229	if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
230	xmlAttrPtr attr = cur->properties;
231	int http;
232	const xmlChar *value;
233
234	content = NULL;
235	http = 0;
236	while (attr != NULL) {
237	if ((attr->children != NULL) &&
238	(attr->children->type == XML_TEXT_NODE) &&
239	(attr->children->next == NULL)) {
240	value = attr->children->content;
241	if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
242	&& (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
243	http = 1;
244	else
245	{
246	if ((value != NULL) &&
247	(!xmlStrcasecmp(attr->name, BAD_CAST"content")))
248	content = value;
249	}
250	if ((http != 0) && (content != NULL))
251	break;
252	}
253	attr = attr->next;
254	}
255	if ((http != 0) && (content != NULL)) {
256	meta = cur;
257	break;
258	}
259
260	}
261	}
262	cur = cur->next;
263	}
264	create:
265	if (meta == NULL) {
266	if ((encoding != NULL) && (head != NULL)) {
267	/*
268	* Create a new Meta element with the right attributes
269	*/
270
271	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
272	if (head->children == NULL)
273	xmlAddChild(head, meta);
274	else
275	xmlAddPrevSibling(head->children, meta);
276	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
277	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
278	}
279	} else {
280	/* remove the meta tag if NULL is passed */
281	if (encoding == NULL) {
282	xmlUnlinkNode(meta);
283	xmlFreeNode(meta);
284	}
285	/* change the document only if there is a real encoding change */
286	else if (xmlStrcasestr(content, encoding) == NULL) {
287	xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
288	}
289	}
290
291
292	return(0);
293	}
294
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
308
309
310	/**
311	* htmlIsBooleanAttr:
312	* @name: the name of the attribute to check
313	*
314	* Determine if a given attribute is a boolean attribute.
315	*
316	* returns: false if the attribute is not boolean, true otherwise.
317	*/
318	int
319	htmlIsBooleanAttr(const xmlChar *name)
320	{
321	int i = 0;
322
323	while (htmlBooleanAttrs[i] != NULL) {
324	if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
325	return 1;
326	i++;
327	}
328	return 0;
329	}
330
331	#ifdef LIBXML_OUTPUT_ENABLED
332	/************************************************************************
333	* *
334	* Output error handlers *
335	* *
336	************************************************************************/
337
338	/**
339	* htmlSaveErr:
340	* @code: the error number
341	* @node: the location of the error.
342	* @extra: extra information
343	*
344	* Handle an out of memory condition
345	*/
346	static void
347	htmlSaveErr(int code, xmlNodePtr node, const char *extra)
348	{
349	const char *msg = NULL;
350	int res;
351
352	switch(code) {
353	case XML_SAVE_NOT_UTF8:
354	msg = "string is not in UTF-8\n";
355	break;
356	case XML_SAVE_CHAR_INVALID:
357	msg = "invalid character value\n";
358	break;
359	case XML_SAVE_UNKNOWN_ENCODING:
360	msg = "unknown encoding %s\n";
361	break;
362	case XML_SAVE_NO_DOCTYPE:
363	msg = "HTML has no DOCTYPE\n";
364	break;
365	default:
366	msg = "unexpected error number\n";
367	}
368
369	res = __xmlRaiseError(NULL, NULL, NULL, NULL, node,
370	XML_FROM_OUTPUT, code, XML_ERR_ERROR, NULL, 0,
371	extra, NULL, NULL, 0, 0,
372	msg, extra);
373	if (res < 0)
374	xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_OUTPUT, NULL);
375	}
376
377	/************************************************************************
378	* *
379	* Dumping HTML tree content to a simple buffer *
380	* *
381	************************************************************************/
382
383	static xmlCharEncodingHandler *
384	htmlFindOutputEncoder(const char *encoding) {
385	xmlCharEncodingHandler *handler = NULL;
386
387	if (encoding != NULL) {
388	int res;
389
390	res = xmlOpenCharEncodingHandler(encoding, /* output */ 1,
391	&handler);
392	if (res != XML_ERR_OK)
393	htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
394	} else {
395	/*
396	* Fallback to HTML when the encoding is unspecified
397	*/
398	xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler);
399	}
400
401	return(handler);
402	}
403
404	/**
405	* htmlBufNodeDumpFormat:
406	* @buf: the xmlBufPtr output
407	* @doc: the document
408	* @cur: the current node
409	* @format: should formatting spaces been added
410	*
411	* Dump an HTML node, recursive behaviour,children are printed too.
412	*
413	* Returns the number of byte written or -1 in case of error
414	*/
415	static size_t
416	htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
417	int format) {
418	size_t use;
419	size_t ret;
420	xmlOutputBufferPtr outbuf;
421
422	if (cur == NULL) {
423	return ((size_t) -1);
424	}
425	if (buf == NULL) {
426	return ((size_t) -1);
427	}
428	outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
429	if (outbuf == NULL)
430	return ((size_t) -1);
431	memset(outbuf, 0, sizeof(xmlOutputBuffer));
432	outbuf->buffer = buf;
433	outbuf->encoder = NULL;
434	outbuf->writecallback = NULL;
435	outbuf->closecallback = NULL;
436	outbuf->context = NULL;
437	outbuf->written = 0;
438
439	use = xmlBufUse(buf);
440	htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
441	if (outbuf->error)
442	ret = (size_t) -1;
443	else
444	ret = xmlBufUse(buf) - use;
445	xmlFree(outbuf);
446	return (ret);
447	}
448
449	/**
450	* htmlNodeDump:
451	* @buf: the HTML buffer output
452	* @doc: the document
453	* @cur: the current node
454	*
455	* Dump an HTML node, recursive behaviour,children are printed too,
456	* and formatting returns are added.
457	*
458	* Returns the number of byte written or -1 in case of error
459	*/
460	int
461	htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
462	xmlBufPtr buffer;
463	size_t ret;
464
465	if ((buf == NULL) \|\| (cur == NULL))
466	return(-1);
467
468	xmlInitParser();
469	buffer = xmlBufFromBuffer(buf);
470	if (buffer == NULL)
471	return(-1);
472
473	xmlBufSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT);
474	ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
475
476	xmlBufBackToBuffer(buffer);
477
478	if (ret > INT_MAX)
479	return(-1);
480	return((int) ret);
481	}
482
483	/**
484	* htmlNodeDumpFileFormat:
485	* @out: the FILE pointer
486	* @doc: the document
487	* @cur: the current node
488	* @encoding: the document encoding
489	* @format: should formatting spaces been added
490	*
491	* Dump an HTML node, recursive behaviour,children are printed too.
492	*
493	* TODO: if encoding == NULL try to save in the doc encoding
494	*
495	* returns: the number of byte written or -1 in case of failure.
496	*/
497	int
498	htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
499	xmlNodePtr cur, const char *encoding, int format) {
500	xmlOutputBufferPtr buf;
501	xmlCharEncodingHandlerPtr handler;
502	int ret;
503
504	xmlInitParser();
505
506	/*
507	* save the content to a temp buffer.
508	*/
509	handler = htmlFindOutputEncoder(encoding);
510	buf = xmlOutputBufferCreateFile(out, handler);
511	if (buf == NULL) {
512	xmlCharEncCloseFunc(handler);
513	return(0);
514	}
515
516	htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
517
518	ret = xmlOutputBufferClose(buf);
519	return(ret);
520	}
521
522	/**
523	* htmlNodeDumpFile:
524	* @out: the FILE pointer
525	* @doc: the document
526	* @cur: the current node
527	*
528	* Dump an HTML node, recursive behaviour,children are printed too,
529	* and formatting returns are added.
530	*/
531	void
532	htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
533	htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
534	}
535
536	/**
537	* htmlDocDumpMemoryFormat:
538	* @cur: the document
539	* @mem: OUT: the memory pointer
540	* @size: OUT: the memory length
541	* @format: should formatting spaces been added
542	*
543	* Dump an HTML document in memory and return the xmlChar * and it's size.
544	* It's up to the caller to free the memory.
545	*/
546	void
547	htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar*mem, int size, int format) {
548	xmlOutputBufferPtr buf;
549	xmlCharEncodingHandlerPtr handler = NULL;
550	const char *encoding;
551
552	xmlInitParser();
553
554	if ((mem == NULL) \|\| (size == NULL))
555	return;
556	*mem = NULL;
557	*size = 0;
558	if (cur == NULL)
559	return;
560
561	encoding = (const char *) htmlGetMetaEncoding(cur);
562	handler = htmlFindOutputEncoder(encoding);
563	buf = xmlAllocOutputBufferInternal(handler);
564	if (buf == NULL) {
565	xmlCharEncCloseFunc(handler);
566	return;
567	}
568
569	htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
570
571	xmlOutputBufferFlush(buf);
572
573	if (!buf->error) {
574	if (buf->conv != NULL) {
575	*size = xmlBufUse(buf->conv);
576	mem = xmlStrndup(xmlBufContent(buf->conv), size);
577	} else {
578	*size = xmlBufUse(buf->buffer);
579	mem = xmlStrndup(xmlBufContent(buf->buffer), size);
580	}
581	}
582
583	xmlOutputBufferClose(buf);
584	}
585
586	/**
587	* htmlDocDumpMemory:
588	* @cur: the document
589	* @mem: OUT: the memory pointer
590	* @size: OUT: the memory length
591	*
592	* Dump an HTML document in memory and return the xmlChar * and it's size.
593	* It's up to the caller to free the memory.
594	*/
595	void
596	htmlDocDumpMemory(xmlDocPtr cur, xmlChar*mem, int size) {
597	htmlDocDumpMemoryFormat(cur, mem, size, 1);
598	}
599
600
601	/************************************************************************
602	* *
603	* Dumping HTML tree content to an I/O output buffer *
604	* *
605	************************************************************************/
606
607	/**
608	* htmlDtdDumpOutput:
609	* @buf: the HTML buffer output
610	* @doc: the document
611	* @encoding: the encoding string
612	*
613	* TODO: check whether encoding is needed
614	*
615	* Dump the HTML document DTD, if any.
616	*/
617	static void
618	htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
619	const char *encoding ATTRIBUTE_UNUSED) {
620	xmlDtdPtr cur = doc->intSubset;
621
622	if (cur == NULL) {
623	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
624	return;
625	}
626	xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
627	xmlOutputBufferWriteString(buf, (const char *)cur->name);
628	if (cur->ExternalID != NULL) {
629	xmlOutputBufferWriteString(buf, " PUBLIC ");
630	xmlOutputBufferWriteQuotedString(buf, cur->ExternalID);
631	if (cur->SystemID != NULL) {
632	xmlOutputBufferWriteString(buf, " ");
633	xmlOutputBufferWriteQuotedString(buf, cur->SystemID);
634	}
635	} else if (cur->SystemID != NULL &&
636	xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
637	xmlOutputBufferWriteString(buf, " SYSTEM ");
638	xmlOutputBufferWriteQuotedString(buf, cur->SystemID);
639	}
640	xmlOutputBufferWriteString(buf, ">\n");
641	}
642
643	/**
644	* htmlAttrDumpOutput:
645	* @buf: the HTML buffer output
646	* @doc: the document
647	* @cur: the attribute pointer
648	*
649	* Dump an HTML attribute
650	*/
651	static void
652	htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
653	xmlChar *value;
654
655	/*
656	* The html output method should not escape a & character
657	* occurring in an attribute value immediately followed by
658	* a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
659	* This is implemented in xmlEncodeEntitiesReentrant
660	*/
661
662	if (cur == NULL) {
663	return;
664	}
665	xmlOutputBufferWriteString(buf, " ");
666	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
667	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
668	xmlOutputBufferWriteString(buf, ":");
669	}
670	xmlOutputBufferWriteString(buf, (const char *)cur->name);
671	if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
672	value = xmlNodeListGetString(doc, cur->children, 0);
673	if (value) {
674	xmlOutputBufferWriteString(buf, "=");
675	if ((cur->ns == NULL) && (cur->parent != NULL) &&
676	(cur->parent->ns == NULL) &&
677	((!xmlStrcasecmp(cur->name, BAD_CAST "href")) \|\|
678	(!xmlStrcasecmp(cur->name, BAD_CAST "action")) \|\|
679	(!xmlStrcasecmp(cur->name, BAD_CAST "src")) \|\|
680	((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
681	(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
682	xmlChar *escaped;
683	xmlChar *tmp = value;
684
685	while (IS_BLANK_CH(*tmp)) tmp++;
686
687	/*
688	* Angle brackets are technically illegal in URIs, but they're
689	* used in server side includes, for example. Curly brackets
690	* are illegal as well and often used in templates.
691	* Don't escape non-whitespace, printable ASCII chars for
692	* improved interoperability. Only escape space, control
693	* and non-ASCII chars.
694	*/
695	escaped = xmlURIEscapeStr(tmp,
696	BAD_CAST "\"#$%&+,/:;<=>?@[\\]^`{\|}");
697	if (escaped != NULL) {
698	xmlOutputBufferWriteQuotedString(buf, escaped);
699	xmlFree(escaped);
700	} else {
701	buf->error = XML_ERR_NO_MEMORY;
702	}
703	} else {
704	xmlOutputBufferWriteQuotedString(buf, value);
705	}
706	xmlFree(value);
707	} else {
708	buf->error = XML_ERR_NO_MEMORY;
709	}
710	}
711	}
712
713	/**
714	* htmlNodeDumpFormatOutput:
715	* @buf: the HTML buffer output
716	* @doc: the document
717	* @cur: the current node
718	* @encoding: the encoding string (unused)
719	* @format: should formatting spaces been added
720	*
721	* Dump an HTML node, recursive behaviour,children are printed too.
722	*/
723	void
724	htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
725	xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
726	int format) {
727	xmlNodePtr root, parent;
728	xmlAttrPtr attr;
729	const htmlElemDesc * info;
730
731	xmlInitParser();
732
733	if ((cur == NULL) \|\| (buf == NULL)) {
734	return;
735	}
736
737	root = cur;
738	parent = cur->parent;
739	while (1) {
740	switch (cur->type) {
741	case XML_HTML_DOCUMENT_NODE:
742	case XML_DOCUMENT_NODE:
743	if (((xmlDocPtr) cur)->intSubset != NULL) {
744	htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
745	}
746	if (cur->children != NULL) {
747	/* Always validate cur->parent when descending. */
748	if (cur->parent == parent) {
749	parent = cur;
750	cur = cur->children;
751	continue;
752	}
753	} else {
754	xmlOutputBufferWriteString(buf, "\n");
755	}
756	break;
757
758	case XML_ELEMENT_NODE:
759	/*
760	* Some users like lxml are known to pass nodes with a corrupted
761	* tree structure. Fall back to a recursive call to handle this
762	* case.
763	*/
764	if ((cur->parent != parent) && (cur->children != NULL)) {
765	htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
766	break;
767	}
768
769	/*
770	* Get specific HTML info for that node.
771	*/
772	if (cur->ns == NULL)
773	info = htmlTagLookup(cur->name);
774	else
775	info = NULL;
776
777	xmlOutputBufferWriteString(buf, "<");
778	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
779	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
780	xmlOutputBufferWriteString(buf, ":");
781	}
782	xmlOutputBufferWriteString(buf, (const char *)cur->name);
783	if (cur->nsDef)
784	xmlNsListDumpOutput(buf, cur->nsDef);
785	attr = cur->properties;
786	while (attr != NULL) {
787	htmlAttrDumpOutput(buf, doc, attr);
788	attr = attr->next;
789	}
790
791	if ((info != NULL) && (info->empty)) {
792	xmlOutputBufferWriteString(buf, ">");
793	} else if (cur->children == NULL) {
794	if ((info != NULL) && (info->saveEndTag != 0) &&
795	(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
796	(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
797	xmlOutputBufferWriteString(buf, ">");
798	} else {
799	xmlOutputBufferWriteString(buf, "></");
800	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
801	xmlOutputBufferWriteString(buf,
802	(const char *)cur->ns->prefix);
803	xmlOutputBufferWriteString(buf, ":");
804	}
805	xmlOutputBufferWriteString(buf, (const char *)cur->name);
806	xmlOutputBufferWriteString(buf, ">");
807	}
808	} else {
809	xmlOutputBufferWriteString(buf, ">");
810	if ((format) && (info != NULL) && (!info->isinline) &&
811	(cur->children->type != HTML_TEXT_NODE) &&
812	(cur->children->type != HTML_ENTITY_REF_NODE) &&
813	(cur->children != cur->last) &&
814	(cur->name != NULL) &&
815	(cur->name[0] != 'p')) /* p, pre, param */
816	xmlOutputBufferWriteString(buf, "\n");
817	parent = cur;
818	cur = cur->children;
819	continue;
820	}
821
822	if ((format) && (cur->next != NULL) &&
823	(info != NULL) && (!info->isinline)) {
824	if ((cur->next->type != HTML_TEXT_NODE) &&
825	(cur->next->type != HTML_ENTITY_REF_NODE) &&
826	(parent != NULL) &&
827	(parent->name != NULL) &&
828	(parent->name[0] != 'p')) /* p, pre, param */
829	xmlOutputBufferWriteString(buf, "\n");
830	}
831
832	break;
833
834	case XML_ATTRIBUTE_NODE:
835	htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
836	break;
837
838	case HTML_TEXT_NODE:
839	if (cur->content == NULL)
840	break;
841	if (((cur->name == (const xmlChar *)xmlStringText) \|\|
842	(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
843	((parent == NULL) \|\|
844	((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
845	(xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
846	xmlChar *buffer;
847
848	buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
849	if (buffer == NULL) {
850	buf->error = XML_ERR_NO_MEMORY;
851	return;
852	}
853	xmlOutputBufferWriteString(buf, (const char *)buffer);
854	xmlFree(buffer);
855	} else {
856	xmlOutputBufferWriteString(buf, (const char *)cur->content);
857	}
858	break;
859
860	case HTML_COMMENT_NODE:
861	if (cur->content != NULL) {
862	xmlOutputBufferWriteString(buf, "<!--");
863	xmlOutputBufferWriteString(buf, (const char *)cur->content);
864	xmlOutputBufferWriteString(buf, "-->");
865	}
866	break;
867
868	case HTML_PI_NODE:
869	if (cur->name != NULL) {
870	xmlOutputBufferWriteString(buf, "<?");
871	xmlOutputBufferWriteString(buf, (const char *)cur->name);
872	if (cur->content != NULL) {
873	xmlOutputBufferWriteString(buf, " ");
874	xmlOutputBufferWriteString(buf,
875	(const char *)cur->content);
876	}
877	xmlOutputBufferWriteString(buf, ">");
878	}
879	break;
880
881	case HTML_ENTITY_REF_NODE:
882	xmlOutputBufferWriteString(buf, "&");
883	xmlOutputBufferWriteString(buf, (const char *)cur->name);
884	xmlOutputBufferWriteString(buf, ";");
885	break;
886
887	case HTML_PRESERVE_NODE:
888	if (cur->content != NULL) {
889	xmlOutputBufferWriteString(buf, (const char *)cur->content);
890	}
891	break;
892
893	default:
894	break;
895	}
896
897	while (1) {
898	if (cur == root)
899	return;
900	if (cur->next != NULL) {
901	cur = cur->next;
902	break;
903	}
904
905	cur = parent;
906	/* cur->parent was validated when descending. */
907	parent = cur->parent;
908
909	if ((cur->type == XML_HTML_DOCUMENT_NODE) \|\|
910	(cur->type == XML_DOCUMENT_NODE)) {
911	xmlOutputBufferWriteString(buf, "\n");
912	} else {
913	if ((format) && (cur->ns == NULL))
914	info = htmlTagLookup(cur->name);
915	else
916	info = NULL;
917
918	if ((format) && (info != NULL) && (!info->isinline) &&
919	(cur->last->type != HTML_TEXT_NODE) &&
920	(cur->last->type != HTML_ENTITY_REF_NODE) &&
921	(cur->children != cur->last) &&
922	(cur->name != NULL) &&
923	(cur->name[0] != 'p')) /* p, pre, param */
924	xmlOutputBufferWriteString(buf, "\n");
925
926	xmlOutputBufferWriteString(buf, "</");
927	if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
928	xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
929	xmlOutputBufferWriteString(buf, ":");
930	}
931	xmlOutputBufferWriteString(buf, (const char *)cur->name);
932	xmlOutputBufferWriteString(buf, ">");
933
934	if ((format) && (info != NULL) && (!info->isinline) &&
935	(cur->next != NULL)) {
936	if ((cur->next->type != HTML_TEXT_NODE) &&
937	(cur->next->type != HTML_ENTITY_REF_NODE) &&
938	(parent != NULL) &&
939	(parent->name != NULL) &&
940	(parent->name[0] != 'p')) /* p, pre, param */
941	xmlOutputBufferWriteString(buf, "\n");
942	}
943	}
944	}
945	}
946	}
947
948	/**
949	* htmlNodeDumpOutput:
950	* @buf: the HTML buffer output
951	* @doc: the document
952	* @cur: the current node
953	* @encoding: the encoding string (unused)
954	*
955	* Dump an HTML node, recursive behaviour,children are printed too,
956	* and formatting returns/spaces are added.
957	*/
958	void
959	htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
960	xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
961	htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
962	}
963
964	/**
965	* htmlDocContentDumpFormatOutput:
966	* @buf: the HTML buffer output
967	* @cur: the document
968	* @encoding: the encoding string (unused)
969	* @format: should formatting spaces been added
970	*
971	* Dump an HTML document.
972	*/
973	void
974	htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
975	const char *encoding ATTRIBUTE_UNUSED,
976	int format) {
977	int type = 0;
978	if (cur) {
979	type = cur->type;
980	cur->type = XML_HTML_DOCUMENT_NODE;
981	}
982	htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
983	if (cur)
984	cur->type = (xmlElementType) type;
985	}
986
987	/**
988	* htmlDocContentDumpOutput:
989	* @buf: the HTML buffer output
990	* @cur: the document
991	* @encoding: the encoding string (unused)
992	*
993	* Dump an HTML document. Formatting return/spaces are added.
994	*/
995	void
996	htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
997	const char *encoding ATTRIBUTE_UNUSED) {
998	htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
999	}
1000
1001	/************************************************************************
1002	* *
1003	* Saving functions front-ends *
1004	* *
1005	************************************************************************/
1006
1007	/**
1008	* htmlDocDump:
1009	* @f: the FILE*
1010	* @cur: the document
1011	*
1012	* Dump an HTML document to an open FILE.
1013	*
1014	* returns: the number of byte written or -1 in case of failure.
1015	*/
1016	int
1017	htmlDocDump(FILE *f, xmlDocPtr cur) {
1018	xmlOutputBufferPtr buf;
1019	xmlCharEncodingHandlerPtr handler = NULL;
1020	const char *encoding;
1021	int ret;
1022
1023	xmlInitParser();
1024
1025	if ((cur == NULL) \|\| (f == NULL)) {
1026	return(-1);
1027	}
1028
1029	encoding = (const char *) htmlGetMetaEncoding(cur);
1030	handler = htmlFindOutputEncoder(encoding);
1031	buf = xmlOutputBufferCreateFile(f, handler);
1032	if (buf == NULL) {
1033	xmlCharEncCloseFunc(handler);
1034	return(-1);
1035	}
1036	htmlDocContentDumpOutput(buf, cur, NULL);
1037
1038	ret = xmlOutputBufferClose(buf);
1039	return(ret);
1040	}
1041
1042	/**
1043	* htmlSaveFile:
1044	* @filename: the filename (or URL)
1045	* @cur: the document
1046	*
1047	* Dump an HTML document to a file. If @filename is "-" the stdout file is
1048	* used.
1049	* returns: the number of byte written or -1 in case of failure.
1050	*/
1051	int
1052	htmlSaveFile(const char *filename, xmlDocPtr cur) {
1053	xmlOutputBufferPtr buf;
1054	xmlCharEncodingHandlerPtr handler = NULL;
1055	const char *encoding;
1056	int ret;
1057
1058	if ((cur == NULL) \|\| (filename == NULL))
1059	return(-1);
1060
1061	xmlInitParser();
1062
1063	encoding = (const char *) htmlGetMetaEncoding(cur);
1064	handler = htmlFindOutputEncoder(encoding);
1065	buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1066	if (buf == NULL) {
1067	xmlCharEncCloseFunc(handler);
1068	return(0);
1069	}
1070
1071	htmlDocContentDumpOutput(buf, cur, NULL);
1072
1073	ret = xmlOutputBufferClose(buf);
1074	return(ret);
1075	}
1076
1077	/**
1078	* htmlSaveFileFormat:
1079	* @filename: the filename
1080	* @cur: the document
1081	* @format: should formatting spaces been added
1082	* @encoding: the document encoding
1083	*
1084	* Dump an HTML document to a file using a given encoding.
1085	*
1086	* returns: the number of byte written or -1 in case of failure.
1087	*/
1088	int
1089	htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1090	const char *encoding, int format) {
1091	xmlOutputBufferPtr buf;
1092	xmlCharEncodingHandlerPtr handler = NULL;
1093	int ret;
1094
1095	if ((cur == NULL) \|\| (filename == NULL))
1096	return(-1);
1097
1098	xmlInitParser();
1099
1100	handler = htmlFindOutputEncoder(encoding);
1101	if (handler != NULL)
1102	htmlSetMetaEncoding(cur, (const xmlChar *) handler->name);
1103	else
1104	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1105
1106	/*
1107	* save the content to a temp buffer.
1108	*/
1109	buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1110	if (buf == NULL) {
1111	xmlCharEncCloseFunc(handler);
1112	return(0);
1113	}
1114
1115	htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1116
1117	ret = xmlOutputBufferClose(buf);
1118	return(ret);
1119	}
1120
1121	/**
1122	* htmlSaveFileEnc:
1123	* @filename: the filename
1124	* @cur: the document
1125	* @encoding: the document encoding
1126	*
1127	* Dump an HTML document to a file using a given encoding
1128	* and formatting returns/spaces are added.
1129	*
1130	* returns: the number of byte written or -1 in case of failure.
1131	*/
1132	int
1133	htmlSaveFileEnc(const char filename, xmlDocPtr cur, const char encoding) {
1134	return(htmlSaveFileFormat(filename, cur, encoding, 1));
1135	}
1136
1137	#endif /* LIBXML_OUTPUT_ENABLED */
1138
1139	#endif /* LIBXML_HTML_ENABLED */

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/libxml2-2.13.2/HTMLtree.c@ 105764

以其他格式下載: