VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/include/libxml/HTMLparser.h@ 105635

最後變更 在這個檔案從105635是 104106,由 vboxsync 提交於 10 月 前

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • 屬性 svn:eol-style 設為 native
檔案大小: 9.8 KB
 
1/*
2 * Summary: interface for an HTML 4.0 non-verifying parser
3 * Description: this module implements an HTML 4.0 non-verifying parser
4 * with API compatible with the XML parser ones. It should
5 * be able to parse "real world" HTML, even if severely
6 * broken from a specification point of view.
7 *
8 * Copy: See Copyright for the status of this software.
9 *
10 * Author: Daniel Veillard
11 */
12
13#ifndef __HTML_PARSER_H__
14#define __HTML_PARSER_H__
15#include <libxml/xmlversion.h>
16#include <libxml/parser.h>
17
18#ifdef LIBXML_HTML_ENABLED
19
20#ifdef __cplusplus
21extern "C" {
22#endif
23
24/*
25 * Most of the back-end structures from XML and HTML are shared.
 * Each html* name below is a typedef alias of the matching xml* type,
 * so both spellings denote the same type.
26 */
27typedef xmlParserCtxt htmlParserCtxt;
28typedef xmlParserCtxtPtr htmlParserCtxtPtr;
29typedef xmlParserNodeInfo htmlParserNodeInfo;
30typedef xmlSAXHandler htmlSAXHandler;
31typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
32typedef xmlParserInput htmlParserInput;
33typedef xmlParserInputPtr htmlParserInputPtr;
34typedef xmlDocPtr htmlDocPtr;
35typedef xmlNodePtr htmlNodePtr;
36
37/*
38 * Internal description of an HTML element, representing HTML 4.01
39 * and XHTML 1.0 (which share the same structure).
 *
 * htmlElemDescPtr is a plain pointer to htmlElemDesc. The first group of
 * members holds the tag name and per-element flags stored as char; the
 * second group (added Jan. 2003) describes allowed content and attributes.
40 */
41typedef struct _htmlElemDesc htmlElemDesc;
42typedef htmlElemDesc *htmlElemDescPtr;
43struct _htmlElemDesc {
44 const char *name; /* The tag name */
45 char startTag; /* Whether the start tag can be implied */
46 char endTag; /* Whether the end tag can be implied */
47 char saveEndTag; /* Whether the end tag should be saved */
48 char empty; /* Is this an empty element ? */
49 char depr; /* Is this a deprecated element ? */
50 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
51 char isinline; /* 0: block element, 1: inline element */
52 const char *desc; /* Human-readable description of the element */
53
54/* NRK Jan.2003
55 * New fields encapsulating HTML structure
56 *
57 * Bugs:
58 * This is a very limited representation. It fails to tell us when
59 * an element *requires* subelements (we only have whether they're
60 * allowed or not), and it doesn't tell us where CDATA and PCDATA
61 * are allowed. Some element relationships are not fully represented:
62 * these are flagged with the word MODIFIER
 *
 * NOTE(review): the const char** members are presumably NULL-terminated
 * arrays of names - confirm against the tables in HTMLparser.c.
63 */
64 const char** subelts; /* allowed sub-elements of this element */
65 const char* defaultsubelt; /* subelement for suggested auto-repair
66 if necessary, or NULL */
67 const char** attrs_opt; /* Optional attributes */
68 const char** attrs_depr; /* Additional deprecated attributes */
69 const char** attrs_req; /* Required attributes */
70};
71
72/*
73 * Internal description of an HTML entity.
 *
 * Pairs a numeric character value with the entity's name and a textual
 * description. htmlEntityDescPtr is a plain pointer to htmlEntityDesc.
74 */
75typedef struct _htmlEntityDesc htmlEntityDesc;
76typedef htmlEntityDesc *htmlEntityDescPtr;
77struct _htmlEntityDesc {
78 unsigned int value; /* the Unicode value for the character */
79 const char *name; /* The entity name */
80 const char *desc; /* the description */
81};
82
83/** DOC_DISABLE */
/*
 * XML_GLOBALS_HTML lists this module's global variables as XML_OP(...)
 * entries. It holds one entry (htmlDefaultSAXHandler, of type
 * xmlSAXHandlerV1, tagged XML_DEPRECATED) when LIBXML_SAX1_ENABLED is
 * defined, and is empty otherwise.
 */
84#ifdef LIBXML_SAX1_ENABLED
85 #define XML_GLOBALS_HTML \
86 XML_OP(htmlDefaultSAXHandler, xmlSAXHandlerV1, XML_DEPRECATED)
87#else
88 #define XML_GLOBALS_HTML
89#endif
90
/*
 * Expand the list once with XML_OP temporarily defined as
 * XML_DECLARE_GLOBAL, then undefine XML_OP again.
 */
91#define XML_OP XML_DECLARE_GLOBAL
92XML_GLOBALS_HTML
93#undef XML_OP
94
/*
 * With LIBXML_THREAD_ENABLED defined and XML_GLOBALS_NO_REDEFINITION not
 * defined, the plain name htmlDefaultSAXHandler is redirected through
 * XML_GLOBAL_MACRO.
 */
95#if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION)
96 #define htmlDefaultSAXHandler XML_GLOBAL_MACRO(htmlDefaultSAXHandler)
97#endif
98/** DOC_ENABLE */
99
100/*
101 * There are only a few public functions.
102 */
/* htmlInitAutoClose is marked XML_DEPRECATED; it takes and returns nothing. */
103XML_DEPRECATED
104XMLPUBFUN void
105 htmlInitAutoClose (void);
/*
 * Descriptor lookups, keyed by tag name, by entity name and by numeric
 * entity value respectively. Each returns a pointer to a const descriptor.
 * NOTE(review): presumably NULL when nothing matches - confirm in
 * HTMLparser.c.
 */
106XMLPUBFUN const htmlElemDesc *
107 htmlTagLookup (const xmlChar *tag);
108XMLPUBFUN const htmlEntityDesc *
109 htmlEntityLookup(const xmlChar *name);
110XMLPUBFUN const htmlEntityDesc *
111 htmlEntityValueLookup(unsigned int value);
112
/*
 * Auto-close queries: both take a document and a node (htmlAutoCloseTag
 * additionally takes a tag name) and return an int.
 * NOTE(review): presumably a 0/1 answer - confirm in HTMLparser.c.
 */
113XMLPUBFUN int
114 htmlIsAutoClosed(htmlDocPtr doc,
115 htmlNodePtr elem);
116XMLPUBFUN int
117 htmlAutoCloseTag(htmlDocPtr doc,
118 const xmlChar *name,
119 htmlNodePtr elem);
/*
 * The three functions below operate on a parser context and are all
 * marked XML_DEPRECATED.
 */
120XML_DEPRECATED
121XMLPUBFUN const htmlEntityDesc *
122 htmlParseEntityRef(htmlParserCtxtPtr ctxt,
123 const xmlChar **str);
124XML_DEPRECATED
125XMLPUBFUN int
126 htmlParseCharRef(htmlParserCtxtPtr ctxt);
127XML_DEPRECATED
128XMLPUBFUN void
129 htmlParseElement(htmlParserCtxtPtr ctxt);
130
/*
 * Parser context constructors; each returns an htmlParserCtxtPtr.
 * htmlNewSAXParserCtxt takes a const SAX handler plus an opaque user-data
 * pointer; htmlCreateMemoryParserCtxt takes a buffer and its size as int.
 * NOTE(review): presumably released with htmlFreeParserCtxt() - confirm.
 */
131XMLPUBFUN htmlParserCtxtPtr
132 htmlNewParserCtxt(void);
133XMLPUBFUN htmlParserCtxtPtr
134 htmlNewSAXParserCtxt(const htmlSAXHandler *sax,
135 void *userData);
136
137XMLPUBFUN htmlParserCtxtPtr
138 htmlCreateMemoryParserCtxt(const char *buffer,
139 int size);
140
/*
 * Parse entry points. htmlParseDocument works on an existing context and
 * returns an int; the others return an htmlDocPtr, except
 * htmlCreateFileParserCtxt, which returns a new context.
 * The SAX variants (htmlSAXParseDoc, htmlSAXParseFile) additionally take a
 * SAX handler and a user-data pointer, and are marked XML_DEPRECATED.
 */
141XMLPUBFUN int
142 htmlParseDocument(htmlParserCtxtPtr ctxt);
143XML_DEPRECATED
144XMLPUBFUN htmlDocPtr
145 htmlSAXParseDoc (const xmlChar *cur,
146 const char *encoding,
147 htmlSAXHandlerPtr sax,
148 void *userData);
149XMLPUBFUN htmlDocPtr
150 htmlParseDoc (const xmlChar *cur,
151 const char *encoding);
152XMLPUBFUN htmlParserCtxtPtr
153 htmlCreateFileParserCtxt(const char *filename,
154 const char *encoding);
155XML_DEPRECATED
156XMLPUBFUN htmlDocPtr
157 htmlSAXParseFile(const char *filename,
158 const char *encoding,
159 htmlSAXHandlerPtr sax,
160 void *userData);
161XMLPUBFUN htmlDocPtr
162 htmlParseFile (const char *filename,
163 const char *encoding);
/*
 * Byte-buffer converters. Both read from the const buffer `in` and write
 * to `out`; the lengths are passed by pointer (int *), so they are
 * in/out parameters. htmlEncodeEntities takes an extra quoteChar argument.
 * NOTE(review): the exact meaning of the return value and of the updated
 * lengths is not visible in this header - confirm in HTMLparser.c.
 */
164XMLPUBFUN int
165 UTF8ToHtml (unsigned char *out,
166 int *outlen,
167 const unsigned char *in,
168 int *inlen);
169XMLPUBFUN int
170 htmlEncodeEntities(unsigned char *out,
171 int *outlen,
172 const unsigned char *in,
173 int *inlen, int quoteChar);
/* Query on an attribute name; returns an int. */
174XMLPUBFUN int
175 htmlIsScriptAttribute(const xmlChar *name);
/* Takes an int setting and returns an int. */
176XMLPUBFUN int
177 htmlHandleOmittedElem(int val);
178
179#ifdef LIBXML_PUSH_ENABLED
180/**
181 * Interfaces for the Push mode.
 *
 * Only declared when LIBXML_PUSH_ENABLED is defined.
 * htmlCreatePushParserCtxt returns a new context and takes a SAX handler,
 * a user-data pointer, an initial chunk with its size, a filename and an
 * xmlCharEncoding. htmlParseChunk takes a further chunk with its size and
 * a `terminate` flag, and returns an int.
182 */
183XMLPUBFUN htmlParserCtxtPtr
184 htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
185 void *user_data,
186 const char *chunk,
187 int size,
188 const char *filename,
189 xmlCharEncoding enc);
190XMLPUBFUN int
191 htmlParseChunk (htmlParserCtxtPtr ctxt,
192 const char *chunk,
193 int size,
194 int terminate);
195#endif /* LIBXML_PUSH_ENABLED */
196
/*
 * Takes a parser context and returns nothing.
 * NOTE(review): presumably the release counterpart of the htmlNew* and
 * htmlCreate* context constructors declared in this header - confirm.
 */
197XMLPUBFUN void
198 htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
199
200/*
201 * New set of simpler/more flexible APIs
202 */
203/**
204 * htmlParserOption:
205 *
206 * This is the set of HTML parser options that can be passed down
207 * to the htmlReadDoc() and similar calls.
 *
 * Each enumerator is a distinct single bit, so values can be OR-ed
 * together into the `int options` argument. The bit positions are not
 * contiguous.
 * NOTE(review): the positions presumably mirror the same-named
 * XML_PARSE_* flags - confirm against parser.h.
208 */
209typedef enum {
210 HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
211 HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
212 HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
213 HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
214 HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
215 HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
216 HTML_PARSE_NONET = 1<<11,/* Forbid network access */
217 HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */
218 HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */
219 HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */
220} htmlParserOption;
221
/*
 * Context maintenance: htmlCtxtReset takes a context and returns nothing;
 * htmlCtxtUseOptions takes a context plus an int `options` value and
 * returns an int.
 * NOTE(review): `options` is presumably an OR of htmlParserOption bits -
 * confirm in HTMLparser.c.
 */
222XMLPUBFUN void
223 htmlCtxtReset (htmlParserCtxtPtr ctxt);
224XMLPUBFUN int
225 htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
226 int options);
/*
 * htmlRead* family: one function per input source (string, file/URL,
 * memory buffer with size, file descriptor, read/close callbacks with an
 * opaque ioctx). All share the trailing (URL, encoding, options)
 * parameters and return an htmlDocPtr.
 * NOTE(review): `options` is presumably an OR of htmlParserOption bits,
 * and the result presumably NULL on failure - confirm in HTMLparser.c.
 */
227XMLPUBFUN htmlDocPtr
228 htmlReadDoc (const xmlChar *cur,
229 const char *URL,
230 const char *encoding,
231 int options);
232XMLPUBFUN htmlDocPtr
233 htmlReadFile (const char *URL,
234 const char *encoding,
235 int options);
236XMLPUBFUN htmlDocPtr
237 htmlReadMemory (const char *buffer,
238 int size,
239 const char *URL,
240 const char *encoding,
241 int options);
242XMLPUBFUN htmlDocPtr
243 htmlReadFd (int fd,
244 const char *URL,
245 const char *encoding,
246 int options);
247XMLPUBFUN htmlDocPtr
248 htmlReadIO (xmlInputReadCallback ioread,
249 xmlInputCloseCallback ioclose,
250 void *ioctx,
251 const char *URL,
252 const char *encoding,
253 int options);
/*
 * htmlCtxtRead* family: same input sources and trailing parameters as the
 * htmlRead* functions, but each takes a caller-supplied parser context as
 * its first argument. The context parameter is spelled xmlParserCtxtPtr,
 * which htmlParserCtxtPtr is a typedef of. Note that htmlCtxtReadFile
 * names its path parameter `filename` rather than `URL`.
 */
254XMLPUBFUN htmlDocPtr
255 htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
256 const xmlChar *cur,
257 const char *URL,
258 const char *encoding,
259 int options);
260XMLPUBFUN htmlDocPtr
261 htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
262 const char *filename,
263 const char *encoding,
264 int options);
265XMLPUBFUN htmlDocPtr
266 htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
267 const char *buffer,
268 int size,
269 const char *URL,
270 const char *encoding,
271 int options);
272XMLPUBFUN htmlDocPtr
273 htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
274 int fd,
275 const char *URL,
276 const char *encoding,
277 int options);
278XMLPUBFUN htmlDocPtr
279 htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
280 xmlInputReadCallback ioread,
281 xmlInputCloseCallback ioclose,
282 void *ioctx,
283 const char *URL,
284 const char *encoding,
285 int options);
286
287/* NRK/Jan2003: further knowledge of HTML structure
 *
 * Status codes returned by the htmlAttrAllowed / htmlElementStatusHere /
 * htmlNodeStatus checks. HTML_INVALID, HTML_DEPRECATED and HTML_VALID are
 * separate single bits; HTML_REQUIRED (0xc) is 0x8 combined with the
 * HTML_VALID bit (0x4), so testing a status with `& HTML_VALID` is non-zero
 * for both HTML_VALID and HTML_REQUIRED.
288 */
289typedef enum {
290 HTML_NA = 0 , /* something we don't check at all */
291 HTML_INVALID = 0x1 ,
292 HTML_DEPRECATED = 0x2 ,
293 HTML_VALID = 0x4 ,
294 HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
295} htmlStatus ;
296
297/* Using htmlElemDesc rather than name here, to emphasise the fact
298 that otherwise there's a lookup overhead.

   The prototypes below omit parameter names. htmlAttrAllowed,
   htmlElementStatusHere and htmlNodeStatus return an htmlStatus;
   htmlElementAllowedHere returns an int.
   NOTE(review): the trailing int of htmlAttrAllowed and htmlNodeStatus is
   presumably a "legacy" flag - confirm in HTMLparser.c.
299*/
300XMLPUBFUN htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
301XMLPUBFUN int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
302XMLPUBFUN htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
303XMLPUBFUN htmlStatus htmlNodeStatus(const htmlNodePtr, int) ;
/**
 * htmlDefaultSubelement:
 * @elt: HTML element (pointer to an htmlElemDesc)
 *
 * Returns the default subelement for this element, i.e. the
 * defaultsubelt member of the descriptor @elt points to.
 *
 * The argument and the whole expansion are parenthesized so that
 * expression arguments such as `p + 1` or `&desc` expand correctly.
 * @elt is evaluated exactly once.
 */
#define htmlDefaultSubelement(elt) ((elt)->defaultsubelt)
311/**
312 * htmlElementAllowedHereDesc:
313 * @parent: HTML parent element
314 * @elt: HTML element
315 *
316 * Checks whether an HTML element description may be a
317 * direct child of the specified element.
 *
 * Expands to a call of htmlElementAllowedHere() with @parent and the
 * name member of @elt; both arguments are evaluated once.
318 *
319 * Returns 1 if allowed; 0 otherwise.
320 */
321#define htmlElementAllowedHereDesc(parent,elt) \
322 htmlElementAllowedHere((parent), (elt)->name)
323/**
324 * htmlRequiredAttrs:
325 * @elt: HTML element
326 *
327 * Returns the attributes required for the specified element.
 *
 * Expands to the attrs_req member of the descriptor @elt points to;
 * @elt is evaluated once.
328 */
329#define htmlRequiredAttrs(elt) (elt)->attrs_req
330
331
332#ifdef __cplusplus
333}
334#endif
335
336#else /* LIBXML_HTML_ENABLED */
337
338/** DOC_DISABLE */
339#define XML_GLOBALS_HTML
340/** DOC_ENABLE */
341
342#endif /* LIBXML_HTML_ENABLED */
343#endif /* __HTML_PARSER_H__ */
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette