1 | /*
|
---|
2 | * xmlSeed.c: Generate the XML seed corpus for fuzzing.
|
---|
3 | *
|
---|
4 | * See Copyright for the status of this software.
|
---|
5 | */
|
---|
6 |
|
---|
7 | #include <stdio.h>
|
---|
8 | #include <string.h>
|
---|
9 | #include <glob.h>
|
---|
10 | #include <libgen.h>
|
---|
11 | #include <sys/stat.h>
|
---|
12 |
|
---|
13 | #ifdef _WIN32
|
---|
14 | #include <direct.h>
|
---|
15 | #else
|
---|
16 | #include <unistd.h>
|
---|
17 | #endif
|
---|
18 |
|
---|
19 | #include <libxml/parser.h>
|
---|
20 | #include <libxml/parserInternals.h>
|
---|
21 | #include <libxml/HTMLparser.h>
|
---|
22 | #include <libxml/xinclude.h>
|
---|
23 | #include <libxml/xmlschemas.h>
|
---|
24 | #include "fuzz.h"
|
---|
25 |
|
---|
26 | #define PATH_SIZE 500
|
---|
27 | #define SEED_BUF_SIZE 16384
|
---|
28 | #define EXPR_SIZE 4500
|
---|
29 |
|
---|
/*
 * Per-file callback: turns the input file "base" into seed data that is
 * written to "out". A non-zero return value is treated as failure by the
 * caller.
 */
typedef int (*fileFunc)(const char *base, FILE *out);

/*
 * Per-argument callback: handles one command-line argument following the
 * fuzzer name.
 */
typedef int (*mainFunc)(const char *arg);
35 |
|
---|
36 | static struct {
|
---|
37 | FILE *out;
|
---|
38 | xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
|
---|
39 | xmlExternalEntityLoader oldLoader;
|
---|
40 | fileFunc processFile;
|
---|
41 | const char *fuzzer;
|
---|
42 | int counter;
|
---|
43 | char cwd[PATH_SIZE];
|
---|
44 | } globalData;
|
---|
45 |
|
---|
46 | #if defined(HAVE_SCHEMA_FUZZER) || \
|
---|
47 | defined(HAVE_XML_FUZZER)
|
---|
48 | /*
|
---|
49 | * A custom entity loader that writes all external DTDs or entities to a
|
---|
50 | * single file in the format expected by xmlFuzzEntityLoader.
|
---|
51 | */
|
---|
52 | static xmlParserInputPtr
|
---|
53 | fuzzEntityRecorder(const char *URL, const char *ID,
|
---|
54 | xmlParserCtxtPtr ctxt) {
|
---|
55 | xmlParserInputPtr in;
|
---|
56 | static const int chunkSize = 16384;
|
---|
57 | int len;
|
---|
58 |
|
---|
59 | in = xmlNoNetExternalEntityLoader(URL, ID, ctxt);
|
---|
60 | if (in == NULL)
|
---|
61 | return(NULL);
|
---|
62 |
|
---|
63 | if (globalData.entities == NULL) {
|
---|
64 | globalData.entities = xmlHashCreate(4);
|
---|
65 | } else if (xmlHashLookup(globalData.entities,
|
---|
66 | (const xmlChar *) URL) != NULL) {
|
---|
67 | return(in);
|
---|
68 | }
|
---|
69 |
|
---|
70 | do {
|
---|
71 | len = xmlParserInputBufferGrow(in->buf, chunkSize);
|
---|
72 | if (len < 0) {
|
---|
73 | fprintf(stderr, "Error reading %s\n", URL);
|
---|
74 | xmlFreeInputStream(in);
|
---|
75 | return(NULL);
|
---|
76 | }
|
---|
77 | } while (len > 0);
|
---|
78 |
|
---|
79 | xmlFuzzWriteString(globalData.out, URL);
|
---|
80 | xmlFuzzWriteString(globalData.out,
|
---|
81 | (char *) xmlBufContent(in->buf->buffer));
|
---|
82 |
|
---|
83 | xmlFreeInputStream(in);
|
---|
84 |
|
---|
85 | xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
|
---|
86 | globalData.entities);
|
---|
87 |
|
---|
88 | return(xmlNoNetExternalEntityLoader(URL, ID, ctxt));
|
---|
89 | }
|
---|
90 |
|
---|
91 | static void
|
---|
92 | fuzzRecorderInit(FILE *out) {
|
---|
93 | globalData.out = out;
|
---|
94 | globalData.entities = xmlHashCreate(8);
|
---|
95 | globalData.oldLoader = xmlGetExternalEntityLoader();
|
---|
96 | xmlSetExternalEntityLoader(fuzzEntityRecorder);
|
---|
97 | }
|
---|
98 |
|
---|
99 | static void
|
---|
100 | fuzzRecorderCleanup(void) {
|
---|
101 | xmlSetExternalEntityLoader(globalData.oldLoader);
|
---|
102 | xmlHashFree(globalData.entities, NULL);
|
---|
103 | globalData.out = NULL;
|
---|
104 | globalData.entities = NULL;
|
---|
105 | globalData.oldLoader = NULL;
|
---|
106 | }
|
---|
107 | #endif
|
---|
108 |
|
---|
109 | #ifdef HAVE_XML_FUZZER
|
---|
110 | static int
|
---|
111 | processXml(const char *docFile, FILE *out) {
|
---|
112 | int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
|
---|
113 | xmlDocPtr doc;
|
---|
114 |
|
---|
115 | /* Parser options. */
|
---|
116 | xmlFuzzWriteInt(out, opts, 4);
|
---|
117 | /* Max allocations. */
|
---|
118 | xmlFuzzWriteInt(out, 0, 4);
|
---|
119 |
|
---|
120 | fuzzRecorderInit(out);
|
---|
121 |
|
---|
122 | doc = xmlReadFile(docFile, NULL, opts);
|
---|
123 | #ifdef LIBXML_XINCLUDE_ENABLED
|
---|
124 | xmlXIncludeProcessFlags(doc, opts);
|
---|
125 | #endif
|
---|
126 | xmlFreeDoc(doc);
|
---|
127 |
|
---|
128 | fuzzRecorderCleanup();
|
---|
129 |
|
---|
130 | return(0);
|
---|
131 | }
|
---|
132 | #endif
|
---|
133 |
|
---|
134 | #ifdef HAVE_HTML_FUZZER
|
---|
135 | static int
|
---|
136 | processHtml(const char *docFile, FILE *out) {
|
---|
137 | char buf[SEED_BUF_SIZE];
|
---|
138 | FILE *file;
|
---|
139 | size_t size;
|
---|
140 |
|
---|
141 | /* Parser options. */
|
---|
142 | xmlFuzzWriteInt(out, 0, 4);
|
---|
143 | /* Max allocations. */
|
---|
144 | xmlFuzzWriteInt(out, 0, 4);
|
---|
145 |
|
---|
146 | /* Copy file */
|
---|
147 | file = fopen(docFile, "rb");
|
---|
148 | if (file == NULL) {
|
---|
149 | fprintf(stderr, "couldn't open %s\n", docFile);
|
---|
150 | return(0);
|
---|
151 | }
|
---|
152 | do {
|
---|
153 | size = fread(buf, 1, SEED_BUF_SIZE, file);
|
---|
154 | if (size > 0)
|
---|
155 | fwrite(buf, 1, size, out);
|
---|
156 | } while (size == SEED_BUF_SIZE);
|
---|
157 | fclose(file);
|
---|
158 |
|
---|
159 | return(0);
|
---|
160 | }
|
---|
161 | #endif
|
---|
162 |
|
---|
163 | #ifdef HAVE_SCHEMA_FUZZER
|
---|
164 | static int
|
---|
165 | processSchema(const char *docFile, FILE *out) {
|
---|
166 | xmlSchemaPtr schema;
|
---|
167 | xmlSchemaParserCtxtPtr pctxt;
|
---|
168 |
|
---|
169 | /* Max allocations. */
|
---|
170 | xmlFuzzWriteInt(out, 0, 4);
|
---|
171 |
|
---|
172 | fuzzRecorderInit(out);
|
---|
173 |
|
---|
174 | pctxt = xmlSchemaNewParserCtxt(docFile);
|
---|
175 | xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL);
|
---|
176 | schema = xmlSchemaParse(pctxt);
|
---|
177 | xmlSchemaFreeParserCtxt(pctxt);
|
---|
178 | xmlSchemaFree(schema);
|
---|
179 |
|
---|
180 | fuzzRecorderCleanup();
|
---|
181 |
|
---|
182 | return(0);
|
---|
183 | }
|
---|
184 | #endif
|
---|
185 |
|
---|
186 | #if defined(HAVE_HTML_FUZZER) || \
|
---|
187 | defined(HAVE_SCHEMA_FUZZER) || \
|
---|
188 | defined(HAVE_XML_FUZZER)
|
---|
189 | static int
|
---|
190 | processPattern(const char *pattern) {
|
---|
191 | glob_t globbuf;
|
---|
192 | int ret = 0;
|
---|
193 | int res;
|
---|
194 | size_t i;
|
---|
195 |
|
---|
196 | res = glob(pattern, 0, NULL, &globbuf);
|
---|
197 | if (res == GLOB_NOMATCH)
|
---|
198 | return(0);
|
---|
199 | if (res != 0) {
|
---|
200 | fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
---|
201 | return(-1);
|
---|
202 | }
|
---|
203 |
|
---|
204 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
205 | struct stat statbuf;
|
---|
206 | char outPath[PATH_SIZE];
|
---|
207 | char *dirBuf = NULL;
|
---|
208 | char *baseBuf = NULL;
|
---|
209 | const char *path, *dir, *base;
|
---|
210 | FILE *out = NULL;
|
---|
211 | int dirChanged = 0;
|
---|
212 | size_t size;
|
---|
213 |
|
---|
214 | path = globbuf.gl_pathv[i];
|
---|
215 |
|
---|
216 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
---|
217 | continue;
|
---|
218 |
|
---|
219 | dirBuf = (char *) xmlCharStrdup(path);
|
---|
220 | baseBuf = (char *) xmlCharStrdup(path);
|
---|
221 | if ((dirBuf == NULL) || (baseBuf == NULL)) {
|
---|
222 | fprintf(stderr, "memory allocation failed\n");
|
---|
223 | ret = -1;
|
---|
224 | goto error;
|
---|
225 | }
|
---|
226 | dir = dirname(dirBuf);
|
---|
227 | base = basename(baseBuf);
|
---|
228 |
|
---|
229 | size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
|
---|
230 | globalData.fuzzer, base);
|
---|
231 | if (size >= PATH_SIZE) {
|
---|
232 | fprintf(stderr, "creating path failed\n");
|
---|
233 | ret = -1;
|
---|
234 | goto error;
|
---|
235 | }
|
---|
236 | out = fopen(outPath, "wb");
|
---|
237 | if (out == NULL) {
|
---|
238 | fprintf(stderr, "couldn't open %s for writing\n", outPath);
|
---|
239 | ret = -1;
|
---|
240 | goto error;
|
---|
241 | }
|
---|
242 | if (chdir(dir) != 0) {
|
---|
243 | fprintf(stderr, "couldn't chdir to %s\n", dir);
|
---|
244 | ret = -1;
|
---|
245 | goto error;
|
---|
246 | }
|
---|
247 | dirChanged = 1;
|
---|
248 | if (globalData.processFile(base, out) != 0)
|
---|
249 | ret = -1;
|
---|
250 |
|
---|
251 | error:
|
---|
252 | if (out != NULL)
|
---|
253 | fclose(out);
|
---|
254 | xmlFree(dirBuf);
|
---|
255 | xmlFree(baseBuf);
|
---|
256 | if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
|
---|
257 | fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
|
---|
258 | ret = -1;
|
---|
259 | break;
|
---|
260 | }
|
---|
261 | }
|
---|
262 |
|
---|
263 | globfree(&globbuf);
|
---|
264 | return(ret);
|
---|
265 | }
|
---|
266 | #endif
|
---|
267 |
|
---|
268 | #ifdef HAVE_XPATH_FUZZER
|
---|
269 | static int
|
---|
270 | processXPath(const char *testDir, const char *prefix, const char *name,
|
---|
271 | const char *data, const char *subdir, int xptr) {
|
---|
272 | char pattern[PATH_SIZE];
|
---|
273 | glob_t globbuf;
|
---|
274 | size_t i, size;
|
---|
275 | int ret = 0, res;
|
---|
276 |
|
---|
277 | size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
|
---|
278 | testDir, subdir, prefix);
|
---|
279 | if (size >= PATH_SIZE)
|
---|
280 | return(-1);
|
---|
281 | res = glob(pattern, 0, NULL, &globbuf);
|
---|
282 | if (res == GLOB_NOMATCH)
|
---|
283 | return(0);
|
---|
284 | if (res != 0) {
|
---|
285 | fprintf(stderr, "couldn't match pattern %s\n", pattern);
|
---|
286 | return(-1);
|
---|
287 | }
|
---|
288 |
|
---|
289 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
290 | char *path = globbuf.gl_pathv[i];
|
---|
291 | struct stat statbuf;
|
---|
292 | FILE *in;
|
---|
293 | char expr[EXPR_SIZE];
|
---|
294 |
|
---|
295 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
|
---|
296 | continue;
|
---|
297 |
|
---|
298 | in = fopen(path, "rb");
|
---|
299 | if (in == NULL) {
|
---|
300 | ret = -1;
|
---|
301 | continue;
|
---|
302 | }
|
---|
303 |
|
---|
304 | while (fgets(expr, EXPR_SIZE, in) != NULL) {
|
---|
305 | char outPath[PATH_SIZE];
|
---|
306 | FILE *out;
|
---|
307 | int j;
|
---|
308 |
|
---|
309 | for (j = 0; expr[j] != 0; j++)
|
---|
310 | if (expr[j] == '\r' || expr[j] == '\n')
|
---|
311 | break;
|
---|
312 | expr[j] = 0;
|
---|
313 |
|
---|
314 | size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
|
---|
315 | name, globalData.counter);
|
---|
316 | if (size >= PATH_SIZE) {
|
---|
317 | ret = -1;
|
---|
318 | continue;
|
---|
319 | }
|
---|
320 | out = fopen(outPath, "wb");
|
---|
321 | if (out == NULL) {
|
---|
322 | ret = -1;
|
---|
323 | continue;
|
---|
324 | }
|
---|
325 |
|
---|
326 | /* Max allocations. */
|
---|
327 | xmlFuzzWriteInt(out, 0, 4);
|
---|
328 |
|
---|
329 | if (xptr) {
|
---|
330 | xmlFuzzWriteString(out, expr);
|
---|
331 | } else {
|
---|
332 | char xptrExpr[EXPR_SIZE+100];
|
---|
333 |
|
---|
334 | /* Wrap XPath expressions as XPointer */
|
---|
335 | snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
|
---|
336 | xmlFuzzWriteString(out, xptrExpr);
|
---|
337 | }
|
---|
338 |
|
---|
339 | xmlFuzzWriteString(out, data);
|
---|
340 |
|
---|
341 | fclose(out);
|
---|
342 | globalData.counter++;
|
---|
343 | }
|
---|
344 |
|
---|
345 | fclose(in);
|
---|
346 | }
|
---|
347 |
|
---|
348 | globfree(&globbuf);
|
---|
349 |
|
---|
350 | return(ret);
|
---|
351 | }
|
---|
352 |
|
---|
353 | static int
|
---|
354 | processXPathDir(const char *testDir) {
|
---|
355 | char pattern[PATH_SIZE];
|
---|
356 | glob_t globbuf;
|
---|
357 | size_t i, size;
|
---|
358 | int ret = 0;
|
---|
359 |
|
---|
360 | globalData.counter = 1;
|
---|
361 | if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
|
---|
362 | ret = -1;
|
---|
363 |
|
---|
364 | size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
|
---|
365 | if (size >= PATH_SIZE)
|
---|
366 | return(1);
|
---|
367 | if (glob(pattern, 0, NULL, &globbuf) != 0)
|
---|
368 | return(1);
|
---|
369 |
|
---|
370 | for (i = 0; i < globbuf.gl_pathc; i++) {
|
---|
371 | char *path = globbuf.gl_pathv[i];
|
---|
372 | char *data;
|
---|
373 | const char *docFile;
|
---|
374 |
|
---|
375 | data = xmlSlurpFile(path, NULL);
|
---|
376 | if (data == NULL) {
|
---|
377 | ret = -1;
|
---|
378 | continue;
|
---|
379 | }
|
---|
380 | docFile = basename(path);
|
---|
381 |
|
---|
382 | globalData.counter = 1;
|
---|
383 | if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
|
---|
384 | ret = -1;
|
---|
385 | if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
|
---|
386 | ret = -1;
|
---|
387 | if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
|
---|
388 | ret = -1;
|
---|
389 |
|
---|
390 | xmlFree(data);
|
---|
391 | }
|
---|
392 |
|
---|
393 | globfree(&globbuf);
|
---|
394 |
|
---|
395 | return(ret);
|
---|
396 | }
|
---|
397 | #endif
|
---|
398 |
|
---|
399 | int
|
---|
400 | main(int argc, const char **argv) {
|
---|
401 | mainFunc processArg = NULL;
|
---|
402 | const char *fuzzer;
|
---|
403 | int ret = 0;
|
---|
404 | int i;
|
---|
405 |
|
---|
406 | if (argc < 3) {
|
---|
407 | fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
|
---|
408 | return(1);
|
---|
409 | }
|
---|
410 |
|
---|
411 | xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
|
---|
412 |
|
---|
413 | fuzzer = argv[1];
|
---|
414 | if (strcmp(fuzzer, "html") == 0) {
|
---|
415 | #ifdef HAVE_HTML_FUZZER
|
---|
416 | processArg = processPattern;
|
---|
417 | globalData.processFile = processHtml;
|
---|
418 | #endif
|
---|
419 | } else if (strcmp(fuzzer, "schema") == 0) {
|
---|
420 | #ifdef HAVE_SCHEMA_FUZZER
|
---|
421 | processArg = processPattern;
|
---|
422 | globalData.processFile = processSchema;
|
---|
423 | #endif
|
---|
424 | } else if (strcmp(fuzzer, "valid") == 0) {
|
---|
425 | #ifdef HAVE_VALID_FUZZER
|
---|
426 | processArg = processPattern;
|
---|
427 | globalData.processFile = processXml;
|
---|
428 | #endif
|
---|
429 | } else if (strcmp(fuzzer, "xinclude") == 0) {
|
---|
430 | #ifdef HAVE_XINCLUDE_FUZZER
|
---|
431 | processArg = processPattern;
|
---|
432 | globalData.processFile = processXml;
|
---|
433 | #endif
|
---|
434 | } else if (strcmp(fuzzer, "xml") == 0) {
|
---|
435 | #ifdef HAVE_XML_FUZZER
|
---|
436 | processArg = processPattern;
|
---|
437 | globalData.processFile = processXml;
|
---|
438 | #endif
|
---|
439 | } else if (strcmp(fuzzer, "xpath") == 0) {
|
---|
440 | #ifdef HAVE_XPATH_FUZZER
|
---|
441 | processArg = processXPathDir;
|
---|
442 | #endif
|
---|
443 | } else {
|
---|
444 | fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
|
---|
445 | return(1);
|
---|
446 | }
|
---|
447 | globalData.fuzzer = fuzzer;
|
---|
448 |
|
---|
449 | if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
|
---|
450 | fprintf(stderr, "couldn't get current directory\n");
|
---|
451 | return(1);
|
---|
452 | }
|
---|
453 |
|
---|
454 | if (processArg != NULL)
|
---|
455 | for (i = 2; i < argc; i++)
|
---|
456 | processArg(argv[i]);
|
---|
457 |
|
---|
458 | return(ret);
|
---|
459 | }
|
---|
460 |
|
---|