uri.c@ 70579

最後變更在這個檔案從70579是 65950,由 vboxsync 提交於 8 年前
libxml 2.9.4: fix export
屬性 svn:eol-style 設為 `native`
檔案大小: 65.5 KB

行
1	/**
2	* uri.c: set of generic URI related routines
3	*
4	* Reference: RFCs 3986, 2732 and 2373
5	*
6	* See Copyright for the status of this software.
7	*
8	* [email protected]
9	*/
10
11	#define IN_LIBXML
12	#include "libxml.h"
13
14	#include <string.h>
15
16	#include <libxml/xmlmemory.h>
17	#include <libxml/uri.h>
18	#include <libxml/globals.h>
19	#include <libxml/xmlerror.h>
20
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so the macro behaves as a single value
 * in any expression (e.g. as a divisor or a multiplicand).
 */
#define MAX_URI_LENGTH (1024 * 1024)
34
35	static void
36	xmlURIErrMemory(const char *extra)
37	{
38	if (extra)
39	__xmlRaiseError(NULL, NULL, NULL,
40	NULL, NULL, XML_FROM_URI,
41	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42	extra, NULL, NULL, 0, 0,
43	"Memory allocation failed : %s\n", extra);
44	else
45	__xmlRaiseError(NULL, NULL, NULL,
46	NULL, NULL, XML_FROM_URI,
47	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48	NULL, NULL, NULL, 0, 0,
49	"Memory allocation failed\n");
50	}
51
52	static void xmlCleanURI(xmlURIPtr uri);
53
/*
 * Character classes from the old RFC 2396 grammar, used by the legacy
 * handling code and by the URI serializer.
 *
 * Unless stated otherwise these macros take a character VALUE (x).
 * IS_UNWISE is the exception: it takes a POINTER (p) and tests *(p).
 * All of them evaluate their argument more than once, so the argument
 * must not have side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * The macro additionally accepts "[" and "]". Takes a pointer.
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
121
/*
 * Skip to next pointer char, handle escaped sequences:
 * advances (p) by 3 when it points at '%', by 1 otherwise.
 * (p) must be a modifiable pointer lvalue; it is evaluated more than once.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first (n) bytes of (s) through xmlStrndup(), converting
 * between the char and xmlChar string types on the way in and out.
 */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
139
140	/************************************************************************
141	* *
142	* RFC 3986 parser *
143	* *
144	************************************************************************/
145
/*
 * RFC 3986 character classes. Every ISA_* macro takes a POINTER (p) into
 * the string being parsed and tests the character(s) at *(p). The
 * argument is evaluated more than once and must have no side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first failing test, so the bytes after '%'
 * are only read when the preceding byte matched.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
195
196	/**
197	* xmlParse3986Scheme:
198	* @uri: pointer to an URI structure
199	* @str: pointer to the string to analyze
200	*
201	* Parse an URI scheme
202	*
203	* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204	*
205	* Returns 0 or the error code
206	*/
207	static int
208	xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209	const char *cur;
210
211	if (str == NULL)
212	return(-1);
213
214	cur = *str;
215	if (!ISA_ALPHA(cur))
216	return(2);
217	cur++;
218	while (ISA_ALPHA(cur) \|\| ISA_DIGIT(cur) \|\|
219	(cur == '+') \|\| (cur == '-') \|\| (*cur == '.')) cur++;
220	if (uri != NULL) {
221	if (uri->scheme != NULL) xmlFree(uri->scheme);
222	uri->scheme = STRNDUP(str, cur - str);
223	}
224	*str = cur;
225	return(0);
226	}
227
228	/**
229	* xmlParse3986Fragment:
230	* @uri: pointer to an URI structure
231	* @str: pointer to the string to analyze
232	*
233	* Parse the query part of an URI
234	*
235	* fragment = *( pchar / "/" / "?" )
236	* NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237	* in the fragment identifier but this is used very broadly for
238	* xpointer scheme selection, so we are allowing it here to not break
239	* for example all the DocBook processing chains.
240	*
241	* Returns 0 or the error code
242	*/
243	static int
244	xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245	{
246	const char *cur;
247
248	if (str == NULL)
249	return (-1);
250
251	cur = *str;
252
253	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
254	(cur == '[') \|\| (cur == ']') \|\|
255	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256	NEXT(cur);
257	if (uri != NULL) {
258	if (uri->fragment != NULL)
259	xmlFree(uri->fragment);
260	if (uri->cleanup & 2)
261	uri->fragment = STRNDUP(str, cur - str);
262	else
263	uri->fragment = xmlURIUnescapeString(str, cur - str, NULL);
264	}
265	*str = cur;
266	return (0);
267	}
268
269	/**
270	* xmlParse3986Query:
271	* @uri: pointer to an URI structure
272	* @str: pointer to the string to analyze
273	*
274	* Parse the query part of an URI
275	*
276	* query = *uric
277	*
278	* Returns 0 or the error code
279	*/
280	static int
281	xmlParse3986Query(xmlURIPtr uri, const char **str)
282	{
283	const char *cur;
284
285	if (str == NULL)
286	return (-1);
287
288	cur = *str;
289
290	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
291	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292	NEXT(cur);
293	if (uri != NULL) {
294	if (uri->query != NULL)
295	xmlFree(uri->query);
296	if (uri->cleanup & 2)
297	uri->query = STRNDUP(str, cur - str);
298	else
299	uri->query = xmlURIUnescapeString(str, cur - str, NULL);
300
301	/* Save the raw bytes of the query as well.
302	* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303	*/
304	if (uri->query_raw != NULL)
305	xmlFree (uri->query_raw);
306	uri->query_raw = STRNDUP (str, cur - str);
307	}
308	*str = cur;
309	return (0);
310	}
311
312	/**
313	* xmlParse3986Port:
314	* @uri: pointer to an URI structure
315	* @str: the string to analyze
316	*
317	* Parse a port part and fills in the appropriate fields
318	* of the @uri structure
319	*
320	* port = *DIGIT
321	*
322	* Returns 0 or the error code
323	*/
324	static int
325	xmlParse3986Port(xmlURIPtr uri, const char **str)
326	{
327	const char cur = str;
328	unsigned port = 0; /* unsigned for defined overflow behavior */
329
330	if (ISA_DIGIT(cur)) {
331	while (ISA_DIGIT(cur)) {
332	port = port * 10 + (*cur - '0');
333
334	cur++;
335	}
336	if (uri != NULL)
337	uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
338	*str = cur;
339	return(0);
340	}
341	return(1);
342	}
343
344	/**
345	* xmlParse3986Userinfo:
346	* @uri: pointer to an URI structure
347	* @str: the string to analyze
348	*
349	* Parse an user informations part and fills in the appropriate fields
350	* of the @uri structure
351	*
352	* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
353	*
354	* Returns 0 or the error code
355	*/
356	static int
357	xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
358	{
359	const char *cur;
360
361	cur = *str;
362	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\|
363	ISA_SUB_DELIM(cur) \|\| (*cur == ':'))
364	NEXT(cur);
365	if (*cur == '@') {
366	if (uri != NULL) {
367	if (uri->user != NULL) xmlFree(uri->user);
368	if (uri->cleanup & 2)
369	uri->user = STRNDUP(str, cur - str);
370	else
371	uri->user = xmlURIUnescapeString(str, cur - str, NULL);
372	}
373	*str = cur;
374	return(0);
375	}
376	return(1);
377	}
378
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * Returns 0 if found and skipped (*@str is advanced), 1 otherwise
 *         (*@str is left untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur + 1))
        cur++;
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur + 2)))
        cur += 2;
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        cur += 3;
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        cur += 3;
    /*
     * 250-255: both bounds apply to the THIRD character. The upper bound
     * previously tested the second character, which let 256-259 through.
     */
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        cur += 3;
    else
        return(1);
    *str = cur;
    return(0);
}
416	/**
417	* xmlParse3986Host:
418	* @uri: pointer to an URI structure
419	* @str: the string to analyze
420	*
421	* Parse an host part and fills in the appropriate fields
422	* of the @uri structure
423	*
424	* host = IP-literal / IPv4address / reg-name
425	* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
426	* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
427	* reg-name = *( unreserved / pct-encoded / sub-delims )
428	*
429	* Returns 0 or the error code
430	*/
431	static int
432	xmlParse3986Host(xmlURIPtr uri, const char **str)
433	{
434	const char cur = str;
435	const char *host;
436
437	host = cur;
438	/*
439	* IPv6 and future adressing scheme are enclosed between brackets
440	*/
441	if (*cur == '[') {
442	cur++;
443	while ((cur != ']') && (cur != 0))
444	cur++;
445	if (*cur != ']')
446	return(1);
447	cur++;
448	goto found;
449	}
450	/*
451	* try to parse an IPv4
452	*/
453	if (ISA_DIGIT(cur)) {
454	if (xmlParse3986DecOctet(&cur) != 0)
455	goto not_ipv4;
456	if (*cur != '.')
457	goto not_ipv4;
458	cur++;
459	if (xmlParse3986DecOctet(&cur) != 0)
460	goto not_ipv4;
461	if (*cur != '.')
462	goto not_ipv4;
463	if (xmlParse3986DecOctet(&cur) != 0)
464	goto not_ipv4;
465	if (*cur != '.')
466	goto not_ipv4;
467	if (xmlParse3986DecOctet(&cur) != 0)
468	goto not_ipv4;
469	goto found;
470	not_ipv4:
471	cur = *str;
472	}
473	/*
474	* then this should be a hostname which can be empty
475	*/
476	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\| ISA_SUB_DELIM(cur))
477	NEXT(cur);
478	found:
479	if (uri != NULL) {
480	if (uri->authority != NULL) xmlFree(uri->authority);
481	uri->authority = NULL;
482	if (uri->server != NULL) xmlFree(uri->server);
483	if (cur != host) {
484	if (uri->cleanup & 2)
485	uri->server = STRNDUP(host, cur - host);
486	else
487	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
488	} else
489	uri->server = NULL;
490	}
491	*str = cur;
492	return(0);
493	}
494
495	/**
496	* xmlParse3986Authority:
497	* @uri: pointer to an URI structure
498	* @str: the string to analyze
499	*
500	* Parse an authority part and fills in the appropriate fields
501	* of the @uri structure
502	*
503	* authority = [ userinfo "@" ] host [ ":" port ]
504	*
505	* Returns 0 or the error code
506	*/
507	static int
508	xmlParse3986Authority(xmlURIPtr uri, const char **str)
509	{
510	const char *cur;
511	int ret;
512
513	cur = *str;
514	/*
515	* try to parse an userinfo and check for the trailing @
516	*/
517	ret = xmlParse3986Userinfo(uri, &cur);
518	if ((ret != 0) \|\| (*cur != '@'))
519	cur = *str;
520	else
521	cur++;
522	ret = xmlParse3986Host(uri, &cur);
523	if (ret != 0) return(ret);
524	if (*cur == ':') {
525	cur++;
526	ret = xmlParse3986Port(uri, &cur);
527	if (ret != 0) return(ret);
528	}
529	*str = cur;
530	return(0);
531	}
532
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip over one path segment; nothing is stored, only *@str moves.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character that is not a pchar or that
 * equals @forbid.
 *
 * Returns 0 on success, 1 if the input does not start with a pchar and
 *         @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    for (; ISA_PCHAR(pos) && (*pos != forbid); NEXT(pos))
        ;

    *str = pos;
    return (0);
}
565
566	/**
567	* xmlParse3986PathAbEmpty:
568	* @uri: pointer to an URI structure
569	* @str: the string to analyze
570	*
571	* Parse an path absolute or empty and fills in the appropriate fields
572	* of the @uri structure
573	*
574	* path-abempty = *( "/" segment )
575	*
576	* Returns 0 or the error code
577	*/
578	static int
579	xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
580	{
581	const char *cur;
582	int ret;
583
584	cur = *str;
585
586	while (*cur == '/') {
587	cur++;
588	ret = xmlParse3986Segment(&cur, 0, 1);
589	if (ret != 0) return(ret);
590	}
591	if (uri != NULL) {
592	if (uri->path != NULL) xmlFree(uri->path);
593	if (*str != cur) {
594	if (uri->cleanup & 2)
595	uri->path = STRNDUP(str, cur - str);
596	else
597	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
598	} else {
599	uri->path = NULL;
600	}
601	}
602	*str = cur;
603	return (0);
604	}
605
606	/**
607	* xmlParse3986PathAbsolute:
608	* @uri: pointer to an URI structure
609	* @str: the string to analyze
610	*
611	* Parse an path absolute and fills in the appropriate fields
612	* of the @uri structure
613	*
614	* path-absolute = "/" [ segment-nz *( "/" segment ) ]
615	*
616	* Returns 0 or the error code
617	*/
618	static int
619	xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
620	{
621	const char *cur;
622	int ret;
623
624	cur = *str;
625
626	if (*cur != '/')
627	return(1);
628	cur++;
629	ret = xmlParse3986Segment(&cur, 0, 0);
630	if (ret == 0) {
631	while (*cur == '/') {
632	cur++;
633	ret = xmlParse3986Segment(&cur, 0, 1);
634	if (ret != 0) return(ret);
635	}
636	}
637	if (uri != NULL) {
638	if (uri->path != NULL) xmlFree(uri->path);
639	if (cur != *str) {
640	if (uri->cleanup & 2)
641	uri->path = STRNDUP(str, cur - str);
642	else
643	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
644	} else {
645	uri->path = NULL;
646	}
647	}
648	*str = cur;
649	return (0);
650	}
651
652	/**
653	* xmlParse3986PathRootless:
654	* @uri: pointer to an URI structure
655	* @str: the string to analyze
656	*
657	* Parse an path without root and fills in the appropriate fields
658	* of the @uri structure
659	*
660	* path-rootless = segment-nz *( "/" segment )
661	*
662	* Returns 0 or the error code
663	*/
664	static int
665	xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
666	{
667	const char *cur;
668	int ret;
669
670	cur = *str;
671
672	ret = xmlParse3986Segment(&cur, 0, 0);
673	if (ret != 0) return(ret);
674	while (*cur == '/') {
675	cur++;
676	ret = xmlParse3986Segment(&cur, 0, 1);
677	if (ret != 0) return(ret);
678	}
679	if (uri != NULL) {
680	if (uri->path != NULL) xmlFree(uri->path);
681	if (cur != *str) {
682	if (uri->cleanup & 2)
683	uri->path = STRNDUP(str, cur - str);
684	else
685	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
686	} else {
687	uri->path = NULL;
688	}
689	}
690	*str = cur;
691	return (0);
692	}
693
694	/**
695	* xmlParse3986PathNoScheme:
696	* @uri: pointer to an URI structure
697	* @str: the string to analyze
698	*
699	* Parse an path which is not a scheme and fills in the appropriate fields
700	* of the @uri structure
701	*
702	* path-noscheme = segment-nz-nc *( "/" segment )
703	*
704	* Returns 0 or the error code
705	*/
706	static int
707	xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
708	{
709	const char *cur;
710	int ret;
711
712	cur = *str;
713
714	ret = xmlParse3986Segment(&cur, ':', 0);
715	if (ret != 0) return(ret);
716	while (*cur == '/') {
717	cur++;
718	ret = xmlParse3986Segment(&cur, 0, 1);
719	if (ret != 0) return(ret);
720	}
721	if (uri != NULL) {
722	if (uri->path != NULL) xmlFree(uri->path);
723	if (cur != *str) {
724	if (uri->cleanup & 2)
725	uri->path = STRNDUP(str, cur - str);
726	else
727	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
728	} else {
729	uri->path = NULL;
730	}
731	}
732	*str = cur;
733	return (0);
734	}
735
736	/**
737	* xmlParse3986HierPart:
738	* @uri: pointer to an URI structure
739	* @str: the string to analyze
740	*
741	* Parse an hierarchical part and fills in the appropriate fields
742	* of the @uri structure
743	*
744	* hier-part = "//" authority path-abempty
745	* / path-absolute
746	* / path-rootless
747	* / path-empty
748	*
749	* Returns 0 or the error code
750	*/
751	static int
752	xmlParse3986HierPart(xmlURIPtr uri, const char **str)
753	{
754	const char *cur;
755	int ret;
756
757	cur = *str;
758
759	if ((cur == '/') && ((cur + 1) == '/')) {
760	cur += 2;
761	ret = xmlParse3986Authority(uri, &cur);
762	if (ret != 0) return(ret);
763	if (uri->server == NULL)
764	uri->port = -1;
765	ret = xmlParse3986PathAbEmpty(uri, &cur);
766	if (ret != 0) return(ret);
767	*str = cur;
768	return(0);
769	} else if (*cur == '/') {
770	ret = xmlParse3986PathAbsolute(uri, &cur);
771	if (ret != 0) return(ret);
772	} else if (ISA_PCHAR(cur)) {
773	ret = xmlParse3986PathRootless(uri, &cur);
774	if (ret != 0) return(ret);
775	} else {
776	/* path-empty is effectively empty */
777	if (uri != NULL) {
778	if (uri->path != NULL) xmlFree(uri->path);
779	uri->path = NULL;
780	}
781	}
782	*str = cur;
783	return (0);
784	}
785
786	/**
787	* xmlParse3986RelativeRef:
788	* @uri: pointer to an URI structure
789	* @str: the string to analyze
790	*
791	* Parse an URI string and fills in the appropriate fields
792	* of the @uri structure
793	*
794	* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
795	* relative-part = "//" authority path-abempty
796	* / path-absolute
797	* / path-noscheme
798	* / path-empty
799	*
800	* Returns 0 or the error code
801	*/
802	static int
803	xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
804	int ret;
805
806	if ((str == '/') && ((str + 1) == '/')) {
807	str += 2;
808	ret = xmlParse3986Authority(uri, &str);
809	if (ret != 0) return(ret);
810	ret = xmlParse3986PathAbEmpty(uri, &str);
811	if (ret != 0) return(ret);
812	} else if (*str == '/') {
813	ret = xmlParse3986PathAbsolute(uri, &str);
814	if (ret != 0) return(ret);
815	} else if (ISA_PCHAR(str)) {
816	ret = xmlParse3986PathNoScheme(uri, &str);
817	if (ret != 0) return(ret);
818	} else {
819	/* path-empty is effectively empty */
820	if (uri != NULL) {
821	if (uri->path != NULL) xmlFree(uri->path);
822	uri->path = NULL;
823	}
824	}
825
826	if (*str == '?') {
827	str++;
828	ret = xmlParse3986Query(uri, &str);
829	if (ret != 0) return(ret);
830	}
831	if (*str == '#') {
832	str++;
833	ret = xmlParse3986Fragment(uri, &str);
834	if (ret != 0) return(ret);
835	}
836	if (*str != 0) {
837	xmlCleanURI(uri);
838	return(1);
839	}
840	return(0);
841	}
842
843
844	/**
845	* xmlParse3986URI:
846	* @uri: pointer to an URI structure
847	* @str: the string to analyze
848	*
849	* Parse an URI string and fills in the appropriate fields
850	* of the @uri structure
851	*
852	* scheme ":" hier-part [ "?" query ] [ "#" fragment ]
853	*
854	* Returns 0 or the error code
855	*/
856	static int
857	xmlParse3986URI(xmlURIPtr uri, const char *str) {
858	int ret;
859
860	ret = xmlParse3986Scheme(uri, &str);
861	if (ret != 0) return(ret);
862	if (*str != ':') {
863	return(1);
864	}
865	str++;
866	ret = xmlParse3986HierPart(uri, &str);
867	if (ret != 0) return(ret);
868	if (*str == '?') {
869	str++;
870	ret = xmlParse3986Query(uri, &str);
871	if (ret != 0) return(ret);
872	}
873	if (*str == '#') {
874	str++;
875	ret = xmlParse3986Fragment(uri, &str);
876	if (ret != 0) return(ret);
877	}
878	if (*str != 0) {
879	xmlCleanURI(uri);
880	return(1);
881	}
882	return(0);
883	}
884
885	/**
886	* xmlParse3986URIReference:
887	* @uri: pointer to an URI structure
888	* @str: the string to analyze
889	*
890	* Parse an URI reference string and fills in the appropriate fields
891	* of the @uri structure
892	*
893	* URI-reference = URI / relative-ref
894	*
895	* Returns 0 or the error code
896	*/
897	static int
898	xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
899	int ret;
900
901	if (str == NULL)
902	return(-1);
903	xmlCleanURI(uri);
904
905	/*
906	* Try first to parse absolute refs, then fallback to relative if
907	* it fails.
908	*/
909	ret = xmlParse3986URI(uri, str);
910	if (ret != 0) {
911	xmlCleanURI(uri);
912	ret = xmlParse3986RelativeRef(uri, str);
913	if (ret != 0) {
914	xmlCleanURI(uri);
915	return(ret);
916	}
917	}
918	return(0);
919	}
920
921	/**
922	* xmlParseURI:
923	* @str: the URI string to analyze
924	*
925	* Parse an URI based on RFC 3986
926	*
927	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
928	*
929	* Returns a newly built xmlURIPtr or NULL in case of error
930	*/
931	xmlURIPtr
932	xmlParseURI(const char *str) {
933	xmlURIPtr uri;
934	int ret;
935
936	if (str == NULL)
937	return(NULL);
938	uri = xmlCreateURI();
939	if (uri != NULL) {
940	ret = xmlParse3986URIReference(uri, str);
941	if (ret) {
942	xmlFreeURI(uri);
943	return(NULL);
944	}
945	}
946	return(uri);
947	}
948
949	/**
950	* xmlParseURIReference:
951	* @uri: pointer to an URI structure
952	* @str: the string to analyze
953	*
954	* Parse an URI reference string based on RFC 3986 and fills in the
955	* appropriate fields of the @uri structure
956	*
957	* URI-reference = URI / relative-ref
958	*
959	* Returns 0 or the error code
960	*/
961	int
962	xmlParseURIReference(xmlURIPtr uri, const char *str) {
963	return(xmlParse3986URIReference(uri, str));
964	}
965
966	/**
967	* xmlParseURIRaw:
968	* @str: the URI string to analyze
969	* @raw: if 1 unescaping of URI pieces are disabled
970	*
971	* Parse an URI but allows to keep intact the original fragments.
972	*
973	* URI-reference = URI / relative-ref
974	*
975	* Returns a newly built xmlURIPtr or NULL in case of error
976	*/
977	xmlURIPtr
978	xmlParseURIRaw(const char *str, int raw) {
979	xmlURIPtr uri;
980	int ret;
981
982	if (str == NULL)
983	return(NULL);
984	uri = xmlCreateURI();
985	if (uri != NULL) {
986	if (raw) {
987	uri->cleanup \|= 2;
988	}
989	ret = xmlParseURIReference(uri, str);
990	if (ret) {
991	xmlFreeURI(uri);
992	return(NULL);
993	}
994	}
995	return(uri);
996	}
997
998	/************************************************************************
999	* *
1000	* Generic URI structure functions *
1001	* *
1002	************************************************************************/
1003
1004	/**
1005	* xmlCreateURI:
1006	*
1007	* Simply creates an empty xmlURI
1008	*
1009	* Returns the new structure or NULL in case of error
1010	*/
1011	xmlURIPtr
1012	xmlCreateURI(void) {
1013	xmlURIPtr ret;
1014
1015	ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1016	if (ret == NULL) {
1017	xmlURIErrMemory("creating URI structure\n");
1018	return(NULL);
1019	}
1020	memset(ret, 0, sizeof(xmlURI));
1021	return(ret);
1022	}
1023
1024	/**
1025	* xmlSaveUriRealloc:
1026	*
1027	* Function to handle properly a reallocation when saving an URI
1028	* Also imposes some limit on the length of an URI string output
1029	*/
1030	static xmlChar *
1031	xmlSaveUriRealloc(xmlChar ret, int max) {
1032	xmlChar *temp;
1033	int tmp;
1034
1035	if (*max > MAX_URI_LENGTH) {
1036	xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1037	return(NULL);
1038	}
1039	tmp = max 2;
1040	temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1041	if (temp == NULL) {
1042	xmlURIErrMemory("saving URI\n");
1043	return(NULL);
1044	}
1045	*max = tmp;
1046	return(temp);
1047	}
1048
1049	/**
1050	* xmlSaveUri:
1051	* @uri: pointer to an xmlURI
1052	*
1053	* Save the URI as an escaped string
1054	*
1055	* Returns a new string (to be deallocated by caller)
1056	*/
1057	xmlChar *
1058	xmlSaveUri(xmlURIPtr uri) {
1059	xmlChar *ret = NULL;
1060	xmlChar *temp;
1061	const char *p;
1062	int len;
1063	int max;
1064
1065	if (uri == NULL) return(NULL);
1066
1067
1068	max = 80;
1069	ret = (xmlChar ) xmlMallocAtomic((max + 1) sizeof(xmlChar));
1070	if (ret == NULL) {
1071	xmlURIErrMemory("saving URI\n");
1072	return(NULL);
1073	}
1074	len = 0;
1075
1076	if (uri->scheme != NULL) {
1077	p = uri->scheme;
1078	while (*p != 0) {
1079	if (len >= max) {
1080	temp = xmlSaveUriRealloc(ret, &max);
1081	if (temp == NULL) goto mem_error;
1082	ret = temp;
1083	}
1084	ret[len++] = *p++;
1085	}
1086	if (len >= max) {
1087	temp = xmlSaveUriRealloc(ret, &max);
1088	if (temp == NULL) goto mem_error;
1089	ret = temp;
1090	}
1091	ret[len++] = ':';
1092	}
1093	if (uri->opaque != NULL) {
1094	p = uri->opaque;
1095	while (*p != 0) {
1096	if (len + 3 >= max) {
1097	temp = xmlSaveUriRealloc(ret, &max);
1098	if (temp == NULL) goto mem_error;
1099	ret = temp;
1100	}
1101	if (IS_RESERVED((p)) \|\| IS_UNRESERVED((p)))
1102	ret[len++] = *p++;
1103	else {
1104	int val = (unsigned char )p++;
1105	int hi = val / 0x10, lo = val % 0x10;
1106	ret[len++] = '%';
1107	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1108	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1109	}
1110	}
1111	} else {
1112	if ((uri->server != NULL) \|\| (uri->port == -1)) {
1113	if (len + 3 >= max) {
1114	temp = xmlSaveUriRealloc(ret, &max);
1115	if (temp == NULL) goto mem_error;
1116	ret = temp;
1117	}
1118	ret[len++] = '/';
1119	ret[len++] = '/';
1120	if (uri->user != NULL) {
1121	p = uri->user;
1122	while (*p != 0) {
1123	if (len + 3 >= max) {
1124	temp = xmlSaveUriRealloc(ret, &max);
1125	if (temp == NULL) goto mem_error;
1126	ret = temp;
1127	}
1128	if ((IS_UNRESERVED(*(p))) \|\|
1129	(((p) == ';')) \|\| (((p) == ':')) \|\|
1130	(((p) == '&')) \|\| (((p) == '=')) \|\|
1131	(((p) == '+')) \|\| (((p) == '$')) \|\|
1132	((*(p) == ',')))
1133	ret[len++] = *p++;
1134	else {
1135	int val = (unsigned char )p++;
1136	int hi = val / 0x10, lo = val % 0x10;
1137	ret[len++] = '%';
1138	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1139	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1140	}
1141	}
1142	if (len + 3 >= max) {
1143	temp = xmlSaveUriRealloc(ret, &max);
1144	if (temp == NULL) goto mem_error;
1145	ret = temp;
1146	}
1147	ret[len++] = '@';
1148	}
1149	if (uri->server != NULL) {
1150	p = uri->server;
1151	while (*p != 0) {
1152	if (len >= max) {
1153	temp = xmlSaveUriRealloc(ret, &max);
1154	if (temp == NULL) goto mem_error;
1155	ret = temp;
1156	}
1157	ret[len++] = *p++;
1158	}
1159	if (uri->port > 0) {
1160	if (len + 10 >= max) {
1161	temp = xmlSaveUriRealloc(ret, &max);
1162	if (temp == NULL) goto mem_error;
1163	ret = temp;
1164	}
1165	len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1166	}
1167	}
1168	} else if (uri->authority != NULL) {
1169	if (len + 3 >= max) {
1170	temp = xmlSaveUriRealloc(ret, &max);
1171	if (temp == NULL) goto mem_error;
1172	ret = temp;
1173	}
1174	ret[len++] = '/';
1175	ret[len++] = '/';
1176	p = uri->authority;
1177	while (*p != 0) {
1178	if (len + 3 >= max) {
1179	temp = xmlSaveUriRealloc(ret, &max);
1180	if (temp == NULL) goto mem_error;
1181	ret = temp;
1182	}
1183	if ((IS_UNRESERVED(*(p))) \|\|
1184	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\|
1185	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
1186	(((p) == '=')) \|\| (((p) == '+')))
1187	ret[len++] = *p++;
1188	else {
1189	int val = (unsigned char )p++;
1190	int hi = val / 0x10, lo = val % 0x10;
1191	ret[len++] = '%';
1192	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1193	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1194	}
1195	}
1196	} else if (uri->scheme != NULL) {
1197	if (len + 3 >= max) {
1198	temp = xmlSaveUriRealloc(ret, &max);
1199	if (temp == NULL) goto mem_error;
1200	ret = temp;
1201	}
1202	}
1203	if (uri->path != NULL) {
1204	p = uri->path;
1205	/*
1206	* the colon in file:///d: should not be escaped or
1207	* Windows accesses fail later.
1208	*/
1209	if ((uri->scheme != NULL) &&
1210	(p[0] == '/') &&
1211	(((p[1] >= 'a') && (p[1] <= 'z')) \|\|
1212	((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1213	(p[2] == ':') &&
1214	(xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1215	if (len + 3 >= max) {
1216	temp = xmlSaveUriRealloc(ret, &max);
1217	if (temp == NULL) goto mem_error;
1218	ret = temp;
1219	}
1220	ret[len++] = *p++;
1221	ret[len++] = *p++;
1222	ret[len++] = *p++;
1223	}
1224	while (*p != 0) {
1225	if (len + 3 >= max) {
1226	temp = xmlSaveUriRealloc(ret, &max);
1227	if (temp == NULL) goto mem_error;
1228	ret = temp;
1229	}
1230	if ((IS_UNRESERVED((p))) \|\| (((p) == '/')) \|\|
1231	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
1232	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|
1233	((*(p) == ',')))
1234	ret[len++] = *p++;
1235	else {
1236	int val = (unsigned char )p++;
1237	int hi = val / 0x10, lo = val % 0x10;
1238	ret[len++] = '%';
1239	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1240	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1241	}
1242	}
1243	}
1244	if (uri->query_raw != NULL) {
1245	if (len + 1 >= max) {
1246	temp = xmlSaveUriRealloc(ret, &max);
1247	if (temp == NULL) goto mem_error;
1248	ret = temp;
1249	}
1250	ret[len++] = '?';
1251	p = uri->query_raw;
1252	while (*p != 0) {
1253	if (len + 1 >= max) {
1254	temp = xmlSaveUriRealloc(ret, &max);
1255	if (temp == NULL) goto mem_error;
1256	ret = temp;
1257	}
1258	ret[len++] = *p++;
1259	}
1260	} else if (uri->query != NULL) {
1261	if (len + 3 >= max) {
1262	temp = xmlSaveUriRealloc(ret, &max);
1263	if (temp == NULL) goto mem_error;
1264	ret = temp;
1265	}
1266	ret[len++] = '?';
1267	p = uri->query;
1268	while (*p != 0) {
1269	if (len + 3 >= max) {
1270	temp = xmlSaveUriRealloc(ret, &max);
1271	if (temp == NULL) goto mem_error;
1272	ret = temp;
1273	}
1274	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1275	ret[len++] = *p++;
1276	else {
1277	int val = (unsigned char )p++;
1278	int hi = val / 0x10, lo = val % 0x10;
1279	ret[len++] = '%';
1280	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1281	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1282	}
1283	}
1284	}
1285	}
1286	if (uri->fragment != NULL) {
1287	if (len + 3 >= max) {
1288	temp = xmlSaveUriRealloc(ret, &max);
1289	if (temp == NULL) goto mem_error;
1290	ret = temp;
1291	}
1292	ret[len++] = '#';
1293	p = uri->fragment;
1294	while (*p != 0) {
1295	if (len + 3 >= max) {
1296	temp = xmlSaveUriRealloc(ret, &max);
1297	if (temp == NULL) goto mem_error;
1298	ret = temp;
1299	}
1300	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1301	ret[len++] = *p++;
1302	else {
1303	int val = (unsigned char )p++;
1304	int hi = val / 0x10, lo = val % 0x10;
1305	ret[len++] = '%';
1306	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1307	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1308	}
1309	}
1310	}
1311	if (len >= max) {
1312	temp = xmlSaveUriRealloc(ret, &max);
1313	if (temp == NULL) goto mem_error;
1314	ret = temp;
1315	}
1316	ret[len] = 0;
1317	return(ret);
1318
1319	mem_error:
1320	xmlFree(ret);
1321	return(NULL);
1322	}
1323
1324	/**
1325	* xmlPrintURI:
1326	* @stream: a FILE* for the output
1327	* @uri: pointer to an xmlURI
1328	*
1329	* Prints the URI in the stream @stream.
1330	*/
1331	void
1332	xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1333	xmlChar *out;
1334
1335	out = xmlSaveUri(uri);
1336	if (out != NULL) {
1337	fprintf(stream, "%s", (char *) out);
1338	xmlFree(out);
1339	}
1340	}
1341
1342	/**
1343	* xmlCleanURI:
1344	* @uri: pointer to an xmlURI
1345	*
1346	* Make sure the xmlURI struct is free of content
1347	*/
1348	static void
1349	xmlCleanURI(xmlURIPtr uri) {
1350	if (uri == NULL) return;
1351
1352	if (uri->scheme != NULL) xmlFree(uri->scheme);
1353	uri->scheme = NULL;
1354	if (uri->server != NULL) xmlFree(uri->server);
1355	uri->server = NULL;
1356	if (uri->user != NULL) xmlFree(uri->user);
1357	uri->user = NULL;
1358	if (uri->path != NULL) xmlFree(uri->path);
1359	uri->path = NULL;
1360	if (uri->fragment != NULL) xmlFree(uri->fragment);
1361	uri->fragment = NULL;
1362	if (uri->opaque != NULL) xmlFree(uri->opaque);
1363	uri->opaque = NULL;
1364	if (uri->authority != NULL) xmlFree(uri->authority);
1365	uri->authority = NULL;
1366	if (uri->query != NULL) xmlFree(uri->query);
1367	uri->query = NULL;
1368	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1369	uri->query_raw = NULL;
1370	}
1371
1372	/**
1373	* xmlFreeURI:
1374	* @uri: pointer to an xmlURI
1375	*
1376	* Free up the xmlURI struct
1377	*/
1378	void
1379	xmlFreeURI(xmlURIPtr uri) {
1380	if (uri == NULL) return;
1381
1382	if (uri->scheme != NULL) xmlFree(uri->scheme);
1383	if (uri->server != NULL) xmlFree(uri->server);
1384	if (uri->user != NULL) xmlFree(uri->user);
1385	if (uri->path != NULL) xmlFree(uri->path);
1386	if (uri->fragment != NULL) xmlFree(uri->fragment);
1387	if (uri->opaque != NULL) xmlFree(uri->opaque);
1388	if (uri->authority != NULL) xmlFree(uri->authority);
1389	if (uri->query != NULL) xmlFree(uri->query);
1390	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1391	xmlFree(uri);
1392	}
1393
1394	/************************************************************************
1395	* *
1396	* Helper functions *
1397	* *
1398	************************************************************************/
1399
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of consecutive '/' found
 * after the first non-empty segment are collapsed to a single '/' as well.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : skip all but the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining '/' */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }

        /* Source and destination overlap, so strcpy() must not be used;
         * memmove() copies the tail including its terminator safely.
         */
        segp += 3;
        memmove(cur, segp, strlen(segp) + 1);

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end of the string produced by steps (c)/(d);
     * it lies at or past the current terminator, inside the buffer.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1588
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f or A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
1596
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits inside the first
 * @len bytes is copied unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string, so a caller-supplied @target must hold at least
 * @len + 1 bytes.
 *
 * Returns a copy of the string, but unescaped (@target itself when one was
 * supplied, otherwise a newly allocated buffer); will return NULL only in
 * case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;

    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* Fold the two hex digits following '%' into one byte. */
            for (i = 1; i <= 2; i++) {
                char c = in[i];

                value *= 16;
                if ((c >= '0') && (c <= '9'))
                    value += c - '0';
                else if ((c >= 'a') && (c <= 'f'))
                    value += (c - 'a') + 10;
                else if ((c >= 'A') && (c <= 'F'))
                    value += (c - 'A') + 10;
            }
            *out++ = (char) value;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1658
1659	/**
1660	* xmlURIEscapeStr:
1661	* @str: string to escape
1662	* @list: exception list string of chars not to escape
1663	*
1664	* This routine escapes a string to hex, ignoring reserved characters (a-z)
1665	* and the characters in the exception list.
1666	*
1667	* Returns a new escaped string or NULL in case of error.
1668	*/
1669	xmlChar *
1670	xmlURIEscapeStr(const xmlChar str, const xmlChar list) {
1671	xmlChar *ret, ch;
1672	xmlChar *temp;
1673	const xmlChar *in;
1674	int len, out;
1675
1676	if (str == NULL)
1677	return(NULL);
1678	if (str[0] == 0)
1679	return(xmlStrdup(str));
1680	len = xmlStrlen(str);
1681	if (!(len > 0)) return(NULL);
1682
1683	len += 20;
1684	ret = (xmlChar *) xmlMallocAtomic(len);
1685	if (ret == NULL) {
1686	xmlURIErrMemory("escaping URI value\n");
1687	return(NULL);
1688	}
1689	in = (const xmlChar *) str;
1690	out = 0;
1691	while(*in != 0) {
1692	if (len - out <= 3) {
1693	temp = xmlSaveUriRealloc(ret, &len);
1694	if (temp == NULL) {
1695	xmlURIErrMemory("escaping URI value\n");
1696	xmlFree(ret);
1697	return(NULL);
1698	}
1699	ret = temp;
1700	}
1701
1702	ch = *in;
1703
1704	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1705	unsigned char val;
1706	ret[out++] = '%';
1707	val = ch >> 4;
1708	if (val <= 9)
1709	ret[out++] = '0' + val;
1710	else
1711	ret[out++] = 'A' + val - 0xA;
1712	val = ch & 0xF;
1713	if (val <= 9)
1714	ret[out++] = '0' + val;
1715	else
1716	ret[out++] = 'A' + val - 0xA;
1717	in++;
1718	} else {
1719	ret[out++] = *in++;
1720	}
1721
1722	}
1723	ret[out] = 0;
1724	return(ret);
1725	}
1726
1727	/**
1728	* xmlURIEscape:
1729	* @str: the string of the URI to escape
1730	*
1731	* Escaping routine, does not do validity checks !
1732	* It will try to escape the chars needing this, but this is heuristic
1733	* based it's impossible to be sure.
1734	*
1735	* Returns an copy of the string, but escaped
1736	*
1737	* 25 May 2001
1738	* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1739	* according to RFC2396.
1740	* - Carl Douglas
1741	*/
1742	xmlChar *
1743	xmlURIEscape(const xmlChar * str)
1744	{
1745	xmlChar ret, segment = NULL;
1746	xmlURIPtr uri;
1747	int ret2;
1748
1749	#define NULLCHK(p) if(!p) { \
1750	xmlURIErrMemory("escaping URI value\n"); \
1751	xmlFreeURI(uri); \
1752	return NULL; } \
1753
1754	if (str == NULL)
1755	return (NULL);
1756
1757	uri = xmlCreateURI();
1758	if (uri != NULL) {
1759	/*
1760	* Allow escaping errors in the unescaped form
1761	*/
1762	uri->cleanup = 1;
1763	ret2 = xmlParseURIReference(uri, (const char *)str);
1764	if (ret2) {
1765	xmlFreeURI(uri);
1766	return (NULL);
1767	}
1768	}
1769
1770	if (!uri)
1771	return NULL;
1772
1773	ret = NULL;
1774
1775	if (uri->scheme) {
1776	segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1777	NULLCHK(segment)
1778	ret = xmlStrcat(ret, segment);
1779	ret = xmlStrcat(ret, BAD_CAST ":");
1780	xmlFree(segment);
1781	}
1782
1783	if (uri->authority) {
1784	segment =
1785	xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1786	NULLCHK(segment)
1787	ret = xmlStrcat(ret, BAD_CAST "//");
1788	ret = xmlStrcat(ret, segment);
1789	xmlFree(segment);
1790	}
1791
1792	if (uri->user) {
1793	segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1794	NULLCHK(segment)
1795	ret = xmlStrcat(ret,BAD_CAST "//");
1796	ret = xmlStrcat(ret, segment);
1797	ret = xmlStrcat(ret, BAD_CAST "@");
1798	xmlFree(segment);
1799	}
1800
1801	if (uri->server) {
1802	segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1803	NULLCHK(segment)
1804	if (uri->user == NULL)
1805	ret = xmlStrcat(ret, BAD_CAST "//");
1806	ret = xmlStrcat(ret, segment);
1807	xmlFree(segment);
1808	}
1809
1810	if (uri->port) {
1811	xmlChar port[10];
1812
1813	snprintf((char *) port, 10, "%d", uri->port);
1814	ret = xmlStrcat(ret, BAD_CAST ":");
1815	ret = xmlStrcat(ret, port);
1816	}
1817
1818	if (uri->path) {
1819	segment =
1820	xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1821	NULLCHK(segment)
1822	ret = xmlStrcat(ret, segment);
1823	xmlFree(segment);
1824	}
1825
1826	if (uri->query_raw) {
1827	ret = xmlStrcat(ret, BAD_CAST "?");
1828	ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1829	}
1830	else if (uri->query) {
1831	segment =
1832	xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1833	NULLCHK(segment)
1834	ret = xmlStrcat(ret, BAD_CAST "?");
1835	ret = xmlStrcat(ret, segment);
1836	xmlFree(segment);
1837	}
1838
1839	if (uri->opaque) {
1840	segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1841	NULLCHK(segment)
1842	ret = xmlStrcat(ret, segment);
1843	xmlFree(segment);
1844	}
1845
1846	if (uri->fragment) {
1847	segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1848	NULLCHK(segment)
1849	ret = xmlStrcat(ret, BAD_CAST "#");
1850	ret = xmlStrcat(ret, segment);
1851	xmlFree(segment);
1852	}
1853
1854	xmlFreeURI(uri);
1855	#undef NULLCHK
1856
1857	return (ret);
1858	}
1859
1860	/************************************************************************
1861	* *
1862	* Public functions *
1863	* *
1864	************************************************************************/
1865
1866	/**
1867	* xmlBuildURI:
1868	* @URI: the URI instance found in the document
1869	* @base: the base value
1870	*
1871	* Computes he final URI of the reference done by checking that
1872	* the given URI is valid, and building the final URI using the
1873	* base URI. This is processed according to section 5.2 of the
1874	* RFC 2396
1875	*
1876	* 5.2. Resolving Relative References to Absolute Form
1877	*
1878	* Returns a new URI string (to be freed by the caller) or NULL in case
1879	* of error.
1880	*/
1881	xmlChar *
1882	xmlBuildURI(const xmlChar URI, const xmlChar base) {
1883	xmlChar *val = NULL;
1884	int ret, len, indx, cur, out;
1885	xmlURIPtr ref = NULL;
1886	xmlURIPtr bas = NULL;
1887	xmlURIPtr res = NULL;
1888
1889	/*
1890	* 1) The URI reference is parsed into the potential four components and
1891	* fragment identifier, as described in Section 4.3.
1892	*
1893	* NOTE that a completely empty URI is treated by modern browsers
1894	* as a reference to "." rather than as a synonym for the current
1895	* URI. Should we do that here?
1896	*/
1897	if (URI == NULL)
1898	ret = -1;
1899	else {
1900	if (*URI) {
1901	ref = xmlCreateURI();
1902	if (ref == NULL)
1903	goto done;
1904	ret = xmlParseURIReference(ref, (const char *) URI);
1905	}
1906	else
1907	ret = 0;
1908	}
1909	if (ret != 0)
1910	goto done;
1911	if ((ref != NULL) && (ref->scheme != NULL)) {
1912	/*
1913	* The URI is absolute don't modify.
1914	*/
1915	val = xmlStrdup(URI);
1916	goto done;
1917	}
1918	if (base == NULL)
1919	ret = -1;
1920	else {
1921	bas = xmlCreateURI();
1922	if (bas == NULL)
1923	goto done;
1924	ret = xmlParseURIReference(bas, (const char *) base);
1925	}
1926	if (ret != 0) {
1927	if (ref)
1928	val = xmlSaveUri(ref);
1929	goto done;
1930	}
1931	if (ref == NULL) {
1932	/*
1933	* the base fragment must be ignored
1934	*/
1935	if (bas->fragment != NULL) {
1936	xmlFree(bas->fragment);
1937	bas->fragment = NULL;
1938	}
1939	val = xmlSaveUri(bas);
1940	goto done;
1941	}
1942
1943	/*
1944	* 2) If the path component is empty and the scheme, authority, and
1945	* query components are undefined, then it is a reference to the
1946	* current document and we are done. Otherwise, the reference URI's
1947	* query and fragment components are defined as found (or not found)
1948	* within the URI reference and not inherited from the base URI.
1949	*
1950	* NOTE that in modern browsers, the parsing differs from the above
1951	* in the following aspect: the query component is allowed to be
1952	* defined while still treating this as a reference to the current
1953	* document.
1954	*/
1955	res = xmlCreateURI();
1956	if (res == NULL)
1957	goto done;
1958	if ((ref->scheme == NULL) && (ref->path == NULL) &&
1959	((ref->authority == NULL) && (ref->server == NULL))) {
1960	if (bas->scheme != NULL)
1961	res->scheme = xmlMemStrdup(bas->scheme);
1962	if (bas->authority != NULL)
1963	res->authority = xmlMemStrdup(bas->authority);
1964	else if (bas->server != NULL) {
1965	res->server = xmlMemStrdup(bas->server);
1966	if (bas->user != NULL)
1967	res->user = xmlMemStrdup(bas->user);
1968	res->port = bas->port;
1969	}
1970	if (bas->path != NULL)
1971	res->path = xmlMemStrdup(bas->path);
1972	if (ref->query_raw != NULL)
1973	res->query_raw = xmlMemStrdup (ref->query_raw);
1974	else if (ref->query != NULL)
1975	res->query = xmlMemStrdup(ref->query);
1976	else if (bas->query_raw != NULL)
1977	res->query_raw = xmlMemStrdup(bas->query_raw);
1978	else if (bas->query != NULL)
1979	res->query = xmlMemStrdup(bas->query);
1980	if (ref->fragment != NULL)
1981	res->fragment = xmlMemStrdup(ref->fragment);
1982	goto step_7;
1983	}
1984
1985	/*
1986	* 3) If the scheme component is defined, indicating that the reference
1987	* starts with a scheme name, then the reference is interpreted as an
1988	* absolute URI and we are done. Otherwise, the reference URI's
1989	* scheme is inherited from the base URI's scheme component.
1990	*/
1991	if (ref->scheme != NULL) {
1992	val = xmlSaveUri(ref);
1993	goto done;
1994	}
1995	if (bas->scheme != NULL)
1996	res->scheme = xmlMemStrdup(bas->scheme);
1997
1998	if (ref->query_raw != NULL)
1999	res->query_raw = xmlMemStrdup(ref->query_raw);
2000	else if (ref->query != NULL)
2001	res->query = xmlMemStrdup(ref->query);
2002	if (ref->fragment != NULL)
2003	res->fragment = xmlMemStrdup(ref->fragment);
2004
2005	/*
2006	* 4) If the authority component is defined, then the reference is a
2007	* network-path and we skip to step 7. Otherwise, the reference
2008	* URI's authority is inherited from the base URI's authority
2009	* component, which will also be undefined if the URI scheme does not
2010	* use an authority component.
2011	*/
2012	if ((ref->authority != NULL) \|\| (ref->server != NULL)) {
2013	if (ref->authority != NULL)
2014	res->authority = xmlMemStrdup(ref->authority);
2015	else {
2016	res->server = xmlMemStrdup(ref->server);
2017	if (ref->user != NULL)
2018	res->user = xmlMemStrdup(ref->user);
2019	res->port = ref->port;
2020	}
2021	if (ref->path != NULL)
2022	res->path = xmlMemStrdup(ref->path);
2023	goto step_7;
2024	}
2025	if (bas->authority != NULL)
2026	res->authority = xmlMemStrdup(bas->authority);
2027	else if (bas->server != NULL) {
2028	res->server = xmlMemStrdup(bas->server);
2029	if (bas->user != NULL)
2030	res->user = xmlMemStrdup(bas->user);
2031	res->port = bas->port;
2032	}
2033
2034	/*
2035	* 5) If the path component begins with a slash character ("/"), then
2036	* the reference is an absolute-path and we skip to step 7.
2037	*/
2038	if ((ref->path != NULL) && (ref->path[0] == '/')) {
2039	res->path = xmlMemStrdup(ref->path);
2040	goto step_7;
2041	}
2042
2043
2044	/*
2045	* 6) If this step is reached, then we are resolving a relative-path
2046	* reference. The relative path needs to be merged with the base
2047	* URI's path. Although there are many ways to do this, we will
2048	* describe a simple method using a separate string buffer.
2049	*
2050	* Allocate a buffer large enough for the result string.
2051	*/
2052	len = 2; /* extra / and 0 */
2053	if (ref->path != NULL)
2054	len += strlen(ref->path);
2055	if (bas->path != NULL)
2056	len += strlen(bas->path);
2057	res->path = (char *) xmlMallocAtomic(len);
2058	if (res->path == NULL) {
2059	xmlURIErrMemory("resolving URI against base\n");
2060	goto done;
2061	}
2062	res->path[0] = 0;
2063
2064	/*
2065	* a) All but the last segment of the base URI's path component is
2066	* copied to the buffer. In other words, any characters after the
2067	* last (right-most) slash character, if any, are excluded.
2068	*/
2069	cur = 0;
2070	out = 0;
2071	if (bas->path != NULL) {
2072	while (bas->path[cur] != 0) {
2073	while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2074	cur++;
2075	if (bas->path[cur] == 0)
2076	break;
2077
2078	cur++;
2079	while (out < cur) {
2080	res->path[out] = bas->path[out];
2081	out++;
2082	}
2083	}
2084	}
2085	res->path[out] = 0;
2086
2087	/*
2088	* b) The reference's path component is appended to the buffer
2089	* string.
2090	*/
2091	if (ref->path != NULL && ref->path[0] != 0) {
2092	indx = 0;
2093	/*
2094	* Ensure the path includes a '/'
2095	*/
2096	if ((out == 0) && (bas->server != NULL))
2097	res->path[out++] = '/';
2098	while (ref->path[indx] != 0) {
2099	res->path[out++] = ref->path[indx++];
2100	}
2101	}
2102	res->path[out] = 0;
2103
2104	/*
2105	* Steps c) to h) are really path normalization steps
2106	*/
2107	xmlNormalizeURIPath(res->path);
2108
2109	step_7:
2110
2111	/*
2112	* 7) The resulting URI components, including any inherited from the
2113	* base URI, are recombined to give the absolute form of the URI
2114	* reference.
2115	*/
2116	val = xmlSaveUri(res);
2117
2118	done:
2119	if (ref != NULL)
2120	xmlFreeURI(ref);
2121	if (bas != NULL)
2122	xmlFreeURI(bas);
2123	if (res != NULL)
2124	xmlFreeURI(res);
2125	return(val);
2126	}
2127
2128	/**
2129	* xmlBuildRelativeURI:
2130	* @URI: the URI reference under consideration
2131	* @base: the base value
2132	*
2133	* Expresses the URI of the reference in terms relative to the
2134	* base. Some examples of this operation include:
2135	* base = "http://site1.com/docs/book1.html"
2136	* URI input URI returned
2137	* docs/pic1.gif pic1.gif
2138	* docs/img/pic1.gif img/pic1.gif
2139	* img/pic1.gif ../img/pic1.gif
2140	* http://site1.com/docs/pic1.gif pic1.gif
2141	* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2142	*
2143	* base = "docs/book1.html"
2144	* URI input URI returned
2145	* docs/pic1.gif pic1.gif
2146	* docs/img/pic1.gif img/pic1.gif
2147	* img/pic1.gif ../img/pic1.gif
2148	* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2149	*
2150	*
2151	* Note: if the URI reference is really wierd or complicated, it may be
2152	* worthwhile to first convert it into a "nice" one by calling
2153	* xmlBuildURI (using 'base') before calling this routine,
2154	* since this routine (for reasonable efficiency) assumes URI has
2155	* already been through some validation.
2156	*
2157	* Returns a new URI string (to be freed by the caller) or NULL in case
2158	* error.
2159	*/
2160	xmlChar *
2161	xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2162	{
2163	xmlChar *val = NULL;
2164	int ret;
2165	int ix;
2166	int pos = 0;
2167	int nbslash = 0;
2168	int len;
2169	xmlURIPtr ref = NULL;
2170	xmlURIPtr bas = NULL;
2171	xmlChar bptr, uptr, *vptr;
2172	int remove_path = 0;
2173
2174	if ((URI == NULL) \|\| (*URI == 0))
2175	return NULL;
2176
2177	/*
2178	* First parse URI into a standard form
2179	*/
2180	ref = xmlCreateURI ();
2181	if (ref == NULL)
2182	return NULL;
2183	/* If URI not already in "relative" form */
2184	if (URI[0] != '.') {
2185	ret = xmlParseURIReference (ref, (const char *) URI);
2186	if (ret != 0)
2187	goto done; /* Error in URI, return NULL */
2188	} else
2189	ref->path = (char *)xmlStrdup(URI);
2190
2191	/*
2192	* Next parse base into the same standard form
2193	*/
2194	if ((base == NULL) \|\| (*base == 0)) {
2195	val = xmlStrdup (URI);
2196	goto done;
2197	}
2198	bas = xmlCreateURI ();
2199	if (bas == NULL)
2200	goto done;
2201	if (base[0] != '.') {
2202	ret = xmlParseURIReference (bas, (const char *) base);
2203	if (ret != 0)
2204	goto done; /* Error in base, return NULL */
2205	} else
2206	bas->path = (char *)xmlStrdup(base);
2207
2208	/*
2209	* If the scheme / server on the URI differs from the base,
2210	* just return the URI
2211	*/
2212	if ((ref->scheme != NULL) &&
2213	((bas->scheme == NULL) \|\|
2214	(xmlStrcmp ((xmlChar )bas->scheme, (xmlChar )ref->scheme)) \|\|
2215	(xmlStrcmp ((xmlChar )bas->server, (xmlChar )ref->server)))) {
2216	val = xmlStrdup (URI);
2217	goto done;
2218	}
2219	if (xmlStrEqual((xmlChar )bas->path, (xmlChar )ref->path)) {
2220	val = xmlStrdup(BAD_CAST "");
2221	goto done;
2222	}
2223	if (bas->path == NULL) {
2224	val = xmlStrdup((xmlChar *)ref->path);
2225	goto done;
2226	}
2227	if (ref->path == NULL) {
2228	ref->path = (char *) "/";
2229	remove_path = 1;
2230	}
2231
2232	/*
2233	* At this point (at last!) we can compare the two paths
2234	*
2235	* First we take care of the special case where either of the
2236	* two path components may be missing (bug 316224)
2237	*/
2238	if (bas->path == NULL) {
2239	if (ref->path != NULL) {
2240	uptr = (xmlChar *) ref->path;
2241	if (*uptr == '/')
2242	uptr++;
2243	/* exception characters from xmlSaveUri */
2244	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2245	}
2246	goto done;
2247	}
2248	bptr = (xmlChar *)bas->path;
2249	if (ref->path == NULL) {
2250	for (ix = 0; bptr[ix] != 0; ix++) {
2251	if (bptr[ix] == '/')
2252	nbslash++;
2253	}
2254	uptr = NULL;
2255	len = 1; /* this is for a string terminator only */
2256	} else {
2257	/*
2258	* Next we compare the two strings and find where they first differ
2259	*/
2260	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2261	pos += 2;
2262	if ((*bptr == '.') && (bptr[1] == '/'))
2263	bptr += 2;
2264	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2265	bptr++;
2266	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2267	pos++;
2268
2269	if (bptr[pos] == ref->path[pos]) {
2270	val = xmlStrdup(BAD_CAST "");
2271	goto done; /* (I can't imagine why anyone would do this) */
2272	}
2273
2274	/*
2275	* In URI, "back up" to the last '/' encountered. This will be the
2276	* beginning of the "unique" suffix of URI
2277	*/
2278	ix = pos;
2279	if ((ref->path[ix] == '/') && (ix > 0))
2280	ix--;
2281	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2282	ix -= 2;
2283	for (; ix > 0; ix--) {
2284	if (ref->path[ix] == '/')
2285	break;
2286	}
2287	if (ix == 0) {
2288	uptr = (xmlChar *)ref->path;
2289	} else {
2290	ix++;
2291	uptr = (xmlChar *)&ref->path[ix];
2292	}
2293
2294	/*
2295	* In base, count the number of '/' from the differing point
2296	*/
2297	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2298	for (; bptr[ix] != 0; ix++) {
2299	if (bptr[ix] == '/')
2300	nbslash++;
2301	}
2302	}
2303	len = xmlStrlen (uptr) + 1;
2304	}
2305
2306	if (nbslash == 0) {
2307	if (uptr != NULL)
2308	/* exception characters from xmlSaveUri */
2309	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2310	goto done;
2311	}
2312
2313	/*
2314	* Allocate just enough space for the returned string -
2315	* length of the remainder of the URI, plus enough space
2316	* for the "../" groups, plus one for the terminator
2317	*/
2318	val = (xmlChar ) xmlMalloc (len + 3 nbslash);
2319	if (val == NULL) {
2320	xmlURIErrMemory("building relative URI\n");
2321	goto done;
2322	}
2323	vptr = val;
2324	/*
2325	* Put in as many "../" as needed
2326	*/
2327	for (; nbslash>0; nbslash--) {
2328	*vptr++ = '.';
2329	*vptr++ = '.';
2330	*vptr++ = '/';
2331	}
2332	/*
2333	* Finish up with the end of the URI
2334	*/
2335	if (uptr != NULL) {
2336	if ((vptr > val) && (len > 0) &&
2337	(uptr[0] == '/') && (vptr[-1] == '/')) {
2338	memcpy (vptr, uptr + 1, len - 1);
2339	vptr[len - 2] = 0;
2340	} else {
2341	memcpy (vptr, uptr, len);
2342	vptr[len - 1] = 0;
2343	}
2344	} else {
2345	vptr[len - 1] = 0;
2346	}
2347
2348	/* escape the freshly-built path */
2349	vptr = val;
2350	/* exception characters from xmlSaveUri */
2351	val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2352	xmlFree(vptr);
2353
2354	done:
2355	/*
2356	* Free the working variables
2357	*/
2358	if (remove_path != 0)
2359	ref->path = NULL;
2360	if (ref != NULL)
2361	xmlFreeURI (ref);
2362	if (bas != NULL)
2363	xmlFreeURI (bas);
2364
2365	return val;
2366	}
2367
2368	/**
2369	* xmlCanonicPath:
2370	* @path: the resource locator in a filesystem notation
2371	*
2372	* Constructs a canonic path from the specified path.
2373	*
2374	* Returns a new canonic path, or a duplicate of the path parameter if the
2375	* construction fails. The caller is responsible for freeing the memory occupied
2376	* by the returned string. If there is insufficient memory available, or the
2377	* argument is NULL, the function returns NULL.
2378	*/
/* True when p is non-NULL and starts with "<ASCII letter>:/" or "<ASCII letter>:\" */
#define IS_WINDOWS_PATH(p)                                          \
    (((p) != NULL) &&                                               \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||                       \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&                      \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2384	xmlChar *
2385	xmlCanonicPath(const xmlChar *path)
2386	{
2387	/*
2388	* For Windows implementations, additional work needs to be done to
2389	* replace backslashes in pathnames with "forward slashes"
2390	*/
2391	#if defined(_WIN32) && !defined(__CYGWIN__)
2392	int len = 0;
2393	int i = 0;
2394	xmlChar *p = NULL;
2395	#endif
2396	xmlURIPtr uri;
2397	xmlChar *ret;
2398	const xmlChar *absuri;
2399
2400	if (path == NULL)
2401	return(NULL);
2402
2403	#if defined(_WIN32)
2404	/*
2405	* We must not change the backslashes to slashes if the the path
2406	* starts with \\?\
2407	* Those paths can be up to 32k characters long.
2408	* Was added specifically for OpenOffice, those paths can't be converted
2409	* to URIs anyway.
2410	*/
2411	if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2412	(path[3] == '\\') )
2413	return xmlStrdup((const xmlChar *) path);
2414	#endif
2415
2416	/* sanitize filename starting with // so it can be used as URI */
2417	if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2418	path++;
2419
2420	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2421	xmlFreeURI(uri);
2422	return xmlStrdup(path);
2423	}
2424
2425	/* Check if this is an "absolute uri" */
2426	absuri = xmlStrstr(path, BAD_CAST "://");
2427	if (absuri != NULL) {
2428	int l, j;
2429	unsigned char c;
2430	xmlChar *escURI;
2431
2432	/*
2433	* this looks like an URI where some parts have not been
2434	* escaped leading to a parsing problem. Check that the first
2435	* part matches a protocol.
2436	*/
2437	l = absuri - path;
2438	/* Bypass if first part (part before the '://') is > 20 chars */
2439	if ((l <= 0) \|\| (l > 20))
2440	goto path_processing;
2441	/* Bypass if any non-alpha characters are present in first part */
2442	for (j = 0;j < l;j++) {
2443	c = path[j];
2444	if (!(((c >= 'a') && (c <= 'z')) \|\| ((c >= 'A') && (c <= 'Z'))))
2445	goto path_processing;
2446	}
2447
2448	/* Escape all except the characters specified in the supplied path */
2449	escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2450	if (escURI != NULL) {
2451	/* Try parsing the escaped path */
2452	uri = xmlParseURI((const char *) escURI);
2453	/* If successful, return the escaped string */
2454	if (uri != NULL) {
2455	xmlFreeURI(uri);
2456	return escURI;
2457	}
2458	}
2459	}
2460
2461	path_processing:
2462	/* For Windows implementations, replace backslashes with 'forward slashes' */
2463	#if defined(_WIN32) && !defined(__CYGWIN__)
2464	/*
2465	* Create a URI structure
2466	*/
2467	uri = xmlCreateURI();
2468	if (uri == NULL) { /* Guard against 'out of memory' */
2469	return(NULL);
2470	}
2471
2472	len = xmlStrlen(path);
2473	if ((len > 2) && IS_WINDOWS_PATH(path)) {
2474	/* make the scheme 'file' */
2475	uri->scheme = xmlStrdup(BAD_CAST "file");
2476	/* allocate space for leading '/' + path + string terminator */
2477	uri->path = xmlMallocAtomic(len + 2);
2478	if (uri->path == NULL) {
2479	xmlFreeURI(uri); /* Guard agains 'out of memory' */
2480	return(NULL);
2481	}
2482	/* Put in leading '/' plus path */
2483	uri->path[0] = '/';
2484	p = uri->path + 1;
2485	strncpy(p, path, len + 1);
2486	} else {
2487	uri->path = xmlStrdup(path);
2488	if (uri->path == NULL) {
2489	xmlFreeURI(uri);
2490	return(NULL);
2491	}
2492	p = uri->path;
2493	}
2494	/* Now change all occurences of '\' to '/' */
2495	while (*p != '\0') {
2496	if (*p == '\\')
2497	*p = '/';
2498	p++;
2499	}
2500
2501	if (uri->scheme == NULL) {
2502	ret = xmlStrdup((const xmlChar *) uri->path);
2503	} else {
2504	ret = xmlSaveUri(uri);
2505	}
2506
2507	xmlFreeURI(uri);
2508	#else
2509	ret = xmlStrdup((const xmlChar *) path);
2510	#endif
2511	return(ret);
2512	}
2513
2514	/**
2515	* xmlPathToURI:
2516	* @path: the resource locator in a filesystem notation
2517	*
2518	* Constructs an URI expressing the existing path
2519	*
2520	* Returns a new URI, or a duplicate of the path parameter if the
2521	* construction fails. The caller is responsible for freeing the memory
2522	* occupied by the returned string. If there is insufficient memory available,
2523	* or the argument is NULL, the function returns NULL.
2524	*/
2525	xmlChar *
2526	xmlPathToURI(const xmlChar *path)
2527	{
2528	xmlURIPtr uri;
2529	xmlURI temp;
2530	xmlChar ret, cal;
2531
2532	if (path == NULL)
2533	return(NULL);
2534
2535	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2536	xmlFreeURI(uri);
2537	return xmlStrdup(path);
2538	}
2539	cal = xmlCanonicPath(path);
2540	if (cal == NULL)
2541	return(NULL);
2542	#if defined(_WIN32) && !defined(__CYGWIN__)
2543	/* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2544	If 'cal' is a valid URI allready then we are done here, as continuing would make
2545	it invalid. */
2546	if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2547	xmlFreeURI(uri);
2548	return cal;
2549	}
2550	/* 'cal' can contain a relative path with backslashes. If that is processed
2551	by xmlSaveURI, they will be escaped and the external entity loader machinery
2552	will fail. So convert them to slashes. Misuse 'ret' for walking. */
2553	ret = cal;
2554	while (*ret != '\0') {
2555	if (*ret == '\\')
2556	*ret = '/';
2557	ret++;
2558	}
2559	#endif
2560	memset(&temp, 0, sizeof(temp));
2561	temp.path = (char *) cal;
2562	ret = xmlSaveUri(&temp);
2563	xmlFree(cal);
2564	return(ret);
2565	}
2566	#define bottom_uri
2567	#include "elfgcchack.h"

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/libxml2-2.9.4/uri.c@ 70579

以其他格式下載: