urlapi.c@ 103621

最後變更在這個檔案從103621是 101409,由 vboxsync 提交於 16 月前
curl-8.4.0: Applied and adjusted our curl changes to 8.3.0. bugref:10533
屬性 svn:eol-style 設為 `native`
檔案大小: 52.2 KB

行
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * SPDX-License-Identifier: curl
 *
 ***************************************************************************/
24
25	#include "curl_setup.h"
26
27	#include "urldata.h"
28	#include "urlapi-int.h"
29	#include "strcase.h"
30	#include "url.h"
31	#include "escape.h"
32	#include "curl_ctype.h"
33	#include "inet_pton.h"
34	#include "inet_ntop.h"
35	#include "strdup.h"
36	#include "idn.h"
37	#include "curl_memrchr.h"
38
39	/* The last 3 #include files should be in this order */
40	#include "curl_printf.h"
41	#include "curl_memory.h"
42	#include "memdebug.h"
43
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * 'str' is evaluated several times, so it must not have side effects. The
 * argument is parenthesized so that pointer expressions such as (p + 1) can
 * be passed safely. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL
 *
 * 'str' is evaluated several times, so it must not have side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))

/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40

/*
 * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 */
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
69
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every string member is either NULL or a heap allocation owned by the
 * handle; free_urlhandle() passes each of them to free(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};

/* scheme used for scheme-less URLs when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
86
87	static void free_urlhandle(struct Curl_URL *u)
88	{
89	free(u->scheme);
90	free(u->user);
91	free(u->password);
92	free(u->options);
93	free(u->host);
94	free(u->zoneid);
95	free(u->port);
96	free(u->path);
97	free(u->query);
98	free(u->fragment);
99	}
100
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.example.com?id=2380
 *
 * The search starts right after the first "//" in 'url', or at the start of
 * 'url' if there is none. Returns a pointer into 'url': to the first '/' or
 * '?' found from there (whichever comes first), or to the terminating null
 * byte if neither is present.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
128
129	/*
130	* Decide whether a character in a URL must be escaped.
131	*/
132	#define urlchar_needs_escaping(c) (!(ISCNTRL(c) \|\| ISSPACE(c) \|\| ISGRAPH(c)))
133
134	static const char hexdigits[] = "0123456789abcdef";
135	/* urlencode_str() writes data into an output dynbuf and URL-encodes the
136	* spaces in the source URL accordingly.
137	*
138	* URL encoding should be skipped for host names, otherwise IDN resolution
139	* will fail.
140	*/
141	static CURLUcode urlencode_str(struct dynbuf o, const char url,
142	size_t len, bool relative,
143	bool query)
144	{
145	/* we must add this with whitespace-replacing */
146	bool left = !query;
147	const unsigned char *iptr;
148	const unsigned char host_sep = (const unsigned char ) url;
149
150	if(!relative)
151	host_sep = (const unsigned char *) find_host_sep(url);
152
153	for(iptr = (unsigned char )url; / read from here */
154	len; iptr++, len--) {
155
156	if(iptr < host_sep) {
157	if(Curl_dyn_addn(o, iptr, 1))
158	return CURLUE_OUT_OF_MEMORY;
159	continue;
160	}
161
162	if(*iptr == ' ') {
163	if(left) {
164	if(Curl_dyn_addn(o, "%20", 3))
165	return CURLUE_OUT_OF_MEMORY;
166	}
167	else {
168	if(Curl_dyn_addn(o, "+", 1))
169	return CURLUE_OUT_OF_MEMORY;
170	}
171	continue;
172	}
173
174	if(*iptr == '?')
175	left = FALSE;
176
177	if(urlchar_needs_escaping(*iptr)) {
178	char out[3]={'%'};
179	out[1] = hexdigits[*iptr>>4];
180	out[2] = hexdigits[*iptr & 0xf];
181	if(Curl_dyn_addn(o, out, 3))
182	return CURLUE_OUT_OF_MEMORY;
183	}
184	else {
185	if(Curl_dyn_addn(o, iptr, 1))
186	return CURLUE_OUT_OF_MEMORY;
187	}
188	}
189
190	return CURLUE_OK;
191	}
192
193	/*
194	* Returns the length of the scheme if the given URL is absolute (as opposed
195	* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
196	* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
197	*
198	* If 'guess_scheme' is TRUE, it means the URL might be provided without
199	* scheme.
200	*/
201	size_t Curl_is_absolute_url(const char url, char buf, size_t buflen,
202	bool guess_scheme)
203	{
204	int i = 0;
205	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
206	(void)buflen; /* only used in debug-builds */
207	if(buf)
208	buf[0] = 0; /* always leave a defined value in buf */
209	#ifdef WIN32
210	if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
211	return 0;
212	#endif
213	if(ISALPHA(url[0]))
214	for(i = 1; i < MAX_SCHEME_LEN; ++i) {
215	char s = url[i];
216	if(s && (ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') )) {
217	/* RFC 3986 3.1 explains:
218	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
219	*/
220	}
221	else {
222	break;
223	}
224	}
225	if(i && (url[i] == ':') && ((url[i + 1] == '/') \|\| !guess_scheme)) {
226	/* If this does not guess scheme, the scheme always ends with the colon so
227	that this also detects data: URLs etc. In guessing mode, data: could
228	be the host name "data" with a specified port number. */
229
230	/* the length of the scheme is the name part only */
231	size_t len = i;
232	if(buf) {
233	buf[i] = 0;
234	while(i--) {
235	buf[i] = Curl_raw_tolower(url[i]);
236	}
237	}
238	return len;
239	}
240	return 0;
241	}
242
243	/*
244	* Concatenate a relative URL to a base URL making it absolute.
245	* URL-encodes any spaces.
246	* The returned pointer must be freed by the caller unless NULL
247	* (returns NULL on out of memory).
248	*
249	* Note that this function destroys the 'base' string.
250	*/
251	static char concat_url(char base, const char *relurl)
252	{
253	/***
254	TRY to append this new path to the old URL
255	to the right of the host part. Oh crap, this is doomed to cause
256	problems in the future...
257	*/
258	struct dynbuf newest;
259	char *protsep;
260	char *pathsep;
261	bool host_changed = FALSE;
262	const char *useurl = relurl;
263
264	/* protsep points to the start of the host name */
265	protsep = strstr(base, "//");
266	if(!protsep)
267	protsep = base;
268	else
269	protsep += 2; /* pass the slashes */
270
271	if('/' != relurl[0]) {
272	int level = 0;
273
274	/* First we need to find out if there's a ?-letter in the URL,
275	and cut it and the right-side of that off */
276	pathsep = strchr(protsep, '?');
277	if(pathsep)
278	*pathsep = 0;
279
280	/* we have a relative path to append to the last slash if there's one
281	available, or if the new URL is just a query string (starts with a
282	'?') we append the new one at the end of the entire currently worked
283	out URL */
284	if(useurl[0] != '?') {
285	pathsep = strrchr(protsep, '/');
286	if(pathsep)
287	*pathsep = 0;
288	}
289
290	/* Check if there's any slash after the host name, and if so, remember
291	that position instead */
292	pathsep = strchr(protsep, '/');
293	if(pathsep)
294	protsep = pathsep + 1;
295	else
296	protsep = NULL;
297
298	/* now deal with one "./" or any amount of "../" in the newurl
299	and act accordingly */
300
301	if((useurl[0] == '.') && (useurl[1] == '/'))
302	useurl += 2; /* just skip the "./" */
303
304	while((useurl[0] == '.') &&
305	(useurl[1] == '.') &&
306	(useurl[2] == '/')) {
307	level++;
308	useurl += 3; /* pass the "../" */
309	}
310
311	if(protsep) {
312	while(level--) {
313	/* cut off one more level from the right of the original URL */
314	pathsep = strrchr(protsep, '/');
315	if(pathsep)
316	*pathsep = 0;
317	else {
318	*protsep = 0;
319	break;
320	}
321	}
322	}
323	}
324	else {
325	/* We got a new absolute path for this server */
326
327	if(relurl[1] == '/') {
328	/* the new URL starts with //, just keep the protocol part from the
329	original one */
330	*protsep = 0;
331	useurl = &relurl[2]; /* we keep the slashes from the original, so we
332	skip the new ones */
333	host_changed = TRUE;
334	}
335	else {
336	/* cut off the original URL from the first slash, or deal with URLs
337	without slash */
338	pathsep = strchr(protsep, '/');
339	if(pathsep) {
340	/* When people use badly formatted URLs, such as
341	"http://www.example.com?dir=/home/daniel" we must not use the first
342	slash, if there's a ?-letter before it! */
343	char *sep = strchr(protsep, '?');
344	if(sep && (sep < pathsep))
345	pathsep = sep;
346	*pathsep = 0;
347	}
348	else {
349	/* There was no slash. Now, since we might be operating on a badly
350	formatted URL, such as "http://www.example.com?id=2380" which
351	doesn't use a slash separator as it is supposed to, we need to check
352	for a ?-letter as well! */
353	pathsep = strchr(protsep, '?');
354	if(pathsep)
355	*pathsep = 0;
356	}
357	}
358	}
359
360	Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
361
362	/* copy over the root url part */
363	if(Curl_dyn_add(&newest, base))
364	return NULL;
365
366	/* check if we need to append a slash */
367	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| ('?' == useurl[0]))
368	;
369	else {
370	if(Curl_dyn_addn(&newest, "/", 1))
371	return NULL;
372	}
373
374	/* then append the new piece on the right side */
375	urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
376
377	return Curl_dyn_ptr(&newest);
378	}
379
380	/* scan for byte values <= 31, 127 and sometimes space */
381	static CURLUcode junkscan(const char url, size_t urllen, unsigned int flags)
382	{
383	static const char badbytes[]={
384	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
385	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
386	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
387	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
388	0x7f, 0x00 /* null-terminate */
389	};
390	size_t n = strlen(url);
391	size_t nfine;
392
393	if(n > CURL_MAX_INPUT_LENGTH)
394	/* excessive input length */
395	return CURLUE_MALFORMED_INPUT;
396
397	nfine = strcspn(url, badbytes);
398	if((nfine != n) \|\|
399	(!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
400	return CURLUE_MALFORMED_INPUT;
401
402	*urllen = n;
403	return CURLUE_OK;
404	}
405
406	/*
407	* parse_hostname_login()
408	*
409	* Parse the login details (user name, password and options) from the URL and
410	* strip them out of the host name
411	*
412	*/
413	static CURLUcode parse_hostname_login(struct Curl_URL *u,
414	const char *login,
415	size_t len,
416	unsigned int flags,
417	size_t offset) / to the host name */
418	{
419	CURLUcode result = CURLUE_OK;
420	CURLcode ccode;
421	char *userp = NULL;
422	char *passwdp = NULL;
423	char *optionsp = NULL;
424	const struct Curl_handler *h = NULL;
425
426	/* At this point, we assume all the other special cases have been taken
427	* care of, so the host is at most
428	*
429	* [user[:password][;options]]@]hostname
430	*
431	* We need somewhere to put the embedded details, so do that first.
432	*/
433	char *ptr;
434
435	DEBUGASSERT(login);
436
437	*offset = 0;
438	ptr = memchr(login, '@', len);
439	if(!ptr)
440	goto out;
441
442	/* We will now try to extract the
443	* possible login information in a string like:
444	* ftp://user:[email protected]:8021/README */
445	ptr++;
446
447	/* if this is a known scheme, get some details */
448	if(u->scheme)
449	h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
450
451	/* We could use the login information in the URL so extract it. Only parse
452	options if the handler says we should. Note that 'h' might be NULL! */
453	ccode = Curl_parse_login_details(login, ptr - login - 1,
454	&userp, &passwdp,
455	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
456	&optionsp:NULL);
457	if(ccode) {
458	result = CURLUE_BAD_LOGIN;
459	goto out;
460	}
461
462	if(userp) {
463	if(flags & CURLU_DISALLOW_USER) {
464	/* Option DISALLOW_USER is set and url contains username. */
465	result = CURLUE_USER_NOT_ALLOWED;
466	goto out;
467	}
468	free(u->user);
469	u->user = userp;
470	}
471
472	if(passwdp) {
473	free(u->password);
474	u->password = passwdp;
475	}
476
477	if(optionsp) {
478	free(u->options);
479	u->options = optionsp;
480	}
481
482	/* the host name starts at this offset */
483	*offset = ptr - login;
484	return CURLUE_OK;
485
486	out:
487
488	free(userp);
489	free(passwdp);
490	free(optionsp);
491	u->user = NULL;
492	u->password = NULL;
493	u->options = NULL;
494
495	return result;
496	}
497
498	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, struct dynbuf host,
499	bool has_scheme)
500	{
501	char *portptr;
502	char *hostname = Curl_dyn_ptr(host);
503	/*
504	* Find the end of an IPv6 address on the ']' ending bracket.
505	*/
506	if(hostname[0] == '[') {
507	portptr = strchr(hostname, ']');
508	if(!portptr)
509	return CURLUE_BAD_IPV6;
510	portptr++;
511	/* this is a RFC2732-style specified IP-address */
512	if(*portptr) {
513	if(*portptr != ':')
514	return CURLUE_BAD_PORT_NUMBER;
515	}
516	else
517	portptr = NULL;
518	}
519	else
520	portptr = strchr(hostname, ':');
521
522	if(portptr) {
523	char *rest;
524	long port;
525	size_t keep = portptr - hostname;
526
527	/* Browser behavior adaptation. If there's a colon with no digits after,
528	just cut off the name there which makes us ignore the colon and just
529	use the default port. Firefox, Chrome and Safari all do that.
530
531	Don't do it if the URL has no scheme, to make something that looks like
532	a scheme not work!
533	*/
534	Curl_dyn_setlen(host, keep);
535	portptr++;
536	if(!*portptr)
537	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
538
539	if(!ISDIGIT(*portptr))
540	return CURLUE_BAD_PORT_NUMBER;
541
542	port = strtol(portptr, &rest, 10); /* Port number must be decimal */
543
544	if(port > 0xffff)
545	return CURLUE_BAD_PORT_NUMBER;
546
547	if(rest[0])
548	return CURLUE_BAD_PORT_NUMBER;
549
550	u->portnum = port;
551	/* generate a new port number string to get rid of leading zeroes etc */
552	free(u->port);
553	u->port = aprintf("%ld", port);
554	if(!u->port)
555	return CURLUE_OUT_OF_MEMORY;
556	}
557
558	return CURLUE_OK;
559	}
560
561	/* this assumes 'hostname' now starts with [ */
562	static CURLUcode ipv6_parse(struct Curl_URL u, char hostname,
563	size_t hlen) /* length of hostname */
564	{
565	size_t len;
566	DEBUGASSERT(*hostname == '[');
567	if(hlen < 4) /* '[::]' is the shortest possible valid string */
568	return CURLUE_BAD_IPV6;
569	hostname++;
570	hlen -= 2;
571
572	/* only valid IPv6 letters are ok */
573	len = strspn(hostname, "0123456789abcdefABCDEF:.");
574
575	if(hlen != len) {
576	hlen = len;
577	if(hostname[len] == '%') {
578	/* this could now be '%[zone id]' */
579	char zoneid[16];
580	int i = 0;
581	char *h = &hostname[len + 1];
582	/* pass '25' if present and is a url encoded percent sign */
583	if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
584	h += 2;
585	while(h && (h != ']') && (i < 15))
586	zoneid[i++] = *h++;
587	if(!i \|\| (']' != *h))
588	return CURLUE_BAD_IPV6;
589	zoneid[i] = 0;
590	u->zoneid = strdup(zoneid);
591	if(!u->zoneid)
592	return CURLUE_OUT_OF_MEMORY;
593	hostname[len] = ']'; /* insert end bracket */
594	hostname[len + 1] = 0; /* terminate the hostname */
595	}
596	else
597	return CURLUE_BAD_IPV6;
598	/* hostname is fine */
599	}
600
601	/* Check the IPv6 address. */
602	{
603	char dest[16]; /* fits a binary IPv6 address */
604	char norm[MAX_IPADR_LEN];
605	hostname[hlen] = 0; /* end the address there */
606	if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
607	return CURLUE_BAD_IPV6;
608
609	/* check if it can be done shorter */
610	if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
611	(strlen(norm) < hlen)) {
612	strcpy(hostname, norm);
613	hlen = strlen(norm);
614	hostname[hlen + 1] = 0;
615	}
616	hostname[hlen] = ']'; /* restore ending bracket */
617	}
618	return CURLUE_OK;
619	}
620
621	static CURLUcode hostname_check(struct Curl_URL u, char hostname,
622	size_t hlen) /* length of hostname */
623	{
624	size_t len;
625	DEBUGASSERT(hostname);
626
627	if(!hlen)
628	return CURLUE_NO_HOST;
629	else if(hostname[0] == '[')
630	return ipv6_parse(u, hostname, hlen);
631	else {
632	/* letters from the second string are not ok */
633	len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
634	if(hlen != len)
635	/* hostname with bad content */
636	return CURLUE_BAD_HOSTNAME;
637	}
638	return CURLUE_OK;
639	}
640
641	/*
642	* Handle partial IPv4 numerical addresses and different bases, like
643	* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
644	*
645	* If the given input string is syntactically wrong IPv4 or any part for
646	* example is too big, this function returns HOST_NAME.
647	*
648	* Output the "normalized" version of that input string in plain quad decimal
649	* integers.
650	*
651	* Returns the host type.
652	*/
653
654	#define HOST_ERROR -1 /* out of memory */
655	#define HOST_BAD -2 /* bad IPv4 address */
656
657	#define HOST_NAME 1
658	#define HOST_IPV4 2
659	#define HOST_IPV6 3
660
661	static int ipv4_normalize(struct dynbuf *host)
662	{
663	bool done = FALSE;
664	int n = 0;
665	const char *c = Curl_dyn_ptr(host);
666	unsigned long parts[4] = {0, 0, 0, 0};
667	CURLcode result = CURLE_OK;
668
669	if(*c == '[')
670	return HOST_IPV6;
671
672	while(!done) {
673	char *endp;
674	unsigned long l;
675	if(!ISDIGIT(*c))
676	/* most importantly this doesn't allow a leading plus or minus */
677	return HOST_NAME;
678	l = strtoul(c, &endp, 0);
679
680	parts[n] = l;
681	c = endp;
682
683	switch(*c) {
684	case '.':
685	if(n == 3)
686	return HOST_NAME;
687	n++;
688	c++;
689	break;
690
691	case '\0':
692	done = TRUE;
693	break;
694
695	default:
696	return HOST_NAME;
697	}
698
699	/* overflow */
700	if((l == ULONG_MAX) && (errno == ERANGE))
701	return HOST_NAME;
702
703	#if SIZEOF_LONG > 4
704	/* a value larger than 32 bits */
705	if(l > UINT_MAX)
706	return HOST_NAME;
707	#endif
708	}
709
710	switch(n) {
711	case 0: /* a -- 32 bits */
712	Curl_dyn_reset(host);
713
714	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
715	parts[0] >> 24, (parts[0] >> 16) & 0xff,
716	(parts[0] >> 8) & 0xff, parts[0] & 0xff);
717	break;
718	case 1: /* a.b -- 8.24 bits */
719	if((parts[0] > 0xff) \|\| (parts[1] > 0xffffff))
720	return HOST_NAME;
721	Curl_dyn_reset(host);
722	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
723	parts[0], (parts[1] >> 16) & 0xff,
724	(parts[1] >> 8) & 0xff, parts[1] & 0xff);
725	break;
726	case 2: /* a.b.c -- 8.8.16 bits */
727	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xffff))
728	return HOST_NAME;
729	Curl_dyn_reset(host);
730	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
731	parts[0], parts[1], (parts[2] >> 8) & 0xff,
732	parts[2] & 0xff);
733	break;
734	case 3: /* a.b.c.d -- 8.8.8.8 bits */
735	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xff) \|\|
736	(parts[3] > 0xff))
737	return HOST_NAME;
738	Curl_dyn_reset(host);
739	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
740	parts[0], parts[1], parts[2], parts[3]);
741	break;
742	}
743	if(result)
744	return HOST_ERROR;
745	return HOST_IPV4;
746	}
747
748	/* if necessary, replace the host content with a URL decoded version */
749	static CURLUcode urldecode_host(struct dynbuf *host)
750	{
751	char *per = NULL;
752	const char *hostname = Curl_dyn_ptr(host);
753	per = strchr(hostname, '%');
754	if(!per)
755	/* nothing to decode */
756	return CURLUE_OK;
757	else {
758	/* encoded */
759	size_t dlen;
760	char *decoded;
761	CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
762	REJECT_CTRL);
763	if(result)
764	return CURLUE_BAD_HOSTNAME;
765	Curl_dyn_reset(host);
766	result = Curl_dyn_addn(host, decoded, dlen);
767	free(decoded);
768	if(result)
769	return CURLUE_OUT_OF_MEMORY;
770	}
771
772	return CURLUE_OK;
773	}
774
775	static CURLUcode parse_authority(struct Curl_URL *u,
776	const char *auth, size_t authlen,
777	unsigned int flags,
778	struct dynbuf *host,
779	bool has_scheme)
780	{
781	size_t offset;
782	CURLUcode result;
783
784	/*
785	* Parse the login details and strip them out of the host name.
786	*/
787	result = parse_hostname_login(u, auth, authlen, flags, &offset);
788	if(result)
789	goto out;
790
791	if(Curl_dyn_addn(host, auth + offset, authlen - offset)) {
792	result = CURLUE_OUT_OF_MEMORY;
793	goto out;
794	}
795
796	result = Curl_parse_port(u, host, has_scheme);
797	if(result)
798	goto out;
799
800	if(!Curl_dyn_len(host))
801	return CURLUE_NO_HOST;
802
803	switch(ipv4_normalize(host)) {
804	case HOST_IPV4:
805	break;
806	case HOST_IPV6:
807	result = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
808	break;
809	case HOST_NAME:
810	result = urldecode_host(host);
811	if(!result)
812	result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
813	break;
814	case HOST_ERROR:
815	result = CURLUE_OUT_OF_MEMORY;
816	break;
817	case HOST_BAD:
818	default:
819	result = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
820	break;
821	}
822
823	out:
824	return result;
825	}
826
827	CURLUcode Curl_url_set_authority(CURLU u, const char authority,
828	unsigned int flags)
829	{
830	CURLUcode result;
831	struct dynbuf host;
832
833	DEBUGASSERT(authority);
834	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
835
836	result = parse_authority(u, authority, strlen(authority), flags,
837	&host, !!u->scheme);
838	if(result)
839	Curl_dyn_free(&host);
840	else {
841	free(u->host);
842	u->host = Curl_dyn_ptr(&host);
843	}
844	return result;
845	}
846
847	/*
848	* "Remove Dot Segments"
849	* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
850	*/
851
852	/*
853	* dedotdotify()
854	* @unittest: 1395
855	*
856	* This function gets a null-terminated path with dot and dotdot sequences
857	* passed in and strips them off according to the rules in RFC 3986 section
858	* 5.2.4.
859	*
860	* The function handles a query part ('?' + stuff) appended but it expects
861	* that fragments ('#' + stuff) have already been cut off.
862	*
863	* RETURNS
864	*
865	* Zero for success and 'out' set to an allocated dedotdotified string.
866	*/
867	UNITTEST int dedotdotify(const char input, size_t clen, char *outp);
868	UNITTEST int dedotdotify(const char input, size_t clen, char *outp)
869	{
870	char *outptr;
871	const char *endp = &input[clen];
872	char *out;
873
874	*outp = NULL;
875	/* the path always starts with a slash, and a slash has not dot */
876	if((clen < 2) \|\| !memchr(input, '.', clen))
877	return 0;
878
879	out = malloc(clen + 1);
880	if(!out)
881	return 1; /* out of memory */
882
883	out = 0; / null-terminates, for inputs like "./" */
884	outptr = out;
885
886	do {
887	bool dotdot = TRUE;
888	if(*input == '.') {
889	/* A. If the input buffer begins with a prefix of "../" or "./", then
890	remove that prefix from the input buffer; otherwise, */
891
892	if(!strncmp("./", input, 2)) {
893	input += 2;
894	clen -= 2;
895	}
896	else if(!strncmp("../", input, 3)) {
897	input += 3;
898	clen -= 3;
899	}
900	/* D. if the input buffer consists only of "." or "..", then remove
901	that from the input buffer; otherwise, */
902
903	else if(!strcmp(".", input) \|\| !strcmp("..", input) \|\|
904	!strncmp(".?", input, 2) \|\| !strncmp("..?", input, 3)) {
905	*out = 0;
906	break;
907	}
908	else
909	dotdot = FALSE;
910	}
911	else if(*input == '/') {
912	/* B. if the input buffer begins with a prefix of "/./" or "/.", where
913	"." is a complete path segment, then replace that prefix with "/" in
914	the input buffer; otherwise, */
915	if(!strncmp("/./", input, 3)) {
916	input += 2;
917	clen -= 2;
918	}
919	else if(!strcmp("/.", input) \|\| !strncmp("/.?", input, 3)) {
920	*outptr++ = '/';
921	*outptr = 0;
922	break;
923	}
924
925	/* C. if the input buffer begins with a prefix of "/../" or "/..",
926	where ".." is a complete path segment, then replace that prefix with
927	"/" in the input buffer and remove the last segment and its
928	preceding "/" (if any) from the output buffer; otherwise, */
929
930	else if(!strncmp("/../", input, 4)) {
931	input += 3;
932	clen -= 3;
933	/* remove the last segment from the output buffer */
934	while(outptr > out) {
935	outptr--;
936	if(*outptr == '/')
937	break;
938	}
939	outptr = 0; / null-terminate where it stops */
940	}
941	else if(!strcmp("/..", input) \|\| !strncmp("/..?", input, 4)) {
942	/* remove the last segment from the output buffer */
943	while(outptr > out) {
944	outptr--;
945	if(*outptr == '/')
946	break;
947	}
948	*outptr++ = '/';
949	outptr = 0; / null-terminate where it stops */
950	break;
951	}
952	else
953	dotdot = FALSE;
954	}
955	else
956	dotdot = FALSE;
957
958	if(!dotdot) {
959	/* E. move the first path segment in the input buffer to the end of
960	the output buffer, including the initial "/" character (if any) and
961	any subsequent characters up to, but not including, the next "/"
962	character or the end of the input buffer. */
963
964	do {
965	outptr++ = input++;
966	clen--;
967	} while(input && (input != '/') && (*input != '?'));
968	*outptr = 0;
969	}
970
971	/* continue until end of path */
972	} while(input < endp);
973
974	*outp = out;
975	return 0; /* success */
976	}
977
978	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
979	{
980	const char *path;
981	size_t pathlen;
982	char *query = NULL;
983	char *fragment = NULL;
984	char schemebuf[MAX_SCHEME_LEN + 1];
985	size_t schemelen = 0;
986	size_t urllen;
987	CURLUcode result = CURLUE_OK;
988	size_t fraglen = 0;
989	struct dynbuf host;
990
991	DEBUGASSERT(url);
992
993	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
994
995	result = junkscan(url, &urllen, flags);
996	if(result)
997	goto fail;
998
999	schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1000	flags & (CURLU_GUESS_SCHEME\|
1001	CURLU_DEFAULT_SCHEME));
1002
1003	/* handle the file: scheme */
1004	if(schemelen && !strcmp(schemebuf, "file")) {
1005	bool uncpath = FALSE;
1006	if(urllen <= 6) {
1007	/* file:/ is not enough to actually be a complete file: URL */
1008	result = CURLUE_BAD_FILE_URL;
1009	goto fail;
1010	}
1011
1012	/* path has been allocated large enough to hold this */
1013	path = (char *)&url[5];
1014	pathlen = urllen - 5;
1015
1016	u->scheme = strdup("file");
1017	if(!u->scheme) {
1018	result = CURLUE_OUT_OF_MEMORY;
1019	goto fail;
1020	}
1021
1022	/* Extra handling URLs with an authority component (i.e. that start with
1023	* "file://")
1024	*
1025	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
1026	* RFC 8089, but not the (current) WHAT-WG URL spec.
1027	*/
1028	if(path[0] == '/' && path[1] == '/') {
1029	/* swallow the two slashes */
1030	const char *ptr = &path[2];
1031
1032	/*
1033	* According to RFC 8089, a file: URL can be reliably dereferenced if:
1034	*
1035	* o it has no/blank hostname, or
1036	*
1037	* o the hostname matches "localhost" (case-insensitively), or
1038	*
1039	* o the hostname is a FQDN that resolves to this machine, or
1040	*
1041	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
1042	* Appendix E.3).
1043	*
1044	* For brevity, we only consider URLs with empty, "localhost", or
1045	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
1046	*
1047	* Additionally, there is an exception for URLs with a Windows drive
1048	* letter in the authority (which was accidentally omitted from RFC 8089
1049	* Appendix E, but believe me, it was meant to be there. --MK)
1050	*/
1051	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1052	/* the URL includes a host name, it must match "localhost" or
1053	"127.0.0.1" to be valid */
1054	if(checkprefix("localhost/", ptr) \|\|
1055	checkprefix("127.0.0.1/", ptr)) {
1056	ptr += 9; /* now points to the slash after the host */
1057	}
1058	else {
1059	#if defined(WIN32)
1060	size_t len;
1061
1062	/* the host name, NetBIOS computer name, can not contain disallowed
1063	chars, and the delimiting slash character must be appended to the
1064	host name */
1065	path = strpbrk(ptr, "/\\:*?\"<>\|");
1066	if(!path \|\| *path != '/') {
1067	result = CURLUE_BAD_FILE_URL;
1068	goto fail;
1069	}
1070
1071	len = path - ptr;
1072	if(len) {
1073	if(Curl_dyn_addn(&host, ptr, len)) {
1074	result = CURLUE_OUT_OF_MEMORY;
1075	goto fail;
1076	}
1077	uncpath = TRUE;
1078	}
1079
1080	ptr -= 2; /* now points to the // before the host in UNC */
1081	#else
1082	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1083	none */
1084	result = CURLUE_BAD_FILE_URL;
1085	goto fail;
1086	#endif
1087	}
1088	}
1089
1090	path = ptr;
1091	pathlen = urllen - (ptr - url);
1092	}
1093
1094	if(!uncpath)
1095	/* no host for file: URLs by default */
1096	Curl_dyn_reset(&host);
1097
1098	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1099	/* Don't allow Windows drive letters when not in Windows.
1100	* This catches both "file:/c:" and "file:c:" */
1101	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
1102	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1103	/* File drive letters are only accepted in MSDOS/Windows */
1104	result = CURLUE_BAD_FILE_URL;
1105	goto fail;
1106	}
1107	#else
1108	/* If the path starts with a slash and a drive letter, ditch the slash */
1109	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1110	/* This cannot be done with strcpy, as the memory chunks overlap! */
1111	path++;
1112	pathlen--;
1113	}
1114	#endif
1115
1116	}
1117	else {
1118	/* clear path */
1119	const char *schemep = NULL;
1120	const char *hostp;
1121	size_t hostlen;
1122
1123	if(schemelen) {
1124	int i = 0;
1125	const char *p = &url[schemelen + 1];
1126	while((*p == '/') && (i < 4)) {
1127	p++;
1128	i++;
1129	}
1130
1131	schemep = schemebuf;
1132	if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
1133	!(flags & CURLU_NON_SUPPORT_SCHEME)) {
1134	result = CURLUE_UNSUPPORTED_SCHEME;
1135	goto fail;
1136	}
1137
1138	if((i < 1) \|\| (i > 3)) {
1139	/* less than one or more than three slashes */
1140	result = CURLUE_BAD_SLASHES;
1141	goto fail;
1142	}
1143	hostp = p; /* host name starts here */
1144	}
1145	else {
1146	/* no scheme! */
1147
1148	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME))) {
1149	result = CURLUE_BAD_SCHEME;
1150	goto fail;
1151	}
1152	if(flags & CURLU_DEFAULT_SCHEME)
1153	schemep = DEFAULT_SCHEME;
1154
1155	/*
1156	* The URL was badly formatted, let's try without scheme specified.
1157	*/
1158	hostp = url;
1159	}
1160
1161	if(schemep) {
1162	u->scheme = strdup(schemep);
1163	if(!u->scheme) {
1164	result = CURLUE_OUT_OF_MEMORY;
1165	goto fail;
1166	}
1167	}
1168
1169	/* find the end of the host name + port number */
1170	hostlen = strcspn(hostp, "/?#");
1171	path = &hostp[hostlen];
1172
1173	/* this pathlen also contains the query and the fragment */
1174	pathlen = urllen - (path - url);
1175	if(hostlen) {
1176
1177	result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1178	if(result)
1179	goto fail;
1180
1181	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1182	const char *hostname = Curl_dyn_ptr(&host);
1183	/* legacy curl-style guess based on host name */
1184	if(checkprefix("ftp.", hostname))
1185	schemep = "ftp";
1186	else if(checkprefix("dict.", hostname))
1187	schemep = "dict";
1188	else if(checkprefix("ldap.", hostname))
1189	schemep = "ldap";
1190	else if(checkprefix("imap.", hostname))
1191	schemep = "imap";
1192	else if(checkprefix("smtp.", hostname))
1193	schemep = "smtp";
1194	else if(checkprefix("pop3.", hostname))
1195	schemep = "pop3";
1196	else
1197	schemep = "http";
1198
1199	u->scheme = strdup(schemep);
1200	if(!u->scheme) {
1201	result = CURLUE_OUT_OF_MEMORY;
1202	goto fail;
1203	}
1204	}
1205	}
1206	else if(flags & CURLU_NO_AUTHORITY) {
1207	/* allowed to be empty. */
1208	if(Curl_dyn_add(&host, "")) {
1209	result = CURLUE_OUT_OF_MEMORY;
1210	goto fail;
1211	}
1212	}
1213	else {
1214	result = CURLUE_NO_HOST;
1215	goto fail;
1216	}
1217	}
1218
1219	fragment = strchr(path, '#');
1220	if(fragment) {
1221	fraglen = pathlen - (fragment - path);
1222	if(fraglen > 1) {
1223	/* skip the leading '#' in the copy but include the terminating null */
1224	if(flags & CURLU_URLENCODE) {
1225	struct dynbuf enc;
1226	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1227	if(urlencode_str(&enc, fragment + 1, fraglen, TRUE, FALSE)) {
1228	result = CURLUE_OUT_OF_MEMORY;
1229	goto fail;
1230	}
1231	u->fragment = Curl_dyn_ptr(&enc);
1232	}
1233	else {
1234	u->fragment = Curl_memdup(fragment + 1, fraglen);
1235	if(!u->fragment) {
1236	result = CURLUE_OUT_OF_MEMORY;
1237	goto fail;
1238	}
1239	}
1240	}
1241	/* after this, pathlen still contains the query */
1242	pathlen -= fraglen;
1243	}
1244
1245	DEBUGASSERT(pathlen < urllen);
1246	query = memchr(path, '?', pathlen);
1247	if(query) {
1248	size_t qlen = fragment ? (size_t)(fragment - query) :
1249	pathlen - (query - path);
1250	pathlen -= qlen;
1251	if(qlen > 1) {
1252	if(flags & CURLU_URLENCODE) {
1253	struct dynbuf enc;
1254	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1255	/* skip the leading question mark */
1256	if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
1257	result = CURLUE_OUT_OF_MEMORY;
1258	goto fail;
1259	}
1260	u->query = Curl_dyn_ptr(&enc);
1261	}
1262	else {
1263	u->query = Curl_memdup(query + 1, qlen);
1264	if(!u->query) {
1265	result = CURLUE_OUT_OF_MEMORY;
1266	goto fail;
1267	}
1268	u->query[qlen - 1] = 0;
1269	}
1270	}
1271	else {
1272	/* single byte query */
1273	u->query = strdup("");
1274	if(!u->query) {
1275	result = CURLUE_OUT_OF_MEMORY;
1276	goto fail;
1277	}
1278	}
1279	}
1280
1281	if(pathlen && (flags & CURLU_URLENCODE)) {
1282	struct dynbuf enc;
1283	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1284	if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
1285	result = CURLUE_OUT_OF_MEMORY;
1286	goto fail;
1287	}
1288	pathlen = Curl_dyn_len(&enc);
1289	path = u->path = Curl_dyn_ptr(&enc);
1290	}
1291
1292	if(pathlen <= 1) {
1293	/* there is no path left or just the slash, unset */
1294	path = NULL;
1295	}
1296	else {
1297	if(!u->path) {
1298	u->path = Curl_memdup(path, pathlen + 1);
1299	if(!u->path) {
1300	result = CURLUE_OUT_OF_MEMORY;
1301	goto fail;
1302	}
1303	u->path[pathlen] = 0;
1304	path = u->path;
1305	}
1306	else if(flags & CURLU_URLENCODE)
1307	/* it might have encoded more than just the path so cut it */
1308	u->path[pathlen] = 0;
1309
1310	if(!(flags & CURLU_PATH_AS_IS)) {
1311	/* remove ../ and ./ sequences according to RFC3986 */
1312	char *dedot;
1313	int err = dedotdotify((char *)path, pathlen, &dedot);
1314	if(err) {
1315	result = CURLUE_OUT_OF_MEMORY;
1316	goto fail;
1317	}
1318	if(dedot) {
1319	free(u->path);
1320	u->path = dedot;
1321	}
1322	}
1323	}
1324
1325	u->host = Curl_dyn_ptr(&host);
1326
1327	return result;
1328	fail:
1329	Curl_dyn_free(&host);
1330	free_urlhandle(u);
1331	return result;
1332	}
1333
1334	/*
1335	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1336	*/
1337	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1338	unsigned int flags)
1339	{
1340	CURLUcode result;
1341	CURLU tmpurl;
1342	memset(&tmpurl, 0, sizeof(tmpurl));
1343	result = parseurl(url, &tmpurl, flags);
1344	if(!result) {
1345	free_urlhandle(u);
1346	*u = tmpurl;
1347	}
1348	return result;
1349	}
1350
1351	/*
1352	*/
1353	CURLU *curl_url(void)
1354	{
1355	return calloc(sizeof(struct Curl_URL), 1);
1356	}
1357
1358	void curl_url_cleanup(CURLU *u)
1359	{
1360	if(u) {
1361	free_urlhandle(u);
1362	free(u);
1363	}
1364	}
1365
/*
 * DUP() - copy the string member 'name' from 'src' into 'dest' with strdup()
 * when it is set in 'src'. Jumps to a 'fail' label in the enclosing function
 * if the duplication fails, so it can only be used where such a label exists.
 * The pointer arguments are parenthesized so that any pointer expression
 * (such as '&var') can be passed.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1374
1375	CURLU curl_url_dup(const CURLU in)
1376	{
1377	struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1378	if(u) {
1379	DUP(u, in, scheme);
1380	DUP(u, in, user);
1381	DUP(u, in, password);
1382	DUP(u, in, options);
1383	DUP(u, in, host);
1384	DUP(u, in, port);
1385	DUP(u, in, path);
1386	DUP(u, in, query);
1387	DUP(u, in, fragment);
1388	DUP(u, in, zoneid);
1389	u->portnum = in->portnum;
1390	}
1391	return u;
1392	fail:
1393	curl_url_cleanup(u);
1394	return NULL;
1395	}
1396
1397	CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1398	char **part, unsigned int flags)
1399	{
1400	const char *ptr;
1401	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1402	char portbuf[7];
1403	bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1404	bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1405	bool punycode = FALSE;
1406	bool depunyfy = FALSE;
1407	bool plusdecode = FALSE;
1408	(void)flags;
1409	if(!u)
1410	return CURLUE_BAD_HANDLE;
1411	if(!part)
1412	return CURLUE_BAD_PARTPOINTER;
1413	*part = NULL;
1414
1415	switch(what) {
1416	case CURLUPART_SCHEME:
1417	ptr = u->scheme;
1418	ifmissing = CURLUE_NO_SCHEME;
1419	urldecode = FALSE; /* never for schemes */
1420	break;
1421	case CURLUPART_USER:
1422	ptr = u->user;
1423	ifmissing = CURLUE_NO_USER;
1424	break;
1425	case CURLUPART_PASSWORD:
1426	ptr = u->password;
1427	ifmissing = CURLUE_NO_PASSWORD;
1428	break;
1429	case CURLUPART_OPTIONS:
1430	ptr = u->options;
1431	ifmissing = CURLUE_NO_OPTIONS;
1432	break;
1433	case CURLUPART_HOST:
1434	ptr = u->host;
1435	ifmissing = CURLUE_NO_HOST;
1436	punycode = (flags & CURLU_PUNYCODE)?1:0;
1437	depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1438	break;
1439	case CURLUPART_ZONEID:
1440	ptr = u->zoneid;
1441	ifmissing = CURLUE_NO_ZONEID;
1442	break;
1443	case CURLUPART_PORT:
1444	ptr = u->port;
1445	ifmissing = CURLUE_NO_PORT;
1446	urldecode = FALSE; /* never for port */
1447	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1448	/* there's no stored port number, but asked to deliver
1449	a default one for the scheme */
1450	const struct Curl_handler *h =
1451	Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1452	if(h) {
1453	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1454	ptr = portbuf;
1455	}
1456	}
1457	else if(ptr && u->scheme) {
1458	/* there is a stored port number, but ask to inhibit if
1459	it matches the default one for the scheme */
1460	const struct Curl_handler *h =
1461	Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
1462	if(h && (h->defport == u->portnum) &&
1463	(flags & CURLU_NO_DEFAULT_PORT))
1464	ptr = NULL;
1465	}
1466	break;
1467	case CURLUPART_PATH:
1468	ptr = u->path;
1469	if(!ptr)
1470	ptr = "/";
1471	break;
1472	case CURLUPART_QUERY:
1473	ptr = u->query;
1474	ifmissing = CURLUE_NO_QUERY;
1475	plusdecode = urldecode;
1476	break;
1477	case CURLUPART_FRAGMENT:
1478	ptr = u->fragment;
1479	ifmissing = CURLUE_NO_FRAGMENT;
1480	break;
1481	case CURLUPART_URL: {
1482	char *url;
1483	char *scheme;
1484	char *options = u->options;
1485	char *port = u->port;
1486	char *allochost = NULL;
1487	punycode = (flags & CURLU_PUNYCODE)?1:0;
1488	depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1489	if(u->scheme && strcasecompare("file", u->scheme)) {
1490	url = aprintf("file://%s%s%s",
1491	u->path,
1492	u->fragment? "#": "",
1493	u->fragment? u->fragment : "");
1494	}
1495	else if(!u->host)
1496	return CURLUE_NO_HOST;
1497	else {
1498	const struct Curl_handler *h = NULL;
1499	if(u->scheme)
1500	scheme = u->scheme;
1501	else if(flags & CURLU_DEFAULT_SCHEME)
1502	scheme = (char *) DEFAULT_SCHEME;
1503	else
1504	return CURLUE_NO_SCHEME;
1505
1506	h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1507	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1508	/* there's no stored port number, but asked to deliver
1509	a default one for the scheme */
1510	if(h) {
1511	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1512	port = portbuf;
1513	}
1514	}
1515	else if(port) {
1516	/* there is a stored port number, but asked to inhibit if it matches
1517	the default one for the scheme */
1518	if(h && (h->defport == u->portnum) &&
1519	(flags & CURLU_NO_DEFAULT_PORT))
1520	port = NULL;
1521	}
1522
1523	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1524	options = NULL;
1525
1526	if(u->host[0] == '[') {
1527	if(u->zoneid) {
1528	/* make it '[ host %25 zoneid ]' */
1529	struct dynbuf enc;
1530	size_t hostlen = strlen(u->host);
1531	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1532	if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1533	u->zoneid))
1534	return CURLUE_OUT_OF_MEMORY;
1535	allochost = Curl_dyn_ptr(&enc);
1536	}
1537	}
1538	else if(urlencode) {
1539	allochost = curl_easy_escape(NULL, u->host, 0);
1540	if(!allochost)
1541	return CURLUE_OUT_OF_MEMORY;
1542	}
1543	else if(punycode) {
1544	if(!Curl_is_ASCII_name(u->host)) {
1545	#ifndef USE_IDN
1546	return CURLUE_LACKS_IDN;
1547	#else
1548	CURLcode result = Curl_idn_decode(u->host, &allochost);
1549	if(result)
1550	return (result == CURLE_OUT_OF_MEMORY) ?
1551	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1552	#endif
1553	}
1554	}
1555	else if(depunyfy) {
1556	if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1557	#ifndef USE_IDN
1558	return CURLUE_LACKS_IDN;
1559	#else
1560	CURLcode result = Curl_idn_encode(u->host, &allochost);
1561	if(result)
1562	/* this is the most likely error */
1563	return (result == CURLE_OUT_OF_MEMORY) ?
1564	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1565	#endif
1566	}
1567	}
1568
1569	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1570	scheme,
1571	u->user ? u->user : "",
1572	u->password ? ":": "",
1573	u->password ? u->password : "",
1574	options ? ";" : "",
1575	options ? options : "",
1576	(u->user \|\| u->password \|\| options) ? "@": "",
1577	allochost ? allochost : u->host,
1578	port ? ":": "",
1579	port ? port : "",
1580	u->path ? u->path : "/",
1581	(u->query && u->query[0]) ? "?": "",
1582	(u->query && u->query[0]) ? u->query : "",
1583	u->fragment? "#": "",
1584	u->fragment? u->fragment : "");
1585	free(allochost);
1586	}
1587	if(!url)
1588	return CURLUE_OUT_OF_MEMORY;
1589	*part = url;
1590	return CURLUE_OK;
1591	}
1592	default:
1593	ptr = NULL;
1594	break;
1595	}
1596	if(ptr) {
1597	size_t partlen = strlen(ptr);
1598	size_t i = 0;
1599	*part = Curl_memdup(ptr, partlen + 1);
1600	if(!*part)
1601	return CURLUE_OUT_OF_MEMORY;
1602	if(plusdecode) {
1603	/* convert + to space */
1604	char plus = part;
1605	for(i = 0; i < partlen; ++plus, i++) {
1606	if(*plus == '+')
1607	*plus = ' ';
1608	}
1609	}
1610	if(urldecode) {
1611	char *decoded;
1612	size_t dlen;
1613	/* this unconditional rejection of control bytes is documented
1614	API behavior */
1615	CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1616	free(*part);
1617	if(res) {
1618	*part = NULL;
1619	return CURLUE_URLDECODE;
1620	}
1621	*part = decoded;
1622	partlen = dlen;
1623	}
1624	if(urlencode) {
1625	struct dynbuf enc;
1626	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1627	if(urlencode_str(&enc, *part, partlen, TRUE,
1628	what == CURLUPART_QUERY))
1629	return CURLUE_OUT_OF_MEMORY;
1630	free(*part);
1631	*part = Curl_dyn_ptr(&enc);
1632	}
1633	else if(punycode) {
1634	if(!Curl_is_ASCII_name(u->host)) {
1635	#ifndef USE_IDN
1636	return CURLUE_LACKS_IDN;
1637	#else
1638	char *allochost;
1639	CURLcode result = Curl_idn_decode(*part, &allochost);
1640	if(result)
1641	return (result == CURLE_OUT_OF_MEMORY) ?
1642	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1643	free(*part);
1644	*part = allochost;
1645	#endif
1646	}
1647	}
1648	else if(depunyfy) {
1649	if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1650	#ifndef USE_IDN
1651	return CURLUE_LACKS_IDN;
1652	#else
1653	char *allochost;
1654	CURLcode result = Curl_idn_encode(*part, &allochost);
1655	if(result)
1656	return (result == CURLE_OUT_OF_MEMORY) ?
1657	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1658	free(*part);
1659	*part = allochost;
1660	#endif
1661	}
1662	}
1663
1664	return CURLUE_OK;
1665	}
1666	else
1667	return ifmissing;
1668	}
1669
1670	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1671	const char *part, unsigned int flags)
1672	{
1673	char **storep = NULL;
1674	long port = 0;
1675	bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1676	bool plusencode = FALSE;
1677	bool urlskipslash = FALSE;
1678	bool leadingslash = FALSE;
1679	bool appendquery = FALSE;
1680	bool equalsencode = FALSE;
1681	size_t nalloc;
1682
1683	if(!u)
1684	return CURLUE_BAD_HANDLE;
1685	if(!part) {
1686	/* setting a part to NULL clears it */
1687	switch(what) {
1688	case CURLUPART_URL:
1689	break;
1690	case CURLUPART_SCHEME:
1691	storep = &u->scheme;
1692	break;
1693	case CURLUPART_USER:
1694	storep = &u->user;
1695	break;
1696	case CURLUPART_PASSWORD:
1697	storep = &u->password;
1698	break;
1699	case CURLUPART_OPTIONS:
1700	storep = &u->options;
1701	break;
1702	case CURLUPART_HOST:
1703	storep = &u->host;
1704	break;
1705	case CURLUPART_ZONEID:
1706	storep = &u->zoneid;
1707	break;
1708	case CURLUPART_PORT:
1709	u->portnum = 0;
1710	storep = &u->port;
1711	break;
1712	case CURLUPART_PATH:
1713	storep = &u->path;
1714	break;
1715	case CURLUPART_QUERY:
1716	storep = &u->query;
1717	break;
1718	case CURLUPART_FRAGMENT:
1719	storep = &u->fragment;
1720	break;
1721	default:
1722	return CURLUE_UNKNOWN_PART;
1723	}
1724	if(storep && *storep) {
1725	Curl_safefree(*storep);
1726	}
1727	else if(!storep) {
1728	free_urlhandle(u);
1729	memset(u, 0, sizeof(struct Curl_URL));
1730	}
1731	return CURLUE_OK;
1732	}
1733
1734	nalloc = strlen(part);
1735	if(nalloc > CURL_MAX_INPUT_LENGTH)
1736	/* excessive input length */
1737	return CURLUE_MALFORMED_INPUT;
1738
1739	switch(what) {
1740	case CURLUPART_SCHEME: {
1741	size_t plen = strlen(part);
1742	const char *s = part;
1743	if((plen > MAX_SCHEME_LEN) \|\| (plen < 1))
1744	/* too long or too short */
1745	return CURLUE_BAD_SCHEME;
1746	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1747	/* verify that it is a fine scheme */
1748	!Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
1749	return CURLUE_UNSUPPORTED_SCHEME;
1750	storep = &u->scheme;
1751	urlencode = FALSE; /* never */
1752	if(ISALPHA(*s)) {
1753	/* ALPHA ( ALPHA / DIGIT / "+" / "-" / "." ) /
1754	while(--plen) {
1755	if(ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.'))
1756	s++; /* fine */
1757	else
1758	return CURLUE_BAD_SCHEME;
1759	}
1760	}
1761	else
1762	return CURLUE_BAD_SCHEME;
1763	break;
1764	}
1765	case CURLUPART_USER:
1766	storep = &u->user;
1767	break;
1768	case CURLUPART_PASSWORD:
1769	storep = &u->password;
1770	break;
1771	case CURLUPART_OPTIONS:
1772	storep = &u->options;
1773	break;
1774	case CURLUPART_HOST:
1775	storep = &u->host;
1776	Curl_safefree(u->zoneid);
1777	break;
1778	case CURLUPART_ZONEID:
1779	storep = &u->zoneid;
1780	break;
1781	case CURLUPART_PORT:
1782	{
1783	char *endp;
1784	urlencode = FALSE; /* never */
1785	port = strtol(part, &endp, 10); /* Port number must be decimal */
1786	if((port <= 0) \|\| (port > 0xffff))
1787	return CURLUE_BAD_PORT_NUMBER;
1788	if(*endp)
1789	/* weirdly provided number, not good! */
1790	return CURLUE_BAD_PORT_NUMBER;
1791	storep = &u->port;
1792	}
1793	break;
1794	case CURLUPART_PATH:
1795	urlskipslash = TRUE;
1796	leadingslash = TRUE; /* enforce */
1797	storep = &u->path;
1798	break;
1799	case CURLUPART_QUERY:
1800	plusencode = urlencode;
1801	appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1802	equalsencode = appendquery;
1803	storep = &u->query;
1804	break;
1805	case CURLUPART_FRAGMENT:
1806	storep = &u->fragment;
1807	break;
1808	case CURLUPART_URL: {
1809	/*
1810	* Allow a new URL to replace the existing (if any) contents.
1811	*
1812	* If the existing contents is enough for a URL, allow a relative URL to
1813	* replace it.
1814	*/
1815	CURLUcode result;
1816	char *oldurl;
1817	char *redired_url;
1818
1819	if(!nalloc)
1820	/* a blank URL is not a valid URL */
1821	return CURLUE_MALFORMED_INPUT;
1822
1823	/* if the new thing is absolute or the old one is not
1824	* (we could not get an absolute url in 'oldurl'),
1825	* then replace the existing with the new. */
1826	if(Curl_is_absolute_url(part, NULL, 0,
1827	flags & (CURLU_GUESS_SCHEME\|
1828	CURLU_DEFAULT_SCHEME))
1829	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1830	return parseurl_and_replace(part, u, flags);
1831	}
1832
1833	/* apply the relative part to create a new URL
1834	* and replace the existing one with it. */
1835	redired_url = concat_url(oldurl, part);
1836	free(oldurl);
1837	if(!redired_url)
1838	return CURLUE_OUT_OF_MEMORY;
1839
1840	result = parseurl_and_replace(redired_url, u, flags);
1841	free(redired_url);
1842	return result;
1843	}
1844	default:
1845	return CURLUE_UNKNOWN_PART;
1846	}
1847	DEBUGASSERT(storep);
1848	{
1849	const char *newp;
1850	struct dynbuf enc;
1851	Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1852
1853	if(leadingslash && (part[0] != '/')) {
1854	CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1855	if(result)
1856	return CURLUE_OUT_OF_MEMORY;
1857	}
1858	if(urlencode) {
1859	const unsigned char *i;
1860
1861	for(i = (const unsigned char )part; i; i++) {
1862	CURLcode result;
1863	if((*i == ' ') && plusencode) {
1864	result = Curl_dyn_addn(&enc, "+", 1);
1865	if(result)
1866	return CURLUE_OUT_OF_MEMORY;
1867	}
1868	else if(ISUNRESERVED(*i) \|\|
1869	((*i == '/') && urlskipslash) \|\|
1870	((*i == '=') && equalsencode)) {
1871	if((*i == '=') && equalsencode)
1872	/* only skip the first equals sign */
1873	equalsencode = FALSE;
1874	result = Curl_dyn_addn(&enc, i, 1);
1875	if(result)
1876	return CURLUE_OUT_OF_MEMORY;
1877	}
1878	else {
1879	char out[3]={'%'};
1880	out[1] = hexdigits[*i>>4];
1881	out[2] = hexdigits[*i & 0xf];
1882	result = Curl_dyn_addn(&enc, out, 3);
1883	if(result)
1884	return CURLUE_OUT_OF_MEMORY;
1885	}
1886	}
1887	}
1888	else {
1889	char *p;
1890	CURLcode result = Curl_dyn_add(&enc, part);
1891	if(result)
1892	return CURLUE_OUT_OF_MEMORY;
1893	p = Curl_dyn_ptr(&enc);
1894	while(*p) {
1895	/* make sure percent encoded are lower case */
1896	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1897	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1898	p[1] = Curl_raw_tolower(p[1]);
1899	p[2] = Curl_raw_tolower(p[2]);
1900	p += 3;
1901	}
1902	else
1903	p++;
1904	}
1905	}
1906	newp = Curl_dyn_ptr(&enc);
1907
1908	if(appendquery) {
1909	/* Append the 'newp' string onto the old query. Add a '&' separator if
1910	none is present at the end of the existing query already */
1911
1912	size_t querylen = u->query ? strlen(u->query) : 0;
1913	bool addamperand = querylen && (u->query[querylen -1] != '&');
1914	if(querylen) {
1915	struct dynbuf qbuf;
1916	Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1917
1918	if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1919	goto nomem;
1920
1921	if(addamperand) {
1922	if(Curl_dyn_addn(&qbuf, "&", 1))
1923	goto nomem;
1924	}
1925	if(Curl_dyn_add(&qbuf, newp))
1926	goto nomem;
1927	Curl_dyn_free(&enc);
1928	free(*storep);
1929	*storep = Curl_dyn_ptr(&qbuf);
1930	return CURLUE_OK;
1931	nomem:
1932	Curl_dyn_free(&enc);
1933	return CURLUE_OUT_OF_MEMORY;
1934	}
1935	}
1936
1937	if(what == CURLUPART_HOST) {
1938	size_t n = strlen(newp);
1939	if(!n && (flags & CURLU_NO_AUTHORITY)) {
1940	/* Skip hostname check, it's allowed to be empty. */
1941	}
1942	else {
1943	if(!n \|\| hostname_check(u, (char *)newp, n)) {
1944	Curl_dyn_free(&enc);
1945	return CURLUE_BAD_HOSTNAME;
1946	}
1947	}
1948	}
1949
1950	free(*storep);
1951	storep = (char )newp;
1952	}
1953	/* set after the string, to make it not assigned if the allocation above
1954	fails */
1955	if(port)
1956	u->portnum = port;
1957	return CURLUE_OK;
1958	}

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/curl-8.4.0/lib/urlapi.c@ 103621

以其他格式下載: