urlapi.c@ 106165

最後變更在這個檔案從106165是 104083,由 vboxsync 提交於 8 月前
curl-8.7.1: Applied and adjusted our curl changes to 8.4.0. bugref:10639
屬性 svn:eol-style 設為 `native`
檔案大小: 52.8 KB

行
1	/***************************************************************************
2	 *                                  _   _ ____  _
3	 *  Project                     ___| | | |  _ \| |
4	 *                             / __| | | | |_) | |
5	 *                            | (__| |_| |  _ <| |___
6	 *                             \___|\___/|_| \_\_____|
7	*
8	* Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9	*
10	* This software is licensed as described in the file COPYING, which
11	* you should have received as part of this distribution. The terms
12	* are also available at https://curl.se/docs/copyright.html.
13	*
14	* You may opt to use, copy, modify, merge, publish, distribute and/or sell
15	* copies of the Software, and permit persons to whom the Software is
16	* furnished to do so, under the terms of the COPYING file.
17	*
18	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19	* KIND, either express or implied.
20	*
21	* SPDX-License-Identifier: curl
22	*
23	***************************************************************************/
24
25	#include "curl_setup.h"
26
27	#include "urldata.h"
28	#include "urlapi-int.h"
29	#include "strcase.h"
30	#include "url.h"
31	#include "escape.h"
32	#include "curl_ctype.h"
33	#include "inet_pton.h"
34	#include "inet_ntop.h"
35	#include "strdup.h"
36	#include "idn.h"
37	#include "curl_memrchr.h"
38
39	/* The last 3 #include files should be in this order */
40	#include "curl_printf.h"
41	#include "curl_memory.h"
42	#include "memdebug.h"
43
/* MSDOS/Windows style drive prefix, eg c: in c:foo.
 *
 * 'str' is evaluated several times, so pass an expression without side
 * effects. The argument is parenthesized so that pointer expressions such as
 * 'p + 1' index correctly. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
49
/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash, a backslash or NUL.
 *
 * 'str' is evaluated several times, so pass an expression without side
 * effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
57
/* scheme is not URL encoded, the longest libcurl supported ones are...
 *
 * Curl_is_absolute_url() stops scanning for the end of a scheme at this many
 * characters and asserts that any buffer receiving the scheme is larger than
 * this value. */
#define MAX_SCHEME_LEN 40
60
/*
 * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere.
 *
 * The stand-in is only defined when neither ENABLE_IPV6 nor a real AF_INET6
 * is available. In this file it is handed to Curl_inet_pton() and
 * Curl_inet_ntop() by ipv6_parse().
 */
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
69
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every string member is individually allocated and released by
 * free_urlhandle(); a NULL member means that part is not set. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};
84
/* scheme parseurl() assigns to a URL that has none when CURLU_DEFAULT_SCHEME
   is set */
#define DEFAULT_SCHEME "https"
86
87	static void free_urlhandle(struct Curl_URL *u)
88	{
89	free(u->scheme);
90	free(u->user);
91	free(u->password);
92	free(u->options);
93	free(u->host);
94	free(u->zoneid);
95	free(u->port);
96	free(u->path);
97	free(u->query);
98	free(u->fragment);
99	}
100
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.example.com?id=2380
 *
 * The search starts right after the first "//" in 'url', or at the start of
 * the string when there is none. Returns a pointer into 'url': the first '/'
 * or '?' found from there, whichever comes first, or the terminating null
 * byte when the rest of the string holds neither.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;
  const char *end = url + strlen(url);

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  /* a missing separator counts as being at the end of the string */
  if(!sep)
    sep = end;

  if(!query)
    query = end;

  return sep < query ? sep : query;
}
128
/* convert CURLcode to CURLUcode: CURLE_TOO_LARGE becomes CURLUE_TOO_LARGE and
   every other value, CURLE_OK included, becomes CURLUE_OUT_OF_MEMORY, so only
   apply this to a failure code */
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
                  CURLUE_OUT_OF_MEMORY)
/*
 * Decide whether a character in a URL must be escaped: true for every byte
 * that none of ISCNTRL(), ISSPACE() and ISGRAPH() accepts.
 */
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
136
137	static const char hexdigits[] = "0123456789abcdef";
138	/* urlencode_str() writes data into an output dynbuf and URL-encodes the
139	* spaces in the source URL accordingly.
140	*
141	* URL encoding should be skipped for host names, otherwise IDN resolution
142	* will fail.
143	*/
144	static CURLUcode urlencode_str(struct dynbuf o, const char url,
145	size_t len, bool relative,
146	bool query)
147	{
148	/* we must add this with whitespace-replacing */
149	bool left = !query;
150	const unsigned char *iptr;
151	const unsigned char host_sep = (const unsigned char ) url;
152	CURLcode result;
153
154	if(!relative)
155	host_sep = (const unsigned char *) find_host_sep(url);
156
157	for(iptr = (unsigned char )url; / read from here */
158	len; iptr++, len--) {
159
160	if(iptr < host_sep) {
161	result = Curl_dyn_addn(o, iptr, 1);
162	if(result)
163	return cc2cu(result);
164	continue;
165	}
166
167	if(*iptr == ' ') {
168	if(left)
169	result = Curl_dyn_addn(o, "%20", 3);
170	else
171	result = Curl_dyn_addn(o, "+", 1);
172	if(result)
173	return cc2cu(result);
174	continue;
175	}
176
177	if(*iptr == '?')
178	left = FALSE;
179
180	if(urlchar_needs_escaping(*iptr)) {
181	char out[3]={'%'};
182	out[1] = hexdigits[*iptr>>4];
183	out[2] = hexdigits[*iptr & 0xf];
184	result = Curl_dyn_addn(o, out, 3);
185	}
186	else
187	result = Curl_dyn_addn(o, iptr, 1);
188	if(result)
189	return cc2cu(result);
190	}
191
192	return CURLUE_OK;
193	}
194
195	/*
196	* Returns the length of the scheme if the given URL is absolute (as opposed
197	* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
198	* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
199	*
200	* If 'guess_scheme' is TRUE, it means the URL might be provided without
201	* scheme.
202	*/
203	size_t Curl_is_absolute_url(const char url, char buf, size_t buflen,
204	bool guess_scheme)
205	{
206	int i = 0;
207	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
208	(void)buflen; /* only used in debug-builds */
209	if(buf)
210	buf[0] = 0; /* always leave a defined value in buf */
211	#ifdef _WIN32
212	if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
213	return 0;
214	#endif
215	if(ISALPHA(url[0]))
216	for(i = 1; i < MAX_SCHEME_LEN; ++i) {
217	char s = url[i];
218	if(s && (ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.') )) {
219	/* RFC 3986 3.1 explains:
220	scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
221	*/
222	}
223	else {
224	break;
225	}
226	}
227	if(i && (url[i] == ':') && ((url[i + 1] == '/') \|\| !guess_scheme)) {
228	/* If this does not guess scheme, the scheme always ends with the colon so
229	that this also detects data: URLs etc. In guessing mode, data: could
230	be the host name "data" with a specified port number. */
231
232	/* the length of the scheme is the name part only */
233	size_t len = i;
234	if(buf) {
235	buf[i] = 0;
236	while(i--) {
237	buf[i] = Curl_raw_tolower(url[i]);
238	}
239	}
240	return len;
241	}
242	return 0;
243	}
244
245	/*
246	* Concatenate a relative URL to a base URL making it absolute.
247	* URL-encodes any spaces.
248	* The returned pointer must be freed by the caller unless NULL
249	* (returns NULL on out of memory).
250	*
251	* Note that this function destroys the 'base' string.
252	*/
253	static CURLcode concat_url(char base, const char relurl, char **newurl)
254	{
255	/***
256	TRY to append this new path to the old URL
257	to the right of the host part. Oh crap, this is doomed to cause
258	problems in the future...
259	*/
260	struct dynbuf newest;
261	char *protsep;
262	char *pathsep;
263	bool host_changed = FALSE;
264	const char *useurl = relurl;
265	CURLcode result = CURLE_OK;
266	CURLUcode uc;
267	*newurl = NULL;
268
269	/* protsep points to the start of the host name */
270	protsep = strstr(base, "//");
271	if(!protsep)
272	protsep = base;
273	else
274	protsep += 2; /* pass the slashes */
275
276	if('/' != relurl[0]) {
277	int level = 0;
278
279	/* First we need to find out if there's a ?-letter in the URL,
280	and cut it and the right-side of that off */
281	pathsep = strchr(protsep, '?');
282	if(pathsep)
283	*pathsep = 0;
284
285	/* we have a relative path to append to the last slash if there's one
286	available, or if the new URL is just a query string (starts with a
287	'?') we append the new one at the end of the entire currently worked
288	out URL */
289	if(useurl[0] != '?') {
290	pathsep = strrchr(protsep, '/');
291	if(pathsep)
292	*pathsep = 0;
293	}
294
295	/* Check if there's any slash after the host name, and if so, remember
296	that position instead */
297	pathsep = strchr(protsep, '/');
298	if(pathsep)
299	protsep = pathsep + 1;
300	else
301	protsep = NULL;
302
303	/* now deal with one "./" or any amount of "../" in the newurl
304	and act accordingly */
305
306	if((useurl[0] == '.') && (useurl[1] == '/'))
307	useurl += 2; /* just skip the "./" */
308
309	while((useurl[0] == '.') &&
310	(useurl[1] == '.') &&
311	(useurl[2] == '/')) {
312	level++;
313	useurl += 3; /* pass the "../" */
314	}
315
316	if(protsep) {
317	while(level--) {
318	/* cut off one more level from the right of the original URL */
319	pathsep = strrchr(protsep, '/');
320	if(pathsep)
321	*pathsep = 0;
322	else {
323	*protsep = 0;
324	break;
325	}
326	}
327	}
328	}
329	else {
330	/* We got a new absolute path for this server */
331
332	if(relurl[1] == '/') {
333	/* the new URL starts with //, just keep the protocol part from the
334	original one */
335	*protsep = 0;
336	useurl = &relurl[2]; /* we keep the slashes from the original, so we
337	skip the new ones */
338	host_changed = TRUE;
339	}
340	else {
341	/* cut off the original URL from the first slash, or deal with URLs
342	without slash */
343	pathsep = strchr(protsep, '/');
344	if(pathsep) {
345	/* When people use badly formatted URLs, such as
346	"http://www.example.com?dir=/home/daniel" we must not use the first
347	slash, if there's a ?-letter before it! */
348	char *sep = strchr(protsep, '?');
349	if(sep && (sep < pathsep))
350	pathsep = sep;
351	*pathsep = 0;
352	}
353	else {
354	/* There was no slash. Now, since we might be operating on a badly
355	formatted URL, such as "http://www.example.com?id=2380" which
356	doesn't use a slash separator as it is supposed to, we need to check
357	for a ?-letter as well! */
358	pathsep = strchr(protsep, '?');
359	if(pathsep)
360	*pathsep = 0;
361	}
362	}
363	}
364
365	Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
366
367	/* copy over the root url part */
368	result = Curl_dyn_add(&newest, base);
369	if(result)
370	return result;
371
372	/* check if we need to append a slash */
373	if(('/' == useurl[0]) \|\| (protsep && !*protsep) \|\| ('?' == useurl[0]))
374	;
375	else {
376	result = Curl_dyn_addn(&newest, "/", 1);
377	if(result)
378	return result;
379	}
380
381	/* then append the new piece on the right side */
382	uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
383	FALSE);
384	if(uc)
385	return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;
386
387	*newurl = Curl_dyn_ptr(&newest);
388	return CURLE_OK;
389	}
390
391	/* scan for byte values <= 31, 127 and sometimes space */
392	static CURLUcode junkscan(const char url, size_t urllen, unsigned int flags)
393	{
394	static const char badbytes[]={
395	/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
398	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
399	0x7f, 0x00 /* null-terminate */
400	};
401	size_t n = strlen(url);
402	size_t nfine;
403
404	if(n > CURL_MAX_INPUT_LENGTH)
405	/* excessive input length */
406	return CURLUE_MALFORMED_INPUT;
407
408	nfine = strcspn(url, badbytes);
409	if((nfine != n) \|\|
410	(!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
411	return CURLUE_MALFORMED_INPUT;
412
413	*urllen = n;
414	return CURLUE_OK;
415	}
416
417	/*
418	* parse_hostname_login()
419	*
420	* Parse the login details (user name, password and options) from the URL and
421	* strip them out of the host name
422	*
423	*/
424	static CURLUcode parse_hostname_login(struct Curl_URL *u,
425	const char *login,
426	size_t len,
427	unsigned int flags,
428	size_t offset) / to the host name */
429	{
430	CURLUcode result = CURLUE_OK;
431	CURLcode ccode;
432	char *userp = NULL;
433	char *passwdp = NULL;
434	char *optionsp = NULL;
435	const struct Curl_handler *h = NULL;
436
437	/* At this point, we assume all the other special cases have been taken
438	* care of, so the host is at most
439	*
440	* [user[:password][;options]]@]hostname
441	*
442	* We need somewhere to put the embedded details, so do that first.
443	*/
444	char *ptr;
445
446	DEBUGASSERT(login);
447
448	*offset = 0;
449	ptr = memchr(login, '@', len);
450	if(!ptr)
451	goto out;
452
453	/* We will now try to extract the
454	* possible login information in a string like:
455	* ftp://user:[email protected]:8021/README */
456	ptr++;
457
458	/* if this is a known scheme, get some details */
459	if(u->scheme)
460	h = Curl_get_scheme_handler(u->scheme);
461
462	/* We could use the login information in the URL so extract it. Only parse
463	options if the handler says we should. Note that 'h' might be NULL! */
464	ccode = Curl_parse_login_details(login, ptr - login - 1,
465	&userp, &passwdp,
466	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
467	&optionsp:NULL);
468	if(ccode) {
469	result = CURLUE_BAD_LOGIN;
470	goto out;
471	}
472
473	if(userp) {
474	if(flags & CURLU_DISALLOW_USER) {
475	/* Option DISALLOW_USER is set and url contains username. */
476	result = CURLUE_USER_NOT_ALLOWED;
477	goto out;
478	}
479	free(u->user);
480	u->user = userp;
481	}
482
483	if(passwdp) {
484	free(u->password);
485	u->password = passwdp;
486	}
487
488	if(optionsp) {
489	free(u->options);
490	u->options = optionsp;
491	}
492
493	/* the host name starts at this offset */
494	*offset = ptr - login;
495	return CURLUE_OK;
496
497	out:
498
499	free(userp);
500	free(passwdp);
501	free(optionsp);
502	u->user = NULL;
503	u->password = NULL;
504	u->options = NULL;
505
506	return result;
507	}
508
509	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, struct dynbuf host,
510	bool has_scheme)
511	{
512	char *portptr;
513	char *hostname = Curl_dyn_ptr(host);
514	/*
515	* Find the end of an IPv6 address on the ']' ending bracket.
516	*/
517	if(hostname[0] == '[') {
518	portptr = strchr(hostname, ']');
519	if(!portptr)
520	return CURLUE_BAD_IPV6;
521	portptr++;
522	/* this is a RFC2732-style specified IP-address */
523	if(*portptr) {
524	if(*portptr != ':')
525	return CURLUE_BAD_PORT_NUMBER;
526	}
527	else
528	portptr = NULL;
529	}
530	else
531	portptr = strchr(hostname, ':');
532
533	if(portptr) {
534	char *rest = NULL;
535	long port;
536	size_t keep = portptr - hostname;
537
538	/* Browser behavior adaptation. If there's a colon with no digits after,
539	just cut off the name there which makes us ignore the colon and just
540	use the default port. Firefox, Chrome and Safari all do that.
541
542	Don't do it if the URL has no scheme, to make something that looks like
543	a scheme not work!
544	*/
545	Curl_dyn_setlen(host, keep);
546	portptr++;
547	if(!*portptr)
548	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
549
550	if(!ISDIGIT(*portptr))
551	return CURLUE_BAD_PORT_NUMBER;
552
553	port = strtol(portptr, &rest, 10); /* Port number must be decimal */
554
555	if(port > 0xffff)
556	return CURLUE_BAD_PORT_NUMBER;
557
558	if(rest[0])
559	return CURLUE_BAD_PORT_NUMBER;
560
561	u->portnum = port;
562	/* generate a new port number string to get rid of leading zeroes etc */
563	free(u->port);
564	u->port = aprintf("%ld", port);
565	if(!u->port)
566	return CURLUE_OUT_OF_MEMORY;
567	}
568
569	return CURLUE_OK;
570	}
571
572	/* this assumes 'hostname' now starts with [ */
573	static CURLUcode ipv6_parse(struct Curl_URL u, char hostname,
574	size_t hlen) /* length of hostname */
575	{
576	size_t len;
577	DEBUGASSERT(*hostname == '[');
578	if(hlen < 4) /* '[::]' is the shortest possible valid string */
579	return CURLUE_BAD_IPV6;
580	hostname++;
581	hlen -= 2;
582
583	/* only valid IPv6 letters are ok */
584	len = strspn(hostname, "0123456789abcdefABCDEF:.");
585
586	if(hlen != len) {
587	hlen = len;
588	if(hostname[len] == '%') {
589	/* this could now be '%[zone id]' */
590	char zoneid[16];
591	int i = 0;
592	char *h = &hostname[len + 1];
593	/* pass '25' if present and is a url encoded percent sign */
594	if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595	h += 2;
596	while(h && (h != ']') && (i < 15))
597	zoneid[i++] = *h++;
598	if(!i \|\| (']' != *h))
599	return CURLUE_BAD_IPV6;
600	zoneid[i] = 0;
601	u->zoneid = strdup(zoneid);
602	if(!u->zoneid)
603	return CURLUE_OUT_OF_MEMORY;
604	hostname[len] = ']'; /* insert end bracket */
605	hostname[len + 1] = 0; /* terminate the hostname */
606	}
607	else
608	return CURLUE_BAD_IPV6;
609	/* hostname is fine */
610	}
611
612	/* Check the IPv6 address. */
613	{
614	char dest[16]; /* fits a binary IPv6 address */
615	char norm[MAX_IPADR_LEN];
616	hostname[hlen] = 0; /* end the address there */
617	if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618	return CURLUE_BAD_IPV6;
619
620	/* check if it can be done shorter */
621	if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622	(strlen(norm) < hlen)) {
623	strcpy(hostname, norm);
624	hlen = strlen(norm);
625	hostname[hlen + 1] = 0;
626	}
627	hostname[hlen] = ']'; /* restore ending bracket */
628	}
629	return CURLUE_OK;
630	}
631
632	static CURLUcode hostname_check(struct Curl_URL u, char hostname,
633	size_t hlen) /* length of hostname */
634	{
635	size_t len;
636	DEBUGASSERT(hostname);
637
638	if(!hlen)
639	return CURLUE_NO_HOST;
640	else if(hostname[0] == '[')
641	return ipv6_parse(u, hostname, hlen);
642	else {
643	/* letters from the second string are not ok */
644	len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
645	if(hlen != len)
646	/* hostname with bad content */
647	return CURLUE_BAD_HOSTNAME;
648	}
649	return CURLUE_OK;
650	}
651
/*
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 *
 * If the given input string is syntactically wrong IPv4 or any part for
 * example is too big, this function returns HOST_NAME.
 *
 * Output the "normalized" version of that input string in plain quad decimal
 * integers.
 *
 * Returns the host type.
 */

/* Host type codes, consumed by the switch in parse_authority(). Negative
   values are failures. */
#define HOST_ERROR   -1 /* out of memory */
#define HOST_BAD     -2 /* bad IPv4 address */

#define HOST_NAME    1 /* not a numerical IPv4 address */
#define HOST_IPV4    2 /* numerical IPv4, rewritten as dotted quad */
#define HOST_IPV6    3 /* starts with '[' */
671
672	static int ipv4_normalize(struct dynbuf *host)
673	{
674	bool done = FALSE;
675	int n = 0;
676	const char *c = Curl_dyn_ptr(host);
677	unsigned long parts[4] = {0, 0, 0, 0};
678	CURLcode result = CURLE_OK;
679
680	if(*c == '[')
681	return HOST_IPV6;
682
683	while(!done) {
684	char *endp = NULL;
685	unsigned long l;
686	if(!ISDIGIT(*c))
687	/* most importantly this doesn't allow a leading plus or minus */
688	return HOST_NAME;
689	l = strtoul(c, &endp, 0);
690
691	parts[n] = l;
692	c = endp;
693
694	switch(*c) {
695	case '.':
696	if(n == 3)
697	return HOST_NAME;
698	n++;
699	c++;
700	break;
701
702	case '\0':
703	done = TRUE;
704	break;
705
706	default:
707	return HOST_NAME;
708	}
709
710	/* overflow */
711	if((l == ULONG_MAX) && (errno == ERANGE))
712	return HOST_NAME;
713
714	#if SIZEOF_LONG > 4
715	/* a value larger than 32 bits */
716	if(l > UINT_MAX)
717	return HOST_NAME;
718	#endif
719	}
720
721	switch(n) {
722	case 0: /* a -- 32 bits */
723	Curl_dyn_reset(host);
724
725	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
726	(unsigned int)(parts[0] >> 24),
727	(unsigned int)((parts[0] >> 16) & 0xff),
728	(unsigned int)((parts[0] >> 8) & 0xff),
729	(unsigned int)(parts[0] & 0xff));
730	break;
731	case 1: /* a.b -- 8.24 bits */
732	if((parts[0] > 0xff) \|\| (parts[1] > 0xffffff))
733	return HOST_NAME;
734	Curl_dyn_reset(host);
735	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
736	(unsigned int)(parts[0]),
737	(unsigned int)((parts[1] >> 16) & 0xff),
738	(unsigned int)((parts[1] >> 8) & 0xff),
739	(unsigned int)(parts[1] & 0xff));
740	break;
741	case 2: /* a.b.c -- 8.8.16 bits */
742	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xffff))
743	return HOST_NAME;
744	Curl_dyn_reset(host);
745	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
746	(unsigned int)(parts[0]),
747	(unsigned int)(parts[1]),
748	(unsigned int)((parts[2] >> 8) & 0xff),
749	(unsigned int)(parts[2] & 0xff));
750	break;
751	case 3: /* a.b.c.d -- 8.8.8.8 bits */
752	if((parts[0] > 0xff) \|\| (parts[1] > 0xff) \|\| (parts[2] > 0xff) \|\|
753	(parts[3] > 0xff))
754	return HOST_NAME;
755	Curl_dyn_reset(host);
756	result = Curl_dyn_addf(host, "%u.%u.%u.%u",
757	(unsigned int)(parts[0]),
758	(unsigned int)(parts[1]),
759	(unsigned int)(parts[2]),
760	(unsigned int)(parts[3]));
761	break;
762	}
763	if(result)
764	return HOST_ERROR;
765	return HOST_IPV4;
766	}
767
768	/* if necessary, replace the host content with a URL decoded version */
769	static CURLUcode urldecode_host(struct dynbuf *host)
770	{
771	char *per = NULL;
772	const char *hostname = Curl_dyn_ptr(host);
773	per = strchr(hostname, '%');
774	if(!per)
775	/* nothing to decode */
776	return CURLUE_OK;
777	else {
778	/* encoded */
779	size_t dlen;
780	char *decoded;
781	CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
782	REJECT_CTRL);
783	if(result)
784	return CURLUE_BAD_HOSTNAME;
785	Curl_dyn_reset(host);
786	result = Curl_dyn_addn(host, decoded, dlen);
787	free(decoded);
788	if(result)
789	return cc2cu(result);
790	}
791
792	return CURLUE_OK;
793	}
794
795	static CURLUcode parse_authority(struct Curl_URL *u,
796	const char *auth, size_t authlen,
797	unsigned int flags,
798	struct dynbuf *host,
799	bool has_scheme)
800	{
801	size_t offset;
802	CURLUcode uc;
803	CURLcode result;
804
805	/*
806	* Parse the login details and strip them out of the host name.
807	*/
808	uc = parse_hostname_login(u, auth, authlen, flags, &offset);
809	if(uc)
810	goto out;
811
812	result = Curl_dyn_addn(host, auth + offset, authlen - offset);
813	if(result) {
814	uc = cc2cu(result);
815	goto out;
816	}
817
818	uc = Curl_parse_port(u, host, has_scheme);
819	if(uc)
820	goto out;
821
822	if(!Curl_dyn_len(host))
823	return CURLUE_NO_HOST;
824
825	switch(ipv4_normalize(host)) {
826	case HOST_IPV4:
827	break;
828	case HOST_IPV6:
829	uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
830	break;
831	case HOST_NAME:
832	uc = urldecode_host(host);
833	if(!uc)
834	uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
835	break;
836	case HOST_ERROR:
837	uc = CURLUE_OUT_OF_MEMORY;
838	break;
839	case HOST_BAD:
840	default:
841	uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
842	break;
843	}
844
845	out:
846	return uc;
847	}
848
849	CURLUcode Curl_url_set_authority(CURLU u, const char authority,
850	unsigned int flags)
851	{
852	CURLUcode result;
853	struct dynbuf host;
854
855	DEBUGASSERT(authority);
856	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
857
858	result = parse_authority(u, authority, strlen(authority), flags,
859	&host, !!u->scheme);
860	if(result)
861	Curl_dyn_free(&host);
862	else {
863	free(u->host);
864	u->host = Curl_dyn_ptr(&host);
865	}
866	return result;
867	}
868
869	/*
870	* "Remove Dot Segments"
871	* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
872	*/
873
874	/*
875	* dedotdotify()
876	* @unittest: 1395
877	*
878	* This function gets a null-terminated path with dot and dotdot sequences
879	* passed in and strips them off according to the rules in RFC 3986 section
880	* 5.2.4.
881	*
882	* The function handles a query part ('?' + stuff) appended but it expects
883	* that fragments ('#' + stuff) have already been cut off.
884	*
885	* RETURNS
886	*
887	* Zero for success and 'out' set to an allocated dedotdotified string.
888	*/
889	UNITTEST int dedotdotify(const char input, size_t clen, char *outp);
890	UNITTEST int dedotdotify(const char input, size_t clen, char *outp)
891	{
892	char *outptr;
893	const char *endp = &input[clen];
894	char *out;
895
896	*outp = NULL;
897	/* the path always starts with a slash, and a slash has not dot */
898	if((clen < 2) \|\| !memchr(input, '.', clen))
899	return 0;
900
901	out = malloc(clen + 1);
902	if(!out)
903	return 1; /* out of memory */
904
905	out = 0; / null-terminates, for inputs like "./" */
906	outptr = out;
907
908	do {
909	bool dotdot = TRUE;
910	if(*input == '.') {
911	/* A. If the input buffer begins with a prefix of "../" or "./", then
912	remove that prefix from the input buffer; otherwise, */
913
914	if(!strncmp("./", input, 2)) {
915	input += 2;
916	clen -= 2;
917	}
918	else if(!strncmp("../", input, 3)) {
919	input += 3;
920	clen -= 3;
921	}
922	/* D. if the input buffer consists only of "." or "..", then remove
923	that from the input buffer; otherwise, */
924
925	else if(!strcmp(".", input) \|\| !strcmp("..", input) \|\|
926	!strncmp(".?", input, 2) \|\| !strncmp("..?", input, 3)) {
927	*out = 0;
928	break;
929	}
930	else
931	dotdot = FALSE;
932	}
933	else if(*input == '/') {
934	/* B. if the input buffer begins with a prefix of "/./" or "/.", where
935	"." is a complete path segment, then replace that prefix with "/" in
936	the input buffer; otherwise, */
937	if(!strncmp("/./", input, 3)) {
938	input += 2;
939	clen -= 2;
940	}
941	else if(!strcmp("/.", input) \|\| !strncmp("/.?", input, 3)) {
942	*outptr++ = '/';
943	*outptr = 0;
944	break;
945	}
946
947	/* C. if the input buffer begins with a prefix of "/../" or "/..",
948	where ".." is a complete path segment, then replace that prefix with
949	"/" in the input buffer and remove the last segment and its
950	preceding "/" (if any) from the output buffer; otherwise, */
951
952	else if(!strncmp("/../", input, 4)) {
953	input += 3;
954	clen -= 3;
955	/* remove the last segment from the output buffer */
956	while(outptr > out) {
957	outptr--;
958	if(*outptr == '/')
959	break;
960	}
961	outptr = 0; / null-terminate where it stops */
962	}
963	else if(!strcmp("/..", input) \|\| !strncmp("/..?", input, 4)) {
964	/* remove the last segment from the output buffer */
965	while(outptr > out) {
966	outptr--;
967	if(*outptr == '/')
968	break;
969	}
970	*outptr++ = '/';
971	outptr = 0; / null-terminate where it stops */
972	break;
973	}
974	else
975	dotdot = FALSE;
976	}
977	else
978	dotdot = FALSE;
979
980	if(!dotdot) {
981	/* E. move the first path segment in the input buffer to the end of
982	the output buffer, including the initial "/" character (if any) and
983	any subsequent characters up to, but not including, the next "/"
984	character or the end of the input buffer. */
985
986	do {
987	outptr++ = input++;
988	clen--;
989	} while(input && (input != '/') && (*input != '?'));
990	*outptr = 0;
991	}
992
993	/* continue until end of path */
994	} while(input < endp);
995
996	*outp = out;
997	return 0; /* success */
998	}
999
1000	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
1001	{
1002	const char *path;
1003	size_t pathlen;
1004	char *query = NULL;
1005	char *fragment = NULL;
1006	char schemebuf[MAX_SCHEME_LEN + 1];
1007	size_t schemelen = 0;
1008	size_t urllen;
1009	CURLUcode result = CURLUE_OK;
1010	size_t fraglen = 0;
1011	struct dynbuf host;
1012
1013	DEBUGASSERT(url);
1014
1015	Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1016
1017	result = junkscan(url, &urllen, flags);
1018	if(result)
1019	goto fail;
1020
1021	schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1022	flags & (CURLU_GUESS_SCHEME\|
1023	CURLU_DEFAULT_SCHEME));
1024
1025	/* handle the file: scheme */
1026	if(schemelen && !strcmp(schemebuf, "file")) {
1027	bool uncpath = FALSE;
1028	if(urllen <= 6) {
1029	/* file:/ is not enough to actually be a complete file: URL */
1030	result = CURLUE_BAD_FILE_URL;
1031	goto fail;
1032	}
1033
1034	/* path has been allocated large enough to hold this */
1035	path = (char *)&url[5];
1036	pathlen = urllen - 5;
1037
1038	u->scheme = strdup("file");
1039	if(!u->scheme) {
1040	result = CURLUE_OUT_OF_MEMORY;
1041	goto fail;
1042	}
1043
1044	/* Extra handling URLs with an authority component (i.e. that start with
1045	* "file://")
1046	*
1047	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
1048	* RFC 8089, but not the (current) WHAT-WG URL spec.
1049	*/
1050	if(path[0] == '/' && path[1] == '/') {
1051	/* swallow the two slashes */
1052	const char *ptr = &path[2];
1053
1054	/*
1055	* According to RFC 8089, a file: URL can be reliably dereferenced if:
1056	*
1057	* o it has no/blank hostname, or
1058	*
1059	* o the hostname matches "localhost" (case-insensitively), or
1060	*
1061	* o the hostname is a FQDN that resolves to this machine, or
1062	*
1063	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
1064	* Appendix E.3).
1065	*
1066	* For brevity, we only consider URLs with empty, "localhost", or
1067	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
1068	*
1069	* Additionally, there is an exception for URLs with a Windows drive
1070	* letter in the authority (which was accidentally omitted from RFC 8089
1071	* Appendix E, but believe me, it was meant to be there. --MK)
1072	*/
1073	if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1074	/* the URL includes a host name, it must match "localhost" or
1075	"127.0.0.1" to be valid */
1076	if(checkprefix("localhost/", ptr) \|\|
1077	checkprefix("127.0.0.1/", ptr)) {
1078	ptr += 9; /* now points to the slash after the host */
1079	}
1080	else {
1081	#if defined(_WIN32)
1082	size_t len;
1083
1084	/* the host name, NetBIOS computer name, can not contain disallowed
1085	chars, and the delimiting slash character must be appended to the
1086	host name */
1087	path = strpbrk(ptr, "/\\:*?\"<>\|");
1088	if(!path \|\| *path != '/') {
1089	result = CURLUE_BAD_FILE_URL;
1090	goto fail;
1091	}
1092
1093	len = path - ptr;
1094	if(len) {
1095	CURLcode code = Curl_dyn_addn(&host, ptr, len);
1096	if(code) {
1097	result = cc2cu(code);
1098	goto fail;
1099	}
1100	uncpath = TRUE;
1101	}
1102
1103	ptr -= 2; /* now points to the // before the host in UNC */
1104	#else
1105	/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1106	none */
1107	result = CURLUE_BAD_FILE_URL;
1108	goto fail;
1109	#endif
1110	}
1111	}
1112
1113	path = ptr;
1114	pathlen = urllen - (ptr - url);
1115	}
1116
1117	if(!uncpath)
1118	/* no host for file: URLs by default */
1119	Curl_dyn_reset(&host);
1120
1121	#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1122	/* Don't allow Windows drive letters when not in Windows.
1123	* This catches both "file:/c:" and "file:c:" */
1124	if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) \|\|
1125	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1126	/* File drive letters are only accepted in MSDOS/Windows */
1127	result = CURLUE_BAD_FILE_URL;
1128	goto fail;
1129	}
1130	#else
1131	/* If the path starts with a slash and a drive letter, ditch the slash */
1132	if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1133	/* This cannot be done with strcpy, as the memory chunks overlap! */
1134	path++;
1135	pathlen--;
1136	}
1137	#endif
1138
1139	}
1140	else {
1141	/* clear path */
1142	const char *schemep = NULL;
1143	const char *hostp;
1144	size_t hostlen;
1145
1146	if(schemelen) {
1147	int i = 0;
1148	const char *p = &url[schemelen + 1];
1149	while((*p == '/') && (i < 4)) {
1150	p++;
1151	i++;
1152	}
1153
1154	schemep = schemebuf;
1155	if(!Curl_get_scheme_handler(schemep) &&
1156	!(flags & CURLU_NON_SUPPORT_SCHEME)) {
1157	result = CURLUE_UNSUPPORTED_SCHEME;
1158	goto fail;
1159	}
1160
1161	if((i < 1) \|\| (i > 3)) {
1162	/* less than one or more than three slashes */
1163	result = CURLUE_BAD_SLASHES;
1164	goto fail;
1165	}
1166	hostp = p; /* host name starts here */
1167	}
1168	else {
1169	/* no scheme! */
1170
1171	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME))) {
1172	result = CURLUE_BAD_SCHEME;
1173	goto fail;
1174	}
1175	if(flags & CURLU_DEFAULT_SCHEME)
1176	schemep = DEFAULT_SCHEME;
1177
1178	/*
1179	* The URL was badly formatted, let's try without scheme specified.
1180	*/
1181	hostp = url;
1182	}
1183
1184	if(schemep) {
1185	u->scheme = strdup(schemep);
1186	if(!u->scheme) {
1187	result = CURLUE_OUT_OF_MEMORY;
1188	goto fail;
1189	}
1190	}
1191
1192	/* find the end of the host name + port number */
1193	hostlen = strcspn(hostp, "/?#");
1194	path = &hostp[hostlen];
1195
1196	/* this pathlen also contains the query and the fragment */
1197	pathlen = urllen - (path - url);
1198	if(hostlen) {
1199
1200	result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1201	if(result)
1202	goto fail;
1203
1204	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1205	const char *hostname = Curl_dyn_ptr(&host);
1206	/* legacy curl-style guess based on host name */
1207	if(checkprefix("ftp.", hostname))
1208	schemep = "ftp";
1209	else if(checkprefix("dict.", hostname))
1210	schemep = "dict";
1211	else if(checkprefix("ldap.", hostname))
1212	schemep = "ldap";
1213	else if(checkprefix("imap.", hostname))
1214	schemep = "imap";
1215	else if(checkprefix("smtp.", hostname))
1216	schemep = "smtp";
1217	else if(checkprefix("pop3.", hostname))
1218	schemep = "pop3";
1219	else
1220	schemep = "http";
1221
1222	u->scheme = strdup(schemep);
1223	if(!u->scheme) {
1224	result = CURLUE_OUT_OF_MEMORY;
1225	goto fail;
1226	}
1227	}
1228	}
1229	else if(flags & CURLU_NO_AUTHORITY) {
1230	/* allowed to be empty. */
1231	if(Curl_dyn_add(&host, "")) {
1232	result = CURLUE_OUT_OF_MEMORY;
1233	goto fail;
1234	}
1235	}
1236	else {
1237	result = CURLUE_NO_HOST;
1238	goto fail;
1239	}
1240	}
1241
1242	fragment = strchr(path, '#');
1243	if(fragment) {
1244	fraglen = pathlen - (fragment - path);
1245	if(fraglen > 1) {
1246	/* skip the leading '#' in the copy but include the terminating null */
1247	if(flags & CURLU_URLENCODE) {
1248	struct dynbuf enc;
1249	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1250	result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1251	if(result)
1252	goto fail;
1253	u->fragment = Curl_dyn_ptr(&enc);
1254	}
1255	else {
1256	u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1257	if(!u->fragment) {
1258	result = CURLUE_OUT_OF_MEMORY;
1259	goto fail;
1260	}
1261	}
1262	}
1263	/* after this, pathlen still contains the query */
1264	pathlen -= fraglen;
1265	}
1266
1267	query = memchr(path, '?', pathlen);
1268	if(query) {
1269	size_t qlen = fragment ? (size_t)(fragment - query) :
1270	pathlen - (query - path);
1271	pathlen -= qlen;
1272	if(qlen > 1) {
1273	if(flags & CURLU_URLENCODE) {
1274	struct dynbuf enc;
1275	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1276	/* skip the leading question mark */
1277	result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1278	if(result)
1279	goto fail;
1280	u->query = Curl_dyn_ptr(&enc);
1281	}
1282	else {
1283	u->query = Curl_memdup0(query + 1, qlen - 1);
1284	if(!u->query) {
1285	result = CURLUE_OUT_OF_MEMORY;
1286	goto fail;
1287	}
1288	}
1289	}
1290	else {
1291	/* single byte query */
1292	u->query = strdup("");
1293	if(!u->query) {
1294	result = CURLUE_OUT_OF_MEMORY;
1295	goto fail;
1296	}
1297	}
1298	}
1299
1300	if(pathlen && (flags & CURLU_URLENCODE)) {
1301	struct dynbuf enc;
1302	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1303	result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1304	if(result)
1305	goto fail;
1306	pathlen = Curl_dyn_len(&enc);
1307	path = u->path = Curl_dyn_ptr(&enc);
1308	}
1309
1310	if(pathlen <= 1) {
1311	/* there is no path left or just the slash, unset */
1312	path = NULL;
1313	}
1314	else {
1315	if(!u->path) {
1316	u->path = Curl_memdup0(path, pathlen);
1317	if(!u->path) {
1318	result = CURLUE_OUT_OF_MEMORY;
1319	goto fail;
1320	}
1321	path = u->path;
1322	}
1323	else if(flags & CURLU_URLENCODE)
1324	/* it might have encoded more than just the path so cut it */
1325	u->path[pathlen] = 0;
1326
1327	if(!(flags & CURLU_PATH_AS_IS)) {
1328	/* remove ../ and ./ sequences according to RFC3986 */
1329	char *dedot;
1330	int err = dedotdotify((char *)path, pathlen, &dedot);
1331	if(err) {
1332	result = CURLUE_OUT_OF_MEMORY;
1333	goto fail;
1334	}
1335	if(dedot) {
1336	free(u->path);
1337	u->path = dedot;
1338	}
1339	}
1340	}
1341
1342	u->host = Curl_dyn_ptr(&host);
1343
1344	return result;
1345	fail:
1346	Curl_dyn_free(&host);
1347	free_urlhandle(u);
1348	return result;
1349	}
1350
1351	/*
1352	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1353	*/
1354	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1355	unsigned int flags)
1356	{
1357	CURLUcode result;
1358	CURLU tmpurl;
1359	memset(&tmpurl, 0, sizeof(tmpurl));
1360	result = parseurl(url, &tmpurl, flags);
1361	if(!result) {
1362	free_urlhandle(u);
1363	*u = tmpurl;
1364	}
1365	return result;
1366	}
1367
1368	/*
1369	*/
1370	CURLU *curl_url(void)
1371	{
1372	return calloc(1, sizeof(struct Curl_URL));
1373	}
1374
1375	void curl_url_cleanup(CURLU *u)
1376	{
1377	if(u) {
1378	free_urlhandle(u);
1379	free(u);
1380	}
1381	}
1382
/*
 * Duplicate the string member 'name' from 'src' into 'dest' when it is set.
 * Jumps to the caller's 'fail' label when strdup() returns NULL. The pointer
 * arguments are parenthesized so that expressions such as '&obj' can be
 * passed safely.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1391
1392	CURLU curl_url_dup(const CURLU in)
1393	{
1394	struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1395	if(u) {
1396	DUP(u, in, scheme);
1397	DUP(u, in, user);
1398	DUP(u, in, password);
1399	DUP(u, in, options);
1400	DUP(u, in, host);
1401	DUP(u, in, port);
1402	DUP(u, in, path);
1403	DUP(u, in, query);
1404	DUP(u, in, fragment);
1405	DUP(u, in, zoneid);
1406	u->portnum = in->portnum;
1407	}
1408	return u;
1409	fail:
1410	curl_url_cleanup(u);
1411	return NULL;
1412	}
1413
1414	CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1415	char **part, unsigned int flags)
1416	{
1417	const char *ptr;
1418	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1419	char portbuf[7];
1420	bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1421	bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1422	bool punycode = FALSE;
1423	bool depunyfy = FALSE;
1424	bool plusdecode = FALSE;
1425	(void)flags;
1426	if(!u)
1427	return CURLUE_BAD_HANDLE;
1428	if(!part)
1429	return CURLUE_BAD_PARTPOINTER;
1430	*part = NULL;
1431
1432	switch(what) {
1433	case CURLUPART_SCHEME:
1434	ptr = u->scheme;
1435	ifmissing = CURLUE_NO_SCHEME;
1436	urldecode = FALSE; /* never for schemes */
1437	break;
1438	case CURLUPART_USER:
1439	ptr = u->user;
1440	ifmissing = CURLUE_NO_USER;
1441	break;
1442	case CURLUPART_PASSWORD:
1443	ptr = u->password;
1444	ifmissing = CURLUE_NO_PASSWORD;
1445	break;
1446	case CURLUPART_OPTIONS:
1447	ptr = u->options;
1448	ifmissing = CURLUE_NO_OPTIONS;
1449	break;
1450	case CURLUPART_HOST:
1451	ptr = u->host;
1452	ifmissing = CURLUE_NO_HOST;
1453	punycode = (flags & CURLU_PUNYCODE)?1:0;
1454	depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1455	break;
1456	case CURLUPART_ZONEID:
1457	ptr = u->zoneid;
1458	ifmissing = CURLUE_NO_ZONEID;
1459	break;
1460	case CURLUPART_PORT:
1461	ptr = u->port;
1462	ifmissing = CURLUE_NO_PORT;
1463	urldecode = FALSE; /* never for port */
1464	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1465	/* there's no stored port number, but asked to deliver
1466	a default one for the scheme */
1467	const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1468	if(h) {
1469	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1470	ptr = portbuf;
1471	}
1472	}
1473	else if(ptr && u->scheme) {
1474	/* there is a stored port number, but ask to inhibit if
1475	it matches the default one for the scheme */
1476	const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1477	if(h && (h->defport == u->portnum) &&
1478	(flags & CURLU_NO_DEFAULT_PORT))
1479	ptr = NULL;
1480	}
1481	break;
1482	case CURLUPART_PATH:
1483	ptr = u->path;
1484	if(!ptr)
1485	ptr = "/";
1486	break;
1487	case CURLUPART_QUERY:
1488	ptr = u->query;
1489	ifmissing = CURLUE_NO_QUERY;
1490	plusdecode = urldecode;
1491	break;
1492	case CURLUPART_FRAGMENT:
1493	ptr = u->fragment;
1494	ifmissing = CURLUE_NO_FRAGMENT;
1495	break;
1496	case CURLUPART_URL: {
1497	char *url;
1498	char *scheme;
1499	char *options = u->options;
1500	char *port = u->port;
1501	char *allochost = NULL;
1502	punycode = (flags & CURLU_PUNYCODE)?1:0;
1503	depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1504	if(u->scheme && strcasecompare("file", u->scheme)) {
1505	url = aprintf("file://%s%s%s",
1506	u->path,
1507	u->fragment? "#": "",
1508	u->fragment? u->fragment : "");
1509	}
1510	else if(!u->host)
1511	return CURLUE_NO_HOST;
1512	else {
1513	const struct Curl_handler *h = NULL;
1514	if(u->scheme)
1515	scheme = u->scheme;
1516	else if(flags & CURLU_DEFAULT_SCHEME)
1517	scheme = (char *) DEFAULT_SCHEME;
1518	else
1519	return CURLUE_NO_SCHEME;
1520
1521	h = Curl_get_scheme_handler(scheme);
1522	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1523	/* there's no stored port number, but asked to deliver
1524	a default one for the scheme */
1525	if(h) {
1526	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1527	port = portbuf;
1528	}
1529	}
1530	else if(port) {
1531	/* there is a stored port number, but asked to inhibit if it matches
1532	the default one for the scheme */
1533	if(h && (h->defport == u->portnum) &&
1534	(flags & CURLU_NO_DEFAULT_PORT))
1535	port = NULL;
1536	}
1537
1538	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1539	options = NULL;
1540
1541	if(u->host[0] == '[') {
1542	if(u->zoneid) {
1543	/* make it '[ host %25 zoneid ]' */
1544	struct dynbuf enc;
1545	size_t hostlen = strlen(u->host);
1546	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1547	if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1548	u->zoneid))
1549	return CURLUE_OUT_OF_MEMORY;
1550	allochost = Curl_dyn_ptr(&enc);
1551	}
1552	}
1553	else if(urlencode) {
1554	allochost = curl_easy_escape(NULL, u->host, 0);
1555	if(!allochost)
1556	return CURLUE_OUT_OF_MEMORY;
1557	}
1558	else if(punycode) {
1559	if(!Curl_is_ASCII_name(u->host)) {
1560	#ifndef USE_IDN
1561	return CURLUE_LACKS_IDN;
1562	#else
1563	CURLcode result = Curl_idn_decode(u->host, &allochost);
1564	if(result)
1565	return (result == CURLE_OUT_OF_MEMORY) ?
1566	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1567	#endif
1568	}
1569	}
1570	else if(depunyfy) {
1571	if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1572	#ifndef USE_IDN
1573	return CURLUE_LACKS_IDN;
1574	#else
1575	CURLcode result = Curl_idn_encode(u->host, &allochost);
1576	if(result)
1577	/* this is the most likely error */
1578	return (result == CURLE_OUT_OF_MEMORY) ?
1579	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1580	#endif
1581	}
1582	}
1583
1584	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1585	scheme,
1586	u->user ? u->user : "",
1587	u->password ? ":": "",
1588	u->password ? u->password : "",
1589	options ? ";" : "",
1590	options ? options : "",
1591	(u->user \|\| u->password \|\| options) ? "@": "",
1592	allochost ? allochost : u->host,
1593	port ? ":": "",
1594	port ? port : "",
1595	u->path ? u->path : "/",
1596	(u->query && u->query[0]) ? "?": "",
1597	(u->query && u->query[0]) ? u->query : "",
1598	u->fragment? "#": "",
1599	u->fragment? u->fragment : "");
1600	free(allochost);
1601	}
1602	if(!url)
1603	return CURLUE_OUT_OF_MEMORY;
1604	*part = url;
1605	return CURLUE_OK;
1606	}
1607	default:
1608	ptr = NULL;
1609	break;
1610	}
1611	if(ptr) {
1612	size_t partlen = strlen(ptr);
1613	size_t i = 0;
1614	*part = Curl_memdup0(ptr, partlen);
1615	if(!*part)
1616	return CURLUE_OUT_OF_MEMORY;
1617	if(plusdecode) {
1618	/* convert + to space */
1619	char plus = part;
1620	for(i = 0; i < partlen; ++plus, i++) {
1621	if(*plus == '+')
1622	*plus = ' ';
1623	}
1624	}
1625	if(urldecode) {
1626	char *decoded;
1627	size_t dlen;
1628	/* this unconditional rejection of control bytes is documented
1629	API behavior */
1630	CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1631	free(*part);
1632	if(res) {
1633	*part = NULL;
1634	return CURLUE_URLDECODE;
1635	}
1636	*part = decoded;
1637	partlen = dlen;
1638	}
1639	if(urlencode) {
1640	struct dynbuf enc;
1641	CURLUcode uc;
1642	Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1643	uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1644	if(uc)
1645	return uc;
1646	free(*part);
1647	*part = Curl_dyn_ptr(&enc);
1648	}
1649	else if(punycode) {
1650	if(!Curl_is_ASCII_name(u->host)) {
1651	#ifndef USE_IDN
1652	return CURLUE_LACKS_IDN;
1653	#else
1654	char *allochost;
1655	CURLcode result = Curl_idn_decode(*part, &allochost);
1656	if(result)
1657	return (result == CURLE_OUT_OF_MEMORY) ?
1658	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1659	free(*part);
1660	*part = allochost;
1661	#endif
1662	}
1663	}
1664	else if(depunyfy) {
1665	if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1666	#ifndef USE_IDN
1667	return CURLUE_LACKS_IDN;
1668	#else
1669	char *allochost;
1670	CURLcode result = Curl_idn_encode(*part, &allochost);
1671	if(result)
1672	return (result == CURLE_OUT_OF_MEMORY) ?
1673	CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1674	free(*part);
1675	*part = allochost;
1676	#endif
1677	}
1678	}
1679
1680	return CURLUE_OK;
1681	}
1682	else
1683	return ifmissing;
1684	}
1685
1686	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1687	const char *part, unsigned int flags)
1688	{
1689	char **storep = NULL;
1690	long port = 0;
1691	bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1692	bool plusencode = FALSE;
1693	bool urlskipslash = FALSE;
1694	bool leadingslash = FALSE;
1695	bool appendquery = FALSE;
1696	bool equalsencode = FALSE;
1697	size_t nalloc;
1698
1699	if(!u)
1700	return CURLUE_BAD_HANDLE;
1701	if(!part) {
1702	/* setting a part to NULL clears it */
1703	switch(what) {
1704	case CURLUPART_URL:
1705	break;
1706	case CURLUPART_SCHEME:
1707	storep = &u->scheme;
1708	break;
1709	case CURLUPART_USER:
1710	storep = &u->user;
1711	break;
1712	case CURLUPART_PASSWORD:
1713	storep = &u->password;
1714	break;
1715	case CURLUPART_OPTIONS:
1716	storep = &u->options;
1717	break;
1718	case CURLUPART_HOST:
1719	storep = &u->host;
1720	break;
1721	case CURLUPART_ZONEID:
1722	storep = &u->zoneid;
1723	break;
1724	case CURLUPART_PORT:
1725	u->portnum = 0;
1726	storep = &u->port;
1727	break;
1728	case CURLUPART_PATH:
1729	storep = &u->path;
1730	break;
1731	case CURLUPART_QUERY:
1732	storep = &u->query;
1733	break;
1734	case CURLUPART_FRAGMENT:
1735	storep = &u->fragment;
1736	break;
1737	default:
1738	return CURLUE_UNKNOWN_PART;
1739	}
1740	if(storep && *storep) {
1741	Curl_safefree(*storep);
1742	}
1743	else if(!storep) {
1744	free_urlhandle(u);
1745	memset(u, 0, sizeof(struct Curl_URL));
1746	}
1747	return CURLUE_OK;
1748	}
1749
1750	nalloc = strlen(part);
1751	if(nalloc > CURL_MAX_INPUT_LENGTH)
1752	/* excessive input length */
1753	return CURLUE_MALFORMED_INPUT;
1754
1755	switch(what) {
1756	case CURLUPART_SCHEME: {
1757	size_t plen = strlen(part);
1758	const char *s = part;
1759	if((plen > MAX_SCHEME_LEN) \|\| (plen < 1))
1760	/* too long or too short */
1761	return CURLUE_BAD_SCHEME;
1762	/* verify that it is a fine scheme */
1763	if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1764	return CURLUE_UNSUPPORTED_SCHEME;
1765	storep = &u->scheme;
1766	urlencode = FALSE; /* never */
1767	if(ISALPHA(*s)) {
1768	/* ALPHA ( ALPHA / DIGIT / "+" / "-" / "." ) /
1769	while(--plen) {
1770	if(ISALNUM(s) \|\| (s == '+') \|\| (s == '-') \|\| (s == '.'))
1771	s++; /* fine */
1772	else
1773	return CURLUE_BAD_SCHEME;
1774	}
1775	}
1776	else
1777	return CURLUE_BAD_SCHEME;
1778	break;
1779	}
1780	case CURLUPART_USER:
1781	storep = &u->user;
1782	break;
1783	case CURLUPART_PASSWORD:
1784	storep = &u->password;
1785	break;
1786	case CURLUPART_OPTIONS:
1787	storep = &u->options;
1788	break;
1789	case CURLUPART_HOST:
1790	storep = &u->host;
1791	Curl_safefree(u->zoneid);
1792	break;
1793	case CURLUPART_ZONEID:
1794	storep = &u->zoneid;
1795	break;
1796	case CURLUPART_PORT:
1797	{
1798	char *endp;
1799	urlencode = FALSE; /* never */
1800	port = strtol(part, &endp, 10); /* Port number must be decimal */
1801	if((port <= 0) \|\| (port > 0xffff))
1802	return CURLUE_BAD_PORT_NUMBER;
1803	if(*endp)
1804	/* weirdly provided number, not good! */
1805	return CURLUE_BAD_PORT_NUMBER;
1806	storep = &u->port;
1807	}
1808	break;
1809	case CURLUPART_PATH:
1810	urlskipslash = TRUE;
1811	leadingslash = TRUE; /* enforce */
1812	storep = &u->path;
1813	break;
1814	case CURLUPART_QUERY:
1815	plusencode = urlencode;
1816	appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1817	equalsencode = appendquery;
1818	storep = &u->query;
1819	break;
1820	case CURLUPART_FRAGMENT:
1821	storep = &u->fragment;
1822	break;
1823	case CURLUPART_URL: {
1824	/*
1825	* Allow a new URL to replace the existing (if any) contents.
1826	*
1827	* If the existing contents is enough for a URL, allow a relative URL to
1828	* replace it.
1829	*/
1830	CURLcode result;
1831	CURLUcode uc;
1832	char *oldurl;
1833	char *redired_url;
1834
1835	if(!nalloc)
1836	/* a blank URL is not a valid URL */
1837	return CURLUE_MALFORMED_INPUT;
1838
1839	/* if the new thing is absolute or the old one is not
1840	* (we could not get an absolute url in 'oldurl'),
1841	* then replace the existing with the new. */
1842	if(Curl_is_absolute_url(part, NULL, 0,
1843	flags & (CURLU_GUESS_SCHEME\|
1844	CURLU_DEFAULT_SCHEME))
1845	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1846	return parseurl_and_replace(part, u, flags);
1847	}
1848
1849	/* apply the relative part to create a new URL
1850	* and replace the existing one with it. */
1851	result = concat_url(oldurl, part, &redired_url);
1852	free(oldurl);
1853	if(result)
1854	return cc2cu(result);
1855
1856	uc = parseurl_and_replace(redired_url, u, flags);
1857	free(redired_url);
1858	return uc;
1859	}
1860	default:
1861	return CURLUE_UNKNOWN_PART;
1862	}
1863	DEBUGASSERT(storep);
1864	{
1865	const char *newp;
1866	struct dynbuf enc;
1867	Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1868
1869	if(leadingslash && (part[0] != '/')) {
1870	CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1871	if(result)
1872	return cc2cu(result);
1873	}
1874	if(urlencode) {
1875	const unsigned char *i;
1876
1877	for(i = (const unsigned char )part; i; i++) {
1878	CURLcode result;
1879	if((*i == ' ') && plusencode) {
1880	result = Curl_dyn_addn(&enc, "+", 1);
1881	if(result)
1882	return CURLUE_OUT_OF_MEMORY;
1883	}
1884	else if(ISUNRESERVED(*i) \|\|
1885	((*i == '/') && urlskipslash) \|\|
1886	((*i == '=') && equalsencode)) {
1887	if((*i == '=') && equalsencode)
1888	/* only skip the first equals sign */
1889	equalsencode = FALSE;
1890	result = Curl_dyn_addn(&enc, i, 1);
1891	if(result)
1892	return cc2cu(result);
1893	}
1894	else {
1895	char out[3]={'%'};
1896	out[1] = hexdigits[*i>>4];
1897	out[2] = hexdigits[*i & 0xf];
1898	result = Curl_dyn_addn(&enc, out, 3);
1899	if(result)
1900	return cc2cu(result);
1901	}
1902	}
1903	}
1904	else {
1905	char *p;
1906	CURLcode result = Curl_dyn_add(&enc, part);
1907	if(result)
1908	return cc2cu(result);
1909	p = Curl_dyn_ptr(&enc);
1910	while(*p) {
1911	/* make sure percent encoded are lower case */
1912	if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1913	(ISUPPER(p[1]) \|\| ISUPPER(p[2]))) {
1914	p[1] = Curl_raw_tolower(p[1]);
1915	p[2] = Curl_raw_tolower(p[2]);
1916	p += 3;
1917	}
1918	else
1919	p++;
1920	}
1921	}
1922	newp = Curl_dyn_ptr(&enc);
1923
1924	if(appendquery && newp) {
1925	/* Append the 'newp' string onto the old query. Add a '&' separator if
1926	none is present at the end of the existing query already */
1927
1928	size_t querylen = u->query ? strlen(u->query) : 0;
1929	bool addamperand = querylen && (u->query[querylen -1] != '&');
1930	if(querylen) {
1931	struct dynbuf qbuf;
1932	Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1933
1934	if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1935	goto nomem;
1936
1937	if(addamperand) {
1938	if(Curl_dyn_addn(&qbuf, "&", 1))
1939	goto nomem;
1940	}
1941	if(Curl_dyn_add(&qbuf, newp))
1942	goto nomem;
1943	Curl_dyn_free(&enc);
1944	free(*storep);
1945	*storep = Curl_dyn_ptr(&qbuf);
1946	return CURLUE_OK;
1947	nomem:
1948	Curl_dyn_free(&enc);
1949	return CURLUE_OUT_OF_MEMORY;
1950	}
1951	}
1952
1953	else if(what == CURLUPART_HOST) {
1954	size_t n = Curl_dyn_len(&enc);
1955	if(!n && (flags & CURLU_NO_AUTHORITY)) {
1956	/* Skip hostname check, it's allowed to be empty. */
1957	}
1958	else {
1959	if(!n \|\| hostname_check(u, (char *)newp, n)) {
1960	Curl_dyn_free(&enc);
1961	return CURLUE_BAD_HOSTNAME;
1962	}
1963	}
1964	}
1965
1966	free(*storep);
1967	storep = (char )newp;
1968	}
1969	/* set after the string, to make it not assigned if the allocation above
1970	fails */
1971	if(port)
1972	u->portnum = port;
1973	return CURLUE_OK;
1974	}

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/curl-8.7.1/lib/urlapi.c@ 106165

以其他格式下載: