1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 | /* ***** BEGIN LICENSE BLOCK *****
3 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 | *
5 | * The contents of this file are subject to the Mozilla Public License Version
6 | * 1.1 (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | * http://www.mozilla.org/MPL/
9 | *
10 | * Software distributed under the License is distributed on an "AS IS" basis,
11 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 | * for the specific language governing rights and limitations under the
13 | * License.
14 | *
15 | * The Original Code is mozilla.org code.
16 | *
17 | * The Initial Developer of the Original Code is
18 | * Netscape Communications Corporation.
19 | * Portions created by the Initial Developer are Copyright (C) 1998
20 | * the Initial Developer. All Rights Reserved.
21 | *
22 | * Contributor(s):
23 | *
24 | * Alternatively, the contents of this file may be used under the terms of
25 | * either of the GNU General Public License Version 2 or later (the "GPL"),
26 | * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 | * in which case the provisions of the GPL or the LGPL are applicable instead
28 | * of those above. If you wish to allow use of your version of this file only
29 | * under the terms of either the GPL or the LGPL, and not to allow others to
30 | * use your version of this file under the terms of the MPL, indicate your
31 | * decision by deleting the provisions above and replace them with the notice
32 | * and other provisions required by the GPL or the LGPL. If you do not delete
33 | * the provisions above, a recipient may use your version of this file under
34 | * the terms of any one of the MPL, the GPL or the LGPL.
35 | *
36 | * ***** END LICENSE BLOCK ***** */
37 |
38 | // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
39 |
40 | #include "nsEscape.h"
41 | #include "nsMemory.h"
42 | #include "nsCRT.h"
43 | #include "nsReadableUtils.h"
44 |
/*
** netCharType: per-byte permission bits, tested by IS_OK() against the
** caller's escape mask.  A byte is copied unescaped when its entry shares
** a bit with the mask.
**
**   Bit 0 (value 1)  xalpha   -- the alphas
**   Bit 1 (value 2)  xpalpha  -- as xalpha but
**                                converts spaces to plus and plus to %2B
**   Bit 2 (value 4)  path     -- as xalphas but doesn't escape '/'
**
** The bits for '@' are 0 rather than 7 so that '@' can be escaped in
** usernames and passwords in publishing.
**
** Only indices 0x00-0x80 are spelled out; the remaining entries are
** zero-initialized, so every byte above 0x7F is always escaped.
*/
const int netCharType[256] =
{
/*                          0 1 2 3 4 5 6 7 8 9 A B C D E F */
/* 0x                  */   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 1x                  */   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* 2x  !"#$%&'()*+,-./ */   0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,
/* 3x 0123456789:;<=>? */   7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,
/* 4x @ABCDEFGHIJKLMNO */   0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
/* 5x PQRSTUVWXYZ[\]^_ */   7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,
/* 6x `abcdefghijklmno */   0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
/* 7x pqrstuvwxyz{|}~ DEL */ 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,
/* 80                  */   0,
};
63 |
/* decode % escaped hex codes into character values
**
** UNHEX(C) yields the value (0-15) of the hex digit C, or 0 when C is not a
** hex digit.  C is evaluated several times, so it must not have side
** effects; it is fully parenthesized so that an expression argument (for
** example a conditional) is parsed as a single operand.
*/
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
    (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
    (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))


/* Non-zero when byte C may be copied unescaped under the `mask` variable that
** is in scope at the point of use.  The index is narrowed to unsigned char so
** that a negative (signed) char can never index outside the 256-entry table.
*/
#define IS_OK(C) (netCharType[(unsigned char) (C)] & (mask))

/* Character that introduces a two-digit hex escape. */
#define HEX_ESCAPE '%'
74 |
75 | //----------------------------------------------------------------------------------------
76 | static char* nsEscapeCount(
77 | const char * str,
78 | PRInt32 len,
79 | nsEscapeMask mask,
80 | PRInt32* out_len)
81 | //----------------------------------------------------------------------------------------
82 | {
83 | if (!str)
84 | return 0;
85 |
86 | int i, extra = 0;
87 | static const char hexChars[] = "0123456789ABCDEF";
88 |
89 | register const unsigned char* src = (const unsigned char *) str;
90 | for (i = 0; i < len; i++)
91 | {
92 | if (!IS_OK(*src++))
93 | extra += 2; /* the escape, plus an extra byte for each nibble */
94 | }
95 |
96 | char* result = (char *)nsMemory::Alloc(len + extra + 1);
97 | if (!result)
98 | return 0;
99 |
100 | register unsigned char* dst = (unsigned char *) result;
101 | src = (const unsigned char *) str;
102 | if (mask == url_XPAlphas)
103 | {
104 | for (i = 0; i < len; i++)
105 | {
106 | unsigned char c = *src++;
107 | if (IS_OK(c))
108 | *dst++ = c;
109 | else if (c == ' ')
110 | *dst++ = '+'; /* convert spaces to pluses */
111 | else
112 | {
113 | *dst++ = HEX_ESCAPE;
114 | *dst++ = hexChars[c >> 4]; /* high nibble */
115 | *dst++ = hexChars[c & 0x0f]; /* low nibble */
116 | }
117 | }
118 | }
119 | else
120 | {
121 | for (i = 0; i < len; i++)
122 | {
123 | unsigned char c = *src++;
124 | if (IS_OK(c))
125 | *dst++ = c;
126 | else
127 | {
128 | *dst++ = HEX_ESCAPE;
129 | *dst++ = hexChars[c >> 4]; /* high nibble */
130 | *dst++ = hexChars[c & 0x0f]; /* low nibble */
131 | }
132 | }
133 | }
134 |
135 | *dst = '\0'; /* tack on eos */
136 | if(out_len)
137 | *out_len = dst - (unsigned char *) result;
138 | return result;
139 | }
140 |
141 | //----------------------------------------------------------------------------------------
142 | NS_COM char* nsEscape(const char * str, nsEscapeMask mask)
143 | //----------------------------------------------------------------------------------------
144 | {
145 | if(!str)
146 | return NULL;
147 | return nsEscapeCount(str, (PRInt32)strlen(str), mask, NULL);
148 | }
149 |
150 | //----------------------------------------------------------------------------------------
151 | NS_COM char* nsUnescape(char * str)
152 | //----------------------------------------------------------------------------------------
153 | {
154 | nsUnescapeCount(str);
155 | return str;
156 | }
157 |
158 | //----------------------------------------------------------------------------------------
159 | NS_COM PRInt32 nsUnescapeCount(char * str)
160 | //----------------------------------------------------------------------------------------
161 | {
162 | register char *src = str;
163 | register char *dst = str;
164 | static const char hexChars[] = "0123456789ABCDEFabcdef";
165 |
166 | char c1[] = " ";
167 | char c2[] = " ";
168 | char* const pc1 = c1;
169 | char* const pc2 = c2;
170 |
171 | while (*src)
172 | {
173 | c1[0] = *(src+1);
174 | if (*(src+1) == '\0')
175 | c2[0] = '\0';
176 | else
177 | c2[0] = *(src+2);
178 |
179 | if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
180 | PL_strpbrk(pc2, hexChars) == 0 )
181 | *dst++ = *src++;
182 | else
183 | {
184 | src++; /* walk over escape */
185 | if (*src)
186 | {
187 | *dst = UNHEX(*src) << 4;
188 | src++;
189 | }
190 | if (*src)
191 | {
192 | *dst = (*dst + UNHEX(*src));
193 | src++;
194 | }
195 | dst++;
196 | }
197 | }
198 |
199 | *dst = 0;
200 | return (int)(dst - str);
201 |
202 | } /* NET_UnEscapeCnt */
203 |
204 |
205 | NS_COM char *
206 | nsEscapeHTML(const char * string)
207 | {
208 | /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
209 | char *rv = (char *) nsMemory::Alloc(strlen(string) * 6 + 1);
210 | char *ptr = rv;
211 |
212 | if(rv)
213 | {
214 | for(; *string != '\0'; string++)
215 | {
216 | if(*string == '<')
217 | {
218 | *ptr++ = '&';
219 | *ptr++ = 'l';
220 | *ptr++ = 't';
221 | *ptr++ = ';';
222 | }
223 | else if(*string == '>')
224 | {
225 | *ptr++ = '&';
226 | *ptr++ = 'g';
227 | *ptr++ = 't';
228 | *ptr++ = ';';
229 | }
230 | else if(*string == '&')
231 | {
232 | *ptr++ = '&';
233 | *ptr++ = 'a';
234 | *ptr++ = 'm';
235 | *ptr++ = 'p';
236 | *ptr++ = ';';
237 | }
238 | else if (*string == '"')
239 | {
240 | *ptr++ = '&';
241 | *ptr++ = 'q';
242 | *ptr++ = 'u';
243 | *ptr++ = 'o';
244 | *ptr++ = 't';
245 | *ptr++ = ';';
246 | }
247 | else if (*string == '\'')
248 | {
249 | *ptr++ = '&';
250 | *ptr++ = '#';
251 | *ptr++ = '3';
252 | *ptr++ = '9';
253 | *ptr++ = ';';
254 | }
255 | else
256 | {
257 | *ptr++ = *string;
258 | }
259 | }
260 | *ptr = '\0';
261 | }
262 |
263 | return(rv);
264 | }
265 |
266 | NS_COM PRUnichar *
267 | nsEscapeHTML2(const PRUnichar *aSourceBuffer, PRInt32 aSourceBufferLen)
268 | {
269 | // if the caller didn't calculate the length
270 | if (aSourceBufferLen == -1) {
271 | aSourceBufferLen = nsCRT::strlen(aSourceBuffer); // ...then I will
272 | }
273 |
274 | /* XXX Hardcoded max entity len. */
275 | PRUnichar *resultBuffer = (PRUnichar *)nsMemory::Alloc(aSourceBufferLen *
276 | 6 * sizeof(PRUnichar) + sizeof(PRUnichar('\0')));
277 | PRUnichar *ptr = resultBuffer;
278 |
279 | if (resultBuffer) {
280 | PRInt32 i;
281 |
282 | for(i = 0; i < aSourceBufferLen; i++) {
283 | if(aSourceBuffer[i] == '<') {
284 | *ptr++ = '&';
285 | *ptr++ = 'l';
286 | *ptr++ = 't';
287 | *ptr++ = ';';
288 | } else if(aSourceBuffer[i] == '>') {
289 | *ptr++ = '&';
290 | *ptr++ = 'g';
291 | *ptr++ = 't';
292 | *ptr++ = ';';
293 | } else if(aSourceBuffer[i] == '&') {
294 | *ptr++ = '&';
295 | *ptr++ = 'a';
296 | *ptr++ = 'm';
297 | *ptr++ = 'p';
298 | *ptr++ = ';';
299 | } else if (aSourceBuffer[i] == '"') {
300 | *ptr++ = '&';
301 | *ptr++ = 'q';
302 | *ptr++ = 'u';
303 | *ptr++ = 'o';
304 | *ptr++ = 't';
305 | *ptr++ = ';';
306 | } else if (aSourceBuffer[i] == '\'') {
307 | *ptr++ = '&';
308 | *ptr++ = '#';
309 | *ptr++ = '3';
310 | *ptr++ = '9';
311 | *ptr++ = ';';
312 | } else {
313 | *ptr++ = aSourceBuffer[i];
314 | }
315 | }
316 | *ptr = 0;
317 | }
318 |
319 | return resultBuffer;
320 | }
321 |
322 | //----------------------------------------------------------------------------------------
323 |
/*
** EscapeChars: for each byte value, the set of mask bits under which
** NO_NEED_ESC() lets NS_EscapeURL copy the byte without escaping it.
** 1023 means "allowed under all ten low mask bits"; 0 means "never allowed".
**
** Only indices 0x00-0x80 are spelled out; the remaining entries are
** zero-initialized.  DEL (0x7F) is the last entry of row 7x.
*/
const int EscapeChars[256] =
{
/*                         0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
/* 0x                  */     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 1x                  */     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 2x  !"#$%&'()*+,-./ */     0,1023,   0, 512,1023,   0,1023,1023,1023,1023,1023,1023,1023,1023, 953, 784,
/* 3x 0123456789:;<=>? */  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008, 912,   0,1008,   0, 768,
/* 4x @ABCDEFGHIJKLMNO */  1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,
/* 5x PQRSTUVWXYZ[\]^_ */  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,
/* 6x `abcdefghijklmno */     0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,
/* 7x pqrstuvwxyz{|}~ DEL */ 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,
/* 80                  */     0
};
337 |
/* Non-zero when byte C may be left unescaped under the `mask` variable that is
   in scope at the point of use.  The index is narrowed to unsigned char so
   that a negative (signed) char can never index outside the 256-entry table. */
#define NO_NEED_ESC(C) (EscapeChars[(unsigned char) (C)] & (mask))
339 |
340 | //----------------------------------------------------------------------------------------
341 |
342 | /* returns an escaped string */
343 |
/* use the following masks to specify which
   part of a URL you want to escape:
346 |
347 | esc_Scheme = 1
348 | esc_Username = 2
349 | esc_Password = 4
350 | esc_Host = 8
351 | esc_Directory = 16
352 | esc_FileBaseName = 32
353 | esc_FileExtension = 64
354 | esc_Param = 128
355 | esc_Query = 256
356 | esc_Ref = 512
357 | */
358 |
/* by default this function leaves every '%' in the input alone, so
   parts of a string that already look escaped (a '%' followed by a
   hex code) are not escaped a second time. This is done to avoid
   multiple escapes of a string. Use the following mask to force
   escaping of '%' as well:
364 |
365 | esc_Forced = 1024
366 | */
367 |
368 | NS_COM PRBool NS_EscapeURL(const char *part,
369 | PRInt32 partLen,
370 | PRInt16 mask,
371 | nsACString &result)
372 | {
373 | if (!part) {
374 | NS_NOTREACHED("null pointer");
375 | return PR_FALSE;
376 | }
377 |
378 | int i = 0;
379 | static const char hexChars[] = "0123456789ABCDEF";
380 | if (partLen < 0)
381 | partLen = strlen(part);
382 | PRBool forced = (mask & esc_Forced);
383 | PRBool ignoreNonAscii = (mask & esc_OnlyASCII);
384 | PRBool ignoreAscii = (mask & esc_OnlyNonASCII);
385 | PRBool writing = (mask & esc_AlwaysCopy);
386 | PRBool colon = (mask & esc_Colon);
387 |
388 | register const unsigned char* src = (const unsigned char *) part;
389 |
390 | char tempBuffer[100];
391 | unsigned int tempBufferPos = 0;
392 |
393 | for (i = 0; i < partLen; i++)
394 | {
395 | unsigned char c = *src++;
396 |
397 | // if the char has not to be escaped or whatever follows % is
398 | // a valid escaped string, just copy the char.
399 | //
400 | // Also the % will not be escaped until forced
401 | // See bugzilla bug 61269 for details why we changed this
402 | //
403 | // And, we will not escape non-ascii characters if requested.
404 | // On special request we will also escape the colon even when
405 | // not covered by the matrix.
406 | // ignoreAscii is not honored for control characters (C0 and DEL)
407 | if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
408 | || (c > 0x7f && ignoreNonAscii)
409 | || (c > 0x1f && c < 0x7f && ignoreAscii))
410 | && !(c == ':' && colon))
411 | {
412 | if (writing)
413 | tempBuffer[tempBufferPos++] = c;
414 | }
415 | else /* do the escape magic */
416 | {
417 | if (!writing)
418 | {
419 | result.Append(part, i);
420 | writing = PR_TRUE;
421 | }
422 | tempBuffer[tempBufferPos++] = HEX_ESCAPE;
423 | tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
424 | tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
425 | }
426 |
427 | if (tempBufferPos >= sizeof(tempBuffer) - 4)
428 | {
429 | NS_ASSERTION(writing, "should be writing");
430 | tempBuffer[tempBufferPos] = '\0';
431 | result += tempBuffer;
432 | tempBufferPos = 0;
433 | }
434 | }
435 | if (writing) {
436 | tempBuffer[tempBufferPos] = '\0';
437 | result += tempBuffer;
438 | }
439 | return writing;
440 | }
441 |
442 | #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
443 |
444 | NS_COM PRBool NS_UnescapeURL(const char *str, PRInt32 len, PRInt16 flags, nsACString &result)
445 | {
446 | if (!str) {
447 | NS_NOTREACHED("null pointer");
448 | return PR_FALSE;
449 | }
450 |
451 | if (len < 0)
452 | len = strlen(str);
453 |
454 | PRBool ignoreNonAscii = (flags & esc_OnlyASCII);
455 | PRBool writing = (flags & esc_AlwaysCopy);
456 | PRBool skipControl = (flags & esc_SkipControl);
457 |
458 | static const char hexChars[] = "0123456789ABCDEFabcdef";
459 |
460 | const char *last = str;
461 | const char *p = str;
462 |
463 | for (int i=0; i<len; ++i, ++p) {
464 | //printf("%c [i=%d of len=%d]\n", *p, i, len);
465 | if (*p == HEX_ESCAPE && i < len-2) {
466 | unsigned char *p1 = ((unsigned char *) p) + 1;
467 | unsigned char *p2 = ((unsigned char *) p) + 2;
468 | if (ISHEX(*p1) && ISHEX(*p2) && !(ignoreNonAscii && *p1 >= '8') &&
469 | !(skipControl &&
470 | (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
471 | //printf("- p1=%c p2=%c\n", *p1, *p2);
472 | writing = PR_TRUE;
473 | if (p > last) {
474 | //printf("- p=%p, last=%p\n", p, last);
475 | result.Append(last, p - last);
476 | last = p;
477 | }
478 | char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
479 | //printf("- u=%c\n", u);
480 | result.Append(u);
481 | i += 2;
482 | p += 2;
483 | last += 3;
484 | }
485 | }
486 | }
487 | if (writing && last < str + len)
488 | result.Append(last, str + len - last);
489 |
490 | return writing;
491 | }