1 | /*
|
---|
2 | * string.c : an XML string utilities module
|
---|
3 | *
|
---|
4 | * This module provides various utility functions for manipulating
|
---|
5 | * the xmlChar* type. All functions named xmlStr* have been moved here
|
---|
6 | * from the parser.c file (their original home).
|
---|
7 | *
|
---|
8 | * See Copyright for the status of this software.
|
---|
9 | *
|
---|
10 | * UTF8 string routines from:
|
---|
11 | * William Brack <[email protected]>
|
---|
12 | *
|
---|
13 | * [email protected]
|
---|
14 | */
|
---|
15 |
|
---|
16 | #define IN_LIBXML
|
---|
17 | #include "libxml.h"
|
---|
18 |
|
---|
19 | #include <stdlib.h>
|
---|
20 | #include <string.h>
|
---|
21 | #include <limits.h>
|
---|
22 | #include <libxml/xmlmemory.h>
|
---|
23 | #include <libxml/parserInternals.h>
|
---|
24 | #include <libxml/xmlstring.h>
|
---|
25 |
|
---|
26 | #include "private/parser.h"
|
---|
27 | #include "private/string.h"
|
---|
28 |
|
---|
29 | /************************************************************************
|
---|
30 | * *
|
---|
31 | * Commodity functions to handle xmlChars *
|
---|
32 | * *
|
---|
33 | ************************************************************************/
|
---|
34 |
|
---|
35 | /**
|
---|
36 | * xmlStrndup:
|
---|
37 | * @cur: the input xmlChar *
|
---|
38 | * @len: the len of @cur
|
---|
39 | *
|
---|
40 | * a strndup for array of xmlChar's
|
---|
41 | *
|
---|
42 | * Returns a new xmlChar * or NULL
|
---|
43 | */
|
---|
44 | xmlChar *
|
---|
45 | xmlStrndup(const xmlChar *cur, int len) {
|
---|
46 | xmlChar *ret;
|
---|
47 |
|
---|
48 | if ((cur == NULL) || (len < 0)) return(NULL);
|
---|
49 | ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
|
---|
50 | if (ret == NULL) {
|
---|
51 | return(NULL);
|
---|
52 | }
|
---|
53 | memcpy(ret, cur, len);
|
---|
54 | ret[len] = 0;
|
---|
55 | return(ret);
|
---|
56 | }
|
---|
57 |
|
---|
58 | /**
|
---|
59 | * xmlStrdup:
|
---|
60 | * @cur: the input xmlChar *
|
---|
61 | *
|
---|
62 | * a strdup for array of xmlChar's. Since they are supposed to be
|
---|
63 | * encoded in UTF-8 or an encoding with 8bit based chars, we assume
|
---|
64 | * a termination mark of '0'.
|
---|
65 | *
|
---|
66 | * Returns a new xmlChar * or NULL
|
---|
67 | */
|
---|
68 | xmlChar *
|
---|
69 | xmlStrdup(const xmlChar *cur) {
|
---|
70 | const xmlChar *p = cur;
|
---|
71 |
|
---|
72 | if (cur == NULL) return(NULL);
|
---|
73 | while (*p != 0) p++; /* non input consuming */
|
---|
74 | return(xmlStrndup(cur, p - cur));
|
---|
75 | }
|
---|
76 |
|
---|
77 | /**
|
---|
78 | * xmlCharStrndup:
|
---|
79 | * @cur: the input char *
|
---|
80 | * @len: the len of @cur
|
---|
81 | *
|
---|
82 | * a strndup for char's to xmlChar's
|
---|
83 | *
|
---|
84 | * Returns a new xmlChar * or NULL
|
---|
85 | */
|
---|
86 |
|
---|
87 | xmlChar *
|
---|
88 | xmlCharStrndup(const char *cur, int len) {
|
---|
89 | int i;
|
---|
90 | xmlChar *ret;
|
---|
91 |
|
---|
92 | if ((cur == NULL) || (len < 0)) return(NULL);
|
---|
93 | ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);
|
---|
94 | if (ret == NULL) {
|
---|
95 | return(NULL);
|
---|
96 | }
|
---|
97 | for (i = 0;i < len;i++) {
|
---|
98 | /* Explicit sign change */
|
---|
99 | ret[i] = (xmlChar) cur[i];
|
---|
100 | if (ret[i] == 0) return(ret);
|
---|
101 | }
|
---|
102 | ret[len] = 0;
|
---|
103 | return(ret);
|
---|
104 | }
|
---|
105 |
|
---|
106 | /**
|
---|
107 | * xmlCharStrdup:
|
---|
108 | * @cur: the input char *
|
---|
109 | *
|
---|
110 | * a strdup for char's to xmlChar's
|
---|
111 | *
|
---|
112 | * Returns a new xmlChar * or NULL
|
---|
113 | */
|
---|
114 |
|
---|
115 | xmlChar *
|
---|
116 | xmlCharStrdup(const char *cur) {
|
---|
117 | const char *p = cur;
|
---|
118 |
|
---|
119 | if (cur == NULL) return(NULL);
|
---|
120 | while (*p != '\0') p++; /* non input consuming */
|
---|
121 | return(xmlCharStrndup(cur, p - cur));
|
---|
122 | }
|
---|
123 |
|
---|
124 | /**
|
---|
125 | * xmlStrcmp:
|
---|
126 | * @str1: the first xmlChar *
|
---|
127 | * @str2: the second xmlChar *
|
---|
128 | *
|
---|
129 | * a strcmp for xmlChar's
|
---|
130 | *
|
---|
131 | * Returns the integer result of the comparison
|
---|
132 | */
|
---|
133 |
|
---|
134 | int
|
---|
135 | xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
|
---|
136 | if (str1 == str2) return(0);
|
---|
137 | if (str1 == NULL) return(-1);
|
---|
138 | if (str2 == NULL) return(1);
|
---|
139 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
140 | return(strcmp((const char *)str1, (const char *)str2));
|
---|
141 | #else
|
---|
142 | do {
|
---|
143 | int tmp = *str1++ - *str2;
|
---|
144 | if (tmp != 0) return(tmp);
|
---|
145 | } while (*str2++ != 0);
|
---|
146 | return 0;
|
---|
147 | #endif
|
---|
148 | }
|
---|
149 |
|
---|
150 | /**
|
---|
151 | * xmlStrEqual:
|
---|
152 | * @str1: the first xmlChar *
|
---|
153 | * @str2: the second xmlChar *
|
---|
154 | *
|
---|
155 | * Check if both strings are equal of have same content.
|
---|
156 | * Should be a bit more readable and faster than xmlStrcmp()
|
---|
157 | *
|
---|
158 | * Returns 1 if they are equal, 0 if they are different
|
---|
159 | */
|
---|
160 |
|
---|
161 | int
|
---|
162 | xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
|
---|
163 | if (str1 == str2) return(1);
|
---|
164 | if (str1 == NULL) return(0);
|
---|
165 | if (str2 == NULL) return(0);
|
---|
166 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
167 | return(strcmp((const char *)str1, (const char *)str2) == 0);
|
---|
168 | #else
|
---|
169 | do {
|
---|
170 | if (*str1++ != *str2) return(0);
|
---|
171 | } while (*str2++);
|
---|
172 | return(1);
|
---|
173 | #endif
|
---|
174 | }
|
---|
175 |
|
---|
176 | /**
|
---|
177 | * xmlStrQEqual:
|
---|
178 | * @pref: the prefix of the QName
|
---|
179 | * @name: the localname of the QName
|
---|
180 | * @str: the second xmlChar *
|
---|
181 | *
|
---|
182 | * Check if a QName is Equal to a given string
|
---|
183 | *
|
---|
184 | * Returns 1 if they are equal, 0 if they are different
|
---|
185 | */
|
---|
186 |
|
---|
187 | int
|
---|
188 | xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
|
---|
189 | if (pref == NULL) return(xmlStrEqual(name, str));
|
---|
190 | if (name == NULL) return(0);
|
---|
191 | if (str == NULL) return(0);
|
---|
192 |
|
---|
193 | do {
|
---|
194 | if (*pref++ != *str) return(0);
|
---|
195 | } while ((*str++) && (*pref));
|
---|
196 | if (*str++ != ':') return(0);
|
---|
197 | do {
|
---|
198 | if (*name++ != *str) return(0);
|
---|
199 | } while (*str++);
|
---|
200 | return(1);
|
---|
201 | }
|
---|
202 |
|
---|
203 | /**
|
---|
204 | * xmlStrncmp:
|
---|
205 | * @str1: the first xmlChar *
|
---|
206 | * @str2: the second xmlChar *
|
---|
207 | * @len: the max comparison length
|
---|
208 | *
|
---|
209 | * a strncmp for xmlChar's
|
---|
210 | *
|
---|
211 | * Returns the integer result of the comparison
|
---|
212 | */
|
---|
213 |
|
---|
214 | int
|
---|
215 | xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
|
---|
216 | if (len <= 0) return(0);
|
---|
217 | if (str1 == str2) return(0);
|
---|
218 | if (str1 == NULL) return(-1);
|
---|
219 | if (str2 == NULL) return(1);
|
---|
220 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
---|
221 | return(strncmp((const char *)str1, (const char *)str2, len));
|
---|
222 | #else
|
---|
223 | do {
|
---|
224 | int tmp = *str1++ - *str2;
|
---|
225 | if (tmp != 0 || --len == 0) return(tmp);
|
---|
226 | } while (*str2++ != 0);
|
---|
227 | return 0;
|
---|
228 | #endif
|
---|
229 | }
|
---|
230 |
|
---|
231 | static const xmlChar casemap[256] = {
|
---|
232 | 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
|
---|
233 | 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
---|
234 | 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
|
---|
235 | 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
---|
236 | 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
|
---|
237 | 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
---|
238 | 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
|
---|
239 | 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
---|
240 | 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
|
---|
241 | 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
---|
242 | 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
|
---|
243 | 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
|
---|
244 | 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
|
---|
245 | 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
---|
246 | 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
|
---|
247 | 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
---|
248 | 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
|
---|
249 | 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
---|
250 | 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
|
---|
251 | 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
---|
252 | 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
|
---|
253 | 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
---|
254 | 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
|
---|
255 | 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
---|
256 | 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
|
---|
257 | 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
|
---|
258 | 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
|
---|
259 | 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
|
---|
260 | 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
|
---|
261 | 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
|
---|
262 | 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
|
---|
263 | 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
|
---|
264 | };
|
---|
265 |
|
---|
266 | /**
|
---|
267 | * xmlStrcasecmp:
|
---|
268 | * @str1: the first xmlChar *
|
---|
269 | * @str2: the second xmlChar *
|
---|
270 | *
|
---|
271 | * a strcasecmp for xmlChar's
|
---|
272 | *
|
---|
273 | * Returns the integer result of the comparison
|
---|
274 | */
|
---|
275 |
|
---|
276 | int
|
---|
277 | xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
|
---|
278 | register int tmp;
|
---|
279 |
|
---|
280 | if (str1 == str2) return(0);
|
---|
281 | if (str1 == NULL) return(-1);
|
---|
282 | if (str2 == NULL) return(1);
|
---|
283 | do {
|
---|
284 | tmp = casemap[*str1++] - casemap[*str2];
|
---|
285 | if (tmp != 0) return(tmp);
|
---|
286 | } while (*str2++ != 0);
|
---|
287 | return 0;
|
---|
288 | }
|
---|
289 |
|
---|
290 | /**
|
---|
291 | * xmlStrncasecmp:
|
---|
292 | * @str1: the first xmlChar *
|
---|
293 | * @str2: the second xmlChar *
|
---|
294 | * @len: the max comparison length
|
---|
295 | *
|
---|
296 | * a strncasecmp for xmlChar's
|
---|
297 | *
|
---|
298 | * Returns the integer result of the comparison
|
---|
299 | */
|
---|
300 |
|
---|
301 | int
|
---|
302 | xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
|
---|
303 | register int tmp;
|
---|
304 |
|
---|
305 | if (len <= 0) return(0);
|
---|
306 | if (str1 == str2) return(0);
|
---|
307 | if (str1 == NULL) return(-1);
|
---|
308 | if (str2 == NULL) return(1);
|
---|
309 | do {
|
---|
310 | tmp = casemap[*str1++] - casemap[*str2];
|
---|
311 | if (tmp != 0 || --len == 0) return(tmp);
|
---|
312 | } while (*str2++ != 0);
|
---|
313 | return 0;
|
---|
314 | }
|
---|
315 |
|
---|
316 | /**
|
---|
317 | * xmlStrchr:
|
---|
318 | * @str: the xmlChar * array
|
---|
319 | * @val: the xmlChar to search
|
---|
320 | *
|
---|
321 | * a strchr for xmlChar's
|
---|
322 | *
|
---|
323 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
324 | */
|
---|
325 |
|
---|
326 | const xmlChar *
|
---|
327 | xmlStrchr(const xmlChar *str, xmlChar val) {
|
---|
328 | if (str == NULL) return(NULL);
|
---|
329 | while (*str != 0) { /* non input consuming */
|
---|
330 | if (*str == val) return((xmlChar *) str);
|
---|
331 | str++;
|
---|
332 | }
|
---|
333 | return(NULL);
|
---|
334 | }
|
---|
335 |
|
---|
336 | /**
|
---|
337 | * xmlStrstr:
|
---|
338 | * @str: the xmlChar * array (haystack)
|
---|
339 | * @val: the xmlChar to search (needle)
|
---|
340 | *
|
---|
341 | * a strstr for xmlChar's
|
---|
342 | *
|
---|
343 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
344 | */
|
---|
345 |
|
---|
346 | const xmlChar *
|
---|
347 | xmlStrstr(const xmlChar *str, const xmlChar *val) {
|
---|
348 | int n;
|
---|
349 |
|
---|
350 | if (str == NULL) return(NULL);
|
---|
351 | if (val == NULL) return(NULL);
|
---|
352 | n = xmlStrlen(val);
|
---|
353 |
|
---|
354 | if (n == 0) return(str);
|
---|
355 | while (*str != 0) { /* non input consuming */
|
---|
356 | if (*str == *val) {
|
---|
357 | if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
|
---|
358 | }
|
---|
359 | str++;
|
---|
360 | }
|
---|
361 | return(NULL);
|
---|
362 | }
|
---|
363 |
|
---|
364 | /**
|
---|
365 | * xmlStrcasestr:
|
---|
366 | * @str: the xmlChar * array (haystack)
|
---|
367 | * @val: the xmlChar to search (needle)
|
---|
368 | *
|
---|
369 | * a case-ignoring strstr for xmlChar's
|
---|
370 | *
|
---|
371 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
372 | */
|
---|
373 |
|
---|
374 | const xmlChar *
|
---|
375 | xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
|
---|
376 | int n;
|
---|
377 |
|
---|
378 | if (str == NULL) return(NULL);
|
---|
379 | if (val == NULL) return(NULL);
|
---|
380 | n = xmlStrlen(val);
|
---|
381 |
|
---|
382 | if (n == 0) return(str);
|
---|
383 | while (*str != 0) { /* non input consuming */
|
---|
384 | if (casemap[*str] == casemap[*val])
|
---|
385 | if (!xmlStrncasecmp(str, val, n)) return(str);
|
---|
386 | str++;
|
---|
387 | }
|
---|
388 | return(NULL);
|
---|
389 | }
|
---|
390 |
|
---|
391 | /**
|
---|
392 | * xmlStrsub:
|
---|
393 | * @str: the xmlChar * array (haystack)
|
---|
394 | * @start: the index of the first char (zero based)
|
---|
395 | * @len: the length of the substring
|
---|
396 | *
|
---|
397 | * Extract a substring of a given string
|
---|
398 | *
|
---|
399 | * Returns the xmlChar * for the first occurrence or NULL.
|
---|
400 | */
|
---|
401 |
|
---|
402 | xmlChar *
|
---|
403 | xmlStrsub(const xmlChar *str, int start, int len) {
|
---|
404 | int i;
|
---|
405 |
|
---|
406 | if (str == NULL) return(NULL);
|
---|
407 | if (start < 0) return(NULL);
|
---|
408 | if (len < 0) return(NULL);
|
---|
409 |
|
---|
410 | for (i = 0;i < start;i++) {
|
---|
411 | if (*str == 0) return(NULL);
|
---|
412 | str++;
|
---|
413 | }
|
---|
414 | if (*str == 0) return(NULL);
|
---|
415 | return(xmlStrndup(str, len));
|
---|
416 | }
|
---|
417 |
|
---|
418 | /**
|
---|
419 | * xmlStrlen:
|
---|
420 | * @str: the xmlChar * array
|
---|
421 | *
|
---|
422 | * length of a xmlChar's string
|
---|
423 | *
|
---|
424 | * Returns the number of xmlChar contained in the ARRAY.
|
---|
425 | */
|
---|
426 |
|
---|
427 | int
|
---|
428 | xmlStrlen(const xmlChar *str) {
|
---|
429 | size_t len = str ? strlen((const char *)str) : 0;
|
---|
430 | return(len > INT_MAX ? 0 : len);
|
---|
431 | }
|
---|
432 |
|
---|
433 | /**
|
---|
434 | * xmlStrncat:
|
---|
435 | * @cur: the original xmlChar * array
|
---|
436 | * @add: the xmlChar * array added
|
---|
437 | * @len: the length of @add
|
---|
438 | *
|
---|
439 | * a strncat for array of xmlChar's, it will extend @cur with the len
|
---|
440 | * first bytes of @add. Note that if @len < 0 then this is an API error
|
---|
441 | * and NULL will be returned.
|
---|
442 | *
|
---|
443 | * Returns a new xmlChar *, the original @cur is reallocated and should
|
---|
444 | * not be freed.
|
---|
445 | */
|
---|
446 |
|
---|
447 | xmlChar *
|
---|
448 | xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
|
---|
449 | int size;
|
---|
450 | xmlChar *ret;
|
---|
451 |
|
---|
452 | if ((add == NULL) || (len == 0))
|
---|
453 | return(cur);
|
---|
454 | if (len < 0)
|
---|
455 | return(NULL);
|
---|
456 | if (cur == NULL)
|
---|
457 | return(xmlStrndup(add, len));
|
---|
458 |
|
---|
459 | size = xmlStrlen(cur);
|
---|
460 | if ((size < 0) || (size > INT_MAX - len))
|
---|
461 | return(NULL);
|
---|
462 | ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);
|
---|
463 | if (ret == NULL) {
|
---|
464 | return(cur);
|
---|
465 | }
|
---|
466 | memcpy(&ret[size], add, len);
|
---|
467 | ret[size + len] = 0;
|
---|
468 | return(ret);
|
---|
469 | }
|
---|
470 |
|
---|
471 | /**
|
---|
472 | * xmlStrncatNew:
|
---|
473 | * @str1: first xmlChar string
|
---|
474 | * @str2: second xmlChar string
|
---|
475 | * @len: the len of @str2 or < 0
|
---|
476 | *
|
---|
477 | * same as xmlStrncat, but creates a new string. The original
|
---|
478 | * two strings are not freed. If @len is < 0 then the length
|
---|
479 | * will be calculated automatically.
|
---|
480 | *
|
---|
481 | * Returns a new xmlChar * or NULL
|
---|
482 | */
|
---|
483 | xmlChar *
|
---|
484 | xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
|
---|
485 | int size;
|
---|
486 | xmlChar *ret;
|
---|
487 |
|
---|
488 | if (len < 0) {
|
---|
489 | len = xmlStrlen(str2);
|
---|
490 | if (len < 0)
|
---|
491 | return(NULL);
|
---|
492 | }
|
---|
493 | if ((str2 == NULL) || (len == 0))
|
---|
494 | return(xmlStrdup(str1));
|
---|
495 | if (str1 == NULL)
|
---|
496 | return(xmlStrndup(str2, len));
|
---|
497 |
|
---|
498 | size = xmlStrlen(str1);
|
---|
499 | if ((size < 0) || (size > INT_MAX - len))
|
---|
500 | return(NULL);
|
---|
501 | ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);
|
---|
502 | if (ret == NULL) {
|
---|
503 | return(xmlStrndup(str1, size));
|
---|
504 | }
|
---|
505 | memcpy(ret, str1, size);
|
---|
506 | memcpy(&ret[size], str2, len);
|
---|
507 | ret[size + len] = 0;
|
---|
508 | return(ret);
|
---|
509 | }
|
---|
510 |
|
---|
511 | /**
|
---|
512 | * xmlStrcat:
|
---|
513 | * @cur: the original xmlChar * array
|
---|
514 | * @add: the xmlChar * array added
|
---|
515 | *
|
---|
516 | * a strcat for array of xmlChar's. Since they are supposed to be
|
---|
517 | * encoded in UTF-8 or an encoding with 8bit based chars, we assume
|
---|
518 | * a termination mark of '0'.
|
---|
519 | *
|
---|
520 | * Returns a new xmlChar * containing the concatenated string. The original
|
---|
521 | * @cur is reallocated and should not be freed.
|
---|
522 | */
|
---|
523 | xmlChar *
|
---|
524 | xmlStrcat(xmlChar *cur, const xmlChar *add) {
|
---|
525 | const xmlChar *p = add;
|
---|
526 |
|
---|
527 | if (add == NULL) return(cur);
|
---|
528 | if (cur == NULL)
|
---|
529 | return(xmlStrdup(add));
|
---|
530 |
|
---|
531 | while (*p != 0) p++; /* non input consuming */
|
---|
532 | return(xmlStrncat(cur, add, p - add));
|
---|
533 | }
|
---|
534 |
|
---|
535 | /**
|
---|
536 | * xmlStrPrintf:
|
---|
537 | * @buf: the result buffer.
|
---|
538 | * @len: the result buffer length.
|
---|
539 | * @msg: the message with printf formatting.
|
---|
540 | * @...: extra parameters for the message.
|
---|
541 | *
|
---|
542 | * Formats @msg and places result into @buf.
|
---|
543 | *
|
---|
544 | * Returns the number of characters written to @buf or -1 if an error occurs.
|
---|
545 | */
|
---|
546 | int
|
---|
547 | xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
|
---|
548 | va_list args;
|
---|
549 | int ret;
|
---|
550 |
|
---|
551 | if((buf == NULL) || (msg == NULL)) {
|
---|
552 | return(-1);
|
---|
553 | }
|
---|
554 |
|
---|
555 | va_start(args, msg);
|
---|
556 | ret = vsnprintf((char *) buf, len, (const char *) msg, args);
|
---|
557 | va_end(args);
|
---|
558 | buf[len - 1] = 0; /* be safe ! */
|
---|
559 |
|
---|
560 | return(ret);
|
---|
561 | }
|
---|
562 |
|
---|
563 | /**
|
---|
564 | * xmlStrVPrintf:
|
---|
565 | * @buf: the result buffer.
|
---|
566 | * @len: the result buffer length.
|
---|
567 | * @msg: the message with printf formatting.
|
---|
568 | * @ap: extra parameters for the message.
|
---|
569 | *
|
---|
570 | * Formats @msg and places result into @buf.
|
---|
571 | *
|
---|
572 | * Returns the number of characters written to @buf or -1 if an error occurs.
|
---|
573 | */
|
---|
574 | int
|
---|
575 | xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
|
---|
576 | int ret;
|
---|
577 |
|
---|
578 | if((buf == NULL) || (msg == NULL)) {
|
---|
579 | return(-1);
|
---|
580 | }
|
---|
581 |
|
---|
582 | ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
|
---|
583 | buf[len - 1] = 0; /* be safe ! */
|
---|
584 |
|
---|
585 | return(ret);
|
---|
586 | }
|
---|
587 |
|
---|
588 | /************************************************************************
|
---|
589 | * *
|
---|
590 | * Generic UTF8 handling routines *
|
---|
591 | * *
|
---|
592 | * From rfc2044: encoding of the Unicode values on UTF-8: *
|
---|
593 | * *
|
---|
594 | * UCS-4 range (hex.) UTF-8 octet sequence (binary) *
|
---|
595 | * 0000 0000-0000 007F 0xxxxxxx *
|
---|
596 | * 0000 0080-0000 07FF 110xxxxx 10xxxxxx *
|
---|
597 | * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx *
|
---|
598 | * *
|
---|
599 | * I hope we won't use values > 0xFFFF anytime soon ! *
|
---|
600 | * *
|
---|
601 | ************************************************************************/
|
---|
602 |
|
---|
603 |
|
---|
604 | /**
|
---|
605 | * xmlUTF8Size:
|
---|
606 | * @utf: pointer to the UTF8 character
|
---|
607 | *
|
---|
608 | * calculates the internal size of a UTF8 character
|
---|
609 | *
|
---|
610 | * returns the numbers of bytes in the character, -1 on format error
|
---|
611 | */
|
---|
612 | int
|
---|
613 | xmlUTF8Size(const xmlChar *utf) {
|
---|
614 | xmlChar mask;
|
---|
615 | int len;
|
---|
616 |
|
---|
617 | if (utf == NULL)
|
---|
618 | return -1;
|
---|
619 | if (*utf < 0x80)
|
---|
620 | return 1;
|
---|
621 | /* check valid UTF8 character */
|
---|
622 | if (!(*utf & 0x40))
|
---|
623 | return -1;
|
---|
624 | /* determine number of bytes in char */
|
---|
625 | len = 2;
|
---|
626 | for (mask=0x20; mask != 0; mask>>=1) {
|
---|
627 | if (!(*utf & mask))
|
---|
628 | return len;
|
---|
629 | len++;
|
---|
630 | }
|
---|
631 | return -1;
|
---|
632 | }
|
---|
633 |
|
---|
634 | /**
|
---|
635 | * xmlUTF8Charcmp:
|
---|
636 | * @utf1: pointer to first UTF8 char
|
---|
637 | * @utf2: pointer to second UTF8 char
|
---|
638 | *
|
---|
639 | * compares the two UCS4 values
|
---|
640 | *
|
---|
641 | * returns result of the compare as with xmlStrncmp
|
---|
642 | */
|
---|
643 | int
|
---|
644 | xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
|
---|
645 |
|
---|
646 | if (utf1 == NULL ) {
|
---|
647 | if (utf2 == NULL)
|
---|
648 | return 0;
|
---|
649 | return -1;
|
---|
650 | }
|
---|
651 | return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
|
---|
652 | }
|
---|
653 |
|
---|
654 | /**
|
---|
655 | * xmlUTF8Strlen:
|
---|
656 | * @utf: a sequence of UTF-8 encoded bytes
|
---|
657 | *
|
---|
658 | * compute the length of an UTF8 string, it doesn't do a full UTF8
|
---|
659 | * checking of the content of the string.
|
---|
660 | *
|
---|
661 | * Returns the number of characters in the string or -1 in case of error
|
---|
662 | */
|
---|
663 | int
|
---|
664 | xmlUTF8Strlen(const xmlChar *utf) {
|
---|
665 | size_t ret = 0;
|
---|
666 |
|
---|
667 | if (utf == NULL)
|
---|
668 | return(-1);
|
---|
669 |
|
---|
670 | while (*utf != 0) {
|
---|
671 | if (utf[0] & 0x80) {
|
---|
672 | if ((utf[1] & 0xc0) != 0x80)
|
---|
673 | return(-1);
|
---|
674 | if ((utf[0] & 0xe0) == 0xe0) {
|
---|
675 | if ((utf[2] & 0xc0) != 0x80)
|
---|
676 | return(-1);
|
---|
677 | if ((utf[0] & 0xf0) == 0xf0) {
|
---|
678 | if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
|
---|
679 | return(-1);
|
---|
680 | utf += 4;
|
---|
681 | } else {
|
---|
682 | utf += 3;
|
---|
683 | }
|
---|
684 | } else {
|
---|
685 | utf += 2;
|
---|
686 | }
|
---|
687 | } else {
|
---|
688 | utf++;
|
---|
689 | }
|
---|
690 | ret++;
|
---|
691 | }
|
---|
692 | return(ret > INT_MAX ? 0 : ret);
|
---|
693 | }
|
---|
694 |
|
---|
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf. Overlong encodings,
 * surrogate code points (0xD800-0xDFFF) and values of 0x110000 and
 * above are rejected. No byte of @utf is read unless *@len says it is
 * available; in particular nothing is read when *@len is below 1.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    /* An empty window holds no character: do not touch utf[0]. */
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c < 0x80) {
        /* 1-byte code */
        *len = 1;
    } else {
        if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))
            goto error;
        if (c < 0xe0) {
            /* leads below 0xc2 are continuation bytes or overlong forms */
            if (c < 0xc2)
                goto error;
            /* 2-byte code */
            *len = 2;
            c = (c & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        } else {
            if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))
                goto error;
            if (c < 0xf0) {
                /* 3-byte code */
                *len = 3;
                c = (c & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
                /* reject overlong forms and UTF-16 surrogates */
                if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))
                    goto error;
            } else {
                if ((*len < 4) || ((utf[3] & 0xc0) != 0x80))
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (c & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
                /* reject overlong forms and values past U+10FFFF */
                if ((c < 0x10000) || (c >= 0x110000))
                    goto error;
            }
        }
    }
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
764 |
|
---|
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. The check is structural only: every lead byte must
 * announce a sequence of 1 to 4 bytes and be followed by the matching
 * number of 10xxxxxx continuation bytes. Overlong encodings are
 * accepted and the 0x10ffff maximum value is not enforced.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    unsigned char lead;

    if (utf == NULL)
        return(0);

    /*
     * Accepted lead bytes and the sequences they introduce:
     *    0xxxxxxx                                      1-byte
     *    110xxxxx 10xxxxxx                             2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           4-byte
     */
    for (lead = utf[0]; lead != 0; lead = utf[0]) {
        int extra;      /* continuation bytes expected after the lead */
        int k;

        if ((lead & 0x80) == 0x00)
            extra = 0;
        else if ((lead & 0xe0) == 0xc0)
            extra = 1;
        else if ((lead & 0xf0) == 0xe0)
            extra = 2;
        else if ((lead & 0xf8) == 0xf0)
            extra = 3;
        else                            /* unknown encoding */
            return(0);

        /* stops at the first bad byte, so the terminator is never passed */
        for (k = 1; k <= extra; k++) {
            if ((utf[k] & 0xc0) != 0x80)
                return(0);
        }
        utf += extra + 1;
    }
    return(1);
}
819 |
|
---|
820 | /**
|
---|
821 | * xmlUTF8Strsize:
|
---|
822 | * @utf: a sequence of UTF-8 encoded bytes
|
---|
823 | * @len: the number of characters in the array
|
---|
824 | *
|
---|
825 | * storage size of an UTF8 string
|
---|
826 | * the behaviour is not guaranteed if the input string is not UTF-8
|
---|
827 | *
|
---|
828 | * Returns the storage size of
|
---|
829 | * the first 'len' characters of ARRAY
|
---|
830 | */
|
---|
831 |
|
---|
832 | int
|
---|
833 | xmlUTF8Strsize(const xmlChar *utf, int len) {
|
---|
834 | const xmlChar *ptr=utf;
|
---|
835 | int ch;
|
---|
836 | size_t ret;
|
---|
837 |
|
---|
838 | if (utf == NULL)
|
---|
839 | return(0);
|
---|
840 |
|
---|
841 | if (len <= 0)
|
---|
842 | return(0);
|
---|
843 |
|
---|
844 | while ( len-- > 0) {
|
---|
845 | if ( !*ptr )
|
---|
846 | break;
|
---|
847 | if ( (ch = *ptr++) & 0x80)
|
---|
848 | while ((ch<<=1) & 0x80 ) {
|
---|
849 | if (*ptr == 0) break;
|
---|
850 | ptr++;
|
---|
851 | }
|
---|
852 | }
|
---|
853 | ret = ptr - utf;
|
---|
854 | return (ret > INT_MAX ? 0 : ret);
|
---|
855 | }
|
---|
856 |
|
---|
857 |
|
---|
858 | /**
|
---|
859 | * xmlUTF8Strndup:
|
---|
860 | * @utf: the input UTF8 *
|
---|
861 | * @len: the len of @utf (in chars)
|
---|
862 | *
|
---|
863 | * a strndup for array of UTF8's
|
---|
864 | *
|
---|
865 | * Returns a new UTF8 * or NULL
|
---|
866 | */
|
---|
867 | xmlChar *
|
---|
868 | xmlUTF8Strndup(const xmlChar *utf, int len) {
|
---|
869 | xmlChar *ret;
|
---|
870 | int i;
|
---|
871 |
|
---|
872 | if ((utf == NULL) || (len < 0)) return(NULL);
|
---|
873 | i = xmlUTF8Strsize(utf, len);
|
---|
874 | ret = (xmlChar *) xmlMallocAtomic((size_t) i + 1);
|
---|
875 | if (ret == NULL) {
|
---|
876 | return(NULL);
|
---|
877 | }
|
---|
878 | memcpy(ret, utf, i);
|
---|
879 | ret[i] = 0;
|
---|
880 | return(ret);
|
---|
881 | }
|
---|
882 |
|
---|
883 | /**
|
---|
884 | * xmlUTF8Strpos:
|
---|
885 | * @utf: the input UTF8 *
|
---|
886 | * @pos: the position of the desired UTF8 char (in chars)
|
---|
887 | *
|
---|
888 | * a function to provide the equivalent of fetching a
|
---|
889 | * character from a string array
|
---|
890 | *
|
---|
891 | * Returns a pointer to the UTF8 character or NULL
|
---|
892 | */
|
---|
893 | const xmlChar *
|
---|
894 | xmlUTF8Strpos(const xmlChar *utf, int pos) {
|
---|
895 | int ch;
|
---|
896 |
|
---|
897 | if (utf == NULL) return(NULL);
|
---|
898 | if (pos < 0)
|
---|
899 | return(NULL);
|
---|
900 | while (pos--) {
|
---|
901 | if ((ch=*utf++) == 0) return(NULL);
|
---|
902 | if ( ch & 0x80 ) {
|
---|
903 | /* if not simple ascii, verify proper format */
|
---|
904 | if ( (ch & 0xc0) != 0xc0 )
|
---|
905 | return(NULL);
|
---|
906 | /* then skip over remaining bytes for this char */
|
---|
907 | while ( (ch <<= 1) & 0x80 )
|
---|
908 | if ( (*utf++ & 0xc0) != 0x80 )
|
---|
909 | return(NULL);
|
---|
910 | }
|
---|
911 | }
|
---|
912 | return((xmlChar *)utf);
|
---|
913 | }
|
---|
914 |
|
---|
915 | /**
|
---|
916 | * xmlUTF8Strloc:
|
---|
917 | * @utf: the input UTF8 *
|
---|
918 | * @utfchar: the UTF8 character to be found
|
---|
919 | *
|
---|
920 | * a function to provide the relative location of a UTF8 char
|
---|
921 | *
|
---|
922 | * Returns the relative character position of the desired char
|
---|
923 | * or -1 if not found
|
---|
924 | */
|
---|
925 | int
|
---|
926 | xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
|
---|
927 | size_t i;
|
---|
928 | int size;
|
---|
929 | int ch;
|
---|
930 |
|
---|
931 | if (utf==NULL || utfchar==NULL) return -1;
|
---|
932 | size = xmlUTF8Strsize(utfchar, 1);
|
---|
933 | for(i=0; (ch=*utf) != 0; i++) {
|
---|
934 | if (xmlStrncmp(utf, utfchar, size)==0)
|
---|
935 | return(i > INT_MAX ? 0 : i);
|
---|
936 | utf++;
|
---|
937 | if ( ch & 0x80 ) {
|
---|
938 | /* if not simple ascii, verify proper format */
|
---|
939 | if ( (ch & 0xc0) != 0xc0 )
|
---|
940 | return(-1);
|
---|
941 | /* then skip over remaining bytes for this char */
|
---|
942 | while ( (ch <<= 1) & 0x80 )
|
---|
943 | if ( (*utf++ & 0xc0) != 0x80 )
|
---|
944 | return(-1);
|
---|
945 | }
|
---|
946 | }
|
---|
947 |
|
---|
948 | return(-1);
|
---|
949 | }
|
---|
950 | /**
|
---|
951 | * xmlUTF8Strsub:
|
---|
952 | * @utf: a sequence of UTF-8 encoded bytes
|
---|
953 | * @start: relative pos of first char
|
---|
954 | * @len: total number to copy
|
---|
955 | *
|
---|
956 | * Create a substring from a given UTF-8 string
|
---|
957 | * Note: positions are given in units of UTF-8 chars
|
---|
958 | *
|
---|
959 | * Returns a pointer to a newly created string
|
---|
960 | * or NULL if any problem
|
---|
961 | */
|
---|
962 |
|
---|
963 | xmlChar *
|
---|
964 | xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
|
---|
965 | int i;
|
---|
966 | int ch;
|
---|
967 |
|
---|
968 | if (utf == NULL) return(NULL);
|
---|
969 | if (start < 0) return(NULL);
|
---|
970 | if (len < 0) return(NULL);
|
---|
971 |
|
---|
972 | /*
|
---|
973 | * Skip over any leading chars
|
---|
974 | */
|
---|
975 | for (i = 0;i < start;i++) {
|
---|
976 | if ((ch=*utf++) == 0) return(NULL);
|
---|
977 | if ( ch & 0x80 ) {
|
---|
978 | /* if not simple ascii, verify proper format */
|
---|
979 | if ( (ch & 0xc0) != 0xc0 )
|
---|
980 | return(NULL);
|
---|
981 | /* then skip over remaining bytes for this char */
|
---|
982 | while ( (ch <<= 1) & 0x80 )
|
---|
983 | if ( (*utf++ & 0xc0) != 0x80 )
|
---|
984 | return(NULL);
|
---|
985 | }
|
---|
986 | }
|
---|
987 |
|
---|
988 | return(xmlUTF8Strndup(utf, len));
|
---|
989 | }
|
---|
990 |
|
---|
991 | /**
|
---|
992 | * xmlEscapeFormatString:
|
---|
993 | * @msg: a pointer to the string in which to escape '%' characters.
|
---|
994 | * Must be a heap-allocated buffer created by libxml2 that may be
|
---|
995 | * returned, or that may be freed and replaced.
|
---|
996 | *
|
---|
997 | * Replaces the string pointed to by 'msg' with an escaped string.
|
---|
998 | * Returns the same string with all '%' characters escaped.
|
---|
999 | */
|
---|
1000 | xmlChar *
|
---|
1001 | xmlEscapeFormatString(xmlChar **msg)
|
---|
1002 | {
|
---|
1003 | xmlChar *msgPtr = NULL;
|
---|
1004 | xmlChar *result = NULL;
|
---|
1005 | xmlChar *resultPtr = NULL;
|
---|
1006 | size_t count = 0;
|
---|
1007 | size_t msgLen = 0;
|
---|
1008 | size_t resultLen = 0;
|
---|
1009 |
|
---|
1010 | if (!msg || !*msg)
|
---|
1011 | return(NULL);
|
---|
1012 |
|
---|
1013 | for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
|
---|
1014 | ++msgLen;
|
---|
1015 | if (*msgPtr == '%')
|
---|
1016 | ++count;
|
---|
1017 | }
|
---|
1018 |
|
---|
1019 | if (count == 0)
|
---|
1020 | return(*msg);
|
---|
1021 |
|
---|
1022 | if ((count > INT_MAX) || (msgLen > INT_MAX - count))
|
---|
1023 | return(NULL);
|
---|
1024 | resultLen = msgLen + count + 1;
|
---|
1025 | result = (xmlChar *) xmlMallocAtomic(resultLen);
|
---|
1026 | if (result == NULL) {
|
---|
1027 | /* Clear *msg to prevent format string vulnerabilities in
|
---|
1028 | out-of-memory situations. */
|
---|
1029 | xmlFree(*msg);
|
---|
1030 | *msg = NULL;
|
---|
1031 | return(NULL);
|
---|
1032 | }
|
---|
1033 |
|
---|
1034 | for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
|
---|
1035 | *resultPtr = *msgPtr;
|
---|
1036 | if (*msgPtr == '%')
|
---|
1037 | *(++resultPtr) = '%';
|
---|
1038 | }
|
---|
1039 | result[resultLen - 1] = '\0';
|
---|
1040 |
|
---|
1041 | xmlFree(*msg);
|
---|
1042 | *msg = result;
|
---|
1043 |
|
---|
1044 | return *msg;
|
---|
1045 | }
|
---|
1046 |
|
---|