VirtualBox

source: vbox/trunk/src/libs/libxml2-2.6.30/uri.c@ 25275

Last change on this file since 25275 was 6076, checked in by vboxsync, 17 years ago

Merged dmik/s2 branch (r25959:26751) to the trunk.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Date Revision Author Id
File size: 65.2 KB
Line 
1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 2396, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
12#include "libxml.h"
13
14#include <string.h>
15
16#include <libxml/xmlmemory.h>
17#include <libxml/uri.h>
18#include <libxml/globals.h>
19#include <libxml/xmlerror.h>
20
/************************************************************************
 *                                                                      *
 *      Macros to differentiate various character type                  *
 *             directly extracted from RFC 2396                         *
 *                                                                      *
 ************************************************************************/

/*
 * Conventions used below:
 *   - macros taking (x) test a single character value;
 *   - macros taking (p) take a pointer to char and test *(p); those built
 *     on IS_ESCAPED may also read (p)[1] and (p)[2].
 * Every macro evaluates its argument more than once, so arguments must
 * not have side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *               "a" | "b" | "c" | "d" | "e" | "f"
 */

#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))


/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * escaped = "%" hex hex
 *
 * The && chain short-circuits, so (p)[2] is only read when (p)[1] is a
 * hex digit (and therefore not the string terminator).
 */

#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) &&		\
	    (IS_HEX((p)[2])))

/*
 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                 "&" | "=" | "+" | "$" | ","
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	        ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	        ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	        ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))

/*
 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
 */
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
	        ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
	        ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
	        ((*(p) == ',')))

/*
 * rel_segment   = 1*( unreserved | escaped |
 *                 ";" | "@" | "&" | "=" | "+" | "$" | "," )
 */

#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
          ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||	\
	  ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||	\
	  ((*(p) == ',')))

/*
 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
 */

#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))

/*
 * reg_name = 1*( unreserved | escaped | "$" | "," |
 *                ";" | ":" | "@" | "&" | "=" | "+" )
 */

#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))

/*
 * userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
 *                      "+" | "$" | "," )
 */
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||		\
       ((*(p) == ',')))

/*
 * uric = reserved | unreserved | escaped
 */

#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||		\
	            (IS_RESERVED(*(p))))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * "[" and "]" are accepted here as well as by IS_RESERVED.
 */

#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||		\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * Skip to next pointer char, handle escaped sequences: advances (p) by
 * three when it points at '%', by one otherwise.
 *
 * (p) must be a modifiable pointer lvalue.  The argument is fully
 * parenthesized so that expressions such as NEXT(*pp) advance *pp and
 * not pp.  No check is made that two more characters follow the '%';
 * callers are expected to have validated the position first (e.g. with
 * IS_ESCAPED through IS_URIC).
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first (n) bytes of (s) with xmlStrndup() and hand the
 * result back as a plain char *.  The expansion is parenthesized so it
 * can be used safely inside larger expressions.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
189
190/************************************************************************
191 * *
192 * Generic URI structure functions *
193 * *
194 ************************************************************************/
195
196/**
197 * xmlCreateURI:
198 *
199 * Simply creates an empty xmlURI
200 *
201 * Returns the new structure or NULL in case of error
202 */
203xmlURIPtr
204xmlCreateURI(void) {
205 xmlURIPtr ret;
206
207 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
208 if (ret == NULL) {
209 xmlGenericError(xmlGenericErrorContext,
210 "xmlCreateURI: out of memory\n");
211 return(NULL);
212 }
213 memset(ret, 0, sizeof(xmlURI));
214 return(ret);
215}
216
217/**
218 * xmlSaveUri:
219 * @uri: pointer to an xmlURI
220 *
221 * Save the URI as an escaped string
222 *
223 * Returns a new string (to be deallocated by caller)
224 */
225xmlChar *
226xmlSaveUri(xmlURIPtr uri) {
227 xmlChar *ret = NULL;
228 const char *p;
229 int len;
230 int max;
231
232 if (uri == NULL) return(NULL);
233
234
235 max = 80;
236 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
237 if (ret == NULL) {
238 xmlGenericError(xmlGenericErrorContext,
239 "xmlSaveUri: out of memory\n");
240 return(NULL);
241 }
242 len = 0;
243
244 if (uri->scheme != NULL) {
245 p = uri->scheme;
246 while (*p != 0) {
247 if (len >= max) {
248 max *= 2;
249 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
250 if (ret == NULL) {
251 xmlGenericError(xmlGenericErrorContext,
252 "xmlSaveUri: out of memory\n");
253 return(NULL);
254 }
255 }
256 ret[len++] = *p++;
257 }
258 if (len >= max) {
259 max *= 2;
260 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
261 if (ret == NULL) {
262 xmlGenericError(xmlGenericErrorContext,
263 "xmlSaveUri: out of memory\n");
264 return(NULL);
265 }
266 }
267 ret[len++] = ':';
268 }
269 if (uri->opaque != NULL) {
270 p = uri->opaque;
271 while (*p != 0) {
272 if (len + 3 >= max) {
273 max *= 2;
274 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
275 if (ret == NULL) {
276 xmlGenericError(xmlGenericErrorContext,
277 "xmlSaveUri: out of memory\n");
278 return(NULL);
279 }
280 }
281 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
282 ret[len++] = *p++;
283 else {
284 int val = *(unsigned char *)p++;
285 int hi = val / 0x10, lo = val % 0x10;
286 ret[len++] = '%';
287 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
289 }
290 }
291 } else {
292 if (uri->server != NULL) {
293 if (len + 3 >= max) {
294 max *= 2;
295 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
296 if (ret == NULL) {
297 xmlGenericError(xmlGenericErrorContext,
298 "xmlSaveUri: out of memory\n");
299 return(NULL);
300 }
301 }
302 ret[len++] = '/';
303 ret[len++] = '/';
304 if (uri->user != NULL) {
305 p = uri->user;
306 while (*p != 0) {
307 if (len + 3 >= max) {
308 max *= 2;
309 ret = (xmlChar *) xmlRealloc(ret,
310 (max + 1) * sizeof(xmlChar));
311 if (ret == NULL) {
312 xmlGenericError(xmlGenericErrorContext,
313 "xmlSaveUri: out of memory\n");
314 return(NULL);
315 }
316 }
317 if ((IS_UNRESERVED(*(p))) ||
318 ((*(p) == ';')) || ((*(p) == ':')) ||
319 ((*(p) == '&')) || ((*(p) == '=')) ||
320 ((*(p) == '+')) || ((*(p) == '$')) ||
321 ((*(p) == ',')))
322 ret[len++] = *p++;
323 else {
324 int val = *(unsigned char *)p++;
325 int hi = val / 0x10, lo = val % 0x10;
326 ret[len++] = '%';
327 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
329 }
330 }
331 if (len + 3 >= max) {
332 max *= 2;
333 ret = (xmlChar *) xmlRealloc(ret,
334 (max + 1) * sizeof(xmlChar));
335 if (ret == NULL) {
336 xmlGenericError(xmlGenericErrorContext,
337 "xmlSaveUri: out of memory\n");
338 return(NULL);
339 }
340 }
341 ret[len++] = '@';
342 }
343 p = uri->server;
344 while (*p != 0) {
345 if (len >= max) {
346 max *= 2;
347 ret = (xmlChar *) xmlRealloc(ret,
348 (max + 1) * sizeof(xmlChar));
349 if (ret == NULL) {
350 xmlGenericError(xmlGenericErrorContext,
351 "xmlSaveUri: out of memory\n");
352 return(NULL);
353 }
354 }
355 ret[len++] = *p++;
356 }
357 if (uri->port > 0) {
358 if (len + 10 >= max) {
359 max *= 2;
360 ret = (xmlChar *) xmlRealloc(ret,
361 (max + 1) * sizeof(xmlChar));
362 if (ret == NULL) {
363 xmlGenericError(xmlGenericErrorContext,
364 "xmlSaveUri: out of memory\n");
365 return(NULL);
366 }
367 }
368 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
369 }
370 } else if (uri->authority != NULL) {
371 if (len + 3 >= max) {
372 max *= 2;
373 ret = (xmlChar *) xmlRealloc(ret,
374 (max + 1) * sizeof(xmlChar));
375 if (ret == NULL) {
376 xmlGenericError(xmlGenericErrorContext,
377 "xmlSaveUri: out of memory\n");
378 return(NULL);
379 }
380 }
381 ret[len++] = '/';
382 ret[len++] = '/';
383 p = uri->authority;
384 while (*p != 0) {
385 if (len + 3 >= max) {
386 max *= 2;
387 ret = (xmlChar *) xmlRealloc(ret,
388 (max + 1) * sizeof(xmlChar));
389 if (ret == NULL) {
390 xmlGenericError(xmlGenericErrorContext,
391 "xmlSaveUri: out of memory\n");
392 return(NULL);
393 }
394 }
395 if ((IS_UNRESERVED(*(p))) ||
396 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398 ((*(p) == '=')) || ((*(p) == '+')))
399 ret[len++] = *p++;
400 else {
401 int val = *(unsigned char *)p++;
402 int hi = val / 0x10, lo = val % 0x10;
403 ret[len++] = '%';
404 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
406 }
407 }
408 } else if (uri->scheme != NULL) {
409 if (len + 3 >= max) {
410 max *= 2;
411 ret = (xmlChar *) xmlRealloc(ret,
412 (max + 1) * sizeof(xmlChar));
413 if (ret == NULL) {
414 xmlGenericError(xmlGenericErrorContext,
415 "xmlSaveUri: out of memory\n");
416 return(NULL);
417 }
418 }
419 ret[len++] = '/';
420 ret[len++] = '/';
421 }
422 if (uri->path != NULL) {
423 p = uri->path;
424 while (*p != 0) {
425 if (len + 3 >= max) {
426 max *= 2;
427 ret = (xmlChar *) xmlRealloc(ret,
428 (max + 1) * sizeof(xmlChar));
429 if (ret == NULL) {
430 xmlGenericError(xmlGenericErrorContext,
431 "xmlSaveUri: out of memory\n");
432 return(NULL);
433 }
434 }
435 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
436 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
437 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
438 ((*(p) == ',')))
439 ret[len++] = *p++;
440 else {
441 int val = *(unsigned char *)p++;
442 int hi = val / 0x10, lo = val % 0x10;
443 ret[len++] = '%';
444 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
445 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
446 }
447 }
448 }
449 if (uri->query_raw != NULL) {
450 if (len + 1 >= max) {
451 max *= 2;
452 ret = (xmlChar *) xmlRealloc(ret,
453 (max + 1) * sizeof(xmlChar));
454 if (ret == NULL) {
455 xmlGenericError(xmlGenericErrorContext,
456 "xmlSaveUri: out of memory\n");
457 return(NULL);
458 }
459 }
460 ret[len++] = '?';
461 p = uri->query_raw;
462 while (*p != 0) {
463 if (len + 1 >= max) {
464 max *= 2;
465 ret = (xmlChar *) xmlRealloc(ret,
466 (max + 1) * sizeof(xmlChar));
467 if (ret == NULL) {
468 xmlGenericError(xmlGenericErrorContext,
469 "xmlSaveUri: out of memory\n");
470 return(NULL);
471 }
472 }
473 ret[len++] = *p++;
474 }
475 } else if (uri->query != NULL) {
476 if (len + 3 >= max) {
477 max *= 2;
478 ret = (xmlChar *) xmlRealloc(ret,
479 (max + 1) * sizeof(xmlChar));
480 if (ret == NULL) {
481 xmlGenericError(xmlGenericErrorContext,
482 "xmlSaveUri: out of memory\n");
483 return(NULL);
484 }
485 }
486 ret[len++] = '?';
487 p = uri->query;
488 while (*p != 0) {
489 if (len + 3 >= max) {
490 max *= 2;
491 ret = (xmlChar *) xmlRealloc(ret,
492 (max + 1) * sizeof(xmlChar));
493 if (ret == NULL) {
494 xmlGenericError(xmlGenericErrorContext,
495 "xmlSaveUri: out of memory\n");
496 return(NULL);
497 }
498 }
499 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
500 ret[len++] = *p++;
501 else {
502 int val = *(unsigned char *)p++;
503 int hi = val / 0x10, lo = val % 0x10;
504 ret[len++] = '%';
505 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
506 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
507 }
508 }
509 }
510 }
511 if (uri->fragment != NULL) {
512 if (len + 3 >= max) {
513 max *= 2;
514 ret = (xmlChar *) xmlRealloc(ret,
515 (max + 1) * sizeof(xmlChar));
516 if (ret == NULL) {
517 xmlGenericError(xmlGenericErrorContext,
518 "xmlSaveUri: out of memory\n");
519 return(NULL);
520 }
521 }
522 ret[len++] = '#';
523 p = uri->fragment;
524 while (*p != 0) {
525 if (len + 3 >= max) {
526 max *= 2;
527 ret = (xmlChar *) xmlRealloc(ret,
528 (max + 1) * sizeof(xmlChar));
529 if (ret == NULL) {
530 xmlGenericError(xmlGenericErrorContext,
531 "xmlSaveUri: out of memory\n");
532 return(NULL);
533 }
534 }
535 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
536 ret[len++] = *p++;
537 else {
538 int val = *(unsigned char *)p++;
539 int hi = val / 0x10, lo = val % 0x10;
540 ret[len++] = '%';
541 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
542 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
543 }
544 }
545 }
546 if (len >= max) {
547 max *= 2;
548 ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
549 if (ret == NULL) {
550 xmlGenericError(xmlGenericErrorContext,
551 "xmlSaveUri: out of memory\n");
552 return(NULL);
553 }
554 }
555 ret[len++] = 0;
556 return(ret);
557}
558
559/**
560 * xmlPrintURI:
561 * @stream: a FILE* for the output
562 * @uri: pointer to an xmlURI
563 *
564 * Prints the URI in the stream @stream.
565 */
566void
567xmlPrintURI(FILE *stream, xmlURIPtr uri) {
568 xmlChar *out;
569
570 out = xmlSaveUri(uri);
571 if (out != NULL) {
572 fprintf(stream, "%s", (char *) out);
573 xmlFree(out);
574 }
575}
576
577/**
578 * xmlCleanURI:
579 * @uri: pointer to an xmlURI
580 *
581 * Make sure the xmlURI struct is free of content
582 */
583static void
584xmlCleanURI(xmlURIPtr uri) {
585 if (uri == NULL) return;
586
587 if (uri->scheme != NULL) xmlFree(uri->scheme);
588 uri->scheme = NULL;
589 if (uri->server != NULL) xmlFree(uri->server);
590 uri->server = NULL;
591 if (uri->user != NULL) xmlFree(uri->user);
592 uri->user = NULL;
593 if (uri->path != NULL) xmlFree(uri->path);
594 uri->path = NULL;
595 if (uri->fragment != NULL) xmlFree(uri->fragment);
596 uri->fragment = NULL;
597 if (uri->opaque != NULL) xmlFree(uri->opaque);
598 uri->opaque = NULL;
599 if (uri->authority != NULL) xmlFree(uri->authority);
600 uri->authority = NULL;
601 if (uri->query != NULL) xmlFree(uri->query);
602 uri->query = NULL;
603 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
604 uri->query_raw = NULL;
605}
606
607/**
608 * xmlFreeURI:
609 * @uri: pointer to an xmlURI
610 *
611 * Free up the xmlURI struct
612 */
613void
614xmlFreeURI(xmlURIPtr uri) {
615 if (uri == NULL) return;
616
617 if (uri->scheme != NULL) xmlFree(uri->scheme);
618 if (uri->server != NULL) xmlFree(uri->server);
619 if (uri->user != NULL) xmlFree(uri->user);
620 if (uri->path != NULL) xmlFree(uri->path);
621 if (uri->fragment != NULL) xmlFree(uri->fragment);
622 if (uri->opaque != NULL) xmlFree(uri->opaque);
623 if (uri->authority != NULL) xmlFree(uri->authority);
624 if (uri->query != NULL) xmlFree(uri->query);
625 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
626 xmlFree(uri);
627}
628
629/************************************************************************
630 * *
631 * Helper functions *
632 * *
633 ************************************************************************/
634
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps of RFC 2396 Section 5.2 (steps 6.c
 * through 6.g) to a path string.
 *
 * The string is rewritten in place; the result is never longer than the
 * input and no allocation is performed.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd, *wr;

    if (path == NULL)
        return(-1);

    /*
     * Leading "/" characters are left alone: move to the first character
     * of the first non-empty segment.
     */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /* Everything before this point is kept as is. */
    wr = rd;

    /*
     * First pass, cases (c) and (d).  "rd" always sits on the first
     * character of a segment at the top of the loop.
     */
    while (*rd != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path
         *    segment, are removed from the buffer string.  Any run of
         *    '/' following it is dropped as well.
         */
        if ((rd[0] == '.') && (rd[1] == '/')) {
            rd += 2;
            while (*rd == '/')
                rd++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path
         *    segment, that "." is removed.
         */
        if ((rd[0] == '.') && (rd[1] == '\0'))
            break;

        /* Any other segment is copied down. */
        while ((*rd != '/') && (*rd != '\0'))
            *wr++ = *rd++;
        if (*rd == '\0')
            break;

        /* Normalize "//": keep only the last '/' of a run. */
        while ((rd[0] == '/') && (rd[1] == '/'))
            rd++;
        *wr++ = *rd++;
    }
    *wr = '\0';

    /* Restart from the first segment for the second pass. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * The buffer is compacted each time a pair is removed, so a single
     * position pointer is enough; "rd" points to the first character of
     * the segment being examined.
     */
    for (;;) {
        char *next, *dst, *prev;
        int cur_is_dotdot, next_is_dotdot;

        /* Locate the end of the current segment. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A last segment cannot be followed by "..": nothing left to do. */
        if (*next == '\0')
            break;

        /* Step over the '/' to the first character of the next segment. */
        next++;

        cur_is_dotdot = (rd[0] == '.') && (rd[1] == '.') &&
                        (next == rd + 3);
        next_is_dotdot = (next[0] == '.') && (next[1] == '.') &&
                         ((next[2] == '/') || (next[2] == '\0'));

        /*
         * Keep the current segment if it is itself "..", or if the one
         * after it is not "..".
         */
        if (cur_is_dotdot || !next_is_dotdot) {
            rd = next;
            continue;
        }

        /* "<segment>/.." at the very end: cut the string here (case f). */
        if (next[2] == '\0') {
            *rd = '\0';
            break;
        }

        /*
         * Drop "<segment>/../" by sliding the tail down.  The regions
         * overlap, so copy byte by byte (terminator included) rather
         * than using strcpy.
         */
        dst = rd;
        next += 3;
        do {
            *dst = *next++;
        } while (*dst++ != '\0');

        /*
         * Walk back over the '/' characters preceding the removed pair.
         * Reaching the start of the buffer means there is no previous
         * segment, so just go on from the current position.
         */
        prev = rd;
        while (prev > path) {
            prev--;
            if (*prev != '/')
                break;
        }
        if (prev == path)
            continue;

        /*
         * "prev" points at the end of the previous segment; move to its
         * start and examine it again.  This is what lets "foo/bar/../.."
         * collapse completely: after "bar/.." is removed, "foo/.." must
         * be reconsidered.
         */
        rd = prev;
        while ((rd > path) && (rd[-1] != '/'))
            rd--;
    }
    /* "wr" is still the end of the first-pass output; the string can only
     * have shrunk since, so this write stays inside the buffer. */
    *wr = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error.  Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * Here they are discarded from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        for (rd = path;
             (rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
             ((rd[3] == '/') || (rd[3] == '\0'));
             rd += 3)
            ;

        if (rd != path) {
            for (wr = path; *rd != '\0'; )
                *wr++ = *rd++;
            *wr = 0;
        }
    }

    return(0);
}
822
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
830
831/**
832 * xmlURIUnescapeString:
833 * @str: the string to unescape
834 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
835 * @target: optional destination buffer
836 *
837 * Unescaping routine, but does not check that the string is an URI. The
838 * output is a direct unsigned char translation of %XX values (no encoding)
839 * Note that the length of the result can only be smaller or same size as
840 * the input string.
841 *
842 * Returns a copy of the string, but unescaped, will return NULL only in case
843 * of error
844 */
845char *
846xmlURIUnescapeString(const char *str, int len, char *target) {
847 char *ret, *out;
848 const char *in;
849
850 if (str == NULL)
851 return(NULL);
852 if (len <= 0) len = strlen(str);
853 if (len < 0) return(NULL);
854
855 if (target == NULL) {
856 ret = (char *) xmlMallocAtomic(len + 1);
857 if (ret == NULL) {
858 xmlGenericError(xmlGenericErrorContext,
859 "xmlURIUnescapeString: out of memory\n");
860 return(NULL);
861 }
862 } else
863 ret = target;
864 in = str;
865 out = ret;
866 while(len > 0) {
867 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
868 in++;
869 if ((*in >= '0') && (*in <= '9'))
870 *out = (*in - '0');
871 else if ((*in >= 'a') && (*in <= 'f'))
872 *out = (*in - 'a') + 10;
873 else if ((*in >= 'A') && (*in <= 'F'))
874 *out = (*in - 'A') + 10;
875 in++;
876 if ((*in >= '0') && (*in <= '9'))
877 *out = *out * 16 + (*in - '0');
878 else if ((*in >= 'a') && (*in <= 'f'))
879 *out = *out * 16 + (*in - 'a') + 10;
880 else if ((*in >= 'A') && (*in <= 'F'))
881 *out = *out * 16 + (*in - 'A') + 10;
882 in++;
883 len -= 3;
884 out++;
885 } else {
886 *out++ = *in++;
887 len--;
888 }
889 }
890 *out = 0;
891 return(ret);
892}
893
894/**
895 * xmlURIEscapeStr:
896 * @str: string to escape
897 * @list: exception list string of chars not to escape
898 *
899 * This routine escapes a string to hex, ignoring reserved characters (a-z)
900 * and the characters in the exception list.
901 *
902 * Returns a new escaped string or NULL in case of error.
903 */
904xmlChar *
905xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
906 xmlChar *ret, ch;
907 const xmlChar *in;
908
909 unsigned int len, out;
910
911 if (str == NULL)
912 return(NULL);
913 if (str[0] == 0)
914 return(xmlStrdup(str));
915 len = xmlStrlen(str);
916 if (!(len > 0)) return(NULL);
917
918 len += 20;
919 ret = (xmlChar *) xmlMallocAtomic(len);
920 if (ret == NULL) {
921 xmlGenericError(xmlGenericErrorContext,
922 "xmlURIEscapeStr: out of memory\n");
923 return(NULL);
924 }
925 in = (const xmlChar *) str;
926 out = 0;
927 while(*in != 0) {
928 if (len - out <= 3) {
929 len += 20;
930 ret = (xmlChar *) xmlRealloc(ret, len);
931 if (ret == NULL) {
932 xmlGenericError(xmlGenericErrorContext,
933 "xmlURIEscapeStr: out of memory\n");
934 return(NULL);
935 }
936 }
937
938 ch = *in;
939
940 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
941 unsigned char val;
942 ret[out++] = '%';
943 val = ch >> 4;
944 if (val <= 9)
945 ret[out++] = '0' + val;
946 else
947 ret[out++] = 'A' + val - 0xA;
948 val = ch & 0xF;
949 if (val <= 9)
950 ret[out++] = '0' + val;
951 else
952 ret[out++] = 'A' + val - 0xA;
953 in++;
954 } else {
955 ret[out++] = *in++;
956 }
957
958 }
959 ret[out] = 0;
960 return(ret);
961}
962
963/**
964 * xmlURIEscape:
965 * @str: the string of the URI to escape
966 *
967 * Escaping routine, does not do validity checks !
968 * It will try to escape the chars needing this, but this is heuristic
969 * based it's impossible to be sure.
970 *
971 * Returns an copy of the string, but escaped
972 *
973 * 25 May 2001
974 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
975 * according to RFC2396.
976 * - Carl Douglas
977 */
978xmlChar *
979xmlURIEscape(const xmlChar * str)
980{
981 xmlChar *ret, *segment = NULL;
982 xmlURIPtr uri;
983 int ret2;
984
985#define NULLCHK(p) if(!p) { \
986 xmlGenericError(xmlGenericErrorContext, \
987 "xmlURIEscape: out of memory\n"); \
988 return NULL; }
989
990 if (str == NULL)
991 return (NULL);
992
993 uri = xmlCreateURI();
994 if (uri != NULL) {
995 /*
996 * Allow escaping errors in the unescaped form
997 */
998 uri->cleanup = 1;
999 ret2 = xmlParseURIReference(uri, (const char *)str);
1000 if (ret2) {
1001 xmlFreeURI(uri);
1002 return (NULL);
1003 }
1004 }
1005
1006 if (!uri)
1007 return NULL;
1008
1009 ret = NULL;
1010
1011 if (uri->scheme) {
1012 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1013 NULLCHK(segment)
1014 ret = xmlStrcat(ret, segment);
1015 ret = xmlStrcat(ret, BAD_CAST ":");
1016 xmlFree(segment);
1017 }
1018
1019 if (uri->authority) {
1020 segment =
1021 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1022 NULLCHK(segment)
1023 ret = xmlStrcat(ret, BAD_CAST "//");
1024 ret = xmlStrcat(ret, segment);
1025 xmlFree(segment);
1026 }
1027
1028 if (uri->user) {
1029 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1030 NULLCHK(segment)
1031 ret = xmlStrcat(ret,BAD_CAST "//");
1032 ret = xmlStrcat(ret, segment);
1033 ret = xmlStrcat(ret, BAD_CAST "@");
1034 xmlFree(segment);
1035 }
1036
1037 if (uri->server) {
1038 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1039 NULLCHK(segment)
1040 if (uri->user == NULL)
1041 ret = xmlStrcat(ret, BAD_CAST "//");
1042 ret = xmlStrcat(ret, segment);
1043 xmlFree(segment);
1044 }
1045
1046 if (uri->port) {
1047 xmlChar port[10];
1048
1049 snprintf((char *) port, 10, "%d", uri->port);
1050 ret = xmlStrcat(ret, BAD_CAST ":");
1051 ret = xmlStrcat(ret, port);
1052 }
1053
1054 if (uri->path) {
1055 segment =
1056 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1057 NULLCHK(segment)
1058 ret = xmlStrcat(ret, segment);
1059 xmlFree(segment);
1060 }
1061
1062 if (uri->query_raw) {
1063 ret = xmlStrcat(ret, BAD_CAST "?");
1064 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1065 }
1066 else if (uri->query) {
1067 segment =
1068 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1069 NULLCHK(segment)
1070 ret = xmlStrcat(ret, BAD_CAST "?");
1071 ret = xmlStrcat(ret, segment);
1072 xmlFree(segment);
1073 }
1074
1075 if (uri->opaque) {
1076 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1077 NULLCHK(segment)
1078 ret = xmlStrcat(ret, segment);
1079 xmlFree(segment);
1080 }
1081
1082 if (uri->fragment) {
1083 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1084 NULLCHK(segment)
1085 ret = xmlStrcat(ret, BAD_CAST "#");
1086 ret = xmlStrcat(ret, segment);
1087 xmlFree(segment);
1088 }
1089
1090 xmlFreeURI(uri);
1091#undef NULLCHK
1092
1093 return (ret);
1094}
1095
1096/************************************************************************
1097 * *
1098 * Escaped URI parsing *
1099 * *
1100 ************************************************************************/
1101
1102/**
1103 * xmlParseURIFragment:
1104 * @uri: pointer to an URI structure
1105 * @str: pointer to the string to analyze
1106 *
1107 * Parse an URI fragment string and fills in the appropriate fields
1108 * of the @uri structure.
1109 *
1110 * fragment = *uric
1111 *
1112 * Returns 0 or the error code
1113 */
1114static int
1115xmlParseURIFragment(xmlURIPtr uri, const char **str)
1116{
1117 const char *cur;
1118
1119 if (str == NULL)
1120 return (-1);
1121
1122 cur = *str;
1123
1124 while (IS_URIC(cur) || IS_UNWISE(cur))
1125 NEXT(cur);
1126 if (uri != NULL) {
1127 if (uri->fragment != NULL)
1128 xmlFree(uri->fragment);
1129 if (uri->cleanup & 2)
1130 uri->fragment = STRNDUP(*str, cur - *str);
1131 else
1132 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1133 }
1134 *str = cur;
1135 return (0);
1136}
1137
1138/**
1139 * xmlParseURIQuery:
1140 * @uri: pointer to an URI structure
1141 * @str: pointer to the string to analyze
1142 *
1143 * Parse the query part of an URI
1144 *
1145 * query = *uric
1146 *
1147 * Returns 0 or the error code
1148 */
1149static int
1150xmlParseURIQuery(xmlURIPtr uri, const char **str)
1151{
1152 const char *cur;
1153
1154 if (str == NULL)
1155 return (-1);
1156
1157 cur = *str;
1158
1159 while ((IS_URIC(cur)) ||
1160 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1161 NEXT(cur);
1162 if (uri != NULL) {
1163 if (uri->query != NULL)
1164 xmlFree(uri->query);
1165 if (uri->cleanup & 2)
1166 uri->query = STRNDUP(*str, cur - *str);
1167 else
1168 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1169
1170 /* Save the raw bytes of the query as well.
1171 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
1172 */
1173 if (uri->query_raw != NULL)
1174 xmlFree (uri->query_raw);
1175 uri->query_raw = STRNDUP (*str, cur - *str);
1176 }
1177 *str = cur;
1178 return (0);
1179}
1180
1181/**
1182 * xmlParseURIScheme:
1183 * @uri: pointer to an URI structure
1184 * @str: pointer to the string to analyze
1185 *
1186 * Parse an URI scheme
1187 *
1188 * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1189 *
1190 * Returns 0 or the error code
1191 */
1192static int
1193xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1194 const char *cur;
1195
1196 if (str == NULL)
1197 return(-1);
1198
1199 cur = *str;
1200 if (!IS_ALPHA(*cur))
1201 return(2);
1202 cur++;
1203 while (IS_SCHEME(*cur)) cur++;
1204 if (uri != NULL) {
1205 if (uri->scheme != NULL) xmlFree(uri->scheme);
1206 uri->scheme = STRNDUP(*str, cur - *str);
1207 }
1208 *str = cur;
1209 return(0);
1210}
1211
1212/**
1213 * xmlParseURIOpaquePart:
1214 * @uri: pointer to an URI structure
1215 * @str: pointer to the string to analyze
1216 *
1217 * Parse an URI opaque part
1218 *
1219 * opaque_part = uric_no_slash *uric
1220 *
1221 * Returns 0 or the error code
1222 */
1223static int
1224xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1225{
1226 const char *cur;
1227
1228 if (str == NULL)
1229 return (-1);
1230
1231 cur = *str;
1232 if (!((IS_URIC_NO_SLASH(cur)) ||
1233 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1234 return (3);
1235 }
1236 NEXT(cur);
1237 while ((IS_URIC(cur)) ||
1238 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1239 NEXT(cur);
1240 if (uri != NULL) {
1241 if (uri->opaque != NULL)
1242 xmlFree(uri->opaque);
1243 if (uri->cleanup & 2)
1244 uri->opaque = STRNDUP(*str, cur - *str);
1245 else
1246 uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1247 }
1248 *str = cur;
1249 return (0);
1250}
1251
1252/**
1253 * xmlParseURIServer:
1254 * @uri: pointer to an URI structure
1255 * @str: pointer to the string to analyze
1256 *
1257 * Parse a server subpart of an URI, it's a finer grain analysis
1258 * of the authority part.
1259 *
1260 * server = [ [ userinfo "@" ] hostport ]
1261 * userinfo = *( unreserved | escaped |
1262 * ";" | ":" | "&" | "=" | "+" | "$" | "," )
1263 * hostport = host [ ":" port ]
1264 * host = hostname | IPv4address | IPv6reference
1265 * hostname = *( domainlabel "." ) toplabel [ "." ]
1266 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1267 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1268 * IPv6reference = "[" IPv6address "]"
1269 * IPv6address = hexpart [ ":" IPv4address ]
1270 * IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1271 * hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1272 * hexseq = hex4 *( ":" hex4)
1273 * hex4 = 1*4hexdig
1274 * port = *digit
1275 *
1276 * Returns 0 or the error code
1277 */
1278static int
1279xmlParseURIServer(xmlURIPtr uri, const char **str) {
1280 const char *cur;
1281 const char *host, *tmp;
1282 const int IPV4max = 4;
1283 const int IPV6max = 8;
1284 int oct;
1285
1286 if (str == NULL)
1287 return(-1);
1288
1289 cur = *str;
1290
1291 /*
1292 * is there a userinfo ?
1293 */
1294 while (IS_USERINFO(cur)) NEXT(cur);
1295 if (*cur == '@') {
1296 if (uri != NULL) {
1297 if (uri->user != NULL) xmlFree(uri->user);
1298 if (uri->cleanup & 2)
1299 uri->user = STRNDUP(*str, cur - *str);
1300 else
1301 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1302 }
1303 cur++;
1304 } else {
1305 if (uri != NULL) {
1306 if (uri->user != NULL) xmlFree(uri->user);
1307 uri->user = NULL;
1308 }
1309 cur = *str;
1310 }
1311 /*
1312 * This can be empty in the case where there is no server
1313 */
1314 host = cur;
1315 if (*cur == '/') {
1316 if (uri != NULL) {
1317 if (uri->authority != NULL) xmlFree(uri->authority);
1318 uri->authority = NULL;
1319 if (uri->server != NULL) xmlFree(uri->server);
1320 uri->server = NULL;
1321 uri->port = 0;
1322 }
1323 return(0);
1324 }
1325 /*
1326 * host part of hostport can denote an IPV4 address, an IPV6 address
1327 * or an unresolved name. Check the IP first, its easier to detect
1328 * errors if wrong one.
1329 * An IPV6 address must start with a '[' and end with a ']'.
1330 */
1331 if (*cur == '[') {
1332 int compress=0;
1333 cur++;
1334 for (oct = 0; oct < IPV6max; ++oct) {
1335 if (*cur == ':') {
1336 if (compress)
1337 return(3); /* multiple compression attempted */
1338 if (!oct) { /* initial char is compression */
1339 if (*++cur != ':')
1340 return(3);
1341 }
1342 compress = 1; /* set compression-encountered flag */
1343 cur++; /* skip over the second ':' */
1344 continue;
1345 }
1346 while(IS_HEX(*cur)) cur++;
1347 if (oct == (IPV6max-1))
1348 continue;
1349 if (*cur != ':')
1350 break;
1351 cur++;
1352 }
1353 if ((!compress) && (oct != IPV6max))
1354 return(3);
1355 if (*cur != ']')
1356 return(3);
1357 if (uri != NULL) {
1358 if (uri->server != NULL) xmlFree(uri->server);
1359 uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1360 (cur-host)-1);
1361 }
1362 cur++;
1363 } else {
1364 /*
1365 * Not IPV6, maybe IPV4
1366 */
1367 for (oct = 0; oct < IPV4max; ++oct) {
1368 if (*cur == '.')
1369 return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1370 while(IS_DIGIT(*cur)) cur++;
1371 if (oct == (IPV4max-1))
1372 continue;
1373 if (*cur != '.')
1374 break;
1375 cur++;
1376 }
1377 }
1378 if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1379 IS_ALPHA(*cur))) {
1380 /* maybe host_name */
1381 if (!IS_ALPHANUM(*cur))
1382 return(4); /* e.g. http://xml.$oft */
1383 do {
1384 do ++cur; while (IS_ALPHANUM(*cur));
1385 if (*cur == '-') {
1386 --cur;
1387 if (*cur == '.')
1388 return(5); /* e.g. http://xml.-soft */
1389 ++cur;
1390 continue;
1391 }
1392 if (*cur == '.') {
1393 --cur;
1394 if (*cur == '-')
1395 return(6); /* e.g. http://xml-.soft */
1396 if (*cur == '.')
1397 return(7); /* e.g. http://xml..soft */
1398 ++cur;
1399 continue;
1400 }
1401 break;
1402 } while (1);
1403 tmp = cur;
1404 if (tmp[-1] == '.')
1405 --tmp; /* e.g. http://xml.$Oft/ */
1406 do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1407 if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1408 return(8); /* e.g. http://xmlsOft.0rg/ */
1409 }
1410 if (uri != NULL) {
1411 if (uri->authority != NULL) xmlFree(uri->authority);
1412 uri->authority = NULL;
1413 if (host[0] != '[') { /* it's not an IPV6 addr */
1414 if (uri->server != NULL) xmlFree(uri->server);
1415 if (uri->cleanup & 2)
1416 uri->server = STRNDUP(host, cur - host);
1417 else
1418 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1419 }
1420 }
1421 /*
1422 * finish by checking for a port presence.
1423 */
1424 if (*cur == ':') {
1425 cur++;
1426 if (IS_DIGIT(*cur)) {
1427 if (uri != NULL)
1428 uri->port = 0;
1429 while (IS_DIGIT(*cur)) {
1430 if (uri != NULL)
1431 uri->port = uri->port * 10 + (*cur - '0');
1432 cur++;
1433 }
1434 }
1435 }
1436 *str = cur;
1437 return(0);
1438}
1439
1440/**
1441 * xmlParseURIRelSegment:
1442 * @uri: pointer to an URI structure
1443 * @str: pointer to the string to analyze
1444 *
1445 * Parse an URI relative segment
1446 *
1447 * rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1448 * "+" | "$" | "," )
1449 *
1450 * Returns 0 or the error code
1451 */
1452static int
1453xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1454{
1455 const char *cur;
1456
1457 if (str == NULL)
1458 return (-1);
1459
1460 cur = *str;
1461 if (!((IS_SEGMENT(cur)) ||
1462 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1463 return (3);
1464 }
1465 NEXT(cur);
1466 while ((IS_SEGMENT(cur)) ||
1467 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1468 NEXT(cur);
1469 if (uri != NULL) {
1470 if (uri->path != NULL)
1471 xmlFree(uri->path);
1472 if (uri->cleanup & 2)
1473 uri->path = STRNDUP(*str, cur - *str);
1474 else
1475 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1476 }
1477 *str = cur;
1478 return (0);
1479}
1480
1481/**
1482 * xmlParseURIPathSegments:
1483 * @uri: pointer to an URI structure
1484 * @str: pointer to the string to analyze
1485 * @slash: should we add a leading slash
1486 *
1487 * Parse an URI set of path segments
1488 *
1489 * path_segments = segment *( "/" segment )
1490 * segment = *pchar *( ";" param )
1491 * param = *pchar
1492 *
1493 * Returns 0 or the error code
1494 */
1495static int
1496xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1497{
1498 const char *cur;
1499
1500 if (str == NULL)
1501 return (-1);
1502
1503 cur = *str;
1504
1505 do {
1506 while ((IS_PCHAR(cur)) ||
1507 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1508 NEXT(cur);
1509 while (*cur == ';') {
1510 cur++;
1511 while ((IS_PCHAR(cur)) ||
1512 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1513 NEXT(cur);
1514 }
1515 if (*cur != '/')
1516 break;
1517 cur++;
1518 } while (1);
1519 if (uri != NULL) {
1520 int len, len2 = 0;
1521 char *path;
1522
1523 /*
1524 * Concat the set of path segments to the current path
1525 */
1526 len = cur - *str;
1527 if (slash)
1528 len++;
1529
1530 if (uri->path != NULL) {
1531 len2 = strlen(uri->path);
1532 len += len2;
1533 }
1534 path = (char *) xmlMallocAtomic(len + 1);
1535 if (path == NULL) {
1536 xmlGenericError(xmlGenericErrorContext,
1537 "xmlParseURIPathSegments: out of memory\n");
1538 *str = cur;
1539 return (-1);
1540 }
1541 if (uri->path != NULL)
1542 memcpy(path, uri->path, len2);
1543 if (slash) {
1544 path[len2] = '/';
1545 len2++;
1546 }
1547 path[len2] = 0;
1548 if (cur - *str > 0) {
1549 if (uri->cleanup & 2) {
1550 memcpy(&path[len2], *str, cur - *str);
1551 path[len2 + (cur - *str)] = 0;
1552 } else
1553 xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1554 }
1555 if (uri->path != NULL)
1556 xmlFree(uri->path);
1557 uri->path = path;
1558 }
1559 *str = cur;
1560 return (0);
1561}
1562
1563/**
1564 * xmlParseURIAuthority:
1565 * @uri: pointer to an URI structure
1566 * @str: pointer to the string to analyze
1567 *
1568 * Parse the authority part of an URI.
1569 *
1570 * authority = server | reg_name
1571 * server = [ [ userinfo "@" ] hostport ]
1572 * reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1573 * "@" | "&" | "=" | "+" )
1574 *
1575 * Note : this is completely ambiguous since reg_name is allowed to
1576 * use the full set of chars in use by server:
1577 *
1578 * 3.2.1. Registry-based Naming Authority
1579 *
1580 * The structure of a registry-based naming authority is specific
1581 * to the URI scheme, but constrained to the allowed characters
1582 * for an authority component.
1583 *
1584 * Returns 0 or the error code
1585 */
1586static int
1587xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1588 const char *cur;
1589 int ret;
1590
1591 if (str == NULL)
1592 return(-1);
1593
1594 cur = *str;
1595
1596 /*
1597 * try first to parse it as a server string.
1598 */
1599 ret = xmlParseURIServer(uri, str);
1600 if ((ret == 0) && (*str != NULL) &&
1601 ((**str == 0) || (**str == '/') || (**str == '?')))
1602 return(0);
1603 *str = cur;
1604
1605 /*
1606 * failed, fallback to reg_name
1607 */
1608 if (!IS_REG_NAME(cur)) {
1609 return(5);
1610 }
1611 NEXT(cur);
1612 while (IS_REG_NAME(cur)) NEXT(cur);
1613 if (uri != NULL) {
1614 if (uri->server != NULL) xmlFree(uri->server);
1615 uri->server = NULL;
1616 if (uri->user != NULL) xmlFree(uri->user);
1617 uri->user = NULL;
1618 if (uri->authority != NULL) xmlFree(uri->authority);
1619 if (uri->cleanup & 2)
1620 uri->authority = STRNDUP(*str, cur - *str);
1621 else
1622 uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1623 }
1624 *str = cur;
1625 return(0);
1626}
1627
1628/**
1629 * xmlParseURIHierPart:
1630 * @uri: pointer to an URI structure
1631 * @str: pointer to the string to analyze
1632 *
1633 * Parse an URI hierarchical part
1634 *
1635 * hier_part = ( net_path | abs_path ) [ "?" query ]
1636 * abs_path = "/" path_segments
1637 * net_path = "//" authority [ abs_path ]
1638 *
1639 * Returns 0 or the error code
1640 */
1641static int
1642xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1643 int ret;
1644 const char *cur;
1645
1646 if (str == NULL)
1647 return(-1);
1648
1649 cur = *str;
1650
1651 if ((cur[0] == '/') && (cur[1] == '/')) {
1652 cur += 2;
1653 ret = xmlParseURIAuthority(uri, &cur);
1654 if (ret != 0)
1655 return(ret);
1656 if (cur[0] == '/') {
1657 cur++;
1658 ret = xmlParseURIPathSegments(uri, &cur, 1);
1659 }
1660 } else if (cur[0] == '/') {
1661 cur++;
1662 ret = xmlParseURIPathSegments(uri, &cur, 1);
1663 } else {
1664 return(4);
1665 }
1666 if (ret != 0)
1667 return(ret);
1668 if (*cur == '?') {
1669 cur++;
1670 ret = xmlParseURIQuery(uri, &cur);
1671 if (ret != 0)
1672 return(ret);
1673 }
1674 *str = cur;
1675 return(0);
1676}
1677
1678/**
1679 * xmlParseAbsoluteURI:
1680 * @uri: pointer to an URI structure
1681 * @str: pointer to the string to analyze
1682 *
1683 * Parse an URI reference string and fills in the appropriate fields
1684 * of the @uri structure
1685 *
1686 * absoluteURI = scheme ":" ( hier_part | opaque_part )
1687 *
1688 * Returns 0 or the error code
1689 */
1690static int
1691xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1692 int ret;
1693 const char *cur;
1694
1695 if (str == NULL)
1696 return(-1);
1697
1698 cur = *str;
1699
1700 ret = xmlParseURIScheme(uri, str);
1701 if (ret != 0) return(ret);
1702 if (**str != ':') {
1703 *str = cur;
1704 return(1);
1705 }
1706 (*str)++;
1707 if (**str == '/')
1708 return(xmlParseURIHierPart(uri, str));
1709 return(xmlParseURIOpaquePart(uri, str));
1710}
1711
1712/**
1713 * xmlParseRelativeURI:
1714 * @uri: pointer to an URI structure
1715 * @str: pointer to the string to analyze
1716 *
1717 * Parse an relative URI string and fills in the appropriate fields
1718 * of the @uri structure
1719 *
1720 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1721 * abs_path = "/" path_segments
1722 * net_path = "//" authority [ abs_path ]
1723 * rel_path = rel_segment [ abs_path ]
1724 *
1725 * Returns 0 or the error code
1726 */
1727static int
1728xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1729 int ret = 0;
1730 const char *cur;
1731
1732 if (str == NULL)
1733 return(-1);
1734
1735 cur = *str;
1736 if ((cur[0] == '/') && (cur[1] == '/')) {
1737 cur += 2;
1738 ret = xmlParseURIAuthority(uri, &cur);
1739 if (ret != 0)
1740 return(ret);
1741 if (cur[0] == '/') {
1742 cur++;
1743 ret = xmlParseURIPathSegments(uri, &cur, 1);
1744 }
1745 } else if (cur[0] == '/') {
1746 cur++;
1747 ret = xmlParseURIPathSegments(uri, &cur, 1);
1748 } else if (cur[0] != '#' && cur[0] != '?') {
1749 ret = xmlParseURIRelSegment(uri, &cur);
1750 if (ret != 0)
1751 return(ret);
1752 if (cur[0] == '/') {
1753 cur++;
1754 ret = xmlParseURIPathSegments(uri, &cur, 1);
1755 }
1756 }
1757 if (ret != 0)
1758 return(ret);
1759 if (*cur == '?') {
1760 cur++;
1761 ret = xmlParseURIQuery(uri, &cur);
1762 if (ret != 0)
1763 return(ret);
1764 }
1765 *str = cur;
1766 return(ret);
1767}
1768
1769/**
1770 * xmlParseURIReference:
1771 * @uri: pointer to an URI structure
1772 * @str: the string to analyze
1773 *
1774 * Parse an URI reference string and fills in the appropriate fields
1775 * of the @uri structure
1776 *
1777 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1778 *
1779 * Returns 0 or the error code
1780 */
1781int
1782xmlParseURIReference(xmlURIPtr uri, const char *str) {
1783 int ret;
1784 const char *tmp = str;
1785
1786 if (str == NULL)
1787 return(-1);
1788 xmlCleanURI(uri);
1789
1790 /*
1791 * Try first to parse absolute refs, then fallback to relative if
1792 * it fails.
1793 */
1794 ret = xmlParseAbsoluteURI(uri, &str);
1795 if (ret != 0) {
1796 xmlCleanURI(uri);
1797 str = tmp;
1798 ret = xmlParseRelativeURI(uri, &str);
1799 }
1800 if (ret != 0) {
1801 xmlCleanURI(uri);
1802 return(ret);
1803 }
1804
1805 if (*str == '#') {
1806 str++;
1807 ret = xmlParseURIFragment(uri, &str);
1808 if (ret != 0) return(ret);
1809 }
1810 if (*str != 0) {
1811 xmlCleanURI(uri);
1812 return(1);
1813 }
1814 return(0);
1815}
1816
1817/**
1818 * xmlParseURI:
1819 * @str: the URI string to analyze
1820 *
1821 * Parse an URI
1822 *
1823 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1824 *
1825 * Returns a newly built xmlURIPtr or NULL in case of error
1826 */
1827xmlURIPtr
1828xmlParseURI(const char *str) {
1829 xmlURIPtr uri;
1830 int ret;
1831
1832 if (str == NULL)
1833 return(NULL);
1834 uri = xmlCreateURI();
1835 if (uri != NULL) {
1836 ret = xmlParseURIReference(uri, str);
1837 if (ret) {
1838 xmlFreeURI(uri);
1839 return(NULL);
1840 }
1841 }
1842 return(uri);
1843}
1844
1845/**
1846 * xmlParseURIRaw:
1847 * @str: the URI string to analyze
1848 * @raw: if 1 unescaping of URI pieces are disabled
1849 *
1850 * Parse an URI but allows to keep intact the original fragments.
1851 *
1852 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1853 *
1854 * Returns a newly built xmlURIPtr or NULL in case of error
1855 */
1856xmlURIPtr
1857xmlParseURIRaw(const char *str, int raw) {
1858 xmlURIPtr uri;
1859 int ret;
1860
1861 if (str == NULL)
1862 return(NULL);
1863 uri = xmlCreateURI();
1864 if (uri != NULL) {
1865 if (raw) {
1866 uri->cleanup |= 2;
1867 }
1868 ret = xmlParseURIReference(uri, str);
1869 if (ret) {
1870 xmlFreeURI(uri);
1871 return(NULL);
1872 }
1873 }
1874 return(uri);
1875}
1876
1877/************************************************************************
1878 * *
1879 * Public functions *
1880 * *
1881 ************************************************************************/
1882
1883/**
1884 * xmlBuildURI:
1885 * @URI: the URI instance found in the document
1886 * @base: the base value
1887 *
1888 * Computes he final URI of the reference done by checking that
1889 * the given URI is valid, and building the final URI using the
1890 * base URI. This is processed according to section 5.2 of the
1891 * RFC 2396
1892 *
1893 * 5.2. Resolving Relative References to Absolute Form
1894 *
1895 * Returns a new URI string (to be freed by the caller) or NULL in case
1896 * of error.
1897 */
1898xmlChar *
1899xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1900 xmlChar *val = NULL;
1901 int ret, len, indx, cur, out;
1902 xmlURIPtr ref = NULL;
1903 xmlURIPtr bas = NULL;
1904 xmlURIPtr res = NULL;
1905
1906 /*
1907 * 1) The URI reference is parsed into the potential four components and
1908 * fragment identifier, as described in Section 4.3.
1909 *
1910 * NOTE that a completely empty URI is treated by modern browsers
1911 * as a reference to "." rather than as a synonym for the current
1912 * URI. Should we do that here?
1913 */
1914 if (URI == NULL)
1915 ret = -1;
1916 else {
1917 if (*URI) {
1918 ref = xmlCreateURI();
1919 if (ref == NULL)
1920 goto done;
1921 ret = xmlParseURIReference(ref, (const char *) URI);
1922 }
1923 else
1924 ret = 0;
1925 }
1926 if (ret != 0)
1927 goto done;
1928 if ((ref != NULL) && (ref->scheme != NULL)) {
1929 /*
1930 * The URI is absolute don't modify.
1931 */
1932 val = xmlStrdup(URI);
1933 goto done;
1934 }
1935 if (base == NULL)
1936 ret = -1;
1937 else {
1938 bas = xmlCreateURI();
1939 if (bas == NULL)
1940 goto done;
1941 ret = xmlParseURIReference(bas, (const char *) base);
1942 }
1943 if (ret != 0) {
1944 if (ref)
1945 val = xmlSaveUri(ref);
1946 goto done;
1947 }
1948 if (ref == NULL) {
1949 /*
1950 * the base fragment must be ignored
1951 */
1952 if (bas->fragment != NULL) {
1953 xmlFree(bas->fragment);
1954 bas->fragment = NULL;
1955 }
1956 val = xmlSaveUri(bas);
1957 goto done;
1958 }
1959
1960 /*
1961 * 2) If the path component is empty and the scheme, authority, and
1962 * query components are undefined, then it is a reference to the
1963 * current document and we are done. Otherwise, the reference URI's
1964 * query and fragment components are defined as found (or not found)
1965 * within the URI reference and not inherited from the base URI.
1966 *
1967 * NOTE that in modern browsers, the parsing differs from the above
1968 * in the following aspect: the query component is allowed to be
1969 * defined while still treating this as a reference to the current
1970 * document.
1971 */
1972 res = xmlCreateURI();
1973 if (res == NULL)
1974 goto done;
1975 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1976 ((ref->authority == NULL) && (ref->server == NULL))) {
1977 if (bas->scheme != NULL)
1978 res->scheme = xmlMemStrdup(bas->scheme);
1979 if (bas->authority != NULL)
1980 res->authority = xmlMemStrdup(bas->authority);
1981 else if (bas->server != NULL) {
1982 res->server = xmlMemStrdup(bas->server);
1983 if (bas->user != NULL)
1984 res->user = xmlMemStrdup(bas->user);
1985 res->port = bas->port;
1986 }
1987 if (bas->path != NULL)
1988 res->path = xmlMemStrdup(bas->path);
1989 if (ref->query_raw != NULL)
1990 res->query_raw = xmlMemStrdup (ref->query_raw);
1991 else if (ref->query != NULL)
1992 res->query = xmlMemStrdup(ref->query);
1993 else if (bas->query_raw != NULL)
1994 res->query_raw = xmlMemStrdup(bas->query_raw);
1995 else if (bas->query != NULL)
1996 res->query = xmlMemStrdup(bas->query);
1997 if (ref->fragment != NULL)
1998 res->fragment = xmlMemStrdup(ref->fragment);
1999 goto step_7;
2000 }
2001
2002 /*
2003 * 3) If the scheme component is defined, indicating that the reference
2004 * starts with a scheme name, then the reference is interpreted as an
2005 * absolute URI and we are done. Otherwise, the reference URI's
2006 * scheme is inherited from the base URI's scheme component.
2007 */
2008 if (ref->scheme != NULL) {
2009 val = xmlSaveUri(ref);
2010 goto done;
2011 }
2012 if (bas->scheme != NULL)
2013 res->scheme = xmlMemStrdup(bas->scheme);
2014
2015 if (ref->query_raw != NULL)
2016 res->query_raw = xmlMemStrdup(ref->query_raw);
2017 else if (ref->query != NULL)
2018 res->query = xmlMemStrdup(ref->query);
2019 if (ref->fragment != NULL)
2020 res->fragment = xmlMemStrdup(ref->fragment);
2021
2022 /*
2023 * 4) If the authority component is defined, then the reference is a
2024 * network-path and we skip to step 7. Otherwise, the reference
2025 * URI's authority is inherited from the base URI's authority
2026 * component, which will also be undefined if the URI scheme does not
2027 * use an authority component.
2028 */
2029 if ((ref->authority != NULL) || (ref->server != NULL)) {
2030 if (ref->authority != NULL)
2031 res->authority = xmlMemStrdup(ref->authority);
2032 else {
2033 res->server = xmlMemStrdup(ref->server);
2034 if (ref->user != NULL)
2035 res->user = xmlMemStrdup(ref->user);
2036 res->port = ref->port;
2037 }
2038 if (ref->path != NULL)
2039 res->path = xmlMemStrdup(ref->path);
2040 goto step_7;
2041 }
2042 if (bas->authority != NULL)
2043 res->authority = xmlMemStrdup(bas->authority);
2044 else if (bas->server != NULL) {
2045 res->server = xmlMemStrdup(bas->server);
2046 if (bas->user != NULL)
2047 res->user = xmlMemStrdup(bas->user);
2048 res->port = bas->port;
2049 }
2050
2051 /*
2052 * 5) If the path component begins with a slash character ("/"), then
2053 * the reference is an absolute-path and we skip to step 7.
2054 */
2055 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2056 res->path = xmlMemStrdup(ref->path);
2057 goto step_7;
2058 }
2059
2060
2061 /*
2062 * 6) If this step is reached, then we are resolving a relative-path
2063 * reference. The relative path needs to be merged with the base
2064 * URI's path. Although there are many ways to do this, we will
2065 * describe a simple method using a separate string buffer.
2066 *
2067 * Allocate a buffer large enough for the result string.
2068 */
2069 len = 2; /* extra / and 0 */
2070 if (ref->path != NULL)
2071 len += strlen(ref->path);
2072 if (bas->path != NULL)
2073 len += strlen(bas->path);
2074 res->path = (char *) xmlMallocAtomic(len);
2075 if (res->path == NULL) {
2076 xmlGenericError(xmlGenericErrorContext,
2077 "xmlBuildURI: out of memory\n");
2078 goto done;
2079 }
2080 res->path[0] = 0;
2081
2082 /*
2083 * a) All but the last segment of the base URI's path component is
2084 * copied to the buffer. In other words, any characters after the
2085 * last (right-most) slash character, if any, are excluded.
2086 */
2087 cur = 0;
2088 out = 0;
2089 if (bas->path != NULL) {
2090 while (bas->path[cur] != 0) {
2091 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2092 cur++;
2093 if (bas->path[cur] == 0)
2094 break;
2095
2096 cur++;
2097 while (out < cur) {
2098 res->path[out] = bas->path[out];
2099 out++;
2100 }
2101 }
2102 }
2103 res->path[out] = 0;
2104
2105 /*
2106 * b) The reference's path component is appended to the buffer
2107 * string.
2108 */
2109 if (ref->path != NULL && ref->path[0] != 0) {
2110 indx = 0;
2111 /*
2112 * Ensure the path includes a '/'
2113 */
2114 if ((out == 0) && (bas->server != NULL))
2115 res->path[out++] = '/';
2116 while (ref->path[indx] != 0) {
2117 res->path[out++] = ref->path[indx++];
2118 }
2119 }
2120 res->path[out] = 0;
2121
2122 /*
2123 * Steps c) to h) are really path normalization steps
2124 */
2125 xmlNormalizeURIPath(res->path);
2126
2127step_7:
2128
2129 /*
2130 * 7) The resulting URI components, including any inherited from the
2131 * base URI, are recombined to give the absolute form of the URI
2132 * reference.
2133 */
2134 val = xmlSaveUri(res);
2135
2136done:
2137 if (ref != NULL)
2138 xmlFreeURI(ref);
2139 if (bas != NULL)
2140 xmlFreeURI(bas);
2141 if (res != NULL)
2142 xmlFreeURI(res);
2143 return(val);
2144}
2145
2146/**
2147 * xmlBuildRelativeURI:
2148 * @URI: the URI reference under consideration
2149 * @base: the base value
2150 *
2151 * Expresses the URI of the reference in terms relative to the
2152 * base. Some examples of this operation include:
2153 * base = "http://site1.com/docs/book1.html"
2154 * URI input URI returned
2155 * docs/pic1.gif pic1.gif
2156 * docs/img/pic1.gif img/pic1.gif
2157 * img/pic1.gif ../img/pic1.gif
2158 * http://site1.com/docs/pic1.gif pic1.gif
2159 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2160 *
2161 * base = "docs/book1.html"
2162 * URI input URI returned
2163 * docs/pic1.gif pic1.gif
2164 * docs/img/pic1.gif img/pic1.gif
2165 * img/pic1.gif ../img/pic1.gif
2166 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2167 *
2168 *
2169 * Note: if the URI reference is really wierd or complicated, it may be
2170 * worthwhile to first convert it into a "nice" one by calling
2171 * xmlBuildURI (using 'base') before calling this routine,
2172 * since this routine (for reasonable efficiency) assumes URI has
2173 * already been through some validation.
2174 *
2175 * Returns a new URI string (to be freed by the caller) or NULL in case
2176 * error.
2177 */
2178xmlChar *
2179xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2180{
2181 xmlChar *val = NULL;
2182 int ret;
2183 int ix;
2184 int pos = 0;
2185 int nbslash = 0;
2186 int len;
2187 xmlURIPtr ref = NULL;
2188 xmlURIPtr bas = NULL;
2189 xmlChar *bptr, *uptr, *vptr;
2190 int remove_path = 0;
2191
2192 if ((URI == NULL) || (*URI == 0))
2193 return NULL;
2194
2195 /*
2196 * First parse URI into a standard form
2197 */
2198 ref = xmlCreateURI ();
2199 if (ref == NULL)
2200 return NULL;
2201 /* If URI not already in "relative" form */
2202 if (URI[0] != '.') {
2203 ret = xmlParseURIReference (ref, (const char *) URI);
2204 if (ret != 0)
2205 goto done; /* Error in URI, return NULL */
2206 } else
2207 ref->path = (char *)xmlStrdup(URI);
2208
2209 /*
2210 * Next parse base into the same standard form
2211 */
2212 if ((base == NULL) || (*base == 0)) {
2213 val = xmlStrdup (URI);
2214 goto done;
2215 }
2216 bas = xmlCreateURI ();
2217 if (bas == NULL)
2218 goto done;
2219 if (base[0] != '.') {
2220 ret = xmlParseURIReference (bas, (const char *) base);
2221 if (ret != 0)
2222 goto done; /* Error in base, return NULL */
2223 } else
2224 bas->path = (char *)xmlStrdup(base);
2225
2226 /*
2227 * If the scheme / server on the URI differs from the base,
2228 * just return the URI
2229 */
2230 if ((ref->scheme != NULL) &&
2231 ((bas->scheme == NULL) ||
2232 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2233 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2234 val = xmlStrdup (URI);
2235 goto done;
2236 }
2237 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2238 val = xmlStrdup(BAD_CAST "");
2239 goto done;
2240 }
2241 if (bas->path == NULL) {
2242 val = xmlStrdup((xmlChar *)ref->path);
2243 goto done;
2244 }
2245 if (ref->path == NULL) {
2246 ref->path = (char *) "/";
2247 remove_path = 1;
2248 }
2249
2250 /*
2251 * At this point (at last!) we can compare the two paths
2252 *
2253 * First we take care of the special case where either of the
2254 * two path components may be missing (bug 316224)
2255 */
2256 if (bas->path == NULL) {
2257 if (ref->path != NULL) {
2258 uptr = (xmlChar *) ref->path;
2259 if (*uptr == '/')
2260 uptr++;
2261 /* exception characters from xmlSaveUri */
2262 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2263 }
2264 goto done;
2265 }
2266 bptr = (xmlChar *)bas->path;
2267 if (ref->path == NULL) {
2268 for (ix = 0; bptr[ix] != 0; ix++) {
2269 if (bptr[ix] == '/')
2270 nbslash++;
2271 }
2272 uptr = NULL;
2273 len = 1; /* this is for a string terminator only */
2274 } else {
2275 /*
2276 * Next we compare the two strings and find where they first differ
2277 */
2278 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2279 pos += 2;
2280 if ((*bptr == '.') && (bptr[1] == '/'))
2281 bptr += 2;
2282 else if ((*bptr == '/') && (ref->path[pos] != '/'))
2283 bptr++;
2284 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2285 pos++;
2286
2287 if (bptr[pos] == ref->path[pos]) {
2288 val = xmlStrdup(BAD_CAST "");
2289 goto done; /* (I can't imagine why anyone would do this) */
2290 }
2291
2292 /*
2293 * In URI, "back up" to the last '/' encountered. This will be the
2294 * beginning of the "unique" suffix of URI
2295 */
2296 ix = pos;
2297 if ((ref->path[ix] == '/') && (ix > 0))
2298 ix--;
2299 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2300 ix -= 2;
2301 for (; ix > 0; ix--) {
2302 if (ref->path[ix] == '/')
2303 break;
2304 }
2305 if (ix == 0) {
2306 uptr = (xmlChar *)ref->path;
2307 } else {
2308 ix++;
2309 uptr = (xmlChar *)&ref->path[ix];
2310 }
2311
2312 /*
2313 * In base, count the number of '/' from the differing point
2314 */
2315 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2316 for (; bptr[ix] != 0; ix++) {
2317 if (bptr[ix] == '/')
2318 nbslash++;
2319 }
2320 }
2321 len = xmlStrlen (uptr) + 1;
2322 }
2323
2324 if (nbslash == 0) {
2325 if (uptr != NULL)
2326 /* exception characters from xmlSaveUri */
2327 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2328 goto done;
2329 }
2330
2331 /*
2332 * Allocate just enough space for the returned string -
2333 * length of the remainder of the URI, plus enough space
2334 * for the "../" groups, plus one for the terminator
2335 */
2336 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2337 if (val == NULL) {
2338 xmlGenericError(xmlGenericErrorContext,
2339 "xmlBuildRelativeURI: out of memory\n");
2340 goto done;
2341 }
2342 vptr = val;
2343 /*
2344 * Put in as many "../" as needed
2345 */
2346 for (; nbslash>0; nbslash--) {
2347 *vptr++ = '.';
2348 *vptr++ = '.';
2349 *vptr++ = '/';
2350 }
2351 /*
2352 * Finish up with the end of the URI
2353 */
2354 if (uptr != NULL) {
2355 if ((vptr > val) && (len > 0) &&
2356 (uptr[0] == '/') && (vptr[-1] == '/')) {
2357 memcpy (vptr, uptr + 1, len - 1);
2358 vptr[len - 2] = 0;
2359 } else {
2360 memcpy (vptr, uptr, len);
2361 vptr[len - 1] = 0;
2362 }
2363 } else {
2364 vptr[len - 1] = 0;
2365 }
2366
2367 /* escape the freshly-built path */
2368 vptr = val;
2369 /* exception characters from xmlSaveUri */
2370 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2371 xmlFree(vptr);
2372
2373done:
2374 /*
2375 * Free the working variables
2376 */
2377 if (remove_path != 0)
2378 ref->path = NULL;
2379 if (ref != NULL)
2380 xmlFreeURI (ref);
2381 if (bas != NULL)
2382 xmlFreeURI (bas);
2383
2384 return val;
2385}
2386
2387/**
2388 * xmlCanonicPath:
2389 * @path: the resource locator in a filesystem notation
2390 *
2391 * Constructs a canonic path from the specified path.
2392 *
2393 * Returns a new canonic path, or a duplicate of the path parameter if the
2394 * construction fails. The caller is responsible for freeing the memory occupied
2395 * by the returned string. If there is insufficient memory available, or the
2396 * argument is NULL, the function returns NULL.
2397 */
/*
 * IS_WINDOWS_PATH: true when p is not NULL and starts with a drive
 * letter, a ':' and then a slash or a backslash (e.g. "c:/" ).
 * The argument is evaluated several times: it must have no side effect.
 */
#define IS_WINDOWS_PATH(p) \
    (((p) != NULL) && \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) || \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) && \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2403xmlChar *
2404xmlCanonicPath(const xmlChar *path)
2405{
2406/*
2407 * For Windows implementations, additional work needs to be done to
2408 * replace backslashes in pathnames with "forward slashes"
2409 */
2410#if defined(_WIN32) && !defined(__CYGWIN__)
2411 int len = 0;
2412 int i = 0;
2413 xmlChar *p = NULL;
2414#endif
2415 xmlURIPtr uri;
2416 xmlChar *ret;
2417 const xmlChar *absuri;
2418
2419 if (path == NULL)
2420 return(NULL);
2421 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2422 xmlFreeURI(uri);
2423 return xmlStrdup(path);
2424 }
2425
2426 /* Check if this is an "absolute uri" */
2427 absuri = xmlStrstr(path, BAD_CAST "://");
2428 if (absuri != NULL) {
2429 int l, j;
2430 unsigned char c;
2431 xmlChar *escURI;
2432
2433 /*
2434 * this looks like an URI where some parts have not been
2435 * escaped leading to a parsing problem. Check that the first
2436 * part matches a protocol.
2437 */
2438 l = absuri - path;
2439 /* Bypass if first part (part before the '://') is > 20 chars */
2440 if ((l <= 0) || (l > 20))
2441 goto path_processing;
2442 /* Bypass if any non-alpha characters are present in first part */
2443 for (j = 0;j < l;j++) {
2444 c = path[j];
2445 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2446 goto path_processing;
2447 }
2448
2449 /* Escape all except the characters specified in the supplied path */
2450 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2451 if (escURI != NULL) {
2452 /* Try parsing the escaped path */
2453 uri = xmlParseURI((const char *) escURI);
2454 /* If successful, return the escaped string */
2455 if (uri != NULL) {
2456 xmlFreeURI(uri);
2457 return escURI;
2458 }
2459 }
2460 }
2461
2462path_processing:
2463/* For Windows implementations, replace backslashes with 'forward slashes' */
2464#if defined(_WIN32) && !defined(__CYGWIN__)
2465 /*
2466 * Create a URI structure
2467 */
2468 uri = xmlCreateURI();
2469 if (uri == NULL) { /* Guard against 'out of memory' */
2470 return(NULL);
2471 }
2472
2473 len = xmlStrlen(path);
2474 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2475 /* make the scheme 'file' */
2476 uri->scheme = xmlStrdup(BAD_CAST "file");
2477 /* allocate space for leading '/' + path + string terminator */
2478 uri->path = xmlMallocAtomic(len + 2);
2479 if (uri->path == NULL) {
2480 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2481 return(NULL);
2482 }
2483 /* Put in leading '/' plus path */
2484 uri->path[0] = '/';
2485 p = uri->path + 1;
2486 strncpy(p, path, len + 1);
2487 } else {
2488 uri->path = xmlStrdup(path);
2489 if (uri->path == NULL) {
2490 xmlFreeURI(uri);
2491 return(NULL);
2492 }
2493 p = uri->path;
2494 }
2495 /* Now change all occurences of '\' to '/' */
2496 while (*p != '\0') {
2497 if (*p == '\\')
2498 *p = '/';
2499 p++;
2500 }
2501
2502 if (uri->scheme == NULL) {
2503 ret = xmlStrdup((const xmlChar *) uri->path);
2504 } else {
2505 ret = xmlSaveUri(uri);
2506 }
2507
2508 xmlFreeURI(uri);
2509#else
2510 ret = xmlStrdup((const xmlChar *) path);
2511#endif
2512 return(ret);
2513}
2514
2515/**
2516 * xmlPathToURI:
2517 * @path: the resource locator in a filesystem notation
2518 *
2519 * Constructs an URI expressing the existing path
2520 *
2521 * Returns a new URI, or a duplicate of the path parameter if the
2522 * construction fails. The caller is responsible for freeing the memory
2523 * occupied by the returned string. If there is insufficient memory available,
2524 * or the argument is NULL, the function returns NULL.
2525 */
2526xmlChar *
2527xmlPathToURI(const xmlChar *path)
2528{
2529 xmlURIPtr uri;
2530 xmlURI temp;
2531 xmlChar *ret, *cal;
2532
2533 if (path == NULL)
2534 return(NULL);
2535
2536 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2537 xmlFreeURI(uri);
2538 return xmlStrdup(path);
2539 }
2540 cal = xmlCanonicPath(path);
2541 if (cal == NULL)
2542 return(NULL);
2543#if defined(_WIN32) && !defined(__CYGWIN__)
2544 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2545 If 'cal' is a valid URI allready then we are done here, as continuing would make
2546 it invalid. */
2547 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2548 xmlFreeURI(uri);
2549 return cal;
2550 }
2551 /* 'cal' can contain a relative path with backslashes. If that is processed
2552 by xmlSaveURI, they will be escaped and the external entity loader machinery
2553 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2554 ret = cal;
2555 while (*ret != '\0') {
2556 if (*ret == '\\')
2557 *ret = '/';
2558 ret++;
2559 }
2560#endif
2561 memset(&temp, 0, sizeof(temp));
2562 temp.path = (char *) cal;
2563 ret = xmlSaveUri(&temp);
2564 xmlFree(cal);
2565 return(ret);
2566}
2567#define bottom_uri
2568#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use