VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/uri.c

Last change on this file was 104106, checked in by vboxsync, 8 weeks ago

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • Property svn:eol-style set to native
File size: 65.7 KB
Line 
1/**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11#define IN_LIBXML
12#include "libxml.h"
13
14#include <limits.h>
15#include <string.h>
16
17#include <libxml/xmlmemory.h>
18#include <libxml/uri.h>
19#include <libxml/xmlerror.h>
20
21#include "private/error.h"
22
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above has no size limit.
 * In practice URIs are relatively short, except for the data URI scheme
 * defined in RFC 2397; even there the usual maximum before hitting
 * practical limits is around 64 KB, and 4 KB is usually the admitted
 * limit for proper operation.
 * The value below is a security limit more than anything else and should
 * never be hit by 'normal' operations. It was set to 1 MByte in 2012 and
 * is only enforced on output (see xmlSaveUriRealloc).
 *
 * The expansion is parenthesized so the macro behaves as a single value
 * inside any larger expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)

/*
 * Special values of the port field:
 *   PORT_EMPTY         value given to a freshly created URI
 *   PORT_EMPTY_SERVER  set by the hier-part parser when "//" was seen but
 *                      neither a server nor a port was found
 */
#define PORT_EMPTY 0
#define PORT_EMPTY_SERVER (-1)
39
40static void
41xmlURIErrMemory(const char *extra)
42{
43 if (extra)
44 __xmlRaiseError(NULL, NULL, NULL,
45 NULL, NULL, XML_FROM_URI,
46 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
47 extra, NULL, NULL, 0, 0,
48 "Memory allocation failed : %s\n", extra);
49 else
50 __xmlRaiseError(NULL, NULL, NULL,
51 NULL, NULL, XML_FROM_URI,
52 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
53 NULL, NULL, NULL, 0, 0,
54 "Memory allocation failed\n");
55}
56
57static void xmlCleanURI(xmlURIPtr uri);
58
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * Unless stated otherwise each macro takes a character value and may
 * evaluate it several times.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the macros above, this one takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||     \
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||     \
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to the next character, stepping over a whole escape sequence.
 *
 * @p must be a modifiable pointer lvalue. It is advanced by three bytes
 * when it points at '%' and by one byte otherwise. The macro itself does
 * not check that two more bytes follow the '%'; the parsers below only
 * reach it after the current character has been accepted by one of the
 * ISA_* tests.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup, typed as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
144
145/************************************************************************
146 * *
147 * RFC 3986 parser *
148 * *
149 ************************************************************************/
150
/*
 * RFC 3986 character classes. Every ISA_* macro takes a pointer to the
 * character to test and may evaluate that pointer several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only read when the first one is '%', and
 * the third only when the second is a hex digit, so the test never reads
 * past the terminating NUL of a string.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
200
201/**
202 * xmlParse3986Scheme:
203 * @uri: pointer to an URI structure
204 * @str: pointer to the string to analyze
205 *
206 * Parse an URI scheme
207 *
208 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
209 *
210 * Returns 0 or the error code
211 */
212static int
213xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
214 const char *cur;
215
216 if (str == NULL)
217 return(-1);
218
219 cur = *str;
220 if (!ISA_ALPHA(cur))
221 return(2);
222 cur++;
223 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
224 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
225 if (uri != NULL) {
226 if (uri->scheme != NULL) xmlFree(uri->scheme);
227 uri->scheme = STRNDUP(*str, cur - *str);
228 }
229 *str = cur;
230 return(0);
231}
232
233/**
234 * xmlParse3986Fragment:
235 * @uri: pointer to an URI structure
236 * @str: pointer to the string to analyze
237 *
238 * Parse the query part of an URI
239 *
240 * fragment = *( pchar / "/" / "?" )
241 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
242 * in the fragment identifier but this is used very broadly for
243 * xpointer scheme selection, so we are allowing it here to not break
244 * for example all the DocBook processing chains.
245 *
246 * Returns 0 or the error code
247 */
248static int
249xmlParse3986Fragment(xmlURIPtr uri, const char **str)
250{
251 const char *cur;
252
253 if (str == NULL)
254 return (-1);
255
256 cur = *str;
257
258 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
259 (*cur == '[') || (*cur == ']') ||
260 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261 NEXT(cur);
262 if (uri != NULL) {
263 if (uri->fragment != NULL)
264 xmlFree(uri->fragment);
265 if (uri->cleanup & 2)
266 uri->fragment = STRNDUP(*str, cur - *str);
267 else
268 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
269 }
270 *str = cur;
271 return (0);
272}
273
274/**
275 * xmlParse3986Query:
276 * @uri: pointer to an URI structure
277 * @str: pointer to the string to analyze
278 *
279 * Parse the query part of an URI
280 *
281 * query = *uric
282 *
283 * Returns 0 or the error code
284 */
285static int
286xmlParse3986Query(xmlURIPtr uri, const char **str)
287{
288 const char *cur;
289
290 if (str == NULL)
291 return (-1);
292
293 cur = *str;
294
295 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
296 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
297 NEXT(cur);
298 if (uri != NULL) {
299 if (uri->query != NULL)
300 xmlFree(uri->query);
301 if (uri->cleanup & 2)
302 uri->query = STRNDUP(*str, cur - *str);
303 else
304 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
305
306 /* Save the raw bytes of the query as well.
307 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
308 */
309 if (uri->query_raw != NULL)
310 xmlFree (uri->query_raw);
311 uri->query_raw = STRNDUP (*str, cur - *str);
312 }
313 *str = cur;
314 return (0);
315}
316
317/**
318 * xmlParse3986Port:
319 * @uri: pointer to an URI structure
320 * @str: the string to analyze
321 *
322 * Parse a port part and fills in the appropriate fields
323 * of the @uri structure
324 *
325 * port = *DIGIT
326 *
327 * Returns 0 or the error code
328 */
329static int
330xmlParse3986Port(xmlURIPtr uri, const char **str)
331{
332 const char *cur = *str;
333 int port = 0;
334
335 if (ISA_DIGIT(cur)) {
336 while (ISA_DIGIT(cur)) {
337 int digit = *cur - '0';
338
339 if (port > INT_MAX / 10)
340 return(1);
341 port *= 10;
342 if (port > INT_MAX - digit)
343 return(1);
344 port += digit;
345
346 cur++;
347 }
348 if (uri != NULL)
349 uri->port = port;
350 *str = cur;
351 return(0);
352 }
353 return(1);
354}
355
356/**
357 * xmlParse3986Userinfo:
358 * @uri: pointer to an URI structure
359 * @str: the string to analyze
360 *
361 * Parse an user information part and fills in the appropriate fields
362 * of the @uri structure
363 *
364 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
365 *
366 * Returns 0 or the error code
367 */
368static int
369xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
370{
371 const char *cur;
372
373 cur = *str;
374 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
375 ISA_SUB_DELIM(cur) || (*cur == ':'))
376 NEXT(cur);
377 if (*cur == '@') {
378 if (uri != NULL) {
379 if (uri->user != NULL) xmlFree(uri->user);
380 if (uri->cleanup & 2)
381 uri->user = STRNDUP(*str, cur - *str);
382 else
383 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
384 }
385 *str = cur;
386 return(0);
387 }
388 return(1);
389}
390
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; whatever follows
 * them is left for the caller to check. Two or three digit forms with a
 * leading zero and three digit values above 255 are rejected. *@str is
 * only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit, 0-9 */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits, 10-99: no leading zero */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') &&
               ((cur[1] <= '4') ||
                ((cur[1] == '5') && (cur[2] <= '5')))) {
        /* 200-249 or 250-255 */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
428/**
429 * xmlParse3986Host:
430 * @uri: pointer to an URI structure
431 * @str: the string to analyze
432 *
433 * Parse an host part and fills in the appropriate fields
434 * of the @uri structure
435 *
436 * host = IP-literal / IPv4address / reg-name
437 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
438 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
439 * reg-name = *( unreserved / pct-encoded / sub-delims )
440 *
441 * Returns 0 or the error code
442 */
443static int
444xmlParse3986Host(xmlURIPtr uri, const char **str)
445{
446 const char *cur = *str;
447 const char *host;
448
449 host = cur;
450 /*
451 * IPv6 and future addressing scheme are enclosed between brackets
452 */
453 if (*cur == '[') {
454 cur++;
455 while ((*cur != ']') && (*cur != 0))
456 cur++;
457 if (*cur != ']')
458 return(1);
459 cur++;
460 goto found;
461 }
462 /*
463 * try to parse an IPv4
464 */
465 if (ISA_DIGIT(cur)) {
466 if (xmlParse3986DecOctet(&cur) != 0)
467 goto not_ipv4;
468 if (*cur != '.')
469 goto not_ipv4;
470 cur++;
471 if (xmlParse3986DecOctet(&cur) != 0)
472 goto not_ipv4;
473 if (*cur != '.')
474 goto not_ipv4;
475 if (xmlParse3986DecOctet(&cur) != 0)
476 goto not_ipv4;
477 if (*cur != '.')
478 goto not_ipv4;
479 if (xmlParse3986DecOctet(&cur) != 0)
480 goto not_ipv4;
481 goto found;
482not_ipv4:
483 cur = *str;
484 }
485 /*
486 * then this should be a hostname which can be empty
487 */
488 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
489 NEXT(cur);
490found:
491 if (uri != NULL) {
492 if (uri->authority != NULL) xmlFree(uri->authority);
493 uri->authority = NULL;
494 if (uri->server != NULL) xmlFree(uri->server);
495 if (cur != host) {
496 if (uri->cleanup & 2)
497 uri->server = STRNDUP(host, cur - host);
498 else
499 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
500 } else
501 uri->server = NULL;
502 }
503 *str = cur;
504 return(0);
505}
506
507/**
508 * xmlParse3986Authority:
509 * @uri: pointer to an URI structure
510 * @str: the string to analyze
511 *
512 * Parse an authority part and fills in the appropriate fields
513 * of the @uri structure
514 *
515 * authority = [ userinfo "@" ] host [ ":" port ]
516 *
517 * Returns 0 or the error code
518 */
519static int
520xmlParse3986Authority(xmlURIPtr uri, const char **str)
521{
522 const char *cur;
523 int ret;
524
525 cur = *str;
526 /*
527 * try to parse an userinfo and check for the trailing @
528 */
529 ret = xmlParse3986Userinfo(uri, &cur);
530 if ((ret != 0) || (*cur != '@'))
531 cur = *str;
532 else
533 cur++;
534 ret = xmlParse3986Host(uri, &cur);
535 if (ret != 0) return(ret);
536 if (*cur == ':') {
537 cur++;
538 ret = xmlParse3986Port(uri, &cur);
539 if (ret != 0) return(ret);
540 }
541 *str = cur;
542 return(0);
543}
544
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping at the first character which is not a
 * pchar or which is the @forbid character.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * A segment starting with the @forbid character is an empty segment.
 * *@str is only moved when at least one character was consumed.
 *
 * Returns 0 on success, 1 if the segment is empty and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur) || (*cur == forbid))
        return(empty ? 0 : 1);

    do {
        NEXT(cur);
    } while (ISA_PCHAR(cur) && (*cur != forbid));

    *str = cur;
    return (0);
}
577
578/**
579 * xmlParse3986PathAbEmpty:
580 * @uri: pointer to an URI structure
581 * @str: the string to analyze
582 *
583 * Parse an path absolute or empty and fills in the appropriate fields
584 * of the @uri structure
585 *
586 * path-abempty = *( "/" segment )
587 *
588 * Returns 0 or the error code
589 */
590static int
591xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
592{
593 const char *cur;
594 int ret;
595
596 cur = *str;
597
598 while (*cur == '/') {
599 cur++;
600 ret = xmlParse3986Segment(&cur, 0, 1);
601 if (ret != 0) return(ret);
602 }
603 if (uri != NULL) {
604 if (uri->path != NULL) xmlFree(uri->path);
605 if (*str != cur) {
606 if (uri->cleanup & 2)
607 uri->path = STRNDUP(*str, cur - *str);
608 else
609 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
610 } else {
611 uri->path = NULL;
612 }
613 }
614 *str = cur;
615 return (0);
616}
617
618/**
619 * xmlParse3986PathAbsolute:
620 * @uri: pointer to an URI structure
621 * @str: the string to analyze
622 *
623 * Parse an path absolute and fills in the appropriate fields
624 * of the @uri structure
625 *
626 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
627 *
628 * Returns 0 or the error code
629 */
630static int
631xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
632{
633 const char *cur;
634 int ret;
635
636 cur = *str;
637
638 if (*cur != '/')
639 return(1);
640 cur++;
641 ret = xmlParse3986Segment(&cur, 0, 0);
642 if (ret == 0) {
643 while (*cur == '/') {
644 cur++;
645 ret = xmlParse3986Segment(&cur, 0, 1);
646 if (ret != 0) return(ret);
647 }
648 }
649 if (uri != NULL) {
650 if (uri->path != NULL) xmlFree(uri->path);
651 if (cur != *str) {
652 if (uri->cleanup & 2)
653 uri->path = STRNDUP(*str, cur - *str);
654 else
655 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
656 } else {
657 uri->path = NULL;
658 }
659 }
660 *str = cur;
661 return (0);
662}
663
664/**
665 * xmlParse3986PathRootless:
666 * @uri: pointer to an URI structure
667 * @str: the string to analyze
668 *
669 * Parse an path without root and fills in the appropriate fields
670 * of the @uri structure
671 *
672 * path-rootless = segment-nz *( "/" segment )
673 *
674 * Returns 0 or the error code
675 */
676static int
677xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
678{
679 const char *cur;
680 int ret;
681
682 cur = *str;
683
684 ret = xmlParse3986Segment(&cur, 0, 0);
685 if (ret != 0) return(ret);
686 while (*cur == '/') {
687 cur++;
688 ret = xmlParse3986Segment(&cur, 0, 1);
689 if (ret != 0) return(ret);
690 }
691 if (uri != NULL) {
692 if (uri->path != NULL) xmlFree(uri->path);
693 if (cur != *str) {
694 if (uri->cleanup & 2)
695 uri->path = STRNDUP(*str, cur - *str);
696 else
697 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
698 } else {
699 uri->path = NULL;
700 }
701 }
702 *str = cur;
703 return (0);
704}
705
706/**
707 * xmlParse3986PathNoScheme:
708 * @uri: pointer to an URI structure
709 * @str: the string to analyze
710 *
711 * Parse an path which is not a scheme and fills in the appropriate fields
712 * of the @uri structure
713 *
714 * path-noscheme = segment-nz-nc *( "/" segment )
715 *
716 * Returns 0 or the error code
717 */
718static int
719xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
720{
721 const char *cur;
722 int ret;
723
724 cur = *str;
725
726 ret = xmlParse3986Segment(&cur, ':', 0);
727 if (ret != 0) return(ret);
728 while (*cur == '/') {
729 cur++;
730 ret = xmlParse3986Segment(&cur, 0, 1);
731 if (ret != 0) return(ret);
732 }
733 if (uri != NULL) {
734 if (uri->path != NULL) xmlFree(uri->path);
735 if (cur != *str) {
736 if (uri->cleanup & 2)
737 uri->path = STRNDUP(*str, cur - *str);
738 else
739 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
740 } else {
741 uri->path = NULL;
742 }
743 }
744 *str = cur;
745 return (0);
746}
747
748/**
749 * xmlParse3986HierPart:
750 * @uri: pointer to an URI structure
751 * @str: the string to analyze
752 *
753 * Parse an hierarchical part and fills in the appropriate fields
754 * of the @uri structure
755 *
756 * hier-part = "//" authority path-abempty
757 * / path-absolute
758 * / path-rootless
759 * / path-empty
760 *
761 * Returns 0 or the error code
762 */
763static int
764xmlParse3986HierPart(xmlURIPtr uri, const char **str)
765{
766 const char *cur;
767 int ret;
768
769 cur = *str;
770
771 if ((*cur == '/') && (*(cur + 1) == '/')) {
772 cur += 2;
773 ret = xmlParse3986Authority(uri, &cur);
774 if (ret != 0) return(ret);
775 /*
776 * An empty server is marked with a special URI value.
777 */
778 if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
779 uri->port = PORT_EMPTY_SERVER;
780 ret = xmlParse3986PathAbEmpty(uri, &cur);
781 if (ret != 0) return(ret);
782 *str = cur;
783 return(0);
784 } else if (*cur == '/') {
785 ret = xmlParse3986PathAbsolute(uri, &cur);
786 if (ret != 0) return(ret);
787 } else if (ISA_PCHAR(cur)) {
788 ret = xmlParse3986PathRootless(uri, &cur);
789 if (ret != 0) return(ret);
790 } else {
791 /* path-empty is effectively empty */
792 if (uri != NULL) {
793 if (uri->path != NULL) xmlFree(uri->path);
794 uri->path = NULL;
795 }
796 }
797 *str = cur;
798 return (0);
799}
800
801/**
802 * xmlParse3986RelativeRef:
803 * @uri: pointer to an URI structure
804 * @str: the string to analyze
805 *
806 * Parse an URI string and fills in the appropriate fields
807 * of the @uri structure
808 *
809 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
810 * relative-part = "//" authority path-abempty
811 * / path-absolute
812 * / path-noscheme
813 * / path-empty
814 *
815 * Returns 0 or the error code
816 */
817static int
818xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
819 int ret;
820
821 if ((*str == '/') && (*(str + 1) == '/')) {
822 str += 2;
823 ret = xmlParse3986Authority(uri, &str);
824 if (ret != 0) return(ret);
825 ret = xmlParse3986PathAbEmpty(uri, &str);
826 if (ret != 0) return(ret);
827 } else if (*str == '/') {
828 ret = xmlParse3986PathAbsolute(uri, &str);
829 if (ret != 0) return(ret);
830 } else if (ISA_PCHAR(str)) {
831 ret = xmlParse3986PathNoScheme(uri, &str);
832 if (ret != 0) return(ret);
833 } else {
834 /* path-empty is effectively empty */
835 if (uri != NULL) {
836 if (uri->path != NULL) xmlFree(uri->path);
837 uri->path = NULL;
838 }
839 }
840
841 if (*str == '?') {
842 str++;
843 ret = xmlParse3986Query(uri, &str);
844 if (ret != 0) return(ret);
845 }
846 if (*str == '#') {
847 str++;
848 ret = xmlParse3986Fragment(uri, &str);
849 if (ret != 0) return(ret);
850 }
851 if (*str != 0) {
852 xmlCleanURI(uri);
853 return(1);
854 }
855 return(0);
856}
857
858
859/**
860 * xmlParse3986URI:
861 * @uri: pointer to an URI structure
862 * @str: the string to analyze
863 *
864 * Parse an URI string and fills in the appropriate fields
865 * of the @uri structure
866 *
867 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
868 *
869 * Returns 0 or the error code
870 */
871static int
872xmlParse3986URI(xmlURIPtr uri, const char *str) {
873 int ret;
874
875 ret = xmlParse3986Scheme(uri, &str);
876 if (ret != 0) return(ret);
877 if (*str != ':') {
878 return(1);
879 }
880 str++;
881 ret = xmlParse3986HierPart(uri, &str);
882 if (ret != 0) return(ret);
883 if (*str == '?') {
884 str++;
885 ret = xmlParse3986Query(uri, &str);
886 if (ret != 0) return(ret);
887 }
888 if (*str == '#') {
889 str++;
890 ret = xmlParse3986Fragment(uri, &str);
891 if (ret != 0) return(ret);
892 }
893 if (*str != 0) {
894 xmlCleanURI(uri);
895 return(1);
896 }
897 return(0);
898}
899
900/**
901 * xmlParse3986URIReference:
902 * @uri: pointer to an URI structure
903 * @str: the string to analyze
904 *
905 * Parse an URI reference string and fills in the appropriate fields
906 * of the @uri structure
907 *
908 * URI-reference = URI / relative-ref
909 *
910 * Returns 0 or the error code
911 */
912static int
913xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
914 int ret;
915
916 if (str == NULL)
917 return(-1);
918 xmlCleanURI(uri);
919
920 /*
921 * Try first to parse absolute refs, then fallback to relative if
922 * it fails.
923 */
924 ret = xmlParse3986URI(uri, str);
925 if (ret != 0) {
926 xmlCleanURI(uri);
927 ret = xmlParse3986RelativeRef(uri, str);
928 if (ret != 0) {
929 xmlCleanURI(uri);
930 return(ret);
931 }
932 }
933 return(0);
934}
935
936/**
937 * xmlParseURI:
938 * @str: the URI string to analyze
939 *
940 * Parse an URI based on RFC 3986
941 *
942 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
943 *
944 * Returns a newly built xmlURIPtr or NULL in case of error
945 */
946xmlURIPtr
947xmlParseURI(const char *str) {
948 xmlURIPtr uri;
949 int ret;
950
951 if (str == NULL)
952 return(NULL);
953 uri = xmlCreateURI();
954 if (uri != NULL) {
955 ret = xmlParse3986URIReference(uri, str);
956 if (ret) {
957 xmlFreeURI(uri);
958 return(NULL);
959 }
960 }
961 return(uri);
962}
963
964/**
965 * xmlParseURIReference:
966 * @uri: pointer to an URI structure
967 * @str: the string to analyze
968 *
969 * Parse an URI reference string based on RFC 3986 and fills in the
970 * appropriate fields of the @uri structure
971 *
972 * URI-reference = URI / relative-ref
973 *
974 * Returns 0 or the error code
975 */
976int
977xmlParseURIReference(xmlURIPtr uri, const char *str) {
978 return(xmlParse3986URIReference(uri, str));
979}
980
981/**
982 * xmlParseURIRaw:
983 * @str: the URI string to analyze
984 * @raw: if 1 unescaping of URI pieces are disabled
985 *
986 * Parse an URI but allows to keep intact the original fragments.
987 *
988 * URI-reference = URI / relative-ref
989 *
990 * Returns a newly built xmlURIPtr or NULL in case of error
991 */
992xmlURIPtr
993xmlParseURIRaw(const char *str, int raw) {
994 xmlURIPtr uri;
995 int ret;
996
997 if (str == NULL)
998 return(NULL);
999 uri = xmlCreateURI();
1000 if (uri != NULL) {
1001 if (raw) {
1002 uri->cleanup |= 2;
1003 }
1004 ret = xmlParseURIReference(uri, str);
1005 if (ret) {
1006 xmlFreeURI(uri);
1007 return(NULL);
1008 }
1009 }
1010 return(uri);
1011}
1012
1013/************************************************************************
1014 * *
1015 * Generic URI structure functions *
1016 * *
1017 ************************************************************************/
1018
1019/**
1020 * xmlCreateURI:
1021 *
1022 * Simply creates an empty xmlURI
1023 *
1024 * Returns the new structure or NULL in case of error
1025 */
1026xmlURIPtr
1027xmlCreateURI(void) {
1028 xmlURIPtr ret;
1029
1030 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1031 if (ret == NULL) {
1032 xmlURIErrMemory("creating URI structure\n");
1033 return(NULL);
1034 }
1035 memset(ret, 0, sizeof(xmlURI));
1036 ret->port = PORT_EMPTY;
1037 return(ret);
1038}
1039
1040/**
1041 * xmlSaveUriRealloc:
1042 *
1043 * Function to handle properly a reallocation when saving an URI
1044 * Also imposes some limit on the length of an URI string output
1045 */
1046static xmlChar *
1047xmlSaveUriRealloc(xmlChar *ret, int *max) {
1048 xmlChar *temp;
1049 int tmp;
1050
1051 if (*max > MAX_URI_LENGTH) {
1052 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1053 return(NULL);
1054 }
1055 tmp = *max * 2;
1056 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1057 if (temp == NULL) {
1058 xmlURIErrMemory("saving URI\n");
1059 return(NULL);
1060 }
1061 *max = tmp;
1062 return(temp);
1063}
1064
1065/**
1066 * xmlSaveUri:
1067 * @uri: pointer to an xmlURI
1068 *
1069 * Save the URI as an escaped string
1070 *
1071 * Returns a new string (to be deallocated by caller)
1072 */
1073xmlChar *
1074xmlSaveUri(xmlURIPtr uri) {
1075 xmlChar *ret = NULL;
1076 xmlChar *temp;
1077 const char *p;
1078 int len;
1079 int max;
1080
1081 if (uri == NULL) return(NULL);
1082
1083
1084 max = 80;
1085 ret = (xmlChar *) xmlMallocAtomic(max + 1);
1086 if (ret == NULL) {
1087 xmlURIErrMemory("saving URI\n");
1088 return(NULL);
1089 }
1090 len = 0;
1091
1092 if (uri->scheme != NULL) {
1093 p = uri->scheme;
1094 while (*p != 0) {
1095 if (len >= max) {
1096 temp = xmlSaveUriRealloc(ret, &max);
1097 if (temp == NULL) goto mem_error;
1098 ret = temp;
1099 }
1100 ret[len++] = *p++;
1101 }
1102 if (len >= max) {
1103 temp = xmlSaveUriRealloc(ret, &max);
1104 if (temp == NULL) goto mem_error;
1105 ret = temp;
1106 }
1107 ret[len++] = ':';
1108 }
1109 if (uri->opaque != NULL) {
1110 p = uri->opaque;
1111 while (*p != 0) {
1112 if (len + 3 >= max) {
1113 temp = xmlSaveUriRealloc(ret, &max);
1114 if (temp == NULL) goto mem_error;
1115 ret = temp;
1116 }
1117 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1118 ret[len++] = *p++;
1119 else {
1120 int val = *(unsigned char *)p++;
1121 int hi = val / 0x10, lo = val % 0x10;
1122 ret[len++] = '%';
1123 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1124 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1125 }
1126 }
1127 } else {
1128 if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
1129 if (len + 3 >= max) {
1130 temp = xmlSaveUriRealloc(ret, &max);
1131 if (temp == NULL) goto mem_error;
1132 ret = temp;
1133 }
1134 ret[len++] = '/';
1135 ret[len++] = '/';
1136 if (uri->user != NULL) {
1137 p = uri->user;
1138 while (*p != 0) {
1139 if (len + 3 >= max) {
1140 temp = xmlSaveUriRealloc(ret, &max);
1141 if (temp == NULL) goto mem_error;
1142 ret = temp;
1143 }
1144 if ((IS_UNRESERVED(*(p))) ||
1145 ((*(p) == ';')) || ((*(p) == ':')) ||
1146 ((*(p) == '&')) || ((*(p) == '=')) ||
1147 ((*(p) == '+')) || ((*(p) == '$')) ||
1148 ((*(p) == ',')))
1149 ret[len++] = *p++;
1150 else {
1151 int val = *(unsigned char *)p++;
1152 int hi = val / 0x10, lo = val % 0x10;
1153 ret[len++] = '%';
1154 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1155 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1156 }
1157 }
1158 if (len + 3 >= max) {
1159 temp = xmlSaveUriRealloc(ret, &max);
1160 if (temp == NULL) goto mem_error;
1161 ret = temp;
1162 }
1163 ret[len++] = '@';
1164 }
1165 if (uri->server != NULL) {
1166 p = uri->server;
1167 while (*p != 0) {
1168 if (len >= max) {
1169 temp = xmlSaveUriRealloc(ret, &max);
1170 if (temp == NULL) goto mem_error;
1171 ret = temp;
1172 }
1173 /* TODO: escaping? */
1174 ret[len++] = (xmlChar) *p++;
1175 }
1176 }
1177 if (uri->port > 0) {
1178 if (len + 10 >= max) {
1179 temp = xmlSaveUriRealloc(ret, &max);
1180 if (temp == NULL) goto mem_error;
1181 ret = temp;
1182 }
1183 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1184 }
1185 } else if (uri->authority != NULL) {
1186 if (len + 3 >= max) {
1187 temp = xmlSaveUriRealloc(ret, &max);
1188 if (temp == NULL) goto mem_error;
1189 ret = temp;
1190 }
1191 ret[len++] = '/';
1192 ret[len++] = '/';
1193 p = uri->authority;
1194 while (*p != 0) {
1195 if (len + 3 >= max) {
1196 temp = xmlSaveUriRealloc(ret, &max);
1197 if (temp == NULL) goto mem_error;
1198 ret = temp;
1199 }
1200 if ((IS_UNRESERVED(*(p))) ||
1201 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1202 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1203 ((*(p) == '=')) || ((*(p) == '+')))
1204 ret[len++] = *p++;
1205 else {
1206 int val = *(unsigned char *)p++;
1207 int hi = val / 0x10, lo = val % 0x10;
1208 ret[len++] = '%';
1209 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1210 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1211 }
1212 }
1213 } else if (uri->scheme != NULL) {
1214 if (len + 3 >= max) {
1215 temp = xmlSaveUriRealloc(ret, &max);
1216 if (temp == NULL) goto mem_error;
1217 ret = temp;
1218 }
1219 }
1220 if (uri->path != NULL) {
1221 p = uri->path;
1222 /*
1223 * the colon in file:///d: should not be escaped or
1224 * Windows accesses fail later.
1225 */
1226 if ((uri->scheme != NULL) &&
1227 (p[0] == '/') &&
1228 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1229 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1230 (p[2] == ':') &&
1231 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1232 if (len + 3 >= max) {
1233 temp = xmlSaveUriRealloc(ret, &max);
1234 if (temp == NULL) goto mem_error;
1235 ret = temp;
1236 }
1237 ret[len++] = *p++;
1238 ret[len++] = *p++;
1239 ret[len++] = *p++;
1240 }
1241 while (*p != 0) {
1242 if (len + 3 >= max) {
1243 temp = xmlSaveUriRealloc(ret, &max);
1244 if (temp == NULL) goto mem_error;
1245 ret = temp;
1246 }
1247 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1248 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1249 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1250 ((*(p) == ',')))
1251 ret[len++] = *p++;
1252 else {
1253 int val = *(unsigned char *)p++;
1254 int hi = val / 0x10, lo = val % 0x10;
1255 ret[len++] = '%';
1256 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1257 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1258 }
1259 }
1260 }
1261 if (uri->query_raw != NULL) {
1262 if (len + 1 >= max) {
1263 temp = xmlSaveUriRealloc(ret, &max);
1264 if (temp == NULL) goto mem_error;
1265 ret = temp;
1266 }
1267 ret[len++] = '?';
1268 p = uri->query_raw;
1269 while (*p != 0) {
1270 if (len + 1 >= max) {
1271 temp = xmlSaveUriRealloc(ret, &max);
1272 if (temp == NULL) goto mem_error;
1273 ret = temp;
1274 }
1275 ret[len++] = *p++;
1276 }
1277 } else if (uri->query != NULL) {
1278 if (len + 3 >= max) {
1279 temp = xmlSaveUriRealloc(ret, &max);
1280 if (temp == NULL) goto mem_error;
1281 ret = temp;
1282 }
1283 ret[len++] = '?';
1284 p = uri->query;
1285 while (*p != 0) {
1286 if (len + 3 >= max) {
1287 temp = xmlSaveUriRealloc(ret, &max);
1288 if (temp == NULL) goto mem_error;
1289 ret = temp;
1290 }
1291 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1292 ret[len++] = *p++;
1293 else {
1294 int val = *(unsigned char *)p++;
1295 int hi = val / 0x10, lo = val % 0x10;
1296 ret[len++] = '%';
1297 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1298 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1299 }
1300 }
1301 }
1302 }
1303 if (uri->fragment != NULL) {
1304 if (len + 3 >= max) {
1305 temp = xmlSaveUriRealloc(ret, &max);
1306 if (temp == NULL) goto mem_error;
1307 ret = temp;
1308 }
1309 ret[len++] = '#';
1310 p = uri->fragment;
1311 while (*p != 0) {
1312 if (len + 3 >= max) {
1313 temp = xmlSaveUriRealloc(ret, &max);
1314 if (temp == NULL) goto mem_error;
1315 ret = temp;
1316 }
1317 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1318 ret[len++] = *p++;
1319 else {
1320 int val = *(unsigned char *)p++;
1321 int hi = val / 0x10, lo = val % 0x10;
1322 ret[len++] = '%';
1323 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1324 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1325 }
1326 }
1327 }
1328 if (len >= max) {
1329 temp = xmlSaveUriRealloc(ret, &max);
1330 if (temp == NULL) goto mem_error;
1331 ret = temp;
1332 }
1333 ret[len] = 0;
1334 return(ret);
1335
1336mem_error:
1337 xmlFree(ret);
1338 return(NULL);
1339}
1340
1341/**
1342 * xmlPrintURI:
1343 * @stream: a FILE* for the output
1344 * @uri: pointer to an xmlURI
1345 *
1346 * Prints the URI in the stream @stream.
1347 */
1348void
1349xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1350 xmlChar *out;
1351
1352 out = xmlSaveUri(uri);
1353 if (out != NULL) {
1354 fprintf(stream, "%s", (char *) out);
1355 xmlFree(out);
1356 }
1357}
1358
1359/**
1360 * xmlCleanURI:
1361 * @uri: pointer to an xmlURI
1362 *
1363 * Make sure the xmlURI struct is free of content
1364 */
1365static void
1366xmlCleanURI(xmlURIPtr uri) {
1367 if (uri == NULL) return;
1368
1369 if (uri->scheme != NULL) xmlFree(uri->scheme);
1370 uri->scheme = NULL;
1371 if (uri->server != NULL) xmlFree(uri->server);
1372 uri->server = NULL;
1373 if (uri->user != NULL) xmlFree(uri->user);
1374 uri->user = NULL;
1375 if (uri->path != NULL) xmlFree(uri->path);
1376 uri->path = NULL;
1377 if (uri->fragment != NULL) xmlFree(uri->fragment);
1378 uri->fragment = NULL;
1379 if (uri->opaque != NULL) xmlFree(uri->opaque);
1380 uri->opaque = NULL;
1381 if (uri->authority != NULL) xmlFree(uri->authority);
1382 uri->authority = NULL;
1383 if (uri->query != NULL) xmlFree(uri->query);
1384 uri->query = NULL;
1385 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1386 uri->query_raw = NULL;
1387}
1388
1389/**
1390 * xmlFreeURI:
1391 * @uri: pointer to an xmlURI
1392 *
1393 * Free up the xmlURI struct
1394 */
1395void
1396xmlFreeURI(xmlURIPtr uri) {
1397 if (uri == NULL) return;
1398
1399 if (uri->scheme != NULL) xmlFree(uri->scheme);
1400 if (uri->server != NULL) xmlFree(uri->server);
1401 if (uri->user != NULL) xmlFree(uri->user);
1402 if (uri->path != NULL) xmlFree(uri->path);
1403 if (uri->fragment != NULL) xmlFree(uri->fragment);
1404 if (uri->opaque != NULL) xmlFree(uri->opaque);
1405 if (uri->authority != NULL) xmlFree(uri->authority);
1406 if (uri->query != NULL) xmlFree(uri->query);
1407 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1408 xmlFree(uri);
1409}
1410
1411/************************************************************************
1412 * *
1413 * Helper functions *
1414 * *
1415 ************************************************************************/
1416
1417/**
1418 * xmlNormalizeURIPath:
1419 * @path: pointer to the path string
1420 *
1421 * Applies the 5 normalization steps to a path string--that is, RFC 2396
1422 * Section 5.2, steps 6.c through 6.g.
1423 *
1424 * Normalization occurs directly on the string, no new allocation is done
1425 *
1426 * Returns 0 or an error code
1427 */
1428int
1429xmlNormalizeURIPath(char *path) {
1430 char *cur, *out;
1431
1432 if (path == NULL)
1433 return(-1);
1434
1435 /* Skip all initial "/" chars. We want to get to the beginning of the
1436 * first non-empty segment.
1437 */
1438 cur = path;
1439 while (cur[0] == '/')
1440 ++cur;
1441 if (cur[0] == '\0')
1442 return(0);
1443
1444 /* Keep everything we've seen so far. */
1445 out = cur;
1446
1447 /*
1448 * Analyze each segment in sequence for cases (c) and (d).
1449 */
1450 while (cur[0] != '\0') {
1451 /*
1452 * c) All occurrences of "./", where "." is a complete path segment,
1453 * are removed from the buffer string.
1454 */
1455 if ((cur[0] == '.') && (cur[1] == '/')) {
1456 cur += 2;
1457 /* '//' normalization should be done at this point too */
1458 while (cur[0] == '/')
1459 cur++;
1460 continue;
1461 }
1462
1463 /*
1464 * d) If the buffer string ends with "." as a complete path segment,
1465 * that "." is removed.
1466 */
1467 if ((cur[0] == '.') && (cur[1] == '\0'))
1468 break;
1469
1470 /* Otherwise keep the segment. */
1471 while (cur[0] != '/') {
1472 if (cur[0] == '\0')
1473 goto done_cd;
1474 (out++)[0] = (cur++)[0];
1475 }
1476 /* normalize // */
1477 while ((cur[0] == '/') && (cur[1] == '/'))
1478 cur++;
1479
1480 (out++)[0] = (cur++)[0];
1481 }
1482 done_cd:
1483 out[0] = '\0';
1484
1485 /* Reset to the beginning of the first segment for the next sequence. */
1486 cur = path;
1487 while (cur[0] == '/')
1488 ++cur;
1489 if (cur[0] == '\0')
1490 return(0);
1491
1492 /*
1493 * Analyze each segment in sequence for cases (e) and (f).
1494 *
1495 * e) All occurrences of "<segment>/../", where <segment> is a
1496 * complete path segment not equal to "..", are removed from the
1497 * buffer string. Removal of these path segments is performed
1498 * iteratively, removing the leftmost matching pattern on each
1499 * iteration, until no matching pattern remains.
1500 *
1501 * f) If the buffer string ends with "<segment>/..", where <segment>
1502 * is a complete path segment not equal to "..", that
1503 * "<segment>/.." is removed.
1504 *
1505 * To satisfy the "iterative" clause in (e), we need to collapse the
1506 * string every time we find something that needs to be removed. Thus,
1507 * we don't need to keep two pointers into the string: we only need a
1508 * "current position" pointer.
1509 */
1510 while (1) {
1511 char *segp, *tmp;
1512
1513 /* At the beginning of each iteration of this loop, "cur" points to
1514 * the first character of the segment we want to examine.
1515 */
1516
1517 /* Find the end of the current segment. */
1518 segp = cur;
1519 while ((segp[0] != '/') && (segp[0] != '\0'))
1520 ++segp;
1521
1522 /* If this is the last segment, we're done (we need at least two
1523 * segments to meet the criteria for the (e) and (f) cases).
1524 */
1525 if (segp[0] == '\0')
1526 break;
1527
1528 /* If the first segment is "..", or if the next segment _isn't_ "..",
1529 * keep this segment and try the next one.
1530 */
1531 ++segp;
1532 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
1533 || ((segp[0] != '.') || (segp[1] != '.')
1534 || ((segp[2] != '/') && (segp[2] != '\0')))) {
1535 cur = segp;
1536 continue;
1537 }
1538
1539 /* If we get here, remove this segment and the next one and back up
1540 * to the previous segment (if there is one), to implement the
1541 * "iteratively" clause. It's pretty much impossible to back up
1542 * while maintaining two pointers into the buffer, so just compact
1543 * the whole buffer now.
1544 */
1545
1546 /* If this is the end of the buffer, we're done. */
1547 if (segp[2] == '\0') {
1548 cur[0] = '\0';
1549 break;
1550 }
1551 /* Valgrind complained, strcpy(cur, segp + 3); */
1552 /* string will overlap, do not use strcpy */
1553 tmp = cur;
1554 segp += 3;
1555 while ((*tmp++ = *segp++) != 0)
1556 ;
1557
1558 /* If there are no previous segments, then keep going from here. */
1559 segp = cur;
1560 while ((segp > path) && ((--segp)[0] == '/'))
1561 ;
1562 if (segp == path)
1563 continue;
1564
1565 /* "segp" is pointing to the end of a previous segment; find it's
1566 * start. We need to back up to the previous segment and start
1567 * over with that to handle things like "foo/bar/../..". If we
1568 * don't do this, then on the first pass we'll remove the "bar/..",
1569 * but be pointing at the second ".." so we won't realize we can also
1570 * remove the "foo/..".
1571 */
1572 cur = segp;
1573 while ((cur > path) && (cur[-1] != '/'))
1574 --cur;
1575 }
1576 out[0] = '\0';
1577
1578 /*
1579 * g) If the resulting buffer string still begins with one or more
1580 * complete path segments of "..", then the reference is
1581 * considered to be in error. Implementations may handle this
1582 * error by retaining these components in the resolved path (i.e.,
1583 * treating them as part of the final URI), by removing them from
1584 * the resolved path (i.e., discarding relative levels above the
1585 * root), or by avoiding traversal of the reference.
1586 *
1587 * We discard them from the final path.
1588 */
1589 if (path[0] == '/') {
1590 cur = path;
1591 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
1592 && ((cur[3] == '/') || (cur[3] == '\0')))
1593 cur += 3;
1594
1595 if (cur != path) {
1596 out = path;
1597 while (cur[0] != '\0')
1598 (out++)[0] = (cur++)[0];
1599 out[0] = 0;
1600 }
1601 }
1602
1603 return(0);
1604}
1605
/*
 * is_hex:
 * @c:  the character to classify
 *
 * Returns 1 if @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1613
1614/**
1615 * xmlURIUnescapeString:
1616 * @str: the string to unescape
1617 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1618 * @target: optional destination buffer
1619 *
1620 * Unescaping routine, but does not check that the string is an URI. The
1621 * output is a direct unsigned char translation of %XX values (no encoding)
1622 * Note that the length of the result can only be smaller or same size as
1623 * the input string.
1624 *
1625 * Returns a copy of the string, but unescaped, will return NULL only in case
1626 * of error
1627 */
1628char *
1629xmlURIUnescapeString(const char *str, int len, char *target) {
1630 char *ret, *out;
1631 const char *in;
1632
1633 if (str == NULL)
1634 return(NULL);
1635 if (len <= 0) len = strlen(str);
1636 if (len < 0) return(NULL);
1637
1638 if (target == NULL) {
1639 ret = (char *) xmlMallocAtomic(len + 1);
1640 if (ret == NULL) {
1641 xmlURIErrMemory("unescaping URI value\n");
1642 return(NULL);
1643 }
1644 } else
1645 ret = target;
1646 in = str;
1647 out = ret;
1648 while(len > 0) {
1649 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1650 int c = 0;
1651 in++;
1652 if ((*in >= '0') && (*in <= '9'))
1653 c = (*in - '0');
1654 else if ((*in >= 'a') && (*in <= 'f'))
1655 c = (*in - 'a') + 10;
1656 else if ((*in >= 'A') && (*in <= 'F'))
1657 c = (*in - 'A') + 10;
1658 in++;
1659 if ((*in >= '0') && (*in <= '9'))
1660 c = c * 16 + (*in - '0');
1661 else if ((*in >= 'a') && (*in <= 'f'))
1662 c = c * 16 + (*in - 'a') + 10;
1663 else if ((*in >= 'A') && (*in <= 'F'))
1664 c = c * 16 + (*in - 'A') + 10;
1665 in++;
1666 len -= 3;
1667 /* Explicit sign change */
1668 *out++ = (char) c;
1669 } else {
1670 *out++ = *in++;
1671 len--;
1672 }
1673 }
1674 *out = 0;
1675 return(ret);
1676}
1677
1678/**
1679 * xmlURIEscapeStr:
1680 * @str: string to escape
1681 * @list: exception list string of chars not to escape
1682 *
1683 * This routine escapes a string to hex, ignoring reserved characters
1684 * (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1685 *
1686 * Returns a new escaped string or NULL in case of error.
1687 */
1688xmlChar *
1689xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1690 xmlChar *ret, ch;
1691 xmlChar *temp;
1692 const xmlChar *in;
1693 int len, out;
1694
1695 if (str == NULL)
1696 return(NULL);
1697 if (str[0] == 0)
1698 return(xmlStrdup(str));
1699 len = xmlStrlen(str);
1700 if (!(len > 0)) return(NULL);
1701
1702 len += 20;
1703 ret = (xmlChar *) xmlMallocAtomic(len);
1704 if (ret == NULL) {
1705 xmlURIErrMemory("escaping URI value\n");
1706 return(NULL);
1707 }
1708 in = (const xmlChar *) str;
1709 out = 0;
1710 while(*in != 0) {
1711 if (len - out <= 3) {
1712 temp = xmlSaveUriRealloc(ret, &len);
1713 if (temp == NULL) {
1714 xmlURIErrMemory("escaping URI value\n");
1715 xmlFree(ret);
1716 return(NULL);
1717 }
1718 ret = temp;
1719 }
1720
1721 ch = *in;
1722
1723 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1724 unsigned char val;
1725 ret[out++] = '%';
1726 val = ch >> 4;
1727 if (val <= 9)
1728 ret[out++] = '0' + val;
1729 else
1730 ret[out++] = 'A' + val - 0xA;
1731 val = ch & 0xF;
1732 if (val <= 9)
1733 ret[out++] = '0' + val;
1734 else
1735 ret[out++] = 'A' + val - 0xA;
1736 in++;
1737 } else {
1738 ret[out++] = *in++;
1739 }
1740
1741 }
1742 ret[out] = 0;
1743 return(ret);
1744}
1745
1746/**
1747 * xmlURIEscape:
1748 * @str: the string of the URI to escape
1749 *
1750 * Escaping routine, does not do validity checks !
1751 * It will try to escape the chars needing this, but this is heuristic
1752 * based it's impossible to be sure.
1753 *
1754 * Returns an copy of the string, but escaped
1755 *
1756 * 25 May 2001
1757 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1758 * according to RFC2396.
1759 * - Carl Douglas
1760 */
1761xmlChar *
1762xmlURIEscape(const xmlChar * str)
1763{
1764 xmlChar *ret, *segment = NULL;
1765 xmlURIPtr uri;
1766 int ret2;
1767
1768 if (str == NULL)
1769 return (NULL);
1770
1771 uri = xmlCreateURI();
1772 if (uri != NULL) {
1773 /*
1774 * Allow escaping errors in the unescaped form
1775 */
1776 uri->cleanup = 1;
1777 ret2 = xmlParseURIReference(uri, (const char *)str);
1778 if (ret2) {
1779 xmlFreeURI(uri);
1780 return (NULL);
1781 }
1782 }
1783
1784 if (!uri)
1785 return NULL;
1786
1787 ret = NULL;
1788
1789#define NULLCHK(p) if(!p) { \
1790 xmlURIErrMemory("escaping URI value\n"); \
1791 xmlFreeURI(uri); \
1792 xmlFree(ret); \
1793 return NULL; } \
1794
1795 if (uri->scheme) {
1796 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1797 NULLCHK(segment)
1798 ret = xmlStrcat(ret, segment);
1799 ret = xmlStrcat(ret, BAD_CAST ":");
1800 xmlFree(segment);
1801 }
1802
1803 if (uri->authority) {
1804 segment =
1805 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1806 NULLCHK(segment)
1807 ret = xmlStrcat(ret, BAD_CAST "//");
1808 ret = xmlStrcat(ret, segment);
1809 xmlFree(segment);
1810 }
1811
1812 if (uri->user) {
1813 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1814 NULLCHK(segment)
1815 ret = xmlStrcat(ret,BAD_CAST "//");
1816 ret = xmlStrcat(ret, segment);
1817 ret = xmlStrcat(ret, BAD_CAST "@");
1818 xmlFree(segment);
1819 }
1820
1821 if (uri->server) {
1822 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1823 NULLCHK(segment)
1824 if (uri->user == NULL)
1825 ret = xmlStrcat(ret, BAD_CAST "//");
1826 ret = xmlStrcat(ret, segment);
1827 xmlFree(segment);
1828 }
1829
1830 if (uri->port > 0) {
1831 xmlChar port[11];
1832
1833 snprintf((char *) port, 11, "%d", uri->port);
1834 ret = xmlStrcat(ret, BAD_CAST ":");
1835 ret = xmlStrcat(ret, port);
1836 }
1837
1838 if (uri->path) {
1839 segment =
1840 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1841 NULLCHK(segment)
1842 ret = xmlStrcat(ret, segment);
1843 xmlFree(segment);
1844 }
1845
1846 if (uri->query_raw) {
1847 ret = xmlStrcat(ret, BAD_CAST "?");
1848 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1849 }
1850 else if (uri->query) {
1851 segment =
1852 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1853 NULLCHK(segment)
1854 ret = xmlStrcat(ret, BAD_CAST "?");
1855 ret = xmlStrcat(ret, segment);
1856 xmlFree(segment);
1857 }
1858
1859 if (uri->opaque) {
1860 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1861 NULLCHK(segment)
1862 ret = xmlStrcat(ret, segment);
1863 xmlFree(segment);
1864 }
1865
1866 if (uri->fragment) {
1867 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1868 NULLCHK(segment)
1869 ret = xmlStrcat(ret, BAD_CAST "#");
1870 ret = xmlStrcat(ret, segment);
1871 xmlFree(segment);
1872 }
1873
1874 xmlFreeURI(uri);
1875#undef NULLCHK
1876
1877 return (ret);
1878}
1879
1880/************************************************************************
1881 * *
1882 * Public functions *
1883 * *
1884 ************************************************************************/
1885
1886/**
1887 * xmlBuildURI:
1888 * @URI: the URI instance found in the document
1889 * @base: the base value
1890 *
1891 * Computes he final URI of the reference done by checking that
1892 * the given URI is valid, and building the final URI using the
1893 * base URI. This is processed according to section 5.2 of the
1894 * RFC 2396
1895 *
1896 * 5.2. Resolving Relative References to Absolute Form
1897 *
1898 * Returns a new URI string (to be freed by the caller) or NULL in case
1899 * of error.
1900 */
1901xmlChar *
1902xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1903 xmlChar *val = NULL;
1904 int ret, len, indx, cur, out;
1905 xmlURIPtr ref = NULL;
1906 xmlURIPtr bas = NULL;
1907 xmlURIPtr res = NULL;
1908
1909 /*
1910 * 1) The URI reference is parsed into the potential four components and
1911 * fragment identifier, as described in Section 4.3.
1912 *
1913 * NOTE that a completely empty URI is treated by modern browsers
1914 * as a reference to "." rather than as a synonym for the current
1915 * URI. Should we do that here?
1916 */
1917 if (URI == NULL)
1918 ret = -1;
1919 else {
1920 if (*URI) {
1921 ref = xmlCreateURI();
1922 if (ref == NULL)
1923 goto done;
1924 ret = xmlParseURIReference(ref, (const char *) URI);
1925 }
1926 else
1927 ret = 0;
1928 }
1929 if (ret != 0)
1930 goto done;
1931 if ((ref != NULL) && (ref->scheme != NULL)) {
1932 /*
1933 * The URI is absolute don't modify.
1934 */
1935 val = xmlStrdup(URI);
1936 goto done;
1937 }
1938 if (base == NULL)
1939 ret = -1;
1940 else {
1941 bas = xmlCreateURI();
1942 if (bas == NULL)
1943 goto done;
1944 ret = xmlParseURIReference(bas, (const char *) base);
1945 }
1946 if (ret != 0) {
1947 if (ref)
1948 val = xmlSaveUri(ref);
1949 goto done;
1950 }
1951 if (ref == NULL) {
1952 /*
1953 * the base fragment must be ignored
1954 */
1955 if (bas->fragment != NULL) {
1956 xmlFree(bas->fragment);
1957 bas->fragment = NULL;
1958 }
1959 val = xmlSaveUri(bas);
1960 goto done;
1961 }
1962
1963 /*
1964 * 2) If the path component is empty and the scheme, authority, and
1965 * query components are undefined, then it is a reference to the
1966 * current document and we are done. Otherwise, the reference URI's
1967 * query and fragment components are defined as found (or not found)
1968 * within the URI reference and not inherited from the base URI.
1969 *
1970 * NOTE that in modern browsers, the parsing differs from the above
1971 * in the following aspect: the query component is allowed to be
1972 * defined while still treating this as a reference to the current
1973 * document.
1974 */
1975 res = xmlCreateURI();
1976 if (res == NULL)
1977 goto done;
1978 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1979 ((ref->authority == NULL) && (ref->server == NULL) &&
1980 (ref->port == PORT_EMPTY))) {
1981 if (bas->scheme != NULL)
1982 res->scheme = xmlMemStrdup(bas->scheme);
1983 if (bas->authority != NULL)
1984 res->authority = xmlMemStrdup(bas->authority);
1985 else {
1986 if (bas->server != NULL)
1987 res->server = xmlMemStrdup(bas->server);
1988 if (bas->user != NULL)
1989 res->user = xmlMemStrdup(bas->user);
1990 res->port = bas->port;
1991 }
1992 if (bas->path != NULL)
1993 res->path = xmlMemStrdup(bas->path);
1994 if (ref->query_raw != NULL)
1995 res->query_raw = xmlMemStrdup (ref->query_raw);
1996 else if (ref->query != NULL)
1997 res->query = xmlMemStrdup(ref->query);
1998 else if (bas->query_raw != NULL)
1999 res->query_raw = xmlMemStrdup(bas->query_raw);
2000 else if (bas->query != NULL)
2001 res->query = xmlMemStrdup(bas->query);
2002 if (ref->fragment != NULL)
2003 res->fragment = xmlMemStrdup(ref->fragment);
2004 goto step_7;
2005 }
2006
2007 /*
2008 * 3) If the scheme component is defined, indicating that the reference
2009 * starts with a scheme name, then the reference is interpreted as an
2010 * absolute URI and we are done. Otherwise, the reference URI's
2011 * scheme is inherited from the base URI's scheme component.
2012 */
2013 if (ref->scheme != NULL) {
2014 val = xmlSaveUri(ref);
2015 goto done;
2016 }
2017 if (bas->scheme != NULL)
2018 res->scheme = xmlMemStrdup(bas->scheme);
2019
2020 if (ref->query_raw != NULL)
2021 res->query_raw = xmlMemStrdup(ref->query_raw);
2022 else if (ref->query != NULL)
2023 res->query = xmlMemStrdup(ref->query);
2024 if (ref->fragment != NULL)
2025 res->fragment = xmlMemStrdup(ref->fragment);
2026
2027 /*
2028 * 4) If the authority component is defined, then the reference is a
2029 * network-path and we skip to step 7. Otherwise, the reference
2030 * URI's authority is inherited from the base URI's authority
2031 * component, which will also be undefined if the URI scheme does not
2032 * use an authority component.
2033 */
2034 if ((ref->authority != NULL) || (ref->server != NULL) ||
2035 (ref->port != PORT_EMPTY)) {
2036 if (ref->authority != NULL)
2037 res->authority = xmlMemStrdup(ref->authority);
2038 else {
2039 if (ref->server != NULL)
2040 res->server = xmlMemStrdup(ref->server);
2041 if (ref->user != NULL)
2042 res->user = xmlMemStrdup(ref->user);
2043 res->port = ref->port;
2044 }
2045 if (ref->path != NULL)
2046 res->path = xmlMemStrdup(ref->path);
2047 goto step_7;
2048 }
2049 if (bas->authority != NULL)
2050 res->authority = xmlMemStrdup(bas->authority);
2051 else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
2052 if (bas->server != NULL)
2053 res->server = xmlMemStrdup(bas->server);
2054 if (bas->user != NULL)
2055 res->user = xmlMemStrdup(bas->user);
2056 res->port = bas->port;
2057 }
2058
2059 /*
2060 * 5) If the path component begins with a slash character ("/"), then
2061 * the reference is an absolute-path and we skip to step 7.
2062 */
2063 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2064 res->path = xmlMemStrdup(ref->path);
2065 goto step_7;
2066 }
2067
2068
2069 /*
2070 * 6) If this step is reached, then we are resolving a relative-path
2071 * reference. The relative path needs to be merged with the base
2072 * URI's path. Although there are many ways to do this, we will
2073 * describe a simple method using a separate string buffer.
2074 *
2075 * Allocate a buffer large enough for the result string.
2076 */
2077 len = 2; /* extra / and 0 */
2078 if (ref->path != NULL)
2079 len += strlen(ref->path);
2080 if (bas->path != NULL)
2081 len += strlen(bas->path);
2082 res->path = (char *) xmlMallocAtomic(len);
2083 if (res->path == NULL) {
2084 xmlURIErrMemory("resolving URI against base\n");
2085 goto done;
2086 }
2087 res->path[0] = 0;
2088
2089 /*
2090 * a) All but the last segment of the base URI's path component is
2091 * copied to the buffer. In other words, any characters after the
2092 * last (right-most) slash character, if any, are excluded.
2093 */
2094 cur = 0;
2095 out = 0;
2096 if (bas->path != NULL) {
2097 while (bas->path[cur] != 0) {
2098 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2099 cur++;
2100 if (bas->path[cur] == 0)
2101 break;
2102
2103 cur++;
2104 while (out < cur) {
2105 res->path[out] = bas->path[out];
2106 out++;
2107 }
2108 }
2109 }
2110 res->path[out] = 0;
2111
2112 /*
2113 * b) The reference's path component is appended to the buffer
2114 * string.
2115 */
2116 if (ref->path != NULL && ref->path[0] != 0) {
2117 indx = 0;
2118 /*
2119 * Ensure the path includes a '/'
2120 */
2121 if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2122 res->path[out++] = '/';
2123 while (ref->path[indx] != 0) {
2124 res->path[out++] = ref->path[indx++];
2125 }
2126 }
2127 res->path[out] = 0;
2128
2129 /*
2130 * Steps c) to h) are really path normalization steps
2131 */
2132 xmlNormalizeURIPath(res->path);
2133
2134step_7:
2135
2136 /*
2137 * 7) The resulting URI components, including any inherited from the
2138 * base URI, are recombined to give the absolute form of the URI
2139 * reference.
2140 */
2141 val = xmlSaveUri(res);
2142
2143done:
2144 if (ref != NULL)
2145 xmlFreeURI(ref);
2146 if (bas != NULL)
2147 xmlFreeURI(bas);
2148 if (res != NULL)
2149 xmlFreeURI(res);
2150 return(val);
2151}
2152
2153/**
2154 * xmlBuildRelativeURI:
2155 * @URI: the URI reference under consideration
2156 * @base: the base value
2157 *
2158 * Expresses the URI of the reference in terms relative to the
2159 * base. Some examples of this operation include:
2160 * base = "http://site1.com/docs/book1.html"
2161 * URI input URI returned
2162 * docs/pic1.gif pic1.gif
2163 * docs/img/pic1.gif img/pic1.gif
2164 * img/pic1.gif ../img/pic1.gif
2165 * http://site1.com/docs/pic1.gif pic1.gif
2166 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2167 *
2168 * base = "docs/book1.html"
2169 * URI input URI returned
2170 * docs/pic1.gif pic1.gif
2171 * docs/img/pic1.gif img/pic1.gif
2172 * img/pic1.gif ../img/pic1.gif
2173 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2174 *
2175 *
2176 * Note: if the URI reference is really weird or complicated, it may be
2177 * worthwhile to first convert it into a "nice" one by calling
2178 * xmlBuildURI (using 'base') before calling this routine,
2179 * since this routine (for reasonable efficiency) assumes URI has
2180 * already been through some validation.
2181 *
2182 * Returns a new URI string (to be freed by the caller) or NULL in case
2183 * error.
2184 */
2185xmlChar *
2186xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2187{
2188 xmlChar *val = NULL;
2189 int ret;
2190 int ix;
2191 int nbslash = 0;
2192 int len;
2193 xmlURIPtr ref = NULL;
2194 xmlURIPtr bas = NULL;
2195 xmlChar *bptr, *uptr, *vptr;
2196 int remove_path = 0;
2197
2198 if ((URI == NULL) || (*URI == 0))
2199 return NULL;
2200
2201 /*
2202 * First parse URI into a standard form
2203 */
2204 ref = xmlCreateURI ();
2205 if (ref == NULL)
2206 return NULL;
2207 /* If URI not already in "relative" form */
2208 if (URI[0] != '.') {
2209 ret = xmlParseURIReference (ref, (const char *) URI);
2210 if (ret != 0)
2211 goto done; /* Error in URI, return NULL */
2212 } else
2213 ref->path = (char *)xmlStrdup(URI);
2214
2215 /*
2216 * Next parse base into the same standard form
2217 */
2218 if ((base == NULL) || (*base == 0)) {
2219 val = xmlStrdup (URI);
2220 goto done;
2221 }
2222 bas = xmlCreateURI ();
2223 if (bas == NULL)
2224 goto done;
2225 if (base[0] != '.') {
2226 ret = xmlParseURIReference (bas, (const char *) base);
2227 if (ret != 0)
2228 goto done; /* Error in base, return NULL */
2229 } else
2230 bas->path = (char *)xmlStrdup(base);
2231
2232 /*
2233 * If the scheme / server on the URI differs from the base,
2234 * just return the URI
2235 */
2236 if ((ref->scheme != NULL) &&
2237 ((bas->scheme == NULL) ||
2238 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2239 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
2240 (bas->port != ref->port))) {
2241 val = xmlStrdup (URI);
2242 goto done;
2243 }
2244 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2245 val = xmlStrdup(BAD_CAST "");
2246 goto done;
2247 }
2248 if (bas->path == NULL) {
2249 val = xmlStrdup((xmlChar *)ref->path);
2250 goto done;
2251 }
2252 if (ref->path == NULL) {
2253 ref->path = (char *) "/";
2254 remove_path = 1;
2255 }
2256
2257 /*
2258 * At this point (at last!) we can compare the two paths
2259 *
2260 * First we take care of the special case where either of the
2261 * two path components may be missing (bug 316224)
2262 */
2263 bptr = (xmlChar *)bas->path;
2264 {
2265 xmlChar *rptr = (xmlChar *) ref->path;
2266 int pos = 0;
2267
2268 /*
2269 * Next we compare the two strings and find where they first differ
2270 */
2271 if ((*rptr == '.') && (rptr[1] == '/'))
2272 rptr += 2;
2273 if ((*bptr == '.') && (bptr[1] == '/'))
2274 bptr += 2;
2275 else if ((*bptr == '/') && (*rptr != '/'))
2276 bptr++;
2277 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2278 pos++;
2279
2280 if (bptr[pos] == rptr[pos]) {
2281 val = xmlStrdup(BAD_CAST "");
2282 goto done; /* (I can't imagine why anyone would do this) */
2283 }
2284
2285 /*
2286 * In URI, "back up" to the last '/' encountered. This will be the
2287 * beginning of the "unique" suffix of URI
2288 */
2289 ix = pos;
2290 for (; ix > 0; ix--) {
2291 if (rptr[ix - 1] == '/')
2292 break;
2293 }
2294 uptr = (xmlChar *)&rptr[ix];
2295
2296 /*
2297 * In base, count the number of '/' from the differing point
2298 */
2299 for (; bptr[ix] != 0; ix++) {
2300 if (bptr[ix] == '/')
2301 nbslash++;
2302 }
2303
2304 /*
2305 * e.g: URI="foo/" base="foo/bar" -> "./"
2306 */
2307 if (nbslash == 0 && !uptr[0]) {
2308 val = xmlStrdup(BAD_CAST "./");
2309 goto done;
2310 }
2311
2312 len = xmlStrlen (uptr) + 1;
2313 }
2314
2315 if (nbslash == 0) {
2316 if (uptr != NULL)
2317 /* exception characters from xmlSaveUri */
2318 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2319 goto done;
2320 }
2321
2322 /*
2323 * Allocate just enough space for the returned string -
2324 * length of the remainder of the URI, plus enough space
2325 * for the "../" groups, plus one for the terminator
2326 */
2327 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2328 if (val == NULL) {
2329 xmlURIErrMemory("building relative URI\n");
2330 goto done;
2331 }
2332 vptr = val;
2333 /*
2334 * Put in as many "../" as needed
2335 */
2336 for (; nbslash>0; nbslash--) {
2337 *vptr++ = '.';
2338 *vptr++ = '.';
2339 *vptr++ = '/';
2340 }
2341 /*
2342 * Finish up with the end of the URI
2343 */
2344 if (uptr != NULL) {
2345 if ((vptr > val) && (len > 0) &&
2346 (uptr[0] == '/') && (vptr[-1] == '/')) {
2347 memcpy (vptr, uptr + 1, len - 1);
2348 vptr[len - 2] = 0;
2349 } else {
2350 memcpy (vptr, uptr, len);
2351 vptr[len - 1] = 0;
2352 }
2353 } else {
2354 vptr[len - 1] = 0;
2355 }
2356
2357 /* escape the freshly-built path */
2358 vptr = val;
2359 /* exception characters from xmlSaveUri */
2360 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2361 xmlFree(vptr);
2362
2363done:
2364 /*
2365 * Free the working variables
2366 */
2367 if (remove_path != 0)
2368 ref->path = NULL;
2369 if (ref != NULL)
2370 xmlFreeURI (ref);
2371 if (bas != NULL)
2372 xmlFreeURI (bas);
2373
2374 return val;
2375}
2376
2377/**
2378 * xmlCanonicPath:
2379 * @path: the resource locator in a filesystem notation
2380 *
2381 * Constructs a canonic path from the specified path.
2382 *
2383 * Returns a new canonic path, or a duplicate of the path parameter if the
2384 * construction fails. The caller is responsible for freeing the memory occupied
2385 * by the returned string. If there is insufficient memory available, or the
2386 * argument is NULL, the function returns NULL.
2387 */
2388#define IS_WINDOWS_PATH(p) \
2389 ((p != NULL) && \
2390 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2391 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2392 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2393xmlChar *
2394xmlCanonicPath(const xmlChar *path)
2395{
2396/*
2397 * For Windows implementations, additional work needs to be done to
2398 * replace backslashes in pathnames with "forward slashes"
2399 */
2400#if defined(_WIN32)
2401 int len = 0;
2402 char *p = NULL;
2403#endif
2404 xmlURIPtr uri;
2405 xmlChar *ret;
2406 const xmlChar *absuri;
2407
2408 if (path == NULL)
2409 return(NULL);
2410
2411#if defined(_WIN32)
2412 /*
2413 * We must not change the backslashes to slashes if the the path
2414 * starts with \\?\
2415 * Those paths can be up to 32k characters long.
2416 * Was added specifically for OpenOffice, those paths can't be converted
2417 * to URIs anyway.
2418 */
2419 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2420 (path[3] == '\\') )
2421 return xmlStrdup((const xmlChar *) path);
2422#endif
2423
2424 /* sanitize filename starting with // so it can be used as URI */
2425 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2426 path++;
2427
2428 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2429 xmlFreeURI(uri);
2430 return xmlStrdup(path);
2431 }
2432
2433 /* Check if this is an "absolute uri" */
2434 absuri = xmlStrstr(path, BAD_CAST "://");
2435 if (absuri != NULL) {
2436 int l, j;
2437 unsigned char c;
2438 xmlChar *escURI;
2439
2440 /*
2441 * this looks like an URI where some parts have not been
2442 * escaped leading to a parsing problem. Check that the first
2443 * part matches a protocol.
2444 */
2445 l = absuri - path;
2446 /* Bypass if first part (part before the '://') is > 20 chars */
2447 if ((l <= 0) || (l > 20))
2448 goto path_processing;
2449 /* Bypass if any non-alpha characters are present in first part */
2450 for (j = 0;j < l;j++) {
2451 c = path[j];
2452 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2453 goto path_processing;
2454 }
2455
2456 /* Escape all except the characters specified in the supplied path */
2457 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2458 if (escURI != NULL) {
2459 /* Try parsing the escaped path */
2460 uri = xmlParseURI((const char *) escURI);
2461 /* If successful, return the escaped string */
2462 if (uri != NULL) {
2463 xmlFreeURI(uri);
2464 return escURI;
2465 }
2466 xmlFree(escURI);
2467 }
2468 }
2469
2470path_processing:
2471/* For Windows implementations, replace backslashes with 'forward slashes' */
2472#if defined(_WIN32)
2473 /*
2474 * Create a URI structure
2475 */
2476 uri = xmlCreateURI();
2477 if (uri == NULL) { /* Guard against 'out of memory' */
2478 return(NULL);
2479 }
2480
2481 len = xmlStrlen(path);
2482 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2483 /* make the scheme 'file' */
2484 uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2485 /* allocate space for leading '/' + path + string terminator */
2486 uri->path = xmlMallocAtomic(len + 2);
2487 if (uri->path == NULL) {
2488 xmlFreeURI(uri); /* Guard against 'out of memory' */
2489 return(NULL);
2490 }
2491 /* Put in leading '/' plus path */
2492 uri->path[0] = '/';
2493 p = uri->path + 1;
2494 strncpy(p, (char *) path, len + 1);
2495 } else {
2496 uri->path = (char *) xmlStrdup(path);
2497 if (uri->path == NULL) {
2498 xmlFreeURI(uri);
2499 return(NULL);
2500 }
2501 p = uri->path;
2502 }
2503 /* Now change all occurrences of '\' to '/' */
2504 while (*p != '\0') {
2505 if (*p == '\\')
2506 *p = '/';
2507 p++;
2508 }
2509
2510 if (uri->scheme == NULL) {
2511 ret = xmlStrdup((const xmlChar *) uri->path);
2512 } else {
2513 ret = xmlSaveUri(uri);
2514 }
2515
2516 xmlFreeURI(uri);
2517#else
2518 ret = xmlStrdup((const xmlChar *) path);
2519#endif
2520 return(ret);
2521}
2522
2523/**
2524 * xmlPathToURI:
2525 * @path: the resource locator in a filesystem notation
2526 *
2527 * Constructs an URI expressing the existing path
2528 *
2529 * Returns a new URI, or a duplicate of the path parameter if the
2530 * construction fails. The caller is responsible for freeing the memory
2531 * occupied by the returned string. If there is insufficient memory available,
2532 * or the argument is NULL, the function returns NULL.
2533 */
2534xmlChar *
2535xmlPathToURI(const xmlChar *path)
2536{
2537 xmlURIPtr uri;
2538 xmlURI temp;
2539 xmlChar *ret, *cal;
2540
2541 if (path == NULL)
2542 return(NULL);
2543
2544 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2545 xmlFreeURI(uri);
2546 return xmlStrdup(path);
2547 }
2548 cal = xmlCanonicPath(path);
2549 if (cal == NULL)
2550 return(NULL);
2551#if defined(_WIN32)
2552 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2553 If 'cal' is a valid URI already then we are done here, as continuing would make
2554 it invalid. */
2555 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2556 xmlFreeURI(uri);
2557 return cal;
2558 }
2559 /* 'cal' can contain a relative path with backslashes. If that is processed
2560 by xmlSaveURI, they will be escaped and the external entity loader machinery
2561 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2562 ret = cal;
2563 while (*ret != '\0') {
2564 if (*ret == '\\')
2565 *ret = '/';
2566 ret++;
2567 }
2568#endif
2569 memset(&temp, 0, sizeof(temp));
2570 temp.path = (char *) cal;
2571 ret = xmlSaveUri(&temp);
2572 xmlFree(cal);
2573 return(ret);
2574}
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use