Context Navigation

uri.c

Last change on this file was 104106, checked in by vboxsync, 8 weeks ago
libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640
Property svn:eol-style set to `native`
File size: 65.7 KB

Line
1	/**
2	* uri.c: set of generic URI related routines
3	*
4	* Reference: RFCs 3986, 2732 and 2373
5	*
6	* See Copyright for the status of this software.
7	*
8	* daniel@veillard.com
9	*/
10
11	#define IN_LIBXML
12	#include "libxml.h"
13
14	#include <limits.h>
15	#include <string.h>
16
17	#include <libxml/xmlmemory.h>
18	#include <libxml/uri.h>
19	#include <libxml/xmlerror.h>
20
21	#include "private/error.h"
22
23	/**
24	* MAX_URI_LENGTH:
25	*
26	* The definition of the URI regexp in the above RFC has no size limit
27	* In practice they are usually relatively short except for the
28	* data URI scheme as defined in RFC 2397. Even for data URI the usual
29	* maximum size before hitting random practical limits is around 64 KB
30	* and 4KB is usually a maximum admitted limit for proper operations.
31	* The value below is more a security limit than anything else and
32	* really should never be hit by 'normal' operations
33	* Set to 1 MByte in 2012, this is only enforced on output
34	*/
/*
 * Output size limit for a serialized URI (1 MiB). The expansion is
 * parenthesized so the macro stays a single value inside larger expressions.
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Value of uri->port when no port has been set. */
#define PORT_EMPTY           0
/* Special uri->port value marking an authority whose server part is empty. */
#define PORT_EMPTY_SERVER   (-1)
39
40	static void
41	xmlURIErrMemory(const char *extra)
42	{
43	if (extra)
44	__xmlRaiseError(NULL, NULL, NULL,
45	NULL, NULL, XML_FROM_URI,
46	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
47	extra, NULL, NULL, 0, 0,
48	"Memory allocation failed : %s\n", extra);
49	else
50	__xmlRaiseError(NULL, NULL, NULL,
51	NULL, NULL, XML_FROM_URI,
52	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
53	NULL, NULL, NULL, 0, 0,
54	"Memory allocation failed\n");
55	}
56
57	static void xmlCleanURI(xmlURIPtr uri);
58
/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 *
 * Unless noted otherwise, the IS_xxx macros below take a character VALUE.
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * Unlike the other IS_xxx macros this one takes a POINTER to the character.
 * The macro additionally accepts "[" and "]".
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
126
/*
 * Skip to next pointer char, handle escaped sequences:
 * a '%' advances the pointer by 3 characters, anything else by 1.
 * The argument must be a modifiable pointer lvalue without side effects,
 * since it is evaluated more than once.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
132
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */
142
/* Duplicate the first n bytes of s through xmlStrndup(), typed as char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
144
145	/************************************************************************
146	* *
147	* RFC 3986 parser *
148	* *
149	************************************************************************/
150
/*
 * All ISA_xxx macros take a POINTER to the character to classify and
 * evaluate their argument more than once.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The && chain stops at the first mismatch, so a NUL terminator is never
 * read past.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
200
201	/**
202	* xmlParse3986Scheme:
203	* @uri: pointer to an URI structure
204	* @str: pointer to the string to analyze
205	*
206	* Parse an URI scheme
207	*
208	* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
209	*
210	* Returns 0 or the error code
211	*/
212	static int
213	xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
214	const char *cur;
215
216	if (str == NULL)
217	return(-1);
218
219	cur = *str;
220	if (!ISA_ALPHA(cur))
221	return(2);
222	cur++;
223	while (ISA_ALPHA(cur) \|\| ISA_DIGIT(cur) \|\|
224	(cur == '+') \|\| (cur == '-') \|\| (*cur == '.')) cur++;
225	if (uri != NULL) {
226	if (uri->scheme != NULL) xmlFree(uri->scheme);
227	uri->scheme = STRNDUP(str, cur - str);
228	}
229	*str = cur;
230	return(0);
231	}
232
233	/**
234	* xmlParse3986Fragment:
235	* @uri: pointer to an URI structure
236	* @str: pointer to the string to analyze
237	*
238	* Parse the query part of an URI
239	*
240	* fragment = *( pchar / "/" / "?" )
241	* NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
242	* in the fragment identifier but this is used very broadly for
243	* xpointer scheme selection, so we are allowing it here to not break
244	* for example all the DocBook processing chains.
245	*
246	* Returns 0 or the error code
247	*/
248	static int
249	xmlParse3986Fragment(xmlURIPtr uri, const char **str)
250	{
251	const char *cur;
252
253	if (str == NULL)
254	return (-1);
255
256	cur = *str;
257
258	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
259	(cur == '[') \|\| (cur == ']') \|\|
260	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261	NEXT(cur);
262	if (uri != NULL) {
263	if (uri->fragment != NULL)
264	xmlFree(uri->fragment);
265	if (uri->cleanup & 2)
266	uri->fragment = STRNDUP(str, cur - str);
267	else
268	uri->fragment = xmlURIUnescapeString(str, cur - str, NULL);
269	}
270	*str = cur;
271	return (0);
272	}
273
274	/**
275	* xmlParse3986Query:
276	* @uri: pointer to an URI structure
277	* @str: pointer to the string to analyze
278	*
279	* Parse the query part of an URI
280	*
281	* query = *uric
282	*
283	* Returns 0 or the error code
284	*/
285	static int
286	xmlParse3986Query(xmlURIPtr uri, const char **str)
287	{
288	const char *cur;
289
290	if (str == NULL)
291	return (-1);
292
293	cur = *str;
294
295	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
296	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
297	NEXT(cur);
298	if (uri != NULL) {
299	if (uri->query != NULL)
300	xmlFree(uri->query);
301	if (uri->cleanup & 2)
302	uri->query = STRNDUP(str, cur - str);
303	else
304	uri->query = xmlURIUnescapeString(str, cur - str, NULL);
305
306	/* Save the raw bytes of the query as well.
307	* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
308	*/
309	if (uri->query_raw != NULL)
310	xmlFree (uri->query_raw);
311	uri->query_raw = STRNDUP (str, cur - str);
312	}
313	*str = cur;
314	return (0);
315	}
316
317	/**
318	* xmlParse3986Port:
319	* @uri: pointer to an URI structure
320	* @str: the string to analyze
321	*
322	* Parse a port part and fills in the appropriate fields
323	* of the @uri structure
324	*
325	* port = *DIGIT
326	*
327	* Returns 0 or the error code
328	*/
329	static int
330	xmlParse3986Port(xmlURIPtr uri, const char **str)
331	{
332	const char cur = str;
333	int port = 0;
334
335	if (ISA_DIGIT(cur)) {
336	while (ISA_DIGIT(cur)) {
337	int digit = *cur - '0';
338
339	if (port > INT_MAX / 10)
340	return(1);
341	port *= 10;
342	if (port > INT_MAX - digit)
343	return(1);
344	port += digit;
345
346	cur++;
347	}
348	if (uri != NULL)
349	uri->port = port;
350	*str = cur;
351	return(0);
352	}
353	return(1);
354	}
355
356	/**
357	* xmlParse3986Userinfo:
358	* @uri: pointer to an URI structure
359	* @str: the string to analyze
360	*
361	* Parse an user information part and fills in the appropriate fields
362	* of the @uri structure
363	*
364	* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
365	*
366	* Returns 0 or the error code
367	*/
368	static int
369	xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
370	{
371	const char *cur;
372
373	cur = *str;
374	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\|
375	ISA_SUB_DELIM(cur) \|\| (*cur == ':'))
376	NEXT(cur);
377	if (*cur == '@') {
378	if (uri != NULL) {
379	if (uri->user != NULL) xmlFree(uri->user);
380	if (uri->cleanup & 2)
381	uri->user = STRNDUP(str, cur - str);
382	else
383	uri->user = xmlURIUnescapeString(str, cur - str, NULL);
384	}
385	*str = cur;
386	return(0);
387	}
388	return(1);
389	}
390
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are looked at; a leading zero is
 * only allowed for the single-digit form. *@str is only modified on
 * success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!((cur[0] >= '0') && (cur[0] <= '9')))
        return(1);

    if (!((cur[1] >= '0') && (cur[1] <= '9'))) {
        /* DIGIT: 0-9 */
        cur += 1;
    } else if (!((cur[2] >= '0') && (cur[2] <= '9'))) {
        /* %x31-39 DIGIT: 10-99, no leading zero */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else {
        /*
         * Three digits: only 100-255 is valid. The previous code tested
         * the second digit instead of the third one in the "25x" case and
         * therefore let 256-259 through.
         */
        int value = (cur[0] - '0') * 100 + (cur[1] - '0') * 10 +
                    (cur[2] - '0');

        if ((value < 100) || (value > 255))
            return(1);
        cur += 3;
    }
    *str = cur;
    return(0);
}
428	/**
429	* xmlParse3986Host:
430	* @uri: pointer to an URI structure
431	* @str: the string to analyze
432	*
433	* Parse an host part and fills in the appropriate fields
434	* of the @uri structure
435	*
436	* host = IP-literal / IPv4address / reg-name
437	* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
438	* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
439	* reg-name = *( unreserved / pct-encoded / sub-delims )
440	*
441	* Returns 0 or the error code
442	*/
443	static int
444	xmlParse3986Host(xmlURIPtr uri, const char **str)
445	{
446	const char cur = str;
447	const char *host;
448
449	host = cur;
450	/*
451	* IPv6 and future addressing scheme are enclosed between brackets
452	*/
453	if (*cur == '[') {
454	cur++;
455	while ((cur != ']') && (cur != 0))
456	cur++;
457	if (*cur != ']')
458	return(1);
459	cur++;
460	goto found;
461	}
462	/*
463	* try to parse an IPv4
464	*/
465	if (ISA_DIGIT(cur)) {
466	if (xmlParse3986DecOctet(&cur) != 0)
467	goto not_ipv4;
468	if (*cur != '.')
469	goto not_ipv4;
470	cur++;
471	if (xmlParse3986DecOctet(&cur) != 0)
472	goto not_ipv4;
473	if (*cur != '.')
474	goto not_ipv4;
475	if (xmlParse3986DecOctet(&cur) != 0)
476	goto not_ipv4;
477	if (*cur != '.')
478	goto not_ipv4;
479	if (xmlParse3986DecOctet(&cur) != 0)
480	goto not_ipv4;
481	goto found;
482	not_ipv4:
483	cur = *str;
484	}
485	/*
486	* then this should be a hostname which can be empty
487	*/
488	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\| ISA_SUB_DELIM(cur))
489	NEXT(cur);
490	found:
491	if (uri != NULL) {
492	if (uri->authority != NULL) xmlFree(uri->authority);
493	uri->authority = NULL;
494	if (uri->server != NULL) xmlFree(uri->server);
495	if (cur != host) {
496	if (uri->cleanup & 2)
497	uri->server = STRNDUP(host, cur - host);
498	else
499	uri->server = xmlURIUnescapeString(host, cur - host, NULL);
500	} else
501	uri->server = NULL;
502	}
503	*str = cur;
504	return(0);
505	}
506
507	/**
508	* xmlParse3986Authority:
509	* @uri: pointer to an URI structure
510	* @str: the string to analyze
511	*
512	* Parse an authority part and fills in the appropriate fields
513	* of the @uri structure
514	*
515	* authority = [ userinfo "@" ] host [ ":" port ]
516	*
517	* Returns 0 or the error code
518	*/
519	static int
520	xmlParse3986Authority(xmlURIPtr uri, const char **str)
521	{
522	const char *cur;
523	int ret;
524
525	cur = *str;
526	/*
527	* try to parse an userinfo and check for the trailing @
528	*/
529	ret = xmlParse3986Userinfo(uri, &cur);
530	if ((ret != 0) \|\| (*cur != '@'))
531	cur = *str;
532	else
533	cur++;
534	ret = xmlParse3986Host(uri, &cur);
535	if (ret != 0) return(ret);
536	if (*cur == ':') {
537	cur++;
538	ret = xmlParse3986Port(uri, &cur);
539	if (ret != 0) return(ret);
540	}
541	*str = cur;
542	return(0);
543	}
544
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip over one path segment; nothing is stored, only *@str is advanced.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the segment does not start with a pchar
 *         and @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (ISA_PCHAR(pos)) {
        /* consume pchars until the forbidden character or a non-pchar */
        while (ISA_PCHAR(pos) && (*pos != forbid))
            NEXT(pos);
        *str = pos;
        return (0);
    }

    /* nothing to consume: only acceptable when empty segments are allowed */
    return (empty ? 0 : 1);
}
577
578	/**
579	* xmlParse3986PathAbEmpty:
580	* @uri: pointer to an URI structure
581	* @str: the string to analyze
582	*
583	* Parse an path absolute or empty and fills in the appropriate fields
584	* of the @uri structure
585	*
586	* path-abempty = *( "/" segment )
587	*
588	* Returns 0 or the error code
589	*/
590	static int
591	xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
592	{
593	const char *cur;
594	int ret;
595
596	cur = *str;
597
598	while (*cur == '/') {
599	cur++;
600	ret = xmlParse3986Segment(&cur, 0, 1);
601	if (ret != 0) return(ret);
602	}
603	if (uri != NULL) {
604	if (uri->path != NULL) xmlFree(uri->path);
605	if (*str != cur) {
606	if (uri->cleanup & 2)
607	uri->path = STRNDUP(str, cur - str);
608	else
609	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
610	} else {
611	uri->path = NULL;
612	}
613	}
614	*str = cur;
615	return (0);
616	}
617
618	/**
619	* xmlParse3986PathAbsolute:
620	* @uri: pointer to an URI structure
621	* @str: the string to analyze
622	*
623	* Parse an path absolute and fills in the appropriate fields
624	* of the @uri structure
625	*
626	* path-absolute = "/" [ segment-nz *( "/" segment ) ]
627	*
628	* Returns 0 or the error code
629	*/
630	static int
631	xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
632	{
633	const char *cur;
634	int ret;
635
636	cur = *str;
637
638	if (*cur != '/')
639	return(1);
640	cur++;
641	ret = xmlParse3986Segment(&cur, 0, 0);
642	if (ret == 0) {
643	while (*cur == '/') {
644	cur++;
645	ret = xmlParse3986Segment(&cur, 0, 1);
646	if (ret != 0) return(ret);
647	}
648	}
649	if (uri != NULL) {
650	if (uri->path != NULL) xmlFree(uri->path);
651	if (cur != *str) {
652	if (uri->cleanup & 2)
653	uri->path = STRNDUP(str, cur - str);
654	else
655	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
656	} else {
657	uri->path = NULL;
658	}
659	}
660	*str = cur;
661	return (0);
662	}
663
664	/**
665	* xmlParse3986PathRootless:
666	* @uri: pointer to an URI structure
667	* @str: the string to analyze
668	*
669	* Parse an path without root and fills in the appropriate fields
670	* of the @uri structure
671	*
672	* path-rootless = segment-nz *( "/" segment )
673	*
674	* Returns 0 or the error code
675	*/
676	static int
677	xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
678	{
679	const char *cur;
680	int ret;
681
682	cur = *str;
683
684	ret = xmlParse3986Segment(&cur, 0, 0);
685	if (ret != 0) return(ret);
686	while (*cur == '/') {
687	cur++;
688	ret = xmlParse3986Segment(&cur, 0, 1);
689	if (ret != 0) return(ret);
690	}
691	if (uri != NULL) {
692	if (uri->path != NULL) xmlFree(uri->path);
693	if (cur != *str) {
694	if (uri->cleanup & 2)
695	uri->path = STRNDUP(str, cur - str);
696	else
697	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
698	} else {
699	uri->path = NULL;
700	}
701	}
702	*str = cur;
703	return (0);
704	}
705
706	/**
707	* xmlParse3986PathNoScheme:
708	* @uri: pointer to an URI structure
709	* @str: the string to analyze
710	*
711	* Parse an path which is not a scheme and fills in the appropriate fields
712	* of the @uri structure
713	*
714	* path-noscheme = segment-nz-nc *( "/" segment )
715	*
716	* Returns 0 or the error code
717	*/
718	static int
719	xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
720	{
721	const char *cur;
722	int ret;
723
724	cur = *str;
725
726	ret = xmlParse3986Segment(&cur, ':', 0);
727	if (ret != 0) return(ret);
728	while (*cur == '/') {
729	cur++;
730	ret = xmlParse3986Segment(&cur, 0, 1);
731	if (ret != 0) return(ret);
732	}
733	if (uri != NULL) {
734	if (uri->path != NULL) xmlFree(uri->path);
735	if (cur != *str) {
736	if (uri->cleanup & 2)
737	uri->path = STRNDUP(str, cur - str);
738	else
739	uri->path = xmlURIUnescapeString(str, cur - str, NULL);
740	} else {
741	uri->path = NULL;
742	}
743	}
744	*str = cur;
745	return (0);
746	}
747
748	/**
749	* xmlParse3986HierPart:
750	* @uri: pointer to an URI structure
751	* @str: the string to analyze
752	*
753	* Parse an hierarchical part and fills in the appropriate fields
754	* of the @uri structure
755	*
756	* hier-part = "//" authority path-abempty
757	* / path-absolute
758	* / path-rootless
759	* / path-empty
760	*
761	* Returns 0 or the error code
762	*/
763	static int
764	xmlParse3986HierPart(xmlURIPtr uri, const char **str)
765	{
766	const char *cur;
767	int ret;
768
769	cur = *str;
770
771	if ((cur == '/') && ((cur + 1) == '/')) {
772	cur += 2;
773	ret = xmlParse3986Authority(uri, &cur);
774	if (ret != 0) return(ret);
775	/*
776	* An empty server is marked with a special URI value.
777	*/
778	if ((uri->server == NULL) && (uri->port == PORT_EMPTY))
779	uri->port = PORT_EMPTY_SERVER;
780	ret = xmlParse3986PathAbEmpty(uri, &cur);
781	if (ret != 0) return(ret);
782	*str = cur;
783	return(0);
784	} else if (*cur == '/') {
785	ret = xmlParse3986PathAbsolute(uri, &cur);
786	if (ret != 0) return(ret);
787	} else if (ISA_PCHAR(cur)) {
788	ret = xmlParse3986PathRootless(uri, &cur);
789	if (ret != 0) return(ret);
790	} else {
791	/* path-empty is effectively empty */
792	if (uri != NULL) {
793	if (uri->path != NULL) xmlFree(uri->path);
794	uri->path = NULL;
795	}
796	}
797	*str = cur;
798	return (0);
799	}
800
801	/**
802	* xmlParse3986RelativeRef:
803	* @uri: pointer to an URI structure
804	* @str: the string to analyze
805	*
806	* Parse an URI string and fills in the appropriate fields
807	* of the @uri structure
808	*
809	* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
810	* relative-part = "//" authority path-abempty
811	* / path-absolute
812	* / path-noscheme
813	* / path-empty
814	*
815	* Returns 0 or the error code
816	*/
817	static int
818	xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
819	int ret;
820
821	if ((str == '/') && ((str + 1) == '/')) {
822	str += 2;
823	ret = xmlParse3986Authority(uri, &str);
824	if (ret != 0) return(ret);
825	ret = xmlParse3986PathAbEmpty(uri, &str);
826	if (ret != 0) return(ret);
827	} else if (*str == '/') {
828	ret = xmlParse3986PathAbsolute(uri, &str);
829	if (ret != 0) return(ret);
830	} else if (ISA_PCHAR(str)) {
831	ret = xmlParse3986PathNoScheme(uri, &str);
832	if (ret != 0) return(ret);
833	} else {
834	/* path-empty is effectively empty */
835	if (uri != NULL) {
836	if (uri->path != NULL) xmlFree(uri->path);
837	uri->path = NULL;
838	}
839	}
840
841	if (*str == '?') {
842	str++;
843	ret = xmlParse3986Query(uri, &str);
844	if (ret != 0) return(ret);
845	}
846	if (*str == '#') {
847	str++;
848	ret = xmlParse3986Fragment(uri, &str);
849	if (ret != 0) return(ret);
850	}
851	if (*str != 0) {
852	xmlCleanURI(uri);
853	return(1);
854	}
855	return(0);
856	}
857
858
859	/**
860	* xmlParse3986URI:
861	* @uri: pointer to an URI structure
862	* @str: the string to analyze
863	*
864	* Parse an URI string and fills in the appropriate fields
865	* of the @uri structure
866	*
867	* scheme ":" hier-part [ "?" query ] [ "#" fragment ]
868	*
869	* Returns 0 or the error code
870	*/
871	static int
872	xmlParse3986URI(xmlURIPtr uri, const char *str) {
873	int ret;
874
875	ret = xmlParse3986Scheme(uri, &str);
876	if (ret != 0) return(ret);
877	if (*str != ':') {
878	return(1);
879	}
880	str++;
881	ret = xmlParse3986HierPart(uri, &str);
882	if (ret != 0) return(ret);
883	if (*str == '?') {
884	str++;
885	ret = xmlParse3986Query(uri, &str);
886	if (ret != 0) return(ret);
887	}
888	if (*str == '#') {
889	str++;
890	ret = xmlParse3986Fragment(uri, &str);
891	if (ret != 0) return(ret);
892	}
893	if (*str != 0) {
894	xmlCleanURI(uri);
895	return(1);
896	}
897	return(0);
898	}
899
900	/**
901	* xmlParse3986URIReference:
902	* @uri: pointer to an URI structure
903	* @str: the string to analyze
904	*
905	* Parse an URI reference string and fills in the appropriate fields
906	* of the @uri structure
907	*
908	* URI-reference = URI / relative-ref
909	*
910	* Returns 0 or the error code
911	*/
912	static int
913	xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
914	int ret;
915
916	if (str == NULL)
917	return(-1);
918	xmlCleanURI(uri);
919
920	/*
921	* Try first to parse absolute refs, then fallback to relative if
922	* it fails.
923	*/
924	ret = xmlParse3986URI(uri, str);
925	if (ret != 0) {
926	xmlCleanURI(uri);
927	ret = xmlParse3986RelativeRef(uri, str);
928	if (ret != 0) {
929	xmlCleanURI(uri);
930	return(ret);
931	}
932	}
933	return(0);
934	}
935
936	/**
937	* xmlParseURI:
938	* @str: the URI string to analyze
939	*
940	* Parse an URI based on RFC 3986
941	*
942	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
943	*
944	* Returns a newly built xmlURIPtr or NULL in case of error
945	*/
946	xmlURIPtr
947	xmlParseURI(const char *str) {
948	xmlURIPtr uri;
949	int ret;
950
951	if (str == NULL)
952	return(NULL);
953	uri = xmlCreateURI();
954	if (uri != NULL) {
955	ret = xmlParse3986URIReference(uri, str);
956	if (ret) {
957	xmlFreeURI(uri);
958	return(NULL);
959	}
960	}
961	return(uri);
962	}
963
964	/**
965	* xmlParseURIReference:
966	* @uri: pointer to an URI structure
967	* @str: the string to analyze
968	*
969	* Parse an URI reference string based on RFC 3986 and fills in the
970	* appropriate fields of the @uri structure
971	*
972	* URI-reference = URI / relative-ref
973	*
974	* Returns 0 or the error code
975	*/
976	int
977	xmlParseURIReference(xmlURIPtr uri, const char *str) {
978	return(xmlParse3986URIReference(uri, str));
979	}
980
981	/**
982	* xmlParseURIRaw:
983	* @str: the URI string to analyze
984	* @raw: if 1 unescaping of URI pieces are disabled
985	*
986	* Parse an URI but allows to keep intact the original fragments.
987	*
988	* URI-reference = URI / relative-ref
989	*
990	* Returns a newly built xmlURIPtr or NULL in case of error
991	*/
992	xmlURIPtr
993	xmlParseURIRaw(const char *str, int raw) {
994	xmlURIPtr uri;
995	int ret;
996
997	if (str == NULL)
998	return(NULL);
999	uri = xmlCreateURI();
1000	if (uri != NULL) {
1001	if (raw) {
1002	uri->cleanup \|= 2;
1003	}
1004	ret = xmlParseURIReference(uri, str);
1005	if (ret) {
1006	xmlFreeURI(uri);
1007	return(NULL);
1008	}
1009	}
1010	return(uri);
1011	}
1012
1013	/************************************************************************
1014	* *
1015	* Generic URI structure functions *
1016	* *
1017	************************************************************************/
1018
1019	/**
1020	* xmlCreateURI:
1021	*
1022	* Simply creates an empty xmlURI
1023	*
1024	* Returns the new structure or NULL in case of error
1025	*/
1026	xmlURIPtr
1027	xmlCreateURI(void) {
1028	xmlURIPtr ret;
1029
1030	ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1031	if (ret == NULL) {
1032	xmlURIErrMemory("creating URI structure\n");
1033	return(NULL);
1034	}
1035	memset(ret, 0, sizeof(xmlURI));
1036	ret->port = PORT_EMPTY;
1037	return(ret);
1038	}
1039
1040	/**
1041	* xmlSaveUriRealloc:
1042	*
1043	* Function to handle properly a reallocation when saving an URI
1044	* Also imposes some limit on the length of an URI string output
1045	*/
1046	static xmlChar *
1047	xmlSaveUriRealloc(xmlChar ret, int max) {
1048	xmlChar *temp;
1049	int tmp;
1050
1051	if (*max > MAX_URI_LENGTH) {
1052	xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1053	return(NULL);
1054	}
1055	tmp = max 2;
1056	temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1057	if (temp == NULL) {
1058	xmlURIErrMemory("saving URI\n");
1059	return(NULL);
1060	}
1061	*max = tmp;
1062	return(temp);
1063	}
1064
1065	/**
1066	* xmlSaveUri:
1067	* @uri: pointer to an xmlURI
1068	*
1069	* Save the URI as an escaped string
1070	*
1071	* Returns a new string (to be deallocated by caller)
1072	*/
1073	xmlChar *
1074	xmlSaveUri(xmlURIPtr uri) {
1075	xmlChar *ret = NULL;
1076	xmlChar *temp;
1077	const char *p;
1078	int len;
1079	int max;
1080
1081	if (uri == NULL) return(NULL);
1082
1083
1084	max = 80;
1085	ret = (xmlChar *) xmlMallocAtomic(max + 1);
1086	if (ret == NULL) {
1087	xmlURIErrMemory("saving URI\n");
1088	return(NULL);
1089	}
1090	len = 0;
1091
1092	if (uri->scheme != NULL) {
1093	p = uri->scheme;
1094	while (*p != 0) {
1095	if (len >= max) {
1096	temp = xmlSaveUriRealloc(ret, &max);
1097	if (temp == NULL) goto mem_error;
1098	ret = temp;
1099	}
1100	ret[len++] = *p++;
1101	}
1102	if (len >= max) {
1103	temp = xmlSaveUriRealloc(ret, &max);
1104	if (temp == NULL) goto mem_error;
1105	ret = temp;
1106	}
1107	ret[len++] = ':';
1108	}
1109	if (uri->opaque != NULL) {
1110	p = uri->opaque;
1111	while (*p != 0) {
1112	if (len + 3 >= max) {
1113	temp = xmlSaveUriRealloc(ret, &max);
1114	if (temp == NULL) goto mem_error;
1115	ret = temp;
1116	}
1117	if (IS_RESERVED((p)) \|\| IS_UNRESERVED((p)))
1118	ret[len++] = *p++;
1119	else {
1120	int val = (unsigned char )p++;
1121	int hi = val / 0x10, lo = val % 0x10;
1122	ret[len++] = '%';
1123	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1124	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1125	}
1126	}
1127	} else {
1128	if ((uri->server != NULL) \|\| (uri->port != PORT_EMPTY)) {
1129	if (len + 3 >= max) {
1130	temp = xmlSaveUriRealloc(ret, &max);
1131	if (temp == NULL) goto mem_error;
1132	ret = temp;
1133	}
1134	ret[len++] = '/';
1135	ret[len++] = '/';
1136	if (uri->user != NULL) {
1137	p = uri->user;
1138	while (*p != 0) {
1139	if (len + 3 >= max) {
1140	temp = xmlSaveUriRealloc(ret, &max);
1141	if (temp == NULL) goto mem_error;
1142	ret = temp;
1143	}
1144	if ((IS_UNRESERVED(*(p))) \|\|
1145	(((p) == ';')) \|\| (((p) == ':')) \|\|
1146	(((p) == '&')) \|\| (((p) == '=')) \|\|
1147	(((p) == '+')) \|\| (((p) == '$')) \|\|
1148	((*(p) == ',')))
1149	ret[len++] = *p++;
1150	else {
1151	int val = (unsigned char )p++;
1152	int hi = val / 0x10, lo = val % 0x10;
1153	ret[len++] = '%';
1154	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1155	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1156	}
1157	}
1158	if (len + 3 >= max) {
1159	temp = xmlSaveUriRealloc(ret, &max);
1160	if (temp == NULL) goto mem_error;
1161	ret = temp;
1162	}
1163	ret[len++] = '@';
1164	}
1165	if (uri->server != NULL) {
1166	p = uri->server;
1167	while (*p != 0) {
1168	if (len >= max) {
1169	temp = xmlSaveUriRealloc(ret, &max);
1170	if (temp == NULL) goto mem_error;
1171	ret = temp;
1172	}
1173	/* TODO: escaping? */
1174	ret[len++] = (xmlChar) *p++;
1175	}
1176	}
1177	if (uri->port > 0) {
1178	if (len + 10 >= max) {
1179	temp = xmlSaveUriRealloc(ret, &max);
1180	if (temp == NULL) goto mem_error;
1181	ret = temp;
1182	}
1183	len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1184	}
1185	} else if (uri->authority != NULL) {
1186	if (len + 3 >= max) {
1187	temp = xmlSaveUriRealloc(ret, &max);
1188	if (temp == NULL) goto mem_error;
1189	ret = temp;
1190	}
1191	ret[len++] = '/';
1192	ret[len++] = '/';
1193	p = uri->authority;
1194	while (*p != 0) {
1195	if (len + 3 >= max) {
1196	temp = xmlSaveUriRealloc(ret, &max);
1197	if (temp == NULL) goto mem_error;
1198	ret = temp;
1199	}
1200	if ((IS_UNRESERVED(*(p))) \|\|
1201	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\|
1202	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
1203	(((p) == '=')) \|\| (((p) == '+')))
1204	ret[len++] = *p++;
1205	else {
1206	int val = (unsigned char )p++;
1207	int hi = val / 0x10, lo = val % 0x10;
1208	ret[len++] = '%';
1209	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1210	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1211	}
1212	}
1213	} else if (uri->scheme != NULL) {
1214	if (len + 3 >= max) {
1215	temp = xmlSaveUriRealloc(ret, &max);
1216	if (temp == NULL) goto mem_error;
1217	ret = temp;
1218	}
1219	}
1220	if (uri->path != NULL) {
1221	p = uri->path;
1222	/*
1223	* the colon in file:///d: should not be escaped or
1224	* Windows accesses fail later.
1225	*/
1226	if ((uri->scheme != NULL) &&
1227	(p[0] == '/') &&
1228	(((p[1] >= 'a') && (p[1] <= 'z')) \|\|
1229	((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1230	(p[2] == ':') &&
1231	(xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1232	if (len + 3 >= max) {
1233	temp = xmlSaveUriRealloc(ret, &max);
1234	if (temp == NULL) goto mem_error;
1235	ret = temp;
1236	}
1237	ret[len++] = *p++;
1238	ret[len++] = *p++;
1239	ret[len++] = *p++;
1240	}
1241	while (*p != 0) {
1242	if (len + 3 >= max) {
1243	temp = xmlSaveUriRealloc(ret, &max);
1244	if (temp == NULL) goto mem_error;
1245	ret = temp;
1246	}
1247	if ((IS_UNRESERVED((p))) \|\| (((p) == '/')) \|\|
1248	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
1249	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|
1250	((*(p) == ',')))
1251	ret[len++] = *p++;
1252	else {
1253	int val = (unsigned char )p++;
1254	int hi = val / 0x10, lo = val % 0x10;
1255	ret[len++] = '%';
1256	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1257	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1258	}
1259	}
1260	}
1261	if (uri->query_raw != NULL) {
1262	if (len + 1 >= max) {
1263	temp = xmlSaveUriRealloc(ret, &max);
1264	if (temp == NULL) goto mem_error;
1265	ret = temp;
1266	}
1267	ret[len++] = '?';
1268	p = uri->query_raw;
1269	while (*p != 0) {
1270	if (len + 1 >= max) {
1271	temp = xmlSaveUriRealloc(ret, &max);
1272	if (temp == NULL) goto mem_error;
1273	ret = temp;
1274	}
1275	ret[len++] = *p++;
1276	}
1277	} else if (uri->query != NULL) {
1278	if (len + 3 >= max) {
1279	temp = xmlSaveUriRealloc(ret, &max);
1280	if (temp == NULL) goto mem_error;
1281	ret = temp;
1282	}
1283	ret[len++] = '?';
1284	p = uri->query;
1285	while (*p != 0) {
1286	if (len + 3 >= max) {
1287	temp = xmlSaveUriRealloc(ret, &max);
1288	if (temp == NULL) goto mem_error;
1289	ret = temp;
1290	}
1291	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1292	ret[len++] = *p++;
1293	else {
1294	int val = (unsigned char )p++;
1295	int hi = val / 0x10, lo = val % 0x10;
1296	ret[len++] = '%';
1297	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1298	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1299	}
1300	}
1301	}
1302	}
1303	if (uri->fragment != NULL) {
1304	if (len + 3 >= max) {
1305	temp = xmlSaveUriRealloc(ret, &max);
1306	if (temp == NULL) goto mem_error;
1307	ret = temp;
1308	}
1309	ret[len++] = '#';
1310	p = uri->fragment;
1311	while (*p != 0) {
1312	if (len + 3 >= max) {
1313	temp = xmlSaveUriRealloc(ret, &max);
1314	if (temp == NULL) goto mem_error;
1315	ret = temp;
1316	}
1317	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
1318	ret[len++] = *p++;
1319	else {
1320	int val = (unsigned char )p++;
1321	int hi = val / 0x10, lo = val % 0x10;
1322	ret[len++] = '%';
1323	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1324	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1325	}
1326	}
1327	}
1328	if (len >= max) {
1329	temp = xmlSaveUriRealloc(ret, &max);
1330	if (temp == NULL) goto mem_error;
1331	ret = temp;
1332	}
1333	ret[len] = 0;
1334	return(ret);
1335
1336	mem_error:
1337	xmlFree(ret);
1338	return(NULL);
1339	}
1340
1341	/**
1342	* xmlPrintURI:
1343	* @stream: a FILE* for the output
1344	* @uri: pointer to an xmlURI
1345	*
1346	* Prints the URI in the stream @stream.
1347	*/
1348	void
1349	xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1350	xmlChar *out;
1351
1352	out = xmlSaveUri(uri);
1353	if (out != NULL) {
1354	fprintf(stream, "%s", (char *) out);
1355	xmlFree(out);
1356	}
1357	}
1358
1359	/**
1360	* xmlCleanURI:
1361	* @uri: pointer to an xmlURI
1362	*
1363	* Make sure the xmlURI struct is free of content
1364	*/
1365	static void
1366	xmlCleanURI(xmlURIPtr uri) {
1367	if (uri == NULL) return;
1368
1369	if (uri->scheme != NULL) xmlFree(uri->scheme);
1370	uri->scheme = NULL;
1371	if (uri->server != NULL) xmlFree(uri->server);
1372	uri->server = NULL;
1373	if (uri->user != NULL) xmlFree(uri->user);
1374	uri->user = NULL;
1375	if (uri->path != NULL) xmlFree(uri->path);
1376	uri->path = NULL;
1377	if (uri->fragment != NULL) xmlFree(uri->fragment);
1378	uri->fragment = NULL;
1379	if (uri->opaque != NULL) xmlFree(uri->opaque);
1380	uri->opaque = NULL;
1381	if (uri->authority != NULL) xmlFree(uri->authority);
1382	uri->authority = NULL;
1383	if (uri->query != NULL) xmlFree(uri->query);
1384	uri->query = NULL;
1385	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1386	uri->query_raw = NULL;
1387	}
1388
1389	/**
1390	* xmlFreeURI:
1391	* @uri: pointer to an xmlURI
1392	*
1393	* Free up the xmlURI struct
1394	*/
1395	void
1396	xmlFreeURI(xmlURIPtr uri) {
1397	if (uri == NULL) return;
1398
1399	if (uri->scheme != NULL) xmlFree(uri->scheme);
1400	if (uri->server != NULL) xmlFree(uri->server);
1401	if (uri->user != NULL) xmlFree(uri->user);
1402	if (uri->path != NULL) xmlFree(uri->path);
1403	if (uri->fragment != NULL) xmlFree(uri->fragment);
1404	if (uri->opaque != NULL) xmlFree(uri->opaque);
1405	if (uri->authority != NULL) xmlFree(uri->authority);
1406	if (uri->query != NULL) xmlFree(uri->query);
1407	if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1408	xmlFree(uri);
1409	}
1410
1411	/************************************************************************
1412	* *
1413	* Helper functions *
1414	* *
1415	************************************************************************/
1416
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of consecutive "/" found
 * between segments are collapsed to a single "/" along the way.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            *out++ = *cur++;
        }
        /* normalize // : skip all but the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining '/' */
        *out++ = *cur++;
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so strcpy(cur, segp + 3)
         * must not be used; copy byte by byte, terminator included.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* "out" still marks the end of the string produced by steps (c)/(d);
     * the loop above only shortened the string, so this store lands on or
     * after the current terminator and does not change the result.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                *out++ = *cur++;
            out[0] = 0;
        }
    }

    return(0);
}
1605
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of '0'-'9', 'a'-'f' or 'A'-'F', 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
1613
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits within the first
 * @len bytes is copied through unchanged.
 *
 * Note that the length of the result can only be smaller or same size as
 * the input string, so a caller-supplied @target needs room for @len + 1
 * bytes. When @target is NULL the result is allocated with
 * xmlMallocAtomic().
 *
 * Returns a copy of the string, but unescaped (@target itself when one was
 * supplied), will return NULL only in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /* The length and the extra terminator byte must both fit an int. */
        if (full >= (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        /* Size computed as size_t so that len == INT_MAX cannot overflow. */
        ret = (char *) xmlMallocAtomic((size_t) len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while(len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = 0;

            /* high nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = (*in - 'A') + 10;
            /* low nibble */
            in++;
            if ((*in >= '0') && (*in <= '9'))
                c = c * 16 + (*in - '0');
            else if ((*in >= 'a') && (*in <= 'f'))
                c = c * 16 + (*in - 'a') + 10;
            else if ((*in >= 'A') && (*in <= 'F'))
                c = c * 16 + (*in - 'A') + 10;
            in++;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1677
1678	/**
1679	* xmlURIEscapeStr:
1680	* @str: string to escape
1681	* @list: exception list string of chars not to escape
1682	*
1683	* This routine escapes a string to hex, ignoring reserved characters
1684	* (a-z, A-Z, 0-9, "@-_.!~*'()") and the characters in the exception list.
1685	*
1686	* Returns a new escaped string or NULL in case of error.
1687	*/
1688	xmlChar *
1689	xmlURIEscapeStr(const xmlChar str, const xmlChar list) {
1690	xmlChar *ret, ch;
1691	xmlChar *temp;
1692	const xmlChar *in;
1693	int len, out;
1694
1695	if (str == NULL)
1696	return(NULL);
1697	if (str[0] == 0)
1698	return(xmlStrdup(str));
1699	len = xmlStrlen(str);
1700	if (!(len > 0)) return(NULL);
1701
1702	len += 20;
1703	ret = (xmlChar *) xmlMallocAtomic(len);
1704	if (ret == NULL) {
1705	xmlURIErrMemory("escaping URI value\n");
1706	return(NULL);
1707	}
1708	in = (const xmlChar *) str;
1709	out = 0;
1710	while(*in != 0) {
1711	if (len - out <= 3) {
1712	temp = xmlSaveUriRealloc(ret, &len);
1713	if (temp == NULL) {
1714	xmlURIErrMemory("escaping URI value\n");
1715	xmlFree(ret);
1716	return(NULL);
1717	}
1718	ret = temp;
1719	}
1720
1721	ch = *in;
1722
1723	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1724	unsigned char val;
1725	ret[out++] = '%';
1726	val = ch >> 4;
1727	if (val <= 9)
1728	ret[out++] = '0' + val;
1729	else
1730	ret[out++] = 'A' + val - 0xA;
1731	val = ch & 0xF;
1732	if (val <= 9)
1733	ret[out++] = '0' + val;
1734	else
1735	ret[out++] = 'A' + val - 0xA;
1736	in++;
1737	} else {
1738	ret[out++] = *in++;
1739	}
1740
1741	}
1742	ret[out] = 0;
1743	return(ret);
1744	}
1745
1746	/**
1747	* xmlURIEscape:
1748	* @str: the string of the URI to escape
1749	*
1750	* Escaping routine, does not do validity checks !
1751	* It will try to escape the chars needing this, but this is heuristic
1752	* based it's impossible to be sure.
1753	*
1754	* Returns an copy of the string, but escaped
1755	*
1756	* 25 May 2001
1757	* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1758	* according to RFC2396.
1759	* - Carl Douglas
1760	*/
1761	xmlChar *
1762	xmlURIEscape(const xmlChar * str)
1763	{
1764	xmlChar ret, segment = NULL;
1765	xmlURIPtr uri;
1766	int ret2;
1767
1768	if (str == NULL)
1769	return (NULL);
1770
1771	uri = xmlCreateURI();
1772	if (uri != NULL) {
1773	/*
1774	* Allow escaping errors in the unescaped form
1775	*/
1776	uri->cleanup = 1;
1777	ret2 = xmlParseURIReference(uri, (const char *)str);
1778	if (ret2) {
1779	xmlFreeURI(uri);
1780	return (NULL);
1781	}
1782	}
1783
1784	if (!uri)
1785	return NULL;
1786
1787	ret = NULL;
1788
1789	#define NULLCHK(p) if(!p) { \
1790	xmlURIErrMemory("escaping URI value\n"); \
1791	xmlFreeURI(uri); \
1792	xmlFree(ret); \
1793	return NULL; } \
1794
1795	if (uri->scheme) {
1796	segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1797	NULLCHK(segment)
1798	ret = xmlStrcat(ret, segment);
1799	ret = xmlStrcat(ret, BAD_CAST ":");
1800	xmlFree(segment);
1801	}
1802
1803	if (uri->authority) {
1804	segment =
1805	xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1806	NULLCHK(segment)
1807	ret = xmlStrcat(ret, BAD_CAST "//");
1808	ret = xmlStrcat(ret, segment);
1809	xmlFree(segment);
1810	}
1811
1812	if (uri->user) {
1813	segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1814	NULLCHK(segment)
1815	ret = xmlStrcat(ret,BAD_CAST "//");
1816	ret = xmlStrcat(ret, segment);
1817	ret = xmlStrcat(ret, BAD_CAST "@");
1818	xmlFree(segment);
1819	}
1820
1821	if (uri->server) {
1822	segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1823	NULLCHK(segment)
1824	if (uri->user == NULL)
1825	ret = xmlStrcat(ret, BAD_CAST "//");
1826	ret = xmlStrcat(ret, segment);
1827	xmlFree(segment);
1828	}
1829
1830	if (uri->port > 0) {
1831	xmlChar port[11];
1832
1833	snprintf((char *) port, 11, "%d", uri->port);
1834	ret = xmlStrcat(ret, BAD_CAST ":");
1835	ret = xmlStrcat(ret, port);
1836	}
1837
1838	if (uri->path) {
1839	segment =
1840	xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1841	NULLCHK(segment)
1842	ret = xmlStrcat(ret, segment);
1843	xmlFree(segment);
1844	}
1845
1846	if (uri->query_raw) {
1847	ret = xmlStrcat(ret, BAD_CAST "?");
1848	ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1849	}
1850	else if (uri->query) {
1851	segment =
1852	xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1853	NULLCHK(segment)
1854	ret = xmlStrcat(ret, BAD_CAST "?");
1855	ret = xmlStrcat(ret, segment);
1856	xmlFree(segment);
1857	}
1858
1859	if (uri->opaque) {
1860	segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1861	NULLCHK(segment)
1862	ret = xmlStrcat(ret, segment);
1863	xmlFree(segment);
1864	}
1865
1866	if (uri->fragment) {
1867	segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1868	NULLCHK(segment)
1869	ret = xmlStrcat(ret, BAD_CAST "#");
1870	ret = xmlStrcat(ret, segment);
1871	xmlFree(segment);
1872	}
1873
1874	xmlFreeURI(uri);
1875	#undef NULLCHK
1876
1877	return (ret);
1878	}
1879
1880	/************************************************************************
1881	* *
1882	* Public functions *
1883	* *
1884	************************************************************************/
1885
1886	/**
1887	* xmlBuildURI:
1888	* @URI: the URI instance found in the document
1889	* @base: the base value
1890	*
1891	* Computes he final URI of the reference done by checking that
1892	* the given URI is valid, and building the final URI using the
1893	* base URI. This is processed according to section 5.2 of the
1894	* RFC 2396
1895	*
1896	* 5.2. Resolving Relative References to Absolute Form
1897	*
1898	* Returns a new URI string (to be freed by the caller) or NULL in case
1899	* of error.
1900	*/
1901	xmlChar *
1902	xmlBuildURI(const xmlChar URI, const xmlChar base) {
1903	xmlChar *val = NULL;
1904	int ret, len, indx, cur, out;
1905	xmlURIPtr ref = NULL;
1906	xmlURIPtr bas = NULL;
1907	xmlURIPtr res = NULL;
1908
1909	/*
1910	* 1) The URI reference is parsed into the potential four components and
1911	* fragment identifier, as described in Section 4.3.
1912	*
1913	* NOTE that a completely empty URI is treated by modern browsers
1914	* as a reference to "." rather than as a synonym for the current
1915	* URI. Should we do that here?
1916	*/
1917	if (URI == NULL)
1918	ret = -1;
1919	else {
1920	if (*URI) {
1921	ref = xmlCreateURI();
1922	if (ref == NULL)
1923	goto done;
1924	ret = xmlParseURIReference(ref, (const char *) URI);
1925	}
1926	else
1927	ret = 0;
1928	}
1929	if (ret != 0)
1930	goto done;
1931	if ((ref != NULL) && (ref->scheme != NULL)) {
1932	/*
1933	* The URI is absolute don't modify.
1934	*/
1935	val = xmlStrdup(URI);
1936	goto done;
1937	}
1938	if (base == NULL)
1939	ret = -1;
1940	else {
1941	bas = xmlCreateURI();
1942	if (bas == NULL)
1943	goto done;
1944	ret = xmlParseURIReference(bas, (const char *) base);
1945	}
1946	if (ret != 0) {
1947	if (ref)
1948	val = xmlSaveUri(ref);
1949	goto done;
1950	}
1951	if (ref == NULL) {
1952	/*
1953	* the base fragment must be ignored
1954	*/
1955	if (bas->fragment != NULL) {
1956	xmlFree(bas->fragment);
1957	bas->fragment = NULL;
1958	}
1959	val = xmlSaveUri(bas);
1960	goto done;
1961	}
1962
1963	/*
1964	* 2) If the path component is empty and the scheme, authority, and
1965	* query components are undefined, then it is a reference to the
1966	* current document and we are done. Otherwise, the reference URI's
1967	* query and fragment components are defined as found (or not found)
1968	* within the URI reference and not inherited from the base URI.
1969	*
1970	* NOTE that in modern browsers, the parsing differs from the above
1971	* in the following aspect: the query component is allowed to be
1972	* defined while still treating this as a reference to the current
1973	* document.
1974	*/
1975	res = xmlCreateURI();
1976	if (res == NULL)
1977	goto done;
1978	if ((ref->scheme == NULL) && (ref->path == NULL) &&
1979	((ref->authority == NULL) && (ref->server == NULL) &&
1980	(ref->port == PORT_EMPTY))) {
1981	if (bas->scheme != NULL)
1982	res->scheme = xmlMemStrdup(bas->scheme);
1983	if (bas->authority != NULL)
1984	res->authority = xmlMemStrdup(bas->authority);
1985	else {
1986	if (bas->server != NULL)
1987	res->server = xmlMemStrdup(bas->server);
1988	if (bas->user != NULL)
1989	res->user = xmlMemStrdup(bas->user);
1990	res->port = bas->port;
1991	}
1992	if (bas->path != NULL)
1993	res->path = xmlMemStrdup(bas->path);
1994	if (ref->query_raw != NULL)
1995	res->query_raw = xmlMemStrdup (ref->query_raw);
1996	else if (ref->query != NULL)
1997	res->query = xmlMemStrdup(ref->query);
1998	else if (bas->query_raw != NULL)
1999	res->query_raw = xmlMemStrdup(bas->query_raw);
2000	else if (bas->query != NULL)
2001	res->query = xmlMemStrdup(bas->query);
2002	if (ref->fragment != NULL)
2003	res->fragment = xmlMemStrdup(ref->fragment);
2004	goto step_7;
2005	}
2006
2007	/*
2008	* 3) If the scheme component is defined, indicating that the reference
2009	* starts with a scheme name, then the reference is interpreted as an
2010	* absolute URI and we are done. Otherwise, the reference URI's
2011	* scheme is inherited from the base URI's scheme component.
2012	*/
2013	if (ref->scheme != NULL) {
2014	val = xmlSaveUri(ref);
2015	goto done;
2016	}
2017	if (bas->scheme != NULL)
2018	res->scheme = xmlMemStrdup(bas->scheme);
2019
2020	if (ref->query_raw != NULL)
2021	res->query_raw = xmlMemStrdup(ref->query_raw);
2022	else if (ref->query != NULL)
2023	res->query = xmlMemStrdup(ref->query);
2024	if (ref->fragment != NULL)
2025	res->fragment = xmlMemStrdup(ref->fragment);
2026
2027	/*
2028	* 4) If the authority component is defined, then the reference is a
2029	* network-path and we skip to step 7. Otherwise, the reference
2030	* URI's authority is inherited from the base URI's authority
2031	* component, which will also be undefined if the URI scheme does not
2032	* use an authority component.
2033	*/
2034	if ((ref->authority != NULL) \|\| (ref->server != NULL) \|\|
2035	(ref->port != PORT_EMPTY)) {
2036	if (ref->authority != NULL)
2037	res->authority = xmlMemStrdup(ref->authority);
2038	else {
2039	if (ref->server != NULL)
2040	res->server = xmlMemStrdup(ref->server);
2041	if (ref->user != NULL)
2042	res->user = xmlMemStrdup(ref->user);
2043	res->port = ref->port;
2044	}
2045	if (ref->path != NULL)
2046	res->path = xmlMemStrdup(ref->path);
2047	goto step_7;
2048	}
2049	if (bas->authority != NULL)
2050	res->authority = xmlMemStrdup(bas->authority);
2051	else if ((bas->server != NULL) \|\| (bas->port != PORT_EMPTY)) {
2052	if (bas->server != NULL)
2053	res->server = xmlMemStrdup(bas->server);
2054	if (bas->user != NULL)
2055	res->user = xmlMemStrdup(bas->user);
2056	res->port = bas->port;
2057	}
2058
2059	/*
2060	* 5) If the path component begins with a slash character ("/"), then
2061	* the reference is an absolute-path and we skip to step 7.
2062	*/
2063	if ((ref->path != NULL) && (ref->path[0] == '/')) {
2064	res->path = xmlMemStrdup(ref->path);
2065	goto step_7;
2066	}
2067
2068
2069	/*
2070	* 6) If this step is reached, then we are resolving a relative-path
2071	* reference. The relative path needs to be merged with the base
2072	* URI's path. Although there are many ways to do this, we will
2073	* describe a simple method using a separate string buffer.
2074	*
2075	* Allocate a buffer large enough for the result string.
2076	*/
2077	len = 2; /* extra / and 0 */
2078	if (ref->path != NULL)
2079	len += strlen(ref->path);
2080	if (bas->path != NULL)
2081	len += strlen(bas->path);
2082	res->path = (char *) xmlMallocAtomic(len);
2083	if (res->path == NULL) {
2084	xmlURIErrMemory("resolving URI against base\n");
2085	goto done;
2086	}
2087	res->path[0] = 0;
2088
2089	/*
2090	* a) All but the last segment of the base URI's path component is
2091	* copied to the buffer. In other words, any characters after the
2092	* last (right-most) slash character, if any, are excluded.
2093	*/
2094	cur = 0;
2095	out = 0;
2096	if (bas->path != NULL) {
2097	while (bas->path[cur] != 0) {
2098	while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2099	cur++;
2100	if (bas->path[cur] == 0)
2101	break;
2102
2103	cur++;
2104	while (out < cur) {
2105	res->path[out] = bas->path[out];
2106	out++;
2107	}
2108	}
2109	}
2110	res->path[out] = 0;
2111
2112	/*
2113	* b) The reference's path component is appended to the buffer
2114	* string.
2115	*/
2116	if (ref->path != NULL && ref->path[0] != 0) {
2117	indx = 0;
2118	/*
2119	* Ensure the path includes a '/'
2120	*/
2121	if ((out == 0) && ((bas->server != NULL) \|\| bas->port != PORT_EMPTY))
2122	res->path[out++] = '/';
2123	while (ref->path[indx] != 0) {
2124	res->path[out++] = ref->path[indx++];
2125	}
2126	}
2127	res->path[out] = 0;
2128
2129	/*
2130	* Steps c) to h) are really path normalization steps
2131	*/
2132	xmlNormalizeURIPath(res->path);
2133
2134	step_7:
2135
2136	/*
2137	* 7) The resulting URI components, including any inherited from the
2138	* base URI, are recombined to give the absolute form of the URI
2139	* reference.
2140	*/
2141	val = xmlSaveUri(res);
2142
2143	done:
2144	if (ref != NULL)
2145	xmlFreeURI(ref);
2146	if (bas != NULL)
2147	xmlFreeURI(bas);
2148	if (res != NULL)
2149	xmlFreeURI(res);
2150	return(val);
2151	}
2152
2153	/**
2154	* xmlBuildRelativeURI:
2155	* @URI: the URI reference under consideration
2156	* @base: the base value
2157	*
2158	* Expresses the URI of the reference in terms relative to the
2159	* base. Some examples of this operation include:
2160	* base = "http://site1.com/docs/book1.html"
2161	* URI input URI returned
2162	* docs/pic1.gif pic1.gif
2163	* docs/img/pic1.gif img/pic1.gif
2164	* img/pic1.gif ../img/pic1.gif
2165	* http://site1.com/docs/pic1.gif pic1.gif
2166	* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2167	*
2168	* base = "docs/book1.html"
2169	* URI input URI returned
2170	* docs/pic1.gif pic1.gif
2171	* docs/img/pic1.gif img/pic1.gif
2172	* img/pic1.gif ../img/pic1.gif
2173	* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2174	*
2175	*
2176	* Note: if the URI reference is really weird or complicated, it may be
2177	* worthwhile to first convert it into a "nice" one by calling
2178	* xmlBuildURI (using 'base') before calling this routine,
2179	* since this routine (for reasonable efficiency) assumes URI has
2180	* already been through some validation.
2181	*
2182	* Returns a new URI string (to be freed by the caller) or NULL in case
2183	* error.
2184	*/
2185	xmlChar *
2186	xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2187	{
2188	xmlChar *val = NULL;
2189	int ret;
2190	int ix;
2191	int nbslash = 0;
2192	int len;
2193	xmlURIPtr ref = NULL;
2194	xmlURIPtr bas = NULL;
2195	xmlChar bptr, uptr, *vptr;
2196	int remove_path = 0;
2197
2198	if ((URI == NULL) \|\| (*URI == 0))
2199	return NULL;
2200
2201	/*
2202	* First parse URI into a standard form
2203	*/
2204	ref = xmlCreateURI ();
2205	if (ref == NULL)
2206	return NULL;
2207	/* If URI not already in "relative" form */
2208	if (URI[0] != '.') {
2209	ret = xmlParseURIReference (ref, (const char *) URI);
2210	if (ret != 0)
2211	goto done; /* Error in URI, return NULL */
2212	} else
2213	ref->path = (char *)xmlStrdup(URI);
2214
2215	/*
2216	* Next parse base into the same standard form
2217	*/
2218	if ((base == NULL) \|\| (*base == 0)) {
2219	val = xmlStrdup (URI);
2220	goto done;
2221	}
2222	bas = xmlCreateURI ();
2223	if (bas == NULL)
2224	goto done;
2225	if (base[0] != '.') {
2226	ret = xmlParseURIReference (bas, (const char *) base);
2227	if (ret != 0)
2228	goto done; /* Error in base, return NULL */
2229	} else
2230	bas->path = (char *)xmlStrdup(base);
2231
2232	/*
2233	* If the scheme / server on the URI differs from the base,
2234	* just return the URI
2235	*/
2236	if ((ref->scheme != NULL) &&
2237	((bas->scheme == NULL) \|\|
2238	(xmlStrcmp ((xmlChar )bas->scheme, (xmlChar )ref->scheme)) \|\|
2239	(xmlStrcmp ((xmlChar )bas->server, (xmlChar )ref->server)) \|\|
2240	(bas->port != ref->port))) {
2241	val = xmlStrdup (URI);
2242	goto done;
2243	}
2244	if (xmlStrEqual((xmlChar )bas->path, (xmlChar )ref->path)) {
2245	val = xmlStrdup(BAD_CAST "");
2246	goto done;
2247	}
2248	if (bas->path == NULL) {
2249	val = xmlStrdup((xmlChar *)ref->path);
2250	goto done;
2251	}
2252	if (ref->path == NULL) {
2253	ref->path = (char *) "/";
2254	remove_path = 1;
2255	}
2256
2257	/*
2258	* At this point (at last!) we can compare the two paths
2259	*
2260	* First we take care of the special case where either of the
2261	* two path components may be missing (bug 316224)
2262	*/
2263	bptr = (xmlChar *)bas->path;
2264	{
2265	xmlChar rptr = (xmlChar ) ref->path;
2266	int pos = 0;
2267
2268	/*
2269	* Next we compare the two strings and find where they first differ
2270	*/
2271	if ((*rptr == '.') && (rptr[1] == '/'))
2272	rptr += 2;
2273	if ((*bptr == '.') && (bptr[1] == '/'))
2274	bptr += 2;
2275	else if ((bptr == '/') && (rptr != '/'))
2276	bptr++;
2277	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2278	pos++;
2279
2280	if (bptr[pos] == rptr[pos]) {
2281	val = xmlStrdup(BAD_CAST "");
2282	goto done; /* (I can't imagine why anyone would do this) */
2283	}
2284
2285	/*
2286	* In URI, "back up" to the last '/' encountered. This will be the
2287	* beginning of the "unique" suffix of URI
2288	*/
2289	ix = pos;
2290	for (; ix > 0; ix--) {
2291	if (rptr[ix - 1] == '/')
2292	break;
2293	}
2294	uptr = (xmlChar *)&rptr[ix];
2295
2296	/*
2297	* In base, count the number of '/' from the differing point
2298	*/
2299	for (; bptr[ix] != 0; ix++) {
2300	if (bptr[ix] == '/')
2301	nbslash++;
2302	}
2303
2304	/*
2305	* e.g: URI="foo/" base="foo/bar" -> "./"
2306	*/
2307	if (nbslash == 0 && !uptr[0]) {
2308	val = xmlStrdup(BAD_CAST "./");
2309	goto done;
2310	}
2311
2312	len = xmlStrlen (uptr) + 1;
2313	}
2314
2315	if (nbslash == 0) {
2316	if (uptr != NULL)
2317	/* exception characters from xmlSaveUri */
2318	val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2319	goto done;
2320	}
2321
2322	/*
2323	* Allocate just enough space for the returned string -
2324	* length of the remainder of the URI, plus enough space
2325	* for the "../" groups, plus one for the terminator
2326	*/
2327	val = (xmlChar ) xmlMalloc (len + 3 nbslash);
2328	if (val == NULL) {
2329	xmlURIErrMemory("building relative URI\n");
2330	goto done;
2331	}
2332	vptr = val;
2333	/*
2334	* Put in as many "../" as needed
2335	*/
2336	for (; nbslash>0; nbslash--) {
2337	*vptr++ = '.';
2338	*vptr++ = '.';
2339	*vptr++ = '/';
2340	}
2341	/*
2342	* Finish up with the end of the URI
2343	*/
2344	if (uptr != NULL) {
2345	if ((vptr > val) && (len > 0) &&
2346	(uptr[0] == '/') && (vptr[-1] == '/')) {
2347	memcpy (vptr, uptr + 1, len - 1);
2348	vptr[len - 2] = 0;
2349	} else {
2350	memcpy (vptr, uptr, len);
2351	vptr[len - 1] = 0;
2352	}
2353	} else {
2354	vptr[len - 1] = 0;
2355	}
2356
2357	/* escape the freshly-built path */
2358	vptr = val;
2359	/* exception characters from xmlSaveUri */
2360	val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2361	xmlFree(vptr);
2362
2363	done:
2364	/*
2365	* Free the working variables
2366	*/
2367	if (remove_path != 0)
2368	ref->path = NULL;
2369	if (ref != NULL)
2370	xmlFreeURI (ref);
2371	if (bas != NULL)
2372	xmlFreeURI (bas);
2373
2374	return val;
2375	}
2376
2377	/**
2378	* xmlCanonicPath:
2379	* @path: the resource locator in a filesystem notation
2380	*
2381	* Constructs a canonic path from the specified path.
2382	*
2383	* Returns a new canonic path, or a duplicate of the path parameter if the
2384	* construction fails. The caller is responsible for freeing the memory occupied
2385	* by the returned string. If there is insufficient memory available, or the
2386	* argument is NULL, the function returns NULL.
2387	*/
/* Non-zero when p is non-NULL and starts with an ASCII letter, a ':' and
 * then '/' or a backslash. The argument is evaluated several times. */
#define IS_WINDOWS_PATH(p) \
    (((p) != NULL) && \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) || \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) && \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2393	xmlChar *
2394	xmlCanonicPath(const xmlChar *path)
2395	{
2396	/*
2397	* For Windows implementations, additional work needs to be done to
2398	* replace backslashes in pathnames with "forward slashes"
2399	*/
2400	#if defined(_WIN32)
2401	int len = 0;
2402	char *p = NULL;
2403	#endif
2404	xmlURIPtr uri;
2405	xmlChar *ret;
2406	const xmlChar *absuri;
2407
2408	if (path == NULL)
2409	return(NULL);
2410
2411	#if defined(_WIN32)
2412	/*
2413	* We must not change the backslashes to slashes if the the path
2414	* starts with \\?\
2415	* Those paths can be up to 32k characters long.
2416	* Was added specifically for OpenOffice, those paths can't be converted
2417	* to URIs anyway.
2418	*/
2419	if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2420	(path[3] == '\\') )
2421	return xmlStrdup((const xmlChar *) path);
2422	#endif
2423
2424	/* sanitize filename starting with // so it can be used as URI */
2425	if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2426	path++;
2427
2428	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2429	xmlFreeURI(uri);
2430	return xmlStrdup(path);
2431	}
2432
2433	/* Check if this is an "absolute uri" */
2434	absuri = xmlStrstr(path, BAD_CAST "://");
2435	if (absuri != NULL) {
2436	int l, j;
2437	unsigned char c;
2438	xmlChar *escURI;
2439
2440	/*
2441	* this looks like an URI where some parts have not been
2442	* escaped leading to a parsing problem. Check that the first
2443	* part matches a protocol.
2444	*/
2445	l = absuri - path;
2446	/* Bypass if first part (part before the '://') is > 20 chars */
2447	if ((l <= 0) \|\| (l > 20))
2448	goto path_processing;
2449	/* Bypass if any non-alpha characters are present in first part */
2450	for (j = 0;j < l;j++) {
2451	c = path[j];
2452	if (!(((c >= 'a') && (c <= 'z')) \|\| ((c >= 'A') && (c <= 'Z'))))
2453	goto path_processing;
2454	}
2455
2456	/* Escape all except the characters specified in the supplied path */
2457	escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2458	if (escURI != NULL) {
2459	/* Try parsing the escaped path */
2460	uri = xmlParseURI((const char *) escURI);
2461	/* If successful, return the escaped string */
2462	if (uri != NULL) {
2463	xmlFreeURI(uri);
2464	return escURI;
2465	}
2466	xmlFree(escURI);
2467	}
2468	}
2469
2470	path_processing:
2471	/* For Windows implementations, replace backslashes with 'forward slashes' */
2472	#if defined(_WIN32)
2473	/*
2474	* Create a URI structure
2475	*/
2476	uri = xmlCreateURI();
2477	if (uri == NULL) { /* Guard against 'out of memory' */
2478	return(NULL);
2479	}
2480
2481	len = xmlStrlen(path);
2482	if ((len > 2) && IS_WINDOWS_PATH(path)) {
2483	/* make the scheme 'file' */
2484	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2485	/* allocate space for leading '/' + path + string terminator */
2486	uri->path = xmlMallocAtomic(len + 2);
2487	if (uri->path == NULL) {
2488	xmlFreeURI(uri); /* Guard against 'out of memory' */
2489	return(NULL);
2490	}
2491	/* Put in leading '/' plus path */
2492	uri->path[0] = '/';
2493	p = uri->path + 1;
2494	strncpy(p, (char *) path, len + 1);
2495	} else {
2496	uri->path = (char *) xmlStrdup(path);
2497	if (uri->path == NULL) {
2498	xmlFreeURI(uri);
2499	return(NULL);
2500	}
2501	p = uri->path;
2502	}
2503	/* Now change all occurrences of '\' to '/' */
2504	while (*p != '\0') {
2505	if (*p == '\\')
2506	*p = '/';
2507	p++;
2508	}
2509
2510	if (uri->scheme == NULL) {
2511	ret = xmlStrdup((const xmlChar *) uri->path);
2512	} else {
2513	ret = xmlSaveUri(uri);
2514	}
2515
2516	xmlFreeURI(uri);
2517	#else
2518	ret = xmlStrdup((const xmlChar *) path);
2519	#endif
2520	return(ret);
2521	}
2522
2523	/**
2524	* xmlPathToURI:
2525	* @path: the resource locator in a filesystem notation
2526	*
2527	* Constructs an URI expressing the existing path
2528	*
2529	* Returns a new URI, or a duplicate of the path parameter if the
2530	* construction fails. The caller is responsible for freeing the memory
2531	* occupied by the returned string. If there is insufficient memory available,
2532	* or the argument is NULL, the function returns NULL.
2533	*/
2534	xmlChar *
2535	xmlPathToURI(const xmlChar *path)
2536	{
2537	xmlURIPtr uri;
2538	xmlURI temp;
2539	xmlChar ret, cal;
2540
2541	if (path == NULL)
2542	return(NULL);
2543
2544	if ((uri = xmlParseURI((const char *) path)) != NULL) {
2545	xmlFreeURI(uri);
2546	return xmlStrdup(path);
2547	}
2548	cal = xmlCanonicPath(path);
2549	if (cal == NULL)
2550	return(NULL);
2551	#if defined(_WIN32)
2552	/* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2553	If 'cal' is a valid URI already then we are done here, as continuing would make
2554	it invalid. */
2555	if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2556	xmlFreeURI(uri);
2557	return cal;
2558	}
2559	/* 'cal' can contain a relative path with backslashes. If that is processed
2560	by xmlSaveURI, they will be escaped and the external entity loader machinery
2561	will fail. So convert them to slashes. Misuse 'ret' for walking. */
2562	ret = cal;
2563	while (*ret != '\0') {
2564	if (*ret == '\\')
2565	*ret = '/';
2566	ret++;
2567	}
2568	#endif
2569	memset(&temp, 0, sizeof(temp));
2570	temp.path = (char *) cal;
2571	ret = xmlSaveUri(&temp);
2572	xmlFree(cal);
2573	return(ret);
2574	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vbox/trunk/src/libs/libxml2-2.12.6/uri.c

Download in other formats: