VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/parser.c

Last change on this file was 104106, checked in by vboxsync, 8 weeks ago

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • Property svn:eol-style set to native
File size: 421.1 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
38#define IN_LIBXML
39#include "libxml.h"
40
/* Directory separator character: backslash when _WIN32 is defined, slash otherwise. */
#if defined(_WIN32)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
46
47#include <stdlib.h>
48#include <limits.h>
49#include <string.h>
50#include <stdarg.h>
51#include <stddef.h>
52#include <ctype.h>
53#include <stdlib.h>
54#include <libxml/parser.h>
55#include <libxml/xmlmemory.h>
56#include <libxml/tree.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
64#include <libxml/SAX2.h>
65#ifdef LIBXML_CATALOG_ENABLED
66#include <libxml/catalog.h>
67#endif
68
69#include "private/buf.h"
70#include "private/dict.h"
71#include "private/entities.h"
72#include "private/error.h"
73#include "private/html.h"
74#include "private/io.h"
75#include "private/parser.h"
76
/*
 * Reserved namespace index values and URI hash values.
 * NOTE(review): the EMPTY/XML suffixes suggest "no namespace" and the
 * predefined "xml" namespace; the code using them is outside this part
 * of the file - confirm before relying on that reading.
 */
#define NS_INDEX_EMPTY  INT_MAX
#define NS_INDEX_XML    (INT_MAX - 1)
#define URI_HASH_EMPTY  0xD943A04E
#define URI_HASH_XML    0xF0451F02
81
/*
 * Per start-tag record.
 * NOTE(review): the field notes below are inferred from the names only;
 * the code filling this structure is outside this part of the file.
 */
struct _xmlStartTag {
    const xmlChar *prefix;  /* presumably the element prefix - TODO confirm */
    const xmlChar *URI;     /* presumably the element namespace URI - TODO confirm */
    int line;               /* presumably the line of the start tag - TODO confirm */
    int nsNr;               /* presumably a namespace count - TODO confirm */
};
88
/*
 * Extra per-namespace bookkeeping record.
 * NOTE(review): field usage lives in the namespace-table code, which is
 * outside this part of the file; the notes only restate the names.
 */
typedef struct {
    void *saxData;                  /* opaque pointer */
    unsigned int prefixHashValue;   /* hash value associated with the prefix */
    unsigned int uriHashValue;      /* hash value associated with the URI */
    unsigned int elementId;         /* element identifier */
    int oldIndex;                   /* a previous index value */
} xmlParserNsExtra;
96
/*
 * One bucket of a namespace hash table: a hash value paired with an index.
 * NOTE(review): what the index refers to is not visible in this part of
 * the file.
 */
typedef struct {
    unsigned int hashValue;
    int index;
} xmlParserNsBucket;
101
102struct _xmlParserNsData {
103 xmlParserNsExtra *extra;
104
105 unsigned hashSize;
106 unsigned hashElems;
107 xmlParserNsBucket *hash;
108
109 unsigned elementId;
110 int defaultNsIndex;
111};
112
/*
 * Bucket of an attribute hash table, holding a single index.
 * NOTE(review): the indexed array is not visible in this part of the file.
 */
struct _xmlAttrHashBucket {
    int index;
};
116
/* Forward declarations of static helpers used before their definition. */
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
        xmlParserCtxtPtr pctx);

static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);

static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128/************************************************************************
129 * *
130 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
131 * *
132 ************************************************************************/
133
/*
 * Entity-related thresholds.
 * NOTE(review): the code comparing against these values is outside this
 * part of the file; units are presumably bytes/counts - confirm.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000
136
/*
 * Constants for protection against abusive entity expansion
 * ("billion laughs").  Both are used by xmlParserEntityCheck().
 */

/*
 * Amount of entity expansion that is always allowed: the amplification
 * check only triggers once the accumulated cost exceeds this value.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000

/*
 * Fixed cost charged for each entity reference.  This crudely models
 * processing time as well, to protect, for example, against exponential
 * expansion of empty or very short entities.
 */
#define XML_ENT_FIXED_COST 20
153
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process.  This is not a limitation of the parser but a safety
 * boundary feature.  It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
163
164
165
/*
 * Buffer sizes and the SAX compatibility marker string.
 * NOTE(review): the users of these macros are outside this part of the
 * file; the sizes are presumably initial buffer lengths - confirm.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
169
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received.  It is not a hard limit but an optimization
 * when reading strings like Names.  It is not strictly needed as long as
 * the available input characters are followed by a 0 byte, which should
 * be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
180
/**
 * xmlParserVersion:
 *
 * Constant string describing the internal version of the library,
 * built by concatenating LIBXML_VERSION_STRING and LIBXML_VERSION_EXTRA.
 */
const char *const
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
188
/*
 * List of "xml"-prefixed processing instruction targets allowed by W3C
 * specifications.  The array is terminated by a NULL entry.
 */

static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
198
199
/*
 * Forward declarations of static helpers used before their definition.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options);
#ifdef LIBXML_LEGACY_ENABLED
/* Only declared when legacy support is compiled in. */
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
                      const xmlChar *string, void *user_data, xmlNodePtr *lst);

static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
224
225/************************************************************************
226 * *
227 * Some factorized error routines *
228 * *
229 ************************************************************************/
230
231/**
232 * xmlErrAttributeDup:
233 * @ctxt: an XML parser context
234 * @prefix: the attribute prefix
235 * @localname: the attribute localname
236 *
237 * Handle a redefinition of attribute error
238 */
239static void
240xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
241 const xmlChar * localname)
242{
243 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
244 (ctxt->instate == XML_PARSER_EOF))
245 return;
246 if (ctxt != NULL)
247 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
248
249 if (prefix == NULL)
250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
251 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
252 (const char *) localname, NULL, NULL, 0, 0,
253 "Attribute %s redefined\n", localname);
254 else
255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257 (const char *) prefix, (const char *) localname,
258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259 localname);
260 if (ctxt != NULL) {
261 ctxt->wellFormed = 0;
262 if (ctxt->recovery == 0)
263 ctxt->disableSAX = 1;
264 }
265}
266
267/**
268 * xmlFatalErrMsg:
269 * @ctxt: an XML parser context
270 * @error: the error number
271 * @msg: the error message
272 *
273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274 */
275static void LIBXML_ATTR_FORMAT(3,0)
276xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
277 const char *msg)
278{
279 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
280 (ctxt->instate == XML_PARSER_EOF))
281 return;
282 if (ctxt != NULL)
283 ctxt->errNo = error;
284 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
285 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
286 if (ctxt != NULL) {
287 ctxt->wellFormed = 0;
288 if (ctxt->recovery == 0)
289 ctxt->disableSAX = 1;
290 }
291}
292
293/**
294 * xmlWarningMsg:
295 * @ctxt: an XML parser context
296 * @error: the error number
297 * @msg: the error message
298 * @str1: extra data
299 * @str2: extra data
300 *
301 * Handle a warning.
302 */
303void LIBXML_ATTR_FORMAT(3,0)
304xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305 const char *msg, const xmlChar *str1, const xmlChar *str2)
306{
307 xmlStructuredErrorFunc schannel = NULL;
308
309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310 (ctxt->instate == XML_PARSER_EOF))
311 return;
312 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
313 (ctxt->sax->initialized == XML_SAX2_MAGIC))
314 schannel = ctxt->sax->serror;
315 if (ctxt != NULL) {
316 __xmlRaiseError(schannel,
317 (ctxt->sax) ? ctxt->sax->warning : NULL,
318 ctxt->userData,
319 ctxt, NULL, XML_FROM_PARSER, error,
320 XML_ERR_WARNING, NULL, 0,
321 (const char *) str1, (const char *) str2, NULL, 0, 0,
322 msg, (const char *) str1, (const char *) str2);
323 } else {
324 __xmlRaiseError(schannel, NULL, NULL,
325 ctxt, NULL, XML_FROM_PARSER, error,
326 XML_ERR_WARNING, NULL, 0,
327 (const char *) str1, (const char *) str2, NULL, 0, 0,
328 msg, (const char *) str1, (const char *) str2);
329 }
330}
331
332/**
333 * xmlValidityError:
334 * @ctxt: an XML parser context
335 * @error: the error number
336 * @msg: the error message
337 * @str1: extra data
338 *
339 * Handle a validity error.
340 */
341static void LIBXML_ATTR_FORMAT(3,0)
342xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
343 const char *msg, const xmlChar *str1, const xmlChar *str2)
344{
345 xmlStructuredErrorFunc schannel = NULL;
346
347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348 (ctxt->instate == XML_PARSER_EOF))
349 return;
350 if (ctxt != NULL) {
351 ctxt->errNo = error;
352 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
353 schannel = ctxt->sax->serror;
354 }
355 if (ctxt != NULL) {
356 __xmlRaiseError(schannel,
357 ctxt->vctxt.error, ctxt->vctxt.userData,
358 ctxt, NULL, XML_FROM_DTD, error,
359 XML_ERR_ERROR, NULL, 0, (const char *) str1,
360 (const char *) str2, NULL, 0, 0,
361 msg, (const char *) str1, (const char *) str2);
362 ctxt->valid = 0;
363 } else {
364 __xmlRaiseError(schannel, NULL, NULL,
365 ctxt, NULL, XML_FROM_DTD, error,
366 XML_ERR_ERROR, NULL, 0, (const char *) str1,
367 (const char *) str2, NULL, 0, 0,
368 msg, (const char *) str1, (const char *) str2);
369 }
370}
371
372/**
373 * xmlFatalErrMsgInt:
374 * @ctxt: an XML parser context
375 * @error: the error number
376 * @msg: the error message
377 * @val: an integer value
378 *
379 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
380 */
381static void LIBXML_ATTR_FORMAT(3,0)
382xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
383 const char *msg, int val)
384{
385 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
386 (ctxt->instate == XML_PARSER_EOF))
387 return;
388 if (ctxt != NULL)
389 ctxt->errNo = error;
390 __xmlRaiseError(NULL, NULL, NULL,
391 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
392 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
393 if (ctxt != NULL) {
394 ctxt->wellFormed = 0;
395 if (ctxt->recovery == 0)
396 ctxt->disableSAX = 1;
397 }
398}
399
400/**
401 * xmlFatalErrMsgStrIntStr:
402 * @ctxt: an XML parser context
403 * @error: the error number
404 * @msg: the error message
405 * @str1: an string info
406 * @val: an integer value
407 * @str2: an string info
408 *
409 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
410 */
411static void LIBXML_ATTR_FORMAT(3,0)
412xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
413 const char *msg, const xmlChar *str1, int val,
414 const xmlChar *str2)
415{
416 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
417 (ctxt->instate == XML_PARSER_EOF))
418 return;
419 if (ctxt != NULL)
420 ctxt->errNo = error;
421 __xmlRaiseError(NULL, NULL, NULL,
422 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
423 NULL, 0, (const char *) str1, (const char *) str2,
424 NULL, val, 0, msg, str1, val, str2);
425 if (ctxt != NULL) {
426 ctxt->wellFormed = 0;
427 if (ctxt->recovery == 0)
428 ctxt->disableSAX = 1;
429 }
430}
431
432/**
433 * xmlFatalErrMsgStr:
434 * @ctxt: an XML parser context
435 * @error: the error number
436 * @msg: the error message
437 * @val: a string value
438 *
439 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
440 */
441static void LIBXML_ATTR_FORMAT(3,0)
442xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
443 const char *msg, const xmlChar * val)
444{
445 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
446 (ctxt->instate == XML_PARSER_EOF))
447 return;
448 if (ctxt != NULL)
449 ctxt->errNo = error;
450 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
451 XML_FROM_PARSER, error, XML_ERR_FATAL,
452 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
453 val);
454 if (ctxt != NULL) {
455 ctxt->wellFormed = 0;
456 if (ctxt->recovery == 0)
457 ctxt->disableSAX = 1;
458 }
459}
460
461/**
462 * xmlErrMsgStr:
463 * @ctxt: an XML parser context
464 * @error: the error number
465 * @msg: the error message
466 * @val: a string value
467 *
468 * Handle a non fatal parser error
469 */
470static void LIBXML_ATTR_FORMAT(3,0)
471xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
472 const char *msg, const xmlChar * val)
473{
474 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
475 (ctxt->instate == XML_PARSER_EOF))
476 return;
477 if (ctxt != NULL)
478 ctxt->errNo = error;
479 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
480 XML_FROM_PARSER, error, XML_ERR_ERROR,
481 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
482 val);
483}
484
485/**
486 * xmlNsErr:
487 * @ctxt: an XML parser context
488 * @error: the error number
489 * @msg: the message
490 * @info1: extra information string
491 * @info2: extra information string
492 *
493 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
494 */
495static void LIBXML_ATTR_FORMAT(3,0)
496xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
497 const char *msg,
498 const xmlChar * info1, const xmlChar * info2,
499 const xmlChar * info3)
500{
501 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
502 (ctxt->instate == XML_PARSER_EOF))
503 return;
504 if (ctxt != NULL)
505 ctxt->errNo = error;
506 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
507 XML_ERR_ERROR, NULL, 0, (const char *) info1,
508 (const char *) info2, (const char *) info3, 0, 0, msg,
509 info1, info2, info3);
510 if (ctxt != NULL)
511 ctxt->nsWellFormed = 0;
512}
513
514/**
515 * xmlNsWarn
516 * @ctxt: an XML parser context
517 * @error: the error number
518 * @msg: the message
519 * @info1: extra information string
520 * @info2: extra information string
521 *
522 * Handle a namespace warning error
523 */
524static void LIBXML_ATTR_FORMAT(3,0)
525xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
526 const char *msg,
527 const xmlChar * info1, const xmlChar * info2,
528 const xmlChar * info3)
529{
530 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
531 (ctxt->instate == XML_PARSER_EOF))
532 return;
533 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
534 XML_ERR_WARNING, NULL, 0, (const char *) info1,
535 (const char *) info2, (const char *) info3, 0, 0, msg,
536 info1, info2, info3);
537}
538
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Headroom left before the accumulator would overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
546
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, expressed as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 *
 * @val is taken as size_t so that the full value reaches the saturation
 * check on platforms where size_t is wider than unsigned long; with an
 * unsigned long parameter the argument would be truncated by the call
 * itself and a huge size could be added as a small one.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
554
555/**
556 * xmlParserEntityCheck:
557 * @ctxt: parser context
558 * @extra: sum of unexpanded entity sizes
559 *
560 * Check for non-linear entity expansion behaviour.
561 *
562 * In some cases like xmlStringDecodeEntities, this function is called
563 * for each, possibly nested entity and its unexpanded content length.
564 *
565 * In other cases like xmlParseReference, it's only called for each
566 * top-level entity with its unexpanded content length plus the sum of
567 * the unexpanded content lengths (plus fixed cost) of all nested
568 * entities.
569 *
570 * Summing the unexpanded lengths also adds the length of the reference.
571 * This is by design. Taking the length of the entity name into account
572 * discourages attacks that try to waste CPU time with abusively long
573 * entity names. See test/recurse/lol6.xml for example. Each call also
574 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
575 * short entities.
576 *
577 * Returns 1 on error, 0 on success.
578 */
579static int
580xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
581{
582 unsigned long consumed;
583 xmlParserInputPtr input = ctxt->input;
584 xmlEntityPtr entity = input->entity;
585
586 /*
587 * Compute total consumed bytes so far, including input streams of
588 * external entities.
589 */
590 consumed = input->parentConsumed;
591 if ((entity == NULL) ||
592 ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
593 ((entity->flags & XML_ENT_PARSED) == 0))) {
594 xmlSaturatedAdd(&consumed, input->consumed);
595 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
596 }
597 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
598
599 /*
600 * Add extra cost and some fixed cost.
601 */
602 xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
603 xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
604
605 /*
606 * It's important to always use saturation arithmetic when tracking
607 * entity sizes to make the size checks reliable. If "sizeentcopy"
608 * overflows, we have to abort.
609 */
610 if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
611 ((ctxt->sizeentcopy >= ULONG_MAX) ||
612 (ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) {
613 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
614 "Maximum entity amplification factor exceeded, see "
615 "xmlCtxtSetMaxAmplification.\n");
616 xmlHaltParser(ctxt);
617 return(1);
618 }
619
620 return(0);
621}
622
623/************************************************************************
624 * *
625 * Library wide options *
626 * *
627 ************************************************************************/
628
629/**
630 * xmlHasFeature:
631 * @feature: the feature to be examined
632 *
633 * Examines if the library has been compiled with a given feature.
634 *
635 * Returns a non-zero value if the feature exist, otherwise zero.
636 * Returns zero (0) if the feature does not exist or an unknown
637 * unknown feature is requested, non-zero otherwise.
638 */
639int
640xmlHasFeature(xmlFeature feature)
641{
642 switch (feature) {
643 case XML_WITH_THREAD:
644#ifdef LIBXML_THREAD_ENABLED
645 return(1);
646#else
647 return(0);
648#endif
649 case XML_WITH_TREE:
650#ifdef LIBXML_TREE_ENABLED
651 return(1);
652#else
653 return(0);
654#endif
655 case XML_WITH_OUTPUT:
656#ifdef LIBXML_OUTPUT_ENABLED
657 return(1);
658#else
659 return(0);
660#endif
661 case XML_WITH_PUSH:
662#ifdef LIBXML_PUSH_ENABLED
663 return(1);
664#else
665 return(0);
666#endif
667 case XML_WITH_READER:
668#ifdef LIBXML_READER_ENABLED
669 return(1);
670#else
671 return(0);
672#endif
673 case XML_WITH_PATTERN:
674#ifdef LIBXML_PATTERN_ENABLED
675 return(1);
676#else
677 return(0);
678#endif
679 case XML_WITH_WRITER:
680#ifdef LIBXML_WRITER_ENABLED
681 return(1);
682#else
683 return(0);
684#endif
685 case XML_WITH_SAX1:
686#ifdef LIBXML_SAX1_ENABLED
687 return(1);
688#else
689 return(0);
690#endif
691 case XML_WITH_FTP:
692#ifdef LIBXML_FTP_ENABLED
693 return(1);
694#else
695 return(0);
696#endif
697 case XML_WITH_HTTP:
698#ifdef LIBXML_HTTP_ENABLED
699 return(1);
700#else
701 return(0);
702#endif
703 case XML_WITH_VALID:
704#ifdef LIBXML_VALID_ENABLED
705 return(1);
706#else
707 return(0);
708#endif
709 case XML_WITH_HTML:
710#ifdef LIBXML_HTML_ENABLED
711 return(1);
712#else
713 return(0);
714#endif
715 case XML_WITH_LEGACY:
716#ifdef LIBXML_LEGACY_ENABLED
717 return(1);
718#else
719 return(0);
720#endif
721 case XML_WITH_C14N:
722#ifdef LIBXML_C14N_ENABLED
723 return(1);
724#else
725 return(0);
726#endif
727 case XML_WITH_CATALOG:
728#ifdef LIBXML_CATALOG_ENABLED
729 return(1);
730#else
731 return(0);
732#endif
733 case XML_WITH_XPATH:
734#ifdef LIBXML_XPATH_ENABLED
735 return(1);
736#else
737 return(0);
738#endif
739 case XML_WITH_XPTR:
740#ifdef LIBXML_XPTR_ENABLED
741 return(1);
742#else
743 return(0);
744#endif
745 case XML_WITH_XINCLUDE:
746#ifdef LIBXML_XINCLUDE_ENABLED
747 return(1);
748#else
749 return(0);
750#endif
751 case XML_WITH_ICONV:
752#ifdef LIBXML_ICONV_ENABLED
753 return(1);
754#else
755 return(0);
756#endif
757 case XML_WITH_ISO8859X:
758#ifdef LIBXML_ISO8859X_ENABLED
759 return(1);
760#else
761 return(0);
762#endif
763 case XML_WITH_UNICODE:
764#ifdef LIBXML_UNICODE_ENABLED
765 return(1);
766#else
767 return(0);
768#endif
769 case XML_WITH_REGEXP:
770#ifdef LIBXML_REGEXP_ENABLED
771 return(1);
772#else
773 return(0);
774#endif
775 case XML_WITH_AUTOMATA:
776#ifdef LIBXML_AUTOMATA_ENABLED
777 return(1);
778#else
779 return(0);
780#endif
781 case XML_WITH_EXPR:
782#ifdef LIBXML_EXPR_ENABLED
783 return(1);
784#else
785 return(0);
786#endif
787 case XML_WITH_SCHEMAS:
788#ifdef LIBXML_SCHEMAS_ENABLED
789 return(1);
790#else
791 return(0);
792#endif
793 case XML_WITH_SCHEMATRON:
794#ifdef LIBXML_SCHEMATRON_ENABLED
795 return(1);
796#else
797 return(0);
798#endif
799 case XML_WITH_MODULES:
800#ifdef LIBXML_MODULES_ENABLED
801 return(1);
802#else
803 return(0);
804#endif
805 case XML_WITH_DEBUG:
806#ifdef LIBXML_DEBUG_ENABLED
807 return(1);
808#else
809 return(0);
810#endif
811 case XML_WITH_DEBUG_MEM:
812#ifdef DEBUG_MEMORY_LOCATION
813 return(1);
814#else
815 return(0);
816#endif
817 case XML_WITH_DEBUG_RUN:
818 return(0);
819 case XML_WITH_ZLIB:
820#ifdef LIBXML_ZLIB_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
825 case XML_WITH_LZMA:
826#ifdef LIBXML_LZMA_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
831 case XML_WITH_ICU:
832#ifdef LIBXML_ICU_ENABLED
833 return(1);
834#else
835 return(0);
836#endif
837 default:
838 break;
839 }
840 return(0);
841}
842
843/************************************************************************
844 * *
845 * SAX2 defaulted attributes handling *
846 * *
847 ************************************************************************/
848
849/**
850 * xmlDetectSAX2:
851 * @ctxt: an XML parser context
852 *
853 * Do the SAX2 detection and specific initialization
854 */
855static void
856xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
857 xmlSAXHandlerPtr sax;
858
859 /* Avoid unused variable warning if features are disabled. */
860 (void) sax;
861
862 if (ctxt == NULL) return;
863 sax = ctxt->sax;
864#ifdef LIBXML_SAX1_ENABLED
865 /*
866 * Only enable SAX2 if there SAX2 element handlers, except when there
867 * are no element handlers at all.
868 */
869 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
870 ((sax->startElementNs != NULL) ||
871 (sax->endElementNs != NULL) ||
872 ((sax->startElement == NULL) && (sax->endElement == NULL))))
873 ctxt->sax2 = 1;
874#else
875 ctxt->sax2 = 1;
876#endif /* LIBXML_SAX1_ENABLED */
877
878 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
879 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
880 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
881 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
882 (ctxt->str_xml_ns == NULL)) {
883 xmlErrMemory(ctxt, NULL);
884 }
885}
886
/*
 * One defaulted attribute, as recorded by xmlAddDefAttrs().
 */
typedef struct {
    xmlHashedString prefix;     /* interned prefix; .name is NULL when unprefixed */
    xmlHashedString name;       /* interned local name */
    xmlHashedString value;      /* interned default value */
    const xmlChar *valueEnd;    /* value.name plus the value length */
    int external;               /* copy of ctxt->external at registration time */
    int expandedSize;           /* strlen(name) + strlen(prefix) + value length */
} xmlDefAttr;
895
896typedef struct _xmlDefAttrs xmlDefAttrs;
897typedef xmlDefAttrs *xmlDefAttrsPtr;
898struct _xmlDefAttrs {
899 int nbAttrs; /* number of defaulted attributes on that element */
900 int maxAttrs; /* the size of the array */
901#if __STDC_VERSION__ >= 199901L
902 /* Using a C99 flexible array member avoids UBSan errors. */
903 xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
904#else
905 xmlDefAttr attrs[1];
906#endif
907};
908
909/**
910 * xmlAttrNormalizeSpace:
911 * @src: the source string
912 * @dst: the target string
913 *
914 * Normalize the space in non CDATA attribute values:
915 * If the attribute type is not CDATA, then the XML processor MUST further
916 * process the normalized attribute value by discarding any leading and
917 * trailing space (#x20) characters, and by replacing sequences of space
918 * (#x20) characters by a single space (#x20) character.
919 * Note that the size of dst need to be at least src, and if one doesn't need
920 * to preserve dst (and it doesn't come from a dictionary or read-only) then
921 * passing src as dst is just fine.
922 *
923 * Returns a pointer to the normalized value (dst) or NULL if no conversion
924 * is needed.
925 */
926static xmlChar *
927xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
928{
929 if ((src == NULL) || (dst == NULL))
930 return(NULL);
931
932 while (*src == 0x20) src++;
933 while (*src != 0) {
934 if (*src == 0x20) {
935 while (*src == 0x20) src++;
936 if (*src != 0)
937 *dst++ = 0x20;
938 } else {
939 *dst++ = *src++;
940 }
941 }
942 *dst = 0;
943 if (dst == src)
944 return(NULL);
945 return(dst);
946}
947
948/**
949 * xmlAttrNormalizeSpace2:
950 * @src: the source string
951 *
952 * Normalize the space in non CDATA attribute values, a slightly more complex
953 * front end to avoid allocation problems when running on attribute values
954 * coming from the input.
955 *
956 * Returns a pointer to the normalized value (dst) or NULL if no conversion
957 * is needed.
958 */
959static const xmlChar *
960xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
961{
962 int i;
963 int remove_head = 0;
964 int need_realloc = 0;
965 const xmlChar *cur;
966
967 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
968 return(NULL);
969 i = *len;
970 if (i <= 0)
971 return(NULL);
972
973 cur = src;
974 while (*cur == 0x20) {
975 cur++;
976 remove_head++;
977 }
978 while (*cur != 0) {
979 if (*cur == 0x20) {
980 cur++;
981 if ((*cur == 0x20) || (*cur == 0)) {
982 need_realloc = 1;
983 break;
984 }
985 } else
986 cur++;
987 }
988 if (need_realloc) {
989 xmlChar *ret;
990
991 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
992 if (ret == NULL) {
993 xmlErrMemory(ctxt, NULL);
994 return(NULL);
995 }
996 xmlAttrNormalizeSpace(ret, ret);
997 *len = strlen((const char *)ret);
998 return(ret);
999 } else if (remove_head) {
1000 *len -= remove_head;
1001 memmove(src, src + remove_head, 1 + *len);
1002 return(src);
1003 }
1004 return(NULL);
1005}
1006
1007/**
1008 * xmlAddDefAttrs:
1009 * @ctxt: an XML parser context
1010 * @fullname: the element fullname
1011 * @fullattr: the attribute fullname
1012 * @value: the attribute value
1013 *
1014 * Add a defaulted attribute for an element
1015 */
1016static void
1017xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1018 const xmlChar *fullname,
1019 const xmlChar *fullattr,
1020 const xmlChar *value) {
1021 xmlDefAttrsPtr defaults;
1022 xmlDefAttr *attr;
1023 int len, expandedSize;
1024 xmlHashedString name;
1025 xmlHashedString prefix;
1026 xmlHashedString hvalue;
1027 const xmlChar *localname;
1028
1029 /*
1030 * Allows to detect attribute redefinitions
1031 */
1032 if (ctxt->attsSpecial != NULL) {
1033 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1034 return;
1035 }
1036
1037 if (ctxt->attsDefault == NULL) {
1038 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1039 if (ctxt->attsDefault == NULL)
1040 goto mem_error;
1041 }
1042
1043 /*
1044 * split the element name into prefix:localname , the string found
1045 * are within the DTD and then not associated to namespace names.
1046 */
1047 localname = xmlSplitQName3(fullname, &len);
1048 if (localname == NULL) {
1049 name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1050 prefix.name = NULL;
1051 } else {
1052 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1053 prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1054 if (prefix.name == NULL)
1055 goto mem_error;
1056 }
1057 if (name.name == NULL)
1058 goto mem_error;
1059
1060 /*
1061 * make sure there is some storage
1062 */
1063 defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1064 if ((defaults == NULL) ||
1065 (defaults->nbAttrs >= defaults->maxAttrs)) {
1066 xmlDefAttrsPtr temp;
1067 int newSize;
1068
1069 newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1070 temp = xmlRealloc(defaults,
1071 sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1072 if (temp == NULL)
1073 goto mem_error;
1074 if (defaults == NULL)
1075 temp->nbAttrs = 0;
1076 temp->maxAttrs = newSize;
1077 defaults = temp;
1078 if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1079 defaults, NULL) < 0) {
1080 xmlFree(defaults);
1081 goto mem_error;
1082 }
1083 }
1084
1085 /*
1086 * Split the attribute name into prefix:localname , the string found
1087 * are within the DTD and hen not associated to namespace names.
1088 */
1089 localname = xmlSplitQName3(fullattr, &len);
1090 if (localname == NULL) {
1091 name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1092 prefix.name = NULL;
1093 } else {
1094 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1095 prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1096 if (prefix.name == NULL)
1097 goto mem_error;
1098 }
1099 if (name.name == NULL)
1100 goto mem_error;
1101
1102 /* intern the string and precompute the end */
1103 len = strlen((const char *) value);
1104 hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1105 if (hvalue.name == NULL)
1106 goto mem_error;
1107
1108 expandedSize = strlen((const char *) name.name);
1109 if (prefix.name != NULL)
1110 expandedSize += strlen((const char *) prefix.name);
1111 expandedSize += len;
1112
1113 attr = &defaults->attrs[defaults->nbAttrs++];
1114 attr->name = name;
1115 attr->prefix = prefix;
1116 attr->value = hvalue;
1117 attr->valueEnd = hvalue.name + len;
1118 attr->external = ctxt->external;
1119 attr->expandedSize = expandedSize;
1120
1121 return;
1122
1123mem_error:
1124 xmlErrMemory(ctxt, NULL);
1125 return;
1126}
1127
1128/**
1129 * xmlAddSpecialAttr:
1130 * @ctxt: an XML parser context
1131 * @fullname: the element fullname
1132 * @fullattr: the attribute fullname
1133 * @type: the attribute type
1134 *
1135 * Register this attribute type
1136 */
1137static void
1138xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1139 const xmlChar *fullname,
1140 const xmlChar *fullattr,
1141 int type)
1142{
1143 if (ctxt->attsSpecial == NULL) {
1144 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1145 if (ctxt->attsSpecial == NULL)
1146 goto mem_error;
1147 }
1148
1149 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1150 return;
1151
1152 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1153 (void *) (ptrdiff_t) type);
1154 return;
1155
1156mem_error:
1157 xmlErrMemory(ctxt, NULL);
1158 return;
1159}
1160
1161/**
1162 * xmlCleanSpecialAttrCallback:
1163 *
1164 * Removes CDATA attributes from the special attribute table
1165 */
1166static void
1167xmlCleanSpecialAttrCallback(void *payload, void *data,
1168 const xmlChar *fullname, const xmlChar *fullattr,
1169 const xmlChar *unused ATTRIBUTE_UNUSED) {
1170 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1171
1172 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1173 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1174 }
1175}
1176
1177/**
1178 * xmlCleanSpecialAttr:
1179 * @ctxt: an XML parser context
1180 *
1181 * Trim the list of attributes defined to remove all those of type
1182 * CDATA as they are not special. This call should be done when finishing
1183 * to parse the DTD and before starting to parse the document root.
1184 */
1185static void
1186xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1187{
1188 if (ctxt->attsSpecial == NULL)
1189 return;
1190
1191 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1192
1193 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1194 xmlHashFree(ctxt->attsSpecial, NULL);
1195 ctxt->attsSpecial = NULL;
1196 }
1197 return;
1198}
1199
/*
 * Kinds of subtag that may follow the primary language subtag.
 */
typedef enum {
    XML_LANG_SUBTAG_INVALID = 0,
    XML_LANG_SUBTAG_ALPHA2,     /* 2ALPHA: region (ISO 3166-1) */
    XML_LANG_SUBTAG_ALPHA3,     /* 3ALPHA: extlang */
    XML_LANG_SUBTAG_ALPHA4,     /* 4ALPHA: script */
    XML_LANG_SUBTAG_DIGIT3,     /* 3DIGIT: region (UN M.49) */
    XML_LANG_SUBTAG_VARIANT     /* 5*8alphanum / (DIGIT 3alphanum) */
} xmlLangSubtagKind;

/*
 * How far the langtag production has been matched so far.
 */
typedef enum {
    XML_LANG_AFTER_LANGUAGE = 0,
    XML_LANG_AFTER_EXTLANG,
    XML_LANG_AFTER_SCRIPT,
    XML_LANG_AFTER_REGION
} xmlLangStage;

/* Returns 1 if @c is an ASCII letter, 0 otherwise. */
static int
xmlLangIsAlpha(int c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/* Returns 1 if @c is an ASCII digit, 0 otherwise. */
static int
xmlLangIsDigit(int c) {
    return((c >= '0') && (c <= '9'));
}

/*
 * Classifies the run of ASCII alphanumerics starting at *@str and moves
 * *@str past it. Scanning stops after nine characters since no valid
 * subtag is longer than eight.
 */
static xmlLangSubtagKind
xmlLangScanSubtag(const xmlChar **str) {
    const xmlChar *cur = *str;
    int len = 0;
    int alphas = 0;

    while (len <= 8) {
        if (xmlLangIsAlpha(cur[len]))
            alphas++;
        else if (!xmlLangIsDigit(cur[len]))
            break;
        len++;
    }
    *str = cur + len;

    if (alphas == len) {
        if (len == 2)
            return(XML_LANG_SUBTAG_ALPHA2);
        if (len == 3)
            return(XML_LANG_SUBTAG_ALPHA3);
        if (len == 4)
            return(XML_LANG_SUBTAG_ALPHA4);
    } else if ((alphas == 0) && (len == 3)) {
        return(XML_LANG_SUBTAG_DIGIT3);
    }
    if ((len >= 5) && (len <= 8))
        return(XML_LANG_SUBTAG_VARIANT);
    if ((len == 4) && (xmlLangIsDigit(cur[0])))
        return(XML_LANG_SUBTAG_VARIANT);
    return(XML_LANG_SUBTAG_INVALID);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC reference the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * What is checked: the primary language subtag, then at most one extlang,
 * one script and one region, in that order, then one variant. Variants
 * may contain digits ("de-CH-1901"). Whatever follows a well-formed
 * variant and a '-' is accepted without further checks. A 4 to 8 letter
 * primary subtag is only accepted on its own.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;
    xmlLangStage stage = XML_LANG_AFTER_LANGUAGE;
    int len;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (xmlLangIsAlpha(cur[0]))
            cur++;
        return(cur[0] == 0);
    }

    /*
     * Primary language subtag: 2 to 8 letters, stop counting at 9.
     */
    len = 0;
    while ((len <= 8) && (xmlLangIsAlpha(cur[len])))
        len++;
    if ((len < 2) || (len > 8))
        return(0);
    cur += len;
    if (len >= 4) {
        /* reserved or registered language subtag, must stand alone */
        return(cur[0] == 0);
    }

    /*
     * We got an ISO 639 code, now walk the optional subtags.
     */
    while (cur[0] == '-') {
        cur++;

        switch (xmlLangScanSubtag(&cur)) {
            case XML_LANG_SUBTAG_ALPHA3:
                /* extlang, only right after the language */
                if (stage != XML_LANG_AFTER_LANGUAGE)
                    return(0);
                stage = XML_LANG_AFTER_EXTLANG;
                break;
            case XML_LANG_SUBTAG_ALPHA4:
                /* script, before any region */
                if (stage > XML_LANG_AFTER_EXTLANG)
                    return(0);
                stage = XML_LANG_AFTER_SCRIPT;
                break;
            case XML_LANG_SUBTAG_ALPHA2:
            case XML_LANG_SUBTAG_DIGIT3:
                /* region, at most once */
                if (stage > XML_LANG_AFTER_SCRIPT)
                    return(0);
                stage = XML_LANG_AFTER_REGION;
                break;
            case XML_LANG_SUBTAG_VARIANT:
                /* extensions and private use subtags not checked */
                return((cur[0] == 0) || (cur[0] == '-'));
            default:
                return(0);
        }
    }

    /* a subtag must be followed by '-' or the end of the string */
    return(cur[0] == 0);
}
1395
1396/************************************************************************
1397 * *
1398 * Parser stacks related functions and macros *
1399 * *
1400 ************************************************************************/
1401
1402static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1403 const xmlChar ** str);
1404
1405/**
1406 * xmlParserNsCreate:
1407 *
1408 * Create a new namespace database.
1409 *
1410 * Returns the new obejct.
1411 */
1412xmlParserNsData *
1413xmlParserNsCreate(void) {
1414 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1415
1416 if (nsdb == NULL)
1417 return(NULL);
1418 memset(nsdb, 0, sizeof(*nsdb));
1419 nsdb->defaultNsIndex = INT_MAX;
1420
1421 return(nsdb);
1422}
1423
1424/**
1425 * xmlParserNsFree:
1426 * @nsdb: namespace database
1427 *
1428 * Free a namespace database.
1429 */
1430void
1431xmlParserNsFree(xmlParserNsData *nsdb) {
1432 if (nsdb == NULL)
1433 return;
1434
1435 xmlFree(nsdb->extra);
1436 xmlFree(nsdb->hash);
1437 xmlFree(nsdb);
1438}
1439
1440/**
1441 * xmlParserNsReset:
1442 * @nsdb: namespace database
1443 *
1444 * Reset a namespace database.
1445 */
1446static void
1447xmlParserNsReset(xmlParserNsData *nsdb) {
1448 if (nsdb == NULL)
1449 return;
1450
1451 nsdb->hashElems = 0;
1452 nsdb->elementId = 0;
1453 nsdb->defaultNsIndex = INT_MAX;
1454
1455 if (nsdb->hash)
1456 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1457}
1458
1459/**
1460 * xmlParserStartElement:
1461 * @nsdb: namespace database
1462 *
1463 * Signal that a new element has started.
1464 *
1465 * Returns 0 on success, -1 if the element counter overflowed.
1466 */
1467static int
1468xmlParserNsStartElement(xmlParserNsData *nsdb) {
1469 if (nsdb->elementId == UINT_MAX)
1470 return(-1);
1471 nsdb->elementId++;
1472
1473 return(0);
1474}
1475
1476/**
1477 * xmlParserNsLookup:
1478 * @ctxt: parser context
1479 * @prefix: namespace prefix
1480 * @bucketPtr: optional bucket (return value)
1481 *
1482 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1483 * be set to the matching bucket, or the first empty bucket if no match
1484 * was found.
1485 *
1486 * Returns the namespace index on success, INT_MAX if no namespace was
1487 * found.
1488 */
1489static int
1490xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1491 xmlParserNsBucket **bucketPtr) {
1492 xmlParserNsBucket *bucket;
1493 unsigned index, hashValue;
1494
1495 if (prefix->name == NULL)
1496 return(ctxt->nsdb->defaultNsIndex);
1497
1498 if (ctxt->nsdb->hashSize == 0)
1499 return(INT_MAX);
1500
1501 hashValue = prefix->hashValue;
1502 index = hashValue & (ctxt->nsdb->hashSize - 1);
1503 bucket = &ctxt->nsdb->hash[index];
1504
1505 while (bucket->hashValue) {
1506 if ((bucket->hashValue == hashValue) &&
1507 (bucket->index != INT_MAX)) {
1508 if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1509 if (bucketPtr != NULL)
1510 *bucketPtr = bucket;
1511 return(bucket->index);
1512 }
1513 }
1514
1515 index++;
1516 bucket++;
1517 if (index == ctxt->nsdb->hashSize) {
1518 index = 0;
1519 bucket = ctxt->nsdb->hash;
1520 }
1521 }
1522
1523 if (bucketPtr != NULL)
1524 *bucketPtr = bucket;
1525 return(INT_MAX);
1526}
1527
1528/**
1529 * xmlParserNsLookupUri:
1530 * @ctxt: parser context
1531 * @prefix: namespace prefix
1532 *
1533 * Lookup namespace URI with given prefix.
1534 *
1535 * Returns the namespace URI on success, NULL if no namespace was found.
1536 */
1537static const xmlChar *
1538xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1539 const xmlChar *ret;
1540 int nsIndex;
1541
1542 if (prefix->name == ctxt->str_xml)
1543 return(ctxt->str_xml_ns);
1544
1545 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1546 if (nsIndex == INT_MAX)
1547 return(NULL);
1548
1549 ret = ctxt->nsTab[nsIndex * 2 + 1];
1550 if (ret[0] == 0)
1551 ret = NULL;
1552 return(ret);
1553}
1554
1555/**
1556 * xmlParserNsLookupSax:
1557 * @ctxt: parser context
1558 * @prefix: namespace prefix
1559 *
1560 * Lookup extra data for the given prefix. This returns data stored
1561 * with xmlParserNsUdpateSax.
1562 *
1563 * Returns the data on success, NULL if no namespace was found.
1564 */
1565void *
1566xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1567 xmlHashedString hprefix;
1568 int nsIndex;
1569
1570 if (prefix == ctxt->str_xml)
1571 return(NULL);
1572
1573 hprefix.name = prefix;
1574 if (prefix != NULL)
1575 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1576 else
1577 hprefix.hashValue = 0;
1578 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1579 if (nsIndex == INT_MAX)
1580 return(NULL);
1581
1582 return(ctxt->nsdb->extra[nsIndex].saxData);
1583}
1584
1585/**
1586 * xmlParserNsUpdateSax:
1587 * @ctxt: parser context
1588 * @prefix: namespace prefix
1589 * @saxData: extra data for SAX handler
1590 *
1591 * Sets or updates extra data for the given prefix. This value will be
1592 * returned by xmlParserNsLookupSax as long as the namespace with the
1593 * given prefix is in scope.
1594 *
1595 * Returns the data on success, NULL if no namespace was found.
1596 */
1597int
1598xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1599 void *saxData) {
1600 xmlHashedString hprefix;
1601 int nsIndex;
1602
1603 if (prefix == ctxt->str_xml)
1604 return(-1);
1605
1606 hprefix.name = prefix;
1607 if (prefix != NULL)
1608 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1609 else
1610 hprefix.hashValue = 0;
1611 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1612 if (nsIndex == INT_MAX)
1613 return(-1);
1614
1615 ctxt->nsdb->extra[nsIndex].saxData = saxData;
1616 return(0);
1617}
1618
1619/**
1620 * xmlParserNsGrow:
1621 * @ctxt: parser context
1622 *
1623 * Grows the namespace tables.
1624 *
1625 * Returns 0 on success, -1 if a memory allocation failed.
1626 */
1627static int
1628xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1629 const xmlChar **table;
1630 xmlParserNsExtra *extra;
1631 int newSize;
1632
1633 if (ctxt->nsMax > INT_MAX / 2)
1634 goto error;
1635 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1636
1637 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1638 if (table == NULL)
1639 goto error;
1640 ctxt->nsTab = table;
1641
1642 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1643 if (extra == NULL)
1644 goto error;
1645 ctxt->nsdb->extra = extra;
1646
1647 ctxt->nsMax = newSize;
1648 return(0);
1649
1650error:
1651 xmlErrMemory(ctxt, NULL);
1652 return(-1);
1653}
1654
1655/**
1656 * xmlParserNsPush:
1657 * @ctxt: parser context
1658 * @prefix: prefix with hash value
1659 * @uri: uri with hash value
1660 * @saxData: extra data for SAX handler
1661 * @defAttr: whether the namespace comes from a default attribute
1662 *
1663 * Push a new namespace on the table.
1664 *
1665 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1666 * -1 if a memory allocation failed.
1667 */
1668static int
1669xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1670 const xmlHashedString *uri, void *saxData, int defAttr) {
1671 xmlParserNsBucket *bucket = NULL;
1672 xmlParserNsExtra *extra;
1673 const xmlChar **ns;
1674 unsigned hashValue, nsIndex, oldIndex;
1675
1676 if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1677 return(0);
1678
1679 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1680 xmlErrMemory(ctxt, NULL);
1681 return(-1);
1682 }
1683
1684 /*
1685 * Default namespace and 'xml' namespace
1686 */
1687 if ((prefix == NULL) || (prefix->name == NULL)) {
1688 oldIndex = ctxt->nsdb->defaultNsIndex;
1689
1690 if (oldIndex != INT_MAX) {
1691 extra = &ctxt->nsdb->extra[oldIndex];
1692
1693 if (extra->elementId == ctxt->nsdb->elementId) {
1694 if (defAttr == 0)
1695 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1696 return(0);
1697 }
1698
1699 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1700 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1701 return(0);
1702 }
1703
1704 ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1705 goto populate_entry;
1706 }
1707
1708 /*
1709 * Hash table lookup
1710 */
1711 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1712 if (oldIndex != INT_MAX) {
1713 extra = &ctxt->nsdb->extra[oldIndex];
1714
1715 /*
1716 * Check for duplicate definitions on the same element.
1717 */
1718 if (extra->elementId == ctxt->nsdb->elementId) {
1719 if (defAttr == 0)
1720 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1721 return(0);
1722 }
1723
1724 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1725 (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1726 return(0);
1727
1728 bucket->index = ctxt->nsNr;
1729 goto populate_entry;
1730 }
1731
1732 /*
1733 * Insert new bucket
1734 */
1735
1736 hashValue = prefix->hashValue;
1737
1738 /*
1739 * Grow hash table, 50% fill factor
1740 */
1741 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1742 xmlParserNsBucket *newHash;
1743 unsigned newSize, i, index;
1744
1745 if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1746 xmlErrMemory(ctxt, NULL);
1747 return(-1);
1748 }
1749 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1750 newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1751 if (newHash == NULL) {
1752 xmlErrMemory(ctxt, NULL);
1753 return(-1);
1754 }
1755 memset(newHash, 0, newSize * sizeof(newHash[0]));
1756
1757 for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1758 unsigned hv = ctxt->nsdb->hash[i].hashValue;
1759 unsigned newIndex;
1760
1761 if (hv == 0)
1762 continue;
1763 newIndex = hv & (newSize - 1);
1764
1765 while (newHash[newIndex].hashValue != 0) {
1766 newIndex++;
1767 if (newIndex == newSize)
1768 newIndex = 0;
1769 }
1770
1771 newHash[newIndex] = ctxt->nsdb->hash[i];
1772 }
1773
1774 xmlFree(ctxt->nsdb->hash);
1775 ctxt->nsdb->hash = newHash;
1776 ctxt->nsdb->hashSize = newSize;
1777
1778 /*
1779 * Relookup
1780 */
1781 index = hashValue & (newSize - 1);
1782
1783 while (newHash[index].hashValue != 0) {
1784 index++;
1785 if (index == newSize)
1786 index = 0;
1787 }
1788
1789 bucket = &newHash[index];
1790 }
1791
1792 bucket->hashValue = hashValue;
1793 bucket->index = ctxt->nsNr;
1794 ctxt->nsdb->hashElems++;
1795 oldIndex = INT_MAX;
1796
1797populate_entry:
1798 nsIndex = ctxt->nsNr;
1799
1800 ns = &ctxt->nsTab[nsIndex * 2];
1801 ns[0] = prefix ? prefix->name : NULL;
1802 ns[1] = uri->name;
1803
1804 extra = &ctxt->nsdb->extra[nsIndex];
1805 extra->saxData = saxData;
1806 extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1807 extra->uriHashValue = uri->hashValue;
1808 extra->elementId = ctxt->nsdb->elementId;
1809 extra->oldIndex = oldIndex;
1810
1811 ctxt->nsNr++;
1812
1813 return(1);
1814}
1815
1816/**
1817 * xmlParserNsPop:
1818 * @ctxt: an XML parser context
1819 * @nr: the number to pop
1820 *
1821 * Pops the top @nr namespaces and restores the hash table.
1822 *
1823 * Returns the number of namespaces popped.
1824 */
1825static int
1826xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1827{
1828 int i;
1829
1830 /* assert(nr <= ctxt->nsNr); */
1831
1832 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1833 const xmlChar *prefix = ctxt->nsTab[i * 2];
1834 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1835
1836 if (prefix == NULL) {
1837 ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1838 } else {
1839 xmlHashedString hprefix;
1840 xmlParserNsBucket *bucket = NULL;
1841
1842 hprefix.name = prefix;
1843 hprefix.hashValue = extra->prefixHashValue;
1844 xmlParserNsLookup(ctxt, &hprefix, &bucket);
1845 /* assert(bucket && bucket->hashValue); */
1846 bucket->index = extra->oldIndex;
1847 }
1848 }
1849
1850 ctxt->nsNr -= nr;
1851 return(nr);
1852}
1853
1854static int
1855xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1856 const xmlChar **atts;
1857 unsigned *attallocs;
1858 int maxatts;
1859
1860 if (nr + 5 > ctxt->maxatts) {
1861 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1862 atts = (const xmlChar **) xmlMalloc(
1863 maxatts * sizeof(const xmlChar *));
1864 if (atts == NULL) goto mem_error;
1865 attallocs = xmlRealloc(ctxt->attallocs,
1866 (maxatts / 5) * sizeof(attallocs[0]));
1867 if (attallocs == NULL) {
1868 xmlFree(atts);
1869 goto mem_error;
1870 }
1871 if (ctxt->maxatts > 0)
1872 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1873 xmlFree(ctxt->atts);
1874 ctxt->atts = atts;
1875 ctxt->attallocs = attallocs;
1876 ctxt->maxatts = maxatts;
1877 }
1878 return(ctxt->maxatts);
1879mem_error:
1880 xmlErrMemory(ctxt, NULL);
1881 return(-1);
1882}
1883
1884/**
1885 * inputPush:
1886 * @ctxt: an XML parser context
1887 * @value: the parser input
1888 *
1889 * Pushes a new parser input on top of the input stack
1890 *
1891 * Returns -1 in case of error, the index in the stack otherwise
1892 */
1893int
1894inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1895{
1896 if ((ctxt == NULL) || (value == NULL))
1897 return(-1);
1898 if (ctxt->inputNr >= ctxt->inputMax) {
1899 size_t newSize = ctxt->inputMax * 2;
1900 xmlParserInputPtr *tmp;
1901
1902 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1903 newSize * sizeof(*tmp));
1904 if (tmp == NULL) {
1905 xmlErrMemory(ctxt, NULL);
1906 return (-1);
1907 }
1908 ctxt->inputTab = tmp;
1909 ctxt->inputMax = newSize;
1910 }
1911 ctxt->inputTab[ctxt->inputNr] = value;
1912 ctxt->input = value;
1913 return (ctxt->inputNr++);
1914}
1915/**
1916 * inputPop:
1917 * @ctxt: an XML parser context
1918 *
1919 * Pops the top parser input from the input stack
1920 *
1921 * Returns the input just removed
1922 */
1923xmlParserInputPtr
1924inputPop(xmlParserCtxtPtr ctxt)
1925{
1926 xmlParserInputPtr ret;
1927
1928 if (ctxt == NULL)
1929 return(NULL);
1930 if (ctxt->inputNr <= 0)
1931 return (NULL);
1932 ctxt->inputNr--;
1933 if (ctxt->inputNr > 0)
1934 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1935 else
1936 ctxt->input = NULL;
1937 ret = ctxt->inputTab[ctxt->inputNr];
1938 ctxt->inputTab[ctxt->inputNr] = NULL;
1939 return (ret);
1940}
1941/**
1942 * nodePush:
1943 * @ctxt: an XML parser context
1944 * @value: the element node
1945 *
1946 * DEPRECATED: Internal function, do not use.
1947 *
1948 * Pushes a new element node on top of the node stack
1949 *
1950 * Returns -1 in case of error, the index in the stack otherwise
1951 */
1952int
1953nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1954{
1955 if (ctxt == NULL) return(0);
1956 if (ctxt->nodeNr >= ctxt->nodeMax) {
1957 xmlNodePtr *tmp;
1958
1959 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1960 ctxt->nodeMax * 2 *
1961 sizeof(ctxt->nodeTab[0]));
1962 if (tmp == NULL) {
1963 xmlErrMemory(ctxt, NULL);
1964 return (-1);
1965 }
1966 ctxt->nodeTab = tmp;
1967 ctxt->nodeMax *= 2;
1968 }
1969 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1970 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1971 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1972 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1973 xmlParserMaxDepth);
1974 xmlHaltParser(ctxt);
1975 return(-1);
1976 }
1977 ctxt->nodeTab[ctxt->nodeNr] = value;
1978 ctxt->node = value;
1979 return (ctxt->nodeNr++);
1980}
1981
1982/**
1983 * nodePop:
1984 * @ctxt: an XML parser context
1985 *
1986 * DEPRECATED: Internal function, do not use.
1987 *
1988 * Pops the top element node from the node stack
1989 *
1990 * Returns the node just removed
1991 */
1992xmlNodePtr
1993nodePop(xmlParserCtxtPtr ctxt)
1994{
1995 xmlNodePtr ret;
1996
1997 if (ctxt == NULL) return(NULL);
1998 if (ctxt->nodeNr <= 0)
1999 return (NULL);
2000 ctxt->nodeNr--;
2001 if (ctxt->nodeNr > 0)
2002 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2003 else
2004 ctxt->node = NULL;
2005 ret = ctxt->nodeTab[ctxt->nodeNr];
2006 ctxt->nodeTab[ctxt->nodeNr] = NULL;
2007 return (ret);
2008}
2009
2010/**
2011 * nameNsPush:
2012 * @ctxt: an XML parser context
2013 * @value: the element name
2014 * @prefix: the element prefix
2015 * @URI: the element namespace name
2016 * @line: the current line number for error messages
2017 * @nsNr: the number of namespaces pushed on the namespace table
2018 *
2019 * Pushes a new element name/prefix/URL on top of the name stack
2020 *
2021 * Returns -1 in case of error, the index in the stack otherwise
2022 */
2023static int
2024nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2025 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2026{
2027 xmlStartTag *tag;
2028
2029 if (ctxt->nameNr >= ctxt->nameMax) {
2030 const xmlChar * *tmp;
2031 xmlStartTag *tmp2;
2032 ctxt->nameMax *= 2;
2033 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2034 ctxt->nameMax *
2035 sizeof(ctxt->nameTab[0]));
2036 if (tmp == NULL) {
2037 ctxt->nameMax /= 2;
2038 goto mem_error;
2039 }
2040 ctxt->nameTab = tmp;
2041 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2042 ctxt->nameMax *
2043 sizeof(ctxt->pushTab[0]));
2044 if (tmp2 == NULL) {
2045 ctxt->nameMax /= 2;
2046 goto mem_error;
2047 }
2048 ctxt->pushTab = tmp2;
2049 } else if (ctxt->pushTab == NULL) {
2050 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2051 sizeof(ctxt->pushTab[0]));
2052 if (ctxt->pushTab == NULL)
2053 goto mem_error;
2054 }
2055 ctxt->nameTab[ctxt->nameNr] = value;
2056 ctxt->name = value;
2057 tag = &ctxt->pushTab[ctxt->nameNr];
2058 tag->prefix = prefix;
2059 tag->URI = URI;
2060 tag->line = line;
2061 tag->nsNr = nsNr;
2062 return (ctxt->nameNr++);
2063mem_error:
2064 xmlErrMemory(ctxt, NULL);
2065 return (-1);
2066}
2067#ifdef LIBXML_PUSH_ENABLED
2068/**
2069 * nameNsPop:
2070 * @ctxt: an XML parser context
2071 *
2072 * Pops the top element/prefix/URI name from the name stack
2073 *
2074 * Returns the name just removed
2075 */
2076static const xmlChar *
2077nameNsPop(xmlParserCtxtPtr ctxt)
2078{
2079 const xmlChar *ret;
2080
2081 if (ctxt->nameNr <= 0)
2082 return (NULL);
2083 ctxt->nameNr--;
2084 if (ctxt->nameNr > 0)
2085 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2086 else
2087 ctxt->name = NULL;
2088 ret = ctxt->nameTab[ctxt->nameNr];
2089 ctxt->nameTab[ctxt->nameNr] = NULL;
2090 return (ret);
2091}
2092#endif /* LIBXML_PUSH_ENABLED */
2093
2094/**
2095 * namePush:
2096 * @ctxt: an XML parser context
2097 * @value: the element name
2098 *
2099 * DEPRECATED: Internal function, do not use.
2100 *
2101 * Pushes a new element name on top of the name stack
2102 *
2103 * Returns -1 in case of error, the index in the stack otherwise
2104 */
2105int
2106namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2107{
2108 if (ctxt == NULL) return (-1);
2109
2110 if (ctxt->nameNr >= ctxt->nameMax) {
2111 const xmlChar * *tmp;
2112 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2113 ctxt->nameMax * 2 *
2114 sizeof(ctxt->nameTab[0]));
2115 if (tmp == NULL) {
2116 goto mem_error;
2117 }
2118 ctxt->nameTab = tmp;
2119 ctxt->nameMax *= 2;
2120 }
2121 ctxt->nameTab[ctxt->nameNr] = value;
2122 ctxt->name = value;
2123 return (ctxt->nameNr++);
2124mem_error:
2125 xmlErrMemory(ctxt, NULL);
2126 return (-1);
2127}
2128
2129/**
2130 * namePop:
2131 * @ctxt: an XML parser context
2132 *
2133 * DEPRECATED: Internal function, do not use.
2134 *
2135 * Pops the top element name from the name stack
2136 *
2137 * Returns the name just removed
2138 */
2139const xmlChar *
2140namePop(xmlParserCtxtPtr ctxt)
2141{
2142 const xmlChar *ret;
2143
2144 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2145 return (NULL);
2146 ctxt->nameNr--;
2147 if (ctxt->nameNr > 0)
2148 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2149 else
2150 ctxt->name = NULL;
2151 ret = ctxt->nameTab[ctxt->nameNr];
2152 ctxt->nameTab[ctxt->nameNr] = NULL;
2153 return (ret);
2154}
2155
2156static int spacePush(xmlParserCtxtPtr ctxt, int val) {
2157 if (ctxt->spaceNr >= ctxt->spaceMax) {
2158 int *tmp;
2159
2160 ctxt->spaceMax *= 2;
2161 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2162 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2163 if (tmp == NULL) {
2164 xmlErrMemory(ctxt, NULL);
2165 ctxt->spaceMax /=2;
2166 return(-1);
2167 }
2168 ctxt->spaceTab = tmp;
2169 }
2170 ctxt->spaceTab[ctxt->spaceNr] = val;
2171 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2172 return(ctxt->spaceNr++);
2173}
2174
2175static int spacePop(xmlParserCtxtPtr ctxt) {
2176 int ret;
2177 if (ctxt->spaceNr <= 0) return(0);
2178 ctxt->spaceNr--;
2179 if (ctxt->spaceNr > 0)
2180 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2181 else
2182 ctxt->space = &ctxt->spaceTab[0];
2183 ret = ctxt->spaceTab[ctxt->spaceNr];
2184 ctxt->spaceTab[ctxt->spaceNr] = -1;
2185 return(ret);
2186}
2187
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named ctxt, the parser context, to be
 * in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR the base pointer of the current input (ctxt->input->base).
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR: both read the xmlChar at the current position
 *           of the input buffer.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
2228
/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at s are c1 ... cn.
 * The bytes are read as unsigned char. s is evaluated once per byte
 * compared, so it must not have side effects. Every parameter is
 * parenthesized so that arbitrary expressions can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2246
/*
 * SKIP(val): advance the input cursor and the column counter by val,
 * then call xmlParserGrow() if the cursor sits on a zero byte. val is
 * evaluated twice.
 */
#define SKIP(val) do { \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (ctxt->input->cur[0] == 0) \
        xmlParserGrow(ctxt); \
  } while (0)
2252
/*
 * SKIPL(val): advance the input cursor by val xmlChars one at a time,
 * keeping the line and column counters up to date (a '\n' starts a new
 * line at column 1). Calls xmlParserGrow() afterwards if the cursor sits
 * on a zero byte. val is evaluated on every iteration; it is
 * parenthesized so that any expression can be passed.
 */
#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserGrow(ctxt); \
  } while (0)
2264
/*
 * SHRINK: call xmlParserShrink() when more than 2 * INPUT_CHUNK xmlChars
 * lie between the buffer base and the cursor and fewer than
 * 2 * INPUT_CHUNK remain before the end.
 * Don't shrink push parser buffer.
 *
 * SHRINK and GROW expand to a bare "if" statement that already ends with
 * a semicolon; use them as standalone statements only, never directly in
 * front of an "else".
 */
#define SHRINK \
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

/*
 * GROW: call xmlParserGrow() when fewer than INPUT_CHUNK xmlChars remain
 * between the cursor and the end of the buffer.
 */
#define GROW \
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
        xmlParserGrow(ctxt);
2274
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter by one xmlChar, then
 * call xmlParserGrow() if the cursor sits on a zero byte. Expands to a
 * brace block, not to a do/while statement.
 */
#define NEXT1 { \
        ctxt->input->cur++; \
        ctxt->input->col++; \
        if (ctxt->input->cur[0] == 0) \
            xmlParserGrow(ctxt); \
    }
2285
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' at the cursor starts a new line at column 1, anything else
 * advances the column by one.
 */
#define NEXTL(l) do { \
    if (ctxt->input->cur[0] == '\n') { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } else { \
        ctxt->input->col++; \
    } \
    ctxt->input->cur += (l); \
  } while (0)
2292
/*
 * CUR_CHAR(l): calls xmlCurrentChar() on the current context, passing
 * the address of l.
 * CUR_SCHAR(s, l): calls xmlStringCurrentChar() with the string s and
 * the address of l.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(b, i, v): append the character v to buffer b at index i and
 * advance i. Values below 0x80 are stored as a single byte, anything
 * else goes through xmlCopyCharMultiByte() which returns the number of
 * bytes written.
 *
 * Expands to an if/else statement without a trailing semicolon, so it
 * must be used as a standalone statement. The arguments are evaluated
 * more than once; they are parenthesized so that expressions such as
 * "a | b" can be passed as v.
 */
#define COPY_BUF(b, i, v) \
    if ((v) < 0x80) (b)[(i)++] = (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
2299
2300/**
2301 * xmlSkipBlankChars:
2302 * @ctxt: the XML parser context
2303 *
2304 * DEPRECATED: Internal function, do not use.
2305 *
2306 * skip all blanks character found at that point in the input streams.
2307 * It pops up finished entities in the process if allowable at that point.
2308 *
2309 * Returns the number of space chars skipped
2310 */
2311
2312int
2313xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2314 int res = 0;
2315
2316 /*
2317 * It's Okay to use CUR/NEXT here since all the blanks are on
2318 * the ASCII range.
2319 */
2320 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2321 (ctxt->instate == XML_PARSER_START)) {
2322 const xmlChar *cur;
2323 /*
2324 * if we are in the document content, go really fast
2325 */
2326 cur = ctxt->input->cur;
2327 while (IS_BLANK_CH(*cur)) {
2328 if (*cur == '\n') {
2329 ctxt->input->line++; ctxt->input->col = 1;
2330 } else {
2331 ctxt->input->col++;
2332 }
2333 cur++;
2334 if (res < INT_MAX)
2335 res++;
2336 if (*cur == 0) {
2337 ctxt->input->cur = cur;
2338 xmlParserGrow(ctxt);
2339 cur = ctxt->input->cur;
2340 }
2341 }
2342 ctxt->input->cur = cur;
2343 } else {
2344 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2345
2346 while (ctxt->instate != XML_PARSER_EOF) {
2347 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2348 NEXT;
2349 } else if (CUR == '%') {
2350 /*
2351 * Need to handle support of entities branching here
2352 */
2353 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2354 break;
2355 xmlParsePEReference(ctxt);
2356 } else if (CUR == 0) {
2357 unsigned long consumed;
2358 xmlEntityPtr ent;
2359
2360 if (ctxt->inputNr <= 1)
2361 break;
2362
2363 consumed = ctxt->input->consumed;
2364 xmlSaturatedAddSizeT(&consumed,
2365 ctxt->input->cur - ctxt->input->base);
2366
2367 /*
2368 * Add to sizeentities when parsing an external entity
2369 * for the first time.
2370 */
2371 ent = ctxt->input->entity;
2372 if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2373 ((ent->flags & XML_ENT_PARSED) == 0)) {
2374 ent->flags |= XML_ENT_PARSED;
2375
2376 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2377 }
2378
2379 xmlParserEntityCheck(ctxt, consumed);
2380
2381 xmlPopInput(ctxt);
2382 } else {
2383 break;
2384 }
2385
2386 /*
2387 * Also increase the counter when entering or exiting a PERef.
2388 * The spec says: "When a parameter-entity reference is recognized
2389 * in the DTD and included, its replacement text MUST be enlarged
2390 * by the attachment of one leading and one following space (#x20)
2391 * character."
2392 */
2393 if (res < INT_MAX)
2394 res++;
2395 }
2396 }
2397 return(res);
2398}
2399
2400/************************************************************************
2401 * *
2402 * Commodity functions to handle entities *
2403 * *
2404 ************************************************************************/
2405
2406/**
2407 * xmlPopInput:
2408 * @ctxt: an XML parser context
2409 *
2410 * xmlPopInput: the current input pointed by ctxt->input came to an end
2411 * pop it and return the next char.
2412 *
2413 * Returns the current xmlChar in the parser context
2414 */
2415xmlChar
2416xmlPopInput(xmlParserCtxtPtr ctxt) {
2417 xmlParserInputPtr input;
2418
2419 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2420 if (xmlParserDebugEntities)
2421 xmlGenericError(xmlGenericErrorContext,
2422 "Popping input %d\n", ctxt->inputNr);
2423 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2424 (ctxt->instate != XML_PARSER_EOF))
2425 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2426 "Unfinished entity outside the DTD");
2427 input = inputPop(ctxt);
2428 if (input->entity != NULL)
2429 input->entity->flags &= ~XML_ENT_EXPANDING;
2430 xmlFreeInputStream(input);
2431 if (*ctxt->input->cur == 0)
2432 xmlParserGrow(ctxt);
2433 return(CUR);
2434}
2435
2436/**
2437 * xmlPushInput:
2438 * @ctxt: an XML parser context
2439 * @input: an XML parser input fragment (entity, XML fragment ...).
2440 *
2441 * xmlPushInput: switch to a new input stream which is stacked on top
2442 * of the previous one(s).
2443 * Returns -1 in case of error or the index in the input stack
2444 */
2445int
2446xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2447 int ret;
2448 if (input == NULL) return(-1);
2449
2450 if (xmlParserDebugEntities) {
2451 if ((ctxt->input != NULL) && (ctxt->input->filename))
2452 xmlGenericError(xmlGenericErrorContext,
2453 "%s(%d): ", ctxt->input->filename,
2454 ctxt->input->line);
2455 xmlGenericError(xmlGenericErrorContext,
2456 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2457 }
2458 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2459 (ctxt->inputNr > 100)) {
2460 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2461 while (ctxt->inputNr > 1)
2462 xmlFreeInputStream(inputPop(ctxt));
2463 return(-1);
2464 }
2465 ret = inputPush(ctxt, input);
2466 if (ctxt->instate == XML_PARSER_EOF)
2467 return(-1);
2468 GROW;
2469 return(ret);
2470}
2471
2472/**
2473 * xmlParseCharRef:
2474 * @ctxt: an XML parser context
2475 *
2476 * DEPRECATED: Internal function, don't use.
2477 *
2478 * Parse a numeric character reference. Always consumes '&'.
2479 *
2480 * [66] CharRef ::= '&#' [0-9]+ ';' |
2481 * '&#x' [0-9a-fA-F]+ ';'
2482 *
2483 * [ WFC: Legal Character ]
2484 * Characters referred to using character references must match the
2485 * production for Char.
2486 *
2487 * Returns the value parsed (as an int), 0 in case of error
2488 */
2489int
2490xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2491 int val = 0;
2492 int count = 0;
2493
2494 /*
2495 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2496 */
2497 if ((RAW == '&') && (NXT(1) == '#') &&
2498 (NXT(2) == 'x')) {
2499 SKIP(3);
2500 GROW;
2501 while (RAW != ';') { /* loop blocked by count */
2502 if (count++ > 20) {
2503 count = 0;
2504 GROW;
2505 if (ctxt->instate == XML_PARSER_EOF)
2506 return(0);
2507 }
2508 if ((RAW >= '0') && (RAW <= '9'))
2509 val = val * 16 + (CUR - '0');
2510 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2511 val = val * 16 + (CUR - 'a') + 10;
2512 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2513 val = val * 16 + (CUR - 'A') + 10;
2514 else {
2515 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2516 val = 0;
2517 break;
2518 }
2519 if (val > 0x110000)
2520 val = 0x110000;
2521
2522 NEXT;
2523 count++;
2524 }
2525 if (RAW == ';') {
2526 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2527 ctxt->input->col++;
2528 ctxt->input->cur++;
2529 }
2530 } else if ((RAW == '&') && (NXT(1) == '#')) {
2531 SKIP(2);
2532 GROW;
2533 while (RAW != ';') { /* loop blocked by count */
2534 if (count++ > 20) {
2535 count = 0;
2536 GROW;
2537 if (ctxt->instate == XML_PARSER_EOF)
2538 return(0);
2539 }
2540 if ((RAW >= '0') && (RAW <= '9'))
2541 val = val * 10 + (CUR - '0');
2542 else {
2543 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2544 val = 0;
2545 break;
2546 }
2547 if (val > 0x110000)
2548 val = 0x110000;
2549
2550 NEXT;
2551 count++;
2552 }
2553 if (RAW == ';') {
2554 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2555 ctxt->input->col++;
2556 ctxt->input->cur++;
2557 }
2558 } else {
2559 if (RAW == '&')
2560 SKIP(1);
2561 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2562 }
2563
2564 /*
2565 * [ WFC: Legal Character ]
2566 * Characters referred to using character references must match the
2567 * production for Char.
2568 */
2569 if (val >= 0x110000) {
2570 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2571 "xmlParseCharRef: character reference out of bounds\n",
2572 val);
2573 } else if (IS_CHAR(val)) {
2574 return(val);
2575 } else {
2576 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2577 "xmlParseCharRef: invalid xmlChar value %d\n",
2578 val);
2579 }
2580 return(0);
2581}
2582
2583/**
2584 * xmlParseStringCharRef:
2585 * @ctxt: an XML parser context
2586 * @str: a pointer to an index in the string
2587 *
2588 * parse Reference declarations, variant parsing from a string rather
2589 * than an an input flow.
2590 *
2591 * [66] CharRef ::= '&#' [0-9]+ ';' |
2592 * '&#x' [0-9a-fA-F]+ ';'
2593 *
2594 * [ WFC: Legal Character ]
2595 * Characters referred to using character references must match the
2596 * production for Char.
2597 *
2598 * Returns the value parsed (as an int), 0 in case of error, str will be
2599 * updated to the current value of the index
2600 */
2601static int
2602xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2603 const xmlChar *ptr;
2604 xmlChar cur;
2605 int val = 0;
2606
2607 if ((str == NULL) || (*str == NULL)) return(0);
2608 ptr = *str;
2609 cur = *ptr;
2610 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2611 ptr += 3;
2612 cur = *ptr;
2613 while (cur != ';') { /* Non input consuming loop */
2614 if ((cur >= '0') && (cur <= '9'))
2615 val = val * 16 + (cur - '0');
2616 else if ((cur >= 'a') && (cur <= 'f'))
2617 val = val * 16 + (cur - 'a') + 10;
2618 else if ((cur >= 'A') && (cur <= 'F'))
2619 val = val * 16 + (cur - 'A') + 10;
2620 else {
2621 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2622 val = 0;
2623 break;
2624 }
2625 if (val > 0x110000)
2626 val = 0x110000;
2627
2628 ptr++;
2629 cur = *ptr;
2630 }
2631 if (cur == ';')
2632 ptr++;
2633 } else if ((cur == '&') && (ptr[1] == '#')){
2634 ptr += 2;
2635 cur = *ptr;
2636 while (cur != ';') { /* Non input consuming loops */
2637 if ((cur >= '0') && (cur <= '9'))
2638 val = val * 10 + (cur - '0');
2639 else {
2640 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2641 val = 0;
2642 break;
2643 }
2644 if (val > 0x110000)
2645 val = 0x110000;
2646
2647 ptr++;
2648 cur = *ptr;
2649 }
2650 if (cur == ';')
2651 ptr++;
2652 } else {
2653 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2654 return(0);
2655 }
2656 *str = ptr;
2657
2658 /*
2659 * [ WFC: Legal Character ]
2660 * Characters referred to using character references must match the
2661 * production for Char.
2662 */
2663 if (val >= 0x110000) {
2664 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2665 "xmlParseStringCharRef: character reference out of bounds\n",
2666 val);
2667 } else if (IS_CHAR(val)) {
2668 return(val);
2669 } else {
2670 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2671 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2672 val);
2673 }
2674 return(0);
2675}
2676
2677/**
2678 * xmlParserHandlePEReference:
2679 * @ctxt: the parser context
2680 *
2681 * DEPRECATED: Internal function, do not use.
2682 *
2683 * [69] PEReference ::= '%' Name ';'
2684 *
2685 * [ WFC: No Recursion ]
2686 * A parsed entity must not contain a recursive
2687 * reference to itself, either directly or indirectly.
2688 *
2689 * [ WFC: Entity Declared ]
2690 * In a document without any DTD, a document with only an internal DTD
2691 * subset which contains no parameter entity references, or a document
2692 * with "standalone='yes'", ... ... The declaration of a parameter
2693 * entity must precede any reference to it...
2694 *
2695 * [ VC: Entity Declared ]
2696 * In a document with an external subset or external parameter entities
2697 * with "standalone='no'", ... ... The declaration of a parameter entity
2698 * must precede any reference to it...
2699 *
2700 * [ WFC: In DTD ]
2701 * Parameter-entity references may only appear in the DTD.
2702 * NOTE: misleading but this is handled.
2703 *
2704 * A PEReference may have been detected in the current input stream
2705 * the handling is done accordingly to
2706 * http://www.w3.org/TR/REC-xml#entproc
2707 * i.e.
2708 * - Included in literal in entity values
2709 * - Included as Parameter Entity reference within DTDs
2710 */
2711void
2712xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2713 switch(ctxt->instate) {
2714 case XML_PARSER_CDATA_SECTION:
2715 return;
2716 case XML_PARSER_COMMENT:
2717 return;
2718 case XML_PARSER_START_TAG:
2719 return;
2720 case XML_PARSER_END_TAG:
2721 return;
2722 case XML_PARSER_EOF:
2723 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2724 return;
2725 case XML_PARSER_PROLOG:
2726 case XML_PARSER_START:
2727 case XML_PARSER_XML_DECL:
2728 case XML_PARSER_MISC:
2729 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2730 return;
2731 case XML_PARSER_ENTITY_DECL:
2732 case XML_PARSER_CONTENT:
2733 case XML_PARSER_ATTRIBUTE_VALUE:
2734 case XML_PARSER_PI:
2735 case XML_PARSER_SYSTEM_LITERAL:
2736 case XML_PARSER_PUBLIC_LITERAL:
2737 /* we just ignore it there */
2738 return;
2739 case XML_PARSER_EPILOG:
2740 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2741 return;
2742 case XML_PARSER_ENTITY_VALUE:
2743 /*
2744 * NOTE: in the case of entity values, we don't do the
2745 * substitution here since we need the literal
2746 * entity value to be able to save the internal
2747 * subset of the document.
2748 * This will be handled by xmlStringDecodeEntities
2749 */
2750 return;
2751 case XML_PARSER_DTD:
2752 /*
2753 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2754 * In the internal DTD subset, parameter-entity references
2755 * can occur only where markup declarations can occur, not
2756 * within markup declarations.
2757 * In that case this is handled in xmlParseMarkupDecl
2758 */
2759 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2760 return;
2761 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2762 return;
2763 break;
2764 case XML_PARSER_IGNORE:
2765 return;
2766 }
2767
2768 xmlParsePEReference(ctxt);
2769}
2770
/*
 * growBuffer(buffer, n): enlarge a heap buffer to twice its current size
 * plus n bytes using xmlRealloc().
 *
 * Requirements on the expansion site:
 *  - `buffer` is an xmlChar * lvalue and a variable named buffer##_size
 *    (a size_t) holds its current capacity; both are updated on success;
 *  - a label `mem_error` exists; control jumps there when the new size
 *    wraps around or when the reallocation fails. In both cases the old
 *    buffer and buffer##_size are left untouched.
 *
 * n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2785
2786/**
2787 * xmlStringDecodeEntitiesInt:
2788 * @ctxt: the parser context
2789 * @str: the input string
2790 * @len: the string length
2791 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2792 * @end: an end marker xmlChar, 0 if none
2793 * @end2: an end marker xmlChar, 0 if none
2794 * @end3: an end marker xmlChar, 0 if none
2795 * @check: whether to perform entity checks
2796 */
2797static xmlChar *
2798xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2799 int what, xmlChar end, xmlChar end2, xmlChar end3,
2800 int check) {
2801 xmlChar *buffer = NULL;
2802 size_t buffer_size = 0;
2803 size_t nbchars = 0;
2804
2805 xmlChar *current = NULL;
2806 xmlChar *rep = NULL;
2807 const xmlChar *last;
2808 xmlEntityPtr ent;
2809 int c,l;
2810
2811 if (str == NULL)
2812 return(NULL);
2813 last = str + len;
2814
2815 if (((ctxt->depth > 40) &&
2816 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2817 (ctxt->depth > 100)) {
2818 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2819 "Maximum entity nesting depth exceeded");
2820 return(NULL);
2821 }
2822
2823 /*
2824 * allocate a translation buffer.
2825 */
2826 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2827 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2828 if (buffer == NULL) goto mem_error;
2829
2830 /*
2831 * OK loop until we reach one of the ending char or a size limit.
2832 * we are operating on already parsed values.
2833 */
2834 if (str < last)
2835 c = CUR_SCHAR(str, l);
2836 else
2837 c = 0;
2838 while ((c != 0) && (c != end) && /* non input consuming loop */
2839 (c != end2) && (c != end3) &&
2840 (ctxt->instate != XML_PARSER_EOF)) {
2841
2842 if (c == 0) break;
2843 if ((c == '&') && (str[1] == '#')) {
2844 int val = xmlParseStringCharRef(ctxt, &str);
2845 if (val == 0)
2846 goto int_error;
2847 COPY_BUF(buffer, nbchars, val);
2848 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2849 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2850 }
2851 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2852 if (xmlParserDebugEntities)
2853 xmlGenericError(xmlGenericErrorContext,
2854 "String decoding Entity Reference: %.30s\n",
2855 str);
2856 ent = xmlParseStringEntityRef(ctxt, &str);
2857 if ((ent != NULL) &&
2858 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2859 if (ent->content != NULL) {
2860 COPY_BUF(buffer, nbchars, ent->content[0]);
2861 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2862 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2863 }
2864 } else {
2865 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2866 "predefined entity has no content\n");
2867 goto int_error;
2868 }
2869 } else if ((ent != NULL) && (ent->content != NULL)) {
2870 if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2871 goto int_error;
2872
2873 if (ent->flags & XML_ENT_EXPANDING) {
2874 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2875 xmlHaltParser(ctxt);
2876 ent->content[0] = 0;
2877 goto int_error;
2878 }
2879
2880 ent->flags |= XML_ENT_EXPANDING;
2881 ctxt->depth++;
2882 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2883 ent->length, what, 0, 0, 0, check);
2884 ctxt->depth--;
2885 ent->flags &= ~XML_ENT_EXPANDING;
2886
2887 if (rep == NULL) {
2888 ent->content[0] = 0;
2889 goto int_error;
2890 }
2891
2892 current = rep;
2893 while (*current != 0) { /* non input consuming loop */
2894 buffer[nbchars++] = *current++;
2895 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2897 }
2898 }
2899 xmlFree(rep);
2900 rep = NULL;
2901 } else if (ent != NULL) {
2902 int i = xmlStrlen(ent->name);
2903 const xmlChar *cur = ent->name;
2904
2905 buffer[nbchars++] = '&';
2906 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2908 }
2909 for (;i > 0;i--)
2910 buffer[nbchars++] = *cur++;
2911 buffer[nbchars++] = ';';
2912 }
2913 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2914 if (xmlParserDebugEntities)
2915 xmlGenericError(xmlGenericErrorContext,
2916 "String decoding PE Reference: %.30s\n", str);
2917 ent = xmlParseStringPEReference(ctxt, &str);
2918 if (ent != NULL) {
2919 if (ent->content == NULL) {
2920 /*
2921 * Note: external parsed entities will not be loaded,
2922 * it is not required for a non-validating parser to
2923 * complete external PEReferences coming from the
2924 * internal subset
2925 */
2926 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2927 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2928 (ctxt->validate != 0)) {
2929 xmlLoadEntityContent(ctxt, ent);
2930 } else {
2931 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2932 "not validating will not read content for PE entity %s\n",
2933 ent->name, NULL);
2934 }
2935 }
2936
2937 if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2938 goto int_error;
2939
2940 if (ent->flags & XML_ENT_EXPANDING) {
2941 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2942 xmlHaltParser(ctxt);
2943 if (ent->content != NULL)
2944 ent->content[0] = 0;
2945 goto int_error;
2946 }
2947
2948 ent->flags |= XML_ENT_EXPANDING;
2949 ctxt->depth++;
2950 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2951 ent->length, what, 0, 0, 0, check);
2952 ctxt->depth--;
2953 ent->flags &= ~XML_ENT_EXPANDING;
2954
2955 if (rep == NULL) {
2956 if (ent->content != NULL)
2957 ent->content[0] = 0;
2958 goto int_error;
2959 }
2960 current = rep;
2961 while (*current != 0) { /* non input consuming loop */
2962 buffer[nbchars++] = *current++;
2963 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2964 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2965 }
2966 }
2967 xmlFree(rep);
2968 rep = NULL;
2969 }
2970 } else {
2971 COPY_BUF(buffer, nbchars, c);
2972 str += l;
2973 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2974 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2975 }
2976 }
2977 if (str < last)
2978 c = CUR_SCHAR(str, l);
2979 else
2980 c = 0;
2981 }
2982 buffer[nbchars] = 0;
2983 return(buffer);
2984
2985mem_error:
2986 xmlErrMemory(ctxt, NULL);
2987int_error:
2988 if (rep != NULL)
2989 xmlFree(rep);
2990 if (buffer != NULL)
2991 xmlFree(buffer);
2992 return(NULL);
2993}
2994
2995/**
2996 * xmlStringLenDecodeEntities:
2997 * @ctxt: the parser context
2998 * @str: the input string
2999 * @len: the string length
3000 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3001 * @end: an end marker xmlChar, 0 if none
3002 * @end2: an end marker xmlChar, 0 if none
3003 * @end3: an end marker xmlChar, 0 if none
3004 *
3005 * DEPRECATED: Internal function, don't use.
3006 *
3007 * Takes a entity string content and process to do the adequate substitutions.
3008 *
3009 * [67] Reference ::= EntityRef | CharRef
3010 *
3011 * [69] PEReference ::= '%' Name ';'
3012 *
3013 * Returns A newly allocated string with the substitution done. The caller
3014 * must deallocate it !
3015 */
3016xmlChar *
3017xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3018 int what, xmlChar end, xmlChar end2,
3019 xmlChar end3) {
3020 if ((ctxt == NULL) || (str == NULL) || (len < 0))
3021 return(NULL);
3022 return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
3023 end, end2, end3, 0));
3024}
3025
3026/**
3027 * xmlStringDecodeEntities:
3028 * @ctxt: the parser context
3029 * @str: the input string
3030 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3031 * @end: an end marker xmlChar, 0 if none
3032 * @end2: an end marker xmlChar, 0 if none
3033 * @end3: an end marker xmlChar, 0 if none
3034 *
3035 * DEPRECATED: Internal function, don't use.
3036 *
3037 * Takes a entity string content and process to do the adequate substitutions.
3038 *
3039 * [67] Reference ::= EntityRef | CharRef
3040 *
3041 * [69] PEReference ::= '%' Name ';'
3042 *
3043 * Returns A newly allocated string with the substitution done. The caller
3044 * must deallocate it !
3045 */
3046xmlChar *
3047xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
3048 xmlChar end, xmlChar end2, xmlChar end3) {
3049 if ((ctxt == NULL) || (str == NULL)) return(NULL);
3050 return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
3051 end, end2, end3, 0));
3052}
3053
3054/************************************************************************
3055 * *
3056 * Commodity functions, cleanup needed ? *
3057 * *
3058 ************************************************************************/
3059
3060/**
3061 * areBlanks:
3062 * @ctxt: an XML parser context
3063 * @str: a xmlChar *
3064 * @len: the size of @str
3065 * @blank_chars: we know the chars are blanks
3066 *
3067 * Is this a sequence of blank chars that one can ignore ?
3068 *
3069 * Returns 1 if ignorable 0 otherwise.
3070 */
3071
3072static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3073 int blank_chars) {
3074 int i, ret;
3075 xmlNodePtr lastChild;
3076
3077 /*
3078 * Don't spend time trying to differentiate them, the same callback is
3079 * used !
3080 */
3081 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
3082 return(0);
3083
3084 /*
3085 * Check for xml:space value.
3086 */
3087 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
3088 (*(ctxt->space) == -2))
3089 return(0);
3090
3091 /*
3092 * Check that the string is made of blanks
3093 */
3094 if (blank_chars == 0) {
3095 for (i = 0;i < len;i++)
3096 if (!(IS_BLANK_CH(str[i]))) return(0);
3097 }
3098
3099 /*
3100 * Look if the element is mixed content in the DTD if available
3101 */
3102 if (ctxt->node == NULL) return(0);
3103 if (ctxt->myDoc != NULL) {
3104 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3105 if (ret == 0) return(1);
3106 if (ret == 1) return(0);
3107 }
3108
3109 /*
3110 * Otherwise, heuristic :-\
3111 */
3112 if ((RAW != '<') && (RAW != 0xD)) return(0);
3113 if ((ctxt->node->children == NULL) &&
3114 (RAW == '<') && (NXT(1) == '/')) return(0);
3115
3116 lastChild = xmlGetLastChild(ctxt->node);
3117 if (lastChild == NULL) {
3118 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3119 (ctxt->node->content != NULL)) return(0);
3120 } else if (xmlNodeIsText(lastChild))
3121 return(0);
3122 else if ((ctxt->node->children != NULL) &&
3123 (xmlNodeIsText(ctxt->node->children)))
3124 return(0);
3125 return(1);
3126}
3127
3128/************************************************************************
3129 * *
3130 * Extra stuff for namespace support *
3131 * Relates to http://www.w3.org/TR/WD-xml-names *
3132 * *
3133 ************************************************************************/
3134
3135/**
3136 * xmlSplitQName:
3137 * @ctxt: an XML parser context
3138 * @name: an XML parser context
3139 * @prefix: a xmlChar **
3140 *
3141 * parse an UTF8 encoded XML qualified name string
3142 *
3143 * [NS 5] QName ::= (Prefix ':')? LocalPart
3144 *
3145 * [NS 6] Prefix ::= NCName
3146 *
3147 * [NS 7] LocalPart ::= NCName
3148 *
3149 * Returns the local part, and prefix is updated
3150 * to get the Prefix if any.
3151 */
3152
3153xmlChar *
3154xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3155 xmlChar buf[XML_MAX_NAMELEN + 5];
3156 xmlChar *buffer = NULL;
3157 int len = 0;
3158 int max = XML_MAX_NAMELEN;
3159 xmlChar *ret = NULL;
3160 const xmlChar *cur = name;
3161 int c;
3162
3163 if (prefix == NULL) return(NULL);
3164 *prefix = NULL;
3165
3166 if (cur == NULL) return(NULL);
3167
3168#ifndef XML_XML_NAMESPACE
3169 /* xml: prefix is not really a namespace */
3170 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3171 (cur[2] == 'l') && (cur[3] == ':'))
3172 return(xmlStrdup(name));
3173#endif
3174
3175 /* nasty but well=formed */
3176 if (cur[0] == ':')
3177 return(xmlStrdup(name));
3178
3179 c = *cur++;
3180 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3181 buf[len++] = c;
3182 c = *cur++;
3183 }
3184 if (len >= max) {
3185 /*
3186 * Okay someone managed to make a huge name, so he's ready to pay
3187 * for the processing speed.
3188 */
3189 max = len * 2;
3190
3191 buffer = (xmlChar *) xmlMallocAtomic(max);
3192 if (buffer == NULL) {
3193 xmlErrMemory(ctxt, NULL);
3194 return(NULL);
3195 }
3196 memcpy(buffer, buf, len);
3197 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3198 if (len + 10 > max) {
3199 xmlChar *tmp;
3200
3201 max *= 2;
3202 tmp = (xmlChar *) xmlRealloc(buffer, max);
3203 if (tmp == NULL) {
3204 xmlFree(buffer);
3205 xmlErrMemory(ctxt, NULL);
3206 return(NULL);
3207 }
3208 buffer = tmp;
3209 }
3210 buffer[len++] = c;
3211 c = *cur++;
3212 }
3213 buffer[len] = 0;
3214 }
3215
3216 if ((c == ':') && (*cur == 0)) {
3217 if (buffer != NULL)
3218 xmlFree(buffer);
3219 *prefix = NULL;
3220 return(xmlStrdup(name));
3221 }
3222
3223 if (buffer == NULL)
3224 ret = xmlStrndup(buf, len);
3225 else {
3226 ret = buffer;
3227 buffer = NULL;
3228 max = XML_MAX_NAMELEN;
3229 }
3230
3231
3232 if (c == ':') {
3233 c = *cur;
3234 *prefix = ret;
3235 if (c == 0) {
3236 return(xmlStrndup(BAD_CAST "", 0));
3237 }
3238 len = 0;
3239
3240 /*
3241 * Check that the first character is proper to start
3242 * a new name
3243 */
3244 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3245 ((c >= 0x41) && (c <= 0x5A)) ||
3246 (c == '_') || (c == ':'))) {
3247 int l;
3248 int first = CUR_SCHAR(cur, l);
3249
3250 if (!IS_LETTER(first) && (first != '_')) {
3251 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3252 "Name %s is not XML Namespace compliant\n",
3253 name);
3254 }
3255 }
3256 cur++;
3257
3258 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3259 buf[len++] = c;
3260 c = *cur++;
3261 }
3262 if (len >= max) {
3263 /*
3264 * Okay someone managed to make a huge name, so he's ready to pay
3265 * for the processing speed.
3266 */
3267 max = len * 2;
3268
3269 buffer = (xmlChar *) xmlMallocAtomic(max);
3270 if (buffer == NULL) {
3271 xmlErrMemory(ctxt, NULL);
3272 return(NULL);
3273 }
3274 memcpy(buffer, buf, len);
3275 while (c != 0) { /* tested bigname2.xml */
3276 if (len + 10 > max) {
3277 xmlChar *tmp;
3278
3279 max *= 2;
3280 tmp = (xmlChar *) xmlRealloc(buffer, max);
3281 if (tmp == NULL) {
3282 xmlErrMemory(ctxt, NULL);
3283 xmlFree(buffer);
3284 return(NULL);
3285 }
3286 buffer = tmp;
3287 }
3288 buffer[len++] = c;
3289 c = *cur++;
3290 }
3291 buffer[len] = 0;
3292 }
3293
3294 if (buffer == NULL)
3295 ret = xmlStrndup(buf, len);
3296 else {
3297 ret = buffer;
3298 }
3299 }
3300
3301 return(ret);
3302}
3303
3304/************************************************************************
3305 * *
3306 * The parser itself *
3307 * Relates to http://www.w3.org/TR/REC-xml *
3308 * *
3309 ************************************************************************/
3310
3311/************************************************************************
3312 * *
3313 * Routines to parse Name, NCName and NmToken *
3314 * *
3315 ************************************************************************/
3316
3317/*
3318 * The two following functions are related to the change of accepted
3319 * characters for Name and NmToken in the Revision 5 of XML-1.0
3320 * They correspond to the modified production [4] and the new production [4a]
3321 * changes in that revision. Also note that the macros used for the
3322 * productions Letter, Digit, CombiningChar and Extender are not needed
3323 * anymore.
3324 * We still keep compatibility to pre-revision5 parsing semantic if the
3325 * new XML_PARSE_OLD10 option is given to the parser.
3326 */
3327static int
3328xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3329 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3330 /*
3331 * Use the new checks of production [4] [4a] amd [5] of the
3332 * Update 5 of XML-1.0
3333 */
3334 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3335 (((c >= 'a') && (c <= 'z')) ||
3336 ((c >= 'A') && (c <= 'Z')) ||
3337 (c == '_') || (c == ':') ||
3338 ((c >= 0xC0) && (c <= 0xD6)) ||
3339 ((c >= 0xD8) && (c <= 0xF6)) ||
3340 ((c >= 0xF8) && (c <= 0x2FF)) ||
3341 ((c >= 0x370) && (c <= 0x37D)) ||
3342 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3343 ((c >= 0x200C) && (c <= 0x200D)) ||
3344 ((c >= 0x2070) && (c <= 0x218F)) ||
3345 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3346 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3347 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3348 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3349 ((c >= 0x10000) && (c <= 0xEFFFF))))
3350 return(1);
3351 } else {
3352 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3353 return(1);
3354 }
3355 return(0);
3356}
3357
3358static int
3359xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3360 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3361 /*
3362 * Use the new checks of production [4] [4a] amd [5] of the
3363 * Update 5 of XML-1.0
3364 */
3365 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3366 (((c >= 'a') && (c <= 'z')) ||
3367 ((c >= 'A') && (c <= 'Z')) ||
3368 ((c >= '0') && (c <= '9')) || /* !start */
3369 (c == '_') || (c == ':') ||
3370 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3371 ((c >= 0xC0) && (c <= 0xD6)) ||
3372 ((c >= 0xD8) && (c <= 0xF6)) ||
3373 ((c >= 0xF8) && (c <= 0x2FF)) ||
3374 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3375 ((c >= 0x370) && (c <= 0x37D)) ||
3376 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3377 ((c >= 0x200C) && (c <= 0x200D)) ||
3378 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3379 ((c >= 0x2070) && (c <= 0x218F)) ||
3380 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3381 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3382 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3383 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3384 ((c >= 0x10000) && (c <= 0xEFFFF))))
3385 return(1);
3386 } else {
3387 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3388 (c == '.') || (c == '-') ||
3389 (c == '_') || (c == ':') ||
3390 (IS_COMBINING(c)) ||
3391 (IS_EXTENDER(c)))
3392 return(1);
3393 }
3394 return(0);
3395}
3396
3397static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3398 int *len, int *alloc, int normalize);
3399
3400static const xmlChar *
3401xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3402 int len = 0, l;
3403 int c;
3404 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3405 XML_MAX_TEXT_LENGTH :
3406 XML_MAX_NAME_LENGTH;
3407
3408 /*
3409 * Handler for more complex cases
3410 */
3411 c = CUR_CHAR(l);
3412 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3413 /*
3414 * Use the new checks of production [4] [4a] amd [5] of the
3415 * Update 5 of XML-1.0
3416 */
3417 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3418 (!(((c >= 'a') && (c <= 'z')) ||
3419 ((c >= 'A') && (c <= 'Z')) ||
3420 (c == '_') || (c == ':') ||
3421 ((c >= 0xC0) && (c <= 0xD6)) ||
3422 ((c >= 0xD8) && (c <= 0xF6)) ||
3423 ((c >= 0xF8) && (c <= 0x2FF)) ||
3424 ((c >= 0x370) && (c <= 0x37D)) ||
3425 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3426 ((c >= 0x200C) && (c <= 0x200D)) ||
3427 ((c >= 0x2070) && (c <= 0x218F)) ||
3428 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3429 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3430 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3431 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3432 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3433 return(NULL);
3434 }
3435 len += l;
3436 NEXTL(l);
3437 c = CUR_CHAR(l);
3438 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3439 (((c >= 'a') && (c <= 'z')) ||
3440 ((c >= 'A') && (c <= 'Z')) ||
3441 ((c >= '0') && (c <= '9')) || /* !start */
3442 (c == '_') || (c == ':') ||
3443 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3444 ((c >= 0xC0) && (c <= 0xD6)) ||
3445 ((c >= 0xD8) && (c <= 0xF6)) ||
3446 ((c >= 0xF8) && (c <= 0x2FF)) ||
3447 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3448 ((c >= 0x370) && (c <= 0x37D)) ||
3449 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3450 ((c >= 0x200C) && (c <= 0x200D)) ||
3451 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3452 ((c >= 0x2070) && (c <= 0x218F)) ||
3453 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3454 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3455 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3456 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3457 ((c >= 0x10000) && (c <= 0xEFFFF))
3458 )) {
3459 if (len <= INT_MAX - l)
3460 len += l;
3461 NEXTL(l);
3462 c = CUR_CHAR(l);
3463 }
3464 } else {
3465 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3466 (!IS_LETTER(c) && (c != '_') &&
3467 (c != ':'))) {
3468 return(NULL);
3469 }
3470 len += l;
3471 NEXTL(l);
3472 c = CUR_CHAR(l);
3473
3474 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3475 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3476 (c == '.') || (c == '-') ||
3477 (c == '_') || (c == ':') ||
3478 (IS_COMBINING(c)) ||
3479 (IS_EXTENDER(c)))) {
3480 if (len <= INT_MAX - l)
3481 len += l;
3482 NEXTL(l);
3483 c = CUR_CHAR(l);
3484 }
3485 }
3486 if (ctxt->instate == XML_PARSER_EOF)
3487 return(NULL);
3488 if (len > maxLength) {
3489 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3490 return(NULL);
3491 }
3492 if (ctxt->input->cur - ctxt->input->base < len) {
3493 /*
3494 * There were a couple of bugs where PERefs lead to to a change
3495 * of the buffer. Check the buffer size to avoid passing an invalid
3496 * pointer to xmlDictLookup.
3497 */
3498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3499 "unexpected change of input buffer");
3500 return (NULL);
3501 }
3502 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3503 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3504 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3505}
3506
3507/**
3508 * xmlParseName:
3509 * @ctxt: an XML parser context
3510 *
3511 * DEPRECATED: Internal function, don't use.
3512 *
3513 * parse an XML name.
3514 *
3515 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3516 * CombiningChar | Extender
3517 *
3518 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3519 *
3520 * [6] Names ::= Name (#x20 Name)*
3521 *
3522 * Returns the Name parsed or NULL
3523 */
3524
3525const xmlChar *
3526xmlParseName(xmlParserCtxtPtr ctxt) {
3527 const xmlChar *in;
3528 const xmlChar *ret;
3529 size_t count = 0;
3530 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3531 XML_MAX_TEXT_LENGTH :
3532 XML_MAX_NAME_LENGTH;
3533
3534 GROW;
3535 if (ctxt->instate == XML_PARSER_EOF)
3536 return(NULL);
3537
3538 /*
3539 * Accelerator for simple ASCII names
3540 */
3541 in = ctxt->input->cur;
3542 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3543 ((*in >= 0x41) && (*in <= 0x5A)) ||
3544 (*in == '_') || (*in == ':')) {
3545 in++;
3546 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3547 ((*in >= 0x41) && (*in <= 0x5A)) ||
3548 ((*in >= 0x30) && (*in <= 0x39)) ||
3549 (*in == '_') || (*in == '-') ||
3550 (*in == ':') || (*in == '.'))
3551 in++;
3552 if ((*in > 0) && (*in < 0x80)) {
3553 count = in - ctxt->input->cur;
3554 if (count > maxLength) {
3555 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3556 return(NULL);
3557 }
3558 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3559 ctxt->input->cur = in;
3560 ctxt->input->col += count;
3561 if (ret == NULL)
3562 xmlErrMemory(ctxt, NULL);
3563 return(ret);
3564 }
3565 }
3566 /* accelerator for special cases */
3567 return(xmlParseNameComplex(ctxt));
3568}
3569
3570static xmlHashedString
3571xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3572 xmlHashedString ret;
3573 int len = 0, l;
3574 int c;
3575 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3576 XML_MAX_TEXT_LENGTH :
3577 XML_MAX_NAME_LENGTH;
3578 size_t startPosition = 0;
3579
3580 ret.name = NULL;
3581 ret.hashValue = 0;
3582
3583 /*
3584 * Handler for more complex cases
3585 */
3586 startPosition = CUR_PTR - BASE_PTR;
3587 c = CUR_CHAR(l);
3588 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3589 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3590 return(ret);
3591 }
3592
3593 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3594 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3595 if (len <= INT_MAX - l)
3596 len += l;
3597 NEXTL(l);
3598 c = CUR_CHAR(l);
3599 }
3600 if (ctxt->instate == XML_PARSER_EOF)
3601 return(ret);
3602 if (len > maxLength) {
3603 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3604 return(ret);
3605 }
3606 ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3607 return(ret);
3608}
3609
3610/**
3611 * xmlParseNCName:
3612 * @ctxt: an XML parser context
3613 * @len: length of the string parsed
3614 *
3615 * parse an XML name.
3616 *
3617 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3618 * CombiningChar | Extender
3619 *
3620 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3621 *
3622 * Returns the Name parsed or NULL
3623 */
3624
3625static xmlHashedString
3626xmlParseNCName(xmlParserCtxtPtr ctxt) {
3627 const xmlChar *in, *e;
3628 xmlHashedString ret;
3629 size_t count = 0;
3630 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3631 XML_MAX_TEXT_LENGTH :
3632 XML_MAX_NAME_LENGTH;
3633
3634 ret.name = NULL;
3635
3636 /*
3637 * Accelerator for simple ASCII names
3638 */
3639 in = ctxt->input->cur;
3640 e = ctxt->input->end;
3641 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3642 ((*in >= 0x41) && (*in <= 0x5A)) ||
3643 (*in == '_')) && (in < e)) {
3644 in++;
3645 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3646 ((*in >= 0x41) && (*in <= 0x5A)) ||
3647 ((*in >= 0x30) && (*in <= 0x39)) ||
3648 (*in == '_') || (*in == '-') ||
3649 (*in == '.')) && (in < e))
3650 in++;
3651 if (in >= e)
3652 goto complex;
3653 if ((*in > 0) && (*in < 0x80)) {
3654 count = in - ctxt->input->cur;
3655 if (count > maxLength) {
3656 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3657 return(ret);
3658 }
3659 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3660 ctxt->input->cur = in;
3661 ctxt->input->col += count;
3662 if (ret.name == NULL) {
3663 xmlErrMemory(ctxt, NULL);
3664 }
3665 return(ret);
3666 }
3667 }
3668complex:
3669 return(xmlParseNCNameComplex(ctxt));
3670}
3671
3672/**
3673 * xmlParseNameAndCompare:
3674 * @ctxt: an XML parser context
3675 *
3676 * parse an XML name and compares for match
3677 * (specialized for endtag parsing)
3678 *
3679 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3680 * and the name for mismatch
3681 */
3682
3683static const xmlChar *
3684xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3685 register const xmlChar *cmp = other;
3686 register const xmlChar *in;
3687 const xmlChar *ret;
3688
3689 GROW;
3690 if (ctxt->instate == XML_PARSER_EOF)
3691 return(NULL);
3692
3693 in = ctxt->input->cur;
3694 while (*in != 0 && *in == *cmp) {
3695 ++in;
3696 ++cmp;
3697 }
3698 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3699 /* success */
3700 ctxt->input->col += in - ctxt->input->cur;
3701 ctxt->input->cur = in;
3702 return (const xmlChar*) 1;
3703 }
3704 /* failure (or end of input buffer), check with full function */
3705 ret = xmlParseName (ctxt);
3706 /* strings coming from the dictionary direct compare possible */
3707 if (ret == other) {
3708 return (const xmlChar*) 1;
3709 }
3710 return ret;
3711}
3712
3713/**
3714 * xmlParseStringName:
3715 * @ctxt: an XML parser context
3716 * @str: a pointer to the string pointer (IN/OUT)
3717 *
3718 * parse an XML name.
3719 *
3720 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3721 * CombiningChar | Extender
3722 *
3723 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3724 *
3725 * [6] Names ::= Name (#x20 Name)*
3726 *
3727 * Returns the Name parsed or NULL. The @str pointer
3728 * is updated to the current location in the string.
3729 */
3730
3731static xmlChar *
3732xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3733 xmlChar buf[XML_MAX_NAMELEN + 5];
3734 const xmlChar *cur = *str;
3735 int len = 0, l;
3736 int c;
3737 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3738 XML_MAX_TEXT_LENGTH :
3739 XML_MAX_NAME_LENGTH;
3740
3741 c = CUR_SCHAR(cur, l);
3742 if (!xmlIsNameStartChar(ctxt, c)) {
3743 return(NULL);
3744 }
3745
3746 COPY_BUF(buf, len, c);
3747 cur += l;
3748 c = CUR_SCHAR(cur, l);
3749 while (xmlIsNameChar(ctxt, c)) {
3750 COPY_BUF(buf, len, c);
3751 cur += l;
3752 c = CUR_SCHAR(cur, l);
3753 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3754 /*
3755 * Okay someone managed to make a huge name, so he's ready to pay
3756 * for the processing speed.
3757 */
3758 xmlChar *buffer;
3759 int max = len * 2;
3760
3761 buffer = (xmlChar *) xmlMallocAtomic(max);
3762 if (buffer == NULL) {
3763 xmlErrMemory(ctxt, NULL);
3764 return(NULL);
3765 }
3766 memcpy(buffer, buf, len);
3767 while (xmlIsNameChar(ctxt, c)) {
3768 if (len + 10 > max) {
3769 xmlChar *tmp;
3770
3771 max *= 2;
3772 tmp = (xmlChar *) xmlRealloc(buffer, max);
3773 if (tmp == NULL) {
3774 xmlErrMemory(ctxt, NULL);
3775 xmlFree(buffer);
3776 return(NULL);
3777 }
3778 buffer = tmp;
3779 }
3780 COPY_BUF(buffer, len, c);
3781 cur += l;
3782 c = CUR_SCHAR(cur, l);
3783 if (len > maxLength) {
3784 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3785 xmlFree(buffer);
3786 return(NULL);
3787 }
3788 }
3789 buffer[len] = 0;
3790 *str = cur;
3791 return(buffer);
3792 }
3793 }
3794 if (len > maxLength) {
3795 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3796 return(NULL);
3797 }
3798 *str = cur;
3799 return(xmlStrndup(buf, len));
3800}
3801
3802/**
3803 * xmlParseNmtoken:
3804 * @ctxt: an XML parser context
3805 *
3806 * DEPRECATED: Internal function, don't use.
3807 *
3808 * parse an XML Nmtoken.
3809 *
3810 * [7] Nmtoken ::= (NameChar)+
3811 *
3812 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3813 *
3814 * Returns the Nmtoken parsed or NULL
3815 */
3816
3817xmlChar *
3818xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3819 xmlChar buf[XML_MAX_NAMELEN + 5];
3820 int len = 0, l;
3821 int c;
3822 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3823 XML_MAX_TEXT_LENGTH :
3824 XML_MAX_NAME_LENGTH;
3825
3826 c = CUR_CHAR(l);
3827
3828 while (xmlIsNameChar(ctxt, c)) {
3829 COPY_BUF(buf, len, c);
3830 NEXTL(l);
3831 c = CUR_CHAR(l);
3832 if (len >= XML_MAX_NAMELEN) {
3833 /*
3834 * Okay someone managed to make a huge token, so he's ready to pay
3835 * for the processing speed.
3836 */
3837 xmlChar *buffer;
3838 int max = len * 2;
3839
3840 buffer = (xmlChar *) xmlMallocAtomic(max);
3841 if (buffer == NULL) {
3842 xmlErrMemory(ctxt, NULL);
3843 return(NULL);
3844 }
3845 memcpy(buffer, buf, len);
3846 while (xmlIsNameChar(ctxt, c)) {
3847 if (len + 10 > max) {
3848 xmlChar *tmp;
3849
3850 max *= 2;
3851 tmp = (xmlChar *) xmlRealloc(buffer, max);
3852 if (tmp == NULL) {
3853 xmlErrMemory(ctxt, NULL);
3854 xmlFree(buffer);
3855 return(NULL);
3856 }
3857 buffer = tmp;
3858 }
3859 COPY_BUF(buffer, len, c);
3860 if (len > maxLength) {
3861 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3862 xmlFree(buffer);
3863 return(NULL);
3864 }
3865 NEXTL(l);
3866 c = CUR_CHAR(l);
3867 }
3868 buffer[len] = 0;
3869 if (ctxt->instate == XML_PARSER_EOF) {
3870 xmlFree(buffer);
3871 return(NULL);
3872 }
3873 return(buffer);
3874 }
3875 }
3876 if (ctxt->instate == XML_PARSER_EOF)
3877 return(NULL);
3878 if (len == 0)
3879 return(NULL);
3880 if (len > maxLength) {
3881 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3882 return(NULL);
3883 }
3884 return(xmlStrndup(buf, len));
3885}
3886
3887/**
3888 * xmlParseEntityValue:
3889 * @ctxt: an XML parser context
3890 * @orig: if non-NULL store a copy of the original entity value
3891 *
3892 * DEPRECATED: Internal function, don't use.
3893 *
3894 * parse a value for ENTITY declarations
3895 *
3896 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3897 * "'" ([^%&'] | PEReference | Reference)* "'"
3898 *
3899 * Returns the EntityValue parsed with reference substituted or NULL
3900 */
3901
3902xmlChar *
3903xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3904 xmlChar *buf = NULL;
3905 int len = 0;
3906 int size = XML_PARSER_BUFFER_SIZE;
3907 int c, l;
3908 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3909 XML_MAX_HUGE_LENGTH :
3910 XML_MAX_TEXT_LENGTH;
3911 xmlChar stop;
3912 xmlChar *ret = NULL;
3913 const xmlChar *cur = NULL;
3914 xmlParserInputPtr input;
3915
3916 if (RAW == '"') stop = '"';
3917 else if (RAW == '\'') stop = '\'';
3918 else {
3919 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3920 return(NULL);
3921 }
3922 buf = (xmlChar *) xmlMallocAtomic(size);
3923 if (buf == NULL) {
3924 xmlErrMemory(ctxt, NULL);
3925 return(NULL);
3926 }
3927
3928 /*
3929 * The content of the entity definition is copied in a buffer.
3930 */
3931
3932 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3933 input = ctxt->input;
3934 GROW;
3935 if (ctxt->instate == XML_PARSER_EOF)
3936 goto error;
3937 NEXT;
3938 c = CUR_CHAR(l);
3939 /*
3940 * NOTE: 4.4.5 Included in Literal
3941 * When a parameter entity reference appears in a literal entity
3942 * value, ... a single or double quote character in the replacement
3943 * text is always treated as a normal data character and will not
3944 * terminate the literal.
3945 * In practice it means we stop the loop only when back at parsing
3946 * the initial entity and the quote is found
3947 */
3948 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3949 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3950 if (len + 5 >= size) {
3951 xmlChar *tmp;
3952
3953 size *= 2;
3954 tmp = (xmlChar *) xmlRealloc(buf, size);
3955 if (tmp == NULL) {
3956 xmlErrMemory(ctxt, NULL);
3957 goto error;
3958 }
3959 buf = tmp;
3960 }
3961 COPY_BUF(buf, len, c);
3962 NEXTL(l);
3963
3964 GROW;
3965 c = CUR_CHAR(l);
3966 if (c == 0) {
3967 GROW;
3968 c = CUR_CHAR(l);
3969 }
3970
3971 if (len > maxLength) {
3972 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3973 "entity value too long\n");
3974 goto error;
3975 }
3976 }
3977 buf[len] = 0;
3978 if (ctxt->instate == XML_PARSER_EOF)
3979 goto error;
3980 if (c != stop) {
3981 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3982 goto error;
3983 }
3984 NEXT;
3985
3986 /*
3987 * Raise problem w.r.t. '&' and '%' being used in non-entities
3988 * reference constructs. Note Charref will be handled in
3989 * xmlStringDecodeEntities()
3990 */
3991 cur = buf;
3992 while (*cur != 0) { /* non input consuming */
3993 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3994 xmlChar *name;
3995 xmlChar tmp = *cur;
3996 int nameOk = 0;
3997
3998 cur++;
3999 name = xmlParseStringName(ctxt, &cur);
4000 if (name != NULL) {
4001 nameOk = 1;
4002 xmlFree(name);
4003 }
4004 if ((nameOk == 0) || (*cur != ';')) {
4005 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
4006 "EntityValue: '%c' forbidden except for entities references\n",
4007 tmp);
4008 goto error;
4009 }
4010 if ((tmp == '%') && (ctxt->inSubset == 1) &&
4011 (ctxt->inputNr == 1)) {
4012 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
4013 goto error;
4014 }
4015 if (*cur == 0)
4016 break;
4017 }
4018 cur++;
4019 }
4020
4021 /*
4022 * Then PEReference entities are substituted.
4023 *
4024 * NOTE: 4.4.7 Bypassed
4025 * When a general entity reference appears in the EntityValue in
4026 * an entity declaration, it is bypassed and left as is.
4027 * so XML_SUBSTITUTE_REF is not set here.
4028 */
4029 ++ctxt->depth;
4030 ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
4031 0, 0, 0, /* check */ 1);
4032 --ctxt->depth;
4033
4034 if (orig != NULL) {
4035 *orig = buf;
4036 buf = NULL;
4037 }
4038
4039error:
4040 if (buf != NULL)
4041 xmlFree(buf);
4042 return(ret);
4043}
4044
4045/**
4046 * xmlParseAttValueComplex:
4047 * @ctxt: an XML parser context
4048 * @len: the resulting attribute len
4049 * @normalize: whether to apply the inner normalization
4050 *
4051 * parse a value for an attribute, this is the fallback function
4052 * of xmlParseAttValue() when the attribute parsing requires handling
4053 * of non-ASCII characters, or normalization compaction.
4054 *
4055 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4056 */
4057static xmlChar *
4058xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4059 xmlChar limit = 0;
4060 xmlChar *buf = NULL;
4061 xmlChar *rep = NULL;
4062 size_t len = 0;
4063 size_t buf_size = 0;
4064 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4065 XML_MAX_HUGE_LENGTH :
4066 XML_MAX_TEXT_LENGTH;
4067 int c, l, in_space = 0;
4068 xmlChar *current = NULL;
4069 xmlEntityPtr ent;
4070
4071 if (NXT(0) == '"') {
4072 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4073 limit = '"';
4074 NEXT;
4075 } else if (NXT(0) == '\'') {
4076 limit = '\'';
4077 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4078 NEXT;
4079 } else {
4080 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4081 return(NULL);
4082 }
4083
4084 /*
4085 * allocate a translation buffer.
4086 */
4087 buf_size = XML_PARSER_BUFFER_SIZE;
4088 buf = (xmlChar *) xmlMallocAtomic(buf_size);
4089 if (buf == NULL) goto mem_error;
4090
4091 /*
4092 * OK loop until we reach one of the ending char or a size limit.
4093 */
4094 c = CUR_CHAR(l);
4095 while (((NXT(0) != limit) && /* checked */
4096 (IS_CHAR(c)) && (c != '<')) &&
4097 (ctxt->instate != XML_PARSER_EOF)) {
4098 if (c == '&') {
4099 in_space = 0;
4100 if (NXT(1) == '#') {
4101 int val = xmlParseCharRef(ctxt);
4102
4103 if (val == '&') {
4104 if (ctxt->replaceEntities) {
4105 if (len + 10 > buf_size) {
4106 growBuffer(buf, 10);
4107 }
4108 buf[len++] = '&';
4109 } else {
4110 /*
4111 * The reparsing will be done in xmlStringGetNodeList()
4112 * called by the attribute() function in SAX.c
4113 */
4114 if (len + 10 > buf_size) {
4115 growBuffer(buf, 10);
4116 }
4117 buf[len++] = '&';
4118 buf[len++] = '#';
4119 buf[len++] = '3';
4120 buf[len++] = '8';
4121 buf[len++] = ';';
4122 }
4123 } else if (val != 0) {
4124 if (len + 10 > buf_size) {
4125 growBuffer(buf, 10);
4126 }
4127 len += xmlCopyChar(0, &buf[len], val);
4128 }
4129 } else {
4130 ent = xmlParseEntityRef(ctxt);
4131 if ((ent != NULL) &&
4132 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4133 if (len + 10 > buf_size) {
4134 growBuffer(buf, 10);
4135 }
4136 if ((ctxt->replaceEntities == 0) &&
4137 (ent->content[0] == '&')) {
4138 buf[len++] = '&';
4139 buf[len++] = '#';
4140 buf[len++] = '3';
4141 buf[len++] = '8';
4142 buf[len++] = ';';
4143 } else {
4144 buf[len++] = ent->content[0];
4145 }
4146 } else if ((ent != NULL) &&
4147 (ctxt->replaceEntities != 0)) {
4148 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4149 if (xmlParserEntityCheck(ctxt, ent->length))
4150 goto error;
4151
4152 ++ctxt->depth;
4153 rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4154 ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4155 /* check */ 1);
4156 --ctxt->depth;
4157 if (rep != NULL) {
4158 current = rep;
4159 while (*current != 0) { /* non input consuming */
4160 if ((*current == 0xD) || (*current == 0xA) ||
4161 (*current == 0x9)) {
4162 buf[len++] = 0x20;
4163 current++;
4164 } else
4165 buf[len++] = *current++;
4166 if (len + 10 > buf_size) {
4167 growBuffer(buf, 10);
4168 }
4169 }
4170 xmlFree(rep);
4171 rep = NULL;
4172 }
4173 } else {
4174 if (len + 10 > buf_size) {
4175 growBuffer(buf, 10);
4176 }
4177 if (ent->content != NULL)
4178 buf[len++] = ent->content[0];
4179 }
4180 } else if (ent != NULL) {
4181 int i = xmlStrlen(ent->name);
4182 const xmlChar *cur = ent->name;
4183
4184 /*
4185 * We also check for recursion and amplification
4186 * when entities are not substituted. They're
4187 * often expanded later.
4188 */
4189 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4190 (ent->content != NULL)) {
4191 if ((ent->flags & XML_ENT_CHECKED) == 0) {
4192 unsigned long oldCopy = ctxt->sizeentcopy;
4193
4194 ctxt->sizeentcopy = ent->length;
4195
4196 ++ctxt->depth;
4197 rep = xmlStringDecodeEntitiesInt(ctxt,
4198 ent->content, ent->length,
4199 XML_SUBSTITUTE_REF, 0, 0, 0,
4200 /* check */ 1);
4201 --ctxt->depth;
4202
4203 /*
4204 * If we're parsing DTD content, the entity
4205 * might reference other entities which
4206 * weren't defined yet, so the check isn't
4207 * reliable.
4208 */
4209 if (ctxt->inSubset == 0) {
4210 ent->flags |= XML_ENT_CHECKED;
4211 ent->expandedSize = ctxt->sizeentcopy;
4212 }
4213
4214 if (rep != NULL) {
4215 xmlFree(rep);
4216 rep = NULL;
4217 } else {
4218 ent->content[0] = 0;
4219 }
4220
4221 if (xmlParserEntityCheck(ctxt, oldCopy))
4222 goto error;
4223 } else {
4224 if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4225 goto error;
4226 }
4227 }
4228
4229 /*
4230 * Just output the reference
4231 */
4232 buf[len++] = '&';
4233 while (len + i + 10 > buf_size) {
4234 growBuffer(buf, i + 10);
4235 }
4236 for (;i > 0;i--)
4237 buf[len++] = *cur++;
4238 buf[len++] = ';';
4239 }
4240 }
4241 } else {
4242 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4243 if ((len != 0) || (!normalize)) {
4244 if ((!normalize) || (!in_space)) {
4245 COPY_BUF(buf, len, 0x20);
4246 while (len + 10 > buf_size) {
4247 growBuffer(buf, 10);
4248 }
4249 }
4250 in_space = 1;
4251 }
4252 } else {
4253 in_space = 0;
4254 COPY_BUF(buf, len, c);
4255 if (len + 10 > buf_size) {
4256 growBuffer(buf, 10);
4257 }
4258 }
4259 NEXTL(l);
4260 }
4261 GROW;
4262 c = CUR_CHAR(l);
4263 if (len > maxLength) {
4264 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4265 "AttValue length too long\n");
4266 goto mem_error;
4267 }
4268 }
4269 if (ctxt->instate == XML_PARSER_EOF)
4270 goto error;
4271
4272 if ((in_space) && (normalize)) {
4273 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4274 }
4275 buf[len] = 0;
4276 if (RAW == '<') {
4277 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4278 } else if (RAW != limit) {
4279 if ((c != 0) && (!IS_CHAR(c))) {
4280 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4281 "invalid character in attribute value\n");
4282 } else {
4283 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4284 "AttValue: ' expected\n");
4285 }
4286 } else
4287 NEXT;
4288
4289 if (attlen != NULL) *attlen = len;
4290 return(buf);
4291
4292mem_error:
4293 xmlErrMemory(ctxt, NULL);
4294error:
4295 if (buf != NULL)
4296 xmlFree(buf);
4297 if (rep != NULL)
4298 xmlFree(rep);
4299 return(NULL);
4300}
4301
4302/**
4303 * xmlParseAttValue:
4304 * @ctxt: an XML parser context
4305 *
4306 * DEPRECATED: Internal function, don't use.
4307 *
4308 * parse a value for an attribute
4309 * Note: the parser won't do substitution of entities here, this
4310 * will be handled later in xmlStringGetNodeList
4311 *
4312 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4313 * "'" ([^<&'] | Reference)* "'"
4314 *
4315 * 3.3.3 Attribute-Value Normalization:
4316 * Before the value of an attribute is passed to the application or
4317 * checked for validity, the XML processor must normalize it as follows:
4318 * - a character reference is processed by appending the referenced
4319 * character to the attribute value
4320 * - an entity reference is processed by recursively processing the
4321 * replacement text of the entity
4322 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4323 * appending #x20 to the normalized value, except that only a single
4324 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4325 * parsed entity or the literal entity value of an internal parsed entity
4326 * - other characters are processed by appending them to the normalized value
4327 * If the declared value is not CDATA, then the XML processor must further
4328 * process the normalized attribute value by discarding any leading and
4329 * trailing space (#x20) characters, and by replacing sequences of space
4330 * (#x20) characters by a single space (#x20) character.
4331 * All attributes for which no declaration has been read should be treated
4332 * by a non-validating parser as if declared CDATA.
4333 *
4334 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4335 */
4336
4337
4338xmlChar *
4339xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4340 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4341 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4342}
4343
4344/**
4345 * xmlParseSystemLiteral:
4346 * @ctxt: an XML parser context
4347 *
4348 * DEPRECATED: Internal function, don't use.
4349 *
4350 * parse an XML Literal
4351 *
4352 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4353 *
4354 * Returns the SystemLiteral parsed or NULL
4355 */
4356
4357xmlChar *
4358xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4359 xmlChar *buf = NULL;
4360 int len = 0;
4361 int size = XML_PARSER_BUFFER_SIZE;
4362 int cur, l;
4363 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4364 XML_MAX_TEXT_LENGTH :
4365 XML_MAX_NAME_LENGTH;
4366 xmlChar stop;
4367 int state = ctxt->instate;
4368
4369 if (RAW == '"') {
4370 NEXT;
4371 stop = '"';
4372 } else if (RAW == '\'') {
4373 NEXT;
4374 stop = '\'';
4375 } else {
4376 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4377 return(NULL);
4378 }
4379
4380 buf = (xmlChar *) xmlMallocAtomic(size);
4381 if (buf == NULL) {
4382 xmlErrMemory(ctxt, NULL);
4383 return(NULL);
4384 }
4385 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4386 cur = CUR_CHAR(l);
4387 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4388 if (len + 5 >= size) {
4389 xmlChar *tmp;
4390
4391 size *= 2;
4392 tmp = (xmlChar *) xmlRealloc(buf, size);
4393 if (tmp == NULL) {
4394 xmlFree(buf);
4395 xmlErrMemory(ctxt, NULL);
4396 ctxt->instate = (xmlParserInputState) state;
4397 return(NULL);
4398 }
4399 buf = tmp;
4400 }
4401 COPY_BUF(buf, len, cur);
4402 if (len > maxLength) {
4403 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4404 xmlFree(buf);
4405 ctxt->instate = (xmlParserInputState) state;
4406 return(NULL);
4407 }
4408 NEXTL(l);
4409 cur = CUR_CHAR(l);
4410 }
4411 buf[len] = 0;
4412 if (ctxt->instate == XML_PARSER_EOF) {
4413 xmlFree(buf);
4414 return(NULL);
4415 }
4416 ctxt->instate = (xmlParserInputState) state;
4417 if (!IS_CHAR(cur)) {
4418 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4419 } else {
4420 NEXT;
4421 }
4422 return(buf);
4423}
4424
4425/**
4426 * xmlParsePubidLiteral:
4427 * @ctxt: an XML parser context
4428 *
4429 * DEPRECATED: Internal function, don't use.
4430 *
4431 * parse an XML public literal
4432 *
4433 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4434 *
4435 * Returns the PubidLiteral parsed or NULL.
4436 */
4437
4438xmlChar *
4439xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4440 xmlChar *buf = NULL;
4441 int len = 0;
4442 int size = XML_PARSER_BUFFER_SIZE;
4443 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4444 XML_MAX_TEXT_LENGTH :
4445 XML_MAX_NAME_LENGTH;
4446 xmlChar cur;
4447 xmlChar stop;
4448 xmlParserInputState oldstate = ctxt->instate;
4449
4450 if (RAW == '"') {
4451 NEXT;
4452 stop = '"';
4453 } else if (RAW == '\'') {
4454 NEXT;
4455 stop = '\'';
4456 } else {
4457 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4458 return(NULL);
4459 }
4460 buf = (xmlChar *) xmlMallocAtomic(size);
4461 if (buf == NULL) {
4462 xmlErrMemory(ctxt, NULL);
4463 return(NULL);
4464 }
4465 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4466 cur = CUR;
4467 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4468 if (len + 1 >= size) {
4469 xmlChar *tmp;
4470
4471 size *= 2;
4472 tmp = (xmlChar *) xmlRealloc(buf, size);
4473 if (tmp == NULL) {
4474 xmlErrMemory(ctxt, NULL);
4475 xmlFree(buf);
4476 return(NULL);
4477 }
4478 buf = tmp;
4479 }
4480 buf[len++] = cur;
4481 if (len > maxLength) {
4482 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4483 xmlFree(buf);
4484 return(NULL);
4485 }
4486 NEXT;
4487 cur = CUR;
4488 }
4489 buf[len] = 0;
4490 if (ctxt->instate == XML_PARSER_EOF) {
4491 xmlFree(buf);
4492 return(NULL);
4493 }
4494 if (cur != stop) {
4495 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4496 } else {
4497 NEXTL(1);
4498 }
4499 ctxt->instate = oldstate;
4500 return(buf);
4501}
4502
4503static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4504
/*
 * Lookup table for the inner loop of the char data scanner, indexed by
 * byte value. A non-zero entry (the byte itself) lets the scan go on;
 * a zero entry stops it. Entries are zero for the control bytes other
 * than 0x9 (so CR and LF are handled separately), for '&', '<' and ']',
 * and for every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4542
/**
 * xmlParseCharDataInternal:
 * @ctxt:  an XML parser context
 * @partial:  buffer may contain partial UTF-8 sequences
 *
 * Parse character data. Always makes progress if the first char isn't
 * '<' or '&'.
 *
 * This is the accelerated path: it scans the input buffer in place and
 * hands runs of bytes directly to the SAX characters/ignorableWhitespace
 * callbacks without copying them. As soon as a byte is met that this
 * loop does not handle (see the loop condition at the bottom), the work
 * is delegated to xmlParseCharDataComplex().
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Snapshot of the position. It is refreshed every time a run has been
     * delivered to SAX and written back before falling through to the
     * complex parser at the end of the function.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* Skip a leading run made only of spaces and line feeds. */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        if (*in == '<') {
            /*
             * Everything between input->cur and the '<' consisted of
             * spaces and line feeds only. Deliver it (if non-empty) and
             * stop: the markup is handled by the caller.
             */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                const xmlChar *tmp = ctxt->input->cur;
                /* Advance the cursor before invoking any callback. */
                ctxt->input->cur = in;

                if ((ctxt->sax != NULL) &&
                    (ctxt->disableSAX == 0) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters)) {
                    /*
                     * The two callbacks differ, so the run has to be
                     * classified with areBlanks().
                     */
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                } else if ((ctxt->sax != NULL) &&
                           (ctxt->disableSAX == 0) &&
                           (ctxt->sax->characters != NULL)) {
                    ctxt->sax->characters(ctxt->userData,
                                          tmp, nbchar);
                }
            }
            return;
        }

get_more:
        /*
         * Advance over every byte whose test_char_data[] entry is
         * non-zero; the column is tracked in a local and stored once.
         */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            /* "]]>" is not allowed in character data. */
            if ((in[1] == ']') && (in[2] == '>')) {
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                if (ctxt->instate != XML_PARSER_EOF)
                    ctxt->input->cur = in + 1;
                return;
            }
            /* A lone ']' is ordinary text. */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* Deliver the run accumulated between input->cur and in. */
        nbchar = in - ctxt->input->cur;
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->disableSAX == 0) &&
                (ctxt->sax->ignorableWhitespace !=
                 ctxt->sax->characters) &&
                (IS_BLANK_CH(*ctxt->input->cur))) {
                /*
                 * The run starts with a blank and the two callbacks
                 * differ: let areBlanks() decide which one receives it.
                 */
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if (areBlanks(ctxt, tmp, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    if (*ctxt->space == -1)
                        *ctxt->space = -2;
                }
                line = ctxt->input->line;
                col = ctxt->input->col;
            } else if ((ctxt->sax != NULL) &&
                       (ctxt->disableSAX == 0)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData,
                                          ctxt->input->cur, nbchar);
                line = ctxt->input->line;
                col = ctxt->input->col;
            }
            /* The parser may have been stopped during the callback. */
            if (ctxt->instate == XML_PARSER_EOF)
                return;
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF pair: the cursor is placed on the LF, so the CR
                 * is not part of the next run handed to SAX.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* Lone CR: step back and let the code below deal with it. */
            in--;
        }
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF)
            return;
        /* input->cur is re-read after SHRINK/GROW. */
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside what the loop above accepts: write the
     * saved position back and continue with the generic routine.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
4700
4701/**
4702 * xmlParseCharDataComplex:
4703 * @ctxt: an XML parser context
4704 * @cdata: int indicating whether we are within a CDATA section
4705 *
4706 * Always makes progress if the first char isn't '<' or '&'.
4707 *
4708 * parse a CharData section.this is the fallback function
4709 * of xmlParseCharData() when the parsing requires handling
4710 * of non-ASCII characters.
4711 */
4712static void
4713xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4714 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4715 int nbchar = 0;
4716 int cur, l;
4717
4718 cur = CUR_CHAR(l);
4719 while ((cur != '<') && /* checked */
4720 (cur != '&') &&
4721 (IS_CHAR(cur))) {
4722 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4723 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4724 }
4725 COPY_BUF(buf, nbchar, cur);
4726 /* move current position before possible calling of ctxt->sax->characters */
4727 NEXTL(l);
4728 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4729 buf[nbchar] = 0;
4730
4731 /*
4732 * OK the segment is to be consumed as chars.
4733 */
4734 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4735 if (areBlanks(ctxt, buf, nbchar, 0)) {
4736 if (ctxt->sax->ignorableWhitespace != NULL)
4737 ctxt->sax->ignorableWhitespace(ctxt->userData,
4738 buf, nbchar);
4739 } else {
4740 if (ctxt->sax->characters != NULL)
4741 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4742 if ((ctxt->sax->characters !=
4743 ctxt->sax->ignorableWhitespace) &&
4744 (*ctxt->space == -1))
4745 *ctxt->space = -2;
4746 }
4747 }
4748 nbchar = 0;
4749 /* something really bad happened in the SAX callback */
4750 if (ctxt->instate != XML_PARSER_CONTENT)
4751 return;
4752 SHRINK;
4753 }
4754 cur = CUR_CHAR(l);
4755 }
4756 if (ctxt->instate == XML_PARSER_EOF)
4757 return;
4758 if (nbchar != 0) {
4759 buf[nbchar] = 0;
4760 /*
4761 * OK the segment is to be consumed as chars.
4762 */
4763 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4764 if (areBlanks(ctxt, buf, nbchar, 0)) {
4765 if (ctxt->sax->ignorableWhitespace != NULL)
4766 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4767 } else {
4768 if (ctxt->sax->characters != NULL)
4769 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4770 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4771 (*ctxt->space == -1))
4772 *ctxt->space = -2;
4773 }
4774 }
4775 }
4776 /*
4777 * cur == 0 can mean
4778 *
4779 * - XML_PARSER_EOF or memory error. This is checked above.
4780 * - An actual 0 character.
4781 * - End of buffer.
4782 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4783 */
4784 if (ctxt->input->cur < ctxt->input->end) {
4785 if ((cur == 0) && (CUR != 0)) {
4786 if (partial == 0) {
4787 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4788 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4789 NEXTL(1);
4790 }
4791 } else if ((cur != '<') && (cur != '&')) {
4792 /* Generate the error and skip the offending character */
4793 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4794 "PCDATA invalid Char value %d\n", cur);
4795 NEXTL(l);
4796 }
4797 }
4798}
4799
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  unused
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Thin public wrapper which forwards to xmlParseCharDataInternal() with
 * its @partial argument set to 0. The @cdata argument is ignored.
 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
    xmlParseCharDataInternal(ctxt, 0);
}
4811
4812/**
4813 * xmlParseExternalID:
4814 * @ctxt: an XML parser context
4815 * @publicID: a xmlChar** receiving PubidLiteral
4816 * @strict: indicate whether we should restrict parsing to only
4817 * production [75], see NOTE below
4818 *
4819 * DEPRECATED: Internal function, don't use.
4820 *
4821 * Parse an External ID or a Public ID
4822 *
4823 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4824 * 'PUBLIC' S PubidLiteral S SystemLiteral
4825 *
4826 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4827 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4828 *
4829 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4830 *
4831 * Returns the function returns SystemLiteral and in the second
4832 * case publicID receives PubidLiteral, is strict is off
4833 * it is possible to return NULL and have publicID set.
4834 */
4835
4836xmlChar *
4837xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4838 xmlChar *URI = NULL;
4839
4840 *publicID = NULL;
4841 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4842 SKIP(6);
4843 if (SKIP_BLANKS == 0) {
4844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4845 "Space required after 'SYSTEM'\n");
4846 }
4847 URI = xmlParseSystemLiteral(ctxt);
4848 if (URI == NULL) {
4849 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4850 }
4851 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4852 SKIP(6);
4853 if (SKIP_BLANKS == 0) {
4854 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4855 "Space required after 'PUBLIC'\n");
4856 }
4857 *publicID = xmlParsePubidLiteral(ctxt);
4858 if (*publicID == NULL) {
4859 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4860 }
4861 if (strict) {
4862 /*
4863 * We don't handle [83] so "S SystemLiteral" is required.
4864 */
4865 if (SKIP_BLANKS == 0) {
4866 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4867 "Space required after the Public Identifier\n");
4868 }
4869 } else {
4870 /*
4871 * We handle [83] so we return immediately, if
4872 * "S SystemLiteral" is not detected. We skip blanks if no
4873 * system literal was found, but this is harmless since we must
4874 * be at the end of a NotationDecl.
4875 */
4876 if (SKIP_BLANKS == 0) return(NULL);
4877 if ((CUR != '\'') && (CUR != '"')) return(NULL);
4878 }
4879 URI = xmlParseSystemLiteral(ctxt);
4880 if (URI == NULL) {
4881 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4882 }
4883 }
4884 return(URI);
4885}
4886
/**
 * xmlParseCommentComplex:
 * @ctxt:  an XML parser context
 * @buf:  the already parsed part of the buffer
 * @len:  number of bytes in the buffer
 * @size:  allocated size of the buffer
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 * This is the slow routine in case the accelerator for ascii didn't work
 *
 * @buf may be NULL, in which case a fresh buffer is allocated here and
 * @len/@size are ignored. In every case the buffer is released with
 * xmlFree() before this function returns, so the caller must not use
 * @buf afterwards.
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
                       size_t len, size_t size) {
    /*
     * q, r and cur are three consecutive characters of the input (q is
     * the oldest); ql, rl and l are the lengths reported for them by
     * CUR_CHAR(). Only q is copied to the buffer on each iteration.
     */
    int q, ql;
    int r, rl;
    int cur, l;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_HUGE_LENGTH :
                       XML_MAX_TEXT_LENGTH;
    int inputid;

    inputid = ctxt->input->id;

    if (buf == NULL) {
        len = 0;
        size = XML_PARSER_BUFFER_SIZE;
        buf = (xmlChar *) xmlMallocAtomic(size);
        if (buf == NULL) {
            xmlErrMemory(ctxt, NULL);
            return;
        }
    }
    /* Prime the window with the first two characters. */
    q = CUR_CHAR(ql);
    if (q == 0)
        goto not_terminated;
    if (!IS_CHAR(q)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseComment: invalid xmlChar value %d\n",
                          q);
        xmlFree (buf);
        return;
    }
    NEXTL(ql);
    r = CUR_CHAR(rl);
    if (r == 0)
        goto not_terminated;
    if (!IS_CHAR(r)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseComment: invalid xmlChar value %d\n",
                          r);
        xmlFree (buf);
        return;
    }
    NEXTL(rl);
    cur = CUR_CHAR(l);
    if (cur == 0)
        goto not_terminated;
    /* Loop until the window holds "-->" or an invalid char is met. */
    while (IS_CHAR(cur) && /* checked */
           ((cur != '>') ||
            (r != '-') || (q != '-'))) {
        /* "--" not followed by '>' is an error, parsing continues. */
        if ((r == '-') && (q == '-')) {
            xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
        }
        /* Grow the buffer when fewer than 5 spare bytes remain. */
        if (len + 5 >= size) {
            xmlChar *new_buf;
            size_t new_size;

            new_size = size * 2;
            new_buf = (xmlChar *) xmlRealloc(buf, new_size);
            if (new_buf == NULL) {
                xmlFree (buf);
                xmlErrMemory(ctxt, NULL);
                return;
            }
            buf = new_buf;
            size = new_size;
        }
        COPY_BUF(buf, len, q);
        if (len > maxLength) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                              "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }

        /* Slide the window by one character. */
        q = r;
        ql = rl;
        r = cur;
        rl = l;

        NEXTL(l);
        cur = CUR_CHAR(l);

    }
    /*
     * q and r are not copied after the loop; on normal termination they
     * are the two hyphens of the closing "-->".
     */
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF) {
        xmlFree(buf);
        return;
    }
    if (cur == 0) {
        xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                          "Comment not terminated \n<!--%.50s\n", buf);
    } else if (!IS_CHAR(cur)) {
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                          "xmlParseComment: invalid xmlChar value %d\n",
                          cur);
    } else {
        /* cur is the '>' closing the comment. */
        if (inputid != ctxt->input->id) {
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                           "Comment doesn't start and stop in the same"
                           " entity\n");
        }
        NEXT;
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->comment(ctxt->userData, buf);
    }
    xmlFree(buf);
    return;
not_terminated:
    /* Input ended while priming the window. */
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                      "Comment not terminated\n", NULL);
    xmlFree(buf);
    return;
}
5016
5017/**
5018 * xmlParseComment:
5019 * @ctxt: an XML parser context
5020 *
5021 * DEPRECATED: Internal function, don't use.
5022 *
5023 * Parse an XML (SGML) comment. Always consumes '<!'.
5024 *
5025 * The spec says that "For compatibility, the string "--" (double-hyphen)
5026 * must not occur within comments. "
5027 *
5028 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5029 */
5030void
5031xmlParseComment(xmlParserCtxtPtr ctxt) {
5032 xmlChar *buf = NULL;
5033 size_t size = XML_PARSER_BUFFER_SIZE;
5034 size_t len = 0;
5035 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5036 XML_MAX_HUGE_LENGTH :
5037 XML_MAX_TEXT_LENGTH;
5038 xmlParserInputState state;
5039 const xmlChar *in;
5040 size_t nbchar = 0;
5041 int ccol;
5042 int inputid;
5043
5044 /*
5045 * Check that there is a comment right here.
5046 */
5047 if ((RAW != '<') || (NXT(1) != '!'))
5048 return;
5049 SKIP(2);
5050 if ((RAW != '-') || (NXT(1) != '-'))
5051 return;
5052 state = ctxt->instate;
5053 ctxt->instate = XML_PARSER_COMMENT;
5054 inputid = ctxt->input->id;
5055 SKIP(2);
5056 GROW;
5057
5058 /*
5059 * Accelerated common case where input don't need to be
5060 * modified before passing it to the handler.
5061 */
5062 in = ctxt->input->cur;
5063 do {
5064 if (*in == 0xA) {
5065 do {
5066 ctxt->input->line++; ctxt->input->col = 1;
5067 in++;
5068 } while (*in == 0xA);
5069 }
5070get_more:
5071 ccol = ctxt->input->col;
5072 while (((*in > '-') && (*in <= 0x7F)) ||
5073 ((*in >= 0x20) && (*in < '-')) ||
5074 (*in == 0x09)) {
5075 in++;
5076 ccol++;
5077 }
5078 ctxt->input->col = ccol;
5079 if (*in == 0xA) {
5080 do {
5081 ctxt->input->line++; ctxt->input->col = 1;
5082 in++;
5083 } while (*in == 0xA);
5084 goto get_more;
5085 }
5086 nbchar = in - ctxt->input->cur;
5087 /*
5088 * save current set of data
5089 */
5090 if (nbchar > 0) {
5091 if (buf == NULL) {
5092 if ((*in == '-') && (in[1] == '-'))
5093 size = nbchar + 1;
5094 else
5095 size = XML_PARSER_BUFFER_SIZE + nbchar;
5096 buf = (xmlChar *) xmlMallocAtomic(size);
5097 if (buf == NULL) {
5098 xmlErrMemory(ctxt, NULL);
5099 ctxt->instate = state;
5100 return;
5101 }
5102 len = 0;
5103 } else if (len + nbchar + 1 >= size) {
5104 xmlChar *new_buf;
5105 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5106 new_buf = (xmlChar *) xmlRealloc(buf, size);
5107 if (new_buf == NULL) {
5108 xmlFree (buf);
5109 xmlErrMemory(ctxt, NULL);
5110 ctxt->instate = state;
5111 return;
5112 }
5113 buf = new_buf;
5114 }
5115 memcpy(&buf[len], ctxt->input->cur, nbchar);
5116 len += nbchar;
5117 buf[len] = 0;
5118 }
5119 if (len > maxLength) {
5120 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5121 "Comment too big found", NULL);
5122 xmlFree (buf);
5123 return;
5124 }
5125 ctxt->input->cur = in;
5126 if (*in == 0xA) {
5127 in++;
5128 ctxt->input->line++; ctxt->input->col = 1;
5129 }
5130 if (*in == 0xD) {
5131 in++;
5132 if (*in == 0xA) {
5133 ctxt->input->cur = in;
5134 in++;
5135 ctxt->input->line++; ctxt->input->col = 1;
5136 goto get_more;
5137 }
5138 in--;
5139 }
5140 SHRINK;
5141 GROW;
5142 if (ctxt->instate == XML_PARSER_EOF) {
5143 xmlFree(buf);
5144 return;
5145 }
5146 in = ctxt->input->cur;
5147 if (*in == '-') {
5148 if (in[1] == '-') {
5149 if (in[2] == '>') {
5150 if (ctxt->input->id != inputid) {
5151 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5152 "comment doesn't start and stop in the"
5153 " same entity\n");
5154 }
5155 SKIP(3);
5156 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5157 (!ctxt->disableSAX)) {
5158 if (buf != NULL)
5159 ctxt->sax->comment(ctxt->userData, buf);
5160 else
5161 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5162 }
5163 if (buf != NULL)
5164 xmlFree(buf);
5165 if (ctxt->instate != XML_PARSER_EOF)
5166 ctxt->instate = state;
5167 return;
5168 }
5169 if (buf != NULL) {
5170 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5171 "Double hyphen within comment: "
5172 "<!--%.50s\n",
5173 buf);
5174 } else
5175 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5176 "Double hyphen within comment\n", NULL);
5177 if (ctxt->instate == XML_PARSER_EOF) {
5178 xmlFree(buf);
5179 return;
5180 }
5181 in++;
5182 ctxt->input->col++;
5183 }
5184 in++;
5185 ctxt->input->col++;
5186 goto get_more;
5187 }
5188 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5189 xmlParseCommentComplex(ctxt, buf, len, size);
5190 ctxt->instate = state;
5191 return;
5192}
5193
5194
5195/**
5196 * xmlParsePITarget:
5197 * @ctxt: an XML parser context
5198 *
5199 * DEPRECATED: Internal function, don't use.
5200 *
5201 * parse the name of a PI
5202 *
5203 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5204 *
5205 * Returns the PITarget name or NULL
5206 */
5207
5208const xmlChar *
5209xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5210 const xmlChar *name;
5211
5212 name = xmlParseName(ctxt);
5213 if ((name != NULL) &&
5214 ((name[0] == 'x') || (name[0] == 'X')) &&
5215 ((name[1] == 'm') || (name[1] == 'M')) &&
5216 ((name[2] == 'l') || (name[2] == 'L'))) {
5217 int i;
5218 if ((name[0] == 'x') && (name[1] == 'm') &&
5219 (name[2] == 'l') && (name[3] == 0)) {
5220 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5221 "XML declaration allowed only at the start of the document\n");
5222 return(name);
5223 } else if (name[3] == 0) {
5224 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5225 return(name);
5226 }
5227 for (i = 0;;i++) {
5228 if (xmlW3CPIs[i] == NULL) break;
5229 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5230 return(name);
5231 }
5232 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5233 "xmlParsePITarget: invalid name prefix 'xml'\n",
5234 NULL, NULL);
5235 }
5236 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5237 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5238 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5239 }
5240 return(name);
5241}
5242
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The value must be made of the word "catalog", an '=' sign and a
 * quoted URL, with optional blanks around each of them and nothing
 * else after the closing quote. The URL is then added to the catalogs
 * of the parsing context so that it can be used if there is a
 * resolution need further down in the document.
 *
 * A warning is emitted on a syntax error, except when the '=' sign is
 * missing, in which case the function returns silently.
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* The pseudo-attribute name. */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* The '=' sign. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* The opening quote, which also selects the closing one. */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* The URL runs up to the matching quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url == NULL)
        return;
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
    xmlFree(url);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5304
5305/**
5306 * xmlParsePI:
5307 * @ctxt: an XML parser context
5308 *
5309 * DEPRECATED: Internal function, don't use.
5310 *
5311 * parse an XML Processing Instruction.
5312 *
5313 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5314 *
5315 * The processing is transferred to SAX once parsed.
5316 */
5317
5318void
5319xmlParsePI(xmlParserCtxtPtr ctxt) {
5320 xmlChar *buf = NULL;
5321 size_t len = 0;
5322 size_t size = XML_PARSER_BUFFER_SIZE;
5323 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5324 XML_MAX_HUGE_LENGTH :
5325 XML_MAX_TEXT_LENGTH;
5326 int cur, l;
5327 const xmlChar *target;
5328 xmlParserInputState state;
5329
5330 if ((RAW == '<') && (NXT(1) == '?')) {
5331 int inputid = ctxt->input->id;
5332 state = ctxt->instate;
5333 ctxt->instate = XML_PARSER_PI;
5334 /*
5335 * this is a Processing Instruction.
5336 */
5337 SKIP(2);
5338
5339 /*
5340 * Parse the target name and check for special support like
5341 * namespace.
5342 */
5343 target = xmlParsePITarget(ctxt);
5344 if (target != NULL) {
5345 if ((RAW == '?') && (NXT(1) == '>')) {
5346 if (inputid != ctxt->input->id) {
5347 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5348 "PI declaration doesn't start and stop in"
5349 " the same entity\n");
5350 }
5351 SKIP(2);
5352
5353 /*
5354 * SAX: PI detected.
5355 */
5356 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5357 (ctxt->sax->processingInstruction != NULL))
5358 ctxt->sax->processingInstruction(ctxt->userData,
5359 target, NULL);
5360 if (ctxt->instate != XML_PARSER_EOF)
5361 ctxt->instate = state;
5362 return;
5363 }
5364 buf = (xmlChar *) xmlMallocAtomic(size);
5365 if (buf == NULL) {
5366 xmlErrMemory(ctxt, NULL);
5367 ctxt->instate = state;
5368 return;
5369 }
5370 if (SKIP_BLANKS == 0) {
5371 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5372 "ParsePI: PI %s space expected\n", target);
5373 }
5374 cur = CUR_CHAR(l);
5375 while (IS_CHAR(cur) && /* checked */
5376 ((cur != '?') || (NXT(1) != '>'))) {
5377 if (len + 5 >= size) {
5378 xmlChar *tmp;
5379 size_t new_size = size * 2;
5380 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5381 if (tmp == NULL) {
5382 xmlErrMemory(ctxt, NULL);
5383 xmlFree(buf);
5384 ctxt->instate = state;
5385 return;
5386 }
5387 buf = tmp;
5388 size = new_size;
5389 }
5390 COPY_BUF(buf, len, cur);
5391 if (len > maxLength) {
5392 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5393 "PI %s too big found", target);
5394 xmlFree(buf);
5395 ctxt->instate = state;
5396 return;
5397 }
5398 NEXTL(l);
5399 cur = CUR_CHAR(l);
5400 }
5401 buf[len] = 0;
5402 if (ctxt->instate == XML_PARSER_EOF) {
5403 xmlFree(buf);
5404 return;
5405 }
5406 if (cur != '?') {
5407 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5408 "ParsePI: PI %s never end ...\n", target);
5409 } else {
5410 if (inputid != ctxt->input->id) {
5411 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5412 "PI declaration doesn't start and stop in"
5413 " the same entity\n");
5414 }
5415 SKIP(2);
5416
5417#ifdef LIBXML_CATALOG_ENABLED
5418 if (((state == XML_PARSER_MISC) ||
5419 (state == XML_PARSER_START)) &&
5420 (xmlStrEqual(target, XML_CATALOG_PI))) {
5421 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5422 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5423 (allow == XML_CATA_ALLOW_ALL))
5424 xmlParseCatalogPI(ctxt, buf);
5425 }
5426#endif
5427
5428
5429 /*
5430 * SAX: PI detected.
5431 */
5432 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5433 (ctxt->sax->processingInstruction != NULL))
5434 ctxt->sax->processingInstruction(ctxt->userData,
5435 target, buf);
5436 }
5437 xmlFree(buf);
5438 } else {
5439 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5440 }
5441 if (ctxt->instate != XML_PARSER_EOF)
5442 ctxt->instate = state;
5443 }
5444}
5445
5446/**
5447 * xmlParseNotationDecl:
5448 * @ctxt: an XML parser context
5449 *
5450 * DEPRECATED: Internal function, don't use.
5451 *
5452 * Parse a notation declaration. Always consumes '<!'.
5453 *
5454 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5455 *
5456 * Hence there is actually 3 choices:
5457 * 'PUBLIC' S PubidLiteral
5458 * 'PUBLIC' S PubidLiteral S SystemLiteral
5459 * and 'SYSTEM' S SystemLiteral
5460 *
5461 * See the NOTE on xmlParseExternalID().
5462 */
5463
5464void
5465xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5466 const xmlChar *name;
5467 xmlChar *Pubid;
5468 xmlChar *Systemid;
5469
5470 if ((CUR != '<') || (NXT(1) != '!'))
5471 return;
5472 SKIP(2);
5473
5474 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5475 int inputid = ctxt->input->id;
5476 SKIP(8);
5477 if (SKIP_BLANKS == 0) {
5478 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5479 "Space required after '<!NOTATION'\n");
5480 return;
5481 }
5482
5483 name = xmlParseName(ctxt);
5484 if (name == NULL) {
5485 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5486 return;
5487 }
5488 if (xmlStrchr(name, ':') != NULL) {
5489 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5490 "colons are forbidden from notation names '%s'\n",
5491 name, NULL, NULL);
5492 }
5493 if (SKIP_BLANKS == 0) {
5494 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495 "Space required after the NOTATION name'\n");
5496 return;
5497 }
5498
5499 /*
5500 * Parse the IDs.
5501 */
5502 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5503 SKIP_BLANKS;
5504
5505 if (RAW == '>') {
5506 if (inputid != ctxt->input->id) {
5507 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5508 "Notation declaration doesn't start and stop"
5509 " in the same entity\n");
5510 }
5511 NEXT;
5512 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5513 (ctxt->sax->notationDecl != NULL))
5514 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5515 } else {
5516 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5517 }
5518 if (Systemid != NULL) xmlFree(Systemid);
5519 if (Pubid != NULL) xmlFree(Pubid);
5520 }
5521}
5522
/**
 * xmlParseEntityDecl:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse an entity declaration. Always consumes '<!'.
 *
 * [70] EntityDecl ::= GEDecl | PEDecl
 *
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 *
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 *
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 *
 * [74] PEDef ::= EntityValue | ExternalID
 *
 * [76] NDataDecl ::= S 'NDATA' S Name
 *
 * [ VC: Notation Declared ]
 * The Name must match the declared name of a notation.
 *
 * The declaration is reported through the SAX entityDecl or
 * unparsedEntityDecl callbacks. All strings allocated while parsing
 * (value, URI, public literal and, unless it was attached to the entity,
 * the raw value) are freed before returning.
 */

void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name = NULL;
    xmlChar *value = NULL;
    xmlChar *URI = NULL, *literal = NULL;
    const xmlChar *ndata = NULL;
    int isParameter = 0;
    /* Raw entity value as returned by xmlParseEntityValue(). */
    xmlChar *orig = NULL;

    if ((CUR != '<') || (NXT(1) != '!'))
        return;
    SKIP(2);

    /* GROW; done in the caller */
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
        int inputid = ctxt->input->id;
        SKIP(6);
        if (SKIP_BLANKS == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after '<!ENTITY'\n");
        }

        /* A '%' after the keyword marks a parameter entity. */
        if (RAW == '%') {
            NEXT;
            if (SKIP_BLANKS == 0) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                               "Space required after '%%'\n");
            }
            isParameter = 1;
        }

        name = xmlParseName(ctxt);
        if (name == NULL) {
            /* Nothing has been allocated yet, a plain return is fine. */
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "xmlParseEntityDecl: no name\n");
            return;
        }
        if (xmlStrchr(name, ':') != NULL) {
            xmlNsErr(ctxt, XML_NS_ERR_COLON,
                     "colons are forbidden from entities names '%s'\n",
                     name, NULL, NULL);
        }
        if (SKIP_BLANKS == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after the entity name\n");
        }

        ctxt->instate = XML_PARSER_ENTITY_DECL;
        /*
         * handle the various case of definitions...
         */
        if (isParameter) {
            if ((RAW == '"') || (RAW == '\'')) {
                /* Internal parameter entity: PEDef ::= EntityValue */
                value = xmlParseEntityValue(ctxt, &orig);
                if (value) {
                    if ((ctxt->sax != NULL) &&
                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                        ctxt->sax->entityDecl(ctxt->userData, name,
                                    XML_INTERNAL_PARAMETER_ENTITY,
                                    NULL, NULL, value);
                }
            } else {
                /* External parameter entity: PEDef ::= ExternalID */
                URI = xmlParseExternalID(ctxt, &literal, 1);
                if ((URI == NULL) && (literal == NULL)) {
                    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
                }
                if (URI) {
                    xmlURIPtr uri;

                    uri = xmlParseURI((const char *) URI);
                    if (uri == NULL) {
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
                                     "Invalid URI: %s\n", URI);
                        /*
                         * This really ought to be a well formedness error
                         * but the XML Core WG decided otherwise c.f. issue
                         * E26 of the XML erratas.
                         */
                    } else {
                        if (uri->fragment != NULL) {
                            /*
                             * Okay this is foolish to block those but not
                             * invalid URIs.
                             */
                            xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
                        } else {
                            if ((ctxt->sax != NULL) &&
                                (!ctxt->disableSAX) &&
                                (ctxt->sax->entityDecl != NULL))
                                ctxt->sax->entityDecl(ctxt->userData, name,
                                            XML_EXTERNAL_PARAMETER_ENTITY,
                                            literal, URI, NULL);
                        }
                        xmlFreeURI(uri);
                    }
                }
            }
        } else {
            if ((RAW == '"') || (RAW == '\'')) {
                /*
                 * Internal general entity: EntityDef ::= EntityValue.
                 * Unlike the parameter entity case above, the callback is
                 * invoked without checking that value is non-NULL.
                 */
                value = xmlParseEntityValue(ctxt, &orig);
                if ((ctxt->sax != NULL) &&
                    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                    ctxt->sax->entityDecl(ctxt->userData, name,
                                XML_INTERNAL_GENERAL_ENTITY,
                                NULL, NULL, value);
                /*
                 * For expat compatibility in SAX mode.
                 * When there is no document, or the document is the
                 * SAX_COMPAT_MODE placeholder, the entity is also
                 * registered through xmlSAX2EntityDecl(), creating the
                 * placeholder document and a "fake" internal subset on
                 * demand.
                 */
                if ((ctxt->myDoc == NULL) ||
                    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
                    if (ctxt->myDoc == NULL) {
                        ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
                        if (ctxt->myDoc == NULL) {
                            xmlErrMemory(ctxt, "New Doc failed");
                            goto done;
                        }
                        ctxt->myDoc->properties = XML_DOC_INTERNAL;
                    }
                    if (ctxt->myDoc->intSubset == NULL)
                        ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
                                            BAD_CAST "fake", NULL, NULL);

                    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
                                      NULL, NULL, value);
                }
            } else {
                /* External general entity: ExternalID NDataDecl? */
                URI = xmlParseExternalID(ctxt, &literal, 1);
                if ((URI == NULL) && (literal == NULL)) {
                    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
                }
                if (URI) {
                    xmlURIPtr uri;

                    /*
                     * The URI is only validated here; the declaration is
                     * reported further down whatever the outcome.
                     */
                    uri = xmlParseURI((const char *)URI);
                    if (uri == NULL) {
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
                                     "Invalid URI: %s\n", URI);
                        /*
                         * This really ought to be a well formedness error
                         * but the XML Core WG decided otherwise c.f. issue
                         * E26 of the XML erratas.
                         */
                    } else {
                        if (uri->fragment != NULL) {
                            /*
                             * Okay this is foolish to block those but not
                             * invalid URIs.
                             */
                            xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
                        }
                        xmlFreeURI(uri);
                    }
                }
                if ((RAW != '>') && (SKIP_BLANKS == 0)) {
                    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                                   "Space required before 'NDATA'\n");
                }
                if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
                    /* Unparsed entity: NDataDecl ::= S 'NDATA' S Name */
                    SKIP(5);
                    if (SKIP_BLANKS == 0) {
                        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                                       "Space required after 'NDATA'\n");
                    }
                    ndata = xmlParseName(ctxt);
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                        (ctxt->sax->unparsedEntityDecl != NULL))
                        ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
                                    literal, URI, ndata);
                } else {
                    if ((ctxt->sax != NULL) &&
                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                        ctxt->sax->entityDecl(ctxt->userData, name,
                                    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                                    literal, URI, NULL);
                    /*
                     * For expat compatibility in SAX mode.
                     * assuming the entity replacement was asked for
                     */
                    if ((ctxt->replaceEntities != 0) &&
                        ((ctxt->myDoc == NULL) ||
                        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
                        if (ctxt->myDoc == NULL) {
                            ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
                            if (ctxt->myDoc == NULL) {
                                xmlErrMemory(ctxt, "New Doc failed");
                                goto done;
                            }
                            ctxt->myDoc->properties = XML_DOC_INTERNAL;
                        }

                        if (ctxt->myDoc->intSubset == NULL)
                            ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
                                                BAD_CAST "fake", NULL, NULL);
                        xmlSAX2EntityDecl(ctxt, name,
                                          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                                          literal, URI, NULL);
                    }
                }
            }
        }
        if (ctxt->instate == XML_PARSER_EOF)
            goto done;
        SKIP_BLANKS;
        if (RAW != '>') {
            /* A declaration without its closing '>' stops the parser. */
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
                    "xmlParseEntityDecl: entity %s not terminated\n", name);
            xmlHaltParser(ctxt);
        } else {
            if (inputid != ctxt->input->id) {
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                               "Entity declaration doesn't start and stop in"
                               " the same entity\n");
            }
            NEXT;
        }
        if (orig != NULL) {
            /*
             * Ugly mechanism to save the raw entity value.
             * The entity just declared is looked up again and, if it has
             * no raw value yet, takes ownership of orig.
             */
            xmlEntityPtr cur = NULL;

            if (isParameter) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->getParameterEntity != NULL))
                    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
            } else {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->getEntity != NULL))
                    cur = ctxt->sax->getEntity(ctxt->userData, name);
                if ((cur == NULL) && (ctxt->userData==ctxt)) {
                    cur = xmlSAX2GetEntity(ctxt, name);
                }
            }
            if ((cur != NULL) && (cur->orig == NULL)) {
                cur->orig = orig;
                /* Ownership moved to the entity: do not free it below. */
                orig = NULL;
            }
        }

done:
        /* Common cleanup for every exit taken after name was parsed. */
        if (value != NULL) xmlFree(value);
        if (URI != NULL) xmlFree(URI);
        if (literal != NULL) xmlFree(literal);
        if (orig != NULL) xmlFree(orig);
    }
}
5793
5794/**
5795 * xmlParseDefaultDecl:
5796 * @ctxt: an XML parser context
5797 * @value: Receive a possible fixed default value for the attribute
5798 *
5799 * DEPRECATED: Internal function, don't use.
5800 *
5801 * Parse an attribute default declaration
5802 *
5803 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5804 *
5805 * [ VC: Required Attribute ]
5806 * if the default declaration is the keyword #REQUIRED, then the
5807 * attribute must be specified for all elements of the type in the
5808 * attribute-list declaration.
5809 *
5810 * [ VC: Attribute Default Legal ]
5811 * The declared default value must meet the lexical constraints of
5812 * the declared attribute type c.f. xmlValidateAttributeDecl()
5813 *
5814 * [ VC: Fixed Attribute Default ]
5815 * if an attribute has a default value declared with the #FIXED
5816 * keyword, instances of that attribute must match the default value.
5817 *
5818 * [ WFC: No < in Attribute Values ]
5819 * handled in xmlParseAttValue()
5820 *
5821 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5822 * or XML_ATTRIBUTE_FIXED.
5823 */
5824
5825int
5826xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5827 int val;
5828 xmlChar *ret;
5829
5830 *value = NULL;
5831 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5832 SKIP(9);
5833 return(XML_ATTRIBUTE_REQUIRED);
5834 }
5835 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5836 SKIP(8);
5837 return(XML_ATTRIBUTE_IMPLIED);
5838 }
5839 val = XML_ATTRIBUTE_NONE;
5840 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5841 SKIP(6);
5842 val = XML_ATTRIBUTE_FIXED;
5843 if (SKIP_BLANKS == 0) {
5844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5845 "Space required after '#FIXED'\n");
5846 }
5847 }
5848 ret = xmlParseAttValue(ctxt);
5849 ctxt->instate = XML_PARSER_DTD;
5850 if (ret == NULL) {
5851 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5852 "Attribute default value declaration error\n");
5853 } else
5854 *value = ret;
5855 return(val);
5856}
5857
5858/**
5859 * xmlParseNotationType:
5860 * @ctxt: an XML parser context
5861 *
5862 * DEPRECATED: Internal function, don't use.
5863 *
5864 * parse an Notation attribute type.
5865 *
5866 * Note: the leading 'NOTATION' S part has already being parsed...
5867 *
5868 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5869 *
5870 * [ VC: Notation Attributes ]
5871 * Values of this type must match one of the notation names included
5872 * in the declaration; all notation names in the declaration must be declared.
5873 *
5874 * Returns: the notation attribute tree built while parsing
5875 */
5876
5877xmlEnumerationPtr
5878xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5879 const xmlChar *name;
5880 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5881
5882 if (RAW != '(') {
5883 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5884 return(NULL);
5885 }
5886 do {
5887 NEXT;
5888 SKIP_BLANKS;
5889 name = xmlParseName(ctxt);
5890 if (name == NULL) {
5891 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5892 "Name expected in NOTATION declaration\n");
5893 xmlFreeEnumeration(ret);
5894 return(NULL);
5895 }
5896 tmp = ret;
5897 while (tmp != NULL) {
5898 if (xmlStrEqual(name, tmp->name)) {
5899 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900 "standalone: attribute notation value token %s duplicated\n",
5901 name, NULL);
5902 if (!xmlDictOwns(ctxt->dict, name))
5903 xmlFree((xmlChar *) name);
5904 break;
5905 }
5906 tmp = tmp->next;
5907 }
5908 if (tmp == NULL) {
5909 cur = xmlCreateEnumeration(name);
5910 if (cur == NULL) {
5911 xmlFreeEnumeration(ret);
5912 return(NULL);
5913 }
5914 if (last == NULL) ret = last = cur;
5915 else {
5916 last->next = cur;
5917 last = cur;
5918 }
5919 }
5920 SKIP_BLANKS;
5921 } while (RAW == '|');
5922 if (RAW != ')') {
5923 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5924 xmlFreeEnumeration(ret);
5925 return(NULL);
5926 }
5927 NEXT;
5928 return(ret);
5929}
5930
5931/**
5932 * xmlParseEnumerationType:
5933 * @ctxt: an XML parser context
5934 *
5935 * DEPRECATED: Internal function, don't use.
5936 *
5937 * parse an Enumeration attribute type.
5938 *
5939 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5940 *
5941 * [ VC: Enumeration ]
5942 * Values of this type must match one of the Nmtoken tokens in
5943 * the declaration
5944 *
5945 * Returns: the enumeration attribute tree built while parsing
5946 */
5947
5948xmlEnumerationPtr
5949xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5950 xmlChar *name;
5951 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5952
5953 if (RAW != '(') {
5954 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5955 return(NULL);
5956 }
5957 do {
5958 NEXT;
5959 SKIP_BLANKS;
5960 name = xmlParseNmtoken(ctxt);
5961 if (name == NULL) {
5962 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5963 return(ret);
5964 }
5965 tmp = ret;
5966 while (tmp != NULL) {
5967 if (xmlStrEqual(name, tmp->name)) {
5968 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5969 "standalone: attribute enumeration value token %s duplicated\n",
5970 name, NULL);
5971 if (!xmlDictOwns(ctxt->dict, name))
5972 xmlFree(name);
5973 break;
5974 }
5975 tmp = tmp->next;
5976 }
5977 if (tmp == NULL) {
5978 cur = xmlCreateEnumeration(name);
5979 if (!xmlDictOwns(ctxt->dict, name))
5980 xmlFree(name);
5981 if (cur == NULL) {
5982 xmlFreeEnumeration(ret);
5983 return(NULL);
5984 }
5985 if (last == NULL) ret = last = cur;
5986 else {
5987 last->next = cur;
5988 last = cur;
5989 }
5990 }
5991 SKIP_BLANKS;
5992 } while (RAW == '|');
5993 if (RAW != ')') {
5994 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5995 return(ret);
5996 }
5997 NEXT;
5998 return(ret);
5999}
6000
6001/**
6002 * xmlParseEnumeratedType:
6003 * @ctxt: an XML parser context
6004 * @tree: the enumeration tree built while parsing
6005 *
6006 * DEPRECATED: Internal function, don't use.
6007 *
6008 * parse an Enumerated attribute type.
6009 *
6010 * [57] EnumeratedType ::= NotationType | Enumeration
6011 *
6012 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6013 *
6014 *
6015 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6016 */
6017
6018int
6019xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6020 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6021 SKIP(8);
6022 if (SKIP_BLANKS == 0) {
6023 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6024 "Space required after 'NOTATION'\n");
6025 return(0);
6026 }
6027 *tree = xmlParseNotationType(ctxt);
6028 if (*tree == NULL) return(0);
6029 return(XML_ATTRIBUTE_NOTATION);
6030 }
6031 *tree = xmlParseEnumerationType(ctxt);
6032 if (*tree == NULL) return(0);
6033 return(XML_ATTRIBUTE_ENUMERATION);
6034}
6035
6036/**
6037 * xmlParseAttributeType:
6038 * @ctxt: an XML parser context
6039 * @tree: the enumeration tree built while parsing
6040 *
6041 * DEPRECATED: Internal function, don't use.
6042 *
6043 * parse the Attribute list def for an element
6044 *
6045 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6046 *
6047 * [55] StringType ::= 'CDATA'
6048 *
6049 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6050 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6051 *
6052 * Validity constraints for attribute values syntax are checked in
6053 * xmlValidateAttributeValue()
6054 *
6055 * [ VC: ID ]
6056 * Values of type ID must match the Name production. A name must not
6057 * appear more than once in an XML document as a value of this type;
6058 * i.e., ID values must uniquely identify the elements which bear them.
6059 *
6060 * [ VC: One ID per Element Type ]
6061 * No element type may have more than one ID attribute specified.
6062 *
6063 * [ VC: ID Attribute Default ]
6064 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6065 *
6066 * [ VC: IDREF ]
6067 * Values of type IDREF must match the Name production, and values
6068 * of type IDREFS must match Names; each IDREF Name must match the value
6069 * of an ID attribute on some element in the XML document; i.e. IDREF
6070 * values must match the value of some ID attribute.
6071 *
6072 * [ VC: Entity Name ]
6073 * Values of type ENTITY must match the Name production, values
6074 * of type ENTITIES must match Names; each Entity Name must match the
6075 * name of an unparsed entity declared in the DTD.
6076 *
6077 * [ VC: Name Token ]
6078 * Values of type NMTOKEN must match the Nmtoken production; values
6079 * of type NMTOKENS must match Nmtokens.
6080 *
6081 * Returns the attribute type
6082 */
6083int
6084xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6085 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6086 SKIP(5);
6087 return(XML_ATTRIBUTE_CDATA);
6088 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6089 SKIP(6);
6090 return(XML_ATTRIBUTE_IDREFS);
6091 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6092 SKIP(5);
6093 return(XML_ATTRIBUTE_IDREF);
6094 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6095 SKIP(2);
6096 return(XML_ATTRIBUTE_ID);
6097 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6098 SKIP(6);
6099 return(XML_ATTRIBUTE_ENTITY);
6100 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6101 SKIP(8);
6102 return(XML_ATTRIBUTE_ENTITIES);
6103 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6104 SKIP(8);
6105 return(XML_ATTRIBUTE_NMTOKENS);
6106 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6107 SKIP(7);
6108 return(XML_ATTRIBUTE_NMTOKEN);
6109 }
6110 return(xmlParseEnumeratedType(ctxt, tree));
6111}
6112
6113/**
6114 * xmlParseAttributeListDecl:
6115 * @ctxt: an XML parser context
6116 *
6117 * DEPRECATED: Internal function, don't use.
6118 *
6119 * Parse an attribute list declaration for an element. Always consumes '<!'.
6120 *
6121 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6122 *
6123 * [53] AttDef ::= S Name S AttType S DefaultDecl
6124 *
6125 */
6126void
6127xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6128 const xmlChar *elemName;
6129 const xmlChar *attrName;
6130 xmlEnumerationPtr tree;
6131
6132 if ((CUR != '<') || (NXT(1) != '!'))
6133 return;
6134 SKIP(2);
6135
6136 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6137 int inputid = ctxt->input->id;
6138
6139 SKIP(7);
6140 if (SKIP_BLANKS == 0) {
6141 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6142 "Space required after '<!ATTLIST'\n");
6143 }
6144 elemName = xmlParseName(ctxt);
6145 if (elemName == NULL) {
6146 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6147 "ATTLIST: no name for Element\n");
6148 return;
6149 }
6150 SKIP_BLANKS;
6151 GROW;
6152 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6153 int type;
6154 int def;
6155 xmlChar *defaultValue = NULL;
6156
6157 GROW;
6158 tree = NULL;
6159 attrName = xmlParseName(ctxt);
6160 if (attrName == NULL) {
6161 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6162 "ATTLIST: no name for Attribute\n");
6163 break;
6164 }
6165 GROW;
6166 if (SKIP_BLANKS == 0) {
6167 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6168 "Space required after the attribute name\n");
6169 break;
6170 }
6171
6172 type = xmlParseAttributeType(ctxt, &tree);
6173 if (type <= 0) {
6174 break;
6175 }
6176
6177 GROW;
6178 if (SKIP_BLANKS == 0) {
6179 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6180 "Space required after the attribute type\n");
6181 if (tree != NULL)
6182 xmlFreeEnumeration(tree);
6183 break;
6184 }
6185
6186 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6187 if (def <= 0) {
6188 if (defaultValue != NULL)
6189 xmlFree(defaultValue);
6190 if (tree != NULL)
6191 xmlFreeEnumeration(tree);
6192 break;
6193 }
6194 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6195 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6196
6197 GROW;
6198 if (RAW != '>') {
6199 if (SKIP_BLANKS == 0) {
6200 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6201 "Space required after the attribute default value\n");
6202 if (defaultValue != NULL)
6203 xmlFree(defaultValue);
6204 if (tree != NULL)
6205 xmlFreeEnumeration(tree);
6206 break;
6207 }
6208 }
6209 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6210 (ctxt->sax->attributeDecl != NULL))
6211 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6212 type, def, defaultValue, tree);
6213 else if (tree != NULL)
6214 xmlFreeEnumeration(tree);
6215
6216 if ((ctxt->sax2) && (defaultValue != NULL) &&
6217 (def != XML_ATTRIBUTE_IMPLIED) &&
6218 (def != XML_ATTRIBUTE_REQUIRED)) {
6219 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6220 }
6221 if (ctxt->sax2) {
6222 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6223 }
6224 if (defaultValue != NULL)
6225 xmlFree(defaultValue);
6226 GROW;
6227 }
6228 if (RAW == '>') {
6229 if (inputid != ctxt->input->id) {
6230 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6231 "Attribute list declaration doesn't start and"
6232 " stop in the same entity\n");
6233 }
6234 NEXT;
6235 }
6236 }
6237}
6238
6239/**
6240 * xmlParseElementMixedContentDecl:
6241 * @ctxt: an XML parser context
6242 * @inputchk: the input used for the current entity, needed for boundary checks
6243 *
6244 * DEPRECATED: Internal function, don't use.
6245 *
6246 * parse the declaration for a Mixed Element content
6247 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248 *
6249 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6250 * '(' S? '#PCDATA' S? ')'
6251 *
6252 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6253 *
6254 * [ VC: No Duplicate Types ]
6255 * The same name must not appear more than once in a single
6256 * mixed-content declaration.
6257 *
6258 * returns: the list of the xmlElementContentPtr describing the element choices
6259 */
6260xmlElementContentPtr
6261xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262 xmlElementContentPtr ret = NULL, cur = NULL, n;
6263 const xmlChar *elem = NULL;
6264
6265 GROW;
6266 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6267 SKIP(7);
6268 SKIP_BLANKS;
6269 if (RAW == ')') {
6270 if (ctxt->input->id != inputchk) {
6271 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6272 "Element content declaration doesn't start and"
6273 " stop in the same entity\n");
6274 }
6275 NEXT;
6276 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6277 if (ret == NULL)
6278 return(NULL);
6279 if (RAW == '*') {
6280 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6281 NEXT;
6282 }
6283 return(ret);
6284 }
6285 if ((RAW == '(') || (RAW == '|')) {
6286 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6287 if (ret == NULL) return(NULL);
6288 }
6289 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6290 NEXT;
6291 if (elem == NULL) {
6292 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6293 if (ret == NULL) {
6294 xmlFreeDocElementContent(ctxt->myDoc, cur);
6295 return(NULL);
6296 }
6297 ret->c1 = cur;
6298 if (cur != NULL)
6299 cur->parent = ret;
6300 cur = ret;
6301 } else {
6302 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6303 if (n == NULL) {
6304 xmlFreeDocElementContent(ctxt->myDoc, ret);
6305 return(NULL);
6306 }
6307 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6308 if (n->c1 != NULL)
6309 n->c1->parent = n;
6310 cur->c2 = n;
6311 if (n != NULL)
6312 n->parent = cur;
6313 cur = n;
6314 }
6315 SKIP_BLANKS;
6316 elem = xmlParseName(ctxt);
6317 if (elem == NULL) {
6318 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6319 "xmlParseElementMixedContentDecl : Name expected\n");
6320 xmlFreeDocElementContent(ctxt->myDoc, ret);
6321 return(NULL);
6322 }
6323 SKIP_BLANKS;
6324 GROW;
6325 }
6326 if ((RAW == ')') && (NXT(1) == '*')) {
6327 if (elem != NULL) {
6328 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6329 XML_ELEMENT_CONTENT_ELEMENT);
6330 if (cur->c2 != NULL)
6331 cur->c2->parent = cur;
6332 }
6333 if (ret != NULL)
6334 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6335 if (ctxt->input->id != inputchk) {
6336 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6337 "Element content declaration doesn't start and"
6338 " stop in the same entity\n");
6339 }
6340 SKIP(2);
6341 } else {
6342 xmlFreeDocElementContent(ctxt->myDoc, ret);
6343 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6344 return(NULL);
6345 }
6346
6347 } else {
6348 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6349 }
6350 return(ret);
6351}
6352
6353/**
6354 * xmlParseElementChildrenContentDeclPriv:
6355 * @ctxt: an XML parser context
6356 * @inputchk: the input used for the current entity, needed for boundary checks
6357 * @depth: the level of recursion
6358 *
6359 * parse the declaration for a Mixed Element content
6360 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6361 *
6362 *
6363 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6364 *
6365 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6366 *
6367 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6368 *
6369 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6370 *
6371 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6372 * TODO Parameter-entity replacement text must be properly nested
6373 * with parenthesized groups. That is to say, if either of the
6374 * opening or closing parentheses in a choice, seq, or Mixed
6375 * construct is contained in the replacement text for a parameter
6376 * entity, both must be contained in the same replacement text. For
6377 * interoperability, if a parameter-entity reference appears in a
6378 * choice, seq, or Mixed construct, its replacement text should not
6379 * be empty, and neither the first nor last non-blank character of
6380 * the replacement text should be a connector (| or ,).
6381 *
6382 * Returns the tree of xmlElementContentPtr describing the element
6383 * hierarchy.
6384 */
6385static xmlElementContentPtr
6386xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6387 int depth) {
6388 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6389 const xmlChar *elem;
6390 xmlChar type = 0;
6391
6392 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6393 (depth > 2048)) {
6394 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6395"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6396 depth);
6397 return(NULL);
6398 }
6399 SKIP_BLANKS;
6400 GROW;
6401 if (RAW == '(') {
6402 int inputid = ctxt->input->id;
6403
6404 /* Recurse on first child */
6405 NEXT;
6406 SKIP_BLANKS;
6407 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6408 depth + 1);
6409 if (cur == NULL)
6410 return(NULL);
6411 SKIP_BLANKS;
6412 GROW;
6413 } else {
6414 elem = xmlParseName(ctxt);
6415 if (elem == NULL) {
6416 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6417 return(NULL);
6418 }
6419 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6420 if (cur == NULL) {
6421 xmlErrMemory(ctxt, NULL);
6422 return(NULL);
6423 }
6424 GROW;
6425 if (RAW == '?') {
6426 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6427 NEXT;
6428 } else if (RAW == '*') {
6429 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6430 NEXT;
6431 } else if (RAW == '+') {
6432 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6433 NEXT;
6434 } else {
6435 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6436 }
6437 GROW;
6438 }
6439 SKIP_BLANKS;
6440 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6441 /*
6442 * Each loop we parse one separator and one element.
6443 */
6444 if (RAW == ',') {
6445 if (type == 0) type = CUR;
6446
6447 /*
6448 * Detect "Name | Name , Name" error
6449 */
6450 else if (type != CUR) {
6451 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6452 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6453 type);
6454 if ((last != NULL) && (last != ret))
6455 xmlFreeDocElementContent(ctxt->myDoc, last);
6456 if (ret != NULL)
6457 xmlFreeDocElementContent(ctxt->myDoc, ret);
6458 return(NULL);
6459 }
6460 NEXT;
6461
6462 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6463 if (op == NULL) {
6464 if ((last != NULL) && (last != ret))
6465 xmlFreeDocElementContent(ctxt->myDoc, last);
6466 xmlFreeDocElementContent(ctxt->myDoc, ret);
6467 return(NULL);
6468 }
6469 if (last == NULL) {
6470 op->c1 = ret;
6471 if (ret != NULL)
6472 ret->parent = op;
6473 ret = cur = op;
6474 } else {
6475 cur->c2 = op;
6476 if (op != NULL)
6477 op->parent = cur;
6478 op->c1 = last;
6479 if (last != NULL)
6480 last->parent = op;
6481 cur =op;
6482 last = NULL;
6483 }
6484 } else if (RAW == '|') {
6485 if (type == 0) type = CUR;
6486
6487 /*
6488 * Detect "Name , Name | Name" error
6489 */
6490 else if (type != CUR) {
6491 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6492 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6493 type);
6494 if ((last != NULL) && (last != ret))
6495 xmlFreeDocElementContent(ctxt->myDoc, last);
6496 if (ret != NULL)
6497 xmlFreeDocElementContent(ctxt->myDoc, ret);
6498 return(NULL);
6499 }
6500 NEXT;
6501
6502 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6503 if (op == NULL) {
6504 if ((last != NULL) && (last != ret))
6505 xmlFreeDocElementContent(ctxt->myDoc, last);
6506 if (ret != NULL)
6507 xmlFreeDocElementContent(ctxt->myDoc, ret);
6508 return(NULL);
6509 }
6510 if (last == NULL) {
6511 op->c1 = ret;
6512 if (ret != NULL)
6513 ret->parent = op;
6514 ret = cur = op;
6515 } else {
6516 cur->c2 = op;
6517 if (op != NULL)
6518 op->parent = cur;
6519 op->c1 = last;
6520 if (last != NULL)
6521 last->parent = op;
6522 cur =op;
6523 last = NULL;
6524 }
6525 } else {
6526 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6527 if ((last != NULL) && (last != ret))
6528 xmlFreeDocElementContent(ctxt->myDoc, last);
6529 if (ret != NULL)
6530 xmlFreeDocElementContent(ctxt->myDoc, ret);
6531 return(NULL);
6532 }
6533 GROW;
6534 SKIP_BLANKS;
6535 GROW;
6536 if (RAW == '(') {
6537 int inputid = ctxt->input->id;
6538 /* Recurse on second child */
6539 NEXT;
6540 SKIP_BLANKS;
6541 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6542 depth + 1);
6543 if (last == NULL) {
6544 if (ret != NULL)
6545 xmlFreeDocElementContent(ctxt->myDoc, ret);
6546 return(NULL);
6547 }
6548 SKIP_BLANKS;
6549 } else {
6550 elem = xmlParseName(ctxt);
6551 if (elem == NULL) {
6552 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6553 if (ret != NULL)
6554 xmlFreeDocElementContent(ctxt->myDoc, ret);
6555 return(NULL);
6556 }
6557 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6558 if (last == NULL) {
6559 if (ret != NULL)
6560 xmlFreeDocElementContent(ctxt->myDoc, ret);
6561 return(NULL);
6562 }
6563 if (RAW == '?') {
6564 last->ocur = XML_ELEMENT_CONTENT_OPT;
6565 NEXT;
6566 } else if (RAW == '*') {
6567 last->ocur = XML_ELEMENT_CONTENT_MULT;
6568 NEXT;
6569 } else if (RAW == '+') {
6570 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6571 NEXT;
6572 } else {
6573 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6574 }
6575 }
6576 SKIP_BLANKS;
6577 GROW;
6578 }
6579 if ((cur != NULL) && (last != NULL)) {
6580 cur->c2 = last;
6581 if (last != NULL)
6582 last->parent = cur;
6583 }
6584 if (ctxt->input->id != inputchk) {
6585 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6586 "Element content declaration doesn't start and stop in"
6587 " the same entity\n");
6588 }
6589 NEXT;
6590 if (RAW == '?') {
6591 if (ret != NULL) {
6592 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6593 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6594 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6595 else
6596 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6597 }
6598 NEXT;
6599 } else if (RAW == '*') {
6600 if (ret != NULL) {
6601 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6602 cur = ret;
6603 /*
6604 * Some normalization:
6605 * (a | b* | c?)* == (a | b | c)*
6606 */
6607 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6608 if ((cur->c1 != NULL) &&
6609 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6610 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6611 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6612 if ((cur->c2 != NULL) &&
6613 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6614 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6615 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6616 cur = cur->c2;
6617 }
6618 }
6619 NEXT;
6620 } else if (RAW == '+') {
6621 if (ret != NULL) {
6622 int found = 0;
6623
6624 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6625 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6626 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6627 else
6628 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6629 /*
6630 * Some normalization:
6631 * (a | b*)+ == (a | b)*
6632 * (a | b?)+ == (a | b)*
6633 */
6634 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6635 if ((cur->c1 != NULL) &&
6636 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6637 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6638 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6639 found = 1;
6640 }
6641 if ((cur->c2 != NULL) &&
6642 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6643 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6644 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6645 found = 1;
6646 }
6647 cur = cur->c2;
6648 }
6649 if (found)
6650 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6651 }
6652 NEXT;
6653 }
6654 return(ret);
6655}
6656
6657/**
6658 * xmlParseElementChildrenContentDecl:
6659 * @ctxt: an XML parser context
6660 * @inputchk: the input used for the current entity, needed for boundary checks
6661 *
6662 * DEPRECATED: Internal function, don't use.
6663 *
6664 * parse the declaration for a Mixed Element content
6665 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6666 *
6667 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6668 *
6669 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6670 *
6671 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6672 *
6673 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6674 *
6675 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6676 * TODO Parameter-entity replacement text must be properly nested
6677 * with parenthesized groups. That is to say, if either of the
6678 * opening or closing parentheses in a choice, seq, or Mixed
6679 * construct is contained in the replacement text for a parameter
6680 * entity, both must be contained in the same replacement text. For
6681 * interoperability, if a parameter-entity reference appears in a
6682 * choice, seq, or Mixed construct, its replacement text should not
6683 * be empty, and neither the first nor last non-blank character of
6684 * the replacement text should be a connector (| or ,).
6685 *
6686 * Returns the tree of xmlElementContentPtr describing the element
6687 * hierarchy.
6688 */
6689xmlElementContentPtr
6690xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6691 /* stub left for API/ABI compat */
6692 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6693}
6694
6695/**
6696 * xmlParseElementContentDecl:
6697 * @ctxt: an XML parser context
6698 * @name: the name of the element being defined.
6699 * @result: the Element Content pointer will be stored here if any
6700 *
6701 * DEPRECATED: Internal function, don't use.
6702 *
6703 * parse the declaration for an Element content either Mixed or Children,
6704 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6705 *
6706 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6707 *
6708 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6709 */
6710
6711int
6712xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6713 xmlElementContentPtr *result) {
6714
6715 xmlElementContentPtr tree = NULL;
6716 int inputid = ctxt->input->id;
6717 int res;
6718
6719 *result = NULL;
6720
6721 if (RAW != '(') {
6722 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6723 "xmlParseElementContentDecl : %s '(' expected\n", name);
6724 return(-1);
6725 }
6726 NEXT;
6727 GROW;
6728 if (ctxt->instate == XML_PARSER_EOF)
6729 return(-1);
6730 SKIP_BLANKS;
6731 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6732 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6733 res = XML_ELEMENT_TYPE_MIXED;
6734 } else {
6735 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6736 res = XML_ELEMENT_TYPE_ELEMENT;
6737 }
6738 SKIP_BLANKS;
6739 *result = tree;
6740 return(res);
6741}
6742
6743/**
6744 * xmlParseElementDecl:
6745 * @ctxt: an XML parser context
6746 *
6747 * DEPRECATED: Internal function, don't use.
6748 *
6749 * Parse an element declaration. Always consumes '<!'.
6750 *
6751 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6752 *
6753 * [ VC: Unique Element Type Declaration ]
6754 * No element type may be declared more than once
6755 *
6756 * Returns the type of the element, or -1 in case of error
6757 */
6758int
6759xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6760 const xmlChar *name;
6761 int ret = -1;
6762 xmlElementContentPtr content = NULL;
6763
6764 if ((CUR != '<') || (NXT(1) != '!'))
6765 return(ret);
6766 SKIP(2);
6767
6768 /* GROW; done in the caller */
6769 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6770 int inputid = ctxt->input->id;
6771
6772 SKIP(7);
6773 if (SKIP_BLANKS == 0) {
6774 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6775 "Space required after 'ELEMENT'\n");
6776 return(-1);
6777 }
6778 name = xmlParseName(ctxt);
6779 if (name == NULL) {
6780 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6781 "xmlParseElementDecl: no name for Element\n");
6782 return(-1);
6783 }
6784 if (SKIP_BLANKS == 0) {
6785 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6786 "Space required after the element name\n");
6787 }
6788 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6789 SKIP(5);
6790 /*
6791 * Element must always be empty.
6792 */
6793 ret = XML_ELEMENT_TYPE_EMPTY;
6794 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6795 (NXT(2) == 'Y')) {
6796 SKIP(3);
6797 /*
6798 * Element is a generic container.
6799 */
6800 ret = XML_ELEMENT_TYPE_ANY;
6801 } else if (RAW == '(') {
6802 ret = xmlParseElementContentDecl(ctxt, name, &content);
6803 } else {
6804 /*
6805 * [ WFC: PEs in Internal Subset ] error handling.
6806 */
6807 if ((RAW == '%') && (ctxt->external == 0) &&
6808 (ctxt->inputNr == 1)) {
6809 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6810 "PEReference: forbidden within markup decl in internal subset\n");
6811 } else {
6812 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6813 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6814 }
6815 return(-1);
6816 }
6817
6818 SKIP_BLANKS;
6819
6820 if (RAW != '>') {
6821 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6822 if (content != NULL) {
6823 xmlFreeDocElementContent(ctxt->myDoc, content);
6824 }
6825 } else {
6826 if (inputid != ctxt->input->id) {
6827 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6828 "Element declaration doesn't start and stop in"
6829 " the same entity\n");
6830 }
6831
6832 NEXT;
6833 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6834 (ctxt->sax->elementDecl != NULL)) {
6835 if (content != NULL)
6836 content->parent = NULL;
6837 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6838 content);
6839 if ((content != NULL) && (content->parent == NULL)) {
6840 /*
6841 * this is a trick: if xmlAddElementDecl is called,
6842 * instead of copying the full tree it is plugged directly
6843 * if called from the parser. Avoid duplicating the
6844 * interfaces or change the API/ABI
6845 */
6846 xmlFreeDocElementContent(ctxt->myDoc, content);
6847 }
6848 } else if (content != NULL) {
6849 xmlFreeDocElementContent(ctxt->myDoc, content);
6850 }
6851 }
6852 }
6853 return(ret);
6854}
6855
6856/**
6857 * xmlParseConditionalSections
6858 * @ctxt: an XML parser context
6859 *
6860 * Parse a conditional section. Always consumes '<!['.
6861 *
6862 * [61] conditionalSect ::= includeSect | ignoreSect
6863 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6864 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6865 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6866 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6867 */
6868
6869static void
6870xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6871 int *inputIds = NULL;
6872 size_t inputIdsSize = 0;
6873 size_t depth = 0;
6874
6875 while (ctxt->instate != XML_PARSER_EOF) {
6876 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6877 int id = ctxt->input->id;
6878
6879 SKIP(3);
6880 SKIP_BLANKS;
6881
6882 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6883 SKIP(7);
6884 SKIP_BLANKS;
6885 if (RAW != '[') {
6886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6887 xmlHaltParser(ctxt);
6888 goto error;
6889 }
6890 if (ctxt->input->id != id) {
6891 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6892 "All markup of the conditional section is"
6893 " not in the same entity\n");
6894 }
6895 NEXT;
6896
6897 if (inputIdsSize <= depth) {
6898 int *tmp;
6899
6900 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6901 tmp = (int *) xmlRealloc(inputIds,
6902 inputIdsSize * sizeof(int));
6903 if (tmp == NULL) {
6904 xmlErrMemory(ctxt, NULL);
6905 goto error;
6906 }
6907 inputIds = tmp;
6908 }
6909 inputIds[depth] = id;
6910 depth++;
6911 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6912 size_t ignoreDepth = 0;
6913
6914 SKIP(6);
6915 SKIP_BLANKS;
6916 if (RAW != '[') {
6917 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6918 xmlHaltParser(ctxt);
6919 goto error;
6920 }
6921 if (ctxt->input->id != id) {
6922 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6923 "All markup of the conditional section is"
6924 " not in the same entity\n");
6925 }
6926 NEXT;
6927
6928 while (RAW != 0) {
6929 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6930 SKIP(3);
6931 ignoreDepth++;
6932 /* Check for integer overflow */
6933 if (ignoreDepth == 0) {
6934 xmlErrMemory(ctxt, NULL);
6935 goto error;
6936 }
6937 } else if ((RAW == ']') && (NXT(1) == ']') &&
6938 (NXT(2) == '>')) {
6939 if (ignoreDepth == 0)
6940 break;
6941 SKIP(3);
6942 ignoreDepth--;
6943 } else {
6944 NEXT;
6945 }
6946 }
6947
6948 if (RAW == 0) {
6949 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6950 goto error;
6951 }
6952 if (ctxt->input->id != id) {
6953 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6954 "All markup of the conditional section is"
6955 " not in the same entity\n");
6956 }
6957 SKIP(3);
6958 } else {
6959 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6960 xmlHaltParser(ctxt);
6961 goto error;
6962 }
6963 } else if ((depth > 0) &&
6964 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6965 depth--;
6966 if (ctxt->input->id != inputIds[depth]) {
6967 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6968 "All markup of the conditional section is not"
6969 " in the same entity\n");
6970 }
6971 SKIP(3);
6972 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6973 xmlParseMarkupDecl(ctxt);
6974 } else {
6975 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6976 xmlHaltParser(ctxt);
6977 goto error;
6978 }
6979
6980 if (depth == 0)
6981 break;
6982
6983 SKIP_BLANKS;
6984 SHRINK;
6985 GROW;
6986 }
6987
6988error:
6989 xmlFree(inputIds);
6990}
6991
6992/**
6993 * xmlParseMarkupDecl:
6994 * @ctxt: an XML parser context
6995 *
6996 * DEPRECATED: Internal function, don't use.
6997 *
6998 * Parse markup declarations. Always consumes '<!' or '<?'.
6999 *
7000 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7001 * NotationDecl | PI | Comment
7002 *
7003 * [ VC: Proper Declaration/PE Nesting ]
7004 * Parameter-entity replacement text must be properly nested with
7005 * markup declarations. That is to say, if either the first character
7006 * or the last character of a markup declaration (markupdecl above) is
7007 * contained in the replacement text for a parameter-entity reference,
7008 * both must be contained in the same replacement text.
7009 *
7010 * [ WFC: PEs in Internal Subset ]
7011 * In the internal DTD subset, parameter-entity references can occur
7012 * only where markup declarations can occur, not within markup declarations.
7013 * (This does not apply to references that occur in external parameter
7014 * entities or to the external subset.)
7015 */
7016void
7017xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7018 GROW;
7019 if (CUR == '<') {
7020 if (NXT(1) == '!') {
7021 switch (NXT(2)) {
7022 case 'E':
7023 if (NXT(3) == 'L')
7024 xmlParseElementDecl(ctxt);
7025 else if (NXT(3) == 'N')
7026 xmlParseEntityDecl(ctxt);
7027 else
7028 SKIP(2);
7029 break;
7030 case 'A':
7031 xmlParseAttributeListDecl(ctxt);
7032 break;
7033 case 'N':
7034 xmlParseNotationDecl(ctxt);
7035 break;
7036 case '-':
7037 xmlParseComment(ctxt);
7038 break;
7039 default:
7040 /* there is an error but it will be detected later */
7041 SKIP(2);
7042 break;
7043 }
7044 } else if (NXT(1) == '?') {
7045 xmlParsePI(ctxt);
7046 }
7047 }
7048
7049 /*
7050 * detect requirement to exit there and act accordingly
7051 * and avoid having instate overridden later on
7052 */
7053 if (ctxt->instate == XML_PARSER_EOF)
7054 return;
7055
7056 ctxt->instate = XML_PARSER_DTD;
7057}
7058
7059/**
7060 * xmlParseTextDecl:
7061 * @ctxt: an XML parser context
7062 *
7063 * DEPRECATED: Internal function, don't use.
7064 *
7065 * parse an XML declaration header for external entities
7066 *
7067 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7068 */
7069
7070void
7071xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7072 xmlChar *version;
7073 int oldstate;
7074
7075 /*
7076 * We know that '<?xml' is here.
7077 */
7078 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7079 SKIP(5);
7080 } else {
7081 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7082 return;
7083 }
7084
7085 /* Avoid expansion of parameter entities when skipping blanks. */
7086 oldstate = ctxt->instate;
7087 ctxt->instate = XML_PARSER_START;
7088
7089 if (SKIP_BLANKS == 0) {
7090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7091 "Space needed after '<?xml'\n");
7092 }
7093
7094 /*
7095 * We may have the VersionInfo here.
7096 */
7097 version = xmlParseVersionInfo(ctxt);
7098 if (version == NULL)
7099 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7100 else {
7101 if (SKIP_BLANKS == 0) {
7102 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7103 "Space needed here\n");
7104 }
7105 }
7106 ctxt->input->version = version;
7107
7108 /*
7109 * We must have the encoding declaration
7110 */
7111 xmlParseEncodingDecl(ctxt);
7112 if (ctxt->instate == XML_PARSER_EOF)
7113 return;
7114 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7115 /*
7116 * The XML REC instructs us to stop parsing right here
7117 */
7118 ctxt->instate = oldstate;
7119 return;
7120 }
7121
7122 SKIP_BLANKS;
7123 if ((RAW == '?') && (NXT(1) == '>')) {
7124 SKIP(2);
7125 } else if (RAW == '>') {
7126 /* Deprecated old WD ... */
7127 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7128 NEXT;
7129 } else {
7130 int c;
7131
7132 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7133 while ((c = CUR) != 0) {
7134 NEXT;
7135 if (c == '>')
7136 break;
7137 }
7138 }
7139
7140 if (ctxt->instate != XML_PARSER_EOF)
7141 ctxt->instate = oldstate;
7142}
7143
7144/**
7145 * xmlParseExternalSubset:
7146 * @ctxt: an XML parser context
7147 * @ExternalID: the external identifier
7148 * @SystemID: the system identifier (or URL)
7149 *
7150 * parse Markup declarations from an external subset
7151 *
7152 * [30] extSubset ::= textDecl? extSubsetDecl
7153 *
7154 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7155 */
7156void
7157xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7158 const xmlChar *SystemID) {
7159 xmlDetectSAX2(ctxt);
7160
7161 xmlDetectEncoding(ctxt);
7162
7163 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7164 xmlParseTextDecl(ctxt);
7165 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7166 /*
7167 * The XML REC instructs us to stop parsing right here
7168 */
7169 xmlHaltParser(ctxt);
7170 return;
7171 }
7172 }
7173 if (ctxt->myDoc == NULL) {
7174 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7175 if (ctxt->myDoc == NULL) {
7176 xmlErrMemory(ctxt, "New Doc failed");
7177 return;
7178 }
7179 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7180 }
7181 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7182 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7183
7184 ctxt->instate = XML_PARSER_DTD;
7185 ctxt->external = 1;
7186 SKIP_BLANKS;
7187 while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7188 GROW;
7189 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7190 xmlParseConditionalSections(ctxt);
7191 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7192 xmlParseMarkupDecl(ctxt);
7193 } else {
7194 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7195 xmlHaltParser(ctxt);
7196 return;
7197 }
7198 SKIP_BLANKS;
7199 SHRINK;
7200 }
7201
7202 if (RAW != 0) {
7203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7204 }
7205
7206}
7207
7208/**
7209 * xmlParseReference:
7210 * @ctxt: an XML parser context
7211 *
7212 * DEPRECATED: Internal function, don't use.
7213 *
7214 * parse and handle entity references in content, depending on the SAX
7215 * interface, this may end-up in a call to character() if this is a
7216 * CharRef, a predefined entity, if there is no reference() callback.
7217 * or if the parser was asked to switch to that mode.
7218 *
7219 * Always consumes '&'.
7220 *
7221 * [67] Reference ::= EntityRef | CharRef
7222 */
7223void
7224xmlParseReference(xmlParserCtxtPtr ctxt) {
7225 xmlEntityPtr ent;
7226 xmlChar *val;
7227 int was_checked;
7228 xmlNodePtr list = NULL;
7229 xmlParserErrors ret = XML_ERR_OK;
7230
7231
7232 if (RAW != '&')
7233 return;
7234
7235 /*
7236 * Simple case of a CharRef
7237 */
7238 if (NXT(1) == '#') {
7239 int i = 0;
7240 xmlChar out[16];
7241 int value = xmlParseCharRef(ctxt);
7242
7243 if (value == 0)
7244 return;
7245
7246 /*
7247 * Just encode the value in UTF-8
7248 */
7249 COPY_BUF(out, i, value);
7250 out[i] = 0;
7251 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7252 (!ctxt->disableSAX))
7253 ctxt->sax->characters(ctxt->userData, out, i);
7254 return;
7255 }
7256
7257 /*
7258 * We are seeing an entity reference
7259 */
7260 ent = xmlParseEntityRef(ctxt);
7261 if (ent == NULL) return;
7262 if (!ctxt->wellFormed)
7263 return;
7264 was_checked = ent->flags & XML_ENT_PARSED;
7265
7266 /* special case of predefined entities */
7267 if ((ent->name == NULL) ||
7268 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7269 val = ent->content;
7270 if (val == NULL) return;
7271 /*
7272 * inline the entity.
7273 */
7274 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7275 (!ctxt->disableSAX))
7276 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7277 return;
7278 }
7279
7280 /*
7281 * The first reference to the entity trigger a parsing phase
7282 * where the ent->children is filled with the result from
7283 * the parsing.
7284 * Note: external parsed entities will not be loaded, it is not
7285 * required for a non-validating parser, unless the parsing option
7286 * of validating, or substituting entities were given. Doing so is
7287 * far more secure as the parser will only process data coming from
7288 * the document entity by default.
7289 *
7290 * FIXME: This doesn't work correctly since entities can be
7291 * expanded with different namespace declarations in scope.
7292 * For example:
7293 *
7294 * <!DOCTYPE doc [
7295 * <!ENTITY ent "<ns:elem/>">
7296 * ]>
7297 * <doc>
7298 * <decl1 xmlns:ns="urn:ns1">
7299 * &ent;
7300 * </decl1>
7301 * <decl2 xmlns:ns="urn:ns2">
7302 * &ent;
7303 * </decl2>
7304 * </doc>
7305 *
7306 * Proposed fix:
7307 *
7308 * - Remove the ent->owner optimization which tries to avoid the
7309 * initial copy of the entity. Always make entities own the
7310 * subtree.
7311 * - Ignore current namespace declarations when parsing the
7312 * entity. If a prefix can't be resolved, don't report an error
7313 * but mark it as unresolved.
7314 * - Try to resolve these prefixes when expanding the entity.
7315 * This will require a specialized version of xmlStaticCopyNode
7316 * which can also make use of the namespace hash table to avoid
7317 * quadratic behavior.
7318 *
7319 * Alternatively, we could simply reparse the entity on each
7320 * expansion like we already do with custom SAX callbacks.
7321 * External entity content should be cached in this case.
7322 */
7323 if (((ent->flags & XML_ENT_PARSED) == 0) &&
7324 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7325 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7326 unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7327
7328 /*
7329 * This is a bit hackish but this seems the best
7330 * way to make sure both SAX and DOM entity support
7331 * behaves okay.
7332 */
7333 void *user_data;
7334 if (ctxt->userData == ctxt)
7335 user_data = NULL;
7336 else
7337 user_data = ctxt->userData;
7338
7339 /* Avoid overflow as much as possible */
7340 ctxt->sizeentcopy = 0;
7341
7342 if (ent->flags & XML_ENT_EXPANDING) {
7343 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7344 xmlHaltParser(ctxt);
7345 return;
7346 }
7347
7348 ent->flags |= XML_ENT_EXPANDING;
7349
7350 /*
7351 * Check that this entity is well formed
7352 * 4.3.2: An internal general parsed entity is well-formed
7353 * if its replacement text matches the production labeled
7354 * content.
7355 */
7356 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7357 ctxt->depth++;
7358 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7359 user_data, &list);
7360 ctxt->depth--;
7361
7362 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7363 ctxt->depth++;
7364 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7365 user_data, ctxt->depth, ent->URI,
7366 ent->ExternalID, &list);
7367 ctxt->depth--;
7368 } else {
7369 ret = XML_ERR_ENTITY_PE_INTERNAL;
7370 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7371 "invalid entity type found\n", NULL);
7372 }
7373
7374 ent->flags &= ~XML_ENT_EXPANDING;
7375 ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7376 ent->expandedSize = ctxt->sizeentcopy;
7377 if (ret == XML_ERR_ENTITY_LOOP) {
7378 xmlHaltParser(ctxt);
7379 xmlFreeNodeList(list);
7380 return;
7381 }
7382 if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7383 xmlFreeNodeList(list);
7384 return;
7385 }
7386
7387 if ((ret == XML_ERR_OK) && (list != NULL)) {
7388 ent->children = list;
7389 /*
7390 * Prune it directly in the generated document
7391 * except for single text nodes.
7392 */
7393 if ((ctxt->replaceEntities == 0) ||
7394 (ctxt->parseMode == XML_PARSE_READER) ||
7395 ((list->type == XML_TEXT_NODE) &&
7396 (list->next == NULL))) {
7397 ent->owner = 1;
7398 while (list != NULL) {
7399 list->parent = (xmlNodePtr) ent;
7400 if (list->doc != ent->doc)
7401 xmlSetTreeDoc(list, ent->doc);
7402 if (list->next == NULL)
7403 ent->last = list;
7404 list = list->next;
7405 }
7406 list = NULL;
7407 } else {
7408 ent->owner = 0;
7409 while (list != NULL) {
7410 list->parent = (xmlNodePtr) ctxt->node;
7411 list->doc = ctxt->myDoc;
7412 if (list->next == NULL)
7413 ent->last = list;
7414 list = list->next;
7415 }
7416 list = ent->children;
7417#ifdef LIBXML_LEGACY_ENABLED
7418 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7419 xmlAddEntityReference(ent, list, NULL);
7420#endif /* LIBXML_LEGACY_ENABLED */
7421 }
7422 } else if ((ret != XML_ERR_OK) &&
7423 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7424 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7425 "Entity '%s' failed to parse\n", ent->name);
7426 if (ent->content != NULL)
7427 ent->content[0] = 0;
7428 } else if (list != NULL) {
7429 xmlFreeNodeList(list);
7430 list = NULL;
7431 }
7432
7433 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7434 was_checked = 0;
7435 }
7436
7437 /*
7438 * Now that the entity content has been gathered
7439 * provide it to the application, this can take different forms based
7440 * on the parsing modes.
7441 */
7442 if (ent->children == NULL) {
7443 /*
7444 * Probably running in SAX mode and the callbacks don't
7445 * build the entity content. So unless we already went
7446 * though parsing for first checking go though the entity
7447 * content to generate callbacks associated to the entity
7448 */
7449 if (was_checked != 0) {
7450 void *user_data;
7451 /*
7452 * This is a bit hackish but this seems the best
7453 * way to make sure both SAX and DOM entity support
7454 * behaves okay.
7455 */
7456 if (ctxt->userData == ctxt)
7457 user_data = NULL;
7458 else
7459 user_data = ctxt->userData;
7460
7461 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7462 ctxt->depth++;
7463 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7464 ent->content, user_data, NULL);
7465 ctxt->depth--;
7466 } else if (ent->etype ==
7467 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7468 unsigned long oldsizeentities = ctxt->sizeentities;
7469
7470 ctxt->depth++;
7471 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7472 ctxt->sax, user_data, ctxt->depth,
7473 ent->URI, ent->ExternalID, NULL);
7474 ctxt->depth--;
7475
7476 /* Undo the change to sizeentities */
7477 ctxt->sizeentities = oldsizeentities;
7478 } else {
7479 ret = XML_ERR_ENTITY_PE_INTERNAL;
7480 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7481 "invalid entity type found\n", NULL);
7482 }
7483 if (ret == XML_ERR_ENTITY_LOOP) {
7484 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7485 return;
7486 }
7487 if (xmlParserEntityCheck(ctxt, 0))
7488 return;
7489 }
7490 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7491 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7492 /*
7493 * Entity reference callback comes second, it's somewhat
7494 * superfluous but a compatibility to historical behaviour
7495 */
7496 ctxt->sax->reference(ctxt->userData, ent->name);
7497 }
7498 return;
7499 }
7500
7501 /*
7502 * We also check for amplification if entities aren't substituted.
7503 * They might be expanded later.
7504 */
7505 if ((was_checked != 0) &&
7506 (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7507 return;
7508
7509 /*
7510 * If we didn't get any children for the entity being built
7511 */
7512 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7513 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7514 /*
7515 * Create a node.
7516 */
7517 ctxt->sax->reference(ctxt->userData, ent->name);
7518 return;
7519 }
7520
7521 if (ctxt->replaceEntities) {
7522 /*
7523 * There is a problem on the handling of _private for entities
7524 * (bug 155816): Should we copy the content of the field from
7525 * the entity (possibly overwriting some value set by the user
7526 * when a copy is created), should we leave it alone, or should
7527 * we try to take care of different situations? The problem
7528 * is exacerbated by the usage of this field by the xmlReader.
7529 * To fix this bug, we look at _private on the created node
7530 * and, if it's NULL, we copy in whatever was in the entity.
7531 * If it's not NULL we leave it alone. This is somewhat of a
7532 * hack - maybe we should have further tests to determine
7533 * what to do.
7534 */
7535 if (ctxt->node != NULL) {
7536 /*
7537 * Seems we are generating the DOM content, do
7538 * a simple tree copy for all references except the first
7539 * In the first occurrence list contains the replacement.
7540 */
7541 if (((list == NULL) && (ent->owner == 0)) ||
7542 (ctxt->parseMode == XML_PARSE_READER)) {
7543 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7544
7545 /*
7546 * when operating on a reader, the entities definitions
7547 * are always owning the entities subtree.
7548 if (ctxt->parseMode == XML_PARSE_READER)
7549 ent->owner = 1;
7550 */
7551
7552 cur = ent->children;
7553 while (cur != NULL) {
7554 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7555 if (nw != NULL) {
7556 if (nw->_private == NULL)
7557 nw->_private = cur->_private;
7558 if (firstChild == NULL){
7559 firstChild = nw;
7560 }
7561 nw = xmlAddChild(ctxt->node, nw);
7562 }
7563 if (cur == ent->last) {
7564 /*
7565 * needed to detect some strange empty
7566 * node cases in the reader tests
7567 */
7568 if ((ctxt->parseMode == XML_PARSE_READER) &&
7569 (nw != NULL) &&
7570 (nw->type == XML_ELEMENT_NODE) &&
7571 (nw->children == NULL))
7572 nw->extra = 1;
7573
7574 break;
7575 }
7576 cur = cur->next;
7577 }
7578#ifdef LIBXML_LEGACY_ENABLED
7579 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7580 xmlAddEntityReference(ent, firstChild, nw);
7581#endif /* LIBXML_LEGACY_ENABLED */
7582 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7583 xmlNodePtr nw = NULL, cur, next, last,
7584 firstChild = NULL;
7585
7586 /*
7587 * Copy the entity child list and make it the new
7588 * entity child list. The goal is to make sure any
7589 * ID or REF referenced will be the one from the
7590 * document content and not the entity copy.
7591 */
7592 cur = ent->children;
7593 ent->children = NULL;
7594 last = ent->last;
7595 ent->last = NULL;
7596 while (cur != NULL) {
7597 next = cur->next;
7598 cur->next = NULL;
7599 cur->parent = NULL;
7600 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7601 if (nw != NULL) {
7602 if (nw->_private == NULL)
7603 nw->_private = cur->_private;
7604 if (firstChild == NULL){
7605 firstChild = cur;
7606 }
7607 xmlAddChild((xmlNodePtr) ent, nw);
7608 }
7609 xmlAddChild(ctxt->node, cur);
7610 if (cur == last)
7611 break;
7612 cur = next;
7613 }
7614 if (ent->owner == 0)
7615 ent->owner = 1;
7616#ifdef LIBXML_LEGACY_ENABLED
7617 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7618 xmlAddEntityReference(ent, firstChild, nw);
7619#endif /* LIBXML_LEGACY_ENABLED */
7620 } else {
7621 const xmlChar *nbktext;
7622
7623 /*
7624 * the name change is to avoid coalescing of the
7625 * node with a possible previous text one which
7626 * would make ent->children a dangling pointer
7627 */
7628 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7629 -1);
7630 if (ent->children->type == XML_TEXT_NODE)
7631 ent->children->name = nbktext;
7632 if ((ent->last != ent->children) &&
7633 (ent->last->type == XML_TEXT_NODE))
7634 ent->last->name = nbktext;
7635 xmlAddChildList(ctxt->node, ent->children);
7636 }
7637
7638 /*
7639 * This is to avoid a nasty side effect, see
7640 * characters() in SAX.c
7641 */
7642 ctxt->nodemem = 0;
7643 ctxt->nodelen = 0;
7644 return;
7645 }
7646 }
7647}
7648
7649/**
7650 * xmlParseEntityRef:
7651 * @ctxt: an XML parser context
7652 *
7653 * DEPRECATED: Internal function, don't use.
7654 *
7655 * Parse an entitiy reference. Always consumes '&'.
7656 *
7657 * [68] EntityRef ::= '&' Name ';'
7658 *
7659 * [ WFC: Entity Declared ]
7660 * In a document without any DTD, a document with only an internal DTD
7661 * subset which contains no parameter entity references, or a document
7662 * with "standalone='yes'", the Name given in the entity reference
7663 * must match that in an entity declaration, except that well-formed
7664 * documents need not declare any of the following entities: amp, lt,
7665 * gt, apos, quot. The declaration of a parameter entity must precede
7666 * any reference to it. Similarly, the declaration of a general entity
7667 * must precede any reference to it which appears in a default value in an
7668 * attribute-list declaration. Note that if entities are declared in the
7669 * external subset or in external parameter entities, a non-validating
7670 * processor is not obligated to read and process their declarations;
7671 * for such documents, the rule that an entity must be declared is a
7672 * well-formedness constraint only if standalone='yes'.
7673 *
7674 * [ WFC: Parsed Entity ]
7675 * An entity reference must not contain the name of an unparsed entity
7676 *
7677 * Returns the xmlEntityPtr if found, or NULL otherwise.
7678 */
7679xmlEntityPtr
7680xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7681 const xmlChar *name;
7682 xmlEntityPtr ent = NULL;
7683
7684 GROW;
7685 if (ctxt->instate == XML_PARSER_EOF)
7686 return(NULL);
7687
7688 if (RAW != '&')
7689 return(NULL);
7690 NEXT;
7691 name = xmlParseName(ctxt);
7692 if (name == NULL) {
7693 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7694 "xmlParseEntityRef: no name\n");
7695 return(NULL);
7696 }
7697 if (RAW != ';') {
7698 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7699 return(NULL);
7700 }
7701 NEXT;
7702
7703 /*
7704 * Predefined entities override any extra definition
7705 */
7706 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7707 ent = xmlGetPredefinedEntity(name);
7708 if (ent != NULL)
7709 return(ent);
7710 }
7711
7712 /*
7713 * Ask first SAX for entity resolution, otherwise try the
7714 * entities which may have stored in the parser context.
7715 */
7716 if (ctxt->sax != NULL) {
7717 if (ctxt->sax->getEntity != NULL)
7718 ent = ctxt->sax->getEntity(ctxt->userData, name);
7719 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7720 (ctxt->options & XML_PARSE_OLDSAX))
7721 ent = xmlGetPredefinedEntity(name);
7722 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7723 (ctxt->userData==ctxt)) {
7724 ent = xmlSAX2GetEntity(ctxt, name);
7725 }
7726 }
7727 if (ctxt->instate == XML_PARSER_EOF)
7728 return(NULL);
7729 /*
7730 * [ WFC: Entity Declared ]
7731 * In a document without any DTD, a document with only an
7732 * internal DTD subset which contains no parameter entity
7733 * references, or a document with "standalone='yes'", the
7734 * Name given in the entity reference must match that in an
7735 * entity declaration, except that well-formed documents
7736 * need not declare any of the following entities: amp, lt,
7737 * gt, apos, quot.
7738 * The declaration of a parameter entity must precede any
7739 * reference to it.
7740 * Similarly, the declaration of a general entity must
7741 * precede any reference to it which appears in a default
7742 * value in an attribute-list declaration. Note that if
7743 * entities are declared in the external subset or in
7744 * external parameter entities, a non-validating processor
7745 * is not obligated to read and process their declarations;
7746 * for such documents, the rule that an entity must be
7747 * declared is a well-formedness constraint only if
7748 * standalone='yes'.
7749 */
7750 if (ent == NULL) {
7751 if ((ctxt->standalone == 1) ||
7752 ((ctxt->hasExternalSubset == 0) &&
7753 (ctxt->hasPErefs == 0))) {
7754 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7755 "Entity '%s' not defined\n", name);
7756 } else {
7757 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7758 "Entity '%s' not defined\n", name);
7759 if ((ctxt->inSubset == 0) &&
7760 (ctxt->sax != NULL) &&
7761 (ctxt->disableSAX == 0) &&
7762 (ctxt->sax->reference != NULL)) {
7763 ctxt->sax->reference(ctxt->userData, name);
7764 }
7765 }
7766 ctxt->valid = 0;
7767 }
7768
7769 /*
7770 * [ WFC: Parsed Entity ]
7771 * An entity reference must not contain the name of an
7772 * unparsed entity
7773 */
7774 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7775 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7776 "Entity reference to unparsed entity %s\n", name);
7777 }
7778
7779 /*
7780 * [ WFC: No External Entity References ]
7781 * Attribute values cannot contain direct or indirect
7782 * entity references to external entities.
7783 */
7784 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7785 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7786 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7787 "Attribute references external entity '%s'\n", name);
7788 }
7789 /*
7790 * [ WFC: No < in Attribute Values ]
7791 * The replacement text of any entity referred to directly or
7792 * indirectly in an attribute value (other than "&lt;") must
7793 * not contain a <.
7794 */
7795 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7796 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7797 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7798 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7799 ent->flags |= XML_ENT_CONTAINS_LT;
7800 ent->flags |= XML_ENT_CHECKED_LT;
7801 }
7802 if (ent->flags & XML_ENT_CONTAINS_LT)
7803 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7804 "'<' in entity '%s' is not allowed in attributes "
7805 "values\n", name);
7806 }
7807
7808 /*
7809 * Internal check, no parameter entities here ...
7810 */
7811 else {
7812 switch (ent->etype) {
7813 case XML_INTERNAL_PARAMETER_ENTITY:
7814 case XML_EXTERNAL_PARAMETER_ENTITY:
7815 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7816 "Attempt to reference the parameter entity '%s'\n",
7817 name);
7818 break;
7819 default:
7820 break;
7821 }
7822 }
7823
7824 /*
7825 * [ WFC: No Recursion ]
7826 * A parsed entity must not contain a recursive reference
7827 * to itself, either directly or indirectly.
7828 * Done somewhere else
7829 */
7830 return(ent);
7831}
7832
7833/**
7834 * xmlParseStringEntityRef:
7835 * @ctxt: an XML parser context
7836 * @str: a pointer to an index in the string
7837 *
7838 * parse ENTITY references declarations, but this version parses it from
7839 * a string value.
7840 *
7841 * [68] EntityRef ::= '&' Name ';'
7842 *
7843 * [ WFC: Entity Declared ]
7844 * In a document without any DTD, a document with only an internal DTD
7845 * subset which contains no parameter entity references, or a document
7846 * with "standalone='yes'", the Name given in the entity reference
7847 * must match that in an entity declaration, except that well-formed
7848 * documents need not declare any of the following entities: amp, lt,
7849 * gt, apos, quot. The declaration of a parameter entity must precede
7850 * any reference to it. Similarly, the declaration of a general entity
7851 * must precede any reference to it which appears in a default value in an
7852 * attribute-list declaration. Note that if entities are declared in the
7853 * external subset or in external parameter entities, a non-validating
7854 * processor is not obligated to read and process their declarations;
7855 * for such documents, the rule that an entity must be declared is a
7856 * well-formedness constraint only if standalone='yes'.
7857 *
7858 * [ WFC: Parsed Entity ]
7859 * An entity reference must not contain the name of an unparsed entity
7860 *
7861 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7862 * is updated to the current location in the string.
7863 */
7864static xmlEntityPtr
7865xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7866 xmlChar *name;
7867 const xmlChar *ptr;
7868 xmlChar cur;
7869 xmlEntityPtr ent = NULL;
7870
7871 if ((str == NULL) || (*str == NULL))
7872 return(NULL);
7873 ptr = *str;
7874 cur = *ptr;
7875 if (cur != '&')
7876 return(NULL);
7877
7878 ptr++;
7879 name = xmlParseStringName(ctxt, &ptr);
7880 if (name == NULL) {
7881 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7882 "xmlParseStringEntityRef: no name\n");
7883 *str = ptr;
7884 return(NULL);
7885 }
7886 if (*ptr != ';') {
7887 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7888 xmlFree(name);
7889 *str = ptr;
7890 return(NULL);
7891 }
7892 ptr++;
7893
7894
7895 /*
7896 * Predefined entities override any extra definition
7897 */
7898 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7899 ent = xmlGetPredefinedEntity(name);
7900 if (ent != NULL) {
7901 xmlFree(name);
7902 *str = ptr;
7903 return(ent);
7904 }
7905 }
7906
7907 /*
7908 * Ask first SAX for entity resolution, otherwise try the
7909 * entities which may have stored in the parser context.
7910 */
7911 if (ctxt->sax != NULL) {
7912 if (ctxt->sax->getEntity != NULL)
7913 ent = ctxt->sax->getEntity(ctxt->userData, name);
7914 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7915 ent = xmlGetPredefinedEntity(name);
7916 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7917 ent = xmlSAX2GetEntity(ctxt, name);
7918 }
7919 }
7920 if (ctxt->instate == XML_PARSER_EOF) {
7921 xmlFree(name);
7922 return(NULL);
7923 }
7924
7925 /*
7926 * [ WFC: Entity Declared ]
7927 * In a document without any DTD, a document with only an
7928 * internal DTD subset which contains no parameter entity
7929 * references, or a document with "standalone='yes'", the
7930 * Name given in the entity reference must match that in an
7931 * entity declaration, except that well-formed documents
7932 * need not declare any of the following entities: amp, lt,
7933 * gt, apos, quot.
7934 * The declaration of a parameter entity must precede any
7935 * reference to it.
7936 * Similarly, the declaration of a general entity must
7937 * precede any reference to it which appears in a default
7938 * value in an attribute-list declaration. Note that if
7939 * entities are declared in the external subset or in
7940 * external parameter entities, a non-validating processor
7941 * is not obligated to read and process their declarations;
7942 * for such documents, the rule that an entity must be
7943 * declared is a well-formedness constraint only if
7944 * standalone='yes'.
7945 */
7946 if (ent == NULL) {
7947 if ((ctxt->standalone == 1) ||
7948 ((ctxt->hasExternalSubset == 0) &&
7949 (ctxt->hasPErefs == 0))) {
7950 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7951 "Entity '%s' not defined\n", name);
7952 } else {
7953 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7954 "Entity '%s' not defined\n",
7955 name);
7956 }
7957 /* TODO ? check regressions ctxt->valid = 0; */
7958 }
7959
7960 /*
7961 * [ WFC: Parsed Entity ]
7962 * An entity reference must not contain the name of an
7963 * unparsed entity
7964 */
7965 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7967 "Entity reference to unparsed entity %s\n", name);
7968 }
7969
7970 /*
7971 * [ WFC: No External Entity References ]
7972 * Attribute values cannot contain direct or indirect
7973 * entity references to external entities.
7974 */
7975 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7976 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7977 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7978 "Attribute references external entity '%s'\n", name);
7979 }
7980 /*
7981 * [ WFC: No < in Attribute Values ]
7982 * The replacement text of any entity referred to directly or
7983 * indirectly in an attribute value (other than "&lt;") must
7984 * not contain a <.
7985 */
7986 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7987 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7988 if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7989 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7990 ent->flags |= XML_ENT_CONTAINS_LT;
7991 ent->flags |= XML_ENT_CHECKED_LT;
7992 }
7993 if (ent->flags & XML_ENT_CONTAINS_LT)
7994 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7995 "'<' in entity '%s' is not allowed in attributes "
7996 "values\n", name);
7997 }
7998
7999 /*
8000 * Internal check, no parameter entities here ...
8001 */
8002 else {
8003 switch (ent->etype) {
8004 case XML_INTERNAL_PARAMETER_ENTITY:
8005 case XML_EXTERNAL_PARAMETER_ENTITY:
8006 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8007 "Attempt to reference the parameter entity '%s'\n",
8008 name);
8009 break;
8010 default:
8011 break;
8012 }
8013 }
8014
8015 /*
8016 * [ WFC: No Recursion ]
8017 * A parsed entity must not contain a recursive reference
8018 * to itself, either directly or indirectly.
8019 * Done somewhere else
8020 */
8021
8022 xmlFree(name);
8023 *str = ptr;
8024 return(ent);
8025}
8026
/**
 * xmlParsePEReference:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse a parameter entity reference. Always consumes '%'.
 *
 * The entity content is handled directly by pushing its content as
 * a new input stream.
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * [ WFC: No Recursion ]
 * A parsed entity must not contain a recursive
 * reference to itself, either directly or indirectly.
 *
 * [ WFC: Entity Declared ]
 * In a document without any DTD, a document with only an internal DTD
 * subset which contains no parameter entity references, or a document
 * with "standalone='yes'", ...  ... The declaration of a parameter
 * entity must precede any reference to it...
 *
 * [ VC: Entity Declared ]
 * In a document with an external subset or external parameter entities
 * with "standalone='no'", ...  ... The declaration of a parameter entity
 * must precede any reference to it...
 *
 * [ WFC: In DTD ]
 * Parameter-entity references may only appear in the DTD.
 * NOTE: misleading but this is handled.
 *
 * Note that ctxt->hasPErefs is only set when the function runs to its
 * end: the early returns (missing name or ';', parser stopped, external
 * entity skipped, entity loop, failed input push) leave it unchanged.
 */
void
xmlParsePEReference(xmlParserCtxtPtr ctxt)
{
    const xmlChar *name;
    xmlEntityPtr entity = NULL;
    xmlParserInputPtr input;

    if (RAW != '%')
        return;
    NEXT;
    name = xmlParseName(ctxt);
    if (name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
        return;
    }
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "PEReference: %s\n", name);
    if (RAW != ';') {
        xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
        return;
    }

    NEXT;

    /*
     * Request the entity from SAX
     */
    if ((ctxt->sax != NULL) &&
        (ctxt->sax->getParameterEntity != NULL))
        entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
    /* The parser may have been stopped during the callback. */
    if (ctxt->instate == XML_PARSER_EOF)
        return;
    if (entity == NULL) {
        /*
         * [ WFC: Entity Declared ]
         * In a document without any DTD, a document with only an
         * internal DTD subset which contains no parameter entity
         * references, or a document with "standalone='yes'", ...
         * ... The declaration of a parameter entity must precede
         * any reference to it...
         */
        if ((ctxt->standalone == 1) ||
            ((ctxt->hasExternalSubset == 0) &&
             (ctxt->hasPErefs == 0))) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
                              "PEReference: %%%s; not found\n",
                              name);
        } else {
            /*
             * [ VC: Entity Declared ]
             * In a document with an external subset or external
             * parameter entities with "standalone='no'", ...
             * ... The declaration of a parameter entity must
             * precede any reference to it...
             */
            /*
             * Reported as a validity error when validating with an
             * error callback installed, as a plain warning otherwise;
             * the document is flagged invalid in both cases.
             */
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
                                 "PEReference: %%%s; not found\n",
                                 name, NULL);
            } else
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                              "PEReference: %%%s; not found\n",
                              name, NULL);
            ctxt->valid = 0;
        }
    } else {
        /*
         * Internal checking in case the entity quest barfed
         */
        if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
            (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
            xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
                  "Internal: %%%s; is not a parameter entity\n",
                          name, NULL);
        } else {
            unsigned long parentConsumed;
            xmlEntityPtr oldEnt;

            /*
             * External parameter entities are silently skipped unless
             * one of the entity substitution, DTD loading or validation
             * settings below is enabled.
             */
            if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                ((ctxt->options & XML_PARSE_NOENT) == 0) &&
                ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
                ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
                ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
                (ctxt->replaceEntities == 0) &&
                (ctxt->validate == 0))
                return;

            /*
             * The entity is already marked as being expanded: this is
             * a recursive reference, report it and stop the parser.
             */
            if (entity->flags & XML_ENT_EXPANDING) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                xmlHaltParser(ctxt);
                return;
            }

            /* Must be computed from old input before pushing new input. */
            parentConsumed = ctxt->input->parentConsumed;
            oldEnt = ctxt->input->entity;
            /*
             * The bytes consumed in the current input are added only
             * when it is not an entity, or is an external parameter
             * entity not yet flagged XML_ENT_PARSED.
             */
            if ((oldEnt == NULL) ||
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
                xmlSaturatedAddSizeT(&parentConsumed,
                                     ctxt->input->cur - ctxt->input->base);
            }

            input = xmlNewEntityInputStream(ctxt, entity);
            if (xmlPushInput(ctxt, input) < 0) {
                xmlFreeInputStream(input);
                return;
            }

            /* Mark the entity only once its input is really pushed. */
            entity->flags |= XML_ENT_EXPANDING;

            input->parentConsumed = parentConsumed;

            /*
             * An external parameter entity may carry its own encoding
             * and start with a text declaration.
             */
            if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
                xmlDetectEncoding(ctxt);

                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
                    (IS_BLANK_CH(NXT(5)))) {
                    xmlParseTextDecl(ctxt);
                }
            }
        }
    }
    ctxt->hasPErefs = 1;
}
8186
/**
 * xmlLoadEntityContent:
 * @ctxt:  an XML parser context
 * @entity:  an unloaded system entity
 *
 * Load the original content of the given system entity from the
 * ExternalID/SystemID given. This is to be used for Included in Literal
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
 *
 * The entity must be an external parameter entity or an external general
 * parsed entity whose content is still NULL. The parser's input stack,
 * encoding and progressive flag are replaced for the duration of the load
 * and restored before returning, on success and on failure alike. On
 * success the loaded buffer is stored in entity->content and its size in
 * entity->length.
 *
 * Returns 0 in case of success and -1 in case of failure
 */
static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    xmlParserInputPtr oldinput, input = NULL;
    xmlParserInputPtr *oldinputTab;
    const xmlChar *oldencoding;
    xmlChar *content = NULL;
    size_t length, i;
    int oldinputNr, oldinputMax, oldprogressive;
    int ret = -1;
    int res;

    if ((ctxt == NULL) || (entity == NULL) ||
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
         (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
        (entity->content != NULL)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "xmlLoadEntityContent parameter error");
        return(-1);
    }

    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "Reading %s entity content input\n", entity->name);

    input = xmlLoadExternalEntity((char *) entity->URI,
           (char *) entity->ExternalID, ctxt);
    if (input == NULL) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "xmlLoadEntityContent input error");
        return(-1);
    }

    /* Save the current input state; it is restored at the error label. */
    oldinput = ctxt->input;
    oldinputNr = ctxt->inputNr;
    oldinputMax = ctxt->inputMax;
    oldinputTab = ctxt->inputTab;
    oldencoding = ctxt->encoding;
    oldprogressive = ctxt->progressive;

    /* Install a fresh, empty one-slot input stack for the entity. */
    ctxt->input = NULL;
    ctxt->inputNr = 0;
    ctxt->inputMax = 1;
    ctxt->encoding = NULL;
    ctxt->progressive = 0;
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
    if (ctxt->inputTab == NULL) {
        xmlErrMemory(ctxt, NULL);
        /* input was never pushed, so it has to be released here. */
        xmlFreeInputStream(input);
        goto error;
    }

    xmlBufResetInput(input->buf->buffer, input);

    /* From here on the input is on the stack and freed by the cleanup. */
    inputPush(ctxt, input);

    xmlDetectEncoding(ctxt);

    /*
     * Parse a possible text declaration first
     */
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
        xmlParseTextDecl(ctxt);
        /*
         * An XML-1.0 document can't reference an entity not XML-1.0
         */
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
                           "Version mismatch between document and entity\n");
        }
    }

    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /*
     * Drop what was consumed so far (the text declaration, if any) from
     * the buffer and account for it in the entity size counter.
     */
    length = input->cur - input->base;
    xmlBufShrink(input->buf->buffer, length);
    xmlSaturatedAdd(&ctxt->sizeentities, length);

    /* Read the remaining input in 4096-byte requests until exhausted. */
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
        ;

    xmlBufResetInput(input->buf->buffer, input);

    if (res < 0) {
        xmlFatalErr(ctxt, input->buf->error, NULL);
        goto error;
    }

    /* Take the raw bytes out of the buffer. */
    length = xmlBufUse(input->buf->buffer);
    content = xmlBufDetach(input->buf->buffer);

    /* The length is stored in int-sized quantities below. */
    if (length > INT_MAX) {
        xmlErrMemory(ctxt, NULL);
        goto error;
    }

    /* Check that the content is made of valid UTF-8 encoded XML chars. */
    for (i = 0; i < length; ) {
        int clen = length - i;
        int c = xmlGetUTF8Char(content + i, &clen);

        if ((c < 0) || (!IS_CHAR(c))) {
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
                              "xmlLoadEntityContent: invalid char value %d\n",
                              content[i]);
            goto error;
        }
        i += clen;
    }

    xmlSaturatedAdd(&ctxt->sizeentities, length);
    entity->content = content;
    entity->length = length;
    /* Ownership moved to the entity: keep the cleanup from freeing it. */
    content = NULL;
    ret = 0;

    /* Common exit, also reached on success. */
error:
    /* Tear down the temporary input stack ... */
    while (ctxt->inputNr > 0)
        xmlFreeInputStream(inputPop(ctxt));
    xmlFree(ctxt->inputTab);
    xmlFree((xmlChar *) ctxt->encoding);

    /* ... and restore the state saved on entry. */
    ctxt->input = oldinput;
    ctxt->inputNr = oldinputNr;
    ctxt->inputMax = oldinputMax;
    ctxt->inputTab = oldinputTab;
    ctxt->encoding = oldencoding;
    ctxt->progressive = oldprogressive;

    xmlFree(content);

    return(ret);
}
8331
8332/**
8333 * xmlParseStringPEReference:
8334 * @ctxt: an XML parser context
8335 * @str: a pointer to an index in the string
8336 *
8337 * parse PEReference declarations
8338 *
8339 * [69] PEReference ::= '%' Name ';'
8340 *
8341 * [ WFC: No Recursion ]
8342 * A parsed entity must not contain a recursive
8343 * reference to itself, either directly or indirectly.
8344 *
8345 * [ WFC: Entity Declared ]
8346 * In a document without any DTD, a document with only an internal DTD
8347 * subset which contains no parameter entity references, or a document
8348 * with "standalone='yes'", ... ... The declaration of a parameter
8349 * entity must precede any reference to it...
8350 *
8351 * [ VC: Entity Declared ]
8352 * In a document with an external subset or external parameter entities
8353 * with "standalone='no'", ... ... The declaration of a parameter entity
8354 * must precede any reference to it...
8355 *
8356 * [ WFC: In DTD ]
8357 * Parameter-entity references may only appear in the DTD.
8358 * NOTE: misleading but this is handled.
8359 *
8360 * Returns the string of the entity content.
8361 * str is updated to the current value of the index
8362 */
8363static xmlEntityPtr
8364xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8365 const xmlChar *ptr;
8366 xmlChar cur;
8367 xmlChar *name;
8368 xmlEntityPtr entity = NULL;
8369
8370 if ((str == NULL) || (*str == NULL)) return(NULL);
8371 ptr = *str;
8372 cur = *ptr;
8373 if (cur != '%')
8374 return(NULL);
8375 ptr++;
8376 name = xmlParseStringName(ctxt, &ptr);
8377 if (name == NULL) {
8378 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8379 "xmlParseStringPEReference: no name\n");
8380 *str = ptr;
8381 return(NULL);
8382 }
8383 cur = *ptr;
8384 if (cur != ';') {
8385 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8386 xmlFree(name);
8387 *str = ptr;
8388 return(NULL);
8389 }
8390 ptr++;
8391
8392 /*
8393 * Request the entity from SAX
8394 */
8395 if ((ctxt->sax != NULL) &&
8396 (ctxt->sax->getParameterEntity != NULL))
8397 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8398 if (ctxt->instate == XML_PARSER_EOF) {
8399 xmlFree(name);
8400 *str = ptr;
8401 return(NULL);
8402 }
8403 if (entity == NULL) {
8404 /*
8405 * [ WFC: Entity Declared ]
8406 * In a document without any DTD, a document with only an
8407 * internal DTD subset which contains no parameter entity
8408 * references, or a document with "standalone='yes'", ...
8409 * ... The declaration of a parameter entity must precede
8410 * any reference to it...
8411 */
8412 if ((ctxt->standalone == 1) ||
8413 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8414 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8415 "PEReference: %%%s; not found\n", name);
8416 } else {
8417 /*
8418 * [ VC: Entity Declared ]
8419 * In a document with an external subset or external
8420 * parameter entities with "standalone='no'", ...
8421 * ... The declaration of a parameter entity must
8422 * precede any reference to it...
8423 */
8424 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8425 "PEReference: %%%s; not found\n",
8426 name, NULL);
8427 ctxt->valid = 0;
8428 }
8429 } else {
8430 /*
8431 * Internal checking in case the entity quest barfed
8432 */
8433 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8434 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8435 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8436 "%%%s; is not a parameter entity\n",
8437 name, NULL);
8438 }
8439 }
8440 ctxt->hasPErefs = 1;
8441 xmlFree(name);
8442 *str = ptr;
8443 return(entity);
8444}
8445
8446/**
8447 * xmlParseDocTypeDecl:
8448 * @ctxt: an XML parser context
8449 *
8450 * DEPRECATED: Internal function, don't use.
8451 *
8452 * parse a DOCTYPE declaration
8453 *
8454 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8455 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8456 *
8457 * [ VC: Root Element Type ]
8458 * The Name in the document type declaration must match the element
8459 * type of the root element.
8460 */
8461
8462void
8463xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8464 const xmlChar *name = NULL;
8465 xmlChar *ExternalID = NULL;
8466 xmlChar *URI = NULL;
8467
8468 /*
8469 * We know that '<!DOCTYPE' has been detected.
8470 */
8471 SKIP(9);
8472
8473 SKIP_BLANKS;
8474
8475 /*
8476 * Parse the DOCTYPE name.
8477 */
8478 name = xmlParseName(ctxt);
8479 if (name == NULL) {
8480 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8481 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8482 }
8483 ctxt->intSubName = name;
8484
8485 SKIP_BLANKS;
8486
8487 /*
8488 * Check for SystemID and ExternalID
8489 */
8490 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8491
8492 if ((URI != NULL) || (ExternalID != NULL)) {
8493 ctxt->hasExternalSubset = 1;
8494 }
8495 ctxt->extSubURI = URI;
8496 ctxt->extSubSystem = ExternalID;
8497
8498 SKIP_BLANKS;
8499
8500 /*
8501 * Create and update the internal subset.
8502 */
8503 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8504 (!ctxt->disableSAX))
8505 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8506 if (ctxt->instate == XML_PARSER_EOF)
8507 return;
8508
8509 /*
8510 * Is there any internal subset declarations ?
8511 * they are handled separately in xmlParseInternalSubset()
8512 */
8513 if (RAW == '[')
8514 return;
8515
8516 /*
8517 * We should be at the end of the DOCTYPE declaration.
8518 */
8519 if (RAW != '>') {
8520 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8521 }
8522 NEXT;
8523}
8524
8525/**
8526 * xmlParseInternalSubset:
8527 * @ctxt: an XML parser context
8528 *
8529 * parse the internal subset declaration
8530 *
8531 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8532 */
8533
8534static void
8535xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8536 /*
8537 * Is there any DTD definition ?
8538 */
8539 if (RAW == '[') {
8540 int baseInputNr = ctxt->inputNr;
8541 ctxt->instate = XML_PARSER_DTD;
8542 NEXT;
8543 /*
8544 * Parse the succession of Markup declarations and
8545 * PEReferences.
8546 * Subsequence (markupdecl | PEReference | S)*
8547 */
8548 SKIP_BLANKS;
8549 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8550 (ctxt->instate != XML_PARSER_EOF)) {
8551
8552 /*
8553 * Conditional sections are allowed from external entities included
8554 * by PE References in the internal subset.
8555 */
8556 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8557 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8558 xmlParseConditionalSections(ctxt);
8559 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8560 xmlParseMarkupDecl(ctxt);
8561 } else if (RAW == '%') {
8562 xmlParsePEReference(ctxt);
8563 } else {
8564 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8565 "xmlParseInternalSubset: error detected in"
8566 " Markup declaration\n");
8567 xmlHaltParser(ctxt);
8568 return;
8569 }
8570 SKIP_BLANKS;
8571 SHRINK;
8572 GROW;
8573 }
8574 if (RAW == ']') {
8575 NEXT;
8576 SKIP_BLANKS;
8577 }
8578 }
8579
8580 /*
8581 * We should be at the end of the DOCTYPE declaration.
8582 */
8583 if (RAW != '>') {
8584 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8585 return;
8586 }
8587 NEXT;
8588}
8589
8590#ifdef LIBXML_SAX1_ENABLED
8591/**
8592 * xmlParseAttribute:
8593 * @ctxt: an XML parser context
8594 * @value: a xmlChar ** used to store the value of the attribute
8595 *
8596 * DEPRECATED: Internal function, don't use.
8597 *
8598 * parse an attribute
8599 *
8600 * [41] Attribute ::= Name Eq AttValue
8601 *
8602 * [ WFC: No External Entity References ]
8603 * Attribute values cannot contain direct or indirect entity references
8604 * to external entities.
8605 *
8606 * [ WFC: No < in Attribute Values ]
8607 * The replacement text of any entity referred to directly or indirectly in
8608 * an attribute value (other than "&lt;") must not contain a <.
8609 *
8610 * [ VC: Attribute Value Type ]
8611 * The attribute must have been declared; the value must be of the type
8612 * declared for it.
8613 *
8614 * [25] Eq ::= S? '=' S?
8615 *
8616 * With namespace:
8617 *
8618 * [NS 11] Attribute ::= QName Eq AttValue
8619 *
8620 * Also the case QName == xmlns:??? is handled independently as a namespace
8621 * definition.
8622 *
8623 * Returns the attribute name, and the value in *value.
8624 */
8625
8626const xmlChar *
8627xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8628 const xmlChar *name;
8629 xmlChar *val;
8630
8631 *value = NULL;
8632 GROW;
8633 name = xmlParseName(ctxt);
8634 if (name == NULL) {
8635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8636 "error parsing attribute name\n");
8637 return(NULL);
8638 }
8639
8640 /*
8641 * read the value
8642 */
8643 SKIP_BLANKS;
8644 if (RAW == '=') {
8645 NEXT;
8646 SKIP_BLANKS;
8647 val = xmlParseAttValue(ctxt);
8648 ctxt->instate = XML_PARSER_CONTENT;
8649 } else {
8650 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8651 "Specification mandates value for attribute %s\n", name);
8652 return(name);
8653 }
8654
8655 /*
8656 * Check that xml:lang conforms to the specification
8657 * No more registered as an error, just generate a warning now
8658 * since this was deprecated in XML second edition
8659 */
8660 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8661 if (!xmlCheckLanguageID(val)) {
8662 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8663 "Malformed value for xml:lang : %s\n",
8664 val, NULL);
8665 }
8666 }
8667
8668 /*
8669 * Check that xml:space conforms to the specification
8670 */
8671 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8672 if (xmlStrEqual(val, BAD_CAST "default"))
8673 *(ctxt->space) = 0;
8674 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8675 *(ctxt->space) = 1;
8676 else {
8677 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8678"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8679 val, NULL);
8680 }
8681 }
8682
8683 *value = val;
8684 return(name);
8685}
8686
8687/**
8688 * xmlParseStartTag:
8689 * @ctxt: an XML parser context
8690 *
8691 * DEPRECATED: Internal function, don't use.
8692 *
8693 * Parse a start tag. Always consumes '<'.
8694 *
8695 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8696 *
8697 * [ WFC: Unique Att Spec ]
8698 * No attribute name may appear more than once in the same start-tag or
8699 * empty-element tag.
8700 *
8701 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8702 *
8703 * [ WFC: Unique Att Spec ]
8704 * No attribute name may appear more than once in the same start-tag or
8705 * empty-element tag.
8706 *
8707 * With namespace:
8708 *
8709 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8710 *
8711 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8712 *
8713 * Returns the element name parsed
8714 */
8715
8716const xmlChar *
8717xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8718 const xmlChar *name;
8719 const xmlChar *attname;
8720 xmlChar *attvalue;
8721 const xmlChar **atts = ctxt->atts;
8722 int nbatts = 0;
8723 int maxatts = ctxt->maxatts;
8724 int i;
8725
8726 if (RAW != '<') return(NULL);
8727 NEXT1;
8728
8729 name = xmlParseName(ctxt);
8730 if (name == NULL) {
8731 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8732 "xmlParseStartTag: invalid element name\n");
8733 return(NULL);
8734 }
8735
8736 /*
8737 * Now parse the attributes, it ends up with the ending
8738 *
8739 * (S Attribute)* S?
8740 */
8741 SKIP_BLANKS;
8742 GROW;
8743
8744 while (((RAW != '>') &&
8745 ((RAW != '/') || (NXT(1) != '>')) &&
8746 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8747 attname = xmlParseAttribute(ctxt, &attvalue);
8748 if (attname == NULL) {
8749 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8750 "xmlParseStartTag: problem parsing attributes\n");
8751 break;
8752 }
8753 if (attvalue != NULL) {
8754 /*
8755 * [ WFC: Unique Att Spec ]
8756 * No attribute name may appear more than once in the same
8757 * start-tag or empty-element tag.
8758 */
8759 for (i = 0; i < nbatts;i += 2) {
8760 if (xmlStrEqual(atts[i], attname)) {
8761 xmlErrAttributeDup(ctxt, NULL, attname);
8762 xmlFree(attvalue);
8763 goto failed;
8764 }
8765 }
8766 /*
8767 * Add the pair to atts
8768 */
8769 if (atts == NULL) {
8770 maxatts = 22; /* allow for 10 attrs by default */
8771 atts = (const xmlChar **)
8772 xmlMalloc(maxatts * sizeof(xmlChar *));
8773 if (atts == NULL) {
8774 xmlErrMemory(ctxt, NULL);
8775 if (attvalue != NULL)
8776 xmlFree(attvalue);
8777 goto failed;
8778 }
8779 ctxt->atts = atts;
8780 ctxt->maxatts = maxatts;
8781 } else if (nbatts + 4 > maxatts) {
8782 const xmlChar **n;
8783
8784 maxatts *= 2;
8785 n = (const xmlChar **) xmlRealloc((void *) atts,
8786 maxatts * sizeof(const xmlChar *));
8787 if (n == NULL) {
8788 xmlErrMemory(ctxt, NULL);
8789 if (attvalue != NULL)
8790 xmlFree(attvalue);
8791 goto failed;
8792 }
8793 atts = n;
8794 ctxt->atts = atts;
8795 ctxt->maxatts = maxatts;
8796 }
8797 atts[nbatts++] = attname;
8798 atts[nbatts++] = attvalue;
8799 atts[nbatts] = NULL;
8800 atts[nbatts + 1] = NULL;
8801 } else {
8802 if (attvalue != NULL)
8803 xmlFree(attvalue);
8804 }
8805
8806failed:
8807
8808 GROW
8809 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8810 break;
8811 if (SKIP_BLANKS == 0) {
8812 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8813 "attributes construct error\n");
8814 }
8815 SHRINK;
8816 GROW;
8817 }
8818
8819 /*
8820 * SAX: Start of Element !
8821 */
8822 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8823 (!ctxt->disableSAX)) {
8824 if (nbatts > 0)
8825 ctxt->sax->startElement(ctxt->userData, name, atts);
8826 else
8827 ctxt->sax->startElement(ctxt->userData, name, NULL);
8828 }
8829
8830 if (atts != NULL) {
8831 /* Free only the content strings */
8832 for (i = 1;i < nbatts;i+=2)
8833 if (atts[i] != NULL)
8834 xmlFree((xmlChar *) atts[i]);
8835 }
8836 return(name);
8837}
8838
8839/**
8840 * xmlParseEndTag1:
8841 * @ctxt: an XML parser context
8842 * @line: line of the start tag
8843 * @nsNr: number of namespaces on the start tag
8844 *
8845 * Parse an end tag. Always consumes '</'.
8846 *
8847 * [42] ETag ::= '</' Name S? '>'
8848 *
8849 * With namespace
8850 *
8851 * [NS 9] ETag ::= '</' QName S? '>'
8852 */
8853
8854static void
8855xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8856 const xmlChar *name;
8857
8858 GROW;
8859 if ((RAW != '<') || (NXT(1) != '/')) {
8860 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8861 "xmlParseEndTag: '</' not found\n");
8862 return;
8863 }
8864 SKIP(2);
8865
8866 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8867
8868 /*
8869 * We should definitely be at the ending "S? '>'" part
8870 */
8871 GROW;
8872 SKIP_BLANKS;
8873 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8874 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8875 } else
8876 NEXT1;
8877
8878 /*
8879 * [ WFC: Element Type Match ]
8880 * The Name in an element's end-tag must match the element type in the
8881 * start-tag.
8882 *
8883 */
8884 if (name != (xmlChar*)1) {
8885 if (name == NULL) name = BAD_CAST "unparsable";
8886 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8887 "Opening and ending tag mismatch: %s line %d and %s\n",
8888 ctxt->name, line, name);
8889 }
8890
8891 /*
8892 * SAX: End of Tag
8893 */
8894 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8895 (!ctxt->disableSAX))
8896 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8897
8898 namePop(ctxt);
8899 spacePop(ctxt);
8900 return;
8901}
8902
8903/**
8904 * xmlParseEndTag:
8905 * @ctxt: an XML parser context
8906 *
8907 * DEPRECATED: Internal function, don't use.
8908 *
8909 * parse an end of tag
8910 *
8911 * [42] ETag ::= '</' Name S? '>'
8912 *
8913 * With namespace
8914 *
8915 * [NS 9] ETag ::= '</' QName S? '>'
8916 */
8917
8918void
8919xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8920 xmlParseEndTag1(ctxt, 0);
8921}
8922#endif /* LIBXML_SAX1_ENABLED */
8923
8924/************************************************************************
8925 * *
8926 * SAX 2 specific operations *
8927 * *
8928 ************************************************************************/
8929
/**
 * xmlParseQNameHashed:
 * @ctxt:  an XML parser context
 * @prefix:  pointer to store the prefix part
 *
 * parse an XML Namespace QName
 *
 * [6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * When the input is not a valid QName but still looks like a name, the
 * whole token is consumed, interned in the dictionary and returned as the
 * local name, and a namespace error is raised.
 *
 * Returns the Name parsed, with a NULL name member on failure. *prefix
 * is only written when the function runs to its end; it is left untouched
 * by the early returns, which all return a NULL name.
 */

static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
    xmlHashedString l, p;
    int start, isNCName = 0;

    l.name = NULL;
    p.name = NULL;

    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(l);
    /*
     * Remember where the name starts as an offset from the buffer base
     * rather than as a pointer; the recovery path below rebuilds the
     * pointer from BASE_PTR after more parsing has taken place.
     */
    start = CUR_PTR - BASE_PTR;

    l = xmlParseNCName(ctxt);
    if (l.name != NULL) {
        isNCName = 1;
        /* "prefix:local" form: what was read so far is the prefix. */
        if (CUR == ':') {
            NEXT;
            p = l;
            l = xmlParseNCName(ctxt);
        }
    }
    /*
     * Not a QName: either no local part could be parsed or another ':'
     * follows it.
     */
    if ((l.name == NULL) || (CUR == ':')) {
        xmlChar *tmp;

        l.name = NULL;
        p.name = NULL;
        if (ctxt->instate == XML_PARSER_EOF)
            return(l);
        /* Nothing name-like at all: fail without a namespace error. */
        if ((isNCName == 0) && (CUR != ':'))
            return(l);
        /* Consume the rest of the token; its value itself is not needed. */
        tmp = xmlParseNmtoken(ctxt);
        if (tmp != NULL)
            xmlFree(tmp);
        if (ctxt->instate == XML_PARSER_EOF)
            return(l);
        /* Use everything from the start offset up to here as the name. */
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
                                CUR_PTR - (BASE_PTR + start));
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
    }

    *prefix = p;
    return(l);
}
8989
8990/**
8991 * xmlParseQName:
8992 * @ctxt: an XML parser context
8993 * @prefix: pointer to store the prefix part
8994 *
8995 * parse an XML Namespace QName
8996 *
8997 * [6] QName ::= (Prefix ':')? LocalPart
8998 * [7] Prefix ::= NCName
8999 * [8] LocalPart ::= NCName
9000 *
9001 * Returns the Name parsed or NULL
9002 */
9003
9004static const xmlChar *
9005xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
9006 xmlHashedString n, p;
9007
9008 n = xmlParseQNameHashed(ctxt, &p);
9009 if (n.name == NULL)
9010 return(NULL);
9011 *prefix = p.name;
9012 return(n.name);
9013}
9014
9015/**
9016 * xmlParseQNameAndCompare:
9017 * @ctxt: an XML parser context
9018 * @name: the localname
9019 * @prefix: the prefix, if any.
9020 *
9021 * parse an XML name and compares for match
9022 * (specialized for endtag parsing)
9023 *
9024 * Returns NULL for an illegal name, (xmlChar*) 1 for success
9025 * and the name for mismatch
9026 */
9027
9028static const xmlChar *
9029xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
9030 xmlChar const *prefix) {
9031 const xmlChar *cmp;
9032 const xmlChar *in;
9033 const xmlChar *ret;
9034 const xmlChar *prefix2;
9035
9036 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
9037
9038 GROW;
9039 in = ctxt->input->cur;
9040
9041 cmp = prefix;
9042 while (*in != 0 && *in == *cmp) {
9043 ++in;
9044 ++cmp;
9045 }
9046 if ((*cmp == 0) && (*in == ':')) {
9047 in++;
9048 cmp = name;
9049 while (*in != 0 && *in == *cmp) {
9050 ++in;
9051 ++cmp;
9052 }
9053 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
9054 /* success */
9055 ctxt->input->col += in - ctxt->input->cur;
9056 ctxt->input->cur = in;
9057 return((const xmlChar*) 1);
9058 }
9059 }
9060 /*
9061 * all strings coms from the dictionary, equality can be done directly
9062 */
9063 ret = xmlParseQName (ctxt, &prefix2);
9064 if (ret == NULL)
9065 return(NULL);
9066 if ((ret == name) && (prefix == prefix2))
9067 return((const xmlChar*) 1);
9068 return ret;
9069}
9070
9071/**
9072 * xmlParseAttValueInternal:
9073 * @ctxt: an XML parser context
9074 * @len: attribute len result
9075 * @alloc: whether the attribute was reallocated as a new string
9076 * @normalize: if 1 then further non-CDATA normalization must be done
9077 *
9078 * parse a value for an attribute.
9079 * NOTE: if no normalization is needed, the routine will return pointers
9080 * directly from the data buffer.
9081 *
9082 * 3.3.3 Attribute-Value Normalization:
9083 * Before the value of an attribute is passed to the application or
9084 * checked for validity, the XML processor must normalize it as follows:
9085 * - a character reference is processed by appending the referenced
9086 * character to the attribute value
9087 * - an entity reference is processed by recursively processing the
9088 * replacement text of the entity
9089 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9090 * appending #x20 to the normalized value, except that only a single
9091 * #x20 is appended for a "#xD#xA" sequence that is part of an external
9092 * parsed entity or the literal entity value of an internal parsed entity
9093 * - other characters are processed by appending them to the normalized value
9094 * If the declared value is not CDATA, then the XML processor must further
9095 * process the normalized attribute value by discarding any leading and
9096 * trailing space (#x20) characters, and by replacing sequences of space
9097 * (#x20) characters by a single space (#x20) character.
9098 * All attributes for which no declaration has been read should be treated
9099 * by a non-validating parser as if declared CDATA.
9100 *
9101 * Returns the AttValue parsed or NULL. The value has to be freed by the
9102 * caller if it was copied, this can be detected by val[*len] == 0.
9103 */
9104
9105#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9106 const xmlChar *oldbase = ctxt->input->base;\
9107 GROW;\
9108 if (ctxt->instate == XML_PARSER_EOF)\
9109 return(NULL);\
9110 if (oldbase != ctxt->input->base) {\
9111 ptrdiff_t delta = ctxt->input->base - oldbase;\
9112 start = start + delta;\
9113 in = in + delta;\
9114 }\
9115 end = ctxt->input->end;
9116
9117static xmlChar *
9118xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9119 int normalize)
9120{
9121 xmlChar limit = 0;
9122 const xmlChar *in = NULL, *start, *end, *last;
9123 xmlChar *ret = NULL;
9124 int line, col;
9125 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9126 XML_MAX_HUGE_LENGTH :
9127 XML_MAX_TEXT_LENGTH;
9128
9129 GROW;
9130 in = (xmlChar *) CUR_PTR;
9131 line = ctxt->input->line;
9132 col = ctxt->input->col;
9133 if (*in != '"' && *in != '\'') {
9134 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9135 return (NULL);
9136 }
9137 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9138
9139 /*
9140 * try to handle in this routine the most common case where no
9141 * allocation of a new string is required and where content is
9142 * pure ASCII.
9143 */
9144 limit = *in++;
9145 col++;
9146 end = ctxt->input->end;
9147 start = in;
9148 if (in >= end) {
9149 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9150 }
9151 if (normalize) {
9152 /*
9153 * Skip any leading spaces
9154 */
9155 while ((in < end) && (*in != limit) &&
9156 ((*in == 0x20) || (*in == 0x9) ||
9157 (*in == 0xA) || (*in == 0xD))) {
9158 if (*in == 0xA) {
9159 line++; col = 1;
9160 } else {
9161 col++;
9162 }
9163 in++;
9164 start = in;
9165 if (in >= end) {
9166 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9167 if ((in - start) > maxLength) {
9168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9169 "AttValue length too long\n");
9170 return(NULL);
9171 }
9172 }
9173 }
9174 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9175 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9176 col++;
9177 if ((*in++ == 0x20) && (*in == 0x20)) break;
9178 if (in >= end) {
9179 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9180 if ((in - start) > maxLength) {
9181 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9182 "AttValue length too long\n");
9183 return(NULL);
9184 }
9185 }
9186 }
9187 last = in;
9188 /*
9189 * skip the trailing blanks
9190 */
9191 while ((last[-1] == 0x20) && (last > start)) last--;
9192 while ((in < end) && (*in != limit) &&
9193 ((*in == 0x20) || (*in == 0x9) ||
9194 (*in == 0xA) || (*in == 0xD))) {
9195 if (*in == 0xA) {
9196 line++, col = 1;
9197 } else {
9198 col++;
9199 }
9200 in++;
9201 if (in >= end) {
9202 const xmlChar *oldbase = ctxt->input->base;
9203 GROW;
9204 if (ctxt->instate == XML_PARSER_EOF)
9205 return(NULL);
9206 if (oldbase != ctxt->input->base) {
9207 ptrdiff_t delta = ctxt->input->base - oldbase;
9208 start = start + delta;
9209 in = in + delta;
9210 last = last + delta;
9211 }
9212 end = ctxt->input->end;
9213 if ((in - start) > maxLength) {
9214 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9215 "AttValue length too long\n");
9216 return(NULL);
9217 }
9218 }
9219 }
9220 if ((in - start) > maxLength) {
9221 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9222 "AttValue length too long\n");
9223 return(NULL);
9224 }
9225 if (*in != limit) goto need_complex;
9226 } else {
9227 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9228 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9229 in++;
9230 col++;
9231 if (in >= end) {
9232 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9233 if ((in - start) > maxLength) {
9234 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9235 "AttValue length too long\n");
9236 return(NULL);
9237 }
9238 }
9239 }
9240 last = in;
9241 if ((in - start) > maxLength) {
9242 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9243 "AttValue length too long\n");
9244 return(NULL);
9245 }
9246 if (*in != limit) goto need_complex;
9247 }
9248 in++;
9249 col++;
9250 if (len != NULL) {
9251 if (alloc) *alloc = 0;
9252 *len = last - start;
9253 ret = (xmlChar *) start;
9254 } else {
9255 if (alloc) *alloc = 1;
9256 ret = xmlStrndup(start, last - start);
9257 }
9258 CUR_PTR = in;
9259 ctxt->input->line = line;
9260 ctxt->input->col = col;
9261 return ret;
9262need_complex:
9263 if (alloc) *alloc = 1;
9264 return xmlParseAttValueComplex(ctxt, len, normalize);
9265}
9266
9267/**
9268 * xmlParseAttribute2:
9269 * @ctxt: an XML parser context
9270 * @pref: the element prefix
9271 * @elem: the element name
9272 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9273 * @value: a xmlChar ** used to store the value of the attribute
9274 * @len: an int * to save the length of the attribute
9275 * @alloc: an int * to indicate if the attribute was allocated
9276 *
9277 * parse an attribute in the new SAX2 framework.
9278 *
9279 * Returns the attribute name, and the value in *value, .
9280 */
9281
9282static xmlHashedString
9283xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9284 const xmlChar * pref, const xmlChar * elem,
9285 xmlHashedString * hprefix, xmlChar ** value,
9286 int *len, int *alloc)
9287{
9288 xmlHashedString hname;
9289 const xmlChar *prefix, *name;
9290 xmlChar *val, *internal_val = NULL;
9291 int normalize = 0;
9292
9293 *value = NULL;
9294 GROW;
9295 hname = xmlParseQNameHashed(ctxt, hprefix);
9296 if (hname.name == NULL) {
9297 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9298 "error parsing attribute name\n");
9299 return(hname);
9300 }
9301 name = hname.name;
9302 if (hprefix->name != NULL)
9303 prefix = hprefix->name;
9304 else
9305 prefix = NULL;
9306
9307 /*
9308 * get the type if needed
9309 */
9310 if (ctxt->attsSpecial != NULL) {
9311 int type;
9312
9313 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9314 pref, elem,
9315 prefix, name);
9316 if (type != 0)
9317 normalize = 1;
9318 }
9319
9320 /*
9321 * read the value
9322 */
9323 SKIP_BLANKS;
9324 if (RAW == '=') {
9325 NEXT;
9326 SKIP_BLANKS;
9327 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9328 if (val == NULL) {
9329 hname.name = NULL;
9330 return(hname);
9331 }
9332 if (normalize) {
9333 /*
9334 * Sometimes a second normalisation pass for spaces is needed
9335 * but that only happens if charrefs or entities references
9336 * have been used in the attribute value, i.e. the attribute
9337 * value have been extracted in an allocated string already.
9338 */
9339 if (*alloc) {
9340 const xmlChar *val2;
9341
9342 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9343 if ((val2 != NULL) && (val2 != val)) {
9344 xmlFree(val);
9345 val = (xmlChar *) val2;
9346 }
9347 }
9348 }
9349 ctxt->instate = XML_PARSER_CONTENT;
9350 } else {
9351 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9352 "Specification mandates value for attribute %s\n",
9353 name);
9354 return(hname);
9355 }
9356
9357 if (prefix == ctxt->str_xml) {
9358 /*
9359 * Check that xml:lang conforms to the specification
9360 * No more registered as an error, just generate a warning now
9361 * since this was deprecated in XML second edition
9362 */
9363 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9364 internal_val = xmlStrndup(val, *len);
9365 if (!xmlCheckLanguageID(internal_val)) {
9366 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9367 "Malformed value for xml:lang : %s\n",
9368 internal_val, NULL);
9369 }
9370 }
9371
9372 /*
9373 * Check that xml:space conforms to the specification
9374 */
9375 if (xmlStrEqual(name, BAD_CAST "space")) {
9376 internal_val = xmlStrndup(val, *len);
9377 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9378 *(ctxt->space) = 0;
9379 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9380 *(ctxt->space) = 1;
9381 else {
9382 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9383 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9384 internal_val, NULL);
9385 }
9386 }
9387 if (internal_val) {
9388 xmlFree(internal_val);
9389 }
9390 }
9391
9392 *value = val;
9393 return (hname);
9394}
9395
9396/**
9397 * xmlAttrHashInsert:
9398 * @ctxt: parser context
9399 * @size: size of the hash table
9400 * @name: attribute name
9401 * @uri: namespace uri
9402 * @hashValue: combined hash value of name and uri
9403 * @aindex: attribute index (this is a multiple of 5)
9404 *
9405 * Inserts a new attribute into the hash table.
9406 *
9407 * Returns INT_MAX if no existing attribute was found, the attribute
9408 * index if an attribute was found, -1 if a memory allocation failed.
9409 */
9410static int
9411xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
9412 const xmlChar *uri, unsigned hashValue, int aindex) {
9413 xmlAttrHashBucket *table = ctxt->attrHash;
9414 xmlAttrHashBucket *bucket;
9415 unsigned hindex;
9416
9417 hindex = hashValue & (size - 1);
9418 bucket = &table[hindex];
9419
9420 while (bucket->index >= 0) {
9421 const xmlChar **atts = &ctxt->atts[bucket->index];
9422
9423 if (name == atts[0]) {
9424 int nsIndex = (int) (ptrdiff_t) atts[2];
9425
9426 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
9427 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
9428 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
9429 return(bucket->index);
9430 }
9431
9432 hindex++;
9433 bucket++;
9434 if (hindex >= size) {
9435 hindex = 0;
9436 bucket = table;
9437 }
9438 }
9439
9440 bucket->index = aindex;
9441
9442 return(INT_MAX);
9443}
9444
9445/**
9446 * xmlParseStartTag2:
9447 * @ctxt: an XML parser context
9448 *
9449 * Parse a start tag. Always consumes '<'.
9450 *
9451 * This routine is called when running SAX2 parsing
9452 *
9453 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9454 *
9455 * [ WFC: Unique Att Spec ]
9456 * No attribute name may appear more than once in the same start-tag or
9457 * empty-element tag.
9458 *
9459 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9460 *
9461 * [ WFC: Unique Att Spec ]
9462 * No attribute name may appear more than once in the same start-tag or
9463 * empty-element tag.
9464 *
9465 * With namespace:
9466 *
9467 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9468 *
9469 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9470 *
9471 * Returns the element name parsed
9472 */
9473
9474static const xmlChar *
9475xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9476 const xmlChar **URI, int *nbNsPtr) {
9477 xmlHashedString hlocalname;
9478 xmlHashedString hprefix;
9479 xmlHashedString hattname;
9480 xmlHashedString haprefix;
9481 const xmlChar *localname;
9482 const xmlChar *prefix;
9483 const xmlChar *attname;
9484 const xmlChar *aprefix;
9485 const xmlChar *uri;
9486 xmlChar *attvalue = NULL;
9487 const xmlChar **atts = ctxt->atts;
9488 unsigned attrHashSize = 0;
9489 int maxatts = ctxt->maxatts;
9490 int nratts, nbatts, nbdef, inputid;
9491 int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9492 int alloc = 0;
9493
9494 if (RAW != '<') return(NULL);
9495 NEXT1;
9496
9497 inputid = ctxt->input->id;
9498 nbatts = 0;
9499 nratts = 0;
9500 nbdef = 0;
9501 nbNs = 0;
9502 nbTotalDef = 0;
9503 attval = 0;
9504
9505 if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9506 xmlErrMemory(ctxt, NULL);
9507 return(NULL);
9508 }
9509
9510 hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9511 if (hlocalname.name == NULL) {
9512 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9513 "StartTag: invalid element name\n");
9514 return(NULL);
9515 }
9516 localname = hlocalname.name;
9517 prefix = hprefix.name;
9518
9519 /*
9520 * Now parse the attributes, it ends up with the ending
9521 *
9522 * (S Attribute)* S?
9523 */
9524 SKIP_BLANKS;
9525 GROW;
9526
9527 /*
9528 * The ctxt->atts array will be ultimately passed to the SAX callback
9529 * containing five xmlChar pointers for each attribute:
9530 *
9531 * [0] attribute name
9532 * [1] attribute prefix
9533 * [2] namespace URI
9534 * [3] attribute value
9535 * [4] end of attribute value
9536 *
9537 * To save memory, we reuse this array temporarily and store integers
9538 * in these pointer variables.
9539 *
9540 * [0] attribute name
9541 * [1] attribute prefix
9542 * [2] hash value of attribute prefix, and later namespace index
9543 * [3] for non-allocated values: ptrdiff_t offset into input buffer
9544 * [4] for non-allocated values: ptrdiff_t offset into input buffer
9545 *
9546 * The ctxt->attallocs array contains an additional unsigned int for
9547 * each attribute, containing the hash value of the attribute name
9548 * and the alloc flag in bit 31.
9549 */
9550
9551 while (((RAW != '>') &&
9552 ((RAW != '/') || (NXT(1) != '>')) &&
9553 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9554 int len = -1;
9555
9556 hattname = xmlParseAttribute2(ctxt, prefix, localname,
9557 &haprefix, &attvalue, &len,
9558 &alloc);
9559 if (hattname.name == NULL) {
9560 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9561 "xmlParseStartTag: problem parsing attributes\n");
9562 break;
9563 }
9564 if (attvalue == NULL)
9565 goto next_attr;
9566 attname = hattname.name;
9567 aprefix = haprefix.name;
9568 if (len < 0) len = xmlStrlen(attvalue);
9569
9570 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9571 xmlHashedString huri;
9572 xmlURIPtr parsedUri;
9573
9574 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9575 uri = huri.name;
9576 if (uri == NULL) {
9577 xmlErrMemory(ctxt, NULL);
9578 goto next_attr;
9579 }
9580 if (*uri != 0) {
9581 parsedUri = xmlParseURI((const char *) uri);
9582 if (parsedUri == NULL) {
9583 xmlNsErr(ctxt, XML_WAR_NS_URI,
9584 "xmlns: '%s' is not a valid URI\n",
9585 uri, NULL, NULL);
9586 } else {
9587 if (parsedUri->scheme == NULL) {
9588 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9589 "xmlns: URI %s is not absolute\n",
9590 uri, NULL, NULL);
9591 }
9592 xmlFreeURI(parsedUri);
9593 }
9594 if (uri == ctxt->str_xml_ns) {
9595 if (attname != ctxt->str_xml) {
9596 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9597 "xml namespace URI cannot be the default namespace\n",
9598 NULL, NULL, NULL);
9599 }
9600 goto next_attr;
9601 }
9602 if ((len == 29) &&
9603 (xmlStrEqual(uri,
9604 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9605 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9606 "reuse of the xmlns namespace name is forbidden\n",
9607 NULL, NULL, NULL);
9608 goto next_attr;
9609 }
9610 }
9611
9612 if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9613 nbNs++;
9614 } else if (aprefix == ctxt->str_xmlns) {
9615 xmlHashedString huri;
9616 xmlURIPtr parsedUri;
9617
9618 huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9619 uri = huri.name;
9620 if (uri == NULL) {
9621 xmlErrMemory(ctxt, NULL);
9622 goto next_attr;
9623 }
9624
9625 if (attname == ctxt->str_xml) {
9626 if (uri != ctxt->str_xml_ns) {
9627 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9628 "xml namespace prefix mapped to wrong URI\n",
9629 NULL, NULL, NULL);
9630 }
9631 /*
9632 * Do not keep a namespace definition node
9633 */
9634 goto next_attr;
9635 }
9636 if (uri == ctxt->str_xml_ns) {
9637 if (attname != ctxt->str_xml) {
9638 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9639 "xml namespace URI mapped to wrong prefix\n",
9640 NULL, NULL, NULL);
9641 }
9642 goto next_attr;
9643 }
9644 if (attname == ctxt->str_xmlns) {
9645 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9646 "redefinition of the xmlns prefix is forbidden\n",
9647 NULL, NULL, NULL);
9648 goto next_attr;
9649 }
9650 if ((len == 29) &&
9651 (xmlStrEqual(uri,
9652 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9653 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9654 "reuse of the xmlns namespace name is forbidden\n",
9655 NULL, NULL, NULL);
9656 goto next_attr;
9657 }
9658 if ((uri == NULL) || (uri[0] == 0)) {
9659 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9660 "xmlns:%s: Empty XML namespace is not allowed\n",
9661 attname, NULL, NULL);
9662 goto next_attr;
9663 } else {
9664 parsedUri = xmlParseURI((const char *) uri);
9665 if (parsedUri == NULL) {
9666 xmlNsErr(ctxt, XML_WAR_NS_URI,
9667 "xmlns:%s: '%s' is not a valid URI\n",
9668 attname, uri, NULL);
9669 } else {
9670 if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9671 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9672 "xmlns:%s: URI %s is not absolute\n",
9673 attname, uri, NULL);
9674 }
9675 xmlFreeURI(parsedUri);
9676 }
9677 }
9678
9679 if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9680 nbNs++;
9681 } else {
9682 /*
9683 * Populate attributes array, see above for repurposing
9684 * of xmlChar pointers.
9685 */
9686 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9687 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9688 goto next_attr;
9689 }
9690 maxatts = ctxt->maxatts;
9691 atts = ctxt->atts;
9692 }
9693 ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9694 ((unsigned) alloc << 31);
9695 atts[nbatts++] = attname;
9696 atts[nbatts++] = aprefix;
9697 atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9698 if (alloc) {
9699 atts[nbatts++] = attvalue;
9700 attvalue += len;
9701 atts[nbatts++] = attvalue;
9702 } else {
9703 /*
9704 * attvalue points into the input buffer which can be
9705 * reallocated. Store differences to input->base instead.
9706 * The pointers will be reconstructed later.
9707 */
9708 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9709 attvalue += len;
9710 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9711 }
9712 /*
9713 * tag if some deallocation is needed
9714 */
9715 if (alloc != 0) attval = 1;
9716 attvalue = NULL; /* moved into atts */
9717 }
9718
9719next_attr:
9720 if ((attvalue != NULL) && (alloc != 0)) {
9721 xmlFree(attvalue);
9722 attvalue = NULL;
9723 }
9724
9725 GROW
9726 if (ctxt->instate == XML_PARSER_EOF)
9727 break;
9728 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9729 break;
9730 if (SKIP_BLANKS == 0) {
9731 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9732 "attributes construct error\n");
9733 break;
9734 }
9735 GROW;
9736 }
9737
9738 if (ctxt->input->id != inputid) {
9739 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9740 "Unexpected change of input\n");
9741 localname = NULL;
9742 goto done;
9743 }
9744
9745 /*
9746 * Namespaces from default attributes
9747 */
9748 if (ctxt->attsDefault != NULL) {
9749 xmlDefAttrsPtr defaults;
9750
9751 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9752 if (defaults != NULL) {
9753 for (i = 0; i < defaults->nbAttrs; i++) {
9754 xmlDefAttr *attr = &defaults->attrs[i];
9755
9756 attname = attr->name.name;
9757 aprefix = attr->prefix.name;
9758
9759 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9760 xmlParserEntityCheck(ctxt, attr->expandedSize);
9761
9762 if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9763 nbNs++;
9764 } else if (aprefix == ctxt->str_xmlns) {
9765 xmlParserEntityCheck(ctxt, attr->expandedSize);
9766
9767 if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9768 NULL, 1) > 0)
9769 nbNs++;
9770 } else {
9771 nbTotalDef += 1;
9772 }
9773 }
9774 }
9775 }
9776
9777 /*
9778 * Resolve attribute namespaces
9779 */
9780 for (i = 0; i < nbatts; i += 5) {
9781 attname = atts[i];
9782 aprefix = atts[i+1];
9783
9784 /*
9785 * The default namespace does not apply to attribute names.
9786 */
9787 if (aprefix == NULL) {
9788 nsIndex = NS_INDEX_EMPTY;
9789 } else if (aprefix == ctxt->str_xml) {
9790 nsIndex = NS_INDEX_XML;
9791 } else {
9792 haprefix.name = aprefix;
9793 haprefix.hashValue = (size_t) atts[i+2];
9794 nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9795 if (nsIndex == INT_MAX) {
9796 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9797 "Namespace prefix %s for %s on %s is not defined\n",
9798 aprefix, attname, localname);
9799 nsIndex = NS_INDEX_EMPTY;
9800 }
9801 }
9802
9803 atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9804 }
9805
9806 /*
9807 * Maximum number of attributes including default attributes.
9808 */
9809 maxAtts = nratts + nbTotalDef;
9810
9811 /*
9812 * Verify that attribute names are unique.
9813 */
9814 if (maxAtts > 1) {
9815 attrHashSize = 4;
9816 while (attrHashSize / 2 < (unsigned) maxAtts)
9817 attrHashSize *= 2;
9818
9819 if (attrHashSize > ctxt->attrHashMax) {
9820 xmlAttrHashBucket *tmp;
9821
9822 tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9823 if (tmp == NULL) {
9824 xmlErrMemory(ctxt, NULL);
9825 goto done;
9826 }
9827
9828 ctxt->attrHash = tmp;
9829 ctxt->attrHashMax = attrHashSize;
9830 }
9831
9832 memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9833
9834 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9835 const xmlChar *nsuri;
9836 unsigned hashValue, nameHashValue, uriHashValue;
9837 int res;
9838
9839 attname = atts[i];
9840 aprefix = atts[i+1];
9841 nsIndex = (ptrdiff_t) atts[i+2];
9842 /* Hash values always have bit 31 set, see dict.c */
9843 nameHashValue = ctxt->attallocs[j] | 0x80000000;
9844
9845 if (nsIndex == NS_INDEX_EMPTY) {
9846 nsuri = NULL;
9847 uriHashValue = URI_HASH_EMPTY;
9848 } else if (nsIndex == NS_INDEX_XML) {
9849 nsuri = ctxt->str_xml_ns;
9850 uriHashValue = URI_HASH_XML;
9851 } else {
9852 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9853 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9854 }
9855
9856 hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9857 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9858 hashValue, i);
9859 if (res < 0)
9860 continue;
9861
9862 /*
9863 * [ WFC: Unique Att Spec ]
9864 * No attribute name may appear more than once in the same
9865 * start-tag or empty-element tag.
9866 * As extended by the Namespace in XML REC.
9867 */
9868 if (res < INT_MAX) {
9869 if (aprefix == atts[res+1]) {
9870 xmlErrAttributeDup(ctxt, aprefix, attname);
9871 } else {
9872 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9873 "Namespaced Attribute %s in '%s' redefined\n",
9874 attname, nsuri, NULL);
9875 }
9876 }
9877 }
9878 }
9879
9880 /*
9881 * Default attributes
9882 */
9883 if (ctxt->attsDefault != NULL) {
9884 xmlDefAttrsPtr defaults;
9885
9886 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9887 if (defaults != NULL) {
9888 for (i = 0; i < defaults->nbAttrs; i++) {
9889 xmlDefAttr *attr = &defaults->attrs[i];
9890 const xmlChar *nsuri;
9891 unsigned hashValue, uriHashValue;
9892 int res;
9893
9894 attname = attr->name.name;
9895 aprefix = attr->prefix.name;
9896
9897 if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9898 continue;
9899 if (aprefix == ctxt->str_xmlns)
9900 continue;
9901
9902 if (aprefix == NULL) {
9903 nsIndex = NS_INDEX_EMPTY;
9904 nsuri = NULL;
9905 uriHashValue = URI_HASH_EMPTY;
9906 } if (aprefix == ctxt->str_xml) {
9907 nsIndex = NS_INDEX_XML;
9908 nsuri = ctxt->str_xml_ns;
9909 uriHashValue = URI_HASH_XML;
9910 } else if (aprefix != NULL) {
9911 nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9912 if (nsIndex == INT_MAX) {
9913 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9914 "Namespace prefix %s for %s on %s is not "
9915 "defined\n",
9916 aprefix, attname, localname);
9917 nsIndex = NS_INDEX_EMPTY;
9918 nsuri = NULL;
9919 uriHashValue = URI_HASH_EMPTY;
9920 } else {
9921 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9922 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9923 }
9924 }
9925
9926 /*
9927 * Check whether the attribute exists
9928 */
9929 if (maxAtts > 1) {
9930 hashValue = xmlDictCombineHash(attr->name.hashValue,
9931 uriHashValue);
9932 res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9933 hashValue, nbatts);
9934 if (res < 0)
9935 continue;
9936 if (res < INT_MAX) {
9937 if (aprefix == atts[res+1])
9938 continue;
9939 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9940 "Namespaced Attribute %s in '%s' redefined\n",
9941 attname, nsuri, NULL);
9942 }
9943 }
9944
9945 xmlParserEntityCheck(ctxt, attr->expandedSize);
9946
9947 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9948 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9949 localname = NULL;
9950 goto done;
9951 }
9952 maxatts = ctxt->maxatts;
9953 atts = ctxt->atts;
9954 }
9955
9956 atts[nbatts++] = attname;
9957 atts[nbatts++] = aprefix;
9958 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9959 atts[nbatts++] = attr->value.name;
9960 atts[nbatts++] = attr->valueEnd;
9961 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9962 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9963 "standalone: attribute %s on %s defaulted "
9964 "from external subset\n",
9965 attname, localname);
9966 }
9967 nbdef++;
9968 }
9969 }
9970 }
9971
9972 /*
9973 * Reconstruct attribute pointers
9974 */
9975 for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9976 /* namespace URI */
9977 nsIndex = (ptrdiff_t) atts[i+2];
9978 if (nsIndex == INT_MAX)
9979 atts[i+2] = NULL;
9980 else if (nsIndex == INT_MAX - 1)
9981 atts[i+2] = ctxt->str_xml_ns;
9982 else
9983 atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9984
9985 if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9986 atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */
9987 atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */
9988 }
9989 }
9990
9991 uri = xmlParserNsLookupUri(ctxt, &hprefix);
9992 if ((prefix != NULL) && (uri == NULL)) {
9993 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9994 "Namespace prefix %s on %s is not defined\n",
9995 prefix, localname, NULL);
9996 }
9997 *pref = prefix;
9998 *URI = uri;
9999
10000 /*
10001 * SAX callback
10002 */
10003 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
10004 (!ctxt->disableSAX)) {
10005 if (nbNs > 0)
10006 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10007 nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
10008 nbatts / 5, nbdef, atts);
10009 else
10010 ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10011 0, NULL, nbatts / 5, nbdef, atts);
10012 }
10013
10014done:
10015 /*
10016 * Free allocated attribute values
10017 */
10018 if (attval != 0) {
10019 for (i = 0, j = 0; j < nratts; i += 5, j++)
10020 if (ctxt->attallocs[j] & 0x80000000)
10021 xmlFree((xmlChar *) atts[i+3]);
10022 }
10023
10024 *nbNsPtr = nbNs;
10025 return(localname);
10026}
10027
10028/**
10029 * xmlParseEndTag2:
10030 * @ctxt: an XML parser context
10031 * @line: line of the start tag
10032 * @nsNr: number of namespaces on the start tag
10033 *
10034 * Parse an end tag. Always consumes '</'.
10035 *
10036 * [42] ETag ::= '</' Name S? '>'
10037 *
10038 * With namespace
10039 *
10040 * [NS 9] ETag ::= '</' QName S? '>'
10041 */
10042
10043static void
10044xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
10045 const xmlChar *name;
10046
10047 GROW;
10048 if ((RAW != '<') || (NXT(1) != '/')) {
10049 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
10050 return;
10051 }
10052 SKIP(2);
10053
10054 if (tag->prefix == NULL)
10055 name = xmlParseNameAndCompare(ctxt, ctxt->name);
10056 else
10057 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
10058
10059 /*
10060 * We should definitely be at the ending "S? '>'" part
10061 */
10062 GROW;
10063 if (ctxt->instate == XML_PARSER_EOF)
10064 return;
10065 SKIP_BLANKS;
10066 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
10067 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
10068 } else
10069 NEXT1;
10070
10071 /*
10072 * [ WFC: Element Type Match ]
10073 * The Name in an element's end-tag must match the element type in the
10074 * start-tag.
10075 *
10076 */
10077 if (name != (xmlChar*)1) {
10078 if (name == NULL) name = BAD_CAST "unparsable";
10079 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
10080 "Opening and ending tag mismatch: %s line %d and %s\n",
10081 ctxt->name, tag->line, name);
10082 }
10083
10084 /*
10085 * SAX: End of Tag
10086 */
10087 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10088 (!ctxt->disableSAX))
10089 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
10090 tag->URI);
10091
10092 spacePop(ctxt);
10093 if (tag->nsNr != 0)
10094 xmlParserNsPop(ctxt, tag->nsNr);
10095}
10096
10097/**
10098 * xmlParseCDSect:
10099 * @ctxt: an XML parser context
10100 *
10101 * DEPRECATED: Internal function, don't use.
10102 *
10103 * Parse escaped pure raw content. Always consumes '<!['.
10104 *
10105 * [18] CDSect ::= CDStart CData CDEnd
10106 *
10107 * [19] CDStart ::= '<![CDATA['
10108 *
10109 * [20] Data ::= (Char* - (Char* ']]>' Char*))
10110 *
10111 * [21] CDEnd ::= ']]>'
10112 */
10113void
10114xmlParseCDSect(xmlParserCtxtPtr ctxt) {
10115 xmlChar *buf = NULL;
10116 int len = 0;
10117 int size = XML_PARSER_BUFFER_SIZE;
10118 int r, rl;
10119 int s, sl;
10120 int cur, l;
10121 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10122 XML_MAX_HUGE_LENGTH :
10123 XML_MAX_TEXT_LENGTH;
10124
10125 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
10126 return;
10127 SKIP(3);
10128
10129 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
10130 return;
10131 SKIP(6);
10132
10133 ctxt->instate = XML_PARSER_CDATA_SECTION;
10134 r = CUR_CHAR(rl);
10135 if (!IS_CHAR(r)) {
10136 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10137 goto out;
10138 }
10139 NEXTL(rl);
10140 s = CUR_CHAR(sl);
10141 if (!IS_CHAR(s)) {
10142 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10143 goto out;
10144 }
10145 NEXTL(sl);
10146 cur = CUR_CHAR(l);
10147 buf = (xmlChar *) xmlMallocAtomic(size);
10148 if (buf == NULL) {
10149 xmlErrMemory(ctxt, NULL);
10150 goto out;
10151 }
10152 while (IS_CHAR(cur) &&
10153 ((r != ']') || (s != ']') || (cur != '>'))) {
10154 if (len + 5 >= size) {
10155 xmlChar *tmp;
10156
10157 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
10158 if (tmp == NULL) {
10159 xmlErrMemory(ctxt, NULL);
10160 goto out;
10161 }
10162 buf = tmp;
10163 size *= 2;
10164 }
10165 COPY_BUF(buf, len, r);
10166 if (len > maxLength) {
10167 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10168 "CData section too big found\n");
10169 goto out;
10170 }
10171 r = s;
10172 rl = sl;
10173 s = cur;
10174 sl = l;
10175 NEXTL(l);
10176 cur = CUR_CHAR(l);
10177 }
10178 buf[len] = 0;
10179 if (ctxt->instate == XML_PARSER_EOF) {
10180 xmlFree(buf);
10181 return;
10182 }
10183 if (cur != '>') {
10184 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10185 "CData section not finished\n%.50s\n", buf);
10186 goto out;
10187 }
10188 NEXTL(l);
10189
10190 /*
10191 * OK the buffer is to be consumed as cdata.
10192 */
10193 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10194 if (ctxt->sax->cdataBlock != NULL)
10195 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10196 else if (ctxt->sax->characters != NULL)
10197 ctxt->sax->characters(ctxt->userData, buf, len);
10198 }
10199
10200out:
10201 if (ctxt->instate != XML_PARSER_EOF)
10202 ctxt->instate = XML_PARSER_CONTENT;
10203 xmlFree(buf);
10204}
10205
10206/**
10207 * xmlParseContentInternal:
10208 * @ctxt: an XML parser context
10209 *
10210 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
10211 * unexpected EOF to the caller.
10212 */
10213
10214static void
10215xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
10216 int nameNr = ctxt->nameNr;
10217
10218 GROW;
10219 while ((ctxt->input->cur < ctxt->input->end) &&
10220 (ctxt->instate != XML_PARSER_EOF)) {
10221 const xmlChar *cur = ctxt->input->cur;
10222
10223 /*
10224 * First case : a Processing Instruction.
10225 */
10226 if ((*cur == '<') && (cur[1] == '?')) {
10227 xmlParsePI(ctxt);
10228 }
10229
10230 /*
10231 * Second case : a CDSection
10232 */
10233 /* 2.6.0 test was *cur not RAW */
10234 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10235 xmlParseCDSect(ctxt);
10236 }
10237
10238 /*
10239 * Third case : a comment
10240 */
10241 else if ((*cur == '<') && (NXT(1) == '!') &&
10242 (NXT(2) == '-') && (NXT(3) == '-')) {
10243 xmlParseComment(ctxt);
10244 ctxt->instate = XML_PARSER_CONTENT;
10245 }
10246
10247 /*
10248 * Fourth case : a sub-element.
10249 */
10250 else if (*cur == '<') {
10251 if (NXT(1) == '/') {
10252 if (ctxt->nameNr <= nameNr)
10253 break;
10254 xmlParseElementEnd(ctxt);
10255 } else {
10256 xmlParseElementStart(ctxt);
10257 }
10258 }
10259
10260 /*
10261 * Fifth case : a reference. If if has not been resolved,
10262 * parsing returns it's Name, create the node
10263 */
10264
10265 else if (*cur == '&') {
10266 xmlParseReference(ctxt);
10267 }
10268
10269 /*
10270 * Last case, text. Note that References are handled directly.
10271 */
10272 else {
10273 xmlParseCharDataInternal(ctxt, 0);
10274 }
10275
10276 SHRINK;
10277 GROW;
10278 }
10279}
10280
10281/**
10282 * xmlParseContent:
10283 * @ctxt: an XML parser context
10284 *
10285 * Parse a content sequence. Stops at EOF or '</'.
10286 *
10287 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10288 */
10289
10290void
10291xmlParseContent(xmlParserCtxtPtr ctxt) {
10292 int nameNr = ctxt->nameNr;
10293
10294 xmlParseContentInternal(ctxt);
10295
10296 if ((ctxt->instate != XML_PARSER_EOF) &&
10297 (ctxt->errNo == XML_ERR_OK) &&
10298 (ctxt->nameNr > nameNr)) {
10299 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10300 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10301 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10302 "Premature end of data in tag %s line %d\n",
10303 name, line, NULL);
10304 }
10305}
10306
10307/**
10308 * xmlParseElement:
10309 * @ctxt: an XML parser context
10310 *
10311 * DEPRECATED: Internal function, don't use.
10312 *
10313 * parse an XML element
10314 *
10315 * [39] element ::= EmptyElemTag | STag content ETag
10316 *
10317 * [ WFC: Element Type Match ]
10318 * The Name in an element's end-tag must match the element type in the
10319 * start-tag.
10320 *
10321 */
10322
10323void
10324xmlParseElement(xmlParserCtxtPtr ctxt) {
10325 if (xmlParseElementStart(ctxt) != 0)
10326 return;
10327
10328 xmlParseContentInternal(ctxt);
10329 if (ctxt->instate == XML_PARSER_EOF)
10330 return;
10331
10332 if (ctxt->input->cur >= ctxt->input->end) {
10333 if (ctxt->errNo == XML_ERR_OK) {
10334 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10335 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10336 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10337 "Premature end of data in tag %s line %d\n",
10338 name, line, NULL);
10339 }
10340 return;
10341 }
10342
10343 xmlParseElementEnd(ctxt);
10344}
10345
10346/**
10347 * xmlParseElementStart:
10348 * @ctxt: an XML parser context
10349 *
10350 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10351 * opening tag was parsed, 1 if an empty element was parsed.
10352 *
10353 * Always consumes '<'.
10354 */
10355static int
10356xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10357 const xmlChar *name;
10358 const xmlChar *prefix = NULL;
10359 const xmlChar *URI = NULL;
10360 xmlParserNodeInfo node_info;
10361 int line;
10362 xmlNodePtr cur;
10363 int nbNs = 0;
10364
10365 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10366 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10367 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10368 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10369 xmlParserMaxDepth);
10370 xmlHaltParser(ctxt);
10371 return(-1);
10372 }
10373
10374 /* Capture start position */
10375 if (ctxt->record_info) {
10376 node_info.begin_pos = ctxt->input->consumed +
10377 (CUR_PTR - ctxt->input->base);
10378 node_info.begin_line = ctxt->input->line;
10379 }
10380
10381 if (ctxt->spaceNr == 0)
10382 spacePush(ctxt, -1);
10383 else if (*ctxt->space == -2)
10384 spacePush(ctxt, -1);
10385 else
10386 spacePush(ctxt, *ctxt->space);
10387
10388 line = ctxt->input->line;
10389#ifdef LIBXML_SAX1_ENABLED
10390 if (ctxt->sax2)
10391#endif /* LIBXML_SAX1_ENABLED */
10392 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10393#ifdef LIBXML_SAX1_ENABLED
10394 else
10395 name = xmlParseStartTag(ctxt);
10396#endif /* LIBXML_SAX1_ENABLED */
10397 if (ctxt->instate == XML_PARSER_EOF)
10398 return(-1);
10399 if (name == NULL) {
10400 spacePop(ctxt);
10401 return(-1);
10402 }
10403 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10404 cur = ctxt->node;
10405
10406#ifdef LIBXML_VALID_ENABLED
10407 /*
10408 * [ VC: Root Element Type ]
10409 * The Name in the document type declaration must match the element
10410 * type of the root element.
10411 */
10412 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10413 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10414 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10415#endif /* LIBXML_VALID_ENABLED */
10416
10417 /*
10418 * Check for an Empty Element.
10419 */
10420 if ((RAW == '/') && (NXT(1) == '>')) {
10421 SKIP(2);
10422 if (ctxt->sax2) {
10423 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10424 (!ctxt->disableSAX))
10425 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10426#ifdef LIBXML_SAX1_ENABLED
10427 } else {
10428 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10429 (!ctxt->disableSAX))
10430 ctxt->sax->endElement(ctxt->userData, name);
10431#endif /* LIBXML_SAX1_ENABLED */
10432 }
10433 namePop(ctxt);
10434 spacePop(ctxt);
10435 if (nbNs > 0)
10436 xmlParserNsPop(ctxt, nbNs);
10437 if (cur != NULL && ctxt->record_info) {
10438 node_info.node = cur;
10439 node_info.end_pos = ctxt->input->consumed +
10440 (CUR_PTR - ctxt->input->base);
10441 node_info.end_line = ctxt->input->line;
10442 xmlParserAddNodeInfo(ctxt, &node_info);
10443 }
10444 return(1);
10445 }
10446 if (RAW == '>') {
10447 NEXT1;
10448 if (cur != NULL && ctxt->record_info) {
10449 node_info.node = cur;
10450 node_info.end_pos = 0;
10451 node_info.end_line = 0;
10452 xmlParserAddNodeInfo(ctxt, &node_info);
10453 }
10454 } else {
10455 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10456 "Couldn't find end of Start Tag %s line %d\n",
10457 name, line, NULL);
10458
10459 /*
10460 * end of parsing of this node.
10461 */
10462 nodePop(ctxt);
10463 namePop(ctxt);
10464 spacePop(ctxt);
10465 if (nbNs > 0)
10466 xmlParserNsPop(ctxt, nbNs);
10467 return(-1);
10468 }
10469
10470 return(0);
10471}
10472
10473/**
10474 * xmlParseElementEnd:
10475 * @ctxt: an XML parser context
10476 *
10477 * Parse the end of an XML element. Always consumes '</'.
10478 */
10479static void
10480xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10481 xmlNodePtr cur = ctxt->node;
10482
10483 if (ctxt->nameNr <= 0) {
10484 if ((RAW == '<') && (NXT(1) == '/'))
10485 SKIP(2);
10486 return;
10487 }
10488
10489 /*
10490 * parse the end of tag: '</' should be here.
10491 */
10492 if (ctxt->sax2) {
10493 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10494 namePop(ctxt);
10495 }
10496#ifdef LIBXML_SAX1_ENABLED
10497 else
10498 xmlParseEndTag1(ctxt, 0);
10499#endif /* LIBXML_SAX1_ENABLED */
10500
10501 /*
10502 * Capture end position
10503 */
10504 if (cur != NULL && ctxt->record_info) {
10505 xmlParserNodeInfoPtr node_info;
10506
10507 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10508 if (node_info != NULL) {
10509 node_info->end_pos = ctxt->input->consumed +
10510 (CUR_PTR - ctxt->input->base);
10511 node_info->end_line = ctxt->input->line;
10512 }
10513 }
10514}
10515
10516/**
10517 * xmlParseVersionNum:
10518 * @ctxt: an XML parser context
10519 *
10520 * DEPRECATED: Internal function, don't use.
10521 *
10522 * parse the XML version value.
10523 *
10524 * [26] VersionNum ::= '1.' [0-9]+
10525 *
10526 * In practice allow [0-9].[0-9]+ at that level
10527 *
10528 * Returns the string giving the XML version number, or NULL
10529 */
10530xmlChar *
10531xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10532 xmlChar *buf = NULL;
10533 int len = 0;
10534 int size = 10;
10535 xmlChar cur;
10536
10537 buf = (xmlChar *) xmlMallocAtomic(size);
10538 if (buf == NULL) {
10539 xmlErrMemory(ctxt, NULL);
10540 return(NULL);
10541 }
10542 cur = CUR;
10543 if (!((cur >= '0') && (cur <= '9'))) {
10544 xmlFree(buf);
10545 return(NULL);
10546 }
10547 buf[len++] = cur;
10548 NEXT;
10549 cur=CUR;
10550 if (cur != '.') {
10551 xmlFree(buf);
10552 return(NULL);
10553 }
10554 buf[len++] = cur;
10555 NEXT;
10556 cur=CUR;
10557 while ((cur >= '0') && (cur <= '9')) {
10558 if (len + 1 >= size) {
10559 xmlChar *tmp;
10560
10561 size *= 2;
10562 tmp = (xmlChar *) xmlRealloc(buf, size);
10563 if (tmp == NULL) {
10564 xmlFree(buf);
10565 xmlErrMemory(ctxt, NULL);
10566 return(NULL);
10567 }
10568 buf = tmp;
10569 }
10570 buf[len++] = cur;
10571 NEXT;
10572 cur=CUR;
10573 }
10574 buf[len] = 0;
10575 return(buf);
10576}
10577
10578/**
10579 * xmlParseVersionInfo:
10580 * @ctxt: an XML parser context
10581 *
10582 * DEPRECATED: Internal function, don't use.
10583 *
10584 * parse the XML version.
10585 *
10586 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10587 *
10588 * [25] Eq ::= S? '=' S?
10589 *
10590 * Returns the version string, e.g. "1.0"
10591 */
10592
10593xmlChar *
10594xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10595 xmlChar *version = NULL;
10596
10597 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10598 SKIP(7);
10599 SKIP_BLANKS;
10600 if (RAW != '=') {
10601 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10602 return(NULL);
10603 }
10604 NEXT;
10605 SKIP_BLANKS;
10606 if (RAW == '"') {
10607 NEXT;
10608 version = xmlParseVersionNum(ctxt);
10609 if (RAW != '"') {
10610 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10611 } else
10612 NEXT;
10613 } else if (RAW == '\''){
10614 NEXT;
10615 version = xmlParseVersionNum(ctxt);
10616 if (RAW != '\'') {
10617 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10618 } else
10619 NEXT;
10620 } else {
10621 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10622 }
10623 }
10624 return(version);
10625}
10626
10627/**
10628 * xmlParseEncName:
10629 * @ctxt: an XML parser context
10630 *
10631 * DEPRECATED: Internal function, don't use.
10632 *
10633 * parse the XML encoding name
10634 *
10635 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10636 *
10637 * Returns the encoding name value or NULL
10638 */
10639xmlChar *
10640xmlParseEncName(xmlParserCtxtPtr ctxt) {
10641 xmlChar *buf = NULL;
10642 int len = 0;
10643 int size = 10;
10644 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10645 XML_MAX_TEXT_LENGTH :
10646 XML_MAX_NAME_LENGTH;
10647 xmlChar cur;
10648
10649 cur = CUR;
10650 if (((cur >= 'a') && (cur <= 'z')) ||
10651 ((cur >= 'A') && (cur <= 'Z'))) {
10652 buf = (xmlChar *) xmlMallocAtomic(size);
10653 if (buf == NULL) {
10654 xmlErrMemory(ctxt, NULL);
10655 return(NULL);
10656 }
10657
10658 buf[len++] = cur;
10659 NEXT;
10660 cur = CUR;
10661 while (((cur >= 'a') && (cur <= 'z')) ||
10662 ((cur >= 'A') && (cur <= 'Z')) ||
10663 ((cur >= '0') && (cur <= '9')) ||
10664 (cur == '.') || (cur == '_') ||
10665 (cur == '-')) {
10666 if (len + 1 >= size) {
10667 xmlChar *tmp;
10668
10669 size *= 2;
10670 tmp = (xmlChar *) xmlRealloc(buf, size);
10671 if (tmp == NULL) {
10672 xmlErrMemory(ctxt, NULL);
10673 xmlFree(buf);
10674 return(NULL);
10675 }
10676 buf = tmp;
10677 }
10678 buf[len++] = cur;
10679 if (len > maxLength) {
10680 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10681 xmlFree(buf);
10682 return(NULL);
10683 }
10684 NEXT;
10685 cur = CUR;
10686 }
10687 buf[len] = 0;
10688 } else {
10689 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10690 }
10691 return(buf);
10692}
10693
10694/**
10695 * xmlParseEncodingDecl:
10696 * @ctxt: an XML parser context
10697 *
10698 * DEPRECATED: Internal function, don't use.
10699 *
10700 * parse the XML encoding declaration
10701 *
10702 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10703 *
10704 * this setups the conversion filters.
10705 *
10706 * Returns the encoding value or NULL
10707 */
10708
10709const xmlChar *
10710xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10711 xmlChar *encoding = NULL;
10712
10713 SKIP_BLANKS;
10714 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10715 return(NULL);
10716
10717 SKIP(8);
10718 SKIP_BLANKS;
10719 if (RAW != '=') {
10720 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10721 return(NULL);
10722 }
10723 NEXT;
10724 SKIP_BLANKS;
10725 if (RAW == '"') {
10726 NEXT;
10727 encoding = xmlParseEncName(ctxt);
10728 if (RAW != '"') {
10729 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10730 xmlFree((xmlChar *) encoding);
10731 return(NULL);
10732 } else
10733 NEXT;
10734 } else if (RAW == '\''){
10735 NEXT;
10736 encoding = xmlParseEncName(ctxt);
10737 if (RAW != '\'') {
10738 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10739 xmlFree((xmlChar *) encoding);
10740 return(NULL);
10741 } else
10742 NEXT;
10743 } else {
10744 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10745 }
10746
10747 if (encoding == NULL)
10748 return(NULL);
10749
10750 xmlSetDeclaredEncoding(ctxt, encoding);
10751
10752 return(ctxt->encoding);
10753}
10754
10755/**
10756 * xmlParseSDDecl:
10757 * @ctxt: an XML parser context
10758 *
10759 * DEPRECATED: Internal function, don't use.
10760 *
10761 * parse the XML standalone declaration
10762 *
10763 * [32] SDDecl ::= S 'standalone' Eq
10764 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10765 *
10766 * [ VC: Standalone Document Declaration ]
10767 * TODO The standalone document declaration must have the value "no"
10768 * if any external markup declarations contain declarations of:
10769 * - attributes with default values, if elements to which these
10770 * attributes apply appear in the document without specifications
10771 * of values for these attributes, or
10772 * - entities (other than amp, lt, gt, apos, quot), if references
10773 * to those entities appear in the document, or
10774 * - attributes with values subject to normalization, where the
10775 * attribute appears in the document with a value which will change
10776 * as a result of normalization, or
10777 * - element types with element content, if white space occurs directly
10778 * within any instance of those types.
10779 *
10780 * Returns:
10781 * 1 if standalone="yes"
10782 * 0 if standalone="no"
10783 * -2 if standalone attribute is missing or invalid
10784 * (A standalone value of -2 means that the XML declaration was found,
10785 * but no value was specified for the standalone attribute).
10786 */
10787
10788int
10789xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10790 int standalone = -2;
10791
10792 SKIP_BLANKS;
10793 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10794 SKIP(10);
10795 SKIP_BLANKS;
10796 if (RAW != '=') {
10797 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10798 return(standalone);
10799 }
10800 NEXT;
10801 SKIP_BLANKS;
10802 if (RAW == '\''){
10803 NEXT;
10804 if ((RAW == 'n') && (NXT(1) == 'o')) {
10805 standalone = 0;
10806 SKIP(2);
10807 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10808 (NXT(2) == 's')) {
10809 standalone = 1;
10810 SKIP(3);
10811 } else {
10812 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10813 }
10814 if (RAW != '\'') {
10815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10816 } else
10817 NEXT;
10818 } else if (RAW == '"'){
10819 NEXT;
10820 if ((RAW == 'n') && (NXT(1) == 'o')) {
10821 standalone = 0;
10822 SKIP(2);
10823 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10824 (NXT(2) == 's')) {
10825 standalone = 1;
10826 SKIP(3);
10827 } else {
10828 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10829 }
10830 if (RAW != '"') {
10831 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10832 } else
10833 NEXT;
10834 } else {
10835 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10836 }
10837 }
10838 return(standalone);
10839}
10840
10841/**
10842 * xmlParseXMLDecl:
10843 * @ctxt: an XML parser context
10844 *
10845 * DEPRECATED: Internal function, don't use.
10846 *
10847 * parse an XML declaration header
10848 *
10849 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10850 */
10851
10852void
10853xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10854 xmlChar *version;
10855
10856 /*
10857 * This value for standalone indicates that the document has an
10858 * XML declaration but it does not have a standalone attribute.
10859 * It will be overwritten later if a standalone attribute is found.
10860 */
10861
10862 ctxt->standalone = -2;
10863
10864 /*
10865 * We know that '<?xml' is here.
10866 */
10867 SKIP(5);
10868
10869 if (!IS_BLANK_CH(RAW)) {
10870 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10871 "Blank needed after '<?xml'\n");
10872 }
10873 SKIP_BLANKS;
10874
10875 /*
10876 * We must have the VersionInfo here.
10877 */
10878 version = xmlParseVersionInfo(ctxt);
10879 if (version == NULL) {
10880 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10881 } else {
10882 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10883 /*
10884 * Changed here for XML-1.0 5th edition
10885 */
10886 if (ctxt->options & XML_PARSE_OLD10) {
10887 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10888 "Unsupported version '%s'\n",
10889 version);
10890 } else {
10891 if ((version[0] == '1') && ((version[1] == '.'))) {
10892 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10893 "Unsupported version '%s'\n",
10894 version, NULL);
10895 } else {
10896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10897 "Unsupported version '%s'\n",
10898 version);
10899 }
10900 }
10901 }
10902 if (ctxt->version != NULL)
10903 xmlFree((void *) ctxt->version);
10904 ctxt->version = version;
10905 }
10906
10907 /*
10908 * We may have the encoding declaration
10909 */
10910 if (!IS_BLANK_CH(RAW)) {
10911 if ((RAW == '?') && (NXT(1) == '>')) {
10912 SKIP(2);
10913 return;
10914 }
10915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10916 }
10917 xmlParseEncodingDecl(ctxt);
10918 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10919 (ctxt->instate == XML_PARSER_EOF)) {
10920 /*
10921 * The XML REC instructs us to stop parsing right here
10922 */
10923 return;
10924 }
10925
10926 /*
10927 * We may have the standalone status.
10928 */
10929 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10930 if ((RAW == '?') && (NXT(1) == '>')) {
10931 SKIP(2);
10932 return;
10933 }
10934 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10935 }
10936
10937 /*
10938 * We can grow the input buffer freely at that point
10939 */
10940 GROW;
10941
10942 SKIP_BLANKS;
10943 ctxt->standalone = xmlParseSDDecl(ctxt);
10944
10945 SKIP_BLANKS;
10946 if ((RAW == '?') && (NXT(1) == '>')) {
10947 SKIP(2);
10948 } else if (RAW == '>') {
10949 /* Deprecated old WD ... */
10950 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10951 NEXT;
10952 } else {
10953 int c;
10954
10955 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10956 while ((c = CUR) != 0) {
10957 NEXT;
10958 if (c == '>')
10959 break;
10960 }
10961 }
10962}
10963
10964/**
10965 * xmlParseMisc:
10966 * @ctxt: an XML parser context
10967 *
10968 * DEPRECATED: Internal function, don't use.
10969 *
10970 * parse an XML Misc* optional field.
10971 *
10972 * [27] Misc ::= Comment | PI | S
10973 */
10974
10975void
10976xmlParseMisc(xmlParserCtxtPtr ctxt) {
10977 while (ctxt->instate != XML_PARSER_EOF) {
10978 SKIP_BLANKS;
10979 GROW;
10980 if ((RAW == '<') && (NXT(1) == '?')) {
10981 xmlParsePI(ctxt);
10982 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10983 xmlParseComment(ctxt);
10984 } else {
10985 break;
10986 }
10987 }
10988}
10989
10990/**
10991 * xmlParseDocument:
10992 * @ctxt: an XML parser context
10993 *
10994 * parse an XML document (and build a tree if using the standard SAX
10995 * interface).
10996 *
10997 * [1] document ::= prolog element Misc*
10998 *
10999 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
11000 *
11001 * Returns 0, -1 in case of error. the parser context is augmented
11002 * as a result of the parsing.
11003 */
11004
11005int
11006xmlParseDocument(xmlParserCtxtPtr ctxt) {
11007 xmlInitParser();
11008
11009 if ((ctxt == NULL) || (ctxt->input == NULL))
11010 return(-1);
11011
11012 GROW;
11013
11014 /*
11015 * SAX: detecting the level.
11016 */
11017 xmlDetectSAX2(ctxt);
11018
11019 /*
11020 * SAX: beginning of the document processing.
11021 */
11022 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11023 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11024 if (ctxt->instate == XML_PARSER_EOF)
11025 return(-1);
11026
11027 xmlDetectEncoding(ctxt);
11028
11029 if (CUR == 0) {
11030 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11031 return(-1);
11032 }
11033
11034 GROW;
11035 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11036
11037 /*
11038 * Note that we will switch encoding on the fly.
11039 */
11040 xmlParseXMLDecl(ctxt);
11041 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
11042 (ctxt->instate == XML_PARSER_EOF)) {
11043 /*
11044 * The XML REC instructs us to stop parsing right here
11045 */
11046 return(-1);
11047 }
11048 SKIP_BLANKS;
11049 } else {
11050 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11051 }
11052 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11053 ctxt->sax->startDocument(ctxt->userData);
11054 if (ctxt->instate == XML_PARSER_EOF)
11055 return(-1);
11056 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
11057 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
11058 ctxt->myDoc->compression = ctxt->input->buf->compressed;
11059 }
11060
11061 /*
11062 * The Misc part of the Prolog
11063 */
11064 xmlParseMisc(ctxt);
11065
11066 /*
11067 * Then possibly doc type declaration(s) and more Misc
11068 * (doctypedecl Misc*)?
11069 */
11070 GROW;
11071 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
11072
11073 ctxt->inSubset = 1;
11074 xmlParseDocTypeDecl(ctxt);
11075 if (RAW == '[') {
11076 ctxt->instate = XML_PARSER_DTD;
11077 xmlParseInternalSubset(ctxt);
11078 if (ctxt->instate == XML_PARSER_EOF)
11079 return(-1);
11080 }
11081
11082 /*
11083 * Create and update the external subset.
11084 */
11085 ctxt->inSubset = 2;
11086 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
11087 (!ctxt->disableSAX))
11088 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11089 ctxt->extSubSystem, ctxt->extSubURI);
11090 if (ctxt->instate == XML_PARSER_EOF)
11091 return(-1);
11092 ctxt->inSubset = 0;
11093
11094 xmlCleanSpecialAttr(ctxt);
11095
11096 ctxt->instate = XML_PARSER_PROLOG;
11097 xmlParseMisc(ctxt);
11098 }
11099
11100 /*
11101 * Time to start parsing the tree itself
11102 */
11103 GROW;
11104 if (RAW != '<') {
11105 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11106 "Start tag expected, '<' not found\n");
11107 } else {
11108 ctxt->instate = XML_PARSER_CONTENT;
11109 xmlParseElement(ctxt);
11110 ctxt->instate = XML_PARSER_EPILOG;
11111
11112
11113 /*
11114 * The Misc part at the end
11115 */
11116 xmlParseMisc(ctxt);
11117
11118 if (ctxt->input->cur < ctxt->input->end) {
11119 if (ctxt->errNo == XML_ERR_OK)
11120 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11121 } else if ((ctxt->input->buf != NULL) &&
11122 (ctxt->input->buf->encoder != NULL) &&
11123 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11124 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11125 "Truncated multi-byte sequence at EOF\n");
11126 }
11127 ctxt->instate = XML_PARSER_EOF;
11128 }
11129
11130 /*
11131 * SAX: end of the document processing.
11132 */
11133 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11134 ctxt->sax->endDocument(ctxt->userData);
11135
11136 /*
11137 * Remove locally kept entity definitions if the tree was not built
11138 */
11139 if ((ctxt->myDoc != NULL) &&
11140 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
11141 xmlFreeDoc(ctxt->myDoc);
11142 ctxt->myDoc = NULL;
11143 }
11144
11145 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
11146 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11147 if (ctxt->valid)
11148 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11149 if (ctxt->nsWellFormed)
11150 ctxt->myDoc->properties |= XML_DOC_NSVALID;
11151 if (ctxt->options & XML_PARSE_OLD10)
11152 ctxt->myDoc->properties |= XML_DOC_OLD10;
11153 }
11154 if (! ctxt->wellFormed) {
11155 ctxt->valid = 0;
11156 return(-1);
11157 }
11158 return(0);
11159}
11160
11161/**
11162 * xmlParseExtParsedEnt:
11163 * @ctxt: an XML parser context
11164 *
11165 * parse a general parsed entity
11166 * An external general parsed entity is well-formed if it matches the
11167 * production labeled extParsedEnt.
11168 *
11169 * [78] extParsedEnt ::= TextDecl? content
11170 *
11171 * Returns 0, -1 in case of error. the parser context is augmented
11172 * as a result of the parsing.
11173 */
11174
11175int
11176xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11177 if ((ctxt == NULL) || (ctxt->input == NULL))
11178 return(-1);
11179
11180 xmlDetectSAX2(ctxt);
11181
11182 /*
11183 * SAX: beginning of the document processing.
11184 */
11185 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11186 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11187
11188 xmlDetectEncoding(ctxt);
11189
11190 if (CUR == 0) {
11191 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11192 }
11193
11194 /*
11195 * Check for the XMLDecl in the Prolog.
11196 */
11197 GROW;
11198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11199
11200 /*
11201 * Note that we will switch encoding on the fly.
11202 */
11203 xmlParseXMLDecl(ctxt);
11204 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11205 /*
11206 * The XML REC instructs us to stop parsing right here
11207 */
11208 return(-1);
11209 }
11210 SKIP_BLANKS;
11211 } else {
11212 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11213 }
11214 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11215 ctxt->sax->startDocument(ctxt->userData);
11216 if (ctxt->instate == XML_PARSER_EOF)
11217 return(-1);
11218
11219 /*
11220 * Doing validity checking on chunk doesn't make sense
11221 */
11222 ctxt->instate = XML_PARSER_CONTENT;
11223 ctxt->validate = 0;
11224 ctxt->loadsubset = 0;
11225 ctxt->depth = 0;
11226
11227 xmlParseContent(ctxt);
11228 if (ctxt->instate == XML_PARSER_EOF)
11229 return(-1);
11230
11231 if ((RAW == '<') && (NXT(1) == '/')) {
11232 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11233 } else if (RAW != 0) {
11234 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11235 }
11236
11237 /*
11238 * SAX: end of the document processing.
11239 */
11240 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11241 ctxt->sax->endDocument(ctxt->userData);
11242
11243 if (! ctxt->wellFormed) return(-1);
11244 return(0);
11245}
11246
11247#ifdef LIBXML_PUSH_ENABLED
11248/************************************************************************
11249 * *
11250 * Progressive parsing interfaces *
11251 * *
11252 ************************************************************************/
11253
11254/**
11255 * xmlParseLookupChar:
11256 * @ctxt: an XML parser context
11257 * @c: character
11258 *
11259 * Check whether the input buffer contains a character.
11260 */
11261static int
11262xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11263 const xmlChar *cur;
11264
11265 if (ctxt->checkIndex == 0) {
11266 cur = ctxt->input->cur + 1;
11267 } else {
11268 cur = ctxt->input->cur + ctxt->checkIndex;
11269 }
11270
11271 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11272 size_t index = ctxt->input->end - ctxt->input->cur;
11273
11274 if (index > LONG_MAX) {
11275 ctxt->checkIndex = 0;
11276 return(1);
11277 }
11278 ctxt->checkIndex = index;
11279 return(0);
11280 } else {
11281 ctxt->checkIndex = 0;
11282 return(1);
11283 }
11284}
11285
11286/**
11287 * xmlParseLookupString:
11288 * @ctxt: an XML parser context
11289 * @startDelta: delta to apply at the start
11290 * @str: string
11291 * @strLen: length of string
11292 *
11293 * Check whether the input buffer contains a string.
11294 */
11295static const xmlChar *
11296xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11297 const char *str, size_t strLen) {
11298 const xmlChar *cur, *term;
11299
11300 if (ctxt->checkIndex == 0) {
11301 cur = ctxt->input->cur + startDelta;
11302 } else {
11303 cur = ctxt->input->cur + ctxt->checkIndex;
11304 }
11305
11306 term = BAD_CAST strstr((const char *) cur, str);
11307 if (term == NULL) {
11308 const xmlChar *end = ctxt->input->end;
11309 size_t index;
11310
11311 /* Rescan (strLen - 1) characters. */
11312 if ((size_t) (end - cur) < strLen)
11313 end = cur;
11314 else
11315 end -= strLen - 1;
11316 index = end - ctxt->input->cur;
11317 if (index > LONG_MAX) {
11318 ctxt->checkIndex = 0;
11319 return(ctxt->input->end - strLen);
11320 }
11321 ctxt->checkIndex = index;
11322 } else {
11323 ctxt->checkIndex = 0;
11324 }
11325
11326 return(term);
11327}
11328
11329/**
11330 * xmlParseLookupCharData:
11331 * @ctxt: an XML parser context
11332 *
11333 * Check whether the input buffer contains terminated char data.
11334 */
11335static int
11336xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11337 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11338 const xmlChar *end = ctxt->input->end;
11339 size_t index;
11340
11341 while (cur < end) {
11342 if ((*cur == '<') || (*cur == '&')) {
11343 ctxt->checkIndex = 0;
11344 return(1);
11345 }
11346 cur++;
11347 }
11348
11349 index = cur - ctxt->input->cur;
11350 if (index > LONG_MAX) {
11351 ctxt->checkIndex = 0;
11352 return(1);
11353 }
11354 ctxt->checkIndex = index;
11355 return(0);
11356}
11357
11358/**
11359 * xmlParseLookupGt:
11360 * @ctxt: an XML parser context
11361 *
11362 * Check whether there's enough data in the input buffer to finish parsing
11363 * a start tag. This has to take quotes into account.
11364 */
11365static int
11366xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11367 const xmlChar *cur;
11368 const xmlChar *end = ctxt->input->end;
11369 int state = ctxt->endCheckState;
11370 size_t index;
11371
11372 if (ctxt->checkIndex == 0)
11373 cur = ctxt->input->cur + 1;
11374 else
11375 cur = ctxt->input->cur + ctxt->checkIndex;
11376
11377 while (cur < end) {
11378 if (state) {
11379 if (*cur == state)
11380 state = 0;
11381 } else if (*cur == '\'' || *cur == '"') {
11382 state = *cur;
11383 } else if (*cur == '>') {
11384 ctxt->checkIndex = 0;
11385 ctxt->endCheckState = 0;
11386 return(1);
11387 }
11388 cur++;
11389 }
11390
11391 index = cur - ctxt->input->cur;
11392 if (index > LONG_MAX) {
11393 ctxt->checkIndex = 0;
11394 ctxt->endCheckState = 0;
11395 return(1);
11396 }
11397 ctxt->checkIndex = index;
11398 ctxt->endCheckState = state;
11399 return(0);
11400}
11401
11402/**
11403 * xmlParseLookupInternalSubset:
11404 * @ctxt: an XML parser context
11405 *
11406 * Check whether there's enough data in the input buffer to finish parsing
11407 * the internal subset.
11408 */
11409static int
11410xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11411 /*
11412 * Sorry, but progressive parsing of the internal subset is not
11413 * supported. We first check that the full content of the internal
11414 * subset is available and parsing is launched only at that point.
11415 * Internal subset ends with "']' S? '>'" in an unescaped section and
11416 * not in a ']]>' sequence which are conditional sections.
11417 */
11418 const xmlChar *cur, *start;
11419 const xmlChar *end = ctxt->input->end;
11420 int state = ctxt->endCheckState;
11421 size_t index;
11422
11423 if (ctxt->checkIndex == 0) {
11424 cur = ctxt->input->cur + 1;
11425 } else {
11426 cur = ctxt->input->cur + ctxt->checkIndex;
11427 }
11428 start = cur;
11429
11430 while (cur < end) {
11431 if (state == '-') {
11432 if ((*cur == '-') &&
11433 (cur[1] == '-') &&
11434 (cur[2] == '>')) {
11435 state = 0;
11436 cur += 3;
11437 start = cur;
11438 continue;
11439 }
11440 }
11441 else if (state == ']') {
11442 if (*cur == '>') {
11443 ctxt->checkIndex = 0;
11444 ctxt->endCheckState = 0;
11445 return(1);
11446 }
11447 if (IS_BLANK_CH(*cur)) {
11448 state = ' ';
11449 } else if (*cur != ']') {
11450 state = 0;
11451 start = cur;
11452 continue;
11453 }
11454 }
11455 else if (state == ' ') {
11456 if (*cur == '>') {
11457 ctxt->checkIndex = 0;
11458 ctxt->endCheckState = 0;
11459 return(1);
11460 }
11461 if (!IS_BLANK_CH(*cur)) {
11462 state = 0;
11463 start = cur;
11464 continue;
11465 }
11466 }
11467 else if (state != 0) {
11468 if (*cur == state) {
11469 state = 0;
11470 start = cur + 1;
11471 }
11472 }
11473 else if (*cur == '<') {
11474 if ((cur[1] == '!') &&
11475 (cur[2] == '-') &&
11476 (cur[3] == '-')) {
11477 state = '-';
11478 cur += 4;
11479 /* Don't treat <!--> as comment */
11480 start = cur;
11481 continue;
11482 }
11483 }
11484 else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11485 state = *cur;
11486 }
11487
11488 cur++;
11489 }
11490
11491 /*
11492 * Rescan the three last characters to detect "<!--" and "-->"
11493 * split across chunks.
11494 */
11495 if ((state == 0) || (state == '-')) {
11496 if (cur - start < 3)
11497 cur = start;
11498 else
11499 cur -= 3;
11500 }
11501 index = cur - ctxt->input->cur;
11502 if (index > LONG_MAX) {
11503 ctxt->checkIndex = 0;
11504 ctxt->endCheckState = 0;
11505 return(1);
11506 }
11507 ctxt->checkIndex = index;
11508 ctxt->endCheckState = state;
11509 return(0);
11510}
11511
11512/**
11513 * xmlCheckCdataPush:
11514 * @cur: pointer to the block of characters
11515 * @len: length of the block in bytes
11516 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11517 *
11518 * Check that the block of characters is okay as SCdata content [20]
11519 *
11520 * Returns the number of bytes to pass if okay, a negative index where an
11521 * UTF-8 error occurred otherwise
11522 */
11523static int
11524xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11525 int ix;
11526 unsigned char c;
11527 int codepoint;
11528
11529 if ((utf == NULL) || (len <= 0))
11530 return(0);
11531
11532 for (ix = 0; ix < len;) { /* string is 0-terminated */
11533 c = utf[ix];
11534 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11535 if (c >= 0x20)
11536 ix++;
11537 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11538 ix++;
11539 else
11540 return(-ix);
11541 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11542 if (ix + 2 > len) return(complete ? -ix : ix);
11543 if ((utf[ix+1] & 0xc0 ) != 0x80)
11544 return(-ix);
11545 codepoint = (utf[ix] & 0x1f) << 6;
11546 codepoint |= utf[ix+1] & 0x3f;
11547 if (!xmlIsCharQ(codepoint))
11548 return(-ix);
11549 ix += 2;
11550 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11551 if (ix + 3 > len) return(complete ? -ix : ix);
11552 if (((utf[ix+1] & 0xc0) != 0x80) ||
11553 ((utf[ix+2] & 0xc0) != 0x80))
11554 return(-ix);
11555 codepoint = (utf[ix] & 0xf) << 12;
11556 codepoint |= (utf[ix+1] & 0x3f) << 6;
11557 codepoint |= utf[ix+2] & 0x3f;
11558 if (!xmlIsCharQ(codepoint))
11559 return(-ix);
11560 ix += 3;
11561 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11562 if (ix + 4 > len) return(complete ? -ix : ix);
11563 if (((utf[ix+1] & 0xc0) != 0x80) ||
11564 ((utf[ix+2] & 0xc0) != 0x80) ||
11565 ((utf[ix+3] & 0xc0) != 0x80))
11566 return(-ix);
11567 codepoint = (utf[ix] & 0x7) << 18;
11568 codepoint |= (utf[ix+1] & 0x3f) << 12;
11569 codepoint |= (utf[ix+2] & 0x3f) << 6;
11570 codepoint |= utf[ix+3] & 0x3f;
11571 if (!xmlIsCharQ(codepoint))
11572 return(-ix);
11573 ix += 4;
11574 } else /* unknown encoding */
11575 return(-ix);
11576 }
11577 return(ix);
11578}
11579
11580/**
11581 * xmlParseTryOrFinish:
11582 * @ctxt: an XML parser context
11583 * @terminate: last chunk indicator
11584 *
11585 * Try to progress on parsing
11586 *
11587 * Returns zero if no parsing was possible
11588 */
11589static int
11590xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11591 int ret = 0;
11592 size_t avail;
11593 xmlChar cur, next;
11594
11595 if (ctxt->input == NULL)
11596 return(0);
11597
11598 if ((ctxt->input != NULL) &&
11599 (ctxt->input->cur - ctxt->input->base > 4096)) {
11600 xmlParserShrink(ctxt);
11601 }
11602
11603 while (ctxt->instate != XML_PARSER_EOF) {
11604 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11605 return(0);
11606
11607 avail = ctxt->input->end - ctxt->input->cur;
11608 if (avail < 1)
11609 goto done;
11610 switch (ctxt->instate) {
11611 case XML_PARSER_EOF:
11612 /*
11613 * Document parsing is done !
11614 */
11615 goto done;
11616 case XML_PARSER_START:
11617 /*
11618 * Very first chars read from the document flow.
11619 */
11620 if ((!terminate) && (avail < 4))
11621 goto done;
11622
11623 /*
11624 * We need more bytes to detect EBCDIC code pages.
11625 * See xmlDetectEBCDIC.
11626 */
11627 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11628 (!terminate) && (avail < 200))
11629 goto done;
11630
11631 xmlDetectEncoding(ctxt);
11632 if (ctxt->instate == XML_PARSER_EOF)
11633 goto done;
11634 ctxt->instate = XML_PARSER_XML_DECL;
11635 break;
11636
11637 case XML_PARSER_XML_DECL:
11638 if ((!terminate) && (avail < 2))
11639 goto done;
11640 cur = ctxt->input->cur[0];
11641 next = ctxt->input->cur[1];
11642 if ((cur == '<') && (next == '?')) {
11643 /* PI or XML decl */
11644 if ((!terminate) &&
11645 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11646 goto done;
11647 if ((ctxt->input->cur[2] == 'x') &&
11648 (ctxt->input->cur[3] == 'm') &&
11649 (ctxt->input->cur[4] == 'l') &&
11650 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11651 ret += 5;
11652 xmlParseXMLDecl(ctxt);
11653 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11654 /*
11655 * The XML REC instructs us to stop parsing right
11656 * here
11657 */
11658 xmlHaltParser(ctxt);
11659 return(0);
11660 }
11661 } else {
11662 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11663 }
11664 } else {
11665 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11666 if (ctxt->version == NULL) {
11667 xmlErrMemory(ctxt, NULL);
11668 break;
11669 }
11670 }
11671 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11672 ctxt->sax->setDocumentLocator(ctxt->userData,
11673 &xmlDefaultSAXLocator);
11674 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11675 (!ctxt->disableSAX))
11676 ctxt->sax->startDocument(ctxt->userData);
11677 if (ctxt->instate == XML_PARSER_EOF)
11678 goto done;
11679 ctxt->instate = XML_PARSER_MISC;
11680 break;
11681 case XML_PARSER_START_TAG: {
11682 const xmlChar *name;
11683 const xmlChar *prefix = NULL;
11684 const xmlChar *URI = NULL;
11685 int line = ctxt->input->line;
11686 int nbNs = 0;
11687
11688 if ((!terminate) && (avail < 2))
11689 goto done;
11690 cur = ctxt->input->cur[0];
11691 if (cur != '<') {
11692 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11693 "Start tag expected, '<' not found");
11694 xmlHaltParser(ctxt);
11695 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11696 ctxt->sax->endDocument(ctxt->userData);
11697 goto done;
11698 }
11699 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11700 goto done;
11701 if (ctxt->spaceNr == 0)
11702 spacePush(ctxt, -1);
11703 else if (*ctxt->space == -2)
11704 spacePush(ctxt, -1);
11705 else
11706 spacePush(ctxt, *ctxt->space);
11707#ifdef LIBXML_SAX1_ENABLED
11708 if (ctxt->sax2)
11709#endif /* LIBXML_SAX1_ENABLED */
11710 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11711#ifdef LIBXML_SAX1_ENABLED
11712 else
11713 name = xmlParseStartTag(ctxt);
11714#endif /* LIBXML_SAX1_ENABLED */
11715 if (ctxt->instate == XML_PARSER_EOF)
11716 goto done;
11717 if (name == NULL) {
11718 spacePop(ctxt);
11719 xmlHaltParser(ctxt);
11720 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11721 ctxt->sax->endDocument(ctxt->userData);
11722 goto done;
11723 }
11724#ifdef LIBXML_VALID_ENABLED
11725 /*
11726 * [ VC: Root Element Type ]
11727 * The Name in the document type declaration must match
11728 * the element type of the root element.
11729 */
11730 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11731 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11732 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11733#endif /* LIBXML_VALID_ENABLED */
11734
11735 /*
11736 * Check for an Empty Element.
11737 */
11738 if ((RAW == '/') && (NXT(1) == '>')) {
11739 SKIP(2);
11740
11741 if (ctxt->sax2) {
11742 if ((ctxt->sax != NULL) &&
11743 (ctxt->sax->endElementNs != NULL) &&
11744 (!ctxt->disableSAX))
11745 ctxt->sax->endElementNs(ctxt->userData, name,
11746 prefix, URI);
11747 if (nbNs > 0)
11748 xmlParserNsPop(ctxt, nbNs);
11749#ifdef LIBXML_SAX1_ENABLED
11750 } else {
11751 if ((ctxt->sax != NULL) &&
11752 (ctxt->sax->endElement != NULL) &&
11753 (!ctxt->disableSAX))
11754 ctxt->sax->endElement(ctxt->userData, name);
11755#endif /* LIBXML_SAX1_ENABLED */
11756 }
11757 spacePop(ctxt);
11758 } else if (RAW == '>') {
11759 NEXT;
11760 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11761 } else {
11762 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11763 "Couldn't find end of Start Tag %s\n",
11764 name);
11765 nodePop(ctxt);
11766 spacePop(ctxt);
11767 if (nbNs > 0)
11768 xmlParserNsPop(ctxt, nbNs);
11769 }
11770
11771 if (ctxt->instate == XML_PARSER_EOF)
11772 goto done;
11773 if (ctxt->nameNr == 0)
11774 ctxt->instate = XML_PARSER_EPILOG;
11775 else
11776 ctxt->instate = XML_PARSER_CONTENT;
11777 break;
11778 }
11779 case XML_PARSER_CONTENT: {
11780 cur = ctxt->input->cur[0];
11781
11782 if (cur == '<') {
11783 if ((!terminate) && (avail < 2))
11784 goto done;
11785 next = ctxt->input->cur[1];
11786
11787 if (next == '/') {
11788 ctxt->instate = XML_PARSER_END_TAG;
11789 break;
11790 } else if (next == '?') {
11791 if ((!terminate) &&
11792 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11793 goto done;
11794 xmlParsePI(ctxt);
11795 if (ctxt->instate == XML_PARSER_EOF)
11796 goto done;
11797 ctxt->instate = XML_PARSER_CONTENT;
11798 break;
11799 } else if (next == '!') {
11800 if ((!terminate) && (avail < 3))
11801 goto done;
11802 next = ctxt->input->cur[2];
11803
11804 if (next == '-') {
11805 if ((!terminate) && (avail < 4))
11806 goto done;
11807 if (ctxt->input->cur[3] == '-') {
11808 if ((!terminate) &&
11809 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11810 goto done;
11811 xmlParseComment(ctxt);
11812 if (ctxt->instate == XML_PARSER_EOF)
11813 goto done;
11814 ctxt->instate = XML_PARSER_CONTENT;
11815 break;
11816 }
11817 } else if (next == '[') {
11818 if ((!terminate) && (avail < 9))
11819 goto done;
11820 if ((ctxt->input->cur[2] == '[') &&
11821 (ctxt->input->cur[3] == 'C') &&
11822 (ctxt->input->cur[4] == 'D') &&
11823 (ctxt->input->cur[5] == 'A') &&
11824 (ctxt->input->cur[6] == 'T') &&
11825 (ctxt->input->cur[7] == 'A') &&
11826 (ctxt->input->cur[8] == '[')) {
11827 SKIP(9);
11828 ctxt->instate = XML_PARSER_CDATA_SECTION;
11829 break;
11830 }
11831 }
11832 }
11833 } else if (cur == '&') {
11834 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11835 goto done;
11836 xmlParseReference(ctxt);
11837 break;
11838 } else {
11839 /* TODO Avoid the extra copy, handle directly !!! */
11840 /*
11841 * Goal of the following test is:
11842 * - minimize calls to the SAX 'character' callback
11843 * when they are mergeable
11844 * - handle an problem for isBlank when we only parse
11845 * a sequence of blank chars and the next one is
11846 * not available to check against '<' presence.
11847 * - tries to homogenize the differences in SAX
11848 * callbacks between the push and pull versions
11849 * of the parser.
11850 */
11851 if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11852 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11853 goto done;
11854 }
11855 ctxt->checkIndex = 0;
11856 xmlParseCharDataInternal(ctxt, !terminate);
11857 break;
11858 }
11859
11860 ctxt->instate = XML_PARSER_START_TAG;
11861 break;
11862 }
11863 case XML_PARSER_END_TAG:
11864 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11865 goto done;
11866 if (ctxt->sax2) {
11867 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11868 nameNsPop(ctxt);
11869 }
11870#ifdef LIBXML_SAX1_ENABLED
11871 else
11872 xmlParseEndTag1(ctxt, 0);
11873#endif /* LIBXML_SAX1_ENABLED */
11874 if (ctxt->instate == XML_PARSER_EOF)
11875 goto done;
11876 if (ctxt->nameNr == 0) {
11877 ctxt->instate = XML_PARSER_EPILOG;
11878 } else {
11879 ctxt->instate = XML_PARSER_CONTENT;
11880 }
11881 break;
11882 case XML_PARSER_CDATA_SECTION: {
11883 /*
11884 * The Push mode need to have the SAX callback for
11885 * cdataBlock merge back contiguous callbacks.
11886 */
11887 const xmlChar *term;
11888
11889 if (terminate) {
11890 /*
11891 * Don't call xmlParseLookupString. If 'terminate'
11892 * is set, checkIndex is invalid.
11893 */
11894 term = BAD_CAST strstr((const char *) ctxt->input->cur,
11895 "]]>");
11896 } else {
11897 term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11898 }
11899
11900 if (term == NULL) {
11901 int tmp, size;
11902
11903 if (terminate) {
11904 /* Unfinished CDATA section */
11905 size = ctxt->input->end - ctxt->input->cur;
11906 } else {
11907 if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11908 goto done;
11909 ctxt->checkIndex = 0;
11910 /* XXX: Why don't we pass the full buffer? */
11911 size = XML_PARSER_BIG_BUFFER_SIZE;
11912 }
11913 tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11914 if (tmp <= 0) {
11915 tmp = -tmp;
11916 ctxt->input->cur += tmp;
11917 goto encoding_error;
11918 }
11919 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11920 if (ctxt->sax->cdataBlock != NULL)
11921 ctxt->sax->cdataBlock(ctxt->userData,
11922 ctxt->input->cur, tmp);
11923 else if (ctxt->sax->characters != NULL)
11924 ctxt->sax->characters(ctxt->userData,
11925 ctxt->input->cur, tmp);
11926 }
11927 if (ctxt->instate == XML_PARSER_EOF)
11928 goto done;
11929 SKIPL(tmp);
11930 } else {
11931 int base = term - CUR_PTR;
11932 int tmp;
11933
11934 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11935 if ((tmp < 0) || (tmp != base)) {
11936 tmp = -tmp;
11937 ctxt->input->cur += tmp;
11938 goto encoding_error;
11939 }
11940 if ((ctxt->sax != NULL) && (base == 0) &&
11941 (ctxt->sax->cdataBlock != NULL) &&
11942 (!ctxt->disableSAX)) {
11943 /*
11944 * Special case to provide identical behaviour
11945 * between pull and push parsers on enpty CDATA
11946 * sections
11947 */
11948 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11949 (!strncmp((const char *)&ctxt->input->cur[-9],
11950 "<![CDATA[", 9)))
11951 ctxt->sax->cdataBlock(ctxt->userData,
11952 BAD_CAST "", 0);
11953 } else if ((ctxt->sax != NULL) && (base > 0) &&
11954 (!ctxt->disableSAX)) {
11955 if (ctxt->sax->cdataBlock != NULL)
11956 ctxt->sax->cdataBlock(ctxt->userData,
11957 ctxt->input->cur, base);
11958 else if (ctxt->sax->characters != NULL)
11959 ctxt->sax->characters(ctxt->userData,
11960 ctxt->input->cur, base);
11961 }
11962 if (ctxt->instate == XML_PARSER_EOF)
11963 goto done;
11964 SKIPL(base + 3);
11965 ctxt->instate = XML_PARSER_CONTENT;
11966 }
11967 break;
11968 }
11969 case XML_PARSER_MISC:
11970 case XML_PARSER_PROLOG:
11971 case XML_PARSER_EPILOG:
11972 SKIP_BLANKS;
11973 avail = ctxt->input->end - ctxt->input->cur;
11974 if (avail < 1)
11975 goto done;
11976 if (ctxt->input->cur[0] == '<') {
11977 if ((!terminate) && (avail < 2))
11978 goto done;
11979 next = ctxt->input->cur[1];
11980 if (next == '?') {
11981 if ((!terminate) &&
11982 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11983 goto done;
11984 xmlParsePI(ctxt);
11985 if (ctxt->instate == XML_PARSER_EOF)
11986 goto done;
11987 break;
11988 } else if (next == '!') {
11989 if ((!terminate) && (avail < 3))
11990 goto done;
11991
11992 if (ctxt->input->cur[2] == '-') {
11993 if ((!terminate) && (avail < 4))
11994 goto done;
11995 if (ctxt->input->cur[3] == '-') {
11996 if ((!terminate) &&
11997 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11998 goto done;
11999 xmlParseComment(ctxt);
12000 if (ctxt->instate == XML_PARSER_EOF)
12001 goto done;
12002 break;
12003 }
12004 } else if (ctxt->instate == XML_PARSER_MISC) {
12005 if ((!terminate) && (avail < 9))
12006 goto done;
12007 if ((ctxt->input->cur[2] == 'D') &&
12008 (ctxt->input->cur[3] == 'O') &&
12009 (ctxt->input->cur[4] == 'C') &&
12010 (ctxt->input->cur[5] == 'T') &&
12011 (ctxt->input->cur[6] == 'Y') &&
12012 (ctxt->input->cur[7] == 'P') &&
12013 (ctxt->input->cur[8] == 'E')) {
12014 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
12015 goto done;
12016 ctxt->inSubset = 1;
12017 xmlParseDocTypeDecl(ctxt);
12018 if (ctxt->instate == XML_PARSER_EOF)
12019 goto done;
12020 if (RAW == '[') {
12021 ctxt->instate = XML_PARSER_DTD;
12022 } else {
12023 /*
12024 * Create and update the external subset.
12025 */
12026 ctxt->inSubset = 2;
12027 if ((ctxt->sax != NULL) &&
12028 (!ctxt->disableSAX) &&
12029 (ctxt->sax->externalSubset != NULL))
12030 ctxt->sax->externalSubset(
12031 ctxt->userData,
12032 ctxt->intSubName,
12033 ctxt->extSubSystem,
12034 ctxt->extSubURI);
12035 ctxt->inSubset = 0;
12036 xmlCleanSpecialAttr(ctxt);
12037 if (ctxt->instate == XML_PARSER_EOF)
12038 goto done;
12039 ctxt->instate = XML_PARSER_PROLOG;
12040 }
12041 break;
12042 }
12043 }
12044 }
12045 }
12046
12047 if (ctxt->instate == XML_PARSER_EPILOG) {
12048 if (ctxt->errNo == XML_ERR_OK)
12049 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12050 ctxt->instate = XML_PARSER_EOF;
12051 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12052 ctxt->sax->endDocument(ctxt->userData);
12053 } else {
12054 ctxt->instate = XML_PARSER_START_TAG;
12055 }
12056 break;
12057 case XML_PARSER_DTD: {
12058 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12059 goto done;
12060 xmlParseInternalSubset(ctxt);
12061 if (ctxt->instate == XML_PARSER_EOF)
12062 goto done;
12063 ctxt->inSubset = 2;
12064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12065 (ctxt->sax->externalSubset != NULL))
12066 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12067 ctxt->extSubSystem, ctxt->extSubURI);
12068 ctxt->inSubset = 0;
12069 xmlCleanSpecialAttr(ctxt);
12070 if (ctxt->instate == XML_PARSER_EOF)
12071 goto done;
12072 ctxt->instate = XML_PARSER_PROLOG;
12073 break;
12074 }
12075 default:
12076 xmlGenericError(xmlGenericErrorContext,
12077 "PP: internal error\n");
12078 ctxt->instate = XML_PARSER_EOF;
12079 break;
12080 }
12081 }
12082done:
12083 return(ret);
12084encoding_error:
12085 if (ctxt->input->end - ctxt->input->cur < 4) {
12086 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12087 "Input is not proper UTF-8, indicate encoding !\n",
12088 NULL, NULL);
12089 } else {
12090 char buffer[150];
12091
12092 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12093 ctxt->input->cur[0], ctxt->input->cur[1],
12094 ctxt->input->cur[2], ctxt->input->cur[3]);
12095 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12096 "Input is not proper UTF-8, indicate encoding !\n%s",
12097 BAD_CAST buffer, NULL);
12098 }
12099 return(0);
12100}
12101
12102/**
12103 * xmlParseChunk:
12104 * @ctxt: an XML parser context
12105 * @chunk: an char array
12106 * @size: the size in byte of the chunk
12107 * @terminate: last chunk indicator
12108 *
12109 * Parse a Chunk of memory
12110 *
12111 * Returns zero if no error, the xmlParserErrors otherwise.
12112 */
12113int
12114xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12115 int terminate) {
12116 int end_in_lf = 0;
12117
12118 if (ctxt == NULL)
12119 return(XML_ERR_INTERNAL_ERROR);
12120 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12121 return(ctxt->errNo);
12122 if (ctxt->instate == XML_PARSER_EOF)
12123 return(-1);
12124 if (ctxt->input == NULL)
12125 return(-1);
12126
12127 ctxt->progressive = 1;
12128 if (ctxt->instate == XML_PARSER_START)
12129 xmlDetectSAX2(ctxt);
12130 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12131 (chunk[size - 1] == '\r')) {
12132 end_in_lf = 1;
12133 size--;
12134 }
12135
12136 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12137 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12138 size_t pos = ctxt->input->cur - ctxt->input->base;
12139 int res;
12140
12141 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12142 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12143 if (res < 0) {
12144 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12145 xmlHaltParser(ctxt);
12146 return(ctxt->errNo);
12147 }
12148 }
12149
12150 xmlParseTryOrFinish(ctxt, terminate);
12151 if (ctxt->instate == XML_PARSER_EOF)
12152 return(ctxt->errNo);
12153
12154 if ((ctxt->input != NULL) &&
12155 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12156 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12157 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12158 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12159 xmlHaltParser(ctxt);
12160 }
12161 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12162 return(ctxt->errNo);
12163
12164 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12165 (ctxt->input->buf != NULL)) {
12166 size_t pos = ctxt->input->cur - ctxt->input->base;
12167 int res;
12168
12169 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12170 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12171 if (res < 0) {
12172 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12173 xmlHaltParser(ctxt);
12174 return(ctxt->errNo);
12175 }
12176 }
12177 if (terminate) {
12178 /*
12179 * Check for termination
12180 */
12181 if ((ctxt->instate != XML_PARSER_EOF) &&
12182 (ctxt->instate != XML_PARSER_EPILOG)) {
12183 if (ctxt->nameNr > 0) {
12184 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
12185 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
12186 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
12187 "Premature end of data in tag %s line %d\n",
12188 name, line, NULL);
12189 } else if (ctxt->instate == XML_PARSER_START) {
12190 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
12191 } else {
12192 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
12193 "Start tag expected, '<' not found\n");
12194 }
12195 } else if ((ctxt->input->buf != NULL) &&
12196 (ctxt->input->buf->encoder != NULL) &&
12197 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
12198 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
12199 "Truncated multi-byte sequence at EOF\n");
12200 }
12201 if (ctxt->instate != XML_PARSER_EOF) {
12202 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12203 ctxt->sax->endDocument(ctxt->userData);
12204 }
12205 ctxt->instate = XML_PARSER_EOF;
12206 }
12207 if (ctxt->wellFormed == 0)
12208 return((xmlParserErrors) ctxt->errNo);
12209 else
12210 return(0);
12211}
12212
12213/************************************************************************
12214 * *
12215 * I/O front end functions to the parser *
12216 * *
12217 ************************************************************************/
12218
12219/**
12220 * xmlCreatePushParserCtxt:
12221 * @sax: a SAX handler
12222 * @user_data: The user data returned on SAX callbacks
12223 * @chunk: a pointer to an array of chars
12224 * @size: number of chars in the array
12225 * @filename: an optional file name or URI
12226 *
12227 * Create a parser context for using the XML parser in push mode.
12228 * If @buffer and @size are non-NULL, the data is used to detect
12229 * the encoding. The remaining characters will be parsed so they
12230 * don't need to be fed in again through xmlParseChunk.
12231 * To allow content encoding detection, @size should be >= 4
12232 * The value of @filename is used for fetching external entities
12233 * and error/warning reports.
12234 *
12235 * Returns the new parser context or NULL
12236 */
12237
12238xmlParserCtxtPtr
12239xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12240 const char *chunk, int size, const char *filename) {
12241 xmlParserCtxtPtr ctxt;
12242 xmlParserInputPtr inputStream;
12243 xmlParserInputBufferPtr buf;
12244
12245 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12246 if (buf == NULL) return(NULL);
12247
12248 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12249 if (ctxt == NULL) {
12250 xmlErrMemory(NULL, "creating parser: out of memory\n");
12251 xmlFreeParserInputBuffer(buf);
12252 return(NULL);
12253 }
12254 ctxt->dictNames = 1;
12255 if (filename == NULL) {
12256 ctxt->directory = NULL;
12257 } else {
12258 ctxt->directory = xmlParserGetDirectory(filename);
12259 }
12260
12261 inputStream = xmlNewInputStream(ctxt);
12262 if (inputStream == NULL) {
12263 xmlFreeParserCtxt(ctxt);
12264 xmlFreeParserInputBuffer(buf);
12265 return(NULL);
12266 }
12267
12268 if (filename == NULL)
12269 inputStream->filename = NULL;
12270 else {
12271 inputStream->filename = (char *)
12272 xmlCanonicPath((const xmlChar *) filename);
12273 if (inputStream->filename == NULL) {
12274 xmlFreeInputStream(inputStream);
12275 xmlFreeParserCtxt(ctxt);
12276 xmlFreeParserInputBuffer(buf);
12277 return(NULL);
12278 }
12279 }
12280 inputStream->buf = buf;
12281 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12282 inputPush(ctxt, inputStream);
12283
12284 if ((size != 0) && (chunk != NULL) &&
12285 (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12286 size_t pos = ctxt->input->cur - ctxt->input->base;
12287 int res;
12288
12289 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12290 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12291 if (res < 0) {
12292 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12293 xmlHaltParser(ctxt);
12294 }
12295 }
12296
12297 return(ctxt);
12298}
12299#endif /* LIBXML_PUSH_ENABLED */
12300
12301/**
12302 * xmlStopParser:
12303 * @ctxt: an XML parser context
12304 *
12305 * Blocks further parser processing
12306 */
12307void
12308xmlStopParser(xmlParserCtxtPtr ctxt) {
12309 if (ctxt == NULL)
12310 return;
12311 xmlHaltParser(ctxt);
12312 ctxt->errNo = XML_ERR_USER_STOP;
12313}
12314
12315/**
12316 * xmlCreateIOParserCtxt:
12317 * @sax: a SAX handler
12318 * @user_data: The user data returned on SAX callbacks
12319 * @ioread: an I/O read function
12320 * @ioclose: an I/O close function
12321 * @ioctx: an I/O handler
12322 * @enc: the charset encoding if known
12323 *
12324 * Create a parser context for using the XML parser with an existing
12325 * I/O stream
12326 *
12327 * Returns the new parser context or NULL
12328 */
12329xmlParserCtxtPtr
12330xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12331 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12332 void *ioctx, xmlCharEncoding enc) {
12333 xmlParserCtxtPtr ctxt;
12334 xmlParserInputPtr inputStream;
12335 xmlParserInputBufferPtr buf;
12336
12337 if (ioread == NULL) return(NULL);
12338
12339 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12340 if (buf == NULL) {
12341 if (ioclose != NULL)
12342 ioclose(ioctx);
12343 return (NULL);
12344 }
12345
12346 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12347 if (ctxt == NULL) {
12348 xmlFreeParserInputBuffer(buf);
12349 return(NULL);
12350 }
12351
12352 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12353 if (inputStream == NULL) {
12354 xmlFreeParserCtxt(ctxt);
12355 return(NULL);
12356 }
12357 inputPush(ctxt, inputStream);
12358
12359 return(ctxt);
12360}
12361
12362#ifdef LIBXML_VALID_ENABLED
12363/************************************************************************
12364 * *
12365 * Front ends when parsing a DTD *
12366 * *
12367 ************************************************************************/
12368
12369/**
12370 * xmlIOParseDTD:
12371 * @sax: the SAX handler block or NULL
12372 * @input: an Input Buffer
12373 * @enc: the charset encoding if known
12374 *
12375 * Load and parse a DTD
12376 *
12377 * Returns the resulting xmlDtdPtr or NULL in case of error.
12378 * @input will be freed by the function in any case.
12379 */
12380
12381xmlDtdPtr
12382xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12383 xmlCharEncoding enc) {
12384 xmlDtdPtr ret = NULL;
12385 xmlParserCtxtPtr ctxt;
12386 xmlParserInputPtr pinput = NULL;
12387
12388 if (input == NULL)
12389 return(NULL);
12390
12391 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12392 if (ctxt == NULL) {
12393 xmlFreeParserInputBuffer(input);
12394 return(NULL);
12395 }
12396
12397 /* We are loading a DTD */
12398 ctxt->options |= XML_PARSE_DTDLOAD;
12399
12400 xmlDetectSAX2(ctxt);
12401
12402 /*
12403 * generate a parser input from the I/O handler
12404 */
12405
12406 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12407 if (pinput == NULL) {
12408 xmlFreeParserInputBuffer(input);
12409 xmlFreeParserCtxt(ctxt);
12410 return(NULL);
12411 }
12412
12413 /*
12414 * plug some encoding conversion routines here.
12415 */
12416 if (xmlPushInput(ctxt, pinput) < 0) {
12417 xmlFreeParserCtxt(ctxt);
12418 return(NULL);
12419 }
12420 if (enc != XML_CHAR_ENCODING_NONE) {
12421 xmlSwitchEncoding(ctxt, enc);
12422 }
12423
12424 /*
12425 * let's parse that entity knowing it's an external subset.
12426 */
12427 ctxt->inSubset = 2;
12428 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12429 if (ctxt->myDoc == NULL) {
12430 xmlErrMemory(ctxt, "New Doc failed");
12431 return(NULL);
12432 }
12433 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12434 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12435 BAD_CAST "none", BAD_CAST "none");
12436
12437 xmlDetectEncoding(ctxt);
12438
12439 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12440
12441 if (ctxt->myDoc != NULL) {
12442 if (ctxt->wellFormed) {
12443 ret = ctxt->myDoc->extSubset;
12444 ctxt->myDoc->extSubset = NULL;
12445 if (ret != NULL) {
12446 xmlNodePtr tmp;
12447
12448 ret->doc = NULL;
12449 tmp = ret->children;
12450 while (tmp != NULL) {
12451 tmp->doc = NULL;
12452 tmp = tmp->next;
12453 }
12454 }
12455 } else {
12456 ret = NULL;
12457 }
12458 xmlFreeDoc(ctxt->myDoc);
12459 ctxt->myDoc = NULL;
12460 }
12461 xmlFreeParserCtxt(ctxt);
12462
12463 return(ret);
12464}
12465
12466/**
12467 * xmlSAXParseDTD:
12468 * @sax: the SAX handler block
12469 * @ExternalID: a NAME* containing the External ID of the DTD
12470 * @SystemID: a NAME* containing the URL to the DTD
12471 *
12472 * DEPRECATED: Don't use.
12473 *
12474 * Load and parse an external subset.
12475 *
12476 * Returns the resulting xmlDtdPtr or NULL in case of error.
12477 */
12478
12479xmlDtdPtr
12480xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12481 const xmlChar *SystemID) {
12482 xmlDtdPtr ret = NULL;
12483 xmlParserCtxtPtr ctxt;
12484 xmlParserInputPtr input = NULL;
12485 xmlChar* systemIdCanonic;
12486
12487 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12488
12489 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12490 if (ctxt == NULL) {
12491 return(NULL);
12492 }
12493
12494 /* We are loading a DTD */
12495 ctxt->options |= XML_PARSE_DTDLOAD;
12496
12497 /*
12498 * Canonicalise the system ID
12499 */
12500 systemIdCanonic = xmlCanonicPath(SystemID);
12501 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12502 xmlFreeParserCtxt(ctxt);
12503 return(NULL);
12504 }
12505
12506 /*
12507 * Ask the Entity resolver to load the damn thing
12508 */
12509
12510 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12511 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12512 systemIdCanonic);
12513 if (input == NULL) {
12514 xmlFreeParserCtxt(ctxt);
12515 if (systemIdCanonic != NULL)
12516 xmlFree(systemIdCanonic);
12517 return(NULL);
12518 }
12519
12520 /*
12521 * plug some encoding conversion routines here.
12522 */
12523 if (xmlPushInput(ctxt, input) < 0) {
12524 xmlFreeParserCtxt(ctxt);
12525 if (systemIdCanonic != NULL)
12526 xmlFree(systemIdCanonic);
12527 return(NULL);
12528 }
12529
12530 xmlDetectEncoding(ctxt);
12531
12532 if (input->filename == NULL)
12533 input->filename = (char *) systemIdCanonic;
12534 else
12535 xmlFree(systemIdCanonic);
12536
12537 /*
12538 * let's parse that entity knowing it's an external subset.
12539 */
12540 ctxt->inSubset = 2;
12541 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12542 if (ctxt->myDoc == NULL) {
12543 xmlErrMemory(ctxt, "New Doc failed");
12544 xmlFreeParserCtxt(ctxt);
12545 return(NULL);
12546 }
12547 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12548 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12549 ExternalID, SystemID);
12550 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12551
12552 if (ctxt->myDoc != NULL) {
12553 if (ctxt->wellFormed) {
12554 ret = ctxt->myDoc->extSubset;
12555 ctxt->myDoc->extSubset = NULL;
12556 if (ret != NULL) {
12557 xmlNodePtr tmp;
12558
12559 ret->doc = NULL;
12560 tmp = ret->children;
12561 while (tmp != NULL) {
12562 tmp->doc = NULL;
12563 tmp = tmp->next;
12564 }
12565 }
12566 } else {
12567 ret = NULL;
12568 }
12569 xmlFreeDoc(ctxt->myDoc);
12570 ctxt->myDoc = NULL;
12571 }
12572 xmlFreeParserCtxt(ctxt);
12573
12574 return(ret);
12575}
12576
12577
12578/**
12579 * xmlParseDTD:
12580 * @ExternalID: a NAME* containing the External ID of the DTD
12581 * @SystemID: a NAME* containing the URL to the DTD
12582 *
12583 * Load and parse an external subset.
12584 *
12585 * Returns the resulting xmlDtdPtr or NULL in case of error.
12586 */
12587
12588xmlDtdPtr
12589xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12590 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12591}
12592#endif /* LIBXML_VALID_ENABLED */
12593
12594/************************************************************************
12595 * *
12596 * Front ends when parsing an Entity *
12597 * *
12598 ************************************************************************/
12599
12600/**
12601 * xmlParseCtxtExternalEntity:
12602 * @ctx: the existing parsing context
12603 * @URL: the URL for the entity to load
12604 * @ID: the System ID for the entity to load
12605 * @lst: the return value for the set of parsed nodes
12606 *
12607 * Parse an external general entity within an existing parsing context
12608 * An external general parsed entity is well-formed if it matches the
12609 * production labeled extParsedEnt.
12610 *
12611 * [78] extParsedEnt ::= TextDecl? content
12612 *
12613 * Returns 0 if the entity is well formed, -1 in case of args problem and
12614 * the parser error code otherwise
12615 */
12616
12617int
12618xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12619 const xmlChar *ID, xmlNodePtr *lst) {
12620 void *userData;
12621
12622 if (ctx == NULL) return(-1);
12623 /*
12624 * If the user provided their own SAX callbacks, then reuse the
12625 * userData callback field, otherwise the expected setup in a
12626 * DOM builder is to have userData == ctxt
12627 */
12628 if (ctx->userData == ctx)
12629 userData = NULL;
12630 else
12631 userData = ctx->userData;
12632 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12633 userData, ctx->depth + 1,
12634 URL, ID, lst);
12635}
12636
12637/**
12638 * xmlParseExternalEntityPrivate:
12639 * @doc: the document the chunk pertains to
12640 * @oldctxt: the previous parser context if available
12641 * @sax: the SAX handler block (possibly NULL)
12642 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12643 * @depth: Used for loop detection, use 0
12644 * @URL: the URL for the entity to load
12645 * @ID: the System ID for the entity to load
12646 * @list: the return value for the set of parsed nodes
12647 *
12648 * Private version of xmlParseExternalEntity()
12649 *
12650 * Returns 0 if the entity is well formed, -1 in case of args problem and
12651 * the parser error code otherwise
12652 */
12653
12654static xmlParserErrors
12655xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12656 xmlSAXHandlerPtr sax,
12657 void *user_data, int depth, const xmlChar *URL,
12658 const xmlChar *ID, xmlNodePtr *list) {
12659 xmlParserCtxtPtr ctxt;
12660 xmlDocPtr newDoc;
12661 xmlNodePtr newRoot;
12662 xmlParserErrors ret = XML_ERR_OK;
12663
12664 if (((depth > 40) &&
12665 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12666 (depth > 100)) {
12667 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12668 "Maximum entity nesting depth exceeded");
12669 return(XML_ERR_ENTITY_LOOP);
12670 }
12671
12672 if (list != NULL)
12673 *list = NULL;
12674 if ((URL == NULL) && (ID == NULL))
12675 return(XML_ERR_INTERNAL_ERROR);
12676 if (doc == NULL)
12677 return(XML_ERR_INTERNAL_ERROR);
12678
12679 ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12680 oldctxt);
12681 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12682 if (oldctxt != NULL) {
12683 ctxt->nbErrors = oldctxt->nbErrors;
12684 ctxt->nbWarnings = oldctxt->nbWarnings;
12685 }
12686 xmlDetectSAX2(ctxt);
12687
12688 newDoc = xmlNewDoc(BAD_CAST "1.0");
12689 if (newDoc == NULL) {
12690 xmlFreeParserCtxt(ctxt);
12691 return(XML_ERR_INTERNAL_ERROR);
12692 }
12693 newDoc->properties = XML_DOC_INTERNAL;
12694 if (doc) {
12695 newDoc->intSubset = doc->intSubset;
12696 newDoc->extSubset = doc->extSubset;
12697 if (doc->dict) {
12698 newDoc->dict = doc->dict;
12699 xmlDictReference(newDoc->dict);
12700 }
12701 if (doc->URL != NULL) {
12702 newDoc->URL = xmlStrdup(doc->URL);
12703 }
12704 }
12705 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12706 if (newRoot == NULL) {
12707 if (sax != NULL)
12708 xmlFreeParserCtxt(ctxt);
12709 newDoc->intSubset = NULL;
12710 newDoc->extSubset = NULL;
12711 xmlFreeDoc(newDoc);
12712 return(XML_ERR_INTERNAL_ERROR);
12713 }
12714 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12715 nodePush(ctxt, newDoc->children);
12716 if (doc == NULL) {
12717 ctxt->myDoc = newDoc;
12718 } else {
12719 ctxt->myDoc = doc;
12720 newRoot->doc = doc;
12721 }
12722
12723 xmlDetectEncoding(ctxt);
12724
12725 /*
12726 * Parse a possible text declaration first
12727 */
12728 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12729 xmlParseTextDecl(ctxt);
12730 /*
12731 * An XML-1.0 document can't reference an entity not XML-1.0
12732 */
12733 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12734 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12735 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12736 "Version mismatch between document and entity\n");
12737 }
12738 }
12739
12740 ctxt->instate = XML_PARSER_CONTENT;
12741 ctxt->depth = depth;
12742 if (oldctxt != NULL) {
12743 ctxt->_private = oldctxt->_private;
12744 ctxt->loadsubset = oldctxt->loadsubset;
12745 ctxt->validate = oldctxt->validate;
12746 ctxt->valid = oldctxt->valid;
12747 ctxt->replaceEntities = oldctxt->replaceEntities;
12748 if (oldctxt->validate) {
12749 ctxt->vctxt.error = oldctxt->vctxt.error;
12750 ctxt->vctxt.warning = oldctxt->vctxt.warning;
12751 ctxt->vctxt.userData = oldctxt->vctxt.userData;
12752 ctxt->vctxt.flags = oldctxt->vctxt.flags;
12753 }
12754 ctxt->external = oldctxt->external;
12755 if (ctxt->dict) xmlDictFree(ctxt->dict);
12756 ctxt->dict = oldctxt->dict;
12757 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12758 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12759 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12760 ctxt->dictNames = oldctxt->dictNames;
12761 ctxt->attsDefault = oldctxt->attsDefault;
12762 ctxt->attsSpecial = oldctxt->attsSpecial;
12763 ctxt->linenumbers = oldctxt->linenumbers;
12764 ctxt->record_info = oldctxt->record_info;
12765 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12766 ctxt->node_seq.length = oldctxt->node_seq.length;
12767 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12768 } else {
12769 /*
12770 * Doing validity checking on chunk without context
12771 * doesn't make sense
12772 */
12773 ctxt->_private = NULL;
12774 ctxt->validate = 0;
12775 ctxt->external = 2;
12776 ctxt->loadsubset = 0;
12777 }
12778
12779 xmlParseContent(ctxt);
12780
12781 if ((RAW == '<') && (NXT(1) == '/')) {
12782 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12783 } else if (RAW != 0) {
12784 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12785 }
12786 if (ctxt->node != newDoc->children) {
12787 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12788 }
12789
12790 if (!ctxt->wellFormed) {
12791 ret = (xmlParserErrors)ctxt->errNo;
12792 if (oldctxt != NULL) {
12793 oldctxt->errNo = ctxt->errNo;
12794 oldctxt->wellFormed = 0;
12795 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12796 }
12797 } else {
12798 if (list != NULL) {
12799 xmlNodePtr cur;
12800
12801 /*
12802 * Return the newly created nodeset after unlinking it from
12803 * they pseudo parent.
12804 */
12805 cur = newDoc->children->children;
12806 *list = cur;
12807 while (cur != NULL) {
12808 cur->parent = NULL;
12809 cur = cur->next;
12810 }
12811 newDoc->children->children = NULL;
12812 }
12813 ret = XML_ERR_OK;
12814 }
12815
12816 /*
12817 * Also record the size of the entity parsed
12818 */
12819 if (ctxt->input != NULL && oldctxt != NULL) {
12820 unsigned long consumed = ctxt->input->consumed;
12821
12822 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12823
12824 xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12825 xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12826
12827 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12828 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12829 }
12830
12831 if (oldctxt != NULL) {
12832 ctxt->dict = NULL;
12833 ctxt->attsDefault = NULL;
12834 ctxt->attsSpecial = NULL;
12835 oldctxt->nbErrors = ctxt->nbErrors;
12836 oldctxt->nbWarnings = ctxt->nbWarnings;
12837 oldctxt->validate = ctxt->validate;
12838 oldctxt->valid = ctxt->valid;
12839 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12840 oldctxt->node_seq.length = ctxt->node_seq.length;
12841 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12842 }
12843 ctxt->node_seq.maximum = 0;
12844 ctxt->node_seq.length = 0;
12845 ctxt->node_seq.buffer = NULL;
12846 xmlFreeParserCtxt(ctxt);
12847 newDoc->intSubset = NULL;
12848 newDoc->extSubset = NULL;
12849 xmlFreeDoc(newDoc);
12850
12851 return(ret);
12852}
12853
12854#ifdef LIBXML_SAX1_ENABLED
12855/**
12856 * xmlParseExternalEntity:
12857 * @doc: the document the chunk pertains to
12858 * @sax: the SAX handler block (possibly NULL)
12859 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12860 * @depth: Used for loop detection, use 0
12861 * @URL: the URL for the entity to load
12862 * @ID: the System ID for the entity to load
12863 * @lst: the return value for the set of parsed nodes
12864 *
12865 * Parse an external general entity
12866 * An external general parsed entity is well-formed if it matches the
12867 * production labeled extParsedEnt.
12868 *
12869 * [78] extParsedEnt ::= TextDecl? content
12870 *
12871 * Returns 0 if the entity is well formed, -1 in case of args problem and
12872 * the parser error code otherwise
12873 */
12874
12875int
12876xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12877 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12878 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12879 ID, lst));
12880}
12881
12882/**
12883 * xmlParseBalancedChunkMemory:
12884 * @doc: the document the chunk pertains to (must not be NULL)
12885 * @sax: the SAX handler block (possibly NULL)
12886 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12887 * @depth: Used for loop detection, use 0
12888 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12889 * @lst: the return value for the set of parsed nodes
12890 *
12891 * Parse a well-balanced chunk of an XML document
12892 * called by the parser
12893 * The allowed sequence for the Well Balanced Chunk is the one defined by
12894 * the content production in the XML grammar:
12895 *
12896 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12897 *
12898 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12899 * the parser error code otherwise
12900 */
12901
12902int
12903xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12904 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12905 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12906 depth, string, lst, 0 );
12907}
12908#endif /* LIBXML_SAX1_ENABLED */
12909
12910/**
12911 * xmlParseBalancedChunkMemoryInternal:
12912 * @oldctxt: the existing parsing context
12913 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12914 * @user_data: the user data field for the parser context
12915 * @lst: the return value for the set of parsed nodes
12916 *
12917 *
12918 * Parse a well-balanced chunk of an XML document
12919 * called by the parser
12920 * The allowed sequence for the Well Balanced Chunk is the one defined by
12921 * the content production in the XML grammar:
12922 *
12923 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12924 *
12925 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12926 * error code otherwise
12927 *
12928 * In case recover is set to 1, the nodelist will not be empty even if
12929 * the parsed chunk is not well balanced.
12930 */
12931static xmlParserErrors
12932xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12933 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12934 xmlParserCtxtPtr ctxt;
12935 xmlDocPtr newDoc = NULL;
12936 xmlNodePtr newRoot;
12937 xmlSAXHandlerPtr oldsax = NULL;
12938 xmlNodePtr content = NULL;
12939 xmlNodePtr last = NULL;
12940 xmlParserErrors ret = XML_ERR_OK;
12941 xmlHashedString hprefix, huri;
12942 unsigned i;
12943
12944 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12945 (oldctxt->depth > 100)) {
12946 xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12947 "Maximum entity nesting depth exceeded");
12948 return(XML_ERR_ENTITY_LOOP);
12949 }
12950
12951
12952 if (lst != NULL)
12953 *lst = NULL;
12954 if (string == NULL)
12955 return(XML_ERR_INTERNAL_ERROR);
12956
12957 ctxt = xmlCreateDocParserCtxt(string);
12958 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12959 ctxt->nbErrors = oldctxt->nbErrors;
12960 ctxt->nbWarnings = oldctxt->nbWarnings;
12961 if (user_data != NULL)
12962 ctxt->userData = user_data;
12963 else
12964 ctxt->userData = ctxt;
12965 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12966 ctxt->dict = oldctxt->dict;
12967 ctxt->input_id = oldctxt->input_id;
12968 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12969 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12970 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12971
12972 /*
12973 * Propagate namespaces down the entity
12974 *
12975 * Making entities and namespaces work correctly requires additional
12976 * changes, see xmlParseReference.
12977 */
12978
12979 /* Default namespace */
12980 hprefix.name = NULL;
12981 hprefix.hashValue = 0;
12982 huri.name = xmlParserNsLookupUri(oldctxt, &hprefix);
12983 huri.hashValue = 0;
12984 if (huri.name != NULL)
12985 xmlParserNsPush(ctxt, NULL, &huri, NULL, 0);
12986
12987 for (i = 0; i < oldctxt->nsdb->hashSize; i++) {
12988 xmlParserNsBucket *bucket = &oldctxt->nsdb->hash[i];
12989 const xmlChar **ns;
12990 xmlParserNsExtra *extra;
12991 unsigned nsIndex;
12992
12993 if ((bucket->hashValue != 0) &&
12994 (bucket->index != INT_MAX)) {
12995 nsIndex = bucket->index;
12996 ns = &oldctxt->nsTab[nsIndex * 2];
12997 extra = &oldctxt->nsdb->extra[nsIndex];
12998
12999 hprefix.name = ns[0];
13000 hprefix.hashValue = bucket->hashValue;
13001 huri.name = ns[1];
13002 huri.hashValue = extra->uriHashValue;
13003 /*
13004 * Don't copy SAX data to avoid a use-after-free with XML reader.
13005 * This matches the pre-2.12 behavior.
13006 */
13007 xmlParserNsPush(ctxt, &hprefix, &huri, NULL, 0);
13008 }
13009 }
13010
13011 oldsax = ctxt->sax;
13012 ctxt->sax = oldctxt->sax;
13013 xmlDetectSAX2(ctxt);
13014 ctxt->replaceEntities = oldctxt->replaceEntities;
13015 ctxt->options = oldctxt->options;
13016
13017 ctxt->_private = oldctxt->_private;
13018 if (oldctxt->myDoc == NULL) {
13019 newDoc = xmlNewDoc(BAD_CAST "1.0");
13020 if (newDoc == NULL) {
13021 ret = XML_ERR_INTERNAL_ERROR;
13022 goto error;
13023 }
13024 newDoc->properties = XML_DOC_INTERNAL;
13025 newDoc->dict = ctxt->dict;
13026 xmlDictReference(newDoc->dict);
13027 ctxt->myDoc = newDoc;
13028 } else {
13029 ctxt->myDoc = oldctxt->myDoc;
13030 content = ctxt->myDoc->children;
13031 last = ctxt->myDoc->last;
13032 }
13033 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13034 if (newRoot == NULL) {
13035 ret = XML_ERR_INTERNAL_ERROR;
13036 goto error;
13037 }
13038 ctxt->myDoc->children = NULL;
13039 ctxt->myDoc->last = NULL;
13040 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13041 nodePush(ctxt, ctxt->myDoc->children);
13042 ctxt->instate = XML_PARSER_CONTENT;
13043 ctxt->depth = oldctxt->depth;
13044
13045 ctxt->validate = 0;
13046 ctxt->loadsubset = oldctxt->loadsubset;
13047 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13048 /*
13049 * ID/IDREF registration will be done in xmlValidateElement below
13050 */
13051 ctxt->loadsubset |= XML_SKIP_IDS;
13052 }
13053 ctxt->dictNames = oldctxt->dictNames;
13054 ctxt->attsDefault = oldctxt->attsDefault;
13055 ctxt->attsSpecial = oldctxt->attsSpecial;
13056
13057 xmlParseContent(ctxt);
13058 if ((RAW == '<') && (NXT(1) == '/')) {
13059 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13060 } else if (RAW != 0) {
13061 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13062 }
13063 if (ctxt->node != ctxt->myDoc->children) {
13064 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13065 }
13066
13067 if (!ctxt->wellFormed) {
13068 ret = (xmlParserErrors)ctxt->errNo;
13069 oldctxt->errNo = ctxt->errNo;
13070 oldctxt->wellFormed = 0;
13071 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13072 } else {
13073 ret = XML_ERR_OK;
13074 }
13075
13076 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13077 xmlNodePtr cur;
13078
13079 /*
13080 * Return the newly created nodeset after unlinking it from
13081 * they pseudo parent.
13082 */
13083 cur = ctxt->myDoc->children->children;
13084 *lst = cur;
13085 while (cur != NULL) {
13086#ifdef LIBXML_VALID_ENABLED
13087 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13088 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13089 (cur->type == XML_ELEMENT_NODE)) {
13090 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13091 oldctxt->myDoc, cur);
13092 }
13093#endif /* LIBXML_VALID_ENABLED */
13094 cur->parent = NULL;
13095 cur = cur->next;
13096 }
13097 ctxt->myDoc->children->children = NULL;
13098 }
13099 if (ctxt->myDoc != NULL) {
13100 xmlFreeNode(ctxt->myDoc->children);
13101 ctxt->myDoc->children = content;
13102 ctxt->myDoc->last = last;
13103 }
13104
13105 /*
13106 * Also record the size of the entity parsed
13107 */
13108 if (ctxt->input != NULL && oldctxt != NULL) {
13109 unsigned long consumed = ctxt->input->consumed;
13110
13111 xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13112
13113 xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13114 xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13115 }
13116
13117 oldctxt->nbErrors = ctxt->nbErrors;
13118 oldctxt->nbWarnings = ctxt->nbWarnings;
13119
13120error:
13121 ctxt->sax = oldsax;
13122 ctxt->dict = NULL;
13123 ctxt->attsDefault = NULL;
13124 ctxt->attsSpecial = NULL;
13125 xmlFreeParserCtxt(ctxt);
13126 if (newDoc != NULL) {
13127 xmlFreeDoc(newDoc);
13128 }
13129
13130 return(ret);
13131}
13132
13133/**
13134 * xmlParseInNodeContext:
13135 * @node: the context node
13136 * @data: the input string
13137 * @datalen: the input string length in bytes
13138 * @options: a combination of xmlParserOption
13139 * @lst: the return value for the set of parsed nodes
13140 *
13141 * Parse a well-balanced chunk of an XML document
13142 * within the context (DTD, namespaces, etc ...) of the given node.
13143 *
13144 * The allowed sequence for the data is a Well Balanced Chunk defined by
13145 * the content production in the XML grammar:
13146 *
13147 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13148 *
13149 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13150 * error code otherwise
13151 */
13152xmlParserErrors
13153xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13154 int options, xmlNodePtr *lst) {
13155 xmlParserCtxtPtr ctxt;
13156 xmlDocPtr doc = NULL;
13157 xmlNodePtr fake, cur;
13158 int nsnr = 0;
13159
13160 xmlParserErrors ret = XML_ERR_OK;
13161
13162 /*
13163 * check all input parameters, grab the document
13164 */
13165 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13166 return(XML_ERR_INTERNAL_ERROR);
13167 switch (node->type) {
13168 case XML_ELEMENT_NODE:
13169 case XML_ATTRIBUTE_NODE:
13170 case XML_TEXT_NODE:
13171 case XML_CDATA_SECTION_NODE:
13172 case XML_ENTITY_REF_NODE:
13173 case XML_PI_NODE:
13174 case XML_COMMENT_NODE:
13175 case XML_DOCUMENT_NODE:
13176 case XML_HTML_DOCUMENT_NODE:
13177 break;
13178 default:
13179 return(XML_ERR_INTERNAL_ERROR);
13180
13181 }
13182 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13183 (node->type != XML_DOCUMENT_NODE) &&
13184 (node->type != XML_HTML_DOCUMENT_NODE))
13185 node = node->parent;
13186 if (node == NULL)
13187 return(XML_ERR_INTERNAL_ERROR);
13188 if (node->type == XML_ELEMENT_NODE)
13189 doc = node->doc;
13190 else
13191 doc = (xmlDocPtr) node;
13192 if (doc == NULL)
13193 return(XML_ERR_INTERNAL_ERROR);
13194
13195 /*
13196 * allocate a context and set-up everything not related to the
13197 * node position in the tree
13198 */
13199 if (doc->type == XML_DOCUMENT_NODE)
13200 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13201#ifdef LIBXML_HTML_ENABLED
13202 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13203 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13204 /*
13205 * When parsing in context, it makes no sense to add implied
13206 * elements like html/body/etc...
13207 */
13208 options |= HTML_PARSE_NOIMPLIED;
13209 }
13210#endif
13211 else
13212 return(XML_ERR_INTERNAL_ERROR);
13213
13214 if (ctxt == NULL)
13215 return(XML_ERR_NO_MEMORY);
13216
13217 /*
13218 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13219 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13220 * we must wait until the last moment to free the original one.
13221 */
13222 if (doc->dict != NULL) {
13223 if (ctxt->dict != NULL)
13224 xmlDictFree(ctxt->dict);
13225 ctxt->dict = doc->dict;
13226 } else
13227 options |= XML_PARSE_NODICT;
13228
13229 if (doc->encoding != NULL) {
13230 xmlCharEncodingHandlerPtr hdlr;
13231
13232 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13233 if (hdlr != NULL) {
13234 xmlSwitchToEncoding(ctxt, hdlr);
13235 } else {
13236 return(XML_ERR_UNSUPPORTED_ENCODING);
13237 }
13238 }
13239
13240 xmlCtxtUseOptionsInternal(ctxt, options);
13241 xmlDetectSAX2(ctxt);
13242 ctxt->myDoc = doc;
13243 /* parsing in context, i.e. as within existing content */
13244 ctxt->input_id = 2;
13245 ctxt->instate = XML_PARSER_CONTENT;
13246
13247 fake = xmlNewDocComment(node->doc, NULL);
13248 if (fake == NULL) {
13249 xmlFreeParserCtxt(ctxt);
13250 return(XML_ERR_NO_MEMORY);
13251 }
13252 xmlAddChild(node, fake);
13253
13254 if (node->type == XML_ELEMENT_NODE)
13255 nodePush(ctxt, node);
13256
13257 if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
13258 /*
13259 * initialize the SAX2 namespaces stack
13260 */
13261 cur = node;
13262 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13263 xmlNsPtr ns = cur->nsDef;
13264 xmlHashedString hprefix, huri;
13265
13266 while (ns != NULL) {
13267 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
13268 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
13269 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
13270 nsnr++;
13271 ns = ns->next;
13272 }
13273 cur = cur->parent;
13274 }
13275 }
13276
13277 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13278 /*
13279 * ID/IDREF registration will be done in xmlValidateElement below
13280 */
13281 ctxt->loadsubset |= XML_SKIP_IDS;
13282 }
13283
13284#ifdef LIBXML_HTML_ENABLED
13285 if (doc->type == XML_HTML_DOCUMENT_NODE)
13286 __htmlParseContent(ctxt);
13287 else
13288#endif
13289 xmlParseContent(ctxt);
13290
13291 xmlParserNsPop(ctxt, nsnr);
13292 if ((RAW == '<') && (NXT(1) == '/')) {
13293 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13294 } else if (RAW != 0) {
13295 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13296 }
13297 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13298 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13299 ctxt->wellFormed = 0;
13300 }
13301
13302 if (!ctxt->wellFormed) {
13303 if (ctxt->errNo == 0)
13304 ret = XML_ERR_INTERNAL_ERROR;
13305 else
13306 ret = (xmlParserErrors)ctxt->errNo;
13307 } else {
13308 ret = XML_ERR_OK;
13309 }
13310
13311 /*
13312 * Return the newly created nodeset after unlinking it from
13313 * the pseudo sibling.
13314 */
13315
13316 cur = fake->next;
13317 fake->next = NULL;
13318 node->last = fake;
13319
13320 if (cur != NULL) {
13321 cur->prev = NULL;
13322 }
13323
13324 *lst = cur;
13325
13326 while (cur != NULL) {
13327 cur->parent = NULL;
13328 cur = cur->next;
13329 }
13330
13331 xmlUnlinkNode(fake);
13332 xmlFreeNode(fake);
13333
13334
13335 if (ret != XML_ERR_OK) {
13336 xmlFreeNodeList(*lst);
13337 *lst = NULL;
13338 }
13339
13340 if (doc->dict != NULL)
13341 ctxt->dict = NULL;
13342 xmlFreeParserCtxt(ctxt);
13343
13344 return(ret);
13345}
13346
13347#ifdef LIBXML_SAX1_ENABLED
13348/**
13349 * xmlParseBalancedChunkMemoryRecover:
13350 * @doc: the document the chunk pertains to (must not be NULL)
13351 * @sax: the SAX handler block (possibly NULL)
13352 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13353 * @depth: Used for loop detection, use 0
13354 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13355 * @lst: the return value for the set of parsed nodes
13356 * @recover: return nodes even if the data is broken (use 0)
13357 *
13358 *
13359 * Parse a well-balanced chunk of an XML document
13360 * called by the parser
13361 * The allowed sequence for the Well Balanced Chunk is the one defined by
13362 * the content production in the XML grammar:
13363 *
13364 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13365 *
13366 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13367 * the parser error code otherwise
13368 *
13369 * In case recover is set to 1, the nodelist will not be empty even if
13370 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13371 * some extent.
13372 */
13373int
13374xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13375 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13376 int recover) {
13377 xmlParserCtxtPtr ctxt;
13378 xmlDocPtr newDoc;
13379 xmlSAXHandlerPtr oldsax = NULL;
13380 xmlNodePtr content, newRoot;
13381 int ret = 0;
13382
13383 if (depth > 40) {
13384 return(XML_ERR_ENTITY_LOOP);
13385 }
13386
13387
13388 if (lst != NULL)
13389 *lst = NULL;
13390 if (string == NULL)
13391 return(-1);
13392
13393 ctxt = xmlCreateDocParserCtxt(string);
13394 if (ctxt == NULL) return(-1);
13395 ctxt->userData = ctxt;
13396 if (sax != NULL) {
13397 oldsax = ctxt->sax;
13398 ctxt->sax = sax;
13399 if (user_data != NULL)
13400 ctxt->userData = user_data;
13401 }
13402 newDoc = xmlNewDoc(BAD_CAST "1.0");
13403 if (newDoc == NULL) {
13404 xmlFreeParserCtxt(ctxt);
13405 return(-1);
13406 }
13407 newDoc->properties = XML_DOC_INTERNAL;
13408 if ((doc != NULL) && (doc->dict != NULL)) {
13409 xmlDictFree(ctxt->dict);
13410 ctxt->dict = doc->dict;
13411 xmlDictReference(ctxt->dict);
13412 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13413 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13414 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13415 ctxt->dictNames = 1;
13416 newDoc->dict = ctxt->dict;
13417 xmlDictReference(newDoc->dict);
13418 } else {
13419 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT);
13420 }
13421 /* doc == NULL is only supported for historic reasons */
13422 if (doc != NULL) {
13423 newDoc->intSubset = doc->intSubset;
13424 newDoc->extSubset = doc->extSubset;
13425 }
13426 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13427 if (newRoot == NULL) {
13428 if (sax != NULL)
13429 ctxt->sax = oldsax;
13430 xmlFreeParserCtxt(ctxt);
13431 newDoc->intSubset = NULL;
13432 newDoc->extSubset = NULL;
13433 xmlFreeDoc(newDoc);
13434 return(-1);
13435 }
13436 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13437 nodePush(ctxt, newRoot);
13438 /* doc == NULL is only supported for historic reasons */
13439 if (doc == NULL) {
13440 ctxt->myDoc = newDoc;
13441 } else {
13442 ctxt->myDoc = newDoc;
13443 /* Ensure that doc has XML spec namespace */
13444 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13445 newDoc->oldNs = doc->oldNs;
13446 }
13447 ctxt->instate = XML_PARSER_CONTENT;
13448 ctxt->input_id = 2;
13449 ctxt->depth = depth;
13450
13451 /*
13452 * Doing validity checking on chunk doesn't make sense
13453 */
13454 ctxt->validate = 0;
13455 ctxt->loadsubset = 0;
13456 xmlDetectSAX2(ctxt);
13457
13458 if ( doc != NULL ){
13459 content = doc->children;
13460 doc->children = NULL;
13461 xmlParseContent(ctxt);
13462 doc->children = content;
13463 }
13464 else {
13465 xmlParseContent(ctxt);
13466 }
13467 if ((RAW == '<') && (NXT(1) == '/')) {
13468 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13469 } else if (RAW != 0) {
13470 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13471 }
13472 if (ctxt->node != newDoc->children) {
13473 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13474 }
13475
13476 if (!ctxt->wellFormed) {
13477 if (ctxt->errNo == 0)
13478 ret = 1;
13479 else
13480 ret = ctxt->errNo;
13481 } else {
13482 ret = 0;
13483 }
13484
13485 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13486 xmlNodePtr cur;
13487
13488 /*
13489 * Return the newly created nodeset after unlinking it from
13490 * they pseudo parent.
13491 */
13492 cur = newDoc->children->children;
13493 *lst = cur;
13494 while (cur != NULL) {
13495 xmlSetTreeDoc(cur, doc);
13496 cur->parent = NULL;
13497 cur = cur->next;
13498 }
13499 newDoc->children->children = NULL;
13500 }
13501
13502 if (sax != NULL)
13503 ctxt->sax = oldsax;
13504 xmlFreeParserCtxt(ctxt);
13505 newDoc->intSubset = NULL;
13506 newDoc->extSubset = NULL;
13507 /* This leaks the namespace list if doc == NULL */
13508 newDoc->oldNs = NULL;
13509 xmlFreeDoc(newDoc);
13510
13511 return(ret);
13512}
13513
13514/**
13515 * xmlSAXParseEntity:
13516 * @sax: the SAX handler block
13517 * @filename: the filename
13518 *
13519 * DEPRECATED: Don't use.
13520 *
13521 * parse an XML external entity out of context and build a tree.
13522 * It use the given SAX function block to handle the parsing callback.
13523 * If sax is NULL, fallback to the default DOM tree building routines.
13524 *
13525 * [78] extParsedEnt ::= TextDecl? content
13526 *
13527 * This correspond to a "Well Balanced" chunk
13528 *
13529 * Returns the resulting document tree
13530 */
13531
13532xmlDocPtr
13533xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13534 xmlDocPtr ret;
13535 xmlParserCtxtPtr ctxt;
13536
13537 ctxt = xmlCreateFileParserCtxt(filename);
13538 if (ctxt == NULL) {
13539 return(NULL);
13540 }
13541 if (sax != NULL) {
13542 if (ctxt->sax != NULL)
13543 xmlFree(ctxt->sax);
13544 ctxt->sax = sax;
13545 ctxt->userData = NULL;
13546 }
13547
13548 xmlParseExtParsedEnt(ctxt);
13549
13550 if (ctxt->wellFormed)
13551 ret = ctxt->myDoc;
13552 else {
13553 ret = NULL;
13554 xmlFreeDoc(ctxt->myDoc);
13555 ctxt->myDoc = NULL;
13556 }
13557 if (sax != NULL)
13558 ctxt->sax = NULL;
13559 xmlFreeParserCtxt(ctxt);
13560
13561 return(ret);
13562}
13563
13564/**
13565 * xmlParseEntity:
13566 * @filename: the filename
13567 *
13568 * parse an XML external entity out of context and build a tree.
13569 *
13570 * [78] extParsedEnt ::= TextDecl? content
13571 *
13572 * This correspond to a "Well Balanced" chunk
13573 *
13574 * Returns the resulting document tree
13575 */
13576
13577xmlDocPtr
13578xmlParseEntity(const char *filename) {
13579 return(xmlSAXParseEntity(NULL, filename));
13580}
13581#endif /* LIBXML_SAX1_ENABLED */
13582
13583/**
13584 * xmlCreateEntityParserCtxtInternal:
13585 * @URL: the entity URL
13586 * @ID: the entity PUBLIC ID
13587 * @base: a possible base for the target URI
13588 * @pctx: parser context used to set options on new context
13589 *
13590 * Create a parser context for an external entity
13591 * Automatic support for ZLIB/Compress compressed document is provided
13592 * by default if found at compile-time.
13593 *
13594 * Returns the new parser context or NULL
13595 */
13596static xmlParserCtxtPtr
13597xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13598 const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13599 xmlParserCtxtPtr pctx) {
13600 xmlParserCtxtPtr ctxt;
13601 xmlParserInputPtr inputStream;
13602 char *directory = NULL;
13603 xmlChar *uri;
13604
13605 ctxt = xmlNewSAXParserCtxt(sax, userData);
13606 if (ctxt == NULL) {
13607 return(NULL);
13608 }
13609
13610 if (pctx != NULL) {
13611 ctxt->options = pctx->options;
13612 ctxt->_private = pctx->_private;
13613 ctxt->input_id = pctx->input_id;
13614 }
13615
13616 /* Don't read from stdin. */
13617 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13618 URL = BAD_CAST "./-";
13619
13620 uri = xmlBuildURI(URL, base);
13621
13622 if (uri == NULL) {
13623 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13624 if (inputStream == NULL) {
13625 xmlFreeParserCtxt(ctxt);
13626 return(NULL);
13627 }
13628
13629 inputPush(ctxt, inputStream);
13630
13631 if ((ctxt->directory == NULL) && (directory == NULL))
13632 directory = xmlParserGetDirectory((char *)URL);
13633 if ((ctxt->directory == NULL) && (directory != NULL))
13634 ctxt->directory = directory;
13635 } else {
13636 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13637 if (inputStream == NULL) {
13638 xmlFree(uri);
13639 xmlFreeParserCtxt(ctxt);
13640 return(NULL);
13641 }
13642
13643 inputPush(ctxt, inputStream);
13644
13645 if ((ctxt->directory == NULL) && (directory == NULL))
13646 directory = xmlParserGetDirectory((char *)uri);
13647 if ((ctxt->directory == NULL) && (directory != NULL))
13648 ctxt->directory = directory;
13649 xmlFree(uri);
13650 }
13651 return(ctxt);
13652}
13653
13654/**
13655 * xmlCreateEntityParserCtxt:
13656 * @URL: the entity URL
13657 * @ID: the entity PUBLIC ID
13658 * @base: a possible base for the target URI
13659 *
13660 * Create a parser context for an external entity
13661 * Automatic support for ZLIB/Compress compressed document is provided
13662 * by default if found at compile-time.
13663 *
13664 * Returns the new parser context or NULL
13665 */
13666xmlParserCtxtPtr
13667xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13668 const xmlChar *base) {
13669 return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13670
13671}
13672
13673/************************************************************************
13674 * *
13675 * Front ends when parsing from a file *
13676 * *
13677 ************************************************************************/
13678
13679/**
13680 * xmlCreateURLParserCtxt:
13681 * @filename: the filename or URL
13682 * @options: a combination of xmlParserOption
13683 *
13684 * Create a parser context for a file or URL content.
13685 * Automatic support for ZLIB/Compress compressed document is provided
13686 * by default if found at compile-time and for file accesses
13687 *
13688 * Returns the new parser context or NULL
13689 */
13690xmlParserCtxtPtr
13691xmlCreateURLParserCtxt(const char *filename, int options)
13692{
13693 xmlParserCtxtPtr ctxt;
13694 xmlParserInputPtr inputStream;
13695 char *directory = NULL;
13696
13697 ctxt = xmlNewParserCtxt();
13698 if (ctxt == NULL) {
13699 xmlErrMemory(NULL, "cannot allocate parser context");
13700 return(NULL);
13701 }
13702
13703 if (options)
13704 xmlCtxtUseOptionsInternal(ctxt, options);
13705 ctxt->linenumbers = 1;
13706
13707 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13708 if (inputStream == NULL) {
13709 xmlFreeParserCtxt(ctxt);
13710 return(NULL);
13711 }
13712
13713 inputPush(ctxt, inputStream);
13714 if ((ctxt->directory == NULL) && (directory == NULL))
13715 directory = xmlParserGetDirectory(filename);
13716 if ((ctxt->directory == NULL) && (directory != NULL))
13717 ctxt->directory = directory;
13718
13719 return(ctxt);
13720}
13721
13722/**
13723 * xmlCreateFileParserCtxt:
13724 * @filename: the filename
13725 *
13726 * Create a parser context for a file content.
13727 * Automatic support for ZLIB/Compress compressed document is provided
13728 * by default if found at compile-time.
13729 *
13730 * Returns the new parser context or NULL
13731 */
13732xmlParserCtxtPtr
13733xmlCreateFileParserCtxt(const char *filename)
13734{
13735 return(xmlCreateURLParserCtxt(filename, 0));
13736}
13737
13738#ifdef LIBXML_SAX1_ENABLED
13739/**
13740 * xmlSAXParseFileWithData:
13741 * @sax: the SAX handler block
13742 * @filename: the filename
13743 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13744 * documents
13745 * @data: the userdata
13746 *
13747 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13748 *
13749 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13750 * compressed document is provided by default if found at compile-time.
13751 * It use the given SAX function block to handle the parsing callback.
13752 * If sax is NULL, fallback to the default DOM tree building routines.
13753 *
13754 * User data (void *) is stored within the parser context in the
13755 * context's _private member, so it is available nearly everywhere in libxml
13756 *
13757 * Returns the resulting document tree
13758 */
13759
13760xmlDocPtr
13761xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13762 int recovery, void *data) {
13763 xmlDocPtr ret;
13764 xmlParserCtxtPtr ctxt;
13765
13766 xmlInitParser();
13767
13768 ctxt = xmlCreateFileParserCtxt(filename);
13769 if (ctxt == NULL) {
13770 return(NULL);
13771 }
13772 if (sax != NULL) {
13773 if (ctxt->sax != NULL)
13774 xmlFree(ctxt->sax);
13775 ctxt->sax = sax;
13776 }
13777 xmlDetectSAX2(ctxt);
13778 if (data!=NULL) {
13779 ctxt->_private = data;
13780 }
13781
13782 if (ctxt->directory == NULL)
13783 ctxt->directory = xmlParserGetDirectory(filename);
13784
13785 ctxt->recovery = recovery;
13786
13787 xmlParseDocument(ctxt);
13788
13789 if ((ctxt->wellFormed) || recovery) {
13790 ret = ctxt->myDoc;
13791 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13792 if (ctxt->input->buf->compressed > 0)
13793 ret->compression = 9;
13794 else
13795 ret->compression = ctxt->input->buf->compressed;
13796 }
13797 }
13798 else {
13799 ret = NULL;
13800 xmlFreeDoc(ctxt->myDoc);
13801 ctxt->myDoc = NULL;
13802 }
13803 if (sax != NULL)
13804 ctxt->sax = NULL;
13805 xmlFreeParserCtxt(ctxt);
13806
13807 return(ret);
13808}
13809
13810/**
13811 * xmlSAXParseFile:
13812 * @sax: the SAX handler block
13813 * @filename: the filename
13814 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13815 * documents
13816 *
13817 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13818 *
13819 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13820 * compressed document is provided by default if found at compile-time.
13821 * It use the given SAX function block to handle the parsing callback.
13822 * If sax is NULL, fallback to the default DOM tree building routines.
13823 *
13824 * Returns the resulting document tree
13825 */
13826
13827xmlDocPtr
13828xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13829 int recovery) {
13830 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13831}
13832
13833/**
13834 * xmlRecoverDoc:
13835 * @cur: a pointer to an array of xmlChar
13836 *
13837 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13838 *
13839 * parse an XML in-memory document and build a tree.
13840 * In the case the document is not Well Formed, a attempt to build a
13841 * tree is tried anyway
13842 *
13843 * Returns the resulting document tree or NULL in case of failure
13844 */
13845
13846xmlDocPtr
13847xmlRecoverDoc(const xmlChar *cur) {
13848 return(xmlSAXParseDoc(NULL, cur, 1));
13849}
13850
13851/**
13852 * xmlParseFile:
13853 * @filename: the filename
13854 *
13855 * DEPRECATED: Use xmlReadFile.
13856 *
13857 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13858 * compressed document is provided by default if found at compile-time.
13859 *
13860 * Returns the resulting document tree if the file was wellformed,
13861 * NULL otherwise.
13862 */
13863
13864xmlDocPtr
13865xmlParseFile(const char *filename) {
13866 return(xmlSAXParseFile(NULL, filename, 0));
13867}
13868
13869/**
13870 * xmlRecoverFile:
13871 * @filename: the filename
13872 *
13873 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13874 *
13875 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13876 * compressed document is provided by default if found at compile-time.
13877 * In the case the document is not Well Formed, it attempts to build
13878 * a tree anyway
13879 *
13880 * Returns the resulting document tree or NULL in case of failure
13881 */
13882
13883xmlDocPtr
13884xmlRecoverFile(const char *filename) {
13885 return(xmlSAXParseFile(NULL, filename, 1));
13886}
13887
13888
13889/**
13890 * xmlSetupParserForBuffer:
13891 * @ctxt: an XML parser context
13892 * @buffer: a xmlChar * buffer
13893 * @filename: a file name
13894 *
13895 * DEPRECATED: Don't use.
13896 *
13897 * Setup the parser context to parse a new buffer; Clears any prior
13898 * contents from the parser context. The buffer parameter must not be
13899 * NULL, but the filename parameter can be
13900 */
13901void
13902xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13903 const char* filename)
13904{
13905 xmlParserInputPtr input;
13906
13907 if ((ctxt == NULL) || (buffer == NULL))
13908 return;
13909
13910 input = xmlNewInputStream(ctxt);
13911 if (input == NULL) {
13912 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13913 xmlClearParserCtxt(ctxt);
13914 return;
13915 }
13916
13917 xmlClearParserCtxt(ctxt);
13918 if (filename != NULL)
13919 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13920 input->base = buffer;
13921 input->cur = buffer;
13922 input->end = &buffer[xmlStrlen(buffer)];
13923 inputPush(ctxt, input);
13924}
13925
13926/**
13927 * xmlSAXUserParseFile:
13928 * @sax: a SAX handler
13929 * @user_data: The user data returned on SAX callbacks
13930 * @filename: a file name
13931 *
13932 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13933 *
13934 * parse an XML file and call the given SAX handler routines.
13935 * Automatic support for ZLIB/Compress compressed document is provided
13936 *
13937 * Returns 0 in case of success or a error number otherwise
13938 */
13939int
13940xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13941 const char *filename) {
13942 int ret = 0;
13943 xmlParserCtxtPtr ctxt;
13944
13945 ctxt = xmlCreateFileParserCtxt(filename);
13946 if (ctxt == NULL) return -1;
13947 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13948 xmlFree(ctxt->sax);
13949 ctxt->sax = sax;
13950 xmlDetectSAX2(ctxt);
13951
13952 if (user_data != NULL)
13953 ctxt->userData = user_data;
13954
13955 xmlParseDocument(ctxt);
13956
13957 if (ctxt->wellFormed)
13958 ret = 0;
13959 else {
13960 if (ctxt->errNo != 0)
13961 ret = ctxt->errNo;
13962 else
13963 ret = -1;
13964 }
13965 if (sax != NULL)
13966 ctxt->sax = NULL;
13967 if (ctxt->myDoc != NULL) {
13968 xmlFreeDoc(ctxt->myDoc);
13969 ctxt->myDoc = NULL;
13970 }
13971 xmlFreeParserCtxt(ctxt);
13972
13973 return ret;
13974}
13975#endif /* LIBXML_SAX1_ENABLED */
13976
13977/************************************************************************
13978 * *
13979 * Front ends when parsing from memory *
13980 * *
13981 ************************************************************************/
13982
13983/**
13984 * xmlCreateMemoryParserCtxt:
13985 * @buffer: a pointer to a char array
13986 * @size: the size of the array
13987 *
13988 * Create a parser context for an XML in-memory document.
13989 *
13990 * Returns the new parser context or NULL
13991 */
13992xmlParserCtxtPtr
13993xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13994 xmlParserCtxtPtr ctxt;
13995 xmlParserInputPtr input;
13996 xmlParserInputBufferPtr buf;
13997
13998 if (buffer == NULL)
13999 return(NULL);
14000 if (size <= 0)
14001 return(NULL);
14002
14003 ctxt = xmlNewParserCtxt();
14004 if (ctxt == NULL)
14005 return(NULL);
14006
14007 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14008 if (buf == NULL) {
14009 xmlFreeParserCtxt(ctxt);
14010 return(NULL);
14011 }
14012
14013 input = xmlNewInputStream(ctxt);
14014 if (input == NULL) {
14015 xmlFreeParserInputBuffer(buf);
14016 xmlFreeParserCtxt(ctxt);
14017 return(NULL);
14018 }
14019
14020 input->filename = NULL;
14021 input->buf = buf;
14022 xmlBufResetInput(input->buf->buffer, input);
14023
14024 inputPush(ctxt, input);
14025 return(ctxt);
14026}
14027
14028#ifdef LIBXML_SAX1_ENABLED
14029/**
14030 * xmlSAXParseMemoryWithData:
14031 * @sax: the SAX handler block
14032 * @buffer: an pointer to a char array
14033 * @size: the size of the array
14034 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14035 * documents
14036 * @data: the userdata
14037 *
14038 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14039 *
14040 * parse an XML in-memory block and use the given SAX function block
14041 * to handle the parsing callback. If sax is NULL, fallback to the default
14042 * DOM tree building routines.
14043 *
14044 * User data (void *) is stored within the parser context in the
14045 * context's _private member, so it is available nearly everywhere in libxml
14046 *
14047 * Returns the resulting document tree
14048 */
14049
14050xmlDocPtr
14051xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14052 int size, int recovery, void *data) {
14053 xmlDocPtr ret;
14054 xmlParserCtxtPtr ctxt;
14055
14056 xmlInitParser();
14057
14058 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14059 if (ctxt == NULL) return(NULL);
14060 if (sax != NULL) {
14061 if (ctxt->sax != NULL)
14062 xmlFree(ctxt->sax);
14063 ctxt->sax = sax;
14064 }
14065 xmlDetectSAX2(ctxt);
14066 if (data!=NULL) {
14067 ctxt->_private=data;
14068 }
14069
14070 ctxt->recovery = recovery;
14071
14072 xmlParseDocument(ctxt);
14073
14074 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14075 else {
14076 ret = NULL;
14077 xmlFreeDoc(ctxt->myDoc);
14078 ctxt->myDoc = NULL;
14079 }
14080 if (sax != NULL)
14081 ctxt->sax = NULL;
14082 xmlFreeParserCtxt(ctxt);
14083
14084 return(ret);
14085}
14086
14087/**
14088 * xmlSAXParseMemory:
14089 * @sax: the SAX handler block
14090 * @buffer: an pointer to a char array
14091 * @size: the size of the array
14092 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14093 * documents
14094 *
14095 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14096 *
14097 * parse an XML in-memory block and use the given SAX function block
14098 * to handle the parsing callback. If sax is NULL, fallback to the default
14099 * DOM tree building routines.
14100 *
14101 * Returns the resulting document tree
14102 */
14103xmlDocPtr
14104xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14105 int size, int recovery) {
14106 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14107}
14108
14109/**
14110 * xmlParseMemory:
14111 * @buffer: an pointer to a char array
14112 * @size: the size of the array
14113 *
14114 * DEPRECATED: Use xmlReadMemory.
14115 *
14116 * parse an XML in-memory block and build a tree.
14117 *
14118 * Returns the resulting document tree
14119 */
14120
14121xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14122 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14123}
14124
14125/**
14126 * xmlRecoverMemory:
14127 * @buffer: an pointer to a char array
14128 * @size: the size of the array
14129 *
14130 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14131 *
14132 * parse an XML in-memory block and build a tree.
14133 * In the case the document is not Well Formed, an attempt to
14134 * build a tree is tried anyway
14135 *
14136 * Returns the resulting document tree or NULL in case of error
14137 */
14138
14139xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14140 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14141}
14142
14143/**
14144 * xmlSAXUserParseMemory:
14145 * @sax: a SAX handler
14146 * @user_data: The user data returned on SAX callbacks
14147 * @buffer: an in-memory XML document input
14148 * @size: the length of the XML document in bytes
14149 *
14150 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14151 *
14152 * parse an XML in-memory buffer and call the given SAX handler routines.
14153 *
14154 * Returns 0 in case of success or a error number otherwise
14155 */
14156int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14157 const char *buffer, int size) {
14158 int ret = 0;
14159 xmlParserCtxtPtr ctxt;
14160
14161 xmlInitParser();
14162
14163 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14164 if (ctxt == NULL) return -1;
14165 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14166 xmlFree(ctxt->sax);
14167 ctxt->sax = sax;
14168 xmlDetectSAX2(ctxt);
14169
14170 if (user_data != NULL)
14171 ctxt->userData = user_data;
14172
14173 xmlParseDocument(ctxt);
14174
14175 if (ctxt->wellFormed)
14176 ret = 0;
14177 else {
14178 if (ctxt->errNo != 0)
14179 ret = ctxt->errNo;
14180 else
14181 ret = -1;
14182 }
14183 if (sax != NULL)
14184 ctxt->sax = NULL;
14185 if (ctxt->myDoc != NULL) {
14186 xmlFreeDoc(ctxt->myDoc);
14187 ctxt->myDoc = NULL;
14188 }
14189 xmlFreeParserCtxt(ctxt);
14190
14191 return ret;
14192}
14193#endif /* LIBXML_SAX1_ENABLED */
14194
14195/**
14196 * xmlCreateDocParserCtxt:
14197 * @str: a pointer to an array of xmlChar
14198 *
14199 * Creates a parser context for an XML in-memory document.
14200 *
14201 * Returns the new parser context or NULL
14202 */
14203xmlParserCtxtPtr
14204xmlCreateDocParserCtxt(const xmlChar *str) {
14205 xmlParserCtxtPtr ctxt;
14206 xmlParserInputPtr input;
14207 xmlParserInputBufferPtr buf;
14208
14209 if (str == NULL)
14210 return(NULL);
14211
14212 ctxt = xmlNewParserCtxt();
14213 if (ctxt == NULL)
14214 return(NULL);
14215
14216 buf = xmlParserInputBufferCreateString(str);
14217 if (buf == NULL) {
14218 xmlFreeParserCtxt(ctxt);
14219 return(NULL);
14220 }
14221
14222 input = xmlNewInputStream(ctxt);
14223 if (input == NULL) {
14224 xmlFreeParserInputBuffer(buf);
14225 xmlFreeParserCtxt(ctxt);
14226 return(NULL);
14227 }
14228
14229 input->filename = NULL;
14230 input->buf = buf;
14231 xmlBufResetInput(input->buf->buffer, input);
14232
14233 inputPush(ctxt, input);
14234 return(ctxt);
14235}
14236
14237#ifdef LIBXML_SAX1_ENABLED
14238/**
14239 * xmlSAXParseDoc:
14240 * @sax: the SAX handler block
14241 * @cur: a pointer to an array of xmlChar
14242 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14243 * documents
14244 *
14245 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14246 *
14247 * parse an XML in-memory document and build a tree.
14248 * It use the given SAX function block to handle the parsing callback.
14249 * If sax is NULL, fallback to the default DOM tree building routines.
14250 *
14251 * Returns the resulting document tree
14252 */
14253
14254xmlDocPtr
14255xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14256 xmlDocPtr ret;
14257 xmlParserCtxtPtr ctxt;
14258 xmlSAXHandlerPtr oldsax = NULL;
14259
14260 if (cur == NULL) return(NULL);
14261
14262
14263 ctxt = xmlCreateDocParserCtxt(cur);
14264 if (ctxt == NULL) return(NULL);
14265 if (sax != NULL) {
14266 oldsax = ctxt->sax;
14267 ctxt->sax = sax;
14268 ctxt->userData = NULL;
14269 }
14270 xmlDetectSAX2(ctxt);
14271
14272 xmlParseDocument(ctxt);
14273 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14274 else {
14275 ret = NULL;
14276 xmlFreeDoc(ctxt->myDoc);
14277 ctxt->myDoc = NULL;
14278 }
14279 if (sax != NULL)
14280 ctxt->sax = oldsax;
14281 xmlFreeParserCtxt(ctxt);
14282
14283 return(ret);
14284}
14285
14286/**
14287 * xmlParseDoc:
14288 * @cur: a pointer to an array of xmlChar
14289 *
14290 * DEPRECATED: Use xmlReadDoc.
14291 *
14292 * parse an XML in-memory document and build a tree.
14293 *
14294 * Returns the resulting document tree
14295 */
14296
14297xmlDocPtr
14298xmlParseDoc(const xmlChar *cur) {
14299 return(xmlSAXParseDoc(NULL, cur, 0));
14300}
14301#endif /* LIBXML_SAX1_ENABLED */
14302
14303#ifdef LIBXML_LEGACY_ENABLED
14304/************************************************************************
14305 * *
14306 * Specific function to keep track of entities references *
14307 * and used by the XSLT debugger *
14308 * *
14309 ************************************************************************/
14310
14311static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14312
14313/**
14314 * xmlAddEntityReference:
14315 * @ent : A valid entity
14316 * @firstNode : A valid first node for children of entity
14317 * @lastNode : A valid last node of children entity
14318 *
14319 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14320 */
14321static void
14322xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14323 xmlNodePtr lastNode)
14324{
14325 if (xmlEntityRefFunc != NULL) {
14326 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14327 }
14328}
14329
14330
14331/**
14332 * xmlSetEntityReferenceFunc:
14333 * @func: A valid function
14334 *
14335 * Set the function to call call back when a xml reference has been made
14336 */
14337void
14338xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14339{
14340 xmlEntityRefFunc = func;
14341}
14342#endif /* LIBXML_LEGACY_ENABLED */
14343
14344/************************************************************************
14345 * *
14346 * New set (2.6.0) of simpler and more flexible APIs *
14347 * *
14348 ************************************************************************/
14349
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement and can be used safely inside if/else constructs.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14361
14362/**
14363 * xmlCtxtReset:
14364 * @ctxt: an XML parser context
14365 *
14366 * Reset a parser context
14367 */
14368void
14369xmlCtxtReset(xmlParserCtxtPtr ctxt)
14370{
14371 xmlParserInputPtr input;
14372 xmlDictPtr dict;
14373
14374 if (ctxt == NULL)
14375 return;
14376
14377 dict = ctxt->dict;
14378
14379 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14380 xmlFreeInputStream(input);
14381 }
14382 ctxt->inputNr = 0;
14383 ctxt->input = NULL;
14384
14385 ctxt->spaceNr = 0;
14386 if (ctxt->spaceTab != NULL) {
14387 ctxt->spaceTab[0] = -1;
14388 ctxt->space = &ctxt->spaceTab[0];
14389 } else {
14390 ctxt->space = NULL;
14391 }
14392
14393
14394 ctxt->nodeNr = 0;
14395 ctxt->node = NULL;
14396
14397 ctxt->nameNr = 0;
14398 ctxt->name = NULL;
14399
14400 ctxt->nsNr = 0;
14401 xmlParserNsReset(ctxt->nsdb);
14402
14403 DICT_FREE(ctxt->version);
14404 ctxt->version = NULL;
14405 DICT_FREE(ctxt->encoding);
14406 ctxt->encoding = NULL;
14407 DICT_FREE(ctxt->directory);
14408 ctxt->directory = NULL;
14409 DICT_FREE(ctxt->extSubURI);
14410 ctxt->extSubURI = NULL;
14411 DICT_FREE(ctxt->extSubSystem);
14412 ctxt->extSubSystem = NULL;
14413 if (ctxt->myDoc != NULL)
14414 xmlFreeDoc(ctxt->myDoc);
14415 ctxt->myDoc = NULL;
14416
14417 ctxt->standalone = -1;
14418 ctxt->hasExternalSubset = 0;
14419 ctxt->hasPErefs = 0;
14420 ctxt->html = 0;
14421 ctxt->external = 0;
14422 ctxt->instate = XML_PARSER_START;
14423 ctxt->token = 0;
14424
14425 ctxt->wellFormed = 1;
14426 ctxt->nsWellFormed = 1;
14427 ctxt->disableSAX = 0;
14428 ctxt->valid = 1;
14429#if 0
14430 ctxt->vctxt.userData = ctxt;
14431 ctxt->vctxt.error = xmlParserValidityError;
14432 ctxt->vctxt.warning = xmlParserValidityWarning;
14433#endif
14434 ctxt->record_info = 0;
14435 ctxt->checkIndex = 0;
14436 ctxt->endCheckState = 0;
14437 ctxt->inSubset = 0;
14438 ctxt->errNo = XML_ERR_OK;
14439 ctxt->depth = 0;
14440 ctxt->catalogs = NULL;
14441 ctxt->sizeentities = 0;
14442 ctxt->sizeentcopy = 0;
14443 xmlInitNodeInfoSeq(&ctxt->node_seq);
14444
14445 if (ctxt->attsDefault != NULL) {
14446 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14447 ctxt->attsDefault = NULL;
14448 }
14449 if (ctxt->attsSpecial != NULL) {
14450 xmlHashFree(ctxt->attsSpecial, NULL);
14451 ctxt->attsSpecial = NULL;
14452 }
14453
14454#ifdef LIBXML_CATALOG_ENABLED
14455 if (ctxt->catalogs != NULL)
14456 xmlCatalogFreeLocal(ctxt->catalogs);
14457#endif
14458 ctxt->nbErrors = 0;
14459 ctxt->nbWarnings = 0;
14460 if (ctxt->lastError.code != XML_ERR_OK)
14461 xmlResetError(&ctxt->lastError);
14462}
14463
14464/**
14465 * xmlCtxtResetPush:
14466 * @ctxt: an XML parser context
14467 * @chunk: a pointer to an array of chars
14468 * @size: number of chars in the array
14469 * @filename: an optional file name or URI
14470 * @encoding: the document encoding, or NULL
14471 *
14472 * Reset a push parser context
14473 *
14474 * Returns 0 in case of success and 1 in case of error
14475 */
14476int
14477xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14478 int size, const char *filename, const char *encoding)
14479{
14480 xmlParserInputPtr inputStream;
14481 xmlParserInputBufferPtr buf;
14482
14483 if (ctxt == NULL)
14484 return(1);
14485
14486 buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
14487 if (buf == NULL)
14488 return(1);
14489
14490 if (ctxt == NULL) {
14491 xmlFreeParserInputBuffer(buf);
14492 return(1);
14493 }
14494
14495 xmlCtxtReset(ctxt);
14496
14497 if (filename == NULL) {
14498 ctxt->directory = NULL;
14499 } else {
14500 ctxt->directory = xmlParserGetDirectory(filename);
14501 }
14502
14503 inputStream = xmlNewInputStream(ctxt);
14504 if (inputStream == NULL) {
14505 xmlFreeParserInputBuffer(buf);
14506 return(1);
14507 }
14508
14509 if (filename == NULL)
14510 inputStream->filename = NULL;
14511 else
14512 inputStream->filename = (char *)
14513 xmlCanonicPath((const xmlChar *) filename);
14514 inputStream->buf = buf;
14515 xmlBufResetInput(buf->buffer, inputStream);
14516
14517 inputPush(ctxt, inputStream);
14518
14519 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14520 (ctxt->input->buf != NULL)) {
14521 size_t pos = ctxt->input->cur - ctxt->input->base;
14522 int res;
14523
14524 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14525 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
14526 if (res < 0) {
14527 xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14528 xmlHaltParser(ctxt);
14529 return(1);
14530 }
14531 }
14532
14533 if (encoding != NULL) {
14534 xmlCharEncodingHandlerPtr hdlr;
14535
14536 hdlr = xmlFindCharEncodingHandler(encoding);
14537 if (hdlr != NULL) {
14538 xmlSwitchToEncoding(ctxt, hdlr);
14539 } else {
14540 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14541 "Unsupported encoding %s\n", BAD_CAST encoding);
14542 }
14543 }
14544
14545 return(0);
14546}
14547
14548
14549/**
14550 * xmlCtxtUseOptionsInternal:
14551 * @ctxt: an XML parser context
14552 * @options: a combination of xmlParserOption
14553 * @encoding: the user provided encoding to use
14554 *
14555 * Applies the options to the parser context
14556 *
14557 * Returns 0 in case of success, the set of unknown or unimplemented options
14558 * in case of error.
14559 */
14560static int
14561xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options)
14562{
14563 if (ctxt == NULL)
14564 return(-1);
14565 if (options & XML_PARSE_RECOVER) {
14566 ctxt->recovery = 1;
14567 options -= XML_PARSE_RECOVER;
14568 ctxt->options |= XML_PARSE_RECOVER;
14569 } else
14570 ctxt->recovery = 0;
14571 if (options & XML_PARSE_DTDLOAD) {
14572 ctxt->loadsubset = XML_DETECT_IDS;
14573 options -= XML_PARSE_DTDLOAD;
14574 ctxt->options |= XML_PARSE_DTDLOAD;
14575 } else
14576 ctxt->loadsubset = 0;
14577 if (options & XML_PARSE_DTDATTR) {
14578 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14579 options -= XML_PARSE_DTDATTR;
14580 ctxt->options |= XML_PARSE_DTDATTR;
14581 }
14582 if (options & XML_PARSE_NOENT) {
14583 ctxt->replaceEntities = 1;
14584 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14585 options -= XML_PARSE_NOENT;
14586 ctxt->options |= XML_PARSE_NOENT;
14587 } else
14588 ctxt->replaceEntities = 0;
14589 if (options & XML_PARSE_PEDANTIC) {
14590 ctxt->pedantic = 1;
14591 options -= XML_PARSE_PEDANTIC;
14592 ctxt->options |= XML_PARSE_PEDANTIC;
14593 } else
14594 ctxt->pedantic = 0;
14595 if (options & XML_PARSE_NOBLANKS) {
14596 ctxt->keepBlanks = 0;
14597 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14598 options -= XML_PARSE_NOBLANKS;
14599 ctxt->options |= XML_PARSE_NOBLANKS;
14600 } else
14601 ctxt->keepBlanks = 1;
14602 if (options & XML_PARSE_DTDVALID) {
14603 ctxt->validate = 1;
14604 if (options & XML_PARSE_NOWARNING)
14605 ctxt->vctxt.warning = NULL;
14606 if (options & XML_PARSE_NOERROR)
14607 ctxt->vctxt.error = NULL;
14608 options -= XML_PARSE_DTDVALID;
14609 ctxt->options |= XML_PARSE_DTDVALID;
14610 } else
14611 ctxt->validate = 0;
14612 if (options & XML_PARSE_NOWARNING) {
14613 ctxt->sax->warning = NULL;
14614 options -= XML_PARSE_NOWARNING;
14615 }
14616 if (options & XML_PARSE_NOERROR) {
14617 ctxt->sax->error = NULL;
14618 ctxt->sax->fatalError = NULL;
14619 options -= XML_PARSE_NOERROR;
14620 }
14621#ifdef LIBXML_SAX1_ENABLED
14622 if (options & XML_PARSE_SAX1) {
14623 ctxt->sax->startElementNs = NULL;
14624 ctxt->sax->endElementNs = NULL;
14625 ctxt->sax->initialized = 1;
14626 options -= XML_PARSE_SAX1;
14627 ctxt->options |= XML_PARSE_SAX1;
14628 }
14629#endif /* LIBXML_SAX1_ENABLED */
14630 if (options & XML_PARSE_NODICT) {
14631 ctxt->dictNames = 0;
14632 options -= XML_PARSE_NODICT;
14633 ctxt->options |= XML_PARSE_NODICT;
14634 } else {
14635 ctxt->dictNames = 1;
14636 }
14637 if (options & XML_PARSE_NOCDATA) {
14638 ctxt->sax->cdataBlock = NULL;
14639 options -= XML_PARSE_NOCDATA;
14640 ctxt->options |= XML_PARSE_NOCDATA;
14641 }
14642 if (options & XML_PARSE_NSCLEAN) {
14643 ctxt->options |= XML_PARSE_NSCLEAN;
14644 options -= XML_PARSE_NSCLEAN;
14645 }
14646 if (options & XML_PARSE_NONET) {
14647 ctxt->options |= XML_PARSE_NONET;
14648 options -= XML_PARSE_NONET;
14649 }
14650 if (options & XML_PARSE_COMPACT) {
14651 ctxt->options |= XML_PARSE_COMPACT;
14652 options -= XML_PARSE_COMPACT;
14653 }
14654 if (options & XML_PARSE_OLD10) {
14655 ctxt->options |= XML_PARSE_OLD10;
14656 options -= XML_PARSE_OLD10;
14657 }
14658 if (options & XML_PARSE_NOBASEFIX) {
14659 ctxt->options |= XML_PARSE_NOBASEFIX;
14660 options -= XML_PARSE_NOBASEFIX;
14661 }
14662 if (options & XML_PARSE_HUGE) {
14663 ctxt->options |= XML_PARSE_HUGE;
14664 options -= XML_PARSE_HUGE;
14665 if (ctxt->dict != NULL)
14666 xmlDictSetLimit(ctxt->dict, 0);
14667 }
14668 if (options & XML_PARSE_OLDSAX) {
14669 ctxt->options |= XML_PARSE_OLDSAX;
14670 options -= XML_PARSE_OLDSAX;
14671 }
14672 if (options & XML_PARSE_IGNORE_ENC) {
14673 ctxt->options |= XML_PARSE_IGNORE_ENC;
14674 options -= XML_PARSE_IGNORE_ENC;
14675 }
14676 if (options & XML_PARSE_BIG_LINES) {
14677 ctxt->options |= XML_PARSE_BIG_LINES;
14678 options -= XML_PARSE_BIG_LINES;
14679 }
14680 ctxt->linenumbers = 1;
14681 return (options);
14682}
14683
14684/**
14685 * xmlCtxtUseOptions:
14686 * @ctxt: an XML parser context
14687 * @options: a combination of xmlParserOption
14688 *
14689 * Applies the options to the parser context
14690 *
14691 * Returns 0 in case of success, the set of unknown or unimplemented options
14692 * in case of error.
14693 */
14694int
14695xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14696{
14697 return(xmlCtxtUseOptionsInternal(ctxt, options));
14698}
14699
14700/**
14701 * xmlCtxtSetMaxAmplification:
14702 * @ctxt: an XML parser context
14703 * @maxAmpl: maximum amplification factor
14704 *
14705 * To protect against exponential entity expansion ("billion laughs"), the
14706 * size of serialized output is (roughly) limited to the input size
14707 * multiplied by this factor. The default value is 5.
14708 *
14709 * When working with documents making heavy use of entity expansion, it can
14710 * be necessary to increase the value. For security reasons, this should only
14711 * be considered when processing trusted input.
14712 */
14713void
14714xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
14715{
14716 ctxt->maxAmpl = maxAmpl;
14717}
14718
14719/**
14720 * xmlDoRead:
14721 * @ctxt: an XML parser context
14722 * @URL: the base URL to use for the document
14723 * @encoding: the document encoding, or NULL
14724 * @options: a combination of xmlParserOption
14725 * @reuse: keep the context for reuse
14726 *
14727 * Common front-end for the xmlRead functions
14728 *
14729 * Returns the resulting document tree or NULL
14730 */
14731static xmlDocPtr
14732xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14733 int options, int reuse)
14734{
14735 xmlDocPtr ret;
14736
14737 xmlCtxtUseOptionsInternal(ctxt, options);
14738 if (encoding != NULL) {
14739 xmlCharEncodingHandlerPtr hdlr;
14740
14741 /*
14742 * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14743 * caller provided an encoding. Otherwise, we might switch to
14744 * the encoding from the XML declaration which is likely to
14745 * break things. Also see xmlSwitchInputEncoding.
14746 */
14747 hdlr = xmlFindCharEncodingHandler(encoding);
14748 if (hdlr != NULL)
14749 xmlSwitchToEncoding(ctxt, hdlr);
14750 }
14751 if ((URL != NULL) && (ctxt->input != NULL) &&
14752 (ctxt->input->filename == NULL))
14753 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14754 xmlParseDocument(ctxt);
14755 if ((ctxt->wellFormed) || ctxt->recovery)
14756 ret = ctxt->myDoc;
14757 else {
14758 ret = NULL;
14759 if (ctxt->myDoc != NULL) {
14760 xmlFreeDoc(ctxt->myDoc);
14761 }
14762 }
14763 ctxt->myDoc = NULL;
14764 if (!reuse) {
14765 xmlFreeParserCtxt(ctxt);
14766 }
14767
14768 return (ret);
14769}
14770
14771/**
14772 * xmlReadDoc:
14773 * @cur: a pointer to a zero terminated string
14774 * @URL: the base URL to use for the document
14775 * @encoding: the document encoding, or NULL
14776 * @options: a combination of xmlParserOption
14777 *
14778 * parse an XML in-memory document and build a tree.
14779 *
14780 * Returns the resulting document tree
14781 */
14782xmlDocPtr
14783xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14784{
14785 xmlParserCtxtPtr ctxt;
14786
14787 if (cur == NULL)
14788 return (NULL);
14789 xmlInitParser();
14790
14791 ctxt = xmlCreateDocParserCtxt(cur);
14792 if (ctxt == NULL)
14793 return (NULL);
14794 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14795}
14796
14797/**
14798 * xmlReadFile:
14799 * @filename: a file or URL
14800 * @encoding: the document encoding, or NULL
14801 * @options: a combination of xmlParserOption
14802 *
14803 * parse an XML file from the filesystem or the network.
14804 *
14805 * Returns the resulting document tree
14806 */
14807xmlDocPtr
14808xmlReadFile(const char *filename, const char *encoding, int options)
14809{
14810 xmlParserCtxtPtr ctxt;
14811
14812 xmlInitParser();
14813 ctxt = xmlCreateURLParserCtxt(filename, options);
14814 if (ctxt == NULL)
14815 return (NULL);
14816 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14817}
14818
14819/**
14820 * xmlReadMemory:
14821 * @buffer: a pointer to a char array
14822 * @size: the size of the array
14823 * @URL: the base URL to use for the document
14824 * @encoding: the document encoding, or NULL
14825 * @options: a combination of xmlParserOption
14826 *
14827 * parse an XML in-memory document and build a tree.
14828 *
14829 * Returns the resulting document tree
14830 */
14831xmlDocPtr
14832xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14833{
14834 xmlParserCtxtPtr ctxt;
14835
14836 xmlInitParser();
14837 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14838 if (ctxt == NULL)
14839 return (NULL);
14840 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14841}
14842
14843/**
14844 * xmlReadFd:
14845 * @fd: an open file descriptor
14846 * @URL: the base URL to use for the document
14847 * @encoding: the document encoding, or NULL
14848 * @options: a combination of xmlParserOption
14849 *
14850 * parse an XML from a file descriptor and build a tree.
14851 * NOTE that the file descriptor will not be closed when the
14852 * reader is closed or reset.
14853 *
14854 * Returns the resulting document tree
14855 */
14856xmlDocPtr
14857xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14858{
14859 xmlParserCtxtPtr ctxt;
14860 xmlParserInputBufferPtr input;
14861 xmlParserInputPtr stream;
14862
14863 if (fd < 0)
14864 return (NULL);
14865 xmlInitParser();
14866
14867 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14868 if (input == NULL)
14869 return (NULL);
14870 input->closecallback = NULL;
14871 ctxt = xmlNewParserCtxt();
14872 if (ctxt == NULL) {
14873 xmlFreeParserInputBuffer(input);
14874 return (NULL);
14875 }
14876 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14877 if (stream == NULL) {
14878 xmlFreeParserInputBuffer(input);
14879 xmlFreeParserCtxt(ctxt);
14880 return (NULL);
14881 }
14882 inputPush(ctxt, stream);
14883 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14884}
14885
14886/**
14887 * xmlReadIO:
14888 * @ioread: an I/O read function
14889 * @ioclose: an I/O close function
14890 * @ioctx: an I/O handler
14891 * @URL: the base URL to use for the document
14892 * @encoding: the document encoding, or NULL
14893 * @options: a combination of xmlParserOption
14894 *
14895 * parse an XML document from I/O functions and source and build a tree.
14896 *
14897 * Returns the resulting document tree
14898 */
14899xmlDocPtr
14900xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14901 void *ioctx, const char *URL, const char *encoding, int options)
14902{
14903 xmlParserCtxtPtr ctxt;
14904 xmlParserInputBufferPtr input;
14905 xmlParserInputPtr stream;
14906
14907 if (ioread == NULL)
14908 return (NULL);
14909 xmlInitParser();
14910
14911 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14912 XML_CHAR_ENCODING_NONE);
14913 if (input == NULL) {
14914 if (ioclose != NULL)
14915 ioclose(ioctx);
14916 return (NULL);
14917 }
14918 ctxt = xmlNewParserCtxt();
14919 if (ctxt == NULL) {
14920 xmlFreeParserInputBuffer(input);
14921 return (NULL);
14922 }
14923 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14924 if (stream == NULL) {
14925 xmlFreeParserInputBuffer(input);
14926 xmlFreeParserCtxt(ctxt);
14927 return (NULL);
14928 }
14929 inputPush(ctxt, stream);
14930 return (xmlDoRead(ctxt, URL, encoding, options, 0));
14931}
14932
14933/**
14934 * xmlCtxtReadDoc:
14935 * @ctxt: an XML parser context
14936 * @str: a pointer to a zero terminated string
14937 * @URL: the base URL to use for the document
14938 * @encoding: the document encoding, or NULL
14939 * @options: a combination of xmlParserOption
14940 *
14941 * parse an XML in-memory document and build a tree.
14942 * This reuses the existing @ctxt parser context
14943 *
14944 * Returns the resulting document tree
14945 */
14946xmlDocPtr
14947xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14948 const char *URL, const char *encoding, int options)
14949{
14950 xmlParserInputBufferPtr input;
14951 xmlParserInputPtr stream;
14952
14953 if (ctxt == NULL)
14954 return (NULL);
14955 if (str == NULL)
14956 return (NULL);
14957 xmlInitParser();
14958
14959 xmlCtxtReset(ctxt);
14960
14961 input = xmlParserInputBufferCreateString(str);
14962 if (input == NULL) {
14963 return(NULL);
14964 }
14965
14966 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14967 if (stream == NULL) {
14968 xmlFreeParserInputBuffer(input);
14969 return(NULL);
14970 }
14971
14972 inputPush(ctxt, stream);
14973 return (xmlDoRead(ctxt, URL, encoding, options, 1));
14974}
14975
14976/**
14977 * xmlCtxtReadFile:
14978 * @ctxt: an XML parser context
14979 * @filename: a file or URL
14980 * @encoding: the document encoding, or NULL
14981 * @options: a combination of xmlParserOption
14982 *
14983 * parse an XML file from the filesystem or the network.
14984 * This reuses the existing @ctxt parser context
14985 *
14986 * Returns the resulting document tree
14987 */
14988xmlDocPtr
14989xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14990 const char *encoding, int options)
14991{
14992 xmlParserInputPtr stream;
14993
14994 if (filename == NULL)
14995 return (NULL);
14996 if (ctxt == NULL)
14997 return (NULL);
14998 xmlInitParser();
14999
15000 xmlCtxtReset(ctxt);
15001
15002 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15003 if (stream == NULL) {
15004 return (NULL);
15005 }
15006 inputPush(ctxt, stream);
15007 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15008}
15009
15010/**
15011 * xmlCtxtReadMemory:
15012 * @ctxt: an XML parser context
15013 * @buffer: a pointer to a char array
15014 * @size: the size of the array
15015 * @URL: the base URL to use for the document
15016 * @encoding: the document encoding, or NULL
15017 * @options: a combination of xmlParserOption
15018 *
15019 * parse an XML in-memory document and build a tree.
15020 * This reuses the existing @ctxt parser context
15021 *
15022 * Returns the resulting document tree
15023 */
15024xmlDocPtr
15025xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15026 const char *URL, const char *encoding, int options)
15027{
15028 xmlParserInputBufferPtr input;
15029 xmlParserInputPtr stream;
15030
15031 if (ctxt == NULL)
15032 return (NULL);
15033 if (buffer == NULL)
15034 return (NULL);
15035 xmlInitParser();
15036
15037 xmlCtxtReset(ctxt);
15038
15039 input = xmlParserInputBufferCreateStatic(buffer, size,
15040 XML_CHAR_ENCODING_NONE);
15041 if (input == NULL) {
15042 return(NULL);
15043 }
15044
15045 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15046 if (stream == NULL) {
15047 xmlFreeParserInputBuffer(input);
15048 return(NULL);
15049 }
15050
15051 inputPush(ctxt, stream);
15052 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15053}
15054
15055/**
15056 * xmlCtxtReadFd:
15057 * @ctxt: an XML parser context
15058 * @fd: an open file descriptor
15059 * @URL: the base URL to use for the document
15060 * @encoding: the document encoding, or NULL
15061 * @options: a combination of xmlParserOption
15062 *
15063 * parse an XML from a file descriptor and build a tree.
15064 * This reuses the existing @ctxt parser context
15065 * NOTE that the file descriptor will not be closed when the
15066 * reader is closed or reset.
15067 *
15068 * Returns the resulting document tree
15069 */
15070xmlDocPtr
15071xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15072 const char *URL, const char *encoding, int options)
15073{
15074 xmlParserInputBufferPtr input;
15075 xmlParserInputPtr stream;
15076
15077 if (fd < 0)
15078 return (NULL);
15079 if (ctxt == NULL)
15080 return (NULL);
15081 xmlInitParser();
15082
15083 xmlCtxtReset(ctxt);
15084
15085
15086 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15087 if (input == NULL)
15088 return (NULL);
15089 input->closecallback = NULL;
15090 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15091 if (stream == NULL) {
15092 xmlFreeParserInputBuffer(input);
15093 return (NULL);
15094 }
15095 inputPush(ctxt, stream);
15096 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15097}
15098
15099/**
15100 * xmlCtxtReadIO:
15101 * @ctxt: an XML parser context
15102 * @ioread: an I/O read function
15103 * @ioclose: an I/O close function
15104 * @ioctx: an I/O handler
15105 * @URL: the base URL to use for the document
15106 * @encoding: the document encoding, or NULL
15107 * @options: a combination of xmlParserOption
15108 *
15109 * parse an XML document from I/O functions and source and build a tree.
15110 * This reuses the existing @ctxt parser context
15111 *
15112 * Returns the resulting document tree
15113 */
15114xmlDocPtr
15115xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15116 xmlInputCloseCallback ioclose, void *ioctx,
15117 const char *URL,
15118 const char *encoding, int options)
15119{
15120 xmlParserInputBufferPtr input;
15121 xmlParserInputPtr stream;
15122
15123 if (ioread == NULL)
15124 return (NULL);
15125 if (ctxt == NULL)
15126 return (NULL);
15127 xmlInitParser();
15128
15129 xmlCtxtReset(ctxt);
15130
15131 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15132 XML_CHAR_ENCODING_NONE);
15133 if (input == NULL) {
15134 if (ioclose != NULL)
15135 ioclose(ioctx);
15136 return (NULL);
15137 }
15138 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139 if (stream == NULL) {
15140 xmlFreeParserInputBuffer(input);
15141 return (NULL);
15142 }
15143 inputPush(ctxt, stream);
15144 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15145}
15146
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use