VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/testchar.c

Last change on this file was 104106, checked in by vboxsync, 8 weeks ago

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • Property svn:eol-style set to native
File size: 22.5 KB
Line 
1/**
2 * Test the UTF-8 decoding routines
3 *
4 * author: Daniel Veillard
5 * copy: see Copyright for the status of this software.
6 */
7
8#define XML_DEPRECATED
9
10#include <stdio.h>
11#include <string.h>
12#include <libxml/tree.h>
13#include <libxml/parser.h>
14#include <libxml/parserInternals.h>
15
16int lastError;
17
18static void errorHandler(void *unused, const xmlError *err) {
19 if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
20 lastError = err->code;
21 }
22}
23
/*
 * Document templates used by the in-document injection tests.  The "XXXX"
 * placeholder is the 4-byte area the tests overwrite: element content in
 * the first template, an attribute value in the second.
 */
char document1[100] = "<doc>XXXX</doc>";      /* placeholder at offset 5 */
char document2[100] = "<doc foo='XXXX'/>";    /* placeholder at offset 10 */
26
27static int testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
28 int len, char *data, int forbid1, int forbid2) {
29 int i;
30 xmlDocPtr res;
31
32 for (i = 0;i <= 0xFF;i++) {
33 lastError = 0;
34 xmlCtxtReset(ctxt);
35
36 data[0] = (char) i;
37
38 res = xmlReadMemory(document, len, "test", NULL, 0);
39
40 if ((i == forbid1) || (i == forbid2)) {
41 if ((lastError == 0) || (res != NULL)) {
42 fprintf(stderr,
43 "Failed to detect invalid char for Byte 0x%02X: %c\n",
44 i, i);
45 return(1);
46 }
47 }
48
49 else if ((i == '<') || (i == '&')) {
50 if ((lastError == 0) || (res != NULL)) {
51 fprintf(stderr,
52 "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
53 return(1);
54 }
55 }
56 else if (((i < 0x20) || (i >= 0x80)) &&
57 (i != 0x9) && (i != 0xA) && (i != 0xD)) {
58 if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL)) {
59 fprintf(stderr,
60 "Failed to detect invalid char for Byte 0x%02X\n", i);
61 return(1);
62 }
63 }
64 else if (res == NULL) {
65 fprintf(stderr,
66 "Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
67 return(1);
68 }
69 if (res != NULL)
70 xmlFreeDoc(res);
71 }
72 return(0);
73}
74
75static int testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
76 int len, char *data) {
77 int i, j;
78 xmlDocPtr res;
79
80 for (i = 0x80;i <= 0xFF;i++) {
81 for (j = 0;j <= 0xFF;j++) {
82 lastError = 0;
83 xmlCtxtReset(ctxt);
84
85 data[0] = (char) i;
86 data[1] = (char) j;
87
88 res = xmlReadMemory(document, len, "test", NULL, 0);
89
90 /* if first bit of first char is set, then second bit must too */
91 if ((i & 0x80) && ((i & 0x40) == 0)) {
92 if ((lastError == 0) || (res != NULL)) {
93 fprintf(stderr,
94 "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
95 i, j);
96 return(1);
97 }
98 }
99
100 /*
101 * if first bit of first char is set, then second char first
102 * bits must be 10
103 */
104 else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
105 if ((lastError == 0) || (res != NULL)) {
106 fprintf(stderr,
107 "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
108 i, j);
109 return(1);
110 }
111 }
112
113 /*
114 * if using a 2 byte encoding then the value must be greater
115 * than 0x80, i.e. one of bits 5 to 1 of i must be set
116 */
117 else if ((i & 0x80) && ((i & 0x1E) == 0)) {
118 if ((lastError == 0) || (res != NULL)) {
119 fprintf(stderr,
120 "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
121 i, j);
122 return(1);
123 }
124 }
125
126 /*
127 * if third bit of first char is set, then the sequence would need
128 * at least 3 bytes, but we give only 2 !
129 */
130 else if ((i & 0xE0) == 0xE0) {
131 if ((lastError == 0) || (res != NULL)) {
132 fprintf(stderr,
133 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
134 i, j);
135 return(1);
136 }
137 }
138
139 /*
140 * We should see no error in remaining cases
141 */
142 else if ((lastError != 0) || (res == NULL)) {
143 fprintf(stderr,
144 "Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
145 return(1);
146 }
147 if (res != NULL)
148 xmlFreeDoc(res);
149 }
150 }
151 return(0);
152}
153
154/**
155 * testDocumentRanges:
156 *
157 * Test the correct UTF8 character parsing in context of XML documents
158 * Those are in-context injection tests checking the parser behaviour on
159 * edge case values at different point in content, beginning and end of
160 * CDATA in text or in attribute values.
161 */
162
163static int testDocumentRanges(void) {
164 xmlParserCtxtPtr ctxt;
165 char *data;
166 int test_ret = 0;
167
168 /*
169 * Set up a parsing context using the first document as
170 * the current input source.
171 */
172 ctxt = xmlNewParserCtxt();
173 if (ctxt == NULL) {
174 fprintf(stderr, "Failed to allocate parser context\n");
175 return(1);
176 }
177
178 printf("testing 1 byte char in document: 1");
179 fflush(stdout);
180 data = &document1[5];
181 data[0] = ' ';
182 data[1] = ' ';
183 data[2] = ' ';
184 data[3] = ' ';
185 /* test 1 byte injection at beginning of area */
186 test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
187 data, -1, -1);
188 printf(" 2");
189 fflush(stdout);
190 data[0] = ' ';
191 data[1] = ' ';
192 data[2] = ' ';
193 data[3] = ' ';
194 /* test 1 byte injection at end of area */
195 test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
196 data + 3, -1, -1);
197
198 printf(" 3");
199 fflush(stdout);
200 data = &document2[10];
201 data[0] = ' ';
202 data[1] = ' ';
203 data[2] = ' ';
204 data[3] = ' ';
205 /* test 1 byte injection at beginning of area */
206 test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
207 data, '\'', -1);
208 printf(" 4");
209 fflush(stdout);
210 data[0] = ' ';
211 data[1] = ' ';
212 data[2] = ' ';
213 data[3] = ' ';
214 /* test 1 byte injection at end of area */
215 test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
216 data + 3, '\'', -1);
217 printf(" done\n");
218
219 printf("testing 2 byte char in document: 1");
220 fflush(stdout);
221 data = &document1[5];
222 data[0] = ' ';
223 data[1] = ' ';
224 data[2] = ' ';
225 data[3] = ' ';
226 /* test 2 byte injection at beginning of area */
227 test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
228 data);
229 printf(" 2");
230 fflush(stdout);
231 data[0] = ' ';
232 data[1] = ' ';
233 data[2] = ' ';
234 data[3] = ' ';
235 /* test 2 byte injection at end of area */
236 test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
237 data + 2);
238
239 printf(" 3");
240 fflush(stdout);
241 data = &document2[10];
242 data[0] = ' ';
243 data[1] = ' ';
244 data[2] = ' ';
245 data[3] = ' ';
246 /* test 2 byte injection at beginning of area */
247 test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
248 data);
249 printf(" 4");
250 fflush(stdout);
251 data[0] = ' ';
252 data[1] = ' ';
253 data[2] = ' ';
254 data[3] = ' ';
255 /* test 2 byte injection at end of area */
256 test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
257 data + 2);
258 printf(" done\n");
259
260 xmlFreeParserCtxt(ctxt);
261 return(test_ret);
262}
263
264static int
265testCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
266 const xmlChar *oldcur;
267 int c, err, len2;
268
269 lastError = 0;
270 c = xmlCurrentChar(ctxt, len);
271 ctxt->input->flags = 0;
272 err = lastError;
273
274 oldcur = ctxt->input->cur;
275 lastError = 0;
276 xmlNextChar(ctxt);
277 ctxt->input->flags = 0;
278 len2 = ctxt->input->cur - oldcur;
279 ctxt->input->cur = oldcur;
280
281 if ((*ctxt->input->cur != 0) && (err != lastError)) {
282 fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
283 "errors: %d %d\n", err, lastError);
284 return(-1);
285 }
286
287 if ((err == 0) && (*len != len2)) {
288 fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
289 "lengths: %d %d\n", *len, len2);
290 return(-1);
291 }
292
293 lastError = err;
294
295 return(c);
296}
297
298static int testCharRangeByte1(xmlParserCtxtPtr ctxt) {
299 int i = 0;
300 int len, c;
301 char *data = (char *) ctxt->input->cur;
302
303 data[1] = 0;
304 data[2] = 0;
305 data[3] = 0;
306 for (i = 0;i <= 0xFF;i++) {
307 data[0] = (char) i;
308 ctxt->nbErrors = 0;
309
310 c = testCurrentChar(ctxt, &len);
311 if (c < 0)
312 continue;
313 if ((i == 0) || (i >= 0x80)) {
314 /* we must see an error there */
315 if (lastError != XML_ERR_INVALID_CHAR) {
316 fprintf(stderr,
317 "Failed to detect invalid char for Byte 0x%02X\n", i);
318 return(1);
319 }
320 } else if (i == 0xD) {
321 if ((c != 0xA) || (len != 1)) {
322 fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
323 return(1);
324 }
325 } else if ((c != i) || (len != 1)) {
326 fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
327 return(1);
328 }
329 }
330 return(0);
331}
332
333static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
334 int i, j;
335 int len, c;
336 char *data = (char *) ctxt->input->cur;
337
338 data[2] = 0;
339 data[3] = 0;
340 for (i = 0x80;i <= 0xFF;i++) {
341 for (j = 0;j <= 0xFF;j++) {
342 data[0] = (char) i;
343 data[1] = (char) j;
344 ctxt->nbErrors = 0;
345
346 c = testCurrentChar(ctxt, &len);
347 if (c < 0)
348 continue;
349
350 /* if first bit of first char is set, then second bit must too */
351 if ((i & 0x80) && ((i & 0x40) == 0)) {
352 if (lastError != XML_ERR_INVALID_CHAR) {
353 fprintf(stderr,
354 "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
355 i, j);
356 return(1);
357 }
358 }
359
360 /*
361 * if first bit of first char is set, then second char first
362 * bits must be 10
363 */
364 else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
365 if (lastError != XML_ERR_INVALID_CHAR) {
366 fprintf(stderr,
367 "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
368 i, j, c);
369 return(1);
370 }
371 }
372
373 /*
374 * if using a 2 byte encoding then the value must be greater
375 * than 0x80, i.e. one of bits 5 to 1 of i must be set
376 */
377 else if ((i & 0x80) && ((i & 0x1E) == 0)) {
378 if (lastError != XML_ERR_INVALID_CHAR) {
379 fprintf(stderr,
380 "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
381 i, j, c);
382 return(1);
383 }
384 }
385
386 /*
387 * if third bit of first char is set, then the sequence would need
388 * at least 3 bytes, but we give only 2 !
389 */
390 else if ((i & 0xE0) == 0xE0) {
391 if (lastError != XML_ERR_INVALID_CHAR) {
392 fprintf(stderr,
393 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
394 i, j);
395 return(1);
396 }
397 }
398
399 /*
400 * We should see no error in remaining cases
401 */
402 else if ((lastError != 0) || (len != 2)) {
403 fprintf(stderr,
404 "Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
405 return(1);
406 }
407
408 /*
409 * Finally check the value is right
410 */
411 else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
412 fprintf(stderr,
413 "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
414 i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
415 return(1);
416 }
417 }
418 }
419 return(0);
420}
421
422static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
423 int i, j, k, K;
424 int len, c;
425 unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
426 char *data = (char *) ctxt->input->cur;
427 int value;
428
429 data[3] = 0;
430 for (i = 0xE0;i <= 0xFF;i++) {
431 for (j = 0;j <= 0xFF;j++) {
432 for (k = 0;k < 6;k++) {
433 data[0] = (char) i;
434 data[1] = (char) j;
435 K = lows[k];
436 data[2] = (char) K;
437 value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
438 ctxt->nbErrors = 0;
439
440 c = testCurrentChar(ctxt, &len);
441 if (c < 0)
442 continue;
443
444 /*
445 * if fourth bit of first char is set, then the sequence would need
446 * at least 4 bytes, but we give only 3 !
447 */
448 if ((i & 0xF0) == 0xF0) {
449 if (lastError != XML_ERR_INVALID_CHAR) {
450 fprintf(stderr,
451 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
452 i, j, K, data[3]);
453 return(1);
454 }
455 }
456
457 /*
458 * The second and the third bytes must start with 10
459 */
460 else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
461 if (lastError != XML_ERR_INVALID_CHAR) {
462 fprintf(stderr,
463 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
464 i, j, K);
465 return(1);
466 }
467 }
468
469 /*
470 * if using a 3 byte encoding then the value must be greater
471 * than 0x800, i.e. one of bits 4 to 0 of i must be set or
472 * the 6th byte of data[1] must be set
473 */
474 else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
475 if (lastError != XML_ERR_INVALID_CHAR) {
476 fprintf(stderr,
477 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
478 i, j, K);
479 return(1);
480 }
481 }
482
483 /*
484 * There are values that are not allowed in UTF-8
485 */
486 else if ((value > 0xD7FF) && (value <0xE000)) {
487 if (lastError != XML_ERR_INVALID_CHAR) {
488 fprintf(stderr,
489 "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
490 value, i, j, K);
491 return(1);
492 }
493 }
494
495 /*
496 * We should see no error in remaining cases
497 */
498 else if ((lastError != 0) || (len != 3)) {
499 fprintf(stderr,
500 "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
501 i, j, K);
502 return(1);
503 }
504
505 /*
506 * Finally check the value is right
507 */
508 else if (c != value) {
509 fprintf(stderr,
510 "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
511 i, j, data[2], value, c);
512 return(1);
513 }
514 }
515 }
516 }
517 return(0);
518}
519
520static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
521 int i, j, k, K, l, L;
522 int len, c;
523 unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
524 char *data = (char *) ctxt->input->cur;
525 int value;
526
527 data[4] = 0;
528 for (i = 0xF0;i <= 0xFF;i++) {
529 for (j = 0;j <= 0xFF;j++) {
530 for (k = 0;k < 6;k++) {
531 for (l = 0;l < 6;l++) {
532 data[0] = (char) i;
533 data[1] = (char) j;
534 K = lows[k];
535 data[2] = (char) K;
536 L = lows[l];
537 data[3] = (char) L;
538 value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
539 ((i & 0x7) << 18);
540 ctxt->nbErrors = 0;
541
542 c = testCurrentChar(ctxt, &len);
543 if (c < 0)
544 continue;
545
546 /*
547 * if fifth bit of first char is set, then the sequence would need
548 * at least 5 bytes, but we give only 4 !
549 */
550 if ((i & 0xF8) == 0xF8) {
551 if (lastError != XML_ERR_INVALID_CHAR) {
552 fprintf(stderr,
553 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
554 i, j, K, data[3]);
555 return(1);
556 }
557 }
558
559 /*
560 * The second, third and fourth bytes must start with 10
561 */
562 else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
563 ((L & 0xC0) != 0x80)) {
564 if (lastError != XML_ERR_INVALID_CHAR) {
565 fprintf(stderr,
566 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
567 i, j, K, L);
568 return(1);
569 }
570 }
571
572 /*
573 * if using a 3 byte encoding then the value must be greater
574 * than 0x10000, i.e. one of bits 3 to 0 of i must be set or
575 * the 6 or 5th byte of j must be set
576 */
577 else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
578 if (lastError != XML_ERR_INVALID_CHAR) {
579 fprintf(stderr,
580 "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
581 i, j, K, L);
582 return(1);
583 }
584 }
585
586 /*
587 * There are values in that are not allowed in UTF-8
588 */
589 else if (((value > 0xD7FF) && (value < 0xE000)) ||
590 (value > 0x10FFFF)) {
591 if (lastError != XML_ERR_INVALID_CHAR) {
592 fprintf(stderr,
593"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
594 value, i, j, K, L);
595 return(1);
596 }
597 }
598
599 /*
600 * We should see no error in remaining cases
601 */
602 else if ((lastError != 0) || (len != 4)) {
603 fprintf(stderr,
604 "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
605 i, j, K);
606 return(1);
607 }
608
609 /*
610 * Finally check the value is right
611 */
612 else if (c != value) {
613 fprintf(stderr,
614 "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
615 i, j, data[2], value, c);
616 return(1);
617 }
618 }
619 }
620 }
621 }
622 return(0);
623}
624
625/**
626 * testCharRanges:
627 *
628 * Test the correct UTF8 character parsing in isolation i.e.
629 * not when parsing a full document, this is less expensive and we can
630 * cover the full range of UTF-8 chars accepted by XML-1.0
631 */
632
633static int testCharRanges(void) {
634 char data[5];
635 xmlParserCtxtPtr ctxt;
636 xmlParserInputBufferPtr buf;
637 xmlParserInputPtr input;
638 int test_ret = 0;
639
640 memset(data, 0, 5);
641
642 /*
643 * Set up a parsing context using the above data buffer as
644 * the current input source.
645 */
646 ctxt = xmlNewParserCtxt();
647 if (ctxt == NULL) {
648 fprintf(stderr, "Failed to allocate parser context\n");
649 return(1);
650 }
651 buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
652 XML_CHAR_ENCODING_NONE);
653 if (buf == NULL) {
654 fprintf(stderr, "Failed to allocate input buffer\n");
655 test_ret = 1;
656 goto error;
657 }
658 input = xmlNewInputStream(ctxt);
659 if (input == NULL) {
660 xmlFreeParserInputBuffer(buf);
661 test_ret = 1;
662 goto error;
663 }
664 input->filename = NULL;
665 input->buf = buf;
666 input->cur =
667 input->base = xmlBufContent(input->buf->buffer);
668 input->end = input->base + 4;
669 inputPush(ctxt, input);
670
671 printf("testing char range: 1");
672 fflush(stdout);
673 test_ret += testCharRangeByte1(ctxt);
674 printf(" 2");
675 fflush(stdout);
676 test_ret += testCharRangeByte2(ctxt);
677 printf(" 3");
678 fflush(stdout);
679 test_ret += testCharRangeByte3(ctxt);
680 printf(" 4");
681 fflush(stdout);
682 test_ret += testCharRangeByte4(ctxt);
683 printf(" done\n");
684 fflush(stdout);
685
686error:
687 xmlFreeParserCtxt(ctxt);
688 return(test_ret);
689}
690
691static int
692testUserEncoding(void) {
693 /*
694 * Create a document encoded as UTF-16LE with an ISO-8859-1 encoding
695 * declaration, then parse it with xmlReadMemory and the encoding
696 * argument set to UTF-16LE.
697 */
698 xmlDocPtr doc = NULL;
699 const char *start = "<?xml version='1.0' encoding='ISO-8859-1'?><d>";
700 const char *end = "</d>";
701 char *buf = NULL;
702 xmlChar *text;
703 int startSize = strlen(start);
704 int textSize = 100000; /* Make sure to exceed internal buffer sizes. */
705 int endSize = strlen(end);
706 int totalSize = startSize + textSize + endSize;
707 int k = 0;
708 int i;
709 int ret = 1;
710
711 buf = xmlMalloc(2 * totalSize);
712 for (i = 0; start[i] != 0; i++) {
713 buf[k++] = start[i];
714 buf[k++] = 0;
715 }
716 for (i = 0; i < textSize; i++) {
717 buf[k++] = 'x';
718 buf[k++] = 0;
719 }
720 for (i = 0; end[i] != 0; i++) {
721 buf[k++] = end[i];
722 buf[k++] = 0;
723 }
724
725 doc = xmlReadMemory(buf, 2 * totalSize, NULL, "UTF-16LE", 0);
726 if (doc == NULL) {
727 fprintf(stderr, "failed to parse document\n");
728 goto error;
729 }
730
731 text = doc->children->children->content;
732 for (i = 0; i < textSize; i++) {
733 if (text[i] != 'x') {
734 fprintf(stderr, "text node has wrong content at offset %d\n", k);
735 goto error;
736 }
737 }
738
739 ret = 0;
740
741error:
742 xmlFreeDoc(doc);
743 xmlFree(buf);
744
745 return ret;
746}
747
748#if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
749
750static char *
751convert(xmlCharEncodingHandlerPtr handler, const char *utf8, int size,
752 int *outSize) {
753 char *ret;
754 int inlen;
755 int res;
756
757 inlen = size;
758 *outSize = size * 2;
759 ret = xmlMalloc(*outSize);
760 if (ret == NULL)
761 return(NULL);
762 res = handler->output(BAD_CAST ret, outSize, BAD_CAST utf8, &inlen);
763 if ((res < 0) || (inlen != size)) {
764 xmlFree(ret);
765 return(NULL);
766 }
767
768 return(ret);
769}
770
771static int
772testUserEncodingPush(void) {
773 xmlCharEncodingHandlerPtr handler;
774 xmlParserCtxtPtr ctxt;
775 xmlDocPtr doc;
776 char buf[] =
777 "\xEF\xBB\xBF"
778 "<?xml version='1.0' encoding='ISO-8859-1'?>\n"
779 "<d>text</d>\n";
780 char *utf16;
781 int utf16Size;
782 int ret = 1;
783
784 handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE);
785 utf16 = convert(handler, buf, sizeof(buf) - 1, &utf16Size);
786 ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
787 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF16LE);
788 xmlParseChunk(ctxt, utf16, utf16Size, 0);
789 xmlParseChunk(ctxt, NULL, 0, 1);
790 doc = ctxt->myDoc;
791
792 if ((doc != NULL) &&
793 (doc->children != NULL) &&
794 (doc->children->children != NULL) &&
795 (xmlStrcmp(doc->children->children->content, BAD_CAST "text") == 0))
796 ret = 0;
797
798 xmlFreeDoc(doc);
799 xmlFreeParserCtxt(ctxt);
800 xmlFree(utf16);
801
802 return(ret);
803}
804
805static int
806testUTF8Chunks(void) {
807 xmlParserCtxtPtr ctxt;
808 xmlChar *out;
809 int outSize;
810 char *buf;
811 int i;
812 int ret = 0;
813
814 ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
815
816 xmlParseChunk(ctxt, "<d>", 3, 0);
817 xmlParseChunk(ctxt, "\xF0", 1, 0);
818 xmlParseChunk(ctxt, "\x9F", 1, 0);
819 xmlParseChunk(ctxt, "\x98", 1, 0);
820 xmlParseChunk(ctxt, "\x8A", 1, 0);
821 xmlParseChunk(ctxt, "</d>", 4, 1);
822
823 xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
824 if (strcmp((char *) out,
825 "<?xml version=\"1.0\"?>\n<d>&#x1F60A;</d>\n") != 0) {
826 fprintf(stderr, "failed UTF-8 chunk test 1\n");
827 ret += 1;
828 }
829
830 xmlFree(out);
831 xmlFreeDoc(ctxt->myDoc);
832 xmlFreeParserCtxt(ctxt);
833
834 ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
835
836 xmlParseChunk(ctxt, "<d>", 3, 0);
837
838 /*
839 * Create a chunk longer than XML_PARSER_BIG_BUFFER_SIZE (300) ending
840 * with an incomplete UTF-8 sequence.
841 */
842 buf = xmlMalloc(1000 * 2 + 1);
843 for (i = 0; i < 2000; i += 2)
844 memcpy(buf + i, "\xCE\xB1", 2);
845 buf[i] = '\xCE';
846 xmlParseChunk(ctxt, buf, 2001, 0);
847 xmlFree(buf);
848
849 xmlParseChunk(ctxt, "\xB1</d>", 4, 0);
850 xmlParseChunk(ctxt, NULL, 0, 0);
851
852 xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
853 if (strncmp((char *) out, "<?xml version=\"1.0\"?>\n<d>", 25) != 0) {
854 fprintf(stderr, "failed UTF-8 chunk test 2-1\n");
855 ret += 1;
856 goto error;
857 }
858 for (i = 25; i < 25 + 1001 * 7; i += 7) {
859 if (memcmp(out + i, "&#x3B1;", 7) != 0) {
860 fprintf(stderr, "failed UTF-8 chunk test 2-2 %d\n", i);
861 ret += 1;
862 goto error;
863 }
864 }
865 if (strcmp((char *) out + i, "</d>\n") != 0) {
866 fprintf(stderr, "failed UTF-8 chunk test 2-3\n");
867 ret += 1;
868 goto error;
869 }
870
871error:
872 xmlFree(out);
873 xmlFreeDoc(ctxt->myDoc);
874 xmlFreeParserCtxt(ctxt);
875
876 return(ret);
877 return(0);
878}
879
880#endif
881
882int main(void) {
883
884 int ret = 0;
885
886 /*
887 * this initialize the library and check potential ABI mismatches
888 * between the version it was compiled for and the actual shared
889 * library used.
890 */
891 LIBXML_TEST_VERSION
892
893 /*
894 * Catch errors separately
895 */
896
897 xmlSetStructuredErrorFunc(NULL, errorHandler);
898
899 /*
900 * Run the tests
901 */
902 ret += testCharRanges();
903 ret += testDocumentRanges();
904 ret += testUserEncoding();
905#if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
906 ret += testUserEncodingPush();
907 ret += testUTF8Chunks();
908#endif
909
910 /*
911 * Cleanup function for the XML library.
912 */
913 xmlCleanupParser();
914 return(ret ? 1 : 0);
915}
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact · Privacy policy · Terms of Use