ReactOS 0.4.15-dev-8408-g466a198
reader.c
Go to the documentation of this file.
1/*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21#define COBJMACROS
22
23#include <stdio.h>
24#include <stdarg.h>
25#include <assert.h>
26#include "windef.h"
27#include "winbase.h"
28#include "initguid.h"
29#include "objbase.h"
30#include "xmllite.h"
31#include "xmllite_private.h"
32#ifdef __REACTOS__
33#include <winnls.h>
34#endif
35
36#include "wine/debug.h"
37#include "wine/list.h"
38
40
41/* not defined in public headers */
42DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43
44typedef enum
45{
53 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
56
57/* This state denotes where parsing was interrupted by input problem.
58 Reader resumes parsing using this information. */
59typedef enum
60{
70
71/* saved pointer index to resume from particular input position */
72typedef enum
73{
74 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
75 XmlReadResume_Local, /* local for QName */
76 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
79
80typedef enum
81{
88
89static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
90static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
91static const WCHAR utf8W[] = {'U','T','F','-','8',0};
92
93static const WCHAR dblquoteW[] = {'\"',0};
94static const WCHAR quoteW[] = {'\'',0};
95static const WCHAR ltW[] = {'<',0};
96static const WCHAR gtW[] = {'>',0};
97static const WCHAR commentW[] = {'<','!','-','-',0};
98static const WCHAR piW[] = {'<','?',0};
99
101
102static const char *debugstr_nodetype(XmlNodeType nodetype)
103{
104 static const char * const type_names[] =
105 {
106 "None",
107 "Element",
108 "Attribute",
109 "Text",
110 "CDATA",
111 "",
112 "",
113 "ProcessingInstruction",
114 "Comment",
115 "",
116 "DocumentType",
117 "",
118 "",
119 "Whitespace",
120 "",
121 "EndElement",
122 "",
123 "XmlDeclaration"
124 };
125
126 if (nodetype > _XmlNodeType_Last)
127 return wine_dbg_sprintf("unknown type=%d", nodetype);
128
129 return type_names[nodetype];
130}
131
133{
134 static const char * const prop_names[] =
135 {
136 "MultiLanguage",
137 "ConformanceLevel",
138 "RandomAccess",
139 "XmlResolver",
140 "DtdProcessing",
141 "ReadState",
142 "MaxElementDepth",
143 "MaxEntityExpansion"
144 };
145
146 if (prop > _XmlReaderProperty_Last)
147 return wine_dbg_sprintf("unknown property=%d", prop);
148
149 return prop_names[prop];
150}
151
153{
154 const WCHAR *name;
156 UINT cp;
157};
158
159static const struct xml_encoding_data xml_encoding_map[] = {
160 { usasciiW, XmlEncoding_USASCII, 20127 },
161 { utf16W, XmlEncoding_UTF16, 1200 },
163};
164
166{
167 return xml_encoding_map[encoding].name;
168}
169
171{
172 int i;
173 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
174 {
175 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
176 }
177 return XmlEncoding_Unknown;
178}
179
180typedef struct
181{
182 char *data;
184 unsigned int allocated;
185 unsigned int written;
188
190
191typedef struct
192{
195 /* reference passed on IXmlReaderInput creation, is kept when input is created */
201 /* stream reference set after SetInput() call from reader,
202 stored as sequential stream, cause currently
203 optimizations possible with IStream aren't implemented */
206 unsigned int pending : 1;
208
209static const struct IUnknownVtbl xmlreaderinputvtbl;
210
211/* Structure to hold parsed string of specific length.
212
213 Reader stores node value as 'start' pointer, on request
214 a null-terminated version of it is allocated.
215
216 To init a strval variable use reader_init_strval(),
217 to set strval as a reader value use reader_set_strval().
218 */
219typedef struct
220{
221 WCHAR *str; /* allocated null-terminated string */
222 UINT len; /* length in WCHARs, altered after ReadValueChunk */
223 UINT start; /* input position where value starts */
224} strval;
225
226static WCHAR emptyW[] = {0};
227static WCHAR xmlW[] = {'x','m','l',0};
228static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
229static const strval strval_empty = { emptyW };
230static const strval strval_xml = { xmlW, 3 };
231static const strval strval_xmlns = { xmlnsW, 5 };
232
234{
237};
238
240{
243};
244
245struct attribute
246{
247 struct list entry;
253 unsigned int flags;
254};
255
257{
258 struct list entry;
263};
264
265struct ns
266{
267 struct list entry;
271};
272
273typedef struct
274{
280 HRESULT error; /* error set on XmlReadState_Error */
287 struct reader_position position;
288 struct list attrs; /* attributes list for current node */
289 struct attribute *attr; /* current attribute */
291 struct list nsdef;
292 struct list ns;
293 struct list elements;
299 struct element empty_element; /* used for empty elements without end tag <a />,
300 and to keep <?xml reader position */
301 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
302} xmlreader;
303
305{
310};
311
313{
314 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
315}
316
318{
319 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
320}
321
322/* reader memory allocation functions */
323static inline void *reader_alloc(xmlreader *reader, size_t len)
324{
325 return m_alloc(reader->imalloc, len);
326}
327
328static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
329{
330 void *ret = reader_alloc(reader, len);
331 if (ret)
332 memset(ret, 0, len);
333 return ret;
334}
335
336static inline void reader_free(xmlreader *reader, void *mem)
337{
338 m_free(reader->imalloc, mem);
339}
340
341/* Just return pointer from offset, no attempt to read more. */
343{
344 encoded_buffer *buffer = &reader->input->buffer->utf16;
345 return (WCHAR*)buffer->data + offset;
346}
347
348static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
349{
350 return v->str ? v->str : reader_get_ptr2(reader, v->start);
351}
352
354{
355 *dest = *src;
356
357 if (src->str != strval_empty.str)
358 {
359 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
360 if (!dest->str) return E_OUTOFMEMORY;
361 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
362 dest->str[dest->len] = 0;
363 dest->start = 0;
364 }
365
366 return S_OK;
367}
368
369/* reader input memory allocation functions */
370static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
371{
372 return m_alloc(input->imalloc, len);
373}
374
375static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
376{
377 return m_realloc(input->imalloc, mem, len);
378}
379
380static inline void readerinput_free(xmlreaderinput *input, void *mem)
381{
382 m_free(input->imalloc, mem);
383}
384
386{
387 LPWSTR ret = NULL;
388
389 if(str) {
390 DWORD size;
391
392 size = (lstrlenW(str)+1)*sizeof(WCHAR);
394 if (ret) memcpy(ret, str, size);
395 }
396
397 return ret;
398}
399
400/* This one frees stored string value if needed */
402{
403 if (v->str != strval_empty.str)
404 {
405 reader_free(reader, v->str);
406 *v = strval_empty;
407 }
408}
409
411{
412 struct attribute *attr, *attr2;
413 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
414 {
415 reader_free_strvalued(reader, &attr->localname);
418 }
419 list_init(&reader->attrs);
420 reader->attr_count = 0;
421 reader->attr = NULL;
422}
423
424/* attribute data holds pointers to buffer data, so buffer shrink is not possible
425 while we are on a node with attributes */
427 strval *value, const struct reader_position *position, unsigned int flags)
428{
429 struct attribute *attr;
430 HRESULT hr;
431
432 attr = reader_alloc(reader, sizeof(*attr));
433 if (!attr) return E_OUTOFMEMORY;
434
435 hr = reader_strvaldup(reader, localname, &attr->localname);
436 if (hr == S_OK)
437 {
439 if (hr != S_OK)
441 }
442 if (hr != S_OK)
443 {
445 return hr;
446 }
447
448 if (prefix)
449 attr->prefix = *prefix;
450 else
451 memset(&attr->prefix, 0, sizeof(attr->prefix));
452 attr->qname = qname ? *qname : *localname;
453 attr->position = *position;
454 attr->flags = flags;
455 list_add_tail(&reader->attrs, &attr->entry);
456 reader->attr_count++;
457
458 return S_OK;
459}
460
461/* Returns current element, doesn't check if reader is actually positioned on it. */
463{
464 if (reader->is_empty_element)
465 return &reader->empty_element;
466
467 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
468}
469
471{
472 v->start = start;
473 v->len = len;
474 v->str = NULL;
475}
476
477static inline const char* debug_strval(const xmlreader *reader, const strval *v)
478{
479 return debugstr_wn(reader_get_strptr(reader, v), v->len);
480}
481
482/* used to initialize from constant string */
484{
485 v->start = 0;
486 v->len = len;
487 v->str = str;
488}
489
491{
493}
494
496{
497 int type;
498 for (type = 0; type < StringValue_Last; type++)
500}
501
502/* This helper should only be used to test if strings are the same,
503 it doesn't try to sort. */
504static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
505{
506 if (str1->len != str2->len) return 0;
507 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
508}
509
511{
512 struct element *elem, *elem2;
513 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
514 {
516 reader_free_strvalued(reader, &elem->localname);
519 }
520 list_init(&reader->elements);
521 reader_free_strvalued(reader, &reader->empty_element.localname);
522 reader_free_strvalued(reader, &reader->empty_element.qname);
523 reader->is_empty_element = FALSE;
524}
525
527{
528 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
529 struct ns *ns;
530
531 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
533 return ns;
534 }
535
536 return NULL;
537}
538
540{
541 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
542}
543
545{
546 if (reader->depth)
547 reader->depth--;
548}
549
551{
552 struct ns *ns;
553 HRESULT hr;
554
555 ns = reader_alloc(reader, sizeof(*ns));
556 if (!ns) return E_OUTOFMEMORY;
557
558 if (def)
559 memset(&ns->prefix, 0, sizeof(ns->prefix));
560 else {
562 if (FAILED(hr)) {
564 return hr;
565 }
566 }
567
569 if (FAILED(hr)) {
572 return hr;
573 }
574
575 ns->element = NULL;
576 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
577 return hr;
578}
579
581{
586}
587
589{
590 struct ns *ns;
591
592 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
593 if (ns->element)
594 break;
595 ns->element = element;
596 }
597
598 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
599 if (ns->element)
600 break;
601 ns->element = element;
602 }
603}
604
606 strval *qname, const struct reader_position *position)
607{
608 struct element *element;
609 HRESULT hr;
610
612 if (!element)
613 return E_OUTOFMEMORY;
614
618 {
619 list_add_head(&reader->elements, &element->entry);
621 reader->is_empty_element = FALSE;
623 }
624 else
626
627 return hr;
628}
629
631{
632 struct ns *ns, *ns2;
633
634 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
635 if (ns->element != element)
636 break;
637
642 }
643
644 if (!list_empty(&reader->nsdef)) {
645 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
646 if (ns->element == element) {
651 }
652 }
653}
654
656{
657 struct element *element;
658
659 if (list_empty(&reader->elements))
660 return;
661
662 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
664
667
668 /* It was a root element, the rest is expected as Misc */
669 if (list_empty(&reader->elements))
671}
672
673/* Always make a copy, because strings are supposed to be null-terminated. A null pointer for 'value'
674 means the node value is to be determined. */
676{
677 strval *v = &reader->strvalues[type];
678
680 if (!value)
681 {
682 v->str = NULL;
683 v->start = 0;
684 v->len = 0;
685 return;
686 }
687
688 if (value->str == strval_empty.str)
689 *v = *value;
690 else
691 {
692 if (type == StringValue_Value)
693 {
694 /* defer allocation for value string */
695 v->str = NULL;
696 v->start = value->start;
697 v->len = value->len;
698 }
699 else
700 {
701 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
702 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
703 v->str[value->len] = 0;
704 v->len = value->len;
705 }
706 }
707}
708
710{
711 return reader->input->pending;
712}
713
715{
716 const int initial_len = 0x2000;
717 buffer->data = readerinput_alloc(input, initial_len);
718 if (!buffer->data) return E_OUTOFMEMORY;
719
720 memset(buffer->data, 0, 4);
721 buffer->cur = 0;
722 buffer->allocated = initial_len;
723 buffer->written = 0;
724 buffer->prev_cr = FALSE;
725
726 return S_OK;
727}
728
730{
732}
733
735{
737 {
738 FIXME("unsupported encoding %d\n", encoding);
739 return E_NOTIMPL;
740 }
741
743
744 return S_OK;
745}
746
748{
749 int min, max, n, c;
750
751 if (!name) return XmlEncoding_Unknown;
752
753 min = 0;
755
756 while (min <= max)
757 {
758 n = (min+max)/2;
759
760 if (len != -1)
762 else
764 if (!c)
765 return xml_encoding_map[n].enc;
766
767 if (c > 0)
768 max = n-1;
769 else
770 min = n+1;
771 }
772
773 return XmlEncoding_Unknown;
774}
775
777{
779 HRESULT hr;
780
781 input->buffer = NULL;
782
784 if (!buffer) return E_OUTOFMEMORY;
785
786 buffer->input = input;
787 buffer->code_page = ~0; /* code page is unknown at this point */
789 if (hr != S_OK) {
791 return hr;
792 }
793
794 hr = init_encoded_buffer(input, &buffer->encoded);
795 if (hr != S_OK) {
798 return hr;
799 }
800
801 input->buffer = buffer;
802 return S_OK;
803}
804
806{
807 free_encoded_buffer(buffer->input, &buffer->encoded);
808 free_encoded_buffer(buffer->input, &buffer->utf16);
810}
811
813{
814 if (readerinput->stream) {
815 ISequentialStream_Release(readerinput->stream);
816 readerinput->stream = NULL;
817 }
818}
819
820/* Queries already stored interface for IStream/ISequentialStream.
821 Interface supplied on creation will be overwritten */
823{
824 HRESULT hr;
825
826 readerinput_release_stream(readerinput);
827 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
828 if (hr != S_OK)
829 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
830
831 return hr;
832}
833
834/* reads a chunk to raw buffer */
836{
837 encoded_buffer *buffer = &readerinput->buffer->encoded;
838 /* to make sure aligned length won't exceed allocated length */
839 ULONG len = buffer->allocated - buffer->written - 4;
840 ULONG read;
841 HRESULT hr;
842
843 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
844 variable width encodings like UTF-8 */
845 len = (len + 3) & ~3;
846 /* try to use allocated space or grow */
847 if (buffer->allocated - buffer->written < len)
848 {
849 buffer->allocated *= 2;
850 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
851 len = buffer->allocated - buffer->written;
852 }
853
854 read = 0;
855 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
856 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
857 readerinput->pending = hr == E_PENDING;
858 if (FAILED(hr)) return hr;
859 buffer->written += read;
860
861 return hr;
862}
863
864/* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
865static void readerinput_grow(xmlreaderinput *readerinput, int length)
866{
867 encoded_buffer *buffer = &readerinput->buffer->utf16;
868
869 length *= sizeof(WCHAR);
870 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
871 if (buffer->allocated < buffer->written + length + 4)
872 {
873 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
874 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
875 buffer->allocated = grown_size;
876 }
877}
878
879static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
880{
881 static const char startA[] = {'<','?'};
882 static const char commentA[] = {'<','!'};
883 encoded_buffer *buffer = &readerinput->buffer->encoded;
884 unsigned char *ptr = (unsigned char*)buffer->data;
885
886 return !memcmp(buffer->data, startA, sizeof(startA)) ||
887 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
888 /* test start byte */
889 (ptr[0] == '<' &&
890 (
891 (ptr[1] && (ptr[1] <= 0x7f)) ||
892 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
893 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
894 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
895 );
896}
897
899{
900 encoded_buffer *buffer = &readerinput->buffer->encoded;
901 static const char utf8bom[] = {0xef,0xbb,0xbf};
902 static const char utf16lebom[] = {0xff,0xfe};
903 WCHAR *ptrW;
904
905 *enc = XmlEncoding_Unknown;
906
907 if (buffer->written <= 3)
908 {
909 HRESULT hr = readerinput_growraw(readerinput);
910 if (FAILED(hr)) return hr;
911 if (buffer->written < 3) return MX_E_INPUTEND;
912 }
913
914 ptrW = (WCHAR *)buffer->data;
915 /* try start symbols if we have enough data to do that, input buffer should contain
916 first chunk already */
917 if (readerinput_is_utf8(readerinput))
918 *enc = XmlEncoding_UTF8;
919 else if (*ptrW == '<')
920 {
921 ptrW++;
922 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
923 *enc = XmlEncoding_UTF16;
924 }
925 /* try with BOM now */
926 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
927 {
928 buffer->cur += sizeof(utf8bom);
929 *enc = XmlEncoding_UTF8;
930 }
931 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
932 {
933 buffer->cur += sizeof(utf16lebom);
934 *enc = XmlEncoding_UTF16;
935 }
936
937 return S_OK;
938}
939
941{
942 encoded_buffer *buffer = &readerinput->buffer->encoded;
943 int len = buffer->written;
944
945 /* complete single byte char */
946 if (!(buffer->data[len-1] & 0x80)) return len;
947
948 /* find start byte of multibyte char */
949 while (--len && !(buffer->data[len] & 0xc0))
950 ;
951
952 return len;
953}
954
955/* Returns byte length of complete char sequence for buffer code page,
956 it's relative to current buffer position which is currently used for BOM handling
957 only. */
959{
960 encoded_buffer *buffer = &readerinput->buffer->encoded;
961 int len;
962
963 if (readerinput->buffer->code_page == CP_UTF8)
964 len = readerinput_get_utf8_convlen(readerinput);
965 else
966 len = buffer->written;
967
968 TRACE("%d\n", len - buffer->cur);
969 return len - buffer->cur;
970}
971
972/* It's possible that raw buffer has some leftovers from last conversion - some char
973 sequence that doesn't represent a full code point. Length argument should be calculated with
974 readerinput_get_convlen(), if it's -1 it will be calculated here. */
975static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
976{
977 encoded_buffer *buffer = &readerinput->buffer->encoded;
978
979 if (len == -1)
980 len = readerinput_get_convlen(readerinput);
981
982 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
983 /* everything below cur is lost too */
984 buffer->written -= len + buffer->cur;
985 /* after this point we don't need cur offset really,
986 it's used only to mark where actual data begins when first chunk is read */
987 buffer->cur = 0;
988}
989
991{
992 BOOL prev_cr = buffer->prev_cr;
993 const WCHAR *src;
994 WCHAR *dest;
995
996 src = dest = (WCHAR*)buffer->data + off;
997 while ((const char*)src < buffer->data + buffer->written)
998 {
999 if (*src == '\r')
1000 {
1001 *dest++ = '\n';
1002 src++;
1003 prev_cr = TRUE;
1004 continue;
1005 }
1006 if(prev_cr && *src == '\n')
1007 src++;
1008 else
1009 *dest++ = *src++;
1010 prev_cr = FALSE;
1011 }
1012
1013 buffer->written = (char*)dest - buffer->data;
1014 buffer->prev_cr = prev_cr;
1015 *dest = 0;
1016}
1017
1018/* note that raw buffer content is kept */
1020{
1021 encoded_buffer *src = &readerinput->buffer->encoded;
1022 encoded_buffer *dest = &readerinput->buffer->utf16;
1023 int len, dest_len;
1024 UINT cp = ~0u;
1025 HRESULT hr;
1026 WCHAR *ptr;
1027
1028 hr = get_code_page(enc, &cp);
1029 if (FAILED(hr)) return;
1030
1031 readerinput->buffer->code_page = cp;
1032 len = readerinput_get_convlen(readerinput);
1033
1034 TRACE("switching to cp %d\n", cp);
1035
1036 /* just copy in this case */
1037 if (enc == XmlEncoding_UTF16)
1038 {
1039 readerinput_grow(readerinput, len);
1040 memcpy(dest->data, src->data + src->cur, len);
1041 dest->written += len*sizeof(WCHAR);
1042 }
1043 else
1044 {
1045 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1046 readerinput_grow(readerinput, dest_len);
1047 ptr = (WCHAR*)dest->data;
1048 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1049 ptr[dest_len] = 0;
1050 dest->written += dest_len*sizeof(WCHAR);
1051 }
1052
1054}
1055
1056/* drops already parsed data from the start of the buffer */
1058{
1059 encoded_buffer *buffer = &reader->input->buffer->utf16;
1060
1061 /* avoid to move too often using threshold shrink length */
1062 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1063 {
1064 buffer->written -= buffer->cur*sizeof(WCHAR);
1065 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1066 buffer->cur = 0;
1067 *(WCHAR*)&buffer->data[buffer->written] = 0;
1068 }
1069}
1070
1071/* This is the normal way for the reader to get new data converted from the raw buffer to the utf16 buffer.
1072 It won't attempt to shrink, but will grow the destination buffer if needed. */
1074{
1075 xmlreaderinput *readerinput = reader->input;
1076 encoded_buffer *src = &readerinput->buffer->encoded;
1077 encoded_buffer *dest = &readerinput->buffer->utf16;
1078 UINT cp = readerinput->buffer->code_page;
1079 int len, dest_len, prev_len;
1080 HRESULT hr;
1081 WCHAR *ptr;
1082
1083 /* get some raw data from stream first */
1084 hr = readerinput_growraw(readerinput);
1085 len = readerinput_get_convlen(readerinput);
1086 prev_len = dest->written / sizeof(WCHAR);
1087
1088 /* just copy for UTF-16 case */
1089 if (cp == 1200)
1090 {
1091 readerinput_grow(readerinput, len);
1092 memcpy(dest->data + dest->written, src->data + src->cur, len);
1093 dest->written += len*sizeof(WCHAR);
1094 }
1095 else
1096 {
1097 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1098 readerinput_grow(readerinput, dest_len);
1099 ptr = (WCHAR*)(dest->data + dest->written);
1100 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1101 ptr[dest_len] = 0;
1102 dest->written += dest_len*sizeof(WCHAR);
1103 /* get rid of processed data */
1104 readerinput_shrinkraw(readerinput, len);
1105 }
1106
1107 fixup_buffer_cr(dest, prev_len);
1108 return hr;
1109}
1110
1112{
1113 return reader->input->buffer->utf16.cur;
1114}
1115
1117{
1118 encoded_buffer *buffer = &reader->input->buffer->utf16;
1119 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1120 if (!*ptr) reader_more(reader);
1121 return (WCHAR*)buffer->data + buffer->cur;
1122}
1123
1125{
1126 int i=0;
1127 const WCHAR *ptr = reader_get_ptr(reader);
1128 while (str[i])
1129 {
1130 if (!ptr[i])
1131 {
1134 }
1135 if (str[i] != ptr[i])
1136 return ptr[i] - str[i];
1137 i++;
1138 }
1139 return 0;
1140}
1141
1143{
1144 if (ch == '\r')
1145 reader->position.line_position = 1;
1146 else if (ch == '\n')
1147 {
1148 reader->position.line_number++;
1149 reader->position.line_position = 1;
1150 }
1151 else
1152 reader->position.line_position++;
1153}
1154
1155/* moves cursor n WCHARs forward */
1156static void reader_skipn(xmlreader *reader, int n)
1157{
1158 encoded_buffer *buffer = &reader->input->buffer->utf16;
1159 const WCHAR *ptr;
1160
1161 while (*(ptr = reader_get_ptr(reader)) && n--)
1162 {
1164 buffer->cur++;
1165 }
1166}
1167
1168static inline BOOL is_wchar_space(WCHAR ch)
1169{
1170 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1171}
1172
1173/* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1175{
1176 const WCHAR *ptr = reader_get_ptr(reader);
1178
1179 while (is_wchar_space(*ptr))
1180 {
1181 reader_skipn(reader, 1);
1183 }
1184
1185 return reader_get_cur(reader) - start;
1186}
1187
1188/* [26] VersionNum ::= '1.' [0-9]+ */
1190{
1191 static const WCHAR onedotW[] = {'1','.',0};
1192 WCHAR *ptr, *ptr2;
1193 UINT start;
1194
1195 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1196
1198 /* skip "1." */
1199 reader_skipn(reader, 2);
1200
1201 ptr2 = ptr = reader_get_ptr(reader);
1202 while (*ptr >= '0' && *ptr <= '9')
1203 {
1204 reader_skipn(reader, 1);
1206 }
1207
1208 if (ptr2 == ptr) return WC_E_DIGIT;
1210 TRACE("version=%s\n", debug_strval(reader, val));
1211 return S_OK;
1212}
1213
1214/* [25] Eq ::= S? '=' S? */
1216{
1217 static const WCHAR eqW[] = {'=',0};
1219 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1220 /* skip '=' */
1221 reader_skipn(reader, 1);
1223 return S_OK;
1224}
1225
1226/* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1228{
1229 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1230 struct reader_position position;
1231 strval val, name;
1232 HRESULT hr;
1233
1235
1236 position = reader->position;
1237 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1239 /* skip 'version' */
1240 reader_skipn(reader, 7);
1241
1243 if (FAILED(hr)) return hr;
1244
1246 return WC_E_QUOTE;
1247 /* skip "'"|'"' */
1248 reader_skipn(reader, 1);
1249
1251 if (FAILED(hr)) return hr;
1252
1254 return WC_E_QUOTE;
1255
1256 /* skip "'"|'"' */
1257 reader_skipn(reader, 1);
1258
1259 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1260}
1261
1262/* ([A-Za-z0-9._] | '-') */
1263static inline BOOL is_wchar_encname(WCHAR ch)
1264{
1265 return ((ch >= 'A' && ch <= 'Z') ||
1266 (ch >= 'a' && ch <= 'z') ||
1267 (ch >= '0' && ch <= '9') ||
1268 (ch == '.') || (ch == '_') ||
1269 (ch == '-'));
1270}
1271
1272/* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1274{
1276 xml_encoding enc;
1277 int len;
1278
1279 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1280 return WC_E_ENCNAME;
1281
1282 val->start = reader_get_cur(reader);
1283
1284 ptr = start;
1285 while (is_wchar_encname(*++ptr))
1286 ;
1287
1288 len = ptr - start;
1290 TRACE("encoding name %s\n", debugstr_wn(start, len));
1291 val->str = start;
1292 val->len = len;
1293
1294 if (enc == XmlEncoding_Unknown)
1295 return WC_E_ENCNAME;
1296
1297 /* skip encoding name */
1299 return S_OK;
1300}
1301
1302/* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1304{
1305 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1306 struct reader_position position;
1307 strval name, val;
1308 HRESULT hr;
1309
1310 if (!reader_skipspaces(reader)) return S_FALSE;
1311
1312 position = reader->position;
1313 if (reader_cmp(reader, encodingW)) return S_FALSE;
1314 name.str = reader_get_ptr(reader);
1315 name.start = reader_get_cur(reader);
1316 name.len = 8;
1317 /* skip 'encoding' */
1318 reader_skipn(reader, 8);
1319
1321 if (FAILED(hr)) return hr;
1322
1324 return WC_E_QUOTE;
1325 /* skip "'"|'"' */
1326 reader_skipn(reader, 1);
1327
1329 if (FAILED(hr)) return hr;
1330
1332 return WC_E_QUOTE;
1333
1334 /* skip "'"|'"' */
1335 reader_skipn(reader, 1);
1336
1337 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1338}
1339
1340/* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1342{
1343 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1344 static const WCHAR yesW[] = {'y','e','s',0};
1345 static const WCHAR noW[] = {'n','o',0};
1346 struct reader_position position;
1347 strval name, val;
1348 UINT start;
1349 HRESULT hr;
1350
1351 if (!reader_skipspaces(reader)) return S_FALSE;
1352
1353 position = reader->position;
1354 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1356 /* skip 'standalone' */
1357 reader_skipn(reader, 10);
1358
1360 if (FAILED(hr)) return hr;
1361
1363 return WC_E_QUOTE;
1364 /* skip "'"|'"' */
1365 reader_skipn(reader, 1);
1366
1368 return WC_E_XMLDECL;
1369
1371 /* skip 'yes'|'no' */
1374 TRACE("standalone=%s\n", debug_strval(reader, &val));
1375
1377 return WC_E_QUOTE;
1378 /* skip "'"|'"' */
1379 reader_skipn(reader, 1);
1380
1381 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1382}
1383
1384/* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1386{
1387 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1388 static const WCHAR declcloseW[] = {'?','>',0};
1389 struct reader_position position;
1390 HRESULT hr;
1391
1392 /* check if we have "<?xml " */
1393 if (reader_cmp(reader, xmldeclW))
1394 return S_FALSE;
1395
1396 reader_skipn(reader, 2);
1397 position = reader->position;
1398 reader_skipn(reader, 3);
1400 if (FAILED(hr))
1401 return hr;
1402
1404 if (FAILED(hr))
1405 return hr;
1406
1408 if (FAILED(hr))
1409 return hr;
1410
1412 if (reader_cmp(reader, declcloseW))
1413 return WC_E_XMLDECL;
1414
1415 /* skip '?>' */
1416 reader_skipn(reader, 2);
1417
1419 reader->empty_element.position = position;
1422
1423 return S_OK;
1424}
1425
1426/* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1428{
1429 WCHAR *ptr;
1430 UINT start;
1431
1432 if (reader->resumestate == XmlReadResumeState_Comment)
1433 {
1434 start = reader->resume[XmlReadResume_Body];
1436 }
1437 else
1438 {
1439 /* skip '<!--' */
1440 reader_skipn(reader, 4);
1444 reader->nodetype = XmlNodeType_Comment;
1445 reader->resume[XmlReadResume_Body] = start;
1446 reader->resumestate = XmlReadResumeState_Comment;
1448 }
1449
1450 /* will exit when there's no more data, it won't attempt to
1451 read more from stream */
1452 while (*ptr)
1453 {
1454 if (ptr[0] == '-')
1455 {
1456 if (ptr[1] == '-')
1457 {
1458 if (ptr[2] == '>')
1459 {
1460 strval value;
1461
1463 TRACE("%s\n", debug_strval(reader, &value));
1464
1465 /* skip rest of markup '->' */
1466 reader_skipn(reader, 3);
1467
1469 reader->resume[XmlReadResume_Body] = 0;
1470 reader->resumestate = XmlReadResumeState_Initial;
1471 return S_OK;
1472 }
1473 else
1474 return WC_E_COMMENT;
1475 }
1476 }
1477
1478 reader_skipn(reader, 1);
1479 ptr++;
1480 }
1481
1482 return S_OK;
1483}
1484
1485/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1486static inline BOOL is_char(WCHAR ch)
1487{
1488 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1489 (ch >= 0x20 && ch <= 0xd7ff) ||
1490 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1491 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1492 (ch >= 0xe000 && ch <= 0xfffd);
1493}
1494
1495/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1497{
1498 return (ch == ' ') ||
1499 (ch >= 'a' && ch <= 'z') ||
1500 (ch >= 'A' && ch <= 'Z') ||
1501 (ch >= '0' && ch <= '9') ||
1502 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1503 (ch == '=') || (ch == '?') ||
1504 (ch == '@') || (ch == '!') ||
1505 (ch >= '#' && ch <= '%') || /* #$% */
1506 (ch == '_') || (ch == '\r') || (ch == '\n');
1507}
1508
/* Returns nonzero when ch may begin an XML Name: ':', '_', ASCII letters and
   the non-ASCII ranges listed below. High and low surrogate code units are
   each accepted on their own.
   NOTE(review): the declaration line is not visible in this listing;
   presumably the NameStartChar predicate - confirm. */
1510{
1511 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1512 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1513 (ch >= 0xc0 && ch <= 0xd6) ||
1514 (ch >= 0xd8 && ch <= 0xf6) ||
1515 (ch >= 0xf8 && ch <= 0x2ff) ||
1516 (ch >= 0x370 && ch <= 0x37d) ||
1517 (ch >= 0x37f && ch <= 0x1fff) ||
1518 (ch >= 0x200c && ch <= 0x200d) ||
1519 (ch >= 0x2070 && ch <= 0x218f) ||
1520 (ch >= 0x2c00 && ch <= 0x2fef) ||
1521 (ch >= 0x3001 && ch <= 0xd7ff) ||
1522 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1523 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1524 (ch >= 0xf900 && ch <= 0xfdcf) ||
1525 (ch >= 0xfdf0 && ch <= 0xfffd);
1526}
1527
1528/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
/* Returns nonzero when ch may appear inside an NCName: the name characters
   listed below, deliberately without ':'. Surrogate code units are each
   accepted on their own.
   NOTE(review): the declaration line is not visible in this listing. */
1530{
1531 return (ch >= 'A' && ch <= 'Z') ||
1532 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1533 (ch == '-') || (ch == '.') ||
1534 (ch >= '0' && ch <= '9') ||
1535 (ch == 0xb7) ||
1536 (ch >= 0xc0 && ch <= 0xd6) ||
1537 (ch >= 0xd8 && ch <= 0xf6) ||
1538 (ch >= 0xf8 && ch <= 0x2ff) ||
1539 (ch >= 0x300 && ch <= 0x36f) ||
1540 (ch >= 0x370 && ch <= 0x37d) ||
1541 (ch >= 0x37f && ch <= 0x1fff) ||
1542 (ch >= 0x200c && ch <= 0x200d) ||
1543 (ch >= 0x203f && ch <= 0x2040) ||
1544 (ch >= 0x2070 && ch <= 0x218f) ||
1545 (ch >= 0x2c00 && ch <= 0x2fef) ||
1546 (ch >= 0x3001 && ch <= 0xd7ff) ||
1547 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1548 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1549 (ch >= 0xf900 && ch <= 0xfdcf) ||
1550 (ch >= 0xfdf0 && ch <= 0xfffd);
1551}
1552
/* Returns nonzero when ch may appear inside a Name: any NCName character or ':'.
   NOTE(review): the declaration line is not visible in this listing. */
1554{
1555 return (ch == ':') || is_ncnamechar(ch);
1556}
1557
/* Reports the node type the reader is currently positioned on:
   XmlNodeType_Attribute while reader->attr is set, otherwise reader->nodetype.
   NOTE(review): the declaration line is not visible in this listing. */
1559{
1560 /* When we're on attribute always return attribute type, container node type is kept.
1561 Note that container is not necessarily an element, and attribute doesn't mean it's
1562 an attribute in XML spec terms. */
1563 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1564}
1565
1566/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1567 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1568 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1569 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1570 [5] Name ::= NameStartChar (NameChar)* */
/* Consumes a run of Name characters and reports it through `name`.
   A non-zero resume[XmlReadResume_Name] means a previous call was interrupted;
   the saved offset is then reused as the start of the name.
   Returns E_PENDING (after saving the start offset) or S_OK (after clearing it).
   NOTE(review): the declaration line, the assignments to ptr/start, the
   condition guarding the E_PENDING branch and the statement that fills `name`
   are not visible in this listing. */
1572{
1573 WCHAR *ptr;
1574 UINT start;
1575
1576 if (reader->resume[XmlReadResume_Name])
1577 {
1578 start = reader->resume[XmlReadResume_Name];
1580 }
1581 else
1582 {
1586 }
1587
1588 while (is_namechar(*ptr))
1589 {
1590 reader_skipn(reader, 1);
1592 }
1593
1595 {
 /* remember where the name began so the next call can continue it */
1596 reader->resume[XmlReadResume_Name] = start;
1597 return E_PENDING;
1598 }
1599 else
1600 reader->resume[XmlReadResume_Name] = 0;
1601
1603 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1604
1605 return S_OK;
1606}
1607
1608/* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
/* Parses a processing-instruction target and stores it in *target.
   Returns WC_E_LEADINGXML when the name equals the reserved target "xml",
   NC_E_NAMECOLON when a ':' occurs after the first character, WC_E_PI when the
   name starts with ':', and S_OK otherwise.
   NOTE(review): the declaration line and the statements that parse the name and
   assign ptr are not visible in this listing. The production excludes every
   case variant of "xml"; whether strval_eq() compares case-insensitively cannot
   be told from here - confirm. */
1610{
1611 static const WCHAR xmlW[] = {'x','m','l'};
1612 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1613 strval name;
1614 WCHAR *ptr;
1615 HRESULT hr;
1616 UINT i;
1617
1620
1621 /* now that we got name check for illegal content */
1622 if (strval_eq(reader, &name, &xmlval))
1623 return WC_E_LEADINGXML;
1624
1625 /* PITarget can't be a qualified name */
1627 for (i = 0; i < name.len; i++)
1628 if (ptr[i] == ':')
1629 return i ? NC_E_NAMECOLON : WC_E_PI;
1630
1631 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1632 *target = name;
1633 return S_OK;
1634}
1635
1636/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
/* Parses a processing instruction in resumable steps driven by
   reader->resumestate: the '<?' opener, then the target
   (XmlReadResumeState_PITarget), then the body (XmlReadResumeState_PIBody).
   The body scan stops at '?>'; leading whitespace of the body is stripped
   before the value is recorded. On completion the resume state and
   resume[XmlReadResume_Body] are reset and S_OK is returned. S_OK is also
   returned, with the resume state unchanged, when the buffered data ends first.
   NOTE(review): the declaration line, the case labels and several statements
   (target parsing, ptr/cur assignments, value/node setup) are not visible in
   this listing. */
1638{
1639 strval target;
1640 WCHAR *ptr;
1641 UINT start;
1642 HRESULT hr;
1643
1644 switch (reader->resumestate)
1645 {
1647 /* skip '<?' */
1648 reader_skipn(reader, 2);
1650 reader->resumestate = XmlReadResumeState_PITarget;
1653 if (FAILED(hr)) return hr;
1657 reader->resumestate = XmlReadResumeState_PIBody;
1659 default:
1660 ;
1661 }
1662
1663 start = reader->resume[XmlReadResume_Body];
1665 while (*ptr)
1666 {
1667 if (ptr[0] == '?')
1668 {
1669 if (ptr[1] == '>')
1670 {
1672 strval value;
1673
1674 /* strip all leading whitespace chars */
1675 while (start < cur)
1676 {
1678 if (!is_wchar_space(*ptr)) break;
1679 start++;
1680 }
1681
1683
1684 /* skip '?>' */
1685 reader_skipn(reader, 2);
1686 TRACE("%s\n", debug_strval(reader, &value));
1688 reader->resumestate = XmlReadResumeState_Initial;
1689 reader->resume[XmlReadResume_Body] = 0;
1691 return S_OK;
1692 }
1693 }
1694
1695 reader_skipn(reader, 1);
1697 }
1698
1699 return S_OK;
1700}
1701
1702/* This one is used to parse significant whitespace nodes, like in Misc production */
/* Sets the node type to XmlNodeType_Whitespace and, once the input is no longer
   pending, finishes the node using the start offset saved in
   resume[XmlReadResume_Body] and resets the resume state to Initial.
   Always returns S_OK, including when is_reader_pending() reports more data is
   needed.
   NOTE(review): the declaration line, the case labels and the statements that
   skip the whitespace and record the value are not visible in this listing. */
1704{
1705 switch (reader->resumestate)
1706 {
1711 reader->nodetype = XmlNodeType_Whitespace;
1715 /* fallthrough */
1717 {
1718 strval value;
1719 UINT start;
1720
1722 if (is_reader_pending(reader)) return S_OK;
1723
1724 start = reader->resume[XmlReadResume_Body];
1727 TRACE("%s\n", debug_strval(reader, &value));
1728 reader->resumestate = XmlReadResumeState_Initial;
1729 }
1730 default:
1731 ;
1732 }
1733
1734 return S_OK;
1735}
1736
1737/* [27] Misc ::= Comment | PI | S */
/* Parses one Misc item (whitespace, comment or processing instruction).
   If a node was left unfinished (resume state is not Initial) it is completed
   first. Otherwise the loop dispatches on what the input starts with and
   returns as soon as a sub-parser yields anything other than S_FALSE.
   Returns S_FALSE when the input starts with none of the three.
   NOTE(review): the declaration line, most case labels and the calls made in
   each branch of the dispatch loop are not visible in this listing. */
1739{
1740 HRESULT hr = S_FALSE;
1741
1742 if (reader->resumestate != XmlReadResumeState_Initial)
1743 {
1745 if (FAILED(hr)) return hr;
1746
1747 /* finish current node */
1748 switch (reader->resumestate)
1749 {
1752 return reader_parse_pi(reader);
1757 default:
1758 ERR("unknown resume state %d\n", reader->resumestate);
1759 }
1760 }
1761
1762 while (1)
1763 {
1764 const WCHAR *cur = reader_get_ptr(reader);
1765
1766 if (is_wchar_space(*cur))
1768 else if (!reader_cmp(reader, commentW))
1770 else if (!reader_cmp(reader, piW))
1772 else
1773 break;
1774
 /* S_FALSE means "nothing produced yet, keep looking" */
1775 if (hr != S_FALSE) return hr;
1776 }
1777
1778 return hr;
1779}
1780
1781/* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
/* Parses a quoted system literal and reports its text through `literal`.
   Returns WC_E_QUOTE when the input does not start with a single or double
   quote. Characters are consumed while they satisfy is_char() and differ from
   the opening quote; the closing quote is skipped when present.
   NOTE(review): in the visible code an unterminated literal still returns S_OK.
   The declaration line, the cur/quote declarations and the statement that
   fills `literal` are not visible in this listing. */
1783{
1785 UINT start;
1786
1787 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1788
1789 quote = *cur;
1790 reader_skipn(reader, 1);
1791
1794 while (is_char(*cur) && *cur != quote)
1795 {
1796 reader_skipn(reader, 1);
1798 }
1800 if (*cur == quote) reader_skipn(reader, 1);
1801
1802 TRACE("%s\n", debug_strval(reader, literal));
1803 return S_OK;
1804}
1805
1806/* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1807 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/* Parses a quoted public identifier literal and reports its text through
   `literal`. Returns WC_E_QUOTE when the input does not start with a quote.
   Characters are consumed while is_pubchar() accepts them and they differ from
   the opening quote; the closing quote is skipped when present.
   NOTE(review): in the visible code a literal that stops on a character
   rejected by is_pubchar() still returns S_OK. The declaration line, the
   cur/quote declarations and the statement that fills `literal` are not
   visible in this listing. */
1809{
1811 UINT start;
1812
1813 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1814
1815 quote = *cur;
1816 reader_skipn(reader, 1);
1817
1820 while (is_pubchar(*cur) && *cur != quote)
1821 {
1822 reader_skipn(reader, 1);
1824 }
1826 if (*cur == quote) reader_skipn(reader, 1);
1827
1828 TRACE("%s\n", debug_strval(reader, literal));
1829 return S_OK;
1830}
1831
1832/* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
/* Parses an external identifier and records each literal it finds through
   reader_add_attr(), all with the position captured on entry.
   Returns S_FALSE when the input starts with neither "PUBLIC" nor "SYSTEM",
   WC_E_WHITESPACE when the keyword is not followed by whitespace, and
   otherwise the result of the literal parser / reader_add_attr().
   In the PUBLIC branch the system literal is treated as optional: missing
   whitespace or a failed parse after the public literal yields S_OK.
   NOTE(review): the declaration line, the whitespace-skipping calls that set
   cnt, the literal-parsing calls and the statements that initialise `name` are
   not visible in this listing. */
1834{
1835 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1836 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1837 struct reader_position position = reader->position;
1838 strval name, sys;
1839 HRESULT hr;
1840 int cnt;
1841
1842 if (!reader_cmp(reader, publicW)) {
1843 strval pub;
1844
1845 /* public id */
1846 reader_skipn(reader, 6);
1848 if (!cnt) return WC_E_WHITESPACE;
1849
1851 if (FAILED(hr)) return hr;
1852
1854 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1855 if (FAILED(hr)) return hr;
1856
1858 if (!cnt) return S_OK;
1859
1860 /* optional system id */
1862 if (FAILED(hr)) return S_OK;
1863
1865 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1866 if (FAILED(hr)) return hr;
1867
1868 return S_OK;
1869 } else if (!reader_cmp(reader, systemW)) {
1870 /* system id */
1871 reader_skipn(reader, 6);
1873 if (!cnt) return WC_E_WHITESPACE;
1874
1876 if (FAILED(hr)) return hr;
1877
1879 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1880 }
1881
1882 return S_FALSE;
1883}
1884
1885/* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
/* Parses a document type declaration.
   Returns S_FALSE when the input does not start with "<!DOCTYPE",
   WC_E_DTDPROHIBITED when reader->dtdmode is DtdProcessing_Prohibit,
   WC_E_DECLDOCTYPE when the name cannot be parsed, and E_NOTIMPL when anything
   other than '>' follows the external id (internal subsets are unsupported).
   On success the node type becomes XmlNodeType_DocumentType and S_OK is
   returned.
   NOTE(review): the declaration line and the calls that skip whitespace, parse
   the name and the external id, and assign cur are not visible in this
   listing. */
1887{
1888 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1889 strval name;
1890 WCHAR *cur;
1891 HRESULT hr;
1892
1893 /* check if we have "<!DOCTYPE" */
1894 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1896
1897 /* DTD processing is not allowed by default */
1898 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1899
1900 reader_skipn(reader, 9);
1902
1903 /* name */
1905 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1906
1908
1910 if (FAILED(hr)) return hr;
1911
1913
1915 if (*cur != '>')
1916 {
1917 FIXME("internal subset parsing not implemented\n");
1918 return E_NOTIMPL;
1919 }
1920
1921 /* skip '>' */
1922 reader_skipn(reader, 1);
1923
1924 reader->nodetype = XmlNodeType_DocumentType;
1927
1928 return S_OK;
1929}
1930
1931/* [11 NS] LocalPart ::= NCName */
/* Consumes the local part of a qualified name.
   A non-zero resume[XmlReadResume_Local] means a previous call was interrupted
   and supplies the start offset. When check_for_separator is set, a ':' right
   after the NCName characters is rejected with NC_E_QNAMECOLON.
   Returns E_PENDING (after saving the start offset) or S_OK (after clearing it).
   NOTE(review): the declaration line, the assignments to ptr/start, the
   condition guarding the E_PENDING branch and the statement that fills the
   output value are not visible in this listing. */
1933{
1934 WCHAR *ptr;
1935 UINT start;
1936
1937 if (reader->resume[XmlReadResume_Local])
1938 {
1939 start = reader->resume[XmlReadResume_Local];
1941 }
1942 else
1943 {
1946 }
1947
1948 while (is_ncnamechar(*ptr))
1949 {
1950 reader_skipn(reader, 1);
1952 }
1953
1954 if (check_for_separator && *ptr == ':')
1955 return NC_E_QNAMECOLON;
1956
1958 {
1959 reader->resume[XmlReadResume_Local] = start;
1960 return E_PENDING;
1961 }
1962 else
1963 reader->resume[XmlReadResume_Local] = 0;
1964
1966
1967 return S_OK;
1968}
1969
1970/* [7 NS] QName ::= PrefixedName | UnprefixedName
1971 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1972 [9 NS] UnprefixedName ::= LocalPart
1973 [10 NS] Prefix ::= NCName */
/* Parses a qualified name into three values: prefix (empty when absent),
   local and qname. qname spans the prefix, the ':' and the local part.
   resume[XmlReadResume_Name] and resume[XmlReadResume_Local] carry the saved
   offsets of an interrupted parse; both are cleared on success.
   Returns NC_E_QNAMECHARACTER when a fresh parse does not begin with an NCName
   character, E_PENDING when more input is needed, a failure from the
   local-part parser, or S_OK.
   NOTE(review): the declaration line, the assignments to ptr/start and the
   calls that parse the local part and initialise prefix/local are not visible
   in this listing. */
1975{
1976 WCHAR *ptr;
1977 UINT start;
1978 HRESULT hr;
1979
1980 if (reader->resume[XmlReadResume_Name])
1981 {
1982 start = reader->resume[XmlReadResume_Name];
1984 }
1985 else
1986 {
1989 reader->resume[XmlReadResume_Name] = start;
1990 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1991 }
1992
1993 if (reader->resume[XmlReadResume_Local])
1994 {
1996 if (FAILED(hr)) return hr;
1997
1999 local->start - reader->resume[XmlReadResume_Name] - 1,
2000 prefix);
2001 }
2002 else
2003 {
2004 /* skip prefix part */
2005 while (is_ncnamechar(*ptr))
2006 {
2007 reader_skipn(reader, 1);
2009 }
2010
2011 if (is_reader_pending(reader)) return E_PENDING;
2012
2013 /* got a qualified name */
2014 if (*ptr == ':')
2015 {
2017
2018 /* skip ':' */
2019 reader_skipn(reader, 1);
2021 if (FAILED(hr)) return hr;
2022 }
2023 else
2024 {
 /* no ':' found: the name has no prefix */
2026 reader_init_strvalue(0, 0, prefix);
2027 }
2028 }
2029
2030 if (prefix->len)
2031 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2032 else
2033 TRACE("ncname %s\n", debug_strval(reader, local));
2034
2035 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2036 /* count ':' too */
2037 (prefix->len ? prefix->len + 1 : 0) + local->len,
2038 qname);
2039
2040 reader->resume[XmlReadResume_Name] = 0;
2041 reader->resume[XmlReadResume_Local] = 0;
2042
2043 return S_OK;
2044}
2045
/* Maps a predefined entity name (lt, gt, amp, apos, quot) to the character it
   stands for; returns 0 for any other name. The first character of the name
   selects which comparison is attempted.
   NOTE(review): the declaration line and the statement that assigns `str` are
   not visible in this listing. */
2047{
2048 static const WCHAR entltW[] = {'l','t'};
2049 static const WCHAR entgtW[] = {'g','t'};
2050 static const WCHAR entampW[] = {'a','m','p'};
2051 static const WCHAR entaposW[] = {'a','p','o','s'};
2052 static const WCHAR entquotW[] = {'q','u','o','t'};
2053 static const strval lt = { (WCHAR*)entltW, 2 };
2054 static const strval gt = { (WCHAR*)entgtW, 2 };
2055 static const strval amp = { (WCHAR*)entampW, 3 };
2056 static const strval apos = { (WCHAR*)entaposW, 4 };
2057 static const strval quot = { (WCHAR*)entquotW, 4 };
2059
2060 switch (*str)
2061 {
2062 case 'l':
2063 if (strval_eq(reader, name, &lt)) return '<';
2064 break;
2065 case 'g':
2066 if (strval_eq(reader, name, &gt)) return '>';
2067 break;
2068 case 'a':
2069 if (strval_eq(reader, name, &amp))
2070 return '&';
2071 else if (strval_eq(reader, name, &apos))
2072 return '\'';
2073 break;
2074 case 'q':
2075 if (strval_eq(reader, name, &quot)) return '\"';
2076 break;
2077 default:
2078 ;
2079 }
2080
2081 return 0;
2082}
2083
2084/* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2085 [67] Reference ::= EntityRef | CharRef
2086 [68] EntityRef ::= '&' Name ';' */
/* Resolves a reference in place inside the UTF-16 input buffer: the reference
   text is replaced by the single character it denotes, the remaining data is
   moved down with memmove() and buffer->written / buffer->cur are adjusted.
   Character references: WC_E_HEXDIGIT / WC_E_DIGIT when the first character is
   not a digit, WC_E_SEMICOLON when digits are followed by something other than
   ';', WC_E_XMLCHARACTER when the value fails is_char(). Whitespace results are
   normalised to ' '.
   Entity references: only the predefined entities are resolved; anything else
   returns WC_E_UNDECLAREDENTITY.
   NOTE(review): the accumulator `ch` is a WCHAR, so a numeric reference larger
   than the WCHAR range presumably wraps before the is_char() test - confirm.
   `ch ? ... : ...` also reports WC_E_SEMICOLON only when the digits parsed so
   far are non-zero. The declaration line, the start/ptr/cur assignments and the
   name-parsing / entity-lookup calls are not visible in this listing. */
2088{
2089 encoded_buffer *buffer = &reader->input->buffer->utf16;
2092 WCHAR ch = 0;
2093 int len;
2094
2095 /* skip '&' */
2096 reader_skipn(reader, 1);
2098
2099 if (*ptr == '#')
2100 {
2101 reader_skipn(reader, 1);
2103
2104 /* hex char or decimal */
2105 if (*ptr == 'x')
2106 {
2107 reader_skipn(reader, 1);
2109
2110 while (*ptr != ';')
2111 {
2112 if ((*ptr >= '0' && *ptr <= '9'))
2113 ch = ch*16 + *ptr - '0';
2114 else if ((*ptr >= 'a' && *ptr <= 'f'))
2115 ch = ch*16 + *ptr - 'a' + 10;
2116 else if ((*ptr >= 'A' && *ptr <= 'F'))
2117 ch = ch*16 + *ptr - 'A' + 10;
2118 else
2119 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2120 reader_skipn(reader, 1);
2122 }
2123 }
2124 else
2125 {
2126 while (*ptr != ';')
2127 {
2128 if ((*ptr >= '0' && *ptr <= '9'))
2129 {
2130 ch = ch*10 + *ptr - '0';
2131 reader_skipn(reader, 1);
2133 }
2134 else
2135 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2136 }
2137 }
2138
2139 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2140
2141 /* normalize */
2142 if (is_wchar_space(ch)) ch = ' ';
2143
 /* len is a byte count measured from ptr (the ';') to the end of written data */
2146 len = buffer->written - ((char *)ptr - buffer->data);
2147 memmove(start + 1, ptr + 1, len);
2148
2149 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2150 buffer->cur = cur + 1;
2151
2152 *start = ch;
2153 }
2154 else
2155 {
2156 strval name;
2157 HRESULT hr;
2158
2160 if (FAILED(hr)) return hr;
2161
2163 if (*ptr != ';') return WC_E_SEMICOLON;
2164
2165 /* predefined entities resolve to a single character */
2167 if (ch)
2168 {
2169 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2170 memmove(start+1, ptr+1, len);
2171 buffer->cur = cur + 1;
2172 buffer->written -= (ptr - start) * sizeof(WCHAR);
2173
2174 *start = ch;
2175 }
2176 else
2177 {
2178 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2179 return WC_E_UNDECLAREDENTITY;
2180 }
2181
2182 }
2183
2184 return S_OK;
2185}
2186
2187/* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
/* Parses a quoted attribute value.
   Returns WC_E_QUOTE when the value does not start with a quote or the data
   ends before the closing quote, WC_E_LESSTHAN on a literal '<', a failure
   from reference resolution, or S_OK after the closing quote is consumed.
   Whitespace characters inside the value are overwritten with ' ' in the
   buffer.
   NOTE(review): the declaration line, the ptr/start assignments, the reference
   call and the statement that records the value are not visible in this
   listing. */
2189{
2190 WCHAR *ptr, quote;
2191 UINT start;
2192
2194
2195 /* skip opening quote */
2196 quote = *ptr;
2197 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2198 reader_skipn(reader, 1);
2199
2202 while (*ptr)
2203 {
2204 if (*ptr == '<') return WC_E_LESSTHAN;
2205
2206 if (*ptr == quote)
2207 {
2209 /* skip closing quote */
2210 reader_skipn(reader, 1);
2211 return S_OK;
2212 }
2213
2214 if (*ptr == '&')
2215 {
2217 if (FAILED(hr)) return hr;
2218 }
2219 else
2220 {
2221 /* replace all whitespace chars with ' ' */
2222 if (is_wchar_space(*ptr)) *ptr = ' ';
2223 reader_skipn(reader, 1);
2224 }
2226 }
2227
 /* data ended before the closing quote was seen */
2228 return WC_E_QUOTE;
2229}
2230
2231/* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2232 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2233 [3 NS] DefaultAttName ::= 'xmlns'
2234 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
/* Parses one attribute (qualified name, '=', value) and registers it through
   reader_add_attr() together with the position captured on entry and the
   computed flags. Names with the xmlns prefix, or equal to "xmlns", are tested
   separately.
   Returns the first failing HRESULT, otherwise the result of reader_add_attr().
   NOTE(review): the declaration line, the statements that set `flags`, the
   Eq / value parsing calls and any namespace bookkeeping are not visible in
   this listing. */
2236{
2237 struct reader_position position = reader->position;
2238 strval prefix, local, qname, value;
2239 enum attribute_flags flags = 0;
2240 HRESULT hr;
2241
2242 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2243 if (FAILED(hr)) return hr;
2244
2245 if (strval_eq(reader, &prefix, &strval_xmlns))
2247
2248 if (strval_eq(reader, &qname, &strval_xmlns))
2250
2252 if (FAILED(hr)) return hr;
2253
2255 if (FAILED(hr)) return hr;
2256
2259
2261 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2262}
2263
2264/* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2265 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
/* Parses the remainder of a start tag after '<': the qualified name followed
   by attributes, looping until '/>' or '>' is found.
   reader->is_empty_element is updated on every iteration. For '/>' the
   reader->empty_element record is filled in and S_OK is returned.
   NOTE(review): the declaration line, the whitespace skipping, the rest of the
   empty-element bookkeeping, the handling after '>' and the attribute-parsing
   call are not visible in this listing. */
2267{
2268 struct reader_position position = reader->position;
2269 HRESULT hr;
2270
2271 hr = reader_parse_qname(reader, prefix, local, qname);
2272 if (FAILED(hr)) return hr;
2273
2274 for (;;)
2275 {
2276 static const WCHAR endW[] = {'/','>',0};
2277
2279
2280 /* empty element */
2281 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2282 {
2283 struct element *element = &reader->empty_element;
2284
2285 /* skip '/>' */
2286 reader_skipn(reader, 2);
2287
2290
2291 element->prefix = *prefix;
2296 return S_OK;
2297 }
2298
2299 /* got a start tag */
2300 if (!reader_cmp(reader, gtW))
2301 {
2302 /* skip '>' */
2303 reader_skipn(reader, 1);
2305 }
2306
2308 if (FAILED(hr)) return hr;
2309 }
2310
2311 return S_OK;
2312}
2313
2314/* [39] element ::= EmptyElemTag | STag content ETag */
/* Parses an element start (or empty-element) tag in resumable steps.
   Returns S_FALSE when the input does not start with '<', E_FAIL for an
   unexpected resume state, a failure from start-tag parsing, or the result of
   that parse.
   On success the node type becomes XmlNodeType_Element, the resume state is
   reset, and reader->instate moves to MiscEnd when an empty element is parsed
   with an empty element stack, otherwise to Content.
   NOTE(review): the declaration line, the case labels, local declarations, the
   start-tag call and the statements that record the element's names are not
   visible in this listing. */
2316{
2317 HRESULT hr;
2318
2319 switch (reader->resumestate)
2320 {
2322 /* check if we are really on element */
2323 if (reader_cmp(reader, ltW)) return S_FALSE;
2324
2325 /* skip '<' */
2326 reader_skipn(reader, 1);
2327
2329 reader->resumestate = XmlReadResumeState_STag;
2331 {
2333
2334 /* this handles empty elements too */
2336 if (FAILED(hr)) return hr;
2337
2338 /* FIXME: need to check for defined namespace to reject invalid prefix */
2339
2340 /* if we got empty element and stack is empty go straight to Misc */
2341 if (reader->is_empty_element && list_empty(&reader->elements))
2342 reader->instate = XmlReadInState_MiscEnd;
2343 else
2344 reader->instate = XmlReadInState_Content;
2345
2346 reader->nodetype = XmlNodeType_Element;
2347 reader->resumestate = XmlReadResumeState_Initial;
2351 break;
2352 }
2353 default:
2354 hr = E_FAIL;
2355 }
2356
2357 return hr;
2358}
2359
2360/* [13 NS] ETag ::= '</' QName S? '>' */
/* Parses an end tag: consumes the leading two characters, reads the name,
   requires '>' (WC_E_GREATERTHAN otherwise) and inspects the element on top of
   reader->elements. On success the node type becomes XmlNodeType_EndElement,
   is_empty_element is cleared and S_OK is returned.
   NOTE(review): the declaration line, the name-parsing call, the whitespace
   skipping, the comparison against the open element and the position / value
   updates are not visible in this listing. */
2362{
2363 struct reader_position position;
2364 strval prefix, local, qname;
2365 struct element *element;
2366 HRESULT hr;
2367
2368 /* skip '</' */
2369 reader_skipn(reader, 2);
2370
2371 position = reader->position;
2373 if (FAILED(hr)) return hr;
2374
2376
2377 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2378
2379 /* skip '>' */
2380 reader_skipn(reader, 1);
2381
2382 /* Element stack should never be empty at this point, because we shouldn't get to
2383 content parsing if it's empty. */
2384 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2386
2387 /* update position stored for start tag, we won't be using it */
2389
2390 reader->nodetype = XmlNodeType_EndElement;
2391 reader->is_empty_element = FALSE;
2393
2394 return S_OK;
2395}
2396
2397/* [18] CDSect ::= CDStart CData CDEnd
2398 [19] CDStart ::= '<![CDATA['
2399 [20] CData ::= (Char* - (Char* ']]>' Char*))
2400 [21] CDEnd ::= ']]>' */
/* Scans a CDATA section.
   On a fresh call the nine-character opener is consumed, the node type becomes
   XmlNodeType_CDATA and the resume state is set to XmlReadResumeState_CDATA.
   When re-entered in that state, the offset saved in
   resume[XmlReadResume_Body] is reused.
   Returns S_OK once ']]>' has been consumed (resume state reset), and S_OK with
   the resume state unchanged when the buffered data ends first.
   NOTE(review): the declaration line and the statements that assign ptr/start
   and record the value are not visible in this listing. */
2402{
2403 WCHAR *ptr;
2404 UINT start;
2405
2406 if (reader->resumestate == XmlReadResumeState_CDATA)
2407 {
2408 start = reader->resume[XmlReadResume_Body];
2410 }
2411 else
2412 {
2413 /* skip markup '<![CDATA[' */
2414 reader_skipn(reader, 9);
2418 reader->nodetype = XmlNodeType_CDATA;
2419 reader->resume[XmlReadResume_Body] = start;
2420 reader->resumestate = XmlReadResumeState_CDATA;
2422 }
2423
2424 while (*ptr)
2425 {
2426 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2427 {
2428 strval value;
2429
2431
2432 /* skip ']]>' */
2433 reader_skipn(reader, 3);
2434 TRACE("%s\n", debug_strval(reader, &value));
2435
2437 reader->resume[XmlReadResume_Body] = 0;
2438 reader->resumestate = XmlReadResumeState_Initial;
2439 return S_OK;
2440 }
2441 else
2442 {
2443 reader_skipn(reader, 1);
2444 ptr++;
2445 }
2446 }
2447
2448 return S_OK;
2449}
2450
2451/* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
/* Scans character data up to the next '<'.
   A fresh call returns S_OK immediately when there is no text (end of data or
   '<'); otherwise the start offset and XmlReadResumeState_CharData are saved.
   The node type is switched to XmlNodeType_Text as soon as a non-whitespace
   character is seen. Returns WC_E_CDSECTEND when ']]>' occurs in the text and
   S_OK once '<' is reached (resume state reset) or the buffered data ends.
   NOTE(review): the declaration line, the ptr/start assignments, the initial
   node type, the reference call taken for '&' and the statement that records
   the value are not visible in this listing. */
2453{
2454 struct reader_position position;
2455 WCHAR *ptr;
2456 UINT start;
2457
2458 if (reader->resumestate == XmlReadResumeState_CharData)
2459 {
2460 start = reader->resume[XmlReadResume_Body];
2462 }
2463 else
2464 {
2468 /* There's no text */
2469 if (!*ptr || *ptr == '<') return S_OK;
2471 reader->resume[XmlReadResume_Body] = start;
2472 reader->resumestate = XmlReadResumeState_CharData;
2474 }
2475
2476 position = reader->position;
2477 while (*ptr)
2478 {
2479 static const WCHAR ampW[] = {'&',0};
2480
2481 /* CDATA closing sequence ']]>' is not allowed */
2482 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2483 return WC_E_CDSECTEND;
2484
2485 /* Found next markup part */
2486 if (ptr[0] == '<')
2487 {
2488 strval value;
2489
2490 reader->empty_element.position = position;
2493 reader->resume[XmlReadResume_Body] = 0;
2494 reader->resumestate = XmlReadResumeState_Initial;
2495 return S_OK;
2496 }
2497
2498 /* this covers a case when text has leading whitespace chars */
2499 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2500
2501 if (!reader_cmp(reader, ampW))
2503 else
2504 reader_skipn(reader, 1);
2505
2507 }
2508
2509 return S_OK;
2510}
2511
2512/* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
/* Dispatches parsing of one piece of element content.
   An unfinished node (resume state not Initial) is continued first; otherwise
   the input is tested in order for an end tag, a comment, a processing
   instruction, a CDATA section and a start tag, and whatever remains is treated
   as character data.
   NOTE(review): the declaration line, the case labels and several of the
   dispatched calls (end tag, comment, element, character data) are not visible
   in this listing. */
2514{
2515 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2516 static const WCHAR etagW[] = {'<','/',0};
2517
2518 if (reader->resumestate != XmlReadResumeState_Initial)
2519 {
2520 switch (reader->resumestate)
2521 {
2523 return reader_parse_cdata(reader);
2528 return reader_parse_pi(reader);
2531 default:
2532 ERR("unknown resume state %d\n", reader->resumestate);
2533 }
2534 }
2535
2537
2538 /* handle end tag here, it indicates end of content as well */
2539 if (!reader_cmp(reader, etagW))
2541
2542 if (!reader_cmp(reader, commentW))
2544
2545 if (!reader_cmp(reader, piW))
2546 return reader_parse_pi(reader);
2547
2548 if (!reader_cmp(reader, cdstartW))
2549 return reader_parse_cdata(reader);
2550
2551 if (!reader_cmp(reader, ltW))
2553
2554 /* what's left must be CharData */
2556}
2557
/* Top-level state machine that produces the next node.
   It first adjusts per-node bookkeeping for the node being left (namespace
   definitions of an empty element are popped, otherwise the depth is
   incremented), then loops on reader->instate until a sub-parser yields a node
   or an error.
   Visible transitions: a sub-parser returning S_FALSE moves the state on to
   DTD and then to Element; at the end of the document any remaining input is
   rejected with WC_E_SYNTAX, otherwise the state becomes Eof and the node type
   None. XmlReadInState_Eof returns S_FALSE; unknown states return E_NOTIMPL.
   NOTE(review): the declaration line, most case labels and most sub-parser
   calls are not visible in this listing, so the exact state each visible
   fragment belongs to cannot be confirmed from here. */
2559{
2561 HRESULT hr;
2562
2564 {
2565 reader->chunk_read_off = 0;
2567 }
2568
2569 /* When moving from EndElement or empty element, pop its own namespace definitions */
2570 switch (nodetype)
2571 {
2574 /* fallthrough */
2576 if (reader->is_empty_element)
2577 reader_pop_ns_nodes(reader, &reader->empty_element);
2578 else if (FAILED(hr = reader_inc_depth(reader)))
2579 return hr;
2580 break;
2584 break;
2585 default:
2586 ;
2587 }
2588
2589 for (;;)
2590 {
2591 switch (reader->instate)
2592 {
2593 /* if it's a first call for a new input we need to detect stream encoding */
2595 {
2596 xml_encoding enc;
2597
2598 hr = readerinput_growraw(reader->input);
2599 if (FAILED(hr)) return hr;
2600
 /* positions reported to callers are 1-based */
2601 reader->position.line_number = 1;
2602 reader->position.line_position = 1;
2603
2604 /* try to detect encoding by BOM or data and set input code page */
2605 hr = readerinput_detectencoding(reader->input, &enc);
2606 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2608 if (FAILED(hr)) return hr;
2609
2610 /* always switch first time because we have to put something in */
2612
2613 /* parse xml declaration */
2615 if (FAILED(hr)) return hr;
2616
2617 readerinput_shrinkraw(reader->input, -1);
2619 if (hr == S_OK) return hr;
2620 }
2621 break;
2624 if (FAILED(hr)) return hr;
2625
2626 if (hr == S_FALSE)
2627 reader->instate = XmlReadInState_DTD;
2628 else
2629 return hr;
2630 break;
2631 case XmlReadInState_DTD:
2633 if (FAILED(hr)) return hr;
2634
2635 if (hr == S_OK)
2636 {
2638 return hr;
2639 }
2640 else
2641 reader->instate = XmlReadInState_Element;
2642 break;
2645 if (FAILED(hr)) return hr;
2646
2647 if (hr == S_FALSE)
2648 reader->instate = XmlReadInState_Element;
2649 else
2650 return hr;
2651 break;
2658 if (hr != S_FALSE) return hr;
2659
2660 if (*reader_get_ptr(reader))
2661 {
2662 WARN("found garbage in the end of XML\n");
2663 return WC_E_SYNTAX;
2664 }
2665
2666 reader->instate = XmlReadInState_Eof;
2668 reader->nodetype = XmlNodeType_None;
2669 return hr;
2670 case XmlReadInState_Eof:
2671 return S_FALSE;
2672 default:
2673 FIXME("internal state %d not handled\n", reader->instate);
2674 return E_NOTIMPL;
2675 }
2676 }
2677
2678 return E_NOTIMPL;
2679}
2680
/* IUnknown::QueryInterface for the reader object.
   IID_IUnknown and IID_IXmlReader both resolve to `iface`, which is AddRef'ed
   before S_OK is returned. Any other IID sets *ppvObject to NULL and returns
   E_NOINTERFACE.
   NOTE(review): the declaration line and the statement that derives `This` are
   not visible in this listing. */
2682{
2684
2685 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2686
2687 if (IsEqualGUID(riid, &IID_IUnknown) ||
2688 IsEqualGUID(riid, &IID_IXmlReader))
2689 {
2690 *ppvObject = iface;
2691 }
2692 else
2693 {
2694 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2695 *ppvObject = NULL;
2696 return E_NOINTERFACE;
2697 }
2698
2699 IXmlReader_AddRef(iface);
2700
2701 return S_OK;
2702}
2703
/* Traces and returns the reference count `ref`.
   NOTE(review): the declaration line and the statements that derive `This` and
   compute `ref` are not visible in this listing; presumably the IUnknown AddRef
   implementation - confirm. */
2705{
2708 TRACE("(%p)->(%d)\n", This, ref);
2709 return ref;
2710}
2711
/* Unlinks every entry from both namespace lists of the reader (reader->ns and
   reader->nsdef). The _SAFE iteration form is used because entries are removed
   while walking the list.
   NOTE(review): the declaration line and the statements that release each
   entry's storage are not visible in this listing. */
2713{
2714 struct ns *ns, *ns2;
2715
2716 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2717 list_remove(&ns->entry);
2721 }
2722
2723 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2724 list_remove(&ns->entry);
2727 }
2728}
2729
/* Returns the parser to its pristine state: position 0/0, depth 0, node type
   None, resume state Initial with all saved resume offsets zeroed, and
   is_empty_element cleared.
   NOTE(review): the declaration line and some cleanup statements are not
   visible in this listing. */
2731{
2732 reader->position.line_number = 0;
2733 reader->position.line_position = 0;
2734
2739
2740 reader->depth = 0;
2741 reader->nodetype = XmlNodeType_None;
2742 reader->resumestate = XmlReadResumeState_Initial;
2743 memset(reader->resume, 0, sizeof(reader->resume));
2744 reader->is_empty_element = FALSE;
2745}
2746
/* Releases one reference. When `ref` reaches zero the reader's input, resolver
   and MultiLanguage objects are released (each only if set), followed by the
   allocator interface that was copied to a local beforehand. Returns `ref`.
   NOTE(review): the declaration line, the statements that derive `This` and
   compute `ref`, and the statements that tear down and free the object itself
   are not visible in this listing. */
2748{
2751
2752 TRACE("(%p)->(%d)\n", This, ref);
2753
2754 if (ref == 0)
2755 {
 /* keep the allocator pointer in a local so it can be released last */
2756 IMalloc *imalloc = This->imalloc;
2758 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2759 if (This->resolver) IXmlResolver_Release(This->resolver);
2760 if (This->mlang) IUnknown_Release(This->mlang);
2762 if (imalloc) IMalloc_Release(imalloc);
2763 }
2764
2765 return ref;
2766}
2767
/* Replaces the reader's input.
   Any current input is released first. A NULL `input` only resets the reader
   (state becomes XmlReadState_Initial) and returns S_OK. Otherwise `input` is
   queried for IID_IXmlReaderInput: an object using this module's own vtable is
   adopted directly, while a foreign implementation is released and rejected
   with E_FAIL. When the query fails, a new reader input is created around the
   supplied interface. On success both `state` and `instate` are set to their
   Initial values.
   NOTE(review): the declaration line, the statement that derives `This`, the
   reset calls, the first line of the input-creation call and the call that
   attaches the stream are not visible in this listing. */
2769{
2771 IXmlReaderInput *readerinput;
2772 HRESULT hr;
2773
2774 TRACE("(%p)->(%p)\n", This, input);
2775
2776 if (This->input)
2777 {
2779 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2780 This->input = NULL;
2781 }
2782
2784
2785 /* just reset current input */
2786 if (!input)
2787 {
2788 This->state = XmlReadState_Initial;
2789 return S_OK;
2790 }
2791
2792 /* now try IXmlReaderInput, ISequentialStream, IStream */
2793 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2794 if (hr == S_OK)
2795 {
2796 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2797 This->input = impl_from_IXmlReaderInput(readerinput);
2798 else
2799 {
2800 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2801 readerinput, readerinput->lpVtbl);
 /* drop the reference obtained by QueryInterface above */
2802 IUnknown_Release(readerinput);
2803 return E_FAIL;
2804
2805 }
2806 }
2807
2808 if (hr != S_OK || !readerinput)
2809 {
2810 /* create IXmlReaderInput basing on supplied interface */
2812 This->imalloc, NULL, FALSE, NULL, &readerinput);
2813 if (hr != S_OK) return hr;
2814 This->input = impl_from_IXmlReaderInput(readerinput);
2815 }
2816
2817 /* set stream for supplied IXmlReaderInput */
2819 if (hr == S_OK)
2820 {
2821 This->state = XmlReadState_Initial;
2822 This->instate = XmlReadInState_Initial;
2823 }
2824 return hr;
2825}
2826
/* Reads a reader property into *value.
   Returns E_INVALIDARG when `value` is NULL and E_NOTIMPL for properties not
   handled here. Interface-valued properties (mlang, resolver) are returned as
   LONG_PTR and AddRef'ed when non-NULL, so the caller receives its own
   reference. dtdmode, state and max_depth are returned by value.
   NOTE(review): the declaration line, the statement that derives `This` and the
   case labels are not visible in this listing. */
2828{
2830
2831 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2832
2833 if (!value) return E_INVALIDARG;
2834
2835 switch (property)
2836 {
2838 *value = (LONG_PTR)This->mlang;
2839 if (This->mlang)
2840 IUnknown_AddRef(This->mlang);
2841 break;
2843 *value = (LONG_PTR)This->resolver;
2844 if (This->resolver)
2845 IXmlResolver_AddRef(This->resolver);
2846 break;
2848 *value = This->dtdmode;
2849 break;
2851 *value = This->state;
2852 break;
2854 *value = This->max_depth;
2855 break;
2856 default:
2857 FIXME("Unimplemented property (%u)\n", property);
2858 return E_NOTIMPL;
2859 }
2860
2861 return S_OK;
2862}
2863
/* Writes a reader property.
   Interface-valued properties (mlang, resolver): the previous object is
   released, the new pointer stored and AddRef'ed when non-NULL. A non-NULL
   MultiLanguage object is stored but otherwise ignored (see the FIXME).
   The DTD processing mode is range-checked (E_INVALIDARG outside
   0.._DtdProcessing_Last); max_depth is stored as given.
   Returns E_NOTIMPL for properties not handled here, otherwise S_OK.
   NOTE(review): the old object is released before the new one is AddRef'ed, so
   passing the object that is already set relies on the caller holding its own
   reference - confirm. The declaration line, the statement that derives `This`
   and the case labels are not visible in this listing. */
2865{
2867
2868 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2869
2870 switch (property)
2871 {
2873 if (This->mlang)
2874 IUnknown_Release(This->mlang);
2875 This->mlang = (IUnknown*)value;
2876 if (This->mlang)
2877 IUnknown_AddRef(This->mlang);
2878 if (This->mlang)
2879 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2880 break;
2882 if (This->resolver)
2883 IXmlResolver_Release(This->resolver);
2884 This->resolver = (IXmlResolver*)value;
2885 if (This->resolver)
2886 IXmlResolver_AddRef(This->resolver);
2887 break;
2889 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2890 This->dtdmode = value;
2891 break;
2893 This->max_depth = value;
2894 break;
2895 default:
2896 FIXME("Unimplemented property (%u)\n", property);
2897 return E_NOTIMPL;
2898 }
2899
2900 return S_OK;
2901}
2902
/* Advances the reader to the next node and reports its type.
   `nodetype` may be NULL; a local is substituted so the final store is always
   valid. In XmlReadState_Error the stored error is returned again. On a parse
   failure the reader enters XmlReadState_Error, the node type becomes None,
   the depth is reset to 0 and the HRESULT is remembered in This->error.
   NOTE(review): the declaration line, the statement that derives `This`, the
   declaration of `type`, the first case label(s), the call that parses the
   next node and the statement executed on the first successful read are not
   visible in this listing. */
2904{
2906 XmlNodeType oldtype = This->nodetype;
2908 HRESULT hr;
2909
2910 TRACE("(%p)->(%p)\n", This, nodetype);
2911
2912 if (!nodetype)
2913 nodetype = &type;
2914
2915 switch (This->state)
2916 {
2918 hr = S_FALSE;
2919 break;
2920 case XmlReadState_Error:
2921 hr = This->error;
2922 break;
2923 default:
2925 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2927
2928 if (FAILED(hr))
2929 {
2930 This->state = XmlReadState_Error;
2931 This->nodetype = XmlNodeType_None;
2932 This->depth = 0;
2933 This->error = hr;
2934 }
2935 }
2936
2937 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2938 *nodetype = This->nodetype;
2939
2940 return hr;
2941}
2942
/* Stores the current node type (as computed by reader_get_nodetype()) in
   *node_type. Returns E_INVALIDARG for a NULL pointer, S_FALSE when the reader
   is in XmlReadState_Closed and S_OK otherwise.
   NOTE(review): the declaration line and the statement that derives `This` are
   not visible in this listing. */
2944{
2946
2947 TRACE("(%p)->(%p)\n", This, node_type);
2948
2949 if (!node_type)
2950 return E_INVALIDARG;
2951
2952 *node_type = reader_get_nodetype(This);
2953 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2954}
2955
/* Makes `attr` the reader's current attribute and rewinds the chunked-read
   offset to 0.
   NOTE(review): the declaration line and the remaining statements of this
   function are not visible in this listing. */
2957{
2958 reader->attr = attr;
2959 reader->chunk_read_off = 0;
2963}
2964
/* Positions the reader on its first attribute. Returns S_FALSE when the
   current node has no attributes (attr_count is 0), otherwise S_OK.
   NOTE(review): the declaration line and the statements that select the first
   attribute are not visible in this listing. */
2966{
2967 if (!reader->attr_count)
2968 return S_FALSE;
2969
2970 if (!reader->attr)
2972
2974
2975 return S_OK;
2976}
2977
/* Interface-level entry point that only traces in the visible code.
   NOTE(review): the declaration line, the statement that derives `This` and the
   return statement are not visible in this listing; presumably this forwards to
   the first-attribute helper - confirm. */
2979{
2981
2982 TRACE("(%p)\n", This);
2983
2985}
2986
/* Advances to the attribute following the current one in This->attrs.
   Returns S_FALSE when the node has no attributes or the current attribute is
   the last one, S_OK when a next attribute exists.
   NOTE(review): the declaration line, the statement that derives `This`, the
   statement taken when no attribute is current yet and the statement that
   selects `next` are not visible in this listing. */
2988{
2990 const struct list *next;
2991
2992 TRACE("(%p)\n", This);
2993
2994 if (!This->attr_count) return S_FALSE;
2995
2996 if (!This->attr)
2998
2999 next = list_next(&This->attrs, &This->attr->entry);
3000 if (next)
3002
3003 return next ? S_OK : S_FALSE;
3004}
3005
/* Resolves the namespace URI of `attr` into *uri / *len.
   Reserved names are answered from built-in constants: an unprefixed "xmlns"
   attribute or any attribute with the "xmlns" prefix yields the xmlns URI, and
   the "xml" prefix yields the XML namespace URI. Everything else is looked up
   by prefix with reader_lookup_ns(); when no binding exists the empty string
   with length 0 is returned.
   NOTE(review): the declaration line is not visible in this listing. */
3007{
3008 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009 '2','0','0','0','/','x','m','l','n','s','/',0};
3010 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3011 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3012
3013 /* Check for reserved prefixes first */
3014 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
3015 strval_eq(reader, &attr->prefix, &strval_xmlns))
3016 {
3017 *uri = xmlns_uriW;
 /* ARRAY_SIZE counts the terminating 0, hence the - 1 */
3018 *len = ARRAY_SIZE(xmlns_uriW) - 1;
3019 }
3020 else if (strval_eq(reader, &attr->prefix, &strval_xml))
3021 {
3022 *uri = xml_uriW;
3023 *len = ARRAY_SIZE(xml_uriW) - 1;
3024 }
3025 else
3026 {
3027 *uri = NULL;
3028 *len = 0;
3029 }
3030
 /* not a reserved name: fall back to the namespace bindings in scope */
3031 if (!*uri)
3032 {
3033 struct ns *ns;
3034
3035 if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3036 {
3037 *uri = ns->uri.str;
3038 *len = ns->uri.len;
3039 }
3040 else
3041 {
3042 *uri = emptyW;
3043 *len = 0;
3044 }
3045 }
3046}
3047
/* Reports the local name of `attr` through *name / *len.
   The first branch answers "xmlns" (length 5). Attributes flagged
   ATTRIBUTE_NS_DEFINITION report the prefix stored on the namespace entry found
   by reader_lookup_ns(). All other attributes report their own localname.
   NOTE(review): the result of reader_lookup_ns() is dereferenced without a NULL
   check - confirm it cannot fail for a namespace definition attribute. The
   declaration line and the condition of the first branch are not visible in
   this listing. */
3049{
3051 {
3052 *name = xmlnsW;
3053 *len = 5;
3054 }
3055 else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3056 {
3057 const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3058 *name = ns->prefix.str;
3059 *len = ns->prefix.len;
3060 }
3061 else
3062 {
3063 *name = attr->localname.str;
3064 *len = attr->localname.len;
3065 }
3066}
3067
3069 const WCHAR *local_name, const WCHAR *namespace_uri)
3070{
 /* Moves to the attribute whose local name and namespace URI both match.
    Returns E_INVALIDARG for a NULL local_name, S_FALSE when the node has no
    attributes or none matches, and S_OK on a match. A NULL namespace_uri is
    treated as the empty string. Lengths are compared before the strings.
    NOTE(review): the first line of the declaration, the statement that derives
    `This`, the calls that fetch each attribute's name/uri and the statement
    that selects the matching attribute are not visible in this listing. */
3072 UINT target_name_len, target_uri_len;
3073 struct attribute *attr;
3074
3075 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3076
3077 if (!local_name)
3078 return E_INVALIDARG;
3079
3080 if (!This->attr_count)
3081 return S_FALSE;
3082
3083 if (!namespace_uri)
3084 namespace_uri = emptyW;
3085
3086 target_name_len = lstrlenW(local_name);
3087 target_uri_len = lstrlenW(namespace_uri);
3088
3089 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3090 {
3091 UINT name_len, uri_len;
3092 const WCHAR *name, *uri;
3093
3096
3097 if (name_len == target_name_len && uri_len == target_uri_len &&
3098 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3099 {
3101 return S_OK;
3102 }
3103 }
3104
3105 return S_FALSE;
3106}
3107
/* Moves from an attribute back to the node that owns it.
   Returns S_FALSE when the node has no attributes, otherwise S_OK. The current
   attribute pointer is cleared, the prefix and qualified-name string values
   are restored for an empty element from This->empty_element, and the
   chunked-read offset is rewound to 0.
   NOTE(review): the declaration line, the statement that derives `This`, the
   statement taken while an attribute is current, the restoration done for a
   non-empty element and a trailing statement are not visible in this
   listing. */
3109{
3111
3112 TRACE("(%p)\n", This);
3113
3114 if (!This->attr_count) return S_FALSE;
3115
3116 if (This->attr)
3118
3119 This->attr = NULL;
3120
3121 /* FIXME: support other node types with 'attributes' like DTD */
3122 if (This->is_empty_element) {
3123 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3124 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3125 }
3126 else {
3127 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3128 if (element) {
3131 }
3132 }
3133 This->chunk_read_off = 0;
3135
3136 return S_OK;
3137}
3138
/* Reports the qualified name of the current node through *name and *len; always returns S_OK.
 * len may be NULL, in which case the length is written to a local dummy.
 * NOTE(review): the signature (3139) and several case labels and statements
 * (3141, 3155-3156, 3160-3162, 3170-3171, 3174-3175, 3186-3187) are missing from this extract. */
3140{
3142 struct attribute *attribute = This->attr;
3143 struct element *element;
3144 UINT length;
3145
3146 TRACE("(%p)->(%p %p)\n", This, name, len);
3147
 /* redirect a NULL len so the code below can store unconditionally */
3148 if (!len)
3149 len = &length;
3150
3151 switch (reader_get_nodetype(This))
3152 {
 /* Text, CDATA and the elided case labels report an empty name */
3153 case XmlNodeType_Text:
3154 case XmlNodeType_CDATA:
3157 *name = emptyW;
3158 *len = 0;
3159 break;
 /* elided case label(s): a prefixed element reports its stored qname */
3163 if (element->prefix.len)
3164 {
3165 *name = element->qname.str;
3166 *len = element->qname.len;
3167 }
3168 else
3169 {
 /* unprefixed branch body (3170-3171) is elided from the listing */
3172 }
3173 break;
 /* elided case label and condition; this branch reports xmlnsW with length 5 */
3176 {
3177 *name = xmlnsW;
3178 *len = 5;
3179 } else if (attribute->prefix.len)
3180 {
3181 *name = This->strvalues[StringValue_QualifiedName].str;
3182 *len = This->strvalues[StringValue_QualifiedName].len;
3183 }
3184 else
3185 {
 /* unprefixed attribute branch body (3186-3187) is elided from the listing */
3188 }
3189 break;
3190 default:
3191 *name = This->strvalues[StringValue_QualifiedName].str;
3192 *len = This->strvalues[StringValue_QualifiedName].len;
3193 break;
3194 }
3195
3196 return S_OK;
3197}
3198
/* Returns the entry at the head of reader->nsdef as a struct ns, or NULL when that list is empty.
 * NOTE(review): the signature (listing line 3199) is missing from this extract. */
3200{
3201 if (list_empty(&reader->nsdef))
3202 return NULL;
3203
3204 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
3205}
3206
/* Reports the namespace URI of the current node through *uri and *len.
 * Returns S_OK for handled node types; for unhandled ones it sets *uri to NULL,
 * *len to 0 and returns E_NOTIMPL. len may be NULL.
 * NOTE(review): the signature (3207) and several case labels and statements
 * (3209, 3221-3223, 3225-3226, 3228, 3232, 3246-3249) are missing from this extract. */
3208{
3210 const strval *prefix = &This->strvalues[StringValue_Prefix];
3211 XmlNodeType nodetype;
3212 struct ns *ns;
3213 UINT length;
3214
3215 TRACE("(%p %p %p)\n", iface, uri, len);
3216
 /* redirect a NULL len so the code below can store unconditionally */
3217 if (!len)
3218 len = &length;
3219
3220 switch ((nodetype = reader_get_nodetype(This)))
3221 {
 /* first case (label and body on lines 3221-3223) is elided from the listing */
3224 break;
 /* elided case label(s); line 3228 presumably looks ns up by prefix */
3227 {
3229
3230 /* pick top default ns if any */
3231 if (!ns)
3233
 /* fall back to an empty URI when no namespace was found */
3234 if (ns) {
3235 *uri = ns->uri.str;
3236 *len = ns->uri.len;
3237 }
3238 else {
3239 *uri = emptyW;
3240 *len = 0;
3241 }
3242 }
3243 break;
 /* Text, CDATA and the elided case labels report an empty URI */
3244 case XmlNodeType_Text:
3245 case XmlNodeType_CDATA:
3250 *uri = emptyW;
3251 *len = 0;
3252 break;
3253 default:
3254 FIXME("Unhandled node type %d\n", nodetype);
3255 *uri = NULL;
3256 *len = 0;
3257 return E_NOTIMPL;
3258 }
3259
3260 return S_OK;
3261}
3262
/* Reports the local name of the current node through *name and *len; always returns S_OK.
 * len may be NULL.
 * NOTE(review): the signature (3263) and several case labels and statements
 * (3265, 3278-3279, 3283-3287, 3289-3290) are missing from this extract. */
3264{
3266 struct element *element;
3267 UINT length;
3268
3269 TRACE("(%p)->(%p %p)\n", This, name, len);
3270
 /* redirect a NULL len so the code below can store unconditionally */
3271 if (!len)
3272 len = &length;
3273
3274 switch (reader_get_nodetype(This))
3275 {
 /* Text, CDATA and the elided case labels report an empty name */
3276 case XmlNodeType_Text:
3277 case XmlNodeType_CDATA:
3280 *name = emptyW;
3281 *len = 0;
3282 break;
 /* two further cases follow; their labels and bodies are elided from the listing */
3288 break;
3291 break;
3292 default:
3293 *name = This->strvalues[StringValue_LocalName].str;
3294 *len = This->strvalues[StringValue_LocalName].len;
3295 break;
3296 }
3297
3298 return S_OK;
3299}
3300
/* Reports the prefix of the current node through *ret and *len; always returns S_OK.
 * The outputs default to emptyW / 0 and are only overridden in the handled cases.
 * len may be NULL.
 * NOTE(review): the signature (3301), the case labels (3317-3319) and the first
 * condition (3324) are missing from this extract. */
3302{
3304 XmlNodeType nodetype;
3305 UINT length;
3306
3307 TRACE("(%p)->(%p %p)\n", This, ret, len);
3308
 /* redirect a NULL len so the code below can store unconditionally */
3309 if (!len)
3310 len = &length;
3311
3312 *ret = emptyW;
3313 *len = 0;
3314
3315 switch ((nodetype = reader_get_nodetype(This)))
3316 {
3320 {
3321 const strval *prefix = &This->strvalues[StringValue_Prefix];
3322 struct ns *ns;
3323
 /* elided first condition; this branch reports xmlW with length 3 */
3325 {
3326 *ret = xmlW;
3327 *len = 3;
3328 }
3329 else if (strval_eq(This, prefix, &strval_xmlns))
3330 {
3331 *ret = xmlnsW;
3332 *len = 5;
3333 }
 /* otherwise report the prefix stored with the matching namespace, if one is found */
3334 else if ((ns = reader_lookup_ns(This, prefix)))
3335 {
3336 *ret = ns->prefix.str;
3337 *len = ns->prefix.len;
3338 }
3339
3340 break;
3341 }
3342 default:
3343 ;
3344 }
3345
3346 return S_OK;
3347}
3348
/* Returns the value string for the current node, or NULL if an allocation fails.
 * When ensure_allocated is set and the value has no string pointer yet, a
 * null-terminated copy is allocated and cached in the value slot.
 * NOTE(review): listing lines 3355-3356, 3359, 3361 and 3366 (case labels, a condition
 * and a fallback statement) are missing from this extract. */
3349static const strval *reader_get_value(xmlreader *reader, BOOL ensure_allocated)
3350{
3351 strval *val;
3352
3353 switch (reader_get_nodetype(reader))
3354 {
 /* XmlNodeType_None and the elided case labels have an empty value */
3357 case XmlNodeType_None:
3358 return &strval_empty;
3360 /* For namespace definition attributes return values from namespace list */
3362 {
3363 struct ns *ns;
3364
 /* the fallback statement for a failed lookup (line 3366) is elided from the listing */
3365 if (!(ns = reader_lookup_ns(reader, &reader->attr->localname)))
3367
3368 return &ns->uri;
3369 }
3370 return &reader->attr->value;
3371 default:
3372 break;
3373 }
3374
3375 val = &reader->strvalues[StringValue_Value];
3376 if (!val->str && ensure_allocated)
3377 {
 /* copy val->len characters plus a terminator, then cache the copy in val->str */
3378 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3379 if (!ptr) return NULL;
3380 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3381 ptr[val->len] = 0;
3382 val->str = ptr;
3383 }
3384
3385 return val;
3386}
3387
/* Reports the current node's value through *value and, when len is non-NULL, *len.
 * *value is set to NULL up front. Returns a failure code from IXmlReader_Read(),
 * E_PENDING while input is still pending, E_OUTOFMEMORY when val is NULL, and S_OK otherwise.
 * NOTE(review): the signature (3388) and listing lines 3390, 3400 and 3409-3410
 * are missing from this extract. */
3389{
3391 const strval *val = &reader->strvalues[StringValue_Value];
3392 UINT off;
3393
3394 TRACE("(%p)->(%p %p)\n", reader, value, len);
3395
3396 *value = NULL;
3397
 /* a comment node with no value yet, or a pending reader, triggers another Read() first */
3398 if ((reader->nodetype == XmlNodeType_Comment && !val->str && !val->len) || is_reader_pending(reader))
3399 {
3401 HRESULT hr;
3402
3403 hr = IXmlReader_Read(iface, &type);
3404 if (FAILED(hr)) return hr;
3405
3406 /* return if still pending, partially read values are not reported */
3407 if (is_reader_pending(reader)) return E_PENDING;
3408 }
3409
 /* elided line 3410 presumably re-fetches val (it is NULL-checked right after) - TODO confirm */
3411 if (!val)
3412 return E_OUTOFMEMORY;
3413
 /* report only the part after the current chunk offset, then store the offset negated */
3414 off = abs(reader->chunk_read_off);
3415 assert(off <= val->len);
3416 *value = val->str + off;
3417 if (len) *len = val->len - off;
 /* NOTE(review): when off is 0 the stored offset stays 0, i.e. not negative */
3418 reader->chunk_read_off = -off;
3419 return S_OK;
3420}
3421
/* Copies up to chunk_size characters of the current value into buffer and advances
 * the chunk offset. The number of characters copied is stored in *read when read is non-NULL.
 * Returns S_OK when something was copied or chunk_size is 0, S_FALSE otherwise.
 * NOTE(review): the signature (3422) and listing lines 3424 and 3430 are missing from
 * this extract; line 3430 presumably assigns val. */
3423{
3425 const strval *val;
3426 UINT len = 0;
3427
3428 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3429
3431
3432 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3433 if (reader->chunk_read_off >= 0)
3434 {
 /* remaining characters, capped at the caller's buffer size */
3435 assert(reader->chunk_read_off <= val->len);
3436 len = min(val->len - reader->chunk_read_off, chunk_size);
3437 }
3438 if (read) *read = len;
3439
3440 if (len)
3441 {
3442 memcpy(buffer, reader_get_strptr(reader, val) + reader->chunk_read_off, len*sizeof(WCHAR));
3443 reader->chunk_read_off += len;
3444 }
3445
3446 return len || !chunk_size ? S_OK : S_FALSE;
3447}
3448
/* Stub: logs a FIXME and returns E_NOTIMPL without touching the output parameters.
 * NOTE(review): the first signature line (listing line 3449) is missing from this extract. */
3450 LPCWSTR *baseUri,
3451 UINT *baseUri_length)
3452{
3453 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3454 return E_NOTIMPL;
3455}
3456
/* Stub: logs a FIXME and always returns FALSE.
 * NOTE(review): the signature (listing line 3457) is missing from this extract. */
3458{
3459 FIXME("(%p): stub\n", iface);
3460 return FALSE;
3461}
3462
/* Returns This->is_empty_element when the current node type is XmlNodeType_Element,
 * FALSE for every other node type.
 * NOTE(review): the signature (3463) and listing line 3465 are missing from this extract. */
3464{
3466 TRACE("(%p)\n", This);
3467 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3468 when current node is start tag of an element */
3469 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3470}
3471
/* Stores the line number for the current node in *line_number.
 * Returns E_INVALIDARG for a NULL pointer, S_FALSE when the reader state is
 * XmlReadState_Closed, S_OK otherwise.
 * NOTE(review): the signature (3472), the case labels and some statements
 * (3474, 3484-3486, 3489, 3492-3493) are missing from this extract. */
3473{
3475 const struct element *element;
3476
3477 TRACE("(%p %p)\n", This, line_number);
3478
3479 if (!line_number)
3480 return E_INVALIDARG;
3481
 /* the position source depends on the node type; case labels are elided from the listing */
3482 switch (reader_get_nodetype(This))
3483 {
 /* position stored with an element (how 'element' is obtained is elided) */
3487 *line_number = element->position.line_number;
3488 break;
 /* position stored with the current attribute */
3490 *line_number = This->attr->position.line_number;
3491 break;
 /* position stored for the empty element */
3494 *line_number = This->empty_element.position.line_number;
3495 break;
3496 default:
3497 *line_number = This->position.line_number;
3498 break;
3499 }
3500
3501 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3502}
3503
/* Stores the line position for the current node in *line_position.
 * Returns E_INVALIDARG for a NULL pointer, S_FALSE when the reader state is
 * XmlReadState_Closed, S_OK otherwise.
 * NOTE(review): the signature (3504), the case labels and some statements
 * (3506, 3516-3518, 3521, 3524-3525) are missing from this extract. */
3505{
3507 const struct element *element;
3508
3509 TRACE("(%p %p)\n", This, line_position);
3510
3511 if (!line_position)
3512 return E_INVALIDARG;
3513
 /* the position source depends on the node type; case labels are elided from the listing */
3514 switch (reader_get_nodetype(This))
3515 {
 /* position stored with an element (how 'element' is obtained is elided) */
3519 *line_position = element->position.line_position;
3520 break;
 /* position stored with the current attribute */
3522 *line_position = This->attr->position.line_position;
3523 break;
 /* position stored for the empty element */
3526 *line_position = This->empty_element.position.line_position;
3527 break;
3528 default:
3529 *line_position = This->position.line_position;
3530 break;
3531 }
3532
3533 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
3534}
3535
/* Stores This->attr_count in *count.
 * Returns E_INVALIDARG when count is NULL, S_OK otherwise.
 * NOTE(review): the signature (3536) and listing line 3538 are missing from this extract. */
3537{
3539
3540 TRACE("(%p)->(%p)\n", This, count);
3541
3542 if (!count) return E_INVALIDARG;
3543
3544 *count = This->attr_count;
3545 return S_OK;
3546}
3547
/* Stores This->depth in *depth and returns S_OK.
 * NOTE(review): the signature (3548) and listing line 3550 are missing from this extract.
 * NOTE(review): depth is dereferenced without a NULL check - confirm whether
 * E_INVALIDARG handling is expected here. */
3549{
3551 TRACE("(%p)->(%p)\n", This, depth);
3552 *depth = This->depth;
3553 return S_OK;
3554}
3555
/* Returns non-zero when the reader state is XmlReadState_EndOfFile.
 * NOTE(review): the signature (3556) and listing line 3558 are missing from this extract. */
3557{
3559 TRACE("(%p)\n", iface);
3560 return This->state == XmlReadState_EndOfFile;
3561}
3562
/* Method table installed as IXmlReader_iface.lpVtbl by CreateXmlReader().
 * NOTE(review): the entries (listing lines 3565-3590) are missing from this extract. */
3563static const struct IXmlReaderVtbl xmlreader_vtbl =
3564{
3591};
3592
/* QueryInterface for the reader input object.
 * For IID_IXmlReaderInput (or the second, elided IID) it returns iface in *ppvObject,
 * adds a reference and returns S_OK. For any other riid it sets *ppvObject to NULL
 * and returns E_NOINTERFACE.
 * NOTE(review): the signature (3593-3594) and listing lines 3596 and 3601 are missing
 * from this extract. */
3595{
3597
3598 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3599
3600 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3602 {
3603 *ppvObject = iface;
3604 }
3605 else
3606 {
3607 WARN("interface %s not implemented\n", debugstr_guid(riid));
3608 *ppvObject = NULL;
3609 return E_NOINTERFACE;
3610 }
3611
 /* only reached on success: the returned pointer carries its own reference */
3612 IUnknown_AddRef(iface);
3613
3614 return S_OK;
3615}
3616
/* Traces and returns the value of ref.
 * NOTE(review): the signature (3617) and listing lines 3619-3620, which declare and
 * compute ref, are missing from this extract; presumably ref is the incremented
 * reference count - TODO confirm. */
3618{
3621 TRACE("(%p)->(%d)\n", This, ref);
3622 return ref;
3623}
3624
/* Returns the value of ref; when it is 0 the resources held by the input object are released.
 * NOTE(review): the signature (3625) and listing lines 3627-3628 and 3639 are missing
 * from this extract; presumably ref is the decremented reference count. */
3626{
3629
3630 TRACE("(%p)->(%d)\n", This, ref);
3631
3632 if (ref == 0)
3633 {
 /* imalloc is copied to a local and released last, after the other cleanup steps */
3634 IMalloc *imalloc = This->imalloc;
3635 if (This->input) IUnknown_Release(This->input);
3636 if (This->stream) ISequentialStream_Release(This->stream);
3637 if (This->buffer) free_input_buffer(This->buffer);
3638 readerinput_free(This, This->baseuri);
 /* elided line 3639 presumably frees the object itself - TODO confirm */
3640 if (imalloc) IMalloc_Release(imalloc);
3641 }
3642
3643 return ref;
3644}
3645
/* IUnknown method table installed as IXmlReaderInput_iface.lpVtbl for reader input objects.
 * NOTE(review): the entries (listing lines 3648-3650) are missing from this extract. */
3646static const struct IUnknownVtbl xmlreaderinputvtbl =
3647{
3651};
3652
/* Allocates and initializes a reader object, then returns the interface requested
 * by riid through *obj.
 * Memory comes from imalloc when one is supplied, otherwise from heap_alloc().
 * Returns E_OUTOFMEMORY when allocation fails, otherwise the QueryInterface result.
 * NOTE(review): the signature (3653) and the declaration of 'reader' (3655) are
 * missing from this extract. */
3654{
3656 HRESULT hr;
3657 int i;
3658
3659 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3660
3661 if (imalloc)
3662 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3663 else
3664 reader = heap_alloc(sizeof(*reader));
3665 if (!reader)
3666 return E_OUTOFMEMORY;
3667
 /* start from an all-zero object, then set the non-zero defaults */
3668 memset(reader, 0, sizeof(*reader));
3669 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3670 reader->ref = 1;
3671 reader->state = XmlReadState_Closed;
3672 reader->instate = XmlReadInState_Initial;
3673 reader->resumestate = XmlReadResumeState_Initial;
3674 reader->dtdmode = DtdProcessing_Prohibit;
 /* the reader keeps its own reference on the caller's allocator */
3675 reader->imalloc = imalloc;
3676 if (imalloc) IMalloc_AddRef(imalloc);
3677 reader->nodetype = XmlNodeType_None;
3678 list_init(&reader->attrs);
3679 list_init(&reader->nsdef);
3680 list_init(&reader->ns);
3681 list_init(&reader->elements);
3682 reader->max_depth = 256;
3683
3684 reader->chunk_read_off = 0;
3685 for (i = 0; i < StringValue_Last; i++)
3686 reader->strvalues[i] = strval_empty;
3687
 /* hand out the requested interface, then drop the creation reference taken above;
  * presumably a failed QueryInterface leaves no reference, so Release destroys the object */
3688 hr = IXmlReader_QueryInterface(&reader->IXmlReader_iface, riid, obj);
3689 IXmlReader_Release(&reader->IXmlReader_iface);
3690
3691 TRACE("returning iface %p, hr %#x\n", *obj, hr);
3692
3693 return hr;
3694}
3695
3697 IMalloc *imalloc,
3699 BOOL hint,
3700 LPCWSTR base_uri,
3701 IXmlReaderInput **ppInput)
3702{
3703 xmlreaderinput *readerinput;
3704 HRESULT hr;
3705
3706 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3707 hint, wine_dbgstr_w(base_uri), ppInput);
3708
3709 if (!stream || !ppInput) return E_INVALIDARG;
3710
3711 if (imalloc)
3712 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3713 else
3714 readerinput = heap_alloc(sizeof(*readerinput));
3715 if(!readerinput) return E_OUTOFMEMORY;
3716
3717 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3718 readerinput->ref = 1;
3719 readerinput->imalloc = imalloc;
3720 readerinput->stream = NULL;
3721 if (imalloc) IMalloc_AddRef(imalloc);
3722 readerinput->encoding = parse_encoding_name(encoding, -1);
3723 readerinput->hint = hint;
3724 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3725 readerinput->pending = 0;
3726
3727 hr = alloc_input_buffer(readerinput);
3728 if (hr != S_OK)