// Free-function overload taking the document as a String and returning the
// result as an owning std::unique_ptr<XmlElement>.
// (Only the signature is visible in this excerpt.)
43 std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
// Overload taking the document as a File; same return type as above.
// (Only the signature is visible in this excerpt.)
48 std::unique_ptr<XmlElement> parseXML (
const File& fileToParse)
// (Enclosing function not visible in this excerpt.)
// Hands newSource to the inputSource member via reset(). Elsewhere in this file
// inputSource is compared against nullptr and dereferenced with ->, so it is a
// pointer-like holder (presumably an owning smart pointer — confirm in the header).
55 inputSource.reset (newSource);
// (Enclosing function not visible in this excerpt.)
// Stores the caller's flag; readChildElements() reads it to decide whether
// text content is kept by default.
60 ignoreEmptyTextElements = shouldBeIgnored;
// Helpers for deciding which characters may appear in a tag or attribute name.
63 namespace XmlIdentifierChars
// Fallback classification used by isIdentifierChar() for characters that lie
// outside its lookup table. The visible tail of the condition accepts '_', '-',
// ':' and '.'; the leading clause(s) are not shown in this excerpt.
65 static bool isIdentifierCharSlow (juce_wchar c) noexcept
68 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
// Fast classification: characters below (table length * 32) are answered from a
// bitmask table, everything else is delegated to isIdentifierCharSlow().
71 static bool isIdentifierChar (juce_wchar c) noexcept
// One bit per character, 32 characters per word: bit n of legalChars[w] is set
// when character (w * 32 + n) is legal. The set bits correspond to
// '-', '.', '0'-'9', ':', 'A'-'Z', '_' and 'a'-'z'.
73 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
// NOTE(review): the literal 1 is a signed int, so for (c & 31) == 31 (e.g. '_',
// value 95) the shift reaches the sign bit before the result is combined with
// the uint32 table word; an unsigned literal would avoid relying on
// implementation-defined behaviour in pre-C++20 compilers.
// NOTE(review): the range check casts c to int; if juce_wchar can hold a negative
// value the check would pass and c >> 5 would index outside the table —
// confirm juce_wchar is unsigned (or never negative) on all targets.
75 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
76 : isIdentifierCharSlow (c);
// (Enclosing helper's header is not visible here; presumably findEndOfToken,
// which other functions in this file call.) Loops while *p is an identifier
// character.
95 while (isIdentifierChar (*p))
// (Enclosing function header is not visible in this excerpt.)
// Only when no text has been cached yet and an input source is available:
104 if (originalText.
isEmpty() && inputSource !=
nullptr)
// Ask the source for a stream; the unique_ptr closes it on scope exit.
// (The code that checks and reads the stream is not visible here.)
106 std::unique_ptr<InputStream> in (inputSource->createInputStream());
// The following section is compiled only when JUCE_STRING_UTF_TYPE is 8.
113 #if JUCE_STRING_UTF_TYPE == 8
// View the loaded data block as a const char buffer.
117 auto* text =
static_cast<const char*
> (data.
getData());
// Parse starting at the beginning of the cached text, forwarding the caller's
// onlyReadOuterDocumentElement flag unchanged.
139 return parseDocumentElement (originalText.
getCharPointer(), onlyReadOuterDocumentElement);
// Records an error condition for the current parse.
//   desc    - description of the problem.
//   carryOn - true if parsing may continue after this error.
// (Lines between the signature and the assignment below are not visible here.)
147 void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
// errorOccurred becomes the inverse of carryOn: a recoverable error
// (carryOn == true) leaves it false, a fatal one sets it to true.
150 errorOccurred = ! carryOn;
// Returns the contents of a file referenced from the document, resolved through
// the current input source. Does nothing with `filename` unless an input source
// has been set. (The fallback return value is not visible in this excerpt.)
153 String XmlDocument::getFileContents (
const String& filename)
const
155 if (inputSource !=
nullptr)
// The name is trimmed and unquoted before being passed to the source, so a
// quoted name with surrounding whitespace is accepted.
157 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
// Reads the whole stream into a String.
// (Presumably guarded by a null check on `in` on the hidden lines — confirm.)
160 return in->readEntireStreamAsString();
// Returns a single juce_wchar and is declared noexcept; other functions in this
// file use it to consume characters one at a time. (Body not visible in this
// excerpt.)
166 juce_wchar XmlDocument::readNextChar() noexcept
// (Tail of a signature; the function name and first parameter are on lines not
// visible in this excerpt. The body below uses `textToParse`.)
180 const bool onlyReadOuterDocumentElement)
// Reset per-parse state: no error yet, and the DTD has not been loaded.
183 errorOccurred =
false;
185 needToLoadDTD =
true;
// Validation cascade: each failing stage stores a fixed message in lastError.
187 if (textToParse.isEmpty())
189 lastError =
"not enough input";
191 else if (! parseHeader())
193 lastError =
"malformed header";
195 else if (! parseDTD())
197 lastError =
"malformed DTD";
// Read the root element. Child elements are parsed only when the caller did
// NOT ask for just the outer element. The unique_ptr owns the result until
// it is handed over below.
202 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
// Ownership passes to the caller as a raw pointer.
205 return result.release();
// Processes the header at the start of the document, if there is one, and
// leaves `input` positioned after it. Returns a bool that the caller treats as
// "header was well-formed" (false produces "malformed header").
211 bool XmlDocument::parseHeader()
213 skipNextWhiteSpace();
// headerEnd marks where the header terminates; an empty result means the
// terminator was not found. (How headerEnd is computed is not visible here.)
219 if (headerEnd.isEmpty())
// Extract the encoding value from the header text: everything after
// "encoding", then after "=", then between the first pair of double quotes.
// NOTE(review): only double-quoted values are searched for here; a
// single-quoted encoding value would not be extracted — confirm intended.
223 auto encoding = String (input, headerEnd)
224 .fromFirstOccurrenceOf (
"encoding",
false,
true)
225 .fromFirstOccurrenceOf (
"=",
false,
false)
226 .fromFirstOccurrenceOf (
"\"",
false,
false)
227 .upToFirstOccurrenceOf (
"\"",
false,
false)
// Debug-only check: a declared encoding is expected to be some form of UTF.
237 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
// Step over the two-character header terminator and any following whitespace.
240 input = headerEnd + 2;
241 skipNextWhiteSpace();
// Consumes the DTD section of the document, if present, and stores its text in
// dtdText. Returns a bool that the caller treats as "DTD was well-formed"
// (false produces "malformed DTD").
247 bool XmlDocument::parseDTD()
// Remember where the DTD text begins so it can be sliced out afterwards.
252 auto dtdStart = input;
// n acts as a counter that starts at 1; the loop runs until it drops to 0.
// (The lines that adjust n are not visible — presumably it tracks bracket
// nesting; confirm.)
254 for (
int n = 1; n > 0;)
256 auto c = readNextChar();
// Store everything from dtdStart up to, but excluding, the last character
// consumed, with surrounding whitespace removed.
267 dtdText = String (dtdStart, input - 1).
trim();
// Advances `input` past ignorable content. Besides whitespace (handled on lines
// not visible here), the visible code also skips sections terminated by "-->"
// and by "?>".
273 void XmlDocument::skipNextWhiteSpace()
// Find the end of a section closed by "-->".
292 auto closeComment = input.
indexOf (CharPointer_ASCII (
"-->"));
// Negative index: terminator not found (handling is on hidden lines).
294 if (closeComment < 0)
// Jump past the terminator itself (3 characters).
300 input += closeComment + 3;
// Find the end of a section closed by "?>".
307 auto closeBracket = input.
indexOf (CharPointer_ASCII (
"?>"));
// Negative index: terminator not found (handling is on hidden lines).
309 if (closeBracket < 0)
// Jump past the terminator itself (2 characters).
315 input += closeBracket + 2;
// Reads a quoted value from the input and appends its content to `result`.
// The first character read is taken as the quote character, and the value
// ends at the next occurrence of that same character.
324 void XmlDocument::readQuotedString (String& result)
// Whatever character comes first is the delimiter to match later.
326 auto quote = readNextChar();
330 auto c = readNextChar();
// Peek at the current character without consuming it.
347 auto character = *input;
// Closing quote: flush the pending run [start, input) into result.
349 if (character == quote)
351 result.appendCharPointer (start, input);
// '&' starts an entity: flush the text gathered so far before it is handled
// (the entity handling itself is on lines not visible here).
356 if (character ==
'&')
358 result.appendCharPointer (start, input);
// Reported as a fatal error (carryOn == false).
364 setLastError (
"unmatched quotes",
false);
// Parses one element, including its attributes, starting at the current input
// position. A new XmlElement is allocated with `new` once a tag name has been
// found; `node` starts as nullptr. When alsoParseSubElements is true the
// element's children are parsed too.
// (The return statement is not visible in this excerpt — presumably `node`.)
375 XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
377 XmlElement* node =
nullptr;
378 skipNextWhiteSpace();
// Find where the tag name ends.
386 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
// No name characters found at this position: skip whitespace and try once more.
388 if (endOfToken == input)
391 skipNextWhiteSpace();
392 endOfToken = XmlIdentifierChars::findEndOfToken (input);
// Still nothing: fatal error.
394 if (endOfToken == input)
396 setLastError (
"tag name missing",
false);
// Create the element, named by the characters in [input, endOfToken).
401 node =
new XmlElement (input, endOfToken);
// Appender that adds attribute nodes to the element's attribute list.
403 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
408 skipNextWhiteSpace();
// "/>" : self-closing element.
412 if (c ==
'/' && input[1] ==
'>')
// Descend into the element's children only when requested.
423 if (alsoParseSubElements)
424 readChildElements (*node);
// An identifier character here starts an attribute name.
430 if (XmlIdentifierChars::isIdentifierChar (c))
432 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
434 if (attNameEnd != input)
// Remember where the name starts; [attNameStart, attNameEnd) is the name.
436 auto attNameStart = input;
438 skipNextWhiteSpace();
// The name must be followed by '='.
440 if (readNextChar() ==
'=')
442 skipNextWhiteSpace();
443 auto nextChar = *input;
// The value may be delimited by either double or single quotes.
445 if (nextChar ==
'"' || nextChar ==
'\'')
// Allocate the attribute node, read its quoted value straight into it,
// then append it to the element's attribute list.
447 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
448 readQuotedString (newAtt->value);
449 attributeAppender.append (newAtt);
// Fatal error; the message includes the attribute name.
455 setLastError (
"expected '=' after attribute '"
456 + String (attNameStart, attNameEnd) +
"'",
false);
// Fatal error; the message includes the tag name and the offending character.
464 setLastError (
"illegal character found in " + node->getTagName() +
": '" + c +
"'",
false);
// Parses the content of `parent` — nested elements, CDATA, comments, entities
// and plain text — and appends the results to parent's child list.
474 void XmlDocument::readChildElements (XmlElement& parent)
// Appender that adds children to parent's child-element list.
476 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
// Remember the position before skipping whitespace so that text content can
// be re-read later with its leading whitespace intact (see the rewind below).
480 auto preWhitespaceInput = input;
481 skipNextWhiteSpace();
// Fatal error (the triggering condition is on lines not visible here).
485 setLastError (
"unmatched tags",
false);
// Find the next '>' and move input just past it.
496 auto closeTag = input.
indexOf ((juce_wchar)
'>');
499 input += closeTag + 1;
// CDATA handling: remember where the section's content begins.
507 auto inputStart = input;
515 setLastError (
"unterminated CDATA section",
false);
// "]]>" terminates the CDATA section.
520 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
// Parse a nested element (including its own children) and append it when one
// was produced.
533 if (
auto* n = readNextElement (
true))
534 childAppender.append (n);
// Text content: rewind to before the skipped whitespace so it is preserved.
541 input = preWhitespaceInput;
542 MemoryOutputStream textElementContent;
// When empty text elements are being ignored, the text is kept only if
// something later flips this flag to true.
543 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
// "<!--" inside text: a comment to be skipped.
551 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
554 auto closeComment = input.
indexOf (CharPointer_ASCII (
"-->"));
556 if (closeComment < 0)
558 setLastError (
"unterminated comment",
false);
// Jump past the "-->" terminator (3 characters).
563 input += closeComment + 3;
572 setLastError (
"unmatched tags",
false);
// An entity whose text starts with '<' and has at least one more character
// is treated as markup: the parser is temporarily pointed at the entity's
// text and elements are read from it.
582 if (entity.startsWithChar (
'<') && entity [1] != 0)
// Save the parser state that is modified while reading from the entity.
584 auto oldInput = input;
585 auto oldOutOfData = outOfData;
587 input = entity.getCharPointer();
// Read elements from the entity text until none is produced.
590 while (
auto* n = readNextElement (
true))
591 childAppender.append (n);
// Restore the saved state (restoring `input` from oldInput is presumably on
// a line not visible here — confirm).
594 outOfData = oldOutOfData;
// Otherwise the entity is plain text: append it, and keep the text element
// if the entity has any non-whitespace characters.
598 textElementContent << entity;
599 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
// Plain character data.
606 auto nextChar = *input;
// Carriage return, with a check for a following line feed ("\r\n").
// (The resulting action is on lines not visible here.)
608 if (nextChar ==
'\r')
612 if (input[1] ==
'\n')
// '<' or '&' starts markup or an entity.
616 if (nextChar ==
'<' || nextChar ==
'&')
621 setLastError (
"unmatched tags",
false);
// Ordinary character: append it to the text buffer as UTF-8.
626 textElementContent.appendUTF8Char (nextChar);
// Use the accumulated text only if it was judged worth keeping.
632 if (contentShouldBeUsed)
// Reads an entity reference from the input and appends its expansion to
// `result`. The visible branches cover numeric character references (after '#',
// hexadecimal or decimal) and named entities resolved via expandExternalEntity().
638 void XmlDocument::readEntity (String& result)
// '#' introduces a numeric character reference.
668 else if (*input ==
'#')
// 'x' or 'X' after '#' selects hexadecimal digits.
673 if (*input ==
'x' || *input ==
'X')
// Consume digits up to the terminating ';'.
678 while (input[0] !=
';')
// Reject a non-hex digit or more than 8 digits; reported as recoverable
// (carryOn == true).
682 if (hexValue < 0 || ++numChars > 8)
684 setLastError (
"illegal escape sequence",
true);
// Shift in the next hex digit (4 bits per digit).
688 charCode = (charCode << 4) | hexValue;
// A decimal digit after '#' selects decimal digits.
694 else if (input[0] >=
'0' && input[0] <=
'9')
698 while (input[0] !=
';')
// (The condition guarding this error is on a line not visible here.)
702 setLastError (
"illegal escape sequence",
true);
// Accumulate the next decimal digit.
706 charCode = charCode * 10 + ((int) input[0] -
'0');
// Neither hex nor decimal after '#'.
714 setLastError (
"illegal escape sequence",
true);
// Append the decoded code point as a single character.
719 result << (juce_wchar) charCode;
// Named entity: the name runs from here up to the next ';'.
723 auto entityNameStart = input;
724 auto closingSemiColon = input.
indexOf ((juce_wchar)
';');
// No ';' found (handling is on lines not visible here).
726 if (closingSemiColon < 0)
// Move past the ';' and append the entity's expansion. The name is built from
// closingSemiColon characters starting at entityNameStart.
733 input += closingSemiColon + 1;
734 result += expandExternalEntity (String (entityNameStart, (
size_t) closingSemiColon));
// Expands an entity given by name (`ent`) and returns the replacement text.
// char1 is tested for 'x'/'X' and for a decimal digit; any name not handled by
// the visible and hidden special cases falls through to expandExternalEntity().
// (How char1 is obtained, and the result of each branch, are on lines not
// visible in this excerpt.)
739 String XmlDocument::expandEntity (
const String& ent)
// Hexadecimal marker.
751 if (char1 ==
'x' || char1 ==
'X')
// Decimal digit.
754 if (char1 >=
'0' && char1 <=
'9')
// Reported as a fatal error (carryOn == false).
757 setLastError (
"illegal escape sequence",
false);
// Fall back to entities declared in the DTD.
761 return expandExternalEntity (ent);
// Resolves `entity` against declarations in the document's DTD. The DTD is
// tokenised into tokenisedDTD (with needToLoadDTD cleared afterwards), parameter
// entities ("%name;") are expanded in place, and then the token list is searched
// for an "<!entity" declaration of the requested name. An unresolved name is
// reported as "unknown entity" (recoverable).
764 String XmlDocument::expandExternalEntity (
const String& entity)
// DTD ends in: system "<quoted file name>"  -> load the DTD from that file.
// NOTE(review): indexes size() - 2 / size() - 1 without a visible size check;
// this relies on the container tolerating out-of-range indices — confirm.
773 if (tokenisedDTD[tokenisedDTD.
size() - 2].equalsIgnoreCase (
"system")
774 && tokenisedDTD[tokenisedDTD.
size() - 1].isQuotedString())
// fn is the quoted file-name token.
776 auto fn = tokenisedDTD[tokenisedDTD.
size() - 1];
// Replace the token list with the tokens of the referenced file's contents.
778 tokenisedDTD.
clear();
779 tokenisedDTD.
addTokens (getFileContents (fn),
true);
// Otherwise discard the tokens and re-tokenise from a bracketed sub-range
// (the bracket search is on lines not visible here).
783 tokenisedDTD.
clear();
790 if (closeBracket > openBracket)
792 closeBracket),
true);
// Walk the tokens backwards so that insertions at index i do not disturb the
// indices still to be visited.
796 for (
int i = tokenisedDTD.
size(); --i >= 0;)
// A token of the form %name; is a parameter-entity reference.
798 if (tokenisedDTD[i].startsWithChar (
'%')
799 && tokenisedDTD[i].endsWithChar (
';'))
// Look up the name with the leading '%' and trailing ';' stripped.
801 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
803 newToks.addTokens (parsed,
true);
// Insert the replacement tokens in reverse so they end up in original order.
807 for (
int j = newToks.size(); --j >= 0;)
808 tokenisedDTD.
insert (i, newToks[j]);
// The DTD is now loaded; later calls skip the loading work above.
813 needToLoadDTD =
false;
// Search for a declaration of the requested entity.
816 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
818 if (tokenisedDTD[i] == entity)
// The preceding token must be "<!entity".
// NOTE(review): at i == 0 this reads index -1 — relies on the container
// tolerating out-of-range indices; confirm.
820 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
// The following token is the value: drop a trailing '>', trim, unquote.
822 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">").
trim().unquoted();
// Expand entity references nested inside the value.
825 auto ampersand = ent.indexOfChar (
'&');
827 while (ampersand >= 0)
// NOTE(review): the search for ';' and the substring below both start at
// i + 1, where i is the token index of the outer loop, not a position in
// `ent`. The position of the '&' is held in `ampersand`, so ampersand + 1
// looks like the intended offset — confirm and fix.
829 auto semiColon = ent.indexOf (i + 1,
";");
833 setLastError (
"entity without terminating semi-colon",
false);
837 auto resolved = expandEntity (ent.substring (i + 1, semiColon));
// Splice the expansion in place of the "&name;" span.
839 ent = ent.substring (0, ampersand)
841 + ent.substring (semiColon + 1);
// NOTE(review): semiColon was measured in the string before it was rebuilt
// above, so this start offset may not line up with the new `ent` — confirm.
843 ampersand = ent.indexOfChar (semiColon + 1,
'&');
// No declaration matched; recoverable error.
851 setLastError (
"unknown entity",
true);
// Looks up a parameter entity in the tokenised DTD and returns its replacement
// text. A declaration is matched as the token sequence: "<!entity", "%", name.
// (The return value when nothing matches is not visible in this excerpt.)
855 String XmlDocument::getParameterEntity (
const String& entity)
857 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
// NOTE(review): i - 1 and i - 2 are read without a visible lower-bound check;
// this relies on the container tolerating out-of-range indices — confirm.
859 if (tokenisedDTD[i] == entity
860 && tokenisedDTD [i - 1] ==
"%"
861 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
// The token after the name is either the value or the keyword "system".
863 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">");
// "system": the token after that names a file whose contents are returned.
865 if (ent.equalsIgnoreCase (
"system"))
866 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
// Otherwise the value itself, trimmed and with surrounding quotes removed.
868 return ent.trim().unquoted();